Merge remote-tracking branch 'kiszka/master'
diff --git a/hypervisor/paging.c b/hypervisor/paging.c
index ff4418083a7fa68bdab4a2643845e1fa1b8a75c5..1f228878b6f48ca11d8e52f248f6678c0f12de39 100644
--- a/hypervisor/paging.c
+++ b/hypervisor/paging.c
@@ -1,7 +1,7 @@
 /*
  * Jailhouse, a Linux-based partitioning hypervisor
  *
- * Copyright (c) Siemens AG, 2013
+ * Copyright (c) Siemens AG, 2013, 2014
  *
  * Authors:
  *  Jan Kiszka <jan.kiszka@siemens.com>
 
 extern u8 __page_pool[];
 
+/**
+ * Offset between virtual and physical hypervisor addresses.
+ *
+ * @note Private, use paging_hvirt2phys() or paging_phys2hvirt() instead.
+ */
+unsigned long page_offset;
+
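
The helpers that consume page_offset are not part of this file; a minimal sketch of how paging_phys2hvirt() and paging_hvirt2phys() presumably apply it (illustration only, the real inline functions live in the paging header):

/* Sketch: linear virt<->phys translation within the hypervisor region. */
static inline unsigned long paging_hvirt2phys(const volatile void *hvirt)
{
        return (unsigned long)hvirt - page_offset;
}

static inline void *paging_phys2hvirt(unsigned long phys)
{
        return (void *)(phys + page_offset);
}
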
+/** Page pool containing physical pages for use by the hypervisor. */
 struct page_pool mem_pool;
+/** Page pool containing virtual pages for remappings by the hypervisor. */
 struct page_pool remap_pool = {
-       .base_address = (void *)REMAP_BASE_ADDR,
+       .base_address = (void *)REMAP_BASE,
        .pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
 };
 
+/** Descriptor of the hypervisor paging structures. */
 struct paging_structures hv_paging_structs;
 
-unsigned long page_map_get_phys_invalid(pt_entry_t pte, unsigned long virt)
+/**
+ * Trivial implementation of paging::get_phys (for non-terminal levels)
+ * @param pte See paging::get_phys.
+ * @param virt See paging::get_phys.
+ *
+ * @return @c INVALID_PHYS_ADDR.
+ *
+ * @see paging
+ */
+unsigned long paging_get_phys_invalid(pt_entry_t pte, unsigned long virt)
 {
        return INVALID_PHYS_ADDR;
 }
@@ -40,19 +59,25 @@ unsigned long page_map_get_phys_invalid(pt_entry_t pte, unsigned long virt)
 static unsigned long find_next_free_page(struct page_pool *pool,
                                         unsigned long start)
 {
-       unsigned long start_mask =
-               ~0UL >> (BITS_PER_LONG - (start % BITS_PER_LONG));
        unsigned long bmp_pos, bmp_val, page_nr;
+       unsigned long start_mask = 0;
 
        if (start >= pool->pages)
                return INVALID_PAGE_NR;
 
+       /*
+        * If we don't start at the beginning of a bitmap word, create a mask
+        * to mark the pages before the start page as (virtually) used.
+        */
+       if (start % BITS_PER_LONG > 0)
+               start_mask = ~0UL >> (BITS_PER_LONG - (start % BITS_PER_LONG));
+
        for (bmp_pos = start / BITS_PER_LONG;
             bmp_pos < pool->pages / BITS_PER_LONG; bmp_pos++) {
                bmp_val = pool->used_bitmap[bmp_pos] | start_mask;
                start_mask = 0;
                if (bmp_val != ~0UL) {
-                       page_nr = ffz(bmp_val) + bmp_pos * BITS_PER_LONG;
+                       page_nr = ffzl(bmp_val) + bmp_pos * BITS_PER_LONG;
                        if (page_nr >= pool->pages)
                                break;
                        return page_nr;
@@ -62,25 +87,46 @@ static unsigned long find_next_free_page(struct page_pool *pool,
        return INVALID_PAGE_NR;
 }
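
The start_mask trick can be checked in isolation: with 64-bit longs and start = 5, the mask covers bits 0..4, so pages below the search start appear used to ffzl(). A standalone sketch (BITS_PER_LONG fixed to 64 here for illustration):

/* Standalone illustration of the start_mask computation used above. */
#include <stdio.h>

#define BITS_PER_LONG 64

int main(void)
{
        unsigned long start = 5, start_mask = 0;

        if (start % BITS_PER_LONG > 0)
                start_mask = ~0UL >>
                        (BITS_PER_LONG - (start % BITS_PER_LONG));

        /* Prints 000000000000001f: bits 0..4 look "used" to the search. */
        printf("%016lx\n", start_mask);
        return 0;
}
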
 
-void *page_alloc(struct page_pool *pool, unsigned int num)
+/**
+ * Allocate consecutive pages from the specified pool.
+ * @param pool         Page pool to allocate from.
+ * @param num          Number of pages.
+ * @param align_mask   Choose start so that start_page_no & align_mask == 0.
+ *
+ * @return Pointer to first page or NULL if allocation failed.
+ *
+ * @see page_free
+ */
+static void *page_alloc_internal(struct page_pool *pool, unsigned int num,
+                                unsigned long align_mask)
 {
-       unsigned long start, last, next;
+       /* The pool itself might not be aligned as required. */
+       unsigned long aligned_start =
+               ((unsigned long)pool->base_address >> PAGE_SHIFT) & align_mask;
+       unsigned long next = aligned_start;
+       unsigned long start, last;
        unsigned int allocated;
 
-       start = find_next_free_page(pool, 0);
-       if (start == INVALID_PAGE_NR)
+restart:
+       /* Forward the search start to the next aligned page. */
+       if ((next - aligned_start) & align_mask)
+               next += num - ((next - aligned_start) & align_mask);
+
+       start = next = find_next_free_page(pool, next);
+       if (start == INVALID_PAGE_NR || num == 0)
                return NULL;
 
-restart:
+       /* Enforce alignment (none if align_mask is 0). */
+       if ((start - aligned_start) & align_mask)
+               goto restart;
+
        for (allocated = 1, last = start; allocated < num;
             allocated++, last = next) {
                next = find_next_free_page(pool, last + 1);
                if (next == INVALID_PAGE_NR)
                        return NULL;
-               if (next != last + 1) {
-                       start = next;
-                       goto restart;
-               }
+               if (next != last + 1)
+                       goto restart;   /* not consecutive */
        }
 
        for (allocated = 0; allocated < num; allocated++)
@@ -91,6 +137,42 @@ restart:
        return pool->base_address + start * PAGE_SIZE;
 }
 
+/**
+ * Allocate consecutive pages from the specified pool.
+ * @param pool Page pool to allocate from.
+ * @param num  Number of pages.
+ *
+ * @return Pointer to first page or NULL if allocation failed.
+ *
+ * @see page_free
+ */
+void *page_alloc(struct page_pool *pool, unsigned int num)
+{
+       return page_alloc_internal(pool, num, 0);
+}
+
+/**
+ * Allocate aligned consecutive pages from the specified pool.
+ * @param pool Page pool to allocate from.
+ * @param num  Number of pages. @c num must be a power of 2.
+ *
+ * @return Pointer to first page or NULL if allocation failed.
+ *
+ * @see page_free
+ */
+void *page_alloc_aligned(struct page_pool *pool, unsigned int num)
+{
+       return page_alloc_internal(pool, num, num - 1);
+}
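
A usage sketch (caller context invented): since align_mask is num - 1, num has to be a power of two for the mask to describe a natural alignment.

/* Sketch: four consecutive pages, start page aligned per the num - 1 mask. */
void *block = page_alloc_aligned(&mem_pool, 4);

if (!block)
        return -ENOMEM;
/* ... use the 16 KiB block ... */
page_free(&mem_pool, block, 4);
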
+
+/**
+ * Release pages to the specified pool.
+ * @param pool Page pool to release to.
+ * @param page Address of first page.
+ * @param num  Number of pages.
+ *
+ * @see page_alloc
+ */
 void page_free(struct page_pool *pool, void *page, unsigned int num)
 {
        unsigned long page_nr;
@@ -108,8 +190,23 @@ void page_free(struct page_pool *pool, void *page, unsigned int num)
        }
 }
 
-unsigned long page_map_virt2phys(const struct paging_structures *pg_structs,
-                                unsigned long virt)
+/**
+ * Translate virtual to physical address according to given paging structures.
+ * @param pg_structs   Paging structures to use for translation.
+ * @param virt         Virtual address.
+ * @param flags                Access flags that have to be supported by the mapping,
+ *                     see @ref PAGE_FLAGS.
+ *
+ * @return Physical address on success or @c INVALID_PHYS_ADDR if the virtual
+ *        address could not be translated or the requested access is not
+ *        supported by the mapping.
+ *
+ * @see paging_phys2hvirt
+ * @see paging_hvirt2phys
+ * @see arch_paging_gphys2phys
+ */
+unsigned long paging_virt2phys(const struct paging_structures *pg_structs,
+                              unsigned long virt, unsigned long flags)
 {
        const struct paging *paging = pg_structs->root_paging;
        page_table_t pt = pg_structs->root_table;
@@ -118,24 +215,24 @@ unsigned long page_map_virt2phys(const struct paging_structures *pg_structs,
 
        while (1) {
                pte = paging->get_entry(pt, virt);
-               if (!paging->entry_valid(pte))
+               if (!paging->entry_valid(pte, flags))
                        return INVALID_PHYS_ADDR;
                phys = paging->get_phys(pte, virt);
                if (phys != INVALID_PHYS_ADDR)
                        return phys;
-               pt = page_map_phys2hvirt(paging->get_next_pt(pte));
+               pt = paging_phys2hvirt(paging->get_next_pt(pte));
                paging++;
        }
 }
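
For illustration, a caller could verify that a hypervisor-virtual address is mapped with the required access before touching it (sketch; addr and the error handling are invented):

/* Sketch: translate through the hypervisor's own paging structures. */
unsigned long phys = paging_virt2phys(&hv_paging_structs, (unsigned long)addr,
                                      PAGE_READONLY_FLAGS);
if (phys == INVALID_PHYS_ADDR)
        return -EINVAL; /* not mapped, or lacks the requested access */
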
 
-static void flush_pt_entry(pt_entry_t pte, enum page_map_coherent coherent)
+static void flush_pt_entry(pt_entry_t pte, enum paging_coherent coherent)
 {
-       if (coherent == PAGE_MAP_COHERENT)
-               flush_cache(pte, sizeof(*pte));
+       if (coherent == PAGING_COHERENT)
+               arch_paging_flush_cpu_caches(pte, sizeof(*pte));
 }
 
 static int split_hugepage(const struct paging *paging, pt_entry_t pte,
-                         unsigned long virt, enum page_map_coherent coherent)
+                         unsigned long virt, enum paging_coherent coherent)
 {
        unsigned long phys = paging->get_phys(pte, virt);
        struct paging_structures sub_structs;
@@ -154,16 +251,34 @@ static int split_hugepage(const struct paging *paging, pt_entry_t pte,
        sub_structs.root_table = page_alloc(&mem_pool, 1);
        if (!sub_structs.root_table)
                return -ENOMEM;
-       paging->set_next_pt(pte, page_map_hvirt2phys(sub_structs.root_table));
+       paging->set_next_pt(pte, paging_hvirt2phys(sub_structs.root_table));
        flush_pt_entry(pte, coherent);
 
-       return page_map_create(&sub_structs, phys, paging->page_size, virt,
-                              flags, coherent);
+       return paging_create(&sub_structs, phys, paging->page_size, virt,
+                            flags, coherent);
 }
 
-int page_map_create(const struct paging_structures *pg_structs,
-                   unsigned long phys, unsigned long size, unsigned long virt,
-                   unsigned long flags, enum page_map_coherent coherent)
+/**
+ * Create or modify a page map.
+ * @param pg_structs   Descriptor of paging structures to be used.
+ * @param phys         Physical address of the region to be mapped.
+ * @param size         Size of the region.
+ * @param virt         Virtual address the region should be mapped to.
+ * @param flags                Flags describing the permitted access, see
+ *                     @ref PAGE_FLAGS.
+ * @param coherent     Coherency of mapping.
+ *
+ * @return 0 on success, negative error code otherwise.
+ *
+ * @note The function aims at using the largest possible page size for the
+ * mapping but does not consolidate with neighboring mappings.
+ *
+ * @see paging_destroy
+ * @see paging_get_guest_pages
+ */
+int paging_create(const struct paging_structures *pg_structs,
+                 unsigned long phys, unsigned long size, unsigned long virt,
+                 unsigned long flags, enum paging_coherent coherent)
 {
        phys &= PAGE_MASK;
        virt &= PAGE_MASK;
@@ -187,31 +302,32 @@ int page_map_create(const struct paging_structures *pg_structs,
                                 * boundaries.
                                 */
                                if (paging->page_size > PAGE_SIZE)
-                                       page_map_destroy(pg_structs, virt,
-                                                        paging->page_size,
-                                                        coherent);
+                                       paging_destroy(pg_structs, virt,
+                                                      paging->page_size,
+                                                      coherent);
                                paging->set_terminal(pte, phys, flags);
                                flush_pt_entry(pte, coherent);
                                break;
                        }
-                       if (paging->entry_valid(pte)) {
+                       if (paging->entry_valid(pte, PAGE_PRESENT_FLAGS)) {
                                err = split_hugepage(paging, pte, virt,
                                                     coherent);
                                if (err)
                                        return err;
-                               pt = page_map_phys2hvirt(
+                               pt = paging_phys2hvirt(
                                                paging->get_next_pt(pte));
                        } else {
                                pt = page_alloc(&mem_pool, 1);
                                if (!pt)
                                        return -ENOMEM;
                                paging->set_next_pt(pte,
-                                                   page_map_hvirt2phys(pt));
+                                                   paging_hvirt2phys(pt));
                                flush_pt_entry(pte, coherent);
                        }
                        paging++;
                }
-               arch_tlb_flush_page(virt);
+               if (pg_structs == &hv_paging_structs)
+                       arch_paging_flush_page_tlbs(virt);
 
                phys += paging->page_size;
                virt += paging->page_size;
@@ -220,15 +336,31 @@ int page_map_create(const struct paging_structures *pg_structs,
        return 0;
 }
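
As a usage sketch, mapping a device page into the hypervisor looks much like the debug-console mapping added to paging_init() further down (mmio_phys and mmio_virt are placeholders):

/* Sketch: map one page of MMIO space into the hypervisor page tables. */
err = paging_create(&hv_paging_structs, mmio_phys, PAGE_SIZE, mmio_virt,
                    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                    PAGING_NON_COHERENT);
if (err)
        return err;
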
 
-int page_map_destroy(const struct paging_structures *pg_structs,
-                    unsigned long virt, unsigned long size,
-                    enum page_map_coherent coherent)
+/**
+ * Destroy a page map.
+ * @param pg_structs   Descriptor of paging structures to be used.
+ * @param virt         Virtual address of the region to be unmapped.
+ * @param size         Size of the region.
+ * @param coherent     Coherency of mapping.
+ *
+ * @return 0 on success, negative error code otherwise.
+ *
+ * @note This function tries to break up hugepages if they are to be unmapped
+ * only partially. This may require allocating additional pages for the paging
+ * structures and can therefore fail. Unmap requests that only cover full
+ * pages never fail.
+ *
+ * @see paging_create
+ */
+int paging_destroy(const struct paging_structures *pg_structs,
+                  unsigned long virt, unsigned long size,
+                  enum paging_coherent coherent)
 {
        size = PAGE_ALIGN(size);
 
        while (size > 0) {
                const struct paging *paging = pg_structs->root_paging;
-               page_table_t pt[MAX_PAGE_DIR_LEVELS];
+               page_table_t pt[MAX_PAGE_TABLE_LEVELS];
                unsigned long page_size;
                pt_entry_t pte;
                int n = 0;
@@ -238,7 +370,7 @@ int page_map_destroy(const struct paging_structures *pg_structs,
                pt[0] = pg_structs->root_table;
                while (1) {
                        pte = paging->get_entry(pt[n], virt);
-                       if (!paging->entry_valid(pte))
+                       if (!paging->entry_valid(pte, PAGE_PRESENT_FLAGS))
                                break;
                        if (paging->get_phys(pte, virt) != INVALID_PHYS_ADDR) {
                                if (paging->page_size > size) {
@@ -249,8 +381,7 @@ int page_map_destroy(const struct paging_structures *pg_structs,
                                } else
                                        break;
                        }
-                       pt[++n] = page_map_phys2hvirt(
-                                       paging->get_next_pt(pte));
+                       pt[++n] = paging_phys2hvirt(paging->get_next_pt(pte));
                        paging++;
                }
                /* advance by page size of current level paging */
@@ -266,7 +397,8 @@ int page_map_destroy(const struct paging_structures *pg_structs,
                        paging--;
                        pte = paging->get_entry(pt[--n], virt);
                }
-               arch_tlb_flush_page(virt);
+               if (pg_structs == &hv_paging_structs)
+                       arch_paging_flush_page_tlbs(virt);
 
                if (page_size > size)
                        break;
@@ -276,73 +408,117 @@ int page_map_destroy(const struct paging_structures *pg_structs,
        return 0;
 }
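
For example, unmapping a single page from inside a region mapped with a larger page size forces split_hugepage() to allocate a fresh page table and can therefore fail, while unmapping the region as a whole cannot (sketch; hole_virt is a placeholder):

/* Sketch: punch a 4 KiB hole into an existing, possibly huge, mapping. */
err = paging_destroy(&hv_paging_structs, hole_virt, PAGE_SIZE,
                     PAGING_NON_COHERENT);
if (err)
        return err; /* only possible when a hugepage had to be split */
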
 
-void *page_map_get_guest_page(struct per_cpu *cpu_data,
-                             const struct guest_paging_structures *pg_structs,
-                             unsigned long virt, unsigned long flags)
+static unsigned long
+paging_gvirt2gphys(const struct guest_paging_structures *pg_structs,
+                  unsigned long gvirt, unsigned long tmp_page,
+                  unsigned long flags)
 {
        unsigned long page_table_gphys = pg_structs->root_table_gphys;
        const struct paging *paging = pg_structs->root_paging;
-       unsigned long page_virt, phys, gphys;
+       unsigned long gphys, phys;
        pt_entry_t pte;
        int err;
 
-       page_virt = TEMPORARY_MAPPING_BASE +
-               cpu_data->cpu_id * PAGE_SIZE * NUM_TEMPORARY_PAGES;
-
        while (1) {
                /* map guest page table */
-               phys = arch_page_map_gphys2phys(cpu_data, page_table_gphys);
+               phys = arch_paging_gphys2phys(this_cpu_data(),
+                                               page_table_gphys,
+                                               PAGE_READONLY_FLAGS);
                if (phys == INVALID_PHYS_ADDR)
-                       return NULL;
-               err = page_map_create(&hv_paging_structs, phys,
-                                     PAGE_SIZE, page_virt,
-                                     PAGE_READONLY_FLAGS,
-                                     PAGE_MAP_NON_COHERENT);
+                       return INVALID_PHYS_ADDR;
+               err = paging_create(&hv_paging_structs, phys, PAGE_SIZE,
+                                   tmp_page, PAGE_READONLY_FLAGS,
+                                   PAGING_NON_COHERENT);
                if (err)
-                       return NULL;
+                       return INVALID_PHYS_ADDR;
 
                /* evaluate page table entry */
-               pte = paging->get_entry((page_table_t)page_virt, virt);
-               if (!paging->entry_valid(pte))
-                       return NULL;
-               gphys = paging->get_phys(pte, virt);
+               pte = paging->get_entry((page_table_t)tmp_page, gvirt);
+               if (!paging->entry_valid(pte, flags))
+                       return INVALID_PHYS_ADDR;
+               gphys = paging->get_phys(pte, gvirt);
                if (gphys != INVALID_PHYS_ADDR)
-                       break;
+                       return gphys;
                page_table_gphys = paging->get_next_pt(pte);
                paging++;
        }
+}
 
-       phys = arch_page_map_gphys2phys(cpu_data, gphys);
-       if (phys == INVALID_PHYS_ADDR)
-               return NULL;
-       /* map guest page */
-       err = page_map_create(&hv_paging_structs, phys, PAGE_SIZE, page_virt,
-                             flags, PAGE_MAP_NON_COHERENT);
-       if (err)
+/**
+ * Map guest (cell) pages into the hypervisor address space.
+ * @param pg_structs   Descriptor of the guest paging structures if @c gaddr
+ *                     is a guest-virtual address or @c NULL if it is a
+ *                     guest-physical address.
+ * @param gaddr                Guest address of the first page to be mapped.
+ * @param num          Number of pages to be mapped.
+ * @param flags                Access flags for the hypervisor mapping, see
+ *                     @ref PAGE_FLAGS.
+ *
+ * @return Pointer to first mapped page or @c NULL on error.
+ *
+ * @note The mapping is done only for the calling CPU and must thus only be
+ * used by the very same CPU.
+ *
+ * @note The mapping is only temporary, valid until the next invocation of
+ * paging_get_guest_pages() on this CPU. It does not require explicit
+ * unmapping when it is no longer needed.
+ */
+void *paging_get_guest_pages(const struct guest_paging_structures *pg_structs,
+                            unsigned long gaddr, unsigned int num,
+                            unsigned long flags)
+{
+       unsigned long page_base = TEMPORARY_MAPPING_BASE +
+               this_cpu_id() * PAGE_SIZE * NUM_TEMPORARY_PAGES;
+       unsigned long phys, gphys, page_virt = page_base;
+       int err;
+
+       if (num > NUM_TEMPORARY_PAGES)
                return NULL;
+       while (num-- > 0) {
+               if (pg_structs)
+                       gphys = paging_gvirt2gphys(pg_structs, gaddr,
+                                                  page_virt, flags);
+               else
+                       gphys = gaddr;
 
-       return (void *)page_virt;
+               phys = arch_paging_gphys2phys(this_cpu_data(), gphys, flags);
+               if (phys == INVALID_PHYS_ADDR)
+                       return NULL;
+               /* map guest page */
+               err = paging_create(&hv_paging_structs, phys, PAGE_SIZE,
+                                   page_virt, flags, PAGING_NON_COHERENT);
+               if (err)
+                       return NULL;
+               gaddr += PAGE_SIZE;
+               page_virt += PAGE_SIZE;
+       }
+       return (void *)page_base;
 }
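
A typical use, sketched with invented names: reading a guest data structure addressed by guest-physical address during a hypercall; passing NULL as pg_structs skips the guest-virtual page walk.

/* Sketch: temporarily map one guest page and copy data out of it. */
void *mapping = paging_get_guest_pages(NULL, guest_phys_addr, 1,
                                       PAGE_READONLY_FLAGS);
if (!mapping)
        return -EFAULT;
memcpy(&local_copy, mapping, sizeof(local_copy));
/* No unmapping needed; the slot is reused by the next call on this CPU. */
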
 
+/**
+ * Initialize the page mapping subsystem.
+ *
+ * @return 0 on success, negative error code otherwise.
+ */
 int paging_init(void)
 {
-       unsigned long per_cpu_pages, config_pages, bitmap_pages;
-       unsigned long n;
+       unsigned long n, per_cpu_pages, config_pages, bitmap_pages, vaddr;
        int err;
 
-       mem_pool.pages = (hypervisor_header.size -
-               (__page_pool - (u8 *)&hypervisor_header)) / PAGE_SIZE;
-       per_cpu_pages = hypervisor_header.possible_cpus *
+       per_cpu_pages = hypervisor_header.max_cpus *
                sizeof(struct per_cpu) / PAGE_SIZE;
-       bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;
 
-       system_config = (struct jailhouse_system *)
-               (__page_pool + per_cpu_pages * PAGE_SIZE);
-       config_pages = (jailhouse_system_config_size(system_config) +
-                       PAGE_SIZE - 1) / PAGE_SIZE;
+       config_pages = PAGES(jailhouse_system_config_size(system_config));
+
+       page_offset = JAILHOUSE_BASE -
+               system_config->hypervisor_memory.phys_start;
+
+       mem_pool.pages = (system_config->hypervisor_memory.size -
+               (__page_pool - (u8 *)&hypervisor_header)) / PAGE_SIZE;
+       bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;
 
        if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
-               goto error_nomem;
+               return -ENOMEM;
 
        mem_pool.base_address = __page_pool;
        mem_pool.used_bitmap =
@@ -355,7 +531,7 @@ int paging_init(void)
 
        remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
        remap_pool.used_pages =
-               hypervisor_header.possible_cpus * NUM_TEMPORARY_PAGES;
+               hypervisor_header.max_cpus * NUM_TEMPORARY_PAGES;
        for (n = 0; n < remap_pool.used_pages; n++)
                set_bit(n, remap_pool.used_bitmap);
 
@@ -364,34 +540,46 @@ int paging_init(void)
        hv_paging_structs.root_paging = hv_paging;
        hv_paging_structs.root_table = page_alloc(&mem_pool, 1);
        if (!hv_paging_structs.root_table)
-               goto error_nomem;
+               return -ENOMEM;
 
        /* Replicate hypervisor mapping of Linux */
-       err = page_map_create(&hv_paging_structs,
-                             page_map_hvirt2phys(&hypervisor_header),
-                             hypervisor_header.size,
-                             (unsigned long)&hypervisor_header,
-                             PAGE_DEFAULT_FLAGS, PAGE_MAP_NON_COHERENT);
+       err = paging_create(&hv_paging_structs,
+                            paging_hvirt2phys(&hypervisor_header),
+                            system_config->hypervisor_memory.size,
+                            (unsigned long)&hypervisor_header,
+                            PAGE_DEFAULT_FLAGS, PAGING_NON_COHERENT);
        if (err)
-               goto error_nomem;
+               return err;
+
+       if (system_config->debug_console.flags & JAILHOUSE_MEM_IO) {
+               vaddr = (unsigned long)hypervisor_header.debug_console_base;
+               /* check if console overlaps remapping region */
+               if (vaddr + system_config->debug_console.size >= REMAP_BASE &&
+                   vaddr < REMAP_BASE + remap_pool.pages * PAGE_SIZE)
+                       return trace_error(-EINVAL);
+
+               err = paging_create(&hv_paging_structs,
+                                   system_config->debug_console.phys_start,
+                                   system_config->debug_console.size, vaddr,
+                                   PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
+                                   PAGING_NON_COHERENT);
+               if (err)
+                       return err;
+       }
 
        /* Make sure any remappings to the temporary regions can be performed
         * without allocations of page table pages. */
-       err = page_map_create(&hv_paging_structs, 0,
-                             remap_pool.used_pages * PAGE_SIZE,
-                             TEMPORARY_MAPPING_BASE, PAGE_NONPRESENT_FLAGS,
-                             PAGE_MAP_NON_COHERENT);
-       if (err)
-               goto error_nomem;
-
-       return 0;
-
-error_nomem:
-       printk("FATAL: page pool much too small\n");
-       return -ENOMEM;
+       return paging_create(&hv_paging_structs, 0,
+                            remap_pool.used_pages * PAGE_SIZE,
+                            TEMPORARY_MAPPING_BASE, PAGE_NONPRESENT_FLAGS,
+                            PAGING_NON_COHERENT);
 }
 
-void page_map_dump_stats(const char *when)
+/**
+ * Dump usage statistics of the page pools.
+ * @param when String that characterizes the associated event.
+ */
+void paging_dump_stats(const char *when)
 {
        printk("Page pool usage %s: mem %d/%d, remap %d/%d\n", when,
               mem_pool.used_pages, mem_pool.pages,