/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013, 2014
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/paging.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <jailhouse/control.h>
#include <asm/bitops.h>

#define BITS_PER_PAGE		(PAGE_SIZE * 8)

#define INVALID_PAGE_NR		(~0UL)

#define PAGE_SCRUB_ON_FREE	0x1

extern u8 __page_pool[];

/**
 * Offset between virtual and physical hypervisor addresses.
 *
 * @note Private, use paging_hvirt2phys() or paging_phys2hvirt() instead.
 */
unsigned long page_offset;

/** Page pool containing physical pages for use by the hypervisor. */
struct page_pool mem_pool;
/** Page pool containing virtual pages for remappings by the hypervisor. */
struct page_pool remap_pool = {
	.base_address = (void *)REMAP_BASE,
	.pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
};

/** Descriptor of the hypervisor paging structures. */
struct paging_structures hv_paging_structs;

/**
 * Trivial implementation of paging::get_phys (for non-terminal levels).
 * @param pte	See paging::get_phys.
 * @param virt	See paging::get_phys.
 *
 * @return @c INVALID_PHYS_ADDR.
 */
unsigned long paging_get_phys_invalid(pt_entry_t pte, unsigned long virt)
{
	return INVALID_PHYS_ADDR;
}

static unsigned long find_next_free_page(struct page_pool *pool,
					 unsigned long start)
{
	unsigned long bmp_pos, bmp_val, page_nr;
	unsigned long start_mask = 0;

	if (start >= pool->pages)
		return INVALID_PAGE_NR;

	/*
	 * If we don't start at the beginning of a bitmap word, create a mask
	 * to mark the pages before the start page as (virtually) used.
	 */
	if (start % BITS_PER_LONG > 0)
		start_mask = ~0UL >> (BITS_PER_LONG - (start % BITS_PER_LONG));

	for (bmp_pos = start / BITS_PER_LONG;
	     bmp_pos < pool->pages / BITS_PER_LONG; bmp_pos++) {
		bmp_val = pool->used_bitmap[bmp_pos] | start_mask;
		/* The mask only applies to the first word we inspect. */
		start_mask = 0;
		if (bmp_val != ~0UL) {
			page_nr = ffzl(bmp_val) + bmp_pos * BITS_PER_LONG;
			if (page_nr >= pool->pages)
				break;
			return page_nr;
		}
	}

	return INVALID_PAGE_NR;
}
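
/*
 * Worked example (illustrative, assuming BITS_PER_LONG == 64): a search
 * starting at page 3 yields start_mask == 0x7, so bits 0..2 of the first
 * bitmap word are treated as used and cannot be returned as free pages.
 */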

/**
 * Allocate consecutive pages from the specified pool.
 * @param pool		Page pool to allocate from.
 * @param num		Number of pages.
 * @param align_mask	Choose start so that start_page_no & align_mask == 0.
 *
 * @return Pointer to first page or NULL if allocation failed.
 */
static void *page_alloc_internal(struct page_pool *pool, unsigned int num,
				 unsigned long align_mask)
{
	/* The pool itself might not be aligned as required. */
	unsigned long aligned_start =
		((unsigned long)pool->base_address >> PAGE_SHIFT) & align_mask;
	unsigned long next = aligned_start;
	unsigned long start, last;
	unsigned int allocated;

restart:
	/* Forward the search start to the next aligned page. */
	if ((next - aligned_start) & align_mask)
		next += num - ((next - aligned_start) & align_mask);

	start = next = find_next_free_page(pool, next);
	if (start == INVALID_PAGE_NR || num == 0)
		return NULL;

	/* Enforce alignment (none if align_mask is 0). */
	if ((start - aligned_start) & align_mask)
		goto restart;

	for (allocated = 1, last = start; allocated < num;
	     allocated++, last = next) {
		next = find_next_free_page(pool, last + 1);
		if (next == INVALID_PAGE_NR)
			return NULL;
		if (next != last + 1)
			goto restart; /* not consecutive */
	}

	for (allocated = 0; allocated < num; allocated++)
		set_bit(start + allocated, pool->used_bitmap);

	pool->used_pages += num;

	return pool->base_address + start * PAGE_SIZE;
}
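
/*
 * Worked example (illustrative): for an aligned request of num == 4 pages
 * (align_mask == 3) with aligned_start == 0, a search position of next == 6
 * is forwarded by 4 - (6 & 3) = 2 to page 8, the next 4-page boundary.
 */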

/**
 * Allocate consecutive pages from the specified pool.
 * @param pool	Page pool to allocate from.
 * @param num	Number of pages.
 *
 * @return Pointer to first page or NULL if allocation failed.
 */
void *page_alloc(struct page_pool *pool, unsigned int num)
{
	return page_alloc_internal(pool, num, 0);
}
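
/*
 * Usage sketch (illustrative, not part of this file): allocating a scratch
 * page from the physical page pool and releasing it again.
 *
 *	void *page = page_alloc(&mem_pool, 1);
 *
 *	if (!page)
 *		return -ENOMEM;
 *	...
 *	page_free(&mem_pool, page, 1);
 */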

/**
 * Allocate aligned consecutive pages from the specified pool.
 * @param pool	Page pool to allocate from.
 * @param num	Number of pages. Must be a power of 2.
 *
 * @return Pointer to first page or NULL if allocation failed.
 */
void *page_alloc_aligned(struct page_pool *pool, unsigned int num)
{
	return page_alloc_internal(pool, num, num - 1);
}
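
/*
 * The power-of-2 requirement ensures that num - 1 forms a contiguous bit
 * mask. E.g., num == 4 gives align_mask == 3, so the allocation starts on
 * a 4-page boundary relative to the pool base (illustrative example).
 */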

/**
 * Release pages to the specified pool.
 * @param pool	Page pool to release to.
 * @param page	Address of first page.
 * @param num	Number of pages.
 */
void page_free(struct page_pool *pool, void *page, unsigned int num)
{
	unsigned long page_nr;

	if (!page)
		return;

	while (num-- > 0) {
		if (pool->flags & PAGE_SCRUB_ON_FREE)
			memset(page, 0, PAGE_SIZE);
		page_nr = (page - pool->base_address) / PAGE_SIZE;
		clear_bit(page_nr, pool->used_bitmap);
		pool->used_pages--;
		page += PAGE_SIZE;
	}
}

/**
 * Translate virtual to physical address according to given paging structures.
 * @param pg_structs	Paging structures to use for translation.
 * @param virt		Virtual address.
 * @param flags		Access flags that have to be supported by the mapping,
 *			see @ref PAGE_FLAGS.
 *
 * @return Physical address on success or @c INVALID_PHYS_ADDR if the virtual
 *	   address could not be translated or the requested access is not
 *	   supported by the mapping.
 *
 * @see paging_phys2hvirt
 * @see paging_hvirt2phys
 * @see arch_paging_gphys2phys
 */
unsigned long paging_virt2phys(const struct paging_structures *pg_structs,
			       unsigned long virt, unsigned long flags)
{
	const struct paging *paging = pg_structs->root_paging;
	page_table_t pt = pg_structs->root_table;
	unsigned long phys;
	pt_entry_t pte;

	while (1) {
		pte = paging->get_entry(pt, virt);
		if (!paging->entry_valid(pte, flags))
			return INVALID_PHYS_ADDR;
		phys = paging->get_phys(pte, virt);
		if (phys != INVALID_PHYS_ADDR)
			return phys;
		/* descend to the next, finer-grained paging level */
		pt = paging_phys2hvirt(paging->get_next_pt(pte));
		paging++;
	}
}
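
/*
 * Usage sketch (illustrative): resolving a hypervisor-virtual address
 * through the hypervisor's own paging structures, requiring that the
 * mapping permits read access (vaddr is a hypothetical variable).
 *
 *	phys = paging_virt2phys(&hv_paging_structs, vaddr,
 *				PAGE_READONLY_FLAGS);
 *	if (phys == INVALID_PHYS_ADDR)
 *		return -EINVAL;
 */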

static void flush_pt_entry(pt_entry_t pte, enum paging_coherent coherent)
{
	if (coherent == PAGING_COHERENT)
		arch_paging_flush_cpu_caches(pte, sizeof(*pte));
}

static int split_hugepage(const struct paging *paging, pt_entry_t pte,
			  unsigned long virt, enum paging_coherent coherent)
{
	unsigned long phys = paging->get_phys(pte, virt);
	struct paging_structures sub_structs;
	unsigned long page_mask, flags;

	if (phys == INVALID_PHYS_ADDR)
		return 0;

	page_mask = ~(paging->page_size - 1);
	phys &= page_mask;
	virt &= page_mask;

	flags = paging->get_flags(pte);

	sub_structs.root_paging = paging + 1;
	sub_structs.root_table = page_alloc(&mem_pool, 1);
	if (!sub_structs.root_table)
		return -ENOMEM;
	paging->set_next_pt(pte, paging_hvirt2phys(sub_structs.root_table));
	flush_pt_entry(pte, coherent);

	return paging_create(&sub_structs, phys, paging->page_size, virt,
			     flags, coherent);
}
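
/*
 * Note: splitting replaces one terminal hugepage entry with a next-level
 * table that remaps the same address range with the same access flags,
 * e.g. one 2 MB mapping becomes 512 4 KB mappings (illustrative numbers,
 * depending on the architecture's paging levels).
 */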

/**
 * Create or modify a page map.
 * @param pg_structs	Descriptor of paging structures to be used.
 * @param phys		Physical address of the region to be mapped.
 * @param size		Size of the region.
 * @param virt		Virtual address the region should be mapped to.
 * @param flags		Flags describing the permitted access, see
 *			@ref PAGE_FLAGS.
 * @param coherent	Coherency of mapping.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @note The function aims at using the largest possible page size for the
 * mapping but does not consolidate with neighboring mappings.
 *
 * @see paging_destroy
 * @see paging_get_guest_pages
 */
int paging_create(const struct paging_structures *pg_structs,
		  unsigned long phys, unsigned long size, unsigned long virt,
		  unsigned long flags, enum paging_coherent coherent)
{
	phys &= PAGE_MASK;
	virt &= PAGE_MASK;
	size = PAGE_ALIGN(size);

	while (size > 0) {
		const struct paging *paging = pg_structs->root_paging;
		page_table_t pt = pg_structs->root_table;
		pt_entry_t pte;
		int err;

		while (1) {
			pte = paging->get_entry(pt, virt);
			if (paging->page_size > 0 &&
			    paging->page_size <= size &&
			    ((phys | virt) & (paging->page_size - 1)) == 0) {
				/*
				 * We might be overwriting a more fine-grained
				 * mapping, so release it first. This cannot
				 * fail as we are working along hugepage
				 * boundaries.
				 */
				if (paging->page_size > PAGE_SIZE)
					paging_destroy(pg_structs, virt,
						       paging->page_size,
						       coherent);
				paging->set_terminal(pte, phys, flags);
				flush_pt_entry(pte, coherent);
				break;
			}
			if (paging->entry_valid(pte, PAGE_PRESENT_FLAGS)) {
				err = split_hugepage(paging, pte, virt,
						     coherent);
				if (err)
					return err;
				pt = paging_phys2hvirt(
						paging->get_next_pt(pte));
			} else {
				pt = page_alloc(&mem_pool, 1);
				if (!pt)
					return -ENOMEM;
				paging->set_next_pt(pte,
						    paging_hvirt2phys(pt));
				flush_pt_entry(pte, coherent);
			}
			paging++;
		}
		if (pg_structs == &hv_paging_structs)
			arch_paging_flush_page_tlbs(virt);

		phys += paging->page_size;
		virt += paging->page_size;
		size -= paging->page_size;
	}
	return 0;
}
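
/*
 * Usage sketch (illustrative): identity-mapping a device region into the
 * hypervisor address space, similar to what paging_init() below does for
 * the debug console (dev_phys and dev_virt are hypothetical variables).
 *
 *	err = paging_create(&hv_paging_structs, dev_phys, PAGE_SIZE,
 *			    dev_virt, PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
 *			    PAGING_NON_COHERENT);
 */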

/**
 * Destroy a page map.
 * @param pg_structs	Descriptor of paging structures to be used.
 * @param virt		Virtual address of the region to be unmapped.
 * @param size		Size of the region.
 * @param coherent	Coherency of mapping.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @note If required, this function tries to break up hugepages if they should
 * be unmapped only partially. This may require allocating additional pages
 * for the paging structures, thus can fail. Unmap requests that cover only
 * full pages never fail.
 *
 * @see paging_create
 */
int paging_destroy(const struct paging_structures *pg_structs,
		   unsigned long virt, unsigned long size,
		   enum paging_coherent coherent)
{
	size = PAGE_ALIGN(size);

	while (size > 0) {
		const struct paging *paging = pg_structs->root_paging;
		page_table_t pt[MAX_PAGE_TABLE_LEVELS];
		unsigned long page_size;
		pt_entry_t pte;
		int n = 0;
		int err;

		/* walk down the page table, saving intermediate tables */
		pt[0] = pg_structs->root_table;
		while (1) {
			pte = paging->get_entry(pt[n], virt);
			if (!paging->entry_valid(pte, PAGE_PRESENT_FLAGS))
				break;
			if (paging->get_phys(pte, virt) != INVALID_PHYS_ADDR) {
				if (paging->page_size > size) {
					err = split_hugepage(paging, pte, virt,
							     coherent);
					if (err)
						return err;
				} else
					break;
			}
			pt[++n] = paging_phys2hvirt(paging->get_next_pt(pte));
			paging++;
		}
		/* advance by page size of current level paging */
		page_size = paging->page_size ? paging->page_size : PAGE_SIZE;

		/* walk up again, clearing entries, releasing empty tables */
		while (1) {
			paging->clear_entry(pte);
			flush_pt_entry(pte, coherent);
			if (n == 0 || !paging->page_table_empty(pt[n]))
				break;
			page_free(&mem_pool, pt[n], 1);
			paging--;
			pte = paging->get_entry(pt[--n], virt);
		}
		if (pg_structs == &hv_paging_structs)
			arch_paging_flush_page_tlbs(virt);

		if (page_size > size)
			break;
		virt += page_size;
		size -= page_size;
	}
	return 0;
}
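
/*
 * Worked example (illustrative): unmapping a single 4 KB page out of an
 * existing 2 MB hugepage first splits the hugepage via split_hugepage(),
 * which allocates one page table page from mem_pool; this is why a partial
 * unmap can fail with -ENOMEM while unmaps of complete mappings cannot.
 */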

static unsigned long
paging_gvirt2gphys(const struct guest_paging_structures *pg_structs,
		   unsigned long gvirt, unsigned long tmp_page,
		   unsigned long flags)
{
	unsigned long page_table_gphys = pg_structs->root_table_gphys;
	const struct paging *paging = pg_structs->root_paging;
	unsigned long gphys, phys;
	pt_entry_t pte;
	int err;

	while (1) {
		/* map guest page table */
		phys = arch_paging_gphys2phys(this_cpu_data(),
					      page_table_gphys,
					      PAGE_READONLY_FLAGS);
		if (phys == INVALID_PHYS_ADDR)
			return INVALID_PHYS_ADDR;
		err = paging_create(&hv_paging_structs, phys, PAGE_SIZE,
				    tmp_page, PAGE_READONLY_FLAGS,
				    PAGING_NON_COHERENT);
		if (err)
			return INVALID_PHYS_ADDR;

		/* evaluate page table entry */
		pte = paging->get_entry((page_table_t)tmp_page, gvirt);
		if (!paging->entry_valid(pte, flags))
			return INVALID_PHYS_ADDR;
		gphys = paging->get_phys(pte, gvirt);
		if (gphys != INVALID_PHYS_ADDR)
			return gphys;
		page_table_gphys = paging->get_next_pt(pte);
		paging++;
	}
}
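
/*
 * Each level of the guest's page table is first translated to a physical
 * page and mapped read-only at tmp_page before being dereferenced, so the
 * walk never touches guest memory through an unchecked pointer.
 */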

/**
 * Map guest (cell) pages into the hypervisor address space.
 * @param pg_structs	Descriptor of the guest paging structures if @c gaddr
 *			is a guest-virtual address or @c NULL if it is a
 *			guest-physical address.
 * @param gaddr		Guest address of the first page to be mapped.
 * @param num		Number of pages to be mapped.
 * @param flags		Access flags for the hypervisor mapping, see
 *			@ref PAGE_FLAGS.
 *
 * @return Pointer to first mapped page or @c NULL on error.
 *
 * @note The mapping is done only for the calling CPU and must thus only be
 * used by the very same CPU.
 *
 * @note The mapping is only temporary, valid until the next invocation of
 * paging_get_guest_pages() on this CPU. It does not require explicit
 * unmapping when it is no longer needed.
 */
void *paging_get_guest_pages(const struct guest_paging_structures *pg_structs,
			     unsigned long gaddr, unsigned int num,
			     unsigned long flags)
{
	unsigned long page_base = TEMPORARY_MAPPING_BASE +
		this_cpu_id() * PAGE_SIZE * NUM_TEMPORARY_PAGES;
	unsigned long phys, gphys, page_virt = page_base;
	int err;

	if (num > NUM_TEMPORARY_PAGES)
		return NULL;
	while (num-- > 0) {
		if (pg_structs)
			gphys = paging_gvirt2gphys(pg_structs, gaddr,
						   page_virt, flags);
		else
			gphys = gaddr;

		phys = arch_paging_gphys2phys(this_cpu_data(), gphys, flags);
		if (phys == INVALID_PHYS_ADDR)
			return NULL;
		/* map guest page */
		err = paging_create(&hv_paging_structs, phys, PAGE_SIZE,
				    page_virt, flags, PAGING_NON_COHERENT);
		if (err)
			return NULL;
		gaddr += PAGE_SIZE;
		page_virt += PAGE_SIZE;
	}
	return (void *)page_base;
}
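
/*
 * Usage sketch (illustrative): temporarily mapping one guest-physical page
 * for read access; passing NULL for pg_structs makes gaddr be interpreted
 * as a guest-physical address (gphys_addr is a hypothetical variable).
 *
 *	void *mapping = paging_get_guest_pages(NULL, gphys_addr, 1,
 *					       PAGE_READONLY_FLAGS);
 *	if (!mapping)
 *		return -EINVAL;
 */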

/**
 * Initialize the page mapping subsystem.
 *
 * @return 0 on success, negative error code otherwise.
 */
int paging_init(void)
{
	unsigned long n, per_cpu_pages, config_pages, bitmap_pages, vaddr;
	int err;

	per_cpu_pages = hypervisor_header.max_cpus *
		sizeof(struct per_cpu) / PAGE_SIZE;

	config_pages = PAGES(jailhouse_system_config_size(system_config));

	page_offset = JAILHOUSE_BASE -
		system_config->hypervisor_memory.phys_start;

	mem_pool.pages = (system_config->hypervisor_memory.size -
		(__page_pool - (u8 *)&hypervisor_header)) / PAGE_SIZE;
	bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;

	if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
		return trace_error(-ENOMEM);

	mem_pool.base_address = __page_pool;
	mem_pool.used_bitmap =
		(unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
				  config_pages * PAGE_SIZE);
	mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
	for (n = 0; n < mem_pool.used_pages; n++)
		set_bit(n, mem_pool.used_bitmap);
	mem_pool.flags = PAGE_SCRUB_ON_FREE;
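
	/*
	 * Illustrative layout of the pool set up above (offsets in pages
	 * from __page_pool, summarizing the code, not from the original):
	 *
	 *   0                : per-CPU data pages
	 *   + per_cpu_pages  : system configuration pages
	 *   + config_pages   : used_bitmap pages
	 *   + bitmap_pages   : free pages served by page_alloc()
	 */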

	remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
	remap_pool.used_pages =
		hypervisor_header.max_cpus * NUM_TEMPORARY_PAGES;
	for (n = 0; n < remap_pool.used_pages; n++)
		set_bit(n, remap_pool.used_bitmap);

	arch_paging_init();

	hv_paging_structs.root_paging = hv_paging;
	hv_paging_structs.root_table = page_alloc(&mem_pool, 1);
	if (!hv_paging_structs.root_table)
		return trace_error(-ENOMEM);

	/* Replicate hypervisor mapping of Linux */
	err = paging_create(&hv_paging_structs,
			    paging_hvirt2phys(&hypervisor_header),
			    system_config->hypervisor_memory.size,
			    (unsigned long)&hypervisor_header,
			    PAGE_DEFAULT_FLAGS, PAGING_NON_COHERENT);
	if (err)
		return err;

	if (system_config->debug_console.flags & JAILHOUSE_MEM_IO) {
		vaddr = (unsigned long)hypervisor_header.debug_console_base;
		/* check if the console overlaps the remapping region */
		if (vaddr + system_config->debug_console.size >= REMAP_BASE &&
		    vaddr < REMAP_BASE + remap_pool.pages * PAGE_SIZE)
			return trace_error(-EINVAL);

		err = paging_create(&hv_paging_structs,
				    system_config->debug_console.phys_start,
				    system_config->debug_console.size, vaddr,
				    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
				    PAGING_NON_COHERENT);
		if (err)
			return err;
	}

	/* Make sure any remappings to the temporary regions can be performed
	 * without allocations of page table pages. */
	return paging_create(&hv_paging_structs, 0,
			     remap_pool.used_pages * PAGE_SIZE,
			     TEMPORARY_MAPPING_BASE, PAGE_NONPRESENT_FLAGS,
			     PAGING_NON_COHERENT);
}

/**
 * Dump usage statistics of the page pools.
 * @param when	String that characterizes the associated event.
 */
void paging_dump_stats(const char *when)
{
	printk("Page pool usage %s: mem %ld/%ld, remap %ld/%ld\n", when,
	       mem_pool.used_pages, mem_pool.pages,
	       remap_pool.used_pages, remap_pool.pages);
}