/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/paging.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <jailhouse/control.h>
#include <asm/bitops.h>

#define BITS_PER_PAGE		(PAGE_SIZE * 8)

#define INVALID_PAGE_NR		(~0UL)

#define PAGE_SCRUB_ON_FREE	0x1

extern u8 __start[], __page_pool[];
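
/*
 * Page pools for hypervisor-internal allocations: mem_pool hands out
 * real pages from the region behind __page_pool, while remap_pool only
 * manages page numbers within the remapping area at REMAP_BASE_ADDR;
 * its bitmap storage is taken from mem_pool during paging_init().
 * Both track usage with one bit per page.
 */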
struct page_pool mem_pool;
struct page_pool remap_pool = {
	.base_address = (void *)REMAP_BASE_ADDR,
	.pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
};

pgd_t *hv_page_table;
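
/*
 * Find the first free page at or above start in the pool's bitmap.
 * Returns the page number or INVALID_PAGE_NR if none is left.
 */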
static unsigned long find_next_free_page(struct page_pool *pool,
					 unsigned long start)
{
	/* Mask out all bits below start in the first bitmap word; avoid
	 * an undefined shift by BITS_PER_LONG when start is word-aligned. */
	unsigned long start_mask = start % BITS_PER_LONG ?
		~0UL >> (BITS_PER_LONG - (start % BITS_PER_LONG)) : 0;
	unsigned long bmp_pos, bmp_val, page_nr;

	if (start >= pool->pages)
		return INVALID_PAGE_NR;

	for (bmp_pos = start / BITS_PER_LONG;
	     bmp_pos < pool->pages / BITS_PER_LONG; bmp_pos++) {
		bmp_val = pool->used_bitmap[bmp_pos] | start_mask;
		start_mask = 0;
		if (bmp_val != ~0UL) {
			page_nr = ffz(bmp_val) + bmp_pos * BITS_PER_LONG;
			if (page_nr >= pool->pages)
				break;
			return page_nr;
		}
	}

	return INVALID_PAGE_NR;
}
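
/*
 * Allocate num consecutive pages from the pool, restarting the search
 * behind any hole in the run. Returns the virtual base address or NULL
 * if the pool is exhausted. Illustrative use:
 *
 *	void *pages = page_alloc(&mem_pool, 2);
 *	if (!pages)
 *		return -ENOMEM;
 *	...
 *	page_free(&mem_pool, pages, 2);
 */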
void *page_alloc(struct page_pool *pool, unsigned int num)
{
	unsigned long start, last, next;
	unsigned int allocated;

	start = find_next_free_page(pool, 0);
	if (start == INVALID_PAGE_NR)
		return NULL;

restart:
	for (allocated = 1, last = start; allocated < num;
	     allocated++, last = next) {
		next = find_next_free_page(pool, last + 1);
		if (next == INVALID_PAGE_NR)
			return NULL;
		if (next != last + 1) {
			start = next;
			goto restart;
		}
	}

	for (allocated = 0; allocated < num; allocated++)
		set_bit(start + allocated, pool->used_bitmap);

	pool->used_pages += num;

	return pool->base_address + start * PAGE_SIZE;
}
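
/*
 * Return num pages to their pool. Pools flagged with
 * PAGE_SCRUB_ON_FREE have the contents zeroed first so that no stale
 * hypervisor data leaks into later allocations.
 */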
void page_free(struct page_pool *pool, void *page, unsigned int num)
{
	unsigned long page_nr;

	while (num-- > 0) {
		if (pool->flags & PAGE_SCRUB_ON_FREE)
			memset(page, 0, PAGE_SIZE);
		page_nr = (page - pool->base_address) / PAGE_SIZE;
		clear_bit(page_nr, pool->used_bitmap);
		pool->used_pages--;
		page += PAGE_SIZE;
	}
}
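
/*
 * Software page table walk: translate virt through the given page
 * table, honoring huge pages at PMD level. Returns INVALID_PHYS_ADDR
 * if any level is not present.
 */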
unsigned long page_map_virt2phys(pgd_t *page_table, unsigned long virt,
				 unsigned int levels)
{
	unsigned long offs = hypervisor_header.page_offset;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	switch (levels) {
	case 4:
		pgd = pgd_offset(page_table, virt);
		if (!pgd_valid(pgd))
			return INVALID_PHYS_ADDR;
		pud = pud4l_offset(pgd, offs, virt);
		break;
	case 3:
		pud = pud3l_offset(page_table, virt);
		break;
	default:
		return INVALID_PHYS_ADDR;
	}
	if (!pud_valid(pud))
		return INVALID_PHYS_ADDR;

	pmd = pmd_offset(pud, offs, virt);
	if (!pmd_valid(pmd))
		return INVALID_PHYS_ADDR;
	if (pmd_is_hugepage(pmd))
		return phys_address_hugepage(pmd, virt);

	pte = pte_offset(pmd, offs, virt);
	if (!pte_valid(pte))
		return INVALID_PHYS_ADDR;
	return phys_address(pte, virt);
}
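
/*
 * Write page table entries back to memory when the update has to be
 * coherent, i.e. visible to a page table walker that does not snoop
 * the cache.
 */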
static void flush_page_table(void *addr, unsigned long size,
			     enum page_map_coherent coherent)
{
	if (coherent == PAGE_MAP_COHERENT)
		flush_cache(addr, size);
}
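
/*
 * Map the physical range [phys, phys + size) at virt, allocating page
 * table pages from mem_pool as needed. Returns 0 or a negative error
 * code. An illustrative call, identity-mapping one page:
 *
 *	err = page_map_create(hv_page_table, 0xfee00000, PAGE_SIZE,
 *			      0xfee00000, PAGE_DEFAULT_FLAGS,
 *			      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
 *			      PAGE_MAP_NON_COHERENT);
 */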
int page_map_create(pgd_t *page_table, unsigned long phys, unsigned long size,
		    unsigned long virt, unsigned long flags,
		    unsigned long table_flags, unsigned int levels,
		    enum page_map_coherent coherent)
{
	unsigned long offs = hypervisor_header.page_offset;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (size = PAGE_ALIGN(size); size > 0;
	     phys += PAGE_SIZE, virt += PAGE_SIZE, size -= PAGE_SIZE) {
		switch (levels) {
		case 4:
			pgd = pgd_offset(page_table, virt);
			if (!pgd_valid(pgd)) {
				pud = page_alloc(&mem_pool, 1);
				if (!pud)
					return -ENOMEM;
				set_pgd(pgd, page_map_hvirt2phys(pud),
					table_flags);
				flush_page_table(pgd, sizeof(pgd), coherent);
			}
			pud = pud4l_offset(pgd, offs, virt);
			break;
		case 3:
			pud = pud3l_offset(page_table, virt);
			break;
		default:
			return -EINVAL;
		}
		if (!pud_valid(pud)) {
			pmd = page_alloc(&mem_pool, 1);
			if (!pmd)
				return -ENOMEM;
			set_pud(pud, page_map_hvirt2phys(pmd), table_flags);
			flush_page_table(pud, sizeof(pud), coherent);
		}

		pmd = pmd_offset(pud, offs, virt);
		if (!pmd_valid(pmd)) {
			pte = page_alloc(&mem_pool, 1);
			if (!pte)
				return -ENOMEM;
			set_pmd(pmd, page_map_hvirt2phys(pte), table_flags);
			flush_page_table(pmd, sizeof(pmd), coherent);
		}

		pte = pte_offset(pmd, offs, virt);
		set_pte(pte, phys, flags);
		flush_page_table(pte, sizeof(pte), coherent);
	}
	return 0;
}
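
/*
 * Tear down a mapping previously created with page_map_create,
 * releasing page table pages back to mem_pool once all their entries
 * have been cleared.
 */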
void page_map_destroy(pgd_t *page_table, unsigned long virt,
		      unsigned long size, unsigned int levels,
		      enum page_map_coherent coherent)
{
	unsigned long offs = hypervisor_header.page_offset;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (size = PAGE_ALIGN(size); size > 0;
	     virt += PAGE_SIZE, size -= PAGE_SIZE) {
		switch (levels) {
		case 4:
			pgd = pgd_offset(page_table, virt);
			if (!pgd_valid(pgd))
				continue;
			pud = pud4l_offset(pgd, offs, virt);
			break;
		case 3:
			pgd = 0; /* silence compiler warning */
			pud = pud3l_offset(page_table, virt);
			break;
		default:
			return;
		}
		if (!pud_valid(pud))
			continue;
		pmd = pmd_offset(pud, offs, virt);
		if (!pmd_valid(pmd))
			continue;
		pte = pte_offset(pmd, offs, virt);
		clear_pte(pte);
		flush_page_table(pte, sizeof(pte), coherent);

		if (!pt_empty(pmd, offs))
			continue;
		page_free(&mem_pool, pte_offset(pmd, offs, 0), 1);
		clear_pmd(pmd);
		flush_page_table(pmd, sizeof(pmd), coherent);

		if (!pmd_empty(pud, offs))
			continue;
		page_free(&mem_pool, pmd_offset(pud, offs, 0), 1);
		clear_pud(pud);
		flush_page_table(pud, sizeof(pud), coherent);

		if (levels < 4 || !pud_empty(pgd, offs))
			continue;
		page_free(&mem_pool, pud4l_offset(pgd, offs, 0), 1);
		clear_pgd(pgd);
		flush_page_table(pgd, sizeof(pgd), coherent);
	}
}
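
/*
 * Map a page of a foreign address space (e.g. a cell's) into this
 * CPU's slot of the remapping area and return its virtual address.
 * The foreign page table at page_table_paddr is walked one level at a
 * time; every table page is first remapped read-only at page_virt so
 * that the walk never dereferences unvalidated foreign memory
 * directly.
 */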
void *page_map_get_foreign_page(struct per_cpu *cpu_data,
				unsigned long page_table_paddr,
				unsigned long virt, unsigned long flags)
{
	unsigned long page_virt, phys;
#if PAGE_DIR_LEVELS == 4
	pgd_t *pgd;
#endif
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int err;

	page_virt = FOREIGN_MAPPING_BASE +
		cpu_data->cpu_id * PAGE_SIZE * NUM_FOREIGN_PAGES;

	phys = arch_page_map_gphys2phys(cpu_data, page_table_paddr);
	if (phys == INVALID_PHYS_ADDR)
		return NULL;
	err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
			      PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
			      PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
	if (err)
		return NULL;

#if PAGE_DIR_LEVELS == 4
	pgd = pgd_offset((pgd_t *)page_virt, virt);
	if (!pgd_valid(pgd))
		return NULL;
	phys = arch_page_map_gphys2phys(cpu_data,
			(unsigned long)pud4l_offset(pgd, 0, 0));
	if (phys == INVALID_PHYS_ADDR)
		return NULL;
	err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
			      PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
			      PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
	if (err)
		return NULL;

	pud = pud4l_offset((pgd_t *)&page_virt, 0, virt);
#elif PAGE_DIR_LEVELS == 3
	pud = pud3l_offset((pgd_t *)page_virt, virt);
#else
# error Unsupported paging level
#endif
	if (!pud_valid(pud))
		return NULL;
	phys = arch_page_map_gphys2phys(cpu_data,
			(unsigned long)pmd_offset(pud, 0, 0));
	if (phys == INVALID_PHYS_ADDR)
		return NULL;
	err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
			      PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
			      PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
	if (err)
		return NULL;

	pmd = pmd_offset((pud_t *)&page_virt, 0, virt);
	if (!pmd_valid(pmd))
		return NULL;
	if (pmd_is_hugepage(pmd))
		phys = phys_address_hugepage(pmd, virt);
	else {
		phys = arch_page_map_gphys2phys(cpu_data,
				(unsigned long)pte_offset(pmd, 0, 0));
		if (phys == INVALID_PHYS_ADDR)
			return NULL;
		err = page_map_create(hv_page_table, phys, PAGE_SIZE,
				      page_virt, PAGE_READONLY_FLAGS,
				      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
				      PAGE_MAP_NON_COHERENT);
		if (err)
			return NULL;

		pte = pte_offset((pmd_t *)&page_virt, 0, virt);
		if (!pte_valid(pte))
			return NULL;
		phys = phys_address(pte, 0);
	}

	phys = arch_page_map_gphys2phys(cpu_data, phys);
	if (phys == INVALID_PHYS_ADDR)
		return NULL;
	err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
			      flags, PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
			      PAGE_MAP_NON_COHERENT);
	if (err)
		return NULL;

	return (void *)page_virt;
}
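
/*
 * Set up both page pools and the hypervisor page table: the pool
 * memory behind __page_pool is partitioned into per-CPU data, the
 * system configuration, and the allocation bitmap, then the Linux
 * mapping of the hypervisor is replicated into hv_page_table.
 */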
int paging_init(void)
{
	unsigned long per_cpu_pages, config_pages, bitmap_pages;
	unsigned long n;
	int err;

	mem_pool.pages =
		(hypervisor_header.size - (__page_pool - __start)) / PAGE_SIZE;
	per_cpu_pages = hypervisor_header.possible_cpus *
		sizeof(struct per_cpu) / PAGE_SIZE;
	bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;

	system_config = (struct jailhouse_system *)
		(__page_pool + per_cpu_pages * PAGE_SIZE);
	config_pages = (jailhouse_system_config_size(system_config) +
			PAGE_SIZE - 1) / PAGE_SIZE;

	if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
		goto error_nomem;

	mem_pool.base_address = __page_pool;
	mem_pool.used_bitmap =
		(unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
				  config_pages * PAGE_SIZE);
	mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
	for (n = 0; n < mem_pool.used_pages; n++)
		set_bit(n, mem_pool.used_bitmap);
	mem_pool.flags = PAGE_SCRUB_ON_FREE;

	remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
	remap_pool.used_pages =
		hypervisor_header.possible_cpus * NUM_FOREIGN_PAGES;
	for (n = 0; n < remap_pool.used_pages; n++)
		set_bit(n, remap_pool.used_bitmap);

	hv_page_table = page_alloc(&mem_pool, 1);
	if (!hv_page_table)
		goto error_nomem;

	/* Replicate hypervisor mapping of Linux */
	err = page_map_create(hv_page_table, page_map_hvirt2phys(__start),
			      hypervisor_header.size, (unsigned long)__start,
			      PAGE_DEFAULT_FLAGS, PAGE_DEFAULT_FLAGS,
			      PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
	if (err)
		goto error_nomem;

	/* Make sure any remappings to the foreign regions can be performed
	 * without allocations of page table pages. */
	err = page_map_create(hv_page_table, 0,
			      remap_pool.used_pages * PAGE_SIZE,
			      FOREIGN_MAPPING_BASE, PAGE_NONPRESENT_FLAGS,
			      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
			      PAGE_MAP_NON_COHERENT);
	if (err)
		goto error_nomem;

	return 0;

error_nomem:
	printk("FATAL: page pool much too small\n");
	return -ENOMEM;
}
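
/*
 * Log the fill level of both page pools, e.g. to judge how much
 * headroom a configuration leaves.
 */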
void page_map_dump_stats(const char *when)
{
	printk("Page pool usage %s: mem %d/%d, remap %d/%d\n", when,
	       mem_pool.used_pages, mem_pool.pages,
	       remap_pool.used_pages, remap_pool.pages);
}