jailhouse.git / hypervisor / paging.c
core: Prepare foreign mapping range to support allocation-free remappings

/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/paging.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <jailhouse/control.h>
#include <asm/bitops.h>

#define BITS_PER_PAGE		(PAGE_SIZE * 8)

#define PAGE_SCRUB_ON_FREE	0x1

extern u8 __start[], __page_pool[];

struct page_pool mem_pool;
struct page_pool remap_pool = {
	.base_address = (void *)REMAP_BASE_ADDR,
	.pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
};

pgd_t *hv_page_table;

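/*
 * Grab the lowest free page in the pool: scan the used_bitmap for a
 * zero bit, mark it used and return the page's address, or NULL if the
 * pool is exhausted.
 */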
static void *page_alloc_one(struct page_pool *pool)
{
	unsigned long word, page_nr;

	for (word = 0; word < pool->pages / BITS_PER_LONG; word++)
		if (pool->used_bitmap[word] != ~0UL) {
			page_nr = ffz(pool->used_bitmap[word]) +
				word * BITS_PER_LONG;
			if (page_nr >= pool->pages)
				break;
			set_bit(page_nr, pool->used_bitmap);
			pool->used_pages++;
			return pool->base_address + page_nr * PAGE_SIZE;
		}

	return NULL;
}

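/*
 * Allocate num contiguous pages. Since page_alloc_one always returns
 * the lowest free page, a newly grabbed page that does not directly
 * follow the previous one proves the run is broken; the allocation is
 * then rolled back and NULL returned.
 */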
void *page_alloc(struct page_pool *pool, unsigned int num)
{
	void *start, *last, *next;
	unsigned int allocated;

	start = page_alloc_one(pool);
	if (!start)
		return NULL;

	for (allocated = 1, last = start; allocated < num;
	     allocated++, last = next) {
		next = page_alloc_one(pool);
		if (next != last + PAGE_SIZE) {
			/* release the stray non-contiguous page as well,
			 * it would otherwise leak (page_free tolerates
			 * next being NULL) */
			page_free(pool, next, 1);
			page_free(pool, start, allocated);
			return NULL;
		}
	}

	return start;
}

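/*
 * Return num pages starting at page to the pool, scrubbing them first
 * if the pool requests PAGE_SCRUB_ON_FREE. Passing NULL is a no-op.
 */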
void page_free(struct page_pool *pool, void *page, unsigned int num)
{
	unsigned long page_nr;

	if (!page)
		return;

	while (num-- > 0) {
		if (pool->flags & PAGE_SCRUB_ON_FREE)
			memset(page, 0, PAGE_SIZE);
		page_nr = (page - pool->base_address) / PAGE_SIZE;
		clear_bit(page_nr, pool->used_bitmap);
		pool->used_pages--;
		page += PAGE_SIZE;
	}
}

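/*
 * Walk page_table and translate virt to a physical address, stopping
 * with INVALID_PHYS_ADDR at the first non-present entry. The
 * page_table_offset is applied when following table links, so tables
 * can be walked that are not accessible at their physical address
 * (the hypervisor's own tables use hypervisor_header.page_offset).
 */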
unsigned long page_map_virt2phys(pgd_t *page_table,
				 unsigned long page_table_offset,
				 unsigned long virt)
{
#if PAGE_DIR_LEVELS == 4
	pgd_t *pgd;
#endif
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

#if PAGE_DIR_LEVELS == 4
	pgd = pgd_offset(page_table, virt);
	if (!pgd_valid(pgd))
		return INVALID_PHYS_ADDR;

	pud = pud4l_offset(pgd, page_table_offset, virt);
#elif PAGE_DIR_LEVELS == 3
	pud = pud3l_offset(page_table, virt);
#else
# error Unsupported paging level
#endif
	if (!pud_valid(pud))
		return INVALID_PHYS_ADDR;

	pmd = pmd_offset(pud, page_table_offset, virt);
	if (!pmd_valid(pmd))
		return INVALID_PHYS_ADDR;

	if (pmd_is_hugepage(pmd))
		return phys_address_hugepage(pmd, virt);

	pte = pte_offset(pmd, page_table_offset, virt);
	if (!pte_valid(pte))
		return INVALID_PHYS_ADDR;

	return phys_address(pte, virt);
}

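/*
 * Map the physical range [phys, phys + size) at virt, page by page,
 * allocating missing intermediate tables from mem_pool. flags is
 * applied to the final page table entries, table_flags to newly
 * created directory entries. Returns 0 on success (flushing the TLB)
 * or -ENOMEM/-EINVAL.
 */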
int page_map_create(pgd_t *page_table, unsigned long phys, unsigned long size,
		    unsigned long virt, unsigned long flags,
		    unsigned long table_flags, unsigned int levels)
{
	unsigned long offs = hypervisor_header.page_offset;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (size = PAGE_ALIGN(size); size > 0;
	     phys += PAGE_SIZE, virt += PAGE_SIZE, size -= PAGE_SIZE) {
		switch (levels) {
		case 4:
			pgd = pgd_offset(page_table, virt);
			if (!pgd_valid(pgd)) {
				pud = page_alloc(&mem_pool, 1);
				if (!pud)
					return -ENOMEM;
				set_pgd(pgd, page_map_hvirt2phys(pud),
					table_flags);
			}
			pud = pud4l_offset(pgd, offs, virt);
			break;
		case 3:
			pud = pud3l_offset(page_table, virt);
			break;
		default:
			return -EINVAL;
		}

		if (!pud_valid(pud)) {
			pmd = page_alloc(&mem_pool, 1);
			if (!pmd)
				return -ENOMEM;
			set_pud(pud, page_map_hvirt2phys(pmd), table_flags);
		}

		pmd = pmd_offset(pud, offs, virt);
		if (!pmd_valid(pmd)) {
			pte = page_alloc(&mem_pool, 1);
			if (!pte)
				return -ENOMEM;
			set_pmd(pmd, page_map_hvirt2phys(pte), table_flags);
		}

		pte = pte_offset(pmd, offs, virt);
		set_pte(pte, phys, flags);
	}

	flush_tlb();

	return 0;
}

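/*
 * Tear down the mapping of [virt, virt + size) again, returning page
 * table pages to mem_pool as soon as they become empty. Holes in the
 * mapping are skipped silently.
 */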
void page_map_destroy(pgd_t *page_table, unsigned long virt,
		      unsigned long size, unsigned int levels)
{
	unsigned long offs = hypervisor_header.page_offset;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (size = PAGE_ALIGN(size); size > 0;
	     virt += PAGE_SIZE, size -= PAGE_SIZE) {
		switch (levels) {
		case 4:
			pgd = pgd_offset(page_table, virt);
			if (!pgd_valid(pgd))
				continue;

			pud = pud4l_offset(pgd, offs, virt);
			break;
		case 3:
			pgd = NULL; /* silence compiler warning */
			pud = pud3l_offset(page_table, virt);
			break;
		default:
			return;
		}
		if (!pud_valid(pud))
			continue;

		pmd = pmd_offset(pud, offs, virt);
		if (!pmd_valid(pmd))
			continue;

		pte = pte_offset(pmd, offs, virt);
		clear_pte(pte);

		if (!pt_empty(pmd, offs))
			continue;
		page_free(&mem_pool, pte_offset(pmd, offs, 0), 1);
		clear_pmd(pmd);

		if (!pmd_empty(pud, offs))
			continue;
		page_free(&mem_pool, pmd_offset(pud, offs, 0), 1);
		clear_pud(pud);

		if (levels < 4 || !pud_empty(pgd, offs))
			continue;
		page_free(&mem_pool, pud4l_offset(pgd, offs, 0), 1);
		clear_pgd(pgd);
	}

	flush_tlb();
}

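/*
 * Make one page of a foreign address space, e.g. Linux's, accessible
 * to the hypervisor. The foreign page table at page_table_paddr is
 * walked by remapping one table page per level into this region's
 * slot of the foreign mapping range, i.e. the same hypervisor page is
 * reused for each level and no page table allocations are required.
 * Returns the mapping of the target page or NULL if the walk fails.
 */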
void *page_map_get_foreign_page(unsigned int mapping_region,
				unsigned long page_table_paddr,
				unsigned long page_table_offset,
				unsigned long virt, unsigned long flags)
{
	unsigned long page_virt, phys;
#if PAGE_DIR_LEVELS == 4
	pgd_t *pgd;
#endif
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int err;

	page_virt = FOREIGN_MAPPING_BASE +
		mapping_region * PAGE_SIZE * NUM_FOREIGN_PAGES;

	phys = page_table_paddr + page_table_offset;
	err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
			      PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
			      PAGE_DIR_LEVELS);
	if (err)
		return NULL;

#if PAGE_DIR_LEVELS == 4
	pgd = pgd_offset((pgd_t *)page_virt, virt);
	if (!pgd_valid(pgd))
		return NULL;
	phys = (unsigned long)pud4l_offset(pgd, page_table_offset, 0);
	err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
			      PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
			      PAGE_DIR_LEVELS);
	if (err)
		return NULL;

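	/* page_virt now maps the pud table itself; passing &page_virt
	 * lets the offset macro dereference our local variable as if it
	 * were the pgd entry referring to that table */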
	pud = pud4l_offset((pgd_t *)&page_virt, page_table_offset, virt);
#elif PAGE_DIR_LEVELS == 3
	pud = pud3l_offset((pgd_t *)page_virt, virt);
#else
# error Unsupported paging level
#endif
	if (!pud_valid(pud))
		return NULL;
	phys = (unsigned long)pmd_offset(pud, page_table_offset, 0);
	err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
			      PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
			      PAGE_DIR_LEVELS);
	if (err)
		return NULL;

	pmd = pmd_offset((pud_t *)&page_virt, page_table_offset, virt);
	if (!pmd_valid(pmd))
		return NULL;
	if (pmd_is_hugepage(pmd))
		phys = phys_address_hugepage(pmd, virt);
	else {
		phys = (unsigned long)pte_offset(pmd, page_table_offset, 0);
		err = page_map_create(hv_page_table, phys, PAGE_SIZE,
				      page_virt, PAGE_READONLY_FLAGS,
				      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
		if (err)
			return NULL;

		pte = pte_offset((pmd_t *)&page_virt, page_table_offset, virt);
		if (!pte_valid(pte))
			return NULL;
		phys = phys_address(pte, 0) + page_table_offset;
	}

	err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
			      flags, PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
	if (err)
		return NULL;

	return (void *)page_virt;
}

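/*
 * Set up the hypervisor's memory management: account for the per-CPU
 * data, system configuration and allocation bitmap inside the page
 * pool, replicate Linux's mapping of the hypervisor image, and
 * pre-populate the foreign mapping range with non-present entries so
 * that later remappings work without page table allocations.
 */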
int paging_init(void)
{
	unsigned long per_cpu_pages, config_pages, bitmap_pages;
	unsigned long n;
	u8 *addr;
	int err;

	mem_pool.pages =
		(hypervisor_header.size - (__page_pool - __start)) / PAGE_SIZE;
	per_cpu_pages = hypervisor_header.possible_cpus *
		sizeof(struct per_cpu) / PAGE_SIZE;
	bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;

	system_config = (struct jailhouse_system *)
		(__page_pool + per_cpu_pages * PAGE_SIZE);
	config_pages = (jailhouse_system_config_size(system_config) +
			PAGE_SIZE - 1) / PAGE_SIZE;

	if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
		goto error_nomem;

	mem_pool.base_address = __page_pool;
	mem_pool.used_bitmap =
		(unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
				  config_pages * PAGE_SIZE);
	mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
	for (n = 0; n < mem_pool.used_pages; n++)
		set_bit(n, mem_pool.used_bitmap);
	mem_pool.flags = PAGE_SCRUB_ON_FREE;

	remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
	if (!remap_pool.used_bitmap)
		goto error_nomem;
	remap_pool.used_pages =
		hypervisor_header.possible_cpus * NUM_FOREIGN_PAGES;
	for (n = 0; n < remap_pool.used_pages; n++)
		set_bit(n, remap_pool.used_bitmap);

	hv_page_table = page_alloc(&mem_pool, 1);
	if (!hv_page_table)
		goto error_nomem;

	/* Replicate hypervisor mapping of Linux */
	for (addr = __start; addr < __start + hypervisor_header.size;
	     addr += PAGE_SIZE) {
		err = page_map_create(hv_page_table, page_map_hvirt2phys(addr),
				      PAGE_SIZE, (unsigned long)addr,
				      PAGE_DEFAULT_FLAGS, PAGE_DEFAULT_FLAGS,
				      PAGE_DIR_LEVELS);
		if (err)
			goto error_nomem;
	}

	/* Make sure any remappings to the foreign regions can be performed
	 * without allocations of page table pages. */
	err = page_map_create(hv_page_table, 0,
			      remap_pool.used_pages * PAGE_SIZE,
			      FOREIGN_MAPPING_BASE, PAGE_NONPRESENT_FLAGS,
			      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
	if (err)
		goto error_nomem;

	return 0;

error_nomem:
	printk("FATAL: page pool much too small\n");
	return -ENOMEM;
}

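/* Log the current usage of both page pools; when names the occasion. */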
void page_map_dump_stats(const char *when)
{
	printk("Page pool usage %s: mem %d/%d, remap %d/%d\n", when,
	       mem_pool.used_pages, mem_pool.pages,
	       remap_pool.used_pages, remap_pool.pages);
}