[jailhouse.git] / hypervisor / paging.c
/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/paging.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <jailhouse/control.h>
#include <asm/bitops.h>

#define BITS_PER_PAGE           (PAGE_SIZE * 8)

#define PAGE_SCRUB_ON_FREE      0x1

extern u8 __start[], __page_pool[];

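/*
 * mem_pool hands out physical pages from the area starting at
 * __page_pool; remap_pool only manages the virtual address range at
 * REMAP_BASE_ADDR used for temporary mappings, its first pages being
 * reserved per CPU for foreign page mappings (see paging_init()).
 */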
struct page_pool mem_pool;
struct page_pool remap_pool = {
        .base_address = (void *)REMAP_BASE_ADDR,
        .pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
};

pgd_t *hv_page_table;

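/*
 * Grab the first free page of the pool: scan the allocation bitmap for a
 * zero bit, mark it used and return the page's address, or NULL if the
 * pool is exhausted.
 */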
static void *page_alloc_one(struct page_pool *pool)
{
        unsigned long word, page_nr;

        for (word = 0; word < pool->pages / BITS_PER_LONG; word++)
                if (pool->used_bitmap[word] != ~0UL) {
                        page_nr = ffz(pool->used_bitmap[word]) +
                                word * BITS_PER_LONG;
                        if (page_nr >= pool->pages)
                                break;
                        set_bit(page_nr, pool->used_bitmap);
                        pool->used_pages++;
                        return pool->base_address + page_nr * PAGE_SIZE;
                }

        return NULL;
}

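/*
 * Allocate num contiguous pages. Since page_alloc_one() always returns
 * the lowest free page, a successor that is not adjacent to the previous
 * one means no contiguous run is available here; the partial run is then
 * released and NULL returned.
 */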
void *page_alloc(struct page_pool *pool, unsigned int num)
{
        void *start, *last, *next;
        unsigned int allocated;

        start = page_alloc_one(pool);
        if (!start)
                return NULL;

        for (allocated = 1, last = start; allocated < num;
             allocated++, last = next) {
                next = page_alloc_one(pool);
                if (next != last + PAGE_SIZE) {
                        /* free the stray page (page_free ignores NULL)
                         * along with the partial run collected so far */
                        page_free(pool, next, 1);
                        page_free(pool, start, allocated);
                        return NULL;
                }
        }

        return start;
}

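/*
 * Release num pages starting at page back to the pool, scrubbing their
 * contents first if PAGE_SCRUB_ON_FREE is set. A NULL page is ignored.
 */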
void page_free(struct page_pool *pool, void *page, unsigned int num)
{
        unsigned long page_nr;

        if (!page)
                return;

        while (num-- > 0) {
                if (pool->flags & PAGE_SCRUB_ON_FREE)
                        memset(page, 0, PAGE_SIZE);
                page_nr = (page - pool->base_address) / PAGE_SIZE;
                clear_bit(page_nr, pool->used_bitmap);
                pool->used_pages--;
                page += PAGE_SIZE;
        }
}

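/*
 * Walk page_table to translate virt into a physical address.
 * page_table_offset is the delta between the physical addresses stored
 * in the entries and where those tables are mapped in the current
 * address space. Returns INVALID_PHYS_ADDR on any invalid entry along
 * the walk.
 */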
unsigned long page_map_virt2phys(pgd_t *page_table,
                                 unsigned long page_table_offset,
                                 unsigned long virt)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

#if PAGE_DIR_LEVELS == 4
        pgd = pgd_offset(page_table, virt);
        if (!pgd_valid(pgd))
                return INVALID_PHYS_ADDR;

        pud = pud4l_offset(pgd, page_table_offset, virt);
#elif PAGE_DIR_LEVELS == 3
        /* with 3 levels, page_table is the pud itself */
        pud = pud3l_offset(page_table, virt);
#else
# error Unsupported paging level
#endif
        if (!pud_valid(pud))
                return INVALID_PHYS_ADDR;

        pmd = pmd_offset(pud, page_table_offset, virt);
        if (!pmd_valid(pmd))
                return INVALID_PHYS_ADDR;

        if (pmd_is_hugepage(pmd))
                return phys_address_hugepage(pmd, virt);

        pte = pte_offset(pmd, page_table_offset, virt);
        if (!pte_valid(pte))
                return INVALID_PHYS_ADDR;

        return phys_address(pte, virt);
}

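/*
 * Map the physical range [phys, phys+size) at virt in page_table,
 * allocating missing intermediate tables from mem_pool. flags is applied
 * to the final page entries, table_flags to newly created directory
 * entries. Supports 4- and 3-level page tables.
 */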
int page_map_create(pgd_t *page_table, unsigned long phys, unsigned long size,
                    unsigned long virt, unsigned long flags,
                    unsigned long table_flags, unsigned int levels)
{
        unsigned long offs = hypervisor_header.page_offset;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        for (size = PAGE_ALIGN(size); size > 0;
             phys += PAGE_SIZE, virt += PAGE_SIZE, size -= PAGE_SIZE) {
                switch (levels) {
                case 4:
                        pgd = pgd_offset(page_table, virt);
                        if (!pgd_valid(pgd)) {
                                pud = page_alloc(&mem_pool, 1);
                                if (!pud)
                                        return -ENOMEM;
                                set_pgd(pgd, page_map_hvirt2phys(pud),
                                        table_flags);
                        }
                        pud = pud4l_offset(pgd, offs, virt);
                        break;
                case 3:
                        pud = pud3l_offset(page_table, virt);
                        break;
                default:
                        return -EINVAL;
                }

                if (!pud_valid(pud)) {
                        pmd = page_alloc(&mem_pool, 1);
                        if (!pmd)
                                return -ENOMEM;
                        set_pud(pud, page_map_hvirt2phys(pmd), table_flags);
                }

                pmd = pmd_offset(pud, offs, virt);
                if (!pmd_valid(pmd)) {
                        pte = page_alloc(&mem_pool, 1);
                        if (!pte)
                                return -ENOMEM;
                        set_pmd(pmd, page_map_hvirt2phys(pte), table_flags);
                }

                pte = pte_offset(pmd, offs, virt);
                set_pte(pte, phys, flags);
        }

        return 0;
}

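/*
 * Remove the mapping of [virt, virt+size) from page_table, returning
 * page-table pages that become empty to mem_pool and flushing the TLB
 * when done. Unmapped holes in the range are skipped.
 */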
void page_map_destroy(pgd_t *page_table, unsigned long virt,
                      unsigned long size, unsigned int levels)
{
        unsigned long offs = hypervisor_header.page_offset;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        for (size = PAGE_ALIGN(size); size > 0;
             virt += PAGE_SIZE, size -= PAGE_SIZE) {
                switch (levels) {
                case 4:
                        pgd = pgd_offset(page_table, virt);
                        if (!pgd_valid(pgd))
                                continue;

                        pud = pud4l_offset(pgd, offs, virt);
                        break;
                case 3:
                        pgd = NULL; /* silence compiler warning */
                        pud = pud3l_offset(page_table, virt);
                        break;
                default:
                        return;
                }
                if (!pud_valid(pud))
                        continue;

                pmd = pmd_offset(pud, offs, virt);
                if (!pmd_valid(pmd))
                        continue;

                pte = pte_offset(pmd, offs, virt);
                clear_pte(pte);

                if (!pt_empty(pmd, offs))
                        continue;
                page_free(&mem_pool, pte_offset(pmd, offs, 0), 1);
                clear_pmd(pmd);

                if (!pmd_empty(pud, offs))
                        continue;
                page_free(&mem_pool, pmd_offset(pud, offs, 0), 1);
                clear_pud(pud);

                if (levels < 4 || !pud_empty(pgd, offs))
                        continue;
                page_free(&mem_pool, pud4l_offset(pgd, offs, 0), 1);
                clear_pgd(pgd);
        }

        flush_tlb();
}

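/*
 * Make a page of a foreign address space accessible by mapping it into
 * this mapping region. The foreign page table is walked by temporarily
 * mapping each level read-only into consecutive pages of the region;
 * page_table_offset adjusts the physical addresses read from foreign
 * entries. Returns the page's mapped address or NULL on failure.
 */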
void *page_map_get_foreign_page(unsigned int mapping_region,
                                unsigned long page_table_paddr,
                                unsigned long page_table_offset,
                                unsigned long virt, unsigned long flags)
{
        unsigned long page_virt, pt_virt, phys;
#if PAGE_DIR_LEVELS == 4
        pgd_t *pgd;
#endif
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        int err;

        page_virt = FOREIGN_MAPPING_BASE +
                mapping_region * PAGE_SIZE * NUM_FOREIGN_PAGES;

        pt_virt = page_virt + PAGE_SIZE;
        phys = page_table_paddr + page_table_offset;
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
                              PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS);
        if (err)
                goto error_release;

#if PAGE_DIR_LEVELS == 4
        pgd = pgd_offset((pgd_t *)pt_virt, virt);
        if (!pgd_valid(pgd))
                goto error_release;
        pt_virt += PAGE_SIZE;
        phys = (unsigned long)pud4l_offset(pgd, page_table_offset, 0);
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
                              PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS);
        if (err)
                goto error_release;

        /* the offset helpers expect a pointer to a table entry; pt_virt
         * serves as a pseudo-entry referring to the freshly mapped table */
        pud = pud4l_offset((pgd_t *)&pt_virt, page_table_offset, virt);
#elif PAGE_DIR_LEVELS == 3
        pud = pud3l_offset((pgd_t *)pt_virt, virt);
#else
# error Unsupported paging level
#endif
        if (!pud_valid(pud))
                goto error_release;
        pt_virt += PAGE_SIZE;
        phys = (unsigned long)pmd_offset(pud, page_table_offset, 0);
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
                              PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS);
        if (err)
                goto error_release;

        pmd = pmd_offset((pud_t *)&pt_virt, page_table_offset, virt);
        if (!pmd_valid(pmd))
                goto error_release;
        if (pmd_is_hugepage(pmd))
                phys = phys_address_hugepage(pmd, virt);
        else {
                pt_virt += PAGE_SIZE;
                phys = (unsigned long)pte_offset(pmd, page_table_offset, 0);
                err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
                                      PAGE_READONLY_FLAGS,
                                      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
                if (err)
                        goto error_release;

                pte = pte_offset((pmd_t *)&pt_virt, page_table_offset, virt);
                if (!pte_valid(pte))
                        goto error_release;
                phys = phys_address(pte, 0) + page_table_offset;
        }

        /* finally map the target page itself with the requested flags */
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
                              flags, PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
        if (err)
                goto error_release;

        return (void *)page_virt;

error_release:
        page_map_release_foreign_page(mapping_region);
        return NULL;
}

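/*
 * Undo all temporary mappings established for the given mapping region.
 */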
void page_map_release_foreign_page(unsigned int mapping_region)
{
        page_map_destroy(hv_page_table,
                         FOREIGN_MAPPING_BASE +
                         mapping_region * PAGE_SIZE * NUM_FOREIGN_PAGES,
                         NUM_FOREIGN_PAGES * PAGE_SIZE, PAGE_DIR_LEVELS);
}

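/*
 * Initialize the page pools and the hypervisor page table: carve the
 * per-CPU data, the system config and the allocation bitmap out of the
 * pool area, reserve the per-CPU foreign mapping regions in remap_pool,
 * and replicate the hypervisor mapping set up by Linux into
 * hv_page_table.
 */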
int paging_init(void)
{
        unsigned long per_cpu_pages, config_pages, bitmap_pages;
        unsigned long n;
        u8 *addr;
        int err;

        mem_pool.pages =
                (hypervisor_header.size - (__page_pool - __start)) / PAGE_SIZE;
        per_cpu_pages = hypervisor_header.possible_cpus *
                sizeof(struct per_cpu) / PAGE_SIZE;
        bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;

        system_config = (struct jailhouse_system *)
                (__page_pool + per_cpu_pages * PAGE_SIZE);
        config_pages = (jailhouse_system_config_size(system_config) +
                        PAGE_SIZE - 1) / PAGE_SIZE;

        if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
                goto error_nomem;

        mem_pool.base_address = __page_pool;
        mem_pool.used_bitmap =
                (unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
                                  config_pages * PAGE_SIZE);
        mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
        for (n = 0; n < mem_pool.used_pages; n++)
                set_bit(n, mem_pool.used_bitmap);
        mem_pool.flags = PAGE_SCRUB_ON_FREE;

        remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
        if (!remap_pool.used_bitmap)
                goto error_nomem;
        remap_pool.used_pages =
                hypervisor_header.possible_cpus * NUM_FOREIGN_PAGES;
        for (n = 0; n < remap_pool.used_pages; n++)
                set_bit(n, remap_pool.used_bitmap);

        hv_page_table = page_alloc(&mem_pool, 1);
        if (!hv_page_table)
                goto error_nomem;

        /* Replicate hypervisor mapping of Linux */
        for (addr = __start; addr < __start + hypervisor_header.size;
             addr += PAGE_SIZE) {
                err = page_map_create(hv_page_table, page_map_hvirt2phys(addr),
                                      PAGE_SIZE, (unsigned long)addr,
                                      PAGE_DEFAULT_FLAGS, PAGE_DEFAULT_FLAGS,
                                      PAGE_DIR_LEVELS);
                if (err)
                        goto error_nomem;
        }

        return 0;

error_nomem:
        printk("FATAL: page pool much too small\n");
        return -ENOMEM;
}

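/* Report the usage of both page pools; "when" labels the checkpoint. */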
void page_map_dump_stats(const char *when)
{
        printk("Page pool usage %s: mem %ld/%ld, remap %ld/%ld\n", when,
               mem_pool.used_pages, mem_pool.pages,
               remap_pool.used_pages, remap_pool.pages);
}