]> rtime.felk.cvut.cz Git - jailhouse.git/blob - hypervisor/paging.c
core: Properly translate guest physical addresses in page_map_get_foreign_page
[jailhouse.git] / hypervisor / paging.c
1 /*
2  * Jailhouse, a Linux-based partitioning hypervisor
3  *
4  * Copyright (c) Siemens AG, 2013
5  *
6  * Authors:
7  *  Jan Kiszka <jan.kiszka@siemens.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12
13 #include <jailhouse/paging.h>
14 #include <jailhouse/printk.h>
15 #include <jailhouse/string.h>
16 #include <jailhouse/control.h>
17 #include <asm/bitops.h>
18
/* Number of allocation-tracking bits held by one bitmap page */
#define BITS_PER_PAGE           (PAGE_SIZE * 8)

/* Sentinel returned by find_next_free_page when no free page exists */
#define INVALID_PAGE_NR         (~0UL)

/* page_pool.flags bit: zero page content on page_free */
#define PAGE_SCRUB_ON_FREE      0x1

/* Linker-provided symbols: hypervisor image start and page pool start */
extern u8 __start[], __page_pool[];

/* General-purpose allocation pool backing page tables and bitmaps */
struct page_pool mem_pool;
/* Pool of virtual-address slots used for temporary remappings;
 * its backing bitmap is allocated from mem_pool in paging_init() */
struct page_pool remap_pool = {
        .base_address = (void *)REMAP_BASE_ADDR,
        .pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
};

/* Root of the hypervisor's own page table, set up in paging_init() */
pgd_t *hv_page_table;
34
35 static unsigned long find_next_free_page(struct page_pool *pool,
36                                          unsigned long start)
37 {
38         unsigned long start_mask =
39                 ~0UL >> (BITS_PER_LONG - (start % BITS_PER_LONG));
40         unsigned long bmp_pos, bmp_val, page_nr;
41
42         if (start >= pool->pages)
43                 return INVALID_PAGE_NR;
44
45         for (bmp_pos = start / BITS_PER_LONG;
46              bmp_pos < pool->pages / BITS_PER_LONG; bmp_pos++) {
47                 bmp_val = pool->used_bitmap[bmp_pos] | start_mask;
48                 start_mask = 0;
49                 if (bmp_val != ~0UL) {
50                         page_nr = ffz(bmp_val) + bmp_pos * BITS_PER_LONG;
51                         if (page_nr >= pool->pages)
52                                 break;
53                         return page_nr;
54                 }
55         }
56
57         return INVALID_PAGE_NR;
58 }
59
60 void *page_alloc(struct page_pool *pool, unsigned int num)
61 {
62         unsigned long start, last, next;
63         unsigned int allocated;
64
65         start = find_next_free_page(pool, 0);
66         if (start == INVALID_PAGE_NR)
67                 return NULL;
68
69 restart:
70         for (allocated = 1, last = start; allocated < num;
71              allocated++, last = next) {
72                 next = find_next_free_page(pool, last + 1);
73                 if (next == INVALID_PAGE_NR)
74                         return NULL;
75                 if (next != last + 1) {
76                         start = next;
77                         goto restart;
78                 }
79         }
80
81         for (allocated = 0; allocated < num; allocated++)
82                 set_bit(start + allocated, pool->used_bitmap);
83
84         pool->used_pages += num;
85
86         return pool->base_address + start * PAGE_SIZE;
87 }
88
89 void page_free(struct page_pool *pool, void *page, unsigned int num)
90 {
91         unsigned long page_nr;
92
93         if (!page)
94                 return;
95
96         while (num-- > 0) {
97                 if (pool->flags & PAGE_SCRUB_ON_FREE)
98                         memset(page, 0, PAGE_SIZE);
99                 page_nr = (page - pool->base_address) / PAGE_SIZE;
100                 clear_bit(page_nr, pool->used_bitmap);
101                 pool->used_pages--;
102                 page += PAGE_SIZE;
103         }
104 }
105
106 unsigned long page_map_virt2phys(pgd_t *page_table, unsigned long virt,
107                                  unsigned int levels)
108 {
109         unsigned long offs = hypervisor_header.page_offset;
110         pgd_t *pgd;
111         pud_t *pud;
112         pmd_t *pmd;
113         pte_t *pte;
114
115         switch (levels) {
116         case 4:
117                 pgd = pgd_offset(page_table, virt);
118                 if (!pgd_valid(pgd))
119                         return INVALID_PHYS_ADDR;
120
121                 pud = pud4l_offset(pgd, offs, virt);
122                 break;
123         case 3:
124                 pud = pud3l_offset(page_table, virt);
125                 break;
126         default:
127                 return INVALID_PHYS_ADDR;
128         }
129         if (!pud_valid(pud))
130                 return INVALID_PHYS_ADDR;
131
132         pmd = pmd_offset(pud, offs, virt);
133         if (!pmd_valid(pud))
134                 return INVALID_PHYS_ADDR;
135
136         if (pmd_is_hugepage(pmd))
137                 return phys_address_hugepage(pmd, virt);
138
139         pte = pte_offset(pmd, offs, virt);
140         if (!pte_valid(pte))
141                 return INVALID_PHYS_ADDR;
142
143         return phys_address(pte, virt);
144 }
145
146 static void flush_page_table(void *addr, unsigned long size,
147                              enum page_map_coherent coherent)
148 {
149         if (coherent == PAGE_MAP_COHERENT)
150                 flush_cache(addr, size);
151 }
152
153 int page_map_create(pgd_t *page_table, unsigned long phys, unsigned long size,
154                     unsigned long virt, unsigned long flags,
155                     unsigned long table_flags, unsigned int levels,
156                     enum page_map_coherent coherent)
157 {
158         unsigned long offs = hypervisor_header.page_offset;
159         pgd_t *pgd;
160         pud_t *pud;
161         pmd_t *pmd;
162         pte_t *pte;
163
164         for (size = PAGE_ALIGN(size); size > 0;
165              phys += PAGE_SIZE, virt += PAGE_SIZE, size -= PAGE_SIZE) {
166                 switch (levels) {
167                 case 4:
168                         pgd = pgd_offset(page_table, virt);
169                         if (!pgd_valid(pgd)) {
170                                 pud = page_alloc(&mem_pool, 1);
171                                 if (!pud)
172                                         return -ENOMEM;
173                                 set_pgd(pgd, page_map_hvirt2phys(pud),
174                                         table_flags);
175                                 flush_page_table(pgd, sizeof(pgd), coherent);
176                         }
177                         pud = pud4l_offset(pgd, offs, virt);
178                         break;
179                 case 3:
180                         pud = pud3l_offset(page_table, virt);
181                         break;
182                 default:
183                         return -EINVAL;
184                 }
185
186                 if (!pud_valid(pud)) {
187                         pmd = page_alloc(&mem_pool, 1);
188                         if (!pmd)
189                                 return -ENOMEM;
190                         set_pud(pud, page_map_hvirt2phys(pmd), table_flags);
191                         flush_page_table(pud, sizeof(pud), coherent);
192                 }
193
194                 pmd = pmd_offset(pud, offs, virt);
195                 if (!pmd_valid(pmd)) {
196                         pte = page_alloc(&mem_pool, 1);
197                         if (!pte)
198                                 return -ENOMEM;
199                         set_pmd(pmd, page_map_hvirt2phys(pte), table_flags);
200                         flush_page_table(pmd, sizeof(pmd), coherent);
201                 }
202
203                 pte = pte_offset(pmd, offs, virt);
204                 set_pte(pte, phys, flags);
205                 flush_page_table(pte, sizeof(pte), coherent);
206         }
207
208         flush_tlb();
209
210         return 0;
211 }
212
213 void page_map_destroy(pgd_t *page_table, unsigned long virt,
214                       unsigned long size, unsigned int levels,
215                       enum page_map_coherent coherent)
216 {
217         unsigned long offs = hypervisor_header.page_offset;
218         pgd_t *pgd;
219         pud_t *pud;
220         pmd_t *pmd;
221         pte_t *pte;
222
223         for (size = PAGE_ALIGN(size); size > 0;
224              virt += PAGE_SIZE, size -= PAGE_SIZE) {
225                 switch (levels) {
226                 case 4:
227                         pgd = pgd_offset(page_table, virt);
228                         if (!pgd_valid(pgd))
229                                 continue;
230
231                         pud = pud4l_offset(pgd, offs, virt);
232                         break;
233                 case 3:
234                         pgd = 0; /* silence compiler warning */
235                         pud = pud3l_offset(page_table, virt);
236                         break;
237                 default:
238                         return;
239                 }
240                 if (!pud_valid(pud))
241                         continue;
242
243                 pmd = pmd_offset(pud, offs, virt);
244                 if (!pmd_valid(pmd))
245                         continue;
246
247                 pte = pte_offset(pmd, offs, virt);
248                 clear_pte(pte);
249                 flush_page_table(pte, sizeof(pte), coherent);
250
251                 if (!pt_empty(pmd, offs))
252                         continue;
253                 page_free(&mem_pool, pte_offset(pmd, offs, 0), 1);
254                 clear_pmd(pmd);
255                 flush_page_table(pmd, sizeof(pmd), coherent);
256
257                 if (!pmd_empty(pud, offs))
258                         continue;
259                 page_free(&mem_pool, pmd_offset(pud, offs, 0), 1);
260                 clear_pud(pud);
261                 flush_page_table(pud, sizeof(pud), coherent);
262
263                 if (levels < 4 || !pud_empty(pgd, offs))
264                         continue;
265                 page_free(&mem_pool, pud4l_offset(pgd, offs, 0), 1);
266                 clear_pgd(pgd);
267                 flush_page_table(pgd, sizeof(pgd), coherent);
268         }
269
270         flush_tlb();
271 }
272
/*
 * Temporarily map one page of a foreign (guest) address space into the
 * hypervisor and return its hypervisor-virtual address.
 *
 * The guest page table rooted at @page_table_paddr (a guest-physical
 * address) is walked level by level for @virt. At each level the table
 * page is first translated guest-physical -> host-physical via
 * arch_page_map_gphys2phys() and then remapped read-only into this CPU's
 * private remapping window (page_virt) so it can be read. The final data
 * page is mapped with the caller-supplied @flags.
 *
 * Returns the window address on success, NULL on any translation or
 * mapping failure. The window holds only ONE page at a time - each
 * page_map_create() call below reuses the same page_virt slot, so the
 * ordering of "map table page, then read the entry" is essential.
 */
void *page_map_get_foreign_page(struct per_cpu *cpu_data,
                                unsigned long page_table_paddr,
                                unsigned long virt, unsigned long flags)
{
        unsigned long page_virt, phys;
#if PAGE_DIR_LEVELS == 4
        pgd_t *pgd;
#endif
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        int err;

        /* Per-CPU remapping slot; paging_init() pre-populated the page
         * tables for this region so no allocation happens here. */
        page_virt = FOREIGN_MAPPING_BASE +
                cpu_data->cpu_id * PAGE_SIZE * NUM_FOREIGN_PAGES;

        /* Translate and map the guest's top-level table page read-only. */
        phys = arch_page_map_gphys2phys(cpu_data, page_table_paddr);
        if (phys == INVALID_PHYS_ADDR)
                return NULL;
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
                              PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
        if (err)
                return NULL;

#if PAGE_DIR_LEVELS == 4
        /* Read the PGD entry through the window, translate the PUD table
         * address it points to, and remap the window onto the PUD table. */
        pgd = pgd_offset((pgd_t *)page_virt, virt);
        if (!pgd_valid(pgd))
                return NULL;
        /* pud4l_offset(pgd, 0, 0) extracts the table address stored in
         * *pgd (offset 0, index 0) - used purely as an entry decoder. */
        phys = arch_page_map_gphys2phys(cpu_data,
                        (unsigned long)pud4l_offset(pgd, 0, 0));
        if (phys == INVALID_PHYS_ADDR)
                return NULL;
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
                              PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
        if (err)
                return NULL;

        /* Trick: the local variable page_virt holds the window address and
         * is treated as a fake PGD entry (&page_virt) so the offset macro
         * indexes into the freshly remapped table. Same pattern below. */
        pud = pud4l_offset((pgd_t *)&page_virt, 0, virt);
#elif PAGE_DIR_LEVELS == 3
        pud = pud3l_offset((pgd_t *)page_virt, virt);
#else
# error Unsupported paging level
#endif
        if (!pud_valid(pud))
                return NULL;
        /* Same dance for the PMD table page. */
        phys = arch_page_map_gphys2phys(cpu_data,
                                        (unsigned long)pmd_offset(pud, 0, 0));
        if (phys == INVALID_PHYS_ADDR)
                return NULL;
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
                              PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
        if (err)
                return NULL;

        pmd = pmd_offset((pud_t *)&page_virt, 0, virt);
        if (!pmd_valid(pmd))
                return NULL;
        if (pmd_is_hugepage(pmd))
                /* NOTE(review): this includes the offset of @virt within the
                 * huge page, so phys may not be page-aligned when handed to
                 * page_map_create below - confirm set_pte masks/ignores the
                 * low bits, or that callers only pass aligned @virt here. */
                phys = phys_address_hugepage(pmd, virt);
        else {
                /* Last table level: map the PTE page, then read the entry. */
                phys = arch_page_map_gphys2phys(cpu_data,
                                (unsigned long)pte_offset(pmd, 0, 0));
                if (phys == INVALID_PHYS_ADDR)
                        return NULL;
                err = page_map_create(hv_page_table, phys, PAGE_SIZE,
                                      page_virt, PAGE_READONLY_FLAGS,
                                      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
                                      PAGE_MAP_NON_COHERENT);
                if (err)
                        return NULL;

                pte = pte_offset((pmd_t *)&page_virt, 0, virt);
                if (!pte_valid(pte))
                        return NULL;
                /* Base address only (offset 0): the window maps whole pages. */
                phys = phys_address(pte, 0);
        }
        /* The address found in the guest tables is itself guest-physical;
         * translate it once more before the final mapping. */
        phys = arch_page_map_gphys2phys(cpu_data, phys);
        if (phys == INVALID_PHYS_ADDR)
                return NULL;

        /* Finally map the target data page with the requested flags. */
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
                              flags, PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
                              PAGE_MAP_NON_COHERENT);
        if (err)
                return NULL;

        return (void *)page_virt;
}
364
365 int paging_init(void)
366 {
367         unsigned long per_cpu_pages, config_pages, bitmap_pages;
368         unsigned long n;
369         int err;
370
371         mem_pool.pages =
372                 (hypervisor_header.size - (__page_pool - __start)) / PAGE_SIZE;
373         per_cpu_pages = hypervisor_header.possible_cpus *
374                 sizeof(struct per_cpu) / PAGE_SIZE;
375         bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;
376
377         system_config = (struct jailhouse_system *)
378                 (__page_pool + per_cpu_pages * PAGE_SIZE);
379         config_pages = (jailhouse_system_config_size(system_config) +
380                         PAGE_SIZE - 1) / PAGE_SIZE;
381
382         if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
383                 goto error_nomem;
384
385         mem_pool.base_address = __page_pool;
386         mem_pool.used_bitmap =
387                 (unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
388                                   config_pages * PAGE_SIZE);
389         mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
390         for (n = 0; n < mem_pool.used_pages; n++)
391                 set_bit(n, mem_pool.used_bitmap);
392         mem_pool.flags = PAGE_SCRUB_ON_FREE;
393
394         remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
395         remap_pool.used_pages =
396                 hypervisor_header.possible_cpus * NUM_FOREIGN_PAGES;
397         for (n = 0; n < remap_pool.used_pages; n++)
398                 set_bit(n, remap_pool.used_bitmap);
399
400         hv_page_table = page_alloc(&mem_pool, 1);
401         if (!hv_page_table)
402                 goto error_nomem;
403
404         /* Replicate hypervisor mapping of Linux */
405         err = page_map_create(hv_page_table, page_map_hvirt2phys(__start),
406                               hypervisor_header.size, (unsigned long)__start,
407                               PAGE_DEFAULT_FLAGS, PAGE_DEFAULT_FLAGS,
408                               PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
409         if (err)
410                 goto error_nomem;
411
412         /* Make sure any remappings to the foreign regions can be performed
413          * without allocations of page table pages. */
414         err = page_map_create(hv_page_table, 0,
415                               remap_pool.used_pages * PAGE_SIZE,
416                               FOREIGN_MAPPING_BASE, PAGE_NONPRESENT_FLAGS,
417                               PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
418                               PAGE_MAP_NON_COHERENT);
419         if (err)
420                 goto error_nomem;
421
422         return 0;
423
424 error_nomem:
425         printk("FATAL: page pool much too small\n");
426         return -ENOMEM;
427 }
428
/*
 * Log current usage of both page pools; @when is a caller-supplied label
 * describing the point in time (e.g. "after setup").
 *
 * NOTE(review): used_pages/pages are printed with %d; if struct page_pool
 * declares them as unsigned long, the specifiers should be %ld/%lu -
 * confirm against the field declarations and the printk implementation.
 */
void page_map_dump_stats(const char *when)
{
        printk("Page pool usage %s: mem %d/%d, remap %d/%d\n", when,
               mem_pool.used_pages, mem_pool.pages,
               remap_pool.used_pages, remap_pool.pages);
}