/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/paging.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <jailhouse/control.h>
#include <asm/bitops.h>

#define BITS_PER_PAGE           (PAGE_SIZE * 8)

#define INVALID_PAGE_NR         (~0UL)

#define PAGE_SCRUB_ON_FREE      0x1

extern u8 __start[], __page_pool[];

struct page_pool mem_pool;
struct page_pool remap_pool = {
        .base_address = (void *)REMAP_BASE_ADDR,
        .pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
};

pgd_t *hv_page_table;

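/*
 * Find the first free page in the pool's allocation bitmap at or after page
 * number @start. Returns INVALID_PAGE_NR if no free page is left.
 */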
static unsigned long find_next_free_page(struct page_pool *pool,
                                         unsigned long start)
{
        unsigned long bmp_pos, bmp_val, page_nr;
        unsigned long start_mask = 0;

        if (start >= pool->pages)
                return INVALID_PAGE_NR;

        /* mask out the bits below the start position in the first word,
         * avoiding an undefined full-width shift when start is aligned */
        if (start % BITS_PER_LONG > 0)
                start_mask =
                        ~0UL >> (BITS_PER_LONG - (start % BITS_PER_LONG));

        for (bmp_pos = start / BITS_PER_LONG;
             bmp_pos < pool->pages / BITS_PER_LONG; bmp_pos++) {
                bmp_val = pool->used_bitmap[bmp_pos] | start_mask;
                start_mask = 0;
                if (bmp_val != ~0UL) {
                        page_nr = ffz(bmp_val) + bmp_pos * BITS_PER_LONG;
                        if (page_nr >= pool->pages)
                                break;
                        return page_nr;
                }
        }

        return INVALID_PAGE_NR;
}

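/*
 * Allocate @num contiguous pages from the pool. The search restarts from the
 * next free page whenever a gap interrupts the current run. Returns NULL if
 * no sufficiently large contiguous range is available.
 */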
void *page_alloc(struct page_pool *pool, unsigned int num)
{
        unsigned long start, last, next;
        unsigned int allocated;

        start = find_next_free_page(pool, 0);
        if (start == INVALID_PAGE_NR)
                return NULL;

restart:
        for (allocated = 1, last = start; allocated < num;
             allocated++, last = next) {
                next = find_next_free_page(pool, last + 1);
                if (next == INVALID_PAGE_NR)
                        return NULL;
                if (next != last + 1) {
                        start = next;
                        goto restart;
                }
        }

        for (allocated = 0; allocated < num; allocated++)
                set_bit(start + allocated, pool->used_bitmap);

        pool->used_pages += num;

        return pool->base_address + start * PAGE_SIZE;
}

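/*
 * Release @num pages starting at @page back to the pool, scrubbing them if
 * the pool has PAGE_SCRUB_ON_FREE set. A NULL @page is ignored.
 */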
void page_free(struct page_pool *pool, void *page, unsigned int num)
{
        unsigned long page_nr;

        if (!page)
                return;

        while (num-- > 0) {
                if (pool->flags & PAGE_SCRUB_ON_FREE)
                        memset(page, 0, PAGE_SIZE);
                page_nr = (page - pool->base_address) / PAGE_SIZE;
                clear_bit(page_nr, pool->used_bitmap);
                pool->used_pages--;
                page += PAGE_SIZE;
        }
}

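/*
 * Walk the given 3- or 4-level page table and translate @virt into a
 * physical address, handling huge pages at the PMD level. Returns
 * INVALID_PHYS_ADDR if no valid mapping exists.
 */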
unsigned long page_map_virt2phys(pgd_t *page_table, unsigned long virt,
                                 unsigned int levels)
{
        unsigned long offs = hypervisor_header.page_offset;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        switch (levels) {
        case 4:
                pgd = pgd_offset(page_table, virt);
                if (!pgd_valid(pgd))
                        return INVALID_PHYS_ADDR;

                pud = pud4l_offset(pgd, offs, virt);
                break;
        case 3:
                pud = pud3l_offset(page_table, virt);
                break;
        default:
                return INVALID_PHYS_ADDR;
        }
        if (!pud_valid(pud))
                return INVALID_PHYS_ADDR;

        pmd = pmd_offset(pud, offs, virt);
        if (!pmd_valid(pmd))
                return INVALID_PHYS_ADDR;

        if (pmd_is_hugepage(pmd))
                return phys_address_hugepage(pmd, virt);

        pte = pte_offset(pmd, offs, virt);
        if (!pte_valid(pte))
                return INVALID_PHYS_ADDR;

        return phys_address(pte, virt);
}

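/*
 * Flush updated page table entries from the CPU caches if the caller
 * requested coherent (PAGE_MAP_COHERENT) page table updates.
 */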
static void flush_page_table(void *addr, unsigned long size,
                             enum page_map_coherent coherent)
{
        if (coherent == PAGE_MAP_COHERENT)
                flush_cache(addr, size);
}

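/*
 * Map @size bytes of physical memory starting at @phys to @virt in
 * @page_table, page by page, allocating missing intermediate table pages
 * from mem_pool. Updated entries are flushed according to @coherent, and the
 * TLB entry of each mapped page is invalidated. Returns 0 on success,
 * -ENOMEM if a table page cannot be allocated, or -EINVAL for an unsupported
 * number of paging levels.
 */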
int page_map_create(pgd_t *page_table, unsigned long phys, unsigned long size,
                    unsigned long virt, unsigned long flags,
                    unsigned long table_flags, unsigned int levels,
                    enum page_map_coherent coherent)
{
        unsigned long offs = hypervisor_header.page_offset;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        for (size = PAGE_ALIGN(size); size > 0;
             phys += PAGE_SIZE, virt += PAGE_SIZE, size -= PAGE_SIZE) {
                switch (levels) {
                case 4:
                        pgd = pgd_offset(page_table, virt);
                        if (!pgd_valid(pgd)) {
                                pud = page_alloc(&mem_pool, 1);
                                if (!pud)
                                        return -ENOMEM;
                                set_pgd(pgd, page_map_hvirt2phys(pud),
                                        table_flags);
                                flush_page_table(pgd, sizeof(pgd), coherent);
                        }
                        pud = pud4l_offset(pgd, offs, virt);
                        break;
                case 3:
                        pud = pud3l_offset(page_table, virt);
                        break;
                default:
                        return -EINVAL;
                }

                if (!pud_valid(pud)) {
                        pmd = page_alloc(&mem_pool, 1);
                        if (!pmd)
                                return -ENOMEM;
                        set_pud(pud, page_map_hvirt2phys(pmd), table_flags);
                        flush_page_table(pud, sizeof(pud), coherent);
                }

                pmd = pmd_offset(pud, offs, virt);
                if (!pmd_valid(pmd)) {
                        pte = page_alloc(&mem_pool, 1);
                        if (!pte)
                                return -ENOMEM;
                        set_pmd(pmd, page_map_hvirt2phys(pte), table_flags);
                        flush_page_table(pmd, sizeof(pmd), coherent);
                }

                pte = pte_offset(pmd, offs, virt);
                set_pte(pte, phys, flags);
                flush_page_table(pte, sizeof(pte), coherent);
                arch_tlb_flush_page(virt);
        }

        return 0;
}

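/*
 * Remove the mappings of @size bytes starting at @virt from @page_table,
 * flushing the TLB entry of each unmapped page and releasing intermediate
 * table pages back to mem_pool once they become empty.
 */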
void page_map_destroy(pgd_t *page_table, unsigned long virt,
                      unsigned long size, unsigned int levels,
                      enum page_map_coherent coherent)
{
        unsigned long offs = hypervisor_header.page_offset;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        for (size = PAGE_ALIGN(size); size > 0;
             virt += PAGE_SIZE, size -= PAGE_SIZE) {
                switch (levels) {
                case 4:
                        pgd = pgd_offset(page_table, virt);
                        if (!pgd_valid(pgd))
                                continue;

                        pud = pud4l_offset(pgd, offs, virt);
                        break;
                case 3:
                        pgd = 0; /* silence compiler warning */
                        pud = pud3l_offset(page_table, virt);
                        break;
                default:
                        return;
                }
                if (!pud_valid(pud))
                        continue;

                pmd = pmd_offset(pud, offs, virt);
                if (!pmd_valid(pmd))
                        continue;

                pte = pte_offset(pmd, offs, virt);
                clear_pte(pte);
                flush_page_table(pte, sizeof(pte), coherent);

                /* the stale TLB entry must go even if the containing page
                 * table stays in use and cannot be freed below */
                arch_tlb_flush_page(virt);

                if (!pt_empty(pmd, offs))
                        continue;
                page_free(&mem_pool, pte_offset(pmd, offs, 0), 1);
                clear_pmd(pmd);
                flush_page_table(pmd, sizeof(pmd), coherent);

                if (!pmd_empty(pud, offs))
                        continue;
                page_free(&mem_pool, pmd_offset(pud, offs, 0), 1);
                clear_pud(pud);
                flush_page_table(pud, sizeof(pud), coherent);

                if (levels < 4 || !pud_empty(pgd, offs))
                        continue;
                page_free(&mem_pool, pud4l_offset(pgd, offs, 0), 1);
                clear_pgd(pgd);
                flush_page_table(pgd, sizeof(pgd), coherent);
        }
}

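/*
 * Resolve @virt through a foreign (guest) page table rooted at the
 * guest-physical address @page_table_paddr and map the resulting page into
 * the calling CPU's foreign mapping region with @flags. Returns the
 * hypervisor-virtual address of that mapping, or NULL if any level of the
 * walk or remapping fails.
 */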
void *page_map_get_foreign_page(struct per_cpu *cpu_data,
                                unsigned long page_table_paddr,
                                unsigned long virt, unsigned long flags)
{
        unsigned long page_virt, phys;
#if PAGE_DIR_LEVELS == 4
        pgd_t *pgd;
#endif
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        int err;

        page_virt = FOREIGN_MAPPING_BASE +
                cpu_data->cpu_id * PAGE_SIZE * NUM_FOREIGN_PAGES;

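        /*
         * The walk below maps each level of the foreign page table, one at a
         * time, at page_virt. Passing &page_virt to the *_offset() helpers
         * makes them index into that remapped table page; the entry values
         * themselves hold guest-physical addresses and cannot be followed
         * directly.
         */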
        phys = arch_page_map_gphys2phys(cpu_data, page_table_paddr);
        if (phys == INVALID_PHYS_ADDR)
                return NULL;
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
                              PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
        if (err)
                return NULL;

#if PAGE_DIR_LEVELS == 4
        pgd = pgd_offset((pgd_t *)page_virt, virt);
        if (!pgd_valid(pgd))
                return NULL;
        phys = arch_page_map_gphys2phys(cpu_data,
                        (unsigned long)pud4l_offset(pgd, 0, 0));
        if (phys == INVALID_PHYS_ADDR)
                return NULL;
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
                              PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
        if (err)
                return NULL;

        pud = pud4l_offset((pgd_t *)&page_virt, 0, virt);
#elif PAGE_DIR_LEVELS == 3
        pud = pud3l_offset((pgd_t *)page_virt, virt);
#else
# error Unsupported paging level
#endif
        if (!pud_valid(pud))
                return NULL;
        phys = arch_page_map_gphys2phys(cpu_data,
                                        (unsigned long)pmd_offset(pud, 0, 0));
        if (phys == INVALID_PHYS_ADDR)
                return NULL;
        err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
                              PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
        if (err)
                return NULL;

        pmd = pmd_offset((pud_t *)&page_virt, 0, virt);
        if (!pmd_valid(pmd))
                return NULL;
        if (pmd_is_hugepage(pmd))
                phys = phys_address_hugepage(pmd, virt);
        else {
                phys = arch_page_map_gphys2phys(cpu_data,
                                (unsigned long)pte_offset(pmd, 0, 0));
                if (phys == INVALID_PHYS_ADDR)
                        return NULL;
                err = page_map_create(hv_page_table, phys, PAGE_SIZE,
                                      page_virt, PAGE_READONLY_FLAGS,
                                      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
                                      PAGE_MAP_NON_COHERENT);
                if (err)
                        return NULL;

                pte = pte_offset((pmd_t *)&page_virt, 0, virt);
                if (!pte_valid(pte))
                        return NULL;
                phys = phys_address(pte, 0);
        }
        phys = arch_page_map_gphys2phys(cpu_data, phys);
        if (phys == INVALID_PHYS_ADDR)
                return NULL;

        err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
                              flags, PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
                              PAGE_MAP_NON_COHERENT);
        if (err)
                return NULL;

        return (void *)page_virt;
}

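/*
 * Initialize the page pools and the hypervisor's own page table: mem_pool is
 * set up behind the per-CPU data and system configuration, the Linux mapping
 * of the hypervisor image is replicated, and the foreign mapping region is
 * pre-populated so that later remappings need no further page table
 * allocations.
 */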
int paging_init(void)
{
        unsigned long per_cpu_pages, config_pages, bitmap_pages;
        unsigned long n;
        int err;

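        /*
         * The memory behind __page_pool is laid out as: per-CPU data pages,
         * the system configuration, the allocation bitmap of mem_pool, and
         * then the freely allocatable pages.
         */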
        mem_pool.pages =
                (hypervisor_header.size - (__page_pool - __start)) / PAGE_SIZE;
        per_cpu_pages = hypervisor_header.possible_cpus *
                sizeof(struct per_cpu) / PAGE_SIZE;
        bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;

        system_config = (struct jailhouse_system *)
                (__page_pool + per_cpu_pages * PAGE_SIZE);
        config_pages = (jailhouse_system_config_size(system_config) +
                        PAGE_SIZE - 1) / PAGE_SIZE;

        if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
                goto error_nomem;

        mem_pool.base_address = __page_pool;
        mem_pool.used_bitmap =
                (unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
                                  config_pages * PAGE_SIZE);
        mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
        for (n = 0; n < mem_pool.used_pages; n++)
                set_bit(n, mem_pool.used_bitmap);
        mem_pool.flags = PAGE_SCRUB_ON_FREE;

        remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
        remap_pool.used_pages =
                hypervisor_header.possible_cpus * NUM_FOREIGN_PAGES;
        for (n = 0; n < remap_pool.used_pages; n++)
                set_bit(n, remap_pool.used_bitmap);

        hv_page_table = page_alloc(&mem_pool, 1);
        if (!hv_page_table)
                goto error_nomem;

        /* Replicate hypervisor mapping of Linux */
        err = page_map_create(hv_page_table, page_map_hvirt2phys(__start),
                              hypervisor_header.size, (unsigned long)__start,
                              PAGE_DEFAULT_FLAGS, PAGE_DEFAULT_FLAGS,
                              PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
        if (err)
                goto error_nomem;

        /* Make sure that any remappings in the foreign mapping region can be
         * performed without allocating page table pages. */
        err = page_map_create(hv_page_table, 0,
                              remap_pool.used_pages * PAGE_SIZE,
                              FOREIGN_MAPPING_BASE, PAGE_NONPRESENT_FLAGS,
                              PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
                              PAGE_MAP_NON_COHERENT);
        if (err)
                goto error_nomem;

        return 0;

error_nomem:
        printk("FATAL: page pool much too small\n");
        return -ENOMEM;
}

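/*
 * Print the current usage of both page pools; @when names the occasion at
 * the call site.
 */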
void page_map_dump_stats(const char *when)
{
        printk("Page pool usage %s: mem %d/%d, remap %d/%d\n", when,
               mem_pool.used_pages, mem_pool.pages,
               remap_pool.used_pages, remap_pool.pages);
}