/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013, 2014
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/paging.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <jailhouse/control.h>
#include <asm/bitops.h>

#define BITS_PER_PAGE           (PAGE_SIZE * 8)

#define INVALID_PAGE_NR         (~0UL)

#define PAGE_SCRUB_ON_FREE      0x1

extern u8 __page_pool[];

/**
 * Offset between virtual and physical hypervisor addresses.
 *
 * @note Private, use paging_hvirt2phys() or paging_phys2hvirt() instead.
 */
unsigned long page_offset;
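
/*
 * Illustration (added, not part of the original file): paging_hvirt2phys()
 * and paging_phys2hvirt(), presumably provided by paging.h, are expected to
 * be plain offset translations based on page_offset, roughly:
 *
 *      phys  = (unsigned long)hvirt - page_offset;
 *      hvirt = (void *)(phys + page_offset);
 */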

/** Page pool containing physical pages for use by the hypervisor. */
struct page_pool mem_pool;
/** Page pool containing virtual pages for remappings by the hypervisor. */
struct page_pool remap_pool = {
        .base_address = (void *)REMAP_BASE,
        .pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
};

/** Descriptor of the hypervisor paging structures. */
struct paging_structures hv_paging_structs;

/**
 * Trivial implementation of paging::get_phys (for non-terminal levels)
 * @param pte See paging::get_phys.
 * @param virt See paging::get_phys.
 *
 * @return @c INVALID_PHYS_ADDR.
 *
 * @see paging
 */
unsigned long paging_get_phys_invalid(pt_entry_t pte, unsigned long virt)
{
        return INVALID_PHYS_ADDR;
}

static unsigned long find_next_free_page(struct page_pool *pool,
                                         unsigned long start)
{
        unsigned long bmp_pos, bmp_val, page_nr;
        unsigned long start_mask = 0;

        if (start >= pool->pages)
                return INVALID_PAGE_NR;

        /*
         * If we don't start at the beginning of a bitmap word, create a mask
         * to mark the pages before the start page as (virtually) used.
         */
        if (start % BITS_PER_LONG > 0)
                start_mask = ~0UL >> (BITS_PER_LONG - (start % BITS_PER_LONG));

        for (bmp_pos = start / BITS_PER_LONG;
             bmp_pos < pool->pages / BITS_PER_LONG; bmp_pos++) {
                bmp_val = pool->used_bitmap[bmp_pos] | start_mask;
                start_mask = 0;
                if (bmp_val != ~0UL) {
                        page_nr = ffzl(bmp_val) + bmp_pos * BITS_PER_LONG;
                        if (page_nr >= pool->pages)
                                break;
                        return page_nr;
                }
        }

        return INVALID_PAGE_NR;
}
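
/*
 * Worked example (added, not part of the original file): with
 * BITS_PER_LONG == 64, find_next_free_page(pool, 70) computes
 *
 *      start_mask = ~0UL >> (64 - 6);  (marks bits 0..5 of word 1 as used)
 *      bmp_pos    = 70 / 64 = 1;
 *
 * so even if pages 64..69 happen to be free, the first candidate returned is
 * page 70 or a later free page, never one before the requested start.
 */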

/**
 * Allocate consecutive pages from the specified pool.
 * @param pool          Page pool to allocate from.
 * @param num           Number of pages.
 * @param align_mask    Choose start so that start_page_no & align_mask == 0.
 *
 * @return Pointer to first page or NULL if allocation failed.
 *
 * @see page_free
 */
static void *page_alloc_internal(struct page_pool *pool, unsigned int num,
                                 unsigned long align_mask)
{
        /* The pool itself might not be aligned as required. */
        unsigned long aligned_start =
                ((unsigned long)pool->base_address >> PAGE_SHIFT) & align_mask;
        unsigned long next = aligned_start;
        unsigned long start, last;
        unsigned int allocated;

restart:
        /* Forward the search start to the next aligned page. */
        if ((next - aligned_start) & align_mask)
                next += num - ((next - aligned_start) & align_mask);

        start = next = find_next_free_page(pool, next);
        if (start == INVALID_PAGE_NR || num == 0)
                return NULL;

        /* Enforce alignment (none if align_mask is 0). */
        if ((start - aligned_start) & align_mask)
                goto restart;

        for (allocated = 1, last = start; allocated < num;
             allocated++, last = next) {
                next = find_next_free_page(pool, last + 1);
                if (next == INVALID_PAGE_NR)
                        return NULL;
                if (next != last + 1)
                        goto restart;   /* not consecutive */
        }

        for (allocated = 0; allocated < num; allocated++)
                set_bit(start + allocated, pool->used_bitmap);

        pool->used_pages += num;

        return pool->base_address + start * PAGE_SIZE;
}
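
/*
 * Note added for illustration (not in the original file): callers pass
 * align_mask == num - 1 with num being a power of two, see
 * page_alloc_aligned() below. A request for 4 naturally aligned pages thus
 * uses align_mask == 0x3, and the chosen start page satisfies
 * (start - aligned_start) & 0x3 == 0 relative to the pool base.
 */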

/**
 * Allocate consecutive pages from the specified pool.
 * @param pool  Page pool to allocate from.
 * @param num   Number of pages.
 *
 * @return Pointer to first page or NULL if allocation failed.
 *
 * @see page_free
 */
void *page_alloc(struct page_pool *pool, unsigned int num)
{
        return page_alloc_internal(pool, num, 0);
}

/**
 * Allocate aligned consecutive pages from the specified pool.
 * @param pool  Page pool to allocate from.
 * @param num   Number of pages. Num needs to be a power of 2.
 *
 * @return Pointer to first page or NULL if allocation failed.
 *
 * @see page_free
 */
void *page_alloc_aligned(struct page_pool *pool, unsigned int num)
{
        return page_alloc_internal(pool, num, num - 1);
}

/**
 * Release pages to the specified pool.
 * @param pool  Page pool to release to.
 * @param page  Address of first page.
 * @param num   Number of pages.
 *
 * @see page_alloc
 */
void page_free(struct page_pool *pool, void *page, unsigned int num)
{
        unsigned long page_nr;

        if (!page)
                return;

        while (num-- > 0) {
                if (pool->flags & PAGE_SCRUB_ON_FREE)
                        memset(page, 0, PAGE_SIZE);
                page_nr = (page - pool->base_address) / PAGE_SIZE;
                clear_bit(page_nr, pool->used_bitmap);
                pool->used_pages--;
                page += PAGE_SIZE;
        }
}
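
/*
 * Usage sketch (added, not part of the original file): a typical
 * allocate/use/release cycle on the hypervisor memory pool. The error
 * handling mirrors how callers such as split_hugepage() react to allocation
 * failure.
 *
 *      void *table = page_alloc(&mem_pool, 1);
 *
 *      if (!table)
 *              return -ENOMEM;
 *      ...
 *      page_free(&mem_pool, table, 1);
 */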

/**
 * Translate virtual to physical address according to given paging structures.
 * @param pg_structs    Paging structures to use for translation.
 * @param virt          Virtual address.
 * @param flags         Access flags that have to be supported by the mapping,
 *                      see @ref PAGE_FLAGS.
 *
 * @return Physical address on success or @c INVALID_PHYS_ADDR if the virtual
 *         address could not be translated or the requested access is not
 *         supported by the mapping.
 *
 * @see paging_phys2hvirt
 * @see paging_hvirt2phys
 * @see arch_paging_gphys2phys
 */
unsigned long paging_virt2phys(const struct paging_structures *pg_structs,
                               unsigned long virt, unsigned long flags)
{
        const struct paging *paging = pg_structs->root_paging;
        page_table_t pt = pg_structs->root_table;
        unsigned long phys;
        pt_entry_t pte;

        while (1) {
                pte = paging->get_entry(pt, virt);
                if (!paging->entry_valid(pte, flags))
                        return INVALID_PHYS_ADDR;
                phys = paging->get_phys(pte, virt);
                if (phys != INVALID_PHYS_ADDR)
                        return phys;
                pt = paging_phys2hvirt(paging->get_next_pt(pte));
                paging++;
        }
}
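
/*
 * Usage sketch (added, not part of the original file): look up the physical
 * address behind a hypervisor-virtual address, treating a failed translation
 * as an error; addr is a placeholder, and PAGE_DEFAULT_FLAGS is assumed to
 * request the access rights the caller needs.
 *
 *      unsigned long phys;
 *
 *      phys = paging_virt2phys(&hv_paging_structs, addr, PAGE_DEFAULT_FLAGS);
 *      if (phys == INVALID_PHYS_ADDR)
 *              return -EINVAL;
 */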

static void flush_pt_entry(pt_entry_t pte, enum paging_coherent coherent)
{
        if (coherent == PAGING_COHERENT)
                arch_paging_flush_cpu_caches(pte, sizeof(*pte));
}

static int split_hugepage(const struct paging *paging, pt_entry_t pte,
                          unsigned long virt, enum paging_coherent coherent)
{
        unsigned long phys = paging->get_phys(pte, virt);
        struct paging_structures sub_structs;
        unsigned long page_mask, flags;

        if (phys == INVALID_PHYS_ADDR)
                return 0;

        page_mask = ~(paging->page_size - 1);
        phys &= page_mask;
        virt &= page_mask;

        flags = paging->get_flags(pte);

        sub_structs.root_paging = paging + 1;
        sub_structs.root_table = page_alloc(&mem_pool, 1);
        if (!sub_structs.root_table)
                return -ENOMEM;
        paging->set_next_pt(pte, paging_hvirt2phys(sub_structs.root_table));
        flush_pt_entry(pte, coherent);

        return paging_create(&sub_structs, phys, paging->page_size, virt,
                             flags, coherent);
}
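
/*
 * Note added for illustration (not part of the original file): splitting,
 * e.g., a 2 MB x86 mapping allocates one page table from mem_pool and
 * recreates the same 2 MB range out of 512 4 KB entries carrying the original
 * access flags, so that a subsequent paging_create() or paging_destroy() can
 * modify just a sub-range of the former hugepage.
 */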

/**
 * Create or modify a page map.
 * @param pg_structs    Descriptor of paging structures to be used.
 * @param phys          Physical address of the region to be mapped.
 * @param size          Size of the region.
 * @param virt          Virtual address the region should be mapped to.
 * @param flags         Flags describing the permitted access, see
 *                      @ref PAGE_FLAGS.
 * @param coherent      Coherency of mapping.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @note The function aims at using the largest possible page size for the
 * mapping but does not consolidate with neighboring mappings.
 *
 * @see paging_destroy
 * @see paging_get_guest_pages
 */
int paging_create(const struct paging_structures *pg_structs,
                  unsigned long phys, unsigned long size, unsigned long virt,
                  unsigned long flags, enum paging_coherent coherent)
{
        phys &= PAGE_MASK;
        virt &= PAGE_MASK;
        size = PAGE_ALIGN(size);

        while (size > 0) {
                const struct paging *paging = pg_structs->root_paging;
                page_table_t pt = pg_structs->root_table;
                pt_entry_t pte;
                int err;

                while (1) {
                        pte = paging->get_entry(pt, virt);
                        if (paging->page_size > 0 &&
                            paging->page_size <= size &&
                            ((phys | virt) & (paging->page_size - 1)) == 0) {
                                /*
                                 * We might be overwriting a more fine-grained
                                 * mapping, so release it first. This cannot
                                 * fail as we are working along hugepage
                                 * boundaries.
                                 */
                                if (paging->page_size > PAGE_SIZE)
                                        paging_destroy(pg_structs, virt,
                                                       paging->page_size,
                                                       coherent);
                                paging->set_terminal(pte, phys, flags);
                                flush_pt_entry(pte, coherent);
                                break;
                        }
                        if (paging->entry_valid(pte, PAGE_PRESENT_FLAGS)) {
                                err = split_hugepage(paging, pte, virt,
                                                     coherent);
                                if (err)
                                        return err;
                                pt = paging_phys2hvirt(
                                                paging->get_next_pt(pte));
                        } else {
                                pt = page_alloc(&mem_pool, 1);
                                if (!pt)
                                        return -ENOMEM;
                                paging->set_next_pt(pte,
                                                    paging_hvirt2phys(pt));
                                flush_pt_entry(pte, coherent);
                        }
                        paging++;
                }
                if (pg_structs == &hv_paging_structs)
                        arch_paging_flush_page_tlbs(virt);

                phys += paging->page_size;
                virt += paging->page_size;
                size -= paging->page_size;
        }
        return 0;
}
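
/*
 * Usage sketch (added, not part of the original file): map a two-page MMIO
 * region uncached into the hypervisor address space, following the pattern
 * paging_init() uses for the debug console below. mmio_phys, mmio_virt and
 * err are placeholders chosen by the caller.
 *
 *      err = paging_create(&hv_paging_structs, mmio_phys, 2 * PAGE_SIZE,
 *                          mmio_virt, PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
 *                          PAGING_NON_COHERENT);
 *      if (err)
 *              return err;
 */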

/**
 * Destroy a page map.
 * @param pg_structs    Descriptor of paging structures to be used.
 * @param virt          Virtual address of the region to be unmapped.
 * @param size          Size of the region.
 * @param coherent      Coherency of mapping.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @note This function tries to break up hugepages if they are to be unmapped
 * only partially. This may require allocating additional pages for the paging
 * structures and can thus fail. Unmap requests that only cover full pages
 * never fail.
 *
 * @see paging_create
 */
int paging_destroy(const struct paging_structures *pg_structs,
                   unsigned long virt, unsigned long size,
                   enum paging_coherent coherent)
{
        size = PAGE_ALIGN(size);

        while (size > 0) {
                const struct paging *paging = pg_structs->root_paging;
                page_table_t pt[MAX_PAGE_TABLE_LEVELS];
                unsigned long page_size;
                pt_entry_t pte;
                int n = 0;
                int err;

                /* walk down the page table, saving intermediate tables */
                pt[0] = pg_structs->root_table;
                while (1) {
                        pte = paging->get_entry(pt[n], virt);
                        if (!paging->entry_valid(pte, PAGE_PRESENT_FLAGS))
                                break;
                        if (paging->get_phys(pte, virt) != INVALID_PHYS_ADDR) {
                                if (paging->page_size > size) {
                                        err = split_hugepage(paging, pte, virt,
                                                             coherent);
                                        if (err)
                                                return err;
                                } else
                                        break;
                        }
                        pt[++n] = paging_phys2hvirt(paging->get_next_pt(pte));
                        paging++;
                }
                /* advance by page size of current level paging */
                page_size = paging->page_size ? paging->page_size : PAGE_SIZE;

                /* walk up again, clearing entries, releasing empty tables */
                while (1) {
                        paging->clear_entry(pte);
                        flush_pt_entry(pte, coherent);
                        if (n == 0 || !paging->page_table_empty(pt[n]))
                                break;
                        page_free(&mem_pool, pt[n], 1);
                        paging--;
                        pte = paging->get_entry(pt[--n], virt);
                }
                if (pg_structs == &hv_paging_structs)
                        arch_paging_flush_page_tlbs(virt);

                if (page_size > size)
                        break;
                virt += page_size;
                size -= page_size;
        }
        return 0;
}
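
/*
 * Usage sketch (added, not part of the original file): unmap a single page
 * that was previously mapped into the hypervisor address space; mapped_virt
 * and err are placeholders. Per the note above, the return value only matters
 * when the request may have to split a hugepage.
 *
 *      err = paging_destroy(&hv_paging_structs, mapped_virt, PAGE_SIZE,
 *                           PAGING_NON_COHERENT);
 *      if (err)
 *              return err;
 */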

static unsigned long
paging_gvirt2gphys(const struct guest_paging_structures *pg_structs,
                   unsigned long gvirt, unsigned long tmp_page,
                   unsigned long flags)
{
        unsigned long page_table_gphys = pg_structs->root_table_gphys;
        const struct paging *paging = pg_structs->root_paging;
        unsigned long gphys, phys;
        pt_entry_t pte;
        int err;

        while (1) {
                /* map guest page table */
                phys = arch_paging_gphys2phys(this_cpu_data(),
                                                page_table_gphys,
                                                PAGE_READONLY_FLAGS);
                if (phys == INVALID_PHYS_ADDR)
                        return INVALID_PHYS_ADDR;
                err = paging_create(&hv_paging_structs, phys, PAGE_SIZE,
                                    tmp_page, PAGE_READONLY_FLAGS,
                                    PAGING_NON_COHERENT);
                if (err)
                        return INVALID_PHYS_ADDR;

                /* evaluate page table entry */
                pte = paging->get_entry((page_table_t)tmp_page, gvirt);
                if (!paging->entry_valid(pte, flags))
                        return INVALID_PHYS_ADDR;
                gphys = paging->get_phys(pte, gvirt);
                if (gphys != INVALID_PHYS_ADDR)
                        return gphys;
                page_table_gphys = paging->get_next_pt(pte);
                paging++;
        }
}

/**
 * Map guest (cell) pages into the hypervisor address space.
 * @param pg_structs    Descriptor of the guest paging structures if @c gaddr
 *                      is a guest-virtual address or @c NULL if it is a
 *                      guest-physical address.
 * @param gaddr         Guest address of the first page to be mapped.
 * @param num           Number of pages to be mapped.
 * @param flags         Access flags for the hypervisor mapping, see
 *                      @ref PAGE_FLAGS.
 *
 * @return Pointer to first mapped page or @c NULL on error.
 *
 * @note The mapping is done only for the calling CPU and must thus only be
 * used by the very same CPU.
 *
 * @note The mapping is only temporary, valid until the next invocation of
 * paging_get_guest_pages() on this CPU. It does not require explicit
 * unmapping when it is no longer needed.
 */
void *paging_get_guest_pages(const struct guest_paging_structures *pg_structs,
                             unsigned long gaddr, unsigned int num,
                             unsigned long flags)
{
        unsigned long page_base = TEMPORARY_MAPPING_BASE +
                this_cpu_id() * PAGE_SIZE * NUM_TEMPORARY_PAGES;
        unsigned long phys, gphys, page_virt = page_base;
        int err;

        if (num > NUM_TEMPORARY_PAGES)
                return NULL;
        while (num-- > 0) {
                if (pg_structs)
                        gphys = paging_gvirt2gphys(pg_structs, gaddr,
                                                   page_virt, flags);
                else
                        gphys = gaddr;

                phys = arch_paging_gphys2phys(this_cpu_data(), gphys, flags);
                if (phys == INVALID_PHYS_ADDR)
                        return NULL;
                /* map guest page */
                err = paging_create(&hv_paging_structs, phys, PAGE_SIZE,
                                    page_virt, flags, PAGING_NON_COHERENT);
                if (err)
                        return NULL;
                gaddr += PAGE_SIZE;
                page_virt += PAGE_SIZE;
        }
        return (void *)page_base;
}
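
/*
 * Usage sketch (added, not part of the original file): temporarily map two
 * guest-physical pages read-only and inspect them on the current CPU; no
 * explicit unmap is needed afterwards. guest_phys_addr is a placeholder.
 *
 *      const void *mapping;
 *
 *      mapping = paging_get_guest_pages(NULL, guest_phys_addr, 2,
 *                                       PAGE_READONLY_FLAGS);
 *      if (!mapping)
 *              return -EINVAL;
 */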

/**
 * Initialize the page mapping subsystem.
 *
 * @return 0 on success, negative error code otherwise.
 */
int paging_init(void)
{
        unsigned long n, per_cpu_pages, config_pages, bitmap_pages, vaddr;
        int err;

        per_cpu_pages = hypervisor_header.max_cpus *
                sizeof(struct per_cpu) / PAGE_SIZE;

        config_pages = PAGES(jailhouse_system_config_size(system_config));

        page_offset = JAILHOUSE_BASE -
                system_config->hypervisor_memory.phys_start;

        mem_pool.pages = (system_config->hypervisor_memory.size -
                (__page_pool - (u8 *)&hypervisor_header)) / PAGE_SIZE;
        bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;

        if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
                return -ENOMEM;

        mem_pool.base_address = __page_pool;
        mem_pool.used_bitmap =
                (unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
                                  config_pages * PAGE_SIZE);
        mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
        for (n = 0; n < mem_pool.used_pages; n++)
                set_bit(n, mem_pool.used_bitmap);
        mem_pool.flags = PAGE_SCRUB_ON_FREE;

        remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
        remap_pool.used_pages =
                hypervisor_header.max_cpus * NUM_TEMPORARY_PAGES;
        for (n = 0; n < remap_pool.used_pages; n++)
                set_bit(n, remap_pool.used_bitmap);

        arch_paging_init();

        hv_paging_structs.root_paging = hv_paging;
        hv_paging_structs.root_table = page_alloc(&mem_pool, 1);
        if (!hv_paging_structs.root_table)
                return -ENOMEM;

        /* Replicate hypervisor mapping of Linux */
        err = paging_create(&hv_paging_structs,
                             paging_hvirt2phys(&hypervisor_header),
                             system_config->hypervisor_memory.size,
                             (unsigned long)&hypervisor_header,
                             PAGE_DEFAULT_FLAGS, PAGING_NON_COHERENT);
        if (err)
                return err;

        if (system_config->debug_console.flags & JAILHOUSE_MEM_IO) {
                vaddr = (unsigned long)hypervisor_header.debug_console_base;
                /* check if console overlaps remapping region */
                if (vaddr + system_config->debug_console.size >= REMAP_BASE &&
                    vaddr < REMAP_BASE + remap_pool.pages * PAGE_SIZE)
                        return trace_error(-EINVAL);

                err = paging_create(&hv_paging_structs,
                                    system_config->debug_console.phys_start,
                                    system_config->debug_console.size, vaddr,
                                    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                                    PAGING_NON_COHERENT);
                if (err)
                        return err;
        }

        /* Make sure any remappings to the temporary regions can be performed
         * without allocations of page table pages. */
        return paging_create(&hv_paging_structs, 0,
                             remap_pool.used_pages * PAGE_SIZE,
                             TEMPORARY_MAPPING_BASE, PAGE_NONPRESENT_FLAGS,
                             PAGING_NON_COHERENT);
}
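
/*
 * Worked example (added, not part of the original file), assuming 4 KB pages,
 * max_cpus = 4 and sizeof(struct per_cpu) == 32 KB:
 *
 *      per_cpu_pages = 4 * 32 KB / 4 KB = 32 pages
 *      bitmap_pages  = (mem_pool.pages + 32767) / 32768 = 1 page
 *                      (for any hypervisor memory region up to 128 MB)
 *
 * The first per_cpu_pages + config_pages + bitmap_pages pages of __page_pool
 * are therefore marked as used before the first page_alloc() call.
 */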

/**
 * Dump usage statistics of the page pools.
 * @param when String that characterizes the associated event.
 */
void paging_dump_stats(const char *when)
{
        printk("Page pool usage %s: mem %d/%d, remap %d/%d\n", when,
               mem_pool.used_pages, mem_pool.pages,
               remap_pool.used_pages, remap_pool.pages);
}