hypervisor/paging.c
/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013, 2014
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/paging.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <jailhouse/control.h>
#include <asm/bitops.h>

#define BITS_PER_PAGE		(PAGE_SIZE * 8)

#define INVALID_PAGE_NR		(~0UL)

#define PAGE_SCRUB_ON_FREE	0x1

extern u8 __page_pool[];

/**
 * Offset between virtual and physical hypervisor addresses.
 *
 * @note Private, use paging_hvirt2phys() or paging_phys2hvirt() instead.
 */
unsigned long page_offset;

/** Page pool containing physical pages for use by the hypervisor. */
struct page_pool mem_pool;
/** Page pool containing virtual pages for remappings by the hypervisor. */
struct page_pool remap_pool = {
	.base_address = (void *)REMAP_BASE,
	.pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
};
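
/*
 * Note on the capacity above: each bitmap page holds PAGE_SIZE * 8 bits
 * (BITS_PER_PAGE), i.e. it tracks that many pages, so the remap pool can
 * manage at most BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES virtual pages.
 * With 4 KB pages, a single bitmap page covers 32768 pages (128 MB).
 */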

/** Descriptor of the hypervisor paging structures. */
struct paging_structures hv_paging_structs;

/**
 * Trivial implementation of paging::get_phys (for non-terminal levels)
 * @param pte See paging::get_phys.
 * @param virt See paging::get_phys.
 *
 * @return @c INVALID_PHYS_ADDR.
 *
 * @see paging
 */
unsigned long paging_get_phys_invalid(pt_entry_t pte, unsigned long virt)
{
	return INVALID_PHYS_ADDR;
}

static unsigned long find_next_free_page(struct page_pool *pool,
					 unsigned long start)
{
	unsigned long bmp_pos, bmp_val, page_nr;
	unsigned long start_mask = 0;

	if (start >= pool->pages)
		return INVALID_PAGE_NR;

	/*
	 * If we don't start at the beginning of a bitmap word, create a mask
	 * to mark the pages before the start page as (virtually) used.
	 */
	if (start % BITS_PER_LONG > 0)
		start_mask = ~0UL >> (BITS_PER_LONG - (start % BITS_PER_LONG));
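	/*
	 * Worked example (illustrative, assuming BITS_PER_LONG == 64): for
	 * start == 70, start % 64 == 6 and start_mask == ~0UL >> 58, i.e.
	 * only the lowest six bits are set. ORing this mask into bitmap
	 * word 70 / 64 == 1 marks pages 64..69 as used for this scan, so
	 * the first free page that can be reported is number 70 or higher.
	 */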

	for (bmp_pos = start / BITS_PER_LONG;
	     bmp_pos < pool->pages / BITS_PER_LONG; bmp_pos++) {
		bmp_val = pool->used_bitmap[bmp_pos] | start_mask;
		start_mask = 0;
		if (bmp_val != ~0UL) {
			page_nr = ffzl(bmp_val) + bmp_pos * BITS_PER_LONG;
			if (page_nr >= pool->pages)
				break;
			return page_nr;
		}
	}

	return INVALID_PAGE_NR;
}

/**
 * Allocate consecutive pages from the specified pool.
 * @param pool  Page pool to allocate from.
 * @param num   Number of pages.
 *
 * @return Pointer to first page or NULL if allocation failed.
 *
 * @see page_free
 */
void *page_alloc(struct page_pool *pool, unsigned int num)
{
	unsigned long start, last, next;
	unsigned int allocated;

	start = find_next_free_page(pool, 0);
	if (start == INVALID_PAGE_NR)
		return NULL;

restart:
	for (allocated = 1, last = start; allocated < num;
	     allocated++, last = next) {
		next = find_next_free_page(pool, last + 1);
		if (next == INVALID_PAGE_NR)
			return NULL;
		if (next != last + 1) {
			start = next;
			goto restart;
		}
	}

	for (allocated = 0; allocated < num; allocated++)
		set_bit(start + allocated, pool->used_bitmap);

	pool->used_pages += num;

	return pool->base_address + start * PAGE_SIZE;
}

/**
 * Release pages to the specified pool.
 * @param pool  Page pool to release to.
 * @param page  Address of first page.
 * @param num   Number of pages.
 *
 * @see page_alloc
 */
void page_free(struct page_pool *pool, void *page, unsigned int num)
{
	unsigned long page_nr;

	if (!page)
		return;

	while (num-- > 0) {
		if (pool->flags & PAGE_SCRUB_ON_FREE)
			memset(page, 0, PAGE_SIZE);
		page_nr = (page - pool->base_address) / PAGE_SIZE;
		clear_bit(page_nr, pool->used_bitmap);
		pool->used_pages--;
		page += PAGE_SIZE;
	}
}
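
/*
 * Typical usage of the pool allocator (illustrative sketch only, following
 * the pattern used by split_hugepage() and paging_init() below):
 *
 *	page_table_t pt = page_alloc(&mem_pool, 1);
 *
 *	if (!pt)
 *		return -ENOMEM;
 *	...
 *	page_free(&mem_pool, pt, 1);
 */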

/**
 * Translate virtual to physical address according to given paging structures.
 * @param pg_structs    Paging structures to use for translation.
 * @param virt          Virtual address.
 * @param flags         Access flags that have to be supported by the mapping,
 *                      see @ref PAGE_FLAGS.
 *
 * @return Physical address on success or @c INVALID_PHYS_ADDR if the virtual
 *         address could not be translated or the requested access is not
 *         supported by the mapping.
 *
 * @see paging_phys2hvirt
 * @see paging_hvirt2phys
 * @see arch_paging_gphys2phys
 */
unsigned long paging_virt2phys(const struct paging_structures *pg_structs,
			       unsigned long virt, unsigned long flags)
{
	const struct paging *paging = pg_structs->root_paging;
	page_table_t pt = pg_structs->root_table;
	unsigned long phys;
	pt_entry_t pte;

	while (1) {
		pte = paging->get_entry(pt, virt);
		if (!paging->entry_valid(pte, flags))
			return INVALID_PHYS_ADDR;
		phys = paging->get_phys(pte, virt);
		if (phys != INVALID_PHYS_ADDR)
			return phys;
		pt = paging_phys2hvirt(paging->get_next_pt(pte));
		paging++;
	}
}

static void flush_pt_entry(pt_entry_t pte, enum paging_coherent coherent)
{
	if (coherent == PAGING_COHERENT)
		arch_paging_flush_cpu_caches(pte, sizeof(*pte));
}

static int split_hugepage(const struct paging *paging, pt_entry_t pte,
			  unsigned long virt, enum paging_coherent coherent)
{
	unsigned long phys = paging->get_phys(pte, virt);
	struct paging_structures sub_structs;
	unsigned long page_mask, flags;

	if (phys == INVALID_PHYS_ADDR)
		return 0;

	page_mask = ~(paging->page_size - 1);
	phys &= page_mask;
	virt &= page_mask;

	flags = paging->get_flags(pte);

	sub_structs.root_paging = paging + 1;
	sub_structs.root_table = page_alloc(&mem_pool, 1);
	if (!sub_structs.root_table)
		return -ENOMEM;
	paging->set_next_pt(pte, paging_hvirt2phys(sub_structs.root_table));
	flush_pt_entry(pte, coherent);

	return paging_create(&sub_structs, phys, paging->page_size, virt,
			     flags, coherent);
}

/**
 * Create or modify a page map.
 * @param pg_structs    Descriptor of paging structures to be used.
 * @param phys          Physical address of the region to be mapped.
 * @param size          Size of the region.
 * @param virt          Virtual address the region should be mapped to.
 * @param flags         Flags describing the permitted access, see
 *                      @ref PAGE_FLAGS.
 * @param coherent      Coherency of mapping.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @note The function aims at using the largest possible page size for the
 * mapping but does not consolidate with neighboring mappings.
 *
 * @see paging_destroy
 * @see paging_get_guest_pages
 */
int paging_create(const struct paging_structures *pg_structs,
		  unsigned long phys, unsigned long size, unsigned long virt,
		  unsigned long flags, enum paging_coherent coherent)
{
	phys &= PAGE_MASK;
	virt &= PAGE_MASK;
	size = PAGE_ALIGN(size);

	while (size > 0) {
		const struct paging *paging = pg_structs->root_paging;
		page_table_t pt = pg_structs->root_table;
		pt_entry_t pte;
		int err;

		while (1) {
			pte = paging->get_entry(pt, virt);
			if (paging->page_size > 0 &&
			    paging->page_size <= size &&
			    ((phys | virt) & (paging->page_size - 1)) == 0) {
				/*
				 * We might be overwriting a more fine-grained
				 * mapping, so release it first. This cannot
				 * fail as we are working along hugepage
				 * boundaries.
				 */
				if (paging->page_size > PAGE_SIZE)
					paging_destroy(pg_structs, virt,
						       paging->page_size,
						       coherent);
				paging->set_terminal(pte, phys, flags);
				flush_pt_entry(pte, coherent);
				break;
			}
			if (paging->entry_valid(pte, PAGE_PRESENT_FLAGS)) {
				err = split_hugepage(paging, pte, virt,
						     coherent);
				if (err)
					return err;
				pt = paging_phys2hvirt(
						paging->get_next_pt(pte));
			} else {
				pt = page_alloc(&mem_pool, 1);
				if (!pt)
					return -ENOMEM;
				paging->set_next_pt(pte,
						    paging_hvirt2phys(pt));
				flush_pt_entry(pte, coherent);
			}
			paging++;
		}
		if (pg_structs == &hv_paging_structs)
			arch_paging_flush_page_tlbs(virt);

		phys += paging->page_size;
		virt += paging->page_size;
		size -= paging->page_size;
	}
	return 0;
}
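
/*
 * Example (taken from paging_init() below): replicating the hypervisor
 * mapping of Linux in the hypervisor's own paging structures:
 *
 *	err = paging_create(&hv_paging_structs,
 *			    paging_hvirt2phys(&hypervisor_header),
 *			    system_config->hypervisor_memory.size,
 *			    (unsigned long)&hypervisor_header,
 *			    PAGE_DEFAULT_FLAGS, PAGING_NON_COHERENT);
 */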

/**
 * Destroy a page map.
 * @param pg_structs    Descriptor of paging structures to be used.
 * @param virt          Virtual address of the region to be unmapped.
 * @param size          Size of the region.
 * @param coherent      Coherency of mapping.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @note If required, this function breaks up hugepages that are to be
 * unmapped only partially. This may require allocating additional pages for
 * the paging structures and can thus fail. Unmap requests that cover only
 * full pages never fail.
 *
 * @see paging_create
 */
int paging_destroy(const struct paging_structures *pg_structs,
		   unsigned long virt, unsigned long size,
		   enum paging_coherent coherent)
{
	size = PAGE_ALIGN(size);

	while (size > 0) {
		const struct paging *paging = pg_structs->root_paging;
		page_table_t pt[MAX_PAGE_TABLE_LEVELS];
		unsigned long page_size;
		pt_entry_t pte;
		int n = 0;
		int err;

		/* walk down the page table, saving intermediate tables */
		pt[0] = pg_structs->root_table;
		while (1) {
			pte = paging->get_entry(pt[n], virt);
			if (!paging->entry_valid(pte, PAGE_PRESENT_FLAGS))
				break;
			if (paging->get_phys(pte, virt) != INVALID_PHYS_ADDR) {
				if (paging->page_size > size) {
					err = split_hugepage(paging, pte, virt,
							     coherent);
					if (err)
						return err;
				} else
					break;
			}
			pt[++n] = paging_phys2hvirt(paging->get_next_pt(pte));
			paging++;
		}
		/* advance by page size of current level paging */
		page_size = paging->page_size ? paging->page_size : PAGE_SIZE;

		/* walk up again, clearing entries, releasing empty tables */
		while (1) {
			paging->clear_entry(pte);
			flush_pt_entry(pte, coherent);
			if (n == 0 || !paging->page_table_empty(pt[n]))
				break;
			page_free(&mem_pool, pt[n], 1);
			paging--;
			pte = paging->get_entry(pt[--n], virt);
		}
		if (pg_structs == &hv_paging_structs)
			arch_paging_flush_page_tlbs(virt);

		if (page_size > size)
			break;
		virt += page_size;
		size -= page_size;
	}
	return 0;
}
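
/*
 * Example of the partial-unmap note above (illustrative, assuming a paging
 * level with 2 MB hugepages; "virt" and "err" are placeholder names):
 * unmapping a single 4 KB page out of a region covered by a 2 MB entry makes
 * split_hugepage() allocate a new page table first, so even this call can
 * return -ENOMEM:
 *
 *	err = paging_destroy(&hv_paging_structs, virt, PAGE_SIZE,
 *			     PAGING_NON_COHERENT);
 */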

static unsigned long
paging_gvirt2gphys(const struct guest_paging_structures *pg_structs,
		   unsigned long gvirt, unsigned long tmp_page,
		   unsigned long flags)
{
	unsigned long page_table_gphys = pg_structs->root_table_gphys;
	const struct paging *paging = pg_structs->root_paging;
	unsigned long gphys, phys;
	pt_entry_t pte;
	int err;

	while (1) {
		/* map guest page table */
		phys = arch_paging_gphys2phys(this_cpu_data(),
					      page_table_gphys,
					      PAGE_READONLY_FLAGS);
		if (phys == INVALID_PHYS_ADDR)
			return INVALID_PHYS_ADDR;
		err = paging_create(&hv_paging_structs, phys, PAGE_SIZE,
				    tmp_page, PAGE_READONLY_FLAGS,
				    PAGING_NON_COHERENT);
		if (err)
			return INVALID_PHYS_ADDR;

		/* evaluate page table entry */
		pte = paging->get_entry((page_table_t)tmp_page, gvirt);
		if (!paging->entry_valid(pte, flags))
			return INVALID_PHYS_ADDR;
		gphys = paging->get_phys(pte, gvirt);
		if (gphys != INVALID_PHYS_ADDR)
			return gphys;
		page_table_gphys = paging->get_next_pt(pte);
		paging++;
	}
}

/**
 * Map guest (cell) pages into the hypervisor address space.
 * @param pg_structs    Descriptor of the guest paging structures.
 * @param gaddr         Guest-physical address of the first page to be mapped.
 * @param num           Number of pages to be mapped.
 * @param flags         Access flags for the hypervisor mapping, see
 *                      @ref PAGE_FLAGS.
 *
 * @return Pointer to first mapped page or @c NULL on error.
 *
 * @note The mapping is done only for the calling CPU and must thus only be
 * used by the very same CPU.
 *
 * @note The mapping is only temporary, valid until the next invocation of
 * paging_get_guest_pages() on this CPU. It does not require explicit
 * unmapping when it is no longer needed.
 */
void *paging_get_guest_pages(const struct guest_paging_structures *pg_structs,
			     unsigned long gaddr, unsigned int num,
			     unsigned long flags)
{
	unsigned long page_base = TEMPORARY_MAPPING_BASE +
		this_cpu_id() * PAGE_SIZE * NUM_TEMPORARY_PAGES;
	unsigned long phys, gphys, page_virt = page_base;
	int err;

	if (num > NUM_TEMPORARY_PAGES)
		return NULL;
	while (num-- > 0) {
		if (pg_structs)
			gphys = paging_gvirt2gphys(pg_structs, gaddr,
						   page_virt, flags);
		else
			gphys = gaddr;

		phys = arch_paging_gphys2phys(this_cpu_data(), gphys, flags);
		if (phys == INVALID_PHYS_ADDR)
			return NULL;
		/* map guest page */
		err = paging_create(&hv_paging_structs, phys, PAGE_SIZE,
				    page_virt, flags, PAGING_NON_COHERENT);
		if (err)
			return NULL;
		gaddr += PAGE_SIZE;
		page_virt += PAGE_SIZE;
	}
	return (void *)page_base;
}
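
/*
 * Illustrative use (an assumption, not a call site in this file; "cfg",
 * "guest_phys" and "num_pages" are placeholder names): accessing a
 * guest-physical data structure during a hypercall, without involving guest
 * page tables (pg_structs == NULL):
 *
 *	cfg = paging_get_guest_pages(NULL, guest_phys, num_pages,
 *				     PAGE_READONLY_FLAGS);
 *	if (!cfg)
 *		return -ENOMEM;
 *
 * The mapping is per-CPU and temporary, so no explicit unmap is required.
 */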

/**
 * Initialize the page mapping subsystem.
 *
 * @return 0 on success, negative error code otherwise.
 */
int paging_init(void)
{
	unsigned long n, per_cpu_pages, config_pages, bitmap_pages, vaddr;
	int err;

	per_cpu_pages = hypervisor_header.max_cpus *
		sizeof(struct per_cpu) / PAGE_SIZE;

	system_config = (struct jailhouse_system *)
		(__page_pool + per_cpu_pages * PAGE_SIZE);
	config_pages = PAGES(jailhouse_system_config_size(system_config));

	page_offset = JAILHOUSE_BASE -
		system_config->hypervisor_memory.phys_start;

	mem_pool.pages = (system_config->hypervisor_memory.size -
		(__page_pool - (u8 *)&hypervisor_header)) / PAGE_SIZE;
	bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;

	if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
		goto error_nomem;

	mem_pool.base_address = __page_pool;
	mem_pool.used_bitmap =
		(unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
				  config_pages * PAGE_SIZE);
	mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
	for (n = 0; n < mem_pool.used_pages; n++)
		set_bit(n, mem_pool.used_bitmap);
	mem_pool.flags = PAGE_SCRUB_ON_FREE;

	remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
	remap_pool.used_pages =
		hypervisor_header.max_cpus * NUM_TEMPORARY_PAGES;
	for (n = 0; n < remap_pool.used_pages; n++)
		set_bit(n, remap_pool.used_bitmap);

	arch_paging_init();

	hv_paging_structs.root_paging = hv_paging;
	hv_paging_structs.root_table = page_alloc(&mem_pool, 1);
	if (!hv_paging_structs.root_table)
		goto error_nomem;

	/* Replicate hypervisor mapping of Linux */
	err = paging_create(&hv_paging_structs,
			     paging_hvirt2phys(&hypervisor_header),
			     system_config->hypervisor_memory.size,
			     (unsigned long)&hypervisor_header,
			     PAGE_DEFAULT_FLAGS, PAGING_NON_COHERENT);
	if (err)
		goto error_nomem;

	if (system_config->debug_uart.flags & JAILHOUSE_MEM_IO) {
		vaddr = (unsigned long)hypervisor_header.debug_uart_base;
		if (vaddr + system_config->debug_uart.size >= REMAP_BASE &&
		    vaddr < REMAP_BASE + remap_pool.pages * PAGE_SIZE) {
			printk("FATAL: UART overlaps remapping region\n");
			return -EINVAL;
		}
		err = paging_create(&hv_paging_structs,
				    system_config->debug_uart.phys_start,
				    system_config->debug_uart.size, vaddr,
				    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
				    PAGING_NON_COHERENT);
		if (err)
			goto error_nomem;
	}

	/* Make sure any remappings to the temporary regions can be performed
	 * without allocating page table pages. */
	err = paging_create(&hv_paging_structs, 0,
			    remap_pool.used_pages * PAGE_SIZE,
			    TEMPORARY_MAPPING_BASE, PAGE_NONPRESENT_FLAGS,
			    PAGING_NON_COHERENT);
	if (err)
		goto error_nomem;

	return 0;

error_nomem:
	printk("FATAL: page pool much too small\n");
	return -ENOMEM;
}

/**
 * Dump usage statistics of the page pools.
 * @param when String that characterizes the associated event.
 */
void paging_dump_stats(const char *when)
{
	printk("Page pool usage %s: mem %d/%d, remap %d/%d\n", when,
	       mem_pool.used_pages, mem_pool.pages,
	       remap_pool.used_pages, remap_pool.pages);
}