rtime.felk.cvut.cz Git - jailhouse.git/commitdiff
core: Switch to table-driven page table construction and interpretation
authorJan Kiszka <jan.kiszka@siemens.com>
Tue, 4 Feb 2014 21:35:04 +0000 (22:35 +0100)
committerJan Kiszka <jan.kiszka@siemens.com>
Wed, 5 Feb 2014 10:01:26 +0000 (11:01 +0100)
Switch page table creation and interpretation to a new, fully
table-driven scheme. It is much more regular and also more flexible when
it comes to support more paging modes, specifically on x86 (32-bit
paging, PAE etc.) in order to extend MMIO support. It is also laying the
foundation for creating hugepages, which will reduce TLB pressure and memory
usage. So far only reading of hugepages is supported.

A paging mode is now defined via an array of paging structures. An array
entry represents a page table level, starting with the root level. Each
paging structure contains a number of handlers to set or get entries at
the corresponding level. It also contains a page size value which is
non-zero in case the page table level supports terminal entries that
point to a physical page address. This implies that the final element in
the paging structure array must have a non-zero page size field.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
14 files changed:
hypervisor/arch/arm/include/asm/paging.h
hypervisor/arch/arm/include/asm/paging_modes.h [new file with mode: 0644]
hypervisor/arch/arm/setup.c
hypervisor/arch/x86/Makefile
hypervisor/arch/x86/include/asm/paging.h
hypervisor/arch/x86/include/asm/paging_modes.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/vmx.h
hypervisor/arch/x86/include/asm/vtd.h
hypervisor/arch/x86/mmio.c
hypervisor/arch/x86/paging.c [new file with mode: 0644]
hypervisor/arch/x86/vmx.c
hypervisor/arch/x86/vtd.c
hypervisor/include/jailhouse/paging.h
hypervisor/paging.c

index 00bcc11613a983c4c470d88412247d4fd127229b..97e7aae5c76b8b3410a68b048d28b7f5d92f61aa 100644 (file)
 
 #define PAGE_SIZE              4096
 #define PAGE_MASK              ~(PAGE_SIZE - 1)
+#define PAGE_OFFS_MASK         (PAGE_SIZE - 1)
 
 #define PAGE_DIR_LEVELS                4
-
-#define PAGE_TABLE_OFFS_MASK   0x00000ff8UL
-#define PAGE_ADDR_MASK         0xfffff000UL
-#define PAGE_OFFS_MASK         0x00000fffUL
-#define HUGEPAGE_ADDR_MASK     0xffe00000UL
-#define HUGEPAGE_OFFS_MASK     0x001fffffUL
+#define MAX_PAGE_DIR_LEVELS    4
 
 #define PAGE_FLAG_PRESENT      0x01
 #define PAGE_FLAG_RW           0x02
-#define PAGE_FLAG_SUPERVISOR   0x04
 #define PAGE_FLAG_UNCACHED     0x10
 
-#define PAGE_DEFAULT_FLAGS     (PAGE_FLAG_PRESENT | PAGE_FLAG_RW | \
-                                PAGE_FLAG_SUPERVISOR)
-#define PAGE_READONLY_FLAGS    (PAGE_FLAG_PRESENT | PAGE_FLAG_SUPERVISOR)
+#define PAGE_DEFAULT_FLAGS     (PAGE_FLAG_PRESENT | PAGE_FLAG_RW)
+#define PAGE_READONLY_FLAGS    PAGE_FLAG_PRESENT
 #define PAGE_NONPRESENT_FLAGS  0
 
 #define INVALID_PHYS_ADDR      (~0UL)
 
 #ifndef __ASSEMBLY__
 
-typedef unsigned long pgd_t;
-typedef unsigned long pud_t;
-typedef unsigned long pmd_t;
-typedef unsigned long pte_t;
-
-static inline bool pgd_valid(pgd_t *pgd)
-{
-       return *pgd & 1;
-}
-
-static inline pgd_t *pgd_offset(pgd_t *page_table, unsigned long addr)
-{
-       return NULL;
-}
-
-static inline void set_pgd(pgd_t *pgd, unsigned long addr, unsigned long flags)
-{
-       *pgd = (addr & PAGE_ADDR_MASK) | flags;
-}
-
-static inline void clear_pgd(pgd_t *pgd)
-{
-       *pgd = 0;
-}
-
-static inline bool pud_valid(pud_t *pud)
-{
-       return *pud & 1;
-}
-
-static inline pud_t *pud4l_offset(pgd_t *pgd, unsigned long page_table_offset,
-                                 unsigned long addr)
-{
-       return NULL;
-}
-
-static inline pud_t *pud3l_offset(pgd_t *page_table, unsigned long addr)
-{
-       return NULL;
-}
-
-static inline void set_pud(pud_t *pud, unsigned long addr, unsigned long flags)
-{
-       *pud = (addr & PAGE_ADDR_MASK) | flags;
-}
-
-static inline void clear_pud(pud_t *pud)
-{
-       *pud = 0;
-}
-
-static inline bool pmd_valid(pmd_t *pmd)
-{
-       return *pmd & 1;
-}
-
-static inline bool pmd_is_hugepage(pmd_t *pmd)
-{
-       return *pmd & (1 << 7);
-}
-
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long page_table_offset,
-                               unsigned long addr)
-{
-       return NULL;
-}
-
-static inline void set_pmd(pmd_t *pmd, unsigned long addr, unsigned long flags)
-{
-       *pmd = (addr & PAGE_ADDR_MASK) | flags;
-}
-
-static inline void clear_pmd(pmd_t *pmd)
-{
-       *pmd = 0;
-}
-
-static inline bool pte_valid(pte_t *pte)
-{
-       return *pte & 1;
-}
-
-static inline pte_t *pte_offset(pmd_t *pmd, unsigned long page_table_offset,
-                               unsigned long addr)
-{
-       return NULL;
-}
-
-static inline void set_pte(pte_t *pte, unsigned long addr, unsigned long flags)
-{
-       *pte = (addr & PAGE_ADDR_MASK) | flags;
-}
-
-static inline void clear_pte(pte_t *pte)
-{
-       *pte = 0;
-}
-
-static inline unsigned long phys_address(pte_t *pte, unsigned long addr)
-{
-       return (*pte & PAGE_ADDR_MASK) + (addr & PAGE_OFFS_MASK);
-}
-
-static inline unsigned long phys_address_hugepage(pmd_t *pmd,
-                                                 unsigned long addr)
-{
-       return (*pmd & HUGEPAGE_ADDR_MASK) + (addr & HUGEPAGE_OFFS_MASK);
-}
-
-static inline bool pud_empty(pgd_t *pgd, unsigned long page_table_offset)
-{
-       pud_t *pud = (pud_t *)((*pgd & PAGE_ADDR_MASK) + page_table_offset);
-       int n;
-
-       for (n = 0; n < PAGE_SIZE / sizeof(pud_t); n++, pud++)
-               if (pud_valid(pud))
-                       return false;
-       return true;
-}
-
-static inline bool pmd_empty(pud_t *pud, unsigned long page_table_offset)
-{
-       pmd_t *pmd = (pmd_t *)((*pud & PAGE_ADDR_MASK) + page_table_offset);
-       int n;
-
-       for (n = 0; n < PAGE_SIZE / sizeof(pmd_t); n++, pmd++)
-               if (pmd_valid(pmd))
-                       return false;
-       return true;
-}
-
-static inline bool pt_empty(pmd_t *pmd, unsigned long page_table_offset)
-{
-       pte_t *pte = (pte_t *)((*pmd & PAGE_ADDR_MASK) + page_table_offset);
-       int n;
-
-       for (n = 0; n < PAGE_SIZE / sizeof(pte_t); n++, pte++)
-               if (pte_valid(pte))
-                       return false;
-       return true;
-}
+typedef unsigned long *pt_entry_t;
 
 static inline void arch_tlb_flush_page(unsigned long addr)
 {
diff --git a/hypervisor/arch/arm/include/asm/paging_modes.h b/hypervisor/arch/arm/include/asm/paging_modes.h
new file mode 100644 (file)
index 0000000..932fb6e
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2014
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/paging.h>
+
+extern const struct paging arm_paging[];
+
+#define hv_paging      arm_paging
index cc724393619eaa3045a2cd4517692220ba8e0f9c..6d43e935bbe2257ac1a160cc8ab735c1b764388f 100644 (file)
@@ -63,3 +63,5 @@ void arch_shutdown(void) {}
 unsigned long arch_page_map_gphys2phys(struct per_cpu *cpu_data,
                                       unsigned long gphys)
 { return INVALID_PHYS_ADDR; }
+
+const struct paging arm_paging[1];
index 8c228a382b53a1e4515024d8a68e4fbc9b80ed9d..d8e41f36583f24da57301ddf06a9cf435752e22c 100644 (file)
@@ -13,4 +13,4 @@
 always := built-in.o
 
 obj-y := apic.o dbg-write.o entry.o setup.o fault.o vmx.o control.o mmio.o \
-        ../../acpi.o vtd.o
+        ../../acpi.o vtd.o paging.o
index 9b23871cd332b30f2382d6adb12333473bcba2a7..116d05a96ca58c64847f5feb9afa5492a5a33867 100644 (file)
 
 #define PAGE_SIZE              4096
 #define PAGE_MASK              ~(PAGE_SIZE - 1)
+#define PAGE_OFFS_MASK         (PAGE_SIZE - 1)
 
 #define PAGE_DIR_LEVELS                4
-
-#define PAGE_TABLE_OFFS_MASK   0x0000000000000ff8UL
-#define PAGE_ADDR_MASK         0x000ffffffffff000UL
-#define PAGE_OFFS_MASK         0x0000000000000fffUL
-#define HUGEPAGE_ADDR_MASK     0x000fffffffe00000UL
-#define HUGEPAGE_OFFS_MASK     0x00000000001fffffUL
+#define MAX_PAGE_DIR_LEVELS    4
 
 #define PAGE_FLAG_PRESENT      0x01
 #define PAGE_FLAG_RW           0x02
 
 #ifndef __ASSEMBLY__
 
-typedef unsigned long pgd_t;
-typedef unsigned long pud_t;
-typedef unsigned long pmd_t;
-typedef unsigned long pte_t;
-
-static inline bool pgd_valid(pgd_t *pgd)
-{
-       return *pgd & 1;
-}
-
-static inline pgd_t *pgd_offset(pgd_t *page_table, unsigned long addr)
-{
-       return (pgd_t *)((unsigned long)page_table +
-                        ((addr >> 36) & PAGE_TABLE_OFFS_MASK));
-}
-
-static inline void set_pgd(pgd_t *pgd, unsigned long addr, unsigned long flags)
-{
-       *pgd = (addr & PAGE_ADDR_MASK) | flags;
-}
-
-static inline void clear_pgd(pgd_t *pgd)
-{
-       *pgd = 0;
-}
-
-static inline bool pud_valid(pud_t *pud)
-{
-       return *pud & 1;
-}
-
-static inline pud_t *pud4l_offset(pgd_t *pgd, unsigned long page_table_offset,
-                                 unsigned long addr)
-{
-       unsigned long pud = (*pgd & PAGE_ADDR_MASK) +
-               ((addr >> 27) & PAGE_TABLE_OFFS_MASK);
-
-       return (pud_t *)(pud + page_table_offset);
-}
-
-static inline pud_t *pud3l_offset(pgd_t *page_table, unsigned long addr)
-{
-       return (pud_t *)((unsigned long)page_table +
-                        ((addr >> 27) & PAGE_TABLE_OFFS_MASK));
-}
-
-static inline void set_pud(pud_t *pud, unsigned long addr, unsigned long flags)
-{
-       *pud = (addr & PAGE_ADDR_MASK) | flags;
-}
-
-static inline void clear_pud(pud_t *pud)
-{
-       *pud = 0;
-}
-
-static inline bool pmd_valid(pmd_t *pmd)
-{
-       return *pmd & 1;
-}
-
-static inline bool pmd_is_hugepage(pmd_t *pmd)
-{
-       return *pmd & (1 << 7);
-}
-
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long page_table_offset,
-                               unsigned long addr)
-{
-       unsigned long pmd = (*pud & PAGE_ADDR_MASK) +
-               ((addr >> 18) & PAGE_TABLE_OFFS_MASK);
-
-       return (pmd_t *)(pmd + page_table_offset);
-}
-
-static inline void set_pmd(pmd_t *pmd, unsigned long addr, unsigned long flags)
-{
-       *pmd = (addr & PAGE_ADDR_MASK) | flags;
-}
-
-static inline void clear_pmd(pmd_t *pmd)
-{
-       *pmd = 0;
-}
-
-static inline bool pte_valid(pte_t *pte)
-{
-       return *pte & 1;
-}
-
-static inline pte_t *pte_offset(pmd_t *pmd, unsigned long page_table_offset,
-                               unsigned long addr)
-{
-       unsigned long pte = (*pmd & PAGE_ADDR_MASK) +
-               ((addr >> 9) & PAGE_TABLE_OFFS_MASK);
-
-       return (pte_t *)(pte + page_table_offset);
-}
-
-static inline void set_pte(pte_t *pte, unsigned long addr, unsigned long flags)
-{
-       *pte = (addr & PAGE_ADDR_MASK) | flags;
-}
-
-static inline void clear_pte(pte_t *pte)
-{
-       *pte = 0;
-}
-
-static inline unsigned long phys_address(pte_t *pte, unsigned long addr)
-{
-       return (*pte & PAGE_ADDR_MASK) + (addr & PAGE_OFFS_MASK);
-}
-
-static inline unsigned long phys_address_hugepage(pmd_t *pmd,
-                                                 unsigned long addr)
-{
-       return (*pmd & HUGEPAGE_ADDR_MASK) + (addr & HUGEPAGE_OFFS_MASK);
-}
-
-static inline bool pud_empty(pgd_t *pgd, unsigned long page_table_offset)
-{
-       pud_t *pud = (pud_t *)((*pgd & PAGE_ADDR_MASK) + page_table_offset);
-       int n;
-
-       for (n = 0; n < PAGE_SIZE / sizeof(pud_t); n++, pud++)
-               if (pud_valid(pud))
-                       return false;
-       return true;
-}
-
-static inline bool pmd_empty(pud_t *pud, unsigned long page_table_offset)
-{
-       pmd_t *pmd = (pmd_t *)((*pud & PAGE_ADDR_MASK) + page_table_offset);
-       int n;
-
-       for (n = 0; n < PAGE_SIZE / sizeof(pmd_t); n++, pmd++)
-               if (pmd_valid(pmd))
-                       return false;
-       return true;
-}
-
-static inline bool pt_empty(pmd_t *pmd, unsigned long page_table_offset)
-{
-       pte_t *pte = (pte_t *)((*pmd & PAGE_ADDR_MASK) + page_table_offset);
-       int n;
-
-       for (n = 0; n < PAGE_SIZE / sizeof(pte_t); n++, pte++)
-               if (pte_valid(pte))
-                       return false;
-       return true;
-}
+typedef unsigned long *pt_entry_t;
 
 static inline void x86_tlb_flush_all(void)
 {
diff --git a/hypervisor/arch/x86/include/asm/paging_modes.h b/hypervisor/arch/x86/include/asm/paging_modes.h
new file mode 100644 (file)
index 0000000..f7268f4
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2014
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/paging.h>
+
+extern const struct paging x86_64_paging[];
+
+#define hv_paging      x86_64_paging
index e261b4987797c7be9d44db6ab3163eb15d3632c9..dac3678fda7c9d98e50bb7324a39d0524365ec9f 100644 (file)
@@ -255,6 +255,8 @@ enum vmcs_field {
 #define EXIT_REASON_XSETBV                     55
 #define EXIT_REASON_INVPCID                    58
 
+#define EPT_PAGE_DIR_LEVELS                    4
+
 #define EPT_FLAG_READ                          0x001
 #define EPT_FLAG_WRITE                         0x002
 #define EPT_FLAG_EXECUTE                       0x004
index f6864f645df268d320e47623c54367fe0b6d402f..76c8908d9b76aec0cfb037d1f16a1652c7a85c74 100644 (file)
@@ -65,6 +65,8 @@ struct vtd_entry {
 #define VTD_PAGE_READ                  0x00000001
 #define VTD_PAGE_WRITE                 0x00000002
 
+#define VTD_MAX_PAGE_DIR_LEVELS                4
+
 #define VTD_CAP_REG                    0x08
 # define VTD_CAP_NUM_DID_MASK          0x00000007
 # define VTD_CAP_CM                    0x00000080
index 6b3a644041624e9dff43c3e86c625b197b4c04f4..bdeb22ddbe3b52d39b29cc5348d1f2aa5ca94c62 100644 (file)
@@ -35,7 +35,8 @@ static u8 *map_code_page(struct per_cpu *cpu_data, unsigned long pc,
         * and we have to map a new one now. */
        if (current_page && ((pc & ~PAGE_MASK) != 0))
                return current_page;
-       return page_map_get_guest_page(cpu_data, page_table_addr, pc,
+       return page_map_get_guest_page(cpu_data, x86_64_paging,
+                                      page_table_addr, pc,
                                       PAGE_READONLY_FLAGS);
 }
 
diff --git a/hypervisor/arch/x86/paging.c b/hypervisor/arch/x86/paging.c
new file mode 100644 (file)
index 0000000..53d6f49
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2014
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/paging.h>
+
+#define X86_64_FLAG_HUGEPAGE   0x80
+
+static bool x86_64_entry_valid(pt_entry_t pte)
+{
+       return *pte & 1;
+}
+
+static unsigned long x86_64_get_flags(pt_entry_t pte)
+{
+       return *pte & 0x7f;
+}
+
+static void x86_64_set_next_pt(pt_entry_t pte, unsigned long next_pt)
+{
+       *pte = (next_pt & 0x000ffffffffff000UL) | PAGE_DEFAULT_FLAGS;
+}
+
+static void x86_64_clear_entry(pt_entry_t pte)
+{
+       *pte = 0;
+}
+
+static bool x86_64_page_table_empty(page_table_t page_table)
+{
+       pt_entry_t pte;
+       int n;
+
+       for (n = 0, pte = page_table; n < PAGE_SIZE / sizeof(u64); n++, pte++)
+               if (x86_64_entry_valid(pte))
+                       return false;
+       return true;
+}
+
+static pt_entry_t x86_64_get_entry_l4(page_table_t page_table,
+                                     unsigned long virt)
+{
+       return &page_table[(virt >> 39) & 0x1ff];
+}
+
+static pt_entry_t x86_64_get_entry_l3(page_table_t page_table,
+                                     unsigned long virt)
+{
+       return &page_table[(virt >> 30) & 0x1ff];
+}
+
+static pt_entry_t x86_64_get_entry_l2(page_table_t page_table,
+                                     unsigned long virt)
+{
+       return &page_table[(virt >> 21) & 0x1ff];
+}
+
+static pt_entry_t x86_64_get_entry_l1(page_table_t page_table,
+                                     unsigned long virt)
+{
+       return &page_table[(virt >> 12) & 0x1ff];
+}
+
+static void x86_64_set_terminal_l1(pt_entry_t pte, unsigned long phys,
+                                  unsigned long flags)
+{
+       *pte = (phys & 0x000ffffffffff000UL) | flags;
+}
+
+static unsigned long x86_64_get_phys_l3(pt_entry_t pte, unsigned long virt)
+{
+       if (!(*pte & X86_64_FLAG_HUGEPAGE))
+               return INVALID_PHYS_ADDR;
+       return (*pte & 0x000fffffc0000000UL) |
+              (virt & 0x000000003fffffffUL);
+}
+
+static unsigned long x86_64_get_phys_l2(pt_entry_t pte, unsigned long virt)
+{
+       if (!(*pte & X86_64_FLAG_HUGEPAGE))
+               return INVALID_PHYS_ADDR;
+       return (*pte & 0x000fffffffe00000UL) |
+              (virt & 0x00000000001fffffUL);
+}
+
+static unsigned long x86_64_get_phys_l1(pt_entry_t pte, unsigned long virt)
+{
+       return (*pte & 0x000ffffffffff000UL) |
+              (virt & 0x0000000000000fffUL);
+}
+
+static unsigned long x86_64_get_next_pt_l4(pt_entry_t pte)
+{
+       return *pte & 0x000ffffffffff000UL;
+}
+
+static unsigned long x86_64_get_next_pt_l23(pt_entry_t pte)
+{
+       return *pte & 0x000ffffffffff000UL;
+}
+
+#define X86_64_PAGING_COMMON                                   \
+       .entry_valid            = x86_64_entry_valid,           \
+       .get_flags              = x86_64_get_flags,             \
+       .set_next_pt            = x86_64_set_next_pt,           \
+       .clear_entry            = x86_64_clear_entry,           \
+       .page_table_empty       = x86_64_page_table_empty
+
+const struct paging x86_64_paging[] = {
+       {
+               X86_64_PAGING_COMMON,
+               .get_entry      = x86_64_get_entry_l4,
+               /* set_terminal not valid */
+               .get_phys       = page_map_get_phys_invalid,
+               .get_next_pt    = x86_64_get_next_pt_l4,
+       },
+       {
+               X86_64_PAGING_COMMON,
+               .get_entry      = x86_64_get_entry_l3,
+               /* set_terminal not valid */
+               .get_phys       = x86_64_get_phys_l3,
+               .get_next_pt    = x86_64_get_next_pt_l23,
+       },
+       {
+               X86_64_PAGING_COMMON,
+               .get_entry      = x86_64_get_entry_l2,
+               /* set_terminal not valid */
+               .get_phys       = x86_64_get_phys_l2,
+               .get_next_pt    = x86_64_get_next_pt_l23,
+       },
+       {
+               .page_size      = PAGE_SIZE,
+               X86_64_PAGING_COMMON,
+               .get_entry      = x86_64_get_entry_l1,
+               .set_terminal   = x86_64_set_terminal_l1,
+               .get_phys       = x86_64_get_phys_l1,
+               /* get_next_pt not valid */
+       },
+};
index 1002fdb04eef2b0d15136ef4749ffe7938ad3213..63f552080cc429979218af57785915f231196762 100644 (file)
@@ -56,6 +56,7 @@ static u8 __attribute__((aligned(PAGE_SIZE))) msr_bitmap[][0x2000/8] = {
        },
 };
 static u8 __attribute__((aligned(PAGE_SIZE))) apic_access_page[PAGE_SIZE];
+static struct paging ept_paging[EPT_PAGE_DIR_LEVELS];
 
 static unsigned int vmx_true_msr_offs;
 
@@ -147,8 +148,21 @@ static bool vmcs_write32(unsigned long field, u32 value)
        return vmcs_write64(field, value);
 }
 
+static void ept_set_next_pt(pt_entry_t pte, unsigned long next_pt)
+{
+       *pte = (next_pt & 0x000ffffffffff000UL) | EPT_FLAG_READ |
+               EPT_FLAG_WRITE | EPT_FLAG_EXECUTE;
+}
+
 void vmx_init(void)
 {
+       unsigned int n;
+
+       /* derive ept_paging from very similar x86_64_paging */
+       memcpy(ept_paging, x86_64_paging, sizeof(ept_paging));
+       for (n = 0; n < EPT_PAGE_DIR_LEVELS; n++)
+               ept_paging[n].set_next_pt = ept_set_next_pt;
+
        if (!using_x2apic)
                return;
 
@@ -206,6 +220,7 @@ int vmx_cell_init(struct cell *cell)
        u32 size;
 
        /* build root cell EPT */
+       cell->vmx.ept_structs.root_paging = ept_paging;
        cell->vmx.ept_structs.root_table = page_alloc(&mem_pool, 1);
        if (!cell->vmx.ept_structs.root_table)
                return -ENOMEM;
@@ -915,7 +930,9 @@ static bool vmx_handle_apic_access(struct registers *guest_regs,
                if (offset & 0x00f)
                        break;
 
-               page_table_addr = vmcs_read64(GUEST_CR3) & PAGE_ADDR_MASK;
+               // FIXME: retrieve actual guest paging mode!
+               page_table_addr =
+                       vmcs_read64(GUEST_CR3) & 0x000ffffffffff000UL;
 
                inst_len = apic_mmio_access(guest_regs, cpu_data,
                                            vmcs_read64(GUEST_RIP),
index e06a60d63870b1e55ce668c9a7cfc20c8d34f2c2..576bba21165b27750984b16a8203e0d91a8d736c 100644 (file)
 #include <jailhouse/mmio.h>
 #include <jailhouse/paging.h>
 #include <jailhouse/printk.h>
+#include <jailhouse/string.h>
 #include <asm/vtd.h>
 
 /* TODO: Support multiple segments */
 static struct vtd_entry __attribute__((aligned(PAGE_SIZE)))
        root_entry_table[256];
+static struct paging vtd_paging[VTD_MAX_PAGE_DIR_LEVELS];
 static void *dmar_reg_base;
 static unsigned int dmar_units;
 static unsigned int dmar_pt_levels;
@@ -61,11 +63,17 @@ static void vtd_flush_domain_caches(unsigned int did)
                                      iotlb_scope);
 }
 
+static void vtd_set_next_pt(pt_entry_t pte, unsigned long next_pt)
+{
+       *pte = (next_pt & 0x000ffffffffff000UL) | VTD_PAGE_READ |
+               VTD_PAGE_WRITE;
+}
+
 int vtd_init(void)
 {
+       unsigned int pt_levels, num_did, n;
        const struct acpi_dmar_table *dmar;
        const struct acpi_dmar_drhd *drhd;
-       unsigned int pt_levels, num_did;
        void *reg_base = NULL;
        unsigned long offset;
        unsigned long caps;
@@ -148,6 +156,15 @@ int vtd_init(void)
        } while (offset < dmar->header.length &&
                 drhd->header.type == ACPI_DMAR_DRHD);
 
+       /*
+        * Derive vtd_paging from very similar x86_64_paging,
+        * replicating 0..3 for 4 levels and 1..3 for 3 levels.
+        */
+       memcpy(vtd_paging, &x86_64_paging[4 - dmar_pt_levels],
+              sizeof(struct paging) * dmar_pt_levels);
+       for (n = 0; n < dmar_pt_levels; n++)
+               vtd_paging[n].set_next_pt = vtd_set_next_pt;
+
        return 0;
 }
 
@@ -203,6 +220,7 @@ int vtd_cell_init(struct cell *cell)
        if (cell->id >= dmar_num_did)
                return -ERANGE;
 
+       cell->vtd.pg_structs.root_paging = vtd_paging;
        cell->vtd.pg_structs.root_table = page_alloc(&mem_pool, 1);
        if (!cell->vtd.pg_structs.root_table)
                return -ENOMEM;
index 6898b859387aed03f9734dfc331f7304c3d297c2..6c36a14f5656310aa9471f48025c20f07f02e8c3 100644 (file)
@@ -37,17 +37,56 @@ enum page_map_coherent {
        PAGE_MAP_NON_COHERENT,
 };
 
-typedef pgd_t *page_table_t;
+typedef pt_entry_t page_table_t;
+
+struct paging {
+       /** Page size of terminal entries in this level or 0 if none are
+        * supported. */
+       unsigned int page_size;
+
+       /** Get entry in given table corresponding to virt address. */
+       pt_entry_t (*get_entry)(page_table_t page_table, unsigned long virt);
+
+       /** Returns true if entry is valid (terminal or non-terminal). */
+       bool (*entry_valid)(pt_entry_t pte);
+
+       /** Set terminal entry to physical address and access flags. */
+       void (*set_terminal)(pt_entry_t pte, unsigned long phys,
+                            unsigned long flags);
+       /** Extract physical address from given entry. If entry is not
+        * terminal, INVALID_PHYS_ADDR is returned. */
+       unsigned long (*get_phys)(pt_entry_t pte, unsigned long virt);
+       /** Extract access flags from given entry. Only valid for terminal
+        * entries. */
+       unsigned long (*get_flags)(pt_entry_t pte);
+
+       /** Set entry to physical address of next-level page table. */
+       void (*set_next_pt)(pt_entry_t pte, unsigned long next_pt);
+       /** Get physical address of next-level page table from entry. Only
+        * valid for non-terminal entries. */
+       unsigned long (*get_next_pt)(pt_entry_t pte);
+
+       /** Invalidate entry. */
+       void (*clear_entry)(pt_entry_t pte);
+
+       /** Returns true if given page table contains no valid entries. */
+       bool (*page_table_empty)(page_table_t page_table);
+};
 
 struct paging_structures {
+       const struct paging *root_paging;
        page_table_t root_table;
 };
 
+#include <asm/paging_modes.h>
+
 extern struct page_pool mem_pool;
 extern struct page_pool remap_pool;
 
 extern struct paging_structures hv_paging_structs;
 
+unsigned long page_map_get_phys_invalid(pt_entry_t pte, unsigned long virt);
+
 void *page_alloc(struct page_pool *pool, unsigned int num);
 void page_free(struct page_pool *pool, void *first_page, unsigned int num);
 
@@ -76,7 +115,8 @@ void page_map_destroy(const struct paging_structures *pg_structs,
                      unsigned int levels, enum page_map_coherent coherent);
 
 void *page_map_get_guest_page(struct per_cpu *cpu_data,
-                             unsigned long page_table_paddr,
+                             const struct paging *paging,
+                             unsigned long page_table_gphys,
                              unsigned long virt, unsigned long flags);
 
 int paging_init(void);
index 4b67812420159df2b36f6bbc39dcef153ebfea4c..bd298abfc050e3ec7587ac295a7bee42757491fa 100644 (file)
@@ -32,6 +32,11 @@ struct page_pool remap_pool = {
 
 struct paging_structures hv_paging_structs;
 
+unsigned long page_map_get_phys_invalid(pt_entry_t pte, unsigned long virt)
+{
+       return INVALID_PHYS_ADDR;
+}
+
 static unsigned long find_next_free_page(struct page_pool *pool,
                                         unsigned long start)
 {
@@ -106,48 +111,27 @@ void page_free(struct page_pool *pool, void *page, unsigned int num)
 unsigned long page_map_virt2phys(const struct paging_structures *pg_structs,
                                 unsigned long virt, unsigned int levels)
 {
-       unsigned long offs = hypervisor_header.page_offset;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-
-       switch (levels) {
-       case 4:
-               pgd = pgd_offset(pg_structs->root_table, virt);
-               if (!pgd_valid(pgd))
-                       return INVALID_PHYS_ADDR;
+       const struct paging *paging = pg_structs->root_paging;
+       page_table_t pt = pg_structs->root_table;
+       unsigned long phys;
+       pt_entry_t pte;
 
-               pud = pud4l_offset(pgd, offs, virt);
-               break;
-       case 3:
-               pud = pud3l_offset(pg_structs->root_table, virt);
-               break;
-       default:
-               return INVALID_PHYS_ADDR;
+       while (1) {
+               pte = paging->get_entry(pt, virt);
+               if (!paging->entry_valid(pte))
+                       return INVALID_PHYS_ADDR;
+               phys = paging->get_phys(pte, virt);
+               if (phys != INVALID_PHYS_ADDR)
+                       return phys;
+               pt = page_map_phys2hvirt(paging->get_next_pt(pte));
+               paging++;
        }
-       if (!pud_valid(pud))
-               return INVALID_PHYS_ADDR;
-
-       pmd = pmd_offset(pud, offs, virt);
-       if (!pmd_valid(pud))
-               return INVALID_PHYS_ADDR;
-
-       if (pmd_is_hugepage(pmd))
-               return phys_address_hugepage(pmd, virt);
-
-       pte = pte_offset(pmd, offs, virt);
-       if (!pte_valid(pte))
-               return INVALID_PHYS_ADDR;
-
-       return phys_address(pte, virt);
 }
 
-static void flush_page_table(void *addr, unsigned long size,
-                            enum page_map_coherent coherent)
+static void flush_pt_entry(pt_entry_t pte, enum page_map_coherent coherent)
 {
        if (coherent == PAGE_MAP_COHERENT)
-               flush_cache(addr, size);
+               flush_cache(pte, sizeof(*pte));
 }
 
 int page_map_create(const struct paging_structures *pg_structs,
@@ -155,57 +139,40 @@ int page_map_create(const struct paging_structures *pg_structs,
                    unsigned long flags, unsigned long table_flags,
                    unsigned int levels, enum page_map_coherent coherent)
 {
-       unsigned long offs = hypervisor_header.page_offset;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-
-       for (size = PAGE_ALIGN(size); size > 0;
-            phys += PAGE_SIZE, virt += PAGE_SIZE, size -= PAGE_SIZE) {
-               switch (levels) {
-               case 4:
-                       pgd = pgd_offset(pg_structs->root_table, virt);
-                       if (!pgd_valid(pgd)) {
-                               pud = page_alloc(&mem_pool, 1);
-                               if (!pud)
+       virt &= PAGE_MASK;
+       size = PAGE_ALIGN(size);
+
+       while (size > 0) {
+               const struct paging *paging = pg_structs->root_paging;
+               page_table_t pt = pg_structs->root_table;
+               pt_entry_t pte;
+
+               while (1) {
+                       pte = paging->get_entry(pt, virt);
+                       if (paging->page_size > 0) {
+                               paging->set_terminal(pte, phys, flags);
+                               flush_pt_entry(pte, coherent);
+                               break;
+                       }
+                       if (paging->entry_valid(pte)) {
+                               pt = page_map_phys2hvirt(
+                                               paging->get_next_pt(pte));
+                       } else {
+                               pt = page_alloc(&mem_pool, 1);
+                               if (!pt)
                                        return -ENOMEM;
-                               set_pgd(pgd, page_map_hvirt2phys(pud),
-                                       table_flags);
-                               flush_page_table(pgd, sizeof(pgd), coherent);
+                               paging->set_next_pt(pte,
+                                                   page_map_hvirt2phys(pt));
+                               flush_pt_entry(pte, coherent);
                        }
-                       pud = pud4l_offset(pgd, offs, virt);
-                       break;
-               case 3:
-                       pud = pud3l_offset(pg_structs->root_table, virt);
-                       break;
-               default:
-                       return -EINVAL;
+                       paging++;
                }
-
-               if (!pud_valid(pud)) {
-                       pmd = page_alloc(&mem_pool, 1);
-                       if (!pmd)
-                               return -ENOMEM;
-                       set_pud(pud, page_map_hvirt2phys(pmd), table_flags);
-                       flush_page_table(pud, sizeof(pud), coherent);
-               }
-
-               pmd = pmd_offset(pud, offs, virt);
-               if (!pmd_valid(pmd)) {
-                       pte = page_alloc(&mem_pool, 1);
-                       if (!pte)
-                               return -ENOMEM;
-                       set_pmd(pmd, page_map_hvirt2phys(pte), table_flags);
-                       flush_page_table(pmd, sizeof(pmd), coherent);
-               }
-
-               pte = pte_offset(pmd, offs, virt);
-               set_pte(pte, phys, flags);
-               flush_page_table(pte, sizeof(pte), coherent);
                arch_tlb_flush_page(virt);
-       }
 
+               phys += paging->page_size;
+               virt += paging->page_size;
+               size -= paging->page_size;
+       }
        return 0;
 }
 
@@ -213,143 +180,86 @@ void page_map_destroy(const struct paging_structures *pg_structs,
                      unsigned long virt, unsigned long size,
                      unsigned int levels, enum page_map_coherent coherent)
 {
-       unsigned long offs = hypervisor_header.page_offset;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-
-       for (size = PAGE_ALIGN(size); size > 0;
-            virt += PAGE_SIZE, size -= PAGE_SIZE) {
-               switch (levels) {
-               case 4:
-                       pgd = pgd_offset(pg_structs->root_table, virt);
-                       if (!pgd_valid(pgd))
-                               continue;
-
-                       pud = pud4l_offset(pgd, offs, virt);
-                       break;
-               case 3:
-                       pgd = 0; /* silence compiler warning */
-                       pud = pud3l_offset(pg_structs->root_table, virt);
-                       break;
-               default:
-                       return;
+       size = PAGE_ALIGN(size);
+
+       while (size > 0) {
+               const struct paging *paging = pg_structs->root_paging;
+               page_table_t pt[MAX_PAGE_DIR_LEVELS];
+               unsigned long page_size;
+               pt_entry_t pte;
+               int n = 0;
+
+               /* walk down the page table, saving intermediate tables */
+               pt[0] = pg_structs->root_table;
+               while (1) {
+                       pte = paging->get_entry(pt[n], virt);
+                       if (!paging->entry_valid(pte))
+                               break;
+                       if (paging->get_phys(pte, virt) != INVALID_PHYS_ADDR)
+                               break;
+                       pt[++n] = page_map_phys2hvirt(
+                                       paging->get_next_pt(pte));
+                       paging++;
+               }
+               /* advance by the page size of the current paging level */
+               page_size = paging->page_size ? paging->page_size : PAGE_SIZE;
+
+               /* walk up again, clearing entries, releasing empty tables */
+               while (1) {
+                       paging->clear_entry(pte);
+                       flush_pt_entry(pte, coherent);
+                       if (n == 0 || !paging->page_table_empty(pt[n]))
+                               break;
+                       page_free(&mem_pool, pt[n], 1);
+                       paging--;
+                       pte = paging->get_entry(pt[--n], virt);
                }
-               if (!pud_valid(pud))
-                       continue;
-
-               pmd = pmd_offset(pud, offs, virt);
-               if (!pmd_valid(pmd))
-                       continue;
-
-               pte = pte_offset(pmd, offs, virt);
-               clear_pte(pte);
-               flush_page_table(pte, sizeof(pte), coherent);
-
-               if (!pt_empty(pmd, offs))
-                       continue;
-               page_free(&mem_pool, pte_offset(pmd, offs, 0), 1);
-               clear_pmd(pmd);
-               flush_page_table(pmd, sizeof(pmd), coherent);
-
-               if (!pmd_empty(pud, offs))
-                       continue;
-               page_free(&mem_pool, pmd_offset(pud, offs, 0), 1);
-               clear_pud(pud);
-               flush_page_table(pud, sizeof(pud), coherent);
-
-               if (levels < 4 || !pud_empty(pgd, offs))
-                       continue;
-               page_free(&mem_pool, pud4l_offset(pgd, offs, 0), 1);
-               clear_pgd(pgd);
-               flush_page_table(pgd, sizeof(pgd), coherent);
-
                arch_tlb_flush_page(virt);
+
+               if (page_size > size)
+                       break;
+               virt += page_size;
+               size -= page_size;
        }
 }
 
 void *page_map_get_guest_page(struct per_cpu *cpu_data,
-                             unsigned long page_table_paddr,
+                             const struct paging *paging,
+                             unsigned long page_table_gphys,
                              unsigned long virt, unsigned long flags)
 {
-       unsigned long page_virt = TEMPORARY_MAPPING_CPU_BASE(cpu_data);
-       unsigned long phys;
-#if PAGE_DIR_LEVELS == 4
-       pgd_t *pgd;
-#endif
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       unsigned long page_virt, phys;
+       pt_entry_t pte;
        int err;
 
-       phys = arch_page_map_gphys2phys(cpu_data, page_table_paddr);
-       if (phys == INVALID_PHYS_ADDR)
-               return NULL;
-       err = page_map_create(&hv_paging_structs, phys, PAGE_SIZE, page_virt,
-                             PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
-                             PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
-       if (err)
-               return NULL;
-
-#if PAGE_DIR_LEVELS == 4
-       pgd = pgd_offset((pgd_t *)page_virt, virt);
-       if (!pgd_valid(pgd))
-               return NULL;
-       phys = arch_page_map_gphys2phys(cpu_data,
-                       (unsigned long)pud4l_offset(pgd, 0, 0));
-       if (phys == INVALID_PHYS_ADDR)
-               return NULL;
-       err = page_map_create(&hv_paging_structs, phys, PAGE_SIZE, page_virt,
-                             PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
-                             PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
-       if (err)
-               return NULL;
-
-       pud = pud4l_offset((pgd_t *)&page_virt, 0, virt);
-#elif PAGE_DIR_LEVELS == 3
-       pud = pud3l_offset((pgd_t *)page_virt, virt);
-#else
-# error Unsupported paging level
-#endif
-       if (!pud_valid(pud))
-               return NULL;
-       phys = arch_page_map_gphys2phys(cpu_data,
-                                       (unsigned long)pmd_offset(pud, 0, 0));
-       if (phys == INVALID_PHYS_ADDR)
-               return NULL;
-       err = page_map_create(&hv_paging_structs, phys, PAGE_SIZE, page_virt,
-                             PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
-                             PAGE_DIR_LEVELS, PAGE_MAP_NON_COHERENT);
-       if (err)
-               return NULL;
+       page_virt = TEMPORARY_MAPPING_BASE +
+               cpu_data->cpu_id * PAGE_SIZE * NUM_TEMPORARY_PAGES;
 
-       pmd = pmd_offset((pud_t *)&page_virt, 0, virt);
-       if (!pmd_valid(pmd))
-               return NULL;
-       if (pmd_is_hugepage(pmd))
-               phys = phys_address_hugepage(pmd, virt);
-       else {
-               phys = arch_page_map_gphys2phys(cpu_data,
-                               (unsigned long)pte_offset(pmd, 0, 0));
+       while (1) {
+               /* map guest page table */
+               phys = arch_page_map_gphys2phys(cpu_data, page_table_gphys);
                if (phys == INVALID_PHYS_ADDR)
                        return NULL;
-               err = page_map_create(&hv_paging_structs, phys, PAGE_SIZE,
-                                     page_virt, PAGE_READONLY_FLAGS,
+               err = page_map_create(&hv_paging_structs, phys,
+                                     PAGE_SIZE, page_virt,
+                                     PAGE_READONLY_FLAGS,
                                      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
                                      PAGE_MAP_NON_COHERENT);
                if (err)
                        return NULL;
 
-               pte = pte_offset((pmd_t *)&page_virt, 0, virt);
-               if (!pte_valid(pte))
+               /* evaluate page table entry */
+               pte = paging->get_entry((page_table_t)page_virt, virt);
+               if (!paging->entry_valid(pte))
                        return NULL;
-               phys = phys_address(pte, 0);
+               phys = paging->get_phys(pte, virt);
+               if (phys != INVALID_PHYS_ADDR)
+                       break;
+               page_table_gphys = paging->get_next_pt(pte);
+               paging++;
        }
-       phys = arch_page_map_gphys2phys(cpu_data, phys);
-       if (phys == INVALID_PHYS_ADDR)
-               return NULL;
 
+       /* map guest page */
        err = page_map_create(&hv_paging_structs, phys, PAGE_SIZE, page_virt,
                              flags, PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
                              PAGE_MAP_NON_COHERENT);
@@ -394,6 +304,7 @@ int paging_init(void)
        for (n = 0; n < remap_pool.used_pages; n++)
                set_bit(n, remap_pool.used_bitmap);
 
+       hv_paging_structs.root_paging = hv_paging;
        hv_paging_structs.root_table = page_alloc(&mem_pool, 1);
        if (!hv_paging_structs.root_table)
                goto error_nomem;