Merge remote-tracking branch 'kiszka/master'
diff --git a/hypervisor/arch/x86/vmx.c b/hypervisor/arch/x86/vmx.c
index c38ad2eb55cac37a04bb331aa2d89be2430399ab..f5cd8ca1de15b0d2a80f1449c93efdfe87aacf29 100644
--- a/hypervisor/arch/x86/vmx.c
+++ b/hypervisor/arch/x86/vmx.c
@@ -1,7 +1,7 @@
 /*
  * Jailhouse, a Linux-based partitioning hypervisor
  *
- * Copyright (c) Siemens AG, 2013-2015
+ * Copyright (c) Siemens AG, 2013-2016
  * Copyright (c) Valentine Sinitsyn, 2014
  *
  * Authors:
 #include <asm/vcpu.h>
 #include <asm/vmx.h>
 
-#define CR0_IDX                0
-#define CR4_IDX                1
+#define CR0_IDX                        0
+#define CR4_IDX                        1
+
+#define PIO_BITMAP_PAGES       2
 
 static const struct segment invalid_seg = {
        .access_rights = 0x10000
@@ -69,7 +71,8 @@ static u8 __attribute__((aligned(PAGE_SIZE))) msr_bitmap[][0x2000/8] = {
                [  0x828/8 ...  0x82f/8 ] = 0x81, /* 0x828, 0x82f */
                [  0x830/8 ...  0x837/8 ] = 0xfd, /* 0x830, 0x832 - 0x837 */
                [  0x838/8 ...  0x83f/8 ] = 0xc1, /* 0x838, 0x83e, 0x83f */
-               [  0x840/8 ... 0x1fff/8 ] = 0,
+               [  0x840/8 ...  0xd8f/8 ] = 0xff, /* esp. 0xc80 - 0xd8f */
+               [  0xd90/8 ... 0x1fff/8 ] = 0,
        },
        [ VMX_MSR_BMP_C000_WRITE ] = {
                [      0/8 ... 0x1fff/8 ] = 0,
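Annotation: the widened 0xff run extends read interception from 0x840 up to 0xd8f. Each 1 KiB region of the VMX MSR bitmap maps bit n to MSR n of its range, and a set bit makes the corresponding RDMSR (or WRMSR) trap. A lookup works like this sketch (illustration only, not part of the patch):

    static inline bool msr_bit_set(const u8 *region, unsigned int msr)
    {
            /* bit (msr % 8) in byte (msr / 8) of the 1 KiB region */
            return region[msr / 8] & (1 << (msr % 8));
    }

This also decodes the hex comments above: 0xc1 at byte 0x838/8 sets bits 0, 6 and 7, i.e. MSRs 0x838, 0x83e and 0x83f.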
@@ -188,7 +191,7 @@ static int vmx_check_features(void)
        unsigned long vmx_pin_ctrl, vmx_basic, maybe1, required1;
        unsigned long vmx_entry_ctrl, vmx_exit_ctrl;
 
-       if (!(cpuid_ecx(1) & X86_FEATURE_VMX))
+       if (!(cpuid_ecx(1, 0) & X86_FEATURE_VMX))
                return trace_error(-ENODEV);
 
        vmx_basic = read_msr(MSR_IA32_VMX_BASIC);
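The cpuid_eax()/cpuid_ecx()/cpuid_edx() calls gain a second argument throughout this patch: the subleaf passed in ECX, which matters for CPUID leaves whose output depends on it. A minimal sketch of the assumed helper (the real one lives in jailhouse's asm headers):

    static inline u32 cpuid_ecx(unsigned int op, unsigned int subop)
    {
            u32 eax = op, ebx, ecx = subop, edx;

            asm volatile("cpuid"
                         : "+a" (eax), "=b" (ebx), "+c" (ecx), "=d" (edx));
            return ecx;
    }

For leaf 1 the subleaf is irrelevant, so passing 0 preserves the old behavior.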
@@ -231,7 +234,7 @@ static int vmx_check_features(void)
                return trace_error(-EIO);
 
        /* require RDTSCP if present in CPUID */
-       if (cpuid_edx(0x80000001) & X86_FEATURE_RDTSCP) {
+       if (cpuid_edx(0x80000001, 0) & X86_FEATURE_RDTSCP) {
                enable_rdtscp = SECONDARY_EXEC_RDTSCP;
                if (!(vmx_proc_ctrl2 & SECONDARY_EXEC_RDTSCP))
                        return trace_error(-EIO);
@@ -280,8 +283,8 @@ static int vmx_check_features(void)
 
 static void ept_set_next_pt(pt_entry_t pte, unsigned long next_pt)
 {
-       *pte = (next_pt & 0x000ffffffffff000UL) | EPT_FLAG_READ |
-               EPT_FLAG_WRITE | EPT_FLAG_EXECUTE;
+       *pte = (next_pt & BIT_MASK(51, 12)) | EPT_FLAG_READ | EPT_FLAG_WRITE |
+               EPT_FLAG_EXECUTE;
 }
 
 int vcpu_vendor_init(void)
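BIT_MASK(msb, lsb) replaces the hard-to-review address-mask literals in ept_set_next_pt() and below. Judging by the values it substitutes, it is an all-ones mask over bits lsb..msb inclusive; a sketch consistent with BIT_MASK(51, 12) == 0x000ffffffffff000UL, assuming 64-bit unsigned long (the actual definition sits in a shared jailhouse header):

    #define BIT_MASK(msb, lsb) \
            ((~0UL >> (63 - (msb))) & ~((1UL << (lsb)) - 1))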
@@ -323,12 +326,12 @@ unsigned long arch_paging_gphys2phys(struct per_cpu *cpu_data,
 
 int vcpu_vendor_cell_init(struct cell *cell)
 {
-       int err = -ENOMEM;
+       int err;
 
        /* allocate io_bitmap */
-       cell->arch.vmx.io_bitmap = page_alloc(&mem_pool, 2);
+       cell->arch.vmx.io_bitmap = page_alloc(&mem_pool, PIO_BITMAP_PAGES);
        if (!cell->arch.vmx.io_bitmap)
-               return err;
+               return -ENOMEM;
 
        /* build root EPT of cell */
        cell->arch.vmx.ept_structs.root_paging = ept_paging;
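PIO_BITMAP_PAGES is 2 because VMX splits port-I/O interception across I/O bitmap A (ports 0x0000-0x7fff) and bitmap B (0x8000-0xffff), one 4 KiB page each. With the two pages allocated contiguously as above, a port lookup reduces to (illustration only):

    static inline bool pio_bit_set(const u8 *io_bitmap, u16 port)
    {
            /* a set bit makes IN/OUT on this port trap to the hypervisor */
            return io_bitmap[port / 8] & (1 << (port % 8));
    }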
@@ -570,7 +573,9 @@ static bool vmcs_setup(struct per_cpu *cpu_data)
 
        ok &= vmx_set_cell_config();
 
-       ok &= vmcs_write32(EXCEPTION_BITMAP, 0);
+       /* see vmx_handle_exception_nmi for the interception reason */
+       ok &= vmcs_write32(EXCEPTION_BITMAP,
+                          (1 << DB_VECTOR) | (1 << AC_VECTOR));
 
        val = read_msr(MSR_IA32_VMX_EXIT_CTLS);
        val |= VM_EXIT_HOST_ADDR_SPACE_SIZE |
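Each set bit in EXCEPTION_BITMAP makes the corresponding guest exception vector exit to the hypervisor instead of being delivered directly. The two vectors intercepted here are the standard x86 numbers:

    #define DB_VECTOR  1    /* #DB, debug exception */
    #define AC_VECTOR 17    /* #AC, alignment-check exception */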
@@ -601,7 +606,7 @@ int vcpu_init(struct per_cpu *cpu_data)
        int err;
 
        /* make sure all perf counters are off */
-       if ((cpuid_eax(0x0a) & 0xff) > 0)
+       if ((cpuid_eax(0x0a, 0) & 0xff) > 0)
                write_msr(MSR_IA32_PERF_GLOBAL_CTRL, 0);
 
        if (cpu_data->linux_cr4 & X86_CR4_VMXE)
@@ -648,7 +653,8 @@ int vcpu_init(struct per_cpu *cpu_data)
         */
        write_cr0(X86_CR0_HOST_STATE);
        write_cr4(X86_CR4_HOST_STATE | X86_CR4_VMXE |
-                 ((cpuid_ecx(1) & X86_FEATURE_XSAVE) ? X86_CR4_OSXSAVE : 0));
+                 ((cpuid_ecx(1, 0) & X86_FEATURE_XSAVE) ?
+                  X86_CR4_OSXSAVE : 0));
 
        if (!vmxon(cpu_data))  {
                write_cr4(cpu_data->linux_cr4);
@@ -765,7 +771,7 @@ void __attribute__((noreturn)) vcpu_deactivate_vmm(void)
        __builtin_unreachable();
 }
 
-static void vmx_vcpu_reset(unsigned int sipi_vector)
+void vcpu_vendor_reset(unsigned int sipi_vector)
 {
        unsigned long val;
        bool ok = true;
@@ -844,6 +850,7 @@ static void vmx_vcpu_reset(unsigned int sipi_vector)
        ok &= vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
        ok &= vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
        ok &= vmcs_write64(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+       ok &= vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
 
        val = vmcs_read32(VM_ENTRY_CONTROLS);
        val &= ~VM_ENTRY_IA32E_MODE;
@@ -857,35 +864,66 @@ static void vmx_vcpu_reset(unsigned int sipi_vector)
        }
 }
 
-void vcpu_nmi_handler(void)
+static void vmx_preemption_timer_set_enable(bool enable)
 {
-       u32 pin_based_ctrl;
-
-       if (this_cpu_data()->vmx_state != VMCS_READY)
-               return;
+       u32 pin_based_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
 
-       pin_based_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
-       pin_based_ctrl |= PIN_BASED_VMX_PREEMPTION_TIMER;
+       if (enable)
+               pin_based_ctrl |= PIN_BASED_VMX_PREEMPTION_TIMER;
+       else
+               pin_based_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
        vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, pin_based_ctrl);
 }
 
+void vcpu_nmi_handler(void)
+{
+       if (this_cpu_data()->vmx_state == VMCS_READY)
+               vmx_preemption_timer_set_enable(true);
+}
+
 void vcpu_park(void)
 {
-       vmx_vcpu_reset(0);
+       vcpu_vendor_reset(0);
        vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_HLT);
 }
 
-static void vmx_disable_preemption_timer(void)
+void vcpu_skip_emulated_instruction(unsigned int inst_len)
 {
-       u32 pin_based_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+       vmcs_write64(GUEST_RIP, vmcs_read64(GUEST_RIP) + inst_len);
+}
 
-       pin_based_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-       vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, pin_based_ctrl);
+static void vmx_check_events(void)
+{
+       vmx_preemption_timer_set_enable(false);
+       x86_check_events();
 }
 
-void vcpu_skip_emulated_instruction(unsigned int inst_len)
+static void vmx_handle_exception_nmi(void)
 {
-       vmcs_write64(GUEST_RIP, vmcs_read64(GUEST_RIP) + inst_len);
+       u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) {
+               this_cpu_data()->stats[JAILHOUSE_CPU_STAT_VMEXITS_MANAGEMENT]++;
+               asm volatile("int %0" : : "i" (NMI_VECTOR));
+       } else {
+               this_cpu_data()->stats[JAILHOUSE_CPU_STAT_VMEXITS_EXCEPTION]++;
+               /*
+                * Reinject the event straight away. We only intercept #DB and
+                * #AC to prevent malicious guests from triggering infinite
+                * loops in microcode (see e.g. CVE-2015-5307 and
+                * CVE-2015-8104).
+                */
+               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+                            intr_info & INTR_TO_VECTORING_INFO_MASK);
+               vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                            vmcs_read32(VM_EXIT_INTR_ERROR_CODE));
+       }
+
+       /*
+        * Check for events even in the exception case in order to maintain
+        * control over the guest if it triggered #DB or #AC loops.
+        */
+       vmx_check_events();
 }
 
 static void update_efer(void)
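Two mechanisms in this hunk deserve a note. First, the kick path: vcpu_nmi_handler() runs when another CPU sends a management NMI, and arming the preemption timer forces a prompt VM exit (this relies on the VMCS preemption timer value being preset to 0 elsewhere in setup, which this excerpt does not show); vmx_check_events() then disarms the timer on the resulting exit. Second, reinjection: vmx_handle_exception_nmi() decodes VM_EXIT_INTR_INFO, whose layout per the Intel SDM is sketched below; the INTR_INFO_* and INTR_TO_VECTORING_INFO_MASK constants in the patch encapsulate these fields (the helpers here are illustrations, not jailhouse API):

    static inline unsigned int intr_info_vector(u32 intr_info)
    {
            return intr_info & 0xff;                /* bits 7:0: vector */
    }

    static inline bool intr_info_is_nmi(u32 intr_info)
    {
            return ((intr_info >> 8) & 0x7) == 2;   /* bits 10:8: type, 2 = NMI */
    }

Bit 11 marks a valid error code (hence the separate VM_ENTRY_EXCEPTION_ERROR_CODE write) and bit 31 marks the whole field as valid.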
@@ -938,12 +976,12 @@ bool vcpu_get_guest_paging_structs(struct guest_paging_structures *pg_structs)
        if (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE) {
                pg_structs->root_paging = x86_64_paging;
                pg_structs->root_table_gphys =
-                       vmcs_read64(GUEST_CR3) & 0x000ffffffffff000UL;
+                       vmcs_read64(GUEST_CR3) & BIT_MASK(51, 12);
        } else if (vmcs_read64(GUEST_CR0) & X86_CR0_PG &&
                 !(vmcs_read64(GUEST_CR4) & X86_CR4_PAE)) {
                pg_structs->root_paging = i386_paging;
                pg_structs->root_table_gphys =
-                       vmcs_read64(GUEST_CR3) & 0xfffff000UL;
+                       vmcs_read64(GUEST_CR3) & BIT_MASK(31, 12);
        } else {
                printk("FATAL: Unsupported paging mode\n");
                return false;
@@ -1046,25 +1084,16 @@ void vcpu_vendor_get_mmio_intercept(struct vcpu_mmio_intercept *mmio)
 void vcpu_handle_exit(struct per_cpu *cpu_data)
 {
        u32 reason = vmcs_read32(VM_EXIT_REASON);
-       int sipi_vector;
 
        cpu_data->stats[JAILHOUSE_CPU_STAT_VMEXITS_TOTAL]++;
 
        switch (reason) {
        case EXIT_REASON_EXCEPTION_NMI:
-               asm volatile("int %0" : : "i" (NMI_VECTOR));
-               /* fall through */
+               vmx_handle_exception_nmi();
+               return;
        case EXIT_REASON_PREEMPTION_TIMER:
                cpu_data->stats[JAILHOUSE_CPU_STAT_VMEXITS_MANAGEMENT]++;
-               vmx_disable_preemption_timer();
-               sipi_vector = x86_handle_events(cpu_data);
-               if (sipi_vector >= 0) {
-                       printk("CPU %d received SIPI, vector %x\n",
-                              cpu_data->cpu_id, sipi_vector);
-                       vmx_vcpu_reset(sipi_vector);
-                       vcpu_reset(sipi_vector == APIC_BSP_PSEUDO_SIPI);
-               }
-               iommu_check_pending_faults();
+               vmx_check_events();
                return;
        case EXIT_REASON_CPUID:
                vcpu_handle_cpuid();
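The open-coded SIPI and IOMMU-fault handling is gone from vcpu_handle_exit(); both exit reasons now funnel through vmx_check_events(), which disarms the preemption timer and calls the new generic x86_check_events(). Reconstructed from the removed lines, that helper is assumed to do roughly the following (a sketch, not its actual definition in common x86 code):

    static void x86_check_events(void)
    {
            struct per_cpu *cpu_data = this_cpu_data();
            int sipi_vector = x86_handle_events(cpu_data);

            if (sipi_vector >= 0) {
                    printk("CPU %d received SIPI, vector %x\n",
                           cpu_data->cpu_id, sipi_vector);
                    vcpu_vendor_reset(sipi_vector);
                    vcpu_reset(sipi_vector == APIC_BSP_PSEUDO_SIPI);
            }
            iommu_check_pending_faults();
    }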
@@ -1133,7 +1162,7 @@ void vcpu_vendor_get_cell_io_bitmap(struct cell *cell,
                                    struct vcpu_io_bitmap *iobm)
 {
        iobm->data = cell->arch.vmx.io_bitmap;
-       iobm->size = sizeof(cell->arch.vmx.io_bitmap);
+       iobm->size = PIO_BITMAP_PAGES * PAGE_SIZE;
 }
 
 void vcpu_vendor_get_execution_state(struct vcpu_execution_state *x_state)
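Finally, deriving iobm->size from PIO_BITMAP_PAGES * PAGE_SIZE ties the advertised bitmap size to the allocation in vcpu_vendor_cell_init(), whereas the old sizeof() expression depended on how the io_bitmap member happens to be declared.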