X-Git-Url: http://rtime.felk.cvut.cz/gitweb/jailhouse.git/blobdiff_plain/4e0ea74420900c7a66d265117b09fddbc3aaa754..f053c948f5b6fe7b9ec5cf6533e177fd42113ba3:/hypervisor/arch/x86/vmx.c

diff --git a/hypervisor/arch/x86/vmx.c b/hypervisor/arch/x86/vmx.c
index c38ad2e..f5cd8ca 100644
--- a/hypervisor/arch/x86/vmx.c
+++ b/hypervisor/arch/x86/vmx.c
@@ -1,7 +1,7 @@
 /*
  * Jailhouse, a Linux-based partitioning hypervisor
  *
- * Copyright (c) Siemens AG, 2013-2015
+ * Copyright (c) Siemens AG, 2013-2016
  * Copyright (c) Valentine Sinitsyn, 2014
  *
  * Authors:
@@ -25,8 +25,10 @@
 #include
 #include
 
-#define CR0_IDX		0
-#define CR4_IDX		1
+#define CR0_IDX			0
+#define CR4_IDX			1
+
+#define PIO_BITMAP_PAGES	2
 
 static const struct segment invalid_seg = {
 	.access_rights = 0x10000
@@ -69,7 +71,8 @@ static u8 __attribute__((aligned(PAGE_SIZE))) msr_bitmap[][0x2000/8] = {
 		[  0x828/8 ...  0x82f/8 ] = 0x81, /* 0x828, 0x82f */
 		[  0x830/8 ...  0x837/8 ] = 0xfd, /* 0x830, 0x832 - 0x837 */
 		[  0x838/8 ...  0x83f/8 ] = 0xc1, /* 0x838, 0x83e, 0x83f */
-		[  0x840/8 ... 0x1fff/8 ] = 0,
+		[  0x840/8 ...  0xd8f/8 ] = 0xff, /* esp. 0xc80 - 0xd8f */
+		[  0xd90/8 ... 0x1fff/8 ] = 0,
 	},
 	[ VMX_MSR_BMP_C000_WRITE ] = {
 		[      0/8 ... 0x1fff/8 ] = 0,
@@ -188,7 +191,7 @@ static int vmx_check_features(void)
 	unsigned long vmx_pin_ctrl, vmx_basic, maybe1, required1;
 	unsigned long vmx_entry_ctrl, vmx_exit_ctrl;
 
-	if (!(cpuid_ecx(1) & X86_FEATURE_VMX))
+	if (!(cpuid_ecx(1, 0) & X86_FEATURE_VMX))
 		return trace_error(-ENODEV);
 
 	vmx_basic = read_msr(MSR_IA32_VMX_BASIC);
@@ -231,7 +234,7 @@ static int vmx_check_features(void)
 		return trace_error(-EIO);
 
 	/* require RDTSCP if present in CPUID */
-	if (cpuid_edx(0x80000001) & X86_FEATURE_RDTSCP) {
+	if (cpuid_edx(0x80000001, 0) & X86_FEATURE_RDTSCP) {
 		enable_rdtscp = SECONDARY_EXEC_RDTSCP;
 		if (!(vmx_proc_ctrl2 & SECONDARY_EXEC_RDTSCP))
 			return trace_error(-EIO);
@@ -280,8 +283,8 @@ static int vmx_check_features(void)
 
 static void ept_set_next_pt(pt_entry_t pte, unsigned long next_pt)
 {
-	*pte = (next_pt & 0x000ffffffffff000UL) | EPT_FLAG_READ |
-		EPT_FLAG_WRITE | EPT_FLAG_EXECUTE;
+	*pte = (next_pt & BIT_MASK(51, 12)) | EPT_FLAG_READ | EPT_FLAG_WRITE |
+		EPT_FLAG_EXECUTE;
 }
 
 int vcpu_vendor_init(void)
@@ -323,12 +326,12 @@ unsigned long arch_paging_gphys2phys(struct per_cpu *cpu_data,
 
 int vcpu_vendor_cell_init(struct cell *cell)
 {
-	int err = -ENOMEM;
+	int err;
 
 	/* allocate io_bitmap */
-	cell->arch.vmx.io_bitmap = page_alloc(&mem_pool, 2);
+	cell->arch.vmx.io_bitmap = page_alloc(&mem_pool, PIO_BITMAP_PAGES);
 	if (!cell->arch.vmx.io_bitmap)
-		return err;
+		return -ENOMEM;
 
 	/* build root EPT of cell */
 	cell->arch.vmx.ept_structs.root_paging = ept_paging;
@@ -570,7 +573,9 @@ static bool vmcs_setup(struct per_cpu *cpu_data)
 
 	ok &= vmx_set_cell_config();
 
-	ok &= vmcs_write32(EXCEPTION_BITMAP, 0);
+	/* see vmx_handle_exception_nmi for the interception reason */
+	ok &= vmcs_write32(EXCEPTION_BITMAP,
+			   (1 << DB_VECTOR) | (1 << AC_VECTOR));
 
 	val = read_msr(MSR_IA32_VMX_EXIT_CTLS);
 	val |= VM_EXIT_HOST_ADDR_SPACE_SIZE |
@@ -601,7 +606,7 @@ int vcpu_init(struct per_cpu *cpu_data)
 	int err;
 
 	/* make sure all perf counters are off */
-	if ((cpuid_eax(0x0a) & 0xff) > 0)
+	if ((cpuid_eax(0x0a, 0) & 0xff) > 0)
 		write_msr(MSR_IA32_PERF_GLOBAL_CTRL, 0);
 
 	if (cpu_data->linux_cr4 & X86_CR4_VMXE)
@@ -648,7 +653,8 @@ int vcpu_init(struct per_cpu *cpu_data)
 	 */
 	write_cr0(X86_CR0_HOST_STATE);
 	write_cr4(X86_CR4_HOST_STATE | X86_CR4_VMXE |
-		  ((cpuid_ecx(1) & X86_FEATURE_XSAVE) ? X86_CR4_OSXSAVE : 0));
+		  ((cpuid_ecx(1, 0) & X86_FEATURE_XSAVE) ?
+		   X86_CR4_OSXSAVE : 0));
 
 	if (!vmxon(cpu_data)) {
 		write_cr4(cpu_data->linux_cr4);
@@ -765,7 +771,7 @@ void __attribute__((noreturn)) vcpu_deactivate_vmm(void)
 	__builtin_unreachable();
 }
 
-static void vmx_vcpu_reset(unsigned int sipi_vector)
+void vcpu_vendor_reset(unsigned int sipi_vector)
 {
 	unsigned long val;
 	bool ok = true;
@@ -844,6 +850,7 @@ static void vmx_vcpu_reset(unsigned int sipi_vector)
 	ok &= vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
 	ok &= vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
 	ok &= vmcs_write64(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+	ok &= vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
 
 	val = vmcs_read32(VM_ENTRY_CONTROLS);
 	val &= ~VM_ENTRY_IA32E_MODE;
@@ -857,35 +864,66 @@ static void vmx_vcpu_reset(unsigned int sipi_vector)
 	}
 }
 
-void vcpu_nmi_handler(void)
+static void vmx_preemption_timer_set_enable(bool enable)
 {
-	u32 pin_based_ctrl;
-
-	if (this_cpu_data()->vmx_state != VMCS_READY)
-		return;
+	u32 pin_based_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
 
-	pin_based_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
-	pin_based_ctrl |= PIN_BASED_VMX_PREEMPTION_TIMER;
+	if (enable)
+		pin_based_ctrl |= PIN_BASED_VMX_PREEMPTION_TIMER;
+	else
+		pin_based_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
 	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, pin_based_ctrl);
 }
 
+void vcpu_nmi_handler(void)
+{
+	if (this_cpu_data()->vmx_state == VMCS_READY)
+		vmx_preemption_timer_set_enable(true);
+}
+
 void vcpu_park(void)
 {
-	vmx_vcpu_reset(0);
+	vcpu_vendor_reset(0);
 	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_HLT);
 }
 
-static void vmx_disable_preemption_timer(void)
+void vcpu_skip_emulated_instruction(unsigned int inst_len)
 {
-	u32 pin_based_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+	vmcs_write64(GUEST_RIP, vmcs_read64(GUEST_RIP) + inst_len);
+}
 
-	pin_based_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, pin_based_ctrl);
+static void vmx_check_events(void)
+{
+	vmx_preemption_timer_set_enable(false);
+	x86_check_events();
 }
 
-void vcpu_skip_emulated_instruction(unsigned int inst_len)
+static void vmx_handle_exception_nmi(void)
 {
-	vmcs_write64(GUEST_RIP, vmcs_read64(GUEST_RIP) + inst_len);
+	u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) {
+		this_cpu_data()->stats[JAILHOUSE_CPU_STAT_VMEXITS_MANAGEMENT]++;
+		asm volatile("int %0" : : "i" (NMI_VECTOR));
+	} else {
+		this_cpu_data()->stats[JAILHOUSE_CPU_STAT_VMEXITS_EXCEPTION]++;
+		/*
+		 * Reinject the event straight away. We only intercept #DB and
+		 * #AC to prevent malicious guests from triggering infinite
+		 * loops in microcode (see e.g. CVE-2015-5307 and
+		 * CVE-2015-8104).
+		 */
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			     intr_info & INTR_TO_VECTORING_INFO_MASK);
+		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
+			     vmcs_read32(VM_EXIT_INTR_ERROR_CODE));
+	}
+
+	/*
+	 * Check for events even in the exception case in order to maintain
+	 * control over the guest if it triggered #DB or #AC loops.
+	 */
+	vmx_check_events();
 }
 
 static void update_efer(void)
@@ -938,12 +976,12 @@ bool vcpu_get_guest_paging_structs(struct guest_paging_structures *pg_structs)
 	if (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE) {
 		pg_structs->root_paging = x86_64_paging;
 		pg_structs->root_table_gphys =
-			vmcs_read64(GUEST_CR3) & 0x000ffffffffff000UL;
+			vmcs_read64(GUEST_CR3) & BIT_MASK(51, 12);
 	} else if (vmcs_read64(GUEST_CR0) & X86_CR0_PG &&
 		   !(vmcs_read64(GUEST_CR4) & X86_CR4_PAE)) {
 		pg_structs->root_paging = i386_paging;
 		pg_structs->root_table_gphys =
-			vmcs_read64(GUEST_CR3) & 0xfffff000UL;
+			vmcs_read64(GUEST_CR3) & BIT_MASK(31, 12);
 	} else {
 		printk("FATAL: Unsupported paging mode\n");
 		return false;
@@ -1046,25 +1084,16 @@ void vcpu_vendor_get_mmio_intercept(struct vcpu_mmio_intercept *mmio)
 void vcpu_handle_exit(struct per_cpu *cpu_data)
 {
 	u32 reason = vmcs_read32(VM_EXIT_REASON);
-	int sipi_vector;
 
 	cpu_data->stats[JAILHOUSE_CPU_STAT_VMEXITS_TOTAL]++;
 
 	switch (reason) {
 	case EXIT_REASON_EXCEPTION_NMI:
-		asm volatile("int %0" : : "i" (NMI_VECTOR));
-		/* fall through */
+		vmx_handle_exception_nmi();
+		return;
 	case EXIT_REASON_PREEMPTION_TIMER:
 		cpu_data->stats[JAILHOUSE_CPU_STAT_VMEXITS_MANAGEMENT]++;
-		vmx_disable_preemption_timer();
-		sipi_vector = x86_handle_events(cpu_data);
-		if (sipi_vector >= 0) {
-			printk("CPU %d received SIPI, vector %x\n",
-			       cpu_data->cpu_id, sipi_vector);
-			vmx_vcpu_reset(sipi_vector);
-			vcpu_reset(sipi_vector == APIC_BSP_PSEUDO_SIPI);
-		}
-		iommu_check_pending_faults();
+		vmx_check_events();
 		return;
 	case EXIT_REASON_CPUID:
 		vcpu_handle_cpuid();
@@ -1133,7 +1162,7 @@ void vcpu_vendor_get_cell_io_bitmap(struct cell *cell,
 					    struct vcpu_io_bitmap *iobm)
 {
 	iobm->data = cell->arch.vmx.io_bitmap;
-	iobm->size = sizeof(cell->arch.vmx.io_bitmap);
+	iobm->size = PIO_BITMAP_PAGES * PAGE_SIZE;
 }
 
 void vcpu_vendor_get_execution_state(struct vcpu_execution_state *x_state)
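
Note on the BIT_MASK() conversions above: the macro itself is not defined in this
diff, but the literals it replaces pin down its meaning, an inclusive mask of bits
msb..lsb. A minimal stand-alone sketch follows; the BIT_MASK definition in it is
an assumption chosen to reproduce those literals, not necessarily Jailhouse's own
definition:

#include <assert.h>

/* assumed definition: set all bits msb..lsb, inclusive */
#define BIT_MASK(msb, lsb) ((2ULL << (msb)) - (1ULL << (lsb)))

int main(void)
{
	/* x86-64 page-table base address bits, as used for EPT and CR3 */
	assert(BIT_MASK(51, 12) == 0x000ffffffffff000ULL);
	/* 32-bit non-PAE CR3 page-directory base, bits 31..12 */
	assert(BIT_MASK(31, 12) == 0xfffff000ULL);
	return 0;
}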
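
Similarly, the msr_bitmap hunk relies on the VMX MSR-bitmap layout (one bit per
MSR, eight MSRs per byte), so the new entry [ 0x840/8 ... 0xd8f/8 ] = 0xff marks
reads of MSRs 0x840-0xd8f as intercepted. A small illustrative helper, where
msr_read_intercepted is a hypothetical name introduced here, not taken from the
diff:

#include <stdint.h>

/* One bit per MSR in the low-range (0x0000-0x1fff) read bitmap; a set bit
 * means a guest read of that MSR causes a VM exit. */
static inline int msr_read_intercepted(const uint8_t *bitmap, unsigned int msr)
{
	return (bitmap[msr / 8] >> (msr % 8)) & 1;
}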