X-Git-Url: https://rtime.felk.cvut.cz/gitweb/l4.git/blobdiff_plain/615241fa4695499799cebfd8a6dc8d90f3a93f13..bc7a63ab1d89d95508f55f44f23c2adda7a60037:/kernel/fiasco/src/kern/ia32/vmx.cpp diff --git a/kernel/fiasco/src/kern/ia32/vmx.cpp b/kernel/fiasco/src/kern/ia32/vmx.cpp index c81544759..599a9e43b 100644 --- a/kernel/fiasco/src/kern/ia32/vmx.cpp +++ b/kernel/fiasco/src/kern/ia32/vmx.cpp @@ -1,12 +1,13 @@ INTERFACE [vmx]: #include "per_cpu_data.h" +#include #include +#include class Vmx_info { public: - static bool nested_paging() { return false; } template class Bit_defs @@ -18,11 +19,7 @@ public: public: Bit_defs() {} Bit_defs(T _or, T _and) : _or(_or), _and(_and) {} -#if 0 - template - explicit Bit_defs(Bit_defs const &o) - : _or(o.must_be_one()), _and(o.may_be_one()) {} -#endif + T must_be_one() const { return _or; } T may_be_one() const { return _and; } @@ -44,6 +41,12 @@ public: } public: + void relax(unsigned char bit) + { + _or &= ~(T(1) << T(bit)); + _and |= T(1) << T(bit); + } + void enforce(unsigned char bit, bool value = true) { enforce_bits((T)1 << (T)bit, value); } @@ -96,10 +99,71 @@ public: Bit_defs_32 procbased_ctls2; Unsigned64 ept_vpid_cap; - Unsigned64 true_pinbased_ctls; - Unsigned64 true_procbased_ctls; - Unsigned64 true_exit_ctls; - Unsigned64 true_entry_ctls; + Unsigned64 max_index; + Unsigned32 pinbased_ctls_default1; + Unsigned32 procbased_ctls_default1; + Unsigned32 exit_ctls_default1; + Unsigned32 entry_ctls_default1; +}; + + +struct Vmx_user_info +{ + struct Fo_table + { + unsigned char offsets[32]; + static unsigned char const master_offsets[32]; + + enum + { + Foi_size = 4 + }; + + static void *field(void *b, unsigned vm_field) + { + return (char*)b + master_offsets[vm_field >> 10] * 64 + + ((vm_field & 0x3ff) << master_offsets[Foi_size + (vm_field >> 13)]); + } + + void init() + { memcpy(offsets, master_offsets, sizeof(offsets)); } + + static void check_offsets(unsigned max_idx) + { + for (unsigned t1 = 0; t1 < 3; ++t1) + for (unsigned w1 = 0; w1 < 4; ++w1) + for (unsigned t2 = 0; t2 < 3; ++t2) + for (unsigned w2 = 0; w2 < 4; ++w2) + if (t1 != t2 || w1 != w2) + { + unsigned s1 = ((t1 << 10) | (w1 << 13)); + unsigned s2 = ((t2 << 10) | (w2 << 13)); + unsigned e1 = s1 | max_idx; + unsigned e2 = s2 | max_idx; + assert (field(0, s1) > field(0, e2) + || field(0, s2) > field(0, e1)); + (void) s1; (void) s2; (void) e1; (void) e2; + } + } + }; + + Unsigned64 basic; + Vmx_info::Bit_defs_32 pinbased; + Vmx_info::Bit_defs_32 procbased; + Vmx_info::Bit_defs_32 exit; + Vmx_info::Bit_defs_32 entry; + Unsigned64 misc; + Unsigned64 cr0_or; + Unsigned64 cr0_and; + Unsigned64 cr4_or; + Unsigned64 cr4_and; + Unsigned64 vmcs_field_info; + Vmx_info::Bit_defs_32 procbased2; + Unsigned64 ept_vpid_cap; + Unsigned32 pinbased_dfl1; + Unsigned32 procbased_dfl1; + Unsigned32 exit_dfl1; + Unsigned32 entry_dfl1; }; INTERFACE: @@ -140,15 +204,20 @@ public: F_entry_ctls = 0x4012, F_entry_msr_load_cnt = 0x4014, F_entry_int_info = 0x4016, + F_entry_exc_error_code = 0x4018, F_entry_insn_len = 0x401a, F_proc_based_ctls_2 = 0x401e, + F_vm_instruction_error = 0x4400, + F_exit_reason = 0x4402, + F_preempt_timer = 0x482e, F_host_sysenter_cs = 0x4c00, - F_guest_cr2 = 0x6830, + F_sw_guest_cr2 = 0x683e, + F_host_cr0 = 0x6c00, F_host_cr3 = 0x6c02, @@ -161,6 +230,7 @@ public: F_host_sysenter_esp = 0x6c10, F_host_sysenter_eip = 0x6c12, F_host_rip = 0x6c16, + }; enum Pin_based_ctls @@ -183,8 +253,8 @@ public: PRB2_virtualize_apic = 0, PRB2_enable_ept = 1, PRB2_enable_vpid = 5, + PRB2_unrestricted = 7, }; - }; INTERFACE [vmx]: @@ -241,29 +311,44 @@ IMPLEMENTATION[vmx]: #include "l4_types.h" #include #include "idt.h" +#include "warn.h" -Per_cpu DEFINE_PER_CPU_LATE Vmx::cpus(true); +DEFINE_PER_CPU_LATE Per_cpu Vmx::cpus(Per_cpu_data::Cpu_num); PUBLIC void Vmx_info::init() { + bool ept = false; basic = Cpu::rdmsr(0x480); pinbased_ctls = Cpu::rdmsr(0x481); + pinbased_ctls_default1 = pinbased_ctls.must_be_one(); procbased_ctls = Cpu::rdmsr(0x482); + procbased_ctls_default1 = procbased_ctls.must_be_one(); exit_ctls = Cpu::rdmsr(0x483); + exit_ctls_default1 = exit_ctls.must_be_one(); entry_ctls = Cpu::rdmsr(0x484); + entry_ctls_default1 = entry_ctls.must_be_one(); misc = Cpu::rdmsr(0x485); cr0_defs = Bit_defs(Cpu::rdmsr(0x486), Cpu::rdmsr(0x487)); cr4_defs = Bit_defs(Cpu::rdmsr(0x488), Cpu::rdmsr(0x489)); + max_index = Cpu::rdmsr(0x48a); + + assert ((Vmx::F_sw_guest_cr2 & 0x3ff) > max_index); + max_index = Vmx::F_sw_guest_cr2 & 0x3ff; + Vmx_user_info::Fo_table::check_offsets(max_index); + if (basic & (1ULL << 55)) { - true_pinbased_ctls = Cpu::rdmsr(0x48d); - true_procbased_ctls = Cpu::rdmsr(0x48e); - true_exit_ctls = Cpu::rdmsr(0x48f); - true_entry_ctls = Cpu::rdmsr(0x490); + // do not use the true pin-based ctls because user-level then needs to + // be aware of the fact that it has to set bits 1, 2, and 4 to default 1 + if (0) pinbased_ctls = Cpu::rdmsr(0x48d); + + procbased_ctls = Cpu::rdmsr(0x48e); + exit_ctls = Cpu::rdmsr(0x48f); + entry_ctls = Cpu::rdmsr(0x490); } if (0) @@ -283,16 +368,38 @@ Vmx_info::init() if (procbased_ctls.allowed(31)) { + procbased_ctls.enforce(31, true); + procbased_ctls2 = Cpu::rdmsr(0x48b); - if (procbased_ctls2.allowed(1)) + if (procbased_ctls2.allowed(Vmx::PRB2_enable_ept)) ept_vpid_cap = Cpu::rdmsr(0x48c); + // we disable VPID so far, need to handle virtualize it in Fiasco, // as done for AMDs ASIDs procbased_ctls2.enforce(Vmx::PRB2_enable_vpid, false); - // no EPT support yet - procbased_ctls2.enforce(Vmx::PRB2_enable_ept, false); + // EPT only in conjunction with unrestricted guest !!! + if (procbased_ctls2.allowed(Vmx::PRB2_enable_ept)) + { + ept = true; + procbased_ctls2.enforce(Vmx::PRB2_enable_ept, true); + + if (procbased_ctls2.allowed(Vmx::PRB2_unrestricted)) + { + // unrestricted guest allows PE and PG to be 0 + cr0_defs.relax(0); // PE + cr0_defs.relax(31); // PG + procbased_ctls2.enforce(Vmx::PRB2_unrestricted); + } + else + { + assert (not cr0_defs.allowed(0, false)); + assert (not cr0_defs.allowed(31, false)); + } + } + else + assert (not procbased_ctls2.allowed(Vmx::PRB2_unrestricted)); } else procbased_ctls2 = 0; @@ -303,7 +410,7 @@ Vmx_info::init() // host-state is 64bit or not exit_ctls.enforce(9, sizeof(long) > sizeof(int)); - if (!nested_paging()) // needs to be per VM + if (!ept) // needs to be per VM { // always enable paging cr0_defs.enforce(31); @@ -334,10 +441,6 @@ Vmx_info::dump(const char *tag) const cr4_defs.print("cr4_fixed"); procbased_ctls2.print("procbased_ctls2"); printf("ept_vpid_cap = %16llx\n", ept_vpid_cap); - printf("true_pinbased_ctls = %16llx\n", true_pinbased_ctls); - printf("true_procbased_ctls = %16llx\n", true_procbased_ctls); - printf("true_exit_ctls = %16llx\n", true_exit_ctls); - printf("true_entry_ctls = %16llx\n", true_entry_ctls); } PRIVATE static inline @@ -360,61 +463,75 @@ Vmx::vmread(Mword field) return vmread_insn(field) | ((Unsigned64)vmread_insn(field + 1) << 32); } -PUBLIC static inline +PUBLIC static inline NEEDS["warn.h"] template< typename T > void Vmx::vmwrite(Mword field, T value) { Mword err; asm volatile("vmwrite %1, %2; pushf; pop %0" : "=r" (err) : "r" ((Mword)value), "r" (field)); - if (EXPECT_FALSE(err & 0x41)) - printf("FAILED vmwrite(%lx): field=%04lx with value %llx\n", err, field, (Unsigned64)value); + if (EXPECT_FALSE(err & 0x1)) + WARNX(Info, "VMX: VMfailInvalid vmwrite(0x%04lx, %llx) => %lx\n", + field, (Unsigned64)value, err); + else if (EXPECT_FALSE(err & 0x40)) + WARNX(Info, "VMX: VMfailValid vmwrite(0x%04lx, %llx) => %lx, insn error: 0x%x\n", + field, (Unsigned64)value, err, vmread(F_vm_instruction_error)); if (sizeof(T) > sizeof(Mword)) asm volatile("vmwrite %0, %1" : : "r" ((Unsigned64)value >> 32), "r" (field + 1)); } PUBLIC -Vmx::Vmx(unsigned cpu) +Vmx::Vmx(Cpu_number cpu) : _vmx_enabled(false), _has_vpid(false) { Cpu &c = Cpu::cpus.cpu(cpu); if (!c.vmx()) { - printf("VMX: Not supported\n"); + if (cpu == Cpu_number::boot_cpu()) + WARNX(Info, "VMX: Not supported\n"); return; } - printf("VMX: Enabling\n"); - // check whether vmx is enabled by BIOS Unsigned64 feature = 0; - feature = Cpu::rdmsr(0x3a); + feature = Cpu::rdmsr(MSR_IA32_FEATURE_CONTROL); - // vmxon outside SMX allowed? - if (!(feature & 0x4)) - { - printf("VMX: CPU has VMX support but it is disabled\n"); - return; - } + enum + { + Msr_ia32_feature_control_lock = 1 << 0, + Msr_ia32_feature_control_vmx_inside_SMX = 1 << 1, + Msr_ia32_feature_control_vmx_outside_SMX = 1 << 2, + }; - // check whether lock bit is set otherwise vmxon - // will cause a general-protection exception - if (!(feature & 0x1)) + if (feature & Msr_ia32_feature_control_lock) { - printf("VMX: Cannot enable VMX, lock bit not set\n"); - return; + if (!(feature & Msr_ia32_feature_control_vmx_outside_SMX)) + { + if (cpu == Cpu_number::boot_cpu()) + WARNX(Info, "VMX: CPU has VMX support but it is disabled\n"); + return; + } } + else + c.wrmsr(feature | Msr_ia32_feature_control_vmx_outside_SMX | Msr_ia32_feature_control_lock, + MSR_IA32_FEATURE_CONTROL); + + if (cpu == Cpu_number::boot_cpu()) + WARNX(Info, "VMX: Enabled\n"); info.init(); // check for EPT support - if (info.procbased_ctls2.allowed(1)) - printf("VMX: EPT supported\n"); - else - printf("VMX: No EPT available\n"); + if (cpu == Cpu_number::boot_cpu()) + { + if (info.procbased_ctls2.allowed(PRB2_enable_ept)) + WARNX(Info, "VMX: EPT supported\n"); + else + WARNX(Info, "VMX: No EPT available\n"); + } // check for vpid support - if (info.procbased_ctls2.allowed(5)) + if (info.procbased_ctls2.allowed(PRB2_enable_vpid)) _has_vpid = true; c.set_cr4(c.get_cr4() | (1 << 13)); // set CR4.VMXE to 1 @@ -431,12 +548,13 @@ Vmx::Vmx(unsigned cpu) if (vmcs_size > Vmcs_size) { - printf("VMX: VMCS size of %d bytes not supported\n", vmcs_size); + WARN("VMX: VMCS size of %d bytes not supported\n", vmcs_size); return; } // allocate a 4kb region for kernel vmcs - check(_kernel_vmcs = Mapped_allocator::allocator()->alloc(12)); + // FIXME: MUST NOT PANIC ON CPU HOTPLUG + check(_kernel_vmcs = Kmem_alloc::allocator()->alloc(12)); _kernel_vmcs_pa = Kmem::virt_to_phys(_kernel_vmcs); // clean vmcs memset(_kernel_vmcs, 0, vmcs_size); @@ -444,7 +562,8 @@ Vmx::Vmx(unsigned cpu) *(int *)_kernel_vmcs = (info.basic & 0xFFFFFFFF); // allocate a 4kb aligned region for VMXON - check(_vmxon = Mapped_allocator::allocator()->alloc(12)); + // FIXME: MUST NOT PANIC ON CPU HOTPLUG + check(_vmxon = Kmem_alloc::allocator()->alloc(12)); _vmxon_base_pa = Kmem::virt_to_phys(_vmxon); @@ -456,12 +575,14 @@ Vmx::Vmx(unsigned cpu) asm volatile("vmxon %0" : :"m"(_vmxon_base_pa):); _vmx_enabled = true; - printf("VMX: initialized\n"); + if (cpu == Cpu_number::boot_cpu()) + WARNX(Info, "VMX: initialized\n"); Mword eflags; asm volatile("vmclear %1 \n\t" "pushf \n\t" "pop %0 \n\t" : "=r"(eflags) : "m"(_kernel_vmcs_pa):); + // FIXME: MUST NOT PANIC ON CPU HOTPLUG if (eflags & 0x41) panic("VMX: vmclear: VMFailInvalid, vmcs pointer not valid\n"); @@ -470,6 +591,7 @@ Vmx::Vmx(unsigned cpu) "pushf \n\t" "pop %0 \n\t" : "=r"(eflags) : "m"(_kernel_vmcs_pa):); + // FIXME: MUST NOT PANIC ON CPU HOTPLUG if (eflags & 0x41) panic("VMX: vmptrld: VMFailInvalid, vmcs pointer not valid\n"); @@ -491,7 +613,10 @@ Vmx::Vmx(unsigned cpu) vmwrite(F_host_sysenter_eip, entry_sys_fast_ipc_c); if (c.features() & FEAT_PAT && info.exit_ctls.allowed(19)) - vmwrite(F_host_ia32_pat, Cpu::rdmsr(MSR_PAT)); + { + vmwrite(F_host_ia32_pat, Cpu::rdmsr(MSR_PAT)); + info.exit_ctls.enforce(19, true); + } else { // We have no proper PAT support, so disallow PAT load store for @@ -501,7 +626,10 @@ Vmx::Vmx(unsigned cpu) } if (info.exit_ctls.allowed(21)) // Load IA32_EFER - vmwrite(F_host_ia32_efer, Cpu::rdmsr(MSR_EFER)); + { + vmwrite(F_host_ia32_efer, Cpu::rdmsr(MSR_EFER)); + info.exit_ctls.enforce(21, true); + } else { // We have no EFER load for host, so disallow EFER load store for @@ -516,8 +644,8 @@ Vmx::Vmx(unsigned cpu) // do not allow Load IA32_PERF_GLOBAL_CTRL on entry info.entry_ctls.enforce(13, false); - vmwrite(F_host_cr0, info.cr0_defs.apply(Cpu::get_cr0())); - vmwrite(F_host_cr4, info.cr4_defs.apply(Cpu::get_cr4())); + vmwrite(F_host_cr0, Cpu::get_cr0()); + vmwrite(F_host_cr4, Cpu::get_cr4()); Pseudo_descriptor pseudo; c.get_gdt()->get(&pseudo); @@ -531,13 +659,69 @@ Vmx::Vmx(unsigned cpu) vmwrite(0x2800, ~0ULL); // link pointer vmwrite(F_cr3_target_cnt, 0); - // MSR load / store disbaled + // MSR load / store disabled vmwrite(F_exit_msr_load_cnt, 0); vmwrite(F_exit_msr_store_cnt, 0); vmwrite(F_entry_msr_load_cnt, 0); } +/* + * VMCS field offset table: + * 0h - 2h: 3 offsets for 16bit fields: + * 0: Control fields, 1: read-only fields, 2: guest state + * all offsets in 64byte granules relative to the start of the VMCS + * 3h: Reserved + * 4h - 7h: Index shift values for 16bit, 64bit, 32bit, and natural width fields + * 8h - Ah: 3 offsets for 64bit fields + * Bh - Fh: Reserved + * 10h - 12h: 3 offsets for 32bit fields + * 13h - 17h: Reserved + * 18h - 1Ah: 3 offsets for natural width fields + * 1Bh: Reserved + * 1Ch: Offset of first VMCS field + * 1Dh: Full size of VMCS fields + * 1Eh - 1Fh: Reserved + * + */ +unsigned char const Vmx_user_info::Fo_table::master_offsets[32] = +{ + 64 / 64, 768 / 64, 1472 / 64, 0, 0, 2, 1, 2, + 128 / 64, 832 / 64, 1536 / 64, 0, 0, 0, 0, 0, + 384 / 64, 1088 / 64, 1792 / 64, 0, 0, 0, 0, 0, + 512 / 64, 1216 / 64, 1920 / 64, 0, 64 / 64, 2112 / 64, 0, 0, +}; + +PUBLIC inline +void +Vmx::init_vmcs_infos(void *vcpu_state) const +{ + Vmx_user_info *i = reinterpret_cast((char*)vcpu_state + 0x200); + i->basic = info.basic; + i->pinbased = info.pinbased_ctls; + i->procbased = info.procbased_ctls; + i->exit = info.exit_ctls; + i->entry = info.entry_ctls; + i->misc = info.misc; + i->cr0_or = info.cr0_defs.must_be_one(); + i->cr0_and = info.cr0_defs.may_be_one(); + i->cr4_or = info.cr4_defs.must_be_one(); + i->cr4_and = info.cr4_defs.may_be_one(); + i->vmcs_field_info = info.max_index; + i->procbased2 = info.procbased_ctls2; + i->ept_vpid_cap = info.ept_vpid_cap; + i->pinbased_dfl1 = info.pinbased_ctls_default1; + i->procbased_dfl1 = info.procbased_ctls_default1; + i->exit_dfl1 = info.exit_ctls_default1; + i->entry_dfl1 = info.entry_ctls_default1; + + Vmx_user_info::Fo_table *infos = reinterpret_cast((char*)vcpu_state + 0x420); + Unsigned32 *inf = reinterpret_cast((char*)vcpu_state + 0x410); + inf[0] = F_sw_guest_cr2; + infos->init(); +} + + PUBLIC void * Vmx::kernel_vmcs() const