3 #include "per_cpu_data.h"
// Whether nested paging (EPT) is in use — not implemented yet, so
// CR0/CR4 fixed-bit enforcement always takes the shadow-paging path.
static bool
nested_paging()
{
  return false;
}
// Construct from an OR mask (bits forced to 1) and an AND mask
// (bits allowed to be 1); see apply() for how they constrain a value.
20 Bit_defs(T _or, T _and) : _or(_or), _and(_and) {}
// Converting constructor: copy the two masks from a Bit_defs of a
// different underlying type (explicit, so narrowing is never silent).
23 explicit Bit_defs(Bit_defs<T2> const &o)
24 : _or(o.must_be_one()), _and(o.may_be_one()) {}
26 T must_be_one() const { return _or; }
27 T may_be_one() const { return _and; }
// Presumably pins every bit in mask `m` to `value` (true -> must be
// one, false -> must be zero) by updating _or/_and — body not visible
// in this excerpt; see the single-bit wrapper enforce().
30 void enforce_bits(T m, bool value = true)
// Presumably reports whether the bits in `m` may legally take `value`
// under the current masks — body not visible in this excerpt; see the
// single-bit wrapper allowed().
38 bool allowed_bits(T m, bool value = true) const
47 void enforce(unsigned char bit, bool value = true)
48 { enforce_bits((T)1 << (T)bit, value); }
50 bool allowed(unsigned char bit, bool value = true) const
51 { return allowed_bits((T)1 << (T)bit, value); }
53 T apply(T v) const { return (v | _or) & _and; }
// Debug dump of the two masks, AND mask first, then OR mask.
// NOTE(review): the branch choosing the narrow %8x format (presumably
// `if (sizeof(T) <= 4)`) is not visible in this excerpt — confirm.
55 void print(char const *name) const
58 printf("%20s = %8x %8x\n", name, (unsigned)_and, (unsigned)_or);
59 else if (sizeof(T) <= 8)
60 printf("%20s = %16llx %16llx\n", name, (unsigned long long)_and, (unsigned long long)_or);
// 32-bit control-field constraints initialized straight from a VMX
// capability MSR value: the low 32 bits become the must-be-one (OR)
// mask and the high 32 bits the may-be-one (AND) mask — matches the
// Intel "allowed 0/1 settings" MSR layout; verify against the SDM.
64 class Bit_defs_32 : public Bit_defs<Unsigned32>
// intentionally implicit: enables direct assignment from Cpu::rdmsr()
68 Bit_defs_32(Unsigned64 v) : Bit_defs<Unsigned32>(v, v >> 32) {}
71 typedef Bit_defs<Unsigned64> Bit_defs_64;
78 explicit Flags(T v) : _f(v) {}
80 T test(unsigned char bit) const { return _f & ((T)1 << (T)bit); }
87 Bit_defs_32 pinbased_ctls;
88 Bit_defs_32 procbased_ctls;
90 Bit_defs_32 exit_ctls;
91 Bit_defs_32 entry_ctls;
94 Bit_defs<Mword> cr0_defs;
95 Bit_defs<Mword> cr4_defs;
96 Bit_defs_32 procbased_ctls2;
98 Unsigned64 ept_vpid_cap;
99 Unsigned64 true_pinbased_ctls;
100 Unsigned64 true_procbased_ctls;
101 Unsigned64 true_exit_ctls;
102 Unsigned64 true_entry_ctls;
114 F_host_es_selector = 0x0c00,
115 F_host_cs_selector = 0x0c02,
116 F_host_ss_selector = 0x0c04,
117 F_host_ds_selector = 0x0c06,
118 F_host_fs_selector = 0x0c08,
119 F_host_gs_selector = 0x0c0a,
120 F_host_tr_selector = 0x0c0c,
122 F_tsc_offset = 0x2010,
123 F_apic_access_addr = 0x2014,
125 F_guest_pat = 0x2804,
126 F_guest_efer = 0x2806,
128 F_host_ia32_pat = 0x2c00,
129 F_host_ia32_efer = 0x2c02,
130 F_host_ia32_perf_global_ctrl = 0x2c04,
133 F_pin_based_ctls = 0x4000,
134 F_proc_based_ctls = 0x4002,
136 F_cr3_target_cnt = 0x400a,
137 F_exit_ctls = 0x400c,
138 F_exit_msr_store_cnt = 0x400e,
139 F_exit_msr_load_cnt = 0x4010,
140 F_entry_ctls = 0x4012,
141 F_entry_msr_load_cnt = 0x4014,
142 F_entry_int_info = 0x4016,
143 F_entry_exc_error_code = 0x4018,
144 F_entry_insn_len = 0x401a,
145 F_proc_based_ctls_2 = 0x401e,
147 F_preempt_timer = 0x482e,
149 F_host_sysenter_cs = 0x4c00,
151 F_guest_cr2 = 0x6830,
156 F_host_fs_base = 0x6c06,
157 F_host_gs_base = 0x6c08,
158 F_host_tr_base = 0x6c0a,
159 F_host_gdtr_base = 0x6c0c,
160 F_host_idtr_base = 0x6c0e,
161 F_host_sysenter_esp = 0x6c10,
162 F_host_sysenter_eip = 0x6c12,
168 PIB_ext_int_exit = 0,
172 enum Primary_proc_based_ctls
174 PRB1_tpr_shadow = 21,
175 PRB1_unconditional_io_exit = 24,
176 PRB1_use_io_bitmaps = 25,
177 PRB1_use_msr_bitmaps = 28,
178 PRB1_enable_proc_based_ctls_2 = 31,
181 enum Secondary_proc_based_ctls
183 PRB2_virtualize_apic = 0,
185 PRB2_enable_vpid = 5,
193 #include "cpu_lock.h"
200 static Per_cpu<Vmx> cpus;
206 Unsigned64 _vmxon_base_pa;
208 Unsigned64 _kernel_vmcs_pa;
217 //-----------------------------------------------------------------------------
218 INTERFACE [vmx && ia32]:
223 enum { Gpregs_words = 11 };
226 //-----------------------------------------------------------------------------
227 INTERFACE [vmx && amd64]:
232 enum { Gpregs_words = 19 };
236 // -----------------------------------------------------------------------
241 #include "l4_types.h"
245 Per_cpu<Vmx> DEFINE_PER_CPU_LATE Vmx::cpus(true);
251 basic = Cpu::rdmsr(0x480);
252 pinbased_ctls = Cpu::rdmsr(0x481);
253 procbased_ctls = Cpu::rdmsr(0x482);
254 exit_ctls = Cpu::rdmsr(0x483);
255 entry_ctls = Cpu::rdmsr(0x484);
256 misc = Cpu::rdmsr(0x485);
258 cr0_defs = Bit_defs<Mword>(Cpu::rdmsr(0x486), Cpu::rdmsr(0x487));
259 cr4_defs = Bit_defs<Mword>(Cpu::rdmsr(0x488), Cpu::rdmsr(0x489));
261 if (basic & (1ULL << 55))
263 true_pinbased_ctls = Cpu::rdmsr(0x48d);
264 true_procbased_ctls = Cpu::rdmsr(0x48e);
265 true_exit_ctls = Cpu::rdmsr(0x48f);
266 true_entry_ctls = Cpu::rdmsr(0x490);
270 dump("as read from hardware");
272 pinbased_ctls.enforce(Vmx::PIB_ext_int_exit);
273 pinbased_ctls.enforce(Vmx::PIB_nmi_exit);
276 // I/O pass-through is currently missing, so disable I/O bitmaps and enforce
277 // unconditional I/O exiting
278 procbased_ctls.enforce(Vmx::PRB1_use_io_bitmaps, false);
279 procbased_ctls.enforce(Vmx::PRB1_unconditional_io_exit);
281 // virtual APIC not yet supported
282 procbased_ctls.enforce(Vmx::PRB1_tpr_shadow, false);
284 if (procbased_ctls.allowed(31))
286 procbased_ctls2 = Cpu::rdmsr(0x48b);
287 if (procbased_ctls2.allowed(1))
288 ept_vpid_cap = Cpu::rdmsr(0x48c);
290 // we disable VPID for now; we still need to virtualize it in Fiasco,
291 // as done for AMD's ASIDs
292 procbased_ctls2.enforce(Vmx::PRB2_enable_vpid, false);
294 // no EPT support yet
295 procbased_ctls2.enforce(Vmx::PRB2_enable_ept, false);
300 // never automatically ack interrupts on exit
301 exit_ctls.enforce(15, false);
303 // host-state is 64bit or not
304 exit_ctls.enforce(9, sizeof(long) > sizeof(int));
306 if (!nested_paging()) // needs to be per VM
308 // always enable paging
309 cr0_defs.enforce(31);
312 cr4_defs.enforce(4); // PSE
314 // enforce PAE on 64bit, and disallow it on 32bit
315 cr4_defs.enforce(5, sizeof(long) > sizeof(int));
// Print every captured VMX capability MSR for debugging; `tag`
// distinguishes the raw hardware values from the post-adjustment state
// (see the two call sites around setup).
324 Vmx_info::dump(const char *tag) const
326 printf("VMX MSRs %s:\n", tag);
327 printf("basic = %16llx\n", basic);
328 pinbased_ctls.print("pinbased_ctls");
329 procbased_ctls.print("procbased_ctls");
330 exit_ctls.print("exit_ctls");
331 entry_ctls.print("entry_ctls");
332 printf("misc = %16llx\n", misc);
333 cr0_defs.print("cr0_fixed");
334 cr4_defs.print("cr4_fixed");
335 procbased_ctls2.print("procbased_ctls2");
336 printf("ept_vpid_cap = %16llx\n", ept_vpid_cap);
337 printf("true_pinbased_ctls = %16llx\n", true_pinbased_ctls);
338 printf("true_procbased_ctls = %16llx\n", true_procbased_ctls);
339 printf("true_exit_ctls = %16llx\n", true_exit_ctls);
340 printf("true_entry_ctls = %16llx\n", true_entry_ctls);
// Raw VMREAD of one VMCS field into a machine word (return-type line
// not visible here — presumably Mword).
// NOTE(review): no VMfail flags check — read errors are silently
// ignored, unlike the CF|ZF check done in vmwrite().
343 PRIVATE static inline
345 Vmx::vmread_insn(Mword field)
348 asm volatile("vmread %1, %0" : "=r" (val) : "r" (field));
// Typed VMCS read: values that fit a machine word need one VMREAD;
// wider values (64-bit fields on a 32-bit host) are assembled from two
// reads, with the upper 32 bits taken from `field + 1`.
352 PUBLIC static inline NEEDS[Vmx::vmread_insn]
353 template< typename T >
355 Vmx::vmread(Mword field)
357 if (sizeof(T) <= sizeof(Mword))
358 return vmread_insn(field);
360 return vmread_insn(field) | ((Unsigned64)vmread_insn(field + 1) << 32);
// Typed VMCS write with error reporting: EFLAGS is sampled right after
// VMWRITE and 0x41 masks CF|ZF, i.e. VMfailInvalid/VMfailValid.
// Values wider than a machine word write their upper 32 bits to
// `field + 1`; note that this second VMWRITE is not error-checked.
364 template< typename T >
366 Vmx::vmwrite(Mword field, T value)
369 asm volatile("vmwrite %1, %2; pushf; pop %0" : "=r" (err) : "r" ((Mword)value), "r" (field));
370 if (EXPECT_FALSE(err & 0x41))
371 printf("FAILED vmwrite(%lx): field=%04lx with value %llx\n", err, field, (Unsigned64)value);
372 if (sizeof(T) > sizeof(Mword))
373 asm volatile("vmwrite %0, %1" : : "r" ((Unsigned64)value >> 32), "r" (field + 1));
// Leave VMX root operation on the current CPU.
376 static void vmxoff(void)
378 asm volatile("vmxoff");
382 Vmx::Vmx(unsigned cpu)
383 : _vmx_enabled(false), _has_vpid(false)
385 Cpu &c = Cpu::cpus.cpu(cpu);
388 printf("VMX: Not supported\n");
392 printf("VMX: Enabling\n");
394 // check whether vmx is enabled by BIOS
395 Unsigned64 feature = 0;
396 feature = Cpu::rdmsr(0x3a);
398 // vmxon outside SMX allowed?
399 if (!(feature & 0x4))
401 printf("VMX: CPU has VMX support but it is disabled\n");
405 // check whether lock bit is set otherwise vmxon
406 // will cause a general-protection exception
407 if (!(feature & 0x1))
409 printf("VMX: Cannot enable VMX, lock bit not set\n");
415 // check for EPT support
416 if (info.procbased_ctls2.allowed(1))
417 printf("VMX: EPT supported\n");
419 printf("VMX: No EPT available\n");
421 // check for vpid support
422 if (info.procbased_ctls2.allowed(5))
425 c.set_cr4(c.get_cr4() | (1 << 13)); // set CR4.VMXE to 1
427 // if NE bit is not set vmxon will fail
428 c.set_cr0(c.get_cr0() | (1 << 5));
432 Vmcs_size = 0x1000, // actual size may be different
435 Unsigned32 vmcs_size = ((info.basic & (0x1fffULL << 32)) >> 32);
437 if (vmcs_size > Vmcs_size)
439 printf("VMX: VMCS size of %d bytes not supported\n", vmcs_size);
443 // allocate a 4kb region for kernel vmcs
444 check(_kernel_vmcs = Mapped_allocator::allocator()->alloc(12));
445 _kernel_vmcs_pa = Kmem::virt_to_phys(_kernel_vmcs);
447 memset(_kernel_vmcs, 0, vmcs_size);
448 // init vmcs with revision identifier
449 *(int *)_kernel_vmcs = (info.basic & 0xFFFFFFFF);
451 // allocate a 4kb aligned region for VMXON
452 check(_vmxon = Mapped_allocator::allocator()->alloc(12));
454 _vmxon_base_pa = Kmem::virt_to_phys(_vmxon);
456 // init vmxon region with vmcs revision identifier
457 // which is stored in the lower 32 bits of MSR 0x480
458 *(unsigned *)_vmxon = (info.basic & 0xFFFFFFFF);
460 // enable vmx operation
461 asm volatile("vmxon %0" : :"m"(_vmxon_base_pa):);
466 printf("VMX: initialized\n");
469 asm volatile("vmclear %1 \n\t"
471 "pop %0 \n\t" : "=r"(eflags) : "m"(_kernel_vmcs_pa):);
473 panic("VMX: vmclear: VMFailInvalid, vmcs pointer not valid\n");
475 // make kernel vmcs current
476 asm volatile("vmptrld %1 \n\t"
478 "pop %0 \n\t" : "=r"(eflags) : "m"(_kernel_vmcs_pa):);
481 panic("VMX: vmptrld: VMFailInvalid, vmcs pointer not valid\n");
483 extern char entry_sys_fast_ipc_c[];
484 extern char vm_vmx_exit_vec[];
486 vmwrite(F_host_es_selector, GDT_DATA_KERNEL);
487 vmwrite(F_host_cs_selector, GDT_CODE_KERNEL);
488 vmwrite(F_host_ss_selector, GDT_DATA_KERNEL);
489 vmwrite(F_host_ds_selector, GDT_DATA_KERNEL);
491 Unsigned16 tr = c.get_tr();
492 vmwrite(F_host_tr_selector, tr);
494 vmwrite(F_host_tr_base, ((*c.get_gdt())[tr / 8]).base());
495 vmwrite(F_host_rip, vm_vmx_exit_vec);
496 vmwrite<Mword>(F_host_sysenter_cs, Gdt::gdt_code_kernel);
497 vmwrite(F_host_sysenter_esp, &c.kernel_sp());
498 vmwrite(F_host_sysenter_eip, entry_sys_fast_ipc_c);
500 if (c.features() & FEAT_PAT && info.exit_ctls.allowed(19))
501 vmwrite(F_host_ia32_pat, Cpu::rdmsr(MSR_PAT));
504 // We have no proper PAT support, so disallow PAT load store for
506 info.exit_ctls.enforce(18, false);
507 info.entry_ctls.enforce(14, false);
510 if (info.exit_ctls.allowed(21)) // Load IA32_EFER
511 vmwrite(F_host_ia32_efer, Cpu::rdmsr(MSR_EFER));
514 // We have no EFER load for host, so disallow EFER load store for
516 info.exit_ctls.enforce(20, false);
517 info.entry_ctls.enforce(15, false);
520 if (info.exit_ctls.allowed(12))
521 vmwrite(F_host_ia32_perf_global_ctrl, Cpu::rdmsr(0x199));
523 // do not allow Load IA32_PERF_GLOBAL_CTRL on entry
524 info.entry_ctls.enforce(13, false);
526 vmwrite(F_host_cr0, info.cr0_defs.apply(Cpu::get_cr0()));
527 vmwrite(F_host_cr4, info.cr4_defs.apply(Cpu::get_cr4()));
529 Pseudo_descriptor pseudo;
530 c.get_gdt()->get(&pseudo);
532 vmwrite(F_host_gdtr_base, pseudo.base());
535 vmwrite(F_host_idtr_base, pseudo.base());
537 // init static guest area stuff
538 vmwrite(0x2800, ~0ULL); // link pointer
539 vmwrite(F_cr3_target_cnt, 0);
541 // MSR load / store disabled
542 vmwrite(F_exit_msr_load_cnt, 0);
543 vmwrite(F_exit_msr_store_cnt, 0);
544 vmwrite(F_entry_msr_load_cnt, 0);
// Virtual address of this CPU's kernel VMCS region.
550 Vmx::kernel_vmcs() const
551 { return _kernel_vmcs; }
// Physical address of the kernel VMCS, as passed to vmclear/vmptrld.
555 Vmx::kernel_vmcs_pa() const
556 { return _kernel_vmcs_pa; }
// Whether VMX operation was successfully enabled on this CPU
// (initialized false in the constructor; the site setting it true is
// not visible in this excerpt).
560 Vmx::vmx_enabled() const
561 { return _vmx_enabled; }
// Whether the CPU advertises VPID support (secondary proc-based
// control bit 5); the flag's assignment site is not visible here.
565 Vmx::has_vpid() const
566 { return _has_vpid; }