4 #include "per_cpu_data.h"
// VMX (Intel VT-x) virtual machine object; extends the generic Vm base.
10 class Vm_vmx : public Vm
// Low-level assembly entry that enters the guest via VMLAUNCH/VMRESUME
// (implementation not visible here; symbol fixed to "resume_vm_vmx").
// regparm(3): argument passed in registers, not on the stack.
// Returns host flags/status describing how the VM entry/exit went.
13 static unsigned long resume_vm_vmx(Mword *regs)
14 asm("resume_vm_vmx") __attribute__((__regparm__(3)));
24 //----------------------------------------------------------------------------
28 #include "mem_space.h"
31 #include "thread.h" // XXX: circular dep, move this out here!
32 #include "thread_state.h" // XXX: circular dep, move this out here!
// Construct a VMX VM, charging allocations against the given RAM quota.
// (Body not visible in this fragment.)
38 Vm_vmx::Vm_vmx(Ram_quota *q)
// Placement new: the storage 'p' was obtained elsewhere (presumably the
// quota-aware allocator used in operator delete below — confirm there).
44 Vm_vmx::operator new (size_t size, void *p)
// Guard against allocation with the wrong size (e.g. via a derived type).
46 assert (size == sizeof (Vm_vmx));
// Return the object's storage to the quota-aware slab allocator,
// crediting the freed memory back to the owning RAM quota.
52 Vm_vmx::operator delete (void *ptr)
// Need the object's quota before freeing; safe because q_free only
// releases the storage after we read ram_quota().
54 Vm_vmx *t = reinterpret_cast<Vm_vmx*>(ptr);
55 allocator<Vm_vmx>()->q_free(t->ram_quota(), ptr);
// Map a VMCS field encoding to the start of its group inside the
// software VMCS buffer. Layout: 0x80-byte blocks, indexed by the field's
// width bits (field >> 13) and type bits ((field >> 10) & 3); the "+ 1"
// skips the first 0x80-byte block (header area — assumed; confirm
// against the software-VMCS layout definition, not visible here).
63 Vm_vmx::field_offset(void *vmcs, unsigned field)
65 return (void *)((char *)vmcs
66 + ((field >> 13) * 4 + ((field >> 10) & 3) + 1) * 0x80);
// Byte width of a VMCS field, derived from the encoding's width bits:
// index 0 = 16-bit, 1 = 64-bit, 2 = 32-bit, 3 = natural width (Mword).
71 Vm_vmx::field_width(unsigned field)
73 static const char widths[4] = { 2, 8, 4, sizeof(Mword) };
74 return widths[field >> 13];
// Read a field from the software VMCS, sanitize it through the bit
// filter 'm' (forces/clears bits the kernel does not permit the guest
// to control), write the result to the hardware VMCS, and return the
// filtered value so the caller can test individual bits later.
81 Vm_vmx::load(unsigned field, void *vmcs, Vmx_info::Bit_defs<T> const &m)
83 T res = m.apply(read<T>(vmcs, field));
84 Vmx::vmwrite(field, res);
85 return Vmx_info::Flags<T>(res);
// Copy an inclusive range of fields from the software VMCS into the
// hardware VMCS. Step is 2 because VMCS field encodings use bit 0 as
// the high/low access qualifier, so consecutive fields differ by 2.
90 Vm_vmx::load(unsigned field_first, unsigned field_last, void *vmcs)
92 for (; field_first <= field_last; field_first += 2)
93 load(field_first, vmcs);
// Raw read of a field from the software VMCS buffer: locate the field's
// group block, then index by the field number within the group
// ((field >> 1) & 0xff), scaled by the field's storage type T.
97 template< typename T >
99 Vm_vmx::_internal_read(void *vmcs, unsigned field)
101 vmcs = field_offset(vmcs, field);
102 return *((T *)vmcs + ((field >> 1) & 0xff));
// Raw write of a field into the software VMCS buffer; addressing is the
// exact mirror of _internal_read above.
105 PRIVATE inline static
106 template< typename T >
108 Vm_vmx::_internal_write(void *vmcs, unsigned field, T value)
110 vmcs = field_offset(vmcs, field);
111 *((T*)vmcs + ((field >> 1) & 0xff)) = value;
// Copy one field from the software VMCS to the hardware VMCS, switching
// on the encoding's width bits (field >> 13) so the value is read with
// the correct storage type: 0=16-bit, 1=64-bit, 2=32-bit, 3=natural.
116 Vm_vmx::load(unsigned field, void *vmcs)
120 case 0: Vmx::vmwrite(field, _internal_read<Unsigned16>(vmcs, field)); break;
121 case 1: Vmx::vmwrite(field, _internal_read<Unsigned64>(vmcs, field)); break;
122 case 2: Vmx::vmwrite(field, _internal_read<Unsigned32>(vmcs, field)); break;
123 case 3: Vmx::vmwrite(field, _internal_read<Mword>(vmcs, field)); break;
// Copy one field from the hardware VMCS back into the software VMCS;
// inverse of load() above, with the same width dispatch.
129 Vm_vmx::store(unsigned field, void *vmcs)
133 case 0: _internal_write(vmcs, field, Vmx::vmread<Unsigned16>(field)); break;
134 case 1: _internal_write(vmcs, field, Vmx::vmread<Unsigned64>(field)); break;
135 case 2: _internal_write(vmcs, field, Vmx::vmread<Unsigned32>(field)); break;
136 case 3: _internal_write(vmcs, field, Vmx::vmread<Mword>(field)); break;
// Copy an inclusive range of fields from the hardware VMCS back into
// the software VMCS (step 2: bit 0 of the encoding is the access type).
142 Vm_vmx::store(unsigned field_first, unsigned field_last, void *vmcs)
144 for (; field_first <= field_last; field_first += 2)
145 store(field_first, vmcs);
// Store 'value' into the software VMCS, truncating/extending it to the
// field's declared width (same width dispatch as load/store above).
148 PRIVATE inline static
149 template< typename T >
151 Vm_vmx::write(void *vmcs, unsigned field, T value)
155 case 0: _internal_write(vmcs, field, (Unsigned16)value); break;
156 case 1: _internal_write(vmcs, field, (Unsigned64)value); break;
157 case 2: _internal_write(vmcs, field, (Unsigned32)value); break;
158 case 3: _internal_write(vmcs, field, (Mword)value); break;
// Read a field from the software VMCS with the correct storage type for
// its declared width, converted to the caller's requested type T.
162 PRIVATE inline static
163 template< typename T >
165 Vm_vmx::read(void *vmcs, unsigned field)
169 case 0: return _internal_read<Unsigned16>(vmcs, field);
170 case 1: return _internal_read<Unsigned64>(vmcs, field);
171 case 2: return _internal_read<Unsigned32>(vmcs, field);
172 case 3: return _internal_read<Mword>(vmcs, field);
// Transfer the guest state from the user-provided software VMCS ('src')
// into the active hardware VMCS before VM entry. All VMX control fields
// are passed through per-CPU Vmx_info filters so user space cannot
// enable VMX features the kernel does not support or must own itself.
180 Vm_vmx::load_guest_state(unsigned cpu, void *src)
182 Vmx &vmx = Vmx::cpus.cpu(cpu);
// read VM-entry controls, apply filter and keep for later
185 Vmx_info::Flags<Unsigned32> entry_ctls
186 = load<Unsigned32>(Vmx::F_entry_ctls, src, vmx.info.entry_ctls);
188 Vmx_info::Flags<Unsigned32> pinbased_ctls
189 = load<Unsigned32>(Vmx::F_pin_based_ctls, src, vmx.info.pinbased_ctls);
191 Vmx_info::Flags<Unsigned32> procbased_ctls
192 = load<Unsigned32>(Vmx::F_proc_based_ctls, src, vmx.info.procbased_ctls);
// Secondary controls only exist when the primary controls enable them;
// otherwise treat them as all-zero for the feature tests below.
194 Vmx_info::Flags<Unsigned32> procbased_ctls_2;
195 if (procbased_ctls.test(Vmx::PRB1_enable_proc_based_ctls_2))
196 procbased_ctls_2 = load<Unsigned32>(Vmx::F_proc_based_ctls_2, src, vmx.info.procbased_ctls2);
198 procbased_ctls_2 = Vmx_info::Flags<Unsigned32>(0);
200 load<Unsigned32>(Vmx::F_exit_ctls, src, vmx.info.exit_ctls);
// write 16-bit fields (guest selector fields — per Intel SDM encodings)
203 load(0x800, 0x80e, src);
// write 64-bit fields
// check if the following bits are allowed to be set in entry_ctls
// (bit numbers per the Intel SDM VM-entry controls definition)
209 if (entry_ctls.test(14)) // PAT load requested
212 if (entry_ctls.test(15)) // EFER load requested
215 if (entry_ctls.test(13)) // IA32_PERF_GLOBAL_CTRL load requested
// complete *beep*, this is Fiasco.OC internal state
221 load(0x280a, 0x2810, src);
// write 32-bit fields
225 load(0x4800, 0x482a, src);
227 if (pinbased_ctls.test(6)) // activate vmx-preemption timer
// write natural-width fields: guest CR0, filtered by the kernel's
// allowed/forced CR0 bit definitions.
231 load<Mword>(0x6800, src, vmx.info.cr0_defs);
// 64-bit kernel: guest CR3 (0x6802) may only be loaded when the guest
// really runs in long mode (EFER.LME set in the software VMCS),
// because without EPT the guest shares the host paging format.
233 if (sizeof(long) > sizeof(int))
235 if (read<Mword>(src, 0x2806) & EFER_LME)
236 Vmx::vmwrite(0x6802, (Mword)mem_space()->phys_dir());
238 WARN("VMX: No, not possible\n");
// for 32bit we can just load the Vm pdbr
243 Vmx::vmwrite(0x6802, (Mword)mem_space()->phys_dir());
// guest CR4, filtered like CR0 above
246 load<Mword>(0x6804, src, vmx.info.cr4_defs);
247 load(0x6806, 0x6826, src);
// VPID must be virtualized in Fiasco
251 if (procbased_ctls_2 & Vmx::PB2_enable_vpid)
252 load(Vmx::F_vpid, src);
// currently io-bitmaps are unsupported
// currently msr-bitmaps are unsupported
// load(0x200C, src); for SMM virtualization
259 load(Vmx::F_tsc_offset, src);
// no virtual APIC yet, and has to be managed in kernel somehow
263 if (procbased_ctls.test(Vmx::PRB1_tpr_shadow))
267 if (procbased_ctls_2.test(Vmx::PRB2_virtualize_apic))
268 load(Vmx::F_apic_access_addr, src);
// exception bit map and pf error-code stuff
271 load(0x4004, 0x4008, src);
// vm entry control stuff: only forward the interruption-information
// field (and its companions) when the guest marked it valid (bit 31).
274 Unsigned32 irq_info = read<Unsigned32>(src, Vmx::F_entry_int_info);
275 if (irq_info & (1UL << 31))
// do event injection
// load error code, if required (deliver-error-code bit)
280 if (irq_info & (1UL << 11))
281 load(Vmx::F_entry_exc_error_code, src);
// types, that require an insn length have bit 10 set (type 4, 5, and 6)
284 if (irq_info & (1UL << 10))
285 load(Vmx::F_entry_insn_len, src);
287 Vmx::vmwrite(Vmx::F_entry_int_info, irq_info);
// hm, we have to check for sanitizing the cr0 and cr4 shadow stuff
291 load(0x6000, 0x6006, src);
// no cr3 target values supported
293 // no cr3 target values supported
// After VM exit, copy the guest state out of the hardware VMCS back
// into the user-visible software VMCS ('dest').
299 Vm_vmx::store_guest_state(unsigned cpu, void *dest)
// read 16-bit fields
302 store(0x800, 0x80e, dest);
// read 64-bit fields
// Only store PAT/EFER/preemption-timer if the (filtered) exit controls
// actually requested the CPU to save them; otherwise the hardware
// fields hold stale data. Bit numbers per the Intel SDM exit controls.
307 Vmx_info &vmx_info = Vmx::cpus.cpu(cpu).info;
308 Vmx_info::Flags<Unsigned32> exit_ctls
309 = Vmx_info::Flags<Unsigned32>(vmx_info.exit_ctls.apply(read<Unsigned32>(dest, Vmx::F_exit_ctls)));
311 if (exit_ctls.test(18)) store(Vmx::F_guest_pat, dest);
312 if (exit_ctls.test(20)) store(Vmx::F_guest_efer, dest);
313 if (exit_ctls.test(22)) store(Vmx::F_preempt_timer, dest);
// EPT and PAE handling missing
317 if (Vmx::cpus.cpu(cpu).has_ept())
318 store(0x280a, 0x2810, dest);
// read 32-bit fields
322 store(0x4800, 0x4826, dest);
// sysenter msr is not saved here, because we trap all msr accesses right now
327 store(0x6824, 0x6826, dest);
// read natural-width fields
333 store(0x6804, 0x6822, dest);
// Copy the execution-control fields from the hardware VMCS back to the
// software VMCS, skipping fields for features this CPU does not report.
339 Vm_vmx::copy_execution_control_back(unsigned cpu, void *dest)
341 Vmx &v = Vmx::cpus.cpu(cpu);
// read 16-bit fields
// read 64-bit fields
347 store(0x2000, 0x2002, dest);
// Feature tests below consult the raw VMX capability MSR values cached
// in Vmx_info (bit positions per the respective IA32_VMX_* MSR layout).
351 Unsigned64 msr = Vmx::cpus.cpu(cpu).info._procbased_ctls; // IA32_VMX_PROCBASED_CTLS
352 if (msr & (1ULL<<53))
// bit 31 of 0x4002 gates access to the secondary controls below
355 if (vmread<Unsigned32>(0x4002) & (1 << 31))
357 msr = Vmx::cpus.cpu(cpu).info._procbased_ctls2; // IA32_VMX_PROCBASED_CTLS2
358 if (msr & (1ULL << 32))
// read 32-bit fields
366 store(0x4000, 0x4004, dest);
// read natural-width fields
370 store(0x6000, 0x600e, dest);
// Copy the VM-exit-control fields from the hardware VMCS back to the
// software VMCS (CPU argument unused here).
375 Vm_vmx::copy_exit_control_back(unsigned ,void *dest)
// read 64-bit fields
378 store(0x2006, 0x2008, dest);
// read 32-bit fields
381 store(0x400c, 0x4010, dest);
// Copy the VM-entry-control fields from the hardware VMCS back to the
// software VMCS (CPU argument unused here).
386 Vm_vmx::copy_entry_control_back(unsigned, void *dest)
// read 64-bit fields
// read 32-bit fields
392 store(0x4012, 0x401a, dest);
// Copy the read-only VM-exit information fields (exit reason,
// qualification, etc. — per Intel SDM encodings 0x44xx/0x64xx) into the
// software VMCS so user space can inspect why the guest exited.
398 Vm_vmx::store_exit_info(unsigned cpu, void *dest)
// read 64-bit fields, HM EPT pf stuff
403 if (Vmx::cpus.cpu(cpu).has_ept())
// clear the valid bit in Vm-entry interruption information
// (the event was either delivered or superseded by this exit, so it
// must not be re-injected on the next entry)
409 Unsigned32 tmp = read<Unsigned32>(dest, Vmx::F_entry_int_info);
410 if (tmp & (1UL << 31))
411 write(dest, Vmx::F_entry_int_info, tmp & ~((Unsigned32)1 << 31));
// read 32-bit fields
415 store(0x4400, 0x440e, dest);
// read natural-width fields
418 store(0x6400, 0x640a, dest);
// Debug helper: print each field in [f, t] (step 2 — bit 0 of the
// encoding is the access qualifier) from both the hardware VMCS
// ("VMCS:") and the software VMCS ("V:") side by side.
423 Vm_vmx::dump(void *v, unsigned f, unsigned t)
425 for (; f <= t; f += 2)
426 printf("%04x: VMCS: %16lx V: %16lx\n",
427 f, Vmx::vmread<Mword>(f), read<Mword>(v, f));
// Debug helper: dump all interesting VMCS field groups.
432 Vm_vmx::dump_state(void *v)
434 dump(v, 0x0800, 0x080e);
435 dump(v, 0x0c00, 0x0c0c);
436 dump(v, 0x2000, 0x201a);
437 dump(v, 0x2800, 0x2810);
// NOTE(review): start (0x2c00) > end (0x2804), so dump()'s "f <= t"
// loop never runs and this line prints nothing. Upper bound is likely
// meant to be in the 0x2c0x range — confirm intended field group.
438 dump(v, 0x2c00, 0x2804);
439 dump(v, 0x4000, 0x4022);
440 dump(v, 0x4400, 0x4420);
441 dump(v, 0x4800, 0x482a);
442 dump(v, 0x6800, 0x6826);
443 dump(v, 0x6c00, 0x6c16);
// Syscall handler: run this VM on the current CPU. Validates the
// caller's message and software-VMCS page, loads guest state into the
// hardware VMCS, enters the guest via resume_vm_vmx(), and copies the
// resulting guest state and exit information back to user space.
// Returns 0 on a clean exit or -EInval on any validation/entry failure.
448 Vm_vmx::sys_vm_run(Syscall_frame *f, Utcb *utcb)
// must run with interrupts disabled while touching the hardware VMCS
450 assert (cpu_lock.test());
/* these 4 must not use ldt entries */
453 assert (!(Cpu::get_cs() & (1 << 2)));
454 assert (!(Cpu::get_ss() & (1 << 2)));
455 assert (!(Cpu::get_ds() & (1 << 2)));
456 assert (!(Cpu::get_es() & (1 << 2)));
458 unsigned cpu = current_cpu();
459 Vmx &v = Vmx::cpus.cpu(cpu);
461 L4_msg_tag const &tag = f->tag();
// (VMX availability check lives on the elided lines just above)
465 WARN("VMX: not supported/enabled\n");
466 return commit_result(-L4_err::EInval);
// message must at least carry the guest GP-register block
469 if (EXPECT_FALSE(tag.words() < 1 + Vmx::Gpregs_words))
471 WARN("VMX: Invalid message length\n");
472 return commit_result(-L4_err::EInval);
// first send item must be the flexpage naming the software VMCS
475 L4_snd_item_iter vmcs_item(utcb, tag.words());
477 if (EXPECT_FALSE(!tag.items() || !vmcs_item.next()))
478 return commit_result(-L4_err::EInval);
480 L4_fpage vmcs_fpage(vmcs_item.get()->d);
482 if (EXPECT_FALSE(!vmcs_fpage.is_mempage()))
484 WARN("VMX: Fpage invalid\n");
485 return commit_error(utcb, L4_error::Overflow);
// the software VMCS must cover at least one whole page
488 if (EXPECT_FALSE(vmcs_fpage.order() < 12))
489 return commit_result(-L4_err::EInval);
492 void *vmcs_s = (void *)(Virt_addr(vmcs_fpage.mem_address()).value());
// the software VMCS must actually be mapped in the caller's space
494 Mem_space::Phys_addr phys_vmcs;
495 Mem_space::Size size;
497 unsigned int page_attribs;
499 Mem_space *const curr_mem_space = current()->space()->mem_space();
500 resident = curr_mem_space->v_lookup(Virt_addr(vmcs_s), &phys_vmcs, &size, &page_attribs);
502 if (EXPECT_FALSE(!resident))
504 WARN("VMX: VMCS invalid\n");
505 return commit_result(-L4_err::EInval);
// This generates a circular dep between thread<->task, this cries for a
// new abstraction...
// ensure the FPU belongs to this thread before entering the guest
511 if (!(current()->state() & Thread_fpu_owner))
513 if (EXPECT_FALSE(!current_thread()->switchin_fpu()))
515 WARN("VMX: switchin_fpu failed\n");
516 return commit_result(-L4_err::EInval);
// reject guests that ask for EPT — nested paging is not supported here
521 if (EXPECT_FALSE(read<Unsigned32>(vmcs_s, 0x201a) != 0)) // EPT POINTER
523 WARN("VMX: no nested paging available\n");
524 return commit_result(-L4_err::EInval);
// increment our refcount, and drop it at the end automatically
529 Ref_ptr<Vm_vmx> pin_myself(this);
// set volatile host state
532 Vmx::vmwrite<Mword>(Vmx::F_host_cr3, Cpu::get_pdbr()); // host_area.cr3
534 load_guest_state(cpu, vmcs_s);
536 Unsigned16 ldt = Cpu::get_ldt();
// guest CR2 is not a VMCS field; install it in the real CR2 manually
539 asm volatile("mov %0, %%cr2" : : "r" (read<Mword>(vmcs_s, Vmx::F_guest_cr2)));
// enter the guest; guest GP registers live in the UTCB message words
541 unsigned long ret = resume_vm_vmx(&utcb->values[1]);
// 0x40 presumably flags VMfail (ZF after VMLAUNCH/VMRESUME) — confirm
// against the resume_vm_vmx assembly
542 if (EXPECT_FALSE(ret & 0x40))
543 return commit_result(-L4_err::EInval);
// save guest CR2 back into the software VMCS
548 asm volatile("mov %%cr2, %0" : "=r" (cpu_cr2));
549 write(vmcs_s, Vmx::F_guest_cr2, cpu_cr2);
// reload TSS, we use I/O bitmaps
// ... do this lazy ...
// clear the busy flag in the TSS descriptor so ltr does not fault
558 Gdt_entry *e = &(*Cpu::cpus.cpu(cpu).get_gdt())[Gdt::gdt_tss / 8];
559 e->access &= ~(1 << 1);
560 asm volatile("" : : "m" (*e));
561 Cpu::set_tr(Gdt::gdt_tss);
// hand the post-exit state back to user space
564 store_guest_state(cpu, vmcs_s);
565 store_exit_info(cpu, vmcs_s);
567 return commit_result(L4_error::None);
// Kernel-object entry point: dispatch invocations on this VM capability
// through the generic VM invoke template for Vm_vmx.
572 Vm_vmx::invoke(L4_obj_ref obj, Mword rights, Syscall_frame *f, Utcb *utcb)
574 vm_invoke<Vm_vmx>(obj, rights, f, utcb);