4 #include "per_cpu_data.h"
// VMX (Intel VT-x) flavor of the Vm kernel object (class body largely
// elided in this excerpt).
10 class Vm_vmx : public Vm
// Entry point for the actual VM entry/exit round trip; bound to the
// assembler label "resume_vm_vmx" (implementation not visible here).
// `regs` points at the guest register save area -- presumably the UTCB
// word array passed by sys_vm_run() below; verify against the asm code.
// regparm(3) makes gcc pass the argument in registers (ia32 convention).
13 static unsigned long resume_vm_vmx(Mword *regs)
14 asm("resume_vm_vmx") __attribute__((__regparm__(3)));
24 //----------------------------------------------------------------------------
28 #include "mem_space.h"
31 #include "thread.h" // XXX: circular dependency, move this out of here!
32 #include "thread_state.h" // XXX: circular dependency, move this out of here!
// Construct a VMX Vm charged against RAM quota `q` (initializer list and
// body elided in this excerpt; `q` presumably matches the quota used by
// operator delete below -- confirm in the full source).
38 Vm_vmx::Vm_vmx(Ram_quota *q)
// Placement-style operator new: storage `p` is provided by the caller;
// only sanity-check that the requested size matches the object size.
44 Vm_vmx::operator new (size_t size, void *p)
47 assert (size == sizeof (Vm_vmx));
// Release a Vm_vmx through the quota-aware slab allocator, crediting the
// freed memory back to the object's own RAM quota.
53 Vm_vmx::operator delete (void *ptr)
55 Vm_vmx *t = reinterpret_cast<Vm_vmx*>(ptr);
56 allocator<Vm_vmx>()->q_free(t->ram_quota(), ptr);
// Map a VMCS field encoding to the base of the 0x80-byte group inside
// the software VMCS image `vmcs` that stores this field: (field >> 13)
// selects the width class (cf. field_width()), bits 11:10 the field
// type; "+ 1" skips a leading 0x80-byte block (presumably a header of
// the software image -- confirm against the image layout).
64 Vm_vmx::field_offset(void *vmcs, unsigned field)
66 return (void *)((char *)vmcs
67 + ((field >> 13) * 4 + ((field >> 10) & 3) + 1) * 0x80);
// Width class of a VMCS field, taken from encoding bits >> 13:
// 0 = 2 bytes (16-bit), 1 = 8 bytes (64-bit), 2 = 4 bytes (32-bit),
// 3 = natural width (sizeof(Mword)).
72 Vm_vmx::field_width(unsigned field)
74 static const char widths[4] = { 2, 8, 4, sizeof(Mword) };
75 return widths[field >> 13];
// Load one field from the software VMCS image into the hardware VMCS,
// first filtering the user-supplied value through the capability mask
// `m` (m.apply() presumably enforces the allowed-0/allowed-1 bits from
// the VMX capability MSRs -- defined elsewhere).  Returns the value
// actually written so callers can test the effective control bits.
82 Vm_vmx::load(unsigned field, void *vmcs, Vmx_info::Bit_defs<T> const &m)
84 T res = m.apply(read<T>(vmcs, field));
85 Vmx::vmwrite(field, res);
86 return Vmx_info::Flags<T>(res);
// Load a consecutive range of fields [field_first, field_last] from the
// software VMCS into the hardware VMCS.  Encodings advance in steps of 2
// (per the Intel SDM, odd encodings access the high halves of 64-bit
// fields).
91 Vm_vmx::load(unsigned field_first, unsigned field_last, void *vmcs)
93 for (; field_first <= field_last; field_first += 2)
94 load(field_first, vmcs);
98 template< typename T >
// Raw typed read of `field` from the software VMCS image: locate the
// field's 0x80-byte group, then index by the field number within it.
100 Vm_vmx::_internal_read(void *vmcs, unsigned field)
102 vmcs = field_offset(vmcs, field);
103 return *((T *)vmcs + ((field >> 1) & 0xff));
106 PRIVATE inline static
107 template< typename T >
// Raw typed write of `value` into the software VMCS image, mirroring
// _internal_read()'s indexing scheme.
109 Vm_vmx::_internal_write(void *vmcs, unsigned field, T value)
111 vmcs = field_offset(vmcs, field);
112 *((T*)vmcs + ((field >> 1) & 0xff)) = value;
// Copy one field, unfiltered, from the software VMCS image into the
// hardware VMCS, dispatching on the field's width class (the switch
// header on field_width(field) is elided in this listing; cases:
// 0 = 16-bit, 1 = 64-bit, 2 = 32-bit, 3 = natural width).
117 Vm_vmx::load(unsigned field, void *vmcs)
121 case 0: Vmx::vmwrite(field, _internal_read<Unsigned16>(vmcs, field)); break;
122 case 1: Vmx::vmwrite(field, _internal_read<Unsigned64>(vmcs, field)); break;
123 case 2: Vmx::vmwrite(field, _internal_read<Unsigned32>(vmcs, field)); break;
124 case 3: Vmx::vmwrite(field, _internal_read<Mword>(vmcs, field)); break;
// Copy one field from the hardware VMCS (vmread) back into the software
// VMCS image, dispatching on the field's width class (switch header
// elided in this listing; same width mapping as load()).
130 Vm_vmx::store(unsigned field, void *vmcs)
134 case 0: _internal_write(vmcs, field, Vmx::vmread<Unsigned16>(field)); break;
135 case 1: _internal_write(vmcs, field, Vmx::vmread<Unsigned64>(field)); break;
136 case 2: _internal_write(vmcs, field, Vmx::vmread<Unsigned32>(field)); break;
137 case 3: _internal_write(vmcs, field, Vmx::vmread<Mword>(field)); break;
// Copy a consecutive range of fields [field_first, field_last] from the
// hardware VMCS back into the software image (step 2, as in load()).
143 Vm_vmx::store(unsigned field_first, unsigned field_last, void *vmcs)
145 for (; field_first <= field_last; field_first += 2)
146 store(field_first, vmcs);
149 PRIVATE inline static
150 template< typename T >
// Typed write of `value` into the software VMCS image, truncating /
// widening `value` to the field's natural storage width (switch header
// on field_width(field) elided in this listing).
152 Vm_vmx::write(void *vmcs, unsigned field, T value)
156 case 0: _internal_write(vmcs, field, (Unsigned16)value); break;
157 case 1: _internal_write(vmcs, field, (Unsigned64)value); break;
158 case 2: _internal_write(vmcs, field, (Unsigned32)value); break;
159 case 3: _internal_write(vmcs, field, (Mword)value); break;
163 PRIVATE inline static
164 template< typename T >
// Typed read of `field` from the software VMCS image, converting from
// the field's storage width to T (switch header elided in this listing).
166 Vm_vmx::read(void *vmcs, unsigned field)
170 case 0: return _internal_read<Unsigned16>(vmcs, field);
171 case 1: return _internal_read<Unsigned64>(vmcs, field);
172 case 2: return _internal_read<Unsigned32>(vmcs, field);
173 case 3: return _internal_read<Mword>(vmcs, field);
// Transfer guest state from the user-supplied software VMCS image `src`
// into the hardware VMCS of CPU `cpu`.  All control fields are filtered
// through the per-CPU Vmx_info capability masks so user space cannot
// enable VMX features the kernel does not permit.  Several lines of this
// function are elided in this listing.
181 Vm_vmx::load_guest_state(unsigned cpu, void *src)
183 Vmx &vmx = Vmx::cpus.cpu(cpu);
185 // read VM-entry controls, apply filter and keep for later
186 Vmx_info::Flags<Unsigned32> entry_ctls
187 = load<Unsigned32>(Vmx::F_entry_ctls, src, vmx.info.entry_ctls);
189 Vmx_info::Flags<Unsigned32> pinbased_ctls
190 = load<Unsigned32>(Vmx::F_pin_based_ctls, src, vmx.info.pinbased_ctls);
192 Vmx_info::Flags<Unsigned32> procbased_ctls
193 = load<Unsigned32>(Vmx::F_proc_based_ctls, src, vmx.info.procbased_ctls);
// secondary processor-based controls are only meaningful when the
// primary controls enable them; otherwise treat them as all-zero
195 Vmx_info::Flags<Unsigned32> procbased_ctls_2;
196 if (procbased_ctls.test(Vmx::PRB1_enable_proc_based_ctls_2))
197 procbased_ctls_2 = load<Unsigned32>(Vmx::F_proc_based_ctls_2, src, vmx.info.procbased_ctls2);
199 procbased_ctls_2 = Vmx_info::Flags<Unsigned32>(0);
201 load<Unsigned32>(Vmx::F_exit_ctls, src, vmx.info.exit_ctls);
203 // write 16-bit fields
204 load(0x800, 0x80e, src);
206 // write 64-bit fields
209 // check if the following bits are allowed to be set in entry_ctls
// (VM-entry control bit numbers; see the Intel SDM VM-entry controls)
210 if (entry_ctls.test(14)) // PAT load requested
213 if (entry_ctls.test(15)) // EFER load requested
216 if (entry_ctls.test(13)) // IA32_PERF_GLOBAL_CTRL load requested
219 // complete *beep*, this is Fiasco.OC internal state
222 load(0x280a, 0x2810, src);
225 // write 32-bit fields
226 load(0x4800, 0x482a, src);
228 if (pinbased_ctls.test(6)) // activate vmx-preemption timer
231 // write natural-width fields
// guest CR0 (0x6800) is filtered through the allowed CR0 bit definitions
232 load<Mword>(0x6800, src, vmx.info.cr0_defs);
// 0x6802 is guest CR3: on a 64-bit kernel only a long-mode guest
// (EFER.LME set in the guest EFER image at 0x2806) may run on the
// task's own page directory; the other case is rejected with a warning
// (the branch structure here is partly elided in this listing)
234 if (sizeof(long) > sizeof(int))
236 if (read<Mword>(src, 0x2806) & EFER_LME)
237 Vmx::vmwrite(0x6802, (Mword)mem_space()->phys_dir())
239 WARN("VMX: No, not possible\n");
243 // for 32bit we can just load the Vm pdbr
244 Vmx::vmwrite(0x6802, (Mword)mem_space()->phys_dir());
247 load<Mword>(0x6804, src, vmx.info.cr4_defs);
248 load(0x6806, 0x6826, src);
250 // VPID must be virtualized in Fiasco
252 if (procbased_ctls_2 & Vmx::PB2_enable_vpid)
253 load(Vmx::F_vpid, src);
256 // currently io-bitmaps are unsupported
257 // currently msr-bitmaps are unsupported
259 // load(0x200C, src); for SMM virtualization
260 load(Vmx::F_tsc_offset, src);
262 // no virtual APIC yet, and has to be managed in kernel somehow
264 if (procbased_ctls.test(Vmx::PRB1_tpr_shadow))
268 if (procbased_ctls_2.test(Vmx::PRB2_virtualize_apic))
269 load(Vmx::F_apic_access_addr, src);
271 // exception bit map and pf error-code stuff
272 load(0x4004, 0x4008, src);
274 // vm entry control stuff
275 Unsigned32 irq_info = read<Unsigned32>(src, Vmx::F_entry_int_info);
// bit 31 = "valid": forward the optional injection fields only when
// user space actually requests an event injection
276 if (irq_info & (1UL << 31))
278 // do event injection
280 // load error code, if required
281 if (irq_info & (1UL << 11))
282 load(Vmx::F_entry_exc_error_code, src);
284 // types, that require an insn length have bit 10 set (type 4, 5, and 6)
285 if (irq_info & (1UL << 10))
286 load(Vmx::F_entry_insn_len, src);
288 Vmx::vmwrite(Vmx::F_entry_int_info, irq_info);
291 // hm, we have to check for sanitizing the cr0 and cr4 shadow stuff
292 load(0x6000, 0x6006, src);
294 // no cr3 target values supported
294 // no cr3 target values supported
// After a VM exit, copy the guest-state fields from the hardware VMCS
// back into the software VMCS image `dest`.
300 Vm_vmx::store_guest_state(unsigned cpu, void *dest)
302 // read 16-bit fields
303 store(0x800, 0x80e, dest);
305 // read 64-bit fields
// re-derive the effective VM-exit controls (same filter as on entry) to
// know which optional guest MSR fields the hardware actually saved
308 Vmx_info &vmx_info = Vmx::cpus.cpu(cpu).info;
309 Vmx_info::Flags<Unsigned32> exit_ctls
310 = Vmx_info::Flags<Unsigned32>(vmx_info.exit_ctls.apply(read<Unsigned32>(dest, Vmx::F_exit_ctls)));
// VM-exit control bits (cf. the fields stored): 18 = save IA32_PAT,
// 20 = save IA32_EFER, 22 = save VMX-preemption timer value
312 if (exit_ctls.test(18)) store(Vmx::F_guest_pat, dest);
313 if (exit_ctls.test(20)) store(Vmx::F_guest_efer, dest);
314 if (exit_ctls.test(22)) store(Vmx::F_preempt_timer, dest);
316 // EPT and PAE handling missing
318 if (Vmx::cpus.cpu(cpu).has_ept())
319 store(0x280a, 0x2810, dest);
322 // read 32-bit fields
323 store(0x4800, 0x4826, dest);
325 // sysenter msr is not saved here, because we trap all msr accesses right now
328 store(0x6824, 0x6826, dest);
331 // read natural-width fields
334 store(0x6804, 0x6822, dest);
// Copy the execution-control fields of the hardware VMCS back into the
// software VMCS image `dest` (several statements elided in this listing).
340 Vm_vmx::copy_execution_control_back(unsigned cpu, void *dest)
342 Vmx &v = Vmx::cpus.cpu(cpu);
343 // read 16-bit fields
347 // read 64-bit fields
348 store(0x2000, 0x2002, dest);
// NOTE(review): the MSR bit tests below appear to probe the "allowed-1"
// halves of the VMX capability MSRs for optional features -- verify the
// exact bit numbers against the Intel SDM appendix on VMX MSRs.
352 Unsigned64 msr = Vmx::cpus.cpu(cpu).info._procbased_ctls; // IA32_VMX_PROCBASED_CTLS
353 if (msr & (1ULL<<53))
// primary processor-based controls (0x4002), bit 31 = activate
// secondary controls
356 if (vmread<Unsigned32>(0x4002) & (1 << 31))
358 msr = Vmx::cpus.cpu(cpu).info._procbased_ctls2; // IA32_VMX_PROCBASED_CTLS2
359 if (msr & (1ULL << 32))
366 // read 32-bit fields
367 store(0x4000, 0x4004, dest);
370 // read natural-width fields
371 store(0x6000, 0x600e, dest);
// Copy the VM-exit control fields of the hardware VMCS back into the
// software VMCS image `dest` (the CPU argument is unused here).
376 Vm_vmx::copy_exit_control_back(unsigned ,void *dest)
378 // read 64-bit fields
379 store(0x2006, 0x2008, dest);
381 // read 32-bit fields
382 store(0x400c, 0x4010, dest);
// Copy the VM-entry control fields of the hardware VMCS back into the
// software VMCS image `dest` (the 64-bit part is elided in this listing;
// the CPU argument is unused).
387 Vm_vmx::copy_entry_control_back(unsigned, void *dest)
389 // read 64-bit fields
392 // read 32-bit fields
393 store(0x4012, 0x401a, dest);
// After a VM exit, copy the read-only exit-information fields into the
// software VMCS image `dest` and invalidate any consumed event
// injection so it is not re-injected on the next entry.
399 Vm_vmx::store_exit_info(unsigned cpu, void *dest)
402 // read 64-bit fields, HM EPT pf stuff
404 if (Vmx::cpus.cpu(cpu).has_ept())
408 // clear the valid bit in Vm-entry interruption information
410 Unsigned32 tmp = read<Unsigned32>(dest, Vmx::F_entry_int_info);
411 if (tmp & (1UL << 31))
412 write(dest, Vmx::F_entry_int_info, tmp & ~((Unsigned32)1 << 31));
415 // read 32-bit fields
416 store(0x4400, 0x440e, dest);
418 // read natural-width fields
419 store(0x6400, 0x640a, dest);
// Debug helper: for each field encoding in [f, t] (step 2) print the
// value in the hardware VMCS next to the one in the software image `v`.
424 Vm_vmx::dump(void *v, unsigned f, unsigned t)
426 for (; f <= t; f += 2)
427 printf("%04x: VMCS: %16lx V: %16lx\n",
428 f, Vmx::vmread<Mword>(f), read<Mword>(v, f));
// Debug helper: dump all VMCS field groups of the software image `v`
// against the hardware VMCS.
433 Vm_vmx::dump_state(void *v)
435 dump(v, 0x0800, 0x080e);
436 dump(v, 0x0c00, 0x0c0c);
437 dump(v, 0x2000, 0x201a);
438 dump(v, 0x2800, 0x2810);
// NOTE(review): 0x2c00 > 0x2804, so this call dumps nothing -- the
// upper bound looks like a typo (0x2c04?); confirm the intended range.
439 dump(v, 0x2c00, 0x2804);
440 dump(v, 0x4000, 0x4022);
441 dump(v, 0x4400, 0x4420);
442 dump(v, 0x4800, 0x482a);
443 dump(v, 0x6800, 0x6826);
444 dump(v, 0x6c00, 0x6c16);
// Syscall path: run this VM on the current CPU.
// The caller's UTCB carries the guest GP registers (Gpregs_words) and a
// flexpage describing the user-level software VMCS image.  The function
// validates the request, makes sure the current thread owns the FPU,
// refuses nested paging (EPT pointer must be 0), loads guest state into
// the hardware VMCS, enters the guest via resume_vm_vmx() and copies
// exit state back into the software VMCS.  Parts of the function are
// elided in this listing.
449 Vm_vmx::sys_vm_run(Syscall_frame *f, Utcb *utcb)
// VM entry must happen with the CPU lock held (no preemption)
451 assert (cpu_lock.test());
453 /* these 4 must not use ldt entries */
454 assert (!(Cpu::get_cs() & (1 << 2)));
455 assert (!(Cpu::get_ss() & (1 << 2)));
456 assert (!(Cpu::get_ds() & (1 << 2)));
457 assert (!(Cpu::get_es() & (1 << 2)));
459 unsigned cpu = current_cpu();
460 Vmx &v = Vmx::cpus.cpu(cpu);
462 L4_msg_tag const &tag = f->tag();
// (the VMX-available check guarding this warning is elided here)
466 WARN("VMX: not supported/enabled\n");
467 return commit_result(-L4_err::EInval);
// message must hold at least the tag word plus the guest GP registers
470 if (EXPECT_FALSE(tag.words() < 1 + Vmx::Gpregs_words))
472 WARN("VMX: Invalid message length\n");
473 return commit_result(-L4_err::EInval);
476 L4_snd_item_iter vmcs_item(utcb, tag.words());
// exactly one send item is expected: the VMCS flexpage
478 if (EXPECT_FALSE(!tag.items() || !vmcs_item.next()))
479 return commit_result(-L4_err::EInval);
481 L4_fpage vmcs_fpage(vmcs_item.get()->d);
483 if (EXPECT_FALSE(!vmcs_fpage.is_mempage()))
485 WARN("VMX: Fpage invalid\n");
486 return commit_error(utcb, L4_error::Overflow);
// the software VMCS must cover at least one page
489 if (EXPECT_FALSE(vmcs_fpage.order() < 12))
490 return commit_result(-L4_err::EInval);
493 void *vmcs_s = (void *)(Virt_addr(vmcs_fpage.mem_address()).value());
// the software VMCS page must be mapped in the caller's address space
495 Mem_space::Phys_addr phys_vmcs;
496 Mem_space::Size size;
498 unsigned int page_attribs;
500 Mem_space *const curr_mem_space = current()->space()->mem_space();
501 resident = curr_mem_space->v_lookup(Virt_addr(vmcs_s), &phys_vmcs, &size, &page_attribs);
503 if (EXPECT_FALSE(!resident))
505 WARN("VMX: VMCS invalid\n");
506 return commit_result(-L4_err::EInval);
510 // This generates a circular dep between thread<->task, this cries for a
511 // new abstraction...
// guest FPU state lives in the FPU; take ownership before entering
512 if (!(current()->state() & Thread_fpu_owner))
514 if (EXPECT_FALSE(!current_thread()->switchin_fpu()))
516 WARN("VMX: switchin_fpu failed\n");
517 return commit_result(-L4_err::EInval);
// nested paging is not supported: reject a non-zero EPT pointer
522 if (EXPECT_FALSE(read<Unsigned32>(vmcs_s, 0x201a) != 0)) // EPT POINTER
524 WARN("VMX: no nested paging available\n");
525 return commit_result(-L4_err::EInval);
529 // increment our refcount, and drop it at the end automatically
530 Ref_ptr<Vm_vmx> pin_myself(this);
532 // set volatile host state
533 Vmx::vmwrite<Mword>(Vmx::F_host_cr3, Cpu::get_pdbr()); // host_area.cr3
535 load_guest_state(cpu, vmcs_s);
537 Unsigned16 ldt = Cpu::get_ldt();
// CR2 is not a VMCS field; hand-load the guest's CR2 before entry
540 asm volatile("mov %0, %%cr2" : : "r" (read<Mword>(vmcs_s, Vmx::F_guest_cr2)));
542 unsigned long ret = resume_vm_vmx(&utcb->values[1]);
// NOTE(review): bit 0x40 of the return value signals a failed entry --
// presumably the ZF/CF outcome of VMLAUNCH/VMRESUME; check the asm stub
543 if (EXPECT_FALSE(ret & 0x40))
544 return commit_result(-L4_err::EInval);
// save the guest's CR2 right after exit, before the host can touch it
549 asm volatile("mov %%cr2, %0" : "=r" (cpu_cr2));
550 write(vmcs_s, Vmx::F_guest_cr2, cpu_cr2);
555 // reload TSS, we use I/O bitmaps
556 // ... do this lazy ...
// clear the TSS descriptor's busy bit so LTR does not fault
559 Gdt_entry *e = &(*Cpu::cpus.cpu(cpu).get_gdt())[Gdt::gdt_tss / 8];
560 e->access &= ~(1 << 1);
561 asm volatile("" : : "m" (*e));
562 Cpu::set_tr(Gdt::gdt_tss);
565 store_guest_state(cpu, vmcs_s);
566 store_exit_info(cpu, vmcs_s);
568 return commit_result(L4_error::None);
// Kernel-object invocation entry point: delegate to the generic
// vm_invoke dispatcher instantiated for Vm_vmx.
573 Vm_vmx::invoke(L4_obj_ref obj, Mword rights, Syscall_frame *f, Utcb *utcb)
575 vm_invoke<Vm_vmx>(obj, rights, f, utcb);