rtime.felk.cvut.cz Git - l4.git/blobdiff - kernel/fiasco/src/kern/ia32/vm_vmx.cpp
update
[l4.git] / kernel / fiasco / src / kern / ia32 / vm_vmx.cpp
index 2b5277761cc91acec60d8cde243cc33f8b4fe65f..586a4a381503dbeb6bc36048cd00e2651872e359 100644 (file)
@@ -10,7 +10,7 @@ class Vmcs;
 class Vm_vmx : public Vm
 {
 private:
-  static unsigned long resume_vm_vmx(Mword *regs)
+  static unsigned long resume_vm_vmx(Vcpu_state *regs)
     asm("resume_vm_vmx") __attribute__((__regparm__(3)));
 
   enum
@@ -41,7 +41,7 @@ Vm_vmx::Vm_vmx(Ram_quota *q)
 
 PUBLIC inline
 void *
-Vm_vmx::operator new (size_t size, void *p)
+Vm_vmx::operator new (size_t size, void *p) throw()
 {
   (void)size;
   assert (size == sizeof (Vm_vmx));
@@ -216,7 +216,7 @@ Vm_vmx::load_guest_state(unsigned cpu, void *src)
   if (entry_ctls.test(13)) // IA32_PERF_GLOBAL_CTRL load requested
     load(0x2808, src);
 
-  // complete *beep*, this is Fiasco.OC internal state
+  // this is Fiasco.OC internal state
 #if 0
   if (vmx.has_ept())
     load(0x280a, 0x2810, src);
@@ -234,14 +234,14 @@ Vm_vmx::load_guest_state(unsigned cpu, void *src)
   if (sizeof(long) > sizeof(int))
     {
       if (read<Mword>(src, 0x2806) & EFER_LME)
-        Vmx::vmwrite(0x6802, (Mword)mem_space()->phys_dir());
+        Vmx::vmwrite(0x6802, (Mword)phys_dir());
       else
        WARN("VMX: No, not possible\n");
     }
   else
     {
       // for 32bit we can just load the Vm pdbr
-      Vmx::vmwrite(0x6802, (Mword)mem_space()->phys_dir());
+      Vmx::vmwrite(0x6802, (Mword)phys_dir());
     }
 
   load<Mword>(0x6804, src, vmx.info.cr4_defs);
@@ -323,10 +323,11 @@ Vm_vmx::store_guest_state(unsigned cpu, void *dest)
   store(0x4800, 0x4826, dest);
 
   // sysenter msr is not saved here, because we trap all msr accesses right now
-#if 0
-  store(0x482a, dest);
-  store(0x6824, 0x6826, dest);
-#endif
+  if (0)
+    {
+      store(0x482a, dest);
+      store(0x6824, 0x6826, dest);
+    }
 
   // read natural-width fields
   store(0x6800, dest);
@@ -334,72 +335,12 @@ Vm_vmx::store_guest_state(unsigned cpu, void *dest)
   store(0x6804, 0x6822, dest);
 }
 
-#if 0
-PRIVATE
-void
-Vm_vmx::copy_execution_control_back(unsigned cpu, void *dest)
-{
-  Vmx &v = Vmx::cpus.cpu(cpu);
-  // read 16-bit fields
-  if (v.has_vpid())
-    store(0, dest);
-
-  // read 64-bit fields
-  store(0x2000, 0x2002, dest);
-  store(0x200c, dest);
-  store(0x2010, dest);
-
-  Unsigned64 msr = Vmx::cpus.cpu(cpu).info._procbased_ctls; // IA32_VMX_PROCBASED_CTLS
-  if (msr & (1ULL<<53))
-    store(0x2012, dest);
-
-  if (vmread<Unsigned32>(0x4002) & (1 << 31))
-    {
-      msr = Vmx::cpus.cpu(cpu).info._procbased_ctls2; // IA32_VMX_PROCBASED_CTLS2
-      if (msr & (1ULL << 32))
-       store(0x2014, dest);
-    }
-
-  if (v.has_ept())
-    store(0x201a, dest);
-
-  // read 32-bit fields
-  store(0x4000, 0x4004, dest);
-  store(0x401e, dest);
-
-  // read natural-width fields
-  store(0x6000, 0x600e, dest);
-}
-
-PRIVATE
-void
-Vm_vmx::copy_exit_control_back(unsigned ,void *dest)
-{
-  // read 64-bit fields
-  store(0x2006, 0x2008, dest);
-
-  // read 32-bit fields
-  store(0x400c, 0x4010, dest);
-}
-
-PRIVATE
-void
-Vm_vmx::copy_entry_control_back(unsigned, void *dest)
-{
-  // read 64-bit fields
-  store(0x200a, dest);
-
-  // read 32-bit fields
-  store(0x4012, 0x401a, dest);
-}
-#endif
-
 PRIVATE
 void
 Vm_vmx::store_exit_info(unsigned cpu, void *dest)
 {
   (void)cpu;
-  // read 64-bit fields, HM EPT pf stuff
+  // read 64-bit fields, that is an EPT pf thing
 #if 0
   if (Vmx::cpus.cpu(cpu).has_ept())
     store(0x2400, dest);
@@ -444,9 +385,9 @@ Vm_vmx::dump_state(void *v)
   dump(v, 0x6c00, 0x6c16);
 }
 
-PUBLIC
-L4_msg_tag
-Vm_vmx::sys_vm_run(Syscall_frame *f, Utcb *utcb)
+PRIVATE inline NOEXPORT
+int
+Vm_vmx::do_resume_vcpu(Context *ctxt, Vcpu_state *vcpu, void *vmcs_s)
 {
   assert (cpu_lock.test());
 
@@ -459,37 +400,21 @@ Vm_vmx::sys_vm_run(Syscall_frame *f, Utcb *utcb)
   unsigned cpu = current_cpu();
   Vmx &v = Vmx::cpus.cpu(cpu);
 
-  L4_msg_tag const &tag = f->tag();
-
-  if(!v.vmx_enabled())
+  if (!v.vmx_enabled())
     {
-      WARN("VMX: not supported/enabled\n");
-      return commit_result(-L4_err::EInval);
-    }
-
-  if (EXPECT_FALSE(tag.words() < 1 + Vmx::Gpregs_words))
-    {
-      WARN("VMX: Invalid message length\n");
-      return commit_result(-L4_err::EInval);
-    }
-
-  void *vmcs_s;
-
-  if (int r = Vm::getpage(utcb, tag, &vmcs_s))
-    {
-      WARN("VMX: Invalid VMCS\n");
-      return commit_result(r);
+      WARNX(Info, "VMX: not supported/enabled\n");
+      return -L4_err::ENodev;
     }
 
   // XXX:
   // This generates a circular dep between thread<->task, this cries for a
   // new abstraction...
-  if (!(current()->state() & Thread_fpu_owner))
+  if (!(ctxt->state() & Thread_fpu_owner))
     {
-      if (EXPECT_FALSE(!current_thread()->switchin_fpu()))
+      if (EXPECT_FALSE(!static_cast<Thread*>(ctxt)->switchin_fpu()))
         {
           WARN("VMX: switchin_fpu failed\n");
-          return commit_result(-L4_err::EInval);
+          return -L4_err::EInval;
         }
     }
 
@@ -514,9 +439,10 @@ Vm_vmx::sys_vm_run(Syscall_frame *f, Utcb *utcb)
   // set guest CR2
   asm volatile("mov %0, %%cr2" : : "r" (read<Mword>(vmcs_s, Vmx::F_guest_cr2)));
 
-  unsigned long ret = resume_vm_vmx(&utcb->values[1]);
+  unsigned long ret = resume_vm_vmx(vcpu);
+  // vmread error?
   if (EXPECT_FALSE(ret & 0x40))
-    return commit_result(-L4_err::EInval);
+    return -L4_err::EInval;
 
   // save guest cr2
     {
@@ -540,12 +466,65 @@ Vm_vmx::sys_vm_run(Syscall_frame *f, Utcb *utcb)
   store_guest_state(cpu, vmcs_s);
   store_exit_info(cpu, vmcs_s);
 
-  return commit_result(L4_error::None);
+  if ((read<Unsigned32>(vmcs_s, Vmx::F_exit_reason) & 0xffff) == 1)
+    return 1;
+
+  vcpu->state &= ~(Vcpu_state::F_traps | Vcpu_state::F_user_mode);
+  return 0;
 }
 
 PUBLIC
-void
-Vm_vmx::invoke(L4_obj_ref obj, Mword rights, Syscall_frame *f, Utcb *utcb)
+int
+Vm_vmx::resume_vcpu(Context *ctxt, Vcpu_state *vcpu, bool user_mode)
 {
-  vm_invoke<Vm_vmx>(obj, rights, f, utcb);
+  (void)user_mode; // only read by the debug assertion below
+  assert_kdb (user_mode);
+  // refuse with EInval unless Thread_ext_vcpu_enabled is set for the context
+  if (EXPECT_FALSE(!(ctxt->state(true) & Thread_ext_vcpu_enabled)))
+    {
+      ctxt->arch_load_vcpu_kern_state(vcpu, true);
+      return -L4_err::EInval;
+    }
+  // the VMCS data is addressed at byte offset 0x400 from the Vcpu_state pointer
+  void *vmcs_s = reinterpret_cast<char *>(vcpu) + 0x400;
+
+  for (;;)
+    {
+      // IRQs disabled in the saved state but an IRQ pending: do not resume
+      // the vCPU, directly simulate an external interrupt intercept instead
+      if (   !(vcpu->_saved_state & Vcpu_state::F_irqs)
+         && (vcpu->sticky_flags & Vcpu_state::Sf_irq_pending))
+       {
+         // XXX: check if this is correct, we set external irq exit (1) as reason
+         write<Unsigned32>(vmcs_s, Vmx::F_exit_reason, 1);
+          ctxt->arch_load_vcpu_kern_state(vcpu, true);
+         return 1; // return 1 to indicate pending IRQs (IPCs)
+       }
+
+      int r = do_resume_vcpu(ctxt, vcpu, vmcs_s);
+
+      // error (r < 0) or non-IRQ exit reason (r == 0): hand r to the caller
+      if (r <= 0)
+        {
+          ctxt->arch_load_vcpu_kern_state(vcpu, true);
+          return r;
+        }
+
+      // IRQ exit (r == 1): open a preemption point to allow handling the IRQ
+      if (r == 1)
+       Proc::preemption_point();
+
+      // Check whether the current context got a message delivered.
+      // This is detected by testing for a valid continuation.
+      // When a continuation is set we have to leave the kernel
+      // directly, so that the vcpu-regs are not overwritten
+      // with bogus state.
+      Thread *t = nonull_static_cast<Thread*>(ctxt);
+      if (t->continuation_test_and_restore())
+        {
+          ctxt->arch_load_vcpu_kern_state(vcpu, true);
+          t->fast_return_to_user(vcpu->_entry_ip, vcpu->_entry_sp,
+                                 t->vcpu_state().usr().get());
+        }
+    } // next iteration re-checks pending IRQs and resumes the vCPU again
 }