vmcb->cpl = 0; /* Linux runs in ring 0 before migration */
vmcb->rflags = 0x02;
+ /* Indicate success to the caller of arch_entry */
+ vmcb->rax = 0;
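+ /* rsp as it will be once arch_entry pops its saved registers and returns */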
vmcb->rsp = cpu_data->linux_sp +
(NUM_ENTRY_REGS + 1) * sizeof(unsigned long);
vmcb->rip = cpu_data->linux_ip;
cpu_data->svm_state = SVMOFF;
+ /* We are leaving - set the GIF */
+ asm volatile ("stgi" : : : "memory");
+
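+ /* Clear EFER.SVME - the CPU leaves SVM operation entirely */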
efer = read_msr(MSR_EFER);
efer &= ~EFER_SVME;
write_msr(MSR_EFER, efer);
void vcpu_activate_vmm(struct per_cpu *cpu_data)
{
- /* TODO: Implement */
+ unsigned long vmcb_pa, host_stack;
+
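+ /*
+ * vmrun takes the VMCB by physical address; the host stack grows
+ * down from the top of the per-CPU stack.
+ */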
+ vmcb_pa = paging_hvirt2phys(&cpu_data->vmcb);
+ host_stack = (unsigned long)cpu_data->stack + sizeof(cpu_data->stack);
+
+ /* Clear host-mode MSRs */
+ write_msr(MSR_IA32_SYSENTER_CS, 0);
+ write_msr(MSR_IA32_SYSENTER_EIP, 0);
+ write_msr(MSR_IA32_SYSENTER_ESP, 0);
+
+ write_msr(MSR_STAR, 0);
+ write_msr(MSR_LSTAR, 0);
+ write_msr(MSR_CSTAR, 0);
+ write_msr(MSR_SFMASK, 0);
+ write_msr(MSR_KERNGS_BASE, 0);
+
+ /*
+ * XXX: We don't set our own PAT here but rather rely on Linux PAT
+ * settings (and MTRRs). Potentially, a malicious Linux root cell can
+ * set values different from what we expect and interfere with APIC
+ * virtualization in non-AVIC mode.
+ */
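+ /*
+ * One possible hardening (a sketch only, assuming MSR_IA32_PAT is
+ * defined): restore the power-on default PAT before entering the
+ * guest, e.g. write_msr(MSR_IA32_PAT, 0x0007040600070406UL);
+ */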
+
+ /*
+ * We enter Linux at the point arch_entry would return to as well.
+ * rax is cleared to signal success to the caller.
+ */
+ asm volatile(
+ "clgi\n\t"
+ "mov (%%rdi),%%r15\n\t"
+ "mov 0x8(%%rdi),%%r14\n\t"
+ "mov 0x10(%%rdi),%%r13\n\t"
+ "mov 0x18(%%rdi),%%r12\n\t"
+ "mov 0x20(%%rdi),%%rbx\n\t"
+ "mov 0x28(%%rdi),%%rbp\n\t"
+ "mov %0, %%rax\n\t"
+ "vmload\n\t"
+ "vmrun\n\t"
+ "vmsave\n\t"
+ /* Restore hypervisor stack */
+ "mov %2, %%rsp\n\t"
+ "jmp svm_vmexit"
+ : /* no output */
+ : "m" (vmcb_pa), "D" (cpu_data->linux_reg), "m" (host_stack)
+ : "memory", "r15", "r14", "r13", "r12",
+ "rbx", "rbp", "rax", "cc");
__builtin_unreachable();
}
void __attribute__((noreturn))
vcpu_deactivate_vmm(struct registers *guest_regs)
{
- /* TODO: Implement */
+ struct per_cpu *cpu_data = this_cpu_data();
+ struct vmcb *vmcb = &cpu_data->vmcb;
+ unsigned long *stack = (unsigned long *)vmcb->rsp;
+ unsigned long linux_ip = vmcb->rip;
+
+ /* We are leaving - set the GIF */
+ asm volatile ("stgi" : : : "memory");
+
+ /*
+ * Restore the MSRs.
+ *
+ * XXX: One could argue this would better be done in
+ * arch_cpu_restore(); however, that would require extending
+ * cpu_data to store STAR and friends.
+ */
+ write_msr(MSR_STAR, vmcb->star);
+ write_msr(MSR_LSTAR, vmcb->lstar);
+ write_msr(MSR_CSTAR, vmcb->cstar);
+ write_msr(MSR_SFMASK, vmcb->sfmask);
+ write_msr(MSR_KERNGS_BASE, vmcb->kerngsbase);
+
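+ /* Hand the guest state back to cpu_data so arch_cpu_restore() can reload it */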
+ cpu_data->linux_cr3 = vmcb->cr3;
+
+ cpu_data->linux_gdtr.base = vmcb->gdtr.base;
+ cpu_data->linux_gdtr.limit = vmcb->gdtr.limit;
+ cpu_data->linux_idtr.base = vmcb->idtr.base;
+ cpu_data->linux_idtr.limit = vmcb->idtr.limit;
+
+ cpu_data->linux_cs.selector = vmcb->cs.selector;
+
+ cpu_data->linux_tss.selector = vmcb->tr.selector;
+
+ cpu_data->linux_efer = vmcb->efer & (~EFER_SVME);
+ cpu_data->linux_fs.base = vmcb->fs.base;
+ cpu_data->linux_gs.base = vmcb->gs.base;
+
+ cpu_data->linux_sysenter_cs = vmcb->sysenter_cs;
+ cpu_data->linux_sysenter_eip = vmcb->sysenter_eip;
+ cpu_data->linux_sysenter_esp = vmcb->sysenter_esp;
+
+ cpu_data->linux_ds.selector = vmcb->ds.selector;
+ cpu_data->linux_es.selector = vmcb->es.selector;
+ cpu_data->linux_fs.selector = vmcb->fs.selector;
+ cpu_data->linux_gs.selector = vmcb->gs.selector;
+
+ arch_cpu_restore(cpu_data);
+
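+ /* Make linux_ip the return address for the final "ret" below */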
+ stack--;
+ *stack = linux_ip;
+
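+ /*
+ * Unwind guest_regs in struct registers order, skipping the unused
+ * slot. rax is not restored - it is cleared to report success.
+ */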
+ asm volatile (
+ "mov %%rbx,%%rsp\n\t"
+ "pop %%r15\n\t"
+ "pop %%r14\n\t"
+ "pop %%r13\n\t"
+ "pop %%r12\n\t"
+ "pop %%r11\n\t"
+ "pop %%r10\n\t"
+ "pop %%r9\n\t"
+ "pop %%r8\n\t"
+ "pop %%rdi\n\t"
+ "pop %%rsi\n\t"
+ "pop %%rbp\n\t"
+ "add $8,%%rsp\n\t"
+ "pop %%rbx\n\t"
+ "pop %%rdx\n\t"
+ "pop %%rcx\n\t"
+ "mov %%rax,%%rsp\n\t"
+ "xor %%rax,%%rax\n\t"
+ "ret"
+ : : "a" (stack), "b" (guest_regs));
__builtin_unreachable();
}