rtime.felk.cvut.cz Git - jailhouse.git/commitdiff
x86: Implement vcpu_[de]activate_vmm() for AMD-V
author Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
Wed, 4 Jun 2014 18:31:12 +0000 (00:31 +0600)
committer Jan Kiszka <jan.kiszka@siemens.com>
Sat, 1 Nov 2014 19:10:09 +0000 (20:10 +0100)
Compared to VMX, vcpu_activate_vmm() and vcpu_deactivate_vmm() for AMD-V
systems have several notable differences.

First, additional MSRs (part of the VMCB but not of the VMCS) need to be set
and restored. Second, the host state area is opaque in AMD-V, so
vcpu_activate_vmm() remembers the host stack pointer and essentially begins a
VMRUN/#VMEXIT loop implemented in svm-vmexit.S. Third, as the RAX register is
part of the VMCB, it is copied to guest_regs on each VM exit and written back
before VM entry.

As Jailhouse runs with the GIF cleared, it needs to be set again on VMM
deactivation.

Signed-off-by: Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
hypervisor/arch/x86/include/asm/percpu.h
hypervisor/arch/x86/svm-vmexit.S
hypervisor/arch/x86/svm.c

index e35191a8789a41df459a54e970f81b0ab3611f95..e6005793a310e53fd8c37515739631083ef78677 100644 (file)
@@ -27,6 +27,9 @@
 #define PERCPU_SIZE_SHIFT              14
 #define PERCPU_STACK_END               PAGE_SIZE
 #define PERCPU_LINUX_SP                        PERCPU_STACK_END
+/* SVM only: offsetof(struct per_cpu, vmcb) */
+#define PERCPU_VMCB_OFFSET             8192
+#define PERCPU_VMCB_RAX                        (PERCPU_VMCB_OFFSET + 0x5f8)
 
 #ifndef __ASSEMBLY__
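
For reference, the two values above are hardcoded rather than derived with
offsetof() because svm-vmexit.S consumes them from assembly, where the C
structures are not visible. A minimal sketch of a compile-time cross-check
that could live in C code which sees both definitions (e.g. svm.c); it relies
only on the per_cpu.vmcb and vmcb.rax members already used elsewhere in this
patch:

        /* sketch only, not part of this commit: fail the build if the
         * assembly-visible constants drift away from the C layout */
        extern char percpu_vmcb_offset_check[
                (PERCPU_VMCB_OFFSET ==
                 __builtin_offsetof(struct per_cpu, vmcb)) ? 1 : -1];
        extern char percpu_vmcb_rax_check[
                (PERCPU_VMCB_RAX == PERCPU_VMCB_OFFSET +
                 __builtin_offsetof(struct vmcb, rax)) ? 1 : -1];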
 
index d0212fbacdccfaae31ea477d35da47e51a6acd79..d50b0458d5fbc2a3b81ea869e19c5c432cdb631c 100644 (file)
 /* SVM VM-exit handling */
        .globl svm_vmexit
 svm_vmexit:
-       /* TODO: Implement */
+       /* XXX: GIF is always cleared here */
+       push %rax
+
+       push -PERCPU_STACK_END+1*8+PERCPU_VMCB_RAX(%rsp)
+       push %rcx
+       push %rdx
+       push %rbx
+       sub $8,%rsp /* placeholder for rsp */
+       push %rbp
+       push %rsi
+       push %rdi
+       push %r8
+       push %r9
+       push %r10
+       push %r11
+       push %r12
+       push %r13
+       push %r14
+       push %r15
+
+       mov %rsp,%rdi
+       lea -PERCPU_STACK_END+1*8+16*8(%rsp),%rsi
+       call vcpu_handle_exit
+
+       pop %r15
+       pop %r14
+       pop %r13
+       pop %r12
+       pop %r11
+       pop %r10
+       pop %r9
+       pop %r8
+       pop %rdi
+       pop %rsi
+       pop %rbp
+       add $8,%rsp
+       pop %rbx
+       pop %rdx
+       pop %rcx
+       pop -PERCPU_STACK_END+1*8+PERCPU_VMCB_RAX(%rsp)
+
+       pop %rax
+
+       vmload
+       vmrun
+       vmsave
+
+       jmp svm_vmexit
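
For reference, the push sequence above leaves %rsp pointing at the saved guest
register file, which is handed to vcpu_handle_exit() as its first argument in
%rdi; the lea recovers the struct per_cpu base for the second argument in
%rsi, since the stack lives inside struct per_cpu and ends at
PERCPU_STACK_END. A minimal sketch of the layout this push order implies,
lowest address first; the field names are illustrative stand-ins for whatever
struct registers actually declares:

        struct registers {
                unsigned long r15, r14, r13, r12, r11, r10, r9, r8;
                unsigned long rdi, rsi, rbp;
                unsigned long unused;   /* the "placeholder for rsp" slot */
                unsigned long rbx, rdx, rcx;
                unsigned long rax;      /* guest RAX, copied from the VMCB */
        };

The initial push %rax preserves the host RAX, i.e. the VMCB physical address
that VMRUN expects, just above this structure; it is popped back right before
the vmload/vmrun/vmsave triple.
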
index 5b0f6fcb9d657d30fbad53ba815696e4b961bb3f..ae24a13ea035632c605d9e4d71b458b2816ef043 100644 (file)
@@ -176,6 +176,8 @@ static int vmcb_setup(struct per_cpu *cpu_data)
        vmcb->cpl = 0; /* Linux runs in ring 0 before migration */
 
        vmcb->rflags = 0x02;
+       /* Indicate success to the caller of arch_entry */
+       vmcb->rax = 0;
        vmcb->rsp = cpu_data->linux_sp +
                (NUM_ENTRY_REGS + 1) * sizeof(unsigned long);
        vmcb->rip = cpu_data->linux_ip;
@@ -385,6 +387,9 @@ void vcpu_exit(struct per_cpu *cpu_data)
 
        cpu_data->svm_state = SVMOFF;
 
+       /* We are leaving - set the GIF */
+       asm volatile ("stgi" : : : "memory");
+
        efer = read_msr(MSR_EFER);
        efer &= ~EFER_SVME;
        write_msr(MSR_EFER, efer);
@@ -394,14 +399,127 @@ void vcpu_exit(struct per_cpu *cpu_data)
 
 void vcpu_activate_vmm(struct per_cpu *cpu_data)
 {
-       /* TODO: Implement */
+       unsigned long vmcb_pa, host_stack;
+
+       vmcb_pa = paging_hvirt2phys(&cpu_data->vmcb);
+       host_stack = (unsigned long)cpu_data->stack + sizeof(cpu_data->stack);
+
+       /* Clear host-mode MSRs */
+       write_msr(MSR_IA32_SYSENTER_CS, 0);
+       write_msr(MSR_IA32_SYSENTER_EIP, 0);
+       write_msr(MSR_IA32_SYSENTER_ESP, 0);
+
+       write_msr(MSR_STAR, 0);
+       write_msr(MSR_LSTAR, 0);
+       write_msr(MSR_CSTAR, 0);
+       write_msr(MSR_SFMASK, 0);
+       write_msr(MSR_KERNGS_BASE, 0);
+
+       /*
+        * XXX: We don't set our own PAT here but rather rely on Linux PAT
+        * settings (and MTRRs). Potentially, a malicious Linux root cell can
+        * set values different from what we expect, and interfere with APIC
+        * virtualization in non-AVIC mode.
+        */
+
+       /* We enter Linux at the point arch_entry would return to as well.
+        * rax is cleared to signal success to the caller. */
+       asm volatile(
+               "clgi\n\t"
+               "mov (%%rdi),%%r15\n\t"
+               "mov 0x8(%%rdi),%%r14\n\t"
+               "mov 0x10(%%rdi),%%r13\n\t"
+               "mov 0x18(%%rdi),%%r12\n\t"
+               "mov 0x20(%%rdi),%%rbx\n\t"
+               "mov 0x28(%%rdi),%%rbp\n\t"
+               "mov %0, %%rax\n\t"
+               "vmload\n\t"
+               "vmrun\n\t"
+               "vmsave\n\t"
+               /* Restore hypervisor stack */
+               "mov %2, %%rsp\n\t"
+               "jmp svm_vmexit"
+               : /* no output */
+               : "m" (vmcb_pa), "D" (cpu_data->linux_reg), "m" (host_stack)
+               : "memory", "r15", "r14", "r13", "r12",
+                 "rbx", "rbp", "rax", "cc");
        __builtin_unreachable();
 }
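
For reference, the mov sequence above reloads the callee-saved registers that
the entry path stashed in cpu_data->linux_reg, so the first VMRUN resumes
Linux at the point arch_entry would return to, with rax = 0 (set in
vmcb_setup()) reporting success. A minimal sketch of the save order the fixed
offsets assume; the layout of linux_reg is an assumption here, and only the
order matters:

        struct entry_regs {             /* cpu_data->linux_reg[NUM_ENTRY_REGS] */
                unsigned long r15;      /* 0x00(%rdi) */
                unsigned long r14;      /* 0x08(%rdi) */
                unsigned long r13;      /* 0x10(%rdi) */
                unsigned long r12;      /* 0x18(%rdi) */
                unsigned long rbx;      /* 0x20(%rdi) */
                unsigned long rbp;      /* 0x28(%rdi) */
        };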
 
 void __attribute__((noreturn))
 vcpu_deactivate_vmm(struct registers *guest_regs)
 {
-       /* TODO: Implement */
+       struct per_cpu *cpu_data = this_cpu_data();
+       struct vmcb *vmcb = &cpu_data->vmcb;
+       unsigned long *stack = (unsigned long *)vmcb->rsp;
+       unsigned long linux_ip = vmcb->rip;
+
+       /* We are leaving - set the GIF */
+       asm volatile ("stgi" : : : "memory");
+
+       /*
+        * Restore the MSRs.
+        *
+        * XXX: One could argue this is better to be done in
+        * arch_cpu_restore(), however, it would require changes
+        * to cpu_data to store STAR and friends.
+        */
+       write_msr(MSR_STAR, vmcb->star);
+       write_msr(MSR_LSTAR, vmcb->lstar);
+       write_msr(MSR_CSTAR, vmcb->cstar);
+       write_msr(MSR_SFMASK, vmcb->sfmask);
+       write_msr(MSR_KERNGS_BASE, vmcb->kerngsbase);
+
+       cpu_data->linux_cr3 = vmcb->cr3;
+
+       cpu_data->linux_gdtr.base = vmcb->gdtr.base;
+       cpu_data->linux_gdtr.limit = vmcb->gdtr.limit;
+       cpu_data->linux_idtr.base = vmcb->idtr.base;
+       cpu_data->linux_idtr.limit = vmcb->idtr.limit;
+
+       cpu_data->linux_cs.selector = vmcb->cs.selector;
+
+       cpu_data->linux_tss.selector = vmcb->tr.selector;
+
+       cpu_data->linux_efer = vmcb->efer & (~EFER_SVME);
+       cpu_data->linux_fs.base = vmcb->fs.base;
+       cpu_data->linux_gs.base = vmcb->gs.base;
+
+       cpu_data->linux_sysenter_cs = vmcb->sysenter_cs;
+       cpu_data->linux_sysenter_eip = vmcb->sysenter_eip;
+       cpu_data->linux_sysenter_esp = vmcb->sysenter_esp;
+
+       cpu_data->linux_ds.selector = vmcb->ds.selector;
+       cpu_data->linux_es.selector = vmcb->es.selector;
+       cpu_data->linux_fs.selector = vmcb->fs.selector;
+       cpu_data->linux_gs.selector = vmcb->gs.selector;
+
+       arch_cpu_restore(cpu_data);
+
+       stack--;
+       *stack = linux_ip;
+
+       asm volatile (
+               "mov %%rbx,%%rsp\n\t"
+               "pop %%r15\n\t"
+               "pop %%r14\n\t"
+               "pop %%r13\n\t"
+               "pop %%r12\n\t"
+               "pop %%r11\n\t"
+               "pop %%r10\n\t"
+               "pop %%r9\n\t"
+               "pop %%r8\n\t"
+               "pop %%rdi\n\t"
+               "pop %%rsi\n\t"
+               "pop %%rbp\n\t"
+               "add $8,%%rsp\n\t"
+               "pop %%rbx\n\t"
+               "pop %%rdx\n\t"
+               "pop %%rcx\n\t"
+               "mov %%rax,%%rsp\n\t"
+               "xor %%rax,%%rax\n\t"
+               "ret"
+               : : "a" (stack), "b" (guest_regs));
        __builtin_unreachable();
 }
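
For reference, the pop sequence above mirrors the push sequence in
svm-vmexit.S, skipping only the rsp placeholder and the rax slot: rax instead
receives 0 as the return value, and rsp is switched to the rebuilt Linux stack
onto which vmcb->rip was pushed as the return address for the final ret. A
minimal sketch of a compile-time check that could guard this coupling; it
assumes the illustrative struct registers layout sketched after svm-vmexit.S
above:

        /* sketch only: catch drift between struct registers and the fixed
         * push/pop sequences in svm-vmexit.S and vcpu_deactivate_vmm() */
        extern char registers_layout_check[
                (sizeof(struct registers) == 16 * sizeof(unsigned long) &&
                 __builtin_offsetof(struct registers, rax) ==
                 15 * sizeof(unsigned long)) ? 1 : -1];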