*/
#include <jailhouse/entry.h>
+#include <jailhouse/cell.h>
#include <jailhouse/cell-config.h>
#include <jailhouse/control.h>
#include <jailhouse/paging.h>
#include <jailhouse/processor.h>
#include <jailhouse/string.h>
#include <jailhouse/utils.h>
+#include <asm/amd_iommu.h>
#include <asm/apic.h>
-#include <asm/cell.h>
#include <asm/control.h>
#include <asm/iommu.h>
#include <asm/paging.h>
* combinations of NW and CD bits are prohibited by SVM (see APMv2,
* Sect. 15.5). To handle this, we always keep the NW bit off.
*/
-#define SVM_CR0_ALLOWED_BITS (~X86_CR0_NW)
+#define SVM_CR0_ALLOWED_BITS (~X86_CR0_NW)
+
+/* IOPM size: two 4-K pages + 3 bits, hence 3 full pages are allocated */
+#define IOPM_PAGES 3
+
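+/*
+ * Both the NPT and the AMD IOMMU walk the shared page table in the native
+ * 4-level x86-64 layout, covering a 48-bit guest-physical address space.
+ */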
+#define NPT_IOMMU_PAGE_DIR_LEVELS 4
static bool has_avic, has_assists, has_flush_by_asid;
static const struct segment invalid_seg;
-static struct paging npt_paging[NPT_PAGE_DIR_LEVELS];
+static struct paging npt_iommu_paging[NPT_IOMMU_PAGE_DIR_LEVELS];
/* bit cleared: direct access allowed */
// TODO: convert to whitelist
static int svm_check_features(void)
{
/* SVM is available */
- if (!(cpuid_ecx(0x80000001) & X86_FEATURE_SVM))
+ if (!(cpuid_ecx(0x80000001, 0) & X86_FEATURE_SVM))
return trace_error(-ENODEV);
/* Nested paging */
- if (!(cpuid_edx(0x8000000A) & X86_FEATURE_NP))
+ if (!(cpuid_edx(0x8000000A, 0) & X86_FEATURE_NP))
return trace_error(-EIO);
/* Decode assists */
- if ((cpuid_edx(0x8000000A) & X86_FEATURE_DECODE_ASSISTS))
+ if ((cpuid_edx(0x8000000A, 0) & X86_FEATURE_DECODE_ASSISTS))
has_assists = true;
/* AVIC support */
- if (cpuid_edx(0x8000000A) & X86_FEATURE_AVIC)
- has_avic = true;
+ /* FIXME: Jailhouse support is incomplete so far
+ if (cpuid_edx(0x8000000A, 0) & X86_FEATURE_AVIC)
+ has_avic = true; */
/* TLB Flush by ASID support */
- if (cpuid_edx(0x8000000A) & X86_FEATURE_FLUSH_BY_ASID)
+ if (cpuid_edx(0x8000000A, 0) & X86_FEATURE_FLUSH_BY_ASID)
has_flush_by_asid = true;
return 0;
static void svm_set_cell_config(struct cell *cell, struct vmcb *vmcb)
{
- vmcb->iopm_base_pa = paging_hvirt2phys(cell->svm.iopm);
- vmcb->n_cr3 = paging_hvirt2phys(cell->svm.npt_structs.root_table);
+ vmcb->iopm_base_pa = paging_hvirt2phys(cell->arch.svm.iopm);
+ vmcb->n_cr3 =
+ paging_hvirt2phys(cell->arch.svm.npt_iommu_structs.root_table);
}
static void vmcb_setup(struct per_cpu *cpu_data)
vmcb->general2_intercepts |= GENERAL2_INTERCEPT_VMRUN; /* Required */
vmcb->general2_intercepts |= GENERAL2_INTERCEPT_VMMCALL;
+	/*
+	 * We only intercept #DB and #AC to prevent malicious guests from
+	 * triggering infinite loops in microcode (see e.g. CVE-2015-5307
+	 * and CVE-2015-8104).
+	 */
+ vmcb->exception_intercepts |= (1 << DB_VECTOR) | (1 << AC_VECTOR);
+
vmcb->msrpm_base_pa = paging_hvirt2phys(msrpm);
vmcb->np_enable = 1;
unsigned long gphys,
unsigned long flags)
{
- return paging_virt2phys(&cpu_data->cell->svm.npt_structs,
- gphys, flags);
+ return paging_virt2phys(&cpu_data->cell->arch.svm.npt_iommu_structs,
+ gphys, flags);
+}
+
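+/*
+ * Sharing one page table between the NPT and the IOMMU works because the
+ * IOMMU-specific fields fall into bits the CPU walker does not interpret:
+ * the page mode (next level) occupies the ignored bits 11:9, IR/IW the
+ * software-available bits 62:61, and the IOMMU present bit coincides with
+ * the x86 present bit.
+ */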
+static void npt_iommu_set_next_pt_l4(pt_entry_t pte, unsigned long next_pt)
+{
+	/*
+	 * Merge IOMMU and NPT flags. We need to mark the NPT entries as
+	 * user accessible, see APMv2, Section 15.25.5.
+	 */
+ *pte = (next_pt & BIT_MASK(51, 12)) | AMD_IOMMU_PTE_PG_MODE(3) |
+ AMD_IOMMU_PTE_IR | AMD_IOMMU_PTE_IW | AMD_IOMMU_PTE_P |
+ PAGE_DEFAULT_FLAGS | PAGE_FLAG_US;
+}
+
+static void npt_iommu_set_next_pt_l3(pt_entry_t pte, unsigned long next_pt)
+{
+ *pte = (next_pt & BIT_MASK(51, 12)) | AMD_IOMMU_PTE_PG_MODE(2) |
+ AMD_IOMMU_PTE_IR | AMD_IOMMU_PTE_IW | AMD_IOMMU_PTE_P |
+ PAGE_DEFAULT_FLAGS | PAGE_FLAG_US;
+}
+
+static void npt_iommu_set_next_pt_l2(pt_entry_t pte, unsigned long next_pt)
+{
+ *pte = (next_pt & BIT_MASK(51, 12)) | AMD_IOMMU_PTE_PG_MODE(1) |
+ AMD_IOMMU_PTE_IR | AMD_IOMMU_PTE_IW | AMD_IOMMU_PTE_P |
+ PAGE_DEFAULT_FLAGS | PAGE_FLAG_US;
+}
+
+static unsigned long npt_iommu_get_phys_l3(pt_entry_t pte, unsigned long virt)
+{
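+	/*
+	 * A non-zero page mode marks a reference to a lower-level table,
+	 * i.e. there is no terminal 1G mapping at this level. The 2M case
+	 * below follows the same logic.
+	 */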
+ if (*pte & AMD_IOMMU_PTE_PG_MODE_MASK)
+ return INVALID_PHYS_ADDR;
+ return (*pte & BIT_MASK(51, 30)) | (virt & BIT_MASK(29, 0));
}
-static void npt_set_next_pt(pt_entry_t pte, unsigned long next_pt)
+static unsigned long npt_iommu_get_phys_l2(pt_entry_t pte, unsigned long virt)
{
- /* See APMv2, Section 15.25.5 */
- *pte = (next_pt & 0x000ffffffffff000UL) |
- (PAGE_DEFAULT_FLAGS | PAGE_FLAG_US);
+ if (*pte & AMD_IOMMU_PTE_PG_MODE_MASK)
+ return INVALID_PHYS_ADDR;
+ return (*pte & BIT_MASK(51, 21)) | (virt & BIT_MASK(20, 0));
}
int vcpu_vendor_init(void)
{
struct paging_structures parking_pt;
unsigned long vm_cr;
- int err, n;
+ int err;
err = svm_check_features();
if (err)
/* SVM disabled in BIOS */
return trace_error(-EPERM);
- /* Nested paging is the same as the native one */
- memcpy(npt_paging, x86_64_paging, sizeof(npt_paging));
- for (n = 0; n < NPT_PAGE_DIR_LEVELS; n++)
- npt_paging[n].set_next_pt = npt_set_next_pt;
+ /*
+ * Nested paging is almost the same as the native one. However, we
+ * need to override some handlers in order to reuse the page table for
+ * the IOMMU as well.
+ */
+ memcpy(npt_iommu_paging, x86_64_paging, sizeof(npt_iommu_paging));
+ npt_iommu_paging[0].set_next_pt = npt_iommu_set_next_pt_l4;
+ npt_iommu_paging[1].set_next_pt = npt_iommu_set_next_pt_l3;
+ npt_iommu_paging[2].set_next_pt = npt_iommu_set_next_pt_l2;
+ npt_iommu_paging[1].get_phys = npt_iommu_get_phys_l3;
+ npt_iommu_paging[2].get_phys = npt_iommu_get_phys_l2;
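+	/*
+	 * Terminal (4K) entries keep the generic handlers; their IOMMU
+	 * permission bits are merged in via the memory region flags in
+	 * vcpu_map_memory_region().
+	 */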
/* Map guest parking code (shared between cells and CPUs) */
- parking_pt.root_paging = npt_paging;
+ parking_pt.root_paging = npt_iommu_paging;
parking_pt.root_table = parked_mode_npt = page_alloc(&mem_pool, 1);
if (!parked_mode_npt)
return -ENOMEM;
int err = -ENOMEM;
u64 flags;
- /* allocate iopm (two 4-K pages + 3 bits) */
- cell->svm.iopm = page_alloc(&mem_pool, 3);
- if (!cell->svm.iopm)
+ /* allocate iopm */
+ cell->arch.svm.iopm = page_alloc(&mem_pool, IOPM_PAGES);
+ if (!cell->arch.svm.iopm)
return err;
/* build root NPT of cell */
- cell->svm.npt_structs.root_paging = npt_paging;
- cell->svm.npt_structs.root_table = page_alloc(&mem_pool, 1);
- if (!cell->svm.npt_structs.root_table)
- goto err_free_iopm;
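+	/*
+	 * The root table page comes preallocated with the cell structure,
+	 * so its setup can no longer fail here.
+	 */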
+ cell->arch.svm.npt_iommu_structs.root_paging = npt_iommu_paging;
+ cell->arch.svm.npt_iommu_structs.root_table =
+ (page_table_t)cell->arch.root_table_page;
if (!has_avic) {
/*
* Map xAPIC as is; reads are passed, writes are trapped.
*/
flags = PAGE_READONLY_FLAGS | PAGE_FLAG_US | PAGE_FLAG_DEVICE;
- err = paging_create(&cell->svm.npt_structs, XAPIC_BASE,
- PAGE_SIZE, XAPIC_BASE,
- flags,
- PAGING_NON_COHERENT);
+ err = paging_create(&cell->arch.svm.npt_iommu_structs,
+ XAPIC_BASE, PAGE_SIZE, XAPIC_BASE,
+ flags, PAGING_NON_COHERENT);
} else {
flags = PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE;
- err = paging_create(&cell->svm.npt_structs,
+ err = paging_create(&cell->arch.svm.npt_iommu_structs,
paging_hvirt2phys(avic_page),
PAGE_SIZE, XAPIC_BASE,
- flags,
- PAGING_NON_COHERENT);
+ flags, PAGING_NON_COHERENT);
}
if (err)
- goto err_free_root_table;
+ goto err_free_iopm;
return 0;
-err_free_root_table:
- page_free(&mem_pool, cell->svm.npt_structs.root_table, 1);
err_free_iopm:
- page_free(&mem_pool, cell->svm.iopm, 3);
+	page_free(&mem_pool, cell->arch.svm.iopm, IOPM_PAGES);
return err;
}
if (mem->flags & JAILHOUSE_MEM_COMM_REGION)
phys_start = paging_hvirt2phys(&cell->comm_page);
- return paging_create(&cell->svm.npt_structs, phys_start, mem->size,
- mem->virt_start, flags, PAGING_NON_COHERENT);
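+	/*
+	 * Merge in the IOMMU permission bits for this region so that the
+	 * terminal page table entries also grant the right device access.
+	 */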
+ flags |= amd_iommu_get_memory_region_flags(mem);
+
+ /*
+ * As we also manipulate the IOMMU page table, changes need to be
+ * coherent.
+ */
+ return paging_create(&cell->arch.svm.npt_iommu_structs, phys_start,
+ mem->size, mem->virt_start, flags,
+ PAGING_COHERENT);
}
int vcpu_unmap_memory_region(struct cell *cell,
const struct jailhouse_memory *mem)
{
- return paging_destroy(&cell->svm.npt_structs, mem->virt_start,
- mem->size, PAGING_NON_COHERENT);
+ return paging_destroy(&cell->arch.svm.npt_iommu_structs,
+ mem->virt_start, mem->size, PAGING_COHERENT);
}
void vcpu_vendor_cell_exit(struct cell *cell)
{
- paging_destroy(&cell->svm.npt_structs, XAPIC_BASE, PAGE_SIZE,
- PAGING_NON_COHERENT);
- page_free(&mem_pool, cell->svm.npt_structs.root_table, 1);
- page_free(&mem_pool, cell->svm.iopm, 3);
+ paging_destroy(&cell->arch.svm.npt_iommu_structs, XAPIC_BASE,
+ PAGE_SIZE, PAGING_NON_COHERENT);
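+	/*
+	 * The root table is part of the cell structure now and thus no
+	 * longer freed here.
+	 */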
+	page_free(&mem_pool, cell->arch.svm.iopm, IOPM_PAGES);
}
int vcpu_init(struct per_cpu *cpu_data)
asm volatile("str %0" : "=m" (cpu_data->linux_tss.selector));
cpu_data->linux_efer = vmcb->efer & (~EFER_SVME);
- cpu_data->linux_fs.base = vmcb->fs.base;
+ cpu_data->linux_fs.base = read_msr(MSR_FS_BASE);
cpu_data->linux_gs.base = vmcb->gs.base;
cpu_data->linux_ds.selector = vmcb->ds.selector;
__builtin_unreachable();
}
-static void svm_vcpu_reset(struct per_cpu *cpu_data, unsigned int sipi_vector)
+void vcpu_vendor_reset(unsigned int sipi_vector)
{
static const struct svm_segment dataseg_reset_state = {
.selector = 0,
.limit = 0xffff,
		.access_rights = 0x0093,
};
+ struct per_cpu *cpu_data = this_cpu_data();
struct vmcb *vmcb = &cpu_data->vmcb;
unsigned long val;
vmcb->dr7 = 0x00000400;
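+	/* Make sure no stale event injection survives the reset */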
+ vmcb->eventinj = 0;
+
/* Almost all of the guest state changed */
vmcb->clean_bits = 0;
if (vmcb->efer & EFER_LMA) {
pg_structs->root_paging = x86_64_paging;
- pg_structs->root_table_gphys =
- vmcb->cr3 & 0x000ffffffffff000UL;
+ pg_structs->root_table_gphys = vmcb->cr3 & BIT_MASK(51, 12);
} else if ((vmcb->cr0 & X86_CR0_PG) &&
!(vmcb->cr4 & X86_CR4_PAE)) {
pg_structs->root_paging = i386_paging;
- pg_structs->root_table_gphys =
- vmcb->cr3 & 0xfffff000UL;
+ pg_structs->root_table_gphys = vmcb->cr3 & BIT_MASK(31, 12);
} else if (!(vmcb->cr0 & X86_CR0_PG)) {
/*
* Can be in non-paged protected mode as well, but
{
struct vmcb *vmcb = &cpu_data->vmcb;
bool res = false;
- int sipi_vector;
vmcb->gs.base = read_msr(MSR_GS_BASE);
cpu_data->stats[JAILHOUSE_CPU_STAT_VMEXITS_MANAGEMENT]++;
/* Temporarily enable GIF to consume pending NMI */
asm volatile("stgi; clgi" : : : "memory");
- sipi_vector = x86_handle_events(cpu_data);
- if (sipi_vector >= 0) {
- printk("CPU %d received SIPI, vector %x\n",
- cpu_data->cpu_id, sipi_vector);
- svm_vcpu_reset(cpu_data, sipi_vector);
- vcpu_reset(sipi_vector == APIC_BSP_PSEUDO_SIPI);
- }
- iommu_check_pending_faults();
+ x86_check_events();
goto vmentry;
case VMEXIT_VMMCALL:
vcpu_handle_hypercall();
if (vcpu_handle_mmio_access())
goto vmentry;
}
-
- panic_printk("FATAL: Unhandled Nested Page Fault for (%p), "
- "error code is %x\n", vmcb->exitinfo2,
- vmcb->exitinfo1 & 0xf);
break;
case VMEXIT_XSETBV:
if (vcpu_handle_xsetbv())
if (vcpu_handle_io_access())
goto vmentry;
break;
+ case VMEXIT_EXCEPTION_DB:
+ case VMEXIT_EXCEPTION_AC:
+ cpu_data->stats[JAILHOUSE_CPU_STAT_VMEXITS_EXCEPTION]++;
+		/*
+		 * Reinject the exception, including the error code if
+		 * needed. Exception exit codes are ordered by vector, so
+		 * exitcode - VMEXIT_EXCEPTION_DE recovers the vector number.
+		 */
+ vmcb->eventinj = (vmcb->exitcode - VMEXIT_EXCEPTION_DE) |
+ SVM_EVENTINJ_EXCEPTION | SVM_EVENTINJ_VALID;
+ if (vmcb->exitcode == VMEXIT_EXCEPTION_AC) {
+ vmcb->eventinj |= SVM_EVENTINJ_ERR_VALID;
+ vmcb->eventinj_err = vmcb->exitinfo1;
+ }
+ x86_check_events();
+ goto vmentry;
/* TODO: Handle VMEXIT_AVIC_NOACCEL and VMEXIT_AVIC_INCOMPLETE_IPI */
default:
panic_printk("FATAL: Unexpected #VMEXIT, exitcode %x, "
void vcpu_park(void)
{
- svm_vcpu_reset(this_cpu_data(), APIC_BSP_PSEUDO_SIPI);
- /* No need to clear VMCB Clean bit: vcpu_reset() already does this */
+ vcpu_vendor_reset(APIC_BSP_PSEUDO_SIPI);
+	/*
+	 * No need to clear the VMCB Clean bit: vcpu_vendor_reset() already
+	 * does this.
+	 */
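+	/* Run the parked CPU on the minimal parking page table */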
this_cpu_data()->vmcb.n_cr3 = paging_hvirt2phys(parked_mode_npt);
vcpu_tlb_flush();
void vcpu_vendor_get_cell_io_bitmap(struct cell *cell,
struct vcpu_io_bitmap *iobm)
{
- iobm->data = cell->svm.iopm;
- iobm->size = sizeof(cell->svm.iopm);
+ iobm->data = cell->arch.svm.iopm;
+ iobm->size = IOPM_PAGES * PAGE_SIZE;
}
void vcpu_vendor_get_execution_state(struct vcpu_execution_state *x_state)