/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013-2015
 * Copyright (c) Valentine Sinitsyn, 2014
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *  Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */
#include <jailhouse/control.h>
#include <jailhouse/mmio.h>
#include <jailhouse/paging.h>
#include <jailhouse/pci.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <asm/apic.h>
#include <asm/iommu.h>
#include <asm/bitops.h>
#include <asm/ioapic.h>
#include <asm/spinlock.h>
#include <asm/vtd.h>
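
/*
 * Bookkeeping for the interrupt remapping and invalidation queue emulation
 * provided to the root cell: which IRTE index is used by which device and
 * vector, plus the unit register state Linux programmed before the
 * hypervisor took over.
 */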
struct vtd_irte_usage {
	u16 device_id;
	u16 vector;
	unsigned int used;
} __attribute__((packed));

struct vtd_emulation {
	u64 irta;
	unsigned int irt_entries;
	struct vtd_irte_usage *irte_map;

	u64 iqa;
	u16 iqh;

	u32 fectl;
	u32 fedata;
	u32 feaddr, feuaddr;
};

static const struct vtd_entry inv_global_context = {
	.lo_word = VTD_REQ_INV_CONTEXT | VTD_INV_CONTEXT_GLOBAL,
};
static const struct vtd_entry inv_global_iotlb = {
	.lo_word = VTD_REQ_INV_IOTLB | VTD_INV_IOTLB_GLOBAL |
		VTD_INV_IOTLB_DW | VTD_INV_IOTLB_DR,
};
static const struct vtd_entry inv_global_int = {
	.lo_word = VTD_REQ_INV_INT | VTD_INV_INT_GLOBAL,
};

/* TODO: Support multiple segments */
static struct vtd_entry __attribute__((aligned(PAGE_SIZE)))
	root_entry_table[256];
static union vtd_irte *int_remap_table;
static unsigned int int_remap_table_size_log2;
static struct paging vtd_paging[VTD_MAX_PAGE_TABLE_LEVELS];
static void *dmar_reg_base;
static void *unit_inv_queue;
static unsigned int dmar_units;
static unsigned int dmar_pt_levels;
static unsigned int dmar_num_did = ~0U;
static unsigned int fault_reporting_cpu_id;
static DEFINE_SPINLOCK(inv_queue_lock);
static struct vtd_emulation root_cell_units[JAILHOUSE_MAX_IOMMU_UNITS];
static bool dmar_units_initialized;

unsigned int iommu_mmio_count_regions(struct cell *cell)
{
	return cell == &root_cell ? iommu_count_units() : 0;
}

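/*
 * Store one descriptor in a unit's invalidation queue and flush the CPU
 * caches so the (potentially non-coherent) hardware queue walker sees it.
 * Returns the index of the next free slot; the single-page queue wraps
 * around at its end.
 */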
static unsigned int inv_queue_write(void *inv_queue, unsigned int index,
				    struct vtd_entry content)
{
	struct vtd_entry *entry = inv_queue;

	entry[index] = content;
	arch_paging_flush_cpu_caches(&entry[index], sizeof(*entry));

	return (index + 1) % (PAGE_SIZE / sizeof(*entry));
}

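/*
 * Queue the given request (if any), followed by an invalidation wait
 * descriptor, and busy-wait until the hardware has written the wait
 * descriptor's status data - i.e. until all preceding requests have been
 * processed.
 */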
static void vtd_submit_iq_request(void *reg_base, void *inv_queue,
				  const struct vtd_entry *inv_request)
{
	volatile u32 completed = 0;
	struct vtd_entry inv_wait = {
		.lo_word = VTD_REQ_INV_WAIT | VTD_INV_WAIT_SW |
			VTD_INV_WAIT_FN | (1UL << VTD_INV_WAIT_SDATA_SHIFT),
		.hi_word = paging_hvirt2phys(&completed),
	};
	unsigned int index;

	spin_lock(&inv_queue_lock);

	index = mmio_read64_field(reg_base + VTD_IQT_REG, VTD_IQT_QT_MASK);

	if (inv_request)
		index = inv_queue_write(inv_queue, index, *inv_request);
	index = inv_queue_write(inv_queue, index, inv_wait);

	mmio_write64_field(reg_base + VTD_IQT_REG, VTD_IQT_QT_MASK, index);

	while (!completed)
		cpu_relax();

	spin_unlock(&inv_queue_lock);
}

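/*
 * Invalidate the context-cache and IOTLB entries of one domain on every
 * DMAR unit, e.g. after the domain's mappings have changed.
 */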
static void vtd_flush_domain_caches(unsigned int did)
{
	const struct vtd_entry inv_context = {
		.lo_word = VTD_REQ_INV_CONTEXT | VTD_INV_CONTEXT_DOMAIN |
			(did << VTD_INV_CONTEXT_DOMAIN_SHIFT),
	};
	const struct vtd_entry inv_iotlb = {
		.lo_word = VTD_REQ_INV_IOTLB | VTD_INV_IOTLB_DOMAIN |
			VTD_INV_IOTLB_DW | VTD_INV_IOTLB_DR |
			(did << VTD_INV_IOTLB_DOMAIN_SHIFT),
	};
	void *inv_queue = unit_inv_queue;
	void *reg_base = dmar_reg_base;
	unsigned int n;

	for (n = 0; n < dmar_units; n++) {
		vtd_submit_iq_request(reg_base, inv_queue, &inv_context);
		vtd_submit_iq_request(reg_base, inv_queue, &inv_iotlb);
		reg_base += PAGE_SIZE;
		inv_queue += PAGE_SIZE;
	}
}

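/*
 * Set or clear one control bit in the global command register and poll the
 * global status register until the hardware signals completion.
 */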
static void vtd_update_gcmd_reg(void *reg_base, u32 mask, unsigned int set)
{
	u32 val = mmio_read32(reg_base + VTD_GSTS_REG) & VTD_GSTS_USED_CTRLS;

	if (set)
		val |= mask;
	else
		val &= ~mask;
	mmio_write32(reg_base + VTD_GCMD_REG, val);

	/* Note: This test is built on the fact that the related bits are at
	 * the same position in VTD_GCMD_REG and VTD_GSTS_REG. */
	while ((mmio_read32(reg_base + VTD_GSTS_REG) & mask) != (val & mask))
		cpu_relax();
}

static void vtd_set_next_pt(pt_entry_t pte, unsigned long next_pt)
{
	*pte = (next_pt & 0x000ffffffffff000UL) | VTD_PAGE_READ |
		VTD_PAGE_WRITE;
}

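/*
 * Route the fault reporting interrupts of all DMAR units, as NMIs, to one
 * CPU of the root cell. That CPU then reports pending faults via
 * iommu_check_pending_faults.
 */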
static void vtd_init_fault_nmi(void)
{
	union x86_msi_vector msi = { .native.address = MSI_ADDRESS_VALUE };
	void *reg_base = dmar_reg_base;
	struct per_cpu *cpu_data;
	unsigned int n;

	/* This assumes that at least one bit is set somewhere because we
	 * don't support configurations where Linux is left with no CPUs. */
	for (n = 0; root_cell.cpu_set->bitmap[n] == 0; n++)
		/* Empty loop body */;
	cpu_data = per_cpu(ffsl(root_cell.cpu_set->bitmap[n]));

	/* We only support 8-bit APIC IDs. */
	msi.native.destination = (u8)cpu_data->apic_id;

	/* Save this value globally to avoid multiple reports of the same
	 * case from different CPUs. */
	fault_reporting_cpu_id = cpu_data->cpu_id;

	for (n = 0; n < dmar_units; n++, reg_base += PAGE_SIZE) {
		/* Mask events */
		mmio_write32_field(reg_base + VTD_FECTL_REG, VTD_FECTL_IM, 1);

		/*
		 * VT-d spec rev. 2.3 section 7.4 suggests that only reading
		 * back FSTS or FECTL ensures no interrupt messages are still
		 * in-flight when we change their destination below.
		 */
		mmio_read32(reg_base + VTD_FECTL_REG);

		/* Program MSI message to send NMIs to the target CPU */
		mmio_write32(reg_base + VTD_FEDATA_REG, MSI_DM_NMI);
		mmio_write32(reg_base + VTD_FEADDR_REG, (u32)msi.raw.address);
		mmio_write32(reg_base + VTD_FEUADDR_REG, 0);

		/* Unmask events */
		mmio_write32_field(reg_base + VTD_FECTL_REG, VTD_FECTL_IM, 0);
	}

	/*
	 * There is a race window between setting the new reporting CPU ID and
	 * updating the target programming in the register. If a fault hits us
	 * in this window and no other NMIs arrive after that, the event will
	 * not be reported. Address this by triggering an NMI on the new
	 * reporting CPU.
	 */
	apic_send_nmi_ipi(cpu_data);
}

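/* The FRO capability field encodes the offset of the fault recording
 * registers in units of 16 bytes. */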
static void *vtd_get_fault_rec_reg_addr(void *reg_base)
{
	return reg_base + 16 *
		mmio_read64_field(reg_base + VTD_CAP_REG, VTD_CAP_FRO_MASK);
}

static void vtd_print_fault_record_reg_status(unsigned int unit_no,
					      void *reg_base)
{
	unsigned int sid = mmio_read64_field(reg_base + VTD_FRCD_HI_REG,
					     VTD_FRCD_HI_SID_MASK);
	unsigned int fr = mmio_read64_field(reg_base + VTD_FRCD_HI_REG,
					    VTD_FRCD_HI_FR_MASK);
	unsigned long fi = mmio_read64_field(reg_base + VTD_FRCD_LO_REG,
					     VTD_FRCD_LO_FI_MASK);
	unsigned int type = mmio_read64_field(reg_base + VTD_FRCD_HI_REG,
					      VTD_FRCD_HI_TYPE_MASK);

	printk("VT-d fault event reported by IOMMU %d:\n", unit_no);
	printk(" Source Identifier (bus:dev.func): %02x:%02x.%x\n",
	       PCI_BDF_PARAMS(sid));
	printk(" Fault Reason: 0x%x Fault Info: %lx Type %d\n", fr, fi, type);
}

void iommu_check_pending_faults(void)
{
	unsigned int fr_index;
	void *reg_base = dmar_reg_base;
	unsigned int n;
	void *fault_reg_addr, *rec_reg_addr;

	if (this_cpu_id() != fault_reporting_cpu_id)
		return;

	for (n = 0; n < dmar_units; n++, reg_base += PAGE_SIZE)
		if (mmio_read32_field(reg_base + VTD_FSTS_REG, VTD_FSTS_PPF)) {
			fr_index = mmio_read32_field(reg_base + VTD_FSTS_REG,
						     VTD_FSTS_FRI_MASK);
			fault_reg_addr = vtd_get_fault_rec_reg_addr(reg_base);
			rec_reg_addr = fault_reg_addr + 16 * fr_index;
			vtd_print_fault_record_reg_status(n, rec_reg_addr);

			/* Clear faults in record registers */
			mmio_write64_field(rec_reg_addr + VTD_FRCD_HI_REG,
					   VTD_FRCD_HI_F, VTD_FRCD_HI_F_CLEAR);
		}
}

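/*
 * Re-evaluate one root cell IRTE after an interrupt entry invalidation:
 * re-read the entry and apply its new routing to the physical IRTE, or to
 * the MSI-X emulation if the entry belongs to an ivshmem device.
 */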
static int vtd_emulate_inv_int(unsigned int unit_no, unsigned int index)
{
	struct vtd_irte_usage *irte_usage;
	struct apic_irq_message irq_msg;
	struct pci_device *device;

	if (index >= root_cell_units[unit_no].irt_entries)
		return 0;
	irte_usage = &root_cell_units[unit_no].irte_map[index];
	if (!irte_usage->used)
		return 0;

	device = pci_get_assigned_device(&root_cell, irte_usage->device_id);
	if (device && device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
		return pci_ivshmem_update_msix(device);

	irq_msg = iommu_get_remapped_root_int(unit_no, irte_usage->device_id,
					      irte_usage->vector, index);
	return iommu_map_interrupt(&root_cell, irte_usage->device_id,
				   irte_usage->vector, irq_msg);
}

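/*
 * Emulate one descriptor the root cell posted to its invalidation queue.
 * Only interrupt entry invalidations and software-status wait descriptors
 * are supported; any other request is rejected.
 */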
static int vtd_emulate_qi_request(unsigned int unit_no,
				  struct vtd_entry inv_desc)
{
	unsigned int start, count, n;
	void *status_page;
	int result;

	switch (inv_desc.lo_word & VTD_REQ_INV_MASK) {
	case VTD_REQ_INV_INT:
		if (inv_desc.lo_word & VTD_INV_INT_INDEX) {
			start = (inv_desc.lo_word & VTD_INV_INT_IIDX_MASK) >>
				VTD_INV_INT_IIDX_SHIFT;
			count =
			    1 << ((inv_desc.lo_word & VTD_INV_INT_IM_MASK) >>
				  VTD_INV_INT_IM_SHIFT);
		} else {
			start = 0;
			count = root_cell_units[unit_no].irt_entries;
		}
		for (n = start; n < start + count; n++) {
			result = vtd_emulate_inv_int(unit_no, n);
			if (result < 0)
				return result;
		}
		return 0;
	case VTD_REQ_INV_WAIT:
		if (inv_desc.lo_word & VTD_INV_WAIT_IF ||
		    !(inv_desc.lo_word & VTD_INV_WAIT_SW))
			return -EINVAL;

		status_page = paging_get_guest_pages(NULL, inv_desc.hi_word, 1,
						     PAGE_DEFAULT_FLAGS);
		if (!status_page)
			return -EINVAL;

		*(u32 *)(status_page + (inv_desc.hi_word & ~PAGE_MASK)) =
			inv_desc.lo_word >> 32;

		return 0;
	}
	return -EINVAL;
}

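/*
 * MMIO handler for root cell accesses to an emulated DMAR unit. Fault
 * status reads return 0, writes to the queue tail register drive the
 * emulated invalidation queue; any other access is fatal.
 */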
static enum mmio_result vtd_unit_access_handler(void *arg,
						struct mmio_access *mmio)
{
	struct vtd_emulation *unit = arg;
	unsigned int unit_no = unit - root_cell_units;
	struct vtd_entry inv_desc;
	void *inv_desc_page;

	if (mmio->address == VTD_FSTS_REG && !mmio->is_write) {
		/*
		 * Nothing to report this way, iommu_check_pending_faults
		 * takes care of the whole system.
		 */
		mmio->value = 0;
		return MMIO_HANDLED;
	}
	if (mmio->address == VTD_IQT_REG && mmio->is_write) {
		while (unit->iqh != (mmio->value & ~PAGE_MASK)) {
			inv_desc_page =
				paging_get_guest_pages(NULL, unit->iqa, 1,
						       PAGE_READONLY_FLAGS);
			if (!inv_desc_page)
				goto invalid_iq_entry;

			inv_desc =
			    *(struct vtd_entry *)(inv_desc_page + unit->iqh);

			if (vtd_emulate_qi_request(unit_no, inv_desc) != 0)
				goto invalid_iq_entry;

			unit->iqh += 1 << VTD_IQH_QH_SHIFT;
			unit->iqh &= ~PAGE_MASK;
		}
		return MMIO_HANDLED;
	}
	panic_printk("FATAL: Unhandled DMAR unit %s access, register %02x\n",
		     mmio->is_write ? "write" : "read", mmio->address);
	return MMIO_ERROR;

invalid_iq_entry:
	panic_printk("FATAL: Invalid/unsupported invalidation queue entry\n");
	return MMIO_ERROR;
}

static void vtd_init_unit(void *reg_base, void *inv_queue)
{
	void *fault_reg_base;
	unsigned int nfr, n;

	/* Disable QI and IR in case they were already on */
	vtd_update_gcmd_reg(reg_base, VTD_GCMD_QIE, 0);
	vtd_update_gcmd_reg(reg_base, VTD_GCMD_IRE, 0);

	nfr = mmio_read64_field(reg_base + VTD_CAP_REG, VTD_CAP_NFR_MASK);
	fault_reg_base = vtd_get_fault_rec_reg_addr(reg_base);

	for (n = 0; n < nfr; n++)
		/* Clear fault recording register status */
		mmio_write64_field(fault_reg_base + 16 * n + VTD_FRCD_HI_REG,
				   VTD_FRCD_HI_F, VTD_FRCD_HI_F_CLEAR);

	/* Clear fault overflow status */
	mmio_write32_field(reg_base + VTD_FSTS_REG, VTD_FSTS_PFO,
			   VTD_FSTS_PFO_CLEAR);

	/* Set root entry table pointer */
	mmio_write64(reg_base + VTD_RTADDR_REG,
		     paging_hvirt2phys(root_entry_table));
	vtd_update_gcmd_reg(reg_base, VTD_GCMD_SRTP, 1);

	/* Set interrupt remapping table pointer */
	mmio_write64(reg_base + VTD_IRTA_REG,
		     paging_hvirt2phys(int_remap_table) |
		     (using_x2apic ? VTD_IRTA_EIME : 0) |
		     (int_remap_table_size_log2 - 1));
	vtd_update_gcmd_reg(reg_base, VTD_GCMD_SIRTP, 1);

	/* Set up and activate invalidation queue */
	mmio_write64(reg_base + VTD_IQT_REG, 0);
	mmio_write64(reg_base + VTD_IQA_REG, paging_hvirt2phys(inv_queue));
	vtd_update_gcmd_reg(reg_base, VTD_GCMD_QIE, 1);

	vtd_submit_iq_request(reg_base, inv_queue, &inv_global_context);
	vtd_submit_iq_request(reg_base, inv_queue, &inv_global_iotlb);
	vtd_submit_iq_request(reg_base, inv_queue, &inv_global_int);

	vtd_update_gcmd_reg(reg_base, VTD_GCMD_TE, 1);
	vtd_update_gcmd_reg(reg_base, VTD_GCMD_IRE, 1);
}

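/*
 * Take over a unit on which Linux already enabled interrupt remapping and
 * queued invalidation: register the MMIO emulation region, record the IRTA
 * and IQA programming, wait for the queue to drain and save the fault
 * event registers for restoration on shutdown.
 */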
static int vtd_init_ir_emulation(unsigned int unit_no, void *reg_base)
{
	struct vtd_emulation *unit = &root_cell_units[unit_no];
	unsigned long base, size;
	u64 iqt;

	root_cell.arch.vtd.ir_emulation = true;

	base = system_config->platform_info.x86.iommu_units[unit_no].base;
	mmio_region_register(&root_cell, base, PAGE_SIZE,
			     vtd_unit_access_handler, unit);

	unit->irta = mmio_read64(reg_base + VTD_IRTA_REG);
	unit->irt_entries = 2 << (unit->irta & VTD_IRTA_SIZE_MASK);

	size = PAGE_ALIGN(sizeof(struct vtd_irte_usage) * unit->irt_entries);
	unit->irte_map = page_alloc(&mem_pool, size / PAGE_SIZE);
	if (!unit->irte_map)
		return -ENOMEM;

	iqt = mmio_read64(reg_base + VTD_IQT_REG);
	while (mmio_read64(reg_base + VTD_IQH_REG) != iqt)
		cpu_relax();
	unit->iqh = iqt;

	unit->iqa = mmio_read64(reg_base + VTD_IQA_REG);
	if (unit->iqa & ~VTD_IQA_ADDR_MASK)
		return trace_error(-EIO);

	unit->fectl = mmio_read32(reg_base + VTD_FECTL_REG);
	unit->fedata = mmio_read32(reg_base + VTD_FEDATA_REG);
	unit->feaddr = mmio_read32(reg_base + VTD_FEADDR_REG);
	unit->feuaddr = mmio_read32(reg_base + VTD_FEUADDR_REG);

	return 0;
}

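/*
 * Probe all DMAR units, check their capabilities, allocate the shared
 * interrupt remapping table and derive the VT-d page table descriptors.
 */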
int iommu_init(void)
{
	unsigned long version, caps, ecaps, ctrls, sllps_caps = ~0UL;
	unsigned int units, pt_levels, num_did, n;
	struct jailhouse_iommu *unit;
	void *reg_base;
	int err;

	/* n = roundup(log2(system_config->interrupt_limit)) */
	for (n = 0; (1UL << n) < (system_config->interrupt_limit); n++)
		/* Empty loop body */;
	if (n >= 16)
		return trace_error(-EINVAL);

	int_remap_table =
		page_alloc(&mem_pool, PAGES(sizeof(union vtd_irte) << n));
	if (!int_remap_table)
		return -ENOMEM;

	int_remap_table_size_log2 = n;

	units = iommu_count_units();
	if (units == 0)
		return trace_error(-EINVAL);

	dmar_reg_base = page_alloc(&remap_pool, units);
	if (!dmar_reg_base)
		return trace_error(-ENOMEM);

	unit_inv_queue = page_alloc(&mem_pool, units);
	if (!unit_inv_queue)
		return -ENOMEM;

	for (n = 0; n < units; n++) {
		unit = &system_config->platform_info.x86.iommu_units[n];

		reg_base = dmar_reg_base + n * PAGE_SIZE;

		err = paging_create(&hv_paging_structs, unit->base, PAGE_SIZE,
				    (unsigned long)reg_base,
				    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
				    PAGING_NON_COHERENT);
		if (err)
			return err;

		version = mmio_read64(reg_base + VTD_VER_REG) & VTD_VER_MASK;
		if (version < VTD_VER_MIN || version == 0xff) {
			//return -EIO;
			// HACK for QEMU
			printk("WARNING: No VT-d support found!\n");
			return 0;
		}

		printk("Found DMAR @%p\n", unit->base);

		caps = mmio_read64(reg_base + VTD_CAP_REG);
		if (caps & VTD_CAP_SAGAW39)
			pt_levels = 3;
		else if (caps & VTD_CAP_SAGAW48)
			pt_levels = 4;
		else
			return trace_error(-EIO);
		sllps_caps &= caps;

		if (dmar_pt_levels > 0 && dmar_pt_levels != pt_levels)
			return trace_error(-EIO);
		dmar_pt_levels = pt_levels;

		if (caps & VTD_CAP_CM)
			return trace_error(-EIO);

		ecaps = mmio_read64(reg_base + VTD_ECAP_REG);
		if (!(ecaps & VTD_ECAP_QI) || !(ecaps & VTD_ECAP_IR) ||
		    (using_x2apic && !(ecaps & VTD_ECAP_EIM)))
			return trace_error(-EIO);

		ctrls = mmio_read32(reg_base + VTD_GSTS_REG) &
			VTD_GSTS_USED_CTRLS;
		if (ctrls != 0) {
			if (ctrls != (VTD_GSTS_IRES | VTD_GSTS_QIES))
				return trace_error(-EBUSY);
			err = vtd_init_ir_emulation(n, reg_base);
			if (err)
				return err;
		} else if (root_cell.arch.vtd.ir_emulation) {
			/* IR+QI must be either on or off in all units */
			return trace_error(-EIO);
		}

		num_did = 1 << (4 + (caps & VTD_CAP_NUM_DID_MASK) * 2);
		if (num_did < dmar_num_did)
			dmar_num_did = num_did;
	}

	dmar_units = units;

	/*
	 * Derive vtd_paging from the very similar x86_64_paging,
	 * replicating 0..3 for 4 levels and 1..3 for 3 levels.
	 */
	memcpy(vtd_paging, &x86_64_paging[4 - dmar_pt_levels],
	       sizeof(struct paging) * dmar_pt_levels);
	for (n = 0; n < dmar_pt_levels; n++)
		vtd_paging[n].set_next_pt = vtd_set_next_pt;
	if (!(sllps_caps & VTD_CAP_SLLPS1G))
		vtd_paging[dmar_pt_levels - 3].page_size = 0;
	if (!(sllps_caps & VTD_CAP_SLLPS2M))
		vtd_paging[dmar_pt_levels - 2].page_size = 0;

	return iommu_cell_init(&root_cell);
}

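/*
 * Update one entry of the hardware interrupt remapping table and invalidate
 * the interrupt entry cache of every unit for that index.
 */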
static void vtd_update_irte(unsigned int index, union vtd_irte content)
{
	const struct vtd_entry inv_int = {
		.lo_word = VTD_REQ_INV_INT | VTD_INV_INT_INDEX |
			((u64)index << VTD_INV_INT_IIDX_SHIFT),
	};
	union vtd_irte *irte = &int_remap_table[index];
	void *inv_queue = unit_inv_queue;
	void *reg_base = dmar_reg_base;
	unsigned int n;

	if (content.field.p) {
		/*
		 * Write upper half first to preserve non-presence.
		 * If the entry was present before, we are only modifying the
		 * lower half's content (destination etc.), so writing the
		 * upper half becomes a nop and is safely done first.
		 */
		irte->raw[1] = content.raw[1];
		memory_barrier();
		irte->raw[0] = content.raw[0];
	} else {
		/*
		 * Write only lower half - we are clearing presence and
		 * assignment.
		 */
		irte->raw[0] = content.raw[0];
	}
	arch_paging_flush_cpu_caches(irte, sizeof(*irte));

	for (n = 0; n < dmar_units; n++) {
		vtd_submit_iq_request(reg_base, inv_queue, &inv_int);
		reg_base += PAGE_SIZE;
		inv_queue += PAGE_SIZE;
	}
}

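/*
 * An interrupt remapping region is a contiguous block of IRTEs assigned to
 * one requester ID (PCI device or IOAPIC).
 */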
static int vtd_find_int_remap_region(u16 device_id)
{
	int n;

	/* interrupt_limit is < 2^16, see iommu_init */
	for (n = 0; n < system_config->interrupt_limit; n++)
		if (int_remap_table[n].field.assigned &&
		    int_remap_table[n].field.sid == device_id)
			return n;

	return -ENOENT;
}

static int vtd_reserve_int_remap_region(u16 device_id, unsigned int length)
{
	int n, start = -E2BIG;

	if (length == 0 || vtd_find_int_remap_region(device_id) >= 0)
		return 0;

	for (n = 0; n < system_config->interrupt_limit; n++) {
		if (int_remap_table[n].field.assigned) {
			start = -E2BIG;
			continue;
		}
		if (start < 0)
			start = n;
		if (n + 1 == start + length) {
			printk("Reserving %u interrupt(s) for device %04x "
			       "at index %d\n", length, device_id, start);
			for (n = start; n < start + length; n++) {
				int_remap_table[n].field.assigned = 1;
				int_remap_table[n].field.sid = device_id;
			}
			return start;
		}
	}

	return trace_error(-E2BIG);
}

static void vtd_free_int_remap_region(u16 device_id, unsigned int length)
{
	union vtd_irte free_irte = { .field.p = 0, .field.assigned = 0 };
	int pos = vtd_find_int_remap_region(device_id);

	if (pos >= 0) {
		printk("Freeing %u interrupt(s) for device %04x at index %d\n",
		       length, device_id, pos);
		while (length-- > 0)
			vtd_update_irte(pos++, free_irte);
	}
}

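/*
 * Attach a device to its cell's DMA mappings: reserve IRTEs for all of its
 * MSI/MSI-X vectors and install a context entry that points to the cell's
 * page tables, allocating the bus's context entry table on first use.
 */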
int iommu_add_pci_device(struct cell *cell, struct pci_device *device)
{
	unsigned int max_vectors = MAX(device->info->num_msi_vectors,
				       device->info->num_msix_vectors);
	u16 bdf = device->info->bdf;
	u64 *root_entry_lo = &root_entry_table[PCI_BUS(bdf)].lo_word;
	struct vtd_entry *context_entry_table, *context_entry;
	int result;

	// HACK for QEMU
	if (dmar_units == 0)
		return 0;

	result = vtd_reserve_int_remap_region(bdf, max_vectors);
	if (result < 0)
		return result;

	if (*root_entry_lo & VTD_ROOT_PRESENT) {
		context_entry_table =
			paging_phys2hvirt(*root_entry_lo & PAGE_MASK);
	} else {
		context_entry_table = page_alloc(&mem_pool, 1);
		if (!context_entry_table)
			goto error_nomem;
		*root_entry_lo = VTD_ROOT_PRESENT |
			paging_hvirt2phys(context_entry_table);
		arch_paging_flush_cpu_caches(root_entry_lo, sizeof(u64));
	}

	context_entry = &context_entry_table[PCI_DEVFN(bdf)];
	context_entry->lo_word = VTD_CTX_PRESENT | VTD_CTX_TTYPE_MLP_UNTRANS |
		paging_hvirt2phys(cell->arch.vtd.pg_structs.root_table);
	context_entry->hi_word =
		(dmar_pt_levels == 3 ? VTD_CTX_AGAW_39 : VTD_CTX_AGAW_48) |
		(cell->id << VTD_CTX_DID_SHIFT);
	arch_paging_flush_cpu_caches(context_entry, sizeof(*context_entry));

	return 0;

error_nomem:
	vtd_free_int_remap_region(bdf, max_vectors);
	return -ENOMEM;
}

void iommu_remove_pci_device(struct pci_device *device)
{
	u16 bdf = device->info->bdf;
	u64 *root_entry_lo = &root_entry_table[PCI_BUS(bdf)].lo_word;
	struct vtd_entry *context_entry_table;
	struct vtd_entry *context_entry;
	unsigned int n;

	// HACK for QEMU
	if (dmar_units == 0)
		return;

	vtd_free_int_remap_region(bdf, MAX(device->info->num_msi_vectors,
					   device->info->num_msix_vectors));

	context_entry_table = paging_phys2hvirt(*root_entry_lo & PAGE_MASK);
	context_entry = &context_entry_table[PCI_DEVFN(bdf)];

	context_entry->lo_word &= ~VTD_CTX_PRESENT;
	arch_paging_flush_cpu_caches(&context_entry->lo_word, sizeof(u64));

	for (n = 0; n < 256; n++)
		if (context_entry_table[n].lo_word & VTD_CTX_PRESENT)
			return;

	*root_entry_lo &= ~VTD_ROOT_PRESENT;
	arch_paging_flush_cpu_caches(root_entry_lo, sizeof(u64));
	page_free(&mem_pool, context_entry_table, 1);
}

int iommu_cell_init(struct cell *cell)
{
	const struct jailhouse_irqchip *irqchip =
		jailhouse_cell_irqchips(cell->config);
	unsigned int n;
	int result;

	// HACK for QEMU
	if (dmar_units == 0)
		return 0;

	if (cell->id >= dmar_num_did)
		return trace_error(-ERANGE);

	cell->arch.vtd.pg_structs.root_paging = vtd_paging;
	cell->arch.vtd.pg_structs.root_table = page_alloc(&mem_pool, 1);
	if (!cell->arch.vtd.pg_structs.root_table)
		return -ENOMEM;

	/* reserve regions for IRQ chips (if not done already) */
	for (n = 0; n < cell->config->num_irqchips; n++, irqchip++) {
		result = vtd_reserve_int_remap_region(irqchip->id,
						      IOAPIC_NUM_PINS);
		if (result < 0) {
			iommu_cell_exit(cell);
			return result;
		}
	}

	return 0;
}

int iommu_map_memory_region(struct cell *cell,
			    const struct jailhouse_memory *mem)
{
	u32 flags = 0;

	// HACK for QEMU
	if (dmar_units == 0)
		return 0;

	if (!(mem->flags & JAILHOUSE_MEM_DMA))
		return 0;

	if (mem->flags & JAILHOUSE_MEM_READ)
		flags |= VTD_PAGE_READ;
	if (mem->flags & JAILHOUSE_MEM_WRITE)
		flags |= VTD_PAGE_WRITE;

	return paging_create(&cell->arch.vtd.pg_structs, mem->phys_start,
			     mem->size, mem->virt_start, flags,
			     PAGING_COHERENT);
}

int iommu_unmap_memory_region(struct cell *cell,
			      const struct jailhouse_memory *mem)
{
	// HACK for QEMU
	if (dmar_units == 0)
		return 0;

	if (!(mem->flags & JAILHOUSE_MEM_DMA))
		return 0;

	return paging_destroy(&cell->arch.vtd.pg_structs, mem->virt_start,
			      mem->size, PAGING_COHERENT);
}

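/*
 * Read the root cell's IRTE for the given remapping index and translate it
 * into an APIC interrupt message. The message is only marked valid if the
 * entry is present and matches the expected requester ID.
 */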
struct apic_irq_message
iommu_get_remapped_root_int(unsigned int iommu, u16 device_id,
			    unsigned int vector, unsigned int remap_index)
{
	struct vtd_emulation *unit = &root_cell_units[iommu];
	struct apic_irq_message irq_msg = { .valid = 0 };
	union vtd_irte root_irte;
	unsigned long irte_addr;
	void *irte_page;

	if (remap_index >= unit->irt_entries)
		return irq_msg;
	unit->irte_map[remap_index].used = 0;

	irte_addr = (unit->irta & VTD_IRTA_ADDR_MASK) +
		remap_index * sizeof(union vtd_irte);
	irte_page = paging_get_guest_pages(NULL, irte_addr, 1,
					   PAGE_READONLY_FLAGS);
	if (!irte_page)
		return irq_msg;

	root_irte = *(union vtd_irte *)(irte_page + (irte_addr & ~PAGE_MASK));

	irq_msg.valid =
		(root_irte.field.p && root_irte.field.sid == device_id);
	irq_msg.vector = root_irte.field.vector;
	irq_msg.delivery_mode = root_irte.field.delivery_mode;
	irq_msg.dest_logical = root_irte.field.dest_logical;
	irq_msg.level_triggered = root_irte.field.level_triggered;
	irq_msg.redir_hint = root_irte.field.redir_hint;
	irq_msg.destination = root_irte.field.destination;
	if (!using_x2apic)
		/* xAPIC in flat mode: APIC ID in 47:40 (of 63:32) */
		irq_msg.destination >>= 8;

	unit->irte_map[remap_index].device_id = device_id;
	unit->irte_map[remap_index].vector = vector;
	unit->irte_map[remap_index].used = 1;

	return irq_msg;
}

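/*
 * Validate and program one remapped interrupt for a cell. Returns the IRTE
 * index that now routes this interrupt, or a negative error code.
 */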
int iommu_map_interrupt(struct cell *cell, u16 device_id, unsigned int vector,
			struct apic_irq_message irq_msg)
{
	union vtd_irte irte;
	int base_index;

	// HACK for QEMU
	if (dmar_units == 0)
		return -ENOSYS;

	base_index = vtd_find_int_remap_region(device_id);
	if (base_index < 0)
		return base_index;

	if (vector >= system_config->interrupt_limit ||
	    base_index >= system_config->interrupt_limit - vector)
		return -ERANGE;

	irte = int_remap_table[base_index + vector];
	if (!irte.field.assigned || irte.field.sid != device_id)
		return -ERANGE;

	irte.field.p = irq_msg.valid;
	if (!irte.field.p)
		/*
		 * Do not validate non-present entries, they may contain
		 * invalid data and cause false-positives.
		 */
		goto update_irte;

	/*
	 * Validate delivery mode and destination(s).
	 * Note that we do support redirection hint only in logical
	 * destination mode.
	 */
	if ((irq_msg.delivery_mode != APIC_MSG_DLVR_FIXED &&
	     irq_msg.delivery_mode != APIC_MSG_DLVR_LOWPRI) ||
	    irq_msg.dest_logical != irq_msg.redir_hint)
		return -EINVAL;
	if (!apic_filter_irq_dest(cell, &irq_msg))
		return -EPERM;

	irte.field.dest_logical = irq_msg.dest_logical;
	irte.field.redir_hint = irq_msg.redir_hint;
	irte.field.level_triggered = irq_msg.level_triggered;
	irte.field.delivery_mode = irq_msg.delivery_mode;
	irte.field.vector = irq_msg.vector;
	irte.field.destination = irq_msg.destination;
	if (!using_x2apic)
		/* xAPIC in flat mode: APIC ID in 47:40 (of 63:32) */
		irte.field.destination <<= 8;
	irte.field.sq = VTD_IRTE_SQ_VERIFY_FULL_SID;
	irte.field.svt = VTD_IRTE_SVT_VERIFY_SID_SQ;

update_irte:
	vtd_update_irte(base_index + vector, irte);

	return base_index + vector;
}

void iommu_cell_exit(struct cell *cell)
{
	// HACK for QEMU
	if (dmar_units == 0)
		return;

	page_free(&mem_pool, cell->arch.vtd.pg_structs.root_table, 1);

	/*
	 * Note that reservation regions of IOAPICs won't be released because
	 * they might be shared with other cells.
	 */
}

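/*
 * Commit a configuration change: re-target fault NMIs, run the full unit
 * initialization when the root cell itself is committed (i.e. at the end of
 * hypervisor setup), otherwise flush the caches of the affected domain and
 * of the root cell.
 */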
void iommu_config_commit(struct cell *cell_added_removed)
{
	void *inv_queue = unit_inv_queue;
	void *reg_base = dmar_reg_base;
	unsigned int n;

	// HACK for QEMU
	if (dmar_units == 0)
		return;

	if (cell_added_removed)
		vtd_init_fault_nmi();

	if (cell_added_removed == &root_cell) {
		for (n = 0; n < dmar_units; n++) {
			vtd_init_unit(reg_base, inv_queue);
			reg_base += PAGE_SIZE;
			inv_queue += PAGE_SIZE;
		}
		dmar_units_initialized = true;
	} else {
		if (cell_added_removed)
			vtd_flush_domain_caches(cell_added_removed->id);
		vtd_flush_domain_caches(root_cell.id);
	}
}

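/*
 * Hand a unit back to Linux on shutdown: restore the saved interrupt
 * remapping and invalidation queue programming and bring the hardware queue
 * head back in sync with the state Linux last observed.
 */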
static void vtd_restore_ir(unsigned int unit_no, void *reg_base)
{
	struct vtd_emulation *unit = &root_cell_units[unit_no];
	void *inv_queue = unit_inv_queue + unit_no * PAGE_SIZE;
	void *root_inv_queue;
	u64 iqh;

	mmio_write64(reg_base + VTD_IRTA_REG, unit->irta);
	vtd_update_gcmd_reg(reg_base, VTD_GCMD_SIRTP, 1);
	vtd_submit_iq_request(reg_base, inv_queue, &inv_global_int);

	vtd_update_gcmd_reg(reg_base, VTD_GCMD_QIE, 0);
	mmio_write64(reg_base + VTD_IQT_REG, 0);
	mmio_write64(reg_base + VTD_IQA_REG, unit->iqa);
	vtd_update_gcmd_reg(reg_base, VTD_GCMD_QIE, 1);

	/*
	 * Restore invalidation queue head pointer by issuing dummy requests
	 * until the hardware is in sync with the Linux state again.
	 */
	iqh = unit->iqh;
	root_inv_queue = paging_get_guest_pages(NULL, unit->iqa, 1,
						PAGE_DEFAULT_FLAGS);
	if (root_inv_queue)
		while (mmio_read64(reg_base + VTD_IQH_REG) != iqh)
			vtd_submit_iq_request(reg_base, root_inv_queue, NULL);
	else
		printk("WARNING: Failed to restore invalidation queue head\n");

	vtd_update_gcmd_reg(reg_base, VTD_GCMD_IRE, 1);

	mmio_write32(reg_base + VTD_FEDATA_REG, unit->fedata);
	mmio_write32(reg_base + VTD_FEADDR_REG, unit->feaddr);
	mmio_write32(reg_base + VTD_FEUADDR_REG, unit->feuaddr);
	mmio_write32(reg_base + VTD_FECTL_REG, unit->fectl);
}

void iommu_shutdown(void)
{
	void *reg_base = dmar_reg_base;
	unsigned int n;

	if (dmar_units_initialized)
		for (n = 0; n < dmar_units; n++, reg_base += PAGE_SIZE) {
			vtd_update_gcmd_reg(reg_base, VTD_GCMD_TE, 0);
			vtd_update_gcmd_reg(reg_base, VTD_GCMD_IRE, 0);
			if (root_cell.arch.vtd.ir_emulation)
				vtd_restore_ir(n, reg_base);
			else
				vtd_update_gcmd_reg(reg_base, VTD_GCMD_QIE, 0);
		}
}

bool iommu_cell_emulates_ir(struct cell *cell)
{
	return cell->arch.vtd.ir_emulation;
}