/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Valentine Sinitsyn, 2014, 2015
 * Copyright (c) Siemens AG, 2016
 *
 * Authors:
 *  Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * Commands posting and event log parsing code, as well as many defines
 * were adapted from Linux's amd_iommu driver written by Joerg Roedel
 * and Leo Duran.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/cell.h>
#include <jailhouse/cell-config.h>
#include <jailhouse/control.h>
#include <jailhouse/mmio.h>
#include <jailhouse/pci.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <asm/amd_iommu.h>
#include <asm/apic.h>
#include <asm/iommu.h>

#define CAPS_IOMMU_HEADER_REG 0x00
#define CAPS_IOMMU_EFR_SUP (1 << 27)
#define CAPS_IOMMU_BASE_LOW_REG 0x04
#define CAPS_IOMMU_ENABLE (1 << 0)
#define CAPS_IOMMU_BASE_HI_REG 0x08

#define ACPI_REPORTING_HE_SUP (1 << 7)

#define AMD_DEV_TABLE_BASE_REG 0x0000
#define AMD_CMD_BUF_BASE_REG 0x0008
#define AMD_EVT_LOG_BASE_REG 0x0010
#define AMD_CONTROL_REG 0x0018
#define AMD_CONTROL_IOMMU_EN (1UL << 0)
#define AMD_CONTROL_EVT_LOG_EN (1UL << 2)
#define AMD_CONTROL_EVT_INT_EN (1UL << 3)
#define AMD_CONTROL_COMM_WAIT_INT_EN (1UL << 4)
#define AMD_CONTROL_CMD_BUF_EN (1UL << 12)
#define AMD_CONTROL_SMIF_EN (1UL << 22)
#define AMD_CONTROL_SMIFLOG_EN (1UL << 24)
#define AMD_CONTROL_SEG_EN_MASK BIT_MASK(36, 34)
#define AMD_CONTROL_SEG_EN_SHIFT 34
#define AMD_EXT_FEATURES_REG 0x0030
#define AMD_EXT_FEAT_HE_SUP (1UL << 7)
#define AMD_EXT_FEAT_SMI_FSUP_MASK BIT_MASK(17, 16)
#define AMD_EXT_FEAT_SMI_FSUP_SHIFT 16
#define AMD_EXT_FEAT_SMI_FRC_MASK BIT_MASK(20, 18)
#define AMD_EXT_FEAT_SMI_FRC_SHIFT 18
#define AMD_EXT_FEAT_SEG_SUP_MASK BIT_MASK(39, 38)
#define AMD_EXT_FEAT_SEG_SUP_SHIFT 38
#define AMD_HEV_UPPER_REG 0x0040
#define AMD_HEV_LOWER_REG 0x0048
#define AMD_HEV_STATUS_REG 0x0050
#define AMD_HEV_VALID (1UL << 1)
#define AMD_HEV_OVERFLOW (1UL << 2)
#define AMD_SMI_FILTER0_REG 0x0060
#define AMD_SMI_FILTER_VALID (1UL << 16)
#define AMD_SMI_FILTER_LOCKED (1UL << 17)
#define AMD_DEV_TABLE_SEG1_REG 0x0100
#define AMD_CMD_BUF_HEAD_REG 0x2000
#define AMD_CMD_BUF_TAIL_REG 0x2008
#define AMD_EVT_LOG_HEAD_REG 0x2010
#define AMD_EVT_LOG_TAIL_REG 0x2018
#define AMD_STATUS_REG 0x2020
# define AMD_STATUS_EVT_OVERFLOW (1UL << 0)
# define AMD_STATUS_EVT_LOG_INT (1UL << 1)
# define AMD_STATUS_EVT_LOG_RUN (1UL << 3)

struct dev_table_entry {
	u64 raw64[4];
} __attribute__((packed));
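
/*
 * The IOMMU consumes DTEs as 256-bit entities. A compile-time size check
 * (a sketch, assuming a C11 toolchain) pins the layout down:
 */
_Static_assert(sizeof(struct dev_table_entry) == 32,
	       "dev_table_entry must be 32 bytes");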

#define DTE_VALID (1UL << 0)
#define DTE_TRANSLATION_VALID (1UL << 1)
#define DTE_PAGING_MODE_4_LEVEL (4UL << 9)
#define DTE_IR (1UL << 61)
#define DTE_IW (1UL << 62)

#define DEV_TABLE_SEG_MAX 8
#define DEV_TABLE_SIZE 0x200000
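
/*
 * With 32-byte DTEs, the full 2 MiB device table holds
 * 0x200000 / 32 = 65536 entries, one per possible 16-bit BDF.
 */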

union buf_entry {
	u32 raw32[4];
	u64 raw64[2];
	struct {
		u32 pad0;
		u32 pad1: 28;
		u32 type: 4;
	};
} __attribute__((packed));

#define CMD_COMPL_WAIT 0x01
# define CMD_COMPL_WAIT_STORE (1 << 0)
# define CMD_COMPL_WAIT_INT (1 << 1)

#define CMD_INV_DEVTAB_ENTRY 0x02

#define CMD_INV_IOMMU_PAGES 0x03
# define CMD_INV_IOMMU_PAGES_SIZE (1 << 0)
# define CMD_INV_IOMMU_PAGES_PDE (1 << 1)

#define EVENT_TYPE_ILL_DEV_TAB_ENTRY 0x01
#define EVENT_TYPE_PAGE_TAB_HW_ERR 0x04
#define EVENT_TYPE_ILL_CMD_ERR 0x05
#define EVENT_TYPE_CMD_HW_ERR 0x06
#define EVENT_TYPE_IOTLB_INV_TIMEOUT 0x07
#define EVENT_TYPE_INV_PPR_REQ 0x09

#define BUF_LEN_EXPONENT_SHIFT 56

/* Allocate minimum space possible (4K or 256 entries) */
#define BUF_SIZE(name, entry) ((1UL << name##_LEN_EXPONENT) * \
			       sizeof(entry))

#define CMD_BUF_LEN_EXPONENT 8
#define EVT_LOG_LEN_EXPONENT 8

#define CMD_BUF_SIZE BUF_SIZE(CMD_BUF, union buf_entry)
#define EVT_LOG_SIZE BUF_SIZE(EVT_LOG, union buf_entry)
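
/*
 * Worked example: with a length exponent of 8 and 16-byte entries, each
 * buffer is 256 * 16 = 4096 bytes, exactly one page. The same exponent
 * value is programmed into the base registers at BUF_LEN_EXPONENT_SHIFT.
 */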

#define BITS_PER_SHORT 16

#define AMD_IOMMU_MAX_PAGE_TABLE_LEVELS 4

static struct amd_iommu {
	int idx;
	void *mmio_base;
	/* Command Buffer, Event Log */
	unsigned char *cmd_buf_base;
	unsigned char *evt_log_base;
	/* Device table */
	void *devtable_segments[DEV_TABLE_SEG_MAX];
	u8 dev_tbl_seg_sup;
	u32 cmd_tail_ptr;
	bool he_supported;
} iommu_units[JAILHOUSE_MAX_IOMMU_UNITS];

#define for_each_iommu(iommu) for (iommu = iommu_units; \
				   iommu < iommu_units + iommu_units_count; \
				   iommu++)

static unsigned int iommu_units_count;
static struct paging amd_iommu_paging[AMD_IOMMU_MAX_PAGE_TABLE_LEVELS];

/*
 * Interrupt remapping is not emulated on AMD,
 * thus we have no MMIO to intercept.
 */
unsigned int iommu_mmio_count_regions(struct cell *cell)
{
	return 0;
}

bool iommu_cell_emulates_ir(struct cell *cell)
{
	return false;
}

static int amd_iommu_init_pci(struct amd_iommu *entry,
			      struct jailhouse_iommu *iommu)
{
	u64 caps_header, hi, lo;

	/* Check alignment */
	if (iommu->size & (iommu->size - 1))
		return trace_error(-EINVAL);

	/* Check that EFR is supported */
	caps_header = pci_read_config(iommu->amd_bdf, iommu->amd_base_cap, 4);
	if (!(caps_header & CAPS_IOMMU_EFR_SUP))
		return trace_error(-EIO);

	lo = pci_read_config(iommu->amd_bdf,
			     iommu->amd_base_cap + CAPS_IOMMU_BASE_LOW_REG, 4);
	hi = pci_read_config(iommu->amd_bdf,
			     iommu->amd_base_cap + CAPS_IOMMU_BASE_HI_REG, 4);

	if (lo & CAPS_IOMMU_ENABLE &&
	    ((hi << 32) | lo) != (iommu->base | CAPS_IOMMU_ENABLE)) {
		printk("FATAL: IOMMU %d config is locked in invalid state.\n",
		       entry->idx);
		return trace_error(-EPERM);
	}

	/* Should be configured by BIOS, but we want to be sure */
	pci_write_config(iommu->amd_bdf,
			 iommu->amd_base_cap + CAPS_IOMMU_BASE_HI_REG,
			 (u32)(iommu->base >> 32), 4);
	pci_write_config(iommu->amd_bdf,
			 iommu->amd_base_cap + CAPS_IOMMU_BASE_LOW_REG,
			 (u32)(iommu->base & 0xffffffff) | CAPS_IOMMU_ENABLE,
			 4);

	/* Allocate and map MMIO space */
	entry->mmio_base = page_alloc(&remap_pool, PAGES(iommu->size));
	if (!entry->mmio_base)
		return trace_error(-ENOMEM);

	return paging_create(&hv_paging_structs, iommu->base, iommu->size,
			     (unsigned long)entry->mmio_base,
			     PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
			     PAGING_NON_COHERENT);
}

static int amd_iommu_init_features(struct amd_iommu *entry,
				   struct jailhouse_iommu *iommu)
{
	u64 efr = mmio_read64(entry->mmio_base + AMD_EXT_FEATURES_REG);
	unsigned char smi_filter_regcnt;
	u64 val, ctrl_reg = 0, smi_freg = 0;
	unsigned int n;
	void *reg_base;

	/*
	 * Require SMI Filter support. Enable and lock the filter, but
	 * mark all entries as invalid to disable SMI delivery.
	 */
	if (!(efr & AMD_EXT_FEAT_SMI_FSUP_MASK))
		return trace_error(-EINVAL);

	/* Figure out if hardware events are supported. */
	if (iommu->amd_features)
		entry->he_supported =
			iommu->amd_features & ACPI_REPORTING_HE_SUP;
	else
		entry->he_supported = efr & AMD_EXT_FEAT_HE_SUP;

	smi_filter_regcnt = 1 << ((efr & AMD_EXT_FEAT_SMI_FRC_MASK) >>
				  AMD_EXT_FEAT_SMI_FRC_SHIFT);
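
	/*
	 * Example: an FRC field of 2 yields 1 << 2 = 4 filter registers,
	 * spaced 8 bytes apart from AMD_SMI_FILTER0_REG (the n << 3 below).
	 */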
	for (n = 0; n < smi_filter_regcnt; n++) {
		reg_base = entry->mmio_base + AMD_SMI_FILTER0_REG + (n << 3);
		smi_freg = mmio_read64(reg_base);

		if (!(smi_freg & AMD_SMI_FILTER_LOCKED)) {
			/*
			 * Program the unlocked register the way we need:
			 * invalid and locked.
			 */
			mmio_write64(reg_base, AMD_SMI_FILTER_LOCKED);
		} else if (smi_freg & AMD_SMI_FILTER_VALID) {
			/*
			 * The register is locked and programmed
			 * the way we don't want - error.
			 */
			printk("ERROR: SMI Filter register %d is locked "
			       "and can't be reprogrammed.\n"
			       "Reboot and check that no other component "
			       "uses IOMMU %d.\n", n, entry->idx);
			return trace_error(-EPERM);
		}
		/*
		 * The register is locked, but programmed
		 * the way we need - OK to go.
		 */
	}

	ctrl_reg |= (AMD_CONTROL_SMIF_EN | AMD_CONTROL_SMIFLOG_EN);

	/* Enable maximum Device Table segmentation possible */
	entry->dev_tbl_seg_sup = (efr & AMD_EXT_FEAT_SEG_SUP_MASK) >>
		AMD_EXT_FEAT_SEG_SUP_SHIFT;
	if (entry->dev_tbl_seg_sup) {
		val = (u64)entry->dev_tbl_seg_sup << AMD_CONTROL_SEG_EN_SHIFT;
		ctrl_reg |= val & AMD_CONTROL_SEG_EN_MASK;
	}

	mmio_write64(entry->mmio_base + AMD_CONTROL_REG, ctrl_reg);

	return 0;
}

static int amd_iommu_init_buffers(struct amd_iommu *entry,
				  struct jailhouse_iommu *iommu)
{
	/* Allocate and configure command buffer */
	entry->cmd_buf_base = page_alloc(&mem_pool, PAGES(CMD_BUF_SIZE));
	if (!entry->cmd_buf_base)
		return trace_error(-ENOMEM);

	mmio_write64(entry->mmio_base + AMD_CMD_BUF_BASE_REG,
		     paging_hvirt2phys(entry->cmd_buf_base) |
		     ((u64)CMD_BUF_LEN_EXPONENT << BUF_LEN_EXPONENT_SHIFT));

	entry->cmd_tail_ptr = 0;

	/* Allocate and configure event log */
	entry->evt_log_base = page_alloc(&mem_pool, PAGES(EVT_LOG_SIZE));
	if (!entry->evt_log_base)
		return trace_error(-ENOMEM);

	mmio_write64(entry->mmio_base + AMD_EVT_LOG_BASE_REG,
		     paging_hvirt2phys(entry->evt_log_base) |
		     ((u64)EVT_LOG_LEN_EXPONENT << BUF_LEN_EXPONENT_SHIFT));

	return 0;
}

static void amd_iommu_enable_command_processing(struct amd_iommu *iommu)
{
	u64 ctrl_reg;

	ctrl_reg = mmio_read64(iommu->mmio_base + AMD_CONTROL_REG);
	ctrl_reg |= AMD_CONTROL_IOMMU_EN | AMD_CONTROL_CMD_BUF_EN |
		AMD_CONTROL_EVT_LOG_EN | AMD_CONTROL_EVT_INT_EN;
	mmio_write64(iommu->mmio_base + AMD_CONTROL_REG, ctrl_reg);
}

static void amd_iommu_set_next_pt_l4(pt_entry_t pte, unsigned long next_pt)
{
	*pte = (next_pt & BIT_MASK(51, 12)) | AMD_IOMMU_PTE_PG_MODE(3) |
		AMD_IOMMU_PTE_IR | AMD_IOMMU_PTE_IW | AMD_IOMMU_PTE_P;
}

static void amd_iommu_set_next_pt_l3(pt_entry_t pte, unsigned long next_pt)
{
	*pte = (next_pt & BIT_MASK(51, 12)) | AMD_IOMMU_PTE_PG_MODE(2) |
		AMD_IOMMU_PTE_IR | AMD_IOMMU_PTE_IW | AMD_IOMMU_PTE_P;
}

static void amd_iommu_set_next_pt_l2(pt_entry_t pte, unsigned long next_pt)
{
	*pte = (next_pt & BIT_MASK(51, 12)) | AMD_IOMMU_PTE_PG_MODE(1) |
		AMD_IOMMU_PTE_IR | AMD_IOMMU_PTE_IW | AMD_IOMMU_PTE_P;
}

static unsigned long amd_iommu_get_phys_l3(pt_entry_t pte, unsigned long virt)
{
	if (*pte & AMD_IOMMU_PTE_PG_MODE_MASK)
		return INVALID_PHYS_ADDR;
	return (*pte & BIT_MASK(51, 30)) | (virt & BIT_MASK(29, 0));
}

static unsigned long amd_iommu_get_phys_l2(pt_entry_t pte, unsigned long virt)
{
	if (*pte & AMD_IOMMU_PTE_PG_MODE_MASK)
		return INVALID_PHYS_ADDR;
	return (*pte & BIT_MASK(51, 21)) | (virt & BIT_MASK(20, 0));
}
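
/*
 * In both helpers above, a zero page-mode (next-level) field marks the
 * PTE as a final translation, i.e. a 1 GiB (l3) or 2 MiB (l2) page;
 * anything else means the entry points to another table level.
 */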

int iommu_init(void)
{
	struct jailhouse_iommu *iommu;
	struct amd_iommu *entry;
	unsigned int n;
	int err;

	iommu = &system_config->platform_info.x86.iommu_units[0];
	for (n = 0; iommu->base && n < iommu_count_units(); iommu++, n++) {
		entry = &iommu_units[iommu_units_count];

		entry->idx = n;

		/* Protect against accidental VT-d configs. */
		if (!iommu->amd_bdf)
			return trace_error(-EINVAL);

		printk("AMD IOMMU @0x%lx/0x%x\n", iommu->base, iommu->size);

		/* Initialize PCI registers and MMIO space */
		err = amd_iommu_init_pci(entry, iommu);
		if (err)
			return err;

		/* Setup IOMMU features */
		err = amd_iommu_init_features(entry, iommu);
		if (err)
			return err;

		/* Initialize command buffer and event log */
		err = amd_iommu_init_buffers(entry, iommu);
		if (err)
			return err;

		/* Enable the IOMMU */
		amd_iommu_enable_command_processing(entry);

		iommu_units_count++;
	}

	/*
	 * Derive amd_iommu_paging from the very similar x86_64_paging,
	 * replicating all 4 levels.
	 */
	memcpy(amd_iommu_paging, x86_64_paging, sizeof(amd_iommu_paging));
	amd_iommu_paging[0].set_next_pt = amd_iommu_set_next_pt_l4;
	amd_iommu_paging[1].set_next_pt = amd_iommu_set_next_pt_l3;
	amd_iommu_paging[2].set_next_pt = amd_iommu_set_next_pt_l2;
	amd_iommu_paging[1].get_phys = amd_iommu_get_phys_l3;
	amd_iommu_paging[2].get_phys = amd_iommu_get_phys_l2;
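
	/*
	 * Index 0 is the root level, whose entries point to mode-3 tables,
	 * down to index 2, whose entries point to mode-1 page tables. The
	 * 4K leaf level (index 3) needs no AMD-specific handlers.
	 */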

	return iommu_cell_init(&root_cell);
}

int iommu_cell_init(struct cell *cell)
{
	if (iommu_units_count == 0)
		return 0;

	if (cell->id > 0xffff)
		return trace_error(-ERANGE);

	cell->arch.amd_iommu.pg_structs.root_paging = amd_iommu_paging;
	cell->arch.amd_iommu.pg_structs.root_table = page_alloc(&mem_pool, 1);
	if (!cell->arch.amd_iommu.pg_structs.root_table)
		return trace_error(-ENOMEM);

	return 0;
}

static void amd_iommu_completion_wait(struct amd_iommu *iommu);

static void amd_iommu_submit_command(struct amd_iommu *iommu,
				     union buf_entry *cmd, bool draining)
{
	u32 head, next_tail, bytes_free;
	unsigned char *cur_ptr;

	head = mmio_read64(iommu->mmio_base + AMD_CMD_BUF_HEAD_REG);
	next_tail = (iommu->cmd_tail_ptr + sizeof(*cmd)) % CMD_BUF_SIZE;
	bytes_free = (head - next_tail) % CMD_BUF_SIZE;
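
	/*
	 * The subtraction relies on unsigned wraparound. Example for the
	 * 0x1000-byte buffer: head = 0x000 and next_tail = 0xff0 give
	 * (0x000 - 0xff0) % 0x1000 = 0x010 bytes free.
	 */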

	/* Leave space for the COMPLETION_WAIT that drains the buffer. */
	if (bytes_free < (2 * sizeof(*cmd)) && !draining)
		/* Drain the buffer */
		amd_iommu_completion_wait(iommu);

	cur_ptr = &iommu->cmd_buf_base[iommu->cmd_tail_ptr];
	memcpy(cur_ptr, cmd, sizeof(*cmd));

	/* Just to be sure. */
	arch_paging_flush_cpu_caches(cur_ptr, sizeof(*cmd));

	iommu->cmd_tail_ptr =
		(iommu->cmd_tail_ptr + sizeof(*cmd)) % CMD_BUF_SIZE;
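
	/*
	 * Note that only the software copy of the tail pointer advances
	 * here; the hardware tail register is written in
	 * amd_iommu_completion_wait(), which kicks off the whole batch.
	 */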
}

int iommu_map_memory_region(struct cell *cell,
			    const struct jailhouse_memory *mem)
{
	unsigned long flags = AMD_IOMMU_PTE_P;

	if (iommu_units_count == 0)
		return 0;

	/*
	 * Check that the address is not outside the scope of the current
	 * page tables. With 4 levels, we only support 48 address bits.
	 */
	if (mem->virt_start & BIT_MASK(63, 48))
		return trace_error(-E2BIG);
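
	/* The 48-bit limit: 4 levels x 9 bits each + the 12-bit page offset. */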

	if (!(mem->flags & JAILHOUSE_MEM_DMA))
		return 0;

	if (mem->flags & JAILHOUSE_MEM_READ)
		flags |= AMD_IOMMU_PTE_IR;
	if (mem->flags & JAILHOUSE_MEM_WRITE)
		flags |= AMD_IOMMU_PTE_IW;

	return paging_create(&cell->arch.amd_iommu.pg_structs, mem->phys_start,
			     mem->size, mem->virt_start, flags,
			     PAGING_COHERENT);
}

int iommu_unmap_memory_region(struct cell *cell,
			      const struct jailhouse_memory *mem)
{
	/*
	 * TODO: This is almost a complete copy of the vtd.c counterpart
	 * (sans the QEMU hack). Think of unification.
	 */
	if (iommu_units_count == 0)
		return 0;

	if (!(mem->flags & JAILHOUSE_MEM_DMA))
		return 0;

	return paging_destroy(&cell->arch.amd_iommu.pg_structs,
			      mem->virt_start, mem->size, PAGING_COHERENT);
}

static void amd_iommu_inv_dte(struct amd_iommu *iommu, u16 device_id)
{
	union buf_entry invalidate_dte = {{ 0 }};

	invalidate_dte.raw32[0] = device_id;
	invalidate_dte.type = CMD_INV_DEVTAB_ENTRY;

	amd_iommu_submit_command(iommu, &invalidate_dte, false);
}

static struct dev_table_entry *get_dev_table_entry(struct amd_iommu *iommu,
						   u16 bdf, bool allocate)
{
	struct dev_table_entry *devtable_seg;
	u8 seg_idx, seg_shift;
	u64 reg_base, reg_val;
	u16 seg_mask;
	u32 seg_size;

	if (!iommu->dev_tbl_seg_sup) {
		seg_mask = 0;
		seg_idx = 0;
		seg_size = DEV_TABLE_SIZE;
	} else {
		seg_shift = BITS_PER_SHORT - iommu->dev_tbl_seg_sup;
		seg_mask = ~((1 << seg_shift) - 1);
		seg_idx = (seg_mask & bdf) >> seg_shift;
		seg_size = DEV_TABLE_SIZE / (1 << iommu->dev_tbl_seg_sup);
	}
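
	/*
	 * Example: with dev_tbl_seg_sup = 3 (8 segments), seg_shift is
	 * 16 - 3 = 13, so BDF 0x2a10 falls into segment 0x2a10 >> 13 = 1;
	 * each segment then covers 8192 BDFs (seg_size = 0x40000 bytes).
	 */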

	/*
	 * Device table segmentation is tricky in Jailhouse. As cells can
	 * "share" the IOMMU, we don't know the maximum bdf in each segment,
	 * because cells are initialized independently. Thus, we can't simply
	 * adjust segment sizes for our maximum bdfs.
	 *
	 * The next best thing is to lazily allocate segments as we add
	 * devices, using the maximum possible size for segments. In the
	 * worst case scenario, we waste around a 2M chunk per IOMMU.
	 */
	devtable_seg = iommu->devtable_segments[seg_idx];
	if (!devtable_seg) {
		/* If we are not permitted to allocate, just fail */
		if (!allocate)
			return NULL;

		devtable_seg = page_alloc(&mem_pool, PAGES(seg_size));
		if (!devtable_seg)
			return NULL;
		iommu->devtable_segments[seg_idx] = devtable_seg;

		if (!seg_idx)
			reg_base = AMD_DEV_TABLE_BASE_REG;
		else
			reg_base = AMD_DEV_TABLE_SEG1_REG + (seg_idx - 1) * 8;

		/* Size in Kbytes = (m + 1) * 4, see Sect 3.3.6 */
		reg_val = paging_hvirt2phys(devtable_seg) |
			(seg_size / PAGE_SIZE - 1);
		mmio_write64(iommu->mmio_base + reg_base, reg_val);
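
		/*
		 * Example: a 0x40000-byte segment spans 64 pages, so
		 * m = 63 is programmed and (63 + 1) * 4 = 256 Kbytes.
		 */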
	}

	return &devtable_seg[bdf & ~seg_mask];
}

int iommu_add_pci_device(struct cell *cell, struct pci_device *device)
{
	struct dev_table_entry *dte = NULL;
	struct amd_iommu *iommu;
	u16 bdf;

	if (iommu_units_count == 0)
		return 0;

	if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
		return 0;

	if (device->info->iommu >= JAILHOUSE_MAX_IOMMU_UNITS)
		return trace_error(-ERANGE);

	iommu = &iommu_units[device->info->iommu];
	bdf = device->info->bdf;

	dte = get_dev_table_entry(iommu, bdf, true);
	if (!dte)
		return trace_error(-ENOMEM);

	memset(dte, 0, sizeof(*dte));
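
	/*
	 * The cell ID doubles as the IOMMU domain ID; the same value is
	 * passed to amd_iommu_invalidate_pages() for per-cell TLB flushes.
	 */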
	dte->raw64[1] = cell->id & 0xffff;

	/* Translation information */
	dte->raw64[0] = DTE_IR | DTE_IW |
		paging_hvirt2phys(cell->arch.amd_iommu.pg_structs.root_table) |
		DTE_PAGING_MODE_4_LEVEL | DTE_TRANSLATION_VALID | DTE_VALID;

	/* TODO: Interrupt remapping. For now, just forward them unmapped. */

	/* Flush caches, just to be sure. */
	arch_paging_flush_cpu_caches(dte, sizeof(*dte));

	amd_iommu_inv_dte(iommu, bdf);

	return 0;
}

void iommu_remove_pci_device(struct pci_device *device)
{
	struct dev_table_entry *dte = NULL;
	struct amd_iommu *iommu;
	u16 bdf;

	if (iommu_units_count == 0)
		return;

	if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
		return;

	iommu = &iommu_units[device->info->iommu];
	bdf = device->info->bdf;

	dte = get_dev_table_entry(iommu, bdf, false);
	if (!dte)
		return;

	/*
	 * Clear DTE_TRANSLATION_VALID, but keep the entry valid
	 * to block any DMA requests.
	 */
	dte->raw64[0] = DTE_VALID;

	/* Flush caches, just to be sure. */
	arch_paging_flush_cpu_caches(dte, sizeof(*dte));

	amd_iommu_inv_dte(iommu, bdf);
}

void iommu_cell_exit(struct cell *cell)
{
	/* TODO: Again, this is a copy of vtd.c:iommu_cell_exit */
	if (iommu_units_count == 0)
		return;

	page_free(&mem_pool, cell->arch.amd_iommu.pg_structs.root_table, 1);
}

static void wait_for_zero(volatile u64 *sem, unsigned long mask)
{
	while (*sem & mask)
		cpu_relax();
}

static void amd_iommu_invalidate_pages(struct amd_iommu *iommu,
				       u16 domain_id)
{
	union buf_entry invalidate_pages = {{ 0 }};

	/*
	 * Flush everything, including PDEs, in the whole address range, i.e.
	 * 0x7ffffffffffff000 with the S bit (see Sect. 2.2.3).
	 */
	invalidate_pages.raw32[1] = domain_id;
	invalidate_pages.raw32[2] = 0xfffff000 | CMD_INV_IOMMU_PAGES_SIZE |
		CMD_INV_IOMMU_PAGES_PDE;
	invalidate_pages.raw32[3] = 0x7fffffff;
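
	/*
	 * raw32[2] carries address bits 31:12 plus the S and PDE flags,
	 * raw32[3] carries address bits 63:32; together they encode the
	 * all-ones pattern 0x7ffffffffffff000 meaning "entire space".
	 */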
	invalidate_pages.type = CMD_INV_IOMMU_PAGES;

	amd_iommu_submit_command(iommu, &invalidate_pages, false);
}

static void amd_iommu_completion_wait(struct amd_iommu *iommu)
{
	union buf_entry completion_wait = {{ 0 }};
	volatile u64 sem = 1;
	u64 addr;

	addr = paging_hvirt2phys(&sem);

	completion_wait.raw32[0] = (addr & BIT_MASK(31, 3)) |
		CMD_COMPL_WAIT_STORE;
	completion_wait.raw32[1] = (addr & BIT_MASK(51, 32)) >> 32;
	completion_wait.type = CMD_COMPL_WAIT;
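
	/*
	 * Semaphore protocol: sem starts at 1 and the command's store data
	 * (raw32[2] and raw32[3]) stays zero, so the IOMMU overwrites sem
	 * with 0 once every previously posted command has completed.
	 */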

	amd_iommu_submit_command(iommu, &completion_wait, true);
	mmio_write64(iommu->mmio_base + AMD_CMD_BUF_TAIL_REG,
		     iommu->cmd_tail_ptr);

	wait_for_zero(&sem, -1);
}

static void amd_iommu_init_fault_nmi(void)
{
	union x86_msi_vector msi_vec = {{ 0 }};
	union pci_msi_registers msi_reg;
	struct per_cpu *cpu_data;
	struct amd_iommu *iommu;
	int n;

	cpu_data = iommu_select_fault_reporting_cpu();

	/* Send NMI to the fault reporting CPU */
	msi_vec.native.address = MSI_ADDRESS_VALUE;
	msi_vec.native.destination = cpu_data->apic_id;

	msi_reg.msg32.enable = 1;
	msi_reg.msg64.address = msi_vec.raw.address;
	msi_reg.msg64.data = MSI_DM_NMI;

	for_each_iommu(iommu) {
		struct jailhouse_iommu *cfg =
			&system_config->platform_info.x86.iommu_units[iommu->idx];

		/* Disable MSI during interrupt reprogramming. */
		pci_write_config(cfg->amd_bdf, cfg->amd_msi_cap + 2, 0, 2);

		/*
		 * Write the new MSI capability block, re-enabling interrupts
		 * with the last step.
		 */
		for (n = 3; n >= 0; n--)
			pci_write_config(cfg->amd_bdf, cfg->amd_msi_cap + 4 * n,
					 msi_reg.raw[n], 4);
	}

	/*
	 * There is a race window between changing fault_reporting_cpu_id and
	 * actually reprogramming the MSI. To prevent event loss, signal an
	 * interrupt when done, so that iommu_check_pending_faults() is called
	 * upon completion even if no further NMIs due to events would occur.
	 *
	 * Note we can't simply use CMD_COMPL_WAIT_INT in
	 * amd_iommu_completion_wait(), as the IOMMU seems to either signal
	 * an interrupt or do the memory write, but not both.
	 */
	apic_send_nmi_ipi(cpu_data);
}

void iommu_config_commit(struct cell *cell_added_removed)
{
	struct amd_iommu *iommu;

	if (iommu_units_count == 0)
		return;

	/* Ensure we'll get an NMI on completion, or if anything goes wrong. */
	if (cell_added_removed)
		amd_iommu_init_fault_nmi();

	for_each_iommu(iommu) {
		if (cell_added_removed) {
			amd_iommu_invalidate_pages(iommu,
					cell_added_removed->id & 0xffff);
			amd_iommu_invalidate_pages(iommu,
					root_cell.id & 0xffff);
		}

		/* Execute all commands in the buffer */
		amd_iommu_completion_wait(iommu);
	}
}

struct apic_irq_message iommu_get_remapped_root_int(unsigned int iommu,
						    u16 device_id,
						    unsigned int vector,
						    unsigned int remap_index)
{
	struct apic_irq_message dummy = { .valid = 0 };

	/* TODO: Implement */
	return dummy;
}

int iommu_map_interrupt(struct cell *cell, u16 device_id, unsigned int vector,
			struct apic_irq_message irq_msg)
{
	/* TODO: Implement */
	return -ENOSYS;
}

void iommu_shutdown(void)
{
	struct amd_iommu *iommu;
	u64 ctrl_reg;

	for_each_iommu(iommu) {
		/* Disable the IOMMU */
		ctrl_reg = mmio_read64(iommu->mmio_base + AMD_CONTROL_REG);
		ctrl_reg &= ~(AMD_CONTROL_IOMMU_EN | AMD_CONTROL_CMD_BUF_EN |
			AMD_CONTROL_EVT_LOG_EN | AMD_CONTROL_EVT_INT_EN);
		mmio_write64(iommu->mmio_base + AMD_CONTROL_REG, ctrl_reg);
	}
}

static void amd_iommu_print_event(struct amd_iommu *iommu,
				  union buf_entry *entry)
{
	printk("AMD IOMMU %d reported event\n", iommu->idx);
	printk(" EventCode: %lx, Operand 1: %lx, Operand 2: %lx\n",
	       entry->type, entry->raw64[0], entry->raw64[1]);
	switch (entry->type) {
	case EVENT_TYPE_ILL_DEV_TAB_ENTRY...EVENT_TYPE_PAGE_TAB_HW_ERR:
	case EVENT_TYPE_IOTLB_INV_TIMEOUT...EVENT_TYPE_INV_PPR_REQ:
		printk(" DeviceId (bus:dev.func): %02x:%02x.%x\n",
		       PCI_BDF_PARAMS(entry->raw32[0] & 0xffff));
		break;
	case EVENT_TYPE_ILL_CMD_ERR:
	case EVENT_TYPE_CMD_HW_ERR:
		panic_printk("FATAL: IOMMU %d command error\n", iommu->idx);
		panic_stop();
	}
}

static void amd_iommu_restart_event_log(struct amd_iommu *iommu)
{
	void *base = iommu->mmio_base;

	wait_for_zero(base + AMD_STATUS_REG, AMD_STATUS_EVT_LOG_RUN);

	mmio_write64_field(base + AMD_CONTROL_REG, AMD_CONTROL_EVT_LOG_EN, 0);

	/* Simply start from scratch */
	mmio_write64(base + AMD_EVT_LOG_HEAD_REG, 0);
	mmio_write64(base + AMD_EVT_LOG_TAIL_REG, 0);

	/* Clear EventOverflow (RW1C) */
	mmio_write64_field(base + AMD_STATUS_REG, AMD_STATUS_EVT_OVERFLOW, 1);

	/* Bring logging back */
	mmio_write64_field(base + AMD_CONTROL_REG, AMD_CONTROL_EVT_LOG_EN, 1);
}

static void amd_iommu_poll_events(struct amd_iommu *iommu)
{
	union buf_entry *evt;
	u32 head, tail;
	u64 status;

	status = mmio_read64(iommu->mmio_base + AMD_STATUS_REG);

	if (status & AMD_STATUS_EVT_OVERFLOW) {
		printk("IOMMU %d: Event Log overflow occurred, "
		       "some events were lost!\n", iommu->idx);
		amd_iommu_restart_event_log(iommu);
	}

	while (status & AMD_STATUS_EVT_LOG_INT) {
		/* Clear EventLogInt (RW1C) */
		mmio_write64_field(iommu->mmio_base + AMD_STATUS_REG,
				   AMD_STATUS_EVT_LOG_INT, 1);

		head = mmio_read32(iommu->mmio_base + AMD_EVT_LOG_HEAD_REG);
		tail = mmio_read32(iommu->mmio_base + AMD_EVT_LOG_TAIL_REG);

		while (head != tail) {
			evt = (union buf_entry *)(iommu->evt_log_base + head);
			amd_iommu_print_event(iommu, evt);
			head = (head + sizeof(*evt)) % EVT_LOG_SIZE;
		}

		mmio_write32(iommu->mmio_base + AMD_EVT_LOG_HEAD_REG, head);

		/* Re-read status to catch new events, as Linux does */
		status = mmio_read64(iommu->mmio_base + AMD_STATUS_REG);
	}
}

void iommu_check_pending_faults(void)
{
	/* TODO: Implement */
}