/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2014
 *
 * Authors:
 *  Ivan Kolchin <ivan.kolchin@siemens.com>
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */
14 #include <jailhouse/control.h>
15 #include <jailhouse/mmio.h>
16 #include <jailhouse/pci.h>
17 #include <jailhouse/printk.h>
18 #include <jailhouse/utils.h>
/* Index of the vector-control DWORD within a 16-byte MSI-X table entry */
#define MSIX_VECTOR_CTRL_DWORD		3
/*
 * Iterate over all PCI devices configured for @cell, whether or not they are
 * currently assigned (check dev->cell for active ownership).
 */
#define for_each_configured_pci_device(dev, cell)			\
	for ((dev) = (cell)->pci_devices;				\
	     (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
	     (dev)++)
/*
 * Iterate over all capabilities of @dev, starting at the device's slice of
 * the cell-wide capability array; @counter runs 0..num_caps-1.
 */
#define for_each_pci_cap(cap, dev, counter)				\
	for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) +	\
		     (dev)->info->caps_start, (counter) = 0;		\
	     (counter) < (dev)->info->num_caps;				\
	     (cap)++, (counter)++)
33 /* entry for PCI config space whitelist (granting access) */
34 struct pci_cfg_access {
35 u32 reg_num; /* Register number (4-byte aligned) */
36 u32 mask; /* Bit set: access allowed */
/* --- Whitelists for writing to PCI config space registers --- */
40 /* Type 1: Endpoints */
41 static const struct pci_cfg_access endpoint_write_access[] = {
42 { 0x04, 0xffffffff }, /* Command, Status */
43 { 0x0c, 0xff00ffff }, /* BIST, Latency Timer, Cacheline */
44 { 0x3c, 0x000000ff }, /* Int Line */
47 static const struct pci_cfg_access bridge_write_access[] = {
48 { 0x04, 0xffffffff }, /* Command, Status */
49 { 0x0c, 0xff00ffff }, /* BIST, Latency Timer, Cacheline */
50 { 0x3c, 0xffff00ff }, /* Int Line, Bridge Control */
53 static void *pci_space;
54 static u64 mmcfg_start, mmcfg_end;
57 static void *pci_get_device_mmcfg_base(u16 bdf)
59 return pci_space + ((unsigned long)bdf << 12);
63 * Read from PCI config space.
64 * @param bdf 16-bit bus/device/function ID of target.
65 * @param address Config space access address.
66 * @param size Access size (1, 2 or 4 bytes).
70 * @see pci_write_config
72 u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
74 void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
76 if (!pci_space || PCI_BUS(bdf) > end_bus)
77 return arch_pci_read_config(bdf, address, size);
80 return mmio_read8(mmcfg_addr);
82 return mmio_read16(mmcfg_addr);
84 return mmio_read32(mmcfg_addr);
88 * Write to PCI config space.
89 * @param bdf 16-bit bus/device/function ID of target.
90 * @param address Config space access address.
91 * @param value Value to be written.
92 * @param size Access size (1, 2 or 4 bytes).
94 * @see pci_read_config
96 void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
98 void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
100 if (!pci_space || PCI_BUS(bdf) > end_bus)
101 return arch_pci_write_config(bdf, address, value, size);
104 mmio_write8(mmcfg_addr, value);
106 mmio_write16(mmcfg_addr, value);
108 mmio_write32(mmcfg_addr, value);
112 * Look up device owned by a cell.
113 * @param[in] cell Owning cell.
114 * @param bdf 16-bit bus/device/function ID.
116 * @return Pointer to owned PCI device or NULL.
118 struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
120 const struct jailhouse_pci_device *dev_info =
121 jailhouse_cell_pci_devices(cell->config);
124 /* We iterate over the static device information to increase cache
126 for (n = 0; n < cell->config->num_pci_devices; n++)
127 if (dev_info[n].bdf == bdf)
128 return cell->pci_devices[n].cell ?
129 &cell->pci_devices[n] : NULL;
135 * Look up capability at given config space address.
136 * @param device The device to be accessed.
137 * @param address Config space access address.
139 * @return Corresponding capability structure or NULL if none found.
143 static const struct jailhouse_pci_capability *
144 pci_find_capability(struct pci_device *device, u16 address)
146 const struct jailhouse_pci_capability *cap =
147 jailhouse_cell_pci_caps(device->cell->config) +
148 device->info->caps_start;
151 for (n = 0; n < device->info->num_caps; n++, cap++)
152 if (cap->start <= address && cap->start + cap->len > address)
159 * Moderate config space read access.
160 * @param device The device to be accessed. If NULL, access will be
161 * emulated, returning a value of -1.
162 * @param address Config space address.
163 * @param size Access size (1, 2 or 4 bytes).
164 * @param value Pointer to buffer to receive the emulated value if
165 * PCI_ACCESS_DONE is returned.
167 * @return PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
169 * @see pci_cfg_write_moderate
171 enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
172 unsigned int size, u32 *value)
174 const struct jailhouse_pci_capability *cap;
175 unsigned int cap_offs;
179 return PCI_ACCESS_DONE;
182 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
183 return pci_ivshmem_cfg_read(device, address, size, value);
185 if (address < PCI_CONFIG_HEADER_SIZE)
186 return PCI_ACCESS_PERFORM;
188 cap = pci_find_capability(device, address);
190 return PCI_ACCESS_PERFORM;
192 cap_offs = address - cap->start;
193 if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
194 (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
195 *value = device->msi_registers.raw[cap_offs / 4] >>
196 ((cap_offs % 4) * 8);
197 return PCI_ACCESS_DONE;
200 return PCI_ACCESS_PERFORM;
203 static int pci_update_msix(struct pci_device *device,
204 const struct jailhouse_pci_capability *cap)
209 for (n = 0; n < device->info->num_msix_vectors; n++) {
210 result = arch_pci_update_msix_vector(device, n);
218 * Moderate config space write access.
219 * @param device The device to be accessed. If NULL, access will be
221 * @param address Config space address.
222 * @param size Access size (1, 2 or 4 bytes).
223 * @param value Value to be written.
225 * @return PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
227 * @see pci_cfg_read_moderate
229 enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
230 unsigned int size, u32 value)
232 const struct jailhouse_pci_capability *cap;
233 /* initialize list to work around wrong compiler warning */
234 const struct pci_cfg_access *list = NULL;
235 unsigned int bias_shift = (address % 4) * 8;
236 u32 mask = BYTE_MASK(size) << bias_shift;
237 unsigned int n, cap_offs, len = 0;
240 return PCI_ACCESS_REJECT;
242 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
243 return pci_ivshmem_cfg_write(device, address, size, value);
245 if (address < PCI_CONFIG_HEADER_SIZE) {
246 if (device->info->type == JAILHOUSE_PCI_TYPE_DEVICE) {
247 list = endpoint_write_access;
248 len = ARRAY_SIZE(endpoint_write_access);
249 } else if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE) {
250 list = bridge_write_access;
251 len = ARRAY_SIZE(bridge_write_access);
254 for (n = 0; n < len; n++) {
255 if (list[n].reg_num == (address & 0xffc) &&
256 (list[n].mask & mask) == mask)
257 return PCI_ACCESS_PERFORM;
260 // HACK to allow PCI bus rescanning in root-cell
261 if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE &&
262 device->cell == &root_cell)
263 return PCI_ACCESS_DONE;
265 return PCI_ACCESS_REJECT;
268 cap = pci_find_capability(device, address);
269 if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
270 return PCI_ACCESS_REJECT;
272 value <<= bias_shift;
274 cap_offs = address - cap->start;
275 if (cap->id == PCI_CAP_MSI &&
276 (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
277 device->msi_registers.raw[cap_offs / 4] &= ~mask;
278 device->msi_registers.raw[cap_offs / 4] |= value;
280 if (arch_pci_update_msi(device, cap) < 0)
281 return PCI_ACCESS_REJECT;
284 * Address and data words are emulated, the control word is
288 return PCI_ACCESS_DONE;
289 } else if (cap->id == PCI_CAP_MSIX && cap_offs < 4) {
290 device->msix_registers.raw &= ~mask;
291 device->msix_registers.raw |= value;
293 if (pci_update_msix(device, cap) < 0)
294 return PCI_ACCESS_REJECT;
297 return PCI_ACCESS_PERFORM;
301 * Initialization of PCI subsystem.
303 * @return 0 on success, negative error code otherwise.
307 unsigned int mmcfg_size;
310 err = pci_cell_init(&root_cell);
314 mmcfg_start = system_config->platform_info.x86.mmconfig_base;
315 if (mmcfg_start == 0)
318 end_bus = system_config->platform_info.x86.mmconfig_end_bus;
319 mmcfg_size = (end_bus + 1) * 256 * 4096;
320 mmcfg_end = mmcfg_start + mmcfg_size - 4;
322 pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
324 return trace_error(-ENOMEM);
326 return paging_create(&hv_paging_structs, mmcfg_start, mmcfg_size,
327 (unsigned long)pci_space,
328 PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
329 PAGING_NON_COHERENT);
332 static int pci_msix_access_handler(const struct cell *cell, bool is_write,
333 u64 addr, u32 *value)
335 unsigned int dword = (addr % sizeof(union pci_msix_vector)) >> 2;
336 struct pci_device *device = cell->msix_device_list;
341 if (addr >= device->info->msix_address &&
342 addr < device->info->msix_address +
343 device->info->msix_region_size)
345 device = device->next_msix_device;
350 /* access must be DWORD-aligned */
354 offs = addr - device->info->msix_address;
355 index = offs / sizeof(union pci_msix_vector);
359 * The PBA may share a page with the MSI-X table. Writing to
360 * PBA entries is undefined. We declare it as invalid.
362 if (index >= device->info->num_msix_vectors)
364 if (dword == MSIX_VECTOR_CTRL_DWORD) {
365 mmio_write32(&device->msix_table[index].raw[dword],
368 device->msix_vectors[index].raw[dword] = *value;
369 if (arch_pci_update_msix_vector(device, index) < 0)
373 if (index >= device->info->num_msix_vectors ||
374 dword == MSIX_VECTOR_CTRL_DWORD)
376 mmio_read32(((void *)device->msix_table) + offs);
378 *value = device->msix_vectors[index].raw[dword];
383 panic_printk("FATAL: Invalid PCI MSIX BAR write, device "
384 "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
389 * Handler for MMIO-accesses to PCI config space.
390 * @param cell Request issuing cell.
391 * @param is_write True if write access.
392 * @param addr Address accessed.
393 * @param value Pointer to value for reading/writing.
395 * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
397 int pci_mmio_access_handler(const struct cell *cell, bool is_write,
398 u64 addr, u32 *value)
400 u32 mmcfg_offset, reg_addr;
401 struct pci_device *device;
402 enum pci_access access;
405 if (!pci_space || addr < mmcfg_start || addr > mmcfg_end) {
406 ret = pci_msix_access_handler(cell, is_write, addr, value);
408 ret = ivshmem_mmio_access_handler(cell, is_write, addr,
413 mmcfg_offset = addr - mmcfg_start;
414 reg_addr = mmcfg_offset & 0xfff;
415 /* access must be DWORD-aligned */
419 device = pci_get_assigned_device(cell, mmcfg_offset >> 12);
422 access = pci_cfg_write_moderate(device, reg_addr, 4, *value);
423 if (access == PCI_ACCESS_REJECT)
425 if (access == PCI_ACCESS_PERFORM)
426 mmio_write32(pci_space + mmcfg_offset, *value);
428 access = pci_cfg_read_moderate(device, reg_addr, 4, value);
429 if (access == PCI_ACCESS_PERFORM)
430 *value = mmio_read32(pci_space + mmcfg_offset);
436 panic_printk("FATAL: Invalid PCI MMCONFIG write, device %02x:%02x.%x, "
437 "reg: %\n", PCI_BDF_PARAMS(mmcfg_offset >> 12), reg_addr);
443 * Retrieve number of enabled MSI vector of a device.
444 * @param device The device to be examined.
446 * @return number of vectors.
448 unsigned int pci_enabled_msi_vectors(struct pci_device *device)
450 return device->msi_registers.msg32.enable ?
451 1 << device->msi_registers.msg32.mme : 0;
454 static void pci_save_msi(struct pci_device *device,
455 const struct jailhouse_pci_capability *cap)
457 u16 bdf = device->info->bdf;
460 for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
461 device->msi_registers.raw[n] =
462 pci_read_config(bdf, cap->start + n * 4, 4);
465 static void pci_restore_msi(struct pci_device *device,
466 const struct jailhouse_pci_capability *cap)
470 for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
471 pci_write_config(device->info->bdf, cap->start + n * 4,
472 device->msi_registers.raw[n], 4);
475 static void pci_suppress_msix(struct pci_device *device,
476 const struct jailhouse_pci_capability *cap,
479 union pci_msix_registers regs = device->msix_registers;
482 regs.field.fmask = 1;
483 pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
486 static void pci_save_msix(struct pci_device *device,
487 const struct jailhouse_pci_capability *cap)
491 device->msix_registers.raw =
492 pci_read_config(device->info->bdf, cap->start, 4);
494 for (n = 0; n < device->info->num_msix_vectors; n++)
495 for (r = 0; r < 3; r++)
496 device->msix_vectors[n].raw[r] =
497 mmio_read32(&device->msix_table[n].raw[r]);
500 static void pci_restore_msix(struct pci_device *device,
501 const struct jailhouse_pci_capability *cap)
505 for (n = 0; n < device->info->num_msix_vectors; n++)
506 for (r = 0; r < 3; r++)
507 mmio_write32(&device->msix_table[n].raw[r],
508 device->msix_vectors[n].raw[r]);
509 pci_suppress_msix(device, cap, false);
513 * Prepare the handover of PCI devices to Jailhouse or back to Linux.
515 void pci_prepare_handover(void)
517 const struct jailhouse_pci_capability *cap;
518 struct pci_device *device;
521 if (!root_cell.pci_devices)
524 for_each_configured_pci_device(device, &root_cell) {
526 for_each_pci_cap(cap, device, n)
527 if (cap->id == PCI_CAP_MSI)
528 arch_pci_suppress_msi(device, cap);
529 else if (cap->id == PCI_CAP_MSIX)
530 pci_suppress_msix(device, cap, true);
534 static int pci_add_virtual_device(struct cell *cell, struct pci_device *device)
537 device->next_virtual_device = cell->virtual_device_list;
538 cell->virtual_device_list = device;
539 return arch_pci_add_device(cell, device);
542 static int pci_add_device(struct cell *cell, struct pci_device *device)
544 unsigned int size = device->info->msix_region_size;
547 printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
548 PCI_BDF_PARAMS(device->info->bdf), cell->config->name);
550 err = arch_pci_add_device(cell, device);
552 if (!err && device->info->msix_address) {
553 device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
554 if (!device->msix_table) {
555 err = trace_error(-ENOMEM);
556 goto error_remove_dev;
559 err = paging_create(&hv_paging_structs,
560 device->info->msix_address, size,
561 (unsigned long)device->msix_table,
562 PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
563 PAGING_NON_COHERENT);
565 goto error_page_free;
567 device->next_msix_device = cell->msix_device_list;
568 cell->msix_device_list = device;
573 page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
575 arch_pci_remove_device(device);
579 static void pci_remove_virtual_device(struct pci_device *device)
581 struct pci_device *prev = device->cell->virtual_device_list;
583 arch_pci_remove_device(device);
584 if (prev == device) {
585 device->cell->virtual_device_list = device->next_virtual_device;
587 while (prev->next_virtual_device != device)
588 prev = prev->next_virtual_device;
589 prev->next_virtual_device = device->next_virtual_device;
593 static void pci_remove_device(struct pci_device *device)
595 unsigned int size = device->info->msix_region_size;
596 struct pci_device *prev_msix_device;
598 printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
599 PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
600 arch_pci_remove_device(device);
601 pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
602 PCI_CMD_INTX_OFF, 2);
604 if (!device->msix_table)
607 /* cannot fail, destruction of same size as construction */
608 paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
609 size, PAGING_NON_COHERENT);
610 page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
612 prev_msix_device = device->cell->msix_device_list;
613 if (prev_msix_device == device) {
614 device->cell->msix_device_list = device->next_msix_device;
616 while (prev_msix_device->next_msix_device != device)
617 prev_msix_device = prev_msix_device->next_msix_device;
618 prev_msix_device->next_msix_device = device->next_msix_device;
623 * Perform PCI-specific initialization for a new cell.
624 * @param cell Cell to be initialized.
626 * @return 0 on success, negative error code otherwise.
630 int pci_cell_init(struct cell *cell)
632 unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
633 sizeof(struct pci_device));
634 const struct jailhouse_pci_device *dev_infos =
635 jailhouse_cell_pci_devices(cell->config);
636 const struct jailhouse_pci_capability *cap;
637 struct pci_device *device, *root_device;
638 unsigned int ndev, ncap;
641 cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
642 if (!cell->pci_devices)
646 * We order device states in the same way as the static information
647 * so that we can use the index of the latter to find the former. For
648 * the other way around and for obtaining the owner cell, we use more
649 * handy pointers. The cell pointer also encodes active ownership.
651 for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
652 if (dev_infos[ndev].num_msix_vectors > PCI_MAX_MSIX_VECTORS) {
653 err = trace_error(-ERANGE);
657 device = &cell->pci_devices[ndev];
658 device->info = &dev_infos[ndev];
660 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
661 err = pci_ivshmem_init(cell, device);
664 err = pci_add_virtual_device(cell, device);
670 root_device = pci_get_assigned_device(&root_cell,
671 dev_infos[ndev].bdf);
673 pci_remove_device(root_device);
674 root_device->cell = NULL;
677 err = pci_add_device(cell, device);
683 for_each_pci_cap(cap, device, ncap)
684 if (cap->id == PCI_CAP_MSI)
685 pci_save_msi(device, cap);
686 else if (cap->id == PCI_CAP_MSIX)
687 pci_save_msix(device, cap);
690 if (cell == &root_cell)
691 pci_prepare_handover();
699 static void pci_return_device_to_root_cell(struct pci_device *device)
701 struct pci_device *root_device;
703 for_each_configured_pci_device(root_device, &root_cell)
704 if (root_device->info->domain == device->info->domain &&
705 root_device->info->bdf == device->info->bdf) {
706 if (pci_add_device(&root_cell, root_device) < 0)
707 printk("WARNING: Failed to re-assign PCI "
708 "device to root cell\n");
710 root_device->cell = &root_cell;
716 * Perform PCI-specific cleanup for a cell under destruction.
717 * @param cell Cell to be destructed.
721 void pci_cell_exit(struct cell *cell)
723 unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
724 sizeof(struct pci_device));
725 struct pci_device *device;
728 * Do not destroy the root cell. We will shut down the complete
729 * hypervisor instead.
731 if (cell == &root_cell)
734 for_each_configured_pci_device(device, cell)
736 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
737 pci_ivshmem_exit(device);
738 pci_remove_virtual_device(device);
740 pci_remove_device(device);
741 pci_return_device_to_root_cell(device);
745 page_free(&mem_pool, cell->pci_devices, devlist_pages);
749 * Apply PCI-specific configuration changes.
750 * @param cell_added_removed Cell that was added or removed to/from the
753 * @see arch_config_commit
755 void pci_config_commit(struct cell *cell_added_removed)
757 const struct jailhouse_pci_capability *cap;
758 struct pci_device *device;
762 if (!cell_added_removed)
765 for_each_configured_pci_device(device, &root_cell)
767 for_each_pci_cap(cap, device, n) {
768 if (cap->id == PCI_CAP_MSI) {
769 err = arch_pci_update_msi(device, cap);
770 } else if (cap->id == PCI_CAP_MSIX) {
771 err = pci_update_msix(device, cap);
772 pci_suppress_msix(device, cap, false);
777 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
778 err = pci_ivshmem_update_msix(device);
788 panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x",
789 PCI_BDF_PARAMS(device->info->bdf));
791 panic_printk(", cap %d\n", cap->id);
798 * Shut down the PCI layer during hypervisor deactivation.
800 void pci_shutdown(void)
802 const struct jailhouse_pci_capability *cap;
803 struct pci_device *device;
806 if (!root_cell.pci_devices)
809 for_each_configured_pci_device(device, &root_cell)
811 for_each_pci_cap(cap, device, n)
812 if (cap->id == PCI_CAP_MSI)
813 pci_restore_msi(device, cap);
814 else if (cap->id == PCI_CAP_MSIX)
815 pci_restore_msix(device, cap);