/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2014
 *
 * Authors:
 *  Ivan Kolchin <ivan.kolchin@siemens.com>
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
14 #include <jailhouse/control.h>
15 #include <jailhouse/mmio.h>
16 #include <jailhouse/pci.h>
17 #include <jailhouse/printk.h>
18 #include <jailhouse/utils.h>
/* Index of the vector control dword inside one MSI-X table entry. */
#define MSIX_VECTOR_CTRL_DWORD		3

/*
 * Iterate over every PCI device slot of a cell.
 * @param dev	Cursor of type struct pci_device *, set to each slot in turn.
 * @param cell	Cell whose pci_devices array is walked; the array length is
 *		taken from cell->config->num_pci_devices.
 *
 * NOTE(review): the increment clause was missing from the damaged source and
 * has been restored as a plain pointer increment.
 */
#define for_each_configured_pci_device(dev, cell)			\
	for ((dev) = (cell)->pci_devices;				\
	     (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
	     (dev)++)
/*
 * Iterate over the capabilities configured for a PCI device.
 * @param cap		Cursor of type const struct jailhouse_pci_capability *.
 * @param dev		Device whose capabilities are walked. dev->cell must be
 *			valid because the capability array is looked up via
 *			dev->cell->config.
 * @param counter	Integer lvalue used as loop counter.
 *
 * The walk starts at index dev->info->caps_start of the cell's capability
 * array and covers dev->info->num_caps entries.
 *
 * NOTE(review): the increment clause was missing from the damaged source and
 * has been restored so that cursor and counter advance together.
 */
#define for_each_pci_cap(cap, dev, counter)				\
	for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) +	\
		(dev)->info->caps_start, (counter) = 0;			\
	     (counter) < (dev)->info->num_caps;				\
	     (cap)++, (counter)++)
33 /* entry for PCI config space access control */
34 struct pci_cfg_control {
39 } type; /* Access type */
40 u32 mask; /* Bit set: access type applies; bit cleared: deny access */
43 /* --- Access control for writing to PCI config space registers --- */
44 /* Type 1: Endpoints */
45 static const struct pci_cfg_control endpoint_write[PCI_CONFIG_HEADER_SIZE] = {
46 [0x04/4] = {PCI_CONFIG_ALLOW, 0xffffffff}, /* Command, Status */
47 [0x0c/4] = {PCI_CONFIG_ALLOW, 0xff00ffff}, /* BIST, Lat., Cacheline */
48 [0x3c/4] = {PCI_CONFIG_ALLOW, 0x000000ff}, /* Int Line */
52 * Note: Ignore limit/base reprogramming attempts because the root cell will
53 * perform them on bus rescans. */
54 static const struct pci_cfg_control bridge_write[PCI_CONFIG_HEADER_SIZE] = {
55 [0x04/4] = {PCI_CONFIG_ALLOW, 0xffffffff}, /* Command, Status */
56 [0x0c/4] = {PCI_CONFIG_ALLOW, 0xff00ffff}, /* BIST, Lat., Cacheline */
57 [0x1c/4] = {PCI_CONFIG_RDONLY, 0x0000ffff}, /* I/O Limit & Base */
58 [0x20/4 ... /* Memory Limit/Base, Prefetch Memory Limit/Base, */
59 0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* I/O Limit & Base */
60 [0x3c/4] = {PCI_CONFIG_ALLOW, 0xffff00ff}, /* Int Line, Bridge Ctrl */
63 static void *pci_space;
64 static u64 mmcfg_start, mmcfg_end;
67 static void *pci_get_device_mmcfg_base(u16 bdf)
69 return pci_space + ((unsigned long)bdf << 12);
73 * Read from PCI config space.
74 * @param bdf 16-bit bus/device/function ID of target.
75 * @param address Config space access address.
76 * @param size Access size (1, 2 or 4 bytes).
80 * @see pci_write_config
82 u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
84 void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
86 if (!pci_space || PCI_BUS(bdf) > end_bus)
87 return arch_pci_read_config(bdf, address, size);
90 return mmio_read8(mmcfg_addr);
92 return mmio_read16(mmcfg_addr);
94 return mmio_read32(mmcfg_addr);
98 * Write to PCI config space.
99 * @param bdf 16-bit bus/device/function ID of target.
100 * @param address Config space access address.
101 * @param value Value to be written.
102 * @param size Access size (1, 2 or 4 bytes).
104 * @see pci_read_config
106 void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
108 void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
110 if (!pci_space || PCI_BUS(bdf) > end_bus)
111 return arch_pci_write_config(bdf, address, value, size);
114 mmio_write8(mmcfg_addr, value);
116 mmio_write16(mmcfg_addr, value);
118 mmio_write32(mmcfg_addr, value);
122 * Look up device owned by a cell.
123 * @param[in] cell Owning cell.
124 * @param bdf 16-bit bus/device/function ID.
126 * @return Pointer to owned PCI device or NULL.
128 struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
130 const struct jailhouse_pci_device *dev_info =
131 jailhouse_cell_pci_devices(cell->config);
134 /* We iterate over the static device information to increase cache
136 for (n = 0; n < cell->config->num_pci_devices; n++)
137 if (dev_info[n].bdf == bdf)
138 return cell->pci_devices[n].cell ?
139 &cell->pci_devices[n] : NULL;
145 * Look up capability at given config space address.
146 * @param device The device to be accessed.
147 * @param address Config space access address.
149 * @return Corresponding capability structure or NULL if none found.
153 static const struct jailhouse_pci_capability *
154 pci_find_capability(struct pci_device *device, u16 address)
156 const struct jailhouse_pci_capability *cap =
157 jailhouse_cell_pci_caps(device->cell->config) +
158 device->info->caps_start;
161 for (n = 0; n < device->info->num_caps; n++, cap++)
162 if (cap->start <= address && cap->start + cap->len > address)
169 * Moderate config space read access.
170 * @param device The device to be accessed. If NULL, access will be
171 * emulated, returning a value of -1.
172 * @param address Config space address.
173 * @param size Access size (1, 2 or 4 bytes).
174 * @param value Pointer to buffer to receive the emulated value if
175 * PCI_ACCESS_DONE is returned.
177 * @return PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
179 * @see pci_cfg_write_moderate
181 enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
182 unsigned int size, u32 *value)
184 const struct jailhouse_pci_capability *cap;
185 unsigned int cap_offs;
189 return PCI_ACCESS_DONE;
192 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
193 return pci_ivshmem_cfg_read(device, address, size, value);
195 if (address < PCI_CONFIG_HEADER_SIZE)
196 return PCI_ACCESS_PERFORM;
198 cap = pci_find_capability(device, address);
200 return PCI_ACCESS_PERFORM;
202 cap_offs = address - cap->start;
203 if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
204 (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
205 *value = device->msi_registers.raw[cap_offs / 4] >>
206 ((cap_offs % 4) * 8);
207 return PCI_ACCESS_DONE;
210 return PCI_ACCESS_PERFORM;
213 static int pci_update_msix(struct pci_device *device,
214 const struct jailhouse_pci_capability *cap)
219 for (n = 0; n < device->info->num_msix_vectors; n++) {
220 result = arch_pci_update_msix_vector(device, n);
228 * Moderate config space write access.
229 * @param device The device to be accessed. If NULL, access will be
231 * @param address Config space address.
232 * @param size Access size (1, 2 or 4 bytes).
233 * @param value Value to be written.
235 * @return PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
237 * @see pci_cfg_read_moderate
239 enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
240 unsigned int size, u32 value)
242 const struct jailhouse_pci_capability *cap;
243 /* initialize list to work around wrong compiler warning */
244 unsigned int bias_shift = (address % 4) * 8;
245 u32 mask = BYTE_MASK(size) << bias_shift;
246 struct pci_cfg_control cfg_control;
247 unsigned int cap_offs;
250 return PCI_ACCESS_REJECT;
252 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
253 return pci_ivshmem_cfg_write(device, address, size, value);
255 if (address < PCI_CONFIG_HEADER_SIZE) {
256 if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE)
257 cfg_control = bridge_write[address / 4];
258 else /* physical device */
259 cfg_control = endpoint_write[address / 4];
261 if ((cfg_control.mask & mask) != mask)
262 return PCI_ACCESS_REJECT;
264 switch (cfg_control.type) {
265 case PCI_CONFIG_ALLOW:
266 return PCI_ACCESS_PERFORM;
267 case PCI_CONFIG_RDONLY:
268 return PCI_ACCESS_DONE;
270 return PCI_ACCESS_REJECT;
274 cap = pci_find_capability(device, address);
275 if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
276 return PCI_ACCESS_REJECT;
278 value <<= bias_shift;
280 cap_offs = address - cap->start;
281 if (cap->id == PCI_CAP_MSI &&
282 (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
283 device->msi_registers.raw[cap_offs / 4] &= ~mask;
284 device->msi_registers.raw[cap_offs / 4] |= value;
286 if (arch_pci_update_msi(device, cap) < 0)
287 return PCI_ACCESS_REJECT;
290 * Address and data words are emulated, the control word is
294 return PCI_ACCESS_DONE;
295 } else if (cap->id == PCI_CAP_MSIX && cap_offs < 4) {
296 device->msix_registers.raw &= ~mask;
297 device->msix_registers.raw |= value;
299 if (pci_update_msix(device, cap) < 0)
300 return PCI_ACCESS_REJECT;
303 return PCI_ACCESS_PERFORM;
307 * Initialization of PCI subsystem.
309 * @return 0 on success, negative error code otherwise.
313 unsigned int mmcfg_size;
316 err = pci_cell_init(&root_cell);
320 mmcfg_start = system_config->platform_info.x86.mmconfig_base;
321 if (mmcfg_start == 0)
324 end_bus = system_config->platform_info.x86.mmconfig_end_bus;
325 mmcfg_size = (end_bus + 1) * 256 * 4096;
326 mmcfg_end = mmcfg_start + mmcfg_size - 4;
328 pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
330 return trace_error(-ENOMEM);
332 return paging_create(&hv_paging_structs, mmcfg_start, mmcfg_size,
333 (unsigned long)pci_space,
334 PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
335 PAGING_NON_COHERENT);
338 static int pci_msix_access_handler(const struct cell *cell, bool is_write,
339 u64 addr, u32 *value)
341 unsigned int dword = (addr % sizeof(union pci_msix_vector)) >> 2;
342 struct pci_device *device = cell->msix_device_list;
347 if (addr >= device->info->msix_address &&
348 addr < device->info->msix_address +
349 device->info->msix_region_size)
351 device = device->next_msix_device;
356 /* access must be DWORD-aligned */
360 offs = addr - device->info->msix_address;
361 index = offs / sizeof(union pci_msix_vector);
365 * The PBA may share a page with the MSI-X table. Writing to
366 * PBA entries is undefined. We declare it as invalid.
368 if (index >= device->info->num_msix_vectors)
371 device->msix_vectors[index].raw[dword] = *value;
372 if (arch_pci_update_msix_vector(device, index) < 0)
375 if (dword == MSIX_VECTOR_CTRL_DWORD)
376 mmio_write32(&device->msix_table[index].raw[dword],
379 if (index >= device->info->num_msix_vectors ||
380 dword == MSIX_VECTOR_CTRL_DWORD)
382 mmio_read32(((void *)device->msix_table) + offs);
384 *value = device->msix_vectors[index].raw[dword];
389 panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
390 "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
395 * Handler for MMIO-accesses to PCI config space.
396 * @param cell Request issuing cell.
397 * @param is_write True if write access.
398 * @param addr Address accessed.
399 * @param value Pointer to value for reading/writing.
401 * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
403 int pci_mmio_access_handler(const struct cell *cell, bool is_write,
404 u64 addr, u32 *value)
406 u32 mmcfg_offset, reg_addr;
407 struct pci_device *device;
408 enum pci_access access;
411 if (!pci_space || addr < mmcfg_start || addr > mmcfg_end) {
412 ret = pci_msix_access_handler(cell, is_write, addr, value);
414 ret = ivshmem_mmio_access_handler(cell, is_write, addr,
419 mmcfg_offset = addr - mmcfg_start;
420 reg_addr = mmcfg_offset & 0xfff;
421 /* access must be DWORD-aligned */
425 device = pci_get_assigned_device(cell, mmcfg_offset >> 12);
428 access = pci_cfg_write_moderate(device, reg_addr, 4, *value);
429 if (access == PCI_ACCESS_REJECT)
431 if (access == PCI_ACCESS_PERFORM)
432 mmio_write32(pci_space + mmcfg_offset, *value);
434 access = pci_cfg_read_moderate(device, reg_addr, 4, value);
435 if (access == PCI_ACCESS_PERFORM)
436 *value = mmio_read32(pci_space + mmcfg_offset);
442 panic_printk("FATAL: Invalid PCI MMCONFIG write, device %02x:%02x.%x, "
443 "reg: %\n", PCI_BDF_PARAMS(mmcfg_offset >> 12), reg_addr);
449 * Retrieve number of enabled MSI vector of a device.
450 * @param device The device to be examined.
452 * @return number of vectors.
454 unsigned int pci_enabled_msi_vectors(struct pci_device *device)
456 return device->msi_registers.msg32.enable ?
457 1 << device->msi_registers.msg32.mme : 0;
460 static void pci_save_msi(struct pci_device *device,
461 const struct jailhouse_pci_capability *cap)
463 u16 bdf = device->info->bdf;
466 for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
467 device->msi_registers.raw[n] =
468 pci_read_config(bdf, cap->start + n * 4, 4);
471 static void pci_restore_msi(struct pci_device *device,
472 const struct jailhouse_pci_capability *cap)
476 for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
477 pci_write_config(device->info->bdf, cap->start + n * 4,
478 device->msi_registers.raw[n], 4);
/*
 * Write a copy of the device's shadow MSI-X register dword to the start of
 * its MSI-X capability.
 *
 * NOTE(review): this block is incomplete in this view. The leading numbers
 * are extraction residue, and the third parameter, the braces and the
 * statement(s) between the initialization of regs and the config write are
 * missing. Callers in this file pass true or false as third argument, so the
 * missing code presumably alters regs when suppression is requested. How it
 * does so depends on the layout of union pci_msix_registers, which is not
 * visible here - restore from the original file rather than guessing.
 */
481 static void pci_suppress_msix(struct pci_device *device,
482 const struct jailhouse_pci_capability *cap,
485 union pci_msix_registers regs = device->msix_registers;
489 pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
492 static void pci_save_msix(struct pci_device *device,
493 const struct jailhouse_pci_capability *cap)
497 device->msix_registers.raw =
498 pci_read_config(device->info->bdf, cap->start, 4);
500 for (n = 0; n < device->info->num_msix_vectors; n++)
501 for (r = 0; r < 4; r++)
502 device->msix_vectors[n].raw[r] =
503 mmio_read32(&device->msix_table[n].raw[r]);
506 static void pci_restore_msix(struct pci_device *device,
507 const struct jailhouse_pci_capability *cap)
511 for (n = 0; n < device->info->num_msix_vectors; n++)
512 /* only restore address/data, control is write-through */
513 for (r = 0; r < 3; r++)
514 mmio_write32(&device->msix_table[n].raw[r],
515 device->msix_vectors[n].raw[r]);
516 pci_suppress_msix(device, cap, false);
520 * Prepare the handover of PCI devices to Jailhouse or back to Linux.
522 void pci_prepare_handover(void)
524 const struct jailhouse_pci_capability *cap;
525 struct pci_device *device;
528 if (!root_cell.pci_devices)
531 for_each_configured_pci_device(device, &root_cell) {
533 for_each_pci_cap(cap, device, n)
534 if (cap->id == PCI_CAP_MSI)
535 arch_pci_suppress_msi(device, cap);
536 else if (cap->id == PCI_CAP_MSIX)
537 pci_suppress_msix(device, cap, true);
541 static int pci_add_virtual_device(struct cell *cell, struct pci_device *device)
544 device->next_virtual_device = cell->virtual_device_list;
545 cell->virtual_device_list = device;
549 static int pci_add_physical_device(struct cell *cell, struct pci_device *device)
551 unsigned int size = device->info->msix_region_size;
554 printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
555 PCI_BDF_PARAMS(device->info->bdf), cell->config->name);
557 err = arch_pci_add_physical_device(cell, device);
559 if (!err && device->info->msix_address) {
560 device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
561 if (!device->msix_table) {
562 err = trace_error(-ENOMEM);
563 goto error_remove_dev;
566 err = paging_create(&hv_paging_structs,
567 device->info->msix_address, size,
568 (unsigned long)device->msix_table,
569 PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
570 PAGING_NON_COHERENT);
572 goto error_page_free;
574 device->next_msix_device = cell->msix_device_list;
575 cell->msix_device_list = device;
580 page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
582 arch_pci_remove_physical_device(device);
586 static void pci_remove_virtual_device(struct pci_device *device)
588 struct pci_device *prev = device->cell->virtual_device_list;
590 if (prev == device) {
591 device->cell->virtual_device_list = device->next_virtual_device;
593 while (prev->next_virtual_device != device)
594 prev = prev->next_virtual_device;
595 prev->next_virtual_device = device->next_virtual_device;
599 static void pci_remove_physical_device(struct pci_device *device)
601 unsigned int size = device->info->msix_region_size;
602 struct pci_device *prev_msix_device;
604 printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
605 PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
606 arch_pci_remove_physical_device(device);
607 pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
608 PCI_CMD_INTX_OFF, 2);
610 if (!device->msix_table)
613 /* cannot fail, destruction of same size as construction */
614 paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
615 size, PAGING_NON_COHERENT);
616 page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
618 prev_msix_device = device->cell->msix_device_list;
619 if (prev_msix_device == device) {
620 device->cell->msix_device_list = device->next_msix_device;
622 while (prev_msix_device->next_msix_device != device)
623 prev_msix_device = prev_msix_device->next_msix_device;
624 prev_msix_device->next_msix_device = device->next_msix_device;
629 * Perform PCI-specific initialization for a new cell.
630 * @param cell Cell to be initialized.
632 * @return 0 on success, negative error code otherwise.
636 int pci_cell_init(struct cell *cell)
638 unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
639 sizeof(struct pci_device));
640 const struct jailhouse_pci_device *dev_infos =
641 jailhouse_cell_pci_devices(cell->config);
642 const struct jailhouse_pci_capability *cap;
643 struct pci_device *device, *root_device;
644 unsigned int ndev, ncap;
647 cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
648 if (!cell->pci_devices)
652 * We order device states in the same way as the static information
653 * so that we can use the index of the latter to find the former. For
654 * the other way around and for obtaining the owner cell, we use more
655 * handy pointers. The cell pointer also encodes active ownership.
657 for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
658 if (dev_infos[ndev].num_msix_vectors > PCI_MAX_MSIX_VECTORS) {
659 err = trace_error(-ERANGE);
663 device = &cell->pci_devices[ndev];
664 device->info = &dev_infos[ndev];
666 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
667 err = pci_ivshmem_init(cell, device);
670 err = pci_add_virtual_device(cell, device);
676 root_device = pci_get_assigned_device(&root_cell,
677 dev_infos[ndev].bdf);
679 pci_remove_physical_device(root_device);
680 root_device->cell = NULL;
683 err = pci_add_physical_device(cell, device);
689 for_each_pci_cap(cap, device, ncap)
690 if (cap->id == PCI_CAP_MSI)
691 pci_save_msi(device, cap);
692 else if (cap->id == PCI_CAP_MSIX)
693 pci_save_msix(device, cap);
696 if (cell == &root_cell)
697 pci_prepare_handover();
705 static void pci_return_device_to_root_cell(struct pci_device *device)
707 struct pci_device *root_device;
709 for_each_configured_pci_device(root_device, &root_cell)
710 if (root_device->info->domain == device->info->domain &&
711 root_device->info->bdf == device->info->bdf) {
712 if (pci_add_physical_device(&root_cell,
714 printk("WARNING: Failed to re-assign PCI "
715 "device to root cell\n");
717 root_device->cell = &root_cell;
723 * Perform PCI-specific cleanup for a cell under destruction.
724 * @param cell Cell to be destructed.
728 void pci_cell_exit(struct cell *cell)
730 unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
731 sizeof(struct pci_device));
732 struct pci_device *device;
735 * Do not destroy the root cell. We will shut down the complete
736 * hypervisor instead.
738 if (cell == &root_cell)
741 for_each_configured_pci_device(device, cell)
743 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
744 pci_ivshmem_exit(device);
745 pci_remove_virtual_device(device);
747 pci_remove_physical_device(device);
748 pci_return_device_to_root_cell(device);
752 page_free(&mem_pool, cell->pci_devices, devlist_pages);
/*
 * NOTE(review): this block is incomplete in this view. The leading numbers
 * are extraction residue, and comment delimiters, braces, local declarations
 * (n, err), the early return, the error checks after each update and the
 * tail of the error path are missing. What is visible: for every configured
 * root-cell device, MSI state is refreshed via arch_pci_update_msi, MSI-X
 * state via pci_update_msix followed by pci_suppress_msix(..., false), and
 * ivshmem devices additionally via pci_ivshmem_update_msix. A failure is
 * reported with a FATAL message. What happens after that message is not
 * visible here - restore from the original file rather than guessing.
 */
756 * Apply PCI-specific configuration changes.
757 * @param cell_added_removed Cell that was added or removed to/from the
760 * @see arch_config_commit
762 void pci_config_commit(struct cell *cell_added_removed)
764 const struct jailhouse_pci_capability *cap;
765 struct pci_device *device;
769 if (!cell_added_removed)
772 for_each_configured_pci_device(device, &root_cell)
774 for_each_pci_cap(cap, device, n) {
775 if (cap->id == PCI_CAP_MSI) {
776 err = arch_pci_update_msi(device, cap);
777 } else if (cap->id == PCI_CAP_MSIX) {
778 err = pci_update_msix(device, cap);
779 pci_suppress_msix(device, cap, false);
784 if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
785 err = pci_ivshmem_update_msix(device);
795 panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x",
796 PCI_BDF_PARAMS(device->info->bdf));
798 panic_printk(", cap %d\n", cap->id);
805 * Shut down the PCI layer during hypervisor deactivation.
807 void pci_shutdown(void)
809 const struct jailhouse_pci_capability *cap;
810 struct pci_device *device;
813 if (!root_cell.pci_devices)
816 for_each_configured_pci_device(device, &root_cell) {
820 for_each_pci_cap(cap, device, n)
821 if (cap->id == PCI_CAP_MSI)
822 pci_restore_msi(device, cap);
823 else if (cap->id == PCI_CAP_MSIX)
824 pci_restore_msix(device, cap);
826 if (device->cell != &root_cell)
827 pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
828 PCI_CMD_INTX_OFF, 2);