2 * Jailhouse, a Linux-based partitioning hypervisor
4 * Copyright (c) Siemens AG, 2014
7 * Ivan Kolchin <ivan.kolchin@siemens.com>
8 * Jan Kiszka <jan.kiszka@siemens.com>
10 * This work is licensed under the terms of the GNU GPL, version 2. See
11 * the COPYING file in the top-level directory.
14 #include <jailhouse/control.h>
15 #include <jailhouse/mmio.h>
16 #include <jailhouse/pci.h>
17 #include <jailhouse/printk.h>
18 #include <jailhouse/utils.h>
/* Size of the standard PCI configuration header; writes below this
 * boundary are validated against the per-type whitelists below */
#define PCI_CONFIG_HEADER_SIZE 0x40

/* PCI capability IDs (per the PCI Local Bus Specification) */
#define PCI_CAP_MSI 0x05
#define PCI_CAP_MSIX 0x11

/* Index of the 32-bit word holding the vector control field inside an
 * MSI-X table entry (dwords 0-2 are address low/high and data) */
#define MSIX_VECTOR_CTRL_DWORD 3
/*
 * Iteration helpers over a cell's statically configured PCI devices and
 * over a device's capability descriptors.
 * NOTE(review): the final continuation lines of both macros (the
 * loop-advance expressions) are missing from this excerpt; no comment can
 * be inserted between them because of the dangling backslashes.
 */
#define for_each_configured_pci_device(dev, cell) \
	for ((dev) = (cell)->pci_devices; \
	     (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
#define for_each_pci_cap(cap, dev, counter) \
	for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) + \
	     (dev)->info->caps_start, (counter) = 0; \
	     (counter) < (dev)->info->num_caps; \
/* entry for PCI config space whitelist (granting access) */
struct pci_cfg_access {
	u32 reg_num; /** Register number (4-byte aligned) */
	u32 mask; /** Bit set: access allowed */
/* NOTE(review): the closing "};" of this struct is missing from this excerpt */
/* --- Whitelist for writing to PCI config space registers --- */
/* Type 1: Endpoints */
static const struct pci_cfg_access endpoint_write_access[] = {
	{ 0x04, 0xffffffff }, /* Command, Status */
	{ 0x0c, 0xff00ffff }, /* BIST, Latency Timer, Cacheline */
	{ 0x3c, 0x000000ff }, /* Int Line */
/* Bridges (selected for JAILHOUSE_PCI_TYPE_BRIDGE, see
 * pci_cfg_write_moderate) — additionally permits the Bridge Control word */
static const struct pci_cfg_access bridge_write_access[] = {
	{ 0x04, 0xffffffff }, /* Command, Status */
	{ 0x0c, 0xff00ffff }, /* BIST, Latency Timer, Cacheline */
	{ 0x3c, 0xffff00ff }, /* Int Line, Bridge Control */
/* Virtual base of the mapped MMCONFIG region; NULL if MMCONFIG is not
 * available and arch-specific config accessors must be used */
static void *pci_space;
/* Physical bounds of the MMCONFIG region; mmcfg_end is the address of the
 * last accessible dword (see pci_init: start + size - 4) */
static u64 mmcfg_start, mmcfg_end;
/* NOTE(review): the declaration of end_bus, used throughout below, is
 * missing from this excerpt */
/* Translate a BDF into the virtual address of that function's 4K config
 * page inside the mapped MMCONFIG region (one page per bdf, << 12). */
static void *pci_get_device_mmcfg_base(u16 bdf)
	return pci_space + ((unsigned long)bdf << 12);
/**
 * pci_read_config() - Read from PCI config space
 * @bdf: 16-bit bus/device/function ID of target
 * @address: Config space access address
 * @size: Access size (1, 2 or 4 bytes)
 *
 * Return: The value read from the given location.
 */
u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
	void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;

	/* fall back to the arch-specific accessor when no MMCONFIG mapping
	 * exists or the bus lies beyond the mapped range */
	if (!pci_space || PCI_BUS(bdf) > end_bus)
		return arch_pci_read_config(bdf, address, size);

	/* NOTE(review): the size-dispatch lines (switch on @size) are
	 * missing from this excerpt; only the per-size bodies remain */
	return mmio_read8(mmcfg_addr);
	return mmio_read16(mmcfg_addr);
	return mmio_read32(mmcfg_addr);
/**
 * pci_write_config() - Write to PCI config space
 * @bdf: 16-bit bus/device/function ID of target
 * @address: Config space access address
 * @value: Value to be written
 * @size: Access size (1, 2 or 4 bytes)
 */
void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
	void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;

	/* fall back to the arch-specific accessor when no MMCONFIG mapping
	 * exists or the bus lies beyond the mapped range */
	if (!pci_space || PCI_BUS(bdf) > end_bus)
		return arch_pci_write_config(bdf, address, value, size);

	/* NOTE(review): the size-dispatch lines (switch on @size) are
	 * missing from this excerpt; only the per-size bodies remain */
	mmio_write8(mmcfg_addr, value);
	mmio_write16(mmcfg_addr, value);
	mmio_write32(mmcfg_addr, value);
/**
 * pci_get_assigned_device() - Look up device owned by a cell
 * @cell: Owning cell
 * @bdf: 16-bit bus/device/function ID
 *
 * Return: Pointer to owned PCI device or NULL.
 */
struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
	const struct jailhouse_pci_device *dev_info =
		jailhouse_cell_pci_devices(cell->config);

	/* We iterate over the static device information to increase cache
	 * hit rates; a NULL cell pointer marks the device as inactive */
	for (n = 0; n < cell->config->num_pci_devices; n++)
		if (dev_info[n].bdf == bdf)
			return cell->pci_devices[n].cell ?
				&cell->pci_devices[n] : NULL;
/**
 * pci_find_capability() - Look up capability at given config space address
 * @device: The device to be accessed
 * @address: Config space access address
 *
 * Return: Corresponding capability structure or NULL if none found.
 */
static const struct jailhouse_pci_capability *
pci_find_capability(struct pci_device *device, u16 address)
	const struct jailhouse_pci_capability *cap =
		jailhouse_cell_pci_caps(device->cell->config) +
		device->info->caps_start;

	/* linear scan for the capability whose [start, start + len) range
	 * contains @address; NOTE(review): the return statements are
	 * missing from this excerpt */
	for (n = 0; n < device->info->num_caps; n++, cap++)
		if (cap->start <= address && cap->start + cap->len > address)
/**
 * pci_cfg_read_moderate() - Moderate config space read access
 * @device: The device to be accessed; if NULL, access will be emulated,
 *          returning a value of -1
 * @address: Config space address
 * @size: Access size (1, 2 or 4 bytes)
 * @value: Pointer to buffer to receive the emulated value if
 *         PCI_ACCESS_DONE is returned
 *
 * Return: PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 */
enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
				      unsigned int size, u32 *value)
	const struct jailhouse_pci_capability *cap;
	unsigned int cap_offs;

	/* NOTE(review): the "if (!device)" emulation guard described in the
	 * header comment appears truncated here; only its return remains */
		return PCI_ACCESS_DONE;

	/* reads of the standard header need no emulation */
	if (address < PCI_CONFIG_HEADER_SIZE)
		return PCI_ACCESS_PERFORM;

	cap = pci_find_capability(device, address);
	/* NOTE(review): the NULL check on cap appears truncated here */
		return PCI_ACCESS_PERFORM;

	/* serve MSI message address/data (offsets 4..9, or up to 13 for a
	 * 64-bit capability) from the shadowed registers, byte-aligned */
	cap_offs = address - cap->start;
	if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
	    (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
		*value = device->msi_registers.raw[cap_offs / 4] >>
			((cap_offs % 4) * 8);
		return PCI_ACCESS_DONE;

	return PCI_ACCESS_PERFORM;
/*
 * Apply all shadowed MSI-X vectors of @device to the hardware.
 * NOTE(review): the declarations of n/result and the error/return
 * handling are missing from this excerpt; presumably a negative result
 * from pci_update_msix_vector is propagated.
 */
static int pci_update_msix(struct pci_device *device,
			   const struct jailhouse_pci_capability *cap)
	for (n = 0; n < device->info->num_msix_vectors; n++) {
		result = pci_update_msix_vector(device, n);
/**
 * pci_cfg_write_moderate() - Moderate config space write access
 * @device: The device to be accessed; if NULL, access will be rejected
 * @address: Config space address
 * @size: Access size (1, 2 or 4 bytes)
 * @value: Value to be written
 *
 * Return: PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 */
enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
				       unsigned int size, u32 value)
	const struct jailhouse_pci_capability *cap;
	/* initialize list to work around wrong compiler warning */
	const struct pci_cfg_access *list = NULL;
	/* position of the accessed bytes inside their aligned dword */
	unsigned int bias_shift = (address % 4) * 8;
	u32 mask = BYTE_MASK(size) << bias_shift;
	unsigned int n, cap_offs, len = 0;

	/* NOTE(review): the "if (!device)" guard appears truncated here */
		return PCI_ACCESS_REJECT;

	if (address < PCI_CONFIG_HEADER_SIZE) {
		/* select the write whitelist matching the device type */
		if (device->info->type == JAILHOUSE_PCI_TYPE_DEVICE) {
			list = endpoint_write_access;
			len = ARRAY_SIZE(endpoint_write_access);
		} else if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE) {
			list = bridge_write_access;
			len = ARRAY_SIZE(bridge_write_access);

		/* permit the write only if every touched bit is whitelisted
		 * for the accessed (dword-aligned) register */
		for (n = 0; n < len; n++) {
			if (list[n].reg_num == (address & 0xffc) &&
			    (list[n].mask & mask) == mask)
				return PCI_ACCESS_PERFORM;

		return PCI_ACCESS_REJECT;

	cap = pci_find_capability(device, address);
	if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
		return PCI_ACCESS_REJECT;

	/* align the value with the byte lanes it targets */
	value <<= bias_shift;

	cap_offs = address - cap->start;
	if (cap->id == PCI_CAP_MSI &&
	    (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
		/* merge the write into the shadowed MSI registers, then let
		 * pci_update_msi apply the resulting state */
		device->msi_registers.raw[cap_offs / 4] &= ~mask;
		device->msi_registers.raw[cap_offs / 4] |= value;

		if (pci_update_msi(device, cap) < 0)
			return PCI_ACCESS_REJECT;

		/*
		 * Address and data words are emulated, the control word is
		 * presumably passed through — NOTE(review): a guarding
		 * conditional appears to be missing before this return.
		 */
		return PCI_ACCESS_DONE;
	} else if (cap->id == PCI_CAP_MSIX && cap_offs < 4) {
		/* shadow the MSI-X control word and apply it */
		device->msix_registers.raw &= ~mask;
		device->msix_registers.raw |= value;

		if (pci_update_msix(device, cap) < 0)
			return PCI_ACCESS_REJECT;

	return PCI_ACCESS_PERFORM;
/**
 * pci_init() - Initialization of PCI module
 *
 * Return: 0 - success, error code - if error.
 *
 * NOTE(review): the function signature line and several error checks
 * (pci_cell_init result, allocation failure) are missing from this
 * excerpt.
 */
	unsigned int mmcfg_size;

	err = pci_cell_init(&root_cell);

	/* no MMCONFIG region configured: stick to arch accessors */
	mmcfg_start = system_config->platform_info.x86.mmconfig_base;
	if (mmcfg_start == 0)

	end_bus = system_config->platform_info.x86.mmconfig_end_bus;
	/* 256 functions per bus, one 4K config page per function */
	mmcfg_size = (end_bus + 1) * 256 * 4096;
	/* mmcfg_end addresses the last accessible dword of the region */
	mmcfg_end = mmcfg_start + mmcfg_size - 4;

	pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);

	/* map the MMCONFIG region uncached into the hypervisor */
	return page_map_create(&hv_paging_structs, mmcfg_start, mmcfg_size,
			       (unsigned long)pci_space,
			       PAGE_DEFAULT_FLAGS | PAGE_FLAG_UNCACHED,
			       PAGE_MAP_NON_COHERENT);
/*
 * Handle MMIO access hitting an assigned device's MSI-X table region.
 * NOTE(review): several control-flow lines (loop head, alignment check,
 * goto targets, return statements) are missing from this excerpt.
 */
static int pci_msix_access_handler(const struct cell *cell, bool is_write,
				   u64 addr, u32 *value)
	/* dword index of the access inside one MSI-X table entry */
	unsigned int dword = (addr % sizeof(union pci_msix_vector)) >> 2;
	struct pci_device *device = cell->msix_device_list;

	/* walk the cell's list for the device whose region contains @addr */
	if (addr >= device->info->msix_address &&
	    addr < device->info->msix_address +
		   device->info->msix_region_size)
	device = device->next_msix_device;

	/* access must be DWORD-aligned */

	offs = addr - device->info->msix_address;
	index = offs / sizeof(union pci_msix_vector);

	/*
	 * The PBA may share a page with the MSI-X table. Writing to
	 * PBA entries is undefined. We declare it as invalid.
	 */
	if (index >= device->info->num_msix_vectors)

	/* the vector control word is written to the hardware table; the
	 * other words are shadowed and applied via pci_update_msix_vector */
	if (dword == MSIX_VECTOR_CTRL_DWORD) {
		mmio_write32(&device->msix_table[index].field.ctrl,
	device->msix_vectors[index].raw[dword] = *value;
	if (pci_update_msix_vector(device, index) < 0)

	/* reads of PBA entries or the control word go to the hardware
	 * table; other words are served from the shadow copy */
	if (index >= device->info->num_msix_vectors ||
	    dword == MSIX_VECTOR_CTRL_DWORD)
		mmio_read32(((void *)device->msix_table) + offs);
	*value = device->msix_vectors[index].raw[dword];

	panic_printk("FATAL: Invalid PCI MSIX BAR write, device "
		     "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
372 * pci_mmio_access_handler() - Handler for MMIO-accesses to PCI config space
373 * @cell: Request issuing cell
374 * @is_write: True if write access
375 * @addr: Address accessed
376 * @value: Pointer to value for reading/writing
378 * Return: 1 if handled successfully, 0 if unhandled, -1 on access error
380 int pci_mmio_access_handler(const struct cell *cell, bool is_write,
381 u64 addr, u32 *value)
383 u32 mmcfg_offset, reg_addr;
384 struct pci_device *device;
385 enum pci_access access;
387 if (!pci_space || addr < mmcfg_start || addr > mmcfg_end)
388 return pci_msix_access_handler(cell, is_write, addr, value);
390 mmcfg_offset = addr - mmcfg_start;
391 reg_addr = mmcfg_offset & 0xfff;
392 /* access must be DWORD-aligned */
396 device = pci_get_assigned_device(cell, mmcfg_offset >> 12);
399 access = pci_cfg_write_moderate(device, reg_addr, 4, *value);
400 if (access == PCI_ACCESS_REJECT)
402 if (access == PCI_ACCESS_PERFORM)
403 mmio_write32(pci_space + mmcfg_offset, *value);
405 access = pci_cfg_read_moderate(device, reg_addr, 4, value);
406 if (access == PCI_ACCESS_PERFORM)
407 *value = mmio_read32(pci_space + mmcfg_offset);
413 panic_printk("FATAL: Invalid PCI MMCONFIG write, device %02x:%02x.%x, "
414 "reg: %\n", PCI_BDF_PARAMS(mmcfg_offset >> 12), reg_addr);
419 unsigned int pci_enabled_msi_vectors(struct pci_device *device)
421 return device->msi_registers.msg32.enable ?
422 1 << device->msi_registers.msg32.mme : 0;
/*
 * Snapshot the device's MSI capability registers into the shadow copy:
 * 3 config-space dwords, or 4 when the capability uses 64-bit message
 * addresses.
 */
static void pci_save_msi(struct pci_device *device,
			 const struct jailhouse_pci_capability *cap)
	u16 bdf = device->info->bdf;

	for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
		device->msi_registers.raw[n] =
			pci_read_config(bdf, cap->start + n * 4, 4);
/*
 * Write the shadowed MSI registers back to the device. Dword 0 (the
 * capability header and control word) is skipped — the loop starts at
 * n = 1.
 */
static void pci_restore_msi(struct pci_device *device,
			    const struct jailhouse_pci_capability *cap)
	for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
		pci_write_config(device->info->bdf, cap->start + n * 4,
				 device->msi_registers.raw[n], 4);
/*
 * Write the device's MSI-X control word with the function-mask bit set,
 * silencing all MSI-X vectors at once. Operates on a local copy so the
 * shadowed registers remain untouched.
 * NOTE(review): the third parameter line (call sites pass true/false)
 * and, presumably, a conditional guarding the fmask assignment are
 * missing from this excerpt.
 */
static void pci_suppress_msix(struct pci_device *device,
			      const struct jailhouse_pci_capability *cap,
	union pci_msix_registers regs = device->msix_registers;

	regs.field.fmask = 1;
	pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
/*
 * Snapshot the MSI-X control word (from config space) and every vector
 * entry (3 dwords each, read via MMIO from the table) into the device's
 * shadow state.
 */
static void pci_save_msix(struct pci_device *device,
			  const struct jailhouse_pci_capability *cap)
	device->msix_registers.raw =
		pci_read_config(device->info->bdf, cap->start, 4);

	for (n = 0; n < device->info->num_msix_vectors; n++)
		for (r = 0; r < 3; r++)
			device->msix_vectors[n].raw[r] =
				mmio_read32(&device->msix_table[n].raw[r]);
/*
 * Write the shadowed MSI-X vectors back into the MMIO table, then
 * rewrite the control word via pci_suppress_msix(..., false).
 */
static void pci_restore_msix(struct pci_device *device,
			     const struct jailhouse_pci_capability *cap)
	for (n = 0; n < device->info->num_msix_vectors; n++)
		for (r = 0; r < 3; r++)
			mmio_write32(&device->msix_table[n].raw[r],
				     device->msix_vectors[n].raw[r]);
	pci_suppress_msix(device, cap, false);
/**
 * pci_prepare_handover() - Prepare the handover of PCI devices to Jailhouse or
 * back: mask MSI and MSI-X on all root-cell devices so no interrupts fire
 * while ownership changes.
 */
void pci_prepare_handover(void)
	const struct jailhouse_pci_capability *cap;
	struct pci_device *device;

	/* nothing to do before the root cell's device list exists */
	if (!root_cell.pci_devices)

	for_each_configured_pci_device(device, &root_cell) {
		for_each_pci_cap(cap, device, n)
			if (cap->id == PCI_CAP_MSI)
				pci_suppress_msi(device, cap);
			else if (cap->id == PCI_CAP_MSIX)
				pci_suppress_msix(device, cap, true);
/*
 * Attach @device to @cell: perform the arch-specific assignment and, for
 * MSI-X capable devices, map the MSI-X table uncached into hypervisor
 * memory and link the device into the cell's MSI-X device list.
 * Returns 0 on success, negative error code otherwise.
 * NOTE(review): several error checks and the error_page_free /
 * error_remove_dev labels are missing from this excerpt.
 */
static int pci_add_device(struct cell *cell, struct pci_device *device)
	unsigned int size = device->info->msix_region_size;

	printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
	       PCI_BDF_PARAMS(device->info->bdf), cell->config->name);

	err = arch_pci_add_device(cell, device);

	if (!err && device->info->msix_address) {
		device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
		if (!device->msix_table) {
			goto error_remove_dev;

		err = page_map_create(&hv_paging_structs,
				      device->info->msix_address, size,
				      (unsigned long)device->msix_table,
				      PAGE_DEFAULT_FLAGS | PAGE_FLAG_UNCACHED,
				      PAGE_MAP_NON_COHERENT);
			goto error_page_free;

		/* push onto the cell's singly-linked MSI-X device list */
		device->next_msix_device = cell->msix_device_list;
		cell->msix_device_list = device;

	page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
	arch_pci_remove_device(device);
543 static void pci_remove_device(struct pci_device *device)
545 unsigned int size = device->info->msix_region_size;
546 struct pci_device *prev_msix_device;
548 printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
549 PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
550 arch_pci_remove_device(device);
551 pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
552 PCI_CMD_INTX_OFF, 2);
554 if (!device->msix_table)
557 /* cannot fail, destruction of same size as construction */
558 page_map_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
559 size, PAGE_MAP_NON_COHERENT);
560 page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
562 prev_msix_device = device->cell->msix_device_list;
563 if (prev_msix_device == device) {
564 device->cell->msix_device_list = NULL;
566 while (prev_msix_device->next_msix_device != device)
567 prev_msix_device = prev_msix_device->next_msix_device;
568 prev_msix_device->next_msix_device = NULL;
/*
 * Build @cell's PCI device state: allocate the device array, take each
 * configured device away from the root cell (if owned there), add it to
 * @cell and snapshot its MSI/MSI-X state. Returns 0 on success,
 * negative error code otherwise.
 * NOTE(review): several error-handling lines are missing from this
 * excerpt.
 */
int pci_cell_init(struct cell *cell)
	unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
					   sizeof(struct pci_device));
	const struct jailhouse_pci_device *dev_infos =
		jailhouse_cell_pci_devices(cell->config);
	const struct jailhouse_pci_capability *cap;
	struct pci_device *device, *root_device;
	unsigned int ndev, ncap;

	cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
	if (!cell->pci_devices)

	/*
	 * We order device states in the same way as the static information
	 * so that we can use the index of the latter to find the former. For
	 * the other way around and for obtaining the owner cell, we use more
	 * handy pointers. The cell pointer also encodes active ownership.
	 */
	for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
		/* reject configs exceeding the supported vector count */
		if (dev_infos[ndev].num_msix_vectors > PCI_MAX_MSIX_VECTORS) {

		device = &cell->pci_devices[ndev];
		device->info = &dev_infos[ndev];

		/* take the device away from the root cell if it owns it */
		root_device = pci_get_assigned_device(&root_cell,
						      dev_infos[ndev].bdf);
			pci_remove_device(root_device);
			root_device->cell = NULL;

		err = pci_add_device(cell, device);

		for_each_pci_cap(cap, device, ncap)
			if (cap->id == PCI_CAP_MSI)
				pci_save_msi(device, cap);
			else if (cap->id == PCI_CAP_MSIX)
				pci_save_msix(device, cap);

	/* root-cell setup doubles as handover preparation */
	if (cell == &root_cell)
		pci_prepare_handover();
/*
 * Hand a device back to the root cell after its owning cell was
 * destroyed. A failing re-assignment is only logged — the root cell
 * keeps running either way.
 */
static void pci_return_device_to_root_cell(struct pci_device *device)
	struct pci_device *root_device;

	/* match by domain and BDF against the root cell's configuration */
	for_each_configured_pci_device(root_device, &root_cell)
		if (root_device->info->domain == device->info->domain &&
		    root_device->info->bdf == device->info->bdf) {
			if (pci_add_device(&root_cell, root_device) < 0)
				printk("WARNING: Failed to re-assign PCI "
				       "device to root cell\n");
			/* NOTE(review): an "else" may be truncated before
			 * this line in this excerpt */
			root_device->cell = &root_cell;
/*
 * Release @cell's PCI devices, return them to the root cell, and free
 * the per-cell device array.
 */
void pci_cell_exit(struct cell *cell)
	unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
					   sizeof(struct pci_device));
	struct pci_device *device;

	/*
	 * Do not destroy the root cell. We will shut down the complete
	 * hypervisor instead.
	 */
	if (cell == &root_cell)

	for_each_configured_pci_device(device, cell)
		pci_remove_device(device);
		pci_return_device_to_root_cell(device);

	page_free(&mem_pool, cell->pci_devices, devlist_pages);
/*
 * Apply the shadowed MSI/MSI-X state of all root-cell devices to the
 * hardware after a cell was added or removed.
 * @cell_added_removed: the cell that triggered the commit; NULL means
 * nothing to do. An unsupported interrupt state ends in the panic path
 * at the bottom (error checks truncated in this excerpt).
 */
void pci_config_commit(struct cell *cell_added_removed)
	const struct jailhouse_pci_capability *cap;
	struct pci_device *device;

	if (!cell_added_removed)

	for_each_configured_pci_device(device, &root_cell)
		for_each_pci_cap(cap, device, n) {
			if (cap->id == PCI_CAP_MSI) {
				err = pci_update_msi(device, cap);
			} else if (cap->id == PCI_CAP_MSIX) {
				err = pci_update_msix(device, cap);
				/* rewrite the control word without forcing
				 * fmask (see pci_suppress_msix) */
				pci_suppress_msix(device, cap, false);

	panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x,"
		     " cap %d\n", PCI_BDF_PARAMS(device->info->bdf), cap->id);
/*
 * Restore the saved MSI/MSI-X state of all root-cell devices before the
 * hypervisor shuts down and hands the hardware back.
 */
void pci_shutdown(void)
	const struct jailhouse_pci_capability *cap;
	struct pci_device *device;

	/* nothing to restore if the root cell never got a device list */
	if (!root_cell.pci_devices)

	for_each_configured_pci_device(device, &root_cell)
		for_each_pci_cap(cap, device, n)
			if (cap->id == PCI_CAP_MSI)
				pci_restore_msi(device, cap);
			else if (cap->id == PCI_CAP_MSIX)
				pci_restore_msix(device, cap);