]> rtime.felk.cvut.cz Git - jailhouse.git/blob - hypervisor/pci.c
core: pci: Migrate MMCONFIG access handling to the new MMIO dispatcher
[jailhouse.git] / hypervisor / pci.c
1 /*
2  * Jailhouse, a Linux-based partitioning hypervisor
3  *
4  * Copyright (c) Siemens AG, 2014, 2015
5  *
6  * Authors:
7  *  Ivan Kolchin <ivan.kolchin@siemens.com>
8  *  Jan Kiszka <jan.kiszka@siemens.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2.  See
11  * the COPYING file in the top-level directory.
12  */
13
14 #include <jailhouse/control.h>
15 #include <jailhouse/mmio.h>
16 #include <jailhouse/pci.h>
17 #include <jailhouse/printk.h>
18 #include <jailhouse/utils.h>
19
20 #define MSIX_VECTOR_CTRL_DWORD          3
21
22 #define for_each_configured_pci_device(dev, cell)                       \
23         for ((dev) = (cell)->pci_devices;                               \
24              (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
25              (dev)++)
26
27 #define for_each_pci_cap(cap, dev, counter)                             \
28         for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) +     \
29                 (dev)->info->caps_start, (counter) = 0;                 \
30              (counter) < (dev)->info->num_caps;                         \
31              (cap)++, (counter)++)
32
/* entry for PCI config space access control */
struct pci_cfg_control {
	enum {
		PCI_CONFIG_DENY,   /* reject the access */
		PCI_CONFIG_ALLOW,  /* pass the access through to hardware */
		PCI_CONFIG_RDONLY, /* silently drop the write */
	} type;   /* Access type */
	u32 mask; /* Bit set: access type applies; bit cleared: deny access */
};
42
/* --- Access control for writing to PCI config space registers --- */
/* Type 1: Endpoints
 * Indexed by config-space DWORD; entries not listed default to
 * {PCI_CONFIG_DENY, 0}, i.e. all other header writes are rejected. */
static const struct pci_cfg_control endpoint_write[PCI_CONFIG_HEADER_SIZE] = {
	[0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
	[0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
	[0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* ROM BAR */
	[0x3c/4] = {PCI_CONFIG_ALLOW,  0x000000ff}, /* Int Line */
};
51
/* Type 2: Bridges
 * Note: Ignore limit/base reprogramming attempts because the root cell will
 *       perform them on bus rescans. */
static const struct pci_cfg_control bridge_write[PCI_CONFIG_HEADER_SIZE] = {
	[0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
	[0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
	[0x1c/4] = {PCI_CONFIG_RDONLY, 0x0000ffff}, /* I/O Limit & Base */
	[0x20/4 ...      /* Memory Limit/Base, Prefetch Memory Limit/Base, */
	 0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* Memory windows, ROM BAR */
	[0x3c/4] = {PCI_CONFIG_ALLOW,  0xffff00ff}, /* Int Line, Bridge Ctrl */
};
63
static void *pci_space;             /* hypervisor virtual mapping of MMCONFIG */
static u64 mmcfg_start, mmcfg_size; /* physical MMCONFIG window */
static u8 end_bus;                  /* last bus number covered by MMCONFIG */
67
68 unsigned int pci_mmio_count_regions(struct cell *cell)
69 {
70         const struct jailhouse_pci_device *dev_infos =
71                 jailhouse_cell_pci_devices(cell->config);
72         unsigned int n, regions = 0;
73
74         if (system_config->platform_info.x86.mmconfig_base)
75                 regions++;
76
77         for (n = 0; n < cell->config->num_pci_devices; n++)
78                 if (dev_infos[n].type == JAILHOUSE_PCI_TYPE_IVSHMEM)
79                         regions += PCI_IVSHMEM_NUM_MMIO_REGIONS;
80
81         return regions;
82 }
83
84 static void *pci_get_device_mmcfg_base(u16 bdf)
85 {
86         return pci_space + ((unsigned long)bdf << 12);
87 }
88
89 /**
90  * Read from PCI config space.
91  * @param bdf           16-bit bus/device/function ID of target.
92  * @param address       Config space access address.
93  * @param size          Access size (1, 2 or 4 bytes).
94  *
95  * @return Read value.
96  *
97  * @see pci_write_config
98  */
99 u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
100 {
101         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
102
103         if (!pci_space || PCI_BUS(bdf) > end_bus)
104                 return arch_pci_read_config(bdf, address, size);
105
106         if (size == 1)
107                 return mmio_read8(mmcfg_addr);
108         else if (size == 2)
109                 return mmio_read16(mmcfg_addr);
110         else
111                 return mmio_read32(mmcfg_addr);
112 }
113
114 /**
115  * Write to PCI config space.
116  * @param bdf           16-bit bus/device/function ID of target.
117  * @param address       Config space access address.
118  * @param value         Value to be written.
119  * @param size          Access size (1, 2 or 4 bytes).
120  *
121  * @see pci_read_config
122  */
123 void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
124 {
125         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
126
127         if (!pci_space || PCI_BUS(bdf) > end_bus)
128                 return arch_pci_write_config(bdf, address, value, size);
129
130         if (size == 1)
131                 mmio_write8(mmcfg_addr, value);
132         else if (size == 2)
133                 mmio_write16(mmcfg_addr, value);
134         else
135                 mmio_write32(mmcfg_addr, value);
136 }
137
138 /**
139  * Look up device owned by a cell.
140  * @param[in] cell      Owning cell.
141  * @param bdf           16-bit bus/device/function ID.
142  *
143  * @return Pointer to owned PCI device or NULL.
144  */
145 struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
146 {
147         const struct jailhouse_pci_device *dev_info =
148                 jailhouse_cell_pci_devices(cell->config);
149         u32 n;
150
151         /* We iterate over the static device information to increase cache
152          * locality. */
153         for (n = 0; n < cell->config->num_pci_devices; n++)
154                 if (dev_info[n].bdf == bdf)
155                         return cell->pci_devices[n].cell ?
156                                 &cell->pci_devices[n] : NULL;
157
158         return NULL;
159 }
160
161 /**
162  * Look up capability at given config space address.
163  * @param device        The device to be accessed.
164  * @param address       Config space access address.
165  *
166  * @return Corresponding capability structure or NULL if none found.
167  *
168  * @private
169  */
170 static const struct jailhouse_pci_capability *
171 pci_find_capability(struct pci_device *device, u16 address)
172 {
173         const struct jailhouse_pci_capability *cap =
174                 jailhouse_cell_pci_caps(device->cell->config) +
175                 device->info->caps_start;
176         u32 n;
177
178         for (n = 0; n < device->info->num_caps; n++, cap++)
179                 if (cap->start <= address && cap->start + cap->len > address)
180                         return cap;
181
182         return NULL;
183 }
184
185 /**
186  * Moderate config space read access.
187  * @param device        The device to be accessed. If NULL, access will be
188  *                      emulated, returning a value of -1.
189  * @param address       Config space address.
190  * @param size          Access size (1, 2 or 4 bytes).
191  * @param value         Pointer to buffer to receive the emulated value if
192  *                      PCI_ACCESS_DONE is returned.
193  *
194  * @return PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
195  *
196  * @see pci_cfg_write_moderate
197  */
198 enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
199                                       unsigned int size, u32 *value)
200 {
201         const struct jailhouse_pci_capability *cap;
202         unsigned int bar_no, cap_offs;
203
204         if (!device) {
205                 *value = -1;
206                 return PCI_ACCESS_DONE;
207         }
208
209         /* Emulate BARs for physical and virtual devices */
210         if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE) {
211                 /* Emulate BAR access, always returning the shadow value. */
212                 if (address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
213                         bar_no = (address - PCI_CFG_BAR) / 4;
214                         *value = device->bar[bar_no] >> ((address % 4) * 8);
215                         return PCI_ACCESS_DONE;
216                 }
217
218                 /* We do not expose ROMs. */
219                 if (address >= PCI_CFG_ROMBAR && address < PCI_CFG_CAPS) {
220                         *value = 0;
221                         return PCI_ACCESS_DONE;
222                 }
223         }
224
225         if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
226                 return pci_ivshmem_cfg_read(device, address, value);
227
228         if (address < PCI_CONFIG_HEADER_SIZE)
229                 return PCI_ACCESS_PERFORM;
230
231         cap = pci_find_capability(device, address);
232         if (!cap)
233                 return PCI_ACCESS_PERFORM;
234
235         cap_offs = address - cap->start;
236         if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
237             (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
238                 *value = device->msi_registers.raw[cap_offs / 4] >>
239                         ((cap_offs % 4) * 8);
240                 return PCI_ACCESS_DONE;
241         }
242
243         return PCI_ACCESS_PERFORM;
244 }
245
246 static int pci_update_msix(struct pci_device *device,
247                            const struct jailhouse_pci_capability *cap)
248 {
249         unsigned int n;
250         int result;
251
252         for (n = 0; n < device->info->num_msix_vectors; n++) {
253                 result = arch_pci_update_msix_vector(device, n);
254                 if (result < 0)
255                         return result;
256         }
257         return 0;
258 }
259
/**
 * Moderate config space write access.
 * @param device        The device to be accessed. If NULL, access will be
 *                      rejected.
 * @param address       Config space address.
 * @param size          Access size (1, 2 or 4 bytes).
 * @param value         Value to be written.
 *
 * @return PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 *
 * @see pci_cfg_read_moderate
 */
enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
                                       unsigned int size, u32 value)
{
        const struct jailhouse_pci_capability *cap;
        /* bit shift of the access inside its containing DWORD */
        unsigned int bias_shift = (address % 4) * 8;
        /* DWORD-wide mask covering exactly the accessed bytes */
        u32 mask = BYTE_MASK(size) << bias_shift;
        struct pci_cfg_control cfg_control;
        unsigned int bar_no, cap_offs;

        if (!device)
                return PCI_ACCESS_REJECT;

        /* align the value with its byte lane inside the DWORD */
        value <<= bias_shift;

        /* Emulate BARs for physical and virtual devices */
        if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE &&
            address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
                bar_no = (address - PCI_CFG_BAR) / 4;
                /* only bits allowed by the per-BAR mask reach the shadow */
                mask &= device->info->bar_mask[bar_no];
                device->bar[bar_no] &= ~mask;
                device->bar[bar_no] |= value & mask;
                return PCI_ACCESS_DONE;
        }

        if (address < PCI_CONFIG_HEADER_SIZE) {
                /* pick the access-control entry for this header DWORD */
                if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE)
                        cfg_control = bridge_write[address / 4];
                else /* physical or virtual device */
                        cfg_control = endpoint_write[address / 4];

                /* every accessed bit must be covered by the control mask */
                if ((cfg_control.mask & mask) != mask)
                        return PCI_ACCESS_REJECT;

                switch (cfg_control.type) {
                case PCI_CONFIG_ALLOW:
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                                return pci_ivshmem_cfg_write(device,
                                                address / 4, mask, value);
                        return PCI_ACCESS_PERFORM;
                case PCI_CONFIG_RDONLY:
                        /* silently drop the write */
                        return PCI_ACCESS_DONE;
                default:
                        return PCI_ACCESS_REJECT;
                }
        }

        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_write(device, address / 4, mask, value);

        cap = pci_find_capability(device, address);
        if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
                return PCI_ACCESS_REJECT;

        cap_offs = address - cap->start;
        /* MSI: update the shadow registers, then let the arch layer apply
         * remapping/validation before anything reaches hardware */
        if (cap->id == PCI_CAP_MSI &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                device->msi_registers.raw[cap_offs / 4] &= ~mask;
                device->msi_registers.raw[cap_offs / 4] |= value;

                if (arch_pci_update_msi(device, cap) < 0)
                        return PCI_ACCESS_REJECT;

                /*
                 * Address and data words are emulated, the control word is
                 * written as-is.
                 */
                if (cap_offs >= 4)
                        return PCI_ACCESS_DONE;
        } else if (cap->id == PCI_CAP_MSIX && cap_offs < 4) {
                /* MSI-X control word: shadow it, then re-sync all vectors */
                device->msix_registers.raw &= ~mask;
                device->msix_registers.raw |= value;

                if (pci_update_msix(device, cap) < 0)
                        return PCI_ACCESS_REJECT;
        }

        return PCI_ACCESS_PERFORM;
}
351
352 /**
353  * Initialization of PCI subsystem.
354  *
355  * @return 0 on success, negative error code otherwise.
356  */
357 int pci_init(void)
358 {
359         int err;
360
361         err = pci_cell_init(&root_cell);
362         if (err)
363                 return err;
364
365         mmcfg_start = system_config->platform_info.x86.mmconfig_base;
366         if (mmcfg_start == 0)
367                 return 0;
368
369         end_bus = system_config->platform_info.x86.mmconfig_end_bus;
370         mmcfg_size = (end_bus + 1) * 256 * 4096;
371
372         pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
373         if (!pci_space)
374                 return trace_error(-ENOMEM);
375
376         return paging_create(&hv_paging_structs, mmcfg_start, mmcfg_size,
377                              (unsigned long)pci_space,
378                              PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
379                              PAGING_NON_COHERENT);
380 }
381
/* Emulate accesses to the MSI-X table/PBA regions of the cell's devices.
 * Returns 1 if the access hit a registered region and was handled, 0 if no
 * device's region matched, -1 on an invalid access (panics). */
static int pci_msix_access_handler(const struct cell *cell, bool is_write,
                                   u64 addr, u32 *value)
{
        /* DWORD index inside one MSI-X vector entry */
        unsigned int dword = (addr % sizeof(union pci_msix_vector)) >> 2;
        struct pci_device *device = cell->msix_device_list;
        unsigned int index;
        u64 offs;

        /* find the device whose MSI-X region contains the address */
        while (device) {
                if (addr >= device->info->msix_address &&
                    addr < device->info->msix_address +
                           device->info->msix_region_size)
                        goto found;
                device = device->next_msix_device;
        }
        return 0;

found:
        /* access must be DWORD-aligned */
        if (addr & 0x3)
                goto invalid_access;

        offs = addr - device->info->msix_address;
        index = offs / sizeof(union pci_msix_vector);

        if (is_write) {
                /*
                 * The PBA may share a page with the MSI-X table. Writing to
                 * PBA entries is undefined. We declare it as invalid.
                 */
                if (index >= device->info->num_msix_vectors)
                        goto invalid_access;

                /* update the shadow vector, then let the arch layer apply it */
                device->msix_vectors[index].raw[dword] = *value;
                if (arch_pci_update_msix_vector(device, index) < 0)
                        goto invalid_access;

                /* the control word is written through to the real table */
                if (dword == MSIX_VECTOR_CTRL_DWORD)
                        mmio_write32(&device->msix_table[index].raw[dword],
                                     *value);
        } else {
                /* PBA entries and control words read from hardware,
                 * address/data words from the shadow copy */
                if (index >= device->info->num_msix_vectors ||
                    dword == MSIX_VECTOR_CTRL_DWORD)
                        *value =
                            mmio_read32(((void *)device->msix_table) + offs);
                else
                        *value = device->msix_vectors[index].raw[dword];
        }
        return 1;

invalid_access:
        panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
                     "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
        return -1;
}
437
438 static enum mmio_result pci_mmconfig_access_handler(void *arg,
439                                                     struct mmio_access *mmio)
440 {
441         u32 reg_addr = mmio->address & 0xfff;
442         struct pci_device *device;
443         enum pci_access result;
444         u32 val;
445
446         /* access must be DWORD-aligned */
447         if (reg_addr & 0x3)
448                 goto invalid_access;
449
450         device = pci_get_assigned_device(this_cell(), mmio->address >> 12);
451
452         if (mmio->is_write) {
453                 result = pci_cfg_write_moderate(device, reg_addr, 4,
454                                                 mmio->value);
455                 if (result == PCI_ACCESS_REJECT)
456                         goto invalid_access;
457                 if (result == PCI_ACCESS_PERFORM)
458                         mmio_write32(pci_space + mmio->address, mmio->value);
459         } else {
460                 result = pci_cfg_read_moderate(device, reg_addr, 4, &val);
461                 if (result == PCI_ACCESS_PERFORM)
462                         mmio->value = mmio_read32(pci_space + mmio->address);
463                 else
464                         mmio->value = val;
465         }
466
467         return MMIO_HANDLED;
468
469 invalid_access:
470         panic_printk("FATAL: Invalid PCI MMCONFIG write, device %02x:%02x.%x, "
471                      "reg: %\n", PCI_BDF_PARAMS(mmio->address >> 12),
472                      reg_addr);
473         return MMIO_ERROR;
474
475 }
476
/**
 * Handler for MMIO-accesses to PCI config space.
 * @param cell          Request issuing cell.
 * @param is_write      True if write access.
 * @param addr          Address accessed.
 * @param value         Pointer to value for reading/writing.
 *
 * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
 */
int pci_mmio_access_handler(const struct cell *cell, bool is_write,
                            u64 addr, u32 *value)
{
        /* only MSI-X table/PBA accesses are dispatched through this legacy
         * entry point; MMCONFIG uses the new MMIO dispatcher */
        return pci_msix_access_handler(cell, is_write, addr, value);
}
491
492 /**
493  * Retrieve number of enabled MSI vector of a device.
494  * @param device        The device to be examined.
495  *
496  * @return number of vectors.
497  */
498 unsigned int pci_enabled_msi_vectors(struct pci_device *device)
499 {
500         return device->msi_registers.msg32.enable ?
501                 1 << device->msi_registers.msg32.mme : 0;
502 }
503
504 static void pci_save_msi(struct pci_device *device,
505                          const struct jailhouse_pci_capability *cap)
506 {
507         u16 bdf = device->info->bdf;
508         unsigned int n;
509
510         for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
511                 device->msi_registers.raw[n] =
512                         pci_read_config(bdf, cap->start + n * 4, 4);
513 }
514
515 static void pci_restore_msi(struct pci_device *device,
516                             const struct jailhouse_pci_capability *cap)
517 {
518         unsigned int n;
519
520         for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
521                 pci_write_config(device->info->bdf, cap->start + n * 4,
522                                  device->msi_registers.raw[n], 4);
523 }
524
525 static void pci_suppress_msix(struct pci_device *device,
526                               const struct jailhouse_pci_capability *cap,
527                               bool suppressed)
528 {
529         union pci_msix_registers regs = device->msix_registers;
530
531         if (suppressed)
532                 regs.fmask = 1;
533         pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
534 }
535
536 static void pci_save_msix(struct pci_device *device,
537                           const struct jailhouse_pci_capability *cap)
538 {
539         unsigned int n, r;
540
541         device->msix_registers.raw =
542                 pci_read_config(device->info->bdf, cap->start, 4);
543
544         for (n = 0; n < device->info->num_msix_vectors; n++)
545                 for (r = 0; r < 4; r++)
546                         device->msix_vectors[n].raw[r] =
547                                 mmio_read32(&device->msix_table[n].raw[r]);
548 }
549
550 static void pci_restore_msix(struct pci_device *device,
551                              const struct jailhouse_pci_capability *cap)
552 {
553         unsigned int n, r;
554
555         for (n = 0; n < device->info->num_msix_vectors; n++)
556                 /* only restore address/data, control is write-through */
557                 for (r = 0; r < 3; r++)
558                         mmio_write32(&device->msix_table[n].raw[r],
559                                      device->msix_vectors[n].raw[r]);
560         pci_suppress_msix(device, cap, false);
561 }
562
563 /**
564  * Prepare the handover of PCI devices to Jailhouse or back to Linux.
565  */
566 void pci_prepare_handover(void)
567 {
568         const struct jailhouse_pci_capability *cap;
569         struct pci_device *device;
570         unsigned int n;
571
572         if (!root_cell.pci_devices)
573                 return;
574
575         for_each_configured_pci_device(device, &root_cell) {
576                 if (device->cell)
577                         for_each_pci_cap(cap, device, n)
578                                 if (cap->id == PCI_CAP_MSI)
579                                         arch_pci_suppress_msi(device, cap);
580                                 else if (cap->id == PCI_CAP_MSIX)
581                                         pci_suppress_msix(device, cap, true);
582         }
583 }
584
/* Assign a physical PCI device to a cell: snapshot its BARs, perform the
 * arch-specific assignment and, if the device uses MSI-X, map its table
 * into hypervisor space. Returns 0 on success, negative error code
 * otherwise (with all partial setup rolled back via the goto chain). */
static int pci_add_physical_device(struct cell *cell, struct pci_device *device)
{
        unsigned int n, pages, size = device->info->msix_region_size;
        int err;

        printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), cell->config->name);

        /* capture the current BAR values as the shadow copies that
         * pci_cfg_read_moderate will serve to the cell */
        for (n = 0; n < PCI_NUM_BARS; n ++)
                device->bar[n] = pci_read_config(device->info->bdf,
                                                 PCI_CFG_BAR + n * 4, 4);

        err = arch_pci_add_physical_device(cell, device);

        if (!err && device->info->msix_address) {
                /* map the physical MSI-X table into hypervisor space */
                device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
                if (!device->msix_table) {
                        err = trace_error(-ENOMEM);
                        goto error_remove_dev;
                }

                err = paging_create(&hv_paging_structs,
                                    device->info->msix_address, size,
                                    (unsigned long)device->msix_table,
                                    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                                    PAGING_NON_COHERENT);
                if (err)
                        goto error_page_free;

                /* fall back to a heap-allocated shadow vector array when the
                 * embedded one in struct pci_device is too small */
                if (device->info->num_msix_vectors > PCI_EMBEDDED_MSIX_VECTS) {
                        pages = PAGES(sizeof(union pci_msix_vector) *
                                      device->info->num_msix_vectors);
                        device->msix_vectors = page_alloc(&mem_pool, pages);
                        if (!device->msix_vectors) {
                                err = -ENOMEM;
                                goto error_unmap_table;
                        }
                }

                /* register in the cell's MSI-X device list for the
                 * MMIO access handler */
                device->next_msix_device = cell->msix_device_list;
                cell->msix_device_list = device;
        }
        return err;

error_unmap_table:
        /* cannot fail, destruction of same size as construction */
        paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
                       size, PAGING_NON_COHERENT);
error_page_free:
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
error_remove_dev:
        arch_pci_remove_physical_device(device);
        return err;
}
639
/* Revoke a physical PCI device from its cell: undo the arch assignment,
 * disable INTx and tear down any MSI-X table mapping and shadow storage,
 * unlinking the device from the cell's MSI-X device list. */
static void pci_remove_physical_device(struct pci_device *device)
{
        unsigned int size = device->info->msix_region_size;
        struct pci_device *prev_msix_device;

        printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
        arch_pci_remove_physical_device(device);
        /* silence legacy interrupts while the device is unowned */
        pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
                         PCI_CMD_INTX_OFF, 2);

        /* no MSI-X table mapped - nothing else to tear down */
        if (!device->msix_table)
                return;

        /* cannot fail, destruction of same size as construction */
        paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
                       size, PAGING_NON_COHERENT);
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);

        /* free the shadow vector array only if it was heap-allocated
         * (i.e. not the embedded one) */
        if (device->msix_vectors != device->msix_vector_array)
                page_free(&mem_pool, device->msix_vectors,
                          PAGES(sizeof(union pci_msix_vector) *
                                device->info->num_msix_vectors));

        /* unlink from the cell's singly-linked MSI-X device list */
        prev_msix_device = device->cell->msix_device_list;
        if (prev_msix_device == device) {
                device->cell->msix_device_list = device->next_msix_device;
        } else {
                while (prev_msix_device->next_msix_device != device)
                        prev_msix_device = prev_msix_device->next_msix_device;
                prev_msix_device->next_msix_device = device->next_msix_device;
        }
}
673
/**
 * Perform PCI-specific initialization for a new cell.
 * @param cell  Cell to be initialized.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @see pci_cell_exit
 */
int pci_cell_init(struct cell *cell)
{
        unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
                                           sizeof(struct pci_device));
        const struct jailhouse_pci_device *dev_infos =
                jailhouse_cell_pci_devices(cell->config);
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device, *root_device;
        unsigned int ndev, ncap;
        int err;

        /* route the cell's MMCONFIG window accesses through our handler */
        if (pci_space)
                mmio_region_register(cell, mmcfg_start, mmcfg_size,
                                     pci_mmconfig_access_handler, NULL);

        cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
        if (!cell->pci_devices)
                return -ENOMEM;

        /*
         * We order device states in the same way as the static information
         * so that we can use the index of the latter to find the former. For
         * the other way around and for obtaining the owner cell, we use more
         * handy pointers. The cell pointer also encodes active ownership.
         */
        for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
                device = &cell->pci_devices[ndev];
                device->info = &dev_infos[ndev];
                /* default to the embedded shadow vector array; a larger one
                 * may be heap-allocated by pci_add_physical_device */
                device->msix_vectors = device->msix_vector_array;

                if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                        err = pci_ivshmem_init(cell, device);
                        if (err)
                                goto error;

                        device->cell = cell;

                        continue;
                }

                /* take the physical device away from the root cell first */
                root_device = pci_get_assigned_device(&root_cell,
                                                      dev_infos[ndev].bdf);
                if (root_device) {
                        pci_remove_physical_device(root_device);
                        root_device->cell = NULL;
                }

                err = pci_add_physical_device(cell, device);
                if (err)
                        goto error;

                device->cell = cell;

                /* snapshot the current MSI/MSI-X state of the device */
                for_each_pci_cap(cap, device, ncap)
                        if (cap->id == PCI_CAP_MSI)
                                pci_save_msi(device, cap);
                        else if (cap->id == PCI_CAP_MSIX)
                                pci_save_msix(device, cap);
        }

        /* during hypervisor activation, suppress interrupts right away */
        if (cell == &root_cell)
                pci_prepare_handover();

        return 0;
error:
        /* roll back everything initialized so far */
        pci_cell_exit(cell);
        return err;
}
750
751 static void pci_return_device_to_root_cell(struct pci_device *device)
752 {
753         struct pci_device *root_device;
754
755         for_each_configured_pci_device(root_device, &root_cell)
756                 if (root_device->info->domain == device->info->domain &&
757                     root_device->info->bdf == device->info->bdf) {
758                         if (pci_add_physical_device(&root_cell,
759                                                     root_device) < 0)
760                                 printk("WARNING: Failed to re-assign PCI "
761                                        "device to root cell\n");
762                         else
763                                 root_device->cell = &root_cell;
764                         break;
765                 }
766 }
767
768 /**
769  * Perform PCI-specific cleanup for a cell under destruction.
770  * @param cell  Cell to be destructed.
771  *
772  * @see pci_cell_init
773  */
774 void pci_cell_exit(struct cell *cell)
775 {
776         unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
777                                            sizeof(struct pci_device));
778         struct pci_device *device;
779
780         /*
781          * Do not destroy the root cell. We will shut down the complete
782          * hypervisor instead.
783          */
784         if (cell == &root_cell)
785                 return;
786
787         for_each_configured_pci_device(device, cell)
788                 if (device->cell) {
789                         if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
790                                 pci_ivshmem_exit(device);
791                         } else {
792                                 pci_remove_physical_device(device);
793                                 pci_return_device_to_root_cell(device);
794                         }
795                 }
796
797         page_free(&mem_pool, cell->pci_devices, devlist_pages);
798 }
799
/**
 * Apply PCI-specific configuration changes.
 * @param cell_added_removed    Cell that was added or removed to/from the
 *                              system or NULL.
 *
 * @see arch_config_commit
 */
void pci_config_commit(struct cell *cell_added_removed)
{
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device;
        unsigned int n;
        int err = 0;

        if (!cell_added_removed)
                return;

        /* re-apply MSI/MSI-X state of every actively owned device so it
         * matches the new cell configuration */
        for_each_configured_pci_device(device, &root_cell)
                if (device->cell) {
                        for_each_pci_cap(cap, device, n) {
                                if (cap->id == PCI_CAP_MSI) {
                                        err = arch_pci_update_msi(device, cap);
                                } else if (cap->id == PCI_CAP_MSIX) {
                                        err = pci_update_msix(device, cap);
                                        pci_suppress_msix(device, cap, false);
                                }
                                if (err)
                                        goto error;
                        }
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                                err = pci_ivshmem_update_msix(device);
                                if (err) {
                                        /* NULL marks "no capability" for the
                                         * error message below */
                                        cap = NULL;
                                        goto error;
                                }
                        }
                }
        return;

error:
        panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x",
                     PCI_BDF_PARAMS(device->info->bdf));
        if (cap)
                panic_printk(", cap %d\n", cap->id);
        else
                panic_printk("\n");
        panic_stop();
}
848
849 /**
850  * Shut down the PCI layer during hypervisor deactivation.
851  */
852 void pci_shutdown(void)
853 {
854         const struct jailhouse_pci_capability *cap;
855         struct pci_device *device;
856         unsigned int n;
857
858         if (!root_cell.pci_devices)
859                 return;
860
861         for_each_configured_pci_device(device, &root_cell) {
862                 if (!device->cell)
863                         continue;
864
865                 for_each_pci_cap(cap, device, n)
866                         if (cap->id == PCI_CAP_MSI)
867                                 pci_restore_msi(device, cap);
868                         else if (cap->id == PCI_CAP_MSIX)
869                                 pci_restore_msix(device, cap);
870
871                 if (device->cell != &root_cell)
872                         pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
873                                          PCI_CMD_INTX_OFF, 2);
874         }
875 }