hypervisor/pci.c
/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2014
 *
 * Authors:
 *  Ivan Kolchin <ivan.kolchin@siemens.com>
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/control.h>
#include <jailhouse/mmio.h>
#include <jailhouse/pci.h>
#include <jailhouse/printk.h>
#include <jailhouse/utils.h>

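/* Index of the 32-bit vector control word within an MSI-X table entry */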
#define MSIX_VECTOR_CTRL_DWORD          3

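/*
 * Iterate over all PCI devices configured for a cell, i.e. over the cell's
 * pci_devices array set up in pci_cell_init(). Typical use, as in
 * pci_prepare_handover() and pci_shutdown() below:
 *
 *      struct pci_device *device;
 *
 *      for_each_configured_pci_device(device, &root_cell) {
 *              if (!device->cell)
 *                      continue;
 *              ...
 *      }
 */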
#define for_each_configured_pci_device(dev, cell)                       \
        for ((dev) = (cell)->pci_devices;                               \
             (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
             (dev)++)

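/*
 * Iterate over all config space capabilities that the cell configuration
 * describes for a device; (cap) walks the device's jailhouse_pci_capability
 * entries while (counter) runs from 0 to num_caps - 1. Used below in
 * pci_cell_init(), pci_config_commit() and pci_shutdown().
 */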
#define for_each_pci_cap(cap, dev, counter)                             \
        for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) +     \
                (dev)->info->caps_start, (counter) = 0;                 \
             (counter) < (dev)->info->num_caps;                         \
             (cap)++, (counter)++)

/* entry for PCI config space whitelist (granting access) */
struct pci_cfg_access {
        u32 reg_num; /* Register number (4-byte aligned) */
        u32 mask; /* Bit set: access allowed */
};

/* --- Whitelists for writing to PCI config space registers --- */
/* Type 1: Endpoints */
static const struct pci_cfg_access endpoint_write_access[] = {
        { 0x04, 0xffffffff }, /* Command, Status */
        { 0x0c, 0xff00ffff }, /* BIST, Latency Timer, Cacheline */
        { 0x3c, 0x000000ff }, /* Int Line */
};
/* Type 2: Bridges */
static const struct pci_cfg_access bridge_write_access[] = {
        { 0x04, 0xffffffff }, /* Command, Status */
        { 0x0c, 0xff00ffff }, /* BIST, Latency Timer, Cacheline */
        { 0x3c, 0xffff00ff }, /* Int Line, Bridge Control */
};

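/*
 * MMCONFIG state: pci_space is the hypervisor mapping of the MMCONFIG area,
 * mmcfg_start/mmcfg_end delimit its physical address range, and end_bus is
 * the last bus number covered by it.
 */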
static void *pci_space;
static u64 mmcfg_start, mmcfg_end;
static u8 end_bus;

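/* Each device function owns one 4K page in MMCONFIG, indexed by its BDF. */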
static void *pci_get_device_mmcfg_base(u16 bdf)
{
        return pci_space + ((unsigned long)bdf << 12);
}

/**
 * Read from PCI config space.
 * @param bdf           16-bit bus/device/function ID of target.
 * @param address       Config space access address.
 * @param size          Access size (1, 2 or 4 bytes).
 *
 * @return Read value.
 *
 * @see pci_write_config
 */
u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
{
        void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;

        if (!pci_space || PCI_BUS(bdf) > end_bus)
                return arch_pci_read_config(bdf, address, size);

        if (size == 1)
                return mmio_read8(mmcfg_addr);
        else if (size == 2)
                return mmio_read16(mmcfg_addr);
        else
                return mmio_read32(mmcfg_addr);
}

/**
 * Write to PCI config space.
 * @param bdf           16-bit bus/device/function ID of target.
 * @param address       Config space access address.
 * @param value         Value to be written.
 * @param size          Access size (1, 2 or 4 bytes).
 *
 * @see pci_read_config
 */
void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
{
        void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;

        if (!pci_space || PCI_BUS(bdf) > end_bus)
                return arch_pci_write_config(bdf, address, value, size);

        if (size == 1)
                mmio_write8(mmcfg_addr, value);
        else if (size == 2)
                mmio_write16(mmcfg_addr, value);
        else
                mmio_write32(mmcfg_addr, value);
}

/**
 * Look up device owned by a cell.
 * @param[in] cell      Owning cell.
 * @param bdf           16-bit bus/device/function ID.
 *
 * @return Pointer to owned PCI device or NULL.
 */
struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
{
        const struct jailhouse_pci_device *dev_info =
                jailhouse_cell_pci_devices(cell->config);
        u32 n;

        /* We iterate over the static device information to increase cache
         * locality. */
        for (n = 0; n < cell->config->num_pci_devices; n++)
                if (dev_info[n].bdf == bdf)
                        return cell->pci_devices[n].cell ?
                                &cell->pci_devices[n] : NULL;

        return NULL;
}

/**
 * Look up capability at given config space address.
 * @param device        The device to be accessed.
 * @param address       Config space access address.
 *
 * @return Corresponding capability structure or NULL if none found.
 *
 * @private
 */
static const struct jailhouse_pci_capability *
pci_find_capability(struct pci_device *device, u16 address)
{
        const struct jailhouse_pci_capability *cap =
                jailhouse_cell_pci_caps(device->cell->config) +
                device->info->caps_start;
        u32 n;

        for (n = 0; n < device->info->num_caps; n++, cap++)
                if (cap->start <= address && cap->start + cap->len > address)
                        return cap;

        return NULL;
}

/**
 * Moderate config space read access.
 * @param device        The device to be accessed. If NULL, access will be
 *                      emulated, returning a value of -1.
 * @param address       Config space address.
 * @param size          Access size (1, 2 or 4 bytes).
 * @param value         Pointer to buffer to receive the emulated value if
 *                      PCI_ACCESS_DONE is returned.
 *
 * @return PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 *
 * @see pci_cfg_write_moderate
 */
enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
                                      unsigned int size, u32 *value)
{
        const struct jailhouse_pci_capability *cap;
        unsigned int cap_offs;

        if (!device) {
                *value = -1;
                return PCI_ACCESS_DONE;
        }

        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_read(device, address, size, value);

        if (address < PCI_CONFIG_HEADER_SIZE)
                return PCI_ACCESS_PERFORM;

        cap = pci_find_capability(device, address);
        if (!cap)
                return PCI_ACCESS_PERFORM;

        cap_offs = address - cap->start;
        if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                *value = device->msi_registers.raw[cap_offs / 4] >>
                        ((cap_offs % 4) * 8);
                return PCI_ACCESS_DONE;
        }

        return PCI_ACCESS_PERFORM;
}

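/* Propagate all shadowed MSI-X vectors of a device to the arch-specific code. */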
static int pci_update_msix(struct pci_device *device,
                           const struct jailhouse_pci_capability *cap)
{
        unsigned int n;
        int result;

        for (n = 0; n < device->info->num_msix_vectors; n++) {
                result = arch_pci_update_msix_vector(device, n);
                if (result < 0)
                        return result;
        }
        return 0;
}

/**
 * Moderate config space write access.
 * @param device        The device to be accessed. If NULL, access will be
 *                      rejected.
 * @param address       Config space address.
 * @param size          Access size (1, 2 or 4 bytes).
 * @param value         Value to be written.
 *
 * @return PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 *
 * @see pci_cfg_read_moderate
 */
enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
                                       unsigned int size, u32 value)
{
        const struct jailhouse_pci_capability *cap;
        /* initialize list to work around a false-positive compiler warning */
        const struct pci_cfg_access *list = NULL;
        unsigned int bias_shift = (address % 4) * 8;
        u32 mask = BYTE_MASK(size) << bias_shift;
        unsigned int n, cap_offs, len = 0;

        if (!device)
                return PCI_ACCESS_REJECT;

        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_write(device, address, size, value);

        if (address < PCI_CONFIG_HEADER_SIZE) {
                if (device->info->type == JAILHOUSE_PCI_TYPE_DEVICE) {
                        list = endpoint_write_access;
                        len = ARRAY_SIZE(endpoint_write_access);
                } else if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE) {
                        list = bridge_write_access;
                        len = ARRAY_SIZE(bridge_write_access);
                }

                for (n = 0; n < len; n++) {
                        if (list[n].reg_num == (address & 0xffc) &&
                            (list[n].mask & mask) == mask)
                                return PCI_ACCESS_PERFORM;
                }

                /* HACK to allow PCI bus rescanning in the root cell */
                if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE &&
                    device->cell == &root_cell)
                        return PCI_ACCESS_DONE;

                return PCI_ACCESS_REJECT;
        }

        cap = pci_find_capability(device, address);
        if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
                return PCI_ACCESS_REJECT;

        value <<= bias_shift;

        cap_offs = address - cap->start;
        if (cap->id == PCI_CAP_MSI &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                device->msi_registers.raw[cap_offs / 4] &= ~mask;
                device->msi_registers.raw[cap_offs / 4] |= value;

                if (arch_pci_update_msi(device, cap) < 0)
                        return PCI_ACCESS_REJECT;

                /*
                 * Address and data words are emulated, the control word is
                 * written as-is.
                 */
                if (cap_offs >= 4)
                        return PCI_ACCESS_DONE;
        } else if (cap->id == PCI_CAP_MSIX && cap_offs < 4) {
                device->msix_registers.raw &= ~mask;
                device->msix_registers.raw |= value;

                if (pci_update_msix(device, cap) < 0)
                        return PCI_ACCESS_REJECT;
        }

        return PCI_ACCESS_PERFORM;
}

/**
 * Initialization of PCI subsystem.
 *
 * @return 0 on success, negative error code otherwise.
 */
int pci_init(void)
{
        unsigned int mmcfg_size;
        int err;

        err = pci_cell_init(&root_cell);
        if (err)
                return err;

        mmcfg_start = system_config->platform_info.x86.mmconfig_base;
        if (mmcfg_start == 0)
                return 0;

        end_bus = system_config->platform_info.x86.mmconfig_end_bus;
        mmcfg_size = (end_bus + 1) * 256 * 4096;
        mmcfg_end = mmcfg_start + mmcfg_size - 4;

        pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
        if (!pci_space)
                return trace_error(-ENOMEM);

        return paging_create(&hv_paging_structs, mmcfg_start, mmcfg_size,
                             (unsigned long)pci_space,
                             PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                             PAGING_NON_COHERENT);
}

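/*
 * Handle MMIO accesses that fall into the moderated MSI-X table region of
 * one of the cell's devices. Returns 1 if handled, 0 if the address belongs
 * to no such region, -1 on invalid accesses.
 */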
static int pci_msix_access_handler(const struct cell *cell, bool is_write,
                                   u64 addr, u32 *value)
{
        unsigned int dword = (addr % sizeof(union pci_msix_vector)) >> 2;
        struct pci_device *device = cell->msix_device_list;
        unsigned int index;
        u64 offs;

        while (device) {
                if (addr >= device->info->msix_address &&
                    addr < device->info->msix_address +
                           device->info->msix_region_size)
                        goto found;
                device = device->next_msix_device;
        }
        return 0;

found:
        /* access must be DWORD-aligned */
        if (addr & 0x3)
                goto invalid_access;

        offs = addr - device->info->msix_address;
        index = offs / sizeof(union pci_msix_vector);

        if (is_write) {
                /*
                 * The PBA may share a page with the MSI-X table. Writing to
                 * PBA entries is undefined. We declare it as invalid.
                 */
                if (index >= device->info->num_msix_vectors)
                        goto invalid_access;

                device->msix_vectors[index].raw[dword] = *value;
                if (arch_pci_update_msix_vector(device, index) < 0)
                        goto invalid_access;

                if (dword == MSIX_VECTOR_CTRL_DWORD)
                        mmio_write32(&device->msix_table[index].raw[dword],
                                     *value);
        } else {
                if (index >= device->info->num_msix_vectors ||
                    dword == MSIX_VECTOR_CTRL_DWORD)
                        *value =
                            mmio_read32(((void *)device->msix_table) + offs);
                else
                        *value = device->msix_vectors[index].raw[dword];
        }
        return 1;

invalid_access:
        panic_printk("FATAL: Invalid PCI MSI-X BAR access, device "
                     "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
        return -1;
}

/**
 * Handler for MMIO accesses to PCI config space.
 * @param cell          Request issuing cell.
 * @param is_write      True if write access.
 * @param addr          Address accessed.
 * @param value         Pointer to value for reading/writing.
 *
 * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
 */
int pci_mmio_access_handler(const struct cell *cell, bool is_write,
                            u64 addr, u32 *value)
{
        u32 mmcfg_offset, reg_addr;
        struct pci_device *device;
        enum pci_access access;
        int ret;

        if (!pci_space || addr < mmcfg_start || addr > mmcfg_end) {
                ret = pci_msix_access_handler(cell, is_write, addr, value);
                if (ret == 0)
                        ret = ivshmem_mmio_access_handler(cell, is_write, addr,
                                                          value);
                return ret;
        }

        mmcfg_offset = addr - mmcfg_start;
        reg_addr = mmcfg_offset & 0xfff;
        /* access must be DWORD-aligned */
        if (reg_addr & 0x3)
                goto invalid_access;

        device = pci_get_assigned_device(cell, mmcfg_offset >> 12);

        if (is_write) {
                access = pci_cfg_write_moderate(device, reg_addr, 4, *value);
                if (access == PCI_ACCESS_REJECT)
                        goto invalid_access;
                if (access == PCI_ACCESS_PERFORM)
                        mmio_write32(pci_space + mmcfg_offset, *value);
        } else {
                access = pci_cfg_read_moderate(device, reg_addr, 4, value);
                if (access == PCI_ACCESS_PERFORM)
                        *value = mmio_read32(pci_space + mmcfg_offset);
        }

        return 1;

invalid_access:
        panic_printk("FATAL: Invalid PCI MMCONFIG access, device %02x:%02x.%x, "
                     "reg: %x\n", PCI_BDF_PARAMS(mmcfg_offset >> 12), reg_addr);
        return -1;
}

/**
 * Retrieve the number of enabled MSI vectors of a device.
 * @param device        The device to be examined.
 *
 * @return Number of enabled vectors.
 */
unsigned int pci_enabled_msi_vectors(struct pci_device *device)
{
        return device->msi_registers.msg32.enable ?
                1 << device->msi_registers.msg32.mme : 0;
}

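/*
 * Save the MSI capability registers of the device into the shadow copy
 * (3 dwords, or 4 if the device uses 64-bit MSI).
 */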
static void pci_save_msi(struct pci_device *device,
                         const struct jailhouse_pci_capability *cap)
{
        u16 bdf = device->info->bdf;
        unsigned int n;

        for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
                device->msi_registers.raw[n] =
                        pci_read_config(bdf, cap->start + n * 4, 4);
}

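/*
 * Write the shadowed MSI address and data registers back to the device,
 * skipping the first dword that holds the capability header and control
 * word.
 */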
static void pci_restore_msi(struct pci_device *device,
                            const struct jailhouse_pci_capability *cap)
{
        unsigned int n;

        for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
                pci_write_config(device->info->bdf, cap->start + n * 4,
                                 device->msi_registers.raw[n], 4);
}

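/*
 * Write the shadowed MSI-X control dword back to the device, optionally
 * with the function mask bit set so that all vectors of the device stay
 * masked.
 */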
static void pci_suppress_msix(struct pci_device *device,
                              const struct jailhouse_pci_capability *cap,
                              bool suppressed)
{
        union pci_msix_registers regs = device->msix_registers;

        if (suppressed)
                regs.fmask = 1;
        pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
}

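/*
 * Save the MSI-X control dword and all MSI-X table entries of the device
 * into their shadow copies.
 */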
static void pci_save_msix(struct pci_device *device,
                          const struct jailhouse_pci_capability *cap)
{
        unsigned int n, r;

        device->msix_registers.raw =
                pci_read_config(device->info->bdf, cap->start, 4);

        for (n = 0; n < device->info->num_msix_vectors; n++)
                for (r = 0; r < 4; r++)
                        device->msix_vectors[n].raw[r] =
                                mmio_read32(&device->msix_table[n].raw[r]);
}

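/*
 * Restore the address and data words of all MSI-X table entries from the
 * shadow copies and unmask the device again; control words are not restored
 * as they are written through on updates.
 */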
static void pci_restore_msix(struct pci_device *device,
                             const struct jailhouse_pci_capability *cap)
{
        unsigned int n, r;

        for (n = 0; n < device->info->num_msix_vectors; n++)
                /* only restore address/data, control is write-through */
                for (r = 0; r < 3; r++)
                        mmio_write32(&device->msix_table[n].raw[r],
                                     device->msix_vectors[n].raw[r]);
        pci_suppress_msix(device, cap, false);
}

/**
 * Prepare the handover of PCI devices to Jailhouse or back to Linux.
 */
void pci_prepare_handover(void)
{
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device;
        unsigned int n;

        if (!root_cell.pci_devices)
                return;

        for_each_configured_pci_device(device, &root_cell) {
                if (device->cell)
                        for_each_pci_cap(cap, device, n)
                                if (cap->id == PCI_CAP_MSI)
                                        arch_pci_suppress_msi(device, cap);
                                else if (cap->id == PCI_CAP_MSIX)
                                        pci_suppress_msix(device, cap, true);
        }
}

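/* Register a virtual (emulated) device, currently only ivshmem, with a cell. */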
static int pci_add_virtual_device(struct cell *cell, struct pci_device *device)
{
        device->cell = cell;
        device->next_virtual_device = cell->virtual_device_list;
        cell->virtual_device_list = device;
        return arch_pci_add_device(cell, device);
}

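/*
 * Register a physical device with a cell. If the device provides MSI-X, its
 * table is also mapped into the hypervisor and the device is added to the
 * cell's MSI-X device list.
 */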
static int pci_add_device(struct cell *cell, struct pci_device *device)
{
        unsigned int size = device->info->msix_region_size;
        int err;

        printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), cell->config->name);

        err = arch_pci_add_device(cell, device);

        if (!err && device->info->msix_address) {
                device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
                if (!device->msix_table) {
                        err = trace_error(-ENOMEM);
                        goto error_remove_dev;
                }

                err = paging_create(&hv_paging_structs,
                                    device->info->msix_address, size,
                                    (unsigned long)device->msix_table,
                                    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                                    PAGING_NON_COHERENT);
                if (err)
                        goto error_page_free;

                device->next_msix_device = cell->msix_device_list;
                cell->msix_device_list = device;
        }
        return 0;

error_page_free:
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
error_remove_dev:
        arch_pci_remove_device(device);
        return err;
}

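/* Unregister a virtual device and unlink it from its cell's list. */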
static void pci_remove_virtual_device(struct pci_device *device)
{
        struct pci_device *prev = device->cell->virtual_device_list;

        arch_pci_remove_device(device);
        if (prev == device) {
                device->cell->virtual_device_list = device->next_virtual_device;
        } else {
                while (prev->next_virtual_device != device)
                        prev = prev->next_virtual_device;
                prev->next_virtual_device = device->next_virtual_device;
        }
}

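/*
 * Unregister a physical device: detach it from the arch-specific code, mask
 * its legacy interrupt via the command register and release the hypervisor
 * mapping of its MSI-X table, if present.
 */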
static void pci_remove_device(struct pci_device *device)
{
        unsigned int size = device->info->msix_region_size;
        struct pci_device *prev_msix_device;

        printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
        arch_pci_remove_device(device);
        pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
                         PCI_CMD_INTX_OFF, 2);

        if (!device->msix_table)
                return;

        /* cannot fail, destruction of same size as construction */
        paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
                       size, PAGING_NON_COHERENT);
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);

        prev_msix_device = device->cell->msix_device_list;
        if (prev_msix_device == device) {
                device->cell->msix_device_list = device->next_msix_device;
        } else {
                while (prev_msix_device->next_msix_device != device)
                        prev_msix_device = prev_msix_device->next_msix_device;
                prev_msix_device->next_msix_device = device->next_msix_device;
        }
}

/**
 * Perform PCI-specific initialization for a new cell.
 * @param cell  Cell to be initialized.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @see pci_cell_exit
 */
int pci_cell_init(struct cell *cell)
{
        unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
                                           sizeof(struct pci_device));
        const struct jailhouse_pci_device *dev_infos =
                jailhouse_cell_pci_devices(cell->config);
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device, *root_device;
        unsigned int ndev, ncap;
        int err;

        cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
        if (!cell->pci_devices)
                return -ENOMEM;

        /*
         * We order device states in the same way as the static information
         * so that we can use the index of the latter to find the former. For
         * the other way around and for obtaining the owner cell, we use more
         * handy pointers. The cell pointer also encodes active ownership.
         */
        for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
                if (dev_infos[ndev].num_msix_vectors > PCI_MAX_MSIX_VECTORS) {
                        err = trace_error(-ERANGE);
                        goto error;
                }

                device = &cell->pci_devices[ndev];
                device->info = &dev_infos[ndev];

                if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                        err = pci_ivshmem_init(cell, device);
                        if (err)
                                goto error;
                        err = pci_add_virtual_device(cell, device);
                        if (err)
                                goto error;
                        continue;
                }

                root_device = pci_get_assigned_device(&root_cell,
                                                      dev_infos[ndev].bdf);
                if (root_device) {
                        pci_remove_device(root_device);
                        root_device->cell = NULL;
                }

                err = pci_add_device(cell, device);
                if (err)
                        goto error;

                device->cell = cell;

                for_each_pci_cap(cap, device, ncap)
                        if (cap->id == PCI_CAP_MSI)
                                pci_save_msi(device, cap);
                        else if (cap->id == PCI_CAP_MSIX)
                                pci_save_msix(device, cap);
        }

        if (cell == &root_cell)
                pci_prepare_handover();

        return 0;
error:
        pci_cell_exit(cell);
        return err;
}

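/*
 * Hand a device that was removed from a destructed cell back to the root
 * cell, provided the root cell configuration also lists it.
 */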
static void pci_return_device_to_root_cell(struct pci_device *device)
{
        struct pci_device *root_device;

        for_each_configured_pci_device(root_device, &root_cell)
                if (root_device->info->domain == device->info->domain &&
                    root_device->info->bdf == device->info->bdf) {
                        if (pci_add_device(&root_cell, root_device) < 0)
                                printk("WARNING: Failed to re-assign PCI "
                                       "device to root cell\n");
                        else
                                root_device->cell = &root_cell;
                        break;
                }
}

/**
 * Perform PCI-specific cleanup for a cell under destruction.
 * @param cell  Cell to be destructed.
 *
 * @see pci_cell_init
 */
void pci_cell_exit(struct cell *cell)
{
        unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
                                           sizeof(struct pci_device));
        struct pci_device *device;

        /*
         * Do not destroy the root cell. We will shut down the complete
         * hypervisor instead.
         */
        if (cell == &root_cell)
                return;

        for_each_configured_pci_device(device, cell)
                if (device->cell) {
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                                pci_ivshmem_exit(device);
                                pci_remove_virtual_device(device);
                        } else {
                                pci_remove_device(device);
                                pci_return_device_to_root_cell(device);
                        }
                }

        page_free(&mem_pool, cell->pci_devices, devlist_pages);
}

/**
 * Apply PCI-specific configuration changes.
 * @param cell_added_removed    Cell that was added to or removed from the
 *                              system, or NULL.
 *
 * @see arch_config_commit
 */
void pci_config_commit(struct cell *cell_added_removed)
{
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device;
        unsigned int n;
        int err = 0;

        if (!cell_added_removed)
                return;

        for_each_configured_pci_device(device, &root_cell)
                if (device->cell) {
                        for_each_pci_cap(cap, device, n) {
                                if (cap->id == PCI_CAP_MSI) {
                                        err = arch_pci_update_msi(device, cap);
                                } else if (cap->id == PCI_CAP_MSIX) {
                                        err = pci_update_msix(device, cap);
                                        pci_suppress_msix(device, cap, false);
                                }
                                if (err)
                                        goto error;
                        }
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                                err = pci_ivshmem_update_msix(device);
                                if (err) {
                                        cap = NULL;
                                        goto error;
                                }
                        }
                }
        return;

error:
        panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x",
                     PCI_BDF_PARAMS(device->info->bdf));
        if (cap)
                panic_printk(", cap %d\n", cap->id);
        else
                panic_printk("\n");
        panic_stop();
}

/**
 * Shut down the PCI layer during hypervisor deactivation.
 */
void pci_shutdown(void)
{
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device;
        unsigned int n;

        if (!root_cell.pci_devices)
                return;

        for_each_configured_pci_device(device, &root_cell) {
                if (!device->cell)
                        continue;

                for_each_pci_cap(cap, device, n)
                        if (cap->id == PCI_CAP_MSI)
                                pci_restore_msi(device, cap);
                        else if (cap->id == PCI_CAP_MSIX)
                                pci_restore_msix(device, cap);

                if (device->cell != &root_cell)
                        pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
                                         PCI_CMD_INTX_OFF, 2);
        }
}