/*
 * Source: rtime.felk.cvut.cz git mirror of jailhouse.git, hypervisor/pci.c
 * (web-scrape residue removed; commit subject was "core: pci: Rework config
 * space header write moderation")
 */
1 /*
2  * Jailhouse, a Linux-based partitioning hypervisor
3  *
4  * Copyright (c) Siemens AG, 2014
5  *
6  * Authors:
7  *  Ivan Kolchin <ivan.kolchin@siemens.com>
8  *  Jan Kiszka <jan.kiszka@siemens.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2.  See
11  * the COPYING file in the top-level directory.
12  */
13
14 #include <jailhouse/control.h>
15 #include <jailhouse/mmio.h>
16 #include <jailhouse/pci.h>
17 #include <jailhouse/printk.h>
18 #include <jailhouse/utils.h>
19
/* DWORD index of the vector control word within an MSI-X table entry */
#define MSIX_VECTOR_CTRL_DWORD          3

/* Iterate over all PCI device states configured for a cell */
#define for_each_configured_pci_device(dev, cell)                       \
        for ((dev) = (cell)->pci_devices;                               \
             (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
             (dev)++)

/* Iterate over all moderated capabilities of a device; counter is a
 * caller-provided scratch variable */
#define for_each_pci_cap(cap, dev, counter)                             \
        for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) +     \
                (dev)->info->caps_start, (counter) = 0;                 \
             (counter) < (dev)->info->num_caps;                         \
             (cap)++, (counter)++)

/* entry for PCI config space access control */
struct pci_cfg_control {
        enum {
                PCI_CONFIG_DENY,   /* reject the access */
                PCI_CONFIG_ALLOW,  /* pass the access through to hardware */
                PCI_CONFIG_RDONLY, /* drop writes but report success */
        } type;   /* Access type */
        u32 mask; /* Bit set: access type applies; bit cleared: deny access */
};
42
/* --- Access control for writing to PCI config space registers --- */
/* Type 1: Endpoints */
static const struct pci_cfg_control endpoint_write[PCI_CONFIG_HEADER_SIZE] = {
        [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
        [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
        [0x3c/4] = {PCI_CONFIG_ALLOW,  0x000000ff}, /* Int Line */
};

/* Type 2: Bridges
 * Note: Ignore limit/base reprogramming attempts because the root cell will
 *       perform them on bus rescans. */
static const struct pci_cfg_control bridge_write[PCI_CONFIG_HEADER_SIZE] = {
        [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
        [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
        [0x1c/4] = {PCI_CONFIG_RDONLY, 0x0000ffff}, /* I/O Limit & Base */
        [0x20/4 ...      /* Memory Limit/Base, Prefetch Memory Limit/Base, */
         0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* I/O Limit & Base */
        [0x3c/4] = {PCI_CONFIG_ALLOW,  0xffff00ff}, /* Int Line, Bridge Ctrl */
};

/* Hypervisor mapping of the MMCONFIG region, NULL if unavailable */
static void *pci_space;
/* Physical start and inclusive end address of the MMCONFIG region */
static u64 mmcfg_start, mmcfg_end;
/* Highest bus number covered by MMCONFIG */
static u8 end_bus;
66
67 static void *pci_get_device_mmcfg_base(u16 bdf)
68 {
69         return pci_space + ((unsigned long)bdf << 12);
70 }
71
72 /**
73  * Read from PCI config space.
74  * @param bdf           16-bit bus/device/function ID of target.
75  * @param address       Config space access address.
76  * @param size          Access size (1, 2 or 4 bytes).
77  *
78  * @return Read value.
79  *
80  * @see pci_write_config
81  */
82 u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
83 {
84         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
85
86         if (!pci_space || PCI_BUS(bdf) > end_bus)
87                 return arch_pci_read_config(bdf, address, size);
88
89         if (size == 1)
90                 return mmio_read8(mmcfg_addr);
91         else if (size == 2)
92                 return mmio_read16(mmcfg_addr);
93         else
94                 return mmio_read32(mmcfg_addr);
95 }
96
97 /**
98  * Write to PCI config space.
99  * @param bdf           16-bit bus/device/function ID of target.
100  * @param address       Config space access address.
101  * @param value         Value to be written.
102  * @param size          Access size (1, 2 or 4 bytes).
103  *
104  * @see pci_read_config
105  */
106 void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
107 {
108         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
109
110         if (!pci_space || PCI_BUS(bdf) > end_bus)
111                 return arch_pci_write_config(bdf, address, value, size);
112
113         if (size == 1)
114                 mmio_write8(mmcfg_addr, value);
115         else if (size == 2)
116                 mmio_write16(mmcfg_addr, value);
117         else
118                 mmio_write32(mmcfg_addr, value);
119 }
120
121 /**
122  * Look up device owned by a cell.
123  * @param[in] cell      Owning cell.
124  * @param bdf           16-bit bus/device/function ID.
125  *
126  * @return Pointer to owned PCI device or NULL.
127  */
128 struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
129 {
130         const struct jailhouse_pci_device *dev_info =
131                 jailhouse_cell_pci_devices(cell->config);
132         u32 n;
133
134         /* We iterate over the static device information to increase cache
135          * locality. */
136         for (n = 0; n < cell->config->num_pci_devices; n++)
137                 if (dev_info[n].bdf == bdf)
138                         return cell->pci_devices[n].cell ?
139                                 &cell->pci_devices[n] : NULL;
140
141         return NULL;
142 }
143
144 /**
145  * Look up capability at given config space address.
146  * @param device        The device to be accessed.
147  * @param address       Config space access address.
148  *
149  * @return Corresponding capability structure or NULL if none found.
150  *
151  * @private
152  */
153 static const struct jailhouse_pci_capability *
154 pci_find_capability(struct pci_device *device, u16 address)
155 {
156         const struct jailhouse_pci_capability *cap =
157                 jailhouse_cell_pci_caps(device->cell->config) +
158                 device->info->caps_start;
159         u32 n;
160
161         for (n = 0; n < device->info->num_caps; n++, cap++)
162                 if (cap->start <= address && cap->start + cap->len > address)
163                         return cap;
164
165         return NULL;
166 }
167
168 /**
169  * Moderate config space read access.
170  * @param device        The device to be accessed. If NULL, access will be
171  *                      emulated, returning a value of -1.
172  * @param address       Config space address.
173  * @param size          Access size (1, 2 or 4 bytes).
174  * @param value         Pointer to buffer to receive the emulated value if
175  *                      PCI_ACCESS_DONE is returned.
176  *
177  * @return PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
178  *
179  * @see pci_cfg_write_moderate
180  */
181 enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
182                                       unsigned int size, u32 *value)
183 {
184         const struct jailhouse_pci_capability *cap;
185         unsigned int cap_offs;
186
187         if (!device) {
188                 *value = -1;
189                 return PCI_ACCESS_DONE;
190         }
191
192         if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
193                 return pci_ivshmem_cfg_read(device, address, size, value);
194
195         if (address < PCI_CONFIG_HEADER_SIZE)
196                 return PCI_ACCESS_PERFORM;
197
198         cap = pci_find_capability(device, address);
199         if (!cap)
200                 return PCI_ACCESS_PERFORM;
201
202         cap_offs = address - cap->start;
203         if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
204             (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
205                 *value = device->msi_registers.raw[cap_offs / 4] >>
206                         ((cap_offs % 4) * 8);
207                 return PCI_ACCESS_DONE;
208         }
209
210         return PCI_ACCESS_PERFORM;
211 }
212
213 static int pci_update_msix(struct pci_device *device,
214                            const struct jailhouse_pci_capability *cap)
215 {
216         unsigned int n;
217         int result;
218
219         for (n = 0; n < device->info->num_msix_vectors; n++) {
220                 result = arch_pci_update_msix_vector(device, n);
221                 if (result < 0)
222                         return result;
223         }
224         return 0;
225 }
226
/**
 * Moderate config space write access.
 * @param device        The device to be accessed. If NULL, access will be
 *                      rejected.
 * @param address       Config space address.
 * @param size          Access size (1, 2 or 4 bytes).
 * @param value         Value to be written.
 *
 * @return PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 *
 * @see pci_cfg_read_moderate
 */
enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
                                       unsigned int size, u32 value)
{
        const struct jailhouse_pci_capability *cap;
        /* shift that aligns a sub-DWORD access with its position inside
         * the addressed 32-bit register */
        unsigned int bias_shift = (address % 4) * 8;
        u32 mask = BYTE_MASK(size) << bias_shift;
        struct pci_cfg_control cfg_control;
        unsigned int cap_offs;

        if (!device)
                return PCI_ACCESS_REJECT;

        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_write(device, address, size, value);

        if (address < PCI_CONFIG_HEADER_SIZE) {
                /* Header writes are filtered via the per-register access
                 * control tables above. */
                if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE)
                        cfg_control = bridge_write[address / 4];
                else /* physical device */
                        cfg_control = endpoint_write[address / 4];

                /* Every written byte must be covered by the entry's mask,
                 * otherwise the access is rejected as a whole. */
                if ((cfg_control.mask & mask) != mask)
                        return PCI_ACCESS_REJECT;

                switch (cfg_control.type) {
                case PCI_CONFIG_ALLOW:
                        return PCI_ACCESS_PERFORM;
                case PCI_CONFIG_RDONLY:
                        /* silently drop the write */
                        return PCI_ACCESS_DONE;
                default:
                        return PCI_ACCESS_REJECT;
                }
        }

        cap = pci_find_capability(device, address);
        if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
                return PCI_ACCESS_REJECT;

        /* align the value with mask's position inside the DWORD */
        value <<= bias_shift;

        cap_offs = address - cap->start;
        if (cap->id == PCI_CAP_MSI &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                /* merge the write into the shadow MSI registers */
                device->msi_registers.raw[cap_offs / 4] &= ~mask;
                device->msi_registers.raw[cap_offs / 4] |= value;

                if (arch_pci_update_msi(device, cap) < 0)
                        return PCI_ACCESS_REJECT;

                /*
                 * Address and data words are emulated, the control word is
                 * written as-is.
                 */
                if (cap_offs >= 4)
                        return PCI_ACCESS_DONE;
        } else if (cap->id == PCI_CAP_MSIX && cap_offs < 4) {
                /* shadow the MSI-X control word, then resync all vectors */
                device->msix_registers.raw &= ~mask;
                device->msix_registers.raw |= value;

                if (pci_update_msix(device, cap) < 0)
                        return PCI_ACCESS_REJECT;
        }

        return PCI_ACCESS_PERFORM;
}
305
/**
 * Initialization of PCI subsystem.
 *
 * @return 0 on success, negative error code otherwise.
 */
int pci_init(void)
{
        unsigned int mmcfg_size;
        int err;

        err = pci_cell_init(&root_cell);
        if (err)
                return err;

        mmcfg_start = system_config->platform_info.x86.mmconfig_base;
        /* Without MMCONFIG info, only port-based access is available. */
        if (mmcfg_start == 0)
                return 0;

        end_bus = system_config->platform_info.x86.mmconfig_end_bus;
        /* 256 devfn entries per bus, one 4K config page each */
        mmcfg_size = (end_bus + 1) * 256 * 4096;
        mmcfg_end = mmcfg_start + mmcfg_size - 4;

        pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
        if (!pci_space)
                return trace_error(-ENOMEM);

        /* map the physical MMCONFIG region into the hypervisor */
        return paging_create(&hv_paging_structs, mmcfg_start, mmcfg_size,
                             (unsigned long)pci_space,
                             PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                             PAGING_NON_COHERENT);
}
337
/*
 * Handle an MMIO access that may target a device's MSI-X table or PBA page.
 * Returns 1 if handled, 0 if the address belongs to no registered device,
 * -1 on an invalid access (panic message already printed).
 */
static int pci_msix_access_handler(const struct cell *cell, bool is_write,
                                   u64 addr, u32 *value)
{
        /* DWORD index within one 16-byte MSI-X table entry */
        unsigned int dword = (addr % sizeof(union pci_msix_vector)) >> 2;
        struct pci_device *device = cell->msix_device_list;
        unsigned int index;
        u64 offs;

        /* find the device whose MSI-X region contains addr */
        while (device) {
                if (addr >= device->info->msix_address &&
                    addr < device->info->msix_address +
                           device->info->msix_region_size)
                        goto found;
                device = device->next_msix_device;
        }
        return 0;

found:
        /* access must be DWORD-aligned */
        if (addr & 0x3)
                goto invalid_access;

        offs = addr - device->info->msix_address;
        index = offs / sizeof(union pci_msix_vector);

        if (is_write) {
                /*
                 * The PBA may share a page with the MSI-X table. Writing to
                 * PBA entries is undefined. We declare it as invalid.
                 */
                if (index >= device->info->num_msix_vectors)
                        goto invalid_access;

                /* update the shadow copy, then let arch code remap it */
                device->msix_vectors[index].raw[dword] = *value;
                if (arch_pci_update_msix_vector(device, index) < 0)
                        goto invalid_access;

                /* the vector control word is additionally written through */
                if (dword == MSIX_VECTOR_CTRL_DWORD)
                        mmio_write32(&device->msix_table[index].raw[dword],
                                     *value);
        } else {
                /* PBA entries and control words are read from hardware,
                 * address/data words from the shadow copy */
                if (index >= device->info->num_msix_vectors ||
                    dword == MSIX_VECTOR_CTRL_DWORD)
                        *value =
                            mmio_read32(((void *)device->msix_table) + offs);
                else
                        *value = device->msix_vectors[index].raw[dword];
        }
        return 1;

invalid_access:
        panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
                     "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
        return -1;
}
393
394 /**
395  * Handler for MMIO-accesses to PCI config space.
396  * @param cell          Request issuing cell.
397  * @param is_write      True if write access.
398  * @param addr          Address accessed.
399  * @param value         Pointer to value for reading/writing.
400  *
401  * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
402  */
403 int pci_mmio_access_handler(const struct cell *cell, bool is_write,
404                             u64 addr, u32 *value)
405 {
406         u32 mmcfg_offset, reg_addr;
407         struct pci_device *device;
408         enum pci_access access;
409         int ret;
410
411         if (!pci_space || addr < mmcfg_start || addr > mmcfg_end) {
412                 ret = pci_msix_access_handler(cell, is_write, addr, value);
413                 if (ret == 0)
414                         ret = ivshmem_mmio_access_handler(cell, is_write, addr,
415                                                           value);
416                 return ret;
417         }
418
419         mmcfg_offset = addr - mmcfg_start;
420         reg_addr = mmcfg_offset & 0xfff;
421         /* access must be DWORD-aligned */
422         if (reg_addr & 0x3)
423                 goto invalid_access;
424
425         device = pci_get_assigned_device(cell, mmcfg_offset >> 12);
426
427         if (is_write) {
428                 access = pci_cfg_write_moderate(device, reg_addr, 4, *value);
429                 if (access == PCI_ACCESS_REJECT)
430                         goto invalid_access;
431                 if (access == PCI_ACCESS_PERFORM)
432                         mmio_write32(pci_space + mmcfg_offset, *value);
433         } else {
434                 access = pci_cfg_read_moderate(device, reg_addr, 4, value);
435                 if (access == PCI_ACCESS_PERFORM)
436                         *value = mmio_read32(pci_space + mmcfg_offset);
437         }
438
439         return 1;
440
441 invalid_access:
442         panic_printk("FATAL: Invalid PCI MMCONFIG write, device %02x:%02x.%x, "
443                      "reg: %\n", PCI_BDF_PARAMS(mmcfg_offset >> 12), reg_addr);
444         return -1;
445
446 }
447
448 /**
449  * Retrieve number of enabled MSI vector of a device.
450  * @param device        The device to be examined.
451  *
452  * @return number of vectors.
453  */
454 unsigned int pci_enabled_msi_vectors(struct pci_device *device)
455 {
456         return device->msi_registers.msg32.enable ?
457                 1 << device->msi_registers.msg32.mme : 0;
458 }
459
460 static void pci_save_msi(struct pci_device *device,
461                          const struct jailhouse_pci_capability *cap)
462 {
463         u16 bdf = device->info->bdf;
464         unsigned int n;
465
466         for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
467                 device->msi_registers.raw[n] =
468                         pci_read_config(bdf, cap->start + n * 4, 4);
469 }
470
471 static void pci_restore_msi(struct pci_device *device,
472                             const struct jailhouse_pci_capability *cap)
473 {
474         unsigned int n;
475
476         for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
477                 pci_write_config(device->info->bdf, cap->start + n * 4,
478                                  device->msi_registers.raw[n], 4);
479 }
480
481 static void pci_suppress_msix(struct pci_device *device,
482                               const struct jailhouse_pci_capability *cap,
483                               bool suppressed)
484 {
485         union pci_msix_registers regs = device->msix_registers;
486
487         if (suppressed)
488                 regs.fmask = 1;
489         pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
490 }
491
492 static void pci_save_msix(struct pci_device *device,
493                           const struct jailhouse_pci_capability *cap)
494 {
495         unsigned int n, r;
496
497         device->msix_registers.raw =
498                 pci_read_config(device->info->bdf, cap->start, 4);
499
500         for (n = 0; n < device->info->num_msix_vectors; n++)
501                 for (r = 0; r < 4; r++)
502                         device->msix_vectors[n].raw[r] =
503                                 mmio_read32(&device->msix_table[n].raw[r]);
504 }
505
506 static void pci_restore_msix(struct pci_device *device,
507                              const struct jailhouse_pci_capability *cap)
508 {
509         unsigned int n, r;
510
511         for (n = 0; n < device->info->num_msix_vectors; n++)
512                 /* only restore address/data, control is write-through */
513                 for (r = 0; r < 3; r++)
514                         mmio_write32(&device->msix_table[n].raw[r],
515                                      device->msix_vectors[n].raw[r]);
516         pci_suppress_msix(device, cap, false);
517 }
518
519 /**
520  * Prepare the handover of PCI devices to Jailhouse or back to Linux.
521  */
522 void pci_prepare_handover(void)
523 {
524         const struct jailhouse_pci_capability *cap;
525         struct pci_device *device;
526         unsigned int n;
527
528         if (!root_cell.pci_devices)
529                 return;
530
531         for_each_configured_pci_device(device, &root_cell) {
532                 if (device->cell)
533                         for_each_pci_cap(cap, device, n)
534                                 if (cap->id == PCI_CAP_MSI)
535                                         arch_pci_suppress_msi(device, cap);
536                                 else if (cap->id == PCI_CAP_MSIX)
537                                         pci_suppress_msix(device, cap, true);
538         }
539 }
540
541 static int pci_add_virtual_device(struct cell *cell, struct pci_device *device)
542 {
543         device->cell = cell;
544         device->next_virtual_device = cell->virtual_device_list;
545         cell->virtual_device_list = device;
546         return 0;
547 }
548
/*
 * Add a physical device to a cell: inform the arch layer and, if the device
 * uses MSI-X, map its MSI-X table into the hypervisor for moderation.
 * Returns 0 on success, negative error code otherwise.
 */
static int pci_add_physical_device(struct cell *cell, struct pci_device *device)
{
        unsigned int size = device->info->msix_region_size;
        int err;

        printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), cell->config->name);

        err = arch_pci_add_physical_device(cell, device);

        if (!err && device->info->msix_address) {
                device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
                if (!device->msix_table) {
                        err = trace_error(-ENOMEM);
                        goto error_remove_dev;
                }

                /* map the physical MSI-X table for write-through access */
                err = paging_create(&hv_paging_structs,
                                    device->info->msix_address, size,
                                    (unsigned long)device->msix_table,
                                    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                                    PAGING_NON_COHERENT);
                if (err)
                        goto error_page_free;

                /* register for MSI-X MMIO moderation */
                device->next_msix_device = cell->msix_device_list;
                cell->msix_device_list = device;
        }
        return err;

error_page_free:
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
error_remove_dev:
        arch_pci_remove_physical_device(device);
        return err;
}
585
586 static void pci_remove_virtual_device(struct pci_device *device)
587 {
588         struct pci_device *prev = device->cell->virtual_device_list;
589
590         if (prev == device) {
591                 device->cell->virtual_device_list = device->next_virtual_device;
592         } else {
593                 while (prev->next_virtual_device != device)
594                         prev = prev->next_virtual_device;
595                 prev->next_virtual_device = device->next_virtual_device;
596         }
597 }
598
599 static void pci_remove_physical_device(struct pci_device *device)
600 {
601         unsigned int size = device->info->msix_region_size;
602         struct pci_device *prev_msix_device;
603
604         printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
605                PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
606         arch_pci_remove_physical_device(device);
607         pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
608                          PCI_CMD_INTX_OFF, 2);
609
610         if (!device->msix_table)
611                 return;
612
613         /* cannot fail, destruction of same size as construction */
614         paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
615                        size, PAGING_NON_COHERENT);
616         page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
617
618         prev_msix_device = device->cell->msix_device_list;
619         if (prev_msix_device == device) {
620                 device->cell->msix_device_list = device->next_msix_device;
621         } else {
622                 while (prev_msix_device->next_msix_device != device)
623                         prev_msix_device = prev_msix_device->next_msix_device;
624                 prev_msix_device->next_msix_device = device->next_msix_device;
625         }
626 }
627
/**
 * Perform PCI-specific initialization for a new cell.
 * @param cell  Cell to be initialized.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @see pci_cell_exit
 */
int pci_cell_init(struct cell *cell)
{
        unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
                                           sizeof(struct pci_device));
        const struct jailhouse_pci_device *dev_infos =
                jailhouse_cell_pci_devices(cell->config);
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device, *root_device;
        unsigned int ndev, ncap;
        int err;

        cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
        if (!cell->pci_devices)
                return -ENOMEM;

        /*
         * We order device states in the same way as the static information
         * so that we can use the index of the latter to find the former. For
         * the other way around and for obtaining the owner cell, we use more
         * handy pointers. The cell pointer also encodes active ownership.
         */
        for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
                /* reject configs exceeding the static vector storage */
                if (dev_infos[ndev].num_msix_vectors > PCI_MAX_MSIX_VECTORS) {
                        err = trace_error(-ERANGE);
                        goto error;
                }

                device = &cell->pci_devices[ndev];
                device->info = &dev_infos[ndev];

                /* virtual (ivshmem) devices need no physical handover */
                if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                        err = pci_ivshmem_init(cell, device);
                        if (err)
                                goto error;
                        err = pci_add_virtual_device(cell, device);
                        if (err)
                                goto error;
                        continue;
                }

                /* take the device away from the root cell if it owns it */
                root_device = pci_get_assigned_device(&root_cell,
                                                      dev_infos[ndev].bdf);
                if (root_device) {
                        pci_remove_physical_device(root_device);
                        root_device->cell = NULL;
                }

                err = pci_add_physical_device(cell, device);
                if (err)
                        goto error;

                device->cell = cell;

                /* snapshot interrupt-related registers for moderation */
                for_each_pci_cap(cap, device, ncap)
                        if (cap->id == PCI_CAP_MSI)
                                pci_save_msi(device, cap);
                        else if (cap->id == PCI_CAP_MSIX)
                                pci_save_msix(device, cap);
        }

        if (cell == &root_cell)
                pci_prepare_handover();

        return 0;
error:
        pci_cell_exit(cell);
        return err;
}
704
705 static void pci_return_device_to_root_cell(struct pci_device *device)
706 {
707         struct pci_device *root_device;
708
709         for_each_configured_pci_device(root_device, &root_cell)
710                 if (root_device->info->domain == device->info->domain &&
711                     root_device->info->bdf == device->info->bdf) {
712                         if (pci_add_physical_device(&root_cell,
713                                                     root_device) < 0)
714                                 printk("WARNING: Failed to re-assign PCI "
715                                        "device to root cell\n");
716                         else
717                                 root_device->cell = &root_cell;
718                         break;
719                 }
720 }
721
722 /**
723  * Perform PCI-specific cleanup for a cell under destruction.
724  * @param cell  Cell to be destructed.
725  *
726  * @see pci_cell_init
727  */
728 void pci_cell_exit(struct cell *cell)
729 {
730         unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
731                                            sizeof(struct pci_device));
732         struct pci_device *device;
733
734         /*
735          * Do not destroy the root cell. We will shut down the complete
736          * hypervisor instead.
737          */
738         if (cell == &root_cell)
739                 return;
740
741         for_each_configured_pci_device(device, cell)
742                 if (device->cell) {
743                         if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
744                                 pci_ivshmem_exit(device);
745                                 pci_remove_virtual_device(device);
746                         } else {
747                                 pci_remove_physical_device(device);
748                                 pci_return_device_to_root_cell(device);
749                         }
750                 }
751
752         page_free(&mem_pool, cell->pci_devices, devlist_pages);
753 }
754
/**
 * Apply PCI-specific configuration changes.
 * @param cell_added_removed    Cell that was added or removed to/from the
 *                              system or NULL.
 *
 * @see arch_config_commit
 */
void pci_config_commit(struct cell *cell_added_removed)
{
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device;
        unsigned int n;
        int err = 0;

        if (!cell_added_removed)
                return;

        /* Re-sync MSI/MSI-X state of all actively owned devices now that
         * the cell set changed. */
        for_each_configured_pci_device(device, &root_cell)
                if (device->cell) {
                        for_each_pci_cap(cap, device, n) {
                                if (cap->id == PCI_CAP_MSI) {
                                        err = arch_pci_update_msi(device, cap);
                                } else if (cap->id == PCI_CAP_MSIX) {
                                        err = pci_update_msix(device, cap);
                                        pci_suppress_msix(device, cap, false);
                                }
                                if (err)
                                        goto error;
                        }
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                                err = pci_ivshmem_update_msix(device);
                                if (err) {
                                        /* cap == NULL flags the ivshmem case
                                         * for the error report below */
                                        cap = NULL;
                                        goto error;
                                }
                        }
                }
        return;

error:
        panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x",
                     PCI_BDF_PARAMS(device->info->bdf));
        if (cap)
                panic_printk(", cap %d\n", cap->id);
        else
                panic_printk("\n");
        panic_stop();
}
803
804 /**
805  * Shut down the PCI layer during hypervisor deactivation.
806  */
807 void pci_shutdown(void)
808 {
809         const struct jailhouse_pci_capability *cap;
810         struct pci_device *device;
811         unsigned int n;
812
813         if (!root_cell.pci_devices)
814                 return;
815
816         for_each_configured_pci_device(device, &root_cell) {
817                 if (!device->cell)
818                         continue;
819
820                 for_each_pci_cap(cap, device, n)
821                         if (cap->id == PCI_CAP_MSI)
822                                 pci_restore_msi(device, cap);
823                         else if (cap->id == PCI_CAP_MSIX)
824                                 pci_restore_msix(device, cap);
825
826                 if (device->cell != &root_cell)
827                         pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
828                                          PCI_CMD_INTX_OFF, 2);
829         }
830 }