]> rtime.felk.cvut.cz Git - jailhouse.git/blob - hypervisor/pci.c
core: pci: Add support for devices with more than 16 MSI-X vectors
[jailhouse.git] / hypervisor / pci.c
1 /*
2  * Jailhouse, a Linux-based partitioning hypervisor
3  *
4  * Copyright (c) Siemens AG, 2014, 2015
5  *
6  * Authors:
7  *  Ivan Kolchin <ivan.kolchin@siemens.com>
8  *  Jan Kiszka <jan.kiszka@siemens.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2.  See
11  * the COPYING file in the top-level directory.
12  */
13
14 #include <jailhouse/control.h>
15 #include <jailhouse/mmio.h>
16 #include <jailhouse/pci.h>
17 #include <jailhouse/printk.h>
18 #include <jailhouse/utils.h>
19
/* Index of the Vector Control DWORD within one MSI-X table entry. */
#define MSIX_VECTOR_CTRL_DWORD          3

/* Iterate over all devices statically configured for the given cell. */
#define for_each_configured_pci_device(dev, cell)                       \
        for ((dev) = (cell)->pci_devices;                               \
             (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
             (dev)++)

/* Iterate over all config space capabilities declared for a device. */
#define for_each_pci_cap(cap, dev, counter)                             \
        for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) +     \
                (dev)->info->caps_start, (counter) = 0;                 \
             (counter) < (dev)->info->num_caps;                         \
             (cap)++, (counter)++)
32
/* entry for PCI config space access control */
struct pci_cfg_control {
        enum {
                PCI_CONFIG_DENY,   /* reject access; == 0, so unlisted
                                    * registers are denied by default */
                PCI_CONFIG_ALLOW,  /* forward access to the device */
                PCI_CONFIG_RDONLY, /* accept but silently drop the write */
        } type;   /* Access type */
        u32 mask; /* Bit set: access type applies; bit cleared: deny access */
};

/* --- Access control for writing to PCI config space registers --- */
/* Note: array indices are config space offsets divided by 4 (DWORDs). */
/* Type 1: Endpoints */
static const struct pci_cfg_control endpoint_write[PCI_CONFIG_HEADER_SIZE] = {
        [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
        [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
        [0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* ROM BAR */
        [0x3c/4] = {PCI_CONFIG_ALLOW,  0x000000ff}, /* Int Line */
};

/* Type 2: Bridges
 * Note: Ignore limit/base reprogramming attempts because the root cell will
 *       perform them on bus rescans. */
static const struct pci_cfg_control bridge_write[PCI_CONFIG_HEADER_SIZE] = {
        [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
        [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
        [0x1c/4] = {PCI_CONFIG_RDONLY, 0x0000ffff}, /* I/O Limit & Base */
        [0x20/4 ...      /* Memory Limit/Base, Prefetch Memory Limit/Base, */
         0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* I/O Limit & Base */
        [0x3c/4] = {PCI_CONFIG_ALLOW,  0xffff00ff}, /* Int Line, Bridge Ctrl */
};
63
/* Virtual mapping of the PCI MMCONFIG region (NULL when unavailable). */
static void *pci_space;
/* Physical MMCONFIG range; mmcfg_end addresses its last valid DWORD. */
static u64 mmcfg_start, mmcfg_end;
/* Highest bus number covered by the MMCONFIG region. */
static u8 end_bus;

/* Return the virtual base of a device's 4K config space page (bdf << 12). */
static void *pci_get_device_mmcfg_base(u16 bdf)
{
        return pci_space + ((unsigned long)bdf << 12);
}
72
73 /**
74  * Read from PCI config space.
75  * @param bdf           16-bit bus/device/function ID of target.
76  * @param address       Config space access address.
77  * @param size          Access size (1, 2 or 4 bytes).
78  *
79  * @return Read value.
80  *
81  * @see pci_write_config
82  */
83 u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
84 {
85         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
86
87         if (!pci_space || PCI_BUS(bdf) > end_bus)
88                 return arch_pci_read_config(bdf, address, size);
89
90         if (size == 1)
91                 return mmio_read8(mmcfg_addr);
92         else if (size == 2)
93                 return mmio_read16(mmcfg_addr);
94         else
95                 return mmio_read32(mmcfg_addr);
96 }
97
98 /**
99  * Write to PCI config space.
100  * @param bdf           16-bit bus/device/function ID of target.
101  * @param address       Config space access address.
102  * @param value         Value to be written.
103  * @param size          Access size (1, 2 or 4 bytes).
104  *
105  * @see pci_read_config
106  */
107 void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
108 {
109         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
110
111         if (!pci_space || PCI_BUS(bdf) > end_bus)
112                 return arch_pci_write_config(bdf, address, value, size);
113
114         if (size == 1)
115                 mmio_write8(mmcfg_addr, value);
116         else if (size == 2)
117                 mmio_write16(mmcfg_addr, value);
118         else
119                 mmio_write32(mmcfg_addr, value);
120 }
121
122 /**
123  * Look up device owned by a cell.
124  * @param[in] cell      Owning cell.
125  * @param bdf           16-bit bus/device/function ID.
126  *
127  * @return Pointer to owned PCI device or NULL.
128  */
129 struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
130 {
131         const struct jailhouse_pci_device *dev_info =
132                 jailhouse_cell_pci_devices(cell->config);
133         u32 n;
134
135         /* We iterate over the static device information to increase cache
136          * locality. */
137         for (n = 0; n < cell->config->num_pci_devices; n++)
138                 if (dev_info[n].bdf == bdf)
139                         return cell->pci_devices[n].cell ?
140                                 &cell->pci_devices[n] : NULL;
141
142         return NULL;
143 }
144
145 /**
146  * Look up capability at given config space address.
147  * @param device        The device to be accessed.
148  * @param address       Config space access address.
149  *
150  * @return Corresponding capability structure or NULL if none found.
151  *
152  * @private
153  */
154 static const struct jailhouse_pci_capability *
155 pci_find_capability(struct pci_device *device, u16 address)
156 {
157         const struct jailhouse_pci_capability *cap =
158                 jailhouse_cell_pci_caps(device->cell->config) +
159                 device->info->caps_start;
160         u32 n;
161
162         for (n = 0; n < device->info->num_caps; n++, cap++)
163                 if (cap->start <= address && cap->start + cap->len > address)
164                         return cap;
165
166         return NULL;
167 }
168
/**
 * Moderate config space read access.
 * @param device        The device to be accessed. If NULL, access will be
 *                      emulated, returning a value of -1.
 * @param address       Config space address.
 * @param size          Access size (1, 2 or 4 bytes).
 * @param value         Pointer to buffer to receive the emulated value if
 *                      PCI_ACCESS_DONE is returned.
 *
 * @return PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 *
 * @see pci_cfg_write_moderate
 */
enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
                                      unsigned int size, u32 *value)
{
        const struct jailhouse_pci_capability *cap;
        unsigned int bar_no, cap_offs;

        /* Unassigned device: emulate an all-ones read. */
        if (!device) {
                *value = -1;
                return PCI_ACCESS_DONE;
        }

        /* Emulate BARs for physical and virtual devices */
        if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE) {
                /* Emulate BAR access, always returning the shadow value. */
                if (address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
                        bar_no = (address - PCI_CFG_BAR) / 4;
                        /* shift compensates for sub-DWORD access offsets */
                        *value = device->bar[bar_no] >> ((address % 4) * 8);
                        return PCI_ACCESS_DONE;
                }

                /* We do not expose ROMs. */
                if (address >= PCI_CFG_ROMBAR && address < PCI_CFG_CAPS) {
                        *value = 0;
                        return PCI_ACCESS_DONE;
                }
        }

        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_read(device, address, value);

        /* Remaining header fields are read directly from the device. */
        if (address < PCI_CONFIG_HEADER_SIZE)
                return PCI_ACCESS_PERFORM;

        cap = pci_find_capability(device, address);
        if (!cap)
                return PCI_ACCESS_PERFORM;

        /*
         * Serve the MSI message from the shadow copy: bytes 4..9, plus
         * 10..13 for 64-bit MSI. All other capability bytes are read from
         * the device itself.
         */
        cap_offs = address - cap->start;
        if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                *value = device->msi_registers.raw[cap_offs / 4] >>
                        ((cap_offs % 4) * 8);
                return PCI_ACCESS_DONE;
        }

        return PCI_ACCESS_PERFORM;
}
229
230 static int pci_update_msix(struct pci_device *device,
231                            const struct jailhouse_pci_capability *cap)
232 {
233         unsigned int n;
234         int result;
235
236         for (n = 0; n < device->info->num_msix_vectors; n++) {
237                 result = arch_pci_update_msix_vector(device, n);
238                 if (result < 0)
239                         return result;
240         }
241         return 0;
242 }
243
/**
 * Moderate config space write access.
 * @param device        The device to be accessed. If NULL, access will be
 *                      rejected.
 * @param address       Config space address.
 * @param size          Access size (1, 2 or 4 bytes).
 * @param value         Value to be written.
 *
 * @return PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 *
 * @see pci_cfg_read_moderate
 */
enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
                                       unsigned int size, u32 value)
{
        const struct jailhouse_pci_capability *cap;
        /* initialize list to work around wrong compiler warning */
        unsigned int bias_shift = (address % 4) * 8;
        u32 mask = BYTE_MASK(size) << bias_shift;
        struct pci_cfg_control cfg_control;
        unsigned int bar_no, cap_offs;

        if (!device)
                return PCI_ACCESS_REJECT;

        /* Shift value into its position within the containing DWORD. */
        value <<= bias_shift;

        /* Emulate BARs for physical and virtual devices */
        if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE &&
            address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
                bar_no = (address - PCI_CFG_BAR) / 4;
                /* only bits permitted by the configured BAR mask change */
                mask &= device->info->bar_mask[bar_no];
                device->bar[bar_no] &= ~mask;
                device->bar[bar_no] |= value & mask;
                return PCI_ACCESS_DONE;
        }

        if (address < PCI_CONFIG_HEADER_SIZE) {
                /* Pick the access control entry for this header DWORD. */
                if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE)
                        cfg_control = bridge_write[address / 4];
                else /* physical or virtual device */
                        cfg_control = endpoint_write[address / 4];

                /* Reject if any touched bit lies outside the allowed mask. */
                if ((cfg_control.mask & mask) != mask)
                        return PCI_ACCESS_REJECT;

                switch (cfg_control.type) {
                case PCI_CONFIG_ALLOW:
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                                return pci_ivshmem_cfg_write(device,
                                                address / 4, mask, value);
                        return PCI_ACCESS_PERFORM;
                case PCI_CONFIG_RDONLY:
                        /* accept but silently drop the write */
                        return PCI_ACCESS_DONE;
                default:
                        return PCI_ACCESS_REJECT;
                }
        }

        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_write(device, address / 4, mask, value);

        cap = pci_find_capability(device, address);
        if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
                return PCI_ACCESS_REJECT;

        cap_offs = address - cap->start;
        /* MSI: shadow bytes 0..9 (0..13 for 64-bit MSI) and apply them. */
        if (cap->id == PCI_CAP_MSI &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                device->msi_registers.raw[cap_offs / 4] &= ~mask;
                device->msi_registers.raw[cap_offs / 4] |= value;

                if (arch_pci_update_msi(device, cap) < 0)
                        return PCI_ACCESS_REJECT;

                /*
                 * Address and data words are emulated, the control word is
                 * written as-is.
                 */
                if (cap_offs >= 4)
                        return PCI_ACCESS_DONE;
        } else if (cap->id == PCI_CAP_MSIX && cap_offs < 4) {
                /* MSI-X: shadow the message control word and apply it. */
                device->msix_registers.raw &= ~mask;
                device->msix_registers.raw |= value;

                if (pci_update_msix(device, cap) < 0)
                        return PCI_ACCESS_REJECT;
        }

        return PCI_ACCESS_PERFORM;
}
335
/**
 * Initialization of PCI subsystem.
 *
 * @return 0 on success, negative error code otherwise.
 */
int pci_init(void)
{
        unsigned int mmcfg_size;
        int err;

        err = pci_cell_init(&root_cell);
        if (err)
                return err;

        mmcfg_start = system_config->platform_info.x86.mmconfig_base;
        /* No MMCONFIG region configured: done; config space accesses will
         * go through the arch-specific fallback path. */
        if (mmcfg_start == 0)
                return 0;

        end_bus = system_config->platform_info.x86.mmconfig_end_bus;
        /* 256 device/function slots per bus, 4K config space each */
        mmcfg_size = (end_bus + 1) * 256 * 4096;
        /* mmcfg_end addresses the last valid DWORD of the region */
        mmcfg_end = mmcfg_start + mmcfg_size - 4;

        pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
        if (!pci_space)
                return trace_error(-ENOMEM);

        /* Map the physical MMCONFIG region into the hypervisor. */
        return paging_create(&hv_paging_structs, mmcfg_start, mmcfg_size,
                             (unsigned long)pci_space,
                             PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                             PAGING_NON_COHERENT);
}
367
/*
 * Handle MMIO accesses to the MSI-X table/PBA regions of the cell's devices.
 *
 * @return 1 if handled, 0 if the address matches no registered region,
 *         -1 on invalid access (after printing a panic message).
 */
static int pci_msix_access_handler(const struct cell *cell, bool is_write,
                                   u64 addr, u32 *value)
{
        /* DWORD index within one MSI-X table entry */
        unsigned int dword = (addr % sizeof(union pci_msix_vector)) >> 2;
        struct pci_device *device = cell->msix_device_list;
        unsigned int index;
        u64 offs;

        /* Find the device whose MSI-X region contains the address. */
        while (device) {
                if (addr >= device->info->msix_address &&
                    addr < device->info->msix_address +
                           device->info->msix_region_size)
                        goto found;
                device = device->next_msix_device;
        }
        return 0;

found:
        /* access must be DWORD-aligned */
        if (addr & 0x3)
                goto invalid_access;

        offs = addr - device->info->msix_address;
        index = offs / sizeof(union pci_msix_vector);

        if (is_write) {
                /*
                 * The PBA may share a page with the MSI-X table. Writing to
                 * PBA entries is undefined. We declare it as invalid.
                 */
                if (index >= device->info->num_msix_vectors)
                        goto invalid_access;

                /* Shadow the write, then let the arch layer apply it. */
                device->msix_vectors[index].raw[dword] = *value;
                if (arch_pci_update_msix_vector(device, index) < 0)
                        goto invalid_access;

                /* The vector control word is also written through. */
                if (dword == MSIX_VECTOR_CTRL_DWORD)
                        mmio_write32(&device->msix_table[index].raw[dword],
                                     *value);
        } else {
                /* PBA entries and vector control words are read from the
                 * device; address/data come from the shadow copy. */
                if (index >= device->info->num_msix_vectors ||
                    dword == MSIX_VECTOR_CTRL_DWORD)
                        *value =
                            mmio_read32(((void *)device->msix_table) + offs);
                else
                        *value = device->msix_vectors[index].raw[dword];
        }
        return 1;

invalid_access:
        panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
                     "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
        return -1;
}
423
424 /**
425  * Handler for MMIO-accesses to PCI config space.
426  * @param cell          Request issuing cell.
427  * @param is_write      True if write access.
428  * @param addr          Address accessed.
429  * @param value         Pointer to value for reading/writing.
430  *
431  * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
432  */
433 int pci_mmio_access_handler(const struct cell *cell, bool is_write,
434                             u64 addr, u32 *value)
435 {
436         u32 mmcfg_offset, reg_addr;
437         struct pci_device *device;
438         enum pci_access access;
439         int ret;
440
441         if (!pci_space || addr < mmcfg_start || addr > mmcfg_end) {
442                 ret = pci_msix_access_handler(cell, is_write, addr, value);
443                 if (ret == 0)
444                         ret = ivshmem_mmio_access_handler(cell, is_write, addr,
445                                                           value);
446                 return ret;
447         }
448
449         mmcfg_offset = addr - mmcfg_start;
450         reg_addr = mmcfg_offset & 0xfff;
451         /* access must be DWORD-aligned */
452         if (reg_addr & 0x3)
453                 goto invalid_access;
454
455         device = pci_get_assigned_device(cell, mmcfg_offset >> 12);
456
457         if (is_write) {
458                 access = pci_cfg_write_moderate(device, reg_addr, 4, *value);
459                 if (access == PCI_ACCESS_REJECT)
460                         goto invalid_access;
461                 if (access == PCI_ACCESS_PERFORM)
462                         mmio_write32(pci_space + mmcfg_offset, *value);
463         } else {
464                 access = pci_cfg_read_moderate(device, reg_addr, 4, value);
465                 if (access == PCI_ACCESS_PERFORM)
466                         *value = mmio_read32(pci_space + mmcfg_offset);
467         }
468
469         return 1;
470
471 invalid_access:
472         panic_printk("FATAL: Invalid PCI MMCONFIG write, device %02x:%02x.%x, "
473                      "reg: %\n", PCI_BDF_PARAMS(mmcfg_offset >> 12), reg_addr);
474         return -1;
475
476 }
477
478 /**
479  * Retrieve number of enabled MSI vector of a device.
480  * @param device        The device to be examined.
481  *
482  * @return number of vectors.
483  */
484 unsigned int pci_enabled_msi_vectors(struct pci_device *device)
485 {
486         return device->msi_registers.msg32.enable ?
487                 1 << device->msi_registers.msg32.mme : 0;
488 }
489
490 static void pci_save_msi(struct pci_device *device,
491                          const struct jailhouse_pci_capability *cap)
492 {
493         u16 bdf = device->info->bdf;
494         unsigned int n;
495
496         for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
497                 device->msi_registers.raw[n] =
498                         pci_read_config(bdf, cap->start + n * 4, 4);
499 }
500
501 static void pci_restore_msi(struct pci_device *device,
502                             const struct jailhouse_pci_capability *cap)
503 {
504         unsigned int n;
505
506         for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
507                 pci_write_config(device->info->bdf, cap->start + n * 4,
508                                  device->msi_registers.raw[n], 4);
509 }
510
511 static void pci_suppress_msix(struct pci_device *device,
512                               const struct jailhouse_pci_capability *cap,
513                               bool suppressed)
514 {
515         union pci_msix_registers regs = device->msix_registers;
516
517         if (suppressed)
518                 regs.fmask = 1;
519         pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
520 }
521
522 static void pci_save_msix(struct pci_device *device,
523                           const struct jailhouse_pci_capability *cap)
524 {
525         unsigned int n, r;
526
527         device->msix_registers.raw =
528                 pci_read_config(device->info->bdf, cap->start, 4);
529
530         for (n = 0; n < device->info->num_msix_vectors; n++)
531                 for (r = 0; r < 4; r++)
532                         device->msix_vectors[n].raw[r] =
533                                 mmio_read32(&device->msix_table[n].raw[r]);
534 }
535
536 static void pci_restore_msix(struct pci_device *device,
537                              const struct jailhouse_pci_capability *cap)
538 {
539         unsigned int n, r;
540
541         for (n = 0; n < device->info->num_msix_vectors; n++)
542                 /* only restore address/data, control is write-through */
543                 for (r = 0; r < 3; r++)
544                         mmio_write32(&device->msix_table[n].raw[r],
545                                      device->msix_vectors[n].raw[r]);
546         pci_suppress_msix(device, cap, false);
547 }
548
549 /**
550  * Prepare the handover of PCI devices to Jailhouse or back to Linux.
551  */
552 void pci_prepare_handover(void)
553 {
554         const struct jailhouse_pci_capability *cap;
555         struct pci_device *device;
556         unsigned int n;
557
558         if (!root_cell.pci_devices)
559                 return;
560
561         for_each_configured_pci_device(device, &root_cell) {
562                 if (device->cell)
563                         for_each_pci_cap(cap, device, n)
564                                 if (cap->id == PCI_CAP_MSI)
565                                         arch_pci_suppress_msi(device, cap);
566                                 else if (cap->id == PCI_CAP_MSIX)
567                                         pci_suppress_msix(device, cap, true);
568         }
569 }
570
/* Claim a virtual (emulated) device and push it onto the cell's list. */
static int pci_add_virtual_device(struct cell *cell, struct pci_device *device)
{
        device->cell = cell;
        device->next_virtual_device = cell->virtual_device_list;
        cell->virtual_device_list = device;
        return 0;
}
578
/*
 * Assign a physical device to a cell: shadow its BARs, perform arch-specific
 * setup and, for MSI-X devices, map the table/PBA region and register the
 * device on the cell's MSI-X list. Unwinds all steps on failure.
 */
static int pci_add_physical_device(struct cell *cell, struct pci_device *device)
{
        unsigned int n, pages, size = device->info->msix_region_size;
        int err;

        printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), cell->config->name);

        /* capture the current BAR values as shadow copies */
        for (n = 0; n < PCI_NUM_BARS; n ++)
                device->bar[n] = pci_read_config(device->info->bdf,
                                                 PCI_CFG_BAR + n * 4, 4);

        err = arch_pci_add_physical_device(cell, device);

        if (!err && device->info->msix_address) {
                /* map the device's MSI-X region into the hypervisor */
                device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
                if (!device->msix_table) {
                        err = trace_error(-ENOMEM);
                        goto error_remove_dev;
                }

                err = paging_create(&hv_paging_structs,
                                    device->info->msix_address, size,
                                    (unsigned long)device->msix_table,
                                    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                                    PAGING_NON_COHERENT);
                if (err)
                        goto error_page_free;

                /*
                 * Devices with more vectors than the embedded shadow array
                 * can hold get a dynamically allocated one instead.
                 */
                if (device->info->num_msix_vectors > PCI_EMBEDDED_MSIX_VECTS) {
                        pages = PAGES(sizeof(union pci_msix_vector) *
                                      device->info->num_msix_vectors);
                        device->msix_vectors = page_alloc(&mem_pool, pages);
                        if (!device->msix_vectors) {
                                err = -ENOMEM;
                                goto error_unmap_table;
                        }
                }

                device->next_msix_device = cell->msix_device_list;
                cell->msix_device_list = device;
        }
        return err;

error_unmap_table:
        /* cannot fail, destruction of same size as construction */
        paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
                       size, PAGING_NON_COHERENT);
error_page_free:
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
error_remove_dev:
        arch_pci_remove_physical_device(device);
        return err;
}
633
634 static void pci_remove_virtual_device(struct pci_device *device)
635 {
636         struct pci_device *prev = device->cell->virtual_device_list;
637
638         if (prev == device) {
639                 device->cell->virtual_device_list = device->next_virtual_device;
640         } else {
641                 while (prev->next_virtual_device != device)
642                         prev = prev->next_virtual_device;
643                 prev->next_virtual_device = device->next_virtual_device;
644         }
645 }
646
/*
 * Detach a physical device from its cell: undo the arch-specific setup,
 * disable INTx and release the MSI-X mapping, shadow storage and list entry.
 */
static void pci_remove_physical_device(struct pci_device *device)
{
        unsigned int size = device->info->msix_region_size;
        struct pci_device *prev_msix_device;

        printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
        arch_pci_remove_physical_device(device);
        /* turn off legacy interrupt delivery */
        pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
                         PCI_CMD_INTX_OFF, 2);

        /* devices without an MSI-X mapping have nothing more to release */
        if (!device->msix_table)
                return;

        /* cannot fail, destruction of same size as construction */
        paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
                       size, PAGING_NON_COHERENT);
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);

        /* free the shadow vector array if it was dynamically allocated */
        if (device->msix_vectors != device->msix_vector_array)
                page_free(&mem_pool, device->msix_vectors,
                          PAGES(sizeof(union pci_msix_vector) *
                                device->info->num_msix_vectors));

        /* unlink the device from the cell's MSI-X device list */
        prev_msix_device = device->cell->msix_device_list;
        if (prev_msix_device == device) {
                device->cell->msix_device_list = device->next_msix_device;
        } else {
                while (prev_msix_device->next_msix_device != device)
                        prev_msix_device = prev_msix_device->next_msix_device;
                prev_msix_device->next_msix_device = device->next_msix_device;
        }
}
680
/**
 * Perform PCI-specific initialization for a new cell.
 * @param cell  Cell to be initialized.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @see pci_cell_exit
 */
int pci_cell_init(struct cell *cell)
{
        unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
                                           sizeof(struct pci_device));
        const struct jailhouse_pci_device *dev_infos =
                jailhouse_cell_pci_devices(cell->config);
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device, *root_device;
        unsigned int ndev, ncap;
        int err;

        cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
        if (!cell->pci_devices)
                return -ENOMEM;

        /*
         * We order device states in the same way as the static information
         * so that we can use the index of the latter to find the former. For
         * the other way around and for obtaining the owner cell, we use more
         * handy pointers. The cell pointer also encodes active ownership.
         */
        for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
                device = &cell->pci_devices[ndev];
                device->info = &dev_infos[ndev];
                /* default to the embedded array; pci_add_physical_device
                 * replaces it for devices with many MSI-X vectors */
                device->msix_vectors = device->msix_vector_array;

                if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                        err = pci_ivshmem_init(cell, device);
                        if (err)
                                goto error;
                        err = pci_add_virtual_device(cell, device);
                        if (err)
                                goto error;
                        continue;
                }

                /* take the physical device away from the root cell first */
                root_device = pci_get_assigned_device(&root_cell,
                                                      dev_infos[ndev].bdf);
                if (root_device) {
                        pci_remove_physical_device(root_device);
                        root_device->cell = NULL;
                }

                err = pci_add_physical_device(cell, device);
                if (err)
                        goto error;

                device->cell = cell;

                /* save MSI/MSI-X state for emulation and later restore */
                for_each_pci_cap(cap, device, ncap)
                        if (cap->id == PCI_CAP_MSI)
                                pci_save_msi(device, cap);
                        else if (cap->id == PCI_CAP_MSIX)
                                pci_save_msix(device, cap);
        }

        if (cell == &root_cell)
                pci_prepare_handover();

        return 0;
error:
        pci_cell_exit(cell);
        return err;
}
753
754 static void pci_return_device_to_root_cell(struct pci_device *device)
755 {
756         struct pci_device *root_device;
757
758         for_each_configured_pci_device(root_device, &root_cell)
759                 if (root_device->info->domain == device->info->domain &&
760                     root_device->info->bdf == device->info->bdf) {
761                         if (pci_add_physical_device(&root_cell,
762                                                     root_device) < 0)
763                                 printk("WARNING: Failed to re-assign PCI "
764                                        "device to root cell\n");
765                         else
766                                 root_device->cell = &root_cell;
767                         break;
768                 }
769 }
770
771 /**
772  * Perform PCI-specific cleanup for a cell under destruction.
773  * @param cell  Cell to be destructed.
774  *
775  * @see pci_cell_init
776  */
777 void pci_cell_exit(struct cell *cell)
778 {
779         unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
780                                            sizeof(struct pci_device));
781         struct pci_device *device;
782
783         /*
784          * Do not destroy the root cell. We will shut down the complete
785          * hypervisor instead.
786          */
787         if (cell == &root_cell)
788                 return;
789
790         for_each_configured_pci_device(device, cell)
791                 if (device->cell) {
792                         if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
793                                 pci_ivshmem_exit(device);
794                                 pci_remove_virtual_device(device);
795                         } else {
796                                 pci_remove_physical_device(device);
797                                 pci_return_device_to_root_cell(device);
798                         }
799                 }
800
801         page_free(&mem_pool, cell->pci_devices, devlist_pages);
802 }
803
/**
 * Apply PCI-specific configuration changes.
 * @param cell_added_removed    Cell that was added or removed to/from the
 *                              system or NULL.
 *
 * @see arch_config_commit
 */
void pci_config_commit(struct cell *cell_added_removed)
{
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device;
        unsigned int n;
        int err = 0;

        if (!cell_added_removed)
                return;

        /* Re-apply MSI/MSI-X state of all actively owned devices. */
        for_each_configured_pci_device(device, &root_cell)
                if (device->cell) {
                        for_each_pci_cap(cap, device, n) {
                                if (cap->id == PCI_CAP_MSI) {
                                        err = arch_pci_update_msi(device, cap);
                                } else if (cap->id == PCI_CAP_MSIX) {
                                        err = pci_update_msix(device, cap);
                                        pci_suppress_msix(device, cap, false);
                                }
                                if (err)
                                        goto error;
                        }
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                                err = pci_ivshmem_update_msix(device);
                                if (err) {
                                        /* cap == NULL marks an ivshmem
                                         * failure for the message below */
                                        cap = NULL;
                                        goto error;
                                }
                        }
                }
        return;

error:
        panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x",
                     PCI_BDF_PARAMS(device->info->bdf));
        if (cap)
                panic_printk(", cap %d\n", cap->id);
        else
                panic_printk("\n");
        panic_stop();
}
852
/**
 * Shut down the PCI layer during hypervisor deactivation.
 */
void pci_shutdown(void)
{
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device;
        unsigned int n;

        if (!root_cell.pci_devices)
                return;

        for_each_configured_pci_device(device, &root_cell) {
                /* skip devices that are not actively owned */
                if (!device->cell)
                        continue;

                /* hand the saved MSI/MSI-X state back to the device */
                for_each_pci_cap(cap, device, n)
                        if (cap->id == PCI_CAP_MSI)
                                pci_restore_msi(device, cap);
                        else if (cap->id == PCI_CAP_MSIX)
                                pci_restore_msix(device, cap);

                /* keep INTx disabled on devices owned by non-root cells */
                if (device->cell != &root_cell)
                        pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
                                         PCI_CMD_INTX_OFF, 2);
        }
}