/*
 * hypervisor/pci.c (from jailhouse.git, commit "core: pci: Drop list of
 * virtual devices")
 */
1 /*
2  * Jailhouse, a Linux-based partitioning hypervisor
3  *
4  * Copyright (c) Siemens AG, 2014, 2015
5  *
6  * Authors:
7  *  Ivan Kolchin <ivan.kolchin@siemens.com>
8  *  Jan Kiszka <jan.kiszka@siemens.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2.  See
11  * the COPYING file in the top-level directory.
12  */
13
14 #include <jailhouse/control.h>
15 #include <jailhouse/mmio.h>
16 #include <jailhouse/pci.h>
17 #include <jailhouse/printk.h>
18 #include <jailhouse/utils.h>
19
/* Index (in 32-bit words) of the vector-control word inside one MSI-X
 * table entry (address low, address high, data, control). */
#define MSIX_VECTOR_CTRL_DWORD          3

/* Iterate over all PCI devices configured for a cell, assigned or not
 * (check dev->cell for active ownership). */
#define for_each_configured_pci_device(dev, cell)                       \
        for ((dev) = (cell)->pci_devices;                               \
             (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
             (dev)++)

/* Iterate over the capability descriptors of a device; (counter) is an
 * unsigned iteration variable supplied by the caller. */
#define for_each_pci_cap(cap, dev, counter)                             \
        for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) +     \
                (dev)->info->caps_start, (counter) = 0;                 \
             (counter) < (dev)->info->num_caps;                         \
             (cap)++, (counter)++)
32
/* entry for PCI config space access control */
struct pci_cfg_control {
        enum {
                PCI_CONFIG_DENY,   /* reject the write (access error) */
                PCI_CONFIG_ALLOW,  /* pass the write through to hardware */
                PCI_CONFIG_RDONLY, /* silently drop the write */
        } type;   /* Access type */
        u32 mask; /* Bit set: access type applies; bit cleared: deny access */
};

/* --- Access control for writing to PCI config space registers --- */
/* Type 1: Endpoints */
static const struct pci_cfg_control endpoint_write[PCI_CONFIG_HEADER_SIZE] = {
        [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
        [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
        [0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* ROM BAR */
        [0x3c/4] = {PCI_CONFIG_ALLOW,  0x000000ff}, /* Int Line */
};

/* Type 2: Bridges
 * Note: Ignore limit/base reprogramming attempts because the root cell will
 *       perform them on bus rescans. */
static const struct pci_cfg_control bridge_write[PCI_CONFIG_HEADER_SIZE] = {
        [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
        [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
        [0x1c/4] = {PCI_CONFIG_RDONLY, 0x0000ffff}, /* I/O Limit & Base */
        [0x20/4 ...      /* Memory Limit/Base, Prefetch Memory Limit/Base, */
         0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* BARs, ROM BAR */
        [0x3c/4] = {PCI_CONFIG_ALLOW,  0xffff00ff}, /* Int Line, Bridge Ctrl */
};

/* Virtual mapping of the MMCONFIG region, NULL if none is configured. */
static void *pci_space;
/* Physical start and last accessible (DWORD-aligned) MMCONFIG address. */
static u64 mmcfg_start, mmcfg_end;
/* Highest bus number covered by the MMCONFIG region. */
static u8 end_bus;
67
68 unsigned int pci_mmio_count_regions(struct cell *cell)
69 {
70         const struct jailhouse_pci_device *dev_infos =
71                 jailhouse_cell_pci_devices(cell->config);
72         unsigned int n, regions = 0;
73
74         for (n = 0; n < cell->config->num_pci_devices; n++)
75                 if (dev_infos[n].type == JAILHOUSE_PCI_TYPE_IVSHMEM)
76                         regions += PCI_IVSHMEM_NUM_MMIO_REGIONS;
77
78         return regions;
79 }
80
81 static void *pci_get_device_mmcfg_base(u16 bdf)
82 {
83         return pci_space + ((unsigned long)bdf << 12);
84 }
85
86 /**
87  * Read from PCI config space.
88  * @param bdf           16-bit bus/device/function ID of target.
89  * @param address       Config space access address.
90  * @param size          Access size (1, 2 or 4 bytes).
91  *
92  * @return Read value.
93  *
94  * @see pci_write_config
95  */
96 u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
97 {
98         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
99
100         if (!pci_space || PCI_BUS(bdf) > end_bus)
101                 return arch_pci_read_config(bdf, address, size);
102
103         if (size == 1)
104                 return mmio_read8(mmcfg_addr);
105         else if (size == 2)
106                 return mmio_read16(mmcfg_addr);
107         else
108                 return mmio_read32(mmcfg_addr);
109 }
110
111 /**
112  * Write to PCI config space.
113  * @param bdf           16-bit bus/device/function ID of target.
114  * @param address       Config space access address.
115  * @param value         Value to be written.
116  * @param size          Access size (1, 2 or 4 bytes).
117  *
118  * @see pci_read_config
119  */
120 void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
121 {
122         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
123
124         if (!pci_space || PCI_BUS(bdf) > end_bus)
125                 return arch_pci_write_config(bdf, address, value, size);
126
127         if (size == 1)
128                 mmio_write8(mmcfg_addr, value);
129         else if (size == 2)
130                 mmio_write16(mmcfg_addr, value);
131         else
132                 mmio_write32(mmcfg_addr, value);
133 }
134
135 /**
136  * Look up device owned by a cell.
137  * @param[in] cell      Owning cell.
138  * @param bdf           16-bit bus/device/function ID.
139  *
140  * @return Pointer to owned PCI device or NULL.
141  */
142 struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
143 {
144         const struct jailhouse_pci_device *dev_info =
145                 jailhouse_cell_pci_devices(cell->config);
146         u32 n;
147
148         /* We iterate over the static device information to increase cache
149          * locality. */
150         for (n = 0; n < cell->config->num_pci_devices; n++)
151                 if (dev_info[n].bdf == bdf)
152                         return cell->pci_devices[n].cell ?
153                                 &cell->pci_devices[n] : NULL;
154
155         return NULL;
156 }
157
158 /**
159  * Look up capability at given config space address.
160  * @param device        The device to be accessed.
161  * @param address       Config space access address.
162  *
163  * @return Corresponding capability structure or NULL if none found.
164  *
165  * @private
166  */
167 static const struct jailhouse_pci_capability *
168 pci_find_capability(struct pci_device *device, u16 address)
169 {
170         const struct jailhouse_pci_capability *cap =
171                 jailhouse_cell_pci_caps(device->cell->config) +
172                 device->info->caps_start;
173         u32 n;
174
175         for (n = 0; n < device->info->num_caps; n++, cap++)
176                 if (cap->start <= address && cap->start + cap->len > address)
177                         return cap;
178
179         return NULL;
180 }
181
/**
 * Moderate config space read access.
 * @param device	The device to be accessed. If NULL, access will be
 *			emulated, returning a value of -1.
 * @param address	Config space address.
 * @param size		Access size (1, 2 or 4 bytes).
 * @param value		Pointer to buffer to receive the emulated value if
 *			PCI_ACCESS_DONE is returned.
 *
 * @return PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 *
 * @see pci_cfg_write_moderate
 */
enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
                                      unsigned int size, u32 *value)
{
        const struct jailhouse_pci_capability *cap;
        unsigned int bar_no, cap_offs;

        /* Unassigned device: emulate "no device present" (all-ones). */
        if (!device) {
                *value = -1;
                return PCI_ACCESS_DONE;
        }

        /* Emulate BARs for physical and virtual devices */
        if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE) {
                /* Emulate BAR access, always returning the shadow value. */
                if (address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
                        bar_no = (address - PCI_CFG_BAR) / 4;
                        /* Sub-DWORD reads: shift the shadow register down to
                         * the requested byte offset. */
                        *value = device->bar[bar_no] >> ((address % 4) * 8);
                        return PCI_ACCESS_DONE;
                }

                /* We do not expose ROMs. */
                if (address >= PCI_CFG_ROMBAR && address < PCI_CFG_CAPS) {
                        *value = 0;
                        return PCI_ACCESS_DONE;
                }
        }

        /* ivshmem devices are fully virtual - delegate everything else. */
        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_read(device, address, value);

        /* Remaining header fields may be read from hardware directly. */
        if (address < PCI_CONFIG_HEADER_SIZE)
                return PCI_ACCESS_PERFORM;

        cap = pci_find_capability(device, address);
        if (!cap)
                return PCI_ACCESS_PERFORM;

        /* MSI address/data words are served from the shadow registers; the
         * control word (cap_offs 0-3) is read from hardware. */
        cap_offs = address - cap->start;
        if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                *value = device->msi_registers.raw[cap_offs / 4] >>
                        ((cap_offs % 4) * 8);
                return PCI_ACCESS_DONE;
        }

        return PCI_ACCESS_PERFORM;
}
242
243 static int pci_update_msix(struct pci_device *device,
244                            const struct jailhouse_pci_capability *cap)
245 {
246         unsigned int n;
247         int result;
248
249         for (n = 0; n < device->info->num_msix_vectors; n++) {
250                 result = arch_pci_update_msix_vector(device, n);
251                 if (result < 0)
252                         return result;
253         }
254         return 0;
255 }
256
/**
 * Moderate config space write access.
 * @param device	The device to be accessed. If NULL, access will be
 *			rejected.
 * @param address	Config space address.
 * @param size		Access size (1, 2 or 4 bytes).
 * @param value		Value to be written.
 *
 * @return PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
 *
 * @see pci_cfg_read_moderate
 */
enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
                                       unsigned int size, u32 value)
{
        const struct jailhouse_pci_capability *cap;
        /* Byte shift of the access inside its containing 32-bit register. */
        unsigned int bias_shift = (address % 4) * 8;
        u32 mask = BYTE_MASK(size) << bias_shift;
        struct pci_cfg_control cfg_control;
        unsigned int bar_no, cap_offs;

        if (!device)
                return PCI_ACCESS_REJECT;

        /* Align the value with the mask inside the 32-bit register. */
        value <<= bias_shift;

        /* Emulate BARs for physical and virtual devices */
        if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE &&
            address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
                bar_no = (address - PCI_CFG_BAR) / 4;
                /* Only bits writable per the configured BAR mask are
                 * reflected in the shadow register. */
                mask &= device->info->bar_mask[bar_no];
                device->bar[bar_no] &= ~mask;
                device->bar[bar_no] |= value & mask;
                return PCI_ACCESS_DONE;
        }

        if (address < PCI_CONFIG_HEADER_SIZE) {
                /* Pick the access control table by header type. */
                if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE)
                        cfg_control = bridge_write[address / 4];
                else /* physical or virtual device */
                        cfg_control = endpoint_write[address / 4];

                /* Every accessed byte must be covered by the allow mask. */
                if ((cfg_control.mask & mask) != mask)
                        return PCI_ACCESS_REJECT;

                switch (cfg_control.type) {
                case PCI_CONFIG_ALLOW:
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                                return pci_ivshmem_cfg_write(device,
                                                address / 4, mask, value);
                        return PCI_ACCESS_PERFORM;
                case PCI_CONFIG_RDONLY:
                        /* Silently drop the write. */
                        return PCI_ACCESS_DONE;
                default:
                        return PCI_ACCESS_REJECT;
                }
        }

        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_write(device, address / 4, mask, value);

        cap = pci_find_capability(device, address);
        if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
                return PCI_ACCESS_REJECT;

        cap_offs = address - cap->start;
        if (cap->id == PCI_CAP_MSI &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                /* Update the shadow copy, then let the architecture code
                 * derive the effective hardware state from it. */
                device->msi_registers.raw[cap_offs / 4] &= ~mask;
                device->msi_registers.raw[cap_offs / 4] |= value;

                if (arch_pci_update_msi(device, cap) < 0)
                        return PCI_ACCESS_REJECT;

                /*
                 * Address and data words are emulated, the control word is
                 * written as-is.
                 */
                if (cap_offs >= 4)
                        return PCI_ACCESS_DONE;
        } else if (cap->id == PCI_CAP_MSIX && cap_offs < 4) {
                device->msix_registers.raw &= ~mask;
                device->msix_registers.raw |= value;

                if (pci_update_msix(device, cap) < 0)
                        return PCI_ACCESS_REJECT;
        }

        return PCI_ACCESS_PERFORM;
}
348
/**
 * Initialization of PCI subsystem.
 *
 * @return 0 on success, negative error code otherwise.
 */
int pci_init(void)
{
        unsigned int mmcfg_size;
        int err;

        err = pci_cell_init(&root_cell);
        if (err)
                return err;

        /* Without MMCONFIG info, only the arch-specific access path
         * remains available (pci_space stays NULL). */
        mmcfg_start = system_config->platform_info.x86.mmconfig_base;
        if (mmcfg_start == 0)
                return 0;

        end_bus = system_config->platform_info.x86.mmconfig_end_bus;
        /* 256 device/function slots per bus, 4K config space each. */
        mmcfg_size = (end_bus + 1) * 256 * 4096;
        /* Last DWORD-aligned address still inside the region. */
        mmcfg_end = mmcfg_start + mmcfg_size - 4;

        pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
        if (!pci_space)
                return trace_error(-ENOMEM);

        /* Map the physical MMCONFIG area into the hypervisor. */
        return paging_create(&hv_paging_structs, mmcfg_start, mmcfg_size,
                             (unsigned long)pci_space,
                             PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                             PAGING_NON_COHERENT);
}
380
/*
 * Moderate cell accesses to the MSI-X table/PBA of an assigned device.
 *
 * Returns 1 if handled, 0 if the address belongs to no listed device,
 * -1 on an invalid access (a panic message has been printed).
 */
static int pci_msix_access_handler(const struct cell *cell, bool is_write,
                                   u64 addr, u32 *value)
{
        /* DWORD index (0..3) inside one MSI-X table entry. */
        unsigned int dword = (addr % sizeof(union pci_msix_vector)) >> 2;
        struct pci_device *device = cell->msix_device_list;
        unsigned int index;
        u64 offs;

        /* Find the device whose MSI-X region contains the address. */
        while (device) {
                if (addr >= device->info->msix_address &&
                    addr < device->info->msix_address +
                           device->info->msix_region_size)
                        goto found;
                device = device->next_msix_device;
        }
        return 0;

found:
        /* access must be DWORD-aligned */
        if (addr & 0x3)
                goto invalid_access;

        offs = addr - device->info->msix_address;
        index = offs / sizeof(union pci_msix_vector);

        if (is_write) {
                /*
                 * The PBA may share a page with the MSI-X table. Writing to
                 * PBA entries is undefined. We declare it as invalid.
                 */
                if (index >= device->info->num_msix_vectors)
                        goto invalid_access;

                /* Update the shadow vector first, then let the arch layer
                 * apply it (it may remap the message). */
                device->msix_vectors[index].raw[dword] = *value;
                if (arch_pci_update_msix_vector(device, index) < 0)
                        goto invalid_access;

                /* The control word is additionally written through to the
                 * hardware table (e.g. the per-vector mask bit). */
                if (dword == MSIX_VECTOR_CTRL_DWORD)
                        mmio_write32(&device->msix_table[index].raw[dword],
                                     *value);
        } else {
                /* PBA entries and control words are read from hardware;
                 * address/data words come from the shadow copy. */
                if (index >= device->info->num_msix_vectors ||
                    dword == MSIX_VECTOR_CTRL_DWORD)
                        *value =
                            mmio_read32(((void *)device->msix_table) + offs);
                else
                        *value = device->msix_vectors[index].raw[dword];
        }
        return 1;

invalid_access:
        panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
                     "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
        return -1;
}
436
437 /**
438  * Handler for MMIO-accesses to PCI config space.
439  * @param cell          Request issuing cell.
440  * @param is_write      True if write access.
441  * @param addr          Address accessed.
442  * @param value         Pointer to value for reading/writing.
443  *
444  * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
445  */
446 int pci_mmio_access_handler(const struct cell *cell, bool is_write,
447                             u64 addr, u32 *value)
448 {
449         u32 mmcfg_offset, reg_addr;
450         struct pci_device *device;
451         enum pci_access access;
452
453         if (!pci_space || addr < mmcfg_start || addr > mmcfg_end)
454                 return pci_msix_access_handler(cell, is_write, addr, value);
455
456         mmcfg_offset = addr - mmcfg_start;
457         reg_addr = mmcfg_offset & 0xfff;
458         /* access must be DWORD-aligned */
459         if (reg_addr & 0x3)
460                 goto invalid_access;
461
462         device = pci_get_assigned_device(cell, mmcfg_offset >> 12);
463
464         if (is_write) {
465                 access = pci_cfg_write_moderate(device, reg_addr, 4, *value);
466                 if (access == PCI_ACCESS_REJECT)
467                         goto invalid_access;
468                 if (access == PCI_ACCESS_PERFORM)
469                         mmio_write32(pci_space + mmcfg_offset, *value);
470         } else {
471                 access = pci_cfg_read_moderate(device, reg_addr, 4, value);
472                 if (access == PCI_ACCESS_PERFORM)
473                         *value = mmio_read32(pci_space + mmcfg_offset);
474         }
475
476         return 1;
477
478 invalid_access:
479         panic_printk("FATAL: Invalid PCI MMCONFIG write, device %02x:%02x.%x, "
480                      "reg: %\n", PCI_BDF_PARAMS(mmcfg_offset >> 12), reg_addr);
481         return -1;
482
483 }
484
485 /**
486  * Retrieve number of enabled MSI vector of a device.
487  * @param device        The device to be examined.
488  *
489  * @return number of vectors.
490  */
491 unsigned int pci_enabled_msi_vectors(struct pci_device *device)
492 {
493         return device->msi_registers.msg32.enable ?
494                 1 << device->msi_registers.msg32.mme : 0;
495 }
496
497 static void pci_save_msi(struct pci_device *device,
498                          const struct jailhouse_pci_capability *cap)
499 {
500         u16 bdf = device->info->bdf;
501         unsigned int n;
502
503         for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
504                 device->msi_registers.raw[n] =
505                         pci_read_config(bdf, cap->start + n * 4, 4);
506 }
507
508 static void pci_restore_msi(struct pci_device *device,
509                             const struct jailhouse_pci_capability *cap)
510 {
511         unsigned int n;
512
513         for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
514                 pci_write_config(device->info->bdf, cap->start + n * 4,
515                                  device->msi_registers.raw[n], 4);
516 }
517
518 static void pci_suppress_msix(struct pci_device *device,
519                               const struct jailhouse_pci_capability *cap,
520                               bool suppressed)
521 {
522         union pci_msix_registers regs = device->msix_registers;
523
524         if (suppressed)
525                 regs.fmask = 1;
526         pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
527 }
528
529 static void pci_save_msix(struct pci_device *device,
530                           const struct jailhouse_pci_capability *cap)
531 {
532         unsigned int n, r;
533
534         device->msix_registers.raw =
535                 pci_read_config(device->info->bdf, cap->start, 4);
536
537         for (n = 0; n < device->info->num_msix_vectors; n++)
538                 for (r = 0; r < 4; r++)
539                         device->msix_vectors[n].raw[r] =
540                                 mmio_read32(&device->msix_table[n].raw[r]);
541 }
542
543 static void pci_restore_msix(struct pci_device *device,
544                              const struct jailhouse_pci_capability *cap)
545 {
546         unsigned int n, r;
547
548         for (n = 0; n < device->info->num_msix_vectors; n++)
549                 /* only restore address/data, control is write-through */
550                 for (r = 0; r < 3; r++)
551                         mmio_write32(&device->msix_table[n].raw[r],
552                                      device->msix_vectors[n].raw[r]);
553         pci_suppress_msix(device, cap, false);
554 }
555
556 /**
557  * Prepare the handover of PCI devices to Jailhouse or back to Linux.
558  */
559 void pci_prepare_handover(void)
560 {
561         const struct jailhouse_pci_capability *cap;
562         struct pci_device *device;
563         unsigned int n;
564
565         if (!root_cell.pci_devices)
566                 return;
567
568         for_each_configured_pci_device(device, &root_cell) {
569                 if (device->cell)
570                         for_each_pci_cap(cap, device, n)
571                                 if (cap->id == PCI_CAP_MSI)
572                                         arch_pci_suppress_msi(device, cap);
573                                 else if (cap->id == PCI_CAP_MSIX)
574                                         pci_suppress_msix(device, cap, true);
575         }
576 }
577
/*
 * Assign a physical device to a cell: snapshot its BARs, register it with
 * the architecture layer and, for MSI-X devices, map the table region and
 * link the device into the cell's MSI-X moderation list.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int pci_add_physical_device(struct cell *cell, struct pci_device *device)
{
        unsigned int n, pages, size = device->info->msix_region_size;
        int err;

        printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), cell->config->name);

        /* Snapshot the current BAR values as shadow registers. */
        for (n = 0; n < PCI_NUM_BARS; n ++)
                device->bar[n] = pci_read_config(device->info->bdf,
                                                 PCI_CFG_BAR + n * 4, 4);

        err = arch_pci_add_physical_device(cell, device);

        if (!err && device->info->msix_address) {
                /* Map the MSI-X table so the hypervisor can moderate it. */
                device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
                if (!device->msix_table) {
                        err = trace_error(-ENOMEM);
                        goto error_remove_dev;
                }

                err = paging_create(&hv_paging_structs,
                                    device->info->msix_address, size,
                                    (unsigned long)device->msix_table,
                                    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                                    PAGING_NON_COHERENT);
                if (err)
                        goto error_page_free;

                /* Devices with more vectors than the embedded shadow array
                 * can hold get an externally allocated one. */
                if (device->info->num_msix_vectors > PCI_EMBEDDED_MSIX_VECTS) {
                        pages = PAGES(sizeof(union pci_msix_vector) *
                                      device->info->num_msix_vectors);
                        device->msix_vectors = page_alloc(&mem_pool, pages);
                        if (!device->msix_vectors) {
                                err = -ENOMEM;
                                goto error_unmap_table;
                        }
                }

                /* Publish the device for MSI-X MMIO moderation. */
                device->next_msix_device = cell->msix_device_list;
                cell->msix_device_list = device;
        }
        return err;

error_unmap_table:
        /* cannot fail, destruction of same size as construction */
        paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
                       size, PAGING_NON_COHERENT);
error_page_free:
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
error_remove_dev:
        arch_pci_remove_physical_device(device);
        return err;
}
632
/*
 * Detach a physical device from its cell: unregister it from the arch
 * layer, disable INTx and tear down any MSI-X mapping and shadow state,
 * unlinking the device from the cell's MSI-X moderation list.
 */
static void pci_remove_physical_device(struct pci_device *device)
{
        unsigned int size = device->info->msix_region_size;
        struct pci_device *prev_msix_device;

        printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
        arch_pci_remove_physical_device(device);
        /* Silence legacy interrupts while the device is unassigned. */
        pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
                         PCI_CMD_INTX_OFF, 2);

        /* No MSI-X table mapped: nothing more to undo. */
        if (!device->msix_table)
                return;

        /* cannot fail, destruction of same size as construction */
        paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
                       size, PAGING_NON_COHERENT);
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);

        /* Release the external shadow array if one was allocated. */
        if (device->msix_vectors != device->msix_vector_array)
                page_free(&mem_pool, device->msix_vectors,
                          PAGES(sizeof(union pci_msix_vector) *
                                device->info->num_msix_vectors));

        /* Unlink the device from the singly-linked MSI-X list. */
        prev_msix_device = device->cell->msix_device_list;
        if (prev_msix_device == device) {
                device->cell->msix_device_list = device->next_msix_device;
        } else {
                while (prev_msix_device->next_msix_device != device)
                        prev_msix_device = prev_msix_device->next_msix_device;
                prev_msix_device->next_msix_device = device->next_msix_device;
        }
}
666
/**
 * Perform PCI-specific initialization for a new cell.
 * @param cell	Cell to be initialized.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @see pci_cell_exit
 */
int pci_cell_init(struct cell *cell)
{
        unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
                                           sizeof(struct pci_device));
        const struct jailhouse_pci_device *dev_infos =
                jailhouse_cell_pci_devices(cell->config);
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device, *root_device;
        unsigned int ndev, ncap;
        int err;

        cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
        if (!cell->pci_devices)
                return -ENOMEM;

        /*
         * We order device states in the same way as the static information
         * so that we can use the index of the latter to find the former. For
         * the other way around and for obtaining the owner cell, we use more
         * handy pointers. The cell pointer also encodes active ownership.
         */
        for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
                device = &cell->pci_devices[ndev];
                device->info = &dev_infos[ndev];
                /* Default to the embedded shadow array; devices with many
                 * vectors get an external one in pci_add_physical_device. */
                device->msix_vectors = device->msix_vector_array;

                if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                        err = pci_ivshmem_init(cell, device);
                        if (err)
                                goto error;

                        device->cell = cell;

                        continue;
                }

                /* If the root cell currently owns the physical device,
                 * detach it there before claiming it. */
                root_device = pci_get_assigned_device(&root_cell,
                                                      dev_infos[ndev].bdf);
                if (root_device) {
                        pci_remove_physical_device(root_device);
                        root_device->cell = NULL;
                }

                err = pci_add_physical_device(cell, device);
                if (err)
                        goto error;

                device->cell = cell;

                /* Snapshot the MSI/MSI-X state for later moderation. */
                for_each_pci_cap(cap, device, ncap)
                        if (cap->id == PCI_CAP_MSI)
                                pci_save_msi(device, cap);
                        else if (cap->id == PCI_CAP_MSIX)
                                pci_save_msix(device, cap);
        }

        /* The root cell is initialized before activation - suppress its
         * interrupt sources right away. */
        if (cell == &root_cell)
                pci_prepare_handover();

        return 0;
error:
        pci_cell_exit(cell);
        return err;
}
739
740 static void pci_return_device_to_root_cell(struct pci_device *device)
741 {
742         struct pci_device *root_device;
743
744         for_each_configured_pci_device(root_device, &root_cell)
745                 if (root_device->info->domain == device->info->domain &&
746                     root_device->info->bdf == device->info->bdf) {
747                         if (pci_add_physical_device(&root_cell,
748                                                     root_device) < 0)
749                                 printk("WARNING: Failed to re-assign PCI "
750                                        "device to root cell\n");
751                         else
752                                 root_device->cell = &root_cell;
753                         break;
754                 }
755 }
756
757 /**
758  * Perform PCI-specific cleanup for a cell under destruction.
759  * @param cell  Cell to be destructed.
760  *
761  * @see pci_cell_init
762  */
763 void pci_cell_exit(struct cell *cell)
764 {
765         unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
766                                            sizeof(struct pci_device));
767         struct pci_device *device;
768
769         /*
770          * Do not destroy the root cell. We will shut down the complete
771          * hypervisor instead.
772          */
773         if (cell == &root_cell)
774                 return;
775
776         for_each_configured_pci_device(device, cell)
777                 if (device->cell) {
778                         if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
779                                 pci_ivshmem_exit(device);
780                         } else {
781                                 pci_remove_physical_device(device);
782                                 pci_return_device_to_root_cell(device);
783                         }
784                 }
785
786         page_free(&mem_pool, cell->pci_devices, devlist_pages);
787 }
788
/**
 * Apply PCI-specific configuration changes.
 * @param cell_added_removed	Cell that was added or removed to/from the
 *				system or NULL.
 *
 * @see arch_config_commit
 */
void pci_config_commit(struct cell *cell_added_removed)
{
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device;
        unsigned int n;
        int err = 0;

        /* Nothing to refresh if no cell was added or removed. */
        if (!cell_added_removed)
                return;

        /* Re-apply the shadowed MSI/MSI-X state of all active devices. */
        for_each_configured_pci_device(device, &root_cell)
                if (device->cell) {
                        for_each_pci_cap(cap, device, n) {
                                if (cap->id == PCI_CAP_MSI) {
                                        err = arch_pci_update_msi(device, cap);
                                } else if (cap->id == PCI_CAP_MSIX) {
                                        err = pci_update_msix(device, cap);
                                        pci_suppress_msix(device, cap, false);
                                }
                                if (err)
                                        goto error;
                        }
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                                err = pci_ivshmem_update_msix(device);
                                if (err) {
                                        /* cap is stale here - clear it so
                                         * the error path skips printing. */
                                        cap = NULL;
                                        goto error;
                                }
                        }
                }
        return;

error:
        panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x",
                     PCI_BDF_PARAMS(device->info->bdf));
        if (cap)
                panic_printk(", cap %d\n", cap->id);
        else
                panic_printk("\n");
        panic_stop();
}
837
838 /**
839  * Shut down the PCI layer during hypervisor deactivation.
840  */
841 void pci_shutdown(void)
842 {
843         const struct jailhouse_pci_capability *cap;
844         struct pci_device *device;
845         unsigned int n;
846
847         if (!root_cell.pci_devices)
848                 return;
849
850         for_each_configured_pci_device(device, &root_cell) {
851                 if (!device->cell)
852                         continue;
853
854                 for_each_pci_cap(cap, device, n)
855                         if (cap->id == PCI_CAP_MSI)
856                                 pci_restore_msi(device, cap);
857                         else if (cap->id == PCI_CAP_MSIX)
858                                 pci_restore_msix(device, cap);
859
860                 if (device->cell != &root_cell)
861                         pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
862                                          PCI_CMD_INTX_OFF, 2);
863         }
864 }