/*
 * hypervisor/pci.c (retrieved from jailhouse.git via rtime.felk.cvut.cz
 * gitweb; page header kept here as a comment so the file stays valid C)
 */
1 /*
2  * Jailhouse, a Linux-based partitioning hypervisor
3  *
4  * Copyright (c) Siemens AG, 2014, 2015
5  *
6  * Authors:
7  *  Ivan Kolchin <ivan.kolchin@siemens.com>
8  *  Jan Kiszka <jan.kiszka@siemens.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2.  See
11  * the COPYING file in the top-level directory.
12  */
13
14 #include <jailhouse/control.h>
15 #include <jailhouse/mmio.h>
16 #include <jailhouse/pci.h>
17 #include <jailhouse/printk.h>
18 #include <jailhouse/utils.h>
19
20 #define MSIX_VECTOR_CTRL_DWORD          3
21
22 #define for_each_configured_pci_device(dev, cell)                       \
23         for ((dev) = (cell)->pci_devices;                               \
24              (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
25              (dev)++)
26
27 #define for_each_pci_cap(cap, dev, counter)                             \
28         for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) +     \
29                 (dev)->info->caps_start, (counter) = 0;                 \
30              (counter) < (dev)->info->num_caps;                         \
31              (cap)++, (counter)++)
32
33 /* entry for PCI config space access control */
34 struct pci_cfg_control {
35         enum {
36                 PCI_CONFIG_DENY,
37                 PCI_CONFIG_ALLOW,
38                 PCI_CONFIG_RDONLY,
39         } type;   /* Access type */
40         u32 mask; /* Bit set: access type applies; bit cleared: deny access */
41 };
42
43 /* --- Access control for writing to PCI config space registers --- */
44 /* Type 1: Endpoints */
45 static const struct pci_cfg_control endpoint_write[PCI_CONFIG_HEADER_SIZE] = {
46         [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
47         [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
48         [0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* ROM BAR */
49         [0x3c/4] = {PCI_CONFIG_ALLOW,  0x000000ff}, /* Int Line */
50 };
51
52 /* Type 2: Bridges
53  * Note: Ignore limit/base reprogramming attempts because the root cell will
54  *       perform them on bus rescans. */
55 static const struct pci_cfg_control bridge_write[PCI_CONFIG_HEADER_SIZE] = {
56         [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
57         [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
58         [0x1c/4] = {PCI_CONFIG_RDONLY, 0x0000ffff}, /* I/O Limit & Base */
59         [0x20/4 ...      /* Memory Limit/Base, Prefetch Memory Limit/Base, */
60          0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* I/O Limit & Base */
61         [0x3c/4] = {PCI_CONFIG_ALLOW,  0xffff00ff}, /* Int Line, Bridge Ctrl */
62 };
63
64 static void *pci_space;
65 static u64 mmcfg_start, mmcfg_size;
66 static u8 end_bus;
67
68 unsigned int pci_mmio_count_regions(struct cell *cell)
69 {
70         const struct jailhouse_pci_device *dev_infos =
71                 jailhouse_cell_pci_devices(cell->config);
72         unsigned int n, regions = 0;
73
74         if (system_config->platform_info.x86.mmconfig_base)
75                 regions++;
76
77         for (n = 0; n < cell->config->num_pci_devices; n++)
78                 if (dev_infos[n].type == JAILHOUSE_PCI_TYPE_IVSHMEM)
79                         regions += PCI_IVSHMEM_NUM_MMIO_REGIONS;
80                 else if (dev_infos[n].msix_address)
81                         regions++;
82
83         return regions;
84 }
85
86 static void *pci_get_device_mmcfg_base(u16 bdf)
87 {
88         return pci_space + ((unsigned long)bdf << 12);
89 }
90
91 /**
92  * Read from PCI config space.
93  * @param bdf           16-bit bus/device/function ID of target.
94  * @param address       Config space access address.
95  * @param size          Access size (1, 2 or 4 bytes).
96  *
97  * @return Read value.
98  *
99  * @see pci_write_config
100  */
101 u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
102 {
103         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
104
105         if (!pci_space || PCI_BUS(bdf) > end_bus)
106                 return arch_pci_read_config(bdf, address, size);
107
108         if (size == 1)
109                 return mmio_read8(mmcfg_addr);
110         else if (size == 2)
111                 return mmio_read16(mmcfg_addr);
112         else
113                 return mmio_read32(mmcfg_addr);
114 }
115
116 /**
117  * Write to PCI config space.
118  * @param bdf           16-bit bus/device/function ID of target.
119  * @param address       Config space access address.
120  * @param value         Value to be written.
121  * @param size          Access size (1, 2 or 4 bytes).
122  *
123  * @see pci_read_config
124  */
125 void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
126 {
127         void *mmcfg_addr = pci_get_device_mmcfg_base(bdf) + address;
128
129         if (!pci_space || PCI_BUS(bdf) > end_bus)
130                 return arch_pci_write_config(bdf, address, value, size);
131
132         if (size == 1)
133                 mmio_write8(mmcfg_addr, value);
134         else if (size == 2)
135                 mmio_write16(mmcfg_addr, value);
136         else
137                 mmio_write32(mmcfg_addr, value);
138 }
139
140 /**
141  * Look up device owned by a cell.
142  * @param[in] cell      Owning cell.
143  * @param bdf           16-bit bus/device/function ID.
144  *
145  * @return Pointer to owned PCI device or NULL.
146  */
147 struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
148 {
149         const struct jailhouse_pci_device *dev_info =
150                 jailhouse_cell_pci_devices(cell->config);
151         u32 n;
152
153         /* We iterate over the static device information to increase cache
154          * locality. */
155         for (n = 0; n < cell->config->num_pci_devices; n++)
156                 if (dev_info[n].bdf == bdf)
157                         return cell->pci_devices[n].cell ?
158                                 &cell->pci_devices[n] : NULL;
159
160         return NULL;
161 }
162
163 /**
164  * Look up capability at given config space address.
165  * @param device        The device to be accessed.
166  * @param address       Config space access address.
167  *
168  * @return Corresponding capability structure or NULL if none found.
169  *
170  * @private
171  */
172 static const struct jailhouse_pci_capability *
173 pci_find_capability(struct pci_device *device, u16 address)
174 {
175         const struct jailhouse_pci_capability *cap =
176                 jailhouse_cell_pci_caps(device->cell->config) +
177                 device->info->caps_start;
178         u32 n;
179
180         for (n = 0; n < device->info->num_caps; n++, cap++)
181                 if (cap->start <= address && cap->start + cap->len > address)
182                         return cap;
183
184         return NULL;
185 }
186
187 /**
188  * Moderate config space read access.
189  * @param device        The device to be accessed. If NULL, access will be
190  *                      emulated, returning a value of -1.
191  * @param address       Config space address.
192  * @param size          Access size (1, 2 or 4 bytes).
193  * @param value         Pointer to buffer to receive the emulated value if
194  *                      PCI_ACCESS_DONE is returned.
195  *
196  * @return PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
197  *
198  * @see pci_cfg_write_moderate
199  */
enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
                                      unsigned int size, u32 *value)
{
        const struct jailhouse_pci_capability *cap;
        unsigned int bar_no, cap_offs;

        if (!device) {
                /* Unassigned device: return all-ones, like an aborted
                 * config read. */
                *value = -1;
                return PCI_ACCESS_DONE;
        }

        /* Emulate BARs for physical and virtual devices */
        if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE) {
                /* Emulate BAR access, always returning the shadow value. */
                if (address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
                        bar_no = (address - PCI_CFG_BAR) / 4;
                        /* Shift to honor sub-dword (byte/word) accesses. */
                        *value = device->bar[bar_no] >> ((address % 4) * 8);
                        return PCI_ACCESS_DONE;
                }

                /* We do not expose ROMs. */
                if (address >= PCI_CFG_ROMBAR && address < PCI_CFG_CAPS) {
                        *value = 0;
                        return PCI_ACCESS_DONE;
                }
        }

        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_read(device, address, value);

        /* Remaining config header fields are read directly from hardware. */
        if (address < PCI_CONFIG_HEADER_SIZE)
                return PCI_ACCESS_PERFORM;

        cap = pci_find_capability(device, address);
        if (!cap)
                return PCI_ACCESS_PERFORM;

        cap_offs = address - cap->start;
        /* MSI address/data words (offsets 4..9, or 4..13 for 64-bit MSI) are
         * served from the shadow registers; everything else, including the
         * control word at offset 0-3, passes through. */
        if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                *value = device->msi_registers.raw[cap_offs / 4] >>
                        ((cap_offs % 4) * 8);
                return PCI_ACCESS_DONE;
        }

        return PCI_ACCESS_PERFORM;
}
247
248 static int pci_update_msix(struct pci_device *device,
249                            const struct jailhouse_pci_capability *cap)
250 {
251         unsigned int n;
252         int result;
253
254         for (n = 0; n < device->info->num_msix_vectors; n++) {
255                 result = arch_pci_update_msix_vector(device, n);
256                 if (result < 0)
257                         return result;
258         }
259         return 0;
260 }
261
262 /**
263  * Moderate config space write access.
264  * @param device        The device to be accessed. If NULL, access will be
265  *                      rejected.
266  * @param address       Config space address.
267  * @param size          Access size (1, 2 or 4 bytes).
268  * @param value         Value to be written.
269  *
270  * @return PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
271  *
272  * @see pci_cfg_read_moderate
273  */
enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
                                       unsigned int size, u32 value)
{
        const struct jailhouse_pci_capability *cap;
        /* Byte position of the access within its containing dword. */
        unsigned int bias_shift = (address % 4) * 8;
        /* Dword-wide mask covering only the accessed bytes. */
        u32 mask = BYTE_MASK(size) << bias_shift;
        struct pci_cfg_control cfg_control;
        unsigned int bar_no, cap_offs;

        if (!device)
                return PCI_ACCESS_REJECT;

        /* Align the value with the dword-based mask computed above. */
        value <<= bias_shift;

        /* Emulate BARs for physical and virtual devices */
        if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE &&
            address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
                bar_no = (address - PCI_CFG_BAR) / 4;
                /* Only bits writable per the configured BAR mask take
                 * effect in the shadow copy; hardware is never touched. */
                mask &= device->info->bar_mask[bar_no];
                device->bar[bar_no] &= ~mask;
                device->bar[bar_no] |= value & mask;
                return PCI_ACCESS_DONE;
        }

        if (address < PCI_CONFIG_HEADER_SIZE) {
                /* Look up the per-dword access policy for this header. */
                if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE)
                        cfg_control = bridge_write[address / 4];
                else /* physical or virtual device */
                        cfg_control = endpoint_write[address / 4];

                /* Every accessed bit must be covered by the policy mask. */
                if ((cfg_control.mask & mask) != mask)
                        return PCI_ACCESS_REJECT;

                switch (cfg_control.type) {
                case PCI_CONFIG_ALLOW:
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                                return pci_ivshmem_cfg_write(device,
                                                address / 4, mask, value);
                        return PCI_ACCESS_PERFORM;
                case PCI_CONFIG_RDONLY:
                        /* Silently drop the write. */
                        return PCI_ACCESS_DONE;
                default:
                        return PCI_ACCESS_REJECT;
                }
        }

        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
                return pci_ivshmem_cfg_write(device, address / 4, mask, value);

        cap = pci_find_capability(device, address);
        if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
                return PCI_ACCESS_REJECT;

        cap_offs = address - cap->start;
        if (cap->id == PCI_CAP_MSI &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                /* Update the shadow registers first, then let the arch code
                 * validate/apply the new MSI state. */
                device->msi_registers.raw[cap_offs / 4] &= ~mask;
                device->msi_registers.raw[cap_offs / 4] |= value;

                if (arch_pci_update_msi(device, cap) < 0)
                        return PCI_ACCESS_REJECT;

                /*
                 * Address and data words are emulated, the control word is
                 * written as-is.
                 */
                if (cap_offs >= 4)
                        return PCI_ACCESS_DONE;
        } else if (cap->id == PCI_CAP_MSIX && cap_offs < 4) {
                /* MSI-X message control dword: shadow it, then sync all
                 * vectors through the arch backend. */
                device->msix_registers.raw &= ~mask;
                device->msix_registers.raw |= value;

                if (pci_update_msix(device, cap) < 0)
                        return PCI_ACCESS_REJECT;
        }

        return PCI_ACCESS_PERFORM;
}
353
354 /**
355  * Initialization of PCI subsystem.
356  *
357  * @return 0 on success, negative error code otherwise.
358  */
359 int pci_init(void)
360 {
361         int err;
362
363         mmcfg_start = system_config->platform_info.x86.mmconfig_base;
364         if (mmcfg_start != 0) {
365                 end_bus = system_config->platform_info.x86.mmconfig_end_bus;
366                 mmcfg_size = (end_bus + 1) * 256 * 4096;
367
368                 pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
369                 if (!pci_space)
370                         return trace_error(-ENOMEM);
371
372                 err = paging_create(&hv_paging_structs, mmcfg_start,
373                                     mmcfg_size, (unsigned long)pci_space,
374                                     PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
375                                     PAGING_NON_COHERENT);
376                 if (err)
377                         return err;
378         }
379
380         return pci_cell_init(&root_cell);
381 }
382
/* MMIO handler for a device's MSI-X table (and the PBA when it shares the
 * same region). Writes update the shadow vectors and are pushed to hardware
 * by the arch backend; only the vector control word is written through. */
static enum mmio_result pci_msix_access_handler(void *arg,
                                                struct mmio_access *mmio)
{
        /* Dword index within one MSI-X table entry (entry is 4 dwords). */
        unsigned int dword =
                (mmio->address % sizeof(union pci_msix_vector)) >> 2;
        struct pci_device *device = arg;
        unsigned int index;

        /* access must be DWORD-aligned */
        if (mmio->address & 0x3)
                goto invalid_access;

        /* Table entry index; values past num_msix_vectors land in the PBA. */
        index = mmio->address / sizeof(union pci_msix_vector);

        if (mmio->is_write) {
                /*
                 * The PBA may share a page with the MSI-X table. Writing to
                 * PBA entries is undefined. We declare it as invalid.
                 */
                if (index >= device->info->num_msix_vectors)
                        goto invalid_access;

                device->msix_vectors[index].raw[dword] = mmio->value;
                if (arch_pci_update_msix_vector(device, index) < 0)
                        goto invalid_access;

                /* The vector control word is additionally written through. */
                if (dword == MSIX_VECTOR_CTRL_DWORD)
                        mmio_write32(&device->msix_table[index].raw[dword],
                                     mmio->value);
        } else {
                /* PBA reads and vector control reads come from hardware;
                 * address/data words come from the shadow copy. */
                if (index >= device->info->num_msix_vectors ||
                    dword == MSIX_VECTOR_CTRL_DWORD)
                        mmio->value = mmio_read32(((void *)device->msix_table) +
                                                  mmio->address);
                else
                        mmio->value = device->msix_vectors[index].raw[dword];
        }
        return MMIO_HANDLED;

invalid_access:
        panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
                     "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
        return MMIO_ERROR;
}
427
428 static enum mmio_result pci_mmconfig_access_handler(void *arg,
429                                                     struct mmio_access *mmio)
430 {
431         u32 reg_addr = mmio->address & 0xfff;
432         struct pci_device *device;
433         enum pci_access result;
434         u32 val;
435
436         /* access must be DWORD-aligned */
437         if (reg_addr & 0x3)
438                 goto invalid_access;
439
440         device = pci_get_assigned_device(this_cell(), mmio->address >> 12);
441
442         if (mmio->is_write) {
443                 result = pci_cfg_write_moderate(device, reg_addr, 4,
444                                                 mmio->value);
445                 if (result == PCI_ACCESS_REJECT)
446                         goto invalid_access;
447                 if (result == PCI_ACCESS_PERFORM)
448                         mmio_write32(pci_space + mmio->address, mmio->value);
449         } else {
450                 result = pci_cfg_read_moderate(device, reg_addr, 4, &val);
451                 if (result == PCI_ACCESS_PERFORM)
452                         mmio->value = mmio_read32(pci_space + mmio->address);
453                 else
454                         mmio->value = val;
455         }
456
457         return MMIO_HANDLED;
458
459 invalid_access:
460         panic_printk("FATAL: Invalid PCI MMCONFIG write, device %02x:%02x.%x, "
461                      "reg: %x\n", PCI_BDF_PARAMS(mmio->address >> 12),
462                      reg_addr);
463         return MMIO_ERROR;
464
465 }
466
467 /**
468  * Retrieve number of enabled MSI vector of a device.
469  * @param device        The device to be examined.
470  *
471  * @return number of vectors.
472  */
473 unsigned int pci_enabled_msi_vectors(struct pci_device *device)
474 {
475         return device->msi_registers.msg32.enable ?
476                 1 << device->msi_registers.msg32.mme : 0;
477 }
478
479 static void pci_save_msi(struct pci_device *device,
480                          const struct jailhouse_pci_capability *cap)
481 {
482         u16 bdf = device->info->bdf;
483         unsigned int n;
484
485         for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
486                 device->msi_registers.raw[n] =
487                         pci_read_config(bdf, cap->start + n * 4, 4);
488 }
489
490 static void pci_restore_msi(struct pci_device *device,
491                             const struct jailhouse_pci_capability *cap)
492 {
493         unsigned int n;
494
495         for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
496                 pci_write_config(device->info->bdf, cap->start + n * 4,
497                                  device->msi_registers.raw[n], 4);
498 }
499
500 static void pci_suppress_msix(struct pci_device *device,
501                               const struct jailhouse_pci_capability *cap,
502                               bool suppressed)
503 {
504         union pci_msix_registers regs = device->msix_registers;
505
506         if (suppressed)
507                 regs.fmask = 1;
508         pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
509 }
510
511 static void pci_save_msix(struct pci_device *device,
512                           const struct jailhouse_pci_capability *cap)
513 {
514         unsigned int n, r;
515
516         device->msix_registers.raw =
517                 pci_read_config(device->info->bdf, cap->start, 4);
518
519         for (n = 0; n < device->info->num_msix_vectors; n++)
520                 for (r = 0; r < 4; r++)
521                         device->msix_vectors[n].raw[r] =
522                                 mmio_read32(&device->msix_table[n].raw[r]);
523 }
524
525 static void pci_restore_msix(struct pci_device *device,
526                              const struct jailhouse_pci_capability *cap)
527 {
528         unsigned int n, r;
529
530         for (n = 0; n < device->info->num_msix_vectors; n++)
531                 /* only restore address/data, control is write-through */
532                 for (r = 0; r < 3; r++)
533                         mmio_write32(&device->msix_table[n].raw[r],
534                                      device->msix_vectors[n].raw[r]);
535         pci_suppress_msix(device, cap, false);
536 }
537
538 /**
539  * Prepare the handover of PCI devices to Jailhouse or back to Linux.
540  */
541 void pci_prepare_handover(void)
542 {
543         const struct jailhouse_pci_capability *cap;
544         struct pci_device *device;
545         unsigned int n;
546
547         if (!root_cell.pci_devices)
548                 return;
549
550         for_each_configured_pci_device(device, &root_cell) {
551                 if (device->cell)
552                         for_each_pci_cap(cap, device, n)
553                                 if (cap->id == PCI_CAP_MSI)
554                                         arch_pci_suppress_msi(device, cap);
555                                 else if (cap->id == PCI_CAP_MSIX)
556                                         pci_suppress_msix(device, cap, true);
557         }
558 }
559
/* Attach a physical PCI device to a cell: snapshot its BARs, register it
 * with the arch backend and, if it uses MSI-X, map its table region and
 * install the MSI-X MMIO handler. Returns 0 on success, negative error
 * code otherwise; all partial setup is rolled back on failure. */
static int pci_add_physical_device(struct cell *cell, struct pci_device *device)
{
        unsigned int n, pages, size = device->info->msix_region_size;
        int err;

        printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), cell->config->name);

        /* Capture the current BAR values as the shadow state that config
         * space moderation will serve to the cell. */
        for (n = 0; n < PCI_NUM_BARS; n ++)
                device->bar[n] = pci_read_config(device->info->bdf,
                                                 PCI_CFG_BAR + n * 4, 4);

        err = arch_pci_add_physical_device(cell, device);

        if (!err && device->info->msix_address) {
                /* Map the physical MSI-X table into hypervisor space. */
                device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
                if (!device->msix_table) {
                        err = trace_error(-ENOMEM);
                        goto error_remove_dev;
                }

                err = paging_create(&hv_paging_structs,
                                    device->info->msix_address, size,
                                    (unsigned long)device->msix_table,
                                    PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
                                    PAGING_NON_COHERENT);
                if (err)
                        goto error_page_free;

                /* Large vector counts do not fit the embedded array and
                 * need a separately allocated shadow table. */
                if (device->info->num_msix_vectors > PCI_EMBEDDED_MSIX_VECTS) {
                        pages = PAGES(sizeof(union pci_msix_vector) *
                                      device->info->num_msix_vectors);
                        device->msix_vectors = page_alloc(&mem_pool, pages);
                        if (!device->msix_vectors) {
                                err = -ENOMEM;
                                goto error_unmap_table;
                        }
                }

                mmio_region_register(cell, device->info->msix_address, size,
                                     pci_msix_access_handler, device);
        }
        return err;

error_unmap_table:
        /* cannot fail, destruction of same size as construction */
        paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
                       size, PAGING_NON_COHERENT);
error_page_free:
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
error_remove_dev:
        arch_pci_remove_physical_device(device);
        return err;
}
614
/* Detach a physical PCI device from its cell: unregister it from the arch
 * backend, disable INTx, and tear down any MSI-X mappings and shadow
 * allocations created by pci_add_physical_device. */
static void pci_remove_physical_device(struct pci_device *device)
{
        unsigned int size = device->info->msix_region_size;

        printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
        arch_pci_remove_physical_device(device);
        /* Silence legacy interrupts while the device is unassigned. */
        pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
                         PCI_CMD_INTX_OFF, 2);

        /* Devices without MSI-X never had a table mapping. */
        if (!device->msix_table)
                return;

        /* cannot fail, destruction of same size as construction */
        paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
                       size, PAGING_NON_COHERENT);
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);

        /* A separately allocated shadow vector table (large vector counts)
         * is freed; the embedded array is part of the device state. */
        if (device->msix_vectors != device->msix_vector_array)
                page_free(&mem_pool, device->msix_vectors,
                          PAGES(sizeof(union pci_msix_vector) *
                                device->info->num_msix_vectors));

        mmio_region_unregister(device->cell, device->info->msix_address);
}
640
641 /**
642  * Perform PCI-specific initialization for a new cell.
643  * @param cell  Cell to be initialized.
644  *
645  * @return 0 on success, negative error code otherwise.
646  *
647  * @see pci_cell_exit
648  */
int pci_cell_init(struct cell *cell)
{
        unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
                                           sizeof(struct pci_device));
        const struct jailhouse_pci_device *dev_infos =
                jailhouse_cell_pci_devices(cell->config);
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device, *root_device;
        unsigned int ndev, ncap;
        int err;

        /* Trap the cell's MMCONFIG window if the platform provides one. */
        if (pci_space)
                mmio_region_register(cell, mmcfg_start, mmcfg_size,
                                     pci_mmconfig_access_handler, NULL);

        if (cell->config->num_pci_devices == 0)
                return 0;

        cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
        if (!cell->pci_devices)
                return -ENOMEM;

        /*
         * We order device states in the same way as the static information
         * so that we can use the index of the latter to find the former. For
         * the other way around and for obtaining the owner cell, we use more
         * handy pointers. The cell pointer also encodes active ownership.
         */
        for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
                device = &cell->pci_devices[ndev];
                device->info = &dev_infos[ndev];
                /* Default to the embedded vector array; replaced by
                 * pci_add_physical_device for large vector counts. */
                device->msix_vectors = device->msix_vector_array;

                if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                        err = pci_ivshmem_init(cell, device);
                        if (err)
                                goto error;

                        device->cell = cell;

                        continue;
                }

                /* A physical device can have only one owner: take it away
                 * from the root cell before assigning it here. */
                root_device = pci_get_assigned_device(&root_cell,
                                                      dev_infos[ndev].bdf);
                if (root_device) {
                        pci_remove_physical_device(root_device);
                        root_device->cell = NULL;
                }

                err = pci_add_physical_device(cell, device);
                if (err)
                        goto error;

                device->cell = cell;

                /* Snapshot MSI/MSI-X state so it can be moderated and
                 * restored later. */
                for_each_pci_cap(cap, device, ncap)
                        if (cap->id == PCI_CAP_MSI)
                                pci_save_msi(device, cap);
                        else if (cap->id == PCI_CAP_MSIX)
                                pci_save_msix(device, cap);
        }

        /* During root cell setup, interrupts must be suppressed until the
         * configuration is committed. */
        if (cell == &root_cell)
                pci_prepare_handover();

        return 0;
error:
        /* Roll back all devices initialized so far. */
        pci_cell_exit(cell);
        return err;
}
720
721 static void pci_return_device_to_root_cell(struct pci_device *device)
722 {
723         struct pci_device *root_device;
724
725         for_each_configured_pci_device(root_device, &root_cell)
726                 if (root_device->info->domain == device->info->domain &&
727                     root_device->info->bdf == device->info->bdf) {
728                         if (pci_add_physical_device(&root_cell,
729                                                     root_device) < 0)
730                                 printk("WARNING: Failed to re-assign PCI "
731                                        "device to root cell\n");
732                         else
733                                 root_device->cell = &root_cell;
734                         break;
735                 }
736 }
737
738 /**
739  * Perform PCI-specific cleanup for a cell under destruction.
740  * @param cell  Cell to be destructed.
741  *
742  * @see pci_cell_init
743  */
744 void pci_cell_exit(struct cell *cell)
745 {
746         unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
747                                            sizeof(struct pci_device));
748         struct pci_device *device;
749
750         /*
751          * Do not destroy the root cell. We will shut down the complete
752          * hypervisor instead.
753          */
754         if (cell == &root_cell)
755                 return;
756
757         for_each_configured_pci_device(device, cell)
758                 if (device->cell) {
759                         if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
760                                 pci_ivshmem_exit(device);
761                         } else {
762                                 pci_remove_physical_device(device);
763                                 pci_return_device_to_root_cell(device);
764                         }
765                 }
766
767         page_free(&mem_pool, cell->pci_devices, devlist_pages);
768 }
769
770 /**
771  * Apply PCI-specific configuration changes.
772  * @param cell_added_removed    Cell that was added or removed to/from the
773  *                              system or NULL.
774  *
775  * @see arch_config_commit
776  */
void pci_config_commit(struct cell *cell_added_removed)
{
        const struct jailhouse_pci_capability *cap;
        struct pci_device *device;
        unsigned int n;
        int err = 0;

        if (!cell_added_removed)
                return;

        /* Re-apply MSI/MSI-X state of all actively owned root-cell devices
         * and lift the MSI-X suppression set up for the handover. */
        for_each_configured_pci_device(device, &root_cell)
                if (device->cell) {
                        for_each_pci_cap(cap, device, n) {
                                if (cap->id == PCI_CAP_MSI) {
                                        err = arch_pci_update_msi(device, cap);
                                } else if (cap->id == PCI_CAP_MSIX) {
                                        err = pci_update_msix(device, cap);
                                        pci_suppress_msix(device, cap, false);
                                }
                                if (err)
                                        goto error;
                        }
                        if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
                                err = pci_ivshmem_update_msix(device);
                                if (err) {
                                        /* No capability is at fault here;
                                         * clear cap so the error report
                                         * omits it. */
                                        cap = NULL;
                                        goto error;
                                }
                        }
                }
        return;

error:
        panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x",
                     PCI_BDF_PARAMS(device->info->bdf));
        if (cap)
                panic_printk(", cap %d\n", cap->id);
        else
                panic_printk("\n");
        panic_stop();
}
818
819 /**
820  * Shut down the PCI layer during hypervisor deactivation.
821  */
822 void pci_shutdown(void)
823 {
824         const struct jailhouse_pci_capability *cap;
825         struct pci_device *device;
826         unsigned int n;
827
828         if (!root_cell.pci_devices)
829                 return;
830
831         for_each_configured_pci_device(device, &root_cell) {
832                 if (!device->cell)
833                         continue;
834
835                 for_each_pci_cap(cap, device, n)
836                         if (cap->id == PCI_CAP_MSI)
837                                 pci_restore_msi(device, cap);
838                         else if (cap->id == PCI_CAP_MSIX)
839                                 pci_restore_msix(device, cap);
840
841                 if (device->cell != &root_cell)
842                         pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
843                                          PCI_CMD_INTX_OFF, 2);
844         }
845 }