/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2014
 *
 * Henning Schild <henning.schild@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */
/** @addtogroup PCI-IVSHMEM
 * Inter-cell communication using a virtual PCI device. The device provides
 * shared memory and interrupts based on MSI-X.
 *
 * The implementation in Jailhouse provides a shared memory device between
 * exactly 2 cells. The link between the two PCI devices is established by
 * choosing the same BDF, memory location, and memory size.
 */
22 #include <jailhouse/control.h>
23 #include <jailhouse/pci.h>
24 #include <jailhouse/printk.h>
25 #include <jailhouse/string.h>
26 #include <jailhouse/utils.h>
27 #include <jailhouse/processor.h>
30 #define VIRTIO_VENDOR_ID 0x1af4
31 #define IVSHMEM_DEVICE_ID 0x1110
33 /* in jailhouse we can not allow dynamic remapping of the actual shared memory
34 * the location and the size are stored here. A memory-BAR size of 0 will tell
35 * device drivers that they are dealing with a special ivshmem device */
36 #define IVSHMEM_CFG_SHMEM_PTR 0x40
37 #define IVSHMEM_CFG_SHMEM_SZ 0x48
39 #define IVSHMEM_MSIX_VECTORS 1
40 #define IVSHMEM_CFG_MSIX_CAP 0x50
42 #define IVSHMEM_REG_IVPOS 8
43 #define IVSHMEM_REG_DBELL 12
45 #define IVSHMEM_CFG_SIZE (IVSHMEM_CFG_MSIX_CAP + 12)
52 struct pci_ivshmem_endpoint {
53 u32 cspace[IVSHMEM_CFG_SIZE / sizeof(u32)];
55 struct virt_pci_bar bars[3];
56 struct pci_device *device;
57 struct pci_ivshmem_endpoint *remote;
58 struct apic_irq_message irq_msg;
61 struct pci_ivshmem_data {
62 struct pci_ivshmem_endpoint eps[2];
63 struct pci_ivshmem_data *next;
66 static struct pci_ivshmem_data *ivshmem_list;
68 static const u32 default_cspace[IVSHMEM_CFG_SIZE / sizeof(u32)] = {
69 [0x00/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
70 [0x04/4] = (PCI_STS_CAPS << 16),
71 [0x08/4] = PCI_DEV_CLASS_MEM << 24,
72 [0x2c/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
73 [0x34/4] = IVSHMEM_CFG_MSIX_CAP,
74 /* MSI-X capability */
75 [IVSHMEM_CFG_MSIX_CAP/4] = (0xC000 + IVSHMEM_MSIX_VECTORS - 1) << 16
76 | (0x00 << 8) | PCI_CAP_MSIX,
77 [(IVSHMEM_CFG_MSIX_CAP + 0x4)/4] = PCI_CFG_BAR/8 + 2,
78 [(IVSHMEM_CFG_MSIX_CAP + 0x8)/4] = 0x10 * IVSHMEM_MSIX_VECTORS |
82 static const struct virt_pci_bar default_bars[3] = {
84 .flags = PCI_BAR_64BIT,
88 /* in jailhouse we leave this BAR empty, the shared memory
89 * location and size are in our custom registers
90 * IVSHMEM_CFG_SHMEM */
92 { /* used for MSI-X vectors */
93 .flags = PCI_BAR_64BIT,
94 .sz = ((0x18 * IVSHMEM_MSIX_VECTORS) + 15) & ~0xf,
98 static u32 ivshmem_cfg_read32(struct pci_ivshmem_endpoint *ive, u8 reg)
100 return ive->cspace[reg / 4];
103 static u64 ivshmem_cfg_read64(struct pci_ivshmem_endpoint *ive, u8 reg)
105 return ((u64)ivshmem_cfg_read32(ive, reg + 4) << 32) |
106 ivshmem_cfg_read32(ive, reg);
109 static u16 ivshmem_cfg_read16(struct pci_ivshmem_endpoint *ive, u8 reg)
111 unsigned int bias = reg % 4;
113 return (u16)(ivshmem_cfg_read32(ive, reg - bias) >> (bias * 8));
116 static u8 ivshmem_cfg_read8(struct pci_ivshmem_endpoint *ive, u8 reg)
118 unsigned int bias = reg % 4;
120 return (u8)(ivshmem_cfg_read32(ive, reg - bias) >> (bias * 8));
123 static bool ivshmem_is_msix_masked(struct pci_ivshmem_endpoint *ive)
125 union pci_msix_registers c;
128 c.raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4];
129 if (!c.enable || c.fmask)
133 if (ive->device->msix_vectors[0].masked)
137 if (!(ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MASTER))
143 static int ivshmem_update_msix(struct pci_ivshmem_endpoint *ive)
145 union x86_msi_vector msi = {
146 .raw.address = ive->device->msix_vectors[0].address,
147 .raw.data = ive->device->msix_vectors[0].data,
149 struct apic_irq_message irq_msg;
151 /* before doing anything mark the cached irq_msg as invalid,
152 * on success it will be valid on return. */
153 ive->irq_msg.valid = 0;
156 if (ivshmem_is_msix_masked(ive))
159 irq_msg = pci_translate_msi_vector(ive->device, 0, 0, msi);
163 if (!apic_filter_irq_dest(ive->device->cell, &irq_msg)) {
164 printk("WARNING: ivshmem MSI-X target outside of "
165 "cell \"%s\" device %02x:%02x.%x\n",
166 ive->device->cell->config->name,
167 PCI_BDF_PARAMS(ive->device->info->bdf));
170 /* now copy the whole struct into our cache and mark the cache
171 * valid at the end */
173 ive->irq_msg = irq_msg;
175 ive->irq_msg.valid = 1;
181 * update the command register
182 * note that we only accept writes to two flags
184 static int ivshmem_write_command(struct pci_ivshmem_endpoint *ive, u16 val)
186 u16 *cmd = (u16 *)&ive->cspace[PCI_CFG_COMMAND/4];
189 if ((val & PCI_CMD_MASTER) != (*cmd & PCI_CMD_MASTER)) {
190 *cmd = (*cmd & ~PCI_CMD_MASTER) | (val & PCI_CMD_MASTER);
191 err = ivshmem_update_msix(ive);
196 *cmd = (*cmd & ~PCI_CMD_MEM) | (val & PCI_CMD_MEM);
200 static void ivshmem_write_bar(struct pci_ivshmem_endpoint *ive, u8 reg, u32 val)
202 int barn = (reg - PCI_CFG_BAR) / 8;
203 struct virt_pci_bar *bar = &(ive->bars[barn]);
207 newval = val & ((~(bar->sz - 1)) >> 32);
209 newval = (val & (~(bar->sz - 1) & ~0xf)) | (bar->flags & 0xf);
211 ive->cspace[reg / 4] = newval;
214 static int ivshmem_msix_mmio(struct pci_ivshmem_endpoint *ive, bool is_write,
215 u32 offset, u32 *value)
217 u32 *msix_table = (u32 *)ive->device->msix_vectors;
223 if (offset >= 0x10 * IVSHMEM_MSIX_VECTORS) {
233 msix_table[offset/4] = *value;
234 if (ivshmem_update_msix(ive))
237 *value = msix_table[offset/4];
244 static void ivshmem_write_doorbell(struct pci_ivshmem_endpoint *ive)
246 struct pci_ivshmem_endpoint *remote = ive->remote;
247 struct apic_irq_message irq_msg;
252 /* get a copy of the struct before using it, the read barrier makes
253 * sure the copy is consistent */
254 irq_msg = remote->irq_msg;
255 memory_load_barrier();
257 apic_send_irq(irq_msg);
260 static int ivshmem_register_mmio(struct pci_ivshmem_endpoint *ive,
261 bool is_write, u32 offset, u32 *value)
263 /* IVPosition, ro and always returns 0 */
264 if (offset == IVSHMEM_REG_IVPOS && !is_write) {
269 if (offset == IVSHMEM_REG_DBELL) {
271 ivshmem_write_doorbell(ive);
280 static int ivshmem_write_msix_control(struct pci_ivshmem_endpoint *ive, u32 val)
282 union pci_msix_registers *p = (union pci_msix_registers *)&val;
283 union pci_msix_registers newval = {
284 .raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4]
287 newval.enable = p->enable;
288 newval.fmask = p->fmask;
289 if (ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] != newval.raw) {
290 ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] = newval.raw;
291 return ivshmem_update_msix(ive);
296 static enum pci_access ivshmem_cfg_write32(struct pci_ivshmem_endpoint *ive,
300 case PCI_CFG_COMMAND:
301 if(ivshmem_write_command(ive, val & 0xffff))
302 return PCI_ACCESS_REJECT;
304 case PCI_CFG_BAR ... (PCI_CFG_BAR + 3*8):
305 ivshmem_write_bar(ive, reg, val);
307 case IVSHMEM_CFG_MSIX_CAP:
308 if (ivshmem_write_msix_control(ive, val))
309 return PCI_ACCESS_REJECT;
311 return PCI_ACCESS_DONE;
314 static enum pci_access ivshmem_cfg_write16(struct pci_ivshmem_endpoint *ive,
319 shift = (reg % 4) * 8;
320 row = ive->cspace[reg / 4];
321 row &= ~(BYTE_MASK(2) << shift);
324 return ivshmem_cfg_write32(ive, reg - (reg % 4), row);
327 static enum pci_access ivshmem_cfg_write8(struct pci_ivshmem_endpoint *ive,
333 row = ive->cspace[reg / 4];
335 rowp[(reg % 4)] = val;
337 return ivshmem_cfg_write32(ive, reg - (reg % 4), row);
341 static struct pci_ivshmem_data **ivshmem_find(struct pci_device *d,
344 struct pci_ivshmem_data **ivp, *iv;
347 for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next)) {
349 bdf2 = iv->eps[0].device->info->bdf;
350 if (d->info->bdf == bdf2) {
351 if (iv->eps[0].device == d) {
356 if (iv->eps[1].device == d) {
369 static void ivshmem_connect_cell(struct pci_ivshmem_data *iv,
370 struct pci_device *d,
371 const struct jailhouse_memory *mem,
374 struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
375 struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
377 memcpy(ive->cspace, &default_cspace, sizeof(default_cspace));
378 memcpy(ive->bars, &default_bars, sizeof(default_bars));
380 ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4] = (u32)mem->virt_start;
381 ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4 + 1] = (u32)(mem->virt_start >> 32);
382 ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4] = (u32)mem->size;
383 ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4 + 1] = (u32)(mem->size >> 32);
386 if (remote->device) {
387 ive->remote = remote;
388 remote->remote = ive;
389 ive->ivpos = (remote->ivpos + 1) % 2;
391 ive->ivpos = cellnum;
393 remote->remote = NULL;
395 d->ivshmem_endpoint = ive;
398 static void ivshmem_disconnect_cell(struct pci_ivshmem_data *iv, int cellnum)
400 struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
401 struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
403 ive->device->ivshmem_endpoint = NULL;
406 remote->remote = NULL;
410 * Handler for MMIO-accesses to this virtual PCI devices memory. Both for the
411 * BAR containing the registers, and the MSI-X BAR.
412 * @param cell The cell that issued the access.
413 * @param is_write True if write access.
414 * @param addr Address accessed.
415 * @param value Pointer to value for reading/writing.
417 * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
419 * @see pci_mmio_access_handler
421 int ivshmem_mmio_access_handler(const struct cell *cell, bool is_write,
422 u64 addr, u32 *value)
424 struct pci_ivshmem_endpoint *ive;
425 struct pci_device *device;
426 u64 mem_start, mem_sz;
428 for (device = cell->virtual_device_list; device;
429 device = device->next_virtual_device) {
430 ive = device->ivshmem_endpoint;
433 if ((ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MEM) == 0)
436 /* register BAR access */
437 mem_start = ivshmem_cfg_read64(ive, PCI_CFG_BAR) & ~0xf;
438 mem_sz = ive->bars[0].sz;
439 if (addr >= mem_start && addr <= (mem_start + mem_sz - 4))
440 return ivshmem_register_mmio(ive, is_write,
444 /* MSI-X BAR access */
445 mem_start = ivshmem_cfg_read64(ive, PCI_CFG_BAR + 2 * 8) & ~0xf;
446 mem_sz = ive->bars[2].sz;
447 if (addr >= mem_start && addr <= (mem_start + mem_sz - 4))
448 return ivshmem_msix_mmio(ive, is_write,
449 addr - mem_start, value);
456 * Handler for MMIO-write-accesses to PCI config space of this virtual device.
457 * @param dev The device that access should be performed on.
458 * @param address Config space address accessed.
459 * @param sz The amount of bytes to write.
460 * @param value The value to write to the config space.
462 * @return PCI_ACCESS_REJECT or PCI_ACCESS_DONE.
464 * @see pci_cfg_write_moderate
466 enum pci_access pci_ivshmem_cfg_write(struct pci_device *dev, u16 address,
469 struct pci_ivshmem_endpoint *ive = dev->ivshmem_endpoint;
471 if (address > (sizeof(default_cspace) - sz))
472 return PCI_ACCESS_REJECT;
475 return PCI_ACCESS_REJECT;
479 return ivshmem_cfg_write8(ive, address, (u8)value);
481 return ivshmem_cfg_write16(ive, address, (u16)value);
483 return ivshmem_cfg_write32(ive, address, value);
485 return PCI_ACCESS_REJECT;
490 * Handler for MMIO-read-accesses to PCI config space of this virtual device.
491 * @param dev The device that access should be performed on.
492 * @param address Config space address accessed.
493 * @param sz The amount of bytes to read.
494 * @param value Pointer to the return value.
496 * @return PCI_ACCESS_DONE.
498 * @see pci_cfg_read_moderate
500 enum pci_access pci_ivshmem_cfg_read(struct pci_device *dev, u16 address,
503 struct pci_ivshmem_endpoint *ive = dev->ivshmem_endpoint;
505 if (address > (sizeof(default_cspace) - sz))
512 *value = (u32)ivshmem_cfg_read8(ive, address);
515 *value = (u32)ivshmem_cfg_read16(ive, address);
518 *value = ivshmem_cfg_read32(ive, address);
523 return PCI_ACCESS_DONE;
527 /* the caller can not deal with PCI_ACCESS_REJECT for reads */
528 return PCI_ACCESS_DONE;
532 * Update cached MSI-X state of the given ivshmem device.
533 * @param dev The device to be updated.
535 * @return 0 on success, negative error code otherwise.
537 int pci_ivshmem_update_msix(struct pci_device *dev)
539 return ivshmem_update_msix(dev->ivshmem_endpoint);
543 * Register a new ivshmem device.
544 * @param cell The cell the device should be attached to.
545 * @param dev The device to be registered.
547 * @return 0 on success, negative error code otherwise.
549 int pci_ivshmem_init(struct cell *cell, struct pci_device *dev)
551 const struct jailhouse_memory *mem, *mem0;
552 struct pci_ivshmem_data **ivp;
553 struct pci_device *dev0;
555 if (dev->info->num_msix_vectors != 1)
556 return trace_error(-EINVAL);
558 if (dev->info->shmem_region >= cell->config->num_memory_regions)
559 return trace_error(-EINVAL);
561 mem = jailhouse_cell_mem_regions(cell->config)
562 + dev->info->shmem_region;
563 ivp = ivshmem_find(dev, NULL);
565 dev0 = (*ivp)->eps[0].device;
566 mem0 = jailhouse_cell_mem_regions(dev0->cell->config) +
567 dev0->info->shmem_region;
569 /* we already have a datastructure, connect second endpoint */
570 if ((mem0->phys_start == mem->phys_start) &&
571 (mem0->size == mem->size)) {
572 if ((*ivp)->eps[1].device)
573 return trace_error(-EBUSY);
574 ivshmem_connect_cell(*ivp, dev, mem, 1);
575 printk("Virtual PCI connection established "
576 "\"%s\" <--> \"%s\"\n",
577 cell->config->name, dev0->cell->config->name);
582 /* this is the first endpoint, allocate a new datastructure */
583 for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next))
585 *ivp = page_alloc(&mem_pool, 1);
588 ivshmem_connect_cell(*ivp, dev, mem, 0);
592 printk("Adding virtual PCI device %02x:%02x.%x to cell \"%s\"\n",
593 PCI_BDF_PARAMS(dev->info->bdf), cell->config->name);
599 * Unregister a ivshmem device, typically when the corresponding cell exits.
600 * @param dev The device to be stopped.
603 void pci_ivshmem_exit(struct pci_device *dev)
605 struct pci_ivshmem_data **ivp, *iv;
608 ivp = ivshmem_find(dev, &cellnum);
614 if (iv->eps[0].device == dev) {
615 if (!iv->eps[1].device) {
617 page_free(&mem_pool, iv, 1);
620 iv->eps[0] = iv->eps[1];
622 ivshmem_disconnect_cell(iv, 1);