/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2014, 2015
 *
 * Authors:
 *  Henning Schild <henning.schild@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

/** @addtogroup PCI-IVSHMEM
 * Inter Cell communication using a virtual PCI device. The device provides
 * shared memory and interrupts based on MSI-X.
 *
 * The implementation in Jailhouse provides a shared memory device between
 * exactly 2 cells. The link between the two PCI devices is established by
 * choosing the same BDF, memory location, and memory size.
 */
#include <jailhouse/control.h>
#include <jailhouse/mmio.h>
#include <jailhouse/pci.h>
#include <jailhouse/printk.h>
#include <jailhouse/processor.h>
#include <jailhouse/string.h>
#include <jailhouse/utils.h>
/* The virtual device reuses the Red Hat/virtio vendor ID with the well-known
 * QEMU ivshmem device ID so guest drivers can bind to it. */
#define VIRTIO_VENDOR_ID	0x1af4
#define IVSHMEM_DEVICE_ID	0x1110

/* in jailhouse we can not allow dynamic remapping of the actual shared memory
 * the location and the size are stored here. A memory-BAR size of 0 will tell
 * device drivers that they are dealing with a special ivshmem device */
#define IVSHMEM_CFG_SHMEM_PTR	0x40
#define IVSHMEM_CFG_SHMEM_SZ	0x48

#define IVSHMEM_MSIX_VECTORS	1
#define IVSHMEM_CFG_MSIX_CAP	0x50

/* MMIO registers inside BAR0 */
#define IVSHMEM_REG_IVPOS	8
#define IVSHMEM_REG_DBELL	12

/* emulated config space covers the standard header plus the MSI-X cap */
#define IVSHMEM_CFG_SIZE	(IVSHMEM_CFG_MSIX_CAP + 12)

#define IVSHMEM_BAR0_SIZE	256
/* MSI-X table (0x10/vector) + PBA (8 bytes), rounded up to 16 bytes */
#define IVSHMEM_BAR4_SIZE	((0x18 * IVSHMEM_MSIX_VECTORS + 0xf) & ~0xf)
51 struct pci_ivshmem_endpoint {
52 u32 cspace[IVSHMEM_CFG_SIZE / sizeof(u32)];
56 struct pci_device *device;
57 struct pci_ivshmem_endpoint *remote;
58 struct apic_irq_message irq_msg;
61 struct pci_ivshmem_data {
62 struct pci_ivshmem_endpoint eps[2];
63 struct pci_ivshmem_data *next;
66 static struct pci_ivshmem_data *ivshmem_list;
68 static const u32 default_cspace[IVSHMEM_CFG_SIZE / sizeof(u32)] = {
69 [0x00/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
70 [0x04/4] = (PCI_STS_CAPS << 16),
71 [0x08/4] = PCI_DEV_CLASS_MEM << 24,
72 [0x2c/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
73 [0x34/4] = IVSHMEM_CFG_MSIX_CAP,
74 /* MSI-X capability */
75 [IVSHMEM_CFG_MSIX_CAP/4] = (0xC000 + IVSHMEM_MSIX_VECTORS - 1) << 16
76 | (0x00 << 8) | PCI_CAP_MSIX,
77 [(IVSHMEM_CFG_MSIX_CAP + 0x4)/4] = PCI_CFG_BAR/8 + 2,
78 [(IVSHMEM_CFG_MSIX_CAP + 0x8)/4] = 0x10 * IVSHMEM_MSIX_VECTORS |
82 static void ivshmem_write_doorbell(struct pci_ivshmem_endpoint *ive)
84 struct pci_ivshmem_endpoint *remote = ive->remote;
85 struct apic_irq_message irq_msg;
90 /* get a copy of the struct before using it, the read barrier makes
91 * sure the copy is consistent */
92 irq_msg = remote->irq_msg;
93 memory_load_barrier();
95 apic_send_irq(irq_msg);
98 static enum mmio_result ivshmem_register_mmio(void *arg,
99 struct mmio_access *mmio)
101 struct pci_ivshmem_endpoint *ive = arg;
103 /* read-only IVPosition */
104 if (mmio->address == IVSHMEM_REG_IVPOS && !mmio->is_write) {
105 mmio->value = ive->ivpos;
109 if (mmio->address == IVSHMEM_REG_DBELL) {
111 ivshmem_write_doorbell(ive);
116 panic_printk("FATAL: Invalid ivshmem register %s, number %02x\n",
117 mmio->is_write ? "write" : "read", mmio->address);
121 static bool ivshmem_is_msix_masked(struct pci_ivshmem_endpoint *ive)
123 union pci_msix_registers c;
126 c.raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4];
127 if (!c.enable || c.fmask)
131 if (ive->device->msix_vectors[0].masked)
135 if (!(ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MASTER))
141 static int ivshmem_update_msix(struct pci_ivshmem_endpoint *ive)
143 union x86_msi_vector msi = {
144 .raw.address = ive->device->msix_vectors[0].address,
145 .raw.data = ive->device->msix_vectors[0].data,
147 struct apic_irq_message irq_msg;
149 /* before doing anything mark the cached irq_msg as invalid,
150 * on success it will be valid on return. */
151 ive->irq_msg.valid = 0;
154 if (ivshmem_is_msix_masked(ive))
157 irq_msg = pci_translate_msi_vector(ive->device, 0, 0, msi);
161 if (!apic_filter_irq_dest(ive->device->cell, &irq_msg)) {
162 panic_printk("FATAL: ivshmem MSI-X target outside of "
163 "cell \"%s\" device %02x:%02x.%x\n",
164 ive->device->cell->config->name,
165 PCI_BDF_PARAMS(ive->device->info->bdf));
168 /* now copy the whole struct into our cache and mark the cache
169 * valid at the end */
171 ive->irq_msg = irq_msg;
173 ive->irq_msg.valid = 1;
178 static enum mmio_result ivshmem_msix_mmio(void *arg, struct mmio_access *mmio)
180 struct pci_ivshmem_endpoint *ive = arg;
181 u32 *msix_table = (u32 *)ive->device->msix_vectors;
183 if (mmio->address % 4)
187 if (mmio->address >= 0x10 * IVSHMEM_MSIX_VECTORS) {
188 if (mmio->is_write) {
196 if (mmio->is_write) {
197 msix_table[mmio->address / 4] = mmio->value;
198 if (ivshmem_update_msix(ive))
201 mmio->value = msix_table[mmio->address / 4];
207 panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
208 "%02x:%02x.%x\n", PCI_BDF_PARAMS(ive->device->info->bdf));
213 * update the command register
214 * note that we only accept writes to two flags
216 static int ivshmem_write_command(struct pci_ivshmem_endpoint *ive, u16 val)
218 u16 *cmd = (u16 *)&ive->cspace[PCI_CFG_COMMAND/4];
219 struct pci_device *device = ive->device;
222 if ((val & PCI_CMD_MASTER) != (*cmd & PCI_CMD_MASTER)) {
223 *cmd = (*cmd & ~PCI_CMD_MASTER) | (val & PCI_CMD_MASTER);
224 err = ivshmem_update_msix(ive);
229 if ((val & PCI_CMD_MEM) != (*cmd & PCI_CMD_MEM)) {
230 if (*cmd & PCI_CMD_MEM) {
231 mmio_region_unregister(device->cell, ive->bar0_address);
232 mmio_region_unregister(device->cell, ive->bar4_address);
234 if (val & PCI_CMD_MEM) {
235 ive->bar0_address = (*(u64 *)&device->bar[0]) & ~0xfL;
236 mmio_region_register(device->cell, ive->bar0_address,
238 ivshmem_register_mmio, ive);
240 ive->bar4_address = (*(u64 *)&device->bar[4]) & ~0xfL;
241 mmio_region_register(device->cell, ive->bar4_address,
243 ivshmem_msix_mmio, ive);
245 *cmd = (*cmd & ~PCI_CMD_MEM) | (val & PCI_CMD_MEM);
251 static int ivshmem_write_msix_control(struct pci_ivshmem_endpoint *ive, u32 val)
253 union pci_msix_registers *p = (union pci_msix_registers *)&val;
254 union pci_msix_registers newval = {
255 .raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4]
258 newval.enable = p->enable;
259 newval.fmask = p->fmask;
260 if (ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] != newval.raw) {
261 ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] = newval.raw;
262 return ivshmem_update_msix(ive);
267 static struct pci_ivshmem_data **ivshmem_find(struct pci_device *d,
270 struct pci_ivshmem_data **ivp, *iv;
273 for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next)) {
275 bdf2 = iv->eps[0].device->info->bdf;
276 if (d->info->bdf == bdf2) {
277 if (iv->eps[0].device == d) {
282 if (iv->eps[1].device == d) {
295 static void ivshmem_connect_cell(struct pci_ivshmem_data *iv,
296 struct pci_device *d,
297 const struct jailhouse_memory *mem,
300 struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
301 struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
303 d->bar[0] = PCI_BAR_64BIT;
304 d->bar[4] = PCI_BAR_64BIT;
306 memcpy(ive->cspace, &default_cspace, sizeof(default_cspace));
308 ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4] = (u32)mem->virt_start;
309 ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4 + 1] = (u32)(mem->virt_start >> 32);
310 ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4] = (u32)mem->size;
311 ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4 + 1] = (u32)(mem->size >> 32);
314 if (remote->device) {
315 ive->remote = remote;
316 remote->remote = ive;
317 ive->ivpos = (remote->ivpos + 1) % 2;
319 ive->ivpos = cellnum;
321 remote->remote = NULL;
323 d->ivshmem_endpoint = ive;
326 static void ivshmem_disconnect_cell(struct pci_ivshmem_data *iv, int cellnum)
328 struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
329 struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
330 u16 cmd = *(u16 *)&ive->cspace[PCI_CFG_COMMAND / 4];
332 if (cmd & PCI_CMD_MEM) {
333 mmio_region_unregister(this_cell(), ive->bar0_address);
334 mmio_region_unregister(this_cell(), ive->bar4_address);
336 ive->device->ivshmem_endpoint = NULL;
339 remote->remote = NULL;
343 * Handler for MMIO-write-accesses to PCI config space of this virtual device.
344 * @param device The device that access should be performed on.
345 * @param row Config space DWORD row of the access.
346 * @param mask Mask selected the DWORD bytes to write.
347 * @param value DWORD to write to the config space.
349 * @return PCI_ACCESS_REJECT or PCI_ACCESS_DONE.
351 * @see pci_cfg_write_moderate
353 enum pci_access pci_ivshmem_cfg_write(struct pci_device *device,
354 unsigned int row, u32 mask, u32 value)
356 struct pci_ivshmem_endpoint *ive = device->ivshmem_endpoint;
358 if (row >= ARRAY_SIZE(default_cspace))
359 return PCI_ACCESS_REJECT;
361 value |= ive->cspace[row] & ~mask;
364 case PCI_CFG_COMMAND / 4:
365 if (ivshmem_write_command(ive, value))
366 return PCI_ACCESS_REJECT;
368 case IVSHMEM_CFG_MSIX_CAP / 4:
369 if (ivshmem_write_msix_control(ive, value))
370 return PCI_ACCESS_REJECT;
372 return PCI_ACCESS_DONE;
376 * Handler for MMIO-read-accesses to PCI config space of this virtual device.
377 * @param device The device that access should be performed on.
378 * @param address Config space address accessed.
379 * @param value Pointer to the return value.
381 * @return PCI_ACCESS_DONE.
383 * @see pci_cfg_read_moderate
385 enum pci_access pci_ivshmem_cfg_read(struct pci_device *device, u16 address,
388 struct pci_ivshmem_endpoint *ive = device->ivshmem_endpoint;
390 if (address < sizeof(default_cspace))
391 *value = ive->cspace[address / 4] >> ((address % 4) * 8);
394 return PCI_ACCESS_DONE;
398 * Update cached MSI-X state of the given ivshmem device.
399 * @param device The device to be updated.
401 * @return 0 on success, negative error code otherwise.
403 int pci_ivshmem_update_msix(struct pci_device *device)
405 return ivshmem_update_msix(device->ivshmem_endpoint);
409 * Register a new ivshmem device.
410 * @param cell The cell the device should be attached to.
411 * @param device The device to be registered.
413 * @return 0 on success, negative error code otherwise.
415 int pci_ivshmem_init(struct cell *cell, struct pci_device *device)
417 const struct jailhouse_memory *mem, *mem0;
418 struct pci_ivshmem_data **ivp;
419 struct pci_device *dev0;
421 if (device->info->num_msix_vectors != 1)
422 return trace_error(-EINVAL);
424 if (device->info->shmem_region >= cell->config->num_memory_regions)
425 return trace_error(-EINVAL);
427 mem = jailhouse_cell_mem_regions(cell->config)
428 + device->info->shmem_region;
429 ivp = ivshmem_find(device, NULL);
431 dev0 = (*ivp)->eps[0].device;
432 mem0 = jailhouse_cell_mem_regions(dev0->cell->config) +
433 dev0->info->shmem_region;
435 /* we already have a datastructure, connect second endpoint */
436 if ((mem0->phys_start == mem->phys_start) &&
437 (mem0->size == mem->size)) {
438 if ((*ivp)->eps[1].device)
439 return trace_error(-EBUSY);
440 ivshmem_connect_cell(*ivp, device, mem, 1);
441 printk("Virtual PCI connection established "
442 "\"%s\" <--> \"%s\"\n",
443 cell->config->name, dev0->cell->config->name);
448 /* this is the first endpoint, allocate a new datastructure */
449 for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next))
451 *ivp = page_alloc(&mem_pool, 1);
454 ivshmem_connect_cell(*ivp, device, mem, 0);
457 printk("Adding virtual PCI device %02x:%02x.%x to cell \"%s\"\n",
458 PCI_BDF_PARAMS(device->info->bdf), cell->config->name);
/**
 * Unregister an ivshmem device, typically when the corresponding cell exits.
 * @param device The device to be stopped.
 */
468 void pci_ivshmem_exit(struct pci_device *device)
470 struct pci_ivshmem_data **ivp, *iv;
473 ivp = ivshmem_find(device, &cellnum);
479 ivshmem_disconnect_cell(iv, cellnum);
482 if (!iv->eps[1].device) {
484 page_free(&mem_pool, iv, 1);
487 iv->eps[0] = iv->eps[1];