/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2014
 *
 * Author:
 *  Henning Schild <henning.schild@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

/** @addtogroup PCI-IVSHMEM
 * Inter-cell communication using a virtual PCI device. The device provides
 * shared memory and interrupts based on MSI-X.
 *
 * The implementation in Jailhouse provides a shared memory device between
 * exactly two cells. The link between the two PCI devices is established by
 * choosing the same BDF, memory location, and memory size.
 */

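/*
 * Rough, illustrative sketch of matching cell-config entries for the two
 * endpoints. This is not taken from a real configuration; the field names
 * follow what this file reads via dev->info and the cell's memory regions,
 * and all values are made up:
 *
 *     .pci_devices = {
 *             {
 *                     .type = JAILHOUSE_PCI_TYPE_IVSHMEM,
 *                     .bdf = 0x0f << 3,        (identical BDF in both cells)
 *                     .shmem_region = 2,       (index into .mem_regions)
 *                     .num_msix_vectors = 1,
 *             },
 *     },
 *     .mem_regions = {
 *             ...
 *             {       (region 2: phys_start and size must match in both
 *                      cells, see pci_ivshmem_init())
 *                     .phys_start = 0x3f100000,
 *                     .virt_start = 0x3f100000,
 *                     .size = 0x100000,
 *                     .flags = JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE,
 *             },
 *     },
 */
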
#include <jailhouse/control.h>
#include <jailhouse/pci.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <jailhouse/utils.h>
#include <jailhouse/processor.h>
#include <asm/apic.h>

#define VIRTIO_VENDOR_ID        0x1af4
#define IVSHMEM_DEVICE_ID       0x1110

/*
 * Jailhouse does not allow dynamic remapping of the actual shared memory, so
 * its location and size are reported via these custom config-space registers.
 * A memory-BAR size of 0 tells device drivers that they are dealing with a
 * special ivshmem device.
 */
#define IVSHMEM_CFG_SHMEM_PTR   0x40
#define IVSHMEM_CFG_SHMEM_SZ    0x48

#define IVSHMEM_MSIX_VECTORS    1
#define IVSHMEM_CFG_MSIX_CAP    0x50

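/* Registers emulated in BAR 0: IVPosition is read-only and reports this
 * endpoint's position (0 or 1); a write to Doorbell raises the peer's
 * interrupt, the written value itself is ignored. */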
#define IVSHMEM_REG_IVPOS       8
#define IVSHMEM_REG_DBELL       12

#define IVSHMEM_CFG_SIZE        (IVSHMEM_CFG_MSIX_CAP + 12)

struct virt_pci_bar {
        char flags;
        u64 sz;
};

struct pci_ivshmem_endpoint {
        u32 cspace[IVSHMEM_CFG_SIZE / sizeof(u32)];
        u32 ivpos;
        struct virt_pci_bar bars[3];
        struct pci_device *device;
        struct pci_ivshmem_endpoint *remote;
        struct apic_irq_message irq_msg;
};

struct pci_ivshmem_data {
        struct pci_ivshmem_endpoint eps[2];
        struct pci_ivshmem_data *next;
};

static struct pci_ivshmem_data *ivshmem_list;

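/* Initial shadow config space: ivshmem vendor/device ID, memory class code,
 * and a single MSI-X capability whose vector table and PBA both live in
 * BAR 4 (PCI_CFG_BAR/8 + 2). */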
static const u32 default_cspace[IVSHMEM_CFG_SIZE / sizeof(u32)] = {
        [0x00/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
        [0x04/4] = (PCI_STS_CAPS << 16),
        [0x08/4] = PCI_DEV_CLASS_MEM << 24,
        [0x2c/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
        [0x34/4] = IVSHMEM_CFG_MSIX_CAP,
        /* MSI-X capability */
        [IVSHMEM_CFG_MSIX_CAP/4] = (0xC000 + IVSHMEM_MSIX_VECTORS - 1) << 16
                                   | (0x00 << 8) | PCI_CAP_MSIX,
        [(IVSHMEM_CFG_MSIX_CAP + 0x4)/4] = PCI_CFG_BAR/8 + 2,
        [(IVSHMEM_CFG_MSIX_CAP + 0x8)/4] = 0x10 * IVSHMEM_MSIX_VECTORS |
                                           (PCI_CFG_BAR/8 + 2),
};

static const struct virt_pci_bar default_bars[3] = {
        {
                .flags = PCI_BAR_64BIT,
                .sz = 256,
        },
        {
                /* In Jailhouse this BAR is left empty; the shared memory
                 * location and size are exposed via the custom
                 * IVSHMEM_CFG_SHMEM registers instead. */
        },
        {       /* used for MSI-X vectors */
                .flags = PCI_BAR_64BIT,
                .sz = ((0x18 * IVSHMEM_MSIX_VECTORS) + 15) & ~0xf,
        }
};

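/* Read accessors for the shadow config space; 8-bit and 16-bit reads extract
 * the requested bytes from the containing 32-bit register. */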
static u32 ivshmem_cfg_read32(struct pci_ivshmem_endpoint *ive, u8 reg)
{
        return ive->cspace[reg / 4];
}

static u16 ivshmem_cfg_read16(struct pci_ivshmem_endpoint *ive, u8 reg)
{
        unsigned int bias = reg % 4;

        return (u16)(ivshmem_cfg_read32(ive, reg - bias) >> (bias * 8));
}

static u8 ivshmem_cfg_read8(struct pci_ivshmem_endpoint *ive, u8 reg)
{
        unsigned int bias = reg % 4;

        return (u8)(ivshmem_cfg_read32(ive, reg - bias) >> (bias * 8));
}

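/* Interrupt delivery counts as masked while MSI-X is globally disabled or
 * function-masked, while the single vector is masked, or while bus mastering
 * is disabled in the command register. */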
static bool ivshmem_is_msix_masked(struct pci_ivshmem_endpoint *ive)
{
        union pci_msix_registers c;

        /* global mask */
        c.raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4];
        if (!c.enable || c.fmask)
                return true;

        /* local mask */
        if (ive->device->msix_vectors[0].masked)
                return true;

        /* PCI Bus Master */
        if (!(ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MASTER))
                return true;

        return false;
}

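/* Re-translate the guest-programmed MSI-X vector into an APIC IRQ message and
 * cache it in ive->irq_msg. The valid flag is cleared before and set after
 * the update, with barriers in between, so that the doorbell path on the
 * remote side never uses a half-written message. */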
static int ivshmem_update_msix(struct pci_ivshmem_endpoint *ive)
{
        union x86_msi_vector msi = {
                .raw.address = ive->device->msix_vectors[0].address,
                .raw.data = ive->device->msix_vectors[0].data,
        };
        struct apic_irq_message irq_msg;

        /* Before doing anything, mark the cached irq_msg as invalid;
         * on success it will be valid again on return. */
        ive->irq_msg.valid = 0;
        memory_barrier();

        if (ivshmem_is_msix_masked(ive))
                return 0;

        irq_msg = pci_translate_msi_vector(ive->device, 0, 0, msi);
        if (!irq_msg.valid)
                return 0;

        if (!apic_filter_irq_dest(ive->device->cell, &irq_msg)) {
                panic_printk("FATAL: ivshmem MSI-X target outside of "
                             "cell \"%s\" device %02x:%02x.%x\n",
                             ive->device->cell->config->name,
                             PCI_BDF_PARAMS(ive->device->info->bdf));
                return -EPERM;
        }
        /* Now copy the whole struct into our cache and only mark the cache
         * valid at the end. */
        irq_msg.valid = 0;
        ive->irq_msg = irq_msg;
        memory_barrier();
        ive->irq_msg.valid = 1;

        return 0;
}

/**
 * Update the command register.
 * Note that only two flags are writable: Bus Master and Memory Space enable;
 * all other bits remain unchanged.
 */
static int ivshmem_write_command(struct pci_ivshmem_endpoint *ive, u16 val)
{
        u16 *cmd = (u16 *)&ive->cspace[PCI_CFG_COMMAND/4];
        int err;

        if ((val & PCI_CMD_MASTER) != (*cmd & PCI_CMD_MASTER)) {
                *cmd = (*cmd & ~PCI_CMD_MASTER) | (val & PCI_CMD_MASTER);
                err = ivshmem_update_msix(ive);
                if (err)
                        return err;
        }

        *cmd = (*cmd & ~PCI_CMD_MEM) | (val & PCI_CMD_MEM);
        return 0;
}

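/* MMIO handler for the MSI-X BAR: offsets below 0x10 * IVSHMEM_MSIX_VECTORS
 * access the vector table, higher offsets read as an empty pending-bit array.
 * Writes to the PBA and unaligned accesses are rejected. */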
static int ivshmem_msix_mmio(struct pci_ivshmem_endpoint *ive, bool is_write,
                             u32 offset, u32 *value)
{
        u32 *msix_table = (u32 *)ive->device->msix_vectors;

        if (offset % 4)
                goto fail;

        /* MSI-X PBA */
        if (offset >= 0x10 * IVSHMEM_MSIX_VECTORS) {
                if (is_write) {
                        goto fail;
                } else {
                        *value = 0;
                        return 1;
                }
        /* MSI-X Table */
        } else {
                if (is_write) {
                        msix_table[offset/4] = *value;
                        if (ivshmem_update_msix(ive))
                                return -1;
                } else {
                        *value = msix_table[offset/4];
                }
                return 1;
        }

fail:
        panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
                     "%02x:%02x.%x\n", PCI_BDF_PARAMS(ive->device->info->bdf));
        return -1;
}

static void ivshmem_write_doorbell(struct pci_ivshmem_endpoint *ive)
{
        struct pci_ivshmem_endpoint *remote = ive->remote;
        struct apic_irq_message irq_msg;

        if (!remote)
                return;

        /* Get a copy of the struct before using it; the read barrier makes
         * sure the copy is consistent. */
        irq_msg = remote->irq_msg;
        memory_load_barrier();
        if (irq_msg.valid)
                apic_send_irq(irq_msg);
}

static int ivshmem_register_mmio(struct pci_ivshmem_endpoint *ive,
                                 bool is_write, u32 offset, u32 *value)
{
        /* read-only IVPosition */
        if (offset == IVSHMEM_REG_IVPOS && !is_write) {
                *value = ive->ivpos;
                return 1;
        }

        if (offset == IVSHMEM_REG_DBELL) {
                if (is_write) {
                        ivshmem_write_doorbell(ive);
                } else {
                        *value = 0;
                }
                return 1;
        }
        panic_printk("FATAL: Invalid ivshmem register %s, number %02x\n",
                     is_write ? "write" : "read", offset);
        return -1;
}

static int ivshmem_write_msix_control(struct pci_ivshmem_endpoint *ive, u32 val)
{
        union pci_msix_registers *p = (union pci_msix_registers *)&val;
        union pci_msix_registers newval = {
                .raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4]
        };

        newval.enable = p->enable;
        newval.fmask = p->fmask;
        if (ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] != newval.raw) {
                ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] = newval.raw;
                return ivshmem_update_msix(ive);
        }
        return 0;
}

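/* Look up the connection a device belongs to by matching its BDF against the
 * registered endpoints. If cellnum is non-NULL, the device must already be
 * connected, and its endpoint index (0 or 1) is returned through cellnum. */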
static struct pci_ivshmem_data **ivshmem_find(struct pci_device *d,
                                              int *cellnum)
{
        struct pci_ivshmem_data **ivp, *iv;
        u16 bdf2;

        for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next)) {
                iv = *ivp;
                bdf2 = iv->eps[0].device->info->bdf;
                if (d->info->bdf == bdf2) {
                        if (iv->eps[0].device == d) {
                                if (cellnum)
                                        *cellnum = 0;
                                return ivp;
                        }
                        if (iv->eps[1].device == d) {
                                if (cellnum)
                                        *cellnum = 1;
                                return ivp;
                        }
                        if (!cellnum)
                                return ivp;
                }
        }

        return NULL;
}

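/* Attach a device as endpoint @cellnum of a connection: initialize its shadow
 * config space and BARs from the defaults, expose the shared-memory location
 * and size through the custom registers, and link it with the peer endpoint
 * if one is already connected. */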
static void ivshmem_connect_cell(struct pci_ivshmem_data *iv,
                                 struct pci_device *d,
                                 const struct jailhouse_memory *mem,
                                 int cellnum)
{
        struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
        struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];

        memcpy(ive->cspace, &default_cspace, sizeof(default_cspace));
        memcpy(ive->bars, &default_bars, sizeof(default_bars));

        ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4] = (u32)mem->virt_start;
        ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4 + 1] = (u32)(mem->virt_start >> 32);
        ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4] = (u32)mem->size;
        ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4 + 1] = (u32)(mem->size >> 32);

        ive->device = d;
        if (remote->device) {
                ive->remote = remote;
                remote->remote = ive;
                ive->ivpos = (remote->ivpos + 1) % 2;
        } else {
                ive->ivpos = cellnum;
                ive->remote = NULL;
                remote->remote = NULL;
        }
        d->ivshmem_endpoint = ive;
}

static void ivshmem_disconnect_cell(struct pci_ivshmem_data *iv, int cellnum)
{
        struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
        struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];

        ive->device->ivshmem_endpoint = NULL;
        ive->device = NULL;
        ive->remote = NULL;
        remote->remote = NULL;
}

/**
 * Handler for MMIO accesses to this virtual PCI device's memory, covering
 * both the BAR that contains the registers and the MSI-X BAR.
 * @param cell          The cell that issued the access.
 * @param is_write      True if write access.
 * @param addr          Address accessed.
 * @param value         Pointer to value for reading/writing.
 *
 * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
 *
 * @see pci_mmio_access_handler
 */
int ivshmem_mmio_access_handler(const struct cell *cell, bool is_write,
                                u64 addr, u32 *value)
{
        struct pci_ivshmem_endpoint *ive;
        struct pci_device *device;
        u64 mem_start, mem_sz;

        for (device = cell->virtual_device_list; device;
             device = device->next_virtual_device) {
                ive = device->ivshmem_endpoint;
                if ((ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MEM) == 0)
                        continue;

                /* BAR0: registers */
                mem_start = (*(u64 *)&device->bar[0]) & ~0xfL;
                mem_sz = ive->bars[0].sz;
                if (addr >= mem_start && addr <= (mem_start + mem_sz - 4))
                        return ivshmem_register_mmio(ive, is_write,
                                                     addr - mem_start,
                                                     value);

                /* BAR4: MSI-X */
                mem_start = (*(u64 *)&device->bar[4]) & ~0xfL;
                mem_sz = ive->bars[2].sz;
                if (addr >= mem_start && addr <= (mem_start + mem_sz - 4))
                        return ivshmem_msix_mmio(ive, is_write,
                                                 addr - mem_start, value);
        }

        return 0;
}

/**
 * Handler for write accesses to the PCI config space of this virtual device.
 * @param dev           The device the access should be performed on.
 * @param row           Config space DWORD row of the access.
 * @param mask          Mask selecting the DWORD bytes to write.
 * @param value         DWORD to write to the config space.
 *
 * @return PCI_ACCESS_REJECT or PCI_ACCESS_DONE.
 *
 * @see pci_cfg_write_moderate
 */
enum pci_access pci_ivshmem_cfg_write(struct pci_device *dev, unsigned int row,
                                      u32 mask, u32 value)
{
        struct pci_ivshmem_endpoint *ive = dev->ivshmem_endpoint;

        if (row >= ARRAY_SIZE(default_cspace))
                return PCI_ACCESS_REJECT;

        value |= ive->cspace[row] & ~mask;

        switch (row) {
        case PCI_CFG_COMMAND / 4:
                if (ivshmem_write_command(ive, value))
                        return PCI_ACCESS_REJECT;
                break;
        case IVSHMEM_CFG_MSIX_CAP / 4:
                if (ivshmem_write_msix_control(ive, value))
                        return PCI_ACCESS_REJECT;
        }
        return PCI_ACCESS_DONE;
}

/**
 * Handler for read accesses to the PCI config space of this virtual device.
 * @param dev           The device the access should be performed on.
 * @param address       Config space address accessed.
 * @param sz            The number of bytes to read.
 * @param value         Pointer to the return value.
 *
 * @return PCI_ACCESS_DONE.
 *
 * @see pci_cfg_read_moderate
 */
enum pci_access pci_ivshmem_cfg_read(struct pci_device *dev, u16 address,
                                     u8 sz, u32 *value)
{
        struct pci_ivshmem_endpoint *ive = dev->ivshmem_endpoint;

        if (address > (sizeof(default_cspace) - sz))
                goto fail;

        switch (sz) {
        case 1:
                *value = (u32)ivshmem_cfg_read8(ive, address);
                break;
        case 2:
                *value = (u32)ivshmem_cfg_read16(ive, address);
                break;
        case 4:
                *value = ivshmem_cfg_read32(ive, address);
                break;
        default:
                goto fail;
        }
        return PCI_ACCESS_DONE;

fail:
        *value = -1;
        /* the caller cannot deal with PCI_ACCESS_REJECT for reads */
        return PCI_ACCESS_DONE;
}

/**
 * Update cached MSI-X state of the given ivshmem device.
 * @param dev   The device to be updated.
 *
 * @return 0 on success, negative error code otherwise.
 */
int pci_ivshmem_update_msix(struct pci_device *dev)
{
        return ivshmem_update_msix(dev->ivshmem_endpoint);
}

/**
 * Register a new ivshmem device.
 * @param cell          The cell the device should be attached to.
 * @param dev           The device to be registered.
 *
 * @return 0 on success, negative error code otherwise.
 */
int pci_ivshmem_init(struct cell *cell, struct pci_device *dev)
{
        const struct jailhouse_memory *mem, *mem0;
        struct pci_ivshmem_data **ivp;
        struct pci_device *dev0;

        if (dev->info->num_msix_vectors != 1)
                return trace_error(-EINVAL);

        if (dev->info->shmem_region >= cell->config->num_memory_regions)
                return trace_error(-EINVAL);

        mem = jailhouse_cell_mem_regions(cell->config)
                + dev->info->shmem_region;
        ivp = ivshmem_find(dev, NULL);
        if (ivp) {
                dev0 = (*ivp)->eps[0].device;
                mem0 = jailhouse_cell_mem_regions(dev0->cell->config) +
                        dev0->info->shmem_region;

                /* we already have a data structure, connect the second
                 * endpoint */
                if ((mem0->phys_start == mem->phys_start) &&
                    (mem0->size == mem->size)) {
                        if ((*ivp)->eps[1].device)
                                return trace_error(-EBUSY);
                        ivshmem_connect_cell(*ivp, dev, mem, 1);
                        printk("Virtual PCI connection established "
                                "\"%s\" <--> \"%s\"\n",
                                cell->config->name, dev0->cell->config->name);
                        goto connected;
                }
        }

        /* this is the first endpoint, allocate a new data structure */
        for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next))
                ; /* empty loop */
        *ivp = page_alloc(&mem_pool, 1);
        if (!(*ivp))
                return -ENOMEM;
        ivshmem_connect_cell(*ivp, dev, mem, 0);

connected:
        dev->cell = cell;
        printk("Adding virtual PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(dev->info->bdf), cell->config->name);

        return 0;
}

/**
 * Unregister an ivshmem device, typically when the corresponding cell exits.
 * @param dev           The device to be stopped.
 */
void pci_ivshmem_exit(struct pci_device *dev)
{
        struct pci_ivshmem_data **ivp, *iv;
        int cellnum;

        ivp = ivshmem_find(dev, &cellnum);
        if (!ivp || !(*ivp))
                return;

        iv = *ivp;

        if (iv->eps[0].device == dev) {
                if (!iv->eps[1].device) {
                        *ivp = iv->next;
                        page_free(&mem_pool, iv, 1);
                        return;
                }
                iv->eps[0] = iv->eps[1];
        }
        ivshmem_disconnect_cell(iv, 1);
}