/*
 * Origin: hypervisor/pci_ivshmem.c (Jailhouse)
 * Commit subject: "core: ivshmem: Remove superfluous ivshmem endpoint checks"
 */
1 /*
2  * Jailhouse, a Linux-based partitioning hypervisor
3  *
4  * Copyright (c) Siemens AG, 2014
5  *
6  * Author:
7  *  Henning Schild <henning.schild@siemens.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12
13 /** @addtogroup PCI-IVSHMEM
14  * Inter Cell communication using a virtual PCI device. The device provides
15  * shared memory and interrupts based on MSI-X.
16  *
17  * The implementation in Jailhouse provides a shared memory device between
18  * exactly 2 cells. The link between the two PCI devices is established by
19  * choosing the same BDF, memory location, and memory size.
20  */
21
22 #include <jailhouse/control.h>
23 #include <jailhouse/pci.h>
24 #include <jailhouse/printk.h>
25 #include <jailhouse/string.h>
26 #include <jailhouse/utils.h>
27 #include <jailhouse/processor.h>
28 #include <asm/apic.h>
29
30 #define VIRTIO_VENDOR_ID        0x1af4
31 #define IVSHMEM_DEVICE_ID       0x1110
32
33 /* in jailhouse we can not allow dynamic remapping of the actual shared memory
34  * the location and the size are stored here. A memory-BAR size of 0 will tell
35  * device drivers that they are dealing with a special ivshmem device */
36 #define IVSHMEM_CFG_SHMEM_PTR   0x40
37 #define IVSHMEM_CFG_SHMEM_SZ    0x48
38
39 #define IVSHMEM_MSIX_VECTORS    1
40 #define IVSHMEM_CFG_MSIX_CAP    0x50
41
42 #define IVSHMEM_REG_IVPOS       8
43 #define IVSHMEM_REG_DBELL       12
44
45 #define IVSHMEM_CFG_SIZE        (IVSHMEM_CFG_MSIX_CAP + 12)
46
/* State of one emulated 64-bit BAR pair: low-nibble flag bits (e.g.
 * PCI_BAR_64BIT) reflected in the BAR's low dword, and the BAR size used
 * to mask guest writes so size probing works. sz == 0 marks an empty BAR. */
struct virt_pci_bar {
        char flags;
        u64 sz;
};
51
/* Per-cell side of an ivshmem connection. */
struct pci_ivshmem_endpoint {
        u32 cspace[IVSHMEM_CFG_SIZE / sizeof(u32)];  /* emulated config space */
        u32 ivpos;                      /* value returned for IVSHMEM_REG_IVPOS */
        struct virt_pci_bar bars[3];    /* one entry per 64-bit BAR pair */
        struct pci_device *device;      /* owning device, NULL when slot is free */
        struct pci_ivshmem_endpoint *remote; /* peer endpoint, NULL until both sides connected */
        /* cached pre-translated MSI; irq_msg.valid gates its use by the
         * doorbell path, updated under the barrier protocol in
         * ivshmem_update_msix() */
        struct apic_irq_message irq_msg;
};
60
/* One ivshmem link: the pair of endpoints that share a BDF, kept in a
 * singly-linked list of all links known to the hypervisor. */
struct pci_ivshmem_data {
        struct pci_ivshmem_endpoint eps[2];
        struct pci_ivshmem_data *next;
};

/* head of the global list of ivshmem links */
static struct pci_ivshmem_data *ivshmem_list;
67
/*
 * Config-space template copied into a fresh endpoint by
 * ivshmem_connect_cell(). Offsets follow the standard PCI type-0 header.
 */
static const u32 default_cspace[IVSHMEM_CFG_SIZE / sizeof(u32)] = {
        [0x00/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID, /* device/vendor ID */
        [0x04/4] = (PCI_STS_CAPS << 16),        /* status: capability list present */
        [0x08/4] = PCI_DEV_CLASS_MEM << 24,     /* class code: memory controller */
        [0x2c/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID, /* subsystem IDs */
        [0x34/4] = IVSHMEM_CFG_MSIX_CAP,        /* capability pointer */
        /* MSI-X capability: message control in the upper 16 bits (0xC000
         * presets the enable and function-mask bits; table size field is
         * IVSHMEM_MSIX_VECTORS - 1), next pointer 0, cap ID PCI_CAP_MSIX */
        [IVSHMEM_CFG_MSIX_CAP/4] = (0xC000 + IVSHMEM_MSIX_VECTORS - 1) << 16
                                   | (0x00 << 8) | PCI_CAP_MSIX,
        /* table offset 0, BIR = PCI_CFG_BAR/8 + 2 (the BAR pair bars[2]
         * serves as the MSI-X BAR - see default_bars) */
        [(IVSHMEM_CFG_MSIX_CAP + 0x4)/4] = PCI_CFG_BAR/8 + 2,
        /* PBA placed right behind the table in the same BAR */
        [(IVSHMEM_CFG_MSIX_CAP + 0x8)/4] = 0x10 * IVSHMEM_MSIX_VECTORS |
                                           (PCI_CFG_BAR/8 + 2),
};
81
/* BAR templates: bars[0] = register BAR, bars[1] = intentionally empty
 * (shared memory is advertised via the custom IVSHMEM_CFG_SHMEM registers
 * instead), bars[2] = MSI-X table/PBA BAR. Each entry covers a 64-bit pair. */
static const struct virt_pci_bar default_bars[3] = {
        {
                .flags = PCI_BAR_64BIT,
                .sz = 256,
        },
        {
                /* in jailhouse we leave this BAR empty, the shared memory
                 * location and size are in our custom registers
                 * IVSHMEM_CFG_SHMEM */
        },
        {       /* used for MSI-X vectors */
                .flags = PCI_BAR_64BIT,
                /* table plus PBA, rounded up to a 16-byte multiple */
                .sz = ((0x18 * IVSHMEM_MSIX_VECTORS) + 15) & ~0xf,
        }
};
97
98 static u32 ivshmem_cfg_read32(struct pci_ivshmem_endpoint *ive, u8 reg)
99 {
100         return ive->cspace[reg / 4];
101 }
102
103 static u64 ivshmem_cfg_read64(struct pci_ivshmem_endpoint *ive, u8 reg)
104 {
105         return ((u64)ivshmem_cfg_read32(ive, reg + 4) << 32) |
106                ivshmem_cfg_read32(ive, reg);
107 }
108
109 static u16 ivshmem_cfg_read16(struct pci_ivshmem_endpoint *ive, u8 reg)
110 {
111         unsigned int bias = reg % 4;
112
113         return (u16)(ivshmem_cfg_read32(ive, reg - bias) >> (bias * 8));
114 }
115
116 static u8 ivshmem_cfg_read8(struct pci_ivshmem_endpoint *ive, u8 reg)
117 {
118         unsigned int bias = reg % 4;
119
120         return (u8)(ivshmem_cfg_read32(ive, reg - bias) >> (bias * 8));
121 }
122
123 static bool ivshmem_is_msix_masked(struct pci_ivshmem_endpoint *ive)
124 {
125         union pci_msix_registers c;
126
127         /* global mask */
128         c.raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4];
129         if (!c.enable || c.fmask)
130                 return true;
131
132         /* local mask */
133         if (ive->device->msix_vectors[0].masked)
134                 return true;
135
136         /* PCI Bus Master */
137         if (!(ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MASTER))
138                 return true;
139
140         return false;
141 }
142
/*
 * Refresh the cached, pre-translated MSI (ive->irq_msg) used by the doorbell
 * path. The valid flag is the synchronization point: it is cleared (with a
 * barrier) before any update and only set again once the whole message is
 * consistent, so a concurrent ivshmem_write_doorbell() never sends a
 * half-updated interrupt.
 *
 * Returns 0 on success (including "masked, nothing cached"), -EPERM if the
 * translated vector targets a CPU outside the owning cell.
 */
static int ivshmem_update_msix(struct pci_ivshmem_endpoint *ive)
{
        union x86_msi_vector msi = {
                .raw.address = ive->device->msix_vectors[0].address,
                .raw.data = ive->device->msix_vectors[0].data,
        };
        struct apic_irq_message irq_msg;

        /* before doing anything mark the cached irq_msg as invalid,
         * on success it will be valid on return. */
        ive->irq_msg.valid = 0;
        memory_barrier();

        /* masked: leave the cache invalid so no doorbell interrupt fires */
        if (ivshmem_is_msix_masked(ive))
                return 0;

        irq_msg = pci_translate_msi_vector(ive->device, 0, 0, msi);
        if (!irq_msg.valid)
                return 0;

        /* reject vectors aimed at CPUs the owning cell does not contain */
        if (!apic_filter_irq_dest(ive->device->cell, &irq_msg)) {
                printk("WARNING: ivshmem MSI-X target outside of "
                       "cell \"%s\" device %02x:%02x.%x\n",
                       ive->device->cell->config->name,
                       PCI_BDF_PARAMS(ive->device->info->bdf));
                return -EPERM;
        }
        /* now copy the whole struct into our cache and mark the cache
         * valid at the end */
        irq_msg.valid = 0; /* copy with valid clear; set only after barrier */
        ive->irq_msg = irq_msg;
        memory_barrier();
        ive->irq_msg.valid = 1;

        return 0;
}
179
180 /**
181  * update the command register
182  * note that we only accept writes to two flags
183  */
184 static int ivshmem_write_command(struct pci_ivshmem_endpoint *ive, u16 val)
185 {
186         u16 *cmd = (u16 *)&ive->cspace[PCI_CFG_COMMAND/4];
187         int err;
188
189         if ((val & PCI_CMD_MASTER) != (*cmd & PCI_CMD_MASTER)) {
190                 *cmd = (*cmd & ~PCI_CMD_MASTER) | (val & PCI_CMD_MASTER);
191                 err = ivshmem_update_msix(ive);
192                 if (err)
193                         return err;
194         }
195
196         *cmd = (*cmd & ~PCI_CMD_MEM) | (val & PCI_CMD_MEM);
197         return 0;
198 }
199
/*
 * Emulate a write to one 32-bit BAR cell. Writable address bits are limited
 * by the BAR's size mask so that guest size probing (write all-ones, read
 * back) behaves like real hardware; the low dword additionally keeps the
 * read-only flag bits in its low nibble.
 */
static void ivshmem_write_bar(struct pci_ivshmem_endpoint *ive, u8 reg, u32 val)
{
        /* each bars[] entry covers a 64-bit BAR pair = 8 config bytes */
        int barn = (reg - PCI_CFG_BAR) / 8;
        struct virt_pci_bar *bar = &(ive->bars[barn]);
        u32 newval;

        if (reg & 4)
                /* high dword: upper half of the size mask ~(sz - 1) */
                newval = val & ((~(bar->sz - 1)) >> 32);
        else
                /* low dword: size-masked address bits plus flag nibble */
                newval = (val & (~(bar->sz - 1) & ~0xf)) | (bar->flags & 0xf);

        ive->cspace[reg / 4] = newval;
}
213
214 static int ivshmem_msix_mmio(struct pci_ivshmem_endpoint *ive, bool is_write,
215                              u32 offset, u32 *value)
216 {
217         u32 *msix_table = (u32 *)ive->device->msix_vectors;
218
219         if (offset % 4)
220                 return -1;
221
222         /* MSI-X PBA */
223         if (offset >= 0x10 * IVSHMEM_MSIX_VECTORS) {
224                 if (is_write) {
225                         return -1;
226                 } else {
227                         *value = 0;
228                         return 1;
229                 }
230         /* MSI-X Table */
231         } else {
232                 if (is_write) {
233                         msix_table[offset/4] = *value;
234                         if (ivshmem_update_msix(ive))
235                                 return -1;
236                 } else {
237                         *value = msix_table[offset/4];
238                 }
239                 return 1;
240         }
241         return -1;
242 }
243
/*
 * Ring the peer's doorbell: send the peer's cached MSI if one is valid.
 * Silently does nothing when no remote endpoint is connected or the remote
 * has no valid (unmasked, translated) vector cached.
 */
static void ivshmem_write_doorbell(struct pci_ivshmem_endpoint *ive)
{
        struct pci_ivshmem_endpoint *remote = ive->remote;
        struct apic_irq_message irq_msg;

        if (!remote)
                return;

        /* get a copy of the struct before using it, the read barrier makes
         * sure the copy is consistent */
        irq_msg = remote->irq_msg;
        memory_load_barrier();
        if (irq_msg.valid)
                apic_send_irq(irq_msg);
}
259
/*
 * Handle an MMIO access to the register BAR.
 * Only IVPosition (read-only, returns this endpoint's position 0 or 1) and
 * Doorbell (write triggers the peer's interrupt, reads return 0) are
 * implemented; everything else is an access error.
 *
 * @return 1 if handled, -1 on access error.
 */
static int ivshmem_register_mmio(struct pci_ivshmem_endpoint *ive,
                                 bool is_write, u32 offset, u32 *value)
{
        /* IVPosition register, read-only */
        if (offset == IVSHMEM_REG_IVPOS && !is_write) {
                *value = ive->ivpos;
                return 1;
        }

        if (offset == IVSHMEM_REG_DBELL) {
                if (is_write) {
                        ivshmem_write_doorbell(ive);
                } else {
                        /* doorbell reads back as 0 */
                        *value = 0;
                }
                return 1;
        }
        return -1;
}
279
280 static int ivshmem_write_msix_control(struct pci_ivshmem_endpoint *ive, u32 val)
281 {
282         union pci_msix_registers *p = (union pci_msix_registers *)&val;
283         union pci_msix_registers newval = {
284                 .raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4]
285         };
286
287         newval.enable = p->enable;
288         newval.fmask = p->fmask;
289         if (ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] != newval.raw) {
290                 ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] = newval.raw;
291                 return ivshmem_update_msix(ive);
292         }
293         return 0;
294 }
295
296 static enum pci_access ivshmem_cfg_write32(struct pci_ivshmem_endpoint *ive,
297                                            u8 reg, u32 val)
298 {
299         switch (reg) {
300         case PCI_CFG_COMMAND:
301                 if(ivshmem_write_command(ive, val & 0xffff))
302                         return PCI_ACCESS_REJECT;
303                 break;
304         case PCI_CFG_BAR ... (PCI_CFG_BAR + 5 * 4):
305                 ivshmem_write_bar(ive, reg, val);
306                 break;
307         case IVSHMEM_CFG_MSIX_CAP:
308                 if (ivshmem_write_msix_control(ive, val))
309                         return PCI_ACCESS_REJECT;
310         }
311         return PCI_ACCESS_DONE;
312 }
313
314 static enum pci_access ivshmem_cfg_write16(struct pci_ivshmem_endpoint *ive,
315                                            u8 reg, u16 val)
316 {
317         u32 row, shift;
318
319         shift = (reg % 4) * 8;
320         row = ive->cspace[reg / 4];
321         row &= ~(BYTE_MASK(2) << shift);
322         row |= val << shift;
323
324         return ivshmem_cfg_write32(ive, reg - (reg % 4), row);
325 }
326
327 static enum pci_access ivshmem_cfg_write8(struct pci_ivshmem_endpoint *ive,
328                                           u8 reg, u8 val)
329 {
330         u32 row;
331         u8 *rowp;
332
333         row = ive->cspace[reg / 4];
334         rowp = (u8 *)&row;
335         rowp[(reg % 4)] = val;
336
337         return ivshmem_cfg_write32(ive, reg - (reg % 4), row);
338 }
339
340
341 static struct pci_ivshmem_data **ivshmem_find(struct pci_device *d,
342                                               int *cellnum)
343 {
344         struct pci_ivshmem_data **ivp, *iv;
345         u16 bdf2;
346
347         for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next)) {
348                 iv = *ivp;
349                 bdf2 = iv->eps[0].device->info->bdf;
350                 if (d->info->bdf == bdf2) {
351                         if (iv->eps[0].device == d) {
352                                 if (cellnum)
353                                         *cellnum = 0;
354                                 return ivp;
355                         }
356                         if (iv->eps[1].device == d) {
357                                 if (cellnum)
358                                         *cellnum = 1;
359                                 return ivp;
360                         }
361                         if (!cellnum)
362                                 return ivp;
363                 }
364         }
365
366         return NULL;
367 }
368
/*
 * Initialize endpoint slot cellnum of a link for device d and wire it up
 * with the peer slot if that one is already occupied. The shared-memory
 * location/size from the cell config is published through the custom
 * IVSHMEM_CFG_SHMEM_* config-space registers.
 */
static void ivshmem_connect_cell(struct pci_ivshmem_data *iv,
                                 struct pci_device *d,
                                 const struct jailhouse_memory *mem,
                                 int cellnum)
{
        struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
        struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];

        memcpy(ive->cspace, &default_cspace, sizeof(default_cspace));
        memcpy(ive->bars, &default_bars, sizeof(default_bars));

        /* expose shared-memory guest address and size, split into dwords */
        ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4] = (u32)mem->virt_start;
        ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4 + 1] = (u32)(mem->virt_start >> 32);
        ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4] = (u32)mem->size;
        ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4 + 1] = (u32)(mem->size >> 32);

        ive->device = d;
        if (remote->device) {
                /* second side joining: cross-link and take the other ivpos */
                ive->remote = remote;
                remote->remote = ive;
                ive->ivpos = (remote->ivpos + 1) % 2;
        } else {
                /* first side: no peer yet */
                ive->ivpos = cellnum;
                ive->remote = NULL;
                remote->remote = NULL;
        }
        d->ivshmem_endpoint = ive;
}
397
398 static void ivshmem_disconnect_cell(struct pci_ivshmem_data *iv, int cellnum)
399 {
400         struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
401         struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
402
403         ive->device->ivshmem_endpoint = NULL;
404         ive->device = NULL;
405         ive->remote = NULL;
406         remote->remote = NULL;
407 }
408
/**
 * Handler for MMIO-accesses to this virtual PCI devices memory. Both for the
 * BAR containing the registers, and the MSI-X BAR.
 * @param cell          The cell that issued the access.
 * @param is_write      True if write access.
 * @param addr          Address accessed.
 * @param value         Pointer to value for reading/writing.
 *
 * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
 *
 * @see pci_mmio_access_handler
 */
int ivshmem_mmio_access_handler(const struct cell *cell, bool is_write,
                                u64 addr, u32 *value)
{
        struct pci_ivshmem_endpoint *ive;
        struct pci_device *device;
        u64 mem_start, mem_sz;

        /* NOTE(review): assumes every entry in the cell's virtual device
         * list has a non-NULL ivshmem_endpoint - verify against the code
         * that builds that list */
        for (device = cell->virtual_device_list; device;
             device = device->next_virtual_device) {
                /* BARs are only decoded with Memory Space enable set */
                ive = device->ivshmem_endpoint;
                if ((ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MEM) == 0)
                        continue;

                /* register BAR access */
                mem_start = ivshmem_cfg_read64(ive, PCI_CFG_BAR) & ~0xf;
                mem_sz = ive->bars[0].sz;
                if (addr >= mem_start && addr <= (mem_start + mem_sz - 4))
                        return ivshmem_register_mmio(ive, is_write,
                                                     addr - mem_start,
                                                     value);

                /* MSI-X BAR access */
                mem_start = ivshmem_cfg_read64(ive, PCI_CFG_BAR + 2 * 8) & ~0xf;
                mem_sz = ive->bars[2].sz;
                if (addr >= mem_start && addr <= (mem_start + mem_sz - 4))
                        return ivshmem_msix_mmio(ive, is_write,
                                                 addr - mem_start, value);
        }

        return 0;
}
452
453 /**
454  * Handler for MMIO-write-accesses to PCI config space of this virtual device.
455  * @param dev           The device that access should be performed on.
456  * @param address       Config space address accessed.
457  * @param sz            The amount of bytes to write.
458  * @param value         The value to write to the config space.
459  *
460  * @return PCI_ACCESS_REJECT or PCI_ACCESS_DONE.
461  *
462  * @see pci_cfg_write_moderate
463  */
464 enum pci_access pci_ivshmem_cfg_write(struct pci_device *dev, u16 address,
465                                       u8 sz, u32 value)
466 {
467         struct pci_ivshmem_endpoint *ive = dev->ivshmem_endpoint;
468
469         if (address > (sizeof(default_cspace) - sz))
470                 return PCI_ACCESS_REJECT;
471
472         switch (sz) {
473         case 1:
474                 return ivshmem_cfg_write8(ive, address, (u8)value);
475         case 2:
476                 return ivshmem_cfg_write16(ive, address, (u16)value);
477         case 4:
478                 return ivshmem_cfg_write32(ive, address, value);
479         default:
480                 return PCI_ACCESS_REJECT;
481         }
482 }
483
484 /**
485  * Handler for MMIO-read-accesses to PCI config space of this virtual device.
486  * @param dev           The device that access should be performed on.
487  * @param address       Config space address accessed.
488  * @param sz            The amount of bytes to read.
489  * @param value         Pointer to the return value.
490  *
491  * @return PCI_ACCESS_DONE.
492  *
493  * @see pci_cfg_read_moderate
494  */
495 enum pci_access pci_ivshmem_cfg_read(struct pci_device *dev, u16 address,
496                                      u8 sz, u32 *value)
497 {
498         struct pci_ivshmem_endpoint *ive = dev->ivshmem_endpoint;
499
500         if (address > (sizeof(default_cspace) - sz))
501                 goto fail;
502
503         switch (sz) {
504         case 1:
505                 *value = (u32)ivshmem_cfg_read8(ive, address);
506                 break;
507         case 2:
508                 *value = (u32)ivshmem_cfg_read16(ive, address);
509                 break;
510         case 4:
511                 *value = ivshmem_cfg_read32(ive, address);
512                 break;
513         default:
514                 goto fail;
515         }
516         return PCI_ACCESS_DONE;
517
518 fail:
519         *value = -1;
520         /* the caller can not deal with PCI_ACCESS_REJECT for reads */
521         return PCI_ACCESS_DONE;
522 }
523
/**
 * Update cached MSI-X state of the given ivshmem device.
 * @param dev   The device to be updated (must have a valid ivshmem_endpoint).
 *
 * Thin public wrapper around ivshmem_update_msix().
 *
 * @return 0 on success, negative error code otherwise.
 */
int pci_ivshmem_update_msix(struct pci_device *dev)
{
        return ivshmem_update_msix(dev->ivshmem_endpoint);
}
534
/**
 * Register a new ivshmem device.
 * @param cell          The cell the device should be attached to.
 * @param dev           The device to be registered.
 *
 * If another cell already registered a device with the same BDF and a
 * matching shared-memory region (same physical start and size), this
 * device becomes the second endpoint of that link; otherwise a new link
 * structure is allocated with this device as the first endpoint.
 *
 * @return 0 on success, negative error code otherwise (-EINVAL on a bad
 *         config, -EBUSY if the link already has two endpoints, -ENOMEM).
 */
int pci_ivshmem_init(struct cell *cell, struct pci_device *dev)
{
        const struct jailhouse_memory *mem, *mem0;
        struct pci_ivshmem_data **ivp;
        struct pci_device *dev0;

        /* the implementation supports exactly one MSI-X vector */
        if (dev->info->num_msix_vectors != 1)
                return trace_error(-EINVAL);

        /* the referenced shared-memory region must exist in the cell config */
        if (dev->info->shmem_region >= cell->config->num_memory_regions)
                return trace_error(-EINVAL);

        mem = jailhouse_cell_mem_regions(cell->config)
                + dev->info->shmem_region;
        /* cellnum == NULL: match any existing link with the same BDF */
        ivp = ivshmem_find(dev, NULL);
        if (ivp) {
                dev0 = (*ivp)->eps[0].device;
                mem0 = jailhouse_cell_mem_regions(dev0->cell->config) +
                        dev0->info->shmem_region;

                /* we already have a datastructure, connect second endpoint */
                if ((mem0->phys_start == mem->phys_start) &&
                    (mem0->size == mem->size)) {
                        if ((*ivp)->eps[1].device)
                                return trace_error(-EBUSY);
                        ivshmem_connect_cell(*ivp, dev, mem, 1);
                        printk("Virtual PCI connection established "
                                "\"%s\" <--> \"%s\"\n",
                                cell->config->name, dev0->cell->config->name);
                        goto connected;
                }
        }

        /* this is the first endpoint, allocate a new datastructure */
        for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next))
                ; /* empty loop: advance to the list tail */
        *ivp = page_alloc(&mem_pool, 1);
        if (!(*ivp))
                return -ENOMEM;
        ivshmem_connect_cell(*ivp, dev, mem, 0);

connected:
        dev->cell = cell;
        printk("Adding virtual PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(dev->info->bdf), cell->config->name);

        return 0;
}
590
591 /**
592  * Unregister a ivshmem device, typically when the corresponding cell exits.
593  * @param dev           The device to be stopped.
594  *
595  */
596 void pci_ivshmem_exit(struct pci_device *dev)
597 {
598         struct pci_ivshmem_data **ivp, *iv;
599         int cellnum;
600
601         ivp = ivshmem_find(dev, &cellnum);
602         if (!ivp || !(*ivp))
603                 return;
604
605         iv = *ivp;
606
607         if (iv->eps[0].device == dev) {
608                 if (!iv->eps[1].device) {
609                         *ivp = iv->next;
610                         page_free(&mem_pool, iv, 1);
611                         return;
612                 }
613                 iv->eps[0] = iv->eps[1];
614         }
615         ivshmem_disconnect_cell(iv, 1);
616 }