]> rtime.felk.cvut.cz Git - jailhouse.git/blob - hypervisor/pci_ivshmem.c
arm: Make sure to not queue interrupt that were rejected as duplicates
[jailhouse.git] / hypervisor / pci_ivshmem.c
1 /*
2  * Jailhouse, a Linux-based partitioning hypervisor
3  *
4  * Copyright (c) Siemens AG, 2014, 2015
5  *
6  * Author:
7  *  Henning Schild <henning.schild@siemens.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12
13 /** @addtogroup PCI-IVSHMEM
14  * Inter Cell communication using a virtual PCI device. The device provides
15  * shared memory and interrupts based on MSI-X.
16  *
17  * The implementation in Jailhouse provides a shared memory device between
18  * exactly 2 cells. The link between the two PCI devices is established by
19  * choosing the same BDF, memory location, and memory size.
20  */
21
22 #include <jailhouse/control.h>
23 #include <jailhouse/mmio.h>
24 #include <jailhouse/pci.h>
25 #include <jailhouse/printk.h>
26 #include <jailhouse/string.h>
27 #include <jailhouse/utils.h>
28 #include <jailhouse/processor.h>
29 #include <asm/apic.h>
30
/* PCI IDs exposed to the guest; 0x1af4 is the Red Hat/virtio vendor ID and
 * 0x1110 the ivshmem device ID — NOTE(review): presumably chosen to match
 * QEMU's ivshmem device so existing guest drivers bind; confirm. */
#define VIRTIO_VENDOR_ID        0x1af4
#define IVSHMEM_DEVICE_ID       0x1110

/* in jailhouse we can not allow dynamic remapping of the actual shared memory
 * the location and the size are stored here. A memory-BAR size of 0 will tell
 * device drivers that they are dealing with a special ivshmem device */
#define IVSHMEM_CFG_SHMEM_PTR   0x40
#define IVSHMEM_CFG_SHMEM_SZ    0x48

/* a single MSI-X vector; its capability lives at config offset 0x50 */
#define IVSHMEM_MSIX_VECTORS    1
#define IVSHMEM_CFG_MSIX_CAP    0x50

/* BAR0 register offsets (see ivshmem_register_mmio) */
#define IVSHMEM_REG_IVPOS       8
#define IVSHMEM_REG_DBELL       12

/* moderated config space: everything up to and including the 12-byte
 * MSI-X capability */
#define IVSHMEM_CFG_SIZE        (IVSHMEM_CFG_MSIX_CAP + 12)

#define IVSHMEM_BAR0_SIZE       256
/* MSI-X table (0x10 bytes/vector) plus PBA, rounded up to 16 bytes */
#define IVSHMEM_BAR4_SIZE       ((0x18 * IVSHMEM_MSIX_VECTORS + 0xf) & ~0xf)
50
/* Per-cell side of an ivshmem link. */
struct pci_ivshmem_endpoint {
	/* moderated PCI config space content presented to the guest */
	u32 cspace[IVSHMEM_CFG_SIZE / sizeof(u32)];
	/* value returned by the read-only IVPosition register (0 or 1) */
	u32 ivpos;
	/* guest-programmed BAR addresses, cached for region unregistration */
	u64 bar0_address;
	u64 bar4_address;
	/* owning device; NULL while this endpoint slot is unused */
	struct pci_device *device;
	/* peer endpoint, NULL until both sides are connected */
	struct pci_ivshmem_endpoint *remote;
	/* cached, pre-translated doorbell interrupt; irq_msg.valid gates
	 * delivery (see ivshmem_write_doorbell/ivshmem_update_msix) */
	struct apic_irq_message irq_msg;
};

/* One shared-memory link between exactly two cells (list node). */
struct pci_ivshmem_data {
	struct pci_ivshmem_endpoint eps[2];
	struct pci_ivshmem_data *next;
};

/* head of the list of all established ivshmem links */
static struct pci_ivshmem_data *ivshmem_list;
67
/* Initial config space content for a freshly connected endpoint. */
static const u32 default_cspace[IVSHMEM_CFG_SIZE / sizeof(u32)] = {
	/* vendor / device ID */
	[0x00/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
	/* status: capability list present */
	[0x04/4] = (PCI_STS_CAPS << 16),
	/* class code: memory controller */
	[0x08/4] = PCI_DEV_CLASS_MEM << 24,
	/* subsystem vendor / device ID mirror the primary IDs */
	[0x2c/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
	/* capability pointer -> MSI-X capability */
	[0x34/4] = IVSHMEM_CFG_MSIX_CAP,
	/* MSI-X capability */
	/* message control: table size = vectors - 1, cap ID = MSI-X */
	[IVSHMEM_CFG_MSIX_CAP/4] = (0xC000 + IVSHMEM_MSIX_VECTORS - 1) << 16
				   | (0x00 << 8) | PCI_CAP_MSIX,
	/* table: offset 0 in BAR 4 (BIR = PCI_CFG_BAR/8 + 2 = 4) */
	[(IVSHMEM_CFG_MSIX_CAP + 0x4)/4] = PCI_CFG_BAR/8 + 2,
	/* PBA: behind the table in BAR 4 */
	[(IVSHMEM_CFG_MSIX_CAP + 0x8)/4] = 0x10 * IVSHMEM_MSIX_VECTORS |
					   (PCI_CFG_BAR/8 + 2),
};
81
/*
 * Deliver the doorbell interrupt to the remote endpoint, if one is
 * connected and currently has a valid, unmasked MSI-X vector cached.
 */
static void ivshmem_write_doorbell(struct pci_ivshmem_endpoint *ive)
{
	struct pci_ivshmem_endpoint *remote = ive->remote;
	struct apic_irq_message irq_msg;

	/* no peer connected (yet) - doorbell rings are silently dropped */
	if (!remote)
		return;

	/* get a copy of the struct before using it, the read barrier makes
	 * sure the copy is consistent; pairs with the memory_barrier() that
	 * ivshmem_update_msix issues before setting irq_msg.valid */
	irq_msg = remote->irq_msg;
	memory_load_barrier();
	if (irq_msg.valid)
		apic_send_irq(irq_msg);
}
97
98 static enum mmio_result ivshmem_register_mmio(void *arg,
99                                               struct mmio_access *mmio)
100 {
101         struct pci_ivshmem_endpoint *ive = arg;
102
103         /* read-only IVPosition */
104         if (mmio->address == IVSHMEM_REG_IVPOS && !mmio->is_write) {
105                 mmio->value = ive->ivpos;
106                 return MMIO_HANDLED;
107         }
108
109         if (mmio->address == IVSHMEM_REG_DBELL) {
110                 if (mmio->is_write)
111                         ivshmem_write_doorbell(ive);
112                 else
113                         mmio->value = 0;
114                 return MMIO_HANDLED;
115         }
116         panic_printk("FATAL: Invalid ivshmem register %s, number %02x\n",
117                      mmio->is_write ? "write" : "read", mmio->address);
118         return MMIO_ERROR;
119 }
120
121 static bool ivshmem_is_msix_masked(struct pci_ivshmem_endpoint *ive)
122 {
123         union pci_msix_registers c;
124
125         /* global mask */
126         c.raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4];
127         if (!c.enable || c.fmask)
128                 return true;
129
130         /* local mask */
131         if (ive->device->msix_vectors[0].masked)
132                 return true;
133
134         /* PCI Bus Master */
135         if (!(ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MASTER))
136                 return true;
137
138         return false;
139 }
140
/*
 * Recompute the cached doorbell interrupt message from the guest's current
 * MSI-X vector, mask state, and command register.
 *
 * Returns 0 on success (including the case that the vector is masked or
 * untranslatable, leaving the cache invalid), -EPERM if the requested
 * destination lies outside the owning cell.
 */
static int ivshmem_update_msix(struct pci_ivshmem_endpoint *ive)
{
	union x86_msi_vector msi = {
		.raw.address = ive->device->msix_vectors[0].address,
		.raw.data = ive->device->msix_vectors[0].data,
	};
	struct apic_irq_message irq_msg;

	/* before doing anything mark the cached irq_msg as invalid,
	 * on success it will be valid on return. */
	ive->irq_msg.valid = 0;
	memory_barrier();

	/* a masked vector simply leaves the cache invalid - not an error */
	if (ivshmem_is_msix_masked(ive))
		return 0;

	irq_msg = pci_translate_msi_vector(ive->device, 0, 0, msi);
	if (!irq_msg.valid)
		return 0;

	if (!apic_filter_irq_dest(ive->device->cell, &irq_msg)) {
		panic_printk("FATAL: ivshmem MSI-X target outside of "
			     "cell \"%s\" device %02x:%02x.%x\n",
			     ive->device->cell->config->name,
			     PCI_BDF_PARAMS(ive->device->info->bdf));
		return -EPERM;
	}
	/* now copy the whole struct into our cache and mark the cache
	 * valid at the end; the barrier orders the payload store before
	 * the valid flag so ivshmem_write_doorbell never sends a
	 * half-updated message */
	irq_msg.valid = 0;
	ive->irq_msg = irq_msg;
	memory_barrier();
	ive->irq_msg.valid = 1;

	return 0;
}
177
178 static enum mmio_result ivshmem_msix_mmio(void *arg, struct mmio_access *mmio)
179 {
180         struct pci_ivshmem_endpoint *ive = arg;
181         u32 *msix_table = (u32 *)ive->device->msix_vectors;
182
183         if (mmio->address % 4)
184                 goto fail;
185
186         /* MSI-X PBA */
187         if (mmio->address >= 0x10 * IVSHMEM_MSIX_VECTORS) {
188                 if (mmio->is_write) {
189                         goto fail;
190                 } else {
191                         mmio->value = 0;
192                         return MMIO_HANDLED;
193                 }
194         /* MSI-X Table */
195         } else {
196                 if (mmio->is_write) {
197                         msix_table[mmio->address / 4] = mmio->value;
198                         if (ivshmem_update_msix(ive))
199                                 return MMIO_ERROR;
200                 } else {
201                         mmio->value = msix_table[mmio->address / 4];
202                 }
203                 return MMIO_HANDLED;
204         }
205
206 fail:
207         panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
208                      "%02x:%02x.%x\n", PCI_BDF_PARAMS(ive->device->info->bdf));
209         return MMIO_ERROR;
210 }
211
/**
 * update the command register
 * note that we only accept writes to two flags
 *
 * Bus Master influences MSI-X delivery (see ivshmem_is_msix_masked), so
 * toggling it re-derives the cached interrupt. Toggling Memory Space
 * registers/unregisters the BAR0/BAR4 MMIO regions.
 */
static int ivshmem_write_command(struct pci_ivshmem_endpoint *ive, u16 val)
{
	u16 *cmd = (u16 *)&ive->cspace[PCI_CFG_COMMAND/4];
	struct pci_device *device = ive->device;
	int err;

	if ((val & PCI_CMD_MASTER) != (*cmd & PCI_CMD_MASTER)) {
		/* commit the new Bus Master state before recomputing the
		 * vector - ivshmem_update_msix reads it from cspace */
		*cmd = (*cmd & ~PCI_CMD_MASTER) | (val & PCI_CMD_MASTER);
		err = ivshmem_update_msix(ive);
		if (err)
			return err;
	}

	if ((val & PCI_CMD_MEM) != (*cmd & PCI_CMD_MEM)) {
		/* *cmd still holds the OLD state here: drop the previously
		 * registered regions before installing new ones */
		if (*cmd & PCI_CMD_MEM) {
			mmio_region_unregister(device->cell, ive->bar0_address);
			mmio_region_unregister(device->cell, ive->bar4_address);
		}
		if (val & PCI_CMD_MEM) {
			/* read low+high dwords of the 64-bit BARs in one go;
			 * mask out the BAR type/flag bits */
			ive->bar0_address = (*(u64 *)&device->bar[0]) & ~0xfL;
			mmio_region_register(device->cell, ive->bar0_address,
					     IVSHMEM_BAR0_SIZE,
					     ivshmem_register_mmio, ive);

			ive->bar4_address = (*(u64 *)&device->bar[4]) & ~0xfL;
			mmio_region_register(device->cell, ive->bar4_address,
					     IVSHMEM_BAR4_SIZE,
					     ivshmem_msix_mmio, ive);
		}
		*cmd = (*cmd & ~PCI_CMD_MEM) | (val & PCI_CMD_MEM);
	}

	return 0;
}
250
251 static int ivshmem_write_msix_control(struct pci_ivshmem_endpoint *ive, u32 val)
252 {
253         union pci_msix_registers *p = (union pci_msix_registers *)&val;
254         union pci_msix_registers newval = {
255                 .raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4]
256         };
257
258         newval.enable = p->enable;
259         newval.fmask = p->fmask;
260         if (ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] != newval.raw) {
261                 ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] = newval.raw;
262                 return ivshmem_update_msix(ive);
263         }
264         return 0;
265 }
266
267 static struct pci_ivshmem_data **ivshmem_find(struct pci_device *d,
268                                               int *cellnum)
269 {
270         struct pci_ivshmem_data **ivp, *iv;
271         u16 bdf2;
272
273         for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next)) {
274                 iv = *ivp;
275                 bdf2 = iv->eps[0].device->info->bdf;
276                 if (d->info->bdf == bdf2) {
277                         if (iv->eps[0].device == d) {
278                                 if (cellnum)
279                                         *cellnum = 0;
280                                 return ivp;
281                         }
282                         if (iv->eps[1].device == d) {
283                                 if (cellnum)
284                                         *cellnum = 1;
285                                 return ivp;
286                         }
287                         if (!cellnum)
288                                 return ivp;
289                 }
290         }
291
292         return NULL;
293 }
294
295 static void ivshmem_connect_cell(struct pci_ivshmem_data *iv,
296                                  struct pci_device *d,
297                                  const struct jailhouse_memory *mem,
298                                  int cellnum)
299 {
300         struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
301         struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
302
303         d->bar[0] = PCI_BAR_64BIT;
304         d->bar[4] = PCI_BAR_64BIT;
305
306         memcpy(ive->cspace, &default_cspace, sizeof(default_cspace));
307
308         ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4] = (u32)mem->virt_start;
309         ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4 + 1] = (u32)(mem->virt_start >> 32);
310         ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4] = (u32)mem->size;
311         ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4 + 1] = (u32)(mem->size >> 32);
312
313         ive->device = d;
314         if (remote->device) {
315                 ive->remote = remote;
316                 remote->remote = ive;
317                 ive->ivpos = (remote->ivpos + 1) % 2;
318         } else {
319                 ive->ivpos = cellnum;
320                 ive->remote = NULL;
321                 remote->remote = NULL;
322         }
323         d->ivshmem_endpoint = ive;
324 }
325
326 static void ivshmem_disconnect_cell(struct pci_ivshmem_data *iv, int cellnum)
327 {
328         struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
329         struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
330         u16 cmd = *(u16 *)&ive->cspace[PCI_CFG_COMMAND / 4];
331
332         if (cmd & PCI_CMD_MEM) {
333                 mmio_region_unregister(this_cell(), ive->bar0_address);
334                 mmio_region_unregister(this_cell(), ive->bar4_address);
335         }
336         ive->device->ivshmem_endpoint = NULL;
337         ive->device = NULL;
338         ive->remote = NULL;
339         remote->remote = NULL;
340 }
341
342 /**
343  * Handler for MMIO-write-accesses to PCI config space of this virtual device.
344  * @param device        The device that access should be performed on.
345  * @param row           Config space DWORD row of the access.
346  * @param mask          Mask selected the DWORD bytes to write.
347  * @param value         DWORD to write to the config space.
348  *
349  * @return PCI_ACCESS_REJECT or PCI_ACCESS_DONE.
350  *
351  * @see pci_cfg_write_moderate
352  */
353 enum pci_access pci_ivshmem_cfg_write(struct pci_device *device,
354                                       unsigned int row, u32 mask, u32 value)
355 {
356         struct pci_ivshmem_endpoint *ive = device->ivshmem_endpoint;
357
358         if (row >= ARRAY_SIZE(default_cspace))
359                 return PCI_ACCESS_REJECT;
360
361         value |= ive->cspace[row] & ~mask;
362
363         switch (row) {
364         case PCI_CFG_COMMAND / 4:
365                 if (ivshmem_write_command(ive, value))
366                         return PCI_ACCESS_REJECT;
367                 break;
368         case IVSHMEM_CFG_MSIX_CAP / 4:
369                 if (ivshmem_write_msix_control(ive, value))
370                         return PCI_ACCESS_REJECT;
371         }
372         return PCI_ACCESS_DONE;
373 }
374
375 /**
376  * Handler for MMIO-read-accesses to PCI config space of this virtual device.
377  * @param device        The device that access should be performed on.
378  * @param address       Config space address accessed.
379  * @param value         Pointer to the return value.
380  *
381  * @return PCI_ACCESS_DONE.
382  *
383  * @see pci_cfg_read_moderate
384  */
385 enum pci_access pci_ivshmem_cfg_read(struct pci_device *device, u16 address,
386                                      u32 *value)
387 {
388         struct pci_ivshmem_endpoint *ive = device->ivshmem_endpoint;
389
390         if (address < sizeof(default_cspace))
391                 *value = ive->cspace[address / 4] >> ((address % 4) * 8);
392         else
393                 *value = -1;
394         return PCI_ACCESS_DONE;
395 }
396
/**
 * Update cached MSI-X state of the given ivshmem device.
 * @param device        The device to be updated.
 *
 * @return 0 on success, negative error code otherwise.
 *
 * @see ivshmem_update_msix
 */
int pci_ivshmem_update_msix(struct pci_device *device)
{
	return ivshmem_update_msix(device->ivshmem_endpoint);
}
407
/**
 * Register a new ivshmem device.
 * @param cell          The cell the device should be attached to.
 * @param device        The device to be registered.
 *
 * @return 0 on success, negative error code otherwise.
 */
int pci_ivshmem_init(struct cell *cell, struct pci_device *device)
{
	const struct jailhouse_memory *mem, *mem0;
	struct pci_ivshmem_data **ivp;
	struct pci_device *dev0;

	/* the device model supports exactly one MSI-X vector */
	if (device->info->num_msix_vectors != 1)
		return trace_error(-EINVAL);

	if (device->info->shmem_region >= cell->config->num_memory_regions)
		return trace_error(-EINVAL);

	mem = jailhouse_cell_mem_regions(cell->config)
		+ device->info->shmem_region;
	/* cellnum == NULL: match on BDF alone, see ivshmem_find */
	ivp = ivshmem_find(device, NULL);
	if (ivp) {
		dev0 = (*ivp)->eps[0].device;
		mem0 = jailhouse_cell_mem_regions(dev0->cell->config) +
			dev0->info->shmem_region;

		/* we already have a datastructure, connect second endpoint */
		if ((mem0->phys_start == mem->phys_start) &&
		    (mem0->size == mem->size)) {
			/* only two endpoints per link are supported */
			if ((*ivp)->eps[1].device)
				return trace_error(-EBUSY);
			ivshmem_connect_cell(*ivp, device, mem, 1);
			printk("Virtual PCI connection established "
				"\"%s\" <--> \"%s\"\n",
				cell->config->name, dev0->cell->config->name);
			goto connected;
		}
	}

	/* this is the first endpoint, allocate a new datastructure */
	for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next))
		; /* empty loop */
	*ivp = page_alloc(&mem_pool, 1);
	if (!(*ivp))
		return -ENOMEM;
	ivshmem_connect_cell(*ivp, device, mem, 0);

connected:
	printk("Adding virtual PCI device %02x:%02x.%x to cell \"%s\"\n",
	       PCI_BDF_PARAMS(device->info->bdf), cell->config->name);

	return 0;
}
462
463 /**
464  * Unregister a ivshmem device, typically when the corresponding cell exits.
465  * @param device        The device to be stopped.
466  *
467  */
468 void pci_ivshmem_exit(struct pci_device *device)
469 {
470         struct pci_ivshmem_data **ivp, *iv;
471         int cellnum;
472
473         ivp = ivshmem_find(device, &cellnum);
474         if (!ivp || !(*ivp))
475                 return;
476
477         iv = *ivp;
478
479         ivshmem_disconnect_cell(iv, cellnum);
480
481         if (cellnum == 0) {
482                 if (!iv->eps[1].device) {
483                         *ivp = iv->next;
484                         page_free(&mem_pool, iv, 1);
485                         return;
486                 }
487                 iv->eps[0] = iv->eps[1];
488         }
489 }