/*
 * Source: hypervisor/pci_ivshmem.c (jailhouse.git),
 * blob 0b3b7f643192c1498cb5cff40a89ffee1eabef29
 */
/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2014, 2015
 *
 * Author:
 *  Henning Schild <henning.schild@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
12
/** @addtogroup PCI-IVSHMEM
 * Inter-cell communication using a virtual PCI device. The device provides
 * shared memory and interrupts based on MSI-X.
 *
 * The implementation in Jailhouse provides a shared memory device between
 * exactly 2 cells. The link between the two PCI devices is established by
 * choosing the same BDF, memory location, and memory size.
 */
21
#include <jailhouse/control.h>
#include <jailhouse/pci.h>
#include <jailhouse/printk.h>
#include <jailhouse/string.h>
#include <jailhouse/utils.h>
#include <jailhouse/processor.h>
#include <asm/apic.h>
29
#define VIRTIO_VENDOR_ID	0x1af4
#define IVSHMEM_DEVICE_ID	0x1110

/* in jailhouse we can not allow dynamic remapping of the actual shared memory
 * the location and the size are stored here. A memory-BAR size of 0 will tell
 * device drivers that they are dealing with a special ivshmem device */
#define IVSHMEM_CFG_SHMEM_PTR	0x40
#define IVSHMEM_CFG_SHMEM_SZ	0x48

#define IVSHMEM_MSIX_VECTORS	1
#define IVSHMEM_CFG_MSIX_CAP	0x50

#define IVSHMEM_REG_IVPOS	8
#define IVSHMEM_REG_DBELL	12

#define IVSHMEM_CFG_SIZE	(IVSHMEM_CFG_MSIX_CAP + 12)

#define IVSHMEM_BAR0_SIZE	256
#define IVSHMEM_BAR4_SIZE	((0x18 * IVSHMEM_MSIX_VECTORS + 0xf) & ~0xf)
49
/* per-cell endpoint of one ivshmem link */
struct pci_ivshmem_endpoint {
	/* shadow PCI config space presented to the owning cell */
	u32 cspace[IVSHMEM_CFG_SIZE / sizeof(u32)];
	/* value of the read-only IVPosition register (0 or 1) */
	u32 ivpos;
	/* backing virtual PCI device; NULL while this slot is unused */
	struct pci_device *device;
	/* peer endpoint; NULL until both sides are connected */
	struct pci_ivshmem_endpoint *remote;
	/* cached, pre-translated MSI-X message; its valid flag gates
	 * delivery (see ivshmem_update_msix/ivshmem_write_doorbell) */
	struct apic_irq_message irq_msg;
};
57
/* one shared-memory link: exactly two endpoints, plus list linkage */
struct pci_ivshmem_data {
	struct pci_ivshmem_endpoint eps[2];
	struct pci_ivshmem_data *next;
};
62
static struct pci_ivshmem_data *ivshmem_list;
64
/* template for the shadow config space of a freshly connected endpoint */
static const u32 default_cspace[IVSHMEM_CFG_SIZE / sizeof(u32)] = {
	/* device ID / vendor ID */
	[0x00/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
	/* status: capability list present */
	[0x04/4] = (PCI_STS_CAPS << 16),
	/* class code: memory controller */
	[0x08/4] = PCI_DEV_CLASS_MEM << 24,
	/* subsystem ID mirrors device/vendor ID */
	[0x2c/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID,
	/* capabilities pointer -> MSI-X capability */
	[0x34/4] = IVSHMEM_CFG_MSIX_CAP,
	/* MSI-X capability */
	/* message control (table size encoded as vectors - 1), next
	 * pointer (none) and capability ID */
	[IVSHMEM_CFG_MSIX_CAP/4] = (0xC000 + IVSHMEM_MSIX_VECTORS - 1) << 16
				   | (0x00 << 8) | PCI_CAP_MSIX,
	/* table: offset 0 in BAR4 */
	[(IVSHMEM_CFG_MSIX_CAP + 0x4)/4] = PCI_CFG_BAR/8 + 2,
	/* PBA: directly behind the table, also in BAR4 */
	[(IVSHMEM_CFG_MSIX_CAP + 0x8)/4] = 0x10 * IVSHMEM_MSIX_VECTORS |
					   (PCI_CFG_BAR/8 + 2),
};
78
79 static void ivshmem_write_doorbell(struct pci_ivshmem_endpoint *ive)
80 {
81         struct pci_ivshmem_endpoint *remote = ive->remote;
82         struct apic_irq_message irq_msg;
83
84         if (!remote)
85                 return;
86
87         /* get a copy of the struct before using it, the read barrier makes
88          * sure the copy is consistent */
89         irq_msg = remote->irq_msg;
90         memory_load_barrier();
91         if (irq_msg.valid)
92                 apic_send_irq(irq_msg);
93 }
94
95 static int ivshmem_register_mmio(struct pci_ivshmem_endpoint *ive,
96                                  bool is_write, u32 offset, u32 *value)
97 {
98         /* read-only IVPosition */
99         if (offset == IVSHMEM_REG_IVPOS && !is_write) {
100                 *value = ive->ivpos;
101                 return 1;
102         }
103
104         if (offset == IVSHMEM_REG_DBELL) {
105                 if (is_write)
106                         ivshmem_write_doorbell(ive);
107                 else
108                         *value = 0;
109                 return 1;
110         }
111         panic_printk("FATAL: Invalid ivshmem register %s, number %02x\n",
112                      is_write ? "write" : "read", offset);
113         return -1;
114 }
115
116 static bool ivshmem_is_msix_masked(struct pci_ivshmem_endpoint *ive)
117 {
118         union pci_msix_registers c;
119
120         /* global mask */
121         c.raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4];
122         if (!c.enable || c.fmask)
123                 return true;
124
125         /* local mask */
126         if (ive->device->msix_vectors[0].masked)
127                 return true;
128
129         /* PCI Bus Master */
130         if (!(ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MASTER))
131                 return true;
132
133         return false;
134 }
135
/*
 * Re-derive the cached interrupt message of an endpoint from its current
 * MSI-X vector and masking state.
 *
 * Returns 0 on success (including the masked/untranslatable cases, which
 * simply leave the cache invalid), -EPERM if the vector targets a CPU
 * outside the owning cell.
 */
static int ivshmem_update_msix(struct pci_ivshmem_endpoint *ive)
{
	union x86_msi_vector msi = {
		.raw.address = ive->device->msix_vectors[0].address,
		.raw.data = ive->device->msix_vectors[0].data,
	};
	struct apic_irq_message irq_msg;

	/* before doing anything mark the cached irq_msg as invalid,
	 * on success it will be valid on return. */
	ive->irq_msg.valid = 0;
	memory_barrier();

	if (ivshmem_is_msix_masked(ive))
		return 0;

	irq_msg = pci_translate_msi_vector(ive->device, 0, 0, msi);
	if (!irq_msg.valid)
		return 0;

	/* reject vectors whose destination lies outside the cell */
	if (!apic_filter_irq_dest(ive->device->cell, &irq_msg)) {
		panic_printk("FATAL: ivshmem MSI-X target outside of "
			     "cell \"%s\" device %02x:%02x.%x\n",
			     ive->device->cell->config->name,
			     PCI_BDF_PARAMS(ive->device->info->bdf));
		return -EPERM;
	}
	/* now copy the whole struct into our cache and mark the cache
	 * valid at the end */
	irq_msg.valid = 0;	/* copy in the invalid state first ... */
	ive->irq_msg = irq_msg;
	memory_barrier();
	ive->irq_msg.valid = 1;	/* ... then publish with a single store */

	return 0;
}
172
173 static int ivshmem_msix_mmio(struct pci_ivshmem_endpoint *ive, bool is_write,
174                              u32 offset, u32 *value)
175 {
176         u32 *msix_table = (u32 *)ive->device->msix_vectors;
177
178         if (offset % 4)
179                 goto fail;
180
181         /* MSI-X PBA */
182         if (offset >= 0x10 * IVSHMEM_MSIX_VECTORS) {
183                 if (is_write) {
184                         goto fail;
185                 } else {
186                         *value = 0;
187                         return 1;
188                 }
189         /* MSI-X Table */
190         } else {
191                 if (is_write) {
192                         msix_table[offset/4] = *value;
193                         if (ivshmem_update_msix(ive))
194                                 return -1;
195                 } else {
196                         *value = msix_table[offset/4];
197                 }
198                 return 1;
199         }
200
201 fail:
202         panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
203                      "%02x:%02x.%x\n", PCI_BDF_PARAMS(ive->device->info->bdf));
204         return -1;
205 }
206
207 /**
208  * update the command register
209  * note that we only accept writes to two flags
210  */
211 static int ivshmem_write_command(struct pci_ivshmem_endpoint *ive, u16 val)
212 {
213         u16 *cmd = (u16 *)&ive->cspace[PCI_CFG_COMMAND/4];
214         int err;
215
216         if ((val & PCI_CMD_MASTER) != (*cmd & PCI_CMD_MASTER)) {
217                 *cmd = (*cmd & ~PCI_CMD_MASTER) | (val & PCI_CMD_MASTER);
218                 err = ivshmem_update_msix(ive);
219                 if (err)
220                         return err;
221         }
222
223         *cmd = (*cmd & ~PCI_CMD_MEM) | (val & PCI_CMD_MEM);
224         return 0;
225 }
226
227 static int ivshmem_write_msix_control(struct pci_ivshmem_endpoint *ive, u32 val)
228 {
229         union pci_msix_registers *p = (union pci_msix_registers *)&val;
230         union pci_msix_registers newval = {
231                 .raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4]
232         };
233
234         newval.enable = p->enable;
235         newval.fmask = p->fmask;
236         if (ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] != newval.raw) {
237                 ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] = newval.raw;
238                 return ivshmem_update_msix(ive);
239         }
240         return 0;
241 }
242
243 static struct pci_ivshmem_data **ivshmem_find(struct pci_device *d,
244                                               int *cellnum)
245 {
246         struct pci_ivshmem_data **ivp, *iv;
247         u16 bdf2;
248
249         for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next)) {
250                 iv = *ivp;
251                 bdf2 = iv->eps[0].device->info->bdf;
252                 if (d->info->bdf == bdf2) {
253                         if (iv->eps[0].device == d) {
254                                 if (cellnum)
255                                         *cellnum = 0;
256                                 return ivp;
257                         }
258                         if (iv->eps[1].device == d) {
259                                 if (cellnum)
260                                         *cellnum = 1;
261                                 return ivp;
262                         }
263                         if (!cellnum)
264                                 return ivp;
265                 }
266         }
267
268         return NULL;
269 }
270
271 static void ivshmem_connect_cell(struct pci_ivshmem_data *iv,
272                                  struct pci_device *d,
273                                  const struct jailhouse_memory *mem,
274                                  int cellnum)
275 {
276         struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
277         struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
278
279         d->bar[0] = PCI_BAR_64BIT;
280         d->bar[4] = PCI_BAR_64BIT;
281
282         memcpy(ive->cspace, &default_cspace, sizeof(default_cspace));
283
284         ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4] = (u32)mem->virt_start;
285         ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4 + 1] = (u32)(mem->virt_start >> 32);
286         ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4] = (u32)mem->size;
287         ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4 + 1] = (u32)(mem->size >> 32);
288
289         ive->device = d;
290         if (remote->device) {
291                 ive->remote = remote;
292                 remote->remote = ive;
293                 ive->ivpos = (remote->ivpos + 1) % 2;
294         } else {
295                 ive->ivpos = cellnum;
296                 ive->remote = NULL;
297                 remote->remote = NULL;
298         }
299         d->ivshmem_endpoint = ive;
300 }
301
302 static void ivshmem_disconnect_cell(struct pci_ivshmem_data *iv, int cellnum)
303 {
304         struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
305         struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
306
307         ive->device->ivshmem_endpoint = NULL;
308         ive->device = NULL;
309         ive->remote = NULL;
310         remote->remote = NULL;
311 }
312
313 /**
314  * Handler for MMIO-accesses to this virtual PCI devices memory. Both for the
315  * BAR containing the registers, and the MSI-X BAR.
316  * @param cell          The cell that issued the access.
317  * @param is_write      True if write access.
318  * @param addr          Address accessed.
319  * @param value         Pointer to value for reading/writing.
320  *
321  * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
322  *
323  * @see pci_mmio_access_handler
324  */
325 int ivshmem_mmio_access_handler(const struct cell *cell, bool is_write,
326                                 u64 addr, u32 *value)
327 {
328         struct pci_ivshmem_endpoint *ive;
329         struct pci_device *device;
330         u64 mem_start;
331
332         for (device = cell->virtual_device_list; device;
333              device = device->next_virtual_device) {
334                 ive = device->ivshmem_endpoint;
335                 if ((ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MEM) == 0)
336                         continue;
337
338                 /* BAR0: registers */
339                 mem_start = (*(u64 *)&device->bar[0]) & ~0xfL;
340                 if (addr >= mem_start &&
341                     addr <= (mem_start + IVSHMEM_BAR0_SIZE - 4))
342                         return ivshmem_register_mmio(ive, is_write,
343                                                      addr - mem_start,
344                                                      value);
345
346                 /* BAR4: MSI-X */
347                 mem_start = (*(u64 *)&device->bar[4]) & ~0xfL;
348                 if (addr >= mem_start &&
349                     addr <= (mem_start + IVSHMEM_BAR4_SIZE - 4))
350                         return ivshmem_msix_mmio(ive, is_write,
351                                                  addr - mem_start, value);
352         }
353
354         return 0;
355 }
356
357 /**
358  * Handler for MMIO-write-accesses to PCI config space of this virtual device.
359  * @param device        The device that access should be performed on.
360  * @param row           Config space DWORD row of the access.
361  * @param mask          Mask selected the DWORD bytes to write.
362  * @param value         DWORD to write to the config space.
363  *
364  * @return PCI_ACCESS_REJECT or PCI_ACCESS_DONE.
365  *
366  * @see pci_cfg_write_moderate
367  */
368 enum pci_access pci_ivshmem_cfg_write(struct pci_device *device,
369                                       unsigned int row, u32 mask, u32 value)
370 {
371         struct pci_ivshmem_endpoint *ive = device->ivshmem_endpoint;
372
373         if (row >= ARRAY_SIZE(default_cspace))
374                 return PCI_ACCESS_REJECT;
375
376         value |= ive->cspace[row] & ~mask;
377
378         switch (row) {
379         case PCI_CFG_COMMAND / 4:
380                 if (ivshmem_write_command(ive, value))
381                         return PCI_ACCESS_REJECT;
382                 break;
383         case IVSHMEM_CFG_MSIX_CAP / 4:
384                 if (ivshmem_write_msix_control(ive, value))
385                         return PCI_ACCESS_REJECT;
386         }
387         return PCI_ACCESS_DONE;
388 }
389
390 /**
391  * Handler for MMIO-read-accesses to PCI config space of this virtual device.
392  * @param device        The device that access should be performed on.
393  * @param address       Config space address accessed.
394  * @param value         Pointer to the return value.
395  *
396  * @return PCI_ACCESS_DONE.
397  *
398  * @see pci_cfg_read_moderate
399  */
400 enum pci_access pci_ivshmem_cfg_read(struct pci_device *device, u16 address,
401                                      u32 *value)
402 {
403         struct pci_ivshmem_endpoint *ive = device->ivshmem_endpoint;
404
405         if (address < sizeof(default_cspace))
406                 *value = ive->cspace[address / 4] >> ((address % 4) * 8);
407         else
408                 *value = -1;
409         return PCI_ACCESS_DONE;
410 }
411
412 /**
413  * Update cached MSI-X state of the given ivshmem device.
414  * @param device        The device to be updated.
415  *
416  * @return 0 on success, negative error code otherwise.
417  */
418 int pci_ivshmem_update_msix(struct pci_device *device)
419 {
420         return ivshmem_update_msix(device->ivshmem_endpoint);
421 }
422
423 /**
424  * Register a new ivshmem device.
425  * @param cell          The cell the device should be attached to.
426  * @param device        The device to be registered.
427  *
428  * @return 0 on success, negative error code otherwise.
429  */
430 int pci_ivshmem_init(struct cell *cell, struct pci_device *device)
431 {
432         const struct jailhouse_memory *mem, *mem0;
433         struct pci_ivshmem_data **ivp;
434         struct pci_device *dev0;
435
436         if (device->info->num_msix_vectors != 1)
437                 return trace_error(-EINVAL);
438
439         if (device->info->shmem_region >= cell->config->num_memory_regions)
440                 return trace_error(-EINVAL);
441
442         mem = jailhouse_cell_mem_regions(cell->config)
443                 + device->info->shmem_region;
444         ivp = ivshmem_find(device, NULL);
445         if (ivp) {
446                 dev0 = (*ivp)->eps[0].device;
447                 mem0 = jailhouse_cell_mem_regions(dev0->cell->config) +
448                         dev0->info->shmem_region;
449
450                 /* we already have a datastructure, connect second endpoint */
451                 if ((mem0->phys_start == mem->phys_start) &&
452                     (mem0->size == mem->size)) {
453                         if ((*ivp)->eps[1].device)
454                                 return trace_error(-EBUSY);
455                         ivshmem_connect_cell(*ivp, device, mem, 1);
456                         printk("Virtual PCI connection established "
457                                 "\"%s\" <--> \"%s\"\n",
458                                 cell->config->name, dev0->cell->config->name);
459                         goto connected;
460                 }
461         }
462
463         /* this is the first endpoint, allocate a new datastructure */
464         for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next))
465                 ; /* empty loop */
466         *ivp = page_alloc(&mem_pool, 1);
467         if (!(*ivp))
468                 return -ENOMEM;
469         ivshmem_connect_cell(*ivp, device, mem, 0);
470
471 connected:
472         printk("Adding virtual PCI device %02x:%02x.%x to cell \"%s\"\n",
473                PCI_BDF_PARAMS(device->info->bdf), cell->config->name);
474
475         return 0;
476 }
477
478 /**
479  * Unregister a ivshmem device, typically when the corresponding cell exits.
480  * @param device        The device to be stopped.
481  *
482  */
483 void pci_ivshmem_exit(struct pci_device *device)
484 {
485         struct pci_ivshmem_data **ivp, *iv;
486         int cellnum;
487
488         ivp = ivshmem_find(device, &cellnum);
489         if (!ivp || !(*ivp))
490                 return;
491
492         iv = *ivp;
493
494         ivshmem_disconnect_cell(iv, cellnum);
495
496         if (cellnum == 0) {
497                 if (!iv->eps[1].device) {
498                         *ivp = iv->next;
499                         page_free(&mem_pool, iv, 1);
500                         return;
501                 }
502                 iv->eps[0] = iv->eps[1];
503         }
504 }