]> rtime.felk.cvut.cz Git - jailhouse.git/blob - hypervisor/pci_ivshmem.c
core: ivshmem: Simplify pci_ivshmem_cfg_read
[jailhouse.git] / hypervisor / pci_ivshmem.c
1 /*
2  * Jailhouse, a Linux-based partitioning hypervisor
3  *
4  * Copyright (c) Siemens AG, 2014
5  *
6  * Author:
7  *  Henning Schild <henning.schild@siemens.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12
13 /** @addtogroup PCI-IVSHMEM
14  * Inter Cell communication using a virtual PCI device. The device provides
15  * shared memory and interrupts based on MSI-X.
16  *
17  * The implementation in Jailhouse provides a shared memory device between
18  * exactly 2 cells. The link between the two PCI devices is established by
19  * choosing the same BDF, memory location, and memory size.
20  */
21
22 #include <jailhouse/control.h>
23 #include <jailhouse/pci.h>
24 #include <jailhouse/printk.h>
25 #include <jailhouse/string.h>
26 #include <jailhouse/utils.h>
27 #include <jailhouse/processor.h>
28 #include <asm/apic.h>
29
#define VIRTIO_VENDOR_ID        0x1af4
#define IVSHMEM_DEVICE_ID       0x1110

/* in jailhouse we can not allow dynamic remapping of the actual shared memory
 * the location and the size are stored here. A memory-BAR size of 0 will tell
 * device drivers that they are dealing with a special ivshmem device */
#define IVSHMEM_CFG_SHMEM_PTR   0x40
#define IVSHMEM_CFG_SHMEM_SZ    0x48

/* number of MSI-X vectors and byte offset of the MSI-X capability inside the
 * shadow config space */
#define IVSHMEM_MSIX_VECTORS    1
#define IVSHMEM_CFG_MSIX_CAP    0x50

/* byte offsets of the emulated registers inside BAR0 */
#define IVSHMEM_REG_IVPOS       8
#define IVSHMEM_REG_DBELL       12

/* total size of the shadow config space; the 12-byte MSI-X capability is the
 * last entry */
#define IVSHMEM_CFG_SIZE        (IVSHMEM_CFG_MSIX_CAP + 12)
46
/* Shadow state of one virtual PCI BAR: flag bits (e.g. PCI_BAR_64BIT) and
 * region size. A size of 0 marks a BAR that is left empty. */
struct virt_pci_bar {
        char flags;
        u64 sz;
};
51
/* Per-cell endpoint of an ivshmem link. */
struct pci_ivshmem_endpoint {
        u32 cspace[IVSHMEM_CFG_SIZE / sizeof(u32)];     /* shadow config space */
        u32 ivpos;              /* value of the read-only IVPosition register */
        struct virt_pci_bar bars[3];
        struct pci_device *device;      /* NULL while this slot is unused */
        struct pci_ivshmem_endpoint *remote;    /* peer, NULL until connected */
        struct apic_irq_message irq_msg;        /* cache, guarded by .valid */
};
60
/* One ivshmem link: the two endpoints that share a BDF, chained into
 * ivshmem_list. */
struct pci_ivshmem_data {
        struct pci_ivshmem_endpoint eps[2];
        struct pci_ivshmem_data *next;
};

/* head of the singly-linked list of all registered ivshmem links */
static struct pci_ivshmem_data *ivshmem_list;
67
/* Template config space for a fresh endpoint; indices are byte offsets / 4. */
static const u32 default_cspace[IVSHMEM_CFG_SIZE / sizeof(u32)] = {
        [0x00/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID, /* dev/vendor */
        [0x04/4] = (PCI_STS_CAPS << 16),        /* status: capability list */
        [0x08/4] = PCI_DEV_CLASS_MEM << 24,     /* class code */
        [0x2c/4] = (IVSHMEM_DEVICE_ID << 16) | VIRTIO_VENDOR_ID, /* subsystem */
        [0x34/4] = IVSHMEM_CFG_MSIX_CAP,        /* capability list pointer */
        /* MSI-X capability */
        /* NOTE(review): 0xC000 in the message control default appears to set
         * the enable and function-mask bits - confirm this is intended */
        [IVSHMEM_CFG_MSIX_CAP/4] = (0xC000 + IVSHMEM_MSIX_VECTORS - 1) << 16
                                   | (0x00 << 8) | PCI_CAP_MSIX,
        /* MSI-X table: offset 0 inside the BAR selected by the BIR */
        [(IVSHMEM_CFG_MSIX_CAP + 0x4)/4] = PCI_CFG_BAR/8 + 2,
        /* MSI-X PBA: placed right behind the table, same BIR */
        [(IVSHMEM_CFG_MSIX_CAP + 0x8)/4] = 0x10 * IVSHMEM_MSIX_VECTORS |
                                           (PCI_CFG_BAR/8 + 2),
};
81
/* Template BAR layout for a fresh endpoint: [0] registers, [1] unused,
 * [2] MSI-X table and PBA. */
static const struct virt_pci_bar default_bars[3] = {
        {
                .flags = PCI_BAR_64BIT,
                .sz = 256,
        },
        {
                /* in jailhouse we leave this BAR empty, the shared memory
                 * location and size are in our custom registers
                 * IVSHMEM_CFG_SHMEM */
        },
        {       /* used for MSI-X vectors */
                .flags = PCI_BAR_64BIT,
                /* table plus PBA, rounded up to a 16-byte multiple */
                .sz = ((0x18 * IVSHMEM_MSIX_VECTORS) + 15) & ~0xf,
        }
};
97
98 static bool ivshmem_is_msix_masked(struct pci_ivshmem_endpoint *ive)
99 {
100         union pci_msix_registers c;
101
102         /* global mask */
103         c.raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4];
104         if (!c.enable || c.fmask)
105                 return true;
106
107         /* local mask */
108         if (ive->device->msix_vectors[0].masked)
109                 return true;
110
111         /* PCI Bus Master */
112         if (!(ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MASTER))
113                 return true;
114
115         return false;
116 }
117
/*
 * Re-derive the cached APIC IRQ message of this endpoint from MSI-X vector 0.
 * On return, ive->irq_msg.valid is 1 only if the vector is unmasked,
 * translates to a valid message and targets CPUs inside the owning cell.
 * Returns 0 on success (including the masked/invalid cases), -EPERM if the
 * vector targets a destination outside the cell.
 */
static int ivshmem_update_msix(struct pci_ivshmem_endpoint *ive)
{
        union x86_msi_vector msi = {
                .raw.address = ive->device->msix_vectors[0].address,
                .raw.data = ive->device->msix_vectors[0].data,
        };
        struct apic_irq_message irq_msg;

        /* before doing anything mark the cached irq_msg as invalid,
         * on success it will be valid on return. */
        ive->irq_msg.valid = 0;
        memory_barrier();

        /* vector masked: leave the cache invalid, nothing to deliver */
        if (ivshmem_is_msix_masked(ive))
                return 0;

        irq_msg = pci_translate_msi_vector(ive->device, 0, 0, msi);
        if (!irq_msg.valid)
                return 0;

        if (!apic_filter_irq_dest(ive->device->cell, &irq_msg)) {
                panic_printk("FATAL: ivshmem MSI-X target outside of "
                             "cell \"%s\" device %02x:%02x.%x\n",
                             ive->device->cell->config->name,
                             PCI_BDF_PARAMS(ive->device->info->bdf));
                return -EPERM;
        }
        /* now copy the whole struct into our cache and mark the cache
         * valid at the end */
        irq_msg.valid = 0;
        ive->irq_msg = irq_msg;
        memory_barrier();
        ive->irq_msg.valid = 1;

        return 0;
}
154
155 /**
156  * update the command register
157  * note that we only accept writes to two flags
158  */
159 static int ivshmem_write_command(struct pci_ivshmem_endpoint *ive, u16 val)
160 {
161         u16 *cmd = (u16 *)&ive->cspace[PCI_CFG_COMMAND/4];
162         int err;
163
164         if ((val & PCI_CMD_MASTER) != (*cmd & PCI_CMD_MASTER)) {
165                 *cmd = (*cmd & ~PCI_CMD_MASTER) | (val & PCI_CMD_MASTER);
166                 err = ivshmem_update_msix(ive);
167                 if (err)
168                         return err;
169         }
170
171         *cmd = (*cmd & ~PCI_CMD_MEM) | (val & PCI_CMD_MEM);
172         return 0;
173 }
174
175 static int ivshmem_msix_mmio(struct pci_ivshmem_endpoint *ive, bool is_write,
176                              u32 offset, u32 *value)
177 {
178         u32 *msix_table = (u32 *)ive->device->msix_vectors;
179
180         if (offset % 4)
181                 goto fail;
182
183         /* MSI-X PBA */
184         if (offset >= 0x10 * IVSHMEM_MSIX_VECTORS) {
185                 if (is_write) {
186                         goto fail;
187                 } else {
188                         *value = 0;
189                         return 1;
190                 }
191         /* MSI-X Table */
192         } else {
193                 if (is_write) {
194                         msix_table[offset/4] = *value;
195                         if (ivshmem_update_msix(ive))
196                                 return -1;
197                 } else {
198                         *value = msix_table[offset/4];
199                 }
200                 return 1;
201         }
202
203 fail:
204         panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
205                      "%02x:%02x.%x\n", PCI_BDF_PARAMS(ive->device->info->bdf));
206         return -1;
207 }
208
/*
 * Deliver a doorbell ring to the remote endpoint by sending its cached
 * APIC IRQ message. Silently does nothing if no peer is connected or the
 * peer's cached message is invalid (e.g. MSI-X masked).
 */
static void ivshmem_write_doorbell(struct pci_ivshmem_endpoint *ive)
{
        struct pci_ivshmem_endpoint *remote = ive->remote;
        struct apic_irq_message irq_msg;

        if (!remote)
                return;

        /* get a copy of the struct before using it, the read barrier makes
         * sure the copy is consistent */
        irq_msg = remote->irq_msg;
        memory_load_barrier();
        if (irq_msg.valid)
                apic_send_irq(irq_msg);
}
224
225 static int ivshmem_register_mmio(struct pci_ivshmem_endpoint *ive,
226                                  bool is_write, u32 offset, u32 *value)
227 {
228         /* read-only IVPosition */
229         if (offset == IVSHMEM_REG_IVPOS && !is_write) {
230                 *value = ive->ivpos;
231                 return 1;
232         }
233
234         if (offset == IVSHMEM_REG_DBELL) {
235                 if (is_write) {
236                         ivshmem_write_doorbell(ive);
237                 } else {
238                         *value = 0;
239                 }
240                 return 1;
241         }
242         panic_printk("FATAL: Invalid ivshmem register %s, number %02x\n",
243                      is_write ? "write" : "read", offset);
244         return -1;
245 }
246
247 static int ivshmem_write_msix_control(struct pci_ivshmem_endpoint *ive, u32 val)
248 {
249         union pci_msix_registers *p = (union pci_msix_registers *)&val;
250         union pci_msix_registers newval = {
251                 .raw = ive->cspace[IVSHMEM_CFG_MSIX_CAP/4]
252         };
253
254         newval.enable = p->enable;
255         newval.fmask = p->fmask;
256         if (ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] != newval.raw) {
257                 ive->cspace[IVSHMEM_CFG_MSIX_CAP/4] = newval.raw;
258                 return ivshmem_update_msix(ive);
259         }
260         return 0;
261 }
262
263 static struct pci_ivshmem_data **ivshmem_find(struct pci_device *d,
264                                               int *cellnum)
265 {
266         struct pci_ivshmem_data **ivp, *iv;
267         u16 bdf2;
268
269         for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next)) {
270                 iv = *ivp;
271                 bdf2 = iv->eps[0].device->info->bdf;
272                 if (d->info->bdf == bdf2) {
273                         if (iv->eps[0].device == d) {
274                                 if (cellnum)
275                                         *cellnum = 0;
276                                 return ivp;
277                         }
278                         if (iv->eps[1].device == d) {
279                                 if (cellnum)
280                                         *cellnum = 1;
281                                 return ivp;
282                         }
283                         if (!cellnum)
284                                 return ivp;
285                 }
286         }
287
288         return NULL;
289 }
290
291 static void ivshmem_connect_cell(struct pci_ivshmem_data *iv,
292                                  struct pci_device *d,
293                                  const struct jailhouse_memory *mem,
294                                  int cellnum)
295 {
296         struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
297         struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
298
299         memcpy(ive->cspace, &default_cspace, sizeof(default_cspace));
300         memcpy(ive->bars, &default_bars, sizeof(default_bars));
301
302         ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4] = (u32)mem->virt_start;
303         ive->cspace[IVSHMEM_CFG_SHMEM_PTR/4 + 1] = (u32)(mem->virt_start >> 32);
304         ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4] = (u32)mem->size;
305         ive->cspace[IVSHMEM_CFG_SHMEM_SZ/4 + 1] = (u32)(mem->size >> 32);
306
307         ive->device = d;
308         if (remote->device) {
309                 ive->remote = remote;
310                 remote->remote = ive;
311                 ive->ivpos = (remote->ivpos + 1) % 2;
312         } else {
313                 ive->ivpos = cellnum;
314                 ive->remote = NULL;
315                 remote->remote = NULL;
316         }
317         d->ivshmem_endpoint = ive;
318 }
319
320 static void ivshmem_disconnect_cell(struct pci_ivshmem_data *iv, int cellnum)
321 {
322         struct pci_ivshmem_endpoint *remote = &iv->eps[(cellnum + 1) % 2];
323         struct pci_ivshmem_endpoint *ive = &iv->eps[cellnum];
324
325         ive->device->ivshmem_endpoint = NULL;
326         ive->device = NULL;
327         ive->remote = NULL;
328         remote->remote = NULL;
329 }
330
331 /**
332  * Handler for MMIO-accesses to this virtual PCI devices memory. Both for the
333  * BAR containing the registers, and the MSI-X BAR.
334  * @param cell          The cell that issued the access.
335  * @param is_write      True if write access.
336  * @param addr          Address accessed.
337  * @param value         Pointer to value for reading/writing.
338  *
339  * @return 1 if handled successfully, 0 if unhandled, -1 on access error.
340  *
341  * @see pci_mmio_access_handler
342  */
343 int ivshmem_mmio_access_handler(const struct cell *cell, bool is_write,
344                                 u64 addr, u32 *value)
345 {
346         struct pci_ivshmem_endpoint *ive;
347         struct pci_device *device;
348         u64 mem_start, mem_sz;
349
350         for (device = cell->virtual_device_list; device;
351              device = device->next_virtual_device) {
352                 ive = device->ivshmem_endpoint;
353                 if ((ive->cspace[PCI_CFG_COMMAND/4] & PCI_CMD_MEM) == 0)
354                         continue;
355
356                 /* BAR0: registers */
357                 mem_start = (*(u64 *)&device->bar[0]) & ~0xfL;
358                 mem_sz = ive->bars[0].sz;
359                 if (addr >= mem_start && addr <= (mem_start + mem_sz - 4))
360                         return ivshmem_register_mmio(ive, is_write,
361                                                      addr - mem_start,
362                                                      value);
363
364                 /* BAR4: MSI-X */
365                 mem_start = (*(u64 *)&device->bar[4]) & ~0xfL;
366                 mem_sz = ive->bars[2].sz;
367                 if (addr >= mem_start && addr <= (mem_start + mem_sz - 4))
368                         return ivshmem_msix_mmio(ive, is_write,
369                                                  addr - mem_start, value);
370         }
371
372         return 0;
373 }
374
375 /**
376  * Handler for MMIO-write-accesses to PCI config space of this virtual device.
377  * @param device        The device that access should be performed on.
378  * @param row           Config space DWORD row of the access.
379  * @param mask          Mask selected the DWORD bytes to write.
380  * @param value         DWORD to write to the config space.
381  *
382  * @return PCI_ACCESS_REJECT or PCI_ACCESS_DONE.
383  *
384  * @see pci_cfg_write_moderate
385  */
386 enum pci_access pci_ivshmem_cfg_write(struct pci_device *dev, unsigned int row,
387                                       u32 mask, u32 value)
388 {
389         struct pci_ivshmem_endpoint *ive = dev->ivshmem_endpoint;
390
391         if (row >= ARRAY_SIZE(default_cspace))
392                 return PCI_ACCESS_REJECT;
393
394         value |= ive->cspace[row] & ~mask;
395
396         switch (row) {
397         case PCI_CFG_COMMAND / 4:
398                 if (ivshmem_write_command(ive, value))
399                         return PCI_ACCESS_REJECT;
400                 break;
401         case IVSHMEM_CFG_MSIX_CAP / 4:
402                 if (ivshmem_write_msix_control(ive, value))
403                         return PCI_ACCESS_REJECT;
404         }
405         return PCI_ACCESS_DONE;
406 }
407
408 /**
409  * Handler for MMIO-read-accesses to PCI config space of this virtual device.
410  * @param dev           The device that access should be performed on.
411  * @param address       Config space address accessed.
412  * @param value         Pointer to the return value.
413  *
414  * @return PCI_ACCESS_DONE.
415  *
416  * @see pci_cfg_read_moderate
417  */
418 enum pci_access pci_ivshmem_cfg_read(struct pci_device *dev, u16 address,
419                                      u32 *value)
420 {
421         struct pci_ivshmem_endpoint *ive = dev->ivshmem_endpoint;
422
423         if (address < sizeof(default_cspace))
424                 *value = ive->cspace[address / 4] >> ((address % 4) * 8);
425         else
426                 *value = -1;
427         return PCI_ACCESS_DONE;
428 }
429
430 /**
431  * Update cached MSI-X state of the given ivshmem device.
432  * @param dev   The device to be updated.
433  *
434  * @return 0 on success, negative error code otherwise.
435  */
436 int pci_ivshmem_update_msix(struct pci_device *dev)
437 {
438         return ivshmem_update_msix(dev->ivshmem_endpoint);
439 }
440
/**
 * Register a new ivshmem device.
 * @param cell          The cell the device should be attached to.
 * @param dev           The device to be registered.
 *
 * @return 0 on success, negative error code otherwise.
 */
int pci_ivshmem_init(struct cell *cell, struct pci_device *dev)
{
        const struct jailhouse_memory *mem, *mem0;
        struct pci_ivshmem_data **ivp;
        struct pci_device *dev0;

        /* the device model provides exactly one MSI-X vector */
        if (dev->info->num_msix_vectors != 1)
                return trace_error(-EINVAL);

        /* the configured shared-memory region must exist in the cell */
        if (dev->info->shmem_region >= cell->config->num_memory_regions)
                return trace_error(-EINVAL);

        mem = jailhouse_cell_mem_regions(cell->config)
                + dev->info->shmem_region;
        /* look for an existing link with the same BDF */
        ivp = ivshmem_find(dev, NULL);
        if (ivp) {
                dev0 = (*ivp)->eps[0].device;
                mem0 = jailhouse_cell_mem_regions(dev0->cell->config) +
                        dev0->info->shmem_region;

                /* we already have a datastructure, connect second endpoint */
                if ((mem0->phys_start == mem->phys_start) &&
                    (mem0->size == mem->size)) {
                        /* only two endpoints may share one link */
                        if ((*ivp)->eps[1].device)
                                return trace_error(-EBUSY);
                        ivshmem_connect_cell(*ivp, dev, mem, 1);
                        printk("Virtual PCI connection established "
                                "\"%s\" <--> \"%s\"\n",
                                cell->config->name, dev0->cell->config->name);
                        goto connected;
                }
        }

        /* this is the first endpoint, allocate a new datastructure */
        for (ivp = &ivshmem_list; *ivp; ivp = &((*ivp)->next))
                ; /* empty loop */
        *ivp = page_alloc(&mem_pool, 1);
        if (!(*ivp))
                return -ENOMEM;
        ivshmem_connect_cell(*ivp, dev, mem, 0);

connected:
        dev->cell = cell;
        printk("Adding virtual PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(dev->info->bdf), cell->config->name);

        return 0;
}
496
497 /**
498  * Unregister a ivshmem device, typically when the corresponding cell exits.
499  * @param dev           The device to be stopped.
500  *
501  */
502 void pci_ivshmem_exit(struct pci_device *dev)
503 {
504         struct pci_ivshmem_data **ivp, *iv;
505         int cellnum;
506
507         ivp = ivshmem_find(dev, &cellnum);
508         if (!ivp || !(*ivp))
509                 return;
510
511         iv = *ivp;
512
513         if (iv->eps[0].device == dev) {
514                 if (!iv->eps[1].device) {
515                         *ivp = iv->next;
516                         page_free(&mem_pool, iv, 1);
517                         return;
518                 }
519                 iv->eps[0] = iv->eps[1];
520         }
521         ivshmem_disconnect_cell(iv, 1);
522 }