rtime.felk.cvut.cz Git - jailhouse.git/commitdiff
core: Virtualize legacy MSI for interrupt remapping support
author: Jan Kiszka <jan.kiszka@siemens.com>
Sun, 27 Jul 2014 12:42:56 +0000 (14:42 +0200)
committer: Jan Kiszka <jan.kiszka@siemens.com>
Tue, 26 Aug 2014 17:56:49 +0000 (19:56 +0200)
Analogously to edge-triggered IOAPIC interrupts, hand over all legacy
MSIs by disabling them first, programming the VT-d remapping table and
then writing remappable parameters into the MSI capability registers.
An additional triggering of active vectors ensures that we do not lose
events during handover.

Disabling is done on x86 via a trick: we program an empty CPU mask in
logical destination mode.

MSI-X remains on the to-do list. Thus, once interrupt remapping is
enabled, systems that use MSI-X will be unsupported for the time
being.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
hypervisor/arch/x86/control.c
hypervisor/arch/x86/include/asm/apic.h
hypervisor/arch/x86/pci.c
hypervisor/include/jailhouse/pci.h
hypervisor/pci.c

index f88d2a0182116fc99e66fa1fd4f06aa7e45e9ca8..1f6e683d787ece011a82fb1951e3e5dfbadf1c2c 100644 (file)
@@ -112,13 +112,17 @@ void arch_config_commit(struct per_cpu *cpu_data,
        vmx_invept();
 
        vtd_config_commit(cell_added_removed);
+       pci_config_commit(cell_added_removed);
        ioapic_config_commit(cell_added_removed);
 }
 
 void arch_shutdown(void)
 {
+       pci_prepare_handover();
        ioapic_prepare_handover();
+
        vtd_shutdown();
+       pci_shutdown();
        ioapic_shutdown();
 }
 
index fd80c2e339a87f16d462a295fa3dd091f0f3c9cc..8ebde72e259f8577fb1ac4a1cb41dc1cc71133d9 100644 (file)
@@ -97,12 +97,22 @@ union x86_msi_vector {
                u32 vector:8,
                    delivery_mode:3,
                    reserved:21;
-       } native;
+       } __attribute__((packed)) native;
+       struct {
+               u64 unused:2,
+                   int_index15:1,
+                   shv:1,
+                   remapped:1,
+                   int_index:15,
+                   address:44;
+               u16 subhandle;
+               u16 zero;
+       } __attribute__((packed)) remap;
        struct {
                u64 address;
                u32 data;
-       } raw;
-};
+       } __attribute__((packed)) raw;
+} __attribute__((packed));
 
 /* MSI delivery modes */
 #define MSI_DM_NMI                     (0x4 << 8)
index 733878b766e4efdca11432032a3c162ddeb51e88..fd600dc53d3e3bd995bf7a79ffbeeed6e6702b5f 100644 (file)
  * the COPYING file in the top-level directory.
  */
 
+#include <jailhouse/control.h>
 #include <jailhouse/pci.h>
 #include <jailhouse/printk.h>
 #include <jailhouse/utils.h>
+#include <asm/apic.h>
 #include <asm/io.h>
 #include <asm/pci.h>
 #include <asm/vtd.h>
@@ -223,3 +225,112 @@ void arch_pci_remove_device(struct pci_device *device)
 {
        vtd_remove_pci_device(device);
 }
+
+static union x86_msi_vector pci_get_x86_msi_vector(struct pci_device *device)
+{
+       union pci_msi_registers *regs = &device->msi_registers;
+       bool msi_64bits = device->info->msi_64bits;
+       union x86_msi_vector msi;
+
+       msi.raw.address = msi_64bits ? regs->msg64.address :
+                                      regs->msg32.address;
+       msi.raw.data = msi_64bits ? regs->msg64.data : regs->msg32.data;
+       return msi;
+}
+
+static struct apic_irq_message
+pci_translate_msi_vector(struct pci_device *device, unsigned int vector,
+                        unsigned int legacy_vectors, union x86_msi_vector msi)
+{
+       struct apic_irq_message irq_msg;
+
+       irq_msg.vector = msi.native.vector;
+       if (legacy_vectors > 1) {
+               irq_msg.vector &= ~(legacy_vectors - 1);
+               irq_msg.vector |= vector;
+       }
+       irq_msg.delivery_mode = msi.native.delivery_mode;
+       irq_msg.level_triggered = 0;
+       irq_msg.dest_logical = msi.native.dest_logical;
+       irq_msg.redir_hint = msi.native.redir_hint;
+       irq_msg.destination = msi.native.destination;
+
+       return irq_msg;
+}
+
+void pci_suppress_msi(struct pci_device *device,
+                     const struct jailhouse_pci_capability *cap)
+{
+       unsigned int n, vectors = pci_enabled_msi_vectors(device);
+       const struct jailhouse_pci_device *info = device->info;
+       struct apic_irq_message irq_msg;
+       union x86_msi_vector msi = {
+               .native.dest_logical = 1,
+               .native.redir_hint = 1,
+               .native.address = MSI_ADDRESS_VALUE,
+       };
+
+       if (!(pci_read_config(info->bdf, PCI_CFG_COMMAND, 2) & PCI_CMD_MASTER))
+               return;
+
+       /*
+        * Disable delivery by setting no destination CPU bit in logical
+        * addressing mode.
+        */
+       if (info->msi_64bits)
+               pci_write_config(info->bdf, cap->start + 8, 0, 4);
+       pci_write_config(info->bdf, cap->start + 4, (u32)msi.raw.address, 4);
+
+       /*
+        * Inject MSI vectors to avoid losing events while suppressed.
+        * Linux can handle rare spurious interrupts.
+        */
+       msi = pci_get_x86_msi_vector(device);
+       for (n = 0; n < vectors; n++) {
+               irq_msg = pci_translate_msi_vector(device, n, vectors, msi);
+               apic_send_irq(irq_msg);
+       }
+}
+
+int pci_update_msi(struct pci_device *device,
+                  const struct jailhouse_pci_capability *cap)
+{
+       unsigned int n, vectors = pci_enabled_msi_vectors(device);
+       union x86_msi_vector msi = pci_get_x86_msi_vector(device);
+       const struct jailhouse_pci_device *info = device->info;
+       struct apic_irq_message irq_msg;
+       u16 bdf = info->bdf;
+       int result = 0;
+
+       if (vectors == 0)
+               return 0;
+
+       for (n = 0; n < vectors; n++) {
+               irq_msg = pci_translate_msi_vector(device, n, vectors, msi);
+               result = vtd_map_interrupt(device->cell, bdf, n, irq_msg);
+               // HACK for QEMU
+               if (result == -ENOSYS) {
+                       for (n = 1; n < (info->msi_64bits ? 4 : 3); n++)
+                               pci_write_config(bdf, cap->start + n * 4,
+                                       device->msi_registers.raw[n], 4);
+                       return 0;
+               }
+               if (result < 0)
+                       return result;
+       }
+
+       /* set result to the base index again */
+       result -= vectors - 1;
+
+       pci_write_config(bdf, cap->start + (info->msi_64bits ? 12 : 8), 0, 2);
+
+       if (info->msi_64bits)
+               pci_write_config(bdf, cap->start + 8, 0, 4);
+       msi.remap.int_index15 = result >> 15;
+       msi.remap.shv = 1;
+       msi.remap.remapped = 1;
+       msi.remap.int_index = result;
+       pci_write_config(bdf, cap->start + 4, (u32)msi.raw.address, 4);
+
+       return 0;
+}
index f67cf35527fe292ac9940b03cd7aefc5fddd2810..31f54b986953d28abe33c3d4fc019a0ba0678617 100644 (file)
 #define PCI_DEVFN(bdf)         ((bdf) & 0xff)
 #define PCI_BDF_PARAMS(bdf)    (bdf) >> 8, ((bdf) >> 3) & 0x1f, (bdf) & 7
 
+#define PCI_CFG_COMMAND                0x04
+# define PCI_CMD_MASTER                (1 << 2)
+# define PCI_CMD_INTX_OFF      (1 << 10)
+
 enum pci_access { PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM, PCI_ACCESS_DONE };
 
+union pci_msi_registers {
+       struct {
+               u16 padding;
+               u16 enable:1,
+                   ignore1:3,
+                   mme:3,
+                   ignore2:9;
+               u32 address;
+               u16 data;
+       } __attribute__((packed)) msg32;
+       struct {
+               u32 padding; /* use msg32 */
+               u64 address;
+               u16 data;
+       } __attribute__((packed)) msg64;
+       u32 raw[4];
+} __attribute__((packed));
+
 struct pci_device {
        const struct jailhouse_pci_device *info;
        struct cell *cell;
+
+       union pci_msi_registers msi_registers;
 };
 
 int pci_init(void);
@@ -45,6 +69,18 @@ int pci_mmio_access_handler(const struct cell *cell, bool is_write, u64 addr,
 int pci_cell_init(struct cell *cell);
 void pci_cell_exit(struct cell *cell);
 
+void pci_config_commit(struct cell *cell_added_removed);
+
+unsigned int pci_enabled_msi_vectors(struct pci_device *device);
+
+void pci_suppress_msi(struct pci_device *device,
+                     const struct jailhouse_pci_capability *cap);
+int pci_update_msi(struct pci_device *device,
+                  const struct jailhouse_pci_capability *cap);
+
+void pci_prepare_handover(void);
+void pci_shutdown(void);
+
 u32 arch_pci_read_config(u16 bdf, u16 address, unsigned int size);
 void arch_pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size);
 
index 69fbdf60ab683796dd6417ae57fb5b4f0c97434f..1ff8dc19232de7ba137d334e5a2a5187a9908fa7 100644 (file)
 
 #define PCI_CONFIG_HEADER_SIZE         0x40
 
-#define PCI_CFG_COMMAND                        0x04
-# define PCI_CMD_INTX_OFF              (1 << 10)
+#define PCI_CAP_MSI                    0x05
+#define PCI_CAP_MSIX                   0x11
 
 #define for_each_configured_pci_device(dev, cell)                      \
        for ((dev) = (cell)->pci_devices;                               \
             (dev) - (cell)->pci_devices < (cell)->config->num_pci_devices; \
             (dev)++)
 
+#define for_each_pci_cap(cap, dev, counter)                            \
+       for ((cap) = jailhouse_cell_pci_caps((dev)->cell->config) +     \
+               (dev)->info->caps_start, (counter) = 0;                 \
+            (counter) < (dev)->info->num_caps;                         \
+            (cap)++, (counter)++)
+
 /* entry for PCI config space whitelist (granting access) */
 struct pci_cfg_access {
        u32 reg_num; /** Register number (4-byte aligned) */
@@ -161,6 +167,7 @@ enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
                                      unsigned int size, u32 *value)
 {
        const struct jailhouse_pci_capability *cap;
+       unsigned int cap_offs;
 
        if (!device) {
                *value = -1;
@@ -174,7 +181,13 @@ enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
        if (!cap)
                return PCI_ACCESS_PERFORM;
 
-       // TODO: Emulate MSI/MSI-X etc.
+       cap_offs = address - cap->start;
+       if (cap->id == PCI_CAP_MSI && cap_offs >= 4 &&
+           (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
+               *value = device->msi_registers.raw[cap_offs / 4] >>
+                       ((cap_offs % 4) * 8);
+               return PCI_ACCESS_DONE;
+       }
 
        return PCI_ACCESS_PERFORM;
 }
@@ -194,8 +207,9 @@ enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
        const struct jailhouse_pci_capability *cap;
        /* initialize list to work around wrong compiler warning */
        const struct pci_cfg_access *list = NULL;
-       unsigned int n, bias_shift, len = 0;
-       u32 mask;
+       unsigned int bias_shift = (address % 4) * 8;
+       unsigned int n, cap_offs, len = 0;
+       u32 mask = BYTE_MASK(size);
 
        if (!device)
                return PCI_ACCESS_REJECT;
@@ -209,9 +223,6 @@ enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
                        len = ARRAY_SIZE(bridge_write_access);
                }
 
-               bias_shift = (address & 0x003) * 8;
-               mask = BYTE_MASK(size);
-
                for (n = 0; n < len; n++) {
                        if (list[n].reg_num == (address & 0xffc) &&
                            ((list[n].mask >> bias_shift) & mask) == mask)
@@ -225,6 +236,25 @@ enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
        if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
                return PCI_ACCESS_REJECT;
 
+       cap_offs = address - cap->start;
+       if (cap->id == PCI_CAP_MSI &&
+           (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
+               value <<= bias_shift;
+               mask <<= bias_shift;
+               device->msi_registers.raw[cap_offs / 4] &= ~mask;
+               device->msi_registers.raw[cap_offs / 4] |= value;
+
+               if (pci_update_msi(device, cap) < 0)
+                       return PCI_ACCESS_REJECT;
+
+               /*
+                * Address and data words are emulated, the control word is
+                * written as-is.
+                */
+               if (cap_offs >= 4)
+                       return PCI_ACCESS_DONE;
+       }
+
        return PCI_ACCESS_PERFORM;
 }
 
@@ -308,6 +338,55 @@ invalid_access:
 
 }
 
+unsigned int pci_enabled_msi_vectors(struct pci_device *device)
+{
+       return device->msi_registers.msg32.enable ?
+               1 << device->msi_registers.msg32.mme : 0;
+}
+
+static void pci_save_msi(struct pci_device *device,
+                        const struct jailhouse_pci_capability *cap)
+{
+       u16 bdf = device->info->bdf;
+       unsigned int n;
+
+       for (n = 0; n < (device->info->msi_64bits ? 4 : 3); n++)
+               device->msi_registers.raw[n] =
+                       pci_read_config(bdf, cap->start + n * 4, 4);
+}
+
+static void pci_restore_msi(struct pci_device *device,
+                           const struct jailhouse_pci_capability *cap)
+{
+       unsigned int n;
+
+       for (n = 1; n < (device->info->msi_64bits ? 4 : 3); n++)
+               pci_write_config(device->info->bdf, cap->start + n * 4,
+                                device->msi_registers.raw[n], 4);
+}
+
+/**
+ * pci_prepare_handover() - Prepare the handover of PCI devices to Jailhouse or
+ *                          back to Linux
+ */
+void pci_prepare_handover(void)
+{
+       const struct jailhouse_pci_capability *cap;
+       struct pci_device *device;
+       unsigned int n;
+
+       if (!root_cell.pci_devices)
+               return;
+
+       for_each_configured_pci_device(device, &root_cell) {
+               if (device->cell)
+                       for_each_pci_cap(cap, device, n)
+                               if (cap->id == PCI_CAP_MSI)
+                                       pci_suppress_msi(device, cap);
+                               // TODO: MSI-X
+       }
+}
+
 static int pci_add_device(struct cell *cell, struct pci_device *device)
 {
        printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
@@ -330,8 +409,9 @@ int pci_cell_init(struct cell *cell)
                                              sizeof(struct pci_device));
        const struct jailhouse_pci_device *dev_infos =
                jailhouse_cell_pci_devices(cell->config);
+       const struct jailhouse_pci_capability *cap;
        struct pci_device *device, *root_device;
-       unsigned int ndev;
+       unsigned int ndev, ncap;
        int err;
 
        cell->pci_devices = page_alloc(&mem_pool, array_size / PAGE_SIZE);
@@ -362,8 +442,19 @@ int pci_cell_init(struct cell *cell)
                }
 
                device->cell = cell;
+
+               for_each_pci_cap(cap, device, ncap)
+                       if (cap->id == PCI_CAP_MSI)
+                               pci_save_msi(device, cap);
+                       else if (cap->id == PCI_CAP_MSIX)
+                               // TODO: Handle
+                               printk("MSI-X left out @%02x:%02x.%x!\n",
+                                      PCI_BDF_PARAMS(device->info->bdf));
        }
 
+       if (cell == &root_cell)
+               pci_prepare_handover();
+
        return 0;
 }
 
@@ -396,12 +487,55 @@ void pci_cell_exit(struct cell *cell)
        if (cell == &root_cell)
                return;
 
-       for_each_configured_pci_device(device, cell) {
-               if (!device->cell)
-                       continue;
-               pci_remove_device(device);
-               pci_return_device_to_root_cell(device);
-       }
+       for_each_configured_pci_device(device, cell)
+               if (device->cell) {
+                       pci_remove_device(device);
+                       pci_return_device_to_root_cell(device);
+               }
 
        page_free(&mem_pool, cell->pci_devices, array_size / PAGE_SIZE);
 }
+
+void pci_config_commit(struct cell *cell_added_removed)
+{
+       const struct jailhouse_pci_capability *cap;
+       struct pci_device *device;
+       unsigned int n;
+       int err = 0;
+
+       if (!cell_added_removed)
+               return;
+
+       for_each_configured_pci_device(device, &root_cell)
+               if (device->cell)
+                       for_each_pci_cap(cap, device, n) {
+                               if (cap->id == PCI_CAP_MSI)
+                                       err = pci_update_msi(device, cap);
+                               // TODO: MSI-X
+                               if (err)
+                                       goto error;
+                       }
+       return;
+
+error:
+       panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x,"
+                    " cap %d\n", PCI_BDF_PARAMS(device->info->bdf), cap->id);
+       panic_stop(NULL);
+}
+
+void pci_shutdown(void)
+{
+       const struct jailhouse_pci_capability *cap;
+       struct pci_device *device;
+       unsigned int n;
+
+       if (!root_cell.pci_devices)
+               return;
+
+       for_each_configured_pci_device(device, &root_cell)
+               if (device->cell)
+                       for_each_pci_cap(cap, device, n)
+                               if (cap->id == PCI_CAP_MSI)
+                                       pci_restore_msi(device, cap);
+                               // TODO: MSI-X
+}