rtime.felk.cvut.cz Git - jailhouse.git/blobdiff - hypervisor/pci.c
Merge remote-tracking branch 'kiszka/master'
[jailhouse.git] / hypervisor / pci.c
index b7b458b04c450d314530ae4f2bb73fed86f1e24e..12424666b5c874a09513e1b8171fabecda24ef90 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Jailhouse, a Linux-based partitioning hypervisor
  *
- * Copyright (c) Siemens AG, 2014
+ * Copyright (c) Siemens AG, 2014, 2015
  *
  * Authors:
  *  Ivan Kolchin <ivan.kolchin@siemens.com>
 #include <jailhouse/printk.h>
 #include <jailhouse/utils.h>
 
-#define PCI_CONFIG_HEADER_SIZE         0x40
-
-#define PCI_CAP_MSI                    0x05
-#define PCI_CAP_MSIX                   0x11
-
 #define MSIX_VECTOR_CTRL_DWORD         3
 
 #define for_each_configured_pci_device(dev, cell)                      \
             (counter) < (dev)->info->num_caps;                         \
             (cap)++, (counter)++)
 
-/* entry for PCI config space whitelist (granting access) */
-struct pci_cfg_access {
-       u32 reg_num; /** Register number (4-byte aligned) */
-       u32 mask; /** Bit set: access allowed */
+/* entry for PCI config space access control */
+struct pci_cfg_control {
+       enum {
+               PCI_CONFIG_DENY,
+               PCI_CONFIG_ALLOW,
+               PCI_CONFIG_RDONLY,
+       } type;   /* Access type */
+       u32 mask; /* Bit set: access type applies; bit cleared: deny access */
 };
 
-/* --- Whilelist for writing to PCI config space registers --- */
+/* --- Access control for writing to PCI config space registers --- */
 /* Type 1: Endpoints */
-static const struct pci_cfg_access endpoint_write_access[] = {
-       { 0x04, 0xffffffff }, /* Command, Status */
-       { 0x0c, 0xff00ffff }, /* BIST, Latency Timer, Cacheline */
-       { 0x3c, 0x000000ff }, /* Int Line */
+static const struct pci_cfg_control endpoint_write[PCI_CONFIG_HEADER_SIZE] = {
+       [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
+       [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
+       [0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* ROM BAR */
+       [0x3c/4] = {PCI_CONFIG_ALLOW,  0x000000ff}, /* Int Line */
 };
-/* Type 2: Bridges */
-static const struct pci_cfg_access bridge_write_access[] = {
-       { 0x04, 0xffffffff }, /* Command, Status */
-       { 0x0c, 0xff00ffff }, /* BIST, Latency Timer, Cacheline */
-       { 0x3c, 0xffff00ff }, /* Int Line, Bridge Control */
+
+/* Type 2: Bridges
+ * Note: Ignore limit/base reprogramming attempts because the root cell will
+ *       perform them on bus rescans. */
+static const struct pci_cfg_control bridge_write[PCI_CONFIG_HEADER_SIZE] = {
+       [0x04/4] = {PCI_CONFIG_ALLOW,  0xffffffff}, /* Command, Status */
+       [0x0c/4] = {PCI_CONFIG_ALLOW,  0xff00ffff}, /* BIST, Lat., Cacheline */
+       [0x1c/4] = {PCI_CONFIG_RDONLY, 0x0000ffff}, /* I/O Limit & Base */
+       [0x20/4 ...      /* Memory Limit/Base, Prefetch Memory Limit/Base, */
+        0x30/4] = {PCI_CONFIG_RDONLY, 0xffffffff}, /* I/O Limit & Base */
+       [0x3c/4] = {PCI_CONFIG_ALLOW,  0xffff00ff}, /* Int Line, Bridge Ctrl */
 };
 
 static void *pci_space;
-static u64 mmcfg_start, mmcfg_end;
+static u64 mmcfg_start, mmcfg_size;
 static u8 end_bus;
 
+unsigned int pci_mmio_count_regions(struct cell *cell)
+{
+       const struct jailhouse_pci_device *dev_infos =
+               jailhouse_cell_pci_devices(cell->config);
+       unsigned int n, regions = 0;
+
+       if (system_config->platform_info.x86.mmconfig_base)
+               regions++;
+
+       for (n = 0; n < cell->config->num_pci_devices; n++)
+               if (dev_infos[n].type == JAILHOUSE_PCI_TYPE_IVSHMEM)
+                       regions += PCI_IVSHMEM_NUM_MMIO_REGIONS;
+               else if (dev_infos[n].msix_address)
+                       regions++;
+
+       return regions;
+}
+
 static void *pci_get_device_mmcfg_base(u16 bdf)
 {
        return pci_space + ((unsigned long)bdf << 12);
 }
 
 /**
- * pci_read_config() - Read from PCI config space
- * @bdf:       16-bit bus/device/function ID of target
- * @address:   Config space access address
- * @size:      Access size (1, 2 or 4 bytes)
+ * Read from PCI config space.
+ * @param bdf          16-bit bus/device/function ID of target.
+ * @param address      Config space access address.
+ * @param size         Access size (1, 2 or 4 bytes).
  *
- * Return: read value
+ * @return Read value.
+ *
+ * @see pci_write_config
  */
 u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
 {
@@ -88,11 +114,13 @@ u32 pci_read_config(u16 bdf, u16 address, unsigned int size)
 }
 
 /**
- * pci_write_config() - Write to PCI config space
- * @bdf:       16-bit bus/device/function ID of target
- * @address:   Config space access address
- * @value:     Value to be written
- * @size:      Access size (1, 2 or 4 bytes)
+ * Write to PCI config space.
+ * @param bdf          16-bit bus/device/function ID of target.
+ * @param address      Config space access address.
+ * @param value                Value to be written.
+ * @param size         Access size (1, 2 or 4 bytes).
+ *
+ * @see pci_read_config
  */
 void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
 {
@@ -110,11 +138,11 @@ void pci_write_config(u16 bdf, u16 address, u32 value, unsigned int size)
 }
 
 /**
- * pci_get_assigned_device() - Look up device owned by a cell
- * @cell:      Owning cell
- * @bdf:       16-bit bus/device/function ID
+ * Look up device owned by a cell.
+ * @param[in] cell     Owning cell.
+ * @param bdf          16-bit bus/device/function ID.
  *
- * Return: Pointer to owned PCI device or NULL.
+ * @return Pointer to owned PCI device or NULL.
  */
 struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
 {
@@ -133,11 +161,13 @@ struct pci_device *pci_get_assigned_device(const struct cell *cell, u16 bdf)
 }
 
 /**
- * pci_find_capability() - Look up capability at given config space address
- * @device:    The device to be accessed
- * @address:   Config space access address
+ * Look up capability at given config space address.
+ * @param device       The device to be accessed.
+ * @param address      Config space access address.
+ *
+ * @return Corresponding capability structure or NULL if none found.
  *
- * Return: Corresponding capability structure or NULL if none found.
+ * @private
  */
 static const struct jailhouse_pci_capability *
 pci_find_capability(struct pci_device *device, u16 address)
@@ -155,27 +185,48 @@ pci_find_capability(struct pci_device *device, u16 address)
 }
 
 /**
- * pci_cfg_read_moderate() - Moderate config space read access
- * @device:    The device to be accessed; if NULL, access will be emulated,
- *             returning a value of -1
- * @address:   Config space address
- * @size:      Access size (1, 2 or 4 bytes)
- * @value:     Pointer to buffer to receive the emulated value if
- *             PCI_ACCESS_DONE is returned
+ * Moderate config space read access.
+ * @param device       The device to be accessed. If NULL, access will be
+ *                     emulated, returning a value of -1.
+ * @param address      Config space address.
+ * @param size         Access size (1, 2 or 4 bytes).
+ * @param value                Pointer to buffer to receive the emulated value if
+ *                     PCI_ACCESS_DONE is returned.
  *
- * Return: PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
+ * @return PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
+ *
+ * @see pci_cfg_write_moderate
  */
 enum pci_access pci_cfg_read_moderate(struct pci_device *device, u16 address,
                                      unsigned int size, u32 *value)
 {
        const struct jailhouse_pci_capability *cap;
-       unsigned int cap_offs;
+       unsigned int bar_no, cap_offs;
 
        if (!device) {
                *value = -1;
                return PCI_ACCESS_DONE;
        }
 
+       /* Emulate BARs for physical and virtual devices */
+       if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE) {
+               /* Emulate BAR access, always returning the shadow value. */
+               if (address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
+                       bar_no = (address - PCI_CFG_BAR) / 4;
+                       *value = device->bar[bar_no] >> ((address % 4) * 8);
+                       return PCI_ACCESS_DONE;
+               }
+
+               /* We do not expose ROMs. */
+               if (address >= PCI_CFG_ROMBAR && address < PCI_CFG_CAPS) {
+                       *value = 0;
+                       return PCI_ACCESS_DONE;
+               }
+       }
+
+       if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
+               return pci_ivshmem_cfg_read(device, address, value);
+
        if (address < PCI_CONFIG_HEADER_SIZE)
                return PCI_ACCESS_PERFORM;
 
@@ -201,7 +252,7 @@ static int pci_update_msix(struct pci_device *device,
        int result;
 
        for (n = 0; n < device->info->num_msix_vectors; n++) {
-               result = pci_update_msix_vector(device, n);
+               result = arch_pci_update_msix_vector(device, n);
                if (result < 0)
                        return result;
        }
@@ -209,58 +260,78 @@ static int pci_update_msix(struct pci_device *device,
 }
 
 /**
- * pci_cfg_write_moderate() - Moderate config space write access
- * @device:    The device to be accessed; if NULL, access will be rejected
- * @address:   Config space address
- * @size:      Access size (1, 2 or 4 bytes)
- * @value:     Value to be written
+ * Moderate config space write access.
+ * @param device       The device to be accessed. If NULL, access will be
+ *                     rejected.
+ * @param address      Config space address.
+ * @param size         Access size (1, 2 or 4 bytes).
+ * @param value                Value to be written.
+ *
+ * @return PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
  *
- * Return: PCI_ACCESS_REJECT, PCI_ACCESS_PERFORM or PCI_ACCESS_DONE.
+ * @see pci_cfg_read_moderate
  */
 enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
                                       unsigned int size, u32 value)
 {
        const struct jailhouse_pci_capability *cap;
        /* initialize list to work around wrong compiler warning */
-       const struct pci_cfg_access *list = NULL;
        unsigned int bias_shift = (address % 4) * 8;
        u32 mask = BYTE_MASK(size) << bias_shift;
-       unsigned int n, cap_offs, len = 0;
+       struct pci_cfg_control cfg_control;
+       unsigned int bar_no, cap_offs;
 
        if (!device)
                return PCI_ACCESS_REJECT;
 
+       value <<= bias_shift;
+
+       /* Emulate BARs for physical and virtual devices */
+       if (device->info->type != JAILHOUSE_PCI_TYPE_BRIDGE &&
+           address >= PCI_CFG_BAR && address <= PCI_CFG_BAR_END) {
+               bar_no = (address - PCI_CFG_BAR) / 4;
+               mask &= device->info->bar_mask[bar_no];
+               device->bar[bar_no] &= ~mask;
+               device->bar[bar_no] |= value & mask;
+               return PCI_ACCESS_DONE;
+       }
+
        if (address < PCI_CONFIG_HEADER_SIZE) {
-               if (device->info->type == JAILHOUSE_PCI_TYPE_DEVICE) {
-                       list = endpoint_write_access;
-                       len = ARRAY_SIZE(endpoint_write_access);
-               } else if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE) {
-                       list = bridge_write_access;
-                       len = ARRAY_SIZE(bridge_write_access);
-               }
+               if (device->info->type == JAILHOUSE_PCI_TYPE_BRIDGE)
+                       cfg_control = bridge_write[address / 4];
+               else /* physical or virtual device */
+                       cfg_control = endpoint_write[address / 4];
 
-               for (n = 0; n < len; n++) {
-                       if (list[n].reg_num == (address & 0xffc) &&
-                           (list[n].mask & mask) == mask)
-                               return PCI_ACCESS_PERFORM;
-               }
+               if ((cfg_control.mask & mask) != mask)
+                       return PCI_ACCESS_REJECT;
 
-               return PCI_ACCESS_REJECT;
+               switch (cfg_control.type) {
+               case PCI_CONFIG_ALLOW:
+                       if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
+                               return pci_ivshmem_cfg_write(device,
+                                               address / 4, mask, value);
+                       return PCI_ACCESS_PERFORM;
+               case PCI_CONFIG_RDONLY:
+                       return PCI_ACCESS_DONE;
+               default:
+                       return PCI_ACCESS_REJECT;
+               }
        }
 
+       if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM)
+               return pci_ivshmem_cfg_write(device, address / 4, mask, value);
+
        cap = pci_find_capability(device, address);
        if (!cap || !(cap->flags & JAILHOUSE_PCICAPS_WRITE))
                return PCI_ACCESS_REJECT;
 
-       value <<= bias_shift;
-
        cap_offs = address - cap->start;
        if (cap->id == PCI_CAP_MSI &&
            (cap_offs < 10 || (device->info->msi_64bits && cap_offs < 14))) {
                device->msi_registers.raw[cap_offs / 4] &= ~mask;
                device->msi_registers.raw[cap_offs / 4] |= value;
 
-               if (pci_update_msi(device, cap) < 0)
+               if (arch_pci_update_msi(device, cap) < 0)
                        return PCI_ACCESS_REJECT;
 
                /*
@@ -281,141 +352,124 @@ enum pci_access pci_cfg_write_moderate(struct pci_device *device, u16 address,
 }
 
 /**
- * pci_init() - Initialization of PCI module
+ * Initialization of PCI subsystem.
  *
- * Return: 0 - success, error code - if error.
+ * @return 0 on success, negative error code otherwise.
  */
 int pci_init(void)
 {
-       unsigned int mmcfg_size;
        int err;
 
-       err = pci_cell_init(&root_cell);
-       if (err)
-               return err;
-
        mmcfg_start = system_config->platform_info.x86.mmconfig_base;
-       if (mmcfg_start == 0)
-               return 0;
-
-       end_bus = system_config->platform_info.x86.mmconfig_end_bus;
-       mmcfg_size = (end_bus + 1) * 256 * 4096;
-       mmcfg_end = mmcfg_start + mmcfg_size - 4;
-
-       pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
-       if (!pci_space)
-               return -ENOMEM;
+       if (mmcfg_start != 0) {
+               end_bus = system_config->platform_info.x86.mmconfig_end_bus;
+               mmcfg_size = (end_bus + 1) * 256 * 4096;
+
+               pci_space = page_alloc(&remap_pool, mmcfg_size / PAGE_SIZE);
+               if (!pci_space)
+                       return trace_error(-ENOMEM);
+
+               err = paging_create(&hv_paging_structs, mmcfg_start,
+                                   mmcfg_size, (unsigned long)pci_space,
+                                   PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
+                                   PAGING_NON_COHERENT);
+               if (err)
+                       return err;
+       }
 
-       return page_map_create(&hv_paging_structs, mmcfg_start, mmcfg_size,
-                              (unsigned long)pci_space,
-                              PAGE_DEFAULT_FLAGS | PAGE_FLAG_UNCACHED,
-                              PAGE_MAP_NON_COHERENT);
+       return pci_cell_init(&root_cell);
 }
 
-static int pci_msix_access_handler(const struct cell *cell, bool is_write,
-                                  u64 addr, u32 *value)
+static enum mmio_result pci_msix_access_handler(void *arg,
+                                               struct mmio_access *mmio)
 {
-       unsigned int dword = (addr % sizeof(union pci_msix_vector)) >> 2;
-       struct pci_device *device = cell->msix_device_list;
+       unsigned int dword =
+               (mmio->address % sizeof(union pci_msix_vector)) >> 2;
+       struct pci_device *device = arg;
        unsigned int index;
-       u64 offs;
-
-       while (device) {
-               if (addr >= device->info->msix_address &&
-                   addr < device->info->msix_address +
-                          device->info->msix_region_size)
-                       goto found;
-               device = device->next_msix_device;
-       }
-       return 0;
 
-found:
        /* access must be DWORD-aligned */
-       if (addr & 0x3)
+       if (mmio->address & 0x3)
                goto invalid_access;
 
-       offs = addr - device->info->msix_address;
-       index = offs / sizeof(union pci_msix_vector);
+       index = mmio->address / sizeof(union pci_msix_vector);
 
-       if (is_write) {
+       if (mmio->is_write) {
                /*
                 * The PBA may share a page with the MSI-X table. Writing to
                 * PBA entries is undefined. We declare it as invalid.
                 */
                if (index >= device->info->num_msix_vectors)
                        goto invalid_access;
-               if (dword == MSIX_VECTOR_CTRL_DWORD) {
-                       mmio_write32(&device->msix_table[index].field.ctrl,
-                                    *value);
-               } else {
-                       device->msix_vectors[index].raw[dword] = *value;
-                       if (pci_update_msix_vector(device, index) < 0)
-                               goto invalid_access;
-               }
+
+               device->msix_vectors[index].raw[dword] = mmio->value;
+               if (arch_pci_update_msix_vector(device, index) < 0)
+                       goto invalid_access;
+
+               if (dword == MSIX_VECTOR_CTRL_DWORD)
+                       mmio_write32(&device->msix_table[index].raw[dword],
+                                    mmio->value);
        } else {
                if (index >= device->info->num_msix_vectors ||
                    dword == MSIX_VECTOR_CTRL_DWORD)
-                       *value =
-                           mmio_read32(((void *)device->msix_table) + offs);
+                       mmio->value = mmio_read32(((void *)device->msix_table) +
+                                                 mmio->address);
                else
-                       *value = device->msix_vectors[index].raw[dword];
+                       mmio->value = device->msix_vectors[index].raw[dword];
        }
-       return 1;
+       return MMIO_HANDLED;
 
 invalid_access:
-       panic_printk("FATAL: Invalid PCI MSIX BAR write, device "
+       panic_printk("FATAL: Invalid PCI MSI-X table/PBA access, device "
                     "%02x:%02x.%x\n", PCI_BDF_PARAMS(device->info->bdf));
-       return -1;
+       return MMIO_ERROR;
 }
 
-/**
- * pci_mmio_access_handler() - Handler for MMIO-accesses to PCI config space
- * @cell:      Request issuing cell
- * @is_write:  True if write access
- * @addr:      Address accessed
- * @value:     Pointer to value for reading/writing
- *
- * Return: 1 if handled successfully, 0 if unhandled, -1 on access error
- */
-int pci_mmio_access_handler(const struct cell *cell, bool is_write,
-                           u64 addr, u32 *value)
+static enum mmio_result pci_mmconfig_access_handler(void *arg,
+                                                   struct mmio_access *mmio)
 {
-       u32 mmcfg_offset, reg_addr;
+       u32 reg_addr = mmio->address & 0xfff;
        struct pci_device *device;
-       enum pci_access access;
+       enum pci_access result;
+       u32 val;
 
-       if (!pci_space || addr < mmcfg_start || addr > mmcfg_end)
-               return pci_msix_access_handler(cell, is_write, addr, value);
-
-       mmcfg_offset = addr - mmcfg_start;
-       reg_addr = mmcfg_offset & 0xfff;
        /* access must be DWORD-aligned */
        if (reg_addr & 0x3)
                goto invalid_access;
 
-       device = pci_get_assigned_device(cell, mmcfg_offset >> 12);
+       device = pci_get_assigned_device(this_cell(), mmio->address >> 12);
 
-       if (is_write) {
-               access = pci_cfg_write_moderate(device, reg_addr, 4, *value);
-               if (access == PCI_ACCESS_REJECT)
+       if (mmio->is_write) {
+               result = pci_cfg_write_moderate(device, reg_addr, 4,
+                                               mmio->value);
+               if (result == PCI_ACCESS_REJECT)
                        goto invalid_access;
-               if (access == PCI_ACCESS_PERFORM)
-                       mmio_write32(pci_space + mmcfg_offset, *value);
+               if (result == PCI_ACCESS_PERFORM)
+                       mmio_write32(pci_space + mmio->address, mmio->value);
        } else {
-               access = pci_cfg_read_moderate(device, reg_addr, 4, value);
-               if (access == PCI_ACCESS_PERFORM)
-                       *value = mmio_read32(pci_space + mmcfg_offset);
+               result = pci_cfg_read_moderate(device, reg_addr, 4, &val);
+               if (result == PCI_ACCESS_PERFORM)
+                       mmio->value = mmio_read32(pci_space + mmio->address);
+               else
+                       mmio->value = val;
        }
 
-       return 1;
+       return MMIO_HANDLED;
 
 invalid_access:
        panic_printk("FATAL: Invalid PCI MMCONFIG write, device %02x:%02x.%x, "
-                    "reg: %\n", PCI_BDF_PARAMS(mmcfg_offset >> 12), reg_addr);
-       return -1;
+                    "reg: %x\n", PCI_BDF_PARAMS(mmio->address >> 12),
+                    reg_addr);
+       return MMIO_ERROR;
 
 }
 
+/**
+ * Retrieve number of enabled MSI vectors of a device.
+ * @param device       The device to be examined.
+ *
+ * @return number of vectors.
+ */
 unsigned int pci_enabled_msi_vectors(struct pci_device *device)
 {
        return device->msi_registers.msg32.enable ?
@@ -450,7 +504,7 @@ static void pci_suppress_msix(struct pci_device *device,
        union pci_msix_registers regs = device->msix_registers;
 
        if (suppressed)
-               regs.field.fmask = 1;
+               regs.fmask = 1;
        pci_write_config(device->info->bdf, cap->start, regs.raw, 4);
 }
 
@@ -463,7 +517,7 @@ static void pci_save_msix(struct pci_device *device,
                pci_read_config(device->info->bdf, cap->start, 4);
 
        for (n = 0; n < device->info->num_msix_vectors; n++)
-               for (r = 0; r < 3; r++)
+               for (r = 0; r < 4; r++)
                        device->msix_vectors[n].raw[r] =
                                mmio_read32(&device->msix_table[n].raw[r]);
 }
@@ -474,6 +528,7 @@ static void pci_restore_msix(struct pci_device *device,
        unsigned int n, r;
 
        for (n = 0; n < device->info->num_msix_vectors; n++)
+               /* only restore address/data, control is write-through */
                for (r = 0; r < 3; r++)
                        mmio_write32(&device->msix_table[n].raw[r],
                                     device->msix_vectors[n].raw[r]);
@@ -481,8 +536,7 @@ static void pci_restore_msix(struct pci_device *device,
 }
 
 /**
- * pci_prepare_handover() - Prepare the handover of PCI devices to Jailhouse or
- *                          back to Linux
+ * Prepare the handover of PCI devices to Jailhouse or back to Linux.
  */
 void pci_prepare_handover(void)
 {
@@ -497,57 +551,74 @@ void pci_prepare_handover(void)
                if (device->cell)
                        for_each_pci_cap(cap, device, n)
                                if (cap->id == PCI_CAP_MSI)
-                                       pci_suppress_msi(device, cap);
+                                       arch_pci_suppress_msi(device, cap);
                                else if (cap->id == PCI_CAP_MSIX)
                                        pci_suppress_msix(device, cap, true);
        }
 }
 
-static int pci_add_device(struct cell *cell, struct pci_device *device)
+static int pci_add_physical_device(struct cell *cell, struct pci_device *device)
 {
-       unsigned int size = device->info->msix_region_size;
+       unsigned int n, pages, size = device->info->msix_region_size;
        int err;
 
        printk("Adding PCI device %02x:%02x.%x to cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), cell->config->name);
 
-       err = arch_pci_add_device(cell, device);
+       for (n = 0; n < PCI_NUM_BARS; n ++)
+               device->bar[n] = pci_read_config(device->info->bdf,
+                                                PCI_CFG_BAR + n * 4, 4);
+
+       err = arch_pci_add_physical_device(cell, device);
 
        if (!err && device->info->msix_address) {
                device->msix_table = page_alloc(&remap_pool, size / PAGE_SIZE);
                if (!device->msix_table) {
-                       err = -ENOMEM;
+                       err = trace_error(-ENOMEM);
                        goto error_remove_dev;
                }
 
-               err = page_map_create(&hv_paging_structs,
-                                     device->info->msix_address, size,
-                                     (unsigned long)device->msix_table,
-                                     PAGE_DEFAULT_FLAGS | PAGE_FLAG_UNCACHED,
-                                     PAGE_MAP_NON_COHERENT);
+               err = paging_create(&hv_paging_structs,
+                                   device->info->msix_address, size,
+                                   (unsigned long)device->msix_table,
+                                   PAGE_DEFAULT_FLAGS | PAGE_FLAG_DEVICE,
+                                   PAGING_NON_COHERENT);
                if (err)
                        goto error_page_free;
 
-               device->next_msix_device = cell->msix_device_list;
-               cell->msix_device_list = device;
+               if (device->info->num_msix_vectors > PCI_EMBEDDED_MSIX_VECTS) {
+                       pages = PAGES(sizeof(union pci_msix_vector) *
+                                     device->info->num_msix_vectors);
+                       device->msix_vectors = page_alloc(&mem_pool, pages);
+                       if (!device->msix_vectors) {
+                               err = -ENOMEM;
+                               goto error_unmap_table;
+                       }
+               }
+
+               mmio_region_register(cell, device->info->msix_address, size,
+                                    pci_msix_access_handler, device);
        }
-       return 0;
+       return err;
 
+error_unmap_table:
+       /* cannot fail, destruction of same size as construction */
+       paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
+                      size, PAGING_NON_COHERENT);
 error_page_free:
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
 error_remove_dev:
-       arch_pci_remove_device(device);
+       arch_pci_remove_physical_device(device);
        return err;
 }
 
-static void pci_remove_device(struct pci_device *device)
+static void pci_remove_physical_device(struct pci_device *device)
 {
        unsigned int size = device->info->msix_region_size;
-       struct pci_device *prev_msix_device;
 
        printk("Removing PCI device %02x:%02x.%x from cell \"%s\"\n",
               PCI_BDF_PARAMS(device->info->bdf), device->cell->config->name);
-       arch_pci_remove_device(device);
+       arch_pci_remove_physical_device(device);
        pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
                         PCI_CMD_INTX_OFF, 2);
 
@@ -555,20 +626,26 @@ static void pci_remove_device(struct pci_device *device)
                return;
 
        /* cannot fail, destruction of same size as construction */
-       page_map_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
-                        size, PAGE_MAP_NON_COHERENT);
+       paging_destroy(&hv_paging_structs, (unsigned long)device->msix_table,
+                      size, PAGING_NON_COHERENT);
        page_free(&remap_pool, device->msix_table, size / PAGE_SIZE);
 
-       prev_msix_device = device->cell->msix_device_list;
-       if (prev_msix_device == device) {
-               device->cell->msix_device_list = NULL;
-       } else {
-               while (prev_msix_device->next_msix_device != device)
-                       prev_msix_device = prev_msix_device->next_msix_device;
-               prev_msix_device->next_msix_device = NULL;
-       }
+       if (device->msix_vectors != device->msix_vector_array)
+               page_free(&mem_pool, device->msix_vectors,
+                         PAGES(sizeof(union pci_msix_vector) *
+                               device->info->num_msix_vectors));
+
+       mmio_region_unregister(device->cell, device->info->msix_address);
 }
 
+/**
+ * Perform PCI-specific initialization for a new cell.
+ * @param cell Cell to be initialized.
+ *
+ * @return 0 on success, negative error code otherwise.
+ *
+ * @see pci_cell_exit
+ */
 int pci_cell_init(struct cell *cell)
 {
        unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
@@ -580,6 +657,13 @@ int pci_cell_init(struct cell *cell)
        unsigned int ndev, ncap;
        int err;
 
+       if (pci_space)
+               mmio_region_register(cell, mmcfg_start, mmcfg_size,
+                                    pci_mmconfig_access_handler, NULL);
+
+       if (cell->config->num_pci_devices == 0)
+               return 0;
+
        cell->pci_devices = page_alloc(&mem_pool, devlist_pages);
        if (!cell->pci_devices)
                return -ENOMEM;
@@ -591,26 +675,30 @@ int pci_cell_init(struct cell *cell)
         * handy pointers. The cell pointer also encodes active ownership.
         */
        for (ndev = 0; ndev < cell->config->num_pci_devices; ndev++) {
-               if (dev_infos[ndev].num_msix_vectors > PCI_MAX_MSIX_VECTORS) {
-                       pci_cell_exit(cell);
-                       return -ERANGE;
-               }
-
                device = &cell->pci_devices[ndev];
                device->info = &dev_infos[ndev];
+               device->msix_vectors = device->msix_vector_array;
+
+               if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
+                       err = pci_ivshmem_init(cell, device);
+                       if (err)
+                               goto error;
+
+                       device->cell = cell;
+
+                       continue;
+               }
 
                root_device = pci_get_assigned_device(&root_cell,
                                                      dev_infos[ndev].bdf);
                if (root_device) {
-                       pci_remove_device(root_device);
+                       pci_remove_physical_device(root_device);
                        root_device->cell = NULL;
                }
 
-               err = pci_add_device(cell, device);
-               if (err) {
-                       pci_cell_exit(cell);
-                       return err;
-               }
+               err = pci_add_physical_device(cell, device);
+               if (err)
+                       goto error;
 
                device->cell = cell;
 
@@ -625,6 +713,9 @@ int pci_cell_init(struct cell *cell)
                pci_prepare_handover();
 
        return 0;
+error:
+       pci_cell_exit(cell);
+       return err;
 }
 
 static void pci_return_device_to_root_cell(struct pci_device *device)
@@ -634,7 +725,8 @@ static void pci_return_device_to_root_cell(struct pci_device *device)
        for_each_configured_pci_device(root_device, &root_cell)
                if (root_device->info->domain == device->info->domain &&
                    root_device->info->bdf == device->info->bdf) {
-                       if (pci_add_device(&root_cell, root_device) < 0)
+                       if (pci_add_physical_device(&root_cell,
+                                                   root_device) < 0)
                                printk("WARNING: Failed to re-assign PCI "
                                       "device to root cell\n");
                        else
@@ -643,6 +735,12 @@ static void pci_return_device_to_root_cell(struct pci_device *device)
                }
 }
 
+/**
+ * Perform PCI-specific cleanup for a cell under destruction.
+ * @param cell Cell to be destroyed.
+ *
+ * @see pci_cell_init
+ */
 void pci_cell_exit(struct cell *cell)
 {
        unsigned int devlist_pages = PAGES(cell->config->num_pci_devices *
@@ -658,13 +756,24 @@ void pci_cell_exit(struct cell *cell)
 
        for_each_configured_pci_device(device, cell)
                if (device->cell) {
-                       pci_remove_device(device);
-                       pci_return_device_to_root_cell(device);
+                       if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
+                               pci_ivshmem_exit(device);
+                       } else {
+                               pci_remove_physical_device(device);
+                               pci_return_device_to_root_cell(device);
+                       }
                }
 
        page_free(&mem_pool, cell->pci_devices, devlist_pages);
 }
 
+/**
+ * Apply PCI-specific configuration changes.
+ * @param cell_added_removed   Cell that was added or removed to/from the
+ *                             system or NULL.
+ *
+ * @see arch_config_commit
+ */
 void pci_config_commit(struct cell *cell_added_removed)
 {
        const struct jailhouse_pci_capability *cap;
@@ -676,10 +785,10 @@ void pci_config_commit(struct cell *cell_added_removed)
                return;
 
        for_each_configured_pci_device(device, &root_cell)
-               if (device->cell)
+               if (device->cell) {
                        for_each_pci_cap(cap, device, n) {
                                if (cap->id == PCI_CAP_MSI) {
-                                       err = pci_update_msi(device, cap);
+                                       err = arch_pci_update_msi(device, cap);
                                } else if (cap->id == PCI_CAP_MSIX) {
                                        err = pci_update_msix(device, cap);
                                        pci_suppress_msix(device, cap, false);
@@ -687,14 +796,29 @@ void pci_config_commit(struct cell *cell_added_removed)
                                if (err)
                                        goto error;
                        }
+                       if (device->info->type == JAILHOUSE_PCI_TYPE_IVSHMEM) {
+                               err = pci_ivshmem_update_msix(device);
+                               if (err) {
+                                       cap = NULL;
+                                       goto error;
+                               }
+                       }
+               }
        return;
 
 error:
-       panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x,"
-                    " cap %d\n", PCI_BDF_PARAMS(device->info->bdf), cap->id);
+       panic_printk("FATAL: Unsupported MSI/MSI-X state, device %02x:%02x.%x",
+                    PCI_BDF_PARAMS(device->info->bdf));
+       if (cap)
+               panic_printk(", cap %d\n", cap->id);
+       else
+               panic_printk("\n");
        panic_stop();
 }
 
+/**
+ * Shut down the PCI layer during hypervisor deactivation.
+ */
 void pci_shutdown(void)
 {
        const struct jailhouse_pci_capability *cap;
@@ -704,11 +828,18 @@ void pci_shutdown(void)
        if (!root_cell.pci_devices)
                return;
 
-       for_each_configured_pci_device(device, &root_cell)
-               if (device->cell)
-                       for_each_pci_cap(cap, device, n)
-                               if (cap->id == PCI_CAP_MSI)
-                                       pci_restore_msi(device, cap);
-                               else if (cap->id == PCI_CAP_MSIX)
-                                       pci_restore_msix(device, cap);
+       for_each_configured_pci_device(device, &root_cell) {
+               if (!device->cell)
+                       continue;
+
+               for_each_pci_cap(cap, device, n)
+                       if (cap->id == PCI_CAP_MSI)
+                               pci_restore_msi(device, cap);
+                       else if (cap->id == PCI_CAP_MSIX)
+                               pci_restore_msix(device, cap);
+
+               if (device->cell != &root_cell)
+                       pci_write_config(device->info->bdf, PCI_CFG_COMMAND,
+                                        PCI_CMD_INTX_OFF, 2);
+       }
 }