rtime.felk.cvut.cz Git - jailhouse.git/blobdiff - tools/jailhouse-config-create
jailhouse: inmates: bench: Add -R option -- repeats count.
[jailhouse.git] / tools / jailhouse-config-create
index c4b74b837962773a050caa13ccc203ec1fb606d5..f0d65ed014032c9a0c032524d5daae0ad020374f 100755 (executable)
@@ -2,7 +2,13 @@
 #
 # Jailhouse, a Linux-based partitioning hypervisor
 #
-# Copyright (c) Siemens AG, 2014
+# Copyright (c) Siemens AG, 2014-2016
+# Copyright (c) Valentine Sinitsyn, 2014-2015
+#
+# Authors:
+#  Henning Schild <henning.schild@siemens.com>
+#  Jan Kiszka <jan.kiszka@siemens.com>
+#  Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
 #
 # This work is licensed under the terms of the GNU GPL, version 2.  See
 # the COPYING file in the top-level directory.
@@ -82,6 +88,7 @@ inputs['files'].add('/proc/cpuinfo')
 inputs['files'].add('/proc/cmdline')
 inputs['files'].add('/proc/ioports')
 inputs['files'].add('/sys/bus/pci/devices/*/config')
+inputs['files'].add('/sys/bus/pci/devices/*/resource')
 inputs['files'].add('/sys/devices/system/cpu/cpu*/uevent')
 inputs['files'].add('/sys/firmware/acpi/tables/APIC')
 inputs['files'].add('/sys/firmware/acpi/tables/MCFG')
@@ -153,6 +160,31 @@ def input_listdir(dir, wildcards):
     return dirs
 
 
+class PCIBARs:
+    """Size masks of a PCI device's six BARs, read from sysfs 'resource'."""
+    IORESOURCE_IO = 0x00000100
+    IORESOURCE_MEM = 0x00000200
+    IORESOURCE_MEM_64 = 0x00100000
+
+    def __init__(self, dir):
+        self.mask = []
+        f = input_open(os.path.join(dir, 'resource'), 'r')
+        while len(self.mask) < 6:  # one 32-bit mask per BAR register
+            (start, end, flags) = f.readline().split()
+            flags = int(flags, 16)
+            if flags & (PCIBARs.IORESOURCE_IO | PCIBARs.IORESOURCE_MEM):
+                mask = ~(int(end, 16) - int(start, 16))
+                if (flags & PCIBARs.IORESOURCE_MEM_64 and
+                        not flags & PCIBARs.IORESOURCE_IO):
+                    self.mask.append(mask & 0xffffffff)
+                    mask >>= 32
+                    f.readline()  # skip line of the consumed upper register
+            else:
+                mask = 0
+            self.mask.append(mask & 0xffffffff)
+        f.close()
+
+
 class PCICapability:
     def __init__(self, id, start, len, flags, content, msix_address):
         self.id = id
@@ -170,9 +202,12 @@ class PCICapability:
     RD = '0'
     RW = 'JAILHOUSE_PCICAPS_WRITE'
 
+    JAILHOUSE_PCI_EXT_CAP = 0x8000
+
     @staticmethod
     def parse_pcicaps(dir):
         caps = []
+        has_extended_caps = False
         f = input_open(os.path.join(dir, 'config'), 'rb')
         f.seek(0x06)
         (status,) = struct.unpack('<H', f.read(2))
@@ -201,6 +236,14 @@ class PCICapability:
                 if (msgctl & (1 << 8)) != 0:  # per-vector masking support
                     len += 10
                 flags = PCICapability.RW
+            elif id == 0x10:  # Express
+                len = 20
+                (cap_reg,) = struct.unpack('<H', f.read(2))
+                if (cap_reg & 0xf) >= 2:  # v2 capability
+                    len = 44
+                # access side effects still need to be analyzed
+                flags = PCICapability.RD
+                has_extended_caps = True
             elif id == 0x11:  # MSI-X
                 # access will be moderated by hypervisor
                 len = 12
@@ -221,18 +264,47 @@ class PCICapability:
             content = f.read(len - 2)
             caps.append(PCICapability(id, cap, len, flags, content,
                                       msix_address))
+
+        if has_extended_caps:
+            # walk extended capability list
+            next = 0x100
+            while next != 0:
+                cap = next
+                f.seek(cap)
+                (id, version_next) = struct.unpack('<HH', f.read(4))
+                next = version_next >> 4
+                if id == 0xffff:
+                    break
+                elif id == 0x0010:  # SR-IOV
+                    len = 64
+                    # access side effects still need to be analyzed
+                    flags = PCICapability.RD
+                else:
+                    if (id & PCICapability.JAILHOUSE_PCI_EXT_CAP) != 0:
+                        print('WARNING: Ignoring unsupported PCI Express '
+                              'Extended Capability ID %x' % id)
+                        continue
+                    # unknown/unhandled cap, mark its existence
+                    len = 4
+                    flags = PCICapability.RD
+                f.seek(cap + 4)
+                content = f.read(len - 4)
+                id |= PCICapability.JAILHOUSE_PCI_EXT_CAP
+                caps.append(PCICapability(id, cap, len, flags, content, 0))
+
         f.close()
         return caps
 
 
 class PCIDevice:
-    def __init__(self, type, domain, bus, dev, fn, caps, path):
+    def __init__(self, type, domain, bus, dev, fn, bars, caps, path):
         self.type = type
         self.iommu = None
         self.domain = domain
         self.bus = bus
         self.dev = dev
         self.fn = fn
+        self.bars = bars
         self.caps = caps
         self.path = path
         self.caps_start = 0
@@ -268,6 +340,10 @@ class PCIDevice:
     def parse_pcidevice_sysfsdir(basedir, dir):
         dpath = os.path.join(basedir, dir)
         f = input_open(os.path.join(dpath, 'config'), 'rb')
+        (vendor_device,) = struct.unpack('<I', f.read(4))
+        if vendor_device == 0xffffffff:
+            print('WARNING: Ignoring apparently disabled PCI device %s' % dir)
+            return None
         f.seek(0x0A)
         (classcode,) = struct.unpack('<H', f.read(2))
         f.close()
@@ -279,9 +355,10 @@ class PCIDevice:
         domain = int(a[0], 16)
         bus = int(a[1], 16)
         df = a[2].split('.')
+        bars = PCIBARs(dpath)
         caps = PCICapability.parse_pcicaps(dpath)
         return PCIDevice(type, domain, bus, int(df[0], 16), int(df[1], 16),
-                         caps, dpath)
+                         bars, caps, dpath)
 
 
 class PCIPCIBridge(PCIDevice):
@@ -300,10 +377,7 @@ class MemRegion:
         self.start = start
         self.stop = stop
         self.typestr = typestr
-        if comments is None:
-            self.comments = []
-        else:
-            self.comments = comments
+        self.comments = comments or []
 
     def __str__(self):
         return 'MemRegion: %08x-%08x : %s' % \
@@ -465,8 +539,7 @@ class IOMemRegionTree:
             # blacklisted on all levels
             if (
                 (s.find('PCI MMCONFIG') >= 0) or
-                (s.find('APIC') >= 0) or  # covers both APIC and IOAPIC
-                (s.find('dmar') >= 0)
+                (s.find('APIC') >= 0)  # covers both APIC and IOAPIC
             ):
                 continue
 
@@ -486,6 +559,21 @@ class IOMemRegionTree:
         return regions
 
 
+class IOMMUConfig(object):
+    """One IOMMU unit; the amd_* attributes exist only for AMD units."""
+
+    def __init__(self, props):
+        keys = ['base_addr', 'mmio_size']
+        if 'amd_bdf' in props:
+            keys += ['amd_bdf', 'amd_base_cap', 'amd_msi_cap', 'amd_features']
+        for key in keys:
+            setattr(self, key, props[key])
+
+    @property
+    def is_amd_iommu(self):
+        return hasattr(self, 'amd_bdf')
+
+
 def parse_iomem(pcidevices):
     regions = IOMemRegionTree.parse_iomem_tree(
         IOMemRegionTree.parse_iomem_file())
@@ -494,6 +582,7 @@ def parse_iomem(pcidevices):
     add_rom_region = False
 
     ret = []
+    dmar_regions = []
     for r in regions:
         append_r = True
         # filter the list for MSI-X pages
@@ -513,6 +602,10 @@ def parse_iomem(pcidevices):
         if (r.start >= rom_region.start and r.stop <= rom_region.stop):
             add_rom_region = True
             append_r = False
+        # filter out and save DMAR regions
+        if r.typestr.find('dmar') >= 0:
+            dmar_regions.append(r)
+            append_r = False
         if append_r:
             ret.append(r)
 
@@ -525,7 +618,7 @@ def parse_iomem(pcidevices):
     if (ret[0].typestr == 'System RAM' and ret[0].start == 0x1000):
         ret[0].start = 0
 
-    return ret
+    return ret, dmar_regions
 
 
 def parse_pcidevices():
@@ -637,7 +730,7 @@ def parse_dmar_devscope(f):
 
 # parsing of DMAR ACPI Table
 # see Intel VT-d Spec chapter 8
-def parse_dmar(pcidevices, ioapics):
+def parse_dmar(pcidevices, ioapics, dmar_regions):
     f = input_open('/sys/firmware/acpi/tables/DMAR', 'rb')
     signature = f.read(4)
     if signature != b'DMAR':
@@ -662,7 +755,20 @@ def parse_dmar(pcidevices, ioapics):
             if len(units) >= 8:
                 raise RuntimeError('Too many DMAR units. '
                                    'Raise JAILHOUSE_MAX_IOMMU_UNITS.')
-            units.append(base)
+            size = 0
+            for r in dmar_regions:
+                if base == r.start:
+                    size = r.size()
+            if size == 0:
+                raise RuntimeError('DMAR region size cannot be identified.\n'
+                                   'Target Linux must run with Intel IOMMU '
+                                   'enabled.')
+            if size > 0x3000:
+                raise RuntimeError('Unexpectedly large DMAR region.')
+            units.append(IOMMUConfig({
+                'base_addr': base,
+                'mmio_size': size
+            }))
             if flags & 1:
                 for d in pcidevices:
                     if d.iommu is None:
@@ -758,19 +864,21 @@ def parse_ivrs(pcidevices, ioapics):
     regions = []
     # BDF of devices that are permitted outside IOMMU: root complex
     iommu_skiplist = set([0x0])
+    ivhd_blocks = 0
     while length > 0:
         (block_type, block_length) = struct.unpack('<BxH', f.read(4))
         if block_type in [0x10, 0x11]:
+            ivhd_blocks += 1
+            if ivhd_blocks > 1:
+                raise RuntimeError('Jailhouse doesn\'t support more than one '
+                                   'AMD IOMMU per PCI function.')
             # IVHD block
-            (iommu_id, base_addr, pci_seg) = \
-                struct.unpack('<HxxQH', f.read(14))
-            length -= block_length
-            block_length -= 18
+            ivhd_fields = struct.unpack('<HHQHxxL', f.read(20))
+            (iommu_bdf, base_cap_ofs,
+             base_addr, pci_seg, iommu_feat) = ivhd_fields
 
-            # IOMMU EFR image and reserved area
-            skip_bytes = 6 if block_type == 0x10 else 22
-            f.seek(skip_bytes, os.SEEK_CUR)
-            block_length -= skip_bytes
+            length -= block_length
+            block_length -= 24
 
             if pci_seg != 0:
                 raise RuntimeError('We do not support multiple PCI segments')
@@ -779,12 +887,39 @@ def parse_ivrs(pcidevices, ioapics):
                 raise RuntimeError('Too many IOMMU units. '
                                    'Raise JAILHOUSE_MAX_IOMMU_UNITS.')
 
-            # We shouldn't map IOMMU to the cells
+            msi_cap_ofs = None
+
             for i, d in enumerate(pcidevices):
-                if d.bdf() == iommu_id:
+                if d.bdf() == iommu_bdf:
+                    # Extract MSI capability offset
+                    for c in d.caps:
+                        if c.id == 0x05:
+                            msi_cap_ofs = c.start
+                    # We must not map IOMMU to the cells
                     del pcidevices[i]
 
-            units.append(base_addr)
+            if msi_cap_ofs is None:
+                raise RuntimeError('AMD IOMMU lacks MSI support, and '
+                                   'Jailhouse doesn\'t support MSI-X yet.')
+
+            if (iommu_feat & (0xF << 13)) and (iommu_feat & (0x3F << 17)):
+                # Performance Counters are supported, allocate 512K
+                mmio_size = 524288
+            else:
+                # Allocate 16K
+                mmio_size = 16384
+
+            units.append(IOMMUConfig({
+                'base_addr': base_addr,
+                'mmio_size': mmio_size,
+                'amd_bdf': iommu_bdf,
+                'amd_base_cap': base_cap_ofs,
+                'amd_msi_cap': msi_cap_ofs,
+                # IVHD block type 0x11 has exact EFR copy but type 0x10 may
+                # overwrite what hardware reports. Set reserved bit 0 in that
+                # case to indicate that the value is in use.
+                'amd_features': (iommu_feat | 0x1) if block_type == 0x10 else 0
+            }))
 
             bdf_start_range = None
             while block_length > 0:
@@ -841,9 +976,9 @@ def parse_ivrs(pcidevices, ioapics):
 
         elif type in [0x20, 0x21, 0x22]:
             # IVMD block
+            ivmd_fields = struct.unpack('<BBHHHxxxxxxxxQQ', f.read(32))
             (block_type, block_flags, block_length,
-             device_id, aux_data, mem_addr, mem_len) = struct.unpack(
-                 '<BBHHHxxxxxxxxQQ')
+             device_id, aux_data, mem_addr, mem_len) = ivmd_fields
             length -= block_length
 
             if int(block_flags):
@@ -975,7 +1110,7 @@ product = [input_readline('/sys/class/dmi/id/sys_vendor',
 inmatemem = kmg_multiply_str(options.mem_inmates)
 hvmem = [0, kmg_multiply_str(options.mem_hv)]
 
-regions = parse_iomem(pcidevices)
+(regions, dmar_regions) = parse_iomem(pcidevices)
 ourmem = parse_kernel_cmdline()
 total = hvmem[1] + inmatemem
 
@@ -985,7 +1120,8 @@ ioapics = parse_madt()
 
 vendor = get_cpu_vendor()
 if vendor == 'GenuineIntel':
-    (iommu_units, extra_memregs) = parse_dmar(pcidevices, ioapics)
+    (iommu_units, extra_memregs) = parse_dmar(pcidevices, ioapics,
+                                              dmar_regions)
 else:
     (iommu_units, extra_memregs) = parse_ivrs(pcidevices, ioapics)
 regions += extra_memregs
@@ -995,7 +1131,8 @@ if ourmem is None:
     ourmem = alloc_mem(regions, total)
 elif (total > ourmem[1]):
     raise RuntimeError('Your memmap reservation is too small you need >="' +
-                       hex(total) + '"')
+                       hex(total) + '". Hint: your kernel cmd line needs '
+                       '"memmap=' + hex(total) + '$' + hex(ourmem[0]) + '"')
 
 hvmem[0] = ourmem[0]