]> rtime.felk.cvut.cz Git - jailhouse.git/blobdiff - tools/jailhouse-config-create
jailhouse: inmates: bench: Add -R option -- repeats count.
[jailhouse.git] / tools / jailhouse-config-create
index db55c91c19c949107ed2504c00f7f66aba03c341..f0d65ed014032c9a0c032524d5daae0ad020374f 100755 (executable)
@@ -2,7 +2,13 @@
 #
 # Jailhouse, a Linux-based partitioning hypervisor
 #
-# Copyright (c) Siemens AG, 2014
+# Copyright (c) Siemens AG, 2014-2016
+# Copyright (c) Valentine Sinitsyn, 2014-2015
+#
+# Authors:
+#  Henning Schild <henning.schild@siemens.com>
+#  Jan Kiszka <jan.kiszka@siemens.com>
+#  Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
 #
 # This work is licensed under the terms of the GNU GPL, version 2.  See
 # the COPYING file in the top-level directory.
@@ -18,8 +24,18 @@ import os
 import re
 import argparse
 import struct
+import fnmatch
 from mako.template import Template
 
+datadir = None
+
+if datadir:
+    template_default_dir = datadir + "/jailhouse"
+else:
+    template_default_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
+
+cpuvendor = None
+
 # pretend to be part of the jailhouse tool
 sys.argv[0] = sys.argv[0].replace('-', ' ')
 
@@ -34,6 +50,12 @@ parser.add_argument('-r', '--root',
                     default='/',
                     action='store',
                     type=str)
+# Allow overriding where the templates are looked up.  The two help
+# literals are concatenated, so the first one must end with a space.
+parser.add_argument('-t', '--template-dir',
+                    help='the directory where the templates are located, '
+                         'the default is "' + template_default_dir + '"',
+                    default=template_default_dir,
+                    action='store',
+                    type=str)
 
 memargs = [['--mem-inmates', '2M', 'inmate'],
            ['--mem-hv', '64M', 'hypervisor']]
@@ -53,15 +75,124 @@ parser.add_argument('file', metavar='FILE',
 
 options = parser.parse_args()
 
-inputs = {'files': set(), 'files_opt': set(), 'dirs': set()}
+# Whitelist of input paths (fnmatch patterns) that may be read, grouped by
+# how check_input_listed() treats them.
+inputs = {
+    # required files
+    'files': set([
+        '/proc/iomem',
+        '/proc/cpuinfo',
+        '/proc/cmdline',
+        '/proc/ioports',
+        '/sys/bus/pci/devices/*/config',
+        '/sys/bus/pci/devices/*/resource',
+        '/sys/devices/system/cpu/cpu*/uevent',
+        '/sys/firmware/acpi/tables/APIC',
+        '/sys/firmware/acpi/tables/MCFG',
+    ]),
+    # optional files
+    'files_opt': set([
+        '/sys/class/dmi/id/product_name',
+        '/sys/class/dmi/id/sys_vendor',
+        '/sys/devices/jailhouse/enabled',
+    ]),
+    # platform specific files
+    'files_intel': set(['/sys/firmware/acpi/tables/DMAR']),
+    'files_amd': set(['/sys/firmware/acpi/tables/IVRS']),
+}
+
+
+def kmg_multiply(value, kmg):
+    """Scale value by the binary unit suffix kmg.
+
+    'K'/'k', 'M'/'m' and 'G'/'g' multiply by 1024, 1024**2 and 1024**3.
+    Any other suffix, including the empty string, returns value unchanged.
+    """
+    if kmg in ('K', 'k'):
+        return 1024 * value
+    if kmg in ('M', 'm'):
+        return 1024**2 * value
+    if kmg in ('G', 'g'):
+        return 1024**3 * value
+    return value
+
+
+def kmg_multiply_str(str):
+    """Convert a size string such as '64M' or '0x1000' into an integer.
+
+    The string starts with a number, optionally followed by one of the
+    suffixes understood by kmg_multiply() in either case.  The number is
+    read as decimal; if that fails it is read as hexadecimal (with or
+    without a 0x prefix).
+
+    Raises RuntimeError if the string does not start with a number and
+    ValueError if the number is neither valid decimal nor hexadecimal.
+    """
+    m = re.match(r'([0-9a-fA-FxX]+)([KMGkmg]?)', str)
+    if m is None:
+        raise RuntimeError('kmg_multiply_str can not parse input "' +
+                           str + '"')
+    digits = m.group(1)
+    try:
+        value = int(digits)
+    except ValueError:
+        # the pattern admits hex digits and a 0x prefix, which a plain
+        # base-10 conversion rejects
+        value = int(digits, 16)
+    return kmg_multiply(value, m.group(2))
+
+
+def check_input_listed(name, optional=False):
+    """Ensure name matches one of the whitelisted input patterns.
+
+    Optional inputs are checked against inputs['files_opt'].  Required
+    ones are checked against inputs['files'], extended by the Intel or AMD
+    specific list when cpuvendor names that vendor.
+
+    Returns True on a match and raises RuntimeError otherwise.
+    """
+    if optional is False:
+        patterns = inputs['files']
+        if cpuvendor == 'GenuineIntel':
+            patterns = patterns.union(inputs['files_intel'])
+        elif cpuvendor == 'AuthenticAMD':
+            patterns = patterns.union(inputs['files_amd'])
+    else:
+        patterns = inputs['files_opt']
+
+    if any(fnmatch.fnmatch(name, pattern) for pattern in patterns):
+        return True
+    raise RuntimeError('"' + name + '" is not a listed input file')
+
+
+def input_open(name, mode='r', optional=False):
+    """Open the whitelisted input file name below options.root.
+
+    The name is validated with check_input_listed() first.  If opening
+    fails, an optional input falls back to /dev/null opened in the same
+    mode, while for a required input the exception is raised again.
+    """
+    check_input_listed(name, optional)
+    try:
+        return open(options.root + name, mode)
+    except Exception as e:
+        if not optional:
+            raise e
+        return open("/dev/null", mode)
+
+
+def input_readline(name, optional=False):
+    """Return the first line of the whitelisted input file name.
+
+    optional is handed on to input_open() unchanged.
+    """
+    f = input_open(name, optional=optional)
+    try:
+        return f.readline()
+    finally:
+        # release the handle even if reading fails
+        f.close()
+
+
+def input_listdir(dir, wildcards):
+    """Return the sorted entries of directory dir below options.root.
+
+    Each wildcard, joined onto dir, has to be a whitelisted required
+    input; check_input_listed() raises RuntimeError if it is not.
+    """
+    for pattern in wildcards:
+        check_input_listed(os.path.join(dir, pattern))
+    return sorted(os.listdir(options.root + dir))
+
+
+class PCIBARs:
+    """Size masks for the six base address register slots of a PCI device.
+
+    The masks are computed from the 'resource' file in the device
+    directory dir, read as one 'start end flags' line per slot.  self.mask
+    receives one 32-bit mask per slot; a 64-bit memory BAR contributes two
+    entries, lower half first.
+
+    NOTE(review): this assumes the resource file carries a separate line
+    for the slot that holds the upper half of a 64-bit BAR - confirm
+    against the sysfs-pci documentation.
+    """
+
+    # resource type bits tested in the flags column
+    IORESOURCE_IO = 0x00000100
+    IORESOURCE_MEM = 0x00000200
+    IORESOURCE_MEM_64 = 0x00100000
+
+    def __init__(self, dir):
+        self.mask = []
+        f = input_open(os.path.join(dir, 'resource'), 'r')
+        try:
+            n = 0
+            # a while loop, so that a 64-bit BAR can really consume two
+            # slots (incrementing the variable of a for loop has no effect)
+            while n < 6:
+                (start, end, flags) = f.readline().split()
+                n += 1
+                flags = int(flags, 16)
+                if flags & PCIBARs.IORESOURCE_IO:
+                    mask = ~(int(end, 16) - int(start, 16))
+                elif flags & PCIBARs.IORESOURCE_MEM:
+                    mask = ~(int(end, 16) - int(start, 16))
+                    if flags & PCIBARs.IORESOURCE_MEM_64:
+                        self.mask.append(mask & 0xffffffff)
+                        mask >>= 32
+                        # the upper half takes the next slot; drop that
+                        # slot's own line so it is not counted twice
+                        f.readline()
+                        n += 1
+                else:
+                    mask = 0
+                self.mask.append(mask & 0xffffffff)
+        finally:
+            # do not leak the handle if a line fails to parse
+            f.close()
 
 
 class PCICapability:
-    def __init__(self, id, start, len, flags):
+    def __init__(self, id, start, len, flags, content, msix_address):
         self.id = id
         self.start = start
         self.len = len
         self.flags = flags
+        self.content = content
+        self.msix_address = msix_address
         self.comments = []
 
     def __eq__(self, other):
@@ -71,10 +202,13 @@ class PCICapability:
     RD = '0'
     RW = 'JAILHOUSE_PCICAPS_WRITE'
 
+    JAILHOUSE_PCI_EXT_CAP = 0x8000
+
     @staticmethod
     def parse_pcicaps(dir):
         caps = []
-        f = input_open(dir + '/config', 'rb')
+        has_extended_caps = False
+        f = input_open(os.path.join(dir, 'config'), 'rb')
         f.seek(0x06)
         (status,) = struct.unpack('<H', f.read(2))
         # capability list supported?
@@ -86,6 +220,7 @@ class PCICapability:
         (next,) = struct.unpack('B', f.read(1))
         while next != 0:
             cap = next
+            msix_address = 0
             f.seek(cap)
             (id, next) = struct.unpack('<BB', f.read(2))
             if id == 0x01:  # Power Management
@@ -101,28 +236,99 @@ class PCICapability:
                 if (msgctl & (1 << 8)) != 0:  # per-vector masking support
                     len += 10
                 flags = PCICapability.RW
+            elif id == 0x10:  # Express
+                len = 20
+                (cap_reg,) = struct.unpack('<H', f.read(2))
+                if (cap_reg & 0xf) >= 2:  # v2 capability
+                    len = 44
+                # access side effects still need to be analyzed
+                flags = PCICapability.RD
+                has_extended_caps = True
             elif id == 0x11:  # MSI-X
                 # access will be moderated by hypervisor
                 len = 12
+                (table,) = struct.unpack('<xxI', f.read(6))
+                f.seek(0x10 + (table & 7) * 4)
+                (bar,) = struct.unpack('<I', f.read(4))
+                if (bar & 0x3) != 0:
+                    raise RuntimeError('Invalid MSI-X BAR found')
+                if (bar & 0x4) != 0:
+                    bar |= struct.unpack('<I', f.read(4))[0] << 32
+                msix_address = (bar & 0xfffffffffffffff0) + table & 0xfffffff8
                 flags = PCICapability.RW
             else:
                 # unknown/unhandled cap, mark its existence
                 len = 2
                 flags = PCICapability.RD
-            caps.append(PCICapability(id, cap, len, flags))
+            f.seek(cap + 2)
+            content = f.read(len - 2)
+            caps.append(PCICapability(id, cap, len, flags, content,
+                                      msix_address))
+
+        if has_extended_caps:
+            # walk extended capability list
+            next = 0x100
+            while next != 0:
+                cap = next
+                f.seek(cap)
+                (id, version_next) = struct.unpack('<HH', f.read(4))
+                next = version_next >> 4
+                if id == 0xffff:
+                    break
+                elif id == 0x0010:  # SR-IOV
+                    len = 64
+                    # access side effects still need to be analyzed
+                    flags = PCICapability.RD
+                else:
+                    if (id & PCICapability.JAILHOUSE_PCI_EXT_CAP) != 0:
+                        print('WARNING: Ignoring unsupported PCI Express '
+                              'Extended Capability ID %x' % id)
+                        continue
+                    # unknown/unhandled cap, mark its existence
+                    len = 4
+                    flags = PCICapability.RD
+                f.seek(cap + 4)
+                content = f.read(len - 4)
+                id |= PCICapability.JAILHOUSE_PCI_EXT_CAP
+                caps.append(PCICapability(id, cap, len, flags, content, 0))
+
+        f.close()
         return caps
 
 
 class PCIDevice:
-    def __init__(self, type, domain, bus, dev, fn, caps):
+    def __init__(self, type, domain, bus, dev, fn, bars, caps, path):
         self.type = type
+        self.iommu = None
         self.domain = domain
         self.bus = bus
         self.dev = dev
         self.fn = fn
+        self.bars = bars
         self.caps = caps
+        self.path = path
         self.caps_start = 0
         self.num_caps = len(caps)
+        self.num_msi_vectors = 0
+        self.msi_64bits = 0
+        self.num_msix_vectors = 0
+        self.msix_region_size = 0
+        self.msix_address = 0
+        for c in caps:
+            if c.id in (0x05, 0x11):
+                msg_ctrl = struct.unpack('<H', c.content[:2])[0]
+                if c.id == 0x05:  # MSI
+                    self.num_msi_vectors = 1 << ((msg_ctrl >> 1) & 0x7)
+                    self.msi_64bits = (msg_ctrl >> 7) & 1
+                else:  # MSI-X
+                    if c.msix_address != 0:
+                        vectors = (msg_ctrl & 0x7ff) + 1
+                        self.num_msix_vectors = vectors
+                        self.msix_region_size = (vectors * 16 + 0xfff) & 0xf000
+                        self.msix_address = c.msix_address
+                    else:
+                        print('WARNING: Ignoring invalid MSI-X configuration'
+                              ' of device %02x:%02x.%x' % (bus, dev, fn))
 
     def __str__(self):
         return 'PCIDevice: %02x:%02x.%x' % (self.bus, self.dev, self.fn)
@@ -132,9 +338,16 @@ class PCIDevice:
 
     @staticmethod
     def parse_pcidevice_sysfsdir(basedir, dir):
-        dpath = basedir + '/' + dir
-        dclass = input_readline(dpath + '/class')
-        if re.match(r'0x0604..', dclass):
+        dpath = os.path.join(basedir, dir)
+        f = input_open(os.path.join(dpath, 'config'), 'rb')
+        (vendor_device,) = struct.unpack('<I', f.read(4))
+        if vendor_device == 0xffffffff:
+            print('WARNING: Ignoring apparently disabled PCI device %s' % dir)
+            return None
+        f.seek(0x0A)
+        (classcode,) = struct.unpack('<H', f.read(2))
+        f.close()
+        if classcode == 0x0604:
             type = 'JAILHOUSE_PCI_TYPE_BRIDGE'
         else:
             type = 'JAILHOUSE_PCI_TYPE_DEVICE'
@@ -142,17 +355,29 @@ class PCIDevice:
         domain = int(a[0], 16)
         bus = int(a[1], 16)
         df = a[2].split('.')
+        bars = PCIBARs(dpath)
         caps = PCICapability.parse_pcicaps(dpath)
         return PCIDevice(type, domain, bus, int(df[0], 16), int(df[1], 16),
-                         caps)
+                         bars, caps, dpath)
+
+
+class PCIPCIBridge(PCIDevice):
+    """Helpers for devices of type JAILHOUSE_PCI_TYPE_BRIDGE."""
+
+    @staticmethod
+    def get_2nd_busses(dev):
+        """Return the tuple (secondbus, subordinate) of the bridge dev.
+
+        Both values are single bytes read from offsets 0x19 and 0x1a of
+        the config file in dev.path.
+        """
+        assert dev.type == 'JAILHOUSE_PCI_TYPE_BRIDGE'
+        config = input_open(os.path.join(dev.path, 'config'), 'rb')
+        config.seek(0x19)
+        buses = struct.unpack('<BB', config.read(2))
+        config.close()
+        return buses
 
 
 class MemRegion:
-    def __init__(self, start, stop, typestr, comments=[]):
+    def __init__(self, start, stop, typestr, comments=None):
         self.start = start
         self.stop = stop
         self.typestr = typestr
-        self.comments = comments
+        self.comments = comments or []
 
     def __str__(self):
         return 'MemRegion: %08x-%08x : %s' % \
@@ -163,70 +388,244 @@ class MemRegion:
         return int((self.stop - self.start + 0xfff) / 0x1000) * 0x1000
 
     def flagstr(self, p=''):
-        if (
-            self.typestr == 'ACPI Tables' or
-            self.typestr == 'ACPI Non-volatile Storage'
-        ):
-            return 'JAILHOUSE_MEM_READ'
         if (
             self.typestr == 'System RAM' or
+            self.typestr == 'Kernel' or
             self.typestr == 'RAM buffer' or
-            self.typestr == 'ACPI DMAR RMRR'
+            self.typestr == 'ACPI DMAR RMRR' or
+            self.typestr == 'ACPI IVRS'
         ):
             s = 'JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |\n'
             s += p + '\t\tJAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_DMA'
             return s
         return 'JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE'
 
-    @staticmethod
-    # return the first region with the given typestr
-    def find_region(regions, typestr):
-        for r in regions:
-            if (r.typestr == typestr):
-                return r
-        return None
+
+class IOAPIC:
+    """An I/O APIC together with the IOMMU number and BDF assigned to it."""
+
+    def __init__(self, id, address, gsi_base, iommu=0, bdf=0):
+        (self.id, self.address, self.gsi_base) = (id, address, gsi_base)
+        (self.iommu, self.bdf) = (iommu, bdf)
+
+    def __str__(self):
+        fields = (self.id, self.gsi_base)
+        return 'IOAPIC %d, GSI base %d' % fields
+
+    def irqchip_id(self):
+        """Return the irqchip ID: IOMMU number from bit 16 up, BDF below."""
+        iommu_bits = self.iommu << 16
+        return iommu_bits | self.bdf
+
+
+class IOMemRegionTree:
+    def __init__(self, region, level):
+        # a new node is detached: no parent and no children yet
+        self.children = []
+        self.parent = None
+        self.level = level
+        self.region = region
+
+    def __str__(self):
+        """Render this node followed by all of its children, recursively.
+
+        A node with a region prints one line, indented by level - 1 spaces
+        and suffixed with the parent's typestr if the parent has a region.
+        """
+        parts = []
+        if self.region:
+            line = ' ' * (self.level - 1) + str(self.region)
+            if self.parent and self.parent.region:
+                line += ' --> ' + self.parent.region.typestr
+            parts.append(line + '\n')
+        parts.extend(str(child) for child in self.children)
+        return ''.join(parts)
+
+    def regions_split_by_kernel(self):
+        """Split this node's region around the kernel it contains.
+
+        Children whose typestr starts with 'Kernel ' mark the kernel.  If
+        there are none, the region is returned unchanged as a one-element
+        list.  Otherwise the result holds, in address order, the part
+        before the kernel (if any), a region of type "Kernel" and the part
+        after it (if any).  Parts that do not exist are left out, so the
+        list never contains None.
+        """
+        kernel = [x for x in self.children if
+                  x.region.typestr.startswith('Kernel ')]
+
+        if not kernel:
+            return [self.region]
+
+        r = self.region
+        s = r.typestr
+
+        kernel_start = kernel[0].region.start
+        kernel_stop = kernel[-1].region.stop
+
+        # align this for 16M, but only if we have enough space
+        kernel_stop = (kernel_stop & ~0xFFFFFF) + 0xFFFFFF
+        if kernel_stop > r.stop:
+            kernel_stop = r.stop
+
+        result = []
+
+        # before Kernel if any
+        if r.start < kernel_start:
+            result.append(MemRegion(r.start, kernel_start - 1, s))
+
+        result.append(MemRegion(kernel_start, kernel_stop, "Kernel"))
+
+        # after Kernel if any
+        if r.stop > kernel_stop:
+            result.append(MemRegion(kernel_stop + 1, r.stop, s))
+
+        return result
 
     @staticmethod
     def parse_iomem_line(line):
         a = line.split(':', 1)
-        # HPET may be part of in reserved region
-        if a[0].startswith(' ') and a[1].find("HPET") < 0:
-            return None
+        level = int(a[0].count(' ') / 2) + 1
         region = a[0].split('-', 1)
         a[1] = a[1].strip()
-        return MemRegion(int(region[0], 16), int(region[1], 16), a[1])
+        return level, MemRegion(int(region[0], 16), int(region[1], 16), a[1])
 
+    @staticmethod
+    def parse_iomem_file():
+        """Build the region tree described by /proc/iomem.
+
+        Every line becomes a node whose level comes from
+        parse_iomem_line().  Returns the root node, which has no region
+        and level 0.
+        """
+        root = IOMemRegionTree(None, 0)
+        previous = root
+        f = input_open('/proc/iomem')
+        for line in f:
+            (level, region) = IOMemRegionTree.parse_iomem_line(line)
+            node = IOMemRegionTree(region, level)
+            if level > previous.level:
+                # deeper than the previous line: nest below it
+                node.parent = previous
+            elif level == previous.level:
+                # same depth: sibling of the previous node
+                node.parent = previous.parent
+            else:
+                # shallower: climb up to the ancestor on the node's own
+                # level and share its parent
+                ancestor = previous.parent
+                while level < ancestor.level:
+                    ancestor = ancestor.parent
+                node.parent = ancestor.parent
+
+            node.parent.children.append(node)
+            previous = node
+        f.close()
+
+        return root
+
+    # find HPET regions in tree
+    @staticmethod
+    def find_hpet_regions(tree):
+        regions = []
 
-def parse_iomem():
-    regions = []
-    f = input_open('/proc/iomem')
-    for line in f:
-        r = MemRegion.parse_iomem_line(line)
-        ## XXX what else to ignore??
-        if (
-            r is not None and
-            r.typestr != 'Local APIC' and
-            r.typestr != 'reserved'
-        ):
+        for tree in tree.children:
+            r = tree.region
+            s = r.typestr
+
+            if (s.find('HPET') >= 0):
+                regions.append(r)
+
+            # if the tree continues recurse further down ...
+            if (len(tree.children) > 0):
+                regions.extend(IOMemRegionTree.find_hpet_regions(tree))
+
+        return regions
+
+    # recurse down the tree
+    @staticmethod
+    def parse_iomem_tree(tree):
+        regions = []
+
+        for tree in tree.children:
+            r = tree.region
+            s = r.typestr
+
+            # System RAM on the first level will be added completely,
+            # if they don't contain the kernel itself, if they do,
+            # we split them
+            if (tree.level == 1 and s == 'System RAM'):
+                regions.extend(tree.regions_split_by_kernel())
+                continue
+
+            # blacklisted on all levels
+            if (
+                (s.find('PCI MMCONFIG') >= 0) or
+                (s.find('APIC') >= 0)  # covers both APIC and IOAPIC
+            ):
+                continue
+
+            # generally blacklisted, unless we find an HPET behind it
+            if (s == 'reserved'):
+                regions.extend(IOMemRegionTree.find_hpet_regions(tree))
+                continue
+
+            # if the tree continues recurse further down ...
+            if (len(tree.children) > 0):
+                regions.extend(IOMemRegionTree.parse_iomem_tree(tree))
+                continue
+
+            # add all remaining leaves
             regions.append(r)
-    f.close()
+
+        return regions
+
+
+class IOMMUConfig(object):
+    """Settings of one IOMMU unit, copied from the dictionary props.
+
+    'base_addr' and 'mmio_size' are always required.  If 'amd_bdf' is
+    present, 'amd_base_cap', 'amd_msi_cap' and 'amd_features' are required
+    as well and all four are stored as attributes.
+    """
+
+    def __init__(self, props):
+        self.base_addr = props['base_addr']
+        self.mmio_size = props['mmio_size']
+        if 'amd_bdf' not in props:
+            return
+        for key in ('amd_bdf', 'amd_base_cap', 'amd_msi_cap',
+                    'amd_features'):
+            setattr(self, key, props[key])
+
+    @property
+    def is_amd_iommu(self):
+        """True if the AMD specific attributes were set."""
+        return hasattr(self, 'amd_bdf')
+
+
+def parse_iomem(pcidevices):
+    regions = IOMemRegionTree.parse_iomem_tree(
+        IOMemRegionTree.parse_iomem_file())
+
+    rom_region = MemRegion(0xc0000, 0xdffff, 'ROMs')
+    add_rom_region = False
+
+    ret = []
+    dmar_regions = []
+    for r in regions:
+        append_r = True
+        # filter the list for MSI-X pages
+        for d in pcidevices:
+            if d.msix_address >= r.start and d.msix_address <= r.stop:
+                if d.msix_address > r.start:
+                    head_r = MemRegion(r.start, d.msix_address - 1,
+                                       r.typestr, r.comments)
+                    ret.append(head_r)
+                if d.msix_address + d.msix_region_size < r.stop:
+                    tail_r = MemRegion(d.msix_address + d.msix_region_size,
+                                       r.stop, r.typestr, r.comments)
+                    ret.append(tail_r)
+                append_r = False
+                break
+        # filter out the ROMs
+        if (r.start >= rom_region.start and r.stop <= rom_region.stop):
+            add_rom_region = True
+            append_r = False
+        # filter out and save DMAR regions
+        if r.typestr.find('dmar') >= 0:
+            dmar_regions.append(r)
+            append_r = False
+        if append_r:
+            ret.append(r)
+
+    # add a region that covers all potential ROMs
+    if add_rom_region:
+        ret.append(rom_region)
 
     # newer Linux kernels will report the first page as reserved
     # it is needed for CPU init so include it anyways
-    if (
-        regions[0].typestr == 'System RAM' and
-        regions[0].start == 0x1000
-    ):
-        regions[0].start = 0
+    if (ret[0].typestr == 'System RAM' and ret[0].start == 0x1000):
+        ret[0].start = 0
 
-    return regions
+    return ret, dmar_regions
 
 
 def parse_pcidevices():
     devices = []
     caps = []
     basedir = '/sys/bus/pci/devices'
-    list = input_listdir(basedir, ['*/class', '*/config'])
+    list = input_listdir(basedir, ['*/config'])
     for dir in list:
         d = PCIDevice.parse_pcidevice_sysfsdir(basedir, dir)
         if d is not None:
@@ -248,53 +647,7 @@ def parse_pcidevices():
     return (devices, caps)
 
 
-def kmg_multiply(value, kmg):
-    if (kmg == 'K' or kmg == 'k'):
-        return 1024 * value
-    if (kmg == 'M' or kmg == 'm'):
-        return 1024**2 * value
-    if (kmg == 'G' or kmg == 'g'):
-        return 1024**3 * value
-    return value
-
-
-def kmg_multiply_str(str):
-    m = re.match(r'([0-9a-fA-FxX]+)([KMG]?)', str)
-    if m is not None:
-        return kmg_multiply(int(m.group(1)), m.group(2))
-    raise RuntimeError('kmg_multiply_str can not parse input "' + str + '"')
-    return 0
-
-
-def input_open(name, mode='r', optional=False):
-    inputs['files_opt' if optional else 'files'].add(name)
-    try:
-        f = open(options.root + name, mode)
-    except Exception as e:
-        if optional or options.generate_collector:
-            return open("/dev/null", mode)
-        raise e
-    return f
-
-
-def input_readline(name, optional=False):
-    f = input_open(name, optional=optional)
-    line = f.readline()
-    f.close()
-    return line
-
-
-def input_listdir(dir, wildcards):
-    for w in wildcards:
-        inputs['dirs'].add(dir + '/' + w)
-    if options.generate_collector:
-        return []
-    dirs = os.listdir(options.root + dir)
-    dirs.sort()
-    return dirs
-
-
-def parse_cmdline():
+def parse_kernel_cmdline():
     line = input_readline('/proc/cmdline')
     m = re.match(r'.*memmap=([0-9a-fA-FxX]+)([KMG]?)\$'
                  '([0-9a-fA-FxX]+)([KMG]?).*',
@@ -307,7 +660,22 @@ def parse_cmdline():
 
 
 def alloc_mem(regions, size):
-    mem = [0, size]
+    mem = [0x3b000000, size]
+    for r in regions:
+        if (
+            r.typestr == 'System RAM' and
+            r.start <= mem[0] and
+            r.stop + 1 >= mem[0] + mem[1]
+        ):
+            if r.start < mem[0]:
+                head_r = MemRegion(r.start, mem[0] - 1, r.typestr, r.comments)
+                regions.insert(regions.index(r), head_r)
+            if r.stop + 1 > mem[0] + mem[1]:
+                tail_r = MemRegion(mem[0] + mem[1], r.stop, r.typestr,
+                                   r.comments)
+                regions.insert(regions.index(r), tail_r)
+            regions.remove(r)
+            return mem
     for r in reversed(regions):
         if (r.typestr == 'System RAM' and r.size() >= mem[1]):
             mem[0] = r.start
@@ -325,28 +693,53 @@ def count_cpus():
     return count
 
 
+def parse_madt():
+    """Collect the I/O APICs listed in the ACPI 'APIC' table file.
+
+    Walks the variable-length entries that follow the first 44 bytes and
+    creates an IOAPIC object for every entry of type 1.
+
+    Returns the list of IOAPIC objects in table order.  Raises
+    RuntimeError if the file does not start with the b'APIC' signature or
+    if an entry claims to be shorter than the bytes read from it.
+    """
+    f = input_open('/sys/firmware/acpi/tables/APIC', 'rb')
+    try:
+        signature = f.read(4)
+        if signature != b'APIC':
+            raise RuntimeError('MADT: incorrect input file format %s' %
+                               signature)
+        (length,) = struct.unpack('<I', f.read(4))
+        # skip to the first entry; only the entries count from here on
+        f.seek(44)
+        length -= 44
+        ioapics = []
+
+        while length > 0:
+            offset = 0
+            (struct_type, struct_len) = struct.unpack('<BB', f.read(2))
+            offset += 2
+            length -= struct_len
+
+            if struct_type == 1:
+                (id, address, gsi_base) = struct.unpack('<BxII', f.read(10))
+                offset += 10
+                ioapics.append(IOAPIC(id, address, gsi_base))
+
+            if struct_len < offset:
+                # such a length would seek backwards (or not at all) and
+                # keep the loop from ever terminating
+                raise RuntimeError('MADT: invalid entry length %d' %
+                                   struct_len)
+            f.seek(struct_len - offset, os.SEEK_CUR)
+    finally:
+        # close the table even when parsing fails
+        f.close()
+    return ioapics
+
+
 def parse_dmar_devscope(f):
-    offset = 0
-    (scope_type, scope_len, bus, dev, fn) = \
-        struct.unpack('<BBxxxBBB', f.read(8))
-    offset += 8
-    return (offset, scope_type, scope_len, bus, dev, fn)
+    (scope_type, scope_len, id, bus, dev, fn) = \
+        struct.unpack('<BBxxBBBB', f.read(8))
+    if scope_len != 8:
+        raise RuntimeError('Unsupported DMAR Device Scope Structure')
+    return (scope_type, scope_len, id, bus, dev, fn)
 
 
 # parsing of DMAR ACPI Table
 # see Intel VT-d Spec chapter 8
-def parse_dmar():
+def parse_dmar(pcidevices, ioapics, dmar_regions):
     f = input_open('/sys/firmware/acpi/tables/DMAR', 'rb')
     signature = f.read(4)
     if signature != b'DMAR':
-        if options.generate_collector:
-            return 0, []
-        raise RuntimeError('incorrect input file format %s' % signature)
+        raise RuntimeError('DMAR: incorrect input file format %s' % signature)
     (length,) = struct.unpack('<I', f.read(4))
     f.seek(48)
     length -= 48
+    units = []
     regions = []
-    ioapic_id = 0
 
     while length > 0:
         offset = 0
@@ -356,19 +749,65 @@ def parse_dmar():
 
         # DMA Remapping Hardware Unit Definition
         if struct_type == 0:
-            f.seek(16 - offset, os.SEEK_CUR)
+            (flags, segment, base) = struct.unpack('<BxHQ', f.read(12))
+            if segment != 0:
+                raise RuntimeError('We do not support multiple PCI segments')
+            if len(units) >= 8:
+                raise RuntimeError('Too many DMAR units. '
+                                   'Raise JAILHOUSE_MAX_IOMMU_UNITS.')
+            size = 0
+            for r in dmar_regions:
+                if base == r.start:
+                    size = r.size()
+            if size == 0:
+                raise RuntimeError('DMAR region size cannot be identified.\n'
+                                   'Target Linux must run with Intel IOMMU '
+                                   'enabled.')
+            if size > 0x3000:
+                raise RuntimeError('Unexpectedly large DMAR region.')
+            units.append(IOMMUConfig({
+                'base_addr': base,
+                'mmio_size': size
+            }))
+            if flags & 1:
+                for d in pcidevices:
+                    if d.iommu is None:
+                        d.iommu = len(units) - 1
             offset += 16 - offset
             while offset < struct_len:
-                (off, scope_type, scope_len, bus, dev, fn) =\
+                (scope_type, scope_len, id, bus, dev, fn) =\
                     parse_dmar_devscope(f)
-                offset += off
-                if scope_type == 3:
-                    if ioapic_id != 0:
-                        raise RuntimeError('We do not support more '
-                                           'than 1 IOAPIC')
-                    ioapic_id = (bus << 8) | (dev << 3) | fn
-                f.seek(scope_len - 8, os.SEEK_CUR)
-                offset += scope_len - 8
+                # PCI Endpoint Device
+                if scope_type == 1:
+                    assert not (flags & 1)
+                    for d in pcidevices:
+                        if d.bus == bus and d.dev == dev and d.fn == fn:
+                            d.iommu = len(units) - 1
+                            break
+                # PCI Sub-hierarchy
+                elif scope_type == 2:
+                    assert not (flags & 1)
+                    for d in pcidevices:
+                        if d.bus == bus and d.dev == dev and d.fn == fn:
+                            (secondbus, subordinate) = \
+                                PCIPCIBridge.get_2nd_busses(d)
+                            for d2 in pcidevices:
+                                if (
+                                    d2.bus >= secondbus and
+                                    d2.bus <= subordinate
+                                ):
+                                    d2.iommu = len(units) - 1
+                            break
+                # IOAPIC
+                elif scope_type == 3:
+                    ioapic = next(chip for chip in ioapics if chip.id == id)
+                    bdf = (bus << 8) | (dev << 3) | fn
+                    for chip in ioapics:
+                        if chip.bdf == bdf:
+                            raise RuntimeError('IOAPICs with identical BDF')
+                    ioapic.bdf = bdf
+                    ioapic.iommu = len(units) - 1
+                offset += scope_len
 
         # Reserved Memory Region Reporting Structure
         if struct_type == 1:
@@ -379,24 +818,211 @@ def parse_dmar():
 
             comments = []
             while offset < struct_len:
-                (off, scope_type, scope_len, bus, dev, fn) =\
+                (scope_type, scope_len, id, bus, dev, fn) =\
                     parse_dmar_devscope(f)
-                offset += off
-                npath = (scope_len - 6)/2
-                if scope_type == 1 and npath == 1:
+                if scope_type == 1:
                     comments.append('PCI device: %02x:%02x.%x' %
                                     (bus, dev, fn))
                 else:
                     comments.append('DMAR parser could not decode device path')
-                f.seek(scope_len - off, os.SEEK_CUR)
-                offset += scope_len - off
+                offset += scope_len
 
             reg = MemRegion(base, limit, 'ACPI DMAR RMRR', comments)
             regions.append(reg)
 
         f.seek(struct_len - offset, os.SEEK_CUR)
 
-    return ioapic_id, regions
+    f.close()
+
+    for d in pcidevices:
+        if d.iommu is None:
+            raise RuntimeError(
+                'PCI device %02x:%02x.%x outside the scope of an '
+                'IOMMU' % (d.bus, d.dev, d.fn))
+
+    return units, regions
+
+
+def _ivrs_format_bdf(bdf):
+    """Render a 16-bit bus/device/function number as 'bb:dd.f'."""
+    bus, dev, fun = (bdf >> 8) & 0xff, (bdf >> 3) & 0x1f, bdf & 0x7
+    return '%02x:%02x.%x' % (bus, dev, fun)
+
+
+def _ivrs_parse_ivhd_entries(f, block_length, pcidevices, ioapics, unit):
+    """Walk the device entries of one IVHD block.
+
+    Sets the `iommu` attribute of every PCI device and IOAPIC named by
+    an entry to `unit` (the index of the owning IOMMU unit).
+    `block_length` is the number of entry bytes left in the block.
+    """
+    bdf_start_range = None
+    while block_length > 0:
+        (entry_type, device_id) = struct.unpack('<BHx', f.read(4))
+        block_length -= 4
+
+        if entry_type == 0x01:
+            # All
+            for d in pcidevices:
+                d.iommu = unit
+        elif entry_type == 0x02:
+            # Select
+            for d in pcidevices:
+                if d.bdf() == device_id:
+                    d.iommu = unit
+        elif entry_type == 0x03:
+            # Start of range
+            bdf_start_range = device_id
+        elif entry_type == 0x04:
+            # End of range
+            if bdf_start_range is None:
+                continue
+            for d in pcidevices:
+                if bdf_start_range <= d.bdf() <= device_id:
+                    d.iommu = unit
+            bdf_start_range = None
+        elif entry_type == 0x42:
+            # Alias select
+            (device_id_b,) = struct.unpack('<xHx', f.read(4))
+            block_length -= 4
+            for d in pcidevices:
+                if d.bdf() == device_id_b:
+                    d.iommu = unit
+        elif entry_type == 0x43:
+            # Alias start of range
+            (device_id_b,) = struct.unpack('<xHx', f.read(4))
+            block_length -= 4
+            bdf_start_range = device_id_b
+        elif entry_type == 0x48:
+            # Special device
+            (handle, device_id_b, variety) = struct.unpack(
+                '<BHB', f.read(4))
+            block_length -= 4
+            if variety == 0x01:  # IOAPIC
+                for chip in ioapics:
+                    if chip.id == handle:
+                        # NOTE(review): this stores the ID from the first
+                        # half of the entry; device_id_b from the second
+                        # half is read but unused -- confirm which one
+                        # the IOAPIC's requester ID really is.
+                        chip.bdf = device_id
+                        chip.iommu = unit
+        elif entry_type >= 0x40:
+            # Reserved or ignored 8-byte entries: skip the second half
+            f.seek(4, os.SEEK_CUR)
+            block_length -= 4
+
+
+def _ivrs_parse_ivhd_block(f, block_type, block_length, units,
+                           pcidevices, ioapics):
+    """Parse one IVHD block whose 4-byte header was already consumed.
+
+    Appends an IOMMUConfig to `units`, removes the IOMMU's own PCI
+    function from `pcidevices` (in place) and assigns the new unit to
+    the devices listed in the block.
+    """
+    (iommu_bdf, base_cap_ofs, base_addr, pci_seg, iommu_feat) = \
+        struct.unpack('<HHQHxxL', f.read(20))
+    block_length -= 24
+
+    if block_type == 0x11:
+        # Type 0x11 headers are 40 bytes: an EFR image and a reserved
+        # field (8 bytes each) precede the device entries.
+        f.seek(16, os.SEEK_CUR)
+        block_length -= 16
+
+    if pci_seg != 0:
+        raise RuntimeError('We do not support multiple PCI segments')
+
+    if len(units) > 8:
+        raise RuntimeError('Too many IOMMU units. '
+                           'Raise JAILHOUSE_MAX_IOMMU_UNITS.')
+
+    msi_cap_ofs = None
+    for d in pcidevices:
+        if d.bdf() == iommu_bdf:
+            # Extract MSI capability offset
+            for c in d.caps:
+                if c.id == 0x05:
+                    msi_cap_ofs = c.start
+    # We must not map IOMMU to the cells.  Filter in place (the caller
+    # keeps using this list) instead of deleting while iterating.
+    pcidevices[:] = [d for d in pcidevices if d.bdf() != iommu_bdf]
+
+    if msi_cap_ofs is None:
+        raise RuntimeError('AMD IOMMU lacks MSI support, and '
+                           'Jailhouse doesn\'t support MSI-X yet.')
+
+    if (iommu_feat & (0xF << 13)) and (iommu_feat & (0x3F << 17)):
+        # Performance Counters are supported, allocate 512K
+        mmio_size = 524288
+    else:
+        # Allocate 16K
+        mmio_size = 16384
+
+    units.append(IOMMUConfig({
+        'base_addr': base_addr,
+        'mmio_size': mmio_size,
+        'amd_bdf': iommu_bdf,
+        'amd_base_cap': base_cap_ofs,
+        'amd_msi_cap': msi_cap_ofs,
+        # IVHD block type 0x11 has exact EFR copy but type 0x10 may
+        # overwrite what hardware reports. Set reserved bit 0 in that
+        # case to indicate that the value is in use.
+        'amd_features': (iommu_feat | 0x1) if block_type == 0x10 else 0
+    }))
+
+    _ivrs_parse_ivhd_entries(f, block_length, pcidevices, ioapics,
+                             len(units) - 1)
+
+
+def _ivrs_parse_ivmd_block(f, block_type, block_flags, block_length):
+    """Parse one IVMD block whose 4-byte header was already consumed.
+
+    Returns the MemRegion described by the block.
+    """
+    # Type, flags and length were read with the block header, so only
+    # the remaining 28 bytes of the 32-byte structure are read here.
+    (device_id, aux_data, mem_addr, mem_len) = struct.unpack(
+        '<HHxxxxxxxxQQ', f.read(28))
+    if block_length > 32:
+        f.seek(block_length - 32, os.SEEK_CUR)
+
+    if int(block_flags):
+        bdf_str = _ivrs_format_bdf(device_id)
+        print(
+            'WARNING: Jailhouse doesn\'t support configurable '
+            '(eg. read-only) device memory. Device %s may not '
+            'work properly, especially in non-root cell.' % bdf_str)
+
+    if block_type == 0x20:
+        # All devices
+        comment = None
+    elif block_type == 0x21:
+        # Selected device
+        comment = 'PCI Device: %s' % _ivrs_format_bdf(device_id)
+    else:
+        # Device range (0x22)
+        comment = 'PCI Device: %s - %s' % (
+            _ivrs_format_bdf(device_id), _ivrs_format_bdf(aux_data))
+
+    if comment:
+        print('WARNING: Jailhouse doesn\'t support per-device memory '
+              'regions. The memory at 0x%x will be mapped accessible '
+              'to all devices.' % mem_addr)
+
+    # The other MemRegion call sites in this file pass an inclusive end
+    # address and (in parse_dmar) a list of comment strings; do the same.
+    return MemRegion(mem_addr, mem_addr + mem_len - 1, 'ACPI IVRS',
+                     [comment] if comment else None)
+
+
+def _ivrs_parse_table(f, pcidevices, ioapics):
+    """Parse the whole IVRS table from the open binary file `f`.
+
+    Returns (units, regions); see parse_ivrs() for their meaning.
+    """
+    signature = f.read(4)
+    if signature != b'IVRS':
+        raise RuntimeError('IVRS: incorrect input file format %s' % signature)
+
+    (length, revision) = struct.unpack('<IB', f.read(5))
+    if revision > 2:
+        raise RuntimeError('IVRS: unsupported Revision %02x' % revision)
+
+    # The first block starts at offset 48.
+    f.seek(48, os.SEEK_SET)
+    length -= 48
+
+    units = []
+    regions = []
+    ivhd_blocks = 0
+    while length > 0:
+        (block_type, block_flags, block_length) = struct.unpack(
+            '<BBH', f.read(4))
+        if block_length < 4:
+            # A shorter block could never advance the loop.
+            raise RuntimeError('IVRS: invalid block length %d' % block_length)
+        length -= block_length
+
+        if block_type in [0x10, 0x11]:
+            # IVHD block
+            ivhd_blocks += 1
+            if ivhd_blocks > 1:
+                raise RuntimeError('Jailhouse doesn\'t support more than one '
+                                   'AMD IOMMU per PCI function.')
+            _ivrs_parse_ivhd_block(f, block_type, block_length, units,
+                                   pcidevices, ioapics)
+        elif block_type in [0x20, 0x21, 0x22]:
+            # IVMD block
+            regions.append(_ivrs_parse_ivmd_block(
+                f, block_type, block_flags, block_length))
+        elif block_type == 0x40:
+            raise RuntimeError(
+                'You board uses IVRS Rev. 2 feature Jailhouse doesn\'t '
+                'support yet. Please report this to '
+                'jailhouse-dev@googlegroups.com.')
+        else:
+            print(
+                'WARNING: Skipping unknown IVRS '
+                'block type 0x%02x' % block_type)
+            # Step over the body so the next header is read correctly.
+            f.seek(block_length - 4, os.SEEK_CUR)
+
+    return units, regions
+
+
+def parse_ivrs(pcidevices, ioapics):
+    """Parse the ACPI IVRS table (AMD IOMMU description).
+
+    pcidevices -- list of PCI devices; each device named by the table
+                  gets its `iommu` attribute set to the index of its
+                  IOMMU unit, and the IOMMU's own PCI function is
+                  removed from the list in place.
+    ioapics    -- list of IOAPICs; a chip named by a special-device
+                  entry gets `bdf` and `iommu` set.
+
+    Returns (units, regions): a list of IOMMUConfig objects and a list
+    of MemRegion objects built from the IVMD blocks.
+
+    Raises RuntimeError on a malformed or unsupported table and when a
+    PCI device other than 00:00.0 is not covered by any IOMMU.
+    """
+    f = input_open('/sys/firmware/acpi/tables/IVRS', 'rb')
+    try:
+        units, regions = _ivrs_parse_table(f, pcidevices, ioapics)
+    finally:
+        # Close the table on the error paths as well.
+        f.close()
+
+    # BDF of devices that are permitted outside IOMMU: root complex
+    iommu_skiplist = set([0x0])
+    for d in pcidevices:
+        if d.bdf() not in iommu_skiplist and d.iommu is None:
+            raise RuntimeError(
+                'PCI device %02x:%02x.%x outside the scope of an '
+                'IOMMU' % (d.bus, d.dev, d.fn))
+
+    return units, regions
 
 
 def parse_ioports():
@@ -410,6 +1036,69 @@ def parse_ioports():
     return pm_timer_base
 
 
+class MMConfig:
+    """PCI MMCONFIG window as described by the ACPI MCFG table."""
+
+    def __init__(self, base, end_bus):
+        # base: base address of the MMCONFIG region
+        # end_bus: last bus number covered by the region
+        self.base = base
+        self.end_bus = end_bus
+
+    @staticmethod
+    def parse():
+        """Read the MCFG table and return the MMConfig it describes.
+
+        Raises RuntimeError if the signature is wrong, if the table is
+        longer than 60 bytes (more than one region), or if the region
+        does not start at segment 0, bus 0.
+        """
+        # Use the context manager so the table is closed on every path,
+        # including the error ones.
+        with input_open('/sys/firmware/acpi/tables/MCFG', 'rb') as f:
+            signature = f.read(4)
+            if signature != b'MCFG':
+                raise RuntimeError('MCFG: incorrect input file format %s' %
+                                   signature)
+            (length,) = struct.unpack('<I', f.read(4))
+            if length > 60:
+                raise RuntimeError('Multiple MMCONFIG regions found! '
+                                   'This is not supported')
+            # The first (and only) allocation entry starts at offset 44.
+            f.seek(44)
+            (base, segment, start_bus, end_bus) = \
+                struct.unpack('<QHBB', f.read(12))
+        if segment != 0 or start_bus != 0:
+            raise RuntimeError('Invalid MCFG structure found')
+        return MMConfig(base, end_bus)
+
+
+def get_cpu_vendor():
+    """Return the 'vendor_id' value from /proc/cpuinfo.
+
+    The result is cached in the module-level `cpuvendor`.  Returns None
+    if no 'vendor_id' line is found.
+    """
+    global cpuvendor
+    if cpuvendor is not None:
+        return cpuvendor
+    with input_open('/proc/cpuinfo', 'r') as f:
+        for line in f:
+            # partition() copes with blank lines, lines without a colon
+            # and values that themselves contain colons, all of which
+            # break a two-way unpack of split(':').
+            key, sep, value = line.partition(':')
+            if sep and key.strip() == 'vendor_id':
+                cpuvendor = value.strip()
+                break
+    return cpuvendor
+
+
+# Collector mode: render the data collection script from the lists of
+# input files gathered so far, then exit without reading any of them.
+if options.generate_collector:
+    tmpl = Template(filename=os.path.join(options.template_dir,
+                                          'jailhouse-config-collect.tmpl'))
+    rendered = tmpl.render(filelist=' '.join(inputs['files']),
+                           filelist_opt=' '.join(inputs['files_opt']),
+                           filelist_intel=' '.join(inputs['files_intel']),
+                           filelist_amd=' '.join(inputs['files_amd']))
+    # Open the output only after rendering succeeded, and let the
+    # context manager close it.
+    with open(options.file, 'w') as f:
+        f.write(rendered)
+    sys.exit(0)
+
+# Compare by value: 'is' tests object identity, which is not guaranteed
+# for equal strings or integers.
+if options.root == '/' and os.geteuid() != 0:
+    print('ERROR: You have to be root to work on "/"!', file=sys.stderr)
+    sys.exit(1)
+
+# Refuse to work on inputs collected while Jailhouse was enabled.
+jh_enabled = input_readline('/sys/devices/jailhouse/enabled', True).rstrip()
+if jh_enabled == '1':
+    print('ERROR: Jailhouse was enabled when collecting input files! '
+          'Disable jailhouse and try again.',
+          file=sys.stderr)
+    sys.exit(1)
+
 (pcidevices, pcicaps) = parse_pcidevices()
 
 product = [input_readline('/sys/class/dmi/id/sys_vendor',
@@ -421,29 +1110,32 @@ product = [input_readline('/sys/class/dmi/id/sys_vendor',
 inmatemem = kmg_multiply_str(options.mem_inmates)
 hvmem = [0, kmg_multiply_str(options.mem_hv)]
 
-regions = parse_iomem()
-ourmem = parse_cmdline()
+(regions, dmar_regions) = parse_iomem(pcidevices)
+ourmem = parse_kernel_cmdline()
 total = hvmem[1] + inmatemem
 
-ioapic_id, rmrr_regs = parse_dmar()
-regions += rmrr_regs
+# PCI MMCONFIG window, read from the ACPI MCFG table.
+mmconfig = MMConfig.parse()
+
+# IOAPIC list; the IOMMU parsers below fill in each chip's bdf/iommu.
+ioapics = parse_madt()
+
+# Pick the IOMMU table parser by CPU vendor: DMAR for 'GenuineIntel',
+# IVRS for every other vendor string (including a missing one).
+vendor = get_cpu_vendor()
+if vendor == 'GenuineIntel':
+    (iommu_units, extra_memregs) = parse_dmar(pcidevices, ioapics,
+                                              dmar_regions)
+else:
+    (iommu_units, extra_memregs) = parse_ivrs(pcidevices, ioapics)
+# Memory regions reported by the IOMMU table join the regular ones.
+regions += extra_memregs
 
 # kernel does not have memmap region, pick one
 if ourmem is None:
     ourmem = alloc_mem(regions, total)
 elif (total > ourmem[1]):
     raise RuntimeError('Your memmap reservation is too small you need >="' +
-                       hex(total) + '"')
+                       hex(total) + '". Hint: your kernel cmd line needs '
+                       '"memmap=' + hex(total) + '$' + hex(ourmem[0]) + '"')
 
 hvmem[0] = ourmem[0]
 
-creg = MemRegion.find_region(regions, 'ACPI Tables')
-if creg is not None:
-    confmem = [creg.start, creg.size()]
-else:
-    print('WARNING: Could not find "ACPI Tables" memory! '
-          'You need to set it manually.', file=sys.stderr)
-    confmem = [0, 0]
 inmatereg = MemRegion(ourmem[0] + hvmem[1],
                       ourmem[0] + hvmem[1] + inmatemem - 1,
                       'JAILHOUSE Inmate Memory')
@@ -453,34 +1145,25 @@ cpucount = count_cpus()
 
 pm_timer_base = parse_ioports()
 
-jh_enabled = input_readline('/sys/devices/jailhouse/enabled',
-                            True).rstrip()
-if options.generate_collector is False and jh_enabled == '1':
-    print('ERROR: Jailhouse was enabled when collecting input files! '
-          'Disable jailhouse and try again.',
-          file=sys.stderr)
-    sys.exit(1)
 
 f = open(options.file, 'w')
-
-if options.generate_collector:
-    filelist = ' '.join(inputs['files'].union(inputs['dirs']))
-    filelist_opt = ' '.join(inputs['files_opt'])
-
-    tmpl = Template(filename='jailhouse-config-collect.tmpl')
-    f.write(tmpl.render(filelist=filelist, filelist_opt=filelist_opt))
-else:
-    tmpl = Template(filename='root-cell-config.c.tmpl')
-    f.write(tmpl.render(regions=regions,
-                        ourmem=ourmem,
-                        argstr=' '.join(sys.argv),
-                        hvmem=hvmem,
-                        confmem=confmem,
-                        product=product,
-                        pcidevices=pcidevices,
-                        pcicaps=pcicaps,
-                        cpucount=cpucount,
-                        ioapic_id=ioapic_id,
-                        pm_timer_base=pm_timer_base))
+# Render the root cell configuration from the template found in the
+# directory given by --template-dir.
+tmpl = Template(filename=os.path.join(options.template_dir,
+                                      'root-cell-config.c.tmpl'))
+# Values handed to the template, keyed by the names it refers to.
+kwargs = {
+    'regions': regions,
+    'ourmem': ourmem,
+    'argstr': ' '.join(sys.argv),
+    'hvmem': hvmem,
+    'product': product,
+    'pcidevices': pcidevices,
+    'pcicaps': pcicaps,
+    'cpucount': cpucount,
+    'irqchips': ioapics,
+    'pm_timer_base': pm_timer_base,
+    'mmconfig': mmconfig,
+    'iommu_units': iommu_units
+}
+
+f.write(tmpl.render(**kwargs))
 
 f.close()