#!/usr/bin/env python # # Jailhouse, a Linux-based partitioning hypervisor # # Copyright (c) Siemens AG, 2014-2016 # Copyright (c) Valentine Sinitsyn, 2014-2015 # # Authors: # Henning Schild # Jan Kiszka # Valentine Sinitsyn # # This work is licensed under the terms of the GNU GPL, version 2. See # the COPYING file in the top-level directory. # # This script should help to create a basic jailhouse configuration file. # It needs to be executed on the target machine, where it will gather # information about the system. For more advanced scenarios you will have # to change the generated C-code. from __future__ import print_function import sys import os import re import argparse import struct import fnmatch from mako.template import Template datadir = None if datadir: template_default_dir = datadir + "/jailhouse" else: template_default_dir = os.path.abspath(os.path.dirname(sys.argv[0])) cpuvendor = None # pretend to be part of the jailhouse tool sys.argv[0] = sys.argv[0].replace('-', ' ') parser = argparse.ArgumentParser() parser.add_argument('-g', '--generate-collector', help='generate a script to collect input files on ' 'a remote machine', action='store_true') parser.add_argument('-r', '--root', help='gather information in ROOT/, the default is "/" ' 'which means creating a config for localhost', default='/', action='store', type=str) parser.add_argument('-t', '--template-dir', help='the directory where the templates are located,' 'the default is "' + template_default_dir + '"', default=template_default_dir, action='store', type=str) memargs = [['--mem-inmates', '2M', 'inmate'], ['--mem-hv', '64M', 'hypervisor']] for entry in memargs: parser.add_argument(entry[0], help='the amount of ' + entry[2] + ' memory, default is "' + entry[1] + '", format "xxx[K|M|G]"', default=entry[1], action='store', type=str) parser.add_argument('file', metavar='FILE', help='name of file to write out', type=str) options = parser.parse_args() inputs = { 'files': set(), 'files_opt': set(), 'files_intel': set(), 'files_amd': set() } # required files inputs['files'].add('/proc/iomem') inputs['files'].add('/proc/cpuinfo') inputs['files'].add('/proc/cmdline') inputs['files'].add('/proc/ioports') inputs['files'].add('/sys/bus/pci/devices/*/config') inputs['files'].add('/sys/bus/pci/devices/*/resource') inputs['files'].add('/sys/devices/system/cpu/cpu*/uevent') inputs['files'].add('/sys/firmware/acpi/tables/APIC') inputs['files'].add('/sys/firmware/acpi/tables/MCFG') # optional files inputs['files_opt'].add('/sys/class/dmi/id/product_name') inputs['files_opt'].add('/sys/class/dmi/id/sys_vendor') inputs['files_opt'].add('/sys/devices/jailhouse/enabled') # platform specific files inputs['files_intel'].add('/sys/firmware/acpi/tables/DMAR') inputs['files_amd'].add('/sys/firmware/acpi/tables/IVRS') def kmg_multiply(value, kmg): if (kmg == 'K' or kmg == 'k'): return 1024 * value if (kmg == 'M' or kmg == 'm'): return 1024**2 * value if (kmg == 'G' or kmg == 'g'): return 1024**3 * value return value def kmg_multiply_str(str): m = re.match(r'([0-9a-fA-FxX]+)([KMG]?)', str) if m is not None: return kmg_multiply(int(m.group(1)), m.group(2)) raise RuntimeError('kmg_multiply_str can not parse input "' + str + '"') def check_input_listed(name, optional=False): set = inputs['files_opt'] if optional is False: set = inputs['files'] global cpuvendor if cpuvendor == 'GenuineIntel': set = set.union(inputs['files_intel']) elif cpuvendor == 'AuthenticAMD': set = set.union(inputs['files_amd']) for file in set: if fnmatch.fnmatch(name, file): return True raise RuntimeError('"' + name + '" is not a listed input file') def input_open(name, mode='r', optional=False): check_input_listed(name, optional) try: f = open(options.root + name, mode) except Exception as e: if optional: return open("/dev/null", mode) raise e return f def input_readline(name, optional=False): f = input_open(name, optional=optional) line = f.readline() f.close() return line def input_listdir(dir, wildcards): for w in wildcards: check_input_listed(os.path.join(dir, w)) dirs = os.listdir(options.root + dir) dirs.sort() return dirs class PCIBARs: IORESOURCE_IO = 0x00000100 IORESOURCE_MEM = 0x00000200 IORESOURCE_MEM_64 = 0x00100000 def __init__(self, dir): self.mask = [] f = input_open(os.path.join(dir, 'resource'), 'r') for n in range(6): (start, end, flags) = f.readline().split() flags = int(flags, 16) if flags & PCIBARs.IORESOURCE_IO: mask = ~(int(end, 16) - int(start, 16)) elif flags & PCIBARs.IORESOURCE_MEM: mask = ~(int(end, 16) - int(start, 16)) if flags & PCIBARs.IORESOURCE_MEM_64: self.mask.append(mask & 0xffffffff) mask >>= 32 n += 1 else: mask = 0 self.mask.append(mask & 0xffffffff) f.close() class PCICapability: def __init__(self, id, start, len, flags, content, msix_address): self.id = id self.start = start self.len = len self.flags = flags self.content = content self.msix_address = msix_address self.comments = [] def __eq__(self, other): return self.id == other.id and self.start == other.start and \ self.len == other.len and self.flags == other.flags RD = '0' RW = 'JAILHOUSE_PCICAPS_WRITE' JAILHOUSE_PCI_EXT_CAP = 0x8000 @staticmethod def parse_pcicaps(dir): caps = [] has_extended_caps = False f = input_open(os.path.join(dir, 'config'), 'rb') f.seek(0x06) (status,) = struct.unpack('= 2: # v2 capability len = 44 # access side effects still need to be analyzed flags = PCICapability.RD has_extended_caps = True elif id == 0x11: # MSI-X # access will be moderated by hypervisor len = 12 (table,) = struct.unpack('> 4 if id == 0xffff: break elif id == 0x0010: # SR-IOV len = 64 # access side effects still need to be analyzed flags = PCICapability.RD else: if (id & PCICapability.JAILHOUSE_PCI_EXT_CAP) != 0: print('WARNING: Ignoring unsupported PCI Express ' 'Extended Capability ID %x' % id) continue # unknown/unhandled cap, mark its existence len = 4 flags = PCICapability.RD f.seek(cap + 4) content = f.read(len - 4) id |= PCICapability.JAILHOUSE_PCI_EXT_CAP caps.append(PCICapability(id, cap, len, flags, content, 0)) f.close() return caps class PCIDevice: def __init__(self, type, domain, bus, dev, fn, bars, caps, path): self.type = type self.iommu = None self.domain = domain self.bus = bus self.dev = dev self.fn = fn self.bars = bars self.caps = caps self.path = path self.caps_start = 0 self.num_caps = len(caps) self.num_msi_vectors = 0 self.msi_64bits = 0 self.num_msix_vectors = 0 self.msix_region_size = 0 self.msix_address = 0 for c in caps: if c.id in (0x05, 0x11): msg_ctrl = struct.unpack('> 1) & 0x7) self.msi_64bits = (msg_ctrl >> 7) & 1 else: # MSI-X if c.msix_address != 0: vectors = (msg_ctrl & 0x7ff) + 1 self.num_msix_vectors = vectors self.msix_region_size = (vectors * 16 + 0xfff) & 0xf000 self.msix_address = c.msix_address else: print('WARNING: Ignoring invalid MSI-X configuration' ' of device %02x:%02x.%x' % (bus, dev, fn)) def __str__(self): return 'PCIDevice: %02x:%02x.%x' % (self.bus, self.dev, self.fn) def bdf(self): return self.bus << 8 | self.dev << 3 | self.fn @staticmethod def parse_pcidevice_sysfsdir(basedir, dir): dpath = os.path.join(basedir, dir) f = input_open(os.path.join(dpath, 'config'), 'rb') (vendor_device,) = struct.unpack(' r.stop): kernel_stop = r.stop before_kernel = None after_kernel = None # before Kernel if any if (r.start < kernel_start): before_kernel = MemRegion(r.start, kernel_start - 1, s) kernel_region = MemRegion(kernel_start, kernel_stop, "Kernel") # after Kernel if any if (r.stop > kernel_stop): after_kernel = MemRegion(kernel_stop + 1, r.stop, s) return [before_kernel, kernel_region, after_kernel] @staticmethod def parse_iomem_line(line): a = line.split(':', 1) level = int(a[0].count(' ') / 2) + 1 region = a[0].split('-', 1) a[1] = a[1].strip() return level, MemRegion(int(region[0], 16), int(region[1], 16), a[1]) @staticmethod def parse_iomem_file(): root = IOMemRegionTree(None, 0) f = input_open('/proc/iomem') lastlevel = 0 lastnode = root for line in f: (level, r) = IOMemRegionTree.parse_iomem_line(line) t = IOMemRegionTree(r, level) if (t.level > lastlevel): t.parent = lastnode if (t.level == lastlevel): t.parent = lastnode.parent if (t.level < lastlevel): p = lastnode.parent while(t.level < p.level): p = p.parent t.parent = p.parent t.parent.children.append(t) lastnode = t lastlevel = t.level f.close() return root # find HPET regions in tree @staticmethod def find_hpet_regions(tree): regions = [] for tree in tree.children: r = tree.region s = r.typestr if (s.find('HPET') >= 0): regions.append(r) # if the tree continues recurse further down ... if (len(tree.children) > 0): regions.extend(IOMemRegionTree.find_hpet_regions(tree)) return regions # recurse down the tree @staticmethod def parse_iomem_tree(tree): regions = [] for tree in tree.children: r = tree.region s = r.typestr # System RAM on the first level will be added completely, # if they don't contain the kernel itself, if they do, # we split them if (tree.level == 1 and s == 'System RAM'): regions.extend(tree.regions_split_by_kernel()) continue # blacklisted on all levels if ( (s.find('PCI MMCONFIG') >= 0) or (s.find('APIC') >= 0) # covers both APIC and IOAPIC ): continue # generally blacklisted, unless we find an HPET behind it if (s == 'reserved'): regions.extend(IOMemRegionTree.find_hpet_regions(tree)) continue # if the tree continues recurse further down ... if (len(tree.children) > 0): regions.extend(IOMemRegionTree.parse_iomem_tree(tree)) continue # add all remaining leaves regions.append(r) return regions class IOMMUConfig(object): def __init__(self, props): self.base_addr = props['base_addr'] self.mmio_size = props['mmio_size'] if 'amd_bdf' in props: self.amd_bdf = props['amd_bdf'] self.amd_base_cap = props['amd_base_cap'] self.amd_msi_cap = props['amd_msi_cap'] self.amd_features = props['amd_features'] @property def is_amd_iommu(self): return hasattr(self, 'amd_bdf') def parse_iomem(pcidevices): regions = IOMemRegionTree.parse_iomem_tree( IOMemRegionTree.parse_iomem_file()) rom_region = MemRegion(0xc0000, 0xdffff, 'ROMs') add_rom_region = False ret = [] dmar_regions = [] for r in regions: append_r = True # filter the list for MSI-X pages for d in pcidevices: if d.msix_address >= r.start and d.msix_address <= r.stop: if d.msix_address > r.start: head_r = MemRegion(r.start, d.msix_address - 1, r.typestr, r.comments) ret.append(head_r) if d.msix_address + d.msix_region_size < r.stop: tail_r = MemRegion(d.msix_address + d.msix_region_size, r.stop, r.typestr, r.comments) ret.append(tail_r) append_r = False break # filter out the ROMs if (r.start >= rom_region.start and r.stop <= rom_region.stop): add_rom_region = True append_r = False # filter out and save DMAR regions if r.typestr.find('dmar') >= 0: dmar_regions.append(r) append_r = False if append_r: ret.append(r) # add a region that covers all potential ROMs if add_rom_region: ret.append(rom_region) # newer Linux kernels will report the first page as reserved # it is needed for CPU init so include it anyways if (ret[0].typestr == 'System RAM' and ret[0].start == 0x1000): ret[0].start = 0 return ret, dmar_regions def parse_pcidevices(): devices = [] caps = [] basedir = '/sys/bus/pci/devices' list = input_listdir(basedir, ['*/config']) for dir in list: d = PCIDevice.parse_pcidevice_sysfsdir(basedir, dir) if d is not None: if len(d.caps) > 0: duplicate = False # look for duplicate capability patterns for d2 in devices: if d2.caps == d.caps: # reused existing capability list, but record all users d2.caps[0].comments.append(str(d)) d.caps_start = d2.caps_start duplicate = True break if not duplicate: d.caps[0].comments.append(str(d)) d.caps_start = len(caps) caps.extend(d.caps) devices.append(d) return (devices, caps) def parse_kernel_cmdline(): line = input_readline('/proc/cmdline') m = re.match(r'.*memmap=([0-9a-fA-FxX]+)([KMG]?)\$' '([0-9a-fA-FxX]+)([KMG]?).*', line) if m is not None: size = kmg_multiply(int(m.group(1), 0), m.group(2)) start = kmg_multiply(int(m.group(3), 0), m.group(4)) return [start, size] return None def alloc_mem(regions, size): mem = [0x3b000000, size] for r in regions: if ( r.typestr == 'System RAM' and r.start <= mem[0] and r.stop + 1 >= mem[0] + mem[1] ): if r.start < mem[0]: head_r = MemRegion(r.start, mem[0] - 1, r.typestr, r.comments) regions.insert(regions.index(r), head_r) if r.stop + 1 > mem[0] + mem[1]: tail_r = MemRegion(mem[0] + mem[1], r.stop, r.typestr, r.comments) regions.insert(regions.index(r), tail_r) regions.remove(r) return mem for r in reversed(regions): if (r.typestr == 'System RAM' and r.size() >= mem[1]): mem[0] = r.start r.start += mem[1] return mem raise RuntimeError('failed to allocate memory') def count_cpus(): list = input_listdir('/sys/devices/system/cpu', ['cpu*/uevent']) count = 0 for f in list: if re.match(r'cpu[0-9]+', f): count += 1 return count def parse_madt(): f = input_open('/sys/firmware/acpi/tables/APIC', 'rb') signature = f.read(4) if signature != b'APIC': raise RuntimeError('MADT: incorrect input file format %s' % signature) (length,) = struct.unpack(' 0: offset = 0 (struct_type, struct_len) = struct.unpack(' 0: offset = 0 (struct_type, struct_len) = struct.unpack('= 8: raise RuntimeError('Too many DMAR units. ' 'Raise JAILHOUSE_MAX_IOMMU_UNITS.') size = 0 for r in dmar_regions: if base == r.start: size = r.size() if size == 0: raise RuntimeError('DMAR region size cannot be identified.\n' 'Target Linux must run with Intel IOMMU ' 'enabled.') if size > 0x3000: raise RuntimeError('Unexpectedly large DMAR region.') units.append(IOMMUConfig({ 'base_addr': base, 'mmio_size': size })) if flags & 1: for d in pcidevices: if d.iommu is None: d.iommu = len(units) - 1 offset += 16 - offset while offset < struct_len: (scope_type, scope_len, id, bus, dev, fn) =\ parse_dmar_devscope(f) # PCI Endpoint Device if scope_type == 1: assert not (flags & 1) for d in pcidevices: if d.bus == bus and d.dev == dev and d.fn == fn: d.iommu = len(units) - 1 break # PCI Sub-hierarchy elif scope_type == 2: assert not (flags & 1) for d in pcidevices: if d.bus == bus and d.dev == dev and d.fn == fn: (secondbus, subordinate) = \ PCIPCIBridge.get_2nd_busses(d) for d2 in pcidevices: if ( d2.bus >= secondbus and d2.bus <= subordinate ): d2.iommu = len(units) - 1 break # IOAPIC elif scope_type == 3: ioapic = next(chip for chip in ioapics if chip.id == id) bdf = (bus << 8) | (dev << 3) | fn for chip in ioapics: if chip.bdf == bdf: raise RuntimeError('IOAPICs with identical BDF') ioapic.bdf = bdf ioapic.iommu = len(units) - 1 offset += scope_len # Reserved Memory Region Reporting Structure if struct_type == 1: f.seek(8 - offset, os.SEEK_CUR) offset += 8 - offset (base, limit) = struct.unpack('> 8) & 0xff, (bdf >> 3) & 0x1f, bdf & 0x7 return '%02x:%02x.%x' % (bus, dev, fun) f = input_open('/sys/firmware/acpi/tables/IVRS', 'rb') signature = f.read(4) if signature != b'IVRS': raise RuntimeError('IVRS: incorrect input file format %s' % signature) (length, revision) = struct.unpack(' 2: raise RuntimeError('IVRS: unsupported Revision %02x' % revision) f.seek(48, os.SEEK_SET) length -= 48 units = [] regions = [] # BDF of devices that are permitted outside IOMMU: root complex iommu_skiplist = set([0x0]) ivhd_blocks = 0 while length > 0: (block_type, block_length) = struct.unpack(' 1: raise RuntimeError('Jailhouse doesn\'t support more than one ' 'AMD IOMMU per PCI function.') # IVHD block ivhd_fields = struct.unpack(' 8: raise RuntimeError('Too many IOMMU units. ' 'Raise JAILHOUSE_MAX_IOMMU_UNITS.') msi_cap_ofs = None for i, d in enumerate(pcidevices): if d.bdf() == iommu_bdf: # Extract MSI capability offset for c in d.caps: if c.id == 0x05: msi_cap_ofs = c.start # We must not map IOMMU to the cells del pcidevices[i] if msi_cap_ofs is None: raise RuntimeError('AMD IOMMU lacks MSI support, and ' 'Jailhouse doesn\'t support MSI-X yet.') if (iommu_feat & (0xF << 13)) and (iommu_feat & (0x3F << 17)): # Performance Counters are supported, allocate 512K mmio_size = 524288 else: # Allocate 16K mmio_size = 16384 units.append(IOMMUConfig({ 'base_addr': base_addr, 'mmio_size': mmio_size, 'amd_bdf': iommu_bdf, 'amd_base_cap': base_cap_ofs, 'amd_msi_cap': msi_cap_ofs, # IVHD block type 0x11 has exact EFR copy but type 0x10 may # overwrite what hardware reports. Set reserved bit 0 in that # case to indicate that the value is in use. 'amd_features': (iommu_feat | 0x1) if block_type == 0x10 else 0 })) bdf_start_range = None while block_length > 0: (entry_type, device_id) = struct.unpack('= bdf_start_range and d.bdf() <= device_id: d.iommu = len(units) - 1 bdf_start_range = None elif entry_type == 0x42: # Alias select (device_id_b,) = struct.unpack('= 0x40: f.seek(4, os.SEEK_CUR) block_length -= 4 elif type in [0x20, 0x21, 0x22]: # IVMD block ivmd_fields = struct.unpack(' 60: raise RuntimeError('Multiple MMCONFIG regions found! ' 'This is not supported') f.seek(44) (base, segment, start_bus, end_bus) = \ struct.unpack(' ourmem[1]): raise RuntimeError('Your memmap reservation is too small you need >="' + hex(total) + '". Hint: your kernel cmd line needs ' '"memmap=' + hex(total) + '$' + hex(ourmem[0]) + '"') hvmem[0] = ourmem[0] inmatereg = MemRegion(ourmem[0] + hvmem[1], ourmem[0] + hvmem[1] + inmatemem - 1, 'JAILHOUSE Inmate Memory') regions.append(inmatereg) cpucount = count_cpus() pm_timer_base = parse_ioports() f = open(options.file, 'w') tmpl = Template(filename=os.path.join(options.template_dir, 'root-cell-config.c.tmpl')) kwargs = { 'regions': regions, 'ourmem': ourmem, 'argstr': ' '.join(sys.argv), 'hvmem': hvmem, 'product': product, 'pcidevices': pcidevices, 'pcicaps': pcicaps, 'cpucount': cpucount, 'irqchips': ioapics, 'pm_timer_base': pm_timer_base, 'mmconfig': mmconfig, 'iommu_units': iommu_units } f.write(tmpl.render(**kwargs)) f.close()