3 # Jailhouse, a Linux-based partitioning hypervisor
5 # Copyright (c) Siemens AG, 2014
7 # This work is licensed under the terms of the GNU GPL, version 2. See
8 # the COPYING file in the top-level directory.
10 # This script should help to create a basic jailhouse configuration file.
11 # It needs to be executed on the target machine, where it will gather
12 # information about the system. For more advanced scenarios you will have
13 # to change the generated C-code.
15 from __future__ import print_function
22 from mako.template import Template
# Default template directory. Two candidates are assigned here: the
# "jailhouse" subdirectory of `datadir`, and the absolute directory that
# contains this script (derived from sys.argv[0]).
# NOTE(review): the statements choosing between the two are not visible in
# this listing - presumably a conditional on `datadir`; confirm.
27 template_default_dir = datadir + "/jailhouse"
29 template_default_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
33 # pretend to be part of the jailhouse tool
# Every '-' in the program name is replaced by a space. sys.argv is later
# joined into the `argstr` value handed to the configuration template.
34 sys.argv[0] = sys.argv[0].replace('-', ' ')
# Command-line interface of the script.
36 parser = argparse.ArgumentParser()
# -g: write a collector script instead of a configuration (handled by the
# `options.generate_collector` branch further down).
37 parser.add_argument('-g', '--generate-collector',
38 help='generate a script to collect input files on '
# -r ROOT: prefix that input_open()/input_listdir() prepend to every input
# path, so a copied tree can be processed instead of the live system.
41 parser.add_argument('-r', '--root',
42 help='gather information in ROOT/, the default is "/" '
43 'which means creating a config for localhost',
# -t DIR: directory the mako templates are loaded from.
# NOTE(review): the two help fragments below are concatenated without a
# separating space, so the help reads "...located,the default is...".
47 parser.add_argument('-t', '--template-dir',
48 help='the directory where the templates are located,'
49 'the default is "' + template_default_dir + '"',
50 default=template_default_dir,
# Memory-size options, one entry per option: [flag, default, description].
54 memargs = [['--mem-inmates', '2M', 'inmate'],
55 ['--mem-hv', '64M', 'hypervisor']]
# Each entry becomes one argparse option; the description and the default
# are spliced into the help text.
58 parser.add_argument(entry[0],
59 help='the amount of ' + entry[2] +
60 ' memory, default is "' + entry[1] +
61 '", format "xxx[K|M|G]"',
# Positional argument: the output file.
66 parser.add_argument('file', metavar='FILE',
67 help='name of file to write out',
# Parsed options are kept at module level and read by the helpers below.
70 options = parser.parse_args()
# Registry of every file the script is allowed to read, as fnmatch patterns.
# check_input_listed() consults it before any input is opened:
#   'files'       - looked up for non-optional inputs
#   'files_opt'   - looked up for optional inputs
#   'files_intel' - merged in when the CPU vendor is 'GenuineIntel'
# The same three sets are also rendered into the collector script.
72 inputs = {'files': set(), 'files_opt': set(), 'files_intel': set()}
75 inputs['files'].add('/proc/iomem')
76 inputs['files'].add('/proc/cpuinfo')
77 inputs['files'].add('/proc/cmdline')
78 inputs['files'].add('/proc/ioports')
# Entries containing '*' are wildcard patterns, not literal paths.
79 inputs['files'].add('/sys/bus/pci/devices/*/config')
80 inputs['files'].add('/sys/devices/system/cpu/cpu*/uevent')
81 inputs['files'].add('/sys/firmware/acpi/tables/APIC')
82 inputs['files'].add('/sys/firmware/acpi/tables/MCFG')
# Inputs that are read with optional=True further down.
84 inputs['files_opt'].add('/sys/class/dmi/id/product_name')
85 inputs['files_opt'].add('/sys/class/dmi/id/sys_vendor')
86 inputs['files_opt'].add('/sys/devices/jailhouse/enabled')
87 # platform specific files
88 inputs['files_intel'].add('/sys/firmware/acpi/tables/DMAR')
def kmg_multiply(value, kmg):
    """Scale *value* by a binary size suffix.

    value -- number to scale
    kmg   -- 'K', 'M' or 'G' in either case; any other value (including
             the empty string) leaves *value* unchanged

    Returns value multiplied by 1024, 1024**2 or 1024**3 respectively.
    """
    if kmg in ('K', 'k'):
        return 1024 * value
    if kmg in ('M', 'm'):
        return 1024**2 * value
    if kmg in ('G', 'g'):
        return 1024**3 * value
    return value


def kmg_multiply_str(str):
    """Convert a size string such as "64M", "0x100K" or "2g" to bytes.

    str -- a number, decimal or with a base prefix like "0x", optionally
           followed by one K/M/G suffix in either case

    Returns the size in bytes as an int.
    Raises RuntimeError if the string is not of that form.
    """
    # Anchor the pattern so trailing garbage is rejected instead of being
    # silently dropped, and accept the lowercase suffixes that
    # kmg_multiply() already understands ("64m" used to parse as 64 bytes).
    m = re.match(r'([0-9a-fA-FxX]+)([KMGkmg]?)$', str)
    if m is not None:
        digits = m.group(1)
        value = None
        # Plain decimal first, so inputs like "010" keep meaning ten; fall
        # back to base 0 so that prefixed values such as "0x100" work too.
        for base in (10, 0):
            try:
                value = int(digits, base)
                break
            except ValueError:
                continue
        if value is not None:
            return kmg_multiply(value, m.group(2))
    raise RuntimeError('kmg_multiply_str can not parse input "' + str + '"')
# Check that `name` is one of the registered input files.
#
# The candidate patterns come from inputs['files_opt'], or from
# inputs['files'] when `optional` is False; inputs['files_intel'] is merged
# in when the module-level `cpuvendor` equals 'GenuineIntel'. Patterns are
# matched with fnmatch, so they may contain shell-style wildcards.
# NOTE(review): indentation is lost in this listing, so whether the Intel
# merge also applies to optional lookups cannot be told here - confirm.
#
# Ends by raising RuntimeError naming the unlisted file; presumably that is
# reached only when no pattern matched (the success path is not visible).
108 def check_input_listed(name, optional=False):
# NOTE: the local name `set` shadows the builtin inside this function.
109 set = inputs['files_opt']
110 if optional is False:
111 set = inputs['files']
113 if cpuvendor == 'GenuineIntel':
# union() builds a new set, so the registry itself is not modified.
114 set = set.union(inputs['files_intel'])
# `name` is the string under test, `file` the registered pattern.
117 if fnmatch.fnmatch(name, file):
119 raise RuntimeError('"' + name + '" is not a listed input file')
# Open the input file `name`, located below options.root, in `mode`.
#
# The name is validated with check_input_listed() first. If open() raises,
# the handler visible here opens "/dev/null" in the same mode instead.
# NOTE(review): presumably that fallback is limited to `optional` inputs and
# other failures propagate - the guarding lines are not visible; confirm.
122 def input_open(name, mode='r', optional=False):
123 check_input_listed(name, optional)
# options.root is prepended by plain string concatenation.
125 f = open(options.root + name, mode)
126 except Exception as e:
128 return open("/dev/null", mode)
# Open the input file `name` through input_open(), forwarding `optional`.
# NOTE(review): presumably returns the first line of that file, as the name
# suggests; the read itself is not visible in this listing - confirm.
133 def input_readline(name, optional=False):
134 f = input_open(name, optional=optional)
# List the directory `dir` below options.root.
#
# Each entry of `wildcards` is joined to `dir` and validated with
# check_input_listed() (as a non-optional input) before the directory is
# read with os.listdir().
140 def input_listdir(dir, wildcards):
142 check_input_listed(os.path.join(dir, w))
143 dirs = os.listdir(options.root + dir)
class PCICapability:
    """One entry of a PCI function's capability list.

    Attributes:
        id           -- capability ID byte
        start        -- config-space offset of the capability
        len          -- number of config-space bytes attributed to it
        flags        -- PCICapability.RD or PCICapability.RW
        content      -- raw bytes following the ID/next-pointer pair
        msix_address -- address of the MSI-X table, 0 if not an MSI-X cap
        comments     -- free-form annotations, filled in by callers
    """

    # Access-flag strings; RW is emitted verbatim into the generated C code.
    RD = '0'
    RW = 'JAILHOUSE_PCICAPS_WRITE'

    def __init__(self, id, start, len, flags, content, msix_address):
        self.id = id
        self.start = start
        self.len = len
        self.flags = flags
        self.content = content
        self.msix_address = msix_address
        self.comments = []

    def __eq__(self, other):
        # Identity of a capability is its layout and access mode; content,
        # MSI-X address and comments are deliberately not compared.
        return (self.id == other.id and self.start == other.start and
                self.len == other.len and self.flags == other.flags)

    @staticmethod
    def _msix_table_address(bar, table):
        """Combine a BAR value and the MSI-X Table Offset/BIR register.

        The BAR's low four flag bits and the register's low three BIR bits
        are masked off separately before adding. Without the parentheses
        the final mask applies to the whole sum and cuts addresses behind
        64-bit BARs down to 32 bits.
        """
        return (bar & 0xfffffffffffffff0) + (table & 0xfffffff8)

    @staticmethod
    def parse_pcicaps(dir):
        """Parse the capability list of the device in sysfs directory *dir*.

        Reads DIR/config through input_open() and returns a list of
        PCICapability objects in list order; the list is empty when the
        status register reports no capability list.

        Raises RuntimeError if an MSI-X capability refers to a BAR that is
        not a 32-bit or 64-bit memory BAR.
        """
        caps = []
        # The context manager closes the config file on every exit path.
        with input_open(os.path.join(dir, 'config'), 'rb') as f:
            f.seek(0x06)
            (status,) = struct.unpack('<H', f.read(2))
            # capability list supported?
            if (status & (1 << 4)) == 0:
                return caps
            # walk capability list
            f.seek(0x34)
            (next_ptr,) = struct.unpack('B', f.read(1))
            while next_ptr != 0:
                cap = next_ptr
                msix_address = 0
                f.seek(cap)
                (cap_id, next_ptr) = struct.unpack('<BB', f.read(2))
                if cap_id == 0x01:  # Power Management
                    # this cap can be handed out completely
                    cap_len = 8
                    flags = PCICapability.RW
                elif cap_id == 0x05:  # MSI
                    # access will be moderated by hypervisor
                    cap_len = 10
                    (msgctl,) = struct.unpack('<H', f.read(2))
                    if (msgctl & (1 << 7)) != 0:  # 64-bit support
                        cap_len += 4
                    if (msgctl & (1 << 8)) != 0:  # per-vector masking support
                        cap_len += 10
                    flags = PCICapability.RW
                elif cap_id == 0x11:  # MSI-X
                    # access will be moderated by hypervisor
                    cap_len = 12
                    # skip message control, read Table Offset/BIR
                    (table,) = struct.unpack('<xxI', f.read(6))
                    # the low three bits select the BAR holding the table
                    f.seek(0x10 + (table & 7) * 4)
                    (bar,) = struct.unpack('<I', f.read(4))
                    if (bar & 0x3) != 0:
                        raise RuntimeError('Invalid MSI-X BAR found')
                    if (bar & 0x4) != 0:
                        # 64-bit BAR: the next dword holds the upper half
                        bar |= struct.unpack('<I', f.read(4))[0] << 32
                    msix_address = PCICapability._msix_table_address(bar,
                                                                     table)
                    flags = PCICapability.RW
                else:
                    # unknown/unhandled cap, mark its existence
                    cap_len = 2
                    flags = PCICapability.RD
                # content is everything after the ID/next-pointer pair
                f.seek(cap + 2)
                content = f.read(cap_len - 2)
                caps.append(PCICapability(cap_id, cap, cap_len, flags,
                                          content, msix_address))
        return caps
# PCIDevice: one PCI function together with the MSI / MSI-X properties
# derived from its capability list (`caps`, a list of PCICapability).
220 def __init__(self, type, domain, bus, dev, fn, caps, path):
230 self.num_caps = len(caps)
# MSI / MSI-X defaults, overwritten below when a matching capability exists.
231 self.num_msi_vectors = 0
233 self.num_msix_vectors = 0
234 self.msix_region_size = 0
235 self.msix_address = 0
# For capability IDs 0x05 and 0x11 the first two content bytes are decoded
# as a little-endian 16-bit value (`msg_ctrl`).
237 if c.id in (0x05, 0x11):
238 msg_ctrl = struct.unpack('<H', c.content[:2])[0]
239 if c.id == 0x05: # MSI
# vector count is 2 ** (bits 3:1 of msg_ctrl); bit 7 is stored as msi_64bits
240 self.num_msi_vectors = 1 << ((msg_ctrl >> 1) & 0x7)
241 self.msi_64bits = (msg_ctrl >> 7) & 1
# Otherwise (ID 0x11): the low 11 bits of msg_ctrl plus one give the count.
243 vectors = (msg_ctrl & 0x7ff) + 1
244 self.num_msix_vectors = vectors
# 16 bytes per vector, rounded up to a 4 KiB multiple; the 0xf000 mask keeps
# only bits 15:12 of the rounded value.
245 self.msix_region_size = (vectors * 16 + 0xfff) & 0xf000
246 self.msix_address = c.msix_address
# Human-readable "bus:dev.fn" form, in hex.
249 return 'PCIDevice: %02x:%02x.%x' % (self.bus, self.dev, self.fn)
# Packed identifier: bus shifted left by 8, dev by 3, OR-ed with fn.
252 return self.bus << 8 | self.dev << 3 | self.fn
# Build a PCIDevice from the sysfs directory basedir/dir.
#
# A 16-bit little-endian class code is read from the device's config file;
# 0x0604 selects JAILHOUSE_PCI_TYPE_BRIDGE, the other visible assignment
# selects JAILHOUSE_PCI_TYPE_DEVICE. Domain, device and function numbers are
# parsed as hex.
# NOTE(review): `a`, `bus` and `df` are defined on lines not visible here -
# presumably the ':' / '.' separated parts of the directory name; confirm.
255 def parse_pcidevice_sysfsdir(basedir, dir):
256 dpath = os.path.join(basedir, dir)
257 f = input_open(os.path.join(dpath, 'config'), 'rb')
259 (classcode,) = struct.unpack('<H', f.read(2))
261 if classcode == 0x0604:
262 type = 'JAILHOUSE_PCI_TYPE_BRIDGE'
264 type = 'JAILHOUSE_PCI_TYPE_DEVICE'
266 domain = int(a[0], 16)
# The capability list is parsed from the same config file.
269 caps = PCICapability.parse_pcicaps(dpath)
270 return PCIDevice(type, domain, bus, int(df[0], 16), int(df[1], 16),
# PCI-to-PCI bridge helper, derived from PCIDevice.
274 class PCIPCIBridge(PCIDevice):
# Return the tuple (secondbus, subordinate) for bridge `dev`: two
# consecutive bytes read from the config file under dev.path.
# NOTE(review): the seek selecting the config-space offset is not visible in
# this listing.
276 def get_2nd_busses(dev):
# Only valid for bridges. Being an assert, the check is removed under -O.
277 assert dev.type == 'JAILHOUSE_PCI_TYPE_BRIDGE'
278 f = input_open(os.path.join(dev.path, 'config'), 'rb')
280 (secondbus, subordinate) = struct.unpack('<BB', f.read(2))
282 return (secondbus, subordinate)
# MemRegion: an address range with a type string and optional comments.
# Callers in this file construct regions with an inclusive `stop` (for
# example MemRegion(r.start, kernel_start - 1, s)).
286 def __init__(self, start, stop, typestr, comments=None):
289 self.typestr = typestr
293 self.comments = comments
# Debug representation: hex start-stop range followed by the type string.
296 return 'MemRegion: %08x-%08x : %s' % \
297 (self.start, self.stop, self.typestr)
300 # round up to full PAGE_SIZE
# NOTE(review): `/` is true division on Python 3, so the value passes
# through a float before int() truncates it. Also, because `stop` is
# inclusive, stop - start == 0x1000 (0x1001 bytes) yields 0x1000 and
# stop == start yields 0 - confirm this rounding is intended.
301 return int((self.stop - self.start + 0xfff) / 0x1000) * 0x1000
# Return the JAILHOUSE_MEM_* flag expression for this region as C source.
# `p` is inserted in front of the continuation line's indentation.
303 def flagstr(self, p=''):
# The four type strings tested here get the read/write/execute/DMA flag set
# built below.
305 self.typestr == 'System RAM' or
306 self.typestr == 'Kernel' or
307 self.typestr == 'RAM buffer' or
308 self.typestr == 'ACPI DMAR RMRR'
310 s = 'JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE |\n'
311 s += p + '\t\tJAILHOUSE_MEM_EXECUTE | JAILHOUSE_MEM_DMA'
# Final return: read/write only (presumably for every other type string).
313 return 'JAILHOUSE_MEM_READ | JAILHOUSE_MEM_WRITE'
class IOAPIC(object):
    """An I/O APIC as described by the ACPI MADT.

    Attributes:
        id       -- IOAPIC ID
        address  -- MMIO base address
        gsi_base -- first global system interrupt served by this chip
        iommu    -- index of the IOMMU unit the chip sits behind
        bdf      -- bus/device/function the chip uses as requester ID
    """

    def __init__(self, id, address, gsi_base, iommu=0, bdf=0):
        self.id = id
        self.address = address
        self.gsi_base = gsi_base
        self.iommu = iommu
        self.bdf = bdf

    @property
    def dmar_unit(self):
        """Alias of `iommu`.

        The DMAR parser stores the unit index under this name while
        irqchip_id() reads `iommu`. Keeping both names on one value makes
        the parsed unit index reach the generated irqchip ID instead of
        being dropped.
        """
        return self.iommu

    @dmar_unit.setter
    def dmar_unit(self, unit):
        self.iommu = unit

    def __str__(self):
        return 'IOAPIC %d, GSI base %d' % (self.id, self.gsi_base)

    def irqchip_id(self):
        """Return the irqchip ID: IOMMU index in bits 16 and up, BDF below."""
        # encode the IOMMU number into the irqchip ID
        return (self.iommu << 16) | self.bdf
# Tree built from /proc/iomem. Each node wraps a MemRegion (None for the
# root created in parse_iomem_file) and carries a nesting level, a parent
# link and a list of children.
332 class IOMemRegionTree:
333 def __init__(self, region, level):
# String form: the region indented by its level, followed by the parent's
# type string when the parent wraps a region; children are visited as well.
342 s = (' ' * (self.level - 1)) + str(self.region)
343 if self.parent and self.parent.region:
344 s += ' --> ' + self.parent.region.typestr
346 for c in self.children:
# Split this node's region around the kernel image and return the list
# [before_kernel, kernel_region, after_kernel].
# The kernel range runs from the start of the first child whose type string
# starts with 'Kernel ' to the stop of the last such child.
# NOTE(review): the handling for "no kernel child" and the initial values of
# before_kernel / after_kernel are not visible in this listing.
350 def regions_split_by_kernel(self):
351 kernel = [x for x in self.children if
352 x.region.typestr.startswith('Kernel ')]
354 if (len(kernel) == 0):
360 kernel_start = kernel[0].region.start
361 kernel_stop = kernel[len(kernel) - 1].region.stop
363 # align this for 16M, but only if we have enough space
# i.e. move kernel_stop to the last byte of its 16 MiB-aligned block
364 kernel_stop = (kernel_stop & ~0xFFFFFF) + 0xFFFFFF
365 if (kernel_stop > r.stop):
371 # before Kernel if any
372 if (r.start < kernel_start):
373 before_kernel = MemRegion(r.start, kernel_start - 1, s)
# The kernel part is relabelled with the plain type string "Kernel".
375 kernel_region = MemRegion(kernel_start, kernel_stop, "Kernel")
377 # after Kernel if any
378 if (r.stop > kernel_stop):
379 after_kernel = MemRegion(kernel_stop + 1, r.stop, s)
381 return [before_kernel, kernel_region, after_kernel]
# Parse one /proc/iomem line into (level, MemRegion).
# The text before the first ':' is "start-stop" in hex. The level is
# derived from the number of spaces in that part (two per level, plus one).
384 def parse_iomem_line(line):
385 a = line.split(':', 1)
386 level = int(a[0].count(' ') / 2) + 1
387 region = a[0].split('-', 1)
# NOTE(review): a[1] is passed on as the type string; any stripping of
# surrounding whitespace would happen on a line not visible here.
389 return level, MemRegion(int(region[0], 16), int(region[1], 16), a[1])
# Read /proc/iomem and build the region tree below an empty root node.
# A node is attached according to how its level compares with the previous
# line's level: deeper, equal (same parent as the previous node) or
# shallower (walk up the ancestors).
392 def parse_iomem_file():
393 root = IOMemRegionTree(None, 0)
394 f = input_open('/proc/iomem')
398 (level, r) = IOMemRegionTree.parse_iomem_line(line)
399 t = IOMemRegionTree(r, level)
400 if (t.level > lastlevel):
402 if (t.level == lastlevel):
403 t.parent = lastnode.parent
404 if (t.level < lastlevel):
406 while(t.level < p.level):
410 t.parent.children.append(t)
417 # find HPET regions in tree
# Recursive: results from child subtrees are appended to `regions`.
# NOTE: the loop variable reuses the parameter name `tree`.
419 def find_hpet_regions(tree):
422 for tree in tree.children:
426 if (s.find('HPET') >= 0):
429 # if the tree continues recurse further down ...
430 if (len(tree.children) > 0):
431 regions.extend(IOMemRegionTree.find_hpet_regions(tree))
435 # recurse down the tree
# Flatten the tree into a list of regions, applying the rules spelled out
# in the comments below. NOTE: the loop variable reuses the name `tree`.
437 def parse_iomem_tree(tree):
440 for tree in tree.children:
444 # System RAM on the first level will be added completely,
445 # if they don't contain the kernel itself, if they do,
447 if (tree.level == 1 and s == 'System RAM'):
448 regions.extend(tree.regions_split_by_kernel())
451 # blacklisted on all levels
# substring tests against the region's type string
453 (s.find('PCI MMCONFIG') >= 0) or
454 (s.find('APIC') >= 0) or # covers both APIC and IOAPIC
455 (s.find('dmar') >= 0)
459 # generally blacklisted, unless we find an HPET behind it
460 if (s == 'reserved'):
461 regions.extend(IOMemRegionTree.find_hpet_regions(tree))
464 # if the tree continues recurse further down ...
465 if (len(tree.children) > 0):
466 regions.extend(IOMemRegionTree.parse_iomem_tree(tree))
469 # add all remaining leaves
# Produce the memory-region list from /proc/iomem, with the MSI-X table
# pages of the given PCI devices cut out of any region containing them.
#
# pcidevices -- PCIDevice objects; their msix_address / msix_region_size
#               fields are read here
475 def parse_iomem(pcidevices):
476 regions = IOMemRegionTree.parse_iomem_tree(
477 IOMemRegionTree.parse_iomem_file())
479 # filter the list for MSI-X pages
# A region containing a device's MSI-X address is split into the part in
# front of the table (head_r) and the part behind it (tail_r).
483 if d.msix_address >= r.start and d.msix_address <= r.stop:
484 if d.msix_address > r.start:
485 head_r = MemRegion(r.start, d.msix_address - 1,
486 r.typestr, r.comments)
488 if d.msix_address + d.msix_region_size < r.stop:
489 tail_r = MemRegion(d.msix_address + d.msix_region_size,
490 r.stop, r.typestr, r.comments)
497 # newer Linux kernels will report the first page as reserved
498 # it is needed for CPU init so include it anyways
# Applies when the first resulting region is System RAM starting at 0x1000.
499 if (ret[0].typestr == 'System RAM' and ret[0].start == 0x1000):
# Enumerate the PCI devices under /sys/bus/pci/devices.
#
# Returns (devices, caps): the PCIDevice objects plus a combined capability
# list. Each device's `caps_start` records where its capabilities are found
# in that combined list.
505 def parse_pcidevices():
508 basedir = '/sys/bus/pci/devices'
# NOTE: the local name `list` shadows the builtin inside this function.
509 list = input_listdir(basedir, ['*/config'])
511 d = PCIDevice.parse_pcidevice_sysfsdir(basedir, dir)
515 # look for duplicate capability patterns
# List equality relies on PCICapability.__eq__.
517 if d2.caps == d.caps:
518 # reused existing capability list, but record all users
519 d2.caps[0].comments.append(str(d))
520 d.caps_start = d2.caps_start
# A capability list not seen before: tag its first entry with this device
# and record the current length of the combined list as its start index.
524 d.caps[0].comments.append(str(d))
525 d.caps_start = len(caps)
528 return (devices, caps)
# Look for a "memmap=<size>[KMG]$<start>[KMG]" reservation on the kernel
# command line (/proc/cmdline) and decode its size and start address.
#
# Both numbers are parsed with int(..., 0), so "0x" prefixes are accepted,
# and scaled by kmg_multiply(). Only uppercase K/M/G suffixes are matched.
# NOTE(review): the return statements are not visible in this listing;
# callers index the result as [start, size] and test it for None - confirm.
531 def parse_kernel_cmdline():
532 line = input_readline('/proc/cmdline')
# The '$' separating size and start is escaped to match it literally.
533 m = re.match(r'.*memmap=([0-9a-fA-FxX]+)([KMG]?)\$'
534 '([0-9a-fA-FxX]+)([KMG]?).*',
537 size = kmg_multiply(int(m.group(1), 0), m.group(2))
538 start = kmg_multiply(int(m.group(3), 0), m.group(4))
# Pick a memory range of `size` bytes out of `regions`.
#
# First choice is the fixed start address 0x3b000000: a 'System RAM' region
# that fully contains [start, start + size) is split into the parts in
# front of (head_r) and behind (tail_r) the chosen range. Failing that, the
# regions are scanned from the end for a 'System RAM' region that is large
# enough.
#
# regions -- list of MemRegion; modified in place
# size    -- number of bytes needed
# Raises RuntimeError('failed to allocate memory') if nothing fits.
543 def alloc_mem(regions, size):
# mem is [start, size]
544 mem = [0x3b000000, size]
547 r.typestr == 'System RAM' and
548 r.start <= mem[0] and
549 r.stop + 1 >= mem[0] + mem[1]
552 head_r = MemRegion(r.start, mem[0] - 1, r.typestr, r.comments)
# New pieces are inserted at the position of the region being split.
553 regions.insert(regions.index(r), head_r)
554 if r.stop + 1 > mem[0] + mem[1]:
555 tail_r = MemRegion(mem[0] + mem[1], r.stop, r.typestr,
557 regions.insert(regions.index(r), tail_r)
# Fallback: walk the regions from the end of the list.
560 for r in reversed(regions):
561 if (r.typestr == 'System RAM' and r.size() >= mem[1]):
565 raise RuntimeError('failed to allocate memory')
# List /sys/devices/system/cpu (registered input pattern 'cpu*/uevent') and
# test each entry against the pattern cpu<number>.
# NOTE(review): presumably the matching entries are counted; the enclosing
# function header and the counting itself are not visible in this listing.
# NOTE: the local name `list` shadows the builtin.
569 list = input_listdir('/sys/devices/system/cpu', ['cpu*/uevent'])
572 if re.match(r'cpu[0-9]+', f):
# Parse the ACPI MADT (table file 'APIC') and collect its IOAPIC entries.
# NOTE(review): the enclosing function header is not visible in this listing;
# the main script calls it as parse_madt().
578 f = input_open('/sys/firmware/acpi/tables/APIC', 'rb')
# The table must start with the 4-byte signature b'APIC'.
579 signature = f.read(4)
580 if signature != b'APIC':
581 raise RuntimeError('MADT: incorrect input file format %s' % signature)
# 32-bit little-endian table length follows the signature.
582 (length,) = struct.unpack('<I', f.read(4))
# Every sub-structure starts with a one-byte type and a one-byte length.
589 (struct_type, struct_len) = struct.unpack('<BB', f.read(2))
# IOAPIC entry: id byte, one pad byte, 32-bit address, 32-bit GSI base.
594 (id, address, gsi_base) = struct.unpack('<BxII', f.read(10))
596 ioapics.append(IOAPIC(id, address, gsi_base))
# Skip whatever part of the sub-structure has not been consumed.
598 f.seek(struct_len - offset, os.SEEK_CUR)
# Read one 8-byte DMAR Device Scope structure from the open file `f`.
#
# Layout as unpacked here: type byte, length byte, two skipped bytes, then
# id, bus, dev and fn bytes.
# Returns (scope_type, scope_len, id, bus, dev, fn).
# Raises RuntimeError('Unsupported DMAR Device Scope Structure'); the
# condition guarding that raise is not visible in this listing.
603 def parse_dmar_devscope(f):
604 (scope_type, scope_len, id, bus, dev, fn) = \
605 struct.unpack('<BBxxBBBB', f.read(8))
607 raise RuntimeError('Unsupported DMAR Device Scope Structure')
608 return (scope_type, scope_len, id, bus, dev, fn)
611 # parsing of DMAR ACPI Table
612 # see Intel VT-d Spec chapter 8
#
# pcidevices -- PCIDevice list; each device's `iommu` is set to the index of
#               the remapping unit whose scope covers it
# ioapics    -- IOAPIC list; entries named by an IOAPIC scope are updated
# Returns (units, regions): the remapping units found and the reserved
# memory regions (type 'ACPI DMAR RMRR') as MemRegion objects.
# Raises RuntimeError on a bad signature, multiple PCI segments, too many
# units, or two IOAPICs with the same BDF.
613 def parse_dmar(pcidevices, ioapics):
614 f = input_open('/sys/firmware/acpi/tables/DMAR', 'rb')
# The table must start with the 4-byte signature b'DMAR'.
615 signature = f.read(4)
616 if signature != b'DMAR':
617 raise RuntimeError('DMAR: incorrect input file format %s' % signature)
618 (length,) = struct.unpack('<I', f.read(4))
# Every remapping structure starts with a 16-bit type and a 16-bit length.
626 (struct_type, struct_len) = struct.unpack('<HH', f.read(4))
630 # DMA Remapping Hardware Unit Definition
632 (flags, segment, base) = struct.unpack('<BxHQ', f.read(12))
634 raise RuntimeError('We do not support multiple PCI segments')
636 raise RuntimeError('Too many DMAR units. '
637 'Raise JAILHOUSE_MAX_DMAR_UNITS.')
# len(units) - 1 is the index of the unit currently being parsed.
642 d.iommu = len(units) - 1
# Sets offset to 16: the 4-byte header plus the 12 bytes unpacked above.
643 offset += 16 - offset
# The rest of the structure is a sequence of device scope entries.
644 while offset < struct_len:
645 (scope_type, scope_len, id, bus, dev, fn) =\
646 parse_dmar_devscope(f)
647 # PCI Endpoint Device
650 if d.bus == bus and d.dev == dev and d.fn == fn:
651 d.iommu = len(units) - 1
# Scope type 2: the matching device is a bridge; every device on a bus
# between its secondary and subordinate bus numbers belongs to this unit.
654 elif scope_type == 2:
656 if d.bus == bus and d.dev == dev and d.fn == fn:
657 (secondbus, subordinate) = \
658 PCIPCIBridge.get_2nd_busses(d)
659 for d2 in pcidevices:
661 d2.bus >= secondbus and
662 d2.bus <= subordinate
664 d2.iommu = len(units) - 1
# Scope type 3: an IOAPIC, looked up by ID. next() raises StopIteration if
# no IOAPIC in `ioapics` has that ID.
667 elif scope_type == 3:
668 ioapic = next(chip for chip in ioapics if chip.id == id)
669 bdf = (bus << 8) | (dev << 3) | fn
672 raise RuntimeError('IOAPICs with identical BDF')
# NOTE(review): the unit index is stored as `dmar_unit`, whereas
# IOAPIC.irqchip_id() reads `iommu` - confirm both are meant to be the same
# attribute, otherwise the encoded IOMMU number stays at its default.
674 ioapic.dmar_unit = len(units) - 1
677 # Reserved Memory Region Reporting Structure
# Skip ahead to byte 8 of the structure, then read the 64-bit base and
# limit addresses.
679 f.seek(8 - offset, os.SEEK_CUR)
681 (base, limit) = struct.unpack('<QQ', f.read(16))
# The device scopes of a reserved region only feed the comment list.
685 while offset < struct_len:
686 (scope_type, scope_len, id, bus, dev, fn) =\
687 parse_dmar_devscope(f)
689 comments.append('PCI device: %02x:%02x.%x' %
692 comments.append('DMAR parser could not decode device path')
695 reg = MemRegion(base, limit, 'ACPI DMAR RMRR', comments)
# Skip whatever part of the structure has not been consumed.
698 f.seek(struct_len - offset, os.SEEK_CUR)
700 return units, regions
# Find the ACPI PM timer in /proc/ioports: the line ending in 'ACPI PM_TMR'
# supplies the base port, parsed as hex from the text before the first '-'.
# NOTE(review): the enclosing function header and its return are not visible
# in this listing; the main script calls it as parse_ioports().
705 f = input_open('/proc/ioports')
707 if line.endswith('ACPI PM_TMR\n'):
708 pm_timer_base = int(line.split('-')[0], 16)
# MMConfig: the PCI MMCONFIG window as described by the ACPI MCFG table,
# constructed from its base address and last bus number.
715 def __init__(self, base, end_bus):
717 self.end_bus = end_bus
# Parse /sys/firmware/acpi/tables/MCFG and return an MMConfig.
# Raises RuntimeError on a bad signature, on multiple MMCONFIG regions, or
# when the entry has a non-zero segment or start bus.
# NOTE(review): the method header, the length check and the seek to the
# entry are not visible in this listing; the main script calls
# MMConfig.parse().
721 f = input_open('/sys/firmware/acpi/tables/MCFG', 'rb')
722 signature = f.read(4)
723 if signature != b'MCFG':
724 raise RuntimeError('MCFG: incorrect input file format %s' %
726 (length,) = struct.unpack('<I', f.read(4))
728 raise RuntimeError('Multiple MMCONFIG regions found! '
729 'This is not supported')
# One entry: 64-bit base address, 16-bit segment, start bus and end bus.
731 (base, segment, start_bus, end_bus) = \
732 struct.unpack('<QHBB', f.read(12))
733 if segment != 0 or start_bus != 0:
734 raise RuntimeError('Invalid MCFG structure found')
735 return MMConfig(base, end_bus)
# Return the CPU vendor string from the 'vendor_id' line of /proc/cpuinfo.
# The result is kept in the module-level `cpuvendor`; the visible
# `is not None` test lets later calls reuse it.
738 def get_cpu_vendor():
740 if cpuvendor is not None:
742 with input_open('/proc/cpuinfo', 'r') as f:
# NOTE(review): this two-way unpack requires exactly one ':' on the line; a
# line with a ':' inside its value would raise ValueError if it is reached
# before 'vendor_id' is found.
746 key, value = line.split(':')
747 if key.strip() == 'vendor_id':
748 cpuvendor = value.strip()
# Collector mode (-g): render the collector-script template into the output
# file instead of generating a configuration. The three registered input
# sets are handed to the template as space-separated strings.
752 if options.generate_collector:
753 f = open(options.file, 'w')
# Sets are unordered, so the order of names in each string is not fixed.
754 filelist = ' '.join(inputs['files'])
755 filelist_opt = ' '.join(inputs['files_opt'])
756 filelist_intel = ' '.join(inputs['files_intel'])
758 tmpl = Template(filename=os.path.join(options.template_dir,
759 'jailhouse-config-collect.tmpl'))
760 f.write(tmpl.render(filelist=filelist, filelist_opt=filelist_opt,
761 filelist_intel=filelist_intel))
# Reading the live system ("/") needs root privileges; a collected copy
# passed via --root does not. Compare by value: `is` tests object identity,
# which is not guaranteed for equal strings or ints (a "/" coming from the
# command line is a different object than the literal) and triggers a
# SyntaxWarning on newer Python versions.
if options.root == '/' and os.geteuid() != 0:
    print('ERROR: You have to be root to work on "/"!', file=sys.stderr)
    sys.exit(1)
# Main flow: gather the hardware description and render the root cell
# configuration.

# Refuse input collected while Jailhouse was enabled. The file is an
# optional input, read with optional=True.
769 jh_enabled = input_readline('/sys/devices/jailhouse/enabled', True).rstrip()
770 if jh_enabled == '1':
771 print('ERROR: Jailhouse was enabled when collecting input files! '
772 'Disable jailhouse and try again.',
776 (pcidevices, pcicaps) = parse_pcidevices()
# Vendor and product name from DMI.
778 product = [input_readline('/sys/class/dmi/id/sys_vendor',
780 input_readline('/sys/class/dmi/id/product_name',
# Requested memory sizes in bytes; hvmem is [start, size].
784 inmatemem = kmg_multiply_str(options.mem_inmates)
785 hvmem = [0, kmg_multiply_str(options.mem_hv)]
787 regions = parse_iomem(pcidevices)
# A memmap= reservation from the kernel command line, if there is one.
788 ourmem = parse_kernel_cmdline()
789 total = hvmem[1] + inmatemem
791 mmconfig = MMConfig.parse()
793 ioapics = parse_madt()
# The DMAR table is only parsed on Intel CPUs; otherwise both results are
# empty lists.
795 if get_cpu_vendor() == 'GenuineIntel':
796 (dmar_units, rmrr_regs) = parse_dmar(pcidevices, ioapics)
798 (dmar_units, rmrr_regs) = [], []
802 if get_cpu_vendor() == 'AuthenticAMD':
803 d.iommu = 0 # temporary workaround
805 raise RuntimeError('PCI device %02x:%02x.%x outside the scope of an '
806 'IOMMU' % (d.bus, d.dev, d.fn))
808 # kernel does not have memmap region, pick one
810 ourmem = alloc_mem(regions, total)
# An existing reservation must cover hypervisor plus inmate memory.
811 elif (total > ourmem[1]):
812 raise RuntimeError('Your memmap reservation is too small you need >="' +
# Inmate memory is placed directly behind the hypervisor memory; `stop` is
# inclusive, hence the "- 1".
817 inmatereg = MemRegion(ourmem[0] + hvmem[1],
818 ourmem[0] + hvmem[1] + inmatemem - 1,
819 'JAILHOUSE Inmate Memory')
820 regions.append(inmatereg)
822 cpucount = count_cpus()
824 pm_timer_base = parse_ioports()
# Render the root cell configuration template into the output file.
827 f = open(options.file, 'w')
828 tmpl = Template(filename=os.path.join(options.template_dir,
829 'root-cell-config.c.tmpl'))
830 f.write(tmpl.render(regions=regions,
# The command line, with argv[0] already rewritten at the top of the script.
832 argstr=' '.join(sys.argv),
835 pcidevices=pcidevices,
839 pm_timer_base=pm_timer_base,
841 dmar_units=dmar_units))