#
# Jailhouse, a Linux-based partitioning hypervisor
#
-# Copyright (c) Siemens AG, 2014
+# Copyright (c) Siemens AG, 2014-2016
+# Copyright (c) Valentine Sinitsyn, 2014-2015
+#
+# Authors:
+# Henning Schild <henning.schild@siemens.com>
+# Jan Kiszka <jan.kiszka@siemens.com>
+# Valentine Sinitsyn <valentine.sinitsyn@gmail.com>
#
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.
inputs['files'].add('/proc/cmdline')
inputs['files'].add('/proc/ioports')
inputs['files'].add('/sys/bus/pci/devices/*/config')
+inputs['files'].add('/sys/bus/pci/devices/*/resource')
inputs['files'].add('/sys/devices/system/cpu/cpu*/uevent')
inputs['files'].add('/sys/firmware/acpi/tables/APIC')
inputs['files'].add('/sys/firmware/acpi/tables/MCFG')
return dirs
+class PCIBARs:
+    """Size masks for the 32-bit BAR slots of one PCI device.
+
+    The masks are derived from the 'resource' file located in the device
+    directory. Every evaluated line of that file must consist of three
+    whitespace-separated hexadecimal fields: start, end and flags.
+
+    After construction, self.mask holds exactly NUM_BARS entries, one per
+    32-bit BAR slot. A 64-bit memory BAR contributes two consecutive
+    entries (low half first, then high half).
+    """
+
+    # Resource flag bits tested below (presumably mirroring the Linux
+    # IORESOURCE_* definitions - verify against the kernel headers).
+    IORESOURCE_IO = 0x00000100
+    IORESOURCE_MEM = 0x00000200
+    IORESOURCE_MEM_64 = 0x00100000
+
+    # Number of 32-bit BAR slots evaluated per device
+    NUM_BARS = 6
+
+    def __init__(self, dir):
+        """Parse <dir>/resource and fill self.mask.
+
+        dir -- directory that contains the 'resource' file of the device
+
+        Raises ValueError if a line does not consist of exactly three
+        fields or if a field is not a valid hexadecimal number.
+        """
+        self.mask = []
+        f = input_open(os.path.join(dir, 'resource'), 'r')
+        try:
+            n = 0
+            while n < PCIBARs.NUM_BARS:
+                (start, end, flags) = f.readline().split()
+                n += 1
+                flags = int(flags, 16)
+                if flags & PCIBARs.IORESOURCE_IO:
+                    mask = ~(int(end, 16) - int(start, 16))
+                elif flags & PCIBARs.IORESOURCE_MEM:
+                    mask = ~(int(end, 16) - int(start, 16))
+                    if (flags & PCIBARs.IORESOURCE_MEM_64 and
+                            n < PCIBARs.NUM_BARS):
+                        # A 64-bit BAR occupies two slots: emit the low
+                        # half now, the high half via the common append
+                        # below.
+                        self.mask.append(mask & 0xffffffff)
+                        mask >>= 32
+                        # The line of the upper-half slot carries no
+                        # resource of its own. Consume it so that it does
+                        # not produce an additional, misaligned entry.
+                        f.readline()
+                        n += 1
+                else:
+                    # unused slot or unknown resource type
+                    mask = 0
+                self.mask.append(mask & 0xffffffff)
+        finally:
+            # release the file even if a malformed line raised
+            f.close()
+
+
class PCICapability:
def __init__(self, id, start, len, flags, content, msix_address):
self.id = id
RD = '0'
RW = 'JAILHOUSE_PCICAPS_WRITE'
+ JAILHOUSE_PCI_EXT_CAP = 0x8000
+
@staticmethod
def parse_pcicaps(dir):
caps = []
+ has_extended_caps = False
f = input_open(os.path.join(dir, 'config'), 'rb')
f.seek(0x06)
(status,) = struct.unpack('<H', f.read(2))
if (msgctl & (1 << 8)) != 0: # per-vector masking support
len += 10
flags = PCICapability.RW
+ elif id == 0x10: # Express
+ len = 20
+ (cap_reg,) = struct.unpack('<H', f.read(2))
+ if (cap_reg & 0xf) >= 2: # v2 capability
+ len = 44
+ # access side effects still need to be analyzed
+ flags = PCICapability.RD
+ has_extended_caps = True
elif id == 0x11: # MSI-X
# access will be moderated by hypervisor
len = 12
content = f.read(len - 2)
caps.append(PCICapability(id, cap, len, flags, content,
msix_address))
+
+ if has_extended_caps:
+ # walk extended capability list
+ next = 0x100
+ while next != 0:
+ cap = next
+ f.seek(cap)
+ (id, version_next) = struct.unpack('<HH', f.read(4))
+ next = version_next >> 4
+ if id == 0xffff:
+ break
+ elif id == 0x0010: # SR-IOV
+ len = 64
+ # access side effects still need to be analyzed
+ flags = PCICapability.RD
+ else:
+ if (id & PCICapability.JAILHOUSE_PCI_EXT_CAP) != 0:
+ print('WARNING: Ignoring unsupported PCI Express '
+ 'Extended Capability ID %x' % id)
+ continue
+ # unknown/unhandled cap, mark its existence
+ len = 4
+ flags = PCICapability.RD
+ f.seek(cap + 4)
+ content = f.read(len - 4)
+ id |= PCICapability.JAILHOUSE_PCI_EXT_CAP
+ caps.append(PCICapability(id, cap, len, flags, content, 0))
+
f.close()
return caps
class PCIDevice:
- def __init__(self, type, domain, bus, dev, fn, caps, path):
+ def __init__(self, type, domain, bus, dev, fn, bars, caps, path):
self.type = type
self.iommu = None
self.domain = domain
self.bus = bus
self.dev = dev
self.fn = fn
+ self.bars = bars
self.caps = caps
self.path = path
self.caps_start = 0
self.num_msi_vectors = 1 << ((msg_ctrl >> 1) & 0x7)
self.msi_64bits = (msg_ctrl >> 7) & 1
else: # MSI-X
- vectors = (msg_ctrl & 0x7ff) + 1
- self.num_msix_vectors = vectors
- self.msix_region_size = (vectors * 16 + 0xfff) & 0xf000
- self.msix_address = c.msix_address
+ if c.msix_address != 0:
+ vectors = (msg_ctrl & 0x7ff) + 1
+ self.num_msix_vectors = vectors
+ self.msix_region_size = (vectors * 16 + 0xfff) & 0xf000
+ self.msix_address = c.msix_address
+ else:
+ print('WARNING: Ignoring invalid MSI-X configuration'
+ ' of device %02x:%02x.%x' % (bus, dev, fn))
def __str__(self):
return 'PCIDevice: %02x:%02x.%x' % (self.bus, self.dev, self.fn)
domain = int(a[0], 16)
bus = int(a[1], 16)
df = a[2].split('.')
+ bars = PCIBARs(dpath)
caps = PCICapability.parse_pcicaps(dpath)
return PCIDevice(type, domain, bus, int(df[0], 16), int(df[1], 16),
- caps, dpath)
+ bars, caps, dpath)
class PCIPCIBridge(PCIDevice):
self.start = start
self.stop = stop
self.typestr = typestr
- if comments is None:
- self.comments = []
- else:
- self.comments = comments
+ self.comments = comments or []
def __str__(self):
return 'MemRegion: %08x-%08x : %s' % \
# blacklisted on all levels
if (
(s.find('PCI MMCONFIG') >= 0) or
- (s.find('APIC') >= 0) or # covers both APIC and IOAPIC
- (s.find('dmar') >= 0)
+ (s.find('APIC') >= 0) # covers both APIC and IOAPIC
):
continue
return regions
+class IOMMUConfig(object):
+    """Settings of a single IOMMU unit, taken from a property dictionary.
+
+    'base_addr' and 'mmio_size' are mandatory keys. If 'amd_bdf' is
+    present, 'amd_base_cap', 'amd_msi_cap' and 'amd_features' are
+    required as well and get stored as attributes of the same name.
+    """
+
+    # Keys copied only when the dictionary carries 'amd_bdf'
+    _AMD_KEYS = ('amd_bdf', 'amd_base_cap', 'amd_msi_cap', 'amd_features')
+
+    def __init__(self, props):
+        self.base_addr = props['base_addr']
+        self.mmio_size = props['mmio_size']
+        if 'amd_bdf' not in props:
+            return
+        for key in IOMMUConfig._AMD_KEYS:
+            setattr(self, key, props[key])
+
+    @property
+    def is_amd_iommu(self):
+        """True if the AMD-specific attributes were set by __init__."""
+        return hasattr(self, 'amd_bdf')
+
+
def parse_iomem(pcidevices):
regions = IOMemRegionTree.parse_iomem_tree(
IOMemRegionTree.parse_iomem_file())
add_rom_region = False
ret = []
+ dmar_regions = []
for r in regions:
append_r = True
# filter the list for MSI-X pages
if (r.start >= rom_region.start and r.stop <= rom_region.stop):
add_rom_region = True
append_r = False
+ # filter out and save DMAR regions
+ if r.typestr.find('dmar') >= 0:
+ dmar_regions.append(r)
+ append_r = False
if append_r:
ret.append(r)
if (ret[0].typestr == 'System RAM' and ret[0].start == 0x1000):
ret[0].start = 0
- return ret
+ return ret, dmar_regions
def parse_pcidevices():
# parsing of DMAR ACPI Table
# see Intel VT-d Spec chapter 8
-def parse_dmar(pcidevices, ioapics):
+def parse_dmar(pcidevices, ioapics, dmar_regions):
f = input_open('/sys/firmware/acpi/tables/DMAR', 'rb')
signature = f.read(4)
if signature != b'DMAR':
if len(units) >= 8:
raise RuntimeError('Too many DMAR units. '
'Raise JAILHOUSE_MAX_IOMMU_UNITS.')
- units.append(base)
+ size = 0
+ for r in dmar_regions:
+ if base == r.start:
+ size = r.size()
+ if size == 0:
+ raise RuntimeError('DMAR region size cannot be identified.\n'
+ 'Target Linux must run with Intel IOMMU '
+ 'enabled.')
+ if size > 0x3000:
+ raise RuntimeError('Unexpectedly large DMAR region.')
+ units.append(IOMMUConfig({
+ 'base_addr': base,
+ 'mmio_size': size
+ }))
if flags & 1:
for d in pcidevices:
if d.iommu is None:
if chip.bdf == bdf:
raise RuntimeError('IOAPICs with identical BDF')
ioapic.bdf = bdf
- ioapic.dmar_unit = len(units) - 1
+ ioapic.iommu = len(units) - 1
offset += scope_len
# Reserved Memory Region Reporting Structure
regions = []
# BDF of devices that are permitted outside IOMMU: root complex
iommu_skiplist = set([0x0])
+ ivhd_blocks = 0
while length > 0:
(block_type, block_length) = struct.unpack('<BxH', f.read(4))
if block_type in [0x10, 0x11]:
+ ivhd_blocks += 1
+ if ivhd_blocks > 1:
+ raise RuntimeError('Jailhouse doesn\'t support more than one '
+ 'AMD IOMMU per PCI function.')
# IVHD block
- (iommu_id, base_addr, pci_seg) = \
- struct.unpack('<HxxQH', f.read(14))
- length -= block_length
- block_length -= 18
+ ivhd_fields = struct.unpack('<HHQHxxL', f.read(20))
+ (iommu_bdf, base_cap_ofs,
+ base_addr, pci_seg, iommu_feat) = ivhd_fields
- # IOMMU EFR image and reserved area
- skip_bytes = 6 if block_type == 0x10 else 22
- f.seek(skip_bytes, os.SEEK_CUR)
- block_length -= skip_bytes
+ length -= block_length
+ block_length -= 24
if pci_seg != 0:
raise RuntimeError('We do not support multiple PCI segments')
raise RuntimeError('Too many IOMMU units. '
'Raise JAILHOUSE_MAX_IOMMU_UNITS.')
- # We shouldn't map IOMMU to the cells
+ msi_cap_ofs = None
+
for i, d in enumerate(pcidevices):
- if d.bdf() == iommu_id:
+ if d.bdf() == iommu_bdf:
+ # Extract MSI capability offset
+ for c in d.caps:
+ if c.id == 0x05:
+ msi_cap_ofs = c.start
+ # We must not map IOMMU to the cells
del pcidevices[i]
- units.append(base_addr)
+ if msi_cap_ofs is None:
+ raise RuntimeError('AMD IOMMU lacks MSI support, and '
+ 'Jailhouse doesn\'t support MSI-X yet.')
+
+ if (iommu_feat & (0xF << 13)) and (iommu_feat & (0x3F << 17)):
+ # Performance Counters are supported, allocate 512K
+ mmio_size = 524288
+ else:
+ # Allocate 16K
+ mmio_size = 16384
+
+ units.append(IOMMUConfig({
+ 'base_addr': base_addr,
+ 'mmio_size': mmio_size,
+ 'amd_bdf': iommu_bdf,
+ 'amd_base_cap': base_cap_ofs,
+ 'amd_msi_cap': msi_cap_ofs,
+ # IVHD block type 0x11 has exact EFR copy but type 0x10 may
+ # overwrite what hardware reports. Set reserved bit 0 in that
+ # case to indicate that the value is in use.
+ 'amd_features': (iommu_feat | 0x1) if block_type == 0x10 else 0
+ }))
bdf_start_range = None
while block_length > 0:
- (entry_type, device_id, dte_setting) = struct.unpack(
- '<BHB', f.read(4))
+ (entry_type, device_id) = struct.unpack('<BHx', f.read(4))
block_length -= 4
if entry_type == 0x01:
elif type in [0x20, 0x21, 0x22]:
# IVMD block
+ ivmd_fields = struct.unpack('<BBHHHxxxxxxxxQQ', f.read(32))
(block_type, block_flags, block_length,
- device_id, aux_data, mem_addr, mem_len) = struct.unpack(
- '<BBHHHxxxxxxxxQQ')
+ device_id, aux_data, mem_addr, mem_len) = ivmd_fields
length -= block_length
if int(block_flags):
inmatemem = kmg_multiply_str(options.mem_inmates)
hvmem = [0, kmg_multiply_str(options.mem_hv)]
-regions = parse_iomem(pcidevices)
+(regions, dmar_regions) = parse_iomem(pcidevices)
ourmem = parse_kernel_cmdline()
total = hvmem[1] + inmatemem
vendor = get_cpu_vendor()
if vendor == 'GenuineIntel':
- (iommu_units, extra_memregs) = parse_dmar(pcidevices, ioapics)
+ (iommu_units, extra_memregs) = parse_dmar(pcidevices, ioapics,
+ dmar_regions)
else:
(iommu_units, extra_memregs) = parse_ivrs(pcidevices, ioapics)
regions += extra_memregs
ourmem = alloc_mem(regions, total)
elif (total > ourmem[1]):
raise RuntimeError('Your memmap reservation is too small you need >="' +
- hex(total) + '"')
+ hex(total) + '". Hint: your kernel cmd line needs '
+ '"memmap=' + hex(total) + '$' + hex(ourmem[0]) + '"')
hvmem[0] = ourmem[0]