l4/pkg/plr/tools/boris/boris

   1 #!/usr/bin/python
   2
   3 # BORIS (Binary Object analyzeR for Injection experimentS)
   4 #
   5 #    Tool for parsing binaries and finding the relevant instrumentation
   6 #    points for a fault injection campaign (which is then coordinated by
   7 #    IGOR).
   8 #
   9 # (c) 2011-2013 Björn Döbel <doebel@os.inf.tu-dresden.de>,
  10 #     economic rights: Technische Universität Dresden (Germany)
  11 #
  12 #  This file is part of TUD:OS and distributed under the terms of the
  13 #  GNU General Public License 2.
  14 #  Please see the COPYING-GPL-2 file for details.
  15
  16 import sys, os
  17 import udis86
  18 import subprocess
  19 from pylibelf import libelf
  20 import random
  21 import argparse
  22
  23 #################################################
  24 ### Running external programs                 ###
  25 #################################################
  26
  27 class ProgramRunner:
  28
  29     def __init__(self, tool, options):
  30         self.tool    = tool
  31         self.options = [tool] + options
  32
  33     def output(self):
  34         try:
  35             r        = subprocess.check_output(self.options)
  36         except subprocess.CalledProcessError:
  37             print "Could not exec %s" % self.tool
  38             return None
  39         return r
  40
  41
  42 class NMRunner(ProgramRunner):
  43
  44     def __init__(self, binary):
  45         ProgramRunner.__init__(self, "nm", ["-C", binary])
  46
  47
  48 #################################################
  49 ### Data classes                              ###
  50 #################################################
  51 class Function:
  52
  53     def __init__(self, nmline):
  54         (self.address, self.symtype, self.name) = nmline.split(" ", 3)
  55         if self.symtype not in ["T", "t"]:
  56             raise ValueError()
  57
  58         self.address = int(self.address, 16)
  59         self.end     = self.address # don't know better yet
  60
  61
  62     def bounds(self):
  63         return (self.address, self.end)
  64
  65
  66     def __str__(self):
  67         return "[0x%08lx - 0x%08lx] %s" % (self.address, self.end, self.name)
  68
  69     def __repr__(self):
  70         return "[0x%08lx - 0x%08lx] %s" % (self.address, self.end, self.name)
  71
  72
  73 #################################################
  74 ### Specific FI generators                    ###
  75 #################################################
  76 class FunctionList:
  77     def create(self):
  78         for line in NMRunner(self.binary).output().split("\n"):
  79             try:
  80                 self.funcs.append(Function(line))
  81             except ValueError:
  82                 pass
  83
  84             self.funcs.sort(key = lambda f : f.address)
  85
  86         # Now we can go and fix up the end address
  87         for i in range(len(self.funcs) - 1):
  88             self.funcs[i].end = self.funcs[i+1].address - 1
  89
  90
  91     def __init__(self, binary):
  92         self.binary = binary
  93         self.funcs  = []
  94         self.create()
  95
  96
  97     def run_filter(self, funcnames=[]):
  98         """
  99         Filter out non-interesting functions
 100         """
 101         if funcnames == ["*"]:
 102             pass
 103         else:
 104             self.funcs = [x for x in self.funcs if x.name in funcnames]
 105
 106
 107 class BinaryObject:
 108     """
 109     Representation of all executable segments of a file.
 110
 111     This object's main structure is a list of (segment, buffer) tuples
 112     that contain a libELF segment descriptor and the respective binary
 113     buffer read from the file.
 114     """
 115     def __init__(self, binary):
 116         self.lib           = libelf.Library(binary)
 117         self.exec_segments = []
 118
 119         f = file(binary, "rb")
 120         for s in self.lib.segments:
 121             if s.executable:
 122                 f.seek(s.offset)
 123                 self.exec_segments.append((s, f.read(s.filesize)))
 124
 125         self.ud_obj      = udis86.init()
 126         self.ud_obj.set_mode(udis86.MOD_32)
 127         self.ud_obj.set_syntax(udis86.UD_SYN_ATT)
 128
 129     def segment_for_address(self, address):
 130         for (s, b) in self.exec_segments:
 131             if s.virtual_address <= address <= (s.virtual_address + s.filesize):
 132                 return (s,b)
 133         raise ValueError("No segment containing address 0x%08lx" % address)
 134
 135
 136     def gen_single_func(self, start, end):
 137         ret = []
 138         (seg, buf)   = self.segment_for_address(start)
 139
 140         pc     = start
 141         start -= seg.virtual_address # idx used for iterating the input buffer
 142         end   -= seg.virtual_address # upper limit of the input buffer
 143
 144         self.ud_obj.set_pc(pc)
 145
 146         # udis python bindings seem weirdly broken: initially, i set buf[start:end]
 147         # as the input buffer. however, at some point this broke the input stream,
 148         # as the python bindings read something different from what was actually in
 149         # the buffer. i figured this is somehow related to the udis input buffer
 150         # being too large. therefore we use smaller 32byte chunks now
 151         self.ud_obj.set_input_buffer(buf[start:start+32])
 152         while start < end and self.ud_obj.disassemble() > 0:
 153             if (self.filter_instruction()):
 154                 ret.append("0x%08lx    < %s >" % (pc, self.ud_obj.insn_asm()))
 155                 #print hex(pc), self.ud_obj.insn_hex(), self.ud_obj.insn_asm()
 156             pc    += self.ud_obj.insn_len()
 157             start += self.ud_obj.insn_len()
 158             self.ud_obj.set_pc(pc)
 159             self.ud_obj.set_input_buffer(buf[start:start+32])
 160
 161         return ret
 162
 163
 164     def generate(self):
 165         ret = []
 166         for f in self.functions.funcs:
 167             (start, end) = f.bounds()
 168             ret += self.gen_single_func(start, end)
 169
 170         return [r for r in ret if random.randint(0,99) < self.probability]
 171
 172
 173
 174 class ValidInstructionList(BinaryObject):
 175     """
 176     Generates a list of all valid instruction pointers within the binary
 177     """
 178     def __init__(self, funclist, rand_percentage = 20):
 179         BinaryObject.__init__(self, funclist.binary)
 180         self.functions   = funclist
 181         self.probability = rand_percentage
 182
 183
 184     def filter_instruction(self):
 185         """
 186         Filter out non-interesting operations
 187         """
 188         return self.ud_obj.insn_asm() not in ("nop ", "ret ", "leave ", "invalid ")
 189
 190
 191 class ArithInstructionList(ValidInstructionList):
 192
 193     def __init__(self, funclist, rand_percentage = 20):
 194         ValidInstructionList.__init__(self,funclist,rand_percentage)
 195         self.invalidops ="""
 196         adc adcb andb addb aaa aad aam aas arpl
 197         call clc cld cli cmpb cmpsd cmc cmpsb cmp cdq
 198         cmovae cmovbe cmovnz cmovz cwde daa das div divb
 199         fmul fdiv fild fldcww fistpw  fdivp fldz fucomp fnstsw fld fsub fadd
 200         fistp fnstcww fucompp fstp fxch imul idiv in fmulp fidiv fchs fstpq fcomp ficompw
 201         fldq fucom fsqrt fiaddw fdivr fcom fsubr fdivrq fnstsww fistpq fistp fimul
 202         fimulw fisttpq fmulq fld1
 203         int into iretd int1 insd mul mulb
 204         ja jae jb jbe jecxz jg jge jl jle jmp jnz jns jo js jz jnp jp jno
 205         hlt int3 insb lea lret lock lodsb lodsd ljmp loopnz lcall les lds lahf
 206         loop
 207         mov movb movsd movsb movsx movsxb movzx movzxb movzxw movw
 208         maskmovq
 209         o16 out outsb outsd pop popad push pushad popfd pushfd
 210         sbb seta setbe setg setle setnz setz stc sti sgdt scasb scasd sbbb
 211         shrb shlb sldt sahf std sarb subb rorb orb ror rcl salc
 212         rclb rcr rep repne ret stosb stosd test testb rol
 213         xorb rolb loope rcrb xchg wait xlatb
 214         """
 215
 216     def filter_instruction(self):
 217         if not ValidInstructionList.filter_instruction(self):
 218             return False
 219
 220         opcode = self.ud_obj.insn_asm().split()[0]
 221
 222         if opcode in ["add", "sub", "inc", "incb", "dec", "shl", "shr", "and", "or", "xor",
 223                       "sar", "neg"]:
 224             return True
 225         elif opcode in self.invalidops.split():
 226             return False
 227         else:
 228             print opcode
 229             return False
 230
 231
 232 class Configuration:
 233     def __init__(self):
 234         self.mode = "gpr" # alu, rat, instr
 235         self.functions = []
 236         self.probability = 100 # percent
 237         self.filename = ""
 238
 239
 240 class ArgumentParser:
 241     def __init__(self):
 242         self.conf = Configuration()
 243         self.parser = argparse.ArgumentParser(description="Generate list of relevant instructions that can be used to start IGOR runs.")
 244         self.parser.add_argument("file", help="binary file", default=None)
 245         self.parser.add_argument("-m", "--mode", dest="mode",
 246                                   help="gpr|alu|rat|instr", default = None)
 247         self.parser.add_argument("-p", "--prob", dest="probability",
 248                                  default=None, help="fraction of instructions that get selected finally")
 249         self.parser.add_argument("-f", "--functions", dest="functions",
 250                                  help="select only a subset of functions (comma-separated list)",
 251                                  default="")
 252
 253     def parse(self, args = sys.argv[1:]):
 254         ns = self.parser.parse_args(args)
 255         assert ns.file is not None
 256         self.conf.filename = ns.file
 257         if ns.mode is not None:
 258             self.conf.mode = ns.mode
 259         if ns.probability is not None:
 260             self.conf.probability = int(ns.probability)
 261         if ns.functions != "":
 262             self.conf.functions=ns.functions.split(",")
 263
 264
 265 def run_gpr(conf):
 266     ff = FunctionList(conf.filename)
 267     ff.run_filter(conf.functions)
 268     lst = ValidInstructionList(ff,conf.probability).generate()
 269     for l in lst:
 270         print l
 271
 272 def run_alu(conf):
 273     ff = FunctionList(conf.filename)
 274     ff.run_filter(conf.functions)
 275     lst = ArithInstructionList(ff,conf.probability).generate()
 276     for i in lst:
 277         print i
 278
 279
 280 def nop(conf):
 281     print "UNIMPLEMENTED:", conf.mode
 282
 283 generators = {
 284     "gpr"   : run_gpr,
 285     "instr" : run_gpr,
 286     "rat"   : nop,
 287     "alu"   : run_alu,
 288 }
 289
 290 def main():
 291     if len(sys.argv) < 2:
 292         sys.exit(1)
 293
 294     ap = ArgumentParser()
 295     ap.parse()
 296     try:
 297         generators[ap.conf.mode](ap.conf)
 298     except KeyError:
 299         print "Unknown mode:", ap.conf.mode
 300
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()