scripts/eval2.py

   1 #!/usr/bin/env python3
   2 import os
   3 import sys
   4 import re
   5
   6 import numpy as np
   7 import numpy.linalg as nplag
   8 import matplotlib.pyplot as plt
   9 import matplotlib
  10
  11 import collections
  12 import psycopg2
  13 import psycopg2.extras
  14 import subprocess
  15 import json
  16 import pickle
  17 import utils
  18
  19 #matplotlib.rcParams['font.size'] = 8
  20 matplotlib.rcParams['savefig.bbox'] = 'tight'
  21
  22 conn = psycopg2.connect(dbname="linux-conf-perf", connection_factory=psycopg2.extras.NamedTupleConnection)
  23 cur = conn.cursor()
  24
  25 # cur.execute("SELECT m.value, t.git_describe FROM measure AS m JOIN toolsgit AS t ON m.toolgit = t.id;")
  26 # for row in cur.fetchall():
  27 #         print(row)
  28
  29 def fill_in_missing_values():
  30         cur = conn.cursor()
  31         cur2 = conn.cursor()
  32         cur.execute("SELECT output, id, conf, measurement, result, value, linuxgit, toolgit FROM measure WHERE value IS NULL AND result <> 'failed';")
  33         result_re = re.compile("! T: 0 \(.*\) P:.* I:.* C:.* Min: *(?P<min>\d+) Act: *(?P<act>\d+) Avg: *(?P<avg>\d+) Max: *(?P<max>\d+) ok")
  34         for row in cur.fetchall():
  35                 for line in row.output.split('\n'):
  36                         match = result_re.match(line)
  37                         if match:
  38                                 value = int(match.group('max'))
  39                                 cur2.execute("UPDATE measure SET value = %(val)s WHERE id = %(id)s", {'val': value, 'id': row.id })
  40                                 conn.commit()
  41                                 print(line, value)
  42
  43 def view_failed():
  44         cur.execute("SELECT output FROM measure WHERE result='failed' and toolgit=11;")
  45         for row in cur.fetchall():
  46                 with subprocess.Popen(['less', '+G'], stdin=subprocess.PIPE) as proc:
  47                         proc.communicate(input=row.output.encode('utf-8'))
  48
  49 class Config(dict):
  50         """Linux configuration fetched from DB, key is option name, value is
  51         'y' or 'n'.
  52         """
  53         def __init__(self, id):
  54                 cur.execute("SELECT config FROM configurations WHERE id=%s;", (id,))
  55                 conf = cur.fetchone()
  56                 for opt in conf.config.split('\n'):
  57                         (key, val) = opt.split('=')
  58                         self[key] = val
  59
  60 class VariableOptions(dict):
  61         """Dictionary of config options that change value during experiments.
  62         Key is the config name, value is the index to the matrix A
  63         used for evaluation.
  64         """
  65         def __init__(self, measurements):
  66                 all_options = {}
  67
  68                 for m in measurements:
  69                         print(m)
  70                         config = Config(m.conf)
  71                         for (key, val) in config.items():
  72                                 if key not in all_options:
  73                                         all_options[key] = val
  74                                 else:
  75                                         if all_options[key] != val and key not in self:
  76                                                 self[key] = None
  77                 self.update_order()
  78
  79         def update_order(self):
  80                 self.order = []
  81                 for key in sorted(self.keys()):
  82                         self[key] = len(self.order)
  83                         self.order.append(key)
  84
  85         def __iter__(self):
  86                 return self.order.__iter__()
  87
  88         def name_by_index(self, index):
  89                 return self.order[index]
  90
  91         def print(self):
  92                 for k in self:
  93                         print("%-40s %s" % (k, self[k]))
  94
  95 def load_configs(measurements, options):
  96         try:
  97                 configs = pickle.load(open('configs.pickle', 'rb'))
  98         except:
  99                 configs = {}
 100                 for i in range(len(measurements)):
 101                         config = Config(measurements[i].conf)
 102                         keys = config.copy().keys()
 103                         for k in keys:
 104                                 if not k in options:
 105                                         del config[k]
 106                         configs[measurements[i].conf] = config
 107                 pickle.dump(configs, open('configs.pickle', 'wb'))
 108         return configs
 109
 110 def construct_ab(options, measurements, configs):
 111         A = np.mat(np.zeros((len(measurements), len(options) + 1)))
 112         B = np.mat(np.zeros((len(measurements), 1)))
 113
 114         for i in range(len(measurements)):
 115                 config = configs[measurements[i].conf]
 116                 A[i, len(options)] = 1
 117                 B[i] = measurements[i].value
 118                 for j in range(len(options)):
 119                         A[i,j] = 1 if config[options.name_by_index(j)] == 'y' else 0
 120         return (A, B)
 121
 122 def plot_values(measurements, configs, measurements_est, highlight_options):
 123         print(measurements_est.__class__, measurements_est.shape)
 124         B = np.mat(np.zeros((len(measurements),1)))
 125         for i in range(len(measurements)):
 126                 B[i] = measurements[i].value
 127
 128         idx = np.argsort(B, 0)
 129         B = B[idx,0]
 130         measurements = sorted(measurements, key=lambda x: x.value)
 131         measurements_est = measurements_est[idx, 0]
 132
 133         plt.plot(measurements_est, 'x', color='#aaaaaa')
 134         plt.hold(True)
 135         plt.gcf().set_size_inches(8,4)
 136         plt.plot(B, color='b')
 137         plt.xlabel('Configuration')
 138         plt.ylabel('cyclictest result [µs]')
 139         yy = 0
 140         for o in highlight_options if 0 else []:
 141                 yy += 1
 142                 x = []
 143                 y = []
 144                 for i in range(len(measurements)):
 145                         if configs[measurements[i].conf][o] == 'y':
 146                                 x.append(i)
 147                                 y.append(measurements[i].value)
 148                 plt.plot(x, (yy*5-150)*np.ones(len(x)), '+')
 149
 150         plt.legend(['Model', 'Measured'] + [o[7:] for o in highlight_options], loc = 'upper left')
 151         plt.grid(True)
 152         plt.xlim(0,len(measurements))
 153         plt.savefig('measurements.pdf')
 154         #plt.show()
 155
 156 def save_options(options):
 157         json.dump(options, open('options.json', 'w'), indent='  ', sort_keys=True)
 158
 159 def load_variable_options(measurements):
 160         if os.path.exists('options.json'):
 161             options = VariableOptions([])
 162             options.update(json.load(open('options.json', 'r')))
 163             options.update_order()
 164         else:
 165             options = VariableOptions(measurements)
 166             save_options(options)
 167         #options.print()
 168         return options
 169
 170 def remove_linerly_dependent_options(options, measurements, configs):
 171         (A,B) = construct_ab(options, measurements, configs)
 172         rank = nplag.matrix_rank(A)
 173
 174         if rank == A.shape[1]:
 175                 return options
 176
 177         o = options.copy()
 178         for k in sorted(o.keys()):
 179                 del options[k]
 180                 options.update_order()
 181                 (A,B) = construct_ab(options, measurements, configs)
 182                 if nplag.matrix_rank(A) != rank:
 183                         options[k]=None
 184                         options.update_order()
 185                 else:
 186                         print("Removed", k)
 187         return options
 188
 189 class OptionResult:
 190         def __init__(self, option_name, regression=None, yes_count=None):
 191                 self.option_name = option_name
 192                 self.regression = regression
 193                 self.yes_count = int(yes_count)
 194                 self.measure_diff = []
 195
 196         def calc_measure_diff(self, A, B, options):
 197                 oidx = options[self.option_name]
 198                 for m1 in range(A.shape[0]):
 199                         if A[m1,oidx] == 1:
 200                                 row = A[m1].copy()
 201                                 row[0,oidx] = 0
 202                                 for m2 in range(A.shape[0]):
 203                                         if A[m2,oidx] == 0 and (row == A[m2]).all():
 204                                                 self.measure_diff.append(B[m1] - B[m2])
 205                                                 break
 206
 207 def plot_bars(results):
 208         y_pos = np.arange(len(results), 0, -1)
 209         regr = np.array([r.regression for r in results])
 210         diff = np.array([np.array(r.measure_diff).mean() for r in results])
 211
 212         fig, ax = plt.subplots()
 213         fig.set_size_inches(8,12)
 214
 215         ax.set_yticks(y_pos)
 216         ax.set_yticklabels([r.option_name[7:] for r in results], fontsize=12)
 217         rects1 = ax.barh(y_pos, regr, height=0.35, color='b')
 218         rects2 = ax.barh(y_pos+0.35, diff, height=0.35, color='w', hatch='//')
 219         ax.legend(['regression', 'two measurement diff'])
 220         ax.grid(True)
 221         ax.set_xlabel("Config option influence [µs]")
 222         plt.savefig('option_res.pdf')
 223         #plt.show()
 224
 225 def evaluate():
 226         cur.execute("SELECT conf, value FROM measure WHERE result='nominal' AND toolgit IN (1, 11);")
 227         measurements = cur.fetchall()
 228
 229         options = load_variable_options(measurements)
 230         configs = load_configs(measurements, options)
 231
 232         options = remove_linerly_dependent_options(options, measurements, configs)
 233         save_options(options)
 234
 235         (A,B) = construct_ab(options, measurements, configs)
 236
 237         #np.set_printoptions(threshold = float('nan'), linewidth = 300)
 238         np.set_printoptions(edgeitems = 9, linewidth = 80)
 239 #         print(A)
 240 #         print(B)
 241         result = nplag.lstsq(A, B)
 242
 243         x=np.mat(result[0])
 244         try:
 245                 results = pickle.load(open('results.pickle', 'rb'))
 246         except:
 247                 results = []
 248                 for k in options:
 249                         print(k)
 250                         idx = options[k]
 251                         r = OptionResult(k, regression=x.item(idx), yes_count=A[:,idx].sum())
 252                         r.calc_measure_diff(A, B, options)
 253
 254                         results.append(r)
 255                 results.sort(key=lambda r: r.regression)
 256                 pickle.dump(results, open('results.pickle', 'wb'))
 257
 258         for r in results:
 259                 print("{option_name:<40} {regression:7.2f} {yes_count:4d} {percent:2}%".format(percent=10, **r.__dict__))
 260         print("%-40s %6.2f" % ("common", x[len(options)]))
 261         print(A.shape)
 262
 263         plot_values(measurements, configs, A*x, [r.option_name for r in results])
 264         plot_bars(results)
 265
 266 if __name__ == '__main__':
 267         evaluate()