scripts/eval2.py

   1 #!/usr/bin/env python3
   2 import os
   3 import sys
   4 import re
   5
   6 import numpy as np
   7 import numpy.linalg as nplag
   8 import matplotlib.pyplot as plt
   9 import matplotlib
  10
  11 import collections
  12 import psycopg2
  13 import psycopg2.extras
  14 import subprocess
  15 import json
  16 import pickle
  17 import utils
  18 from random import random
  19
  20 #matplotlib.rcParams['font.size'] = 8
  21 matplotlib.rcParams['savefig.bbox'] = 'tight'
  22
  23 conn = psycopg2.connect(dbname="linux-conf-perf", connection_factory=psycopg2.extras.NamedTupleConnection)
  24 cur = conn.cursor()
  25
  26 # cur.execute("SELECT m.value, t.git_describe FROM measure AS m JOIN toolsgit AS t ON m.toolgit = t.id;")
  27 # for row in cur.fetchall():
  28 #                 print(row)
  29
  30 def fill_in_missing_values():
  31                 cur = conn.cursor()
  32                 cur2 = conn.cursor()
  33                 cur.execute("SELECT output, id, conf, measurement, result, value, linuxgit, toolgit FROM measure WHERE value IS NULL AND result <> 'failed';")
  34                 result_re = re.compile("! T: 0 \(.*\) P:.* I:.* C:.* Min: *(?P<min>\d+) Act: *(?P<act>\d+) Avg: *(?P<avg>\d+) Max: *(?P<max>\d+) ok")
  35                 for row in cur.fetchall():
  36                                 for line in row.output.split('\n'):
  37                                                 match = result_re.match(line)
  38                                                 if match:
  39                                                                 value = int(match.group('max'))
  40                                                                 cur2.execute("UPDATE measure SET value = %(val)s WHERE id = %(id)s", {'val': value, 'id': row.id })
  41                                                                 conn.commit()
  42                                                                 print(line, value)
  43
  44 def view_failed():
  45                 cur.execute("SELECT output FROM measure WHERE result='failed' and toolgit=11;")
  46                 for row in cur.fetchall():
  47                                 with subprocess.Popen(['less', '+G'], stdin=subprocess.PIPE) as proc:
  48                                                 proc.communicate(input=row.output.encode('utf-8'))
  49
  50 class Config(dict):
  51                 """Linux configuration fetched from DB, key is option name, value is
  52                 'y' or 'n'.
  53                 """
  54                 def __init__(self, id):
  55                                 cur.execute("SELECT config FROM configurations WHERE id=%s;", (id,))
  56                                 conf = cur.fetchone()
  57                                 for opt in conf.config.split('\n'):
  58                                                 (key, val) = opt.split('=')
  59                                                 self[key] = val
  60
  61 class VariableOptions(dict):
  62                 """Dictionary of config options that change value during experiments.
  63                 Key is the config name, value is the index to the matrix A
  64                 used for evaluation.
  65                 """
  66                 def __init__(self, measurements):
  67                                 all_options = {}
  68
  69                                 for m in measurements:
  70                                                 print(m)
  71                                                 config = Config(m.conf)
  72                                                 for (key, val) in config.items():
  73                                                                 if key not in all_options:
  74                                                                                 all_options[key] = val
  75                                                                 else:
  76                                                                                 if all_options[key] != val and key not in self:
  77                                                                                                 self[key] = None
  78                                 self.update_order()
  79
  80                 def update_order(self):
  81                                 self.order = []
  82                                 for key in sorted(self.keys()):
  83                                                 self[key] = len(self.order)
  84                                                 self.order.append(key)
  85
  86                 def __iter__(self):
  87                                 return self.order.__iter__()
  88
  89                 def name_by_index(self, index):
  90                                 return self.order[index]
  91
  92                 def print(self):
  93                                 for k in self:
  94                                                 print("%-40s %s" % (k, self[k]))
  95
  96 def load_configs(measurements, options):
  97                 try:
  98                                 configs = pickle.load(open('configs.pickle', 'rb'))
  99                 except:
 100                                 configs = {}
 101                                 for i in range(len(measurements)):
 102                                                 config = Config(measurements[i].conf)
 103                                                 keys = config.copy().keys()
 104                                                 for k in keys:
 105                                                                 if not k in options:
 106                                                                                 del config[k]
 107                                                 configs[measurements[i].conf] = config
 108                                 pickle.dump(configs, open('configs.pickle', 'wb'))
 109                 return configs
 110
 111 def construct_ab(options, measurements, configs):
 112                 A = np.mat(np.zeros((len(measurements), len(options) + 1)))
 113                 B = np.mat(np.zeros((len(measurements), 1)))
 114
 115                 for i in range(len(measurements)):
 116                                 config = configs[measurements[i].conf]
 117                                 A[i, len(options)] = 1
 118                                 B[i] = measurements[i].value
 119                                 for j in range(len(options)):
 120                                                 A[i,j] = 1 if config[options.name_by_index(j)] == 'y' else 0
 121                 return (A, B)
 122
 123 def plot_values(measurements, configs, measurements_est, highlight_options):
 124                 print(measurements_est.__class__, measurements_est.shape)
 125                 B = np.mat(np.zeros((len(measurements),1)))
 126                 for i in range(len(measurements)):
 127                                 B[i] = measurements[i].value
 128
 129                 idx = np.argsort(B, 0)
 130                 B = B[idx,0]
 131                 measurements = sorted(measurements, key=lambda x: x.value)
 132                 measurements_est = measurements_est[idx, 0]
 133
 134                 plt.plot(B, color='b', zorder=1)
 135                 plt.grid(True)
 136                 plt.xlim(0,len(measurements))
 137                 plt.gcf().set_size_inches(8,4)
 138                 plt.ylabel('cyclictest result [µs]')
 139                 plt.xlabel('Configuration')
 140                 plt.savefig('measurements.pdf')
 141
 142                 plt.hold(True)
 143                 plt.plot(measurements_est, 'x', color='#aaaaaa', zorder=0)
 144                 yy = 0
 145                 for o in highlight_options if 0 else []:
 146                                 yy += 1
 147                                 x = []
 148                                 y = []
 149                                 for i in range(len(measurements)):
 150                                                 if configs[measurements[i].conf][o] == 'y':
 151                                                                 x.append(i)
 152                                                                 y.append(measurements[i].value)
 153                                 plt.plot(x, (yy*5-150)*np.ones(len(x)), '+')
 154
 155                 plt.legend(['Measured', 'Model'] + [o[7:] for o in highlight_options], loc = 'upper left')
 156                 plt.savefig('measurements-model.pdf')
 157                 #plt.show()
 158
 159 def save_options(options):
 160                 json.dump(options, open('options.json', 'w'), indent='  ', sort_keys=True)
 161
 162 def load_variable_options(measurements):
 163                 if os.path.exists('options.json'):
 164                         options = VariableOptions([])
 165                         options.update(json.load(open('options.json', 'r')))
 166                         options.update_order()
 167                 else:
 168                         options = VariableOptions(measurements)
 169                         save_options(options)
 170                 #options.print()
 171                 return options
 172
 173 def remove_linerly_dependent_options(options, measurements, configs):
 174                 (A,B) = construct_ab(options, measurements, configs)
 175                 rank = nplag.matrix_rank(A)
 176                 print("Rank: ", rank)
 177                 if rank == A.shape[1]:
 178                                 return options
 179
 180                 o = options.copy()
 181                 for k in sorted(o.keys(), key=lambda x: x[-1]):
 182                                 del options[k]
 183                                 options.update_order()
 184                                 (A,B) = construct_ab(options, measurements, configs)
 185                                 if nplag.matrix_rank(A) != rank:
 186                                                 options[k]=None
 187                                                 options.update_order()
 188                                 else:
 189                                                 print("Removed", k)
 190                 return options
 191
 192 def calc_measure_diff(measurements):
 193         options = VariableOptions(measurements)
 194
 195         configs = {}
 196         for i in range(len(measurements)):
 197                 config = Config(measurements[i].conf)
 198                 keys = config.copy().keys()
 199                 for k in keys:
 200                                 if not k in options:
 201                                                 del config[k]
 202                 configs[measurements[i].conf] = config
 203
 204         (A,B) = construct_ab(options, measurements, configs)
 205
 206
 207         measure_diff = {}
 208         print(options)
 209         for option in options.keys():
 210                 print("Calc measure diff", option)
 211                 oidx = options[option]
 212                 measure_diff[option] = []
 213                 for m1 in range(A.shape[0]):
 214                         if A[m1,oidx] == 1:
 215                                 row = A[m1].copy()
 216                                 row[0,oidx] = 0
 217                                 for m2 in range(A.shape[0]):
 218                                         if A[m2,oidx] == 0 and (row == A[m2]).all():
 219                                                 print("  found")
 220                                                 measure_diff[option].append(B.item(m1) - B.item(m2))
 221                                                 break
 222         return measure_diff
 223
 224
 225 class OptionResult:
 226                 def __init__(self, option_name, regression=None, yes_count=None):
 227                                 self.option_name = option_name
 228                                 self.regression = regression
 229                                 self.yes_count = int(yes_count)
 230                                 self.measure_diff = []
 231
 232
 233 def plot_bars(results):
 234                 y_pos = np.arange(0, len(results))
 235                 regr = np.array([r.regression for r in results])
 236                 diff = np.array([np.array(r.measure_diff).mean() for r in results])
 237
 238                 fig, ax = plt.subplots()
 239                 fig.set_size_inches(12,5)
 240
 241                 ax.set_xticks(y_pos)
 242                 ax.set_xticklabels([r.option_name[7:] for r in results], fontsize=12, rotation=70, rotation_mode='anchor', ha='right')
 243                 rects1 = ax.bar(y_pos, regr, width=0.7, color='b')
 244                 ax.legend(['regression', 'two measurement diff'], loc = 'upper left')
 245                 ax.grid(True)
 246                 ax.set_ylabel("Config option influence [µs]")
 247                 plt.savefig('option_res.pdf')
 248                 rects2 = ax.bar(y_pos+0.2, diff, width=0.3, color='w', hatch='//')
 249                 ax.legend(['regression', 'two measurements diff'], loc = 'upper left')
 250                 plt.savefig('option_res_diff.pdf')
 251                 #plt.show()
 252
 253 def evaluate():
 254                 cur.execute("SELECT conf, value FROM measure WHERE result='nominal' AND toolgit IN (1, 11);")
 255                 measurements = cur.fetchall()
 256
 257                 try:
 258                         measure_diff = json.load(open('measure_diff.json', 'r'))
 259                 except:
 260                         measure_diff = calc_measure_diff(measurements)
 261                         json.dump(measure_diff, open('measure_diff.json', 'w'), indent="\t", sort_keys=True)
 262
 263                 options = load_variable_options(measurements)
 264                 configs = load_configs(measurements, options)
 265
 266                 options = remove_linerly_dependent_options(options, measurements, configs)
 267 #               save_options(options)
 268
 269                 (A,B) = construct_ab(options, measurements, configs)
 270
 271                 #np.set_printoptions(threshold = float('nan'), linewidth = 300)
 272                 np.set_printoptions(edgeitems = 9, linewidth = 80)
 273 #                 print(A)
 274 #                 print(B)
 275                 result = nplag.lstsq(A, B)
 276
 277                 x=np.mat(result[0])
 278
 279                 results = []
 280                 for k in options:
 281                         idx = options[k]
 282                         r = OptionResult(k, regression=x.item(idx), yes_count=A[:,idx].sum())
 283                         r.measure_diff = measure_diff[k]
 284                         results.append(r)
 285                 results.sort(key=lambda r: r.regression)
 286
 287                 for r in results:
 288                         print("{option_name:<40} {regression:7.2f} {yes_count:4d} {percent:3.0f}%".format(percent=100*r.yes_count/len(measurements), **r.__dict__))
 289                 print("%-40s %7.2f" % ("common", x[len(options)]))
 290                 plot_values(measurements, configs, A*x, [r.option_name for r in results])
 291
 292                 print(A.shape)
 293
 294                 plot_bars(results)
 295
 296 if __name__ == '__main__':
 297         evaluate()