]> rtime.felk.cvut.cz Git - linux-conf-perf.git/blob - scripts/eval2.py
Allow importing lcp_django from other python scripts
[linux-conf-perf.git] / scripts / eval2.py
1 #!/usr/bin/env python3
2 import os
3 import sys
4 import re
5
6 import numpy as np
7 import numpy.linalg as nplag
8 import matplotlib.pyplot as plt
9 import matplotlib
10
11 import collections
12 import psycopg2
13 import psycopg2.extras
14 import subprocess
15 import json
16 import pickle
17 import utils
18 from random import random
19
#matplotlib.rcParams['font.size'] = 8
# Use the 'tight' bounding box for every figure written with savefig().
matplotlib.rcParams['savefig.bbox'] = 'tight'

# Module-wide database connection and default cursor, created at import time.
# NamedTupleConnection is used so that fetched rows can be read by column
# name (the rest of this file accesses e.g. row.output, row.id, m.conf).
conn = psycopg2.connect(dbname="linux-conf-perf", connection_factory=psycopg2.extras.NamedTupleConnection)
cur = conn.cursor()
25
26 # cur.execute("SELECT m.value, t.git_describe FROM measure AS m JOIN toolsgit AS t ON m.toolgit = t.id;")
27 # for row in cur.fetchall():
28 #                 print(row)
29
def fill_in_missing_values():
    """Back-fill ``measure.value`` from the stored benchmark output.

    Selects every row of ``measure`` whose ``value`` is NULL and whose
    ``result`` is not ``'failed'``, scans the row's ``output`` line by line
    for a summary line matching ``result_re`` and stores the reported ``Max``
    figure as the row's ``value``.  Every update is committed immediately
    and the matching line plus the extracted value are echoed to stdout.

    If several lines of one output match, each of them triggers an update,
    so the last matching line determines the stored value.
    """
    cur = conn.cursor()
    cur2 = conn.cursor()
    cur.execute("SELECT output, id, conf, measurement, result, value, linuxgit, toolgit FROM measure WHERE value IS NULL AND result <> 'failed';")
    # Raw string literals: "\(" and "\d" are regex escapes, not Python string
    # escapes.  In a non-raw literal they are invalid escape sequences.
    result_re = re.compile(
        r"! T: 0 \(.*\) P:.* I:.* C:.* "
        r"Min: *(?P<min>\d+) Act: *(?P<act>\d+) "
        r"Avg: *(?P<avg>\d+) Max: *(?P<max>\d+) ok")
    for row in cur.fetchall():
        for line in row.output.split('\n'):
            match = result_re.match(line)
            if not match:
                continue
            value = int(match.group('max'))
            cur2.execute("UPDATE measure SET value = %(val)s WHERE id = %(id)s", {'val': value, 'id': row.id})
            conn.commit()
            print(line, value)
43
def view_failed():
    """Page through the stored output of failed measurements.

    Fetches ``output`` of every ``measure`` row with ``result='failed'`` and
    ``toolgit=11`` and pipes each one, UTF-8 encoded, into its own
    ``less +G`` process, one after another.
    """
    pager_cmd = ['less', '+G']
    cur.execute("SELECT output FROM measure WHERE result='failed' and toolgit=11;")
    failed_rows = cur.fetchall()
    for record in failed_rows:
        with subprocess.Popen(pager_cmd, stdin=subprocess.PIPE) as pager:
            text = record.output
            pager.communicate(input=text.encode('utf-8'))
49
class Config(dict):
    """Linux configuration fetched from DB, key is option name, value is
    'y' or 'n'.

    The ``config`` column of the ``configurations`` row with the given id
    is parsed as newline-separated ``KEY=VALUE`` entries.
    """
    def __init__(self, id):
        # NOTE: the parameter name shadows the builtin ``id``; it is kept
        # unchanged so that existing callers keep working.
        super().__init__()
        cur.execute("SELECT config FROM configurations WHERE id=%s;", (id,))
        conf = cur.fetchone()
        for opt in conf.config.split('\n'):
            # A trailing newline or an empty line yields '' here, which
            # cannot be unpacked into a key/value pair.
            if not opt.strip():
                continue
            # Split on the first '=' only, so a value that itself contains
            # '=' stays intact.  A non-blank line without '=' still raises
            # ValueError, as before.
            key, val = opt.split('=', 1)
            self[key] = val
60
class VariableOptions(dict):
    """Config options whose value differs between measured configurations.

    Maps the option name to its column index in the matrix A used for
    evaluation.  The indices follow the alphabetical order of the names and
    are (re)assigned by :meth:`update_order`; iterating the object yields
    the names in that same order.
    """
    def __init__(self, measurements):
        first_seen = {}

        for m in measurements:
            print(m)
            for key, val in Config(m.conf).items():
                # Remember the first value seen for every option; any later
                # differing value marks the option as variable.
                previous = first_seen.setdefault(key, val)
                if previous != val and key not in self:
                    self[key] = None
        self.update_order()

    def update_order(self):
        """Recompute ``self.order`` and renumber all options by sorted name."""
        # self.keys() is used on purpose: plain iteration over ``self`` goes
        # through __iter__, i.e. through the (possibly stale) ``order`` list.
        self.order = sorted(self.keys())
        for position, name in enumerate(self.order):
            self[name] = position

    def __iter__(self):
        return iter(self.order)

    def name_by_index(self, index):
        """Return the option name stored at position ``index``."""
        return self.order[index]

    def print(self):
        """Print one "name index" line per option, in index order."""
        for name in self:
            print("%-40s %s" % (name, self[name]))
95
def load_configs(measurements, options):
    """Return a dict mapping configuration id to its reduced ``Config``.

    The result is cached in ``configs.pickle`` in the current directory.
    If that file can be loaded it is returned as is, regardless of the
    arguments.  Otherwise every measurement's configuration is fetched via
    ``Config(m.conf)``, stripped of all keys not present in ``options`` and
    the resulting dict is written back to ``configs.pickle``.

    Parameters:
        measurements: sequence of rows with a ``conf`` attribute.
        options: container of option names to keep (tested with ``in``).
    """
    try:
        # NOTE(security): pickle executes arbitrary code on load; this is
        # only acceptable because the cache file is written by this script.
        with open('configs.pickle', 'rb') as cache:
            configs = pickle.load(cache)
    except (OSError, pickle.UnpicklingError, EOFError,
            AttributeError, ImportError, IndexError):
        # Missing, unreadable or corrupt cache: rebuild it from the DB.
        # (The listed exceptions are the ones the pickle documentation names
        # for failed unpickling, plus OSError for the file access itself.)
        configs = {}
        for m in measurements:
            config = Config(m.conf)
            # Iterate over a snapshot of the keys, the dict is modified below.
            for k in list(config.keys()):
                if k not in options:
                    del config[k]
            configs[m.conf] = config
        with open('configs.pickle', 'wb') as cache:
            pickle.dump(configs, cache)
    return configs
110
def construct_ab(options, measurements, configs):
    """Build the matrices of the linear system ``A * x = B``.

    Parameters:
        options: object providing ``len()`` and ``name_by_index(j)``; column
            ``j`` of A belongs to option ``name_by_index(j)``.
        measurements: sequence of rows with ``conf`` and ``value``.
        configs: mapping from ``conf`` to a mapping option name -> value.

    Returns:
        ``(A, B)`` as ``numpy.matrix`` objects.  A has one row per
        measurement and ``len(options) + 1`` columns: a 1 where the option
        is ``'y'`` in that measurement's configuration, 0 otherwise, and a
        constant 1 in the last column.  B is a column with the measured
        values.
    """
    n_rows = len(measurements)
    n_opts = len(options)
    # np.asmatrix instead of np.mat: the np.mat alias no longer exists in
    # NumPy 2.0, np.asmatrix is available in old and new releases alike.
    A = np.asmatrix(np.zeros((n_rows, n_opts + 1)))
    B = np.asmatrix(np.zeros((n_rows, 1)))

    # Column names do not depend on the row, so look them up once.
    names = [options.name_by_index(j) for j in range(n_opts)]

    for i, m in enumerate(measurements):
        config = configs[m.conf]
        A[i, n_opts] = 1
        B[i] = m.value
        for j, name in enumerate(names):
            A[i, j] = 1 if config[name] == 'y' else 0
    return (A, B)
122
def plot_values(measurements, configs, measurements_est, highlight_options):
    """Plot measured values sorted by size, together with the model estimate.

    Writes two files to the current directory: ``measurements.pdf`` with the
    sorted measured values only, and ``measurements-model.pdf`` with the
    estimates added to the same axes.

    Parameters:
        measurements: sequence of rows with ``conf`` and ``value``.
        configs: mapping from ``conf`` to a mapping option name -> value.
        measurements_est: matrix of estimated values, one row per
            measurement, in the same order as ``measurements``.
        highlight_options: option names; with the first 7 characters
            removed (presumably the ``CONFIG_`` prefix) they are appended
            to the legend.
    """
    print(measurements_est.__class__, measurements_est.shape)
    # np.asmatrix instead of np.mat: the np.mat alias no longer exists in
    # NumPy 2.0.
    B = np.asmatrix(np.zeros((len(measurements), 1)))
    for i, m in enumerate(measurements):
        B[i] = m.value

    # Sort measured values and estimates by measured value.
    idx = np.argsort(B, 0)
    B = B[idx, 0]
    measurements = sorted(measurements, key=lambda x: x.value)
    measurements_est = measurements_est[idx, 0]

    plt.plot(B, color='b', zorder=1)
    plt.grid(True)
    plt.xlim(0, len(measurements))
    plt.gcf().set_size_inches(8, 4)
    plt.ylabel('cyclictest result [µs]')
    plt.xlabel('Configuration')
    plt.savefig('measurements.pdf')

    # No plt.hold(True) here: pyplot.hold was removed in Matplotlib 3.0 and
    # drawing further plots into the current axes is the default anyway.
    plt.plot(measurements_est, 'x', color='#aaaaaa', zorder=0)
    yy = 0
    # The per-option markers are switched off by the "if 0"; the loop is
    # kept so that it can be re-enabled.
    for o in highlight_options if 0 else []:
        yy += 1
        x = []
        y = []
        for i in range(len(measurements)):
            if configs[measurements[i].conf][o] == 'y':
                x.append(i)
                y.append(measurements[i].value)
        plt.plot(x, (yy*5-150)*np.ones(len(x)), '+')

    plt.legend(['Measured', 'Model'] + [o[7:] for o in highlight_options], loc = 'upper left')
    plt.savefig('measurements-model.pdf')
    #plt.show()
158
def save_options(options):
    """Write ``options`` to ``options.json`` in the current directory.

    The mapping is dumped as JSON with keys sorted and a two-space indent.
    """
    # The context manager closes (and thereby flushes) the file
    # deterministically instead of leaving that to garbage collection.
    with open('options.json', 'w') as f:
        json.dump(options, f, indent='  ', sort_keys=True)
161
def load_variable_options(measurements):
    """Return the ``VariableOptions`` to use for the evaluation.

    If ``options.json`` exists in the current directory, its keys are loaded
    into an empty ``VariableOptions`` and the indices are recomputed with
    ``update_order()``; ``measurements`` is not consulted in that case.
    Otherwise the options are derived from ``measurements`` and saved with
    ``save_options`` for later runs.
    """
    if os.path.exists('options.json'):
        options = VariableOptions([])
        # Close the file deterministically instead of leaking the handle.
        with open('options.json', 'r') as f:
            options.update(json.load(f))
        options.update_order()
    else:
        options = VariableOptions(measurements)
        save_options(options)
    #options.print()
    return options
172
def remove_linerly_dependent_options(options, measurements, configs):
    """Drop options whose removal does not lower the rank of matrix A.

    The rank of A (as built by ``construct_ab``) is printed first.  If A
    already has full column rank, ``options`` is returned untouched.
    Otherwise the options are visited sorted by the last character of their
    name; each one is removed tentatively and put back only when the rank
    of the rebuilt matrix differs from the initial rank.  ``options`` is
    modified in place and also returned.
    """
    full_matrix, _ = construct_ab(options, measurements, configs)
    rank = nplag.matrix_rank(full_matrix)
    print("Rank: ", rank)
    if rank == full_matrix.shape[1]:
        return options

    # Work on a snapshot of the names, ``options`` changes inside the loop.
    candidates = sorted(options.copy().keys(), key=lambda name: name[-1])
    for name in candidates:
        del options[name]
        options.update_order()
        reduced, _ = construct_ab(options, measurements, configs)
        if nplag.matrix_rank(reduced) == rank:
            print("Removed", name)
            continue
        # Rank changed: the option is needed, restore it.
        options[name] = None
        options.update_order()
    return options
191
def calc_measure_diff(measurements):
    """Collect value differences between measurements differing in one option.

    For every variable option, each measurement that has the option enabled
    is paired with the first measurement whose row in A is identical except
    that the option is disabled.  The difference "enabled minus disabled"
    of the measured values is recorded for every such pair.

    Returns a dict mapping option name to the list of differences (the list
    is empty when no pair exists).
    """
    options = VariableOptions(measurements)

    # Configurations reduced to the variable options, keyed by config id.
    configs = {}
    for m in measurements:
        reduced = Config(m.conf)
        for name in list(reduced.keys()):
            if name not in options:
                del reduced[name]
        configs[m.conf] = reduced

    A, B = construct_ab(options, measurements, configs)
    n_rows = A.shape[0]

    measure_diff = {}
    print(options)
    for option in options.keys():
        print("Calc measure diff", option)
        col = options[option]
        diffs = measure_diff[option] = []
        for on_idx in range(n_rows):
            if A[on_idx, col] != 1:
                continue
            # The row we are looking for: same as this one, option cleared.
            wanted = A[on_idx].copy()
            wanted[0, col] = 0
            for off_idx in range(n_rows):
                if A[off_idx, col] == 0 and (wanted == A[off_idx]).all():
                    print("  found")
                    diffs.append(B.item(on_idx) - B.item(off_idx))
                    break
    return measure_diff
223
224
class OptionResult:
    """Evaluation result of a single config option.

    Attributes are set in ``__init__``:
        option_name: name of the option.
        regression: value passed by the caller (``None`` if not given).
        yes_count: ``yes_count`` converted to ``int``, or ``None`` if not
            given.
        measure_diff: list, initially empty; filled in by the caller.
    """
    def __init__(self, option_name, regression=None, yes_count=None):
        self.option_name = option_name
        self.regression = regression
        # int(None) raises TypeError, which made the documented default
        # unusable; keep None as "unknown" instead.
        self.yes_count = None if yes_count is None else int(yes_count)
        self.measure_diff = []
231
232
def plot_bars(results):
    """Draw a bar chart of the per-option results and save it twice.

    ``option_res.pdf`` contains the ``regression`` value of every result as
    blue bars; ``option_res_diff.pdf`` additionally shows the mean of each
    result's ``measure_diff`` as hatched white bars.  Tick labels are the
    option names with their first 7 characters removed.
    """
    positions = np.arange(0, len(results))
    regression_vals = np.array([res.regression for res in results])
    diff_means = np.array([np.array(res.measure_diff).mean() for res in results])

    fig, ax = plt.subplots()
    fig.set_size_inches(12,5)

    ax.set_xticks(positions)
    tick_labels = [res.option_name[7:] for res in results]
    ax.set_xticklabels(tick_labels, fontsize=12, rotation=70, rotation_mode='anchor', ha='right')

    # First figure: regression bars only.
    ax.bar(positions, regression_vals, width=0.7, color='b')
    ax.legend(['regression', 'two measurement diff'], loc = 'upper left')
    ax.grid(True)
    ax.set_ylabel("Config option influence [µs]")
    plt.savefig('option_res.pdf')

    # Second figure: same axes with the measured differences on top.
    ax.bar(positions + 0.2, diff_means, width=0.3, color='w', hatch='//')
    ax.legend(['regression', 'two measurements diff'], loc = 'upper left')
    plt.savefig('option_res_diff.pdf')
    #plt.show()
252
def evaluate():
    """Run the whole evaluation: fit the linear model, print and plot it.

    Steps:
      1. Fetch ``conf`` and ``value`` of all ``measure`` rows with
         ``result='nominal'`` and ``toolgit`` 1 or 11.
      2. Load the pairwise measurement differences from
         ``measure_diff.json``, or compute and store them if the file is
         missing or not valid JSON.
      3. Load the variable options and reduced configurations, remove
         linearly dependent options and build the matrices A and B.
      4. Solve ``A * x = B`` in the least-squares sense.
      5. Print one line per option (sorted by its coefficient) plus the
         constant term, and produce the plots via ``plot_values`` and
         ``plot_bars``.
    """
    cur.execute("SELECT conf, value FROM measure WHERE result='nominal' AND toolgit IN (1, 11);")
    measurements = cur.fetchall()

    try:
        with open('measure_diff.json', 'r') as f:
            measure_diff = json.load(f)
    except (OSError, ValueError):
        # OSError: cache file missing/unreadable; ValueError covers
        # json.JSONDecodeError.  A bare "except:" would also have swallowed
        # KeyboardInterrupt and programming errors.
        measure_diff = calc_measure_diff(measurements)
        with open('measure_diff.json', 'w') as f:
            json.dump(measure_diff, f, indent="\t", sort_keys=True)

    options = load_variable_options(measurements)
    configs = load_configs(measurements, options)

    options = remove_linerly_dependent_options(options, measurements, configs)
    # save_options(options)

    (A, B) = construct_ab(options, measurements, configs)

    #np.set_printoptions(threshold = float('nan'), linewidth = 300)
    np.set_printoptions(edgeitems=9, linewidth=80)
    # print(A)
    # print(B)
    result = nplag.lstsq(A, B)

    # np.asmatrix instead of np.mat: the np.mat alias no longer exists in
    # NumPy 2.0.
    x = np.asmatrix(result[0])

    results = []
    for k in options:
        idx = options[k]
        r = OptionResult(k, regression=x.item(idx), yes_count=A[:, idx].sum())
        r.measure_diff = measure_diff[k]
        results.append(r)
    results.sort(key=lambda r: r.regression)

    for r in results:
        print("{option_name:<40} {regression:7.2f} {yes_count:4d} {percent:3.0f}%".format(percent=100*r.yes_count/len(measurements), **r.__dict__))
    # x.item() yields a plain float; formatting the 1x1 matrix x[...] with
    # %f relies on an implicit array-to-scalar conversion.
    print("%-40s %7.2f" % ("common", x.item(len(options))))
    plot_values(measurements, configs, A*x, [r.option_name for r in results])

    print(A.shape)

    plot_bars(results)
295
# Run the evaluation only when executed as a script, not when imported.
if __name__ == '__main__':
        evaluate()