From ed7d11b8113edaf9b5ef96af58affb932ee72c25 Mon Sep 17 00:00:00 2001
From: Michal Sojka
Date: Sat, 24 Oct 2015 21:49:22 +0200
Subject: [PATCH] Final eval script for RTLWS slides

---
 scripts/eval2.py | 452 +++++++++++++++++++++++++----------------------
 1 file changed, 241 insertions(+), 211 deletions(-)

diff --git a/scripts/eval2.py b/scripts/eval2.py
index 15fbf83..7678223 100755
--- a/scripts/eval2.py
+++ b/scripts/eval2.py
@@ -15,6 +15,7 @@
 import subprocess
 import json
 import pickle
 import utils
+from random import random
 #matplotlib.rcParams['font.size'] = 8
 matplotlib.rcParams['savefig.bbox'] = 'tight'
@@ -24,244 +25,273 @@
 cur = conn.cursor()
 # cur.execute("SELECT m.value, t.git_describe FROM measure AS m JOIN toolsgit AS t ON m.toolgit = t.id;")
 # for row in cur.fetchall():
-#     print(row)
+#     print(row)
 
 def fill_in_missing_values():
-    cur = conn.cursor()
-    cur2 = conn.cursor()
-    cur.execute("SELECT output, id, conf, measurement, result, value, linuxgit, toolgit FROM measure WHERE value IS NULL AND result <> 'failed';")
-    result_re = re.compile("! T: 0 \(.*\) P:.* I:.* C:.* Min: *(?P<min>\d+) Act: *(?P<act>\d+) Avg: *(?P<avg>\d+) Max: *(?P<max>\d+) ok")
-    for row in cur.fetchall():
-        for line in row.output.split('\n'):
-            match = result_re.match(line)
-            if match:
-                value = int(match.group('max'))
-                cur2.execute("UPDATE measure SET value = %(val)s WHERE id = %(id)s", {'val': value, 'id': row.id })
-                conn.commit()
-                print(line, value)
+    cur = conn.cursor()
+    cur2 = conn.cursor()
+    cur.execute("SELECT output, id, conf, measurement, result, value, linuxgit, toolgit FROM measure WHERE value IS NULL AND result <> 'failed';")
+    result_re = re.compile("! T: 0 \(.*\) P:.* I:.* C:.* Min: *(?P<min>\d+) Act: *(?P<act>\d+) Avg: *(?P<avg>\d+) Max: *(?P<max>\d+) ok")
+    for row in cur.fetchall():
+        for line in row.output.split('\n'):
+            match = result_re.match(line)
+            if match:
+                value = int(match.group('max'))
+                cur2.execute("UPDATE measure SET value = %(val)s WHERE id = %(id)s", {'val': value, 'id': row.id })
+                conn.commit()
+                print(line, value)
 
 def view_failed():
-    cur.execute("SELECT output FROM measure WHERE result='failed' and toolgit=11;")
-    for row in cur.fetchall():
-        with subprocess.Popen(['less', '+G'], stdin=subprocess.PIPE) as proc:
-            proc.communicate(input=row.output.encode('utf-8'))
+    cur.execute("SELECT output FROM measure WHERE result='failed' and toolgit=11;")
+    for row in cur.fetchall():
+        with subprocess.Popen(['less', '+G'], stdin=subprocess.PIPE) as proc:
+            proc.communicate(input=row.output.encode('utf-8'))
 
 class Config(dict):
-    """Linux configuration fetched from DB, key is option name, value is
-    'y' or 'n'.
-    """
-    def __init__(self, id):
-        cur.execute("SELECT config FROM configurations WHERE id=%s;", (id,))
-        conf = cur.fetchone()
-        for opt in conf.config.split('\n'):
-            (key, val) = opt.split('=')
-            self[key] = val
+    """Linux configuration fetched from DB, key is option name, value is
+    'y' or 'n'.
+    """
+    def __init__(self, id):
+        cur.execute("SELECT config FROM configurations WHERE id=%s;", (id,))
+        conf = cur.fetchone()
+        for opt in conf.config.split('\n'):
+            (key, val) = opt.split('=')
+            self[key] = val
 
 class VariableOptions(dict):
-    """Dictionary of config options that change value during experiments.
-    Key is the config name, value is the index to the matrix A
-    used for evaluation.
-    """
-    def __init__(self, measurements):
-        all_options = {}
-
-        for m in measurements:
-            print(m)
-            config = Config(m.conf)
-            for (key, val) in config.items():
-                if key not in all_options:
-                    all_options[key] = val
-                else:
-                    if all_options[key] != val and key not in self:
-                        self[key] = None
-        self.update_order()
-
-    def update_order(self):
-        self.order = []
-        for key in sorted(self.keys()):
-            self[key] = len(self.order)
-            self.order.append(key)
-
-    def __iter__(self):
-        return self.order.__iter__()
-
-    def name_by_index(self, index):
-        return self.order[index]
-
-    def print(self):
-        for k in self:
-            print("%-40s %s" % (k, self[k]))
+    """Dictionary of config options that change value during experiments.
+    Key is the config name, value is the index to the matrix A
+    used for evaluation.
+    """
+    def __init__(self, measurements):
+        all_options = {}
+
+        for m in measurements:
+            print(m)
+            config = Config(m.conf)
+            for (key, val) in config.items():
+                if key not in all_options:
+                    all_options[key] = val
+                else:
+                    if all_options[key] != val and key not in self:
+                        self[key] = None
+        self.update_order()
+
+    def update_order(self):
+        self.order = []
+        for key in sorted(self.keys()):
+            self[key] = len(self.order)
+            self.order.append(key)
+
+    def __iter__(self):
+        return self.order.__iter__()
+
+    def name_by_index(self, index):
+        return self.order[index]
+
+    def print(self):
+        for k in self:
+            print("%-40s %s" % (k, self[k]))
 
 def load_configs(measurements, options):
-    try:
-        configs = pickle.load(open('configs.pickle', 'rb'))
-    except:
-        configs = {}
-        for i in range(len(measurements)):
-            config = Config(measurements[i].conf)
-            keys = config.copy().keys()
-            for k in keys:
-                if not k in options:
-                    del config[k]
-            configs[measurements[i].conf] = config
-        pickle.dump(configs, open('configs.pickle', 'wb'))
-    return configs
+    try:
+        configs = pickle.load(open('configs.pickle', 'rb'))
+    except:
+        configs = {}
+        for i in range(len(measurements)):
+            config = Config(measurements[i].conf)
+            keys = config.copy().keys()
+            for k in keys:
+                if not k in options:
+                    del config[k]
+            configs[measurements[i].conf] = config
+        pickle.dump(configs, open('configs.pickle', 'wb'))
+    return configs
 
 def construct_ab(options, measurements, configs):
-    A = np.mat(np.zeros((len(measurements), len(options) + 1)))
-    B = np.mat(np.zeros((len(measurements), 1)))
+    A = np.mat(np.zeros((len(measurements), len(options) + 1)))
+    B = np.mat(np.zeros((len(measurements), 1)))
 
-    for i in range(len(measurements)):
-        config = configs[measurements[i].conf]
-        A[i, len(options)] = 1
-        B[i] = measurements[i].value
-        for j in range(len(options)):
-            A[i,j] = 1 if config[options.name_by_index(j)] == 'y' else 0
-    return (A, B)
+    for i in range(len(measurements)):
+        config = configs[measurements[i].conf]
+        A[i, len(options)] = 1
+        B[i] = measurements[i].value
+        for j in range(len(options)):
+            A[i,j] = 1 if config[options.name_by_index(j)] == 'y' else 0
+    return (A, B)
 
 def plot_values(measurements, configs, measurements_est, highlight_options):
-    print(measurements_est.__class__, measurements_est.shape)
-    B = np.mat(np.zeros((len(measurements),1)))
-    for i in range(len(measurements)):
-        B[i] = measurements[i].value
-
-    idx = np.argsort(B, 0)
-    B = B[idx,0]
-    measurements = sorted(measurements, key=lambda x: x.value)
-    measurements_est = measurements_est[idx, 0]
-
-    plt.plot(measurements_est, 'x', color='#aaaaaa')
-    plt.hold(True)
-    plt.gcf().set_size_inches(8,4)
-    plt.plot(B, color='b')
-    plt.xlabel('Configuration')
-    plt.ylabel('cyclictest result [µs]')
-    yy = 0
-    for o in highlight_options if 0 else []:
-        yy += 1
-        x = []
-        y = []
-        for i in range(len(measurements)):
-            if configs[measurements[i].conf][o] == 'y':
-                x.append(i)
-                y.append(measurements[i].value)
-        plt.plot(x, (yy*5-150)*np.ones(len(x)), '+')
-
-    plt.legend(['Model', 'Measured'] + [o[7:] for o in highlight_options], loc = 'upper left')
-    plt.grid(True)
-    plt.xlim(0,len(measurements))
-    plt.savefig('measurements.pdf')
-    #plt.show()
+    print(measurements_est.__class__, measurements_est.shape)
+    B = np.mat(np.zeros((len(measurements),1)))
+    for i in range(len(measurements)):
+        B[i] = measurements[i].value
+
+    idx = np.argsort(B, 0)
+    B = B[idx,0]
+    measurements = sorted(measurements, key=lambda x: x.value)
+    measurements_est = measurements_est[idx, 0]
+
+    plt.plot(B, color='b', zorder=1)
+    plt.grid(True)
+    plt.xlim(0,len(measurements))
+    plt.gcf().set_size_inches(8,4)
+    plt.ylabel('cyclictest result [µs]')
+    plt.xlabel('Configuration')
+    plt.savefig('measurements.pdf')
+
+    plt.hold(True)
+    plt.plot(measurements_est, 'x', color='#aaaaaa', zorder=0)
+    yy = 0
+    for o in highlight_options if 0 else []:
+        yy += 1
+        x = []
+        y = []
+        for i in range(len(measurements)):
+            if configs[measurements[i].conf][o] == 'y':
+                x.append(i)
+                y.append(measurements[i].value)
+        plt.plot(x, (yy*5-150)*np.ones(len(x)), '+')
+
+    plt.legend(['Measured', 'Model'] + [o[7:] for o in highlight_options], loc = 'upper left')
+    plt.savefig('measurements-model.pdf')
+    #plt.show()
 
 def save_options(options):
-    json.dump(options, open('options.json', 'w'), indent=' ', sort_keys=True)
+    json.dump(options, open('options.json', 'w'), indent=' ', sort_keys=True)
 
 def load_variable_options(measurements):
-    if os.path.exists('options.json'):
-        options = VariableOptions([])
-        options.update(json.load(open('options.json', 'r')))
-        options.update_order()
-    else:
-        options = VariableOptions(measurements)
-        save_options(options)
-    #options.print()
-    return options
+    if os.path.exists('options.json'):
+        options = VariableOptions([])
+        options.update(json.load(open('options.json', 'r')))
+        options.update_order()
+    else:
+        options = VariableOptions(measurements)
+        save_options(options)
+    #options.print()
+    return options
 
 def remove_linerly_dependent_options(options, measurements, configs):
-    (A,B) = construct_ab(options, measurements, configs)
-    rank = nplag.matrix_rank(A)
-
-    if rank == A.shape[1]:
-        return options
-
-    o = options.copy()
-    for k in sorted(o.keys()):
-        del options[k]
-        options.update_order()
-        (A,B) = construct_ab(options, measurements, configs)
-        if nplag.matrix_rank(A) != rank:
-            options[k]=None
-            options.update_order()
-        else:
-            print("Removed", k)
-    return options
+    (A,B) = construct_ab(options, measurements, configs)
+    rank = nplag.matrix_rank(A)
+    print("Rank: ", rank)
+    if rank == A.shape[1]:
+        return options
+
+    o = options.copy()
+    for k in sorted(o.keys(), key=lambda x: x[-1]):
+        del options[k]
+        options.update_order()
+        (A,B) = construct_ab(options, measurements, configs)
+        if nplag.matrix_rank(A) != rank:
+            options[k]=None
+            options.update_order()
+        else:
+            print("Removed", k)
+    return options
+
+def calc_measure_diff(measurements):
+    options = VariableOptions(measurements)
+
+    configs = {}
+    for i in range(len(measurements)):
+        config = Config(measurements[i].conf)
+        keys = config.copy().keys()
+        for k in keys:
+            if not k in options:
+                del config[k]
+        configs[measurements[i].conf] = config
+
+    (A,B) = construct_ab(options, measurements, configs)
+
+
+    measure_diff = {}
+    print(options)
+    for option in options.keys():
+        print("Calc measure diff", option)
+        oidx = options[option]
+        measure_diff[option] = []
+        for m1 in range(A.shape[0]):
+            if A[m1,oidx] == 1:
+                row = A[m1].copy()
+                row[0,oidx] = 0
+                for m2 in range(A.shape[0]):
+                    if A[m2,oidx] == 0 and (row == A[m2]).all():
+                        print(" found")
+                        measure_diff[option].append(B.item(m1) - B.item(m2))
+                        break
+    return measure_diff
+
 class OptionResult:
-    def __init__(self, option_name, regression=None, yes_count=None):
-        self.option_name = option_name
-        self.regression = regression
-        self.yes_count = int(yes_count)
-        self.measure_diff = []
-
-    def calc_measure_diff(self, A, B, options):
-        oidx = options[self.option_name]
-        for m1 in range(A.shape[0]):
-            if A[m1,oidx] == 1:
-                row = A[m1].copy()
-                row[0,oidx] = 0
-                for m2 in range(A.shape[0]):
-                    if A[m2,oidx] == 0 and (row == A[m2]).all():
-                        self.measure_diff.append(B[m1] - B[m2])
-                        break
+    def __init__(self, option_name, regression=None, yes_count=None):
+        self.option_name = option_name
+        self.regression = regression
+        self.yes_count = int(yes_count)
+        self.measure_diff = []
+
 def plot_bars(results):
-    y_pos = np.arange(len(results), 0, -1)
-    regr = np.array([r.regression for r in results])
-    diff = np.array([np.array(r.measure_diff).mean() for r in results])
-
-    fig, ax = plt.subplots()
-    fig.set_size_inches(8,12)
-
-    ax.set_yticks(y_pos)
-    ax.set_yticklabels([r.option_name[7:] for r in results], fontsize=12)
-    rects1 = ax.barh(y_pos, regr, height=0.35, color='b')
-    rects2 = ax.barh(y_pos+0.35, diff, height=0.35, color='w', hatch='//')
-    ax.legend(['regression', 'two measurement diff'])
-    ax.grid(True)
-    ax.set_xlabel("Config option influence [µs]")
-    plt.savefig('option_res.pdf')
-    #plt.show()
+    y_pos = np.arange(0, len(results))
+    regr = np.array([r.regression for r in results])
+    diff = np.array([np.array(r.measure_diff).mean() for r in results])
+
+    fig, ax = plt.subplots()
+    fig.set_size_inches(12,5)
+
+    ax.set_xticks(y_pos)
+    ax.set_xticklabels([r.option_name[7:] for r in results], fontsize=12, rotation=70, rotation_mode='anchor', ha='right')
+    rects1 = ax.bar(y_pos, regr, width=0.7, color='b')
+    ax.legend(['regression', 'two measurement diff'], loc = 'upper left')
+    ax.grid(True)
+    ax.set_ylabel("Config option influence [µs]")
+    plt.savefig('option_res.pdf')
+    rects2 = ax.bar(y_pos+0.2, diff, width=0.3, color='w', hatch='//')
+    ax.legend(['regression', 'two measurements diff'], loc = 'upper left')
+    plt.savefig('option_res_diff.pdf')
+    #plt.show()
 
 def evaluate():
-    cur.execute("SELECT conf, value FROM measure WHERE result='nominal' AND toolgit IN (1, 11);")
-    measurements = cur.fetchall()
-
-    options = load_variable_options(measurements)
-    configs = load_configs(measurements, options)
-
-    options = remove_linerly_dependent_options(options, measurements, configs)
-    save_options(options)
-
-    (A,B) = construct_ab(options, measurements, configs)
-
-    #np.set_printoptions(threshold = float('nan'), linewidth = 300)
-    np.set_printoptions(edgeitems = 9, linewidth = 80)
-#    print(A)
-#    print(B)
-    result = nplag.lstsq(A, B)
-
-    x=np.mat(result[0])
-    try:
-        results = pickle.load(open('results.pickle', 'rb'))
-    except:
-        results = []
-        for k in options:
-            print(k)
-            idx = options[k]
-            r = OptionResult(k, regression=x.item(idx), yes_count=A[:,idx].sum())
-            r.calc_measure_diff(A, B, options)
-
-            results.append(r)
-        results.sort(key=lambda r: r.regression)
-        pickle.dump(results, open('results.pickle', 'wb'))
-
-    for r in results:
-        print("{option_name:<40} {regression:7.2f} {yes_count:4d} {percent:2}%".format(percent=10, **r.__dict__))
-    print("%-40s %6.2f" % ("common", x[len(options)]))
-    print(A.shape)
-
-    plot_values(measurements, configs, A*x, [r.option_name for r in results])
-    plot_bars(results)
+    cur.execute("SELECT conf, value FROM measure WHERE result='nominal' AND toolgit IN (1, 11);")
+    measurements = cur.fetchall()
+
+    try:
+        measure_diff = json.load(open('measure_diff.json', 'r'))
+    except:
+        measure_diff = calc_measure_diff(measurements)
+        json.dump(measure_diff, open('measure_diff.json', 'w'), indent="\t", sort_keys=True)
+
+    options = load_variable_options(measurements)
+    configs = load_configs(measurements, options)
+
+    options = remove_linerly_dependent_options(options, measurements, configs)
+#    save_options(options)
+
+    (A,B) = construct_ab(options, measurements, configs)
+
+    #np.set_printoptions(threshold = float('nan'), linewidth = 300)
+    np.set_printoptions(edgeitems = 9, linewidth = 80)
+#    print(A)
+#    print(B)
+    result = nplag.lstsq(A, B)
+
+    x=np.mat(result[0])
+
+    results = []
+    for k in options:
+        idx = options[k]
+        r = OptionResult(k, regression=x.item(idx), yes_count=A[:,idx].sum())
+        r.measure_diff = measure_diff[k]
+        results.append(r)
+    results.sort(key=lambda r: r.regression)
+
+    for r in results:
+        print("{option_name:<40} {regression:7.2f} {yes_count:4d} {percent:3.0f}%".format(percent=100*r.yes_count/len(measurements), **r.__dict__))
+    print("%-40s %7.2f" % ("common", x[len(options)]))
+    plot_values(measurements, configs, A*x, [r.option_name for r in results])
+
+    print(A.shape)
+
+    plot_bars(results)
 
 if __name__ == '__main__':
     evaluate()
-- 
2.39.2