rtime.felk.cvut.cz Git - linux-conf-perf.git/commitdiff
Final eval script for RTLWS slides
author Michal Sojka <sojkam1@fel.cvut.cz>
Sat, 24 Oct 2015 19:49:22 +0000 (21:49 +0200)
committer Michal Sojka <sojkam1@fel.cvut.cz>
Sat, 24 Oct 2015 19:49:22 +0000 (21:49 +0200)
scripts/eval2.py

index 15fbf83217000c4471058a106725c590a84aa7fa..7678223c4e7a0ccdd636b3ca3f2eaaa9800eb41d 100755 (executable)
@@ -15,6 +15,7 @@ import subprocess
 import json
 import pickle
 import utils
+from random import random
 
 #matplotlib.rcParams['font.size'] = 8
 matplotlib.rcParams['savefig.bbox'] = 'tight'
@@ -24,244 +25,273 @@ cur = conn.cursor()
 
 # cur.execute("SELECT m.value, t.git_describe FROM measure AS m JOIN toolsgit AS t ON m.toolgit = t.id;")
 # for row in cur.fetchall():
-#         print(row)
+#                print(row)
 
 def fill_in_missing_values():
-        cur = conn.cursor()
-        cur2 = conn.cursor()
-        cur.execute("SELECT output, id, conf, measurement, result, value, linuxgit, toolgit FROM measure WHERE value IS NULL AND result <> 'failed';")
-        result_re = re.compile("! T: 0 \(.*\) P:.* I:.* C:.* Min: *(?P<min>\d+) Act: *(?P<act>\d+) Avg: *(?P<avg>\d+) Max: *(?P<max>\d+) ok")
-        for row in cur.fetchall():
-                for line in row.output.split('\n'):
-                        match = result_re.match(line)
-                        if match:
-                                value = int(match.group('max'))
-                                cur2.execute("UPDATE measure SET value = %(val)s WHERE id = %(id)s", {'val': value, 'id': row.id })
-                                conn.commit()
-                                print(line, value)
+               cur = conn.cursor()
+               cur2 = conn.cursor()
+               cur.execute("SELECT output, id, conf, measurement, result, value, linuxgit, toolgit FROM measure WHERE value IS NULL AND result <> 'failed';")
+               result_re = re.compile("! T: 0 \(.*\) P:.* I:.* C:.* Min: *(?P<min>\d+) Act: *(?P<act>\d+) Avg: *(?P<avg>\d+) Max: *(?P<max>\d+) ok")
+               for row in cur.fetchall():
+                               for line in row.output.split('\n'):
+                                               match = result_re.match(line)
+                                               if match:
+                                                               value = int(match.group('max'))
+                                                               cur2.execute("UPDATE measure SET value = %(val)s WHERE id = %(id)s", {'val': value, 'id': row.id })
+                                                               conn.commit()
+                                                               print(line, value)
 
 def view_failed():
-        cur.execute("SELECT output FROM measure WHERE result='failed' and toolgit=11;")
-        for row in cur.fetchall():
-                with subprocess.Popen(['less', '+G'], stdin=subprocess.PIPE) as proc:
-                        proc.communicate(input=row.output.encode('utf-8'))
+               cur.execute("SELECT output FROM measure WHERE result='failed' and toolgit=11;")
+               for row in cur.fetchall():
+                               with subprocess.Popen(['less', '+G'], stdin=subprocess.PIPE) as proc:
+                                               proc.communicate(input=row.output.encode('utf-8'))
 
 class Config(dict):
-        """Linux configuration fetched from DB, key is option name, value is
-        'y' or 'n'.
-        """
-        def __init__(self, id):
-                cur.execute("SELECT config FROM configurations WHERE id=%s;", (id,))
-                conf = cur.fetchone()
-                for opt in conf.config.split('\n'):
-                        (key, val) = opt.split('=')
-                        self[key] = val
+               """Linux configuration fetched from DB, key is option name, value is
+               'y' or 'n'.
+               """
+               def __init__(self, id):
+                               cur.execute("SELECT config FROM configurations WHERE id=%s;", (id,))
+                               conf = cur.fetchone()
+                               for opt in conf.config.split('\n'):
+                                               (key, val) = opt.split('=')
+                                               self[key] = val
 
 class VariableOptions(dict):
-        """Dictionary of config options that change value during experiments.
-        Key is the config name, value is the index to the matrix A
-        used for evaluation.
-        """
-        def __init__(self, measurements):
-                all_options = {}
-
-                for m in measurements:
-                        print(m)
-                        config = Config(m.conf)
-                        for (key, val) in config.items():
-                                if key not in all_options:
-                                        all_options[key] = val
-                                else:
-                                        if all_options[key] != val and key not in self:
-                                                self[key] = None
-                self.update_order()
-
-        def update_order(self):
-                self.order = []
-                for key in sorted(self.keys()):
-                        self[key] = len(self.order)
-                        self.order.append(key)
-
-        def __iter__(self):
-                return self.order.__iter__()
-
-        def name_by_index(self, index):
-                return self.order[index]
-
-        def print(self):
-                for k in self:
-                        print("%-40s %s" % (k, self[k]))
+               """Dictionary of config options that change value during experiments.
+               Key is the config name, value is the index to the matrix A
+               used for evaluation.
+               """
+               def __init__(self, measurements):
+                               all_options = {}
+
+                               for m in measurements:
+                                               print(m)
+                                               config = Config(m.conf)
+                                               for (key, val) in config.items():
+                                                               if key not in all_options:
+                                                                               all_options[key] = val
+                                                               else:
+                                                                               if all_options[key] != val and key not in self:
+                                                                                               self[key] = None
+                               self.update_order()
+
+               def update_order(self):
+                               self.order = []
+                               for key in sorted(self.keys()):
+                                               self[key] = len(self.order)
+                                               self.order.append(key)
+
+               def __iter__(self):
+                               return self.order.__iter__()
+
+               def name_by_index(self, index):
+                               return self.order[index]
+
+               def print(self):
+                               for k in self:
+                                               print("%-40s %s" % (k, self[k]))
 
 def load_configs(measurements, options):
-        try:
-                configs = pickle.load(open('configs.pickle', 'rb'))
-        except:
-                configs = {}
-                for i in range(len(measurements)):
-                        config = Config(measurements[i].conf)
-                        keys = config.copy().keys()
-                        for k in keys:
-                                if not k in options:
-                                        del config[k]
-                        configs[measurements[i].conf] = config
-                pickle.dump(configs, open('configs.pickle', 'wb'))
-        return configs
+               try:
+                               configs = pickle.load(open('configs.pickle', 'rb'))
+               except:
+                               configs = {}
+                               for i in range(len(measurements)):
+                                               config = Config(measurements[i].conf)
+                                               keys = config.copy().keys()
+                                               for k in keys:
+                                                               if not k in options:
+                                                                               del config[k]
+                                               configs[measurements[i].conf] = config
+                               pickle.dump(configs, open('configs.pickle', 'wb'))
+               return configs
 
 def construct_ab(options, measurements, configs):
-        A = np.mat(np.zeros((len(measurements), len(options) + 1)))
-        B = np.mat(np.zeros((len(measurements), 1)))
+               A = np.mat(np.zeros((len(measurements), len(options) + 1)))
+               B = np.mat(np.zeros((len(measurements), 1)))
 
-        for i in range(len(measurements)):
-                config = configs[measurements[i].conf]
-                A[i, len(options)] = 1
-                B[i] = measurements[i].value
-                for j in range(len(options)):
-                        A[i,j] = 1 if config[options.name_by_index(j)] == 'y' else 0
-        return (A, B)
+               for i in range(len(measurements)):
+                               config = configs[measurements[i].conf]
+                               A[i, len(options)] = 1
+                               B[i] = measurements[i].value
+                               for j in range(len(options)):
+                                               A[i,j] = 1 if config[options.name_by_index(j)] == 'y' else 0
+               return (A, B)
 
 def plot_values(measurements, configs, measurements_est, highlight_options):
-        print(measurements_est.__class__, measurements_est.shape)
-        B = np.mat(np.zeros((len(measurements),1)))
-        for i in range(len(measurements)):
-                B[i] = measurements[i].value
-
-        idx = np.argsort(B, 0)
-        B = B[idx,0]
-        measurements = sorted(measurements, key=lambda x: x.value)
-        measurements_est = measurements_est[idx, 0]
-
-        plt.plot(measurements_est, 'x', color='#aaaaaa')
-        plt.hold(True)
-        plt.gcf().set_size_inches(8,4)
-        plt.plot(B, color='b')
-        plt.xlabel('Configuration')
-        plt.ylabel('cyclictest result [µs]')
-        yy = 0
-        for o in highlight_options if 0 else []:
-                yy += 1
-                x = []
-                y = []
-                for i in range(len(measurements)):
-                        if configs[measurements[i].conf][o] == 'y':
-                                x.append(i)
-                                y.append(measurements[i].value)
-                plt.plot(x, (yy*5-150)*np.ones(len(x)), '+')
-
-        plt.legend(['Model', 'Measured'] + [o[7:] for o in highlight_options], loc = 'upper left')
-        plt.grid(True)
-        plt.xlim(0,len(measurements))
-        plt.savefig('measurements.pdf')
-        #plt.show()
+               print(measurements_est.__class__, measurements_est.shape)
+               B = np.mat(np.zeros((len(measurements),1)))
+               for i in range(len(measurements)):
+                               B[i] = measurements[i].value
+
+               idx = np.argsort(B, 0)
+               B = B[idx,0]
+               measurements = sorted(measurements, key=lambda x: x.value)
+               measurements_est = measurements_est[idx, 0]
+
+               plt.plot(B, color='b', zorder=1)
+               plt.grid(True)
+               plt.xlim(0,len(measurements))
+               plt.gcf().set_size_inches(8,4)
+               plt.ylabel('cyclictest result [µs]')
+               plt.xlabel('Configuration')
+               plt.savefig('measurements.pdf')
+
+               plt.hold(True)
+               plt.plot(measurements_est, 'x', color='#aaaaaa', zorder=0)
+               yy = 0
+               for o in highlight_options if 0 else []:
+                               yy += 1
+                               x = []
+                               y = []
+                               for i in range(len(measurements)):
+                                               if configs[measurements[i].conf][o] == 'y':
+                                                               x.append(i)
+                                                               y.append(measurements[i].value)
+                               plt.plot(x, (yy*5-150)*np.ones(len(x)), '+')
+
+               plt.legend(['Measured', 'Model'] + [o[7:] for o in highlight_options], loc = 'upper left')
+               plt.savefig('measurements-model.pdf')
+               #plt.show()
 
 def save_options(options):
-        json.dump(options, open('options.json', 'w'), indent='  ', sort_keys=True)
+               json.dump(options, open('options.json', 'w'), indent='  ', sort_keys=True)
 
 def load_variable_options(measurements):
-        if os.path.exists('options.json'):
-            options = VariableOptions([])
-            options.update(json.load(open('options.json', 'r')))
-            options.update_order()
-        else:
-            options = VariableOptions(measurements)
-            save_options(options)
-        #options.print()
-        return options
+               if os.path.exists('options.json'):
+                       options = VariableOptions([])
+                       options.update(json.load(open('options.json', 'r')))
+                       options.update_order()
+               else:
+                       options = VariableOptions(measurements)
+                       save_options(options)
+               #options.print()
+               return options
 
 def remove_linerly_dependent_options(options, measurements, configs):
-        (A,B) = construct_ab(options, measurements, configs)
-        rank = nplag.matrix_rank(A)
-
-        if rank == A.shape[1]:
-                return options
-
-        o = options.copy()
-        for k in sorted(o.keys()):
-                del options[k]
-                options.update_order()
-                (A,B) = construct_ab(options, measurements, configs)
-                if nplag.matrix_rank(A) != rank:
-                        options[k]=None
-                        options.update_order()
-                else:
-                        print("Removed", k)
-        return options
+               (A,B) = construct_ab(options, measurements, configs)
+               rank = nplag.matrix_rank(A)
+               print("Rank: ", rank)
+               if rank == A.shape[1]:
+                               return options
+
+               o = options.copy()
+               for k in sorted(o.keys(), key=lambda x: x[-1]):
+                               del options[k]
+                               options.update_order()
+                               (A,B) = construct_ab(options, measurements, configs)
+                               if nplag.matrix_rank(A) != rank:
+                                               options[k]=None
+                                               options.update_order()
+                               else:
+                                               print("Removed", k)
+               return options
+
+def calc_measure_diff(measurements):
+       options = VariableOptions(measurements)
+
+       configs = {}
+       for i in range(len(measurements)):
+               config = Config(measurements[i].conf)
+               keys = config.copy().keys()
+               for k in keys:
+                               if not k in options:
+                                               del config[k]
+               configs[measurements[i].conf] = config
+
+       (A,B) = construct_ab(options, measurements, configs)
+
+
+       measure_diff = {}
+       print(options)
+       for option in options.keys():
+               print("Calc measure diff", option)
+               oidx = options[option]
+               measure_diff[option] = []
+               for m1 in range(A.shape[0]):
+                       if A[m1,oidx] == 1:
+                               row = A[m1].copy()
+                               row[0,oidx] = 0
+                               for m2 in range(A.shape[0]):
+                                       if A[m2,oidx] == 0 and (row == A[m2]).all():
+                                               print("  found")
+                                               measure_diff[option].append(B.item(m1) - B.item(m2))
+                                               break
+       return measure_diff
+
 
 class OptionResult:
-        def __init__(self, option_name, regression=None, yes_count=None):
-                self.option_name = option_name
-                self.regression = regression
-                self.yes_count = int(yes_count)
-                self.measure_diff = []
-
-        def calc_measure_diff(self, A, B, options):
-                oidx = options[self.option_name]
-                for m1 in range(A.shape[0]):
-                        if A[m1,oidx] == 1:
-                                row = A[m1].copy()
-                                row[0,oidx] = 0
-                                for m2 in range(A.shape[0]):
-                                        if A[m2,oidx] == 0 and (row == A[m2]).all():
-                                                self.measure_diff.append(B[m1] - B[m2])
-                                                break
+               def __init__(self, option_name, regression=None, yes_count=None):
+                               self.option_name = option_name
+                               self.regression = regression
+                               self.yes_count = int(yes_count)
+                               self.measure_diff = []
+
 
 def plot_bars(results):
-        y_pos = np.arange(len(results), 0, -1)
-        regr = np.array([r.regression for r in results])
-        diff = np.array([np.array(r.measure_diff).mean() for r in results])
-
-        fig, ax = plt.subplots()
-        fig.set_size_inches(8,12)
-
-        ax.set_yticks(y_pos)
-        ax.set_yticklabels([r.option_name[7:] for r in results], fontsize=12)
-        rects1 = ax.barh(y_pos, regr, height=0.35, color='b')
-        rects2 = ax.barh(y_pos+0.35, diff, height=0.35, color='w', hatch='//')
-        ax.legend(['regression', 'two measurement diff'])
-        ax.grid(True)
-        ax.set_xlabel("Config option influence [µs]")
-        plt.savefig('option_res.pdf')
-        #plt.show()
+               y_pos = np.arange(0, len(results))
+               regr = np.array([r.regression for r in results])
+               diff = np.array([np.array(r.measure_diff).mean() for r in results])
+
+               fig, ax = plt.subplots()
+               fig.set_size_inches(12,5)
+
+               ax.set_xticks(y_pos)
+               ax.set_xticklabels([r.option_name[7:] for r in results], fontsize=12, rotation=70, rotation_mode='anchor', ha='right')
+               rects1 = ax.bar(y_pos, regr, width=0.7, color='b')
+               ax.legend(['regression', 'two measurement diff'], loc = 'upper left')
+               ax.grid(True)
+               ax.set_ylabel("Config option influence [µs]")
+               plt.savefig('option_res.pdf')
+               rects2 = ax.bar(y_pos+0.2, diff, width=0.3, color='w', hatch='//')
+               ax.legend(['regression', 'two measurements diff'], loc = 'upper left')
+               plt.savefig('option_res_diff.pdf')
+               #plt.show()
 
 def evaluate():
-        cur.execute("SELECT conf, value FROM measure WHERE result='nominal' AND toolgit IN (1, 11);")
-        measurements = cur.fetchall()
-
-        options = load_variable_options(measurements)
-        configs = load_configs(measurements, options)
-
-        options = remove_linerly_dependent_options(options, measurements, configs)
-        save_options(options)
-
-        (A,B) = construct_ab(options, measurements, configs)
-
-        #np.set_printoptions(threshold = float('nan'), linewidth = 300)
-        np.set_printoptions(edgeitems = 9, linewidth = 80)
-#         print(A)
-#         print(B)
-        result = nplag.lstsq(A, B)
-
-        x=np.mat(result[0])
-        try:
-                results = pickle.load(open('results.pickle', 'rb'))
-        except:
-                results = []
-                for k in options:
-                        print(k)
-                        idx = options[k]
-                        r = OptionResult(k, regression=x.item(idx), yes_count=A[:,idx].sum())
-                        r.calc_measure_diff(A, B, options)
-
-                        results.append(r)
-                results.sort(key=lambda r: r.regression)
-                pickle.dump(results, open('results.pickle', 'wb'))
-
-        for r in results:
-                print("{option_name:<40} {regression:7.2f} {yes_count:4d} {percent:2}%".format(percent=10, **r.__dict__))
-        print("%-40s %6.2f" % ("common", x[len(options)]))
-        print(A.shape)
-
-        plot_values(measurements, configs, A*x, [r.option_name for r in results])
-        plot_bars(results)
+               cur.execute("SELECT conf, value FROM measure WHERE result='nominal' AND toolgit IN (1, 11);")
+               measurements = cur.fetchall()
+
+               try:
+                       measure_diff = json.load(open('measure_diff.json', 'r'))
+               except:
+                       measure_diff = calc_measure_diff(measurements)
+                       json.dump(measure_diff, open('measure_diff.json', 'w'), indent="\t", sort_keys=True)
+
+               options = load_variable_options(measurements)
+               configs = load_configs(measurements, options)
+
+               options = remove_linerly_dependent_options(options, measurements, configs)
+#              save_options(options)
+
+               (A,B) = construct_ab(options, measurements, configs)
+
+               #np.set_printoptions(threshold = float('nan'), linewidth = 300)
+               np.set_printoptions(edgeitems = 9, linewidth = 80)
+#                print(A)
+#                print(B)
+               result = nplag.lstsq(A, B)
+
+               x=np.mat(result[0])
+
+               results = []
+               for k in options:
+                       idx = options[k]
+                       r = OptionResult(k, regression=x.item(idx), yes_count=A[:,idx].sum())
+                       r.measure_diff = measure_diff[k]
+                       results.append(r)
+               results.sort(key=lambda r: r.regression)
+
+               for r in results:
+                       print("{option_name:<40} {regression:7.2f} {yes_count:4d} {percent:3.0f}%".format(percent=100*r.yes_count/len(measurements), **r.__dict__))
+               print("%-40s %7.2f" % ("common", x[len(options)]))
+               plot_values(measurements, configs, A*x, [r.option_name for r in results])
+
+               print(A.shape)
+
+               plot_bars(results)
 
 if __name__ == '__main__':
        evaluate()