Add database-based evaluation and helper functions

author Michal Sojka <sojkam1@fel.cvut.cz>

Mon, 28 Sep 2015 22:53:47 +0000 (00:53 +0200)

committer Michal Sojka <sojkam1@fel.cvut.cz>

Mon, 28 Sep 2015 22:54:11 +0000 (00:54 +0200)
author Michal Sojka <sojkam1@fel.cvut.cz>
Mon, 28 Sep 2015 22:53:47 +0000 (00:53 +0200)
committer Michal Sojka <sojkam1@fel.cvut.cz>
Mon, 28 Sep 2015 22:54:11 +0000 (00:54 +0200)
diff --git a/scripts/eval2.py b/scripts/eval2.py

new file mode 100755 (executable)

index 0000000..3e66e6a
--- /dev/null
+++ b/scripts/eval2.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+import os
+import sys
+import re
+
+
+import numpy as np
+import numpy.linalg as nplag
+import collections
+import psycopg2
+import psycopg2.extras
+import subprocess
+
+import utils
+
+
+# Module-wide PostgreSQL connection to the "linux-conf-perf" database.
+# NamedTupleConnection makes cursors return rows as named tuples, which is
+# why the code below reads columns as attributes (row.output, row.id, ...).
+conn = psycopg2.connect(dbname="linux-conf-perf", connection_factory=psycopg2.extras.NamedTupleConnection)
+# Shared cursor used by view_failed(), Config and evaluate().
+cur = conn.cursor()
+
+# cur.execute("SELECT m.value, t.git_describe FROM measure AS m JOIN toolsgit AS t ON m.toolgit = t.id;")
+# for row in cur.fetchall():
+#         print(row)
+
+def fill_in_missing_values():
+        """Fill in measure.value for rows whose output has not been parsed yet.
+
+        Selects every row of the measure table that has no value and whose
+        result is not 'failed', scans the stored output line by line and, for
+        each line matching the result pattern, stores the integer from the
+        "Max:" field as the value of that row.  Every update is committed
+        immediately and the matching line is printed together with the value.
+        """
+        # Separate cursors: one for reading the rows, one for the updates.
+        # Local names differ from the module-level "cur" to avoid shadowing it.
+        select_cur = conn.cursor()
+        update_cur = conn.cursor()
+        select_cur.execute("SELECT output, id, conf, measurement, result, value, linuxgit, toolgit FROM measure WHERE value IS NULL AND result <> 'failed';")
+        # Raw string: "\(" and "\d" are regex escapes, not string escapes.  In a
+        # plain string literal they are invalid escape sequences that newer
+        # Python versions warn about.
+        result_re = re.compile(r"! T: 0 \(.*\) P:.* I:.* C:.* Min: *(?P<min>\d+) Act: *(?P<act>\d+) Avg: *(?P<avg>\d+) Max: *(?P<max>\d+) ok")
+        for row in select_cur.fetchall():
+                for line in row.output.split('\n'):
+                        match = result_re.match(line)
+                        if match:
+                                # Only the maximum is stored as the measured value.
+                                value = int(match.group('max'))
+                                update_cur.execute("UPDATE measure SET value = %(val)s WHERE id = %(id)s", {'val': value, 'id': row.id })
+                                conn.commit()
+                                print(line, value)
+
+def view_failed():
+        """Show the output of every failed measurement with toolgit 11 in a pager.
+
+        One "less" process is started per row and the row's output is fed to
+        it on standard input, encoded as UTF-8.
+        """
+        cur.execute("SELECT output FROM measure WHERE result='failed' and toolgit=11;")
+        pager_cmd = ['less', '+G']
+        failed_rows = cur.fetchall()
+        for failed in failed_rows:
+                with subprocess.Popen(pager_cmd, stdin=subprocess.PIPE) as pager:
+                        # communicate() writes the text, closes stdin and waits
+                        # until the pager exits.
+                        pager.communicate(input=failed.output.encode('utf-8'))
+
+class Config(dict):
+        """Configuration read from the configurations table as an option -> value dict."""
+
+        def __init__(self, id):
+                """Load the configuration with the given id.
+
+                The config column is expected to contain one KEY=VALUE option
+                per line.  A non-blank line without '=' raises ValueError.
+                """
+                super().__init__()
+                cur.execute("SELECT config FROM configurations WHERE id=%s;", (id,))
+                conf = cur.fetchone()
+                for opt in conf.config.split('\n'):
+                        # Skip blank lines, e.g. the empty string left by a
+                        # trailing newline.
+                        if not opt.strip():
+                                continue
+                        # Split on the first '=' only: values may themselves
+                        # contain '=' characters.
+                        (key, val) = opt.split('=', 1)
+                        self[key] = val
+
+class VariableOptions(dict):
+        """Options whose value differs between the configurations of the measurements.
+
+        Maps each such option name to its index, assigned in order of
+        discovery.  self.order lists the names by index, and iterating over
+        the object yields the names in that order.
+        """
+
+        def __init__(self, measurements):
+                """Collect the variable options from the configurations of measurements.
+
+                Each measurement is printed and its configuration is loaded
+                through Config(measurement.conf).
+                """
+                self.order = []
+                first_seen = {}
+
+                for measurement in measurements:
+                        print(measurement)
+                        for (name, value) in Config(measurement.conf).items():
+                                if name not in first_seen:
+                                        # First occurrence: remember the value to compare against.
+                                        first_seen[name] = value
+                                        continue
+                                if name in self or first_seen[name] == value:
+                                        continue
+                                # Value differs from the first one seen: a new variable option.
+                                self[name] = len(self)
+                                self.order.append(name)
+
+        def __iter__(self):
+                """Iterate over the option names in index order."""
+                return iter(self.order)
+
+        def name_by_index(self, index):
+                """Return the name of the option with the given index."""
+                return self.order[index]
+
+        def print(self):
+                """Print every option name followed by its index."""
+                for name in self:
+                        print("%-40s %s" % (name, self[name]))
+
+def evaluate():
+        """Fit a linear model of the measured value over the variable options.
+
+        Loads all 'nominal' measurements with toolgit 1 or 11 and builds a
+        matrix A with one row per measurement and one column per variable
+        option (1 where the option is 'y' in that measurement's configuration,
+        0 otherwise) plus a final column of ones for the common term.  It then
+        solves A x = B in the least-squares sense, where B holds the measured
+        values.  The matrices, the raw solver result and one coefficient per
+        option are printed.
+        """
+        cur.execute("SELECT conf, value FROM measure WHERE result='nominal' AND toolgit IN (1, 11);")
+        measurements = cur.fetchall()
+        if not measurements:
+                # Nothing to fit; avoid running the linear algebra on empty matrices.
+                print("No nominal measurements found, nothing to evaluate")
+                return
+
+        options = VariableOptions(measurements)
+        options.print()
+
+        n_opts = len(options)
+        A = np.zeros((len(measurements), n_opts + 1))
+        B = np.zeros((len(measurements), 1))
+
+        for i, m in enumerate(measurements):
+                config = Config(m.conf)
+                # Last column is the constant ("common") term.
+                A[i, n_opts] = 1
+                B[i] = m.value
+                # Iterating options yields the names in index order, so j is
+                # the index of the option called name.
+                for j, name in enumerate(options):
+                        # .get(): an option missing from this configuration
+                        # counts as not 'y' instead of raising KeyError.
+                        A[i, j] = 1 if config.get(name) == 'y' else 0
+        np.set_printoptions(edgeitems = 20, linewidth = 300)
+        print(A, nplag.matrix_rank(A))
+        print(B)
+        result = nplag.lstsq(A, B)
+        print(result)
+
+        # The solution has shape (n_opts + 1, 1).  Flatten it so that "%g" gets
+        # scalars rather than 1-element arrays.
+        coeffs = result[0].ravel()
+        for k in options:
+                print("%-40s %g" % (k, coeffs[options[k]]))
+        print("%-40s %g" % ("common", coeffs[n_opts]))
+
+# Run the evaluation when the file is executed as a script.
+if __name__ == '__main__':
+       evaluate()
author	Michal Sojka <sojkam1@fel.cvut.cz>
	Mon, 28 Sep 2015 22:53:47 +0000 (00:53 +0200)
committer	Michal Sojka <sojkam1@fel.cvut.cz>
	Mon, 28 Sep 2015 22:54:11 +0000 (00:54 +0200)