-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtriage_db.py
323 lines (277 loc) · 12.5 KB
/
triage_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
import psycopg2 as pg
import os
import time
import zlib
import hashlib
import itertools
import subprocess as subp
from enum import Enum
import sys
from utils import all_files_recursive
from config import DB_NAME, CREATE_SCHEMA_COMMAND
import schema_migration
# Schema version this module expects. TriageDb.__init__ compares it with the
# version stored in the database: an older schema is migrated up to this
# value, a newer one makes the process exit with an error.
SCHEMA_VERSION = 2
class ReduceResult(Enum):
    """Outcome of trying to reduce a test case.

    The numeric values are local to this module; they are not required to
    match the values PostgreSQL uses internally for its own enum.
    """

    # The case was reduced with creduce.
    ok = 1
    # The case did not crash.
    no_crash = 2
    # creduce failed; the case was reduced with the dumb reducer instead.
    dumb = 3
def read_file(path):
    """Return the complete contents of the file at `path` as bytes."""
    with open(path, mode='rb') as handle:
        data = handle.read()
    return data
class DbNotInitialized(Exception):
    """Raised by TriageDb() when the database schema has not been created."""
class TriageDb(object):
    """Triage database: test cases, test-run results and reduced cases.

    All access goes through a single psycopg2 connection (`self.conn`).
    Each public method runs inside `with self.conn:`, i.e. in its own
    transaction that is committed on success and rolled back on error.
    """

    def __init__(self):
        """Connect to DB_NAME and bring the schema up to SCHEMA_VERSION.

        Raises:
            DbNotInitialized: if looking up the 'OK' result string fails
                with a ProgrammingError (the schema has not been created).

        Exits the process with status 1 if the stored schema version is
        newer than SCHEMA_VERSION.
        """
        self.conn = pg.connect(database=DB_NAME)
        with self.conn:
            with self.conn.cursor() as c:
                try:
                    c.execute("SELECT id FROM result_strings " +
                              "WHERE str='OK'")
                    # Cached because _addTestRun compares results against it.
                    self.OK_ID = c.fetchone()[0]
                except pg.ProgrammingError as err:
                    raise DbNotInitialized() from err

        with self.conn:
            version = self.__get_schema_version()

        if version > SCHEMA_VERSION:
            print('Error: Unexpectedly high schema version {} '
                  '(expected at most {})'.format(
                      version, SCHEMA_VERSION), file=sys.stderr)
            sys.exit(1)
        elif version < SCHEMA_VERSION:
            with self.conn:
                self.__migrate_schema(version)
            # Re-read the version in a fresh transaction to verify the
            # migration really brought the schema up to date.
            with self.conn:
                newver = self.__get_schema_version()
            assert newver == SCHEMA_VERSION, newver

    def __get_schema_version(self):
        """Return the schema version stored in the params table.

        Returns 1 when the query fails with a ProgrammingError, because
        version 1 of the schema did not have the params table.
        """
        with self.conn.cursor() as c:
            try:
                c.execute("SELECT value FROM params " +
                          " WHERE name='schema_version'")
                return int(c.fetchone()[0])
            except pg.ProgrammingError:
                # version 1 did not have the params table
                return 1

    def __migrate_schema(self, version):
        """Run the migration registered for `version` on the connection."""
        assert version < SCHEMA_VERSION
        schema_migration.MIGRATE_FROM[version](self.conn)

    @staticmethod
    def createSchema():
        """Create the database schema by running CREATE_SCHEMA_COMMAND.

        The command's stdout and stderr are captured together; they are
        printed to stderr only if the command fails.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        print('Creating database schema...', file=sys.stderr)
        try:
            # check_output (unlike check_call) captures the output, which is
            # what makes e.output available in the handler below.
            subp.check_output(CREATE_SCHEMA_COMMAND, stderr=subp.STDOUT)
        except subp.CalledProcessError as e:
            print('Schema creation failed.', file=sys.stderr)
            # The captured output is bytes; decode leniently so that a
            # stray byte cannot hide the real error.
            output = (e.output or b'').decode(errors='replace')
            print('Output from psql:\n' + output, file=sys.stderr)
            raise

    def doesCaseExist(self, sha):
        """Return True if a case with SHA-1 `sha` is in the database."""
        with self.conn:
            with self.conn.cursor() as c:
                # Query parameters must be a sequence: a bare string would
                # be treated as one parameter per character.
                c.execute('SELECT COUNT(*) FROM cases WHERE sha1=%s', (sha, ))
                return bool(c.fetchone()[0])

    def addCase(self, sha, contents):
        """Add a single case; a case whose sha1 already exists is skipped."""
        self.addCases([(sha, contents)])

    # FIXME this may be slow?
    def addCases(self, cases):
        """Add several cases.

        `cases` is an iterable of (sha, contents) pairs. Contents are
        stored zlib-compressed together with their uncompressed length.
        A pair whose sha1 is already present is not inserted again.
        """
        with self.conn:
            with self.conn.cursor() as c:
                c.executemany(
                    'INSERT INTO case_view (sha1, z_contents, size) ' +
                    'SELECT %s, %s, %s ' +
                    'WHERE NOT EXISTS (' +
                    ' SELECT sha1 FROM case_view WHERE sha1=%s)',
                    ((sha, zlib.compress(contents), len(contents), sha)
                     for sha, contents in cases))

    def populateCases(self, cases_path, stop_after=None):
        """Add files from a directory, recursively, as cases.

        Filenames do not matter: each case is keyed by the SHA-1 of its
        contents. If `stop_after` is truthy, at most that many files are
        read; None (or 0) means no limit.
        """
        case_files = all_files_recursive(cases_path)
        if stop_after:
            case_files = itertools.islice(case_files, 0, stop_after)

        def cases_iter():
            # Lazily read and hash one file at a time so that addCases can
            # stream them into the database.
            for fname in case_files:
                contents = read_file(os.path.join(cases_path, fname))
                sha = hashlib.sha1(contents).hexdigest()
                yield (sha, contents)

        self.addCases(cases_iter())

    def iterateCases(self):
        """Iterate through (sha1, contents) pairs, ordered by case size.

        Returns a generator; contents are decompressed as it is consumed.
        """
        with self.conn:
            # The cursor is deliberately not closed here: the returned
            # generator reads its rows after this method has returned.
            c = self.conn.cursor()
            c.execute('SELECT cc.sha1, cc.z_contents ' +
                      'FROM case_view AS cc, case_sizes ' +
                      'WHERE case_sizes.case_id = cc.id ' +
                      'ORDER BY case_sizes.size')
            return ((row[0], zlib.decompress(row[1])) for row in c)

    def iterateDistinctReduced(self):
        """Iterate through the distinct contents of reduced cases."""
        with self.conn:
            # Left open on purpose; the returned generator reads from it.
            c = self.conn.cursor()
            c.execute('SELECT DISTINCT contents FROM reduced_contents')
            return (row[0] for row in c)

    def iterateDumbReduced(self):
        """Iterate through distinct dumb-reduced cases.

        Returns a generator of (original, reduced, reason) tuples, where
        `original` is the decompressed original case.
        """
        with self.conn:
            # Left open on purpose; the returned generator reads from it.
            c = self.conn.cursor()
            # FIXME get the latest failures... and get rid of that
            # File not yet open hack.
            c.execute('''
                SELECT DISTINCT ON (contents) z_contents, contents, str
                FROM case_contents, reduced_contents, reduced_cases,
                result_strings,
                (SELECT DISTINCT case_id
                FROM reduced_cases AS rc, results, result_strings
                WHERE rc.result='dumb' AND original=case_id
                AND results.result=result_strings.id
                AND result_strings.str<>'OK'
                AND result_strings.str LIKE '%File not yet open%')
                AS ids
                WHERE case_contents.case_id=ids.case_id
                AND reduced_contents.reduced_id=reduced_cases.id
                AND reduced_cases.original=ids.case_id
                AND str<>'OK'
                AND str NOT LIKE '%File not yet open%'
                AND str<>'Stack dump found' ''')
            return ((zlib.decompress(row[0]), row[1], row[2]) for row in c)

    def iterateOutputs(self):
        """Iterate through the (decompressed) compiler outputs."""
        with self.conn:
            # Left open on purpose; the returned generator reads from it.
            c = self.conn.cursor()
            c.execute('SELECT output FROM outputs')
            return (zlib.decompress(row[0]) for row in c)

    def getNumberOfCases(self):
        """Get the number of cases in the database."""
        with self.conn:
            with self.conn.cursor() as c:
                c.execute('SELECT count(*) from case_contents')
                return c.fetchone()[0]

    def _addTestRun(self, versions, start_time, end_time, results):
        """Record one test run and its results in a single transaction.

        versions: mapping with at least the keys 'clang' and 'llvm'.
        results: [(sha, result_string, output)].
        Output is ignored if result_string="OK".
        """
        assert 'clang' in versions, versions
        assert 'llvm' in versions, versions
        clang_version = versions['clang']
        llvm_version = versions['llvm']
        with self.conn:
            with self.conn.cursor() as c:
                # MAX(id) is NULL for an empty table, which would make the
                # new id NULL as well; COALESCE lets the first run get id 1.
                c.execute(
                    'INSERT INTO test_runs (id, start_time, end_time, '
                    ' clang_version, llvm_version) '
                    'SELECT COALESCE(MAX(id), 0)+1, %s, %s, %s, %s '
                    'FROM test_runs '
                    ' RETURNING id',
                    (start_time, end_time, clang_version, llvm_version))
                run_id = c.fetchone()[0]
                self._addResults(c, run_id, results)
                # delete changed reduce results where new result != OK
                c.execute("DELETE FROM reduced_cases WHERE original IN (" +
                          " SELECT case_id FROM changed_results " +
                          " WHERE new<>%s)",
                          (self.OK_ID, ))

    def testRun(self, versions):
        """Get a context manager for test runs."""
        return TriageDb.TestRunContext(self, versions)

    def _addResults(self, cursor, run_id, results):
        """Insert the results of test run `run_id` using `cursor`.

        results: [(sha, result_string, output)].
        Output is ignored if result_string="OK".
        `results` is traversed several times, so it must be a re-iterable
        collection, not a one-shot iterator.
        """
        c = cursor
        # insert result strings if they do not already exist
        unique_results = {x[1] for x in results}
        c.executemany('INSERT INTO result_strings (str) SELECT %s ' +
                      'WHERE NOT EXISTS ( ' +
                      ' SELECT 1 from result_strings WHERE str=%s)',
                      ((x, x) for x in unique_results))
        c.executemany('INSERT INTO results (case_id, test_run, result) ' +
                      ' (SELECT cases.id, %s, result_strings.id ' +
                      ' FROM cases, result_strings ' +
                      ' WHERE cases.sha1=%s AND str=%s)',
                      [(run_id, x[0], x[1]) for x in results])
        # insert/replace outputs
        outputs = [(x[0], x[2]) for x in results if x[1] != 'OK']
        c.executemany('DELETE FROM outputs ' +
                      'WHERE case_id=(SELECT id FROM cases WHERE sha1=%s)',
                      ((x[0],) for x in outputs))
        c.executemany('INSERT INTO outputs ' +
                      'SELECT id, %s FROM cases WHERE sha1=%s',
                      [(zlib.compress(x[1]), x[0]) for x in outputs])

    def getLastRunTimeByVersions(self, versions):
        """Returns (start_time, end_time) of the test run with these versions.

        If no test has been run with this version, returns None.
        """
        assert 'clang' in versions, versions
        assert 'llvm' in versions, versions
        clang_version = versions['clang']
        llvm_version = versions['llvm']
        with self.conn:
            with self.conn.cursor() as c:
                # Currently there can (by design) be only one run, hence
                # the ORDER_BY and LIMIT are redundant.
                c.execute('SELECT start_time, end_time ' +
                          'FROM test_runs ' +
                          'WHERE clang_version=%s AND llvm_version=%s ' +
                          'ORDER BY start_time ' +
                          'LIMIT 1', (clang_version, llvm_version))
                return c.fetchone()

    def getReduceWork(self):
        """Get a (sha, content) pair to run through reduce. None if none."""
        with self.conn:
            with self.conn.cursor() as c:
                c.execute(
                    'SELECT sha1, z_contents FROM unreduced_cases_view ' +
                    'ORDER BY sha1 LIMIT 1')
                r = c.fetchone()
                if r is None:
                    return None
                return (r[0], zlib.decompress(r[1]))

    def addReduced(self, versions, sha, result, contents=None):
        """Add a reduced case for the original case with SHA-1 `sha`.

        versions: mapping with the keys 'llvm' and 'clang'.
        result: a ReduceResult. For `ok` and `dumb`, `contents` must be
            non-empty; for `no_crash`, `contents` must be None.
        """
        if result == ReduceResult.ok or result == ReduceResult.dumb:
            assert contents, 'Result OK or dumb but no contents?'
        else:
            assert result == ReduceResult.no_crash, result
            assert contents is None
        llvm_version = versions['llvm']
        clang_version = versions['clang']
        with self.conn:
            with self.conn.cursor() as c:
                c.execute('SELECT id FROM cases WHERE sha1=%s', (sha, ))
                case_id = c.fetchone()
                assert case_id, sha
                case_id = case_id[0]
                # The result is stored by enum member name, e.g. 'dumb'.
                c.execute('INSERT INTO reduced_cases (original, ' +
                          ' clang_version, llvm_version, result) ' +
                          'VALUES (%s, %s, %s, %s) RETURNING id', (
                              case_id, clang_version, llvm_version,
                              result.name))
                cr_id = c.fetchone()[0]
                if contents is not None:
                    c.execute('INSERT INTO reduced_contents ' +
                              ' (reduced_id, contents) ' +
                              'VALUES (%s, %s)', (cr_id, contents))

    class TestRunContext(object):
        """A context manager for test runs.

        Results are collected in memory with addResult() and written to the
        database in one go when the `with` block exits without an exception.
        """

        def __init__(self, db, versions):
            self.db = db
            self.versions = versions
            self.results = []

        def __enter__(self):
            # Whole seconds since the epoch, taken when the run starts.
            self.start_time = int(time.time())
            return self

        def __exit__(self, type, value, traceback):
            # We actually don't want to commit on an exception.
            # `type is None` is the reliable "no exception" check; it does
            # not depend on the truthiness of the exception instance.
            if type is None:
                self.db._addTestRun(self.versions,
                                    self.start_time, int(time.time()),
                                    self.results)

        def addResult(self, sha, result_string, output):
            'Add a result. Output will be ignored if result_string="OK".'
            self.results.append((sha, result_string, output))