Skip to content

Commit 42c4d5a

Browse files
committed
stats_engine: Fix metrics
Previously, a match could be counted twice towards the actually added/removed lines of a patch when checked against a git repository. Now we keep a blacklist of the indices of the patch blocks already found in a file and skip them on later matches.
1 parent cb86da1 commit 42c4d5a

File tree

1 file changed

+41
-24
lines changed

1 file changed

+41
-24
lines changed

src/stats_engine.py

+41-24
Original file line numberDiff line numberDiff line change
@@ -153,23 +153,30 @@ def collect_stats(kernel_repo, cve_patch):
153153
# keep track of all added lines
154154
all_added.extend(added)
155155

156+
# list of blacklisted indices of lines we already found
157+
already_found = []
156158
for i in range(0, len(fp_contents)):
157159
line = fp_contents[i]
158160

159-
for (above, added, below) in patch_added_lines[file]:
161+
for j in range(0, len(patch_added_lines[file])):
162+
(above, added, below) = patch_added_lines[file][j]
163+
# check if we already found these lines
164+
if j in already_found:
165+
continue
166+
160167
# check if the added block is present
161168
block_end_index = i
162169
if added[0] in line:
163-
added_lines = added[1:]
164-
fp_lines = fp_contents[i+1:i+len(added_lines)+1]
170+
added_lines = added
171+
fp_lines = fp_contents[i:i+len(added_lines)]
165172

166173
if set(added_lines) != set(fp_lines):
167174
continue
168175

169-
# check the surrounding lines (two above, two below)
176+
# check the surrounding lines
170177
fp_above = []
171178
idx = i - 1
172-
while len(fp_above) < 2:
179+
while len(fp_above) < len(above):
173180
above_candidate = fp_contents[idx]
174181
if above_candidate not in all_added:
175182
fp_above.append(above_candidate)
@@ -179,7 +186,7 @@ def collect_stats(kernel_repo, cve_patch):
179186

180187
fp_below = []
181188
idx = block_end_index + 1
182-
while len(fp_below) < 2:
189+
while len(fp_below) < len(below):
183190
if idx == len(fp_contents):
184191
break
185192
below_candidate = fp_contents[idx]
@@ -190,6 +197,7 @@ def collect_stats(kernel_repo, cve_patch):
190197
if (set(above) == set(fp_above)
191198
and set(below) == set(fp_below)):
192199
actually_added_lines += len(added)
200+
already_found.append(j)
193201

194202
for file in patch_removed_lines:
195203
file_path = kernel_repo + '/' + file
@@ -204,16 +212,25 @@ def collect_stats(kernel_repo, cve_patch):
204212
print("[E] Failed to read file: " + file_path + ", skipping!")
205213
continue
206214

215+
# list of blacklisted indices of lines we already found
216+
already_found = []
207217
for i in range(0, len(fp_contents)):
208-
lines = fp_contents[i:i+2]
209-
for (above, removed, below) in patch_removed_lines[file]:
218+
219+
for j in range(0, len(patch_removed_lines[file])):
220+
(above, removed, below) = patch_removed_lines[file][j]
221+
# check if we already found these lines
222+
if j in already_found:
223+
continue
224+
225+
lines = fp_contents[i:i+len(above)]
226+
210227
# make sure the removed block is missing
211228
if set(above) != set(lines):
212229
continue
213230

214231
fp_below = []
215-
idx = i + 2
216-
while len(fp_below) < 2:
232+
idx = i + len(above)
233+
while len(fp_below) < len(below):
217234
if idx == len(fp_contents):
218235
break
219236
below_candidate = fp_contents[idx]
@@ -223,6 +240,7 @@ def collect_stats(kernel_repo, cve_patch):
223240

224241
if set(below) == set(fp_below):
225242
actually_removed_lines += len(removed)
243+
already_found.append(j)
226244

227245
# if the patch does not add/remove lines, we return None
228246
if total_added_lines == 0:
@@ -268,23 +286,22 @@ def main():
268286
print("[E] invalid patchfile: " + patchfile)
269287
return
270288

271-
patch_added_count = 0
272-
patch_removed_count = 0
273289
patch_added_lines = find_changed_lines(patchfile, '+')
290+
total_added_lines = 0
291+
for file in patch_added_lines:
292+
triplets = patch_added_lines[file]
293+
for (above, changed, below) in triplets:
294+
total_added_lines += len(changed)
295+
274296
patch_removed_lines = find_changed_lines(patchfile, '-')
297+
total_removed_lines = 0
298+
for file in patch_removed_lines:
299+
triplets = patch_removed_lines[file]
300+
for (above, changed, below) in triplets:
301+
total_removed_lines += len(changed)
275302

276-
# collect metrics
277-
for changed_file in patch_added_lines:
278-
for triplet in patch_added_lines[changed_file]:
279-
# triplet[1] holds the actually changed lines
280-
patch_added_count += len(triplet[1])
281-
for changed_file in patch_removed_lines:
282-
for triplet in patch_removed_lines[changed_file]:
283-
# triplet[1] holds the actually changed lines
284-
patch_removed_count += len(triplet[1])
285-
286-
print(patchfile + ": " + str(patch_added_count) + " insertions(+)" +
287-
", " + str(patch_removed_count) + " deletions(-)")
303+
print(patchfile + ": " + str(total_added_lines) + " insertions(+)" +
304+
", " + str(total_removed_lines) + " deletions(-)")
288305

289306
if kernel_repo:
290307
((add, total_add), (rem, total_rem)) = collect_stats(kernel_repo, patchfile)

0 commit comments

Comments
 (0)