forked from vanhauser-thc/afl-patches
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathafl-cmin-reduce-dataset.diff
38 lines (28 loc) · 1.55 KB
/
afl-cmin-reduce-dataset.diff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
--- afl-cmin 2018-04-02 10:44:34.273922652 +0200
+++ afl-cmin 2018-04-02 11:25:13.310101247 +0200
@@ -363,10 +363,7 @@ echo "[+] Found $TUPLE_COUNT unique tupl
#####################################
# The next step is to find the best candidate for each tuple. The "best"
-# part is understood simply as the smallest input that includes a particular
-# tuple in its trace. Empirical evidence suggests that this produces smaller
-# datasets than more involved algorithms that could be still pulled off in
-# a shell script.
+# part is understood simply as the input with the largest trace file (bitmap).
echo "[*] Finding best candidates for each tuple..."
@@ -379,7 +376,7 @@ while read -r fn; do
sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"
-done < <(ls -rS "$IN_DIR")
+done < <(ls -S "$TRACE_DIR")
echo
@@ -387,10 +384,10 @@ echo
# STEP 4: LOADING CANDIDATES #
##############################
-# At this point, we have a file of tuple-file pairs, sorted by file size
-# in ascending order (as a consequence of ls -rS). By doing sort keyed
-# only by tuple (-k 1,1) and configured to output only the first line for
-# every key (-s -u), we end up with the smallest file for each tuple.
+# At this point, we have a file of tuple-file pairs, sorted by trace size in
+# descending order (ls -S). Sorting by tuple only (-k 1,1) and keeping just the
+# first line for every key (-s -u) yields the file with the largest bitmap for
+# each tuple, so the dataset will be small in number of files.
echo "[*] Sorting candidate list (be patient)..."