Skip to content

Commit

Permalink
Obfuscation without dummy pattern. Corrections (#163)
Browse files Browse the repository at this point in the history
* wisdom of obfuscation

* add BM report generation from an empty report

* touch meta to restart the cache

* CI BM fix
  • Loading branch information
babenek authored Aug 19, 2024
1 parent 346e68c commit c0e6d9c
Show file tree
Hide file tree
Showing 13 changed files with 117 additions and 230 deletions.
106 changes: 53 additions & 53 deletions benchmark.txt → .ci/benchmark.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions .ci/empty_report.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
7 changes: 7 additions & 0 deletions .github/workflows/review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ jobs:
python review_data.py &>review_head.txt
ansi2html --style 'pre {font-family: monospace; font-size: large}' <review_head.txt >review_head.html
- name: Produce benchmark scores from empty report to check markup only
if: steps.cache-data.outputs.cache-hit != 'true'
run: |
python -m benchmark --scanner credsweeper --load .ci/empty_report.json >benchmark.txt
diff --unified=3 --ignore-all-space --ignore-blank-lines .ci/benchmark.txt benchmark.txt
- name: Upload artifact
if: always()
uses: actions/upload-artifact@v4
Expand All @@ -76,6 +82,7 @@ jobs:
path: |
review_head.txt
review_head.html
benchmark.txt
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

Expand Down
13 changes: 4 additions & 9 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set -e
set -x

Expand All @@ -10,17 +10,12 @@ if [ -z "${VIRTUAL_ENV}" ]; then
echo "Virtual environment has been not activated"
if ! [ -d "${THISDIR}/${VENVDIR}" ]; then
echo "Create new virtual environment"
python3.8 -m virtualenv -v --copies "${THISDIR}/${VENVDIR}"
python3.10 -m virtualenv -v --copies "${THISDIR}/${VENVDIR}"
fi
fi

if [ -z "${VIRTUAL_ENV}" ]; then
. "${THISDIR}/${VENVDIR}/bin/activate"
fi

if ! pip list | grep PyYAML; then
pip install PyYAML
fi

python download_data.py --data_dir data

python download_data.py --clean_data --jobs $(nproc)
37 changes: 36 additions & 1 deletion download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import string
import subprocess
import sys
from argparse import ArgumentParser, Namespace
from argparse import Namespace, ArgumentParser
from multiprocessing import Pool
from typing import List

Expand Down Expand Up @@ -347,7 +347,42 @@ def get_obfuscated_value(value, meta_row: MetaRow):
return obfuscated_value


def check_asc_or_desc(line_data_value: str) -> bool:
    """Tell whether the value holds a run of 4 consecutive code points.

    Adjacent characters are compared pairwise. A pair extends the ascending
    run when the left character is an ASCII letter or digit and the right one
    is exactly one code point higher; it extends the descending run when the
    right one is exactly one code point lower. Any other pair resets the
    corresponding run to length 1.

    Returns:
        True as soon as either run reaches length 4 (e.g. "abcd", "4321"),
        otherwise False.
    """
    alphabet = string.ascii_letters + string.digits
    run_up = 1
    run_down = 1
    for current, following in zip(line_data_value, line_data_value[1:]):
        # Only the left character of the pair is required to be alphanumeric;
        # a step of 0 is neutral and resets both runs.
        step = ord(following) - ord(current) if current in alphabet else 0
        run_up = run_up + 1 if step == 1 else 1
        if run_up == 4:
            return True
        run_down = run_down + 1 if step == -1 else 1
        if run_down == 4:
            return True
    return False

def generate_value(value):
    """Return a random replacement for ``value`` that avoids detector-sensitive patterns.

    Candidates are drawn from ``gen_random_value(value)`` until one is found
    that satisfies all of the following:

    * it contains none of the keywords "api", "pass", "pwd", or "pw" at a
      word boundary (case-insensitive);
    * it contains no word character repeated 4 or more times in a row;
    * ``check_asc_or_desc`` reports no ascending/descending run in it.

    Args:
        value: the original value, passed unchanged to ``gen_random_value``.

    Returns:
        The first generated candidate that passes every check.
    """
    # The previous pattern used `pw[d\b]`; inside a character class `\b` is
    # the backspace character (0x08), not a word boundary, so a bare "pw" at
    # a word boundary was never rejected. Spell the alternatives explicitly.
    pattern_keyword = re.compile(r"(api|pass|pwd|pw\b)", flags=re.IGNORECASE)
    pattern_similar = re.compile(r"(\w)\1{3,}")
    # NOTE(review): the loop has no retry limit. If gen_random_value can keep
    # returning a rejected pattern for some input (e.g. characters it copies
    # through unchanged), this never terminates - confirm against
    # gen_random_value.
    while True:
        new_value = gen_random_value(value)
        if pattern_keyword.search(new_value):
            continue
        if pattern_similar.search(new_value):
            continue
        if check_asc_or_desc(new_value):
            continue
        return new_value


def gen_random_value(value):
obfuscated_value = ""

digits_set = string.digits
Expand Down
8 changes: 4 additions & 4 deletions meta/2ba83c6a.csv
Original file line number Diff line number Diff line change
Expand Up @@ -595,10 +595,10 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
24213,911dde03,GitHub,2ba83c6a,data/2ba83c6a/test/911dde03.txt,22033,22033,T,F,6,70,F,F,,,,,0.0,0,F,F,F,Key
24214,8a68cd28,GitHub,2ba83c6a,data/2ba83c6a/test/8a68cd28.txt,119,119,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24215,5e763eae,GitHub,2ba83c6a,data/2ba83c6a/test/5e763eae.txt,195,195,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24216,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,250,250,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24216,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,250,250,F,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24217,4acf8d32,GitHub,2ba83c6a,data/2ba83c6a/test/4acf8d32.txt,37,37,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key:Bitbucket Client Secret:Bitbucket Client ID
24218,556bad09,GitHub,2ba83c6a,data/2ba83c6a/test/556bad09.txt,525,525,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24219,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,160,160,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24219,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,160,160,F,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24220,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,166,166,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24221,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,172,172,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24222,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,178,178,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
Expand Down Expand Up @@ -923,8 +923,8 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
27342,eb0705d8,GitHub,2ba83c6a,data/2ba83c6a/other/eb0705d8.pod,460,460,T,F,23,39,F,F,Any,,,Secret,3.88,16,F,F,F,Key
138189,eb0705d8,GitHub,2ba83c6a,data/2ba83c6a/other/eb0705d8.pod,460,460,F,F,54,-1,F,F,,,,,0.0,0,F,F,F,Other
27386,dc676919,GitHub,2ba83c6a,data/2ba83c6a/test/dc676919.txt,1,1,F,F,32,37,F,F,,,,,0,0,F,F,F,Secret
27407,6cfa362d,GitHub,2ba83c6a,data/2ba83c6a/src/6cfa362d.cnf,344,344,Template,T,9,19,F,F,Any,,,Secret,2.31,10,F,F,F,Password:Secret
27425,23d50951,GitHub,2ba83c6a,data/2ba83c6a/src/23d50951.cnf,358,358,T,T,9,23,F,F,Any,,,Secret,2.52,14,F,F,F,Password:Secret
27407,6cfa362d,GitHub,2ba83c6a,data/2ba83c6a/src/6cfa362d.cnf,344,344,T,T,9,19,F,F,Any,,,Secret,2.31,10,F,F,F,Secret
27425,23d50951,GitHub,2ba83c6a,data/2ba83c6a/src/23d50951.cnf,358,358,F,T,9,23,F,F,Any,,,Secret,2.52,14,F,F,F,Password:Secret
27489,a3cf7bc8,GitHub,2ba83c6a,data/2ba83c6a/test/a3cf7bc8.txt,298,298,F,F,6,24,F,F,,,,,0.0,0,F,F,F,Key
27490,a3cf7bc8,GitHub,2ba83c6a,data/2ba83c6a/test/a3cf7bc8.txt,282,282,F,F,6,14,F,F,,,,,0.0,0,F,F,F,Key
27491,b40503ed,GitHub,2ba83c6a,data/2ba83c6a/test/b40503ed.txt,154,154,F,F,6,11,F,F,,,,,0.0,0,F,F,F,Key
Expand Down
3 changes: 1 addition & 2 deletions meta/60f9915d.csv
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
32158,a414e92e,GitHub,60f9915d,data/60f9915d/test/a414e92e.go,300,300,T,T,67,74,T,F,CharsOnly,,,Secret,2.81,7,F,F,F,URL Credentials
32159,7e47b56e,GitHub,60f9915d,data/60f9915d/test/7e47b56e.go,14,14,T,T,79,86,T,F,CharsOnly,,,Secret,2.81,7,F,F,F,URL Credentials
32162,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,206,206,T,T,49,56,T,F,CharsOnly,,,Secret,2.81,7,F,F,F,URL Credentials
32163,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,219,219,T,T,54,70,T,F,Any,,,Secret,3.25,16,F,F,F,URL Credentials
32163,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,219,219,F,T,,,T,F,Any,,,Secret,3.25,16,F,F,F,URL Credentials:Password
35875,e45e45ba,GitHub,60f9915d,data/60f9915d/src/e45e45ba.yml,48,48,T,T,25,32,F,F,CharsOnly,,,Secret,2.81,7,F,F,F,Password
46090,35a99f76,GitHub,60f9915d,data/60f9915d/test/35a99f76.go,79,79,Template,T,18,24,F,F,CharsOnly,,,Secret,2.25,6,F,F,F,Password
52007,3d9a9f38,GitHub,60f9915d,data/60f9915d/test/3d9a9f38.go,639,639,T,F,53,60,F,F,,,,,0.0,0,F,F,F,URL Credentials
Expand Down Expand Up @@ -122,4 +122,3 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
137948,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,161,161,T,F,86,93,F,F,,,,,0.0,0,F,F,F,Password:URL Credentials
138239,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,161,161,F,F,65,-1,F,F,,,,,0.0,0,F,F,F,Other
138240,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,161,161,F,F,104,-1,F,F,,,,,0.0,0,F,F,F,Other
138241,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,219,219,F,F,81,85,F,F,,,,,0.0,0,F,F,F,Password
6 changes: 6 additions & 0 deletions meta/69d49010.csv
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,9 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
133411,24cbbb32,GitHub,69d49010,data/69d49010/test/24cbbb32.py,680,680,F,F,,,F,F,,,,,0,0,F,F,F,Secret
135283,85d52436,GitHub,69d49010,data/69d49010/test/85d52436.py,279,279,T,F,20,33,F,F,,,,,0.0,0,F,F,F,Password:URL Credentials
1341461,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,438,438,T,F,17,53,F,F,,,,,0.0,0,F,F,F,UUID:Token
1479347,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,258,258,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479349,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,277,277,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479352,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,303,303,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479354,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,329,329,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479356,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,356,356,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479361,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,433,433,T,F,40,51,F,F,,,,,0.0,0,F,F,F,Auth
1 change: 1 addition & 0 deletions meta/75e7c64d.csv
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
114451,cb047af3,GitHub,75e7c64d,data/75e7c64d/src/cb047af3.yml,39,39,F,F,-1,-1,F,F,,,,,0.0,-1,F,F,F,Key
114452,2a39f30f,GitHub,75e7c64d,data/75e7c64d/src/2a39f30f.yml,37,37,F,F,-1,-1,F,F,,,,,0.0,-1,F,F,F,Key
133463,841e3aef,GitHub,75e7c64d,data/75e7c64d/src/841e3aef.py,329,329,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key
1479362,0f6303ab,GitHub,75e7c64d,data/75e7c64d/src/0f6303ab.py,101,101,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key
1 change: 1 addition & 0 deletions meta/8cda00f3.csv
Original file line number Diff line number Diff line change
Expand Up @@ -913,3 +913,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
135313,eaf18c55,GitHub,8cda00f3,data/8cda00f3/other/eaf18c55.md,209,209,T,F,21,37,F,F,,,,,0,0,F,F,F,Auth
135314,eaf18c55,GitHub,8cda00f3,data/8cda00f3/other/eaf18c55.md,300,300,T,F,21,37,F,F,,,,,0,0,F,F,F,Auth
1340770,4cf2897e,GitHub,8cda00f3,data/8cda00f3/src/4cf2897e.go,16,16,T,F,22,58,F,F,,,,,0.0,0,F,F,F,UUID
1479363,0a7921b3,GitHub,8cda00f3,data/8cda00f3/test/0a7921b3.go,155,155,F,F,,,F,F,,,,,0.0,0,F,F,F,Auth
2 changes: 1 addition & 1 deletion meta/c41bb134.csv
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
2503,8a3fd767,GitHub,c41bb134,data/c41bb134/test/8a3fd767.config,2,2,F,F,,,F,F,,,,,0,0,F,F,F,Key
3878,e99d6a11,GitHub,c41bb134,data/c41bb134/src/e99d6a11.nix,152,152,F,F,,,F,F,,,,,0,0,F,F,F,Password
4323,68cbce99,GitHub,c41bb134,data/c41bb134/test/68cbce99.hs,54,54,F,F,,,F,F,,,,,0,0,F,F,F,API
5571,1abbf729,GitHub,c41bb134,data/c41bb134/test/1abbf729.sql,231,231,Template,T,23,27,F,F,CharOnly,,,Secret,1.5,4,F,F,F,Password
5571,1abbf729,GitHub,c41bb134,data/c41bb134/test/1abbf729.sql,231,231,F,T,,,F,F,CharOnly,,,Secret,1.5,4,F,F,F,Password
5620,4460568d,GitHub,c41bb134,data/c41bb134/src/4460568d.hs,47,47,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key
7252,1abbf729,GitHub,c41bb134,data/c41bb134/test/1abbf729.sql,2038,2038,F,F,,,F,F,,,,,0,0,F,F,F,Auth
7839,328c2f31,GitHub,c41bb134,data/c41bb134/src/328c2f31.hs,294,294,F,F,,,F,F,,,,,0,0,F,F,F,API
Expand Down
3 changes: 2 additions & 1 deletion meta_cred.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def __init__(self, cs_cred: dict):
if not self.path.startswith('data/'):
# license files ...
self.path = '/'.join([str(x) for x in path.parts[-3:]])
assert self.path.startswith('data/'), cs_cred # path for benchmark must start from data/
# path for benchmark must start from "data/"
assert self.path.startswith('data/'), cs_cred
self.valid_path = bool(self.valid_path_regex.match(self.path)) # to skip license files

self.line_start = line_data_list[0]["line_num"]
Expand Down
159 changes: 0 additions & 159 deletions update_meta.py

This file was deleted.

0 comments on commit c0e6d9c

Please sign in to comment.