Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Obfuscation without dummy pattern. Corrections #163

Merged
merged 4 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 53 additions & 53 deletions benchmark.txt → .ci/benchmark.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions .ci/empty_report.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
7 changes: 7 additions & 0 deletions .github/workflows/review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ jobs:
python review_data.py &>review_head.txt
ansi2html --style 'pre {font-family: monospace; font-size: large}' <review_head.txt >review_head.html
- name: Produce benchmark scores from empty report to check markup only
if: steps.cache-data.outputs.cache-hit != 'true'
run: |
python -m benchmark --scanner credsweeper --load .ci/empty_report.json >benchmark.txt
diff --unified=3 --ignore-all-space --ignore-blank-lines .ci/benchmark.txt benchmark.txt
- name: Upload artifact
if: always()
uses: actions/upload-artifact@v4
Expand All @@ -76,6 +82,7 @@ jobs:
path: |
review_head.txt
review_head.html
benchmark.txt
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

Expand Down
13 changes: 4 additions & 9 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set -e
set -x

Expand All @@ -10,17 +10,12 @@ if [ -z "${VIRTUAL_ENV}" ]; then
echo "Virtual environment has been not activated"
if ! [ -d "${THISDIR}/${VENVDIR}" ]; then
echo "Create new virtual environment"
python3.8 -m virtualenv -v --copies "${THISDIR}/${VENVDIR}"
python3.10 -m virtualenv -v --copies "${THISDIR}/${VENVDIR}"
fi
fi

if [ -z "${VIRTUAL_ENV}" ]; then
. "${THISDIR}/${VENVDIR}/bin/activate"
fi

if ! pip list | grep PyYAML; then
pip install PyYAML
fi

python download_data.py --data_dir data

python download_data.py --clean_data --jobs $(nproc)
37 changes: 36 additions & 1 deletion download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import string
import subprocess
import sys
from argparse import ArgumentParser, Namespace
from argparse import Namespace, ArgumentParser
from multiprocessing import Pool
from typing import List

Expand Down Expand Up @@ -347,7 +347,42 @@ def get_obfuscated_value(value, meta_row: MetaRow):
return obfuscated_value


def check_asc_or_desc(line_data_value: str) -> bool:
    """Report whether the value holds a run of 4 sequential characters.

    Modelled on a ValuePatternCheck-style filter: neighbouring characters are
    compared by code point, and a run grows while each step is exactly +1
    (ascending, e.g. "abcd") or exactly -1 (descending, e.g. "4321").

    Only the left character of each pair has to be an ASCII letter or digit;
    the right one is not checked, so a run may end on the code point that
    directly follows an alphanumeric range (e.g. "789:").

    :param line_data_value: text to inspect
    :return: True as soon as either run reaches a length of 4, else False
    """
    run_length = 4
    alphanumerics = string.ascii_letters + string.digits
    rising = 1
    falling = 1
    for current, following in zip(line_data_value, line_data_value[1:]):
        # A pair led by a non-alphanumeric character breaks both runs.
        step = ord(following) - ord(current) if current in alphanumerics else 0
        rising = rising + 1 if step == 1 else 1
        falling = falling + 1 if step == -1 else 1
        if run_length in (rising, falling):
            return True
    return False

# Credential-like keywords that must not show up in a generated value.
# "pw" is rejected when followed by "d" or when it ends a word. The previous
# character class [d\b] matched only "d" or a literal backspace, because
# inside [] the escape \b is a backspace rather than a word boundary.
_KEYWORD_PATTERN = re.compile(r"(?:api|pass|pw(?:d|\b))", flags=re.IGNORECASE)
# The same word character repeated four or more times in a row.
_SIMILAR_PATTERN = re.compile(r"(\w)\1{3,}")


def generate_value(value):
    """Produce a replacement for ``value`` that avoids detector-sensitive patterns.

    Calls ``gen_random_value(value)`` repeatedly and returns the first result
    that passes all three checks:

    * it contains none of the keywords "api", "pass", "pwd", or "pw" at the
      end of a word (case-insensitive);
    * it has no word character repeated four or more times consecutively;
    * ``check_asc_or_desc`` does not report an ascending/descending run.

    Such patterns could turn the obfuscated value into a false positive or a
    false negative for a scanner.

    NOTE(review): there is no retry limit. If ``gen_random_value`` can never
    return an acceptable string for a given input (for example if it keeps
    some characters of ``value`` unchanged), this loop will not terminate;
    confirm against ``gen_random_value``.

    :param value: original value, passed to ``gen_random_value``
    :return: the first generated value that passes every check
    """
    while True:
        new_value = gen_random_value(value)
        if _KEYWORD_PATTERN.search(new_value):
            continue
        if _SIMILAR_PATTERN.search(new_value):
            continue
        if check_asc_or_desc(new_value):
            continue
        return new_value


def gen_random_value(value):
obfuscated_value = ""

digits_set = string.digits
Expand Down
8 changes: 4 additions & 4 deletions meta/2ba83c6a.csv
Original file line number Diff line number Diff line change
Expand Up @@ -595,10 +595,10 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
24213,911dde03,GitHub,2ba83c6a,data/2ba83c6a/test/911dde03.txt,22033,22033,T,F,6,70,F,F,,,,,0.0,0,F,F,F,Key
24214,8a68cd28,GitHub,2ba83c6a,data/2ba83c6a/test/8a68cd28.txt,119,119,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24215,5e763eae,GitHub,2ba83c6a,data/2ba83c6a/test/5e763eae.txt,195,195,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24216,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,250,250,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24216,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,250,250,F,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24217,4acf8d32,GitHub,2ba83c6a,data/2ba83c6a/test/4acf8d32.txt,37,37,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key:Bitbucket Client Secret:Bitbucket Client ID
24218,556bad09,GitHub,2ba83c6a,data/2ba83c6a/test/556bad09.txt,525,525,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24219,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,160,160,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24219,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,160,160,F,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24220,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,166,166,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24221,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,172,172,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
24222,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,178,178,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key
Expand Down Expand Up @@ -923,8 +923,8 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
27342,eb0705d8,GitHub,2ba83c6a,data/2ba83c6a/other/eb0705d8.pod,460,460,T,F,23,39,F,F,Any,,,Secret,3.88,16,F,F,F,Key
138189,eb0705d8,GitHub,2ba83c6a,data/2ba83c6a/other/eb0705d8.pod,460,460,F,F,54,-1,F,F,,,,,0.0,0,F,F,F,Other
27386,dc676919,GitHub,2ba83c6a,data/2ba83c6a/test/dc676919.txt,1,1,F,F,32,37,F,F,,,,,0,0,F,F,F,Secret
27407,6cfa362d,GitHub,2ba83c6a,data/2ba83c6a/src/6cfa362d.cnf,344,344,Template,T,9,19,F,F,Any,,,Secret,2.31,10,F,F,F,Password:Secret
27425,23d50951,GitHub,2ba83c6a,data/2ba83c6a/src/23d50951.cnf,358,358,T,T,9,23,F,F,Any,,,Secret,2.52,14,F,F,F,Password:Secret
27407,6cfa362d,GitHub,2ba83c6a,data/2ba83c6a/src/6cfa362d.cnf,344,344,T,T,9,19,F,F,Any,,,Secret,2.31,10,F,F,F,Secret
27425,23d50951,GitHub,2ba83c6a,data/2ba83c6a/src/23d50951.cnf,358,358,F,T,9,23,F,F,Any,,,Secret,2.52,14,F,F,F,Password:Secret
27489,a3cf7bc8,GitHub,2ba83c6a,data/2ba83c6a/test/a3cf7bc8.txt,298,298,F,F,6,24,F,F,,,,,0.0,0,F,F,F,Key
27490,a3cf7bc8,GitHub,2ba83c6a,data/2ba83c6a/test/a3cf7bc8.txt,282,282,F,F,6,14,F,F,,,,,0.0,0,F,F,F,Key
27491,b40503ed,GitHub,2ba83c6a,data/2ba83c6a/test/b40503ed.txt,154,154,F,F,6,11,F,F,,,,,0.0,0,F,F,F,Key
Expand Down
3 changes: 1 addition & 2 deletions meta/60f9915d.csv
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
32158,a414e92e,GitHub,60f9915d,data/60f9915d/test/a414e92e.go,300,300,T,T,67,74,T,F,CharsOnly,,,Secret,2.81,7,F,F,F,URL Credentials
32159,7e47b56e,GitHub,60f9915d,data/60f9915d/test/7e47b56e.go,14,14,T,T,79,86,T,F,CharsOnly,,,Secret,2.81,7,F,F,F,URL Credentials
32162,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,206,206,T,T,49,56,T,F,CharsOnly,,,Secret,2.81,7,F,F,F,URL Credentials
32163,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,219,219,T,T,54,70,T,F,Any,,,Secret,3.25,16,F,F,F,URL Credentials
32163,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,219,219,F,T,,,T,F,Any,,,Secret,3.25,16,F,F,F,URL Credentials:Password
35875,e45e45ba,GitHub,60f9915d,data/60f9915d/src/e45e45ba.yml,48,48,T,T,25,32,F,F,CharsOnly,,,Secret,2.81,7,F,F,F,Password
46090,35a99f76,GitHub,60f9915d,data/60f9915d/test/35a99f76.go,79,79,Template,T,18,24,F,F,CharsOnly,,,Secret,2.25,6,F,F,F,Password
52007,3d9a9f38,GitHub,60f9915d,data/60f9915d/test/3d9a9f38.go,639,639,T,F,53,60,F,F,,,,,0.0,0,F,F,F,URL Credentials
Expand Down Expand Up @@ -122,4 +122,3 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
137948,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,161,161,T,F,86,93,F,F,,,,,0.0,0,F,F,F,Password:URL Credentials
138239,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,161,161,F,F,65,-1,F,F,,,,,0.0,0,F,F,F,Other
138240,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,161,161,F,F,104,-1,F,F,,,,,0.0,0,F,F,F,Other
138241,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,219,219,F,F,81,85,F,F,,,,,0.0,0,F,F,F,Password
6 changes: 6 additions & 0 deletions meta/69d49010.csv
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,9 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
133411,24cbbb32,GitHub,69d49010,data/69d49010/test/24cbbb32.py,680,680,F,F,,,F,F,,,,,0,0,F,F,F,Secret
135283,85d52436,GitHub,69d49010,data/69d49010/test/85d52436.py,279,279,T,F,20,33,F,F,,,,,0.0,0,F,F,F,Password:URL Credentials
1341461,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,438,438,T,F,17,53,F,F,,,,,0.0,0,F,F,F,UUID:Token
1479347,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,258,258,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479349,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,277,277,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479352,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,303,303,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479354,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,329,329,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479356,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,356,356,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth
1479361,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,433,433,T,F,40,51,F,F,,,,,0.0,0,F,F,F,Auth
1 change: 1 addition & 0 deletions meta/75e7c64d.csv
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
114451,cb047af3,GitHub,75e7c64d,data/75e7c64d/src/cb047af3.yml,39,39,F,F,-1,-1,F,F,,,,,0.0,-1,F,F,F,Key
114452,2a39f30f,GitHub,75e7c64d,data/75e7c64d/src/2a39f30f.yml,37,37,F,F,-1,-1,F,F,,,,,0.0,-1,F,F,F,Key
133463,841e3aef,GitHub,75e7c64d,data/75e7c64d/src/841e3aef.py,329,329,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key
1479362,0f6303ab,GitHub,75e7c64d,data/75e7c64d/src/0f6303ab.py,101,101,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key
1 change: 1 addition & 0 deletions meta/8cda00f3.csv
Original file line number Diff line number Diff line change
Expand Up @@ -913,3 +913,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
135313,eaf18c55,GitHub,8cda00f3,data/8cda00f3/other/eaf18c55.md,209,209,T,F,21,37,F,F,,,,,0,0,F,F,F,Auth
135314,eaf18c55,GitHub,8cda00f3,data/8cda00f3/other/eaf18c55.md,300,300,T,F,21,37,F,F,,,,,0,0,F,F,F,Auth
1340770,4cf2897e,GitHub,8cda00f3,data/8cda00f3/src/4cf2897e.go,16,16,T,F,22,58,F,F,,,,,0.0,0,F,F,F,UUID
1479363,0a7921b3,GitHub,8cda00f3,data/8cda00f3/test/0a7921b3.go,155,155,F,F,,,F,F,,,,,0.0,0,F,F,F,Auth
2 changes: 1 addition & 1 deletion meta/c41bb134.csv
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
2503,8a3fd767,GitHub,c41bb134,data/c41bb134/test/8a3fd767.config,2,2,F,F,,,F,F,,,,,0,0,F,F,F,Key
3878,e99d6a11,GitHub,c41bb134,data/c41bb134/src/e99d6a11.nix,152,152,F,F,,,F,F,,,,,0,0,F,F,F,Password
4323,68cbce99,GitHub,c41bb134,data/c41bb134/test/68cbce99.hs,54,54,F,F,,,F,F,,,,,0,0,F,F,F,API
5571,1abbf729,GitHub,c41bb134,data/c41bb134/test/1abbf729.sql,231,231,Template,T,23,27,F,F,CharOnly,,,Secret,1.5,4,F,F,F,Password
5571,1abbf729,GitHub,c41bb134,data/c41bb134/test/1abbf729.sql,231,231,F,T,,,F,F,CharOnly,,,Secret,1.5,4,F,F,F,Password
5620,4460568d,GitHub,c41bb134,data/c41bb134/src/4460568d.hs,47,47,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key
7252,1abbf729,GitHub,c41bb134,data/c41bb134/test/1abbf729.sql,2038,2038,F,F,,,F,F,,,,,0,0,F,F,F,Auth
7839,328c2f31,GitHub,c41bb134,data/c41bb134/src/328c2f31.hs,294,294,F,F,,,F,F,,,,,0,0,F,F,F,API
Expand Down
3 changes: 2 additions & 1 deletion meta_cred.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def __init__(self, cs_cred: dict):
if not self.path.startswith('data/'):
# license files ...
self.path = '/'.join([str(x) for x in path.parts[-3:]])
assert self.path.startswith('data/'), cs_cred # path for benchmark must start from data/
# path for benchmark must start from "data/"
assert self.path.startswith('data/'), cs_cred
self.valid_path = bool(self.valid_path_regex.match(self.path)) # to skip license files

self.line_start = line_data_list[0]["line_num"]
Expand Down
159 changes: 0 additions & 159 deletions update_meta.py

This file was deleted.

Loading