From 6ecaa4d529521064ecdc52cf72c47b97ef5dce0e Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 19 Aug 2024 09:14:53 +0300 Subject: [PATCH 1/4] wisdom of obfuscation --- benchmark.txt | 269 ---------------------------------------------- build.sh | 13 +-- download_data.py | 37 ++++++- meta/2ba83c6a.csv | 8 +- meta/60f9915d.csv | 3 +- meta/69d49010.csv | 6 ++ meta/75e7c64d.csv | 1 + meta/8cda00f3.csv | 1 + meta_cred.py | 3 +- update_meta.py | 159 --------------------------- 10 files changed, 55 insertions(+), 445 deletions(-) delete mode 100644 benchmark.txt delete mode 100644 update_meta.py diff --git a/benchmark.txt b/benchmark.txt deleted file mode 100644 index 8f3d489e9..000000000 --- a/benchmark.txt +++ /dev/null @@ -1,269 +0,0 @@ -META MD5 877d5780a3115b42628c7fe43c869801 -DATA MD5 b3a2698b63448efee7ab94ffd4d11814 -DATA: 16345157 interested lines. MARKUP: 62634 items -FileType FileNumber ValidLines Positives Negatives Templates ---------------- ------------ ------------ ----------- ----------- ----------- - 194 28318 66 414 85 -.1 2 641 2 5 -.admx 1 26 1 -.adoc 1 158 13 6 1 -.api 2 118 4 -.asciidoc 96 14471 50 347 27 -.axaml 5 286 5 -.backup 1 62 2 1 -.bash 2 2158 2 1 -.bat 4 233 14 2 -.bats 15 2804 14 49 9 -.bazel 3 424 8 -.build 2 40 3 -.bundle 4 1512 580 -.bzl 3 2503 11 -.c 179 284009 8 942 5 -.cc 29 30562 617 1 -.cf 3 126 2 1 -.cfg 1 385 1 1 -.cjs 1 725 3 6 -.clj 2 133 3 -.cljc 5 2421 11 -.cls 1 657 1 -.cmd 4 401 2 3 -.cnf 8 858 15 34 18 -.coffee 1 585 2 -.conf 60 4945 53 67 53 -.config 20 492 16 38 1 -.cpp 15 5688 2 61 -.creds 1 10 1 1 -.crlf 1 27 1 -.crt 2 4979 211 -.cs 268 79532 158 894 94 -.cshtml 5 180 12 -.csp 3 379 9 -.csproj 1 14 1 -.css 6 13564 10 -.csv 1 109 78 -.dart 2 22 2 -.deprecated 1 126 1 -.development 1 5 1 -.diff 2 2460 8 2 -.dist 5 257 7 13 -.doc 1 2489 3 -.dockerfile 1 19 1 -.dot 1 160 6 -.eex 4 74 8 -.ejs 1 13 1 -.env 10 136 11 3 17 -.erb 13 323 27 -.erl 4 96 7 -.ex 25 4968 5 98 5 -.example 17 1838 74 38 54 -.exs 24 4842 8 187 4 -.ext 5 211 1 4 2 -.fsproj 1 75 1 2 -.g4 2 201 2 -.gd 1 37 1 -.gml 3 3075 16 -.gni 3 5017 19 -.go 1080 566476 693 4114 739 -.golden 5 1168 1 13 29 -.gradle 45 3265 4 90 100 -.graphql 7 420 13 -.graphqls 1 30 1 -.groovy 22 4986 24 215 1 -.h 9 1958 36 -.haml 9 191 17 -.hbs 2 54 3 -.hs 14 4140 30 61 5 -.html 53 15327 22 110 18 -.idl 2 777 1 4 -.iml 6 699 30 -.in 6 2130 6 43 10 -.inc 2 56 2 1 -.ini 11 1437 25 12 18 -.ipynb 1 134 5 -.j 1 241 2 2 -.j2 30 5530 6 186 10 -.java 621 134132 360 1366 171 -.jenkinsfile 1 58 2 6 -.jinja2 1 64 2 -.js 659 536413 535 2489 330 -.json 850 13046270 1070 10897 140 -.jsp 13 3202 1 40 -.jsx 7 857 19 -.jwt 1 1 2 -.key 83 2737 70 14 -.kt 123 20774 67 379 3 -.l 1 982 1 -.las 1 6656 35 -.lasso 1 230 7 -.lasso9 1 164 5 -.ldif 2 286 20 -.ldiff 1 20 1 -.ldml 1 6656 35 -.leex 1 9 2 -.less 4 3023 12 -.libsonnet 2 210 1 11 -.list 2 15 2 -.lkml 1 43 1 -.lock 24 160912 142 -.log 2 199 38 52 -.lua 10 1924 37 3 -.m 16 13358 11 158 3 -.manifest 3 102 9 6 -.markdown 3 139 3 1 -.markerb 3 12 3 -.marko 1 21 2 -.md 674 149399 710 2336 624 -.mdx 3 549 7 -.mjml 1 18 1 -.mjs 22 4424 76 340 -.mk 1 5878 13 -.ml 1 1856 16 -.mlir 2 1596 19 -.mod 2 96 4 -.moo 1 1404 26 -.mqh 1 1023 2 -.msg 1 26644 1 1 -.mysql 1 36 2 -.ndjson 2 5006 69 237 2 -.nix 4 211 12 -.nolint 1 2 1 -.odd 1 1281 43 -.oracle 1 9 1 -.p8 4 64 4 -.pan 2 48 4 -.patch 4 109405 4 27 -.pbxproj 1 941 2 -.pem 48 1169 47 8 -.php 371 75710 128 1619 79 -.pl 16 14727 6 34 -.pm 3 744 7 -.po 3 2994 15 -.pod 9 1859 1 23 -.pony 1 83 4 -.postinst 2 354 4 15 -.pp 10 563 16 -.ppk 1 45 36 -.private 1 15 1 -.proj 1 85 5 -.properties 48 1621 52 27 33 -.proto 5 5768 2 49 -.ps1 16 8509 15 64 2 -.ps1xml 1 5022 1 -.pug 2 193 2 -.purs 1 69 4 -.pxd 1 150 5 2 -.py 890 291553 674 3290 728 -.pyi 4 1361 9 -.pyp 1 167 1 -.pyx 2 1094 23 -.r 4 62 6 3 1 -.rake 2 51 2 -.rb 860 131838 258 3311 613 -.re 1 31 1 -.red 1 159 1 -.release 1 13 4 -.response 1 26 2 -.resx 11 3519 310 -.rexx 1 92 3 -.rnh 1 1354 3 2 -.rno 1 7229 2 -.rrc 39 1404 281 -.rs 31 9855 2 233 11 -.rsc 1 691 1 -.rsp 16 7101 19 10 28 -.rst 86 33980 70 321 68 -.rules 1 6 2 -.sample 2 25 3 4 4 -.sbt 3 570 5 2 -.scala 40 5071 22 101 -.scss 16 8553 32 1 -.secrets 1 11 1 -.sh 143 21525 51 466 30 -.slim 1 153 1 2 -.smali 1 775 18 -.snap 3 1708 9 30 2 -.spec 2 332 2 -.spin 1 565 1 -.sql 27 6606 126 56 4 -.storyboard 20 1802 341 -.strings 20 1240 137 -.stub 3 84 6 -.sublime-keymap 1 3 1 -.sum 37 22854 283 -.svg 1 638 12 -.t 9 1767 24 43 14 -.td 2 14002 6 -.template 19 1633 4 35 11 -.test 2 24 25 4 -.testsettings 1 21 1 10 -.tf 21 1377 3 29 2 -.tfstate 4 307 22 11 4 -.tfvars 1 31 3 2 -.tl 2 2161 161 2 -.tmpl 5 336 3 9 -.token 1 1 3 -.toml 83 2379 53 105 156 -.tpl 1 43 1 -.travis 1 34 4 3 1 -.ts 583 106730 159 1800 201 -.tsx 54 7914 1 114 5 -.ttar 1 452 1 -.txt 440 78102 5301 6341 49 -.utf8 1 77 2 -.vsixmanifest 1 36 1 -.vsmdi 1 6 2 -.vue 50 8736 1 154 1 -.xaml 21 8103 162 -.xcscheme 1 109 6 -.xib 11 503 169 -.xml 9 689 9 -.xsl 1 311 1 -.yaml 137 19004 123 345 44 -.yml 418 36162 545 892 380 -.zsh 6 872 12 -.zsh-theme 1 97 1 -TOTAL: 10259 16345157 12147 50315 5114 -credsweeper result_cnt : 7753, lost_cnt : 0, true_cnt : 7531, false_cnt : 222 -Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------- ----------- ----------- ----------- ---------- ---- ---- ----- ---- -------- -------- -------- -------- -------- -------- -API 128 3161 189 113 111 2 3348 17 0.000597 0.132812 0.994537 0.982301 0.867188 0.921162 -AWS Client ID 167 21 0 160 160 0 21 7 0.000000 0.041916 0.962766 1.000000 0.958084 0.978593 -AWS Multi 75 16 0 87 75 11 5 0 0.687500 0.000000 0.879121 0.872093 1.000000 0.931677 -AWS S3 Bucket 66 24 0 91 65 24 0 1 1.000000 0.015152 0.722222 0.730337 0.984848 0.838710 -Atlassian Old PAT token 27 308 3 12 3 8 303 24 0.025723 0.888889 0.905325 0.272727 0.111111 0.157895 -Auth 412 2726 76 378 359 19 2783 53 0.006781 0.128641 0.977598 0.949735 0.871359 0.908861 -Azure Access Token 19 0 0 12 12 0 0 7 0.368421 0.631579 1.000000 0.631579 0.774194 -BASE64 Private Key 7 4 0 7 7 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 -Bitbucket Client ID 143 2097 9 48 28 19 2087 115 0.009022 0.804196 0.940418 0.595745 0.195804 0.294737 -Bitbucket Client Secret 301 809 10 40 29 11 808 272 0.013431 0.903654 0.747321 0.725000 0.096346 0.170088 -Certificate 23 471 1 26 18 8 464 5 0.016949 0.217391 0.973737 0.692308 0.782609 0.734694 -Credential 95 420 74 92 92 0 494 3 0.000000 0.031579 0.994907 1.000000 0.968421 0.983957 -Docker Swarm Token 2 0 0 1 1 0 0 1 0.500000 0.500000 1.000000 0.500000 0.666667 -Dropbox App secret 64 139 1 46 35 10 130 29 0.071429 0.453125 0.808824 0.777778 0.546875 0.642202 -Facebook Access Token 0 1 0 0 0 1 0 0.000000 1.000000 -Firebase Domain 6 1 0 7 6 1 0 0 1.000000 0.000000 0.857143 0.857143 1.000000 0.923077 -Github Old Token 1 0 0 1 1 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 -Gitlab Feed Token 189 751 87 56 44 11 827 145 0.013126 0.767196 0.848101 0.800000 0.232804 0.360656 -Gitlab Incoming Email Token 37 8 0 21 19 2 6 18 0.250000 0.486486 0.555556 0.904762 0.513514 0.655172 -Google API Key 12 0 0 12 12 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 -Google Multi 10 2 0 11 10 1 1 0 0.500000 0.000000 0.916667 0.909091 1.000000 0.952381 -Google OAuth Access Token 3 0 0 3 3 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 -Grafana Provisioned API Key 22 1 0 5 5 0 1 17 0.000000 0.772727 0.260870 1.000000 0.227273 0.370370 -JSON Web Token 170 61 0 131 131 0 61 39 0.000000 0.229412 0.831169 1.000000 0.770588 0.870432 -Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 -Jira 2FA 15 6 1 12 12 0 7 3 0.000000 0.200000 0.863636 1.000000 0.800000 0.888889 -Key 3920 15689 482 472 466 6 16165 3454 0.000371 0.881122 0.827784 0.987288 0.118878 0.212204 -Nonce 91 49 0 83 81 2 47 10 0.040816 0.109890 0.914286 0.975904 0.890110 0.931034 -Other 0 8291 1 0 0 8292 0 0.000000 1.000000 -PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 -Password 1844 7524 2713 1726 1657 69 10168 187 0.006740 0.101410 0.978810 0.960023 0.898590 0.928291 -Salt 45 76 2 42 41 1 77 4 0.012821 0.088889 0.959350 0.976190 0.911111 0.942529 -Secret 1296 1574 800 1236 1230 6 2368 66 0.002527 0.050926 0.980381 0.995146 0.949074 0.971564 -Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 -Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -Token 648 4177 438 540 534 6 4609 114 0.001300 0.175926 0.977199 0.988889 0.824074 0.898990 -Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 -URL Credentials 209 144 225 196 196 0 369 13 0.000000 0.062201 0.977509 1.000000 0.937799 0.967901 -UUID 1069 265 0 1061 1060 1 264 9 0.003774 0.008419 0.992504 0.999057 0.991581 0.995305 - 12147 50315 5114 7760 7531 222 50093 4616 0.004412 0.380012 0.922545 0.971366 0.619988 0.756884 diff --git a/build.sh b/build.sh index 70acf02ea..d57c35575 100755 --- a/build.sh +++ b/build.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e set -x @@ -10,17 +10,12 @@ if [ -z "${VIRTUAL_ENV}" ]; then echo "Virtual environment has been not activated" if ! [ -d "${THISDIR}/${VENVDIR}" ]; then echo "Create new virtual environment" - python3.8 -m virtualenv -v --copies "${THISDIR}/${VENVDIR}" + python3.10 -m virtualenv -v --copies "${THISDIR}/${VENVDIR}" fi fi - + if [ -z "${VIRTUAL_ENV}" ]; then . "${THISDIR}/${VENVDIR}/bin/activate" fi -if ! pip list | grep PyYAML; then - pip install PyYAML -fi - -python download_data.py --data_dir data - +python download_data.py --clean_data --jobs $(nproc) diff --git a/download_data.py b/download_data.py index b9ae8ec0e..fae7462e9 100644 --- a/download_data.py +++ b/download_data.py @@ -9,7 +9,7 @@ import string import subprocess import sys -from argparse import ArgumentParser, Namespace +from argparse import Namespace, ArgumentParser from multiprocessing import Pool from typing import List @@ -347,7 +347,42 @@ def get_obfuscated_value(value, meta_row: MetaRow): return obfuscated_value +def check_asc_or_desc(line_data_value: str) -> bool: + """ValuePatternCheck as example""" + count_asc = 1 + count_desc = 1 + for i in range(len(line_data_value) - 1): + if line_data_value[i] in string.ascii_letters + string.digits \ + and ord(line_data_value[i + 1]) - ord(line_data_value[i]) == 1: + count_asc += 1 + if 4 == count_asc: + return True + else: + count_asc = 1 + if line_data_value[i] in string.ascii_letters + string.digits \ + and ord(line_data_value[i]) - ord(line_data_value[i + 1]) == 1: + count_desc += 1 + if 4 == count_desc: + return True + else: + count_desc = 1 + continue + return False + def generate_value(value): + """Wrapper to skip obfuscation with false positive or negatives""" + pattern_keyword = re.compile(r"(api|pass|pw[d\b])", flags=re.IGNORECASE) + pattern_similar = re.compile(r"(\w)\1{3,}") + new_value = None + while new_value is None \ + or pattern_keyword.findall(new_value) \ + or pattern_similar.findall(new_value) \ + or check_asc_or_desc(new_value): + new_value = gen_random_value(value) + return new_value + + +def gen_random_value(value): obfuscated_value = "" digits_set = string.digits diff --git a/meta/2ba83c6a.csv b/meta/2ba83c6a.csv index c08a19c0f..db3eb2bcc 100644 --- a/meta/2ba83c6a.csv +++ b/meta/2ba83c6a.csv @@ -595,10 +595,10 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 24213,911dde03,GitHub,2ba83c6a,data/2ba83c6a/test/911dde03.txt,22033,22033,T,F,6,70,F,F,,,,,0.0,0,F,F,F,Key 24214,8a68cd28,GitHub,2ba83c6a,data/2ba83c6a/test/8a68cd28.txt,119,119,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key 24215,5e763eae,GitHub,2ba83c6a,data/2ba83c6a/test/5e763eae.txt,195,195,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key -24216,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,250,250,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key +24216,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,250,250,F,F,6,38,F,F,,,,,0.0,0,F,F,F,Key 24217,4acf8d32,GitHub,2ba83c6a,data/2ba83c6a/test/4acf8d32.txt,37,37,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key:Bitbucket Client Secret:Bitbucket Client ID 24218,556bad09,GitHub,2ba83c6a,data/2ba83c6a/test/556bad09.txt,525,525,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key -24219,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,160,160,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key +24219,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,160,160,F,F,6,38,F,F,,,,,0.0,0,F,F,F,Key 24220,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,166,166,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key 24221,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,172,172,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key 24222,92404dee,GitHub,2ba83c6a,data/2ba83c6a/test/92404dee.txt,178,178,T,F,6,38,F,F,,,,,0.0,0,F,F,F,Key @@ -923,8 +923,8 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 27342,eb0705d8,GitHub,2ba83c6a,data/2ba83c6a/other/eb0705d8.pod,460,460,T,F,23,39,F,F,Any,,,Secret,3.88,16,F,F,F,Key 138189,eb0705d8,GitHub,2ba83c6a,data/2ba83c6a/other/eb0705d8.pod,460,460,F,F,54,-1,F,F,,,,,0.0,0,F,F,F,Other 27386,dc676919,GitHub,2ba83c6a,data/2ba83c6a/test/dc676919.txt,1,1,F,F,32,37,F,F,,,,,0,0,F,F,F,Secret -27407,6cfa362d,GitHub,2ba83c6a,data/2ba83c6a/src/6cfa362d.cnf,344,344,Template,T,9,19,F,F,Any,,,Secret,2.31,10,F,F,F,Password:Secret -27425,23d50951,GitHub,2ba83c6a,data/2ba83c6a/src/23d50951.cnf,358,358,T,T,9,23,F,F,Any,,,Secret,2.52,14,F,F,F,Password:Secret +27407,6cfa362d,GitHub,2ba83c6a,data/2ba83c6a/src/6cfa362d.cnf,344,344,T,T,9,19,F,F,Any,,,Secret,2.31,10,F,F,F,Secret +27425,23d50951,GitHub,2ba83c6a,data/2ba83c6a/src/23d50951.cnf,358,358,F,T,9,23,F,F,Any,,,Secret,2.52,14,F,F,F,Password:Secret 27489,a3cf7bc8,GitHub,2ba83c6a,data/2ba83c6a/test/a3cf7bc8.txt,298,298,F,F,6,24,F,F,,,,,0.0,0,F,F,F,Key 27490,a3cf7bc8,GitHub,2ba83c6a,data/2ba83c6a/test/a3cf7bc8.txt,282,282,F,F,6,14,F,F,,,,,0.0,0,F,F,F,Key 27491,b40503ed,GitHub,2ba83c6a,data/2ba83c6a/test/b40503ed.txt,154,154,F,F,6,11,F,F,,,,,0.0,0,F,F,F,Key diff --git a/meta/60f9915d.csv b/meta/60f9915d.csv index 28645ae95..f0f7d2a97 100644 --- a/meta/60f9915d.csv +++ b/meta/60f9915d.csv @@ -62,7 +62,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 32158,a414e92e,GitHub,60f9915d,data/60f9915d/test/a414e92e.go,300,300,T,T,67,74,T,F,CharsOnly,,,Secret,2.81,7,F,F,F,URL Credentials 32159,7e47b56e,GitHub,60f9915d,data/60f9915d/test/7e47b56e.go,14,14,T,T,79,86,T,F,CharsOnly,,,Secret,2.81,7,F,F,F,URL Credentials 32162,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,206,206,T,T,49,56,T,F,CharsOnly,,,Secret,2.81,7,F,F,F,URL Credentials -32163,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,219,219,T,T,54,70,T,F,Any,,,Secret,3.25,16,F,F,F,URL Credentials +32163,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,219,219,F,T,,,T,F,Any,,,Secret,3.25,16,F,F,F,URL Credentials:Password 35875,e45e45ba,GitHub,60f9915d,data/60f9915d/src/e45e45ba.yml,48,48,T,T,25,32,F,F,CharsOnly,,,Secret,2.81,7,F,F,F,Password 46090,35a99f76,GitHub,60f9915d,data/60f9915d/test/35a99f76.go,79,79,Template,T,18,24,F,F,CharsOnly,,,Secret,2.25,6,F,F,F,Password 52007,3d9a9f38,GitHub,60f9915d,data/60f9915d/test/3d9a9f38.go,639,639,T,F,53,60,F,F,,,,,0.0,0,F,F,F,URL Credentials @@ -122,4 +122,3 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 137948,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,161,161,T,F,86,93,F,F,,,,,0.0,0,F,F,F,Password:URL Credentials 138239,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,161,161,F,F,65,-1,F,F,,,,,0.0,0,F,F,F,Other 138240,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,161,161,F,F,104,-1,F,F,,,,,0.0,0,F,F,F,Other -138241,4dc56e64,GitHub,60f9915d,data/60f9915d/test/4dc56e64.go,219,219,F,F,81,85,F,F,,,,,0.0,0,F,F,F,Password diff --git a/meta/69d49010.csv b/meta/69d49010.csv index fc79a94e2..fdab5f4c4 100644 --- a/meta/69d49010.csv +++ b/meta/69d49010.csv @@ -163,3 +163,9 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 133411,24cbbb32,GitHub,69d49010,data/69d49010/test/24cbbb32.py,680,680,F,F,,,F,F,,,,,0,0,F,F,F,Secret 135283,85d52436,GitHub,69d49010,data/69d49010/test/85d52436.py,279,279,T,F,20,33,F,F,,,,,0.0,0,F,F,F,Password:URL Credentials 1341461,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,438,438,T,F,17,53,F,F,,,,,0.0,0,F,F,F,UUID:Token +1479347,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,258,258,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth +1479349,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,277,277,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth +1479352,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,303,303,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth +1479354,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,329,329,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth +1479356,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,356,356,T,F,35,48,F,F,,,,,0.0,0,F,F,F,Auth +1479361,5aad918a,GitHub,69d49010,data/69d49010/test/5aad918a.py,433,433,T,F,40,51,F,F,,,,,0.0,0,F,F,F,Auth diff --git a/meta/75e7c64d.csv b/meta/75e7c64d.csv index 81eed77ee..08be620db 100644 --- a/meta/75e7c64d.csv +++ b/meta/75e7c64d.csv @@ -47,3 +47,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 114451,cb047af3,GitHub,75e7c64d,data/75e7c64d/src/cb047af3.yml,39,39,F,F,-1,-1,F,F,,,,,0.0,-1,F,F,F,Key 114452,2a39f30f,GitHub,75e7c64d,data/75e7c64d/src/2a39f30f.yml,37,37,F,F,-1,-1,F,F,,,,,0.0,-1,F,F,F,Key 133463,841e3aef,GitHub,75e7c64d,data/75e7c64d/src/841e3aef.py,329,329,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key +1479362,0f6303ab,GitHub,75e7c64d,data/75e7c64d/src/0f6303ab.py,101,101,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key diff --git a/meta/8cda00f3.csv b/meta/8cda00f3.csv index 4addf083d..c5474f1a9 100644 --- a/meta/8cda00f3.csv +++ b/meta/8cda00f3.csv @@ -913,3 +913,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 135313,eaf18c55,GitHub,8cda00f3,data/8cda00f3/other/eaf18c55.md,209,209,T,F,21,37,F,F,,,,,0,0,F,F,F,Auth 135314,eaf18c55,GitHub,8cda00f3,data/8cda00f3/other/eaf18c55.md,300,300,T,F,21,37,F,F,,,,,0,0,F,F,F,Auth 1340770,4cf2897e,GitHub,8cda00f3,data/8cda00f3/src/4cf2897e.go,16,16,T,F,22,58,F,F,,,,,0.0,0,F,F,F,UUID +1479363,0a7921b3,GitHub,8cda00f3,data/8cda00f3/test/0a7921b3.go,155,155,F,F,,,F,F,,,,,0.0,0,F,F,F,Auth diff --git a/meta_cred.py b/meta_cred.py index 3abf1fe9e..d1175101f 100644 --- a/meta_cred.py +++ b/meta_cred.py @@ -16,7 +16,8 @@ def __init__(self, cs_cred: dict): if not self.path.startswith('data/'): # license files ... self.path = '/'.join([str(x) for x in path.parts[-3:]]) - assert self.path.startswith('data/'), cs_cred # path for benchmark must start from data/ + # path for benchmark must start from "data/" + assert self.path.startswith('data/'), cs_cred self.valid_path = bool(self.valid_path_regex.match(self.path)) # to skip license files self.line_start = line_data_list[0]["line_num"] diff --git a/update_meta.py b/update_meta.py deleted file mode 100644 index e2cf7acd5..000000000 --- a/update_meta.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env python3 - -""" -The script is developed to update meta with absolute positions of value instead from stripped line -""" -import json -import os -import subprocess -import sys -from argparse import ArgumentParser -from functools import cache -from typing import Dict, Tuple, List - -from meta_cred import MetaCred -from meta_row import read_meta - -EXIT_SUCCESS = 0 -EXIT_FAILURE = 1 - - -@cache -def read_cache(path) -> list[str]: - with open(path, "r", encoding="utf8") as f: - return f.read().replace("\r\n", '\n').replace('\r', '\n').split('\n') - - -def main(meta_dir: str, data_dir: str, report_file: str) -> int: - errors = 0 - updated_rows = 0 - - if not os.path.exists(meta_dir): - raise FileExistsError(f"{meta_dir} directory does not exist.") - if not os.path.exists(data_dir): - raise FileExistsError(f"{data_dir} directory does not exist.") - creds: Dict[Tuple[str, int, int], List[MetaCred]] = {} - with open(report_file, 'r') as f: - for i in json.load(f): - cred = MetaCred(i) - multi_cred_key = (cred.path, cred.line_start, cred.line_end) - if multi_cred_key in creds: - creds[multi_cred_key].append(cred) - else: - creds[multi_cred_key] = [cred] - - meta = read_meta(meta_dir) - meta.sort(key=lambda x: (x.FilePath, x.LineStart, x.LineEnd, x.ValueStart, x.ValueEnd)) - for row in meta: - if "2ba83c6a" != row.RepoName: - continue # later - categories = set(row.Category.split(':')) - if "Secret" in categories : - meta_key = (row.FilePath, row.LineStart, row.LineEnd) - possible_creds = creds.get(meta_key) - if not possible_creds: - lines = read_cache(i.FilePath) - line = lines[i.LineStart - 1] - if 'secret' in line.lower: - continue - row.Category = "Other" - errors += subprocess.call( - ["sed", "-i", - f"s|^{row.Id},{row.FileID},.*$|" + str(row) + "|", - f"{meta_dir}/{row.RepoName}.csv"]) - updated_rows += 1 - continue - - if 0 > row.ValueStart: - # has markup for whole line - if any("Secret" == x.rule for x in possible_creds): - # ok - continue - categories.remove("Secret") - if 1 == len(categories): - # should be changed - categories = set(x.rule for x in possible_creds) - - cred = possible_creds[0] - if "Key" == cred.rule: - if ((16 <= len(cred.value) or 'hexkey' in cred.variable) - and not any(x in cred.line.lower() for x in - ['0011223344', '0001020304', '0b0b0b0b0b0b0b0', 'alice_', 'bob_', 'alice-', - 'bob-', 'fffefdfcfbfaf9f', '7f7e7d7c7b7a797877', '010203040506070809', - 'fefefefefefe','808182838485868788','000000000','111111111','eeeeeeeeee','fffffffffff','0123456789' - ]) - and 'OBJ_' not in cred.line - - ): - # may look like norm key - row.ValueStart = cred.value_start - row.ValueEnd = cred.value_end - row.GroundTruth = 'T' - else: - categories = set(x.rule for x in possible_creds) - for cred in possible_creds: - if "Key" == cred.rule: - if ((16 <= len(cred.value) or 'hexkey' in cred.variable) - and not any(x in cred.line.lower() for x in - ['0011223344', '0001020304', '0b0b0b0b0b0b0b0', 'alice_', 'bob_', 'alice-', - 'bob-', 'fffefdfcfbfaf9f', '7f7e7d7c7b7a797877', '010203040506070809', - 'fefefefefefe', '808182838485868788', '000000000', '111111111', - 'eeeeeeeeee', 'fffffffffff', '0123456789' - ]) - and 'OBJ_' not in cred.line - - ): - # may look like norm key - row.ValueStart = cred.value_start - row.ValueEnd = cred.value_end - row.GroundTruth = 'T' - break - - else: - if any("Secret" == x.rule for x in possible_creds if x.value_start == row.ValueStart): - # ok - continue - else: - # wrong position in markup - must be skipped - if 1 == len(categories): - # should be changed - categories = set(x.rule for x in possible_creds if x.value_start == row.ValueStart and ( - x.value_end == row.ValueEnd or 0 > row.ValueEnd)) - if not categories: - # wrong end position - categories = set(x.rule for x in possible_creds if x.value_start == row.ValueStart) - row.ValueEnd = -1 - assert row.GroundTruth == 'F' or row.GroundTruth == 'Template', row - row.GroundTruth = 'F' - else: - categories.remove("Secret") - - if not categories: - lines = read_cache(row.FilePath) - line = lines[row.LineStart - 1] - if 'secret' in line.lower(): - continue - categories.add("Other") - row.Category = ':'.join(categories) - errors += subprocess.call( - ["sed", "-i", - f"s|^{row.Id},{row.FileID},.*$|" + str(row) + "|", - f"{meta_dir}/{row.RepoName}.csv"]) - updated_rows += 1 - - result = EXIT_SUCCESS if 0 == errors else EXIT_FAILURE - print(f"Updated {updated_rows} of {len(meta)}, errors: {errors}, {result}", flush=True) - return result - - -if __name__ == "__main__": - parser = ArgumentParser(prog=f"python {os.path.basename(__file__)}", - description="Temporally console script for update meta with Secret category to Other") - - parser.add_argument("report_file", help="Credentials report from CredSweeper") - parser.add_argument("meta_dir", help="Markup location", nargs='?', default="meta") - parser.add_argument("data_dir", help="Dataset location", nargs='?', default="data") - _args = parser.parse_args() - - exit_code = main(_args.meta_dir, _args.data_dir, _args.report_file) - sys.exit(exit_code) From e9c2aeba549712f070aa052dcb8a6612362413a9 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 19 Aug 2024 09:55:14 +0300 Subject: [PATCH 2/4] add BM report producing from empty report --- .ci/benchmark.txt | 269 +++++++++++++++++++++++++++++++++++ .ci/empty_report.json | 1 + .github/workflows/review.yml | 7 + 3 files changed, 277 insertions(+) create mode 100644 .ci/benchmark.txt create mode 100644 .ci/empty_report.json diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt new file mode 100644 index 000000000..97bd04488 --- /dev/null +++ b/.ci/benchmark.txt @@ -0,0 +1,269 @@ +META MD5 46ed058cc55cc6e05b84e1b62d506059 +DATA MD5 4833f5614e463ecc7989b00a29499240 +DATA: 16345157 interested lines. MARKUP: 62644 items +FileType FileNumber ValidLines Positives Negatives Templates +--------------- ------------ ------------ ----------- ----------- ----------- + 194 28318 66 414 85 +.1 2 641 2 5 +.admx 1 26 1 +.adoc 1 158 13 6 1 +.api 2 118 4 +.asciidoc 96 14471 50 347 27 +.axaml 5 286 5 +.backup 1 62 2 1 +.bash 2 2158 2 1 +.bat 4 233 14 2 +.bats 15 2804 14 49 9 +.bazel 3 424 8 +.build 2 40 3 +.bundle 4 1512 580 +.bzl 3 2503 11 +.c 179 284009 8 942 5 +.cc 29 30562 617 1 +.cf 3 126 2 1 +.cfg 1 385 1 1 +.cjs 1 725 3 6 +.clj 2 133 3 +.cljc 5 2421 11 +.cls 1 657 1 +.cmd 4 401 2 3 +.cnf 8 858 15 36 16 +.coffee 1 585 2 +.conf 60 4945 53 67 53 +.config 20 492 16 38 1 +.cpp 15 5688 2 61 +.creds 1 10 1 1 +.crlf 1 27 1 +.crt 2 4979 211 +.cs 268 79532 158 894 94 +.cshtml 5 180 12 +.csp 3 379 9 +.csproj 1 14 1 +.css 6 13564 10 +.csv 1 109 78 +.dart 2 22 2 +.deprecated 1 126 1 +.development 1 5 1 +.diff 2 2460 8 2 +.dist 5 257 7 13 +.doc 1 2489 3 +.dockerfile 1 19 1 +.dot 1 160 6 +.eex 4 74 8 +.ejs 1 13 1 +.env 10 136 11 3 17 +.erb 13 323 27 +.erl 4 96 7 +.ex 25 4968 5 98 5 +.example 17 1838 74 38 54 +.exs 24 4842 8 187 4 +.ext 5 211 1 4 2 +.fsproj 1 75 1 2 +.g4 2 201 2 +.gd 1 37 1 +.gml 3 3075 16 +.gni 3 5017 19 +.go 1080 566476 692 4117 739 +.golden 5 1168 1 13 29 +.gradle 45 3265 4 90 100 +.graphql 7 420 13 +.graphqls 1 30 1 +.groovy 22 4986 24 215 1 +.h 9 1958 36 +.haml 9 191 17 +.hbs 2 54 3 +.hs 14 4140 30 61 5 +.html 53 15327 22 110 18 +.idl 2 777 1 4 +.iml 6 699 30 +.in 6 2130 6 43 10 +.inc 2 56 2 1 +.ini 11 1437 25 12 18 +.ipynb 1 134 5 +.j 1 241 2 2 +.j2 30 5530 6 186 10 +.java 621 134132 360 1366 171 +.jenkinsfile 1 58 2 6 +.jinja2 1 64 2 +.js 659 536413 535 2489 330 +.json 850 13046270 1070 10897 140 +.jsp 13 3202 1 40 +.jsx 7 857 19 +.jwt 1 1 2 +.key 83 2737 70 14 +.kt 123 20774 67 379 3 +.l 1 982 1 +.las 1 6656 35 +.lasso 1 230 7 +.lasso9 1 164 5 +.ldif 2 286 20 +.ldiff 1 20 1 +.ldml 1 6656 35 +.leex 1 9 2 +.less 4 3023 12 +.libsonnet 2 210 1 11 +.list 2 15 2 +.lkml 1 43 1 +.lock 24 160912 142 +.log 2 199 38 52 +.lua 10 1924 37 3 +.m 16 13358 11 158 3 +.manifest 3 102 9 6 +.markdown 3 139 3 1 +.markerb 3 12 3 +.marko 1 21 2 +.md 674 149399 710 2336 624 +.mdx 3 549 7 +.mjml 1 18 1 +.mjs 22 4424 76 340 +.mk 1 5878 13 +.ml 1 1856 16 +.mlir 2 1596 19 +.mod 2 96 4 +.moo 1 1404 26 +.mqh 1 1023 2 +.msg 1 26644 1 1 +.mysql 1 36 2 +.ndjson 2 5006 69 237 2 +.nix 4 211 12 +.nolint 1 2 1 +.odd 1 1281 43 +.oracle 1 9 1 +.p8 4 64 4 +.pan 2 48 4 +.patch 4 109405 4 27 +.pbxproj 1 941 2 +.pem 48 1169 47 8 +.php 371 75710 128 1619 79 +.pl 16 14727 6 34 +.pm 3 744 7 +.po 3 2994 15 +.pod 9 1859 1 23 +.pony 1 83 4 +.postinst 2 354 4 15 +.pp 10 563 16 +.ppk 1 45 36 +.private 1 15 1 +.proj 1 85 5 +.properties 48 1621 52 27 33 +.proto 5 5768 2 49 +.ps1 16 8509 15 64 2 +.ps1xml 1 5022 1 +.pug 2 193 2 +.purs 1 69 4 +.pxd 1 150 5 2 +.py 890 291553 680 3292 728 +.pyi 4 1361 9 +.pyp 1 167 1 +.pyx 2 1094 23 +.r 4 62 6 3 1 +.rake 2 51 2 +.rb 860 131838 258 3311 613 +.re 1 31 1 +.red 1 159 1 +.release 1 13 4 +.response 1 26 2 +.resx 11 3519 310 +.rexx 1 92 3 +.rnh 1 1354 3 2 +.rno 1 7229 2 +.rrc 39 1404 281 +.rs 31 9855 2 233 11 +.rsc 1 691 1 +.rsp 16 7101 19 10 28 +.rst 86 33980 70 321 68 +.rules 1 6 2 +.sample 2 25 3 4 4 +.sbt 3 570 5 2 +.scala 40 5071 22 101 +.scss 16 8553 32 1 +.secrets 1 11 1 +.sh 143 21525 51 466 30 +.slim 1 153 1 2 +.smali 1 775 18 +.snap 3 1708 9 30 2 +.spec 2 332 2 +.spin 1 565 1 +.sql 27 6606 126 56 4 +.storyboard 20 1802 341 +.strings 20 1240 137 +.stub 3 84 6 +.sublime-keymap 1 3 1 +.sum 37 22854 283 +.svg 1 638 12 +.t 9 1767 24 43 14 +.td 2 14002 6 +.template 19 1633 4 35 11 +.test 2 24 25 4 +.testsettings 1 21 1 10 +.tf 21 1377 3 29 2 +.tfstate 4 307 22 11 4 +.tfvars 1 31 3 2 +.tl 2 2161 161 2 +.tmpl 5 336 3 9 +.token 1 1 3 +.toml 83 2379 53 105 156 +.tpl 1 43 1 +.travis 1 34 4 3 1 +.ts 583 106730 159 1800 201 +.tsx 54 7914 1 114 5 +.ttar 1 452 1 +.txt 440 78102 5299 6343 49 +.utf8 1 77 2 +.vsixmanifest 1 36 1 +.vsmdi 1 6 2 +.vue 50 8736 1 154 1 +.xaml 21 8103 162 +.xcscheme 1 109 6 +.xib 11 503 169 +.xml 9 689 9 +.xsl 1 311 1 +.yaml 137 19004 123 345 44 +.yml 418 36162 545 892 380 +.zsh 6 872 12 +.zsh-theme 1 97 1 +TOTAL: 10259 16345157 12150 50324 5112 +credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0 +Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 +------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ---- +API 128 3161 189 0 0 3350 128 0.000000 1.000000 0.963197 0.000000 +AWS Client ID 167 21 0 0 0 21 167 0.000000 1.000000 0.111702 0.000000 +AWS Multi 75 16 0 0 0 16 75 0.000000 1.000000 0.175824 0.000000 +AWS S3 Bucket 66 24 0 0 0 24 66 0.000000 1.000000 0.266667 0.000000 +Atlassian Old PAT token 27 308 3 0 0 311 27 0.000000 1.000000 0.920118 0.000000 +Auth 418 2727 76 0 0 2803 418 0.000000 1.000000 0.870227 0.000000 +Azure Access Token 19 0 0 0 0 0 19 1.000000 0.000000 0.000000 +BASE64 Private Key 7 4 0 0 0 4 7 0.000000 1.000000 0.363636 0.000000 +BASE64 encoded PEM Private Key 7 0 0 0 0 0 7 1.000000 0.000000 0.000000 +Bitbucket Client ID 143 2097 9 0 0 2106 143 0.000000 1.000000 0.936416 0.000000 +Bitbucket Client Secret 301 809 10 0 0 819 301 0.000000 1.000000 0.731250 0.000000 +Certificate 23 471 1 0 0 472 23 0.000000 1.000000 0.953535 0.000000 +Credential 95 420 74 0 0 494 95 0.000000 1.000000 0.838710 0.000000 +Docker Swarm Token 2 0 0 0 0 0 2 1.000000 0.000000 0.000000 +Dropbox App secret 64 139 1 0 0 140 64 0.000000 1.000000 0.686275 0.000000 +Facebook Access Token 0 1 0 0 0 1 0 0.000000 1.000000 +Firebase Domain 6 1 0 0 0 1 6 0.000000 1.000000 0.142857 0.000000 +Github Old Token 1 0 0 0 0 0 1 1.000000 0.000000 0.000000 +Gitlab Feed Token 189 751 87 0 0 838 189 0.000000 1.000000 0.815969 0.000000 +Gitlab Incoming Email Token 37 8 0 0 0 8 37 0.000000 1.000000 0.177778 0.000000 +Google API Key 12 0 0 0 0 0 12 1.000000 0.000000 0.000000 +Google Multi 10 2 0 0 0 2 10 0.000000 1.000000 0.166667 0.000000 +Google OAuth Access Token 3 0 0 0 0 0 3 1.000000 0.000000 0.000000 +Grafana Provisioned API Key 22 1 0 0 0 1 22 0.000000 1.000000 0.043478 0.000000 +JSON Web Token 170 61 0 0 0 61 170 0.000000 1.000000 0.264069 0.000000 +Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 +Jira 2FA 15 6 1 0 0 7 15 0.000000 1.000000 0.318182 0.000000 +Key 3918 15693 482 0 0 16175 3918 0.000000 1.000000 0.805007 0.000000 +Nonce 91 49 0 0 0 49 91 0.000000 1.000000 0.350000 0.000000 +Other 0 8291 1 0 0 8292 0 0.000000 1.000000 +PEM Private Key 1019 1483 0 0 0 1483 1019 0.000000 1.000000 0.592726 0.000000 +Password 1843 7526 2712 0 0 10238 1843 0.000000 1.000000 0.847446 0.000000 +Salt 45 76 2 0 0 78 45 0.000000 1.000000 0.634146 0.000000 +Secret 1297 1575 799 0 0 2374 1297 0.000000 1.000000 0.646690 0.000000 +Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 +Slack Token 4 1 0 0 0 1 4 0.000000 1.000000 0.200000 0.000000 +Token 648 4177 438 0 0 4615 648 0.000000 1.000000 0.876876 0.000000 +Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 +URL Credentials 208 145 225 0 0 370 208 0.000000 1.000000 0.640138 0.000000 +UUID 1069 265 0 0 0 265 1069 0.000000 1.000000 0.198651 0.000000 + 12150 50324 5112 0 0 0 50324 12150 0.000000 1.000000 0.805519 0.000000 diff --git a/.ci/empty_report.json b/.ci/empty_report.json new file mode 100644 index 000000000..fe51488c7 --- /dev/null +++ b/.ci/empty_report.json @@ -0,0 +1 @@ +[] diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml index d5acd676c..940616478 100644 --- a/.github/workflows/review.yml +++ b/.github/workflows/review.yml @@ -68,6 +68,12 @@ jobs: python review_data.py &>review_head.txt ansi2html --style 'pre {font-family: monospace; font-size: large}' review_head.html + - name: Produce benchmark scores from empty report to check markup only + if: steps.cache-data.outputs.cache-hit != 'true' + run: | + python -m benchmark --scanner credsweeper --load .ci/empty_report.json >benchmark.txt + diff -U3 .ci/benchmark.txt benchmark.txt + - name: Upload artifact if: always() uses: actions/upload-artifact@v4 @@ -76,6 +82,7 @@ jobs: path: | review_head.txt review_head.html + benchmark.txt # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # From 4ad15cb5fbb22d9eb920ace6b9bef068ac381e97 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 19 Aug 2024 10:13:14 +0300 Subject: [PATCH 3/4] touch meta for restart cache --- meta/c41bb134.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meta/c41bb134.csv b/meta/c41bb134.csv index 5afd0a5ba..26ea9d82b 100644 --- a/meta/c41bb134.csv +++ b/meta/c41bb134.csv @@ -6,7 +6,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 2503,8a3fd767,GitHub,c41bb134,data/c41bb134/test/8a3fd767.config,2,2,F,F,,,F,F,,,,,0,0,F,F,F,Key 3878,e99d6a11,GitHub,c41bb134,data/c41bb134/src/e99d6a11.nix,152,152,F,F,,,F,F,,,,,0,0,F,F,F,Password 4323,68cbce99,GitHub,c41bb134,data/c41bb134/test/68cbce99.hs,54,54,F,F,,,F,F,,,,,0,0,F,F,F,API -5571,1abbf729,GitHub,c41bb134,data/c41bb134/test/1abbf729.sql,231,231,Template,T,23,27,F,F,CharOnly,,,Secret,1.5,4,F,F,F,Password +5571,1abbf729,GitHub,c41bb134,data/c41bb134/test/1abbf729.sql,231,231,F,T,,,F,F,CharOnly,,,Secret,1.5,4,F,F,F,Password 5620,4460568d,GitHub,c41bb134,data/c41bb134/src/4460568d.hs,47,47,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key 7252,1abbf729,GitHub,c41bb134,data/c41bb134/test/1abbf729.sql,2038,2038,F,F,,,F,F,,,,,0,0,F,F,F,Auth 7839,328c2f31,GitHub,c41bb134,data/c41bb134/src/328c2f31.hs,294,294,F,F,,,F,F,,,,,0,0,F,F,F,API From 78b030e070c20dd10f5ddbff5a0ca33b217627d0 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 19 Aug 2024 10:27:52 +0300 Subject: [PATCH 4/4] CI BM fix --- .ci/benchmark.txt | 10 +++++----- .github/workflows/review.yml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index 97bd04488..9baa23f4f 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,4 +1,4 @@ -META MD5 46ed058cc55cc6e05b84e1b62d506059 +META MD5 67039fe64aba3375bbcf27f16984acc5 DATA MD5 4833f5614e463ecc7989b00a29499240 DATA: 16345157 interested lines. MARKUP: 62644 items FileType FileNumber ValidLines Positives Negatives Templates @@ -184,7 +184,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .snap 3 1708 9 30 2 .spec 2 332 2 .spin 1 565 1 -.sql 27 6606 126 56 4 +.sql 27 6606 126 57 3 .storyboard 20 1802 341 .strings 20 1240 137 .stub 3 84 6 @@ -222,7 +222,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 418 36162 545 892 380 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10259 16345157 12150 50324 5112 +TOTAL: 10259 16345157 12150 50325 5111 credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ---- @@ -257,7 +257,7 @@ Key 3918 15693 482 Nonce 91 49 0 0 0 49 91 0.000000 1.000000 0.350000 0.000000 Other 0 8291 1 0 0 8292 0 0.000000 1.000000 PEM Private Key 1019 1483 0 0 0 1483 1019 0.000000 1.000000 0.592726 0.000000 -Password 1843 7526 2712 0 0 10238 1843 0.000000 1.000000 0.847446 0.000000 +Password 1843 7527 2711 0 0 10238 1843 0.000000 1.000000 0.847446 0.000000 Salt 45 76 2 0 0 78 45 0.000000 1.000000 0.634146 0.000000 Secret 1297 1575 799 0 0 2374 1297 0.000000 1.000000 0.646690 0.000000 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 @@ -266,4 +266,4 @@ Token 648 4177 438 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 URL Credentials 208 145 225 0 0 370 208 0.000000 1.000000 0.640138 0.000000 UUID 1069 265 0 0 0 265 1069 0.000000 1.000000 0.198651 0.000000 - 12150 50324 5112 0 0 0 50324 12150 0.000000 1.000000 0.805519 0.000000 + 12150 50325 5111 0 0 0 50325 12150 0.000000 1.000000 0.805522 0.000000 diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml index 940616478..923bce564 100644 --- a/.github/workflows/review.yml +++ b/.github/workflows/review.yml @@ -72,7 +72,7 @@ jobs: if: steps.cache-data.outputs.cache-hit != 'true' run: | python -m benchmark --scanner credsweeper --load .ci/empty_report.json >benchmark.txt - diff -U3 .ci/benchmark.txt benchmark.txt + diff --unified=3 --ignore-all-space --ignore-blank-lines .ci/benchmark.txt benchmark.txt - name: Upload artifact if: always()