diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 3755f589f..e10f71b19 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -160,14 +160,14 @@ jobs: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # performance_benchmark: - # put the benchmark in single job to keep constant environment during test + # put the benchmark in single job to keep constant environment during test python 3.8 is not applicable needs: [ download_data ] runs-on: ubuntu-latest strategy: fail-fast: false matrix: - python-version: [ "3.10", "3.9", "3.10", "3.11" ] + python-version: [ "3.9", "3.10", "3.11" ] steps: diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index 24b90f466..0eef1bd67 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -1,4 +1,4 @@ -DATA: 16988575 interested lines. MARKUP: 62853 items +DATA: 16988573 interested lines. MARKUP: 62864 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- 194 28318 65 430 89 @@ -54,14 +54,14 @@ FileType FileNumber ValidLines Positives Negatives Templat .erl 4 96 8 .ex 25 4968 3 105 5 .example 17 1838 74 35 55 -.exs 24 4842 3 187 4 +.exs 24 4842 3 188 4 .ext 5 211 1 4 2 .fsproj 1 75 1 .g4 2 201 2 .gd 1 37 1 .gml 3 3075 26 .gni 3 5017 18 -.go 1084 569469 661 4330 742 +.go 1084 569469 661 4344 742 .golden 5 1168 1 14 29 .gradle 45 3265 4 91 100 .graphql 8 445 1 13 @@ -83,7 +83,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .java 621 134132 328 1341 170 .jenkinsfile 1 58 1 7 .jinja2 1 64 2 -.js 659 536413 541 2630 336 +.js 659 536413 541 2631 336 .json 861 13670751 914 10970 143 .jsp 13 3202 1 42 .jsx 7 857 19 @@ -107,11 +107,10 @@ FileType FileNumber ValidLines Positives Negatives Templat .lua 10 1924 3 37 3 .m 16 13358 8 152 3 .manifest 3 102 3 -.map 2 2 2 .markdown 3 139 3 1 .markerb 3 12 3 .marko 1 21 2 -.md 679 149755 784 2563 671 +.md 679 149755 784 2565 671 .mdx 3 549 7 .mjml 1 18 1 .mjs 22 4424 108 310 @@ -223,8 +222,8 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 418 36162 467 917 384 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10335 16988575 8365 60294 5233 -credsweeper result_cnt : 7754, lost_cnt : 0, true_cnt : 7202, false_cnt : 552 +TOTAL: 10333 16988573 8365 60310 5233 +credsweeper result_cnt : 7771, lost_cnt : 0, true_cnt : 7202, false_cnt : 569 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ---- -------- -------- -------- -------- -------- -------- API 123 3106 185 112 109 3 3288 14 0.000912 0.113821 0.995021 0.973214 0.886179 0.927660 @@ -232,7 +231,7 @@ AWS Client ID 168 13 0 16 AWS Multi 75 12 0 87 75 11 1 0 0.916667 0.000000 0.873563 0.872093 1.000000 0.931677 AWS S3 Bucket 61 25 0 87 61 24 1 0 0.960000 0.000000 0.720930 0.717647 1.000000 0.835616 Atlassian Old PAT token 27 212 3 12 3 8 207 24 0.037209 0.888889 0.867769 0.272727 0.111111 0.157895 -Auth 404 2745 77 369 349 20 2802 55 0.007087 0.136139 0.976751 0.945799 0.863861 0.902975 +Auth 404 2746 77 370 349 21 2802 55 0.007439 0.136139 0.976449 0.943243 0.863861 0.901809 Azure Access Token 19 0 0 0 0 0 19 1.000000 0.000000 0.000000 BASE64 Private Key 7 2 0 7 7 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 @@ -257,15 +256,15 @@ JSON Web Token 284 11 2 27 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 14 6 0 10 10 0 6 4 0.000000 0.285714 0.800000 1.000000 0.714286 0.833333 Key 483 7844 464 444 435 9 8299 48 0.001083 0.099379 0.993516 0.979730 0.900621 0.938511 -Nonce 83 52 0 84 79 5 47 4 0.096154 0.048193 0.933333 0.940476 0.951807 0.946108 +Nonce 83 53 0 85 79 6 47 4 0.113208 0.048193 0.926471 0.929412 0.951807 0.940476 Other 0 0 5 0 0 5 0 0.000000 1.000000 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 -Password 1836 7434 2754 1665 1613 52 10136 223 0.005104 0.121460 0.977129 0.968769 0.878540 0.921451 +Password 1836 7450 2754 1680 1613 67 10137 223 0.006566 0.121460 0.975914 0.960119 0.878540 0.917520 Salt 42 72 2 38 38 0 74 4 0.000000 0.095238 0.965517 1.000000 0.904762 0.950000 -Secret 1358 29107 868 1234 1229 5 29970 129 0.000167 0.094993 0.995723 0.995948 0.905007 0.948302 +Secret 1358 29105 868 1234 1229 5 29968 129 0.000167 0.094993 0.995723 0.995948 0.905007 0.948302 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 Token 585 3950 439 519 511 8 4381 74 0.001823 0.126496 0.983514 0.984586 0.873504 0.925725 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 URL Credentials 172 122 250 162 162 0 372 10 0.000000 0.058140 0.981618 1.000000 0.941860 0.970060 - 8365 60294 5233 7896 7202 552 59742 1163 0.009155 0.139032 0.975021 0.928811 0.860968 0.893604 + 8365 60310 5233 7913 7202 569 59741 1163 0.009435 0.139032 0.974780 0.926779 0.860968 0.892662 diff --git a/credsweeper/app.py b/credsweeper/app.py index d96b1f673..9035adcd7 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -47,7 +47,7 @@ def __init__(self, sort_output: bool = False, use_filters: bool = True, pool_count: int = 1, - ml_batch_size: Optional[int] = 16, + ml_batch_size: Optional[int] = None, ml_threshold: Union[float, ThresholdPreset] = ThresholdPreset.medium, azure: bool = False, cuda: bool = False, @@ -107,7 +107,7 @@ def __init__(self, self.json_filename: Union[None, str, Path] = json_filename self.xlsx_filename: Union[None, str, Path] = xlsx_filename self.sort_output = sort_output - self.ml_batch_size = ml_batch_size + self.ml_batch_size = ml_batch_size if ml_batch_size and 0 < ml_batch_size else 16 self.ml_threshold = ml_threshold self.azure = azure self.cuda = cuda diff --git a/credsweeper/common/constants.py b/credsweeper/common/constants.py index 05ab5d8bf..39166c68c 100644 --- a/credsweeper/common/constants.py +++ b/credsweeper/common/constants.py @@ -17,8 +17,10 @@ class KeywordPattern: # Authentication scheme ( oauth | basic | bearer | apikey ) precedes to credential value = r"(?P((b|r|br|rb|u|f|rf|fr|\\{0,8})?[`'\"]){1,4})?" \ r"( ?(oauth|bot|basic|bearer|apikey|accesskey) )?" \ - r"(?P(?:\{[^}]{3,8000}\})|(?:<[^>]{3,8000}>)|" \ - r"(?(value_leftquote)(?:\\[tnrux0-7][0-9a-f]*|[^`'\"\\])|(?:\\n|\\r|\\?[^\s`'\"\\,;])){3,8000})" \ + r"(?P" \ + r"(?(value_leftquote)(?:\\[tnrux0-7][0-9a-f]*|[^`'\"\\])|(?:\\n|\\r|\\?[^\s`'\"\\,;])){3,8000}" \ + r"|(?:\{[^}]{3,8000}\})|(?:<[^>]{3,8000}>)" \ + r")" \ r"(?(value_leftquote)(?P(\\{0,8}[`'\"]){1,4})?)" @classmethod diff --git a/credsweeper/filters/__init__.py b/credsweeper/filters/__init__.py index c0fa42640..66de37b97 100644 --- a/credsweeper/filters/__init__.py +++ b/credsweeper/filters/__init__.py @@ -14,6 +14,7 @@ from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck from credsweeper.filters.value_dictionary_keyword_check import ValueDictionaryKeywordCheck from credsweeper.filters.value_dictionary_value_length_check import ValueDictionaryValueLengthCheck +from credsweeper.filters.value_discord_bot_check import ValueDiscordBotCheck from credsweeper.filters.value_entropy_base32_check import ValueEntropyBase32Check from credsweeper.filters.value_entropy_base36_check import ValueEntropyBase36Check from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check @@ -21,6 +22,7 @@ from credsweeper.filters.value_first_word_check import ValueFirstWordCheck from credsweeper.filters.value_github_check import ValueGitHubCheck from credsweeper.filters.value_grafana_check import ValueGrafanaCheck +from credsweeper.filters.value_grafana_service_check import ValueGrafanaServiceCheck from credsweeper.filters.value_hex_number_check import ValueHexNumberCheck from credsweeper.filters.value_ip_check import ValueIPCheck from credsweeper.filters.value_jfrog_token_check import ValueJfrogTokenCheck diff --git a/credsweeper/filters/value_array_dictionary_check.py b/credsweeper/filters/value_array_dictionary_check.py index 369c3ec9c..4aa9ced0c 100644 --- a/credsweeper/filters/value_array_dictionary_check.py +++ b/credsweeper/filters/value_array_dictionary_check.py @@ -30,7 +30,8 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: True, if need to filter candidate and False if left """ - + if line_data.is_well_quoted_value: + return False if self.PATTERN.search(line_data.value): return True diff --git a/credsweeper/filters/value_discord_bot_check.py b/credsweeper/filters/value_discord_bot_check.py new file mode 100644 index 000000000..7d2c711d1 --- /dev/null +++ b/credsweeper/filters/value_discord_bot_check.py @@ -0,0 +1,31 @@ +import contextlib + +from credsweeper.config import Config +from credsweeper.credentials import LineData +from credsweeper.file_handler.analysis_target import AnalysisTarget +from credsweeper.filters import Filter +from credsweeper.utils import Util + + +class ValueDiscordBotCheck(Filter): + """Discord bot Token""" + + def __init__(self, config: Config = None) -> None: + pass + + def run(self, line_data: LineData, target: AnalysisTarget) -> bool: + """Run filter checks on received token which might be structured. + + Args: + line_data: credential candidate data + target: multiline target from which line data was obtained + + Return: + True, when need to filter candidate and False if left + + """ + with contextlib.suppress(Exception): + parts = line_data.value.split('.') + if int(Util.decode_base64(parts[0], padding_safe=True, urlsafe_detect=True)): + return False + return True diff --git a/credsweeper/filters/value_grafana_service_check.py b/credsweeper/filters/value_grafana_service_check.py new file mode 100644 index 000000000..d6389fc2e --- /dev/null +++ b/credsweeper/filters/value_grafana_service_check.py @@ -0,0 +1,35 @@ +import binascii +import contextlib +import struct + +from credsweeper.common.constants import ASCII +from credsweeper.config import Config +from credsweeper.credentials import LineData +from credsweeper.file_handler.analysis_target import AnalysisTarget +from credsweeper.filters import Filter + + +class ValueGrafanaServiceCheck(Filter): + """Check that candidate have a known structure""" + + def __init__(self, config: Config = None) -> None: + pass + + def run(self, line_data: LineData, target: AnalysisTarget) -> bool: + """Run filter checks on received token which might be structured. + + Args: + line_data: credential candidate data + target: multiline target from which line data was obtained + + Return: + True, if need to filter candidate and False if left + + """ + with contextlib.suppress(Exception): + checksum = struct.unpack("EAAC[0-9A-Za-z]{27,80}) + - (?EAA[0-9A-Za-z]{80,800}) filter_type: GeneralPattern required_substrings: - - EAAC - min_line_len: 31 + - EAA + min_line_len: 80 + target: + - code + - doc + +- name: Facebook App Token + severity: high + confidence: moderate + type: pattern + values: + - (?[0-9]{12,18}\|[0-9A-Za-z_-]{24,28})(?![=0-9A-Za-z_+-]) + filter_type: GeneralPattern + required_substrings: + - "|" + required_regex: "[0-9A-Za-z_/+-]{15}" + min_line_len: 33 target: - code - doc @@ -246,7 +261,7 @@ confidence: moderate type: pattern values: - - (?i)((git)[0-9A-Za-z_-]{0,80}(token|key|api)[0-9A-Za-z_-]{0,80}(\s)*(=|:|:=)(\s)*(["']?)(?P[a-z|\d]{40})(["']?)) + - (?i)((git)[0-9A-Za-z_-]{0,80}(token|key|api)[0-9A-Za-z_-]{0,80}(\s)*(=|:|:=)(\s)*(["']?)(?P[0-9a-z]{40})(["']?)) filter_type: GeneralPattern use_ml: true validations: @@ -644,7 +659,7 @@ confidence: moderate type: pattern values: - - (?SK[0-9a-fA-F]{32})(?![=0-9A-Za-z_+-]) + - (?SK[0-9A-Fa-f]{32})(?![=0-9A-Za-z_+-]) filter_type: GeneralPattern required_substrings: - SK @@ -1196,10 +1211,15 @@ confidence: strong type: pattern values: - - (?sk-[0-9A-Za-z_-]{20}T3BlbkFJ[0-9A-Za-z_-]{20})(?![=0-9A-Za-z_/+-]) + - (?sk-[0-9A-Za-z_-]{16,32}(T3BlbkFJ|9wZW5BS|PcGVuQU)[0-9A-Za-z_-]{16,32})(?![=0-9A-Za-z_/+-]) min_line_len: 51 + filter_type: + - ValuePatternCheck + - ValueEntropyBase64Check required_substrings: - T3BlbkFJ + - 9wZW5BS + - PcGVuQU target: - code - doc @@ -1219,3 +1239,35 @@ - code - doc +- name: Discord Bot Token + severity: high + confidence: strong + type: pattern + values: + - (?[NMO][ADgjQTwz][a-zA-Z0-9_-]{22,26}\.[a-zA-Z0-9_-]{6}\.[a-zA-Z0-9_-]{30,40})(?![0-9A-Za-z_/+-]) + min_line_len: 62 + filter_type: + - ValueDiscordBotCheck + required_substrings: + - M + - N + - O + required_regex: "[0-9A-Za-z_/+-]{15}" + target: + - code + - doc + +- name: Grafana Service Account Token + severity: high + confidence: strong + type: pattern + values: + - (?glsa_[0-9A-Za-z_-]{32}_[0-9A-Fa-f]{8})(?![=0-9A-Za-z_+-]) + min_line_len: 46 + filter_type: + - ValueGrafanaServiceCheck + required_substrings: + - glsa_ + target: + - code + - doc diff --git a/credsweeper/secret/config.json b/credsweeper/secret/config.json index 4e7c80e32..1a4b904e3 100644 --- a/credsweeper/secret/config.json +++ b/credsweeper/secret/config.json @@ -32,6 +32,7 @@ ".jar", ".jpeg", ".jpg", + ".map", ".m4a", ".mat", ".mo", @@ -47,6 +48,8 @@ ".pyc", ".pyd", ".pyo", + ".rc", + ".rc2", ".rar", ".realm", ".s7z", @@ -58,6 +61,7 @@ ".tiff", ".ttf", ".vcxproj", + ".vdproj", ".war", ".wav", ".webm", diff --git a/tests/__init__.py b/tests/__init__.py index 4b0e12ce6..8c657c9bc 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,20 +1,20 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT: int = 128 +SAMPLES_FILES_COUNT: int = 130 # the lowest value of ML threshold is used to display possible lowest values NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan -SAMPLES_CRED_COUNT: int = 421 -SAMPLES_CRED_LINE_COUNT: int = 438 +SAMPLES_CRED_COUNT: int = 425 +SAMPLES_CRED_LINE_COUNT: int = 442 # credentials count after post-processing -SAMPLES_POST_CRED_COUNT: int = 379 +SAMPLES_POST_CRED_COUNT: int = 383 # with option --doc -SAMPLES_IN_DOC = 404 +SAMPLES_IN_DOC = 407 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 25 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 118dc700c..b94ef2521 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -1127,6 +1127,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Discord Bot Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "MTIzNDU2Nzg5MDEyMzQ1Njc4OQ.E2-E4_.Zig9V5mpMk-JybgCFvqSfgY9EoqWjkA5O_qDje", + "line_num": 1, + "path": "tests/samples/discord_bot_token", + "info": "tests/samples/discord_bot_token|RAW", + "value": "MTIzNDU2Nzg5MDEyMzQ1Njc4OQ.E2-E4_.Zig9V5mpMk-JybgCFvqSfgY9EoqWjkA5O_qDje", + "value_start": 0, + "value_end": 72, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.731746181697384, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -7643,19 +7670,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", + "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD\"", "line_num": 1, "path": "tests/samples/facebook_key", "info": "tests/samples/facebook_key|RAW", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 28, - "value_end": 70, + "value_end": 115, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, "valid": true } } @@ -7664,25 +7691,52 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.84, + "ml_probability": 0.999, "rule": "Token", "severity": "medium", "confidence": "moderate", "line_data_list": [ { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", + "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD\"", "line_num": 1, "path": "tests/samples/facebook_key", "info": "tests/samples/facebook_key|RAW", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 28, - "value_end": 70, + "value_end": 115, "variable": "GI_REO_GI_FACEBOOK_TOKEN", "variable_start": 0, "variable_end": 24, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Facebook App Token", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": "1527194624358273|qbBf2-fdB9zZpqLA0_2nNzZDw2M", + "line_num": 2, + "path": "tests/samples/facebook_key", + "info": "tests/samples/facebook_key|RAW", + "value": "1527194624358273|qbBf2-fdB9zZpqLA0_2nNzZDw2M", + "value_start": 0, + "value_end": 44, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.2089099270924217, "valid": true } } @@ -8262,6 +8316,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Grafana Service Account Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "glsa_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7", + "line_num": 1, + "path": "tests/samples/grafana_service_accounts", + "info": "tests/samples/grafana_service_accounts|RAW", + "value": "glsa_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7", + "value_start": 0, + "value_end": 46, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.52211252299684, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -9304,11 +9385,11 @@ "confidence": "strong", "line_data_list": [ { - "line": "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", + "line": "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", "line_num": 2, "path": "tests/samples/open_ai_token", "info": "tests/samples/open_ai_token|RAW", - "value": "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", + "value": "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", "value_start": 0, "value_end": 51, "variable": null, @@ -9316,8 +9397,35 @@ "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 3.047085443409471, - "valid": false + "entropy": 5.115027050910027, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "OpenAI Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "sk-proj-qa25MV9c7Qu0EjDIEWdcT3BlbkFJ83uCF0K4yw7RzpY39bio", + "line_num": 3, + "path": "tests/samples/open_ai_token", + "info": "tests/samples/open_ai_token|RAW", + "value": "sk-proj-qa25MV9c7Qu0EjDIEWdcT3BlbkFJ83uCF0K4yw7RzpY39bio", + "value_start": 0, + "value_end": 56, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.086469255159772, + "valid": true } } ] @@ -12062,19 +12170,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "the line will be found twice # 100 EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "line": "the line will be found twice # 100 EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "line_num": 97, "path": "tests/samples/test.html", "info": "tests/samples/test.html|HTML", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 35, - "value_end": 77, + "value_end": 122, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, "valid": true } } @@ -12089,19 +12197,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "the line will be found twice # 100 EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "line": "the line will be found twice # 100 EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "line_num": 100, "path": "tests/samples/test.html", "info": "tests/samples/test.html|HTML", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 35, - "value_end": 77, + "value_end": 122, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, "valid": true } } diff --git a/tests/data/doc.json b/tests/data/doc.json index f24a04787..ab8b856c2 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -654,6 +654,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Discord Bot Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "MTIzNDU2Nzg5MDEyMzQ1Njc4OQ.E2-E4_.Zig9V5mpMk-JybgCFvqSfgY9EoqWjkA5O_qDje", + "line_num": 1, + "path": "tests/samples/discord_bot_token", + "info": "tests/samples/discord_bot_token|RAW", + "value": "MTIzNDU2Nzg5MDEyMzQ1Njc4OQ.E2-E4_.Zig9V5mpMk-JybgCFvqSfgY9EoqWjkA5O_qDje", + "value_start": 0, + "value_end": 72, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.731746181697384, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -10550,19 +10577,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", + "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD\"", "line_num": 1, "path": "tests/samples/facebook_key", "info": "tests/samples/facebook_key|RAW", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 28, - "value_end": 70, + "value_end": 115, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, "valid": true } } @@ -10572,24 +10599,24 @@ "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", "ml_probability": null, - "rule": "SECRET_PAIR", - "severity": "medium", + "rule": "Facebook App Token", + "severity": "high", "confidence": "moderate", "line_data_list": [ { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", - "line_num": 1, + "line": "1527194624358273|qbBf2-fdB9zZpqLA0_2nNzZDw2M", + "line_num": 2, "path": "tests/samples/facebook_key", "info": "tests/samples/facebook_key|RAW", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", - "value_start": 28, - "value_end": 70, - "variable": "TOKEN", - "variable_start": 19, - "variable_end": 24, + "value": "1527194624358273|qbBf2-fdB9zZpqLA0_2nNzZDw2M", + "value_start": 0, + "value_end": 44, + "variable": null, + "variable_start": -2, + "variable_end": -2, "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "iterator": "BASE36_CHARS", + "entropy": 3.2089099270924217, "valid": true } } @@ -11115,6 +11142,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Grafana Service Account Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "glsa_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7", + "line_num": 1, + "path": "tests/samples/grafana_service_accounts", + "info": "tests/samples/grafana_service_accounts|RAW", + "value": "glsa_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7", + "value_start": 0, + "value_end": 46, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.52211252299684, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -11590,11 +11644,11 @@ "confidence": "strong", "line_data_list": [ { - "line": "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", + "line": "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", "line_num": 2, "path": "tests/samples/open_ai_token", "info": "tests/samples/open_ai_token|RAW", - "value": "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", + "value": "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", "value_start": 0, "value_end": 51, "variable": null, @@ -11602,8 +11656,35 @@ "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 3.047085443409471, - "valid": false + "entropy": 5.115027050910027, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "OpenAI Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "sk-proj-qa25MV9c7Qu0EjDIEWdcT3BlbkFJ83uCF0K4yw7RzpY39bio", + "line_num": 3, + "path": "tests/samples/open_ai_token", + "info": "tests/samples/open_ai_token|RAW", + "value": "sk-proj-qa25MV9c7Qu0EjDIEWdcT3BlbkFJ83uCF0K4yw7RzpY39bio", + "value_start": 0, + "value_end": 56, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.086469255159772, + "valid": true } } ] @@ -12695,19 +12776,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "the line will be found twice # 100 EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "line": "the line will be found twice # 100 EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "line_num": 97, "path": "tests/samples/test.html", "info": "tests/samples/test.html|HTML", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 35, - "value_end": 77, + "value_end": 122, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, "valid": true } } diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index 73074c839..0a74fb0f2 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -978,6 +978,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Discord Bot Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "MTIzNDU2Nzg5MDEyMzQ1Njc4OQ.E2-E4_.Zig9V5mpMk-JybgCFvqSfgY9EoqWjkA5O_qDje", + "line_num": 1, + "path": "tests/samples/discord_bot_token", + "info": "", + "value": "MTIzNDU2Nzg5MDEyMzQ1Njc4OQ.E2-E4_.Zig9V5mpMk-JybgCFvqSfgY9EoqWjkA5O_qDje", + "value_start": 0, + "value_end": 72, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.731746181697384, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -8277,19 +8304,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", + "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD\"", "line_num": 1, "path": "tests/samples/facebook_key", "info": "", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 28, - "value_end": 70, + "value_end": 115, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, "valid": true } } @@ -8298,25 +8325,52 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.84, + "ml_probability": 0.999, "rule": "Token", "severity": "medium", "confidence": "moderate", "line_data_list": [ { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", + "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD\"", "line_num": 1, "path": "tests/samples/facebook_key", "info": "", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 28, - "value_end": 70, + "value_end": 115, "variable": "GI_REO_GI_FACEBOOK_TOKEN", "variable_start": 0, "variable_end": 24, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Facebook App Token", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": "1527194624358273|qbBf2-fdB9zZpqLA0_2nNzZDw2M", + "line_num": 2, + "path": "tests/samples/facebook_key", + "info": "", + "value": "1527194624358273|qbBf2-fdB9zZpqLA0_2nNzZDw2M", + "value_start": 0, + "value_end": 44, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.2089099270924217, "valid": true } } @@ -8896,6 +8950,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Grafana Service Account Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "glsa_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7", + "line_num": 1, + "path": "tests/samples/grafana_service_accounts", + "info": "", + "value": "glsa_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7", + "value_start": 0, + "value_end": 46, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.52211252299684, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -9911,11 +9992,11 @@ "confidence": "strong", "line_data_list": [ { - "line": "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", + "line": "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", "line_num": 2, "path": "tests/samples/open_ai_token", "info": "", - "value": "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", + "value": "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", "value_start": 0, "value_end": 51, "variable": null, @@ -9923,8 +10004,35 @@ "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 3.047085443409471, - "valid": false + "entropy": 5.115027050910027, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "OpenAI Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "sk-proj-qa25MV9c7Qu0EjDIEWdcT3BlbkFJ83uCF0K4yw7RzpY39bio", + "line_num": 3, + "path": "tests/samples/open_ai_token", + "info": "", + "value": "sk-proj-qa25MV9c7Qu0EjDIEWdcT3BlbkFJ83uCF0K4yw7RzpY39bio", + "value_start": 0, + "value_end": 56, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.086469255159772, + "valid": true } } ] diff --git a/tests/data/output.json b/tests/data/output.json index d7e013641..3da19895c 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -924,6 +924,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Discord Bot Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "MTIzNDU2Nzg5MDEyMzQ1Njc4OQ.E2-E4_.Zig9V5mpMk-JybgCFvqSfgY9EoqWjkA5O_qDje", + "line_num": 1, + "path": "tests/samples/discord_bot_token", + "info": "", + "value": "MTIzNDU2Nzg5MDEyMzQ1Njc4OQ.E2-E4_.Zig9V5mpMk-JybgCFvqSfgY9EoqWjkA5O_qDje", + "value_start": 0, + "value_end": 72, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.731746181697384, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -7386,19 +7413,19 @@ "confidence": "moderate", "line_data_list": [ { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", + "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD\"", "line_num": 1, "path": "tests/samples/facebook_key", "info": "", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 28, - "value_end": 70, + "value_end": 115, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, "valid": true } } @@ -7407,25 +7434,52 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.84, + "ml_probability": 0.999, "rule": "Token", "severity": "medium", "confidence": "moderate", "line_data_list": [ { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", + "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD\"", "line_num": 1, "path": "tests/samples/facebook_key", "info": "", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "value": "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "value_start": 28, - "value_end": 70, + "value_end": 115, "variable": "GI_REO_GI_FACEBOOK_TOKEN", "variable_start": 0, "variable_end": 24, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, + "entropy": 4.936120692057916, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Facebook App Token", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": "1527194624358273|qbBf2-fdB9zZpqLA0_2nNzZDw2M", + "line_num": 2, + "path": "tests/samples/facebook_key", + "info": "", + "value": "1527194624358273|qbBf2-fdB9zZpqLA0_2nNzZDw2M", + "value_start": 0, + "value_end": 44, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.2089099270924217, "valid": true } } @@ -8005,6 +8059,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Grafana Service Account Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "glsa_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7", + "line_num": 1, + "path": "tests/samples/grafana_service_accounts", + "info": "", + "value": "glsa_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7", + "value_start": 0, + "value_end": 46, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.52211252299684, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -8993,11 +9074,11 @@ "confidence": "strong", "line_data_list": [ { - "line": "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", + "line": "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", "line_num": 2, "path": "tests/samples/open_ai_token", "info": "", - "value": "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", + "value": "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", "value_start": 0, "value_end": 51, "variable": null, @@ -9005,8 +9086,35 @@ "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 3.047085443409471, - "valid": false + "entropy": 5.115027050910027, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "OpenAI Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "sk-proj-qa25MV9c7Qu0EjDIEWdcT3BlbkFJ83uCF0K4yw7RzpY39bio", + "line_num": 3, + "path": "tests/samples/open_ai_token", + "info": "", + "value": "sk-proj-qa25MV9c7Qu0EjDIEWdcT3BlbkFJ83uCF0K4yw7RzpY39bio", + "value_start": 0, + "value_end": 56, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.086469255159772, + "valid": true } } ] diff --git a/tests/filters/test_value_grafana_service_check.py b/tests/filters/test_value_grafana_service_check.py new file mode 100644 index 000000000..16eee9454 --- /dev/null +++ b/tests/filters/test_value_grafana_service_check.py @@ -0,0 +1,18 @@ +import pytest + +from credsweeper.filters import ValueGrafanaServiceCheck +from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET +from tests.test_utils.dummy_line_data import get_line_data + + +class TestValueGrafanaServiceCheck: + + @pytest.mark.parametrize("line", ["glsa_DuMmY-T0K3N-f0R-tHe-Te5t-CRC32Ok_770c8cda"]) + def test_value_sgrafana_service_check_p(self, file_path: pytest.fixture, line: str) -> None: + glsa_line_data = get_line_data(file_path, line=line, pattern=LINE_VALUE_PATTERN) + assert ValueGrafanaServiceCheck().run(glsa_line_data, DUMMY_ANALYSIS_TARGET) is False + + @pytest.mark.parametrize("line", ["glpl_DuMmY-T0K3N-f0R-tHe-Te5t-CRC32Ok_770c8CdA"]) + def test_value_sgrafana_service_check_n(self, file_path: pytest.fixture, line: str) -> None: + glsa_line_data = get_line_data(file_path, line=line, pattern=LINE_VALUE_PATTERN) + assert ValueGrafanaServiceCheck().run(glsa_line_data, DUMMY_ANALYSIS_TARGET) is True diff --git a/tests/rules/test_facebook_key.py b/tests/rules/test_facebook_key.py index b17666b26..590bed816 100644 --- a/tests/rules/test_facebook_key.py +++ b/tests/rules/test_facebook_key.py @@ -7,7 +7,10 @@ class TestFacebookKey(BaseTestRule): - @pytest.fixture(params=[["GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAAaBbCcDdEeCrackle\""]]) + @pytest.fixture(params=[[ + 'FACEBOOK_T0KEN = ' + '"EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD"' + ]]) def lines(self, request) -> List[str]: return request.param diff --git a/tests/samples/discord_bot_token b/tests/samples/discord_bot_token new file mode 100644 index 000000000..333db236f --- /dev/null +++ b/tests/samples/discord_bot_token @@ -0,0 +1 @@ +MTIzNDU2Nzg5MDEyMzQ1Njc4OQ.E2-E4_.Zig9V5mpMk-JybgCFvqSfgY9EoqWjkA5O_qDje diff --git a/tests/samples/facebook_key b/tests/samples/facebook_key index 47e94fbaf..6a949eb3d 100644 --- a/tests/samples/facebook_key +++ b/tests/samples/facebook_key @@ -1 +1,2 @@ -GI_REO_GI_FACEBOOK_TOKEN = "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF" \ No newline at end of file +GI_REO_GI_FACEBOOK_TOKEN = "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD" +1527194624358273|qbBf2-fdB9zZpqLA0_2nNzZDw2M diff --git a/tests/samples/grafana_service_accounts b/tests/samples/grafana_service_accounts new file mode 100644 index 000000000..e2808200b --- /dev/null +++ b/tests/samples/grafana_service_accounts @@ -0,0 +1,2 @@ +glsa_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7 +glpl_ThisI5NtTheTok3nYou8reLo0k1ngF0r_0a2a3df7 diff --git a/tests/samples/open_ai_token b/tests/samples/open_ai_token index e7fad68ed..ad934ce1c 100644 --- a/tests/samples/open_ai_token +++ b/tests/samples/open_ai_token @@ -1,5 +1,7 @@ # TP -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 +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 +sk-proj-qa25MV9c7Qu0EjDIEWdcT3BlbkFJ83uCF0K4yw7RzpY39bio # FP -sk-T3BlbkFJT3BlbkFJT3BlbkFJT3BlbkFJT3BlbkFJT3BlbkFJ \ No newline at end of file +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 +sk-proj-asdfgasdfasdfdcQbzdcT3BlbkFJ83uCasdfgasdfgasdfjk diff --git a/tests/samples/test.html b/tests/samples/test.html index ed6d823ac..d07d0420f 100644 --- a/tests/samples/test.html +++ b/tests/samples/test.html @@ -97,7 +97,7 @@ - the line will be found twice
# 100

EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF

+ the line will be found twice
# 100

EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD

diff --git a/tests/test_main.py b/tests/test_main.py index 68c599b33..8e390ade6 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -161,6 +161,7 @@ def test_main_path_p(self, mock_get_arguments) -> None: rule_path=None, jobs=1, ml_threshold=0.0, + ml_batch_size=1, depth=0, doc=False, severity="info", @@ -193,6 +194,7 @@ def test_binary_patch_p(self, mock_get_arguments) -> None: rule_path=None, jobs=1, ml_threshold=0.0, + ml_batch_size=1, depth=9, doc=False, severity="info", @@ -612,8 +614,10 @@ def test_html_p(self) -> None: "# 95 dop_v1_425522a565f532bc6532d453422e50334a42f5242a3090fbe553b543b124259b", "# 94 ya29.dshMb48ehfXwydAj34D32J", "# 95 dop_v1_425522a565f532bc6532d453422e50334a42f5242a3090fbe553b543b124259b", - "the line will be found twice # 100 EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", - "the line will be found twice # 100 EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "the line will be found twice # 100" + " EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", + "the line will be found twice # 100" + " EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", ] self.assertEqual(len(expected_credential_lines), len(found_credentials)) for cred in found_credentials: @@ -678,7 +682,7 @@ def test_doc_p(self) -> None: "508627689:AAEuLPKs-EhrjrYGnz60bnYNZqakf6HJxc0", "ya29.dshMb48ehfXwydAj34D32J", "dop_v1_425522a565f532bc6532d453422e50334a42f5242a3090fbe553b543b124259b", - "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", + "EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eose0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", "MU$T6Ef09#D!", } self.assertSetEqual(expected_credential_values, set(x.line_data_list[0].value for x in found_credentials))