From c75afe55798aa5d17d397f8bfb427c3da4f77f61 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Tue, 6 Aug 2024 14:23:22 +0300 Subject: [PATCH 01/11] jwt --- cicd/benchmark.txt | 15 +- credsweeper/file_handler/descriptor.py | 8 +- credsweeper/filters/__init__.py | 1 + .../filters/value_azure_token_check.py | 52 +++++ .../filters/value_entropy_base64_check.py | 12 +- .../filters/value_json_web_token_check.py | 46 +++- credsweeper/rules/config.yaml | 13 +- docs/source/credsweeper.filters.rst | 48 +++++ experiment/src/entropy_test.py | 111 +++++----- tests/__init__.py | 10 +- tests/data/depth_3.json | 197 ++++++------------ tests/data/doc.json | 81 +++++++ tests/data/ml_threshold.json | 197 ++++++------------ tests/data/output.json | 170 ++++++--------- tests/filters/test_value_azure_token_check.py | 26 +++ tests/filters/test_value_file_path_check.py | 2 +- .../test_value_json_web_token_check.py | 40 ++-- tests/samples/azure_access_token | 4 +- tests/samples/json_web_token | 2 + tests/samples/json_web_token.hs | 1 - 20 files changed, 536 insertions(+), 500 deletions(-) create mode 100644 credsweeper/filters/value_azure_token_check.py create mode 100644 tests/filters/test_value_azure_token_check.py create mode 100644 tests/samples/json_web_token delete mode 100644 tests/samples/json_web_token.hs diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index d75bff0b3..6203f80c7 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -231,7 +231,7 @@ AWS Client ID 168 13 0 16 AWS Multi 75 12 0 87 75 11 1 0 0.916667 0.000000 0.873563 0.872093 1.000000 0.931677 AWS S3 Bucket 61 25 0 87 61 24 1 0 0.960000 0.000000 0.720930 0.717647 1.000000 0.835616 Atlassian Old PAT token 27 212 3 12 3 8 207 24 0.037209 0.888889 0.867769 0.272727 0.111111 0.157895 -Auth 407 2725 77 372 351 21 2781 56 0.007495 0.137592 0.976005 0.943548 0.862408 0.901155 +Auth 319 2749 86 294 275 19 2816 44 0.006702 0.137931 0.980025 0.935374 0.862069 0.897227 Azure Access Token 19 0 0 0 0 0 19 1.000000 0.000000 0.000000 BASE64 Private Key 7 2 0 7 7 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 @@ -253,16 +253,15 @@ Grafana Provisioned API Key 22 1 0 JSON Web Token 284 11 2 274 271 3 10 13 0.230769 0.045775 0.946128 0.989051 0.954225 0.971326 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 14 6 0 10 10 0 6 4 0.000000 0.285714 0.800000 1.000000 0.714286 0.833333 -Key 483 8494 464 445 436 9 8949 47 0.001005 0.097308 0.994068 0.979775 0.902692 0.939655 -Nonce 83 53 0 85 79 6 47 4 0.113208 0.048193 0.926471 0.929412 0.951807 0.940476 -Other 0 0 5 0 0 5 0 0.000000 1.000000 +Key 462 7841 462 439 431 8 8295 31 0.000964 0.067100 0.995550 0.981777 0.932900 0.956715 +Nonce 79 53 0 84 76 8 45 3 0.150943 0.037975 0.916667 0.904762 0.962025 0.932515 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 -Password 1823 7474 2752 1681 1614 67 10159 209 0.006552 0.114646 0.977094 0.960143 0.885354 0.921233 -Salt 42 76 2 38 38 0 78 4 0.000000 0.095238 0.966667 1.000000 0.904762 0.950000 -Secret 1358 28497 869 1234 1229 5 29361 129 0.000170 0.094993 0.995639 0.995948 0.905007 0.948302 +Password 1915 7417 2669 1603 1581 22 10064 334 0.002181 0.174413 0.970336 0.986276 0.825587 0.898806 +Salt 42 72 2 38 38 0 74 4 0.000000 0.095238 0.965517 1.000000 0.904762 0.950000 +Secret 1359 29629 870 1236 1231 5 30494 128 0.000164 0.094187 0.995825 0.995955 0.905813 0.948748 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -Token 585 3972 439 519 511 8 4403 74 0.001814 0.126496 0.983587 0.984586 0.873504 0.925725 +Token 572 3959 448 523 504 19 4388 68 0.004311 0.118881 0.982527 0.963671 0.881119 0.920548 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 URL Credentials 194 125 251 184 184 0 376 10 0.000000 0.051546 0.982456 1.000000 0.948454 0.973545 7615 59903 5233 6704 6470 227 59676 1145 0.003789 0.150361 0.979679 0.966104 0.849639 0.904136 diff --git a/credsweeper/file_handler/descriptor.py b/credsweeper/file_handler/descriptor.py index a4e534a7d..b8ae850b6 100644 --- a/credsweeper/file_handler/descriptor.py +++ b/credsweeper/file_handler/descriptor.py @@ -3,7 +3,13 @@ @dataclass(frozen=True) class Descriptor: - """Descriptor for file - optimize memory consumption""" + """Descriptor for file - optimize memory consumption + + Args: + path: file path + extension: file extension + info: info for deep scan + """ path: str extension: str info: str diff --git a/credsweeper/filters/__init__.py b/credsweeper/filters/__init__.py index f88bcac57..f050671a5 100644 --- a/credsweeper/filters/__init__.py +++ b/credsweeper/filters/__init__.py @@ -5,6 +5,7 @@ from credsweeper.filters.value_allowlist_check import ValueAllowlistCheck from credsweeper.filters.value_array_dictionary_check import ValueArrayDictionaryCheck from credsweeper.filters.value_atlassian_token_check import ValueAtlassianTokenCheck +from credsweeper.filters.value_azure_token_check import ValueAzureTokenCheck from credsweeper.filters.value_base32_data_check import ValueBase32DataCheck from credsweeper.filters.value_base64_data_check import ValueBase64DataCheck from credsweeper.filters.value_base64_encoded_pem_check import ValueBase64EncodedPem diff --git a/credsweeper/filters/value_azure_token_check.py b/credsweeper/filters/value_azure_token_check.py new file mode 100644 index 000000000..d62293a54 --- /dev/null +++ b/credsweeper/filters/value_azure_token_check.py @@ -0,0 +1,52 @@ +import contextlib +import json + +from credsweeper.common.constants import Chars +from credsweeper.config import Config +from credsweeper.credentials import LineData +from credsweeper.file_handler.analysis_target import AnalysisTarget +from credsweeper.filters import Filter +from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check +from credsweeper.utils import Util + + +class ValueAzureTokenCheck(Filter): + """ + Azure tokens contains header, payload and signature + https://learn.microsoft.com/en-us/azure/active-directory-b2c/access-tokens + """ + + def __init__(self, config: Config = None) -> None: + pass + + def run(self, line_data: LineData, target: AnalysisTarget) -> bool: + """Run filter checks on received token which might be structured. + + Args: + line_data: credential candidate data + target: multiline target from which line data was obtained + + Return: + True, when need to filter candidate and False if left + + """ + with contextlib.suppress(Exception): + parts = line_data.value.split('.') + if 3 != len(parts): + return True + hdr = Util.decode_base64(parts[0], padding_safe=True, urlsafe_detect=True) + header = json.loads(hdr) + if not ("alg" in header and "typ" in header and "kid" in header): + # must be all parts in header + return True + pld = Util.decode_base64(parts[1], padding_safe=True, urlsafe_detect=True) + payload = json.loads(pld) + if not ("iss" in payload and "exp" in payload and "iat" in payload): + # must be all parts in payload + return True + min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(parts[2])) + entropy = Util.get_shannon_entropy(parts[2], Chars.BASE64URL_CHARS.value) + # good signature has to be like random bytes + return entropy < min_entropy + + return True diff --git a/credsweeper/filters/value_entropy_base64_check.py b/credsweeper/filters/value_entropy_base64_check.py index f97741a0d..7fa5ff3fd 100644 --- a/credsweeper/filters/value_entropy_base64_check.py +++ b/credsweeper/filters/value_entropy_base64_check.py @@ -1,6 +1,6 @@ import math -from credsweeper.common.constants import Chars, ENTROPY_LIMIT_BASE64 +from credsweeper.common.constants import Chars from credsweeper.config import Config from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget @@ -25,14 +25,12 @@ def get_min_data_entropy(x: int) -> float: y = 4.1 elif 32 == x: y = 4.4 - elif 12 <= x < 35: + elif 12 <= x < 32: # logarithm base 2 - slow, but precise. Approximation does not exceed stdev y = 0.77 * math.log2(x) + 0.62 - elif 35 <= x < 60: - y = ENTROPY_LIMIT_BASE64 - elif 60 <= x: - # the entropy grows slowly after 60 - y = 5.0 + elif 32 < x: + l2x = math.log2(x) + y = 0.001477 * l2x**4 - 0.036886 * l2x**3 + 0.244849 * l2x**2 + 0.318411 * l2x + 0.3932 else: y = 0 return y diff --git a/credsweeper/filters/value_json_web_token_check.py b/credsweeper/filters/value_json_web_token_check.py index 7f6048a80..8235c9b3e 100644 --- a/credsweeper/filters/value_json_web_token_check.py +++ b/credsweeper/filters/value_json_web_token_check.py @@ -1,10 +1,11 @@ import contextlib import json +from credsweeper.common.constants import Chars from credsweeper.config import Config from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget -from credsweeper.filters import Filter +from credsweeper.filters import Filter, ValueEntropyBase64Check from credsweeper.utils import Util @@ -14,6 +15,8 @@ class ValueJsonWebTokenCheck(Filter): only header is parsed with "typ" or "alg" member from example of RFC7519 https://datatracker.ietf.org/doc/html/rfc7519 """ + header_keys = {"alg", "typ", "cty", "enc"} + payload_keys = {"iss", "sub", "aud", "exp", "nbf", "iat", "jti"} def __init__(self, config: Config = None) -> None: pass @@ -29,12 +32,37 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: True, when need to filter candidate and False if left """ + header_check = False + payload_check = False + signature_check = False with contextlib.suppress(Exception): - delimiter_pos = line_data.value.find(".") - # jwt token. '.' must be always in given data, according regex in rule - value = line_data.value[:delimiter_pos] - decoded = Util.decode_base64(value, padding_safe=True, urlsafe_detect=True) - if header := json.loads(decoded): - if "alg" in header or "typ" in header: - return False - return True + jwt_parts = line_data.value.split('.') + for part in jwt_parts: + if part.startswith("eyJ"): + # open part - just base64 encoded + json_keys = json.loads(Util.decode_base64(part, padding_safe=True, urlsafe_detect=True)).keys() + # header will be checked first + if not header_check: + if header_check := bool(ValueJsonWebTokenCheck.header_keys.intersection(json_keys)): + continue + else: + break + # payload follows the header + if not payload_check: + if payload_check := bool(ValueJsonWebTokenCheck.payload_keys.intersection(json_keys)): + continue + else: + break + # any other payloads are allowed + elif header_check and payload_check and not signature_check: + # signature check or skip encrypted part + min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(part)) + entropy = Util.get_shannon_entropy(part, Chars.BASE64URL_CHARS.value) + # good signature has to be like random bytes + signature_check = entropy > min_entropy + else: + break + if header_check and payload_check and signature_check: + return False + else: + return True diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index ce05d3b4b..e91f3bc7e 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -338,17 +338,18 @@ - name: JSON Web Token severity: medium - confidence: moderate + confidence: strong type: pattern values: - - (?eyJ[0-9A-Za-z_=-]{15,8000}([.0-9A-Za-z_=-]{1,8000})?) - filter_type: GeneralPattern - use_ml: true + - (?eyJ[0-9A-Za-z_+/=-]{15,8000}([.0-9A-Za-z_+/=-]{16,8000}){2,16}) + filter_type: + - ValueJsonWebTokenCheck required_substrings: - eyJ min_line_len: 18 target: - code + - doc - name: MailChimp API Key severity: high @@ -841,9 +842,9 @@ confidence: strong type: pattern values: - - (?eyJ[A-Za-z0-9_=-]{50,500}\.eyJ[A-Za-z0-9_=-]{1,8000}\.[A-Za-z0-9_=-]{1,8000}) + - (?eyJ[A-Za-z0-9_=-]{50,500}\.eyJ[A-Za-z0-9_=-]{8,8000}\.[A-Za-z0-9_=-]{18,800}) filter_type: - - ValueJsonWebTokenCheck + - ValueAzureTokenCheck required_substrings: - eyJ min_line_len: 148 diff --git a/docs/source/credsweeper.filters.rst b/docs/source/credsweeper.filters.rst index 7599043ae..e91a76a69 100644 --- a/docs/source/credsweeper.filters.rst +++ b/docs/source/credsweeper.filters.rst @@ -20,6 +20,14 @@ credsweeper.filters.filter module :undoc-members: :show-inheritance: +credsweeper.filters.line\_git\_binary\_check module +--------------------------------------------------- + +.. automodule:: credsweeper.filters.line_git_binary_check + :members: + :undoc-members: + :show-inheritance: + credsweeper.filters.line\_specific\_key\_check module ----------------------------------------------------- @@ -60,6 +68,14 @@ credsweeper.filters.value\_atlassian\_token\_check module :undoc-members: :show-inheritance: +credsweeper.filters.value\_azure\_token\_check module +----------------------------------------------------- + +.. automodule:: credsweeper.filters.value_azure_token_check + :members: + :undoc-members: + :show-inheritance: + credsweeper.filters.value\_base32\_data\_check module ----------------------------------------------------- @@ -92,6 +108,14 @@ credsweeper.filters.value\_base64\_key\_check module :undoc-members: :show-inheritance: +credsweeper.filters.value\_base64\_part\_check module +----------------------------------------------------- + +.. automodule:: credsweeper.filters.value_base64_part_check + :members: + :undoc-members: + :show-inheritance: + credsweeper.filters.value\_blocklist\_check module -------------------------------------------------- @@ -132,6 +156,14 @@ credsweeper.filters.value\_dictionary\_value\_length\_check module :undoc-members: :show-inheritance: +credsweeper.filters.value\_discord\_bot\_check module +----------------------------------------------------- + +.. automodule:: credsweeper.filters.value_discord_bot_check + :members: + :undoc-members: + :show-inheritance: + credsweeper.filters.value\_entropy\_base32\_check module -------------------------------------------------------- @@ -188,6 +220,22 @@ credsweeper.filters.value\_grafana\_check module :undoc-members: :show-inheritance: +credsweeper.filters.value\_grafana\_service\_check module +--------------------------------------------------------- + +.. automodule:: credsweeper.filters.value_grafana_service_check + :members: + :undoc-members: + :show-inheritance: + +credsweeper.filters.value\_hex\_number\_check module +---------------------------------------------------- + +.. automodule:: credsweeper.filters.value_hex_number_check + :members: + :undoc-members: + :show-inheritance: + credsweeper.filters.value\_jfrog\_token\_check module ----------------------------------------------------- diff --git a/experiment/src/entropy_test.py b/experiment/src/entropy_test.py index 468ceca5d..150047718 100644 --- a/experiment/src/entropy_test.py +++ b/experiment/src/entropy_test.py @@ -1,16 +1,26 @@ +#!/usr/bin/env python3 +""" +The script is used in experiment to get statistical distribution of shanon entropy +of a line which was obtained with an encoding (base64, base32, etc.) from random generated bytes. +The result format is: +# size of encoded string: (mean of entropy, standard deviation) +""" + import base64 import random import signal import statistics -import string import threading import time -from datetime import datetime from multiprocessing import Pool from typing import Tuple, Dict +import matplotlib.pyplot as plt +import numpy as np +from scipy.optimize import curve_fit + from credsweeper.common.constants import Chars -from credsweeper.filters import ValueEntropyBase36Check +# from credsweeper.filters import ValueEntropyBase36Check from credsweeper.utils import Util random_data: str @@ -28,9 +38,9 @@ def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]: entropies = [] for x in range(ITERATIONS): offset = x * size - # entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE64_CHARS.value) + entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE64_CHARS.value) # entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE36_CHARS.value) - entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE32_CHARS.value) + # entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE32_CHARS.value) entropies.append(entropy) avg = statistics.mean(entropies) dvt = statistics.stdev(entropies, avg) @@ -40,15 +50,15 @@ def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]: return min_avg, min_dvt -if __name__ == "__main__": - - stats: Dict[int, Tuple[float, float]] = {} - sizes = [12, 13, 15, 16, 17, 31, 32, 33] +def generate(start, end) -> Dict[int, Tuple[float, float]]: + stats: Dict[int, Tuple[float, float]] = {} # type: ignore + sizes = [x for x in range(start, end)] + global random_data try: for n in range(1000): start_time = time.time() rand_bytes = random.randbytes(int(8 * ITERATIONS * max(sizes) / 5)) - random_data = base64.b32encode(rand_bytes).decode('ascii') + random_data = base64.b64encode(rand_bytes).decode('ascii') # random_data = ''.join( # [random.choice(string.digits + string.ascii_lowercase) for _ in range(ITERATIONS * max(sizes))]) _args = [(i, stats[i][0] if i in stats else 9.9, stats[i][1] if i in stats else 0.0) for i in sizes] @@ -57,48 +67,47 @@ def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]: with threading.Lock(): stats[_size] = _res print(f"done {n} in {time.time() - start_time}", flush=True) - for k, v in stats.items(): - print(f"{k} = {v}", flush=True) except KeyboardInterrupt as exc: print(exc) finally: - print("===========================================================") + print("===========================================================", flush=True) for k, v in stats.items(): - print(f"{k} = {v}", flush=True) - -# base32 -# 12 = (3.2448401902687922, 0.2001867347580528) -# 13 = (3.3305754195719484, 0.1987638281794566) -# 15 = (3.4840904247691813, 0.192504685389475) -# 16 = (3.544861791803441, 0.184688685917545) -# 17 = (3.613827056321014, 0.18707867741897827) -# 31 = (4.15268463818445, 0.1486133074700339) -# 32 = (4.177896164672521, 0.1472328639816872) -# 33 = (4.197883981615083, 0.14735097649694248) - -# base36 -# 14 = (3.4457644517398167, 0.18990807349700253) -# 15 = (3.5260346505689992, 0.18114901125908447) -# 16 = (3.598032662269341, 0.1830565384431312) -# 17 = (3.659276363856176, 0.1856434289456263) -# 23 = (3.963851572519515, 0.16574824489877288) -# 24 = (4.00254984568254, 0.1623406588528336) -# 25 = (4.040134902813914, 0.158720524449059) -# 26 = (4.078098075953585, 0.15933209429031434) - -# base64 -# 15 = (3.6775207689256977, 0.15381412670043787) -# 16 = (3.7600552609204625, 0.15666871578775507) -# 17 = (3.835262182966267, 0.1514079815395568) -# 18 = (3.899273202112598, 0.15521615494595756) -# 19 = (3.9669074540527136, 0.15022181070460836) -# 20 = (4.026675938018028, 0.1477139960335224) -# 21 = (4.0844028599694155, 0.14611461336723608) -# 23 = (4.1880028531766245, 0.14668346833164134) -# 24 = (4.236982996273627, 0.14220068825454704) -# 25 = (4.283528241641759, 0.14323971561083385) -# 31 = (4.5121865964712535, 0.1393228408491736) -# 32 = (4.545556887485041, 0.13347416608982715) -# 33 = (4.576938427997454, 0.1300362152603773) -# 39 = (4.743676039379888, 0.13053505168803348) -# 40 = (4.76769110698625, 0.1307074052311964) + print(f"{k}: {v},", flush=True) + return stats + + +def log_model(x, k4, k3, k2, k1, k0): + return k4 * np.log2(x)**4 + k3 * np.log2(x)**3 + k2 * np.log2(x)**2 + k1 * np.log2(x) + k0 + + +def solve(data: dict[int, Tuple[float, float]]): + d_list = list((x, y) for x, y in data.items()) + d_list.sort(key=lambda x: (int(x[0]))) + + plt.figure() + x = [int(i[0]) for i in d_list] + y = [i[1][0] for i in d_list] + y_min = [i[1][0] - i[1][1] for i in d_list] + y_max = [i[1][0] + i[1][1] for i in d_list] + plt.plot(x, y, 'r-', lw=2, label='ent') + plt.plot(x, y_min, 'r:', lw=1, label='min') + plt.plot(x, y_max, 'r:', lw=1, label='max') + + _y = np.array(y_min) + _x = np.array(x) + + params, covariance = curve_fit(log_model, _x, _y) + print(params) + k4, k3, k2, k1, k0 = params + plt.plot(x, log_model(x, k4, k3, k2, k1, k0), 'b--', label='fit') + + plt.grid(True) + plt.show() + + +if __name__ == "__main__": + data_file = "base64entr_12_1200.json" # [0.00147696 -0.03688593 0.24484864 0.31841099 0.39320007] + if not (_data := Util.json_load(data_file)): + _data = generate(12, 1200) + Util.json_dump(_data, data_file) + solve(_data) diff --git a/tests/__init__.py b/tests/__init__.py index 4f6c5b878..aa14dc9d2 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,18 +7,18 @@ NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan -SAMPLES_CRED_COUNT: int = 362 -SAMPLES_CRED_LINE_COUNT: int = 379 +SAMPLES_CRED_COUNT: int = 359 +SAMPLES_CRED_LINE_COUNT: int = 376 # credentials count after post-processing -SAMPLES_POST_CRED_COUNT: int = 320 +SAMPLES_POST_CRED_COUNT: int = 318 # with option --doc -SAMPLES_IN_DOC = 411 +SAMPLES_IN_DOC = 414 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 24 -SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 17 +SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 16 SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 1 # well known string with all latin letters diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index bf0894ec2..dac4acbd5 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -394,7 +394,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.941, + "ml_probability": 0.963, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -418,33 +418,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.941, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "curl -H \"Authorization: Bearer eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj\" http://localhost:8080/.", - "line_num": 9, - "path": "tests/samples/auth_n.template", - "info": "tests/samples/auth_n.template|RAW", - "value": "eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj", - "value_start": 31, - "value_end": 65, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.2479906920322064, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -805,11 +778,11 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "JSON Web Token", "severity": "medium", - "confidence": "moderate", + "confidence": "strong", "line_data_list": [ { "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiJlZjFkYTlkNC1mZjc3LTRjM2UtYTAwNS04NDBjM2Y4MzA3NDUiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC9mYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTUyMjIyOS8iLCJpYXQiOjE1MzcyMzMxMDYsIm5iZiI6MTUzNzIzMzEwNiwiZXhwIjoxNTM3MjM3MDA2LCJhY3IiOiIxIiwiYWlvIjoiQVhRQWkvOElBQUFBRm0rRS9RVEcrZ0ZuVnhMaldkdzhLKzYxQUdyU091TU1GNmViYU1qN1hPM0libUQzZkdtck95RCtOdlp5R24yVmFUL2tES1h3NE1JaHJnR1ZxNkJuOHdMWG9UMUxrSVorRnpRVmtKUFBMUU9WNEtjWHFTbENWUERTL0RpQ0RnRTIyMlRJbU12V05hRU1hVU9Uc0lHdlRRPT0iLCJhbXIiOlsid2lhIl0sImFwcGlkIjoiNzVkYmU3N2YtMTBhMy00ZTU5LTg1ZmQtOGMxMjc1NDRmMTdjIiwiYXBwaWRhY3IiOiIwIiwiZW1haWwiOiJBYmVMaUBtaWNyb3NvZnQuY29tIiwiZmFtaWx5X25hbWUiOiJMaW5jb2xuIiwiZ2l2ZW5fbmFtZSI6IkFiZSAoTVNGVCkiLCJpZHAiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC83MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMjIyNDcvIiwiaXBhZGRyIjoiMjIyLjIyMi4yMjIuMjIiLCJuYW1lIjoiYWJlbGkiLCJvaWQiOiIwMjIyM2I2Yi1hYTFkLTQyZDQtOWVjMC0xYjJiYjkxOTQ0MzgiLCJyaCI6IkkiLCJzY3AiOiJ1c2VyX2ltcGVyc29uYXRpb24iLCJzdWIiOiJsM19yb0lTUVUyMjJiVUxTOXlpMmswWHBxcE9pTXo1SDNaQUNvMUdlWEEiLCJ0aWQiOiJmYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTU2ZmQ0MjkiLCJ1bmlxdWVfbmFtZSI6ImFiZWxpQG1pY3Jvc29mdC5jb20iLCJ1dGkiOiJGVnNHeFlYSTMwLVR1aWt1dVVvRkFBIiwidmVyIjoiMS4wIn0.D3H6pMUtQnoJAGq6AHd", @@ -830,6 +803,60 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Azure Access Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "line_num": 2, + "path": "tests/samples/azure_access_token", + "info": "tests/samples/azure_access_token|RAW", + "value": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "value_start": 0, + "value_end": 1029, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.6044494049575055, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "JSON Web Token", + "severity": "medium", + "confidence": "strong", + "line_data_list": [ + { + "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "line_num": 2, + "path": "tests/samples/azure_access_token", + "info": "tests/samples/azure_access_token|RAW", + "value": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "value_start": 0, + "value_end": 1029, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.6044494049575055, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -6885,33 +6912,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.985, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "grafana = 'eyJrIjoiMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMCIsIm4iOiJ4eHh4IiwiaWQiOjIwNDM2MH0='", - "line_num": 1, - "path": "tests/samples/grafana_provisioned_api_key", - "info": "tests/samples/grafana_provisioned_api_key|RAW", - "value": "eyJrIjoiMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMCIsIm4iOiJ4eHh4IiwiaWQiOjIwNDM2MH0=", - "value_start": 11, - "value_end": 107, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.8153130511409934, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -7155,33 +7155,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.931, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "$payload = 'eyJgsIZgeJhvNgFpSmlP.eyJcaaF9xCe7shE0ENPiBlEJOpS'", - "line_num": 1, - "path": "tests/samples/json_web_token.hs", - "info": "tests/samples/json_web_token.hs|RAW", - "value": "eyJgsIZgeJhvNgFpSmlP.eyJcaaF9xCe7shE0ENPiBlEJOpS", - "value_start": 12, - "value_end": 60, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.520488802699322, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -10487,33 +10460,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "token in zip: eyJrIjoiMDAwMDAwNDAwMDAwODAwMDAwMDAwMDAwNDAwMDAwMDAwMDAwMDAyMSIsIm4iOiJ4eHh4IiwiaWQiOjQzMDh9Cg", - "line_num": 1, - "path": "tests/samples/test2.eml", - "info": "tests/samples/test2.eml|EML-DATA|ZIP|token.txt|RAW", - "value": "eyJrIjoiMDAwMDAwNDAwMDAwODAwMDAwMDAwMDAwNDAwMDAwMDAwMDAwMDAyMSIsIm4iOiJ4eHh4IiwiaWQiOjQzMDh9Cg", - "value_start": 14, - "value_end": 108, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.006147345318248, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -10568,33 +10514,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.999, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "token in text: eyJrIjoiMDAwMDAwNDAwMDAwODAwNDAwMDAwMDAwNDAwMDAwMDAwMDAwMDAyMSIsIm4iOiJ4eHh4IiwiaWQiOjQzMDh9Cg", - "line_num": 8, - "path": "tests/samples/test2.eml", - "info": "tests/samples/test2.eml|EML-TEXT", - "value": "eyJrIjoiMDAwMDAwNDAwMDAwODAwNDAwMDAwMDAwNDAwMDAwMDAwMDAwMDAyMSIsIm4iOiJ4eHh4IiwiaWQiOjQzMDh9Cg", - "value_start": 15, - "value_end": 109, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.0296677144829305, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/data/doc.json b/tests/data/doc.json index edef67d77..2a0d3b969 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -438,6 +438,87 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "JSON Web Token", + "severity": "medium", + "confidence": "strong", + "line_data_list": [ + { + "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiJlZjFkYTlkNC1mZjc3LTRjM2UtYTAwNS04NDBjM2Y4MzA3NDUiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC9mYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTUyMjIyOS8iLCJpYXQiOjE1MzcyMzMxMDYsIm5iZiI6MTUzNzIzMzEwNiwiZXhwIjoxNTM3MjM3MDA2LCJhY3IiOiIxIiwiYWlvIjoiQVhRQWkvOElBQUFBRm0rRS9RVEcrZ0ZuVnhMaldkdzhLKzYxQUdyU091TU1GNmViYU1qN1hPM0libUQzZkdtck95RCtOdlp5R24yVmFUL2tES1h3NE1JaHJnR1ZxNkJuOHdMWG9UMUxrSVorRnpRVmtKUFBMUU9WNEtjWHFTbENWUERTL0RpQ0RnRTIyMlRJbU12V05hRU1hVU9Uc0lHdlRRPT0iLCJhbXIiOlsid2lhIl0sImFwcGlkIjoiNzVkYmU3N2YtMTBhMy00ZTU5LTg1ZmQtOGMxMjc1NDRmMTdjIiwiYXBwaWRhY3IiOiIwIiwiZW1haWwiOiJBYmVMaUBtaWNyb3NvZnQuY29tIiwiZmFtaWx5X25hbWUiOiJMaW5jb2xuIiwiZ2l2ZW5fbmFtZSI6IkFiZSAoTVNGVCkiLCJpZHAiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC83MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMjIyNDcvIiwiaXBhZGRyIjoiMjIyLjIyMi4yMjIuMjIiLCJuYW1lIjoiYWJlbGkiLCJvaWQiOiIwMjIyM2I2Yi1hYTFkLTQyZDQtOWVjMC0xYjJiYjkxOTQ0MzgiLCJyaCI6IkkiLCJzY3AiOiJ1c2VyX2ltcGVyc29uYXRpb24iLCJzdWIiOiJsM19yb0lTUVUyMjJiVUxTOXlpMmswWHBxcE9pTXo1SDNaQUNvMUdlWEEiLCJ0aWQiOiJmYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTU2ZmQ0MjkiLCJ1bmlxdWVfbmFtZSI6ImFiZWxpQG1pY3Jvc29mdC5jb20iLCJ1dGkiOiJGVnNHeFlYSTMwLVR1aWt1dVVvRkFBIiwidmVyIjoiMS4wIn0.D3H6pMUtQnoJAGq6AHd", + "line_num": 1, + "path": "tests/samples/azure_access_token", + "info": "tests/samples/azure_access_token|RAW", + "value": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiJlZjFkYTlkNC1mZjc3LTRjM2UtYTAwNS04NDBjM2Y4MzA3NDUiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC9mYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTUyMjIyOS8iLCJpYXQiOjE1MzcyMzMxMDYsIm5iZiI6MTUzNzIzMzEwNiwiZXhwIjoxNTM3MjM3MDA2LCJhY3IiOiIxIiwiYWlvIjoiQVhRQWkvOElBQUFBRm0rRS9RVEcrZ0ZuVnhMaldkdzhLKzYxQUdyU091TU1GNmViYU1qN1hPM0libUQzZkdtck95RCtOdlp5R24yVmFUL2tES1h3NE1JaHJnR1ZxNkJuOHdMWG9UMUxrSVorRnpRVmtKUFBMUU9WNEtjWHFTbENWUERTL0RpQ0RnRTIyMlRJbU12V05hRU1hVU9Uc0lHdlRRPT0iLCJhbXIiOlsid2lhIl0sImFwcGlkIjoiNzVkYmU3N2YtMTBhMy00ZTU5LTg1ZmQtOGMxMjc1NDRmMTdjIiwiYXBwaWRhY3IiOiIwIiwiZW1haWwiOiJBYmVMaUBtaWNyb3NvZnQuY29tIiwiZmFtaWx5X25hbWUiOiJMaW5jb2xuIiwiZ2l2ZW5fbmFtZSI6IkFiZSAoTVNGVCkiLCJpZHAiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC83MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMjIyNDcvIiwiaXBhZGRyIjoiMjIyLjIyMi4yMjIuMjIiLCJuYW1lIjoiYWJlbGkiLCJvaWQiOiIwMjIyM2I2Yi1hYTFkLTQyZDQtOWVjMC0xYjJiYjkxOTQ0MzgiLCJyaCI6IkkiLCJzY3AiOiJ1c2VyX2ltcGVyc29uYXRpb24iLCJzdWIiOiJsM19yb0lTUVUyMjJiVUxTOXlpMmswWHBxcE9pTXo1SDNaQUNvMUdlWEEiLCJ0aWQiOiJmYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTU2ZmQ0MjkiLCJ1bmlxdWVfbmFtZSI6ImFiZWxpQG1pY3Jvc29mdC5jb20iLCJ1dGkiOiJGVnNHeFlYSTMwLVR1aWt1dVVvRkFBIiwidmVyIjoiMS4wIn0.D3H6pMUtQnoJAGq6AHd", + "value_start": 0, + "value_end": 1316, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.615950458346115, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Azure Access Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "line_num": 2, + "path": "tests/samples/azure_access_token", + "info": "tests/samples/azure_access_token|RAW", + "value": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "value_start": 0, + "value_end": 1029, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.6044494049575055, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "JSON Web Token", + "severity": "medium", + "confidence": "strong", + "line_data_list": [ + { + "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "line_num": 2, + "path": "tests/samples/azure_access_token", + "info": "tests/samples/azure_access_token|RAW", + "value": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "value_start": 0, + "value_end": 1029, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.6044494049575055, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index 71958bd1a..dc45dbdd4 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -299,7 +299,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.941, + "ml_probability": 0.963, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -323,33 +323,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.941, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "curl -H \"Authorization: Bearer eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj\" http://localhost:8080/.", - "line_num": 9, - "path": "tests/samples/auth_n.template", - "info": "", - "value": "eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj", - "value_start": 31, - "value_end": 65, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.2479906920322064, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -764,11 +737,11 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "JSON Web Token", "severity": "medium", - "confidence": "moderate", + "confidence": "strong", "line_data_list": [ { "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiJlZjFkYTlkNC1mZjc3LTRjM2UtYTAwNS04NDBjM2Y4MzA3NDUiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC9mYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTUyMjIyOS8iLCJpYXQiOjE1MzcyMzMxMDYsIm5iZiI6MTUzNzIzMzEwNiwiZXhwIjoxNTM3MjM3MDA2LCJhY3IiOiIxIiwiYWlvIjoiQVhRQWkvOElBQUFBRm0rRS9RVEcrZ0ZuVnhMaldkdzhLKzYxQUdyU091TU1GNmViYU1qN1hPM0libUQzZkdtck95RCtOdlp5R24yVmFUL2tES1h3NE1JaHJnR1ZxNkJuOHdMWG9UMUxrSVorRnpRVmtKUFBMUU9WNEtjWHFTbENWUERTL0RpQ0RnRTIyMlRJbU12V05hRU1hVU9Uc0lHdlRRPT0iLCJhbXIiOlsid2lhIl0sImFwcGlkIjoiNzVkYmU3N2YtMTBhMy00ZTU5LTg1ZmQtOGMxMjc1NDRmMTdjIiwiYXBwaWRhY3IiOiIwIiwiZW1haWwiOiJBYmVMaUBtaWNyb3NvZnQuY29tIiwiZmFtaWx5X25hbWUiOiJMaW5jb2xuIiwiZ2l2ZW5fbmFtZSI6IkFiZSAoTVNGVCkiLCJpZHAiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC83MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMjIyNDcvIiwiaXBhZGRyIjoiMjIyLjIyMi4yMjIuMjIiLCJuYW1lIjoiYWJlbGkiLCJvaWQiOiIwMjIyM2I2Yi1hYTFkLTQyZDQtOWVjMC0xYjJiYjkxOTQ0MzgiLCJyaCI6IkkiLCJzY3AiOiJ1c2VyX2ltcGVyc29uYXRpb24iLCJzdWIiOiJsM19yb0lTUVUyMjJiVUxTOXlpMmswWHBxcE9pTXo1SDNaQUNvMUdlWEEiLCJ0aWQiOiJmYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTU2ZmQ0MjkiLCJ1bmlxdWVfbmFtZSI6ImFiZWxpQG1pY3Jvc29mdC5jb20iLCJ1dGkiOiJGVnNHeFlYSTMwLVR1aWt1dVVvRkFBIiwidmVyIjoiMS4wIn0.D3H6pMUtQnoJAGq6AHd", @@ -789,6 +762,60 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Azure Access Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "line_num": 2, + "path": "tests/samples/azure_access_token", + "info": "", + "value": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "value_start": 0, + "value_end": 1029, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.6044494049575055, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "JSON Web Token", + "severity": "medium", + "confidence": "strong", + "line_data_list": [ + { + "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "line_num": 2, + "path": "tests/samples/azure_access_token", + "info": "", + "value": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "value_start": 0, + "value_end": 1029, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.6044494049575055, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -7519,33 +7546,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.985, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "grafana = 'eyJrIjoiMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMCIsIm4iOiJ4eHh4IiwiaWQiOjIwNDM2MH0='", - "line_num": 1, - "path": "tests/samples/grafana_provisioned_api_key", - "info": "", - "value": "eyJrIjoiMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMCIsIm4iOiJ4eHh4IiwiaWQiOjIwNDM2MH0=", - "value_start": 11, - "value_end": 107, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.8153130511409934, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -7789,33 +7789,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.931, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "$payload = 'eyJgsIZgeJhvNgFpSmlP.eyJcaaF9xCe7shE0ENPiBlEJOpS'", - "line_num": 1, - "path": "tests/samples/json_web_token.hs", - "info": "", - "value": "eyJgsIZgeJhvNgFpSmlP.eyJcaaF9xCe7shE0ENPiBlEJOpS", - "value_start": 12, - "value_end": 60, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.520488802699322, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -9603,33 +9576,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.217, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "eyJUaGVyZSBpcyBub3QgdGhlIEpTT04geW91IGFyZSBsb29raW5nIGZvciJ9CjwvYm9keT4KPC9o", - "line_num": 17, - "path": "tests/samples/test.eml", - "info": "", - "value": "eyJUaGVyZSBpcyBub3QgdGhlIEpTT04geW91IGFyZSBsb29raW5nIGZvciJ9CjwvYm9keT4KPC9o", - "value_start": 0, - "value_end": 76, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 5.282347539953402, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -9657,33 +9603,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.999, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "token in text: eyJrIjoiMDAwMDAwNDAwMDAwODAwNDAwMDAwMDAwNDAwMDAwMDAwMDAwMDAyMSIsIm4iOiJ4eHh4IiwiaWQiOjQzMDh9Cg", - "line_num": 18, - "path": "tests/samples/test2.eml", - "info": "", - "value": "eyJrIjoiMDAwMDAwNDAwMDAwODAwNDAwMDAwMDAwNDAwMDAwMDAwMDAwMDAyMSIsIm4iOiJ4eHh4IiwiaWQiOjQzMDh9Cg", - "value_start": 15, - "value_end": 109, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.0296677144829305, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/data/output.json b/tests/data/output.json index f5f87e544..f640de567 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -299,7 +299,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.941, + "ml_probability": 0.963, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -323,33 +323,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.941, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "curl -H \"Authorization: Bearer eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj\" http://localhost:8080/.", - "line_num": 9, - "path": "tests/samples/auth_n.template", - "info": "", - "value": "eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj", - "value_start": 31, - "value_end": 65, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.2479906920322064, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -710,11 +683,11 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "JSON Web Token", "severity": "medium", - "confidence": "moderate", + "confidence": "strong", "line_data_list": [ { "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiJlZjFkYTlkNC1mZjc3LTRjM2UtYTAwNS04NDBjM2Y4MzA3NDUiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC9mYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTUyMjIyOS8iLCJpYXQiOjE1MzcyMzMxMDYsIm5iZiI6MTUzNzIzMzEwNiwiZXhwIjoxNTM3MjM3MDA2LCJhY3IiOiIxIiwiYWlvIjoiQVhRQWkvOElBQUFBRm0rRS9RVEcrZ0ZuVnhMaldkdzhLKzYxQUdyU091TU1GNmViYU1qN1hPM0libUQzZkdtck95RCtOdlp5R24yVmFUL2tES1h3NE1JaHJnR1ZxNkJuOHdMWG9UMUxrSVorRnpRVmtKUFBMUU9WNEtjWHFTbENWUERTL0RpQ0RnRTIyMlRJbU12V05hRU1hVU9Uc0lHdlRRPT0iLCJhbXIiOlsid2lhIl0sImFwcGlkIjoiNzVkYmU3N2YtMTBhMy00ZTU5LTg1ZmQtOGMxMjc1NDRmMTdjIiwiYXBwaWRhY3IiOiIwIiwiZW1haWwiOiJBYmVMaUBtaWNyb3NvZnQuY29tIiwiZmFtaWx5X25hbWUiOiJMaW5jb2xuIiwiZ2l2ZW5fbmFtZSI6IkFiZSAoTVNGVCkiLCJpZHAiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC83MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMjIyNDcvIiwiaXBhZGRyIjoiMjIyLjIyMi4yMjIuMjIiLCJuYW1lIjoiYWJlbGkiLCJvaWQiOiIwMjIyM2I2Yi1hYTFkLTQyZDQtOWVjMC0xYjJiYjkxOTQ0MzgiLCJyaCI6IkkiLCJzY3AiOiJ1c2VyX2ltcGVyc29uYXRpb24iLCJzdWIiOiJsM19yb0lTUVUyMjJiVUxTOXlpMmswWHBxcE9pTXo1SDNaQUNvMUdlWEEiLCJ0aWQiOiJmYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTU2ZmQ0MjkiLCJ1bmlxdWVfbmFtZSI6ImFiZWxpQG1pY3Jvc29mdC5jb20iLCJ1dGkiOiJGVnNHeFlYSTMwLVR1aWt1dVVvRkFBIiwidmVyIjoiMS4wIn0.D3H6pMUtQnoJAGq6AHd", @@ -735,6 +708,60 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Azure Access Token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "line_num": 2, + "path": "tests/samples/azure_access_token", + "info": "", + "value": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "value_start": 0, + "value_end": 1029, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.6044494049575055, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "JSON Web Token", + "severity": "medium", + "confidence": "strong", + "line_data_list": [ + { + "line": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "line_num": 2, + "path": "tests/samples/azure_access_token", + "info": "", + "value": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt", + "value_start": 0, + "value_end": 1029, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.6044494049575055, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -6628,33 +6655,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.985, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "grafana = 'eyJrIjoiMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMCIsIm4iOiJ4eHh4IiwiaWQiOjIwNDM2MH0='", - "line_num": 1, - "path": "tests/samples/grafana_provisioned_api_key", - "info": "", - "value": "eyJrIjoiMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMCIsIm4iOiJ4eHh4IiwiaWQiOjIwNDM2MH0=", - "value_start": 11, - "value_end": 107, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.8153130511409934, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -6898,33 +6898,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.931, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "$payload = 'eyJgsIZgeJhvNgFpSmlP.eyJcaaF9xCe7shE0ENPiBlEJOpS'", - "line_num": 1, - "path": "tests/samples/json_web_token.hs", - "info": "", - "value": "eyJgsIZgeJhvNgFpSmlP.eyJcaaF9xCe7shE0ENPiBlEJOpS", - "value_start": 12, - "value_end": 60, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.520488802699322, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -8550,33 +8523,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.999, - "rule": "JSON Web Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "token in text: eyJrIjoiMDAwMDAwNDAwMDAwODAwNDAwMDAwMDAwNDAwMDAwMDAwMDAwMDAyMSIsIm4iOiJ4eHh4IiwiaWQiOjQzMDh9Cg", - "line_num": 18, - "path": "tests/samples/test2.eml", - "info": "", - "value": "eyJrIjoiMDAwMDAwNDAwMDAwODAwNDAwMDAwMDAwNDAwMDAwMDAwMDAwMDAyMSIsIm4iOiJ4eHh4IiwiaWQiOjQzMDh9Cg", - "value_start": 15, - "value_end": 109, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.0296677144829305, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/filters/test_value_azure_token_check.py b/tests/filters/test_value_azure_token_check.py new file mode 100644 index 000000000..fab078169 --- /dev/null +++ b/tests/filters/test_value_azure_token_check.py @@ -0,0 +1,26 @@ +import unittest + +from credsweeper.filters import ValueAzureTokenCheck +from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET +from tests.test_utils.dummy_line_data import get_line_data + + +class TestValueAzureTokenCheck(unittest.TestCase): + + def test_value_AzureToken_check_p(self): + self.assertTrue(ValueAzureTokenCheck().run(get_line_data(line=""), DUMMY_ANALYSIS_TARGET)) + self.assertTrue(ValueAzureTokenCheck().run(get_line_data(line="eyJungle", pattern=LINE_VALUE_PATTERN), + DUMMY_ANALYSIS_TARGET)) + self.assertTrue(ValueAzureTokenCheck().run( + get_line_data(line="eyJhbGciOjEsInR5cCI6Miwia2lkIjozfQo", pattern=LINE_VALUE_PATTERN), + DUMMY_ANALYSIS_TARGET)) + self.assertTrue(ValueAzureTokenCheck().run( + get_line_data(line="eyJhbGciOjEsInR5cCI6Miwia2lkIjozfQo.eyJhbGciOjEsInR5cCI6Miwia2lkIjozfQo" + ".eyJhbGciOjEsInR5cCI6Miwia2lkIjozfQo", + pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) + + def test_value_AzureToken_check_n(self): + self.assertFalse(ValueAzureTokenCheck().run( + get_line_data(line="eyJhbGciOjEsInR5cCI6Miwia2lkIjozfQo.eyJpc3MiOjEsImV4cCI6MiwiaWF0IjozfQo" + ".1234567890qwertyuiopasdfghjklzxc", + pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) diff --git a/tests/filters/test_value_file_path_check.py b/tests/filters/test_value_file_path_check.py index 3a1697014..ad7b47b07 100644 --- a/tests/filters/test_value_file_path_check.py +++ b/tests/filters/test_value_file_path_check.py @@ -8,7 +8,7 @@ class TestValueFilePathCheck: @pytest.mark.parametrize("line", [ - "5//0KCPafDhZvtCwqrsyiKFeDGT_0ZGHiI-E0ClIWrLC7tZ1WE5vHc4-Y2qi1IhPy3Pz5fmCe9OPIxEZUONUg7SWJF9nwQ_j2lIdXU0", + "5//0KCPafDhZvtCyiKFeDGT0ZGHiIE0ClIWrLC7tZ1WE5vHc4Y2qi1IhPy3Pz5fmCe9OPIxEZNUg7SJF9nwQj2lIdXU0", ]) def test_value_file_path_check_p(self, file_path: pytest.fixture, line: str) -> None: line_data = get_line_data(file_path, line=line, pattern=LINE_VALUE_PATTERN) diff --git a/tests/filters/test_value_json_web_token_check.py b/tests/filters/test_value_json_web_token_check.py index fc6f1d2d2..4a7a5b379 100644 --- a/tests/filters/test_value_json_web_token_check.py +++ b/tests/filters/test_value_json_web_token_check.py @@ -1,28 +1,28 @@ -import base64 - -import pytest +import unittest from credsweeper.filters import ValueJsonWebTokenCheck from tests.filters.conftest import LINE_VALUE_PATTERN, DUMMY_ANALYSIS_TARGET from tests.test_utils.dummy_line_data import get_line_data -class TestValueJsonWebTokenCheck: +class TestValueJsonWebTokenCheck(unittest.TestCase): + + def test_value_jwt_check_p(self): + self.assertTrue(ValueJsonWebTokenCheck().run(get_line_data(line="", pattern=LINE_VALUE_PATTERN), + DUMMY_ANALYSIS_TARGET)) + self.assertTrue(ValueJsonWebTokenCheck().run(get_line_data(line="eyJungle", pattern=LINE_VALUE_PATTERN), + DUMMY_ANALYSIS_TARGET)) + self.assertTrue(ValueJsonWebTokenCheck().run( + get_line_data(line="1234567890qwertyuiopasdfghjklzxc", pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) + self.assertTrue(ValueJsonWebTokenCheck().run( + get_line_data(line="eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.eyJleHAiOjY1NTM2fQo", + pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) + self.assertTrue(ValueJsonWebTokenCheck().run( + get_line_data(line="eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.65474687468446387653", + pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) - @pytest.mark.parametrize("line", ["12345:asbdsa:28yd"]) - def test_value_jwt_check_p(self, file_path: pytest.fixture, line: str) -> None: - encoded_line = base64.b64encode(line.encode('ascii')).decode('ascii') - jwt_like_line = base64.b64encode('{"typ":"JWT", "dummy": false}'.encode('ascii')).decode('ascii') - jwt_line_data = get_line_data(file_path, line=f"{jwt_like_line}.{encoded_line}", pattern=LINE_VALUE_PATTERN) - assert ValueJsonWebTokenCheck().run(jwt_line_data, DUMMY_ANALYSIS_TARGET) is False - # partially line - jwt_line_data = get_line_data(file_path, line=f"{jwt_like_line}.AnyTailOfString", pattern=LINE_VALUE_PATTERN) - assert ValueJsonWebTokenCheck().run(jwt_line_data, DUMMY_ANALYSIS_TARGET) is False - @pytest.mark.parametrize("line", ["1234f:asbdsa:28yd"]) - def test_value_jwt_check_n(self, file_path: pytest.fixture, line: str) -> None: - encoded_line = base64.b64encode(line.encode('ascii')).decode('ascii') - jwt_line_data = get_line_data(file_path, line=f"eyJungle.{encoded_line}", pattern=LINE_VALUE_PATTERN) - assert ValueJsonWebTokenCheck().run(jwt_line_data, DUMMY_ANALYSIS_TARGET) is True - jwt_line_data = get_line_data(file_path, line="eyJungle", pattern=LINE_VALUE_PATTERN) - assert ValueJsonWebTokenCheck().run(jwt_line_data, DUMMY_ANALYSIS_TARGET) is True + def test_value_jwt_check_n(self): + self.assertFalse(ValueJsonWebTokenCheck().run( + get_line_data(line="eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.0xm2jd8ha7zo3l5qn48", + pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) diff --git a/tests/samples/azure_access_token b/tests/samples/azure_access_token index 4d5d0c454..d85be0700 100644 --- a/tests/samples/azure_access_token +++ b/tests/samples/azure_access_token @@ -1 +1,3 @@ -eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiJlZjFkYTlkNC1mZjc3LTRjM2UtYTAwNS04NDBjM2Y4MzA3NDUiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC9mYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTUyMjIyOS8iLCJpYXQiOjE1MzcyMzMxMDYsIm5iZiI6MTUzNzIzMzEwNiwiZXhwIjoxNTM3MjM3MDA2LCJhY3IiOiIxIiwiYWlvIjoiQVhRQWkvOElBQUFBRm0rRS9RVEcrZ0ZuVnhMaldkdzhLKzYxQUdyU091TU1GNmViYU1qN1hPM0libUQzZkdtck95RCtOdlp5R24yVmFUL2tES1h3NE1JaHJnR1ZxNkJuOHdMWG9UMUxrSVorRnpRVmtKUFBMUU9WNEtjWHFTbENWUERTL0RpQ0RnRTIyMlRJbU12V05hRU1hVU9Uc0lHdlRRPT0iLCJhbXIiOlsid2lhIl0sImFwcGlkIjoiNzVkYmU3N2YtMTBhMy00ZTU5LTg1ZmQtOGMxMjc1NDRmMTdjIiwiYXBwaWRhY3IiOiIwIiwiZW1haWwiOiJBYmVMaUBtaWNyb3NvZnQuY29tIiwiZmFtaWx5X25hbWUiOiJMaW5jb2xuIiwiZ2l2ZW5fbmFtZSI6IkFiZSAoTVNGVCkiLCJpZHAiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC83MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMjIyNDcvIiwiaXBhZGRyIjoiMjIyLjIyMi4yMjIuMjIiLCJuYW1lIjoiYWJlbGkiLCJvaWQiOiIwMjIyM2I2Yi1hYTFkLTQyZDQtOWVjMC0xYjJiYjkxOTQ0MzgiLCJyaCI6IkkiLCJzY3AiOiJ1c2VyX2ltcGVyc29uYXRpb24iLCJzdWIiOiJsM19yb0lTUVUyMjJiVUxTOXlpMmswWHBxcE9pTXo1SDNaQUNvMUdlWEEiLCJ0aWQiOiJmYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTU2ZmQ0MjkiLCJ1bmlxdWVfbmFtZSI6ImFiZWxpQG1pY3Jvc29mdC5jb20iLCJ1dGkiOiJGVnNHeFlYSTMwLVR1aWt1dVVvRkFBIiwidmVyIjoiMS4wIn0.D3H6pMUtQnoJAGq6AHd \ No newline at end of file +eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiJlZjFkYTlkNC1mZjc3LTRjM2UtYTAwNS04NDBjM2Y4MzA3NDUiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC9mYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTUyMjIyOS8iLCJpYXQiOjE1MzcyMzMxMDYsIm5iZiI6MTUzNzIzMzEwNiwiZXhwIjoxNTM3MjM3MDA2LCJhY3IiOiIxIiwiYWlvIjoiQVhRQWkvOElBQUFBRm0rRS9RVEcrZ0ZuVnhMaldkdzhLKzYxQUdyU091TU1GNmViYU1qN1hPM0libUQzZkdtck95RCtOdlp5R24yVmFUL2tES1h3NE1JaHJnR1ZxNkJuOHdMWG9UMUxrSVorRnpRVmtKUFBMUU9WNEtjWHFTbENWUERTL0RpQ0RnRTIyMlRJbU12V05hRU1hVU9Uc0lHdlRRPT0iLCJhbXIiOlsid2lhIl0sImFwcGlkIjoiNzVkYmU3N2YtMTBhMy00ZTU5LTg1ZmQtOGMxMjc1NDRmMTdjIiwiYXBwaWRhY3IiOiIwIiwiZW1haWwiOiJBYmVMaUBtaWNyb3NvZnQuY29tIiwiZmFtaWx5X25hbWUiOiJMaW5jb2xuIiwiZ2l2ZW5fbmFtZSI6IkFiZSAoTVNGVCkiLCJpZHAiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC83MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMjIyNDcvIiwiaXBhZGRyIjoiMjIyLjIyMi4yMjIuMjIiLCJuYW1lIjoiYWJlbGkiLCJvaWQiOiIwMjIyM2I2Yi1hYTFkLTQyZDQtOWVjMC0xYjJiYjkxOTQ0MzgiLCJyaCI6IkkiLCJzY3AiOiJ1c2VyX2ltcGVyc29uYXRpb24iLCJzdWIiOiJsM19yb0lTUVUyMjJiVUxTOXlpMmswWHBxcE9pTXo1SDNaQUNvMUdlWEEiLCJ0aWQiOiJmYTE1ZDY5Mi1lOWM3LTQ0NjAtYTc0My0yOWYyOTU2ZmQ0MjkiLCJ1bmlxdWVfbmFtZSI6ImFiZWxpQG1pY3Jvc29mdC5jb20iLCJ1dGkiOiJGVnNHeFlYSTMwLVR1aWt1dVVvRkFBIiwidmVyIjoiMS4wIn0.D3H6pMUtQnoJAGq6AHd +eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6Imk2bEdrM0ZaenhSY1ViMkMzbkVRN3N5SEpsWSJ9.eyJhdWQiOiI2ZTc0MTcyYi1iZTU2LTQ4NDMtOWZmNC1lNjZhMzliYjEyZTMiLCJpc3MiOiJodHRwczovL2xvZ2luLm1pY3Jvc29mdG9ubGluZS5jb20vNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3L3YyLjAiLCJpYXQiOjE1MzcyMzEwNDgsIm5iZiI6MTUzNzIzMTA0OCwiZXhwIjoxNTM3MjM0OTQ4LCJhaW8iOiJBWFFBaS84SUFBQUF0QWFaTG8zQ2hNaWY2S09udHRSQjdlQnE0L0RjY1F6amNKR3hQWXkvQzNqRGFOR3hYZDZ3TklJVkdSZ2hOUm53SjFsT2NBbk5aY2p2a295ckZ4Q3R0djMzMTQwUmlvT0ZKNGJDQ0dWdW9DYWcxdU9UVDIyMjIyZ0h3TFBZUS91Zjc5UVgrMEtJaWpkcm1wNjlSY3R6bVE9PSIsImF6cCI6IjZlNzQxNzJiLWJlNTYtNDg0My05ZmY0LWU2NmEzOWJiMTJlMyIsImF6cGFjciI6IjAiLCJuYW1lIjoiQWJlIExpbmNvbG4iLCJvaWQiOiI2OTAyMjJiZS1mZjFhLTRkNTYtYWJkMS03ZTRmN2QzOGU0NzQiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJhYmVsaUBtaWNyb3NvZnQuY29tIiwicmgiOiJJIiwic2NwIjoiYWNjZXNzX2FzX3VzZXIiLCJzdWIiOiJIS1pwZmFIeVdhZGVPb3VZbGl0anJJLUtmZlRtMjIyWDVyclYzeERxZktRIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidXRpIjoiZnFpQnFYTFBqMGVRYTgyUy1JWUZBQSIsInZlciI6IjIuMCJ9.pj4N-w_3Us9DrBLfpCt +^^^ examples from https://learn.microsoft.com/en-us/entra/identity-platform/access-tokens \ No newline at end of file diff --git a/tests/samples/json_web_token b/tests/samples/json_web_token new file mode 100644 index 000000000..45e244772 --- /dev/null +++ b/tests/samples/json_web_token @@ -0,0 +1,2 @@ +detected: eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.Ce7sh0ENPiBlE_dose0cBA +not detected: eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.NiIsInR5cCI6IkpXV.NiIsInR5cCI6IkpXV diff --git a/tests/samples/json_web_token.hs b/tests/samples/json_web_token.hs deleted file mode 100644 index 76f919b6c..000000000 --- a/tests/samples/json_web_token.hs +++ /dev/null @@ -1 +0,0 @@ -$payload = 'eyJgsIZgeJhvNgFpSmlP.eyJcaaF9xCe7shE0ENPiBlEJOpS' From 1482f56024aee893c254bcbcce5953c657b21253 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Tue, 6 Aug 2024 14:29:26 +0300 Subject: [PATCH 02/11] rollback some --- cicd/benchmark.txt | 15 ++++++++------- credsweeper/file_handler/descriptor.py | 8 +------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index 6203f80c7..d75bff0b3 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -231,7 +231,7 @@ AWS Client ID 168 13 0 16 AWS Multi 75 12 0 87 75 11 1 0 0.916667 0.000000 0.873563 0.872093 1.000000 0.931677 AWS S3 Bucket 61 25 0 87 61 24 1 0 0.960000 0.000000 0.720930 0.717647 1.000000 0.835616 Atlassian Old PAT token 27 212 3 12 3 8 207 24 0.037209 0.888889 0.867769 0.272727 0.111111 0.157895 -Auth 319 2749 86 294 275 19 2816 44 0.006702 0.137931 0.980025 0.935374 0.862069 0.897227 +Auth 407 2725 77 372 351 21 2781 56 0.007495 0.137592 0.976005 0.943548 0.862408 0.901155 Azure Access Token 19 0 0 0 0 0 19 1.000000 0.000000 0.000000 BASE64 Private Key 7 2 0 7 7 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 @@ -253,15 +253,16 @@ Grafana Provisioned API Key 22 1 0 JSON Web Token 284 11 2 274 271 3 10 13 0.230769 0.045775 0.946128 0.989051 0.954225 0.971326 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 14 6 0 10 10 0 6 4 0.000000 0.285714 0.800000 1.000000 0.714286 0.833333 -Key 462 7841 462 439 431 8 8295 31 0.000964 0.067100 0.995550 0.981777 0.932900 0.956715 -Nonce 79 53 0 84 76 8 45 3 0.150943 0.037975 0.916667 0.904762 0.962025 0.932515 +Key 483 8494 464 445 436 9 8949 47 0.001005 0.097308 0.994068 0.979775 0.902692 0.939655 +Nonce 83 53 0 85 79 6 47 4 0.113208 0.048193 0.926471 0.929412 0.951807 0.940476 +Other 0 0 5 0 0 5 0 0.000000 1.000000 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 -Password 1915 7417 2669 1603 1581 22 10064 334 0.002181 0.174413 0.970336 0.986276 0.825587 0.898806 -Salt 42 72 2 38 38 0 74 4 0.000000 0.095238 0.965517 1.000000 0.904762 0.950000 -Secret 1359 29629 870 1236 1231 5 30494 128 0.000164 0.094187 0.995825 0.995955 0.905813 0.948748 +Password 1823 7474 2752 1681 1614 67 10159 209 0.006552 0.114646 0.977094 0.960143 0.885354 0.921233 +Salt 42 76 2 38 38 0 78 4 0.000000 0.095238 0.966667 1.000000 0.904762 0.950000 +Secret 1358 28497 869 1234 1229 5 29361 129 0.000170 0.094993 0.995639 0.995948 0.905007 0.948302 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -Token 572 3959 448 523 504 19 4388 68 0.004311 0.118881 0.982527 0.963671 0.881119 0.920548 +Token 585 3972 439 519 511 8 4403 74 0.001814 0.126496 0.983587 0.984586 0.873504 0.925725 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 URL Credentials 194 125 251 184 184 0 376 10 0.000000 0.051546 0.982456 1.000000 0.948454 0.973545 7615 59903 5233 6704 6470 227 59676 1145 0.003789 0.150361 0.979679 0.966104 0.849639 0.904136 diff --git a/credsweeper/file_handler/descriptor.py b/credsweeper/file_handler/descriptor.py index b8ae850b6..a4e534a7d 100644 --- a/credsweeper/file_handler/descriptor.py +++ b/credsweeper/file_handler/descriptor.py @@ -3,13 +3,7 @@ @dataclass(frozen=True) class Descriptor: - """Descriptor for file - optimize memory consumption - - Args: - path: file path - extension: file extension - info: info for deep scan - """ + """Descriptor for file - optimize memory consumption""" path: str extension: str info: str From fc9167d36acac83fb49d5c968278ff58dba35286 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Tue, 6 Aug 2024 14:33:13 +0300 Subject: [PATCH 03/11] style --- tests/filters/test_value_json_web_token_check.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/filters/test_value_json_web_token_check.py b/tests/filters/test_value_json_web_token_check.py index 4a7a5b379..3b041425f 100644 --- a/tests/filters/test_value_json_web_token_check.py +++ b/tests/filters/test_value_json_web_token_check.py @@ -9,9 +9,9 @@ class TestValueJsonWebTokenCheck(unittest.TestCase): def test_value_jwt_check_p(self): self.assertTrue(ValueJsonWebTokenCheck().run(get_line_data(line="", pattern=LINE_VALUE_PATTERN), - DUMMY_ANALYSIS_TARGET)) + DUMMY_ANALYSIS_TARGET)) self.assertTrue(ValueJsonWebTokenCheck().run(get_line_data(line="eyJungle", pattern=LINE_VALUE_PATTERN), - DUMMY_ANALYSIS_TARGET)) + DUMMY_ANALYSIS_TARGET)) self.assertTrue(ValueJsonWebTokenCheck().run( get_line_data(line="1234567890qwertyuiopasdfghjklzxc", pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) self.assertTrue(ValueJsonWebTokenCheck().run( @@ -21,7 +21,6 @@ def test_value_jwt_check_p(self): get_line_data(line="eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.65474687468446387653", pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) - def test_value_jwt_check_n(self): self.assertFalse(ValueJsonWebTokenCheck().run( get_line_data(line="eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.0xm2jd8ha7zo3l5qn48", From 6c296d27b13dc3c3b7630c8830d1a6c0eb7a3ace Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Tue, 6 Aug 2024 18:27:29 +0300 Subject: [PATCH 04/11] [skip actions] [jwt] 2024-08-06T18:27:29+03:00 --- cicd/benchmark.txt | 12 ++-- .../filters/value_json_web_token_check.py | 8 +-- credsweeper/rules/config.yaml | 2 +- experiment/src/entropy_test.py | 65 +++++++++++++++---- .../test_value_json_web_token_check.py | 3 +- 5 files changed, 66 insertions(+), 24 deletions(-) diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index d75bff0b3..c0d0b3118 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -232,7 +232,7 @@ AWS Multi 75 12 0 8 AWS S3 Bucket 61 25 0 87 61 24 1 0 0.960000 0.000000 0.720930 0.717647 1.000000 0.835616 Atlassian Old PAT token 27 212 3 12 3 8 207 24 0.037209 0.888889 0.867769 0.272727 0.111111 0.157895 Auth 407 2725 77 372 351 21 2781 56 0.007495 0.137592 0.976005 0.943548 0.862408 0.901155 -Azure Access Token 19 0 0 0 0 0 19 1.000000 0.000000 0.000000 +Azure Access Token 19 0 0 10 10 0 0 9 0.473684 0.526316 1.000000 0.526316 0.689655 BASE64 Private Key 7 2 0 7 7 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 Bitbucket Client ID 142 1813 9 46 27 18 1804 115 0.009879 0.809859 0.932281 0.600000 0.190141 0.288770 @@ -250,19 +250,19 @@ Google API Key 12 0 0 1 Google Multi 10 2 0 11 10 1 1 0 0.500000 0.000000 0.916667 0.909091 1.000000 0.952381 Google OAuth Access Token 3 0 0 3 3 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 Grafana Provisioned API Key 22 1 0 1 1 0 1 21 0.000000 0.954545 0.086957 1.000000 0.045455 0.086957 -JSON Web Token 284 11 2 274 271 3 10 13 0.230769 0.045775 0.946128 0.989051 0.954225 0.971326 +JSON Web Token 173 2 2 92 92 0 4 81 0.000000 0.468208 0.542373 1.000000 0.531792 0.694340 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 14 6 0 10 10 0 6 4 0.000000 0.285714 0.800000 1.000000 0.714286 0.833333 -Key 483 8494 464 445 436 9 8949 47 0.001005 0.097308 0.994068 0.979775 0.902692 0.939655 +Key 483 8494 464 442 433 9 8949 50 0.001005 0.103520 0.993751 0.979638 0.896480 0.936216 Nonce 83 53 0 85 79 6 47 4 0.113208 0.048193 0.926471 0.929412 0.951807 0.940476 Other 0 0 5 0 0 5 0 0.000000 1.000000 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 Password 1823 7474 2752 1681 1614 67 10159 209 0.006552 0.114646 0.977094 0.960143 0.885354 0.921233 Salt 42 76 2 38 38 0 78 4 0.000000 0.095238 0.966667 1.000000 0.904762 0.950000 -Secret 1358 28497 869 1234 1229 5 29361 129 0.000170 0.094993 0.995639 0.995948 0.905007 0.948302 +Secret 1358 28497 869 1231 1226 5 29361 132 0.000170 0.097202 0.995541 0.995938 0.902798 0.947084 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -Token 585 3972 439 519 511 8 4403 74 0.001814 0.126496 0.983587 0.984586 0.873504 0.925725 +Token 585 3972 439 517 509 8 4403 76 0.001814 0.129915 0.983187 0.984526 0.870085 0.923775 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 URL Credentials 194 125 251 184 184 0 376 10 0.000000 0.051546 0.982456 1.000000 0.948454 0.973545 - 7615 59903 5233 6704 6470 227 59676 1145 0.003789 0.150361 0.979679 0.966104 0.849639 0.904136 + 7504 59894 5233 6524 6293 224 59670 1211 0.003740 0.161381 0.978709 0.965628 0.838619 0.897654 diff --git a/credsweeper/filters/value_json_web_token_check.py b/credsweeper/filters/value_json_web_token_check.py index 8235c9b3e..d2a308271 100644 --- a/credsweeper/filters/value_json_web_token_check.py +++ b/credsweeper/filters/value_json_web_token_check.py @@ -38,9 +38,10 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: with contextlib.suppress(Exception): jwt_parts = line_data.value.split('.') for part in jwt_parts: + data = Util.decode_base64(part, padding_safe=True, urlsafe_detect=True) if part.startswith("eyJ"): # open part - just base64 encoded - json_keys = json.loads(Util.decode_base64(part, padding_safe=True, urlsafe_detect=True)).keys() + json_keys = json.loads(data).keys() # header will be checked first if not header_check: if header_check := bool(ValueJsonWebTokenCheck.header_keys.intersection(json_keys)): @@ -56,10 +57,7 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: # any other payloads are allowed elif header_check and payload_check and not signature_check: # signature check or skip encrypted part - min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(part)) - entropy = Util.get_shannon_entropy(part, Chars.BASE64URL_CHARS.value) - # good signature has to be like random bytes - signature_check = entropy > min_entropy + signature_check = not Util.is_ascii_entropy_validate(data) else: break if header_check and payload_check and signature_check: diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index e91f3bc7e..8eefe5d23 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -337,7 +337,7 @@ - doc - name: JSON Web Token - severity: medium + severity: critical confidence: strong type: pattern values: diff --git a/experiment/src/entropy_test.py b/experiment/src/entropy_test.py index 150047718..f7b15193e 100644 --- a/experiment/src/entropy_test.py +++ b/experiment/src/entropy_test.py @@ -7,9 +7,11 @@ """ import base64 +import math import random import signal import statistics +import sys import threading import time from multiprocessing import Pool @@ -23,14 +25,38 @@ # from credsweeper.filters import ValueEntropyBase36Check from credsweeper.utils import Util -random_data: str -ITERATIONS = 1000 +random_data: bytes +ITERATIONS = 100 def pool_initializer() -> None: signal.signal(signal.SIGINT, signal.SIG_IGN) +def byte_entropy(data: bytes): + data_len = len(data) + entropy = 0. + cells = [int(0)] * 256 + for x in data: + cells[x] += 1 + left = 0. + step = 256.0 / data_len + right = left + step + while left < 256: + cell_sum = 0 + i = int(left) + r = int(right) + while i < r and i < 256: + cell_sum += cells[i] + i += 1 + p_x = float(cell_sum) / data_len + if p_x > 0: + entropy += -p_x * math.log2(p_x) + left = right + right += step + return entropy + + def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]: min_avg = _args[1] min_dvt = _args[2] @@ -38,7 +64,8 @@ def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]: entropies = [] for x in range(ITERATIONS): offset = x * size - entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE64_CHARS.value) + entropy = byte_entropy(random_data[offset:offset + size]) + # entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE64_CHARS.value) # entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE36_CHARS.value) # entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE32_CHARS.value) entropies.append(entropy) @@ -57,8 +84,7 @@ def generate(start, end) -> Dict[int, Tuple[float, float]]: try: for n in range(1000): start_time = time.time() - rand_bytes = random.randbytes(int(8 * ITERATIONS * max(sizes) / 5)) - random_data = base64.b64encode(rand_bytes).decode('ascii') + random_data = random.randbytes(ITERATIONS * max(sizes)) # random_data = ''.join( # [random.choice(string.digits + string.ascii_lowercase) for _ in range(ITERATIONS * max(sizes))]) _args = [(i, stats[i][0] if i in stats else 9.9, stats[i][1] if i in stats else 0.0) for i in sizes] @@ -76,8 +102,8 @@ def generate(start, end) -> Dict[int, Tuple[float, float]]: return stats -def log_model(x, k4, k3, k2, k1, k0): - return k4 * np.log2(x)**4 + k3 * np.log2(x)**3 + k2 * np.log2(x)**2 + k1 * np.log2(x) + k0 +def log_model(x, k1, k0): + return k1 * np.log2(x) + k0 def solve(data: dict[int, Tuple[float, float]]): @@ -98,16 +124,33 @@ def solve(data: dict[int, Tuple[float, float]]): params, covariance = curve_fit(log_model, _x, _y) print(params) - k4, k3, k2, k1, k0 = params - plt.plot(x, log_model(x, k4, k3, k2, k1, k0), 'b--', label='fit') + k1, k0 = params + plt.plot(x, log_model(x, k1, k0), 'b--', label='fit') plt.grid(True) plt.show() +from scipy.stats import entropy +import numpy as np + +def calculate_shannon_entropy(byte_sequence): + byte_counts = np.bincount(byte_sequence, minlength=256) + # Normalize the counts to get the probabilities + probabilities = byte_counts / np.sum(byte_counts) + # Calculate the entropy + return entropy(probabilities, base=2) if __name__ == "__main__": - data_file = "base64entr_12_1200.json" # [0.00147696 -0.03688593 0.24484864 0.31841099 0.39320007] + # data = [0]*200 + # for n in range(len(data)): + # data[n]=n>>2 + # print(byte_entropy(data)) + # print(calculate_shannon_entropy(data)) + # sys.exit(0) + # data_file = "base64entr_12_1200.json" # [0.00147696 -0.03688593 0.24484864 0.31841099 0.39320007] + start, end = 63, 130 + data_file = f"bytes_{start}_{end}.json" #[ 1.01660278 -1.03603384] if not (_data := Util.json_load(data_file)): - _data = generate(12, 1200) + _data = generate(start, end) Util.json_dump(_data, data_file) solve(_data) diff --git a/tests/filters/test_value_json_web_token_check.py b/tests/filters/test_value_json_web_token_check.py index 3b041425f..c73aa9f1b 100644 --- a/tests/filters/test_value_json_web_token_check.py +++ b/tests/filters/test_value_json_web_token_check.py @@ -23,5 +23,6 @@ def test_value_jwt_check_p(self): def test_value_jwt_check_n(self): self.assertFalse(ValueJsonWebTokenCheck().run( - get_line_data(line="eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.0xm2jd8ha7zo3l5qn48", + + get_line_data(line="eyJ0eXAiOiJqd3QiLCJhbGciOiJlZDI1NTE5In0.eyJhdWQiOiJURVNUUyIsImV4cCI6MTg1OTEyMTI3NSwianRpIjoiWE5MWjZYWVBIVE1ESlFSTlFPSFVPSlFHV0NVN01JNVc1SlhDWk5YQllVS0VRVzY3STI1USIsImlhdCI6MTU0Mzc2MTI3NSwiaXNzIjoiT0NBVDMzTVRWVTJWVU9JTUdOR1VOWEo2NkFIMlJMU0RBRjNNVUJDWUFZNVFNSUw2NU5RTTZYUUciLCJuYW1lIjoiU3luYWRpYSBDb21tdW5pY2F0aW9ucyBJbmMuIiwibmJmIjoxNTQzNzYxMjc1LCJzdWIiOiJPQ0FUMzNNVFZVMlZVT0lNR05HVU5YSjY2QUgyUkxTREFGM01VQkNZQVk1UU1JTDY1TlFNNlhRRyIsInR5cGUiOiJvcGVyYXRvciIsIm5hdHMiOnsic2lnbmluZ19rZXlzIjpbIk9EU0tSN01ZRlFaNU1NQUo2RlBNRUVUQ1RFM1JJSE9GTFRZUEpSTUFWVk40T0xWMllZQU1IQ0FDIiwiT0RTS0FDU1JCV1A1MzdEWkRSVko2NTdKT0lHT1BPUTZLRzdUNEhONk9LNEY2SUVDR1hEQUhOUDIiLCJPRFNLSTM2TFpCNDRPWTVJVkNSNlA1MkZaSlpZTVlXWlZXTlVEVExFWjVUSzJQTjNPRU1SVEFCUiJdfX0.hyfz6E39BMUh0GLzovFfk3wT4OfualftjdJ_eYkLfPvu5tZubYQ_Pn9oFYGCV_6yKy3KMGhWGUCyCdHaPhalBw", pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) From 648621f154504eb9e0c8c30c9edf7702f079f89c Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 7 Aug 2024 00:14:23 +0300 Subject: [PATCH 05/11] [skip actions] [jwt] 2024-08-07T00:14:23+03:00 --- .../filters/value_json_web_token_check.py | 2 +- credsweeper/rules/config.yaml | 2 +- credsweeper/utils/util.py | 9 ++++-- experiment/src/entropy_test.py | 30 +++++++------------ .../test_value_json_web_token_check.py | 3 +- 5 files changed, 20 insertions(+), 26 deletions(-) diff --git a/credsweeper/filters/value_json_web_token_check.py b/credsweeper/filters/value_json_web_token_check.py index d2a308271..9106a2d2a 100644 --- a/credsweeper/filters/value_json_web_token_check.py +++ b/credsweeper/filters/value_json_web_token_check.py @@ -16,7 +16,7 @@ class ValueJsonWebTokenCheck(Filter): https://datatracker.ietf.org/doc/html/rfc7519 """ header_keys = {"alg", "typ", "cty", "enc"} - payload_keys = {"iss", "sub", "aud", "exp", "nbf", "iat", "jti"} + payload_keys = {"iss", "sub", "aud", "exp", "nbf", "iat", "jti", "id", "role", "iss"} def __init__(self, config: Config = None) -> None: pass diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 8eefe5d23..ffa25c139 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -341,7 +341,7 @@ confidence: strong type: pattern values: - - (?eyJ[0-9A-Za-z_+/=-]{15,8000}([.0-9A-Za-z_+/=-]{16,8000}){2,16}) + - (?eyJ[0-9A-Za-z_+/=-]{15,8000}(\.[0-9A-Za-z_+/=-]{0,8000}){2,16}) filter_type: - ValueJsonWebTokenCheck required_substrings: diff --git a/credsweeper/utils/util.py b/credsweeper/utils/util.py index 3f51d18d5..2e2d2fb96 100644 --- a/credsweeper/utils/util.py +++ b/credsweeper/utils/util.py @@ -84,6 +84,8 @@ def get_shannon_entropy(data: str, iterator: str) -> float: 32: 3.25392803184602, 40: 3.64853567064867, 64: 4.57756933688035, + 384: 7.39, + 512: 7.55, } @staticmethod @@ -95,10 +97,13 @@ def get_min_data_entropy(x: int) -> float: # approximated for range 12 - 64 _x = x - 8 y = ((0.000016617804 * _x - 0.002695077) * _x + 0.170393) * _x + 0.4 - elif 64 < x: + elif 64 < x < 384: # logarithm base 2 - slow, but precise _x = x - 8 - y = 1.581026279659 * math.log2(_x) - 1.90156 + y = 1.095884 * math.log2(_x) - 1.90156 + elif 384 < x < 512: + # solved for 384 - 512 + y = -0.11215851 * math.log2(x) ** 2 + 2.34303484 * math.log2(x) - 4.4466237 else: # less or equal to 8 bytes might have 0 entropy y = 0 diff --git a/experiment/src/entropy_test.py b/experiment/src/entropy_test.py index f7b15193e..29d13a467 100644 --- a/experiment/src/entropy_test.py +++ b/experiment/src/entropy_test.py @@ -6,7 +6,6 @@ # size of encoded string: (mean of entropy, standard deviation) """ -import base64 import math import random import signal @@ -21,12 +20,12 @@ import numpy as np from scipy.optimize import curve_fit -from credsweeper.common.constants import Chars -# from credsweeper.filters import ValueEntropyBase36Check from credsweeper.utils import Util +# from credsweeper.filters import ValueEntropyBase36Check + random_data: bytes -ITERATIONS = 100 +ITERATIONS = 10000 def pool_initializer() -> None: @@ -82,12 +81,12 @@ def generate(start, end) -> Dict[int, Tuple[float, float]]: sizes = [x for x in range(start, end)] global random_data try: - for n in range(1000): + for n in range(100): start_time = time.time() random_data = random.randbytes(ITERATIONS * max(sizes)) # random_data = ''.join( # [random.choice(string.digits + string.ascii_lowercase) for _ in range(ITERATIONS * max(sizes))]) - _args = [(i, stats[i][0] if i in stats else 9.9, stats[i][1] if i in stats else 0.0) for i in sizes] + _args = [(i, stats[i][0] if i in stats else 99.99, stats[i][1] if i in stats else 0.0) for i in sizes] with Pool(processes=min(15, len(_args)), initializer=pool_initializer) as pool: for _size, _res in zip(sizes, pool.map(evaluate_avg, _args)): with threading.Lock(): @@ -102,8 +101,8 @@ def generate(start, end) -> Dict[int, Tuple[float, float]]: return stats -def log_model(x, k1, k0): - return k1 * np.log2(x) + k0 +def log_model(x, k2, k1, k0): + return k2 * np.log2(x) ** 2 + k1 * np.log2(x) + k0 def solve(data: dict[int, Tuple[float, float]]): @@ -124,21 +123,12 @@ def solve(data: dict[int, Tuple[float, float]]): params, covariance = curve_fit(log_model, _x, _y) print(params) - k1, k0 = params - plt.plot(x, log_model(x, k1, k0), 'b--', label='fit') + k2, k1, k0 = params + plt.plot(x, log_model(x, k2, k1, k0), 'b--', label='fit') plt.grid(True) plt.show() -from scipy.stats import entropy -import numpy as np - -def calculate_shannon_entropy(byte_sequence): - byte_counts = np.bincount(byte_sequence, minlength=256) - # Normalize the counts to get the probabilities - probabilities = byte_counts / np.sum(byte_counts) - # Calculate the entropy - return entropy(probabilities, base=2) if __name__ == "__main__": # data = [0]*200 @@ -148,7 +138,7 @@ def calculate_shannon_entropy(byte_sequence): # print(calculate_shannon_entropy(data)) # sys.exit(0) # data_file = "base64entr_12_1200.json" # [0.00147696 -0.03688593 0.24484864 0.31841099 0.39320007] - start, end = 63, 130 + start, end = 384, 512 # [-0.11215851 2.34303484 -4.4466237 ] data_file = f"bytes_{start}_{end}.json" #[ 1.01660278 -1.03603384] if not (_data := Util.json_load(data_file)): _data = generate(start, end) diff --git a/tests/filters/test_value_json_web_token_check.py b/tests/filters/test_value_json_web_token_check.py index c73aa9f1b..2113ed893 100644 --- a/tests/filters/test_value_json_web_token_check.py +++ b/tests/filters/test_value_json_web_token_check.py @@ -23,6 +23,5 @@ def test_value_jwt_check_p(self): def test_value_jwt_check_n(self): self.assertFalse(ValueJsonWebTokenCheck().run( - - get_line_data(line="eyJ0eXAiOiJqd3QiLCJhbGciOiJlZDI1NTE5In0.eyJhdWQiOiJURVNUUyIsImV4cCI6MTg1OTEyMTI3NSwianRpIjoiWE5MWjZYWVBIVE1ESlFSTlFPSFVPSlFHV0NVN01JNVc1SlhDWk5YQllVS0VRVzY3STI1USIsImlhdCI6MTU0Mzc2MTI3NSwiaXNzIjoiT0NBVDMzTVRWVTJWVU9JTUdOR1VOWEo2NkFIMlJMU0RBRjNNVUJDWUFZNVFNSUw2NU5RTTZYUUciLCJuYW1lIjoiU3luYWRpYSBDb21tdW5pY2F0aW9ucyBJbmMuIiwibmJmIjoxNTQzNzYxMjc1LCJzdWIiOiJPQ0FUMzNNVFZVMlZVT0lNR05HVU5YSjY2QUgyUkxTREFGM01VQkNZQVk1UU1JTDY1TlFNNlhRRyIsInR5cGUiOiJvcGVyYXRvciIsIm5hdHMiOnsic2lnbmluZ19rZXlzIjpbIk9EU0tSN01ZRlFaNU1NQUo2RlBNRUVUQ1RFM1JJSE9GTFRZUEpSTUFWVk40T0xWMllZQU1IQ0FDIiwiT0RTS0FDU1JCV1A1MzdEWkRSVko2NTdKT0lHT1BPUTZLRzdUNEhONk9LNEY2SUVDR1hEQUhOUDIiLCJPRFNLSTM2TFpCNDRPWTVJVkNSNlA1MkZaSlpZTVlXWlZXTlVEVExFWjVUSzJQTjNPRU1SVEFCUiJdfX0.hyfz6E39BMUh0GLzovFfk3wT4OfualftjdJ_eYkLfPvu5tZubYQ_Pn9oFYGCV_6yKy3KMGhWGUCyCdHaPhalBw", + get_line_data(line="eyJhbGciOiJQUzM4NCJ9.eyJpc3MiOiJqb2UifQ.mlPowjRz0cP5J-MmCoegKHYagOHZ_ArXOR91_u8jMdwmOfdfEQIcC6K5hAgQGSZQC_pQDA51RUoUHatsQgXtHlSDC_VP9ZxcPkOptWScOUMXriLH31bTcrg0YhlYL-A7TTHLMhbUrOCKqjpWjU-GxcnOkM86e0joZgJUL7CpHUtyCFRrxOXtuTvGr2m_LdS7I5OyZ4xEP4JRcsOgOnGq-m7e3WX7LTDKjggtVq3Nmdl4GISgJdM7GHHZOJHckUjgD-T3X6oHQanFdXZnjEl7nqo9KfN0skerI681fJ8mbjIlbf68pM6tJwJXI8fr1tF4pcAZxXR17ITCrocVSRC6NuWOVzh_XyyEVMEWmLqrRvc4zyRUfqlDbUhMn55Z54bJnU2Z_IzUi1o9ndy7ckISHQVhuYFKu789DjW1BV4PFFxC4heghK_Gw4h7El6MIMVdvM8oLRbrjlf6BYCRnCxuTA_y10IyB7s8eEuUC-D6JjVtXSvCRkRo7f8dWQTjFLs7", pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) From 09edfab5e07272eccd916e486b4cefacc5750a20 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 7 Aug 2024 00:23:44 +0300 Subject: [PATCH 06/11] custom BM ref --- .github/workflows/benchmark.yml | 12 ++++++++---- credsweeper/utils/util.py | 2 +- experiment/src/entropy_test.py | 2 +- tests/filters/test_value_json_web_token_check.py | 10 +++++++++- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 1caaa133f..8b879a093 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -22,7 +22,8 @@ jobs: - name: Checkout CredData uses: actions/checkout@v4 with: - repository: Samsung/CredData + repository: babenek/CredData + ref: nojwt - name: Markup hashing run: | @@ -72,7 +73,8 @@ jobs: - name: Checkout CredData uses: actions/checkout@v4 with: - repository: Samsung/CredData + repository: babenek/CredData + ref: nojwt - name: Markup hashing run: | @@ -169,7 +171,8 @@ jobs: - name: Checkout CredData uses: actions/checkout@v4 with: - repository: Samsung/CredData + repository: babenek/CredData + ref: nojwt - name: Markup hashing run: | @@ -350,7 +353,8 @@ jobs: - name: Checkout CredData uses: actions/checkout@v4 with: - repository: Samsung/CredData + repository: babenek/CredData + ref: nojwt - name: Markup hashing run: | diff --git a/credsweeper/utils/util.py b/credsweeper/utils/util.py index 2e2d2fb96..400d9c0a5 100644 --- a/credsweeper/utils/util.py +++ b/credsweeper/utils/util.py @@ -103,7 +103,7 @@ def get_min_data_entropy(x: int) -> float: y = 1.095884 * math.log2(_x) - 1.90156 elif 384 < x < 512: # solved for 384 - 512 - y = -0.11215851 * math.log2(x) ** 2 + 2.34303484 * math.log2(x) - 4.4466237 + y = -0.11215851 * math.log2(x)**2 + 2.34303484 * math.log2(x) - 4.4466237 else: # less or equal to 8 bytes might have 0 entropy y = 0 diff --git a/experiment/src/entropy_test.py b/experiment/src/entropy_test.py index 29d13a467..13f5f46b5 100644 --- a/experiment/src/entropy_test.py +++ b/experiment/src/entropy_test.py @@ -102,7 +102,7 @@ def generate(start, end) -> Dict[int, Tuple[float, float]]: def log_model(x, k2, k1, k0): - return k2 * np.log2(x) ** 2 + k1 * np.log2(x) + k0 + return k2 * np.log2(x)**2 + k1 * np.log2(x) + k0 def solve(data: dict[int, Tuple[float, float]]): diff --git a/tests/filters/test_value_json_web_token_check.py b/tests/filters/test_value_json_web_token_check.py index 2113ed893..32c6f757f 100644 --- a/tests/filters/test_value_json_web_token_check.py +++ b/tests/filters/test_value_json_web_token_check.py @@ -23,5 +23,13 @@ def test_value_jwt_check_p(self): def test_value_jwt_check_n(self): self.assertFalse(ValueJsonWebTokenCheck().run( - get_line_data(line="eyJhbGciOiJQUzM4NCJ9.eyJpc3MiOiJqb2UifQ.mlPowjRz0cP5J-MmCoegKHYagOHZ_ArXOR91_u8jMdwmOfdfEQIcC6K5hAgQGSZQC_pQDA51RUoUHatsQgXtHlSDC_VP9ZxcPkOptWScOUMXriLH31bTcrg0YhlYL-A7TTHLMhbUrOCKqjpWjU-GxcnOkM86e0joZgJUL7CpHUtyCFRrxOXtuTvGr2m_LdS7I5OyZ4xEP4JRcsOgOnGq-m7e3WX7LTDKjggtVq3Nmdl4GISgJdM7GHHZOJHckUjgD-T3X6oHQanFdXZnjEl7nqo9KfN0skerI681fJ8mbjIlbf68pM6tJwJXI8fr1tF4pcAZxXR17ITCrocVSRC6NuWOVzh_XyyEVMEWmLqrRvc4zyRUfqlDbUhMn55Z54bJnU2Z_IzUi1o9ndy7ckISHQVhuYFKu789DjW1BV4PFFxC4heghK_Gw4h7El6MIMVdvM8oLRbrjlf6BYCRnCxuTA_y10IyB7s8eEuUC-D6JjVtXSvCRkRo7f8dWQTjFLs7", + get_line_data(line="eyJhbGciOiJQUzM4NCJ9.eyJpc3MiOiJqb2UifQ." \ + "_VP9ZxcPkOptWScOUMXriLH31bTcrg0YhlYL-A7TTHLX7LTDKjggtVq3Nmdl4GIS" \ + "gJdM7GHHZOJHckUjgD-T3X6oHQanKqjpWjU-GxcnOkM86e0joZgJUL7CpHUt7e3W" \ + "MhbUrOCyCFRrxOXtuTvGr2m_LdS7I5OyZ4xEP4JRcsOgOnGq-MEWmLqrRvc4zy5m" \ + "pM6tJwJXI8fr1tF4pcAZxXR17ITCrocVSRC6NuWOVzh_XyyEVRUfqlDbJnU2Z_I0" \ + "dfEQIcC6K5hAgQGSZQC_pQDA51RUoUHa9KfNskerI681fJ8mbjIlbf68CFdXZnjE" \ + "zobUhMn5Z544PF9DjW1BVtsQgXtHlSDFxl6MIMVdvM8oLRbrjlf6BYCRnCxuTA_y" \ + "Ui1o9ndy7ckISHQVhuYFKu78l7nqC4heghK_Gw4h7EB7s8eEuUC-D6JjVtX10IyS" \ + "vCRkRo7f8dWQTjFLs7mlPowjRz0cP5J-MmCoegKHYagOHZ_ArXOR91_u8jMdwmOf", pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) From 54e89aadc8bf93c884b357068cc256fbbb73ffba Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 7 Aug 2024 00:44:34 +0300 Subject: [PATCH 07/11] ref: jwt --- .github/workflows/benchmark.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 8b879a093..d6017bb39 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -23,7 +23,7 @@ jobs: uses: actions/checkout@v4 with: repository: babenek/CredData - ref: nojwt + ref: jwt - name: Markup hashing run: | @@ -74,7 +74,7 @@ jobs: uses: actions/checkout@v4 with: repository: babenek/CredData - ref: nojwt + ref: jwt - name: Markup hashing run: | @@ -172,7 +172,7 @@ jobs: uses: actions/checkout@v4 with: repository: babenek/CredData - ref: nojwt + ref: jwt - name: Markup hashing run: | @@ -354,7 +354,7 @@ jobs: uses: actions/checkout@v4 with: repository: babenek/CredData - ref: nojwt + ref: jwt - name: Markup hashing run: | From af42f746e3315b36a5a01e4a09263e026a6c8ef3 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 7 Aug 2024 00:46:29 +0300 Subject: [PATCH 08/11] testfix --- .../filters/value_entropy_base64_check.py | 12 +++++---- credsweeper/rules/config.yaml | 2 +- tests/__init__.py | 8 +++--- tests/data/depth_3.json | 27 +++++++++++++++++++ tests/data/doc.json | 27 +++++++++++++++++++ tests/data/ml_threshold.json | 27 +++++++++++++++++++ tests/data/output.json | 27 +++++++++++++++++++ tests/filters/test_value_file_path_check.py | 2 +- .../test_value_json_web_token_check.py | 2 +- tests/rules/test_jwt.py | 10 +++---- 10 files changed, 127 insertions(+), 17 deletions(-) diff --git a/credsweeper/filters/value_entropy_base64_check.py b/credsweeper/filters/value_entropy_base64_check.py index 7fa5ff3fd..f97741a0d 100644 --- a/credsweeper/filters/value_entropy_base64_check.py +++ b/credsweeper/filters/value_entropy_base64_check.py @@ -1,6 +1,6 @@ import math -from credsweeper.common.constants import Chars +from credsweeper.common.constants import Chars, ENTROPY_LIMIT_BASE64 from credsweeper.config import Config from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget @@ -25,12 +25,14 @@ def get_min_data_entropy(x: int) -> float: y = 4.1 elif 32 == x: y = 4.4 - elif 12 <= x < 32: + elif 12 <= x < 35: # logarithm base 2 - slow, but precise. Approximation does not exceed stdev y = 0.77 * math.log2(x) + 0.62 - elif 32 < x: - l2x = math.log2(x) - y = 0.001477 * l2x**4 - 0.036886 * l2x**3 + 0.244849 * l2x**2 + 0.318411 * l2x + 0.3932 + elif 35 <= x < 60: + y = ENTROPY_LIMIT_BASE64 + elif 60 <= x: + # the entropy grows slowly after 60 + y = 5.0 else: y = 0 return y diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index ffa25c139..26e7e5198 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -337,7 +337,7 @@ - doc - name: JSON Web Token - severity: critical + severity: medium confidence: strong type: pattern values: diff --git a/tests/__init__.py b/tests/__init__.py index aa14dc9d2..5bda59e59 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,14 +7,14 @@ NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan -SAMPLES_CRED_COUNT: int = 359 -SAMPLES_CRED_LINE_COUNT: int = 376 +SAMPLES_CRED_COUNT: int = 360 +SAMPLES_CRED_LINE_COUNT: int = 377 # credentials count after post-processing -SAMPLES_POST_CRED_COUNT: int = 318 +SAMPLES_POST_CRED_COUNT: int = 319 # with option --doc -SAMPLES_IN_DOC = 414 +SAMPLES_IN_DOC = 415 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 24 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index dac4acbd5..4c7822ef9 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -7155,6 +7155,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "JSON Web Token", + "severity": "medium", + "confidence": "strong", + "line_data_list": [ + { + "line": "detected: eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.Ce7sh0ENPiBlE_dose0cBA", + "line_num": 1, + "path": "tests/samples/json_web_token", + "info": "tests/samples/json_web_token|RAW", + "value": "eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.Ce7sh0ENPiBlE_dose0cBA", + "value_start": 10, + "value_end": 75, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.790963630103494, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/data/doc.json b/tests/data/doc.json index 2a0d3b969..46c8cdb7d 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -11520,6 +11520,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "JSON Web Token", + "severity": "medium", + "confidence": "strong", + "line_data_list": [ + { + "line": "detected: eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.Ce7sh0ENPiBlE_dose0cBA", + "line_num": 1, + "path": "tests/samples/json_web_token", + "info": "tests/samples/json_web_token|RAW", + "value": "eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.Ce7sh0ENPiBlE_dose0cBA", + "value_start": 10, + "value_end": 75, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.790963630103494, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index dc45dbdd4..c03e55fde 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -7789,6 +7789,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "JSON Web Token", + "severity": "medium", + "confidence": "strong", + "line_data_list": [ + { + "line": "detected: eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.Ce7sh0ENPiBlE_dose0cBA", + "line_num": 1, + "path": "tests/samples/json_web_token", + "info": "", + "value": "eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.Ce7sh0ENPiBlE_dose0cBA", + "value_start": 10, + "value_end": 75, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.790963630103494, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/data/output.json b/tests/data/output.json index f640de567..cfe4115d4 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -6898,6 +6898,33 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "JSON Web Token", + "severity": "medium", + "confidence": "strong", + "line_data_list": [ + { + "line": "detected: eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.Ce7sh0ENPiBlE_dose0cBA", + "line_num": 1, + "path": "tests/samples/json_web_token", + "info": "", + "value": "eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.Ce7sh0ENPiBlE_dose0cBA", + "value_start": 10, + "value_end": 75, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.790963630103494, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/filters/test_value_file_path_check.py b/tests/filters/test_value_file_path_check.py index ad7b47b07..3a1697014 100644 --- a/tests/filters/test_value_file_path_check.py +++ b/tests/filters/test_value_file_path_check.py @@ -8,7 +8,7 @@ class TestValueFilePathCheck: @pytest.mark.parametrize("line", [ - "5//0KCPafDhZvtCyiKFeDGT0ZGHiIE0ClIWrLC7tZ1WE5vHc4Y2qi1IhPy3Pz5fmCe9OPIxEZNUg7SJF9nwQj2lIdXU0", + "5//0KCPafDhZvtCwqrsyiKFeDGT_0ZGHiI-E0ClIWrLC7tZ1WE5vHc4-Y2qi1IhPy3Pz5fmCe9OPIxEZUONUg7SWJF9nwQ_j2lIdXU0", ]) def test_value_file_path_check_p(self, file_path: pytest.fixture, line: str) -> None: line_data = get_line_data(file_path, line=line, pattern=LINE_VALUE_PATTERN) diff --git a/tests/filters/test_value_json_web_token_check.py b/tests/filters/test_value_json_web_token_check.py index 32c6f757f..05baf551d 100644 --- a/tests/filters/test_value_json_web_token_check.py +++ b/tests/filters/test_value_json_web_token_check.py @@ -18,7 +18,7 @@ def test_value_jwt_check_p(self): get_line_data(line="eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.eyJleHAiOjY1NTM2fQo", pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) self.assertTrue(ValueJsonWebTokenCheck().run( - get_line_data(line="eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.65474687468446387653", + get_line_data(line="eyJhbGciOiJSUzI1NiJ9Cg.eyJleHAiOjY1NTM2fQo.AAAAAAAAAAAAAAAAAAAAAAA", pattern=LINE_VALUE_PATTERN), DUMMY_ANALYSIS_TARGET)) def test_value_jwt_check_n(self): diff --git a/tests/rules/test_jwt.py b/tests/rules/test_jwt.py index 10ec210ac..8ac1c4ec1 100644 --- a/tests/rules/test_jwt.py +++ b/tests/rules/test_jwt.py @@ -8,11 +8,11 @@ class TestJwt(BaseTestRule): @pytest.fixture(params=[[ - "jwt: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxN", - "TE2MjM5MDIyLCJ0ZXN0IjoiSSBuZWVkIHJlYWxseSByZWFsbHkgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nI", - "GxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvb", - "mcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgb", - "G9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZ", + "jwt: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxN" + "TE2MjM5MDIyLCJ0ZXN0IjoiSSBuZWVkIHJlYWxseSByZWFsbHkgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nI" + "GxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvb" + "mcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgb" + "G9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZ" "yBsb25nIGxvbmcgbG9uZyBsb25nIGxvbmcgbG9uZyBqd3QgdG9rZW4ifQ.4pWgA4mthx4FPPh1AZQY0luTKTQ7VOj6PGwwiANvtqg'" ]]) def lines(self, request) -> List[str]: From 51be1a595c0a670a507fd3db0d4eec405cd1ca8f Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 7 Aug 2024 11:39:54 +0300 Subject: [PATCH 09/11] more reserved words --- .../filters/value_json_web_token_check.py | 31 ++++++++++--------- .../test_value_json_web_token_check.py | 2 +- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/credsweeper/filters/value_json_web_token_check.py b/credsweeper/filters/value_json_web_token_check.py index 9106a2d2a..d7265dbce 100644 --- a/credsweeper/filters/value_json_web_token_check.py +++ b/credsweeper/filters/value_json_web_token_check.py @@ -1,22 +1,29 @@ import contextlib import json -from credsweeper.common.constants import Chars from credsweeper.config import Config from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget -from credsweeper.filters import Filter, ValueEntropyBase64Check +from credsweeper.filters import Filter from credsweeper.utils import Util class ValueJsonWebTokenCheck(Filter): """ Check that candidate is JWT which starts usually from 'eyJ' - only header is parsed with "typ" or "alg" member from example of RFC7519 - https://datatracker.ietf.org/doc/html/rfc7519 + registered keys are checked to be in the JWT parts + https://www.iana.org/assignments/jose/jose.xhtml """ - header_keys = {"alg", "typ", "cty", "enc"} - payload_keys = {"iss", "sub", "aud", "exp", "nbf", "iat", "jti", "id", "role", "iss"} + header_keys = { + "alg", "jku", "jwk", "kid", "x5u", "x5c", "x5t", "x5t#S256", "typ", "cty", "crit", "alg", "enc", "zip", "jku", + "jwk", "kid", "x5u", "x5c", "x5t", "x5t#S256", "typ", "cty", "crit", "epk", "apu", "apv", "iv", "tag", "p2s", + "p2c", "iss", "sub", "aud", "b64", "ppt", "url", "nonce", "svt" + } + payload_keys = { + "iss", "sub", "aud", "exp", "nbf", "iat", "jti", "kty", "use", "key_ops", "alg", "enc", "zip", "jku", "jwk", + "kid", "x5u", "x5c", "x5t", "x5t#S256", "crv", "x", "y", "d", "n", "e", "d", "p", "q", "dp", "dq", "qi", "oth", + "k", "crv", "d", "x", "ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce" + } def __init__(self, config: Config = None) -> None: pass @@ -44,17 +51,11 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: json_keys = json.loads(data).keys() # header will be checked first if not header_check: - if header_check := bool(ValueJsonWebTokenCheck.header_keys.intersection(json_keys)): - continue - else: - break + header_check = bool(ValueJsonWebTokenCheck.header_keys.intersection(json_keys)) # payload follows the header if not payload_check: - if payload_check := bool(ValueJsonWebTokenCheck.payload_keys.intersection(json_keys)): - continue - else: - break - # any other payloads are allowed + payload_check = bool(ValueJsonWebTokenCheck.payload_keys.intersection(json_keys)) + # any other payloads are allowed elif header_check and payload_check and not signature_check: # signature check or skip encrypted part signature_check = not Util.is_ascii_entropy_validate(data) diff --git a/tests/filters/test_value_json_web_token_check.py b/tests/filters/test_value_json_web_token_check.py index 05baf551d..9aa85a752 100644 --- a/tests/filters/test_value_json_web_token_check.py +++ b/tests/filters/test_value_json_web_token_check.py @@ -23,7 +23,7 @@ def test_value_jwt_check_p(self): def test_value_jwt_check_n(self): self.assertFalse(ValueJsonWebTokenCheck().run( - get_line_data(line="eyJhbGciOiJQUzM4NCJ9.eyJpc3MiOiJqb2UifQ." \ + get_line_data(line="eyJhbGciOiJQUzM4NCJ9.eyJkdW1teSI6bnVsbH0.eyJpc3MiOiJqb2UifQ." \ "_VP9ZxcPkOptWScOUMXriLH31bTcrg0YhlYL-A7TTHLX7LTDKjggtVq3Nmdl4GIS" \ "gJdM7GHHZOJHckUjgD-T3X6oHQanKqjpWjU-GxcnOkM86e0joZgJUL7CpHUt7e3W" \ "MhbUrOCyCFRrxOXtuTvGr2m_LdS7I5OyZ4xEP4JRcsOgOnGq-MEWmLqrRvc4zy5m" \ From 69e0ff3e5313924cfbe179876e03fe12d80f9cb2 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 7 Aug 2024 11:54:03 +0300 Subject: [PATCH 10/11] BM scores upd --- cicd/benchmark.txt | 52 +++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index c0d0b3118..c2e02262c 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -1,4 +1,4 @@ -DATA: 16979136 interested lines. MARKUP: 61880 items +DATA: 16978521 interested lines. MARKUP: 61845 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- 194 28318 64 427 89 @@ -27,7 +27,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .cmd 4 401 2 3 .cnf 8 858 18 45 18 .coffee 1 585 2 -.conf 61 4954 51 74 54 +.conf 60 4945 50 74 54 .config 20 492 16 33 1 .cpp 15 5688 1 61 .creds 1 10 1 1 @@ -53,7 +53,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .erb 13 323 27 .erl 4 96 8 .ex 25 4968 3 105 5 -.example 17 1838 74 37 55 +.example 17 1838 73 37 55 .exs 24 4842 3 188 4 .ext 5 211 1 4 2 .fsproj 1 75 1 @@ -61,16 +61,16 @@ FileType FileNumber ValidLines Positives Negatives Templat .gd 1 37 1 .gml 3 3075 26 .gni 3 5017 18 -.go 1079 566327 621 4334 742 +.go 1079 566327 619 4333 742 .golden 5 1168 1 14 29 .gradle 45 3265 4 91 100 .graphql 7 420 13 .graphqls 1 30 1 -.groovy 23 5011 25 211 1 +.groovy 22 4986 20 215 1 .h 11 2038 38 .haml 9 191 16 .hbs 2 54 3 -.hs 17 4509 37 71 5 +.hs 14 4140 31 72 5 .html 53 15327 14 115 18 .idl 2 777 4 .iml 6 699 36 @@ -80,16 +80,16 @@ FileType FileNumber ValidLines Positives Negatives Templat .ipynb 1 134 5 .j 1 241 4 .j2 30 5530 6 213 10 -.java 621 134132 322 1354 170 +.java 621 134132 314 1357 170 .jenkinsfile 1 58 1 7 .jinja2 1 64 2 .js 659 536413 521 2642 336 -.json 860 13670669 623 10947 143 +.json 860 13670669 623 10948 140 .jsp 13 3202 1 42 .jsx 7 857 19 -.jwt 6 8 7 +.jwt 1 1 2 .key 83 2737 70 14 -.kt 123 20774 51 383 3 +.kt 123 20774 50 384 3 .l 1 982 1 .las 1 6656 46 .lasso 1 230 6 @@ -110,10 +110,10 @@ FileType FileNumber ValidLines Positives Negatives Templat .markdown 3 139 3 1 .markerb 3 12 3 .marko 1 21 2 -.md 675 149422 661 2365 671 +.md 673 149294 646 2366 671 .mdx 3 549 7 .mjml 1 18 1 -.mjs 22 4424 108 310 +.mjs 22 4424 50 343 .mk 1 5878 16 .ml 1 1856 24 .mlir 2 1596 19 @@ -132,7 +132,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .patch 4 109405 27 .pbxproj 1 941 1 .pem 48 1169 47 8 -.php 371 75710 130 1769 80 +.php 371 75710 129 1770 80 .pl 16 14727 6 47 .pm 3 744 8 .po 3 2994 15 @@ -150,13 +150,13 @@ FileType FileNumber ValidLines Positives Negatives Templat .pug 2 193 2 .purs 1 69 4 .pxd 1 150 5 2 -.py 890 291553 618 3466 748 +.py 890 291553 618 3465 748 .pyi 4 1361 9 .pyp 1 167 1 .pyx 2 1094 21 .r 4 62 6 3 1 .rake 2 51 2 -.rb 861 131867 239 3455 615 +.rb 861 131867 237 3457 615 .re 1 31 1 .red 1 159 1 .release 1 13 4 @@ -197,7 +197,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .test 2 24 25 4 .testsettings 1 21 5 .tf 21 1377 3 32 2 -.tfstate 4 307 21 10 4 +.tfstate 4 307 18 11 4 .tfvars 1 31 3 3 .tl 2 2161 165 2 .tmpl 5 336 3 9 @@ -205,7 +205,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .toml 83 2379 54 72 172 .tpl 1 43 1 .travis 1 34 4 3 1 -.ts 584 106807 166 1930 203 +.ts 583 106730 158 1935 203 .tsx 54 7914 1 124 5 .ttar 2 6050 3 .txt 443 78152 1775 14282 50 @@ -222,8 +222,8 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 418 36162 460 916 384 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10294 16979136 7615 59903 5233 -credsweeper result_cnt : 6697, lost_cnt : 0, true_cnt : 6470, false_cnt : 227 +TOTAL: 10281 16978521 7499 59954 5230 +credsweeper result_cnt : 6597, lost_cnt : 0, true_cnt : 6352, false_cnt : 245 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ---- -------- -------- -------- -------- -------- -------- API 123 3163 185 112 109 3 3345 14 0.000896 0.113821 0.995102 0.973214 0.886179 0.927660 @@ -232,7 +232,7 @@ AWS Multi 75 12 0 8 AWS S3 Bucket 61 25 0 87 61 24 1 0 0.960000 0.000000 0.720930 0.717647 1.000000 0.835616 Atlassian Old PAT token 27 212 3 12 3 8 207 24 0.037209 0.888889 0.867769 0.272727 0.111111 0.157895 Auth 407 2725 77 372 351 21 2781 56 0.007495 0.137592 0.976005 0.943548 0.862408 0.901155 -Azure Access Token 19 0 0 10 10 0 0 9 0.473684 0.526316 1.000000 0.526316 0.689655 +Azure Access Token 19 0 0 12 12 0 0 7 0.368421 0.631579 1.000000 0.631579 0.774194 BASE64 Private Key 7 2 0 7 7 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 Bitbucket Client ID 142 1813 9 46 27 18 1804 115 0.009879 0.809859 0.932281 0.600000 0.190141 0.288770 @@ -249,20 +249,20 @@ Gitlab Incoming Email Token 37 3 0 2 Google API Key 12 0 0 12 12 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 Google Multi 10 2 0 11 10 1 1 0 0.500000 0.000000 0.916667 0.909091 1.000000 0.952381 Google OAuth Access Token 3 0 0 3 3 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 -Grafana Provisioned API Key 22 1 0 1 1 0 1 21 0.000000 0.954545 0.086957 1.000000 0.045455 0.086957 -JSON Web Token 173 2 2 92 92 0 4 81 0.000000 0.468208 0.542373 1.000000 0.531792 0.694340 +Grafana Provisioned API Key 22 1 0 5 5 0 1 17 0.000000 0.772727 0.260870 1.000000 0.227273 0.370370 +JSON Web Token 169 61 0 158 137 21 40 32 0.344262 0.189349 0.769565 0.867089 0.810651 0.837920 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 14 6 0 10 10 0 6 4 0.000000 0.285714 0.800000 1.000000 0.714286 0.833333 -Key 483 8494 464 442 433 9 8949 50 0.001005 0.103520 0.993751 0.979638 0.896480 0.936216 +Key 483 8494 464 445 436 9 8949 47 0.001005 0.097308 0.994068 0.979775 0.902692 0.939655 Nonce 83 53 0 85 79 6 47 4 0.113208 0.048193 0.926471 0.929412 0.951807 0.940476 Other 0 0 5 0 0 5 0 0.000000 1.000000 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 Password 1823 7474 2752 1681 1614 67 10159 209 0.006552 0.114646 0.977094 0.960143 0.885354 0.921233 Salt 42 76 2 38 38 0 78 4 0.000000 0.095238 0.966667 1.000000 0.904762 0.950000 -Secret 1358 28497 869 1231 1226 5 29361 132 0.000170 0.097202 0.995541 0.995938 0.902798 0.947084 +Secret 1358 28497 869 1234 1229 5 29361 129 0.000170 0.094993 0.995639 0.995948 0.905007 0.948302 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -Token 585 3972 439 517 509 8 4403 76 0.001814 0.129915 0.983187 0.984526 0.870085 0.923775 +Token 584 3973 438 519 511 8 4403 73 0.001814 0.125000 0.983784 0.984586 0.875000 0.926564 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 URL Credentials 194 125 251 184 184 0 376 10 0.000000 0.051546 0.982456 1.000000 0.948454 0.973545 - 7504 59894 5233 6524 6293 224 59670 1211 0.003740 0.161381 0.978709 0.965628 0.838619 0.897654 + 7499 59954 5230 6604 6352 245 59709 1147 0.004086 0.152954 0.979363 0.962862 0.847046 0.901249 From 9c7e1fb634b1d057bfa83c191ed869ba35ee36cf Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 7 Aug 2024 11:57:07 +0300 Subject: [PATCH 11/11] rollback embarrassing changes --- experiment/src/entropy_test.py | 156 ++++++++++++--------------------- 1 file changed, 57 insertions(+), 99 deletions(-) diff --git a/experiment/src/entropy_test.py b/experiment/src/entropy_test.py index 13f5f46b5..468ceca5d 100644 --- a/experiment/src/entropy_test.py +++ b/experiment/src/entropy_test.py @@ -1,61 +1,26 @@ -#!/usr/bin/env python3 -""" -The script is used in experiment to get statistical distribution of shanon entropy -of a line which was obtained with an encoding (base64, base32, etc.) from random generated bytes. -The result format is: -# size of encoded string: (mean of entropy, standard deviation) -""" - -import math +import base64 import random import signal import statistics -import sys +import string import threading import time +from datetime import datetime from multiprocessing import Pool from typing import Tuple, Dict -import matplotlib.pyplot as plt -import numpy as np -from scipy.optimize import curve_fit - +from credsweeper.common.constants import Chars +from credsweeper.filters import ValueEntropyBase36Check from credsweeper.utils import Util -# from credsweeper.filters import ValueEntropyBase36Check - -random_data: bytes -ITERATIONS = 10000 +random_data: str +ITERATIONS = 1000 def pool_initializer() -> None: signal.signal(signal.SIGINT, signal.SIG_IGN) -def byte_entropy(data: bytes): - data_len = len(data) - entropy = 0. - cells = [int(0)] * 256 - for x in data: - cells[x] += 1 - left = 0. - step = 256.0 / data_len - right = left + step - while left < 256: - cell_sum = 0 - i = int(left) - r = int(right) - while i < r and i < 256: - cell_sum += cells[i] - i += 1 - p_x = float(cell_sum) / data_len - if p_x > 0: - entropy += -p_x * math.log2(p_x) - left = right - right += step - return entropy - - def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]: min_avg = _args[1] min_dvt = _args[2] @@ -63,10 +28,9 @@ def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]: entropies = [] for x in range(ITERATIONS): offset = x * size - entropy = byte_entropy(random_data[offset:offset + size]) # entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE64_CHARS.value) # entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE36_CHARS.value) - # entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE32_CHARS.value) + entropy = Util.get_shannon_entropy(random_data[offset:offset + size], Chars.BASE32_CHARS.value) entropies.append(entropy) avg = statistics.mean(entropies) dvt = statistics.stdev(entropies, avg) @@ -76,71 +40,65 @@ def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]: return min_avg, min_dvt -def generate(start, end) -> Dict[int, Tuple[float, float]]: - stats: Dict[int, Tuple[float, float]] = {} # type: ignore - sizes = [x for x in range(start, end)] - global random_data +if __name__ == "__main__": + + stats: Dict[int, Tuple[float, float]] = {} + sizes = [12, 13, 15, 16, 17, 31, 32, 33] try: - for n in range(100): + for n in range(1000): start_time = time.time() - random_data = random.randbytes(ITERATIONS * max(sizes)) + rand_bytes = random.randbytes(int(8 * ITERATIONS * max(sizes) / 5)) + random_data = base64.b32encode(rand_bytes).decode('ascii') # random_data = ''.join( # [random.choice(string.digits + string.ascii_lowercase) for _ in range(ITERATIONS * max(sizes))]) - _args = [(i, stats[i][0] if i in stats else 99.99, stats[i][1] if i in stats else 0.0) for i in sizes] + _args = [(i, stats[i][0] if i in stats else 9.9, stats[i][1] if i in stats else 0.0) for i in sizes] with Pool(processes=min(15, len(_args)), initializer=pool_initializer) as pool: for _size, _res in zip(sizes, pool.map(evaluate_avg, _args)): with threading.Lock(): stats[_size] = _res print(f"done {n} in {time.time() - start_time}", flush=True) + for k, v in stats.items(): + print(f"{k} = {v}", flush=True) except KeyboardInterrupt as exc: print(exc) finally: - print("===========================================================", flush=True) + print("===========================================================") for k, v in stats.items(): - print(f"{k}: {v},", flush=True) - return stats - - -def log_model(x, k2, k1, k0): - return k2 * np.log2(x)**2 + k1 * np.log2(x) + k0 - - -def solve(data: dict[int, Tuple[float, float]]): - d_list = list((x, y) for x, y in data.items()) - d_list.sort(key=lambda x: (int(x[0]))) - - plt.figure() - x = [int(i[0]) for i in d_list] - y = [i[1][0] for i in d_list] - y_min = [i[1][0] - i[1][1] for i in d_list] - y_max = [i[1][0] + i[1][1] for i in d_list] - plt.plot(x, y, 'r-', lw=2, label='ent') - plt.plot(x, y_min, 'r:', lw=1, label='min') - plt.plot(x, y_max, 'r:', lw=1, label='max') - - _y = np.array(y_min) - _x = np.array(x) - - params, covariance = curve_fit(log_model, _x, _y) - print(params) - k2, k1, k0 = params - plt.plot(x, log_model(x, k2, k1, k0), 'b--', label='fit') - - plt.grid(True) - plt.show() - - -if __name__ == "__main__": - # data = [0]*200 - # for n in range(len(data)): - # data[n]=n>>2 - # print(byte_entropy(data)) - # print(calculate_shannon_entropy(data)) - # sys.exit(0) - # data_file = "base64entr_12_1200.json" # [0.00147696 -0.03688593 0.24484864 0.31841099 0.39320007] - start, end = 384, 512 # [-0.11215851 2.34303484 -4.4466237 ] - data_file = f"bytes_{start}_{end}.json" #[ 1.01660278 -1.03603384] - if not (_data := Util.json_load(data_file)): - _data = generate(start, end) - Util.json_dump(_data, data_file) - solve(_data) + print(f"{k} = {v}", flush=True) + +# base32 +# 12 = (3.2448401902687922, 0.2001867347580528) +# 13 = (3.3305754195719484, 0.1987638281794566) +# 15 = (3.4840904247691813, 0.192504685389475) +# 16 = (3.544861791803441, 0.184688685917545) +# 17 = (3.613827056321014, 0.18707867741897827) +# 31 = (4.15268463818445, 0.1486133074700339) +# 32 = (4.177896164672521, 0.1472328639816872) +# 33 = (4.197883981615083, 0.14735097649694248) + +# base36 +# 14 = (3.4457644517398167, 0.18990807349700253) +# 15 = (3.5260346505689992, 0.18114901125908447) +# 16 = (3.598032662269341, 0.1830565384431312) +# 17 = (3.659276363856176, 0.1856434289456263) +# 23 = (3.963851572519515, 0.16574824489877288) +# 24 = (4.00254984568254, 0.1623406588528336) +# 25 = (4.040134902813914, 0.158720524449059) +# 26 = (4.078098075953585, 0.15933209429031434) + +# base64 +# 15 = (3.6775207689256977, 0.15381412670043787) +# 16 = (3.7600552609204625, 0.15666871578775507) +# 17 = (3.835262182966267, 0.1514079815395568) +# 18 = (3.899273202112598, 0.15521615494595756) +# 19 = (3.9669074540527136, 0.15022181070460836) +# 20 = (4.026675938018028, 0.1477139960335224) +# 21 = (4.0844028599694155, 0.14611461336723608) +# 23 = (4.1880028531766245, 0.14668346833164134) +# 24 = (4.236982996273627, 0.14220068825454704) +# 25 = (4.283528241641759, 0.14323971561083385) +# 31 = (4.5121865964712535, 0.1393228408491736) +# 32 = (4.545556887485041, 0.13347416608982715) +# 33 = (4.576938427997454, 0.1300362152603773) +# 39 = (4.743676039379888, 0.13053505168803348) +# 40 = (4.76769110698625, 0.1307074052311964)