Skip to content

Commit

Permalink
remove ML_VALIDATION
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Jan 15, 2025
1 parent 05468db commit 5a6a8e0
Show file tree
Hide file tree
Showing 11 changed files with 989 additions and 3,023 deletions.
7 changes: 3 additions & 4 deletions credsweeper/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# Directory of credsweeper sources MUST be placed before imports to avoid circular import error
APP_PATH = Path(__file__).resolve().parent

from credsweeper.common.constants import KeyValidationOption, Severity, ThresholdPreset, DiffRowType
from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType
from credsweeper.config import Config
from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
from credsweeper.deep_scanner.deep_scanner import DeepScanner
Expand Down Expand Up @@ -368,11 +368,9 @@ def post_processing(self) -> None:
for candidate in group_candidates:
if candidate.use_ml:
if is_cred[i]:
candidate.ml_validation = KeyValidationOption.VALIDATED_KEY
candidate.ml_probability = probability[i]
new_cred_list.append(candidate)
else:
candidate.ml_validation = KeyValidationOption.NOT_AVAILABLE
new_cred_list.append(candidate)
else:
logger.info("Skipping ML validation due not applicable")
Expand Down Expand Up @@ -435,7 +433,8 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None:
for line_data in credential.line_data_list:
# bright rule name and path or info
print(Style.BRIGHT + credential.rule_name +
f" {line_data.info or line_data.path}:{line_data.line_num}" + Style.RESET_ALL)
f" {line_data.info or line_data.path}:{line_data.line_num} {credential.ml_probability}" +
Style.RESET_ALL)
print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))

if is_exported is False:
Expand Down
8 changes: 0 additions & 8 deletions credsweeper/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,6 @@ class Chars(Enum):
ENTROPY_LIMIT_BASE3x = 3


class KeyValidationOption(Enum):
"""API validation state"""
INVALID_KEY = 0
VALIDATED_KEY = 1
UNDECIDED = 2
NOT_AVAILABLE = 3


class GroupType(Enum):
"""Group type - used in Group constructor for load predefined set of filters"""
KEYWORD = "keyword"
Expand Down
11 changes: 4 additions & 7 deletions credsweeper/credentials/candidate.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import copy
import re
from json.encoder import py_encode_basestring_ascii
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Union

from credsweeper.common.constants import KeyValidationOption, Severity, Confidence
from credsweeper.common.constants import Severity, Confidence
from credsweeper.config import Config
from credsweeper.credentials.line_data import LineData

Expand Down Expand Up @@ -39,16 +39,14 @@ def __init__(self,
self.config = config
self.use_ml = use_ml
self.confidence = confidence
self.ml_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_probability: Optional[float] = None
self.ml_probability: Union[None, int, float] = None if use_ml else -1

def compare(self, other: 'Candidate') -> bool:
"""Comparison method - checks only result of final cred"""
if self.rule_name == other.rule_name \
and self.severity == other.severity \
and self.confidence == other.confidence \
and self.use_ml == other.use_ml \
and self.ml_validation == other.ml_validation \
and self.ml_probability == other.ml_probability \
and len(self.line_data_list) == len(other.line_data_list):
for i, j in zip(self.line_data_list, other.line_data_list):
Expand Down Expand Up @@ -79,7 +77,7 @@ def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
f" | severity: {self.severity.value}" \
f" | confidence: {self.confidence.value}" \
f" | line_data_list: [{', '.join([x.to_str(subtext, hashed) for x in self.line_data_list])}]" \
f" | ml_validation: {self.ml_validation.name}"
f" | ml_probability: {self.ml_probability}"

def __str__(self):
return self.to_str()
Expand All @@ -95,7 +93,6 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
"""
full_output = {
"ml_validation": self.ml_validation.name,
"patterns": [pattern.pattern for pattern in self.patterns],
"ml_probability": self.ml_probability,
"rule": self.rule_name,
Expand Down
1 change: 0 additions & 1 deletion credsweeper/secret/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@
"rule",
"severity",
"confidence",
"ml_validation",
"ml_probability",
"line_data_list"
]
Expand Down
8 changes: 8 additions & 0 deletions docs/source/credsweeper.deep_scanner.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ credsweeper.deep\_scanner.lang\_scanner module
:undoc-members:
:show-inheritance:

credsweeper.deep\_scanner.mxfile\_scanner module
------------------------------------------------

.. automodule:: credsweeper.deep_scanner.mxfile_scanner
:members:
:undoc-members:
:show-inheritance:

credsweeper.deep\_scanner.pdf\_scanner module
---------------------------------------------

Expand Down
33 changes: 17 additions & 16 deletions docs/source/guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ Get output as JSON file:

.. code-block:: bash
python -m credsweeper --ml_validation --path tests/samples/password --save-json output.json
python -m credsweeper --path tests/samples/password.gradle --save-json output.json
To check JSON file run:

Expand All @@ -97,10 +97,10 @@ To check JSON file run:
[
{
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99755,
"ml_probability": 0.9857242107391357,
"rule": "Password",
"severity": "medium",
"confidence": "moderate",
"line_data_list": [
{
"line": "password = \"cackle!\"",
Expand All @@ -111,9 +111,10 @@ To check JSON file run:
"value_start": 12,
"value_end": 19,
"variable": "password",
"entropy_validation":
{
"iterator": "BASE64_CHARS",
"variable_start": 0,
"variable_end": 8,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 2.120589933192232,
"valid": false
}
Expand All @@ -126,12 +127,12 @@ Get CLI output only:

.. code-block:: bash
python -m credsweeper --path tests/samples/password
python -m credsweeper --path tests/samples/password.gradle
.. code-block:: ruby
.. code-block:: bash
rule: Password / severity: medium / line_data_list: [line : 'password = "cackle!"' / line_num : 1 / path : tests/samples/password / entropy_validation: False] / ml_validation: VALIDATED_KEY
rule: Password | severity: medium | confidence: moderate | line_data_list: [line: 'password = "cackle!"' | line_num: 1 | path: tests/samples/password.gradle | value: 'cackle!' | entropy_validation: BASE64STDPAD_CHARS 2.120590 False] | ml_probability: 0.9857242107391357
Exclude outputs using CLI:
Expand All @@ -143,7 +144,7 @@ Space-like characters at left and right will be ignored.

.. code-block:: bash
$ python -m credsweeper --path tests/samples/password --denylist list.txt
$ python -m credsweeper --path tests/samples/password.gradle --denylist list.txt
Detected Credentials: 0
Time Elapsed: 0.07523202896118164s
$ cat list.txt
Expand All @@ -169,7 +170,7 @@ Then specify your config in CLI:

.. code-block:: bash
$ python -m credsweeper --path tests/samples/password --config my_cfg.json
$ python -m credsweeper --path tests/samples/password.gradle --config my_cfg.json
Detected Credentials: 0
Time Elapsed: 0.07152628898620605s
Expand All @@ -192,7 +193,7 @@ Minimal example for scanning line list:
.. code-block:: bash
rule: Password / severity: medium / line_data_list: [line: 'password='in_line_2'' / line_num: 2 / path: / value: 'in_line_2' / entropy_validation: False] / ml_validation: NOT_AVAILABLE
rule: Password | severity: medium | confidence: moderate | line_data_list: [line: 'password = "cackle!"' | line_num: 1 | path: tests/samples/password.gradle | value: 'cackle!' | entropy_validation: BASE64STDPAD_CHARS 2.120590 False] | ml_probability: 0.9857242107391357
Minimal example for scanning bytes:

Expand All @@ -201,7 +202,7 @@ Minimal example for scanning bytes:
from credsweeper import CredSweeper, ByteContentProvider
to_scan = b"line one\npassword='in_line_2'"
to_scan = b"line one\npassword='cackle!'"
cred_sweeper = CredSweeper()
provider = ByteContentProvider(to_scan)
results = cred_sweeper.file_scan(provider)
Expand All @@ -210,7 +211,7 @@ Minimal example for scanning bytes:
.. code-block:: bash
rule: Password / severity: medium / line_data_list: [line: 'password='in_line_2'' / line_num: 2 / path: / value: 'in_line_2' / entropy_validation: False] / ml_validation: NOT_AVAILABLE
rule: Password | severity: medium | confidence: moderate | line_data_list: [line: 'password='cackle!'' | line_num: 2 | path: | value: 'cackle!' | entropy_validation: BASE64STDPAD_CHARS 2.120590 False] | ml_probability: 0.9857242107391357
Minimal example for the ML validation:
Expand All @@ -220,7 +221,7 @@ Minimal example for the ML validation:
from credsweeper import CredSweeper, StringContentProvider, MlValidator, ThresholdPreset
to_scan = ["line one", "secret='fgELsRdFA'", "secret='template'"]
to_scan = ["line one", "password='cackle!'", "secret='template'"]
cred_sweeper = CredSweeper()
provider = StringContentProvider(to_scan)
Expand All @@ -239,7 +240,7 @@ Note that `"secret='template'"` is not reported due to failing check by the `MlV

.. code-block:: bash
rule: Secret / severity: medium / line_data_list: [line: 'secret='fgELsRdFA'' / line_num: 2 / path: / value: 'fgELsRdFA' / entropy_validation: False] / ml_validation: NOT_AVAILABLE
rule: Password | severity: medium | confidence: moderate | line_data_list: [line: 'password='cackle!'' | line_num: 2 | path: | value: 'cackle!' | entropy_validation: BASE64STDPAD_CHARS 2.120590 False] | ml_probability: 0.9857242107391357
Configurations
--------------
Expand Down
Loading

0 comments on commit 5a6a8e0

Please sign in to comment.