ml_prob

Samsung · Jan 13, 2025 · 37c2fa3
1 parent 0efec66
commit 37c2fa3
Show file tree

Hide file tree

Showing 8 changed files with 955 additions and 3,015 deletions.
diff --git a/credsweeper/app.py b/credsweeper/app.py
@@ -433,7 +433,7 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None:
                 for line_data in credential.line_data_list:
                     # bright rule name and path or info
                     print(Style.BRIGHT + credential.rule_name +
-                          f" {line_data.info or line_data.path}:{line_data.line_num} {credential.ml_info}" +
+                          f" {line_data.info or line_data.path}:{line_data.line_num} {credential.ml_probability}" +
                           Style.RESET_ALL)
                     print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))
 

diff --git a/credsweeper/credentials/candidate.py b/credsweeper/credentials/candidate.py
@@ -1,7 +1,7 @@
 import copy
 import re
 from json.encoder import py_encode_basestring_ascii
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from credsweeper.common.constants import Severity, Confidence
 from credsweeper.config import Config
@@ -39,7 +39,7 @@ def __init__(self,
         self.config = config
         self.use_ml = use_ml
         self.confidence = confidence
-        self.ml_probability: Optional[float] = None
+        self.ml_probability: Union[None, int, float] = None if use_ml else -1
 
     def compare(self, other: 'Candidate') -> bool:
         """Comparison method - checks only result of final cred"""
@@ -77,7 +77,7 @@ def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
                f" | severity: {self.severity.value}" \
                f" | confidence: {self.confidence.value}" \
                f" | line_data_list: [{', '.join([x.to_str(subtext, hashed) for x in self.line_data_list])}]" \
-               f" | ml_validation: {self.ml_validation}"
+               f" | ml_probability: {self.ml_probability}"
 
     def __str__(self):
         return self.to_str()
@@ -93,7 +93,6 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
 
         """
         full_output = {
-            "ml_validation": self.ml_validation,
             "patterns": [pattern.pattern for pattern in self.patterns],
             "ml_probability": self.ml_probability,
             "rule": self.rule_name,
@@ -137,29 +136,3 @@ def get_dummy_candidate(cls, config: Config, file_path: str, file_type: str, inf
             severity=Severity.INFO,  #
             config=config,  #
             confidence=Confidence.MODERATE)
-
-    @property
-    def ml_validation(self) -> str:
-        """Temporally replaced self.ml_validation"""
-        if not self.use_ml:
-            return "NOT_AVAILABLE"
-        elif isinstance(self.ml_probability, float):
-            return "VALIDATED_KEY"
-        elif self.ml_probability is None:
-            return "UNDECIDED"
-        else:
-            return "INVALID_KEY"
-
-    @property
-    def ml_info(self) -> str:
-        """Used to generate short info about ML of the candidate
-
-        Returns:
-            NA - Not applicable ML for the credential type
-            None - ML was not calculated
-            float - the probability
-        """
-        if not self.use_ml:
-            return "NA"
-        else:
-            return str(self.ml_probability)
diff --git a/credsweeper/secret/config.json b/credsweeper/secret/config.json
@@ -164,7 +164,6 @@
         "rule",
         "severity",
         "confidence",
-        "ml_validation",
         "ml_probability",
         "line_data_list"
     ]