chore(iast): update evidence redaction suite tests (#12114)

- Update suite tests to the latest version https://github.com/DataDog/experimental/blob/main/teams/asm/iast/redaction/suite/evidence-redaction-suite.json. - Refactor the redaction class and add a default redaction for Code Injection. - Mark as "redacted" if the string is too long (partial implementation of this RFC https://docs.google.com/document/d/1cAsBBOusoAvU6wRMez2M5JXqTwUwG6tddmX4LsxSAyI/edit?tab=t.0). - Skip tests by description instead of list position to improve readability. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
DataDog · Jan 31, 2025 · 94a6d91 · 94a6d91
1 parent 7fbf54a
commit 94a6d91
Show file tree

Hide file tree

Showing 11 changed files with 400 additions and 48 deletions.
diff --git a/ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py b/ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py
@@ -6,10 +6,12 @@
 
 from .._utils import _get_source_index
 from ..constants import VULN_CMDI
+from ..constants import VULN_CODE_INJECTION
 from ..constants import VULN_HEADER_INJECTION
 from ..constants import VULN_SQL_INJECTION
 from ..constants import VULN_SSRF
 from .command_injection_sensitive_analyzer import command_injection_sensitive_analyzer
+from .default_sensitive_analyzer import default_sensitive_analyzer
 from .header_injection_sensitive_analyzer import header_injection_sensitive_analyzer
 from .sql_sensitive_analyzer import sql_sensitive_analyzer
 from .url_sensitive_analyzer import url_sensitive_analyzer
@@ -19,6 +21,7 @@
 
 REDACTED_SOURCE_BUFFER = string.ascii_letters + string.digits
 LEN_SOURCE_BUFFER = len(REDACTED_SOURCE_BUFFER)
+VALUE_MAX_LENGHT = 45
 
 
 def get_redacted_source(length):
@@ -42,6 +45,7 @@ def __init__(self):
             VULN_SQL_INJECTION: sql_sensitive_analyzer,
             VULN_SSRF: url_sensitive_analyzer,
             VULN_HEADER_INJECTION: header_injection_sensitive_analyzer,
+            VULN_CODE_INJECTION: default_sensitive_analyzer,
         }
 
     @staticmethod
@@ -288,7 +292,7 @@ def to_redacted_json(self, evidence_value, sensitive, tainted_ranges, sources):
         return {"redacted_value_parts": value_parts, "redacted_sources": redacted_sources}
 
     def redact_source(self, sources, redacted_sources, redacted_sources_context, source_index, start, end):
-        if source_index is not None:
+        if source_index is not None and source_index < len(sources):
             if not sources[source_index].redacted:
                 redacted_sources.append(source_index)
                 sources[source_index].pattern = get_redacted_source(len(sources[source_index].value))
@@ -303,8 +307,10 @@ def write_value_part(self, value_parts, value, source_index=None):
         if value:
             if source_index is not None:
                 value_parts.append({"value": value, "source": source_index})
-            else:
+            elif len(value) < VALUE_MAX_LENGHT:
                 value_parts.append({"value": value})
+            else:
+                value_parts.append({"redacted": True})
 
     def write_redacted_value_part(
         self,

diff --git a/ddtrace/appsec/_iast/_evidence_redaction/default_sensitive_analyzer.py b/ddtrace/appsec/_iast/_evidence_redaction/default_sensitive_analyzer.py
@@ -0,0 +1,11 @@
+from ddtrace.internal.logger import get_logger
+
+
+log = get_logger(__name__)
+
+
+def default_sensitive_analyzer(evidence, name_pattern, value_pattern):
+    if name_pattern.search(evidence.value) or value_pattern.search(evidence.value):
+        return [{"start": 0, "end": len(evidence.value)}]
+
+    return []
diff --git a/ddtrace/appsec/_iast/_handlers.py b/ddtrace/appsec/_iast/_handlers.py
@@ -153,6 +153,7 @@ def _on_django_patch():
                     functools.partial(if_iast_taint_returned_object_for, OriginType.PARAMETER),
                 )
             )
+
             # we instrument those sources on _on_django_func_wrapped
             _set_metric_iast_instrumented_source(OriginType.HEADER_NAME)
             _set_metric_iast_instrumented_source(OriginType.HEADER)

diff --git a/tests/appsec/iast/taint_sinks/_taint_sinks_utils.py b/tests/appsec/iast/taint_sinks/_taint_sinks_utils.py
@@ -17,6 +17,14 @@ def get_parametrize(vuln_type, ignore_list=None):
     data = json.loads(open(fixtures_filename).read())
     idx = -1
     for element in data["suite"]:
+        if element["description"] in (
+            "$1 with query parameters or fragment",
+            "$1 - Tainted range based redaction - multiple ranges",
+            "Redacted source that needs to be truncated",
+            "Query with single quoted string literal and null source",
+        ):
+            continue
+
         if element["type"] == "VULNERABILITIES":
             evidence_parameters = [
                 param for k, params in element.get("parameters", {}).items() for param in params if param == vuln_type
@@ -46,13 +54,19 @@ def get_parametrize(vuln_type, ignore_list=None):
                                 if value_part.get("value"):
                                     value_part["value"] = value_part["value"].replace(replace, value)
 
-                            yield evidence_input_copy, sources_expected, vulnerabilities_expected_copy
+                            if all(
+                                [
+                                    bool(input_ranges["iinfo"].get("parameterName", {}))
+                                    for input_ranges in evidence_input_copy.get("ranges", {})
+                                ]
+                            ):
+                                yield evidence_input_copy, sources_expected, vulnerabilities_expected_copy, element
                 else:
                     idx += 1
                     if ignore_list and idx in ignore_list:
                         continue
 
-                    yield evidence_input[0], sources_expected, vulnerabilities_expected
+                    yield evidence_input[0], sources_expected, vulnerabilities_expected, element
 
 
 def _taint_pyobject_multiranges(pyobject, elements):