Skip to content

Commit

Permalink
Case-sensitive string matching
Browse files Browse the repository at this point in the history
  • Loading branch information
thomaspatzke committed May 12, 2023
1 parent e34980e commit d9e1a6c
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 6 deletions.
6 changes: 5 additions & 1 deletion sigma/backends/test/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from sigma.processing.transformations import FieldMappingTransformation
from sigma.types import SigmaCompareExpression


class TextQueryTestBackend(TextQueryBackend):
name : str = "Test backend"
formats : Dict[str, str] = {
Expand Down Expand Up @@ -54,6 +53,11 @@ class TextQueryTestBackend(TextQueryBackend):
re_escape_char : ClassVar[str] = "\\"
re_escape : ClassVar[Tuple[str]] = ("/", "bar")

# Case-sensitive matching templates of the test backend. {field} and {value} are substituted
# with the field name and the converted string value. All four are declared as ClassVar so
# they agree with each other and with the declarations in the base text query backend.
case_sensitive_match_expression : ClassVar[str] = "{field} casematch {value}"
case_sensitive_startswith_expression : ClassVar[str] = "{field} startswith_cased {value}"
case_sensitive_endswith_expression : ClassVar[str] = "{field} endswith_cased {value}"
case_sensitive_contains_expression : ClassVar[str] = "{field} contains_cased {value}"

cidr_expression : ClassVar[str] = "cidrmatch('{field}', \"{value}\")"

compare_op_expression : ClassVar[str] = "{field}{operator}{value}"
Expand Down
53 changes: 51 additions & 2 deletions sigma/conversion/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sigma.collection import SigmaCollection
from sigma.rule import SigmaRule
from sigma.conditions import ConditionItem, ConditionOR, ConditionAND, ConditionNOT, ConditionFieldEqualsValueExpression, ConditionValueExpression, ConditionType
from sigma.types import SigmaBool, SigmaExists, SigmaExpansion, SigmaFieldReference, SigmaRegularExpressionFlag, SigmaString, SigmaNumber, SigmaRegularExpression, SigmaCompareExpression, SigmaNull, SigmaQueryExpression, SigmaCIDRExpression, SpecialChars
from sigma.types import SigmaBool, SigmaCasedString, SigmaExists, SigmaExpansion, SigmaFieldReference, SigmaRegularExpressionFlag, SigmaString, SigmaNumber, SigmaRegularExpression, SigmaCompareExpression, SigmaNull, SigmaQueryExpression, SigmaCIDRExpression, SpecialChars
from sigma.conversion.state import ConversionState

class Backend(ABC):
Expand Down Expand Up @@ -211,6 +211,10 @@ def convert_condition_not(self, cond : ConditionNOT, state : ConversionState) ->
def convert_condition_field_eq_val_str(self, cond : ConditionFieldEqualsValueExpression, state : ConversionState) -> Any:
"""Conversion of field = string value expressions"""

@abstractmethod
def convert_condition_field_eq_val_str_case_sensitive(
    self,
    cond : ConditionFieldEqualsValueExpression,
    state : ConversionState,
) -> Any:
    """
    Convert a condition that compares a field with a case-sensitive (cased) string value.

    Abstract: concrete backends have to provide the implementation.
    """

@abstractmethod
def convert_condition_field_eq_val_num(
    self,
    cond : ConditionFieldEqualsValueExpression,
    state : ConversionState,
) -> Any:
    """
    Convert a condition that compares a field with a numeric value.

    Abstract: concrete backends have to provide the implementation.
    """
Expand Down Expand Up @@ -277,7 +281,9 @@ def convert_condition_field_eq_expansion(self, cond : ConditionFieldEqualsValueE

def convert_condition_field_eq_val(self, cond : ConditionFieldEqualsValueExpression, state : ConversionState) -> Any:
"""Conversion dispatcher of field = value conditions. Dispatches to value-specific methods."""
if isinstance(cond.value, SigmaString):
if isinstance(cond.value, SigmaCasedString):
return self.convert_condition_field_eq_val_str_case_sensitive(cond, state)
elif isinstance(cond.value, SigmaString):
return self.convert_condition_field_eq_val_str(cond, state)
elif isinstance(cond.value, SigmaNumber):
return self.convert_condition_field_eq_val_num(cond, state)
Expand Down Expand Up @@ -466,6 +472,15 @@ class variables. If this is not sufficient, the respective methods can be implem
# remove it from re_flags or don't define it to ensure proper error handling in case of appearance.
re_flags : Dict[SigmaRegularExpressionFlag, str] = SigmaRegularExpression.sigma_to_re_flag

# Case-sensitive string matching expression, given as format string. The placeholders {field}
# and {value} are replaced with the escaped/quoted field name and the string value converted
# like a normal string. If this and the more specific expressions below are all None,
# conversion of a case-sensitive string value raises NotImplementedError.
case_sensitive_match_expression : ClassVar[Optional[str]] = None
# Optional case-sensitive counterparts of the startswith/endswith/contains operators. They are
# used when the value has a multi-character wildcard only at its end (startswith), only at its
# beginning (endswith) or at both ends (contains) and contains no further special characters;
# the wildcard(s) are stripped from the value in this case. If the respective expression is
# None, case_sensitive_match_expression is used with the unchanged value.
case_sensitive_startswith_expression : ClassVar[Optional[str]] = None
case_sensitive_endswith_expression : ClassVar[Optional[str]] = None
case_sensitive_contains_expression : ClassVar[Optional[str]] = None

# CIDR expressions: define CIDR matching if backend has native support. Else pySigma expands
# CIDR values into string wildcard matches.
cidr_expression : ClassVar[Optional[str]] = None # CIDR expression query as format string with placeholders {field}, {value} (the whole CIDR value), {network} (network part only), {prefixlen} (length of network mask prefix) and {netmask} (CIDR network mask only)
Expand Down Expand Up @@ -728,6 +743,40 @@ def convert_condition_field_eq_val_str(self, cond : ConditionFieldEqualsValueExp
except TypeError: # pragma: no cover
raise NotImplementedError("Field equals string value expressions with strings are not supported by the backend.")

def convert_condition_field_eq_val_str_case_sensitive(self, cond : ConditionFieldEqualsValueExpression, state : ConversionState) -> Union[str, DeferredQueryExpression]:
    """
    Convert a case-sensitive field = string value condition into a query expression.

    A value that carries a multi-character wildcard only at its end, only at its beginning or
    at both ends, and has no further special characters, is rendered with the backend's
    case-sensitive startswith, endswith or contains expression if that expression is defined.
    The wildcard(s) are stripped from the value in this case. All other values are rendered
    with case_sensitive_match_expression.

    Raises NotImplementedError if the backend defines no applicable expression or if a
    TypeError occurs while the expression is built.
    """
    try:
        pattern = cond.value
        wildcard = SpecialChars.WILDCARD_MULTI
        # Shortcut rules in order of precedence:
        # (expression template, leading wildcard required, trailing wildcard required,
        #  slice selecting the value without the required wildcards)
        shortcuts = (
            (self.case_sensitive_startswith_expression, False, True, slice(None, -1)),
            (self.case_sensitive_endswith_expression, True, False, slice(1, None)),
            (self.case_sensitive_contains_expression, True, True, slice(1, -1)),
        )
        for template, needs_leading, needs_trailing, inner in shortcuts:
            if template is None:  # operator is not defined by the backend
                continue
            if needs_leading and not pattern.startswith(wildcard):
                continue
            if needs_trailing and not pattern.endswith(wildcard):
                continue
            remainder = pattern[inner]
            if remainder.contains_special():  # further special characters rule out the shortcut
                continue
            literal = remainder
            break
        else:
            # No shortcut matched: fall back to the generic case-sensitive match expression.
            template = self.case_sensitive_match_expression
            if template is None:
                raise NotImplementedError("Case-sensitive string matching is not supported by backend.")
            literal = pattern
        return template.format(
            field=self.escape_and_quote_field(cond.field),
            value=self.convert_value_str(literal, state),
        )
    except TypeError:  # pragma: no cover
        raise NotImplementedError("Case-sensitive field equals string value expressions with strings are not supported by the backend.")

def convert_condition_field_eq_val_num(self, cond : ConditionFieldEqualsValueExpression, state : ConversionState) -> Union[str, DeferredQueryExpression]:
"""Conversion of field = number value expressions"""
try:
Expand Down
7 changes: 6 additions & 1 deletion sigma/modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import ClassVar, Optional, Union, List, Sequence, Dict, Type, get_origin, get_args, get_type_hints
from collections.abc import Sequence as SequenceABC
from base64 import b64encode
from sigma.types import Placeholder, SigmaBool, SigmaExists, SigmaExpansion, SigmaFieldReference, SigmaRegularExpressionFlag, SigmaType, SigmaString, SigmaNumber, SpecialChars, SigmaRegularExpression, SigmaCompareExpression, SigmaCIDRExpression
from sigma.types import Placeholder, SigmaBool, SigmaCasedString, SigmaExists, SigmaExpansion, SigmaFieldReference, SigmaRegularExpressionFlag, SigmaType, SigmaString, SigmaNumber, SpecialChars, SigmaRegularExpression, SigmaCompareExpression, SigmaCIDRExpression
from sigma.conditions import ConditionAND
from sigma.exceptions import SigmaRuleLocation, SigmaTypeError, SigmaValueError
import sigma
Expand Down Expand Up @@ -207,6 +207,10 @@ class SigmaRegularExpressionDotAllFlagModifier(SigmaRegularExpressionFlagModifie
"""Regular expression dot matches all characters."""
flag : ClassVar[SigmaRegularExpressionFlag] = SigmaRegularExpressionFlag.DOTALL

class SigmaCaseSensitiveModifier(SigmaValueModifier):
    """Value modifier that turns a string value into a case-sensitively matched string."""

    def modify(self, val: SigmaString) -> SigmaCasedString:
        """Return the given string value converted into a SigmaCasedString."""
        cased_value = SigmaCasedString.from_sigma_string(val)
        return cased_value

class SigmaCIDRModifier(SigmaValueModifier):
"""Treat value as IP (v4 or v6) CIDR network."""
def modify(self, val : SigmaString) -> SigmaCIDRExpression:
Expand Down Expand Up @@ -285,6 +289,7 @@ def modify(self, val : SigmaString) -> SigmaString:
"multiline" : SigmaRegularExpressionMultilineFlagModifier,
"s" : SigmaRegularExpressionDotAllFlagModifier,
"dotall" : SigmaRegularExpressionDotAllFlagModifier,
"cased" : SigmaCaseSensitiveModifier,
"cidr" : SigmaCIDRModifier,
"all" : SigmaAllModifier,
"lt" : SigmaLessThanModifier,
Expand Down
8 changes: 8 additions & 0 deletions sigma/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,14 @@ def convert(
raise SigmaValueError("Single-character wildcard not specified for conversion")
return s

class SigmaCasedString(SigmaString):
    """String value that is matched case-sensitively."""

    @classmethod
    def from_sigma_string(cls, s : SigmaString) -> "SigmaCasedString":
        """
        Create a cased string from an existing Sigma string.

        The new object is constructed from the original text of the given string and then
        takes over its content attribute 's' unchanged.
        """
        cased = cls(s.original)
        cased.s = s.s  # take over the content of the source string as-is
        return cased

@dataclass
class SigmaNumber(SigmaType):
"""Numeric value type"""
Expand Down
64 changes: 64 additions & 0 deletions tests/test_conversion_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,22 @@ def test_convert_value_str(test_backend):
""")
) == ['mappedA="value" and \'field A\'="value"']

# Plain string values with the 'cased' modifier are expected to be rendered with the
# 'casematch' expression of the test backend. The expected query also shows 'fieldA'
# rendered as 'mappedA' and the field name containing a space quoted.
def test_convert_value_str_cased(test_backend):
assert test_backend.convert(
SigmaCollection.from_yaml("""
title: Test
status: test
logsource:
category: test_category
product: test_product
detection:
sel:
fieldA|cased: value
field A|cased: value
condition: sel
""")
) == ['mappedA casematch "value" and \'field A\' casematch "value"']

def test_convert_value_str_empty(test_backend):
assert test_backend.convert(
SigmaCollection.from_yaml("""
Expand Down Expand Up @@ -180,6 +196,22 @@ def test_convert_value_str_startswith(test_backend):
""")
) == ['mappedA startswith "value" and \'field A\' startswith "value"']

# Values with the 'startswith' and 'cased' modifiers are expected to be rendered with the
# 'startswith_cased' expression of the test backend, without a wildcard in the value.
def test_convert_value_str_startswith_cased(test_backend):
assert test_backend.convert(
SigmaCollection.from_yaml("""
title: Test
status: test
logsource:
category: test_category
product: test_product
detection:
sel:
fieldA|startswith|cased: "value"
field A|startswith|cased: "value"
condition: sel
""")
) == ['mappedA startswith_cased "value" and \'field A\' startswith_cased "value"']

def test_convert_value_str_startswith_further_wildcard(test_backend):
assert test_backend.convert(
SigmaCollection.from_yaml("""
Expand Down Expand Up @@ -228,6 +260,22 @@ def test_convert_value_str_endswith(test_backend):
""")
) == ['mappedA endswith "value" and \'field A\' endswith "value"']

# Values with the 'endswith' and 'cased' modifiers are expected to be rendered with the
# 'endswith_cased' expression of the test backend, without a wildcard in the value.
def test_convert_value_str_endswith_cased(test_backend):
assert test_backend.convert(
SigmaCollection.from_yaml("""
title: Test
status: test
logsource:
category: test_category
product: test_product
detection:
sel:
fieldA|endswith|cased: "value"
field A|endswith|cased: "value"
condition: sel
""")
) == ['mappedA endswith_cased "value" and \'field A\' endswith_cased "value"']

def test_convert_value_str_endswith_further_wildcard(test_backend):
assert test_backend.convert(
SigmaCollection.from_yaml("""
Expand Down Expand Up @@ -276,6 +324,22 @@ def test_convert_value_str_contains(test_backend):
""")
) == ['mappedA contains "value" and \'field A\' contains "value"']

# Values with the 'contains' and 'cased' modifiers are expected to be rendered with the
# 'contains_cased' expression of the test backend, without wildcards in the value.
def test_convert_value_str_contains_cased(test_backend):
assert test_backend.convert(
SigmaCollection.from_yaml("""
title: Test
status: test
logsource:
category: test_category
product: test_product
detection:
sel:
fieldA|contains|cased: "value"
field A|contains|cased: "value"
condition: sel
""")
) == ['mappedA contains_cased "value" and \'field A\' contains_cased "value"']

def test_convert_value_str_contains_further_wildcard(test_backend):
assert test_backend.convert(
SigmaCollection.from_yaml("""
Expand Down
6 changes: 5 additions & 1 deletion tests/test_modifiers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
from typing import Union, Sequence, List
from sigma.modifiers import \
SigmaCaseSensitiveModifier, \
SigmaExistsModifier, \
SigmaFieldReferenceModifier, \
SigmaModifier, \
Expand All @@ -23,7 +24,7 @@
SigmaExpandModifier, \
SigmaWindowsDashModifier
from sigma.rule import SigmaDetectionItem
from sigma.types import SigmaBool, SigmaExists, SigmaExpansion, SigmaFieldReference, SigmaRegularExpressionFlag, SigmaString, Placeholder, SigmaNumber, SigmaRegularExpression, SigmaCompareExpression, SigmaCIDRExpression
from sigma.types import SigmaBool, SigmaCasedString, SigmaExists, SigmaExpansion, SigmaFieldReference, SigmaRegularExpressionFlag, SigmaString, Placeholder, SigmaNumber, SigmaRegularExpression, SigmaCompareExpression, SigmaCIDRExpression
from sigma.conditions import ConditionAND
from sigma.exceptions import SigmaRuleLocation, SigmaTypeError, SigmaValueError

Expand Down Expand Up @@ -206,6 +207,9 @@ def test_re_with_other(dummy_detection_item):
with pytest.raises(SigmaValueError, match="only applicable to unmodified values.*test.yml"):
SigmaRegularExpressionModifier(dummy_detection_item, [SigmaBase64Modifier], SigmaRuleLocation("test.yml")).modify(SigmaString("foo?bar.*"))

def test_cased(dummy_detection_item):
    """The case-sensitive modifier must turn a plain string into an equal cased string."""
    modifier = SigmaCaseSensitiveModifier(dummy_detection_item, [])
    modified = modifier.modify(SigmaString("FooBar"))
    expected = SigmaCasedString("FooBar")
    assert modified == expected

def test_all(dummy_detection_item):
values = [
SigmaString("*foobar*"),
Expand Down
5 changes: 4 additions & 1 deletion tests/test_types.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from ipaddress import IPv4Network, IPv6Network
import re
import pytest
from sigma.types import SigmaBool, SigmaCompareExpression, SigmaFieldReference, SigmaRegularExpressionFlag, SigmaString, Placeholder, SpecialChars, SigmaNumber, SigmaNull, SigmaRegularExpression, SigmaQueryExpression, sigma_type, SigmaCIDRExpression
from sigma.types import SigmaBool, SigmaCasedString, SigmaCompareExpression, SigmaFieldReference, SigmaRegularExpressionFlag, SigmaString, Placeholder, SpecialChars, SigmaNumber, SigmaNull, SigmaRegularExpression, SigmaQueryExpression, sigma_type, SigmaCIDRExpression
from sigma.exceptions import SigmaTypeError, SigmaValueError, SigmaRegularExpressionError

@pytest.fixture
Expand Down Expand Up @@ -235,6 +235,9 @@ def test_string_index_slice_with_step(sigma_string):
with pytest.raises(IndexError, match="slice index with step"):
sigma_string[2:8:2]

def test_cased_string(sigma_string):
    """A cased string built from the fixture string must equal one built from the same text."""
    converted = SigmaCasedString.from_sigma_string(sigma_string)
    expected = SigmaCasedString("*Test*Str\\*ing*")
    assert converted == expected

def test_number_int():
    """An integer passed to SigmaNumber must be available unchanged as its number attribute."""
    sigma_number = SigmaNumber(123)
    assert sigma_number.number == 123

Expand Down

0 comments on commit d9e1a6c

Please sign in to comment.