-
-
Notifications
You must be signed in to change notification settings - Fork 109
/
Copy pathbase.py
1939 lines (1768 loc) · 88.3 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from abc import ABC, abstractmethod
from collections import ChainMap, defaultdict
import re
from sigma.correlations import (
SigmaCorrelationCondition,
SigmaCorrelationConditionOperator,
SigmaCorrelationFieldAlias,
SigmaCorrelationFieldAliases,
SigmaCorrelationRule,
SigmaCorrelationTimespan,
SigmaCorrelationType,
SigmaCorrelationTypeLiteral,
SigmaRuleReference,
)
from sigma.exceptions import (
ExceptionOnUsage,
SigmaBackendError,
SigmaConfigurationError,
SigmaConversionError,
SigmaError,
SigmaValueError,
)
from sigma.conversion.deferred import DeferredQueryExpression
from typing import Pattern, Union, ClassVar, Optional, Tuple, List, Dict, Any, Type
from sigma.processing.pipeline import ProcessingPipeline
from sigma.collection import SigmaCollection
from sigma.rule import SigmaRule
from sigma.conditions import (
ConditionItem,
ConditionOR,
ConditionAND,
ConditionNOT,
ConditionFieldEqualsValueExpression,
ConditionValueExpression,
ConditionType,
)
from sigma.types import (
SigmaBool,
SigmaCasedString,
SigmaExists,
SigmaExpansion,
SigmaFieldReference,
SigmaRegularExpressionFlag,
SigmaString,
SigmaNumber,
SigmaRegularExpression,
SigmaCompareExpression,
SigmaNull,
SigmaQueryExpression,
SigmaCIDRExpression,
SpecialChars,
)
from sigma.conversion.state import ConversionState
class Backend(ABC):
"""
Base class for Sigma conversion backends. A backend is made up from the following elements:

* Some metadata about the properties of the backend.
* A processing pipeline stored in backend_processing_pipeline that is applied to each Sigma
  rule that is converted by the backend. This is the location where you add generic
  transformations that should be applied to all Sigma rules before conversion.
* An additional processing pipeline can be passed to the constructor and is applied after
  the backend pipeline. This one is configured by the user to implement transformations
  required in the environment (e.g. field renaming).
* If collect_errors is set to True, Sigma errors raised while converting a rule are not thrown,
  but collected as (sigma_rule, exception) tuples in the errors property.
* The method convert is the entry point for a conversion of a rule set. By default it converts
  each rule and invokes the finalization step for the whole set of converted rules. There are better
  locations to implement backend functionality.
* convert_rule converts a single rule. By default it converts all conditions and invokes the rule
  finalization.
* convert_condition is the entry point for conversion of a rule condition into a query. It dispatches
  to the condition element classes.
* convert_condition_* methods must be implemented and handle the conversion of condition elements. The
  result might be an intermediate representation which is finalized by finalize_query.
* finalize_query finalizes the conversion result of a converted rule condition. By default it simply
  passes the generated queries.
* finalize_output_<format> finalizes the conversion result of a whole rule set in the specified format.
  By default finalize_output_default is called and outputs a list of all queries. Further formats can be
  implemented in similar methods. The default format can be specified in the class variable default_format.

Implementation of a backend:

1. Implement conversion of condition elements in convert_condition_*. The output can be an intermediate
   or the final query representation.
2. If required, implement a per-query finalization step in finalize_query. Each Sigma rule condition
   results in a query. This can embed the generated query into other structures (e.g. boilerplate code,
   prefix/postfix query parts) or convert the intermediate into a final query representation.
3. If required, implement a finalization step working on all generated queries in finalize. This can
   embed the queries into other data structures (e.g. JSON or XML containers for import into the target
   system) or perform the conversion of an intermediate to the final query representation.

Some hints and conventions:

* Use processing pipelines to apply transformations instead of implementing transformations in the backend
  itself. Implement generic transformations if they aren't too backend-specific.
* Use TextQueryBackend as base class for backends that output text-based queries.
* Use intermediate representations for queries and query sets for formats that require state information,
  e.g. if the target query language results in a different structure than given by the condition.
"""
name: ClassVar[str] = "Base backend" # A descriptive name of the backend
formats: ClassVar[Dict[str, str]] = (
{ # Output formats provided by the backend as name -> description mapping. The name should match to finalize_output_<name>.
"default": "Default output format",
}
)
requires_pipeline: ClassVar[bool] = (
False # Does the backend require that a processing pipeline is provided?
)
# Backends can offer different methods of correlation query generation. These are described by
# correlation_methods. If it is None, convert_correlation_rule refuses to convert correlation rules.
correlation_methods: ClassVar[Optional[Dict[str, str]]] = None
# The following class variable defines the default method that should be chosen if none is provided.
default_correlation_method: ClassVar[str] = "default"
# Pipeline passed to the constructor by the user (may be None).
processing_pipeline: ProcessingPipeline
# Combined pipeline built by convert_rule for the most recently converted rule.
last_processing_pipeline: ProcessingPipeline
# Pipeline defined by the backend itself; it is the first part of the combined pipeline.
backend_processing_pipeline: ClassVar[ProcessingPipeline] = ProcessingPipeline()
# Pipelines per output format name; the one for the chosen format is the last part of the combined pipeline.
output_format_processing_pipeline: ClassVar[Dict[str, ProcessingPipeline]] = defaultdict(
ProcessingPipeline
)
# Output format used if none is passed to the conversion methods.
default_format: ClassVar[str] = "default"
# If set, convert_rule appends Sigma errors to the errors list instead of raising them.
collect_errors: bool = False
errors: List[Tuple[SigmaRule, SigmaError]]
# in-expressions
convert_or_as_in: ClassVar[bool] = False # Convert OR as in-expression
convert_and_as_in: ClassVar[bool] = False # Convert AND as in-expression
in_expressions_allow_wildcards: ClassVar[bool] = (
False # Values in list can contain wildcards. If set to False (default) only plain values are converted into in-expressions.
)
# not exists: convert as "not exists-expression" or as dedicated expression
explicit_not_exists_expression: ClassVar[bool] = False
def __init__(
    self,
    processing_pipeline: Optional[ProcessingPipeline] = None,
    collect_errors: bool = False,
    **backend_options: Any,
):
    """
    Initialize the backend instance.

    :param processing_pipeline: Optional user-provided processing pipeline. convert_rule combines
        it with the backend pipeline and the output format pipeline.
    :param collect_errors: If True, convert_rule collects Sigma errors in the errors list instead
        of raising them.
    :param backend_options: Arbitrary keyword options. convert_rule exposes them to the processing
        pipeline as variables prefixed with "backend_".
    """
    self.processing_pipeline = processing_pipeline
    self.errors = []
    self.collect_errors = collect_errors
    self.backend_options = backend_options
def convert(
    self,
    rule_collection: SigmaCollection,
    output_format: Optional[str] = None,
    correlation_method: Optional[str] = None,
) -> Any:
    """
    Convert a whole Sigma rule collection into the target data structure.

    Rule references of the collection are resolved first. Afterwards each item is converted,
    plain Sigma rules with convert_rule and everything else with convert_correlation_rule, and
    all resulting queries are handed to finalize. The result is usually one or multiple queries,
    but can be any data structure required for further processing.

    :param rule_collection: Collection of rules to convert.
    :param output_format: Name of the output format; default_format is used if not given.
    :param correlation_method: Correlation method passed on to convert_correlation_rule.
    :return: Result of finalize for all generated queries.
    """
    rule_collection.resolve_rule_references()
    target_format = output_format or self.default_format

    collected = []
    for item in rule_collection.rules:
        if isinstance(item, SigmaRule):
            item_queries = self.convert_rule(item, target_format)
        else:
            item_queries = self.convert_correlation_rule(item, target_format, correlation_method)
        collected.extend(item_queries)

    return self.finalize(collected, target_format)
def convert_rule(self, rule: SigmaRule, output_format: Optional[str] = None) -> List[Any]:
    """
    Convert a single Sigma rule into the target data structure (usually a list of queries).

    Steps:

    1. Build the combined processing pipeline (backend + user + output format pipeline) and
       apply it to the rule.
    2. Convert each parsed condition of the rule with its own conversion state.
    3. Finalize each query, unless the rule is referenced by other rules (backreferences); in
       that case the unfinalized queries are kept.

    The conversion result and states are stored on the rule.

    :param rule: Sigma rule to convert.
    :param output_format: Name of the output format; default_format is used if not given.
    :return: The finalized queries. An empty list is returned if the rule is flagged to produce
        no output or if a Sigma error was collected.
    :raises SigmaError: If the conversion fails and collect_errors is not set.
    """
    output_format = output_format or self.default_format
    # Set before entering the try block, so the generic handler below can always build its
    # message, even if the pipeline construction itself fails.
    error_state = "initializing processing pipeline for"
    try:
        self.last_processing_pipeline = (
            self.backend_processing_pipeline
            + self.processing_pipeline
            + self.output_format_processing_pipeline[output_format]
        )
        self.last_processing_pipeline.vars.update(
            {"backend_" + key: value for key, value in self.backend_options.items()}
        )

        error_state = "applying processing pipeline on"
        self.last_processing_pipeline.apply(rule)  # 1. Apply transformations

        # 2. Convert conditions
        error_state = "converting"
        states = [
            ConversionState(processing_state=dict(self.last_processing_pipeline.state))
            for _ in rule.detection.parsed_condition
        ]
        queries = [
            self.convert_condition(cond.parsed, states[index])
            for index, cond in enumerate(rule.detection.parsed_condition)
        ]

        error_state = "finalizing query for"
        # 3. Postprocess generated query if not part of a correlation rule
        if rule._backreferences:
            finalized_queries = queries
        else:
            finalized_queries = [
                self.finalize_query(rule, query, index, states[index], output_format)
                for index, query in enumerate(queries)
            ]
        rule.set_conversion_result(finalized_queries)
        rule.set_conversion_states(states)

        return finalized_queries if rule._output else []
    except SigmaError as e:
        if self.collect_errors:
            self.errors.append((rule, e))
            return []
        raise
    except Exception as e:  # enrich all other exceptions with Sigma-specific context information
        msg = f" (while {error_state} rule {rule.source!s})"
        if e.args and isinstance(e.args[0], str):
            e.args = (e.args[0] + msg,) + e.args[1:]
        else:
            # No message or a non-string first argument: append the context instead of
            # concatenating, which would raise and hide the original exception.
            e.args = e.args + (msg.strip(),)
        raise
def decide_convert_condition_as_in_expression(
    self, cond: Union[ConditionOR, ConditionAND], state: ConversionState
) -> bool:
    """
    Tell whether an OR or AND condition is converted as "field in (value list)" expression
    instead of a plain boolean expression.

    :param cond: Condition for which the decision has to be made.
    :type cond: Union[ConditionOR, ConditionAND]
    :param state: Current conversion state (not used by this implementation).
    :type state: ConversionState
    :return: True if an in-expression should be generated, else False
    :rtype: bool
    """
    # The in-expression conversion has to be enabled for the condition type.
    if isinstance(cond, ConditionOR) and not self.convert_or_as_in:
        return False
    if isinstance(cond, ConditionAND) and not self.convert_and_as_in:
        return False

    operands = cond.args
    # Note: conditions with only one argument are not excluded here.

    # Each operand has to be a field/value comparison...
    for operand in operands:
        if not isinstance(operand, ConditionFieldEqualsValueExpression):
            return False

    # ...and all of them have to refer to one and the same field.
    referenced_fields = {operand.field for operand in operands}
    if len(referenced_fields) != 1:
        return False

    # Only strings and numbers are accepted as values.
    if any(not isinstance(operand.value, (SigmaString, SigmaNumber)) for operand in operands):
        return False

    # Without wildcard support in in-expressions, strings must not contain special characters.
    if not self.in_expressions_allow_wildcards:
        for operand in operands:
            if isinstance(operand.value, SigmaString) and operand.value.contains_special():
                return False

    # Nothing speaks against the in-expression.
    return True
@abstractmethod
def convert_condition_as_in_expression(
self, cond: Union[ConditionOR, ConditionAND], state: ConversionState
) -> Any:
"""
Conversion of OR or AND conditions into "field in (value list)" expressions.

Abstract method. convert_condition calls it for OR/AND conditions for which
decide_convert_condition_as_in_expression returns True.
"""
@abstractmethod
def convert_condition_or(self, cond: ConditionOR, state: ConversionState) -> Any:
"""
Conversion of OR conditions.

Abstract method. Called by convert_condition for OR conditions that are not converted as
in-expression and by convert_condition_field_eq_expansion for expanded values.
"""
@abstractmethod
def convert_condition_and(self, cond: ConditionAND, state: ConversionState) -> Any:
"""
Conversion of AND conditions.

Abstract method. Called by convert_condition for AND conditions that are not converted as
in-expression.
"""
@abstractmethod
def convert_condition_not(self, cond: ConditionNOT, state: ConversionState) -> Any:
"""
Conversion of NOT conditions.

Abstract method. Called by convert_condition for NOT conditions and by
convert_condition_field_eq_val_exists to express a negated field existence check.
"""
@abstractmethod
def convert_condition_field_eq_val_str(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field = string value expressions.

Abstract method. convert_condition_field_eq_val dispatches SigmaString values here that
are not SigmaCasedString instances.
"""
@abstractmethod
def convert_condition_field_eq_val_str_case_sensitive(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field = cased string value expressions.

Abstract method. convert_condition_field_eq_val dispatches SigmaCasedString values here.
"""
@abstractmethod
def convert_condition_field_eq_val_num(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field = number value expressions.

Abstract method. convert_condition_field_eq_val dispatches SigmaNumber values here.
"""
@abstractmethod
def convert_condition_field_eq_val_bool(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field = boolean value expressions.

Abstract method. convert_condition_field_eq_val dispatches SigmaBool values here.
"""
@abstractmethod
def convert_condition_field_eq_val_re(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field matches regular expression value expressions.

Abstract method. convert_condition_field_eq_val dispatches SigmaRegularExpression values here.
"""
@abstractmethod
def convert_condition_field_eq_val_cidr(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field matches CIDR expression value expressions.

Abstract method. convert_condition_field_eq_val dispatches SigmaCIDRExpression values here.
"""
@abstractmethod
def convert_condition_field_compare_op_val(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field compare operation value expressions.

Abstract method. convert_condition_field_eq_val dispatches SigmaCompareExpression values here.
"""
@abstractmethod
def convert_condition_field_eq_field(
self, cond: SigmaFieldReference, state: ConversionState
) -> Any:
"""
Conversion of field equals another field expressions.

Abstract method. convert_condition_field_eq_val dispatches conditions here whose value is a
SigmaFieldReference.

NOTE(review): the dispatcher passes the whole condition object (its value is the
SigmaFieldReference), not the field reference itself, so the annotation of cond looks too
narrow - confirm before changing it.
"""
@abstractmethod
def convert_condition_field_eq_val_null(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field is null expressions.

Abstract method. convert_condition_field_eq_val dispatches SigmaNull values here.
"""
@abstractmethod
def convert_condition_field_exists(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field exists expressions.

Abstract method. Called by convert_condition_field_eq_val_exists if the value of the
condition is truthy.
"""
@abstractmethod
def convert_condition_field_not_exists(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of field not exists expressions.

Abstract method. Called by convert_condition_field_eq_val_exists only if
explicit_not_exists_expression is set and the value of the condition is falsy.
"""
def convert_condition_field_eq_val_exists(
    self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
    """
    Dispatch the conversion of field existence checks.

    A truthy value is always converted by convert_condition_field_exists. A falsy value is
    converted by convert_condition_field_not_exists if the backend defines an explicit
    expression for non-existence (explicit_not_exists_expression), otherwise as negation of
    a field exists condition.
    """
    if cond.value:
        return self.convert_condition_field_exists(cond, state)

    if self.explicit_not_exists_expression:
        # The target query language has a dedicated expression for field non-existence.
        return self.convert_condition_field_not_exists(cond, state)

    # No dedicated expression available: negate the field exists expression.
    exists_cond = ConditionFieldEqualsValueExpression(cond.field, SigmaExists(True))
    negated_cond = ConditionNOT([exists_cond], cond.source)
    return self.convert_condition_not(negated_cond, state)
@abstractmethod
def convert_condition_field_eq_query_expr(
self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
"""
Conversion of query expressions bound to a field.

Abstract method. convert_condition_field_eq_val dispatches SigmaQueryExpression values here.
"""
def convert_condition_field_eq_expansion(
    self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
    """
    Conversion of expansion values: every value of the expansion is bound to the field of the
    containing condition and the resulting subconditions are OR-linked and converted with
    convert_condition_or.
    """
    alternatives = []
    for value in cond.value.values:
        alternatives.append(ConditionFieldEqualsValueExpression(cond.field, value))
    return self.convert_condition_or(ConditionOR(alternatives, cond.source), state)
def convert_condition_field_eq_val(
    self, cond: ConditionFieldEqualsValueExpression, state: ConversionState
) -> Any:
    """
    Conversion dispatcher of field = value conditions. The type of the condition value selects
    the value-specific conversion method.

    :raises TypeError: If the value has a type for which no conversion method is known.
    """
    # The order is significant: the first matching type wins, therefore SigmaCasedString has
    # to be checked before SigmaString.
    handlers_by_type = (
        (SigmaCasedString, "convert_condition_field_eq_val_str_case_sensitive"),
        (SigmaString, "convert_condition_field_eq_val_str"),
        (SigmaNumber, "convert_condition_field_eq_val_num"),
        (SigmaBool, "convert_condition_field_eq_val_bool"),
        (SigmaRegularExpression, "convert_condition_field_eq_val_re"),
        (SigmaCIDRExpression, "convert_condition_field_eq_val_cidr"),
        (SigmaCompareExpression, "convert_condition_field_compare_op_val"),
        (SigmaFieldReference, "convert_condition_field_eq_field"),
        (SigmaNull, "convert_condition_field_eq_val_null"),
        (SigmaQueryExpression, "convert_condition_field_eq_query_expr"),
        (SigmaExists, "convert_condition_field_eq_val_exists"),
        (SigmaExpansion, "convert_condition_field_eq_expansion"),
    )
    value = cond.value
    for value_type, handler_name in handlers_by_type:
        if isinstance(value, value_type):
            return getattr(self, handler_name)(cond, state)

    raise TypeError(  # pragma: no cover
        "Unexpected value type class in condition parse tree: " + value.__class__.__name__
    )
@abstractmethod
def convert_condition_val_str(
self, cond: ConditionValueExpression, state: ConversionState
) -> Any:
"""
Conversion of string-only conditions.

Abstract method. convert_condition_val dispatches SigmaString values here.
"""
@abstractmethod
def convert_condition_val_num(
self, cond: ConditionValueExpression, state: ConversionState
) -> Any:
"""
Conversion of number-only conditions.

Abstract method. convert_condition_val dispatches SigmaNumber values here.
"""
@abstractmethod
def convert_condition_val_re(
self, cond: ConditionValueExpression, state: ConversionState
) -> Any:
"""
Conversion of regexp-only conditions.

Abstract method. convert_condition_val dispatches SigmaRegularExpression values here.
"""
@abstractmethod
def convert_condition_query_expr(
self, cond: ConditionValueExpression, state: ConversionState
) -> Any:
"""
Conversion of query expressions without field association.

Abstract method. convert_condition_val dispatches SigmaQueryExpression values here.
"""
def convert_condition_val(self, cond: ConditionValueExpression, state: ConversionState) -> Any:
    """
    Conversion dispatcher of value-only conditions, i.e. values without a field name.

    :raises SigmaValueError: For boolean and CIDR values, which require a field name.
    :raises TypeError: If the value has a type for which no conversion method is known.
    """
    value = cond.value

    if isinstance(value, SigmaString):
        return self.convert_condition_val_str(cond, state)
    if isinstance(value, SigmaNumber):
        return self.convert_condition_val_num(cond, state)
    if isinstance(value, SigmaBool):
        raise SigmaValueError(
            "Boolean values can't appear as standalone value without a field name."
        )
    if isinstance(value, SigmaRegularExpression):
        return self.convert_condition_val_re(cond, state)
    if isinstance(value, SigmaCIDRExpression):
        raise SigmaValueError("CIDR values can't appear as standalone value without a field name.")
    if isinstance(value, SigmaQueryExpression):
        return self.convert_condition_query_expr(cond, state)

    raise TypeError(  # pragma: no cover
        "Unexpected value type class in condition parse tree: " + value.__class__.__name__
    )
def convert_condition(self, cond: ConditionType, state: ConversionState) -> Any:
    """
    Convert a condition parse tree node of a Sigma rule into the target data structure (usually
    a query) by dispatching to the method specialized on the type of the node.

    The state mainly contains the deferred list, which is used to collect query parts that are
    not directly integrated into the generated query, but added at a postponed stage of the
    conversion process after the conversion of the condition to a query is finished. This is
    done in the finalize_query method and must be implemented individually.

    :raises TypeError: If the node has a type for which no conversion method is known.
    """
    if isinstance(cond, (ConditionOR, ConditionAND)):
        # Both boolean operators can be expressed as in-expression if the backend allows it.
        if self.decide_convert_condition_as_in_expression(cond, state):
            return self.convert_condition_as_in_expression(cond, state)
        if isinstance(cond, ConditionOR):
            return self.convert_condition_or(cond, state)
        return self.convert_condition_and(cond, state)

    if isinstance(cond, ConditionNOT):
        return self.convert_condition_not(cond, state)
    if isinstance(cond, ConditionFieldEqualsValueExpression):
        return self.convert_condition_field_eq_val(cond, state)
    if isinstance(cond, ConditionValueExpression):
        return self.convert_condition_val(cond, state)

    raise TypeError(  # pragma: no cover
        "Unexpected data type in condition parse tree: " + cond.__class__.__name__
    )
def convert_correlation_rule(
    self,
    rule: SigmaCorrelationRule,
    output_format: Optional[str] = None,
    method: Optional[str] = None,
) -> List[Any]:
    """
    Convert a correlation rule into the target data structure (usually queries).

    The last used processing pipeline is applied to the rule, the rule is converted by the
    method responsible for its correlation type and every resulting query is finalized. The
    conversion result and states are stored on the rule.

    Args:
        rule (SigmaCorrelationRule): The correlation rule to be converted.
        output_format (Optional[str]): The desired output format. Defaults to None, in which
            case the default format is used for the query finalization.
        method (Optional[str]): The correlation method to be used. Defaults to None, in which
            case the default correlation method of the backend is used.

    Returns:
        List[Any]: The finalized queries.

    Raises:
        NotImplementedError: If the backend does not support correlation rules at all or the
            conversion for the given correlation rule type is not implemented.
        SigmaConversionError: If the correlation method is not supported by the backend.
    """
    if self.correlation_methods is None:
        raise NotImplementedError("Backend does not support correlation rules.")

    method = method or self.default_correlation_method
    if method not in self.correlation_methods:
        raise SigmaConversionError(
            f"Correlation method '{method}' is not supported by backend '{self.name}'."
        )

    self.last_processing_pipeline.apply(rule)

    # Conversion method for each supported correlation rule type
    converters = {
        SigmaCorrelationType.EVENT_COUNT: self.convert_correlation_event_count_rule,
        SigmaCorrelationType.VALUE_COUNT: self.convert_correlation_value_count_rule,
        SigmaCorrelationType.TEMPORAL: self.convert_correlation_temporal_rule,
        SigmaCorrelationType.TEMPORAL_ORDERED: self.convert_correlation_temporal_ordered_rule,
    }
    converter = converters.get(rule.type)
    if converter is None:
        raise NotImplementedError(
            f"Conversion of correlation rule type {rule.type} is not implemented."
        )

    queries = converter(rule, output_format, method)

    # One conversion state per generated query, initialized with the pipeline state
    states = []
    for _ in queries:
        states.append(
            ConversionState(processing_state=dict(self.last_processing_pipeline.state))
        )

    # Apply the finalization step
    final_format = output_format or self.default_format
    finalized_queries = []
    for index, query in enumerate(queries):
        finalized_queries.append(
            self.finalize_query(rule, query, index, states[index], final_format)
        )

    rule.set_conversion_result(finalized_queries)
    rule.set_conversion_states(states)
    return finalized_queries
@abstractmethod
def convert_correlation_event_count_rule(
self,
rule: SigmaCorrelationRule,
output_format: Optional[str] = None,
method: Optional[str] = None,
) -> List[Any]:
"""
Convert an event count correlation rule into the target data structure (usually query).

Abstract method. convert_correlation_rule calls it for rules of the correlation type
SigmaCorrelationType.EVENT_COUNT.

Args:
rule (SigmaCorrelationRule): The event count correlation rule to be converted.
output_format (Optional[str]): The output format for the conversion. Defaults to None.
method (Optional[str]): The correlation method to be used. Defaults to None.

Returns:
List[Any]: The generated queries; each one is finalized by convert_correlation_rule.
"""
@abstractmethod
def convert_correlation_value_count_rule(
self,
rule: SigmaCorrelationRule,
output_format: Optional[str] = None,
method: Optional[str] = None,
) -> List[Any]:
"""
Convert a value count correlation rule into the target data structure (usually query).

Abstract method. convert_correlation_rule calls it for rules of the correlation type
SigmaCorrelationType.VALUE_COUNT.

Args:
rule (SigmaCorrelationRule): The value count correlation rule to be converted.
output_format (Optional[str]): The output format for the conversion. Defaults to None.
method (Optional[str]): The correlation method to be used. Defaults to None.

Returns:
List[Any]: The generated queries; each one is finalized by convert_correlation_rule.
"""
@abstractmethod
def convert_correlation_temporal_rule(
self,
rule: SigmaCorrelationRule,
output_format: Optional[str] = None,
method: Optional[str] = None,
) -> List[Any]:
"""
Convert a temporal correlation rule into the target data structure (usually query).

Abstract method. convert_correlation_rule calls it for rules of the correlation type
SigmaCorrelationType.TEMPORAL.

Args:
rule (SigmaCorrelationRule): The temporal correlation rule to be converted.
output_format (Optional[str]): The output format for the conversion. Defaults to None.
method (Optional[str]): The correlation method to be used. Defaults to None.

Returns:
List[Any]: The generated queries; each one is finalized by convert_correlation_rule.
"""
@abstractmethod
def convert_correlation_temporal_ordered_rule(
self,
rule: SigmaCorrelationRule,
output_format: Optional[str] = None,
method: Optional[str] = None,
) -> List[Any]:
"""
Convert an ordered temporal correlation rule into the target data structure (usually query).

Abstract method. convert_correlation_rule calls it for rules of the correlation type
SigmaCorrelationType.TEMPORAL_ORDERED.

Args:
rule (SigmaCorrelationRule): The ordered temporal correlation rule to be converted.
output_format (Optional[str]): The output format for the conversion. Defaults to None.
method (Optional[str]): The correlation method to be used. Defaults to None.

Returns:
List[Any]: The generated queries; each one is finalized by convert_correlation_rule.
"""
def finalize_query(
    self,
    rule: SigmaRule,
    query: Any,
    index: int,
    state: ConversionState,
    output_format: str,
) -> Any:
    """
    Finalize query. Dispatches to the format-specific method finalize_query_<output_format> and
    passes its result through the query postprocessing of the last used processing pipeline.

    This is the place where syntactic elements of the target format for the specific query are
    added, e.g. adding query metadata.

    :param rule: Rule from which the query was generated.
    :param query: Query generated by the condition conversion.
    :param index: Enumerates the generated queries if the conversion of a Sigma rule results in
        multiple queries.
    :param state: Conversion state of the query.
    :param output_format: Name of the output format.
    :return: The finalized and postprocessed query.
    :raises AttributeError: If no finalize_query_<output_format> method exists.
    """
    format_finalizer = getattr(self, "finalize_query_" + output_format)
    backend_query = format_finalizer(rule, query, index, state)
    return self.last_processing_pipeline.postprocess_query(rule, backend_query)
def finalize_query_default(
self, rule: SigmaRule, query: Any, index: int, state: ConversionState
) -> Any:
"""
Finalize conversion result of a query for the output format "default". This implementation
returns the query unchanged. Handling of deferred query parts must be implemented by overriding
this method.
"""
return query
def finalize(self, queries: List[Any], output_format: str) -> Any:
    """
    Finalize output. Dispatches to the format-specific method finalize_output_<output_format>
    and passes its result through the finalization of the processing pipeline.

    :param queries: All queries generated for the converted rule set.
    :param output_format: Name of the output format.
    :return: The finalized output.
    :raises AttributeError: If no finalize_output_<output_format> method exists.
    """
    output = getattr(self, "finalize_output_" + output_format)(queries)

    pipeline = getattr(self, "last_processing_pipeline", None)
    if pipeline is None:
        # No rule was converted before (e.g. empty rule collection), so convert_rule never
        # built the combined pipeline. Build it here in the same way instead of failing.
        pipeline = (
            self.backend_processing_pipeline
            + self.processing_pipeline
            + self.output_format_processing_pipeline[output_format]
        )
    return pipeline.finalize(output)
def finalize_output_default(self, queries: List[Any]) -> Any:
"""
Default finalization. This implementation returns the list of queries unchanged.

This is the place where syntactic elements of the target format for the whole output are added,
e.g. putting individual queries into an XML file.
"""
return queries
class TextQueryBackend(Backend):
"""
Backend base for backends generating text-based queries. The behavior can be defined by various
class variables. If this is not sufficient, the respective methods can be implemented with more
complex transformations.
"""
# Operator precedence: tuple of Condition{AND,OR,NOT} in order of precedence.
# The backend generates grouping if required
precedence: ClassVar[Tuple[Type[ConditionItem], Type[ConditionItem], Type[ConditionItem]]] = (
ConditionNOT,
ConditionAND,
ConditionOR,
)
group_expression: ClassVar[Optional[str]] = (
None # Expression for precedence override grouping as format string with {expr} placeholder
)
parenthesize: bool = (
False # Reflect parse tree by putting parenthesis around all expressions - use this for target systems without strict precedence rules.
)
# Generated query tokens
token_separator: str = " " # separator inserted between all boolean operators
or_token: ClassVar[Optional[str]] = None
and_token: ClassVar[Optional[str]] = None
not_token: ClassVar[Optional[str]] = None
eq_token: ClassVar[Optional[str]] = (
None # Token inserted between field and value (without separator)
)
eq_expression: ClassVar[str] = (
"{field}{backend.eq_token}{value}" # Expression for field = value
)
# Query structure
# The generated query can be embedded into further structures. One common example are data
# source commands that are prepended to the matching condition and specify data repositories or
# tables from which the data is queried.
# This is specified as format string that contains the following placeholders:
# * {query}: The generated query
# * {rule}: The Sigma rule from which the query was generated
# * {state}: Conversion state at the end of query generation. This state is initialized with the
# pipeline state.
query_expression: ClassVar[str] = "{query}"
# The following dict defines default values for the conversion state. They are used if
# the respective state is not set.
state_defaults: ClassVar[Dict[str, str]] = dict()
# String output
## Fields
### Quoting
field_quote: ClassVar[Optional[str]] = (
None # Character used to quote field characters if field_quote_pattern matches (or not, depending on field_quote_pattern_negation). No field name quoting is done if not set.
)
field_quote_pattern: ClassVar[Optional[Pattern]] = (
None # Quote field names if this pattern (doesn't) matches, depending on field_quote_pattern_negation. Field name is always quoted if pattern is not set.
)
field_quote_pattern_negation: ClassVar[bool] = (
True # Negate field_quote_pattern result. Field name is quoted if pattern doesn't matches if set to True (default).
)
### Escaping
field_escape: ClassVar[Optional[str]] = (
None # Character to escape particular parts defined in field_escape_pattern.
)
field_escape_quote: ClassVar[bool] = True # Escape quote string defined in field_quote
field_escape_pattern: ClassVar[Optional[Pattern]] = (
None # All matches of this pattern are prepended with the string contained in field_escape.
)
# Characters to escape in addition in regular expression representation of string (regex
# template variable) to default escaping characters.
add_escaped_re: ClassVar[str] = ""
## Values
### String quoting
str_quote: ClassVar[str] = "" # string quoting character (added as escaping character)
str_quote_pattern: ClassVar[Optional[Pattern]] = (
None # Quote string values that match (or don't match) this pattern
)
str_quote_pattern_negation: ClassVar[bool] = True # Negate str_quote_pattern result
### String escaping and filtering
escape_char: ClassVar[Optional[str]] = (
None # Escaping character for special characters inside string
)
wildcard_multi: ClassVar[Optional[str]] = None # Character used as multi-character wildcard
wildcard_single: ClassVar[Optional[str]] = None # Character used as single-character wildcard
add_escaped: ClassVar[str] = "" # Characters quoted in addition to wildcards and string quote
filter_chars: ClassVar[str] = "" # Characters filtered
### Booleans
bool_values: ClassVar[Dict[bool, Optional[str]]] = (
{ # Values to which boolean values are mapped.
True: None,
False: None,
}
)
# String matching operators. if none is appropriate eq_token is used.
startswith_expression: ClassVar[Optional[str]] = None
startswith_expression_allow_special: ClassVar[bool] = False
endswith_expression: ClassVar[Optional[str]] = None
endswith_expression_allow_special: ClassVar[bool] = False
contains_expression: ClassVar[Optional[str]] = None
contains_expression_allow_special: ClassVar[bool] = False
wildcard_match_expression: ClassVar[Optional[str]] = (
None # Special expression if wildcards can't be matched with the eq_token operator.
)
# Regular expressions
# Regular expression query as format string with placeholders {field}, {regex}, {flag_x} where x
# is one of the flags shortcuts supported by Sigma (currently i, m and s) and refers to the
# token stored in the class variable re_flags.
re_expression: ClassVar[Optional[str]] = None
re_escape_char: ClassVar[Optional[str]] = (
None # Character used for escaping in regular expressions
)
re_escape: ClassVar[Tuple[str]] = () # List of strings that are escaped
re_escape_escape_char: bool = True # If True, the escape character is also escaped
re_flag_prefix: bool = (
True # If True, the flags are prepended as (?x) group at the beginning of the regular expression, e.g. (?i). If this is not supported by the target, it should be set to False.
)
# Mapping from SigmaRegularExpressionFlag values to static string templates that are used in
# flag_x placeholders in re_expression template.
# By default, i, m and s are defined. If a flag is not supported by the target query language,
# remove it from re_flags or don't define it to ensure proper error handling in case of appearance.
re_flags: Dict[SigmaRegularExpressionFlag, str] = SigmaRegularExpression.sigma_to_re_flag
# Case sensitive string matching expression. String is quoted/escaped like a normal string.
# Placeholders {field} and {value} are replaced with field name and quoted/escaped string.
# {regex} contains the value expressed as regular expression.
case_sensitive_match_expression: ClassVar[Optional[str]] = None
# Case sensitive string matching operators similar to standard string matching. If not provided,
# case_sensitive_match_expression is used.
case_sensitive_startswith_expression: ClassVar[Optional[str]] = None
case_sensitive_startswith_expression_allow_special: ClassVar[bool] = False
case_sensitive_endswith_expression: ClassVar[Optional[str]] = None
case_sensitive_endswith_expression_allow_special: ClassVar[bool] = False
case_sensitive_contains_expression: ClassVar[Optional[str]] = None
case_sensitive_contains_expression_allow_special: ClassVar[bool] = False
# CIDR expressions: define CIDR matching if backend has native support. Else pySigma expands
# CIDR values into string wildcard matches.
cidr_expression: ClassVar[Optional[str]] = (
None # CIDR expression query as format string with placeholders {field}, {value} (the whole CIDR value), {network} (network part only), {prefixlen} (length of network mask prefix) and {netmask} (CIDR network mask only)
)
# Numeric comparison operators
compare_op_expression: ClassVar[Optional[str]] = (
None # Compare operation query as format string with placeholders {field}, {operator} and {value}
)
compare_operators: ClassVar[Optional[Dict[SigmaCompareExpression.CompareOperators, str]]] = (
None # Mapping between CompareOperators elements and strings used as replacement for {operator} in compare_op_expression
)
# Expression for comparing two event fields
field_equals_field_expression: ClassVar[Optional[str]] = (
None # Field comparison expression with the placeholders {field1} and {field2} corresponding to left field and right value side of Sigma detection item
)
field_equals_field_escaping_quoting: Tuple[bool, bool] = (
True,
True,
) # If regular field-escaping/quoting is applied to field1 and field2. A custom escaping/quoting can be implemented in the convert_condition_field_eq_field_escape_and_quote method.
# Null/None expressions
field_null_expression: ClassVar[Optional[str]] = (
None # Expression for field has null value as format string with {field} placeholder for field name
)
# Field existence condition expressions.
field_exists_expression: ClassVar[Optional[str]] = (
None # Expression for field existence as format string with {field} placeholder for field name
)
field_not_exists_expression: ClassVar[Optional[str]] = (
None # Expression for field non-existence as format string with {field} placeholder for field name. If not set, field_exists_expression is negated with boolean NOT.
)
# Field value in list, e.g. "field in (value list)" or "field containsall (value list)"
field_in_list_expression: ClassVar[Optional[str]] = (
None # Expression for field in list of values as format string with placeholders {field}, {op} and {list}
)
or_in_operator: ClassVar[Optional[str]] = (
None # Operator used to convert OR into in-expressions. Must be set if convert_or_as_in is set
)
and_in_operator: ClassVar[Optional[str]] = (
None # Operator used to convert AND into in-expressions. Must be set if convert_and_as_in is set
)
list_separator: ClassVar[Optional[str]] = None # List element separator
# Value not bound to a field
unbound_value_str_expression: ClassVar[Optional[str]] = (
None # Expression for string value not bound to a field as format string with placeholder {value} and {regex} (value as regular expression)
)
unbound_value_num_expression: ClassVar[Optional[str]] = (
None # Expression for number value not bound to a field as format string with placeholder {value} and {regex} (value as regular expression)
)
unbound_value_re_expression: ClassVar[Optional[str]] = (
None # Expression for regular expression not bound to a field as format string with placeholder {value} and {flag_x} as described for re_expression
)
# Query finalization: appending and concatenating deferred query part
deferred_start: ClassVar[Optional[str]] = (
None # String used as separator between main query and deferred parts
)
deferred_separator: ClassVar[Optional[str]] = (
None # String used to join multiple deferred query parts
)
deferred_only_query: ClassVar[Optional[str]] = (
None # String used as query if final query only contains deferred expression
)
### Correlation rule templates
## Correlation query frame
# The correlation query frame is the basic structure of a correlation query for each correlation
# type. It contains the following placeholders:
# * {search} is the search expression generated by the correlation query search phase.
# * {typing} is the event typing expression generated by the correlation query typing phase.
# * {aggregate} is the aggregation expression generated by the correlation query aggregation
# phase.
# * {condition} is the condition expression generated by the correlation query condition phase.
# If a correlation query template for a specific correlation type is not defined, the default correlation query template is used.
default_correlation_query: ClassVar[Optional[Dict[str, str]]] = None
event_count_correlation_query: ClassVar[Optional[Dict[str, str]]] = None
value_count_correlation_query: ClassVar[Optional[Dict[str, str]]] = None
temporal_correlation_query: ClassVar[Optional[Dict[str, str]]] = None
temporal_ordered_correlation_query: ClassVar[Optional[Dict[str, str]]] = None
## Correlation query search phase
# The first step of a correlation query is to match events described by the referred Sigma
# rules. A main difference is made between single and multiple rule searches.
# A single rule search expression defines the search expression emitted if only one rule is
# referred by the correlation rule. It contains the following placeholders:
# * {rule} is the referred Sigma rule.
# * {ruleid} is the rule name or if not available the id of the rule.
# * {query} is the query generated from the referred Sigma rule.
# * {normalization} is the expression that normalizes the rule field names to unified alias
# field names that can be later used for aggregation. The expression is defined by
# correlation_search_field_normalization_expression defined below.
correlation_search_single_rule_expression: ClassVar[Optional[str]] = None
# If no single rule query expression is defined, the multi query template expressions below are
# used and must be suitable for this purpose.
# A multiple rule search expression defines the search expression emitted if multiple rules are
# referred by the correlation rule. This is split into the expression for the query itself:
correlation_search_multi_rule_expression: ClassVar[Optional[str]] = None
# This template contains only one placeholder {queries} which contains the queries generated
# from single queries joined with a query separator:
# * A query template for each query generated from the referred Sigma rules similar to the
# search_single_rule_expression defined above:
correlation_search_multi_rule_query_expression: ClassVar[Optional[str]] = None
# Usually the expression must contain an expression that marks the matched event type as
# such, e.g. by using the rule name or uuid.
# * A joiner string that is put between each search_multi_rule_query_expression:
correlation_search_multi_rule_query_expression_joiner: ClassVar[Optional[str]] = None
## Correlation query typing phase (optional)
# Event typing expression. In some query languages the initial search query only allows basic
# boolean expressions without the possibility to mark the matched events with a type, which is
# especially required by temporal correlation rules to distinguish between the different matched
# event types.
# This is the template for the event typing expression that is used to mark the matched events.
# It contains only a {queries} placeholder that is replaced by the result of joining
# typing_rule_query_expression with typing_rule_query_expression_joiner defined afterwards.
typing_expression: ClassVar[Optional[str]] = None
# This is the template for the event typing expression for each query generated from the
# referred Sigma rules. It contains the following placeholders:
# * {rule} is the referred Sigma rule.
# * {ruleid} is the rule name or if not available the id of the rule.
# * {query} is the query generated from the referred Sigma rule.
typing_rule_query_expression: ClassVar[Optional[str]] = None
# String that is used to join the event typing expressions for each rule query referred by the
# correlation rule:
typing_rule_query_expression_joiner: ClassVar[Optional[str]] = None
# Event field normalization expression. This is used to normalize field names in events matched
# by the Sigma rules referred by the correlation rule. This is a dictionary mapping from
# correlation_method names to format strings that can contain the following placeholders:
# * {alias} is the field name to which the event field names are normalized and that is used as
# group-by field in the aggregation phase.
# * {field} is the field name from the rule that is normalized.
# The expression is generated for each Sigma rule referred by the correlation rule and each
# alias field definition that contains a field definition for the Sigma rule for which the
# normalization expression is generated. All such generated expressions are joined with the
# correlation_search_field_normalization_expression_joiner and the result is passed as