[FR] [DAC] Add Support for Known Types to Auto-generated Schemas (#3985)

eric-forte-elastic · web-flow · commit 0c38662cf30a · 2024-08-28T10:48:00.000-04:00
* Add support for known field types in auto-generated schemas

* Add support for ML integration packages

* Rename known_type to field_type
diff --git a/detection_rules/custom_schemas.py b/detection_rules/custom_schemas.py
@@ -47,11 +47,9 @@ def resolve_schema_path(path: str) -> Path:
     return path_obj if path_obj.is_absolute() else RULES_CONFIG.stack_schema_map_file.parent.joinpath(path)
 
 
-def update_data(index: str, field: str, data: dict) -> dict:
+def update_data(index: str, field: str, data: dict, field_type: str = None) -> dict:
     """Update the schema entry with the appropriate index and field."""
-    if index not in data:
-        data[index] = {}
-    data[index][field] = "keyword"
+    data.setdefault(index, {})[field] = field_type if field_type else "keyword"
     return data
 
 
@@ -82,14 +80,14 @@ def clean_stack_schema_map(stack_schema_map: dict, auto_generated_id: str, rando
     return stack_schema_map
 
 
-def update_auto_generated_schema(index: str, field: str):
+def update_auto_generated_schema(index: str, field: str, field_type: str = None):
     """Load custom schemas if present."""
     auto_gen_schema_file = str(RULES_CONFIG.auto_gen_schema_file)
     stack_schema_map_file = str(RULES_CONFIG.stack_schema_map_file)
 
     # Update autogen schema file
     data = load_dump(auto_gen_schema_file)
-    data = update_data(index, field, data)
+    data = update_data(index, field, data, field_type)
     save_dump(data, auto_gen_schema_file)
 
     # Update the stack-schema-map.yaml file with the appropriate auto_gen_schema_file location
diff --git a/detection_rules/ecs.py b/detection_rules/ecs.py
@@ -18,6 +18,7 @@
 
 from .config import CUSTOM_RULES_DIR, parse_rules_config
 from .custom_schemas import get_custom_schemas
+from .integrations import load_integrations_schemas
 from .utils import (DateTimeEncoder, cached, get_etc_path, gzip_compress,
                     load_etc_dump, read_gzip, unzip)
 
@@ -150,6 +151,31 @@ def flatten(schema):
     return flattened
 
 
+@cached
+def get_all_flattened_schema() -> dict:
+    """Load all schemas into a flattened dictionary."""
+    all_flattened_schema = {}
+    for _, schema in get_non_ecs_schema().items():
+        all_flattened_schema.update(flatten(schema))
+
+    ecs_schemas = get_schemas()
+    for version in ecs_schemas:
+        for index, info in ecs_schemas[version]["ecs_flat"].items():
+            all_flattened_schema.update({index: info["type"]})
+
+    for _, integration_schema in load_integrations_schemas().items():
+        for index, index_schema in integration_schema.items():
+            # Detect if ML integration
+            if "jobs" in index_schema:
+                ml_schemas = {k: v for k, v in index_schema.items() if k != "jobs"}
+                for _, ml_schema in ml_schemas.items():
+                    all_flattened_schema.update(flatten(ml_schema))
+            else:
+                all_flattened_schema.update(flatten(index_schema))
+
+    return all_flattened_schema
+
+
 @cached
 def get_non_ecs_schema():
     """Load non-ecs schema."""
diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py
@@ -119,7 +119,8 @@ def unique_fields(self) -> List[str]:
     def auto_add_field(self, validation_checks_error: kql.errors.KqlParseError, index_or_dataview: str) -> None:
         """Auto add a missing field to the schema."""
         field_name = extract_error_field(self.query, validation_checks_error)
-        update_auto_generated_schema(index_or_dataview, field_name)
+        field_type = ecs.get_all_flattened_schema().get(field_name)
+        update_auto_generated_schema(index_or_dataview, field_name, field_type)
 
     def to_eql(self) -> eql.ast.Expression:
         return kql.to_eql(self.query)
@@ -328,7 +329,8 @@ def unique_fields(self) -> List[str]:
     def auto_add_field(self, validation_checks_error: eql.errors.EqlParseError, index_or_dataview: str) -> None:
         """Auto add a missing field to the schema."""
         field_name = extract_error_field(self.query, validation_checks_error)
-        update_auto_generated_schema(index_or_dataview, field_name)
+        field_type = ecs.get_all_flattened_schema().get(field_name)
+        update_auto_generated_schema(index_or_dataview, field_name, field_type)
 
     def validate(self, data: "QueryRuleData", meta: RuleMeta, max_attempts: int = 10) -> None:
         """Validate an EQL query while checking TOMLRule."""