Merge pull request #412 from transform-data/qmalcolm--improve-transformations-workflow

QMalcolm · web-flow · commit 88063114a08b · 2023-01-26T11:47:33.000-08:00
Improve Terminology and Implementation of `TransformModel`
diff --git a/metricflow/model/model_transformer.py b/metricflow/model/model_transformer.py
@@ -1,7 +1,7 @@
 import copy
 import logging
 
-from typing import Sequence
+from typing import Sequence, Tuple
 
 from metricflow.model.objects.user_configured_model import UserConfiguredModel
 from metricflow.model.transformations.add_input_metric_measures import AddInputMetricMeasuresRule
@@ -23,12 +23,12 @@ class ModelTransformer:
     Generally used to make it more convenient for the user to develop their model.
     """
 
-    DEFAULT_PRE_VALIDATION_RULES: Sequence[ModelTransformRule] = (
+    PRIMARY_RULES: Sequence[ModelTransformRule] = (
         LowerCaseNamesRule(),
         SetMeasureAggregationTimeDimensionRule(),
     )
 
-    DEFAULT_POST_VALIDATION_RULES: Sequence[ModelTransformRule] = (
+    SECONDARY_RULES: Sequence[ModelTransformRule] = (
         CreateProxyMeasureRule(),
         BooleanMeasureAggregationRule(),
         CompositeIdentifierExpressionRule(),
@@ -37,25 +37,49 @@ class ModelTransformer:
         AddInputMetricMeasuresRule(),
     )
 
+    DEFAULT_RULES: Tuple[Sequence[ModelTransformRule], ...] = (
+        PRIMARY_RULES,
+        SECONDARY_RULES,
+    )
+
     @staticmethod
-    def pre_validation_transform_model(
-        model: UserConfiguredModel, rules: Sequence[ModelTransformRule] = DEFAULT_PRE_VALIDATION_RULES
+    def transform(
+        model: UserConfiguredModel,
+        ordered_rule_sequences: Tuple[Sequence[ModelTransformRule], ...] = DEFAULT_RULES,
     ) -> UserConfiguredModel:
-        """Transform a model according to configured rules before validations are run."""
+        """Copies the passed-in model, applies the rules to the copy, and then returns that copy.
+
+        It's important to note that some rules need to happen before or after other rules. Thus rules
+        are passed in as an ordered tuple of rule sequences. Primary rules are run first, and then
+        secondary rules. We don't currently have tertiary, quaternary, etc. rule sequences, but this
+        system easily allows for them.
+        """
         model_copy = copy.deepcopy(model)
 
-        for transform_rule in rules:
-            model_copy = transform_rule.transform_model(model_copy)
+        for rule_sequence in ordered_rule_sequences:
+            for rule in rule_sequence:
+                model_copy = rule.transform_model(model_copy)
 
         return model_copy
 
+    @staticmethod
+    def pre_validation_transform_model(
+        model: UserConfiguredModel, rules: Sequence[ModelTransformRule] = PRIMARY_RULES
+    ) -> UserConfiguredModel:
+        """Transform a model according to configured rules before validations are run."""
+        logger.warning(
+            "DEPRECATION: `ModelTransformer.pre_validation_transform_model` is deprecated. Please use `ModelTransformer.transform` instead."
+        )
+
+        return ModelTransformer.transform(model=model, ordered_rule_sequences=(rules,))
+
     @staticmethod
     def post_validation_transform_model(
-        model: UserConfiguredModel, rules: Sequence[ModelTransformRule] = DEFAULT_POST_VALIDATION_RULES
+        model: UserConfiguredModel, rules: Sequence[ModelTransformRule] = SECONDARY_RULES
     ) -> UserConfiguredModel:
         """Transform a model according to configured rules after validations are run."""
-        model_copy = copy.deepcopy(model)
-        for transform_rule in rules:
-            model_copy = transform_rule.transform_model(model_copy)
+        logger.warning(
+            "DEPRECATION: `ModelTransformer.post_validation_transform_model` is deprecated. Please use `ModelTransformer.transform` instead."
+        )
 
-        return model_copy
+        return ModelTransformer.transform(model=model, ordered_rule_sequences=(rules,))
diff --git a/metricflow/model/parsing/dbt_cloud_to_model.py b/metricflow/model/parsing/dbt_cloud_to_model.py
@@ -40,8 +40,7 @@ def get_dbt_cloud_metrics(auth: str, job_id: str) -> list[MetricNode]:
 def parse_dbt_cloud_metrics_to_model(dbt_metrics: List[MetricNode]) -> ModelBuildResult:
     """Builds a UserConfiguredModel from a list of dbt cloud MetricNodes"""
     build_result = DbtConverter().convert(dbt_metrics=tuple(dbt_metrics))
-    transformed_model = ModelTransformer.pre_validation_transform_model(model=build_result.model)
-    transformed_model = ModelTransformer.post_validation_transform_model(model=transformed_model)
+    transformed_model = ModelTransformer.transform(model=build_result.model)
     return ModelBuildResult(model=transformed_model, issues=build_result.issues)
 
 
diff --git a/metricflow/model/parsing/dbt_dir_to_model.py b/metricflow/model/parsing/dbt_dir_to_model.py
@@ -40,6 +40,5 @@ def parse_dbt_project_to_model(
     """Parse dbt model files in the given directory to a UserConfiguredModel."""
     manifest = get_dbt_project_manifest(directory=directory, profile=profile, target=target)
     build_result = DbtManifestTransformer(manifest=manifest).build_user_configured_model()
-    transformed_model = ModelTransformer.pre_validation_transform_model(model=build_result.model)
-    transformed_model = ModelTransformer.post_validation_transform_model(model=transformed_model)
+    transformed_model = ModelTransformer.transform(model=build_result.model)
     return ModelBuildResult(model=transformed_model, issues=build_result.issues)
diff --git a/metricflow/model/parsing/dir_to_model.py b/metricflow/model/parsing/dir_to_model.py
@@ -104,8 +104,7 @@ def collect_yaml_config_file_paths(directory: str) -> List[str]:
 def parse_directory_of_yaml_files_to_model(
     directory: str,
     template_mapping: Optional[Dict[str, str]] = None,
-    apply_pre_transformations: Optional[bool] = True,
-    apply_post_transformations: Optional[bool] = True,
+    apply_transformations: Optional[bool] = True,
     raise_issues_as_exceptions: bool = True,
 ) -> ModelBuildResult:
     """Parse files in the given directory to a UserConfiguredModel.
@@ -116,17 +115,15 @@ def parse_directory_of_yaml_files_to_model(
     return parse_yaml_file_paths_to_model(
         file_paths=file_paths,
         template_mapping=template_mapping,
-        apply_pre_transformations=apply_pre_transformations,
-        apply_post_transformations=apply_post_transformations,
+        apply_transformations=apply_transformations,
         raise_issues_as_exceptions=raise_issues_as_exceptions,
     )
 
 
 def parse_yaml_file_paths_to_model(
     file_paths: List[str],
     template_mapping: Optional[Dict[str, str]] = None,
-    apply_pre_transformations: Optional[bool] = True,
-    apply_post_transformations: Optional[bool] = True,
+    apply_transformations: Optional[bool] = True,
     raise_issues_as_exceptions: bool = True,
 ) -> ModelBuildResult:
     """Parse files the given list of file paths to a UserConfiguredModel.
@@ -157,16 +154,14 @@ def parse_yaml_file_paths_to_model(
 
     return parse_yaml_files_to_validation_ready_model(
         yaml_config_files=yaml_config_files,
-        apply_pre_transformations=apply_pre_transformations,
-        apply_post_transformations=apply_post_transformations,
+        apply_transformations=apply_transformations,
         raise_issues_as_exceptions=raise_issues_as_exceptions,
     )
 
 
 def parse_yaml_files_to_validation_ready_model(
     yaml_config_files: List[YamlConfigFile],
-    apply_pre_transformations: Optional[bool] = True,
-    apply_post_transformations: Optional[bool] = True,
+    apply_transformations: Optional[bool] = True,
     raise_issues_as_exceptions: bool = True,
 ) -> ModelBuildResult:
     """Parse and transform the given set of in-memory YamlConfigFiles to a UserConfigured model
@@ -182,11 +177,8 @@ def parse_yaml_files_to_validation_ready_model(
 
     build_issues = build_result.issues
     try:
-        if apply_pre_transformations:
-            model = ModelTransformer.pre_validation_transform_model(model)
-
-        if apply_post_transformations:
-            model = ModelTransformer.post_validation_transform_model(model)
+        if apply_transformations:
+            model = ModelTransformer.transform(model)
     except Exception as e:
         transformation_issue_results = ModelValidationResults(errors=[ValidationError(message=str(e))])
         build_issues = ModelValidationResults.merge([build_issues, transformation_issue_results])
diff --git a/metricflow/test/fixtures/model_fixtures.py b/metricflow/test/fixtures/model_fixtures.py
@@ -11,6 +11,7 @@
 from metricflow.dataflow.builder.source_node import SourceNodeBuilder
 from metricflow.dataflow.dataflow_plan import ReadSqlSourceNode, BaseOutput
 from metricflow.dataset.convert_data_source import DataSourceToDataSetConverter
+from metricflow.model.model_transformer import ModelTransformer
 from metricflow.model.model_validator import ModelValidator
 from metricflow.model.objects.data_source import DataSource
 from metricflow.model.objects.user_configured_model import UserConfiguredModel
@@ -252,16 +253,18 @@ def simple_user_configured_model(template_mapping: Dict[str, str]) -> UserConfig
 
 
 @pytest.fixture(scope="session")
-def simple_model__pre_transforms(template_mapping: Dict[str, str]) -> UserConfiguredModel:
+def simple_model__with_primary_transforms(template_mapping: Dict[str, str]) -> UserConfiguredModel:
     """Model used for tests pre-transformations."""
 
     model_build_result = parse_directory_of_yaml_files_to_model(
         os.path.join(os.path.dirname(__file__), "model_yamls/simple_model"),
         template_mapping=template_mapping,
-        apply_pre_transformations=True,
-        apply_post_transformations=False,
+        apply_transformations=False,
     )
-    return model_build_result.model
+    transformed_model = ModelTransformer.transform(
+        model=model_build_result.model, ordered_rule_sequences=(ModelTransformer.PRIMARY_RULES,)
+    )
+    return transformed_model
 
 
 @pytest.fixture(scope="session")
diff --git a/metricflow/test/model/transformations/test_configurable_transform_rules.py b/metricflow/test/model/transformations/test_configurable_transform_rules.py
@@ -16,17 +16,13 @@ def transform_model(model: UserConfiguredModel) -> UserConfiguredModel:  # noqa:
         return model
 
 
-def test_can_configure_model_transform_rules(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
-    pre_model = simple_model__pre_transforms
+def test_can_configure_model_transform_rules(  # noqa: D
+    simple_model__with_primary_transforms: UserConfiguredModel,
+) -> None:
+    pre_model = simple_model__with_primary_transforms
     assert not all(len(x.name) == 3 for x in pre_model.data_sources)
 
-    # Confirms that a custom transformation works for pre-validation transform
-    pre_model = ModelTransformer.pre_validation_transform_model(pre_model, rules=[SliceNamesRule()])
-    assert all(len(x.name) == 3 for x in pre_model.data_sources)
-
-    post_model = simple_model__pre_transforms
-    assert not all(len(x.name) == 3 for x in post_model.data_sources)
-
-    # Confirms that a custom transformation works for post-validation transform
-    post_model = ModelTransformer.post_validation_transform_model(post_model, rules=[SliceNamesRule()])
-    assert all(len(x.name) == 3 for x in post_model.data_sources)
+    # Confirms that a custom transformation works for `ModelTransformer.transform`
+    rules = [SliceNamesRule()]
+    transformed_model = ModelTransformer.transform(pre_model, ordered_rule_sequences=(rules,))
+    assert all(len(x.name) == 3 for x in transformed_model.data_sources)
diff --git a/metricflow/test/model/validations/test_common_identifiers.py b/metricflow/test/model/validations/test_common_identifiers.py
@@ -12,8 +12,8 @@
 
 
 @pytest.mark.skip("TODO: re-enforce after validations improvements")
-def test_lonely_identifier_raises_issue(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
-    model = copy.deepcopy(simple_model__pre_transforms)
+def test_lonely_identifier_raises_issue(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa: D
+    model = copy.deepcopy(simple_model__with_primary_transforms)
     lonely_identifier_name = "hi_im_lonely"
 
     func: Callable[[DataSource], bool] = lambda data_source: len(data_source.identifiers) > 0
diff --git a/metricflow/test/model/validations/test_configurable_rules.py b/metricflow/test/model/validations/test_configurable_rules.py
@@ -8,9 +8,11 @@
 from metricflow.test.test_utils import model_with_materialization
 
 
-def test_can_configure_model_validator_rules(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
+def test_can_configure_model_validator_rules(  # noqa: D
+    simple_model__with_primary_transforms: UserConfiguredModel,
+) -> None:
     model = model_with_materialization(
-        simple_model__pre_transforms,
+        simple_model__with_primary_transforms,
         [
             materialization_with_guaranteed_meta(
                 name="foobar",
diff --git a/metricflow/test/model/validations/test_data_warehouse_tasks.py b/metricflow/test/model/validations/test_data_warehouse_tasks.py
@@ -280,8 +280,7 @@ def test_validate_metrics(  # noqa: D
     )
     model.data_sources[0].measures = new_measures
     model.metrics = []
-    model = ModelTransformer.pre_validation_transform_model(model)
-    model = ModelTransformer.post_validation_transform_model(model)
+    model = ModelTransformer.transform(model)
 
     # Validate new metric created by proxy causes an issue (because the column used doesn't exist)
     dw_validator = DataWarehouseModelValidator(
diff --git a/metricflow/test/model/validations/test_element_const.py b/metricflow/test/model/validations/test_element_const.py
@@ -15,8 +15,8 @@ def _categorical_dimensions(data_source: DataSource) -> Tuple[Dimension, ...]:
     return tuple(dim for dim in data_source.dimensions if dim.type == DimensionType.CATEGORICAL)
 
 
-def test_cross_element_names(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa:D
-    model = copy.deepcopy(simple_model__pre_transforms)
+def test_cross_element_names(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa:D
+    model = copy.deepcopy(simple_model__with_primary_transforms)
 
     # ensure we have a usable data source for the test
     usable_ds, usable_ds_index = find_data_source_with(
diff --git a/metricflow/test/model/validations/test_identifiers.py b/metricflow/test/model/validations/test_identifiers.py
@@ -33,10 +33,10 @@
 
 
 def test_data_source_cant_have_more_than_one_primary_identifier(
-    simple_model__pre_transforms: UserConfiguredModel,
+    simple_model__with_primary_transforms: UserConfiguredModel,
 ) -> None:  # noqa: D
     """Add an additional primary identifier to a data source and assert that it cannot have two"""
-    model = copy.deepcopy(simple_model__pre_transforms)
+    model = copy.deepcopy(simple_model__with_primary_transforms)
     func: Callable[[DataSource], bool] = lambda data_source: len(data_source.identifiers) > 1
 
     multiple_identifier_data_source, _ = find_data_source_with(model, func)
@@ -216,13 +216,13 @@ def test_composite_identifiers_ref_and_name() -> None:  # noqa:D
         )
 
 
-def test_mismatched_identifier(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
+def test_mismatched_identifier(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa: D
     """Testing two mismatched identifiers in two data sources
 
     Add two identifiers with mismatched sub-identifiers to two data sources in the model
     Ensure that our composite identifiers rule catches this incompatibility
     """
-    model = copy.deepcopy(simple_model__pre_transforms)
+    model = copy.deepcopy(simple_model__with_primary_transforms)
 
     bookings_source, _ = find_data_source_with(
         model=model,
diff --git a/metricflow/test/model/validations/test_materializations.py b/metricflow/test/model/validations/test_materializations.py
@@ -10,16 +10,16 @@
 logger = logging.getLogger(__name__)
 
 
-def test_materialization_validation(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
-    ValidMaterializationRule.validate_model(simple_model__pre_transforms)
+def test_materialization_validation(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa: D
+    ValidMaterializationRule.validate_model(simple_model__with_primary_transforms)
 
 
-def test_identifier(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
+def test_identifier(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa: D
     assert (
         len(
             ValidMaterializationRule.validate_model(
                 model_with_materialization(
-                    simple_model__pre_transforms,
+                    simple_model__with_primary_transforms,
                     [
                         materialization_with_guaranteed_meta(
                             name="foobar",
@@ -34,12 +34,12 @@ def test_identifier(simple_model__pre_transforms: UserConfiguredModel) -> None:
     )
 
 
-def test_invalid_metric_name(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
+def test_invalid_metric_name(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa: D
     assert (
         len(
             ValidMaterializationRule.validate_model(
                 model_with_materialization(
-                    simple_model__pre_transforms,
+                    simple_model__with_primary_transforms,
                     [
                         materialization_with_guaranteed_meta(
                             name="foobar",
@@ -54,12 +54,12 @@ def test_invalid_metric_name(simple_model__pre_transforms: UserConfiguredModel)
     )
 
 
-def test_invalid_dimension_name(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
+def test_invalid_dimension_name(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa: D
     assert (
         len(
             ValidMaterializationRule.validate_model(
                 model_with_materialization(
-                    simple_model__pre_transforms,
+                    simple_model__with_primary_transforms,
                     [
                         materialization_with_guaranteed_meta(
                             name="foobar",
@@ -74,13 +74,13 @@ def test_invalid_dimension_name(simple_model__pre_transforms: UserConfiguredMode
     )
 
 
-def test_missing_primary_time_dimension(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
+def test_missing_primary_time_dimension(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa: D
     """Materializations should have the primary time dimension listed as a dimension"""
     assert (
         len(
             ValidMaterializationRule.validate_model(
                 model_with_materialization(
-                    simple_model__pre_transforms,
+                    simple_model__with_primary_transforms,
                     [
                         materialization_with_guaranteed_meta(
                             name="foobar",
@@ -95,12 +95,12 @@ def test_missing_primary_time_dimension(simple_model__pre_transforms: UserConfig
     )
 
 
-def test_valid_time_granularity(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
+def test_valid_time_granularity(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa: D
     assert (
         len(
             ValidMaterializationRule.validate_model(
                 model_with_materialization(
-                    simple_model__pre_transforms,
+                    simple_model__with_primary_transforms,
                     [
                         materialization_with_guaranteed_meta(
                             name="materialization_test_case",
@@ -115,12 +115,12 @@ def test_valid_time_granularity(simple_model__pre_transforms: UserConfiguredMode
     )
 
 
-def test_invalid_time_granularity(simple_model__pre_transforms: UserConfiguredModel) -> None:  # noqa: D
+def test_invalid_time_granularity(simple_model__with_primary_transforms: UserConfiguredModel) -> None:  # noqa: D
     assert (
         len(
             ValidMaterializationRule.validate_model(
                 model_with_materialization(
-                    simple_model__pre_transforms,
+                    simple_model__with_primary_transforms,
                     [
                         materialization_with_guaranteed_meta(
                             name="materialization_test_case",
diff --git a/metricflow/test/model/validations/test_measures.py b/metricflow/test/model/validations/test_measures.py
diff --git a/metricflow/test/model/validations/test_reserved_keywords.py b/metricflow/test/model/validations/test_reserved_keywords.py
diff --git a/metricflow/test/model/validations/test_unique_valid_name.py b/metricflow/test/model/validations/test_unique_valid_name.py

Original file line number	Diff line number	Diff line change
`@@ -280,8 +280,7 @@ def test_validate_metrics( # noqa: D`
`280`	`280`	`)`
`281`	`281`	`model.data_sources[0].measures = new_measures`
`282`	`282`	`model.metrics = []`
`283`		`- model = ModelTransformer.pre_validation_transform_model(model)`
`284`		`- model = ModelTransformer.post_validation_transform_model(model)`
	`283`	`+ model = ModelTransformer.transform(model)`
`285`	`284`
`286`	`285`	`# Validate new metric created by proxy causes an issue (because the column used doesn't exist)`
`287`	`286`	`dw_validator = DataWarehouseModelValidator(`