27
27
from metricflow .dataflow .builder .partitions import PartitionJoinResolver
28
28
from metricflow .dataflow .dataflow_plan import (
29
29
BaseOutput ,
30
+ FilterElementsNode ,
30
31
JoinDescription ,
31
32
PartitionDimensionJoinDescription ,
32
33
PartitionTimeDimensionJoinDescription ,
37
38
from metricflow .model .semantics .semantic_model_join_evaluator import SemanticModelJoinEvaluator
38
39
from metricflow .plan_conversion .instance_converters import CreateValidityWindowJoinDescription
39
40
from metricflow .protocols .semantics import SemanticModelAccessor
40
- from metricflow .specs .specs import (
41
- LinkableInstanceSpec ,
42
- LinklessEntitySpec ,
43
- )
41
+ from metricflow .specs .specs import InstanceSpecSet , LinkableInstanceSpec , LinklessEntitySpec
42
+ from metricflow .sql .sql_plan import SqlJoinType
44
43
45
44
logger = logging .getLogger (__name__ )
46
45
@@ -61,6 +60,8 @@ class JoinLinkableInstancesRecipe:
61
60
# the linkable specs in the node that can help to satisfy the query. e.g. "user_id__country" might be one of the
62
61
# "satisfiable_linkable_specs", but "country" is the linkable spec in the node.
63
62
satisfiable_linkable_specs : List [LinkableInstanceSpec ]
63
+ # Join type to use when joining nodes
64
+ join_type : SqlJoinType
64
65
65
66
# The partitions to join on, if there are matching partitions between the start_node and node_to_join.
66
67
join_on_partition_dimensions : Tuple [PartitionDimensionJoinDescription , ...]
@@ -71,12 +72,36 @@ class JoinLinkableInstancesRecipe:
71
72
@property
def join_description(self) -> JoinDescription:
    """Return this recipe expressed as a `JoinDescription` for use in a dataflow plan node.

    The node to join is wrapped in a `FilterElementsNode` so that only the elements referenced by the join
    and by the satisfiable specs are included.
    """
    linkable_specs = self.satisfiable_linkable_specs
    # The first entity link of every spec is read below, so each spec must have at least one link.
    assert all([len(linkable_spec.entity_links) > 0 for linkable_spec in linkable_specs])

    # Elements to keep from the node being joined, starting with the entity named by each spec's first link.
    specs_to_keep: List[LinkableInstanceSpec] = [
        LinklessEntitySpec.from_reference(linkable_spec.entity_links[0]) for linkable_spec in linkable_specs
    ]

    # Keep the partition specs that belong to the node-to-join side of each partition join.
    for partition_join in self.join_on_partition_dimensions:
        specs_to_keep.append(partition_join.node_to_join_dimension_spec)
    for time_partition_join in self.join_on_partition_time_dimensions:
        specs_to_keep.append(time_partition_join.node_to_join_time_dimension_spec)

    # Keep both ends of the validity window when one is set.
    window = self.validity_window
    if window:
        specs_to_keep.append(window.window_start_dimension)
        specs_to_keep.append(window.window_end_dimension)

    # `satisfiable_linkable_specs` describes what can be satisfied after the join, so the first entity link is
    # dropped when filtering before the join.
    # e.g. if the node is used to satisfy "user_id__country", then the node must have the entity "user_id" and
    # the "country" dimension so that it can be joined to the source node.
    for linkable_spec in linkable_specs:
        specs_to_keep.append(linkable_spec.without_first_entity_link)

    return JoinDescription(
        join_node=FilterElementsNode(
            parent_node=self.node_to_join,
            include_specs=InstanceSpecSet.create_from_linkable_specs(specs_to_keep),
        ),
        join_on_entity=self.join_on_entity,
        join_on_partition_dimensions=self.join_on_partition_dimensions,
        join_on_partition_time_dimensions=self.join_on_partition_time_dimensions,
        validity_window=self.validity_window,
        join_type=self.join_type,
    )
81
106
82
107
@@ -133,6 +158,7 @@ def _find_joinable_candidate_nodes_that_can_satisfy_linkable_specs(
133
158
self ,
134
159
start_node_instance_set : InstanceSet ,
135
160
needed_linkable_specs : List [LinkableInstanceSpec ],
161
+ join_type : SqlJoinType ,
136
162
) -> List [JoinLinkableInstancesRecipe ]:
137
163
"""Get nodes that can be joined to get 1 or more of the "needed_linkable_specs".
138
164
@@ -257,6 +283,7 @@ def _find_joinable_candidate_nodes_that_can_satisfy_linkable_specs(
257
283
join_on_partition_dimensions = join_on_partition_dimensions ,
258
284
join_on_partition_time_dimensions = join_on_partition_time_dimensions ,
259
285
validity_window = validity_window_join_description ,
286
+ join_type = join_type ,
260
287
)
261
288
)
262
289
@@ -271,6 +298,7 @@ def _find_joinable_candidate_nodes_that_can_satisfy_linkable_specs(
271
298
def _update_candidates_that_can_satisfy_linkable_specs (
272
299
candidates_for_join : List [JoinLinkableInstancesRecipe ],
273
300
already_satisfisfied_linkable_specs : List [LinkableInstanceSpec ],
301
+ join_type : SqlJoinType ,
274
302
) -> List [JoinLinkableInstancesRecipe ]:
275
303
"""Update / filter candidates_for_join based on linkable instance specs that we have already satisfied.
276
304
@@ -294,6 +322,7 @@ def _update_candidates_that_can_satisfy_linkable_specs(
294
322
join_on_partition_dimensions = candidate_for_join .join_on_partition_dimensions ,
295
323
join_on_partition_time_dimensions = candidate_for_join .join_on_partition_time_dimensions ,
296
324
validity_window = candidate_for_join .validity_window ,
325
+ join_type = join_type ,
297
326
)
298
327
)
299
328
return sorted (
@@ -306,6 +335,7 @@ def evaluate_node(
306
335
self ,
307
336
start_node : BaseOutput ,
308
337
required_linkable_specs : Sequence [LinkableInstanceSpec ],
338
+ default_join_type : SqlJoinType ,
309
339
) -> LinkableInstanceSatisfiabilityEvaluation :
310
340
"""Evaluates if the "required_linkable_specs" can be realized by joining the "start_node" with other nodes.
311
341
@@ -345,7 +375,9 @@ def evaluate_node(
345
375
possibly_joinable_linkable_specs .append (required_linkable_spec )
346
376
347
377
candidates_for_join = self ._find_joinable_candidate_nodes_that_can_satisfy_linkable_specs (
348
- start_node_instance_set = candidate_instance_set , needed_linkable_specs = possibly_joinable_linkable_specs
378
+ start_node_instance_set = candidate_instance_set ,
379
+ needed_linkable_specs = possibly_joinable_linkable_specs ,
380
+ join_type = default_join_type ,
349
381
)
350
382
join_candidates : List [JoinLinkableInstancesRecipe ] = []
351
383
@@ -378,6 +410,7 @@ def evaluate_node(
378
410
candidates_for_join = self ._update_candidates_that_can_satisfy_linkable_specs (
379
411
candidates_for_join = candidates_for_join ,
380
412
already_satisfisfied_linkable_specs = next_candidate .satisfiable_linkable_specs ,
413
+ join_type = default_join_type ,
381
414
)
382
415
383
416
# The once possibly joinable specs are definitely joinable and no longer need to be searched for.
0 commit comments