diff --git a/pydough/conversion/hybrid_decorrelater.py b/pydough/conversion/hybrid_decorrelater.py new file mode 100644 index 00000000..72f6aad1 --- /dev/null +++ b/pydough/conversion/hybrid_decorrelater.py @@ -0,0 +1,330 @@ +""" +Logic for applying de-correlation to hybrid trees before relational conversion +if the correlate is not a semi/anti join. +""" + +__all__ = ["run_hybrid_decorrelation"] + + +import copy + +from .hybrid_tree import ( + ConnectionType, + HybridBackRefExpr, + HybridCalc, + HybridChildRefExpr, + HybridColumnExpr, + HybridConnection, + HybridCorrelExpr, + HybridExpr, + HybridFilter, + HybridFunctionExpr, + HybridLiteralExpr, + HybridPartition, + HybridRefExpr, + HybridTree, + HybridWindowExpr, +) + + +class Decorrelater: + """ + Class that encapsulates the logic used for de-correlation of hybrid trees. + """ + + def make_decorrelate_parent( + self, hybrid: HybridTree, child_idx: int, required_steps: int + ) -> HybridTree: + """ + Creates a snapshot of the ancestry of the hybrid tree that contains + a correlated child, without any of its children, its descendants, or + any pipeline operators that do not need to be there. + + Args: + `hybrid`: The hybrid tree to create a snapshot of in order to aid + in the de-correlation of a correlated child. + `child_idx`: The index of the correlated child of hybrid that the + snapshot is being created to aid in the de-correlation of. + `required_steps`: The index of the last pipeline operator that + needs to be included in the snapshot in order for the child to be + derivable. + + Returns: + A snapshot of `hybrid` and its ancestry in the hybrid tree, without + without any of its children or pipeline operators that occur during + or after the derivation of the correlated child, or without any of + its descendants. + """ + if isinstance(hybrid.pipeline[0], HybridPartition) and child_idx == 0: + # Special case: if the correlated child is the data argument of a + # partition operation, then the parent to snapshot is actually the + # parent of the level containing the partition operation. In this + # case, all of the parent's children & pipeline operators should be + # included in the snapshot. + assert hybrid.parent is not None + return self.make_decorrelate_parent( + hybrid.parent, len(hybrid.parent.children), len(hybrid.pipeline) + ) + # Temporarily detach the successor of the current level, then create a + # deep copy of the current level (which will include its ancestors), + # then reattach the successor back to the original. This ensures that + # the descendants of the current level are not included when providing + # the parent to the correlated child as its new ancestor. + successor: HybridTree | None = hybrid.successor + hybrid._successor = None + new_hybrid: HybridTree = copy.deepcopy(hybrid) + hybrid._successor = successor + # Ensure the new parent only includes the children & pipeline operators + # that is has to. + new_hybrid._children = new_hybrid._children[:child_idx] + new_hybrid._pipeline = new_hybrid._pipeline[: required_steps + 1] + return new_hybrid + + def remove_correl_refs( + self, expr: HybridExpr, parent: HybridTree, child_height: int + ) -> HybridExpr: + """ + Recursively & destructively removes correlated references within a + hybrid expression if they point to a specific correlated ancestor + hybrid tree, and replaces them with corresponding BACK references. + + Args: + `expr`: The hybrid expression to remove correlated references from. + `parent`: The correlated ancestor hybrid tree that the correlated + references should point to when they are targeted for removal. + `child_height`: The height of the correlated child within the + hybrid tree that the correlated references is point to. This is + the number of BACK indices to shift by when replacing the + correlated reference with a BACK reference. + + Returns: + The hybrid expression with all correlated references to `parent` + replaced with corresponding BACK references. The replacement also + happens in-place. + """ + match expr: + case HybridCorrelExpr(): + # If the correlated reference points to the parent, then + # replace it with a BACK reference. Otherwise, recursively + # transform its input expression in case it contains another + # correlated reference. + if expr.hybrid is parent: + result: HybridExpr | None = expr.expr.shift_back(child_height) + assert result is not None + return result + else: + expr.expr = self.remove_correl_refs(expr.expr, parent, child_height) + return expr + case HybridFunctionExpr(): + # For regular functions, recursively transform all of their + # arguments. + for idx, arg in enumerate(expr.args): + expr.args[idx] = self.remove_correl_refs(arg, parent, child_height) + return expr + case HybridWindowExpr(): + # For window functions, recursively transform all of their + # arguments, partition keys, and order keys. + for idx, arg in enumerate(expr.args): + expr.args[idx] = self.remove_correl_refs(arg, parent, child_height) + for idx, arg in enumerate(expr.partition_args): + expr.partition_args[idx] = self.remove_correl_refs( + arg, parent, child_height + ) + for order_arg in expr.order_args: + order_arg.expr = self.remove_correl_refs( + order_arg.expr, parent, child_height + ) + return expr + case ( + HybridBackRefExpr() + | HybridRefExpr() + | HybridChildRefExpr() + | HybridLiteralExpr() + | HybridColumnExpr() + ): + # All other expression types do not require any transformation + # to de-correlate since they cannot contain correlations. + return expr + case _: + raise NotImplementedError( + f"Unsupported expression type: {expr.__class__.__name__}." + ) + + def correl_ref_purge( + self, + level: HybridTree | None, + old_parent: HybridTree, + new_parent: HybridTree, + child_height: int, + ) -> None: + """ + The recursive procedure to remove correlated references from the + expressions of a hybrid tree or any of its ancestors or children if + they refer to a specific correlated ancestor that is being removed. + + Args: + `level`: The current level of the hybrid tree to remove correlated + references from. + `old_parent`: The correlated ancestor hybrid tree that the correlated + references should point to when they are targeted for removal. + `new_parent`: The ancestor of `level` that removal should stop at + because it is the transposed snapshot of `old_parent`, and + therefore it & its ancestors cannot contain any more correlated + references that would be targeted for removal. + `child_height`: The height of the correlated child within the + hybrid tree that the correlated references is point to. This is + the number of BACK indices to shift by when replacing the + correlated reference with a BACK + """ + while level is not None and level is not new_parent: + # First, recursively remove any targeted correlated references from + # the children of the current level. + for child in level.children: + self.correl_ref_purge( + child.subtree, old_parent, new_parent, child_height + ) + # Then, remove any correlated references from the pipeline + # operators of the current level. Usually this just means + # transforming the terms/orderings/unique keys of the operation, + # but specific operation types will require special casing if they + # have additional expressions stored in other field that need to be + # transformed. + for operation in level.pipeline: + for name, expr in operation.terms.items(): + operation.terms[name] = self.remove_correl_refs( + expr, old_parent, child_height + ) + for ordering in operation.orderings: + ordering.expr = self.remove_correl_refs( + ordering.expr, old_parent, child_height + ) + for idx, expr in enumerate(operation.unique_exprs): + operation.unique_exprs[idx] = self.remove_correl_refs( + expr, old_parent, child_height + ) + if isinstance(operation, HybridCalc): + for str, expr in operation.new_expressions.items(): + operation.new_expressions[str] = self.remove_correl_refs( + expr, old_parent, child_height + ) + if isinstance(operation, HybridFilter): + operation.condition = self.remove_correl_refs( + operation.condition, old_parent, child_height + ) + # Repeat the process on the ancestor until either loop guard + # condition is no longer True. + level = level.parent + + def decorrelate_child( + self, + old_parent: HybridTree, + new_parent: HybridTree, + child: HybridConnection, + is_aggregate: bool, + ) -> None: + """ + Runs the logic to de-correlate a child of a hybrid tree that contains + a correlated reference. This involves linking the child to a new parent + as its ancestor, the parent being a snapshot of the original hybrid + tree that contained the correlated child as a child. The transformed + child can now replace correlated references with BACK references that + point to terms in its newly expanded ancestry, and the original hybrid + tree can now join onto this child using its uniqueness keys. + """ + # First, find the height of the child subtree & its top-most level. + child_root: HybridTree = child.subtree + child_height: int = 1 + while child_root.parent is not None: + child_height += 1 + child_root = child_root.parent + # Link the top level of the child subtree to the new parent. + new_parent.add_successor(child_root) + # Replace any correlated references to the original parent with BACK references. + self.correl_ref_purge(child.subtree, old_parent, new_parent, child_height) + # Update the join keys to join on the unique keys of all the ancestors. + new_join_keys: list[tuple[HybridExpr, HybridExpr]] = [] + additional_levels: int = 0 + current_level: HybridTree | None = old_parent + while current_level is not None: + for unique_key in current_level.pipeline[0].unique_exprs: + lhs_key: HybridExpr | None = unique_key.shift_back(additional_levels) + rhs_key: HybridExpr | None = unique_key.shift_back( + additional_levels + child_height + ) + assert lhs_key is not None and rhs_key is not None + new_join_keys.append((lhs_key, rhs_key)) + current_level = current_level.parent + additional_levels += 1 + child.subtree.join_keys = new_join_keys + # If aggregating, do the same with the aggregation keys. + if is_aggregate: + new_agg_keys: list[HybridExpr] = [] + assert child.subtree.join_keys is not None + for _, rhs_key in child.subtree.join_keys: + new_agg_keys.append(rhs_key) + child.subtree.agg_keys = new_agg_keys + + def decorrelate_hybrid_tree(self, hybrid: HybridTree) -> HybridTree: + """ + TODO + """ + # Recursively decorrelate the ancestors of the current level of the + # hybrid tree. + if hybrid.parent is not None: + hybrid._parent = self.decorrelate_hybrid_tree(hybrid.parent) + hybrid._parent._successor = hybrid + # Iterate across all the children and recursively decorrelate them. + for child in hybrid.children: + child.subtree = self.decorrelate_hybrid_tree(child.subtree) + # Iterate across all the children, identify any that are correlated, + # and transform any of the correlated ones that require decorrelation + # due to the type of connection. + for idx, child in enumerate(hybrid.children): + if idx not in hybrid.correlated_children: + continue + new_parent: HybridTree = self.make_decorrelate_parent( + hybrid, idx, hybrid.children[idx].required_steps + ) + match child.connection_type: + case ( + ConnectionType.SINGULAR + | ConnectionType.SINGULAR_ONLY_MATCH + | ConnectionType.AGGREGATION + | ConnectionType.AGGREGATION_ONLY_MATCH + ): + self.decorrelate_child( + hybrid, new_parent, child, child.connection_type.is_aggregation + ) + case ConnectionType.NDISTINCT | ConnectionType.NDISTINCT_ONLY_MATCH: + raise NotImplementedError( + f"PyDough does not yet support correlated references with the {child.connection_type.name} pattern." + ) + case ( + ConnectionType.SEMI + | ConnectionType.ANTI + | ConnectionType.NO_MATCH_SINGULAR + | ConnectionType.NO_MATCH_AGGREGATION + | ConnectionType.NO_MATCH_NDISTINCT + ): + # These patterns do not require decorrelation since they + # are supported via correlated SEMI/ANTI joins. + continue + return hybrid + + +def run_hybrid_decorrelation(hybrid: HybridTree) -> HybridTree: + """ + Invokes the procedure to remove correlated references from a hybrid tree + before relational conversion if those correlated references are invalid + (e.g. not from a semi/anti join). + + Args: + `hybrid`: The hybrid tree to remove correlated references from. + + Returns: + The hybrid tree with all invalid correlated references removed as the + tree structure is re-written to allow them to be replaced with BACK + references. The transformation is also done in-place. + """ + decorr: Decorrelater = Decorrelater() + return decorr.decorrelate_hybrid_tree(hybrid) diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index 79a03062..956e88f4 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -184,6 +184,8 @@ def apply_renamings(self, renamings: dict[str, str]) -> "HybridExpr": return self def shift_back(self, levels: int) -> HybridExpr | None: + if levels == 0: + return self return HybridBackRefExpr(self.name, levels, self.typ) @@ -895,6 +897,7 @@ def __init__( self._is_connection_root: bool = is_connection_root self._agg_keys: list[HybridExpr] | None = None self._join_keys: list[tuple[HybridExpr, HybridExpr]] | None = None + self._correlated_children: set[int] = set() if isinstance(root_operation, HybridPartition): self._join_keys = [] @@ -935,6 +938,14 @@ def children(self) -> list[HybridConnection]: """ return self._children + @property + def correlated_children(self) -> set[int]: + """ + The set of indices of children that contain correlated references to + the current hybrid tree. + """ + return self._correlated_children + @property def successor(self) -> Optional["HybridTree"]: """ @@ -1584,9 +1595,10 @@ def make_hybrid_correl_expr( # Special case: stepping out of the data argument of PARTITION back # into its ancestor. For example: # TPCH(x=...).PARTITION(data.WHERE(y > BACK(1).x), ...) - if len(parent_tree.pipeline) == 1 and isinstance( + partition_edge_case: bool = len(parent_tree.pipeline) == 1 and isinstance( parent_tree.pipeline[0], HybridPartition - ): + ) + if partition_edge_case: assert parent_tree.parent is not None # Treat the partition's parent as the conext for the back # to step into, as opposed to the partition itself (so the back @@ -1594,26 +1606,8 @@ def make_hybrid_correl_expr( self.stack.append(parent_tree.parent) parent_result = self.make_hybrid_correl_expr( back_expr, collection, steps_taken_so_far - ) + ).expr self.stack.pop() - self.stack.append(parent_tree) - # Then, postprocess the output to account for the fact that a - # BACK level got skipped due to the change in subtree. - match parent_result.expr: - case HybridRefExpr(): - parent_result = HybridBackRefExpr( - parent_result.expr.name, 1, parent_result.typ - ) - case HybridBackRefExpr(): - parent_result = HybridBackRefExpr( - parent_result.expr.name, - parent_result.expr.back_idx + 1, - parent_result.typ, - ) - case _: - raise ValueError( - f"Malformed expression for correlated reference: {parent_result}" - ) elif remaining_steps_back == 0: # If there are no more steps back to be made, then the correlated # reference is to a reference from the current context. @@ -1634,6 +1628,8 @@ def make_hybrid_correl_expr( collection, back_expr.term_name, remaining_steps_back ) parent_result = self.make_hybrid_expr(parent_tree, new_expr, {}, False) + if not isinstance(parent_result, HybridCorrelExpr): + parent_tree.correlated_children.add(len(parent_tree.children)) # Restore parent_tree back onto the stack, since evaluating `back_expr` # does not change the program's current placement in the sutbtrees. self.stack.append(parent_tree) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 494d7fbd..77c10c86 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -44,6 +44,7 @@ ) from pydough.types import BooleanType, Int64Type, UnknownType +from .hybrid_decorrelater import run_hybrid_decorrelation from .hybrid_tree import ( ConnectionType, HybridBackRefExpr, @@ -648,7 +649,7 @@ def translate_partition( Returns: The TranslationOutput payload containing access to the aggregated - child corresponding tot he partition data. + child corresponding to the partition data. """ expressions: dict[HybridExpr, ColumnReference] = {} # Account for the fact that the PARTITION is stepping down a level, @@ -998,10 +999,11 @@ def convert_ast_to_relational( final_terms: set[str] = node.calc_terms node = translator.preprocess_root(node) - # Convert the QDAG node to the hybrid form, then invoke the relational - # conversion procedure. The first element in the returned list is the - # final rel node. + # Convert the QDAG node to the hybrid form, decorrelate it, then invoke + # the relational conversion procedure. The first element in the returned + # list is the final rel node. hybrid: HybridTree = HybridTranslator(configs).make_hybrid_tree(node, None) + run_hybrid_decorrelation(hybrid) renamings: dict[str, str] = hybrid.pipeline[-1].renamings output: TranslationOutput = translator.rel_translation( None, hybrid, len(hybrid.pipeline) - 1 diff --git a/pydough/pydough_operators/base_operator.py b/pydough/pydough_operators/base_operator.py index fe12629e..82ccfacf 100644 --- a/pydough/pydough_operators/base_operator.py +++ b/pydough/pydough_operators/base_operator.py @@ -80,3 +80,15 @@ def to_string(self, arg_strings: list[str]) -> str: Returns: The string representation of the operator called on its arguments. """ + + @abstractmethod + def equals(self, other: object) -> bool: + """ + Returns whether this operator is equal to another operator. + """ + + def __eq__(self, other: object) -> bool: + return self.equals(other) + + def __hash__(self) -> int: + return hash(repr(self)) diff --git a/pydough/types/struct_type.py b/pydough/types/struct_type.py index 94b3222a..7b3ef680 100644 --- a/pydough/types/struct_type.py +++ b/pydough/types/struct_type.py @@ -109,7 +109,7 @@ def parse_struct_body( except PyDoughTypeException: pass - # Otherwise, iterate across all commas int he right hand side + # Otherwise, iterate across all commas in the right hand side # that are candidate splitting locations between a PyDough # type and a suffix that is a valid list of fields. if field_type is None: diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index c5956c80..bcc6d73d 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,8 +1,10 @@ ROOT(columns=[('name', name), ('n_prefix_nations', n_prefix_nations)], orderings=[(ordering_1):asc_first]) PROJECT(columns={'n_prefix_nations': n_prefix_nations, 'name': name, 'ordering_1': name}) PROJECT(columns={'n_prefix_nations': DEFAULT_TO(agg_0, 0:int64), 'name': name}) - JOIN(conditions=[t0.key == t1.region_key], types=['left'], columns={'agg_0': t1.agg_0, 'name': t0.name}, correl_name='corr1') + JOIN(conditions=[t0.key == t1.key], types=['left'], columns={'agg_0': t1.agg_0, 'name': t0.name}) SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) - AGGREGATE(keys={'region_key': region_key}, aggregations={'agg_0': COUNT()}) - FILTER(condition=SLICE(name, None:unknown, 1:int64, None:unknown) == SLICE(corr1.name, None:unknown, 1:int64, None:unknown), columns={'region_key': region_key}) - SCAN(table=tpch.NATION, columns={'name': n_name, 'region_key': n_regionkey}) + AGGREGATE(keys={'key': key}, aggregations={'agg_0': COUNT()}) + FILTER(condition=SLICE(name_3, None:unknown, 1:int64, None:unknown) == SLICE(name, None:unknown, 1:int64, None:unknown), columns={'key': key}) + JOIN(conditions=[t0.key == t1.region_key], types=['inner'], columns={'key': t0.key, 'name': t0.name, 'name_3': t1.name}) + SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) + SCAN(table=tpch.NATION, columns={'name': n_name, 'region_key': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 0dc3f6a6..960b0c69 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,22 +1,25 @@ ROOT(columns=[('n', n)], orderings=[]) PROJECT(columns={'n': agg_1}) - JOIN(conditions=[True:bool], types=['left'], columns={'agg_1': t1.agg_1}, correl_name='corr4') - PROJECT(columns={'avg_price': agg_0}) - AGGREGATE(keys={}, aggregations={'agg_0': AVG(retail_price)}) - SCAN(table=tpch.PART, columns={'retail_price': p_retailprice}) + JOIN(conditions=[True:bool], types=['left'], columns={'agg_1': t1.agg_1}) + AGGREGATE(keys={}, aggregations={}) + SCAN(table=tpch.PART, columns={'brand': p_brand}) AGGREGATE(keys={}, aggregations={'agg_1': COUNT()}) FILTER(condition=True:bool, columns={'account_balance': account_balance}) - JOIN(conditions=[t0.key == t1.supplier_key], types=['semi'], columns={'account_balance': t0.account_balance}, correl_name='corr3') - PROJECT(columns={'account_balance': account_balance, 'avg_price': agg_0, 'key': key}) - JOIN(conditions=[t0.key == t1.supplier_key], types=['left'], columns={'account_balance': t0.account_balance, 'agg_0': t1.agg_0, 'key': t0.key}) - FILTER(condition=nation_key == 19:int64, columns={'account_balance': account_balance, 'key': key}) - SCAN(table=tpch.SUPPLIER, columns={'account_balance': s_acctbal, 'key': s_suppkey, 'nation_key': s_nationkey}) + JOIN(conditions=[t0.key == t1.supplier_key], types=['semi'], columns={'account_balance': t0.account_balance}, correl_name='corr4') + PROJECT(columns={'account_balance': account_balance, 'avg_price': avg_price, 'avg_price_3': agg_0, 'key': key}) + JOIN(conditions=[t0.key == t1.supplier_key], types=['left'], columns={'account_balance': t0.account_balance, 'agg_0': t1.agg_0, 'avg_price': t0.avg_price, 'key': t0.key}) + FILTER(condition=nation_key == 19:int64, columns={'account_balance': account_balance, 'avg_price': avg_price, 'key': key}) + JOIN(conditions=[True:bool], types=['inner'], columns={'account_balance': t1.account_balance, 'avg_price': t0.avg_price, 'key': t1.key, 'nation_key': t1.nation_key}) + PROJECT(columns={'avg_price': agg_0}) + AGGREGATE(keys={}, aggregations={'agg_0': AVG(retail_price)}) + SCAN(table=tpch.PART, columns={'retail_price': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'account_balance': s_acctbal, 'key': s_suppkey, 'nation_key': s_nationkey}) AGGREGATE(keys={'supplier_key': supplier_key}, aggregations={'agg_0': AVG(retail_price)}) JOIN(conditions=[t0.part_key == t1.key], types=['inner'], columns={'retail_price': t1.retail_price, 'supplier_key': t0.supplier_key}) SCAN(table=tpch.PARTSUPP, columns={'part_key': ps_partkey, 'supplier_key': ps_suppkey}) SCAN(table=tpch.PART, columns={'key': p_partkey, 'retail_price': p_retailprice}) FILTER(condition=True:bool, columns={'supplier_key': supplier_key}) - JOIN(conditions=[t0.part_key == t1.key], types=['semi'], columns={'supplier_key': t0.supplier_key}, correl_name='corr2') + JOIN(conditions=[t0.part_key == t1.key], types=['semi'], columns={'supplier_key': t0.supplier_key}, correl_name='corr3') SCAN(table=tpch.PARTSUPP, columns={'part_key': ps_partkey, 'supplier_key': ps_suppkey, 'supplycost': ps_supplycost}) - FILTER(condition=container == 'LG DRUM':string & retail_price < corr2.supplycost * 1.5:float64 & retail_price < corr3.avg_price & retail_price < corr4.avg_price * 0.85:float64, columns={'key': key}) + FILTER(condition=container == 'LG DRUM':string & retail_price < corr3.supplycost * 1.5:float64 & retail_price < corr4.avg_price_3 & retail_price < corr4.avg_price * 0.85:float64, columns={'key': key}) SCAN(table=tpch.PART, columns={'container': p_container, 'key': p_partkey, 'retail_price': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index 4e532f3c..aad7c616 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ b/tests/test_plan_refsols/correl_17.txt @@ -2,8 +2,10 @@ ROOT(columns=[('fullname', fullname)], orderings=[(ordering_0):asc_first]) PROJECT(columns={'fullname': fullname, 'ordering_0': fullname}) PROJECT(columns={'fullname': fname}) FILTER(condition=True:bool, columns={'fname': fname}) - JOIN(conditions=[t0.region_key == t1.key], types=['inner'], columns={'fname': t1.fname}, correl_name='corr1') - PROJECT(columns={'lname': LOWER(name), 'region_key': region_key}) - SCAN(table=tpch.NATION, columns={'name': n_name, 'region_key': n_regionkey}) - PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(name), corr1.lname), 'key': key}) - SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) + JOIN(conditions=[t0.key == t1.key], types=['inner'], columns={'fname': t1.fname}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey}) + PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(name_3), lname), 'key': key}) + JOIN(conditions=[t0.region_key == t1.key], types=['inner'], columns={'key': t0.key, 'lname': t0.lname, 'name_3': t1.name}) + PROJECT(columns={'key': key, 'lname': LOWER(name), 'region_key': region_key}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name, 'region_key': n_regionkey}) + SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index ab36ffd4..db0b5291 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -2,13 +2,18 @@ ROOT(columns=[('n', n)], orderings=[]) PROJECT(columns={'n': DEFAULT_TO(agg_0, 0:int64)}) AGGREGATE(keys={}, aggregations={'agg_0': SUM(n_above_avg)}) PROJECT(columns={'n_above_avg': DEFAULT_TO(agg_2, 0:int64)}) - JOIN(conditions=[t0.customer_key == t1.customer_key & t0.order_date == t1.order_date], types=['left'], columns={'agg_2': t1.agg_2}, correl_name='corr1') - PROJECT(columns={'customer_key': customer_key, 'order_date': order_date, 'total_price': DEFAULT_TO(agg_1, 0:int64)}) - FILTER(condition=DEFAULT_TO(agg_0, 0:int64) > 1:int64, columns={'agg_1': agg_1, 'customer_key': customer_key, 'order_date': order_date}) - AGGREGATE(keys={'customer_key': customer_key, 'order_date': order_date}, aggregations={'agg_0': COUNT(), 'agg_1': SUM(total_price)}) - FILTER(condition=YEAR(order_date) == 1993:int64, columns={'customer_key': customer_key, 'order_date': order_date, 'total_price': total_price}) - SCAN(table=tpch.ORDERS, columns={'customer_key': o_custkey, 'order_date': o_orderdate, 'total_price': o_totalprice}) + JOIN(conditions=[t0.customer_key == t1.customer_key & t0.order_date == t1.order_date], types=['left'], columns={'agg_2': t1.agg_2}) + FILTER(condition=DEFAULT_TO(agg_0, 0:int64) > 1:int64, columns={'customer_key': customer_key, 'order_date': order_date}) + AGGREGATE(keys={'customer_key': customer_key, 'order_date': order_date}, aggregations={'agg_0': COUNT()}) + FILTER(condition=YEAR(order_date) == 1993:int64, columns={'customer_key': customer_key, 'order_date': order_date}) + SCAN(table=tpch.ORDERS, columns={'customer_key': o_custkey, 'order_date': o_orderdate}) AGGREGATE(keys={'customer_key': customer_key, 'order_date': order_date}, aggregations={'agg_2': COUNT()}) - FILTER(condition=total_price >= 0.5:float64 * corr1.total_price, columns={'customer_key': customer_key, 'order_date': order_date}) - FILTER(condition=YEAR(order_date) == 1993:int64, columns={'customer_key': customer_key, 'order_date': order_date, 'total_price': total_price}) - SCAN(table=tpch.ORDERS, columns={'customer_key': o_custkey, 'order_date': o_orderdate, 'total_price': o_totalprice}) + FILTER(condition=total_price_3 >= 0.5:float64 * total_price, columns={'customer_key': customer_key, 'order_date': order_date}) + FILTER(condition=YEAR(order_date_2) == 1993:int64, columns={'customer_key': customer_key, 'order_date': order_date, 'total_price': total_price, 'total_price_3': total_price_3}) + JOIN(conditions=[True:bool], types=['inner'], columns={'customer_key': t0.customer_key, 'order_date': t0.order_date, 'order_date_2': t1.order_date, 'total_price': t0.total_price, 'total_price_3': t1.total_price}) + PROJECT(columns={'customer_key': customer_key, 'order_date': order_date, 'total_price': DEFAULT_TO(agg_1, 0:int64)}) + FILTER(condition=DEFAULT_TO(agg_0, 0:int64) > 1:int64, columns={'agg_1': agg_1, 'customer_key': customer_key, 'order_date': order_date}) + AGGREGATE(keys={'customer_key': customer_key, 'order_date': order_date}, aggregations={'agg_0': COUNT(), 'agg_1': SUM(total_price)}) + FILTER(condition=YEAR(order_date) == 1993:int64, columns={'customer_key': customer_key, 'order_date': order_date, 'total_price': total_price}) + SCAN(table=tpch.ORDERS, columns={'customer_key': o_custkey, 'order_date': o_orderdate, 'total_price': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'order_date': o_orderdate, 'total_price': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_19.txt b/tests/test_plan_refsols/correl_19.txt index a273084b..a65ac794 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,12 +1,16 @@ -ROOT(columns=[('name', name_7), ('n_super_cust', n_super_cust)], orderings=[(ordering_1):desc_last]) - PROJECT(columns={'n_super_cust': n_super_cust, 'name_7': name_3, 'ordering_1': ordering_1}) +ROOT(columns=[('name', name_14), ('n_super_cust', n_super_cust)], orderings=[(ordering_1):desc_last]) + PROJECT(columns={'n_super_cust': n_super_cust, 'name_14': name_3, 'ordering_1': ordering_1}) LIMIT(limit=Literal(value=5, type=Int64Type()), columns={'n_super_cust': n_super_cust, 'name_3': name_3, 'ordering_1': ordering_1}, orderings=[(ordering_1):desc_last]) PROJECT(columns={'n_super_cust': n_super_cust, 'name_3': name_3, 'ordering_1': n_super_cust}) PROJECT(columns={'n_super_cust': DEFAULT_TO(agg_0, 0:int64), 'name_3': name_3}) - JOIN(conditions=[t0.key_2 == t1.nation_key], types=['left'], columns={'agg_0': t1.agg_0, 'name_3': t0.name_3}, correl_name='corr4') - JOIN(conditions=[t0.nation_key == t1.key], types=['inner'], columns={'account_balance': t0.account_balance, 'key_2': t1.key, 'name_3': t1.name}) - SCAN(table=tpch.SUPPLIER, columns={'account_balance': s_acctbal, 'nation_key': s_nationkey}) + JOIN(conditions=[t0.key_2 == t1.key_5 & t0.key == t1.key], types=['left'], columns={'agg_0': t1.agg_0, 'name_3': t0.name_3}) + JOIN(conditions=[t0.nation_key == t1.key], types=['inner'], columns={'key': t0.key, 'key_2': t1.key, 'name_3': t1.name}) + SCAN(table=tpch.SUPPLIER, columns={'key': s_suppkey, 'nation_key': s_nationkey}) SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name}) - AGGREGATE(keys={'nation_key': nation_key}, aggregations={'agg_0': COUNT()}) - FILTER(condition=acctbal > corr4.account_balance, columns={'nation_key': nation_key}) - SCAN(table=tpch.CUSTOMER, columns={'acctbal': c_acctbal, 'nation_key': c_nationkey}) + AGGREGATE(keys={'key': key, 'key_5': key_5}, aggregations={'agg_0': COUNT()}) + FILTER(condition=acctbal > account_balance, columns={'key': key, 'key_5': key_5}) + JOIN(conditions=[t0.key_5 == t1.nation_key], types=['inner'], columns={'account_balance': t0.account_balance, 'acctbal': t1.acctbal, 'key': t0.key, 'key_5': t0.key_5}) + JOIN(conditions=[t0.nation_key == t1.key], types=['inner'], columns={'account_balance': t0.account_balance, 'key': t0.key, 'key_5': t1.key}) + SCAN(table=tpch.SUPPLIER, columns={'account_balance': s_acctbal, 'key': s_suppkey, 'nation_key': s_nationkey}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'acctbal': c_acctbal, 'nation_key': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index 529b06fd..b5d64b7c 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,12 +1,17 @@ -ROOT(columns=[('name', name_7), ('n_selected_custs', n_selected_custs)], orderings=[(ordering_1):asc_first]) - PROJECT(columns={'n_selected_custs': n_selected_custs, 'name_7': name_6, 'ordering_1': ordering_1}) - PROJECT(columns={'n_selected_custs': n_selected_custs, 'name_6': name_6, 'ordering_1': name_6}) - PROJECT(columns={'n_selected_custs': DEFAULT_TO(agg_0, 0:int64), 'name_6': name_3}) - JOIN(conditions=[t0.key_2 == t1.nation_key], types=['left'], columns={'agg_0': t1.agg_0, 'name_3': t0.name_3}, correl_name='corr4') - JOIN(conditions=[t0.key == t1.region_key], types=['inner'], columns={'key_2': t1.key, 'name': t0.name, 'name_3': t1.name}) - FILTER(condition=NOT(STARTSWITH(name, 'A':string)), columns={'key': key, 'name': name}) +ROOT(columns=[('name', name_12), ('n_selected_custs', n_selected_custs)], orderings=[(ordering_1):asc_first]) + PROJECT(columns={'n_selected_custs': n_selected_custs, 'name_12': name_11, 'ordering_1': ordering_1}) + PROJECT(columns={'n_selected_custs': n_selected_custs, 'name_11': name_11, 'ordering_1': name_11}) + PROJECT(columns={'n_selected_custs': DEFAULT_TO(agg_0, 0:int64), 'name_11': name_3}) + JOIN(conditions=[t0.key_2 == t1.key_5 & t0.key == t1.key], types=['left'], columns={'agg_0': t1.agg_0, 'name_3': t0.name_3}) + JOIN(conditions=[t0.key == t1.region_key], types=['inner'], columns={'key': t0.key, 'key_2': t1.key, 'name_3': t1.name}) + FILTER(condition=NOT(STARTSWITH(name, 'A':string)), columns={'key': key}) SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name, 'region_key': n_regionkey}) - AGGREGATE(keys={'nation_key': nation_key}, aggregations={'agg_0': COUNT()}) - FILTER(condition=SLICE(comment, None:unknown, 1:int64, None:unknown) == LOWER(SLICE(corr4.name, None:unknown, 1:int64, None:unknown)), columns={'nation_key': nation_key}) - SCAN(table=tpch.CUSTOMER, columns={'comment': c_comment, 'nation_key': c_nationkey}) + AGGREGATE(keys={'key': key, 'key_5': key_5}, aggregations={'agg_0': COUNT()}) + FILTER(condition=SLICE(comment_7, None:unknown, 1:int64, None:unknown) == LOWER(SLICE(name, None:unknown, 1:int64, None:unknown)), columns={'key': key, 'key_5': key_5}) + JOIN(conditions=[t0.key_5 == t1.nation_key], types=['inner'], columns={'comment_7': t1.comment, 'key': t0.key, 'key_5': t0.key_5, 'name': t0.name}) + JOIN(conditions=[t0.key == t1.region_key], types=['inner'], columns={'key': t0.key, 'key_5': t1.key, 'name': t0.name}) + FILTER(condition=NOT(STARTSWITH(name, 'A':string)), columns={'key': key, 'name': name}) + SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'region_key': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'comment': c_comment, 'nation_key': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index c1d388c4..670d0880 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -2,16 +2,27 @@ ROOT(columns=[('n', n)], orderings=[]) PROJECT(columns={'n': agg_0}) AGGREGATE(keys={}, aggregations={'agg_0': COUNT()}) FILTER(condition=domestic, columns={'account_balance': account_balance}) - JOIN(conditions=[t0.nation_key_11 == t1.key], types=['left'], columns={'account_balance': t0.account_balance, 'domestic': t1.domestic}, correl_name='corr13') - JOIN(conditions=[t0.supplier_key == t1.key], types=['inner'], columns={'account_balance': t1.account_balance, 'name': t0.name, 'nation_key_11': t1.nation_key}) - JOIN(conditions=[t0.key_5 == t1.order_key], types=['inner'], columns={'name': t0.name, 'supplier_key': t1.supplier_key}) - FILTER(condition=YEAR(order_date) == 1998:int64 & MONTH(order_date) == 6:int64, columns={'key_5': key_5, 'name': name}) - JOIN(conditions=[t0.key_2 == t1.customer_key], types=['inner'], columns={'key_5': t1.key, 'name': t0.name, 'order_date': t1.order_date}) - JOIN(conditions=[t0.key == t1.nation_key], types=['inner'], columns={'key_2': t1.key, 'name': t0.name}) - SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name}) + JOIN(conditions=[t0.key_9 == t1.key_21 & t0.order_key == t1.order_key & t0.line_number == t1.line_number & t0.key_5 == t1.key_17 & t0.key_2 == t1.key_14 & t0.key == t1.key], types=['left'], columns={'account_balance': t0.account_balance, 'domestic': t1.domestic}) + JOIN(conditions=[t0.supplier_key == t1.key], types=['inner'], columns={'account_balance': t1.account_balance, 'key': t0.key, 'key_2': t0.key_2, 'key_5': t0.key_5, 'key_9': t1.key, 'line_number': t0.line_number, 'order_key': t0.order_key}) + JOIN(conditions=[t0.key_5 == t1.order_key], types=['inner'], columns={'key': t0.key, 'key_2': t0.key_2, 'key_5': t0.key_5, 'line_number': t1.line_number, 'order_key': t1.order_key, 'supplier_key': t1.supplier_key}) + FILTER(condition=YEAR(order_date) == 1998:int64 & MONTH(order_date) == 6:int64, columns={'key': key, 'key_2': key_2, 'key_5': key_5}) + JOIN(conditions=[t0.key_2 == t1.customer_key], types=['inner'], columns={'key': t0.key, 'key_2': t0.key_2, 'key_5': t1.key, 'order_date': t1.order_date}) + JOIN(conditions=[t0.key == t1.nation_key], types=['inner'], columns={'key': t0.key, 'key_2': t1.key}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'key': c_custkey, 'nation_key': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'customer_key': o_custkey, 'key': o_orderkey, 'order_date': o_orderdate}) - SCAN(table=tpch.LINEITEM, columns={'order_key': l_orderkey, 'supplier_key': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'account_balance': s_acctbal, 'key': s_suppkey, 'nation_key': s_nationkey}) - PROJECT(columns={'domestic': name == corr13.name, 'key': key}) - SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name}) + SCAN(table=tpch.LINEITEM, columns={'line_number': l_linenumber, 'order_key': l_orderkey, 'supplier_key': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'account_balance': s_acctbal, 'key': s_suppkey}) + PROJECT(columns={'domestic': name_27 == name, 'key': key, 'key_14': key_14, 'key_17': key_17, 'key_21': key_21, 'line_number': line_number, 'order_key': order_key}) + JOIN(conditions=[t0.nation_key_23 == t1.key], types=['inner'], columns={'key': t0.key, 'key_14': t0.key_14, 'key_17': t0.key_17, 'key_21': t0.key_21, 'line_number': t0.line_number, 'name': t0.name, 'name_27': t1.name, 'order_key': t0.order_key}) + JOIN(conditions=[t0.supplier_key == t1.key], types=['inner'], columns={'key': t0.key, 'key_14': t0.key_14, 'key_17': t0.key_17, 'key_21': t1.key, 'line_number': t0.line_number, 'name': t0.name, 'nation_key_23': t1.nation_key, 'order_key': t0.order_key}) + JOIN(conditions=[t0.key_17 == t1.order_key], types=['inner'], columns={'key': t0.key, 'key_14': t0.key_14, 'key_17': t0.key_17, 'line_number': t1.line_number, 'name': t0.name, 'order_key': t1.order_key, 'supplier_key': t1.supplier_key}) + FILTER(condition=YEAR(order_date) == 1998:int64 & MONTH(order_date) == 6:int64, columns={'key': key, 'key_14': key_14, 'key_17': key_17, 'name': name}) + JOIN(conditions=[t0.key_14 == t1.customer_key], types=['inner'], columns={'key': t0.key, 'key_14': t0.key_14, 'key_17': t1.key, 'name': t0.name, 'order_date': t1.order_date}) + JOIN(conditions=[t0.key == t1.nation_key], types=['inner'], columns={'key': t0.key, 'key_14': t1.key, 'name': t0.name}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name}) + SCAN(table=tpch.CUSTOMER, columns={'key': c_custkey, 'nation_key': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'customer_key': o_custkey, 'key': o_orderkey, 'order_date': o_orderdate}) + SCAN(table=tpch.LINEITEM, columns={'line_number': l_linenumber, 'order_key': l_orderkey, 'supplier_key': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'key': s_suppkey, 'nation_key': s_nationkey}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name}) diff --git a/tests/test_plan_refsols/correl_3.txt b/tests/test_plan_refsols/correl_3.txt index 57d2dfdf..2bbb01bc 100644 --- a/tests/test_plan_refsols/correl_3.txt +++ b/tests/test_plan_refsols/correl_3.txt @@ -1,11 +1,13 @@ ROOT(columns=[('name', name), ('n_nations', n_nations)], orderings=[(ordering_1):asc_first]) PROJECT(columns={'n_nations': n_nations, 'name': name, 'ordering_1': name}) PROJECT(columns={'n_nations': DEFAULT_TO(agg_0, 0:int64), 'name': name}) - JOIN(conditions=[t0.key == t1.region_key], types=['left'], columns={'agg_0': t1.agg_0, 'name': t0.name}, correl_name='corr1') + JOIN(conditions=[t0.key == t1.key], types=['left'], columns={'agg_0': t1.agg_0, 'name': t0.name}) SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) - AGGREGATE(keys={'region_key': region_key}, aggregations={'agg_0': COUNT()}) - FILTER(condition=True:bool, columns={'region_key': region_key}) - JOIN(conditions=[t0.key == t1.nation_key], types=['semi'], columns={'region_key': t0.region_key}) - SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'region_key': n_regionkey}) - FILTER(condition=SLICE(comment, None:unknown, 2:int64, None:unknown) == LOWER(SLICE(corr1.name, None:unknown, 2:int64, None:unknown)), columns={'nation_key': nation_key}) + AGGREGATE(keys={'key': key}, aggregations={'agg_0': COUNT()}) + FILTER(condition=True:bool, columns={'key': key}) + JOIN(conditions=[t0.key_2 == t1.nation_key], types=['semi'], columns={'key': t0.key}, correl_name='corr4') + JOIN(conditions=[t0.key == t1.region_key], types=['inner'], columns={'key': t0.key, 'key_2': t1.key, 'name': t0.name}) + SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'region_key': n_regionkey}) + FILTER(condition=SLICE(comment, None:unknown, 2:int64, None:unknown) == LOWER(SLICE(corr4.name, None:unknown, 2:int64, None:unknown)), columns={'nation_key': nation_key}) SCAN(table=tpch.CUSTOMER, columns={'comment': c_comment, 'nation_key': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_6.txt b/tests/test_plan_refsols/correl_6.txt index 0a85a6fa..a6829877 100644 --- a/tests/test_plan_refsols/correl_6.txt +++ b/tests/test_plan_refsols/correl_6.txt @@ -1,8 +1,10 @@ ROOT(columns=[('name', name), ('n_prefix_nations', n_prefix_nations)], orderings=[]) PROJECT(columns={'n_prefix_nations': DEFAULT_TO(agg_0, 0:int64), 'name': name}) FILTER(condition=True:bool, columns={'agg_0': agg_0, 'name': name}) - JOIN(conditions=[t0.key == t1.region_key], types=['inner'], columns={'agg_0': t1.agg_0, 'name': t0.name}, correl_name='corr1') + JOIN(conditions=[t0.key == t1.key], types=['inner'], columns={'agg_0': t1.agg_0, 'name': t0.name}) SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) - AGGREGATE(keys={'region_key': region_key}, aggregations={'agg_0': COUNT()}) - FILTER(condition=SLICE(name, None:unknown, 1:int64, None:unknown) == SLICE(corr1.name, None:unknown, 1:int64, None:unknown), columns={'region_key': region_key}) - SCAN(table=tpch.NATION, columns={'name': n_name, 'region_key': n_regionkey}) + AGGREGATE(keys={'key': key}, aggregations={'agg_0': COUNT()}) + FILTER(condition=SLICE(name_3, None:unknown, 1:int64, None:unknown) == SLICE(name, None:unknown, 1:int64, None:unknown), columns={'key': key}) + JOIN(conditions=[t0.key == t1.region_key], types=['inner'], columns={'key': t0.key, 'name': t0.name, 'name_3': t1.name}) + SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) + SCAN(table=tpch.NATION, columns={'name': n_name, 'region_key': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_8.txt b/tests/test_plan_refsols/correl_8.txt index 87bcc66e..8da228f0 100644 --- a/tests/test_plan_refsols/correl_8.txt +++ b/tests/test_plan_refsols/correl_8.txt @@ -1,7 +1,9 @@ ROOT(columns=[('name', name), ('rname', rname)], orderings=[(ordering_0):asc_first]) PROJECT(columns={'name': name, 'ordering_0': name, 'rname': rname}) - PROJECT(columns={'name': name, 'rname': name_4}) - JOIN(conditions=[t0.region_key == t1.key], types=['left'], columns={'name': t0.name, 'name_4': t1.name}, correl_name='corr1') - SCAN(table=tpch.NATION, columns={'name': n_name, 'region_key': n_regionkey}) - FILTER(condition=SLICE(name, None:unknown, 1:int64, None:unknown) == SLICE(corr1.name, None:unknown, 1:int64, None:unknown), columns={'key': key, 'name': name}) - SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) + PROJECT(columns={'name': name, 'rname': name_3}) + JOIN(conditions=[t0.key == t1.key], types=['left'], columns={'name': t0.name, 'name_3': t1.name_3}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name}) + FILTER(condition=SLICE(name_3, None:unknown, 1:int64, None:unknown) == SLICE(name, None:unknown, 1:int64, None:unknown), columns={'key': key, 'name_3': name_3}) + JOIN(conditions=[t0.region_key == t1.key], types=['inner'], columns={'key': t0.key, 'name': t0.name, 'name_3': t1.name}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name, 'region_key': n_regionkey}) + SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) diff --git a/tests/test_plan_refsols/correl_9.txt b/tests/test_plan_refsols/correl_9.txt index 6a7a6c13..449cceea 100644 --- a/tests/test_plan_refsols/correl_9.txt +++ b/tests/test_plan_refsols/correl_9.txt @@ -1,8 +1,10 @@ ROOT(columns=[('name', name), ('rname', rname)], orderings=[(ordering_0):asc_first]) PROJECT(columns={'name': name, 'ordering_0': name, 'rname': rname}) - PROJECT(columns={'name': name, 'rname': name_4}) - FILTER(condition=True:bool, columns={'name': name, 'name_4': name_4}) - JOIN(conditions=[t0.region_key == t1.key], types=['inner'], columns={'name': t0.name, 'name_4': t1.name}, correl_name='corr1') - SCAN(table=tpch.NATION, columns={'name': n_name, 'region_key': n_regionkey}) - FILTER(condition=SLICE(name, None:unknown, 1:int64, None:unknown) == SLICE(corr1.name, None:unknown, 1:int64, None:unknown), columns={'key': key, 'name': name}) - SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) + PROJECT(columns={'name': name, 'rname': name_3}) + FILTER(condition=True:bool, columns={'name': name, 'name_3': name_3}) + JOIN(conditions=[t0.key == t1.key], types=['inner'], columns={'name': t0.name, 'name_3': t1.name_3}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name}) + FILTER(condition=SLICE(name_3, None:unknown, 1:int64, None:unknown) == SLICE(name, None:unknown, 1:int64, None:unknown), columns={'key': key, 'name_3': name_3}) + JOIN(conditions=[t0.region_key == t1.key], types=['inner'], columns={'key': t0.key, 'name': t0.name, 'name_3': t1.name}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name, 'region_key': n_regionkey}) + SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index fbd35207..7cb9925b 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,21 +1,26 @@ ROOT(columns=[('N_NAME', N_NAME), ('REVENUE', REVENUE)], orderings=[(ordering_1):desc_last]) PROJECT(columns={'N_NAME': N_NAME, 'REVENUE': REVENUE, 'ordering_1': REVENUE}) PROJECT(columns={'N_NAME': name, 'REVENUE': DEFAULT_TO(agg_0, 0:int64)}) - JOIN(conditions=[t0.key == t1.nation_key], types=['left'], columns={'agg_0': t1.agg_0, 'name': t0.name}, correl_name='corr10') + JOIN(conditions=[t0.key == t1.key], types=['left'], columns={'agg_0': t1.agg_0, 'name': t0.name}) FILTER(condition=name_3 == 'ASIA':string, columns={'key': key, 'name': name}) JOIN(conditions=[t0.region_key == t1.key], types=['left'], columns={'key': t0.key, 'name': t0.name, 'name_3': t1.name}) SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name, 'region_key': n_regionkey}) SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) - AGGREGATE(keys={'nation_key': nation_key}, aggregations={'agg_0': SUM(value)}) - PROJECT(columns={'nation_key': nation_key, 'value': extended_price * 1:int64 - discount}) - FILTER(condition=name_9 == corr10.name, columns={'discount': discount, 'extended_price': extended_price, 'nation_key': nation_key}) - JOIN(conditions=[t0.supplier_key == t1.key], types=['left'], columns={'discount': t0.discount, 'extended_price': t0.extended_price, 'name_9': t1.name_9, 'nation_key': t0.nation_key}) - JOIN(conditions=[t0.key_5 == t1.order_key], types=['inner'], columns={'discount': t1.discount, 'extended_price': t1.extended_price, 'nation_key': t0.nation_key, 'supplier_key': t1.supplier_key}) - FILTER(condition=order_date >= datetime.date(1994, 1, 1):date & order_date < datetime.date(1995, 1, 1):date, columns={'key_5': key_5, 'nation_key': nation_key}) - JOIN(conditions=[t0.key == t1.customer_key], types=['inner'], columns={'key_5': t1.key, 'nation_key': t0.nation_key, 'order_date': t1.order_date}) - SCAN(table=tpch.CUSTOMER, columns={'key': c_custkey, 'nation_key': c_nationkey}) + AGGREGATE(keys={'key': key}, aggregations={'agg_0': SUM(value)}) + PROJECT(columns={'key': key, 'value': extended_price * 1:int64 - discount}) + FILTER(condition=name_15 == name, columns={'discount': discount, 'extended_price': extended_price, 'key': key}) + JOIN(conditions=[t0.supplier_key == t1.key], types=['left'], columns={'discount': t0.discount, 'extended_price': t0.extended_price, 'key': t0.key, 'name': t0.name, 'name_15': t1.name_15}) + JOIN(conditions=[t0.key_11 == t1.order_key], types=['inner'], columns={'discount': t1.discount, 'extended_price': t1.extended_price, 'key': t0.key, 'name': t0.name, 'supplier_key': t1.supplier_key}) + FILTER(condition=order_date >= datetime.date(1994, 1, 1):date & order_date < datetime.date(1995, 1, 1):date, columns={'key': key, 'key_11': key_11, 'name': name}) + JOIN(conditions=[t0.key_8 == t1.customer_key], types=['inner'], columns={'key': t0.key, 'key_11': t1.key, 'name': t0.name, 'order_date': t1.order_date}) + JOIN(conditions=[t0.key == t1.nation_key], types=['inner'], columns={'key': t0.key, 'key_8': t1.key, 'name': t0.name}) + FILTER(condition=name_6 == 'ASIA':string, columns={'key': key, 'name': name}) + JOIN(conditions=[t0.region_key == t1.key], types=['left'], columns={'key': t0.key, 'name': t0.name, 'name_6': t1.name}) + SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name, 'region_key': n_regionkey}) + SCAN(table=tpch.REGION, columns={'key': r_regionkey, 'name': r_name}) + SCAN(table=tpch.CUSTOMER, columns={'key': c_custkey, 'nation_key': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'customer_key': o_custkey, 'key': o_orderkey, 'order_date': o_orderdate}) SCAN(table=tpch.LINEITEM, columns={'discount': l_discount, 'extended_price': l_extendedprice, 'order_key': l_orderkey, 'supplier_key': l_suppkey}) - JOIN(conditions=[t0.nation_key == t1.key], types=['inner'], columns={'key': t0.key, 'name_9': t1.name}) + JOIN(conditions=[t0.nation_key == t1.key], types=['inner'], columns={'key': t0.key, 'name_15': t1.name}) SCAN(table=tpch.SUPPLIER, columns={'key': s_suppkey, 'nation_key': s_nationkey}) SCAN(table=tpch.NATION, columns={'key': n_nationkey, 'name': n_name})