Skip to content

Commit

Permalink
started mass renaming and addition of the ancestral mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
knassre-bodo committed Feb 11, 2025
1 parent 0482148 commit 096c752
Show file tree
Hide file tree
Showing 32 changed files with 404 additions and 330 deletions.
33 changes: 16 additions & 17 deletions pydough/conversion/hybrid_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

__all__ = [
"HybridBackRefExpr",
"HybridCalc",
"HybridCalculate",
"HybridChildRefExpr",
"HybridCollation",
"HybridCollectionAccess",
Expand Down Expand Up @@ -39,7 +39,7 @@
)
from pydough.qdag import (
BackReferenceExpression,
Calc,
Calculate,
ChildOperator,
ChildOperatorChildAccess,
ChildReferenceCollection,
Expand Down Expand Up @@ -377,10 +377,10 @@ class HybridOperation:
- `terms`: mapping of names to expressions accessible from that point in
the pipeline execution.
- `renamings`: mapping of names to a new name that should be used to access
them from within `terms`. This is used when a `CALC` overrides a
term name so that future invocations of the term name use the
renamed version, while key operations like joins can still
access the original version.
them from within `terms`. This is used when a `CALCULATE`
overrides a term name so that future invocations of the term
name use the renamed version, while key operations like joins
can still access the original version.
- `orderings`: list of collation expressions that specify the order
that a hybrid operation is sorted by.
- `unique_exprs`: list of expressions that are used to uniquely identify
Expand Down Expand Up @@ -455,9 +455,9 @@ def __repr__(self):
return "PARTITION_CHILD[*]"


class HybridCalc(HybridOperation):
class HybridCalculate(HybridOperation):
"""
Class for HybridOperation corresponding to a CALC operation.
Class for HybridOperation corresponding to a CALCULATE operation.
"""

def __init__(
Expand All @@ -483,11 +483,10 @@ def __init__(
terms[used_name] = expr
renamings[name] = used_name
super().__init__(terms, renamings, orderings, predecessor.unique_exprs)
self.calc = Calc
self.new_expressions = new_expressions

def __repr__(self):
return f"CALC[{self.new_expressions}]"
return f"CALCULATE[{self.new_expressions}]"


class HybridFilter(HybridOperation):
Expand Down Expand Up @@ -1213,14 +1212,14 @@ def populate_children(
child_idx_mapping: dict[int, int],
) -> None:
"""
Helper utility that takes any children of a child operator (CALC,
Helper utility that takes any children of a child operator (CALCULATE,
WHERE, etc.) and builds the corresponding HybridTree subtree,
where the parent of the subtree's root is absent instead of the
current level, and inserts the corresponding HybridConnection node.
Args:
`hybrid`: the HybridTree having children added to it.
`child_operator`: the collection QDAG node (CALC, WHERE, etc.)
`child_operator`: the collection QDAG node (CALCULATE, WHERE, etc.)
containing the children.
`child_idx_mapping`: a mapping of indices of children of the
original `child_operator` to the indices of children of the hybrid
Expand Down Expand Up @@ -1252,7 +1251,7 @@ def populate_children(
self.identify_connection_types(
col.expr, child_idx, reference_types
)
case Calc():
case Calculate():
for expr in child_operator.calc_term_values.values():
self.identify_connection_types(expr, child_idx, reference_types)
case PartitionBy():
Expand Down Expand Up @@ -1733,7 +1732,7 @@ def process_hybrid_collations(
Returns:
A tuple containing a dictionary of new expressions for generating
a calc and a list of the new HybridCollation values.
a `CALCULATE` and a list of the new HybridCollation values.
"""
new_expressions: dict[str, HybridExpr] = {}
hybrid_orderings: list[HybridCollation] = []
Expand Down Expand Up @@ -1786,7 +1785,7 @@ def make_hybrid_tree(
)
hybrid.add_successor(successor_hybrid)
return successor_hybrid
case Calc():
case Calculate():
hybrid = self.make_hybrid_tree(node.preceding_context, parent)
self.populate_children(hybrid, node, child_ref_mapping)
new_expressions: dict[str, HybridExpr] = {}
Expand All @@ -1796,7 +1795,7 @@ def make_hybrid_tree(
)
new_expressions[name] = expr
hybrid.pipeline.append(
HybridCalc(
HybridCalculate(
hybrid.pipeline[-1],
new_expressions,
hybrid.pipeline[-1].orderings,
Expand Down Expand Up @@ -1836,7 +1835,7 @@ def make_hybrid_tree(
hybrid, node.collation, child_ref_mapping
)
hybrid.pipeline.append(
HybridCalc(hybrid.pipeline[-1], new_nodes, hybrid_orderings)
HybridCalculate(hybrid.pipeline[-1], new_nodes, hybrid_orderings)
)
if isinstance(node, TopK):
hybrid.pipeline.append(
Expand Down
24 changes: 12 additions & 12 deletions pydough/conversion/relational_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
SimpleTableMetadata,
)
from pydough.qdag import (
Calc,
Calculate,
CollectionAccess,
PyDoughCollectionQDAG,
PyDoughExpressionQDAG,
Expand Down Expand Up @@ -46,7 +46,7 @@
from .hybrid_tree import (
ConnectionType,
HybridBackRefExpr,
HybridCalc,
HybridCalculate,
HybridChildRefExpr,
HybridCollation,
HybridCollectionAccess,
Expand Down Expand Up @@ -678,17 +678,17 @@ def translate_limit(
)
return TranslationOutput(out_rel, context.expressions)

def translate_calc(
def translate_calculate(
self,
node: HybridCalc,
node: HybridCalculate,
context: TranslationOutput,
) -> TranslationOutput:
"""
Converts a calc into a project on top of its child to derive additional
terms.
Converts a CALCULATE into a project on top of its child to derive
additional terms.
Args:
`node`: the node corresponding to the calc being derived.
`node`: the node corresponding to the CALCULATE being derived.
`context`: the data structure storing information used by the
conversion, such as bindings of already translated terms from
preceding contexts and the corresponding relational node.
Expand Down Expand Up @@ -849,9 +849,9 @@ def rel_translation(
case HybridPartitionChild():
assert context is not None, "Malformed HybridTree pattern."
result = self.translate_partition_child(operation, context)
case HybridCalc():
case HybridCalculate():
assert context is not None, "Malformed HybridTree pattern."
result = self.translate_calc(operation, context)
result = self.translate_calculate(operation, context)
case HybridFilter():
assert context is not None, "Malformed HybridTree pattern."
result = self.translate_filter(operation, context)
Expand All @@ -877,8 +877,8 @@ def preprocess_root(
node: PyDoughCollectionQDAG,
) -> PyDoughCollectionQDAG:
"""
Transforms the final PyDough collection by appending it with an extra CALC
containing all of the columns that are output.
Transforms the final PyDough collection by appending it with an extra
CALCULATE containing all of the columns that are output.
"""
# Fetch all of the expressions that should be kept in the final output
original_calc_terms: set[str] = node.calc_terms
Expand All @@ -889,7 +889,7 @@ def preprocess_root(
all_names.add(name)
final_terms.sort(key=lambda term: node.get_expression_position(term[0]))
children: list[PyDoughCollectionQDAG] = []
final_calc: Calc = Calc(node, children).with_terms(final_terms)
final_calc: Calculate = Calculate(node, children).with_terms(final_terms)
return final_calc


Expand Down
6 changes: 3 additions & 3 deletions pydough/exploration/explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
TableColumnMetadata,
)
from pydough.qdag import (
Calc,
Calculate,
ChildOperator,
ExpressionFunctionCall,
GlobalContext,
Expand Down Expand Up @@ -349,7 +349,7 @@ def explain_unqualified(node: UnqualifiedNode, verbose: bool) -> str:
match qualified_node:
case GlobalContext():
lines.append(
"This node is a reference to the global context for the entire graph. An operation must be done onto this node (e.g. a CALC or accessing a collection) before it can be executed."
"This node is a reference to the global context for the entire graph. An operation must be done onto this node (e.g. a CALCULATE or accessing a collection) before it can be executed."
)
case TableCollection():
collection_name = qualified_node.collection.name
Expand Down Expand Up @@ -382,7 +382,7 @@ def explain_unqualified(node: UnqualifiedNode, verbose: bool) -> str:
lines.append(f" child ${idx + 1}: {child.to_string()}")
lines.append("")
match qualified_node:
case Calc():
case Calculate():
lines.append(
"The main task of this node is to calculate the following additional expressions that are added to the terms of the collection:"
)
Expand Down
8 changes: 4 additions & 4 deletions pydough/exploration/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from pydough.unqualified import (
UnqualifiedAccess,
UnqualifiedCalc,
UnqualifiedCalculate,
UnqualifiedNode,
UnqualifiedOrderBy,
UnqualifiedPartition,
Expand Down Expand Up @@ -51,7 +51,7 @@ def find_unqualified_root(node: UnqualifiedNode) -> UnqualifiedRoot | None:
return node
case (
UnqualifiedAccess()
| UnqualifiedCalc()
| UnqualifiedCalculate()
| UnqualifiedWhere()
| UnqualifiedOrderBy()
| UnqualifiedTopK()
Expand Down Expand Up @@ -281,15 +281,15 @@ def explain_term(
lines.append("")
if qualified_term.is_singular(qualified_node.starting_predecessor):
lines.append(
"This term is singular with regards to the collection, meaning it can be placed in a CALC of a collection."
"This term is singular with regards to the collection, meaning it can be placed in a CALCULATE of a collection."
)
lines.append("For example, the following is valid:")
lines.append(
f" {qualified_node.to_string()}({qualified_term.to_string()})"
)
else:
lines.append(
"This expression is plural with regards to the collection, meaning it can be placed in a CALC of a collection if it is aggregated."
"This expression is plural with regards to the collection, meaning it can be placed in a CALCULATE of a collection if it is aggregated."
)
lines.append("For example, the following is valid:")
lines.append(
Expand Down
10 changes: 5 additions & 5 deletions pydough/qdag/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ sub_collection = builder.build_child_access("region", table_collection)
child_collection = ChildOperatorChildAccess(sub_collection)
child_reference_node = builder.build_child_reference_expression([child_collection], 0, "name")

# Build a CALC node
# Build a CALCULATE node
# Equivalent PyDough code: `TPCH.Nations(region_name=region.name)`
calc_node = builder.build_calc(table_collection, [child_collection])
calc_node = calc_node.with_terms([("region_name", child_reference_node)])
calculate_node = builder.build_calc(table_collection, [child_collection])
calculate_node = calculate_node.with_terms([("region_name", child_reference_node)])

# Build a WHERE node
# Equivalent PyDough code: `TPCH.Nations.WHERE(region.name == "ASIA")`
Expand Down Expand Up @@ -127,8 +127,8 @@ count_call = builder.build_expression_function_call(
"COUNT",
[child_reference_collection_node]
)
calc_node = builder.build_calc(table_collection, [customers_child])
calc_node = calc_node.with_terms([("n_customers", count_call)])
calculate_node = builder.build_calc(table_collection, [customers_child])
calculate_node = calculate_node.with_terms([("n_customers", count_call)])

# Build a window function call node
# Equivalent PyDough code: `RANKING(by=TPCH.Nations.name, levels=1, allow_ties=True)`
Expand Down
4 changes: 2 additions & 2 deletions pydough/qdag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"AstNodeBuilder",
"BackReferenceCollection",
"BackReferenceExpression",
"Calc",
"Calculate",
"ChildAccess",
"ChildOperator",
"ChildOperatorChildAccess",
Expand Down Expand Up @@ -40,7 +40,7 @@
from .abstract_pydough_qdag import PyDoughQDAG
from .collections import (
BackReferenceCollection,
Calc,
Calculate,
ChildAccess,
ChildOperator,
ChildOperatorChildAccess,
Expand Down
8 changes: 4 additions & 4 deletions pydough/qdag/collections/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ The QDAG collections module contains the following hierarchy of collection class
- [`BackReferenceCollection`](back_reference_collection.py) (concrete): Same idea as `ChildReferenceCollection`, but on a subcollection of an ancestor collection
- [`HiddenBackReferenceCollection`](hidden_back_reference_collection.py) (concrete): Same idea as `BackReferenceCollection`, but where the back reference is hidden because it is a subcollection reference where the subcollection comes from a hidden ancestor of a compound relationship.
- [`ChildOperator`](child_operator.py) (abstract): Base class for collection QDAG nodes that need to access child contexts in order to make a child reference.
- [`Calc`](calc.py) (concrete): Operation that defines new singular expression terms in the current context and names them.
- [`Calculate`](calculate.py) (concrete): Operation that defines new singular expression terms in the current context and names them.
- [`Where`](where.py) (concrete): Operation that filters the current context based on a predicate that is a singular expression.
- [`OrderBy`](order_by.py) (concrete): Operation that sorts the current context based on 1+ singular collation expressions.
- [`TopK`](top_k.py) (concrete): Operation that sorts the current context based on 1+ singular collation expressions and filters to only keep the first `k` records.
Expand All @@ -34,7 +34,7 @@ The base QDAG collection node contains the following interface:
- `is_singular`: Method that takes in a context and returns whether the current collection is singular with regards to that context. (Note: it is assumed that `.starting_predecessor` has been called on all the arguments already).
- `starting_predecessor`: Property that finds the furthest predecessor of the current collection.
- `verify_singular_terms`: Method that takes in a sequence of expression QDAG nodes and verifies that all of them are singular with regards to the current context (e.g. whether they can be used as CALCULATE terms).
- `get_expression_position`: Method that takes in the string name of a calc term and returns its ordinal position when placed in the output.
- `get_expression_position`: Method that takes in the string name of a calculate term and returns its ordinal position when placed in the output.
- `get_term`: Method that takes in the string name of any term of the current context and returns the QDAG node for it with regards to the current context. E.g. if calling on the name of a subcollection, returns the subcollection node.
- `get_expr`: Same as `get_term` but specifically for expressions-only.
- `get_collection`: Same as `get_term` but specifically for collections-only.
Expand Down Expand Up @@ -72,7 +72,7 @@ Nations.WHERE(
├─┬─ AccessChild
│ └─── SubCollection[region]
├─── SubCollection[suppliers]
└─── Calc[supplier_name=name, nation_name=BACK(1).name]
└─── Calculate[supplier_name=name, nation_name=BACK(1).name]
```

And below is another such example:
Expand Down Expand Up @@ -101,7 +101,7 @@ PARTITION(selected_parts, name="p", by=size)(
│ └─┬─ AccessChild
│ └─┬─ SubCollection[supplier]
│ └─── SubCollection[nation]
├─┬─ Calc[size=size, n_parts_with_german_supplier=COUNT($1)]
├─┬─ Calculate[size=size, n_parts_with_german_supplier=COUNT($1)]
│ └─┬─ AccessChild
│ └─── PartitionChild[p]
└─── TopK[10, n_parts_with_german_supplier.DESC(na_pos='last')]
Expand Down
4 changes: 2 additions & 2 deletions pydough/qdag/collections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
__all__ = [
"AugmentingChildOperator",
"BackReferenceCollection",
"Calc",
"Calculate",
"ChildAccess",
"ChildOperator",
"ChildOperatorChildAccess",
Expand All @@ -26,7 +26,7 @@

from .augmenting_child_operator import AugmentingChildOperator
from .back_reference_collection import BackReferenceCollection
from .calc import Calc
from .calculate import Calculate
from .child_access import ChildAccess
from .child_operator import ChildOperator
from .child_operator_child_access import ChildOperatorChildAccess
Expand Down
8 changes: 6 additions & 2 deletions pydough/qdag/collections/augmenting_child_operator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Defines an abstract subclass of ChildOperator for operations that augment their
preceding context without stepping down into another context, like CALC or
preceding context without stepping down into another context, like CALCULATE or
WHERE.
"""

Expand All @@ -10,7 +10,7 @@
from functools import cache

from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG
from pydough.qdag.expressions import CollationExpression
from pydough.qdag.expressions import CollationExpression, PyDoughExpressionQDAG

from .child_access import ChildAccess
from .child_operator import ChildOperator
Expand Down Expand Up @@ -40,6 +40,10 @@ def ancestor_context(self) -> PyDoughCollectionQDAG | None:
def preceding_context(self) -> PyDoughCollectionQDAG:
return self._preceding_context

@property
def ancestral_mapping(self) -> dict[str, PyDoughExpressionQDAG]:
return self.preceding_context.ancestral_mapping

@property
def ordering(self) -> list[CollationExpression] | None:
return self.preceding_context.ordering
Expand Down
5 changes: 5 additions & 0 deletions pydough/qdag/collections/back_reference_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from functools import cache

from pydough.qdag.errors import PyDoughQDAGException
from pydough.qdag.expressions import PyDoughExpressionQDAG

from .child_access import ChildAccess
from .collection_access import CollectionAccess
Expand Down Expand Up @@ -49,6 +50,10 @@ def __init__(
def clone_with_parent(self, new_ancestor: PyDoughCollectionQDAG) -> ChildAccess:
return BackReferenceCollection(new_ancestor, self.term_name, self.back_levels)

@property
def ancestral_mapping(self) -> dict[str, PyDoughExpressionQDAG]:
raise NotImplementedError()

@property
def back_levels(self) -> int:
"""
Expand Down
Loading

0 comments on commit 096c752

Please sign in to comment.