-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for global CALC nodes #23
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
""" | ||
TODO: add file-level docstring | ||
""" | ||
|
||
__all__ = ["GlobalCalc"] | ||
|
||
|
||
from typing import Dict, List, Tuple, Set | ||
|
||
from pydough.metadata import GraphMetadata | ||
from pydough.pydough_ast.abstract_pydough_ast import PyDoughAST | ||
from pydough.pydough_ast.errors import PyDoughASTException | ||
from pydough.pydough_ast.expressions import PyDoughExpressionAST | ||
from .collection_ast import PyDoughCollectionAST | ||
from .table_collection import TableCollection | ||
|
||
|
||
class GlobalCalc(PyDoughCollectionAST): | ||
""" | ||
The AST node implementation class representing a top-level CALC expression | ||
without a parent context. | ||
""" | ||
|
||
def __init__(
    self,
    graph: GraphMetadata,
    children: List[PyDoughCollectionAST],
):
    """
    Creates a global CALC node that does not yet have any terms bound to
    it. The terms must be supplied afterwards by calling `with_terms`.

    Args:
        `graph`: the graph that the global CALC is being done within.
        `children`: the child collections accessible from the global CALC.
    """
    self._graph: GraphMetadata = graph
    self._children: List[PyDoughCollectionAST] = children
    # Not defined until with_terms is called. Review note: a reviewer asked
    # why these are optional rather than starting as empty dictionaries;
    # None is what allows the node to tell "terms not provided yet" apart
    # from "terms provided, but empty", so premature access and repeated
    # calls to `with_terms` can be rejected. The optional type was accepted.
    self._calc_term_indices: Dict[str, int] | None = None
    self._all_terms: Dict[str, PyDoughAST] | None = None
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Type annotation needs optional |
||
|
||
def with_terms(self, terms: List[Tuple[str, PyDoughExpressionAST]]) -> "GlobalCalc":
    """
    Binds the CALC terms to this node. The terms are not passed to the
    constructor, so the node is incomplete between construction and the
    call to this method (making that delay explicit was requested during
    review). The method may only be called once per node.

    Args:
        `terms`: (name, expression) pairs, in the order in which the
        expressions appear in the CALC.

    Returns:
        This same node, mutated in place, so that calls can be chained.

    Raises:
        `PyDoughASTException` if the terms have already been provided.
    """
    if self._calc_term_indices is not None:
        raise PyDoughASTException(
            "Cannot call `with_terms` more than once per GlobalCalc node"
        )
    # Ordinal position of each named expression within the CALC.
    term_indices: Dict[str, int] = {
        name: idx for idx, (name, _) in enumerate(terms)
    }
    # Include terms from the graph itself, with the terms from this CALC
    # added in (overwriting any preceding properties with the same name)
    all_terms: Dict[str, PyDoughAST] = {
        name: TableCollection(self.graph.get_collection(name))
        for name in self.graph.get_collection_names()
    }
    all_terms.update(terms)
    # Only commit the state once everything was built successfully, so a
    # failure above does not leave the node half-initialized.
    self._calc_term_indices = term_indices
    self._all_terms = all_terms
    return self
|
||
@property
def graph(self) -> GraphMetadata:
    """
    The graph metadata that this global CALC node is evaluated within.
    """
    graph_metadata: GraphMetadata = self._graph
    return graph_metadata
|
||
@property
def children(self) -> List[PyDoughCollectionAST]:
    """
    The child collections that the global CALC can access in order to
    derive expressions in terms of a subcollection.
    """
    child_collections: List[PyDoughCollectionAST] = self._children
    return child_collections
|
||
@property
def calc_term_indices(self) -> Dict[str, int]:
    """
    Mapping of each named expression of the CALC to the ordinal position
    of that expression when included in the CALC.

    Raises:
        `PyDoughASTException` if `with_terms` has not been called yet.
    """
    if self._calc_term_indices is None:
        raise PyDoughASTException(
            "Cannot invoke `calc_term_indices` before calling `with_terms`"
        )
    return self._calc_term_indices
|
||
@property
def ancestor_context(self) -> PyDoughCollectionAST | None:
    """
    The ancestor context of this node, which is always None for a global
    CALC.
    """
    return None
|
||
@property
def preceding_context(self) -> PyDoughCollectionAST | None:
    """
    The preceding context of this node, which is always None for a global
    CALC.
    """
    return None
|
||
@property
def calc_terms(self) -> Set[str]:
    """
    The names of the expressions that are part of the CALC.
    """
    return {*self.calc_term_indices}
|
||
@property
def all_terms(self) -> Set[str]:
    """
    The names of all of the terms stored for this node by `with_terms`.

    Raises:
        `PyDoughASTException` if `with_terms` has not been called yet.
    """
    # Without this guard, set(None) would surface as an opaque TypeError.
    if self._all_terms is None:
        raise PyDoughASTException(
            "Cannot invoke `all_terms` before calling `with_terms`"
        )
    return set(self._all_terms)
|
||
def get_expression_position(self, expr_name: str) -> int:
    """
    Looks up the value stored for `expr_name` in `calc_term_indices`.

    Raises:
        `PyDoughASTException` if `expr_name` is not one of the CALC terms.
    """
    indices = self.calc_term_indices
    if expr_name in indices:
        return indices[expr_name]
    raise PyDoughASTException(f"Unrecognized CALC term: {expr_name!r}")
|
||
def get_term(self, term_name: str) -> PyDoughAST:
    """
    Returns the AST node stored under `term_name` for this node.

    Raises:
        `PyDoughASTException` if `with_terms` has not been called yet, or
        if `term_name` is not the name of a known term.
    """
    terms = self._all_terms
    if terms is None:
        raise PyDoughASTException(
            "Cannot invoke `get_term` before calling `with_terms`"
        )
    # Test membership against the dictionary itself rather than building a
    # throwaway set of every term name on each lookup.
    if term_name not in terms:
        raise PyDoughASTException(f"Unrecognized term: {term_name!r}")
    return terms[term_name]
|
||
def to_string(self) -> str:
    """
    Returns a single-line string of the form `graph_name(a=..., b=...)`,
    listing each CALC term by name alongside the string form of its
    expression.

    Raises:
        `PyDoughASTException` if `with_terms` has not been called yet.
    """
    # Iterating over None would otherwise surface as an opaque TypeError.
    if self._calc_term_indices is None:
        raise PyDoughASTException(
            "Cannot invoke `to_string` before calling `with_terms`"
        )
    kwarg_strings: List[str] = [
        f"{name}={self.get_term(name).to_string()}"
        for name in self._calc_term_indices
    ]
    return f"{self.graph.name}({', '.join(kwarg_strings)})"
|
||
def to_tree_string(self) -> str:
    """
    Not implemented for global CALC nodes; always raises
    NotImplementedError.
    """
    raise NotImplementedError()
|
||
def equals(self, other: "GlobalCalc") -> bool:
    """
    Returns whether `other` is equal to this node: the parent class's
    `equals` check must pass and both nodes must have the same mapping of
    CALC term names to positions.

    Raises:
        `PyDoughASTException` if `with_terms` has not been called on this
        node yet.
    """
    if self._all_terms is None:
        raise PyDoughASTException(
            "Cannot invoke `equals` before calling `with_terms`"
        )
    # NOTE(review): only the term names/positions are compared here, not the
    # expressions bound to them -- confirm that this is intended.
    return (
        super().equals(other)
        # Guard the private attribute access below against foreign types.
        and isinstance(other, GlobalCalc)
        and self._calc_term_indices == other._calc_term_indices
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not saying there is any need to change this, but could there ever be a situation where the "global context" is really graph-level context in the future, meaning tied to a particular graph but not any node in the graph, for when multiple graphs are supported?
If there is, then maybe `GraphLevelCalc`, which could have all the same properties, is a more accurate description.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I just overhauled this by making a "global context" which table collections & calcs can be children of (the necessity of this became quite apparent when I started doing the tree strings), eliminating the need for this class. I'm closing this PR since it is now redundant.