-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BSE-4155] Add support for the Aggregate relational node #39
Changes from 76 commits
cce5e4c
19460cc
89f8e42
2786774
4ee70be
66107ec
256ecef
d267275
cb32b89
b11a50f
856e20e
513abe3
5ed09e0
24dceb2
4cfd06e
7081a2c
d318e64
0764621
178ffd1
2e7d1a5
9b88810
b844130
24b620e
63aa271
d32f73e
c2aef51
c832181
135ab5f
ff3b25f
90cd1e5
8bd7cfc
0737744
95c281e
2fc3e83
47c1933
0e3166c
becb91a
7830f13
e8ac1e6
4081ad8
1c7e2de
eb71c14
288ae56
631606d
411a623
106c708
4c76249
4719a66
ac2a6e4
44f709f
33b5332
4d2a781
ecc1600
25a7791
ea9255c
bf20591
ab2f5db
32cc1ea
591b67d
d96e9f6
ce7b9fa
5376da1
7501f3c
721c181
bd84645
876667f
316ea57
7fef483
9ce94c1
2b37703
55b03ed
d3e2c89
91bcc7c
593247c
ab8c0c7
f6d8eb2
90b4e7a
cc1a2d1
4a01824
d05aa76
11b5d27
0f9329d
07f4e32
d66dcee
393057b
8a78231
e00ee40
196a812
16954e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,10 +3,8 @@ | |
""" | ||
|
||
__all__ = [ | ||
"Column", | ||
"Relational", | ||
] | ||
from .abstract import ( | ||
Column, | ||
Relational, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
""" | ||
This file contains the relational implementation for an aggregation. This is our | ||
relational representation for any grouping operation that optionally involves | ||
keys and aggregate functions. | ||
""" | ||
|
||
from collections.abc import MutableMapping | ||
|
||
from sqlglot.expressions import Expression as SQLGlotExpression | ||
|
||
from .abstract import Relational | ||
from .relational_expressions.call_expression import CallExpression | ||
from .relational_expressions.column_reference import ColumnReference | ||
from .single_relational import SingleRelational | ||
|
||
|
||
class Aggregate(SingleRelational): | ||
""" | ||
The Aggregate node in the relational tree. This node represents an aggregation | ||
based on some keys, which should most commonly be column references, and some | ||
aggregate functions. | ||
""" | ||
|
||
def __init__( | ||
self, | ||
input: Relational, | ||
keys: MutableMapping[str, ColumnReference], | ||
aggregations: MutableMapping[str, CallExpression], | ||
) -> None: | ||
total_cols = {**keys, **aggregations} | ||
assert len(total_cols) == len(keys) + len( | ||
aggregations | ||
), "Keys and aggregations must have unique names" | ||
super().__init__(input, total_cols) | ||
self._keys: MutableMapping[str, ColumnReference] = keys | ||
self._aggregations: MutableMapping[str, CallExpression] = aggregations | ||
assert all( | ||
agg.is_aggregation for agg in aggregations.values() | ||
), "All functions used in aggregations must be aggregation functions" | ||
|
||
@property | ||
def keys(self) -> MutableMapping[str, ColumnReference]: | ||
return self._keys | ||
|
||
@property | ||
def aggregations(self) -> MutableMapping[str, CallExpression]: | ||
return self._aggregations | ||
|
||
def to_sqlglot(self) -> SQLGlotExpression: | ||
raise NotImplementedError( | ||
"Conversion to SQLGlot Expressions is not yet implemented." | ||
) | ||
|
||
def node_equals(self, other: Relational) -> bool: | ||
return ( | ||
isinstance(other, Aggregate) | ||
and self.keys == other.keys | ||
and self.aggregations == other.aggregations | ||
and super().node_equals(other) | ||
njriasan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
|
||
def to_string(self) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For context because I didn't mention this in prior PRs, I've decided not to represent a tree for simplicity. I would rather eventually add a "TreeWriter" class that could unify nodes more easily. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In that case, I suggest we have Relational object have a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I want to revisit this in a followup. It seems like a more reasonable pattern to have a separate writer object (equivalent to Calcite) so we can manage things like drawing the lines more elegantly. |
||
# TODO: Should we visit the input? | ||
return f"AGGREGATE(keys={self.keys}, aggregations={self.aggregations})" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
""" | ||
This file contains the relational implementation for a "limit" operation. | ||
This is the relational representation of top-n selection and typically depends | ||
on explicit ordering of the input relation. | ||
""" | ||
|
||
from collections.abc import MutableMapping, MutableSequence | ||
|
||
from sqlglot.expressions import Expression as SQLGlotExpression | ||
|
||
from pydough.types.integer_types import IntegerType | ||
|
||
from .abstract import Relational | ||
from .relational_expressions import ColumnOrdering, RelationalExpression | ||
from .single_relational import SingleRelational | ||
|
||
|
||
class Limit(SingleRelational): | ||
""" | ||
The Limit node in the relational tree. This node represents any TOP-N | ||
operations in the relational algebra. This operation is dependent on the | ||
orderings of the input relation. | ||
""" | ||
|
||
def __init__( | ||
self, | ||
input: Relational, | ||
limit: RelationalExpression, | ||
columns: MutableMapping[str, RelationalExpression], | ||
orderings: MutableSequence[ColumnOrdering] | None = None, | ||
) -> None: | ||
super().__init__(input, columns) | ||
# Note: The limit is a relational expression because it should be a constant | ||
# now but in the future could be a more complex expression that may require | ||
# multi-step SQL to successfully evaluate. | ||
assert isinstance( | ||
limit.data_type, IntegerType | ||
), "Limit must be an integer type." | ||
self._limit: RelationalExpression = limit | ||
self._orderings: MutableSequence[ColumnOrdering] = ( | ||
[] if orderings is None else orderings | ||
) | ||
|
||
@property | ||
def limit(self) -> RelationalExpression: | ||
return self._limit | ||
|
||
@property | ||
def orderings(self) -> MutableSequence[ColumnOrdering]: | ||
return self._orderings | ||
|
||
def to_sqlglot(self) -> SQLGlotExpression: | ||
raise NotImplementedError( | ||
"Conversion to SQLGlot Expressions is not yet implemented." | ||
) | ||
|
||
def node_equals(self, other: Relational) -> bool: | ||
return ( | ||
isinstance(other, Limit) | ||
and self.limit == other.limit | ||
and self.orderings == other.orderings | ||
and super().node_equals(other) | ||
) | ||
|
||
def to_string(self) -> str: | ||
# TODO: Should we visit the input? | ||
return f"LIMIT(limit={self.limit}, columns={self.columns}, orderings={self.orderings})" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
""" | ||
This file contains the relational implementation for a "project". This is our | ||
relational representation for a "calc" that involves any compute steps and can include | ||
adding or removing columns (as well as technically reordering). In general, we seek to | ||
avoid introducing extra nodes just to reorder or prune columns, so ideally their use | ||
should be sparse. | ||
""" | ||
|
||
from collections.abc import MutableMapping | ||
|
||
from sqlglot.expressions import Expression as SQLGlotExpression | ||
|
||
from .abstract import Relational | ||
from .relational_expressions import RelationalExpression | ||
from .single_relational import SingleRelational | ||
|
||
|
||
class Project(SingleRelational): | ||
""" | ||
The Project node in the relational tree. This node represents a "calc" in | ||
relational algebra, which should involve some "compute" functions and may | ||
involve adding, removing, or reordering columns. | ||
""" | ||
|
||
def __init__( | ||
self, | ||
input: Relational, | ||
columns: MutableMapping[str, RelationalExpression], | ||
) -> None: | ||
super().__init__(input, columns) | ||
|
||
def to_sqlglot(self) -> SQLGlotExpression: | ||
raise NotImplementedError( | ||
"Conversion to SQLGlot Expressions is not yet implemented." | ||
) | ||
|
||
def node_equals(self, other: Relational) -> bool: | ||
return isinstance(other, Project) and super().node_equals(other) | ||
|
||
def to_string(self) -> str: | ||
# TODO: Should we visit the input? | ||
return f"PROJECT(columns={self.columns})" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
""" | ||
TODO: add module-level docstring | ||
""" | ||
|
||
__all__ = [ | ||
"ColumnOrdering", | ||
"RelationalExpression", | ||
] | ||
from .abstract import RelationalExpression | ||
from .column_ordering import ColumnOrdering |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can avoid this if we have a special version of CallExpression just for agg calls (its inputs are only allowed to be literals or column references, nothing else) -> use type annotations to verify that the
aggregations
argument maps to this special variant.They can then check the operator passed in to make sure that its
.is_aggregation
field is true.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's consider this as a followup. I think the extra class isn't worth the safety.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also I'm not supporting literals yet. I still need to figure that out.