Skip to content

Commit 9dfd470

Browse files
AdRileyFrizi
authored and committed
SQLServer Aggregate Support (#11811)
* 40 red * 18 Red * 31 Red * 20 red * 18 Red * 15 red * 9 Red * 7 * Comment out broken test for now * Green * Cleanup * Changelog * Update check_aggregate_support * Cleanup * Reenable test * Fix tests * Doc comment
1 parent 0d9cb29 commit 9dfd470

File tree

10 files changed

+101
-78
lines changed

10 files changed

+101
-78
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@
136136
- [Enhance Managed_Resource to allow implementation of in-memory caches][11577]
137137
- [Added `add_group_number` to the in-memory database.][11818]
138138
- [The reload button clears the HTTP cache.][11673]
139+
- [SQL Server Support for Aggregate][11811]
139140

140141
[11235]: https://github.com/enso-org/enso/pull/11235
141142
[11255]: https://github.com/enso-org/enso/pull/11255
@@ -146,6 +147,7 @@
146147
[11577]: https://github.com/enso-org/enso/pull/11577
147148
[11818]: https://github.com/enso-org/enso/pull/11818
148149
[11673]: https://github.com/enso-org/enso/pull/11673
150+
[11811]: https://github.com/enso-org/enso/pull/11811
149151

150152
#### Enso Language & Runtime
151153

distribution/lib/Standard/AWS/0.0.0-dev/src/Database/Redshift/Internal/Redshift_Dialect.enso

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,12 @@ type Redshift_Dialect
157157
_ = [op_kind, args]
158158
expression
159159

160+
## PRIVATE
161+
Add an extra cast to adjust the output type of aggregate operations.
162+
Some DBs do CAST(SUM(x) AS FLOAT) others do SUM(CAST(x AS FLOAT)).
163+
cast_aggregate_columns self op_kind:Text columns:(Vector Internal_Column) =
164+
self.cast_op_type op_kind columns (SQL_Expression.Operation op_kind (columns.map c->c.expression))
165+
160166
## PRIVATE
161167
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
162168
prepare_fetch_types_query self expression context =

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ from project.Errors import Aggregagtion_Requires_Order
3535
make_aggregate_column : DB_Table -> Aggregate_Column -> Text -> Dialect -> (Text -> Vector -> SQL_Expression -> SQL_Type_Reference) -> Problem_Builder -> Internal_Column
3636
make_aggregate_column table aggregate as dialect infer_return_type problem_builder -> Internal_Column =
3737
simple_aggregate op_kind columns =
38-
expression = dialect.cast_op_type op_kind columns (SQL_Expression.Operation op_kind (columns.map c->c.expression))
38+
expression = dialect.cast_aggregate_columns op_kind columns
3939
sql_type_ref = infer_return_type op_kind columns expression
4040
Internal_Column.Value as sql_type_ref expression
4141

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ type SQL_Generator
179179
generate_select_query_sql : Dialect -> Vector (Pair Text SQL_Expression) -> Context -> SQL_Builder
180180
generate_select_query_sql self dialect columns ctx =
181181
gen_exprs exprs = exprs.map (expr-> dialect.generate_expression self expr for_select=False)
182+
gen_group_exprs exprs = exprs.map (expr-> dialect.generate_expression self expr for_select=True)
182183
gen_column pair = (dialect.generate_expression self expr=pair.second for_select=True) ++ alias dialect pair.first
183184

184185
generated_columns = case columns of
@@ -187,7 +188,7 @@ type SQL_Generator
187188

188189
from_part = self.generate_from_part dialect ctx.from_spec
189190
where_part = (SQL_Builder.join " AND " (gen_exprs ctx.where_filters)) . prefix_if_present " WHERE "
190-
group_part = (SQL_Builder.join ", " (gen_exprs ctx.groups)) . prefix_if_present " GROUP BY "
191+
group_part = (SQL_Builder.join ", " (gen_group_exprs ctx.groups)) . prefix_if_present " GROUP BY "
191192

192193
orders = ctx.orders.map (self.generate_order dialect)
193194
order_part = (SQL_Builder.join ", " orders) . prefix_if_present " ORDER BY "
@@ -663,14 +664,14 @@ preprocess_query (query : Query) -> Query =
663664
column expression; it should be provided only if `has_quote` is `True` and
664665
must not be empty then. If the quote character occurs in the expression, it
665666
is escaped by doubling each occurrence.
666-
make_concat make_raw_concat_expr make_contains_expr has_quote args =
667+
make_concat make_raw_concat_expr make_contains_expr has_quote args append_char="||" =
667668
expected_args = if has_quote then 5 else 4
668669
if args.length != expected_args then Error.throw (Illegal_State.Error "Unexpected number of arguments for the concat operation.") else
669670
expr = args.at 0
670671
separator = args.at 1
671672
prefix = args.at 2
672673
suffix = args.at 3
673-
append = " || "
674+
append = " " + append_char + " "
674675
possibly_quoted = case has_quote of
675676
True ->
676677
quote = args.at 4

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,12 @@ type Postgres_Dialect
231231
if cast_to.is_nothing then expression else
232232
SQL_Expression.Operation "CAST" [expression, SQL_Expression.Literal cast_to]
233233

234+
## PRIVATE
235+
Add an extra cast to adjust the output type of aggregate operations.
236+
Some DBs do CAST(SUM(x) AS FLOAT) others do SUM(CAST(x AS FLOAT)).
237+
cast_aggregate_columns self op_kind:Text columns:(Vector Internal_Column) =
238+
self.cast_op_type op_kind columns (SQL_Expression.Operation op_kind (columns.map c->c.expression))
239+
234240
## PRIVATE
235241
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
236242
prepare_fetch_types_query self expression context =

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,12 @@ type SQLite_Dialect
215215
_ = [op_kind, args]
216216
expression
217217

218+
## PRIVATE
219+
Add an extra cast to adjust the output type of aggregate operations.
220+
Some DBs do CAST(SUM(x) AS FLOAT) others do SUM(CAST(x AS FLOAT)).
221+
cast_aggregate_columns self op_kind:Text columns:(Vector Internal_Column) =
222+
self.cast_op_type op_kind columns (SQL_Expression.Operation op_kind (columns.map c->c.expression))
223+
218224
## PRIVATE
219225
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
220226
prepare_fetch_types_query self expression context =

distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Dialect.enso

Lines changed: 64 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,27 @@ type SQLServer_Dialect
212212
is used only to override the type in cases where the default one that the
213213
database uses is not what we want.
214214
cast_op_type self (op_kind:Text) (args:(Vector Internal_Column)) (expression:SQL_Expression) =
215-
_ = [op_kind, args]
216-
expression
215+
is_int ic =
216+
typeid = ic.sql_type_reference.get.typeid
217+
typeid == Java_Types.SMALLINT || typeid == Java_Types.INTEGER || typeid == Java_Types.BIGINT
218+
219+
cast_to = case op_kind of
220+
"AVG" ->
221+
if is_int (args.at 0) then "FLOAT" else Nothing
222+
"STDDEV_POP" ->
223+
if is_int (args.at 0) then "FLOAT" else Nothing
224+
"STDDEV_SAMP" ->
225+
if is_int (args.at 0) then "FLOAT" else Nothing
226+
_ -> Nothing
227+
228+
if cast_to.is_nothing then expression else
229+
SQL_Expression.Operation "CAST" [expression, SQL_Expression.Literal cast_to]
230+
231+
## PRIVATE
232+
Add an extra cast to adjust the output type of aggregate operations.
233+
Some DBs do CAST(SUM(x) AS FLOAT) others do SUM(CAST(x AS FLOAT)).
234+
cast_aggregate_columns self op_kind:Text columns:(Vector Internal_Column) =
235+
SQL_Expression.Operation op_kind (columns.map c->(self.cast_op_type op_kind columns (Internals_Access.column_expression c)))
217236

218237
## PRIVATE
219238
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
@@ -224,10 +243,32 @@ type SQLServer_Dialect
224243
generate_collate self collation_name:Text -> Text = Base_Generator.default_generate_collate collation_name quote_char=""
225244

226245
## PRIVATE
227-
check_aggregate_support : Aggregate_Column -> Boolean ! Unsupported_Database_Operation
228-
check_aggregate_support self aggregate =
229-
_ = aggregate
230-
True
246+
check_aggregate_support self aggregate:Aggregate_Column -> Boolean ! Unsupported_Database_Operation =
247+
unsupported name =
248+
Error.throw (Unsupported_Database_Operation.Error name)
249+
case aggregate of
250+
Group_By _ _ -> True
251+
Count _ -> True
252+
Count_Distinct columns _ _ ->
253+
if columns.length == 1 then True else
254+
unsupported "Count_Distinct on multiple columns"
255+
Count_Not_Nothing _ _ -> True
256+
Count_Nothing _ _ -> True
257+
Count_Not_Empty _ _ -> True
258+
Count_Empty _ _ -> True
259+
Percentile _ _ _ -> unsupported "Percentile"
260+
Mode _ _ -> unsupported "Mode"
261+
First _ _ _ _ -> unsupported "First"
262+
Last _ _ _ _ -> unsupported "Last"
263+
Maximum _ _ -> True
264+
Minimum _ _ -> True
265+
Shortest _ _ -> unsupported "Shortest"
266+
Longest _ _ -> unsupported "Longest"
267+
Standard_Deviation _ _ _ -> True
268+
Concatenate _ _ _ _ _ _ -> True
269+
Sum _ _ -> True
270+
Average _ _ -> True
271+
Median _ _ -> unsupported "Median"
231272

232273
## PRIVATE
233274
Checks if an operation is supported by the dialect.
@@ -243,6 +284,7 @@ type SQLServer_Dialect
243284
Feature.Filter -> True
244285
Feature.Join -> True
245286
Feature.Union -> True
287+
Feature.Aggregate -> True
246288
_ -> False
247289

248290
## PRIVATE
@@ -401,6 +443,7 @@ private _generate_expression dialect base_gen expr expression_kind:Expression_Ki
401443

402444
pair final_expr null_checks_result
403445
query : Query -> pair (base_gen.generate_sub_query dialect query) []
446+
descriptor : Order_Descriptor -> pair (base_gen.generate_order dialect descriptor) []
404447

405448
## PRIVATE
406449
type Expression_Kind
@@ -437,7 +480,7 @@ private _op_return_kind op -> Expression_Kind =
437480
if return_bool_ops.contains op then Expression_Kind.Boolean_Condition else Expression_Kind.Value
438481

439482
private _op_needs_to_materialize_null_checks op -> Boolean =
440-
["FILL_NULL", "COALESCE"].contains op
483+
["FILL_NULL", "COALESCE", "COUNT_IS_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT", "SUM", "AVG", "LONGEST", "SHORTEST", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "STDDEV_POP", "STDDEV_SAMP", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MIN", "MAX"].contains op
441484

442485
## PRIVATE
443486
make_dialect_operations =
@@ -447,13 +490,13 @@ make_dialect_operations =
447490
arith_extensions = [floating_point_div, mod_op, decimal_div, decimal_mod, ["ROW_MIN", Base_Generator.make_function "LEAST"], ["ROW_MAX", Base_Generator.make_function "GREATEST"]]
448491
bool = [bool_or]
449492

450-
stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"]
451-
stddev_samp = ["STDDEV_SAMP", Base_Generator.make_function "stddev_samp"]
452-
stats = [agg_median, agg_mode, agg_percentile, stddev_pop, stddev_samp]
493+
stddev_pop = ["STDDEV_POP", Base_Generator.make_function "STDEVP"]
494+
stddev_samp = ["STDDEV_SAMP", Base_Generator.make_function "STDEV"]
495+
stats = [stddev_pop, stddev_samp]
453496
date_ops = [["year", Base_Generator.make_function "year"], make_datepart "quarter", ["month", Base_Generator.make_function "month"], make_datepart "week" "iso_week", ["day", Base_Generator.make_function "day"], make_datepart "hour", make_datepart "minute", make_datepart "day_of_year" "dayofyear", make_day_of_week, make_datepart "second", make_datepart "millisecond", make_extract_microsecond, ["date_add", make_date_add], ["date_diff", make_date_diff], ["date_trunc_to_day", make_date_trunc_to_day]]
454497
special_overrides = [is_empty, ["IIF", _make_iif]]
455498
other = [["RUNTIME_ERROR", make_runtime_error_op]]
456-
my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides + other
499+
my_mappings = text + counts + arith_extensions + bool + stats + date_ops + special_overrides + other
457500
base = Base_Generator.base_dialect_operations . extend_with my_mappings
458501
Base_Generator.Dialect_Operations.Value (base.operations_dict.remove "IS_IN")
459502

@@ -469,68 +512,29 @@ private _make_iif arguments:Vector -> SQL_Builder =
469512

470513
## PRIVATE
471514
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
472-
SQL_Builder.code "SUM(CASE WHEN " ++ arg.paren ++ " IS NULL THEN 1 ELSE 0 END)"
515+
SQL_Builder.code "COALESCE(SUM(CASE WHEN " ++ arg.paren ++ " IS NULL THEN 1 ELSE 0 END), 0)"
473516

474517
## PRIVATE
475518
agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg->
476-
SQL_Builder.code "SUM(CASE WHEN (" ++ arg.paren ++ " IS NULL) OR (" ++ arg.paren ++ " = '') THEN 1 ELSE 0 END)"
519+
SQL_Builder.code "COALESCE(SUM(CASE WHEN (" ++ arg.paren ++ " IS NULL) OR (" ++ arg.paren ++ " = '') THEN 1 ELSE 0 END), 0)"
477520

478521
## PRIVATE
479522
agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->
480-
SQL_Builder.code "SUM(CASE WHEN (" ++ arg.paren ++ " IS NOT NULL) AND (" ++ arg.paren ++ " != '') THEN 1 ELSE 0 END)"
481-
482-
483-
## PRIVATE
484-
agg_median = Base_Generator.lift_unary_op "MEDIAN" arg->
485-
median = SQL_Builder.code "MEDIAN(" ++ arg ++ ")"
486-
has_nan = SQL_Builder.code "BOOLOR_AGG(" ++ arg ++ " = 'NaN'::Double)"
487-
SQL_Builder.code "CASE WHEN " ++ has_nan ++ " THEN 'NaN'::Double ELSE " ++ median ++ " END"
488-
489-
## PRIVATE
490-
agg_mode = Base_Generator.lift_unary_op "MODE" arg->
491-
SQL_Builder.code "MODE(" ++ arg ++ ")"
492-
493-
## PRIVATE
494-
agg_percentile = Base_Generator.lift_binary_op "PERCENTILE" p-> expr->
495-
percentile = SQL_Builder.code "percentile_cont(" ++ p ++ ") WITHIN GROUP (ORDER BY " ++ expr ++ ")"
496-
has_nan = SQL_Builder.code "BOOLOR_AGG(" ++ expr ++ " = 'NaN'::Double)"
497-
SQL_Builder.code "CASE WHEN " ++ has_nan ++ " THEN 'NaN' ELSE " ++ percentile ++ " END"
498-
499-
## PRIVATE
500-
These are written in a not most-efficient way, but a way that makes them
501-
compatible with other group-by aggregations out-of-the-box. In the future, we
502-
may want to consider some alternative solutions.
503-
first_last_aggregators =
504-
first = make_first_aggregator reverse=False ignore_null=False
505-
first_not_null = make_first_aggregator reverse=False ignore_null=True
506-
last = make_first_aggregator reverse=True ignore_null=False
507-
last_not_null = make_first_aggregator reverse=True ignore_null=True
508-
[["FIRST", first], ["FIRST_NOT_NULL", first_not_null], ["LAST", last], ["LAST_NOT_NULL", last_not_null]]
509-
510-
## PRIVATE
511-
make_first_aggregator reverse ignore_null args =
512-
if args.length < 2 then Error.throw (Illegal_State.Error "Insufficient number of arguments for the operation.") else
513-
result_expr = args.first
514-
order_bys = args.drop 1
515-
516-
method_name = if reverse then "LAST_VALUE" else "FIRST_VALUE"
517-
filter_clause = if ignore_null then ") IGNORE NULLS OVER" else ") OVER"
518-
order_clause = SQL_Builder.code " ORDER BY " ++ SQL_Builder.join "," order_bys
519-
SQL_Builder.code (method_name + "(") ++ result_expr ++ filter_clause ++ order_clause
523+
SQL_Builder.code "COALESCE(SUM(CASE WHEN (" ++ arg.paren ++ " IS NOT NULL) AND (" ++ arg.paren ++ " != '') THEN 1 ELSE 0 END), 0)"
520524

521525
## PRIVATE
522526
agg_shortest = Base_Generator.lift_unary_op "SHORTEST" arg->
523-
SQL_Builder.code "FIRST_VALUE(" ++ arg ++ ") IGNORE NULLS OVER (ORDER BY LENGTH(" ++ arg ++ "))"
527+
SQL_Builder.code "FIRST_VALUE(" ++ arg ++ ") IGNORE NULLS OVER (ORDER BY LEN(" ++ arg ++ "))"
524528

525529
## PRIVATE
526530
agg_longest = Base_Generator.lift_unary_op "LONGEST" arg->
527-
SQL_Builder.code "FIRST_VALUE(" ++ arg ++ ") IGNORE NULLS OVER (ORDER BY LENGTH(" ++ arg ++ ") DESC)"
531+
SQL_Builder.code "FIRST_VALUE(" ++ arg ++ ") IGNORE NULLS OVER (ORDER BY LEN(" ++ arg ++ ") DESC)"
528532

529533
## PRIVATE
530534
concat_ops =
531535
make_raw_concat_expr expr separator =
532536
SQL_Builder.code "string_agg(" ++ expr ++ ", " ++ separator ++ ")"
533-
concat = Base_Generator.make_concat make_raw_concat_expr make_contains_expr
537+
concat = Base_Generator.make_concat make_raw_concat_expr make_contains_expr append_char="+"
534538
[["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]]
535539

536540
## PRIVATE
@@ -554,14 +558,7 @@ agg_count_distinct args = if args.is_empty then (Error.throw (Illegal_Argument.E
554558
True ->
555559
## A single null value will be skipped.
556560
SQL_Builder.code "COUNT(DISTINCT " ++ args.first ++ ")"
557-
False ->
558-
## A tuple of nulls is not a null, so it will not be skipped - but
559-
we want to ignore all-null columns. So we manually filter them
560-
out.
561-
count = SQL_Builder.code "COUNT(DISTINCT (" ++ SQL_Builder.join ", " args ++ "))"
562-
are_nulls = args.map arg-> arg.paren ++ " IS NULL"
563-
all_nulls_filter = SQL_Builder.code " FILTER (WHERE NOT (" ++ SQL_Builder.join " AND " are_nulls ++ "))"
564-
(count ++ all_nulls_filter).paren
561+
False -> Error.throw (Illegal_Argument.Error "COUNT_DISTINCT supports only single arguments in SQLServer.")
565562

566563
## PRIVATE
567564
agg_count_distinct_include_null args = case args.length == 1 of
@@ -595,12 +592,11 @@ ends_with = Base_Generator.lift_binary_op "ENDS_WITH" str-> sub->
595592
res.paren
596593

597594
## PRIVATE
598-
contains = Base_Generator.lift_binary_op "CONTAINS" str-> sub->
599-
res = SQL_Builder.code "CHARINDEX(" ++ sub ++ ", " ++ str ++ ") > 0"
600-
res.paren
595+
make_contains_expr expr substring =
596+
SQL_Builder.code "CHARINDEX(" ++ substring ++ ", " ++ expr ++ ") > 0"
601597

602598
## PRIVATE
603-
make_contains_expr expr substring = contains [expr, substring]
599+
contains = Base_Generator.lift_binary_op "CONTAINS" make_contains_expr
604600

605601
## PRIVATE
606602
make_case_sensitive = Base_Generator.lift_unary_op "MAKE_CASE_SENSITIVE" arg->

distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Dialect.enso

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,12 @@ type Snowflake_Dialect
219219
_ = [op_kind, args]
220220
expression
221221

222+
## PRIVATE
223+
Add an extra cast to adjust the output type of aggregate operations.
224+
Some DBs do CAST(SUM(x) AS FLOAT) others do SUM(CAST(x AS FLOAT)).
225+
cast_aggregate_columns self op_kind:Text columns:(Vector Internal_Column) =
226+
self.cast_op_type op_kind columns (SQL_Expression.Operation op_kind (columns.map c->c.expression))
227+
222228
## PRIVATE
223229
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
224230
prepare_fetch_types_query self expression context =

test/Microsoft_Tests/src/SQLServer_Spec.enso

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,8 @@ add_sqlserver_specs suite_builder create_connection_fn =
200200
materialize = .read
201201

202202
common_selection = Common_Table_Operations.Main.Test_Selection.Config supported_replace_params=supported_replace_params run_advanced_edge_case_tests_by_default=True
203-
aggregate_selection = Common_Table_Operations.Aggregate_Spec.Test_Selection.Config first_last_row_order=False aggregation_problems=False
204-
agg_in_memory_table = (enso_project.data / "data.csv") . read
203+
aggregate_selection = Common_Table_Operations.Aggregate_Spec.Test_Selection.Config advanced_stats=False text_shortest_longest=False first_last=False first_last_row_order=False aggregation_problems=False multi_distinct=False first_last_multi_order=False first_last_ignore_nothing=False text_concat=False
204+
agg_in_memory_table = ((Project_Description.new enso_dev.Table_Tests).data / "data.csv") . read
205205

206206
agg_table_fn = _->
207207
agg_in_memory_table.select_into_database_table default_connection.get (Name_Generator.random_name "Agg1") primary_key=Nothing temporary=True

0 commit comments

Comments
 (0)