diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4df253fba6e9..1f93e9310f48 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -120,6 +120,7 @@
 - [Added `Table.input` allowing creation of typed tables from vectors of data,
   including auto parsing text columns.][11562]
 - [Enhance Managed_Resource to allow implementation of in-memory caches][11577]
+- [Added `add_group_number` to the in-memory database.][11818]
 - [The reload button clears the HTTP cache.][11673]
 
 [11235]: https://github.com/enso-org/enso/pull/11235
@@ -129,6 +130,7 @@
 [11490]: https://github.com/enso-org/enso/pull/11490
 [11562]: https://github.com/enso-org/enso/pull/11562
 [11577]: https://github.com/enso-org/enso/pull/11577
+[11818]: https://github.com/enso-org/enso/pull/11818
 [11673]: https://github.com/enso-org/enso/pull/11673
 
 #### Enso Language & Runtime
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
index 25da867e82e0..7de8b96491ac 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
@@ -25,6 +25,7 @@ import Standard.Table.Columns_To_Add.Columns_To_Add
 import Standard.Table.Columns_To_Keep.Columns_To_Keep
 import Standard.Table.Expression.Expression
 import Standard.Table.Expression.Expression_Error
+import Standard.Table.Grouping_Method.Grouping_Method
 import Standard.Table.Internal.Add_Row_Number
 import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
 import Standard.Table.Internal.Constant_Column.Constant_Column
@@ -926,6 +927,99 @@ type DB_Table
         updated_table = renamed_table.updated_columns (renamed_table.internal_columns + [new_column])
         updated_table.as_subquery
 
+    ## PRIVATE add group column, group id, bucket, tile
+       GROUP Standard.Base.Values
+       ICON column_add
+       Adds a new column to the table enumerating groups of rows, assigning each
+       row to one group number. All rows in each group will get the same number.
+
+       Arguments:
+       - grouping_method: Specifies how to group the rows; see "Grouping
+         Methods", below.
+       - name: The name of the new column. Defaults to "Group".
+       - from: The starting value for the enumeration. Defaults to 0.
+       - step: The amount to increment the enumeration by. Defaults to 1.
+
+       ? Grouping Methods
+
+         The following grouping methods are supported:
+         - `Unique`: Group rows by the specified columns.
+         - `Equal_Count`: Create the specified number of groups with the same
+           number of rows in each group (except possibly the last one).
+
+       ? Ordering of rows
+
+         Note that the ordering of rows from the original table is preserved in
+         all cases. The grouping and ordering settings can affect how the group
+         numbers are assigned, depending on the grouping method. The order of
+         the rows itself is not changed by this operation.
+
+       ! Error Conditions
+
+         - This operation is not yet supported in the Database backend: an
+           `Unsupported_Database_Operation` error is always raised.
+         - If the columns specified in `on` or `order_by` are not present
+           in the table, a `Missing_Input_Columns` error is raised.
+         - If the column with the same name as provided `name` already exists,
+           a `Duplicate_Output_Column_Names` problem is reported and the
+           existing column is renamed to avoid the clash.
+         - If grouping on floating point numbers, a `Floating_Point_Equality`
+           problem is reported.
+
+       > Example
+         Assign group numbers based on unique values of the first two columns.
+
+             ## table:
+                 x | y | z
+                ---+---+---
+                 1 | 0 | 2
+                 0 | 1 | 0
+                 1 | 2 | 0
+                 0 | 1 | 1
+                 1 | 0 | 1
+                 1 | 2 | 1
+             table = table_builder [['x', [1, 0, 1, 0, 1, 1]], ['y', [0, 1, 2, 1, 0, 2]], ['z', [2, 0, 0, 1, 1, 1]]]
+             table2 = table.add_group_number (..Unique on=['x', 'y']) "g"
+             table2.at 'g' . to_vector
+             # => [0, 1, 2, 1, 0, 2]
+             ## table2:
+                 x | y | z | g
+                ---+---+---+---
+                 1 | 0 | 2 | 0
+                 0 | 1 | 0 | 1
+                 1 | 2 | 0 | 2
+                 0 | 1 | 1 | 1
+                 1 | 0 | 1 | 0
+                 1 | 2 | 1 | 2
+
+       > Example
+         Divide rows into three groups.
+             ## table:
+                 x | y
+                ---+---
+                 1 | 5
+                 2 | 4
+                 3 | 3
+                 4 | 2
+                 5 | 1
+             table = table_builder [['x', [1, 2, 3, 4, 5]], ['y', [5, 4, 3, 2, 1]]]
+             table2 = table.add_group_number (..Equal_Count 3) "g"
+             table2.at 'g' . to_vector
+             # => [0, 0, 1, 1, 2]
+             ## table2:
+                 x | y | g
+                ---+---+---
+                 1 | 5 | 0
+                 2 | 4 | 0
+                 3 | 3 | 1
+                 4 | 2 | 1
+                 5 | 1 | 2
+    @name (Widget.Text_Input display=..Always)
+    @from (Widget.Numeric_Input display=..Always)
+    add_group_number self (grouping_method:Grouping_Method=..Unique) (name:Text="Group") (from:Integer=0) (step:Integer=1) (on_problems:Problem_Behavior=..Report_Warning) -> Table =
+        _ = [grouping_method, name, from, step, on_problems]
+        Error.throw (Unsupported_Database_Operation.Error "add_group_number")
+
     ## ALIAS order_by
        GROUP Standard.Base.Selections
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Feature.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Feature.enso
index ac8880942439..d82b0d38b0a4 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Feature.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Feature.enso
@@ -31,6 +31,9 @@ type Feature
     ## PRIVATE
        Catch all for tests that haven't yet been categorized correctly or use multiple features.
     Integration_Tests
+    ## PRIVATE
+       add a group number column to a table.
+    Add_Group_Number
     ## PRIVATE
        add a row number column to a table.
     Add_Row_Number
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Grouping_Method.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Grouping_Method.enso
new file mode 100644
index 000000000000..de5918416be7
--- /dev/null
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Grouping_Method.enso
@@ -0,0 +1,24 @@
+from Standard.Base import all
+import Standard.Base.Errors.Common.Missing_Argument
+
+polyglot java import org.enso.table.operations.AddGroupNumber
+
+## Specifies a method for grouping rows in `add_group_number`.
+type Grouping_Method
+    ## Group rows by the specified columns.
+
+       Arguments:
+       - on: Rows that have the same values for these columns will be grouped
+         together. At least one column must be specified.
+    Unique (on:(Vector | Text | Integer | Regex)=(Missing_Argument.throw "on"))
+
+    ## Create the specified number of groups with the same number of rows in
+       each group (except possibly the last one).
+
+       Arguments:
+       - group_count: The number of groups to divide the table into.
+       - order_by: (Optional.) Specifies the order in which rows should be
+         assigned to groups. Only affects the assignment of group numbers, not
+         the ordering of the output rows. Defaults to the order of the rows in
+         the table.
+    Equal_Count (group_count:Integer=(Missing_Argument.throw "group_count")) (order_by:(Vector | Text)=[])
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Group_Number.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Group_Number.enso
new file mode 100644
index 000000000000..5aef76394447
--- /dev/null
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Group_Number.enso
@@ -0,0 +1,71 @@
+private
+
+from Standard.Base import all
+import Standard.Base.Errors.Common.Unsupported_Argument_Types
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
+
+import project.Column.Column
+import project.Grouping_Method.Grouping_Method
+import project.Internal.Java_Problems
+import project.Internal.Problem_Builder.Problem_Builder
+import project.Internal.Table_Helpers
+import project.Set_Mode.Set_Mode
+import project.Table.Table
+from project.Internal.Add_Row_Number import rename_columns_if_needed
+
+polyglot java import java.lang.ArithmeticException
+polyglot java import org.enso.table.operations.AddGroupNumber
+
+## PRIVATE
+   Implements `Table.add_group_number` for in-memory tables: the group number
+   of every row is computed on the Java side and added as a new column called
+   `name`.
+add_group_number (table:Table) (grouping_method:Grouping_Method) (name:Text) (from:Integer) (step:Integer) (on_problems:Problem_Behavior=..Report_Warning) -> Table =
+    problem_builder = Problem_Builder.new error_on_missing_columns=True
+
+    # Shared handler: both panics caught below are reported as `Illegal_Argument`.
+    handle_arithmetic_exception _ =
+        Error.throw (Illegal_Argument.Error "The row number has exceeded the 64-bit integer range. BigInteger numbering is currently not supported. Please use a smaller start/step.")
+
+    Panic.catch ArithmeticException handler=handle_arithmetic_exception <| Panic.catch Unsupported_Argument_Types handler=handle_arithmetic_exception <|
+        Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
+            new_storage = case grouping_method of
+                Grouping_Method.Unique group_by ->
+                    _illegal_if (_is_empty_selector group_by) "..Unique requires a non-empty 'on'" <|
+                        grouping = _prepare_group_by table problem_builder group_by
+                        AddGroupNumber.numberGroupsUnique table.row_count from step grouping java_problem_aggregator
+                Grouping_Method.Equal_Count group_count order_by ->
+                    _illegal_if (group_count < 1) "group_count must be at least 1" <|
+                        ordering = _prepare_ordering table problem_builder order_by
+                        AddGroupNumber.numberGroupsEqualCount table.row_count group_count from step (ordering.at 0) (ordering.at 1) java_problem_aggregator
+            new_column = Column.from_storage name new_storage
+            renamed_table = rename_columns_if_needed table name on_problems Table.new
+            problem_builder.attach_problems_before on_problems <|
+                renamed_table.set new_column name set_mode=Set_Mode.Add
+
+## PRIVATE
+   Resolves the `on` selector of `..Unique` to the Java columns to group by.
+_prepare_group_by table problem_builder group_by =
+    table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder . map c->c.java_column
+
+## PRIVATE
+   Resolves `order_by` into a pair: the Java columns to sort by and the sign of
+   each column's sort direction.
+_prepare_ordering table problem_builder order_by =
+    ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
+    ordering_columns = ordering.map c->c.column.java_column
+    directions = ordering.map c->c.associated_selector.direction.to_sign
+    [ordering_columns, directions]
+
+## PRIVATE
+   Checks whether a column selector is empty. `is_empty` is only called on
+   `Vector` and `Text` selectors, because not every selector type accepted by
+   `..Unique` (e.g. `Integer`) defines it.
+_is_empty_selector selector = case selector of
+    _ : Vector -> selector.is_empty
+    _ : Text -> selector.is_empty
+    _ -> False
+
+## PRIVATE
+   Returns an `Illegal_Argument` error with `msg` if `b` is true; otherwise evaluates `cont`.
+_illegal_if b msg ~cont = if b then Error.throw (Illegal_Argument.Error msg) else cont
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso
index 477d1abad602..89735dc9d141 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso
@@ -168,6 +168,18 @@ make_join_condition_selector table display:Display=..Always cache=Nothing =
     item_editor = Single_Choice display=display values=names
     Vector_Editor item_editor=item_editor item_default="(..Equals "+table.column_names.first.pretty+")" display=display
 
+## PRIVATE
+   Make a grouping method selector.
+make_grouping_method_selector table:Table display:Display=..Always -> Widget =
+    column_selector = make_column_name_selector table display=Display.Always
+    columns_selector = Vector_Editor item_editor=column_selector item_default=table.column_names.first.pretty display=display
+
+    unique = Option "Unique" "..Unique" [["on", columns_selector]]
+    equal_count = Option "Equal Count" "..Equal_Count" [["order_by", columns_selector]]
+    names = [unique, equal_count]
+
+    Single_Choice display=display values=names
+
 ## PRIVATE
    Make a column name selector.
make_order_by_selector : Table -> Display -> Boolean -> Widget
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso
index 40d3689806de..fea12a3eb9e2 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso
@@ -31,6 +31,8 @@ export project.Extensions.Table_Conversions.parse_to_table
 export project.Extensions.Table_Conversions.to_table
 export project.Extensions.Table_Conversions.write_table
 
+export project.Grouping_Method.Grouping_Method
+
 export project.Headers.Headers
 
 export project.Join_Condition.Join_Condition
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
index c7147eb87b4a..19fa076122a4 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
@@ -35,6 +35,8 @@ import project.Delimited.Delimited_Format.Delimited_Format
 import project.Expression.Expression
 import project.Expression.Expression_Error
 import project.Extensions.Table_Conversions
+import project.Grouping_Method.Grouping_Method
+import project.Internal.Add_Group_Number
 import project.Internal.Add_Row_Number
 import project.Internal.Add_Running
 import project.Internal.Aggregate_Column_Helper
@@ -2323,6 +2325,102 @@ type Table
     add_row_number self (name:Text="Row") (from:Integer=0) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
         Incomparable_Values.handle_errors <| Add_Row_Number.add_row_number self name from step group_by order_by on_problems
 
+    ## PRIVATE add group column, group id, bucket, tile
+       GROUP Standard.Base.Values
+       ICON column_add
+       Adds a new column to the table enumerating groups of rows, assigning each
+       row to one group number. All rows in each group will get the same number.
+
+       Arguments:
+       - grouping_method: Specifies how to group the rows; see "Grouping
+         Methods", below.
+       - name: The name of the new column. Defaults to "Group".
+       - from: The starting value for the enumeration. Defaults to 0.
+       - step: The amount to increment the enumeration by. Defaults to 1.
+
+       ? Grouping Methods
+
+         The following grouping methods are supported:
+         - `Unique`: Group rows by the specified columns.
+         - `Equal_Count`: Create the specified number of groups with the same
+           number of rows in each group (except possibly the last one).
+
+       ? Ordering of rows
+
+         Note that the ordering of rows from the original table is preserved in
+         all cases. The grouping and ordering settings can affect how the group
+         numbers are assigned, depending on the grouping method. The order of
+         the rows itself is not changed by this operation.
+
+       ! Error Conditions
+
+         - If the columns specified in `on` or `order_by` are not present
+           in the table, a `Missing_Input_Columns` error is raised.
+         - If the column with the same name as provided `name` already exists,
+           a `Duplicate_Output_Column_Names` problem is reported and the
+           existing column is renamed to avoid the clash.
+         - If grouping on floating point numbers, a `Floating_Point_Equality`
+           problem is reported.
+         - If `..Unique` is given an empty list of columns, or `..Equal_Count`
+           a `group_count` smaller than 1, an `Illegal_Argument` error is
+           raised.
+         - If a group number does not fit in a 64-bit integer, an
+           `Illegal_Argument` error is raised.
+
+       > Example
+         Assign group numbers based on unique values of the first two columns.
+
+             ## table:
+                 x | y | z
+                ---+---+---
+                 1 | 0 | 2
+                 0 | 1 | 0
+                 1 | 2 | 0
+                 0 | 1 | 1
+                 1 | 0 | 1
+                 1 | 2 | 1
+             table = table_builder [['x', [1, 0, 1, 0, 1, 1]], ['y', [0, 1, 2, 1, 0, 2]], ['z', [2, 0, 0, 1, 1, 1]]]
+             table2 = table.add_group_number (..Unique on=['x', 'y']) "g"
+             table2.at 'g' . to_vector
+             # => [0, 1, 2, 1, 0, 2]
+             ## table2:
+                 x | y | z | g
+                ---+---+---+---
+                 1 | 0 | 2 | 0
+                 0 | 1 | 0 | 1
+                 1 | 2 | 0 | 2
+                 0 | 1 | 1 | 1
+                 1 | 0 | 1 | 0
+                 1 | 2 | 1 | 2
+
+       > Example
+         Divide rows into three groups.
+             ## table:
+                 x | y
+                ---+---
+                 1 | 5
+                 2 | 4
+                 3 | 3
+                 4 | 2
+                 5 | 1
+             table = table_builder [['x', [1, 2, 3, 4, 5]], ['y', [5, 4, 3, 2, 1]]]
+             table2 = table.add_group_number (..Equal_Count 3) "g"
+             table2.at 'g' . to_vector
+             # => [0, 0, 1, 1, 2]
+             ## table2:
+                 x | y | g
+                ---+---+---
+                 1 | 5 | 0
+                 2 | 4 | 0
+                 3 | 3 | 1
+                 4 | 2 | 1
+                 5 | 1 | 2
+    @grouping_method (Widget_Helpers.make_grouping_method_selector display=..Always)
+    @name (Widget.Text_Input display=..Always)
+    @from (Widget.Numeric_Input display=..Always)
+    add_group_number self (grouping_method:Grouping_Method=(Missing_Argument.throw "grouping_method")) (name:Text="Group") (from:Integer=0) (step:Integer=1) (on_problems:Problem_Behavior=..Report_Warning) -> Table =
+        Incomparable_Values.handle_errors <| Add_Group_Number.add_group_number self grouping_method name from step on_problems
+
     ## ALIAS add column, expression, formula, new column, update column
        GROUP Standard.Base.Values
        ICON column_add
diff --git a/std-bits/table/src/main/java/org/enso/table/operations/AddGroupNumber.java b/std-bits/table/src/main/java/org/enso/table/operations/AddGroupNumber.java
new file mode 100644
index 000000000000..1899130f25d0
--- /dev/null
+++ b/std-bits/table/src/main/java/org/enso/table/operations/AddGroupNumber.java
@@ -0,0 +1,158 @@
+package org.enso.table.operations;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.IntStream;
+import org.enso.base.text.TextFoldingStrategy;
+import org.enso.table.data.column.storage.Storage;
+import org.enso.table.data.column.storage.numeric.LongStorage;
+import org.enso.table.data.column.storage.type.IntegerType;
+import org.enso.table.data.index.OrderedMultiValueKey;
+import org.enso.table.data.index.UnorderedMultiValueKey;
+import org.enso.table.data.table.Column;
+import org.enso.table.problems.ColumnAggregatedProblemAggregator;
+import org.enso.table.problems.ProblemAggregator;
+import org.enso.table.util.ConstantList;
+
+/** Computes the group numbers for the in-memory {@code Table.add_group_number}. */
+public class AddGroupNumber {
+  /**
+   * Numbers the rows so that rows with equal values in all grouping columns share a number.
+   * Groups are numbered in order of first appearance: {@code start}, {@code start + step}, ...
+   *
+   * @param numRows number of rows of the table
+   * @param start number of the first group
+   * @param step difference between the numbers of consecutive groups
+   * @param groupingColumns columns whose values identify a group; must not be empty
+   * @param problemAggregator receives the problems reported while grouping
+   * @return 64-bit integer storage holding the group number of every row
+   * @throws IllegalArgumentException if no grouping column is given
+   * @throws ArithmeticException if a group number does not fit in a {@code long}
+   */
+  public static Storage<?> numberGroupsUnique(
+      long numRows,
+      long start,
+      long step,
+      Column[] groupingColumns,
+      ProblemAggregator problemAggregator) {
+    if (groupingColumns.length == 0) {
+      throw new IllegalArgumentException("At least one grouping column is required.");
+    }
+
+    var groupNumberIterator = new StepIterator(start, step);
+
+    long[] numbers = new long[Math.toIntExact(numRows)];
+
+    Storage<?>[] groupingStorages =
+        Arrays.stream(groupingColumns).map(Column::getStorage).toArray(Storage[]::new);
+    ColumnAggregatedProblemAggregator groupingProblemAggregator =
+        new ColumnAggregatedProblemAggregator(problemAggregator);
+    List<TextFoldingStrategy> textFoldingStrategy =
+        ConstantList.make(TextFoldingStrategy.unicodeNormalizedFold, groupingStorages.length);
+    Map<UnorderedMultiValueKey, Long> groupNumbers = new HashMap<>();
+
+    for (int i = 0; i < numRows; i++) {
+      var key = new UnorderedMultiValueKey(groupingStorages, i, textFoldingStrategy);
+      key.checkAndReportFloatingEquality(
+          groupingProblemAggregator, columnIx -> groupingColumns[columnIx].getName());
+      var groupNumber = groupNumbers.computeIfAbsent(key, k -> groupNumberIterator.next());
+      numbers[i] = groupNumber;
+    }
+
+    return new LongStorage(numbers, IntegerType.INT_64);
+  }
+
+  /**
+   * Splits the rows into consecutive runs of {@code ceil(numRows / groupCount)} rows and numbers
+   * the runs {@code start}, {@code start + step}, ... Rows are taken in table order, or in the
+   * order given by {@code orderingColumns} and {@code directions} if any ordering column is given.
+   *
+   * @param numRows number of rows of the table
+   * @param groupCount requested number of groups; expected to be at least 1
+   * @param start number of the first group
+   * @param step difference between the numbers of consecutive groups
+   * @param orderingColumns columns defining the order in which rows are assigned; may be empty
+   * @param directions sort direction of each ordering column, in the same order
+   * @param problemAggregator currently not used by this method
+   * @return 64-bit integer storage holding the group number of every row, in table order
+   * @throws ArithmeticException if a group number does not fit in a {@code long}
+   */
+  public static Storage<?> numberGroupsEqualCount(
+      long numRows,
+      int groupCount,
+      long start,
+      long step,
+      Column[] orderingColumns,
+      int[] directions,
+      ProblemAggregator problemAggregator) {
+    long[] numbers = new long[Math.toIntExact(numRows)];
+
+    var equalCountGenerator = new EqualCountGenerator(start, step, numRows, groupCount);
+
+    if (orderingColumns.length == 0) {
+      for (int i = 0; i < numRows; ++i) {
+        numbers[i] = equalCountGenerator.next();
+      }
+    } else {
+      Storage<?>[] orderingStorages =
+          Arrays.stream(orderingColumns).map(Column::getStorage).toArray(Storage[]::new);
+      List<OrderedMultiValueKey> keys =
+          new ArrayList<>(
+              IntStream.range(0, Math.toIntExact(numRows))
+                  .mapToObj(i -> new OrderedMultiValueKey(orderingStorages, i, directions))
+                  .toList());
+      keys.sort(null);
+      for (var key : keys) {
+        var i = key.getRowIndex();
+        numbers[i] = equalCountGenerator.next();
+      }
+    }
+
+    return new LongStorage(numbers, IntegerType.INT_64);
+  }
+
+  /** Yields {@code start}, {@code start + step}, ... using overflow-checked arithmetic. */
+  private static class StepIterator {
+    private final long step;
+    private long current;
+    private boolean started = false;
+
+    public StepIterator(long start, long step) {
+      this.step = step;
+      this.current = start;
+    }
+
+    public long next() {
+      // Advance lazily: incrementing right after handing out a value would overflow while
+      // producing the last number that still fits, even if no further number is requested.
+      if (started) {
+        current = Math.addExact(current, step);
+      }
+      started = true;
+      return current;
+    }
+  }
+
+  /** Yields the group number of each successive row for equal-count grouping. */
+  private static class EqualCountGenerator {
+    private final long start;
+    private final long step;
+    private long currentIndex = 0;
+    private final long groupSize;
+
+    public EqualCountGenerator(long start, long step, long totalCount, long numgroups) {
+      this.start = start;
+      this.step = step;
+      groupSize = (long) Math.ceil((double) totalCount / (double) numgroups);
+    }
+
+    public long next() {
+      long toReturn = Math.addExact(start, Math.multiplyExact(step, (currentIndex / groupSize)));
+      currentIndex = Math.addExact(currentIndex, 1L);
+      return toReturn;
+    }
+  }
+}
diff --git a/test/Table_Tests/src/Common_Table_Operations/Add_Group_Number_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Add_Group_Number_Spec.enso
new file mode 100644
index 000000000000..57ea0cb3b498
--- /dev/null
+++ b/test/Table_Tests/src/Common_Table_Operations/Add_Group_Number_Spec.enso
@@ -0,0 +1,152 @@
+from Standard.Base import all
+import Standard.Base.Errors.Common.Floating_Point_Equality
+import Standard.Base.Errors.Common.Missing_Argument
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
+
+import Standard.Database.Feature.Feature
+from Standard.Database.Errors import all
+
+from Standard.Table import all
+from
Standard.Table.Errors import Missing_Input_Columns, Duplicate_Output_Column_Names
+
+from Standard.Test import all
+
+from project.Common_Table_Operations.Util import run_default_backend
+import project.Util
+
+polyglot java import java.lang.Long as Java_Long
+
+main filter=Nothing = run_default_backend add_specs filter
+
+add_specs suite_builder setup =
+    if setup.is_database.not then (add_group_number_specs suite_builder setup) else
+        suite_builder.group setup.prefix+"Table.add_group_number" group_builder->
+            group_builder.specify "add_group_number should report unsupported" <|
+                table_builder = setup.light_table_builder
+                t = table_builder [['x', [1, 2, 3, 4, 5]], ['y', [5, 4, 3, 2, 1]], ['z', [1, 5, 4, 2, 3]]]
+                t.add_group_number (..Equal_Count 3) "g" . should_fail_with (Unsupported_Database_Operation.Error "add_group_number")
+
+add_group_number_specs suite_builder setup =
+    prefix = setup.prefix
+    # materialize = setup.materialize
+    # create_connection_fn = setup.create_connection_func
+
+    suite_builder.group prefix+"Table.add_group_number (common)" group_builder->
+        table_builder = setup.table_builder
+        table_builder_from_rows column_names rows = table_builder (column_names.zip rows.transpose c-> col-> [c, col])
+
+        group_builder.specify "should add group number by unique values" <|
+            t = table_builder_from_rows ['x', 'y', 'z'] [[1, 0, 2], [0, 1, 0], [1, 2, 0], [0, 1, 1], [1, 0, 1], [1, 2, 1]]
+
+            g0 = t.add_group_number (..Unique on=['x', 'y']) "g"
+            g0.at 'g' . to_vector . should_equal [0, 1, 2, 1, 0, 2]
+
+            g1 = t.add_group_number (..Unique on=['x', 'z']) "g"
+            g1.at 'g' . to_vector . should_equal [0, 1, 2, 3, 4, 4]
+
+            g2 = t.add_group_number (..Unique on=['y', 'z']) "g"
+            g2.at 'g' . to_vector . should_equal [0, 1, 2, 3, 4, 5]
+
+        group_builder.specify "should add group number by equal counts" <|
+            t = table_builder [['x', [1, 2, 3, 4, 5]], ['y', [5, 4, 3, 2, 1]], ['z', [1, 5, 4, 2, 3]]]
+
+            g0 = t.add_group_number (..Equal_Count 3) "g"
+            g0.at 'g' . to_vector . should_equal [0, 0, 1, 1, 2]
+
+            g1 = t.add_group_number (..Equal_Count 3 order_by=['x']) "g"
+            g1.at 'g' . to_vector . should_equal [0, 0, 1, 1, 2]
+
+            g2 = t.add_group_number (..Equal_Count 3 order_by=['y']) "g"
+            g2.at 'g' . to_vector . should_equal [2, 1, 1, 0, 0]
+
+            g3 = t.add_group_number (..Equal_Count 3 order_by='z') "g"
+            g3.at 'g' . to_vector . should_equal [0, 2, 1, 0, 1]
+
+            g4 = t.add_group_number (..Equal_Count 2) "g"
+            g4.at 'g' . to_vector . should_equal [0, 0, 0, 1, 1]
+
+            g5 = t.add_group_number (..Equal_Count 2 order_by=['x']) "g"
+            g5.at 'g' . to_vector . should_equal [0, 0, 0, 1, 1]
+
+            g6 = t.add_group_number (..Equal_Count 2 order_by=['y']) "g"
+            g6.at 'g' . to_vector . should_equal [1, 1, 0, 0, 0]
+
+            g7 = t.add_group_number (..Equal_Count 2 order_by='z') "g"
+            g7.at 'g' . to_vector . should_equal [0, 1, 1, 0, 0]
+
+            g8 = t.add_group_number (..Equal_Count 1) "g"
+            g8.at 'g' . to_vector . should_equal [0, 0, 0, 0, 0]
+
+            g9 = t.add_group_number (..Equal_Count 1 order_by=['x']) "g"
+            g9.at 'g' . to_vector . should_equal [0, 0, 0, 0, 0]
+
+            g10 = t.add_group_number (..Equal_Count 1 order_by=['y']) "g"
+            g10.at 'g' . to_vector . should_equal [0, 0, 0, 0, 0]
+
+
+        group_builder.specify "should add group number by unique values, with the default column name" <|
+            t = table_builder_from_rows ['x', 'y', 'z'] [[1, 0, 2], [0, 1, 0], [1, 2, 0], [0, 1, 1], [1, 0, 1], [1, 2, 1]]
+
+            g0 = t.add_group_number (..Unique on=['x', 'y'])
+            g0.at 'Group' . to_vector . should_equal [0, 1, 2, 1, 0, 2]
+
+        group_builder.specify "should add group number by unique values, with from and step" <|
+            t = table_builder_from_rows ['x', 'y', 'z'] [[1, 0, 2], [0, 1, 0], [1, 2, 0], [0, 1, 1], [1, 0, 1], [1, 2, 1]]
+
+            g0 = t.add_group_number (..Unique on=['x', 'y']) "g" from=10 step=3
+            g0.at 'g' . to_vector . should_equal [10, 13, 16, 13, 10, 16]
+
+        group_builder.specify "should add group number by equal counts, with from and step" <|
+            t = table_builder [['x', [1, 2, 3, 4, 5]], ['y', [5, 4, 3, 2, 1]], ['z', [1, 5, 4, 2, 3]]]
+
+            g0 = t.add_group_number (..Equal_Count 3) "g" from=10 step=3
+            g0.at 'g' . to_vector . should_equal [10, 10, 13, 13, 16]
+
+        group_builder.specify "must specify `on` with Unique" <|
+            t = table_builder_from_rows ['x', 'y', 'z'] [[1, 0, 2], [0, 1, 0], [1, 2, 0], [0, 1, 1], [1, 0, 1], [1, 2, 1]]
+
+            t.add_group_number ..Unique "g" . should_fail_with Missing_Argument
+
+        group_builder.specify "must specify nonempty `on` with Unique" <|
+            t = table_builder_from_rows ['x', 'y', 'z'] [[1, 0, 2], [0, 1, 0], [1, 2, 0], [0, 1, 1], [1, 0, 1], [1, 2, 1]]
+
+            t.add_group_number (..Unique on=[]) "g" . should_fail_with Illegal_Argument
+
+        group_builder.specify "must specify one or more groups with Equal_Count" <|
+            t = table_builder [['x', [1, 2, 3, 4, 5]], ['y', [5, 4, 3, 2, 1]], ['z', [1, 5, 4, 2, 3]]]
+            t.add_group_number (..Equal_Count 0) "g" . should_fail_with Illegal_Argument
+            t.add_group_number (..Equal_Count -1) "g" . should_fail_with Illegal_Argument
+
+        group_builder.specify "should report floating point equality warning when grouping on float columns" <|
+            t = table_builder_from_rows ['x', 'y', 'z'] [[1.0, 0.0, 2.0], [0.0, 1.0, 0.0], [1.0, 2.0, 0.0], [0.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 2.0, 1.0]]
+            g0 = t.add_group_number (..Unique on=['x', 'y']) "g"
+            Problems.expect_warning Floating_Point_Equality g0
+
+        group_builder.specify "should fail if columns provided in `on` do not exist" <|
+            t = table_builder_from_rows ['x', 'y', 'z'] [[1, 0, 2], [0, 1, 0], [1, 2, 0], [0, 1, 1], [1, 0, 1], [1, 2, 1]]
+
+            t.add_group_number (..Unique on=['x', 'q']) . should_fail_with Missing_Input_Columns
+
+        group_builder.specify "should fail if columns provided in order_by do not exist" <|
+            t = table_builder [['x', [1, 2, 3, 4, 5]], ['y', [5, 4, 3, 2, 1]], ['z', [1, 5, 4, 2, 3]]]
+
+            t.add_group_number (..Equal_Count 3 order_by=['q']) "g" . should_fail_with Missing_Input_Columns
+
+        group_builder.specify "will fail if the group number exceeds Long range" <|
+            max_long = Java_Long.MAX_VALUE
+
+            t = table_builder_from_rows ['x', 'y', 'z'] [[1, 0, 2], [0, 1, 0], [1, 2, 0], [0, 1, 1], [1, 0, 1], [1, 2, 1]]
+
+            Problems.assume_no_problems <| t.add_group_number (..Unique on=['x', 'y']) "g" from=(max_long - 10)
+
+            t2 = t.add_group_number (..Unique on=['x', 'y']) "g" from=(max_long - 1)
+            t2.should_fail_with Illegal_Argument
+            t2.catch.to_display_text . should_contain "The row number has exceeded the 64-bit integer range"
+
+        group_builder.specify "should rename existing column upon a name clash, and attach a warning" <|
+            t = table_builder_from_rows ['x', 'y', 'z'] [['b', 'a', 'c'], ['a', 'b', 'a'], ['b', 'b', 'a'], ['a', 'b', 'b'], ['b', 'a', 'b'], ['b', 'b', 'b']]
+
+            g0 = t.add_group_number (..Unique on=['x', 'y']) "y"
+            g0.at 'y' . to_vector . should_equal [0, 1, 2, 1, 0, 2]
+            g0.at 'y 1' . to_vector . should_equal ['a', 'b', 'b', 'b', 'a', 'b']
+            Problems.expect_warning Duplicate_Output_Column_Names g0
diff --git a/test/Table_Tests/src/Common_Table_Operations/Main.enso b/test/Table_Tests/src/Common_Table_Operations/Main.enso
index e963e1c051eb..c5aeabe281e9 100644
--- a/test/Table_Tests/src/Common_Table_Operations/Main.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Main.enso
@@ -4,6 +4,7 @@ import Standard.Database.Internal.Replace_Params.Replace_Params
 
 from Standard.Test import Test
 
+import project.Common_Table_Operations.Add_Group_Number_Spec
 import project.Common_Table_Operations.Add_Row_Number_Spec
 import project.Common_Table_Operations.Aggregate_Spec
 import project.Common_Table_Operations.Coalesce_Spec
@@ -141,6 +142,7 @@ add_specs suite_builder setup =
     Distinct_Spec.add_specs suite_builder setup
     Cross_Tab_Spec.add_specs suite_builder setup
     Transpose_Spec.add_specs suite_builder setup
+    Add_Group_Number_Spec.add_specs suite_builder setup
     Add_Row_Number_Spec.add_specs suite_builder setup
     Integration_Tests.add_specs suite_builder setup
     Temp_Column_Spec.add_specs suite_builder setup