From d97754da17bcadbcb1b60c05689e1f765f769982 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 10 May 2024 18:43:50 +0100 Subject: [PATCH] Error messages for `rename_columns` and `Vector.duplicates` (#9917) - Improve error message for `rename_columns`. - Add `length` to `Set` and `Map`. - Add `duplicates` to `Vector` (and `Array`). ![image](https://github.com/enso-org/enso/assets/4699705/623df253-52e8-4bdc-a69c-ac8dc3ca594e) --- CHANGELOG.md | 2 ++ .../Base/0.0.0-dev/src/Data/Array.enso | 20 ++++++++++++++++ .../Standard/Base/0.0.0-dev/src/Data/Map.enso | 9 +++++-- .../Standard/Base/0.0.0-dev/src/Data/Set.enso | 9 +++++-- .../Base/0.0.0-dev/src/Data/Vector.enso | 20 ++++++++++++++++ .../src/Internal/Array_Like_Helpers.enso | 7 ++++++ .../0.0.0-dev/src/Internal/Table_Helpers.enso | 16 +++++++++---- test/Base_Tests/src/Data/Vector_Spec.enso | 7 ++++++ .../Select_Columns_Spec.enso | 24 +++++++++++++++++++ 9 files changed, 106 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01929d963a5c..0dd97686f725 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -660,6 +660,7 @@ - [Added ability to write to Data Links.][9750] - [Added `Vector.build_multiple`, and better for support for errors and warnings inside `Vector.build` and `Vector.build_multiple`.][9766] +- [Added `Vector.duplicates`.][9917] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -968,6 +969,7 @@ [9577]: https://github.com/enso-org/enso/pull/9577 [9750]: https://github.com/enso-org/enso/pull/9750 [9766]: https://github.com/enso-org/enso/pull/9766 +[9917]: https://github.com/enso-org/enso/pull/9917 #### Enso Compiler diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso index e5fc3ef61893..bbbe872cb2fb 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso 
@@ -343,6 +343,26 @@ type Array distinct : (Any -> Any) -> Vector Any distinct self (on = x->x) = Array_Like_Helpers.distinct self on + ## ALIAS duplicates + GROUP Selections + ICON preparation + Returns the elements that are repeated within the array, each once. + + Arguments: + - on: A projection from the element type to the value of that element + which determines the uniqueness criteria. + + The returned elements are ordered by where each one's second + occurrence (its first repeat) appeared in the input. + + > Example + Finding the repeated entries. + + [1, 3, 1, 2, 2, 1].to_array . duplicates == [1, 2].to_array + duplicates : (Any -> Any) -> Vector Any + duplicates self (on = x->x) = + Array_Like_Helpers.duplicates self on + ## ICON dataframe_map_column Applies a function to each element of the array, returning the `Vector` of results. diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Map.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Map.enso index 443be2346889..b9eb1f9c4c1b 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Map.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Map.enso @@ -118,12 +118,17 @@ type Map key value not_empty : Boolean not_empty self = self.is_empty.not - ## GROUP Metadata - ICON metadata + ## ICON metadata Returns the number of entries in this map. size : Integer size self = @Builtin_Method "Map.size" + ## GROUP Metadata + ICON metadata + Returns the number of entries in this map (same as `size`). 
+ length : Integer + length self = self.size + ## GROUP Calculations ICON row_add Inserts a key-value mapping into this map, overriding any existing diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Set.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Set.enso index fce7e6e811a1..4065c043d45f 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Set.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Set.enso @@ -47,12 +47,17 @@ type Set to_vector : Vector to_vector self = self.underlying_map.keys - ## GROUP Metadata - ICON metadata + ## ICON metadata Returns the number of elements in this set. size : Integer size self = self.underlying_map.size + ## GROUP Metadata + ICON metadata + Returns the number of elements in this set (same as `size`). + length : Integer + length self = self.size + ## GROUP Logical ICON metadata Checks if the set is empty. diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso index 9d321f68b486..17639b716b86 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso @@ -1243,6 +1243,26 @@ type Vector a distinct self (on = x->x) = Array_Like_Helpers.distinct self on + ## ALIAS duplicates + GROUP Selections + ICON preparation + Returns the elements that are repeated within the vector, each once. + + Arguments: + - on: A projection from the element type to the value of that element + which determines the uniqueness criteria. + + The returned elements are ordered by where each one's second + occurrence (its first repeat) appeared in the input. + + > Example + Finding the repeated entries. + + [1, 3, 1, 2, 2, 1] . duplicates == [1, 2] + duplicates : (Any -> Any) -> Vector Any + duplicates self (on = x->x) = + Array_Like_Helpers.duplicates self on + ## ICON convert Returns the vector as a `Vector`. 
to_vector : Vector diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Array_Like_Helpers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Array_Like_Helpers.enso index d3fe6980b5b6..f12786d07043 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Array_Like_Helpers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Array_Like_Helpers.enso @@ -164,6 +164,13 @@ distinct vector on = builder.append item existing.insert key True +duplicates vector on = Vector.build builder-> + vector.fold Map.empty current-> item-> + key = on item + count = current.get key 0 + if count == 1 then builder.append item + current.insert key count+1 + take vector range = case range of ## We are using a specialized implementation for `take Sample`, because the default implementation (which needs to be generic for any diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso index 8cc9eb7da42d..d002ffe5afc5 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso @@ -311,10 +311,18 @@ rename_columns (naming_helper : Column_Naming_Helper) (internal_columns:Vector) is_vec_pairs = mapping.is_a Vector && mapping.length > 0 && (mapping.first.is_a Text . 
not) case is_vec_pairs of True -> - ## Attempt to treat as Map - map = Map.from_vector mapping - if map.is_error then Error.throw (Illegal_Argument.Error "A mapping Vector must be either a list of names or a list of pairs (old name to new name).") else - rename_columns naming_helper internal_columns map case_sensitivity error_on_missing_columns on_problems + ## Check every row is a 2-element Vector/Pair: (Integer | Text | Regex) => Text + is_valid_row r = r.is_a Vector || r.is_a Pair + is_valid_key k = k.is_a Integer || k.is_a Text || k.is_a Regex + all_pairs = mapping.all p-> (is_valid_row p) && p.length == 2 && (is_valid_key p.first) && p.second.is_a Text + if all_pairs.not then Error.throw (Illegal_Argument.Error "mapping is not a Vector of old name to new name.") else + ## Build a Map without failing on duplicates; a shorter Map means duplicate keys + map = Map.from_vector mapping error_on_duplicates=False + if map.length == mapping.length then rename_columns naming_helper internal_columns map case_sensitivity error_on_missing_columns on_problems else + duplicates = mapping.duplicates on=_.first . map p->p.first.to_text + duplicate_text = if duplicates.length < 5 then duplicates.to_vector . join ", " else + duplicates.take 3 . to_vector . join ", " + (", ... " + (duplicates.length - 3).to_text + " others") + Error.throw (Illegal_Argument.Error "duplicate old name mappings ("+duplicate_text+").") False -> unique = naming_helper.create_unique_name_strategy problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns diff --git a/test/Base_Tests/src/Data/Vector_Spec.enso b/test/Base_Tests/src/Data/Vector_Spec.enso index 1d8ecfa8809a..7840efde158c 100644 --- a/test/Base_Tests/src/Data/Vector_Spec.enso +++ b/test/Base_Tests/src/Data/Vector_Spec.enso @@ -857,6 +857,13 @@ type_spec suite_builder name alter = suite_builder.group name group_builder-> alter [1, 1.0, 2, 2.0] . distinct . should_equal [1, 2] alter [] . distinct . 
should_equal [] + group_builder.specify "should return a vector containing only duplicate elements" <| + alter [1, 3, 1, 2, 2, 1] . duplicates . should_equal [1, 2] + alter ["a", "a", "a"] . duplicates . should_equal ["a"] + alter ['ś', 's', 's\u0301'] . duplicates . should_equal ['s\u0301'] + alter [1, 1.0, 2, 2.0] . duplicates . should_equal [1.0, 2.0] + alter [] . duplicates . should_equal [] + group_builder.specify "should be able to handle distinct on different primitive values" <| alter [1, "a"] . distinct . should_equal [1, "a"] alter ["a", 1] . distinct . should_equal ["a", 1] diff --git a/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso index 200b0506e269..29bc11657ff3 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso @@ -1,4 +1,5 @@ from Standard.Base import all +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument from Standard.Table import Position, Value_Type, Bits from Standard.Table.Errors import all @@ -503,6 +504,29 @@ add_specs suite_builder setup = expect_column_names ["lpha", "beta", "gamma", "delta"] <| data.table.rename_columns map + group_builder.specify "should report invalid input map nicely" <| + test_invalid_map map = + result = data.table.rename_columns map + result.should_fail_with Illegal_Argument + result.catch Any . message . should_equal "mapping is not a Vector of old name to new name." + + test_invalid_map [["Alpha"]] + test_invalid_map [["Alpha", 1]] + test_invalid_map [["Alpha", "Beta", "Delta"]] + test_invalid_map [[True, "Beta"]] + + group_builder.specify "should report duplicates in input map nicely" <| + test_duplicate_names map message = + result = data.table.rename_columns map + result.should_fail_with Illegal_Argument + result.catch Any . message . 
should_equal message + + test_duplicate_names [["Alpha", "1"], ["Alpha", "2"]] "duplicate old name mappings (Alpha)." + test_duplicate_names [["Alpha", "1"], ["Beta", "2"], ["Gamma", "3"], ["Beta", "4"], ["Alpha", "5"]] "duplicate old name mappings (Beta, Alpha)." + test_duplicate_names [["Alpha", "1"], ["Alpha", "2"], ["Alpha", "3"]] "duplicate old name mappings (Alpha)." + test_duplicate_names [["Alpha", "1"], ["Beta", "2"], ["Gamma", "3"], ["Beta", "4"], ["Alpha", "5"], ["Gamma","6"], ["Delta","7"], ["Delta","8"]] "duplicate old name mappings (Beta, Alpha, Gamma, Delta)." + test_duplicate_names [["Alpha", "1"], ["Beta", "2"], ["Gamma", "3"], ["Beta", "4"], ["Alpha", "5"], ["Gamma","6"], ["Delta","7"], ["Delta","8"], ["Echo","9"], ["Echo","10"]] "duplicate old name mappings (Beta, Alpha, Gamma, ... 2 others)." + group_builder.specify "should correctly handle problems: unmatched names" <| weird_name = '.*?-!@#!"' map = Map.from_vector [["alpha", "FirstColumn"], ["omicron", "Another"], [weird_name, "Fixed"]]