diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso index 87109c555aee..e5551ad5fc6a 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso @@ -156,7 +156,8 @@ type DB_Column example_at = Examples.integer_column.get 0 -1 get : Integer -> Any -> Any | Nothing get self index:Integer=0 (~default=Nothing) = - self.read (..First index+1) . get index default + if index < 0 then Error.throw (Unsupported_Database_Operation.Error "Reading backwards from end is not supported in-database. Use `read` to materialize the column.") else + self.read (..First index+1) . get index default ## GROUP Standard.Base.Metadata ICON metadata @@ -1806,14 +1807,15 @@ type DB_Column ## GROUP Standard.Base.Conversions ICON convert + ALIAS auto_value_type Change the value type of the column to a more specific one, based on its contents. This operation is currently not available in the Database backend. - auto_value_type : Boolean -> DB_Column - auto_value_type self shrink_types=False = + auto_cast : Boolean -> DB_Column + auto_cast self shrink_types=False = _ = shrink_types - Error.throw <| Unsupported_Database_Operation.Error "`DB_Column.auto_value_type` is not supported in the Database backends." + Error.throw <| Unsupported_Database_Operation.Error "`DB_Column.auto_cast` is not supported in the Database backends." ## PRIVATE Shares the core CAST logic between `cast` and `parse`. 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso index 21ee6fd9855d..3d4ac500768a 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso @@ -172,6 +172,20 @@ type DB_Table col = self.get selector if_missing=Nothing if Nothing == col then if_missing else col.get index if_missing + ## ALIAS first cell + GROUP Standard.Base.Selections + ICON local_scope4 + Gets the top left value from the table. + first_value : Any ! Index_Out_Of_Bounds + first_value self = self.at 0 . at 0 + + ## ALIAS last cell + GROUP Standard.Base.Selections + ICON local_scope4 + Gets the bottom right value from the table. + last_value : Any ! Index_Out_Of_Bounds + last_value self = self.last_row . at -1 + ## ALIAS first field GROUP Standard.Base.Selections ICON select_column @@ -179,13 +193,6 @@ type DB_Table first_column : DB_Column ! Index_Out_Of_Bounds first_column self = self.at 0 - ## ALIAS second field - GROUP Standard.Base.Selections - ICON select_column - Gets the second column - second_column : DB_Column ! Index_Out_Of_Bounds - second_column self = self.at 1 - ## ALIAS last field GROUP Standard.Base.Selections ICON select_column @@ -1164,13 +1171,6 @@ type DB_Table first_row self = self.read (..First 1) . rows . first - ## GROUP Standard.Base.Selections - ICON select_row - Returns the second row of the table. - second_row : Row ! Index_Out_Of_Bounds - second_row self = - self.read (..First 2) . rows . second - ## GROUP Standard.Base.Selections ICON select_row Returns the last row of the table. @@ -2556,8 +2556,8 @@ type DB_Table This operation is currently not available in the Database backend. 
@columns (Widget_Helpers.make_column_name_multi_selector add_regex=True add_by_type=True) - auto_value_types : Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type -> Boolean -> Boolean -> Problem_Behavior -> DB_Table - auto_value_types self columns:(Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type)=self.column_names shrink_types:Boolean=False error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=..Report_Warning = + auto_cast : Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type -> Boolean -> Boolean -> Problem_Behavior -> DB_Table + auto_cast self columns:(Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type)=self.column_names shrink_types:Boolean=False error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=..Report_Warning = _ = [columns, shrink_types, error_on_missing_columns, on_problems] - Error.throw (Unsupported_Database_Operation.Error "DB_Table.auto_value_types is not supported in the Database backends.") + Error.throw (Unsupported_Database_Operation.Error "DB_Table.auto_cast is not supported in the Database backends.") @@ -2938,13 +2938,14 @@ type DB_Table Applies the specified cleansings to the text in each row of the specified columns Arguments: + - from: The column(s) to cleanse. - remove: A vector of the text cleanings to remove from the text. The text cleansings are applied in the order they are provided. The same text cleansing can be used multiple times. The text cleansings are: - - ..Leading_Whitespace: Removes all whitspace from the start of the string. - - ..Trailing_Whitespace: Removes all whitspace from the end of the string. - - ..Duplicate_Whitespace: Removes all duplicate whitspace from the string replacing it with the first whitespace character of the duplicated block. - - ..All_Whitespace: Removes all whitspace from the string. + - ..Leading_Whitespace: Removes all whitespace from the start of the string. + - ..Trailing_Whitespace: Removes all whitespace from the end of the string. 
+ - ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block. + - ..All_Whitespace: Removes all whitespace from the string. - ..Leading_Numbers: Removes all numbers from the start of the string. - ..Trailing_Numbers: Removes all numbers from the end of the string. - ..Non_ASCII: Removes all non-ascii characters from the string. @@ -2953,16 +2954,15 @@ type DB_Table - ..Numbers: Removes all numbers characters from the string. - ..Punctuation: Removes all characters in the set ,.!?():;'" from the string. - ..Symbols: Removes anything that isn't letters, numbers or whitespace from the string. - - from: The column(s) to cleanse. > Example Remove leading and trailing spaces from cells. - table.text_cleanse [..Leading_Whitespace, ..Trailing_Whitespace] - @remove make_data_cleanse_vector_selector + table.text_cleanse ["Input"] [..Leading_Whitespace, ..Trailing_Whitespace] @from (Widget_Helpers.make_column_name_multi_selector add_regex=True add_by_type=True) - text_cleanse : Vector Named_Pattern -> Vector (Integer | Text | Regex | By_Type) -> DB_Table - text_cleanse self remove from:(Vector (Integer | Text | Regex | By_Type)) = + @remove make_data_cleanse_vector_selector + text_cleanse : Vector (Integer | Text | Regex | By_Type) -> Vector Named_Pattern -> DB_Table + text_cleanse self from:(Vector (Integer | Text | Regex | By_Type)) remove = transformer col = col.text_cleanse remove Table_Helpers.replace_columns_with_transformed_columns self from transformer diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso index 875089cf8c86..3d9a5ca8a636 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso @@ -6,6 +6,7 @@ import Standard.Base.Errors.Common.Arithmetic_Error import Standard.Base.Errors.Common.Incomparable_Values import 
Standard.Base.Errors.Common.Index_Out_Of_Bounds import Standard.Base.Errors.Common.No_Such_Method +import Standard.Base.Errors.Deprecated.Deprecated import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Internal.Polyglot_Helpers @@ -1962,6 +1963,7 @@ type Column ## GROUP Standard.Base.Conversions ICON convert + ALIAS auto_value_type Change the value type of the column to a more specific one, based on its contents. @@ -1990,8 +1992,8 @@ type Column elements are no longer than 255 characters, the column will get a max length of 255. Otherwise, the column size limit will stay unchanged. - auto_value_type : Boolean -> Column - auto_value_type self shrink_types=False = + auto_cast : Boolean -> Column + auto_cast self shrink_types=False = new_value_type = case shrink_types of False -> self.inferred_precise_value_type True -> @@ -1999,6 +2001,45 @@ type Column # We run with Report_Error because we do not expect any problems. self.cast new_value_type on_problems=Problem_Behavior.Report_Error + ## PRIVATE + GROUP Standard.Base.Conversions + ICON convert + + Deprecated: Use `auto_cast` instead. + + Change the value type of the column to a more specific one, based on its + contents. + + Arguments: + - shrink_types: If set `True`, smaller types will be chosen if possible, + according to the rules below. Defaults to `False`. + + ? Auto Type Selection Rules + + - If a `Mixed` column can be assigned a single type, like `Char` or + `Integer`, that will be used. + - Text columns are not parsed. To do that, use the `parse` method. + - If a `Float` column contains only integers, it will be converted to + an Integer column. + - If a `Decimal` column contains only integers that could fit in a + 64-bit integer storage, it will be converted to an Integer column. + - If `shrink_types` is `False` (default), no other transformations are + applied. 
+ - However, if `shrink_types` is set to `True`, then: + - Integer columns will be assigned the smallest size that can fit all + values (down to 16-bit integers; converting to the `Byte` type has + to be done manually through `cast`). + - If all elements in a text column have the same length, the type + will become fixed length. + - Otherwise, if a text column is variable length, but all text + elements are no longer than 255 characters, the column will get a + max length of 255. Otherwise, the column size limit will stay + unchanged. + auto_value_type : Boolean -> Column + auto_value_type self shrink_types=False = + Warning.attach (Deprecated.Warning "Standard.Table.Column.Column" "auto_value_type" "Deprecated: `auto_value_type` has been replaced by `auto_cast`.") <| + self.auto_cast shrink_types + ## ALIAS transform column ICON column_add @@ -2191,11 +2232,12 @@ type Column @index (self-> Numeric_Input minimum=0 maximum=self.length-1) get : Integer -> Any -> Any | Nothing get self index:Integer=0 (~default=Nothing) = - valid_index = (index >= 0) && (index < self.length) - if valid_index.not then default else - storage = self.java_column.getStorage - if storage.isNothing index then Nothing else - java_to_enso <| storage.getItem index + if index < 0 && index >= -self.length then self.get (self.length + index) default else + valid_index = (index >= 0) && (index < self.length) + if valid_index.not then default else + storage = self.java_column.getStorage + if storage.isNothing index then Nothing else + java_to_enso <| storage.getItem index ## ICON data_input Returns a column containing rows of this column. 
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso index 9d78bbceefd7..add1526a58fc 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso @@ -296,7 +296,7 @@ read_name_map_from_table column_map:Table = case column_map.column_count of if col.value_type.is_text then col.to_vector else Error.throw (Illegal_Argument.Error "Expected a table with one or two columns of text values.") 2 -> - if column_map.first_column.value_type.is_text && column_map.second_column.value_type.is_text then Map.from_vector column_map.rows else + if column_map.first_column.value_type.is_text && (column_map.at 1).value_type.is_text then Map.from_vector column_map.rows else Error.throw (Illegal_Argument.Error "Expected a table with one or two columns of text values.") _ -> Error.throw (Illegal_Argument.Error "Expected a table with one or two columns of text values.") diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso index 292b3375017c..7f7aa2bff7c7 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso @@ -304,6 +304,20 @@ type Table col = self.get selector if_missing=Nothing if Nothing == col then if_missing else col.get index if_missing + ## ALIAS first cell + GROUP Standard.Base.Selections + ICON local_scope4 + Gets the top left value from the table. + first_value : Any ! Index_Out_Of_Bounds + first_value self = self.at 0 . at 0 + + ## ALIAS last cell + GROUP Standard.Base.Selections + ICON local_scope4 + Gets the bottom right value from the table. + last_value : Any ! Index_Out_Of_Bounds + last_value self = self.at -1 . 
at -1 + ## ALIAS first field GROUP Standard.Base.Selections ICON select_column @@ -311,13 +325,6 @@ type Table first_column : Column ! Index_Out_Of_Bounds first_column self = self.at 0 - ## ALIAS second field - GROUP Standard.Base.Selections - ICON select_column - Gets the second column - second_column : Column ! Index_Out_Of_Bounds - second_column self = self.at 1 - ## ALIAS last field GROUP Standard.Base.Selections ICON select_column @@ -1277,6 +1284,7 @@ type Table ## GROUP Standard.Base.Conversions ICON convert + ALIAS auto_value_types Change the value type of table columns to a more specific one, based on their contents. @@ -1315,13 +1323,61 @@ type Table max length of 255. Otherwise, the column size limit will stay unchanged. @columns (Widget_Helpers.make_column_name_multi_selector add_regex=True add_by_type=True) - auto_value_types : Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type -> Boolean -> Boolean -> Problem_Behavior -> Table - auto_value_types self columns:(Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type)=self.column_names shrink_types:Boolean=False error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=..Report_Warning = + auto_cast : Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type -> Boolean -> Boolean -> Problem_Behavior -> Table + auto_cast self columns:(Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type)=self.column_names shrink_types:Boolean=False error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=..Report_Warning = selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False selected.fold self table-> column_to_cast-> - new_column = column_to_cast.auto_value_type shrink_types + new_column = column_to_cast.auto_cast shrink_types table.set new_column as=column_to_cast.name 
set_mode=Set_Mode.Update + ## GROUP Standard.Base.Conversions + ICON convert + + Deprecated: Use `auto_cast` instead. + + Change the value type of table columns to a more specific one, based on + their contents. + + This is most useful for `Mixed` type columns and will allow to narrow + down the type if all values in the column fit a more specific type. + + Arguments: + - columns: The selection of columns to convert. + - shrink_types: If set `True`, smaller types will be chosen if possible, + according to the rules below. Defaults to `False`. + - error_on_missing_columns: Specifies if a missing input column should + result in an error regardless of the `on_problems` settings. Defaults + to `True`. + - on_problems: Specifies how to handle problems if they occur, reporting + them as warnings by default. + + ? Auto Type Selection Rules + + - If a `Mixed` column can be assigned a single type, like `Char` or + `Integer`, that will be used. + - Text columns are not parsed. To do that, use the `parse` method. + - If a `Float` column contains only integers, it will be converted to + an Integer column. + - If a `Decimal` column contains only integers that could fit in a + 64-bit integer storage, it will be converted to an Integer column. + - If `shrink_types` is `False` (default), no other transformations are + applied. + - However, if `shrink_types` is set to `True`, then: + - Integer columns will be assigned the smallest size that can fit all + values (down to 16-bit integers; converting to the `Byte` type has + to be done manually through `cast`). + - If all elements in a text column have the same length, the type + will become fixed length. + - Otherwise, if a text column is variable length, but all text + elements are no longer than 255 characters, the column will get a + max length of 255. Otherwise, the column size limit will stay + unchanged. 
+ @columns (Widget_Helpers.make_column_name_multi_selector add_regex=True add_by_type=True) + auto_value_types : Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type -> Boolean -> Boolean -> Problem_Behavior -> Table + auto_value_types self columns:(Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type)=self.column_names shrink_types:Boolean=False error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=..Report_Warning = + Warning.attach (Deprecated.Warning "Standard.Table.Table.Table" "auto_value_types" "Deprecated: `auto_value_types` has been replaced by `auto_cast`.") <| + self.auto_cast columns shrink_types error_on_missing_columns on_problems + ## GROUP Standard.Base.Conversions ICON split Splits a column of text into a set of new columns. @@ -1913,16 +1969,6 @@ type Table if self.row_count == 0 then Error.throw (Index_Out_Of_Bounds.Error 0 0) else Row.Value self 0 - ## GROUP Standard.Base.Selections - ICON select_row - Returns the second row of the table. - - In the database backend, it first materializes the table to in-memory. - second_row : Row ! Index_Out_Of_Bounds - second_row self = - if self.row_count < 2 then Error.throw (Index_Out_Of_Bounds.Error 1 self.row_count) else - Row.Value self 1 - ## GROUP Standard.Base.Selections ICON select_row Returns the last row of the table. @@ -2919,16 +2965,17 @@ type Table ## GROUP Standard.Base.Text ICON column_add - Applies the specified cleansings to the text in each row of the specified columns. + Applies the specified cleansings to the text in each row of the specified columns Arguments: + - from: The column(s) to cleanse. - remove: A vector of the text cleanings to remove from the text. The text cleansings are applied in the order they are provided. The same text cleansing can be used multiple times. The text cleansings are: - - ..Leading_Whitespace: Removes all whitspace from the start of the string. 
- - ..Trailing_Whitespace: Removes all whitspace from the end of the string. - - ..Duplicate_Whitespace: Removes all duplicate whitspace from the string replacing it with the first whitespace character of the duplicated block. - - ..All_Whitespace: Removes all whitspace from the string. + - ..Leading_Whitespace: Removes all whitespace from the start of the string. + - ..Trailing_Whitespace: Removes all whitespace from the end of the string. + - ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block. + - ..All_Whitespace: Removes all whitespace from the string. - ..Leading_Numbers: Removes all numbers from the start of the string. - ..Trailing_Numbers: Removes all numbers from the end of the string. - ..Non_ASCII: Removes all non-ascii characters from the string. @@ -2937,16 +2984,15 @@ type Table - ..Numbers: Removes all numbers characters from the string. - ..Punctuation: Removes all characters in the set ,.!?():;'" from the string. - ..Symbols: Removes anything that isn't letters, numbers or whitespace from the string. - - from: The column(s) to cleanse. > Example Remove leading and trailing spaces from cells. 
- table.text_cleanse [..Leading_Whitespace, ..Trailing_Whitespace] - @remove make_data_cleanse_vector_selector + table.text_cleanse ["Input"] [..Leading_Whitespace, ..Trailing_Whitespace] @from (Widget_Helpers.make_column_name_multi_selector add_regex=True add_by_type=True) - text_cleanse : Vector Named_Pattern -> Vector (Integer | Text | Regex | By_Type) -> Table - text_cleanse self remove from:(Vector (Integer | Text | Regex | By_Type)) = + @remove make_data_cleanse_vector_selector + text_cleanse : Vector (Integer | Text | Regex | By_Type) -> Vector Named_Pattern -> Table + text_cleanse self from:(Vector (Integer | Text | Regex | By_Type)) remove = transformer col = col.text_cleanse remove Table_Helpers.replace_columns_with_transformed_columns self from transformer diff --git a/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso index 1a28f9c191dc..3aa38b13fecd 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso @@ -677,8 +677,8 @@ add_specs suite_builder setup = group_builder.specify "should report unsupported" <| t = table_builder [["X", [1, 2, 3]]] - t.auto_value_types . should_fail_with Unsupported_Database_Operation - t.at "X" . auto_value_type . should_fail_with Unsupported_Database_Operation + t.auto_cast . should_fail_with Unsupported_Database_Operation + t.at "X" . auto_cast . should_fail_with Unsupported_Database_Operation # The in-memory functionality of `expand_column` is tested in test/Table_Tests/src/In_Memory/Table_Conversion_Spec.enso if setup.is_database then suite_builder.group prefix+"Table.expand_column" group_builder-> @@ -733,7 +733,7 @@ add_specs suite_builder setup = t1.at "mixed_time" . value_type . should_equal Value_Type.Mixed t1.at "bools" . value_type . 
should_equal Value_Type.Mixed - t2 = t1.auto_value_types shrink_types=shrink_types + t2 = t1.auto_cast shrink_types=shrink_types # Depending on shrink_types value the size of the Char/Integer types may vary - exact details tested elsewhere. t2.at "strs" . value_type . should_be_a (Value_Type.Char ...) t2.at "ints" . value_type . should_be_a (Value_Type.Integer ...) @@ -750,18 +750,18 @@ add_specs suite_builder setup = t0 = table_builder [["strs", [mixer, "a", "b"]], ["ints", [mixer, 2, 3]], ["floats", [mixer, 1.5, 2.5]]] t1 = t0.drop 1 - t2 = t1.auto_value_types [] + t2 = t1.auto_cast [] t2.at "strs" . value_type . should_equal Value_Type.Mixed t2.at "ints" . value_type . should_equal Value_Type.Mixed t2.at "floats" . value_type . should_equal Value_Type.Mixed - t3 = t1.auto_value_types ["strs"] + t3 = t1.auto_cast ["strs"] t3.at "strs" . value_type . should_equal Value_Type.Char t3.at "ints" . value_type . should_equal Value_Type.Mixed t3.at "floats" . value_type . should_equal Value_Type.Mixed # should match ints and floats but not strs - t4 = t1.auto_value_types "[if].*".to_regex + t4 = t1.auto_cast "[if].*".to_regex t4.at "strs" . value_type . should_equal Value_Type.Mixed t4.at "ints" . value_type . should_equal Value_Type.Integer t4.at "floats" . value_type . should_equal Value_Type.Float @@ -771,7 +771,7 @@ add_specs suite_builder setup = t0 = table_builder [["X", [1.0, 2.0, 3.0]], ["Y", [mixer, 2.5, 3.0]]] t1 = t0.drop 1 - t2 = t1.auto_value_types [..By_Type ..Float] + t2 = t1.auto_cast [..By_Type ..Float] t2.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) t2.at "Y" . value_type . should_equal Value_Type.Mixed @@ -781,7 +781,7 @@ add_specs suite_builder setup = t1.at "Y" . value_type . should_equal Value_Type.Float t1.at "Z" . value_type . should_equal Value_Type.Float - t2 = t1.auto_value_types shrink_types=False + t2 = t1.auto_cast shrink_types=False t2.at "X" . to_vector . should_equal [1, 2, 3] t2.at "X" . value_type . 
should_equal (Value_Type.Integer Bits.Bits_64) t2.at "Y" . value_type . should_equal Value_Type.Float @@ -795,7 +795,7 @@ add_specs suite_builder setup = group_builder.specify "will not parse text columns" <| t1 = table_builder [["X", ["1", "2", "3"]]] - c2 = t1.at "X" . auto_value_type + c2 = t1.at "X" . auto_cast c2.value_type . should_equal Value_Type.Char group_builder.specify "will 'undo' a cast to Mixed" <| @@ -804,7 +804,7 @@ add_specs suite_builder setup = t2.at "X" . value_type . should_equal Value_Type.Mixed t2.at "Y" . value_type . should_equal Value_Type.Mixed - t3 = t2.auto_value_types + t3 = t2.auto_cast t3.at "X" . value_type . should_equal Value_Type.Integer t3.at "Y" . value_type . should_equal Value_Type.Char @@ -813,7 +813,7 @@ add_specs suite_builder setup = c1 = c0.drop 1 c1.value_type . should_equal Value_Type.Mixed - c2 = c1.auto_value_type + c2 = c1.auto_cast c2.value_type . should_be_a (Value_Type.Decimal ...) c2.to_vector . should_equal [1, 2, (2^100)+1] @@ -831,13 +831,13 @@ add_specs suite_builder setup = True -> t1.at "F" . value_type . should_equal Value_Type.Mixed False -> t1.at "F" . value_type . should_equal Value_Type.Float - t2 = t1.auto_value_types shrink_types=False + t2 = t1.auto_cast shrink_types=False t2.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) t2.at "Y" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) t2.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) t2.at "F" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) - t3 = t1.auto_value_types shrink_types=True + t3 = t1.auto_cast shrink_types=True # Even though X's values are small enough to fit in a Byte, we stick to 16-bit Integers. t3.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_16) t3.at "Y" . value_type . should_equal (Value_Type.Integer Bits.Bits_32) @@ -850,7 +850,7 @@ add_specs suite_builder setup = c1.value_type . 
should_equal Value_Type.Byte [True, False].each shrink_types-> - c2 = c1.auto_value_type shrink_types=shrink_types + c2 = c1.auto_cast shrink_types=shrink_types c2.value_type . should_equal Value_Type.Byte group_builder.specify "Decimal (scale=0, i.e. integer) columns should also be shrinked if possible and shrink_types=True" <| @@ -861,7 +861,7 @@ add_specs suite_builder setup = t1.at "Y" . value_type . should_equal (Value_Type.Decimal scale=0) t1.at "Z" . value_type . should_equal (Value_Type.Decimal scale=0) - t2 = t1.auto_value_types shrink_types=False + t2 = t1.auto_cast shrink_types=False # Without shrinking we get an integer type, but not the smallest one - just the default 64-bit. t2.at "X" . to_vector . should_equal [1, 2, 3] @@ -869,7 +869,7 @@ add_specs suite_builder setup = t2.at "Y" . value_type . should_equal (Value_Type.Decimal scale=0) t2.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64) - t3 = t1.auto_value_types shrink_types=True + t3 = t1.auto_cast shrink_types=True t3.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_16) t3.at "Y" . value_type . should_equal (Value_Type.Decimal scale=0) t3.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_16) @@ -885,20 +885,20 @@ add_specs suite_builder setup = True -> c1.value_type . should_equal Value_Type.Mixed False -> c1.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True) - c2 = c1.auto_value_type shrink_types=False + c2 = c1.auto_cast shrink_types=False c2.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True) - c3 = c1.auto_value_type shrink_types=True + c3 = c1.auto_cast shrink_types=True c3.value_type . should_equal (Value_Type.Char size=2 variable_length=False) c4 = table_builder [["X", ["a", "x", "y"]]] . at "X" . cast (Value_Type.Char size=100 variable_length=True) c4.to_vector . should_equal ["a", "x", "y"] c4.value_type . 
should_equal (Value_Type.Char size=100 variable_length=True) - c5 = c4.auto_value_type shrink_types=False + c5 = c4.auto_cast shrink_types=False c5.value_type . should_equal (Value_Type.Char size=100 variable_length=True) - c6 = c4.auto_value_type shrink_types=True + c6 = c4.auto_cast shrink_types=True c6.value_type . should_equal (Value_Type.Char size=1 variable_length=False) group_builder.specify "if all text values are empty string, the type will remain unchanged" <| @@ -909,10 +909,10 @@ add_specs suite_builder setup = c2.value_type . should_equal (Value_Type.Char size=100 variable_length=True) [True, False].each shrink_types-> - c1_b = c1.auto_value_type shrink_types=shrink_types + c1_b = c1.auto_cast shrink_types=shrink_types c1_b.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True) - c2_b = c2.auto_value_type shrink_types=shrink_types + c2_b = c2.auto_cast shrink_types=shrink_types c2_b.value_type . should_equal (Value_Type.Char size=100 variable_length=True) group_builder.specify "if all text values fit under 255 characters, will add a 255 length limit (if shrink_types=True)" <| @@ -921,7 +921,7 @@ add_specs suite_builder setup = t2 = t1 . set (t1.at "short_unbounded" . cast (Value_Type.Char size=1000)) "short_1000" . set (t1.at "short_unbounded" . cast (Value_Type.Char size=10)) "short_10" . set (t1.at "long_unbounded" . cast (Value_Type.Char size=400)) "long_400" . set (t1.at "short_unbounded" . cast Value_Type.Mixed) "short_mixed" t2.at "short_mixed" . value_type . should_equal Value_Type.Mixed - t3 = t2.auto_value_types shrink_types=False + t3 = t2.auto_cast shrink_types=False t3.at "short_unbounded" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True) t3.at "short_1000" . value_type . should_equal (Value_Type.Char size=1000 variable_length=True) t3.at "short_10" . value_type . 
should_equal (Value_Type.Char size=10 variable_length=True) @@ -930,7 +930,7 @@ add_specs suite_builder setup = t3.at "long_unbounded" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True) t3.at "long_400" . value_type . should_equal (Value_Type.Char size=400 variable_length=True) - t4 = t2.auto_value_types shrink_types=True + t4 = t2.auto_cast shrink_types=True # Short ones get shortened to 255 unless they were shorter already. t4.at "short_unbounded" . value_type . should_equal (Value_Type.Char size=255 variable_length=True) t4.at "short_1000" . value_type . should_equal (Value_Type.Char size=255 variable_length=True) @@ -950,7 +950,7 @@ add_specs suite_builder setup = t1.at "str" . value_type . should_equal Value_Type.Char t1.at "decimal" . value_type . should_equal (Value_Type.Decimal scale=0) - t2 = t1.auto_value_types shrink_types=False + t2 = t1.auto_cast shrink_types=False t2.at "mix" . value_type . should_equal Value_Type.Mixed t2.at "int" . value_type . should_equal Value_Type.Integer ## Technically, if there are no elements, "all of elements" are @@ -961,7 +961,7 @@ add_specs suite_builder setup = t1.at "decimal" . value_type . should_equal (Value_Type.Decimal scale=0) t2.at "str" . value_type . should_equal Value_Type.Char - t3 = t1.auto_value_types shrink_types=True + t3 = t1.auto_cast shrink_types=True t3.at "mix" . value_type . should_equal Value_Type.Mixed # Technically, if there are no elements, then they can be fit inside of the smallest types available: t3.at "int" . value_type . should_equal (Value_Type.Integer Bits.Bits_16) diff --git a/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso index ea81dccec30c..eb99f8077d06 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso @@ -105,10 +105,6 @@ add_specs suite_builder setup = column_4.name . 
should_equal "foo" column_4.to_vector . should_equal [1, 2, 3] - column_5 = data.table.second_column - column_5.name . should_equal "bar" - column_5.to_vector . should_equal [4, 5, 6] - column_6 = data.table.last_column column_6.name . should_equal "abcd123" column_6.to_vector . should_equal [19, 20, 21] @@ -312,13 +308,6 @@ add_specs suite_builder setup = first_row.at "Y" . should_equal 5 first_row.at "Z" . should_equal "A" - group_builder.specify "should let you get the second row" <| - second_row = data.table.second_row - second_row . length . should_equal 3 - second_row.at "X" . should_equal 2 - second_row.at "Y" . should_equal 6 - second_row.at "Z" . should_equal "B" - group_builder.specify "should let you get the last row" <| last_row = data.table.last_row last_row . length . should_equal 3 diff --git a/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso index d26fd0f80cb9..80f7b9677e7f 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso @@ -46,7 +46,7 @@ add_specs suite_builder setup = clean_flight = ["Flight", ["BA0123", "BA0123 ", "SG0456 ", "BA 0123", "S G 0 4 5 6 "]] clean_passenger = ["Passenger", ["Albert Einstein", "Marie Curie ", "Isaac Newton ", "Stephen Hawking", "A d a Lovelace "]] expected_table = Table.new [clean_flight, clean_passenger, ticket_price] - res = table.text_cleanse [Named_Pattern.Leading_Whitespace] ["Flight", "Passenger"] + res = table.text_cleanse ["Flight", "Passenger"] [..Leading_Whitespace] case res.is_error && setup.is_database of True -> res.should_fail_with Unsupported_Database_Operation @@ -58,7 +58,7 @@ add_specs suite_builder setup = clean_flight = ["Flight", ["BA0123", "BA0123 ", "SG0456 ", "BA 0123", "S G 0 4 5 6 "]] clean_passenger = ["Passenger", ["Albert Einstein", "Marie Curie ", "Isaac Newton ", "Stephen Hawking", "A d a Lovelace 
"]] expected_table = Table.new [clean_flight, clean_passenger, ticket_price] - res = table.text_cleanse [Named_Pattern.Leading_Whitespace] [(regex "Fl.*"), (regex "P.*")] + res = table.text_cleanse [(regex "Fl.*"), (regex "P.*")] [..Leading_Whitespace] case res.is_error && setup.is_database of True -> res.should_fail_with Unsupported_Database_Operation @@ -70,7 +70,7 @@ add_specs suite_builder setup = clean_flight = ["Flight", ["BA0123", "BA0123 ", "SG0456 ", "BA 0123", "S G 0 4 5 6 "]] clean_passenger = ["Passenger", ["Albert Einstein", "Marie Curie ", "Isaac Newton ", "Stephen Hawking", "A d a Lovelace "]] expected_table = Table.new [clean_flight, clean_passenger, ticket_price] - res = table.text_cleanse [Named_Pattern.Leading_Whitespace] [..By_Type ..Char] + res = table.text_cleanse [..By_Type ..Char] [..Leading_Whitespace] case res.is_error && setup.is_database of True -> res.should_fail_with Unsupported_Database_Operation @@ -79,72 +79,72 @@ add_specs suite_builder setup = r.length . should_equal 5 r.should_equal (expected_table . rows . map .to_vector) group_builder.specify "should error if applied to non-text column" <| - table.text_cleanse [Named_Pattern.Leading_Whitespace] ["Ticket Price"] . should_fail_with Invalid_Value_Type + table.text_cleanse ["Ticket Price"] [..Leading_Whitespace] . 
should_fail_with Invalid_Value_Type suite_builder.group "Column Text Cleanse" group_builder-> test_col = Column.from_vector "Test" [" It was", "the best ", "of times", " it was the worst of times "] group_builder.specify "should remove leading whitespace" <| expected_col = Column.from_vector "Test" ["It was", "the best ", "of times", "it was the worst of times "] - res = test_col.text_cleanse [Named_Pattern.Leading_Whitespace] + res = test_col.text_cleanse [..Leading_Whitespace] res.should_equal expected_col group_builder.specify "should remove trailing whitespace" <| expected_col = Column.from_vector "Test" [" It was", "the best", "of times", " it was the worst of times"] - res = test_col.text_cleanse [Named_Pattern.Trailing_Whitespace] + res = test_col.text_cleanse [..Trailing_Whitespace] res.should_equal expected_col group_builder.specify "should remove duplicate whitespace" <| expected_col = Column.from_vector "Test" [" It was", "the best ", "of times", " it was the worst of times "] - res = test_col.text_cleanse [Named_Pattern.Duplicate_Whitespace] + res = test_col.text_cleanse [..Duplicate_Whitespace] res.should_equal expected_col group_builder.specify "should remove leading and trailing whitespace" <| expected_col = Column.from_vector "Test" ["It was", "the best", "of times", "it was the worst of times"] - res = test_col.text_cleanse [Named_Pattern.Leading_Whitespace, Named_Pattern.Trailing_Whitespace] + res = test_col.text_cleanse [..Leading_Whitespace, ..Trailing_Whitespace] res.should_equal expected_col group_builder.specify "should remove all whitespace" <| expected_col = Column.from_vector "Test" ["Itwas", "thebest", "oftimes", "itwastheworstoftimes"] - res = test_col.text_cleanse [Named_Pattern.All_Whitespace] + res = test_col.text_cleanse [..All_Whitespace] res.should_equal expected_col test_col_with_nums = Column.from_vector "Test" ["1It was", "the best2", "3of times4", " 1984 it was the worst of times 72"] group_builder.specify "should remove 
leading numbers" <| expected_col = Column.from_vector "Test" ["It was", "the best2", "of times4", " 1984 it was the worst of times 72"] - res = test_col_with_nums.text_cleanse [Named_Pattern.Leading_Numbers] + res = test_col_with_nums.text_cleanse [..Leading_Numbers] res.should_equal expected_col group_builder.specify "should remove trailing numbers" <| expected_col = Column.from_vector "Test" ["1It was", "the best", "3of times", " 1984 it was the worst of times "] - res = test_col_with_nums.text_cleanse [Named_Pattern.Trailing_Numbers] + res = test_col_with_nums.text_cleanse [..Trailing_Numbers] res.should_equal expected_col test_col_with_non_ascii_chars = Column.from_vector "Test" [" It was the 🥇", "of 🕒s", " it was the 𒀂 of 🕒s "] group_builder.specify "should remove non-ascii characters" <| expected_col = Column.from_vector "Test" [" It was the ", "of s", " it was the of s "] - res = test_col_with_non_ascii_chars.text_cleanse [Named_Pattern.Non_ASCII] + res = test_col_with_non_ascii_chars.text_cleanse [..Non_ASCII] res.should_equal expected_col group_builder.specify "should remove tabs" <| test_col_with_tabs = Column.from_vector "Test" [' It was\t the best', 'of times it was the worst\t of times '] expected_col = Column.from_vector "Test" [" It was the best", "of times it was the worst of times "] - res = test_col_with_tabs.text_cleanse [Named_Pattern.Tabs] + res = test_col_with_tabs.text_cleanse [..Tabs] res.should_equal expected_col group_builder.specify "should remove numbers and letters" <| test_col_with_nums_and_letters = Column.from_vector "Test" ["1A2B3C4", "5D6E7F8", "9G0H1I2", "3J4K5L6"] - res1 = test_col_with_nums_and_letters.text_cleanse [Named_Pattern.Numbers] - res2 = test_col_with_nums_and_letters.text_cleanse [Named_Pattern.Letters] - res3 = test_col_with_nums_and_letters.text_cleanse [Named_Pattern.Letters, Named_Pattern.Numbers] + res1 = test_col_with_nums_and_letters.text_cleanse [..Numbers] + res2 = test_col_with_nums_and_letters.text_cleanse 
[..Letters] + res3 = test_col_with_nums_and_letters.text_cleanse [..Letters, ..Numbers] res1.should_equal (Column.from_vector "Test" ["ABC", "DEF", "GHI", "JKL"]) res2.should_equal (Column.from_vector "Test" ["1234", "5678", "9012", "3456"]) res3.should_equal (Column.from_vector "Test" ["", "", "", ""]) group_builder.specify "should remove punctuation" <| test_col_with_punctuation = Column.from_vector "Test" ['Hello, World!', 'How are you?', ',.!?():;\'"'] - res = test_col_with_punctuation.text_cleanse [Named_Pattern.Punctuation] + res = test_col_with_punctuation.text_cleanse [..Punctuation] res.should_equal (Column.from_vector "Test" ["Hello World", "How are you", ""]) group_builder.specify "should remove symbols" <| test_col_with_symbols = Column.from_vector "Test" ['Hello, World123!', 'How_are_you?', ',.!?():;\'"', '🥇🕒🕒'] - res = test_col_with_symbols.text_cleanse [Named_Pattern.Symbols] + res = test_col_with_symbols.text_cleanse [..Symbols] res.should_equal (Column.from_vector "Test" ["Hello World123", "Howareyou", "", ""]) group_builder.specify "should error if applied to non-text column" <| test_col_num = Column.from_vector "Test" [1, 2, 3, 4] - test_col_num.text_cleanse [Named_Pattern.Numbers] . should_fail_with Invalid_Value_Type + test_col_num.text_cleanse [..Numbers] . 
should_fail_with Invalid_Value_Type group_builder.specify "should apply the operations in order" <| test_col_with_mixed_chars = Column.from_vector "Test" [" 11String with Leading Spaces then Leading Numbers", "22 String with Leading Numbers then Leading Spaces"] - res1 = test_col_with_mixed_chars.text_cleanse [Named_Pattern.Leading_Whitespace, Named_Pattern.Leading_Numbers] - res2 = test_col_with_mixed_chars.text_cleanse [Named_Pattern.Leading_Numbers, Named_Pattern.Leading_Whitespace] - res3 = test_col_with_mixed_chars.text_cleanse [Named_Pattern.Leading_Numbers, Named_Pattern.Leading_Whitespace, Named_Pattern.Leading_Numbers] + res1 = test_col_with_mixed_chars.text_cleanse [..Leading_Whitespace, ..Leading_Numbers] + res2 = test_col_with_mixed_chars.text_cleanse [..Leading_Numbers, ..Leading_Whitespace] + res3 = test_col_with_mixed_chars.text_cleanse [..Leading_Numbers, ..Leading_Whitespace, ..Leading_Numbers] expected_col1 = Column.from_vector "Test" ["String with Leading Spaces then Leading Numbers", " String with Leading Numbers then Leading Spaces"] expected_col2 = Column.from_vector "Test" ["11String with Leading Spaces then Leading Numbers", "String with Leading Numbers then Leading Spaces"] expected_col3 = Column.from_vector "Test" ["String with Leading Spaces then Leading Numbers", "String with Leading Numbers then Leading Spaces"]