Skip to content

Commit

Permalink
Changes from session with Ned (#10349)
Browse files Browse the repository at this point in the history
- Removed `second_row` and `second_column` from the `Table` and `DB_Table`.
- Added `first_value` and `last_value` to the `Table` and `DB_Table`.
- Fixed bug where negative index access wasn't allowed on `Column`.
- Added error if negative index access used on `DB_Column`. Tells user they have to materialize.
- Fixed argument order for `Table.text_cleanse` and corrected a couple of typos.
- Renamed `auto_value_type` to `auto_cast` on table and columns.
  • Loading branch information
jdunkerley authored Jun 24, 2024
1 parent 5233390 commit e6c8ec7
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 126 deletions.
10 changes: 6 additions & 4 deletions distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ type DB_Column
example_at = Examples.integer_column.get 0 -1
get : Integer -> Any -> Any | Nothing
get self index:Integer=0 (~default=Nothing) =
self.read (..First index+1) . get index default
if index < 0 then Error.throw (Unsupported_Database_Operation.Error "Reading backwards from end is not supported in-database. Use `read` to materialize the column.") else
self.read (..First index+1) . get index default

## GROUP Standard.Base.Metadata
ICON metadata
Expand Down Expand Up @@ -1806,14 +1807,15 @@ type DB_Column

## GROUP Standard.Base.Conversions
ICON convert
ALIAS auto_value_type
Change the value type of the column to a more specific one, based on its
contents.

This operation is currently not available in the Database backend.
auto_value_type : Boolean -> DB_Column
auto_value_type self shrink_types=False =
auto_cast : Boolean -> DB_Column
auto_cast self shrink_types=False =
_ = shrink_types
Error.throw <| Unsupported_Database_Operation.Error "`DB_Column.auto_value_type` is not supported in the Database backends."
Error.throw <| Unsupported_Database_Operation.Error "`DB_Column.auto_cast` is not supported in the Database backends."

## PRIVATE
Shares the core CAST logic between `cast` and `parse`.
Expand Down
50 changes: 25 additions & 25 deletions distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -172,20 +172,27 @@ type DB_Table
col = self.get selector if_missing=Nothing
if Nothing == col then if_missing else col.get index if_missing

## ALIAS first cell
   GROUP Standard.Base.Selections
   ICON local_scope4
   Returns the value at index 0 of the column at index 0, i.e. the top left
   cell of the table.
first_value : Any ! Index_Out_Of_Bounds
first_value self =
    column = self.at 0
    column.at 0

## ALIAS last cell
   GROUP Standard.Base.Selections
   ICON local_scope4
   Returns the final entry (index -1) of the table's last row, i.e. the
   bottom right cell of the table.
last_value : Any ! Index_Out_Of_Bounds
last_value self =
    row = self.last_row
    row.at -1

## ALIAS first field
GROUP Standard.Base.Selections
ICON select_column
Gets the first column of the table, i.e. the column at index 0.
first_column : DB_Column ! Index_Out_Of_Bounds
first_column self = self.at 0

## ALIAS second field
GROUP Standard.Base.Selections
ICON select_column
Gets the second column
second_column : DB_Column ! Index_Out_Of_Bounds
second_column self = self.at 1

## ALIAS last field
GROUP Standard.Base.Selections
ICON select_column
Expand Down Expand Up @@ -1164,13 +1171,6 @@ type DB_Table
first_row self =
self.read (..First 1) . rows . first

## GROUP Standard.Base.Selections
ICON select_row
Returns the second row of the table.
second_row : Row ! Index_Out_Of_Bounds
second_row self =
self.read (..First 2) . rows . second

## GROUP Standard.Base.Selections
ICON select_row
Returns the last row of the table.
Expand Down Expand Up @@ -2556,8 +2556,8 @@ type DB_Table

This operation is currently not available in the Database backend.
@columns (Widget_Helpers.make_column_name_multi_selector add_regex=True add_by_type=True)
auto_value_types : Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type -> Boolean -> Boolean -> Problem_Behavior -> DB_Table
auto_value_types self columns:(Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type)=self.column_names shrink_types:Boolean=False error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=..Report_Warning =
auto_cast : Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type -> Boolean -> Boolean -> Problem_Behavior -> DB_Table
auto_cast self columns:(Vector (Text | Integer | Regex | By_Type) | Text | Integer | Regex | By_Type)=self.column_names shrink_types:Boolean=False error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=..Report_Warning =
    # None of the arguments influence the result; they are bound to `_` here,
    # presumably so they are not reported as unused.
    _ = [columns, shrink_types, error_on_missing_columns, on_problems]
    # Always fails with `Unsupported_Database_Operation`. The message names the
    # method as `auto_cast` (its current name) rather than the old
    # `auto_value_types`, so users are pointed at the method they called.
    Error.throw (Unsupported_Database_Operation.Error "DB_Table.auto_cast is not supported in the Database backends.")

Expand Down Expand Up @@ -2938,13 +2938,14 @@ type DB_Table
Applies the specified cleansings to the text in each row of the specified columns

Arguments:
- from: The column(s) to cleanse.
- remove: A vector of the text cleansings to remove from the text. The text cleansings are
applied in the order they are provided. The same text cleansing can be used multiple
times. The text cleansings are:
- ..Leading_Whitespace: Removes all whitspace from the start of the string.
- ..Trailing_Whitespace: Removes all whitspace from the end of the string.
- ..Duplicate_Whitespace: Removes all duplicate whitspace from the string replacing it with the first whitespace character of the duplicated block.
- ..All_Whitespace: Removes all whitspace from the string.
- ..Leading_Whitespace: Removes all whitespace from the start of the string.
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
- ..All_Whitespace: Removes all whitespace from the string.
- ..Leading_Numbers: Removes all numbers from the start of the string.
- ..Trailing_Numbers: Removes all numbers from the end of the string.
- ..Non_ASCII: Removes all non-ascii characters from the string.
Expand All @@ -2953,16 +2954,15 @@ type DB_Table
- ..Numbers: Removes all number characters from the string.
- ..Punctuation: Removes all characters in the set ,.!?():;'" from the string.
- ..Symbols: Removes anything that isn't letters, numbers or whitespace from the string.
- from: The column(s) to cleanse.

> Example
Remove leading and trailing spaces from cells.

table.text_cleanse [..Leading_Whitespace, ..Trailing_Whitespace]
@remove make_data_cleanse_vector_selector
table.text_cleanse ["Input"] [..Leading_Whitespace, ..Trailing_Whitespace]
@from (Widget_Helpers.make_column_name_multi_selector add_regex=True add_by_type=True)
text_cleanse : Vector Named_Pattern -> Vector (Integer | Text | Regex | By_Type) -> DB_Table
text_cleanse self remove from:(Vector (Integer | Text | Regex | By_Type)) =
@remove make_data_cleanse_vector_selector
text_cleanse : Vector (Integer | Text | Regex | By_Type) -> Vector Named_Pattern -> DB_Table
text_cleanse self from:(Vector (Integer | Text | Regex | By_Type)) remove =
transformer col = col.text_cleanse remove
Table_Helpers.replace_columns_with_transformed_columns self from transformer

Expand Down
56 changes: 49 additions & 7 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Standard.Base.Errors.Common.Arithmetic_Error
import Standard.Base.Errors.Common.Incomparable_Values
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
import Standard.Base.Errors.Common.No_Such_Method
import Standard.Base.Errors.Deprecated.Deprecated
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Internal.Polyglot_Helpers
Expand Down Expand Up @@ -1962,6 +1963,7 @@ type Column

## GROUP Standard.Base.Conversions
ICON convert
ALIAS auto_value_type
Change the value type of the column to a more specific one, based on its
contents.

Expand Down Expand Up @@ -1990,15 +1992,54 @@ type Column
elements are no longer than 255 characters, the column will get a
max length of 255. Otherwise, the column size limit will stay
unchanged.
auto_value_type : Boolean -> Column
auto_value_type self shrink_types=False =
auto_cast : Boolean -> Column
auto_cast self shrink_types=False =
    # Choose the target type: the shrunk precise type inferred by the Java
    # storage when shrinking is requested, otherwise the column's inferred
    # precise value type.
    target_type = case shrink_types of
        True -> Storage.to_value_type self.java_column.getStorage.inferPreciseTypeShrunk
        False -> self.inferred_precise_value_type
    # No problems are expected from this cast, so it runs with Report_Error.
    self.cast target_type on_problems=Problem_Behavior.Report_Error

## PRIVATE
GROUP Standard.Base.Conversions
ICON convert

Deprecated: Use `auto_cast` instead.

Change the value type of the column to a more specific one, based on its
contents.

Arguments:
- shrink_types: If set `True`, smaller types will be chosen if possible,
according to the rules below. Defaults to `False`.

? Auto Type Selection Rules

- If a `Mixed` column can be assigned a single type, like `Char` or
`Integer`, that will be used.
- Text columns are not parsed. To do that, use the `parse` method.
- If a `Float` column contains only integers, it will be converted to
an Integer column.
- If a `Decimal` column contains only integers that could fit in a
64-bit integer storage, it will be converted to an Integer column.
- If `shrink_types` is `False` (default), no other transformations are
applied.
- However, if `shrink_types` is set to `True`, then:
- Integer columns will be assigned the smallest size that can fit all
values (down to 16-bit integers; converting to the `Byte` type has
to be done manually through `cast`).
- If all elements in a text column have the same length, the type
will become fixed length.
- Otherwise, if a text column is variable length, but all text
elements are no longer than 255 characters, the column will get a
max length of 255. Otherwise, the column size limit will stay
unchanged.
auto_value_type : Boolean -> Column
auto_value_type self shrink_types=False =
    # Build the deprecation notice first, then attach it to the result of the
    # replacement method `auto_cast`, which does the actual work.
    deprecation = Deprecated.Warning "Standard.Table.Column.Column" "auto_value_type" "Deprecated: `auto_value_type` has been replaced by `auto_cast`."
    Warning.attach deprecation (self.auto_cast shrink_types)

## ALIAS transform column
ICON column_add

Expand Down Expand Up @@ -2191,11 +2232,12 @@ type Column
@index (self-> Numeric_Input minimum=0 maximum=self.length-1)
get : Integer -> Any -> Any | Nothing
get self index:Integer=0 (~default=Nothing) =
valid_index = (index >= 0) && (index < self.length)
if valid_index.not then default else
storage = self.java_column.getStorage
if storage.isNothing index then Nothing else
java_to_enso <| storage.getItem index
if index < 0 && index >= -self.length then self.get (self.length + index) default else
valid_index = (index >= 0) && (index < self.length)
if valid_index.not then default else
storage = self.java_column.getStorage
if storage.isNothing index then Nothing else
java_to_enso <| storage.getItem index

## ICON data_input
Returns a column containing rows of this column.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ read_name_map_from_table column_map:Table = case column_map.column_count of
if col.value_type.is_text then col.to_vector else
Error.throw (Illegal_Argument.Error "Expected a table with one or two columns of text values.")
2 ->
if column_map.first_column.value_type.is_text && column_map.second_column.value_type.is_text then Map.from_vector column_map.rows else
if column_map.first_column.value_type.is_text && (column_map.at 1).value_type.is_text then Map.from_vector column_map.rows else
Error.throw (Illegal_Argument.Error "Expected a table with one or two columns of text values.")
_ -> Error.throw (Illegal_Argument.Error "Expected a table with one or two columns of text values.")

Expand Down
Loading

0 comments on commit e6c8ec7

Please sign in to comment.