Skip to content

Commit

Permalink
Refine single-column Table to act as a Column (#12165)
Browse files Browse the repository at this point in the history
- Closes #12137
  • Loading branch information
radeusgd authored Feb 4, 2025
1 parent 1894d2c commit 7031afe
Show file tree
Hide file tree
Showing 30 changed files with 494 additions and 186 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -547,8 +547,9 @@ private _make_table_for_name connection name alias internal_temporary_keep_alive
warning. We do not want to fail, as we do want to allow the user to
access any table already present in the database.
DB_Table_Module.make_table connection name columns ctx on_problems=Problem_Behavior.Report_Warning
result.catch SQL_Error _->
if result.is_error then
Error.throw (Table_Not_Found.Error name)
result

## PRIVATE
private _check_statement_is_allowed connection stmt =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ type DB_Column
Converts this column into a single-column table.
to_table : DB_Table
to_table self =
DB_Table.Value self.name self.connection [self.as_internal] self.context
DB_Table.new self.name self.connection [self.as_internal] self.context

## ALIAS column type, field info, metadata
GROUP Standard.Base.Metadata
Expand Down
39 changes: 24 additions & 15 deletions distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ import project.Internal.IR.Query.Query
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
import project.Internal.Type_Refinements.DB_Table_Refinements
import project.SQL_Query.SQL_Query
import project.SQL_Statement.SQL_Statement
import project.SQL_Type.SQL_Type
Expand All @@ -80,9 +81,8 @@ polyglot java import java.util.UUID

## Represents a column-oriented table data structure backed by a database.
type DB_Table
## PRIVATE

Represents a column-oriented table data structure backed by a database.
## Internal constructor that should not be used directly.
Please use `DB_Table.new` instead.

Arguments:
- internal_name: The name of the table.
Expand All @@ -91,6 +91,14 @@ type DB_Table
- context: The context associated with this table.
private Value internal_name:Text connection:(Connection | Any) (internal_columns:(Vector Internal_Column)) context:Context

## The internal constructor used to construct a DB_Table instance.

It can perform some additional operations, like refining the type,
so it should always be preferred over calling `DB_Table.Value` directly.
private new (internal_name:Text) connection:(Connection | Any) (internal_columns:(Vector Internal_Column)) context:Context -> DB_Table =
DB_Table_Refinements.refine_table <|
DB_Table.Value internal_name connection internal_columns context

## GROUP Standard.Base.Metadata
ICON metadata
The name of the table.
Expand Down Expand Up @@ -1648,7 +1656,7 @@ type DB_Table
problem_builder.attach_problems_before on_problems <|
new_from = From_Spec.Join sql_join_kind left_setup.subquery right_setup.subquery on_expressions
new_ctx = Context.for_subquery new_from . set_where_filters where_expressions
DB_Table.Value new_table_name self.connection result_columns new_ctx
DB_Table.new new_table_name self.connection result_columns new_ctx

## ALIAS append, cartesian join
GROUP Standard.Base.Calculations
Expand Down Expand Up @@ -2067,7 +2075,7 @@ type DB_Table
input_column = Internal_Column.Value name (infer_return_type expression) expression
dialect.adapt_unified_column input_column result_type infer_return_type

DB_Table.Value union_alias self.connection new_columns new_ctx
DB_Table.new union_alias self.connection new_columns new_ctx

## ALIAS average, count, count distinct, first, group by, last, longest, maximum, mean, median, minimum, mode, percentile, shortest, standard deviation, sum, summarize, variance
GROUP Standard.Base.Calculations
Expand Down Expand Up @@ -2805,7 +2813,7 @@ type DB_Table
Arguments:
- columns: The columns with which to update this table.
updated_columns : Vector Internal_Column -> DB_Table
updated_columns self internal_columns = DB_Table.Value self.name self.connection internal_columns self.context
updated_columns self internal_columns = DB_Table.new self.name self.connection internal_columns self.context

## PRIVATE

Expand All @@ -2814,7 +2822,7 @@ type DB_Table
Arguments:
- ctx: The new context for this table.
updated_context : Context -> DB_Table
updated_context self ctx = DB_Table.Value self.name self.connection self.internal_columns ctx
updated_context self ctx = DB_Table.new self.name self.connection self.internal_columns ctx

## PRIVATE

Expand All @@ -2838,9 +2846,9 @@ type DB_Table
setup = ctx.as_subquery self.name [internal_columns]
new_ctx = Context.for_subquery setup.subquery
new_columns = setup.new_columns.first
DB_Table.Value self.name self.connection new_columns new_ctx
DB_Table.new self.name self.connection new_columns new_ctx
False ->
DB_Table.Value self.name self.connection internal_columns ctx
DB_Table.new self.name self.connection internal_columns ctx

## PRIVATE
Nests a table as a subquery, using `updated_context_and_columns`, which
Expand Down Expand Up @@ -2926,10 +2934,11 @@ type DB_Table

- `Auto_Detect`: The file format is determined by the provided file.
- `Bytes` and `Plain_Text`: The Table does not support these types in
the `write` function. If passed as format, an
`Illegal_Argument` is raised. To write out the table as plain
text, the user needs to call the `Text.from Table` method and then
use the `Text.write` function.
the `write` function. If passed as a format, an
`Illegal_Argument` is raised. To write out the table as plain
text, the user needs to convert the Table to Text
(e.g. using the `to_delimited` method) and then use the
`Text.write` function.

> Example
Write a database table to a CSV file.
Expand Down Expand Up @@ -3216,7 +3225,7 @@ make_table connection table_name columns ctx on_problems =
problem_builder.report_unique_name_strategy column_names_validator
# We do not want to stop the table from being fetched, so we report the issues as warnings.
problem_builder.attach_problems_before on_problems <|
DB_Table.Value table_name connection cols ctx
DB_Table.new table_name connection cols ctx

## PRIVATE
By default, join on the first column, unless it's a cross join, in which
Expand Down Expand Up @@ -3276,7 +3285,7 @@ make_literal_table connection column_vectors column_names alias =
if needs_cast.not then base_column else
connection.dialect.make_cast base_column sql_type infer_type_from_database

DB_Table.Value alias connection internal_columns context
DB_Table.new alias connection internal_columns context

## PRIVATE
Many_Files_List.from (that : DB_Table) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ build_lookup_query base_table lookup_table key_columns add_new_columns allow_unm
new_ctx_with_invariant_check = new_ctx.add_where_filters [make_invariant_check subquery_setup.lookup_counter allow_unmatched_rows]

precheck_for_duplicate_matches lookup_columns subquery_setup base_table.connection new_ctx <|
DB_Table.Value subquery_setup.new_table_name base_table.connection new_columns new_ctx_with_invariant_check
DB_Table.new subquery_setup.new_table_name base_table.connection new_columns new_ctx_with_invariant_check

## PRIVATE
Checks if they key contains NULL values or if there would be unmatched rows
Expand Down Expand Up @@ -196,7 +196,7 @@ precheck_for_duplicate_matches lookup_columns subquery_setup connection new_ctx
key_columns_for_duplicate_check = (_.flatten) <| lookup_columns.map_with_index ix-> c-> case c of
Lookup_Column.Key_Column _ _ -> [subquery_setup.get_self_column ix]
_ -> []
table_for_duplicate_check = DB_Table.Value subquery_setup.new_table_name connection [subquery_setup.lookup_counter]+key_columns_for_duplicate_check new_ctx
table_for_duplicate_check = DB_Table.new subquery_setup.new_table_name connection [subquery_setup.lookup_counter]+key_columns_for_duplicate_check new_ctx
duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read (..First 1)
case duplicate_lookup_matches.row_count > 0 of
True ->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
private

from Standard.Base import all

import project.DB_Column.DB_Column
import project.DB_Table.DB_Table
from project.Internal.Type_Refinements.Single_Column_DB_Table_Conversions import all

## Refines the type of a table so that a single-column table can also be
   treated as a column.

   If the table has exactly one column, the value is ascribed the
   intersection type `DB_Table & DB_Column`; the ascription relies on the
   `DB_Column.from (that : DB_Table)` conversion imported from
   `Single_Column_DB_Table_Conversions`. Tables with any other column
   count are returned unchanged.
refine_table (table : DB_Table) =
    if is_single_column table . not then table else
        r = table : DB_Table & DB_Column
        r

## Checks whether the given table consists of exactly one column.
is_single_column table:DB_Table -> Boolean =
    count = table.column_count
    count == 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
private

from Standard.Base import all

import project.DB_Column.DB_Column
import project.DB_Table.DB_Table
from project.Internal.Type_Refinements.DB_Table_Refinements import is_single_column

## This conversion is internal and should never be exported.

   Converts a single-column `DB_Table` into the `DB_Column` it contains,
   by extracting the column at index 0.

   The `Runtime.assert` documents the invariant that this conversion is
   only ever applied to tables that have exactly one column — callers are
   expected to have checked `is_single_column` beforehand (as the
   refinement in `DB_Table_Refinements` does).
DB_Column.from (that : DB_Table) -> DB_Column =
    Runtime.assert (is_single_column that)
    that.at 0
19 changes: 12 additions & 7 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import project.Internal.Offset_Helper
import project.Internal.Parse_Values_Helper
import project.Internal.Read_Many_Helpers
import project.Internal.Storage
import project.Internal.Type_Refinements.Column_Refinements
import project.Internal.Value_Type_Helpers
import project.Internal.Widget_Helpers
import project.Fill_With.Fill_With
Expand All @@ -38,7 +39,7 @@ from project.Errors import Conversion_Failure, Inexact_Type_Coercion, Invalid_Co
from project.Internal.Column_Format import all
from project.Internal.Java_Exports import make_string_builder
from project.Internal.Storage import enso_to_java, java_to_enso
from project.Internal.Type_Refinements.Single_Value_Column import refine_with_single_value
from project.Table import from_java_table

polyglot java import org.enso.base.Time_Utils
polyglot java import org.enso.table.data.column.operation.cast.CastProblemAggregator
Expand Down Expand Up @@ -142,7 +143,7 @@ type Column
Creates a new column given a Java Column object.
from_java_column java_column:Java_Column -> Column =
column = Column.Value java_column
column |> refine_with_single_value
Column_Refinements.refine_column column

## PRIVATE
ADVANCED
Expand All @@ -160,12 +161,16 @@ type Column
Column.from_java_column java_column

## PRIVATE

A representation of a column in a Table.
Internal constructor that should not be used directly.
Please use `from_java_column` instead.

Arguments:
- java_column: The internal representation of the column.
private Value java_column
- internal_java_column: The internal representation of the column.
private Value internal_java_column

## PRIVATE
A getter that is a workaround for bug https://github.com/enso-org/enso/issues/12180
private java_column self = self.internal_java_column

## PRIVATE
ADVANCED
Expand Down Expand Up @@ -2396,7 +2401,7 @@ type Column

example_to_table = Examples.integer_column.to_table
to_table : Table
to_table self = Table.Value self.java_column.toTable
to_table self = from_java_table self.java_column.toTable

## ALIAS column type, field info, metadata
GROUP Standard.Base.Metadata
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import project.Table.Table
from project.Errors import Empty_Sheet, No_Rows
from project.Internal.Excel_Reader import handle_invalid_location
from project.Internal.Table_Helpers import duplicate_rows
from project.Table import from_java_table

polyglot java import java.io.File as Java_File
polyglot java import org.apache.poi.ss.usermodel.Workbook
Expand Down Expand Up @@ -244,7 +245,7 @@ type Excel_Workbook
names = self.sheet_names
if (query < 1 || query >= names.length) then Error.throw (Illegal_Argument.Error "Worksheet index out of range (1 - "+names.length.to_text+").") else
ExcelReader.readRangeByName java_workbook (names.at (query - 1)) java_headers skip_rows java_limit java_problem_aggregator
limit.attach_warning (Table.Value java_table)
limit.attach_warning (from_java_table java_table)

## PRIVATE
GROUP Standard.Base.Input
Expand All @@ -269,15 +270,15 @@ type Excel_Workbook
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
self.with_java_workbook java_workbook->
ExcelReader.readRangeByName java_workbook sheet_name java_headers skip_rows java_limit java_problem_aggregator
row_limit.attach_warning (Table.Value java_table)
row_limit.attach_warning (from_java_table java_table)
Excel_Section.Cell_Range address headers skip_rows row_limit ->
java_headers = Excel_Reader.make_java_headers headers
java_limit = row_limit.rows_to_read
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
self.with_java_workbook java_workbook-> case address of
_ : Excel_Range -> ExcelReader.readRange java_workbook address.java_range java_headers skip_rows java_limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_workbook address java_headers skip_rows java_limit java_problem_aggregator
row_limit.attach_warning (Table.Value java_table)
row_limit.attach_warning (from_java_table java_table)

## PRIVATE
GROUP Standard.Base.Input
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import project.Internal.Java_Problems
import project.Rows_To_Read.Rows_To_Read
import project.Table.Table
from project.Errors import Empty_File_Error, Mismatched_Quote, Parser_Error
from project.Table import from_java_table

polyglot java import com.univocity.parsers.common.TextParsingException
polyglot java import java.io.InputStream
Expand Down Expand Up @@ -98,7 +99,7 @@ read_from_reader format java_reader on_problems:Problem_Behavior max_columns=409
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
reader = prepare_reader format max_columns on_problems java_problem_aggregator
java_table = reader.read java_reader
format.row_limit.attach_warning (Table.Value java_table)
format.row_limit.attach_warning (from_java_table java_table)

## PRIVATE
prepare_reader format:Delimited_Format max_columns on_problems:Problem_Behavior java_problem_aggregator newline_override=Nothing =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import project.Internal.Java_Problems
import project.Match_Columns.Match_Columns
import project.Table.Table
from project.Errors import Column_Count_Mismatch, Column_Name_Mismatch
from project.Table import from_java_table

polyglot java import java.io.IOException
polyglot java import java.io.PrintWriter
Expand Down Expand Up @@ -94,7 +95,7 @@ append_to_local_file table format (file : File) match_columns on_problems:Proble
Error.throw (Illegal_Argument.Error "Cannot append by name when headers are not present in the existing data.")
Match_Columns.By_Position ->
ColumnMapper.mapColumnsByPosition table.java_table column_count
reordered_table = Table.Value reordered_java_table
reordered_table = from_java_table reordered_java_table
writing_new_file = preexisting_headers == Nothing
amended_format_1 = case writing_new_file && (should_write_headers format.headers) of
True -> format.with_headers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import project.Internal.Excel_Section.Excel_Section
import project.Internal.Java_Problems
import project.Table.Table
from project.Errors import Duplicate_Output_Column_Names, Empty_Sheet, Invalid_Column_Names, Invalid_Location
from project.Table import from_java_table

polyglot java import java.io.File as Java_File
polyglot java import org.apache.poi.poifs.filesystem.NotOLE2FileException
Expand Down Expand Up @@ -60,14 +61,14 @@ read_file file section on_problems:Problem_Behavior xls_format=False =
java_table = case sheet of
_ : Integer -> ExcelReader.readSheetByIndex java_file sheet (make_java_headers headers) skip_rows java_limit file_format java_problem_aggregator
_ : Text -> ExcelReader.readSheetByName java_file sheet (make_java_headers headers) skip_rows java_limit file_format java_problem_aggregator
row_limit.attach_warning (Table.Value java_table)
row_limit.attach_warning (from_java_table java_table)
Excel_Section.Cell_Range address headers skip_rows row_limit ->
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
java_limit = row_limit.rows_to_read
java_table = case address of
_ : Excel_Range -> ExcelReader.readRange java_file address.java_range (make_java_headers headers) skip_rows java_limit file_format java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_file address (make_java_headers headers) skip_rows java_limit file_format java_problem_aggregator
row_limit.attach_warning (Table.Value java_table)
row_limit.attach_warning (from_java_table java_table)

handle_reader file reader

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,26 +77,26 @@ _merge_input_and_tables (input_table : Table) (tables_for_rows : Vector Read_Man

multiplicated_inputs = duplicate_rows input_table counts
Runtime.assert (unified_data.row_count == multiplicated_inputs.row_count)
Runtime.assert (unified_metadata.is_nothing || (unified_metadata.row_count == unified_data.row_count))
Runtime.assert ((Nothing == unified_metadata) || (unified_metadata.row_count == unified_data.row_count))

first_pass = if unified_metadata.is_nothing then multiplicated_inputs else
first_pass = if Nothing == unified_metadata then multiplicated_inputs else
multiplicated_inputs.zip unified_metadata right_prefix=""
first_pass.zip unified_data right_prefix=""

## Unifies provided metadata tables, knowing that some tables may have no
metadata - in such case we want to insert as many Nothing rows for metadata
as there are rows in the corresponding data table.
_unify_metadata (tables : Vector Read_Many_As_Table_Result) (on_problems : Problem_Behavior) -> Table | Nothing =
has_no_metadata = tables.all r-> r.metadata.is_nothing
has_no_metadata = tables.all r-> Nothing == r.metadata
if has_no_metadata then Nothing else
unique = Column_Naming_Helper.in_memory.create_unique_name_strategy
tables.each r->
if r.metadata.is_nothing.not then unique.mark_used r.metadata.column_names
if Nothing != r.metadata then unique.mark_used r.metadata.column_names

# A dummy column because we cannot create a table with 0 columns, it will be removed after union. We find an unique name for it to avoid conflicts.
dummy_column_name = unique.make_unique "_Internal_Placeholder_Column_"
tables_for_union = tables.map r->
if r.metadata.is_nothing.not then r.metadata else
if Nothing != r.metadata then r.metadata else
Table.new [Column.from_repeated_item dummy_column_name Nothing r.data.row_count]

# Metadata are always merged by-name and columns that appear only in some tables are kept.
Expand Down
Loading

0 comments on commit 7031afe

Please sign in to comment.