diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 873cd1a61908..97cf8e36733e 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -10,6 +10,7 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Internal.Polyglot_Helpers import Standard.Base.Internal.Rounding_Helpers +from Standard.Base.Metadata.Widget import Numeric_Input from Standard.Base.Widget_Helpers import make_format_chooser, make_regex_text_widget import project.Data.Constants.Previous_Value @@ -144,7 +145,6 @@ type Column display : Integer -> Boolean -> Text display self show_rows=10 format_terminal=False = java_col = self.java_column - index = java_col.getIndex col_name = normalize_string_for_display java_col.getName storage = java_col.getStorage num_rows = java_col.getSize @@ -152,8 +152,8 @@ type Column items = Vector.new display_rows num-> row = if storage.isNa num then "Nothing" else get_item_string storage num - [index.ilocString num, row] - table = print_table [index.getName, col_name] items 1 format_terminal + [num.to_text, row] + table = print_table ["", col_name] items 1 format_terminal if num_rows - display_rows <= 0 then table else missing = '\n\u2026 and ' + (num_rows - display_rows).to_text + ' hidden rows.' table + missing @@ -2093,6 +2093,7 @@ type Column import Standard.Examples example_at = Examples.integer_column.at 0 + @index (self-> Numeric_Input minimum=0 maximum=self.length-1) at : Integer -> (Any | Nothing) ! 
Index_Out_Of_Bounds at self (index : Integer) = self.get index (Error.throw (Index_Out_Of_Bounds.Error index self.length)) @@ -2111,6 +2112,7 @@ type Column import Standard.Examples example_at = Examples.integer_column.get 0 -1 + @index (self-> Numeric_Input minimum=0 maximum=self.length-1) get : Integer -> Any -> Any | Nothing get self (index : Integer) (~default=Nothing) = valid_index = (index >= 0) && (index < self.length) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 5b0bf3ca2dc0..9bad5dacafe2 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -179,15 +179,14 @@ type Table display : Integer -> Boolean -> Text display self show_rows=10 format_terminal=False = cols = Vector.from_polyglot_array self.java_table.getColumns - index = self.java_table.getIndex - col_names = ([index.getName] + cols.map .getName) . map normalize_string_for_display + col_names = ([""] + cols.map .getName) . map normalize_string_for_display col_vals = cols.map .getStorage num_rows = self.row_count display_rows = num_rows.min show_rows rows = Vector.new display_rows row_num-> cols = col_vals.map col-> if col.isNa row_num then "Nothing" else get_item_string col row_num - [index.ilocString row_num] + cols + [row_num.to_text] + cols table = print_table col_names rows 1 format_terminal if num_rows - display_rows <= 0 then table else missing = '\n\u2026 and ' + (num_rows - display_rows).to_text + ' hidden rows.' @@ -1419,7 +1418,7 @@ type Table filter : (Column | Text | Integer) -> (Filter_Condition | (Any -> Boolean)) -> Problem_Behavior -> Table ! 
No_Such_Column | Index_Out_Of_Bounds | Invalid_Value_Type filter self column (filter : Filter_Condition | (Any -> Boolean) = Filter_Condition.Equal True) on_problems=Report_Warning = case column of _ : Column -> - mask filter_column = Table.Value (self.java_table.mask filter_column.java_column) + mask filter_column = Table.Value (self.java_table.filter filter_column.java_column) case filter of _ : Filter_Condition -> resolved = (self:Table_Ref).resolve_condition filter diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Fan_Out.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Fan_Out.enso index 90b6574a58b1..d88969a5c009 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Fan_Out.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Fan_Out.enso @@ -128,10 +128,7 @@ fan_out_to_rows_and_columns table input_column_id function column_names at_least new_columns_unflattened = table.columns.map column-> # Replace the input column with the output columns. 
if column.name == input_column_id then output_columns else - # Build a new column from the old one with the mask - old_storage = column.java_column.getStorage - new_storage = old_storage.applyMask order_mask - [Column.from_storage column.name new_storage] + [Column.Value (column.java_column.applyMask order_mask)] new_table = Table.new new_columns_unflattened.flatten problem_builder.attach_problems_after on_problems new_table diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java index c01ac1d080a6..1651082283e7 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java @@ -12,7 +12,6 @@ import org.enso.table.data.column.operation.map.bool.BooleanIsInOp; import org.enso.table.data.column.storage.type.BooleanType; import org.enso.table.data.column.storage.type.StorageType; -import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.enso.table.error.UnexpectedColumnTypeException; @@ -177,13 +176,13 @@ public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { } @Override - public BoolStorage mask(BitSet mask, int cardinality) { + public BoolStorage applyFilter(BitSet filterMask, int newLength) { Context context = Context.getCurrent(); BitSet newMissing = new BitSet(); BitSet newValues = new BitSet(); int resultIx = 0; for (int i = 0; i < size; i++) { - if (mask.get(i)) { + if (filterMask.get(i)) { if (isMissing.get(i)) { newMissing.set(resultIx++); } else if (values.get(i)) { @@ -197,7 +196,7 @@ public BoolStorage mask(BitSet mask, int cardinality) { context.safepoint(); } - return new BoolStorage(newValues, newMissing, cardinality, negated); + return new BoolStorage(newValues, newMissing, newLength, negated); } @Override @@ -207,7 +206,7 
@@ public BoolStorage applyMask(OrderMask mask) { BitSet newVals = new BitSet(); for (int i = 0; i < mask.length(); i++) { int position = mask.get(i); - if (position == Index.NOT_FOUND || isMissing.get(position)) { + if (position == Storage.NOT_FOUND_INDEX || isMissing.get(position)) { newNa.set(i); } else if (values.get(position)) { newVals.set(i); @@ -218,25 +217,6 @@ public BoolStorage applyMask(OrderMask mask) { return new BoolStorage(newVals, newNa, mask.length(), negated); } - @Override - public BoolStorage countMask(int[] counts, int total) { - Context context = Context.getCurrent(); - BitSet newNa = new BitSet(); - BitSet newVals = new BitSet(); - int pos = 0; - for (int i = 0; i < counts.length; i++) { - if (isMissing.get(i)) { - newNa.set(pos, pos + counts[i]); - } else if (values.get(i)) { - newVals.set(pos, pos + counts[i]); - } - pos += counts[i]; - - context.safepoint(); - } - return new BoolStorage(newVals, newNa, total, negated); - } - public boolean isNegated() { return negated; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java index 054125816b8d..61309b8ed427 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java @@ -91,8 +91,8 @@ public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { } @Override - public Storage mask(BitSet mask, int cardinality) { - Storage newStorage = underlyingStorage.mask(mask, cardinality); + public Storage applyFilter(BitSet filterMask, int newLength) { + Storage newStorage = underlyingStorage.applyFilter(filterMask, newLength); return new MixedStorageFacade(newStorage); } @@ -102,12 +102,6 @@ public Storage applyMask(OrderMask mask) { return new MixedStorageFacade(newStorage); } - @Override - public Storage countMask(int[] 
counts, int total) { - Storage newStorage = underlyingStorage.countMask(counts, total); - return new MixedStorageFacade(newStorage); - } - @Override public Storage slice(int offset, int limit) { Storage newStorage = underlyingStorage.slice(offset, limit); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java index a19c113e64ec..6d557c40fc51 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java @@ -6,7 +6,6 @@ import org.enso.table.data.column.operation.map.MapOperationProblemAggregator; import org.enso.table.data.column.operation.map.MapOperationStorage; import org.enso.table.data.column.storage.type.StorageType; -import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.graalvm.polyglot.Context; @@ -110,18 +109,18 @@ public Storage runVectorizedZip( } @Override - public SpecializedStorage mask(BitSet mask, int cardinality) { + public SpecializedStorage applyFilter(BitSet filterMask, int newLength) { Context context = Context.getCurrent(); - T[] newData = newUnderlyingArray(cardinality); + T[] newData = newUnderlyingArray(newLength); int resIx = 0; for (int i = 0; i < size; i++) { - if (mask.get(i)) { + if (filterMask.get(i)) { newData[resIx++] = data[i]; } context.safepoint(); } - return newInstance(newData, cardinality); + return newInstance(newData, newLength); } @Override @@ -130,26 +129,12 @@ public SpecializedStorage applyMask(OrderMask mask) { T[] newData = newUnderlyingArray(mask.length()); for (int i = 0; i < mask.length(); i++) { int position = mask.get(i); - newData[i] = position == Index.NOT_FOUND ? null : data[position]; + newData[i] = position == Storage.NOT_FOUND_INDEX ? 
null : data[position]; context.safepoint(); } return newInstance(newData, newData.length); } - @Override - public SpecializedStorage countMask(int[] counts, int total) { - Context context = Context.getCurrent(); - T[] newData = newUnderlyingArray(total); - int pos = 0; - for (int i = 0; i < counts.length; i++) { - for (int j = 0; j < counts[i]; j++) { - newData[pos++] = data[i]; - context.safepoint(); - } - } - return newInstance(newData, total); - } - public T[] getData() { return data; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java index afaa1940e159..7e0404d02549 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -21,6 +21,9 @@ /** An abstract representation of a data column. */ public abstract class Storage { + /** A constant representing the index of a missing value in a column. */ + public static final int NOT_FOUND_INDEX = -1; + /** * @return the number of elements in this column (including NAs) */ @@ -472,11 +475,11 @@ public Storage fillMissingFrom( /** * Return a new storage, containing only the items marked true in the mask. * - * @param mask the mask to use - * @param cardinality the number of true values in mask - * @return a new storage, masked with the given mask + * @param filterMask the mask to use + * @param newLength the number of true values in mask + * @return a new storage, filtered with the given mask */ - public abstract Storage mask(BitSet mask, int cardinality); + public abstract Storage applyFilter(BitSet filterMask, int newLength); /** * Returns a new storage, ordered according to the rules specified in a mask. 
@@ -485,19 +488,6 @@ public Storage fillMissingFrom( */ public abstract Storage applyMask(OrderMask mask); - /** - * Returns a new storage, resulting from applying the rules specified in a mask. The resulting - * storage should contain the elements of the original storage, in the same order. However, the - * number of consecutive copies of the i-th element of the original storage should be {@code - * counts[i]}. - * - * @param counts the mask specifying elements duplication - * @param total the sum of all elements in the mask, also interpreted as the length of the - * resulting storage - * @return the storage masked according to the specified rules - */ - public abstract Storage countMask(int[] counts, int total); - /** * @return a copy of the storage containing a slice of the original data */ diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedLongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedLongStorage.java index e512824dd3f9..37bedc257c6c 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedLongStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedLongStorage.java @@ -4,7 +4,6 @@ import java.util.List; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.IntegerType; -import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.graalvm.polyglot.Context; @@ -63,19 +62,19 @@ public BitSet getIsMissing() { } @Override - public Storage mask(BitSet mask, int cardinality) { + public Storage applyFilter(BitSet filterMask, int newLength) { BitSet newMissing = new BitSet(); - long[] newData = new long[cardinality]; + long[] newData = new long[newLength]; int resIx = 0; Context context = Context.getCurrent(); for (int i = 0; i < size; i++) { - if (mask.get(i)) { + if 
(filterMask.get(i)) { newData[resIx++] = getItem(i); } context.safepoint(); } - return new LongStorage(newData, cardinality, newMissing, getType()); + return new LongStorage(newData, newLength, newMissing, getType()); } @Override @@ -85,7 +84,7 @@ public Storage applyMask(OrderMask mask) { Context context = Context.getCurrent(); for (int i = 0; i < mask.length(); i++) { int position = mask.get(i); - if (position == Index.NOT_FOUND) { + if (position == Storage.NOT_FOUND_INDEX) { newMissing.set(i); } else { newData[i] = getItem(position); @@ -96,23 +95,6 @@ public Storage applyMask(OrderMask mask) { return new LongStorage(newData, newData.length, newMissing, getType()); } - @Override - public Storage countMask(int[] counts, int total) { - long[] newData = new long[total]; - BitSet newMissing = new BitSet(); - int pos = 0; - Context context = Context.getCurrent(); - for (int i = 0; i < counts.length; i++) { - long item = getItem(i); - for (int j = 0; j < counts[i]; j++) { - newData[pos++] = item; - } - - context.safepoint(); - } - return new LongStorage(newData, total, newMissing, getType()); - } - @Override public Storage slice(int offset, int limit) { int newSize = Math.min(size - offset, limit); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedNullableLongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedNullableLongStorage.java index 747afc0db8cd..0e72870ef197 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedNullableLongStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedNullableLongStorage.java @@ -4,7 +4,6 @@ import java.util.List; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.IntegerType; -import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import 
org.graalvm.polyglot.Context; @@ -78,13 +77,13 @@ public BitSet getIsMissing() { } @Override - public Storage mask(BitSet mask, int cardinality) { + public Storage applyFilter(BitSet filterMask, int newLength) { BitSet newMissing = new BitSet(); - long[] newData = new long[cardinality]; + long[] newData = new long[newLength]; int resIx = 0; Context context = Context.getCurrent(); for (int i = 0; i < size; i++) { - if (mask.get(i)) { + if (filterMask.get(i)) { Long item = computeItem(i); if (item == null) { newMissing.set(resIx++); @@ -95,7 +94,7 @@ public Storage mask(BitSet mask, int cardinality) { context.safepoint(); } - return new LongStorage(newData, cardinality, newMissing, getType()); + return new LongStorage(newData, newLength, newMissing, getType()); } @Override @@ -105,7 +104,7 @@ public Storage applyMask(OrderMask mask) { Context context = Context.getCurrent(); for (int i = 0; i < mask.length(); i++) { int position = mask.get(i); - if (position == Index.NOT_FOUND) { + if (position == Storage.NOT_FOUND_INDEX) { newMissing.set(i); } else { Long item = computeItem(position); @@ -121,29 +120,6 @@ public Storage applyMask(OrderMask mask) { return new LongStorage(newData, newData.length, newMissing, getType()); } - @Override - public Storage countMask(int[] counts, int total) { - long[] newData = new long[total]; - BitSet newMissing = new BitSet(); - int pos = 0; - Context context = Context.getCurrent(); - for (int i = 0; i < counts.length; i++) { - Long item = computeItem(i); - if (item == null) { - newMissing.set(pos, pos + counts[i]); - pos += counts[i]; - } else { - long nonNullItem = item; - for (int j = 0; j < counts[i]; j++) { - newData[pos++] = nonNullItem; - } - } - - context.safepoint(); - } - return new LongStorage(newData, total, newMissing, getType()); - } - @Override public Storage slice(int offset, int limit) { int newSize = Math.min(size - offset, limit); diff --git 
a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java index d8078d28c214..dbed265ee67b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java @@ -27,7 +27,6 @@ import org.enso.table.data.column.storage.type.FloatType; import org.enso.table.data.column.storage.type.IntegerType; import org.enso.table.data.column.storage.type.StorageType; -import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.enso.table.problems.ProblemAggregator; @@ -257,13 +256,13 @@ public DoubleStorage fillMissingFromPrevious(BoolStorage missingIndicator) { } @Override - public Storage mask(BitSet mask, int cardinality) { + public Storage applyFilter(BitSet filterMask, int newLength) { BitSet newMissing = new BitSet(); - long[] newData = new long[cardinality]; + long[] newData = new long[newLength]; int resIx = 0; Context context = Context.getCurrent(); for (int i = 0; i < size; i++) { - if (mask.get(i)) { + if (filterMask.get(i)) { if (isMissing.get(i)) { newMissing.set(resIx++); } else { @@ -273,7 +272,7 @@ public Storage mask(BitSet mask, int cardinality) { context.safepoint(); } - return new DoubleStorage(newData, cardinality, newMissing); + return new DoubleStorage(newData, newLength, newMissing); } @Override @@ -283,7 +282,7 @@ public Storage applyMask(OrderMask mask) { Context context = Context.getCurrent(); for (int i = 0; i < mask.length(); i++) { int position = mask.get(i); - if (position == Index.NOT_FOUND || isMissing.get(position)) { + if (position == Storage.NOT_FOUND_INDEX || isMissing.get(position)) { newMissing.set(i); } else { newData[i] = data[position]; @@ -294,27 +293,6 @@ public Storage applyMask(OrderMask mask) { return 
new DoubleStorage(newData, newData.length, newMissing); } - @Override - public Storage countMask(int[] counts, int total) { - long[] newData = new long[total]; - BitSet newMissing = new BitSet(); - int pos = 0; - Context context = Context.getCurrent(); - for (int i = 0; i < counts.length; i++) { - if (isMissing.get(i)) { - newMissing.set(pos, pos + counts[i]); - pos += counts[i]; - } else { - for (int j = 0; j < counts[i]; j++) { - newData[pos++] = data[i]; - } - } - - context.safepoint(); - } - return new DoubleStorage(newData, total, newMissing); - } - public BitSet getIsMissing() { return isMissing; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/LongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/LongStorage.java index 70dfae5fcd2e..637dd6d239ca 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/LongStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/LongStorage.java @@ -9,7 +9,6 @@ import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.IntegerType; import org.enso.table.data.column.storage.type.StorageType; -import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.enso.table.problems.ProblemAggregator; @@ -170,13 +169,13 @@ public Storage fillMissing( } @Override - public Storage mask(BitSet mask, int cardinality) { + public Storage applyFilter(BitSet filterMask, int newLength) { BitSet newMissing = new BitSet(); - long[] newData = new long[cardinality]; + long[] newData = new long[newLength]; int resIx = 0; Context context = Context.getCurrent(); for (int i = 0; i < size; i++) { - if (mask.get(i)) { + if (filterMask.get(i)) { if (isMissing.get(i)) { newMissing.set(resIx++); } else { @@ -186,7 +185,7 @@ public Storage mask(BitSet mask, int cardinality) { context.safepoint(); } - return new 
LongStorage(newData, cardinality, newMissing, type); + return new LongStorage(newData, newLength, newMissing, type); } @Override @@ -196,7 +195,7 @@ public Storage applyMask(OrderMask mask) { Context context = Context.getCurrent(); for (int i = 0; i < mask.length(); i++) { int position = mask.get(i); - if (position == Index.NOT_FOUND || isMissing.get(position)) { + if (position == Storage.NOT_FOUND_INDEX || isMissing.get(position)) { newMissing.set(i); } else { newData[i] = data[position]; @@ -207,27 +206,6 @@ public Storage applyMask(OrderMask mask) { return new LongStorage(newData, newData.length, newMissing, type); } - @Override - public Storage countMask(int[] counts, int total) { - long[] newData = new long[total]; - BitSet newMissing = new BitSet(); - int pos = 0; - Context context = Context.getCurrent(); - for (int i = 0; i < counts.length; i++) { - if (isMissing.get(i)) { - newMissing.set(pos, pos + counts[i]); - pos += counts[i]; - } else { - for (int j = 0; j < counts[i]; j++) { - newData[pos++] = data[i]; - } - } - - context.safepoint(); - } - return new LongStorage(newData, total, newMissing, type); - } - @Override public BitSet getIsMissing() { return isMissing; diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/DefaultIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/DefaultIndex.java deleted file mode 100644 index 57182748e32e..000000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/index/DefaultIndex.java +++ /dev/null @@ -1,86 +0,0 @@ -package org.enso.table.data.index; - -import java.util.BitSet; -import java.util.Collections; -import java.util.List; -import org.enso.table.data.mask.OrderMask; -import org.enso.table.data.mask.SliceRange; -import org.enso.table.data.table.Column; - -public class DefaultIndex extends Index { - private final int size; - - public DefaultIndex(int size) { - this.size = size; - } - - @Override - public Integer iloc(int loc) { - return loc; - } - - @Override - public List 
loc(Object item) { - if (item instanceof Integer) { - if ((Integer) item < size) { - return Collections.singletonList((Integer) item); - } - } else if (item instanceof Long) { - long l = (Long) item; - if (l < size) { - return Collections.singletonList((int) l); - } - } - return null; - } - - @Override - public String ilocString(int loc) { - return String.valueOf(loc); - } - - @Override - public String getName() { - return ""; - } - - @Override - public Column toColumn() { - return null; - } - - @Override - public Index mask(BitSet mask, int cardinality) { - return new DefaultIndex(cardinality); - } - - @Override - public Index countMask(int[] counts, int total) { - return new DefaultIndex(total); - } - - @Override - public Index unique() { - return this; - } - - @Override - public int size() { - return size; - } - - @Override - public Index applyMask(OrderMask mask) { - return this; - } - - @Override - public DefaultIndex slice(int offset, int limit) { - return new DefaultIndex(Math.min(size, limit)); - } - - @Override - public DefaultIndex slice(List ranges) { - return new DefaultIndex(SliceRange.totalLength(ranges)); - } -} diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/Index.java b/std-bits/table/src/main/java/org/enso/table/data/index/Index.java deleted file mode 100644 index cf800e1307e7..000000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/index/Index.java +++ /dev/null @@ -1,99 +0,0 @@ -package org.enso.table.data.index; - -import java.util.BitSet; -import java.util.List; -import org.enso.table.data.mask.OrderMask; -import org.enso.table.data.mask.SliceRange; -import org.enso.table.data.table.Column; - -/** A storage class for ordered multisets. */ -public abstract class Index { - public static final int NOT_FOUND = -1; - - /** - * Returns the element at a given (0-based) position. 
- * - * @param loc the position - * @return the corresponding element - */ - public abstract Object iloc(int loc); - - /** - * Returns a string representation of the item at a given position. - * - * @param loc the position - * @return a string representing the element at the given position - */ - public abstract String ilocString(int loc); - - /** - * Returns the list of positions where the given object is contained. The result may be null if - * the item is not found. - * - * @param item the item to lookup - * @return the list of all positions containing {@code item} - */ - public abstract List loc(Object item); - - /** - * Builds an index containing the same values as this one, but with only one occurrence of each. - * - * @return a unique index obtained from this one. - */ - public abstract Index unique(); - - /** - * @return the name of this index - */ - public abstract String getName(); - - /** - * @return the contents of this index as a column. May be null, if the index does not represent - * any meaningful data. - */ - public abstract Column toColumn(); - - /** - * Return a new index, containing only the items marked true in the mask. - * - * @param mask the mask to use - * @param cardinality the number of true values in mask - * @return a new index, masked with the given mask - */ - public abstract Index mask(BitSet mask, int cardinality); - - /** - * Returns a new index, resulting from applying the rules specified in a mask. The resulting index - * should contain the elements of the original storage, in the same order. However, the number of - * consecutive copies of the i-th element of the original index should be {@code counts[i]}. 
- * - * @param counts the mask specifying elements duplication - * @param total the sum of all elements in the mask, also interpreted as the length of the - * resulting index - * @return the index masked according to the specified rules - */ - public abstract Index countMask(int[] counts, int total); - - /** - * Returns a new index, ordered according to the rules specified in a mask. - * - * @param mask an order mask specifying the reordering - * @return an index resulting from applying the reordering rules - */ - public abstract Index applyMask(OrderMask mask); - - /** - * @return the number of elements in this index. - */ - public abstract int size(); - - /** - * @return a copy of the index containing a slice of the original data - */ - public abstract Index slice(int offset, int limit); - - /** - * @return a copy of the index consisting of slices of the original data - */ - public abstract Index slice(List ranges); -} diff --git a/std-bits/table/src/main/java/org/enso/table/data/mask/OrderMask.java b/std-bits/table/src/main/java/org/enso/table/data/mask/OrderMask.java index 593a61392366..b059c59157b5 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/mask/OrderMask.java +++ b/std-bits/table/src/main/java/org/enso/table/data/mask/OrderMask.java @@ -11,8 +11,8 @@ public interface OrderMask { * *

The resulting storage should contain the {@code positions[i]}-th element of the original * storage at the {@code idx}-th position. It may return {@link - * org.enso.table.data.index.Index.NOT_FOUND}, in which case a missing value should be inserted at - * this position. + * org.enso.table.data.column.storage.Storage#NOT_FOUND_INDEX}, in which case a missing value + * should be inserted at this position. */ int get(int idx); diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java index 671e2107d286..2b599cc3d31a 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java @@ -6,15 +6,11 @@ import org.enso.table.data.column.builder.Builder; import org.enso.table.data.column.builder.InferredBuilder; import org.enso.table.data.column.builder.MixedBuilder; -import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.StorageType; -import org.enso.table.data.index.DefaultIndex; -import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.enso.table.error.InvalidColumnNameException; -import org.enso.table.error.UnexpectedColumnTypeException; import org.enso.table.problems.ProblemAggregator; import org.graalvm.polyglot.Context; import org.graalvm.polyglot.Value; @@ -88,32 +84,12 @@ public int getSize() { /** * Return a new column, containing only the items marked true in the mask. 
* - * @param mask the mask to use - * @param cardinality the number of true values in mask + * @param filterMask the mask to use + * @param newLength the number of true values in mask * @return a new column, masked with the given mask */ - public Column mask(BitSet mask, int cardinality) { - return new Column(name, storage.mask(mask, cardinality)); - } - - /** - * Returns a column resulting from selecting only the rows corresponding to true entries in the - * provided column. - * - * @param maskCol the masking column - * @return the result of masking this column with the provided column - */ - public Column mask(Column maskCol) { - if (!(maskCol.getStorage() instanceof BoolStorage boolStorage)) { - throw new UnexpectedColumnTypeException("Boolean"); - } - - var mask = BoolStorage.toMask(boolStorage); - var localStorageMask = new BitSet(); - localStorageMask.set(0, getStorage().size()); - mask.and(localStorageMask); - int cardinality = mask.cardinality(); - return mask(mask, cardinality); + public Column applyFilter(BitSet filterMask, int newLength) { + return new Column(name, storage.applyFilter(filterMask, newLength)); } /** @@ -184,7 +160,7 @@ public static Column fromItemsNoDateConversion( * Creates a new column with given name and an element to repeat. * * @param name the name to use - * @param items the item repeated in the column + * @param item the item repeated in the column * @return a column with given name and items */ public static Column fromRepeatedItem( @@ -213,13 +189,6 @@ public static Column fromRepeatedItem( return new Column(name, builder.seal()); } - /** - * @return the index of this column - */ - public Index getIndex() { - return new DefaultIndex(getSize()); - } - /** * @param mask the reordering to apply * @return a new column, resulting from reordering this column according to {@code mask}. 
diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java index 1491d217eb22..04d8ff3fef89 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java @@ -19,8 +19,6 @@ import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.TextType; import org.enso.table.data.index.CrossTabIndex; -import org.enso.table.data.index.DefaultIndex; -import org.enso.table.data.index.Index; import org.enso.table.data.index.MultiValueIndex; import org.enso.table.data.index.MultiValueKeyBase; import org.enso.table.data.index.OrderedMultiValueKey; @@ -104,11 +102,11 @@ public Column getColumnByName(String name) { * Returns a table resulting from selecting only the rows corresponding to true entries in the * provided column. * - * @param maskCol the masking column + * @param filterColumn the column for selecting rows * @return the result of masking this table with the provided column */ - public Table mask(Column maskCol) { - if (!(maskCol.getStorage() instanceof BoolStorage storage)) { + public Table filter(Column filterColumn) { + if (!(filterColumn.getStorage() instanceof BoolStorage storage)) { throw new UnexpectedColumnTypeException("Boolean"); } @@ -119,7 +117,7 @@ public Table mask(Column maskCol) { int cardinality = mask.cardinality(); Column[] newColumns = new Column[columns.length]; for (int i = 0; i < columns.length; i++) { - newColumns[i] = columns[i].mask(mask, cardinality); + newColumns[i] = columns[i].applyFilter(mask, cardinality); } return new Table(newColumns); } @@ -159,19 +157,10 @@ private Table addColumn(Column newColumn) { return new Table(newCols); } - /** - * Returns the index of this table. 
- * - * @return the index of this table - */ - public Index getIndex() { - return new DefaultIndex(rowCount()); - } - /** * Creates an index for this table by using values from the specified columns. * - * @param columns set of columns to use as an Index + * @param columns set of columns to use as an index * @return a table indexed by the proper column */ public MultiValueIndex indexFromColumns( @@ -206,7 +195,7 @@ public Table makeCrossTabTable( /** * Creates a new table with the rows sorted * - * @param columns set of columns to use as an Index + * @param columns set of columns to use as an index * @param objectComparator Object comparator allowing calling back to `compare_to` when needed. * @return a table indexed by the proper column */ @@ -229,7 +218,7 @@ public Table orderBy(Column[] columns, Long[] directions, Comparator obj /** * Creates a new table keeping only rows with distinct key columns. * - * @param keyColumns set of columns to use as an Index + * @param keyColumns set of columns to use as an index * @param textFoldingStrategy a strategy for folding text columns * @param problemAggregator an aggregator for problems * @return a table where duplicate rows with the same key are removed @@ -244,7 +233,7 @@ public Table distinct( int cardinality = rowsToKeep.cardinality(); Column[] newColumns = new Column[this.columns.length]; for (int i = 0; i < this.columns.length; i++) { - newColumns[i] = this.columns[i].mask(rowsToKeep, cardinality); + newColumns[i] = this.columns[i].applyFilter(rowsToKeep, cardinality); } return new Table(newColumns); diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/join/lookup/LookupJoin.java b/std-bits/table/src/main/java/org/enso/table/data/table/join/lookup/LookupJoin.java index 1ff712d4a73f..4fdb849997db 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/join/lookup/LookupJoin.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/lookup/LookupJoin.java @@ -6,7 +6,6 @@ import 
org.enso.base.text.TextFoldingStrategy; import org.enso.table.data.column.builder.Builder; import org.enso.table.data.column.storage.Storage; -import org.enso.table.data.index.Index; import org.enso.table.data.index.MultiValueIndex; import org.enso.table.data.index.UnorderedMultiValueKey; import org.enso.table.data.mask.OrderMask; @@ -105,12 +104,12 @@ private Table join() { // Find corresponding row in the lookup table int lookupRow = findLookupRow(i); - assert allowUnmatchedRows || lookupRow != Index.NOT_FOUND; + assert allowUnmatchedRows || lookupRow != Storage.NOT_FOUND_INDEX; // Merge columns replacing old values for (LookupOutputColumn.MergeColumns mergeColumns : columnsToMerge) { Object itemToAdd; - if (lookupRow != Index.NOT_FOUND) { + if (lookupRow != Storage.NOT_FOUND_INDEX) { itemToAdd = mergeColumns.lookupReplacement.getItemBoxed(lookupRow); } else { itemToAdd = mergeColumns.original.getItemBoxed(i); @@ -133,7 +132,7 @@ private int findLookupRow(int baseRowIx) { List lookupRowIndices = lookupIndex.get(key); if (lookupRowIndices == null) { if (allowUnmatchedRows) { - return Index.NOT_FOUND; + return Storage.NOT_FOUND_INDEX; } else { List exampleKeyValues = IntStream.range(0, keyColumnNames.size()).mapToObj(key::get).toList(); diff --git a/std-bits/table/src/main/java/org/enso/table/excel/ExcelRange.java b/std-bits/table/src/main/java/org/enso/table/excel/ExcelRange.java index db4a6f550f40..7c2f85f82d9e 100644 --- a/std-bits/table/src/main/java/org/enso/table/excel/ExcelRange.java +++ b/std-bits/table/src/main/java/org/enso/table/excel/ExcelRange.java @@ -213,7 +213,7 @@ public static ExcelRange expandSingleCell(ExcelRange excelRange, ExcelSheet shee } /** - * @param index Index to the next character after the parsed value + * @param index The index to the next character after the parsed value * @param value Parsed integer value or 0 if not valid */ private record ParsedInteger(int index, int value) {} diff --git 
a/test/Base_Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java b/test/Base_Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java index 56ca0db879af..d24a9de876df 100644 --- a/test/Base_Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java +++ b/test/Base_Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java @@ -94,7 +94,7 @@ public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { } @Override - public Storage mask(BitSet mask, int cardinality) { + public Storage applyFilter(BitSet filterMask, int newLength) { return null; } @@ -103,11 +103,6 @@ public Storage applyMask(OrderMask mask) { return null; } - @Override - public Storage countMask(int[] counts, int total) { - return null; - } - @Override public Storage slice(int offset, int limit) { return null;