Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move Text Left and Text Right #12354

Merged
merged 2 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ polyglot java import org.enso.table.data.column.operation.CountNothing
polyglot java import org.enso.table.data.column.operation.CountUntrimmed
polyglot java import org.enso.table.data.column.operation.SampleOperation
polyglot java import org.enso.table.data.column.operation.comparators.Comparators
polyglot java import org.enso.table.data.column.operation.text.TextPartOperation
polyglot java import org.enso.table.data.column.operation.text.TextPredicates
polyglot java import org.enso.table.data.column.operation.unary.DatePartOperation
polyglot java import org.enso.table.data.column.operation.unary.DateTruncateOperation
Expand Down Expand Up @@ -1318,10 +1319,9 @@ type Column
example_text_length =
Examples.text_column_1.text_left 5
text_left self (n : Column | Integer) -> Column =
Value_Type.expect_text self <|
Value_Type.expect_integer n <|
new_name = naming_helper.function_name "text_left" [self, n]
run_vectorized_binary_op self Java_Storage.Maps.TEXT_LEFT n new_name
Value_Type.expect_text self <| Value_Type.expect_integer n <|
new_name = naming_helper.function_name "text_left" [self, n]
_apply_case_sensitive_text_operation self n ..Default TextPartOperation.LEFT (a -> b -> a.take b) new_name

## GROUP Standard.Base.Text
ICON preparation
Expand All @@ -1338,10 +1338,9 @@ type Column
example_text_length =
Examples.text_column_1.text_right 5
text_right self (n : Column | Integer) -> Column =
Value_Type.expect_text self <|
Value_Type.expect_integer n <|
new_name = naming_helper.function_name "text_right" [self, n]
run_vectorized_binary_op self Java_Storage.Maps.TEXT_RIGHT n new_name
Value_Type.expect_text self <| Value_Type.expect_integer n <|
new_name = naming_helper.function_name "text_right" [self, n]
_apply_case_sensitive_text_operation self n ..Default TextPartOperation.RIGHT (a -> b -> a.take (..Last b)) new_name

## GROUP Standard.Base.Logical
ICON preparation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;

public interface BinaryOperation {
public interface BinaryOperation<T> {
static ColumnStorage<?> getInferredStorage(Column input) {
var storage = input.getStorage();
if (storage instanceof ColumnStorageWithInferredStorage withInferredStorage) {
Expand Down Expand Up @@ -74,8 +74,8 @@ default Column apply(Column left, Object right, String newName) {
boolean canApplyZip(ColumnStorage<?> left, ColumnStorage<?> right);

/** Apply the map to the pair of ColumnStorage and constant. */
ColumnStorage<Boolean> applyMap(ColumnStorage<?> left, Object rightValue);
ColumnStorage<T> applyMap(ColumnStorage<?> left, Object rightValue);

/** Apply the map to the pair of ColumnStorage. */
ColumnStorage<Boolean> applyZip(ColumnStorage<?> left, ColumnStorage<?> right);
ColumnStorage<T> applyZip(ColumnStorage<?> left, ColumnStorage<?> right);
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
public class CountNonTrivialWhitespace {
/** Counts the number of cells in the column that contain non-trivial whitespace. */
public static Long apply(Column column, long sampleSize) throws InterruptedException {
ColumnStorage storage = column.getStorage();
ColumnStorage<?> storage = column.getStorage();
return applyToStorage(storage, sampleSize);
}

Expand All @@ -22,15 +22,15 @@ public static Long apply(Column column, long sampleSize) throws InterruptedExcep
*
* @return
*/
public static Long applyToStorage(ColumnStorage storage, long sampleSize)
public static Long applyToStorage(ColumnStorage<?> storage, long sampleSize)
throws InterruptedException {
return (sampleSize == DEFAULT_SAMPLE_SIZE && storage instanceof StringStorage stringStorage)
? stringStorage.cachedWhitespaceCount()
: (Long) compute(storage, sampleSize, Context.getCurrent());
}

/** Internal method performing the calculation on a storage. */
public static long compute(ColumnStorage storage, long sampleSize, Context context) {
public static long compute(ColumnStorage<?> storage, long sampleSize, Context context) {
long size = storage.getSize();

long count = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ private <T> ColumnStorage<String> castTemporalStorage(
(index, value) -> adapt(converter.apply(value), problemAggregator));
}

private ColumnStorage<String> adaptStringStorage(StringStorage stringStorage) {
private ColumnStorage<String> adaptStringStorage(ColumnStorage<String> stringStorage) {
// Adapting an existing string storage into a new type is done without warnings.
return StorageIterators.mapOverStorage(
stringStorage,
Expand Down Expand Up @@ -153,8 +153,9 @@ private String adaptWithoutWarning(String value) {
return targetType.adapt(value);
}

private boolean canAvoidCopying(StringStorage stringStorage) {
if (targetType.fitsExactly(stringStorage.getType())) {
private boolean canAvoidCopying(ColumnStorage<String> stringStorage) {
var type = stringStorage.getType();
if (type instanceof TextType textType && targetType.fitsExactly(textType)) {
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* Entry point for calling Comparators. Calls the appropriate comparator based on the type of the
* left columns.
*/
public interface Comparators extends BinaryOperation {
public interface Comparators extends BinaryOperation<Boolean> {
static boolean isSupported(Column left) {
var storage = BinaryOperation.getInferredStorage(left);
var storageType = storage.getType();
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package org.enso.table.data.column.operation.text;

import org.enso.base.Text_Utils;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.BinaryOperation;
import org.enso.table.data.column.operation.StorageIterators;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.StringStorage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.NullType;
import org.enso.table.data.column.storage.type.TextType;

/**
 * Binary column operation that produces text from a text storage and an integer argument.
 *
 * <p>Two instances are exposed: {@link #LEFT}, which delegates to {@code Text_Utils.take_prefix},
 * and {@link #RIGHT}, which delegates to {@code Text_Utils.take_suffix}. The argument may be a
 * single constant (see {@link #applyMap}) or another storage (see {@link #applyZip}).
 */
public final class TextPartOperation implements BinaryOperation<String> {
  /** Operation whose per-row function is {@code Text_Utils.take_prefix}. */
  public static final TextPartOperation LEFT = new TextPartOperation(Text_Utils::take_prefix);

  /** Operation whose per-row function is {@code Text_Utils.take_suffix}. */
  public static final TextPartOperation RIGHT = new TextPartOperation(Text_Utils::take_suffix);

  /** Per-row function combining a text value with a primitive {@code long}. */
  @FunctionalInterface
  public interface TextLongToStringFunction {
    String apply(String text, long value);
  }

  /** The per-row function applied by this operation. */
  private final TextLongToStringFunction function;

  private TextPartOperation(TextLongToStringFunction function) {
    this.function = function;
  }

  /**
   * Checks whether the left storage is acceptable for this operation.
   *
   * <p>Only the type of {@code left} is inspected; {@code rightValue} is not examined here.
   *
   * @return {@code true} when the left storage type is {@code TextType} or {@code NullType}
   */
  @Override
  public boolean canApplyMap(ColumnStorage<?> left, Object rightValue) {
    var leftType = left.getType();
    if (leftType instanceof TextType) {
      return true;
    }
    return leftType instanceof NullType;
  }

  /**
   * Checks whether a pair of storages is acceptable for this operation.
   *
   * @return {@code true} when {@code left} passes {@link #canApplyMap} and the right storage type
   *     is {@code IntegerType} or {@code NullType}
   */
  @Override
  public boolean canApplyZip(ColumnStorage<?> left, ColumnStorage<?> right) {
    if (!canApplyMap(left, null)) {
      return false;
    }
    var rightType = right.getType();
    return rightType instanceof IntegerType || rightType instanceof NullType;
  }

  /**
   * Applies the operation to every entry of {@code left} using one constant argument.
   *
   * <p>A {@code NullType} left storage yields the result of {@code StringStorage.makeEmpty} with
   * variable-length text type; a {@code null} argument yields the result of {@code
   * StringStorage.makeEmpty} with the left storage's own text type.
   *
   * @param left the storage to read text from
   * @param rightValue the constant argument; {@code null} or a value coercible to {@code long}
   * @return a text storage with the same size as {@code left}
   * @throws IllegalArgumentException if {@code left} is neither text nor null typed, or if {@code
   *     rightValue} is non-null and not coercible to {@code long}
   */
  @Override
  public ColumnStorage<String> applyMap(ColumnStorage<?> left, Object rightValue) {
    var leftType = left.getType();
    if (leftType instanceof NullType) {
      return StringStorage.makeEmpty(TextType.VARIABLE_LENGTH, left.getSize());
    }
    if (!(leftType instanceof TextType textType)) {
      throw new IllegalArgumentException("Unsupported storage type.");
    }
    if (rightValue == null) {
      return StringStorage.makeEmpty(textType, left.getSize());
    }
    if (!NumericConverter.isCoercibleToLong(rightValue)) {
      throw new IllegalArgumentException("Unsupported right value type.");
    }

    final long count = NumericConverter.coerceToLong(rightValue);
    var texts = textType.asTypedStorage(left);
    return StorageIterators.mapOverStorage(
        texts,
        Builder.getForText(textType, left.getSize()),
        (row, text) -> function.apply(text, count));
  }

  /**
   * Applies the operation row by row to a text storage and an integer storage.
   *
   * <p>If either storage is {@code NullType}, the result of {@code StringStorage.makeEmpty} with
   * variable-length text type is returned.
   *
   * @param left the storage to read text from
   * @param right the storage supplying the integer argument for each row
   * @return a text storage with the same size as the inputs
   * @throws IllegalArgumentException if the sizes differ, or if the storage types are not a
   *     supported combination
   */
  @Override
  public ColumnStorage<String> applyZip(ColumnStorage<?> left, ColumnStorage<?> right) {
    if (left.getSize() != right.getSize()) {
      throw new IllegalArgumentException("Columns must be of the same size.");
    }

    var leftType = left.getType();
    var rightType = right.getType();
    if (leftType instanceof NullType || rightType instanceof NullType) {
      return StringStorage.makeEmpty(TextType.VARIABLE_LENGTH, left.getSize());
    }
    if (!(leftType instanceof TextType textType)
        || !(rightType instanceof IntegerType integerType)) {
      throw new IllegalArgumentException("Unsupported storage types.");
    }

    var texts = textType.asTypedStorage(left);
    var counts = integerType.asTypedStorage(right);
    // NOTE(review): the meaning of the boolean flag is defined by
    // StorageIterators.zipOverStorages; presumably it skips rows with a missing value - confirm.
    return StorageIterators.zipOverStorages(
        texts,
        counts,
        length -> Builder.getForText(textType, length),
        true,
        (row, text, count) -> function.apply(text, count));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
import org.enso.base.Regex_Utils;
import org.enso.base.Text_Utils;
import org.enso.table.data.column.operation.comparators.GenericComparators;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.type.NullType;
import org.enso.table.data.column.storage.type.TextType;
import org.enso.table.data.table.Column;
import org.enso.table.error.UnexpectedTypeException;

public final class TextPredicates extends GenericComparators<String> {
Expand All @@ -25,15 +23,6 @@ private TextPredicates(BiPredicate<String, String> predicate) {
super(predicate, true);
}

@Override
public Column apply(Column left, Object right, String newName) {
var leftStorage = getStorage(left);
if (leftStorage.getType() instanceof NullType) {
return new Column(newName, BoolStorage.makeEmpty(leftStorage.getSize()));
}
return super.apply(left, right, newName);
}

@Override
protected RuntimeException makeCompareError(Object left, Object right) {
return new UnexpectedTypeException("a Text", right.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ private static MapOperationStorage<Void, NullStorage> buildOps() {
ops.add(new NullAndOp());
ops.add(new NullOrOp());

ops.add(new NullOp(Maps.TEXT_LEFT));
ops.add(new NullOp(Maps.TEXT_RIGHT));

ops.add(new CoalescingNullOp(Maps.MIN));
ops.add(new CoalescingNullOp(Maps.MAX));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,6 @@ public static final class Maps {
public static final String ROUND = "round";
public static final String AND = "&&";
public static final String OR = "||";
public static final String TEXT_LEFT = "text_left";
public static final String TEXT_RIGHT = "text_right";
public static final String IS_IN = "is_in";
public static final String MIN = "min";
public static final String MAX = "max";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.operation.map.text.CoalescingStringStringOp;
import org.enso.table.data.column.operation.map.text.StringIsInOp;
import org.enso.table.data.column.operation.map.text.StringLongToStringOp;
import org.enso.table.data.column.operation.map.text.StringStringOp;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.TextType;
Expand All @@ -23,7 +22,6 @@ public final class StringStorage extends SpecializedStorage<String> {
private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(StringStorage.class);

record DataQualityMetrics(Long untrimmedCount, Long whitespaceCount) {}
;

private Future<DataQualityMetrics> dataQualityMetricsValues;

Expand All @@ -35,7 +33,7 @@ public StringStorage(String[] data, TextType type) {
super(type, data, buildOps());

dataQualityMetricsValues =
CompletableFuture.supplyAsync(() -> createDataQualityMetricsWitDefaultSize());
CompletableFuture.supplyAsync(this::createDataQualityMetricsWitDefaultSize);
}

public static StringStorage makeEmpty(TextType type, long size) {
Expand Down Expand Up @@ -109,20 +107,6 @@ public Long cachedWhitespaceCount() throws InterruptedException {

private static MapOperationStorage<String, SpecializedStorage<String>> buildOps() {
MapOperationStorage<String, SpecializedStorage<String>> t = new MapOperationStorage<>();
t.add(
new StringLongToStringOp(Maps.TEXT_LEFT) {
@Override
protected String doOperation(String a, long b) {
return Text_Utils.take_prefix(a, b);
}
});
t.add(
new StringLongToStringOp(Maps.TEXT_RIGHT) {
@Override
protected String doOperation(String a, long b) {
return Text_Utils.take_suffix(a, b);
}
});
t.add(new StringIsInOp<>());
t.add(
new StringStringOp(Maps.ADD) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.enso.table.data.column.storage.type;

import java.math.BigInteger;
import org.enso.table.data.column.storage.ColumnStorage;

public record IntegerType(Bits bits) implements StorageType {
public static final IntegerType INT_64 = new IntegerType(Bits.BITS_64);
Expand Down Expand Up @@ -83,4 +84,13 @@ public static IntegerType smallestFitting(long value) {
if (INT_32.fits(value)) return INT_32;
return INT_64;
}

/**
 * Views the given storage as a storage of {@code Long} values.
 *
 * @param storage the storage to reinterpret; its type must be an {@code IntegerType}
 * @return the same storage instance, cast to {@code ColumnStorage<Long>}
 * @throws IllegalArgumentException if the storage's type is not an {@code IntegerType}
 */
public ColumnStorage<Long> asTypedStorage(ColumnStorage<?> storage) {
  if (!(storage.getType() instanceof IntegerType)) {
    throw new IllegalArgumentException("Storage is not of IntegerType");
  }
  // The cast is guarded by the storage-type check above.
  @SuppressWarnings("unchecked")
  ColumnStorage<Long> typed = (ColumnStorage<Long>) storage;
  return typed;
}
}
Loading