Skip to content

Commit b8e93b3

Browse files
authored
Add new text_left and text_right functions (#8691)
Added text_left and text_right functions for in-memory and databases
1 parent 943b857 commit b8e93b3

File tree

13 files changed

+272
-5
lines changed

13 files changed

+272
-5
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,7 @@
601601
and `Is_Finite`.][8539]
602602
- [Added text_length to Column][8606]
603603
- [Added none delimiter option for Data.Read][8627]
604+
- [Added text_left and text_right to Column][8691]
604605

605606
[debug-shortcuts]:
606607
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@@ -862,6 +863,7 @@
862863
[8564]: https://github.com/enso-org/enso/pull/8564
863864
[8606]: https://github.com/enso-org/enso/pull/8606
864865
[8627]: https://github.com/enso-org/enso/pull/8627
866+
[8691]: https://github.com/enso-org/enso/pull/8691
865867

866868
#### Enso Compiler
867869

distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,6 +1195,48 @@ type Column
11951195
new_name = self.naming_helper.function_name "text_length" [self]
11961196
self.make_unary_op "LENGTH" new_name
11971197

1198+
## GROUP Standard.Base.Text
1199+
ICON preparation
1200+
Gets the left n characters for each element of the column.
1201+
1202+
In the Database backends, the default text left method of the
1203+
particular database is used.
1204+
1205+
In the in-memory backend, this will give you the left n graphemes of the string.
1206+
1207+
> Example
1208+
import Standard.Examples
1209+
1210+
example_text_left =
1211+
Examples.text_column_1.text_left 5
1212+
text_left : Column|Integer -> Column
1213+
text_left self n =
1214+
Value_Type.expect_text self <| Value_Type.expect_integer n <|
1215+
n2 = n.max 0
1216+
new_name = self.naming_helper.function_name "text_left" [self, n]
1217+
self.make_binary_op "LEFT" n2 new_name
1218+
1219+
## GROUP Standard.Base.Text
1220+
ICON preparation
1221+
Gets the right n characters for each element of the column.
1222+
1223+
In the Database backends, the default text right method of the
1224+
particular database is used.
1225+
1226+
In the in-memory backend, this will give you the right n graphemes of the string.
1227+
1228+
> Example
1229+
import Standard.Examples
1230+
1231+
example_text_right =
1232+
Examples.text_column_1.text_right 5
1233+
text_right : Column|Integer -> Column
1234+
text_right self n =
1235+
Value_Type.expect_text self <| Value_Type.expect_integer n <|
1236+
n2 = n.max 0
1237+
new_name = self.naming_helper.function_name "text_right" [self, n]
1238+
self.make_binary_op "RIGHT" n2 new_name
1239+
11981240
## GROUP Standard.Base.Logical
11991241
Checks for each element of the column if it contains `other`.
12001242

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ type Postgres_Dialect
287287
## PRIVATE
288288
make_internal_generator_dialect =
289289
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
290-
text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive, ["REPLACE", replace]]+concat_ops+cases+trim_ops
290+
text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive, ["REPLACE", replace], left, right]+concat_ops+cases+trim_ops
291291
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
292292
arith_extensions = [is_nan, is_inf, floating_point_div, mod_op, decimal_div, decimal_mod, ["ROW_MIN", Base_Generator.make_function "LEAST"], ["ROW_MAX", Base_Generator.make_function "GREATEST"]]
293293
bool = [bool_or]
@@ -486,6 +486,14 @@ make_contains_expr expr substring =
486486
## PRIVATE
487487
contains = Base_Generator.lift_binary_op "contains" make_contains_expr
488488

489+
## PRIVATE
490+
left = Base_Generator.lift_binary_op "LEFT" str-> n->
491+
Builder.code "left(" ++ str ++ ", CAST(" ++ n ++ " AS INT))"
492+
493+
## PRIVATE
494+
right = Base_Generator.lift_binary_op "RIGHT" str-> n->
495+
Builder.code "right(" ++ str ++ ", CAST(" ++ n ++ " AS INT))"
496+
489497
## PRIVATE
490498
make_order_descriptor internal_column sort_direction text_ordering =
491499
nulls = case sort_direction of

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ type SQLite_Dialect
282282

283283
## PRIVATE
284284
make_internal_generator_dialect =
285-
text = [starts_with, contains, ends_with, make_case_sensitive, ["REPLACE", replace]]+concat_ops+trim_ops
285+
text = [starts_with, contains, ends_with, make_case_sensitive, ["REPLACE", replace], left, right]+concat_ops+trim_ops
286286
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
287287
stats = [agg_stddev_pop, agg_stddev_samp]
288288
arith_extensions = [is_inf, floating_point_div, mod_op]
@@ -409,6 +409,14 @@ make_contains_expr expr substring =
409409
## PRIVATE
410410
contains = Base_Generator.lift_binary_op "contains" make_contains_expr
411411

412+
## PRIVATE
413+
left = Base_Generator.lift_binary_op "LEFT" str-> n->
414+
Builder.code "substr(" ++ str ++ ", 0, " ++ n ++ " + 1)"
415+
416+
## PRIVATE
417+
right = Base_Generator.lift_binary_op "RIGHT" str-> n->
418+
Builder.code "substr(" ++ str ++ ", -" ++ n ++ ", " ++ n ++ ")"
419+
412420
## PRIVATE
413421
bool_or = Base_Generator.lift_unary_op "BOOL_OR" arg->
414422
Builder.code "max(" ++ arg ++ ")"

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ operations_map =
184184

185185
always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR", "IS_INF"]
186186
always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP", "ROUND"]
187-
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM", "REPLACE"]
187+
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM", "REPLACE", "LEFT", "RIGHT"]
188188
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER", "ROW_NUMBER_IN_GROUP", "LENGTH"]
189189
same_as_first = ["TRUNCATE", "CEIL", "FLOOR"]
190190
arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]

distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,6 +1256,48 @@ type Column
12561256
Value_Type.expect_text self <|
12571257
simple_unary_op self Java_Storage.Maps.TEXT_LENGTH
12581258

1259+
## GROUP Standard.Base.Text
1260+
ICON preparation
1261+
Gets the left n characters for each element of the column.
1262+
1263+
In the Database backends, the default text left method of the
1264+
particular database is used.
1265+
1266+
In the in-memory backend, this will give you the left n graphemes of the string.
1267+
1268+
> Example
1269+
import Standard.Examples
1270+
1271+
example_text_left =
1272+
Examples.text_column_1.text_left 5
1273+
text_left : Column|Integer -> Column
1274+
text_left self n =
1275+
Value_Type.expect_text self <|
1276+
Value_Type.expect_integer n <|
1277+
new_name = naming_helper.function_name "text_left" [self, n]
1278+
run_vectorized_binary_op self Java_Storage.Maps.TEXT_LEFT n new_name
1279+
1280+
## GROUP Standard.Base.Text
1281+
ICON preparation
1282+
Gets the right n characters for each element of the column.
1283+
1284+
In the Database backends, the default text right method of the
1285+
particular database is used.
1286+
1287+
In the in-memory backend, this will give you the right n graphemes of the string.
1288+
1289+
> Example
1290+
import Standard.Examples
1291+
1292+
example_text_right =
1293+
Examples.text_column_1.text_right 5
1294+
text_right : Column|Integer -> Column
1295+
text_right self n =
1296+
Value_Type.expect_text self <|
1297+
Value_Type.expect_integer n <|
1298+
new_name = naming_helper.function_name "text_right" [self, n]
1299+
run_vectorized_binary_op self Java_Storage.Maps.TEXT_RIGHT n new_name
1300+
12591301
## GROUP Standard.Base.Logical
12601302
Checks for each element of the column if it contains `other`.
12611303

lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Text_Utils.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@ public static int compare_normalized(String a, String b) {
5252
public static String take_prefix(String str, long grapheme_length) {
5353
BreakIterator iter = BreakIterator.getCharacterInstance();
5454
iter.setText(str);
55-
if (iter.next(Math.toIntExact(grapheme_length)) == BreakIterator.DONE) {
55+
if (grapheme_length <= 0) {
56+
return "";
57+
} else if (iter.next(Math.toIntExact(grapheme_length)) == BreakIterator.DONE) {
5658
return str;
5759
} else {
5860
return str.substring(0, iter.current());

std-bits/base/src/main/java/org/enso/base/Text_Utils.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,9 @@ public static String take_suffix(String str, long grapheme_length) {
294294
BreakIterator iter = BreakIterator.getCharacterInstance();
295295
iter.setText(str);
296296
iter.last();
297-
if (iter.next(Math.toIntExact(-grapheme_length)) == BreakIterator.DONE) {
297+
if (grapheme_length <= 0) {
298+
return "";
299+
} else if (iter.next(Math.toIntExact(-grapheme_length)) == BreakIterator.DONE) {
298300
return str;
299301
} else {
300302
return str.substring(iter.current());
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package org.enso.table.data.column.operation.map.text;
2+
3+
import org.enso.table.data.column.builder.StringBuilder;
4+
import org.enso.table.data.column.operation.map.BinaryMapOperation;
5+
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
6+
import org.enso.table.data.column.storage.SpecializedStorage;
7+
import org.enso.table.data.column.storage.Storage;
8+
import org.enso.table.data.column.storage.StringStorage;
9+
import org.enso.table.data.column.storage.numeric.LongStorage;
10+
import org.enso.table.data.column.storage.type.TextType;
11+
import org.enso.table.error.UnexpectedTypeException;
12+
import org.graalvm.polyglot.Context;
13+
14+
public abstract class StringLongToStringOp
15+
extends BinaryMapOperation<String, SpecializedStorage<String>> {
16+
public StringLongToStringOp(String name) {
17+
super(name);
18+
}
19+
20+
protected abstract String doOperation(String a, long b);
21+
22+
@Override
23+
public Storage<?> runBinaryMap(
24+
SpecializedStorage<String> storage,
25+
Object arg,
26+
MapOperationProblemAggregator problemAggregator) {
27+
int size = storage.size();
28+
if (arg == null) {
29+
StringBuilder builder = new StringBuilder(size, TextType.VARIABLE_LENGTH);
30+
builder.appendNulls(size);
31+
return builder.seal();
32+
} else if (arg instanceof Long argLong) {
33+
String[] newVals = new String[size];
34+
Context context = Context.getCurrent();
35+
for (int i = 0; i < size; i++) {
36+
if (storage.isNa(i)) {
37+
newVals[i] = null;
38+
} else {
39+
newVals[i] = doOperation(storage.getItem(i), argLong);
40+
}
41+
42+
context.safepoint();
43+
}
44+
45+
return new StringStorage(newVals, size, (TextType) storage.getType());
46+
} else {
47+
throw new UnexpectedTypeException("a Text");
48+
}
49+
}
50+
51+
@Override
52+
public Storage<?> runZip(
53+
SpecializedStorage<String> storage,
54+
Storage<?> arg,
55+
MapOperationProblemAggregator problemAggregator) {
56+
if (arg instanceof LongStorage v) {
57+
int size = storage.size();
58+
String[] newVals = new String[size];
59+
Context context = Context.getCurrent();
60+
for (int i = 0; i < size; i++) {
61+
if (storage.isNa(i) || v.isNa(i)) {
62+
newVals[i] = null;
63+
} else {
64+
newVals[i] = doOperation(storage.getItem(i), v.getItem(i));
65+
}
66+
67+
context.safepoint();
68+
}
69+
70+
return new StringStorage(newVals, size, (TextType) storage.getType());
71+
} else {
72+
throw new UnexpectedTypeException("a Text column");
73+
}
74+
}
75+
}

std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ public static final class Maps {
110110
public static final String STARTS_WITH = "starts_with";
111111
public static final String ENDS_WITH = "ends_with";
112112
public static final String TEXT_LENGTH = "text_length";
113+
public static final String TEXT_LEFT = "text_left";
114+
public static final String TEXT_RIGHT = "text_right";
113115
public static final String CONTAINS = "contains";
114116
public static final String LIKE = "like";
115117
public static final String IS_IN = "is_in";

0 commit comments

Comments
 (0)