Skip to content

Commit

Permalink
sql: add substring_index built-in function
Browse files Browse the repository at this point in the history
This commit introduces the `substring_index` built-in function,
mirroring MySQL's behavior. `substring_index` returns a substring of
`input` before `count` occurrences of `delim`. If `count` is positive,
the leftmost part is returned. If `count` is negative, the rightmost
part is returned.

Epic: none
Release note (sql change): Added the `substring_index` built-in
function, which extracts a portion of a string based on a specified
delimiter and occurrence count, following MySQL behavior.
  • Loading branch information
spilchen committed Feb 24, 2025
1 parent 4215a94 commit b4a01b8
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 0 deletions.
3 changes: 3 additions & 0 deletions docs/generated/sql/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -3161,6 +3161,9 @@ Case mode values range between 0 - 1, representing lower casing and upper casing
</span></td><td>Immutable</td></tr>
<tr><td><a name="substring"></a><code>substring(input: varbit, start_pos: <a href="int.html">int</a>, length: <a href="int.html">int</a>) &rarr; varbit</code></td><td><span class="funcdesc"><p>Returns a bit subarray of <code>input</code> starting at <code>start_pos</code> (count starts at 1) and including up to <code>length</code> characters.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="substring_index"></a><code>substring_index(input: <a href="string.html">string</a>, delim: <a href="string.html">string</a>, count: <a href="int.html">int</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Returns a substring of <code>input</code> before <code>count</code> occurrences of <code>delim</code>.
If <code>count</code> is positive, the leftmost part is returned. If <code>count</code> is negative, the rightmost part is returned.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="to_char_with_style"></a><code>to_char_with_style(date: <a href="date.html">date</a>, datestyle: <a href="string.html">string</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Convert an date to a string assuming the string is formatted using the given DateStyle.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="to_char_with_style"></a><code>to_char_with_style(interval: <a href="interval.html">interval</a>, style: <a href="string.html">string</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Convert an interval to a string using the given IntervalStyle.</p>
Expand Down
102 changes: 102 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/builtin_function
Original file line number Diff line number Diff line change
Expand Up @@ -4407,4 +4407,106 @@ SELECT crdb_internal.type_is_indexable(NULL);
----
NULL

subtest substring_index

# Test basic behavior of substring_index
query T
SELECT substring_index('www.cockroachlabs.com', '.', 2);
----
www.cockroachlabs

query T
SELECT substring_index('www.cockroachlabs.com', '.', -2);
----
cockroachlabs.com

query T
SELECT substring_index('hello.world.example.com', '.', 3);
----
hello.world.example

query T
SELECT substring_index('hello.world.example.com', '.', -1);
----
com

# Test when count is 0, should return empty string
query T
SELECT substring_index('111-22222-3333', '-', 0);
----
·

# Test when count exceeds available delimiters, should return full string
query T
SELECT substring_index('example.com', '.', 5);
----
example.com

query T
SELECT substring_index('example.com', '.', -5);
----
example.com

# Test when delimiter is not found in the string, should return full string
query T
SELECT substring_index('no.delimiters.here', ':', 1);
----
no.delimiters.here

query T
SELECT substring_index('singleword', '.', 1);
----
singleword

# Test when input is empty, should return empty string
query T
SELECT substring_index('', '.', 1);
----
·


# Test when delimiter is empty, should return empty string
query T
SELECT substring_index('teststring', '', 1);
----
·

# Test NULL behavior, should return NULL if any argument is NULL
query T
SELECT substring_index(NULL, '.', 1);
----
NULL

query T
SELECT substring_index('test.string', NULL, 1);
----
NULL

query T
SELECT substring_index('test.string', '.', NULL);
----
NULL

# Test with multi-character delimiters
query T
SELECT substring_index('apple--banana--cherry--date', '--', 2);
----
apple--banana

query T
SELECT substring_index('apple--banana--cherry--date', '--', -2);
----
cherry--date

# Test when the string contains repeated delimiters
query T
SELECT substring_index('a..b..c..d', '..', 2);
----
a..b

query T
SELECT substring_index('a..b..c..d', '..', -2);
----
c..d

subtest end
44 changes: 44 additions & 0 deletions pkg/sql/sem/builtins/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,50 @@ var regularBuiltins = map[string]builtinDefinition{
"substr": makeSubStringImpls(),
"substring": makeSubStringImpls(),

// substring_index mirrors MySQL's SUBSTRING_INDEX: it splits `input` on
// `delim` and returns either the leading `count` pieces (count > 0) or the
// trailing `-count` pieces (count < 0), re-joined with `delim`.
"substring_index": makeBuiltin(
	tree.FunctionProperties{Category: builtinconstants.CategoryString},
	tree.Overload{
		Types: tree.ParamTypes{
			{Name: "input", Typ: types.String},
			{Name: "delim", Typ: types.String},
			{Name: "count", Typ: types.Int},
		},
		ReturnType: tree.FixedReturnType(types.String),
		Fn: func(_ context.Context, _ *eval.Context, args tree.Datums) (tree.Datum, error) {
			input := string(tree.MustBeDString(args[0]))
			delim := string(tree.MustBeDString(args[1]))
			// Keep the full 64-bit value; narrowing to int is unnecessary and
			// would be platform dependent.
			count := int64(tree.MustBeDInt(args[2]))

			// An empty input, an empty delimiter, or a zero count all yield
			// the empty string.
			if input == "" || delim == "" || count == 0 {
				return tree.NewDString(""), nil
			}

			parts := strings.Split(input, delim)
			numParts := int64(len(parts))

			switch {
			case count >= numParts || count <= -numParts:
				// The requested number of pieces covers the whole string.
				// The negative bound is checked against -numParts rather
				// than by negating count: -math.MinInt64 overflows back to
				// math.MinInt64, which would slip past the bound check and
				// produce an out-of-range slice index below.
				return tree.NewDString(input), nil
			case count > 0:
				// Leftmost `count` pieces.
				return tree.NewDString(strings.Join(parts[:count], delim)), nil
			default:
				// Rightmost `-count` pieces; here -numParts < count < 0, so
				// numParts+count is a valid index in (0, numParts).
				return tree.NewDString(strings.Join(parts[numParts+count:], delim)), nil
			}
		},
		Info: "Returns a substring of `input` before `count` occurrences of `delim`.\n" +
			"If `count` is positive, the leftmost part is returned. If `count` is negative, the rightmost part is returned.",
		Volatility: volatility.Immutable,
	},
),

// concat concatenates the text representations of all the arguments.
// NULL arguments are ignored.
"concat": makeBuiltin(
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/sem/builtins/fixed_oids.go
Original file line number Diff line number Diff line change
Expand Up @@ -2643,6 +2643,7 @@ var builtinOidsArray = []string{
2680: `jsonpath(jsonpath: jsonpath) -> jsonpath`,
2681: `varchar(jsonpath: jsonpath) -> varchar`,
2682: `char(jsonpath: jsonpath) -> "char"`,
2683: `substring_index(input: string, delim: string, count: int) -> string`,
}

var builtinOidsBySignature map[string]oid.Oid
Expand Down

0 comments on commit b4a01b8

Please sign in to comment.