Skip to content

Commit 6968670

Browse files
craig[bot] and spilchen committed
Merge #141929
141929: sql: add substring_index built-in function r=spilchen a=spilchen This commit introduces the `substring_index` built-in function, mirroring MySQL's behavior. `substring_index` returns a substring of `input` before `count` occurrences of `delim`. If `count` is positive, the leftmost part is returned. If `count` is negative, the rightmost part is returned. ``` [email protected]:26257/demoapp/db> SELECT substring_index('www.cockroachlabs.com', '.', 2); substring_index --------------------- www.cockroachlabs (1 row) Time: 3ms total (execution 3ms / network 0ms) [email protected]:26257/demoapp/db> SELECT substring_index('www.cockroachlabs.com', '.', -2); substring_index --------------------- cockroachlabs.com (1 row) Time: 1ms total (execution 1ms / network 0ms) ``` [TREQ-900](https://cockroachlabs.atlassian.net/browse/TREQ-900) Epic: none Release note (sql change): Added the `substring_index` built-in function, which extracts a portion of a string based on a specified delimiter and occurrence count, following MySQL behavior. Co-authored-by: Matt Spilchen <[email protected]>
2 parents eb62671 + b4a01b8 commit 6968670

File tree

4 files changed

+150
-0
lines changed

4 files changed

+150
-0
lines changed

docs/generated/sql/functions.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3161,6 +3161,9 @@ Case mode values range between 0 - 1, representing lower casing and upper casing
31613161
</span></td><td>Immutable</td></tr>
31623162
<tr><td><a name="substring"></a><code>substring(input: varbit, start_pos: <a href="int.html">int</a>, length: <a href="int.html">int</a>) &rarr; varbit</code></td><td><span class="funcdesc"><p>Returns a bit subarray of <code>input</code> starting at <code>start_pos</code> (count starts at 1) and including up to <code>length</code> characters.</p>
31633163
</span></td><td>Immutable</td></tr>
3164+
<tr><td><a name="substring_index"></a><code>substring_index(input: <a href="string.html">string</a>, delim: <a href="string.html">string</a>, count: <a href="int.html">int</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Returns a substring of <code>input</code> before <code>count</code> occurrences of <code>delim</code>.
3165+
If <code>count</code> is positive, the leftmost part is returned. If <code>count</code> is negative, the rightmost part is returned.</p>
3166+
</span></td><td>Immutable</td></tr>
31643167
<tr><td><a name="to_char_with_style"></a><code>to_char_with_style(date: <a href="date.html">date</a>, datestyle: <a href="string.html">string</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Convert a date to a string assuming the string is formatted using the given DateStyle.</p>
31653168
</span></td><td>Immutable</td></tr>
31663169
<tr><td><a name="to_char_with_style"></a><code>to_char_with_style(interval: <a href="interval.html">interval</a>, style: <a href="string.html">string</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Convert an interval to a string using the given IntervalStyle.</p>

pkg/sql/logictest/testdata/logic_test/builtin_function

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4407,4 +4407,106 @@ SELECT crdb_internal.type_is_indexable(NULL);
44074407
----
44084408
NULL
44094409

4410+
subtest substring_index
4411+
4412+
# Test basic behavior of substring_index
4413+
query T
4414+
SELECT substring_index('www.cockroachlabs.com', '.', 2);
4415+
----
4416+
www.cockroachlabs
4417+
4418+
query T
4419+
SELECT substring_index('www.cockroachlabs.com', '.', -2);
4420+
----
4421+
cockroachlabs.com
4422+
4423+
query T
4424+
SELECT substring_index('hello.world.example.com', '.', 3);
4425+
----
4426+
hello.world.example
4427+
4428+
query T
4429+
SELECT substring_index('hello.world.example.com', '.', -1);
4430+
----
4431+
com
4432+
4433+
# Test when count is 0, should return empty string
4434+
query T
4435+
SELECT substring_index('111-22222-3333', '-', 0);
4436+
----
4437+
·
4438+
4439+
# Test when count exceeds available delimiters, should return full string
4440+
query T
4441+
SELECT substring_index('example.com', '.', 5);
4442+
----
4443+
example.com
4444+
4445+
query T
4446+
SELECT substring_index('example.com', '.', -5);
4447+
----
4448+
example.com
4449+
4450+
# Test when delimiter is not found in the string, should return full string
4451+
query T
4452+
SELECT substring_index('no.delimiters.here', ':', 1);
4453+
----
4454+
no.delimiters.here
4455+
4456+
query T
4457+
SELECT substring_index('singleword', '.', 1);
4458+
----
4459+
singleword
4460+
4461+
# Test when input is empty, should return empty string
4462+
query T
4463+
SELECT substring_index('', '.', 1);
4464+
----
4465+
·
4466+
4467+
4468+
# Test when delimiter is empty, should return empty string
4469+
query T
4470+
SELECT substring_index('teststring', '', 1);
4471+
----
4472+
·
4473+
4474+
# Test NULL behavior, should return NULL if any argument is NULL
4475+
query T
4476+
SELECT substring_index(NULL, '.', 1);
4477+
----
4478+
NULL
4479+
4480+
query T
4481+
SELECT substring_index('test.string', NULL, 1);
4482+
----
4483+
NULL
4484+
4485+
query T
4486+
SELECT substring_index('test.string', '.', NULL);
4487+
----
4488+
NULL
4489+
4490+
# Test with multi-character delimiters
4491+
query T
4492+
SELECT substring_index('apple--banana--cherry--date', '--', 2);
4493+
----
4494+
apple--banana
4495+
4496+
query T
4497+
SELECT substring_index('apple--banana--cherry--date', '--', -2);
4498+
----
4499+
cherry--date
4500+
4501+
# Test when the string contains repeated delimiters
4502+
query T
4503+
SELECT substring_index('a..b..c..d', '..', 2);
4504+
----
4505+
a..b
4506+
4507+
query T
4508+
SELECT substring_index('a..b..c..d', '..', -2);
4509+
----
4510+
c..d
4511+
44104512
subtest end

pkg/sql/sem/builtins/builtins.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,50 @@ var regularBuiltins = map[string]builtinDefinition{
387387
"substr": makeSubStringImpls(),
388388
"substring": makeSubStringImpls(),
389389

390+
// substring_index returns the portion of `input` that lies before (count > 0,
// counting from the left) or after (count < 0, counting from the right) the
// count-th occurrence of `delim`. It returns the empty string when `input` or
// `delim` is empty or `count` is zero, and the whole input when `input`
// contains fewer than |count| occurrences of `delim`.
"substring_index": makeBuiltin(
	tree.FunctionProperties{Category: builtinconstants.CategoryString},
	tree.Overload{
		Types: tree.ParamTypes{
			{Name: "input", Typ: types.String},
			{Name: "delim", Typ: types.String},
			{Name: "count", Typ: types.Int},
		},
		ReturnType: tree.FixedReturnType(types.String),
		Fn: func(_ context.Context, _ *eval.Context, args tree.Datums) (tree.Datum, error) {
			input := string(tree.MustBeDString(args[0]))
			delim := string(tree.MustBeDString(args[1]))
			// Keep the full 64-bit value: converting to int would truncate
			// on platforms where int is 32 bits wide.
			count := int64(tree.MustBeDInt(args[2]))

			// Empty input, empty delimiter, or a zero count all yield the
			// empty string.
			if input == "" || delim == "" || count == 0 {
				return tree.NewDString(""), nil
			}

			parts := strings.Split(input, delim)
			length := int64(len(parts))

			// If count is positive, return the first 'count' parts joined by delim.
			if count > 0 {
				if count >= length {
					// count exceeds the number of occurrences: return the full string.
					return tree.NewDString(input), nil
				}
				return tree.NewDString(strings.Join(parts[:count], delim)), nil
			}

			// count is negative: return the last 'abs(count)' parts joined by
			// delim. Compare against -length instead of negating count,
			// because -math.MinInt64 overflows back to math.MinInt64 and would
			// produce an out-of-range slice index below.
			if count <= -length {
				// abs(count) exceeds the number of occurrences: return the full string.
				return tree.NewDString(input), nil
			}
			// Here -length < count < 0, so 0 < length+count < length.
			return tree.NewDString(strings.Join(parts[length+count:], delim)), nil
		},
		Info: "Returns a substring of `input` before `count` occurrences of `delim`.\n" +
			"If `count` is positive, the leftmost part is returned. If `count` is negative, the rightmost part is returned.",
		Volatility: volatility.Immutable,
	},
),
433+
390434
// concat concatenates the text representations of all the arguments.
391435
// NULL arguments are ignored.
392436
"concat": makeBuiltin(

pkg/sql/sem/builtins/fixed_oids.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2643,6 +2643,7 @@ var builtinOidsArray = []string{
26432643
2680: `jsonpath(jsonpath: jsonpath) -> jsonpath`,
26442644
2681: `varchar(jsonpath: jsonpath) -> varchar`,
26452645
2682: `char(jsonpath: jsonpath) -> "char"`,
2646+
2683: `substring_index(input: string, delim: string, count: int) -> string`,
26462647
}
26472648

26482649
var builtinOidsBySignature map[string]oid.Oid

0 commit comments

Comments
 (0)