Skip to content

Commit 87e494c

Browse files
committed
Auto merge of #67330 - golddranks:split_inclusive, r=kodraus
Implement split_inclusive for slice and str # Overview * Implement `split_inclusive` for `slice` and `str` and `split_inclusive_mut` for `slice` * `split_inclusive` is a substring/subslice splitting iterator that includes the matched part in the iterated substrings as a terminator. * EDIT: The behaviour has now changed, as per @KodrAus's input, to have the same semantics as the `split_terminator` function. I updated the examples below. * Two examples below: ```Rust let data = "\nMäry häd ä little lämb\nLittle lämb\n"; let split: Vec<&str> = data.split_inclusive('\n').collect(); assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]); ``` ```Rust let uppercase_separated = "SheePSharKTurtlECaT"; let mut first_char = true; let split: Vec<&str> = uppercase_separated.split_inclusive(|c: char| { let split = !first_char && c.is_uppercase(); first_char = split; split }).collect(); assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]); ``` # Justification for the API * I was surprised to find that stdlib currently only has splitting iterators that leave out the matched part. In my experience, wanting to leave a substring terminator as a part of the substring is a pretty common use case. * This API is strictly more expressive than the standard `split` API: it's easy to get the behaviour of `split` by mapping a subslicing operation that drops the terminator. On the other hand, it's impossible to derive this behaviour from `split` without using hacky and brittle `unsafe` code. The normal way to achieve this functionality would be implementing the iterator yourself. * Especially when dealing with mutable slices, the only way currently is to use `split_at_mut`. This API provides an ergonomic alternative that plays to the strengths of the iterating capabilities of Rust. (Using `split_at_mut` iteratively used to be a real pain before NLL; fortunately the situation is a bit better now.) 
# Discussion items * <s>Does it make sense to mimic `split_terminator` in that the final empty slice would be left off in case of the string/slice ending with a terminator? It might do, as this use case is naturally geared towards considering the matching part as a terminator instead of a separator.</s> * EDIT: The behaviour was changed to mimic `split_terminator`. * Does it make sense to have `split_inclusive_mut` for `&mut str`?
2 parents d735ede + 5c9dc57 commit 87e494c

File tree

5 files changed

+545
-1
lines changed

5 files changed

+545
-1
lines changed

src/liballoc/tests/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#![feature(binary_heap_into_iter_sorted)]
1313
#![feature(binary_heap_drain_sorted)]
1414
#![feature(vec_remove_item)]
15+
#![feature(split_inclusive)]
1516

1617
use std::collections::hash_map::DefaultHasher;
1718
use std::hash::{Hash, Hasher};

src/liballoc/tests/slice.rs

+80
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,86 @@ fn test_splitator() {
851851
assert_eq!(xs.split(|x| *x == 5).collect::<Vec<&[i32]>>(), splits);
852852
}
853853

854+
#[test]
855+
fn test_splitator_inclusive() {
856+
let xs = &[1, 2, 3, 4, 5];
857+
858+
let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]];
859+
assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).collect::<Vec<_>>(), splits);
860+
let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]];
861+
assert_eq!(xs.split_inclusive(|x| *x == 1).collect::<Vec<_>>(), splits);
862+
let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
863+
assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<_>>(), splits);
864+
let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
865+
assert_eq!(xs.split_inclusive(|x| *x == 10).collect::<Vec<_>>(), splits);
866+
let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]];
867+
assert_eq!(xs.split_inclusive(|_| true).collect::<Vec<&[i32]>>(), splits);
868+
869+
let xs: &[i32] = &[];
870+
let splits: &[&[i32]] = &[&[]];
871+
assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<&[i32]>>(), splits);
872+
}
873+
874+
#[test]
875+
fn test_splitator_inclusive_reverse() {
876+
let xs = &[1, 2, 3, 4, 5];
877+
878+
let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]];
879+
assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).rev().collect::<Vec<_>>(), splits);
880+
let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]];
881+
assert_eq!(xs.split_inclusive(|x| *x == 1).rev().collect::<Vec<_>>(), splits);
882+
let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
883+
assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
884+
let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
885+
assert_eq!(xs.split_inclusive(|x| *x == 10).rev().collect::<Vec<_>>(), splits);
886+
let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]];
887+
assert_eq!(xs.split_inclusive(|_| true).rev().collect::<Vec<_>>(), splits);
888+
889+
let xs: &[i32] = &[];
890+
let splits: &[&[i32]] = &[&[]];
891+
assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
892+
}
893+
894+
#[test]
895+
fn test_splitator_mut_inclusive() {
896+
let xs = &mut [1, 2, 3, 4, 5];
897+
898+
let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]];
899+
assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).collect::<Vec<_>>(), splits);
900+
let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]];
901+
assert_eq!(xs.split_inclusive_mut(|x| *x == 1).collect::<Vec<_>>(), splits);
902+
let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
903+
assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::<Vec<_>>(), splits);
904+
let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
905+
assert_eq!(xs.split_inclusive_mut(|x| *x == 10).collect::<Vec<_>>(), splits);
906+
let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]];
907+
assert_eq!(xs.split_inclusive_mut(|_| true).collect::<Vec<_>>(), splits);
908+
909+
let xs: &mut [i32] = &mut [];
910+
let splits: &[&[i32]] = &[&[]];
911+
assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::<Vec<_>>(), splits);
912+
}
913+
914+
#[test]
915+
fn test_splitator_mut_inclusive_reverse() {
916+
let xs = &mut [1, 2, 3, 4, 5];
917+
918+
let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]];
919+
assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).rev().collect::<Vec<_>>(), splits);
920+
let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]];
921+
assert_eq!(xs.split_inclusive_mut(|x| *x == 1).rev().collect::<Vec<_>>(), splits);
922+
let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
923+
assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
924+
let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
925+
assert_eq!(xs.split_inclusive_mut(|x| *x == 10).rev().collect::<Vec<_>>(), splits);
926+
let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]];
927+
assert_eq!(xs.split_inclusive_mut(|_| true).rev().collect::<Vec<_>>(), splits);
928+
929+
let xs: &mut [i32] = &mut [];
930+
let splits: &[&[i32]] = &[&[]];
931+
assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
932+
}
933+
854934
#[test]
855935
fn test_splitnator() {
856936
let xs = &[1, 2, 3, 4, 5];

src/liballoc/tests/str.rs

+43
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,49 @@ fn test_split_char_iterator_no_trailing() {
12471247
assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb"]);
12481248
}
12491249

1250+
#[test]
1251+
fn test_split_char_iterator_inclusive() {
1252+
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1253+
1254+
let split: Vec<&str> = data.split_inclusive('\n').collect();
1255+
assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]);
1256+
1257+
let uppercase_separated = "SheePSharKTurtlECaT";
1258+
let mut first_char = true;
1259+
let split: Vec<&str> = uppercase_separated
1260+
.split_inclusive(|c: char| {
1261+
let split = !first_char && c.is_uppercase();
1262+
first_char = split;
1263+
split
1264+
})
1265+
.collect();
1266+
assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]);
1267+
}
1268+
1269+
#[test]
1270+
fn test_split_char_iterator_inclusive_rev() {
1271+
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1272+
1273+
let split: Vec<&str> = data.split_inclusive('\n').rev().collect();
1274+
assert_eq!(split, ["Little lämb\n", "Märy häd ä little lämb\n", "\n"]);
1275+
1276+
// Note that the predicate is stateful and thus dependent
1277+
// on the iteration order.
1278+
// (A different predicate is needed for reverse iterator vs normal iterator.)
1279+
// Not sure if anything can be done though.
1280+
let uppercase_separated = "SheePSharKTurtlECaT";
1281+
let mut term_char = true;
1282+
let split: Vec<&str> = uppercase_separated
1283+
.split_inclusive(|c: char| {
1284+
let split = term_char && c.is_uppercase();
1285+
term_char = c.is_uppercase();
1286+
split
1287+
})
1288+
.rev()
1289+
.collect();
1290+
assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]);
1291+
}
1292+
12501293
#[test]
12511294
fn test_rsplit() {
12521295
let data = "\nMäry häd ä little lämb\nLittle lämb\n";

0 commit comments

Comments
 (0)