Skip to content

Commit 58c42c5

Browse files
rename "utf8" to "string" (#797)
Signed-off-by: Runji Wang <[email protected]>
1 parent 7fee0d7 commit 58c42c5

20 files changed

+164
-159
lines changed

src/array/utf8_array.rs renamed to src/array/bytes_array.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ impl ValueRef for BlobRef {
3737
}
3838
}
3939

40-
pub type Utf8Array = BytesArray<str>;
40+
pub type StringArray = BytesArray<str>;
4141
pub type BlobArray = BytesArray<BlobRef>;
42-
pub type Utf8ArrayBuilder = BytesArrayBuilder<str>;
42+
pub type StringArrayBuilder = BytesArrayBuilder<str>;
4343
pub type BlobArrayBuilder = BytesArrayBuilder<BlobRef>;
4444

4545
impl<T: ValueRef + ?Sized> Clone for BytesArray<T> {
@@ -222,7 +222,7 @@ impl<T: ValueRef + ?Sized> Drop for BytesArrayWriter<'_, T> {
222222
}
223223
}
224224

225-
impl Utf8Array {
225+
impl StringArray {
226226
pub fn from_iter_display(iter: impl IntoIterator<Item = Option<impl Display>>) -> Self {
227227
let iter = iter.into_iter();
228228
let mut builder = <Self as Array>::Builder::with_capacity(iter.size_hint().0);
@@ -260,8 +260,8 @@ impl<O: AsRef<T>, T: ValueRef + ?Sized> FromIterator<Option<O>> for BytesArray<T
260260
mod tests {
261261
use super::*;
262262
#[test]
263-
fn test_utf8_builder() {
264-
let mut builder = Utf8ArrayBuilder::with_capacity(100);
263+
fn test_string_array_builder() {
264+
let mut builder = StringArrayBuilder::with_capacity(100);
265265
for i in 0..100 {
266266
if i % 2 == 0 {
267267
builder.push(Some(&format!("{}", i)));

src/array/mod.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ use crate::types::{
1515
F32, F64,
1616
};
1717

18+
mod bytes_array;
1819
mod data_chunk;
1920
mod data_chunk_builder;
2021
pub mod ops;
2122
mod primitive_array;
22-
mod utf8_array;
2323

24+
pub use self::bytes_array::*;
2425
pub use self::data_chunk::*;
2526
pub use self::data_chunk_builder::*;
2627
pub use self::primitive_array::*;
27-
pub use self::utf8_array::*;
2828

2929
mod internal_ext;
3030

@@ -39,7 +39,7 @@ pub use shuffle_ext::*;
3939
/// `ArrayBuilder` is a trait over all builders. You could build an array with
4040
/// `push` with the help of the `ArrayBuilder` trait. The `push` function always
4141
/// accepts a reference to an element, e.g. for `PrimitiveArray`,
42-
/// you must do `builder.push(Some(&1))`. For `Utf8Array`, you must do
42+
/// you must do `builder.push(Some(&1))`. For `StringArray`, you must do
4343
/// `builder.push(Some("xxx"))`. Note that you don't need to construct a `String`.
4444
///
4545
/// The associated type `Array` is the type of the corresponding array. It is the
@@ -93,7 +93,7 @@ pub trait ArrayBuilder: Sized + Send + Sync + 'static {
9393
/// The `Builder` associated type is the builder for this array.
9494
/// The `Item` is the item you could retrieve from this array.
9595
///
96-
/// For example, `PrimitiveArray` could return an `Option<&u32>`, and `Utf8Array` will
96+
/// For example, `PrimitiveArray` could return an `Option<&u32>`, and `StringArray` will
9797
/// return an `Option<&str>`.
9898
pub trait Array: Sized + Send + Sync + 'static {
9999
/// Corresponding builder of this array.
@@ -198,7 +198,7 @@ pub enum ArrayImpl {
198198
Int64(Arc<I64Array>),
199199
// Float32(PrimitiveArray<f32>),
200200
Float64(Arc<F64Array>),
201-
Utf8(Arc<Utf8Array>),
201+
String(Arc<StringArray>),
202202
Blob(Arc<BlobArray>),
203203
Decimal(Arc<DecimalArray>),
204204
Date(Arc<DateArray>),
@@ -229,7 +229,7 @@ pub enum ArrayBuilderImpl {
229229
Int64(I64ArrayBuilder),
230230
// Float32(PrimitiveArrayBuilder<f32>),
231231
Float64(F64ArrayBuilder),
232-
Utf8(Utf8ArrayBuilder),
232+
String(StringArrayBuilder),
233233
Blob(BlobArrayBuilder),
234234
Decimal(DecimalArrayBuilder),
235235
Date(DateArrayBuilder),
@@ -262,7 +262,7 @@ macro_rules! for_all_variants {
262262
{ Timestamp, Timestamp, timestamp, TimestampArray, TimestampArrayBuilder, Timestamp, Timestamp },
263263
{ TimestampTz, TimestampTz, timestamp_tz, TimestampTzArray, TimestampTzArrayBuilder, TimestampTz, TimestampTz },
264264
{ Interval, Interval, interval, IntervalArray, IntervalArrayBuilder, Interval, Interval },
265-
{ Utf8, str, utf8, Utf8Array, Utf8ArrayBuilder, String, String },
265+
{ String, str, string, StringArray, StringArrayBuilder, String, String },
266266
{ Blob, BlobRef, blob, BlobArray, BlobArrayBuilder, Blob, Blob }
267267
}
268268
};
@@ -283,7 +283,7 @@ macro_rules! for_all_variants_without_null {
283283
{ Timestamp, Timestamp, timestamp, TimestampArray, TimestampArrayBuilder, Timestamp, Timestamp },
284284
{ TimestampTz, TimestampTz, timestamp_tz, TimestampTzArray, TimestampTzArrayBuilder, TimestampTz, TimestampTz },
285285
{ Interval, Interval, interval, IntervalArray, IntervalArrayBuilder, Interval, Interval },
286-
{ Utf8, str, utf8, Utf8Array, Utf8ArrayBuilder, String, String },
286+
{ String, str, string, StringArray, StringArrayBuilder, String, String },
287287
{ Blob, BlobRef, blob, BlobArray, BlobArrayBuilder, Blob, Blob }
288288
}
289289
};
@@ -476,7 +476,7 @@ impl ArrayBuilderImpl {
476476
Self::Int32(a) if null => a.push(None),
477477
Self::Int64(a) if null => a.push(None),
478478
Self::Float64(a) if null => a.push(None),
479-
Self::Utf8(a) if null => a.push(None),
479+
Self::String(a) if null => a.push(None),
480480
Self::Blob(a) if null => a.push(None),
481481
Self::Decimal(a) if null => a.push(None),
482482
Self::Date(a) if null => a.push(None),
@@ -503,7 +503,7 @@ impl ArrayBuilderImpl {
503503
&s.parse::<F64>()
504504
.map_err(|e| ConvertError::ParseFloat(s.to_string(), e))?,
505505
)),
506-
Self::Utf8(a) => a.push(Some(s)),
506+
Self::String(a) => a.push(Some(s)),
507507
Self::Blob(a) => a.push(Some(
508508
&s.parse::<Blob>()
509509
.map_err(|e| ConvertError::ParseBlob(s.to_string(), e))?,
@@ -635,7 +635,7 @@ impl From<&DataValue> for ArrayImpl {
635635
&DataValue::Int32(v) => Self::new_int32([v].into_iter().collect()),
636636
&DataValue::Int64(v) => Self::new_int64([v].into_iter().collect()),
637637
&DataValue::Float64(v) => Self::new_float64([v].into_iter().collect()),
638-
DataValue::String(v) => Self::new_utf8([Some(v)].into_iter().collect()),
638+
DataValue::String(v) => Self::new_string([Some(v)].into_iter().collect()),
639639
DataValue::Blob(v) => Self::new_blob([Some(v)].into_iter().collect()),
640640
&DataValue::Decimal(v) => Self::new_decimal([v].into_iter().collect()),
641641
&DataValue::Date(v) => Self::new_date([v].into_iter().collect()),

src/array/ops.rs

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ macro_rules! cmp {
133133
(A::Decimal(a), A::Float64(b)) => binary_op(a.as_ref(), b.as_ref(), |a, b| *a $op Decimal::from_f64_retain(b.0).unwrap()),
134134
(A::Decimal(a), A::Decimal(b)) => binary_op(a.as_ref(), b.as_ref(), |a, b| a $op b),
135135

136-
(A::Utf8(a), A::Utf8(b)) => binary_op(a.as_ref(), b.as_ref(), |a, b| a $op b),
136+
(A::String(a), A::String(b)) => binary_op(a.as_ref(), b.as_ref(), |a, b| a $op b),
137137

138138
(A::Date(a), A::Date(b)) => binary_op(a.as_ref(), b.as_ref(), |a, b| a $op b),
139139

@@ -209,7 +209,7 @@ impl ArrayImpl {
209209
}
210210
regex
211211
}
212-
let A::Utf8(a) = self else {
212+
let A::String(a) = self else {
213213
return Err(ConvertError::NoUnaryOp("like".into(), self.type_string()));
214214
};
215215
let regex = Regex::new(&like_to_regex(pattern)).unwrap();
@@ -219,11 +219,11 @@ impl ArrayImpl {
219219
}
220220

221221
pub fn concat(&self, other: &Self) -> Result {
222-
let (A::Utf8(a), A::Utf8(b)) = (self, other) else {
222+
let (A::String(a), A::String(b)) = (self, other) else {
223223
return Err(ConvertError::NoBinaryOp("||".into(), self.type_string(), other.type_string()));
224224
};
225225

226-
Ok(A::new_utf8(binary_op(a.as_ref(), b.as_ref(), |a, b| {
226+
Ok(A::new_string(binary_op(a.as_ref(), b.as_ref(), |a, b| {
227227
format!("{a}{b}")
228228
})))
229229
}
@@ -282,10 +282,10 @@ impl ArrayImpl {
282282
}
283283

284284
pub fn substring(&self, start: &Self, length: &Self) -> Result {
285-
let (A::Utf8(a), A::Int32(b), A::Int32(c)) = (self, start, length) else {
285+
let (A::String(a), A::Int32(b), A::Int32(c)) = (self, start, length) else {
286286
return Err(ConvertError::NoTernaryOp("substring".into(), self.type_string(), start.type_string(), length.type_string()));
287287
};
288-
Ok(A::new_utf8(ternary_op(
288+
Ok(A::new_string(ternary_op(
289289
a.as_ref(),
290290
b.as_ref(),
291291
c.as_ref(),
@@ -354,7 +354,7 @@ impl ArrayImpl {
354354
Self::new_float64(unary_op(a.as_ref(), |&b| F64::from(b as u8 as f64)))
355355
}
356356
Type::String => {
357-
Self::new_utf8(unary_op(a.as_ref(), |&b| if b { "true" } else { "false" }))
357+
Self::new_string(unary_op(a.as_ref(), |&b| if b { "true" } else { "false" }))
358358
}
359359
Type::Decimal(_, _) => {
360360
Self::new_decimal(unary_op(a.as_ref(), |&b| Decimal::from(b as u8)))
@@ -375,7 +375,7 @@ impl ArrayImpl {
375375
Type::Int32 => Self::new_int32(unary_op(a.as_ref(), |&b| b as i32)),
376376
Type::Int64 => Self::new_int64(unary_op(a.as_ref(), |&b| b as i64)),
377377
Type::Float64 => Self::new_float64(unary_op(a.as_ref(), |&i| F64::from(i as f64))),
378-
Type::String => Self::new_utf8(Utf8Array::from_iter_display(a.iter())),
378+
Type::String => Self::new_string(StringArray::from_iter_display(a.iter())),
379379
Type::Decimal(_, _) => {
380380
Self::new_decimal(unary_op(a.as_ref(), |&i| Decimal::from(i)))
381381
}
@@ -398,7 +398,7 @@ impl ArrayImpl {
398398
Type::Int32 => Self::Int32(a.clone()),
399399
Type::Int64 => Self::new_int64(unary_op(a.as_ref(), |&b| b as i64)),
400400
Type::Float64 => Self::new_float64(unary_op(a.as_ref(), |&i| F64::from(i as f64))),
401-
Type::String => Self::new_utf8(Utf8Array::from_iter_display(a.iter())),
401+
Type::String => Self::new_string(StringArray::from_iter_display(a.iter())),
402402
Type::Decimal(_, _) => {
403403
Self::new_decimal(unary_op(a.as_ref(), |&i| Decimal::from(i)))
404404
}
@@ -424,7 +424,7 @@ impl ArrayImpl {
424424
})?),
425425
Type::Int64 => Self::Int64(a.clone()),
426426
Type::Float64 => Self::new_float64(unary_op(a.as_ref(), |&i| F64::from(i as f64))),
427-
Type::String => Self::new_utf8(Utf8Array::from_iter_display(a.iter())),
427+
Type::String => Self::new_string(StringArray::from_iter_display(a.iter())),
428428
Type::Decimal(_, _) => {
429429
Self::new_decimal(unary_op(a.as_ref(), |&i| Decimal::from(i)))
430430
}
@@ -453,7 +453,7 @@ impl ArrayImpl {
453453
.ok_or(ConvertError::Overflow(DataValue::Float64(b), Type::Int64))
454454
})?),
455455
Type::Float64 => Self::Float64(a.clone()),
456-
Type::String => Self::new_utf8(Utf8Array::from_iter_display(a.iter())),
456+
Type::String => Self::new_string(StringArray::from_iter_display(a.iter())),
457457
Type::Decimal(_, _) => Self::new_decimal(unary_op(a.as_ref(), |&f| {
458458
Decimal::from_f64_retain(f.0).unwrap()
459459
})),
@@ -467,7 +467,7 @@ impl ArrayImpl {
467467
return Err(ConvertError::NoCast("DOUBLE", data_type.clone()));
468468
}
469469
},
470-
Self::Utf8(a) => match data_type {
470+
Self::String(a) => match data_type {
471471
Type::Bool => Self::new_bool(try_unary_op(a.as_ref(), |s| {
472472
s.parse::<bool>()
473473
.map_err(|e| ConvertError::ParseBool(s.to_string(), e))
@@ -488,7 +488,7 @@ impl ArrayImpl {
488488
s.parse::<F64>()
489489
.map_err(|e| ConvertError::ParseFloat(s.to_string(), e))
490490
})?),
491-
Type::String => Self::Utf8(a.clone()),
491+
Type::String => Self::String(a.clone()),
492492
Type::Decimal(_, _) => Self::new_decimal(try_unary_op(a.as_ref(), |s| {
493493
Decimal::from_str(s).map_err(|e| ConvertError::ParseDecimal(s.to_string(), e))
494494
})?),
@@ -533,7 +533,7 @@ impl ArrayImpl {
533533
.map(F64::from)
534534
.ok_or(ConvertError::FromDecimalError(DataTypeKind::Float64, d))
535535
})?),
536-
Type::String => Self::new_utf8(Utf8Array::from_iter_display(a.iter())),
536+
Type::String => Self::new_string(StringArray::from_iter_display(a.iter())),
537537
Type::Decimal(_, _) => self.clone(),
538538
Type::Null
539539
| Type::Blob
@@ -547,17 +547,17 @@ impl ArrayImpl {
547547
},
548548
Self::Date(a) => match data_type {
549549
Type::Date => self.clone(),
550-
Type::String => Self::new_utf8(Utf8Array::from_iter_display(a.iter())),
550+
Type::String => Self::new_string(StringArray::from_iter_display(a.iter())),
551551
_ => return Err(ConvertError::NoCast("DATE", data_type.clone())),
552552
},
553553
Self::Timestamp(a) => match data_type {
554554
Type::Timestamp => self.clone(),
555-
Type::String => Self::new_utf8(Utf8Array::from_iter_display(a.iter())),
555+
Type::String => Self::new_string(StringArray::from_iter_display(a.iter())),
556556
_ => return Err(ConvertError::NoCast("TIMESTAMP", data_type.clone())),
557557
},
558558
Self::TimestampTz(a) => match data_type {
559559
Type::TimestampTz => self.clone(),
560-
Type::String => Self::new_utf8(Utf8Array::from_iter_display(a.iter())),
560+
Type::String => Self::new_string(StringArray::from_iter_display(a.iter())),
561561
_ => {
562562
return Err(ConvertError::NoCast(
563563
"TIMESTAMP WITH TIME ZONE",
@@ -567,7 +567,7 @@ impl ArrayImpl {
567567
},
568568
Self::Interval(a) => match data_type {
569569
Type::Interval => self.clone(),
570-
Type::String => Self::new_utf8(Utf8Array::from_iter_display(a.iter())),
570+
Type::String => Self::new_string(StringArray::from_iter_display(a.iter())),
571571
_ => return Err(ConvertError::NoCast("INTERVAL", data_type.clone())),
572572
},
573573
})
@@ -592,10 +592,10 @@ impl ArrayImpl {
592592
}
593593

594594
pub fn replace(&self, from: &str, to: &str) -> Result {
595-
let A::Utf8(a) = self else {
595+
let A::String(a) = self else {
596596
return Err(ConvertError::NoUnaryOp("replace".into(), self.type_string()));
597597
};
598-
Ok(A::new_utf8(unary_op(a.as_ref(), |s| s.replace(from, to))))
598+
Ok(A::new_string(unary_op(a.as_ref(), |s| s.replace(from, to))))
599599
}
600600
}
601601

src/db.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use futures::TryStreamExt;
66
use risinglight_proto::rowset::block_statistics::BlockStatisticsType;
77

88
use crate::array::{
9-
ArrayBuilder, ArrayBuilderImpl, Chunk, DataChunk, I32ArrayBuilder, Utf8ArrayBuilder,
9+
ArrayBuilder, ArrayBuilderImpl, Chunk, DataChunk, I32ArrayBuilder, StringArrayBuilder,
1010
};
1111
use crate::catalog::{RootCatalogRef, TableRefId, INTERNAL_SCHEMA_NAME};
1212
use crate::parser::{parse, ParserError, Statement};
@@ -54,10 +54,10 @@ impl Database {
5454

5555
fn run_desc(&self, table_name: &str) -> Result<Vec<Chunk>, Error> {
5656
let mut column_id = I32ArrayBuilder::new();
57-
let mut column_name = Utf8ArrayBuilder::new();
58-
let mut column_type = Utf8ArrayBuilder::new();
59-
let mut column_is_null = Utf8ArrayBuilder::new();
60-
let mut column_is_primary = Utf8ArrayBuilder::new();
57+
let mut column_name = StringArrayBuilder::new();
58+
let mut column_type = StringArrayBuilder::new();
59+
let mut column_is_null = StringArrayBuilder::new();
60+
let mut column_is_primary = StringArrayBuilder::new();
6161
let table_catalog = self.catalog.get_table_by_name(table_name).unwrap();
6262

6363
let all_columns = table_catalog.all_columns();
@@ -96,9 +96,9 @@ impl Database {
9696

9797
fn run_dt(&self) -> Result<Vec<Chunk>, Error> {
9898
let mut schema_id_vec = I32ArrayBuilder::new();
99-
let mut schema_vec = Utf8ArrayBuilder::new();
99+
let mut schema_vec = StringArrayBuilder::new();
100100
let mut table_id_vec = I32ArrayBuilder::new();
101-
let mut table_vec = Utf8ArrayBuilder::new();
101+
let mut table_vec = StringArrayBuilder::new();
102102
for (_, schema) in self.catalog.all_schemas() {
103103
for (_, table) in schema.all_tables() {
104104
schema_id_vec.push(Some(&(schema.id() as i32)));
@@ -148,8 +148,8 @@ impl Database {
148148
StorageColumnRef::Idx(col_id),
149149
),
150150
]);
151-
let mut stat_name = Utf8ArrayBuilder::with_capacity(2);
152-
let mut stat_value = Utf8ArrayBuilder::with_capacity(2);
151+
let mut stat_name = StringArrayBuilder::with_capacity(2);
152+
let mut stat_value = StringArrayBuilder::with_capacity(2);
153153
stat_name.push(Some("RowCount"));
154154
stat_value.push(Some(
155155
row_count[0]

src/executor/copy_from_file.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ mod tests {
155155
let expected: DataChunk = [
156156
ArrayImpl::new_int32([1, 2].into_iter().collect()),
157157
ArrayImpl::new_float64([1.5, 2.5].into_iter().collect()),
158-
ArrayImpl::new_utf8(["one", "two"].iter().map(Some).collect()),
158+
ArrayImpl::new_string(["one", "two"].iter().map(Some).collect()),
159159
]
160160
.into_iter()
161161
.collect();

src/executor/copy_to_file.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ mod tests {
9898
yield [
9999
ArrayImpl::new_int32([1, 2].into_iter().collect()),
100100
ArrayImpl::new_float64([1.5, 2.5].into_iter().collect()),
101-
ArrayImpl::new_utf8(["one", "two"].iter().map(Some).collect()),
101+
ArrayImpl::new_string(["one", "two"].iter().map(Some).collect()),
102102
]
103103
.into_iter()
104104
.collect();

src/executor/explain.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use futures::{future, stream};
44
use pretty_xmlish::PrettyConfig;
55

66
use super::*;
7-
use crate::array::{ArrayImpl, Utf8Array};
7+
use crate::array::{ArrayImpl, StringArray};
88
use crate::planner::{Explain, Optimizer};
99

1010
/// The executor of `explain` statement.
@@ -28,8 +28,9 @@ impl ExplainExecutor {
2828
..PrettyConfig::default()
2929
};
3030
config.unicode(&mut explain, &explainer);
31-
let chunk =
32-
DataChunk::from_iter([ArrayImpl::new_utf8(Utf8Array::from_iter([Some(explain)]))]);
31+
let chunk = DataChunk::from_iter([ArrayImpl::new_string(StringArray::from_iter([Some(
32+
explain,
33+
)]))]);
3334

3435
stream::once(future::ok(chunk)).boxed()
3536
}

src/executor/internal.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// Copyright 2023 RisingLight Project Authors. Licensed under Apache-2.0.
22
use super::*;
3-
use crate::array::{ArrayImpl, Utf8Array};
3+
use crate::array::{ArrayImpl, StringArray};
44
use crate::catalog::{TableRefId, CONTRIBUTORS_TABLE_ID};
55
/// The executor of internal tables.
66
pub struct InternalTableExecutor {
@@ -79,7 +79,7 @@ fn contributors() -> DataChunk {
7979
"yuzi-neko",
8080
"XieJiann",
8181
];
82-
[ArrayImpl::new_utf8(Utf8Array::from_iter(
82+
[ArrayImpl::new_string(StringArray::from_iter(
8383
contributors.iter().map(|s| Some(*s)).sorted(),
8484
))]
8585
.into_iter()

0 commit comments

Comments
 (0)