Skip to content

Commit fbf2ffc

Browse files
Chen-Yuan-Lai, Cheng-Yuan-Lai, Ian Lai
authored
refactor: use TypeSignature::Coercible for crypto functions (#14826)
* refactor: use TypeSignature::Coercible for crypto functions * fix * fix * fix string_view.slt * fix signature for sha256 * remove unused import * support BinaryView * modify signature of sha256 function * remove unused import * clean up unused code * rewrite function using new_implicit for sha512 * remove unused import * rewrite signature with new_implicit for other crypto functions * support null for md5 function * modify sqllogictest to fit allowed input type for md5 function --------- Co-authored-by: Cheng-Yuan-Lai <a186235@g,ail.com> Co-authored-by: Ian Lai <[email protected]>
1 parent fdb4e84 commit fbf2ffc

File tree

9 files changed

+190
-88
lines changed

9 files changed

+190
-88
lines changed

datafusion/functions/src/crypto/basic.rs

+66-36
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717

1818
//! "crypto" DataFusion functions
1919
20-
use arrow::array::{Array, ArrayRef, BinaryArray, OffsetSizeTrait};
20+
use arrow::array::{
21+
Array, ArrayRef, BinaryArray, BinaryArrayType, BinaryViewArray, GenericBinaryArray,
22+
OffsetSizeTrait,
23+
};
2124
use arrow::array::{AsArray, GenericStringArray, StringArray, StringViewArray};
2225
use arrow::datatypes::DataType;
2326
use blake2::{Blake2b512, Blake2s256, Digest};
@@ -26,8 +29,8 @@ use datafusion_common::cast::as_binary_array;
2629

2730
use arrow::compute::StringArrayType;
2831
use datafusion_common::{
29-
cast::as_generic_binary_array, exec_err, internal_err, plan_err,
30-
utils::take_function_args, DataFusionError, Result, ScalarValue,
32+
exec_err, internal_err, plan_err, utils::take_function_args, DataFusionError, Result,
33+
ScalarValue,
3134
};
3235
use datafusion_expr::ColumnarValue;
3336
use md5::Md5;
@@ -203,6 +206,7 @@ pub fn utf8_or_binary_to_binary_type(
203206
| DataType::LargeUtf8
204207
| DataType::Utf8
205208
| DataType::Binary
209+
| DataType::BinaryView
206210
| DataType::LargeBinary => DataType::Binary,
207211
DataType::Null => DataType::Null,
208212
_ => {
@@ -251,27 +255,17 @@ impl DigestAlgorithm {
251255
where
252256
T: OffsetSizeTrait,
253257
{
254-
let input_value = as_generic_binary_array::<T>(value)?;
255-
let array: ArrayRef = match self {
256-
Self::Md5 => digest_to_array!(Md5, input_value),
257-
Self::Sha224 => digest_to_array!(Sha224, input_value),
258-
Self::Sha256 => digest_to_array!(Sha256, input_value),
259-
Self::Sha384 => digest_to_array!(Sha384, input_value),
260-
Self::Sha512 => digest_to_array!(Sha512, input_value),
261-
Self::Blake2b => digest_to_array!(Blake2b512, input_value),
262-
Self::Blake2s => digest_to_array!(Blake2s256, input_value),
263-
Self::Blake3 => {
264-
let binary_array: BinaryArray = input_value
265-
.iter()
266-
.map(|opt| {
267-
opt.map(|x| {
268-
let mut digest = Blake3::default();
269-
digest.update(x);
270-
Blake3::finalize(&digest).as_bytes().to_vec()
271-
})
272-
})
273-
.collect();
274-
Arc::new(binary_array)
258+
let array = match value.data_type() {
259+
DataType::Binary | DataType::LargeBinary => {
260+
let v = value.as_binary::<T>();
261+
self.digest_binary_array_impl::<&GenericBinaryArray<T>>(v)
262+
}
263+
DataType::BinaryView => {
264+
let v = value.as_binary_view();
265+
self.digest_binary_array_impl::<&BinaryViewArray>(v)
266+
}
267+
other => {
268+
return exec_err!("unsupported type for digest_utf_array: {other:?}")
275269
}
276270
};
277271
Ok(ColumnarValue::Array(array))
@@ -328,6 +322,37 @@ impl DigestAlgorithm {
328322
}
329323
}
330324
}
325+
326+
pub fn digest_binary_array_impl<'a, BinaryArrType>(
327+
self,
328+
input_value: BinaryArrType,
329+
) -> ArrayRef
330+
where
331+
BinaryArrType: BinaryArrayType<'a>,
332+
{
333+
match self {
334+
Self::Md5 => digest_to_array!(Md5, input_value),
335+
Self::Sha224 => digest_to_array!(Sha224, input_value),
336+
Self::Sha256 => digest_to_array!(Sha256, input_value),
337+
Self::Sha384 => digest_to_array!(Sha384, input_value),
338+
Self::Sha512 => digest_to_array!(Sha512, input_value),
339+
Self::Blake2b => digest_to_array!(Blake2b512, input_value),
340+
Self::Blake2s => digest_to_array!(Blake2s256, input_value),
341+
Self::Blake3 => {
342+
let binary_array: BinaryArray = input_value
343+
.iter()
344+
.map(|opt| {
345+
opt.map(|x| {
346+
let mut digest = Blake3::default();
347+
digest.update(x);
348+
Blake3::finalize(&digest).as_bytes().to_vec()
349+
})
350+
})
351+
.collect();
352+
Arc::new(binary_array)
353+
}
354+
}
355+
}
331356
}
332357
pub fn digest_process(
333358
value: &ColumnarValue,
@@ -342,22 +367,27 @@ pub fn digest_process(
342367
DataType::LargeBinary => {
343368
digest_algorithm.digest_binary_array::<i64>(a.as_ref())
344369
}
345-
other => exec_err!(
346-
"Unsupported data type {other:?} for function {digest_algorithm}"
347-
),
348-
},
349-
ColumnarValue::Scalar(scalar) => match scalar {
350-
ScalarValue::Utf8View(a)
351-
| ScalarValue::Utf8(a)
352-
| ScalarValue::LargeUtf8(a) => {
353-
Ok(digest_algorithm
354-
.digest_scalar(a.as_ref().map(|s: &String| s.as_bytes())))
370+
DataType::BinaryView => {
371+
digest_algorithm.digest_binary_array::<i32>(a.as_ref())
355372
}
356-
ScalarValue::Binary(a) | ScalarValue::LargeBinary(a) => Ok(digest_algorithm
357-
.digest_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
358373
other => exec_err!(
359374
"Unsupported data type {other:?} for function {digest_algorithm}"
360375
),
361376
},
377+
ColumnarValue::Scalar(scalar) => {
378+
match scalar {
379+
ScalarValue::Utf8View(a)
380+
| ScalarValue::Utf8(a)
381+
| ScalarValue::LargeUtf8(a) => Ok(digest_algorithm
382+
.digest_scalar(a.as_ref().map(|s: &String| s.as_bytes()))),
383+
ScalarValue::Binary(a)
384+
| ScalarValue::LargeBinary(a)
385+
| ScalarValue::BinaryView(a) => Ok(digest_algorithm
386+
.digest_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
387+
other => exec_err!(
388+
"Unsupported data type {other:?} for function {digest_algorithm}"
389+
),
390+
}
391+
}
362392
}
363393
}

datafusion/functions/src/crypto/digest.rs

+14-8
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,15 @@
1818
//! "crypto" DataFusion functions
1919
use super::basic::{digest, utf8_or_binary_to_binary_type};
2020
use arrow::datatypes::DataType;
21-
use datafusion_common::Result;
21+
use datafusion_common::{
22+
types::{logical_binary, logical_string},
23+
Result,
24+
};
2225
use datafusion_expr::{
2326
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
24-
TypeSignature::*, Volatility,
27+
TypeSignature, Volatility,
2528
};
29+
use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
2630
use datafusion_macros::user_doc;
2731
use std::any::Any;
2832

@@ -64,15 +68,17 @@ impl Default for DigestFunc {
6468

6569
impl DigestFunc {
6670
pub fn new() -> Self {
67-
use DataType::*;
6871
Self {
6972
signature: Signature::one_of(
7073
vec![
71-
Exact(vec![Utf8View, Utf8View]),
72-
Exact(vec![Utf8, Utf8]),
73-
Exact(vec![LargeUtf8, Utf8]),
74-
Exact(vec![Binary, Utf8]),
75-
Exact(vec![LargeBinary, Utf8]),
74+
TypeSignature::Coercible(vec![
75+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
76+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
77+
]),
78+
TypeSignature::Coercible(vec![
79+
Coercion::new_exact(TypeSignatureClass::Native(logical_binary())),
80+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
81+
]),
7682
],
7783
Volatility::Immutable,
7884
),

datafusion/functions/src/crypto/md5.rs

+22-8
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,16 @@
1818
//! "crypto" DataFusion functions
1919
use crate::crypto::basic::md5;
2020
use arrow::datatypes::DataType;
21-
use datafusion_common::{plan_err, Result};
21+
use datafusion_common::{
22+
plan_err,
23+
types::{logical_binary, logical_string, NativeType},
24+
Result,
25+
};
2226
use datafusion_expr::{
2327
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
24-
Volatility,
28+
TypeSignature, Volatility,
2529
};
30+
use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
2631
use datafusion_macros::user_doc;
2732
use std::any::Any;
2833

@@ -52,11 +57,20 @@ impl Default for Md5Func {
5257

5358
impl Md5Func {
5459
pub fn new() -> Self {
55-
use DataType::*;
5660
Self {
57-
signature: Signature::uniform(
58-
1,
59-
vec![Utf8View, Utf8, LargeUtf8, Binary, LargeBinary],
61+
signature: Signature::one_of(
62+
vec![
63+
TypeSignature::Coercible(vec![Coercion::new_implicit(
64+
TypeSignatureClass::Native(logical_binary()),
65+
vec![TypeSignatureClass::Native(logical_string())],
66+
NativeType::String,
67+
)]),
68+
TypeSignature::Coercible(vec![Coercion::new_implicit(
69+
TypeSignatureClass::Native(logical_binary()),
70+
vec![TypeSignatureClass::Native(logical_binary())],
71+
NativeType::Binary,
72+
)]),
73+
],
6074
Volatility::Immutable,
6175
),
6276
}
@@ -79,11 +93,11 @@ impl ScalarUDFImpl for Md5Func {
7993
use DataType::*;
8094
Ok(match &arg_types[0] {
8195
LargeUtf8 | LargeBinary => Utf8,
82-
Utf8View | Utf8 | Binary => Utf8,
96+
Utf8View | Utf8 | Binary | BinaryView => Utf8,
8397
Null => Null,
8498
Dictionary(_, t) => match **t {
8599
LargeUtf8 | LargeBinary => Utf8,
86-
Utf8 | Binary => Utf8,
100+
Utf8 | Binary | BinaryView => Utf8,
87101
Null => Null,
88102
_ => {
89103
return plan_err!(

datafusion/functions/src/crypto/sha224.rs

+19-6
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,15 @@
1818
//! "crypto" DataFusion functions
1919
use super::basic::{sha224, utf8_or_binary_to_binary_type};
2020
use arrow::datatypes::DataType;
21-
use datafusion_common::Result;
21+
use datafusion_common::{
22+
types::{logical_binary, logical_string, NativeType},
23+
Result,
24+
};
2225
use datafusion_expr::{
2326
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
24-
Volatility,
27+
TypeSignature, Volatility,
2528
};
29+
use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
2630
use datafusion_macros::user_doc;
2731
use std::any::Any;
2832

@@ -53,11 +57,20 @@ impl Default for SHA224Func {
5357

5458
impl SHA224Func {
5559
pub fn new() -> Self {
56-
use DataType::*;
5760
Self {
58-
signature: Signature::uniform(
59-
1,
60-
vec![Utf8View, Utf8, LargeUtf8, Binary, LargeBinary],
61+
signature: Signature::one_of(
62+
vec![
63+
TypeSignature::Coercible(vec![Coercion::new_implicit(
64+
TypeSignatureClass::Native(logical_binary()),
65+
vec![TypeSignatureClass::Native(logical_string())],
66+
NativeType::String,
67+
)]),
68+
TypeSignature::Coercible(vec![Coercion::new_implicit(
69+
TypeSignatureClass::Native(logical_binary()),
70+
vec![TypeSignatureClass::Native(logical_binary())],
71+
NativeType::Binary,
72+
)]),
73+
],
6174
Volatility::Immutable,
6275
),
6376
}

datafusion/functions/src/crypto/sha256.rs

+19-6
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,15 @@
1818
//! "crypto" DataFusion functions
1919
use super::basic::{sha256, utf8_or_binary_to_binary_type};
2020
use arrow::datatypes::DataType;
21-
use datafusion_common::Result;
21+
use datafusion_common::{
22+
types::{logical_binary, logical_string, NativeType},
23+
Result,
24+
};
2225
use datafusion_expr::{
2326
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
24-
Volatility,
27+
TypeSignature, Volatility,
2528
};
29+
use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
2630
use datafusion_macros::user_doc;
2731
use std::any::Any;
2832

@@ -52,11 +56,20 @@ impl Default for SHA256Func {
5256

5357
impl SHA256Func {
5458
pub fn new() -> Self {
55-
use DataType::*;
5659
Self {
57-
signature: Signature::uniform(
58-
1,
59-
vec![Utf8View, Utf8, LargeUtf8, Binary, LargeBinary],
60+
signature: Signature::one_of(
61+
vec![
62+
TypeSignature::Coercible(vec![Coercion::new_implicit(
63+
TypeSignatureClass::Native(logical_binary()),
64+
vec![TypeSignatureClass::Native(logical_string())],
65+
NativeType::String,
66+
)]),
67+
TypeSignature::Coercible(vec![Coercion::new_implicit(
68+
TypeSignatureClass::Native(logical_binary()),
69+
vec![TypeSignatureClass::Native(logical_binary())],
70+
NativeType::Binary,
71+
)]),
72+
],
6073
Volatility::Immutable,
6174
),
6275
}

datafusion/functions/src/crypto/sha384.rs

+19-6
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,15 @@
1818
//! "crypto" DataFusion functions
1919
use super::basic::{sha384, utf8_or_binary_to_binary_type};
2020
use arrow::datatypes::DataType;
21-
use datafusion_common::Result;
21+
use datafusion_common::{
22+
types::{logical_binary, logical_string, NativeType},
23+
Result,
24+
};
2225
use datafusion_expr::{
2326
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
24-
Volatility,
27+
TypeSignature, Volatility,
2528
};
29+
use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
2630
use datafusion_macros::user_doc;
2731
use std::any::Any;
2832

@@ -52,11 +56,20 @@ impl Default for SHA384Func {
5256

5357
impl SHA384Func {
5458
pub fn new() -> Self {
55-
use DataType::*;
5659
Self {
57-
signature: Signature::uniform(
58-
1,
59-
vec![Utf8View, Utf8, LargeUtf8, Binary, LargeBinary],
60+
signature: Signature::one_of(
61+
vec![
62+
TypeSignature::Coercible(vec![Coercion::new_implicit(
63+
TypeSignatureClass::Native(logical_binary()),
64+
vec![TypeSignatureClass::Native(logical_string())],
65+
NativeType::String,
66+
)]),
67+
TypeSignature::Coercible(vec![Coercion::new_implicit(
68+
TypeSignatureClass::Native(logical_binary()),
69+
vec![TypeSignatureClass::Native(logical_binary())],
70+
NativeType::Binary,
71+
)]),
72+
],
6073
Volatility::Immutable,
6174
),
6275
}

0 commit comments

Comments
 (0)