Skip to content

Commit d5ff3e7

Browse files
Chen-Yuan-Lai, Ian Lai, alamb
authored
refactor: remove uses of arrow_buffer & arrow_array and use reexport in arrow instead (#14503)
* refactor: replace uses of arrow_buffer and arrow_array with reexport in arrow
* Remove arrow-buffer in common
* Remove dependency in core
* remove another one
* remove from functions-nested
* remove from physical-expr
* remove from physical-expr-common
* Remove from physical-plan
* Remove from substrait
* fix datafusion-cli/Cargo.lock

---------

Co-authored-by: Ian Lai <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
1 parent fe8ab01 commit d5ff3e7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+114
-124
lines changed

datafusion-cli/Cargo.lock

-6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/common/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ apache-avro = { version = "0.17", default-features = false, features = [
5252
], optional = true }
5353
arrow = { workspace = true }
5454
arrow-array = { workspace = true }
55-
arrow-buffer = { workspace = true }
5655
arrow-ipc = { workspace = true }
5756
arrow-schema = { workspace = true }
5857
base64 = "0.22.1"

datafusion/common/src/hash_utils.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,11 @@
2121
use std::sync::Arc;
2222

2323
use ahash::RandomState;
24+
use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
2425
use arrow::array::*;
2526
use arrow::datatypes::*;
2627
#[cfg(not(feature = "force_hash_collisions"))]
2728
use arrow::{downcast_dictionary_array, downcast_primitive_array};
28-
use arrow_buffer::IntervalDayTime;
29-
use arrow_buffer::IntervalMonthDayNano;
3029

3130
#[cfg(not(feature = "force_hash_collisions"))]
3231
use crate::cast::{
@@ -700,7 +699,7 @@ mod tests {
700699
// Tests actual values of hashes, which are different if forcing collisions
701700
#[cfg(not(feature = "force_hash_collisions"))]
702701
fn create_hashes_for_struct_arrays() {
703-
use arrow_buffer::Buffer;
702+
use arrow::buffer::Buffer;
704703

705704
let boolarr = Arc::new(BooleanArray::from(vec![
706705
false, false, true, true, true, true,

datafusion/common/src/scalar/mod.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ use crate::cast::{
4040
use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
4141
use crate::hash_utils::create_hashes;
4242
use crate::utils::SingleRowListArrayBuilder;
43+
use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
44+
use arrow::buffer::ScalarBuffer;
4345
use arrow::compute::kernels::numeric::*;
4446
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
4547
use arrow::{
@@ -54,7 +56,6 @@ use arrow::{
5456
UInt16Type, UInt32Type, UInt64Type, UInt8Type, DECIMAL128_MAX_PRECISION,
5557
},
5658
};
57-
use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer};
5859
use arrow_schema::{UnionFields, UnionMode};
5960

6061
use crate::format::DEFAULT_CAST_OPTIONS;
@@ -3958,12 +3959,11 @@ mod tests {
39583959
};
39593960

39603961
use crate::assert_batches_eq;
3961-
use arrow::buffer::OffsetBuffer;
3962+
use arrow::array::{types::Float64Type, NullBufferBuilder};
3963+
use arrow::buffer::{Buffer, OffsetBuffer};
39623964
use arrow::compute::{is_null, kernels};
39633965
use arrow::error::ArrowError;
39643966
use arrow::util::pretty::pretty_format_columns;
3965-
use arrow_array::types::Float64Type;
3966-
use arrow_buffer::{Buffer, NullBufferBuilder};
39673967
use arrow_schema::Fields;
39683968
use chrono::NaiveDate;
39693969
use rand::Rng;

datafusion/core/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,6 @@ xz2 = { version = "0.1", optional = true, features = ["static"] }
137137
zstd = { version = "0.13", optional = true, default-features = false }
138138

139139
[dev-dependencies]
140-
arrow-buffer = { workspace = true }
141140
async-trait = { workspace = true }
142141
criterion = { version = "0.5", features = ["async_tokio"] }
143142
ctor = { workspace = true }

datafusion/core/tests/dataframe/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
mod dataframe_functions;
2020
mod describe;
2121

22+
use arrow::buffer::ScalarBuffer;
2223
use arrow::datatypes::{DataType, Field, Float32Type, Int32Type, Schema, UInt64Type};
2324
use arrow::util::pretty::pretty_format_batches;
2425
use arrow::{
@@ -33,7 +34,6 @@ use arrow_array::{
3334
record_batch, Array, BooleanArray, DictionaryArray, Float32Array, Float64Array,
3435
Int8Array, UnionArray,
3536
};
36-
use arrow_buffer::ScalarBuffer;
3737
use arrow_schema::{ArrowError, SchemaRef, UnionFields, UnionMode};
3838
use datafusion_functions_aggregate::count::count_udaf;
3939
use datafusion_functions_aggregate::expr_fn::{

datafusion/core/tests/expr_api/simplification.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717

1818
//! This program demonstrates the DataFusion expression simplification API.
1919
20+
use arrow::array::types::IntervalDayTime;
2021
use arrow::datatypes::{DataType, Field, Schema};
2122
use arrow_array::{ArrayRef, Int32Array};
22-
use arrow_buffer::IntervalDayTime;
2323
use chrono::{DateTime, TimeZone, Utc};
2424
use datafusion::{error::Result, execution::context::ExecutionProps, prelude::*};
2525
use datafusion_common::cast::as_int32_array;

datafusion/functions-aggregate/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ path = "src/lib.rs"
4040
[dependencies]
4141
ahash = { workspace = true }
4242
arrow = { workspace = true }
43-
arrow-buffer = { workspace = true }
4443
arrow-schema = { workspace = true }
4544
datafusion-common = { workspace = true }
4645
datafusion-doc = { workspace = true }

datafusion/functions-aggregate/benches/array_agg.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,18 @@
1717

1818
use std::sync::Arc;
1919

20-
use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray};
20+
use arrow::array::{
21+
Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray, NullBufferBuilder,
22+
};
2123
use arrow::datatypes::Int64Type;
2224
use arrow::util::bench_util::create_primitive_array;
2325
use arrow_schema::Field;
2426
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2527
use datafusion_expr::Accumulator;
2628
use datafusion_functions_aggregate::array_agg::ArrayAggAccumulator;
2729

30+
use arrow::buffer::OffsetBuffer;
2831
use arrow::util::test_util::seedable_rng;
29-
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
3032
use rand::distributions::{Distribution, Standard};
3133
use rand::Rng;
3234

datafusion/functions-aggregate/src/correlation.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@ use std::mem::size_of_val;
2323
use std::sync::Arc;
2424

2525
use arrow::array::{
26-
downcast_array, Array, AsArray, BooleanArray, Float64Array, UInt64Array,
26+
downcast_array, Array, AsArray, BooleanArray, Float64Array, NullBufferBuilder,
27+
UInt64Array,
2728
};
2829
use arrow::compute::{and, filter, is_not_null, kernels::cast};
2930
use arrow::datatypes::{Float64Type, UInt64Type};
3031
use arrow::{
3132
array::ArrayRef,
3233
datatypes::{DataType, Field},
3334
};
34-
use arrow_buffer::NullBufferBuilder;
3535
use datafusion_expr::{EmitTo, GroupsAccumulator};
3636
use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::accumulate_multiple;
3737
use log::debug;

datafusion/functions-nested/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ path = "src/lib.rs"
4242
[dependencies]
4343
arrow = { workspace = true }
4444
arrow-array = { workspace = true }
45-
arrow-buffer = { workspace = true }
4645
arrow-ord = { workspace = true }
4746
arrow-schema = { workspace = true }
4847
datafusion-common = { workspace = true }

datafusion/functions-nested/benches/map.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
extern crate criterion;
1919

20+
use arrow::buffer::{OffsetBuffer, ScalarBuffer};
2021
use arrow_array::{Int32Array, ListArray, StringArray};
21-
use arrow_buffer::{OffsetBuffer, ScalarBuffer};
2222
use arrow_schema::{DataType, Field};
2323
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2424
use rand::prelude::ThreadRng;

datafusion/functions-nested/src/array_has.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818
//! [`ScalarUDFImpl`] definitions for array_has, array_has_all and array_has_any functions.
1919
2020
use arrow::array::{Array, ArrayRef, BooleanArray, OffsetSizeTrait};
21+
use arrow::buffer::BooleanBuffer;
2122
use arrow::datatypes::DataType;
2223
use arrow::row::{RowConverter, Rows, SortField};
2324
use arrow_array::{Datum, GenericListArray, Scalar};
24-
use arrow_buffer::BooleanBuffer;
2525
use datafusion_common::cast::as_generic_list_array;
2626
use datafusion_common::utils::string_utils::string_array_to_vec;
2727
use datafusion_common::{exec_err, Result, ScalarValue};

datafusion/functions-nested/src/concat.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@
2020
use std::sync::Arc;
2121
use std::{any::Any, cmp::Ordering};
2222

23-
use arrow::array::{Capacities, MutableArrayData};
24-
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
25-
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
23+
use arrow::array::{
24+
Array, ArrayRef, Capacities, GenericListArray, MutableArrayData, NullBufferBuilder,
25+
OffsetSizeTrait,
26+
};
27+
use arrow::buffer::OffsetBuffer;
2628
use arrow_schema::{DataType, Field};
2729
use datafusion_common::Result;
2830
use datafusion_common::{

datafusion/functions-nested/src/except.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818
//! [`ScalarUDFImpl`] definitions for array_except function.
1919
2020
use crate::utils::{check_datatypes, make_scalar_function};
21+
use arrow::buffer::OffsetBuffer;
2122
use arrow::row::{RowConverter, SortField};
2223
use arrow_array::cast::AsArray;
2324
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
24-
use arrow_buffer::OffsetBuffer;
2525
use arrow_schema::{DataType, FieldRef};
2626
use datafusion_common::{exec_err, internal_err, HashSet, Result};
2727
use datafusion_expr::{

datafusion/functions-nested/src/extract.rs

+4-9
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,12 @@
1717

1818
//! [`ScalarUDFImpl`] definitions for array_element, array_slice, array_pop_front, array_pop_back, and array_any_value functions.
1919
20-
use arrow::array::Array;
21-
use arrow::array::ArrayRef;
22-
use arrow::array::ArrowNativeTypeOp;
23-
use arrow::array::Capacities;
24-
use arrow::array::GenericListArray;
25-
use arrow::array::Int64Array;
26-
use arrow::array::MutableArrayData;
27-
use arrow::array::OffsetSizeTrait;
20+
use arrow::array::{
21+
Array, ArrayRef, ArrowNativeTypeOp, Capacities, GenericListArray, Int64Array,
22+
MutableArrayData, NullBufferBuilder, OffsetSizeTrait,
23+
};
2824
use arrow::buffer::OffsetBuffer;
2925
use arrow::datatypes::DataType;
30-
use arrow_buffer::NullBufferBuilder;
3126
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
3227
use arrow_schema::Field;
3328
use datafusion_common::cast::as_int64_array;

datafusion/functions-nested/src/flatten.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
//! [`ScalarUDFImpl`] definitions for flatten function.
1919
2020
use crate::utils::make_scalar_function;
21+
use arrow::buffer::OffsetBuffer;
2122
use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait};
22-
use arrow_buffer::OffsetBuffer;
2323
use arrow_schema::DataType;
2424
use arrow_schema::DataType::{FixedSizeList, LargeList, List, Null};
2525
use datafusion_common::cast::{

datafusion/functions-nested/src/make_array.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ use std::vec;
2323

2424
use crate::utils::make_scalar_function;
2525
use arrow::array::{ArrayData, Capacities, MutableArrayData};
26+
use arrow::buffer::OffsetBuffer;
2627
use arrow_array::{
2728
new_null_array, Array, ArrayRef, GenericListArray, NullArray, OffsetSizeTrait,
2829
};
29-
use arrow_buffer::OffsetBuffer;
3030
use arrow_schema::DataType::{List, Null};
3131
use arrow_schema::{DataType, Field};
3232
use datafusion_common::utils::SingleRowListArrayBuilder;

datafusion/functions-nested/src/map.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ use std::collections::VecDeque;
2020
use std::sync::Arc;
2121

2222
use arrow::array::ArrayData;
23+
use arrow::buffer::Buffer;
24+
use arrow::datatypes::ToByteSlice;
2325
use arrow_array::{Array, ArrayRef, MapArray, OffsetSizeTrait, StructArray};
24-
use arrow_buffer::{Buffer, ToByteSlice};
2526
use arrow_schema::{DataType, Field, SchemaBuilder};
2627

2728
use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays};

datafusion/functions-nested/src/map_extract.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
use arrow::array::{ArrayRef, Capacities, MutableArrayData};
2121
use arrow_array::{make_array, ListArray};
2222

23+
use arrow::buffer::OffsetBuffer;
2324
use arrow::datatypes::DataType;
2425
use arrow_array::{Array, MapArray};
25-
use arrow_buffer::OffsetBuffer;
2626
use arrow_schema::Field;
2727

2828
use datafusion_common::{cast::as_map_array, exec_err, Result};

datafusion/functions-nested/src/range.rs

+9-9
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,16 @@
1818
//! [`ScalarUDFImpl`] definitions for range and gen_series functions.
1919
2020
use crate::utils::make_scalar_function;
21-
use arrow::array::{Array, ArrayRef, Int64Array, ListArray, ListBuilder};
22-
use arrow::datatypes::{DataType, Field};
23-
use arrow_array::builder::{Date32Builder, TimestampNanosecondBuilder};
24-
use arrow_array::temporal_conversions::as_datetime_with_timezone;
25-
use arrow_array::timezone::Tz;
26-
use arrow_array::types::{
27-
Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT,
21+
use arrow::array::{
22+
builder::{Date32Builder, TimestampNanosecondBuilder},
23+
temporal_conversions::as_datetime_with_timezone,
24+
timezone::Tz,
25+
types::{Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT},
26+
Array, ArrayRef, Int64Array, ListArray, ListBuilder, NullArray, NullBufferBuilder,
27+
TimestampNanosecondArray,
2828
};
29-
use arrow_array::{NullArray, TimestampNanosecondArray};
30-
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
29+
use arrow::buffer::OffsetBuffer;
30+
use arrow::datatypes::{DataType, Field};
3131
use arrow_schema::DataType::*;
3232
use arrow_schema::IntervalUnit::MonthDayNano;
3333
use arrow_schema::TimeUnit::Nanosecond;

datafusion/functions-nested/src/remove.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
2020
use crate::utils;
2121
use crate::utils::make_scalar_function;
22+
use arrow::buffer::OffsetBuffer;
2223
use arrow_array::cast::AsArray;
2324
use arrow_array::{
2425
new_empty_array, Array, ArrayRef, BooleanArray, GenericListArray, OffsetSizeTrait,
2526
};
26-
use arrow_buffer::OffsetBuffer;
2727
use arrow_schema::{DataType, Field};
2828
use datafusion_common::cast::as_int64_array;
2929
use datafusion_common::{exec_err, Result};

datafusion/functions-nested/src/repeat.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@
1919
2020
use crate::utils::make_scalar_function;
2121
use arrow::array::{Capacities, MutableArrayData};
22+
use arrow::buffer::OffsetBuffer;
2223
use arrow::compute;
2324
use arrow::compute::cast;
2425
use arrow_array::{
2526
new_null_array, Array, ArrayRef, GenericListArray, ListArray, OffsetSizeTrait,
2627
UInt64Array,
2728
};
28-
use arrow_buffer::OffsetBuffer;
2929
use arrow_schema::DataType::{LargeList, List};
3030
use arrow_schema::{DataType, Field};
3131
use datafusion_common::cast::{as_large_list_array, as_list_array, as_uint64_array};

datafusion/functions-nested/src/replace.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@
1818
//! [`ScalarUDFImpl`] definitions for array_replace, array_replace_n and array_replace_all functions.
1919
2020
use arrow::array::{
21-
Array, ArrayRef, AsArray, Capacities, MutableArrayData, OffsetSizeTrait,
21+
Array, ArrayRef, AsArray, Capacities, GenericListArray, MutableArrayData,
22+
NullBufferBuilder, OffsetSizeTrait,
2223
};
2324
use arrow::datatypes::DataType;
2425

25-
use arrow_array::GenericListArray;
26-
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
26+
use arrow::buffer::OffsetBuffer;
2727
use arrow_schema::Field;
2828
use datafusion_common::cast::as_int64_array;
2929
use datafusion_common::{exec_err, Result};

0 commit comments

Comments
 (0)