Skip to content

Commit d10b147

Browse files
committed
fix bug where caching would be enabled when it shouldn't
1 parent 1b5b964 commit d10b147

File tree

5 files changed

+36
-17
lines changed

5 files changed

+36
-17
lines changed

Makefile

+4-3
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ clean:
4343
rm -rf build
4444
rm -rf testext
4545
cd duckdb && make clean
46+
cd duckdb/tools/nodejs && make clean
4647

4748
# Main build
4849
debug:
@@ -68,16 +69,16 @@ test_release: release
6869
./build/release/test/unittest "$(PROJ_DIR)test/*"
6970

7071
test_debug: debug
71-
./build/release/test/unittest "$(PROJ_DIR)test/*"
72+
./build/debug/test/unittest "$(PROJ_DIR)test/*"
7273

7374
# Client tests
7475
DEBUG_EXT_PATH='$(PROJ_DIR)build/debug/extension/arrow/arrow.duckdb_extension'
75-
RELEASE_EXT_PATH='$(PROJ_DIR)build/debug/extension/arrow/arrow.duckdb_extension'
76+
RELEASE_EXT_PATH='$(PROJ_DIR)build/release/extension/arrow/arrow.duckdb_extension'
7677
test_js: test_debug_js
7778
test_debug_js: debug_js
7879
cd duckdb/tools/nodejs && ARROW_EXTENSION_BINARY_PATH=$(DEBUG_EXT_PATH) npm run test-path -- "../../../test/nodejs/**/*.js"
7980
test_release_js: release_js
80-
cd duckdb/tools/nodejs && ARROW_EXTENSION_BINARY_PATH=$(DEBUG_EXT_PATH) npm run test-path -- "../../../test/nodejs/**/*.js"
81+
cd duckdb/tools/nodejs && ARROW_EXTENSION_BINARY_PATH=$(RELEASE_EXT_PATH) npm run test-path -- "../../../test/nodejs/**/*.js"
8182

8283
format:
8384
find src/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i

duckdb

Submodule duckdb updated 1772 files

src/arrow_scan_ipc.cpp

+17-11
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@ TableFunction ArrowIPCTableFunction::GetFunction() {
88

99
TableFunction scan_arrow_ipc_func(
1010
"scan_arrow_ipc", {LogicalType::LIST(LogicalType::STRUCT(make_buffer_struct_children))},
11-
ArrowIPCTableFunction::ArrowScanFunction, ArrowIPCTableFunction::ArrowScanBind,
12-
ArrowTableFunction::ArrowScanInitGlobal, ArrowTableFunction::ArrowScanInitLocal);
11+
ArrowIPCTableFunction::ArrowScanFunction, ArrowIPCTableFunction::ArrowScanBind,
12+
ArrowTableFunction::ArrowScanInitGlobal, ArrowTableFunction::ArrowScanInitLocal);
1313

1414
scan_arrow_ipc_func.cardinality = ArrowTableFunction::ArrowScanCardinality;
15-
// FIXME this currently does not work yet
16-
// scan_arrow_ipc_func.get_batch_index = ArrowTableFunction::ArrowGetBatchIndex;
15+
scan_arrow_ipc_func.get_batch_index = nullptr; // TODO implement
1716
scan_arrow_ipc_func.projection_pushdown = true;
1817
scan_arrow_ipc_func.filter_pushdown = false;
1918

@@ -80,7 +79,7 @@ unique_ptr <FunctionData> ArrowIPCTableFunction::ArrowScanBind(ClientContext &co
8079
return std::move(res);
8180
}
8281

83-
// TODO: cleanup: only difference is the ArrowToDuckDB call
82+
// Same as regular arrow scan, except ArrowToDuckDB call TODO: refactor to allow nicely overriding this
8483
void ArrowIPCTableFunction::ArrowScanFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
8584
if (!data_p.local_state) {
8685
return;
@@ -90,16 +89,23 @@ void ArrowIPCTableFunction::ArrowScanFunction(ClientContext &context, TableFunct
9089
auto &global_state = data_p.global_state->Cast<ArrowScanGlobalState>();
9190

9291
//! Out of tuples in this chunk
93-
if (state.chunk_offset >= (idx_t) state.chunk->arrow_array.length) {
94-
if (!ArrowTableFunction::ArrowScanParallelStateNext(context, data_p.bind_data.get(), state, global_state)) {
92+
if (state.chunk_offset >= (idx_t)state.chunk->arrow_array.length) {
93+
if (!ArrowScanParallelStateNext(context, data_p.bind_data.get(), state, global_state)) {
9594
return;
9695
}
9796
}
98-
int64_t output_size =
99-
MinValue<int64_t>(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset);
97+
int64_t output_size = MinValue<int64_t>(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset);
10098
data.lines_read += output_size;
101-
output.SetCardinality(output_size);
102-
ArrowTableFunction::ArrowToDuckDB(state, data.arrow_table.GetColumns(), output, data.lines_read - output_size, false);
99+
if (global_state.CanRemoveFilterColumns()) {
100+
state.all_columns.Reset();
101+
state.all_columns.SetCardinality(output_size);
102+
ArrowToDuckDB(state, data.arrow_table.GetColumns(), state.all_columns, data.lines_read - output_size, false);
103+
output.ReferenceColumns(state.all_columns, global_state.projection_ids);
104+
} else {
105+
output.SetCardinality(output_size);
106+
ArrowToDuckDB(state, data.arrow_table.GetColumns(), output, data.lines_read - output_size, false);
107+
}
108+
103109
output.Verify();
104110
state.chunk_offset += output.size();
105111
}

src/arrow_to_ipc.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ OperatorResultType ToArrowIPCFunction::Function(ExecutionContext &context, Table
8787

8888
bool sending_schema = false;
8989

90-
bool caching_disabled = context.pipeline && !context.pipeline->GetSink();
90+
bool caching_disabled = !PhysicalOperator::OperatorCachingAllowed(context);
9191

9292
if (!local_state.checked_schema) {
9393
if (!global_state.sent_schema) {

test/sql/arrow.test

+13-1
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,16 @@
99
require arrow
1010

1111
statement ok
12-
SELECT * FROM to_arrow_ipc((SELECT 'Its working!'));
12+
SELECT * FROM to_arrow_ipc((SELECT 'Its working!'));
13+
14+
# Test operator caching behaviour is sane
15+
statement ok
16+
create table data as select * from range(0,2000) tbl(col)
17+
18+
statement ok
19+
WITH data_union AS (
20+
SELECT * FROM data
21+
UNION ALL
22+
SELECT * FROM data
23+
)
24+
SELECT * FROM to_arrow_ipc((SELECT * FROM data_union ORDER BY col));

0 commit comments

Comments
 (0)