Skip to content

Commit 1b5b964

Browse files
authored
Merge pull request #17 from samansmink/switch-to-extension-util
Switch to extension util for loading extension
2 parents 56d24d4 + 67e1c4a commit 1b5b964

File tree

6 files changed

+58
-53
lines changed

6 files changed

+58
-53
lines changed

Makefile

+9-7
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ EXTENSION_FLAGS=\
3131
-DDUCKDB_EXTENSION_NAMES="arrow" \
3232
-DDUCKDB_EXTENSION_ARROW_PATH="$(PROJ_DIR)" \
3333
-DDUCKDB_EXTENSION_ARROW_LOAD_TESTS=1 \
34+
-DDUCKDB_EXTENSION_ARROW_SHOULD_LINK=0 \
3435
-DDUCKDB_EXTENSION_ARROW_TEST_PATH=$(PROJ_DIR)test \
3536
-DDUCKDB_EXTENSION_ARROW_INCLUDE_PATH="$(PROJ_DIR)src/include"
3637

@@ -46,18 +47,18 @@ clean:
4647
# Main build
4748
debug:
4849
mkdir -p build/debug && \
49-
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) -DDUCKDB_EXTENSION_ARROW_SHOULD_LINK=0 ${CLIENT_FLAGS} ${CMAKE_VARS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} -S ./duckdb/ -B build/debug && \
50+
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} ${CMAKE_VARS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} -S ./duckdb/ -B build/debug && \
5051
cmake --build build/debug --config Debug
5152

5253
release:
5354
mkdir -p build/release && \
54-
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) -DDUCKDB_EXTENSION_ARROW_SHOULD_LINK=0 ${CLIENT_FLAGS} ${CMAKE_VARS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} -S ./duckdb/ -B build/release && \
55+
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} ${CMAKE_VARS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} -S ./duckdb/ -B build/release && \
5556
cmake --build build/release --config Release
5657

5758
# Client build
58-
debug_js: CLIENT_FLAGS=-DBUILD_NODE=1 -DBUILD_EXTENSIONS=json -DDUCKDB_EXTENSION_ARROW_SHOULD_LINK=1
59+
debug_js: CLIENT_FLAGS=-DBUILD_NODE=1
5960
debug_js: debug
60-
release_js: CLIENT_FLAGS=-DBUILD_NODE=1 -DBUILD_EXTENSIONS=json -DDUCKDB_EXTENSION_ARROW_SHOULD_LINK=1
61+
release_js: CLIENT_FLAGS=-DBUILD_NODE=1
6162
release_js: release
6263

6364
# Main tests
@@ -70,12 +71,13 @@ test_debug: debug
7071
./build/release/test/unittest "$(PROJ_DIR)test/*"
7172

7273
# Client tests
74+
DEBUG_EXT_PATH='$(PROJ_DIR)build/debug/extension/arrow/arrow.duckdb_extension'
75+
# Extension binary produced by the `release` target, which configures into build/release.
RELEASE_EXT_PATH='$(PROJ_DIR)build/release/extension/arrow/arrow.duckdb_extension'
7376
test_js: test_debug_js
7477
test_debug_js: debug_js
75-
cd duckdb/tools/nodejs && npm run test-path -- "../../../test/nodejs/**/*.js"
76-
78+
cd duckdb/tools/nodejs && ARROW_EXTENSION_BINARY_PATH=$(DEBUG_EXT_PATH) npm run test-path -- "../../../test/nodejs/**/*.js"
7779
test_release_js: release_js
78-
cd duckdb/tools/nodejs && npm run test-path -- "../../../test/nodejs/**/*.js"
80+
# The release client tests take the extension binary from RELEASE_EXT_PATH, not the debug variable.
cd duckdb/tools/nodejs && ARROW_EXTENSION_BINARY_PATH=$(RELEASE_EXT_PATH) npm run test-path -- "../../../test/nodejs/**/*.js"
7981

8082
format:
8183
find src/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i

duckdb

Submodule duckdb updated 672 files

src/arrow_extension.cpp

+3-13
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,16 @@
1010
#include "duckdb/common/arrow/result_arrow_wrapper.hpp"
1111
#include "duckdb/common/arrow/arrow_appender.hpp"
1212
#include "duckdb/common/arrow/arrow_converter.hpp"
13+
#include "duckdb/main/extension_util.hpp"
1314
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
1415
#include "duckdb/function/table/arrow.hpp"
1516
#endif
1617

1718
namespace duckdb {
1819

1920
static void LoadInternal(DatabaseInstance &instance) {
20-
Connection con(instance);
21-
con.BeginTransaction();
22-
auto &catalog = Catalog::GetSystemCatalog(*con.context);
23-
24-
auto to_arrow_fun = ToArrowIPCFunction::GetFunction();
25-
CreateTableFunctionInfo to_arrow_ipc_info(to_arrow_fun);
26-
catalog.CreateTableFunction(*con.context, &to_arrow_ipc_info);
27-
28-
auto scan_arrow_fun = ArrowIPCTableFunction::GetFunction();
29-
CreateTableFunctionInfo scan_arrow_ipc_info(scan_arrow_fun);
30-
catalog.CreateTableFunction(*con.context, &scan_arrow_ipc_info);
31-
32-
con.Commit();
21+
ExtensionUtil::RegisterFunction(instance, ToArrowIPCFunction::GetFunction());
22+
ExtensionUtil::RegisterFunction(instance, ArrowIPCTableFunction::GetFunction());
3323
}
3424

3525
void ArrowExtension::Load(DuckDB &db) {

src/arrow_scan_ipc.cpp

+7-5
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,15 @@ unique_ptr <FunctionData> ArrowIPCTableFunction::ArrowScanBind(ClientContext &co
6060
if (!schema.release) {
6161
throw InvalidInputException("arrow_scan: released schema passed");
6262
}
63+
auto arrow_type = GetArrowLogicalType(schema);
6364
if (schema.dictionary) {
64-
res->arrow_convert_data[col_idx] =
65-
make_uniq<ArrowConvertData>(GetArrowLogicalType(schema, res->arrow_convert_data, col_idx));
66-
return_types.emplace_back(GetArrowLogicalType(*schema.dictionary, res->arrow_convert_data, col_idx));
65+
auto dictionary_type = GetArrowLogicalType(*schema.dictionary);
66+
return_types.emplace_back(dictionary_type->GetDuckType());
67+
arrow_type->SetDictionary(std::move(dictionary_type));
6768
} else {
68-
return_types.emplace_back(GetArrowLogicalType(schema, res->arrow_convert_data, col_idx));
69+
return_types.emplace_back(arrow_type->GetDuckType());
6970
}
71+
res->arrow_table.AddColumn(col_idx, std::move(arrow_type));
7072
auto format = string(schema.format);
7173
auto name = string(schema.name);
7274
if (name.empty()) {
@@ -97,7 +99,7 @@ void ArrowIPCTableFunction::ArrowScanFunction(ClientContext &context, TableFunct
9799
MinValue<int64_t>(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset);
98100
data.lines_read += output_size;
99101
output.SetCardinality(output_size);
100-
ArrowTableFunction::ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size, false);
102+
ArrowTableFunction::ArrowToDuckDB(state, data.arrow_table.GetColumns(), output, data.lines_read - output_size, false);
101103
output.Verify();
102104
state.chunk_offset += output.size();
103105
}

src/arrow_to_ipc.cpp

+3-9
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#include "duckdb/common/arrow/arrow_converter.hpp"
2323
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
2424
#include "duckdb/function/table/arrow.hpp"
25-
#include "duckdb/common/arrow/arrow_options.hpp"
25+
#include "duckdb/main/client_properties.hpp"
2626
#endif
2727

2828
namespace duckdb {
@@ -72,10 +72,7 @@ unique_ptr<FunctionData> ToArrowIPCFunction::Bind(ClientContext &context, TableF
7272

7373
// Create the Arrow schema
7474
ArrowSchema schema;
75-
ArrowOptions options;
76-
options.time_zone = context.GetClientProperties().time_zone;
77-
options.offset_size = ArrowOffsetSize::REGULAR;
78-
ArrowConverter::ToArrowSchema(&schema, input.input_table_types, input.input_table_names, options);
75+
ArrowConverter::ToArrowSchema(&schema, input.input_table_types, input.input_table_names, context.GetClientProperties());
7976
result->schema = arrow::ImportSchema(&schema).ValueOrDie();
8077

8178
return std::move(result);
@@ -91,9 +88,6 @@ OperatorResultType ToArrowIPCFunction::Function(ExecutionContext &context, Table
9188
bool sending_schema = false;
9289

9390
bool caching_disabled = context.pipeline && !context.pipeline->GetSink();
94-
ArrowOptions options;
95-
options.offset_size = ArrowOffsetSize::REGULAR;
96-
// bool caching_disabled = true;
9791

9892
if (!local_state.checked_schema) {
9993
if (!global_state.sent_schema) {
@@ -113,7 +107,7 @@ OperatorResultType ToArrowIPCFunction::Function(ExecutionContext &context, Table
113107
output.data[1].SetValue(0, Value::BOOLEAN(1));
114108
} else {
115109
if (!local_state.appender) {
116-
local_state.appender = make_uniq<ArrowAppender>(input.GetTypes(), data.chunk_size, options);
110+
local_state.appender = make_uniq<ArrowAppender>(input.GetTypes(), data.chunk_size, context.client.GetClientProperties());
117111
}
118112

119113
// Append input chunk

test/nodejs/arrow_test.js

+35-18
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,25 @@ const to_ipc_functions = {
2828
'materialized': arrow_ipc_materialized,
2929
}
3030

31+
// Creates a new in-memory DuckDB database with the `allow_unsigned_extensions`
// option set to "true" (presumably so the locally built arrow extension can be
// loaded by getConnection -- confirm against DuckDB's extension signing rules).
function getDatabase() {
32+
return new duckdb.Database(':memory:', {"allow_unsigned_extensions":"true"});
33+
}
34+
35+
// Opens a connection on `db` and issues a LOAD for the extension binary whose
// path is read from the ARROW_EXTENSION_BINARY_PATH environment variable.
//
// db:   database object the connection is created on.
// done: callback invoked with no arguments once the LOAD statement succeeded.
//
// Returns the connection immediately, i.e. before the LOAD callback has fired;
// callers should wait for `done` before running queries that need the extension.
function getConnection(db, done) {
36+
let conn = new duckdb.Connection(db);
37+
conn.exec(`LOAD '${process.env.ARROW_EXTENSION_BINARY_PATH}';`, function (err) {
38+
// A failed LOAD is rethrown from inside the callback instead of being passed to `done`.
if (err) throw err;
39+
done();
40+
});
41+
return conn
42+
}
43+
3144
describe(`Arrow IPC`, () => {
3245
let db;
3346
let conn;
3447
before((done) => {
35-
db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"});
36-
conn = new duckdb.Connection(db);
37-
done()
48+
db = getDatabase();
49+
conn = getConnection(db, () => done())
3850
});
3951

4052
it(`Basic examples`, async () => {
@@ -110,9 +122,8 @@ for (const [name, fun] of Object.entries(to_ipc_functions)) {
110122
let db;
111123
let conn;
112124
before((done) => {
113-
db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"});
114-
conn = new duckdb.Connection(db);
115-
done()
125+
db = getDatabase();
126+
conn = getConnection(db, () => done())
116127
});
117128

118129
it(`Buffers are not garbage collected`, async () => {
@@ -211,12 +222,13 @@ describe('[Benchmark] Arrow IPC Single Int Column (50M tuples)',() => {
211222
let conn;
212223

213224
before((done) => {
214-
db = new duckdb.Database(':memory:', {"allow_unsigned_extensions":"true"});
215-
conn = new duckdb.Connection(db);
216-
conn.run("CREATE TABLE test AS select * FROM range(0,?) tbl(i);", column_size, (err) => {
217-
assert(!err);
218-
done()
219-
});
225+
db = getDatabase();
226+
conn = getConnection(db, () => {
227+
conn.run("CREATE TABLE test AS select * FROM range(0,?) tbl(i);", column_size, (err) => {
228+
if (err) throw err;
229+
done()
230+
});
231+
})
220232
});
221233

222234
it('DuckDB table -> DuckDB table', (done) => {
@@ -253,6 +265,13 @@ describe('Buffer registration',() => {
253265
done();
254266
});
255267

268+
before((done) => {
269+
db = getDatabase();
270+
conn1 = getConnection(db, () => {
271+
conn2 = getConnection(db, () => done());
272+
})
273+
});
274+
256275
it('Buffers can only be overwritten with force flag', async () => {
257276
const arrow_buffer = await arrow_ipc_materialized(conn1, "SELECT 1337 as a");
258277

@@ -368,9 +387,8 @@ describe('[Benchmark] Arrow IPC TPC-H lineitem.parquet', () => {
368387
let conn;
369388

370389
before((done) => {
371-
db = new duckdb.Database(':memory:', {"allow_unsigned_extensions":"true"});
372-
conn = new duckdb.Connection(db);
373-
done();
390+
db = getDatabase();
391+
conn = getConnection(db, () => done())
374392
});
375393

376394
it('Parquet -> DuckDB Streaming-> Arrow IPC -> DuckDB Query', async () => {
@@ -455,9 +473,8 @@ for (const [name, fun] of Object.entries(to_ipc_functions)) {
455473
let db;
456474
let conn;
457475
before((done) => {
458-
db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"});
459-
conn = new duckdb.Connection(db);
460-
done();
476+
db = getDatabase();
477+
conn = getConnection(db, () => done())
461478
});
462479

463480
for (const query of queries) {

0 commit comments

Comments
 (0)