Skip to content

Commit 0f935a6

Browse files
Update vendored DuckDB sources to 6bb5fa1
1 parent 6bb5fa1 commit 0f935a6

File tree

68 files changed

+2109
-2015
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

68 files changed

+2109
-2015
lines changed

CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ set(DUCKDB_SRC_FILES
104104
src/duckdb/ub_src_common_crypto.cpp
105105
src/duckdb/ub_src_common_enums.cpp
106106
src/duckdb/ub_src_common_exception.cpp
107+
src/duckdb/ub_src_common_multi_file.cpp
107108
src/duckdb/ub_src_common_operator.cpp
108109
src/duckdb/ub_src_common_progress_bar.cpp
109110
src/duckdb/ub_src_common_row_operations.cpp

src/duckdb/extension/icu/icu-datefunc.cpp

+8-7
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "duckdb/common/operator/multiply.hpp"
66
#include "duckdb/common/types/timestamp.hpp"
77
#include "duckdb/common/exception/conversion_exception.hpp"
8+
#include "icu-helpers.hpp"
89
#include "unicode/ucal.h"
910

1011
namespace duckdb {
@@ -72,19 +73,19 @@ unique_ptr<FunctionData> ICUDateFunc::Bind(ClientContext &context, ScalarFunctio
7273
}
7374

7475
bool ICUDateFunc::TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
75-
auto tz = icu::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(icu::StringPiece(tz_id.GetString())));
76-
if (*tz == icu::TimeZone::getUnknown()) {
77-
delete tz;
76+
string tz_str = tz_id.GetString();
77+
auto tz = ICUHelpers::TryGetTimeZone(tz_str);
78+
if (!tz) {
7879
return false;
7980
}
80-
calendar->adoptTimeZone(tz);
81+
calendar->adoptTimeZone(tz.release());
8182
return true;
8283
}
8384

8485
void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
85-
if (!TrySetTimeZone(calendar, tz_id)) {
86-
throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString());
87-
}
86+
string tz_str = tz_id.GetString();
87+
auto tz = ICUHelpers::GetTimeZone(tz_str);
88+
calendar->adoptTimeZone(tz.release());
8889
}
8990

9091
timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros) {

src/duckdb/extension/icu/icu_extension.cpp

+40-17
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "unicode/stringpiece.h"
3333
#include "unicode/timezone.h"
3434
#include "unicode/ucol.h"
35+
#include "icu-helpers.hpp"
3536

3637
#include <cassert>
3738

@@ -210,41 +211,63 @@ static ScalarFunction GetICUCollateFunction(const string &collation, const strin
210211
return result;
211212
}
212213

213-
static void SetICUTimeZone(ClientContext &context, SetScope scope, Value &parameter) {
214-
auto str = StringValue::Get(parameter);
215-
icu::StringPiece utf8(str);
216-
const auto uid = icu::UnicodeString::fromUTF8(utf8);
214+
unique_ptr<icu::TimeZone> GetTimeZoneInternal(string &tz_str, vector<string> &candidates) {
215+
icu::StringPiece tz_name_utf8(tz_str);
216+
const auto uid = icu::UnicodeString::fromUTF8(tz_name_utf8);
217217
duckdb::unique_ptr<icu::TimeZone> tz(icu::TimeZone::createTimeZone(uid));
218218
if (*tz != icu::TimeZone::getUnknown()) {
219-
return;
219+
return tz;
220220
}
221221

222-
// Try to be friendlier
223-
// Go through all the zone names and look for a case insensitive match
224-
// If we don't find one, make a suggestion
222+
// Try to be friendlier
223+
// Go through all the zone names and look for a case insensitive match
224+
// If we don't find one, make a suggestion
225+
// FIXME: this is very inefficient
225226
UErrorCode status = U_ZERO_ERROR;
226227
duckdb::unique_ptr<icu::Calendar> calendar(icu::Calendar::createInstance(status));
227228
duckdb::unique_ptr<icu::StringEnumeration> tzs(icu::TimeZone::createEnumeration());
228-
vector<string> candidates;
229229
for (;;) {
230230
auto long_id = tzs->snext(status);
231231
if (U_FAILURE(status) || !long_id) {
232232
break;
233233
}
234-
std::string utf8;
235-
long_id->toUTF8String(utf8);
236-
if (StringUtil::CIEquals(utf8, str)) {
237-
parameter = Value(utf8);
238-
return;
234+
std::string candidate_tz_name;
235+
long_id->toUTF8String(candidate_tz_name);
236+
if (StringUtil::CIEquals(candidate_tz_name, tz_str)) {
237+
// case insensitive match - return this timezone instead
238+
tz_str = candidate_tz_name;
239+
icu::StringPiece utf8(tz_str);
240+
const auto tz_unicode_str = icu::UnicodeString::fromUTF8(utf8);
241+
duckdb::unique_ptr<icu::TimeZone> insensitive_tz(icu::TimeZone::createTimeZone(tz_unicode_str));
242+
return insensitive_tz;
239243
}
240244

241-
candidates.emplace_back(utf8);
245+
candidates.emplace_back(candidate_tz_name);
242246
}
247+
return nullptr;
248+
}
243249

250+
unique_ptr<icu::TimeZone> ICUHelpers::TryGetTimeZone(string &tz_str) {
251+
vector<string> candidates;
252+
return GetTimeZoneInternal(tz_str, candidates);
253+
}
254+
255+
unique_ptr<icu::TimeZone> ICUHelpers::GetTimeZone(string &tz_str) {
256+
vector<string> candidates;
257+
auto tz = GetTimeZoneInternal(tz_str, candidates);
258+
if (tz) {
259+
return tz;
260+
}
244261
string candidate_str =
245-
StringUtil::CandidatesMessage(StringUtil::TopNJaroWinkler(candidates, str), "Candidate time zones");
262+
StringUtil::CandidatesMessage(StringUtil::TopNJaroWinkler(candidates, tz_str), "Candidate time zones");
263+
264+
throw NotImplementedException("Unknown TimeZone '%s'!\n%s", tz_str, candidate_str);
265+
}
246266

247-
throw NotImplementedException("Unknown TimeZone '%s'!\n%s", str, candidate_str);
267+
static void SetICUTimeZone(ClientContext &context, SetScope scope, Value &parameter) {
268+
auto tz_str = StringValue::Get(parameter);
269+
ICUHelpers::GetTimeZone(tz_str);
270+
parameter = Value(tz_str);
248271
}
249272

250273
struct ICUCalendarData : public GlobalTableFunctionState {

icu-helpers.hpp (new file; full path not shown in this capture)

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
// DuckDB
3+
//
4+
// icu-helpers.hpp
5+
//
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "duckdb.hpp"
12+
#include "unicode/timezone.h"
13+
14+
namespace duckdb {
15+
16+
struct ICUHelpers {
17+
//! Tries to get a time zone - returns nullptr if the timezone is not found
18+
static unique_ptr<icu::TimeZone> TryGetTimeZone(string &tz_str);
19+
//! Gets a time zone - throws an error if the timezone is not found
20+
static unique_ptr<icu::TimeZone> GetTimeZone(string &tz_str);
21+
};
22+
23+
} // namespace duckdb

src/duckdb/extension/json/include/json_multi_file_info.hpp

+4-5
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
#pragma once
1010

11-
#include "duckdb/common/multi_file_reader_function.hpp"
11+
#include "duckdb/common/multi_file/multi_file_function.hpp"
1212
#include "json_reader_options.hpp"
1313

1414
namespace duckdb {
@@ -24,8 +24,8 @@ struct JSONMultiFileInfo {
2424
static bool ParseCopyOption(ClientContext &context, const string &key, const vector<Value> &values,
2525
BaseFileReaderOptions &options, vector<string> &expected_names,
2626
vector<LogicalType> &expected_types);
27-
static bool ParseOption(ClientContext &context, const string &key, const Value &val,
28-
MultiFileReaderOptions &file_options, BaseFileReaderOptions &options);
27+
static bool ParseOption(ClientContext &context, const string &key, const Value &val, MultiFileOptions &file_options,
28+
BaseFileReaderOptions &options);
2929
static void FinalizeCopyBind(ClientContext &context, BaseFileReaderOptions &options,
3030
const vector<string> &expected_names, const vector<LogicalType> &expected_types);
3131
static unique_ptr<TableFunctionData> InitializeBindData(MultiFileBindData &multi_file_data,
@@ -46,8 +46,7 @@ struct JSONMultiFileInfo {
4646
const string &filename, idx_t file_idx,
4747
const MultiFileBindData &bind_data);
4848
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const string &filename,
49-
JSONReaderOptions &options,
50-
const MultiFileReaderOptions &file_options);
49+
JSONReaderOptions &options, const MultiFileOptions &file_options);
5150
static shared_ptr<BaseUnionData> GetUnionData(shared_ptr<BaseFileReader> scan_p, idx_t file_idx);
5251
static void FinalizeReader(ClientContext &context, BaseFileReader &reader, GlobalTableFunctionState &gstate_p);
5352
static bool TryInitializeScan(ClientContext &context, shared_ptr<BaseFileReader> &reader,

src/duckdb/extension/json/include/json_reader.hpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
#include "duckdb/common/enum_util.hpp"
1313
#include "duckdb/common/enums/file_compression_type.hpp"
1414
#include "duckdb/common/file_system.hpp"
15-
#include "duckdb/common/base_file_reader.hpp"
16-
#include "duckdb/common/multi_file_reader.hpp"
15+
#include "duckdb/common/multi_file/base_file_reader.hpp"
16+
#include "duckdb/common/multi_file/multi_file_reader.hpp"
1717
#include "json_reader_options.hpp"
1818
#include "duckdb/common/mutex.hpp"
1919
#include "json_common.hpp"
@@ -227,6 +227,10 @@ class JSONReader : public BaseFileReader {
227227

228228
void DecrementBufferUsage(JSONBufferHandle &handle, idx_t lines_or_object_in_buffer, AllocatedData &buffer);
229229

230+
string GetReaderType() const override {
231+
return "JSON";
232+
}
233+
230234
private:
231235
void SkipOverArrayStart(JSONReaderScanState &scan_state);
232236
void AutoDetect(Allocator &allocator, idx_t buffer_size);

src/duckdb/extension/json/include/json_scan.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#pragma once
1010

1111
#include "json_reader.hpp"
12-
#include "duckdb/common/multi_file_reader.hpp"
12+
#include "duckdb/common/multi_file/multi_file_reader.hpp"
1313
#include "duckdb/common/mutex.hpp"
1414
#include "duckdb/common/pair.hpp"
1515
#include "duckdb/common/types/type_map.hpp"

src/duckdb/extension/json/json_functions/copy_json.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ CopyFunction JSONFunctions::GetJSONCopyFunction() {
118118

119119
function.plan = CopyToJSONPlan;
120120

121-
function.copy_from_bind = MultiFileReaderFunction<JSONMultiFileInfo>::MultiFileBindCopy;
121+
function.copy_from_bind = MultiFileFunction<JSONMultiFileInfo>::MultiFileBindCopy;
122122
function.copy_from_function = JSONFunctions::GetReadJSONTableFunction(make_shared_ptr<JSONScanInfo>(
123123
JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, JSONRecordType::RECORDS, false));
124124

src/duckdb/extension/json/json_functions/read_json.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#include "duckdb/common/helper.hpp"
2-
#include "duckdb/common/multi_file_reader.hpp"
2+
#include "duckdb/common/multi_file/multi_file_reader.hpp"
33
#include "json_functions.hpp"
44
#include "json_scan.hpp"
55
#include "json_structure.hpp"
@@ -236,7 +236,7 @@ void JSONScan::AutoDetect(ClientContext &context, MultiFileBindData &bind_data,
236236
}
237237

238238
TableFunction JSONFunctions::GetReadJSONTableFunction(shared_ptr<JSONScanInfo> function_info) {
239-
MultiFileReaderFunction<JSONMultiFileInfo> table_function("read_json");
239+
MultiFileFunction<JSONMultiFileInfo> table_function("read_json");
240240

241241
JSONScan::TableFunctionDefaults(table_function);
242242
table_function.named_parameters["columns"] = LogicalType::ANY;

src/duckdb/extension/json/json_functions/read_json_objects.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
namespace duckdb {
88

99
TableFunction GetReadJSONObjectsTableFunction(string name, shared_ptr<JSONScanInfo> function_info) {
10-
MultiFileReaderFunction<JSONMultiFileInfo> table_function(std::move(name));
10+
MultiFileFunction<JSONMultiFileInfo> table_function(std::move(name));
1111
JSONScan::TableFunctionDefaults(table_function);
1212
table_function.function_info = std::move(function_info);
1313
return static_cast<TableFunction>(table_function);

src/duckdb/extension/json/json_multi_file_info.cpp

+7-8
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ unique_ptr<BaseFileReaderOptions> JSONMultiFileInfo::InitializeOptions(ClientCon
2929
return std::move(reader_options);
3030
}
3131

32-
bool JSONMultiFileInfo::ParseOption(ClientContext &context, const string &key, const Value &value,
33-
MultiFileReaderOptions &, BaseFileReaderOptions &options_p) {
32+
bool JSONMultiFileInfo::ParseOption(ClientContext &context, const string &key, const Value &value, MultiFileOptions &,
33+
BaseFileReaderOptions &options_p) {
3434
auto &reader_options = options_p.Cast<JSONFileReaderOptions>();
3535
auto &options = reader_options.options;
3636
if (value.IsNull()) {
@@ -346,8 +346,7 @@ void JSONMultiFileInfo::BindReader(ClientContext &context, vector<LogicalType> &
346346
auto &json_reader = union_reader->reader->Cast<JSONReader>();
347347
union_reader->names = names;
348348
union_reader->types = return_types;
349-
union_reader->reader->columns =
350-
MultiFileReaderColumnDefinition::ColumnsFromNamesAndTypes(names, return_types);
349+
union_reader->reader->columns = MultiFileColumnDefinition::ColumnsFromNamesAndTypes(names, return_types);
351350
json_reader.Reset();
352351
}
353352
}
@@ -428,7 +427,7 @@ shared_ptr<BaseFileReader> JSONMultiFileInfo::CreateReader(ClientContext &contex
428427
const MultiFileBindData &bind_data_p) {
429428
auto &json_data = bind_data_p.bind_data->Cast<JSONScanData>();
430429
auto reader = make_shared_ptr<JSONReader>(context, json_data.options, union_data.GetFileName());
431-
reader->columns = MultiFileReaderColumnDefinition::ColumnsFromNamesAndTypes(union_data.names, union_data.types);
430+
reader->columns = MultiFileColumnDefinition::ColumnsFromNamesAndTypes(union_data.names, union_data.types);
432431
return std::move(reader);
433432
}
434433

@@ -437,12 +436,12 @@ shared_ptr<BaseFileReader> JSONMultiFileInfo::CreateReader(ClientContext &contex
437436
const MultiFileBindData &bind_data) {
438437
auto &json_data = bind_data.bind_data->Cast<JSONScanData>();
439438
auto reader = make_shared_ptr<JSONReader>(context, json_data.options, filename);
440-
reader->columns = MultiFileReaderColumnDefinition::ColumnsFromNamesAndTypes(bind_data.names, bind_data.types);
439+
reader->columns = MultiFileColumnDefinition::ColumnsFromNamesAndTypes(bind_data.names, bind_data.types);
441440
return std::move(reader);
442441
}
443442
shared_ptr<BaseFileReader> JSONMultiFileInfo::CreateReader(ClientContext &context, const string &filename,
444443
JSONReaderOptions &options,
445-
const MultiFileReaderOptions &file_options) {
444+
const MultiFileOptions &file_options) {
446445
throw InternalException("Create reader from file not implemented");
447446
}
448447

@@ -474,7 +473,7 @@ void ReadJSONFunction(ClientContext &context, JSONReader &json_reader, JSONScanG
474473
const auto count = lstate.Read();
475474
yyjson_val **values = scan_state.values;
476475

477-
auto &column_ids = json_reader.reader_data.column_ids;
476+
auto &column_ids = json_reader.column_ids;
478477
if (!gstate.names.empty()) {
479478
vector<Vector *> result_vectors;
480479
result_vectors.reserve(column_ids.size());

src/duckdb/extension/json/json_scan.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include "json_scan.hpp"
22

33
#include "duckdb/common/enum_util.hpp"
4-
#include "duckdb/common/multi_file_reader.hpp"
4+
#include "duckdb/common/multi_file/multi_file_reader.hpp"
55
#include "duckdb/common/serializer/deserializer.hpp"
66
#include "duckdb/common/serializer/serializer.hpp"
77
#include "duckdb/main/extension_helper.hpp"

src/duckdb/extension/parquet/column_reader.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#include "reader/boolean_column_reader.hpp"
44
#include "brotli/decode.h"
55
#include "reader/callback_column_reader.hpp"
6-
#include "reader/cast_column_reader.hpp"
76
#include "reader/decimal_column_reader.hpp"
87
#include "duckdb.hpp"
98
#include "reader/expression_column_reader.hpp"

src/duckdb/extension/parquet/include/parquet_column_schema.hpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
namespace duckdb {
1414
class ParquetReader;
1515

16-
enum class ParquetColumnSchemaType { COLUMN, CAST, FILE_ROW_NUMBER, GEOMETRY };
16+
enum class ParquetColumnSchemaType { COLUMN, FILE_ROW_NUMBER, GEOMETRY, EXPRESSION };
1717

1818
enum class ParquetExtraTypeInfo {
1919
NONE,
@@ -33,8 +33,7 @@ struct ParquetColumnSchema {
3333
ParquetColumnSchemaType schema_type = ParquetColumnSchemaType::COLUMN);
3434
ParquetColumnSchema(string name, LogicalType type, idx_t max_define, idx_t max_repeat, idx_t schema_index,
3535
idx_t column_index, ParquetColumnSchemaType schema_type = ParquetColumnSchemaType::COLUMN);
36-
ParquetColumnSchema(ParquetColumnSchema parent, LogicalType cast_type,
37-
ParquetColumnSchemaType schema_type = ParquetColumnSchemaType::CAST);
36+
ParquetColumnSchema(ParquetColumnSchema parent, LogicalType result_type, ParquetColumnSchemaType schema_type);
3837

3938
ParquetColumnSchemaType schema_type;
4039
string name;

src/duckdb/extension/parquet/include/parquet_reader.hpp

+8-4
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
#include "duckdb/common/common.hpp"
1313
#include "duckdb/common/encryption_state.hpp"
1414
#include "duckdb/common/exception.hpp"
15-
#include "duckdb/common/base_file_reader.hpp"
16-
#include "duckdb/common/multi_file_reader_options.hpp"
15+
#include "duckdb/common/multi_file/base_file_reader.hpp"
16+
#include "duckdb/common/multi_file/multi_file_options.hpp"
1717
#include "duckdb/common/string_util.hpp"
1818
#include "duckdb/common/types/data_chunk.hpp"
1919
#include "column_reader.hpp"
@@ -110,12 +110,12 @@ struct ParquetOptions {
110110

111111
struct ParquetOptionsSerialization {
112112
ParquetOptionsSerialization() = default;
113-
ParquetOptionsSerialization(ParquetOptions parquet_options_p, MultiFileReaderOptions file_options_p)
113+
ParquetOptionsSerialization(ParquetOptions parquet_options_p, MultiFileOptions file_options_p)
114114
: parquet_options(std::move(parquet_options_p)), file_options(std::move(file_options_p)) {
115115
}
116116

117117
ParquetOptions parquet_options;
118-
MultiFileReaderOptions file_options;
118+
MultiFileOptions file_options;
119119

120120
public:
121121
void Serialize(Serializer &serializer) const;
@@ -170,6 +170,10 @@ class ParquetReader : public BaseFileReader {
170170

171171
LogicalType DeriveLogicalType(const SchemaElement &s_ele, ParquetColumnSchema &schema) const;
172172

173+
string GetReaderType() const override {
174+
return "Parquet";
175+
}
176+
173177
private:
174178
//! Construct a parquet reader but **do not** open a file, used in ReadStatistics only
175179
ParquetReader(ClientContext &context, ParquetOptions parquet_options,

0 commit comments

Comments
 (0)