Skip to content

Commit eade9cb

Browse files
Update vendored DuckDB sources to cae3365
1 parent cae3365 commit eade9cb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+331
-246
lines changed

src/duckdb/extension/json/include/json_multi_file_info.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ struct JSONMultiFileInfo {
4343
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
4444
BaseUnionData &union_data, const MultiFileBindData &bind_data_p);
4545
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
46-
const string &filename, idx_t file_idx,
46+
const OpenFileInfo &file, idx_t file_idx,
4747
const MultiFileBindData &bind_data);
48-
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const string &filename,
48+
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const OpenFileInfo &file,
4949
JSONReaderOptions &options, const MultiFileOptions &file_options);
5050
static shared_ptr<BaseUnionData> GetUnionData(shared_ptr<BaseFileReader> scan_p, idx_t file_idx);
5151
static void FinalizeReader(ClientContext &context, BaseFileReader &reader, GlobalTableFunctionState &gstate_p);

src/duckdb/extension/json/json_functions/read_json.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,15 @@ static inline LogicalType RemoveDuplicateStructKeys(const LogicalType &type, con
3939
}
4040

4141
struct AutoDetectState {
42-
AutoDetectState(ClientContext &context_p, MultiFileBindData &bind_data_p, const vector<string> &files,
42+
AutoDetectState(ClientContext &context_p, MultiFileBindData &bind_data_p, const vector<OpenFileInfo> &files,
4343
MutableDateFormatMap &date_format_map)
4444
: context(context_p), bind_data(bind_data_p), files(files), date_format_map(date_format_map), files_scanned(0),
4545
tuples_scanned(0), bytes_scanned(0), total_file_size(0) {
4646
}
4747

4848
ClientContext &context;
4949
MultiFileBindData &bind_data;
50-
const vector<string> &files;
50+
const vector<OpenFileInfo> &files;
5151
MutableDateFormatMap &date_format_map;
5252
atomic<idx_t> files_scanned;
5353
atomic<idx_t> tuples_scanned;
@@ -70,12 +70,12 @@ class JSONSchemaTask : public BaseExecutorTask {
7070
auto &bind_data = auto_detect_state.bind_data;
7171
auto &files = auto_detect_state.files;
7272
auto &json_data = bind_data.bind_data->Cast<JSONScanData>();
73-
auto json_reader = make_shared_ptr<JSONReader>(context, json_data.options, files[file_idx]);
73+
auto json_reader = make_shared_ptr<JSONReader>(context, json_data.options, files[file_idx].path);
7474
if (bind_data.union_readers[file_idx]) {
7575
throw InternalException("Union data already set");
7676
}
7777
auto &reader = *json_reader;
78-
auto union_data = make_uniq<BaseUnionData>(files[file_idx]);
78+
auto union_data = make_uniq<BaseUnionData>(files[file_idx].path);
7979
union_data->reader = std::move(json_reader);
8080
bind_data.union_readers[file_idx] = std::move(union_data);
8181

src/duckdb/extension/json/json_multi_file_info.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -435,14 +435,14 @@ shared_ptr<BaseFileReader> JSONMultiFileInfo::CreateReader(ClientContext &contex
435435
}
436436

437437
shared_ptr<BaseFileReader> JSONMultiFileInfo::CreateReader(ClientContext &context, GlobalTableFunctionState &gstate_p,
438-
const string &filename, idx_t file_idx,
438+
const OpenFileInfo &file, idx_t file_idx,
439439
const MultiFileBindData &bind_data) {
440440
auto &json_data = bind_data.bind_data->Cast<JSONScanData>();
441-
auto reader = make_shared_ptr<JSONReader>(context, json_data.options, filename);
441+
auto reader = make_shared_ptr<JSONReader>(context, json_data.options, file.path);
442442
reader->columns = MultiFileColumnDefinition::ColumnsFromNamesAndTypes(bind_data.names, bind_data.types);
443443
return std::move(reader);
444444
}
445-
shared_ptr<BaseFileReader> JSONMultiFileInfo::CreateReader(ClientContext &context, const string &filename,
445+
shared_ptr<BaseFileReader> JSONMultiFileInfo::CreateReader(ClientContext &context, const OpenFileInfo &file,
446446
JSONReaderOptions &options,
447447
const MultiFileOptions &file_options) {
448448
throw InternalException("Create reader from file not implemented");

src/duckdb/extension/json/json_reader.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ void JSONReader::OpenJSONFile() {
183183
lock_guard<mutex> guard(lock);
184184
if (!IsOpen()) {
185185
auto &fs = FileSystem::GetFileSystem(context);
186-
auto regular_file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ | options.compression);
186+
auto regular_file_handle = fs.OpenFile(file, FileFlags::FILE_FLAGS_READ | options.compression);
187187
file_handle = make_uniq<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
188188
}
189189
Reset();
@@ -242,7 +242,7 @@ void JSONReader::SetRecordType(duckdb::JSONRecordType type) {
242242
}
243243

244244
const string &JSONReader::GetFileName() const {
245-
return file_name;
245+
return file.path;
246246
}
247247

248248
JSONFileHandle &JSONReader::GetFileHandle() const {
@@ -290,8 +290,8 @@ void JSONReader::SetBufferLineOrObjectCount(JSONBufferHandle &handle, idx_t coun
290290
void JSONReader::AddParseError(JSONReaderScanState &scan_state, idx_t line_or_object_in_buf, yyjson_read_err &err,
291291
const string &extra) {
292292
string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "record/value";
293-
auto error_msg = StringUtil::Format("Malformed JSON in file \"%s\", at byte %llu in %s {line}: %s. %s", file_name,
294-
err.pos + 1, unit, err.msg, extra);
293+
auto error_msg = StringUtil::Format("Malformed JSON in file \"%s\", at byte %llu in %s {line}: %s. %s",
294+
GetFileName(), err.pos + 1, unit, err.msg, extra);
295295
lock_guard<mutex> guard(lock);
296296
AddError(scan_state.current_buffer_handle->buffer_index, line_or_object_in_buf + 1, error_msg);
297297
ThrowErrorsIfPossible();
@@ -306,7 +306,7 @@ void JSONReader::AddTransformError(JSONReaderScanState &scan_state, idx_t object
306306
auto line_or_object_in_buffer = scan_state.lines_or_objects_in_buffer - scan_state.scan_count + object_index;
307307
string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "record/value";
308308
auto error_msg =
309-
StringUtil::Format("JSON transform error in file \"%s\", in %s {line}: %s", file_name, unit, error_message);
309+
StringUtil::Format("JSON transform error in file \"%s\", in %s {line}: %s", GetFileName(), unit, error_message);
310310
lock_guard<mutex> guard(lock);
311311
AddError(scan_state.current_buffer_handle->buffer_index, line_or_object_in_buffer, error_msg);
312312
ThrowErrorsIfPossible();
@@ -701,7 +701,8 @@ void JSONReader::AutoDetect(Allocator &allocator, idx_t buffer_capacity) {
701701
GetRecordType() != JSONRecordType::RECORDS) {
702702
string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "record/value";
703703
throw InvalidInputException(
704-
"JSON auto-detection error in file \"%s\": Expected records, detected non-record JSON instead", file_name);
704+
"JSON auto-detection error in file \"%s\": Expected records, detected non-record JSON instead",
705+
GetFileName());
705706
}
706707
// store the buffer in the file so it can be re-used by the first reader of the file
707708
if (!file_handle->IsPipe()) {

src/duckdb/extension/parquet/decoder/dictionary_decoder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ void DictionaryDecoder::InitializeDictionary(idx_t new_dictionary_size, optional
2626
dictionary->Resize(old_dict_size, dictionary_size + 1);
2727
}
2828
dictionary_id =
29-
reader.reader.file_name + "_" + reader.Schema().name + "_" + std::to_string(reader.chunk_read_offset);
29+
reader.reader.GetFileName() + "_" + reader.Schema().name + "_" + std::to_string(reader.chunk_read_offset);
3030
// we use the last entry as a NULL, dictionary vectors don't have a separate validity mask
3131
auto &dict_validity = FlatVector::Validity(*dictionary);
3232
dict_validity.Reset(dictionary_size + 1);

src/duckdb/extension/parquet/include/parquet_crypto.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ class ParquetKeys : public ObjectCacheEntry {
4141

4242
class ParquetEncryptionConfig {
4343
public:
44-
explicit ParquetEncryptionConfig(ClientContext &context);
44+
explicit ParquetEncryptionConfig();
4545
ParquetEncryptionConfig(ClientContext &context, const Value &arg);
46+
ParquetEncryptionConfig(string footer_key);
4647

4748
public:
4849
static shared_ptr<ParquetEncryptionConfig> Create(ClientContext &context, const Value &arg);
@@ -53,8 +54,7 @@ class ParquetEncryptionConfig {
5354
static shared_ptr<ParquetEncryptionConfig> Deserialize(Deserializer &deserializer);
5455

5556
private:
56-
ClientContext &context;
57-
//! Name of the key used for the footer
57+
//! The encryption key used for the footer
5858
string footer_key;
5959
//! Mapping from column name to key name
6060
unordered_map<string, string> column_keys;

src/duckdb/extension/parquet/include/parquet_reader.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ struct ParquetOptionsSerialization {
124124
};
125125

126126
struct ParquetUnionData : public BaseUnionData {
127-
explicit ParquetUnionData(string file_name_p) : BaseUnionData(std::move(file_name_p)) {
127+
explicit ParquetUnionData(OpenFileInfo file_p) : BaseUnionData(std::move(file_p)) {
128128
}
129129
~ParquetUnionData() override;
130130

@@ -138,7 +138,7 @@ class ParquetReader : public BaseFileReader {
138138
static constexpr int32_t ORDINAL_FIELD_ID = 2147483645;
139139

140140
public:
141-
ParquetReader(ClientContext &context, string file_name, ParquetOptions parquet_options,
141+
ParquetReader(ClientContext &context, OpenFileInfo file, ParquetOptions parquet_options,
142142
shared_ptr<ParquetFileMetadataCache> metadata = nullptr);
143143
~ParquetReader() override;
144144

src/duckdb/extension/parquet/parquet_crypto.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,13 @@ string ParquetKeys::GetObjectType() {
4141
return ObjectType();
4242
}
4343

44-
ParquetEncryptionConfig::ParquetEncryptionConfig(ClientContext &context_p) : context(context_p) {
44+
ParquetEncryptionConfig::ParquetEncryptionConfig() {
4545
}
4646

47-
ParquetEncryptionConfig::ParquetEncryptionConfig(ClientContext &context_p, const Value &arg)
48-
: ParquetEncryptionConfig(context_p) {
47+
ParquetEncryptionConfig::ParquetEncryptionConfig(string footer_key_p) : footer_key(std::move(footer_key_p)) {
48+
}
4949

50+
ParquetEncryptionConfig::ParquetEncryptionConfig(ClientContext &context, const Value &arg) {
5051
if (arg.type().id() != LogicalTypeId::STRUCT) {
5152
throw BinderException("Parquet encryption_config must be of type STRUCT");
5253
}
@@ -62,7 +63,11 @@ ParquetEncryptionConfig::ParquetEncryptionConfig(ClientContext &context_p, const
6263
"No key with name \"%s\" exists. Add it with PRAGMA add_parquet_key('<key_name>','<key>');",
6364
footer_key_name);
6465
}
65-
footer_key = footer_key_name;
66+
// footer key name provided - read the key from the config
67+
const auto &keys = ParquetKeys::Get(context);
68+
footer_key = keys.GetKey(footer_key_name);
69+
} else if (StringUtil::Lower(struct_key) == "footer_key_value") {
70+
footer_key = StringValue::Get(children[i].DefaultCastAs(LogicalType::BLOB));
6671
} else if (StringUtil::Lower(struct_key) == "column_keys") {
6772
throw NotImplementedException("Parquet encryption_config column_keys not yet implemented");
6873
} else {
@@ -76,10 +81,7 @@ shared_ptr<ParquetEncryptionConfig> ParquetEncryptionConfig::Create(ClientContex
7681
}
7782

7883
const string &ParquetEncryptionConfig::GetFooterKey() const {
79-
const auto &keys = ParquetKeys::Get(context);
80-
D_ASSERT(!footer_key.empty());
81-
D_ASSERT(keys.HasKey(footer_key));
82-
return keys.GetKey(footer_key);
84+
return footer_key;
8385
}
8486

8587
using duckdb_apache::thrift::protocol::TCompactProtocolFactoryT;

src/duckdb/extension/parquet/parquet_extension.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,9 @@ struct ParquetMultiFileInfo {
107107
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
108108
BaseUnionData &union_data, const MultiFileBindData &bind_data_p);
109109
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
110-
const string &filename, idx_t file_idx,
110+
const OpenFileInfo &file, idx_t file_idx,
111111
const MultiFileBindData &bind_data);
112-
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const string &filename,
112+
static shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const OpenFileInfo &file,
113113
ParquetOptions &options, const MultiFileOptions &file_options);
114114
static shared_ptr<BaseUnionData> GetUnionData(shared_ptr<BaseFileReader> scan_p, idx_t file_idx);
115115
static void FinalizeReader(ClientContext &context, BaseFileReader &reader, GlobalTableFunctionState &);
@@ -306,7 +306,11 @@ class ParquetScanFunction {
306306
auto &bind_data = bind_data_p->Cast<MultiFileBindData>();
307307
auto &parquet_data = bind_data.bind_data->Cast<ParquetReadBindData>();
308308

309-
serializer.WriteProperty(100, "files", bind_data.file_list->GetAllFiles());
309+
vector<string> files;
310+
for (auto &file : bind_data.file_list->GetAllFiles()) {
311+
files.emplace_back(file.path);
312+
}
313+
serializer.WriteProperty(100, "files", files);
310314
serializer.WriteProperty(101, "types", bind_data.types);
311315
serializer.WriteProperty(102, "names", bind_data.names);
312316
ParquetOptionsSerialization serialization(parquet_data.parquet_options, bind_data.file_options);
@@ -508,24 +512,24 @@ shared_ptr<BaseFileReader> ParquetMultiFileInfo::CreateReader(ClientContext &con
508512
BaseUnionData &union_data_p,
509513
const MultiFileBindData &bind_data_p) {
510514
auto &union_data = union_data_p.Cast<ParquetUnionData>();
511-
return make_shared_ptr<ParquetReader>(context, union_data.file_name, union_data.options, union_data.metadata);
515+
return make_shared_ptr<ParquetReader>(context, union_data.file, union_data.options, union_data.metadata);
512516
}
513517

514518
shared_ptr<BaseFileReader> ParquetMultiFileInfo::CreateReader(ClientContext &context, GlobalTableFunctionState &,
515-
const string &filename, idx_t file_idx,
519+
const OpenFileInfo &file, idx_t file_idx,
516520
const MultiFileBindData &multi_bind_data) {
517521
auto &bind_data = multi_bind_data.bind_data->Cast<ParquetReadBindData>();
518-
return make_shared_ptr<ParquetReader>(context, filename, bind_data.parquet_options);
522+
return make_shared_ptr<ParquetReader>(context, file.path, bind_data.parquet_options);
519523
}
520524

521-
shared_ptr<BaseFileReader> ParquetMultiFileInfo::CreateReader(ClientContext &context, const string &filename,
525+
shared_ptr<BaseFileReader> ParquetMultiFileInfo::CreateReader(ClientContext &context, const OpenFileInfo &file,
522526
ParquetOptions &options, const MultiFileOptions &) {
523-
return make_shared_ptr<ParquetReader>(context, filename, options);
527+
return make_shared_ptr<ParquetReader>(context, file.path, options);
524528
}
525529

526530
shared_ptr<BaseUnionData> ParquetMultiFileInfo::GetUnionData(shared_ptr<BaseFileReader> scan_p, idx_t file_idx) {
527531
auto &scan = scan_p->Cast<ParquetReader>();
528-
auto result = make_uniq<ParquetUnionData>(scan.file_name);
532+
auto result = make_uniq<ParquetUnionData>(scan.file);
529533
if (file_idx == 0) {
530534
for (auto &column : scan.columns) {
531535
result->names.push_back(column.name);

0 commit comments

Comments
 (0)