Skip to content

Commit 7319174

Browse files
Update vendored DuckDB sources to a9bf1a6
1 parent a9bf1a6 commit 7319174

19 files changed (+253, −113 lines)

src/duckdb/extension/parquet/parquet_extension.cpp

+4-1
Original file line number · Diff line number · Diff line change
@@ -797,15 +797,18 @@ class ParquetScanFunction {
797797
auto &gstate = data_p.global_state->Cast<ParquetReadGlobalState>();
798798
auto &bind_data = data_p.bind_data->CastNoConst<ParquetReadBindData>();
799799

800+
bool rowgroup_finished;
800801
do {
801802
if (gstate.CanRemoveColumns()) {
802803
data.all_columns.Reset();
803804
data.reader->Scan(data.scan_state, data.all_columns);
805+
rowgroup_finished = data.all_columns.size() == 0;
804806
bind_data.multi_file_reader->FinalizeChunk(context, bind_data.reader_bind, data.reader->reader_data,
805807
data.all_columns, gstate.multi_file_reader_state);
806808
output.ReferenceColumns(data.all_columns, gstate.projection_ids);
807809
} else {
808810
data.reader->Scan(data.scan_state, output);
811+
rowgroup_finished = output.size() == 0;
809812
bind_data.multi_file_reader->FinalizeChunk(context, bind_data.reader_bind, data.reader->reader_data,
810813
output, gstate.multi_file_reader_state);
811814
}
@@ -814,7 +817,7 @@ class ParquetScanFunction {
814817
if (output.size() > 0) {
815818
return;
816819
}
817-
if (!ParquetParallelStateNext(context, bind_data, data, gstate)) {
820+
if (rowgroup_finished && !ParquetParallelStateNext(context, bind_data, data, gstate)) {
818821
return;
819822
}
820823
} while (true);

src/duckdb/src/common/vector_operations/vector_hash.cpp

+3-1
Original file line number · Diff line number · Diff line change
@@ -21,7 +21,9 @@ struct HashOp {
2121
};
2222

2323
static inline hash_t CombineHashScalar(hash_t a, hash_t b) {
24-
return (a * UINT64_C(0xbf58476d1ce4e5b9)) ^ b;
24+
a ^= a >> 32;
25+
a *= 0xd6e8feb86659fd93U;
26+
return a ^ b;
2527
}
2628

2729
template <bool HAS_RSEL, class T>

src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp

+1-1
Original file line number · Diff line number · Diff line change
@@ -6,7 +6,7 @@ namespace duckdb {
66
CSVSniffer::CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager> buffer_manager_p,
77
CSVStateMachineCache &state_machine_cache_p, bool default_null_to_varchar_p)
88
: state_machine_cache(state_machine_cache_p), options(options_p), buffer_manager(std::move(buffer_manager_p)),
9-
default_null_to_varchar(default_null_to_varchar_p) {
9+
lines_sniffed(0), default_null_to_varchar(default_null_to_varchar_p) {
1010
// Initialize Format Candidates
1111
for (const auto &format_template : format_template_candidates) {
1212
auto &logical_type = format_template.first;

src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp

+11-8
Original file line number · Diff line number · Diff line change
@@ -80,11 +80,11 @@ string DialectCandidates::Print() {
8080

8181
DialectCandidates::DialectCandidates(const CSVStateMachineOptions &options) {
8282
// assert that quotes escapes and rules have equal size
83-
auto default_quote = GetDefaultQuote();
84-
auto default_escape = GetDefaultEscape();
85-
auto default_quote_rule = GetDefaultQuoteRule();
86-
auto default_delimiter = GetDefaultDelimiter();
87-
auto default_comment = GetDefaultComment();
83+
const auto default_quote = GetDefaultQuote();
84+
const auto default_escape = GetDefaultEscape();
85+
const auto default_quote_rule = GetDefaultQuoteRule();
86+
const auto default_delimiter = GetDefaultDelimiter();
87+
const auto default_comment = GetDefaultComment();
8888

8989
D_ASSERT(default_quote.size() == default_quote_rule.size() && default_quote_rule.size() == default_escape.size());
9090
// fill the escapes
@@ -187,6 +187,9 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc
187187

188188
// Returns true if a comment is acceptable
189189
bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool comment_set_by_user) {
190+
if (comment_set_by_user) {
191+
return true;
192+
}
190193
// For a comment to be acceptable, we want 3/5th's the majority of unmatched in the columns
191194
constexpr double min_majority = 0.6;
192195
// detected comments, are all lines that started with a comment character.
@@ -208,7 +211,7 @@ bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool
208211
}
209212
}
210213
// If we do not encounter at least one full line comment, we do not consider this comment option.
211-
if (valid_comments == 0 || (!has_full_line_comment && !comment_set_by_user)) {
214+
if (valid_comments == 0 || !has_full_line_comment) {
212215
// this is only valid if our comment character is \0
213216
if (result.state_machine.state_machine_options.comment.GetValue() == '\0') {
214217
return true;
@@ -234,7 +237,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
234237
idx_t num_cols = sniffed_column_counts.result_position == 0 ? 1 : sniffed_column_counts[0].number_of_columns;
235238
const bool ignore_errors = options.ignore_errors.GetValue();
236239
// If we are ignoring errors and not null_padding , we pick the most frequent number of columns as the right one
237-
bool use_most_frequent_columns = ignore_errors && !options.null_padding;
240+
const bool use_most_frequent_columns = ignore_errors && !options.null_padding;
238241
if (use_most_frequent_columns) {
239242
num_cols = sniffed_column_counts.GetMostFrequentColumnCount();
240243
}
@@ -355,7 +358,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
355358
// - There's a single column before.
356359
// - There are more values and no additional padding is required.
357360
// - There's more than one column and less padding is required.
358-
if (columns_match_set && rows_consistent &&
361+
if (columns_match_set && (rows_consistent || (set_columns.IsSet() && ignore_errors)) &&
359362
(single_column_before || ((more_values || more_columns) && !require_more_padding) ||
360363
(more_than_one_column && require_less_padding) || quoted) &&
361364
!invalid_padding && comments_are_acceptable) {

src/duckdb/src/function/table/version/pragma_version.cpp

+3-3
Original file line number · Diff line number · Diff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "4-dev3722"
2+
#define DUCKDB_PATCH_VERSION "4-dev3741"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.1.4-dev3722"
11+
#define DUCKDB_VERSION "v1.1.4-dev3741"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "62582045a3"
14+
#define DUCKDB_SOURCE_ID "ab8c909857"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

src/duckdb/src/function/window/window_boundaries_state.cpp

+6
Original file line number · Diff line number · Diff line change
@@ -302,20 +302,26 @@ WindowBoundsSet WindowBoundariesState::GetWindowBounds(const BoundWindowExpressi
302302
switch (wexpr.GetExpressionType()) {
303303
case ExpressionType::WINDOW_ROW_NUMBER:
304304
result.insert(PARTITION_BEGIN);
305+
if (!wexpr.arg_orders.empty()) {
306+
// Secondary orders need to know how wide the partition is
307+
result.insert(PARTITION_END);
308+
}
305309
break;
306310
case ExpressionType::WINDOW_RANK_DENSE:
307311
case ExpressionType::WINDOW_RANK:
308312
result.insert(PARTITION_BEGIN);
309313
if (wexpr.arg_orders.empty()) {
310314
result.insert(PEER_BEGIN);
311315
} else {
316+
// Secondary orders need to know how wide the partition is
312317
result.insert(PARTITION_END);
313318
}
314319
break;
315320
case ExpressionType::WINDOW_PERCENT_RANK:
316321
result.insert(PARTITION_BEGIN);
317322
result.insert(PARTITION_END);
318323
if (wexpr.arg_orders.empty()) {
324+
// Secondary orders need to know where the first peer is
319325
result.insert(PEER_BEGIN);
320326
}
321327
break;

src/duckdb/src/function/window/window_executor.cpp

+4
Original file line number · Diff line number · Diff line change
@@ -41,6 +41,10 @@ WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &cont
4141

4242
boundary_start_idx = shared.RegisterEvaluate(wexpr.start_expr);
4343
boundary_end_idx = shared.RegisterEvaluate(wexpr.end_expr);
44+
45+
for (const auto &order : wexpr.arg_orders) {
46+
arg_order_idx.emplace_back(shared.RegisterSink(order.expression));
47+
}
4448
}
4549

4650
WindowExecutorGlobalState::WindowExecutorGlobalState(const WindowExecutor &executor, const idx_t payload_count,

src/duckdb/src/function/window/window_merge_sort_tree.cpp

+24-7
Original file line number · Diff line number · Diff line change
@@ -1,12 +1,13 @@
11
#include "duckdb/function/window/window_merge_sort_tree.hpp"
2+
#include "duckdb/planner/expression/bound_constant_expression.hpp"
23

34
#include <thread>
45
#include <utility>
56

67
namespace duckdb {
78

89
WindowMergeSortTree::WindowMergeSortTree(ClientContext &context, const vector<BoundOrderByNode> &orders,
9-
const vector<column_t> &sort_idx, const idx_t count)
10+
const vector<column_t> &sort_idx, const idx_t count, bool unique)
1011
: context(context), memory_per_thread(PhysicalOperator::GetMaxThreadMemory(context)), sort_idx(sort_idx),
1112
build_stage(PartitionSortStage::INIT), tasks_completed(0) {
1213
// Sort the unfiltered indices by the orders
@@ -26,7 +27,19 @@ WindowMergeSortTree::WindowMergeSortTree(ClientContext &context, const vector<Bo
2627
payload_layout.Initialize(payload_types);
2728

2829
auto &buffer_manager = BufferManager::GetBufferManager(context);
29-
global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
30+
if (unique) {
31+
vector<BoundOrderByNode> unique_orders;
32+
for (const auto &order : orders) {
33+
unique_orders.emplace_back(order.Copy());
34+
}
35+
auto unique_expr = make_uniq<BoundConstantExpression>(Value(index_type));
36+
const auto order_type = OrderType::ASCENDING;
37+
const auto order_by_type = OrderByNullType::NULLS_LAST;
38+
unique_orders.emplace_back(BoundOrderByNode(order_type, order_by_type, std::move(unique_expr)));
39+
global_sort = make_uniq<GlobalSortState>(buffer_manager, unique_orders, payload_layout);
40+
} else {
41+
global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
42+
}
3043
global_sort->external = ClientConfig::GetConfig(context).force_external;
3144
}
3245

@@ -48,18 +61,22 @@ WindowMergeSortTreeLocalState::WindowMergeSortTreeLocalState(WindowMergeSortTree
4861

4962
void WindowMergeSortTreeLocalState::SinkChunk(DataChunk &chunk, const idx_t row_idx,
5063
optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
64+
// Sequence the payload column
65+
auto &indices = payload_chunk.data[0];
66+
payload_chunk.SetCardinality(chunk);
67+
indices.Sequence(int64_t(row_idx), 1, payload_chunk.size());
68+
5169
// Reference the sort columns
5270
auto &sort_idx = window_tree.sort_idx;
5371
for (column_t c = 0; c < sort_idx.size(); ++c) {
5472
sort_chunk.data[c].Reference(chunk.data[sort_idx[c]]);
5573
}
74+
// Add the row numbers if we are uniquifying
75+
if (sort_idx.size() < sort_chunk.ColumnCount()) {
76+
sort_chunk.data[sort_idx.size()].Reference(indices);
77+
}
5678
sort_chunk.SetCardinality(chunk);
5779

58-
// Sequence the payload column
59-
auto &indices = payload_chunk.data[0];
60-
payload_chunk.SetCardinality(sort_chunk);
61-
indices.Sequence(int64_t(row_idx), 1, payload_chunk.size());
62-
6380
// Apply FILTER clause, if any
6481
if (filter_sel) {
6582
sort_chunk.Slice(*filter_sel, filtered);

src/duckdb/src/function/window/window_rank_function.cpp

-4
Original file line number · Diff line number · Diff line change
@@ -93,10 +93,6 @@ void WindowPeerLocalState::NextRank(idx_t partition_begin, idx_t peer_begin, idx
9393
WindowPeerExecutor::WindowPeerExecutor(BoundWindowExpression &wexpr, ClientContext &context,
9494
WindowSharedExpressions &shared)
9595
: WindowExecutor(wexpr, context, shared) {
96-
97-
for (const auto &order : wexpr.arg_orders) {
98-
arg_order_idx.emplace_back(shared.RegisterSink(order.expression));
99-
}
10096
}
10197

10298
unique_ptr<WindowExecutorGlobalState> WindowPeerExecutor::GetGlobalState(const idx_t payload_count,

0 commit comments

Comments (0)