Skip to content

Commit 9c464a5

Browse files
Update vendored DuckDB sources to eade9cb
1 parent eade9cb commit 9c464a5

File tree

9 files changed

+200
-131
lines changed

9 files changed

+200
-131
lines changed

src/duckdb/extension/core_functions/scalar/list/list_value.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,59 @@ static void TemplatedListValueFunction(DataChunk &args, Vector &result) {
5252
ListVector::SetListSize(result, args.size() * list_size);
5353
}
5454

55+
//! list_value implementation for arguments whose physical type is LIST.
//! Every output row r becomes a list with one entry per argument column; entry (r, c) is stored at
//! position r * list_size + c of the result's child list vector and refers to the list found in column c, row r.
//! The child elements of all argument columns are copied back-to-back into a single grandchild vector,
//! so each input list_entry_t only needs its offset shifted by the start position of its column.
//! \param args   Input chunk; each column is read as a list vector
//! \param result Output vector; its list entries, child list entries, child validity and sizes are written here
static void ListValueListFunction(DataChunk &args, Vector &result) {
	const idx_t list_size = args.ColumnCount();
	ListVector::Reserve(result, args.size() * list_size);

	// col_offsets[c] = position in the combined grandchild vector at which the child elements of column c start
	vector<idx_t> col_offsets;
	col_offsets.reserve(list_size);
	idx_t offset_sum = 0;
	for (idx_t i = 0; i < list_size; i++) {
		col_offsets.push_back(offset_sum);
		auto &list = args.data[i];
		const auto length = ListVector::GetListSize(list);
		offset_sum += length;
	}

	auto &result_list = ListVector::GetEntry(result);
	ListVector::Reserve(result_list, offset_sum);

	// Copy the child elements of every argument column into its slot of the combined grandchild vector
	auto &result_child_vector = ListVector::GetEntry(result_list);
	for (idx_t i = 0; i < list_size; i++) {
		// Bind by reference, consistent with the sizing loop above; no temporary Vector is needed here
		auto &list = args.data[i];
		const auto length = ListVector::GetListSize(list);
		if (length == 0) {
			// Nothing to copy for this column
			continue;
		}
		auto &child_vector = ListVector::GetEntry(list);
		VectorOperations::Copy(child_vector, result_child_vector, length, 0, col_offsets[i]);
	}

	const auto result_data = FlatVector::GetData<list_entry_t>(result);
	const auto result_list_data = FlatVector::GetData<list_entry_t>(result_list);
	auto &result_list_validity = FlatVector::Validity(result_list);

	const auto args_unified_format = args.ToUnifiedFormat();
	for (idx_t r = 0; r < args.size(); r++) {
		for (idx_t c = 0; c < list_size; c++) {
			const auto input_idx = args_unified_format[c].sel->get_index(r);
			const auto result_idx = r * list_size + c;
			const auto input_data = UnifiedVectorFormat::GetData<list_entry_t>(args_unified_format[c]);
			if (args_unified_format[c].validity.RowIsValid(input_idx)) {
				// Same length as the input entry; the offset is shifted by where column c was copied to
				const auto length = input_data[input_idx].length;
				const auto offset = col_offsets[c] + input_data[input_idx].offset;
				result_list_data[result_idx] = list_entry_t(offset, length);
			} else {
				// An invalid input list becomes an invalid entry inside the outer list
				result_list_validity.SetInvalid(result_idx);
			}
		}
		// Each outer row always holds exactly list_size inner entries
		result_data[r].offset = r * list_size;
		result_data[r].length = list_size;
	}

	ListVector::SetListSize(result, args.size() * list_size);
	ListVector::SetListSize(result_list, offset_sum);
}
107+
55108
static void TemplatedListValueFunctionFallback(DataChunk &args, Vector &result) {
56109
auto &child_type = ListType::GetChildType(result.GetType());
57110
auto result_data = FlatVector::GetData<list_entry_t>(result);
@@ -125,6 +178,9 @@ static void ListValueFunction(DataChunk &args, ExpressionState &state, Vector &r
125178
case PhysicalType::VARCHAR:
126179
TemplatedListValueFunction<string_t, ListValueStringAssign>(args, result);
127180
break;
181+
case PhysicalType::LIST:
182+
ListValueListFunction(args, result);
183+
break;
128184
default: {
129185
TemplatedListValueFunctionFallback(args, result);
130186
break;

src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@ namespace duckdb {
1515
class BssEncoder {
1616
public:
1717
explicit BssEncoder(const idx_t total_value_count_p, const idx_t bit_width_p)
18-
: total_value_count(total_value_count_p), bit_width(bit_width_p), count(0),
19-
buffer(Allocator::DefaultAllocator().Allocate(total_value_count * bit_width + 1)) {
18+
: total_value_count(total_value_count_p), bit_width(bit_width_p), count(0) {
2019
}
2120

2221
public:
22+
void BeginWrite(Allocator &allocator) {
23+
buffer = allocator.Allocate(total_value_count * bit_width + 1);
24+
}
25+
2326
template <class T>
2427
void WriteValue(const T &value) {
2528
D_ASSERT(sizeof(T) == bit_width);
@@ -41,23 +44,4 @@ class BssEncoder {
4144
AllocatedData buffer;
4245
};
4346

44-
namespace bss_encoder {
45-
46-
template <class T>
47-
void WriteValue(BssEncoder &encoder, const T &value) {
48-
throw InternalException("Can't write type to BYTE_STREAM_SPLIT column");
49-
}
50-
51-
template <>
52-
void WriteValue(BssEncoder &encoder, const float &value) {
53-
encoder.WriteValue(value);
54-
}
55-
56-
template <>
57-
void WriteValue(BssEncoder &encoder, const double &value) {
58-
encoder.WriteValue(value);
59-
}
60-
61-
} // namespace bss_encoder
62-
6347
} // namespace duckdb

src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp

Lines changed: 38 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,27 @@ class DbpEncoder {
2323
}
2424

2525
public:
26-
void BeginWrite(WriteStream &writer, const int64_t &first_value) {
26+
template <class T>
27+
void BeginWrite(WriteStream &writer, const T &first_value) {
28+
throw InternalException("DbpEncoder should only be used with integers");
29+
}
30+
31+
template <class T>
32+
void WriteValue(WriteStream &writer, const T &value) {
33+
throw InternalException("DbpEncoder should only be used with integers");
34+
}
35+
36+
void FinishWrite(WriteStream &writer) {
37+
if (count + block_count != total_value_count) {
38+
throw InternalException("value count mismatch when writing DELTA_BINARY_PACKED");
39+
}
40+
if (block_count != 0) {
41+
WriteBlock(writer);
42+
}
43+
}
44+
45+
private:
46+
void BeginWriteInternal(WriteStream &writer, const int64_t &first_value) {
2747
// <block size in values> <number of miniblocks in a block> <total value count> <first value>
2848

2949
// the block size is a multiple of 128; it is stored as a ULEB128 int
@@ -50,7 +70,7 @@ class DbpEncoder {
5070
block_count = 0;
5171
}
5272

53-
void WriteValue(WriteStream &writer, const int64_t &value) {
73+
void WriteValueInternal(WriteStream &writer, const int64_t &value) {
5474
// 1. Compute the differences between consecutive elements. For the first element in the block,
5575
// use the last element in the previous block or, in the case of the first block,
5676
// use the first value of the whole sequence, stored in the header.
@@ -72,16 +92,6 @@ class DbpEncoder {
7292
}
7393
}
7494

75-
void FinishWrite(WriteStream &writer) {
76-
if (count + block_count != total_value_count) {
77-
throw InternalException("value count mismatch when writing DELTA_BINARY_PACKED");
78-
}
79-
if (block_count != 0) {
80-
WriteBlock(writer);
81-
}
82-
}
83-
84-
private:
8595
void WriteBlock(WriteStream &writer) {
8696
D_ASSERT(count + block_count == total_value_count || block_count == BLOCK_SIZE_IN_VALUES);
8797
const auto number_of_miniblocks =
@@ -176,58 +186,44 @@ class DbpEncoder {
176186
data_t data_packed[NUMBER_OF_VALUES_IN_A_MINIBLOCK * sizeof(int64_t)];
177187
};
178188

179-
namespace dbp_encoder {
180-
181-
template <class T>
182-
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const T &first_value) {
183-
throw InternalException("Can't write type to DELTA_BINARY_PACKED column");
184-
}
185-
186189
template <>
187-
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const int64_t &first_value) {
188-
encoder.BeginWrite(writer, first_value);
190+
inline void DbpEncoder::BeginWrite(WriteStream &writer, const int32_t &first_value) {
191+
BeginWriteInternal(writer, first_value);
189192
}
190193

191194
template <>
192-
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const int32_t &first_value) {
193-
BeginWrite(encoder, writer, UnsafeNumericCast<int64_t>(first_value));
195+
inline void DbpEncoder::BeginWrite(WriteStream &writer, const int64_t &first_value) {
196+
BeginWriteInternal(writer, first_value);
194197
}
195198

196199
template <>
197-
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const uint64_t &first_value) {
198-
encoder.BeginWrite(writer, UnsafeNumericCast<int64_t>(first_value));
200+
inline void DbpEncoder::BeginWrite(WriteStream &writer, const uint32_t &first_value) {
201+
BeginWriteInternal(writer, first_value);
199202
}
200203

201204
template <>
202-
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const uint32_t &first_value) {
203-
BeginWrite(encoder, writer, UnsafeNumericCast<int64_t>(first_value));
204-
}
205-
206-
template <class T>
207-
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const T &value) {
208-
throw InternalException("Can't write type to DELTA_BINARY_PACKED column");
205+
inline void DbpEncoder::BeginWrite(WriteStream &writer, const uint64_t &first_value) {
206+
BeginWriteInternal(writer, first_value);
209207
}
210208

211209
template <>
212-
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const int64_t &value) {
213-
encoder.WriteValue(writer, value);
210+
inline void DbpEncoder::WriteValue(WriteStream &writer, const int32_t &first_value) {
211+
WriteValueInternal(writer, first_value);
214212
}
215213

216214
template <>
217-
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const int32_t &value) {
218-
WriteValue(encoder, writer, UnsafeNumericCast<int64_t>(value));
215+
inline void DbpEncoder::WriteValue(WriteStream &writer, const int64_t &first_value) {
216+
WriteValueInternal(writer, first_value);
219217
}
220218

221219
template <>
222-
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const uint64_t &value) {
223-
encoder.WriteValue(writer, UnsafeNumericCast<int64_t>(value));
220+
inline void DbpEncoder::WriteValue(WriteStream &writer, const uint32_t &first_value) {
221+
WriteValueInternal(writer, first_value);
224222
}
225223

226224
template <>
227-
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const uint32_t &value) {
228-
WriteValue(encoder, writer, UnsafeNumericCast<int64_t>(value));
225+
inline void DbpEncoder::WriteValue(WriteStream &writer, const uint64_t &first_value) {
226+
WriteValueInternal(writer, first_value);
229227
}
230228

231-
} // namespace dbp_encoder
232-
233229
} // namespace duckdb

src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp

Lines changed: 21 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -16,67 +16,54 @@ namespace duckdb {
1616
class DlbaEncoder {
1717
public:
1818
DlbaEncoder(const idx_t total_value_count_p, const idx_t total_string_size_p)
19-
: dbp_encoder(total_value_count_p), total_string_size(total_string_size_p),
20-
buffer(Allocator::DefaultAllocator().Allocate(total_string_size + 1)),
21-
stream(make_unsafe_uniq<MemoryStream>(buffer.get(), buffer.GetSize())) {
19+
: dbp_encoder(total_value_count_p), total_string_size(total_string_size_p) {
2220
}
2321

2422
public:
25-
void BeginWrite(WriteStream &writer, const string_t &first_value) {
26-
dbp_encoder.BeginWrite(writer, UnsafeNumericCast<int64_t>(first_value.GetSize()));
27-
stream->WriteData(const_data_ptr_cast(first_value.GetData()), first_value.GetSize());
23+
template <class T>
24+
void BeginWrite(Allocator &, WriteStream &, const T &) {
25+
throw InternalException("DlbaEncoder should only be used with strings");
2826
}
2927

30-
void WriteValue(WriteStream &writer, const string_t &value) {
31-
dbp_encoder.WriteValue(writer, UnsafeNumericCast<int64_t>(value.GetSize()));
32-
stream->WriteData(const_data_ptr_cast(value.GetData()), value.GetSize());
28+
template <class T>
29+
void WriteValue(WriteStream &, const T &) {
30+
throw InternalException("DlbaEncoder should only be used with strings");
3331
}
3432

3533
void FinishWrite(WriteStream &writer) {
3634
dbp_encoder.FinishWrite(writer);
3735
writer.WriteData(buffer.get(), stream->GetPosition());
3836
}
3937

38+
template <class SRC>
39+
static idx_t GetStringSize(const SRC &) {
40+
return 0;
41+
}
42+
4043
private:
4144
DbpEncoder dbp_encoder;
4245
const idx_t total_string_size;
4346
AllocatedData buffer;
4447
unsafe_unique_ptr<MemoryStream> stream;
4548
};
4649

47-
namespace dlba_encoder {
48-
49-
template <class T>
50-
void BeginWrite(DlbaEncoder &encoder, WriteStream &writer, const T &first_value) {
51-
throw InternalException("Can't write type to DELTA_LENGTH_BYTE_ARRAY column");
52-
}
53-
5450
template <>
55-
void BeginWrite(DlbaEncoder &encoder, WriteStream &writer, const string_t &first_value) {
56-
encoder.BeginWrite(writer, first_value);
57-
}
58-
59-
template <class T>
60-
void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const T &value) {
61-
throw InternalException("Can't write type to DELTA_LENGTH_BYTE_ARRAY column");
51+
inline void DlbaEncoder::BeginWrite(Allocator &allocator, WriteStream &writer, const string_t &first_value) {
52+
buffer = allocator.Allocate(total_string_size + 1);
53+
stream = make_unsafe_uniq<MemoryStream>(buffer.get(), buffer.GetSize());
54+
dbp_encoder.BeginWrite(writer, UnsafeNumericCast<int64_t>(first_value.GetSize()));
55+
stream->WriteData(const_data_ptr_cast(first_value.GetData()), first_value.GetSize());
6256
}
6357

6458
template <>
65-
void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const string_t &value) {
66-
encoder.WriteValue(writer, value);
67-
}
68-
69-
// helpers to get size from strings
70-
template <class SRC>
71-
static idx_t GetDlbaStringSize(const SRC &) {
72-
return 0;
59+
inline void DlbaEncoder::WriteValue(WriteStream &writer, const string_t &value) {
60+
dbp_encoder.WriteValue(writer, UnsafeNumericCast<int64_t>(value.GetSize()));
61+
stream->WriteData(const_data_ptr_cast(value.GetData()), value.GetSize());
7362
}
7463

7564
template <>
76-
idx_t GetDlbaStringSize(const string_t &src_value) {
65+
inline idx_t DlbaEncoder::GetStringSize(const string_t &src_value) {
7766
return src_value.GetSize();
7867
}
7968

80-
} // namespace dlba_encoder
81-
8269
} // namespace duckdb

src/duckdb/extension/parquet/include/writer/primitive_column_writer.hpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,10 @@ class PrimitiveColumnWriter : public ColumnWriter {
6363

6464
//! We limit the uncompressed page size to 100MB
6565
//! The max size in Parquet is 2GB, but we choose a more conservative limit
66-
static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 100000000;
66+
static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 104857600ULL;
6767
//! Dictionary pages must be below 2GB. Unlike data pages, there's only one dictionary page.
6868
//! For this reason we go with a much higher, but still conservative, upper bound of 1GB.
69-
static constexpr const idx_t MAX_UNCOMPRESSED_DICT_PAGE_SIZE = 1e9;
70-
//! If the dictionary has this many entries, we stop creating the dictionary
71-
static constexpr const idx_t DICTIONARY_ANALYZE_THRESHOLD = 1e4;
72-
//! The maximum size a key entry in an RLE page takes
73-
static constexpr const idx_t MAX_DICTIONARY_KEY_SIZE = sizeof(uint32_t);
74-
//! The size of encoding the string length
75-
static constexpr const idx_t STRING_LENGTH_SIZE = sizeof(uint32_t);
69+
static constexpr const idx_t MAX_UNCOMPRESSED_DICT_PAGE_SIZE = 1073741824ULL;
7670

7771
public:
7872
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;

0 commit comments

Comments
 (0)