Skip to content

Commit 42763ee

Browse files
authored
Merge pull request #571 from pdet/column_default
Expressions as Column Default Values
2 parents 64f3b12 + d11165b commit 42763ee

16 files changed

+309
-69
lines changed

duckdb

Submodule duckdb updated 504 files

src/functions/ducklake_add_data_files.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -184,8 +184,8 @@ FROM parquet_full_metadata(%s)
184184
}
185185

186186
for (auto &row : *result) {
187-
auto &chunk = *row.iterator.chunk;
188-
idx_t row_idx = row.row;
187+
auto &chunk = row.GetChunk();
188+
idx_t row_idx = row.GetRowInChunk();
189189

190190
auto &file_metadata_vec = chunk.data[0];
191191
auto &parquet_metadata_vec = chunk.data[1];

src/include/storage/ducklake_field_data.hpp

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
#include "duckdb/common/case_insensitive_map.hpp"
1313
#include "duckdb/common/types/value.hpp"
1414
#include "common/index.hpp"
15+
#include "duckdb/parser/parsed_expression.hpp"
1516

1617
namespace duckdb {
1718
struct AlterTableInfo;
@@ -23,7 +24,17 @@ class ColumnList;
2324
struct DuckLakeColumnData {
2425
FieldIndex id;
2526
Value initial_default;
26-
Value default_value;
27+
unique_ptr<ParsedExpression> default_value;
28+
DuckLakeColumnData Copy() const {
29+
DuckLakeColumnData copy;
30+
copy.id = id;
31+
copy.initial_default = initial_default;
32+
33+
if (default_value) {
34+
copy.default_value = default_value->Copy();
35+
}
36+
return copy;
37+
}
2738
};
2839

2940
class DuckLakeFieldId {
@@ -58,10 +69,11 @@ class DuckLakeFieldId {
5869
unique_ptr<DuckLakeFieldId> Copy() const;
5970
unique_ptr<ParsedExpression> GetDefault() const;
6071

61-
static unique_ptr<DuckLakeFieldId> FieldIdFromColumn(const ColumnDefinition &col, idx_t &column_id);
72+
static unique_ptr<DuckLakeFieldId> FieldIdFromColumn(const ColumnDefinition &col, idx_t &column_id,
73+
bool add_column = false);
6274
static unique_ptr<DuckLakeFieldId> FieldIdFromType(const string &name, const LogicalType &type,
6375
optional_ptr<const ParsedExpression> default_expr,
64-
idx_t &column_id);
76+
idx_t &column_id, bool add_column);
6577
static unique_ptr<DuckLakeFieldId> Rename(const DuckLakeFieldId &field_id, const string &new_name);
6678
static unique_ptr<DuckLakeFieldId> SetDefault(const DuckLakeFieldId &field_id,
6779
optional_ptr<const ParsedExpression> default_expr);

src/include/storage/ducklake_metadata_info.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
#include "common/ducklake_data_file.hpp"
1919
#include "common/ducklake_name_map.hpp"
2020
#include "storage/ducklake_inlined_data.hpp"
21+
#include "duckdb/parser/parsed_expression.hpp"
2122

2223
namespace duckdb {
2324

@@ -70,6 +71,7 @@ struct DuckLakeColumnInfo {
7071
string type;
7172
Value initial_default;
7273
Value default_value;
74+
string default_value_type;
7375
bool nulls_allowed {};
7476
vector<DuckLakeColumnInfo> children;
7577
vector<DuckLakeTag> tags;

src/storage/ducklake_catalog.cpp

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,21 @@ static unique_ptr<DuckLakeFieldId> TransformColumnType(DuckLakeColumnInfo &col)
196196
if (col.children.empty()) {
197197
auto col_type = DuckLakeTypes::FromString(col.type);
198198
col_data.initial_default = col.initial_default.DefaultCastAs(col_type);
199-
col_data.default_value = col.default_value.DefaultCastAs(col_type);
199+
if (col.default_value.IsNull()) {
200+
col_data.default_value = make_uniq<ConstantExpression>(Value());
201+
} else {
202+
if (col.default_value_type == "literal") {
203+
col_data.default_value = make_uniq<ConstantExpression>(col.default_value);
204+
} else if (col.default_value_type == "expression") {
205+
auto sql_expr = Parser::ParseExpressionList(col.default_value.GetValue<string>());
206+
if (sql_expr.size() != 1) {
207+
throw InternalException("Expected a single expression");
208+
}
209+
col_data.default_value = std::move(sql_expr[0]);
210+
} else {
211+
throw NotImplementedException("Column type %s is not supported", col.default_value_type);
212+
}
213+
}
200214
return make_uniq<DuckLakeFieldId>(std::move(col_data), col.name, std::move(col_type));
201215
}
202216
if (StringUtil::CIEquals(col.type, "struct")) {

src/storage/ducklake_field_data.cpp

Lines changed: 49 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -40,20 +40,39 @@ DuckLakeFieldId::DuckLakeFieldId(DuckLakeColumnData column_data_p, string name_p
4040
}
4141
}
4242

43-
static Value ExtractDefaultValue(optional_ptr<const ParsedExpression> default_expr, const LogicalType &type) {
43+
static unique_ptr<ParsedExpression> ExtractDefaultExpression(optional_ptr<const ParsedExpression> default_expr,
44+
const LogicalType &type) {
4445
if (!default_expr) {
46+
return make_uniq<ConstantExpression>(Value(type));
47+
}
48+
if (default_expr->HasSubquery()) {
49+
throw NotImplementedException("Expressions with subqueries are not yet supported as default expressions");
50+
}
51+
if (default_expr->IsWindow()) {
52+
throw NotImplementedException("Expressions with window functions are not yet supported as default expressions");
53+
}
54+
return default_expr->Copy();
55+
}
56+
57+
static Value ExtractInitialValue(optional_ptr<const ParsedExpression> initial_expr, const LogicalType &type,
58+
bool add_column) {
59+
if (!initial_expr) {
4560
return Value(type);
4661
}
47-
if (default_expr->type != ExpressionType::VALUE_CONSTANT) {
48-
throw NotImplementedException("Only literals (e.g. 42 or 'hello world') are supported as default values");
62+
if (initial_expr->type != ExpressionType::VALUE_CONSTANT) {
63+
if (!add_column) {
64+
return Value(type);
65+
}
66+
throw NotImplementedException("We cannot add a column with a non-literal default value. Add the column and "
67+
"then explicitly set the default for new values using \"ALTER ... SET DEFAULT\"");
4968
}
50-
auto &const_default = default_expr->Cast<ConstantExpression>();
69+
auto &const_default = initial_expr->Cast<ConstantExpression>();
5170
return const_default.value.DefaultCastAs(type);
5271
}
5372

5473
unique_ptr<DuckLakeFieldId> DuckLakeFieldId::FieldIdFromType(const string &name, const LogicalType &type,
5574
optional_ptr<const ParsedExpression> default_expr,
56-
idx_t &column_id) {
75+
idx_t &column_id, bool add_column) {
5776
DuckLakeColumnData column_data;
5877
column_data.id = FieldIndex(column_id++);
5978
vector<unique_ptr<DuckLakeFieldId>> field_children;
@@ -64,52 +83,53 @@ unique_ptr<DuckLakeFieldId> DuckLakeFieldId::FieldIdFromType(const string &name,
6483
throw NotImplementedException("Default value for STRUCT type not supported");
6584
}
6685
for (auto &entry : StructType::GetChildTypes(type)) {
67-
field_children.push_back(FieldIdFromType(entry.first, entry.second, nullptr, column_id));
86+
field_children.push_back(FieldIdFromType(entry.first, entry.second, nullptr, column_id, add_column));
6887
}
6988
break;
7089
}
7190
case LogicalTypeId::LIST:
7291
if (default_expr) {
7392
throw NotImplementedException("Default value for LIST type not supported");
7493
}
75-
field_children.push_back(FieldIdFromType("element", ListType::GetChildType(type), nullptr, column_id));
94+
field_children.push_back(
95+
FieldIdFromType("element", ListType::GetChildType(type), nullptr, column_id, add_column));
7696
break;
7797
case LogicalTypeId::ARRAY:
7898
if (default_expr) {
7999
throw NotImplementedException("Default value for LIST type not supported");
80100
}
81-
field_children.push_back(FieldIdFromType("element", ArrayType::GetChildType(type), nullptr, column_id));
101+
field_children.push_back(
102+
FieldIdFromType("element", ArrayType::GetChildType(type), nullptr, column_id, add_column));
82103
break;
83104
case LogicalTypeId::MAP:
84105
if (default_expr) {
85106
throw NotImplementedException("Default value for MAP type not supported");
86107
}
87-
field_children.push_back(FieldIdFromType("key", MapType::KeyType(type), nullptr, column_id));
88-
field_children.push_back(FieldIdFromType("value", MapType::ValueType(type), nullptr, column_id));
108+
field_children.push_back(FieldIdFromType("key", MapType::KeyType(type), nullptr, column_id, add_column));
109+
field_children.push_back(FieldIdFromType("value", MapType::ValueType(type), nullptr, column_id, add_column));
89110
break;
90111
default:
91112
break;
92113
}
93-
column_data.initial_default = ExtractDefaultValue(default_expr, type);
94-
column_data.default_value = column_data.initial_default;
114+
column_data.initial_default = ExtractInitialValue(default_expr, type, add_column);
115+
if (default_expr) {
116+
column_data.default_value = default_expr->Copy();
117+
}
118+
95119
return make_uniq<DuckLakeFieldId>(std::move(column_data), name, type, std::move(field_children));
96120
}
97121

98122
unique_ptr<ParsedExpression> DuckLakeFieldId::GetDefault() const {
99-
if (children.empty()) {
100-
if (column_data.default_value.IsNull()) {
101-
// no default value defined
102-
return nullptr;
103-
}
104-
return make_uniq<ConstantExpression>(column_data.default_value);
123+
if (column_data.default_value) {
124+
return column_data.default_value->Copy();
105125
}
106-
// FIXME: default not supported for entries with children
107126
return nullptr;
108127
}
109128

110-
unique_ptr<DuckLakeFieldId> DuckLakeFieldId::FieldIdFromColumn(const ColumnDefinition &col, idx_t &column_id) {
129+
unique_ptr<DuckLakeFieldId> DuckLakeFieldId::FieldIdFromColumn(const ColumnDefinition &col, idx_t &column_id,
130+
bool add_column) {
111131
auto default_val = col.HasDefaultValue() ? optional_ptr<const ParsedExpression>(col.DefaultValue()) : nullptr;
112-
return DuckLakeFieldId::FieldIdFromType(col.Name(), col.Type(), default_val, column_id);
132+
return DuckLakeFieldId::FieldIdFromType(col.Name(), col.Type(), default_val, column_id, add_column);
113133
}
114134

115135
shared_ptr<DuckLakeFieldData> DuckLakeFieldData::FromColumns(const ColumnList &columns) {
@@ -132,7 +152,7 @@ unique_ptr<DuckLakeFieldId> DuckLakeFieldId::Copy() const {
132152
for (auto &child : children) {
133153
new_children.push_back(child->Copy());
134154
}
135-
return make_uniq<DuckLakeFieldId>(column_data, name, type, std::move(new_children));
155+
return make_uniq<DuckLakeFieldId>(column_data.Copy(), name, type, std::move(new_children));
136156
}
137157

138158
unique_ptr<DuckLakeFieldId> DuckLakeFieldId::Rename(const DuckLakeFieldId &field_id, const string &new_name) {
@@ -144,7 +164,7 @@ unique_ptr<DuckLakeFieldId> DuckLakeFieldId::Rename(const DuckLakeFieldId &field
144164
unique_ptr<DuckLakeFieldId> DuckLakeFieldId::SetDefault(const DuckLakeFieldId &field_id,
145165
optional_ptr<const ParsedExpression> default_expr) {
146166
auto result = field_id.Copy();
147-
result->column_data.default_value = ExtractDefaultValue(default_expr, field_id.Type());
167+
result->column_data.default_value = ExtractDefaultExpression(default_expr, field_id.Type());
148168
return result;
149169
}
150170

@@ -200,7 +220,7 @@ unique_ptr<DuckLakeFieldId> DuckLakeFieldId::AddField(const vector<string> &colu
200220
}
201221
}
202222
LogicalType new_type = GetNewNestedType(type, new_children);
203-
return make_uniq<DuckLakeFieldId>(column_data, Name(), std::move(new_type), std::move(new_children));
223+
return make_uniq<DuckLakeFieldId>(column_data.Copy(), Name(), std::move(new_type), std::move(new_children));
204224
}
205225

206226
unique_ptr<DuckLakeFieldId> DuckLakeFieldId::RemoveField(const vector<string> &column_path, idx_t depth) const {
@@ -231,7 +251,7 @@ unique_ptr<DuckLakeFieldId> DuckLakeFieldId::RemoveField(const vector<string> &c
231251
throw InternalException("DuckLakeFieldId::AddField - child not found in struct path");
232252
}
233253
LogicalType new_type = GetNewNestedType(type, new_children);
234-
return make_uniq<DuckLakeFieldId>(column_data, Name(), std::move(new_type), std::move(new_children));
254+
return make_uniq<DuckLakeFieldId>(column_data.Copy(), Name(), std::move(new_type), std::move(new_children));
235255
}
236256

237257
unique_ptr<DuckLakeFieldId> DuckLakeFieldId::RenameField(const vector<string> &column_path, const string &new_name,
@@ -248,8 +268,8 @@ unique_ptr<DuckLakeFieldId> DuckLakeFieldId::RenameField(const vector<string> &c
248268
// leaf - rename the column at this level
249269
auto copied_entry = child.Copy();
250270
auto renamed_entry =
251-
make_uniq<DuckLakeFieldId>(copied_entry->column_data, new_name, std::move(copied_entry->type),
252-
std::move(copied_entry->children));
271+
make_uniq<DuckLakeFieldId>(copied_entry->column_data.Copy(), new_name,
272+
std::move(copied_entry->type), std::move(copied_entry->children));
253273
new_children.push_back(std::move(renamed_entry));
254274
} else {
255275
// not the leaf - find the child to rename it and recurse
@@ -264,7 +284,7 @@ unique_ptr<DuckLakeFieldId> DuckLakeFieldId::RenameField(const vector<string> &c
264284
throw InternalException("DuckLakeFieldId::AddField - child not found in struct path");
265285
}
266286
auto new_type = GetStructType(new_children);
267-
return make_uniq<DuckLakeFieldId>(column_data, Name(), std::move(new_type), std::move(new_children));
287+
return make_uniq<DuckLakeFieldId>(column_data.Copy(), Name(), std::move(new_type), std::move(new_children));
268288
}
269289

270290
shared_ptr<DuckLakeFieldData> DuckLakeFieldData::RenameColumn(const DuckLakeFieldData &field_data,
@@ -288,7 +308,7 @@ shared_ptr<DuckLakeFieldData> DuckLakeFieldData::AddColumn(const DuckLakeFieldDa
288308
for (auto &existing_id : field_data.field_ids) {
289309
result->Add(existing_id->Copy());
290310
}
291-
auto field_id = DuckLakeFieldId::FieldIdFromColumn(new_col, next_column_id);
311+
auto field_id = DuckLakeFieldId::FieldIdFromColumn(new_col, next_column_id, true);
292312
result->Add(std::move(field_id));
293313
return result;
294314
}

0 commit comments

Comments
 (0)