Skip to content

Commit 5cc518f

Browse files
duckdblabs-bot authored and brianwyka committed
Update vendored DuckDB sources to 0e3a5cb
1 parent 3a18f9a commit 5cc518f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+942
-469
lines changed

src/duckdb/src/common/exception.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ bool Exception::InvalidatesTransaction(ExceptionType exception_type) {
6666

6767
bool Exception::InvalidatesDatabase(ExceptionType exception_type) {
6868
switch (exception_type) {
69-
case ExceptionType::INTERNAL:
7069
case ExceptionType::FATAL:
7170
return true;
7271
default:

src/duckdb/src/common/stacktrace.cpp

+41-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ static string UnmangleSymbol(string symbol) {
2121
}
2222
}
2323
for (idx_t i = mangle_start; i < symbol.size(); i++) {
24-
if (StringUtil::CharacterIsSpace(symbol[i])) {
24+
if (StringUtil::CharacterIsSpace(symbol[i]) || symbol[i] == ')' || symbol[i] == '+') {
2525
mangle_end = i;
2626
break;
2727
}
@@ -44,6 +44,45 @@ static string UnmangleSymbol(string symbol) {
4444
return result;
4545
}
4646

47+
//! Shortens a single resolved backtrace line.
//! On Apple platforms a frame is laid out as [depth] [library] [pointer] [symbol]; this keeps the
//! leading depth digits, pads them to a fixed width, and appends everything after the frame pointer.
//! If no "0x" pointer is found (or nothing follows it), or the depth is too wide to pad, the line is
//! returned unchanged. On all other platforms the line is always returned unchanged.
static string CleanupStackTrace(string symbol) {
#ifdef __APPLE__
    static constexpr idx_t STACK_TRACE_INDENTATION = 8;
    const idx_t length = symbol.size();

    // the depth is the leading run of digits
    idx_t depth_end = 0;
    while (depth_end < length && StringUtil::CharacterIsDigit(symbol[depth_end])) {
        depth_end++;
    }

    // find the first "0x" after the depth and step over the hex digits that follow it
    idx_t pointer_end = length;
    for (idx_t pos = depth_end; pos + 1 < length; pos++) {
        if (symbol[pos] != '0' || symbol[pos + 1] != 'x') {
            continue;
        }
        pointer_end = pos + 2;
        while (pointer_end < length && StringUtil::CharacterIsHex(symbol[pointer_end])) {
            pointer_end++;
        }
        break;
    }

    if (pointer_end == length || depth_end >= STACK_TRACE_INDENTATION) {
        // frame pointer not found - keep the original frame as-is
        return symbol;
    }

    // [depth] + padding up to the fixed indentation + remainder of the line after the pointer
    string cleaned = symbol.substr(0, depth_end);
    cleaned += string(STACK_TRACE_INDENTATION - depth_end, ' ');
    cleaned += symbol.substr(pointer_end);
    return cleaned;
#else
    return symbol;
#endif
}
85+
4786
string StackTrace::GetStacktracePointers(idx_t max_depth) {
4887
string result;
4988
auto callstack = unique_ptr<void *[]>(new void *[max_depth]);
@@ -68,7 +107,7 @@ string StackTrace::ResolveStacktraceSymbols(const string &pointers) {
68107
string result;
69108
char **strs = backtrace_symbols(callstack.get(), NumericCast<int>(frame_count));
70109
for (idx_t i = 0; i < frame_count; i++) {
71-
result += UnmangleSymbol(strs[i]);
110+
result += CleanupStackTrace(UnmangleSymbol(strs[i]));
72111
result += "\n";
73112
}
74113
free(reinterpret_cast<void *>(strs));

src/duckdb/src/common/types.cpp

+67-25
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,72 @@ static bool CombineUnequalTypes(const LogicalType &left, const LogicalType &righ
988988
return false;
989989
}
990990

991+
template <class OP>
992+
static bool CombineStructTypes(const LogicalType &left, const LogicalType &right, LogicalType &result) {
993+
auto &left_children = StructType::GetChildTypes(left);
994+
auto &right_children = StructType::GetChildTypes(right);
995+
996+
auto left_unnamed = StructType::IsUnnamed(left);
997+
auto is_unnamed = left_unnamed || StructType::IsUnnamed(right);
998+
child_list_t<LogicalType> child_types;
999+
1000+
// At least one side is unnamed, so we attempt positional casting.
1001+
if (is_unnamed) {
1002+
if (left_children.size() != right_children.size()) {
1003+
// We can't cast, or create the super-set.
1004+
return false;
1005+
}
1006+
1007+
for (idx_t i = 0; i < left_children.size(); i++) {
1008+
LogicalType child_type;
1009+
if (!OP::Operation(left_children[i].second, right_children[i].second, child_type)) {
1010+
return false;
1011+
}
1012+
auto &child_name = left_unnamed ? right_children[i].first : left_children[i].first;
1013+
child_types.emplace_back(child_name, std::move(child_type));
1014+
}
1015+
result = LogicalType::STRUCT(child_types);
1016+
return true;
1017+
}
1018+
1019+
// Create a super-set of the STRUCT fields.
1020+
// First, create a name->index map of the right children.
1021+
case_insensitive_map_t<idx_t> right_children_map;
1022+
for (idx_t i = 0; i < right_children.size(); i++) {
1023+
auto &name = right_children[i].first;
1024+
right_children_map[name] = i;
1025+
}
1026+
1027+
for (idx_t i = 0; i < left_children.size(); i++) {
1028+
auto &left_child = left_children[i];
1029+
auto right_child_it = right_children_map.find(left_child.first);
1030+
1031+
if (right_child_it == right_children_map.end()) {
1032+
// We can directly put the left child.
1033+
child_types.emplace_back(left_child.first, left_child.second);
1034+
continue;
1035+
}
1036+
1037+
// We need to recurse to ensure the children have a maximum logical type.
1038+
LogicalType child_type;
1039+
auto &right_child = right_children[right_child_it->second];
1040+
if (!OP::Operation(left_child.second, right_child.second, child_type)) {
1041+
return false;
1042+
}
1043+
child_types.emplace_back(left_child.first, std::move(child_type));
1044+
right_children_map.erase(right_child_it);
1045+
}
1046+
1047+
// Add all remaining right children.
1048+
for (const auto &right_child_it : right_children_map) {
1049+
auto &right_child = right_children[right_child_it.second];
1050+
child_types.emplace_back(right_child.first, right_child.second);
1051+
}
1052+
1053+
result = LogicalType::STRUCT(child_types);
1054+
return true;
1055+
}
1056+
9911057
template <class OP>
9921058
static bool CombineEqualTypes(const LogicalType &left, const LogicalType &right, LogicalType &result) {
9931059
// Since both left and right are equal we get the left type as our type_id for checks
@@ -1059,31 +1125,7 @@ static bool CombineEqualTypes(const LogicalType &left, const LogicalType &right,
10591125
return true;
10601126
}
10611127
case LogicalTypeId::STRUCT: {
1062-
// struct: perform recursively on each child
1063-
auto &left_child_types = StructType::GetChildTypes(left);
1064-
auto &right_child_types = StructType::GetChildTypes(right);
1065-
bool left_unnamed = StructType::IsUnnamed(left);
1066-
auto any_unnamed = left_unnamed || StructType::IsUnnamed(right);
1067-
if (left_child_types.size() != right_child_types.size()) {
1068-
// child types are not of equal size, we can't cast
1069-
// return false
1070-
return false;
1071-
}
1072-
child_list_t<LogicalType> child_types;
1073-
for (idx_t i = 0; i < left_child_types.size(); i++) {
1074-
LogicalType child_type;
1075-
// Child names must be in the same order OR either one of the structs must be unnamed
1076-
if (!any_unnamed && !StringUtil::CIEquals(left_child_types[i].first, right_child_types[i].first)) {
1077-
return false;
1078-
}
1079-
if (!OP::Operation(left_child_types[i].second, right_child_types[i].second, child_type)) {
1080-
return false;
1081-
}
1082-
auto &child_name = left_unnamed ? right_child_types[i].first : left_child_types[i].first;
1083-
child_types.emplace_back(child_name, std::move(child_type));
1084-
}
1085-
result = LogicalType::STRUCT(child_types);
1086-
return true;
1128+
return CombineStructTypes<OP>(left, right, result);
10871129
}
10881130
case LogicalTypeId::UNION: {
10891131
auto left_member_count = UnionType::GetMemberCount(left);

src/duckdb/src/common/types/vector.cpp

+30-24
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,10 @@ UnifiedVectorFormat &UnifiedVectorFormat::operator=(UnifiedVectorFormat &&other)
5454
return *this;
5555
}
5656

57-
//! Constructs a flat vector of the given type with a validity mask sized to `capacity`.
//! The data pointer starts out null; when `create_data` is set, Initialize() is called with
//! `initialize_to_zero` and `capacity`.
Vector::Vector(LogicalType type_p, bool create_data, bool initialize_to_zero, idx_t capacity)
    : vector_type(VectorType::FLAT_VECTOR), type(std::move(type_p)), data(nullptr), validity(capacity) {
    if (!create_data) {
        // caller does not want a buffer yet
        return;
    }
    Initialize(initialize_to_zero, capacity);
}
6363

@@ -306,7 +306,7 @@ void Vector::Slice(const SelectionVector &sel, idx_t count, SelCache &cache) {
306306
}
307307
}
308308

309-
void Vector::Initialize(bool zero_data, idx_t capacity) {
309+
void Vector::Initialize(bool initialize_to_zero, idx_t capacity) {
310310
auxiliary.reset();
311311
validity.Reset();
312312
auto &type = GetType();
@@ -325,7 +325,7 @@ void Vector::Initialize(bool zero_data, idx_t capacity) {
325325
if (type_size > 0) {
326326
buffer = VectorBuffer::CreateStandardVector(type, capacity);
327327
data = buffer->GetData();
328-
if (zero_data) {
328+
if (initialize_to_zero) {
329329
memset(data, 0, capacity * type_size);
330330
}
331331
}
@@ -1374,10 +1374,10 @@ void Vector::Deserialize(Deserializer &deserializer, idx_t count) {
13741374
}
13751375

13761376
void Vector::SetVectorType(VectorType vector_type_p) {
1377-
this->vector_type = vector_type_p;
1377+
vector_type = vector_type_p;
13781378
auto physical_type = GetType().InternalType();
1379-
if (TypeIsConstantSize(physical_type) &&
1380-
(GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR)) {
1379+
auto flat_or_const = GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR;
1380+
if (TypeIsConstantSize(physical_type) && flat_or_const) {
13811381
auxiliary.reset();
13821382
}
13831383
if (vector_type == VectorType::CONSTANT_VECTOR && physical_type == PhysicalType::STRUCT) {
@@ -1782,23 +1782,29 @@ void Vector::DebugShuffleNestedVector(Vector &vector, idx_t count) {
17821782
//! Sets the validity of row `idx` in a flat vector (asserted in debug builds).
//! When the row is marked NULL, the NULL is also written to the same row of every STRUCT child entry,
//! or to each of the `array_size` child elements that belong to this row of an ARRAY vector.
//! Marking a row as valid does not touch any children.
void FlatVector::SetNull(Vector &vector, idx_t idx, bool is_null) {
    D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR);
    vector.validity.Set(idx, !is_null);
    if (!is_null) {
        return;
    }

    auto &type = vector.GetType();
    switch (type.InternalType()) {
    case PhysicalType::STRUCT: {
        // Set all child entries to NULL.
        for (auto &entry : StructVector::GetEntries(vector)) {
            FlatVector::SetNull(*entry, idx, true);
        }
        break;
    }
    case PhysicalType::ARRAY: {
        // Set all child elements of this row to NULL.
        auto &child = ArrayVector::GetEntry(vector);
        auto array_size = ArrayType::GetSize(type);
        auto child_offset = idx * array_size;
        for (idx_t i = 0; i < array_size; i++) {
            FlatVector::SetNull(child, child_offset + i, true);
        }
        break;
    }
    default:
        // no nested children to update
        break;
    }
}

src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp

+35-8
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,19 @@ bool LineError::HandleErrors(StringValueResult &result) {
684684
result.state_machine.options, cur_error.current_line_size, lines_per_batch, borked_line,
685685
result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl), result.path);
686686
break;
687+
case INVALID_STATE:
688+
if (result.current_line_position.begin == line_pos) {
689+
csv_error = CSVError::InvalidState(
690+
result.state_machine.options, col_idx, lines_per_batch, borked_line,
691+
result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
692+
line_pos.GetGlobalPosition(result.requested_size, first_nl), result.path);
693+
} else {
694+
csv_error = CSVError::InvalidState(
695+
result.state_machine.options, col_idx, lines_per_batch, borked_line,
696+
result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
697+
line_pos.GetGlobalPosition(result.requested_size), result.path);
698+
}
699+
break;
687700
default:
688701
throw InvalidInputException("CSV Error not allowed when inserting row");
689702
}
@@ -878,7 +891,11 @@ bool StringValueResult::AddRow(StringValueResult &result, const idx_t buffer_pos
878891
}
879892

880893
//! Records an error for the current column at the last position:
//! UNTERMINATED_QUOTES when the result is flagged as quoted, INVALID_STATE otherwise.
void StringValueResult::InvalidState(StringValueResult &result) {
    const auto error_type = result.quoted ? UNTERMINATED_QUOTES : INVALID_STATE;
    result.current_errors.Insert(error_type, result.cur_col_id, result.chunk_col_id, result.last_position);
}
883900

884901
bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_pos) {
@@ -1724,11 +1741,18 @@ void StringValueScanner::FinalizeChunkProcess() {
17241741
// If we are not done we have two options.
17251742
// 1) If a boundary is set.
17261743
if (iterator.IsBoundarySet()) {
1727-
bool has_unterminated_quotes = false;
1728-
if (!result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
1744+
bool found_error = false;
1745+
CSVErrorType type;
1746+
if (!result.current_errors.HasErrorType(UNTERMINATED_QUOTES) &&
1747+
!result.current_errors.HasErrorType(INVALID_STATE)) {
17291748
iterator.done = true;
17301749
} else {
1731-
has_unterminated_quotes = true;
1750+
found_error = true;
1751+
if (result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
1752+
type = UNTERMINATED_QUOTES;
1753+
} else {
1754+
type = INVALID_STATE;
1755+
}
17321756
}
17331757
// We read until the next line or until we have nothing else to read.
17341758
// Move to next buffer
@@ -1747,18 +1771,21 @@ void StringValueScanner::FinalizeChunkProcess() {
17471771
}
17481772
} else {
17491773
if (result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
1750-
has_unterminated_quotes = true;
1774+
found_error = true;
1775+
type = UNTERMINATED_QUOTES;
1776+
} else if (result.current_errors.HasErrorType(INVALID_STATE)) {
1777+
found_error = true;
1778+
type = INVALID_STATE;
17511779
}
17521780
if (result.current_errors.HandleErrors(result)) {
17531781
result.number_of_rows++;
17541782
}
17551783
}
1756-
if (states.IsQuotedCurrent() && !has_unterminated_quotes &&
1784+
if (states.IsQuotedCurrent() && !found_error &&
17571785
state_machine->dialect_options.state_machine_options.rfc_4180.GetValue()) {
17581786
// If we finish the execution of a buffer, and we end in a quoted state, it means we have unterminated
17591787
// quotes
1760-
result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id,
1761-
result.last_position);
1788+
result.current_errors.Insert(type, result.cur_col_id, result.chunk_col_id, result.last_position);
17621789
if (result.current_errors.HandleErrors(result)) {
17631790
result.number_of_rows++;
17641791
}

src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,10 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
7575
transition_array[static_cast<uint8_t>('\r')][state] = CSVState::CARRIAGE_RETURN;
7676
if (state == static_cast<uint8_t>(CSVState::STANDARD_NEWLINE)) {
7777
transition_array[static_cast<uint8_t>('\n')][state] = CSVState::STANDARD;
78+
} else if (!state_machine_options.rfc_4180.GetValue()) {
79+
transition_array[static_cast<uint8_t>('\n')][state] = CSVState::RECORD_SEPARATOR;
7880
} else {
79-
if (!state_machine_options.rfc_4180.GetValue()) {
80-
transition_array[static_cast<uint8_t>('\n')][state] = CSVState::RECORD_SEPARATOR;
81-
}
81+
transition_array[static_cast<uint8_t>('\n')][state] = CSVState::INVALID;
8282
}
8383
} else {
8484
transition_array[static_cast<uint8_t>('\r')][state] = CSVState::RECORD_SEPARATOR;

0 commit comments

Comments
 (0)