brianwyka
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎CMakeLists.txt
Lines changed: 1 addition & 1 deletion b/‎CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/duckdb/extension/core_functions/scalar/random/random.cpp
Lines changed: 3 additions & 2 deletions b/‎src/duckdb/extension/core_functions/scalar/random/random.cpp
Lines changed: 3 additions & 2 deletions
diff --git a/‎src/duckdb/extension/json/json_functions/json_structure.cpp
Lines changed: 2 additions & 0 deletions b/‎src/duckdb/extension/json/json_functions/json_structure.cpp
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/duckdb/src/common/adbc/adbc.cpp
Lines changed: 18 additions & 0 deletions b/‎src/duckdb/src/common/adbc/adbc.cpp
Lines changed: 18 additions & 0 deletions
diff --git a/‎src/duckdb/src/common/file_buffer.cpp
Lines changed: 5 additions & 2 deletions b/‎src/duckdb/src/common/file_buffer.cpp
Lines changed: 5 additions & 2 deletions
diff --git a/‎src/duckdb/src/common/random_engine.cpp
Lines changed: 1 addition & 1 deletion b/‎src/duckdb/src/common/random_engine.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/duckdb/src/execution/join_hashtable.cpp
Lines changed: 14 additions & 4 deletions b/‎src/duckdb/src/execution/join_hashtable.cpp
Lines changed: 14 additions & 4 deletions
diff --git a/‎src/duckdb/src/execution/operator/aggregate/physical_window.cpp
Lines changed: 0 additions & 1 deletion b/‎src/duckdb/src/execution/operator/aggregate/physical_window.cpp
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp
Lines changed: 2 additions & 2 deletions b/‎src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp
Lines changed: 2 additions & 1 deletion b/‎src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp
Lines changed: 50 additions & 17 deletions b/‎src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp
Lines changed: 50 additions & 17 deletions
diff --git a/‎src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp
Lines changed: 10 additions & 8 deletions b/‎src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp
Lines changed: 10 additions & 8 deletions
diff --git a/‎src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp
Lines changed: 1 addition & 1 deletion b/‎src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
Lines changed: 12 additions & 6 deletions b/‎src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp
Lines changed: 12 additions & 6 deletions
diff --git a/‎src/duckdb/src/execution/sample/reservoir_sample.cpp
Lines changed: 1 addition & 1 deletion b/‎src/duckdb/src/execution/sample/reservoir_sample.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/duckdb/src/function/table/read_csv.cpp
Lines changed: 3 additions & 1 deletion b/‎src/duckdb/src/function/table/read_csv.cpp
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/duckdb/src/function/table/system/duckdb_memory.cpp
Lines changed: 0 additions & 1 deletion b/‎src/duckdb/src/function/table/system/duckdb_memory.cpp
Lines changed: 0 additions & 1 deletion
@@ -1,3 +1,4 @@
 build
 .idea
+*.iml
 cmake-build-debug
@@ -9,7 +9,8 @@
 namespace duckdb {
 
 struct RandomLocalState : public FunctionLocalState {
-	explicit RandomLocalState(uint32_t seed) : random_engine(seed) {
+	explicit RandomLocalState(uint64_t seed) : random_engine(0) {
+		random_engine.SetSeed(seed);
 	}
 
 	RandomEngine random_engine;
@@ -30,7 +31,7 @@ static unique_ptr<FunctionLocalState> RandomInitLocalState(ExpressionState &stat
                                                            FunctionData *bind_data) {
 	auto &random_engine = RandomEngine::Get(state.GetContext());
 	lock_guard<mutex> guard(random_engine.lock);
-	return make_uniq<RandomLocalState>(random_engine.NextRandomInteger());
+	return make_uniq<RandomLocalState>(random_engine.NextRandomInteger64());
 }
 
 ScalarFunction RandomFun::GetFunction() {
 
@@ -626,6 +626,8 @@ static double CalculateTypeSimilarity(const LogicalType &merged, const LogicalTy
 			// This can happen for empty structs/maps ("{}"), or in rare cases where an inconsistent struct becomes
 			// consistent when merged, but does not have enough children to be considered a map.
 			return CalculateMapAndStructSimilarity(type, merged, true, max_depth, depth);
+		} else if (type.id() != LogicalTypeId::STRUCT) {
+			return -1;
 		}
 
 		// Only structs can be merged into a struct
 
@@ -172,6 +172,15 @@ AdbcStatusCode StatementSetSubstraitPlan(struct AdbcStatement *statement, const
 		return ADBC_STATUS_INVALID_ARGUMENT;
 	}
 	auto wrapper = static_cast<DuckDBAdbcStatementWrapper *>(statement->private_data);
+	if (wrapper->ingestion_stream.release) {
+		// Release any resources currently held by the ingestion stream before we overwrite it
+		wrapper->ingestion_stream.release(&wrapper->ingestion_stream);
+		wrapper->ingestion_stream.release = nullptr;
+	}
+	if (wrapper->statement) {
+		duckdb_destroy_prepare(&wrapper->statement);
+		wrapper->statement = nullptr;
+	}
 	wrapper->substrait_plan = static_cast<uint8_t *>(malloc(sizeof(uint8_t) * length));
 	wrapper->plan_length = length;
 	memcpy(wrapper->substrait_plan, plan, length);
@@ -912,6 +921,15 @@ AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *statement, const char
 	}
 
 	auto wrapper = static_cast<DuckDBAdbcStatementWrapper *>(statement->private_data);
+	if (wrapper->ingestion_stream.release) {
+		// Release any resources currently held by the ingestion stream before we overwrite it
+		wrapper->ingestion_stream.release(&wrapper->ingestion_stream);
+		wrapper->ingestion_stream.release = nullptr;
+	}
+	if (wrapper->statement) {
+		duckdb_destroy_prepare(&wrapper->statement);
+		wrapper->statement = nullptr;
+	}
 	auto res = duckdb_prepare(wrapper->connection, query, &wrapper->statement);
 	auto error_msg = duckdb_prepare_error(wrapper->statement);
 	return CheckResult(res, error, error_msg);
 
@@ -42,19 +42,22 @@ FileBuffer::~FileBuffer() {
 	allocator.FreeData(internal_buffer, internal_size);
 }
 
-void FileBuffer::ReallocBuffer(size_t new_size) {
+void FileBuffer::ReallocBuffer(idx_t new_size) {
 	data_ptr_t new_buffer;
 	if (internal_buffer) {
 		new_buffer = allocator.ReallocateData(internal_buffer, internal_size, new_size);
 	} else {
 		new_buffer = allocator.AllocateData(new_size);
 	}
+
+	// FIXME: should we throw one of our exceptions here?
 	if (!new_buffer) {
 		throw std::bad_alloc();
 	}
 	internal_buffer = new_buffer;
 	internal_size = new_size;
-	// Caller must update these.
+
+	// The caller must update these.
 	buffer = nullptr;
 	size = 0;
 }
 
@@ -59,7 +59,7 @@ uint32_t RandomEngine::NextRandomInteger32(uint32_t min, uint32_t max) {
 	return min + static_cast<uint32_t>(NextRandom32() * double(max - min));
 }
 
-void RandomEngine::SetSeed(uint32_t seed) {
+void RandomEngine::SetSeed(uint64_t seed) {
 	random_state->pcg.seed(seed);
 }
 
 
@@ -1528,18 +1528,28 @@ bool JoinHashTable::PrepareExternalFinalize(const idx_t max_ht_size) {
 
 	// Create vector with unfinished partition indices
 	auto &partitions = sink_collection->GetPartitions();
+	auto min_partition_size = NumericLimits<idx_t>::Maximum();
 	vector<idx_t> partition_indices;
 	partition_indices.reserve(num_partitions);
 	for (idx_t partition_idx = 0; partition_idx < num_partitions; partition_idx++) {
-		if (!completed_partitions.RowIsValidUnsafe(partition_idx)) {
-			partition_indices.push_back(partition_idx);
+		if (completed_partitions.RowIsValidUnsafe(partition_idx)) {
+			continue;
 		}
+		partition_indices.push_back(partition_idx);
+		// Keep track of min partition size
+		const auto size =
+		    partitions[partition_idx]->SizeInBytes() + PointerTableSize(partitions[partition_idx]->Count());
+		min_partition_size = MinValue(min_partition_size, size);
 	}
+
 	// Sort partitions by size, from small to large
-	std::sort(partition_indices.begin(), partition_indices.end(), [&](const idx_t &lhs, const idx_t &rhs) {
+	std::stable_sort(partition_indices.begin(), partition_indices.end(), [&](const idx_t &lhs, const idx_t &rhs) {
 		const auto lhs_size = partitions[lhs]->SizeInBytes() + PointerTableSize(partitions[lhs]->Count());
 		const auto rhs_size = partitions[rhs]->SizeInBytes() + PointerTableSize(partitions[rhs]->Count());
-		return lhs_size < rhs_size;
+		// We divide by min_partition_size, effectively rounding everything down to a multiple of min_partition_size
+		// Makes it so minor differences in partition sizes don't mess up the original order
+		// Retaining as much of the original order as possible reduces I/O (partition idx determines eviction queue idx)
+		return lhs_size / min_partition_size < rhs_size / min_partition_size;
 	});
 
 	// Determine which partitions should go next
 
@@ -2,7 +2,6 @@
 
 #include "duckdb/common/sort/partition_state.hpp"
 #include "duckdb/function/window/window_aggregate_function.hpp"
-#include "duckdb/function/window/window_cumedist_function.hpp"
 #include "duckdb/function/window/window_executor.hpp"
 #include "duckdb/function/window/window_rank_function.hpp"
 #include "duckdb/function/window/window_rownumber_function.hpp"
 
@@ -133,7 +133,7 @@ AdaptiveSnifferResult CSVSniffer::MinimalSniff() {
 	vector<HeaderValue> potential_header;
 	for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) {
 		auto &cur_vector = data_chunk.data[col_idx];
-		auto vector_data = FlatVector::GetData<string_t>(cur_vector);
+		const auto vector_data = FlatVector::GetData<string_t>(cur_vector);
 		auto &validity = FlatVector::Validity(cur_vector);
 		HeaderValue val;
 		if (validity.RowIsValid(0)) {
@@ -181,7 +181,7 @@ SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) {
 	return min_sniff_res.ToSnifferResult();
 }
 
-SnifferResult CSVSniffer::SniffCSV(bool force_match) {
+SnifferResult CSVSniffer::SniffCSV(const bool force_match) {
 	buffer_manager->sniffing = true;
 	// 1. Dialect Detection
 	DetectDialect();
 
@@ -362,7 +362,8 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 	    (single_column_before || ((more_values || more_columns) && !require_more_padding) ||
 	     (more_than_one_column && require_less_padding) || quoted) &&
 	    !invalid_padding && comments_are_acceptable) {
-		if (!candidates.empty() && set_columns.IsSet() && max_columns_found == set_columns.Size()) {
+		if (!candidates.empty() && set_columns.IsSet() && max_columns_found == set_columns.Size() &&
+		    consistent_rows <= best_consistent_rows) {
 			// We have a candidate that fits our requirements better
 			if (candidates.front()->ever_quoted || !scanner->ever_quoted) {
 				return;
 
@@ -17,8 +17,8 @@ static string GenerateColumnName(const idx_t total_cols, const idx_t col_number,
 // Helper function for UTF-8 aware space trimming
 static string TrimWhitespace(const string &col_name) {
 	utf8proc_int32_t codepoint;
-	auto str = reinterpret_cast<const utf8proc_uint8_t *>(col_name.c_str());
-	idx_t size = col_name.size();
+	const auto str = reinterpret_cast<const utf8proc_uint8_t *>(col_name.c_str());
+	const idx_t size = col_name.size();
 	// Find the first character that is not left trimmed
 	idx_t begin = 0;
 	while (begin < size) {
@@ -96,6 +96,44 @@ static string NormalizeColumnName(const string &col_name) {
 	return col_name_cleaned;
 }
 
+static void ReplaceNames(vector<string> &detected_names, CSVStateMachine &state_machine,
+                         unordered_map<idx_t, vector<LogicalType>> &best_sql_types_candidates_per_column_idx,
+                         CSVReaderOptions &options, const vector<HeaderValue> &best_header_row,
+                         CSVErrorHandler &error_handler) { // Overwrites entries of detected_names with options.name_list; a no-op when options.columns_set is true
+	auto &dialect_options = state_machine.dialect_options;
+	if (!options.columns_set) {
+		if (options.file_options.hive_partitioning || options.file_options.union_by_name || options.multi_file_reader) {
+			// Just do the replacement, clamped to the shorter of the two lists (no padding, no error reporting)
+			for (idx_t i = 0; i < MinValue<idx_t>(detected_names.size(), options.name_list.size()); i++) {
+				detected_names[i] = options.name_list[i];
+			}
+			return;
+		}
+		if (options.name_list.size() > dialect_options.num_cols) { // more names supplied than dialect_options.num_cols
+			if (options.null_padding) {
+				// Pad: append one generated placeholder name and a VARCHAR type candidate per extra column
+				idx_t col = 0;
+				for (idx_t i = dialect_options.num_cols; i < options.name_list.size(); i++) {
+					detected_names.push_back(GenerateColumnName(options.name_list.size(), col++));
+					best_sql_types_candidates_per_column_idx[i] = {LogicalType::VARCHAR};
+				}
+				// Record the widened column count on the dialect options
+				dialect_options.num_cols = options.name_list.size();
+
+			} else {
+				// No padding requested: report a header sniffing error through error_handler
+				const auto error = CSVError::HeaderSniffingError(
+				    options, best_header_row, options.name_list.size(),
+				    state_machine.dialect_options.state_machine_options.delimiter.GetValue());
+				error_handler.Error(error);
+			}
+		}
+		for (idx_t i = 0; i < options.name_list.size(); i++) { // NOTE(review): not clamped to detected_names.size(), unlike the loop above; if error_handler.Error() can return without throwing, this indexes past the end - confirm
+			detected_names[i] = options.name_list[i];
+		}
+	}
+}
+
 // If our columns were set by the user, we verify if their names match with the first row
 bool CSVSniffer::DetectHeaderWithSetColumn(ClientContext &context, vector<HeaderValue> &best_header_row,
                                            const SetColumns &set_columns, CSVReaderOptions &options) {
@@ -181,11 +219,8 @@ CSVSniffer::DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &be
 			detected_names.push_back(GenerateColumnName(dialect_options.num_cols, col));
 		}
 		// If the user provided names, we must replace our header with the user provided names
-		if (!options.columns_set) {
-			for (idx_t i = 0; i < MinValue<idx_t>(best_header_row.size(), options.name_list.size()); i++) {
-				detected_names[i] = options.name_list[i];
-			}
-		}
+		ReplaceNames(detected_names, state_machine, best_sql_types_candidates_per_column_idx, options, best_header_row,
+		             error_handler);
 		return detected_names;
 	}
 	// information for header detection
@@ -199,11 +234,8 @@ CSVSniffer::DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &be
 				detected_names.push_back(GenerateColumnName(dialect_options.num_cols, col));
 			}
 			dialect_options.rows_until_header += 1;
-			if (!options.columns_set) {
-				for (idx_t i = 0; i < MinValue<idx_t>(detected_names.size(), options.name_list.size()); i++) {
-					detected_names[i] = options.name_list[i];
-				}
-			}
+			ReplaceNames(detected_names, state_machine, best_sql_types_candidates_per_column_idx, options,
+			             best_header_row, error_handler);
 			return detected_names;
 		}
 		auto error =
@@ -295,16 +327,17 @@ CSVSniffer::DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &be
 	}
 
 	// If the user provided names, we must replace our header with the user provided names
-	if (!options.columns_set) {
-		for (idx_t i = 0; i < MinValue<idx_t>(detected_names.size(), options.name_list.size()); i++) {
-			detected_names[i] = options.name_list[i];
-		}
-	}
+	ReplaceNames(detected_names, state_machine, best_sql_types_candidates_per_column_idx, options, best_header_row,
+	             error_handler);
 	return detected_names;
 }
 void CSVSniffer::DetectHeader() {
 	auto &sniffer_state_machine = best_candidate->GetStateMachine();
 	names = DetectHeaderInternal(buffer_manager->context, best_header_row, sniffer_state_machine, set_columns,
 	                             best_sql_types_candidates_per_column_idx, options, *error_handler);
+	for (idx_t i = max_columns_found; i < names.size(); i++) { // append one VARCHAR type per name at index >= max_columns_found
+		detected_types.push_back(LogicalType::VARCHAR);
+	}
+	max_columns_found = names.size(); // NOTE(review): assigned unconditionally, so it also shrinks when names.size() < max_columns_found - confirm intended
 }
 } // namespace duckdb
@@ -57,14 +57,14 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
 
 	const bool multi_byte_delimiter = delimiter_value.size() != 1;
 
-	bool enable_unquoted_escape = state_machine_options.rfc_4180.GetValue() == false &&
-	                              state_machine_options.quote != state_machine_options.escape &&
-	                              state_machine_options.escape != '\0';
+	const bool enable_unquoted_escape = state_machine_options.rfc_4180.GetValue() == false &&
+	                                    state_machine_options.quote != state_machine_options.escape &&
+	                                    state_machine_options.escape != '\0';
 	// Now set values depending on configuration
 	// 1) Standard/Invalid State
-	vector<uint8_t> std_inv {static_cast<uint8_t>(CSVState::STANDARD), static_cast<uint8_t>(CSVState::INVALID),
-	                         static_cast<uint8_t>(CSVState::STANDARD_NEWLINE)};
-	for (auto &state : std_inv) {
+	const vector<uint8_t> std_inv {static_cast<uint8_t>(CSVState::STANDARD), static_cast<uint8_t>(CSVState::INVALID),
+	                               static_cast<uint8_t>(CSVState::STANDARD_NEWLINE)};
+	for (const auto &state : std_inv) {
 		if (multi_byte_delimiter) {
 			transition_array[delimiter_first_byte][state] = CSVState::DELIMITER_FIRST_BYTE;
 		} else {
@@ -75,7 +75,9 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
 			if (state == static_cast<uint8_t>(CSVState::STANDARD_NEWLINE)) {
 				transition_array[static_cast<uint8_t>('\n')][state] = CSVState::STANDARD;
 			} else {
-				transition_array[static_cast<uint8_t>('\n')][state] = CSVState::RECORD_SEPARATOR;
+				if (!state_machine_options.rfc_4180.GetValue()) {
+					transition_array[static_cast<uint8_t>('\n')][state] = CSVState::RECORD_SEPARATOR;
+				}
 			}
 		} else {
 			transition_array[static_cast<uint8_t>('\r')][state] = CSVState::RECORD_SEPARATOR;
@@ -96,7 +98,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
 		transition_array[' '][static_cast<uint8_t>(CSVState::DELIMITER)] = CSVState::EMPTY_SPACE;
 	}
 
-	vector<uint8_t> delimiter_states {
+	const vector<uint8_t> delimiter_states {
 	    static_cast<uint8_t>(CSVState::DELIMITER), static_cast<uint8_t>(CSVState::DELIMITER_FIRST_BYTE),
 	    static_cast<uint8_t>(CSVState::DELIMITER_SECOND_BYTE), static_cast<uint8_t>(CSVState::DELIMITER_THIRD_BYTE)};
 
 
@@ -60,7 +60,7 @@ void CSVFileScan::SetStart() {
 }
 
 CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, const CSVReaderOptions &options_p,
-                         const idx_t file_idx_p, const ReadCSVData &bind_data, const vector<ColumnIndex> &column_ids,
+                         idx_t file_idx_p, const ReadCSVData &bind_data, const vector<ColumnIndex> &column_ids,
                          CSVSchema &file_schema, bool per_file_single_threaded)
     : file_path(file_path_p), file_idx(file_idx_p),
       error_handler(make_shared_ptr<CSVErrorHandler>(options_p.ignore_errors.GetValue())), options(options_p) {
 
@@ -87,6 +87,15 @@ unique_ptr<StringValueScanner> CSVGlobalState::Next(optional_ptr<StringValueScan
 		                 previous_scanner->GetValidationLine());
 	}
 	if (single_threaded) {
+		{
+			lock_guard<mutex> parallel_lock(main_mutex);
+			if (previous_scanner) {
+				// Cleanup previous scanner.
+				previous_scanner->buffer_tracker.reset();
+				current_buffer_in_use.reset();
+				previous_scanner->csv_file_scan->Finish();
+			}
+		}
 		idx_t cur_idx;
 		bool empty_file = false;
 		do {
@@ -108,6 +117,7 @@ unique_ptr<StringValueScanner> CSVGlobalState::Next(optional_ptr<StringValueScan
 			auto file_scan = make_shared_ptr<CSVFileScan>(context, bind_data.files[cur_idx], bind_data.options, cur_idx,
 			                                              bind_data, column_ids, file_schema, true);
 			empty_file = file_scan->file_size == 0;
+
 			if (!empty_file) {
 				lock_guard<mutex> parallel_lock(main_mutex);
 				file_scans.emplace_back(std::move(file_scan));
@@ -116,11 +126,7 @@ unique_ptr<StringValueScanner> CSVGlobalState::Next(optional_ptr<StringValueScan
 				current_boundary.SetCurrentBoundaryToPosition(single_threaded);
 				current_buffer_in_use = make_shared_ptr<CSVBufferUsage>(*file_scans.back()->buffer_manager,
 				                                                        current_boundary.GetBufferIdx());
-				if (previous_scanner) {
-					previous_scanner->buffer_tracker.reset();
-					current_buffer_in_use.reset();
-					previous_scanner->csv_file_scan->Finish();
-				}
+
 				return make_uniq<StringValueScanner>(scanner_idx++, current_file->buffer_manager,
 				                                     current_file->state_machine, current_file->error_handler,
 				                                     current_file, false, current_boundary);
@@ -178,7 +184,7 @@ unique_ptr<StringValueScanner> CSVGlobalState::Next(optional_ptr<StringValueScan
 
 idx_t CSVGlobalState::MaxThreads() const {
 	// We initialize max one thread per our set bytes per thread limit
-	if (single_threaded) {
+	if (single_threaded || !file_scans.front()->on_disk_file) {
 		return system_threads;
 	}
 	idx_t total_threads = file_scans.front()->file_size / CSVIterator::BYTES_PER_THREAD + 1;
 
@@ -505,7 +505,7 @@ void ReservoirSample::EvictOverBudgetSamples() {
 	D_ASSERT(num_samples_to_keep <= sample_count);
 	D_ASSERT(stats_sample);
 	D_ASSERT(sample_count == FIXED_SAMPLE_SIZE);
-	auto new_reservoir_chunk = CreateNewSampleChunk(types, FIXED_SAMPLE_SIZE);
+	auto new_reservoir_chunk = CreateNewSampleChunk(types, sample_count);
 
 	// The current selection vector can potentially have 2048 valid mappings.
 	// If we need to save a sample with less rows than that, we need to do the following
 
@@ -126,7 +126,9 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 	auto &options = result->options;
 	auto multi_file_reader = MultiFileReader::Create(input.table_function);
 	auto multi_file_list = multi_file_reader->CreateFileList(context, input.inputs[0]);
-
+	if (multi_file_list->GetTotalFileCount() > 1) {
+		options.multi_file_reader = true;
+	}
 	options.FromNamedParameters(input.named_parameters, context);
 
 	options.file_options.AutoDetectHivePartitioning(*multi_file_list, context);
 
@@ -27,7 +27,6 @@ static unique_ptr<FunctionData> DuckDBMemoryBind(ClientContext &context, TableFu
 
 unique_ptr<GlobalTableFunctionState> DuckDBMemoryInit(ClientContext &context, TableFunctionInitInput &input) {
 	auto result = make_uniq<DuckDBMemoryData>();
-
 	result->entries = BufferManager::GetBufferManager(context).GetMemoryUsageInfo();
 	return std::move(result);
 }
-Original file line number
+Diff line change
@@ @@ -1,3 +1,4 @@ @@
 build
 .idea
 +*.iml
 cmake-build-debug
Original file line number	Diff line number	Diff line change
`@@ -626,6 +626,8 @@ static double CalculateTypeSimilarity(const LogicalType &merged, const LogicalTy`
`626`	`626`	`// This can happen for empty structs/maps ("{}"), or in rare cases where an inconsistent struct becomes`
`627`	`627`	`// consistent when merged, but does not have enough children to be considered a map.`
`628`	`628`	`return CalculateMapAndStructSimilarity(type, merged, true, max_depth, depth);`
	`629`	`+ } else if (type.id() != LogicalTypeId::STRUCT) {`
	`630`	`+ return -1;`
`629`	`631`	`}`
`630`	`632`
`631`	`633`	`// Only structs can be merged into a struct`
Original file line number	Diff line number	Diff line change
`@@ -42,19 +42,22 @@ FileBuffer::~FileBuffer() {`
`42`	`42`	`allocator.FreeData(internal_buffer, internal_size);`
`43`	`43`	`}`
`44`	`44`
`45`		`-void FileBuffer::ReallocBuffer(size_t new_size) {`
	`45`	`+void FileBuffer::ReallocBuffer(idx_t new_size) {`
`46`	`46`	`data_ptr_t new_buffer;`
`47`	`47`	`if (internal_buffer) {`
`48`	`48`	`new_buffer = allocator.ReallocateData(internal_buffer, internal_size, new_size);`
`49`	`49`	`} else {`
`50`	`50`	`new_buffer = allocator.AllocateData(new_size);`
`51`	`51`	`}`
	`52`	`+`
	`53`	`+ // FIXME: should we throw one of our exceptions here?`
`52`	`54`	`if (!new_buffer) {`
`53`	`55`	`throw std::bad_alloc();`
`54`	`56`	`}`
`55`	`57`	`internal_buffer = new_buffer;`
`56`	`58`	`internal_size = new_size;`
`57`		`- // Caller must update these.`
	`59`	`+`
	`60`	`+ // The caller must update these.`
`58`	`61`	`buffer = nullptr;`
`59`	`62`	`size = 0;`
`60`	`63`	`}`
Original file line number	Diff line number	Diff line change
`@@ -59,7 +59,7 @@ uint32_t RandomEngine::NextRandomInteger32(uint32_t min, uint32_t max) {`
`59`	`59`	`return min + static_cast<uint32_t>(NextRandom32() * double(max - min));`
`60`	`60`	`}`
`61`	`61`
`62`		`-void RandomEngine::SetSeed(uint32_t seed) {`
	`62`	`+void RandomEngine::SetSeed(uint64_t seed) {`
`63`	`63`	`random_state->pcg.seed(seed);`
`64`	`64`	`}`
`65`	`65`
Original file line number	Diff line number	Diff line change
`@@ -60,7 +60,7 @@ void CSVFileScan::SetStart() {`
`60`	`60`	`}`
`61`	`61`
`62`	`62`	`CSVFileScan::CSVFileScan(ClientContext &context, const string &file_path_p, const CSVReaderOptions &options_p,`
`63`		`- const idx_t file_idx_p, const ReadCSVData &bind_data, const vector<ColumnIndex> &column_ids,`
	`63`	`+ idx_t file_idx_p, const ReadCSVData &bind_data, const vector<ColumnIndex> &column_ids,`
`64`	`64`	`CSVSchema &file_schema, bool per_file_single_threaded)`
`65`	`65`	`: file_path(file_path_p), file_idx(file_idx_p),`
`66`	`66`	`error_handler(make_shared_ptr<CSVErrorHandler>(options_p.ignore_errors.GetValue())), options(options_p) {`
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,6 @@ static unique_ptr<FunctionData> DuckDBMemoryBind(ClientContext &context, TableFu`
`27`	`27`
`28`	`28`	`unique_ptr<GlobalTableFunctionState> DuckDBMemoryInit(ClientContext &context, TableFunctionInitInput &input) {`
`29`	`29`	`auto result = make_uniq<DuckDBMemoryData>();`
`30`		`-`
`31`	`30`	`result->entries = BufferManager::GetBufferManager(context).GetMemoryUsageInfo();`
`32`	`31`	`return std::move(result);`
`33`	`32`	`}`