Skip to content

Commit 2dd141e

Browse files
Fall 2024 Project 3 (#762)
* feat(p3): introduce external merge sort & remove several executors
* new line at eof
* refactor OrderBy
* introduce external merge sort executor
* introduce tuple comparator
* add comments for 2-way merge sort requirement
* make sort plan node format as external merge sort
* fix lint
* sync private for test sort
* sync private
* update submission files
* update p4 submission files
* initializer -> constructor
* update tests and shared ptr for IndexInfo and TableInfo
* update TxnMgrDbg calls
* fix format errors
* sync private
* sync private
* sync private
* redistribute points
* rename leaderboard test file

---------

Co-authored-by: Yash Kothari <[email protected]>
1 parent 1f1a8a0 commit 2dd141e

34 files changed

+459
-211
lines changed

CMakeLists.txt

+6-12
Original file line number | Diff line number | Diff line change
@@ -324,37 +324,33 @@ set(P3_FILES
324324
"src/include/execution/executors/index_scan_executor.h"
325325
"src/include/execution/executors/insert_executor.h"
326326
"src/include/execution/executors/limit_executor.h"
327+
"src/include/execution/executors/nested_index_join_executor.h"
327328
"src/include/execution/executors/nested_loop_join_executor.h"
328329
"src/include/execution/executors/seq_scan_executor.h"
329-
"src/include/execution/executors/sort_executor.h"
330-
"src/include/execution/executors/topn_executor.h"
331-
"src/include/execution/executors/topn_per_group_executor.h"
330+
"src/include/execution/executors/external_merge_sort_executor.h"
332331
"src/include/execution/executors/update_executor.h"
333-
"src/include/execution/executors/window_function_executor.h"
334332
"src/execution/aggregation_executor.cpp"
335-
"src/execution/window_function_executor.cpp"
336333
"src/execution/delete_executor.cpp"
337334
"src/execution/filter_executor.cpp"
338335
"src/execution/hash_join_executor.cpp"
339336
"src/execution/index_scan_executor.cpp"
340337
"src/execution/insert_executor.cpp"
341338
"src/execution/limit_executor.cpp"
339+
"src/execution/nested_index_join_executor.cpp"
342340
"src/execution/nested_loop_join_executor.cpp"
343341
"src/execution/seq_scan_executor.cpp"
344-
"src/execution/sort_executor.cpp"
345-
"src/execution/topn_executor.cpp"
346-
"src/execution/topn_per_group_executor.cpp"
342+
"src/execution/external_merge_sort_executor.cpp"
347343
"src/execution/update_executor.cpp"
344+
"src/include/execution/execution_common.h"
348345
"src/include/optimizer/optimizer.h"
349346
"src/include/optimizer/optimizer_internal.h"
347+
"src/execution/execution_common.cpp"
350348
"src/optimizer/nlj_as_hash_join.cpp"
351349
"src/optimizer/optimizer_custom_rules.cpp"
352-
"src/optimizer/sort_limit_as_topn.cpp"
353350
"src/optimizer/optimizer_internal.cpp"
354351
"src/optimizer/seqscan_as_indexscan.cpp"
355352
"src/optimizer/column_pruning.cpp"
356353
"src/common/bustub_ddl.cpp"
357-
"src/include/execution/plans/topn_per_group_plan.h"
358354
${P2_FILES}
359355
)
360356

@@ -375,8 +371,6 @@ set(P4_FILES
375371
"src/concurrency/transaction_manager.cpp"
376372
"src/include/concurrency/watermark.h"
377373
"src/concurrency/watermark.cpp"
378-
"src/include/execution/execution_common.h"
379-
"src/execution/execution_common.cpp"
380374
${P3_FILES}
381375
)
382376

src/catalog/table_generator.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ auto TableGenerator::MakeValues(ColumnInsertMeta *col_meta, uint32_t count) -> s
6262
}
6363
}
6464

65-
void TableGenerator::FillTable(TableInfo *info, TableInsertMeta *table_meta) {
65+
void TableGenerator::FillTable(const std::shared_ptr<TableInfo> &info, TableInsertMeta *table_meta) {
6666
uint32_t num_inserted = 0;
6767
uint32_t batch_size = 128;
6868
while (num_inserted < table_meta->num_rows_) {

src/common/bustub_ddl.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ namespace bustub {
4545
void BusTubInstance::HandleCreateStatement(Transaction *txn, const CreateStatement &stmt, ResultWriter &writer) {
4646
std::unique_lock<std::shared_mutex> l(catalog_lock_);
4747
auto info = catalog_->CreateTable(txn, stmt.table_, Schema(stmt.columns_));
48-
IndexInfo *index = nullptr;
48+
std::shared_ptr<IndexInfo> index = nullptr;
4949
if (!stmt.primary_key_.empty()) {
5050
std::vector<uint32_t> col_ids;
5151
for (const auto &col : stmt.primary_key_) {
@@ -106,7 +106,7 @@ void BusTubInstance::HandleIndexStatement(Transaction *txn, const IndexStatement
106106
}
107107

108108
std::unique_lock<std::shared_mutex> l(catalog_lock_);
109-
IndexInfo *info = nullptr;
109+
std::shared_ptr<IndexInfo> info = nullptr;
110110

111111
if (stmt.index_type_.empty()) {
112112
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(

src/common/bustub_instance.cpp

+3-3
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,7 @@ void BusTubInstance::CmdDbgMvcc(const std::vector<std::string> &params, ResultWr
162162
writer.OneCell("table " + table + " not found");
163163
return;
164164
}
165-
TxnMgrDbg("\\dbgmvcc", txn_manager_.get(), table_info, table_info->table_.get());
165+
TxnMgrDbg("\\dbgmvcc", txn_manager_.get(), table_info.get(), table_info->table_.get());
166166
}
167167

168168
void BusTubInstance::CmdDisplayTables(ResultWriter &writer) {
@@ -175,7 +175,7 @@ void BusTubInstance::CmdDisplayTables(ResultWriter &writer) {
175175
writer.EndHeader();
176176
for (const auto &name : table_names) {
177177
writer.BeginRow();
178-
const auto *table_info = catalog_->GetTable(name);
178+
const auto table_info = catalog_->GetTable(name);
179179
writer.WriteCell(fmt::format("{}", table_info->oid_));
180180
writer.WriteCell(table_info->name_);
181181
writer.WriteCell(table_info->schema_.ToString());
@@ -194,7 +194,7 @@ void BusTubInstance::CmdDisplayIndices(ResultWriter &writer) {
194194
writer.WriteHeaderCell("index_cols");
195195
writer.EndHeader();
196196
for (const auto &table_name : table_names) {
197-
for (const auto *index_info : catalog_->GetTableIndexes(table_name)) {
197+
for (const auto &index_info : catalog_->GetTableIndexes(table_name)) {
198198
writer.BeginRow();
199199
writer.WriteCell(table_name);
200200
writer.WriteCell(fmt::format("{}", index_info->index_oid_));

src/execution/CMakeLists.txt

+1
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ add_library(
44
aggregation_executor.cpp
55
delete_executor.cpp
66
execution_common.cpp
7+
external_merge_sort_executor.cpp
78
executor_factory.cpp
89
filter_executor.cpp
910
fmt_impl.cpp

src/execution/execution_common.cpp

+13
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,19 @@
1010

1111
namespace bustub {
1212

13+
TupleComparator::TupleComparator(std::vector<OrderBy> order_bys) : order_bys_(std::move(order_bys)) {}
14+
15+
auto TupleComparator::operator()(const SortEntry &entry_a, const SortEntry &entry_b) const -> bool { return false; }
16+
17+
auto GenerateSortKey(const Tuple &tuple, const std::vector<OrderBy> &order_bys, const Schema &schema) -> SortKey {
18+
return {};
19+
}
20+
21+
/**
22+
* Above are all you need for P3.
23+
* You can ignore the remaining part of this file until P4.
24+
*/
25+
1326
auto ReconstructTuple(const Schema *schema, const Tuple &base_tuple, const TupleMeta &base_meta,
1427
const std::vector<UndoLog> &undo_logs) -> std::optional<Tuple> {
1528
UNIMPLEMENTED("not implemented");

src/execution/executor_factory.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
#include "execution/executors/abstract_executor.h"
1919
#include "execution/executors/aggregation_executor.h"
2020
#include "execution/executors/delete_executor.h"
21+
#include "execution/executors/external_merge_sort_executor.h"
2122
#include "execution/executors/filter_executor.h"
2223
#include "execution/executors/hash_join_executor.h"
2324
#include "execution/executors/index_scan_executor.h"
@@ -166,7 +167,7 @@ auto ExecutorFactory::CreateExecutor(ExecutorContext *exec_ctx, const AbstractPl
166167
case PlanType::Sort: {
167168
const auto *sort_plan = dynamic_cast<const SortPlanNode *>(plan.get());
168169
auto child = ExecutorFactory::CreateExecutor(exec_ctx, sort_plan->GetChildPlan());
169-
return std::make_unique<SortExecutor>(exec_ctx, sort_plan, std::move(child));
170+
return std::make_unique<ExternalMergeSortExecutor<2>>(exec_ctx, sort_plan, std::move(child));
170171
}
171172

172173
// Create a new topN executor

src/execution/external_merge_sort_executor.cpp

+39
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// BusTub
4+
//
5+
// external_merge_sort_executor.cpp
6+
//
7+
// Identification: src/execution/external_merge_sort_executor.cpp
8+
//
9+
// Copyright (c) 2015-2024, Carnegie Mellon University Database Group
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "execution/executors/external_merge_sort_executor.h"
14+
#include <iostream>
15+
#include <optional>
16+
#include <vector>
17+
#include "common/config.h"
18+
#include "execution/plans/sort_plan.h"
19+
20+
namespace bustub {
21+
22+
template <size_t K>
23+
ExternalMergeSortExecutor<K>::ExternalMergeSortExecutor(ExecutorContext *exec_ctx, const SortPlanNode *plan,
24+
std::unique_ptr<AbstractExecutor> &&child_executor)
25+
: AbstractExecutor(exec_ctx), cmp_(plan->GetOrderBy()) {}
26+
27+
template <size_t K>
28+
void ExternalMergeSortExecutor<K>::Init() {
29+
throw NotImplementedException("ExternalMergeSortExecutor is not implemented");
30+
}
31+
32+
template <size_t K>
33+
auto ExternalMergeSortExecutor<K>::Next(Tuple *tuple, RID *rid) -> bool {
34+
return false;
35+
}
36+
37+
template class ExternalMergeSortExecutor<2>;
38+
39+
} // namespace bustub

src/execution/fmt_impl.cpp

+3-1
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,9 @@ auto UpdatePlanNode::PlanNodeToString() const -> std::string {
7272
}
7373

7474
auto SortPlanNode::PlanNodeToString() const -> std::string {
75-
return fmt::format("Sort {{ order_bys={} }}", order_bys_);
75+
// Note(f24): A sort plan node will be converted to an external merge sort executor in
76+
// Fall 2024. So `ExternalMergeSort` is returned instead of `Sort`.
77+
return fmt::format("ExternalMergeSort {{ order_bys={} }}", order_bys_);
7678
}
7779

7880
auto LimitPlanNode::PlanNodeToString() const -> std::string { return fmt::format("Limit {{ limit={} }}", limit_); }

src/execution/hash_join_executor.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ HashJoinExecutor::HashJoinExecutor(ExecutorContext *exec_ctx, const HashJoinPlan
1919
std::unique_ptr<AbstractExecutor> &&right_child)
2020
: AbstractExecutor(exec_ctx) {
2121
if (!(plan->GetJoinType() == JoinType::LEFT || plan->GetJoinType() == JoinType::INNER)) {
22-
// Note for 2023 Fall: You ONLY need to implement left join and inner join.
22+
// Note for Fall 2024: You ONLY need to implement left join and inner join.
2323
throw bustub::NotImplementedException(fmt::format("join type {} not supported", plan->GetJoinType()));
2424
}
2525
}

src/include/binder/bound_order_by.h

+3
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
#include "binder/bound_expression.h"
1616
#include "common/exception.h"
17+
#include "execution/expressions/abstract_expression.h"
1718
#include "fmt/format.h"
1819

1920
namespace bustub {
@@ -28,6 +29,8 @@ enum class OrderByType : uint8_t {
2829
DESC = 3, /**< Descending order by type. */
2930
};
3031

32+
using OrderBy = std::pair<OrderByType, AbstractExpressionRef>;
33+
3134
/**
3235
* BoundOrderBy is an item in the ORDER BY clause.
3336
*/

0 commit comments

Comments
 (0)