Skip to content

Commit 1887f6e

Browse files
xx01cyxxueqili02
authored and committed
make disk manager actually delete page (#785)
1 parent 01a64ff commit 1887f6e

File tree

6 files changed

+298
-254
lines changed

6 files changed

+298
-254
lines changed

src/include/storage/disk/disk_manager.h

+37-43
Original file line number | Diff line number | Diff line change
@@ -18,42 +18,33 @@
1818
#include <future> // NOLINT
1919
#include <mutex> // NOLINT
2020
#include <string>
21+
#include <unordered_map>
22+
#include <vector>
2123

2224
#include "common/config.h"
25+
#include "common/logger.h"
2326

2427
namespace bustub {
2528

2629
/**
2730
* DiskManager takes care of the allocation and deallocation of pages within a database. It performs the reading and
2831
* writing of pages to and from disk, providing a logical file layer within the context of a database management system.
32+
*
33+
* DiskManager uses lazy allocation, meaning that it only allocates space on disk when it is first accessed. It
34+
* maintains a mapping of page ids to their corresponding offsets in the database file. When a page is deleted, it is
35+
* marked as free and can be reused by future allocations.
2936
*/
3037
class DiskManager {
3138
public:
32-
/**
33-
* Creates a new disk manager that writes to the specified database file.
34-
* @param db_file the file name of the database file to write to
35-
*/
3639
explicit DiskManager(const std::filesystem::path &db_file);
3740

3841
/** FOR TEST / LEADERBOARD ONLY, used by DiskManagerMemory */
3942
DiskManager() = default;
4043

4144
virtual ~DiskManager() = default;
4245

43-
/**
44-
* Shut down the disk manager and close all the file resources.
45-
*/
4646
void ShutDown();
4747

48-
/**
49-
* @brief Increases the size of the database file.
50-
*
51-
* This function works like a dynamic array, where the capacity is doubled until all pages can fit.
52-
*
53-
* @param pages The number of pages the caller wants the file used for storage to support.
54-
*/
55-
virtual void IncreaseDiskSpace(size_t pages);
56-
5748
/**
5849
* Write a page to the database file.
5950
* @param page_id id of the page
@@ -74,32 +65,16 @@ class DiskManager {
7465
*/
7566
virtual void DeletePage(page_id_t page_id);
7667

77-
/**
78-
* Flush the entire log buffer into disk.
79-
* @param log_data raw log data
80-
* @param size size of log entry
81-
*/
8268
void WriteLog(char *log_data, int size);
8369

84-
/**
85-
* Read a log entry from the log file.
86-
* @param[out] log_data output buffer
87-
* @param size size of the log entry
88-
* @param offset offset of the log entry in the file
89-
* @return true if the read was successful, false otherwise
90-
*/
9170
auto ReadLog(char *log_data, int size, int offset) -> bool;
9271

93-
/** @return the number of disk flushes */
9472
auto GetNumFlushes() const -> int;
9573

96-
/** @return true iff the in-memory content has not been flushed yet */
9774
auto GetFlushState() const -> bool;
9875

99-
/** @return the number of disk writes */
10076
auto GetNumWrites() const -> int;
10177

102-
/** @return the number of deletions */
10378
auto GetNumDeletes() const -> int;
10479

10580
/**
@@ -112,28 +87,47 @@ class DiskManager {
11287
inline auto HasFlushLogFuture() -> bool { return flush_log_f_ != nullptr; }
11388

11489
/** @brief returns the log file name */
115-
inline auto GetLogFileName() const -> std::filesystem::path { return log_name_; }
90+
inline auto GetLogFileName() const -> std::filesystem::path { return log_file_name_; }
91+
92+
/** @brief returns the size of disk space in use */
93+
auto GetDbFileSize() -> size_t {
94+
auto file_size = GetFileSize(db_file_name_);
95+
if (file_size < 0) {
96+
LOG_DEBUG("I/O error: Fail to get db file size");
97+
return -1;
98+
}
99+
return static_cast<size_t>(file_size);
100+
}
116101

117102
protected:
103+
int num_flushes_{0};
104+
int num_writes_{0};
105+
int num_deletes_{0};
106+
107+
/** @brief The capacity of the file used for storage on disk. */
108+
size_t page_capacity_{DEFAULT_DB_IO_SIZE};
109+
110+
private:
118111
auto GetFileSize(const std::string &file_name) -> int;
112+
113+
auto AllocatePage() -> size_t;
114+
119115
// stream to write log file
120116
std::fstream log_io_;
121-
std::filesystem::path log_name_;
117+
std::filesystem::path log_file_name_;
122118
// stream to write db file
123119
std::fstream db_io_;
124-
std::filesystem::path file_name_;
125-
int num_flushes_{0};
126-
int num_writes_{0};
127-
int num_deletes_{0};
120+
std::filesystem::path db_file_name_;
121+
122+
// Records the offset of each page in the db file.
123+
std::unordered_map<page_id_t, size_t> pages_;
124+
// Records the free slots in the db file if pages are deleted, indicated by offset.
125+
std::vector<size_t> free_slots_;
126+
128127
bool flush_log_{false};
129128
std::future<void> *flush_log_f_{nullptr};
130129
// With multiple buffer pool instances, need to protect file access
131130
std::mutex db_io_latch_;
132-
133-
/** @brief The number of pages allocated to the DBMS on disk. */
134-
size_t pages_{0};
135-
/** @brief The capacity of the file used for storage on disk. */
136-
size_t page_capacity_{DEFAULT_DB_IO_SIZE};
137131
};
138132

139133
} // namespace bustub

src/include/storage/disk/disk_manager_memory.h

+13-144
Original file line number | Diff line number | Diff line change
@@ -40,32 +40,15 @@ namespace bustub {
4040
*/
4141
class DiskManagerMemory : public DiskManager {
4242
public:
43-
explicit DiskManagerMemory(size_t pages);
43+
explicit DiskManagerMemory(size_t capacity);
4444

4545
~DiskManagerMemory() override { delete[] memory_; }
4646

47-
/**
48-
* This function should increase the disk space, but since we have a fixed amount of memory we just check that the
49-
* pages are in bounds.
50-
*/
51-
void IncreaseDiskSpace(size_t pages) override;
52-
53-
/**
54-
* Write a page to the database file.
55-
* @param page_id id of the page
56-
* @param page_data raw page data
57-
*/
5847
void WritePage(page_id_t page_id, const char *page_data) override;
5948

60-
/**
61-
* Read a page from the database file.
62-
* @param page_id id of the page
63-
* @param[out] page_data output buffer
64-
*/
6549
void ReadPage(page_id_t page_id, char *page_data) override;
6650

6751
private:
68-
size_t pages_;
6952
char *memory_;
7053
};
7154

@@ -75,133 +58,21 @@ class DiskManagerMemory : public DiskManager {
7558
*/
7659
class DiskManagerUnlimitedMemory : public DiskManager {
7760
public:
78-
DiskManagerUnlimitedMemory() {
79-
std::scoped_lock l(mutex_);
80-
while (data_.size() < pages_ + 1) {
81-
data_.push_back(std::make_shared<ProtectedPage>());
82-
}
83-
std::fill(recent_access_.begin(), recent_access_.end(), -1);
84-
}
85-
86-
/**
87-
* This function should increase the disk space, but since this is memory we just resize the vector.
88-
*/
89-
void IncreaseDiskSpace(size_t pages) override {
90-
std::scoped_lock l(mutex_);
91-
92-
if (pages < pages_) {
93-
return;
94-
}
95-
96-
while (data_.size() < pages + 1) {
97-
data_.push_back(std::make_shared<ProtectedPage>());
98-
}
99-
100-
pages_ = pages;
101-
}
102-
103-
/**
104-
* Write a page to the database file.
105-
* @param page_id id of the page
106-
* @param page_data raw page data
107-
*/
108-
void WritePage(page_id_t page_id, const char *page_data) override {
109-
ProcessLatency(page_id);
110-
111-
std::unique_lock<std::mutex> l(mutex_);
112-
if (!thread_id_.has_value()) {
113-
thread_id_ = std::this_thread::get_id();
114-
}
115-
if (page_id >= static_cast<int>(data_.size())) {
116-
data_.resize(page_id + 1);
117-
}
118-
if (data_[page_id] == nullptr) {
119-
data_[page_id] = std::make_shared<ProtectedPage>();
120-
}
121-
std::shared_ptr<ProtectedPage> ptr = data_[page_id];
122-
std::unique_lock<std::shared_mutex> l_page(ptr->second);
123-
l.unlock();
124-
125-
memcpy(ptr->first.data(), page_data, BUSTUB_PAGE_SIZE);
126-
num_writes_ += 1;
127-
128-
PostProcessLatency(page_id);
129-
}
130-
131-
/**
132-
* Read a page from the database file.
133-
* @param page_id id of the page
134-
* @param[out] page_data output buffer
135-
*/
136-
void ReadPage(page_id_t page_id, char *page_data) override {
137-
ProcessLatency(page_id);
138-
139-
std::unique_lock<std::mutex> l(mutex_);
140-
if (!thread_id_.has_value()) {
141-
thread_id_ = std::this_thread::get_id();
142-
}
143-
if (page_id >= static_cast<int>(data_.size()) || page_id < 0) {
144-
fmt::println(stderr, "page {} not in range", page_id);
145-
std::terminate();
146-
return;
147-
}
148-
if (data_[page_id] == nullptr) {
149-
fmt::println(stderr, "page {} not exist", page_id, pages_);
150-
std::terminate();
151-
return;
152-
}
153-
std::shared_ptr<ProtectedPage> ptr = data_[page_id];
154-
std::shared_lock<std::shared_mutex> l_page(ptr->second);
155-
l.unlock();
156-
157-
memcpy(page_data, ptr->first.data(), BUSTUB_PAGE_SIZE);
158-
159-
PostProcessLatency(page_id);
160-
}
161-
162-
/**
163-
* Delete a page from the database file. Reclaim the disk space.
164-
* Note: This is a no-op for now without a more complex data structure to
165-
* track deallocated pages.
166-
* @param page_id id of the page
167-
*/
168-
void DeletePage(page_id_t page_id) override { num_deletes_ += 1; }
169-
170-
void ProcessLatency(page_id_t page_id) {
171-
uint64_t sleep_micro_sec = 1000; // for random access, 1ms latency
172-
if (latency_simulator_enabled_) {
173-
std::unique_lock<std::mutex> lck(latency_processor_mutex_);
174-
for (auto &recent_page_id : recent_access_) {
175-
if ((recent_page_id & (~0x3)) == (page_id & (~0x3))) {
176-
sleep_micro_sec = 100; // for access in the same "block", 0.1ms latency
177-
break;
178-
}
179-
if (page_id >= recent_page_id && page_id <= recent_page_id + 3) {
180-
sleep_micro_sec = 100; // for sequential access, 0.1ms latency
181-
break;
182-
}
183-
}
184-
lck.unlock();
185-
std::this_thread::sleep_for(std::chrono::microseconds(sleep_micro_sec));
186-
}
187-
}
188-
189-
void PostProcessLatency(page_id_t page_id) {
190-
if (latency_simulator_enabled_) {
191-
std::scoped_lock<std::mutex> lck(latency_processor_mutex_);
192-
recent_access_[access_ptr_] = page_id;
193-
access_ptr_ = (access_ptr_ + 1) % recent_access_.size();
194-
}
195-
}
61+
DiskManagerUnlimitedMemory();
62+
63+
void WritePage(page_id_t page_id, const char *page_data) override;
64+
65+
void ReadPage(page_id_t page_id, char *page_data) override;
66+
67+
void DeletePage(page_id_t page_id) override;
68+
69+
void ProcessLatency(page_id_t page_id);
70+
71+
void PostProcessLatency(page_id_t page_id);
19672

19773
void EnableLatencySimulator(bool enabled) { latency_simulator_enabled_ = enabled; }
19874

199-
auto GetLastReadThreadAndClear() -> std::optional<std::thread::id> {
200-
std::unique_lock<std::mutex> lck(mutex_);
201-
auto t = thread_id_;
202-
thread_id_ = std::nullopt;
203-
return t;
204-
}
75+
auto GetLastReadThreadAndClear() -> std::optional<std::thread::id>;
20576

20677
private:
20778
bool latency_simulator_enabled_{false};
@@ -216,8 +87,6 @@ class DiskManagerUnlimitedMemory : public DiskManager {
21687
std::mutex mutex_;
21788
std::optional<std::thread::id> thread_id_;
21889
std::vector<std::shared_ptr<ProtectedPage>> data_;
219-
220-
size_t pages_{DEFAULT_DB_IO_SIZE};
22190
};
22291

22392
} // namespace bustub

src/include/storage/disk/disk_scheduler.h

-9
Original file line number | Diff line number | Diff line change
@@ -68,15 +68,6 @@ class DiskScheduler {
6868
*/
6969
auto CreatePromise() -> DiskSchedulerPromise { return {}; };
7070

71-
/**
72-
* @brief Increases the size of the database file to fit the specified number of pages.
73-
*
74-
* This function works like a dynamic array, where the capacity is doubled until all pages can fit.
75-
*
76-
* @param pages The number of pages the caller wants the file used for storage to support.
77-
*/
78-
void IncreaseDiskSpace(size_t pages) { disk_manager_->IncreaseDiskSpace(pages); }
79-
8071
/**
8172
* @brief Deallocates a page on disk.
8273
*

0 commit comments

Comments
 (0)