Skip to content

Commit 61223e2

Browse files
authored
[SYS-6179] Add MultiReadAsync file API. (#290)
* Detaching CloudFileSystemEnv from CloudFileSystem class hierarchy. * [SYS-6179] Add MultiReadAsync file API. Extending the file API to provide an option to implement asynchronous multi read operation. The default implementation delegates to AsyncRead. Updated the AsyncFileReader class implementation to use MultiReadAsync. * Switch to sync default implementation of AsyncMultiRead. * Add CloudManifest handling API to CloudFileSystem. Expanding the CloudFileSystem API to include methods for handling CloudManifest. This allows us to better hide CloudFileSystemImpl and allow different implementations of CloudFileSystem.
1 parent 40f265a commit 61223e2

10 files changed

+302
-82
lines changed

cloud/cloud_file_system.cc

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "cloud/aws/aws_file_system.h"
1313
#include "cloud/cloud_log_controller_impl.h"
14+
#include "cloud/cloud_manifest.h"
1415
#include "cloud/db_cloud_impl.h"
1516
#include "cloud/filename.h"
1617
#include "env/composite_env_wrapper.h"
@@ -495,7 +496,7 @@ Status CloudFileSystemEnv::CreateFromString(
495496
std::string id;
496497
std::unordered_map<std::string, std::string> options;
497498
Status s;
498-
if (value.find("=") == std::string::npos) {
499+
if (value.find('=') == std::string::npos) {
499500
id = value;
500501
} else {
501502
s = StringToMap(value, &options);
@@ -633,5 +634,19 @@ std::unique_ptr<Env> CloudFileSystemEnv::NewCompositeEnv(
633634
return std::make_unique<CompositeEnvWrapper>(env, fs);
634635
}
635636

637+
// Loads the cloud manifest for `dbname` / `cookie` from `fs`.
//
// The manifest file name is derived with MakeCloudManifestFile(dbname,
// cookie); the file is opened through a SequentialFileReader created on `fs`
// and then handed to CloudManifest::LoadFromLog, which fills
// `*cloud_manifest`.
//
// Returns the status of creating the reader if that fails (LoadFromLog is not
// attempted in that case), otherwise the status reported by LoadFromLog.
IOStatus CloudFileSystemEnv::LoadCloudManifest(
    const std::string& dbname, const std::shared_ptr<FileSystem>& fs,
    const std::string& cookie, std::unique_ptr<CloudManifest>* cloud_manifest) {
  const auto manifest_path = MakeCloudManifestFile(dbname, cookie);

  std::unique_ptr<SequentialFileReader> manifest_reader;
  auto status = SequentialFileReader::Create(
      fs, manifest_path, FileOptions(), &manifest_reader, nullptr /*dbg*/,
      nullptr /* rate_limiter */);
  if (!status.ok()) {
    // Could not open the manifest file; nothing to parse.
    return status;
  }

  status =
      CloudManifest::LoadFromLog(std::move(manifest_reader), cloud_manifest);
  return status;
}
650+
636651
} // namespace ROCKSDB_NAMESPACE
637652
#endif // ROCKSDB_LITE

cloud/cloud_file_system_impl.cc

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
// Copyright (c) 2017 Rockset.
2-
#include "rocksdb/utilities/options_type.h"
32
#ifndef ROCKSDB_LITE
43

4+
#include "rocksdb/cloud/cloud_file_system_impl.h"
5+
56
#include <cinttypes>
67

78
#include "cloud/cloud_log_controller_impl.h"
@@ -14,14 +15,14 @@
1415
#include "file/writable_file_writer.h"
1516
#include "port/port_posix.h"
1617
#include "rocksdb/cloud/cloud_file_deletion_scheduler.h"
17-
#include "rocksdb/cloud/cloud_file_system_impl.h"
1818
#include "rocksdb/cloud/cloud_log_controller.h"
1919
#include "rocksdb/cloud/cloud_storage_provider.h"
2020
#include "rocksdb/db.h"
2121
#include "rocksdb/env.h"
2222
#include "rocksdb/io_status.h"
2323
#include "rocksdb/options.h"
2424
#include "rocksdb/status.h"
25+
#include "rocksdb/utilities/options_type.h"
2526
#include "test_util/sync_point.h"
2627
#include "util/xxhash.h"
2728

@@ -850,22 +851,8 @@ IOStatus CloudFileSystemImpl::LoadLocalCloudManifest(
850851
if (cloud_manifest_) {
851852
cloud_manifest_.reset();
852853
}
853-
return CloudFileSystemImpl::LoadLocalCloudManifest(
854-
dbname, GetBaseFileSystem(), cookie, &cloud_manifest_);
855-
}
856-
857-
IOStatus CloudFileSystemImpl::LoadLocalCloudManifest(
858-
const std::string& dbname, const std::shared_ptr<FileSystem>& base_fs,
859-
const std::string& cookie, std::unique_ptr<CloudManifest>* cloud_manifest) {
860-
std::unique_ptr<SequentialFileReader> reader;
861-
auto cloud_manifest_file_name = MakeCloudManifestFile(dbname, cookie);
862-
auto s = SequentialFileReader::Create(base_fs, cloud_manifest_file_name,
863-
FileOptions(), &reader, nullptr /*dbg*/,
864-
nullptr /* rate_limiter */);
865-
if (s.ok()) {
866-
s = CloudManifest::LoadFromLog(std::move(reader), cloud_manifest);
867-
}
868-
return s;
854+
return CloudFileSystemEnv::LoadCloudManifest(dbname, GetBaseFileSystem(),
855+
cookie, &cloud_manifest_);
869856
}
870857

871858
std::string RemapFilenameWithCloudManifest(const std::string& logical_path,
@@ -1349,7 +1336,7 @@ IOStatus CloudFileSystemImpl::NeedsReinitialization(
13491336
// If the local MANIFEST is not compatible with local CLOUDMANIFEST, we will
13501337
// need to reinitialize the entire directory.
13511338
std::unique_ptr<CloudManifest> cloud_manifest;
1352-
auto load_status = LoadLocalCloudManifest(
1339+
auto load_status = CloudFileSystemEnv::LoadCloudManifest(
13531340
local_dir, base_fs, cloud_fs_options.cookie_on_open, &cloud_manifest);
13541341
if (load_status.ok()) {
13551342
std::string current_epoch = cloud_manifest->GetCurrentEpoch();
@@ -1616,9 +1603,8 @@ IOStatus CloudFileSystemImpl::LoadCloudManifest(const std::string& local_dbname,
16161603
//
16171604
// Create appropriate files in the clone dir
16181605
//
1619-
IOStatus CloudFileSystemImpl::SanitizeDirectory(const DBOptions& options,
1620-
const std::string& local_name,
1621-
bool read_only) {
1606+
IOStatus CloudFileSystemImpl::SanitizeLocalDirectory(
1607+
const DBOptions& options, const std::string& local_name, bool read_only) {
16221608
const auto& local_fs = GetBaseFileSystem();
16231609
const IOOptions io_opts;
16241610
IODebugContext* dbg = nullptr;
@@ -1913,11 +1899,6 @@ IOStatus CloudFileSystemImpl::FetchManifest(const std::string& local_dbname,
19131899
return IOStatus::NotFound();
19141900
}
19151901

1916-
IOStatus CloudFileSystemImpl::CreateCloudManifest(
1917-
const std::string& local_dbname) {
1918-
return CreateCloudManifest(local_dbname, cloud_fs_options.cookie_on_open);
1919-
}
1920-
19211902
IOStatus CloudFileSystemImpl::CreateCloudManifest(
19221903
const std::string& local_dbname, const std::string& cookie) {
19231904
// No cloud manifest, create an empty one

cloud/db_cloud_impl.cc

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55

66
#include <cinttypes>
77

8-
#include "rocksdb/cloud/cloud_file_system_impl.h"
98
#include "cloud/cloud_manifest.h"
109
#include "cloud/filename.h"
1110
#include "cloud/manifest_reader.h"
1211
#include "env/composite_env_wrapper.h"
1312
#include "file/file_util.h"
1413
#include "file/sst_file_manager_impl.h"
1514
#include "logging/auto_roll_logger.h"
15+
#include "rocksdb/cloud/cloud_file_system_impl.h"
1616
#include "rocksdb/cloud/cloud_storage_provider.h"
1717
#include "rocksdb/db.h"
1818
#include "rocksdb/env.h"
@@ -45,8 +45,8 @@ class ConstantSizeSstFileManager : public SstFileManagerImpl {
4545
}
4646

4747
Status OnAddFile(const std::string& file_path) override {
48-
return SstFileManagerImpl::OnAddFile(
49-
file_path, uint64_t(constant_file_size_));
48+
return SstFileManagerImpl::OnAddFile(file_path,
49+
uint64_t(constant_file_size_));
5050
}
5151

5252
private:
@@ -97,10 +97,10 @@ Status DBCloud::Open(const Options& opt, const std::string& local_dbname,
9797
}
9898

9999
auto* cfs =
100-
dynamic_cast<CloudFileSystemImpl*>(options.env->GetFileSystem().get());
100+
dynamic_cast<CloudFileSystem*>(options.env->GetFileSystem().get());
101101
assert(cfs);
102-
if (!cfs->info_log_) {
103-
cfs->info_log_ = options.info_log;
102+
if (!cfs->GetLogger()) {
103+
cfs->SetLogger(options.info_log);
104104
}
105105
// Use a constant sized SST File Manager if necessary.
106106
// NOTE: if user already passes in an SST File Manager, we will respect user's
@@ -131,7 +131,7 @@ Status DBCloud::Open(const Options& opt, const std::string& local_dbname,
131131
// If cloud manifest is already loaded, this means the directory has been
132132
// sanitized (possibly by the call to ListColumnFamilies())
133133
if (cfs->GetCloudManifest() == nullptr) {
134-
st = cfs->SanitizeDirectory(options, local_dbname, read_only);
134+
st = cfs->SanitizeLocalDirectory(options, local_dbname, read_only);
135135

136136
if (st.ok()) {
137137
st = cfs->LoadCloudManifest(local_dbname, read_only);
@@ -324,8 +324,7 @@ Status DBCloudImpl::DoCheckpointToCloud(
324324
const BucketOptions& destination, const CheckpointToCloudOptions& options) {
325325
std::vector<std::string> live_files;
326326
uint64_t manifest_file_size{0};
327-
auto* cfs =
328-
dynamic_cast<CloudFileSystemImpl*>(GetEnv()->GetFileSystem().get());
327+
auto* cfs = dynamic_cast<CloudFileSystem*>(GetEnv()->GetFileSystem().get());
329328
assert(cfs);
330329
const auto& local_fs = cfs->GetBaseFileSystem();
331330

@@ -334,7 +333,7 @@ Status DBCloudImpl::DoCheckpointToCloud(
334333
if (!st.ok()) {
335334
return st;
336335
}
337-
336+
338337
// Create a temp MANIFEST file first as this captures all the files we need
339338
auto current_epoch = cfs->GetCloudManifest()->GetCurrentEpoch();
340339
auto manifest_fname = ManifestFileWithEpoch("", current_epoch);
@@ -346,7 +345,6 @@ Status DBCloudImpl::DoCheckpointToCloud(
346345
return st;
347346
}
348347

349-
350348
std::vector<std::pair<std::string, std::string>> files_to_copy;
351349
for (auto& f : live_files) {
352350
uint64_t number = 0;
@@ -453,13 +451,13 @@ Status DBCloud::ListColumnFamilies(const DBOptions& db_options,
453451
const std::string& name,
454452
std::vector<std::string>* column_families) {
455453
auto* cfs =
456-
dynamic_cast<CloudFileSystemImpl*>(db_options.env->GetFileSystem().get());
454+
dynamic_cast<CloudFileSystem*>(db_options.env->GetFileSystem().get());
457455
assert(cfs);
458456

459457
cfs->GetBaseFileSystem()->CreateDirIfMissing(name, IOOptions(),
460458
nullptr /*dbg*/);
461459

462-
auto st = cfs->SanitizeDirectory(db_options, name, false);
460+
auto st = cfs->SanitizeLocalDirectory(db_options, name, false);
463461
if (st.ok()) {
464462
st = cfs->LoadCloudManifest(name, false);
465463
}

file/random_access_file_reader.cc

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
#include "file/random_access_file_reader.h"
1111

1212
#include <algorithm>
13+
#include <cstddef>
1314
#include <mutex>
15+
#include <utility>
1416

1517
#include "file/file_util.h"
1618
#include "monitoring/histogram.h"
@@ -599,4 +601,103 @@ void RandomAccessFileReader::ReadAsyncCallback(const FSReadRequest& req,
599601
RecordIOStats(stats_, file_temperature_, is_last_level_, req.result.size());
600602
delete read_async_info;
601603
}
604+
605+
// RocksDB-Cloud contribution begin
606+
607+
// Callback data for non-direct IO version of MultiReadAsync.
608+
struct MultiReadAsyncCbInfo {
609+
MultiReadAsyncCbInfo(
610+
std::function<void(const FSReadRequest*, size_t, void*)> cb, void* cb_arg,
611+
uint64_t start_time)
612+
: cb_(cb), cb_arg_(cb_arg), start_time_(start_time) {}
613+
614+
std::function<void(const FSReadRequest*, size_t, void*)> cb_;
615+
void* cb_arg_;
616+
uint64_t start_time_;
617+
FileOperationInfo::StartTimePoint fs_start_ts_;
618+
};
619+
620+
// Submits the `num_reqs` read requests in `reqs` to the underlying file as a
// single asynchronous multi-read.
//
// `cb` and `cb_arg` are stored in a heap-allocated MultiReadAsyncCbInfo that
// is passed to the file as the callback argument; MultiReadAsyncCallback
// forwards the results to `cb` and then frees the info object. `opts`,
// `io_handles`, `num_io_handles` and `del_fns` are passed through to
// file_->MultiReadAsync unchanged. `aligned_buf` is unused: direct IO is not
// supported and is rejected with IOStatus::InvalidArgument.
//
// Returns the status of file_->MultiReadAsync. The time spent in that call is
// recorded under READ_ASYNC_MICROS.
IOStatus RandomAccessFileReader::MultiReadAsync(
    FSReadRequest* reqs, size_t num_reqs, const IOOptions& opts,
    std::function<void(const FSReadRequest*, size_t, void*)> cb, void* cb_arg,
    void** io_handles, size_t* num_io_handles, IOHandleDeleter* del_fns,
    AlignedBuf* /* aligned_buf */) {
  if (use_direct_io()) {
    return IOStatus::InvalidArgument(
        "DirectIO support not implemented for MultiReadAsync");
  }

  // Trampoline that routes the file's completion callback back into this
  // reader so stats and listeners are updated before the info is released.
  auto read_async_callback = [this](const FSReadRequest* done_reqs,
                                    size_t n_done, void* info) {
    MultiReadAsyncCallback(done_reqs, n_done, info);
  };

  // Create the callback info and populate it.
  auto cb_info =
      new MultiReadAsyncCbInfo(std::move(cb), cb_arg, clock_->NowMicros());
  if (ShouldNotifyListeners()) {
    cb_info->fs_start_ts_ = FileOperationInfo::StartNow();
  }

  IOStatus s;
  uint64_t elapsed = 0;
  {
    // StopWatch writes `elapsed` from its destructor, so it has to go out of
    // scope before the value is reported below; otherwise the ticker would
    // always be bumped by zero.
    StopWatch sw(clock_, nullptr /*stats*/, 0 /*hist_type*/, &elapsed,
                 true /*overwrite*/, true /*delay_enabled*/);
    s = file_->MultiReadAsync(reqs, num_reqs, opts, read_async_callback,
                              cb_info, io_handles, num_io_handles, del_fns,
                              nullptr /*dbg*/);
  }
  RecordTick(stats_, READ_ASYNC_MICROS, elapsed);

  // Suppress false positive clang analyzer warnings.
  // The info object is released here only if file_->MultiReadAsync returns
  // !s.ok(), because MultiReadAsyncCallback (which otherwise frees it) is
  // never called in that case. If MultiReadAsyncCallback is called then
  // MultiReadAsync should always return IOStatus::OK().
#ifndef __clang_analyzer__
  if (!s.ok()) {
    delete cb_info;
  }
#endif  // __clang_analyzer__

  return s;
}
662+
663+
void RandomAccessFileReader::MultiReadAsyncCallback(const FSReadRequest* reqs,
664+
size_t n_reqs,
665+
void* cb_arg) {
666+
auto cb_info = static_cast<MultiReadAsyncCbInfo*>(cb_arg);
667+
assert(cb_info);
668+
assert(cb_info->cb_);
669+
670+
cb_info->cb_(reqs, n_reqs, cb_info->cb_arg_);
671+
672+
// Update stats and notify listeners.
673+
if (stats_ != nullptr && file_read_hist_ != nullptr) {
674+
// elapsed doesn't take into account delay and overwrite as StopWatch does
675+
// in Read.
676+
uint64_t elapsed = clock_->NowMicros() - cb_info->start_time_;
677+
file_read_hist_->Add(elapsed);
678+
}
679+
680+
for (size_t idx = 0; idx < n_reqs; idx++) {
681+
auto& req = reqs[idx];
682+
if (req.status.ok()) {
683+
RecordInHistogram(stats_, ASYNC_READ_BYTES, req.result.size());
684+
} else if (!req.status.IsAborted()) {
685+
RecordTick(stats_, ASYNC_READ_ERROR_COUNT, 1);
686+
}
687+
if (ShouldNotifyListeners()) {
688+
auto finish_ts = FileOperationInfo::FinishNow();
689+
NotifyOnFileReadFinish(req.offset, req.result.size(),
690+
cb_info->fs_start_ts_, finish_ts, req.status);
691+
}
692+
if (!req.status.ok()) {
693+
NotifyOnIOError(req.status, FileOperationType::kRead, file_name(),
694+
req.result.size(), req.offset);
695+
}
696+
RecordIOStats(stats_, file_temperature_, is_last_level_, req.result.size());
697+
}
698+
delete cb_info;
699+
}
700+
701+
// RocksDB-Cloud contribution end
702+
602703
} // namespace ROCKSDB_NAMESPACE

file/random_access_file_reader.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#pragma once
1111
#include <atomic>
12+
#include <cstddef>
1213
#include <sstream>
1314
#include <string>
1415

@@ -211,7 +212,18 @@ class RandomAccessFileReader {
211212
std::function<void(const FSReadRequest&, void*)> cb,
212213
void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
213214
AlignedBuf* aligned_buf);
214-
215+
215216
void ReadAsyncCallback(const FSReadRequest& req, void* cb_arg);
217+
218+
// RocksDB-Cloud contribution begin
219+
IOStatus MultiReadAsync(
220+
FSReadRequest* reqs, size_t num_reqs, const IOOptions& opts,
221+
std::function<void(const FSReadRequest*, size_t, void*)> cb, void* cb_arg,
222+
void** io_handles, size_t* num_io_handles, IOHandleDeleter* del_fns,
223+
AlignedBuf* aligned_buf);
224+
225+
// Callback for non-directIO MultiReadAsync.
226+
void MultiReadAsyncCallback(const FSReadRequest*, size_t, void*);
227+
// RocksDB-Cloud contribution end
216228
};
217229
} // namespace ROCKSDB_NAMESPACE

0 commit comments

Comments
 (0)