Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clarify CloudFileSystem API. #313

Merged
merged 2 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 18 additions & 19 deletions cloud/cloud_file_system.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
#include <unordered_map>

#include "cloud/aws/aws_file_system.h"
#include "rocksdb/cloud/cloud_file_system_impl.h"
#include "cloud/cloud_log_controller_impl.h"
#include "rocksdb/cloud/cloud_storage_provider_impl.h"
#include "cloud/db_cloud_impl.h"
#include "cloud/filename.h"
#include "env/composite_env_wrapper.h"
#include "options/configurable_helper.h"
#include "options/options_helper.h"
#include "port/likely.h"
#include "rocksdb/cloud/cloud_file_system_impl.h"
#include "rocksdb/cloud/cloud_log_controller.h"
#include "rocksdb/cloud/cloud_storage_provider_impl.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
Expand Down Expand Up @@ -417,19 +417,19 @@ Status CloudFileSystemOptions::Serialize(const ConfigOptions& config_options,
reinterpret_cast<const char*>(this), value);
}

CloudFileSystem::CloudFileSystem(const CloudFileSystemOptions& options,
const std::shared_ptr<FileSystem>& base,
const std::shared_ptr<Logger>& logger)
CloudFileSystemEnv::CloudFileSystemEnv(const CloudFileSystemOptions& options,
const std::shared_ptr<FileSystem>& base,
const std::shared_ptr<Logger>& logger)
: cloud_fs_options(options), base_fs_(base), info_log_(logger) {
RegisterOptions(&cloud_fs_options, &cloud_fs_option_type_info);
}

CloudFileSystem::~CloudFileSystem() {
CloudFileSystemEnv::~CloudFileSystemEnv() {
cloud_fs_options.cloud_log_controller.reset();
cloud_fs_options.storage_provider.reset();
}

Status CloudFileSystem::NewAwsFileSystem(
Status CloudFileSystemEnv::NewAwsFileSystem(
const std::shared_ptr<FileSystem>& base_fs,
const std::string& src_cloud_bucket, const std::string& src_cloud_object,
const std::string& src_cloud_region, const std::string& dest_cloud_bucket,
Expand Down Expand Up @@ -484,23 +484,23 @@ int DoRegisterCloudObjects(ObjectLibrary& library, const std::string& arg) {
return count;
}

void CloudFileSystem::RegisterCloudObjects(const std::string& arg) {
void CloudFileSystemEnv::RegisterCloudObjects(const std::string& arg) {
static std::once_flag do_once;
std::call_once(do_once, [&]() {
auto library = ObjectLibrary::Default();
DoRegisterCloudObjects(*library, arg);
});
}

std::unique_ptr<Env> CloudFileSystem::NewCompositeEnvFromThis(Env* env) {
std::unique_ptr<Env> CloudFileSystemEnv::NewCompositeEnvFromThis(Env* env) {
// We need a shared_ptr<FileSystem> pointing to "this", to initialize the
// env wrapper, but we don't want that shared_ptr to own the lifecycle for
// "this". Creating a shared_ptr with a custom no-op deleter instead.
std::shared_ptr<FileSystem> fs(this, [](auto* /*p*/) { /*noop*/ });
return std::make_unique<CompositeEnvWrapper>(env, fs);
}

Status CloudFileSystem::CreateFromString(
Status CloudFileSystemEnv::CreateFromString(
const ConfigOptions& config_options, const std::string& value,
std::unique_ptr<CloudFileSystem>* result) {
RegisterCloudObjects();
Expand Down Expand Up @@ -529,7 +529,7 @@ Status CloudFileSystem::CreateFromString(
copy.invoke_prepare_options = false; // Prepare here, not there
s = ObjectRegistry::NewInstance()->NewUniqueObject<FileSystem>(id, &fs);
if (s.ok()) {
auto* cfs = dynamic_cast<CloudFileSystem*>(fs.get());
auto* cfs = dynamic_cast<CloudFileSystemEnv*>(fs.get());
assert(cfs);
if (!options.empty()) {
s = cfs->ConfigureFromMap(copy, options);
Expand All @@ -553,7 +553,7 @@ Status CloudFileSystem::CreateFromString(
return s;
}

Status CloudFileSystem::CreateFromString(
Status CloudFileSystemEnv::CreateFromString(
const ConfigOptions& config_options, const std::string& value,
const CloudFileSystemOptions& cloud_options,
std::unique_ptr<CloudFileSystem>* result) {
Expand Down Expand Up @@ -583,7 +583,7 @@ Status CloudFileSystem::CreateFromString(
copy.invoke_prepare_options = false; // Prepare here, not there
s = ObjectRegistry::NewInstance()->NewUniqueObject<FileSystem>(id, &fs);
if (s.ok()) {
auto* cfs = dynamic_cast<CloudFileSystem*>(fs.get());
auto* cfs = dynamic_cast<CloudFileSystemEnv*>(fs.get());
assert(cfs);
auto copts = cfs->GetOptions<CloudFileSystemOptions>();
*copts = cloud_options;
Expand All @@ -610,18 +610,18 @@ Status CloudFileSystem::CreateFromString(
}

#ifndef USE_AWS
Status CloudFileSystem::NewAwsFileSystem(
Status CloudFileSystemEnv::NewAwsFileSystem(
const std::shared_ptr<FileSystem>& /*base_fs*/,
const CloudFileSystemOptions& /*options*/,
const std::shared_ptr<Logger>& /*logger*/, CloudFileSystem** /*cfs*/) {
return Status::NotSupported("RocksDB Cloud not compiled with AWS support");
}
#else
Status CloudFileSystem::NewAwsFileSystem(
Status CloudFileSystemEnv::NewAwsFileSystem(
const std::shared_ptr<FileSystem>& base_fs,
const CloudFileSystemOptions& options,
const std::shared_ptr<Logger>& logger, CloudFileSystem** cfs) {
CloudFileSystem::RegisterCloudObjects();
CloudFileSystemEnv::RegisterCloudObjects();
// Dump out cloud fs options
options.Dump(logger.get());

Expand All @@ -640,9 +640,8 @@ Status CloudFileSystem::NewAwsFileSystem(
}
#endif

std::unique_ptr<Env> CloudFileSystem::NewCompositeEnv(
Env* env,
const std::shared_ptr<FileSystem>& fs) {
std::unique_ptr<Env> CloudFileSystemEnv::NewCompositeEnv(
Env* env, const std::shared_ptr<FileSystem>& fs) {
return std::make_unique<CompositeEnvWrapper>(env, fs);
}

Expand Down
4 changes: 2 additions & 2 deletions cloud/cloud_file_system_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

#include <cinttypes>

#include "rocksdb/cloud/cloud_file_deletion_scheduler.h"
#include "cloud/cloud_log_controller_impl.h"
#include "cloud/cloud_manifest.h"
#include "cloud/cloud_scheduler.h"
Expand All @@ -18,6 +17,7 @@
#include "file/writable_file_writer.h"
#include "port/likely.h"
#include "port/port_posix.h"
#include "rocksdb/cloud/cloud_file_deletion_scheduler.h"
#include "rocksdb/cloud/cloud_log_controller.h"
#include "rocksdb/cloud/cloud_storage_provider.h"
#include "rocksdb/db.h"
Expand All @@ -33,7 +33,7 @@ namespace ROCKSDB_NAMESPACE {
CloudFileSystemImpl::CloudFileSystemImpl(
const CloudFileSystemOptions& opts, const std::shared_ptr<FileSystem>& base,
const std::shared_ptr<Logger>& l)
: CloudFileSystem(opts, base, l), purger_is_running_(true) {
: CloudFileSystemEnv(opts, base, l), purger_is_running_(true) {
if (opts.cloud_file_deletion_delay) {
cloud_file_deletion_scheduler_ = CloudFileDeletionScheduler::Create(
CloudScheduler::Get(), *opts.cloud_file_deletion_delay);
Expand Down
195 changes: 115 additions & 80 deletions include/rocksdb/cloud/cloud_file_system.h
Original file line number Diff line number Diff line change
Expand Up @@ -488,45 +488,15 @@ struct CloudManifestDelta {
std::string epoch; // epoch for the new manifest file
};

//
// The Cloud file system
//
// NOTE: The AWS SDK must be initialized before the CloudFileSystem is
// constructed, and remain active (Aws::ShutdownAPI() not called) as long as any
// CloudFileSystem objects exist.
/** Extension of the FileSystem API for "the Cloud" */
class CloudFileSystem : public FileSystem {
protected:
CloudFileSystemOptions cloud_fs_options;
std::shared_ptr<FileSystem> base_fs_; // The underlying file system

// Creates a new CompositeEnv from "env" and "this".
// The returned Env must not outlive "this"
std::unique_ptr<Env> NewCompositeEnvFromThis(Env* env);

CloudFileSystem(const CloudFileSystemOptions& options,
const std::shared_ptr<FileSystem>& base,
const std::shared_ptr<Logger>& logger);

public:
mutable std::shared_ptr<Logger> info_log_; // informational messages

virtual ~CloudFileSystem();

static void RegisterCloudObjects(const std::string& mode = "");
static Status CreateFromString(const ConfigOptions& config_options,
const std::string& id,
std::unique_ptr<CloudFileSystem>* fs);
static Status CreateFromString(const ConfigOptions& config_options,
const std::string& id,
const CloudFileSystemOptions& cloud_options,
std::unique_ptr<CloudFileSystem>* fs);
static const char* kCloud() { return "cloud"; }
static const char* kAws() { return "aws"; }
virtual const char* Name() const { return "cloud-env"; }

// Returns the underlying file system
const std::shared_ptr<FileSystem>& GetBaseFileSystem() const {
return base_fs_;
}
virtual const std::shared_ptr<FileSystem>& GetBaseFileSystem() const = 0;

virtual IOStatus PreloadCloudManifest(const std::string& local_dbname) = 0;
// This method will migrate the database that is using pure RocksDB into
// RocksDB-Cloud. Call this before opening the database with RocksDB-Cloud.
Expand All @@ -550,52 +520,6 @@ class CloudFileSystem : public FileSystem {
virtual IOStatus DeleteDbid(const std::string& bucket_prefix,
const std::string& dbid) = 0;

Logger* GetLogger() const { return info_log_.get(); }
const std::shared_ptr<CloudStorageProvider>& GetStorageProvider() const {
return cloud_fs_options.storage_provider;
}

const std::shared_ptr<CloudLogController>& GetLogController() const {
return cloud_fs_options.cloud_log_controller;
}

// The SrcBucketName identifies the cloud storage bucket and
// GetSrcObjectPath specifies the path inside that bucket
// where data files reside. The specified bucket is used in
// a readonly mode by the associated DBCloud instance.
const std::string& GetSrcBucketName() const {
return cloud_fs_options.src_bucket.GetBucketName();
}
const std::string& GetSrcObjectPath() const {
return cloud_fs_options.src_bucket.GetObjectPath();
}
bool HasSrcBucket() const { return cloud_fs_options.src_bucket.IsValid(); }

// The DestBucketName identifies the cloud storage bucket and
// GetDestObjectPath specifies the path inside that bucket
// where data files reside. The associated DBCloud instance
// writes newly created files to this bucket.
const std::string& GetDestBucketName() const {
return cloud_fs_options.dest_bucket.GetBucketName();
}
const std::string& GetDestObjectPath() const {
return cloud_fs_options.dest_bucket.GetObjectPath();
}

bool HasDestBucket() const { return cloud_fs_options.dest_bucket.IsValid(); }
bool SrcMatchesDest() const {
if (HasSrcBucket() && HasDestBucket()) {
return cloud_fs_options.src_bucket == cloud_fs_options.dest_bucket;
} else {
return false;
}
}

// returns the options used to create this object
const CloudFileSystemOptions& GetCloudFileSystemOptions() const {
return cloud_fs_options;
}

// Deletes file from a destination bucket.
virtual IOStatus DeleteCloudFileFromDest(const std::string& fname) = 0;
// Copies a local file to a destination bucket.
Expand Down Expand Up @@ -663,6 +587,117 @@ class CloudFileSystem : public FileSystem {
const std::string& dbname,
const std::vector<std::string>& active_cookies) = 0;

// returns the options used to create this object
virtual const CloudFileSystemOptions& GetCloudFileSystemOptions() const = 0;

// The SrcBucketName identifies the cloud storage bucket and
// GetSrcObjectPath specifies the path inside that bucket
// where data files reside. The specified bucket is used in
// a readonly mode by the associated DBCloud instance.
virtual const std::string& GetSrcBucketName() const = 0;
virtual const std::string& GetSrcObjectPath() const = 0;
virtual bool HasSrcBucket() const = 0;

// The DestBucketName identifies the cloud storage bucket and
// GetDestObjectPath specifies the path inside that bucket
// where data files reside. The associated DBCloud instance
// writes newly created files to this bucket.
virtual const std::string& GetDestBucketName() const = 0;
virtual const std::string& GetDestObjectPath() const = 0;
virtual bool HasDestBucket() const = 0;

virtual bool SrcMatchesDest() const = 0;

// Get the storage provider for the FileSystem.
virtual const std::shared_ptr<CloudStorageProvider>& GetStorageProvider()
const = 0;

virtual Logger* GetLogger() const = 0;
};

//
// The Cloud file system
//
// NOTE: The AWS SDK must be initialized before the CloudFileSystem is
// constructed, and remain active (Aws::ShutdownAPI() not called) as long as any
// CloudFileSystem objects exist.
class CloudFileSystemEnv : public CloudFileSystem {
protected:
Copy link

@seckcoder seckcoder Jan 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious why do we keep this separate class instead of moving all these static functions and member variables to CloudFileSystemImpl and simply removing this CloudFileSystemEnv class?

nvm, seems we access these static methods in rockset codebase.

Would it be better to move all these member variables and member functions to CloudFileSystemImpl and only keep these static functions in CloudFileSystemEnv so that CloudFileSystemEnv doesn't have to inherit the interface CloudFileSystem?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure. I kind of assumed there was a reason for these methods to be in CloudFileSystem in the first place. Maybe somebody didn't want to reveal the fact that ColoudFileSystemImpl exists to the client code or didn't want to force them to add another include.
There is some logic to the current state. CloudFileSystem defines the API, CloudFileSystemEnv has various static initialization methods. Theoretically it could even be made separate from the CloudFileSystem class hierarchy altogether. Finally, CloudFileSystemImp implements the CloudFileSystem and FileSystem APIs.

That being said, I am not opposed to moving everything to CloudFileSystemImpl if that is the consensus.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Theoretically it could even be made separate from the CloudFileSystem class hierarchy altogether. Finally, CloudFileSystemImp implements the CloudFileSystem and FileSystem APIs

Like this idea.

CloudFileSystemOptions cloud_fs_options;
std::shared_ptr<FileSystem> base_fs_; // The underlying file system

// Creates a new CompositeEnv from "env" and "this".
// The returned Env must not outlive "this"
std::unique_ptr<Env> NewCompositeEnvFromThis(Env* env);

CloudFileSystemEnv(const CloudFileSystemOptions& options,
const std::shared_ptr<FileSystem>& base,
const std::shared_ptr<Logger>& logger);

public:
mutable std::shared_ptr<Logger> info_log_; // informational messages

virtual ~CloudFileSystemEnv();

static void RegisterCloudObjects(const std::string& mode = "");
static Status CreateFromString(const ConfigOptions& config_options,
const std::string& id,
std::unique_ptr<CloudFileSystem>* fs);
static Status CreateFromString(const ConfigOptions& config_options,
const std::string& id,
const CloudFileSystemOptions& cloud_options,
std::unique_ptr<CloudFileSystem>* fs);

const char* Name() const override { return "cloud-env"; }

const std::shared_ptr<FileSystem>& GetBaseFileSystem() const override {
return base_fs_;
}

Logger* GetLogger() const override { return info_log_.get(); }

const std::shared_ptr<CloudStorageProvider>& GetStorageProvider()
const override {
return cloud_fs_options.storage_provider;
}

const std::shared_ptr<CloudLogController>& GetLogController() const {
return cloud_fs_options.cloud_log_controller;
}

const std::string& GetSrcBucketName() const override {
return cloud_fs_options.src_bucket.GetBucketName();
}
const std::string& GetSrcObjectPath() const override {
return cloud_fs_options.src_bucket.GetObjectPath();
}
bool HasSrcBucket() const override {
return cloud_fs_options.src_bucket.IsValid();
}

const std::string& GetDestBucketName() const override {
return cloud_fs_options.dest_bucket.GetBucketName();
}
const std::string& GetDestObjectPath() const override {
return cloud_fs_options.dest_bucket.GetObjectPath();
}

bool HasDestBucket() const override {
return cloud_fs_options.dest_bucket.IsValid();
}
bool SrcMatchesDest() const override {
if (HasSrcBucket() && HasDestBucket()) {
return cloud_fs_options.src_bucket == cloud_fs_options.dest_bucket;
} else {
return false;
}
}

// returns the options used to create this object
const CloudFileSystemOptions& GetCloudFileSystemOptions() const override {
return cloud_fs_options;
}

// Create a new AWS file system.
// src_bucket_name: bucket name suffix where db data is read from
// src_object_prefix: all db objects in source bucket are prepended with this
Expand Down
Loading
Loading