Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions include/paimon/catalog/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "paimon/catalog/identifier.h"
#include "paimon/result.h"
#include "paimon/schema/schema.h"
#include "paimon/status.h"
#include "paimon/type_fwd.h"
#include "paimon/visibility.h"
Expand Down Expand Up @@ -93,6 +94,16 @@ class PAIMON_EXPORT Catalog {
/// @return A result containing a vector of table names in the specified database, or an error
/// status.
virtual Result<std::vector<std::string>> ListTables(const std::string& db_name) const = 0;

/// Loads the latest schema of a specified table.
///
/// @note System tables will not be supported.
///
/// @param identifier The identifier (database and table name) of the table to load.
/// @return A result containing table schema if the table exists, or std::nullopt if it
/// doesn't, or an error status on failure.
virtual Result<std::optional<std::shared_ptr<Schema>>> LoadTableSchema(
const Identifier& identifier) const = 0;
};

} // namespace paimon
81 changes: 81 additions & 0 deletions include/paimon/schema/schema.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright 2025-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "arrow/api.h"
#include "arrow/c/bridge.h"
#include "paimon/result.h"
#include "paimon/visibility.h"

struct ArrowSchema;

namespace paimon {

/// This interface provides access to TableSchema-related information.
class PAIMON_EXPORT Schema {
public:
virtual ~Schema() = default;

/// Get the Arrow C schema representation of this table schema.
/// @return A result containing an ArrowSchema, or an error status if conversion fails.
virtual Result<std::unique_ptr<::ArrowSchema>> GetArrowSchema() const = 0;

/// Get the names of all fields in the table schema.
/// @return A vector of field names.
virtual std::vector<std::string> FieldNames() const = 0;

/// Get the unique identifier of this table schema.
/// @return The schema ID
virtual int64_t Id() const = 0;

/// Get the list of primary key field names.
/// @return A reference to the vector of primary key names; empty if no primary keys are
/// defined.
virtual const std::vector<std::string>& PrimaryKeys() const = 0;

/// Get the list of partition key field names.
/// @return A reference to the vector of partition key names; empty if the table is not
/// partitioned.
virtual const std::vector<std::string>& PartitionKeys() const = 0;

/// Get the list of bucket key field names used for bucketing.
/// @return A reference to the vector of bucket key names.
virtual const std::vector<std::string>& BucketKeys() const = 0;

/// Get the number of buckets configured for this table.
/// @return The number of buckets.
virtual int32_t NumBuckets() const = 0;

/// Get the highest field ID assigned in this schema.
/// @return The maximum field ID.
virtual int32_t HighestFieldId() const = 0;

/// Get the table-level options associated with this schema.
/// @return A reference to the map of option key-value pairs (e.g., file format, filesystem).
virtual const std::map<std::string, std::string>& Options() const = 0;

/// Get an optional comment describing the table.
/// @return The table comment if set, or std::nullopt otherwise.
virtual std::optional<std::string> Comment() const = 0;
};

} // namespace paimon
16 changes: 16 additions & 0 deletions src/paimon/core/catalog/file_system_catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "paimon/common/utils/arrow/status_utils.h"
#include "paimon/common/utils/path_util.h"
#include "paimon/common/utils/string_utils.h"
#include "paimon/core/schema/schema_impl.h"
#include "paimon/core/schema/schema_manager.h"
#include "paimon/fs/file_system.h"
#include "paimon/logging.h"
Expand Down Expand Up @@ -211,4 +212,19 @@ Result<bool> FileSystemCatalog::TableExistsInFileSystem(const std::string& table
}
}

Result<std::optional<std::shared_ptr<Schema>>> FileSystemCatalog::LoadTableSchema(
const Identifier& identifier) const {
if (IsSystemTable(identifier)) {
return Status::NotImplemented("do not support loading schema for system table.");
}
SchemaManager schema_manager(fs_, NewDataTablePath(warehouse_, identifier));
PAIMON_ASSIGN_OR_RAISE(std::optional<std::shared_ptr<TableSchema>> latest_schema,
schema_manager.Latest());
if (latest_schema.has_value()) {
std::shared_ptr<Schema> schema = std::make_shared<SchemaImpl>(*latest_schema);
return std::optional<std::shared_ptr<Schema>>(schema);
}
return std::optional<std::shared_ptr<Schema>>();
}

} // namespace paimon
2 changes: 2 additions & 0 deletions src/paimon/core/catalog/file_system_catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class FileSystemCatalog : public Catalog {

Result<std::vector<std::string>> ListDatabases() const override;
Result<std::vector<std::string>> ListTables(const std::string& database_names) const override;
Result<std::optional<std::shared_ptr<Schema>>> LoadTableSchema(
const Identifier& identifier) const override;

private:
static std::string NewDatabasePath(const std::string& warehouse, const std::string& db_name);
Expand Down
56 changes: 56 additions & 0 deletions src/paimon/core/catalog/file_system_catalog_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ TEST(FileSystemCatalogTest, TestCreateTableWithBlob) {
ASSERT_OK_AND_ASSIGN(std::vector<std::string> table_names, catalog.ListTables("db1"));
ASSERT_EQ(1, table_names.size());
ASSERT_EQ(table_names[0], "tbl1");
ASSERT_OK_AND_ASSIGN(std::optional<std::shared_ptr<Schema>> table_schema,
catalog.LoadTableSchema(Identifier("db1", "tbl1")));
ASSERT_TRUE(table_schema.has_value());
ASSERT_OK_AND_ASSIGN(auto arrow_schema, (*table_schema)->GetArrowSchema());
auto loaded_schema = arrow::ImportSchema(arrow_schema.get()).ValueOrDie();
ASSERT_TRUE(typed_schema.Equals(loaded_schema));
ArrowSchemaRelease(&schema);
}

Expand Down Expand Up @@ -309,4 +315,54 @@ TEST(FileSystemCatalogTest, TestInvalidList) {
"do not support listing tables for system database.");
}

TEST(FileSystemCatalogTest, TestValidateTableSchema) {
std::map<std::string, std::string> options;
options[Options::FILE_SYSTEM] = "local";
options[Options::FILE_FORMAT] = "orc";
ASSERT_OK_AND_ASSIGN(auto core_options, CoreOptions::FromMap(options));
auto dir = UniqueTestDirectory::Create();
ASSERT_TRUE(dir);
FileSystemCatalog catalog(core_options.GetFileSystem(), dir->Str());
ASSERT_OK(catalog.CreateDatabase("db1", options, /*ignore_if_exists=*/true));
arrow::FieldVector fields = {
arrow::field("f0", arrow::utf8()),
arrow::field("f1", arrow::int32()),
arrow::field("f2", arrow::int32()),
arrow::field("f3", arrow::float64()),
};
arrow::Schema typed_schema(fields);
::ArrowSchema schema;
ASSERT_TRUE(arrow::ExportSchema(typed_schema, &schema).ok());
ASSERT_OK(catalog.CreateTable(Identifier("db1", "tbl1"), &schema, {"f1"}, {}, options,
/*ignore_if_exists=*/false));

ASSERT_OK_AND_ASSIGN(std::optional<std::shared_ptr<Schema>> table_schema,
catalog.LoadTableSchema(Identifier("db0", "tbl0")));
ASSERT_FALSE(table_schema.has_value());
ASSERT_OK_AND_ASSIGN(table_schema, catalog.LoadTableSchema(Identifier("db1", "tbl1")));
ASSERT_TRUE(table_schema.has_value());
ASSERT_EQ(0, (*table_schema)->Id());
ASSERT_EQ(3, (*table_schema)->HighestFieldId());
ASSERT_EQ(1, (*table_schema)->PartitionKeys().size());
ASSERT_EQ(0, (*table_schema)->PrimaryKeys().size());
ASSERT_EQ(-1, (*table_schema)->NumBuckets());
ASSERT_FALSE((*table_schema)->Comment().has_value());
std::vector<std::string> field_names = (*table_schema)->FieldNames();
std::vector<std::string> expected_field_names = {"f0", "f1", "f2", "f3"};
ASSERT_EQ(field_names, expected_field_names);

ASSERT_OK_AND_ASSIGN(auto arrow_schema, (*table_schema)->GetArrowSchema());
auto loaded_schema = arrow::ImportSchema(arrow_schema.get()).ValueOrDie();
ASSERT_TRUE(typed_schema.Equals(loaded_schema));

ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {}));
ASSERT_OK(fs->Delete(PathUtil::JoinPath(dir->Str(), "db1.db/tbl1/schema/schema-0")));
ASSERT_OK_AND_ASSIGN(table_schema, catalog.LoadTableSchema(Identifier("db1", "tbl1")));
ASSERT_FALSE(table_schema.has_value());

ASSERT_NOK_WITH_MSG(catalog.LoadTableSchema(Identifier("db1", "tbl$11")),
"do not support loading schema for system table.");
ArrowSchemaRelease(&schema);
}

} // namespace paimon::test
67 changes: 67 additions & 0 deletions src/paimon/core/schema/schema_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Copyright 2025-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "paimon/core/schema/table_schema.h"

namespace paimon {

class SchemaImpl : public Schema {
public:
explicit SchemaImpl(const std::shared_ptr<TableSchema>& table_schema)
: table_schema_(table_schema) {}
Result<std::unique_ptr<::ArrowSchema>> GetArrowSchema() const override {
return table_schema_->GetArrowSchema();
}
std::vector<std::string> FieldNames() const override {
return table_schema_->FieldNames();
}
int64_t Id() const override {
return table_schema_->Id();
}
const std::vector<std::string>& PrimaryKeys() const override {
return table_schema_->PrimaryKeys();
}
const std::vector<std::string>& PartitionKeys() const override {
return table_schema_->PartitionKeys();
}
const std::vector<std::string>& BucketKeys() const override {
return table_schema_->BucketKeys();
}
int32_t NumBuckets() const override {
return table_schema_->NumBuckets();
}
int32_t HighestFieldId() const override {
return table_schema_->HighestFieldId();
}
const std::map<std::string, std::string>& Options() const override {
return table_schema_->Options();
}
std::optional<std::string> Comment() const override {
return table_schema_->Comment();
}

private:
std::shared_ptr<TableSchema> table_schema_;
};

} // namespace paimon
8 changes: 8 additions & 0 deletions src/paimon/core/schema/table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <utility>

#include "arrow/api.h"
#include "arrow/c/bridge.h"
#include "arrow/util/checked_cast.h"
#include "fmt/format.h"
#include "paimon/common/utils/arrow/status_utils.h"
Expand Down Expand Up @@ -333,4 +334,11 @@ bool TableSchema::CrossPartitionUpdate() const {
return !ObjectUtils::ContainsAll(primary_keys_, partition_keys_);
}

Result<std::unique_ptr<::ArrowSchema>> TableSchema::GetArrowSchema() const {
std::shared_ptr<arrow::Schema> schema = DataField::ConvertDataFieldsToArrowSchema(fields_);
auto arrow_schema = std::make_unique<::ArrowSchema>();
PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schema, arrow_schema.get()));
return arrow_schema;
}

} // namespace paimon
4 changes: 3 additions & 1 deletion src/paimon/core/schema/table_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,14 @@ class TableSchema : public Jsonizable<TableSchema> {

Result<std::vector<std::string>> TrimmedPrimaryKeys() const;

std::optional<std::string> Commit() const {
std::optional<std::string> Comment() const {
return comment_;
}

bool CrossPartitionUpdate() const;

Result<std::unique_ptr<::ArrowSchema>> GetArrowSchema() const;

private:
JSONIZABLE_FRIEND_AND_DEFAULT_CTOR(TableSchema);

Expand Down
Loading