Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Storing system descriptor #265

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions inc/common/pjrt_implementation/client_instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ class ClientInstance {

std::unique_ptr<ModuleBuilder> module_builder_;

// System descriptor (that TTIR to TTNN backend pipeline needs).
tt::runtime::SystemDesc system_descriptor_;

// TODO: Remove once tt-mlir supports passing the system descriptor object to
// TTIR to TTNN backend pipeline.
std::string cached_system_descriptor_path_;

// Synchronization.
// We keep one global execution timeline across all devices. The management
// of this is currently somewhat primitive: we increment it by one for each
Expand Down
12 changes: 8 additions & 4 deletions src/common/module_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,10 @@ ModuleBuilder::ModuleBuilder()
m_context->appendDialectRegistry(registry);
}

tt_pjrt_status ModuleBuilder::buildModule(const std::string_view &code,
const std::string_view &format) {
tt_pjrt_status
ModuleBuilder::buildModule(const std::string_view &code,
const std::string_view &format,
const std::string &system_descriptor_path) {
DLOG_F(LOG_DEBUG, "ModuleBuilder::buildModule");

m_status = tt_pjrt_status::kSuccess;
Expand All @@ -89,7 +91,7 @@ tt_pjrt_status ModuleBuilder::buildModule(const std::string_view &code,
return m_status;
}

convertFromTTIRToTTNN(mlir_module);
convertFromTTIRToTTNN(mlir_module, system_descriptor_path);
if (!tt_pjrt_status_is_ok(m_status)) {
return m_status;
}
Expand Down Expand Up @@ -193,10 +195,12 @@ void ModuleBuilder::convertFromSHLOToTTIR(
}

void ModuleBuilder::convertFromTTIRToTTNN(
mlir::OwningOpRef<mlir::ModuleOp> &mlir_module) {
mlir::OwningOpRef<mlir::ModuleOp> &mlir_module,
const std::string &system_descriptor_path) {
Comment on lines +198 to +199
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Input-only arguments should be ordered before output or in-out arguments. See the Google C++ Style Guide for details: https://google.github.io/styleguide/cppguide.html#Inputs_and_Outputs

mlir::PassManager ttir_to_ttnn_pm(mlir_module.get()->getName());

mlir::tt::ttnn::TTIRToTTNNBackendPipelineOptions options;
options.systemDescPath = system_descriptor_path.data();
mlir::tt::ttnn::createTTIRToTTNNBackendPipeline(ttir_to_ttnn_pm, options);

// Run the pass manager.
Expand Down
6 changes: 4 additions & 2 deletions src/common/module_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ class ModuleBuilder {
ModuleBuilder();

tt_pjrt_status buildModule(const std::string_view &code,
const std::string_view &format);
const std::string_view &format,
const std::string &system_descriptor_path);

const tt::runtime::Binary &getBinary() const { return m_flatbuffer_binary; }

Expand Down Expand Up @@ -56,7 +57,8 @@ class ModuleBuilder {
void convertFromSHLOToTTIR(mlir::OwningOpRef<mlir::ModuleOp> &mlir_module);

// Converts TTIR module to TTNN module.
void convertFromTTIRToTTNN(mlir::OwningOpRef<mlir::ModuleOp> &mlir_module);
void convertFromTTIRToTTNN(mlir::OwningOpRef<mlir::ModuleOp> &mlir_module,
const std::string &system_descriptor_path);

// Creates flatbuffer binary from the built TTNN module.
void
Expand Down
17 changes: 14 additions & 3 deletions src/common/pjrt_implementation/client_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@

#include "common/pjrt_implementation/client_instance.h"

#include <cstddef>
#include <filesystem>
#include <string>

#include "common/pjrt_implementation/utils.h"
#include "tt/runtime/types.h"

namespace tt::pjrt {

Expand All @@ -21,13 +24,17 @@ namespace tt::pjrt {
//===----------------------------------------------------------------------===//

ClientInstance::ClientInstance(std::unique_ptr<Platform> platform)
: platform_(std::move(platform)) {
: platform_(std::move(platform)), system_descriptor_(nullptr) {
DLOG_F(LOG_DEBUG, "ClientInstance::ClientInstance");
module_builder_ = std::make_unique<ModuleBuilder>();
cached_system_descriptor_path_ =
std::filesystem::temp_directory_path().concat(
"/tt_pjrt_system_descriptor");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a TODO comment here explaining that this name would need to be unique to avoid clashes between multiple clients. Since we plan to remove the need to store the descriptor on disk soon, we are keeping it simple until then (or until it causes problems).

}

// Destroys the client and removes the system descriptor file stored at
// `cached_system_descriptor_path_`.
ClientInstance::~ClientInstance() {
  DLOG_F(LOG_DEBUG, "ClientInstance::~ClientInstance");

  // Use the non-throwing overload since a destructor must never throw. A
  // missing file is not reported as an error by this overload, so this is safe
  // even if the descriptor was never stored.
  std::error_code remove_error;
  std::filesystem::remove(cached_system_descriptor_path_, remove_error);
  if (remove_error) {
    // Best-effort cleanup: report the failure but do not escalate it.
    DLOG_F(LOG_DEBUG,
           "Failed to remove the cached system descriptor file: %s",
           cached_system_descriptor_path_.c_str());
  }
}

PJRT_Error *ClientInstance::Initialize() {
Expand Down Expand Up @@ -164,8 +171,11 @@ void ClientInstance::BindApi(PJRT_Api *api) {
tt_pjrt_status ClientInstance::PopulateDevices() {
DLOG_F(LOG_DEBUG, "ClientInstance::PopulateDevices");
auto [system_desc, chip_ids] = tt::runtime::getCurrentSystemDesc();
int devices_count = chip_ids.size();

system_descriptor_ = system_desc;
system_descriptor_.store(cached_system_descriptor_path_.data());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After this, check whether std::filesystem::exists(cached_system_descriptor_path_) is true; if it isn't, do:

DLOG_F(ERROR, "Failed to store the system descriptor to the disk using path: %s",
    cached_system_descriptor_path_.c_str());
return tt_pjrt_status::kInternal;


int devices_count = chip_ids.size();
devices_.resize(devices_count);
for (size_t i = 0; i < devices_count; ++i) {
devices_[i] =
Expand All @@ -187,7 +197,8 @@ PJRT_Error *ClientInstance::Compile(const PJRT_Program *program,
std::string_view code(program->code, program->code_size);
std::string_view format(program->format, program->format_size);

tt_pjrt_status status = module_builder_->buildModule(code, format);
tt_pjrt_status status = module_builder_->buildModule(
code, format, cached_system_descriptor_path_);
if (!tt_pjrt_status_is_ok(status)) {
return ErrorInstance::MakeError(status);
}
Expand Down
Loading