Skip to content

Commit

Permalink
feat: make lmem assignment stage more analyzable
Browse files Browse the repository at this point in the history
- define some commonly used LOG macros (Logger.h)
- define some stringify functions to show lmem type and timestep mode
  (LayerGroupDefs.h)
- add show_timestep_table to print readable timestep table
  (BasicTimeStep.h/BasicTimeStep.cpp)
- add many DEBUG_WITH_TYPE logs and comments in lmem assignment stage
  (BasicTimeStep.cpp/LmemAllocator.cpp/TimeStepMethod.cpp/SwPipeline.cpp)
- rename some variables and functions to better represent the
  process (gen_all_mem_buffer_ts/tgt_min_address/...)
- reduce assignLmemAddr cyclomatic complexity.(LmemAllocator.cpp:989)

Change-Id: I31dadb9424be334da481f9dfbd45985ca89dc058
  • Loading branch information
sailist committed Jan 20, 2025
1 parent 362b8cd commit b6017e8
Show file tree
Hide file tree
Showing 13 changed files with 598 additions and 183 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class BasicTimeStep {
// Appends one row to the timestep table whose TPU field is tpu_field and
// whose GDMA field is gdma_field.
void add_tpu0_gdma0_ts_field(const TpuTsField &tpu_field,
const GdmaTsField &gdma_field);
// Replaces the GDMA field of the existing row at index ts with field.
void update_gdma0_ts_field(int64_t ts, const GdmaTsField &field);
// Prints every row of the timestep table to the debug stream; output is only
// produced under the "timestep_assign" debug type.
void show_timestep_table();
// Returns a mutable reference to the timestep table itself (not a copy).
std::vector<TimestepRow> &get_timestep_table() { return timestep_table_; }
// Returns the number of rows currently in the timestep table.
size_t get_timestep_num() { return timestep_table_.size(); }

Expand Down Expand Up @@ -79,12 +80,12 @@ class BasicTimeStep {
}

// Returns a mutable reference to the tensor_infos_ member.
TensorInfo &get_tensor_infos();

// Returns the timestep-mode name (see tensor_info_t::mode_str) of the
// tensor_infos_ entry for v.
std::string get_tensor_mode_str(Value v);
// setter
void set_lmem_addr(const mem_buffer_key_t &buffer_key, int64_t lmem_addr);
// Stores occupy in lmem_occupy_.
void set_lmem_occupy(int64_t occupy) { lmem_occupy_ = occupy; }

void gen_all_mem_buffer();
void gen_all_mem_buffer_ts();
void update_all_mem_buffer_size(const LgInfo &lg_info);
void gen_hold_coeff();
bool is_tensor_hold_in_lmem(Value v);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ typedef struct mem_buffer_key {
}
return false;
}

std::string lmem_type_str() {
switch (type) {
case LMEM_WEIGHT:
return "LMEM_WEIGHT";
case LMEM_ACTIVATION:
return "LMEM_ACTIVATION";
case LMEM_OPERATION:
return "LMEM_OPERATION";
case LMEM_ANY:
return "LMEM_ANY";
}
return "LMEM_UNKNOWN";
}
} mem_buffer_key_t;

typedef struct mem_buffer_value {
Expand Down Expand Up @@ -155,6 +169,22 @@ struct tensor_info_t {
// Stores slice_info in slice_infos under the key next_op; an entry already
// present for the same op is overwritten.
void add_slice_info(Operation *next_op, slice_info_t slice_info) {
slice_infos[next_op] = slice_info;
}

const std::string mode_str() const {
switch (mode) {
case TIMESTEP_LOAD:
return "TIMESTEP_LOAD";
case TIMESTEP_STORE:
return "TIMESTEP_STORE";
case TIMESTEP_MOVE:
return "TIMESTEP_MOVE";
case TIMESTEP_LD_G2L2:
return "TIMESTEP_LD_G2L2";
case TIMESTEP_LDST_UNKNOWN:
return "TIMESTEP_LDST_UNKNOWN";
}
return "TIMESTEP_UNKNOWN";
}
};

using ValueSet = std::set<Value, value_compare>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class BasicTimeStep {
void show_timestep();
void clear();

void gen_all_mem_buffer();
void gen_all_mem_buffer_ts();

protected:
LgOptions options_;
Expand Down
23 changes: 12 additions & 11 deletions include/tpu_mlir/Support/Logger.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,26 +46,27 @@ inline std::string formatString(const char *format, ...) {

// Stores log_level in cur_log_level.
inline void SetLogFlag(int32_t log_level) { cur_log_level = log_level; }

// Stream fragments for building "; key = value" style log records. Each macro
// expands to the right-hand side of a `<<` chain, so it must follow a stream:
//   os << LOG_ACTION("profile") << LOG_KV("size", bytes);
// Every argument is parenthesized so that expressions containing operators
// with lower precedence than `<<` (e.g. `a | b`, `c ? x : y`) are evaluated
// as a unit instead of being split across the stream insertion.

// "; <key> = <value>"
#define LOG_KV(key, value) "; " << (key) << " = " << (value)

// "; <key>"
#define LOG_ITEM(key) "; " << (key)

// "; action = <action>"
#define LOG_ACTION(action) "; action = " << (action)

// "; step = <step>"
#define LOG_STEP(step) "; step = " << (step)

// Emits one record "; action = profile; step = <step>; begin|end = <time>"
// to llvm::dbgs() under the "profile" debug type. `begin` selects whether the
// timestamp is labelled "begin" (true) or "end" (false).
// The timestamp is taken from std::chrono::system_clock because
// system_clock::to_time_t() only accepts system_clock time points;
// high_resolution_clock is not guaranteed to be the same clock type.
// Note: the string returned by std::ctime() already ends with a newline.
#define PROFILE_LOG(step, begin)                                               \
  do {                                                                         \
    DEBUG_WITH_TYPE("profile", {                                               \
      const auto time_string = std::chrono::system_clock::to_time_t(           \
          std::chrono::system_clock::now());                                   \
      if (begin) {                                                             \
        llvm::dbgs() << LOG_ACTION("profile") << LOG_STEP(step)                \
                     << LOG_KV("begin", std::ctime(&time_string)) << "\n";     \
      } else {                                                                 \
        llvm::dbgs() << LOG_ACTION("profile") << LOG_STEP(step)                \
                     << LOG_KV("end", std::ctime(&time_string)) << "\n";       \
      }                                                                        \
    });                                                                        \
  } while (0)

// Writes a single "; <key> = <value>" line to llvm::dbgs().
// Arguments are parenthesized so that expressions with operators of lower
// precedence than `<<` are streamed as one value.
#define DEBUG_KV(key, value)                                                   \
  do {                                                                         \
    llvm::dbgs() << "; " << (key) << " = " << (value) << "\n";                 \
  } while (0)

} // namespace tpu_mlir
1 change: 1 addition & 0 deletions include/tpu_mlir/Support/Module.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ bool IsHdimIsBatch(Value value);
bool isOpInCoreMatch(Operation *Op);
bool isOpInCoreParallel(Operation *Op);
bool isOpInGroupParallel(Operation *Op);
bool isValueBlockArgument(Value v);
bool isOpInDevParallel(Operation *Op);
bool isOpInBlock(Operation *op);
FuncOp getFuncOp(ModuleOp module, StringRef func_name);
Expand Down
173 changes: 169 additions & 4 deletions lib/Dialect/Tpu/Transforms/LayerGroup/BasicTimeStep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,38 @@ namespace tpu {
using namespace tpu_mlir::tpu;
using namespace tpu_mlir::backend;

static inline void stream_tpu_field(const TpuTsField &field) {
llvm::dbgs() << " [ ";
for (int i = 0; i < field.size(); ++i) {
if (i > 0)
llvm::dbgs() << ", ";
llvm::dbgs() << "C(\"" << field[i]->getName() << "\"), \""
<< module::getName(field[i]) << "\"";
}
llvm::dbgs() << " ]";
}

static inline void stream_gdma_field(const GdmaTsField &field) {
llvm::dbgs() << " [ ";
for (int i = 0; i < field.size(); ++i) {
auto mode = field[i].second.mode;
auto modestr = "L";
if (mode == TIMESTEP_STORE) {
modestr = "S";
}

if (i > 0)
llvm::dbgs() << ", ";
std::string op_type =
module::isValueBlockArgument(field[i].first)
? "block_arg"
: field[i].first.getDefiningOp()->getName().getStringRef().str();
llvm::dbgs() << modestr << "(\"" << module::getName(field[i].first)
<< "\")->" << op_type;
}
llvm::dbgs() << " ]";
}

BasicTimeStep::BasicTimeStep() {
// options_ = options;
swpipl_ = std::make_shared<SoftwarePipeline>();
Expand Down Expand Up @@ -50,12 +82,29 @@ void BasicTimeStep::add_tpu0_ts_field(const TpuTsField &field) {
TimestepRow row;
row.tpu0_ts_field = field;
timestep_table_.push_back(row);
DEBUG_WITH_TYPE("timestep_assign", {
llvm::dbgs() << "; action = add_tpu0_ts_field"
<< "; ts = " << timestep_table_.size() - 1;

stream_tpu_field(field);

llvm::dbgs() << "\n";
});
}

void BasicTimeStep::add_gdma0_ts_field(const GdmaTsField &field) {
TimestepRow row;
row.gdma0_ts_field = field;
timestep_table_.push_back(row);

DEBUG_WITH_TYPE("timestep_assign", {
llvm::dbgs() << "; action = add_gdma0_ts_field"
<< "; ts = " << timestep_table_.size() - 1;

stream_gdma_field(field);

llvm::dbgs() << "\n";
});
}

void BasicTimeStep::add_tpu0_gdma0_ts_field(const TpuTsField &tpu_field,
Expand All @@ -64,12 +113,43 @@ void BasicTimeStep::add_tpu0_gdma0_ts_field(const TpuTsField &tpu_field,
row.tpu0_ts_field = tpu_field;
row.gdma0_ts_field = gdma_field;
timestep_table_.push_back(row);

DEBUG_WITH_TYPE("timestep_assign", {
llvm::dbgs() << "; action = add_tpu0_gdma0_ts_field"
<< "; ts = " << timestep_table_.size() - 1;

stream_tpu_field(tpu_field);
stream_gdma_field(gdma_field);

llvm::dbgs() << "\n";
});
}

// Replaces the GDMA field of the row at index `ts` with `field` and, under
// the "timestep_assign" debug type, logs the index and the new contents.
// No bounds check is performed on `ts`.
void BasicTimeStep::update_gdma0_ts_field(int64_t ts,
                                          const GdmaTsField &field) {
  auto &row_gdma_field = timestep_table_[ts].gdma0_ts_field;
  row_gdma_field.clear();
  row_gdma_field = field;

  DEBUG_WITH_TYPE("timestep_assign", {
    auto &os = llvm::dbgs();
    os << "; action = update_gdma0_ts_field";
    os << "; ts = " << ts;
    stream_gdma_field(field);
    os << "\n";
  });
}

void BasicTimeStep::show_timestep_table() {
DEBUG_WITH_TYPE("timestep_assign", {
for (int i = 0; i < timestep_table_.size(); ++i) {
llvm::dbgs() << "; ts = " << i << "; ";
stream_tpu_field(timestep_table_[i].tpu0_ts_field);
llvm::dbgs() << " || ";
stream_gdma_field(timestep_table_[i].gdma0_ts_field);
llvm::dbgs() << "\n";
}
});
}

int64_t BasicTimeStep::get_layer_swpipl_stage(Operation *op) {
Expand Down Expand Up @@ -208,7 +288,7 @@ void BasicTimeStep::gen_hold_coeff() {
// }
// }

void BasicTimeStep::gen_all_mem_buffer() {
void BasicTimeStep::gen_all_mem_buffer_ts() {
// input: need_imm_buffers
lmem_buffer_.clear();
l2mem_buffer_.clear();
Expand All @@ -219,10 +299,24 @@ void BasicTimeStep::gen_all_mem_buffer() {

for (int64_t stg = 0; stg < this->swpipl_stage_num_; ++stg) {
// add for software pipeline
// swpipl_stage_num_ always be 3 after software pipeline
bool layer_timestep_valid =
(swpipl_stage_num_ == 1) || (swpipl_stage_num_ > 1 && stg == 1);
DEBUG_WITH_TYPE("lmem_buffer_assign", {
llvm::dbgs() << "; action = lmem_buffer_assign"
<< "; step = "
<< "process_current_stage"
<< "; stg = " << stg
<< "; swpipl_stage_num_ = " << swpipl_stage_num_ << "\n";
});
for (size_t ts = 0; ts < get_timestep_num(); ++ts) {
// process current timestep layers
DEBUG_WITH_TYPE("lmem_buffer_assign", {
llvm::dbgs() << "; action = lmem_buffer_assign"
<< "; step = "
<< "process_current_timestep_layers"
<< "; ts = " << ts << "\n";
});
const TpuTsField &cur_tpu_field = timestep_table_[ts].tpu0_ts_field;
if (layer_timestep_valid) {
for (auto op : cur_tpu_field) {
Expand All @@ -235,6 +329,16 @@ void BasicTimeStep::gen_all_mem_buffer() {
lmem_value.start_ts = ts;
lmem_value.end_ts = -1;

DEBUG_WITH_TYPE("lmem_buffer_assign", {
llvm::dbgs() << "; action = lmem_buffer_assign"
<< "; step = "
<< "initial_results_buffer"
<< "; lmem_key = " << module::getName(lmem_key.value)
<< "; lmem_type = " << lmem_key.lmem_type_str()
<< "; lmem_start_ts = " << lmem_value.start_ts
<< "; lmem_end_ts = " << lmem_value.end_ts << "\n";
});

lmem_buffer_[lmem_key] = lmem_value;
}

Expand All @@ -250,11 +354,36 @@ void BasicTimeStep::gen_all_mem_buffer() {
}
lmem_key.value = in;

// lmem_buffer_[lmem_key].end_ts = ts;
if (lmem_buffer_.find(lmem_key) != lmem_buffer_.end()) {
lmem_buffer_[lmem_key].end_ts = ts;
DEBUG_WITH_TYPE("lmem_buffer_assign", {
llvm::dbgs()
<< "; action = lmem_buffer_assign"
<< "; step = "
<< "update_operands_lmem_buffer"
<< "; lmem_key = " << module::getName(lmem_key.value)
<< "; lmem_type = " << lmem_key.lmem_type_str()
<< "; lmem_start_ts = " << lmem_buffer_[lmem_key].start_ts
<< "; timestep_mode = "
<< get_tensor_mode_str(lmem_key.value)
<< "; lmem_end_ts = " << lmem_buffer_[lmem_key].end_ts
<< "\n";
});
} else {
l2mem_buffer_[lmem_key].end_ts = ts;
DEBUG_WITH_TYPE("lmem_buffer_assign", {
llvm::dbgs()
<< "; action = lmem_buffer_assign"
<< "; step = "
<< "update_operands_l2mem_buffer"
<< "; lmem_key = " << module::getName(lmem_key.value)
<< "; lmem_type = " << lmem_key.lmem_type_str()
<< "; lmem_start_ts = " << l2mem_buffer_[lmem_key].start_ts
<< "; timestep_mode = "
<< get_tensor_mode_str(lmem_key.value)
<< "; lmem_end_ts = " << l2mem_buffer_[lmem_key].end_ts
<< "\n";
});
}
}

Expand All @@ -264,7 +393,17 @@ void BasicTimeStep::gen_all_mem_buffer() {

lmem_value.start_ts = ts;
lmem_value.end_ts = ts;

DEBUG_WITH_TYPE("lmem_buffer_assign", {
llvm::dbgs() << "; action = lmem_buffer_assign"
<< "; step = "
<< "update_imm_buffer"
<< "; lmem_key = " << module::getName(lmem_key.value)
<< "; lmem_type = " << lmem_key.lmem_type_str()
<< "; lmem_start_ts = " << lmem_value.start_ts
<< "; timestep_mode = "
<< get_tensor_mode_str(lmem_key.value)
<< "; lmem_end_ts = " << lmem_value.end_ts << "\n";
});
lmem_buffer_[lmem_key] = lmem_value;
} // cur_tpu_field
}
Expand Down Expand Up @@ -294,12 +433,34 @@ void BasicTimeStep::gen_all_mem_buffer() {
l2mem_buffer_[lmem_key] = lmem_value;
} else {
lmem_buffer_[lmem_key] = lmem_value;
DEBUG_WITH_TYPE("lmem_buffer_assign", {
llvm::dbgs() << "; action = lmem_buffer_assign"
<< "; step = "
<< "update_load_buffer"
<< "; lmem_key = " << module::getName(lmem_key.value)
<< "; lmem_type = " << lmem_key.lmem_type_str()
<< "; timestep_mode = " << tensor_info.mode_str()
<< "; lmem_start_ts = " << lmem_value.start_ts
<< "; lmem_end_ts = " << lmem_value.end_ts << "\n";
});
}
} else if (tensor_info.mode == TIMESTEP_STORE) {
lmem_key.value = tensor.first;
lmem_key.type = LMEM_ACTIVATION;

lmem_buffer_[lmem_key].end_ts = ts;
DEBUG_WITH_TYPE("lmem_buffer_assign", {
llvm::dbgs() << "; action = lmem_buffer_assign"
<< "; step = "
<< "update_store_buffer"
<< "; lmem_key = " << module::getName(lmem_key.value)
<< "; lmem_type = " << lmem_key.lmem_type_str()
<< "; timestep_mode = " << tensor_info.mode_str()
<< "; lmem_start_ts = "
<< lmem_buffer_[lmem_key].start_ts
<< "; lmem_end_ts = " << lmem_buffer_[lmem_key].end_ts
<< "\n";
});
}
}
}
Expand All @@ -308,7 +469,7 @@ void BasicTimeStep::gen_all_mem_buffer() {

void BasicTimeStep::update_all_mem_buffer_size(const LgInfo &lg_info) {
if (lmem_buffer_.empty()) {
gen_all_mem_buffer();
gen_all_mem_buffer_ts();
}
auto &tensor_infos = tensor_infos_;

Expand Down Expand Up @@ -424,6 +585,10 @@ bool BasicTimeStep::is_tensor_hold_in_lmem(Value v) {

// Returns a mutable reference to the tensor_infos_ member (not a copy).
TensorInfo &BasicTimeStep::get_tensor_infos() { return tensor_infos_; }

// Returns the timestep-mode name recorded in tensor_infos_ for `v`, or
// "TIMESTEP_UNKNOWN" when `v` has no entry.
// Uses find() rather than operator[] so that this logging helper never
// inserts a default-constructed entry into tensor_infos_ as a side effect.
std::string BasicTimeStep::get_tensor_mode_str(Value v) {
  const auto it = tensor_infos_.find(v);
  if (it == tensor_infos_.end()) {
    return "TIMESTEP_UNKNOWN";
  }
  return it->second.mode_str();
}

typedef struct {
Value value;
int64_t addr;
Expand Down
Loading

0 comments on commit b6017e8

Please sign in to comment.