modify matmul bias_shape for local layer
Change-Id: Ic5611b7f2770fd57da4b93d403da2f1906ab5eb2
Boatin committed Mar 2, 2023
1 parent 1220869 commit d670ada
Showing 4 changed files with 36 additions and 11 deletions.
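
The change repeated across the three C++ files is the shape given to the int32 bias weight: instead of a 1-D tensor of length N, the bias is now created with the same rank as the left operand, i.e. [1, ..., 1, N], presumably so it broadcasts cleanly when the MatMul is executed as a local (layer-group) op on BM1684X. A minimal sketch of that pattern pulled out as a standalone helper (the helper itself is illustrative, not part of the commit):

```cpp
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/PatternMatch.h"
#include <vector>

// Illustrative helper: build the bias type used after this change, a tensor of
// rank `left_num_dims` shaped [1, ..., 1, N] instead of the old 1-D {N}.
static mlir::RankedTensorType makeBiasType(mlir::PatternRewriter &rewriter,
                                           int64_t left_num_dims, int64_t N) {
  std::vector<int64_t> shape(left_num_dims, 1);
  shape[left_num_dims - 1] = N;
  return mlir::RankedTensorType::get(shape, rewriter.getI32Type());
}
```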
21 changes: 16 additions & 5 deletions lib/Conversion/TopToTpu/BM1684X/MatMul.cpp
@@ -29,6 +29,7 @@ void MatMulLowering::LoweringINT8(PatternRewriter &rewriter, top::MatMulOp op,
if (bias_size > p.N)
llvm_unreachable("BatchMatMul does not support batch-bias yet.");
}
int64_t left_num_dims = module::getShape(op.getInput()).size();
if (auto filterOp = dyn_cast<top::WeightOp>(op.getRight().getDefiningOp())) {
auto filter_f32 = filterOp.read<float>();
int64_t in_zp = 0, out_zp = 0;
@@ -89,7 +90,8 @@ void MatMulLowering::LoweringINT8(PatternRewriter &rewriter, top::MatMulOp op,
operands.push_back(new_filter);
auto new_bias = op.getBias();
if (with_bias) {
std::vector<int64_t> shape = {p.N};
std::vector<int64_t> shape(left_num_dims, 1);
shape[left_num_dims - 1]= p.N;
auto new_type = RankedTensorType::get(shape, rewriter.getI32Type());
new_bias = top::WeightOp::create(op, "bias_int32", *bias_int32, new_type);
operands.push_back(new_bias);
@@ -119,7 +121,9 @@ void MatMulLowering::LoweringINT8(PatternRewriter &rewriter, top::MatMulOp op,
bias_int32->data()[j] =
std::round(bias_fp32->at(j) / (w_scale * in_scale));
}
auto new_type = RankedTensorType::get({bias_n}, rewriter.getI32Type());
std::vector<int64_t> shape(left_num_dims, 1);
shape[left_num_dims - 1]= bias_n;
auto new_type = RankedTensorType::get(shape, rewriter.getI32Type());
auto new_bias =
top::WeightOp::create(op, "bias_int32", *bias_int32, new_type);
operands[2] = new_bias;
@@ -161,6 +165,7 @@ void MatMulLowering::LoweringINT4(PatternRewriter &rewriter, top::MatMulOp op,
int64_t in_zp = 0, out_zp = 0;
double in_scale = 1, out_scale = 1, w_scale = 1;

int64_t left_num_dims = module::getShape(op.getInput()).size();
if (auto filterOp = dyn_cast<top::WeightOp>(op.getRight().getDefiningOp())) {
auto filter_f32 = filterOp.read<float>();
int bitwidth = 4;
@@ -283,7 +288,8 @@ void MatMulLowering::LoweringINT4(PatternRewriter &rewriter, top::MatMulOp op,
operands.push_back(new_filter);
auto new_bias = op.getBias();
if (with_bias) {
std::vector<int64_t> shape = {p.N};
std::vector<int64_t> shape(left_num_dims, 1);
shape[left_num_dims - 1]= p.N;
auto new_type = RankedTensorType::get(shape, rewriter.getI32Type());
new_bias = top::WeightOp::create(op, "bias_int32", *bias_int32, new_type);
operands.push_back(new_bias);
@@ -313,7 +319,9 @@ void MatMulLowering::LoweringINT4(PatternRewriter &rewriter, top::MatMulOp op,
bias_int32->data()[j] =
std::round(bias_fp32->at(j) / (w_scale * in_scale));
}
auto new_type = RankedTensorType::get({bias_n}, rewriter.getI32Type());
std::vector<int64_t> shape(left_num_dims, 1);
shape[left_num_dims - 1]= bias_n;
auto new_type = RankedTensorType::get(shape, rewriter.getI32Type());
auto new_bias =
top::WeightOp::create(op, "bias_int32", *bias_int32, new_type);
operands[2] = new_bias;
@@ -391,6 +399,7 @@ void MatMulLowering::LoweringQuantized(PatternRewriter &rewriter,
auto input_qtype = module::getUniformQuantizedType(op.getInput());
auto right_qtype = module::getUniformQuantizedType(op.getRight());
auto output_qtype = module::getUniformQuantizedType(op.getOutput());
int64_t left_num_dims = module::getShape(op.getInput()).size();

const double real_multiplier =
input_qtype.getScale() * right_qtype.getScale() / output_qtype.getScale();
@@ -445,7 +454,9 @@ void MatMulLowering::LoweringQuantized(PatternRewriter &rewriter,
} else {
bias_quant = i32_array_t(new std::vector<int32_t>(col_size, 0));
}
auto bias_type = RankedTensorType::get({col_size}, rewriter.getI32Type());
std::vector<int64_t> shape(left_num_dims, 1);
shape[left_num_dims - 1]= col_size;
auto bias_type = RankedTensorType::get(shape, rewriter.getI32Type());

if (can_merge_izp) {
// attrs.push_back(rewriter.getNamedAttr(
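
For reference, the bias requantization that feeds these reshaped weights is unchanged in this commit: the float bias is divided by the product of the input and weight scales and rounded to int32 (the `bias_int32->data()[j] = std::round(...)` lines above). A self-contained sketch of that step, with illustrative names and outside MLIR:

```cpp
#include <cmath>
#include <cstdint>
#include <vector>

// Sketch of the bias quantization used in LoweringINT8/LoweringINT4:
// bias_int32[j] = round(bias_fp32[j] / (w_scale * in_scale)).
std::vector<int32_t> quantizeBias(const std::vector<float> &bias_fp32,
                                  double in_scale, double w_scale) {
  std::vector<int32_t> bias_int32(bias_fp32.size());
  for (size_t j = 0; j < bias_fp32.size(); ++j)
    bias_int32[j] = static_cast<int32_t>(
        std::round(bias_fp32[j] / (w_scale * in_scale)));
  return bias_int32;
}
```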
9 changes: 5 additions & 4 deletions lib/Dialect/Tpu/Interfaces/BM1684X/MatMul.cpp
@@ -60,8 +60,10 @@ LogicalResult WeightReorder<tpu::MatMulOp, int8_t>::matchAndRewrite(
bias_quant->data()[i] += p.input_zp * p.right_zp * p.K;
}
auto stype = module::getStorageType(op.getBias());
// std::vector<int64_t> bias_shape = {N};
auto new_type = RankedTensorType::get({p.N}, rewriter.getI32Type());
int64_t left_num_dims = module::getShape(op.getInput()).size();
std::vector<int64_t> bias_shape(left_num_dims, 1);
bias_shape[left_num_dims - 1] = p.N;
auto new_type = RankedTensorType::get(bias_shape, rewriter.getI32Type());
auto new_op =
top::WeightOp::create(op, "bias_merge_izp", *bias_quant, new_type);
op->setOperand(2, new_op);
@@ -258,8 +260,7 @@ void tpu::MatMulOp::codegen_local_bm1684x(int64_t n_step, int64_t h_step,
common.L_trans = getLeftTranspose();
common.R_trans = p.right_transpose;
common.has_bias = p.with_bias;
common.hdim_is_batch =
false; // group_type == GROUP_SMALL_C ? true : getHdimIsBatch();
common.hdim_is_batch = false;
common.requant_mode = -1;
if (module::isUniformQuantized(getInput())) {
common.R_zp_is_const = true;
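
The `WeightReorder<tpu::MatMulOp, int8_t>` hunk above folds the input zero point into the int32 bias (the `input_zp * right_zp * K` term) before giving the bias its new rank-matched shape. The algebra behind that term: for one output column, sum_k (x_k − izp)(w_k − wzp) = sum_k x_k·w_k − wzp·Σx_k − izp·Σw_k + izp·wzp·K, and the two input-independent terms can be merged into the bias ahead of time. A hedged sketch of that folding in plain C++ (not the tpu-mlir code; the −izp·Σw part is assumed to be handled in the portion of the reorder not shown in this hunk):

```cpp
#include <cstdint>
#include <vector>

// Illustrative zero-point folding for y[n] = sum_k (x[k]-izp)*(w[k][n]-wzp) + b[n].
// Both input-independent terms are merged into a per-column int32 bias.
std::vector<int32_t> mergeZeroPointsIntoBias(const std::vector<int32_t> &bias,
                                             const std::vector<int8_t> &w, // K x N
                                             int64_t K, int64_t N,
                                             int32_t izp, int32_t wzp) {
  std::vector<int32_t> merged(N);
  for (int64_t n = 0; n < N; ++n) {
    int64_t acc = bias.empty() ? 0 : bias[n];
    for (int64_t k = 0; k < K; ++k)
      acc -= static_cast<int64_t>(izp) * w[k * N + n]; // -izp * sum_k w[k][n]
    acc += static_cast<int64_t>(izp) * wzp * K;        // +izp * wzp * K, as in the hunk
    merged[n] = static_cast<int32_t>(acc);
  }
  return merged;
}
```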
11 changes: 9 additions & 2 deletions lib/Dialect/Tpu/Interfaces/Common/MatMul.cpp
@@ -166,13 +166,20 @@ LogicalResult tpu::MatMulOp::LocalGenSupport() {
return failure();
}

auto Lshape = module::getShape(ins[0]);
auto Rshape = module::getShape(ins[1]);
int left_num_dims = module::getShape(ins[0]).size();
int right_num_dims = module::getShape(ins[1]).size();
if (left_num_dims == 5 && right_num_dims == 2) {
if (((left_num_dims == 4 && Lshape[1] < Lshape[2]) ||
(left_num_dims == 5 && Lshape[1] < Lshape[3])) &&
right_num_dims == 2) {
// GROUP_SMALL_C
return success();
} else if (left_num_dims == 3 && right_num_dims == 2) {
} else if (left_num_dims == 3 && right_num_dims == 3) {
// (1, M, K) x (1, K, N)
return success();
} else if (left_num_dims == 4 && right_num_dims == 4 && getHdimIsBatch()) {
// (B1, M, B2, K) x (B1, K, B2, N)
return success();
}
return failure();
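
The reworked LocalGenSupport accepts three operand-shape patterns: a GROUP_SMALL_C layout where the left operand is 4-D or 5-D with dim 1 smaller than its spatial dim and the right is a 2-D weight, a 3-D by 3-D batch matmul, and a 4-D by 4-D matmul with the batch carried on the h dimension. A standalone restatement of the final predicate (same conditions as the diff, written outside MLIR so the accepted cases are easy to read; `hdim_is_batch` is passed in rather than read from the op):

```cpp
#include <cstdint>
#include <vector>

// Sketch of the shape check after this commit (mirrors LocalGenSupport above).
bool matmulLocalGenSupported(const std::vector<int64_t> &L,
                             const std::vector<int64_t> &R,
                             bool hdim_is_batch) {
  const size_t ld = L.size(), rd = R.size();
  if (((ld == 4 && L[1] < L[2]) || (ld == 5 && L[1] < L[3])) && rd == 2)
    return true; // GROUP_SMALL_C: small-C left operand times a 2-D weight
  if (ld == 3 && rd == 3)
    return true; // (1, M, K) x (1, K, N)
  if (ld == 4 && rd == 4 && hdim_is_batch)
    return true; // (B1, M, B2, K) x (B1, K, B2, N)
  return false;
}
```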
6 changes: 6 additions & 0 deletions python/transform/TFLiteConverter.py
@@ -680,6 +680,12 @@ def fully_connected_op(self, op):
"do_relu": BoolAttr.get(fused_active == 1),
# "right_transpose": BoolAttr.get(True),
}
if op.inputs[2] is not None:
bias_shape = [1] * len(op.inputs[0].shape)
bias_shape[-1] = op.inputs[2].shape[0]
op.inputs[2].shape = tuple(bias_shape)
op.inputs[2].buffer.shape = tuple(bias_shape)

if op.inputs[1].buffer is not None:
f, c = op.inputs[1].shape
op.inputs[1].shape = (c, f)