diff --git a/lib/Conversion/TopToTpu/BM1684X/MatMul.cpp b/lib/Conversion/TopToTpu/BM1684X/MatMul.cpp
index 3c2be5052..c803a7ebd 100644
--- a/lib/Conversion/TopToTpu/BM1684X/MatMul.cpp
+++ b/lib/Conversion/TopToTpu/BM1684X/MatMul.cpp
@@ -29,6 +29,7 @@ void MatMulLowering::LoweringINT8(PatternRewriter &rewriter, top::MatMulOp op,
     if (bias_size > p.N)
       llvm_unreachable("BatchMatMul does not support batch-bias yet.");
   }
+  int64_t left_num_dims = module::getShape(op.getInput()).size();
   if (auto filterOp = dyn_cast<top::WeightOp>(op.getRight().getDefiningOp())) {
     auto filter_f32 = filterOp.read<float>();
     int64_t in_zp = 0, out_zp = 0;
@@ -89,7 +90,8 @@ void MatMulLowering::LoweringINT8(PatternRewriter &rewriter, top::MatMulOp op,
     operands.push_back(new_filter);
     auto new_bias = op.getBias();
     if (with_bias) {
-      std::vector<int64_t> shape = {p.N};
+      std::vector<int64_t> shape(left_num_dims, 1);
+      shape[left_num_dims - 1] = p.N;
       auto new_type = RankedTensorType::get(shape, rewriter.getI32Type());
       new_bias = top::WeightOp::create(op, "bias_int32", *bias_int32, new_type);
       operands.push_back(new_bias);
@@ -119,7 +121,9 @@ void MatMulLowering::LoweringINT8(PatternRewriter &rewriter, top::MatMulOp op,
       bias_int32->data()[j] =
           std::round(bias_fp32->at(j) / (w_scale * in_scale));
     }
-    auto new_type = RankedTensorType::get({bias_n}, rewriter.getI32Type());
+    std::vector<int64_t> shape(left_num_dims, 1);
+    shape[left_num_dims - 1] = bias_n;
+    auto new_type = RankedTensorType::get(shape, rewriter.getI32Type());
     auto new_bias =
         top::WeightOp::create(op, "bias_int32", *bias_int32, new_type);
     operands[2] = new_bias;
@@ -161,6 +165,7 @@ void MatMulLowering::LoweringINT4(PatternRewriter &rewriter, top::MatMulOp op,
   int64_t in_zp = 0, out_zp = 0;
   double in_scale = 1, out_scale = 1, w_scale = 1;
 
+  int64_t left_num_dims = module::getShape(op.getInput()).size();
   if (auto filterOp = dyn_cast<top::WeightOp>(op.getRight().getDefiningOp())) {
     auto filter_f32 = filterOp.read<float>();
     int bitwidth = 4;
@@ -283,7 +288,8 @@ void MatMulLowering::LoweringINT4(PatternRewriter &rewriter, top::MatMulOp op,
     operands.push_back(new_filter);
     auto new_bias = op.getBias();
     if (with_bias) {
-      std::vector<int64_t> shape = {p.N};
+      std::vector<int64_t> shape(left_num_dims, 1);
+      shape[left_num_dims - 1] = p.N;
       auto new_type = RankedTensorType::get(shape, rewriter.getI32Type());
       new_bias = top::WeightOp::create(op, "bias_int32", *bias_int32, new_type);
       operands.push_back(new_bias);
@@ -313,7 +319,9 @@ void MatMulLowering::LoweringINT4(PatternRewriter &rewriter, top::MatMulOp op,
       bias_int32->data()[j] =
           std::round(bias_fp32->at(j) / (w_scale * in_scale));
     }
-    auto new_type = RankedTensorType::get({bias_n}, rewriter.getI32Type());
+    std::vector<int64_t> shape(left_num_dims, 1);
+    shape[left_num_dims - 1] = bias_n;
+    auto new_type = RankedTensorType::get(shape, rewriter.getI32Type());
     auto new_bias =
         top::WeightOp::create(op, "bias_int32", *bias_int32, new_type);
     operands[2] = new_bias;
@@ -391,6 +399,7 @@ void MatMulLowering::LoweringQuantized(PatternRewriter &rewriter,
   auto input_qtype = module::getUniformQuantizedType(op.getInput());
   auto right_qtype = module::getUniformQuantizedType(op.getRight());
   auto output_qtype = module::getUniformQuantizedType(op.getOutput());
+  int64_t left_num_dims = module::getShape(op.getInput()).size();
   const double real_multiplier =
       input_qtype.getScale() * right_qtype.getScale() / output_qtype.getScale();

@@ -445,7 +454,9 @@ void MatMulLowering::LoweringQuantized(PatternRewriter &rewriter,
   } else {
     bias_quant = i32_array_t(new std::vector<int32_t>(col_size, 0));
   }
-  auto bias_type = RankedTensorType::get({col_size}, rewriter.getI32Type());
+  std::vector<int64_t> shape(left_num_dims, 1);
+  shape[left_num_dims - 1] = col_size;
+  auto bias_type = RankedTensorType::get(shape, rewriter.getI32Type());

   if (can_merge_izp) {
     // attrs.push_back(rewriter.getNamedAttr(
diff --git a/lib/Dialect/Tpu/Interfaces/BM1684X/MatMul.cpp b/lib/Dialect/Tpu/Interfaces/BM1684X/MatMul.cpp
index b040455e8..f27fb936c 100644
--- a/lib/Dialect/Tpu/Interfaces/BM1684X/MatMul.cpp
+++ b/lib/Dialect/Tpu/Interfaces/BM1684X/MatMul.cpp
@@ -60,8 +60,10 @@ LogicalResult WeightReorder<tpu::MatMulOp, int8_t>::matchAndRewrite(
       bias_quant->data()[i] += p.input_zp * p.right_zp * p.K;
     }
     auto stype = module::getStorageType(op.getBias());
-    // std::vector<int64_t> bias_shape = {N};
-    auto new_type = RankedTensorType::get({p.N}, rewriter.getI32Type());
+    int64_t left_num_dims = module::getShape(op.getInput()).size();
+    std::vector<int64_t> bias_shape(left_num_dims, 1);
+    bias_shape[left_num_dims - 1] = p.N;
+    auto new_type = RankedTensorType::get(bias_shape, rewriter.getI32Type());
     auto new_op =
         top::WeightOp::create(op, "bias_merge_izp", *bias_quant, new_type);
     op->setOperand(2, new_op);
@@ -258,8 +260,7 @@ void tpu::MatMulOp::codegen_local_bm1684x(int64_t n_step, int64_t h_step,
   common.L_trans = getLeftTranspose();
   common.R_trans = p.right_transpose;
   common.has_bias = p.with_bias;
-  common.hdim_is_batch =
-      false; // group_type == GROUP_SMALL_C ? true : getHdimIsBatch();
+  common.hdim_is_batch = false;
   common.requant_mode = -1;
   if (module::isUniformQuantized(getInput())) {
     common.R_zp_is_const = true;
diff --git a/lib/Dialect/Tpu/Interfaces/Common/MatMul.cpp b/lib/Dialect/Tpu/Interfaces/Common/MatMul.cpp
index 8416372ab..a16a356f3 100644
--- a/lib/Dialect/Tpu/Interfaces/Common/MatMul.cpp
+++ b/lib/Dialect/Tpu/Interfaces/Common/MatMul.cpp
@@ -166,13 +166,20 @@ LogicalResult tpu::MatMulOp::LocalGenSupport() {
     return failure();
   }

+  auto Lshape = module::getShape(ins[0]);
+  auto Rshape = module::getShape(ins[1]);
   int left_num_dims = module::getShape(ins[0]).size();
   int right_num_dims = module::getShape(ins[1]).size();
-  if (left_num_dims == 5 && right_num_dims == 2) {
+  if (((left_num_dims == 4 && Lshape[1] < Lshape[2]) ||
+       (left_num_dims == 5 && Lshape[1] < Lshape[3])) &&
+      right_num_dims == 2) {
+    // GROUP_SMALL_C
     return success();
-  } else if (left_num_dims == 3 && right_num_dims == 2) {
+  } else if (left_num_dims == 3 && right_num_dims == 3) {
+    // (1, M, K) x (1, K, N)
     return success();
   } else if (left_num_dims == 4 && right_num_dims == 4 && getHdimIsBatch()) {
+    // (B1, M, B2, K) x (B1, K, B2, N)
     return success();
   }
   return failure();
diff --git a/python/transform/TFLiteConverter.py b/python/transform/TFLiteConverter.py
index cd59bbdfc..29177c526 100644
--- a/python/transform/TFLiteConverter.py
+++ b/python/transform/TFLiteConverter.py
@@ -680,6 +680,12 @@ def fully_connected_op(self, op):
             "do_relu": BoolAttr.get(fused_active == 1),
             # "right_transpose": BoolAttr.get(True),
         }
+        if op.inputs[2] is not None:
+            bias_shape = [1] * len(op.inputs[0].shape)
+            bias_shape[-1] = op.inputs[2].shape[0]
+            op.inputs[2].shape = tuple(bias_shape)
+            op.inputs[2].buffer.shape = tuple(bias_shape)
+
         if op.inputs[1].buffer is not None:
            f, c = op.inputs[1].shape
            op.inputs[1].shape = (c, f)
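
The recurring change across all of these hunks reshapes a length-N bias from {N} into a rank-aligned (1, ..., 1, N) so it broadcasts against the left matmul operand. Below is a minimal standalone sketch of that shape computation (plain C++, no MLIR dependencies; biasBroadcastShape is an illustrative name, not part of the patch):

#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the patch's bias reshape: a length-N bias gets one leading
// unit dimension per extra dimension of the left operand, so it
// broadcasts along the batch/row dims when added to the matmul output.
std::vector<int64_t> biasBroadcastShape(int64_t left_num_dims, int64_t N) {
  std::vector<int64_t> shape(left_num_dims, 1);
  shape[left_num_dims - 1] = N;
  return shape;
}

int main() {
  // A rank-4 left operand with N = 128 yields a (1, 1, 1, 128) bias.
  for (int64_t d : biasBroadcastShape(4, 128))
    std::cout << d << ' '; // prints: 1 1 1 128
  std::cout << '\n';
  return 0;
}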