Lowering to linalg for AtenCol2ImOp #4012

Open · wants to merge 1 commit into base: main
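torch.aten.col2im is the op underlying torch.nn.functional.fold: it
scatter-adds the columns of an (N, C*kH*kW, L) tensor back into an
(N, C, H, W) image, where L is the number of sliding-block positions.
This patch lowers the op to a single linalg.generic that accumulates into
a zero-filled padded buffer, followed by a tensor.extract_slice that
strips the padding. As a reference for reviewers, the same computation as
a scalar C++ sketch (the function name, flat indexing, and float element
type are illustrative choices, not part of the patch):

#include <vector>

// Scalar reference for the scatter-add that the linalg.generic encodes.
// `in` has shape (N, C*kH*kW, L); `out` is the zero-initialized padded
// image (N, C, H + 2*padH, W + 2*padW), whose padding is sliced off later.
void col2imReference(const std::vector<float> &in, std::vector<float> &out,
                     int N, int C, int H, int W, int kH, int kW, int padH,
                     int padW, int dilH, int dilW, int strideH, int strideW) {
  int paddedH = H + 2 * padH, paddedW = W + 2 * padW;
  // Sliding-block positions per axis; the pattern builds its upperBounds
  // tensor from the same formula.
  int blocksH = 1 + (paddedH - 1 - (kH - 1) * dilH) / strideH;
  int blocksW = 1 + (paddedW - 1 - (kW - 1) * dilW) / strideW;
  for (int n = 0; n < N; ++n)
    for (int c = 0; c < C; ++c)
      for (int line = 0; line < blocksH; ++line)
        for (int col = 0; col < blocksW; ++col)
          for (int i = 0; i < kH; ++i)
            for (int j = 0; j < kW; ++j) {
              // The channel coordinate interleaves the kernel offsets; the
              // block coordinate is col + line * blocksW.
              int ch = c * kH * kW + i * kW + j;
              int inIdx = (n * C * kH * kW + ch) * blocksH * blocksW +
                          line * blocksW + col;
              // Output position: block origin plus dilated kernel offset.
              int y = line * strideH + i * dilH;
              int x = col * strideW + j * dilW;
              out[((n * C + c) * paddedH + y) * paddedW + x] += in[inIdx];
            }
}

With the shapes in the new test (N=1, C=3, H=14, W=30, 2x2 kernel, stride 2,
padding 1, dilation 1), blocksH * blocksW = 8 * 16 = 128, matching the
[1,12,128] input.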
268 changes: 268 additions & 0 deletions lib/Conversion/TorchToLinalg/DataMovement.cpp
@@ -24,6 +24,7 @@
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
#include "torch-mlir/Dialect/Torch/Utils/TorchUpstream.h"
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"
#include "torch-mlir/Dialect/TorchConversion/IR/TorchConversionOps.h"
#include "llvm/ADT/APInt.h"

#include <numeric>
@@ -2735,6 +2736,271 @@ SmallVector<StringRef> ConvertSparseOperatorOp::legalizedNames = {
"torch.aten.to_dense", "torch.aten.to_sparse", "torch.aten.to_csr",
"torch.aten.to_csc", "torch.aten.to_bsr", "torch.aten.to_bsc",
};

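// Converts torch.aten.col2im (the op underlying torch.nn.functional.fold)
// to linalg: the unfolded columns are scatter-added into a zero-filled,
// padded output buffer by one linalg.generic, and the padding is then
// removed with a tensor.extract_slice.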
class ConvertAtenCol2ImOp : public OpConversionPattern<AtenCol2imOp> {
public:
using OpConversionPattern::OpConversionPattern;

LogicalResult
matchAndRewrite(AtenCol2imOp col2imOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
// Retrieve the hyperparameters. Only the fully static case is handled:
// every list argument must be a prim.ListConstruct of two
// torch.constant.int values, otherwise the pattern fails to match.
Value input = col2imOp.getSelf();
if (!(col2imOp.getOutputSize().getDefiningOp() &&
isa<Torch::PrimListConstructOp>(
col2imOp.getOutputSize().getDefiningOp())))
return failure();

Torch::PrimListConstructOp outputSizes = cast<Torch::PrimListConstructOp>(
col2imOp.getOutputSize().getDefiningOp());
if (!(outputSizes.getNumOperands() == 2 &&
outputSizes->getOperand(0).getDefiningOp() &&
outputSizes->getOperand(1).getDefiningOp() &&
isa<Torch::ConstantIntOp>(
outputSizes->getOperand(0).getDefiningOp())))
return failure();
if (!isa<Torch::ConstantIntOp>(outputSizes->getOperand(1).getDefiningOp()))
return failure();
int height =
cast<Torch::ConstantIntOp>(outputSizes->getOperand(0).getDefiningOp())
.getValue();
int width =
cast<Torch::ConstantIntOp>(outputSizes->getOperand(1).getDefiningOp())
.getValue();
if (!(col2imOp.getPadding().getDefiningOp() &&
isa<Torch::PrimListConstructOp>(
col2imOp.getPadding().getDefiningOp())))
return failure();

Torch::PrimListConstructOp paddings =
cast<Torch::PrimListConstructOp>(col2imOp.getPadding().getDefiningOp());

if (!(paddings.getNumOperands() == 2 &&
paddings->getOperand(0).getDefiningOp() &&
paddings->getOperand(1).getDefiningOp() &&
isa<Torch::ConstantIntOp>(paddings->getOperand(0).getDefiningOp())))
return failure();

if (!isa<Torch::ConstantIntOp>(paddings->getOperand(1).getDefiningOp()))
return failure();
int horizontalPadding =
cast<Torch::ConstantIntOp>(paddings->getOperand(1).getDefiningOp())
.getValue();
int verticalPadding =
cast<Torch::ConstantIntOp>(paddings->getOperand(0).getDefiningOp())
.getValue();
int paddedWidth = width + 2 * horizontalPadding;
int paddedHeight = height + 2 * verticalPadding;
if (!(col2imOp.getKernelSize().getDefiningOp() &&
isa<Torch::PrimListConstructOp>(
col2imOp.getKernelSize().getDefiningOp())))
return failure();
Torch::PrimListConstructOp kerSizes = cast<Torch::PrimListConstructOp>(
col2imOp.getKernelSize().getDefiningOp());
if (!(kerSizes.getNumOperands() == 2 &&
kerSizes->getOperand(0).getDefiningOp() &&
kerSizes->getOperand(1).getDefiningOp() &&
isa<Torch::ConstantIntOp>(kerSizes->getOperand(0).getDefiningOp())))
return failure();
if (!isa<Torch::ConstantIntOp>(kerSizes->getOperand(1).getDefiningOp()))
return failure();
int kernelHeight =
cast<Torch::ConstantIntOp>(kerSizes->getOperand(0).getDefiningOp())
.getValue();
int kernelWidth =
cast<Torch::ConstantIntOp>(kerSizes->getOperand(1).getDefiningOp())
.getValue();
if (!(col2imOp.getDilation().getDefiningOp() &&
isa<Torch::PrimListConstructOp>(
col2imOp.getDilation().getDefiningOp())))
return failure();
Torch::PrimListConstructOp dilations = cast<Torch::PrimListConstructOp>(
col2imOp.getDilation().getDefiningOp());

if (!(dilations.getNumOperands() == 2 &&
dilations->getOperand(0).getDefiningOp() &&
dilations->getOperand(1).getDefiningOp() &&
isa<Torch::ConstantIntOp>(dilations->getOperand(0).getDefiningOp())))
return failure();
if (!isa<Torch::ConstantIntOp>(dilations->getOperand(1).getDefiningOp()))
return failure();
int verticalDilation =
cast<Torch::ConstantIntOp>(dilations->getOperand(0).getDefiningOp())
.getValue();
int horizontalDilation =
cast<Torch::ConstantIntOp>(dilations->getOperand(1).getDefiningOp())
.getValue();
if (!(col2imOp.getStride().getDefiningOp() &&
isa<Torch::PrimListConstructOp>(
col2imOp.getStride().getDefiningOp())))
return failure();
Torch::PrimListConstructOp strides =
cast<Torch::PrimListConstructOp>(col2imOp.getStride().getDefiningOp());

if (!(strides.getNumOperands() == 2 &&
strides->getOperand(0).getDefiningOp() &&
strides->getOperand(1).getDefiningOp() &&
isa<Torch::ConstantIntOp>(strides->getOperand(0).getDefiningOp())))
return failure();

if (!isa<Torch::ConstantIntOp>(strides->getOperand(1).getDefiningOp()))
return failure();

int verticalStride =
cast<Torch::ConstantIntOp>(strides->getOperand(0).getDefiningOp())
.getValue();
int horizontalStride =
cast<Torch::ConstantIntOp>(strides->getOperand(1).getDefiningOp())
.getValue();

// Create intermediate buffers
TensorType outputType =
cast<Torch::ValueTensorType>(col2imOp.getType()).toBuiltinTensor();
Type elementType = outputType.getElementType();
Value outputBuffer = rewriter.create<tensor::EmptyOp>(
col2imOp->getLoc(),
ArrayRef<int64_t>{outputType.getDimSize(0), outputType.getDimSize(1),
height, width},
elementType);
Value paddedOutput = rewriter.create<tensor::EmptyOp>(
col2imOp->getLoc(),
ArrayRef<int64_t>{outputType.getDimSize(0), outputType.getDimSize(1),
paddedHeight, paddedWidth},
elementType);
// Create the linalg iterator types: the batch and channel loops (d0, d1)
// are parallel; the block-position and kernel loops (d2..d5) are
// reductions, since several iterations accumulate into the same output
// element.
SmallVector<utils::IteratorType, 6> iteratorTypes(
6, utils::IteratorType::reduction);
iteratorTypes[0] = utils::IteratorType::parallel;
iteratorTypes[1] = utils::IteratorType::parallel;

SmallVector<AffineMap, 4> indexingMaps;
AffineExpr batch = rewriter.getAffineDimExpr(0);
AffineExpr chan = rewriter.getAffineDimExpr(1);
AffineExpr line = rewriter.getAffineDimExpr(2);
AffineExpr col = rewriter.getAffineDimExpr(3);
AffineExpr kerLineIndex = rewriter.getAffineDimExpr(4);
AffineExpr kerColIndex = rewriter.getAffineDimExpr(5);
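// Input map: the input has shape (N, C*kH*kW, L), where L is the number
// of sliding-block positions. The channel coordinate interleaves the
// kernel offsets (chan * kH*kW + kerLine * kW + kerCol), and the block
// coordinate is col + line * blocksPerRow, with blocksPerRow derived
// from the padded width, dilation, and stride.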
indexingMaps.push_back(AffineMap::get(
6, 0,
ArrayRef<AffineExpr>{
batch,
kerLineIndex * kernelWidth + kerColIndex +
chan * kernelWidth * kernelHeight,
col + line * (1 + (paddedWidth - 1 -
(kernelWidth - 1) * horizontalDilation) /
horizontalStride)},
rewriter.getContext()));
// We create 2 additional irrelevant indexing maps and inputs (kernel,
// upperBounds) so that the operation can infer the upper bound of each
// loop. Otherwise we get the following error: "'linalg.generic' op
// expected the shape-to-loops map to be non-null"
indexingMaps.push_back(
AffineMap::get(6, 0, ArrayRef<AffineExpr>{kerLineIndex, kerColIndex},
rewriter.getContext()));
indexingMaps.push_back(AffineMap::get(6, 0, ArrayRef<AffineExpr>{line, col},
rewriter.getContext()));
indexingMaps.push_back(AffineMap::get(
6, 0,
ArrayRef<AffineExpr>{
batch, chan,
line * verticalStride + kerLineIndex * verticalDilation,
col * horizontalStride + kerColIndex * horizontalDilation},
rewriter.getContext()));
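// Output map: each summand lands at its block origin (line * stride,
// col * stride) offset by the dilated kernel position, inside the padded
// output buffer.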
// The body of the linalg.generic op: accumulate args[0] (the input
// element) into args[3] (the output accumulator), using the add op that
// matches the element type.
auto body = [&](OpBuilder &b, Location loc, ValueRange args) {
Value acc =
(elementType.isInteger())
? b.create<arith::AddIOp>(loc, args[0], args[3]).getResult()
: (isa<mlir::FloatType>(elementType)
? b.create<arith::AddFOp>(loc, args[0], args[3])
.getResult()
: b.create<complex::AddOp>(loc, args[0], args[3])
.getResult());
b.create<linalg::YieldOp>(loc, acc);
};
input = rewriter.create<TorchConversion::ToBuiltinTensorOp>(
col2imOp->getLoc(),
cast<Torch::ValueTensorType>(input.getType()).toBuiltinTensor(), input);

// Create the "irrelevent" inputs
Value kernel = rewriter.create<tensor::EmptyOp>(
col2imOp->getLoc(), ArrayRef<int64_t>{kernelWidth, kernelHeight},
elementType);
Value upperBounds = rewriter.create<tensor::EmptyOp>(
col2imOp->getLoc(),
ArrayRef<int64_t>{
1 + (paddedHeight - 1 - (kernelHeight - 1) * verticalDilation) /
verticalStride,
1 + ((paddedWidth - 1 - (kernelWidth - 1) * horizontalDilation)) /
horizontalStride},
elementType);
assert(((isa<ComplexType>(elementType) &&
(cast<ComplexType>(elementType).getElementType().isInteger() ||
isa<mlir::FloatType>(
cast<ComplexType>(elementType).getElementType()))) ||
isa<mlir::FloatType>(elementType) || elementType.isInteger()) &&
"col2im lowering: unsupported element type");

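// Build a typed zero for filling the padded buffer, covering integer,
// float, and complex element types.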
TypedAttr init0 =
elementType.isInteger()
? rewriter.getIntegerAttr(elementType, 0)
: (isa<mlir::FloatType>(elementType)
? rewriter.getFloatAttr(elementType, 0.0)
: (cast<ComplexType>(elementType)
.getElementType()
.isInteger()
? TypedAttr(rewriter.getIntegerAttr(
cast<ComplexType>(elementType).getElementType(),
0))
: rewriter.getFloatAttr(
cast<ComplexType>(elementType).getElementType(),
0)));
Value fill0 =
isa<ComplexType>(elementType)
? rewriter.createOrFold<complex::ConstantOp>(
col2imOp->getLoc(), elementType,
rewriter.getArrayAttr(ArrayRef<Attribute>{init0, init0}))
: rewriter.createOrFold<arith::ConstantOp>(col2imOp->getLoc(),
elementType, init0);

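// Zero-fill the padded buffer, then scatter-add the columns into it.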
paddedOutput =
rewriter
.create<linalg::FillOp>(col2imOp->getLoc(), ValueRange(fill0),
ValueRange(paddedOutput))
->getResult(0);
paddedOutput =
rewriter
.create<linalg::GenericOp>(
col2imOp->getLoc(), paddedOutput.getType(),
ValueRange{input, kernel, upperBounds},
ValueRange(paddedOutput), indexingMaps, iteratorTypes, body)
->getResult(0);

// Remove the padding
OpFoldResult one = rewriter.getI32IntegerAttr(1);
OpFoldResult zero = rewriter.getI32IntegerAttr(0);
OpFoldResult vpad = rewriter.getI32IntegerAttr(verticalPadding);
OpFoldResult hpad = rewriter.getI32IntegerAttr(horizontalPadding);
OpFoldResult vdim = rewriter.getI32IntegerAttr(height);
OpFoldResult hdim = rewriter.getI32IntegerAttr(width);
OpFoldResult batchSize =
rewriter.getI32IntegerAttr(outputType.getDimSize(0));
OpFoldResult nChannels =
rewriter.getI32IntegerAttr(outputType.getDimSize(1));
outputBuffer = rewriter.create<tensor::ExtractSliceOp>(
col2imOp->getLoc(), paddedOutput,
ArrayRef<Range>{Range{zero, batchSize, one},
Range{zero, nChannels, one}, Range{vpad, vdim, one},
Range{hpad, hdim, one}});
rewriter.setInsertionPoint(col2imOp);
TorchConversion::FromBuiltinTensorOp newOp =
rewriter.create<TorchConversion::FromBuiltinTensorOp>(
col2imOp->getLoc(), col2imOp.getType(), outputBuffer);
rewriter.replaceOp(col2imOp, newOp);
return success();
}
};
} // namespace

void mlir::torch::torch_to_linalg::populateDataMovementPatternsAndLegality(
@@ -2800,6 +3066,8 @@ void mlir::torch::torch_to_linalg::populateDataMovementPatternsAndLegality(
patterns.add<ConvertAtenDiagonalOp>(typeConverter, context);
target.addIllegalOp<AtenDiagEmbedOp>();
patterns.add<ConvertAtenDiagEmbedOp>(typeConverter, context);
target.addIllegalOp<AtenCol2imOp>();
patterns.add<ConvertAtenCol2ImOp>(typeConverter, context);
// Rewrite all special sparse conversions hidden as operators.
target.addDynamicallyLegalOp<OperatorOp>([&](Torch::OperatorOp op) {
return !ConvertSparseOperatorOp::isSparsePrimitive(op.getNameAttr());
42 changes: 42 additions & 0 deletions test/Conversion/TorchToLinalg/datamovement.mlir
@@ -32,3 +32,45 @@ func.func @torch.aten.permute$rank0(%arg0: !torch.vtensor<[],f32>) -> !torch.vte
%1 = torch.aten.permute %arg0, %0 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[],f32>
return %1 : !torch.vtensor<[],f32>
}

// -----

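// col2im with output_size=(14,30), kernel_size=(2,2), dilation=(1,1),
// padding=(1,1), stride=(2,2): the padded buffer is 16x32, giving
// 8x16 = 128 block positions to match the [1,12,128] input.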
// CHECK: #[[MAP:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d4 * 2 + d5 + d1 * 4, d3 + d2 * 16)>
// CHECK: #[[MAP1:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5)>
// CHECK: #[[MAP2:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d3)>
// CHECK: #[[MAP3:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2 * 2 + d4, d3 * 2 + d5)>
// CHECK-LABEL: func.func @torch.aten.col2im(
// CHECK-SAME: %[[VAL_ARG0:.*]]: !torch.vtensor<[1,12,128],f32>) -> !torch.vtensor<[1,3,14,30],f32> {
// CHECK: %[[VAL_CST:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_0:.*]] = tensor.empty() : tensor<1x3x16x32xf32>
// CHECK: %[[VAL_1:.*]] = torch_c.to_builtin_tensor %[[VAL_ARG0]] : !torch.vtensor<[1,12,128],f32> -> tensor<1x12x128xf32>
// CHECK: %[[VAL_2:.*]] = tensor.empty() : tensor<2x2xf32>
// CHECK: %[[VAL_3:.*]] = tensor.empty() : tensor<8x16xf32>
// CHECK: %[[VAL_4:.*]] = linalg.fill ins(%[[VAL_CST:.*]] : f32) outs(%[[VAL_0]] : tensor<1x3x16x32xf32>) -> tensor<1x3x16x32xf32>
// CHECK: %[[VAL_5:.*]] = linalg.generic {indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%[[VAL_1]], %[[VAL_2]], %[[VAL_3]] : tensor<1x12x128xf32>, tensor<2x2xf32>, tensor<8x16xf32>) outs(%[[VAL_4]] : tensor<1x3x16x32xf32>) {
// CHECK: ^bb0(%[[VAL_IN0:.*]]: f32, %[[VAL_IN1:.*]]: f32, %[[VAL_IN2:.*]]: f32, %[[VAL_OUT:.*]]: f32):
// CHECK: %[[VAL_7:.*]] = arith.addf %[[VAL_IN0]], %[[VAL_OUT]] : f32
// CHECK: linalg.yield %[[VAL_7]] : f32
// CHECK: } -> tensor<1x3x16x32xf32>
// CHECK: %[[VAL_SLICE:.*]] = tensor.extract_slice %[[VAL_5]][0, 0, 1, 1] [1, 3, 14, 30] [1, 1, 1, 1] : tensor<1x3x16x32xf32> to tensor<1x3x14x30xf32>
// CHECK: %[[VAL_6:.*]] = torch_c.from_builtin_tensor %[[VAL_SLICE]] : tensor<1x3x14x30xf32> -> !torch.vtensor<[1,3,14,30],f32>
// CHECK: return %[[VAL_6]] : !torch.vtensor<[1,3,14,30],f32>
func.func @torch.aten.col2im(%arg0: !torch.vtensor<[1,12,128],f32>) -> !torch.vtensor<[1,3,14,30],f32> {
%int14 = torch.constant.int 14
%int30 = torch.constant.int 30
%0 = torch.prim.ListConstruct %int14, %int30 : (!torch.int, !torch.int) -> !torch.list<int>
%int2 = torch.constant.int 2
%int2_0 = torch.constant.int 2
%1 = torch.prim.ListConstruct %int2, %int2_0 : (!torch.int, !torch.int) -> !torch.list<int>
%int1 = torch.constant.int 1
%int1_1 = torch.constant.int 1
%2 = torch.prim.ListConstruct %int1, %int1_1 : (!torch.int, !torch.int) -> !torch.list<int>
%int1_2 = torch.constant.int 1
%int1_3 = torch.constant.int 1
%3 = torch.prim.ListConstruct %int1_2, %int1_3 : (!torch.int, !torch.int) -> !torch.list<int>
%int2_4 = torch.constant.int 2
%int2_5 = torch.constant.int 2
%4 = torch.prim.ListConstruct %int2_4, %int2_5 : (!torch.int, !torch.int) -> !torch.list<int>
%5 = torch.aten.col2im %arg0, %0, %1, %2, %3, %4 : !torch.vtensor<[1,12,128],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1,3,14,30],f32>
return %5 : !torch.vtensor<[1,3,14,30],f32>
}