This repository was archived by the owner on May 29, 2023. It is now read-only.

torch.lstsq specific case #36

Open · wants to merge 8 commits into base: master
9 changes: 6 additions & 3 deletions .github/workflows/main.yml
@@ -69,9 +69,12 @@ jobs:

     - name: Install OpenVINO
       run: |
-        curl ${{env.DIST_WIN}} -o openvino.exe
-        start /WAIT openvino.exe --s --a install --eula=accept --output=log.txt
-      shell: cmd
+        Invoke-WebRequest ${{env.DIST_WIN}} -OutFile openvino.exe
+        Start-Process -Wait -FilePath "openvino.exe" -ArgumentList "-s --a --silent --eula accept --output=log.txt"
+        ls "C:\Program Files (x86)"
+        ls "C:\Program Files (x86)\Intel"
+        ls "C:\Program Files (x86)\intel"
+      shell: pwsh
 
     - name: Build CPU extensions
       run: |
1 change: 1 addition & 0 deletions README.md
@@ -7,6 +7,7 @@ Repository with guides to enable some layers from PyTorch in Intel OpenVINO:
 * [nn.functional.grid_sample](https://github.com/dkurt/openvino_pytorch_layers/tree/master/examples/grid_sample)
 * [torchvision.ops.DeformConv2d](examples/deformable_conv)
 * [SparseConv](examples/sparse_conv) from [Open3D](https://github.com/isl-org/Open3D)
+* [torch.lstsq](https://pytorch.org/docs/stable/generated/torch.lstsq.html)


## OpenVINO Model Optimizer extension
32 changes: 32 additions & 0 deletions examples/lstsq/export_model.py
@@ -0,0 +1,32 @@
import numpy as np
import torch
from torch import nn
from .lstsq import LSTSQ


class Model(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, A, B):
        # torch.lstsq takes the arguments in (B, A) order
        return LSTSQ.apply(B, A)


# Solves min_X ||AX - B|| where A has a shape Mx2 and B has a shape MxN
def export(M, N):
    np.random.seed(324)
    torch.manual_seed(32)

    model = Model()
    A = torch.rand([M, 2])
    B = torch.rand([M, N])

    with torch.no_grad():
        torch.onnx.export(model, (A, B), 'model.onnx',
                          input_names=['input', 'input1'],
                          output_names=['output'],
                          operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)

    ref = model(A, B)
    np.save('inp', A.detach().numpy())
    np.save('inp1', B.detach().numpy())
    np.save('ref', ref.detach().numpy())
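
For reference, the exporter can be exercised directly (a minimal sketch; it assumes the command is run from the repository root so that the `examples.lstsq` package resolves, as in `tests/run_tests.py`):

from examples.lstsq.export_model import export

# Writes model.onnx plus the inp.npy, inp1.npy and ref.npy test artifacts
# into the current working directory.
export(M=5, N=1000)
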
32 changes: 32 additions & 0 deletions examples/lstsq/lstsq.py
@@ -0,0 +1,32 @@
import torch

# Reference implementation: solves min_X ||AX - B|| for a two-column A
# via Gram-Schmidt QR decomposition.
def solve_squares(B, A):
    def prod(vec0, vec1):
        return (vec0 * vec1).sum()

    def normalize(vec):
        return vec / (vec * vec).sum().sqrt()

    # 1. QR decomposition of A: orthonormalize its two columns
    col0 = normalize(A[:, 0])
    col1 = normalize(A[:, 1] - prod(A[:, 1], col0) * col0)

    Q = torch.stack((col0, col1), dim=1)
    R = torch.tensor([[prod(A[:, 0], col0), prod(A[:, 1], col0)],
                      [0, prod(A[:, 1], col1)]])

    # 2. X = inverse(R) * transpose(Q) * B
    X = torch.matmul(torch.inverse(R), Q.transpose(1, 0))
    X = torch.matmul(X, B)
    return X


class LSTSQ(torch.autograd.Function):
    @staticmethod
    def symbolic(g, input, A):
        return g.op("lstsq", input, A)

    @staticmethod
    def forward(ctx, input, A):
        # torch.lstsq pads its solution to M rows; the first two rows hold X
        return torch.lstsq(input, A)[0][:2]
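
A quick numerical check of the Gram-Schmidt path against the reference solver (a sketch; it assumes a PyTorch version that still ships the deprecated torch.lstsq):

import torch
from examples.lstsq.lstsq import solve_squares

torch.manual_seed(0)
A = torch.rand(5, 2)
B = torch.rand(5, 3)

# torch.lstsq pads its solution to M rows; the first two rows hold X
ref = torch.lstsq(B, A)[0][:2]
print(torch.allclose(solve_squares(B, A), ref, atol=1e-5))  # expected: True
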
8 changes: 8 additions & 0 deletions tests/run_tests.py
@@ -32,6 +32,7 @@ def run_test(convert_ir=True, test_onnx=False, num_inputs=1, threshold=1e-5):
     ref = np.load('ref.npy')
 
     ie = IECore()
+    print(get_extensions_path())
     ie.add_extension(get_extensions_path(), 'CPU')
     ie.set_config({'CONFIG_FILE': 'user_ie_extensions/gpu_extensions.xml'}, 'GPU')

@@ -145,3 +146,10 @@ def test_deformable_conv():
     )
     run_test(num_inputs=2, threshold=2e-5)
     run_test(num_inputs=2, test_onnx=True, threshold=2e-5)
+
+
+def test_lstsq():
+    from examples.lstsq.export_model import export
+
+    export(5, 1000)
+    run_test(num_inputs=2, test_onnx=True)
16 changes: 16 additions & 0 deletions user_ie_extensions/cpu_kernel.hpp
@@ -127,4 +127,20 @@ class CalculateGridImpl : public InferenceEngine::ILayerExecImpl {
    std::string error;
};

class LSTSQImpl : public InferenceEngine::ILayerExecImpl {
public:
    explicit LSTSQImpl(const std::shared_ptr<ngraph::Node>& node);
    InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig> &conf,
                                                           InferenceEngine::ResponseDesc *resp) noexcept override;
    InferenceEngine::StatusCode init(InferenceEngine::LayerConfig &config,
                                     InferenceEngine::ResponseDesc *resp) noexcept override;
    InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr> &inputs,
                                        std::vector<InferenceEngine::Blob::Ptr> &outputs,
                                        InferenceEngine::ResponseDesc *resp) noexcept override;
private:
    std::vector<ngraph::Shape> inShapes;
    ngraph::Shape outShape;
    std::string error;
};

} // namespace TemplateExtension
10 changes: 10 additions & 0 deletions user_ie_extensions/extension.cpp
@@ -49,6 +49,10 @@ Extension::Extension() {
         ngraph::OutputVector ng_inputs {node.get_ng_inputs()};
         return {std::make_shared<CalculateGridOp>(ng_inputs.at(0))};
     });
+    ngraph::onnx_import::register_operator(LSTSQOp::type_info.name, 1, "", [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
+        ngraph::OutputVector ng_inputs {node.get_ng_inputs()};
+        return {std::make_shared<LSTSQOp>(ng_inputs.at(0), ng_inputs.at(1))};
+    });
 }
 
 Extension::~Extension() {
@@ -59,6 +63,7 @@ Extension::~Extension() {
     ngraph::onnx_import::unregister_operator(SparseConvOp::type_info.name, 1, "org.open3d");
     ngraph::onnx_import::unregister_operator(SparseConvTransposeOp::type_info.name, 1, "org.open3d");
     ngraph::onnx_import::unregister_operator(CalculateGridOp::type_info.name, 1, "org.open3d");
+    ngraph::onnx_import::unregister_operator(LSTSQOp::type_info.name, 1, "");
 }

//! [extension:GetVersion]
@@ -85,6 +90,7 @@ std::map<std::string, ngraph::OpSet> Extension::getOpSets() {
     opset.insert<SparseConvOp>();
     opset.insert<SparseConvTransposeOp>();
     opset.insert<CalculateGridOp>();
+    opset.insert<LSTSQOp>();
     opsets["extension"] = opset;
     return opsets;
 }
@@ -98,6 +104,7 @@ std::vector<std::string> Extension::getImplTypes(const std::shared_ptr<ngraph::Node>& node) {
         std::dynamic_pointer_cast<SparseConvOp>(node) ||
         std::dynamic_pointer_cast<SparseConvTransposeOp>(node) ||
         std::dynamic_pointer_cast<CalculateGridOp>(node) ||
+        std::dynamic_pointer_cast<LSTSQOp>(node) ||
         std::dynamic_pointer_cast<IFFTOp>(node) ||
         std::dynamic_pointer_cast<FFTOp>(node)) {
         return {"CPU"};
@@ -129,6 +136,9 @@ InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) {
     if (std::dynamic_pointer_cast<CalculateGridOp>(node) && implType == "CPU") {
         return std::make_shared<CalculateGridImpl>(node);
     }
+    if (std::dynamic_pointer_cast<LSTSQOp>(node) && implType == "CPU") {
+        return std::make_shared<LSTSQImpl>(node);
+    }
     return nullptr;
 }
//! [extension:getImplementation]
162 changes: 162 additions & 0 deletions user_ie_extensions/lstsq_impl.cpp
@@ -0,0 +1,162 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cpu_kernel.hpp"
#include "op.hpp"
#include <details/ie_exception.hpp>
#include <ie_layouts.h>
#include "ie_parallel.hpp"

using namespace TemplateExtension;

//! [cpu_implementation:ctor]
LSTSQImpl::LSTSQImpl(const std::shared_ptr<ngraph::Node> &node) {
    try {
        auto castedNode = std::dynamic_pointer_cast<LSTSQOp>(node);
        if (!castedNode)
            THROW_IE_EXCEPTION << "Cannot create implementation for unknown operation!";
        if (castedNode->inputs().size() != 2 || castedNode->outputs().size() != 1)
            THROW_IE_EXCEPTION << "Cannot create implementation for operation with incorrect number of inputs or outputs!";
        if (castedNode->get_input_partial_shape(0).is_dynamic() || castedNode->get_output_partial_shape(0).is_dynamic())
            THROW_IE_EXCEPTION << "Cannot create implementation for op with dynamic shapes!";
        if (castedNode->get_input_shape(0).size() != 2 || castedNode->get_output_shape(0).size() != 2)
            THROW_IE_EXCEPTION << "Operation supports only 2d tensors for input and output.";
        if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
            THROW_IE_EXCEPTION << "Operation supports only FP32 tensors.";
        inShapes.resize(2);
        for (size_t i = 0; i < inShapes.size(); ++i)
            inShapes[i] = castedNode->get_input_shape(i);
        outShape = castedNode->get_output_shape(0);
    } catch (InferenceEngine::details::InferenceEngineException& ex) {
        error = ex.what();
    }
}
//! [cpu_implementation:ctor]

//! [cpu_implementation:getSupportedConfigurations]
InferenceEngine::StatusCode LSTSQImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig> &conf,
                                                                  InferenceEngine::ResponseDesc *resp) noexcept {
    std::vector<InferenceEngine::DataConfig> inDataConfig;
    std::vector<InferenceEngine::DataConfig> outDataConfig;
    // Allow any offset before data
    size_t offset((std::numeric_limits<size_t>::max)());

    // Input shapes
    for (const auto& shape : inShapes) {
        InferenceEngine::SizeVector order(shape.size());
        std::iota(order.begin(), order.end(), 0);

        InferenceEngine::DataConfig inpConf;
        inpConf.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, shape, {shape, order, offset});
        inDataConfig.push_back(inpConf);
    }

    // Output shape
    InferenceEngine::SizeVector order(outShape.size());
    std::iota(order.begin(), order.end(), 0);

    InferenceEngine::DataConfig outConf;
    outConf.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
    outDataConfig.push_back(outConf);

    InferenceEngine::LayerConfig layerConfig;
    layerConfig.inConfs = inDataConfig;
    layerConfig.outConfs = outDataConfig;

    conf.push_back(layerConfig);
    return InferenceEngine::StatusCode::OK;
}
//! [cpu_implementation:getSupportedConfigurations]

//! [cpu_implementation:init]
InferenceEngine::StatusCode LSTSQImpl::init(InferenceEngine::LayerConfig &config, InferenceEngine::ResponseDesc *resp) noexcept {
    try {
        if (config.inConfs.size() != 2 || config.outConfs.size() != 1) {
            THROW_IE_EXCEPTION << "Operation cannot be initialized with incorrect number of inputs/outputs!";
        }

        if (config.inConfs[0].desc.getDims().size() != 2 || config.outConfs[0].desc.getDims().size() != 2) {
            THROW_IE_EXCEPTION << "Operation can be initialized only with 2d input/output tensors!";
        }

        if (config.outConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32 ||
            config.inConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32) {
            THROW_IE_EXCEPTION << "Operation supports only FP32 precisions!";
        }
    } catch (InferenceEngine::details::InferenceEngineException& ex) {
        // Report the message of the exception that was actually thrown
        if (resp) {
            strncpy(resp->msg, ex.what(), sizeof(resp->msg) - 1);
            resp->msg[sizeof(resp->msg) - 1] = 0;
        }
        return InferenceEngine::GENERAL_ERROR;
    }

    return InferenceEngine::OK;
}
//! [cpu_implementation:init]

//! [cpu_implementation:execute]
InferenceEngine::StatusCode LSTSQImpl::execute(std::vector<InferenceEngine::Blob::Ptr> &inputs,
                                               std::vector<InferenceEngine::Blob::Ptr> &outputs,
                                               InferenceEngine::ResponseDesc *resp) noexcept {
    const float* B = inputs[0]->cbuffer().as<float*>();
    const float* A = inputs[1]->cbuffer().as<float*>();
    float* out = outputs[0]->buffer().as<float*>();

    // Perform the QR factorization A = QR. This implementation expects A to have 2 columns.
    const size_t M = inputs[0]->getTensorDesc().getDims()[0];
    const size_t N = inputs[0]->getTensorDesc().getDims()[1];

    std::vector<float> Q(M * 2);
    std::vector<float> R(4, 0.0f);  // 2x2 upper-triangular factor, stored row-major
    float norm0 = 0.0f;
    float product = 0.0f;  // dot product of the second column of A with the first column of Q
    for (size_t i = 0; i < M; ++i) {
        float val = A[i * 2];
        product += A[i * 2 + 1] * val;
        norm0 += val * val;
    }
    norm0 = sqrtf(norm0);
    product /= norm0;
    R[1] = product;

    float norm1 = 0.0f;
    for (size_t i = 0; i < M; ++i) {
        float val = A[i * 2] / norm0;
        Q[i * 2] = val;
        R[0] += A[i * 2] * val;

        val = A[i * 2 + 1] - product * val;
        Q[i * 2 + 1] = val;
        norm1 += val * val;
        R[3] += A[i * 2 + 1] * val;
    }
    norm1 = sqrtf(norm1);
    for (size_t i = 0; i < M; ++i) {
        Q[i * 2 + 1] /= norm1;
    }
    R[3] /= norm1;

    // Invert the upper-triangular R matrix
    float scale = 1.0f / (R[0] * R[3]);
    std::vector<float> R_inv{R[3] * scale, -R[1] * scale, 0.0f, R[0] * scale};

    // Output is inverse(R) * transpose(Q) * B; fold inverse(R) into Q first
    for (size_t i = 0; i < M; ++i) {
        Q[i * 2] = R_inv[0] * Q[i * 2] + R_inv[1] * Q[i * 2 + 1];
        Q[i * 2 + 1] *= R_inv[3];
    }

    for (size_t i = 0; i < N; ++i) {
        out[i] = 0.0f;
        out[N + i] = 0.0f;
        for (size_t j = 0; j < M; ++j) {
            out[i] += Q[j * 2] * B[j * N + i];
            out[N + i] += Q[j * 2 + 1] * B[j * N + i];
        }
    }
    return InferenceEngine::OK;
}
//! [cpu_implementation:execute]
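
The kernel's math can be sanity-checked in NumPy (an illustrative sketch, not part of the PR; `lstsq_2col` is a hypothetical helper mirroring the loops above):

import numpy as np

def lstsq_2col(A, B):
    # Gram-Schmidt QR on the two columns of A
    q0 = A[:, 0] / np.linalg.norm(A[:, 0])
    v = A[:, 1] - (A[:, 1] @ q0) * q0
    q1 = v / np.linalg.norm(v)
    Q = np.stack([q0, q1], axis=1)           # M x 2, orthonormal columns
    R = np.array([[A[:, 0] @ q0, A[:, 1] @ q0],
                  [0.0, A[:, 1] @ q1]])      # 2 x 2, upper triangular
    # Least-squares solution X = inv(R) @ Q.T @ B, shape 2 x N
    return np.linalg.inv(R) @ Q.T @ B

rng = np.random.RandomState(0)
A = rng.rand(5, 2).astype(np.float32)
B = rng.rand(5, 1000).astype(np.float32)
expected = np.linalg.lstsq(A, B, rcond=None)[0]
print(np.allclose(lstsq_2col(A, B), expected, atol=1e-4))  # expected: True
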
41 changes: 41 additions & 0 deletions user_ie_extensions/lstsq_op.cpp
@@ -0,0 +1,41 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "op.hpp"

using namespace TemplateExtension;

constexpr ngraph::NodeTypeInfo LSTSQOp::type_info;

//! [op:ctor]
LSTSQOp::LSTSQOp(const ngraph::Output<ngraph::Node>& B,
                 const ngraph::Output<ngraph::Node>& A)
    : Op({B, A}) {
    constructor_validate_and_infer_types();
}
//! [op:ctor]

//! [op:validate]
void LSTSQOp::validate_and_infer_types() {
    // B is MxN; the solution X keeps N columns but has exactly 2 rows
    auto outShape = get_input_partial_shape(0);
    outShape[0] = 2;
    set_output_type(0, get_input_element_type(0), outShape);
}
//! [op:validate]

//! [op:copy]
std::shared_ptr<ngraph::Node> LSTSQOp::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
    if (new_args.size() != 2) {
        throw ngraph::ngraph_error("Incorrect number of new arguments");
    }
    return std::make_shared<LSTSQOp>(new_args.at(0), new_args.at(1));
}
//! [op:copy]

//! [op:visit_attributes]
bool LSTSQOp::visit_attributes(ngraph::AttributeVisitor &visitor) {
    return true;
}
//! [op:visit_attributes]
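
End to end, the exported model can be run through the Inference Engine with the compiled extension (a sketch following `tests/run_tests.py`; the extension library path is a placeholder that depends on the build output):

import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
ie.add_extension('build/libuser_cpu_extension.so', 'CPU')  # placeholder path
net = ie.read_network('model.onnx')
exec_net = ie.load_network(net, 'CPU')

out = exec_net.infer({'input': np.load('inp.npy'), 'input1': np.load('inp1.npy')})
print(np.max(np.abs(out['output'] - np.load('ref.npy'))))  # should be ~1e-6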