diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3ca2aa9..71de72e 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -69,9 +69,12 @@ jobs:
     - name: Install OpenVINO
       run: |
-        curl ${{env.DIST_WIN}} -o openvino.exe
-        start /WAIT openvino.exe --s --a install --eula=accept --output=log.txt
-      shell: cmd
+        Invoke-WebRequest ${{env.DIST_WIN}} -OutFile openvino.exe
+        Start-Process -Wait -FilePath "openvino.exe" -ArgumentList "-s --a --silent --eula accept --output=log.txt"
+        ls "C:\Program Files (x86)"
+        ls "C:\Program Files (x86)\Intel"
+        ls "C:\Program Files (x86)\intel"
+      shell: pwsh
 
     - name: Build CPU extensions
       run: |
diff --git a/README.md b/README.md
index 7296a37..31361f0 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@ Repository with guides to enable some layers from PyTorch in Intel OpenVINO:
 * [nn.functional.grid_sample](https://github.com/dkurt/openvino_pytorch_layers/tree/master/examples/grid_sample)
 * [torchvision.ops.DeformConv2d](examples/deformable_conv)
 * [SparseConv](examples/sparse_conv) from [Open3D](https://github.com/isl-org/Open3D)
+* [torch.lstsq](https://pytorch.org/docs/stable/generated/torch.lstsq.html)
 
 ## OpenVINO Model Optimizer extension
diff --git a/examples/lstsq/export_model.py b/examples/lstsq/export_model.py
new file mode 100644
index 0000000..92365a7
--- /dev/null
+++ b/examples/lstsq/export_model.py
@@ -0,0 +1,32 @@
+import numpy as np
+import torch
+from torch import nn
+from .lstsq import LSTSQ
+
+
+class Model(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, A, B):
+        return LSTSQ.apply(B, A)
+
+
+# Solves min_X ||AX - B|| where A has a shape Mx2 and B has a shape MxN
+def export(M, N):
+    np.random.seed(324)
+    torch.manual_seed(32)
+
+    model = Model()
+    A = torch.rand([M, 2])
+    B = torch.rand([M, N])
+
+    with torch.no_grad():
+        torch.onnx.export(model, (A, B), 'model.onnx',
+                          input_names=['input', 'input1'],
+                          output_names=['output'],
+                          operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
+
+    ref = model(A, B)
+    np.save('inp', A.detach().numpy())
+    np.save('inp1', B.detach().numpy())
+    np.save('ref', ref.detach().numpy())
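Note on the export above: with `ONNX_ATEN_FALLBACK`, the `LSTSQ.symbolic` hook (defined in `lstsq.py` below) emits a custom `lstsq` node rather than a standard ONNX op. A minimal sketch for inspecting the exported graph, assuming the `onnx` package is installed; the expected node list is an assumption, not part of the patch:

```python
# Sketch: inspect the exported graph (assumes the `onnx` package; not part of the patch).
import onnx

from examples.lstsq.export_model import export

export(5, 3)  # writes model.onnx plus the inp.npy, inp1.npy, ref.npy test artifacts
model = onnx.load('model.onnx')
print([node.op_type for node in model.graph.node])  # expected to contain 'lstsq'
```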
diff --git a/examples/lstsq/lstsq.py b/examples/lstsq/lstsq.py
new file mode 100644
index 0000000..21aa98d
--- /dev/null
+++ b/examples/lstsq/lstsq.py
@@ -0,0 +1,32 @@
+import torch
+
+
+def solve_squares(B, A):
+    # 1. Perform QR decomposition of matrix A (Gram-Schmidt on its two columns)
+    print("A", A.shape)
+    print("B", B.shape)
+
+    def prod(vec0, vec1):
+        return (vec0 * vec1).sum()
+
+    def norm(vec):
+        # Scale vec to unit length
+        return vec / (vec * vec).sum().sqrt()
+
+    col0 = norm(A[:, 0])
+    col1 = norm(A[:, 1] - prod(A[:, 1], col0) * col0)
+
+    Q = torch.stack((col0, col1), axis=1)
+    R = torch.tensor([[prod(A[:, 0], col0), prod(A[:, 1], col0)],
+                      [0, prod(A[:, 1], col1)]])
+
+    # 2. Solve the triangular system: X = inverse(R) * transpose(Q) * B
+    X = torch.matmul(torch.inverse(R), Q.transpose(1, 0))
+    X = torch.matmul(X, B)
+    return X
+
+
+class LSTSQ(torch.autograd.Function):
+    @staticmethod
+    def symbolic(g, input, A):
+        return g.op("lstsq", input, A)
+
+    @staticmethod
+    def forward(self, input, A):
+        # Keep only the first two rows: the least-squares solution for an Mx2 matrix A
+        return torch.lstsq(input, A)[0][:2]
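`solve_squares` above is a reference implementation of the math the CPU kernel performs: a thin QR factorization of the Mx2 matrix `A` via Gram-Schmidt, followed by `X = inverse(R) * transpose(Q) * B`. A NumPy sketch, for verification only and not part of the patch, that cross-checks this scheme against `np.linalg.lstsq`:

```python
# Sketch: cross-check the QR-based solver against NumPy's reference least-squares routine.
import numpy as np

np.random.seed(0)
M, N = 5, 3
A = np.random.rand(M, 2).astype(np.float32)
B = np.random.rand(M, N).astype(np.float32)

# Gram-Schmidt on the two columns of A, mirroring solve_squares
q0 = A[:, 0] / np.linalg.norm(A[:, 0])
v1 = A[:, 1] - np.dot(A[:, 1], q0) * q0
q1 = v1 / np.linalg.norm(v1)
Q = np.stack((q0, q1), axis=1)                      # M x 2, orthonormal columns
R = np.array([[np.dot(A[:, 0], q0), np.dot(A[:, 1], q0)],
              [0.0,                 np.dot(A[:, 1], q1)]])

X = np.linalg.inv(R) @ Q.T @ B                      # 2 x N solution
X_ref = np.linalg.lstsq(A, B, rcond=None)[0]
print(np.allclose(X, X_ref, atol=1e-4))             # expected: True
```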
diff --git a/tests/run_tests.py b/tests/run_tests.py
index 4bb7717..a171955 100644
--- a/tests/run_tests.py
+++ b/tests/run_tests.py
@@ -32,6 +32,7 @@ def run_test(convert_ir=True, test_onnx=False, num_inputs=1, threshold=1e-5):
     ref = np.load('ref.npy')
 
     ie = IECore()
+    print(get_extensions_path())
     ie.add_extension(get_extensions_path(), 'CPU')
     ie.set_config({'CONFIG_FILE': 'user_ie_extensions/gpu_extensions.xml'}, 'GPU')
 
@@ -145,3 +146,10 @@ def test_deformable_conv():
     )
     run_test(num_inputs=2, threshold=2e-5)
     run_test(num_inputs=2, test_onnx=True, threshold=2e-5)
+
+
+def test_lstsq():
+    from examples.lstsq.export_model import export
+
+    export(5, 1000)
+    run_test(num_inputs=2, test_onnx=True)
diff --git a/user_ie_extensions/cpu_kernel.hpp b/user_ie_extensions/cpu_kernel.hpp
index 3173902..d14e8eb 100644
--- a/user_ie_extensions/cpu_kernel.hpp
+++ b/user_ie_extensions/cpu_kernel.hpp
@@ -127,4 +127,20 @@ class CalculateGridImpl : public InferenceEngine::ILayerExecImpl {
     std::string error;
 };
 
+class LSTSQImpl : public InferenceEngine::ILayerExecImpl {
+public:
+    explicit LSTSQImpl(const std::shared_ptr<ngraph::Node>& node);
+    InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
+                                                           InferenceEngine::ResponseDesc* resp) noexcept override;
+    InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config,
+                                     InferenceEngine::ResponseDesc* resp) noexcept override;
+    InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
+                                        std::vector<InferenceEngine::Blob::Ptr>& outputs,
+                                        InferenceEngine::ResponseDesc* resp) noexcept override;
+
+private:
+    std::vector<ngraph::Shape> inShapes;
+    ngraph::Shape outShape;
+    std::string error;
+};
+
 } // namespace TemplateExtension
diff --git a/user_ie_extensions/extension.cpp b/user_ie_extensions/extension.cpp
index 981d862..29e8bce 100644
--- a/user_ie_extensions/extension.cpp
+++ b/user_ie_extensions/extension.cpp
@@ -49,6 +49,10 @@ Extension::Extension() {
         ngraph::OutputVector ng_inputs{node.get_ng_inputs()};
         return {std::make_shared<CalculateGridOp>(ng_inputs.at(0))};
     });
+    ngraph::onnx_import::register_operator(LSTSQOp::type_info.name, 1, "", [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
+        ngraph::OutputVector ng_inputs{node.get_ng_inputs()};
+        return {std::make_shared<LSTSQOp>(ng_inputs.at(0), ng_inputs.at(1))};
+    });
 }
 
 Extension::~Extension() {
@@ -59,6 +63,7 @@ Extension::~Extension() {
     ngraph::onnx_import::unregister_operator(SparseConvOp::type_info.name, 1, "org.open3d");
     ngraph::onnx_import::unregister_operator(SparseConvTransposeOp::type_info.name, 1, "org.open3d");
     ngraph::onnx_import::unregister_operator(CalculateGridOp::type_info.name, 1, "org.open3d");
+    ngraph::onnx_import::unregister_operator(LSTSQOp::type_info.name, 1, "");
 }
 
 //! [extension:GetVersion]
@@ -85,6 +90,7 @@ std::map<std::string, ngraph::OpSet> Extension::getOpSets() {
     opset.insert<SparseConvOp>();
     opset.insert<SparseConvTransposeOp>();
    opset.insert<CalculateGridOp>();
+    opset.insert<LSTSQOp>();
     opsets["extension"] = opset;
     return opsets;
 }
@@ -98,6 +104,7 @@ std::vector<std::string> Extension::getImplTypes(const std::shared_ptr<ngraph::Node>& node) {
     if (std::dynamic_pointer_cast<UnpoolOp>(node) ||
         std::dynamic_pointer_cast<SparseConvOp>(node) ||
         std::dynamic_pointer_cast<SparseConvTransposeOp>(node) ||
+        std::dynamic_pointer_cast<LSTSQOp>(node) ||
         std::dynamic_pointer_cast<ComplexMulOp>(node) ||
         std::dynamic_pointer_cast<CalculateGridOp>(node)) {
         return {"CPU"};
@@ -129,6 +136,9 @@ InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) {
     if (std::dynamic_pointer_cast<CalculateGridOp>(node) && implType == "CPU") {
         return std::make_shared<CalculateGridImpl>(node);
     }
+    if (std::dynamic_pointer_cast<LSTSQOp>(node) && implType == "CPU") {
+        return std::make_shared<LSTSQImpl>(node);
+    }
     return nullptr;
 }
 //! [extension:getImplementation]
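The registration above maps any ONNX node of type `lstsq` in the default (empty) domain to `LSTSQOp`, preserving the input order produced by `LSTSQ.symbolic`: `B` first, then `A`. A quick hedged check of that ordering on the exported model; it assumes `export()` has already produced `model.onnx` and that the custom node is the only node in the graph:

```python
# Sketch: verify the input order of the custom node (assumes model.onnx from export()).
import onnx

model = onnx.load('model.onnx')
node = model.graph.node[0]
print(node.op_type, list(node.input))  # expected: lstsq ['input1', 'input'] -- B first, then A
```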
diff --git a/user_ie_extensions/lstsq_impl.cpp b/user_ie_extensions/lstsq_impl.cpp
new file mode 100644
index 0000000..4687f24
--- /dev/null
+++ b/user_ie_extensions/lstsq_impl.cpp
@@ -0,0 +1,162 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "cpu_kernel.hpp"
+#include "op.hpp"
+#include <details/ie_exception.hpp>
+#include <cmath>
+#include "ie_parallel.hpp"
+
+using namespace TemplateExtension;
+
+//! [cpu_implementation:ctor]
+LSTSQImpl::LSTSQImpl(const std::shared_ptr<ngraph::Node>& node) {
+    try {
+        auto castedNode = std::dynamic_pointer_cast<LSTSQOp>(node);
+        if (!castedNode)
+            THROW_IE_EXCEPTION << "Cannot create implementation for unknown operation!";
+        if (castedNode->inputs().size() != 2 || castedNode->outputs().size() != 1)
+            THROW_IE_EXCEPTION << "Cannot create implementation for operation with incorrect number of inputs or outputs!";
+        if (castedNode->get_input_partial_shape(0).is_dynamic() || castedNode->get_output_partial_shape(0).is_dynamic())
+            THROW_IE_EXCEPTION << "Cannot create implementation for op with dynamic shapes!";
+        if (castedNode->get_input_shape(0).size() != 2 || castedNode->get_output_shape(0).size() != 2)
+            THROW_IE_EXCEPTION << "Operation supports only 2d tensors for input and output.";
+        if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
+            THROW_IE_EXCEPTION << "Operation supports only FP32 tensors.";
+        inShapes.resize(2);
+        for (size_t i = 0; i < inShapes.size(); ++i)
+            inShapes[i] = castedNode->get_input_shape(i);
+        outShape = castedNode->get_output_shape(0);
+    } catch (InferenceEngine::details::InferenceEngineException& ex) {
+        error = ex.what();
+    }
+}
+//! [cpu_implementation:ctor]
+
+//! [cpu_implementation:getSupportedConfigurations]
+InferenceEngine::StatusCode LSTSQImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
+                                                                  InferenceEngine::ResponseDesc* resp) noexcept {
+    std::vector<InferenceEngine::DataConfig> inDataConfig;
+    std::vector<InferenceEngine::DataConfig> outDataConfig;
+    // Allow any offset before data
+    size_t offset((std::numeric_limits<size_t>::max)());
+
+    // Input shape
+    for (const auto& shape : inShapes)
+    {
+        InferenceEngine::SizeVector order(shape.size());
+        std::iota(order.begin(), order.end(), 0);
+
+        InferenceEngine::DataConfig inpConf;
+        inpConf.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, shape, {shape, order, offset});
+        inDataConfig.push_back(inpConf);
+    }
+
+    // Output shape
+    InferenceEngine::SizeVector order(outShape.size());
+    std::iota(order.begin(), order.end(), 0);
+
+    InferenceEngine::DataConfig outConf;
+    outConf.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
+    outDataConfig.push_back(outConf);
+
+    InferenceEngine::LayerConfig layerConfig;
+    layerConfig.inConfs = inDataConfig;
+    layerConfig.outConfs = outDataConfig;
+
+    conf.push_back(layerConfig);
+    return InferenceEngine::StatusCode::OK;
+}
+//! [cpu_implementation:getSupportedConfigurations]
+
+//! [cpu_implementation:init]
+InferenceEngine::StatusCode LSTSQImpl::init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept {
+    try {
+        if (config.inConfs.size() != 2 || config.outConfs.size() != 1) {
+            THROW_IE_EXCEPTION << "Operation cannot be initialized with incorrect number of inputs/outputs!";
+        }
+
+        if (config.inConfs[0].desc.getDims().size() != 2 || config.outConfs[0].desc.getDims().size() != 2) {
+            THROW_IE_EXCEPTION << "Operation can be initialized only with 2d input/output tensors!";
+        }
+
+        if (config.outConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32 ||
+            config.inConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32) {
+            THROW_IE_EXCEPTION << "Operation supports only FP32 precisions!";
+        }
+    } catch (InferenceEngine::details::InferenceEngineException& ex) {
+        error = ex.what();  // report the actual failure, not a stale message from the constructor
+        if (resp) {
+            strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1);
+            resp->msg[sizeof(resp->msg) - 1] = 0;
+        }
+        return InferenceEngine::GENERAL_ERROR;
+    }
+
+    return InferenceEngine::OK;
+}
+//! [cpu_implementation:init]
+
+//! [cpu_implementation:execute]
+InferenceEngine::StatusCode LSTSQImpl::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
+                                               std::vector<InferenceEngine::Blob::Ptr>& outputs,
+                                               InferenceEngine::ResponseDesc* resp) noexcept {
+    const float* B = inputs[0]->cbuffer().as<const float*>();
+    const float* A = inputs[1]->cbuffer().as<const float*>();
+    float* out = outputs[0]->buffer().as<float*>();
+
+    // Perform A = QR factorization. This implementation works on A with 2 columns.
+    const size_t M = inputs[0]->getTensorDesc().getDims()[0];
+    const size_t N = inputs[0]->getTensorDesc().getDims()[1];
+
+    std::vector<float> Q(M * 2);
+    std::vector<float> R(4, 0.0f);
+    float norm0 = 0.0f;
+    float product = 0.0f;  // dot product of the second column of A with the first column of Q
+    for (size_t i = 0; i < M; ++i) {
+        float val = A[i * 2];
+        product += A[i * 2 + 1] * val;
+        norm0 += val * val;
+    }
+    norm0 = sqrtf(norm0);
+    product /= norm0;
+    R[1] = product;
+
+    float norm1 = 0.0f;
+    for (size_t i = 0; i < M; ++i) {
+        float val = A[i * 2] / norm0;
+        Q[i * 2] = val;
+        R[0] += A[i * 2] * val;
+
+        val = A[i * 2 + 1] - product * val;
+        Q[i * 2 + 1] = val;
+        norm1 += val * val;
+        R[3] += A[i * 2 + 1] * val;
+    }
+    norm1 = sqrtf(norm1);
+    for (size_t i = 0; i < M; ++i) {
+        Q[i * 2 + 1] /= norm1;
+    }
+    R[3] /= norm1;
+
+    // Invert the upper triangular matrix R in closed form
+    float scale = 1.0f / (R[0] * R[3]);
+    std::vector<float> R_inv{R[3] * scale, -R[1] * scale, 0.0f, R[0] * scale};
+
+    // Output is inverse(R) * transpose(Q) * B; fold inverse(R) into the rows of transpose(Q) first
+    for (size_t i = 0; i < M; ++i) {
+        Q[i * 2] = R_inv[0] * Q[i * 2] + R_inv[1] * Q[i * 2 + 1];
+        Q[i * 2 + 1] *= R_inv[3];
+    }
+
+    for (size_t i = 0; i < N; ++i) {
+        out[i] = 0.0f;
+        out[N + i] = 0.0f;
+        for (size_t j = 0; j < M; ++j) {
+            out[i] += Q[j * 2] * B[j * N + i];
+            out[N + i] += Q[j * 2 + 1] * B[j * N + i];
+        }
+    }
+    return InferenceEngine::OK;
+}
+//! [cpu_implementation:execute]
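For readers following the kernel: `execute` computes the thin QR factors in two passes over `A`, inverts the 2x2 upper-triangular `R` in closed form, folds `inverse(R)` into `transpose(Q)`, and finally multiplies by `B`. The following NumPy sketch is an illustration of that arithmetic under the same shape assumptions, not the shipped code:

```python
# Sketch of the kernel's arithmetic in NumPy: the rows of inv(R) @ Q.T are formed
# first, then applied to B in a single pass, exactly as execute() does in C++.
import numpy as np

def lstsq_kernel(B, A):
    q0 = A[:, 0]
    norm0 = np.sqrt(np.sum(q0 * q0))
    product = np.dot(A[:, 1], q0) / norm0          # dot product of a1 with unit q0
    q0 = q0 / norm0
    v1 = A[:, 1] - product * q0
    q1 = v1 / np.sqrt(np.sum(v1 * v1))
    R = np.array([[norm0, product],
                  [0.0,   np.dot(A[:, 1], q1)]])
    rows = np.linalg.inv(R) @ np.stack((q0, q1))   # 2 x M, i.e. inv(R) @ Q.T
    return rows @ B                                # 2 x N output

A = np.random.rand(6, 2).astype(np.float32)
B = np.random.rand(6, 4).astype(np.float32)
print(np.allclose(lstsq_kernel(B, A), np.linalg.lstsq(A, B, rcond=None)[0], atol=1e-4))
```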
diff --git a/user_ie_extensions/lstsq_op.cpp b/user_ie_extensions/lstsq_op.cpp
new file mode 100644
index 0000000..e70e4aa
--- /dev/null
+++ b/user_ie_extensions/lstsq_op.cpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "op.hpp"
+
+using namespace TemplateExtension;
+
+constexpr ngraph::NodeTypeInfo LSTSQOp::type_info;
+
+//! [op:ctor]
+LSTSQOp::LSTSQOp(
+    const ngraph::Output<ngraph::Node>& B,
+    const ngraph::Output<ngraph::Node>& A
+)
+    : Op({B, A}) {
+    constructor_validate_and_infer_types();
+}
+//! [op:ctor]
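Shape inference for this op (see `validate_and_infer_types` just below) copies the partial shape of the first input (`B`, shaped `[M, N]`) and pins its first dimension to 2, one output row per column of `A`. A quick sanity check against the artifacts produced by the export example; it assumes `export(5, 1000)` has been run, as `test_lstsq` does:

```python
# Sketch: the saved reference output should already have the inferred shape [2, N].
import numpy as np

ref = np.load('ref.npy')
print(ref.shape)  # expected: (2, 1000) after export(5, 1000)
```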
+//! [op:validate]
+void LSTSQOp::validate_and_infer_types() {
+    // Output keeps the column count of B and has one row per column of the Mx2 matrix A
+    auto outShape = get_input_partial_shape(0);
+    outShape[0] = 2;
+    set_output_type(0, get_input_element_type(0), outShape);
+}
+//! [op:validate]
+
+//! [op:copy]
+std::shared_ptr<ngraph::Node> LSTSQOp::clone_with_new_inputs(const ngraph::OutputVector& new_args) const {
+    if (new_args.size() != 2) {
+        throw ngraph::ngraph_error("Incorrect number of new arguments");
+    }
+    return std::make_shared<LSTSQOp>(new_args.at(0), new_args.at(1));
+}
+//! [op:copy]
+
+//! [op:visit_attributes]
+bool LSTSQOp::visit_attributes(ngraph::AttributeVisitor& visitor) {
+    return true;
+}
+//! [op:visit_attributes]
diff --git a/user_ie_extensions/op.hpp b/user_ie_extensions/op.hpp
index ca7bc12..dbf89cc 100644
--- a/user_ie_extensions/op.hpp
+++ b/user_ie_extensions/op.hpp
@@ -116,6 +116,20 @@ class CalculateGridOp : public ngraph::op::Op {
     bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
 };
 
+class LSTSQOp : public ngraph::op::Op {
+public:
+    static constexpr ngraph::NodeTypeInfo type_info{"lstsq", 0};
+    const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
+
+    LSTSQOp() = default;
+    LSTSQOp(const ngraph::Output<ngraph::Node>& B,
+            const ngraph::Output<ngraph::Node>& A);
+    void validate_and_infer_types() override;
+    std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
+    bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
+};
+
 //! [op:header]
 } // namespace TemplateExtension
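End-to-end, the pieces compose the same way `run_test` drives them: export the ONNX model, load the CPU extension into Inference Engine, run the network, and compare against the saved reference. A hedged sketch of that flow; the import location of `get_extensions_path` is an assumption (the patch only shows it being called inside `tests/run_tests.py`), and paths depend on the build:

```python
# Sketch mirroring tests/run_tests.py; `get_extensions_path` location is hypothetical.
import numpy as np
from openvino.inference_engine import IECore

from tests.run_tests import get_extensions_path  # assumption: re-exported here

ie = IECore()
ie.add_extension(get_extensions_path(), 'CPU')   # the built user_ie_extensions library

net = ie.read_network('model.onnx')              # model produced by export()
exec_net = ie.load_network(net, 'CPU')

inputs = {'input': np.load('inp.npy'), 'input1': np.load('inp1.npy')}
out = exec_net.infer(inputs)['output']
print(np.max(np.abs(out - np.load('ref.npy'))))  # should stay below the 1e-5 test threshold
```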