From bfd122cc263c728d479c93c95b0702457ab272d2 Mon Sep 17 00:00:00 2001
From: Scott Wolchok <swolchok@meta.com>
Date: Tue, 25 Mar 2025 10:05:36 -0700
Subject: [PATCH] Update [ghstack-poisoned]

NOTE(review): this patch was recovered from a whitespace-mangled copy in
which all newlines were collapsed and all text inside angle brackets
(the author email, the #include targets, and the template parameter
lists) was stripped. The stripped spans below were reconstructed from
context — the diffstat counts, the bzl dep on
cpu/util:elementwise_util, and the removed pattern-library call — and
the hunk line counts were re-derived to match the stated "53
insertions(+), 2 deletions(-)". Verify against the upstream PR before
applying.
---
 kernels/portable/cpu/op_expm1.cpp             | 54 ++++++++++++++++++-
 .../kernels/portable/op_registration_util.bzl |  1 +
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/kernels/portable/cpu/op_expm1.cpp b/kernels/portable/cpu/op_expm1.cpp
index f2d49f615b1..c73321b28e8 100644
--- a/kernels/portable/cpu/op_expm1.cpp
+++ b/kernels/portable/cpu/op_expm1.cpp
@@ -7,6 +7,7 @@
  */
 
 #include <cmath>
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include <executorch/kernels/portable/cpu/pattern/pattern.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
@@ -14,9 +15,58 @@
 namespace torch {
 namespace executor {
 namespace native {
 
+// REVIEW: I'm not entirely sure what the best way to implement this
+// namespace is. Some options:
+// 1) All in one file, with or without an `IMPLEMENT_VECTORIZED_MATH_OP` macro.
+// 2) Include in each `unary_ufunc_*` op_foo.cpp, with or without an
+// `IMPLEMENT_VECTORIZED_MATH_OP` macro.
+//
+// I think my preferred option would be (2) with a macro, but I've
+// left the macro out for ease of reading this PoC PR.
+namespace math {
+using std::expm1;
+#ifdef ET_USE_PYTORCH_HEADERS
+template <typename T>
+auto expm1(at::vec::Vectorized<T> x) {
+  // ATen knows to do this conversion because the TensorIterator for this op
+  // (and lots of similar ones in aten/src/ATen/native/UnaryOps.cpp) is created
+  // with build_borrowing_unary_float_op.
+  if constexpr (!executorch::runtime::is_floating_point<T>::value) {
+    return at::vec::convert<float>(x).expm1();
+  } else {
+    return x.expm1();
+  }
+}
+#endif
+} // namespace math
 Tensor& expm1_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
-  return internal::unary_ufunc_realhbbf16_to_floathbf16(
-      std::expm1, ctx, in, out);
+  ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out);
+
+  // Resize for dynamic shape
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      resize_tensor(out, in.sizes()) == Error::Ok,
+      InvalidArgument,
+      out,
+      "Failed to resize output tensor.");
+
+  ET_KERNEL_CHECK(
+      ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);
+
+  static constexpr const char op_name[] = "expm1.out";
+  ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE_IN, [&] {
+    utils::apply_unitensor_elementwise_fn<
+        CTYPE_IN,
+        op_name,
+        utils::SupportedTensorDtypes::FLOATHBF16>(
+        [](auto x) { return math::expm1(x); },
+        ctx,
+        in,
+        utils::SupportedTensorDtypes::REALHBBF16,
+        out);
+  });
+
+  return out;
 }
 } // namespace native
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
index b56413b92f4..e5f5e211730 100644
--- a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
+++ b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -527,6 +527,7 @@ ATEN_OPS = (
         name = "op_expm1",
         deps = [
             "//executorch/kernels/portable/cpu/pattern:pattern",
+            "//executorch/kernels/portable/cpu/util:elementwise_util",
        ],
    ),
    op_target(