
Commit 64339db

smessmer authored and facebook-github-bot committed
Fix and re-enable test case (pytorch#16643)
Summary:
Pull Request resolved: pytorch#16643

The test was disabled in D13908117 because it conflicted with another diff that was about to land. The merge conflict is now fixed and the change is being re-landed.

Reviewed By: ezyang

Differential Revision: D13911775

fbshipit-source-id: b790f1c3a3f207916eea41ac93bc104d011f629b
1 parent 6750e1e commit 64339db

File tree: 8 files changed, +54 −11 lines

caffe2/core/blob.h (+8)

@@ -116,5 +116,13 @@ inline const Tensor& BlobGetTensor(const Blob& blob, DeviceType device_type) {
   CAFFE_THROW("Blob didn't contain a Tensor or the device_type doesn't match");
 }
 
+inline Tensor BlobGetTensorOrUndefined(const Blob& blob) {
+  if (blob.IsType<Tensor>()) {
+    return blob.Get<Tensor>().UnsafeSharedInstance();
+  } else {
+    return Tensor();
+  }
+}
+
 } // namespace caffe2
 #endif // CAFFE2_CORE_BLOB_H_
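
For context, a minimal sketch of how a caller might use the new helper. The function inspect_blob is a hypothetical name invented for illustration, not part of this commit; the point is that, unlike BlobGetTensor, the new helper never throws.

#include "caffe2/core/blob.h"

// Hypothetical caller, for illustration only.
void inspect_blob(const caffe2::Blob& blob) {
  // Never throws: yields an undefined Tensor when the blob holds no Tensor.
  caffe2::Tensor t = caffe2::BlobGetTensorOrUndefined(blob);
  if (t.defined()) {
    // The blob held a Tensor; t shares storage with it (UnsafeSharedInstance),
    // so writes through t are visible to the blob's tensor.
  } else {
    // The blob was empty or held a non-Tensor type.
  }
}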

caffe2/core/c10_operator.h (+10 −5)

@@ -66,27 +66,32 @@ inline c10::FunctionSchema make_function_schema_for_c10(const char* OperatorName
 }
 }
 
+#define C10_DECLARE_CAFFE2_OPERATOR(OperatorName) \
+  namespace caffe2 { namespace _c10_ops { \
+    C10_DECLARE_OP_SCHEMA(OperatorName); \
+  }}
+
 /**
  * Call this macro to register a caffe2 operator with the c10 dispatcher.
  */
 // TODO This macro should take a JIT schema string instead of a vector of inputs and outputs.
-#define C10_REGISTER_CAFFE2_OPERATOR(OperatorName, Inputs, Outputs, OperatorClass) \
+#define C10_REGISTER_CAFFE2_OPERATOR_CPU(OperatorName, Inputs, Outputs, OperatorClass) \
   /* Register the op schema with the c10 dispatcher */ \
-  namespace caffe2 { \
+  namespace caffe2 { namespace _c10_ops { \
    C10_DEFINE_OP_SCHEMA(OperatorName, \
      caffe2::detail::make_function_schema_for_c10( \
        #OperatorName, Inputs, Outputs)); \
   } \
   /* Store the c10 operator handle so call_caffe2_op_from_c10 can access it */ \
-  namespace caffe2 { namespace detail { \
+  namespace detail { \
   template<> \
   const c10::OperatorHandle& c10_op_handle_for_c2_op<OperatorClass<caffe2::CPUContext>>() { \
-    return caffe2::OperatorName(); \
+    return caffe2::_c10_ops::OperatorName(); \
   } \
   }} \
   /* Register call_caffe2_op_from_c10 as a kernel with the c10 dispatcher */ \
   namespace c10 { \
-  C10_REGISTER_KERNEL(caffe2::OperatorName) \
+  C10_REGISTER_KERNEL(caffe2::_c10_ops::OperatorName) \
   /*.withCache<Cache>()*/ \
   .kernel<&caffe2::detail::call_caffe2_op_from_c10<OperatorClass<caffe2::CPUContext>>>() \
   .dispatchKey(CPUTensorId()); \
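
To see how the renamed macro pair is meant to be used, compare the layer_norm changes later in this commit: the declare macro goes in the operator's header, the register macro in its .cc file. A condensed sketch, with the argument lists abbreviated to /* ... */ where this commit does not show them:

// In the operator's header (see caffe2/operators/layer_norm_op.h below):
C10_DECLARE_CAFFE2_OPERATOR(LayerNorm)

// In the operator's .cc file (see caffe2/operators/layer_norm_op.cc below):
C10_REGISTER_CAFFE2_OPERATOR_CPU(
    LayerNorm,
    (std::vector<c10::Argument>{c10::Argument("input"), /* ... */}),
    (std::vector<c10::Argument>{/* ... */}),
    LayerNormOp /* operator class template */)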

caffe2/core/operator.h (+4)

@@ -213,6 +213,10 @@ class CAFFE2_API OperatorBase : public Observable<OperatorBase> {
     BlobSetTensor(outputs_.at(idx), std::move(tensor));
   }
 
+  Tensor OutputTensorOrUndefined(int idx) {
+    return BlobGetTensorOrUndefined(*outputs_.at(idx));
+  }
+
   inline Tensor*
   OutputTensor(int idx, at::IntArrayRef dims, at::TensorOptions options) {
     if (isLegacyOperator()) {

caffe2/core/operator_c10wrapper.h (+6 −1)

@@ -117,7 +117,12 @@ class C10OperatorWrapper final : public Operator<Context> {
 
   void pushOutputParameters_() {
     for (size_t i = 0; i < num_output_parameters; ++i) {
-      stack_.emplace_back(at::Tensor(C10Tensor(*Output(i))));
+      caffe2::Tensor preallocated_output_tensor = OperatorBase::OutputTensorOrUndefined(i);
+      if (preallocated_output_tensor.defined()) {
+        stack_.emplace_back(at::Tensor(std::move(preallocated_output_tensor)));
+      } else {
+        stack_.emplace_back(IValue());
+      }
     }
   }
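
The behavioral change above, isolated as a standalone sketch: a defined tensor means the caller preallocated the output and it is passed through to the kernel, while an undefined one becomes an empty IValue so the kernel allocates the output itself. push_output is an invented name; the real logic lives inside C10OperatorWrapper::pushOutputParameters_.

#include <vector>
#include <ATen/core/ivalue.h>
#include "caffe2/core/tensor.h"

// Illustrative helper mirroring the new loop body above (not real API).
void push_output(std::vector<c10::IValue>& stack, caffe2::Tensor preallocated) {
  if (preallocated.defined()) {
    // Output blob already holds a tensor (e.g. from an earlier net run):
    // pass it through so the kernel writes into the preallocated storage.
    stack.emplace_back(at::Tensor(std::move(preallocated)));
  } else {
    // No preallocated output: the kernel will allocate its own.
    stack.emplace_back(c10::IValue());
  }
}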

caffe2/operators/experimental/c10/schemas/layer_norm.cc (+1 −1)

@@ -26,7 +26,7 @@ struct EpsilonParameter final {
 
 namespace caffe2 {
 REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_WITH_PARAMETERS(
-    caffe2::LayerNorm,
+    caffe2::_c10_ops::LayerNorm,
     C10LayerNorm_DontUseThisOpYet,
     3,
     ParameterHelper<AxisParameter>,

caffe2/operators/layer_norm_op.cc (+1 −1)

@@ -184,7 +184,7 @@ to the end.)
 
 } // namespace caffe2
 
-C10_REGISTER_CAFFE2_OPERATOR(
+C10_REGISTER_CAFFE2_OPERATOR_CPU(
     LayerNorm,
     (std::vector<c10::Argument>{
       c10::Argument("input"),

caffe2/operators/layer_norm_op.h (+2 −2)

@@ -10,9 +10,9 @@
 #include "caffe2/utils/math.h"
 #include <ATen/core/dispatch/OpSchemaRegistration.h>
 
-namespace caffe2 {
+C10_DECLARE_CAFFE2_OPERATOR(LayerNorm)
 
-C10_DECLARE_OP_SCHEMA(LayerNorm);
+namespace caffe2 {
 
 template <class Context>
 class LayerNormOp final : public Operator<Context> {

caffe2/python/operator_test/layer_norm_op_test.py (+22 −1)

@@ -112,7 +112,6 @@ def test_layer_norm_op(self, X, gc, dc):
         )
 
     @given(X=hu.tensor(min_dim=2), **hu.gcs_cpu_only)
-    @unittest.skip("Tensor interop enforcement needs fixing")
     def test_layer_norm_op_c10(self, X, gc, dc):
         axis = np.random.randint(0, len(X.shape))
         epsilon = 1e-4
@@ -137,6 +136,28 @@ def test_layer_norm_op_c10(self, X, gc, dc):
             outputs_to_check=[0, 1, 2],
         )
 
+    @given(X=hu.tensor(min_dim=2), **hu.gcs_cpu_only)
+    def test_layer_norm_op_c10_preallocated_outputs(self, X, gc, dc):
+        # This test case ensures that it works correctly when output tensors are preallocated.
+        axis = np.random.randint(0, len(X.shape))
+        epsilon = 1e-4
+        self.ws.create_blob('input').feed(X)
+        m = ModelHelper(name="test")
+        m.net.C10LayerNorm_DontUseThisOpYet(["input"], ["output", "mean", "stdev"], axis=axis, epsilon=epsilon)
+        self.ws.create_net(m.param_init_net).run()
+        net = self.ws.create_net(m.net)
+        net.run()
+        net.run()  # run two times to be extra sure that the outputs are preallocated
+
+        expected_norm, expected_mean, expected_stdev = _layer_norm_ref(axis, epsilon, X)
+        actual_norm = self.ws.fetch_blob('output')
+        actual_mean = self.ws.fetch_blob('mean')
+        actual_stdev = self.ws.fetch_blob('stdev')
+
+        torch.testing.assert_allclose(expected_norm, actual_norm)
+        torch.testing.assert_allclose(expected_mean, actual_mean)
+        torch.testing.assert_allclose(expected_stdev, actual_stdev)
+
     @given(X=hu.tensor(min_dim=2), **hu.gcs)
     def test_layer_norm_op_pytorch(self, X, gc, dc):
         axis = np.random.randint(0, len(X.shape))
