Commit 96d826f

dreiss authored and facebook-github-bot committed
Define REGISTER_CPU_GRADIENT_OPERATOR (pytorch#12588)

Summary:
Pull Request resolved: pytorch#12588

By default, this is an alias for REGISTER_CPU_OPERATOR. If gradients are not
required (e.g., on mobile), it can be converted to a no-op by defining
CAFFE2_NO_GRADIENT_OPS, resulting in a smaller build. GRADIENT_OPERATOR_SCHEMA
works similarly. CAFFE2_NO_GRADIENT_OPS also converts REGISTER_GRADIENT to a
no-op.

Use these macros in fully_connected_op.cc as an example. Follow-up diffs will
convert more operators.

I had to introduce MACRO_EXPAND to handle the way Visual Studio expands
__VA_ARGS__.

Reviewed By: Yangqing

Differential Revision: D10209468

fbshipit-source-id: 4116d9098b97646bb30a00f2a7d46aa5d7ebcae0
1 parent da73d70 · commit 96d826f

File tree: 5 files changed, +39 −4 lines changed
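Taken together, the five diffs below enable the following pattern. This is a
hedged sketch, not code from this commit: MyOp and its classes are hypothetical
names, and the flag would normally be set by the build system rather than in
source.

// In some_op.cc inside the Caffe2 tree.
// The forward operator is always registered:
REGISTER_CPU_OPERATOR(MyOp, MyOpImpl<CPUContext>);
OPERATOR_SCHEMA(MyOp).NumInputs(1).NumOutputs(1);

// The gradient pieces use the new macros. Built normally they behave exactly
// like REGISTER_CPU_OPERATOR / OPERATOR_SCHEMA / REGISTER_GRADIENT; built with
// -DCAFFE2_NO_GRADIENT_OPS the registrations compile to nothing and the
// schema collapses to a never-registered stub, shrinking the binary:
REGISTER_CPU_GRADIENT_OPERATOR(MyOpGradient, MyOpGradientImpl<CPUContext>);
GRADIENT_OPERATOR_SCHEMA(MyOpGradient).NumInputs(2).NumOutputs(1);
REGISTER_GRADIENT(MyOp, GetMyOpGradient);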

c10/macros/Macros.h (+2)

@@ -32,6 +32,8 @@
 #define CONCAT_IMPL(x, y) x##y
 #define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y)

+#define MACRO_EXPAND(args) args
+
 /// C10_NODISCARD - Warn if a type or return value is discarded.
 #define C10_NODISCARD
 #if __cplusplus > 201402L && defined(__has_cpp_attribute)
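The reason MACRO_EXPAND exists is an MSVC preprocessor quirk: when __VA_ARGS__
is forwarded into another macro, MSVC's traditional preprocessor can hand it
over as a single token instead of re-scanning it into separate arguments. A
minimal, self-contained sketch of the failure mode and the fix (COUNT and
COUNT_IMPL are illustrative, not from this commit):

#define MACRO_EXPAND(args) args

// COUNT_IMPL picks its third argument; COUNT pads the list so that the
// picked slot reflects how many arguments were actually passed.
#define COUNT_IMPL(a, b, n, ...) n
#define COUNT(...) MACRO_EXPAND(COUNT_IMPL(__VA_ARGS__, 2, 1, 0))

// Without the MACRO_EXPAND wrapper, MSVC would pass "x, y" into COUNT_IMPL
// as a single first argument, and this would evaluate to 1 instead of 2.
static_assert(COUNT(x, y) == 2, "MSVC __VA_ARGS__ re-scan works");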

caffe2/core/operator.h (+10)

@@ -9,6 +9,7 @@
 #include <typeinfo>
 #include <vector>

+#include "c10/macros/Macros.h"
 #include "c10/util/Registry.h"
 #include "caffe2/core/blob.h"
 #include "caffe2/core/common.h"
@@ -858,6 +859,15 @@ C10_DECLARE_REGISTRY(
 #define REGISTER_CPU_OPERATOR_WITH_ENGINE(name, engine, ...) \
   C10_REGISTER_CLASS(CPUOperatorRegistry, name##_ENGINE_##engine, __VA_ARGS__)

+// Use these macros to register gradient operators. They can be automatically
+// excluded from builds that don't need them (e.g., mobile).
+#ifdef CAFFE2_NO_GRADIENT_OPS
+#define REGISTER_CPU_GRADIENT_OPERATOR(...) /* No gradients. */
+#else
+#define REGISTER_CPU_GRADIENT_OPERATOR(...) \
+  MACRO_EXPAND(REGISTER_CPU_OPERATOR(__VA_ARGS__))
+#endif
+
 C10_DECLARE_REGISTRY(
     CUDAOperatorRegistry,
     OperatorBase,

caffe2/core/operator_gradient.h (+9)

@@ -301,11 +301,20 @@ C10_DECLARE_REGISTRY(
     const OperatorDef&,
     const vector<GradientWrapper>&);

+#ifdef CAFFE2_NO_GRADIENT_OPS
+
+#define REGISTER_GRADIENT(name, ...) /* No gradients. */
+#define REGISTER_GRADIENT_STR(str_name, ...) /* No gradients. */
+
+#else
+
 #define REGISTER_GRADIENT(name, ...) \
   C10_REGISTER_CLASS(GradientRegistry, name, __VA_ARGS__)
 #define REGISTER_GRADIENT_STR(str_name, ...) \
   C10_REGISTER_TYPED_CLASS(GradientRegistry, str_name, __VA_ARGS__)

+#endif
+
 // NO_GRADIENT means that the operator does not need any gradient computation.
 #define NO_GRADIENT(name) REGISTER_GRADIENT(name, NoGradient)
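One consequence visible in this hunk: NO_GRADIENT expands through
REGISTER_GRADIENT, so it is silenced by the same switch with no special-casing.
A hedged sketch (MyInferenceOnlyOp is a hypothetical operator name):

NO_GRADIENT(MyInferenceOnlyOp);
// normal build:           REGISTER_GRADIENT(MyInferenceOnlyOp, NoGradient)
// CAFFE2_NO_GRADIENT_OPS: expands to the comment /* No gradients. */ only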

caffe2/core/operator_schema.h (+12)

@@ -597,4 +597,16 @@ OpSchema::Cost PointwiseCostInference(

 #endif // CAFFE2_NO_OPERATOR_SCHEMA

+#ifdef CAFFE2_NO_GRADIENT_OPS
+
+#define GRADIENT_OPERATOR_SCHEMA(name)                                \
+  C10_EXPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(){};   \
+  static OpSchema* C10_ANONYMOUS_VARIABLE(name) CAFFE2_UNUSED =       \
+      1 ? nullptr : &OpSchemaRegistry::NewSchema(#name, __FILE__, __LINE__)
+
+#else
+
+#define GRADIENT_OPERATOR_SCHEMA(name) OPERATOR_SCHEMA(name)
+
+#endif
 #endif // CAFFE2_CORE_OPERATOR_SCHEMA_H_
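The stub branch mirrors what OPERATOR_SCHEMA already emits under
CAFFE2_NO_OPERATOR_SCHEMA (the #endif in the context above). Because the
chained builder calls attach to the macro's trailing expression, an
illustrative expansion of GRADIENT_OPERATOR_SCHEMA(FCGradient).NumInputs(3)
in a gradient-free build looks roughly like this (anon_var stands in for
C10_ANONYMOUS_VARIABLE):

C10_EXPORT void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_FCGradient(){};
static OpSchema* anon_var CAFFE2_UNUSED =
    1 ? nullptr
      : &OpSchemaRegistry::NewSchema("FCGradient", __FILE__, __LINE__)
             .NumInputs(3);
// The builder chain still type-checks against OpSchema, but the false arm of
// the always-true conditional is never evaluated, so no schema is created or
// registered at runtime.

Callers therefore keep the same fluent syntax whether or not gradients are
compiled in.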

caffe2/operators/fully_connected_op.cc (+6 −4)

@@ -5,15 +5,17 @@
 namespace caffe2 {

 REGISTER_CPU_OPERATOR(FC, FullyConnectedOp<CPUContext>);
-REGISTER_CPU_OPERATOR(FCGradient, FullyConnectedGradientOp<CPUContext>);
+REGISTER_CPU_GRADIENT_OPERATOR(
+    FCGradient,
+    FullyConnectedGradientOp<CPUContext>);

 REGISTER_CPU_OPERATOR(
     FCTransposed,
     FullyConnectedOp<
         CPUContext,
         DefaultEngine,
         false /* don't transpose weight */>);
-REGISTER_CPU_OPERATOR(
+REGISTER_CPU_GRADIENT_OPERATOR(
     FCTransposedGradient,
     FullyConnectedGradientOp<
         CPUContext,
@@ -255,13 +257,13 @@ print("Y:\n", workspace.FetchBlob("Y"))
         "Ouput blob containing a 2D output matrix of shape $(M,N)$, where $M$ is the batch size and $N$ is the number of nodes in the layer. The ouput is calculated as $Y=XW^T+b$.")
     .InheritOnnxSchema("Gemm");

-OPERATOR_SCHEMA(FCGradient)
+GRADIENT_OPERATOR_SCHEMA(FCGradient)
     .NumInputs(3)
     .NumOutputs(2, 3)
     .TensorInferenceFunction(std::bind(FCGradientShapeInference, _1, _2, false))
     .CostInferenceFunction(
         std::bind(CostInferenceForFCGradient, _1, _2, false));
-OPERATOR_SCHEMA(FCTransposedGradient)
+GRADIENT_OPERATOR_SCHEMA(FCTransposedGradient)
     .NumInputs(3)
     .NumOutputs(2, 3)
     .TensorInferenceFunction(std::bind(FCGradientShapeInference, _1, _2, false))
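To observe the net effect on this file, one could check the CPU registry at
runtime. This is a hedged sketch, not part of the commit: it assumes the Has()
accessor on the c10 registry and that fully_connected_op.cc is linked into the
binary so its static registrations run.

#include "caffe2/core/operator.h"
#include <cassert>

int main() {
  // The forward ops are registered unconditionally.
  assert(caffe2::CPUOperatorRegistry()->Has("FC"));
#ifdef CAFFE2_NO_GRADIENT_OPS
  // In a gradient-free build, the gradient ops never enter the registry.
  assert(!caffe2::CPUOperatorRegistry()->Has("FCGradient"));
#else
  assert(caffe2::CPUOperatorRegistry()->Has("FCGradient"));
#endif
  return 0;
}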
