1
1
#ifndef CAFFE2_OPERATORS_INT8_LEAKY_RELU_OP_H_
2
2
#define CAFFE2_OPERATORS_INT8_LEAKY_RELU_OP_H_
3
3
4
+ #include < qnnpack.h>
5
+
4
6
#include " caffe2/core/context.h"
5
7
#include " caffe2/core/operator.h"
6
8
#include " caffe2/core/tensor_int8.h"
@@ -13,48 +15,92 @@ namespace int8 {
13
15
/**
 * Quantized (uint8) Leaky ReLU operator for CPU that delegates the
 * element-wise computation to a QNNPACK Leaky ReLU operator.
 *
 * Operator arguments:
 *   - "alpha"        negative slope; enforced to satisfy 0 < alpha < 1
 *                    (default 0.01).
 *   - "Y_zero_point" zero point written to the output tensor; must fit in
 *                    uint8_t (default 0).
 *   - "Y_scale"      scale written to the output tensor (default 1).
 *
 * The QNNPACK operator is created lazily on the first RunOnDevice() call and
 * reused afterwards. It is re-created if the input tensor's quantization
 * parameters differ from the ones it was created with.
 */
class Int8LeakyReluOp final : public Operator<CPUContext> {
 public:
  Int8LeakyReluOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<CPUContext>(operator_def, ws), ws_(ws) {
    const float alpha = this->template GetSingleArgument<float>("alpha", 0.01);
    CAFFE_ENFORCE_GT(alpha, 0.0);
    CAFFE_ENFORCE_LT(alpha, 1.0);
    this->alpha_ = alpha;
  }

  ~Int8LeakyReluOp() {
    releaseQnnpackOperator();
  }

  /**
   * Applies Leaky ReLU to input 0 and writes the result to output 0.
   *
   * The output tensor is resized like the input and receives the
   * "Y_scale" / "Y_zero_point" quantization parameters.
   *
   * @return true on success; QNNPACK failures are reported through
   *         CAFFE_ENFORCE.
   */
  bool RunOnDevice() override {
    const auto& X = Inputs()[0]->template Get<Int8TensorCPU>();
    auto* Y = Outputs()[0]->template GetMutable<Int8TensorCPU>();
    const int32_t Y_zero_point =
        this->template GetSingleArgument<int>("Y_zero_point", 0);
    const float Y_scale = this->template GetSingleArgument<float>("Y_scale", 1);
    CHECK_GE(Y_zero_point, std::numeric_limits<uint8_t>::min());
    CHECK_LE(Y_zero_point, std::numeric_limits<uint8_t>::max());
    // The input zero point is narrowed to uint8_t below; reject values that
    // would not survive the narrowing instead of silently truncating them.
    CHECK_GE(X.zero_point, std::numeric_limits<uint8_t>::min());
    CHECK_LE(X.zero_point, std::numeric_limits<uint8_t>::max());

    /*
     * Record quantization parameters for the input, because if the op is
     * in-place, we may overwrite these parameters later, when we set
     * quantization parameters for output tensor.
     */
    const uint8_t X_zero_point = static_cast<uint8_t>(X.zero_point);
    const float X_scale = X.scale;

    Y->scale = Y_scale;
    Y->zero_point = Y_zero_point;
    Y->t.ResizeLike(X.t);

    initQNNPACK();

    // The input quantization parameters are baked into the QNNPACK operator
    // at creation time. Drop a cached operator that was built for different
    // input parameters so it is rebuilt below.
    if (this->qnnpackOperator_ != nullptr &&
        (X_zero_point != this->operatorXZeroPoint_ ||
         X_scale != this->operatorXScale_)) {
      releaseQnnpackOperator();
    }

    if (this->qnnpackOperator_ == nullptr) {
      const qnnp_status createStatus = qnnp_create_leaky_relu_nc_q8(
          1 /* channels */,
          this->alpha_,
          X_zero_point,
          X_scale,
          static_cast<uint8_t>(Y_zero_point),
          Y_scale,
          0 /* output min */,
          255 /* output max */,
          &qnnpackOperator_);
      CAFFE_ENFORCE(
          createStatus == qnnp_status_success,
          "failed to create QNNPACK Leaky ReLU operator");
      CAFFE_ENFORCE(this->qnnpackOperator_ != nullptr);
      this->operatorXZeroPoint_ = X_zero_point;
      this->operatorXScale_ = X_scale;
    }

    // The tensor is presented to QNNPACK as numel() rows of a single channel
    // with unit strides.
    const qnnp_status setupStatus = qnnp_setup_leaky_relu_nc_q8(
        this->qnnpackOperator_,
        X.t.numel() /* batch size */,
        X.t.template data<uint8_t>(),
        1 /* X stride */,
        Y->t.template mutable_data<uint8_t>(),
        1 /* Y stride */);
    CAFFE_ENFORCE(
        setupStatus == qnnp_status_success,
        "failed to setup QNNPACK Leaky ReLU operator");

#ifdef FBCODE_CAFFE2
    const qnnp_status runStatus =
        qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
#else
    pthreadpool_t threadpool =
        reinterpret_cast<pthreadpool_t>(ws_->GetThreadPool());
    const qnnp_status runStatus =
        qnnp_run_operator(this->qnnpackOperator_, threadpool);
#endif
    CAFFE_ENFORCE(
        runStatus == qnnp_status_success,
        "failed to run QNNPACK Leaky ReLU operator");

    return true;
  }

 private:
  // Deletes the cached QNNPACK operator, if any, and resets the handle.
  void releaseQnnpackOperator() {
    if (this->qnnpackOperator_ != nullptr) {
      qnnp_delete_operator(this->qnnpackOperator_);
      this->qnnpackOperator_ = nullptr;
    }
  }

  float alpha_;
  Workspace* ws_;
  // QNNPACK Leaky ReLU operator
  qnnp_operator_t qnnpackOperator_{nullptr};
  // Input quantization parameters qnnpackOperator_ was created with; only
  // meaningful while qnnpackOperator_ is non-null.
  uint8_t operatorXZeroPoint_{0};
  float operatorXScale_{0.0f};
};
59
105
60
106
} // namespace int8
0 commit comments