Skip to content

Commit 7c053b7

Browse files
Yinghai Lu, facebook-github-bot
Yinghai Lu
authored and committed
Add filler for SparseLengthsWeightedSum (pytorch#13949)
Summary: Pull Request resolved: pytorch#13949 This diff adds support to fillers for `SparseLengthsWeight*` ops. It does 3 things: 1. Add the fillers for `SparseLengthsWeight*` ops 2. Add filling heuristics to consider the path of `LengthsRangeFill` -> `Gather` -> `SparseLengthsWeightedSum`, where the length input is shared by `LengthsRangeFill` and `SparseLengthsWeightedSum`. Therefore, we need to carefully bound the value of that length input so that at `Gather`, it does not index out-of-bound for the weight input of `Gather`. 3. Fix and simplify the logic of `math::RandFixedSum`, where we just keep rejecting the generated value if it violates the invariants. Reviewed By: highker Differential Revision: D13048216 fbshipit-source-id: bfe402e07e6421b28548047d18b298c148e0ec87
1 parent 3c7b575 commit 7c053b7

File tree

8 files changed

+137
-41
lines changed

8 files changed

+137
-41
lines changed

caffe2/core/operator_schema.cc

+39-2
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,8 @@ int OpSchema::CalculateOutput(int num_input) const {
330330
}
331331
}
332332

333-
static void SparseLengthsFillerHelper(
333+
namespace {
334+
void SparseLengthsFillerHelper(
334335
const std::vector<std::vector<int64_t>>& shapes,
335336
size_t value_index,
336337
size_t length_index,
@@ -340,7 +341,17 @@ static void SparseLengthsFillerHelper(
340341
(*fillers)[length_index].SparseLengths(shapes[value_index].front());
341342
}
342343

343-
static void SparseSegmentsFillerHelper(
344+
void SparseWeightsFillerHelper(
345+
const std::vector<std::vector<int64_t>>& shapes,
346+
size_t weight_index,
347+
std::vector<TensorFiller>* fillers) {
348+
(*fillers)[weight_index]
349+
.Min(0)
350+
.Max(shapes[weight_index].front())
351+
.Dist(FD_UNIFORM);
352+
}
353+
354+
void SparseSegmentsFillerHelper(
344355
const std::vector<std::vector<int64_t>>& shapes,
345356
size_t value_index,
346357
size_t segment_index,
@@ -353,6 +364,7 @@ static void SparseSegmentsFillerHelper(
353364
.Dist(FD_UNIFORM);
354365
(*fillers)[segment_index].SparseSegments(shapes[value_index].front() - 1);
355366
}
367+
} // namespace
356368

357369
// The helper builds sparse input with values, keys, and lengths; e.g.:
358370
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
@@ -375,6 +387,31 @@ OpSchema& OpSchema::ValueKeyLengthInputFillers(
375387
return *this;
376388
}
377389

390+
// The helper builds sparse input with values, keys, and lengths; e.g.:
391+
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
392+
// keys = [0, 1, 4, 0, 1, 2, 5, 1, 2]
393+
// weights = [1, 1, 1, 0, 2, 2, 2, 1, 2]
394+
// \_____/ \________/ \__/
395+
// lengths = [3, 4, 2]
396+
OpSchema& OpSchema::WeightedValueKeyLengthInputFillers(
397+
size_t value_index,
398+
size_t key_index,
399+
size_t length_index,
400+
size_t weight_index) {
401+
filler_supplier_ = [this, value_index, key_index, length_index, weight_index](
402+
const std::vector<std::vector<int64_t>>& shapes) {
403+
auto fillers = SupplyDenseFillers(shapes);
404+
// fill in the length (value_index is used to get the correct shape)
405+
SparseLengthsFillerHelper(shapes, key_index, length_index, &fillers);
406+
// fill in the keys (value_index is used to get the correct shape)
407+
SparseSegmentsFillerHelper(shapes, value_index, key_index, &fillers);
408+
// fill in the weights
409+
SparseWeightsFillerHelper(shapes, weight_index, &fillers);
410+
return fillers;
411+
};
412+
return *this;
413+
}
414+
378415
// The helper builds sparse input with values and lengths; e.g.:
379416
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
380417
// \_____/ \________/ \__/

caffe2/core/operator_schema.h

+15-1
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,21 @@ class CAFFE2_API OpSchema {
371371
return device_inference_function_(def);
372372
}
373373

374-
// The helper is build sparse input with values, keys, and lengths; e.g.:
374+
// The helper builds sparse input with values, keys, weights and lengths;
375+
// e.g.:
376+
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
377+
// keys = [0, 1, 4, 0, 1, 2, 5, 1, 2]
378+
// weights = [1, 2, 3, 4, 5, 6, 7, 8, 9]
379+
// \_____/ \________/ \__/
380+
// lengths = [3, 4, 2]
381+
OpSchema& WeightedValueKeyLengthInputFillers(
382+
size_t value_index,
383+
size_t key_index,
384+
size_t length_index,
385+
size_t weight_index);
386+
387+
// The helper builds sparse input with values, keys, weights and lengths;
388+
// e.g.:
375389
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
376390
// keys = [0, 1, 4, 0, 1, 2, 5, 1, 2]
377391
// \_____/ \________/ \__/

caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.cc

+5-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@ REGISTER_CPU_OPERATOR(
4242
OPERATOR_SCHEMA(SparseLengthsWeightedSumFused8BitRowwise)
4343
.NumInputs(4)
4444
.NumOutputs(1)
45-
.DisallowInputFillers() // TODO: Enable the fillers
45+
.WeightedValueKeyLengthInputFillers(
46+
SparseLengthsFused8BitRowwiseOp<CPUContext, true>::DATA,
47+
SparseLengthsFused8BitRowwiseOp<CPUContext, true>::INDICES,
48+
SparseLengthsFused8BitRowwiseOp<CPUContext, true>::LENGTHS,
49+
SparseLengthsFused8BitRowwiseOp<CPUContext, true>::WEIGHTS)
4650
.SetDoc(R"DOC(
4751
Performs the same operation as SparseLengthsWeightedSum,
4852
but operating on 8-bit rowwise quantized matrices with fused storage

caffe2/operators/lengths_reducer_ops.cc

+5-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,11 @@ using SparseLengthsWeightedSumDef = AbstractSparseLengthsDef<
9999
OPERATOR_SCHEMA(SparseLengthsWeightedSum)
100100
.NumInputs(SparseLengthsWeightedSumDef::ForwardOp::kNumInputs)
101101
.NumOutputs(1)
102-
.DisallowInputFillers() // TODO: enable input fillers
102+
.WeightedValueKeyLengthInputFillers(
103+
SparseLengthsWeightedSumOp::DATA,
104+
SparseLengthsWeightedSumOp::INDICES,
105+
SparseLengthsWeightedSumOp::LENGTHS,
106+
SparseLengthsWeightedSumOp::WEIGHT)
103107
.SetDoc(FormatDoc<SparseLengthsWeightedSumDef>())
104108
.Output(0, "OUTPUT", "Aggregated tensor")
105109
.FillUsing(SparseLengthsWeightedSumDef::PopulateSchema);

caffe2/predictor/emulator/data_filler.cc

+17-3
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,23 @@ DataRandomFiller::DataRandomFiller(
7777
caffe2::to_string(op_types.size()));
7878

7979
for (size_t j = 0; j < op.input_size(); ++j) {
80-
inputs_.emplace(
81-
op.input(j),
82-
std::make_pair(get_tensor_filler(op, j, op_dims), op_types[j]));
80+
inputs_[op.input(j)] =
81+
std::make_pair(get_tensor_filler(op, j, op_dims), op_types[j]);
82+
}
83+
84+
// Hack, we normal have a path of
85+
// length -> LengthsiRangeFill -> Gather -> w -> SparseLengthsWeighted*
86+
// \---------------------------------------/
87+
// So when we generate the value of length, we need to bound it to the size
88+
// of weight input of Gather too
89+
if (op.type().find("SparseLengthsWeighted") == 0 && i > 0) {
90+
const auto& prev_op = run_net.op(i - 1);
91+
if (prev_op.type() == "Gather") {
92+
const auto& prev_dims = input_dims[i - 1];
93+
VLOG(1) << "Setting max length value to " << prev_dims[0].front()
94+
<< " for " << op.input(3);
95+
inputs_[op.input(3)].first.Max(prev_dims[0].front());
96+
}
8397
}
8498

8599
for (size_t j = 0; j < op.output_size(); ++j) {

caffe2/utils/filler.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,13 @@ class TensorFiller {
7777
return *this;
7878
}
7979

80-
// a helper function to construct the lengths vector for sparse features
80+
// A helper function to construct the lengths vector for sparse features
81+
// We try to pad at least one index per batch unless the total_length is 0
8182
template <class Type>
8283
TensorFiller& SparseLengths(Type total_length) {
83-
return FixedSum(total_length).Min(0).Max(total_length);
84+
return FixedSum(total_length)
85+
.Min(std::min(static_cast<Type>(1), total_length))
86+
.Max(total_length);
8487
}
8588

8689
// a helper function to construct the segments vector for sparse features

caffe2/utils/math_cpu.cc

+36-31
Original file line numberDiff line numberDiff line change
@@ -2321,37 +2321,42 @@ CAFFE2_RAND_UNIFORM_INT(uint64_t);
23212321
// to between a and b.
23222322
// The algorithm is non-trivial given the adjustment would be different towards
23232323
// each value.
2324-
#define CAFFE2_RAND_FIXED_SUM(T) \
2325-
template <> \
2326-
C10_EXPORT void RandFixedSum<T, CPUContext>( \
2327-
const size_t n, \
2328-
const T a, \
2329-
const T b, \
2330-
const T sum, \
2331-
T* r, \
2332-
CPUContext* context) { \
2333-
CAFFE_ENFORCE_GE(a, 0); \
2334-
CAFFE_ENFORCE_GE(sum / (double)n, a); \
2335-
CAFFE_ENFORCE_LE(sum / (double)n, b); \
2336-
T current_sum = 0; \
2337-
for (size_t i = 0; i < n - 1; ++i) { \
2338-
auto remaining_numbers = n - 1 - i; \
2339-
double mean = (sum - current_sum) / remaining_numbers; \
2340-
double stdev = std::min(mean - a, b - mean); \
2341-
std::normal_distribution<double> distribution{mean, stdev / 4.0}; \
2342-
T value = distribution(context->RandGenerator()); \
2343-
auto remaining_sum = sum - current_sum - value; \
2344-
if (value < a || remaining_sum > b * remaining_numbers) { \
2345-
value = a; \
2346-
} else if (value > b || remaining_sum < a * remaining_numbers) { \
2347-
value = b; \
2348-
} \
2349-
r[i] = value; \
2350-
CAFFE_ENFORCE(a <= value && value <= b); \
2351-
current_sum += value; \
2352-
} \
2353-
r[n - 1] = sum - current_sum; \
2354-
CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \
2324+
#define CAFFE2_RAND_FIXED_SUM(T) \
2325+
template <> \
2326+
C10_EXPORT void RandFixedSum<T, CPUContext>( \
2327+
const size_t n, \
2328+
const T a, \
2329+
const T b, \
2330+
const T sum, \
2331+
T* r, \
2332+
CPUContext* context) { \
2333+
CAFFE_ENFORCE_GE(a, 0); \
2334+
CAFFE_ENFORCE_GE(sum / (double)n, a); \
2335+
CAFFE_ENFORCE_LE(sum / (double)n, b); \
2336+
T current_sum = 0; \
2337+
T remaining_sum = sum; \
2338+
for (size_t i = 0; i < n; ++i) { \
2339+
auto remaining_numbers = n - 1 - i; \
2340+
double mean = (sum - current_sum) / (remaining_numbers + 1); \
2341+
double stdev = std::min(mean - a, b - mean); \
2342+
std::normal_distribution<double> distribution{mean, stdev / 4.0}; \
2343+
T value, remaining_sum_test; \
2344+
do { \
2345+
value = distribution(context->RandGenerator()); \
2346+
remaining_sum_test = remaining_sum - value; \
2347+
} while (value < a || remaining_sum_test < a * remaining_numbers || \
2348+
value > b || remaining_sum_test > b * remaining_numbers); \
2349+
r[i] = value; \
2350+
CAFFE_ENFORCE(a <= value && value <= b); \
2351+
current_sum += value; \
2352+
remaining_sum -= value; \
2353+
CAFFE_ENFORCE_GE(remaining_sum, a* remaining_numbers); \
2354+
CAFFE_ENFORCE_LE(remaining_sum, b* remaining_numbers); \
2355+
} \
2356+
r[n - 1] += remaining_sum; \
2357+
current_sum += remaining_sum; \
2358+
CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \
2359+
CAFFE_ENFORCE_EQ(current_sum, sum); \
23552360
}
23562361
CAFFE2_RAND_FIXED_SUM(float);
23572362
CAFFE2_RAND_FIXED_SUM(double);

caffe2/utils/math_test.cc

+15
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,21 @@ TEST_F(BroadcastTest, BroadcastFloatTest) {
716716
{1.0f, 1.0f, 2.0f, 2.0f, 1.0f, 1.0f, 2.0f, 2.0f});
717717
}
718718

719+
class RandFixedSumTest : public testing::Test {
720+
protected:
721+
void SetUp() override {
722+
cpu_context_ = make_unique<CPUContext>(option_);
723+
}
724+
DeviceOption option_;
725+
std::unique_ptr<CPUContext> cpu_context_;
726+
};
727+
728+
TEST_F(RandFixedSumTest, UpperBound) {
729+
std::vector<int> l(20);
730+
math::RandFixedSum<int, CPUContext>(
731+
20, 1, 1000, 1000, l.data(), cpu_context_.get());
732+
}
733+
719734
class MomentsTest : public testing::Test {
720735
protected:
721736
void SetUp() override {

0 commit comments

Comments
 (0)