Skip to content

Commit 7c053b7

Browse files
Yinghai Lu, facebook-github-bot
Yinghai Lu
authored and committed
Add filler for SparseLengthsWeightedSum (pytorch#13949)
Summary: Pull Request resolved: pytorch#13949 This diff adds support to fillers for `SparseLengthsWeight*` ops. It does 3 things: 1. Add the fillers for `SparseLengthsWeight*` ops 2. Add filling heuristics to consider the path of `LengthsRangeFill` -> `Gather` -> `SparseLengthsWeightedSum`, where the length input is shared by `LengthsRangeFill` and `SparseLengthsWeightedSum`. Therefore, we need to carefully bound the value of that length input so that at `Gather`, it does not index out-of-bound for the weight input of `Gather`. 3. Fix and simplify the logic of `math::RandFixedSum`, where we just keep rejecting the generated value if it violates the invariants. Reviewed By: highker Differential Revision: D13048216 fbshipit-source-id: bfe402e07e6421b28548047d18b298c148e0ec87
1 parent 3c7b575 commit 7c053b7

File tree

8 files changed

+137
-41
lines changed

8 files changed

+137
-41
lines changed

caffe2/core/operator_schema.cc

+39-2
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,8 @@ int OpSchema::CalculateOutput(int num_input) const {
330330
}
331331
}
332332

333-
static void SparseLengthsFillerHelper(
333+
namespace {
334+
void SparseLengthsFillerHelper(
334335
const std::vector<std::vector<int64_t>>& shapes,
335336
size_t value_index,
336337
size_t length_index,
@@ -340,7 +341,17 @@ static void SparseLengthsFillerHelper(
340341
(*fillers)[length_index].SparseLengths(shapes[value_index].front());
341342
}
342343

343-
static void SparseSegmentsFillerHelper(
344+
void SparseWeightsFillerHelper(
345+
const std::vector<std::vector<int64_t>>& shapes,
346+
size_t weight_index,
347+
std::vector<TensorFiller>* fillers) {
348+
(*fillers)[weight_index]
349+
.Min(0)
350+
.Max(shapes[weight_index].front())
351+
.Dist(FD_UNIFORM);
352+
}
353+
354+
void SparseSegmentsFillerHelper(
344355
const std::vector<std::vector<int64_t>>& shapes,
345356
size_t value_index,
346357
size_t segment_index,
@@ -353,6 +364,7 @@ static void SparseSegmentsFillerHelper(
353364
.Dist(FD_UNIFORM);
354365
(*fillers)[segment_index].SparseSegments(shapes[value_index].front() - 1);
355366
}
367+
} // namespace
356368

357369
// The helper builds sparse input with values, keys, and lengths; e.g.:
358370
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
@@ -375,6 +387,31 @@ OpSchema& OpSchema::ValueKeyLengthInputFillers(
375387
return *this;
376388
}
377389

390+
// The helper builds sparse input with values, keys, and lengths; e.g.:
391+
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
392+
// keys = [0, 1, 4, 0, 1, 2, 5, 1, 2]
393+
// weights = [1, 1, 1, 0, 2, 2, 2, 1, 2]
394+
// \_____/ \________/ \__/
395+
// lengths = [3, 4, 2]
396+
OpSchema& OpSchema::WeightedValueKeyLengthInputFillers(
397+
size_t value_index,
398+
size_t key_index,
399+
size_t length_index,
400+
size_t weight_index) {
401+
filler_supplier_ = [this, value_index, key_index, length_index, weight_index](
402+
const std::vector<std::vector<int64_t>>& shapes) {
403+
auto fillers = SupplyDenseFillers(shapes);
404+
// fill in the length (value_index is used to get the correct shape)
405+
SparseLengthsFillerHelper(shapes, key_index, length_index, &fillers);
406+
// fill in the keys (value_index is used to get the correct shape)
407+
SparseSegmentsFillerHelper(shapes, value_index, key_index, &fillers);
408+
// fill in the weights
409+
SparseWeightsFillerHelper(shapes, weight_index, &fillers);
410+
return fillers;
411+
};
412+
return *this;
413+
}
414+
378415
// The helper builds sparse input with values and lengths; e.g.:
379416
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
380417
// \_____/ \________/ \__/

caffe2/core/operator_schema.h

+15-1
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,21 @@ class CAFFE2_API OpSchema {
371371
return device_inference_function_(def);
372372
}
373373

374-
// The helper is build sparse input with values, keys, and lengths; e.g.:
374+
// The helper builds sparse input with values, keys, weights and lengths;
375+
// e.g.:
376+
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
377+
// keys = [0, 1, 4, 0, 1, 2, 5, 1, 2]
378+
// weights = [1, 2, 3, 4, 5, 6, 7, 8, 9]
379+
// \_____/ \________/ \__/
380+
// lengths = [3, 4, 2]
381+
OpSchema& WeightedValueKeyLengthInputFillers(
382+
size_t value_index,
383+
size_t key_index,
384+
size_t length_index,
385+
size_t weight_index);
386+
387+
// The helper builds sparse input with values, keys, weights and lengths;
388+
// e.g.:
375389
// values = [1, 2, 3, 2, 4, 6, 7, 3, 6]
376390
// keys = [0, 1, 4, 0, 1, 2, 5, 1, 2]
377391
// \_____/ \________/ \__/

caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.cc

+5-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@ REGISTER_CPU_OPERATOR(
4242
OPERATOR_SCHEMA(SparseLengthsWeightedSumFused8BitRowwise)
4343
.NumInputs(4)
4444
.NumOutputs(1)
45-
.DisallowInputFillers() // TODO: Enable the fillers
45+
.WeightedValueKeyLengthInputFillers(
46+
SparseLengthsFused8BitRowwiseOp<CPUContext, true>::DATA,
47+
SparseLengthsFused8BitRowwiseOp<CPUContext, true>::INDICES,
48+
SparseLengthsFused8BitRowwiseOp<CPUContext, true>::LENGTHS,
49+
SparseLengthsFused8BitRowwiseOp<CPUContext, true>::WEIGHTS)
4650
.SetDoc(R"DOC(
4751
Performs the same operation as SparseLengthsWeightedSum,
4852
but operating on 8-bit rowwise quantized matrices with fused storage

caffe2/operators/lengths_reducer_ops.cc

+5-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,11 @@ using SparseLengthsWeightedSumDef = AbstractSparseLengthsDef<
9999
OPERATOR_SCHEMA(SparseLengthsWeightedSum)
100100
.NumInputs(SparseLengthsWeightedSumDef::ForwardOp::kNumInputs)
101101
.NumOutputs(1)
102-
.DisallowInputFillers() // TODO: enable input fillers
102+
.WeightedValueKeyLengthInputFillers(
103+
SparseLengthsWeightedSumOp::DATA,
104+
SparseLengthsWeightedSumOp::INDICES,
105+
SparseLengthsWeightedSumOp::LENGTHS,
106+
SparseLengthsWeightedSumOp::WEIGHT)
103107
.SetDoc(FormatDoc<SparseLengthsWeightedSumDef>())
104108
.Output(0, "OUTPUT", "Aggregated tensor")
105109
.FillUsing(SparseLengthsWeightedSumDef::PopulateSchema);

caffe2/predictor/emulator/data_filler.cc

+17-3
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,23 @@ DataRandomFiller::DataRandomFiller(
7777
caffe2::to_string(op_types.size()));
7878

7979
for (size_t j = 0; j < op.input_size(); ++j) {
80-
inputs_.emplace(
81-
op.input(j),
82-
std::make_pair(get_tensor_filler(op, j, op_dims), op_types[j]));
80+
inputs_[op.input(j)] =
81+
std::make_pair(get_tensor_filler(op, j, op_dims), op_types[j]);
82+
}
83+
84+
// Hack, we normal have a path of
85+
// length -> LengthsiRangeFill -> Gather -> w -> SparseLengthsWeighted*
86+
// \---------------------------------------/
87+
// So when we generate the value of length, we need to bound it to the size
88+
// of weight input of Gather too
89+
if (op.type().find("SparseLengthsWeighted") == 0 && i > 0) {
90+
const auto& prev_op = run_net.op(i - 1);
91+
if (prev_op.type() == "Gather") {
92+
const auto& prev_dims = input_dims[i - 1];
93+
VLOG(1) << "Setting max length value to " << prev_dims[0].front()
94+
<< " for " << op.input(3);
95+
inputs_[op.input(3)].first.Max(prev_dims[0].front());
96+
}
8397
}
8498

8599
for (size_t j = 0; j < op.output_size(); ++j) {

caffe2/utils/filler.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,13 @@ class TensorFiller {
7777
return *this;
7878
}
7979

80-
// a helper function to construct the lengths vector for sparse features
80+
// A helper function to construct the lengths vector for sparse features
81+
// We try to pad at least one index per batch unless the total_length is 0
8182
template <class Type>
8283
TensorFiller& SparseLengths(Type total_length) {
83-
return FixedSum(total_length).Min(0).Max(total_length);
84+
return FixedSum(total_length)
85+
.Min(std::min(static_cast<Type>(1), total_length))
86+
.Max(total_length);
8487
}
8588

8689
// a helper function to construct the segments vector for sparse features

caffe2/utils/math_cpu.cc

+36-31
Original file line numberDiff line numberDiff line change
@@ -2321,37 +2321,42 @@ CAFFE2_RAND_UNIFORM_INT(uint64_t);
23212321
// to between a and b.
23222322
// The algorithm is non-trivial given the adjustment would be different towards
23232323
// each value.
2324-
#define CAFFE2_RAND_FIXED_SUM(T) \
2325-
template <> \
2326-
C10_EXPORT void RandFixedSum<T, CPUContext>( \
2327-
const size_t n, \
2328-
const T a, \
2329-
const T b, \
2330-
const T sum, \
2331-
T* r, \
2332-
CPUContext* context) { \
2333-
CAFFE_ENFORCE_GE(a, 0); \
2334-
CAFFE_ENFORCE_GE(sum / (double)n, a); \
2335-
CAFFE_ENFORCE_LE(sum / (double)n, b); \
2336-
T current_sum = 0; \
2337-
for (size_t i = 0; i < n - 1; ++i) { \
2338-
auto remaining_numbers = n - 1 - i; \
2339-
double mean = (sum - current_sum) / remaining_numbers; \
2340-
double stdev = std::min(mean - a, b - mean); \
2341-
std::normal_distribution<double> distribution{mean, stdev / 4.0}; \
2342-
T value = distribution(context->RandGenerator()); \
2343-
auto remaining_sum = sum - current_sum - value; \
2344-
if (value < a || remaining_sum > b * remaining_numbers) { \
2345-
value = a; \
2346-
} else if (value > b || remaining_sum < a * remaining_numbers) { \
2347-
value = b; \
2348-
} \
2349-
r[i] = value; \
2350-
CAFFE_ENFORCE(a <= value && value <= b); \
2351-
current_sum += value; \
2352-
} \
2353-
r[n - 1] = sum - current_sum; \
2354-
CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \
2324+
#define CAFFE2_RAND_FIXED_SUM(T) \
2325+
template <> \
2326+
C10_EXPORT void RandFixedSum<T, CPUContext>( \
2327+
const size_t n, \
2328+
const T a, \
2329+
const T b, \
2330+
const T sum, \
2331+
T* r, \
2332+
CPUContext* context) { \
2333+
CAFFE_ENFORCE_GE(a, 0); \
2334+
CAFFE_ENFORCE_GE(sum / (double)n, a); \
2335+
CAFFE_ENFORCE_LE(sum / (double)n, b); \
2336+
T current_sum = 0; \
2337+
T remaining_sum = sum; \
2338+
for (size_t i = 0; i < n; ++i) { \
2339+
auto remaining_numbers = n - 1 - i; \
2340+
double mean = (sum - current_sum) / (remaining_numbers + 1); \
2341+
double stdev = std::min(mean - a, b - mean); \
2342+
std::normal_distribution<double> distribution{mean, stdev / 4.0}; \
2343+
T value, remaining_sum_test; \
2344+
do { \
2345+
value = distribution(context->RandGenerator()); \
2346+
remaining_sum_test = remaining_sum - value; \
2347+
} while (value < a || remaining_sum_test < a * remaining_numbers || \
2348+
value > b || remaining_sum_test > b * remaining_numbers); \
2349+
r[i] = value; \
2350+
CAFFE_ENFORCE(a <= value && value <= b); \
2351+
current_sum += value; \
2352+
remaining_sum -= value; \
2353+
CAFFE_ENFORCE_GE(remaining_sum, a* remaining_numbers); \
2354+
CAFFE_ENFORCE_LE(remaining_sum, b* remaining_numbers); \
2355+
} \
2356+
r[n - 1] += remaining_sum; \
2357+
current_sum += remaining_sum; \
2358+
CAFFE_ENFORCE(a <= r[n - 1] && r[n - 1] <= b); \
2359+
CAFFE_ENFORCE_EQ(current_sum, sum); \
23552360
}
23562361
CAFFE2_RAND_FIXED_SUM(float);
23572362
CAFFE2_RAND_FIXED_SUM(double);

caffe2/utils/math_test.cc

+15
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,21 @@ TEST_F(BroadcastTest, BroadcastFloatTest) {
716716
{1.0f, 1.0f, 2.0f, 2.0f, 1.0f, 1.0f, 2.0f, 2.0f});
717717
}
718718

719+
class RandFixedSumTest : public testing::Test {
720+
protected:
721+
void SetUp() override {
722+
cpu_context_ = make_unique<CPUContext>(option_);
723+
}
724+
DeviceOption option_;
725+
std::unique_ptr<CPUContext> cpu_context_;
726+
};
727+
728+
TEST_F(RandFixedSumTest, UpperBound) {
729+
std::vector<int> l(20);
730+
math::RandFixedSum<int, CPUContext>(
731+
20, 1, 1000, 1000, l.data(), cpu_context_.get());
732+
}
733+
719734
class MomentsTest : public testing::Test {
720735
protected:
721736
void SetUp() override {

0 commit comments

Comments
 (0)