Commit da73d70

Yangqing Jia authored and facebook-github-bot committed
Remove UnsafeCoalesce op (pytorch#12897)
Summary:
Pull Request resolved: pytorch#12897

The UnsafeCoalesce op dates from the memonger days, when we coalesced operators to get more efficient computation kernels. It creates a somewhat unsafe underlying memory storage pattern: the per-tensor outputs alias a single coalesced blob. With the new tensor unification I am not sure it is still safe for us to do this, so I propose we delete the op for the sake of safety.

Reviewed By: bddppq, ilia-cher

Differential Revision: D10475980

fbshipit-source-id: b1a838c9f47d681c309ee8e2f961b432236e157e
1 parent c774cb8 commit da73d70
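
For readers skimming the diff: the "unsafe underlying memory storage pattern" mentioned above is that every per-tensor output of UnsafeCoalesce aliases a slice of one coalesced buffer. A minimal NumPy sketch of why such aliases are fragile once the owning buffer can be reallocated (illustrative only; NumPy views stand in for caffe2 tensors, and none of these names are caffe2 API):

import numpy as np

# Illustrative only: NumPy views stand in for the aliased caffe2 tensors;
# none of these names are caffe2 API.
coalesced = np.zeros(16, dtype=np.uint8)   # the single coalesced blob
out0 = coalesced[0:8]                      # Output(0) aliases offset 0
out1 = coalesced[8:16]                     # Output(1) aliases offset 8
assert out0.base is coalesced and out1.base is coalesced

out0[:] = 1
assert coalesced[0] == 1                   # writes go through the alias

# If the blob's storage is ever reallocated (resized, moved, reclaimed),
# the aliases silently keep pointing at the old storage:
coalesced = np.zeros(32, dtype=np.uint8)   # simulate a reallocation
out0[:] = 2
assert coalesced[0] == 0                   # out0 no longer aliases the blob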

File tree

4 files changed: +0 −117 lines

caffe2/operators/utility_ops.cc

Lines changed: 0 additions & 28 deletions
@@ -72,8 +72,6 @@ OPERATOR_SCHEMA(WallClockTime)
     .SetDoc("Time since epoch in nanoseconds.")
     .Output(0, "time", "The time in nanoseconds.");

-REGISTER_CPU_OPERATOR(UnsafeCoalesce, UnsafeCoalesceOp<CPUContext>);
-
 OPERATOR_SCHEMA(Print)
     .NumInputs(1)
     .NumOutputs(0)
@@ -649,31 +647,6 @@ weights derived by lengths. i.e 1/pow(length, power)

 SHOULD_NOT_DO_GRADIENT(WallClockTime);

-OPERATOR_SCHEMA(UnsafeCoalesce)
-    .NumInputsOutputs([](int inputs, int outputs) {
-      return inputs + 1 == outputs;
-    })
-    .AllowInplace([](int input, int output) { return input == output; })
-    .SetDoc(R"DOC(
-Coalesce the N inputs into N outputs and a single coalesced output blob.
-
-This allows operations that operate over multiple small kernels (e.g.
-biases in a deep CNN) to be coalesced into a single larger operation,
-amortizing the kernel launch overhead, synchronization costs for
-distributed computation, etc.
-
-The operator:
-
-- computes the total size of the coalesced blob by summing the input sizes
-- allocates the coalesced output blob as the total size
-- copies the input vectors into the coalesced blob, at the correct offset.
-- aliases each Output(i) to point into the coalesced blob, at the corresponding offset for Input(i).
-
-This is 'unsafe' as the output vectors are aliased, so use with
-caution.
-
-)DOC");
-
 OPERATOR_SCHEMA(EnsureDense)
     .NumInputs(1)
     .NumOutputs(1)
@@ -739,7 +712,6 @@ SHOULD_NOT_DO_GRADIENT(Print);
 SHOULD_NOT_DO_GRADIENT(HasElements);
 SHOULD_NOT_DO_GRADIENT(IsEmpty);
 SHOULD_NOT_DO_GRADIENT(LengthsToShape);
-SHOULD_NOT_DO_GRADIENT(UnsafeCoalesce);

 class GetAliasGradient : public GradientMakerBase {
   using GradientMakerBase::GradientMakerBase;
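
The removed schema doc above lists the operator's four steps: sum the input sizes, allocate the coalesced blob, copy each input in at its offset, and alias each output into the blob. A self-contained NumPy sketch of those steps (hypothetical helper names, not caffe2 API; the 64-byte alignment is assumed from the implementation and test removed below):

import numpy as np

ALIGNMENT = 64  # assumed value of gCaffe2Alignment (the removed test uses 64)

def round_to_alignment(nbytes):
    # same arithmetic as the removed roundToAlignment lambda in utility_ops.h
    return ((nbytes + ALIGNMENT - 1) // ALIGNMENT) * ALIGNMENT

def unsafe_coalesce(inputs):
    # 1. total size of the coalesced blob = sum of (aligned) input sizes
    total = sum(round_to_alignment(x.nbytes) for x in inputs)
    # 2. allocate the coalesced output blob at that size, zero-filled
    coalesced = np.zeros(total, dtype=np.uint8)
    # 3. copy each input into the blob at its offset, and
    # 4. alias each output as a view into the blob at the same offset
    outputs, offset = [], 0
    for x in inputs:
        coalesced[offset:offset + x.nbytes] = np.frombuffer(x.tobytes(), np.uint8)
        view = coalesced[offset:offset + x.nbytes].view(x.dtype).reshape(x.shape)
        outputs.append(view)   # aliases the coalesced storage, hence "unsafe"
        offset += round_to_alignment(x.nbytes)
    return outputs + [coalesced]

outs = unsafe_coalesce([np.arange(3, dtype=np.float32), np.ones(5)])
assert outs[0].base is not None   # each output is a view into the blob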

caffe2/operators/utility_ops.cu

Lines changed: 0 additions & 2 deletions
@@ -44,8 +44,6 @@ REGISTER_CUDA_OPERATOR(ResizeLike, ResizeLikeOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(Sum, SumOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(WeightedSum, WeightedSumOp<CUDAContext>);

-REGISTER_CUDA_OPERATOR(UnsafeCoalesce, UnsafeCoalesceOp<CUDAContext>);
-
 CAFFE_KNOWN_TYPE(const float*);

 REGISTER_CUDA_OPERATOR(EnsureDense, EnsureDenseOp<CUDAContext>);

caffe2/operators/utility_ops.h

Lines changed: 0 additions & 57 deletions
@@ -1144,63 +1144,6 @@ class LengthsGatherOp : public Operator<Context> {
   INPUT_TAGS(ITEMS, LENGTHS, INDICES);
 };

-template <class Context>
-class UnsafeCoalesceOp final : public Operator<Context> {
- public:
-  USE_OPERATOR_CONTEXT_FUNCTIONS;
-  using Operator<Context>::Operator;
-
-  bool RunOnDevice() override {
-    size_t coalesced_size = 0;
-    for (int i = 0; i < InputSize(); ++i) {
-      CAFFE_ENFORCE(
-          !Input(i).meta().placementNew(),
-          "Must only coalesce fundamental types, error at input: ",
-          i);
-    }
-
-    auto roundToAlignment = [](size_t bytes) -> size_t {
-      return ((bytes + gCaffe2Alignment - 1) / gCaffe2Alignment) *
-          gCaffe2Alignment;
-    };
-
-    for (int i = 0; i < InputSize(); ++i) {
-      coalesced_size += roundToAlignment(Input(i).nbytes());
-    }
-
-    auto* coalesced = Output(OutputSize() - 1);
-    coalesced->Resize(coalesced_size);
-    math::Set<uint8_t, Context>(
-        coalesced_size,
-        0.0,
-        coalesced->template mutable_data<uint8_t>(),
-        &context_);
-
-    size_t coalesced_offset = 0;
-    for (auto i = 0; i < InputSize(); ++i) {
-      const auto input_nbytes = Input(i).nbytes();
-      context_.CopyBytesSameDevice(
-          input_nbytes,
-          (const uint8_t*)Input(i).raw_data(),
-          coalesced->template mutable_data<uint8_t>() + coalesced_offset);
-
-      // Note: this could cause Input(i) to free its data if
-      // Output(i) and Input(i) alias each other. This is safe on a
-      // GPU (as the copy will happen-before the free), but it's
-      // worth mentioning.
-
-      Output(i)->ResizeLike(Input(i));
-      Output(i)->ShareExternalPointer(
-          static_cast<void*>(
-              coalesced->template mutable_data<uint8_t>() + coalesced_offset),
-          Input(i).meta(),
-          input_nbytes);
-      coalesced_offset += roundToAlignment(input_nbytes);
-    }
-    return true;
-  }
-};
-
 template <typename T, class Context>
 class AccumulateHistogramOp : public Operator<Context> {
  public:
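
The roundToAlignment lambda removed above pads each input's byte count up to the next multiple of gCaffe2Alignment, so every output's offset into the coalesced blob stays aligned. A worked example of that integer arithmetic, assuming the 64-byte value the removed test below uses:

ALIGNMENT = 64  # stand-in for gCaffe2Alignment; the removed test uses 64

def round_to_alignment(nbytes):
    # identical arithmetic to the removed C++ lambda
    return ((nbytes + ALIGNMENT - 1) // ALIGNMENT) * ALIGNMENT

assert round_to_alignment(1) == 64    # small buffers pad up to one unit
assert round_to_alignment(64) == 64   # exact multiples are unchanged
assert round_to_alignment(65) == 128  # one byte over spills to the next unit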

caffe2/python/hypothesis_test.py

Lines changed: 0 additions & 30 deletions
@@ -2223,36 +2223,6 @@ def ref_nhwc(x, scale, bias):
         for blob, arr in feeds:
             np.testing.assert_array_equal(ws.blobs[blob].fetch(), arr)

-    @given(sizes=st.lists(st.integers(1, 100), min_size=1),
-           in_place=st.booleans(),
-           **hu.gcs)
-    def test_unsafe_coalesce(self, sizes, in_place, gc, dc):
-        gAlignment = 64
-        Xs = [np.random.randn(size)
-              .astype(np.random.choice([np.float32, np.float64, np.uint8]))
-              for size in sizes]
-        op = core.CreateOperator(
-            "UnsafeCoalesce",
-            ["X_{}".format(i) for i, _ in enumerate(sizes)],
-            [("X_{}" if in_place else "Y_{}").format(i)
-             for i, _ in enumerate(sizes)] + ["coalesced"])
-        self.assertDeviceChecks(dc, op, Xs, list(range(len(sizes) + 1)))
-
-        def unsafe_coalesce(*xs):
-            def to_uint8(x):
-                x_aligned_bytes = ((x.nbytes + gAlignment - 1) // gAlignment) \
-                    * gAlignment
-                x_aligned = np.zeros(
-                    shape=(x_aligned_bytes // x.dtype.itemsize, ),
-                    dtype=x.dtype)
-                x_aligned[:x.size] = x
-                x_cast = np.fromstring(x_aligned.tobytes(), dtype='<u1')
-                return x_cast
-            flat = [to_uint8(x) for x in xs]
-            coalesced = np.concatenate(flat)
-            return list(xs) + [coalesced]
-        self.assertReferenceChecks(gc, op, Xs, unsafe_coalesce)
-
     @given(inp=_dtypes().flatmap(lambda dt: _tensor_and_indices(
         elements=st.floats(min_value=0, max_value=1), dtype=dt)),
            **hu.gcs)
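
As an aside on the removed reference implementation: np.fromstring is deprecated in current NumPy in favor of np.frombuffer. A sketch of the removed to_uint8 helper in present-day NumPy (not part of the commit):

import numpy as np

def to_uint8(x, alignment=64):
    # pad x out to the next multiple of `alignment` bytes, then
    # reinterpret the padded buffer as little-endian uint8
    aligned_bytes = ((x.nbytes + alignment - 1) // alignment) * alignment
    x_aligned = np.zeros(aligned_bytes // x.dtype.itemsize, dtype=x.dtype)
    x_aligned[:x.size] = x
    return np.frombuffer(x_aligned.tobytes(), dtype='<u1')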
