Skip to content

Commit 6b9d2bc

Browse files
committed
optimize
1 parent e91764e commit 6b9d2bc

File tree

1 file changed

+31
-30
lines changed

1 file changed

+31
-30
lines changed

velox/functions/sparksql/Hash.cpp

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,21 @@ template <
133133
class VectorHasher {
134134
public:
135135
// Compute the hash value of input vector at index.
136-
virtual ReturnType hashAt(vector_size_t index, SeedType seed) = 0;
136+
ReturnType hashAt(vector_size_t index, SeedType seed) {
137+
if (decoded_.isNullAt(index)) {
138+
return seed;
139+
}
140+
return hashNotNull(index, seed);
141+
}
142+
143+
virtual ReturnType hashNotNull(vector_size_t index, SeedType seed) = 0;
144+
145+
VectorHasher(DecodedVector& decoded) : decoded_(decoded) {}
137146

138147
virtual ~VectorHasher() = default;
148+
149+
protected:
150+
const DecodedVector& decoded_;
139151
};
140152

141153
template <typename HashClass, TypeKind kind>
@@ -169,24 +181,23 @@ template <
169181
class PrimitiveVectorHasher
170182
: public VectorHasher<HashClass, SeedType, ReturnType> {
171183
public:
172-
PrimitiveVectorHasher(DecodedVector& decoded) : decoded_(decoded) {}
184+
using T = typename TypeTraits<kind>::NativeType;
173185

174-
ReturnType hashAt(vector_size_t index, SeedType seed) override {
175-
if (decoded_.isNullAt(index)) {
176-
return seed;
177-
}
186+
PrimitiveVectorHasher(DecodedVector& decoded)
187+
: VectorHasher<HashClass, SeedType, ReturnType>(decoded) {}
188+
189+
ReturnType hashNotNull(vector_size_t index, SeedType seed) override {
178190
return hashOne<HashClass>(
179-
decoded_.valueAt<typename TypeTraits<kind>::NativeType>(index), seed);
191+
this->decoded_.template valueAt<typename TypeTraits<kind>::NativeType>(
192+
index),
193+
seed);
180194
}
181-
182-
private:
183-
const DecodedVector& decoded_;
184195
};
185196

186197
template <typename HashClass, typename SeedType, typename ReturnType>
187198
class ArrayVectorHasher : public VectorHasher<HashClass> {
188199
public:
189-
ArrayVectorHasher(DecodedVector& decoded) {
200+
ArrayVectorHasher(DecodedVector& decoded) : VectorHasher<HashClass>(decoded) {
190201
base_ = decoded.base()->as<ArrayVector>();
191202
indices_ = decoded.indices();
192203

@@ -195,10 +206,7 @@ class ArrayVectorHasher : public VectorHasher<HashClass> {
195206
elementHasher_ = createVectorHasher<HashClass>(decodedElements_);
196207
}
197208

198-
ReturnType hashAt(vector_size_t index, SeedType seed) override {
199-
if (base_->isNullAt(indices_[index])) {
200-
return seed;
201-
}
209+
ReturnType hashNotNull(vector_size_t index, SeedType seed) override {
202210
auto size = base_->sizeAt(indices_[index]);
203211
auto offset = base_->offsetAt(indices_[index]);
204212

@@ -219,7 +227,7 @@ class ArrayVectorHasher : public VectorHasher<HashClass> {
219227
template <typename HashClass, typename SeedType, typename ReturnType>
220228
class MapVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
221229
public:
222-
MapVectorHasher(DecodedVector& decoded) {
230+
MapVectorHasher(DecodedVector& decoded) : VectorHasher<HashClass>(decoded) {
223231
base_ = decoded.base()->as<MapVector>();
224232
indices_ = decoded.indices();
225233

@@ -230,11 +238,7 @@ class MapVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
230238
valueHasher_ = createVectorHasher<HashClass>(decodedValues_);
231239
}
232240

233-
ReturnType hashAt(vector_size_t index, SeedType seed) override {
234-
if (base_->isNullAt(indices_[index])) {
235-
return seed;
236-
}
237-
241+
ReturnType hashNotNull(vector_size_t index, SeedType seed) override {
238242
auto size = base_->sizeAt(indices_[index]);
239243
auto offset = base_->offsetAt(indices_[index]);
240244

@@ -258,7 +262,7 @@ class MapVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
258262
template <typename HashClass, typename SeedType, typename ReturnType>
259263
class RowVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
260264
public:
261-
RowVectorHasher(DecodedVector& decoded) {
265+
RowVectorHasher(DecodedVector& decoded) : VectorHasher<HashClass>(decoded) {
262266
base_ = decoded.base()->as<RowVector>();
263267
indices_ = decoded.indices();
264268

@@ -271,14 +275,10 @@ class RowVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
271275
}
272276
}
273277

274-
ReturnType hashAt(vector_size_t index, SeedType seed) override {
275-
if (base_->isNullAt(indices_[index])) {
276-
return seed;
277-
}
278-
278+
ReturnType hashNotNull(vector_size_t index, SeedType seed) override {
279279
ReturnType result = seed;
280280
for (auto i = 0; i < base_->childrenSize(); ++i) {
281-
result = hashers_[i]->hashAt(index, result);
281+
result = hashers_[i]->hashAt(indices_[index], result);
282282
}
283283
return result;
284284
}
@@ -323,12 +323,13 @@ void applyWithType(
323323

324324
auto hasher = createVectorHasher<HashClass>(*decoded);
325325
selected->applyToSelected([&](int row) {
326-
result.set(row, hasher->hashAt(row, result.valueAt(row)));
326+
result.set(row, hasher->hashNotNull(row, result.valueAt(row)));
327327
});
328328
}
329329
}
330330

331-
// Derived from src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java.
331+
// Derived from
332+
// src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java.
332333
//
333334
// Spark's Murmur3 seems slightly different from the original from Austin
334335
// Appleby: in particular the fmix function's first line is different. The

0 commit comments

Comments
 (0)