@@ -133,9 +133,21 @@ template <
133
133
// Base class of the vector hashers shown in this diff. In the added ('+')
// lines the null check moves into this class: hashAt() stops being a pure
// virtual and subclasses implement hashNotNull() instead.
class VectorHasher {
134
134
public:
135
135
// Compute the hash value of input vector at index.
136
- virtual ReturnType hashAt (vector_size_t index, SeedType seed) = 0;
136
// Added version: returns `seed` unchanged when decoded_ reports a null at
// `index`; otherwise forwards to hashNotNull() with the same arguments.
+ ReturnType hashAt (vector_size_t index, SeedType seed) {
137
+ if (decoded_.isNullAt (index)) {
138
+ return seed;
139
+ }
140
+ return hashNotNull (index, seed);
141
+ }
142
+
143
// Per-type hashing hook. hashAt() above only calls it after the null check
// has passed.
+ virtual ReturnType hashNotNull (vector_size_t index, SeedType seed) = 0;
144
+
145
// Binds the hasher to `decoded`. Only a reference is kept (see decoded_), so
// the DecodedVector has to outlive this object.
+ VectorHasher (DecodedVector& decoded) : decoded_(decoded) {}
137
146
138
147
virtual ~VectorHasher () = default ;
148
+
149
+ protected:
150
// Decoded view of the input vector; protected so subclasses can read values
// through it.
+ const DecodedVector& decoded_;
139
151
};
140
152
141
153
template <typename HashClass, TypeKind kind>
@@ -169,24 +181,23 @@ template <
169
181
// Hasher for vectors whose elements have the native type selected by `kind`.
// In the added ('+') lines it no longer checks for nulls or owns decoded_;
// both are taken from the VectorHasher base it derives from.
class PrimitiveVectorHasher
170
182
: public VectorHasher<HashClass, SeedType, ReturnType> {
171
183
public:
172
- PrimitiveVectorHasher (DecodedVector& decoded) : decoded_(decoded) {}
184
// NOTE(review): the alias T is introduced here, but the lines visible in this
// hunk still spell out TypeTraits<kind>::NativeType in full — confirm whether
// T is used elsewhere.
+ using T = typename TypeTraits<kind>::NativeType;
173
185
174
- ReturnType hashAt ( vector_size_t index, SeedType seed) override {
175
- if (decoded_. isNullAt (index)) {
176
- return seed;
177
- }
186
// Added constructor: forwards `decoded` to the base class, which stores the
// reference.
+ PrimitiveVectorHasher (DecodedVector& decoded)
187
+ : VectorHasher<HashClass, SeedType, ReturnType>(decoded) {}
188
+
189
// Reads the native value at `index` from the decoded vector and passes it,
// together with `seed`, to hashOne<HashClass>. There is no null check in this
// body.
+ ReturnType hashNotNull ( vector_size_t index, SeedType seed) override {
178
190
return hashOne<HashClass>(
179
- decoded_.valueAt <typename TypeTraits<kind>::NativeType>(index), seed);
191
// decoded_ now lives in a base class that depends on the template parameters,
// hence the `this->` qualification and the `.template` disambiguator.
+ this ->decoded_ .template valueAt <typename TypeTraits<kind>::NativeType>(
192
+ index),
193
+ seed);
180
194
}
181
-
182
- private:
183
- const DecodedVector& decoded_;
184
195
};
185
196
186
197
template <typename HashClass, typename SeedType, typename ReturnType>
187
198
class ArrayVectorHasher : public VectorHasher <HashClass> {
188
199
public:
189
- ArrayVectorHasher (DecodedVector& decoded) {
200
+ ArrayVectorHasher (DecodedVector& decoded) : VectorHasher<HashClass>(decoded) {
190
201
base_ = decoded.base ()->as <ArrayVector>();
191
202
indices_ = decoded.indices ();
192
203
@@ -195,10 +206,7 @@ class ArrayVectorHasher : public VectorHasher<HashClass> {
195
206
elementHasher_ = createVectorHasher<HashClass>(decodedElements_);
196
207
}
197
208
198
- ReturnType hashAt (vector_size_t index, SeedType seed) override {
199
- if (base_->isNullAt (indices_[index])) {
200
- return seed;
201
- }
209
+ ReturnType hashNotNull (vector_size_t index, SeedType seed) override {
202
210
auto size = base_->sizeAt (indices_[index]);
203
211
auto offset = base_->offsetAt (indices_[index]);
204
212
@@ -219,7 +227,7 @@ class ArrayVectorHasher : public VectorHasher<HashClass> {
219
227
template <typename HashClass, typename SeedType, typename ReturnType>
220
228
class MapVectorHasher : public VectorHasher <HashClass, SeedType, ReturnType> {
221
229
public:
222
- MapVectorHasher (DecodedVector& decoded) {
230
+ MapVectorHasher (DecodedVector& decoded) : VectorHasher<HashClass>(decoded) {
223
231
base_ = decoded.base ()->as <MapVector>();
224
232
indices_ = decoded.indices ();
225
233
@@ -230,11 +238,7 @@ class MapVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
230
238
valueHasher_ = createVectorHasher<HashClass>(decodedValues_);
231
239
}
232
240
233
- ReturnType hashAt (vector_size_t index, SeedType seed) override {
234
- if (base_->isNullAt (indices_[index])) {
235
- return seed;
236
- }
237
-
241
+ ReturnType hashNotNull (vector_size_t index, SeedType seed) override {
238
242
auto size = base_->sizeAt (indices_[index]);
239
243
auto offset = base_->offsetAt (indices_[index]);
240
244
@@ -258,7 +262,7 @@ class MapVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
258
262
template <typename HashClass, typename SeedType, typename ReturnType>
259
263
class RowVectorHasher : public VectorHasher <HashClass, SeedType, ReturnType> {
260
264
public:
261
- RowVectorHasher (DecodedVector& decoded) {
265
+ RowVectorHasher (DecodedVector& decoded) : VectorHasher<HashClass>(decoded) {
262
266
base_ = decoded.base ()->as <RowVector>();
263
267
indices_ = decoded.indices ();
264
268
@@ -271,14 +275,10 @@ class RowVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
271
275
}
272
276
}
273
277
274
- ReturnType hashAt (vector_size_t index, SeedType seed) override {
275
- if (base_->isNullAt (indices_[index])) {
276
- return seed;
277
- }
278
-
278
+ ReturnType hashNotNull (vector_size_t index, SeedType seed) override {
279
279
ReturnType result = seed;
280
280
for (auto i = 0 ; i < base_->childrenSize (); ++i) {
281
- result = hashers_[i]->hashAt (index, result);
281
+ result = hashers_[i]->hashAt (indices_[ index] , result);
282
282
}
283
283
return result;
284
284
}
@@ -323,12 +323,13 @@ void applyWithType(
323
323
324
324
auto hasher = createVectorHasher<HashClass>(*decoded);
325
325
selected->applyToSelected ([&](int row) {
326
- result.set (row, hasher->hashAt (row, result.valueAt (row)));
326
+ result.set (row, hasher->hashNotNull (row, result.valueAt (row)));
327
327
});
328
328
}
329
329
}
330
330
331
- // Derived from src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java.
331
+ // Derived from
332
+ // src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java.
332
333
//
333
334
// Spark's Murmur3 seems slightly different from the original from Austin
334
335
// Appleby: in particular the fmix function's first line is different. The
0 commit comments