Skip to content

Commit

Permalink
optimize
Browse files Browse the repository at this point in the history
  • Loading branch information
marin-ma committed Apr 9, 2024
1 parent e91764e commit 6b9d2bc
Showing 1 changed file with 31 additions and 30 deletions.
61 changes: 31 additions & 30 deletions velox/functions/sparksql/Hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,21 @@ template <
class VectorHasher {
public:
// Compute the hash value of input vector at index.
virtual ReturnType hashAt(vector_size_t index, SeedType seed) = 0;
ReturnType hashAt(vector_size_t index, SeedType seed) {
if (decoded_.isNullAt(index)) {
return seed;
}
return hashNotNull(index, seed);
}

virtual ReturnType hashNotNull(vector_size_t index, SeedType seed) = 0;

VectorHasher(DecodedVector& decoded) : decoded_(decoded) {}

virtual ~VectorHasher() = default;

protected:
const DecodedVector& decoded_;
};

template <typename HashClass, TypeKind kind>
Expand Down Expand Up @@ -169,24 +181,23 @@ template <
class PrimitiveVectorHasher
: public VectorHasher<HashClass, SeedType, ReturnType> {
public:
PrimitiveVectorHasher(DecodedVector& decoded) : decoded_(decoded) {}
using T = typename TypeTraits<kind>::NativeType;

ReturnType hashAt(vector_size_t index, SeedType seed) override {
if (decoded_.isNullAt(index)) {
return seed;
}
PrimitiveVectorHasher(DecodedVector& decoded)
: VectorHasher<HashClass, SeedType, ReturnType>(decoded) {}

ReturnType hashNotNull(vector_size_t index, SeedType seed) override {
return hashOne<HashClass>(
decoded_.valueAt<typename TypeTraits<kind>::NativeType>(index), seed);
this->decoded_.template valueAt<typename TypeTraits<kind>::NativeType>(
index),
seed);
}

private:
const DecodedVector& decoded_;
};

template <typename HashClass, typename SeedType, typename ReturnType>
class ArrayVectorHasher : public VectorHasher<HashClass> {
public:
ArrayVectorHasher(DecodedVector& decoded) {
ArrayVectorHasher(DecodedVector& decoded) : VectorHasher<HashClass>(decoded) {
base_ = decoded.base()->as<ArrayVector>();
indices_ = decoded.indices();

Expand All @@ -195,10 +206,7 @@ class ArrayVectorHasher : public VectorHasher<HashClass> {
elementHasher_ = createVectorHasher<HashClass>(decodedElements_);
}

ReturnType hashAt(vector_size_t index, SeedType seed) override {
if (base_->isNullAt(indices_[index])) {
return seed;
}
ReturnType hashNotNull(vector_size_t index, SeedType seed) override {
auto size = base_->sizeAt(indices_[index]);
auto offset = base_->offsetAt(indices_[index]);

Expand All @@ -219,7 +227,7 @@ class ArrayVectorHasher : public VectorHasher<HashClass> {
template <typename HashClass, typename SeedType, typename ReturnType>
class MapVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
public:
MapVectorHasher(DecodedVector& decoded) {
MapVectorHasher(DecodedVector& decoded) : VectorHasher<HashClass>(decoded) {
base_ = decoded.base()->as<MapVector>();
indices_ = decoded.indices();

Expand All @@ -230,11 +238,7 @@ class MapVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
valueHasher_ = createVectorHasher<HashClass>(decodedValues_);
}

ReturnType hashAt(vector_size_t index, SeedType seed) override {
if (base_->isNullAt(indices_[index])) {
return seed;
}

ReturnType hashNotNull(vector_size_t index, SeedType seed) override {
auto size = base_->sizeAt(indices_[index]);
auto offset = base_->offsetAt(indices_[index]);

Expand All @@ -258,7 +262,7 @@ class MapVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
template <typename HashClass, typename SeedType, typename ReturnType>
class RowVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
public:
RowVectorHasher(DecodedVector& decoded) {
RowVectorHasher(DecodedVector& decoded) : VectorHasher<HashClass>(decoded) {
base_ = decoded.base()->as<RowVector>();
indices_ = decoded.indices();

Expand All @@ -271,14 +275,10 @@ class RowVectorHasher : public VectorHasher<HashClass, SeedType, ReturnType> {
}
}

ReturnType hashAt(vector_size_t index, SeedType seed) override {
if (base_->isNullAt(indices_[index])) {
return seed;
}

ReturnType hashNotNull(vector_size_t index, SeedType seed) override {
ReturnType result = seed;
for (auto i = 0; i < base_->childrenSize(); ++i) {
result = hashers_[i]->hashAt(index, result);
result = hashers_[i]->hashAt(indices_[index], result);
}
return result;
}
Expand Down Expand Up @@ -323,12 +323,13 @@ void applyWithType(

auto hasher = createVectorHasher<HashClass>(*decoded);
selected->applyToSelected([&](int row) {
result.set(row, hasher->hashAt(row, result.valueAt(row)));
result.set(row, hasher->hashNotNull(row, result.valueAt(row)));
});
}
}

// Derived from src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java.
// Derived from
// src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java.
//
// Spark's Murmur3 seems slightly different from the original from Austin
// Appleby: in particular the fmix function's first line is different. The
Expand Down

0 comments on commit 6b9d2bc

Please sign in to comment.