Skip to content

Commit

Permalink
add ut for murmur3hash
Browse files Browse the repository at this point in the history
  • Loading branch information
marin-ma committed Apr 9, 2024
1 parent ef95f3d commit e91764e
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 21 deletions.
106 changes: 106 additions & 0 deletions velox/functions/sparksql/tests/HashTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include <stdint.h>

using facebook::velox::test::assertEqualVectors;

namespace facebook::velox::functions::sparksql::test {
namespace {

Expand All @@ -27,6 +29,10 @@ class HashTest : public SparkFunctionBaseTest {
std::optional<int32_t> hash(std::optional<T> arg) {
return evaluateOnce<int32_t>("hash(c0)", arg);
}

// Vector overload: wraps `vector` as the only column (c0) of a row vector and
// returns the result of evaluating "hash(c0)" over it.
VectorPtr hash(VectorPtr vector) {
  auto input = makeRowVector({vector});
  return evaluate("hash(c0)", input);
}
};

TEST_F(HashTest, String) {
Expand Down Expand Up @@ -128,5 +134,105 @@ TEST_F(HashTest, Float) {
EXPECT_EQ(hash<float>(-limits::infinity()), 427440766);
}

// Verifies hash() over array-typed inputs: flat arrays, arrays with null
// elements, nested arrays, arrays of maps and arrays of rows. Each case
// compares the evaluated result against hard-coded expected int32 values.
TEST_F(HashTest, Array) {
// Bigint arrays without nulls; the empty array (row 1) is expected to yield 42.
assertEqualVectors(
makeFlatVector<int32_t>({2101165938, 42, 1045631400}),
hash(makeArrayVector<int64_t>({{1, 2, 3, 4, 5}, {}, {1, 2, 3}})));

// Integer arrays containing null elements; an array holding only a null is
// expected to yield 42.
assertEqualVectors(
makeFlatVector<int32_t>({-559580957, 1765031574, 42}),
hash(makeNullableArrayVector<int32_t>(
{{1, std::nullopt}, {std::nullopt, 2}, {std::nullopt}})));

// Nested array.
{
using innerArrayType = std::vector<std::optional<int64_t>>;
using outerArrayType =
std::vector<std::optional<std::vector<std::optional<int64_t>>>>;

innerArrayType a{1, std::nullopt, 2, 3};
innerArrayType b{4, 5};
innerArrayType c{6, 7, 8};
// row1 is [a, b] and row2 is [a, c].
outerArrayType row1{{a}, {b}};
outerArrayType row2{{a}, {c}};
// NOTE(review): row3 / row4 are presumably an outer array with an empty or
// null-only inner array; the exact shape depends on brace-init resolution —
// confirm.
outerArrayType row3{{{}}};
outerArrayType row4{{{std::nullopt}}};
// The fifth row is a null outer array.
auto arrayVector = makeNullableNestedArrayVector<int64_t>(
{{row1}, {row2}, {row3}, {row4}, std::nullopt});
// The expected value for row1 equals the one for the flat array
// {1, 2, 3, 4, 5} in the first assertion; rows 3, 4 and the null row are all
// expected to yield 42.
assertEqualVectors(
makeFlatVector<int32_t>({2101165938, -992561130, 42, 42, 42}),
hash(arrayVector));
}

// Array of map.
{
using S = StringView;
using P = std::pair<int64_t, std::optional<S>>;
// Map `a` has a null value for key 2.
std::vector<P> a{P{1, S{"a"}}, P{2, std::nullopt}};
std::vector<P> b{P{3, S{"c"}}};
// A single row whose array holds the two maps `a` and `b`.
std::vector<std::vector<std::vector<P>>> data = {{a, b}};
auto arrayVector = makeArrayOfMapVector<int64_t, S>(data);
assertEqualVectors(
makeFlatVector<int32_t>(std::vector<int32_t>{-718462205}),
hash(arrayVector));
}

// Array of row.
{
// Three arrays of (integer, varchar) rows: one without nulls, one with a null
// row in the middle, and one containing only a null row (expected 42).
std::vector<std::vector<std::optional<std::tuple<int32_t, std::string>>>>
data = {
{{{1, "red"}}, {{2, "blue"}}, {{3, "green"}}},
{{{1, "red"}}, std::nullopt, {{3, "green"}}},
{std::nullopt},
};
auto arrayVector = makeArrayOfRowVector(data, ROW({INTEGER(), VARCHAR()}));
assertEqualVectors(
makeFlatVector<int32_t>({-1458343314, 551500425, 42}),
hash(arrayVector));
}
}

// Verifies hash() on map-typed inputs: a bigint -> double map, maps with
// array values, and maps whose array values include nulls. Each case compares
// the evaluated result against hard-coded expected int32 values.
TEST_F(HashTest, Map) {
  // One row holding six bigint -> double entries.
  {
    auto input = makeMapVector<int64_t, double>(
        {{{1, 17.0}, {2, 36.0}, {3, 8.0}, {4, 28.0}, {5, 24.0}, {6, 32.0}}});
    auto expected = makeFlatVector<int32_t>(std::vector<int32_t>{1263683448});
    assertEqualVectors(expected, hash(input));
  }

  // Three rows, each a single-entry map with an integer array value.
  {
    auto input = createMapOfArraysVector<int32_t, int32_t>(
        {{{1, {{1, 2, 3}}}}, {{2, {{4, 5, 6}}}}, {{3, {{7, 8, 9}}}}});
    auto expected =
        makeFlatVector<int32_t>({-1818148947, 529298908, 825098912});
    assertEqualVectors(expected, hash(input));
  }

  // Same layout, but the array values involve nulls: a null array for key 1
  // and an array with a trailing null element for key 2.
  {
    auto input = createMapOfArraysVector<int64_t, int64_t>(
        {{{1, std::nullopt}}, {{2, {{4, 5, std::nullopt}}}}, {{3, {{}}}}});
    auto expected =
        makeFlatVector<int32_t>({-1712319331, 2060637564, 519220707});
    assertEqualVectors(expected, hash(input));
  }
}

// Verifies hash() on row-typed inputs: rows without nulls, rows with null
// fields, and rows that are themselves marked null.
TEST_F(HashTest, Row) {
  // Two bigint columns, no nulls anywhere.
  auto plainRows = makeRowVector({
      makeFlatVector<int64_t>({1, 3}),
      makeFlatVector<int64_t>({2, 4}),
  });
  auto plainExpected = makeFlatVector<int32_t>({-1181176833, 1717636039});
  assertEqualVectors(plainExpected, hash(plainRows));

  // Each row has exactly one null field.
  auto nullableRows = makeRowVector({
      makeNullableFlatVector<int64_t>({1, std::nullopt}),
      makeNullableFlatVector<int64_t>({std::nullopt, 4}),
  });
  auto nullFieldExpected = makeFlatVector<int32_t>({-1712319331, 1344313940});
  assertEqualVectors(nullFieldExpected, hash(nullableRows));

  // Marking row 0 itself as null changes its expected value to 42 while row 1
  // keeps its previous hash.
  nullableRows->setNull(0, true);
  auto firstNullExpected = makeFlatVector<int32_t>({42, 1344313940});
  assertEqualVectors(firstNullExpected, hash(nullableRows));

  // With both rows null, both expected values are 42.
  nullableRows->setNull(1, true);
  auto allNullExpected = makeFlatVector<int32_t>({42, 42});
  assertEqualVectors(allNullExpected, hash(nullableRows));
}

} // namespace
} // namespace facebook::velox::functions::sparksql::test
36 changes: 15 additions & 21 deletions velox/functions/sparksql/tests/XxHash64Test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,11 @@ TEST_F(XxHash64Test, float) {

TEST_F(XxHash64Test, array) {
assertEqualVectors(
makeFlatVector<int64_t>(
std::vector<int64_t>{-6041664978295882827, 42, 4904562767517797033}),
makeFlatVector<int64_t>({-6041664978295882827, 42, 4904562767517797033}),
xxhash64(makeArrayVector<int64_t>({{1, 2, 3, 4, 5}, {}, {1, 2, 3}})));

assertEqualVectors(
makeFlatVector<int64_t>(
std::vector<int64_t>{-6698625589789238999, 8420071140774656230, 42}),
makeFlatVector<int64_t>({-6698625589789238999, 8420071140774656230, 42}),
xxhash64(makeNullableArrayVector<int32_t>(
{{1, std::nullopt}, {std::nullopt, 2}, {std::nullopt}})));

Expand All @@ -172,8 +170,8 @@ TEST_F(XxHash64Test, array) {
auto arrayVector = makeNullableNestedArrayVector<int64_t>(
{{row1}, {row2}, {row3}, {row4}, std::nullopt});
assertEqualVectors(
makeFlatVector<int64_t>(std::vector<int64_t>{
-6041664978295882827, -1052942565807509112, 42, 42, 42}),
makeFlatVector<int64_t>(
{-6041664978295882827, -1052942565807509112, 42, 42, 42}),
xxhash64(arrayVector));
}

Expand All @@ -200,8 +198,8 @@ TEST_F(XxHash64Test, array) {
};
auto arrayVector = makeArrayOfRowVector(data, ROW({INTEGER(), VARCHAR()}));
assertEqualVectors(
makeFlatVector<int64_t>(std::vector<int64_t>{
-4096178443626566478, -8973283971856715104, 42}),
makeFlatVector<int64_t>(
{-4096178443626566478, -8973283971856715104, 42}),
xxhash64(arrayVector));
}
}
Expand All @@ -216,15 +214,15 @@ TEST_F(XxHash64Test, map) {
auto mapOfArrays = createMapOfArraysVector<int32_t, int32_t>(
{{{1, {{1, 2, 3}}}}, {{2, {{4, 5, 6}}}}, {{3, {{7, 8, 9}}}}});
assertEqualVectors(
makeFlatVector<int64_t>(std::vector<int64_t>{
-2103781794412908874, 1112887818746642853, 5787852566364222439}),
makeFlatVector<int64_t>(
{-2103781794412908874, 1112887818746642853, 5787852566364222439}),
xxhash64(mapOfArrays));

auto mapWithNullArrays = createMapOfArraysVector<int64_t, int64_t>(
{{{1, std::nullopt}}, {{2, {{4, 5, std::nullopt}}}}, {{3, {{7, 8, 9}}}}});
{{{1, std::nullopt}}, {{2, {{4, 5, std::nullopt}}}}, {{3, {{}}}}});
assertEqualVectors(
makeFlatVector<int64_t>(std::vector<int64_t>{
-7001672635703045582, 7217681953522744649, 5785528104873330081}),
makeFlatVector<int64_t>(
{-7001672635703045582, 7217681953522744649, 3188756510806108107}),
xxhash64(mapWithNullArrays));
}

Expand All @@ -234,27 +232,23 @@ TEST_F(XxHash64Test, row) {
makeFlatVector<int64_t>({2, 4}),
});
assertEqualVectors(
makeFlatVector<int64_t>(
std::vector<int64_t>{-8198029865082835910, 351067884137457704}),
makeFlatVector<int64_t>({-8198029865082835910, 351067884137457704}),
xxhash64(row));

row = makeRowVector({
makeNullableFlatVector<int64_t>({1, std::nullopt}),
makeNullableFlatVector<int64_t>({std::nullopt, 4}),
});
assertEqualVectors(
makeFlatVector<int64_t>(
std::vector<int64_t>{-7001672635703045582, 404280023041566627}),
makeFlatVector<int64_t>({-7001672635703045582, 404280023041566627}),
xxhash64(row));

row->setNull(0, true);
assertEqualVectors(
makeFlatVector<int64_t>(std::vector<int64_t>{42, 404280023041566627}),
xxhash64(row));
makeFlatVector<int64_t>({42, 404280023041566627}), xxhash64(row));

row->setNull(1, true);
assertEqualVectors(
makeFlatVector<int64_t>(std::vector<int64_t>{42, 42}), xxhash64(row));
assertEqualVectors(makeFlatVector<int64_t>({42, 42}), xxhash64(row));
}

TEST_F(XxHash64Test, hashSeed) {
Expand Down

0 comments on commit e91764e

Please sign in to comment.