Skip to content

Commit

Permalink
Revert "Revert "implemented string hashing in sparqlExpression/String…
Browse files Browse the repository at this point in the history
…Expression.cpp""

This reverts commit 7efc374.
  • Loading branch information
realHannes committed May 10, 2024
1 parent 7efc374 commit 4ab9526
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/engine/sparqlExpressions/NaryExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ SparqlExpression::Ptr makeReplaceExpression(SparqlExpression::Ptr input,
SparqlExpression::Ptr makeStrBeforeExpression(SparqlExpression::Ptr child1,
SparqlExpression::Ptr child2);

// Factory functions for the hashing expressions (MD5, SHA1, SHA256, SHA512).
// Each one takes a single child expression and returns the newly created
// expression as a `SparqlExpression::Ptr`.
SparqlExpression::Ptr makeMD5Expression(SparqlExpression::Ptr child);
SparqlExpression::Ptr makeSHA1Expression(SparqlExpression::Ptr child);
SparqlExpression::Ptr makeSHA256Expression(SparqlExpression::Ptr child);
SparqlExpression::Ptr makeSHA512Expression(SparqlExpression::Ptr child);

SparqlExpression::Ptr makeIfExpression(SparqlExpression::Ptr child1,
SparqlExpression::Ptr child2,
SparqlExpression::Ptr child3);
Expand Down
35 changes: 35 additions & 0 deletions src/engine/sparqlExpressions/StringExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,32 @@ class ConcatExpression : public detail::VariadicExpression {
using EncodeForUriExpression =
StringExpressionImpl<1, decltype(encodeForUriImpl)>;

// HASH
// Generic implementation shared by all hashing expressions. `ClassHash` must
// be default-constructible and provide a member function `hash` that can be
// called with a `std::string` and returns the digest as a
// `std::vector<unsigned char>`. A missing input yields the undefined `Id`,
// any other input yields a literal that contains the digest as lowercase
// hexadecimal digits (two per byte).
template <class ClassHash>
[[maybe_unused]] auto hash =
    [](std::optional<std::string> input) -> IdOrLiteralOrIri {
  // Nothing to hash: propagate the undefined value.
  if (!input.has_value()) {
    return Id::makeUndefined();
  }

  ClassHash hasher;
  std::vector<unsigned char> digest = hasher.hash(input.value());

  // `std::hex` and the fill character stay active on the stream once set,
  // whereas the field width is reset after each insertion and therefore has
  // to be specified again for every byte.
  std::ostringstream hexStream;
  hexStream << std::hex << std::setfill('0');
  for (const unsigned char byte : digest) {
    hexStream << std::setw(2) << static_cast<int>(byte);
  }
  return toLiteral(std::string_view(hexStream.str()));
};
// The concrete hashing expressions: each one is a `StringExpressionImpl` with
// a single child that applies the `hash` lambda instantiated with the
// respective digest class from `ad_utility`.
using MD5Expression =
StringExpressionImpl<1, decltype(hash<ad_utility::HashMD5>)>;
using SHA1Expression =
StringExpressionImpl<1, decltype(hash<ad_utility::HashSHA1>)>;
using SHA256Expression =
StringExpressionImpl<1, decltype(hash<ad_utility::HashSHA256>)>;
using SHA512Expression =
StringExpressionImpl<1, decltype(hash<ad_utility::HashSHA512>)>;

} // namespace detail::string_expressions
using namespace detail::string_expressions;
using std::make_unique;
Expand Down Expand Up @@ -451,4 +477,13 @@ Expr makeConcatExpression(std::vector<Expr> children) {
Expr makeEncodeForUriExpression(Expr child) {
return make<EncodeForUriExpression>(child);
}

// Create an `MD5Expression` with `child` as its only child (via the `make`
// helper) and return it as an `Expr`.
Expr makeMD5Expression(Expr child) { return make<MD5Expression>(child); }

// Create a `SHA1Expression` with `child` as its only child (via the `make`
// helper) and return it as an `Expr`.
Expr makeSHA1Expression(Expr child) { return make<SHA1Expression>(child); }

// Create a `SHA256Expression` with `child` as its only child (via the `make`
// helper) and return it as an `Expr`.
Expr makeSHA256Expression(Expr child) { return make<SHA256Expression>(child); }

// Create a `SHA512Expression` with `child` as its only child (via the `make`
// helper) and return it as an `Expr`.
Expr makeSHA512Expression(Expr child) { return make<SHA512Expression>(child); }

} // namespace sparqlExpression
59 changes: 59 additions & 0 deletions src/util/StringUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

#include <absl/strings/str_cat.h>
#include <absl/strings/str_replace.h>
#include <openssl/evp.h>
#include <openssl/md5.h>
#include <openssl/sha.h>
#include <unicode/bytestream.h>
#include <unicode/casemap.h>

#include <memory>
#include <stdexcept>

Expand Down Expand Up @@ -305,6 +308,62 @@ std::string insertThousandSeparator(const std::string_view str,
ostream << std::string_view(std::move(parseIterator), std::end(str));
return ostream.str();
}

// Hash represents the base class for all above specified hash classes.
// `getHash` produces with `openssl/evp.h` the hash value to a given input
// with the function specified in the inheriting class.
class Hash {
protected:
virtual std::vector<unsigned char> hash(std::string& input) = 0;

std::vector<unsigned char> getHash(std::string& input, const EVP_MD* md,
size_t length) {
EVP_MD_CTX* ctx = EVP_MD_CTX_new();
std::vector<unsigned char> hashed(length);
EVP_DigestInit_ex(ctx, md, NULL);
EVP_DigestUpdate(ctx, input.c_str(), input.length());
EVP_DigestFinal(ctx, hashed.data(), NULL);
EVP_MD_CTX_free(ctx);
return hashed;
}
};

// class `HashMD5` contains a method `hash` which takes `std::string`
// and returns `std::vector<unsigned char>`, the resulting hash value
class HashMD5 : protected Hash {
public:
std::vector<unsigned char> hash(std::string& input) override {
return Hash::getHash(input, EVP_md5(), MD5_DIGEST_LENGTH);
}
};

// class `HashSHA1` contains a method `hash` which takes `std::string&`
// and returns `std::vector<unsigned char>`, the resulting hash value
class HashSHA1 : protected Hash {
public:
std::vector<unsigned char> hash(std::string& input) override {
return Hash::getHash(input, EVP_sha1(), SHA_DIGEST_LENGTH);
}
};

// class `HashSHA256` contains a method `hash` which takes `std::string&`
// and returns `std::vector<unsigned char>`, the resulting hash value
class HashSHA256 : protected Hash {
public:
std::vector<unsigned char> hash(std::string& input) override {
return Hash::getHash(input, EVP_sha256(), SHA256_DIGEST_LENGTH);
}
};

// class `HashSHA512` contains a method `hash` which takes `std::string&`
// and returns `std::vector<unsigned char>`, the resulting hash value
class HashSHA512 : protected Hash {
public:
std::vector<unsigned char> hash(std::string& input) override {
return Hash::getHash(input, EVP_sha512(), SHA512_DIGEST_LENGTH);
}
};

} // namespace ad_utility

// these overloads are missing in the STL
Expand Down
30 changes: 30 additions & 0 deletions test/SparqlExpressionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,36 @@ TEST(SparqlExpression, testToNumericExpression) {
{U, I(-12475), I(42), I(0), D(-14.57), D(33.0), D(0.00001)}),
Ids{U, D(-12475.00), D(42.00), D(0.00), D(-14.57), D(33.00), D(0.00001)});
}
// ____________________________________________________________________________
// Checks the expressions created by `makeMD5Expression`,
// `makeSHA1Expression`, `makeSHA256Expression` and `makeSHA512Expression`.
// Every expression is evaluated on the inputs `U`, the empty string and
// "FriburG23o"; the expected results are `U` followed by the hex digests of
// the two strings.
// NOTE(review): `U` presumably denotes the undefined Id — confirm against the
// helpers of this test file.
TEST(SparqlExpression, testStrToHashExpressions) {
auto checkGetMD5Expression = testUnaryExpression<&makeMD5Expression>;
auto checkGetSHA1Expression = testUnaryExpression<&makeSHA1Expression>;
auto checkGetSHA256Expression = testUnaryExpression<&makeSHA256Expression>;
auto checkGetSHA512Expression = testUnaryExpression<&makeSHA512Expression>;
// MD5
checkGetMD5Expression(
idOrLitOrStringVec({U, "", "FriburG23o"}),
idOrLitOrStringVec({U, "d41d8cd98f00b204e9800998ecf8427e",
"32521b354b953d386e86ff013fae7fe9"}));
// SHA1
checkGetSHA1Expression(
idOrLitOrStringVec({U, "", "FriburG23o"}),
idOrLitOrStringVec({U, "da39a3ee5e6b4b0d3255bfef95601890afd80709",
"a5a3144774c87ed39fe8cd1e9513fdd5624f2caa"}));
// SHA256. The second expected digest is written as two adjacent string
// literals, which the compiler concatenates into a single string.
checkGetSHA256Expression(
idOrLitOrStringVec({U, "", "FriburG23o"}),
idOrLitOrStringVec(
{U,
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
"82ae72fda858128e6b26d1905e2ff53137bbafa0c1d7c7b8e978a6696bc5bdb"
"0"}));
// SHA512. Both expected digests are split into two adjacent string literals.
checkGetSHA512Expression(
idOrLitOrStringVec({U, "", "FriburG23o"}),
idOrLitOrStringVec(
{U,
"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d"
"0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
"4a1e28e116360f7692877a0bcb45a9566d5d4015fd7ed8821f77108e5e7547369f4"
"e56fa835df65459400623a6f6988b16aa54c07e34b5aea516adb3c13b7ce8"}));
}

// ____________________________________________________________________________
TEST(SparqlExpression, geoSparqlExpressions) {
Expand Down
44 changes: 44 additions & 0 deletions test/StringUtilsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ using ad_utility::getUTF8Substring;
using ad_utility::utf8ToLower;
using ad_utility::utf8ToUpper;

// Test helper: render a sequence of bytes as a string of lowercase
// hexadecimal digits, two digits per byte (e.g. {0x0f, 0xab} -> "0fab").
[[maybe_unused]] auto toString =
    [](std::vector<unsigned char> input) -> std::string {
  const char* const hexDigits = "0123456789abcdef";
  std::string result;
  result.reserve(input.size() * 2);
  for (const unsigned char byte : input) {
    // High nibble first, then low nibble.
    result.push_back(hexDigits[byte >> 4]);
    result.push_back(hexDigits[byte & 0x0F]);
  }
  return result;
};

TEST(StringUtilsTest, utf8ToLower) {
EXPECT_EQ("schindler's list", utf8ToLower("Schindler's List"));
EXPECT_EQ("#+-_foo__bar++", utf8ToLower("#+-_foo__Bar++"));
Expand Down Expand Up @@ -321,3 +332,36 @@ TEST(StringUtilsTest, findLiteralEnd) {
EXPECT_EQ(findLiteralEnd("no\\\"thi\"ng", "\""), 7u);
EXPECT_EQ(findLiteralEnd("no\\\\\"thing", "\""), 4u);
}

// TEST Hash Class
// Despite its name this test covers all four digest classes (`HashMD5`,
// `HashSHA1`, `HashSHA256`, `HashSHA512`): each one hashes the empty string
// and "Friburg23o", and the hex representation of the result is compared
// with the expected digest.
TEST(StringUtilsTest, testStrHashMD5) {
  using namespace ad_utility;
  std::string emptyInput;
  std::string sampleInput = "Friburg23o";

  // MD5
  {
    HashMD5 md5;
    auto emptyDigest = toString(md5.hash(emptyInput));
    auto sampleDigest = toString(md5.hash(sampleInput));
    EXPECT_EQ(emptyDigest, "d41d8cd98f00b204e9800998ecf8427e");
    EXPECT_EQ(sampleDigest, "9d9a73f67e20835e516029541595c381");
  }

  // SHA1
  {
    HashSHA1 sha1;
    auto emptyDigest = toString(sha1.hash(emptyInput));
    auto sampleDigest = toString(sha1.hash(sampleInput));
    EXPECT_EQ(emptyDigest, "da39a3ee5e6b4b0d3255bfef95601890afd80709");
    EXPECT_EQ(sampleDigest, "c3a77a6104fa091f590f594b3e2dba2668196d3c");
  }

  // SHA256
  {
    HashSHA256 sha256;
    auto emptyDigest = toString(sha256.hash(emptyInput));
    auto sampleDigest = toString(sha256.hash(sampleInput));
    EXPECT_EQ(
        emptyDigest,
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
    EXPECT_EQ(
        sampleDigest,
        "af8d98f09845a700aea36b35e8cc3a35632e38d0f7be9c0ca508e53c578da900");
  }

  // SHA512
  {
    HashSHA512 sha512;
    auto emptyDigest = toString(sha512.hash(emptyInput));
    auto sampleDigest = toString(sha512.hash(sampleInput));
    EXPECT_EQ(
        emptyDigest,
        "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47"
        "d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e");
    EXPECT_EQ(
        sampleDigest,
        "be4422bfad59ee51e98dc51c540dc9d85333cb786333b152d13b2bebde1bdaa499"
        "e9d4e1370a5bb2e831f4443b1358f2301fd5214ba80554ea0ff1d185c3b027");
  }
}

0 comments on commit 4ab9526

Please sign in to comment.