Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spatial join bounding box #1546

Merged
merged 25 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e9ed3e4
add BoundingBox Algorithm
Jonathan24680 Oct 18, 2024
ada2978
Sonarqube issues
Jonathan24680 Nov 3, 2024
eeadcd7
Sonarqube issues
Jonathan24680 Nov 4, 2024
127ba53
dont use cpp 20 version constant of pi
Jonathan24680 Nov 4, 2024
dcb60dd
Merge branch 'master' into SpatialJoinBoundingBox
Jonathan24680 Nov 4, 2024
be09a86
part of first rework
Jonathan24680 Nov 8, 2024
c4ddec6
rework of some parts of the tests
Jonathan24680 Nov 9, 2024
122372b
mr feedback
Jonathan24680 Nov 18, 2024
12f3c13
Merge branch 'master' into SpatialJoinBoundingBox
Jonathan24680 Nov 18, 2024
213b02e
remove second definition of function
Jonathan24680 Nov 19, 2024
ecda249
add new macros AD_LOG vs LOG
joka921 Nov 19, 2024
97d039b
Merge branch 'AD_LOG' of https://github.com/joka921/QLever into AD_LOG
Jonathan24680 Nov 19, 2024
603ec46
Merge branch 'AD_LOG' into SpatialJoinBoundingBox
Jonathan24680 Nov 19, 2024
3831da8
PR feedback
Jonathan24680 Nov 20, 2024
9ffaa0a
Apply the suggestion by Hannah.
joka921 Nov 20, 2024
032eade
Clang-format.
joka921 Nov 20, 2024
d50e2cc
Merge branch 'AD_LOG' of https://github.com/joka921/QLever into AD_LOG
Jonathan24680 Nov 20, 2024
24e31c6
Merge branch 'AD_LOG' into SpatialJoinBoundingBox
Jonathan24680 Nov 20, 2024
8285355
merge log change and fix allocator
Jonathan24680 Nov 20, 2024
56e1618
PR feedback
Jonathan24680 Nov 20, 2024
7d8b98b
pr feedback
Jonathan24680 Nov 21, 2024
89491a1
PR feedback
Jonathan24680 Nov 21, 2024
8743986
merge master into SpatialJoinBoundingBox
Jonathan24680 Nov 22, 2024
23f2ed2
PR feedback
Jonathan24680 Nov 22, 2024
ce5db45
MR feedback
Jonathan24680 Nov 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/engine/SpatialJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,12 +336,13 @@ PreparedSpatialJoinParams SpatialJoin::prepareJoin() const {
// ____________________________________________________________________________
Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) {
SpatialJoinAlgorithms algorithms{_executionContext, prepareJoin(),
addDistToResult_, config_};
addDistToResult_, config_, this};
if (algorithm_ == Algorithm::Baseline) {
return algorithms.BaselineAlgorithm();
} else if (algorithm_ == Algorithm::S2Geometry) {
return algorithms.S2geometryAlgorithm();
} else if (algorithm_ == Algorithm::BoundingBox) {
} else {
AD_CORRECTNESS_CHECK(algorithm_ == Algorithm::BoundingBox);
// as the BoundingBoxAlgorithms only works for max distance and not for
// nearest neighbors, S2geometry gets called as a backup, if the query is
// asking for the nearest neighbors
Expand All @@ -350,8 +351,6 @@ Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) {
} else {
return algorithms.S2geometryAlgorithm();
}
} else {
AD_THROW("choose a valid Algorithm");
}
}

Expand Down
43 changes: 25 additions & 18 deletions src/engine/SpatialJoinAlgorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
SpatialJoinAlgorithms::SpatialJoinAlgorithms(
QueryExecutionContext* qec, PreparedSpatialJoinParams params,
bool addDistToResult,
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config)
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config,
std::optional<SpatialJoin*> spatialJoin)
: qec_{qec},
params_{std::move(params)},
addDistToResult_{addDistToResult},
config_{std::move(config)} {}
config_{std::move(config)},
spatialJoin_{spatialJoin} {}

// ____________________________________________________________________________
std::optional<GeoPoint> SpatialJoinAlgorithms::getPoint(const IdTable* restable,
Expand Down Expand Up @@ -230,7 +232,7 @@

// ____________________________________________________________________________
std::vector<Box> SpatialJoinAlgorithms::computeBoundingBox(
const Point& startPoint) {
const Point& startPoint) const {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
AD_CORRECTNESS_CHECK(maxDist.has_value(),
Expand All @@ -250,9 +252,9 @@
1.02) {
maxDistInMetersBuffer = 1.01 * static_cast<double>(maxDist.value());
} else {
maxDistInMetersBuffer =
static_cast<double>(std::numeric_limits<long long>::max());
}

Check warning on line 257 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L255-L257

Added lines #L255 - L257 were not covered by tests

// for large distances, where the lower calculation would just result in
// a single bounding box for the whole planet, do an optimized version
Expand Down Expand Up @@ -391,7 +393,7 @@

// ____________________________________________________________________________
bool SpatialJoinAlgorithms::isContainedInBoundingBoxes(
const std::vector<Box>& boundingBox, Point point) {
const std::vector<Box>& boundingBox, Point point) const {
convertToNormalCoordinates(point);

return std::ranges::any_of(boundingBox, [point](const Box& aBox) {
Expand Down Expand Up @@ -431,16 +433,21 @@

// ____________________________________________________________________________
Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
bool alreadyWarned = false;

auto printWarning = [&]() {
if (!alreadyWarned) {
AD_LOG_WARN << "expected a point here, but no point is given. Skipping "
"this point and future 'non points' of this query"
<< std::endl;
alreadyWarned = true;
}
};
auto printWarning =
[alreadyWarned = false](std::optional<SpatialJoin*> spatialJoin) mutable {
if (!alreadyWarned) {
std::string warning =
"The input to a spatial join contained at least one element, "
"that is not a point geometry and is thus skipped. Note that "
"QLever currently only accepts point geometries for the "
"spatial joins";
AD_LOG_WARN << warning << std::endl;
alreadyWarned = true;
if (spatialJoin) {
spatialJoin.value()->addWarning(warning);
}
}
};

const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
Expand All @@ -467,7 +474,7 @@
auto geopoint = getPoint(smallerResult, i, smallerResJoinCol);

if (!geopoint) {
printWarning();
printWarning(spatialJoin_);
continue;
}

Expand All @@ -482,7 +489,7 @@
auto geopoint1 = getPoint(otherResult, i, otherResJoinCol);

if (!geopoint1) {
printWarning();
printWarning(spatialJoin_);
continue;
}

Expand All @@ -504,8 +511,8 @@
}
auto distance = computeDist(idTableLeft, idTableRight, rowLeft, rowRight,
leftJoinCol, rightJoinCol);
if (distance.getDatatype() == Datatype::Int &&
distance.getInt() <= maxDist) {
if (distance.getInt() <= maxDist) {
AD_CORRECTNESS_CHECK(distance.getDatatype() == Datatype::Int);
addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft,
rowRight, distance);
}
Expand Down
12 changes: 7 additions & 5 deletions src/engine/SpatialJoinAlgorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,21 @@ class SpatialJoinAlgorithms {
SpatialJoinAlgorithms(
QueryExecutionContext* qec, PreparedSpatialJoinParams params,
bool addDistToResult,
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config);
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config,
std::optional<SpatialJoin*> spatialJoin = std::nullopt);
Result BaselineAlgorithm();
Result S2geometryAlgorithm();
Result BoundingBoxAlgorithm();

std::vector<BoostGeometryNamespace::Box>
OnlyForTestingWrapperComputeBoundingBox(
const BoostGeometryNamespace::Point& startPoint) {
const BoostGeometryNamespace::Point& startPoint) const {
return computeBoundingBox(startPoint);
}

bool OnlyForTestingWrapperContainedInBoundingBoxes(
const std::vector<BoostGeometryNamespace::Box>& boundingBox,
const BoostGeometryNamespace::Point& point) {
const BoostGeometryNamespace::Point& point) const {
return isContainedInBoundingBoxes(boundingBox, point);
}

Expand Down Expand Up @@ -70,7 +71,7 @@ class SpatialJoinAlgorithms {
// the right, which when seen on the sphere look like a single box, but on the
// map and in the internal representation it looks like two/more boxes)
std::vector<BoostGeometryNamespace::Box> computeBoundingBox(
const BoostGeometryNamespace::Point& startPoint);
const BoostGeometryNamespace::Point& startPoint) const;

// This helper function calculates the bounding boxes based on a box, where
// definitely no match can occur. This means every element in the anti
Expand All @@ -87,12 +88,13 @@ class SpatialJoinAlgorithms {
// bounding boxes
bool isContainedInBoundingBoxes(
const std::vector<BoostGeometryNamespace::Box>& boundingBox,
BoostGeometryNamespace::Point point);
BoostGeometryNamespace::Point point) const;

QueryExecutionContext* qec_;
PreparedSpatialJoinParams params_;
bool addDistToResult_;
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config_;
std::optional<SpatialJoin*> spatialJoin_;

// circumference in meters at the equator (max) and the pole (min) (as the
// earth is not exactly a sphere the circumference is different. Note that
Expand Down
18 changes: 10 additions & 8 deletions src/util/Log.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@
#define LOGLEVEL INFO
#endif

// Abseil does also define its own LOG macro, so we need to undefine it here.
// NOTE: In case you run into trouble with this conflict, in particular if you
// use the `LOG()` macro and you get compilation errors that mention `abseil`,
// use the (otherwise identical) `AD_LOG` macro below. Same goes for the
// loglevels, use for example `AD_DEBUG` instead of `DEBUG`.
// TODO<joka921> Consistently replace the `LOG` macro by `AD_LOG`.
// Abseil also defines its own LOG macro, so we need to undefine it here.
//
// NOTE: In case you run into trouble with this conflict, in particular, if you
// use the `LOG(INFO)` macro and you get compilation errors that mention
// `abseil`, use the (otherwise identical) `AD_LOG_INFO` macro below.
//
// TODO<joka921>: Eventually replace the `LOG` macro by `AD_LOG` everywhere.

#ifdef LOG
#undef LOG
#endif
Expand All @@ -54,8 +56,8 @@ enum class LogLevel {
TRACE = 6
};

// These should be used in new code to avoid clashes with `abseil` (similar to
// `AD_LOG` vs `LOG`), see above for details.
// Macros for the different log levels. Always use these instead of the old
// `LOG(...)` macro to avoid conflicts with `abseil`.
#define AD_LOG_FATAL AD_LOG(LogLevel::FATAL)
#define AD_LOG_ERROR AD_LOG(LogLevel::ERROR)
#define AD_LOG_WARN AD_LOG(LogLevel::WARN)
Expand Down
143 changes: 139 additions & 4 deletions test/engine/SpatialJoinAlgorithmsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class SpatialJoinParamTest
std::shared_ptr<QueryExecutionTree> leftChild,
std::shared_ptr<QueryExecutionTree> rightChild, bool addLeftChildFirst,
std::vector<std::vector<std::string>> expectedOutputUnorderedRows,
std::vector<std::string> columnNames) {
std::vector<std::string> columnNames,
bool isWrongPointInputTest = false) {
// this function is like transposing a matrix. An entry which has been
// stored at (i, k) is now stored at (k, i). The reason this is needed is
// the following: this function receives the input as a vector of vector of
Expand Down Expand Up @@ -107,6 +108,23 @@ class SpatialJoinParamTest
}*/

EXPECT_THAT(vec, ::testing::UnorderedElementsAreArray(expectedOutput));

if (isWrongPointInputTest &&
GetParam() == SpatialJoin::Algorithm::BoundingBox) {
auto warnings = spatialJoin->collectWarnings();
bool containsWrongPointWarning = false;
std::string warningMessage =
"The input to a spatial join contained at least one element, "
"that is not a point geometry and is thus skipped. Note that "
"QLever currently only accepts point geometries for the "
"spatial joins";
for (const auto& warning : warnings) {
if (warning == warningMessage) {
containsWrongPointWarning = true;
}
}
ASSERT_TRUE(containsWrongPointWarning);
}
}

// build the test using the small dataset. Let the SpatialJoin operation be
Expand Down Expand Up @@ -224,15 +242,14 @@ class SpatialJoinParamTest
auto qec = buildTestQEC();
auto numTriples = qec->getIndex().numTriples().normal;
ASSERT_EQ(numTriples, 15);
// ====================== build small input
// =================================
// ====================== build small input ==============================
TripleComponent point1{Variable{"?point1"}};
TripleComponent subject{
ad_utility::triple_component::Iri::fromIriref("<geometry1>")};
auto smallChild = ad_utility::makeExecutionTree<IndexScan>(
qec, Permutation::Enum::PSO,
SparqlTriple{subject, std::string{"<asWKT>"}, point1});
// ====================== build big input ==================================
// ====================== build big input ================================
auto bigChild =
buildIndexScan(qec, {"?obj2", std::string{"<asWKT>"}, "?point2"});

Expand All @@ -249,6 +266,33 @@ class SpatialJoinParamTest
columnNames);
}

void testWrongPointInInput(
std::string specialPredicate, bool addLeftChildFirst,
std::vector<std::vector<std::string>> expectedOutput,
std::vector<std::string> columnNames) {
auto kg = createSmallDatasetWithPoints();
// make first point wrong:
auto pos = kg.find("POINT(");
kg = kg.insert(pos + 7, "wrongStuff");

ad_utility::MemorySize blocksizePermutations = 128_MB;
auto qec = ad_utility::testing::getQec(kg, true, true, false,
blocksizePermutations, false);
auto numTriples = qec->getIndex().numTriples().normal;
ASSERT_EQ(numTriples, 15);
// ====================== build inputs ================================
TripleComponent point1{Variable{"?point1"}};
TripleComponent point2{Variable{"?point2"}};
auto leftChild =
buildIndexScan(qec, {"?obj1", std::string{"<asWKT>"}, "?point1"});
auto rightChild =
buildIndexScan(qec, {"?obj2", std::string{"<asWKT>"}, "?point2"});

createAndTestSpatialJoin(
qec, SparqlTriple{point1, specialPredicate, point2}, leftChild,
rightChild, addLeftChildFirst, expectedOutput, columnNames, true);
}

protected:
bool useBaselineAlgorithm_;
};
Expand Down Expand Up @@ -518,6 +562,63 @@ std::vector<std::vector<std::string>> expectedMaxDist10000000_rows_small{
mergeToRow(sEif, sEye, expectedDistEyeEif),
mergeToRow(sEif, sLib, expectedDistEifLib)};

std::vector<std::vector<std::string>> expectedMaxDist1_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sEif, sEif, expectedDistSelf),
};

std::vector<std::vector<std::string>>
expectedMaxDist5000_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sEif, sEif, expectedDistSelf)};

std::vector<std::vector<std::string>>
expectedMaxDist500000_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sMun, sEif, expectedDistMunEif),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sEye, sEif, expectedDistEyeEif),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sEif, sEif, expectedDistSelf),
mergeToRow(sEif, sMun, expectedDistMunEif),
mergeToRow(sEif, sEye, expectedDistEyeEif)};

std::vector<std::vector<std::string>>
expectedMaxDist1000000_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sMun, sEif, expectedDistMunEif),
mergeToRow(sMun, sEye, expectedDistMunEye),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sEye, sEif, expectedDistEyeEif),
mergeToRow(sEye, sMun, expectedDistMunEye),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sEif, sEif, expectedDistSelf),
mergeToRow(sEif, sMun, expectedDistMunEif),
mergeToRow(sEif, sEye, expectedDistEyeEif)};

std::vector<std::vector<std::string>>
expectedMaxDist10000000_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sMun, sEif, expectedDistMunEif),
mergeToRow(sMun, sEye, expectedDistMunEye),
mergeToRow(sMun, sLib, expectedDistMunLib),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sEye, sEif, expectedDistEyeEif),
mergeToRow(sEye, sMun, expectedDistMunEye),
mergeToRow(sEye, sLib, expectedDistEyeLib),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sLib, sMun, expectedDistMunLib),
mergeToRow(sLib, sEye, expectedDistEyeLib),
mergeToRow(sLib, sEif, expectedDistEifLib),
mergeToRow(sEif, sEif, expectedDistSelf),
mergeToRow(sEif, sMun, expectedDistMunEif),
mergeToRow(sEif, sEye, expectedDistEyeEif),
mergeToRow(sEif, sLib, expectedDistEifLib)};

std::vector<std::vector<std::string>> expectedMaxDist1_rows_diff{
mergeToRow(TF, sTF, expectedDistSelf),
mergeToRow(Mun, sMun, expectedDistSelf),
Expand Down Expand Up @@ -895,6 +996,40 @@ TEST_P(SpatialJoinParamTest, diffSizeIdTables) {
false);
}

TEST_P(SpatialJoinParamTest, wrongPointInInput) {
// expected behavior: point is skipped
std::vector<std::string> columnNames{"?obj1", "?point1", "?obj2", "?point2",
"?distOfTheTwoObjectsAddedInternally"};
testWrongPointInInput("<max-distance-in-meters:1>", true,
expectedMaxDist1_rows_small_wrong_point, columnNames);
testWrongPointInInput("<max-distance-in-meters:1>", false,
expectedMaxDist1_rows_small_wrong_point, columnNames);
testWrongPointInInput("<max-distance-in-meters:5000>", true,
expectedMaxDist5000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:5000>", false,
expectedMaxDist5000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:500000>", true,
expectedMaxDist500000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:500000>", false,
expectedMaxDist500000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:1000000>", true,
expectedMaxDist1000000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:1000000>", false,
expectedMaxDist1000000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:10000000>", true,
expectedMaxDist10000000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:10000000>", false,
expectedMaxDist10000000_rows_small_wrong_point,
columnNames);
}

INSTANTIATE_TEST_SUITE_P(
SpatialJoin, SpatialJoinParamTest,
::testing::Values(SpatialJoin::Algorithm::Baseline,
Expand Down
Loading