Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spatial join bounding box #1546

Merged
merged 25 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e9ed3e4
add BoundingBox Algorithm
Jonathan24680 Oct 18, 2024
ada2978
Sonarqube issues
Jonathan24680 Nov 3, 2024
eeadcd7
Sonarqube issues
Jonathan24680 Nov 4, 2024
127ba53
dont use cpp 20 version constant of pi
Jonathan24680 Nov 4, 2024
dcb60dd
Merge branch 'master' into SpatialJoinBoundingBox
Jonathan24680 Nov 4, 2024
be09a86
part of first rework
Jonathan24680 Nov 8, 2024
c4ddec6
rework of some parts of the tests
Jonathan24680 Nov 9, 2024
122372b
mr feedback
Jonathan24680 Nov 18, 2024
12f3c13
Merge branch 'master' into SpatialJoinBoundingBox
Jonathan24680 Nov 18, 2024
213b02e
remove second definition of function
Jonathan24680 Nov 19, 2024
ecda249
add new macros AD_LOG vs LOG
joka921 Nov 19, 2024
97d039b
Merge branch 'AD_LOG' of https://github.com/joka921/QLever into AD_LOG
Jonathan24680 Nov 19, 2024
603ec46
Merge branch 'AD_LOG' into SpatialJoinBoundingBox
Jonathan24680 Nov 19, 2024
3831da8
PR feedback
Jonathan24680 Nov 20, 2024
9ffaa0a
Apply the suggestion by Hannah.
joka921 Nov 20, 2024
032eade
Clang-format.
joka921 Nov 20, 2024
d50e2cc
Merge branch 'AD_LOG' of https://github.com/joka921/QLever into AD_LOG
Jonathan24680 Nov 20, 2024
24e31c6
Merge branch 'AD_LOG' into SpatialJoinBoundingBox
Jonathan24680 Nov 20, 2024
8285355
merge log change and fix allocator
Jonathan24680 Nov 20, 2024
56e1618
PR feedback
Jonathan24680 Nov 20, 2024
7d8b98b
pr feedback
Jonathan24680 Nov 21, 2024
89491a1
PR feedback
Jonathan24680 Nov 21, 2024
8743986
merge master into SpatialJoinBoundingBox
Jonathan24680 Nov 22, 2024
23f2ed2
PR feedback
Jonathan24680 Nov 22, 2024
ce5db45
MR feedback
Jonathan24680 Nov 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/engine/SpatialJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,5 @@ class SpatialJoin : public Operation {
// between the two objects
bool addDistToResult_ = true;
const string nameDistanceInternal_ = "?distOfTheTwoObjectsAddedInternally";
Algorithm algorithm_ = Algorithm::BoundingBox;
Algorithm algorithm_ = Algorithm::S2Geometry;
};
56 changes: 33 additions & 23 deletions src/engine/SpatialJoinAlgorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
#include <s2/s2point_index.h>
#include <s2/util/units/length-units.h>

#include "math.h"
#include <cmath>

#include "util/GeoSparqlHelpers.h"

using namespace BoostGeometryNamespace;
Expand Down Expand Up @@ -232,9 +233,8 @@
const Point& startPoint) {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
if (!maxDist.has_value()) {
AD_THROW("Max distance must have a value for this operation");
}
AD_CORRECTNESS_CHECK(maxDist.has_value(),
"Max distance must have a value for this operation");
// haversine function
auto haversine = [](double theta) { return (1 - std::cos(theta)) / 2; };

Expand All @@ -250,15 +250,15 @@
1.02) {
maxDistInMetersBuffer = 1.01 * static_cast<double>(maxDist.value());
} else {
maxDistInMetersBuffer =
static_cast<double>(std::numeric_limits<long long>::max());
}

Check warning on line 255 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L253-L255

Added lines #L253 - L255 were not covered by tests

// for large distances, where the lower calculation would just result in
// a single bounding box for the whole planet, do an optimized version
if (static_cast<double>(maxDist.value()) > circumferenceMax_ / 4.0 &&
static_cast<double>(maxDist.value()) < circumferenceMax_ / 2.01) {
return computeUsingAntiBoundingBox(startPoint);
return computeBoundingBoxForLargeDistances(startPoint);
}

// compute latitude bound
Expand Down Expand Up @@ -311,13 +311,12 @@
}

// ____________________________________________________________________________
std::vector<Box> SpatialJoinAlgorithms::computeUsingAntiBoundingBox(
std::vector<Box> SpatialJoinAlgorithms::computeBoundingBoxForLargeDistances(
const Point& startPoint) const {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
if (!maxDist.has_value()) {
AD_THROW("Max distance must have a value for this operation");
}
AD_CORRECTNESS_CHECK(maxDist.has_value(),
"Max distance must have a value for this operation");

// point on the opposite side of the globe
Point antiPoint(startPoint.get<0>() + 180, startPoint.get<1>() * -1);
Expand Down Expand Up @@ -391,17 +390,17 @@
}

// ____________________________________________________________________________
bool SpatialJoinAlgorithms::containedInBoundingBoxes(
const std::vector<Box>& bbox, Point point1) {
convertToNormalCoordinates(point1);
bool SpatialJoinAlgorithms::isContainedInBoundingBoxes(
const std::vector<Box>& boundingBox, Point point) {
convertToNormalCoordinates(point);

return std::ranges::any_of(bbox, [point1](const Box& aBox) {
return boost::geometry::covered_by(point1, aBox);
return std::ranges::any_of(boundingBox, [point](const Box& aBox) {
return boost::geometry::covered_by(point, aBox);
});
}

// ____________________________________________________________________________
void SpatialJoinAlgorithms::convertToNormalCoordinates(Point& point) {
void SpatialJoinAlgorithms::convertToNormalCoordinates(Point& point) const {
// correct lon and lat bounds if necessary
while (point.get<0>() < -180) {
point.set<0>(point.get<0>() + 360);
Expand All @@ -416,8 +415,9 @@
}
}

// ____________________________________________________________________________
std::array<bool, 2> SpatialJoinAlgorithms::isAPoleTouched(
const double& latitude) {
const double& latitude) const {
bool northPoleReached = false;
bool southPoleReached = false;
if (latitude >= 90) {
Expand All @@ -431,11 +431,17 @@

// ____________________________________________________________________________
Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
auto printWarning = []() {
AD_LOG_WARN
<< "expected a point here, but no point is given. Skipping this point"
<< std::endl;
bool alreadyWarned = false;

auto printWarning = [&]() {
if (!alreadyWarned) {
AD_LOG_WARN << "expected a point here, but no point is given. Skipping "
"this point and future 'non points' of this query"
<< std::endl;
alreadyWarned = true;
}
};

Check warning on line 443 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L438-L443

Added lines #L438 - L443 were not covered by tests

const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
IdTable result{numColumns, qec_->getAllocator()};
Expand All @@ -452,35 +458,39 @@
std::swap(smallerResJoinCol, otherResJoinCol);
}

bgi::rtree<Value, bgi::quadratic<16>> rtree;
bgi::rtree<Value, bgi::quadratic<16>, bgi::indexable<Value>,
bgi::equal_to<Value>, ad_utility::AllocatorWithLimit<Value>>
rtree(bgi::quadratic<16>{}, bgi::indexable<Value>{},
bgi::equal_to<Value>{}, qec_->getAllocator());
for (size_t i = 0; i < smallerResult->numRows(); i++) {
// get point of row i
auto geopoint = getPoint(smallerResult, i, smallerResJoinCol);

if (!geopoint) {
printWarning();
continue;
}

Check warning on line 472 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L470-L472

Added lines #L470 - L472 were not covered by tests

Point p(geopoint.value().getLng(), geopoint.value().getLat());

// add every point together with the row number into the rtree
rtree.insert(std::make_pair(std::move(p), i));
}
std::vector<Value, ad_utility::AllocatorWithLimit<Value>> results{
qec_->getAllocator()};
for (size_t i = 0; i < otherResult->numRows(); i++) {
auto geopoint1 = getPoint(otherResult, i, otherResJoinCol);

if (!geopoint1) {
printWarning();
continue;
}

Check warning on line 487 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L485-L487

Added lines #L485 - L487 were not covered by tests

Point p(geopoint1.value().getLng(), geopoint1.value().getLat());

// query the other rtree for every point using the following bounding box
std::vector<Box> bbox = computeBoundingBox(p);
std::vector<Value, ad_utility::AllocatorWithLimit<Value>> results{
qec_->getAllocator()};
results.clear();

std::ranges::for_each(bbox, [&](const Box& bbox) {
rtree.query(bgi::intersects(bbox), std::back_inserter(results));
Expand Down
18 changes: 9 additions & 9 deletions src/engine/SpatialJoinAlgorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ class SpatialJoinAlgorithms {
}

bool OnlyForTestingWrapperContainedInBoundingBoxes(
const std::vector<BoostGeometryNamespace::Box>& bbox,
const BoostGeometryNamespace::Point& point1) {
return containedInBoundingBoxes(bbox, point1);
const std::vector<BoostGeometryNamespace::Box>& boundingBox,
const BoostGeometryNamespace::Point& point) {
return isContainedInBoundingBoxes(boundingBox, point);
}

private:
Expand Down Expand Up @@ -80,14 +80,14 @@ class SpatialJoinAlgorithms {
// gets used, when the usual procedure, would just result in taking a big
// bounding box, which covers the whole planet (so for extremely large max
// distances)
std::vector<BoostGeometryNamespace::Box> computeUsingAntiBoundingBox(
std::vector<BoostGeometryNamespace::Box> computeBoundingBoxForLargeDistances(
const BoostGeometryNamespace::Point& startPoint) const;

// This function returns true, iff the given point is contained in any of the
// bounding boxes
bool containedInBoundingBoxes(
const std::vector<BoostGeometryNamespace::Box>& bbox,
BoostGeometryNamespace::Point point1);
bool isContainedInBoundingBoxes(
const std::vector<BoostGeometryNamespace::Box>& boundingBox,
BoostGeometryNamespace::Point point);

QueryExecutionContext* qec_;
PreparedSpatialJoinParams params_;
Expand All @@ -105,8 +105,8 @@ class SpatialJoinAlgorithms {
static constexpr double radius_ = 6'378'000;

// convert coordinates to the usual ranges (-180 to 180 and -90 to 90)
void convertToNormalCoordinates(BoostGeometryNamespace::Point& point);
void convertToNormalCoordinates(BoostGeometryNamespace::Point& point) const;

// return whether one of the poles is being touched
std::array<bool, 2> isAPoleTouched(const double& latitude);
std::array<bool, 2> isAPoleTouched(const double& latitude) const;
};
124 changes: 119 additions & 5 deletions test/engine/SpatialJoinAlgorithmsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,13 @@ class SpatialJoinParamTest
auto qec = buildTestQEC();
auto numTriples = qec->getIndex().numTriples().normal;
ASSERT_EQ(numTriples, 15);
// ========================= build big child
// =================================
// ========================= build big child =============================
auto bigChild = buildMediumChild(
qec, {"?obj1", std::string{"<name>"}, "?name1"},
{"?obj1", std::string{"<hasGeometry>"}, "?geo1"},
{"?geo1", std::string{"<asWKT>"}, "?point1"}, "?obj1", "?geo1");

// ========================= build small child
// ===============================
// ========================= build small child ===========================
TripleComponent point2{Variable{"?point2"}};
auto smallChild =
buildIndexScan(qec, {"?obj2", std::string{"<asWKT>"}, "?point2"});
Expand All @@ -219,6 +217,38 @@ class SpatialJoinParamTest
columnNames);
}

void testDiffSizeIdTables(
std::string specialPredicate, bool addLeftChildFirst,
std::vector<std::vector<std::string>> expectedOutput,
std::vector<std::string> columnNames, bool bigChildLeft) {
auto qec = buildTestQEC();
auto numTriples = qec->getIndex().numTriples().normal;
ASSERT_EQ(numTriples, 15);
// ====================== build small input
// =================================
TripleComponent point1{Variable{"?point1"}};
TripleComponent subject{
ad_utility::triple_component::Iri::fromIriref("<geometry1>")};
auto smallChild = ad_utility::makeExecutionTree<IndexScan>(
qec, Permutation::Enum::PSO,
SparqlTriple{subject, std::string{"<asWKT>"}, point1});
// ====================== build big input ==================================
auto bigChild =
buildIndexScan(qec, {"?obj2", std::string{"<asWKT>"}, "?point2"});

auto firstChild = bigChildLeft ? bigChild : smallChild;
auto secondChild = bigChildLeft ? smallChild : bigChild;
auto firstVariable =
bigChildLeft ? TripleComponent{Variable{"?point2"}} : point1;
auto secondVariable =
bigChildLeft ? point1 : TripleComponent{Variable{"?point2"}};

createAndTestSpatialJoin(
qec, SparqlTriple{firstVariable, specialPredicate, secondVariable},
firstChild, secondChild, addLeftChildFirst, expectedOutput,
columnNames);
}

protected:
bool useBaselineAlgorithm_;
};
Expand Down Expand Up @@ -566,6 +596,31 @@ std::vector<std::vector<std::string>> expectedMaxDist10000000_rows_diff{
mergeToRow(Eif, sEye, expectedDistEyeEif),
mergeToRow(Eif, sLib, expectedDistEifLib)};

std::vector<std::vector<std::string>> expectedMaxDist1_rows_diffIDTable{
mergeToRow({sTF.at(1)}, sTF, expectedDistSelf)};

std::vector<std::vector<std::string>> expectedMaxDist5000_rows_diffIDTable{
mergeToRow({sTF.at(1)}, sTF, expectedDistSelf),
mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun)};

std::vector<std::vector<std::string>> expectedMaxDist500000_rows_diffIDTable{
mergeToRow({sTF.at(1)}, sTF, expectedDistSelf),
mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun),
mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif)};

std::vector<std::vector<std::string>> expectedMaxDist1000000_rows_diffIDTable{
mergeToRow({sTF.at(1)}, sTF, expectedDistSelf),
mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun),
mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif),
mergeToRow({sTF.at(1)}, sEye, expectedDistUniEye)};

std::vector<std::vector<std::string>> expectedMaxDist10000000_rows_diffIDTable{
mergeToRow({sTF.at(1)}, sTF, expectedDistSelf),
mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun),
mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif),
mergeToRow({sTF.at(1)}, sEye, expectedDistUniEye),
mergeToRow({sTF.at(1)}, sLib, expectedDistUniLib)};

std::vector<std::vector<std::string>> expectedNearestNeighbors1{
mergeToRow(TF, TF, expectedDistSelf),
mergeToRow(Mun, Mun, expectedDistSelf),
Expand Down Expand Up @@ -781,6 +836,65 @@ TEST_P(SpatialJoinParamTest, computeResultSmallDatasetDifferentSizeChildren) {
expectedMaxDist10000000_rows_diff, columnNames, false);
}

TEST_P(SpatialJoinParamTest, diffSizeIdTables) {
std::vector<std::string> columnNames{"?point1", "?obj2", "?point2",
"?distOfTheTwoObjectsAddedInternally"};
testDiffSizeIdTables("<max-distance-in-meters:1>", true,
expectedMaxDist1_rows_diffIDTable, columnNames, true);
testDiffSizeIdTables("<max-distance-in-meters:1>", true,
expectedMaxDist1_rows_diffIDTable, columnNames, false);
testDiffSizeIdTables("<max-distance-in-meters:1>", false,
expectedMaxDist1_rows_diffIDTable, columnNames, true);
testDiffSizeIdTables("<max-distance-in-meters:1>", false,
expectedMaxDist1_rows_diffIDTable, columnNames, false);
testDiffSizeIdTables("<max-distance-in-meters:5000>", true,
expectedMaxDist5000_rows_diffIDTable, columnNames, true);
testDiffSizeIdTables("<max-distance-in-meters:5000>", true,
expectedMaxDist5000_rows_diffIDTable, columnNames,
false);
testDiffSizeIdTables("<max-distance-in-meters:5000>", false,
expectedMaxDist5000_rows_diffIDTable, columnNames, true);
testDiffSizeIdTables("<max-distance-in-meters:5000>", false,
expectedMaxDist5000_rows_diffIDTable, columnNames,
false);
testDiffSizeIdTables("<max-distance-in-meters:500000>", true,
expectedMaxDist500000_rows_diffIDTable, columnNames,
true);
testDiffSizeIdTables("<max-distance-in-meters:500000>", true,
expectedMaxDist500000_rows_diffIDTable, columnNames,
false);
testDiffSizeIdTables("<max-distance-in-meters:500000>", false,
expectedMaxDist500000_rows_diffIDTable, columnNames,
true);
testDiffSizeIdTables("<max-distance-in-meters:500000>", false,
expectedMaxDist500000_rows_diffIDTable, columnNames,
false);
testDiffSizeIdTables("<max-distance-in-meters:1000000>", true,
expectedMaxDist1000000_rows_diffIDTable, columnNames,
true);
testDiffSizeIdTables("<max-distance-in-meters:1000000>", true,
expectedMaxDist1000000_rows_diffIDTable, columnNames,
false);
testDiffSizeIdTables("<max-distance-in-meters:1000000>", false,
expectedMaxDist1000000_rows_diffIDTable, columnNames,
true);
testDiffSizeIdTables("<max-distance-in-meters:1000000>", false,
expectedMaxDist1000000_rows_diffIDTable, columnNames,
false);
testDiffSizeIdTables("<max-distance-in-meters:10000000>", true,
expectedMaxDist10000000_rows_diffIDTable, columnNames,
true);
testDiffSizeIdTables("<max-distance-in-meters:10000000>", true,
expectedMaxDist10000000_rows_diffIDTable, columnNames,
false);
testDiffSizeIdTables("<max-distance-in-meters:10000000>", false,
expectedMaxDist10000000_rows_diffIDTable, columnNames,
true);
testDiffSizeIdTables("<max-distance-in-meters:10000000>", false,
expectedMaxDist10000000_rows_diffIDTable, columnNames,
false);
}

INSTANTIATE_TEST_SUITE_P(
SpatialJoin, SpatialJoinParamTest,
::testing::Values(SpatialJoin::Algorithm::Baseline,
Expand Down Expand Up @@ -934,7 +1048,7 @@ TEST(SpatialJoin, computeBoundingBox) {
}
}

TEST(SpatialJoin, containedInBoundingBoxes) {
TEST(SpatialJoin, isContainedInBoundingBoxes) {
// build dummy join to access the containedInBoundingBox and
// computeBoundingBox functions
auto qec = buildTestQEC();
Expand Down
Loading