Skip to content

Commit 9a7cf23

Browse files
committed
Allow database backend to optimize group-by-XZ operation
1 parent 7685e54 commit 9a7cf23

11 files changed

+89
-70
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ Makefile
1313
cmake_install.cmake
1414
cmake_config.h
1515
compile_commands.json
16+
.vscode/

src/TileGenerator.cpp

+5-11
Original file line numberDiff line numberDiff line change
@@ -464,26 +464,20 @@ void TileGenerator::loadBlocks()
464464
const int16_t yMin = mod16(m_yMin);
465465

466466
if (m_exhaustiveSearch == EXH_NEVER || m_exhaustiveSearch == EXH_Y) {
467-
std::vector<BlockPos> vec = m_db->getBlockPos(
467+
std::vector<BlockPos> vec = m_db->getBlockPosXZ(
468468
BlockPos(m_geomX, yMin, m_geomY),
469469
BlockPos(m_geomX2, yMax, m_geomY2)
470470
);
471471

472472
for (auto pos : vec) {
473473
assert(pos.x >= m_geomX && pos.x < m_geomX2);
474-
assert(pos.y >= yMin && pos.y < yMax);
475474
assert(pos.z >= m_geomY && pos.z < m_geomY2);
476475

477476
// Adjust minimum and maximum positions to the nearest block
478-
if (pos.x < m_xMin)
479-
m_xMin = pos.x;
480-
if (pos.x > m_xMax)
481-
m_xMax = pos.x;
482-
483-
if (pos.z < m_zMin)
484-
m_zMin = pos.z;
485-
if (pos.z > m_zMax)
486-
m_zMax = pos.z;
477+
m_xMin = mymin<int>(m_xMin, pos.x);
478+
m_xMax = mymax<int>(m_xMax, pos.x);
479+
m_zMin = mymin<int>(m_zMin, pos.z);
480+
m_zMax = mymax<int>(m_zMax, pos.z);
487481

488482
m_positions[pos.z].emplace(pos.x);
489483
}

src/db-leveldb.cpp

+34-13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include <stdexcept>
22
#include <sstream>
3+
#include <algorithm>
34
#include "db-leveldb.h"
45
#include "types.h"
56

@@ -18,14 +19,20 @@ static inline std::string i64tos(int64_t i)
1819
return os.str();
1920
}
2021

22+
// finds the first position in the list where it.x >= x
23+
#define lower_bound_x(container, find_x) \
24+
std::lower_bound((container).begin(), (container).end(), (find_x), \
25+
[] (const vec2 &left, int16_t right) { \
26+
return left.x < right; \
27+
})
2128

2229
DBLevelDB::DBLevelDB(const std::string &mapdir)
2330
{
2431
leveldb::Options options;
2532
options.create_if_missing = false;
2633
leveldb::Status status = leveldb::DB::Open(options, mapdir + "map.db", &db);
2734
if (!status.ok()) {
28-
throw std::runtime_error(std::string("Failed to open Database: ") + status.ToString());
35+
throw std::runtime_error(std::string("Failed to open database: ") + status.ToString());
2936
}
3037

3138
/* LevelDB is a dumb key-value store, so the only optimization we can do
@@ -41,18 +48,24 @@ DBLevelDB::~DBLevelDB()
4148
}
4249

4350

44-
std::vector<BlockPos> DBLevelDB::getBlockPos(BlockPos min, BlockPos max)
51+
std::vector<BlockPos> DBLevelDB::getBlockPosXZ(BlockPos min, BlockPos max)
4552
{
4653
std::vector<BlockPos> res;
4754
for (const auto &it : posCache) {
48-
if (it.first < min.z || it.first >= max.z)
55+
const int16_t zpos = it.first;
56+
if (zpos < min.z || zpos >= max.z)
4957
continue;
50-
for (auto pos2 : it.second) {
51-
if (pos2.first < min.x || pos2.first >= max.x)
58+
auto it2 = lower_bound_x(it.second, min.x);
59+
for (; it2 != it.second.end(); it2++) {
60+
const auto &pos2 = *it2;
61+
if (pos2.x >= max.x)
62+
break; // went past
63+
if (pos2.y < min.y || pos2.y >= max.y)
5264
continue;
53-
if (pos2.second < min.y || pos2.second >= max.y)
65+
// skip duplicates
66+
if (!res.empty() && res.back().x == pos2.x && res.back().z == zpos)
5467
continue;
55-
res.emplace_back(pos2.first, pos2.second, it.first);
68+
res.emplace_back(pos2.x, pos2.y, zpos);
5669
}
5770
}
5871
return res;
@@ -61,14 +74,17 @@ std::vector<BlockPos> DBLevelDB::getBlockPos(BlockPos min, BlockPos max)
6174

6275
void DBLevelDB::loadPosCache()
6376
{
64-
leveldb::Iterator * it = db->NewIterator(leveldb::ReadOptions());
77+
leveldb::Iterator *it = db->NewIterator(leveldb::ReadOptions());
6578
for (it->SeekToFirst(); it->Valid(); it->Next()) {
6679
int64_t posHash = stoi64(it->key().ToString());
6780
BlockPos pos = decodeBlockPos(posHash);
6881

6982
posCache[pos.z].emplace_back(pos.x, pos.y);
7083
}
7184
delete it;
85+
86+
for (auto &it : posCache)
87+
std::sort(it.second.begin(), it.second.end());
7288
}
7389

7490

@@ -81,13 +97,18 @@ void DBLevelDB::getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
8197
auto it = posCache.find(z);
8298
if (it == posCache.cend())
8399
return;
84-
for (auto pos2 : it->second) {
85-
if (pos2.first != x)
86-
continue;
87-
if (pos2.second < min_y || pos2.second >= max_y)
100+
auto it2 = lower_bound_x(it->second, x);
101+
if (it2 == it->second.end() || it2->x != x)
102+
return;
103+
// it2 is now pointing to a contigous part where it2->x == x
104+
for (; it2 != it->second.end(); it2++) {
105+
const auto &pos2 = *it2;
106+
if (pos2.x != x)
107+
break; // went past
108+
if (pos2.y < min_y || pos2.y >= max_y)
88109
continue;
89110

90-
BlockPos pos(x, pos2.second, z);
111+
BlockPos pos(x, pos2.y, z);
91112
status = db->Get(leveldb::ReadOptions(), i64tos(encodeBlockPos(pos)), &datastr);
92113
if (status.ok()) {
93114
blocks.emplace_back(

src/db-leveldb.h

+17-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
class DBLevelDB : public DB {
99
public:
1010
DBLevelDB(const std::string &mapdir);
11-
std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) override;
11+
std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) override;
1212
void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
1313
int16_t min_y, int16_t max_y) override;
1414
void getBlocksByPos(BlockList &blocks,
@@ -18,11 +18,24 @@ class DBLevelDB : public DB {
1818
bool preferRangeQueries() const override { return false; }
1919

2020
private:
21-
using pos2d = std::pair<int16_t, int16_t>;
21+
struct vec2 {
22+
int16_t x, y;
23+
constexpr vec2() : x(0), y(0) {}
24+
constexpr vec2(int16_t x, int16_t y) : x(x), y(y) {}
25+
26+
inline bool operator<(const vec2 &p) const
27+
{
28+
if (x < p.x)
29+
return true;
30+
if (x > p.x)
31+
return false;
32+
return y < p.y;
33+
}
34+
};
2235

2336
void loadPosCache();
2437

2538
// indexed by Z, contains all (x,y) position pairs
26-
std::unordered_map<int16_t, std::vector<pos2d>> posCache;
27-
leveldb::DB *db;
39+
std::unordered_map<int16_t, std::vector<vec2>> posCache;
40+
leveldb::DB *db = NULL;
2841
};

src/db-postgresql.cpp

+9-15
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ DBPostgreSQL::DBPostgreSQL(const std::string &mapdir)
2727

2828
prepareStatement(
2929
"get_block_pos",
30-
"SELECT posX::int4, posY::int4, posZ::int4 FROM blocks WHERE"
30+
"SELECT posX::int4, posZ::int4 FROM blocks WHERE"
3131
" (posX BETWEEN $1::int4 AND $2::int4) AND"
3232
" (posY BETWEEN $3::int4 AND $4::int4) AND"
33-
" (posZ BETWEEN $5::int4 AND $6::int4)"
33+
" (posZ BETWEEN $5::int4 AND $6::int4) GROUP BY posX, posZ"
3434
);
3535
prepareStatement(
3636
"get_blocks",
@@ -60,7 +60,7 @@ DBPostgreSQL::~DBPostgreSQL()
6060
}
6161

6262

63-
std::vector<BlockPos> DBPostgreSQL::getBlockPos(BlockPos min, BlockPos max)
63+
std::vector<BlockPos> DBPostgreSQL::getBlockPosXZ(BlockPos min, BlockPos max)
6464
{
6565
int32_t const x1 = htonl(min.x);
6666
int32_t const x2 = htonl(max.x - 1);
@@ -83,11 +83,14 @@ std::vector<BlockPos> DBPostgreSQL::getBlockPos(BlockPos min, BlockPos max)
8383
std::vector<BlockPos> positions;
8484
positions.reserve(numrows);
8585

86-
for (int row = 0; row < numrows; ++row)
87-
positions.emplace_back(pg_to_blockpos(results, row, 0));
86+
BlockPos pos;
87+
for (int row = 0; row < numrows; ++row) {
88+
pos.x = pg_binary_to_int(results, row, 0);
89+
pos.z = pg_binary_to_int(results, row, 1);
90+
positions.push_back(pos);
91+
}
8892

8993
PQclear(results);
90-
9194
return positions;
9295
}
9396

@@ -215,12 +218,3 @@ int DBPostgreSQL::pg_binary_to_int(PGresult *res, int row, int col)
215218
int32_t* raw = reinterpret_cast<int32_t*>(PQgetvalue(res, row, col));
216219
return ntohl(*raw);
217220
}
218-
219-
BlockPos DBPostgreSQL::pg_to_blockpos(PGresult *res, int row, int col)
220-
{
221-
BlockPos result;
222-
result.x = pg_binary_to_int(res, row, col);
223-
result.y = pg_binary_to_int(res, row, col + 1);
224-
result.z = pg_binary_to_int(res, row, col + 2);
225-
return result;
226-
}

src/db-postgresql.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
class DBPostgreSQL : public DB {
77
public:
88
DBPostgreSQL(const std::string &mapdir);
9-
std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) override;
9+
std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) override;
1010
void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
1111
int16_t min_y, int16_t max_y) override;
1212
void getBlocksByPos(BlockList &blocks,
@@ -25,7 +25,6 @@ class DBPostgreSQL : public DB {
2525
bool clear = true
2626
);
2727
int pg_binary_to_int(PGresult *res, int row, int col);
28-
BlockPos pg_to_blockpos(PGresult *res, int row, int col);
2928

3029
private:
3130
PGconn *db;

src/db-redis.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ DBRedis::~DBRedis()
6868
}
6969

7070

71-
std::vector<BlockPos> DBRedis::getBlockPos(BlockPos min, BlockPos max)
71+
std::vector<BlockPos> DBRedis::getBlockPosXZ(BlockPos min, BlockPos max)
7272
{
7373
std::vector<BlockPos> res;
7474
for (const auto &it : posCache) {

src/db-redis.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
class DBRedis : public DB {
1010
public:
1111
DBRedis(const std::string &mapdir);
12-
std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) override;
12+
std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) override;
1313
void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
1414
int16_t min_y, int16_t max_y) override;
1515
void getBlocksByPos(BlockList &blocks,

src/db-sqlite3.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ DBSQLite3::DBSQLite3(const std::string &mapdir)
7979
"SELECT data FROM blocks WHERE x = ? AND y = ? AND z = ?"));
8080

8181
SQLOK(prepare(stmt_get_block_pos_range,
82-
"SELECT x, y, z FROM blocks WHERE "
82+
"SELECT x, z FROM blocks WHERE "
8383
"x >= ? AND y >= ? AND z >= ? AND "
84-
"x < ? AND y < ? AND z < ?"));
84+
"x < ? AND y < ? AND z < ? GROUP BY x, z"));
8585
} else {
8686
SQLOK(prepare(stmt_get_blocks_z,
8787
"SELECT pos, data FROM blocks WHERE pos BETWEEN ? AND ?"));
@@ -119,7 +119,7 @@ inline void DBSQLite3::getPosRange(int64_t &min, int64_t &max,
119119
}
120120

121121

122-
std::vector<BlockPos> DBSQLite3::getBlockPos(BlockPos min, BlockPos max)
122+
std::vector<BlockPos> DBSQLite3::getBlockPosXZ(BlockPos min, BlockPos max)
123123
{
124124
int result;
125125
sqlite3_stmt *stmt;
@@ -152,12 +152,13 @@ std::vector<BlockPos> DBSQLite3::getBlockPos(BlockPos min, BlockPos max)
152152

153153
if (newFormat) {
154154
pos.x = sqlite3_column_int(stmt, 0);
155-
pos.y = sqlite3_column_int(stmt, 1);
156-
pos.z = sqlite3_column_int(stmt, 2);
155+
pos.z = sqlite3_column_int(stmt, 1);
157156
} else {
158157
pos = decodeBlockPos(sqlite3_column_int64(stmt, 0));
159158
if (pos.x < min.x || pos.x >= max.x || pos.y < min.y || pos.y >= max.y)
160159
continue;
160+
// note that we can't try to deduplicate these because the order
161+
// of the encoded pos (if sorted) is ZYX.
161162
}
162163
positions.emplace_back(pos);
163164
}

src/db-sqlite3.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class SQLite3Base {
4747
class DBSQLite3 : public DB, SQLite3Base {
4848
public:
4949
DBSQLite3(const std::string &mapdir);
50-
std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) override;
50+
std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) override;
5151
void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
5252
int16_t min_y, int16_t max_y) override;
5353
void getBlocksByPos(BlockList &blocks,

src/db.h

+13-17
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,15 @@
66
#include <utility>
77
#include "types.h"
88

9-
109
struct BlockPos {
11-
int16_t x;
12-
int16_t y;
13-
int16_t z;
10+
int16_t x, y, z;
1411

15-
BlockPos() : x(0), y(0), z(0) {}
16-
explicit BlockPos(int16_t v) : x(v), y(v), z(v) {}
17-
BlockPos(int16_t x, int16_t y, int16_t z) : x(x), y(y), z(z) {}
12+
constexpr BlockPos() : x(0), y(0), z(0) {}
13+
explicit constexpr BlockPos(int16_t v) : x(v), y(v), z(v) {}
14+
constexpr BlockPos(int16_t x, int16_t y, int16_t z) : x(x), y(y), z(z) {}
1815

1916
// Implements the inverse ordering so that (2,2,2) < (1,1,1)
20-
bool operator < (const BlockPos &p) const
17+
inline bool operator<(const BlockPos &p) const
2118
{
2219
if (z > p.z)
2320
return true;
@@ -27,11 +24,7 @@ struct BlockPos {
2724
return true;
2825
if (y < p.y)
2926
return false;
30-
if (x > p.x)
31-
return true;
32-
if (x < p.x)
33-
return false;
34-
return false;
27+
return x > p.x;
3528
}
3629
};
3730

@@ -47,25 +40,28 @@ class DB {
4740
static inline BlockPos decodeBlockPos(int64_t hash);
4841

4942
public:
50-
/* Return all block positions inside the range given by min and max,
51-
* so that min.x <= x < max.x, ...
43+
/* Return all unique (X, Z) position pairs inside area given by min and max,
44+
* so that min.x <= x < max.x && min.z <= z < max.z
45+
* Note: implementations may return duplicates, but these result in wasted time.
5246
*/
53-
virtual std::vector<BlockPos> getBlockPos(BlockPos min, BlockPos max) = 0;
47+
virtual std::vector<BlockPos> getBlockPosXZ(BlockPos min, BlockPos max) = 0;
48+
5449
/* Read all blocks in column given by x and z
5550
* and inside the given Y range (min_y <= y < max_y) into list
5651
*/
5752
virtual void getBlocksOnXZ(BlockList &blocks, int16_t x, int16_t z,
5853
int16_t min_y, int16_t max_y) = 0;
54+
5955
/* Read blocks at given positions into list
6056
*/
6157
virtual void getBlocksByPos(BlockList &blocks,
6258
const std::vector<BlockPos> &positions) = 0;
59+
6360
/* Can this database efficiently do range queries?
6461
* (for large data sets, more efficient that brute force)
6562
*/
6663
virtual bool preferRangeQueries() const = 0;
6764

68-
6965
virtual ~DB() {}
7066
};
7167

0 commit comments

Comments
 (0)