Skip to content

Commit 8652f02

Browse files
github-actions[bot]Shelnutt2ypatia
authored
[Backport release-2.27] Improve readers by parallelizing I/O and compute operations (#5401) (#5451)
Backport of #5401 to release-2.27 --- TYPE: IMPROVEMENT DESC: Improve readers by parallelizing I/O and compute operations --------- Co-authored-by: Seth Shelnutt <Shelnutt2@gmail.com> Co-authored-by: Ypatia Tsavliri <ypatia@tiledb.com> Co-authored-by: Seth Shelnutt <seth@tiledb.io>
1 parent a2e10e1 commit 8652f02

23 files changed

+416
-143
lines changed

test/src/unit-ReadCellSlabIter.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,10 @@ void set_result_tile_dim(
183183
std::nullopt,
184184
std::nullopt,
185185
std::nullopt);
186-
ResultTile::TileData tile_data{nullptr, nullptr, nullptr};
186+
ResultTile::TileData tile_data{
187+
{nullptr, ThreadPool::SharedTask()},
188+
{nullptr, ThreadPool::SharedTask()},
189+
{nullptr, ThreadPool::SharedTask()}};
187190
result_tile.init_coord_tile(
188191
constants::format_version,
189192
array_schema,

test/src/unit-cppapi-consolidation-with-timestamps.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ TEST_CASE_METHOD(
636636

637637
// Will only allow to load two tiles out of 3.
638638
Config cfg;
639-
cfg.set("sm.mem.total_budget", "30000");
639+
cfg.set("sm.mem.total_budget", "50000");
640640
cfg.set("sm.mem.reader.sparse_global_order.ratio_coords", "0.15");
641641
ctx_ = Context(cfg);
642642

@@ -685,7 +685,7 @@ TEST_CASE_METHOD(
685685

686686
// Will only allow to load two tiles out of 3.
687687
Config cfg;
688-
cfg.set("sm.mem.total_budget", "30000");
688+
cfg.set("sm.mem.total_budget", "50000");
689689
cfg.set("sm.mem.reader.sparse_global_order.ratio_coords", "0.15");
690690
ctx_ = Context(cfg);
691691

test/src/unit-result-tile.cc

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,10 @@ TEST_CASE_METHOD(
213213
0,
214214
std::nullopt,
215215
std::nullopt);
216-
ResultTile::TileData tile_data{nullptr, nullptr, nullptr};
216+
ResultTile::TileData tile_data{
217+
{nullptr, ThreadPool::SharedTask()},
218+
{nullptr, ThreadPool::SharedTask()},
219+
{nullptr, ThreadPool::SharedTask()}};
217220
rt.init_coord_tile(
218221
constants::format_version,
219222
array_schema,
@@ -230,7 +233,10 @@ TEST_CASE_METHOD(
230233
0,
231234
std::nullopt,
232235
std::nullopt);
233-
ResultTile::TileData tile_data{nullptr, nullptr, nullptr};
236+
ResultTile::TileData tile_data{
237+
{nullptr, ThreadPool::SharedTask()},
238+
{nullptr, ThreadPool::SharedTask()},
239+
{nullptr, ThreadPool::SharedTask()}};
234240
rt.init_coord_tile(
235241
constants::format_version,
236242
array_schema,
@@ -326,7 +332,10 @@ TEST_CASE_METHOD(
326332
0,
327333
std::nullopt,
328334
std::nullopt);
329-
ResultTile::TileData tile_data{nullptr, nullptr, nullptr};
335+
ResultTile::TileData tile_data{
336+
{nullptr, ThreadPool::SharedTask()},
337+
{nullptr, ThreadPool::SharedTask()},
338+
{nullptr, ThreadPool::SharedTask()}};
330339
rt.init_coord_tile(
331340
constants::format_version,
332341
array_schema,
@@ -343,7 +352,10 @@ TEST_CASE_METHOD(
343352
0,
344353
std::nullopt,
345354
std::nullopt);
346-
ResultTile::TileData tile_data{nullptr, nullptr, nullptr};
355+
ResultTile::TileData tile_data{
356+
{nullptr, ThreadPool::SharedTask()},
357+
{nullptr, ThreadPool::SharedTask()},
358+
{nullptr, ThreadPool::SharedTask()}};
347359
rt.init_coord_tile(
348360
constants::format_version,
349361
array_schema,

test/src/unit-sparse-global-order-reader.cc

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1993,9 +1993,10 @@ TEST_CASE_METHOD(
19931993
}
19941994

19951995
// FIXME: there is no per fragment budget anymore
1996-
// Two result tile (2 * (~3000 + 8) will be bigger than the per fragment
1997-
// budget (1000).
1998-
memory_.total_budget_ = "35000";
1996+
// Two result tiles (2 * (2842 + 8)) = 5700 will be bigger than the per
1997+
// fragment budget (60000 * 0.11 / 2 fragments = 3300), so only one result
1998+
// tile will be loaded each time.
1999+
memory_.total_budget_ = "60000";
19992000
memory_.ratio_coords_ = "0.11";
20002001
update_config();
20012002

@@ -2518,8 +2519,9 @@ TEST_CASE_METHOD(
25182519
}
25192520

25202521
// FIXME: there is no per fragment budget anymore
2521-
// Two result tile (2 * (~4000 + 8) will be bigger than the per fragment
2522-
// budget (1000).
2522+
// Two result tiles (2 * (2842 + 8)) = 5700 will be bigger than the per
2523+
// fragment budget (40000 * 0.22 /2 frag = 4400), so only one will be loaded
2524+
// each time.
25232525
memory_.total_budget_ = "40000";
25242526
memory_.ratio_coords_ = "0.22";
25252527
update_config();

test/src/unit-sparse-unordered-with-dups-reader.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,9 +1064,12 @@ TEST_CASE_METHOD(
10641064

10651065
if (one_frag) {
10661066
CHECK(1 == loop_num->second);
1067-
} else {
1068-
CHECK(9 == loop_num->second);
10691067
}
1068+
/**
1069+
* We can't do a similar check for multiple fragments because the number of
1070+
* tiles that fit in the memory budget is architecture dependent, and
1071+
* therefore so is the number of internal loops we have.
1072+
*/
10701073

10711074
// Try to read multiple frags without partial tile offset reading. Should
10721075
// fail

tiledb/sm/filter/compression_filter.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ Status CompressionFilter::decompress_var_string_coords(
636636
auto output_view = span<std::byte>(
637637
reinterpret_cast<std::byte*>(output_buffer->data()), uncompressed_size);
638638
auto offsets_view = span<uint64_t>(
639-
offsets_tile->data_as<offsets_t>(), uncompressed_offsets_size);
639+
offsets_tile->data_as_unsafe<offsets_t>(), uncompressed_offsets_size);
640640

641641
if (compressor_ == Compressor::RLE) {
642642
uint8_t rle_len_bytesize, string_len_bytesize;

tiledb/sm/filter/filter_pipeline.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ Status FilterPipeline::run_reverse(
464464
// If the pipeline is empty, just copy input to output.
465465
if (filters_.empty()) {
466466
void* output_chunk_buffer =
467-
tile->data_as<char>() + chunk_data.chunk_offsets_[i];
467+
tile->data_as_unsafe<char>() + chunk_data.chunk_offsets_[i];
468468
RETURN_NOT_OK(input_data.copy_to(output_chunk_buffer));
469469
continue;
470470
}
@@ -487,7 +487,7 @@ Status FilterPipeline::run_reverse(
487487
bool last_filter = filter_idx == 0;
488488
if (last_filter) {
489489
void* output_chunk_buffer =
490-
tile->data_as<char>() + chunk_data.chunk_offsets_[i];
490+
tile->data_as_unsafe<char>() + chunk_data.chunk_offsets_[i];
491491
RETURN_NOT_OK(output_data.set_fixed_allocation(
492492
output_chunk_buffer, chunk.unfiltered_data_size_));
493493
reader_stats->add_counter(

tiledb/sm/filter/test/filter_test_support.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,8 @@ Tile create_tile_for_unfiltering(
203203
tile->cell_size() * nelts,
204204
tile->filtered_buffer().data(),
205205
tile->filtered_buffer().size(),
206-
tracker};
206+
tracker,
207+
std::nullopt};
207208
}
208209

209210
void run_reverse(

tiledb/sm/filter/test/tile_data_generator.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ class TileDataGenerator {
9999
original_tile_size(),
100100
filtered_buffer.data(),
101101
filtered_buffer.size(),
102-
memory_tracker);
102+
memory_tracker,
103+
std::nullopt);
103104
}
104105

105106
/** Returns the size of the original unfiltered data. */

tiledb/sm/metadata/test/unit_metadata.cc

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ TEST_CASE(
123123
tile1->size(),
124124
tile1->filtered_buffer().data(),
125125
tile1->filtered_buffer().size(),
126-
tracker);
126+
tracker,
127+
ThreadPool::SharedTask());
127128
memcpy(metadata_tiles[0]->data(), tile1->data(), tile1->size());
128129

129130
metadata_tiles[1] = tdb::make_shared<Tile>(
@@ -135,7 +136,8 @@ TEST_CASE(
135136
tile2->size(),
136137
tile2->filtered_buffer().data(),
137138
tile2->filtered_buffer().size(),
138-
tracker);
139+
tracker,
140+
ThreadPool::SharedTask());
139141
memcpy(metadata_tiles[1]->data(), tile2->data(), tile2->size());
140142

141143
metadata_tiles[2] = tdb::make_shared<Tile>(
@@ -147,7 +149,8 @@ TEST_CASE(
147149
tile3->size(),
148150
tile3->filtered_buffer().data(),
149151
tile3->filtered_buffer().size(),
150-
tracker);
152+
tracker,
153+
ThreadPool::SharedTask());
151154
memcpy(metadata_tiles[2]->data(), tile3->data(), tile3->size());
152155

153156
meta = Metadata::deserialize(metadata_tiles);

0 commit comments

Comments
 (0)