8
8
#include " utils.h"
9
9
#include " value_generators.h"
10
10
11
+ #include < algorithm>
11
12
#include < cstddef>
12
13
#include < initializer_list>
13
14
#include < ios>
@@ -38,13 +39,25 @@ size_t EstimateColumnStringMemoryUsage(
38
39
if (item_estimated_size == ColumnString::EstimatedValueSize{0 } && total_items_size && number_of_items)
39
40
item_estimated_size = ColumnString::EstimatedValueSize (static_cast <double >(*total_items_size) / number_of_items);
40
41
41
- return number_of_items * sizeof (std::string_view)
42
- + number_of_items * static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio
42
+ const size_t estimated_total_item_size = number_of_items * static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio;
43
+ const auto estimated_number_of_blocks = std::max<size_t >(1 , estimated_total_item_size ? COLUMN_STRING_DEFAULT_BLOCK_SIZE / estimated_total_item_size : 1 );
44
+
45
+ // Space wasted per block: items rarely fill a block exactly, so some space at the end of each block stays unused.
46
+ const auto estimate_lost_space_in_block = (static_cast <size_t >(item_estimated_size) != 0
47
+ ? COLUMN_STRING_DEFAULT_BLOCK_SIZE % static_cast <size_t >(static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio)
48
+ : COLUMN_STRING_DEFAULT_BLOCK_SIZE / 10 );
49
+
50
+ const auto max_estimation_error_factor = item_estimated_size == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
51
+
52
+ return (number_of_items * sizeof (std::string_view)
53
+ + estimated_total_item_size
54
+ + estimate_lost_space_in_block * estimated_number_of_blocks
43
55
+ COLUMN_STRING_DEFAULT_BLOCK_SIZE
44
56
// It is hard to compute overhead added by vector<ColumnString::Block>
45
57
// (mostly because we don't know number of ColumnString::Block instances from outside, and this number depends on many factors),
46
58
// so we just make a guess.
47
- + COLUMN_STRING_MAX_EXPECTED_MEMORY_OVERHEAD;
59
+ + COLUMN_STRING_MAX_EXPECTED_MEMORY_OVERHEAD)
60
+ * max_estimation_error_factor;
48
61
}
49
62
50
63
std::string ScaleString (std::string str, size_t required_size) {
@@ -289,8 +302,15 @@ struct ColumnStringEstimatedValueSizeTest : public ::testing::TestWithParam<std:
289
302
// Adjust number of items so the test doesn't use too much memory
290
303
if (static_cast <size_t >(single_value_size_estimation) != 0
291
304
// *2 since we store both reference values and values in column itself.
292
- && EstimateColumnStringMemoryUsage (expected_number_of_items, single_value_size_estimation, size_ratio.average ) * 2 > MAX_MEMORY_USAGE) {
305
+ && EstimateColumnStringMemoryUsage (expected_number_of_items, single_value_size_estimation, size_ratio.average ) > MAX_MEMORY_USAGE) {
306
+ const auto old_expected_number_of_items = expected_number_of_items;
293
307
expected_number_of_items = MAX_MEMORY_USAGE / (static_cast <size_t >(single_value_size_estimation) * 2 * size_ratio.average );
308
+
309
+ std::cerr << " To avoid using too much memory, reduced number of items in test"
310
+ << " from " << old_expected_number_of_items
311
+ << " to " << expected_number_of_items
312
+ << " , expected item size is " << single_value_size_estimation
313
+ << std::endl;
294
314
}
295
315
}
296
316
@@ -318,9 +338,9 @@ struct ColumnStringEstimatedValueSizeTest : public ::testing::TestWithParam<std:
318
338
ASSERT_TRUE (CompareRecursive (values, column));
319
339
}
320
340
321
- size_t EstimateMemoryUsage (size_t total_values_size) {
341
+ size_t EstimateMemoryUsage (size_t total_values_size, float expected_number_of_items_multiplier = 1.0 ) {
322
342
const auto & [single_value_size_estimation, size_ratio] = GetParam ();
323
- return EstimateColumnStringMemoryUsage (expected_number_of_items, single_value_size_estimation, size_ratio.average , total_values_size);
343
+ return EstimateColumnStringMemoryUsage (expected_number_of_items * expected_number_of_items_multiplier , single_value_size_estimation, size_ratio.average , total_values_size);
324
344
}
325
345
};
326
346
@@ -363,8 +383,10 @@ TEST_P(ColumnStringEstimatedValueSizeTest, AppendNoReserve)
363
383
364
384
EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
365
385
366
- // since there was no Reserve call prior, there could be more some overallocations, hence *2
367
- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * 2 );
386
+ const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
387
+
388
+ // since there was no Reserve call prior, there could be some more overallocations, hence some estimation error
389
+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor);
368
390
}
369
391
370
392
TEST_P (ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
@@ -377,8 +399,10 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
377
399
EXPECT_NO_THROW (col.Reserve (expected_number_of_items));
378
400
EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
379
401
402
+ const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
403
+
380
404
// Allow some overallocation: the estimate is scaled by an error factor before comparing
381
- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * 2 );
405
+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor );
382
406
}
383
407
384
408
TEST_P (ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
@@ -391,8 +415,10 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
391
415
EXPECT_NO_THROW (col.Reserve (expected_number_of_items * .8 ));
392
416
EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
393
417
418
+ const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
419
+
394
420
// Allow some overallocation: the estimate is scaled by an error factor before comparing
395
- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * 2 );
421
+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor );
396
422
}
397
423
398
424
TEST_P (ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
@@ -405,8 +431,10 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
405
431
EXPECT_NO_THROW (col.Reserve (expected_number_of_items * 1.2 ));
406
432
EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
407
433
434
+ const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
435
+
408
436
// Allow some overallocation: the estimate is scaled by an error factor before comparing
409
- EXPECT_LT (col.MemoryUsage (), EstimateColumnStringMemoryUsage (expected_number_of_items * 1.2 , single_value_size_estimation, size_ratio. average , total_values_size ) * 2 );
437
+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size, 1.2 ) * max_estimation_error_factor );
410
438
}
411
439
412
440
/* * TODO more tests
0 commit comments