@@ -34,6 +34,12 @@ size_t EstimateColumnStringMemoryUsage(
34
34
) {
35
35
static const size_t COLUMN_STRING_DEFAULT_BLOCK_SIZE = 4096 ;
36
36
static const size_t COLUMN_STRING_MAX_EXPECTED_MEMORY_OVERHEAD = 4096 ;
37
+ const float max_estimation_error_factor = item_estimated_size == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
38
+
39
+ // space wasted in block since not all items can be fit perfectly, and there is some unused space at the end of the block.
40
+ const auto estimate_lost_space_in_block = (static_cast <size_t >(item_estimated_size) != 0
41
+ ? COLUMN_STRING_DEFAULT_BLOCK_SIZE % static_cast <size_t >(static_cast <size_t >(item_estimated_size) * std::max (1 .0f , value_to_estimation_average_size_ratio))
42
+ : COLUMN_STRING_DEFAULT_BLOCK_SIZE / 10 );
37
43
38
44
// if no estimation provided, use factual total size of all items
39
45
if (item_estimated_size == ColumnString::EstimatedValueSize{0 } && total_items_size && number_of_items)
@@ -42,13 +48,6 @@ size_t EstimateColumnStringMemoryUsage(
42
48
const size_t estimated_total_item_size = number_of_items * static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio;
43
49
const auto estimated_number_of_blocks = std::max<size_t >(1 , estimated_total_item_size ? COLUMN_STRING_DEFAULT_BLOCK_SIZE / estimated_total_item_size : 1 );
44
50
45
- // space wasted in block since not all items can be fit perfectly, and there is some unused space at the end of the block.
46
- const auto estimate_lost_space_in_block = (static_cast <size_t >(item_estimated_size) != 0
47
- ? COLUMN_STRING_DEFAULT_BLOCK_SIZE % static_cast <size_t >(static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio)
48
- : COLUMN_STRING_DEFAULT_BLOCK_SIZE / 10 );
49
-
50
- const auto max_estimation_error_factor = item_estimated_size == ColumnString::NO_PREALLOCATE ? 2 : 1.2 ;
51
-
52
51
return (number_of_items * sizeof (std::string_view)
53
52
+ estimated_total_item_size
54
53
+ estimate_lost_space_in_block * estimated_number_of_blocks
@@ -213,49 +212,6 @@ TEST(ColumnString, InvalidSizeEstimation) {
213
212
EXPECT_THROW (col.SetEstimatedValueSize (ColumnString::EstimatedValueSize (std::numeric_limits<size_t >::max ())), ValidationError);
214
213
}
215
214
216
- TEST (ColumnString, WithSizeEstimation) {
217
- const ColumnString::EstimatedValueSize value_size_estimations[] = {
218
- ColumnString::EstimatedValueSize::TINY,
219
- ColumnString::EstimatedValueSize::SMALL,
220
- ColumnString::EstimatedValueSize::MEDIUM,
221
- ColumnString::EstimatedValueSize::LARGE,
222
-
223
- // ColumnString::EstimatedValueSize(0),
224
- ColumnString::EstimatedValueSize (1 ),
225
- ColumnString::EstimatedValueSize (300 ),
226
- ColumnString::EstimatedValueSize (10'000 ),
227
- };
228
-
229
- auto values = MakeStrings ();
230
- std::cerr << " Number of values: " << values.size () << std::endl;
231
-
232
- for (ColumnString::EstimatedValueSize estimation : value_size_estimations) {
233
- SCOPED_TRACE (::testing::Message (" with estimation: " ) << estimation);
234
- std::cerr << " \n Estimation " << estimation << std::endl;
235
-
236
- auto col = std::make_shared<ColumnString>(estimation);
237
-
238
- dumpMemoryUsage (" After constructing with estimation" , col);
239
-
240
- col->Reserve (values.size ());
241
- dumpMemoryUsage (" After Reserve()" , col);
242
-
243
- size_t i = 0 ;
244
- for (const auto & v : values) {
245
- col->Append (v);
246
-
247
- EXPECT_EQ (i + 1 , col->Size ());
248
- EXPECT_EQ (v, col->At (i));
249
-
250
- ++i;
251
- }
252
-
253
- dumpMemoryUsage (" After appending all values" , col);
254
- }
255
- }
256
-
257
-
258
-
259
215
struct SizeRatio {
260
216
std::vector<float > ratios;
261
217
float average;
@@ -276,19 +232,6 @@ std::ostream & operator<<(std::ostream& ostr, const SizeRatio & r) {
276
232
return ostr << " SizeRatio{ average: " << std::fixed << r.average << " } " ;
277
233
}
278
234
279
- // std::vector<SizeRatio> ratios{
280
- // // estimation is about right
281
- // SizeRatio({0.9, 0.95, 1.0, 1.05, 1.1}),
282
- // // estimation is a bit high, real values are about 0.8 of estimated size
283
- // SizeRatio({0.75, 0.8, 0.85}),
284
- // // estimation is a bit low, real values are about 1.2 of estimated size
285
- // SizeRatio({1.25, 1.2, 1.25}),
286
- // // estimation is to high, real values are about 2.0 of estimated size
287
- // SizeRatio({1.9, 2, 2.1}),
288
- // // estimation is to low, real values are about 0.5 of estimated size
289
- // SizeRatio({0.4, 0.5, 0.6}),
290
- // };
291
-
292
235
/* * Make sure that setting value size estimates with ColumnString::EstimatedValueSize either via constructor or via SetEstimatedValueSize
293
236
* doesn't break ColumnString functionality and well-behaves with Reserve() and Append().
294
237
* I.e. values are appended properly, nothing crashes and memory usage is not crazy-high if estimation is incorrect.
@@ -340,7 +283,10 @@ struct ColumnStringEstimatedValueSizeTest : public ::testing::TestWithParam<std:
340
283
341
284
size_t EstimateMemoryUsage (size_t total_values_size, float expected_number_of_items_multiplier = 1.0 ) {
342
285
const auto & [single_value_size_estimation, size_ratio] = GetParam ();
343
- return EstimateColumnStringMemoryUsage (expected_number_of_items * expected_number_of_items_multiplier, single_value_size_estimation, size_ratio.average , total_values_size);
286
+ return EstimateColumnStringMemoryUsage (expected_number_of_items * expected_number_of_items_multiplier,
287
+ single_value_size_estimation,
288
+ size_ratio.average ,
289
+ total_values_size);
344
290
}
345
291
};
346
292
@@ -383,10 +329,8 @@ TEST_P(ColumnStringEstimatedValueSizeTest, AppendNoReserve)
383
329
384
330
EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
385
331
386
- const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
387
-
388
332
// since there was no Reserve call prior, there could be some more overallocations, hence some estimation error
389
- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor );
333
+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size));
390
334
}
391
335
392
336
TEST_P (ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
@@ -399,10 +343,7 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
399
343
EXPECT_NO_THROW (col.Reserve (expected_number_of_items));
400
344
EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
401
345
402
- const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
403
-
404
- // Allow minor overallocations, hence * 1.2
405
- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor);
346
+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size));
406
347
}
407
348
408
349
TEST_P (ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
@@ -415,10 +356,7 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
415
356
EXPECT_NO_THROW (col.Reserve (expected_number_of_items * .8 ));
416
357
EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
417
358
418
- const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
419
-
420
- // Allow minor overallocations, hence * 1.2
421
- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor);
359
+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size));
422
360
}
423
361
424
362
TEST_P (ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
@@ -431,10 +369,7 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
431
369
EXPECT_NO_THROW (col.Reserve (expected_number_of_items * 1.2 ));
432
370
EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
433
371
434
- const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
435
-
436
- // Allow minor overallocations, hence * 1.2
437
- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size, 1.2 ) * max_estimation_error_factor);
372
+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size, 1.2 ));
438
373
}
439
374
440
375
/* * TODO more tests
0 commit comments