Skip to content

Commit a72ad04

Browse files
committed
Better estimations in tests
also removed commented out code
1 parent 12f0399 commit a72ad04

File tree

1 file changed

+14
-79
lines changed

1 file changed

+14
-79
lines changed

ut/ColumnString_ut.cpp

Lines changed: 14 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ size_t EstimateColumnStringMemoryUsage(
3434
) {
3535
static const size_t COLUMN_STRING_DEFAULT_BLOCK_SIZE = 4096;
3636
static const size_t COLUMN_STRING_MAX_EXPECTED_MEMORY_OVERHEAD = 4096;
37+
const float max_estimation_error_factor = item_estimated_size == ColumnString::NO_PREALLOCATE ? 2.5 : 2;
38+
39+
// space wasted in block since not all items can be fit perfectly, and there is some unused space at the end of the block.
40+
const auto estimate_lost_space_in_block = (static_cast<size_t>(item_estimated_size) != 0
41+
? COLUMN_STRING_DEFAULT_BLOCK_SIZE % static_cast<size_t>(static_cast<size_t>(item_estimated_size) * std::max(1.0f, value_to_estimation_average_size_ratio))
42+
: COLUMN_STRING_DEFAULT_BLOCK_SIZE / 10);
3743

3844
// if no estimation provided, use factual total size of all items
3945
if (item_estimated_size == ColumnString::EstimatedValueSize{0} && total_items_size && number_of_items)
@@ -42,13 +48,6 @@ size_t EstimateColumnStringMemoryUsage(
4248
const size_t estimated_total_item_size = number_of_items * static_cast<size_t>(item_estimated_size) * value_to_estimation_average_size_ratio;
4349
const auto estimated_number_of_blocks = std::max<size_t>(1, estimated_total_item_size ? COLUMN_STRING_DEFAULT_BLOCK_SIZE / estimated_total_item_size : 1);
4450

45-
// space wasted in block since not all items can be fit perfectly, and there is some unused space at the end of the block.
46-
const auto estimate_lost_space_in_block = (static_cast<size_t>(item_estimated_size) != 0
47-
? COLUMN_STRING_DEFAULT_BLOCK_SIZE % static_cast<size_t>(static_cast<size_t>(item_estimated_size) * value_to_estimation_average_size_ratio)
48-
: COLUMN_STRING_DEFAULT_BLOCK_SIZE / 10);
49-
50-
const auto max_estimation_error_factor = item_estimated_size == ColumnString::NO_PREALLOCATE ? 2 : 1.2;
51-
5251
return (number_of_items * sizeof(std::string_view)
5352
+ estimated_total_item_size
5453
+ estimate_lost_space_in_block * estimated_number_of_blocks
@@ -213,49 +212,6 @@ TEST(ColumnString, InvalidSizeEstimation) {
213212
EXPECT_THROW(col.SetEstimatedValueSize(ColumnString::EstimatedValueSize(std::numeric_limits<size_t>::max())), ValidationError);
214213
}
215214

216-
TEST(ColumnString, WithSizeEstimation) {
217-
const ColumnString::EstimatedValueSize value_size_estimations[] = {
218-
ColumnString::EstimatedValueSize::TINY,
219-
ColumnString::EstimatedValueSize::SMALL,
220-
ColumnString::EstimatedValueSize::MEDIUM,
221-
ColumnString::EstimatedValueSize::LARGE,
222-
223-
// ColumnString::EstimatedValueSize(0),
224-
ColumnString::EstimatedValueSize(1),
225-
ColumnString::EstimatedValueSize(300),
226-
ColumnString::EstimatedValueSize(10'000),
227-
};
228-
229-
auto values = MakeStrings();
230-
std::cerr << "Number of values: " << values.size() << std::endl;
231-
232-
for (ColumnString::EstimatedValueSize estimation : value_size_estimations) {
233-
SCOPED_TRACE(::testing::Message("with estimation: ") << estimation);
234-
std::cerr << "\nEstimation " << estimation << std::endl;
235-
236-
auto col = std::make_shared<ColumnString>(estimation);
237-
238-
dumpMemoryUsage("After constructing with estimation", col);
239-
240-
col->Reserve(values.size());
241-
dumpMemoryUsage("After Reserve()", col);
242-
243-
size_t i = 0;
244-
for (const auto & v : values) {
245-
col->Append(v);
246-
247-
EXPECT_EQ(i + 1, col->Size());
248-
EXPECT_EQ(v, col->At(i));
249-
250-
++i;
251-
}
252-
253-
dumpMemoryUsage("After appending all values", col);
254-
}
255-
}
256-
257-
258-
259215
struct SizeRatio {
260216
std::vector<float> ratios;
261217
float average;
@@ -276,19 +232,6 @@ std::ostream & operator<<(std::ostream& ostr, const SizeRatio & r) {
276232
return ostr << "SizeRatio{ average: " << std::fixed << r.average << " } ";
277233
}
278234

279-
//std::vector<SizeRatio> ratios{
280-
// // estimation is about right
281-
// SizeRatio({0.9, 0.95, 1.0, 1.05, 1.1}),
282-
// // estimation is a bit high, real values are about 0.8 of estimated size
283-
// SizeRatio({0.75, 0.8, 0.85}),
284-
// // estimation is a bit low, real values are about 1.2 of estimated size
285-
// SizeRatio({1.25, 1.2, 1.25}),
286-
// // estimation is too low, real values are about 2.0 of estimated size
287-
// SizeRatio({1.9, 2, 2.1}),
288-
// // estimation is too high, real values are about 0.5 of estimated size
289-
// SizeRatio({0.4, 0.5, 0.6}),
290-
// };
291-
292235
/** Make sure that setting value size estimates with ColumnString::EstimatedValueSize either via constructor or via SetEstimatedValueSize
293236
* doesn't break ColumnString functionality and behaves well with Reserve() and Append().
294237
* I.e. values are appended properly, nothing crashes and memory usage is not crazy-high if estimation is incorrect.
@@ -340,7 +283,10 @@ struct ColumnStringEstimatedValueSizeTest : public ::testing::TestWithParam<std:
340283

341284
size_t EstimateMemoryUsage(size_t total_values_size, float expected_number_of_items_multiplier = 1.0) {
342285
const auto & [single_value_size_estimation, size_ratio] = GetParam();
343-
return EstimateColumnStringMemoryUsage(expected_number_of_items * expected_number_of_items_multiplier, single_value_size_estimation, size_ratio.average, total_values_size);
286+
return EstimateColumnStringMemoryUsage(expected_number_of_items * expected_number_of_items_multiplier,
287+
single_value_size_estimation,
288+
size_ratio.average,
289+
total_values_size);
344290
}
345291
};
346292

@@ -383,10 +329,8 @@ TEST_P(ColumnStringEstimatedValueSizeTest, AppendNoReserve)
383329

384330
EXPECT_NO_FATAL_FAILURE(AppendStrings(col, total_values_size));
385331

386-
const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2;
387-
388332
// since there was no prior Reserve() call, there could be some more overallocations, hence some estimation error
389-
EXPECT_LT(col.MemoryUsage(), EstimateMemoryUsage(total_values_size) * max_estimation_error_factor);
333+
EXPECT_LT(col.MemoryUsage(), EstimateMemoryUsage(total_values_size));
390334
}
391335

392336
TEST_P(ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
@@ -399,10 +343,7 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
399343
EXPECT_NO_THROW(col.Reserve(expected_number_of_items));
400344
EXPECT_NO_FATAL_FAILURE(AppendStrings(col, total_values_size));
401345

402-
const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2;
403-
404-
// Allow minor overallocations, hence * 1.2
405-
EXPECT_LT(col.MemoryUsage(), EstimateMemoryUsage(total_values_size) * max_estimation_error_factor);
346+
EXPECT_LT(col.MemoryUsage(), EstimateMemoryUsage(total_values_size));
406347
}
407348

408349
TEST_P(ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
@@ -415,10 +356,7 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
415356
EXPECT_NO_THROW(col.Reserve(expected_number_of_items * .8));
416357
EXPECT_NO_FATAL_FAILURE(AppendStrings(col, total_values_size));
417358

418-
const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2;
419-
420-
// Allow minor overallocations, hence * 1.2
421-
EXPECT_LT(col.MemoryUsage(), EstimateMemoryUsage(total_values_size) * max_estimation_error_factor);
359+
EXPECT_LT(col.MemoryUsage(), EstimateMemoryUsage(total_values_size));
422360
}
423361

424362
TEST_P(ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
@@ -431,10 +369,7 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
431369
EXPECT_NO_THROW(col.Reserve(expected_number_of_items * 1.2));
432370
EXPECT_NO_FATAL_FAILURE(AppendStrings(col, total_values_size));
433371

434-
const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2;
435-
436-
// Allow minor overallocations, hence * 1.2
437-
EXPECT_LT(col.MemoryUsage(), EstimateMemoryUsage(total_values_size, 1.2) * max_estimation_error_factor);
372+
EXPECT_LT(col.MemoryUsage(), EstimateMemoryUsage(total_values_size, 1.2));
438373
}
439374

440375
/** TODO more tests

0 commit comments

Comments
 (0)