 * - Additional benchmarking scenarios can be created by extending `benchmark_interface`.
 */

+#include <list>
 #include <malloc.h>
 #include <random>

@@ -86,6 +87,7 @@ struct alloc_data {
 };

 struct next_alloc_data {
+    bool alloc; // true if allocation, false if deallocation
     size_t offset;
     size_t size;
 };
@@ -288,18 +290,17 @@ template <
     typename =
         std::enable_if_t<std::is_base_of<allocator_interface, Alloc>::value>>
 class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
-    using distribution = std::uniform_int_distribution<size_t>;
+  protected:
     template <class T> using vector2d = std::vector<std::vector<T>>;
     using base = benchmark_interface<Size, Alloc>;
-
     int allocsPerIterations = 10;
     bool thread_local_allocations = true;
     size_t max_allocs = 0;

     vector2d<alloc_data> allocations;
     vector2d<next_alloc_data> next;
     using next_alloc_data_iterator =
-        std::vector<next_alloc_data>::const_iterator;
+        typename std::vector<next_alloc_data>::const_iterator;
     std::vector<std::unique_ptr<next_alloc_data_iterator>> next_iter;
     int64_t iterations;
@@ -386,15 +387,20 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
         auto tid = state.thread_index();
         auto &allocation = allocations[tid];
         auto &iter = next_iter[tid];
+
         for (int i = 0; i < allocsPerIterations; i++) {
             auto &n = *(*iter)++;
             auto &alloc = allocation[n.offset];
-            base::allocator.benchFree(alloc.ptr, alloc.size);
-            alloc.size = n.size;
-            alloc.ptr = base::allocator.benchAlloc(alloc.size);
-
-            if (alloc.ptr == NULL) {
-                state.SkipWithError("allocation failed");
+            if (n.alloc) {
+                alloc.ptr = base::allocator.benchAlloc(n.size);
+                if (alloc.ptr == NULL) {
+                    state.SkipWithError("allocation failed");
+                }
+                alloc.size = n.size;
+            } else {
+                base::allocator.benchFree(alloc.ptr, alloc.size);
+                alloc.ptr = NULL;
+                alloc.size = 0;
             }
         }
     }
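To make the control flow of the updated hot loop easier to follow, here is a minimal, self-contained sketch (not part of the commit) of how a precomputed `next_alloc_data` trace is replayed. It assumes plain `malloc`/`free` in place of the benchmark's `allocator_interface` backend; the `replay` helper is hypothetical.

#include <cstddef>
#include <cstdlib>
#include <vector>

// Simplified mirrors of the structs used by the benchmark.
struct alloc_data { void *ptr = nullptr; size_t size = 0; };
struct next_alloc_data { bool alloc; size_t offset; size_t size; };

// Replays a precomputed trace: each entry either allocates into a slot or
// frees whatever that slot currently holds, mirroring the loop in the hunk above.
void replay(const std::vector<next_alloc_data> &trace,
            std::vector<alloc_data> &slots) {
    for (const auto &n : trace) {
        auto &slot = slots[n.offset];
        if (n.alloc) {
            slot.ptr = std::malloc(n.size); // benchAlloc() in the real code
            slot.size = n.size;
        } else {
            std::free(slot.ptr);            // benchFree() in the real code
            slot.ptr = nullptr;
            slot.size = 0;
        }
    }
}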
@@ -412,13 +418,14 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
     }

   private:
-    void prealloc(benchmark::State &state) {
+    virtual void prealloc(benchmark::State &state) {
         auto tid = state.thread_index();
         auto &i = allocations[tid];
         i.resize(max_allocs);
         auto sizeGenerator = base::alloc_sizes[tid];

-        for (size_t j = 0; j < max_allocs; j++) {
+        // Preallocate half of the available slots for allocations.
+        for (size_t j = 0; j < max_allocs / 2; j++) {
             auto size = sizeGenerator.nextSize();
             i[j].ptr = base::allocator.benchAlloc(size);
             if (i[j].ptr == NULL) {
@@ -441,20 +448,168 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
         }
     }

-    void prepareWorkload(benchmark::State &state) {
+    virtual void prepareWorkload(benchmark::State &state) {
         auto tid = state.thread_index();
         auto &n = next[tid];
+
+        // Create generators for random index selection and the binary decision.
+        using distribution = std::uniform_int_distribution<size_t>;
         std::default_random_engine generator;
-        distribution dist;
+        distribution dist_offset(0, max_allocs - 1);
+        distribution dist_opt_type(0, 1);
         generator.seed(0);
-        dist.param(distribution::param_type(0, max_allocs - 1));
+
         auto sizeGenerator = base::alloc_sizes[tid];
+        std::vector<size_t> free;
+        std::vector<size_t> allocated;
+        free.reserve(max_allocs / 2);
+        allocated.reserve(max_allocs / 2);
+        // Preallocate memory: initially, half of the indices are allocated.
+        // See the prealloc() function.
+        size_t i = 0;
+        while (i < max_allocs / 2) {
+            allocated.push_back(i++);
+        }
+        // The remaining indices are marked as free.
+        while (i < max_allocs) {
+            free.push_back(i++);
+        }

         n.clear();
         for (int64_t j = 0; j < state.max_iterations * allocsPerIterations;
              j++) {
-            n.push_back({dist(generator), sizeGenerator.nextSize()});
+            // Decide whether to allocate or free:
+            // - If no allocations exist, allocation is forced.
+            // - If the maximum number of allocations is reached, free is forced.
+            // - Otherwise, use a binary random choice (0 or 1).
+            if (allocated.empty() ||
+                (dist_opt_type(generator) == 0 && !free.empty())) {
+                // Allocation:
+                std::swap(free[dist_offset(generator) % free.size()],
+                          free.back());
+                auto offset = free.back();
+                free.pop_back();
+
+                n.push_back({true, offset, sizeGenerator.nextSize()});
+                allocated.push_back(offset);
+            } else {
+                // Free
+                std::swap(allocated[dist_offset(generator) % allocated.size()],
+                          allocated.back());
+                auto offset = allocated.back();
+                allocated.pop_back();
+
+                n.push_back({false, offset, 0});
+                free.push_back(offset);
+            }
         }
+
         next_iter[tid] = std::make_unique<next_alloc_data_iterator>(n.cbegin());
     }
 };
+// This class benchmarks performance by randomly allocating and freeing memory.
+// Initially, it slowly increases the memory footprint, and later decreases it.
+template <
+    typename Size, typename Alloc,
+    typename =
+        std::enable_if_t<std::is_base_of<alloc_size_interface, Size>::value>,
+    typename =
+        std::enable_if_t<std::is_base_of<allocator_interface, Alloc>::value>>
+class peak_alloc_benchmark
+    : public multiple_malloc_free_benchmark<Size, Alloc> {
+    using base = multiple_malloc_free_benchmark<Size, Alloc>;
+    virtual void prepareWorkload(benchmark::State &state) override {
+        // Retrieve the thread index and the corresponding operation buffer.
+        auto tid = state.thread_index();
+        auto &n = this->next[tid];
+
+        // Set up the random generators for index selection and decision making.
+        std::default_random_engine generator;
+        std::uniform_int_distribution<size_t> dist_offset(0,
+                                                          this->max_allocs - 1);
+        std::uniform_real_distribution<double> dist_opt_type(0, 1);
+        generator.seed(0);
+        auto sizeGenerator = this->alloc_sizes[tid];
+
+        n.clear();
+        std::vector<size_t> free;
+        std::vector<size_t> allocated;
+        free.reserve(this->max_allocs);
+        // Initially, all indices are available.
+        for (size_t i = 0; i < this->max_allocs; i++) {
+            free.push_back(i);
+        }
+
+        // Total number of allocation/free operations to simulate.
+        int64_t operations_number =
+            state.max_iterations * this->allocsPerIterations;
+        for (int64_t j = 0; j < operations_number; j++) {
+            int64_t target_allocation;
+
+            // Determine the target number of allocations based on the progress
+            // of the iterations: in the first half it increases linearly, and
+            // in the second half it decreases linearly.
+            if (j < operations_number / 2) {
+                target_allocation = 2 * static_cast<int64_t>(this->max_allocs) *
+                                    j / operations_number;
+            } else {
+                target_allocation = -2 *
+                                        static_cast<int64_t>(this->max_allocs) *
+                                        j / operations_number +
+                                    2 * static_cast<int64_t>(this->max_allocs);
+            }
+
+            // x represents the gap between the target and current allocations.
+            auto x = static_cast<double>(target_allocation -
+                                         static_cast<double>(allocated.size()));
+
+            // Use a normal CDF with a high sigma so that when x is positive we
+            // are slightly more likely to allocate, and when x is negative we
+            // are slightly more likely to free, keeping the overall change
+            // gradual.
+            const double sigma = 1000;
+            auto cdf = normalCDF(x, sigma);
+
+            // Decide whether to allocate or free:
+            // - If no allocations exist, allocation is forced.
+            // - If the maximum number of allocations is reached, free is forced.
+            // - Otherwise, choose based on the computed probability.
+            if (allocated.empty() ||
+                (!free.empty() && cdf > dist_opt_type(generator))) {
+                // Allocation
+                std::swap(free[dist_offset(generator) % free.size()],
+                          free.back());
+                auto offset = free.back();
+                free.pop_back();
+                n.push_back({true, offset, sizeGenerator.nextSize()});
+                allocated.push_back(offset);
+            } else {
+                // Free
+                std::swap(allocated[dist_offset(generator) % allocated.size()],
+                          allocated.back());
+                auto offset = allocated.back();
+                allocated.pop_back();
+                n.push_back({false, offset, 0});
+                free.push_back(offset);
+            }
+        }
+
+        this->next_iter[tid] =
+            std::make_unique<std::vector<next_alloc_data>::const_iterator>(
+                n.cbegin());
+    }
+
+    virtual void prealloc(benchmark::State &state) {
+        auto tid = state.thread_index();
+        auto &i = base::allocations[tid];
+        i.resize(base::max_allocs);
+    }
+
+    virtual std::string name() { return base::base::name() + "/peak_alloc"; }
+
+  private:
+    // Calculate the CDF of a normal distribution.
+    double normalCDF(double x, double sigma = 1.0, double mu = 0.0) {
+        return 0.5 * (1 + std::erf((x - mu) / (sigma * std::sqrt(2.0))));
+    }
+};
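The workload shape produced by peak_alloc_benchmark can be previewed in isolation. The following standalone sketch is an illustration, not part of the commit; the max_allocs and operation counts are arbitrary. It reproduces the linear ramp of the allocation target and the normal-CDF-biased allocate/free decision from the diff above, and prints how the number of live allocations rises toward the peak and then falls.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <random>

// Same CDF as in the benchmark: probability that a N(mu, sigma) sample is <= x.
static double normalCDF(double x, double sigma = 1.0, double mu = 0.0) {
    return 0.5 * (1 + std::erf((x - mu) / (sigma * std::sqrt(2.0))));
}

int main() {
    const int64_t max_allocs = 1000;  // arbitrary for the illustration
    const int64_t operations = 20000; // arbitrary for the illustration
    const double sigma = 1000;
    std::default_random_engine generator(0);
    std::uniform_real_distribution<double> dist_opt_type(0, 1);

    int64_t live = 0; // number of currently "allocated" slots
    for (int64_t j = 0; j < operations; j++) {
        // Target rises linearly to max_allocs at the midpoint, then falls back.
        int64_t target = (j < operations / 2)
                             ? 2 * max_allocs * j / operations
                             : 2 * max_allocs - 2 * max_allocs * j / operations;
        // Positive gap -> slightly more likely to allocate; negative -> free.
        double cdf = normalCDF(static_cast<double>(target - live), sigma);
        if (live == 0 || (live < max_allocs && cdf > dist_opt_type(generator))) {
            live++; // would be recorded as {true, offset, size}
        } else {
            live--; // would be recorded as {false, offset, 0}
        }
        if (j % 2000 == 0) {
            std::printf("op %6lld: live = %lld (target %lld)\n",
                        static_cast<long long>(j), static_cast<long long>(live),
                        static_cast<long long>(target));
        }
    }
    return 0;
}

With sigma = 1000 the per-step bias is small, so the live-allocation count tracks the triangular target only loosely, which matches the "slowly increases the memory footprint, and later decreases it" behavior described in the class comment.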