Skip to content

Commit a08d13d

Browse files
authored
Refactor stress test GC and support stress GC for Immix (#482)
* Refactor stress GC code: extract some code to Allocator, add methods to the Allocator trait to support stress GC. * Implement stress test for ImmixAllocator
1 parent 0ededb3 commit a08d13d

File tree

6 files changed

+294
-132
lines changed

6 files changed

+294
-132
lines changed

src/plan/global.rs

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ pub struct BasePlan<VM: VMBinding> {
406406
pub scanned_stacks: AtomicUsize,
407407
pub mutator_iterator_lock: Mutex<()>,
408408
// A counter that keeps track of the number of bytes allocated since the last stress test
409-
pub allocation_bytes: AtomicUsize,
409+
allocation_bytes: AtomicUsize,
410410
// Wrapper around analysis counters
411411
#[cfg(feature = "analysis")]
412412
pub analysis_manager: AnalysisManager<VM>,
@@ -754,33 +754,31 @@ impl<VM: VMBinding> BasePlan<VM> {
754754
is_internal_triggered
755755
}
756756

757-
pub fn increase_allocation_bytes_by(&self, size: usize) {
757+
/// Increase the allocation bytes and return the current allocation bytes after increasing
758+
pub fn increase_allocation_bytes_by(&self, size: usize) -> usize {
758759
let old_allocation_bytes = self.allocation_bytes.fetch_add(size, Ordering::SeqCst);
759760
trace!(
760761
"Stress GC: old_allocation_bytes = {}, size = {}, allocation_bytes = {}",
761762
old_allocation_bytes,
762763
size,
763764
self.allocation_bytes.load(Ordering::Relaxed),
764765
);
766+
old_allocation_bytes + size
765767
}
766768

767-
#[inline]
768-
pub(super) fn stress_test_gc_required(&self) -> bool {
769-
let stress_factor = self.options.stress_factor;
770-
if self.initialized.load(Ordering::SeqCst)
771-
&& (self.allocation_bytes.load(Ordering::SeqCst) > stress_factor)
772-
{
773-
trace!(
774-
"Stress GC: allocation_bytes = {}, stress_factor = {}",
775-
self.allocation_bytes.load(Ordering::Relaxed),
776-
stress_factor
777-
);
778-
trace!("Doing stress GC");
779-
self.allocation_bytes.store(0, Ordering::SeqCst);
780-
true
781-
} else {
782-
false
783-
}
769+
/// Check if the options are set for stress GC. If either stress_factor or analysis_factor is set,
770+
/// we should do stress GC.
771+
pub fn is_stress_test_gc_enabled(&self) -> bool {
772+
use crate::util::constants::DEFAULT_STRESS_FACTOR;
773+
self.options.stress_factor != DEFAULT_STRESS_FACTOR
774+
|| self.options.analysis_factor != DEFAULT_STRESS_FACTOR
775+
}
776+
777+
/// Check if we should do a stress GC now. If GC is initialized and the allocation bytes exceeds
778+
/// the stress factor, we should do a stress GC.
779+
pub fn should_do_stress_gc(&self) -> bool {
780+
self.initialized.load(Ordering::SeqCst)
781+
&& (self.allocation_bytes.load(Ordering::SeqCst) > self.options.stress_factor)
784782
}
785783

786784
pub(super) fn collection_required<P: Plan>(
@@ -789,7 +787,17 @@ impl<VM: VMBinding> BasePlan<VM> {
789787
space_full: bool,
790788
_space: &dyn Space<VM>,
791789
) -> bool {
792-
let stress_force_gc = self.stress_test_gc_required();
790+
let stress_force_gc = self.should_do_stress_gc();
791+
if stress_force_gc {
792+
debug!(
793+
"Stress GC: allocation_bytes = {}, stress_factor = {}",
794+
self.allocation_bytes.load(Ordering::Relaxed),
795+
self.options.stress_factor
796+
);
797+
debug!("Doing stress GC");
798+
self.allocation_bytes.store(0, Ordering::SeqCst);
799+
}
800+
793801
debug!(
794802
"self.get_pages_reserved()={}, self.get_total_pages()={}",
795803
plan.get_pages_reserved(),

src/util/alloc/allocator.rs

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
use crate::util::address::Address;
2-
use crate::util::constants::DEFAULT_STRESS_FACTOR;
3-
42
use std::sync::atomic::Ordering;
53

64
use crate::plan::Plan;
@@ -110,6 +108,10 @@ pub trait Allocator<VM: VMBinding>: Downcast {
110108
fn get_space(&self) -> &'static dyn Space<VM>;
111109
fn get_plan(&self) -> &'static dyn Plan<VM = VM>;
112110

111+
/// Does this allocator do thread local allocation? If an allocator does not do thread local allocation,
112+
/// each allocation will go to slowpath and will have a check for GC polls.
113+
fn does_thread_local_allocation(&self) -> bool;
114+
113115
fn alloc(&mut self, size: usize, align: usize, offset: isize) -> Address;
114116

115117
#[inline(never)]
@@ -121,17 +123,31 @@ pub trait Allocator<VM: VMBinding>: Downcast {
121123
fn alloc_slow_inline(&mut self, size: usize, align: usize, offset: isize) -> Address {
122124
let tls = self.get_tls();
123125
let plan = self.get_plan().base();
124-
let stress_test = plan.options.stress_factor != DEFAULT_STRESS_FACTOR
125-
|| plan.options.analysis_factor != DEFAULT_STRESS_FACTOR;
126+
let is_mutator = VM::VMActivePlan::is_mutator(tls);
127+
let stress_test = plan.is_stress_test_gc_enabled();
126128

127129
// Information about the previous collection.
128130
let mut emergency_collection = false;
129131
let mut previous_result_zero = false;
130132
loop {
131133
// Try to allocate using the slow path
132-
let result = self.alloc_slow_once(size, align, offset);
134+
let result = if is_mutator && stress_test {
135+
// If we are doing stress GC, we invoke the special alloc_slow_once_stress_test() call.
136+
// alloc_slow_once_stress_test() should make sure that every allocation goes
137+
// to the slowpath (here) so we can check the allocation bytes and decide
138+
// if we need to do a stress GC.
133139

134-
if !VM::VMActivePlan::is_mutator(tls) {
140+
// If we should do a stress GC now, we tell the alloc_slow_once_stress_test()
141+
// so it will avoid trying any thread local allocation, and directly call
142+
// global acquire and do a poll.
143+
let need_poll = is_mutator && plan.should_do_stress_gc();
144+
self.alloc_slow_once_stress_test(size, align, offset, need_poll)
145+
} else {
146+
// If we are not doing stress GC, just call the normal alloc_slow_once().
147+
self.alloc_slow_once(size, align, offset)
148+
};
149+
150+
if !is_mutator {
135151
debug_assert!(!result.is_zero());
136152
return result;
137153
}
@@ -150,7 +166,19 @@ pub trait Allocator<VM: VMBinding>: Downcast {
150166
// called by acquire(). In order to not double count the allocation, we only
151167
// update allocation bytes if the previous result wasn't 0x0.
152168
if stress_test && self.get_plan().is_initialized() && !previous_result_zero {
153-
plan.increase_allocation_bytes_by(size);
169+
let _allocation_bytes = plan.increase_allocation_bytes_by(size);
170+
171+
// This is the allocation hook for the analysis trait. If you want to call
172+
// an analysis counter specific allocation hook, then here is the place to do so
173+
#[cfg(feature = "analysis")]
174+
if _allocation_bytes > plan.options.analysis_factor {
175+
trace!(
176+
"Analysis: allocation_bytes = {} more than analysis_factor = {}",
177+
_allocation_bytes,
178+
plan.options.analysis_factor
179+
);
180+
plan.analysis_manager.alloc_hook(size, align, offset);
181+
}
154182
}
155183

156184
return result;
@@ -196,7 +224,47 @@ pub trait Allocator<VM: VMBinding>: Downcast {
196224
}
197225
}
198226

227+
/// Single slow path allocation attempt. This is called by alloc_slow_inline().
199228
fn alloc_slow_once(&mut self, size: usize, align: usize, offset: isize) -> Address;
229+
230+
/// Single slowpath allocation attempt for stress test. When the stress factor is set (e.g. to N),
231+
/// we expect a stress GC to be triggered for every N bytes allocated.
232+
/// However, allocators that do thread local allocation may allocate from their thread local buffer,
233+
/// which does not have a GC poll check, and they may even allocate with the JIT generated allocation
234+
/// fastpath which is unaware of stress test GC. For both cases, we are not able to guarantee
235+
/// a stress GC is triggered every N bytes. To solve this, when the stress factor is set, we
236+
/// will call this method instead of the normal alloc_slow_once(). We expect the implementation of this slow allocation
237+
/// will trick the fastpath so every allocation will fail in the fastpath, jump to the slow path and eventually
238+
/// call this method again for the actual allocation.
239+
///
240+
/// The actual implementation about how to trick the fastpath may vary. For example, our bump pointer allocator will
241+
/// set the thread local buffer limit to the buffer size instead of the buffer end address. In this case, every fastpath
242+
/// check (cursor + size < limit) will fail, and jump to this slowpath. In the slowpath, we still allocate from the thread
243+
/// local buffer, and recompute the limit (remaining buffer size).
244+
///
245+
/// If an allocator does not do thread local allocation (which returns false for does_thread_local_allocation()), it does
246+
/// not need to override this method. The default implementation will simply call alloc_slow_once() and it will work fine
247+
/// for allocators that do not have thread local allocation.
248+
///
249+
/// Arguments:
250+
/// * `size`: the allocation size in bytes.
251+
/// * `align`: the required alignment in bytes.
252+
/// * `offset`: the required offset in bytes.
253+
/// * `need_poll`: if this is true, the implementation must poll for a GC, rather than attempting to allocate from the local buffer.
254+
fn alloc_slow_once_stress_test(
255+
&mut self,
256+
size: usize,
257+
align: usize,
258+
offset: isize,
259+
need_poll: bool,
260+
) -> Address {
261+
// If an allocator does thread local allocation but does not override this method to provide a correct implementation,
262+
// we will log a warning.
263+
if self.does_thread_local_allocation() && need_poll {
264+
warn!("{} does not support stress GC (An allocator that does thread local allocation needs to implement alloc_slow_once_stress_test()).", std::any::type_name::<Self>());
265+
}
266+
self.alloc_slow_once(size, align, offset)
267+
}
200268
}
201269

202270
impl_downcast!(Allocator<VM> where VM: VMBinding);

src/util/alloc/bumpallocator.rs

Lines changed: 36 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
use crate::util::constants::DEFAULT_STRESS_FACTOR;
2-
use std::sync::atomic::Ordering;
3-
41
use super::allocator::{align_allocation_no_fill, fill_alignment_gap};
52
use crate::util::Address;
63

@@ -10,7 +7,7 @@ use crate::plan::Plan;
107
use crate::policy::space::Space;
118
use crate::util::conversions::bytes_to_pages;
129
use crate::util::opaque_pointer::*;
13-
use crate::vm::{ActivePlan, VMBinding};
10+
use crate::vm::VMBinding;
1411

1512
const BYTES_IN_PAGE: usize = 1 << 12;
1613
const BLOCK_SIZE: usize = 8 * BYTES_IN_PAGE;
@@ -49,6 +46,9 @@ impl<VM: VMBinding> Allocator<VM> for BumpAllocator<VM> {
4946
fn get_plan(&self) -> &'static dyn Plan<VM = VM> {
5047
self.plan
5148
}
49+
fn does_thread_local_allocation(&self) -> bool {
50+
true
51+
}
5252

5353
fn alloc(&mut self, size: usize, align: usize, offset: isize) -> Address {
5454
trace!("alloc");
@@ -74,44 +74,25 @@ impl<VM: VMBinding> Allocator<VM> for BumpAllocator<VM> {
7474

7575
fn alloc_slow_once(&mut self, size: usize, align: usize, offset: isize) -> Address {
7676
trace!("alloc_slow");
77-
// TODO: internalLimit etc.
78-
let base = &self.plan.base();
79-
80-
if base.options.stress_factor == DEFAULT_STRESS_FACTOR
81-
&& base.options.analysis_factor == DEFAULT_STRESS_FACTOR
82-
{
83-
self.acquire_block(size, align, offset, false)
84-
} else {
85-
self.alloc_slow_once_stress_test(size, align, offset)
86-
}
87-
}
88-
89-
fn get_tls(&self) -> VMThread {
90-
self.tls
91-
}
92-
}
93-
94-
impl<VM: VMBinding> BumpAllocator<VM> {
95-
pub fn new(
96-
tls: VMThread,
97-
space: &'static dyn Space<VM>,
98-
plan: &'static dyn Plan<VM = VM>,
99-
) -> Self {
100-
BumpAllocator {
101-
tls,
102-
cursor: unsafe { Address::zero() },
103-
limit: unsafe { Address::zero() },
104-
space,
105-
plan,
106-
}
77+
self.acquire_block(size, align, offset, false)
10778
}
10879

10980
// Slow path for allocation if the stress test flag has been enabled. It works
11081
// by manipulating the limit to be below the cursor always.
11182
// Performs three kinds of allocations: (i) if the hard limit has been met;
11283
// (ii) the bump pointer semantics from the fastpath; and (iii) if the stress
11384
// factor has been crossed.
114-
fn alloc_slow_once_stress_test(&mut self, size: usize, align: usize, offset: isize) -> Address {
85+
fn alloc_slow_once_stress_test(
86+
&mut self,
87+
size: usize,
88+
align: usize,
89+
offset: isize,
90+
need_poll: bool,
91+
) -> Address {
92+
if need_poll {
93+
return self.acquire_block(size, align, offset, true);
94+
}
95+
11596
trace!("alloc_slow stress_test");
11697
let result = align_allocation_no_fill::<VM>(self.cursor, align, offset);
11798
let new_cursor = result + size;
@@ -122,35 +103,6 @@ impl<VM: VMBinding> BumpAllocator<VM> {
122103
if new_cursor > self.cursor + self.limit.as_usize() {
123104
self.acquire_block(size, align, offset, true)
124105
} else {
125-
let base = &self.plan.base();
126-
let is_mutator = VM::VMActivePlan::is_mutator(self.tls) && self.plan.is_initialized();
127-
128-
if is_mutator
129-
&& base.allocation_bytes.load(Ordering::SeqCst) > base.options.stress_factor
130-
{
131-
trace!(
132-
"Stress GC: allocation_bytes = {} more than stress_factor = {}",
133-
base.allocation_bytes.load(Ordering::Relaxed),
134-
base.options.stress_factor
135-
);
136-
return self.acquire_block(size, align, offset, true);
137-
}
138-
139-
// This is the allocation hook for the analysis trait. If you want to call
140-
// an analysis counter specific allocation hook, then here is the place to do so
141-
#[cfg(feature = "analysis")]
142-
if is_mutator
143-
&& base.allocation_bytes.load(Ordering::SeqCst) > base.options.analysis_factor
144-
{
145-
trace!(
146-
"Analysis: allocation_bytes = {} more than analysis_factor = {}",
147-
base.allocation_bytes.load(Ordering::Relaxed),
148-
base.options.analysis_factor
149-
);
150-
151-
base.analysis_manager.alloc_hook(size, align, offset);
152-
}
153-
154106
fill_alignment_gap::<VM>(self.cursor, result);
155107
self.limit -= new_cursor - self.cursor;
156108
self.cursor = new_cursor;
@@ -165,6 +117,26 @@ impl<VM: VMBinding> BumpAllocator<VM> {
165117
}
166118
}
167119

120+
fn get_tls(&self) -> VMThread {
121+
self.tls
122+
}
123+
}
124+
125+
impl<VM: VMBinding> BumpAllocator<VM> {
126+
pub fn new(
127+
tls: VMThread,
128+
space: &'static dyn Space<VM>,
129+
plan: &'static dyn Plan<VM = VM>,
130+
) -> Self {
131+
BumpAllocator {
132+
tls,
133+
cursor: unsafe { Address::zero() },
134+
limit: unsafe { Address::zero() },
135+
space,
136+
plan,
137+
}
138+
}
139+
168140
#[inline]
169141
fn acquire_block(
170142
&mut self,

0 commit comments

Comments
 (0)