Skip to content

Commit a08d13d

Browse files
authored
Refactor stress test GC and support stress GC for Immix (#482)
* Refactor stress GC code: extract some code to Allocator, add methods to the Allocator trait to support stress GC. * Implement stress test for ImmixAllocator
1 parent 0ededb3 commit a08d13d

File tree

6 files changed

+294
-132
lines changed

6 files changed

+294
-132
lines changed

src/plan/global.rs

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ pub struct BasePlan<VM: VMBinding> {
406406
pub scanned_stacks: AtomicUsize,
407407
pub mutator_iterator_lock: Mutex<()>,
408408
// A counter that keeps track of the number of bytes allocated since the last stress test
409-
pub allocation_bytes: AtomicUsize,
409+
allocation_bytes: AtomicUsize,
410410
// Wrapper around analysis counters
411411
#[cfg(feature = "analysis")]
412412
pub analysis_manager: AnalysisManager<VM>,
@@ -754,33 +754,31 @@ impl<VM: VMBinding> BasePlan<VM> {
754754
is_internal_triggered
755755
}
756756

757-
pub fn increase_allocation_bytes_by(&self, size: usize) {
757+
/// Increase the allocation bytes and return the current allocation bytes after increasing
758+
pub fn increase_allocation_bytes_by(&self, size: usize) -> usize {
758759
let old_allocation_bytes = self.allocation_bytes.fetch_add(size, Ordering::SeqCst);
759760
trace!(
760761
"Stress GC: old_allocation_bytes = {}, size = {}, allocation_bytes = {}",
761762
old_allocation_bytes,
762763
size,
763764
self.allocation_bytes.load(Ordering::Relaxed),
764765
);
766+
old_allocation_bytes + size
765767
}
766768

767-
#[inline]
768-
pub(super) fn stress_test_gc_required(&self) -> bool {
769-
let stress_factor = self.options.stress_factor;
770-
if self.initialized.load(Ordering::SeqCst)
771-
&& (self.allocation_bytes.load(Ordering::SeqCst) > stress_factor)
772-
{
773-
trace!(
774-
"Stress GC: allocation_bytes = {}, stress_factor = {}",
775-
self.allocation_bytes.load(Ordering::Relaxed),
776-
stress_factor
777-
);
778-
trace!("Doing stress GC");
779-
self.allocation_bytes.store(0, Ordering::SeqCst);
780-
true
781-
} else {
782-
false
783-
}
769+
/// Check if the options are set for stress GC. If either stress_factor or analysis_factor is set,
770+
/// we should do stress GC.
771+
pub fn is_stress_test_gc_enabled(&self) -> bool {
772+
use crate::util::constants::DEFAULT_STRESS_FACTOR;
773+
self.options.stress_factor != DEFAULT_STRESS_FACTOR
774+
|| self.options.analysis_factor != DEFAULT_STRESS_FACTOR
775+
}
776+
777+
/// Check if we should do a stress GC now. If GC is initialized and the allocation bytes exceeds
778+
/// the stress factor, we should do a stress GC.
779+
pub fn should_do_stress_gc(&self) -> bool {
780+
self.initialized.load(Ordering::SeqCst)
781+
&& (self.allocation_bytes.load(Ordering::SeqCst) > self.options.stress_factor)
784782
}
785783

786784
pub(super) fn collection_required<P: Plan>(
@@ -789,7 +787,17 @@ impl<VM: VMBinding> BasePlan<VM> {
789787
space_full: bool,
790788
_space: &dyn Space<VM>,
791789
) -> bool {
792-
let stress_force_gc = self.stress_test_gc_required();
790+
let stress_force_gc = self.should_do_stress_gc();
791+
if stress_force_gc {
792+
debug!(
793+
"Stress GC: allocation_bytes = {}, stress_factor = {}",
794+
self.allocation_bytes.load(Ordering::Relaxed),
795+
self.options.stress_factor
796+
);
797+
debug!("Doing stress GC");
798+
self.allocation_bytes.store(0, Ordering::SeqCst);
799+
}
800+
793801
debug!(
794802
"self.get_pages_reserved()={}, self.get_total_pages()={}",
795803
plan.get_pages_reserved(),

src/util/alloc/allocator.rs

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
use crate::util::address::Address;
2-
use crate::util::constants::DEFAULT_STRESS_FACTOR;
3-
42
use std::sync::atomic::Ordering;
53

64
use crate::plan::Plan;
@@ -110,6 +108,10 @@ pub trait Allocator<VM: VMBinding>: Downcast {
110108
fn get_space(&self) -> &'static dyn Space<VM>;
111109
fn get_plan(&self) -> &'static dyn Plan<VM = VM>;
112110

111+
/// Does this allocator do thread local allocation? If an allocator does not do thread local allocation,
112+
/// each allocation will go to slowpath and will have a check for GC polls.
113+
fn does_thread_local_allocation(&self) -> bool;
114+
113115
fn alloc(&mut self, size: usize, align: usize, offset: isize) -> Address;
114116

115117
#[inline(never)]
@@ -121,17 +123,31 @@ pub trait Allocator<VM: VMBinding>: Downcast {
121123
fn alloc_slow_inline(&mut self, size: usize, align: usize, offset: isize) -> Address {
122124
let tls = self.get_tls();
123125
let plan = self.get_plan().base();
124-
let stress_test = plan.options.stress_factor != DEFAULT_STRESS_FACTOR
125-
|| plan.options.analysis_factor != DEFAULT_STRESS_FACTOR;
126+
let is_mutator = VM::VMActivePlan::is_mutator(tls);
127+
let stress_test = plan.is_stress_test_gc_enabled();
126128

127129
// Information about the previous collection.
128130
let mut emergency_collection = false;
129131
let mut previous_result_zero = false;
130132
loop {
131133
// Try to allocate using the slow path
132-
let result = self.alloc_slow_once(size, align, offset);
134+
let result = if is_mutator && stress_test {
135+
// If we are doing stress GC, we invoke the special alloc_slow_once_stress_test() call.
136+
// alloc_slow_once_stress_test() should make sure that every allocation goes
137+
// to the slowpath (here) so we can check the allocation bytes and decide
138+
// if we need to do a stress GC.
133139

134-
if !VM::VMActivePlan::is_mutator(tls) {
140+
// If we should do a stress GC now, we tell the alloc_slow_once_stress_test()
141+
// so it will avoid trying any thread local allocation, and directly call
142+
// global acquire and do a poll.
143+
let need_poll = is_mutator && plan.should_do_stress_gc();
144+
self.alloc_slow_once_stress_test(size, align, offset, need_poll)
145+
} else {
146+
// If we are not doing stress GC, just call the normal alloc_slow_once().
147+
self.alloc_slow_once(size, align, offset)
148+
};
149+
150+
if !is_mutator {
135151
debug_assert!(!result.is_zero());
136152
return result;
137153
}
@@ -150,7 +166,19 @@ pub trait Allocator<VM: VMBinding>: Downcast {
150166
// called by acquire(). In order to not double count the allocation, we only
151167
// update allocation bytes if the previous result wasn't 0x0.
152168
if stress_test && self.get_plan().is_initialized() && !previous_result_zero {
153-
plan.increase_allocation_bytes_by(size);
169+
let _allocation_bytes = plan.increase_allocation_bytes_by(size);
170+
171+
// This is the allocation hook for the analysis trait. If you want to call
172+
// an analysis counter specific allocation hook, then here is the place to do so
173+
#[cfg(feature = "analysis")]
174+
if _allocation_bytes > plan.options.analysis_factor {
175+
trace!(
176+
"Analysis: allocation_bytes = {} more than analysis_factor = {}",
177+
_allocation_bytes,
178+
plan.options.analysis_factor
179+
);
180+
plan.analysis_manager.alloc_hook(size, align, offset);
181+
}
154182
}
155183

156184
return result;
@@ -196,7 +224,47 @@ pub trait Allocator<VM: VMBinding>: Downcast {
196224
}
197225
}
198226

227+
/// Single slow path allocation attempt. This is called by alloc_slow_inline().
199228
fn alloc_slow_once(&mut self, size: usize, align: usize, offset: isize) -> Address;
229+
230+
/// Single slowpath allocation attempt for stress test. When the stress factor is set (e.g. to N),
231+
/// we expect a stress GC to be triggered for every N bytes allocated.
232+
/// However, allocators that do thread local allocation may allocate from their thread local buffer,
233+
/// which does not have a GC poll check, and they may even allocate with the JIT generated allocation
234+
/// fastpath which is unaware of stress test GC. For both cases, we are not able to guarantee
235+
/// a stress GC is triggered every N bytes. To solve this, when the stress factor is set, we
236+
/// will call this method instead of the normal alloc_slow_once(). We expect the implementation of this slow allocation
237+
/// will trick the fastpath so every allocation will fail in the fastpath, jump to the slow path and eventually
238+
/// call this method again for the actual allocation.
239+
///
240+
/// The actual implementation about how to trick the fastpath may vary. For example, our bump pointer allocator will
241+
/// set the thread local buffer limit to the buffer size instead of the buffer end address. In this case, every fastpath
242+
/// check (cursor + size < limit) will fail, and jump to this slowpath. In the slowpath, we still allocate from the thread
243+
/// local buffer, and recompute the limit (remaining buffer size).
244+
///
245+
/// If an allocator does not do thread local allocation (which returns false for does_thread_local_allocation()), it does
246+
/// not need to override this method. The default implementation will simply call alloc_slow_once() and it will work fine
247+
/// for allocators that do not have thread local allocation.
248+
///
249+
/// Arguments:
250+
/// * `size`: the allocation size in bytes.
251+
/// * `align`: the required alignment in bytes.
252+
/// * `offset`: the required offset in bytes.
253+
/// * `need_poll`: if this is true, the implementation must poll for a GC, rather than attempting to allocate from the local buffer.
254+
fn alloc_slow_once_stress_test(
255+
&mut self,
256+
size: usize,
257+
align: usize,
258+
offset: isize,
259+
need_poll: bool,
260+
) -> Address {
261+
// If an allocator does thread local allocation but does not override this method to provide a correct implementation,
262+
// we will log a warning.
263+
if self.does_thread_local_allocation() && need_poll {
264+
warn!("{} does not support stress GC (An allocator that does thread local allocation needs to implement alloc_slow_once_stress_test()).", std::any::type_name::<Self>());
265+
}
266+
self.alloc_slow_once(size, align, offset)
267+
}
200268
}
201269

202270
impl_downcast!(Allocator<VM> where VM: VMBinding);

src/util/alloc/bumpallocator.rs

Lines changed: 36 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
use crate::util::constants::DEFAULT_STRESS_FACTOR;
2-
use std::sync::atomic::Ordering;
3-
41
use super::allocator::{align_allocation_no_fill, fill_alignment_gap};
52
use crate::util::Address;
63

@@ -10,7 +7,7 @@ use crate::plan::Plan;
107
use crate::policy::space::Space;
118
use crate::util::conversions::bytes_to_pages;
129
use crate::util::opaque_pointer::*;
13-
use crate::vm::{ActivePlan, VMBinding};
10+
use crate::vm::VMBinding;
1411

1512
const BYTES_IN_PAGE: usize = 1 << 12;
1613
const BLOCK_SIZE: usize = 8 * BYTES_IN_PAGE;
@@ -49,6 +46,9 @@ impl<VM: VMBinding> Allocator<VM> for BumpAllocator<VM> {
4946
fn get_plan(&self) -> &'static dyn Plan<VM = VM> {
5047
self.plan
5148
}
49+
fn does_thread_local_allocation(&self) -> bool {
50+
true
51+
}
5252

5353
fn alloc(&mut self, size: usize, align: usize, offset: isize) -> Address {
5454
trace!("alloc");
@@ -74,44 +74,25 @@ impl<VM: VMBinding> Allocator<VM> for BumpAllocator<VM> {
7474

7575
fn alloc_slow_once(&mut self, size: usize, align: usize, offset: isize) -> Address {
7676
trace!("alloc_slow");
77-
// TODO: internalLimit etc.
78-
let base = &self.plan.base();
79-
80-
if base.options.stress_factor == DEFAULT_STRESS_FACTOR
81-
&& base.options.analysis_factor == DEFAULT_STRESS_FACTOR
82-
{
83-
self.acquire_block(size, align, offset, false)
84-
} else {
85-
self.alloc_slow_once_stress_test(size, align, offset)
86-
}
87-
}
88-
89-
fn get_tls(&self) -> VMThread {
90-
self.tls
91-
}
92-
}
93-
94-
impl<VM: VMBinding> BumpAllocator<VM> {
95-
pub fn new(
96-
tls: VMThread,
97-
space: &'static dyn Space<VM>,
98-
plan: &'static dyn Plan<VM = VM>,
99-
) -> Self {
100-
BumpAllocator {
101-
tls,
102-
cursor: unsafe { Address::zero() },
103-
limit: unsafe { Address::zero() },
104-
space,
105-
plan,
106-
}
77+
self.acquire_block(size, align, offset, false)
10778
}
10879

10980
// Slow path for allocation if the stress test flag has been enabled. It works
11081
// by manipulating the limit to be below the cursor always.
11182
// Performs three kinds of allocations: (i) if the hard limit has been met;
11283
// (ii) the bump pointer semantics from the fastpath; and (iii) if the stress
11384
// factor has been crossed.
114-
fn alloc_slow_once_stress_test(&mut self, size: usize, align: usize, offset: isize) -> Address {
85+
fn alloc_slow_once_stress_test(
86+
&mut self,
87+
size: usize,
88+
align: usize,
89+
offset: isize,
90+
need_poll: bool,
91+
) -> Address {
92+
if need_poll {
93+
return self.acquire_block(size, align, offset, true);
94+
}
95+
11596
trace!("alloc_slow stress_test");
11697
let result = align_allocation_no_fill::<VM>(self.cursor, align, offset);
11798
let new_cursor = result + size;
@@ -122,35 +103,6 @@ impl<VM: VMBinding> BumpAllocator<VM> {
122103
if new_cursor > self.cursor + self.limit.as_usize() {
123104
self.acquire_block(size, align, offset, true)
124105
} else {
125-
let base = &self.plan.base();
126-
let is_mutator = VM::VMActivePlan::is_mutator(self.tls) && self.plan.is_initialized();
127-
128-
if is_mutator
129-
&& base.allocation_bytes.load(Ordering::SeqCst) > base.options.stress_factor
130-
{
131-
trace!(
132-
"Stress GC: allocation_bytes = {} more than stress_factor = {}",
133-
base.allocation_bytes.load(Ordering::Relaxed),
134-
base.options.stress_factor
135-
);
136-
return self.acquire_block(size, align, offset, true);
137-
}
138-
139-
// This is the allocation hook for the analysis trait. If you want to call
140-
// an analysis counter specific allocation hook, then here is the place to do so
141-
#[cfg(feature = "analysis")]
142-
if is_mutator
143-
&& base.allocation_bytes.load(Ordering::SeqCst) > base.options.analysis_factor
144-
{
145-
trace!(
146-
"Analysis: allocation_bytes = {} more than analysis_factor = {}",
147-
base.allocation_bytes.load(Ordering::Relaxed),
148-
base.options.analysis_factor
149-
);
150-
151-
base.analysis_manager.alloc_hook(size, align, offset);
152-
}
153-
154106
fill_alignment_gap::<VM>(self.cursor, result);
155107
self.limit -= new_cursor - self.cursor;
156108
self.cursor = new_cursor;
@@ -165,6 +117,26 @@ impl<VM: VMBinding> BumpAllocator<VM> {
165117
}
166118
}
167119

120+
fn get_tls(&self) -> VMThread {
121+
self.tls
122+
}
123+
}
124+
125+
impl<VM: VMBinding> BumpAllocator<VM> {
126+
pub fn new(
127+
tls: VMThread,
128+
space: &'static dyn Space<VM>,
129+
plan: &'static dyn Plan<VM = VM>,
130+
) -> Self {
131+
BumpAllocator {
132+
tls,
133+
cursor: unsafe { Address::zero() },
134+
limit: unsafe { Address::zero() },
135+
space,
136+
plan,
137+
}
138+
}
139+
168140
#[inline]
169141
fn acquire_block(
170142
&mut self,

0 commit comments

Comments
 (0)