Skip to content

Commit 25051ea

Browse files
authored
Heap traversal (#1174)
This PR adds a heap traversal API `MMTK::enumerate_objects` which enumerates all objects in the MMTk heap at the time of calling. We added `SideMetadataSpec::scan_non_zero_values` to support enumerating objects by scanning the VO bit metadata. It can also be used for scanning other side metadata when needed. Note, however, that it is inefficient (but should work correctly) if the metadata is not contiguous or the metadata has more than one bit per region. If there is any need for scanning such metadata, we need further refactoring.
1 parent 38b3fc3 commit 25051ea

File tree

26 files changed

+599
-4
lines changed

26 files changed

+599
-4
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ sysinfo = "0.30.9"
5555
[dev-dependencies]
5656
paste = "1.0.8"
5757
rand = "0.8.5"
58+
rand_chacha = "0.3.1"
5859
criterion = "0.4"
5960

6061
[build-dependencies]
+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
//! Benchmarks for scanning side metadata for non-zero bits.
2+
3+
use criterion::Criterion;
4+
use mmtk::util::{
5+
constants::LOG_BITS_IN_WORD, test_private::scan_non_zero_bits_in_metadata_bytes, Address,
6+
};
7+
use rand::{seq::IteratorRandom, SeedableRng};
8+
use rand_chacha::ChaCha8Rng;
9+
10+
fn allocate_aligned(size: usize) -> Address {
11+
let ptr = unsafe {
12+
std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align(size, size).unwrap())
13+
};
14+
Address::from_mut_ptr(ptr)
15+
}
16+
17+
const BLOCK_BYTES: usize = 32768usize; // Match an Immix block size.
18+
19+
// Assume one-bit-per-word metadata (matching VO bits).
20+
const BLOCK_META_BYTES: usize = BLOCK_BYTES >> LOG_BITS_IN_WORD;
21+
22+
/// Set this many distinct bits in the bitmap.
23+
const NUM_OBJECTS: usize = 200;
24+
25+
/// Get a deterministic seeded Rng.
26+
fn get_rng() -> ChaCha8Rng {
27+
// Create an Rng from a seed and an explicit Rng type.
28+
// Not secure at all, but completely deterministic and reproducible.
29+
// The following seed is read from /dev/random
30+
const SEED64: u64 = 0x4050cb1b5ab26c70;
31+
ChaCha8Rng::seed_from_u64(SEED64)
32+
}
33+
34+
/// A bitmap, with known location of each bit for assertion.
35+
struct PreparedBitmap {
36+
start: Address,
37+
end: Address,
38+
set_bits: Vec<(Address, u8)>,
39+
}
40+
41+
/// Make a bitmap of the desired size and set bits.
42+
fn make_standard_bitmap() -> PreparedBitmap {
43+
let start = allocate_aligned(BLOCK_META_BYTES);
44+
let end = start + BLOCK_META_BYTES;
45+
let mut rng = get_rng();
46+
47+
let mut set_bits = (0..(BLOCK_BYTES >> LOG_BITS_IN_WORD))
48+
.choose_multiple(&mut rng, NUM_OBJECTS)
49+
.iter()
50+
.map(|total_bit_offset| {
51+
let word_offset = total_bit_offset >> LOG_BITS_IN_WORD;
52+
let bit_offset = total_bit_offset & ((1 << LOG_BITS_IN_WORD) - 1);
53+
(start + (word_offset << LOG_BITS_IN_WORD), bit_offset as u8)
54+
})
55+
.collect::<Vec<_>>();
56+
57+
set_bits.sort();
58+
59+
for (addr, bit) in set_bits.iter() {
60+
let word = unsafe { addr.load::<usize>() };
61+
let new_word = word | (1 << bit);
62+
unsafe { addr.store::<usize>(new_word) };
63+
}
64+
65+
PreparedBitmap {
66+
start,
67+
end,
68+
set_bits,
69+
}
70+
}
71+
72+
pub fn bench(c: &mut Criterion) {
73+
c.bench_function("bscan_block", |b| {
74+
let bitmap = make_standard_bitmap();
75+
let mut holder: Vec<(Address, u8)> = Vec::with_capacity(NUM_OBJECTS);
76+
77+
b.iter(|| {
78+
holder.clear();
79+
scan_non_zero_bits_in_metadata_bytes(bitmap.start, bitmap.end, &mut |addr, shift| {
80+
holder.push((addr, shift));
81+
});
82+
});
83+
84+
assert_eq!(holder.len(), NUM_OBJECTS);
85+
assert_eq!(holder, bitmap.set_bits);
86+
});
87+
}
+2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
pub mod bscan;
12
pub mod bzero_bset;
23

34
pub use criterion::Criterion;
45

56
pub fn bench(c: &mut Criterion) {
7+
bscan::bench(c);
68
bzero_bset::bench(c);
79
}

src/mmtk.rs

+52
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ use crate::plan::Plan;
66
use crate::policy::sft_map::{create_sft_map, SFTMap};
77
use crate::scheduler::GCWorkScheduler;
88

9+
#[cfg(feature = "vo_bit")]
10+
use crate::util::address::ObjectReference;
911
#[cfg(feature = "analysis")]
1012
use crate::util::analysis::AnalysisManager;
1113
use crate::util::finalizable_processor::FinalizableProcessor;
@@ -467,4 +469,54 @@ impl<VM: VMBinding> MMTK<VM> {
467469
pub fn get_options(&self) -> &Options {
468470
&self.options
469471
}
472+
473+
/// Enumerate the objects in all spaces of this MMTK instance.
///
/// The callback `f` is called for every object that has the valid object bit (VO bit) set,
/// i.e. every object that has been allocated in the heap of this MMTK instance and has not
/// yet been reclaimed.
///
/// # Notes about object initialization and finalization
///
/// When this function visits an object, the only guarantee is that the object's VO bit has
/// been set. The object is not guaranteed to be "fully initialized" in the sense of the
/// programming language the VM is implementing. For example, the object header and the type
/// information may not have been written yet.
///
/// Objects that have been "finalized" in the sense of the programming language the VM is
/// implementing are visited too, as long as the GC has not reclaimed them yet. Be careful: if
/// the object header has been destroyed, it may not be safe to access such objects from the
/// high-level language.
///
/// # Interaction with allocation and GC
///
/// This function does not mutate the heap. It is safe for multiple threads to execute this
/// function concurrently during mutator time.
///
/// It has *undefined behavior* if allocation or GC happens while this function is executing.
/// The VM binding must ensure that no thread is allocating and that no GC starts while this
/// function runs. One way to do this is to stop all mutators before calling this function.
///
/// Some high-level languages provide an API that allows the user to allocate objects and
/// trigger GC while enumerating objects. One example is [`ObjectSpace::each_object`][os_eo] in
/// Ruby. The VM binding may use the callback of this function to save all visited object
/// references, and let the user visit those references after this function returns. Make sure
/// the saved references are in the root set, or in an object that will live through GCs, until
/// the high-level language has finished visiting them.
///
/// [os_eo]: https://docs.ruby-lang.org/en/master/ObjectSpace.html#method-c-each_object
#[cfg(feature = "vo_bit")]
pub fn enumerate_objects<F>(&self, f: F)
where
    F: FnMut(ObjectReference),
{
    use crate::util::object_enum::ClosureObjectEnumerator;

    // Wrap the callback in an enumerator, then let each space of the plan feed it.
    let mut enumerator = ClosureObjectEnumerator::<_, VM>::new(f);
    self.get_plan().for_each_space(&mut |space| {
        space.enumerate_objects(&mut enumerator);
    })
}
470522
}

src/policy/copyspace.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@ use crate::policy::sft::SFT;
66
use crate::policy::space::{CommonSpace, Space};
77
use crate::scheduler::GCWorker;
88
use crate::util::alloc::allocator::AllocatorContext;
9-
use crate::util::copy::*;
109
use crate::util::heap::{MonotonePageResource, PageResource};
1110
use crate::util::metadata::{extract_side_metadata, MetadataSpec};
11+
use crate::util::object_enum::ObjectEnumerator;
1212
use crate::util::object_forwarding;
13+
use crate::util::{copy::*, object_enum};
1314
use crate::util::{Address, ObjectReference};
1415
use crate::vm::*;
1516
use libc::{mprotect, PROT_EXEC, PROT_NONE, PROT_READ, PROT_WRITE};
@@ -133,6 +134,10 @@ impl<VM: VMBinding> Space<VM> for CopySpace<VM> {
133134
fn set_copy_for_sft_trace(&mut self, semantics: Option<CopySemantics>) {
134135
self.common.copy = semantics;
135136
}
137+
138+
/// Report this space's objects to `enumerator` by delegating to
/// `object_enum::enumerate_blocks_from_monotonic_page_resource` with this space's page resource.
fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
    let page_resource = &self.pr;
    object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, page_resource);
}
136141
}
137142

138143
impl<VM: VMBinding> crate::policy::gc_work::PolicyTraceObject<VM> for CopySpace<VM> {

src/policy/immix/block.rs

+7
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use crate::util::metadata::side_metadata::{MetadataByteArrayRef, SideMetadataSpe
1010
use crate::util::metadata::vo_bit;
1111
#[cfg(feature = "object_pinning")]
1212
use crate::util::metadata::MetadataSpec;
13+
use crate::util::object_enum::BlockMayHaveObjects;
1314
use crate::util::Address;
1415
use crate::vm::*;
1516
use std::sync::atomic::Ordering;
@@ -86,6 +87,12 @@ impl Region for Block {
8687
}
8788
}
8889

90+
impl BlockMayHaveObjects for Block {
91+
fn may_have_objects(&self) -> bool {
92+
self.get_state() != BlockState::Unallocated
93+
}
94+
}
95+
8996
impl Block {
9097
/// Log pages in block
9198
pub const LOG_PAGES: usize = Self::LOG_BYTES - LOG_BYTES_IN_PAGE as usize;

src/policy/immix/immixspace.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ use crate::util::metadata::side_metadata::SideMetadataSpec;
1717
#[cfg(feature = "vo_bit")]
1818
use crate::util::metadata::vo_bit;
1919
use crate::util::metadata::{self, MetadataSpec};
20+
use crate::util::object_enum::ObjectEnumerator;
2021
use crate::util::object_forwarding;
21-
use crate::util::{copy::*, epilogue};
22+
use crate::util::{copy::*, epilogue, object_enum};
2223
use crate::util::{Address, ObjectReference};
2324
use crate::vm::*;
2425
use crate::{
@@ -189,6 +190,10 @@ impl<VM: VMBinding> Space<VM> for ImmixSpace<VM> {
189190
fn set_copy_for_sft_trace(&mut self, _semantics: Option<CopySemantics>) {
190191
panic!("We do not use SFT to trace objects for Immix. set_copy_context() cannot be used.")
191192
}
193+
194+
/// Report this space's objects to `enumerator` by delegating to
/// `object_enum::enumerate_blocks_from_chunk_map` over `Block`s, using this space's chunk map.
fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
    let chunk_map = &self.chunk_map;
    object_enum::enumerate_blocks_from_chunk_map::<Block>(enumerator, chunk_map);
}
192197
}
193198

194199
impl<VM: VMBinding> crate::policy::gc_work::PolicyTraceObject<VM> for ImmixSpace<VM> {

src/policy/immortalspace.rs

+5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::util::address::Address;
66
use crate::util::heap::{MonotonePageResource, PageResource};
77
use crate::util::metadata::mark_bit::MarkState;
88

9+
use crate::util::object_enum::{self, ObjectEnumerator};
910
use crate::util::{metadata, ObjectReference};
1011

1112
use crate::plan::{ObjectQueue, VectorObjectQueue};
@@ -112,6 +113,10 @@ impl<VM: VMBinding> Space<VM> for ImmortalSpace<VM> {
112113
fn release_multiple_pages(&mut self, _start: Address) {
113114
panic!("immortalspace only releases pages enmasse")
114115
}
116+
117+
/// Report this space's objects to `enumerator` by delegating to
/// `object_enum::enumerate_blocks_from_monotonic_page_resource` with this space's page resource.
fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
    let page_resource = &self.pr;
    object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, page_resource);
}
115120
}
116121

117122
use crate::scheduler::GCWorker;

src/policy/largeobjectspace.rs

+5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::policy::space::{CommonSpace, Space};
88
use crate::util::constants::BYTES_IN_PAGE;
99
use crate::util::heap::{FreeListPageResource, PageResource};
1010
use crate::util::metadata;
11+
use crate::util::object_enum::ObjectEnumerator;
1112
use crate::util::opaque_pointer::*;
1213
use crate::util::treadmill::TreadMill;
1314
use crate::util::{Address, ObjectReference};
@@ -175,6 +176,10 @@ impl<VM: VMBinding> Space<VM> for LargeObjectSpace<VM> {
175176
fn release_multiple_pages(&mut self, start: Address) {
176177
self.pr.release_pages(start);
177178
}
179+
180+
/// Report this space's objects to `enumerator` by delegating to the treadmill's own
/// `enumerate_objects`.
fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
    let treadmill = &self.treadmill;
    treadmill.enumerate_objects(enumerator);
}
178183
}
179184

180185
use crate::scheduler::GCWorker;

src/policy/lockfreeimmortalspace.rs

+5
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use crate::util::heap::VMRequest;
1616
use crate::util::memory::MmapStrategy;
1717
use crate::util::metadata::side_metadata::SideMetadataContext;
1818
use crate::util::metadata::side_metadata::SideMetadataSanity;
19+
use crate::util::object_enum::ObjectEnumerator;
1920
use crate::util::opaque_pointer::*;
2021
use crate::util::ObjectReference;
2122
use crate::vm::VMBinding;
@@ -166,6 +167,10 @@ impl<VM: VMBinding> Space<VM> for LockFreeImmortalSpace<VM> {
166167
side_metadata_sanity_checker
167168
.verify_metadata_context(std::any::type_name::<Self>(), &self.metadata)
168169
}
170+
171+
/// Report this space to `enumerator` as a single address range that starts at `self.start`
/// and spans `self.total_bytes` bytes.
fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
    let range_start = self.start;
    let range_end = range_start + self.total_bytes;
    enumerator.visit_address_range(range_start, range_end);
}
169174
}
170175

171176
use crate::plan::{ObjectQueue, VectorObjectQueue};

src/policy/markcompactspace.rs

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::util::constants::LOG_BYTES_IN_WORD;
1111
use crate::util::copy::CopySemantics;
1212
use crate::util::heap::{MonotonePageResource, PageResource};
1313
use crate::util::metadata::{extract_side_metadata, vo_bit};
14+
use crate::util::object_enum::{self, ObjectEnumerator};
1415
use crate::util::{Address, ObjectReference};
1516
use crate::{vm::*, ObjectQueue};
1617
use atomic::Ordering;
@@ -131,6 +132,10 @@ impl<VM: VMBinding> Space<VM> for MarkCompactSpace<VM> {
131132
fn release_multiple_pages(&mut self, _start: Address) {
132133
panic!("markcompactspace only releases pages enmasse")
133134
}
135+
136+
/// Report this space's objects to `enumerator` by delegating to
/// `object_enum::enumerate_blocks_from_monotonic_page_resource` with this space's page resource.
fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
    let page_resource = &self.pr;
    object_enum::enumerate_blocks_from_monotonic_page_resource(enumerator, page_resource);
}
134139
}
135140

136141
impl<VM: VMBinding> crate::policy::gc_work::PolicyTraceObject<VM> for MarkCompactSpace<VM> {

src/policy/marksweepspace/malloc_ms/global.rs

+5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use crate::util::metadata::side_metadata::{
1515
SideMetadataContext, SideMetadataSanity, SideMetadataSpec,
1616
};
1717
use crate::util::metadata::MetadataSpec;
18+
use crate::util::object_enum::ObjectEnumerator;
1819
use crate::util::opaque_pointer::*;
1920
use crate::util::Address;
2021
use crate::util::ObjectReference;
@@ -229,6 +230,10 @@ impl<VM: VMBinding> Space<VM> for MallocSpace<VM> {
229230
side_metadata_sanity_checker
230231
.verify_metadata_context(std::any::type_name::<Self>(), &self.metadata)
231232
}
233+
234+
/// Object enumeration is not implemented for this space.
///
/// # Panics
///
/// Always panics via `unimplemented!()`.
fn enumerate_objects(&self, _enumerator: &mut dyn ObjectEnumerator) {
    unimplemented!()
}
232237
}
233238

234239
use crate::scheduler::GCWorker;

src/policy/marksweepspace/native_ms/block.rs

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use super::MarkSweepSpace;
77
use crate::util::constants::LOG_BYTES_IN_PAGE;
88
use crate::util::heap::chunk_map::*;
99
use crate::util::linear_scan::Region;
10+
use crate::util::object_enum::BlockMayHaveObjects;
1011
use crate::vm::ObjectModel;
1112
use crate::{
1213
util::{
@@ -48,6 +49,12 @@ impl Region for Block {
4849
}
4950
}
5051

52+
impl BlockMayHaveObjects for Block {
53+
fn may_have_objects(&self) -> bool {
54+
self.get_state() != BlockState::Unallocated
55+
}
56+
}
57+
5158
impl Block {
5259
/// Log pages in block
5360
pub const LOG_PAGES: usize = Self::LOG_BYTES - LOG_BYTES_IN_PAGE as usize;

src/policy/marksweepspace/native_ms/global.rs

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::{
1111
epilogue,
1212
heap::{BlockPageResource, PageResource},
1313
metadata::{self, side_metadata::SideMetadataSpec, MetadataSpec},
14+
object_enum::{self, ObjectEnumerator},
1415
ObjectReference,
1516
},
1617
vm::{ActivePlan, VMBinding},
@@ -247,6 +248,10 @@ impl<VM: VMBinding> Space<VM> for MarkSweepSpace<VM> {
247248
fn release_multiple_pages(&mut self, _start: crate::util::Address) {
248249
todo!()
249250
}
251+
252+
/// Report this space's objects to `enumerator` by delegating to
/// `object_enum::enumerate_blocks_from_chunk_map` over `Block`s, using this space's chunk map.
fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator) {
    let chunk_map = &self.chunk_map;
    object_enum::enumerate_blocks_from_chunk_map::<Block>(enumerator, chunk_map);
}
250255
}
251256

252257
impl<VM: VMBinding> crate::policy::gc_work::PolicyTraceObject<VM> for MarkSweepSpace<VM> {

0 commit comments

Comments
 (0)