Skip to content

Commit 45cdf31

Browse files
authored
Require ObjectReference to point inside object (#1195)
Require the raw address of `ObjectReference` to be within the address range of the object it refers to. The raw address is now used directly for side metadata access and SFT dispatching. This makes "in-object address" unnecessary, and we removed the concept of "in-object address" and related constants and methods. Methods which use the "in-object address" for SFT dispatching or side-metadata access used to have a `<VM: VMBinding>` type parameter. This PR removes that type parameter. Because `ObjectReference` is now both within an object and word-aligned, the algorithm for searching for VO bits from internal pointers is slightly simplified. The method `is_mmtk_object` now has undefined behavior for arguments that are zero or misaligned because they are obviously illegal addresses for `ObjectReference`, and the user should have filtered them out in the first place. Fixes: #1170
1 parent b3385b8 commit 45cdf31

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+447
-386
lines changed

benches/mock_bench/internal_pointer.rs

+2-8
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,7 @@ pub fn bench(c: &mut Criterion) {
4242
);
4343
let obj_end = addr + NORMAL_OBJECT_SIZE;
4444
_b.iter(|| {
45-
memory_manager::find_object_from_internal_pointer::<MockVM>(
46-
obj_end - 1,
47-
NORMAL_OBJECT_SIZE,
48-
);
45+
memory_manager::find_object_from_internal_pointer(obj_end - 1, NORMAL_OBJECT_SIZE);
4946
})
5047
}
5148
#[cfg(not(feature = "is_mmtk_object"))]
@@ -83,10 +80,7 @@ pub fn bench(c: &mut Criterion) {
8380
);
8481
let obj_end = addr + LARGE_OBJECT_SIZE;
8582
_b.iter(|| {
86-
memory_manager::find_object_from_internal_pointer::<MockVM>(
87-
obj_end - 1,
88-
LARGE_OBJECT_SIZE,
89-
);
83+
memory_manager::find_object_from_internal_pointer(obj_end - 1, LARGE_OBJECT_SIZE);
9084
})
9185
}
9286
#[cfg(not(feature = "is_mmtk_object"))]

benches/mock_bench/sft.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@ pub fn bench(c: &mut Criterion) {
1212
let obj = MockVM::object_start_to_ref(addr);
1313

1414
c.bench_function("sft read", |b| {
15-
b.iter(|| memory_manager::is_in_mmtk_spaces::<MockVM>(black_box(obj)))
15+
b.iter(|| memory_manager::is_in_mmtk_spaces(black_box(obj)))
1616
});
1717
}

docs/dummyvm/src/api.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -142,23 +142,23 @@ pub extern "C" fn mmtk_total_bytes() -> usize {
142142

143143
#[no_mangle]
144144
pub extern "C" fn mmtk_is_live_object(object: ObjectReference) -> bool {
145-
memory_manager::is_live_object::<DummyVM>(object)
145+
memory_manager::is_live_object(object)
146146
}
147147

148148
#[no_mangle]
149149
pub extern "C" fn mmtk_will_never_move(object: ObjectReference) -> bool {
150-
!object.is_movable::<DummyVM>()
150+
!object.is_movable()
151151
}
152152

153153
#[cfg(feature = "is_mmtk_object")]
154154
#[no_mangle]
155155
pub extern "C" fn mmtk_is_mmtk_object(addr: Address) -> bool {
156-
memory_manager::is_mmtk_object(addr)
156+
memory_manager::is_mmtk_object(addr).is_some()
157157
}
158158

159159
#[no_mangle]
160160
pub extern "C" fn mmtk_is_in_mmtk_spaces(object: ObjectReference) -> bool {
161-
memory_manager::is_in_mmtk_spaces::<DummyVM>(object)
161+
memory_manager::is_in_mmtk_spaces(object)
162162
}
163163

164164
#[no_mangle]

docs/dummyvm/src/object_model.rs

+1-8
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,10 @@ use mmtk::vm::*;
66
pub struct VMObjectModel {}
77

88
/// This is the offset from the allocation result to the object reference for the object.
9-
/// For bindings that this offset is not a constant, you can implement the calculation in the method `ref_to_object_start``, and
9+
/// For bindings that this offset is not a constant, you can implement the calculation in the method `ref_to_object_start`, and
1010
/// remove this constant.
1111
pub const OBJECT_REF_OFFSET: usize = 0;
1212

13-
/// This is the offset from the object reference to an in-object address. The binding needs
14-
/// to guarantee the in-object address is inside the storage associated with the object.
15-
/// It has to be a constant offset. See `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`.
16-
pub const IN_OBJECT_ADDRESS_OFFSET: isize = 0;
17-
1813
// This is the offset from the object reference to the object header.
1914
// This value is used in `ref_to_header` where MMTk loads header metadata from.
2015
pub const OBJECT_HEADER_OFFSET: usize = 0;
@@ -86,8 +81,6 @@ impl ObjectModel<DummyVM> for VMObjectModel {
8681
object.to_raw_address().sub(OBJECT_HEADER_OFFSET)
8782
}
8883

89-
const IN_OBJECT_ADDRESS_OFFSET: isize = IN_OBJECT_ADDRESS_OFFSET;
90-
9184
fn dump_object(_object: ObjectReference) {
9285
unimplemented!()
9386
}

docs/userguide/src/migration/prefix.md

+48
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,54 @@ Notes for the mmtk-core developers:
3030

3131
<!-- Insert new versions here -->
3232

33+
## 0.28.0
34+
35+
### `ObjectReference` must point inside an object
36+
37+
```admonish tldr
38+
`ObjectReference` is now required to be an address within an object. The concept of "in-object
39+
address" and related methods are removed. Some methods which used to depend on the "in-object
40+
address" no longer need the `<VM>` type argument.
41+
```
42+
43+
API changes:
44+
45+
- struct `ObjectReference`
46+
+ Its "raw address" must be within an object now.
47+
+ The following methods which were used to access the in-object address are removed.
48+
* `from_address`
49+
* `to_address`
50+
* When accessing side metadata, the "raw address" should be used, instead.
51+
+ The following methods no longer have the `<VM>` type argument.
52+
* `get_forwarded_object`
53+
* `is_in_any_space`
54+
* `is_live`
55+
* `is_movable`
56+
* `is_reachable`
57+
- module `memory_manager`
58+
+ `is_mmtk_object`: It now requires the address parameter to be non-zero and word-aligned.
59+
* Otherwise it will not be a legal `ObjectReference` in the first place. The user should
60+
filter out such illegal values.
61+
+ The following functions no longer have the `<VM>` type argument.
62+
* `find_object_from_internal_pointer`
63+
* `is_in_mmtk_spaces`
64+
* `is_live_object`
65+
* `is_pinned`
66+
* `pin_object`
67+
* `unpin_object`
68+
- struct `Region`
69+
+ The following methods no longer have the `<VM>` type argument.
70+
* `containing`
71+
- trait `ObjectModel`
72+
+ `IN_OBJECT_ADDRESS_OFFSET`: removed because it is no longer needed.
73+
74+
See also:
75+
76+
- PR: <https://github.com/mmtk/mmtk-core/pull/1195> (fixes issue <https://github.com/mmtk/mmtk-core/issues/1170>)
77+
- Examples:
78+
+ https://github.com/mmtk/mmtk-openjdk/pull/286: a simple case
79+
+ https://github.com/mmtk/mmtk-jikesrvm/issues/178: a VM that needs much change for this
80+
3381
## 0.27.0
3482

3583
### `is_mmtk_object` returns `Option<ObjectReference>`

docs/userguide/src/portingguide/howto/nogc.md

+44-10
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,39 @@ We recommend going through the [list of metadata specifications](https://docs.mm
9595

9696
#### `ObjectReference` vs `Address`
9797

98-
A key principle in MMTk is the distinction between [`ObjectReference`](https://docs.mmtk.io/api/mmtk/util/address/struct.ObjectReference.html) and [`Address`](https://docs.mmtk.io/api/mmtk/util/address/struct.Address.html). The idea is that very few operations are allowed on an `ObjectReference`. For example, MMTk does not allow address arithmetic on `ObjectReference`s. This allows us to preserve memory-safety, only performing unsafe operations when required, and gives us a cleaner and more flexible abstraction to work with as it can allow object handles or offsets etc. `Address`, on the other hand, represents an arbitrary machine address. You might be interested in reading the *Demystifying Magic: High-level Low-level Programming* paper[^3] which describes the above in more detail.
99-
100-
In MMTk, `ObjectReference` is a special address that represents an object. A binding may use tagged references, compressed pointers, etc.
101-
They need to deal with the encoding and the decoding in their [`Slot`](https://docs.mmtk.io/api/mmtk/vm/slot/trait.Slot.html) implementation,
102-
and always present plain `ObjectReference`s to MMTk. See [this test](https://github.com/mmtk/mmtk-core/blob/master/src/vm/tests/mock_tests/mock_test_slots.rs) for some `Slot` implementation examples.
103-
104-
[^3]: https://users.cecs.anu.edu.au/~steveb/pubs/papers/vmmagic-vee-2009.pdf
98+
A key principle in MMTk is the distinction between [`ObjectReference`](https://docs.mmtk.io/api/mmtk/util/address/struct.ObjectReference.html) and [`Address`](https://docs.mmtk.io/api/mmtk/util/address/struct.Address.html). The idea is that very few operations are allowed on an `ObjectReference`. For example, MMTk does not allow address arithmetic on `ObjectReference`s. This allows us to preserve memory-safety, only performing unsafe operations when required, and gives us a cleaner and more flexible abstraction to work with as it can allow object handles or offsets etc. `Address`, on the other hand, represents an arbitrary machine address. You might be interested in reading the [*Demystifying Magic: High-level Low-level Programming*][FBC09] paper which describes the above in more detail.
99+
100+
In MMTk, `ObjectReference` is a special address that represents an object. It is required to be
101+
within the address range of the object it refers to, and must be word-aligned. This address is used
102+
by MMTk to access side metadata, and find the space or regions (chunk, block, line, etc.) that
103+
contain the object. It must also be efficient to locate the object header (where in-header MMTk
104+
metadata are held) and the object's VM-specific metadata, such as type information, from a given
105+
`ObjectReference`. MMTk will need to access that information, either directly or indirectly via
106+
traits implemented by the binding, during tracing, which is performance-critical.
107+
108+
The address used as `ObjectReference` is nominated by the VM binding when an object is allocated (or
109+
moved by a moving GC, which we can ignore for now when supporting NoGC). VMs usually have their own
110+
concepts of "object reference" which refer to objects. Some of them, including OpenJDK and CRuby,
111+
use addresses to the object (the starting address or at an offset within the object) to refer to an
112+
object. Such VMs can directly use their "object reference" for the address of MMTk's
113+
`ObjectReference`.
114+
115+
Some VMs, such as JikesRVM, refer to an object by an address at a constant offset after the header,
116+
which can be outside the object. This does not satisfy the requirement of MMTk's `ObjectReference`,
117+
and the VM binding needs to make a clear distinction between the VM-level object reference and
118+
MMTk's `ObjectReference` type. A detailed example for supporting such a VM can be found
119+
[here][jikesrvm-objref].
120+
121+
Other VMs may use tagged references, compressed pointers, etc. They need to convert them to plain
122+
addresses to be used as MMTk's `ObjectReference`. Specifically, if the VM uses such representations
123+
in object fields, the VM binding can deal with the encoding and the decoding in its
124+
[`Slot`][slot-trait] implementation, and always present plain `ObjectReference`s to MMTk. See [this
125+
test][slot-test] for some `Slot` implementation examples.
126+
127+
[FBC09]: https://users.cecs.anu.edu.au/~steveb/pubs/papers/vmmagic-vee-2009.pdf
128+
[jikesrvm-objref]: https://github.com/mmtk/mmtk-jikesrvm/issues/178
129+
[slot-trait]: https://docs.mmtk.io/api/mmtk/vm/slot/trait.Slot.html
130+
[slot-test]: https://github.com/mmtk/mmtk-core/blob/master/src/vm/tests/mock_tests/mock_test_slots.rs
105131

106132
#### Miscellaneous configuration options
107133

@@ -261,7 +287,7 @@ void *mmtk_alloc(MmtkMutator mutator, size_t size, size_t align,
261287
* Set relevant object metadata
262288
*
263289
* @param mutator the mutator instance that is requesting the allocation
264-
* @param object the returned address of the allocated object
290+
* @param object the ObjectReference address chosen by the VM binding
265291
* @param size the size of the allocated object
266292
* @param allocator the allocation semantics to use for the allocation
267293
*/
@@ -274,13 +300,21 @@ In order to perform allocations, you will need to know what object alignment the
274300

275301
Now that MMTk is aware of each mutator thread, you have to change the runtime's allocation functions to call into MMTk to allocate using `mmtk_alloc` and set object metadata using `mmtk_post_alloc`. Note that there may be multiple allocation functions in the runtime so make sure that you edit them all!
276302

277-
You should use the saved `Mutator` pointer as the first parameter, the requested object size as the next parameter, and any alignment requirements the runtimes has as the third parameter.
303+
When calling `mmtk_alloc`, you should use the saved `Mutator` pointer as the first parameter, the requested object size as the next parameter, and any alignment requirements the runtime has as the third parameter.
278304

279305
If your runtime requires a non-zero allocation offset (i.e. the alignment requirements are for the offset address, not the returned address) then you have to provide the required value as the fourth parameter. Note that you ***must*** also update the [`USE_ALLOCATION_OFFSET`](https://docs.mmtk.io/api/mmtk/vm/trait.VMBinding.html#associatedconstant.USE_ALLOCATION_OFFSET) constant in the `VMBinding` implementation if your runtime requires a non-zero allocation offset.
280306

281307
For the time-being, you can ignore the `allocator` parameter in both these functions and always pass a value of `0` which means MMTk will pick the default allocator for your collector (a bump pointer allocator in the case of NoGC).
282308

283-
Finally, you need to call `mmtk_post_alloc` with the object address returned from the previous `mmtk_alloc` call in order to initialize object metadata.
309+
The return value of `mmtk_alloc` is the starting address of the allocated object.
310+
311+
Then you should nominate a word-aligned address within the allocated bytes to be the
312+
`ObjectReference` used to refer to that object from now on. It doesn't have to be the starting
313+
address.
314+
315+
Finally, you need to call `mmtk_post_alloc` with your chosen `ObjectReference` in order to
316+
initialize MMTk-level object metadata, such as logging bits, valid-object (VO) bits, etc. As a VM
317+
binding developer, you can ignore the details for now.
284318

285319
**Note:** Currently MMTk assumes object sizes are multiples of the `MIN_ALIGNMENT`. If you encounter errors with alignment, a simple workaround would be to align the requested object size up to the `MIN_ALIGNMENT`. See [here](https://github.com/mmtk/mmtk-core/issues/730) for the tracking issue to fix this bug.
286320

src/memory_manager.rs

+27-23
Original file line numberDiff line numberDiff line change
@@ -579,16 +579,17 @@ pub fn handle_user_collection_request<VM: VMBinding>(mmtk: &MMTK<VM>, tls: VMMut
579579
///
580580
/// Arguments:
581581
/// * `object`: The object reference to query.
582-
pub fn is_live_object<VM: VMBinding>(object: ObjectReference) -> bool {
583-
object.is_live::<VM>()
582+
pub fn is_live_object(object: ObjectReference) -> bool {
583+
object.is_live()
584584
}
585585

586-
/// Check if `addr` is the address of an object reference to an MMTk object.
586+
/// Check if `addr` is the raw address of an object reference to an MMTk object.
587587
///
588588
/// Concretely:
589-
/// 1. Return true if `ObjectReference::from_raw_address(addr)` is a valid object reference to an
590-
/// object in any space in MMTk.
591-
/// 2. Return false otherwise.
589+
/// 1. Return `Some(object)` if `ObjectReference::from_raw_address(addr)` is a valid object
590+
/// reference to an object in any space in MMTk. `object` is the result of
591+
/// `ObjectReference::from_raw_address(addr)`.
592+
/// 2. Return `None` otherwise.
592593
///
593594
/// This function is useful for conservative root scanning. The VM can iterate through all words in
594595
/// a stack, filter out zeros, misaligned words, obviously out-of-range words (such as addresses
@@ -603,7 +604,9 @@ pub fn is_live_object<VM: VMBinding>(object: ObjectReference) -> bool {
603604
/// is present. See `crate::plan::global::BasePlan::vm_space`.
604605
///
605606
/// Argument:
606-
/// * `addr`: An arbitrary address.
607+
/// * `addr`: A non-zero word-aligned address. Because the raw address of an `ObjectReference`
608+
/// cannot be zero and must be word-aligned, the caller must filter out zero and misaligned
609+
/// addresses before calling this function. Otherwise the behavior is undefined.
607610
#[cfg(feature = "is_mmtk_object")]
608611
pub fn is_mmtk_object(addr: Address) -> Option<ObjectReference> {
609612
crate::util::is_mmtk_object::check_object_reference(addr)
@@ -613,12 +616,13 @@ pub fn is_mmtk_object(addr: Address) -> Option<ObjectReference> {
613616
/// This should be used instead of [`crate::memory_manager::is_mmtk_object`] for conservative stack scanning if
614617
/// the binding may have internal pointers on the stack.
615618
///
616-
/// Note that, we only consider pointers that point to addresses that are equal or greater than the in-object addresss
617-
/// (i.e. [`crate::util::ObjectReference::to_address()`] which is the same as `object_ref.to_raw_address() + ObjectModel::IN_OBJECT_ADDRESS_OFFSET`),
618-
/// and within the allocation as 'internal pointers'. To be precise, for each object ref `obj_ref`, internal pointers are in the range
619-
/// `[obj_ref + ObjectModel::IN_OBJECT_ADDRESS_OFFSET, ObjectModel::ref_to_object_start(obj_ref) + ObjectModel::get_current_size(obj_ref))`.
620-
/// If a binding defines internal pointers differently, calling this method is undefined behavior.
621-
/// If this is the case for you, please submit an issue or engage us on Zulip to discuss more.
619+
/// Note that, we only consider pointers that point to addresses that are equal to or greater than
620+
/// the raw address of the object's `ObjectReference`, and within the allocation as 'internal
621+
/// pointers'. To be precise, for each object ref `obj_ref`, internal pointers are in the range
622+
/// `[obj_ref.to_raw_address(), obj_ref.to_object_start() +
623+
/// ObjectModel::get_current_size(obj_ref))`. If a binding defines internal pointers differently,
624+
/// calling this method is undefined behavior. If this is the case for you, please submit an issue
625+
/// or engage us on Zulip to discuss more.
622626
///
623627
/// Note that, in the similar situation as [`crate::memory_manager::is_mmtk_object`], the binding should filter
624628
/// out obvious non-pointers (e.g. alignment check, bound check, etc) before calling this function to avoid unnecessary
@@ -633,7 +637,7 @@ pub fn is_mmtk_object(addr: Address) -> Option<ObjectReference> {
633637
/// * `internal_ptr`: The address to start searching. We search backwards from this address (including this address) to find the base reference.
634638
/// * `max_search_bytes`: The maximum number of bytes we may search for an object with VO bit set. `internal_ptr - max_search_bytes` is not included.
635639
#[cfg(feature = "is_mmtk_object")]
636-
pub fn find_object_from_internal_pointer<VM: VMBinding>(
640+
pub fn find_object_from_internal_pointer(
637641
internal_ptr: Address,
638642
max_search_bytes: usize,
639643
) -> Option<ObjectReference> {
@@ -655,7 +659,7 @@ pub fn find_object_from_internal_pointer<VM: VMBinding>(
655659
/// object for the VM in response to `memory_manager::alloc`, this function will return true; but
656660
/// if the VM directly called `malloc` to allocate the object, this function will return false.
657661
///
658-
/// If `is_mmtk_object(object.to_address())` returns true, `is_in_mmtk_spaces(object)` must also
662+
/// If `is_mmtk_object(object.to_raw_address())` returns true, `is_in_mmtk_spaces(object)` must also
659663
/// return true.
660664
///
661665
/// This function is useful if an object reference in the VM can be either a pointer into the MMTk
@@ -669,10 +673,10 @@ pub fn find_object_from_internal_pointer<VM: VMBinding>(
669673
///
670674
/// Arguments:
671675
/// * `object`: The object reference to query.
672-
pub fn is_in_mmtk_spaces<VM: VMBinding>(object: ObjectReference) -> bool {
676+
pub fn is_in_mmtk_spaces(object: ObjectReference) -> bool {
673677
use crate::mmtk::SFT_MAP;
674678
SFT_MAP
675-
.get_checked(object.to_address::<VM>())
679+
.get_checked(object.to_raw_address())
676680
.is_in_space(object)
677681
}
678682

@@ -766,10 +770,10 @@ pub fn add_finalizer<VM: VMBinding>(
766770
/// Arguments:
767771
/// * `object`: The object to be pinned
768772
#[cfg(feature = "object_pinning")]
769-
pub fn pin_object<VM: VMBinding>(object: ObjectReference) -> bool {
773+
pub fn pin_object(object: ObjectReference) -> bool {
770774
use crate::mmtk::SFT_MAP;
771775
SFT_MAP
772-
.get_checked(object.to_address::<VM>())
776+
.get_checked(object.to_raw_address())
773777
.pin_object(object)
774778
}
775779

@@ -780,10 +784,10 @@ pub fn pin_object<VM: VMBinding>(object: ObjectReference) -> bool {
780784
/// Arguments:
781785
/// * `object`: The object to be unpinned
782786
#[cfg(feature = "object_pinning")]
783-
pub fn unpin_object<VM: VMBinding>(object: ObjectReference) -> bool {
787+
pub fn unpin_object(object: ObjectReference) -> bool {
784788
use crate::mmtk::SFT_MAP;
785789
SFT_MAP
786-
.get_checked(object.to_address::<VM>())
790+
.get_checked(object.to_raw_address())
787791
.unpin_object(object)
788792
}
789793

@@ -792,10 +796,10 @@ pub fn unpin_object<VM: VMBinding>(object: ObjectReference) -> bool {
792796
/// Arguments:
793797
/// * `object`: The object to be checked
794798
#[cfg(feature = "object_pinning")]
795-
pub fn is_pinned<VM: VMBinding>(object: ObjectReference) -> bool {
799+
pub fn is_pinned(object: ObjectReference) -> bool {
796800
use crate::mmtk::SFT_MAP;
797801
SFT_MAP
798-
.get_checked(object.to_address::<VM>())
802+
.get_checked(object.to_raw_address())
799803
.is_object_pinned(object)
800804
}
801805

src/plan/barriers.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ impl<S: BarrierSemantics> ObjectBarrier<S> {
182182
fn log_object(&self, object: ObjectReference) -> bool {
183183
#[cfg(all(feature = "vo_bit", feature = "extreme_assertions"))]
184184
debug_assert!(
185-
crate::util::metadata::vo_bit::is_vo_bit_set::<S::VM>(object),
185+
crate::util::metadata::vo_bit::is_vo_bit_set(object),
186186
"object bit is unset"
187187
);
188188
loop {

0 commit comments

Comments
 (0)