Skip to content

Commit 793385a

Browse files
feat: fix graph copy stab/destab for multi-migration and dedup table (#5993)
The graph copy stabilization/destabilization algorithm performs a BFS walk to serialize the heap to stable memory; after an upgrade, a deserialization pass restores the heap from stable memory. This BFS was initially designed to start the walk only from the stable actor record. However, later additions to the enhanced orthogonal persistence incremental GC added 2 GC-only roots: the `blob deduplication table` and the `migration functions list`. Without proper handling during serialization/deserialization, these objects would be lost and the new actor version would behave incorrectly. This PR modifies the BFS algorithm to add the two objects as roots in the serialization/deserialization graph copy algorithm, so that they are first saved properly to stable memory and then restored correctly into the heap. Other future GC roots can be treated similarly. To verify that the functionality works correctly, two tests were modified to trigger this behavior and check that the upgraded actor works correctly. **Attention point:** This PR also fixes a corner case. For enhanced multi-migration, which migration function in the chain is matched against the old actor type is only determined at runtime (see `desugar.ml`), depending on how far along the chain the actor was previously migrated. The old code in `compile_enhanced.ml` performed compatibility checks against the new actor `.pre`, which can be wrong for multi-migration when multiple migration steps are missing. Originally, the code checked the old actor type against the new actor `.pre` type early in the `start destabilization` code path. This cannot work with the new migration. The solution is to restore the old actor type from stable memory into `metadata` and let the checks happen at `ICStableRead` time (via `register_stable_type`, see `compile_enhanced.ml`). Previously, this metadata was wiped and `ICStableRead` did not perform any checks; it just recovered the actor. 
This way, the checks happen correctly at runtime for both enhanced multi-migration and regular migration (or upgrade). --------- Co-authored-by: Claudio Russo <claudio@dfinity.org>
1 parent 895c20f commit 793385a

19 files changed

Lines changed: 342 additions & 62 deletions

rts/motoko-rts-tests/src/stabilization.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@ use crate::{
1414
use motoko_rts::{
1515
memory::{alloc_array, Memory},
1616
stabilization::{
17-
deserialization::Deserialization, graph_copy::GraphCopy, serialization::Serialization,
17+
deserialization::Deserialization,
18+
graph_copy::GraphCopy,
19+
layout::StableValue,
20+
serialization::{Serialization, SerializationRoots},
1821
},
1922
types::{Value, Words, TAG_ARRAY_M},
2023
};
@@ -168,14 +171,25 @@ fn test_serialization_deserialization(random: &mut Rand32, max_objects: usize, s
168171

169172
fn serialize(old_stable_root: Value, stable_start: u64) -> u64 {
170173
let mut memory = TestMemory::new(Words(0));
171-
let mut serialization = Serialization::start(&mut memory, old_stable_root, stable_start);
174+
let roots = SerializationRoots {
175+
actor: old_stable_root,
176+
dedup_table: Value::from_raw(0),
177+
migrations_list: Value::from_raw(0),
178+
};
179+
let mut serialization = Serialization::start(&mut memory, roots, stable_start);
172180
serialization.copy_increment(&mut memory);
173181
assert!(serialization.is_completed());
174182
serialization.serialized_data_length()
175183
}
176184

177185
fn deserialize<M: Memory>(mem: &mut M, stable_start: u64, stable_size: u64) -> Value {
178-
let mut deserialization = Deserialization::start(mem, stable_start, stable_size);
186+
let mut deserialization = Deserialization::start(
187+
mem,
188+
stable_start,
189+
stable_size,
190+
StableValue::from_raw(0),
191+
StableValue::from_raw(0),
192+
);
179193
deserialization.copy_increment(mem);
180194
assert!(deserialization.is_completed());
181195
deserialization.get_stable_root()

rts/motoko-rts/src/persistence.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ use crate::{
1515
persistence::compatibility::memory_compatible,
1616
region::{
1717
LEGACY_VERSION_NO_STABLE_MEMORY, LEGACY_VERSION_REGIONS, LEGACY_VERSION_SOME_STABLE_MEMORY,
18-
VERSION_GRAPH_COPY_NO_REGIONS, VERSION_GRAPH_COPY_REGIONS, VERSION_STABLE_HEAP_NO_REGIONS,
19-
VERSION_STABLE_HEAP_REGIONS,
18+
VERSION_GRAPH_COPY_NO_REGIONS, VERSION_GRAPH_COPY_REGIONS,
19+
VERSION_GRAPH_COPY_V1_NO_REGIONS, VERSION_GRAPH_COPY_V1_REGIONS,
20+
VERSION_STABLE_HEAP_NO_REGIONS, VERSION_STABLE_HEAP_REGIONS,
2021
},
2122
rts_trap_with,
2223
stable_mem::read_persistence_version,
@@ -158,6 +159,8 @@ unsafe fn use_enhanced_orthogonal_persistence() -> bool {
158159
VERSION_STABLE_HEAP_NO_REGIONS | VERSION_STABLE_HEAP_REGIONS => true,
159160
VERSION_GRAPH_COPY_NO_REGIONS
160161
| VERSION_GRAPH_COPY_REGIONS
162+
| VERSION_GRAPH_COPY_V1_NO_REGIONS
163+
| VERSION_GRAPH_COPY_V1_REGIONS
161164
| LEGACY_VERSION_NO_STABLE_MEMORY
162165
| LEGACY_VERSION_SOME_STABLE_MEMORY
163166
| LEGACY_VERSION_REGIONS => false,
@@ -250,6 +253,14 @@ unsafe fn update_stable_type<M: Memory>(
250253
(*metadata).stable_type.assign(mem, &new_type);
251254
}
252255

256+
/// Restore the old stable type from graph-copy stabilization metadata into
257+
/// the freshly initialized persistent metadata so that `register_stable_type`
258+
/// can check compatibility when the migration chain runs after destabilization.
259+
pub unsafe fn restore_stable_type<M: Memory>(mem: &mut M, old_type: &TypeDescriptor) {
260+
let metadata = PersistentMetadata::get();
261+
(*metadata).stable_type.assign(mem, old_type);
262+
}
263+
253264
/// Register the stable actor type on canister initialization and upgrade.
254265
/// The type is stored in the persistent metadata memory for later retrieval on canister upgrades.
255266
/// On an upgrade, the memory compatibility between the new and existing stable type is checked.

rts/motoko-rts/src/region.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ pub(crate) const VERSION_GRAPH_COPY_NO_REGIONS: usize = 3;
1616
pub(crate) const VERSION_GRAPH_COPY_REGIONS: usize = 4;
1717
pub(crate) const VERSION_STABLE_HEAP_NO_REGIONS: usize = 5;
1818
pub(crate) const VERSION_STABLE_HEAP_REGIONS: usize = 6;
19+
// V1 graph-copy: adds a 16-byte extension block in front of the legacy 40-byte
20+
// last-page record carrying extra GC roots (dedup table, migrations list).
21+
pub(crate) const VERSION_GRAPH_COPY_V1_NO_REGIONS: usize = 7;
22+
pub(crate) const VERSION_GRAPH_COPY_V1_REGIONS: usize = 8;
1923

2024
const _: () = assert!(meta_data::size::PAGE_IN_BYTES == crate::stable_mem::PAGE_SIZE);
2125
const _: () = assert!(meta_data::size::PAGES_IN_BLOCK <= u8::MAX as u32);

rts/motoko-rts/src/stabilization/deserialization.rs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use crate::{
66
gc::incremental::array_slicing::slice_array,
77
memory::Memory,
88
stabilization::deserialization::scan_stack::STACK_EMPTY,
9-
types::{FwdPtr, Tag, Value, TAG_ARRAY_SLICE_MIN, TAG_FWD_PTR},
9+
types::{FwdPtr, Tag, Value, NULL_POINTER, TAG_ARRAY_SLICE_MIN, TAG_FWD_PTR},
1010
visitor::visit_pointer_fields,
1111
};
1212

@@ -26,6 +26,9 @@ pub struct Deserialization {
2626
stable_root: Option<Value>,
2727
limit: ExecutionMonitor,
2828
clear_position: u64,
29+
/// Heap addresses for the helper GC roots.
30+
pub dedup_table_address: Value,
31+
pub migrations_list_address: Value,
2932
}
3033

3134
/// Helper type to pass serialization context instead of closures.
@@ -58,7 +61,13 @@ impl<'a, M: Memory> DeserializationContext<'a, M> {
5861
/// mechanism to avoid instruction limit exceeding.
5962
impl Deserialization {
6063
/// Start the deserialization, followed by a series of copy increments.
61-
pub fn start<M: Memory>(mem: &mut M, stable_start: u64, stable_size: u64) -> Deserialization {
64+
pub fn start<M: Memory>(
65+
mem: &mut M,
66+
stable_start: u64,
67+
stable_size: u64,
68+
dedup_table_address: StableValue,
69+
migrations_list_address: StableValue,
70+
) -> Deserialization {
6271
let from_space = StableMemoryAccess::open(stable_start, stable_size);
6372
let scan_stack = unsafe { ScanStack::new(mem) };
6473
let limit = ExecutionMonitor::new();
@@ -70,8 +79,18 @@ impl Deserialization {
7079
stable_root: None,
7180
limit,
7281
clear_position: stable_start,
82+
dedup_table_address: NULL_POINTER,
83+
migrations_list_address: NULL_POINTER,
7384
};
74-
deserialization.start(mem, StableValue::serialize(Value::from_ptr(0)));
85+
let _ = deserialization.start(mem, StableValue::serialize(Value::from_ptr(0)));
86+
// Load up the heap addresses of the GC helper roots.
87+
if dedup_table_address != StableValue::from_raw(0) {
88+
deserialization.dedup_table_address = deserialization.start(mem, dedup_table_address);
89+
}
90+
if migrations_list_address != StableValue::from_raw(0) {
91+
deserialization.migrations_list_address =
92+
deserialization.start(mem, migrations_list_address);
93+
}
7594
deserialization
7695
}
7796

rts/motoko-rts/src/stabilization/graph_copy.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ pub trait GraphCopy<S: Copy, T: Copy, P: Copy + Default> {
2323
/// copy_algorthm.copy_increment();
2424
/// }
2525
/// ```
26-
fn start<M: Memory>(&mut self, mem: &mut M, root: S) {
27-
self.evacuate(mem, root);
26+
fn start<M: Memory>(&mut self, mem: &mut M, root: S) -> T {
27+
self.evacuate(mem, root)
2828
}
2929

3030
/// Determine whether the scanning algorithm is completed,

rts/motoko-rts/src/stabilization/ic.rs

Lines changed: 32 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
mod metadata;
1+
pub mod metadata;
22
mod performance;
33

44
use motoko_rts_macros::ic_mem_fn;
@@ -7,11 +7,13 @@ use crate::{
77
gc::incremental::{is_gc_stopped, resume_gc, stop_gc},
88
memory::Memory,
99
persistence::{
10-
compatibility::{memory_compatible, TypeDescriptor},
10+
compatibility::TypeDescriptor, get_dedup_table_ptr, get_migration_functions_ptr,
11+
restore_stable_type, set_dedup_table_ptr, set_migration_functions_ptr,
1112
set_upgrade_instructions,
1213
},
1314
rts_trap_with,
1415
stabilization::ic::metadata::StabilizationMetadata,
16+
stabilization::serialization::SerializationRoots,
1517
stable_mem::{self, moc_stable_mem_set_size, PAGE_SIZE},
1618
types::Value,
1719
};
@@ -25,8 +27,8 @@ struct StabilizationState {
2527
old_candid_data: Value,
2628
old_type_offsets: Value,
2729
completed: bool,
28-
serialization: Serialization,
29-
instruction_meter: InstructionMeter,
30+
pub serialization: Serialization,
31+
pub instruction_meter: InstructionMeter,
3032
}
3133

3234
impl StabilizationState {
@@ -72,7 +74,12 @@ pub unsafe fn start_graph_stabilization<M: Memory>(
7274
assert!(is_gc_stopped());
7375
let stable_memory_pages = stable_mem::size(); // Backup the virtual size.
7476
let serialized_data_start = stable_memory_pages * PAGE_SIZE;
75-
let serialization = Serialization::start(mem, stable_actor, serialized_data_start);
77+
let serialization_roots = SerializationRoots {
78+
actor: stable_actor,
79+
dedup_table: *get_dedup_table_ptr(),
80+
migrations_list: *get_migration_functions_ptr(),
81+
};
82+
let serialization = Serialization::start(mem, serialization_roots, serialized_data_start);
7683
STABILIZATION_STATE = Some(StabilizationState::new(
7784
serialization,
7885
old_candid_data,
@@ -126,7 +133,11 @@ unsafe fn write_metadata() {
126133
type_descriptor,
127134
};
128135
state.instruction_meter.stop();
129-
metadata.store(&mut state.instruction_meter);
136+
metadata.store(
137+
&mut state.instruction_meter,
138+
state.serialization.dedup_table_address,
139+
state.serialization.migrations_list_address,
140+
);
130141
}
131142

132143
struct DestabilizationState {
@@ -140,33 +151,17 @@ static mut DESTABILIZATION_STATE: Option<DestabilizationState> = None;
140151

141152
/// Starts the graph-copy-based destabilization process.
142153
/// This requires that the deserialization is subsequently run and completed.
143-
/// Also checks whether the new program version is compatible to the stored state by comparing the type
144-
/// tables of both the old and the new program version.
145-
/// The check is identical to enhanced orthogonal persistence, except that the metadata is obtained from
146-
/// stable memory and not the persistent main memory.
147-
/// The parameters encode the type table of the new program version to which that data is to be upgraded.
148-
/// `new_candid_data`: A blob encoding the Candid type as a table.
149-
/// `new_type_offsets`: A blob encoding the type offsets in the Candid type table.
150-
/// Type index 0 represents the stable actor object to be serialized.
151-
/// Traps if the stable state is incompatible with the new program version and the upgrade is not
152-
/// possible.
154+
/// The old type descriptor from stable memory is restored into `PersistentMetadata`
155+
/// so that `register_stable_type` can check compatibility when the migration chain
156+
/// runs after destabilization (inside `Persistence.load` / `ICStableRead`).
153157
#[ic_mem_fn(ic_only)]
154-
pub unsafe fn start_graph_destabilization<M: Memory>(
155-
mem: &mut M,
156-
new_candid_data: Value,
157-
new_type_offsets: Value,
158-
) {
158+
pub unsafe fn start_graph_destabilization<M: Memory>(mem: &mut M) {
159159
assert!(DESTABILIZATION_STATE.is_none());
160160

161161
let mut instruction_meter = InstructionMeter::new();
162162
instruction_meter.start();
163-
let mut new_type_descriptor = TypeDescriptor::new(new_candid_data, new_type_offsets);
164-
let (metadata, statistics) = StabilizationMetadata::load(mem);
165-
let mut old_type_descriptor = metadata.type_descriptor;
166-
if !memory_compatible(mem, &mut old_type_descriptor, &mut new_type_descriptor) {
167-
rts_trap_with("Memory-incompatible program upgrade");
168-
}
169-
// Restore the virtual size.
163+
let (metadata, last_page_record) = StabilizationMetadata::load(mem);
164+
restore_stable_type(mem, &metadata.type_descriptor);
170165
moc_stable_mem_set_size(metadata.serialized_data_start / PAGE_SIZE);
171166

172167
// Stop the GC until the incremental graph destabilization has been completed.
@@ -176,11 +171,13 @@ pub unsafe fn start_graph_destabilization<M: Memory>(
176171
mem,
177172
metadata.serialized_data_start,
178173
metadata.serialized_data_length,
174+
last_page_record.dedup_table_address,
175+
last_page_record.migrations_list_address,
179176
);
180177
instruction_meter.stop();
181178
DESTABILIZATION_STATE = Some(DestabilizationState {
182179
deserialization,
183-
stabilization_statistics: statistics,
180+
stabilization_statistics: last_page_record.statistics,
184181
completed: false,
185182
instruction_meter,
186183
});
@@ -214,6 +211,12 @@ pub unsafe fn graph_destabilization_increment<M: Memory>(mem: &mut M) -> bool {
214211
state.instruction_meter.stop();
215212
if state.deserialization.is_completed() {
216213
record_upgrade_costs();
214+
215+
// We need to put back in the metadata pointing to the
216+
// helper GC roots for the dedup table and migration list.
217+
set_dedup_table_ptr(mem, state.deserialization.dedup_table_address);
218+
set_migration_functions_ptr(mem, state.deserialization.migrations_list_address);
219+
217220
state.completed = true;
218221
memory_sanity_check(mem);
219222
}

0 commit comments

Comments
 (0)