Skip to content

Commit 6b6ec4e

Browse files
authored
[reconfigurator] Planner: mark zones ready_for_cleanup when appropriate (#7588)
This PR implements two related planner changes:
* When expunging a sled, all the zones are immediately marked as `Expunged { ready_for_cleanup: true, .. }`.
* When there are expunged zones on an in-service sled, we'll consult the latest inventory collection and flip `ready_for_cleanup` to true if the zone is gone and the sled's zone config generation is >= the generation in which the zone was expunged.

No downstream users of `ready_for_cleanup` are included in this PR; those will come in followups.
1 parent 06c17ba commit 6b6ec4e

11 files changed

+463
-109
lines changed

nexus/reconfigurator/planning/src/blueprint_builder/builder.rs

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,18 @@ impl<'a> BlueprintBuilder<'a> {
828828
}
829829
}
830830

831+
pub fn current_sled_state(
832+
&self,
833+
sled_id: SledUuid,
834+
) -> Result<SledState, Error> {
835+
let editor = self.sled_editors.get(&sled_id).ok_or_else(|| {
836+
Error::Planner(anyhow!(
837+
"tried to get sled state for unknown sled {sled_id}"
838+
))
839+
})?;
840+
Ok(editor.state())
841+
}
842+
831843
/// Set the desired state of the given sled.
832844
pub fn set_sled_decommissioned(
833845
&mut self,
@@ -935,9 +947,16 @@ impl<'a> BlueprintBuilder<'a> {
935947
// Expunging a disk expunges any datasets and zones that depend on it,
936948
// so expunging all in-service disks should have also expunged all
937949
// datasets and zones. Double-check that that's true.
950+
let mut zones_ready_for_cleanup = Vec::new();
938951
for zone in editor.zones(BlueprintZoneDisposition::any) {
939952
match zone.disposition {
940-
BlueprintZoneDisposition::Expunged { .. } => (),
953+
BlueprintZoneDisposition::Expunged { .. } => {
954+
// Since this is a full sled expungement, we'll never see an
955+
// inventory collection indicating the zones are shut down,
956+
// nor do we need to: go ahead and mark any expunged zones
957+
// as ready for cleanup.
958+
zones_ready_for_cleanup.push(zone.id);
959+
}
941960
BlueprintZoneDisposition::InService => {
942961
return Err(Error::Planner(anyhow!(
943962
"expunged all disks but a zone \
@@ -957,6 +976,11 @@ impl<'a> BlueprintBuilder<'a> {
957976
}
958977
}
959978
}
979+
for zone_id in zones_ready_for_cleanup {
980+
editor
981+
.mark_expunged_zone_ready_for_cleanup(&zone_id)
982+
.map_err(|err| Error::SledEditError { sled_id, err })?;
983+
}
960984

961985
// If we didn't expunge anything, this sled was presumably expunged in a
962986
// prior planning run. Only note the operation if we did anything.
@@ -975,6 +999,23 @@ impl<'a> BlueprintBuilder<'a> {
975999
Ok(())
9761000
}
9771001

1002+
/// Mark expunged zones as ready for cleanup.
1003+
pub(crate) fn mark_expunged_zones_ready_for_cleanup(
1004+
&mut self,
1005+
sled_id: SledUuid,
1006+
zone_ids: &[OmicronZoneUuid],
1007+
) -> Result<(), Error> {
1008+
let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| {
1009+
Error::Planner(anyhow!("tried to expunge unknown sled {sled_id}"))
1010+
})?;
1011+
for zone_id in zone_ids {
1012+
editor
1013+
.mark_expunged_zone_ready_for_cleanup(zone_id)
1014+
.map_err(|err| Error::SledEditError { sled_id, err })?;
1015+
}
1016+
Ok(())
1017+
}
1018+
9781019
pub(crate) fn expunge_all_multinode_clickhouse(
9791020
&mut self,
9801021
sled_id: SledUuid,

nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,13 @@ impl SledEditor {
180180
}
181181
}
182182

183+
pub fn state(&self) -> SledState {
184+
match &self.0 {
185+
InnerSledEditor::Active(_) => SledState::Active,
186+
InnerSledEditor::Decommissioned(edited_sled) => edited_sled.state,
187+
}
188+
}
189+
183190
pub fn edit_counts(&self) -> SledEditCounts {
184191
match &self.0 {
185192
InnerSledEditor::Active(editor) => editor.edit_counts(),
@@ -336,6 +343,13 @@ impl SledEditor {
336343
self.as_active_mut()?.expunge_zone(zone_id)
337344
}
338345

346+
pub fn mark_expunged_zone_ready_for_cleanup(
347+
&mut self,
348+
zone_id: &OmicronZoneUuid,
349+
) -> Result<bool, SledEditError> {
350+
self.as_active_mut()?.mark_expunged_zone_ready_for_cleanup(zone_id)
351+
}
352+
339353
/// Backwards compatibility / test helper: If we're given a blueprint that
340354
/// has zones but wasn't created via `SledEditor`, it might not have
341355
/// datasets for all its zones. This method backfills them.
@@ -601,6 +615,15 @@ impl ActiveSledEditor {
601615
Ok(did_expunge)
602616
}
603617

618+
pub fn mark_expunged_zone_ready_for_cleanup(
619+
&mut self,
620+
zone_id: &OmicronZoneUuid,
621+
) -> Result<bool, SledEditError> {
622+
let did_mark_ready =
623+
self.zones.mark_expunged_zone_ready_for_cleanup(zone_id)?;
624+
Ok(did_mark_ready)
625+
}
626+
604627
/// Backwards compatibility / test helper: If we're given a blueprint that
605628
/// has zones but wasn't created via `SledEditor`, it might not have
606629
/// datasets for all its zones. This method backfills them.

nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/zones.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ pub enum ZonesEditError {
2222
AddDuplicateZoneId { id: OmicronZoneUuid, kind1: ZoneKind, kind2: ZoneKind },
2323
#[error("tried to expunge nonexistent zone {id}")]
2424
ExpungeNonexistentZone { id: OmicronZoneUuid },
25+
#[error("tried to mark a nonexistent zone as ready for cleanup: {id}")]
26+
MarkNonexistentZoneReadyForCleanup { id: OmicronZoneUuid },
27+
#[error("tried to mark a non-expunged zone as ready for cleanup: {id}")]
28+
MarkNonExpungedZoneReadyForCleanup { id: OmicronZoneUuid },
2529
}
2630

2731
#[derive(Debug, thiserror::Error)]
@@ -138,6 +142,41 @@ impl ZonesEditor {
138142
Ok((did_expunge, config.into_ref()))
139143
}
140144

145+
/// Set an expunged zone's `ready_for_cleanup` flag to true.
146+
///
147+
/// Unlike most edit operations, this (alone) will not result in an
148+
/// increased generation when `finalize()` is called: this flag is produced
149+
/// and consumed inside the Reconfigurator system, and is not included in
150+
/// the generation-guarded config send to sled-agents.
151+
///
152+
/// # Errors
153+
///
154+
/// Fails if this zone ID does not exist or is not already in the expunged
155+
/// disposition.
156+
pub fn mark_expunged_zone_ready_for_cleanup(
157+
&mut self,
158+
zone_id: &OmicronZoneUuid,
159+
) -> Result<bool, ZonesEditError> {
160+
let mut config = self.zones.get_mut(zone_id).ok_or_else(|| {
161+
ZonesEditError::MarkNonexistentZoneReadyForCleanup { id: *zone_id }
162+
})?;
163+
164+
match &mut config.disposition {
165+
BlueprintZoneDisposition::InService => {
166+
Err(ZonesEditError::MarkNonExpungedZoneReadyForCleanup {
167+
id: *zone_id,
168+
})
169+
}
170+
BlueprintZoneDisposition::Expunged {
171+
ready_for_cleanup, ..
172+
} => {
173+
let did_mark_ready = !*ready_for_cleanup;
174+
*ready_for_cleanup = true;
175+
Ok(did_mark_ready)
176+
}
177+
}
178+
}
179+
141180
fn expunge_impl(
142181
config: &mut BlueprintZoneConfig,
143182
counts: &mut EditCounts,

0 commit comments

Comments
 (0)