Commit 86a2ce1
Add early rejection of IOs if too many Downstairs are inactive (#1565)
This aborts the IO before passing it to the Downstairs, so it's not assigned a `JobId` or put into the `ActiveJobs` map. The most noticeable change is that writes are now fast-err'd instead of fast-acked if > 1 Downstairs is inactive.
1 parent df17b64 · commit 86a2ce1
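
The thresholds enforced by this change (visible in the `upstairs/src/upstairs.rs` hunks below) are: reads need at least one Downstairs accepting IO, writes and plain flushes need at least two, and a flush carrying a snapshot needs all three. A minimal standalone sketch of that rule, using illustrative types rather than the crate's own:

```rust
/// Illustrative IO categories; not the crate's real request types.
#[derive(Clone, Copy)]
enum IoKind {
    Read,
    Write,
    Flush { has_snapshot: bool },
}

/// How many of the three Downstairs must be accepting IO for the
/// request to be submitted instead of being rejected up front.
fn required_active_clients(kind: IoKind) -> usize {
    match kind {
        IoKind::Read => 1,
        IoKind::Write => 2,
        IoKind::Flush { has_snapshot: false } => 2,
        IoKind::Flush { has_snapshot: true } => 3,
    }
}

fn main() {
    // With only one Downstairs accepting IO, reads still pass the gate,
    // but writes are rejected before a JobId is ever assigned.
    assert!(1 >= required_active_clients(IoKind::Read));
    assert!(1 < required_active_clients(IoKind::Write));
}
```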

File tree: 5 files changed (+305, -3 lines)

upstairs/src/client.rs

Lines changed: 13 additions & 1 deletion
@@ -1891,7 +1891,19 @@ impl DownstairsClient {
         self.client_delay_us.load(Ordering::Relaxed)
     }
 
-    /// Looks up the region UUID
+    /// Checks whether the client is in a state where it can accept IO
+    pub(crate) fn is_accepting_io(&self) -> bool {
+        matches!(
+            self.state,
+            DsState::Active
+                | DsState::LiveRepair
+                | DsState::Connecting {
+                    mode: ConnectionMode::Offline,
+                    ..
+                }
+        )
+    }
+
     pub(crate) fn id(&self) -> Option<Uuid> {
         self.region_uuid
     }
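
`is_accepting_io` counts a Downstairs as usable in three situations: fully `Active`, undergoing `LiveRepair`, or `Connecting` in `Offline` mode (roughly, a briefly disconnected Downstairs whose jobs can still be replayed). A client that is faulted, or reconnecting after a fault, does not count. A simplified model of the check, with stand-in enums in place of the crate's `DsState` and `ConnectionMode`:

```rust
// Stand-in types, simplified from the crate's DsState / ConnectionMode.
#[derive(Debug)]
enum ConnectionMode {
    New,
    Offline,
    Faulted,
}

#[derive(Debug)]
enum DsState {
    Active,
    LiveRepair,
    Connecting { mode: ConnectionMode },
    Stopping,
}

fn is_accepting_io(state: &DsState) -> bool {
    matches!(
        state,
        DsState::Active
            | DsState::LiveRepair
            | DsState::Connecting {
                mode: ConnectionMode::Offline,
            }
    )
}

fn main() {
    assert!(is_accepting_io(&DsState::Active));
    assert!(is_accepting_io(&DsState::Connecting {
        mode: ConnectionMode::Offline,
    }));
    // A faulted client that is reconnecting does not accept IO.
    assert!(!is_accepting_io(&DsState::Connecting {
        mode: ConnectionMode::Faulted,
    }));
    assert!(!is_accepting_io(&DsState::Stopping));
}
```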

upstairs/src/downstairs.rs

Lines changed: 7 additions & 0 deletions
@@ -3326,6 +3326,13 @@ impl Downstairs {
         }
     }
 
+    /// Returns the number of clients that can accept IO
+    ///
+    /// A client can accept IO if it is in the `Active` or `LiveRepair` state.
+    pub fn active_client_count(&self) -> usize {
+        self.clients.iter().filter(|c| c.is_accepting_io()).count()
+    }
+
     /// Wrapper for marking a single job as done from the given client
     ///
     /// This can be used to test handling of job acks, etc
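
`active_client_count` is a filter-and-count over the three per-client states (note that its doc comment mentions only `Active` and `LiveRepair`, while `is_accepting_io` above also admits clients reconnecting in `Offline` mode). The Upstairs compares the count against a per-IO threshold before a `JobId` is assigned. A self-contained sketch of that gate, with a plain array of booleans standing in for the real client set:

```rust
/// `accepting` stands in for `client.is_accepting_io()` evaluated on each
/// of the three Downstairs clients (illustrative, not the crate's API).
fn active_client_count(accepting: &[bool; 3]) -> usize {
    accepting.iter().filter(|&&a| a).count()
}

fn main() {
    // Two clients still accepting IO: writes (which need 2) go through.
    assert_eq!(active_client_count(&[false, true, true]), 2);
    // Only one left: a write would now be rejected before submission.
    assert!(active_client_count(&[false, false, true]) < 2);
}
```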

upstairs/src/dummy_downstairs_tests.rs

Lines changed: 252 additions & 2 deletions
@@ -12,6 +12,8 @@ use crate::guest::Guest;
 use crate::up_main;
 use crate::BlockIO;
 use crate::Buffer;
+use crate::ClientFaultReason;
+use crate::ClientStopReason;
 use crate::ConnectionMode;
 use crate::CrucibleError;
 use crate::DsState;
@@ -33,6 +35,7 @@ use crucible_protocol::JobId;
 use crucible_protocol::Message;
 use crucible_protocol::ReadBlockContext;
 use crucible_protocol::ReadResponseHeader;
+use crucible_protocol::SnapshotDetails;
 use crucible_protocol::WriteHeader;
 
 use bytes::BytesMut;
@@ -289,6 +292,35 @@ impl DownstairsHandle {
         }
     }
 
+    /// Awaits a `Message::Flush` and sends a `FlushAck` with an `IoError`
+    ///
+    /// Returns the flush number for further checks.
+    ///
+    /// # Panics
+    /// If a non-flush message arrives
+    pub async fn err_flush(&mut self) -> u64 {
+        match self.recv().await.unwrap() {
+            Message::Flush {
+                job_id,
+                flush_number,
+                upstairs_id,
+                ..
+            } => {
+                self.send(Message::FlushAck {
+                    upstairs_id,
+                    session_id: self.upstairs_session_id.unwrap(),
+                    job_id,
+                    result: Err(CrucibleError::IoError("oh no".to_string())),
+                })
+                .unwrap();
+                flush_number
+            }
+            m => {
+                panic!("saw non flush {m:?}");
+            }
+        }
+    }
+
     /// Awaits a `Message::Write { .. }` and sends a `WriteAck`
     ///
     /// Returns the job ID for further checks.
@@ -311,6 +343,23 @@ impl DownstairsHandle {
         }
     }
 
+    /// Awaits a `Message::Write` and sends a `WriteAck` with `IOError`
+    pub async fn err_write(&mut self) -> JobId {
+        match self.recv().await.unwrap() {
+            Message::Write { header, .. } => {
+                self.send(Message::WriteAck {
+                    upstairs_id: header.upstairs_id,
+                    session_id: self.upstairs_session_id.unwrap(),
+                    job_id: header.job_id,
+                    result: Err(CrucibleError::IoError("oh no".to_string())),
+                })
+                .unwrap();
+                header.job_id
+            }
+            m => panic!("saw non write: {m:?}"),
+        }
+    }
+
     /// Awaits a `Message::Barrier { .. }` and sends a `BarrierAck`
     ///
     /// Returns the job ID for further checks.
@@ -358,7 +407,7 @@ impl DownstairsHandle {
                 job_id,
                 blocks: Ok(vec![block]),
             },
-            data: data.clone(),
+            data,
         })
         .unwrap();
         job_id
@@ -811,7 +860,7 @@ async fn run_live_repair(mut harness: TestHarness) {
            job_id,
            blocks: Ok(vec![block]),
        },
-       data: data.clone(),
+       data,
    }) {
        Ok(()) => panic!("DS1 should be disconnected"),
        Err(e) => {
@@ -3055,3 +3104,204 @@ async fn test_bytes_based_barrier() {
     harness.ds2.ack_flush().await;
     harness.ds3.ack_flush().await;
 }
+
+fn assert_faulted(s: &DsState) {
+    match s {
+        DsState::Stopping(ClientStopReason::Fault(
+            ClientFaultReason::RequestedFault,
+        ))
+        | DsState::Connecting {
+            mode: ConnectionMode::Faulted,
+            ..
+        } => (),
+        _ => panic!("invalid state: expected faulted, got {s:?}"),
+    }
+}
+
+/// Test for early rejection of writes if > 1 Downstairs is unavailable
+#[tokio::test]
+async fn fast_write_rejection() {
+    let mut harness = TestHarness::new().await;
+
+    let write_buf = BytesMut::from(vec![1; 4096].as_slice());
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+
+    harness.ds1().err_write().await;
+    harness.ds2.ack_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Send a second write, which should still work (because we have 2/3 ds)
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds2.err_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_faulted(&ds[ClientId::new(1)]);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Subsequent writes should be rejected immediately
+    let r = harness.guest.write(BlockIndex(0), write_buf.clone()).await;
+    assert!(
+        matches!(r, Err(CrucibleError::IoError(..))),
+        "expected IoError, got {r:?}"
+    );
+}
+
+/// Make sure reads work with only 1x Downstairs
+#[tokio::test]
+async fn read_with_one_fault() {
+    let mut harness = TestHarness::new().await;
+
+    // Use a write to fault DS0 (XXX why do read errors not fault a DS?)
+    let write_buf = BytesMut::from(vec![1; 4096].as_slice());
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds1().err_write().await;
+    harness.ds2.ack_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Check that reads still work
+    let h = harness.spawn(|guest| async move {
+        let mut buffer = Buffer::new(1, 512);
+        guest.read(BlockIndex(0), &mut buffer).await.unwrap();
+    });
+    harness.ds2.ack_read().await;
+    h.await.unwrap(); // we have > 1x reply, so the read will return
+    harness.ds3.ack_read().await;
+
+    // Take out DS2 next
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds2.err_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_faulted(&ds[ClientId::new(1)]);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Reads still work with 1x Downstairs
+    let h = harness.spawn(|guest| async move {
+        let mut buffer = Buffer::new(1, 512);
+        guest.read(BlockIndex(0), &mut buffer).await.unwrap();
+    });
+    harness.ds3.ack_read().await;
+    h.await.unwrap(); // we have > 1x reply, so the read will return
+}
+
+/// Test early rejection of reads with 0x running Downstairs
+#[tokio::test]
+async fn fast_read_rejection() {
+    let mut harness = TestHarness::new().await;
+
+    // Use a write to fault DS0 (XXX why do read errors not fault a DS?)
+    let write_buf = BytesMut::from(vec![1; 4096].as_slice());
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds1().err_write().await;
+    harness.ds2.err_write().await;
+    harness.ds3.err_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_faulted(&ds[ClientId::new(1)]);
+    assert_faulted(&ds[ClientId::new(2)]);
+
+    // Reads should return errors immediately
+    let mut buffer = Buffer::new(1, 512);
+    match harness.guest.read(BlockIndex(0), &mut buffer).await {
+        Err(CrucibleError::IoError(s)) => {
+            assert!(s.contains("too many inactive clients"))
+        }
+        r => panic!("expected IoError, got {r:?}"),
+    }
+}
+
+/// Test for early rejection of flushes
+#[tokio::test]
+async fn fast_flush_rejection() {
+    let mut harness = TestHarness::new().await;
+
+    let h = harness.spawn(|guest| async move {
+        guest.flush(None).await.unwrap();
+    });
+    harness.ds1().err_flush().await;
+    harness.ds2.ack_flush().await;
+    harness.ds3.ack_flush().await;
+    h.await.unwrap();
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // A flush with snapshot should fail immediately
+    match harness
+        .guest
+        .flush(Some(SnapshotDetails {
+            snapshot_name: "hiiiii".to_string(),
+        }))
+        .await
+    {
+        Err(CrucibleError::IoError(s)) => {
+            assert!(s.contains("too many inactive clients"))
+        }
+        r => panic!("expected IoError, got {r:?}"),
+    }
+
+    // A non-snapshot flush should still succeed
+    let h = harness.spawn(|guest| async move {
+        guest.flush(None).await.unwrap();
+    });
+    harness.ds2.ack_flush().await;
+    harness.ds3.ack_flush().await;
+    h.await.unwrap();
+
+    // Use a flush to take out another downstairs
+    let h = harness.spawn(|guest| async move { guest.flush(None).await });
+    harness.ds2.ack_flush().await;
+    harness.ds3.err_flush().await;
+    let r = h.await.unwrap();
+    assert!(r.is_err());
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_faulted(&ds[ClientId::new(2)]);
+
+    // Subsequent flushes should fail immediately
+    match harness.guest.flush(None).await {
+        Err(CrucibleError::IoError(s)) => {
+            assert!(s.contains("too many inactive clients"))
+        }
+        r => panic!("expected IoError, got {r:?}"),
+    }
+}

upstairs/src/lib.rs

Lines changed: 5 additions & 0 deletions
@@ -952,6 +952,11 @@ impl DownstairsIO {
 
         let bad_job = match &self.work {
            IOop::Read { .. } => wc.done == 0,
+            // Flushes with snapshots must be good on all 3x Downstairs
+            IOop::Flush {
+                snapshot_details: Some(..),
+                ..
+            } => wc.skipped + wc.error > 0,
            IOop::Write { .. }
            | IOop::WriteUnwritten { .. }
            | IOop::Flush { .. }
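
This hunk tightens result aggregation for flushes that carry a snapshot: a single skipped or errored Downstairs makes the whole job an error, presumably because a snapshot has to exist on every Downstairs to be usable. A hedged restatement of just that match arm as a standalone function (the counter struct mirrors the `wc` value above but is illustrative only):

```rust
/// Per-client completion counters for one job, mirroring the `wc` value
/// in the match above (illustrative stand-in, not the crate's type).
struct WorkCounts {
    done: u64,
    skipped: u64,
    error: u64,
}

/// A flush that carries snapshot details is reported as failed unless all
/// three Downstairs completed it cleanly.
fn snapshot_flush_is_bad(wc: &WorkCounts) -> bool {
    wc.skipped + wc.error > 0
}

fn main() {
    assert!(!snapshot_flush_is_bad(&WorkCounts { done: 3, skipped: 0, error: 0 }));
    assert!(snapshot_flush_is_bad(&WorkCounts { done: 2, skipped: 0, error: 1 }));
}
```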

upstairs/src/upstairs.rs

Lines changed: 28 additions & 0 deletions
@@ -1140,6 +1140,15 @@ impl Upstairs {
                    done.send_err(CrucibleError::UpstairsInactive);
                    return;
                }
+
+                let n = self.downstairs.active_client_count();
+                let required = if snapshot_details.is_some() { 3 } else { 2 };
+                if n < required {
+                    done.send_err(CrucibleError::IoError(format!(
+                        "too many inactive clients: need {required}, got {n}"
+                    )));
+                    return;
+                }
                self.submit_flush(Some(done), snapshot_details, Some(io_guard));
            }
            BlockOp::ReplaceDownstairs { id, old, new, done } => {
@@ -1351,6 +1360,17 @@ impl Upstairs {
            return;
        }
 
+        let n = self.downstairs.active_client_count();
+        if n < 1 {
+            res.send_err((
+                data,
+                CrucibleError::IoError(format!(
+                    "too many inactive clients: need 1, got {n}"
+                )),
+            ));
+            return;
+        }
+
        /*
         * Get the next ID for the guest work struct we will make at the
         * end. This ID is also put into the IO struct we create that
@@ -1467,6 +1487,14 @@ impl Upstairs {
            return None;
        }
 
+        let n = self.downstairs.active_client_count();
+        if n < 2 {
+            res.send_err(CrucibleError::IoError(format!(
+                "too many inactive clients: need 2, got {n}"
+            )));
+            return None;
+        }
+
        /*
         * Verify IO is in range for our region
         */
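
The three call sites above differ only in the required count (2 or 3 for flushes depending on the snapshot, 1 for reads, 2 for writes) and otherwise build the same `IoError` text. A hypothetical helper, not part of this commit, showing how that check could be shared:

```rust
/// Hypothetical refactoring sketch, not in the commit: compare the active
/// client count against a per-IO threshold and build the same error text
/// the commit uses at each call site.
fn check_active_clients(active: usize, required: usize) -> Result<(), String> {
    if active < required {
        Err(format!(
            "too many inactive clients: need {required}, got {active}"
        ))
    } else {
        Ok(())
    }
}

fn main() {
    assert!(check_active_clients(1, 2).is_err()); // write path with one client left
    assert!(check_active_clients(1, 1).is_ok()); // read path still succeeds
}
```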
