@@ -12,6 +12,8 @@ use crate::guest::Guest;
 use crate::up_main;
 use crate::BlockIO;
 use crate::Buffer;
+use crate::ClientFaultReason;
+use crate::ClientStopReason;
 use crate::ConnectionMode;
 use crate::CrucibleError;
 use crate::DsState;
@@ -33,6 +35,7 @@ use crucible_protocol::JobId;
 use crucible_protocol::Message;
 use crucible_protocol::ReadBlockContext;
 use crucible_protocol::ReadResponseHeader;
+use crucible_protocol::SnapshotDetails;
 use crucible_protocol::WriteHeader;
 
 use bytes::BytesMut;
@@ -289,6 +292,35 @@ impl DownstairsHandle {
         }
     }
 
+    /// Awaits a `Message::Flush` and sends a `FlushAck` with an `IoError`
+    ///
+    /// Returns the flush number for further checks.
+    ///
+    /// # Panics
+    /// If a non-flush message arrives
+    pub async fn err_flush(&mut self) -> u64 {
+        match self.recv().await.unwrap() {
+            Message::Flush {
+                job_id,
+                flush_number,
+                upstairs_id,
+                ..
+            } => {
+                self.send(Message::FlushAck {
+                    upstairs_id,
+                    session_id: self.upstairs_session_id.unwrap(),
+                    job_id,
+                    result: Err(CrucibleError::IoError("oh no".to_string())),
+                })
+                .unwrap();
+                flush_number
+            }
+            m => {
+                panic!("saw non flush {m:?}");
+            }
+        }
+    }
+
     /// Awaits a `Message::Write { .. }` and sends a `WriteAck`
     ///
     /// Returns the job ID for further checks.
@@ -311,6 +343,23 @@ impl DownstairsHandle {
         }
     }
 
+    /// Awaits a `Message::Write` and sends a `WriteAck` with an `IoError`
+    pub async fn err_write(&mut self) -> JobId {
+        match self.recv().await.unwrap() {
+            Message::Write { header, .. } => {
+                self.send(Message::WriteAck {
+                    upstairs_id: header.upstairs_id,
+                    session_id: self.upstairs_session_id.unwrap(),
+                    job_id: header.job_id,
+                    result: Err(CrucibleError::IoError("oh no".to_string())),
+                })
+                .unwrap();
+                header.job_id
+            }
+            m => panic!("saw non write: {m:?}"),
+        }
+    }
+
     /// Awaits a `Message::Barrier { .. }` and sends a `BarrierAck`
     ///
     /// Returns the job ID for further checks.
@@ -358,7 +407,7 @@ impl DownstairsHandle {
                         job_id,
                         blocks: Ok(vec![block]),
                     },
-                    data: data.clone(),
+                    data,
                 })
                 .unwrap();
                 job_id
@@ -811,7 +860,7 @@ async fn run_live_repair(mut harness: TestHarness) {
             job_id,
             blocks: Ok(vec![block]),
         },
-        data: data.clone(),
+        data,
     }) {
         Ok(()) => panic!("DS1 should be disconnected"),
         Err(e) => {
@@ -3055,3 +3104,204 @@ async fn test_bytes_based_barrier() {
     harness.ds2.ack_flush().await;
     harness.ds3.ack_flush().await;
 }
+
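+/// A faulted client may be observed while it is still stopping or after it
+/// has already begun reconnecting, so both states count as "faulted" here.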
+fn assert_faulted(s: &DsState) {
+    match s {
+        DsState::Stopping(ClientStopReason::Fault(
+            ClientFaultReason::RequestedFault,
+        ))
+        | DsState::Connecting {
+            mode: ConnectionMode::Faulted,
+            ..
+        } => (),
+        _ => panic!("invalid state: expected faulted, got {s:?}"),
+    }
+}
+
+/// Test for early rejection of writes if > 1 Downstairs is unavailable
+#[tokio::test]
+async fn fast_write_rejection() {
+    let mut harness = TestHarness::new().await;
+
+    let write_buf = BytesMut::from(vec![1; 4096].as_slice());
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+
+    harness.ds1().err_write().await;
+    harness.ds2.ack_write().await;
+    harness.ds3.ack_write().await;
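+    // Give the upstairs a moment to process the error result and fault DS0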
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Send a second write, which should still work (because we have 2/3 ds)
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds2.err_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_faulted(&ds[ClientId::new(1)]);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Subsequent writes should be rejected immediately
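+    // (with only one Active Downstairs left, the upstairs fails the write
+    // up front instead of submitting it)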
+    let r = harness.guest.write(BlockIndex(0), write_buf.clone()).await;
+    assert!(
+        matches!(r, Err(CrucibleError::IoError(..))),
+        "expected IoError, got {r:?}"
+    );
+}
+
+/// Make sure reads work with only 1x Downstairs
+#[tokio::test]
+async fn read_with_one_fault() {
+    let mut harness = TestHarness::new().await;
+
+    // Use a write to fault DS0 (XXX why do read errors not fault a DS?)
+    let write_buf = BytesMut::from(vec![1; 4096].as_slice());
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds1().err_write().await;
+    harness.ds2.ack_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Check that reads still work
+    let h = harness.spawn(|guest| async move {
+        let mut buffer = Buffer::new(1, 512);
+        guest.read(BlockIndex(0), &mut buffer).await.unwrap();
+    });
+    harness.ds2.ack_read().await;
+    h.await.unwrap(); // we have >= 1x reply, so the read will return
+    harness.ds3.ack_read().await;
+
+    // Take out DS2 next
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds2.err_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_faulted(&ds[ClientId::new(1)]);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Reads still work with 1x Downstairs
+    let h = harness.spawn(|guest| async move {
+        let mut buffer = Buffer::new(1, 512);
+        guest.read(BlockIndex(0), &mut buffer).await.unwrap();
+    });
+    harness.ds3.ack_read().await;
+    h.await.unwrap(); // we have >= 1x reply, so the read will return
+}
+
+/// Test early rejection of reads with 0x running Downstairs
+#[tokio::test]
+async fn fast_read_rejection() {
+    let mut harness = TestHarness::new().await;
+
+    // Use writes to fault all three Downstairs
+    // (XXX why do read errors not fault a DS?)
+    let write_buf = BytesMut::from(vec![1; 4096].as_slice());
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds1().err_write().await;
+    harness.ds2.err_write().await;
+    harness.ds3.err_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_faulted(&ds[ClientId::new(1)]);
+    assert_faulted(&ds[ClientId::new(2)]);
+
+    // Reads should return errors immediately
+    let mut buffer = Buffer::new(1, 512);
+    match harness.guest.read(BlockIndex(0), &mut buffer).await {
+        Err(CrucibleError::IoError(s)) => {
+            assert!(s.contains("too many inactive clients"))
+        }
+        r => panic!("expected IoError, got {r:?}"),
+    }
+}
+
+/// Test for early rejection of flushes
+#[tokio::test]
+async fn fast_flush_rejection() {
+    let mut harness = TestHarness::new().await;
+
+    let h = harness.spawn(|guest| async move {
+        guest.flush(None).await.unwrap();
+    });
+    harness.ds1().err_flush().await;
+    harness.ds2.ack_flush().await;
+    harness.ds3.ack_flush().await;
+    h.await.unwrap();
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // A flush with snapshot should fail immediately
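+    // (unlike a plain flush, a snapshot flush needs all three Downstairs,
+    // so a single faulted client is enough for up-front rejection)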
+    match harness
+        .guest
+        .flush(Some(SnapshotDetails {
+            snapshot_name: "hiiiii".to_string(),
+        }))
+        .await
+    {
+        Err(CrucibleError::IoError(s)) => {
+            assert!(s.contains("too many inactive clients"))
+        }
+        r => panic!("expected IoError, got {r:?}"),
+    }
+
+    // A non-snapshot flush should still succeed
+    let h = harness.spawn(|guest| async move {
+        guest.flush(None).await.unwrap();
+    });
+    harness.ds2.ack_flush().await;
+    harness.ds3.ack_flush().await;
+    h.await.unwrap();
+
+    // Use a flush to take out another downstairs
+    let h = harness.spawn(|guest| async move { guest.flush(None).await });
+    harness.ds2.ack_flush().await;
+    harness.ds3.err_flush().await;
+    let r = h.await.unwrap();
+    assert!(r.is_err());
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_faulted(&ds[ClientId::new(2)]);
+
+    // Subsequent flushes should fail immediately
+    match harness.guest.flush(None).await {
+        Err(CrucibleError::IoError(s)) => {
+            assert!(s.contains("too many inactive clients"))
+        }
+        r => panic!("expected IoError, got {r:?}"),
+    }
+}