@@ -12,6 +12,8 @@ use crate::guest::Guest;
 use crate::up_main;
 use crate::BlockIO;
 use crate::Buffer;
+use crate::ClientFaultReason;
+use crate::ClientStopReason;
 use crate::ConnectionMode;
 use crate::CrucibleError;
 use crate::DsState;
@@ -33,6 +35,7 @@ use crucible_protocol::JobId;
 use crucible_protocol::Message;
 use crucible_protocol::ReadBlockContext;
 use crucible_protocol::ReadResponseHeader;
+use crucible_protocol::SnapshotDetails;
 use crucible_protocol::WriteHeader;
 
 use bytes::BytesMut;
@@ -289,6 +292,35 @@ impl DownstairsHandle {
         }
     }
 
+    /// Awaits a `Message::Flush` and sends a `FlushAck` with an `IoError`
+    ///
+    /// Returns the flush number for further checks.
+    ///
+    /// # Panics
+    /// If a non-flush message arrives
+    pub async fn err_flush(&mut self) -> u64 {
+        match self.recv().await.unwrap() {
+            Message::Flush {
+                job_id,
+                flush_number,
+                upstairs_id,
+                ..
+            } => {
+                self.send(Message::FlushAck {
+                    upstairs_id,
+                    session_id: self.upstairs_session_id.unwrap(),
+                    job_id,
+                    result: Err(CrucibleError::IoError("oh no".to_string())),
+                })
+                .unwrap();
+                flush_number
+            }
+            m => {
+                panic!("saw non flush {m:?}");
+            }
+        }
+    }
+
     /// Awaits a `Message::Write { .. }` and sends a `WriteAck`
     ///
     /// Returns the job ID for further checks.
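For orientation, here is how a test drives this helper: one downstairs answers a guest flush with `err_flush` while the other two `ack_flush`, so the flush still succeeds and only the erroring client ends up faulted. The snippet below is reproduced from `fast_flush_rejection`, added later in this diff, not new behavior:

```rust
// Reproduced from fast_flush_rejection (below); `harness` is the
// TestHarness used throughout these tests.
let h = harness.spawn(|guest| async move {
    guest.flush(None).await.unwrap();
});
harness.ds1().err_flush().await; // client 0 answers with an IoError
harness.ds2.ack_flush().await;
harness.ds3.ack_flush().await;
h.await.unwrap(); // two successful acks out of three: the flush succeeds
```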
@@ -311,6 +343,23 @@ impl DownstairsHandle {
         }
     }
 
+    /// Awaits a `Message::Write` and sends a `WriteAck` with an `IoError`
+    pub async fn err_write(&mut self) -> JobId {
+        match self.recv().await.unwrap() {
+            Message::Write { header, .. } => {
+                self.send(Message::WriteAck {
+                    upstairs_id: header.upstairs_id,
+                    session_id: self.upstairs_session_id.unwrap(),
+                    job_id: header.job_id,
+                    result: Err(CrucibleError::IoError("oh no".to_string())),
+                })
+                .unwrap();
+                header.job_id
+            }
+            m => panic!("saw non write: {m:?}"),
+        }
+    }
+
     /// Awaits a `Message::Barrier { .. }` and sends a `BarrierAck`
     ///
     /// Returns the job ID for further checks.
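`err_write` is exercised the same way: the snippet below (reproduced from `fast_write_rejection`, later in this diff) fails a write on one downstairs, acks it on the other two, and then checks that only client 0 is faulted. `assert_faulted` is a helper also added at the bottom of this diff:

```rust
// Reproduced from fast_write_rejection (below).
harness.ds1().err_write().await;
harness.ds2.ack_write().await;
harness.ds3.ack_write().await;
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
let ds = harness.guest.downstairs_state().await.unwrap();
assert_faulted(&ds[ClientId::new(0)]);
assert_eq!(ds[ClientId::new(1)], DsState::Active);
assert_eq!(ds[ClientId::new(2)], DsState::Active);
```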
@@ -358,7 +407,7 @@ impl DownstairsHandle {
                 job_id,
                 blocks: Ok(vec![block]),
             },
-            data: data.clone(),
+            data,
         })
         .unwrap();
         job_id
@@ -811,7 +860,7 @@ async fn run_live_repair(mut harness: TestHarness) {
                 job_id,
                 blocks: Ok(vec![block]),
             },
-            data: data.clone(),
+            data,
         }) {
             Ok(()) => panic!("DS1 should be disconnected"),
             Err(e) => {
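Both `data: data.clone()` → `data` changes (here and in the hunk above) are pure cleanups: the message takes ownership of the `BytesMut`, and nothing uses `data` afterwards, so the buffer can be moved instead of copied. A minimal self-contained sketch of the distinction; `Msg` and `send` are illustrative stand-ins, not Crucible types:

```rust
use bytes::BytesMut;

struct Msg {
    data: BytesMut,
}

fn send(_m: Msg) {
    // ...serialize and write to the socket...
}

fn main() {
    let data = BytesMut::from(&[1u8; 16][..]);
    // Moving hands the buffer to the message without copying its bytes.
    send(Msg { data });
    // `data` is gone here; `data.clone()` would have kept it alive at
    // the cost of an extra allocation and copy that nothing needed.
}
```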
@@ -3055,3 +3104,204 @@ async fn test_bytes_based_barrier() {
     harness.ds2.ack_flush().await;
     harness.ds3.ack_flush().await;
 }
+
+fn assert_faulted(s: &DsState) {
+    match s {
+        DsState::Stopping(ClientStopReason::Fault(
+            ClientFaultReason::RequestedFault,
+        ))
+        | DsState::Connecting {
+            mode: ConnectionMode::Faulted,
+            ..
+        } => (),
+        _ => panic!("invalid state: expected faulted, got {s:?}"),
+    }
+}
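The helper accepts two states because a fault is observed in two phases: the client first sits in `DsState::Stopping(ClientStopReason::Fault(..))` while its IO task shuts down, then moves to `DsState::Connecting { mode: ConnectionMode::Faulted, .. }` once it starts reconnecting. The tests below poll after an arbitrary 100 ms sleep, so either phase must count as faulted. A self-contained model of that idea (simplified names, not the real Crucible state machine):

```rust
/// Simplified model of the client states a fault can be observed in.
#[derive(Debug)]
enum State {
    Active,
    Stopping,          // fault noticed, IO task winding down
    ConnectingFaulted, // reconnecting, needs repair before rejoining
}

fn is_faulted(s: &State) -> bool {
    matches!(s, State::Stopping | State::ConnectingFaulted)
}

fn main() {
    assert!(!is_faulted(&State::Active));
    assert!(is_faulted(&State::Stopping));
    assert!(is_faulted(&State::ConnectingFaulted));
}
```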
+
+/// Test for early rejection of writes if > 1 Downstairs is unavailable
+#[tokio::test]
+async fn fast_write_rejection() {
+    let mut harness = TestHarness::new().await;
+
+    let write_buf = BytesMut::from(vec![1; 4096].as_slice());
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+
+    harness.ds1().err_write().await;
+    harness.ds2.ack_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Send a second write, which should still work (because we have 2/3 ds)
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds2.err_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_faulted(&ds[ClientId::new(1)]);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Subsequent writes should be rejected immediately
+    let r = harness.guest.write(BlockIndex(0), write_buf.clone()).await;
+    assert!(
+        matches!(r, Err(CrucibleError::IoError(..))),
+        "expected IoError, got {r:?}"
+    );
+}
+
+/// Make sure reads work with only 1x Downstairs
+#[tokio::test]
+async fn read_with_one_fault() {
+    let mut harness = TestHarness::new().await;
+
+    // Use a write to fault DS0 (XXX why do read errors not fault a DS?)
+    let write_buf = BytesMut::from(vec![1; 4096].as_slice());
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds1().err_write().await;
+    harness.ds2.ack_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Check that reads still work
+    let h = harness.spawn(|guest| async move {
+        let mut buffer = Buffer::new(1, 512);
+        guest.read(BlockIndex(0), &mut buffer).await.unwrap();
+    });
+    harness.ds2.ack_read().await;
+    h.await.unwrap(); // we have > 1x reply, so the read will return
+    harness.ds3.ack_read().await;
+
+    // Take out DS2 next
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds2.err_write().await;
+    harness.ds3.ack_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_faulted(&ds[ClientId::new(1)]);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // Reads still work with 1x Downstairs
+    let h = harness.spawn(|guest| async move {
+        let mut buffer = Buffer::new(1, 512);
+        guest.read(BlockIndex(0), &mut buffer).await.unwrap();
+    });
+    harness.ds3.ack_read().await;
+    h.await.unwrap(); // we have > 1x reply, so the read will return
+}
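Note the ordering in both read blocks: the spawned guest read is joined after a single `ack_read`, which shows a read completes to the guest as soon as one downstairs has replied; the remaining ack, when there is one, is drained only afterwards. A self-contained sketch of first-reply completion, assuming nothing beyond tokio (a model of the idea, not Crucible's read path):

```rust
use tokio::sync::oneshot;

#[tokio::main]
async fn main() {
    // Two "replicas" that may each answer a read.
    let (tx_a, rx_a) = oneshot::channel::<Vec<u8>>();
    let (tx_b, rx_b) = oneshot::channel::<Vec<u8>>();

    // Only one replica ever answers, like DS3 at the end of the test.
    tx_a.send(vec![0u8; 512]).unwrap();
    drop(tx_b); // the other replica is gone (faulted)

    // The "guest read" resolves on the first successful reply.
    let data = tokio::select! {
        Ok(d) = rx_a => d,
        Ok(d) = rx_b => d,
    };
    assert_eq!(data.len(), 512);
}
```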
+
+/// Test early rejection of reads with 0x running Downstairs
+#[tokio::test]
+async fn fast_read_rejection() {
+    let mut harness = TestHarness::new().await;
+
+    // Use a write to fault all three Downstairs (XXX why do read errors not fault a DS?)
+    let write_buf = BytesMut::from(vec![1; 4096].as_slice());
+    harness
+        .guest
+        .write(BlockIndex(0), write_buf.clone())
+        .await
+        .unwrap();
+    harness.ds1().err_write().await;
+    harness.ds2.err_write().await;
+    harness.ds3.err_write().await;
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_faulted(&ds[ClientId::new(1)]);
+    assert_faulted(&ds[ClientId::new(2)]);
+
+    // Reads should return errors immediately
+    let mut buffer = Buffer::new(1, 512);
+    match harness.guest.read(BlockIndex(0), &mut buffer).await {
+        Err(CrucibleError::IoError(s)) => {
+            assert!(s.contains("too many inactive clients"))
+        }
+        r => panic!("expected IoError, got {r:?}"),
+    }
+}
+
+/// Test for early rejection of flushes
+#[tokio::test]
+async fn fast_flush_rejection() {
+    let mut harness = TestHarness::new().await;
+
+    let h = harness.spawn(|guest| async move {
+        guest.flush(None).await.unwrap();
+    });
+    harness.ds1().err_flush().await;
+    harness.ds2.ack_flush().await;
+    harness.ds3.ack_flush().await;
+    h.await.unwrap();
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_eq!(ds[ClientId::new(2)], DsState::Active);
+
+    // A flush with snapshot should fail immediately
+    match harness
+        .guest
+        .flush(Some(SnapshotDetails {
+            snapshot_name: "hiiiii".to_string(),
+        }))
+        .await
+    {
+        Err(CrucibleError::IoError(s)) => {
+            assert!(s.contains("too many inactive clients"))
+        }
+        r => panic!("expected IoError, got {r:?}"),
+    }
+
+    // A non-snapshot flush should still succeed
+    let h = harness.spawn(|guest| async move {
+        guest.flush(None).await.unwrap();
+    });
+    harness.ds2.ack_flush().await;
+    harness.ds3.ack_flush().await;
+    h.await.unwrap();
+
+    // Use a flush to take out another downstairs
+    let h = harness.spawn(|guest| async move { guest.flush(None).await });
+    harness.ds2.ack_flush().await;
+    harness.ds3.err_flush().await;
+    let r = h.await.unwrap();
+    assert!(r.is_err());
+    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+    let ds = harness.guest.downstairs_state().await.unwrap();
+    assert_faulted(&ds[ClientId::new(0)]);
+    assert_eq!(ds[ClientId::new(1)], DsState::Active);
+    assert_faulted(&ds[ClientId::new(2)]);
+
+    // Subsequent flushes should fail immediately
+    match harness.guest.flush(None).await {
+        Err(CrucibleError::IoError(s)) => {
+            assert!(s.contains("too many inactive clients"))
+        }
+        r => panic!("expected IoError, got {r:?}"),
+    }
+}
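Taken together, the rejection tests pin down an availability matrix for new IOs, keyed by how many downstairs are still active: reads need one, writes and plain flushes need two, and a snapshot flush is rejected as soon as any client is faulted. Restated as a self-contained summary of what the tests assert (illustrative functions, not the upstairs implementation):

```rust
/// Reads succeed with a single active downstairs (read_with_one_fault)
/// and fail fast at zero (fast_read_rejection).
fn read_allowed(active: usize) -> bool {
    active >= 1
}

/// Writes are rejected once two clients are faulted (fast_write_rejection).
fn write_allowed(active: usize) -> bool {
    active >= 2
}

/// Plain flushes behave like writes; a flush carrying SnapshotDetails is
/// already rejected with one client faulted (fast_flush_rejection).
fn flush_allowed(active: usize, snapshot: bool) -> bool {
    if snapshot {
        active == 3
    } else {
        active >= 2
    }
}

fn main() {
    assert!(read_allowed(1) && !read_allowed(0));
    assert!(write_allowed(2) && !write_allowed(1));
    assert!(flush_allowed(2, false) && !flush_allowed(1, false));
    assert!(flush_allowed(3, true) && !flush_allowed(2, true));
}
```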