Skip to content

Commit 0f03885

Browse files
mtkaczyk authored and gregkh committed
raid5: introduce MD_BROKEN
commit 57668f0 upstream.

The raid456 module used to allow an array to reach the failed state. That was changed by fb73b35 ("raid5: block failing device if raid will be failed"). That fix introduced a bug: if raid5 now fails during IO, the result may be a hung task that never completes. The Faulty flag on the device is necessary to process all requests and is checked many times, mainly in analyze_stripe().

Allow Faulty to be set on the drive again, and set MD_BROKEN if the array has failed.

As a result, this level is allowed to reach the failed state again, but communication with userspace (via the -EBUSY status) is preserved.

This restores the possibility of failing the array via the `mdadm --set-faulty` command; that will be fixed by additional verification on the mdadm side.

Reproduction steps:
  mdadm -CR imsm -e imsm -n 3 /dev/nvme[0-2]n1
  mdadm -CR r5 -e imsm -l5 -n3 /dev/nvme[0-2]n1 --assume-clean
  mkfs.xfs /dev/md126 -f
  mount /dev/md126 /mnt/root/

  fio --filename=/mnt/root/file --size=5GB --direct=1 --rw=randrw --bs=64k --ioengine=libaio --iodepth=64 --runtime=240 --numjobs=4 --time_based --group_reporting --name=throughput-test-job --eta-newline=1 &

  echo 1 > /sys/block/nvme2n1/device/device/remove
  echo 1 > /sys/block/nvme1n1/device/device/remove

[ 1475.787779] Call Trace:
[ 1475.793111] __schedule+0x2a6/0x700
[ 1475.799460] schedule+0x38/0xa0
[ 1475.805454] raid5_get_active_stripe+0x469/0x5f0 [raid456]
[ 1475.813856] ? finish_wait+0x80/0x80
[ 1475.820332] raid5_make_request+0x180/0xb40 [raid456]
[ 1475.828281] ? finish_wait+0x80/0x80
[ 1475.834727] ? finish_wait+0x80/0x80
[ 1475.841127] ? finish_wait+0x80/0x80
[ 1475.847480] md_handle_request+0x119/0x190
[ 1475.854390] md_make_request+0x8a/0x190
[ 1475.861041] generic_make_request+0xcf/0x310
[ 1475.868145] submit_bio+0x3c/0x160
[ 1475.874355] iomap_dio_submit_bio.isra.20+0x51/0x60
[ 1475.882070] iomap_dio_bio_actor+0x175/0x390
[ 1475.889149] iomap_apply+0xff/0x310
[ 1475.895447] ? iomap_dio_bio_actor+0x390/0x390
[ 1475.902736] ? iomap_dio_bio_actor+0x390/0x390
[ 1475.909974] iomap_dio_rw+0x2f2/0x490
[ 1475.916415] ? iomap_dio_bio_actor+0x390/0x390
[ 1475.923680] ? atime_needs_update+0x77/0xe0
[ 1475.930674] ? xfs_file_dio_aio_read+0x6b/0xe0 [xfs]
[ 1475.938455] xfs_file_dio_aio_read+0x6b/0xe0 [xfs]
[ 1475.946084] xfs_file_read_iter+0xba/0xd0 [xfs]
[ 1475.953403] aio_read+0xd5/0x180
[ 1475.959395] ? _cond_resched+0x15/0x30
[ 1475.965907] io_submit_one+0x20b/0x3c0
[ 1475.972398] __x64_sys_io_submit+0xa2/0x180
[ 1475.979335] ? do_io_getevents+0x7c/0xc0
[ 1475.986009] do_syscall_64+0x5b/0x1a0
[ 1475.992419] entry_SYSCALL_64_after_hwframe+0x65/0xca
[ 1476.000255] RIP: 0033:0x7f11fc27978d
[ 1476.006631] Code: Bad RIP value.
[ 1476.073251] INFO: task fio:3877 blocked for more than 120 seconds.

Cc: [email protected]
Fixes: fb73b35 ("raid5: block failing device if raid will be failed")
Reviewed-by: Xiao Ni <[email protected]>
Signed-off-by: Mariusz Tkaczyk <[email protected]>
Signed-off-by: Song Liu <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 8df42bc commit 0f03885

File tree

1 file changed

+22
-25
lines changed

1 file changed

+22
-25
lines changed

drivers/md/raid5.c

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -686,17 +686,17 @@ int raid5_calc_degraded(struct r5conf *conf)
686686
return degraded;
687687
}
688688

689-
static int has_failed(struct r5conf *conf)
689+
static bool has_failed(struct r5conf *conf)
690690
{
691-
int degraded;
691+
int degraded = conf->mddev->degraded;
692692

693-
if (conf->mddev->reshape_position == MaxSector)
694-
return conf->mddev->degraded > conf->max_degraded;
693+
if (test_bit(MD_BROKEN, &conf->mddev->flags))
694+
return true;
695695

696-
degraded = raid5_calc_degraded(conf);
697-
if (degraded > conf->max_degraded)
698-
return 1;
699-
return 0;
696+
if (conf->mddev->reshape_position != MaxSector)
697+
degraded = raid5_calc_degraded(conf);
698+
699+
return degraded > conf->max_degraded;
700700
}
701701

702702
struct stripe_head *
@@ -2877,34 +2877,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
28772877
unsigned long flags;
28782878
pr_debug("raid456: error called\n");
28792879

2880+
pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
2881+
mdname(mddev), bdevname(rdev->bdev, b));
2882+
28802883
spin_lock_irqsave(&conf->device_lock, flags);
2884+
set_bit(Faulty, &rdev->flags);
2885+
clear_bit(In_sync, &rdev->flags);
2886+
mddev->degraded = raid5_calc_degraded(conf);
28812887

2882-
if (test_bit(In_sync, &rdev->flags) &&
2883-
mddev->degraded == conf->max_degraded) {
2884-
/*
2885-
* Don't allow to achieve failed state
2886-
* Don't try to recover this device
2887-
*/
2888+
if (has_failed(conf)) {
2889+
set_bit(MD_BROKEN, &conf->mddev->flags);
28882890
conf->recovery_disabled = mddev->recovery_disabled;
2889-
spin_unlock_irqrestore(&conf->device_lock, flags);
2890-
return;
2891+
2892+
pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
2893+
mdname(mddev), mddev->degraded, conf->raid_disks);
2894+
} else {
2895+
pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
2896+
mdname(mddev), conf->raid_disks - mddev->degraded);
28912897
}
28922898

2893-
set_bit(Faulty, &rdev->flags);
2894-
clear_bit(In_sync, &rdev->flags);
2895-
mddev->degraded = raid5_calc_degraded(conf);
28962899
spin_unlock_irqrestore(&conf->device_lock, flags);
28972900
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
28982901

28992902
set_bit(Blocked, &rdev->flags);
29002903
set_mask_bits(&mddev->sb_flags, 0,
29012904
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
2902-
pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
2903-
"md/raid:%s: Operation continuing on %d devices.\n",
2904-
mdname(mddev),
2905-
bdevname(rdev->bdev, b),
2906-
mdname(mddev),
2907-
conf->raid_disks - mddev->degraded);
29082905
r5c_update_on_rdev_error(mddev, rdev);
29092906
}
29102907

0 commit comments

Comments
 (0)