From b5d27718f38843a74552e9a93d32e2391fd3999f Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Wed, 5 Jul 2017 17:34:04 +0800 Subject: Raid5 should update rdev->sectors after reshape The raid5 md device is created by the disks which we don't use the total size. For example, the size of the device is 5G and it just uses 3G of the devices to create one raid5 device. Then change the chunksize and wait reshape to finish. After reshape finishing stop the raid and assemble it again. It fails. mdadm -CR /dev/md0 -l5 -n3 /dev/loop[0-2] --size=3G --chunk=32 --assume-clean mdadm /dev/md0 --grow --chunk=64 wait reshape to finish mdadm -S /dev/md0 mdadm -As The error messages: [197519.814302] md: loop1 does not have a valid v1.2 superblock, not importing! [197519.821686] md: md_import_device returned -22 After reshape the data offset is changed. It selects backwards direction in this condition. In function super_1_load it compares the available space of the underlying device with sb->data_size. The new data offset gets bigger after reshape. So super_1_load returns -EINVAL. rdev->sectors is updated in md_finish_reshape. Then sb->data_size is set in super_1_sync based on rdev->sectors. So add md_finish_reshape in end_reshape. Signed-off-by: Xiao Ni Acked-by: Guoqing Jiang Cc: stable@vger.kernel.org Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2ceb338b094b..aeeb8d6854e2 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7951,12 +7951,10 @@ static void end_reshape(struct r5conf *conf) { if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { - struct md_rdev *rdev; spin_lock_irq(&conf->device_lock); conf->previous_raid_disks = conf->raid_disks; - rdev_for_each(rdev, conf->mddev) - rdev->data_offset = rdev->new_data_offset; + md_finish_reshape(conf->mddev); smp_wmb(); conf->reshape_progress = MaxSector; conf->mddev->reshape_position = MaxSector; -- cgit v1.2.1 From 6308d8e3d42bea15461c696df1ad74c2944b5c23 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 21 Jul 2017 16:33:44 +0800 Subject: md: simplify code with bio_io_error Since bio_io_error sets bi_status to BLK_STS_IOERR, and calls bio_endio, so we can use it directly. And as mentioned by Shaohua, there are also two places in raid5.c can use bio_io_error either. Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index aeeb8d6854e2..4904dffec915 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3381,9 +3381,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - bi->bi_status = BLK_STS_IOERR; md_write_end(conf->mddev); - bio_endio(bi); + bio_io_error(bi); bi = nextbi; } if (bitmap_end) @@ -3403,9 +3402,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); - bi->bi_status = BLK_STS_IOERR; md_write_end(conf->mddev); - bio_endio(bi); + bio_io_error(bi); bi = bi2; } @@ -3429,8 +3427,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - bi->bi_status = BLK_STS_IOERR; - bio_endio(bi); + bio_io_error(bi); bi = nextbi; } } -- cgit v1.2.1 From 7e96d559634b73a8158ee99a7abece2eacec2668 Mon Sep 17 00:00:00 2001 From: Ofer Heifetz Date: Mon, 24 Jul 2017 09:17:40 +0300 Subject: md/raid5: add thread_group worker async_tx_issue_pending_all Since thread_group worker and raid5d kthread are not in sync, if worker writes stripe before raid5d then requests will be waiting for issue_pendig. Issue observed when building raid5 with ext4, in some build runs jbd2 would get hung and requests were waiting in the HW engine waiting to be issued. Fix this by adding a call to async_tx_issue_pending_all in the raid5_do_work. Signed-off-by: Ofer Heifetz Cc: stable@vger.kernel.org Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4904dffec915..0fc2748aaf95 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6234,6 +6234,8 @@ static void raid5_do_work(struct work_struct *work) pr_debug("%d stripes handled\n", handled); spin_unlock_irq(&conf->device_lock); + + async_tx_issue_pending_all(); blk_finish_plug(&plug); pr_debug("--- raid5worker inactive\n"); -- cgit v1.2.1