From 022e510fcbda79183fd2cdc01abb01b4be80d03f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 14 Jul 2017 16:14:42 +0800 Subject: md: remove 'idx' from 'struct resync_pages' bio_add_page() won't fail for resync bio, and the page index for each bio is same, so remove it. More importantly the 'idx' of 'struct resync_pages' is initialized in mempool allocator function, the current way is wrong since mempool is only responsible for allocation, we can't use that for initialization. Suggested-by: NeilBrown Reported-by: NeilBrown Reported-and-tested-by: Patrick Fixes: f0250618361d(md: raid10: don't use bio's vec table to manage resync pages) Fixes: 98d30c5812c3(md: raid1: don't use bio's vec table to manage resync pages) Cc: stable@vger.kernel.org (4.12+) Signed-off-by: Ming Lei Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md/raid1.c') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 3febfc8391fb..0896c772a560 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -170,7 +170,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) resync_get_all_pages(rp); } - rp->idx = 0; rp->raid_bio = r1_bio; bio->bi_private = rp; } @@ -2619,6 +2618,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, int good_sectors = RESYNC_SECTORS; int min_bad = 0; /* number of sectors that are bad in all devices */ int idx = sector_to_idx(sector_nr); + int page_idx = 0; if (!conf->r1buf_pool) if (init_resync(conf)) @@ -2846,7 +2846,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r1_bio->bios[i]; rp = get_resync_pages(bio); if (bio->bi_end_io) { - page = resync_fetch_page(rp, rp->idx++); + page = resync_fetch_page(rp, page_idx); /* * won't fail because the vec table is big @@ -2858,7 +2858,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, nr_sectors += len>>9; sector_nr += len>>9; sync_blocks -= (len>>9); - } while (get_resync_pages(r1_bio->bios[disk]->bi_private)->idx < RESYNC_PAGES); + } while (++page_idx < RESYNC_PAGES); r1_bio->sectors = nr_sectors; -- cgit v1.2.1 From fb0eb5df09307603b21845af1d143cc910154593 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 14 Jul 2017 16:14:43 +0800 Subject: md: raid1/raid10: initialize bvec table via bio_add_page() We will support multipage bvec soon, so initialize bvec table using the standardy way instead of writing the talbe directly. Otherwise it won't work any more once multipage bvec is enabled. Acked-by: Guoqing Jiang Signed-off-by: Ming Lei Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'drivers/md/raid1.c') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0896c772a560..fe86ab18961b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -81,6 +81,8 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); #define raid1_log(md, fmt, args...) \ do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) +#include "raid1-10.c" + /* * 'strct resync_pages' stores actual pages used for doing the resync * IO, and it is per-bio, so make .bi_private points to it. @@ -2085,10 +2087,7 @@ static void process_checks(struct r1bio *r1_bio) /* Fix variable parts of all bios */ vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9); for (i = 0; i < conf->raid_disks * 2; i++) { - int j; - int size; blk_status_t status; - struct bio_vec *bi; struct bio *b = r1_bio->bios[i]; struct resync_pages *rp = get_resync_pages(b); if (b->bi_end_io != end_sync_read) @@ -2097,8 +2096,6 @@ static void process_checks(struct r1bio *r1_bio) status = b->bi_status; bio_reset(b); b->bi_status = status; - b->bi_vcnt = vcnt; - b->bi_iter.bi_size = r1_bio->sectors << 9; b->bi_iter.bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; b->bi_bdev = conf->mirrors[i].rdev->bdev; @@ -2106,15 +2103,8 @@ static void process_checks(struct r1bio *r1_bio) rp->raid_bio = r1_bio; b->bi_private = rp; - size = b->bi_iter.bi_size; - bio_for_each_segment_all(bi, b, j) { - bi->bv_offset = 0; - if (size > PAGE_SIZE) - bi->bv_len = PAGE_SIZE; - else - bi->bv_len = size; - size -= PAGE_SIZE; - } + /* initialize bvec table again */ + md_bio_reset_resync_pages(b, rp, r1_bio->sectors << 9); } for (primary = 0; primary < conf->raid_disks * 2; primary++) if (r1_bio->bios[primary]->bi_end_io == end_sync_read && -- cgit v1.2.1 From be453e7761d0e72d8a1b2fcfde6d1a7e53881190 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 14 Jul 2017 16:14:44 +0800 Subject: md: raid1-10: move raid1/raid10 common code into raid1-10.c No function change, just move 'struct resync_pages' and related helpers into raid1-10.c Signed-off-by: Ming Lei Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers/md/raid1.c') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fe86ab18961b..8387eb1540cd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -83,15 +83,6 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); #include "raid1-10.c" -/* - * 'strct resync_pages' stores actual pages used for doing the resync - * IO, and it is per-bio, so make .bi_private points to it. - */ -static inline struct resync_pages *get_resync_pages(struct bio *bio) -{ - return bio->bi_private; -} - /* * for resync bio, r1bio pointer can be retrieved from the per-bio * 'struct resync_pages'. -- cgit v1.2.1 From 16d56e2fcc1fc15b981369653c3b41d7ff0b443d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 17 Jul 2017 14:33:48 -0700 Subject: md/raid1: fix writebehind bio clone After bio is submitted, we should not clone it as its bi_iter might be invalid by driver. This is the case of behind_master_bio. In certain situration, we could dispatch behind_master_bio immediately for the first disk and then clone it for other disks. https://bugzilla.kernel.org/show_bug.cgi?id=196383 Reported-and-tested-by: Markus Reviewed-by: Ming Lei Fix: 841c1316c7da(md: raid1: improve write behind) Cc: stable@vger.kernel.org (4.12+) Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) (limited to 'drivers/md/raid1.c') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8387eb1540cd..1d235cc8b402 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -484,10 +484,6 @@ static void raid1_end_write_request(struct bio *bio) } if (behind) { - /* we release behind master bio when all write are done */ - if (r1_bio->behind_master_bio == bio) - to_put = NULL; - if (test_bit(WriteMostly, &rdev->flags)) atomic_dec(&r1_bio->behind_remaining); @@ -1080,7 +1076,7 @@ static void unfreeze_array(struct r1conf *conf) wake_up(&conf->wait_barrier); } -static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, +static void alloc_behind_master_bio(struct r1bio *r1_bio, struct bio *bio) { int size = bio->bi_iter.bi_size; @@ -1090,11 +1086,13 @@ static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev); if (!behind_bio) - goto fail; + return; /* discard op, we don't support writezero/writesame yet */ - if (!bio_has_data(bio)) + if (!bio_has_data(bio)) { + behind_bio->bi_iter.bi_size = size; goto skip_copy; + } while (i < vcnt && size) { struct page *page; @@ -1115,14 +1113,13 @@ skip_copy: r1_bio->behind_master_bio = behind_bio;; set_bit(R1BIO_BehindIO, &r1_bio->state); - return behind_bio; + return; free_pages: pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_iter.bi_size); bio_free_pages(behind_bio); -fail: - return behind_bio; + bio_put(behind_bio); } struct raid1_plug_cb { @@ -1475,7 +1472,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && !waitqueue_active(&bitmap->behind_wait)) { - mbio = alloc_behind_master_bio(r1_bio, bio); + alloc_behind_master_bio(r1_bio, bio); } bitmap_startwrite(bitmap, r1_bio->sector, @@ -1485,14 +1482,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, first_clone = 0; } - if (!mbio) { - if (r1_bio->behind_master_bio) - mbio = bio_clone_fast(r1_bio->behind_master_bio, - GFP_NOIO, - mddev->bio_set); - else - mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - } + if (r1_bio->behind_master_bio) + mbio = bio_clone_fast(r1_bio->behind_master_bio, + GFP_NOIO, mddev->bio_set); + else + mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); if (r1_bio->behind_master_bio) { if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) @@ -2346,8 +2340,6 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) wbio = bio_clone_fast(r1_bio->behind_master_bio, GFP_NOIO, mddev->bio_set); - /* We really need a _all clone */ - wbio->bi_iter = (struct bvec_iter){ 0 }; } else { wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, mddev->bio_set); -- cgit v1.2.1 From 6308d8e3d42bea15461c696df1ad74c2944b5c23 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 21 Jul 2017 16:33:44 +0800 Subject: md: simplify code with bio_io_error Since bio_io_error sets bi_status to BLK_STS_IOERR, and calls bio_endio, so we can use it directly. And as mentioned by Shaohua, there are also two places in raid5.c can use bio_io_error either. Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/md/raid1.c') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 1d235cc8b402..f50958ded9f0 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -790,8 +790,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio) bio->bi_next = NULL; bio->bi_bdev = rdev->bdev; if (test_bit(Faulty, &rdev->flags)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ -- cgit v1.2.1