From 6ea9c07c6c6d1c14d9757dd8470dc4c85bbe9f28 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:09 -0700
Subject: [PATCH] md: cause md/raid1 to "repack" working devices when number of
 drives is changed

i.e.  missing or failed drives are moved to the end of the list.  This means
a 3 drive md array with the first drive missing can be shrunk to a two
drive array.  Currently that isn't possible.

Also, the "last_used" device number might be out-of-range after the number
of devices is reduced, so we set it to 0.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 1db5de52d376..4947f599b652 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1349,17 +1349,26 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 	 * We allocate a new r1bio_pool if we can.
 	 * Then raise a device barrier and wait until all IO stops.
 	 * Then resize conf->mirrors and swap in the new r1bio pool.
+	 *
+	 * At the same time, we "pack" the devices so that all the missing
+	 * devices have the higher raid_disk numbers.
 	 */
 	mempool_t *newpool, *oldpool;
 	struct pool_info *newpoolinfo;
 	mirror_info_t *newmirrors;
 	conf_t *conf = mddev_to_conf(mddev);
+	int cnt;
 
-	int d;
+	int d, d2;
 
-	for (d= raid_disks; d < conf->raid_disks; d++)
-		if (conf->mirrors[d].rdev)
+	if (raid_disks < conf->raid_disks) {
+		cnt=0;
+		for (d= 0; d < conf->raid_disks; d++)
+			if (conf->mirrors[d].rdev)
+				cnt++;
+		if (cnt > raid_disks)
 			return -EBUSY;
+	}
 
 	newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL);
 	if (!newpoolinfo)
@@ -1390,8 +1399,12 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 	/* ok, everything is stopped */
 	oldpool = conf->r1bio_pool;
 	conf->r1bio_pool = newpool;
-	for (d=0; d < raid_disks && d < conf->raid_disks; d++)
-		newmirrors[d] = conf->mirrors[d];
+
+	for (d=d2=0; d < conf->raid_disks; d++)
+		if (conf->mirrors[d].rdev) {
+			conf->mirrors[d].rdev->raid_disk = d2;
+			newmirrors[d2++].rdev = conf->mirrors[d].rdev;
+		}
 	kfree(conf->mirrors);
 	conf->mirrors = newmirrors;
 	kfree(conf->poolinfo);
@@ -1400,6 +1413,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 	mddev->degraded += (raid_disks - conf->raid_disks);
 	conf->raid_disks = mddev->raid_disks = raid_disks;
 
+	conf->last_used = 0; /* just make sure it is in-range */
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier--;
 	spin_unlock_irq(&conf->resync_lock);
-- 
cgit v1.2.1


From fca4d848f0e6fafdc2b25f8a0cf1e76935f13ac2 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:11 -0700
Subject: [PATCH] md: merge md_enter_safemode into md_check_recovery

md_enter_safemode checks if it is time to mark the md superblock as 'clean'.
i.e.  if all writes have completed and a suitable delay has passed.

This is currently called from md_handle_safemode which in-turn is called
(almost) every time md_check_recovery is called, and from the end of
md_do_sync which causes the mddev->thread to run, which will always call
md_check_recovery as well.

So it doesn't need to be a separate function and fits quite well into
md_check_recovery.

The "almost" is because multipathd calls md_check_recovery but not
md_handle_safemode.  This is OK because the code from md_enter_safemode is a
no-op if mddev->safemode == 0, which it always is for a multipathd (providing
we don't allow it to be set to 2 on a signal...)

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 4947f599b652..b34ad56362df 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -931,7 +931,6 @@ static void raid1d(mddev_t *mddev)
 	mdk_rdev_t *rdev;
 
 	md_check_recovery(mddev);
-	md_handle_safemode(mddev);
 	
 	for (;;) {
 		char b[BDEVNAME_SIZE];
-- 
cgit v1.2.1


From 06d91a5fe0b50c9060e70bdf7786f8a3c66249db Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:12 -0700
Subject: [PATCH] md: improve locking on 'safemode' and move superblock writes

When md marks the superblock dirty before a write, it calls
generic_make_request (to write the superblock) from within
generic_make_request (to write the first dirty block), which could cause
problems later.

With this patch, the superblock write is always done by the helper thread, and
write requests are delayed until that write completes.

Also, the locking around marking the array dirty and writing the superblock is
improved to avoid possible races.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b34ad56362df..3f1280bbaf39 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -530,6 +530,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
 	 */
+	if (md_write_start(mddev, bio)==0)
+		return 0;
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
 	conf->nr_pending++;
@@ -611,7 +613,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	rcu_read_unlock();
 
 	atomic_set(&r1_bio->remaining, 1);
-	md_write_start(mddev);
+
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
 		if (!r1_bio->bios[i])
-- 
cgit v1.2.1


From 57afd89f98a990747445f01c458ecae64263b2f8 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:13 -0700
Subject: [PATCH] md: improve the interface to sync_request

1/ change the return value (which is number-of-sectors synced)
 from 'int' to 'sector_t'.
 The number of sectors is usually easily small enough to fit
 in an int, but if resync needs to abort, it may want to return
 the total number of remaining sectors, which could be large.
 Also errors cannot be returned as negative numbers now, so use
 0 instead
2/ Add a 'skipped' return parameter to allow the array to report
 that it skipped the sectors.  This allows md to take this into account
 in the speed calculations.
 Currently there is no important skipping, but the bitmap-based-resync
 that is coming will use this.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3f1280bbaf39..3c5c916cb09e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1010,7 +1010,7 @@ static int init_resync(conf_t *conf)
  * that can be installed to exclude normal IO requests.
  */
 
-static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
+static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
 {
 	conf_t *conf = mddev_to_conf(mddev);
 	mirror_info_t *mirror;
@@ -1023,7 +1023,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
 
 	if (!conf->r1buf_pool)
 		if (init_resync(conf))
-			return -ENOMEM;
+			return 0;
 
 	max_sector = mddev->size << 1;
 	if (sector_nr >= max_sector) {
@@ -1107,8 +1107,8 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
 		/* There is nowhere to write, so all non-sync
 		 * drives must be failed - so we are finished
 		 */
-		int rv = max_sector - sector_nr;
-		md_done_sync(mddev, rv, 1);
+		sector_t rv = max_sector - sector_nr;
+		*skipped = 1;
 		put_buf(r1_bio);
 		rdev_dec_pending(conf->mirrors[disk].rdev, mddev);
 		return rv;
-- 
cgit v1.2.1


From 191ea9b2c7cc3ebbe0678834ab710d7d95ad3f9a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:23 -0700
Subject: [PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 182 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 154 insertions(+), 28 deletions(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3c5c916cb09e..0fd4c3bfc851 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -12,6 +12,15 @@
  * Fixes to reconstruction by Jakob Østergaard" <jakob@ostenfeld.dk>
  * Various fixes by Neil Brown <neilb@cse.unsw.edu.au>
  *
+ * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support
+ * bitmapped intelligence in resync:
+ *
+ *      - bitmap marked during normal i/o
+ *      - bitmap used to skip nondirty blocks during sync
+ *
+ * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology:
+ * - persistent bitmap code
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
@@ -22,7 +31,16 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include "dm-bio-list.h"
 #include <linux/raid/raid1.h>
+#include <linux/raid/bitmap.h>
+
+#define DEBUG 0
+#if DEBUG
+#define PRINTK(x...) printk(x)
+#else
+#define PRINTK(x...)
+#endif
 
 /*
  * Number of guaranteed r1bios in case of extreme VM load:
@@ -287,9 +305,11 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
+	if (!uptodate) {
 		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-	else
+		/* an I/O failed, we can't clear the bitmap */
+		set_bit(R1BIO_Degraded, &r1_bio->state);
+	} else
 		/*
 		 * Set R1BIO_Uptodate in our master bio, so that
 		 * we will return a good error code for to the higher
@@ -309,6 +329,10 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 	 * already.
 	 */
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
+		/* clear the bitmap if all writes complete successfully */
+		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+				r1_bio->sectors,
+				!test_bit(R1BIO_Degraded, &r1_bio->state));
 		md_write_end(r1_bio->mddev);
 		raid_end_bio_io(r1_bio);
 	}
@@ -458,7 +482,10 @@ static void unplug_slaves(mddev_t *mddev)
 
 static void raid1_unplug(request_queue_t *q)
 {
-	unplug_slaves(q->queuedata);
+	mddev_t *mddev = q->queuedata;
+
+	unplug_slaves(mddev);
+	md_wakeup_thread(mddev->thread);
 }
 
 static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
@@ -501,16 +528,16 @@ static void device_barrier(conf_t *conf, sector_t sect)
 {
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-			    conf->resync_lock, unplug_slaves(conf->mddev));
+			    conf->resync_lock, raid1_unplug(conf->mddev->queue));
 	
 	if (!conf->barrier++) {
 		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-				    conf->resync_lock, unplug_slaves(conf->mddev));
+				    conf->resync_lock, raid1_unplug(conf->mddev->queue));
 		if (conf->nr_pending)
 			BUG();
 	}
 	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, unplug_slaves(conf->mddev));
+			    conf->resync_lock, raid1_unplug(conf->mddev->queue));
 	conf->next_resync = sect;
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -522,8 +549,12 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	mirror_info_t *mirror;
 	r1bio_t *r1_bio;
 	struct bio *read_bio;
-	int i, disks;
+	int i, targets = 0, disks;
 	mdk_rdev_t *rdev;
+	struct bitmap *bitmap = mddev->bitmap;
+	unsigned long flags;
+	struct bio_list bl;
+
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -554,7 +585,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 
 	r1_bio->master_bio = bio;
 	r1_bio->sectors = bio->bi_size >> 9;
-
+	r1_bio->state = 0;
 	r1_bio->mddev = mddev;
 	r1_bio->sector = bio->bi_sector;
 
@@ -597,6 +628,13 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	 * bios[x] to bio
 	 */
 	disks = conf->raid_disks;
+#if 0
+	{ static int first=1;
+	if (first) printk("First Write sector %llu disks %d\n",
+			  (unsigned long long)r1_bio->sector, disks);
+	first = 0;
+	}
+#endif
 	rcu_read_lock();
 	for (i = 0;  i < disks; i++) {
 		if ((rdev=conf->mirrors[i].rdev) != NULL &&
@@ -607,13 +645,21 @@ static int make_request(request_queue_t *q, struct bio * bio)
 				r1_bio->bios[i] = NULL;
 			} else
 				r1_bio->bios[i] = bio;
+			targets++;
 		} else
 			r1_bio->bios[i] = NULL;
 	}
 	rcu_read_unlock();
 
-	atomic_set(&r1_bio->remaining, 1);
+	if (targets < conf->raid_disks) {
+		/* array is degraded, we will not clear the bitmap
+		 * on I/O completion (see raid1_end_write_request) */
+		set_bit(R1BIO_Degraded, &r1_bio->state);
+	}
+
+	atomic_set(&r1_bio->remaining, 0);
 
+	bio_list_init(&bl);
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
 		if (!r1_bio->bios[i])
@@ -629,14 +675,23 @@ static int make_request(request_queue_t *q, struct bio * bio)
 		mbio->bi_private = r1_bio;
 
 		atomic_inc(&r1_bio->remaining);
-		generic_make_request(mbio);
-	}
 
-	if (atomic_dec_and_test(&r1_bio->remaining)) {
-		md_write_end(mddev);
-		raid_end_bio_io(r1_bio);
+		bio_list_add(&bl, mbio);
 	}
 
+	bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors);
+	spin_lock_irqsave(&conf->device_lock, flags);
+	bio_list_merge(&conf->pending_bio_list, &bl);
+	bio_list_init(&bl);
+
+	blk_plug_device(mddev->queue);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
+
+#if 0
+	while ((bio = bio_list_pop(&bl)) != NULL)
+		generic_make_request(bio);
+#endif
+
 	return 0;
 }
 
@@ -716,7 +771,7 @@ static void close_sync(conf_t *conf)
 {
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-			    conf->resync_lock, 	unplug_slaves(conf->mddev));
+			    conf->resync_lock, 	raid1_unplug(conf->mddev->queue));
 	spin_unlock_irq(&conf->resync_lock);
 
 	if (conf->barrier) BUG();
@@ -830,10 +885,11 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 	 * or re-read if the read failed.
 	 * We don't do much here, just schedule handling by raid1d
 	 */
-	if (!uptodate)
+	if (!uptodate) {
 		md_error(r1_bio->mddev,
 			 conf->mirrors[r1_bio->read_disk].rdev);
-	else
+		set_bit(R1BIO_Degraded, &r1_bio->state);
+	} else
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
 	rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
 	reschedule_retry(r1_bio);
@@ -857,8 +913,10 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
 			mirror = i;
 			break;
 		}
-	if (!uptodate)
+	if (!uptodate) {
 		md_error(mddev, conf->mirrors[mirror].rdev);
+		set_bit(R1BIO_Degraded, &r1_bio->state);
+	}
 	update_head_pos(mirror, r1_bio);
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
@@ -878,6 +936,9 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 
 	bio = r1_bio->bios[r1_bio->read_disk];
 
+/*
+	if (r1_bio->sector == 0) printk("First sync write startss\n");
+*/
 	/*
 	 * schedule writes
 	 */
@@ -905,10 +966,12 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 		atomic_inc(&conf->mirrors[i].rdev->nr_pending);
 		atomic_inc(&r1_bio->remaining);
 		md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
+
 		generic_make_request(wbio);
 	}
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
+		/* if we're here, all write(s) have completed, so clean up */
 		md_done_sync(mddev, r1_bio->sectors, 1);
 		put_buf(r1_bio);
 	}
@@ -937,6 +1000,26 @@ static void raid1d(mddev_t *mddev)
 	for (;;) {
 		char b[BDEVNAME_SIZE];
 		spin_lock_irqsave(&conf->device_lock, flags);
+
+		if (conf->pending_bio_list.head) {
+			bio = bio_list_get(&conf->pending_bio_list);
+			blk_remove_plug(mddev->queue);
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
+			if (bitmap_unplug(mddev->bitmap) != 0)
+				printk("%s: bitmap file write failed!\n", mdname(mddev));
+
+			while (bio) { /* submit pending writes */
+				struct bio *next = bio->bi_next;
+				bio->bi_next = NULL;
+				generic_make_request(bio);
+				bio = next;
+			}
+			unplug = 1;
+
+			continue;
+		}
+
 		if (list_empty(head))
 			break;
 		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
@@ -1020,17 +1103,43 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	int disk;
 	int i;
 	int write_targets = 0;
+	int sync_blocks;
 
 	if (!conf->r1buf_pool)
+	{
+/*
+		printk("sync start - bitmap %p\n", mddev->bitmap);
+*/
 		if (init_resync(conf))
 			return 0;
+	}
 
 	max_sector = mddev->size << 1;
 	if (sector_nr >= max_sector) {
+		/* If we aborted, we need to abort the
+		 * sync on the 'current' bitmap chunk (there will
+		 * only be one in raid1 resync.
+		 * We can find the current addess in mddev->curr_resync
+		 */
+		if (!conf->fullsync) {
+			if (mddev->curr_resync < max_sector)
+				bitmap_end_sync(mddev->bitmap,
+						mddev->curr_resync,
+						&sync_blocks, 1);
+			bitmap_close_sync(mddev->bitmap);
+		}
+		if (mddev->curr_resync >= max_sector)
+			conf->fullsync = 0;
 		close_sync(conf);
 		return 0;
 	}
 
+	if (!conf->fullsync &&
+	    !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) {
+		/* We can skip this block, and probably several more */
+		*skipped = 1;
+		return sync_blocks;
+	}
 	/*
 	 * If there is non-resync activity waiting for us then
 	 * put in a delay to throttle resync.
@@ -1069,6 +1178,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 	r1_bio->mddev = mddev;
 	r1_bio->sector = sector_nr;
+	r1_bio->state = 0;
 	set_bit(R1BIO_IsSync, &r1_bio->state);
 	r1_bio->read_disk = disk;
 
@@ -1103,6 +1213,11 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		bio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		bio->bi_private = r1_bio;
 	}
+
+	if (write_targets + 1 < conf->raid_disks)
+		/* array degraded, can't clear bitmap */
+		set_bit(R1BIO_Degraded, &r1_bio->state);
+
 	if (write_targets == 0) {
 		/* There is nowhere to write, so all non-sync
 		 * drives must be failed - so we are finished
@@ -1122,6 +1237,14 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 			len = (max_sector - sector_nr) << 9;
 		if (len == 0)
 			break;
+		if (!conf->fullsync && sync_blocks == 0)
+			if (!bitmap_start_sync(mddev->bitmap,
+					       sector_nr, &sync_blocks))
+				break;
+		if (sync_blocks < (PAGE_SIZE>>9))
+			BUG();
+		if (len > (sync_blocks<<9)) len = sync_blocks<<9;
+
 		for (i=0 ; i < conf->raid_disks; i++) {
 			bio = r1_bio->bios[i];
 			if (bio->bi_end_io) {
@@ -1144,6 +1267,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		}
 		nr_sectors += len>>9;
 		sector_nr += len>>9;
+		sync_blocks -= (len>>9);
 	} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
  bio_full:
 	bio = r1_bio->bios[disk];
@@ -1232,6 +1356,9 @@ static int run(mddev_t *mddev)
 	init_waitqueue_head(&conf->wait_idle);
 	init_waitqueue_head(&conf->wait_resume);
 
+	bio_list_init(&conf->pending_bio_list);
+	bio_list_init(&conf->flushing_bio_list);
+
 	if (!conf->working_disks) {
 		printk(KERN_ERR "raid1: no operational mirrors for %s\n",
 			mdname(mddev));
@@ -1260,16 +1387,15 @@ static int run(mddev_t *mddev)
 	conf->last_used = j;
 
 
-
-	{
-		mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
-		if (!mddev->thread) {
-			printk(KERN_ERR 
-				"raid1: couldn't allocate thread for %s\n", 
-				mdname(mddev));
-			goto out_free_conf;
-		}
+	mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
+	if (!mddev->thread) {
+		printk(KERN_ERR
+		       "raid1: couldn't allocate thread for %s\n",
+		       mdname(mddev));
+		goto out_free_conf;
 	}
+	if (mddev->bitmap) mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
+
 	printk(KERN_INFO 
 		"raid1: raid set %s active with %d out of %d mirrors\n",
 		mdname(mddev), mddev->raid_disks - mddev->degraded, 
@@ -1394,7 +1520,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-			    conf->resync_lock, unplug_slaves(mddev));
+			    conf->resync_lock, raid1_unplug(mddev->queue));
 	spin_unlock_irq(&conf->resync_lock);
 
 	/* ok, everything is stopped */
-- 
cgit v1.2.1


From ab7a30c7051ee32d0d72415fe0a16d60eba38a0d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:23 -0700
Subject: [PATCH] md: fix bug when raid1 attempts a partial reconstruct.

The logic here is wrong.  if fullsync is 0, it WILL BUG.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 0fd4c3bfc851..9d9acc3e51a7 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1237,13 +1237,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 			len = (max_sector - sector_nr) << 9;
 		if (len == 0)
 			break;
-		if (!conf->fullsync && sync_blocks == 0)
-			if (!bitmap_start_sync(mddev->bitmap,
-					       sector_nr, &sync_blocks))
-				break;
-		if (sync_blocks < (PAGE_SIZE>>9))
-			BUG();
-		if (len > (sync_blocks<<9)) len = sync_blocks<<9;
+		if (!conf->fullsync) {
+			if (sync_blocks == 0) {
+				if (!bitmap_start_sync(mddev->bitmap,
+						       sector_nr, &sync_blocks))
+					break;
+				if (sync_blocks < (PAGE_SIZE>>9))
+					BUG();
+				if (len > (sync_blocks<<9)) len = sync_blocks<<9;
+			}
+		}
 
 		for (i=0 ; i < conf->raid_disks; i++) {
 			bio = r1_bio->bios[i];
-- 
cgit v1.2.1


From 289e99e8ed8f36e386bf7de49947311c17ae1482 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:24 -0700
Subject: [PATCH] md: initialise sync_blocks in raid1 resync

Otherwise it could have a random value and might BUG.  This fixes a
BUG-during-resync problem in raid1 introduced by the
bitmap-based-intent-logging patches.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9d9acc3e51a7..c3b4772cfaea 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1230,6 +1230,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	}
 
 	nr_sectors = 0;
+	sync_blocks = 0;
 	do {
 		struct page *page;
 		int len = PAGE_SIZE;
-- 
cgit v1.2.1


From 41158c7eb22312cfaa256744e1553bb4042ff085 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:25 -0700
Subject: [PATCH] md: optimise reconstruction when re-adding a recently failed
 drive.

When an array is degraded, bits in the intent-bitmap are never cleared.  So if
a recently failed drive is re-added, we only need to reconstruct the blocks
that are still reflected in the bitmap.

This patch adds support for this re-adding.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c3b4772cfaea..3f5234fe3593 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -811,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 {
 	conf_t *conf = mddev->private;
 	int found = 0;
-	int mirror;
+	int mirror = 0;
 	mirror_info_t *p;
 
+	if (rdev->saved_raid_disk >= 0 &&
+	    conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
+		mirror = rdev->saved_raid_disk;
 	for (mirror=0; mirror < mddev->raid_disks; mirror++)
 		if ( !(p=conf->mirrors+mirror)->rdev) {
 
@@ -830,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
 			found = 1;
+			if (rdev->saved_raid_disk != mirror)
+				conf->fullsync = 1;
 			p->rdev = rdev;
 			break;
 		}
-- 
cgit v1.2.1


From 3d310eb7b3df1252e8595d059d982b0a9825a137 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:26 -0700
Subject: [PATCH] md: fix deadlock due to md thread processing delayed
 requests.

Before completing a 'write' the md superblock might need to be updated.
This is best done by the md_thread.

The current code schedules this up and queues the write request for later
handling by the md_thread.

However some personalities (Raid5/raid6) will deadlock if the md_thread
tries to submit requests to its own array.

So this patch changes things so the process submitting the request waits
for the superblock to be written and then submits the request itself.

This fixes a recently-created deadlock in raid5/raid6

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3f5234fe3593..98b09773e79e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -561,8 +561,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
 	 */
-	if (md_write_start(mddev, bio)==0)
-		return 0;
+	md_write_start(mddev, bio); /* wait on superblock update early */
+
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
 	conf->nr_pending++;
-- 
cgit v1.2.1


From 990a8baf568ca1d0ae65e59783ff821794118d07 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <juhl-lkml@dif.dk>
Date: Tue, 21 Jun 2005 17:17:30 -0700
Subject: [PATCH] md: remove unneeded NULL checks before kfree

This patch removes some unneeded checks of pointers being NULL before
calling kfree() on them.  kfree() handles NULL pointers just fine, checking
first is pointless.

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'drivers/md/raid1.c')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 98b09773e79e..ff1dbec864af 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1427,10 +1427,8 @@ out_free_conf:
 	if (conf) {
 		if (conf->r1bio_pool)
 			mempool_destroy(conf->r1bio_pool);
-		if (conf->mirrors)
-			kfree(conf->mirrors);
-		if (conf->poolinfo)
-			kfree(conf->poolinfo);
+		kfree(conf->mirrors);
+		kfree(conf->poolinfo);
 		kfree(conf);
 		mddev->private = NULL;
 	}
@@ -1447,10 +1445,8 @@ static int stop(mddev_t *mddev)
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	if (conf->r1bio_pool)
 		mempool_destroy(conf->r1bio_pool);
-	if (conf->mirrors)
-		kfree(conf->mirrors);
-	if (conf->poolinfo)
-		kfree(conf->poolinfo);
+	kfree(conf->mirrors);
+	kfree(conf->poolinfo);
 	kfree(conf);
 	mddev->private = NULL;
 	return 0;
-- 
cgit v1.2.1