From f4370781d83cd2e52eb515e4663155e8091e4d4e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:14 -0700 Subject: [PATCH] md: possible fix for unplug problem I have reports of a problem with raid5 which turns out to be because the raid5 device gets stuck in a 'plugged' state. This shouldn't be able to happen as 3msec after it gets plugged it should get unplugged. However it happens none-the-less. This patch fixes the problem and is a reasonable thing to do, though it might hurt performance slightly in some cases. Until I can find the real problem, we should probably have this workaround in place. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7433871f4b3a..5764387c6989 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -270,7 +270,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector < (conf->max_nr_stripes *3/4) || !conf->inactive_blocked), conf->device_lock, - unplug_slaves(conf->mddev) + raid5_unplug_device(conf->mddev->queue) ); conf->inactive_blocked = 0; } else -- cgit v1.2.1 From 31b65a0d3894566191b91fbadd122c354a363b5d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:14 -0700 Subject: [PATCH] md: set desc_nr correctly for version-1 superblocks This has to be done in ->load_super, not ->validate_super Without this, hot-adding devices to an array doesn't always work right - though there is a work around in mdadm-2.5.2 to make this less of an issue. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index e4e161372a3e..9f76e0cd7805 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1062,6 +1062,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (rdev->sb_size & bmask) rdev-> sb_size = (rdev->sb_size | bmask)+1; + if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) + rdev->desc_nr = -1; + else + rdev->desc_nr = le32_to_cpu(sb->dev_number); + if (refdev == 0) ret = 1; else { @@ -1171,7 +1176,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) } if (mddev->level != LEVEL_MULTIPATH) { int role; - rdev->desc_nr = le32_to_cpu(sb->dev_number); role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); switch(role) { case 0xffff: /* spare */ -- cgit v1.2.1 From 0b8c9de05c2a860fe6b02fedcb48763bcee648b3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:15 -0700 Subject: [PATCH] md: delay starting md threads until array is completely setup When an array is started we start one or two threads (two if there is a reshape or recovery that needs to be completed). We currently start these *before* the array is completely set up and in particular before queue->queuedata is set. If the thread actually starts very quickly on another CPU, we can end up dereferencing queue->queuedata and oops. This patch also makes sure we don't try to start a recovery if a reshape is being restarted. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 8 ++++---- drivers/md/raid5.c | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9f76e0cd7805..fb50e5642c63 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3095,7 +3095,6 @@ static int do_md_run(mddev_t * mddev) } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); if (mddev->sb_dirty) md_update_sb(mddev); @@ -3116,7 +3115,7 @@ static int do_md_run(mddev_t * mddev) * start recovery here. If we leave it to md_check_recovery, * it will remove the drives and not do the right thing */ - if (mddev->degraded) { + if (mddev->degraded && !mddev->sync_thread) { struct list_head *rtmp; int spares = 0; ITERATE_RDEV(mddev,rdev,rtmp) @@ -3137,10 +3136,11 @@ static int do_md_run(mddev_t * mddev) mdname(mddev)); /* leave the spares where they are, it shouldn't hurt */ mddev->recovery = 0; - } else - md_wakeup_thread(mddev->sync_thread); + } } } + md_wakeup_thread(mddev->thread); + md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ mddev->changed = 1; md_new_event(mddev); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5764387c6989..a02f35f1a796 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3246,9 +3246,6 @@ static int run(mddev_t *mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, "%s_reshape"); - /* FIXME if md_register_thread fails?? */ - md_wakeup_thread(mddev->sync_thread); - } /* read-ahead size must cover two whole stripes, which is -- cgit v1.2.1 From ff4e8d9a9f46e3a7f89d14ade52fe5d53a82c022 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:16 -0700 Subject: [PATCH] md: fix resync speed calculation for restarted resyncs We introduced 'io_sectors' recently so we could count the sectors that causes io during resync separate from sectors which didn't cause IO - there can be a difference if a bitmap is being used to accelerate resync. However when a speed is reported, we find the number of sectors processed recently by subtracting an oldish io_sectors count from a current 'curr_resync' count. This is wrong because curr_resync counts all sectors, not just io sectors. So, add a field to mddev to store the curren io_sectors separately from curr_resync, and use that in the calculations. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 10 ++++++---- drivers/md/raid5.c | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index fb50e5642c63..4bd3ccf363bd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2719,7 +2719,7 @@ static ssize_t sync_speed_show(mddev_t *mddev, char *page) { unsigned long resync, dt, db; - resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); + resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)); dt = ((jiffies - mddev->resync_mark) / HZ); if (!dt) dt++; db = resync - (mddev->resync_mark_cnt); @@ -4687,12 +4687,13 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) */ dt = ((jiffies - mddev->resync_mark) / HZ); if (!dt) dt++; - db = resync - (mddev->resync_mark_cnt/2); - rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100; + db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)) + - mddev->resync_mark_cnt; + rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100; seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); - seq_printf(seq, " speed=%ldK/sec", db/dt); + seq_printf(seq, " speed=%ldK/sec", db/2/dt); } static void *md_seq_start(struct seq_file *seq, loff_t *pos) @@ -5203,6 +5204,7 @@ void md_do_sync(mddev_t *mddev) j += sectors; if (j>1) mddev->curr_resync = j; + mddev->curr_mark_cnt = io_sectors; if (last_check == 0) /* this is the earliers that rebuilt will be * visible in /proc/mdstat diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a02f35f1a796..dd0d00108a31 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -281,7 +281,8 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector } else { if (!test_bit(STRIPE_HANDLE, &sh->state)) atomic_inc(&conf->active_stripes); - if (list_empty(&sh->lru)) + if (list_empty(&sh->lru) && + !test_bit(STRIPE_EXPANDING, &sh->state)) BUG(); list_del_init(&sh->lru); } -- cgit v1.2.1 From 7c785b7a18dc30572a49c6b75efd384269735d14 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:16 -0700 Subject: [PATCH] md: fix a plug/unplug race in raid5 When a device is unplugged, requests are moved from one or two (depending on whether a bitmap is in use) queues to the main request queue. So whenever requests are put on either of those queues, we should make sure the raid5 array is 'plugged'. However we don't. We currently plug the raid5 queue just before putting requests on queues, so there is room for a race. If something unplugs the queue at just the wrong time, requests will be left on the queue and nothing will want to unplug them. Normally something else will plug and unplug the queue fairly soon, but there is a risk that nothing will. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index dd0d00108a31..6ba394082129 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -88,12 +88,14 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) BUG_ON(!list_empty(&sh->lru)); BUG_ON(atomic_read(&conf->active_stripes)==0); if (test_bit(STRIPE_HANDLE, &sh->state)) { - if (test_bit(STRIPE_DELAYED, &sh->state)) + if (test_bit(STRIPE_DELAYED, &sh->state)) { list_add_tail(&sh->lru, &conf->delayed_list); - else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && - conf->seq_write == sh->bm_seq) + blk_plug_device(conf->mddev->queue); + } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && + conf->seq_write == sh->bm_seq) { list_add_tail(&sh->lru, &conf->bitmap_list); - else { + blk_plug_device(conf->mddev->queue); + } else { clear_bit(STRIPE_BIT_DELAY, &sh->state); list_add_tail(&sh->lru, &conf->handle_list); } @@ -2555,13 +2557,6 @@ static int raid5_issue_flush(request_queue_t *q, struct gendisk *disk, return ret; } -static inline void raid5_plug_device(raid5_conf_t *conf) -{ - spin_lock_irq(&conf->device_lock); - blk_plug_device(conf->mddev->queue); - spin_unlock_irq(&conf->device_lock); -} - static int make_request(request_queue_t *q, struct bio * bi) { mddev_t *mddev = q->queuedata; @@ -2671,7 +2666,6 @@ static int make_request(request_queue_t *q, struct bio * bi) goto retry; } finish_wait(&conf->wait_for_overlap, &w); - raid5_plug_device(conf); handle_stripe(sh, NULL); release_stripe(sh); } else { -- cgit v1.2.1 From ae3c20ccf84c88d45616f12122f781a900118f09 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:17 -0700 Subject: [PATCH] md: fix some small races in bitmap plugging in raid5 The comment gives more details, but I didn't quite have the sequencing write, so there was room for races to leave bits unset in the on-disk bitmap for short periods of time. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6ba394082129..56303ff31730 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -18,6 +18,30 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/* + * BITMAP UNPLUGGING: + * + * The sequencing for updating the bitmap reliably is a little + * subtle (and I got it wrong the first time) so it deserves some + * explanation. + * + * We group bitmap updates into batches. Each batch has a number. + * We may write out several batches at once, but that isn't very important. + * conf->bm_write is the number of the last batch successfully written. + * conf->bm_flush is the number of the last batch that was closed to + * new additions. + * When we discover that we will need to write to any block in a stripe + * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq + * the number of the batch it will be in. This is bm_flush+1. + * When we are ready to do a write, if that batch hasn't been written yet, + * we plug the array and queue the stripe for later. + * When an unplug happens, we increment bm_flush, thus closing the current + * batch. + * When we notice that bm_flush > bm_write, we write out all pending updates + * to the bitmap, and advance bm_write to where bm_flush was. + * This may occasionally write a bit out twice, but is sure never to + * miss any bits. + */ #include #include @@ -92,7 +116,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) list_add_tail(&sh->lru, &conf->delayed_list); blk_plug_device(conf->mddev->queue); } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && - conf->seq_write == sh->bm_seq) { + sh->bm_seq - conf->seq_write > 0) { list_add_tail(&sh->lru, &conf->bitmap_list); blk_plug_device(conf->mddev->queue); } else { @@ -1273,9 +1297,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in (unsigned long long)sh->sector, dd_idx); if (conf->mddev->bitmap && firstwrite) { - sh->bm_seq = conf->seq_write; bitmap_startwrite(conf->mddev->bitmap, sh->sector, STRIPE_SECTORS, 0); + sh->bm_seq = conf->seq_flush+1; set_bit(STRIPE_BIT_DELAY, &sh->state); } @@ -2918,7 +2942,7 @@ static void raid5d (mddev_t *mddev) while (1) { struct list_head *first; - if (conf->seq_flush - conf->seq_write > 0) { + if (conf->seq_flush != conf->seq_write) { int seq = conf->seq_flush; spin_unlock_irq(&conf->device_lock); bitmap_unplug(mddev->bitmap); -- cgit v1.2.1 From 5e3db645f890660ce8774a18bcd418570298937e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:18 -0700 Subject: [PATCH] md: fix usage of wrong variable in raid1 Though it rarely matters, we should be using 's' rather than r1_bio->sector here. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index cead918578a7..5a479d692fac 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1145,7 +1145,7 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) long sectors_to_go = r1_bio->sectors; /* make sure these bits doesn't get cleared. */ do { - bitmap_end_sync(mddev->bitmap, r1_bio->sector, + bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); s += sync_blocks; sectors_to_go -= sync_blocks; -- cgit v1.2.1 From 80ca3a44f563a763fa872390dcb393f2d82027bf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:18 -0700 Subject: [PATCH] md: unify usage of symbolic names for perms Some places we use number (0660) someplaces names (S_IRUGO). Change all numbers to be names, and change 0655 to be what it should be. Also make some formatting more consistent. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 56 +++++++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 4bd3ccf363bd..c0da5eedc245 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -110,7 +110,7 @@ static ctl_table raid_table[] = { .procname = "speed_limit_min", .data = &sysctl_speed_limit_min, .maxlen = sizeof(int), - .mode = 0644, + .mode = S_IRUGO|S_IWUSR, .proc_handler = &proc_dointvec, }, { @@ -118,7 +118,7 @@ static ctl_table raid_table[] = { .procname = "speed_limit_max", .data = &sysctl_speed_limit_max, .maxlen = sizeof(int), - .mode = 0644, + .mode = S_IRUGO|S_IWUSR, .proc_handler = &proc_dointvec, }, { .ctl_name = 0 } @@ -129,7 +129,7 @@ static ctl_table raid_dir_table[] = { .ctl_name = DEV_RAID, .procname = "raid", .maxlen = 0, - .mode = 0555, + .mode = S_IRUGO|S_IXUGO, .child = raid_table, }, { .ctl_name = 0 } @@ -1783,8 +1783,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) } return err ? err : len; } -static struct rdev_sysfs_entry -rdev_state = __ATTR(state, 0644, state_show, state_store); +static struct rdev_sysfs_entry rdev_state = +__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store); static ssize_t super_show(mdk_rdev_t *rdev, char *page) @@ -1815,7 +1815,7 @@ errors_store(mdk_rdev_t *rdev, const char *buf, size_t len) return -EINVAL; } static struct rdev_sysfs_entry rdev_errors = -__ATTR(errors, 0644, errors_show, errors_store); +__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store); static ssize_t slot_show(mdk_rdev_t *rdev, char *page) @@ -1849,7 +1849,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) static struct rdev_sysfs_entry rdev_slot = -__ATTR(slot, 0644, slot_show, slot_store); +__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store); static ssize_t offset_show(mdk_rdev_t *rdev, char *page) @@ -1871,7 +1871,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len) } static struct rdev_sysfs_entry rdev_offset = -__ATTR(offset, 0644, offset_show, offset_store); +__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store); static ssize_t rdev_size_show(mdk_rdev_t *rdev, char *page) @@ -1895,7 +1895,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) } static struct rdev_sysfs_entry rdev_size = -__ATTR(size, 0644, rdev_size_show, rdev_size_store); +__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store); static struct attribute *rdev_default_attrs[] = { &rdev_state.attr, @@ -2132,7 +2132,7 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) return len; } static struct md_sysfs_entry md_safe_delay = -__ATTR(safe_mode_delay, 0644,safe_delay_show, safe_delay_store); +__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store); static ssize_t level_show(mddev_t *mddev, char *page) @@ -2167,7 +2167,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) } static struct md_sysfs_entry md_level = -__ATTR(level, 0644, level_show, level_store); +__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store); static ssize_t @@ -2192,7 +2192,7 @@ layout_store(mddev_t *mddev, const char *buf, size_t len) return len; } static struct md_sysfs_entry md_layout = -__ATTR(layout, 0655, layout_show, layout_store); +__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store); static ssize_t @@ -2223,7 +2223,7 @@ raid_disks_store(mddev_t *mddev, const char *buf, size_t len) return rv ? rv : len; } static struct md_sysfs_entry md_raid_disks = -__ATTR(raid_disks, 0644, raid_disks_show, raid_disks_store); +__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store); static ssize_t chunk_size_show(mddev_t *mddev, char *page) @@ -2247,7 +2247,7 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) return len; } static struct md_sysfs_entry md_chunk_size = -__ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); +__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store); static ssize_t resync_start_show(mddev_t *mddev, char *page) @@ -2271,7 +2271,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len) return len; } static struct md_sysfs_entry md_resync_start = -__ATTR(resync_start, 0644, resync_start_show, resync_start_store); +__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store); /* * The array state can be: @@ -2441,7 +2441,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) else return len; } -static struct md_sysfs_entry md_array_state = __ATTR(array_state, 0644, array_state_show, array_state_store); +static struct md_sysfs_entry md_array_state = +__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); static ssize_t null_show(mddev_t *mddev, char *page) @@ -2501,7 +2502,7 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len) } static struct md_sysfs_entry md_new_device = -__ATTR(new_dev, 0200, null_show, new_dev_store); +__ATTR(new_dev, S_IWUSR, null_show, new_dev_store); static ssize_t size_show(mddev_t *mddev, char *page) @@ -2539,7 +2540,7 @@ size_store(mddev_t *mddev, const char *buf, size_t len) } static struct md_sysfs_entry md_size = -__ATTR(component_size, 0644, size_show, size_store); +__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store); /* Metdata version. @@ -2587,7 +2588,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) } static struct md_sysfs_entry md_metadata = -__ATTR(metadata_version, 0644, metadata_show, metadata_store); +__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store); static ssize_t action_show(mddev_t *mddev, char *page) @@ -2655,12 +2656,11 @@ mismatch_cnt_show(mddev_t *mddev, char *page) (unsigned long long) mddev->resync_mismatches); } -static struct md_sysfs_entry -md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); +static struct md_sysfs_entry md_scan_mode = +__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); -static struct md_sysfs_entry -md_mismatches = __ATTR_RO(mismatch_cnt); +static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt); static ssize_t sync_min_show(mddev_t *mddev, char *page) @@ -2726,8 +2726,7 @@ sync_speed_show(mddev_t *mddev, char *page) return sprintf(page, "%ld\n", db/dt/2); /* K/sec */ } -static struct md_sysfs_entry -md_sync_speed = __ATTR_RO(sync_speed); +static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed); static ssize_t sync_completed_show(mddev_t *mddev, char *page) @@ -2743,8 +2742,7 @@ sync_completed_show(mddev_t *mddev, char *page) return sprintf(page, "%lu / %lu\n", resync, max_blocks); } -static struct md_sysfs_entry -md_sync_completed = __ATTR_RO(sync_completed); +static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); static ssize_t suspend_lo_show(mddev_t *mddev, char *page) @@ -5651,8 +5649,8 @@ static int set_ro(const char *val, struct kernel_param *kp) return -EINVAL; } -module_param_call(start_ro, set_ro, get_ro, NULL, 0600); -module_param(start_dirty_degraded, int, 0644); +module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); +module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); EXPORT_SYMBOL(register_md_personality); -- cgit v1.2.1 From 67463acb646904d76a8e237cc31eaa87872f30cc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:19 -0700 Subject: [PATCH] md: require CAP_SYS_ADMIN for (re-)configuring md devices via sysfs The ioctl requires CAP_SYS_ADMIN, so sysfs should too. Note that we don't require CAP_SYS_ADMIN for reading attributes even though the ioctl does. There is no reason to limit the read access, and much of the information is already available via /proc/mdstat Cc: Chris Wright Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index c0da5eedc245..a5286beddcf5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1926,6 +1926,8 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, if (!entry->store) return -EIO; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; return entry->store(rdev, page, length); } @@ -2859,6 +2861,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, if (!entry->store) return -EIO; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; rv = mddev_lock(mddev); if (!rv) { rv = entry->store(mddev, page, length); -- cgit v1.2.1 From d69504325978c461b51b03cca49626026970307b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Jul 2006 04:44:20 -0700 Subject: [PATCH] md: include sector number in messages about corrected read errors This is generally useful, but particularly helps see if it is the same sector that always needs correcting, or different ones. [akpm@osdl.org: fix printk warnings] Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid1.c | 3 +++ drivers/md/raid10.c | 4 ++++ drivers/md/raid5.c | 30 +++++++++++++++++++++++------- 3 files changed, 30 insertions(+), 7 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5a479d692fac..1efe22a2d041 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1509,6 +1509,9 @@ static void raid1d(mddev_t *mddev) s<<9, conf->tmppage, READ) == 0) /* Well, this device is dead */ md_error(mddev, rdev); + else + printk(KERN_INFO "raid1:%s: read error corrected (%d sectors at %llu on %s)\n", + mdname(mddev), s, (unsigned long long)(sect + rdev->data_offset), bdevname(rdev->bdev, b)); } } } else { diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7f636283a1ba..016ddb831c9b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1492,6 +1492,10 @@ static void raid10d(mddev_t *mddev) s<<9, conf->tmppage, READ) == 0) /* Well, this device is dead */ md_error(mddev, rdev); + else + printk(KERN_INFO "raid10:%s: read error corrected (%d sectors at %llu on %s)\n", + mdname(mddev), s, (unsigned long long)(sect+rdev->data_offset), bdevname(rdev->bdev, b)); + rdev_dec_pending(rdev, mddev); rcu_read_lock(); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 56303ff31730..450066007160 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -523,6 +523,8 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks, i; int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); + char b[BDEVNAME_SIZE]; + mdk_rdev_t *rdev; if (bi->bi_size) return 1; @@ -570,25 +572,39 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, set_bit(R5_UPTODATE, &sh->dev[i].flags); #endif if (test_bit(R5_ReadError, &sh->dev[i].flags)) { - printk(KERN_INFO "raid5: read error corrected!!\n"); + rdev = conf->disks[i].rdev; + printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", + mdname(conf->mddev), STRIPE_SECTORS, + (unsigned long long)sh->sector + rdev->data_offset, + bdevname(rdev->bdev, b)); clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); } if (atomic_read(&conf->disks[i].rdev->read_errors)) atomic_set(&conf->disks[i].rdev->read_errors, 0); } else { + const char *bdn = bdevname(conf->disks[i].rdev->bdev, b); int retry = 0; + rdev = conf->disks[i].rdev; + clear_bit(R5_UPTODATE, &sh->dev[i].flags); - atomic_inc(&conf->disks[i].rdev->read_errors); + atomic_inc(&rdev->read_errors); if (conf->mddev->degraded) - printk(KERN_WARNING "raid5: read error not correctable.\n"); + printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)sh->sector + rdev->data_offset, + bdn); else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) /* Oh, no!!! */ - printk(KERN_WARNING "raid5: read error NOT corrected!!\n"); - else if (atomic_read(&conf->disks[i].rdev->read_errors) + printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)sh->sector + rdev->data_offset, + bdn); + else if (atomic_read(&rdev->read_errors) > conf->max_nr_stripes) printk(KERN_WARNING - "raid5: Too many read errors, failing device.\n"); + "raid5:%s: Too many read errors, failing device %s.\n", + mdname(conf->mddev), bdn); else retry = 1; if (retry) @@ -596,7 +612,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, else { clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); - md_error(conf->mddev, conf->disks[i].rdev); + md_error(conf->mddev, rdev); } } rdev_dec_pending(conf->disks[i].rdev, conf->mddev); -- cgit v1.2.1 From d0a0a5ee7a0094231a11cfe3f86d2d8f5f994e01 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 10 Jul 2006 04:44:20 -0700 Subject: [PATCH] md: fix oops in error-handling During early MD setup (superblock reading), we don't have a personality yet. But the error-handling code tries to dereference mddev->pers. Fix. Acked-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index a5286beddcf5..b6d16022a53e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4592,6 +4592,8 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) __builtin_return_address(0),__builtin_return_address(1), __builtin_return_address(2),__builtin_return_address(3)); */ + if (!mddev->pers) + return; if (!mddev->pers->error_handler) return; mddev->pers->error_handler(mddev,rdev); -- cgit v1.2.1