From 4706b349f4a8312d31b3d0cf61fe721699356920 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Feb 2009 15:06:47 +1100 Subject: [PATCH 1/3] md: Allow read error in a single drive raid1 to be passed up. If a raid1 only has a single working device and gets a read error, we choose to simply return that error up to the filesystem (or whatever) rather than failing the whole array. However the codes doesn't quite do that. We attempt a readbalance which allocates the same drive, so we retry the read - indefinitely. Instead: If read_balance in the error case chooses the same drive that just failed, treat it as a failure and don't retry. Signed-off-by: NeilBrown --- drivers/md/raid1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7b4f5f7155d..01e3cffd03b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1640,7 +1640,8 @@ static void raid1d(mddev_t *mddev) } bio = r1_bio->bios[r1_bio->read_disk]; - if ((disk=read_balance(conf, r1_bio)) == -1) { + if ((disk=read_balance(conf, r1_bio)) == -1 || + disk == r1_bio->read_disk) { printk(KERN_ALERT "raid1: %s: unrecoverable I/O" " read error for block %llu\n", bdevname(bio->bi_bdev,b), From 852c8bf484a0e17ee27f413ef26e87f522af5607 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 6 Feb 2009 15:10:52 +1100 Subject: [PATCH 2/3] md: Fix a bug in linear.c causing which_dev() to return the wrong device. ab5bd5cbc8d4b868378d062eed3d4240930fbb86 introduced the following bug in linear software raid for large arrays on 32 bit machines: which_dev() computes the device holding a given sector by shifting down the sector number to a 32 bit range, dividing by the array spacing and looking up the resulting index in the hash table of the array. Because the computed index might be slightly too small, a loop at the end of which_dev() increases the index until the given sector actually falls into the range of the device associated with that index. The changes of the above mentioned commit caused this loop to check whether the _index_ rather than the sector number is small enough, effectively bypassing the loop and thus possibly returning the wrong device. As reported by Simon Kirby, this leads to errors such as linear_make_request: Sector 2340486136 out of bounds on dev sdi: 156301312 sectors, offset 2109870464 Fix this bug by introducing a local variable for the index so that the variable containing the passed sector is left unchanged. Cc: stable@kernel.org Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 1e3aea9eecf..09658b21847 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -25,13 +25,13 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) { dev_info_t *hash; linear_conf_t *conf = mddev_to_conf(mddev); + sector_t idx = sector >> conf->sector_shift; /* * sector_div(a,b) returns the remainer and sets a to a/b */ - sector >>= conf->sector_shift; - (void)sector_div(sector, conf->spacing); - hash = conf->hash_table[sector]; + (void)sector_div(idx, conf->spacing); + hash = conf->hash_table[idx]; while (sector >= hash->num_sectors + hash->start_sector) hash++; From de01dfadf25bf83cfe3d85c163005c4320532658 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Feb 2009 18:02:46 +1100 Subject: [PATCH 3/3] md: Ensure an md array never has too many devices. Each different metadata format supported by md supports a different maximum number of devices. We really should be enforcing this maximum in the kernel, but we aren't quite doing that properly. We currently only enforce it at the 'hot_add' point, which is an older interface which is not used by current userspace. We need to also enforce it at 'add_new_disk' time for active arrays and at 'do_md_run' time when starting a new array. So move the test from 'hot_add' into 'bind_rdev_to_array' which is called from both 'hot_add' and 'add_new_disk, and add a new test in 'analyse_sbs' which is called from 'do_md_run'. This bug (or missing feature) has been around "forever" and so the patch is suitable for any -stable that is currently maintained. Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/md.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 41e2509bf89..4495104f6c9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1481,6 +1481,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) if (find_rdev_nr(mddev, rdev->desc_nr)) return -EBUSY; } + if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) { + printk(KERN_WARNING "md: %s: array is limited to %d devices\n", + mdname(mddev), mddev->max_disks); + return -EBUSY; + } bdevname(rdev->bdev,b); while ( (s=strchr(b, '/')) != NULL) *s = '!'; @@ -2441,6 +2446,15 @@ static void analyze_sbs(mddev_t * mddev) i = 0; rdev_for_each(rdev, tmp, mddev) { + if (rdev->desc_nr >= mddev->max_disks || + i > mddev->max_disks) { + printk(KERN_WARNING + "md: %s: %s: only %d devices permitted\n", + mdname(mddev), bdevname(rdev->bdev, b), + mddev->max_disks); + kick_rdev_from_array(rdev); + continue; + } if (rdev != freshest) if (super_types[mddev->major_version]. validate_super(mddev, rdev)) { @@ -4614,13 +4628,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) * noticed in interrupt contexts ... */ - if (rdev->desc_nr == mddev->max_disks) { - printk(KERN_WARNING "%s: can not hot-add to full array!\n", - mdname(mddev)); - err = -EBUSY; - goto abort_unbind_export; - } - rdev->raid_disk = -1; md_update_sb(mddev, 1); @@ -4634,9 +4641,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) md_new_event(mddev); return 0; -abort_unbind_export: - unbind_rdev_from_array(rdev); - abort_export: export_rdev(rdev); return err;