md/raid5: don't include 'spare' drives when reshaping to fewer devices.

[sfrench/cifs-2.6.git] / drivers / md / raid10.c
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index 57d71d5d88f45d842752fe195f3e566faa65e9e3..42e64e4e5e2503fb4b81c12ac6706932f4397ba4 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -18,6 +18,7 @@
   * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   */
  
+#include <linux/slab.h>
  #include <linux/delay.h>
  #include <linux/blkdev.h>
  #include <linux/seq_file.h>
@@ -285,7 +286,8 @@ static void raid10_end_read_request(struct bio *bio, int error)
                  */
                 char b[BDEVNAME_SIZE];
                 if (printk_ratelimit())
-                       printk(KERN_ERR "raid10: %s: rescheduling sector %llu\n",
+                       printk(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n",
+                              mdname(conf->mddev),
                                bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
                 reschedule_retry(r10_bio);
         }
@@ -494,7 +496,7 @@ static int raid10_mergeable_bvec(struct request_queue *q,
   */
  static int read_balance(conf_t *conf, r10bio_t *r10_bio)
  {
-       const unsigned long this_sector = r10_bio->sector;
+       const sector_t this_sector = r10_bio->sector;
         int disk, slot, nslot;
         const int sectors = r10_bio->sectors;
         sector_t new_distance, current_distance;
@@ -788,14 +790,12 @@ static void unfreeze_array(conf_t *conf)
         spin_unlock_irq(&conf->resync_lock);
  }
  
-static int make_request(struct request_queue *q, struct bio * bio)
+static int make_request(mddev_t *mddev, struct bio * bio)
  {
-       mddev_t *mddev = q->queuedata;
         conf_t *conf = mddev->private;
         mirror_info_t *mirror;
         r10bio_t *r10_bio;
         struct bio *read_bio;
-       int cpu;
         int i;
         int chunk_sects = conf->chunk_mask + 1;
         const int rw = bio_data_dir(bio);
@@ -825,16 +825,16 @@ static int make_request(struct request_queue *q, struct bio * bio)
                  */
                 bp = bio_split(bio,
                                chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
-               if (make_request(q, &bp->bio1))
+               if (make_request(mddev, &bp->bio1))
                         generic_make_request(&bp->bio1);
-               if (make_request(q, &bp->bio2))
+               if (make_request(mddev, &bp->bio2))
                         generic_make_request(&bp->bio2);
  
                 bio_pair_release(bp);
                 return 0;
         bad_map:
-               printk("raid10_make_request bug: can't convert block across chunks"
-                      " or bigger than %dk %llu %d\n", chunk_sects/2,
+               printk("md/raid10:%s: make_request bug: can't convert block across chunks"
+                      " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
                        (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
  
                 bio_io_error(bio);
@@ -850,12 +850,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
          */
         wait_barrier(conf);
  
-       cpu = part_stat_lock();
-       part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
-       part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
-                     bio_sectors(bio));
-       part_stat_unlock();
-
         r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
  
         r10_bio->master_bio = bio;
@@ -1039,9 +1033,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
         }
         set_bit(Faulty, &rdev->flags);
         set_bit(MD_CHANGE_DEVS, &mddev->flags);
-       printk(KERN_ALERT "raid10: Disk failure on %s, disabling device.\n"
-               "raid10: Operation continuing on %d devices.\n",
-               bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
+       printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n"
+              KERN_ALERT "md/raid10:%s: Operation continuing on %d devices.\n",
+              mdname(mddev), bdevname(rdev->bdev, b),
+              mdname(mddev), conf->raid_disks - mddev->degraded);
  }
  
  static void print_conf(conf_t *conf)
@@ -1049,19 +1044,19 @@ static void print_conf(conf_t *conf)
         int i;
         mirror_info_t *tmp;
  
-       printk("RAID10 conf printout:\n");
+       printk(KERN_DEBUG "RAID10 conf printout:\n");
         if (!conf) {
-               printk("(!conf)\n");
+               printk(KERN_DEBUG "(!conf)\n");
                 return;
         }
-       printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
+       printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
                 conf->raid_disks);
  
         for (i = 0; i < conf->raid_disks; i++) {
                 char b[BDEVNAME_SIZE];
                 tmp = conf->mirrors + i;
                 if (tmp->rdev)
-                       printk(" disk %d, wo:%d, o:%d, dev:%s\n",
+                       printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n",
                                 i, !test_bit(In_sync, &tmp->rdev->flags),
                                 !test_bit(Faulty, &tmp->rdev->flags),
                                 bdevname(tmp->rdev->bdev,b));
@@ -1487,14 +1482,14 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
         int sectors = r10_bio->sectors;
         mdk_rdev_t*rdev;
         int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
+       int d = r10_bio->devs[r10_bio->read_slot].devnum;
  
         rcu_read_lock();
-       {
-               int d = r10_bio->devs[r10_bio->read_slot].devnum;
+       rdev = rcu_dereference(conf->mirrors[d].rdev);
+       if (rdev) { /* If rdev is not NULL */
                 char b[BDEVNAME_SIZE];
                 int cur_read_error_count = 0;
  
-               rdev = rcu_dereference(conf->mirrors[d].rdev);
                 bdevname(rdev->bdev, b);
  
                 if (test_bit(Faulty, &rdev->flags)) {
@@ -1510,13 +1505,14 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                 if (cur_read_error_count > max_read_errors) {
                         rcu_read_unlock();
                         printk(KERN_NOTICE
-                              "raid10: %s: Raid device exceeded "
+                              "md/raid10:%s: %s: Raid device exceeded "
                                "read_error threshold "
                                "[cur %d:max %d]\n",
+                              mdname(mddev),
                                b, cur_read_error_count, max_read_errors);
                         printk(KERN_NOTICE
-                              "raid10: %s: Failing raid "
-                              "device\n", b);
+                              "md/raid10:%s: %s: Failing raid "
+                              "device\n", mdname(mddev), b);
                         md_error(mddev, conf->mirrors[d].rdev);
                         return;
                 }
@@ -1534,7 +1530,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
  
                 rcu_read_lock();
                 do {
-                       int d = r10_bio->devs[sl].devnum;
+                       d = r10_bio->devs[sl].devnum;
                         rdev = rcu_dereference(conf->mirrors[d].rdev);
                         if (rdev &&
                             test_bit(In_sync, &rdev->flags)) {
@@ -1568,7 +1564,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                 rcu_read_lock();
                 while (sl != r10_bio->read_slot) {
                         char b[BDEVNAME_SIZE];
-                       int d;
+
                         if (sl==0)
                                 sl = conf->copies;
                         sl--;
@@ -1586,15 +1582,16 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                                     == 0) {
                                         /* Well, this device is dead */
                                         printk(KERN_NOTICE
-                                              "raid10:%s: read correction "
+                                              "md/raid10:%s: read correction "
                                                "write failed"
                                                " (%d sectors at %llu on %s)\n",
                                                mdname(mddev), s,
                                                (unsigned long long)(sect+
                                                rdev->data_offset),
                                                bdevname(rdev->bdev, b));
-                                       printk(KERN_NOTICE "raid10:%s: failing "
+                                       printk(KERN_NOTICE "md/raid10:%s: %s: failing "
                                                "drive\n",
+                                              mdname(mddev),
                                                bdevname(rdev->bdev, b));
                                         md_error(mddev, rdev);
                                 }
@@ -1604,7 +1601,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                 }
                 sl = start;
                 while (sl != r10_bio->read_slot) {
-                       int d;
+
                         if (sl==0)
                                 sl = conf->copies;
                         sl--;
@@ -1622,20 +1619,21 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                                                  READ) == 0) {
                                         /* Well, this device is dead */
                                         printk(KERN_NOTICE
-                                              "raid10:%s: unable to read back "
+                                              "md/raid10:%s: unable to read back "
                                                "corrected sectors"
                                                " (%d sectors at %llu on %s)\n",
                                                mdname(mddev), s,
                                                (unsigned long long)(sect+
                                                     rdev->data_offset),
                                                bdevname(rdev->bdev, b));
-                                       printk(KERN_NOTICE "raid10:%s: failing drive\n",
+                                       printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
+                                              mdname(mddev),
                                                bdevname(rdev->bdev, b));
  
                                         md_error(mddev, rdev);
                                 } else {
                                         printk(KERN_INFO
-                                              "raid10:%s: read error corrected"
+                                              "md/raid10:%s: read error corrected"
                                                " (%d sectors at %llu on %s)\n",
                                                mdname(mddev), s,
                                                (unsigned long long)(sect+
@@ -1710,8 +1708,9 @@ static void raid10d(mddev_t *mddev)
                                 mddev->ro ? IO_BLOCKED : NULL;
                         mirror = read_balance(conf, r10_bio);
                         if (mirror == -1) {
-                               printk(KERN_ALERT "raid10: %s: unrecoverable I/O"
+                               printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
                                        " read error for block %llu\n",
+                                      mdname(mddev),
                                        bdevname(bio->bi_bdev,b),
                                        (unsigned long long)r10_bio->sector);
                                 raid_end_bio_io(r10_bio);
@@ -1721,8 +1720,9 @@ static void raid10d(mddev_t *mddev)
                                 bio_put(bio);
                                 rdev = conf->mirrors[mirror].rdev;
                                 if (printk_ratelimit())
-                                       printk(KERN_ERR "raid10: %s: redirecting sector %llu to"
+                                       printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
                                                " another mirror\n",
+                                              mdname(mddev),
                                                bdevname(rdev->bdev,b),
                                                (unsigned long long)r10_bio->sector);
                                 bio = bio_clone(r10_bio->master_bio, GFP_NOIO);
@@ -1980,7 +1980,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                                         r10_bio = rb2;
                                         if (!test_and_set_bit(MD_RECOVERY_INTR,
                                                               &mddev->recovery))
-                                               printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
+                                               printk(KERN_INFO "md/raid10:%s: insufficient "
+                                                      "working devices for recovery.\n",
                                                        mdname(mddev));
                                         break;
                                 }
@@ -2160,21 +2161,22 @@ static conf_t *setup_conf(mddev_t *mddev)
         sector_t stride, size;
         int err = -EINVAL;
  
-       if (mddev->chunk_sectors < (PAGE_SIZE >> 9) ||
-           !is_power_of_2(mddev->chunk_sectors)) {
-               printk(KERN_ERR "md/raid10: chunk size must be "
-                      "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE);
+       if (mddev->new_chunk_sectors < (PAGE_SIZE >> 9) ||
+           !is_power_of_2(mddev->new_chunk_sectors)) {
+               printk(KERN_ERR "md/raid10:%s: chunk size must be "
+                      "at least PAGE_SIZE(%ld) and be a power of 2.\n",
+                      mdname(mddev), PAGE_SIZE);
                 goto out;
         }
  
-       nc = mddev->layout & 255;
-       fc = (mddev->layout >> 8) & 255;
-       fo = mddev->layout & (1<<16);
+       nc = mddev->new_layout & 255;
+       fc = (mddev->new_layout >> 8) & 255;
+       fo = mddev->new_layout & (1<<16);
  
         if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
-           (mddev->layout >> 17)) {
-               printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",
-                      mdname(mddev), mddev->layout);
+           (mddev->new_layout >> 17)) {
+               printk(KERN_ERR "md/raid10:%s: unsupported raid10 layout: 0x%8x\n",
+                      mdname(mddev), mddev->new_layout);
                 goto out;
         }
  
@@ -2239,12 +2241,11 @@ static conf_t *setup_conf(mddev_t *mddev)
         if (!conf->thread)
                 goto out;
  
-       conf->scale_disks = 0;
         conf->mddev = mddev;
         return conf;
  
   out:
-       printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
+       printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n",
                mdname(mddev));
         if (conf) {
                 if (conf->r10bio_pool)
@@ -2298,11 +2299,6 @@ static int run(mddev_t *mddev)
                 if (disk_idx >= conf->raid_disks
                     || disk_idx < 0)
                         continue;
-               if (conf->scale_disks) {
-                       disk_idx *= conf->scale_disks;
-                       rdev->raid_disk = disk_idx;
-                       /* MOVE 'rd%d' link !! */
-               }
                 disk = conf->mirrors + disk_idx;
  
                 disk->rdev = rdev;
@@ -2322,7 +2318,7 @@ static int run(mddev_t *mddev)
         }
         /* need to check that every block has at least one working mirror */
         if (!enough(conf)) {
-               printk(KERN_ERR "raid10: not enough operational mirrors for %s\n",
+               printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
                        mdname(mddev));
                 goto out_free_conf;
         }
@@ -2342,11 +2338,11 @@ static int run(mddev_t *mddev)
         }
  
         if (mddev->recovery_cp != MaxSector)
-               printk(KERN_NOTICE "raid10: %s is not clean"
+               printk(KERN_NOTICE "md/raid10:%s: not clean"
                        " -- starting background reconstruction\n",
                        mdname(mddev));
         printk(KERN_INFO
-               "raid10: raid set %s active with %d out of %d devices\n",
+               "md/raid10:%s: active with %d out of %d devices\n",
                 mdname(mddev), conf->raid_disks - mddev->degraded,
                 conf->raid_disks);
         /*
@@ -2428,30 +2424,27 @@ static void *raid10_takeover_raid0(mddev_t *mddev)
         conf_t *conf;
  
         if (mddev->degraded > 0) {
-               printk(KERN_ERR "error: degraded raid0!\n");
+               printk(KERN_ERR "md/raid10:%s: Error: degraded raid0!\n",
+                      mdname(mddev));
                 return ERR_PTR(-EINVAL);
         }
  
-       /* Update slot numbers to obtain
-        * degraded raid10 with missing mirrors
-        */
-       list_for_each_entry(rdev, &mddev->disks, same_set) {
-               rdev->raid_disk *= 2;
-       }
-
         /* Set new parameters */
         mddev->new_level = 10;
         /* new layout: far_copies = 1, near_copies = 2 */
         mddev->new_layout = (1<<8) + 2;
         mddev->new_chunk_sectors = mddev->chunk_sectors;
         mddev->delta_disks = mddev->raid_disks;
-       mddev->degraded = mddev->raid_disks;
         mddev->raid_disks *= 2;
         /* make sure it will be not marked as dirty */
         mddev->recovery_cp = MaxSector;
  
         conf = setup_conf(mddev);
-       conf->scale_disks = 2;
+       if (!IS_ERR(conf))
+               list_for_each_entry(rdev, &mddev->disks, same_set)
+                       if (rdev->raid_disk >= 0)
+                               rdev->new_raid_disk = rdev->raid_disk * 2;
+               
         return conf;
  }
  
@@ -2466,7 +2459,9 @@ static void *raid10_takeover(mddev_t *mddev)
                 /* for raid0 takeover only one zone is supported */
                 raid0_priv = mddev->private;
                 if (raid0_priv->nr_strip_zones > 1) {
-                       printk(KERN_ERR "md: cannot takeover raid 0 with more than one zone.\n");
+                       printk(KERN_ERR "md/raid10:%s: cannot takeover raid 0"
+                              " with more than one zone.\n",
+                              mdname(mddev));
                         return ERR_PTR(-EINVAL);
                 }
                 return raid10_takeover_raid0(mddev);