md/raid5: don't include 'spare' drives when reshaping to fewer devices.
[sfrench/cifs-2.6.git] / drivers / md / raid10.c
index 57d71d5d88f45d842752fe195f3e566faa65e9e3..42e64e4e5e2503fb4b81c12ac6706932f4397ba4 100644 (file)
@@ -18,6 +18,7 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/seq_file.h>
@@ -285,7 +286,8 @@ static void raid10_end_read_request(struct bio *bio, int error)
                 */
                char b[BDEVNAME_SIZE];
                if (printk_ratelimit())
-                       printk(KERN_ERR "raid10: %s: rescheduling sector %llu\n",
+                       printk(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n",
+                              mdname(conf->mddev),
                               bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
                reschedule_retry(r10_bio);
        }
@@ -494,7 +496,7 @@ static int raid10_mergeable_bvec(struct request_queue *q,
  */
 static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 {
-       const unsigned long this_sector = r10_bio->sector;
+       const sector_t this_sector = r10_bio->sector;
        int disk, slot, nslot;
        const int sectors = r10_bio->sectors;
        sector_t new_distance, current_distance;
@@ -788,14 +790,12 @@ static void unfreeze_array(conf_t *conf)
        spin_unlock_irq(&conf->resync_lock);
 }
 
-static int make_request(struct request_queue *q, struct bio * bio)
+static int make_request(mddev_t *mddev, struct bio * bio)
 {
-       mddev_t *mddev = q->queuedata;
        conf_t *conf = mddev->private;
        mirror_info_t *mirror;
        r10bio_t *r10_bio;
        struct bio *read_bio;
-       int cpu;
        int i;
        int chunk_sects = conf->chunk_mask + 1;
        const int rw = bio_data_dir(bio);
@@ -825,16 +825,16 @@ static int make_request(struct request_queue *q, struct bio * bio)
                 */
                bp = bio_split(bio,
                               chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
-               if (make_request(q, &bp->bio1))
+               if (make_request(mddev, &bp->bio1))
                        generic_make_request(&bp->bio1);
-               if (make_request(q, &bp->bio2))
+               if (make_request(mddev, &bp->bio2))
                        generic_make_request(&bp->bio2);
 
                bio_pair_release(bp);
                return 0;
        bad_map:
-               printk("raid10_make_request bug: can't convert block across chunks"
-                      " or bigger than %dk %llu %d\n", chunk_sects/2,
+               printk("md/raid10:%s: make_request bug: can't convert block across chunks"
+                      " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
                       (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
 
                bio_io_error(bio);
@@ -850,12 +850,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
         */
        wait_barrier(conf);
 
-       cpu = part_stat_lock();
-       part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
-       part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
-                     bio_sectors(bio));
-       part_stat_unlock();
-
        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
 
        r10_bio->master_bio = bio;
@@ -1039,9 +1033,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
        }
        set_bit(Faulty, &rdev->flags);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
-       printk(KERN_ALERT "raid10: Disk failure on %s, disabling device.\n"
-               "raid10: Operation continuing on %d devices.\n",
-               bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
+       printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n"
+              KERN_ALERT "md/raid10:%s: Operation continuing on %d devices.\n",
+              mdname(mddev), bdevname(rdev->bdev, b),
+              mdname(mddev), conf->raid_disks - mddev->degraded);
 }
 
 static void print_conf(conf_t *conf)
@@ -1049,19 +1044,19 @@ static void print_conf(conf_t *conf)
        int i;
        mirror_info_t *tmp;
 
-       printk("RAID10 conf printout:\n");
+       printk(KERN_DEBUG "RAID10 conf printout:\n");
        if (!conf) {
-               printk("(!conf)\n");
+               printk(KERN_DEBUG "(!conf)\n");
                return;
        }
-       printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
+       printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
                conf->raid_disks);
 
        for (i = 0; i < conf->raid_disks; i++) {
                char b[BDEVNAME_SIZE];
                tmp = conf->mirrors + i;
                if (tmp->rdev)
-                       printk(" disk %d, wo:%d, o:%d, dev:%s\n",
+                       printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n",
                                i, !test_bit(In_sync, &tmp->rdev->flags),
                                !test_bit(Faulty, &tmp->rdev->flags),
                                bdevname(tmp->rdev->bdev,b));
@@ -1487,14 +1482,14 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
        int sectors = r10_bio->sectors;
        mdk_rdev_t*rdev;
        int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
+       int d = r10_bio->devs[r10_bio->read_slot].devnum;
 
        rcu_read_lock();
-       {
-               int d = r10_bio->devs[r10_bio->read_slot].devnum;
+       rdev = rcu_dereference(conf->mirrors[d].rdev);
+       if (rdev) { /* If rdev is not NULL */
                char b[BDEVNAME_SIZE];
                int cur_read_error_count = 0;
 
-               rdev = rcu_dereference(conf->mirrors[d].rdev);
                bdevname(rdev->bdev, b);
 
                if (test_bit(Faulty, &rdev->flags)) {
@@ -1510,13 +1505,14 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                if (cur_read_error_count > max_read_errors) {
                        rcu_read_unlock();
                        printk(KERN_NOTICE
-                              "raid10: %s: Raid device exceeded "
+                              "md/raid10:%s: %s: Raid device exceeded "
                               "read_error threshold "
                               "[cur %d:max %d]\n",
+                              mdname(mddev),
                               b, cur_read_error_count, max_read_errors);
                        printk(KERN_NOTICE
-                              "raid10: %s: Failing raid "
-                              "device\n", b);
+                              "md/raid10:%s: %s: Failing raid "
+                              "device\n", mdname(mddev), b);
                        md_error(mddev, conf->mirrors[d].rdev);
                        return;
                }
@@ -1534,7 +1530,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 
                rcu_read_lock();
                do {
-                       int d = r10_bio->devs[sl].devnum;
+                       d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (rdev &&
                            test_bit(In_sync, &rdev->flags)) {
@@ -1568,7 +1564,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                rcu_read_lock();
                while (sl != r10_bio->read_slot) {
                        char b[BDEVNAME_SIZE];
-                       int d;
+
                        if (sl==0)
                                sl = conf->copies;
                        sl--;
@@ -1586,15 +1582,16 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                                    == 0) {
                                        /* Well, this device is dead */
                                        printk(KERN_NOTICE
-                                              "raid10:%s: read correction "
+                                              "md/raid10:%s: read correction "
                                               "write failed"
                                               " (%d sectors at %llu on %s)\n",
                                               mdname(mddev), s,
                                               (unsigned long long)(sect+
                                               rdev->data_offset),
                                               bdevname(rdev->bdev, b));
-                                       printk(KERN_NOTICE "raid10:%s: failing "
+                                       printk(KERN_NOTICE "md/raid10:%s: %s: failing "
                                               "drive\n",
+                                              mdname(mddev),
                                               bdevname(rdev->bdev, b));
                                        md_error(mddev, rdev);
                                }
@@ -1604,7 +1601,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                }
                sl = start;
                while (sl != r10_bio->read_slot) {
-                       int d;
+
                        if (sl==0)
                                sl = conf->copies;
                        sl--;
@@ -1622,20 +1619,21 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                                                 READ) == 0) {
                                        /* Well, this device is dead */
                                        printk(KERN_NOTICE
-                                              "raid10:%s: unable to read back "
+                                              "md/raid10:%s: unable to read back "
                                               "corrected sectors"
                                               " (%d sectors at %llu on %s)\n",
                                               mdname(mddev), s,
                                               (unsigned long long)(sect+
                                                    rdev->data_offset),
                                               bdevname(rdev->bdev, b));
-                                       printk(KERN_NOTICE "raid10:%s: failing drive\n",
+                                       printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
+                                              mdname(mddev),
                                               bdevname(rdev->bdev, b));
 
                                        md_error(mddev, rdev);
                                } else {
                                        printk(KERN_INFO
-                                              "raid10:%s: read error corrected"
+                                              "md/raid10:%s: read error corrected"
                                               " (%d sectors at %llu on %s)\n",
                                               mdname(mddev), s,
                                               (unsigned long long)(sect+
@@ -1710,8 +1708,9 @@ static void raid10d(mddev_t *mddev)
                                mddev->ro ? IO_BLOCKED : NULL;
                        mirror = read_balance(conf, r10_bio);
                        if (mirror == -1) {
-                               printk(KERN_ALERT "raid10: %s: unrecoverable I/O"
+                               printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
                                       " read error for block %llu\n",
+                                      mdname(mddev),
                                       bdevname(bio->bi_bdev,b),
                                       (unsigned long long)r10_bio->sector);
                                raid_end_bio_io(r10_bio);
@@ -1721,8 +1720,9 @@ static void raid10d(mddev_t *mddev)
                                bio_put(bio);
                                rdev = conf->mirrors[mirror].rdev;
                                if (printk_ratelimit())
-                                       printk(KERN_ERR "raid10: %s: redirecting sector %llu to"
+                                       printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
                                               " another mirror\n",
+                                              mdname(mddev),
                                               bdevname(rdev->bdev,b),
                                               (unsigned long long)r10_bio->sector);
                                bio = bio_clone(r10_bio->master_bio, GFP_NOIO);
@@ -1980,7 +1980,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                                        r10_bio = rb2;
                                        if (!test_and_set_bit(MD_RECOVERY_INTR,
                                                              &mddev->recovery))
-                                               printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
+                                               printk(KERN_INFO "md/raid10:%s: insufficient "
+                                                      "working devices for recovery.\n",
                                                       mdname(mddev));
                                        break;
                                }
@@ -2160,21 +2161,22 @@ static conf_t *setup_conf(mddev_t *mddev)
        sector_t stride, size;
        int err = -EINVAL;
 
-       if (mddev->chunk_sectors < (PAGE_SIZE >> 9) ||
-           !is_power_of_2(mddev->chunk_sectors)) {
-               printk(KERN_ERR "md/raid10: chunk size must be "
-                      "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE);
+       if (mddev->new_chunk_sectors < (PAGE_SIZE >> 9) ||
+           !is_power_of_2(mddev->new_chunk_sectors)) {
+               printk(KERN_ERR "md/raid10:%s: chunk size must be "
+                      "at least PAGE_SIZE(%ld) and be a power of 2.\n",
+                      mdname(mddev), PAGE_SIZE);
                goto out;
        }
 
-       nc = mddev->layout & 255;
-       fc = (mddev->layout >> 8) & 255;
-       fo = mddev->layout & (1<<16);
+       nc = mddev->new_layout & 255;
+       fc = (mddev->new_layout >> 8) & 255;
+       fo = mddev->new_layout & (1<<16);
 
        if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
-           (mddev->layout >> 17)) {
-               printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",
-                      mdname(mddev), mddev->layout);
+           (mddev->new_layout >> 17)) {
+               printk(KERN_ERR "md/raid10:%s: unsupported raid10 layout: 0x%8x\n",
+                      mdname(mddev), mddev->new_layout);
                goto out;
        }
 
@@ -2239,12 +2241,11 @@ static conf_t *setup_conf(mddev_t *mddev)
        if (!conf->thread)
                goto out;
 
-       conf->scale_disks = 0;
        conf->mddev = mddev;
        return conf;
 
  out:
-       printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
+       printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n",
               mdname(mddev));
        if (conf) {
                if (conf->r10bio_pool)
@@ -2298,11 +2299,6 @@ static int run(mddev_t *mddev)
                if (disk_idx >= conf->raid_disks
                    || disk_idx < 0)
                        continue;
-               if (conf->scale_disks) {
-                       disk_idx *= conf->scale_disks;
-                       rdev->raid_disk = disk_idx;
-                       /* MOVE 'rd%d' link !! */
-               }
                disk = conf->mirrors + disk_idx;
 
                disk->rdev = rdev;
@@ -2322,7 +2318,7 @@ static int run(mddev_t *mddev)
        }
        /* need to check that every block has at least one working mirror */
        if (!enough(conf)) {
-               printk(KERN_ERR "raid10: not enough operational mirrors for %s\n",
+               printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
                       mdname(mddev));
                goto out_free_conf;
        }
@@ -2342,11 +2338,11 @@ static int run(mddev_t *mddev)
        }
 
        if (mddev->recovery_cp != MaxSector)
-               printk(KERN_NOTICE "raid10: %s is not clean"
+               printk(KERN_NOTICE "md/raid10:%s: not clean"
                       " -- starting background reconstruction\n",
                       mdname(mddev));
        printk(KERN_INFO
-               "raid10: raid set %s active with %d out of %d devices\n",
+               "md/raid10:%s: active with %d out of %d devices\n",
                mdname(mddev), conf->raid_disks - mddev->degraded,
                conf->raid_disks);
        /*
@@ -2428,30 +2424,27 @@ static void *raid10_takeover_raid0(mddev_t *mddev)
        conf_t *conf;
 
        if (mddev->degraded > 0) {
-               printk(KERN_ERR "error: degraded raid0!\n");
+               printk(KERN_ERR "md/raid10:%s: Error: degraded raid0!\n",
+                      mdname(mddev));
                return ERR_PTR(-EINVAL);
        }
 
-       /* Update slot numbers to obtain
-        * degraded raid10 with missing mirrors
-        */
-       list_for_each_entry(rdev, &mddev->disks, same_set) {
-               rdev->raid_disk *= 2;
-       }
-
        /* Set new parameters */
        mddev->new_level = 10;
        /* new layout: far_copies = 1, near_copies = 2 */
        mddev->new_layout = (1<<8) + 2;
        mddev->new_chunk_sectors = mddev->chunk_sectors;
        mddev->delta_disks = mddev->raid_disks;
-       mddev->degraded = mddev->raid_disks;
        mddev->raid_disks *= 2;
        /* make sure it will be not marked as dirty */
        mddev->recovery_cp = MaxSector;
 
        conf = setup_conf(mddev);
-       conf->scale_disks = 2;
+       if (!IS_ERR(conf))
+               list_for_each_entry(rdev, &mddev->disks, same_set)
+                       if (rdev->raid_disk >= 0)
+                               rdev->new_raid_disk = rdev->raid_disk * 2;
+               
        return conf;
 }
 
@@ -2466,7 +2459,9 @@ static void *raid10_takeover(mddev_t *mddev)
                /* for raid0 takeover only one zone is supported */
                raid0_priv = mddev->private;
                if (raid0_priv->nr_strip_zones > 1) {
-                       printk(KERN_ERR "md: cannot takeover raid 0 with more than one zone.\n");
+                       printk(KERN_ERR "md/raid10:%s: cannot takeover raid 0"
+                              " with more than one zone.\n",
+                              mdname(mddev));
                        return ERR_PTR(-EINVAL);
                }
                return raid10_takeover_raid0(mddev);