Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0138a727c1f3c220bc91a02c9bdf720599465eb9..d1295aff41739eea048f0e43513dfba6ade4c8f1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -911,7 +911,12 @@ static void flush_pending_writes(struct r10conf *conf)
                while (bio) { /* submit pending writes */
                        struct bio *next = bio->bi_next;
                        bio->bi_next = NULL;
-                       generic_make_request(bio);
+                       if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+                           !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+                               /* Just ignore it */
+                               bio_endio(bio, 0);
+                       else
+                               generic_make_request(bio);
                        bio = next;
                }
        } else
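
(Note on the hunk above; a sketch, not part of the commit.)  The flush path now drops discard requests aimed at members whose queues cannot service them; passing such a bio down would otherwise complete with -EOPNOTSUPP and be counted as a write error on that member.  A minimal sketch of the check, factored into a hypothetical helper -- submit_or_ignore_discard() does not exist in the tree -- assuming the 3.7-era bio API (bio->bi_rw flags, two-argument bio_endio()):

    static void submit_or_ignore_discard(struct bio *bio)
    {
            struct request_queue *q = bdev_get_queue(bio->bi_bdev);

            if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(q)))
                    bio_endio(bio, 0);      /* complete the discard as a harmless no-op */
            else
                    generic_make_request(bio);
    }
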
@@ -1050,6 +1055,44 @@ static sector_t choose_data_offset(struct r10bio *r10_bio,
                return rdev->new_data_offset;
 }
 
+struct raid10_plug_cb {
+       struct blk_plug_cb      cb;
+       struct bio_list         pending;
+       int                     pending_cnt;
+};
+
+static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+       struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
+                                                  cb);
+       struct mddev *mddev = plug->cb.data;
+       struct r10conf *conf = mddev->private;
+       struct bio *bio;
+
+       if (from_schedule) {
+               spin_lock_irq(&conf->device_lock);
+               bio_list_merge(&conf->pending_bio_list, &plug->pending);
+               conf->pending_count += plug->pending_cnt;
+               spin_unlock_irq(&conf->device_lock);
+               md_wakeup_thread(mddev->thread);
+               kfree(plug);
+               return;
+       }
+
+       /* we aren't scheduling, so we can do the write-out directly. */
+       bio = bio_list_get(&plug->pending);
+       bitmap_unplug(mddev->bitmap);
+       wake_up(&conf->wait_barrier);
+
+       while (bio) { /* submit pending writes */
+               struct bio *next = bio->bi_next;
+               bio->bi_next = NULL;
+               generic_make_request(bio);
+               bio = next;
+       }
+       kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
        struct r10conf *conf = mddev->private;
@@ -1061,8 +1104,12 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        const int rw = bio_data_dir(bio);
        const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
        const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
+       const unsigned long do_discard = (bio->bi_rw
+                                         & (REQ_DISCARD | REQ_SECURE));
        unsigned long flags;
        struct md_rdev *blocked_rdev;
+       struct blk_plug_cb *cb;
+       struct raid10_plug_cb *plug = NULL;
        int sectors_handled;
        int max_sectors;
        int sectors;
@@ -1081,7 +1128,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                         || conf->prev.near_copies < conf->prev.raid_disks))) {
                struct bio_pair *bp;
                /* Sanity check -- queue functions should prevent this happening */
-               if (bio->bi_vcnt != 1 ||
+               if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
                    bio->bi_idx != 0)
                        goto bad_map;
                /* This is a one page bio that upper layers
@@ -1410,15 +1457,26 @@ retry_write:
                                                      conf->mirrors[d].rdev));
                mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
                mbio->bi_end_io = raid10_end_write_request;
-               mbio->bi_rw = WRITE | do_sync | do_fua;
+               mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
                mbio->bi_private = r10_bio;
 
                atomic_inc(&r10_bio->remaining);
+
+               cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
+               if (cb)
+                       plug = container_of(cb, struct raid10_plug_cb, cb);
+               else
+                       plug = NULL;
                spin_lock_irqsave(&conf->device_lock, flags);
-               bio_list_add(&conf->pending_bio_list, mbio);
-               conf->pending_count++;
+               if (plug) {
+                       bio_list_add(&plug->pending, mbio);
+                       plug->pending_cnt++;
+               } else {
+                       bio_list_add(&conf->pending_bio_list, mbio);
+                       conf->pending_count++;
+               }
                spin_unlock_irqrestore(&conf->device_lock, flags);
-               if (!mddev_check_plugged(mddev))
+               if (!plug)
                        md_wakeup_thread(mddev->thread);
 
                if (!r10_bio->devs[i].repl_bio)
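
(Background for the plugging change above; a sketch, not part of the commit.)  blk_check_plugged() returns a callback attached to the current task's blk_plug, allocating a zeroed raid10_plug_cb on first use, so the embedded bio_list starts out empty.  Writes queued on plug->pending are submitted later by raid10_unplug(): directly when the task unplugs normally, or handed back to the md thread when the unplug is driven from the scheduler.  A condensed view of the per-bio decision, with device_lock and the bitmap/barrier handling elided:

    cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
    plug = cb ? container_of(cb, struct raid10_plug_cb, cb) : NULL;

    if (plug) {
            bio_list_add(&plug->pending, mbio);     /* flushed by raid10_unplug() */
            plug->pending_cnt++;
    } else {
            bio_list_add(&conf->pending_bio_list, mbio);
            conf->pending_count++;
            md_wakeup_thread(mddev->thread);        /* raid10d will submit it */
    }
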
@@ -1439,7 +1497,7 @@ retry_write:
                                           conf->mirrors[d].replacement));
                mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
                mbio->bi_end_io = raid10_end_write_request;
-               mbio->bi_rw = WRITE | do_sync | do_fua;
+               mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
                mbio->bi_private = r10_bio;
 
                atomic_inc(&r10_bio->remaining);
@@ -1638,7 +1696,7 @@ static int raid10_spare_active(struct mddev *mddev)
                           && !test_bit(Faulty, &tmp->rdev->flags)
                           && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
                        count++;
-                       sysfs_notify_dirent(tmp->rdev->sysfs_state);
+                       sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
                }
        }
        spin_lock_irqsave(&conf->device_lock, flags);
@@ -1725,6 +1783,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                clear_bit(Unmerged, &rdev->flags);
        }
        md_integrity_add_rdev(rdev, mddev);
+       if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
        print_conf(conf);
        return err;
 }
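
(Note, not part of the commit.)  Together with the run() changes further down, the policy is to advertise QUEUE_FLAG_DISCARD on the array queue as soon as at least one member queue supports discard, and to let the per-bio check added to flush_pending_writes() above quietly complete discards that land on members which do not.  A compact sketch of that decision, assuming the md rdev_for_each() iterator and with locking and error handling elided:

    bool discard_supported = false;

    rdev_for_each(rdev, mddev)
            if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
                    discard_supported = true;

    if (mddev->queue) {
            if (discard_supported)
                    queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
            else
                    queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
    }
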
@@ -1952,7 +2013,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                                        break;
                        if (j == vcnt)
                                continue;
-                       mddev->resync_mismatches += r10_bio->sectors;
+                       atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
                        if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
                                /* Don't fix anything. */
                                continue;
@@ -2673,8 +2734,9 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
        }
 }
 
-static void raid10d(struct mddev *mddev)
+static void raid10d(struct md_thread *thread)
 {
+       struct mddev *mddev = thread->mddev;
        struct r10bio *r10_bio;
        unsigned long flags;
        struct r10conf *conf = mddev->private;
@@ -3158,7 +3220,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                else {
                                        bad_sectors -= (sector - first_bad);
                                        if (max_sync > bad_sectors)
-                                               max_sync = max_sync;
+                                               max_sync = bad_sectors;
                                        continue;
                                }
                        }
@@ -3482,6 +3544,7 @@ static int run(struct mddev *mddev)
        sector_t size;
        sector_t min_offset_diff = 0;
        int first = 1;
+       bool discard_supported = false;
 
        if (mddev->private == NULL) {
                conf = setup_conf(mddev);
@@ -3498,6 +3561,8 @@ static int run(struct mddev *mddev)
 
        chunk_size = mddev->chunk_sectors << 9;
        if (mddev->queue) {
+               blk_queue_max_discard_sectors(mddev->queue,
+                                             mddev->chunk_sectors);
                blk_queue_io_min(mddev->queue, chunk_size);
                if (conf->geo.raid_disks % conf->geo.near_copies)
                        blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3543,8 +3608,19 @@ static int run(struct mddev *mddev)
                                          rdev->data_offset << 9);
 
                disk->head_position = 0;
+
+               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+                       discard_supported = true;
        }
 
+       if (mddev->queue) {
+               if (discard_supported)
+                       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+                                               mddev->queue);
+               else
+                       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+                                                 mddev->queue);
+       }
        /* need to check that every block has at least one working mirror */
        if (!enough(conf, -1)) {
                printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",