Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0138a727c1f3c220bc91a02c9bdf720599465eb9..d1295aff41739eea048f0e43513dfba6ade4c8f1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -911,7 +911,12 @@ static void flush_pending_writes(struct r10conf *conf)
                while (bio) { /* submit pending writes */
                        struct bio *next = bio->bi_next;
                        bio->bi_next = NULL;
-                       generic_make_request(bio);
+                       if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+                           !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+                               /* Just ignore it */
+                               bio_endio(bio, 0);
+                       else
+                               generic_make_request(bio);
                        bio = next;
                }
        } else
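
(Note on the hunk above; a sketch, not part of the commit.)  The flush path now drops discard requests aimed at members whose queues cannot service them; passing such a bio down would otherwise complete with -EOPNOTSUPP and be counted as a write error on that member.  A minimal sketch of the check, factored into a hypothetical helper -- submit_or_ignore_discard() does not exist in the tree -- assuming the 3.7-era bio API (bio->bi_rw flags, two-argument bio_endio()):

    static void submit_or_ignore_discard(struct bio *bio)
    {
            struct request_queue *q = bdev_get_queue(bio->bi_bdev);

            if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(q)))
                    bio_endio(bio, 0);      /* complete the discard as a harmless no-op */
            else
                    generic_make_request(bio);
    }
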
@@ -1050,6 +1055,44 @@ static sector_t choose_data_offset(struct r10bio *r10_bio,
                return rdev->new_data_offset;
 }
 
+struct raid10_plug_cb {
+       struct blk_plug_cb      cb;
+       struct bio_list         pending;
+       int                     pending_cnt;
+};
+
+static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+       struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
+                                                  cb);
+       struct mddev *mddev = plug->cb.data;
+       struct r10conf *conf = mddev->private;
+       struct bio *bio;
+
+       if (from_schedule) {
+               spin_lock_irq(&conf->device_lock);
+               bio_list_merge(&conf->pending_bio_list, &plug->pending);
+               conf->pending_count += plug->pending_cnt;
+               spin_unlock_irq(&conf->device_lock);
+               md_wakeup_thread(mddev->thread);
+               kfree(plug);
+               return;
+       }
+
+       /* we aren't scheduling, so we can do the write-out directly. */
+       bio = bio_list_get(&plug->pending);
+       bitmap_unplug(mddev->bitmap);
+       wake_up(&conf->wait_barrier);
+
+       while (bio) { /* submit pending writes */
+               struct bio *next = bio->bi_next;
+               bio->bi_next = NULL;
+               generic_make_request(bio);
+               bio = next;
+       }
+       kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
        struct r10conf *conf = mddev->private;
@@ -1061,8 +1104,12 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        const int rw = bio_data_dir(bio);
        const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
        const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
+       const unsigned long do_discard = (bio->bi_rw
+                                         & (REQ_DISCARD | REQ_SECURE));
        unsigned long flags;
        struct md_rdev *blocked_rdev;
+       struct blk_plug_cb *cb;
+       struct raid10_plug_cb *plug = NULL;
        int sectors_handled;
        int max_sectors;
        int sectors;
@@ -1081,7 +1128,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                         || conf->prev.near_copies < conf->prev.raid_disks))) {
                struct bio_pair *bp;
                /* Sanity check -- queue functions should prevent this happening */
-               if (bio->bi_vcnt != 1 ||
+               if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
                    bio->bi_idx != 0)
                        goto bad_map;
                /* This is a one page bio that upper layers
@@ -1410,15 +1457,26 @@ retry_write:
                                                      conf->mirrors[d].rdev));
                mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
                mbio->bi_end_io = raid10_end_write_request;
-               mbio->bi_rw = WRITE | do_sync | do_fua;
+               mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
                mbio->bi_private = r10_bio;
 
                atomic_inc(&r10_bio->remaining);
+
+               cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
+               if (cb)
+                       plug = container_of(cb, struct raid10_plug_cb, cb);
+               else
+                       plug = NULL;
                spin_lock_irqsave(&conf->device_lock, flags);
-               bio_list_add(&conf->pending_bio_list, mbio);
-               conf->pending_count++;
+               if (plug) {
+                       bio_list_add(&plug->pending, mbio);
+                       plug->pending_cnt++;
+               } else {
+                       bio_list_add(&conf->pending_bio_list, mbio);
+                       conf->pending_count++;
+               }
                spin_unlock_irqrestore(&conf->device_lock, flags);
-               if (!mddev_check_plugged(mddev))
+               if (!plug)
                        md_wakeup_thread(mddev->thread);
 
                if (!r10_bio->devs[i].repl_bio)
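
(Background for the plugging change above; a sketch, not part of the commit.)  blk_check_plugged() returns a callback attached to the current task's blk_plug, allocating a zeroed raid10_plug_cb on first use, so the embedded bio_list starts out empty.  Writes queued on plug->pending are submitted later by raid10_unplug(): directly when the task unplugs normally, or handed back to the md thread when the unplug is driven from the scheduler.  A condensed view of the per-bio decision, with device_lock and the bitmap/barrier handling elided:

    cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
    plug = cb ? container_of(cb, struct raid10_plug_cb, cb) : NULL;

    if (plug) {
            bio_list_add(&plug->pending, mbio);     /* flushed by raid10_unplug() */
            plug->pending_cnt++;
    } else {
            bio_list_add(&conf->pending_bio_list, mbio);
            conf->pending_count++;
            md_wakeup_thread(mddev->thread);        /* raid10d will submit it */
    }
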
@@ -1439,7 +1497,7 @@ retry_write:
                                           conf->mirrors[d].replacement));
                mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
                mbio->bi_end_io = raid10_end_write_request;
-               mbio->bi_rw = WRITE | do_sync | do_fua;
+               mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
                mbio->bi_private = r10_bio;
 
                atomic_inc(&r10_bio->remaining);
@@ -1638,7 +1696,7 @@ static int raid10_spare_active(struct mddev *mddev)
                           && !test_bit(Faulty, &tmp->rdev->flags)
                           && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
                        count++;
-                       sysfs_notify_dirent(tmp->rdev->sysfs_state);
+                       sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
                }
        }
        spin_lock_irqsave(&conf->device_lock, flags);
@@ -1725,6 +1783,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                clear_bit(Unmerged, &rdev->flags);
        }
        md_integrity_add_rdev(rdev, mddev);
+       if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
        print_conf(conf);
        return err;
 }
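
(Note, not part of the commit.)  Together with the run() changes further down, the policy is to advertise QUEUE_FLAG_DISCARD on the array queue as soon as at least one member queue supports discard, and to let the per-bio check added to flush_pending_writes() above quietly complete discards that land on members which do not.  A compact sketch of that decision, assuming the md rdev_for_each() iterator and with locking and error handling elided:

    bool discard_supported = false;

    rdev_for_each(rdev, mddev)
            if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
                    discard_supported = true;

    if (mddev->queue) {
            if (discard_supported)
                    queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
            else
                    queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
    }
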
@@ -1952,7 +2013,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                                        break;
                        if (j == vcnt)
                                continue;
-                       mddev->resync_mismatches += r10_bio->sectors;
+                       atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
                        if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
                                /* Don't fix anything. */
                                continue;
@@ -2673,8 +2734,9 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
        }
 }
 
-static void raid10d(struct mddev *mddev)
+static void raid10d(struct md_thread *thread)
 {
+       struct mddev *mddev = thread->mddev;
        struct r10bio *r10_bio;
        unsigned long flags;
        struct r10conf *conf = mddev->private;
@@ -3158,7 +3220,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                else {
                                        bad_sectors -= (sector - first_bad);
                                        if (max_sync > bad_sectors)
-                                               max_sync = max_sync;
+                                               max_sync = bad_sectors;
                                        continue;
                                }
                        }
@@ -3482,6 +3544,7 @@ static int run(struct mddev *mddev)
        sector_t size;
        sector_t min_offset_diff = 0;
        int first = 1;
+       bool discard_supported = false;
 
        if (mddev->private == NULL) {
                conf = setup_conf(mddev);
@@ -3498,6 +3561,8 @@ static int run(struct mddev *mddev)
 
        chunk_size = mddev->chunk_sectors << 9;
        if (mddev->queue) {
+               blk_queue_max_discard_sectors(mddev->queue,
+                                             mddev->chunk_sectors);
                blk_queue_io_min(mddev->queue, chunk_size);
                if (conf->geo.raid_disks % conf->geo.near_copies)
                        blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3543,8 +3608,19 @@ static int run(struct mddev *mddev)
                                          rdev->data_offset << 9);
 
                disk->head_position = 0;
+
+               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+                       discard_supported = true;
        }
 
+       if (mddev->queue) {
+               if (discard_supported)
+                       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+                                               mddev->queue);
+               else
+                       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+                                                 mddev->queue);
+       }
        /* need to check that every block has at least one working mirror */
        if (!enough(conf, -1)) {
                printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",