Merge branch 'for-4.6/core' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 18 Mar 2016 23:43:11 +0000 (16:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 18 Mar 2016 23:43:11 +0000 (16:43 -0700)
diff --git a/block/bio.c b/block/bio.c

index cf7591551b1716b74fa3765cb1c271a554e8ef56..f124a0a624fcbeea867865e7a53f634275dfb4b8 100644 (file)
--- a/block/bio.c
+++ b/block/bio.c
@@ -296,13 +296,19 @@ void bio_reset(struct bio *bio)
  }
  EXPORT_SYMBOL(bio_reset);
  
-static void bio_chain_endio(struct bio *bio)
+static struct bio *__bio_chain_endio(struct bio *bio)
  {
         struct bio *parent = bio->bi_private;
  
-       parent->bi_error = bio->bi_error;
-       bio_endio(parent);
+       if (!parent->bi_error)
+               parent->bi_error = bio->bi_error;
         bio_put(bio);
+       return parent;
+}
+
+static void bio_chain_endio(struct bio *bio)
+{
+       bio_endio(__bio_chain_endio(bio));
  }
  
  /*
@@ -1742,29 +1748,25 @@ static inline bool bio_remaining_done(struct bio *bio)
   **/
  void bio_endio(struct bio *bio)
  {
-       while (bio) {
-               if (unlikely(!bio_remaining_done(bio)))
-                       break;
+again:
+       if (!bio_remaining_done(bio))
+               return;
  
-               /*
-                * Need to have a real endio function for chained bios,
-                * otherwise various corner cases will break (like stacking
-                * block devices that save/restore bi_end_io) - however, we want
-                * to avoid unbounded recursion and blowing the stack. Tail call
-                * optimization would handle this, but compiling with frame
-                * pointers also disables gcc's sibling call optimization.
-                */
-               if (bio->bi_end_io == bio_chain_endio) {
-                       struct bio *parent = bio->bi_private;
-                       parent->bi_error = bio->bi_error;
-                       bio_put(bio);
-                       bio = parent;
-               } else {
-                       if (bio->bi_end_io)
-                               bio->bi_end_io(bio);
-                       bio = NULL;
-               }
+       /*
+        * Need to have a real endio function for chained bios, otherwise
+        * various corner cases will break (like stacking block devices that
+        * save/restore bi_end_io) - however, we want to avoid unbounded
+        * recursion and blowing the stack. Tail call optimization would
+        * handle this, but compiling with frame pointers also disables
+        * gcc's sibling call optimization.
+        */
+       if (bio->bi_end_io == bio_chain_endio) {
+               bio = __bio_chain_endio(bio);
+               goto again;
         }
+
+       if (bio->bi_end_io)
+               bio->bi_end_io(bio);
  }
  EXPORT_SYMBOL(bio_endio);
  
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c

index 1cf18784c5cf3c44be94dbd003ca9d7088f883e0..431fdda21737cb91b9a5a0f49fd1c2ccd2cfae7c 100644 (file)
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -408,17 +408,18 @@ void blk_mq_unregister_disk(struct gendisk *disk)
         blk_mq_enable_hotplug();
  }
  
+void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
+{
+       kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
+}
+
  static void blk_mq_sysfs_init(struct request_queue *q)
  {
-       struct blk_mq_hw_ctx *hctx;
         struct blk_mq_ctx *ctx;
         int i;
  
         kobject_init(&q->mq_kobj, &blk_mq_ktype);
  
-       queue_for_each_hw_ctx(q, hctx, i)
-               kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
-
         queue_for_each_ctx(q, ctx, i)
                 kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
  }
diff --git a/block/blk-mq.c b/block/blk-mq.c

index 56c0a726b619374ec9f8cf4b60649a14919f0625..050f7a13021baca7347df79d8d26dad2b89e327e 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -544,7 +544,10 @@ EXPORT_SYMBOL(blk_mq_abort_requeue_list);
  
  struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
  {
-       return tags->rqs[tag];
+       if (tag < tags->nr_tags)
+               return tags->rqs[tag];
+
+       return NULL;
  }
  EXPORT_SYMBOL(blk_mq_tag_to_rq);
  
@@ -1744,31 +1747,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
         return -1;
  }
  
-static int blk_mq_init_hw_queues(struct request_queue *q,
-               struct blk_mq_tag_set *set)
-{
-       struct blk_mq_hw_ctx *hctx;
-       unsigned int i;
-
-       /*
-        * Initialize hardware queues
-        */
-       queue_for_each_hw_ctx(q, hctx, i) {
-               if (blk_mq_init_hctx(q, set, hctx, i))
-                       break;
-       }
-
-       if (i == q->nr_hw_queues)
-               return 0;
-
-       /*
-        * Init failed
-        */
-       blk_mq_exit_hw_queues(q, set, i);
-
-       return 1;
-}
-
  static void blk_mq_init_cpu_queues(struct request_queue *q,
                                    unsigned int nr_hw_queues)
  {
@@ -1826,6 +1804,7 @@ static void blk_mq_map_swqueue(struct request_queue *q,
                         continue;
  
                 hctx = q->mq_ops->map_queue(q, i);
+
                 cpumask_set_cpu(i, hctx->cpumask);
                 ctx->index_hw = hctx->nr_ctx;
                 hctx->ctxs[hctx->nr_ctx++] = ctx;
@@ -1974,56 +1953,93 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
  }
  EXPORT_SYMBOL(blk_mq_init_queue);
  
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
-                                                 struct request_queue *q)
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+                                               struct request_queue *q)
  {
-       struct blk_mq_hw_ctx **hctxs;
-       struct blk_mq_ctx __percpu *ctx;
-       unsigned int *map;
-       int i;
-
-       ctx = alloc_percpu(struct blk_mq_ctx);
-       if (!ctx)
-               return ERR_PTR(-ENOMEM);
-
-       hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
-                       set->numa_node);
-
-       if (!hctxs)
-               goto err_percpu;
-
-       map = blk_mq_make_queue_map(set);
-       if (!map)
-               goto err_map;
+       int i, j;
+       struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
  
+       blk_mq_sysfs_unregister(q);
         for (i = 0; i < set->nr_hw_queues; i++) {
-               int node = blk_mq_hw_queue_to_node(map, i);
+               int node;
  
+               if (hctxs[i])
+                       continue;
+
+               node = blk_mq_hw_queue_to_node(q->mq_map, i);
                 hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
                                         GFP_KERNEL, node);
                 if (!hctxs[i])
-                       goto err_hctxs;
+                       break;
  
                 if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
-                                               node))
-                       goto err_hctxs;
+                                               node)) {
+                       kfree(hctxs[i]);
+                       hctxs[i] = NULL;
+                       break;
+               }
  
                 atomic_set(&hctxs[i]->nr_active, 0);
                 hctxs[i]->numa_node = node;
                 hctxs[i]->queue_num = i;
+
+               if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
+                       free_cpumask_var(hctxs[i]->cpumask);
+                       kfree(hctxs[i]);
+                       hctxs[i] = NULL;
+                       break;
+               }
+               blk_mq_hctx_kobj_init(hctxs[i]);
         }
+       for (j = i; j < q->nr_hw_queues; j++) {
+               struct blk_mq_hw_ctx *hctx = hctxs[j];
+
+               if (hctx) {
+                       if (hctx->tags) {
+                               blk_mq_free_rq_map(set, hctx->tags, j);
+                               set->tags[j] = NULL;
+                       }
+                       blk_mq_exit_hctx(q, set, hctx, j);
+                       free_cpumask_var(hctx->cpumask);
+                       kobject_put(&hctx->kobj);
+                       kfree(hctx->ctxs);
+                       kfree(hctx);
+                       hctxs[j] = NULL;
+
+               }
+       }
+       q->nr_hw_queues = i;
+       blk_mq_sysfs_register(q);
+}
+
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+                                                 struct request_queue *q)
+{
+       /* mark the queue as mq asap */
+       q->mq_ops = set->ops;
+
+       q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+       if (!q->queue_ctx)
+               return ERR_PTR(-ENOMEM);
+
+       q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
+                                               GFP_KERNEL, set->numa_node);
+       if (!q->queue_hw_ctx)
+               goto err_percpu;
+
+       q->mq_map = blk_mq_make_queue_map(set);
+       if (!q->mq_map)
+               goto err_map;
+
+       blk_mq_realloc_hw_ctxs(set, q);
+       if (!q->nr_hw_queues)
+               goto err_hctxs;
  
         INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
         blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
  
         q->nr_queues = nr_cpu_ids;
-       q->nr_hw_queues = set->nr_hw_queues;
-       q->mq_map = map;
-
-       q->queue_ctx = ctx;
-       q->queue_hw_ctx = hctxs;
  
-       q->mq_ops = set->ops;
         q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
  
         if (!(set->flags & BLK_MQ_F_SG_MERGE))
@@ -2050,9 +2066,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
  
         blk_mq_init_cpu_queues(q, set->nr_hw_queues);
  
-       if (blk_mq_init_hw_queues(q, set))
-               goto err_hctxs;
-
         get_online_cpus();
         mutex_lock(&all_q_mutex);
  
@@ -2066,17 +2079,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
         return q;
  
  err_hctxs:
-       kfree(map);
-       for (i = 0; i < set->nr_hw_queues; i++) {
-               if (!hctxs[i])
-                       break;
-               free_cpumask_var(hctxs[i]->cpumask);
-               kfree(hctxs[i]);
-       }
+       kfree(q->mq_map);
  err_map:
-       kfree(hctxs);
+       kfree(q->queue_hw_ctx);
  err_percpu:
-       free_percpu(ctx);
+       free_percpu(q->queue_ctx);
         return ERR_PTR(-ENOMEM);
  }
  EXPORT_SYMBOL(blk_mq_init_allocated_queue);
@@ -2284,9 +2291,13 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
                 set->nr_hw_queues = 1;
                 set->queue_depth = min(64U, set->queue_depth);
         }
+       /*
+        * There is no use for more h/w queues than cpus.
+        */
+       if (set->nr_hw_queues > nr_cpu_ids)
+               set->nr_hw_queues = nr_cpu_ids;
  
-       set->tags = kmalloc_node(set->nr_hw_queues *
-                                sizeof(struct blk_mq_tags *),
+       set->tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *),
                                  GFP_KERNEL, set->numa_node);
         if (!set->tags)
                 return -ENOMEM;
@@ -2309,7 +2320,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
  {
         int i;
  
-       for (i = 0; i < set->nr_hw_queues; i++) {
+       for (i = 0; i < nr_cpu_ids; i++) {
                 if (set->tags[i])
                         blk_mq_free_rq_map(set, set->tags[i], i);
         }
@@ -2330,6 +2341,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
  
         ret = 0;
         queue_for_each_hw_ctx(q, hctx, i) {
+               if (!hctx->tags)
+                       continue;
                 ret = blk_mq_tag_update_depth(hctx->tags, nr);
                 if (ret)
                         break;
@@ -2341,6 +2354,35 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
         return ret;
  }
  
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
+{
+       struct request_queue *q;
+
+       if (nr_hw_queues > nr_cpu_ids)
+               nr_hw_queues = nr_cpu_ids;
+       if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)
+               return;
+
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               blk_mq_freeze_queue(q);
+
+       set->nr_hw_queues = nr_hw_queues;
+       list_for_each_entry(q, &set->tag_list, tag_set_list) {
+               blk_mq_realloc_hw_ctxs(set, q);
+
+               if (q->nr_hw_queues > 1)
+                       blk_queue_make_request(q, blk_mq_make_request);
+               else
+                       blk_queue_make_request(q, blk_sq_make_request);
+
+               blk_mq_queue_reinit(q, cpu_online_mask);
+       }
+
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               blk_mq_unfreeze_queue(q);
+}
+EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
+
  void blk_mq_disable_hotplug(void)
  {
         mutex_lock(&all_q_mutex);
diff --git a/block/blk-mq.h b/block/blk-mq.h

index eaede8e45c9c3e5f2555ea86ec39492e1c042e59..9087b11037b70ae514fd46ea10f659152f291aa2 100644 (file)
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -57,6 +57,7 @@ extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
   */
  extern int blk_mq_sysfs_register(struct request_queue *q);
  extern void blk_mq_sysfs_unregister(struct request_queue *q);
+extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
  
  extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
  
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c

index 1f9093e901daed7849a54633be5d80ce96b010f4..e3c591dd8f19d0b46fe42dc842510f9d220af143 100644 (file)
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -632,6 +632,13 @@ static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
         return pblkg ? blkg_to_cfqg(pblkg) : NULL;
  }
  
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+                                     struct cfq_group *ancestor)
+{
+       return cgroup_is_descendant(cfqg_to_blkg(cfqg)->blkcg->css.cgroup,
+                                   cfqg_to_blkg(ancestor)->blkcg->css.cgroup);
+}
+
  static inline void cfqg_get(struct cfq_group *cfqg)
  {
         return blkg_get(cfqg_to_blkg(cfqg));
@@ -758,6 +765,11 @@ static void cfqg_stats_xfer_dead(struct cfq_group *cfqg)
  #else  /* CONFIG_CFQ_GROUP_IOSCHED */
  
  static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; }
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+                                     struct cfq_group *ancestor)
+{
+       return true;
+}
  static inline void cfqg_get(struct cfq_group *cfqg) { }
  static inline void cfqg_put(struct cfq_group *cfqg) { }
  
@@ -2897,6 +2909,7 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  static void cfq_arm_slice_timer(struct cfq_data *cfqd)
  {
         struct cfq_queue *cfqq = cfqd->active_queue;
+       struct cfq_rb_root *st = cfqq->service_tree;
         struct cfq_io_cq *cic;
         unsigned long sl, group_idle = 0;
  
@@ -2947,8 +2960,13 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
                 return;
         }
  
-       /* There are other queues in the group, don't do group idle */
-       if (group_idle && cfqq->cfqg->nr_cfqq > 1)
+       /*
+        * There are other queues in the group or this is the only group and
+        * it has too big thinktime, don't do group idle.
+        */
+       if (group_idle &&
+           (cfqq->cfqg->nr_cfqq > 1 ||
+            cfq_io_thinktime_big(cfqd, &st->ttime, true)))
                 return;
  
         cfq_mark_cfqq_wait_request(cfqq);
@@ -3947,16 +3965,27 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
         if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
                 return true;
  
-       if (new_cfqq->cfqg != cfqq->cfqg)
+       /*
+        * Treat ancestors of current cgroup the same way as current cgroup.
+        * For anybody else we disallow preemption to guarantee service
+        * fairness among cgroups.
+        */
+       if (!cfqg_is_descendant(cfqq->cfqg, new_cfqq->cfqg))
                 return false;
  
         if (cfq_slice_used(cfqq))
                 return true;
  
+       /*
+        * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
+        */
+       if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
+               return true;
+
+       WARN_ON_ONCE(cfqq->ioprio_class != new_cfqq->ioprio_class);
         /* Allow preemption only if we are idling on sync-noidle tree */
         if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD &&
             cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
-           new_cfqq->service_tree->count == 2 &&
             RB_EMPTY_ROOT(&cfqq->sort_list))
                 return true;
  
@@ -3967,12 +3996,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
         if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
                 return true;
  
-       /*
-        * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
-        */
-       if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
-               return true;
-
         /* An idle queue should not be idle now for some reason */
         if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
                 return true;
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 826b164a4b5b1faa9719aefc9cceccec20eda479..3172c4e2f50255e1cb1fb426c1723a6b7b493bb2 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -575,7 +575,11 @@ static const struct super_operations bdev_sops = {
  static struct dentry *bd_mount(struct file_system_type *fs_type,
         int flags, const char *dev_name, void *data)
  {
-       return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
+       struct dentry *dent;
+       dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
+       if (dent)
+               dent->d_sb->s_iflags |= SB_I_CGROUPWB;
+       return dent;
  }
  
  static struct file_system_type bd_type = {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h

index 7fc9296b574290e768dbb38585e725525802940b..15a73d49fd1d54a4b401bf3a042b466de6f311c8 100644 (file)
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -244,6 +244,8 @@ void blk_mq_freeze_queue(struct request_queue *q);
  void blk_mq_unfreeze_queue(struct request_queue *q);
  void blk_mq_freeze_queue_start(struct request_queue *q);
  
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
+
  /*
   * Driver command data is immediately after the request. So subtract request
   * size to get back to the original request, add request size to get the PDU.
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 18 Mar 2016 23:43:11 +0000 (16:43 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 18 Mar 2016 23:43:11 +0000 (16:43 -0700)
block/bio.c		patch \| blob \| history
block/blk-mq-sysfs.c		patch \| blob \| history
block/blk-mq.c		patch \| blob \| history
block/blk-mq.h		patch \| blob \| history
block/cfq-iosched.c		patch \| blob \| history
fs/block_dev.c		patch \| blob \| history
include/linux/blk-mq.h		patch \| blob \| history