Merge branch 'urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/brodo/pcmcia-2.6
[sfrench/cifs-2.6.git] / block / cfq-iosched.c
index ed897b5ef315dedfe01d0fe8817ef2259b770252..7982b830db58df900fb679919c81edfc0ee34689 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
 #include <linux/blktrace_api.h>
-#include "blk-cgroup.h"
+#include "cfq.h"
 
 /*
  * tunables
@@ -64,6 +64,9 @@ static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
 static struct completion *ioc_gone;
 static DEFINE_SPINLOCK(ioc_gone_lock);
 
+static DEFINE_SPINLOCK(cic_index_lock);
+static DEFINE_IDA(cic_index_ida);
+
 #define CFQ_PRIO_LISTS         IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)   ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)     ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -271,6 +274,7 @@ struct cfq_data {
        unsigned int cfq_latency;
        unsigned int cfq_group_isolation;
 
+       unsigned int cic_index;
        struct list_head cic_list;
 
        /*
@@ -430,6 +434,24 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
        cic->cfqq[is_sync] = cfqq;
 }
 
+#define CIC_DEAD_KEY   1ul
+#define CIC_DEAD_INDEX_SHIFT   1
+
+static inline void *cfqd_dead_key(struct cfq_data *cfqd)
+{
+       return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
+}
+
+static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
+{
+       struct cfq_data *cfqd = cic->key;
+
+       if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
+               return NULL;
+
+       return cfqd;
+}
+
 /*
  * We regard a request as SYNC, if it's either a read or has the SYNC bit
  * set (in which case it could also be direct WRITE).
@@ -857,7 +879,7 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
        if (!RB_EMPTY_NODE(&cfqg->rb_node))
                cfq_rb_erase(&cfqg->rb_node, st);
        cfqg->saved_workload_slice = 0;
-       blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
+       cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
 }
 
 static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
@@ -917,8 +939,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 
        cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
                                        st->min_vdisktime);
-       blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
-       blkiocg_set_start_empty_time(&cfqg->blkg);
+       cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
+       cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -973,7 +995,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 
        /* Add group onto cgroup list */
        sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-       blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
+       cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
                                        MKDEV(major, minor));
        cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
@@ -1057,7 +1079,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
                 * it from cgroup list, then it will take care of destroying
                 * cfqg also.
                 */
-               if (!blkiocg_del_blkio_group(&cfqg->blkg))
+               if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg))
                        cfq_destroy_cfqg(cfqd, cfqg);
        }
 }
@@ -1399,10 +1421,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
 {
        elv_rb_del(&cfqq->sort_list, rq);
        cfqq->queued[rq_is_sync(rq)]--;
-       blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq),
-                                               rq_is_sync(rq));
+       cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
+                                       rq_data_dir(rq), rq_is_sync(rq));
        cfq_add_rq_rb(rq);
-       blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
+       cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
                        &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq),
                        rq_is_sync(rq));
 }
@@ -1460,8 +1482,8 @@ static void cfq_remove_request(struct request *rq)
        cfq_del_rq_rb(rq);
 
        cfqq->cfqd->rq_queued--;
-       blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq),
-                                               rq_is_sync(rq));
+       cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
+                                       rq_data_dir(rq), rq_is_sync(rq));
        if (rq_is_meta(rq)) {
                WARN_ON(!cfqq->meta_pending);
                cfqq->meta_pending--;
@@ -1496,8 +1518,8 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
 static void cfq_bio_merged(struct request_queue *q, struct request *req,
                                struct bio *bio)
 {
-       blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, bio_data_dir(bio),
-                                       cfq_bio_sync(bio));
+       cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg,
+                                       bio_data_dir(bio), cfq_bio_sync(bio));
 }
 
 static void
@@ -1517,8 +1539,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
        if (cfqq->next_rq == next)
                cfqq->next_rq = rq;
        cfq_remove_request(next);
-       blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(next),
-                                       rq_is_sync(next));
+       cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg,
+                                       rq_data_dir(next), rq_is_sync(next));
 }
 
 static int cfq_allow_merge(struct request_queue *q, struct request *rq,
@@ -1549,7 +1571,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        del_timer(&cfqd->idle_slice_timer);
-       blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
+       cfq_blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
 }
 
 static void __cfq_set_active_queue(struct cfq_data *cfqd,
@@ -1558,7 +1580,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
        if (cfqq) {
                cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
                                cfqd->serving_prio, cfqd->serving_type);
-               blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg);
+               cfq_blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg);
                cfqq->slice_start = 0;
                cfqq->dispatch_start = jiffies;
                cfqq->allocated_slice = 0;
@@ -1889,7 +1911,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
        sl = cfqd->cfq_slice_idle;
 
        mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
-       blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg);
+       cfq_blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg);
        cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
 }
 
@@ -1909,7 +1931,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
        elv_dispatch_sort(q, rq);
 
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
-       blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
+       cfq_blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
                                        rq_data_dir(rq), rq_is_sync(rq));
 }
 
@@ -1964,6 +1986,15 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
        int process_refs, new_process_refs;
        struct cfq_queue *__cfqq;
 
+       /*
+        * If there are no process references on the new_cfqq, then it is
+        * unsafe to follow the ->new_cfqq chain as other cfqq's in the
+        * chain may have dropped their last reference (not just their
+        * last process reference).
+        */
+       if (!cfqq_process_refs(new_cfqq))
+               return;
+
        /* Avoid a circular list and skip interim queue merges */
        while ((__cfqq = new_cfqq->new_cfqq)) {
                if (__cfqq == cfqq)
@@ -1972,17 +2003,17 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
        }
 
        process_refs = cfqq_process_refs(cfqq);
+       new_process_refs = cfqq_process_refs(new_cfqq);
        /*
         * If the process for the cfqq has gone away, there is no
         * sense in merging the queues.
         */
-       if (process_refs == 0)
+       if (process_refs == 0 || new_process_refs == 0)
                return;
 
        /*
         * Merge in the direction of the lesser amount of work.
         */
-       new_process_refs = cfqq_process_refs(new_cfqq);
        if (new_process_refs >= process_refs) {
                cfqq->new_cfqq = new_cfqq;
                atomic_add(process_refs, &new_cfqq->ref);
@@ -2510,11 +2541,12 @@ static void cfq_cic_free(struct cfq_io_context *cic)
 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
 {
        unsigned long flags;
+       unsigned long dead_key = (unsigned long) cic->key;
 
-       BUG_ON(!cic->dead_key);
+       BUG_ON(!(dead_key & CIC_DEAD_KEY));
 
        spin_lock_irqsave(&ioc->lock, flags);
-       radix_tree_delete(&ioc->radix_root, cic->dead_key);
+       radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
        hlist_del_rcu(&cic->cic_list);
        spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -2537,15 +2569,10 @@ static void cfq_free_io_context(struct io_context *ioc)
        __call_for_each_cic(ioc, cic_free_func);
 }
 
-static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+static void cfq_put_cooperator(struct cfq_queue *cfqq)
 {
        struct cfq_queue *__cfqq, *next;
 
-       if (unlikely(cfqq == cfqd->active_queue)) {
-               __cfq_slice_expired(cfqd, cfqq, 0);
-               cfq_schedule_dispatch(cfqd);
-       }
-
        /*
         * If this queue was scheduled to merge with another queue, be
         * sure to drop the reference taken on that queue (and others in
@@ -2561,6 +2588,16 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
                cfq_put_queue(__cfqq);
                __cfqq = next;
        }
+}
+
+static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+       if (unlikely(cfqq == cfqd->active_queue)) {
+               __cfq_slice_expired(cfqd, cfqq, 0);
+               cfq_schedule_dispatch(cfqd);
+       }
+
+       cfq_put_cooperator(cfqq);
 
        cfq_put_queue(cfqq);
 }
@@ -2573,11 +2610,10 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
        list_del_init(&cic->queue_list);
 
        /*
-        * Make sure key == NULL is seen for dead queues
+        * Make sure dead mark is seen for dead queues
         */
        smp_wmb();
-       cic->dead_key = (unsigned long) cic->key;
-       cic->key = NULL;
+       cic->key = cfqd_dead_key(cfqd);
 
        if (ioc->ioc_data == cic)
                rcu_assign_pointer(ioc->ioc_data, NULL);
@@ -2596,7 +2632,7 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 static void cfq_exit_single_io_context(struct io_context *ioc,
                                       struct cfq_io_context *cic)
 {
-       struct cfq_data *cfqd = cic->key;
+       struct cfq_data *cfqd = cic_to_cfqd(cic);
 
        if (cfqd) {
                struct request_queue *q = cfqd->queue;
@@ -2609,7 +2645,7 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
                 * race between exiting task and queue
                 */
                smp_read_barrier_depends();
-               if (cic->key)
+               if (cic->key == cfqd)
                        __cfq_exit_single_io_context(cfqd, cic);
 
                spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2689,7 +2725,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 
 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
 {
-       struct cfq_data *cfqd = cic->key;
+       struct cfq_data *cfqd = cic_to_cfqd(cic);
        struct cfq_queue *cfqq;
        unsigned long flags;
 
@@ -2746,7 +2782,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
 {
        struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
-       struct cfq_data *cfqd = cic->key;
+       struct cfq_data *cfqd = cic_to_cfqd(cic);
        unsigned long flags;
        struct request_queue *q;
 
@@ -2883,12 +2919,13 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
        unsigned long flags;
 
        WARN_ON(!list_empty(&cic->queue_list));
+       BUG_ON(cic->key != cfqd_dead_key(cfqd));
 
        spin_lock_irqsave(&ioc->lock, flags);
 
        BUG_ON(ioc->ioc_data == cic);
 
-       radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd);
+       radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
        hlist_del_rcu(&cic->cic_list);
        spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -2900,7 +2937,6 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
        struct cfq_io_context *cic;
        unsigned long flags;
-       void *k;
 
        if (unlikely(!ioc))
                return NULL;
@@ -2917,13 +2953,11 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
        }
 
        do {
-               cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
+               cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
                rcu_read_unlock();
                if (!cic)
                        break;
-               /* ->key must be copied to avoid race with cfq_exit_queue() */
-               k = cic->key;
-               if (unlikely(!k)) {
+               if (unlikely(cic->key != cfqd)) {
                        cfq_drop_dead_cic(cfqd, ioc, cic);
                        rcu_read_lock();
                        continue;
@@ -2956,7 +2990,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 
                spin_lock_irqsave(&ioc->lock, flags);
                ret = radix_tree_insert(&ioc->radix_root,
-                                               (unsigned long) cfqd, cic);
+                                               cfqd->cic_index, cic);
                if (!ret)
                        hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
                spin_unlock_irqrestore(&ioc->lock, flags);
@@ -3223,7 +3257,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                                cfq_clear_cfqq_wait_request(cfqq);
                                __blk_run_queue(cfqd->queue);
                        } else {
-                               blkiocg_update_idle_time_stats(
+                               cfq_blkiocg_update_idle_time_stats(
                                                &cfqq->cfqg->blkg);
                                cfq_mark_cfqq_must_dispatch(cfqq);
                        }
@@ -3251,7 +3285,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
        rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
        list_add_tail(&rq->queuelist, &cfqq->fifo);
        cfq_add_rq_rb(rq);
-       blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
+       cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
                        &cfqd->serving_group->blkg, rq_data_dir(rq),
                        rq_is_sync(rq));
        cfq_rq_enqueued(cfqd, cfqq, rq);
@@ -3339,9 +3373,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        WARN_ON(!cfqq->dispatched);
        cfqd->rq_in_driver--;
        cfqq->dispatched--;
-       blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq),
-                       rq_io_start_time_ns(rq), rq_data_dir(rq),
-                       rq_is_sync(rq));
+       cfq_blkiocg_update_completion_stats(&cfqq->cfqg->blkg,
+                       rq_start_time_ns(rq), rq_io_start_time_ns(rq),
+                       rq_data_dir(rq), rq_is_sync(rq));
 
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
@@ -3516,6 +3550,9 @@ split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
        }
 
        cic_set_cfqq(cic, NULL, 1);
+
+       cfq_put_cooperator(cfqq);
+
        cfq_put_queue(cfqq);
        return NULL;
 }
@@ -3702,16 +3739,38 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
        cfq_put_async_queues(cfqd);
        cfq_release_cfq_groups(cfqd);
-       blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+       cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
 
        spin_unlock_irq(q->queue_lock);
 
        cfq_shutdown_timer_wq(cfqd);
 
+       spin_lock(&cic_index_lock);
+       ida_remove(&cic_index_ida, cfqd->cic_index);
+       spin_unlock(&cic_index_lock);
+
        /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
        call_rcu(&cfqd->rcu, cfq_cfqd_free);
 }
 
+static int cfq_alloc_cic_index(void)
+{
+       int index, error;
+
+       do {
+               if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
+                       return -ENOMEM;
+
+               spin_lock(&cic_index_lock);
+               error = ida_get_new(&cic_index_ida, &index);
+               spin_unlock(&cic_index_lock);
+               if (error && error != -EAGAIN)
+                       return error;
+       } while (error);
+
+       return index;
+}
+
 static void *cfq_init_queue(struct request_queue *q)
 {
        struct cfq_data *cfqd;
@@ -3719,10 +3778,16 @@ static void *cfq_init_queue(struct request_queue *q)
        struct cfq_group *cfqg;
        struct cfq_rb_root *st;
 
+       i = cfq_alloc_cic_index();
+       if (i < 0)
+               return NULL;
+
        cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
        if (!cfqd)
                return NULL;
 
+       cfqd->cic_index = i;
+
        /* Init root service tree */
        cfqd->grp_service_tree = CFQ_RB_ROOT;
 
@@ -3742,8 +3807,8 @@ static void *cfq_init_queue(struct request_queue *q)
         */
        atomic_set(&cfqg->ref, 1);
        rcu_read_lock();
-       blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd,
-                                       0);
+       cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
+                                       (void *)cfqd, 0);
        rcu_read_unlock();
 #endif
        /*
@@ -3984,6 +4049,7 @@ static void __exit cfq_exit(void)
         */
        if (elv_ioc_count_read(cfq_ioc_count))
                wait_for_completion(&all_gone);
+       ida_destroy(&cic_index_ida);
        cfq_slab_kill();
 }