/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is correct now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
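/*
 * The hash key of a request is the sector just past its end: a request
 * starting at sector 100 with blk_rq_sectors() == 8 hashes under key
 * 108, so a later bio that begins at sector 108 can look the request up
 * by its own start sector and be back merged onto it.
 */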
#define rq_hash_key(rq)         (blk_rq_pos(rq) + blk_rq_sectors(rq))

/*
 * Query the io scheduler to see if a bio issued by the current process
 * may be merged with rq.
 */
static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
{
        struct request_queue *q = rq->q;
        struct elevator_queue *e = q->elevator;

        if (e->uses_mq && e->type->ops.mq.allow_merge)
                return e->type->ops.mq.allow_merge(q, rq, bio);
        else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
                return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);

        return 1;
}

/*
 * can we safely merge with this request?
 */
bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
{
        if (!blk_rq_merge_ok(rq, bio))
                return false;

        if (!elv_iosched_allow_bio_merge(rq, bio))
                return false;

        return true;
}
EXPORT_SYMBOL(elv_bio_merge_ok);

static struct elevator_type *elevator_find(const char *name)
{
        struct elevator_type *e;

        list_for_each_entry(e, &elv_list, list) {
                if (!strcmp(e->elevator_name, name))
                        return e;
        }

        return NULL;
}

static void elevator_put(struct elevator_type *e)
{
        module_put(e->elevator_owner);
}

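/*
 * Look up an elevator type by name and take a module reference on it.
 * If it is not present and try_loading is set, ask kmod for a
 * "<name>-iosched" module (e.g. "cfq" maps to cfq-iosched) and retry.
 */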
static struct elevator_type *elevator_get(const char *name, bool try_loading)
{
        struct elevator_type *e;

        spin_lock(&elv_list_lock);

        e = elevator_find(name);
        if (!e && try_loading) {
                spin_unlock(&elv_list_lock);
                request_module("%s-iosched", name);
                spin_lock(&elv_list_lock);
                e = elevator_find(name);
        }

        if (e && !try_module_get(e->elevator_owner))
                e = NULL;

        spin_unlock(&elv_list_lock);

        return e;
}

static char chosen_elevator[ELV_NAME_MAX];

static int __init elevator_setup(char *str)
{
        /*
         * Be backwards-compatible with previous kernels, so users
         * won't get the wrong elevator.
         */
        strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
        return 1;
}

__setup("elevator=", elevator_setup);
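/*
 * For example, booting with "elevator=deadline" on the kernel command
 * line makes the legacy deadline scheduler the default for non-mq
 * queues, provided it is built in or can be loaded as a module below.
 */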

/* called during boot to load the elevator chosen by the elevator param */
void __init load_default_elevator_module(void)
{
        struct elevator_type *e;

        if (!chosen_elevator[0])
                return;

        spin_lock(&elv_list_lock);
        e = elevator_find(chosen_elevator);
        spin_unlock(&elv_list_lock);

        if (!e)
                request_module("%s-iosched", chosen_elevator);
}

static struct kobj_type elv_ktype;

struct elevator_queue *elevator_alloc(struct request_queue *q,
                                  struct elevator_type *e)
{
        struct elevator_queue *eq;

        eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
        if (unlikely(!eq))
                return NULL;

        eq->type = e;
        kobject_init(&eq->kobj, &elv_ktype);
        mutex_init(&eq->sysfs_lock);
        hash_init(eq->hash);
        eq->uses_mq = e->uses_mq;

        return eq;
}
EXPORT_SYMBOL(elevator_alloc);

static void elevator_release(struct kobject *kobj)
{
        struct elevator_queue *e;

        e = container_of(kobj, struct elevator_queue, kobj);
        elevator_put(e->type);
        kfree(e);
}

int elevator_init(struct request_queue *q, char *name)
{
        struct elevator_type *e = NULL;
        int err;

        /*
         * q->sysfs_lock must be held to provide mutual exclusion between
         * elevator_switch() and here.
         */
        lockdep_assert_held(&q->sysfs_lock);

        if (unlikely(q->elevator))
                return 0;

        INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
        q->end_sector = 0;
        q->boundary_rq = NULL;

        if (name) {
                e = elevator_get(name, true);
                if (!e)
                        return -EINVAL;
        }

        /*
         * Use the default elevator specified by the elevator= boot param
         * for non-mq devices, or by the config option. Don't try to load
         * modules here, as we could be running off async and
         * request_module() isn't allowed from async.
         */
        if (!e && !q->mq_ops && *chosen_elevator) {
                e = elevator_get(chosen_elevator, false);
                if (!e)
                        printk(KERN_ERR "I/O scheduler %s not found\n",
                                                        chosen_elevator);
        }

        if (!e) {
                /*
                 * For single-queue blk-mq devices, default to mq-deadline
                 * if it is available. If mq-deadline isn't available, or
                 * the device has multiple hardware queues, default to
                 * "none".
                 */
                if (q->mq_ops) {
                        if (q->nr_hw_queues == 1)
                                e = elevator_get("mq-deadline", false);
                        if (!e)
                                return 0;
                } else
                        e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);

                if (!e) {
                        printk(KERN_ERR
                                "Default I/O scheduler not found. " \
                                "Using noop.\n");
                        e = elevator_get("noop", false);
                }
        }

        if (e->uses_mq)
                err = blk_mq_init_sched(q, e);
        else
                err = e->ops.sq.elevator_init_fn(q, e);
        if (err)
                elevator_put(e);
        return err;
}
EXPORT_SYMBOL(elevator_init);

void elevator_exit(struct request_queue *q, struct elevator_queue *e)
{
        mutex_lock(&e->sysfs_lock);
        if (e->uses_mq && e->type->ops.mq.exit_sched)
                blk_mq_exit_sched(q, e);
        else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
                e->type->ops.sq.elevator_exit_fn(e);
        mutex_unlock(&e->sysfs_lock);

        kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);

static inline void __elv_rqhash_del(struct request *rq)
{
        hash_del(&rq->hash);
        rq->rq_flags &= ~RQF_HASHED;
}

void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
        if (ELV_ON_HASH(rq))
                __elv_rqhash_del(rq);
}
EXPORT_SYMBOL_GPL(elv_rqhash_del);

void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        BUG_ON(ELV_ON_HASH(rq));
        hash_add(e->hash, &rq->hash, rq_hash_key(rq));
        rq->rq_flags |= RQF_HASHED;
}
EXPORT_SYMBOL_GPL(elv_rqhash_add);

void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
        __elv_rqhash_del(rq);
        elv_rqhash_add(q, rq);
}

struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
        struct elevator_queue *e = q->elevator;
        struct hlist_node *next;
        struct request *rq;

        hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
                BUG_ON(!ELV_ON_HASH(rq));

                if (unlikely(!rq_mergeable(rq))) {
                        __elv_rqhash_del(rq);
                        continue;
                }

                if (rq_hash_key(rq) == offset)
                        return rq;
        }

        return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
void elv_rb_add(struct rb_root *root, struct request *rq)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct request *__rq;

        while (*p) {
                parent = *p;
                __rq = rb_entry(parent, struct request, rb_node);

                if (blk_rq_pos(rq) < blk_rq_pos(__rq))
                        p = &(*p)->rb_left;
                else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
                        p = &(*p)->rb_right;
        }

        rb_link_node(&rq->rb_node, parent, p);
        rb_insert_color(&rq->rb_node, root);
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
        BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
        rb_erase(&rq->rb_node, root);
        RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
        struct rb_node *n = root->rb_node;
        struct request *rq;

        while (n) {
                rq = rb_entry(n, struct request, rb_node);

                if (sector < blk_rq_pos(rq))
                        n = n->rb_left;
                else if (sector > blk_rq_pos(rq))
                        n = n->rb_right;
                else
                        return rq;
        }

        return NULL;
}
EXPORT_SYMBOL(elv_rb_find);
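/*
 * Typical usage (mirroring what deadline-style schedulers do): keep a
 * sector-sorted tree, usually one per data direction, add each request
 * with elv_rb_add() when it is queued, drop it with elv_rb_del() when it
 * is dispatched, and use elv_rb_find() together with
 * elv_rb_former_request()/elv_rb_latter_request() to locate front-merge
 * candidates and sector-ordered neighbours.
 */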

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue. To be used by
 * specific elevators.
 */
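/*
 * For example, with q->end_sector == 1000 and a dispatch list holding
 * requests at sectors 1200, 1400 and 200 (same direction, none started),
 * a request at sector 1300 is inserted between 1200 and 1400, while a
 * request at sector 100 lands after 1400 and before 200: positions at or
 * beyond the boundary are served first, then the scan wraps around to
 * the low sectors.
 */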
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
        sector_t boundary;
        struct list_head *entry;

        if (q->last_merge == rq)
                q->last_merge = NULL;

        elv_rqhash_del(q, rq);

        q->nr_sorted--;

        boundary = q->end_sector;
        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);

                if (req_op(rq) != req_op(pos))
                        break;
                if (rq_data_dir(rq) != rq_data_dir(pos))
                        break;
                if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
                        break;
                if (blk_rq_pos(rq) >= boundary) {
                        if (blk_rq_pos(pos) < boundary)
                                continue;
                } else {
                        if (blk_rq_pos(pos) >= boundary)
                                break;
                }
                if (blk_rq_pos(rq) >= blk_rq_pos(pos))
                        break;
        }

        list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
        if (q->last_merge == rq)
                q->last_merge = NULL;

        elv_rqhash_del(q, rq);

        q->nr_sorted--;

        q->end_sector = rq_end_sector(rq);
        q->boundary_rq = rq;
        list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);

enum elv_merge elv_merge(struct request_queue *q, struct request **req,
                struct bio *bio)
{
        struct elevator_queue *e = q->elevator;
        struct request *__rq;

        /*
         * Levels of merges:
         *      nomerges:  No merges at all attempted
         *      noxmerges: Only simple one-hit cache try
         *      merges:    All merge tries attempted
         */
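        /*
         * These levels correspond to the queue's "nomerges" sysfs
         * attribute: 0 allows all merges, 1 (QUEUE_FLAG_NOXMERGES) keeps
         * only the one-hit cache, and 2 (QUEUE_FLAG_NOMERGES) disables
         * merging entirely.
         */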
        if (blk_queue_nomerges(q) || !bio_mergeable(bio))
                return ELEVATOR_NO_MERGE;

        /*
         * First try one-hit cache.
         */
        if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
                enum elv_merge ret = blk_try_merge(q->last_merge, bio);

                if (ret != ELEVATOR_NO_MERGE) {
                        *req = q->last_merge;
                        return ret;
                }
        }

        if (blk_queue_noxmerges(q))
                return ELEVATOR_NO_MERGE;

        /*
         * See if our hash lookup can find a potential backmerge.
         */
        __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
        if (__rq && elv_bio_merge_ok(__rq, bio)) {
                *req = __rq;
                return ELEVATOR_BACK_MERGE;
        }

        if (e->uses_mq && e->type->ops.mq.request_merge)
                return e->type->ops.mq.request_merge(q, req, bio);
        else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
                return e->type->ops.sq.elevator_merge_fn(q, req, bio);

        return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise
 */
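/*
 * Note that the backmerge loop below can chain: if the hash already
 * holds requests covering sectors 0-7 and 8-15 and we insert one for
 * 16-23, the new request is first appended to 8-15, and the resulting
 * 8-23 request is then merged into 0-7 on the next pass, leaving a
 * single request covering 0-23.
 */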
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
{
        struct request *__rq;
        bool ret;

        if (blk_queue_nomerges(q))
                return false;

        /*
         * First try one-hit cache.
         */
        if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
                return true;

        if (blk_queue_noxmerges(q))
                return false;

        ret = false;
        /*
         * See if our hash lookup can find a potential backmerge.
         */
        while (1) {
                __rq = elv_rqhash_find(q, blk_rq_pos(rq));
                if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
                        break;

                /* The merged request could be merged with others, try again */
                ret = true;
                rq = __rq;
        }

        return ret;
}

void elv_merged_request(struct request_queue *q, struct request *rq,
                enum elv_merge type)
{
        struct elevator_queue *e = q->elevator;

        if (e->uses_mq && e->type->ops.mq.request_merged)
                e->type->ops.mq.request_merged(q, rq, type);
        else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
                e->type->ops.sq.elevator_merged_fn(q, rq, type);

        if (type == ELEVATOR_BACK_MERGE)
                elv_rqhash_reposition(q, rq);

        q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
                             struct request *next)
{
        struct elevator_queue *e = q->elevator;
        bool next_sorted = false;

        if (e->uses_mq && e->type->ops.mq.requests_merged)
                e->type->ops.mq.requests_merged(q, rq, next);
        else if (e->type->ops.sq.elevator_merge_req_fn) {
                next_sorted = (__force bool)(next->rq_flags & RQF_SORTED);
                if (next_sorted)
                        e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
        }

        elv_rqhash_reposition(q, rq);

        if (next_sorted) {
                elv_rqhash_del(q, next);
                q->nr_sorted--;
        }

        q->last_merge = rq;
}

void elv_bio_merged(struct request_queue *q, struct request *rq,
                        struct bio *bio)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return;

        if (e->type->ops.sq.elevator_bio_merged_fn)
                e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
}

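/*
 * Runtime PM bookkeeping: q->nr_pending counts non-PM requests in the
 * queue.  Adding the first such request to a runtime-suspended (or
 * suspending) device triggers an asynchronous resume via
 * pm_request_resume(); requeueing a request undoes the accounting.
 */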
#ifdef CONFIG_PM
static void blk_pm_requeue_request(struct request *rq)
{
        if (rq->q->dev && !(rq->rq_flags & RQF_PM))
                rq->q->nr_pending--;
}

static void blk_pm_add_request(struct request_queue *q, struct request *rq)
{
        if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
            (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
                pm_request_resume(q->dev);
}
#else
static inline void blk_pm_requeue_request(struct request *rq) {}
static inline void blk_pm_add_request(struct request_queue *q,
                                      struct request *rq)
{
}
#endif

void elv_requeue_request(struct request_queue *q, struct request *rq)
{
        /*
         * it already went through dequeue, we need to decrement the
         * in_flight count again
         */
        if (blk_account_rq(rq)) {
                q->in_flight[rq_is_sync(rq)]--;
                if (rq->rq_flags & RQF_SORTED)
                        elv_deactivate_rq(q, rq);
        }

        rq->rq_flags &= ~RQF_STARTED;

        blk_pm_requeue_request(rq);

        __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
}

void elv_drain_elevator(struct request_queue *q)
{
        struct elevator_queue *e = q->elevator;
        static int printed;

        if (WARN_ON_ONCE(e->uses_mq))
                return;

        lockdep_assert_held(q->queue_lock);

        while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
                ;
        if (q->nr_sorted && printed++ < 10) {
                printk(KERN_ERR "%s: forced dispatching is broken "
                       "(nr_sorted=%u), please report this\n",
                       q->elevator->type->elevator_name, q->nr_sorted);
        }
}

void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
        trace_block_rq_insert(q, rq);

        blk_pm_add_request(q, rq);

        rq->q = q;

        if (rq->rq_flags & RQF_SOFTBARRIER) {
                /* barriers are scheduling boundary, update end_sector */
                if (!blk_rq_is_passthrough(rq)) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = rq;
                }
        } else if (!(rq->rq_flags & RQF_ELVPRIV) &&
                    (where == ELEVATOR_INSERT_SORT ||
                     where == ELEVATOR_INSERT_SORT_MERGE))
                where = ELEVATOR_INSERT_BACK;

        switch (where) {
        case ELEVATOR_INSERT_REQUEUE:
        case ELEVATOR_INSERT_FRONT:
                rq->rq_flags |= RQF_SOFTBARRIER;
                list_add(&rq->queuelist, &q->queue_head);
                break;

        case ELEVATOR_INSERT_BACK:
                rq->rq_flags |= RQF_SOFTBARRIER;
                elv_drain_elevator(q);
                list_add_tail(&rq->queuelist, &q->queue_head);
                /*
                 * We kick the queue here for the following reasons.
                 * - The elevator might have returned NULL previously
                 *   to delay requests and is returning them now.  As the
                 *   queue wasn't empty before this request, ll_rw_blk
                 *   won't run the queue on return, resulting in a hang.
                 * - Usually, back inserted requests won't be merged
                 *   with anything.  There's no point in delaying queue
                 *   processing.
                 */
                __blk_run_queue(q);
                break;

        case ELEVATOR_INSERT_SORT_MERGE:
                /*
                 * If we succeed in merging this request with one in the
                 * queue already, we are done - rq has now been freed,
                 * so no need to do anything further.
                 */
                if (elv_attempt_insert_merge(q, rq))
                        break;
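                /* fall through */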
        case ELEVATOR_INSERT_SORT:
                BUG_ON(blk_rq_is_passthrough(rq));
                rq->rq_flags |= RQF_SORTED;
                q->nr_sorted++;
                if (rq_mergeable(rq)) {
                        elv_rqhash_add(q, rq);
                        if (!q->last_merge)
                                q->last_merge = rq;
                }

                /*
                 * Some ioscheds (cfq) run q->request_fn directly, so
                 * rq cannot be accessed after calling
                 * elevator_add_req_fn.
                 */
                q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
                break;

        case ELEVATOR_INSERT_FLUSH:
                rq->rq_flags |= RQF_SOFTBARRIER;
                blk_insert_flush(rq);
                break;
        default:
                printk(KERN_ERR "%s: bad insertion point %d\n",
                       __func__, where);
                BUG();
        }
}
EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(struct request_queue *q, struct request *rq, int where)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        __elv_add_request(q, rq, where);
        spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(elv_add_request);

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        if (e->uses_mq && e->type->ops.mq.next_request)
                return e->type->ops.mq.next_request(q, rq);
        else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
                return e->type->ops.sq.elevator_latter_req_fn(q, rq);

        return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        if (e->uses_mq && e->type->ops.mq.former_request)
                return e->type->ops.mq.former_request(q, rq);
        if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
                return e->type->ops.sq.elevator_former_req_fn(q, rq);
        return NULL;
}

int elv_set_request(struct request_queue *q, struct request *rq,
                    struct bio *bio, gfp_t gfp_mask)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return 0;

        if (e->type->ops.sq.elevator_set_req_fn)
                return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
        return 0;
}

void elv_put_request(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return;

        if (e->type->ops.sq.elevator_put_req_fn)
                e->type->ops.sq.elevator_put_req_fn(rq);
}

int elv_may_queue(struct request_queue *q, unsigned int op)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return 0;

        if (e->type->ops.sq.elevator_may_queue_fn)
                return e->type->ops.sq.elevator_may_queue_fn(q, op);

        return ELV_MQUEUE_MAY;
}

void elv_completed_request(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return;

        /*
         * request is released from the driver, io must be done
         */
        if (blk_account_rq(rq)) {
                q->in_flight[rq_is_sync(rq)]--;
                if ((rq->rq_flags & RQF_SORTED) &&
                    e->type->ops.sq.elevator_completed_req_fn)
                        e->type->ops.sq.elevator_completed_req_fn(q, rq);
        }
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
        struct elv_fs_entry *entry = to_elv(attr);
        struct elevator_queue *e;
        ssize_t error;

        if (!entry->show)
                return -EIO;

        e = container_of(kobj, struct elevator_queue, kobj);
        mutex_lock(&e->sysfs_lock);
        error = e->type ? entry->show(e, page) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
               const char *page, size_t length)
{
        struct elv_fs_entry *entry = to_elv(attr);
        struct elevator_queue *e;
        ssize_t error;

        if (!entry->store)
                return -EIO;

        e = container_of(kobj, struct elevator_queue, kobj);
        mutex_lock(&e->sysfs_lock);
        error = e->type ? entry->store(e, page, length) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static const struct sysfs_ops elv_sysfs_ops = {
        .show   = elv_attr_show,
        .store  = elv_attr_store,
};

static struct kobj_type elv_ktype = {
        .sysfs_ops      = &elv_sysfs_ops,
        .release        = elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
        struct elevator_queue *e = q->elevator;
        int error;

        error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
        if (!error) {
                struct elv_fs_entry *attr = e->type->elevator_attrs;
                if (attr) {
                        while (attr->attr.name) {
                                if (sysfs_create_file(&e->kobj, &attr->attr))
                                        break;
                                attr++;
                        }
                }
                kobject_uevent(&e->kobj, KOBJ_ADD);
                e->registered = 1;
                if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
                        e->type->ops.sq.elevator_registered_fn(q);
        }
        return error;
}
EXPORT_SYMBOL(elv_register_queue);

void elv_unregister_queue(struct request_queue *q)
{
        if (q) {
                struct elevator_queue *e = q->elevator;

                kobject_uevent(&e->kobj, KOBJ_REMOVE);
                kobject_del(&e->kobj);
                e->registered = 0;
        }
}
EXPORT_SYMBOL(elv_unregister_queue);

int elv_register(struct elevator_type *e)
{
        char *def = "";

        /* create icq_cache if requested */
        if (e->icq_size) {
                if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
                    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
                        return -EINVAL;

                snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
                         "%s_io_cq", e->elevator_name);
                e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
                                                 e->icq_align, 0, NULL);
                if (!e->icq_cache)
                        return -ENOMEM;
        }

        /* register, don't allow duplicate names */
        spin_lock(&elv_list_lock);
        if (elevator_find(e->elevator_name)) {
                spin_unlock(&elv_list_lock);
                if (e->icq_cache)
                        kmem_cache_destroy(e->icq_cache);
                return -EBUSY;
        }
        list_add_tail(&e->list, &elv_list);
        spin_unlock(&elv_list_lock);

        /* print pretty message */
        if (!strcmp(e->elevator_name, chosen_elevator) ||
                        (!*chosen_elevator &&
                         !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
                                def = " (default)";

        printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
                                                                def);
        return 0;
}
EXPORT_SYMBOL_GPL(elv_register);
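/*
 * A legacy (single-queue) scheduler module typically wires this up from
 * its init hook, roughly like so (the names below are illustrative, not
 * taken from this file):
 *
 *      static struct elevator_type iosched_foo = {
 *              .ops.sq         = { ... },
 *              .elevator_name  = "foo",
 *              .elevator_owner = THIS_MODULE,
 *      };
 *
 *      static int __init foo_init(void)
 *      {
 *              return elv_register(&iosched_foo);
 *      }
 *      module_init(foo_init);
 *
 * with a matching elv_unregister() call in the module exit hook.
 */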

void elv_unregister(struct elevator_type *e)
{
        /* unregister */
        spin_lock(&elv_list_lock);
        list_del_init(&e->list);
        spin_unlock(&elv_list_lock);

        /*
         * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
         * sure all RCU operations are complete before proceeding.
         */
        if (e->icq_cache) {
                rcu_barrier();
                kmem_cache_destroy(e->icq_cache);
                e->icq_cache = NULL;
        }
}
EXPORT_SYMBOL_GPL(elv_unregister);

static int elevator_switch_mq(struct request_queue *q,
                              struct elevator_type *new_e)
{
        int ret;

        blk_mq_freeze_queue(q);
        blk_mq_quiesce_queue(q);

        if (q->elevator) {
                if (q->elevator->registered)
                        elv_unregister_queue(q);
                ioc_clear_queue(q);
                elevator_exit(q, q->elevator);
        }

        ret = blk_mq_init_sched(q, new_e);
        if (ret)
                goto out;

        if (new_e) {
                ret = elv_register_queue(q);
                if (ret) {
                        elevator_exit(q, q->elevator);
                        goto out;
                }
        }

        if (new_e)
                blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
        else
                blk_add_trace_msg(q, "elv switch: none");

out:
        blk_mq_unfreeze_queue(q);
        blk_mq_start_stopped_hw_queues(q, true);
        return ret;
}

/*
 * Switch to new_e io scheduler.  Be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one.  This way we have a chance of going back to the
 * old one if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
        struct elevator_queue *old = q->elevator;
        bool old_registered = false;
        int err;

        if (q->mq_ops)
                return elevator_switch_mq(q, new_e);

        /*
         * Turn on BYPASS and drain all requests w/ elevator private data.
         * Block layer doesn't call into a quiesced elevator - all requests
         * are directly put on the dispatch list without elevator data
         * using INSERT_BACK.  All requests have SOFTBARRIER set and no
         * merge happens either.
         */
        if (old) {
                old_registered = old->registered;

                blk_queue_bypass_start(q);

                /* unregister and clear all auxiliary data of the old elevator */
                if (old_registered)
                        elv_unregister_queue(q);

                ioc_clear_queue(q);
        }

        /* allocate, init and register new elevator */
        err = new_e->ops.sq.elevator_init_fn(q, new_e);
        if (err)
                goto fail_init;

        err = elv_register_queue(q);
        if (err)
                goto fail_register;

        /* done, kill the old one and finish */
        if (old) {
                elevator_exit(q, old);
                blk_queue_bypass_end(q);
        }

        blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);

        return 0;

fail_register:
        elevator_exit(q, q->elevator);
fail_init:
        /* switch failed, restore and re-register old elevator */
        if (old) {
                q->elevator = old;
                elv_register_queue(q);
                blk_queue_bypass_end(q);
        }

        return err;
}

/*
 * Switch this queue to the given IO scheduler.
 */
static int __elevator_change(struct request_queue *q, const char *name)
{
        char elevator_name[ELV_NAME_MAX];
        struct elevator_type *e;

        /*
         * Special case for mq, turn off scheduling
         */
        if (q->mq_ops && !strncmp(name, "none", 4))
                return elevator_switch(q, NULL);

        strlcpy(elevator_name, name, sizeof(elevator_name));
        e = elevator_get(strstrip(elevator_name), true);
        if (!e) {
                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
                return -EINVAL;
        }

        if (q->elevator &&
            !strcmp(elevator_name, q->elevator->type->elevator_name)) {
                elevator_put(e);
                return 0;
        }

        if (!e->uses_mq && q->mq_ops) {
                elevator_put(e);
                return -EINVAL;
        }
        if (e->uses_mq && !q->mq_ops) {
                elevator_put(e);
                return -EINVAL;
        }

        return elevator_switch(q, e);
}

int elevator_change(struct request_queue *q, const char *name)
{
        int ret;

        /* Protect q->elevator from elevator_init() */
        mutex_lock(&q->sysfs_lock);
        ret = __elevator_change(q, name);
        mutex_unlock(&q->sysfs_lock);

        return ret;
}
EXPORT_SYMBOL(elevator_change);

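/*
 * elv_iosched_store() and elv_iosched_show() below back the
 * /sys/block/<dev>/queue/scheduler attribute; e.g.
 * "echo deadline > /sys/block/sda/queue/scheduler" switches that queue
 * to the deadline elevator, and "none" disables scheduling on blk-mq.
 */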
ssize_t elv_iosched_store(struct request_queue *q, const char *name,
                          size_t count)
{
        int ret;

        if (!(q->mq_ops || q->request_fn))
                return count;

        ret = __elevator_change(q, name);
        if (!ret)
                return count;

        printk(KERN_ERR "elevator: switch to %s failed\n", name);
        return ret;
}

ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
        struct elevator_queue *e = q->elevator;
        struct elevator_type *elv = NULL;
        struct elevator_type *__e;
        int len = 0;

        if (!blk_queue_stackable(q))
                return sprintf(name, "none\n");

        if (!q->elevator)
                len += sprintf(name+len, "[none] ");
        else
                elv = e->type;

        spin_lock(&elv_list_lock);
        list_for_each_entry(__e, &elv_list, list) {
                if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) {
                        len += sprintf(name+len, "[%s] ", elv->elevator_name);
                        continue;
                }
                if (__e->uses_mq && q->mq_ops)
                        len += sprintf(name+len, "%s ", __e->elevator_name);
                else if (!__e->uses_mq && !q->mq_ops)
                        len += sprintf(name+len, "%s ", __e->elevator_name);
        }
        spin_unlock(&elv_list_lock);

        if (q->mq_ops && q->elevator)
                len += sprintf(name+len, "none");

        len += sprintf(len+name, "\n");
        return len;
}

struct request *elv_rb_former_request(struct request_queue *q,
                                      struct request *rq)
{
        struct rb_node *rbprev = rb_prev(&rq->rb_node);

        if (rbprev)
                return rb_entry_rq(rbprev);

        return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
                                      struct request *rq)
{
        struct rb_node *rbnext = rb_next(&rq->rb_node);

        if (rbnext)
                return rb_entry_rq(rbnext);

        return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);