/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - loose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
#define rq_hash_key(rq)         (blk_rq_pos(rq) + blk_rq_sectors(rq))
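
/*
 * Example: a request starting at sector 100 that still covers 8 sectors
 * hashes under key 108, the sector just past its end.  A bio that begins
 * at sector 108 can therefore find it via elv_rqhash_find() and be
 * back-merged onto it (see elv_merge()).
 */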

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
{
        struct request_queue *q = rq->q;
        struct elevator_queue *e = q->elevator;

        if (e->uses_mq && e->type->ops.mq.allow_merge)
                return e->type->ops.mq.allow_merge(q, rq, bio);
        else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
                return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);

        return 1;
}

/*
 * can we safely merge with this request?
 */
bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
{
        if (!blk_rq_merge_ok(rq, bio))
                return false;

        if (!elv_iosched_allow_bio_merge(rq, bio))
                return false;

        return true;
}
EXPORT_SYMBOL(elv_bio_merge_ok);

static struct elevator_type *elevator_find(const char *name)
{
        struct elevator_type *e;

        list_for_each_entry(e, &elv_list, list) {
                if (!strcmp(e->elevator_name, name))
                        return e;
        }

        return NULL;
}

static void elevator_put(struct elevator_type *e)
{
        module_put(e->elevator_owner);
}

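/*
 * Look up an elevator type by name and take a reference on its module;
 * callers drop it again with elevator_put().  If @try_loading is set and
 * the scheduler is not yet registered, try to load the "<name>-iosched"
 * module and look again.
 */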
static struct elevator_type *elevator_get(const char *name, bool try_loading)
{
        struct elevator_type *e;

        spin_lock(&elv_list_lock);

        e = elevator_find(name);
        if (!e && try_loading) {
                spin_unlock(&elv_list_lock);
                request_module("%s-iosched", name);
                spin_lock(&elv_list_lock);
                e = elevator_find(name);
        }

        if (e && !try_module_get(e->elevator_owner))
                e = NULL;

        spin_unlock(&elv_list_lock);

        return e;
}

static char chosen_elevator[ELV_NAME_MAX];

static int __init elevator_setup(char *str)
{
        /*
         * Be backwards-compatible with previous kernels, so users
         * won't get the wrong elevator.
         */
        strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
        return 1;
}

__setup("elevator=", elevator_setup);

/* called during boot to load the elevator chosen by the elevator param */
void __init load_default_elevator_module(void)
{
        struct elevator_type *e;

        if (!chosen_elevator[0])
                return;

        spin_lock(&elv_list_lock);
        e = elevator_find(chosen_elevator);
        spin_unlock(&elv_list_lock);

        if (!e)
                request_module("%s-iosched", chosen_elevator);
}

static struct kobj_type elv_ktype;

struct elevator_queue *elevator_alloc(struct request_queue *q,
                                      struct elevator_type *e)
{
        struct elevator_queue *eq;

        eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
        if (unlikely(!eq))
                return NULL;

        eq->type = e;
        kobject_init(&eq->kobj, &elv_ktype);
        mutex_init(&eq->sysfs_lock);
        hash_init(eq->hash);
        eq->uses_mq = e->uses_mq;

        return eq;
}
EXPORT_SYMBOL(elevator_alloc);

static void elevator_release(struct kobject *kobj)
{
        struct elevator_queue *e;

        e = container_of(kobj, struct elevator_queue, kobj);
        elevator_put(e->type);
        kfree(e);
}

int elevator_init(struct request_queue *q, char *name)
{
        struct elevator_type *e = NULL;
        int err;

        /*
         * q->sysfs_lock must be held to provide mutual exclusion between
         * elevator_switch() and here.
         */
        lockdep_assert_held(&q->sysfs_lock);

        if (unlikely(q->elevator))
                return 0;

        INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
        q->end_sector = 0;
        q->boundary_rq = NULL;

        if (name) {
                e = elevator_get(name, true);
                if (!e)
                        return -EINVAL;
        }

        /*
         * Use the default elevator specified by config boot param or
         * config option. Don't try to load modules as we could be running
         * off async and request_module() isn't allowed from async.
         */
        if (!e && *chosen_elevator) {
                e = elevator_get(chosen_elevator, false);
                if (!e)
                        printk(KERN_ERR "I/O scheduler %s not found\n",
                               chosen_elevator);
        }

        if (!e) {
                if (q->mq_ops && q->nr_hw_queues == 1)
                        e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
                else if (q->mq_ops)
                        e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
                else
                        e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);

                if (!e) {
                        printk(KERN_ERR
                                "Default I/O scheduler not found. " \
                                "Using noop/none.\n");
                        e = elevator_get("noop", false);
                }
        }

        if (e->uses_mq) {
                err = blk_mq_sched_setup(q);
                if (!err)
                        err = e->ops.mq.init_sched(q, e);
        } else
                err = e->ops.sq.elevator_init_fn(q, e);
        if (err) {
                if (e->uses_mq)
                        blk_mq_sched_teardown(q);
                elevator_put(e);
        }
        return err;
}
EXPORT_SYMBOL(elevator_init);

void elevator_exit(struct elevator_queue *e)
{
        mutex_lock(&e->sysfs_lock);
        if (e->uses_mq && e->type->ops.mq.exit_sched)
                e->type->ops.mq.exit_sched(e);
        else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
                e->type->ops.sq.elevator_exit_fn(e);
        mutex_unlock(&e->sysfs_lock);

        kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);

static inline void __elv_rqhash_del(struct request *rq)
{
        hash_del(&rq->hash);
        rq->rq_flags &= ~RQF_HASHED;
}

void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
        if (ELV_ON_HASH(rq))
                __elv_rqhash_del(rq);
}
EXPORT_SYMBOL_GPL(elv_rqhash_del);

void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        BUG_ON(ELV_ON_HASH(rq));
        hash_add(e->hash, &rq->hash, rq_hash_key(rq));
        rq->rq_flags |= RQF_HASHED;
}
EXPORT_SYMBOL_GPL(elv_rqhash_add);

void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
        __elv_rqhash_del(rq);
        elv_rqhash_add(q, rq);
}

struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
        struct elevator_queue *e = q->elevator;
        struct hlist_node *next;
        struct request *rq;

        hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
                BUG_ON(!ELV_ON_HASH(rq));

                if (unlikely(!rq_mergeable(rq))) {
                        __elv_rqhash_del(rq);
                        continue;
                }

                if (rq_hash_key(rq) == offset)
                        return rq;
        }

        return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
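/*
 * The tree is keyed on blk_rq_pos(); elv_rb_add() places requests with an
 * equal start sector to the right of the existing entry.
 */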
void elv_rb_add(struct rb_root *root, struct request *rq)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct request *__rq;

        while (*p) {
                parent = *p;
                __rq = rb_entry(parent, struct request, rb_node);

                if (blk_rq_pos(rq) < blk_rq_pos(__rq))
                        p = &(*p)->rb_left;
                else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
                        p = &(*p)->rb_right;
        }

        rb_link_node(&rq->rb_node, parent, p);
        rb_insert_color(&rq->rb_node, root);
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
        BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
        rb_erase(&rq->rb_node, root);
        RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
        struct rb_node *n = root->rb_node;
        struct request *rq;

        while (n) {
                rq = rb_entry(n, struct request, rb_node);

                if (sector < blk_rq_pos(rq))
                        n = n->rb_left;
                else if (sector > blk_rq_pos(rq))
                        n = n->rb_right;
                else
                        return rq;
        }

        return NULL;
}
EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
        sector_t boundary;
        struct list_head *entry;

        if (q->last_merge == rq)
                q->last_merge = NULL;

        elv_rqhash_del(q, rq);

        q->nr_sorted--;

        boundary = q->end_sector;
        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);

                if (req_op(rq) != req_op(pos))
                        break;
                if (rq_data_dir(rq) != rq_data_dir(pos))
                        break;
                if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
                        break;
                if (blk_rq_pos(rq) >= boundary) {
                        if (blk_rq_pos(pos) < boundary)
                                continue;
                } else {
                        if (blk_rq_pos(pos) >= boundary)
                                break;
                }
                if (blk_rq_pos(rq) >= blk_rq_pos(pos))
                        break;
        }

        list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
        if (q->last_merge == rq)
                q->last_merge = NULL;

        elv_rqhash_del(q, rq);

        q->nr_sorted--;

        q->end_sector = rq_end_sector(rq);
        q->boundary_rq = rq;
        list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);

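/*
 * Find a request that @bio may be merged into: try the one-hit cache
 * (q->last_merge) first, then the back-merge hash, then ask the io
 * scheduler itself.  On success *req points at the candidate and the
 * merge type (front or back) is returned; otherwise ELEVATOR_NO_MERGE.
 */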
enum elv_merge elv_merge(struct request_queue *q, struct request **req,
                struct bio *bio)
{
        struct elevator_queue *e = q->elevator;
        struct request *__rq;

        /*
         * Levels of merges:
         *      nomerges:  No merges at all attempted
         *      noxmerges: Only simple one-hit cache try
         *      merges:    All merge tries attempted
         */
        if (blk_queue_nomerges(q) || !bio_mergeable(bio))
                return ELEVATOR_NO_MERGE;

        /*
         * First try one-hit cache.
         */
        if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
                enum elv_merge ret = blk_try_merge(q->last_merge, bio);

                if (ret != ELEVATOR_NO_MERGE) {
                        *req = q->last_merge;
                        return ret;
                }
        }

        if (blk_queue_noxmerges(q))
                return ELEVATOR_NO_MERGE;

        /*
         * See if our hash lookup can find a potential backmerge.
         */
        __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
        if (__rq && elv_bio_merge_ok(__rq, bio)) {
                *req = __rq;
                return ELEVATOR_BACK_MERGE;
        }

        if (e->uses_mq && e->type->ops.mq.request_merge)
                return e->type->ops.mq.request_merge(q, req, bio);
        else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
                return e->type->ops.sq.elevator_merge_fn(q, req, bio);

        return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise
 */
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
{
        struct request *__rq;
        bool ret;

        if (blk_queue_nomerges(q))
                return false;

        /*
         * First try one-hit cache.
         */
        if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
                return true;

        if (blk_queue_noxmerges(q))
                return false;

        ret = false;
        /*
         * See if our hash lookup can find a potential backmerge.
         */
        while (1) {
                __rq = elv_rqhash_find(q, blk_rq_pos(rq));
                if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
                        break;

                /* The merged request could be merged with others, try again */
                ret = true;
                rq = __rq;
        }

        return ret;
}

void elv_merged_request(struct request_queue *q, struct request *rq,
                enum elv_merge type)
{
        struct elevator_queue *e = q->elevator;

        if (e->uses_mq && e->type->ops.mq.request_merged)
                e->type->ops.mq.request_merged(q, rq, type);
        else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
                e->type->ops.sq.elevator_merged_fn(q, rq, type);

        if (type == ELEVATOR_BACK_MERGE)
                elv_rqhash_reposition(q, rq);

        q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
                        struct request *next)
{
        struct elevator_queue *e = q->elevator;
        bool next_sorted = false;

        if (e->uses_mq && e->type->ops.mq.requests_merged)
                e->type->ops.mq.requests_merged(q, rq, next);
        else if (e->type->ops.sq.elevator_merge_req_fn) {
                next_sorted = (__force bool)(next->rq_flags & RQF_SORTED);
                if (next_sorted)
                        e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
        }

        elv_rqhash_reposition(q, rq);

        if (next_sorted) {
                elv_rqhash_del(q, next);
                q->nr_sorted--;
        }

        q->last_merge = rq;
}

void elv_bio_merged(struct request_queue *q, struct request *rq,
                        struct bio *bio)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return;

        if (e->type->ops.sq.elevator_bio_merged_fn)
                e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
}

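/*
 * Runtime PM bookkeeping: track how many non-PM requests are pending so
 * the queue knows when it is idle, and request a resume when work arrives
 * while the device is suspended or suspending.
 */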
#ifdef CONFIG_PM
static void blk_pm_requeue_request(struct request *rq)
{
        if (rq->q->dev && !(rq->rq_flags & RQF_PM))
                rq->q->nr_pending--;
}

static void blk_pm_add_request(struct request_queue *q, struct request *rq)
{
        if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
            (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
                pm_request_resume(q->dev);
}
#else
static inline void blk_pm_requeue_request(struct request *rq) {}
static inline void blk_pm_add_request(struct request_queue *q,
                                      struct request *rq)
{
}
#endif

void elv_requeue_request(struct request_queue *q, struct request *rq)
{
        /*
         * it already went through dequeue, we need to decrement the
         * in_flight count again
         */
        if (blk_account_rq(rq)) {
                q->in_flight[rq_is_sync(rq)]--;
                if (rq->rq_flags & RQF_SORTED)
                        elv_deactivate_rq(q, rq);
        }

        rq->rq_flags &= ~RQF_STARTED;

        blk_pm_requeue_request(rq);

        __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
}

void elv_drain_elevator(struct request_queue *q)
{
        struct elevator_queue *e = q->elevator;
        static int printed;

        if (WARN_ON_ONCE(e->uses_mq))
                return;

        lockdep_assert_held(q->queue_lock);

        while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
                ;
        if (q->nr_sorted && printed++ < 10) {
                printk(KERN_ERR "%s: forced dispatching is broken "
                       "(nr_sorted=%u), please report this\n",
                       q->elevator->type->elevator_name, q->nr_sorted);
        }
}

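/*
 * Place @rq according to @where: REQUEUE and FRONT go to the head of the
 * dispatch list, BACK drains the elevator and appends to the tail,
 * SORT/SORT_MERGE hand the request to the io scheduler (SORT_MERGE first
 * tries an insertion merge), and FLUSH goes through blk_insert_flush().
 */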
void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
        trace_block_rq_insert(q, rq);

        blk_pm_add_request(q, rq);

        rq->q = q;

        if (rq->rq_flags & RQF_SOFTBARRIER) {
                /* barriers are scheduling boundary, update end_sector */
                if (!blk_rq_is_passthrough(rq)) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = rq;
                }
        } else if (!(rq->rq_flags & RQF_ELVPRIV) &&
                    (where == ELEVATOR_INSERT_SORT ||
                     where == ELEVATOR_INSERT_SORT_MERGE))
                where = ELEVATOR_INSERT_BACK;

        switch (where) {
        case ELEVATOR_INSERT_REQUEUE:
        case ELEVATOR_INSERT_FRONT:
                rq->rq_flags |= RQF_SOFTBARRIER;
                list_add(&rq->queuelist, &q->queue_head);
                break;

        case ELEVATOR_INSERT_BACK:
                rq->rq_flags |= RQF_SOFTBARRIER;
                elv_drain_elevator(q);
                list_add_tail(&rq->queuelist, &q->queue_head);
                /*
                 * We kick the queue here for the following reasons.
                 * - The elevator might have returned NULL previously
                 *   to delay requests and returned them now.  As the
                 *   queue wasn't empty before this request, ll_rw_blk
                 *   won't run the queue on return, resulting in hang.
                 * - Usually, back inserted requests won't be merged
                 *   with anything.  There's no point in delaying queue
                 *   processing.
                 */
                __blk_run_queue(q);
                break;

        case ELEVATOR_INSERT_SORT_MERGE:
                /*
                 * If we succeed in merging this request with one in the
                 * queue already, we are done - rq has now been freed,
                 * so no need to do anything further.
                 */
                if (elv_attempt_insert_merge(q, rq))
                        break;
        case ELEVATOR_INSERT_SORT:
                BUG_ON(blk_rq_is_passthrough(rq));
                rq->rq_flags |= RQF_SORTED;
                q->nr_sorted++;
                if (rq_mergeable(rq)) {
                        elv_rqhash_add(q, rq);
                        if (!q->last_merge)
                                q->last_merge = rq;
                }

                /*
                 * Some ioscheds (cfq) run q->request_fn directly, so
                 * rq cannot be accessed after calling
                 * elevator_add_req_fn.
                 */
                q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
                break;

        case ELEVATOR_INSERT_FLUSH:
                rq->rq_flags |= RQF_SOFTBARRIER;
                blk_insert_flush(rq);
                break;
        default:
                printk(KERN_ERR "%s: bad insertion point %d\n",
                       __func__, where);
                BUG();
        }
}
EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(struct request_queue *q, struct request *rq, int where)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        __elv_add_request(q, rq, where);
        spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(elv_add_request);

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        if (e->uses_mq && e->type->ops.mq.next_request)
                return e->type->ops.mq.next_request(q, rq);
        else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
                return e->type->ops.sq.elevator_latter_req_fn(q, rq);

        return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        if (e->uses_mq && e->type->ops.mq.former_request)
                return e->type->ops.mq.former_request(q, rq);
        if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
                return e->type->ops.sq.elevator_former_req_fn(q, rq);
        return NULL;
}

int elv_set_request(struct request_queue *q, struct request *rq,
                    struct bio *bio, gfp_t gfp_mask)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return 0;

        if (e->type->ops.sq.elevator_set_req_fn)
                return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
        return 0;
}

void elv_put_request(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return;

        if (e->type->ops.sq.elevator_put_req_fn)
                e->type->ops.sq.elevator_put_req_fn(rq);
}

int elv_may_queue(struct request_queue *q, unsigned int op)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return 0;

        if (e->type->ops.sq.elevator_may_queue_fn)
                return e->type->ops.sq.elevator_may_queue_fn(q, op);

        return ELV_MQUEUE_MAY;
}

void elv_completed_request(struct request_queue *q, struct request *rq)
{
        struct elevator_queue *e = q->elevator;

        if (WARN_ON_ONCE(e->uses_mq))
                return;

        /*
         * request is released from the driver, io must be done
         */
        if (blk_account_rq(rq)) {
                q->in_flight[rq_is_sync(rq)]--;
                if ((rq->rq_flags & RQF_SORTED) &&
                    e->type->ops.sq.elevator_completed_req_fn)
                        e->type->ops.sq.elevator_completed_req_fn(q, rq);
        }
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
        struct elv_fs_entry *entry = to_elv(attr);
        struct elevator_queue *e;
        ssize_t error;

        if (!entry->show)
                return -EIO;

        e = container_of(kobj, struct elevator_queue, kobj);
        mutex_lock(&e->sysfs_lock);
        error = e->type ? entry->show(e, page) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
               const char *page, size_t length)
{
        struct elv_fs_entry *entry = to_elv(attr);
        struct elevator_queue *e;
        ssize_t error;

        if (!entry->store)
                return -EIO;

        e = container_of(kobj, struct elevator_queue, kobj);
        mutex_lock(&e->sysfs_lock);
        error = e->type ? entry->store(e, page, length) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static const struct sysfs_ops elv_sysfs_ops = {
        .show   = elv_attr_show,
        .store  = elv_attr_store,
};

static struct kobj_type elv_ktype = {
        .sysfs_ops      = &elv_sysfs_ops,
        .release        = elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
        struct elevator_queue *e = q->elevator;
        int error;

        error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
        if (!error) {
                struct elv_fs_entry *attr = e->type->elevator_attrs;
                if (attr) {
                        while (attr->attr.name) {
                                if (sysfs_create_file(&e->kobj, &attr->attr))
                                        break;
                                attr++;
                        }
                }
                kobject_uevent(&e->kobj, KOBJ_ADD);
                e->registered = 1;
                if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
                        e->type->ops.sq.elevator_registered_fn(q);
        }
        return error;
}
EXPORT_SYMBOL(elv_register_queue);

void elv_unregister_queue(struct request_queue *q)
{
        if (q) {
                struct elevator_queue *e = q->elevator;

                kobject_uevent(&e->kobj, KOBJ_REMOVE);
                kobject_del(&e->kobj);
                e->registered = 0;
        }
}
EXPORT_SYMBOL(elv_unregister_queue);

int elv_register(struct elevator_type *e)
{
        char *def = "";

        /* create icq_cache if requested */
        if (e->icq_size) {
                if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
                    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
                        return -EINVAL;

                snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
                         "%s_io_cq", e->elevator_name);
                e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
                                                 e->icq_align, 0, NULL);
                if (!e->icq_cache)
                        return -ENOMEM;
        }

        /* register, don't allow duplicate names */
        spin_lock(&elv_list_lock);
        if (elevator_find(e->elevator_name)) {
                spin_unlock(&elv_list_lock);
                if (e->icq_cache)
                        kmem_cache_destroy(e->icq_cache);
                return -EBUSY;
        }
        list_add_tail(&e->list, &elv_list);
        spin_unlock(&elv_list_lock);

        /* print pretty message */
        if (!strcmp(e->elevator_name, chosen_elevator) ||
            (!*chosen_elevator &&
             !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
                def = " (default)";

        printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
               def);
        return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
        /* unregister */
        spin_lock(&elv_list_lock);
        list_del_init(&e->list);
        spin_unlock(&elv_list_lock);

        /*
         * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
         * sure all RCU operations are complete before proceeding.
         */
        if (e->icq_cache) {
                rcu_barrier();
                kmem_cache_destroy(e->icq_cache);
                e->icq_cache = NULL;
        }
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler, before we have allocated what we
 * need for the new one. this way we have a chance of going back to the old
 * one, if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
        struct elevator_queue *old = q->elevator;
        bool old_registered = false;
        int err;

        if (q->mq_ops) {
                blk_mq_freeze_queue(q);
                blk_mq_quiesce_queue(q);
        }

        /*
         * Turn on BYPASS and drain all requests w/ elevator private data.
         * Block layer doesn't call into a quiesced elevator - all requests
         * are directly put on the dispatch list without elevator data
         * using INSERT_BACK.  All requests have SOFTBARRIER set and no
         * merge happens either.
         */
        if (old) {
                old_registered = old->registered;

                if (old->uses_mq)
                        blk_mq_sched_teardown(q);

                if (!q->mq_ops)
                        blk_queue_bypass_start(q);

                /* unregister and clear all auxiliary data of the old elevator */
                if (old_registered)
                        elv_unregister_queue(q);

                spin_lock_irq(q->queue_lock);
                ioc_clear_queue(q);
                spin_unlock_irq(q->queue_lock);
        }

        /* allocate, init and register new elevator */
        if (new_e) {
                if (new_e->uses_mq) {
                        err = blk_mq_sched_setup(q);
                        if (!err)
                                err = new_e->ops.mq.init_sched(q, new_e);
                } else
                        err = new_e->ops.sq.elevator_init_fn(q, new_e);
                if (err)
                        goto fail_init;

                err = elv_register_queue(q);
                if (err)
                        goto fail_register;
        } else
                q->elevator = NULL;

        /* done, kill the old one and finish */
        if (old) {
                elevator_exit(old);
                if (!q->mq_ops)
                        blk_queue_bypass_end(q);
        }

        if (q->mq_ops) {
                blk_mq_unfreeze_queue(q);
                blk_mq_start_stopped_hw_queues(q, true);
        }

        if (new_e)
                blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
        else
                blk_add_trace_msg(q, "elv switch: none");

        return 0;

fail_register:
        if (q->mq_ops)
                blk_mq_sched_teardown(q);
        elevator_exit(q->elevator);
fail_init:
        /* switch failed, restore and re-register old elevator */
        if (old) {
                q->elevator = old;
                elv_register_queue(q);
                if (!q->mq_ops)
                        blk_queue_bypass_end(q);
        }
        if (q->mq_ops) {
                blk_mq_unfreeze_queue(q);
                blk_mq_start_stopped_hw_queues(q, true);
        }

        return err;
}

/*
 * Switch this queue to the given IO scheduler.
 */
static int __elevator_change(struct request_queue *q, const char *name)
{
        char elevator_name[ELV_NAME_MAX];
        struct elevator_type *e;

        /*
         * Special case for mq, turn off scheduling
         */
        if (q->mq_ops && !strncmp(name, "none", 4))
                return elevator_switch(q, NULL);

        strlcpy(elevator_name, name, sizeof(elevator_name));
        e = elevator_get(strstrip(elevator_name), true);
        if (!e) {
                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
                return -EINVAL;
        }

        if (q->elevator &&
            !strcmp(elevator_name, q->elevator->type->elevator_name)) {
                elevator_put(e);
                return 0;
        }

        if (!e->uses_mq && q->mq_ops) {
                elevator_put(e);
                return -EINVAL;
        }
        if (e->uses_mq && !q->mq_ops) {
                elevator_put(e);
                return -EINVAL;
        }

        return elevator_switch(q, e);
}

int elevator_change(struct request_queue *q, const char *name)
{
        int ret;

        /* Protect q->elevator from elevator_init() */
        mutex_lock(&q->sysfs_lock);
        ret = __elevator_change(q, name);
        mutex_unlock(&q->sysfs_lock);

        return ret;
}
EXPORT_SYMBOL(elevator_change);

ssize_t elv_iosched_store(struct request_queue *q, const char *name,
                          size_t count)
{
        int ret;

        if (!(q->mq_ops || q->request_fn))
                return count;

        ret = __elevator_change(q, name);
        if (!ret)
                return count;

        printk(KERN_ERR "elevator: switch to %s failed\n", name);
        return ret;
}

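/*
 * Format the sysfs "scheduler" listing: the active scheduler is shown in
 * brackets, mq queues offer only mq-capable schedulers (plus "none") and
 * legacy queues offer only legacy ones.
 */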
ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
        struct elevator_queue *e = q->elevator;
        struct elevator_type *elv = NULL;
        struct elevator_type *__e;
        int len = 0;

        if (!blk_queue_stackable(q))
                return sprintf(name, "none\n");

        if (!q->elevator)
                len += sprintf(name+len, "[none] ");
        else
                elv = e->type;

        spin_lock(&elv_list_lock);
        list_for_each_entry(__e, &elv_list, list) {
                if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) {
                        len += sprintf(name+len, "[%s] ", elv->elevator_name);
                        continue;
                }
                if (__e->uses_mq && q->mq_ops)
                        len += sprintf(name+len, "%s ", __e->elevator_name);
                else if (!__e->uses_mq && !q->mq_ops)
                        len += sprintf(name+len, "%s ", __e->elevator_name);
        }
        spin_unlock(&elv_list_lock);

        if (q->mq_ops && q->elevator)
                len += sprintf(name+len, "none");

        len += sprintf(len+name, "\n");
        return len;
}

struct request *elv_rb_former_request(struct request_queue *q,
                                      struct request *rq)
{
        struct rb_node *rbprev = rb_prev(&rq->rb_node);

        if (rbprev)
                return rb_entry_rq(rbprev);

        return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
                                      struct request *rq)
{
        struct rb_node *rbnext = rb_next(&rq->rb_node);

        if (rbnext)
                return rb_entry_rq(rbnext);

        return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);