/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-wbt.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.allow_merge)
		return e->type->ops.allow_merge(q, rq, bio);

	return 1;
}

/*
 * can we safely merge with this request?
 */
bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
{
	if (!blk_rq_merge_ok(rq, bio))
		return false;

	if (!elv_iosched_allow_bio_merge(rq, bio))
		return false;

	return true;
}
EXPORT_SYMBOL(elv_bio_merge_ok);

static bool elevator_match(const struct elevator_type *e, const char *name)
{
	if (!strcmp(e->elevator_name, name))
		return true;
	if (e->elevator_alias && !strcmp(e->elevator_alias, name))
		return true;

	return false;
}

/*
 * Return scheduler with name 'name'
 */
static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e;

	list_for_each_entry(e, &elv_list, list) {
		if (elevator_match(e, name))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(struct request_queue *q,
					  const char *name, bool try_loading)
{
	struct elevator_type *e;

	spin_lock(&elv_list_lock);

	e = elevator_find(name);
	if (!e && try_loading) {
		spin_unlock(&elv_list_lock);
		request_module("%s-iosched", name);
		spin_lock(&elv_list_lock);
		e = elevator_find(name);
	}

	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock(&elv_list_lock);
	return e;
}

static char chosen_elevator[ELV_NAME_MAX];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
	 */
	strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;

struct elevator_queue *elevator_alloc(struct request_queue *q,
				  struct elevator_type *e)
{
	struct elevator_queue *eq;

	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
	if (unlikely(!eq))
		return NULL;

	eq->type = e;
	kobject_init(&eq->kobj, &elv_ktype);
	mutex_init(&eq->sysfs_lock);
	hash_init(eq->hash);

	return eq;
}
EXPORT_SYMBOL(elevator_alloc);

static void elevator_release(struct kobject *kobj)
{
	struct elevator_queue *e;

	e = container_of(kobj, struct elevator_queue, kobj);
	elevator_put(e->type);
	kfree(e);
}

void elevator_exit(struct request_queue *q, struct elevator_queue *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->type->ops.exit_sched)
		blk_mq_exit_sched(q, e);
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}

static inline void __elv_rqhash_del(struct request *rq)
{
	hash_del(&rq->hash);
	rq->rq_flags &= ~RQF_HASHED;
}

void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}
EXPORT_SYMBOL_GPL(elv_rqhash_del);

void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
	rq->rq_flags |= RQF_HASHED;
}
EXPORT_SYMBOL_GPL(elv_rqhash_add);

void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}

struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
	struct elevator_queue *e = q->elevator;
	struct hlist_node *next;
	struct request *rq;

	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

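/*
 * Illustrative note (not from the original file): rq_hash_key() is the
 * request's *end* sector, so a bio whose bi_iter.bi_sector equals that end
 * sector hashes to the same bucket and can be found in O(1) for a back
 * merge. For example, assuming a request covering sectors [100, 108):
 *
 *	rq_hash_key(rq) == 108
 *	elv_rqhash_find(q, 108) returns rq, and elv_merge() below reports
 *	ELEVATOR_BACK_MERGE for a bio starting at sector 108.
 */
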
/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
void elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
			p = &(*p)->rb_left;
		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
			p = &(*p)->rb_right;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < blk_rq_pos(rq))
			n = n->rb_left;
		else if (sector > blk_rq_pos(rq))
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}
EXPORT_SYMBOL(elv_rb_find);

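/*
 * Illustrative sketch (not part of this file): an IO scheduler such as
 * mq-deadline typically keeps an rb_root sorted by start sector and drives
 * it with the helpers above, roughly:
 *
 *	struct rb_root sort_list;		// hypothetical scheduler state
 *
 *	// queue a request in sector order
 *	elv_rb_add(&sort_list, rq);
 *
 *	// front-merge lookup: find a queued request that starts exactly
 *	// where the new bio ends
 *	__rq = elv_rb_find(&sort_list, bio_end_sector(bio));
 *
 *	// remove before dispatch/completion so the tree never points at a
 *	// freed request
 *	elv_rb_del(&sort_list, rq);
 */
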
enum elv_merge elv_merge(struct request_queue *q, struct request **req,
		struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct request *__rq;

	/*
	 * Levels of merges:
	 *	nomerges:  No merges at all attempted
	 *	noxmerges: Only simple one-hit cache try
	 *	merges:	   All merge tries attempted
	 */
	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
		return ELEVATOR_NO_MERGE;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
		enum elv_merge ret = blk_try_merge(q->last_merge, bio);

		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	if (blk_queue_noxmerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
	if (__rq && elv_bio_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->type->ops.request_merge)
		return e->type->ops.request_merge(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

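/*
 * Illustrative note (not from the original file): callers such as
 * blk_mq_sched_try_merge() act on the returned enum elv_merge:
 *
 *	ELEVATOR_BACK_MERGE	- append the bio to *req
 *	ELEVATOR_FRONT_MERGE	- prepend the bio to *req
 *	ELEVATOR_DISCARD_MERGE	- merge the discard bio into *req
 *	ELEVATOR_NO_MERGE	- no candidate; insert the bio as a new request
 *
 * After a successful bio merge the caller reports back through
 * elv_merged_request() so the hash and any rb-tree position get fixed up.
 */
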
/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise
 */
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
{
	struct request *__rq;
	bool ret;

	if (blk_queue_nomerges(q))
		return false;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
		return true;

	if (blk_queue_noxmerges(q))
		return false;

	ret = false;
	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	while (1) {
		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
			break;

		/* The merged request could be merged with others, try again */
		ret = true;
		rq = __rq;
	}

	return ret;
}

void elv_merged_request(struct request_queue *q, struct request *rq,
		enum elv_merge type)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.request_merged)
		e->type->ops.request_merged(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
			struct request *next)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.requests_merged)
		e->type->ops.requests_merged(q, rq, next);

	elv_rqhash_reposition(q, rq);
	q->last_merge = rq;
}

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.next_request)
		return e->type->ops.next_request(q, rq);

	return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.former_request)
		return e->type->ops.former_request(q, rq);

	return NULL;
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->show)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->store)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static const struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;
	int error;

	lockdep_assert_held(&q->sysfs_lock);

	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
	if (!error) {
		struct elv_fs_entry *attr = e->type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
		e->registered = 1;
	}
	return error;
}

void elv_unregister_queue(struct request_queue *q)
{
	lockdep_assert_held(&q->sysfs_lock);

	if (q) {
		struct elevator_queue *e = q->elevator;

		kobject_uevent(&e->kobj, KOBJ_REMOVE);
		kobject_del(&e->kobj);
		e->registered = 0;
		/* Re-enable throttling in case elevator disabled it */
		wbt_enable_default(q);
	}
}

int elv_register(struct elevator_type *e)
{
	char *def = "";

	/* create icq_cache if requested */
	if (e->icq_size) {
		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
			return -EINVAL;

		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
			 "%s_io_cq", e->elevator_name);
		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
						 e->icq_align, 0, NULL);
		if (!e->icq_cache)
			return -ENOMEM;
	}

	/* register, don't allow duplicate names */
	spin_lock(&elv_list_lock);
	if (elevator_find(e->elevator_name)) {
		spin_unlock(&elv_list_lock);
		kmem_cache_destroy(e->icq_cache);
		return -EBUSY;
	}
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
								def);
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
	/* unregister */
	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);

	/*
	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
	 * sure all RCU operations are complete before proceeding.
	 */
	if (e->icq_cache) {
		rcu_barrier();
		kmem_cache_destroy(e->icq_cache);
		e->icq_cache = NULL;
	}
}
EXPORT_SYMBOL_GPL(elv_unregister);

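/*
 * Illustrative sketch of how an IO scheduler module uses elv_register()/
 * elv_unregister(). The "example" scheduler below is made up; the field
 * names follow the in-tree blk-mq schedulers of this era (an assumption,
 * double-check against include/linux/elevator.h):
 *
 *	static struct elevator_type example_sched = {
 *		.ops = {
 *			.init_sched		= example_init_sched,
 *			.exit_sched		= example_exit_sched,
 *			.insert_requests	= example_insert_requests,
 *			.dispatch_request	= example_dispatch_request,
 *		},
 *		.elevator_name	= "example",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return elv_register(&example_sched);
 *	}
 *	module_init(example_init);
 *
 *	static void __exit example_exit(void)
 *	{
 *		elv_unregister(&example_sched);
 *	}
 *	module_exit(example_exit);
 */
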
int elevator_switch_mq(struct request_queue *q,
			      struct elevator_type *new_e)
{
	int ret;

	lockdep_assert_held(&q->sysfs_lock);

	if (q->elevator) {
		if (q->elevator->registered)
			elv_unregister_queue(q);
		ioc_clear_queue(q);
		elevator_exit(q, q->elevator);
	}

	ret = blk_mq_init_sched(q, new_e);
	if (ret)
		goto out;

	if (new_e) {
		ret = elv_register_queue(q);
		if (ret) {
			elevator_exit(q, q->elevator);
			goto out;
		}
	}

	if (new_e)
		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
	else
		blk_add_trace_msg(q, "elv switch: none");

out:
	return ret;
}

/*
 * For blk-mq devices, we default to using mq-deadline, if available, for single
 * queue devices. If deadline isn't available OR we have multiple queues,
 * default to "none".
 */
int elevator_init_mq(struct request_queue *q)
{
	struct elevator_type *e;
	int err = 0;

	if (q->nr_hw_queues != 1)
		return 0;

	/*
	 * q->sysfs_lock must be held to provide mutual exclusion between
	 * elevator_switch() and here.
	 */
	mutex_lock(&q->sysfs_lock);
	if (unlikely(q->elevator))
		goto out_unlock;

	e = elevator_get(q, "mq-deadline", false);
	if (!e)
		goto out_unlock;

	err = blk_mq_init_sched(q, e);
	if (err)
		elevator_put(e);
out_unlock:
	mutex_unlock(&q->sysfs_lock);
	return err;
}


/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler, before we have allocated what we
 * need for the new one. this way we have a chance of going back to the old
 * one, if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
	int err;

	lockdep_assert_held(&q->sysfs_lock);

	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	err = elevator_switch_mq(q, new_e);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);

	return err;
}

/*
 * Switch this queue to the given IO scheduler.
 */
static int __elevator_change(struct request_queue *q, const char *name)
{
	char elevator_name[ELV_NAME_MAX];
	struct elevator_type *e;

	/* Make sure queue is not in the middle of being removed */
	if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
		return -ENOENT;

	/*
	 * Special case for mq, turn off scheduling
	 */
	if (!strncmp(name, "none", 4)) {
		if (!q->elevator)
			return 0;
		return elevator_switch(q, NULL);
	}

	strlcpy(elevator_name, name, sizeof(elevator_name));
	e = elevator_get(q, strstrip(elevator_name), true);
	if (!e)
		return -EINVAL;

	if (q->elevator && elevator_match(q->elevator->type, elevator_name)) {
		elevator_put(e);
		return 0;
	}

	return elevator_switch(q, e);
}

static inline bool elv_support_iosched(struct request_queue *q)
{
	if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
		return false;
	return true;
}

ssize_t elv_iosched_store(struct request_queue *q, const char *name,
			  size_t count)
{
	int ret;

	if (!queue_is_mq(q) || !elv_support_iosched(q))
		return count;

	ret = __elevator_change(q, name);
	if (!ret)
		return count;

	return ret;
}

ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
	struct elevator_queue *e = q->elevator;
	struct elevator_type *elv = NULL;
	struct elevator_type *__e;
	int len = 0;

	if (!queue_is_mq(q))
		return sprintf(name, "none\n");

	if (!q->elevator)
		len += sprintf(name+len, "[none] ");
	else
		elv = e->type;

	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (elv && elevator_match(elv, __e->elevator_name)) {
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
			continue;
		}
		if (elv_support_iosched(q))
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	if (q->elevator)
		len += sprintf(name+len, "none");

	len += sprintf(len+name, "\n");
	return len;
}

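/*
 * Illustrative usage (device and scheduler names are examples only): the
 * two helpers above back /sys/block/<dev>/queue/scheduler:
 *
 *	# cat /sys/block/sda/queue/scheduler
 *	[mq-deadline] kyber bfq none
 *	# echo none > /sys/block/sda/queue/scheduler
 *
 * elv_iosched_show() brackets the active scheduler; elv_iosched_store()
 * passes the written name to __elevator_change().
 */
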
struct request *elv_rb_former_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);