// SPDX-License-Identifier: GPL-2.0
/*
 * The Kyber I/O scheduler. Controls latency by throttling queue depths using
 * scalable techniques.
 *
 * Copyright (C) 2017 Facebook
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/module.h>
#include <linux/sbitmap.h>

#include <trace/events/block.h>

#include "elevator.h"
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"

#define CREATE_TRACE_POINTS
#include <trace/events/kyber.h>

/*
 * Scheduling domains: the device is divided into multiple domains based on the
 * request type.
 */
enum {
	KYBER_READ,
	KYBER_WRITE,
	KYBER_DISCARD,
	KYBER_OTHER,
	KYBER_NUM_DOMAINS,
};

static const char *kyber_domain_names[] = {
	[KYBER_READ] = "READ",
	[KYBER_WRITE] = "WRITE",
	[KYBER_DISCARD] = "DISCARD",
	[KYBER_OTHER] = "OTHER",
};

enum {
	/*
	 * In order to prevent starvation of synchronous requests by a flood of
	 * asynchronous requests, we reserve 25% of requests for synchronous
	 * operations.
	 */
	KYBER_ASYNC_PERCENT = 75,
};

/*
 * Maximum device-wide depth for each scheduling domain.
 *
 * Even for fast devices with lots of tags like NVMe, you can saturate the
 * device with only a fraction of the maximum possible queue depth. So, we cap
 * these to a reasonable value.
 */
static const unsigned int kyber_depth[] = {
	[KYBER_READ] = 256,
	[KYBER_WRITE] = 128,
	[KYBER_DISCARD] = 64,
	[KYBER_OTHER] = 16,
};

/*
 * Default latency targets for each scheduling domain.
 */
static const u64 kyber_latency_targets[] = {
	[KYBER_READ] = 2ULL * NSEC_PER_MSEC,
	[KYBER_WRITE] = 10ULL * NSEC_PER_MSEC,
	[KYBER_DISCARD] = 5ULL * NSEC_PER_SEC,
};

/*
 * Batch size (number of requests we'll dispatch in a row) for each scheduling
 * domain.
 */
static const unsigned int kyber_batch_size[] = {
	[KYBER_READ] = 16,
	[KYBER_WRITE] = 8,
	[KYBER_DISCARD] = 1,
	[KYBER_OTHER] = 1,
};

/*
 * Request latencies are recorded in a histogram with buckets defined relative
 * to the target latency:
 *
 * <= 1/4 * target latency
 * <= 1/2 * target latency
 * <= 3/4 * target latency
 * <= target latency
 * <= 1 1/4 * target latency
 * <= 1 1/2 * target latency
 * <= 1 3/4 * target latency
 * > 1 3/4 * target latency
 */
enum {
	/*
	 * The width of the latency histogram buckets is
	 * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
	 */
	KYBER_LATENCY_SHIFT = 2,
	/*
	 * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
	 * thus, "good".
	 */
	KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
	/* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
	KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
};
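
/*
 * Worked example (added commentary, not from the original source): with the
 * default 2 ms read target and KYBER_LATENCY_SHIFT = 2, the bucket width is
 * 2 ms / 4 = 500 us, so the eight buckets cover <= 0.5 ms, <= 1 ms,
 * <= 1.5 ms, <= 2 ms (the four "good" buckets), then <= 2.5 ms, <= 3 ms,
 * <= 3.5 ms, and > 3.5 ms (the four "bad" buckets).
 */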

/*
 * We measure both the total latency and the I/O latency (i.e., latency after
 * submitting to the device).
 */
enum {
	KYBER_TOTAL_LATENCY,
	KYBER_IO_LATENCY,
};

static const char *kyber_latency_type_names[] = {
	[KYBER_TOTAL_LATENCY] = "total",
	[KYBER_IO_LATENCY] = "I/O",
};

/*
 * Per-cpu latency histograms: total latency and I/O latency for each scheduling
 * domain except for KYBER_OTHER.
 */
struct kyber_cpu_latency {
	atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
};

/*
 * There is the same mapping between ctx & hctx and between kcq & khd;
 * we use request->mq_ctx->index_hw to index the kcq in khd.
 */
struct kyber_ctx_queue {
	/*
	 * Used to ensure operations on rq_list and kcq_map are atomic.
	 * Also protects the rqs on rq_list during merging.
	 */
	spinlock_t lock;
	struct list_head rq_list[KYBER_NUM_DOMAINS];
} ____cacheline_aligned_in_smp;

struct kyber_queue_data {
	struct request_queue *q;
	dev_t dev;

	/*
	 * Each scheduling domain has a limited number of in-flight requests
	 * device-wide, limited by these tokens.
	 */
	struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];

	/*
	 * Async request percentage, converted to per-word depth for
	 * sbitmap_get_shallow().
	 */
	unsigned int async_depth;

	struct kyber_cpu_latency __percpu *cpu_latency;

	/* Timer for stats aggregation and adjusting domain tokens. */
	struct timer_list timer;

	unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];

	unsigned long latency_timeout[KYBER_OTHER];

	int domain_p99[KYBER_OTHER];

	/* Target latencies in nanoseconds. */
	u64 latency_targets[KYBER_OTHER];
};

struct kyber_hctx_data {
	spinlock_t lock;
	struct list_head rqs[KYBER_NUM_DOMAINS];
	unsigned int cur_domain;
	unsigned int batching;
	struct kyber_ctx_queue *kcqs;
	struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
	struct sbq_wait domain_wait[KYBER_NUM_DOMAINS];
	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
	atomic_t wait_index[KYBER_NUM_DOMAINS];
};

static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
			     void *key);

static unsigned int kyber_sched_domain(unsigned int op)
{
	switch (op & REQ_OP_MASK) {
	case REQ_OP_READ:
		return KYBER_READ;
	case REQ_OP_WRITE:
		return KYBER_WRITE;
	case REQ_OP_DISCARD:
		return KYBER_DISCARD;
	default:
		return KYBER_OTHER;
	}
}

static void flush_latency_buckets(struct kyber_queue_data *kqd,
				  struct kyber_cpu_latency *cpu_latency,
				  unsigned int sched_domain, unsigned int type)
{
	unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
	atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type];
	unsigned int bucket;

	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
		buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0);
}

/*
 * Calculate the histogram bucket with the given percentile rank, or -1 if there
 * aren't enough samples yet.
 */
static int calculate_percentile(struct kyber_queue_data *kqd,
				unsigned int sched_domain, unsigned int type,
				unsigned int percentile)
{
	unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
	unsigned int bucket, samples = 0, percentile_samples;

	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
		samples += buckets[bucket];

	if (!samples)
		return -1;

	/*
	 * We do the calculation once we have 500 samples or one second passes
	 * since the first sample was recorded, whichever comes first.
	 */
	if (!kqd->latency_timeout[sched_domain])
		kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL);
	if (samples < 500 &&
	    time_is_after_jiffies(kqd->latency_timeout[sched_domain])) {
		return -1;
	}
	kqd->latency_timeout[sched_domain] = 0;

	percentile_samples = DIV_ROUND_UP(samples * percentile, 100);
	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) {
		if (buckets[bucket] >= percentile_samples)
			break;
		percentile_samples -= buckets[bucket];
	}
	memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type]));

	trace_kyber_latency(kqd->dev, kyber_domain_names[sched_domain],
			    kyber_latency_type_names[type], percentile,
			    bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples);

	return bucket;
}
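
/*
 * Worked example (added for illustration, not from the original source): with
 * bucket counts {50, 100, 150, 100, 60, 20, 10, 10} (samples = 500) and a
 * p90 request, percentile_samples = DIV_ROUND_UP(500 * 90, 100) = 450. The
 * loop walks past buckets 0-3 (subtracting 50, 100, 150, and 100 leaves 50)
 * and stops at bucket 4, where 60 >= 50. Bucket 4 is the first "bad" bucket,
 * i.e. the p90 latency just exceeded the target.
 */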

static void kyber_resize_domain(struct kyber_queue_data *kqd,
				unsigned int sched_domain, unsigned int depth)
{
	depth = clamp(depth, 1U, kyber_depth[sched_domain]);
	if (depth != kqd->domain_tokens[sched_domain].sb.depth) {
		sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
		trace_kyber_adjust(kqd->dev, kyber_domain_names[sched_domain],
				   depth);
	}
}

static void kyber_timer_fn(struct timer_list *t)
{
	struct kyber_queue_data *kqd = from_timer(kqd, t, timer);
	unsigned int sched_domain;
	int cpu;
	bool bad = false;

	/* Sum all of the per-cpu latency histograms. */
	for_each_online_cpu(cpu) {
		struct kyber_cpu_latency *cpu_latency;

		cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu);
		for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
			flush_latency_buckets(kqd, cpu_latency, sched_domain,
					      KYBER_TOTAL_LATENCY);
			flush_latency_buckets(kqd, cpu_latency, sched_domain,
					      KYBER_IO_LATENCY);
		}
	}

	/*
	 * Check if any domains have a high I/O latency, which might indicate
	 * congestion in the device. Note that we use the p90; we don't want to
	 * be too sensitive to outliers here.
	 */
	for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
		int p90;

		p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY,
					   90);
		if (p90 >= KYBER_GOOD_BUCKETS)
			bad = true;
	}

	/*
	 * Adjust the scheduling domain depths. If we determined that there was
	 * congestion, we throttle all domains with good latencies. Either way,
	 * we ease up on throttling domains with bad latencies.
	 */
	for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
		unsigned int orig_depth, depth;
		int p99;

		p99 = calculate_percentile(kqd, sched_domain,
					   KYBER_TOTAL_LATENCY, 99);
		/*
		 * This is kind of subtle: different domains will not
		 * necessarily have enough samples to calculate the latency
		 * percentiles during the same window, so we have to remember
		 * the p99 for the next time we observe congestion; once we do,
		 * we don't want to throttle again until we get more data, so we
		 * reset it to -1.
		 */
		if (bad) {
			if (p99 < 0)
				p99 = kqd->domain_p99[sched_domain];
			kqd->domain_p99[sched_domain] = -1;
		} else if (p99 >= 0) {
			kqd->domain_p99[sched_domain] = p99;
		}
		if (p99 < 0)
			continue;

		/*
		 * If this domain has bad latency, throttle less. Otherwise,
		 * throttle more iff we determined that there is congestion.
		 *
		 * The new depth is scaled linearly with the p99 latency vs the
		 * latency target. E.g., if the p99 is 3/4 of the target, then
		 * we throttle down to 3/4 of the current depth, and if the p99
		 * is 2x the target, then we double the depth.
		 */
		if (bad || p99 >= KYBER_GOOD_BUCKETS) {
			orig_depth = kqd->domain_tokens[sched_domain].sb.depth;
			depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
			kyber_resize_domain(kqd, sched_domain, depth);
		}
	}
}
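
/*
 * Scaling example (added commentary): if a domain's token depth is 64 and its
 * p99 falls in bucket 5 (at most 1 1/2 times the target), the new depth is
 * (64 * (5 + 1)) >> 2 = 96, easing up on the throttling; during a congested
 * interval (bad == true), a p99 in bucket 1 (at most 1/2 of the target) would
 * instead shrink it to (64 * 2) >> 2 = 32.
 */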

static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
{
	struct kyber_queue_data *kqd;
	int ret = -ENOMEM;
	int i;

	kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
	if (!kqd)
		goto err;

	kqd->q = q;
	kqd->dev = disk_devt(q->disk);

	kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency,
					    GFP_KERNEL | __GFP_ZERO);
	if (!kqd->cpu_latency)
		goto err_kqd;

	timer_setup(&kqd->timer, kyber_timer_fn, 0);

	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		WARN_ON(!kyber_depth[i]);
		WARN_ON(!kyber_batch_size[i]);
		ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
					      kyber_depth[i], -1, false,
					      GFP_KERNEL, q->node);
		if (ret) {
			while (--i >= 0)
				sbitmap_queue_free(&kqd->domain_tokens[i]);
			goto err_buckets;
		}
	}

	for (i = 0; i < KYBER_OTHER; i++) {
		kqd->domain_p99[i] = -1;
		kqd->latency_targets[i] = kyber_latency_targets[i];
	}

	return kqd;

err_buckets:
	free_percpu(kqd->cpu_latency);
err_kqd:
	kfree(kqd);
err:
	return ERR_PTR(ret);
}

static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct kyber_queue_data *kqd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	kqd = kyber_queue_data_alloc(q);
	if (IS_ERR(kqd)) {
		kobject_put(&eq->kobj);
		return PTR_ERR(kqd);
	}

	blk_stat_enable_accounting(q);

	eq->elevator_data = kqd;
	q->elevator = eq;

	return 0;
}

static void kyber_exit_sched(struct elevator_queue *e)
{
	struct kyber_queue_data *kqd = e->elevator_data;
	int i;

	del_timer_sync(&kqd->timer);
	blk_stat_disable_accounting(kqd->q);

	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
		sbitmap_queue_free(&kqd->domain_tokens[i]);
	free_percpu(kqd->cpu_latency);
	kfree(kqd);
}

static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
{
	unsigned int i;

	spin_lock_init(&kcq->lock);
	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
		INIT_LIST_HEAD(&kcq->rq_list[i]);
}

static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
{
	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
	struct blk_mq_tags *tags = hctx->sched_tags;
	unsigned int shift = tags->bitmap_tags.sb.shift;

	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;

	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth);
}
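
/*
 * Numeric sketch (added commentary): KYBER_ASYNC_PERCENT = 75 reserves 25% of
 * the per-word tag space for synchronous requests. E.g. with a sbitmap word
 * shift of 6 (64 bits per word), async_depth = 64 * 75 / 100 = 48, so shallow
 * (async) allocations use at most 48 bits of each word, leaving roughly 16
 * bits per word that only sync I/O can consume.
 */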

static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
	struct kyber_hctx_data *khd;
	int i;

	khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
	if (!khd)
		return -ENOMEM;

	khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
				       sizeof(struct kyber_ctx_queue),
				       GFP_KERNEL, hctx->numa_node);
	if (!khd->kcqs)
		goto err_khd;

	for (i = 0; i < hctx->nr_ctx; i++)
		kyber_ctx_queue_init(&khd->kcqs[i]);

	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
				      ilog2(8), GFP_KERNEL, hctx->numa_node,
				      false, false)) {
			while (--i >= 0)
				sbitmap_free(&khd->kcq_map[i]);
			goto err_kcqs;
		}
	}

	spin_lock_init(&khd->lock);

	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		INIT_LIST_HEAD(&khd->rqs[i]);
		khd->domain_wait[i].sbq = NULL;
		init_waitqueue_func_entry(&khd->domain_wait[i].wait,
					  kyber_domain_wake);
		khd->domain_wait[i].wait.private = hctx;
		INIT_LIST_HEAD(&khd->domain_wait[i].wait.entry);
		atomic_set(&khd->wait_index[i], 0);
	}

	khd->cur_domain = 0;
	khd->batching = 0;

	hctx->sched_data = khd;
	kyber_depth_updated(hctx);

	return 0;

err_kcqs:
	kfree(khd->kcqs);
err_khd:
	kfree(khd);
	return -ENOMEM;
}

static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
	struct kyber_hctx_data *khd = hctx->sched_data;
	int i;

	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
		sbitmap_free(&khd->kcq_map[i]);
	kfree(khd->kcqs);
	kfree(hctx->sched_data);
}

static int rq_get_domain_token(struct request *rq)
{
	return (long)rq->elv.priv[0];
}

static void rq_set_domain_token(struct request *rq, int token)
{
	rq->elv.priv[0] = (void *)(long)token;
}

static void rq_clear_domain_token(struct kyber_queue_data *kqd,
				  struct request *rq)
{
	unsigned int sched_domain;
	int nr;

	nr = rq_get_domain_token(rq);
	if (nr != -1) {
		sched_domain = kyber_sched_domain(rq->cmd_flags);
		sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
				    rq->mq_ctx->cpu);
	}
}

static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
{
	/*
	 * We use the scheduler tags as per-hardware queue queueing tokens.
	 * Async requests can be limited at this stage.
	 */
	if (!op_is_sync(op)) {
		struct kyber_queue_data *kqd = data->q->elevator->elevator_data;

		data->shallow_depth = kqd->async_depth;
	}
}

static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
			    unsigned int nr_segs)
{
	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
	struct kyber_hctx_data *khd = hctx->sched_data;
	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
	struct list_head *rq_list = &kcq->rq_list[sched_domain];
	bool merged;

	spin_lock(&kcq->lock);
	merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs);
	spin_unlock(&kcq->lock);

	return merged;
}

static void kyber_prepare_request(struct request *rq)
{
	rq_set_domain_token(rq, -1);
}

static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
				  struct list_head *rq_list, bool at_head)
{
	struct kyber_hctx_data *khd = hctx->sched_data;
	struct request *rq, *next;

	list_for_each_entry_safe(rq, next, rq_list, queuelist) {
		unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
		struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]];
		struct list_head *head = &kcq->rq_list[sched_domain];

		spin_lock(&kcq->lock);
		trace_block_rq_insert(rq);
		if (at_head)
			list_move(&rq->queuelist, head);
		else
			list_move_tail(&rq->queuelist, head);
		sbitmap_set_bit(&khd->kcq_map[sched_domain],
				rq->mq_ctx->index_hw[hctx->type]);
		spin_unlock(&kcq->lock);
	}
}

static void kyber_finish_request(struct request *rq)
{
	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;

	rq_clear_domain_token(kqd, rq);
}

static void add_latency_sample(struct kyber_cpu_latency *cpu_latency,
			       unsigned int sched_domain, unsigned int type,
			       u64 target, u64 latency)
{
	unsigned int bucket;
	u64 divisor;

	if (latency > 0) {
		divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1);
		bucket = min_t(unsigned int, div64_u64(latency - 1, divisor),
			       KYBER_LATENCY_BUCKETS - 1);
	} else {
		bucket = 0;
	}

	atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]);
}
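
/*
 * Example (added for illustration): with a 2 ms target, the divisor is
 * 2 ms >> 2 = 500 us. A 1.2 ms completion lands in bucket
 * min((1200000 - 1) / 500000, 7) = 2, i.e. "<= 3/4 * target"; anything above
 * 3.5 ms saturates into the last bucket, 7.
 */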

static void kyber_completed_request(struct request *rq, u64 now)
{
	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
	struct kyber_cpu_latency *cpu_latency;
	unsigned int sched_domain;
	u64 target;

	sched_domain = kyber_sched_domain(rq->cmd_flags);
	if (sched_domain == KYBER_OTHER)
		return;

	cpu_latency = get_cpu_ptr(kqd->cpu_latency);
	target = kqd->latency_targets[sched_domain];
	add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY,
			   target, now - rq->start_time_ns);
	add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target,
			   now - rq->io_start_time_ns);
	put_cpu_ptr(kqd->cpu_latency);

	timer_reduce(&kqd->timer, jiffies + HZ / 10);
}

struct flush_kcq_data {
	struct kyber_hctx_data *khd;
	unsigned int sched_domain;
	struct list_head *list;
};

static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
{
	struct flush_kcq_data *flush_data = data;
	struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];

	spin_lock(&kcq->lock);
	list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
			      flush_data->list);
	sbitmap_clear_bit(sb, bitnr);
	spin_unlock(&kcq->lock);

	return true;
}

static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
				  unsigned int sched_domain,
				  struct list_head *list)
{
	struct flush_kcq_data data = {
		.khd = khd,
		.sched_domain = sched_domain,
		.list = list,
	};

	sbitmap_for_each_set(&khd->kcq_map[sched_domain],
			     flush_busy_kcq, &data);
}

static int kyber_domain_wake(wait_queue_entry_t *wqe, unsigned mode, int flags,
			     void *key)
{
	struct blk_mq_hw_ctx *hctx = READ_ONCE(wqe->private);
	struct sbq_wait *wait = container_of(wqe, struct sbq_wait, wait);

	sbitmap_del_wait_queue(wait);
	blk_mq_run_hw_queue(hctx, true);
	return 1;
}

static int kyber_get_domain_token(struct kyber_queue_data *kqd,
				  struct kyber_hctx_data *khd,
				  struct blk_mq_hw_ctx *hctx)
{
	unsigned int sched_domain = khd->cur_domain;
	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
	struct sbq_wait *wait = &khd->domain_wait[sched_domain];
	struct sbq_wait_state *ws;
	int nr;

	nr = __sbitmap_queue_get(domain_tokens);

	/*
	 * If we failed to get a domain token, make sure the hardware queue is
	 * run when one becomes available. Note that this is serialized on
	 * khd->lock, but we still need to be careful about the waker.
	 */
	if (nr < 0 && list_empty_careful(&wait->wait.entry)) {
		ws = sbq_wait_ptr(domain_tokens,
				  &khd->wait_index[sched_domain]);
		khd->domain_ws[sched_domain] = ws;
		sbitmap_add_wait_queue(domain_tokens, ws, wait);

		/*
		 * Try again in case a token was freed before we got on the wait
		 * queue.
		 */
		nr = __sbitmap_queue_get(domain_tokens);
	}

	/*
	 * If we got a token while we were on the wait queue, remove ourselves
	 * from the wait queue to ensure that all wake ups make forward
	 * progress. It's possible that the waker already deleted the entry
	 * between the !list_empty_careful() check and us grabbing the lock, but
	 * list_del_init() is okay with that.
	 */
	if (nr >= 0 && !list_empty_careful(&wait->wait.entry)) {
		ws = khd->domain_ws[sched_domain];
		spin_lock_irq(&ws->wait.lock);
		sbitmap_del_wait_queue(wait);
		spin_unlock_irq(&ws->wait.lock);
	}

	return nr;
}
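
/*
 * Added note (illustrative): the second __sbitmap_queue_get() above closes a
 * classic lost-wakeup window. Timeline: (1) we fail to get a token, (2)
 * another CPU frees a token and sees our wait queue entry not yet added, so
 * no wakeup is issued, (3) we add ourselves to the wait queue. Without the
 * retry after step (3), we could wait indefinitely for a token that is
 * already free.
 */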

static struct request *
kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
			  struct kyber_hctx_data *khd,
			  struct blk_mq_hw_ctx *hctx)
{
	struct list_head *rqs;
	struct request *rq;
	int nr;

	rqs = &khd->rqs[khd->cur_domain];

	/*
	 * If we already have a flushed request, then we just need to get a
	 * token for it. Otherwise, if there are pending requests in the kcqs,
	 * flush the kcqs, but only if we can get a token. If not, we should
	 * leave the requests in the kcqs so that they can be merged. Note that
	 * khd->lock serializes the flushes, so if we observed any bit set in
	 * the kcq_map, we will always get a request.
	 */
	rq = list_first_entry_or_null(rqs, struct request, queuelist);
	if (rq) {
		nr = kyber_get_domain_token(kqd, khd, hctx);
		if (nr >= 0) {
			khd->batching++;
			rq_set_domain_token(rq, nr);
			list_del_init(&rq->queuelist);
			return rq;
		} else {
			trace_kyber_throttled(kqd->dev,
					      kyber_domain_names[khd->cur_domain]);
		}
	} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
		nr = kyber_get_domain_token(kqd, khd, hctx);
		if (nr >= 0) {
			kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
			rq = list_first_entry(rqs, struct request, queuelist);
			khd->batching++;
			rq_set_domain_token(rq, nr);
			list_del_init(&rq->queuelist);
			return rq;
		} else {
			trace_kyber_throttled(kqd->dev,
					      kyber_domain_names[khd->cur_domain]);
		}
	}

	/* There were either no pending requests or no tokens. */
	return NULL;
}

static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
	struct kyber_hctx_data *khd = hctx->sched_data;
	struct request *rq;
	int i;

	spin_lock(&khd->lock);

	/*
	 * First, if we are still entitled to batch, try to dispatch a request
	 * from the batch.
	 */
	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
		if (rq)
			goto out;
	}

	/*
	 * Either,
	 * 1. We were no longer entitled to a batch.
	 * 2. The domain we were batching didn't have any requests.
	 * 3. The domain we were batching was out of tokens.
	 *
	 * Start another batch. Note that this wraps back around to the original
	 * domain if no other domains have requests or tokens.
	 */
	khd->batching = 0;
	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
			khd->cur_domain = 0;
		else
			khd->cur_domain++;

		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
		if (rq)
			goto out;
	}

	rq = NULL;
out:
	spin_unlock(&khd->lock);
	return rq;
}

static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
{
	struct kyber_hctx_data *khd = hctx->sched_data;
	int i;

	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		if (!list_empty_careful(&khd->rqs[i]) ||
		    sbitmap_any_bit_set(&khd->kcq_map[i]))
			return true;
	}

	return false;
}

#define KYBER_LAT_SHOW_STORE(domain, name)				\
static ssize_t kyber_##name##_lat_show(struct elevator_queue *e,	\
				       char *page)			\
{									\
	struct kyber_queue_data *kqd = e->elevator_data;		\
									\
	return sprintf(page, "%llu\n", kqd->latency_targets[domain]);	\
}									\
									\
static ssize_t kyber_##name##_lat_store(struct elevator_queue *e,	\
					const char *page, size_t count)	\
{									\
	struct kyber_queue_data *kqd = e->elevator_data;		\
	unsigned long long nsec;					\
	int ret;							\
									\
	ret = kstrtoull(page, 10, &nsec);				\
	if (ret)							\
		return ret;						\
									\
	kqd->latency_targets[domain] = nsec;				\
									\
	return count;							\
}
KYBER_LAT_SHOW_STORE(KYBER_READ, read);
KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
#undef KYBER_LAT_SHOW_STORE

#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
static struct elv_fs_entry kyber_sched_attrs[] = {
	KYBER_LAT_ATTR(read),
	KYBER_LAT_ATTR(write),
	__ATTR_NULL
};
#undef KYBER_LAT_ATTR
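
/*
 * Usage note (added): with kyber active, these attributes appear under
 * /sys/block/<dev>/queue/iosched/. E.g. (assuming device nvme0n1):
 *
 *   echo 10000000 > /sys/block/nvme0n1/queue/iosched/read_lat_nsec
 *
 * would raise the read latency target from the default 2 ms to 10 ms.
 */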

#ifdef CONFIG_BLK_DEBUG_FS
#define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name)			\
static int kyber_##name##_tokens_show(void *data, struct seq_file *m)	\
{									\
	struct request_queue *q = data;					\
	struct kyber_queue_data *kqd = q->elevator->elevator_data;	\
									\
	sbitmap_queue_show(&kqd->domain_tokens[domain], m);		\
	return 0;							\
}									\
									\
static void *kyber_##name##_rqs_start(struct seq_file *m, loff_t *pos)	\
	__acquires(&khd->lock)						\
{									\
	struct blk_mq_hw_ctx *hctx = m->private;			\
	struct kyber_hctx_data *khd = hctx->sched_data;			\
									\
	spin_lock(&khd->lock);						\
	return seq_list_start(&khd->rqs[domain], *pos);			\
}									\
									\
static void *kyber_##name##_rqs_next(struct seq_file *m, void *v,	\
				     loff_t *pos)			\
{									\
	struct blk_mq_hw_ctx *hctx = m->private;			\
	struct kyber_hctx_data *khd = hctx->sched_data;			\
									\
	return seq_list_next(v, &khd->rqs[domain], pos);		\
}									\
									\
static void kyber_##name##_rqs_stop(struct seq_file *m, void *v)	\
	__releases(&khd->lock)						\
{									\
	struct blk_mq_hw_ctx *hctx = m->private;			\
	struct kyber_hctx_data *khd = hctx->sched_data;			\
									\
	spin_unlock(&khd->lock);					\
}									\
									\
static const struct seq_operations kyber_##name##_rqs_seq_ops = {	\
	.start = kyber_##name##_rqs_start,				\
	.next = kyber_##name##_rqs_next,				\
	.stop = kyber_##name##_rqs_stop,				\
	.show = blk_mq_debugfs_rq_show,					\
};									\
									\
static int kyber_##name##_waiting_show(void *data, struct seq_file *m)	\
{									\
	struct blk_mq_hw_ctx *hctx = data;				\
	struct kyber_hctx_data *khd = hctx->sched_data;			\
	wait_queue_entry_t *wait = &khd->domain_wait[domain].wait;	\
									\
	seq_printf(m, "%d\n", !list_empty_careful(&wait->entry));	\
	return 0;							\
}
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
#undef KYBER_DEBUGFS_DOMAIN_ATTRS

static int kyber_async_depth_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct kyber_queue_data *kqd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", kqd->async_depth);
	return 0;
}

static int kyber_cur_domain_show(void *data, struct seq_file *m)
{
	struct blk_mq_hw_ctx *hctx = data;
	struct kyber_hctx_data *khd = hctx->sched_data;

	seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]);
	return 0;
}

static int kyber_batching_show(void *data, struct seq_file *m)
{
	struct blk_mq_hw_ctx *hctx = data;
	struct kyber_hctx_data *khd = hctx->sched_data;

	seq_printf(m, "%u\n", khd->batching);
	return 0;
}

#define KYBER_QUEUE_DOMAIN_ATTRS(name)	\
	{#name "_tokens", 0400, kyber_##name##_tokens_show}
static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
	KYBER_QUEUE_DOMAIN_ATTRS(read),
	KYBER_QUEUE_DOMAIN_ATTRS(write),
	KYBER_QUEUE_DOMAIN_ATTRS(discard),
	KYBER_QUEUE_DOMAIN_ATTRS(other),
	{"async_depth", 0400, kyber_async_depth_show},
	{},
};
#undef KYBER_QUEUE_DOMAIN_ATTRS

#define KYBER_HCTX_DOMAIN_ATTRS(name)					\
	{#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops},	\
	{#name "_waiting", 0400, kyber_##name##_waiting_show}
static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
	KYBER_HCTX_DOMAIN_ATTRS(read),
	KYBER_HCTX_DOMAIN_ATTRS(write),
	KYBER_HCTX_DOMAIN_ATTRS(discard),
	KYBER_HCTX_DOMAIN_ATTRS(other),
	{"cur_domain", 0400, kyber_cur_domain_show},
	{"batching", 0400, kyber_batching_show},
	{},
};
#undef KYBER_HCTX_DOMAIN_ATTRS
#endif

static struct elevator_type kyber_sched = {
	.ops = {
		.init_sched = kyber_init_sched,
		.exit_sched = kyber_exit_sched,
		.init_hctx = kyber_init_hctx,
		.exit_hctx = kyber_exit_hctx,
		.limit_depth = kyber_limit_depth,
		.bio_merge = kyber_bio_merge,
		.prepare_request = kyber_prepare_request,
		.insert_requests = kyber_insert_requests,
		.finish_request = kyber_finish_request,
		.requeue_request = kyber_finish_request,
		.completed_request = kyber_completed_request,
		.dispatch_request = kyber_dispatch_request,
		.has_work = kyber_has_work,
		.depth_updated = kyber_depth_updated,
	},
#ifdef CONFIG_BLK_DEBUG_FS
	.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
	.hctx_debugfs_attrs = kyber_hctx_debugfs_attrs,
#endif
	.elevator_attrs = kyber_sched_attrs,
	.elevator_name = "kyber",
	.elevator_features = ELEVATOR_F_MQ_AWARE,
	.elevator_owner = THIS_MODULE,
};

static int __init kyber_init(void)
{
	return elv_register(&kyber_sched);
}

static void __exit kyber_exit(void)
{
	elv_unregister(&kyber_sched);
}

module_init(kyber_init);
module_exit(kyber_exit);

MODULE_AUTHOR("Omar Sandoval");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Kyber I/O scheduler");
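
/*
 * Usage note (added): to select this scheduler at runtime for a device
 * (assuming /dev/nvme0n1), write its name to the queue's scheduler attribute:
 *
 *   echo kyber > /sys/block/nvme0n1/queue/scheduler
 */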