/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/percpu.h>
#include <linux/percpu_counter.h>
#include <linux/u64_stats_sync.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/fs.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP

enum blkg_iostat_type {
	BLKG_IOSTAT_READ,
	BLKG_IOSTAT_WRITE,
	BLKG_IOSTAT_DISCARD,

	BLKG_IOSTAT_NR,
};

struct blkcg_gq;

struct blkcg {
	struct cgroup_subsys_state	css;
	spinlock_t			lock;

	struct radix_tree_root		blkg_tree;
	struct blkcg_gq	__rcu		*blkg_hint;
	struct hlist_head		blkg_list;

	struct blkcg_policy_data	*cpd[BLKCG_MAX_POLS];

	struct list_head		all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head		cgwb_list;
	refcount_t			cgwb_refcnt;
#endif
};

struct blkg_iostat {
	u64				bytes[BLKG_IOSTAT_NR];
	u64				ios[BLKG_IOSTAT_NR];
};

struct blkg_iostat_set {
	struct u64_stats_sync		sync;
	struct blkg_iostat		cur;
	struct blkg_iostat		last;
};

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate a private
 * data area by allocating a larger data structure which embeds
 * blkg_policy_data at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq			*blkg;
	int				plid;
};

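/*
 * Illustrative sketch (not part of this header's API): a policy would
 * typically wrap blkg_policy_data in its own per-blkg structure and convert
 * between the two with container_of().  The "my_*" names are made up.
 *
 *	struct my_blkg_data {
 *		struct blkg_policy_data pd;	(must be the first member)
 *		u64 nr_dispatched;
 *	};
 *
 *	static struct my_blkg_data *pd_to_my(struct blkg_policy_data *pd)
 *	{
 *		return pd ? container_of(pd, struct my_blkg_data, pd) : NULL;
 *	}
 */
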
/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods.  A policy can allocate a private data area by allocating a
 * larger data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg			*blkcg;
	int				plid;
};

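/*
 * Sketch of the same embedding pattern for per-blkcg data (illustrative
 * only; "my_blkcg_data" is a made-up name):
 *
 *	struct my_blkcg_data {
 *		struct blkcg_policy_data cpd;	(must be the first member)
 *		unsigned int weight;
 *	};
 */
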
/* association between a blk cgroup and a request queue */
struct blkcg_gq {
	/* Pointer to the associated request_queue */
	struct request_queue		*q;
	struct list_head		q_node;
	struct hlist_node		blkcg_node;
	struct blkcg			*blkcg;

	/*
	 * Each blkg gets congested separately and the congestion state is
	 * propagated to the matching bdi_writeback_congested.
	 */
	struct bdi_writeback_congested	*wb_congested;

	/* all non-root blkcg_gq's are guaranteed to have access to parent */
	struct blkcg_gq			*parent;

	/* reference count */
	struct percpu_ref		refcnt;

	/* is this blkg online? protected by both blkcg and q locks */
	bool				online;

	struct blkg_iostat_set __percpu	*iostat_cpu;
	struct blkg_iostat_set		iostat;

	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];

	spinlock_t			async_bio_lock;
	struct bio_list			async_bios;
	struct work_struct		async_bio_work;

	atomic_t			use_delay;
	atomic64_t			delay_nsec;
	atomic64_t			delay_start;
	u64				last_delay;
	int				last_use;

	struct rcu_head			rcu_head;
};

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
				struct request_queue *q, struct blkcg *blkcg);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
				      size_t size);

struct blkcg_policy {
	int				plid;
	/* cgroup files for the policy */
	struct cftype			*dfl_cftypes;
	struct cftype			*legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
	blkcg_pol_init_cpd_fn		*cpd_init_fn;
	blkcg_pol_free_cpd_fn		*cpd_free_fn;
	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;

	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
	blkcg_pol_init_pd_fn		*pd_init_fn;
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
	blkcg_pol_free_pd_fn		*pd_free_fn;
	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn		*pd_stat_fn;
};

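/*
 * Illustrative sketch of wiring up a policy (not a real in-tree policy;
 * the "my_*" callbacks are assumed to be defined elsewhere):
 *
 *	static struct blkcg_policy my_blkcg_policy = {
 *		.pd_alloc_fn	= my_pd_alloc,
 *		.pd_init_fn	= my_pd_init,
 *		.pd_free_fn	= my_pd_free,
 *	};
 *
 *	(module init)	ret = blkcg_policy_register(&my_blkcg_policy);
 *	(per queue)	ret = blkcg_activate_policy(q, &my_blkcg_policy);
 *	(teardown)	blkcg_deactivate_policy(q, &my_blkcg_policy);
 *			blkcg_policy_unregister(&my_blkcg_policy);
 *
 * blkcg_policy_register() assigns ->plid; policies don't set it themselves.
 */
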
extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;
extern bool blkcg_debug_stats;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
				      struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol);

const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);

struct blkg_conf_ctx {
	struct gendisk			*disk;
	struct blkcg_gq			*blkg;
	char				*body;
};

struct gendisk *blkcg_conf_get_disk(char **inputp);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);

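/*
 * Typical use from a cftype write handler (sketch; error handling of the
 * policy-specific parse step is elided and "my_parse" is hypothetical):
 *
 *	struct blkg_conf_ctx ctx;
 *	int ret;
 *
 *	ret = blkg_conf_prep(blkcg, &my_blkcg_policy, buf, &ctx);
 *	if (ret)
 *		return ret;
 *	ret = my_parse(ctx.body, blkg_to_pd(ctx.blkg, &my_blkcg_policy));
 *	blkg_conf_finish(&ctx);
 *	return ret ?: nbytes;
 */
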
/**
 * blkcg_css - find the current css
 *
 * Find the css associated with either the kthread or the current task.
 * This may return a dying css, so it is up to the caller to use tryget logic
 * to confirm it is alive and well.
 */
static inline struct cgroup_subsys_state *blkcg_css(void)
{
	struct cgroup_subsys_state *css;

	css = kthread_blkcg();
	if (css)
		return css;
	return task_css(current, io_cgrp_id);
}

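/*
 * Since blkcg_css() may return a dying css, a caller that needs to hold on
 * to the result has to confirm liveness first (illustrative sketch):
 *
 *	rcu_read_lock();
 *	css = blkcg_css();
 *	if (css && css_tryget_online(css)) {
 *		(use css, then drop it with css_put(css))
 *	}
 *	rcu_read_unlock();
 */
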
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct blkcg, css) : NULL;
}

/**
 * __bio_blkcg - internal, inconsistent version to get blkcg
 *
 * DO NOT USE.
 * This function is inconsistent and consequently is dangerous to use.  The
 * first path returns a blkcg on which a reference is owned by the bio; it
 * needs no RCU protection because the blkcg cannot go away while the bio
 * holds a reference to it.  The fallback path, however, gets the blkcg from
 * task_css(), which can race against task migration and the cgroup dying.
 * That path is semantically different: it must be called RCU protected and
 * taking a reference on the result may fail.  Therefore, it is not ok to
 * assume that *_get() will always succeed on the blkcg returned here.
 */
static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_blkg)
		return bio->bi_blkg->blkcg;
	return css_to_blkcg(blkcg_css());
}

/**
 * bio_blkcg - grab the blkcg associated with a bio
 * @bio: target bio
 *
 * This returns the blkcg associated with a bio, %NULL if not associated.
 * Callers are expected to either handle %NULL or know association has been
 * done prior to calling this.
 */
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_blkg)
		return bio->bi_blkg->blkcg;
	return NULL;
}

static inline bool blk_cgroup_congested(void)
{
	struct cgroup_subsys_state *css;
	bool ret = false;

	rcu_read_lock();
	css = kthread_blkcg();
	if (!css)
		css = task_css(current, io_cgrp_id);
	while (css) {
		if (atomic_read(&css->cgroup->congestion_count)) {
			ret = true;
			break;
		}
		css = css->parent;
	}
	rcu_read_unlock();
	return ret;
}

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @bio: the target bio
 *
 * Return: %true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as if
 * it were attached to the root blkg, and then backcharge to the actual owning
 * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
 * bio and attach the appropriate blkg to the bio.  Then we call this helper
 * and if it is true run with the root blkg for that queue and then do any
 * backcharging to the originating cgroup once the io is complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}

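/*
 * Sketch of the pattern the comment above describes (illustrative only):
 *
 *	bio_associate_blkg(bio);
 *	if (bio_issue_as_root_blkg(bio))
 *		blkg = blk_queue_root_blkg(bio->bi_disk->queue);
 *	else
 *		blkg = bio->bi_blkg;
 *	(issue under that blkg, backcharge the original owner on completion)
 */
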
/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg.  Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
	return css_to_blkcg(blkcg->css.parent);
}

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return __blkg_lookup(blkcg, q, false);
}

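/*
 * Illustrative lookup sketch: callers hold the RCU read lock across both
 * the lookup and any use of the returned blkg unless they take their own
 * reference (see blkg_tryget() below):
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(blkcg, q);
 *	if (blkg)
 *		(inspect blkg, or blkg_tryget(blkg) to use it after unlock)
 *	rcu_read_unlock();
 */
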
/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level.  See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
	return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data.  Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}

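/*
 * The accessors above pair up in the obvious way (illustrative sketch;
 * "my_blkcg_policy" is the hypothetical policy from earlier examples):
 *
 *	struct blkg_policy_data *pd = blkg_to_pd(blkg, &my_blkcg_policy);
 *
 *	if (pd)
 *		WARN_ON(pd_to_blkg(pd) != blkg);
 */
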
extern void blkcg_destroy_blkgs(struct blkcg *blkcg);

#ifdef CONFIG_CGROUP_WRITEBACK

/**
 * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 */
static inline void blkcg_cgwb_get(struct blkcg *blkcg)
{
	refcount_inc(&blkcg->cgwb_refcnt);
}

/**
 * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 * When this count goes to zero, all active wbs have finished so the
 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
 * This work may occur in cgwb_release_workfn() on the cgwb_release
 * workqueue.
 */
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
		blkcg_destroy_blkgs(blkcg);
}

#else

static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }

static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
	/* wb isn't being accounted, so trigger destruction right away */
	blkcg_destroy_blkgs(blkcg);
}

#endif

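/*
 * Pairing sketch (illustrative): a writeback structure takes a reference
 * for as long as it lives on blkcg->cgwb_list.
 *
 *	blkcg_cgwb_get(blkcg);			(when the wb is created)
 *	list_add(&wb->blkcg_node, &blkcg->cgwb_list);
 *	...
 *	list_del(&wb->blkcg_node);		(when the wb is released)
 *	blkcg_cgwb_put(blkcg);			(may free the blkgs)
 */
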
/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

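/*
 * Usage sketch, e.g. for debug output (illustrative only):
 *
 *	char path[128];
 *
 *	if (blkg_path(blkg, path, sizeof(path)) >= 0)
 *		pr_debug("blkg path: %s\n", path);
 */
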
/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	percpu_ref_get(&blkg->refcnt);
}

/**
 * blkg_tryget - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg.  We may be in the
 * midst of freeing this blkg, so we can only use it if the refcnt is not
 * zero.
 */
static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
	return blkg && percpu_ref_tryget(&blkg->refcnt);
}

/**
 * blkg_tryget_closest - try and get a blkg ref on the closest blkg
 * @blkg: blkg to get
 *
 * This needs to be called RCU protected.  As the failure mode here is to
 * walk up the blkg tree, this ensures that the blkg->parent pointers are
 * always valid.  This returns the blkg that it ended up taking a reference
 * on or %NULL if no reference was taken.
 */
static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
{
	struct blkcg_gq *ret_blkg = NULL;

	WARN_ON_ONCE(!rcu_read_lock_held());

	while (blkg) {
		if (blkg_tryget(blkg)) {
			ret_blkg = blkg;
			break;
		}
		blkg = blkg->parent;
	}

	return ret_blkg;
}

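/*
 * Lookup-and-pin sketch combining the helpers above (illustrative only):
 *
 *	rcu_read_lock();
 *	blkg = blkg_tryget_closest(__blkg_lookup(blkcg, q, false));
 *	rcu_read_unlock();
 *
 *	if (blkg) {
 *		(safe to use blkg even outside the RCU section)
 *		blkg_put(blkg);
 *	}
 */
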
/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	percpu_ref_put(&blkg->refcnt);
}

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

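/*
 * Iteration sketch (illustrative): visit every blkg hanging off the root
 * blkg of @q, root first.
 *
 *	struct cgroup_subsys_state *pos_css;
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(blkg, pos_css, q->root_blkg)
 *		(act on each blkg; q->root_blkg itself is visited first)
 *	rcu_read_unlock();
 */
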
/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
			   struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
				  struct bio *bio) { return false; }
#endif

bool __blkcg_punt_bio_submit(struct bio *bio);

static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
	if (bio->bi_opf & REQ_CGROUP_PUNT)
		return __blkcg_punt_bio_submit(bio);
	else
		return false;
}

static inline void blkcg_bio_issue_init(struct bio *bio)
{
	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio)
{
	struct blkcg_gq *blkg;
	bool throtl = false;

	rcu_read_lock();

	if (!bio->bi_blkg) {
		char b[BDEVNAME_SIZE];

		WARN_ONCE(1,
			  "no blkg associated for bio on block-device: %s\n",
			  bio_devname(bio, b));
		bio_associate_blkg(bio);
	}

	blkg = bio->bi_blkg;

	throtl = blk_throtl_bio(q, blkg, bio);

	if (!throtl) {
		struct blkg_iostat_set *bis;
		int rwd, cpu;

		if (op_is_discard(bio->bi_opf))
			rwd = BLKG_IOSTAT_DISCARD;
		else if (op_is_write(bio->bi_opf))
			rwd = BLKG_IOSTAT_WRITE;
		else
			rwd = BLKG_IOSTAT_READ;

		cpu = get_cpu();
		bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
		u64_stats_update_begin(&bis->sync);

		/*
		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
		 * is a split bio and we would have already accounted for the
		 * size of the bio.
		 */
		if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
			bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
		bis->cur.ios[rwd]++;

		u64_stats_update_end(&bis->sync);
		if (cgroup_subsys_on_dfl(io_cgrp_subsys))
			cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
		put_cpu();
	}

	blkcg_bio_issue_init(bio);

	rcu_read_unlock();
	return !throtl;
}

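/*
 * The writer side above uses the u64_stats seqcount; a reader would use the
 * matching fetch/retry loop (illustrative sketch for one counter):
 *
 *	struct blkg_iostat_set *bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
 *	unsigned int seq;
 *	u64 rbytes;
 *
 *	do {
 *		seq = u64_stats_fetch_begin(&bis->sync);
 *		rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
 *	} while (u64_stats_fetch_retry(&bis->sync, seq));
 */
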
static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay.  If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble.  We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
		if (cur == old)
			break;
		old = cur;
	}

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}

static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);
	if (!old)
		return;
	/* We only want 1 person clearing the congestion count for this blkg. */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
		if (cur == old) {
			atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
			break;
		}
		old = cur;
	}
}

void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
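
/*
 * Pairing sketch for the delay machinery (illustrative): a policy marks a
 * blkg as delayed while it is over limit and drops the marking when it
 * recovers.  use/unuse must balance.
 *
 *	blkcg_use_delay(blkg);			(start charging delay)
 *	blkcg_add_delay(blkg, ktime_get_ns(), delta_ns);
 *	...
 *	blkcg_unuse_delay(blkg);		(stop charging delay)
 */
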
#else	/* CONFIG_BLK_CGROUP */

struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

#define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }

#ifdef CONFIG_BLOCK

static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }

static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLOCK */
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */