cfq-iosched: enable full blkcg hierarchy support
[linux-2.6-block.git] / block / blk-cgroup.h
CommitLineData
31e4c28d
VG
1#ifndef _BLK_CGROUP_H
2#define _BLK_CGROUP_H
3/*
4 * Common Block IO controller cgroup interface
5 *
6 * Based on ideas and code from CFQ, CFS and BFQ:
7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
8 *
9 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
10 * Paolo Valente <paolo.valente@unimore.it>
11 *
12 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
13 * Nauman Rafique <nauman@google.com>
14 */
15
16#include <linux/cgroup.h>
575969a0 17#include <linux/u64_stats_sync.h>
829fdb50 18#include <linux/seq_file.h>
a637120e 19#include <linux/radix-tree.h>
a051661c 20#include <linux/blkdev.h>
31e4c28d 21
9355aede
VG
22/* Max limits for throttle policy */
23#define THROTL_IOPS_MAX UINT_MAX
24
3381cb8d
TH
25/* CFQ specific, out here for blkcg->cfq_weight */
26#define CFQ_WEIGHT_MIN 10
27#define CFQ_WEIGHT_MAX 1000
28#define CFQ_WEIGHT_DEFAULT 500
29
f48ec1d7
TH
30#ifdef CONFIG_BLK_CGROUP
31
edcb0722
TH
/*
 * Indices into blkg_rwstat->cnt[].  blkg_rwstat_add() charges every value
 * to one direction slot (READ or WRITE) and to one synchronicity slot
 * (SYNC or ASYNC).
 */
enum blkg_rwstat_type {
	BLKG_RWSTAT_READ	= 0,
	BLKG_RWSTAT_WRITE	= 1,
	BLKG_RWSTAT_SYNC	= 2,
	BLKG_RWSTAT_ASYNC	= 3,

	/* number of counters above; TOTAL is an alias of the same value */
	BLKG_RWSTAT_NR,
	BLKG_RWSTAT_TOTAL	= BLKG_RWSTAT_NR,
};
41
a637120e
TH
42struct blkcg_gq;
43
/*
 * Per-cgroup state of the block IO controller.  The cgroup subsystem
 * state is embedded as @css so that container_of() can recover the blkcg
 * from it (see cgroup_to_blkcg(), task_blkcg() and bio_blkcg()).
 */
3c798398 44struct blkcg {
36558c8a
TH
45 struct cgroup_subsys_state css;
46 spinlock_t lock;
a637120e
TH
47
 /*
  * NOTE(review): presumably the lookup structures for the blkgs that
  * belong to this blkcg (tree, last-hit hint and list of all blkgs) --
  * confirm against blk-cgroup.c.
  */
48 struct radix_tree_root blkg_tree;
49 struct blkcg_gq *blkg_hint;
36558c8a 50 struct hlist_head blkg_list;
9a9e8a26
TH
51
52 /* for policies to test whether associated blkcg has changed */
36558c8a 53 uint64_t id;
3381cb8d 54
3c798398 55 /* TODO: per-policy storage in blkcg */
36558c8a 56 unsigned int cfq_weight; /* belongs to cfq */
 /* NOTE(review): presumably also owned by cfq, like cfq_weight -- confirm */
e71357e1 57 unsigned int cfq_leaf_weight;
31e4c28d
VG
58};
59
edcb0722
TH
/*
 * A single 64-bit counter.  @syncp brackets updates in blkg_stat_add() so
 * that blkg_stat_read() can retry until it sees a consistent value.
 */
60struct blkg_stat {
61 struct u64_stats_sync syncp;
62 uint64_t cnt;
63};
64
/*
 * A set of 64-bit counters, one per enum blkg_rwstat_type slot below
 * BLKG_RWSTAT_NR.  @syncp plays the same role as in struct blkg_stat.
 */
65struct blkg_rwstat {
66 struct u64_stats_sync syncp;
67 uint64_t cnt[BLKG_RWSTAT_NR];
68};
69
f95a04af
TH
70/*
71 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
72 * request_queue (q). This is used by blkcg policies which need to track
73 * information per blkcg - q pair.
74 *
75 * There can be multiple active blkcg policies and each has its private
76 * data on each blkg, the size of which is determined by
77 * blkcg_policy->pd_size. blkcg core allocates and frees such areas
78 * together with blkg and invokes pd_init/exit_fn() methods.
79 *
80 * Such private data must embed struct blkg_policy_data (pd) at the
81 * beginning and pd_size can't be smaller than pd.
82 */
0381411e
TH
/* Common header found at the start of every policy's per-blkg data. */
83struct blkg_policy_data {
84 /* the blkg this per-policy data belongs to */
 /* (this is the pointer pd_to_blkg() returns) */
3c798398 85 struct blkcg_gq *blkg;
0381411e 86
a2b1693b 87 /* used during policy activation */
36558c8a 88 struct list_head alloc_node;
0381411e
TH
89};
90
3c798398
TH
91/* association between a blk cgroup and a request queue */
92struct blkcg_gq {
c875f4d0 93 /* Pointer to the associated request_queue */
36558c8a
TH
94 struct request_queue *q;
 /*
  * NOTE(review): list linkage; presumably q_node hangs off the queue and
  * blkcg_node off blkcg->blkg_list -- confirm against blk-cgroup.c.
  */
95 struct list_head q_node;
96 struct hlist_node blkcg_node;
 /* the blkcg side of the pair */
3c798398 97 struct blkcg *blkcg;
3c547865
TH
98
99 /* all non-root blkcg_gq's are guaranteed to have access to parent */
100 struct blkcg_gq *parent;
101
a051661c
TH
102 /* request allocation list for this blkcg-q pair */
103 struct request_list rl;
3c547865 104
1adaf3dd 105 /* reference count */
 /* plain int: blkg_get()/blkg_put() modify it with queue_lock held */
36558c8a 106 int refcnt;
22084190 107
 /* per-policy private data, indexed by blkcg_policy->plid (see blkg_to_pd()) */
36558c8a 108 struct blkg_policy_data *pd[BLKCG_MAX_POLS];
1adaf3dd 109
 /* NOTE(review): presumably used for RCU-deferred freeing -- confirm */
36558c8a 110 struct rcu_head rcu_head;
31e4c28d
VG
111};
112
3c798398
TH
113typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
114typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg);
115typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg);
3e252066 116
/*
 * Description of one blkcg policy, passed to blkcg_policy_register() and
 * blkcg_activate_policy().
 */
3c798398 117struct blkcg_policy {
36558c8a
TH
 /* policy id; blkg_to_pd() uses it as the index into blkg->pd[] */
118 int plid;
119 /* policy specific private data size */
f95a04af 120 size_t pd_size;
36558c8a
TH
121 /* cgroup files for the policy */
122 struct cftype *cftypes;
f9fcc2d3
TH
123
124 /* operations */
 /* each callback receives the blkg whose per-policy data is concerned */
125 blkcg_pol_init_pd_fn *pd_init_fn;
126 blkcg_pol_exit_pd_fn *pd_exit_fn;
127 blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
3e252066
VG
128};
129
3c798398 130extern struct blkcg blkcg_root;
36558c8a 131
3c798398
TH
132struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
133struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
134 struct request_queue *q);
36558c8a
TH
135int blkcg_init_queue(struct request_queue *q);
136void blkcg_drain_queue(struct request_queue *q);
137void blkcg_exit_queue(struct request_queue *q);
5efd6113 138
3e252066 139/* Blkio controller policy registration */
3c798398
TH
140int blkcg_policy_register(struct blkcg_policy *pol);
141void blkcg_policy_unregister(struct blkcg_policy *pol);
36558c8a 142int blkcg_activate_policy(struct request_queue *q,
3c798398 143 const struct blkcg_policy *pol);
36558c8a 144void blkcg_deactivate_policy(struct request_queue *q,
3c798398 145 const struct blkcg_policy *pol);
3e252066 146
3c798398 147void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
f95a04af
TH
148 u64 (*prfill)(struct seq_file *,
149 struct blkg_policy_data *, int),
3c798398 150 const struct blkcg_policy *pol, int data,
ec399347 151 bool show_total);
f95a04af
TH
152u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
153u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
829fdb50 154 const struct blkg_rwstat *rwstat);
f95a04af
TH
155u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
156u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
157 int off);
829fdb50
TH
158
/*
 * Context shared by blkg_conf_prep() and blkg_conf_finish().
 * NOTE(review): presumably @disk and @blkg identify the target the input
 * string referred to and @v carries the parsed value -- confirm against
 * blkg_conf_prep() in blk-cgroup.c.
 */
159struct blkg_conf_ctx {
36558c8a 160 struct gendisk *disk;
3c798398 161 struct blkcg_gq *blkg;
36558c8a 162 u64 v;
829fdb50
TH
163};
164
3c798398
TH
165int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
166 const char *input, struct blkg_conf_ctx *ctx);
829fdb50
TH
167void blkg_conf_finish(struct blkg_conf_ctx *ctx);
168
169
b1208b56
TH
170static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
171{
172 return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
173 struct blkcg, css);
174}
175
176static inline struct blkcg *task_blkcg(struct task_struct *tsk)
177{
178 return container_of(task_subsys_state(tsk, blkio_subsys_id),
179 struct blkcg, css);
180}
181
182static inline struct blkcg *bio_blkcg(struct bio *bio)
183{
184 if (bio && bio->bi_css)
185 return container_of(bio->bi_css, struct blkcg, css);
186 return task_blkcg(current);
187}
188
3c547865
TH
189/**
190 * blkcg_parent - get the parent of a blkcg
191 * @blkcg: blkcg of interest
192 *
193 * Return the parent blkcg of @blkcg. Can be called anytime.
194 */
195static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
196{
197 struct cgroup *pcg = blkcg->css.cgroup->parent;
198
199 return pcg ? cgroup_to_blkcg(pcg) : NULL;
200}
201
0381411e
TH
202/**
203 * blkg_to_pdata - get policy private data
204 * @blkg: blkg of interest
205 * @pol: policy of interest
206 *
207 * Return pointer to private data associated with the @blkg-@pol pair.
208 */
f95a04af
TH
209static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
210 struct blkcg_policy *pol)
0381411e 211{
f95a04af 212 return blkg ? blkg->pd[pol->plid] : NULL;
0381411e
TH
213}
214
215/**
216 * pdata_to_blkg - get blkg associated with policy private data
f95a04af 217 * @pd: policy private data of interest
0381411e 218 *
f95a04af 219 * @pd is policy private data. Determine the blkg it's associated with.
0381411e 220 */
f95a04af 221static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
0381411e 222{
f95a04af 223 return pd ? pd->blkg : NULL;
0381411e
TH
224}
225
54e7ed12
TH
226/**
227 * blkg_path - format cgroup path of blkg
228 * @blkg: blkg of interest
229 * @buf: target buffer
230 * @buflen: target buffer length
231 *
232 * Format the path of the cgroup of @blkg into @buf.
233 */
3c798398 234static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
afc24d49 235{
54e7ed12
TH
236 int ret;
237
238 rcu_read_lock();
239 ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
240 rcu_read_unlock();
241 if (ret)
242 strncpy(buf, "<unavailable>", buflen);
243 return ret;
afc24d49
VG
244}
245
1adaf3dd
TH
246/**
247 * blkg_get - get a blkg reference
248 * @blkg: blkg to get
249 *
250 * The caller should be holding queue_lock and an existing reference.
251 */
3c798398 252static inline void blkg_get(struct blkcg_gq *blkg)
1adaf3dd
TH
253{
254 lockdep_assert_held(blkg->q->queue_lock);
255 WARN_ON_ONCE(!blkg->refcnt);
256 blkg->refcnt++;
257}
258
3c798398 259void __blkg_release(struct blkcg_gq *blkg);
1adaf3dd
TH
260
261/**
262 * blkg_put - put a blkg reference
263 * @blkg: blkg to put
264 *
265 * The caller should be holding queue_lock.
266 */
3c798398 267static inline void blkg_put(struct blkcg_gq *blkg)
1adaf3dd
TH
268{
269 lockdep_assert_held(blkg->q->queue_lock);
270 WARN_ON_ONCE(blkg->refcnt <= 0);
271 if (!--blkg->refcnt)
272 __blkg_release(blkg);
273}
274
a051661c
TH
275/**
276 * blk_get_rl - get request_list to use
277 * @q: request_queue of interest
278 * @bio: bio which will be attached to the allocated request (may be %NULL)
279 *
280 * The caller wants to allocate a request from @q to use for @bio. Find
281 * the request_list to use and obtain a reference on it. Should be called
282 * under queue_lock. This function is guaranteed to return non-%NULL
283 * request_list.
284 */
285static inline struct request_list *blk_get_rl(struct request_queue *q,
286 struct bio *bio)
287{
288 struct blkcg *blkcg;
289 struct blkcg_gq *blkg;
290
291 rcu_read_lock();
292
293 blkcg = bio_blkcg(bio);
294
295 /* bypass blkg lookup and use @q->root_rl directly for root */
296 if (blkcg == &blkcg_root)
297 goto root_rl;
298
299 /*
300 * Try to use blkg->rl. blkg lookup may fail under memory pressure
301 * or if either the blkcg or queue is going away. Fall back to
302 * root_rl in such cases.
303 */
304 blkg = blkg_lookup_create(blkcg, q);
305 if (unlikely(IS_ERR(blkg)))
306 goto root_rl;
307
308 blkg_get(blkg);
309 rcu_read_unlock();
310 return &blkg->rl;
311root_rl:
312 rcu_read_unlock();
313 return &q->root_rl;
314}
315
316/**
317 * blk_put_rl - put request_list
318 * @rl: request_list to put
319 *
320 * Put the reference acquired by blk_get_rl(). Should be called under
321 * queue_lock.
322 */
323static inline void blk_put_rl(struct request_list *rl)
324{
325 /* root_rl may not have blkg set */
326 if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
327 blkg_put(rl->blkg);
328}
329
330/**
331 * blk_rq_set_rl - associate a request with a request_list
332 * @rq: request of interest
333 * @rl: target request_list
334 *
335 * Associate @rq with @rl so that accounting and freeing can know the
336 * request_list @rq came from.
337 */
338static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
339{
340 rq->rl = rl;
341}
342
343/**
344 * blk_rq_rl - return the request_list a request came from
345 * @rq: request of interest
346 *
347 * Return the request_list @rq is allocated from.
348 */
349static inline struct request_list *blk_rq_rl(struct request *rq)
350{
351 return rq->rl;
352}
353
354struct request_list *__blk_queue_next_rl(struct request_list *rl,
355 struct request_queue *q);
356/**
357 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
 * @rl: cursor; starts at @q->root_rl and is advanced on every iteration
 * @q: request_queue whose request_lists are walked
358 *
359 * Should be used under queue_lock.
 *
 * The walk ends once __blk_queue_next_rl() returns %NULL.  Both arguments
 * are expanded more than once, so they must not have side effects.
360 */
361#define blk_queue_for_each_rl(rl, q) \
362 for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
363
edcb0722
TH
364/**
365 * blkg_stat_add - add a value to a blkg_stat
366 * @stat: target blkg_stat
367 * @val: value to add
368 *
369 * Add @val to @stat. The caller is responsible for synchronizing calls to
370 * this function.
371 */
372static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
373{
374 u64_stats_update_begin(&stat->syncp);
375 stat->cnt += val;
376 u64_stats_update_end(&stat->syncp);
377}
378
379/**
380 * blkg_stat_read - read the current value of a blkg_stat
381 * @stat: blkg_stat to read
382 *
383 * Read the current value of @stat. This function can be called without
384 * synchroniztion and takes care of u64 atomicity.
385 */
386static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
387{
388 unsigned int start;
389 uint64_t v;
390
391 do {
392 start = u64_stats_fetch_begin(&stat->syncp);
393 v = stat->cnt;
394 } while (u64_stats_fetch_retry(&stat->syncp, start));
395
396 return v;
397}
398
399/**
400 * blkg_stat_reset - reset a blkg_stat
401 * @stat: blkg_stat to reset
402 */
403static inline void blkg_stat_reset(struct blkg_stat *stat)
404{
405 stat->cnt = 0;
406}
407
408/**
409 * blkg_rwstat_add - add a value to a blkg_rwstat
410 * @rwstat: target blkg_rwstat
411 * @rw: mask of REQ_{WRITE|SYNC}
412 * @val: value to add
413 *
414 * Add @val to @rwstat. The counters are chosen according to @rw. The
415 * caller is responsible for synchronizing calls to this function.
416 */
417static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
418 int rw, uint64_t val)
419{
420 u64_stats_update_begin(&rwstat->syncp);
421
422 if (rw & REQ_WRITE)
423 rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
424 else
425 rwstat->cnt[BLKG_RWSTAT_READ] += val;
426 if (rw & REQ_SYNC)
427 rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
428 else
429 rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
430
431 u64_stats_update_end(&rwstat->syncp);
432}
433
434/**
435 * blkg_rwstat_read - read the current values of a blkg_rwstat
436 * @rwstat: blkg_rwstat to read
437 *
438 * Read the current snapshot of @rwstat and return it as the return value.
439 * This function can be called without synchronization and takes care of
440 * u64 atomicity.
441 */
c94bed89 442static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
edcb0722
TH
443{
444 unsigned int start;
445 struct blkg_rwstat tmp;
446
447 do {
448 start = u64_stats_fetch_begin(&rwstat->syncp);
449 tmp = *rwstat;
450 } while (u64_stats_fetch_retry(&rwstat->syncp, start));
451
452 return tmp;
453}
454
455/**
456 * blkg_rwstat_sum - read the total count of a blkg_rwstat
457 * @rwstat: blkg_rwstat to read
458 *
459 * Return the total count of @rwstat regardless of the IO direction. This
460 * function can be called without synchronization and takes care of u64
461 * atomicity.
462 */
463static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat)
464{
465 struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
466
467 return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
468}
469
470/**
471 * blkg_rwstat_reset - reset a blkg_rwstat
472 * @rwstat: blkg_rwstat to reset
473 */
474static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
475{
476 memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
477}
478
36558c8a
TH
479#else /* CONFIG_BLK_CGROUP */
480
481struct cgroup;
b1208b56 482struct blkcg;
2f5ea477 483
f95a04af
TH
/*
 * Empty placeholder types for builds without CONFIG_BLK_CGROUP, so that
 * the stub functions below can still name them in their signatures.
 */
484struct blkg_policy_data {
485};
486
3c798398 487struct blkcg_gq {
2f5ea477
JA
488};
489
3c798398 490struct blkcg_policy {
3e252066
VG
491};
492
/*
 * blkcg is compiled out: no blkg exists, so every lookup misses.  The
 * second parameter is typed like the CONFIG_BLK_CGROUP prototype so that
 * callers are type-checked the same way in both configurations.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	return NULL;
}
5efd6113
TH
/*
 * Queue and policy management stubs for builds without CONFIG_BLK_CGROUP:
 * the int-returning ones report success (0), the void ones do nothing.
 */
494static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
495static inline void blkcg_drain_queue(struct request_queue *q) { }
496static inline void blkcg_exit_queue(struct request_queue *q) { }
3c798398
TH
497static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
498static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
a2b1693b 499static inline int blkcg_activate_policy(struct request_queue *q,
3c798398 500 const struct blkcg_policy *pol) { return 0; }
a2b1693b 501static inline void blkcg_deactivate_policy(struct request_queue *q,
3c798398
TH
502 const struct blkcg_policy *pol) { }
503
b1208b56
TH
/* Without CONFIG_BLK_CGROUP there is no blkcg to return: always NULL. */
504static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
505static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
a051661c 506
f95a04af
TH
/*
 * blkg accessor stubs for builds without CONFIG_BLK_CGROUP: lookups yield
 * NULL and reference counting is a no-op.
 */
507static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
508 struct blkcg_policy *pol) { return NULL; }
509static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
3c798398
TH
/*
 * NOTE(review): this stub takes one argument and returns char *, whereas
 * the CONFIG_BLK_CGROUP version is int blkg_path(blkg, buf, buflen).  A
 * caller written against either form cannot build in the other
 * configuration -- confirm no caller exists outside blkcg-only code
 * before aligning the two.
 */
510static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
511static inline void blkg_get(struct blkcg_gq *blkg) { }
512static inline void blkg_put(struct blkcg_gq *blkg) { }
afc24d49 513
a051661c
TH
/*
 * request_list stubs for builds without CONFIG_BLK_CGROUP: the queue's
 * root_rl is the only request_list handed out, so nothing is recorded in
 * the request and there is no reference to drop.
 */
514static inline struct request_list *blk_get_rl(struct request_queue *q,
515 struct bio *bio) { return &q->root_rl; }
516static inline void blk_put_rl(struct request_list *rl) { }
517static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
518static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
519
/*
 * Without CONFIG_BLK_CGROUP the walk visits @q->root_rl only: @rl is set
 * to NULL after the first pass, which ends the loop.
 */
520#define blk_queue_for_each_rl(rl, q) \
521 for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
522
36558c8a
TH
523#endif /* CONFIG_BLK_CGROUP */
524#endif /* _BLK_CGROUP_H */