blkio: Take care of cgroup deletion and cfq group reference counting
[linux-2.6-block.git] / block / blk-cgroup.c
CommitLineData
31e4c28d
VG
1/*
2 * Common Block IO controller cgroup interface
3 *
4 * Based on ideas and code from CFQ, CFS and BFQ:
5 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
6 *
7 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
8 * Paolo Valente <paolo.valente@unimore.it>
9 *
10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
11 * Nauman Rafique <nauman@google.com>
12 */
13#include <linux/ioprio.h>
14#include "blk-cgroup.h"
15
b1c35769
VG
/* Provided by the CFQ IO scheduler; invoked from blkiocg_destroy(). */
extern void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg);
17
31e4c28d
VG
18struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
19
20struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
21{
22 return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
23 struct blkio_cgroup, css);
24}
25
26void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
27 struct blkio_group *blkg, void *key)
28{
29 unsigned long flags;
30
31 spin_lock_irqsave(&blkcg->lock, flags);
32 rcu_assign_pointer(blkg->key, key);
b1c35769 33 blkg->blkcg_id = css_id(&blkcg->css);
31e4c28d
VG
34 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
35 spin_unlock_irqrestore(&blkcg->lock, flags);
36}
37
b1c35769
VG
38static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
39{
40 hlist_del_init_rcu(&blkg->blkcg_node);
41 blkg->blkcg_id = 0;
42}
43
44/*
45 * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
46 * indicating that blk_group was unhashed by the time we got to it.
47 */
31e4c28d
VG
48int blkiocg_del_blkio_group(struct blkio_group *blkg)
49{
b1c35769
VG
50 struct blkio_cgroup *blkcg;
51 unsigned long flags;
52 struct cgroup_subsys_state *css;
53 int ret = 1;
54
55 rcu_read_lock();
56 css = css_lookup(&blkio_subsys, blkg->blkcg_id);
57 if (!css)
58 goto out;
59
60 blkcg = container_of(css, struct blkio_cgroup, css);
61 spin_lock_irqsave(&blkcg->lock, flags);
62 if (!hlist_unhashed(&blkg->blkcg_node)) {
63 __blkiocg_del_blkio_group(blkg);
64 ret = 0;
65 }
66 spin_unlock_irqrestore(&blkcg->lock, flags);
67out:
68 rcu_read_unlock();
69 return ret;
31e4c28d
VG
70}
71
72/* called under rcu_read_lock(). */
73struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
74{
75 struct blkio_group *blkg;
76 struct hlist_node *n;
77 void *__key;
78
79 hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
80 __key = blkg->key;
81 if (__key == key)
82 return blkg;
83 }
84
85 return NULL;
86}
87
88#define SHOW_FUNCTION(__VAR) \
89static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup, \
90 struct cftype *cftype) \
91{ \
92 struct blkio_cgroup *blkcg; \
93 \
94 blkcg = cgroup_to_blkio_cgroup(cgroup); \
95 return (u64)blkcg->__VAR; \
96}
97
98SHOW_FUNCTION(weight);
99#undef SHOW_FUNCTION
100
101static int
102blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
103{
104 struct blkio_cgroup *blkcg;
105
106 if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
107 return -EINVAL;
108
109 blkcg = cgroup_to_blkio_cgroup(cgroup);
110 blkcg->weight = (unsigned int)val;
111 return 0;
112}
113
114struct cftype blkio_files[] = {
115 {
116 .name = "weight",
117 .read_u64 = blkiocg_weight_read,
118 .write_u64 = blkiocg_weight_write,
119 },
120};
121
122static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
123{
124 return cgroup_add_files(cgroup, subsys, blkio_files,
125 ARRAY_SIZE(blkio_files));
126}
127
128static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
129{
130 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
b1c35769
VG
131 unsigned long flags;
132 struct blkio_group *blkg;
133 void *key;
134
135 rcu_read_lock();
136remove_entry:
137 spin_lock_irqsave(&blkcg->lock, flags);
138
139 if (hlist_empty(&blkcg->blkg_list)) {
140 spin_unlock_irqrestore(&blkcg->lock, flags);
141 goto done;
142 }
143
144 blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
145 blkcg_node);
146 key = rcu_dereference(blkg->key);
147 __blkiocg_del_blkio_group(blkg);
31e4c28d 148
b1c35769
VG
149 spin_unlock_irqrestore(&blkcg->lock, flags);
150
151 /*
152 * This blkio_group is being unlinked as associated cgroup is going
153 * away. Let all the IO controlling policies know about this event.
154 *
155 * Currently this is static call to one io controlling policy. Once
156 * we have more policies in place, we need some dynamic registration
157 * of callback function.
158 */
159 cfq_unlink_blkio_group(key, blkg);
160 goto remove_entry;
161done:
31e4c28d 162 free_css_id(&blkio_subsys, &blkcg->css);
b1c35769 163 rcu_read_unlock();
31e4c28d
VG
164 kfree(blkcg);
165}
166
167static struct cgroup_subsys_state *
168blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
169{
170 struct blkio_cgroup *blkcg, *parent_blkcg;
171
172 if (!cgroup->parent) {
173 blkcg = &blkio_root_cgroup;
174 goto done;
175 }
176
177 /* Currently we do not support hierarchy deeper than two level (0,1) */
178 parent_blkcg = cgroup_to_blkio_cgroup(cgroup->parent);
179 if (css_depth(&parent_blkcg->css) > 0)
180 return ERR_PTR(-EINVAL);
181
182 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
183 if (!blkcg)
184 return ERR_PTR(-ENOMEM);
185
186 blkcg->weight = BLKIO_WEIGHT_DEFAULT;
187done:
188 spin_lock_init(&blkcg->lock);
189 INIT_HLIST_HEAD(&blkcg->blkg_list);
190
191 return &blkcg->css;
192}
193
194/*
195 * We cannot support shared io contexts, as we have no mean to support
196 * two tasks with the same ioc in two different groups without major rework
197 * of the main cic data structures. For now we allow a task to change
198 * its cgroup only if it's the only owner of its ioc.
199 */
200static int blkiocg_can_attach(struct cgroup_subsys *subsys,
201 struct cgroup *cgroup, struct task_struct *tsk,
202 bool threadgroup)
203{
204 struct io_context *ioc;
205 int ret = 0;
206
207 /* task_lock() is needed to avoid races with exit_io_context() */
208 task_lock(tsk);
209 ioc = tsk->io_context;
210 if (ioc && atomic_read(&ioc->nr_tasks) > 1)
211 ret = -EINVAL;
212 task_unlock(tsk);
213
214 return ret;
215}
216
217static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
218 struct cgroup *prev, struct task_struct *tsk,
219 bool threadgroup)
220{
221 struct io_context *ioc;
222
223 task_lock(tsk);
224 ioc = tsk->io_context;
225 if (ioc)
226 ioc->cgroup_changed = 1;
227 task_unlock(tsk);
228}
229
230struct cgroup_subsys blkio_subsys = {
231 .name = "blkio",
232 .create = blkiocg_create,
233 .can_attach = blkiocg_can_attach,
234 .attach = blkiocg_attach,
235 .destroy = blkiocg_destroy,
236 .populate = blkiocg_populate,
237 .subsys_id = blkio_subsys_id,
238 .use_id = 1,
239};