Commit | Line | Data |
---|---|---|
2bc64a20 AK |
1 | /* |
2 | * | |
3 | * Copyright IBM Corporation, 2012 | |
4 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify it | |
7 | * under the terms of version 2.1 of the GNU Lesser General Public License | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it would be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
13 | * | |
14 | */ | |
15 | ||
16 | #include <linux/cgroup.h> | |
17 | #include <linux/slab.h> | |
18 | #include <linux/hugetlb.h> | |
19 | #include <linux/hugetlb_cgroup.h> | |
20 | ||
/*
 * Per-cgroup state for the hugetlb controller.  Embedded css links this
 * into the cgroup core; freed in hugetlb_cgroup_destroy().
 */
struct hugetlb_cgroup {
	struct cgroup_subsys_state css;
	/*
	 * the counter to account for hugepages from hugetlb.
	 * One res_counter per hstate (i.e. per supported huge page size);
	 * only the first hugetlb_max_hstate entries are in use.
	 */
	struct res_counter hugepage[HUGE_MAX_HSTATE];
};
28 | ||
/* Forward declaration; the initializer is at the bottom of this file. */
struct cgroup_subsys hugetlb_subsys __read_mostly;
/* Root group, recorded when hugetlb_cgroup_create() runs with no parent. */
static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
31 | ||
32 | static inline | |
33 | struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) | |
34 | { | |
35 | return container_of(s, struct hugetlb_cgroup, css); | |
36 | } | |
37 | ||
38 | static inline | |
39 | struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup) | |
40 | { | |
41 | return hugetlb_cgroup_from_css(cgroup_subsys_state(cgroup, | |
42 | hugetlb_subsys_id)); | |
43 | } | |
44 | ||
45 | static inline | |
46 | struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task) | |
47 | { | |
48 | return hugetlb_cgroup_from_css(task_subsys_state(task, | |
49 | hugetlb_subsys_id)); | |
50 | } | |
51 | ||
52 | static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) | |
53 | { | |
54 | return (h_cg == root_h_cgroup); | |
55 | } | |
56 | ||
57 | static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg) | |
58 | { | |
59 | if (!cg->parent) | |
60 | return NULL; | |
61 | return hugetlb_cgroup_from_cgroup(cg->parent); | |
62 | } | |
63 | ||
64 | static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg) | |
65 | { | |
66 | int idx; | |
67 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg); | |
68 | ||
69 | for (idx = 0; idx < hugetlb_max_hstate; idx++) { | |
70 | if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0) | |
71 | return true; | |
72 | } | |
73 | return false; | |
74 | } | |
75 | ||
76 | static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup) | |
77 | { | |
78 | int idx; | |
79 | struct cgroup *parent_cgroup; | |
80 | struct hugetlb_cgroup *h_cgroup, *parent_h_cgroup; | |
81 | ||
82 | h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL); | |
83 | if (!h_cgroup) | |
84 | return ERR_PTR(-ENOMEM); | |
85 | ||
86 | parent_cgroup = cgroup->parent; | |
87 | if (parent_cgroup) { | |
88 | parent_h_cgroup = hugetlb_cgroup_from_cgroup(parent_cgroup); | |
89 | for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) | |
90 | res_counter_init(&h_cgroup->hugepage[idx], | |
91 | &parent_h_cgroup->hugepage[idx]); | |
92 | } else { | |
93 | root_h_cgroup = h_cgroup; | |
94 | for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) | |
95 | res_counter_init(&h_cgroup->hugepage[idx], NULL); | |
96 | } | |
97 | return &h_cgroup->css; | |
98 | } | |
99 | ||
/* cgroup destroy callback: release the state allocated in create. */
static void hugetlb_cgroup_destroy(struct cgroup *cgroup)
{
	kfree(hugetlb_cgroup_from_cgroup(cgroup));
}
107 | ||
da1def55 AK |
108 | |
109 | /* | |
110 | * Should be called with hugetlb_lock held. | |
111 | * Since we are holding hugetlb_lock, pages cannot get moved from | |
112 | * active list or uncharged from the cgroup, So no need to get | |
113 | * page reference and test for page active here. This function | |
114 | * cannot fail. | |
115 | */ | |
116 | static void hugetlb_cgroup_move_parent(int idx, struct cgroup *cgroup, | |
117 | struct page *page) | |
118 | { | |
119 | int csize; | |
120 | struct res_counter *counter; | |
121 | struct res_counter *fail_res; | |
122 | struct hugetlb_cgroup *page_hcg; | |
123 | struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cgroup); | |
124 | struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(cgroup); | |
125 | ||
126 | page_hcg = hugetlb_cgroup_from_page(page); | |
127 | /* | |
128 | * We can have pages in active list without any cgroup | |
129 | * ie, hugepage with less than 3 pages. We can safely | |
130 | * ignore those pages. | |
131 | */ | |
132 | if (!page_hcg || page_hcg != h_cg) | |
133 | goto out; | |
134 | ||
135 | csize = PAGE_SIZE << compound_order(page); | |
136 | if (!parent) { | |
137 | parent = root_h_cgroup; | |
138 | /* root has no limit */ | |
139 | res_counter_charge_nofail(&parent->hugepage[idx], | |
140 | csize, &fail_res); | |
141 | } | |
142 | counter = &h_cg->hugepage[idx]; | |
143 | res_counter_uncharge_until(counter, counter->parent, csize); | |
144 | ||
145 | set_hugetlb_cgroup(page, parent); | |
146 | out: | |
147 | return; | |
148 | } | |
149 | ||
150 | /* | |
151 | * Force the hugetlb cgroup to empty the hugetlb resources by moving them to | |
152 | * the parent cgroup. | |
153 | */ | |
2bc64a20 AK |
154 | static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup) |
155 | { | |
da1def55 AK |
156 | struct hstate *h; |
157 | struct page *page; | |
158 | int ret = 0, idx = 0; | |
159 | ||
160 | do { | |
161 | if (cgroup_task_count(cgroup) || | |
162 | !list_empty(&cgroup->children)) { | |
163 | ret = -EBUSY; | |
164 | goto out; | |
165 | } | |
166 | for_each_hstate(h) { | |
167 | spin_lock(&hugetlb_lock); | |
168 | list_for_each_entry(page, &h->hugepage_activelist, lru) | |
169 | hugetlb_cgroup_move_parent(idx, cgroup, page); | |
170 | ||
171 | spin_unlock(&hugetlb_lock); | |
172 | idx++; | |
173 | } | |
174 | cond_resched(); | |
175 | } while (hugetlb_cgroup_have_usage(cgroup)); | |
176 | out: | |
177 | return ret; | |
2bc64a20 AK |
178 | } |
179 | ||
6d76dcf4 AK |
/*
 * Charge @nr_pages pages of hstate @idx against the current task's hugetlb
 * cgroup.  On return *ptr holds the cgroup that was charged (NULL when the
 * controller is disabled or the page size is below the accounting
 * threshold).  Returns 0 on success or the res_counter_charge() error.
 */
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	int ret = 0;
	struct res_counter *fail_res;
	struct hugetlb_cgroup *h_cg = NULL;
	unsigned long csize = nr_pages * PAGE_SIZE;

	if (hugetlb_cgroup_disabled())
		goto done;
	/*
	 * We don't charge any cgroup if the compound page have less
	 * than 3 pages.
	 */
	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		goto done;
again:
	/*
	 * Pin the css of the task's current cgroup before charging.  If
	 * css_tryget() fails the css is going away, so drop the RCU lock
	 * and retry — the re-read picks up the task's new cgroup.
	 */
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res);
	css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}
211 | ||
212 | void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, | |
213 | struct hugetlb_cgroup *h_cg, | |
214 | struct page *page) | |
215 | { | |
216 | if (hugetlb_cgroup_disabled() || !h_cg) | |
217 | return; | |
218 | ||
219 | spin_lock(&hugetlb_lock); | |
220 | set_hugetlb_cgroup(page, h_cg); | |
221 | spin_unlock(&hugetlb_lock); | |
222 | return; | |
223 | } | |
224 | ||
225 | /* | |
226 | * Should be called with hugetlb_lock held | |
227 | */ | |
228 | void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, | |
229 | struct page *page) | |
230 | { | |
231 | struct hugetlb_cgroup *h_cg; | |
232 | unsigned long csize = nr_pages * PAGE_SIZE; | |
233 | ||
234 | if (hugetlb_cgroup_disabled()) | |
235 | return; | |
236 | VM_BUG_ON(!spin_is_locked(&hugetlb_lock)); | |
237 | h_cg = hugetlb_cgroup_from_page(page); | |
238 | if (unlikely(!h_cg)) | |
239 | return; | |
240 | set_hugetlb_cgroup(page, NULL); | |
241 | res_counter_uncharge(&h_cg->hugepage[idx], csize); | |
242 | return; | |
243 | } | |
244 | ||
245 | void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, | |
246 | struct hugetlb_cgroup *h_cg) | |
247 | { | |
248 | unsigned long csize = nr_pages * PAGE_SIZE; | |
249 | ||
250 | if (hugetlb_cgroup_disabled() || !h_cg) | |
251 | return; | |
252 | ||
253 | if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) | |
254 | return; | |
255 | ||
256 | res_counter_uncharge(&h_cg->hugepage[idx], csize); | |
257 | return; | |
258 | } | |
259 | ||
2bc64a20 AK |
/* cgroup core registration for the hugetlb controller. */
struct cgroup_subsys hugetlb_subsys = {
	.name = "hugetlb",
	.create = hugetlb_cgroup_create,
	.pre_destroy = hugetlb_cgroup_pre_destroy,
	.destroy = hugetlb_cgroup_destroy,
	.subsys_id = hugetlb_subsys_id,
};