Commit | Line | Data |
---|---|---|
2e76c24d LZ |
1 | #include <linux/cgroup.h> |
2 | #include <linux/slab.h> | |
3 | #include <linux/percpu.h> | |
4 | #include <linux/spinlock.h> | |
5 | #include <linux/cpumask.h> | |
6 | #include <linux/seq_file.h> | |
7 | #include <linux/rcupdate.h> | |
8 | #include <linux/kernel_stat.h> | |
b329fd5b | 9 | #include <linux/err.h> |
2e76c24d LZ |
10 | |
11 | #include "sched.h" | |
12 | ||
13 | /* | |
14 | * CPU accounting code for task groups. | |
15 | * | |
16 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh | |
17 | * (balbir@in.ibm.com). | |
18 | */ | |
19 | ||
d1712796 LZ |
/*
 * Indices of the statistics kept per cpu accounting group: the time the
 * group's tasks spent executing in user mode and in kernel mode.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* time spent in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent in kernel mode */

	CPUACCT_STAT_NSTATS,	/* number of entries above; keep last */
};
27 | ||
d740037f DY |
/*
 * Indices into cpuacct_usage::usages[]. CPUACCT_USAGE_NRUSAGE is the number
 * of counters and therefore also the size of that array.
 */
enum cpuacct_usage_index {
	CPUACCT_USAGE_USER,	/* usage accumulated in user mode */
	CPUACCT_USAGE_SYSTEM,	/* usage accumulated in kernel mode */

	CPUACCT_USAGE_NRUSAGE,	/* number of entries above; keep last */
};
34 | ||
35 | struct cpuacct_usage { | |
36 | u64 usages[CPUACCT_USAGE_NRUSAGE]; | |
37 | }; | |
38 | ||
d1712796 LZ |
39 | /* track cpu usage of a group of tasks and its child groups */ |
40 | struct cpuacct { | |
41 | struct cgroup_subsys_state css; | |
42 | /* cpuusage holds pointer to a u64-type object on every cpu */ | |
d740037f | 43 | struct cpuacct_usage __percpu *cpuusage; |
d1712796 LZ |
44 | struct kernel_cpustat __percpu *cpustat; |
45 | }; | |
46 | ||
a7c6d554 TH |
47 | static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) |
48 | { | |
49 | return css ? container_of(css, struct cpuacct, css) : NULL; | |
50 | } | |
51 | ||
d1712796 LZ |
52 | /* return cpu accounting group to which this task belongs */ |
53 | static inline struct cpuacct *task_ca(struct task_struct *tsk) | |
54 | { | |
073219e9 | 55 | return css_ca(task_css(tsk, cpuacct_cgrp_id)); |
d1712796 LZ |
56 | } |
57 | ||
d1712796 LZ |
58 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) |
59 | { | |
5c9d535b | 60 | return css_ca(ca->css.parent); |
d1712796 LZ |
61 | } |
62 | ||
d740037f | 63 | static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage); |
14c6d3c8 LZ |
64 | static struct cpuacct root_cpuacct = { |
65 | .cpustat = &kernel_cpustat, | |
66 | .cpuusage = &root_cpuacct_cpuusage, | |
67 | }; | |
2e76c24d LZ |
68 | |
69 | /* create a new cpu accounting group */ | |
eb95419b TH |
70 | static struct cgroup_subsys_state * |
71 | cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) | |
2e76c24d LZ |
72 | { |
73 | struct cpuacct *ca; | |
74 | ||
eb95419b | 75 | if (!parent_css) |
2e76c24d LZ |
76 | return &root_cpuacct.css; |
77 | ||
78 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | |
79 | if (!ca) | |
80 | goto out; | |
81 | ||
d740037f | 82 | ca->cpuusage = alloc_percpu(struct cpuacct_usage); |
2e76c24d LZ |
83 | if (!ca->cpuusage) |
84 | goto out_free_ca; | |
85 | ||
86 | ca->cpustat = alloc_percpu(struct kernel_cpustat); | |
87 | if (!ca->cpustat) | |
88 | goto out_free_cpuusage; | |
89 | ||
90 | return &ca->css; | |
91 | ||
92 | out_free_cpuusage: | |
93 | free_percpu(ca->cpuusage); | |
94 | out_free_ca: | |
95 | kfree(ca); | |
96 | out: | |
97 | return ERR_PTR(-ENOMEM); | |
98 | } | |
99 | ||
100 | /* destroy an existing cpu accounting group */ | |
eb95419b | 101 | static void cpuacct_css_free(struct cgroup_subsys_state *css) |
2e76c24d | 102 | { |
eb95419b | 103 | struct cpuacct *ca = css_ca(css); |
2e76c24d LZ |
104 | |
105 | free_percpu(ca->cpustat); | |
106 | free_percpu(ca->cpuusage); | |
107 | kfree(ca); | |
108 | } | |
109 | ||
d740037f DY |
110 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, |
111 | enum cpuacct_usage_index index) | |
2e76c24d | 112 | { |
d740037f | 113 | struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
2e76c24d LZ |
114 | u64 data; |
115 | ||
d740037f DY |
116 | /* |
117 | * We allow index == CPUACCT_USAGE_NRUSAGE here to read | |
118 | * the sum of suages. | |
119 | */ | |
120 | BUG_ON(index > CPUACCT_USAGE_NRUSAGE); | |
121 | ||
2e76c24d LZ |
122 | #ifndef CONFIG_64BIT |
123 | /* | |
124 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | |
125 | */ | |
126 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
d740037f DY |
127 | #endif |
128 | ||
129 | if (index == CPUACCT_USAGE_NRUSAGE) { | |
130 | int i = 0; | |
131 | ||
132 | data = 0; | |
133 | for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++) | |
134 | data += cpuusage->usages[i]; | |
135 | } else { | |
136 | data = cpuusage->usages[index]; | |
137 | } | |
138 | ||
139 | #ifndef CONFIG_64BIT | |
2e76c24d | 140 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
2e76c24d LZ |
141 | #endif |
142 | ||
143 | return data; | |
144 | } | |
145 | ||
146 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | |
147 | { | |
d740037f DY |
148 | struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
149 | int i; | |
2e76c24d LZ |
150 | |
151 | #ifndef CONFIG_64BIT | |
152 | /* | |
153 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | |
154 | */ | |
155 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
d740037f DY |
156 | #endif |
157 | ||
158 | for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++) | |
159 | cpuusage->usages[i] = val; | |
160 | ||
161 | #ifndef CONFIG_64BIT | |
2e76c24d | 162 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
2e76c24d LZ |
163 | #endif |
164 | } | |
165 | ||
166 | /* return total cpu usage (in nanoseconds) of a group */ | |
d740037f DY |
167 | static u64 __cpuusage_read(struct cgroup_subsys_state *css, |
168 | enum cpuacct_usage_index index) | |
2e76c24d | 169 | { |
182446d0 | 170 | struct cpuacct *ca = css_ca(css); |
2e76c24d LZ |
171 | u64 totalcpuusage = 0; |
172 | int i; | |
173 | ||
5ca3726a | 174 | for_each_possible_cpu(i) |
d740037f | 175 | totalcpuusage += cpuacct_cpuusage_read(ca, i, index); |
2e76c24d LZ |
176 | |
177 | return totalcpuusage; | |
178 | } | |
179 | ||
d740037f DY |
180 | static u64 cpuusage_user_read(struct cgroup_subsys_state *css, |
181 | struct cftype *cft) | |
182 | { | |
183 | return __cpuusage_read(css, CPUACCT_USAGE_USER); | |
184 | } | |
185 | ||
186 | static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, | |
187 | struct cftype *cft) | |
188 | { | |
189 | return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM); | |
190 | } | |
191 | ||
192 | static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) | |
193 | { | |
194 | return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE); | |
195 | } | |
196 | ||
182446d0 | 197 | static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, |
1a736b77 | 198 | u64 val) |
2e76c24d | 199 | { |
182446d0 | 200 | struct cpuacct *ca = css_ca(css); |
d740037f | 201 | int cpu; |
2e76c24d | 202 | |
1a736b77 DY |
203 | /* |
204 | * Only allow '0' here to do a reset. | |
205 | */ | |
d740037f DY |
206 | if (val) |
207 | return -EINVAL; | |
2e76c24d | 208 | |
d740037f DY |
209 | for_each_possible_cpu(cpu) |
210 | cpuacct_cpuusage_write(ca, cpu, 0); | |
2e76c24d | 211 | |
d740037f | 212 | return 0; |
2e76c24d LZ |
213 | } |
214 | ||
d740037f DY |
215 | static int __cpuacct_percpu_seq_show(struct seq_file *m, |
216 | enum cpuacct_usage_index index) | |
2e76c24d | 217 | { |
2da8ca82 | 218 | struct cpuacct *ca = css_ca(seq_css(m)); |
2e76c24d LZ |
219 | u64 percpu; |
220 | int i; | |
221 | ||
5ca3726a | 222 | for_each_possible_cpu(i) { |
d740037f | 223 | percpu = cpuacct_cpuusage_read(ca, i, index); |
2e76c24d LZ |
224 | seq_printf(m, "%llu ", (unsigned long long) percpu); |
225 | } | |
226 | seq_printf(m, "\n"); | |
227 | return 0; | |
228 | } | |
229 | ||
d740037f DY |
230 | static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) |
231 | { | |
232 | return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER); | |
233 | } | |
234 | ||
235 | static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) | |
236 | { | |
237 | return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM); | |
238 | } | |
239 | ||
240 | static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) | |
241 | { | |
242 | return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE); | |
243 | } | |
244 | ||
2e76c24d LZ |
245 | static const char * const cpuacct_stat_desc[] = { |
246 | [CPUACCT_STAT_USER] = "user", | |
247 | [CPUACCT_STAT_SYSTEM] = "system", | |
248 | }; | |
249 | ||
2da8ca82 | 250 | static int cpuacct_stats_show(struct seq_file *sf, void *v) |
2e76c24d | 251 | { |
2da8ca82 | 252 | struct cpuacct *ca = css_ca(seq_css(sf)); |
2e76c24d LZ |
253 | int cpu; |
254 | s64 val = 0; | |
255 | ||
5ca3726a | 256 | for_each_possible_cpu(cpu) { |
2e76c24d LZ |
257 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); |
258 | val += kcpustat->cpustat[CPUTIME_USER]; | |
259 | val += kcpustat->cpustat[CPUTIME_NICE]; | |
260 | } | |
261 | val = cputime64_to_clock_t(val); | |
44ffc75b | 262 | seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); |
2e76c24d LZ |
263 | |
264 | val = 0; | |
5ca3726a | 265 | for_each_possible_cpu(cpu) { |
2e76c24d LZ |
266 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); |
267 | val += kcpustat->cpustat[CPUTIME_SYSTEM]; | |
268 | val += kcpustat->cpustat[CPUTIME_IRQ]; | |
269 | val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; | |
270 | } | |
271 | ||
272 | val = cputime64_to_clock_t(val); | |
44ffc75b | 273 | seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); |
2e76c24d LZ |
274 | |
275 | return 0; | |
276 | } | |
277 | ||
278 | static struct cftype files[] = { | |
279 | { | |
280 | .name = "usage", | |
281 | .read_u64 = cpuusage_read, | |
282 | .write_u64 = cpuusage_write, | |
283 | }, | |
d740037f DY |
284 | { |
285 | .name = "usage_user", | |
286 | .read_u64 = cpuusage_user_read, | |
287 | }, | |
288 | { | |
289 | .name = "usage_sys", | |
290 | .read_u64 = cpuusage_sys_read, | |
291 | }, | |
2e76c24d LZ |
292 | { |
293 | .name = "usage_percpu", | |
2da8ca82 | 294 | .seq_show = cpuacct_percpu_seq_show, |
2e76c24d | 295 | }, |
d740037f DY |
296 | { |
297 | .name = "usage_percpu_user", | |
298 | .seq_show = cpuacct_percpu_user_seq_show, | |
299 | }, | |
300 | { | |
301 | .name = "usage_percpu_sys", | |
302 | .seq_show = cpuacct_percpu_sys_seq_show, | |
303 | }, | |
2e76c24d LZ |
304 | { |
305 | .name = "stat", | |
2da8ca82 | 306 | .seq_show = cpuacct_stats_show, |
2e76c24d LZ |
307 | }, |
308 | { } /* terminate */ | |
309 | }; | |
310 | ||
311 | /* | |
312 | * charge this task's execution time to its accounting group. | |
313 | * | |
314 | * called with rq->lock held. | |
315 | */ | |
316 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |
317 | { | |
318 | struct cpuacct *ca; | |
bd928830 AB |
319 | int index = CPUACCT_USAGE_SYSTEM; |
320 | struct pt_regs *regs = task_pt_regs(tsk); | |
d740037f | 321 | |
bd928830 | 322 | if (regs && user_mode(regs)) |
d740037f | 323 | index = CPUACCT_USAGE_USER; |
2e76c24d LZ |
324 | |
325 | rcu_read_lock(); | |
d740037f | 326 | |
73e6aafd | 327 | for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) |
d740037f DY |
328 | this_cpu_ptr(ca->cpuusage)->usages[index] += cputime; |
329 | ||
2e76c24d LZ |
330 | rcu_read_unlock(); |
331 | } | |
332 | ||
1966aaf7 LZ |
333 | /* |
334 | * Add user/system time to cpuacct. | |
335 | * | |
336 | * Note: it's the caller that updates the account of the root cgroup. | |
337 | */ | |
73e6aafd | 338 | void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) |
1966aaf7 | 339 | { |
1966aaf7 LZ |
340 | struct cpuacct *ca; |
341 | ||
1966aaf7 | 342 | rcu_read_lock(); |
73e6aafd ZL |
343 | for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca)) |
344 | this_cpu_ptr(ca->cpustat)->cpustat[index] += val; | |
1966aaf7 LZ |
345 | rcu_read_unlock(); |
346 | } | |
347 | ||
073219e9 | 348 | struct cgroup_subsys cpuacct_cgrp_subsys = { |
621e2de0 LZ |
349 | .css_alloc = cpuacct_css_alloc, |
350 | .css_free = cpuacct_css_free, | |
5577964e | 351 | .legacy_cftypes = files, |
b38e42e9 | 352 | .early_init = true, |
2e76c24d | 353 | }; |