Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
801c1419 | 2 | |
2e76c24d LZ |
3 | /* |
4 | * CPU accounting code for task groups. | |
5 | * | |
6 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh | |
7 | * (balbir@in.ibm.com). | |
8 | */ | |
9 | ||
/*
 * Indices of the per-group CPU time statistics. Each statistic is the time
 * the tasks of the CPU accounting group spent executing in a given mode.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* time spent in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent in kernel mode */

	CPUACCT_STAT_NSTATS,	/* count of the statistics listed above */
};
17 | ||
9acacc2a ZL |
18 | static const char * const cpuacct_stat_desc[] = { |
19 | [CPUACCT_STAT_USER] = "user", | |
20 | [CPUACCT_STAT_SYSTEM] = "system", | |
d740037f DY |
21 | }; |
22 | ||
97fb7a0a | 23 | /* track CPU usage of a group of tasks and its child groups */ |
d1712796 | 24 | struct cpuacct { |
97fb7a0a IM |
25 | struct cgroup_subsys_state css; |
26 | /* cpuusage holds pointer to a u64-type object on every CPU */ | |
dd02d423 | 27 | u64 __percpu *cpuusage; |
97fb7a0a | 28 | struct kernel_cpustat __percpu *cpustat; |
d1712796 LZ |
29 | }; |
30 | ||
a7c6d554 TH |
31 | static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) |
32 | { | |
33 | return css ? container_of(css, struct cpuacct, css) : NULL; | |
34 | } | |
35 | ||
97fb7a0a | 36 | /* Return CPU accounting group to which this task belongs */ |
d1712796 LZ |
37 | static inline struct cpuacct *task_ca(struct task_struct *tsk) |
38 | { | |
073219e9 | 39 | return css_ca(task_css(tsk, cpuacct_cgrp_id)); |
d1712796 LZ |
40 | } |
41 | ||
d1712796 LZ |
42 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) |
43 | { | |
5c9d535b | 44 | return css_ca(ca->css.parent); |
d1712796 LZ |
45 | } |
46 | ||
dd02d423 | 47 | static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); |
14c6d3c8 LZ |
48 | static struct cpuacct root_cpuacct = { |
49 | .cpustat = &kernel_cpustat, | |
50 | .cpuusage = &root_cpuacct_cpuusage, | |
51 | }; | |
2e76c24d | 52 | |
97fb7a0a | 53 | /* Create a new CPU accounting group */ |
eb95419b TH |
54 | static struct cgroup_subsys_state * |
55 | cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) | |
2e76c24d LZ |
56 | { |
57 | struct cpuacct *ca; | |
58 | ||
eb95419b | 59 | if (!parent_css) |
2e76c24d LZ |
60 | return &root_cpuacct.css; |
61 | ||
62 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | |
63 | if (!ca) | |
64 | goto out; | |
65 | ||
dd02d423 | 66 | ca->cpuusage = alloc_percpu(u64); |
2e76c24d LZ |
67 | if (!ca->cpuusage) |
68 | goto out_free_ca; | |
69 | ||
70 | ca->cpustat = alloc_percpu(struct kernel_cpustat); | |
71 | if (!ca->cpustat) | |
72 | goto out_free_cpuusage; | |
73 | ||
74 | return &ca->css; | |
75 | ||
76 | out_free_cpuusage: | |
77 | free_percpu(ca->cpuusage); | |
78 | out_free_ca: | |
79 | kfree(ca); | |
80 | out: | |
81 | return ERR_PTR(-ENOMEM); | |
82 | } | |
83 | ||
97fb7a0a | 84 | /* Destroy an existing CPU accounting group */ |
eb95419b | 85 | static void cpuacct_css_free(struct cgroup_subsys_state *css) |
2e76c24d | 86 | { |
eb95419b | 87 | struct cpuacct *ca = css_ca(css); |
2e76c24d LZ |
88 | |
89 | free_percpu(ca->cpustat); | |
90 | free_percpu(ca->cpuusage); | |
91 | kfree(ca); | |
92 | } | |
93 | ||
d740037f | 94 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, |
9acacc2a | 95 | enum cpuacct_stat_index index) |
2e76c24d | 96 | { |
dd02d423 AR |
97 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
98 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; | |
2e76c24d LZ |
99 | u64 data; |
100 | ||
d740037f | 101 | /* |
9acacc2a | 102 | * We allow index == CPUACCT_STAT_NSTATS here to read |
3b03706f | 103 | * the sum of usages. |
d740037f | 104 | */ |
c7ccbf4b AR |
105 | if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS)) |
106 | return 0; | |
d740037f | 107 | |
2e76c24d LZ |
108 | #ifndef CONFIG_64BIT |
109 | /* | |
110 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | |
111 | */ | |
5cb9eaa3 | 112 | raw_spin_rq_lock_irq(cpu_rq(cpu)); |
d740037f DY |
113 | #endif |
114 | ||
dd02d423 AR |
115 | switch (index) { |
116 | case CPUACCT_STAT_USER: | |
117 | data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE]; | |
118 | break; | |
119 | case CPUACCT_STAT_SYSTEM: | |
120 | data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] + | |
121 | cpustat[CPUTIME_SOFTIRQ]; | |
122 | break; | |
123 | case CPUACCT_STAT_NSTATS: | |
124 | data = *cpuusage; | |
125 | break; | |
d740037f DY |
126 | } |
127 | ||
128 | #ifndef CONFIG_64BIT | |
5cb9eaa3 | 129 | raw_spin_rq_unlock_irq(cpu_rq(cpu)); |
2e76c24d LZ |
130 | #endif |
131 | ||
132 | return data; | |
133 | } | |
134 | ||
dd02d423 | 135 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu) |
2e76c24d | 136 | { |
dd02d423 AR |
137 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
138 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; | |
139 | ||
140 | /* Don't allow to reset global kernel_cpustat */ | |
141 | if (ca == &root_cpuacct) | |
142 | return; | |
2e76c24d LZ |
143 | |
144 | #ifndef CONFIG_64BIT | |
145 | /* | |
146 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | |
147 | */ | |
5cb9eaa3 | 148 | raw_spin_rq_lock_irq(cpu_rq(cpu)); |
d740037f | 149 | #endif |
dd02d423 AR |
150 | *cpuusage = 0; |
151 | cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0; | |
152 | cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0; | |
153 | cpustat[CPUTIME_SOFTIRQ] = 0; | |
d740037f DY |
154 | |
155 | #ifndef CONFIG_64BIT | |
5cb9eaa3 | 156 | raw_spin_rq_unlock_irq(cpu_rq(cpu)); |
2e76c24d LZ |
157 | #endif |
158 | } | |
159 | ||
97fb7a0a | 160 | /* Return total CPU usage (in nanoseconds) of a group */ |
d740037f | 161 | static u64 __cpuusage_read(struct cgroup_subsys_state *css, |
9acacc2a | 162 | enum cpuacct_stat_index index) |
2e76c24d | 163 | { |
182446d0 | 164 | struct cpuacct *ca = css_ca(css); |
2e76c24d LZ |
165 | u64 totalcpuusage = 0; |
166 | int i; | |
167 | ||
5ca3726a | 168 | for_each_possible_cpu(i) |
d740037f | 169 | totalcpuusage += cpuacct_cpuusage_read(ca, i, index); |
2e76c24d LZ |
170 | |
171 | return totalcpuusage; | |
172 | } | |
173 | ||
d740037f DY |
174 | static u64 cpuusage_user_read(struct cgroup_subsys_state *css, |
175 | struct cftype *cft) | |
176 | { | |
9acacc2a | 177 | return __cpuusage_read(css, CPUACCT_STAT_USER); |
d740037f DY |
178 | } |
179 | ||
180 | static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, | |
181 | struct cftype *cft) | |
182 | { | |
9acacc2a | 183 | return __cpuusage_read(css, CPUACCT_STAT_SYSTEM); |
d740037f DY |
184 | } |
185 | ||
186 | static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) | |
187 | { | |
9acacc2a | 188 | return __cpuusage_read(css, CPUACCT_STAT_NSTATS); |
d740037f DY |
189 | } |
190 | ||
182446d0 | 191 | static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, |
1a736b77 | 192 | u64 val) |
2e76c24d | 193 | { |
182446d0 | 194 | struct cpuacct *ca = css_ca(css); |
d740037f | 195 | int cpu; |
2e76c24d | 196 | |
1a736b77 DY |
197 | /* |
198 | * Only allow '0' here to do a reset. | |
199 | */ | |
d740037f DY |
200 | if (val) |
201 | return -EINVAL; | |
2e76c24d | 202 | |
d740037f | 203 | for_each_possible_cpu(cpu) |
dd02d423 | 204 | cpuacct_cpuusage_write(ca, cpu); |
2e76c24d | 205 | |
d740037f | 206 | return 0; |
2e76c24d LZ |
207 | } |
208 | ||
d740037f | 209 | static int __cpuacct_percpu_seq_show(struct seq_file *m, |
9acacc2a | 210 | enum cpuacct_stat_index index) |
2e76c24d | 211 | { |
2da8ca82 | 212 | struct cpuacct *ca = css_ca(seq_css(m)); |
2e76c24d LZ |
213 | u64 percpu; |
214 | int i; | |
215 | ||
5ca3726a | 216 | for_each_possible_cpu(i) { |
d740037f | 217 | percpu = cpuacct_cpuusage_read(ca, i, index); |
2e76c24d LZ |
218 | seq_printf(m, "%llu ", (unsigned long long) percpu); |
219 | } | |
220 | seq_printf(m, "\n"); | |
221 | return 0; | |
222 | } | |
223 | ||
d740037f DY |
224 | static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) |
225 | { | |
9acacc2a | 226 | return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER); |
d740037f DY |
227 | } |
228 | ||
229 | static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) | |
230 | { | |
9acacc2a | 231 | return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM); |
d740037f DY |
232 | } |
233 | ||
234 | static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) | |
235 | { | |
9acacc2a | 236 | return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS); |
d740037f DY |
237 | } |
238 | ||
277a13e4 ZL |
239 | static int cpuacct_all_seq_show(struct seq_file *m, void *V) |
240 | { | |
241 | struct cpuacct *ca = css_ca(seq_css(m)); | |
242 | int index; | |
243 | int cpu; | |
244 | ||
245 | seq_puts(m, "cpu"); | |
246 | for (index = 0; index < CPUACCT_STAT_NSTATS; index++) | |
247 | seq_printf(m, " %s", cpuacct_stat_desc[index]); | |
248 | seq_puts(m, "\n"); | |
249 | ||
250 | for_each_possible_cpu(cpu) { | |
277a13e4 | 251 | seq_printf(m, "%d", cpu); |
dd02d423 AR |
252 | for (index = 0; index < CPUACCT_STAT_NSTATS; index++) |
253 | seq_printf(m, " %llu", | |
254 | cpuacct_cpuusage_read(ca, cpu, index)); | |
277a13e4 ZL |
255 | seq_puts(m, "\n"); |
256 | } | |
257 | return 0; | |
258 | } | |
259 | ||
2da8ca82 | 260 | static int cpuacct_stats_show(struct seq_file *sf, void *v) |
2e76c24d | 261 | { |
2da8ca82 | 262 | struct cpuacct *ca = css_ca(seq_css(sf)); |
8c92606a AR |
263 | struct task_cputime cputime; |
264 | u64 val[CPUACCT_STAT_NSTATS]; | |
2e76c24d | 265 | int cpu; |
8e546bfa | 266 | int stat; |
2e76c24d | 267 | |
8c92606a | 268 | memset(&cputime, 0, sizeof(cputime)); |
5ca3726a | 269 | for_each_possible_cpu(cpu) { |
8e546bfa | 270 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; |
2e76c24d | 271 | |
8c92606a AR |
272 | cputime.utime += cpustat[CPUTIME_USER]; |
273 | cputime.utime += cpustat[CPUTIME_NICE]; | |
274 | cputime.stime += cpustat[CPUTIME_SYSTEM]; | |
275 | cputime.stime += cpustat[CPUTIME_IRQ]; | |
276 | cputime.stime += cpustat[CPUTIME_SOFTIRQ]; | |
277 | ||
278 | cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu); | |
2e76c24d LZ |
279 | } |
280 | ||
8c92606a AR |
281 | cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime, |
282 | &val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]); | |
283 | ||
8e546bfa | 284 | for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { |
8c92606a AR |
285 | seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat], |
286 | nsec_to_clock_t(val[stat])); | |
8e546bfa | 287 | } |
2e76c24d LZ |
288 | |
289 | return 0; | |
290 | } | |
291 | ||
292 | static struct cftype files[] = { | |
293 | { | |
294 | .name = "usage", | |
295 | .read_u64 = cpuusage_read, | |
296 | .write_u64 = cpuusage_write, | |
297 | }, | |
d740037f DY |
298 | { |
299 | .name = "usage_user", | |
300 | .read_u64 = cpuusage_user_read, | |
301 | }, | |
302 | { | |
303 | .name = "usage_sys", | |
304 | .read_u64 = cpuusage_sys_read, | |
305 | }, | |
2e76c24d LZ |
306 | { |
307 | .name = "usage_percpu", | |
2da8ca82 | 308 | .seq_show = cpuacct_percpu_seq_show, |
2e76c24d | 309 | }, |
d740037f DY |
310 | { |
311 | .name = "usage_percpu_user", | |
312 | .seq_show = cpuacct_percpu_user_seq_show, | |
313 | }, | |
314 | { | |
315 | .name = "usage_percpu_sys", | |
316 | .seq_show = cpuacct_percpu_sys_seq_show, | |
317 | }, | |
277a13e4 ZL |
318 | { |
319 | .name = "usage_all", | |
320 | .seq_show = cpuacct_all_seq_show, | |
321 | }, | |
2e76c24d LZ |
322 | { |
323 | .name = "stat", | |
2da8ca82 | 324 | .seq_show = cpuacct_stats_show, |
2e76c24d LZ |
325 | }, |
326 | { } /* terminate */ | |
327 | }; | |
328 | ||
329 | /* | |
330 | * charge this task's execution time to its accounting group. | |
331 | * | |
332 | * called with rq->lock held. | |
333 | */ | |
334 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |
335 | { | |
248cc999 | 336 | unsigned int cpu = task_cpu(tsk); |
2e76c24d | 337 | struct cpuacct *ca; |
2e76c24d | 338 | |
dc6e0818 | 339 | lockdep_assert_rq_held(cpu_rq(cpu)); |
d740037f | 340 | |
73e6aafd | 341 | for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) |
248cc999 | 342 | *per_cpu_ptr(ca->cpuusage, cpu) += cputime; |
2e76c24d LZ |
343 | } |
344 | ||
1966aaf7 LZ |
345 | /* |
346 | * Add user/system time to cpuacct. | |
347 | * | |
348 | * Note: it's the caller that updates the account of the root cgroup. | |
349 | */ | |
73e6aafd | 350 | void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) |
1966aaf7 | 351 | { |
1966aaf7 LZ |
352 | struct cpuacct *ca; |
353 | ||
73e6aafd | 354 | for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca)) |
12aa2587 | 355 | __this_cpu_add(ca->cpustat->cpustat[index], val); |
1966aaf7 LZ |
356 | } |
357 | ||
073219e9 | 358 | struct cgroup_subsys cpuacct_cgrp_subsys = { |
621e2de0 LZ |
359 | .css_alloc = cpuacct_css_alloc, |
360 | .css_free = cpuacct_css_free, | |
5577964e | 361 | .legacy_cftypes = files, |
b38e42e9 | 362 | .early_init = true, |
2e76c24d | 363 | }; |