// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup-internal.h"

#include <linux/sched/cputime.h>

#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>

static DEFINE_SPINLOCK(cgroup_rstat_lock);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);

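/* Return @cgrp's rstat percpu area for @cpu. */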
static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
}

/**
 * cgroup_rstat_updated - keep track of updated rstat_cpu
 * @cgrp: target cgroup
 * @cpu: cpu on which rstat_cpu was updated
 *
 * @cgrp's rstat_cpu on @cpu was updated.  Put it on the parent's matching
 * rstat_cpu->updated_children list.  See the comment on top of
 * cgroup_rstat_cpu definition for details.
 */
__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
	unsigned long flags;

	/*
	 * Speculative already-on-list test. This may race leading to
	 * temporary inaccuracies, which is fine.
	 *
	 * Because @parent's updated_children is terminated with @parent
	 * instead of NULL, we can tell whether @cgrp is on the list by
	 * testing the next pointer for NULL.
	 */
	if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
		return;

	raw_spin_lock_irqsave(cpu_lock, flags);

	/* put @cgrp and all ancestors on the corresponding updated lists */
	while (true) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
		struct cgroup *parent = cgroup_parent(cgrp);
		struct cgroup_rstat_cpu *prstatc;

		/*
		 * Both additions and removals are bottom-up.  If a cgroup
		 * is already in the tree, all ancestors are.
		 */
		if (rstatc->updated_next)
			break;

		/* Root has no parent to link it to, but mark it busy */
		if (!parent) {
			rstatc->updated_next = cgrp;
			break;
		}

		prstatc = cgroup_rstat_cpu(parent, cpu);
		rstatc->updated_next = prstatc->updated_children;
		prstatc->updated_children = cgrp;

		cgrp = parent;
	}

	raw_spin_unlock_irqrestore(cpu_lock, flags);
}

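/*
 * Illustrative usage sketch (the percpu counter is hypothetical; only
 * cgroup_rstat_updated() is real): a stat producer records a value
 * cheaply on the local cpu, then marks the cgroup dirty so that the
 * next flush picks the delta up:
 *
 *	this_cpu_add(my_percpu_counter, delta);
 *	cgroup_rstat_updated(cgrp, smp_processor_id());
 *
 * See cgroup_base_stat_cputime_account_end() below for the in-tree
 * instance of this pattern.
 */
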
/**
 * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
 * @pos: current position
 * @root: root of the tree to traverse
 * @cpu: target cpu
 *
 * Walks the updated rstat_cpu tree on @cpu from @root.  %NULL @pos starts
 * the traversal and %NULL return indicates the end.  During traversal,
 * each returned cgroup is unlinked from the tree.  Must be called with the
 * matching cgroup_rstat_cpu_lock held.
 *
 * The only ordering guarantee is that, for a parent and a child pair
 * covered by a given traversal, if a child is visited, its parent is
 * guaranteed to be visited afterwards.
 */
static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
						   struct cgroup *root, int cpu)
{
	struct cgroup_rstat_cpu *rstatc;
	struct cgroup *parent;

	if (pos == root)
		return NULL;

	/*
	 * We're gonna walk down to the first leaf and visit/remove it.  We
	 * can pick any unvisited node as the starting point.
	 */
	if (!pos) {
		pos = root;
		/* return NULL if this subtree is not on-list */
		if (!cgroup_rstat_cpu(pos, cpu)->updated_next)
			return NULL;
	} else {
		pos = cgroup_parent(pos);
	}

	/* walk down to the first leaf */
	while (true) {
		rstatc = cgroup_rstat_cpu(pos, cpu);
		if (rstatc->updated_children == pos)
			break;
		pos = rstatc->updated_children;
	}

	/*
	 * Unlink @pos from the tree.  As the updated_children list is
	 * singly linked, we have to walk it to find the removal point.
	 * However, due to the way we traverse, @pos will be the first
	 * child in most cases. The only exception is @root.
	 */
	parent = cgroup_parent(pos);
	if (parent) {
		struct cgroup_rstat_cpu *prstatc;
		struct cgroup **nextp;

		prstatc = cgroup_rstat_cpu(parent, cpu);
		nextp = &prstatc->updated_children;
		while (*nextp != pos) {
			struct cgroup_rstat_cpu *nrstatc;

			nrstatc = cgroup_rstat_cpu(*nextp, cpu);
			WARN_ON_ONCE(*nextp == parent);
			nextp = &nrstatc->updated_next;
		}
		*nextp = rstatc->updated_next;
	}

	rstatc->updated_next = NULL;
	return pos;
}

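/*
 * Worked example with a hypothetical hierarchy: if A -> B -> C are all on
 * the updated tree and A is @root, successive calls return C, then B, then
 * A — leaves first, so a parent is always flushed after its children.
 */
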
/*
 * A hook for bpf stat collectors to attach to and flush their stats.
 * Together with providing bpf kfuncs for cgroup_rstat_updated() and
 * cgroup_rstat_flush(), this enables a complete workflow where bpf progs that
 * collect cgroup stats can integrate with rstat for efficient flushing.
 *
 * A static noinline declaration here could cause the compiler to optimize away
 * the function. A global noinline declaration will keep the definition, but may
 * optimize away the callsite. Therefore, __weak is needed to ensure that the
 * call is still emitted, by telling the compiler that we don't know what the
 * function might eventually be.
 */

__bpf_hook_start();

__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
				     struct cgroup *parent, int cpu)
{
}

__bpf_hook_end();

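/*
 * A bpf stat collector would typically attach an fentry program to the
 * hook above.  Minimal sketch of the bpf side (illustrative, not part of
 * this file; the accounting logic is an assumption):
 *
 *	SEC("fentry/bpf_rstat_flush")
 *	int BPF_PROG(my_flush, struct cgroup *cgrp, struct cgroup *parent,
 *		     int cpu)
 *	{
 *		// fold this cpu's pending counters into cgrp's totals and
 *		// propagate the delta to parent
 *		return 0;
 *	}
 */
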
/* see cgroup_rstat_flush() */
static void cgroup_rstat_flush_locked(struct cgroup *cgrp)
	__releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
{
	int cpu;

	lockdep_assert_held(&cgroup_rstat_lock);

	for_each_possible_cpu(cpu) {
		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
						       cpu);
		struct cgroup *pos = NULL;
		unsigned long flags;

		/*
		 * The _irqsave() is needed because cgroup_rstat_lock is
		 * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
		 * this lock with the _irq() suffix only disables interrupts on
		 * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
		 * interrupts on both configurations. The _irqsave() ensures
		 * that interrupts are always disabled and later restored.
		 */
		raw_spin_lock_irqsave(cpu_lock, flags);
		while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
			struct cgroup_subsys_state *css;

			cgroup_base_stat_flush(pos, cpu);
			bpf_rstat_flush(pos, cgroup_parent(pos), cpu);

			rcu_read_lock();
			list_for_each_entry_rcu(css, &pos->rstat_css_list,
						rstat_css_node)
				css->ss->css_rstat_flush(css, cpu);
			rcu_read_unlock();
		}
		raw_spin_unlock_irqrestore(cpu_lock, flags);

		/* play nice and yield if necessary */
		if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) {
			spin_unlock_irq(&cgroup_rstat_lock);
			if (!cond_resched())
				cpu_relax();
			spin_lock_irq(&cgroup_rstat_lock);
		}
	}
}

/**
 * cgroup_rstat_flush - flush stats in @cgrp's subtree
 * @cgrp: target cgroup
 *
 * Collect all per-cpu stats in @cgrp's subtree into the global counters
 * and propagate them upwards.  After this function returns, all cgroups in
 * the subtree have up-to-date ->stat.
 *
 * This also gets all cgroups in the subtree including @cgrp off the
 * ->updated_children lists.
 *
 * This function may block.
 */
__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
{
	might_sleep();

	spin_lock_irq(&cgroup_rstat_lock);
	cgroup_rstat_flush_locked(cgrp);
	spin_unlock_irq(&cgroup_rstat_lock);
}

/**
 * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
 * @cgrp: target cgroup
 *
 * Flush stats in @cgrp's subtree and prevent further flushes.  Must be
 * paired with cgroup_rstat_flush_release().
 *
 * This function may block.
 */
void cgroup_rstat_flush_hold(struct cgroup *cgrp)
	__acquires(&cgroup_rstat_lock)
{
	might_sleep();
	spin_lock_irq(&cgroup_rstat_lock);
	cgroup_rstat_flush_locked(cgrp);
}

/**
 * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
 */
void cgroup_rstat_flush_release(void)
	__releases(&cgroup_rstat_lock)
{
	spin_unlock_irq(&cgroup_rstat_lock);
}

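/*
 * Allocate @cgrp's per-cpu rstat state (the root cgroup's is preallocated)
 * and self-terminate each cpu's updated_children list.
 */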
int cgroup_rstat_init(struct cgroup *cgrp)
{
	int cpu;

	/* the root cgrp has rstat_cpu preallocated */
	if (!cgrp->rstat_cpu) {
		cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
		if (!cgrp->rstat_cpu)
			return -ENOMEM;
	}

	/* ->updated_children list is self terminated */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		rstatc->updated_children = cgrp;
		u64_stats_init(&rstatc->bsync);
	}

	return 0;
}

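/*
 * Flush any remaining stats, verify that @cgrp is fully off the updated
 * lists, then free its per-cpu rstat state.
 */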
void cgroup_rstat_exit(struct cgroup *cgrp)
{
	int cpu;

	cgroup_rstat_flush(cgrp);

	/* sanity check */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
		    WARN_ON_ONCE(rstatc->updated_next))
			return;
	}

	free_percpu(cgrp->rstat_cpu);
	cgrp->rstat_cpu = NULL;
}

void __init cgroup_rstat_boot(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
}

/*
 * Functions for cgroup basic resource statistics implemented on top of
 * rstat.
 */
static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
				 struct cgroup_base_stat *src_bstat)
{
	dst_bstat->cputime.utime += src_bstat->cputime.utime;
	dst_bstat->cputime.stime += src_bstat->cputime.stime;
	dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
#ifdef CONFIG_SCHED_CORE
	dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
#endif
}

static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
				 struct cgroup_base_stat *src_bstat)
{
	dst_bstat->cputime.utime -= src_bstat->cputime.utime;
	dst_bstat->cputime.stime -= src_bstat->cputime.stime;
	dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime;
#ifdef CONFIG_SCHED_CORE
	dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
#endif
}

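/*
 * Compute the delta accumulated on @cpu since the last flush (current
 * percpu bstat minus the last_bstat snapshot), fold it into @cgrp's
 * totals, and propagate @cgrp's accumulated delta to its parent.
 */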
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
{
	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup_rstat_cpu *prstatc;
	struct cgroup_base_stat delta;
	unsigned seq;

	/* Root-level stats are sourced from system-wide CPU stats */
	if (!parent)
		return;

	/* fetch the current per-cpu values */
	do {
		seq = __u64_stats_fetch_begin(&rstatc->bsync);
		delta = rstatc->bstat;
	} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));

	/* propagate per-cpu delta to cgroup and per-cpu global statistics */
	cgroup_base_stat_sub(&delta, &rstatc->last_bstat);
	cgroup_base_stat_add(&cgrp->bstat, &delta);
	cgroup_base_stat_add(&rstatc->last_bstat, &delta);
	cgroup_base_stat_add(&rstatc->subtree_bstat, &delta);

	/* propagate cgroup and per-cpu global delta to parent (unless that's root) */
	if (cgroup_parent(parent)) {
		delta = cgrp->bstat;
		cgroup_base_stat_sub(&delta, &cgrp->last_bstat);
		cgroup_base_stat_add(&parent->bstat, &delta);
		cgroup_base_stat_add(&cgrp->last_bstat, &delta);

		delta = rstatc->subtree_bstat;
		prstatc = cgroup_rstat_cpu(parent, cpu);
		cgroup_base_stat_sub(&delta, &rstatc->last_subtree_bstat);
		cgroup_base_stat_add(&prstatc->subtree_bstat, &delta);
		cgroup_base_stat_add(&rstatc->last_subtree_bstat, &delta);
	}
}

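/*
 * Begin/end pair bracketing a percpu bstat update: pin the cpu, open a
 * u64_stats write-side critical section, and on the way out mark the
 * cgroup updated so that a later flush collects the new values.
 */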
static struct cgroup_rstat_cpu *
cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
{
	struct cgroup_rstat_cpu *rstatc;

	rstatc = get_cpu_ptr(cgrp->rstat_cpu);
	*flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
	return rstatc;
}

static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
						 struct cgroup_rstat_cpu *rstatc,
						 unsigned long flags)
{
	u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
	cgroup_rstat_updated(cgrp, smp_processor_id());
	put_cpu_ptr(rstatc);
}

void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;
	unsigned long flags;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
	rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
	cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}

void __cgroup_account_cputime_field(struct cgroup *cgrp,
				    enum cpu_usage_stat index, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;
	unsigned long flags;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);

	switch (index) {
	case CPUTIME_USER:
	case CPUTIME_NICE:
		rstatc->bstat.cputime.utime += delta_exec;
		break;
	case CPUTIME_SYSTEM:
	case CPUTIME_IRQ:
	case CPUTIME_SOFTIRQ:
		rstatc->bstat.cputime.stime += delta_exec;
		break;
#ifdef CONFIG_SCHED_CORE
	case CPUTIME_FORCEIDLE:
		rstatc->bstat.forceidle_sum += delta_exec;
		break;
#endif
	default:
		break;
	}

	cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}

/*
 * compute the cputime for the root cgroup by getting the per cpu data
 * at a global level, then categorizing the fields in a manner consistent
 * with how it is done by __cgroup_account_cputime_field for each bit of
 * cpu time attributed to a cgroup.
 */
static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
{
	struct task_cputime *cputime = &bstat->cputime;
	int i;

	memset(bstat, 0, sizeof(*bstat));
	for_each_possible_cpu(i) {
		struct kernel_cpustat kcpustat;
		u64 *cpustat = kcpustat.cpustat;
		u64 user = 0;
		u64 sys = 0;

		kcpustat_cpu_fetch(&kcpustat, i);

		user += cpustat[CPUTIME_USER];
		user += cpustat[CPUTIME_NICE];
		cputime->utime += user;

		sys += cpustat[CPUTIME_SYSTEM];
		sys += cpustat[CPUTIME_IRQ];
		sys += cpustat[CPUTIME_SOFTIRQ];
		cputime->stime += sys;

		cputime->sum_exec_runtime += user;
		cputime->sum_exec_runtime += sys;
		cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];

#ifdef CONFIG_SCHED_CORE
		bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
#endif
	}
}

void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	u64 usage, utime, stime;
	struct cgroup_base_stat bstat;
#ifdef CONFIG_SCHED_CORE
	u64 forceidle_time;
#endif

	if (cgroup_parent(cgrp)) {
		cgroup_rstat_flush_hold(cgrp);
		usage = cgrp->bstat.cputime.sum_exec_runtime;
		cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
			       &utime, &stime);
#ifdef CONFIG_SCHED_CORE
		forceidle_time = cgrp->bstat.forceidle_sum;
#endif
		cgroup_rstat_flush_release();
	} else {
		root_cgroup_cputime(&bstat);
		usage = bstat.cputime.sum_exec_runtime;
		utime = bstat.cputime.utime;
		stime = bstat.cputime.stime;
#ifdef CONFIG_SCHED_CORE
		forceidle_time = bstat.forceidle_sum;
#endif
	}

	do_div(usage, NSEC_PER_USEC);
	do_div(utime, NSEC_PER_USEC);
	do_div(stime, NSEC_PER_USEC);
#ifdef CONFIG_SCHED_CORE
	do_div(forceidle_time, NSEC_PER_USEC);
#endif

	seq_printf(seq, "usage_usec %llu\n"
		   "user_usec %llu\n"
		   "system_usec %llu\n",
		   usage, utime, stime);

#ifdef CONFIG_SCHED_CORE
	seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time);
#endif
}

/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
BTF_SET8_START(bpf_rstat_kfunc_ids)
BTF_ID_FLAGS(func, cgroup_rstat_updated)
BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
BTF_SET8_END(bpf_rstat_kfunc_ids)

static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &bpf_rstat_kfunc_ids,
};

static int __init bpf_rstat_kfunc_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &bpf_rstat_kfunc_set);
}
late_initcall(bpf_rstat_kfunc_init);
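
/*
 * On the bpf side, the kfuncs above are declared as ksyms before use
 * (illustrative sketch of a tracing prog, not part of this file):
 *
 *	extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
 *	extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
 *
 * cgroup_rstat_flush() is flagged KF_SLEEPABLE and may therefore only be
 * called from sleepable programs.
 */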