// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup-internal.h"

#include <linux/sched/cputime.h>

#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>

static DEFINE_SPINLOCK(cgroup_rstat_lock);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);

static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
}

/**
 * cgroup_rstat_updated - keep track of updated rstat_cpu
 * @cgrp: target cgroup
 * @cpu: cpu on which rstat_cpu was updated
 *
 * @cgrp's rstat_cpu on @cpu was updated.  Put it on the parent's matching
 * rstat_cpu->updated_children list.  See the comment on top of
 * cgroup_rstat_cpu definition for details.
 */
__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
	unsigned long flags;

	/*
	 * Speculative already-on-list test.  This may race leading to
	 * temporary inaccuracies, which is fine.
	 *
	 * Because @parent's updated_children is terminated with @parent
	 * instead of NULL, we can tell whether @cgrp is on the list by
	 * testing the next pointer for NULL.
	 */
	if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
		return;

	raw_spin_lock_irqsave(cpu_lock, flags);

	/* put @cgrp and all ancestors on the corresponding updated lists */
	while (true) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
		struct cgroup *parent = cgroup_parent(cgrp);
		struct cgroup_rstat_cpu *prstatc;

		/*
		 * Both additions and removals are bottom-up.  If a cgroup
		 * is already in the tree, all ancestors are.
		 */
		if (rstatc->updated_next)
			break;

		/* Root has no parent to link it to, but mark it busy */
		if (!parent) {
			rstatc->updated_next = cgrp;
			break;
		}

		prstatc = cgroup_rstat_cpu(parent, cpu);
		rstatc->updated_next = prstatc->updated_children;
		prstatc->updated_children = cgrp;

		cgrp = parent;
	}

	raw_spin_unlock_irqrestore(cpu_lock, flags);
}
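
/*
 * Example (hypothetical hierarchy, for illustration only): with
 * root <- A <- B all clean on @cpu, cgroup_rstat_updated(B, cpu) leaves
 * the per-cpu links as:
 *
 *	B->updated_next == A		(the parent terminates the list)
 *	A->updated_children == B,	A->updated_next == root
 *	root->updated_children == A,	root->updated_next == root (busy mark)
 */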

/**
 * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
 * @pos: current position
 * @root: root of the tree to traverse
 * @cpu: target cpu
 *
 * Walks the updated rstat_cpu tree on @cpu from @root.  %NULL @pos starts
 * the traversal and %NULL return indicates the end.  During traversal,
 * each returned cgroup is unlinked from the tree.  Must be called with the
 * matching cgroup_rstat_cpu_lock held.
 *
 * The only ordering guarantee is that, for a parent and a child pair
 * covered by a given traversal, if a child is visited, its parent is
 * guaranteed to be visited afterwards.
 */
static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
						   struct cgroup *root, int cpu)
{
	struct cgroup_rstat_cpu *rstatc;
	struct cgroup *parent;

	if (pos == root)
		return NULL;

	/*
	 * We're going to walk down to the first leaf and visit/remove it.
	 * We can pick any unvisited node as the starting point.
	 */
	if (!pos) {
		pos = root;
		/* return NULL if this subtree is not on-list */
		if (!cgroup_rstat_cpu(pos, cpu)->updated_next)
			return NULL;
	} else {
		pos = cgroup_parent(pos);
	}

	/* walk down to the first leaf */
	while (true) {
		rstatc = cgroup_rstat_cpu(pos, cpu);
		if (rstatc->updated_children == pos)
			break;
		pos = rstatc->updated_children;
	}

	/*
	 * Unlink @pos from the tree.  As the updated_children list is
	 * singly linked, we have to walk it to find the removal point.
	 * However, due to the way we traverse, @pos will be the first
	 * child in most cases.  The only exception is @root.
	 */
	parent = cgroup_parent(pos);
	if (parent) {
		struct cgroup_rstat_cpu *prstatc;
		struct cgroup **nextp;

		prstatc = cgroup_rstat_cpu(parent, cpu);
		nextp = &prstatc->updated_children;
		while (*nextp != pos) {
			struct cgroup_rstat_cpu *nrstatc;

			nrstatc = cgroup_rstat_cpu(*nextp, cpu);
			WARN_ON_ONCE(*nextp == parent);
			nextp = &nrstatc->updated_next;
		}
		*nextp = rstatc->updated_next;
	}

	rstatc->updated_next = NULL;
	return pos;
}
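
/*
 * Example pop order (hypothetical hierarchy): if root has on-list children
 * A and A' with A <- B, successive calls may return B, A, A', then root.
 * The exact order can vary, but a child is always popped before its
 * parent, per the ordering guarantee documented above.
 */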

/*
 * A hook for bpf stat collectors to attach to and flush their stats.
 * Together with providing bpf kfuncs for cgroup_rstat_updated() and
 * cgroup_rstat_flush(), this enables a complete workflow where bpf progs
 * that collect cgroup stats can integrate with rstat for efficient flushing.
 *
 * A static noinline declaration here could cause the compiler to optimize
 * away the function.  A global noinline declaration will keep the definition,
 * but may optimize away the callsite.  Therefore, __weak is needed to ensure
 * that the call is still emitted, by telling the compiler that we don't know
 * what the function might eventually be.
 *
 * __diag_* below are needed to dismiss the missing prototype warning.
 */
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
		  "kfuncs which will be used in BPF programs");

__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
				     struct cgroup *parent, int cpu)
{
}

__diag_pop();
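
/*
 * A BPF stat collector would typically attach here with fentry.  Sketch
 * modeled on the in-tree selftest pattern; the program name is illustrative:
 *
 *	SEC("fentry/bpf_rstat_flush")
 *	int BPF_PROG(my_flush, struct cgroup *cgrp, struct cgroup *parent,
 *		     int cpu)
 *	{
 *		...fold this prog's per-cpu stats for @cgrp into @parent...
 *		return 0;
 *	}
 */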

/* see cgroup_rstat_flush() */
static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
	__releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
{
	int cpu;

	lockdep_assert_held(&cgroup_rstat_lock);

	for_each_possible_cpu(cpu) {
		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
						       cpu);
		struct cgroup *pos = NULL;
		unsigned long flags;

		/*
		 * The _irqsave() is needed because cgroup_rstat_lock is
		 * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
		 * this lock with the _irq() suffix only disables interrupts on
		 * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
		 * interrupts on both configurations. The _irqsave() ensures
		 * that interrupts are always disabled and later restored.
		 */
		raw_spin_lock_irqsave(cpu_lock, flags);
		while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
			struct cgroup_subsys_state *css;

			cgroup_base_stat_flush(pos, cpu);
			bpf_rstat_flush(pos, cgroup_parent(pos), cpu);

			rcu_read_lock();
			list_for_each_entry_rcu(css, &pos->rstat_css_list,
						rstat_css_node)
				css->ss->css_rstat_flush(css, cpu);
			rcu_read_unlock();
		}
		raw_spin_unlock_irqrestore(cpu_lock, flags);

		/* if @may_sleep, play nice and yield if necessary */
		if (may_sleep && (need_resched() ||
				  spin_needbreak(&cgroup_rstat_lock))) {
			spin_unlock_irq(&cgroup_rstat_lock);
			if (!cond_resched())
				cpu_relax();
			spin_lock_irq(&cgroup_rstat_lock);
		}
	}
}

/**
 * cgroup_rstat_flush - flush stats in @cgrp's subtree
 * @cgrp: target cgroup
 *
 * Collect all per-cpu stats in @cgrp's subtree into the global counters
 * and propagate them upwards.  After this function returns, all cgroups in
 * the subtree have up-to-date ->stat.
 *
 * This also gets all cgroups in the subtree including @cgrp off the
 * ->updated_children lists.
 *
 * This function may block.
 */
__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
{
	might_sleep();

	spin_lock_irq(&cgroup_rstat_lock);
	cgroup_rstat_flush_locked(cgrp, true);
	spin_unlock_irq(&cgroup_rstat_lock);
}

/**
 * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush()
 * @cgrp: target cgroup
 *
 * This function can be called from any context.
 */
void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
{
	unsigned long flags;

	spin_lock_irqsave(&cgroup_rstat_lock, flags);
	cgroup_rstat_flush_locked(cgrp, false);
	spin_unlock_irqrestore(&cgroup_rstat_lock, flags);
}

/**
 * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
 * @cgrp: target cgroup
 *
 * Flush stats in @cgrp's subtree and prevent further flushes.  Must be
 * paired with cgroup_rstat_flush_release().
 *
 * This function may block.
 */
void cgroup_rstat_flush_hold(struct cgroup *cgrp)
	__acquires(&cgroup_rstat_lock)
{
	might_sleep();
	spin_lock_irq(&cgroup_rstat_lock);
	cgroup_rstat_flush_locked(cgrp, true);
}

/**
 * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
 */
void cgroup_rstat_flush_release(void)
	__releases(&cgroup_rstat_lock)
{
	spin_unlock_irq(&cgroup_rstat_lock);
}
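
/*
 * Typical read-side pairing (see cgroup_base_stat_cputime_show() below
 * for the in-tree user):
 *
 *	cgroup_rstat_flush_hold(cgrp);
 *	usage = cgrp->bstat.cputime.sum_exec_runtime;
 *	cgroup_rstat_flush_release();
 */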

int cgroup_rstat_init(struct cgroup *cgrp)
{
	int cpu;

	/* the root cgrp has rstat_cpu preallocated */
	if (!cgrp->rstat_cpu) {
		cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
		if (!cgrp->rstat_cpu)
			return -ENOMEM;
	}

	/* ->updated_children list is self terminated */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		rstatc->updated_children = cgrp;
		u64_stats_init(&rstatc->bsync);
	}

	return 0;
}

void cgroup_rstat_exit(struct cgroup *cgrp)
{
	int cpu;

	cgroup_rstat_flush(cgrp);

	/* sanity check */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
		    WARN_ON_ONCE(rstatc->updated_next))
			return;
	}

	free_percpu(cgrp->rstat_cpu);
	cgrp->rstat_cpu = NULL;
}

void __init cgroup_rstat_boot(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
}

/*
 * Functions for cgroup basic resource statistics implemented on top of
 * rstat.
 */
static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
				 struct cgroup_base_stat *src_bstat)
{
	dst_bstat->cputime.utime += src_bstat->cputime.utime;
	dst_bstat->cputime.stime += src_bstat->cputime.stime;
	dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
#ifdef CONFIG_SCHED_CORE
	dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
#endif
}

static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
				 struct cgroup_base_stat *src_bstat)
{
	dst_bstat->cputime.utime -= src_bstat->cputime.utime;
	dst_bstat->cputime.stime -= src_bstat->cputime.stime;
	dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime;
#ifdef CONFIG_SCHED_CORE
	dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
#endif
}

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
{
	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup_base_stat delta;
	unsigned seq;

	/* Root-level stats are sourced from system-wide CPU stats */
	if (!parent)
		return;

	/* fetch the current per-cpu values */
	do {
		seq = __u64_stats_fetch_begin(&rstatc->bsync);
		delta = rstatc->bstat;
	} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));

	/* propagate percpu delta to global */
	cgroup_base_stat_sub(&delta, &rstatc->last_bstat);
	cgroup_base_stat_add(&cgrp->bstat, &delta);
	cgroup_base_stat_add(&rstatc->last_bstat, &delta);

	/* propagate global delta to parent (unless that's root) */
	if (cgroup_parent(parent)) {
		delta = cgrp->bstat;
		cgroup_base_stat_sub(&delta, &cgrp->last_bstat);
		cgroup_base_stat_add(&parent->bstat, &delta);
		cgroup_base_stat_add(&cgrp->last_bstat, &delta);
	}
}
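
/*
 * Worked example with made-up numbers: if this cpu's bstat.cputime.utime
 * reads 10 while last_bstat.cputime.utime is 7, the delta of 3 is added
 * to cgrp->bstat and last_bstat catches up to 10, so the next flush only
 * propagates activity that happened after this one.
 */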

static struct cgroup_rstat_cpu *
cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
{
	struct cgroup_rstat_cpu *rstatc;

	rstatc = get_cpu_ptr(cgrp->rstat_cpu);
	*flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
	return rstatc;
}

static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
						 struct cgroup_rstat_cpu *rstatc,
						 unsigned long flags)
{
	u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
	cgroup_rstat_updated(cgrp, smp_processor_id());
	put_cpu_ptr(rstatc);
}

void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;
	unsigned long flags;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
	rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
	cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}

void __cgroup_account_cputime_field(struct cgroup *cgrp,
				    enum cpu_usage_stat index, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;
	unsigned long flags;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);

	switch (index) {
	case CPUTIME_USER:
	case CPUTIME_NICE:
		rstatc->bstat.cputime.utime += delta_exec;
		break;
	case CPUTIME_SYSTEM:
	case CPUTIME_IRQ:
	case CPUTIME_SOFTIRQ:
		rstatc->bstat.cputime.stime += delta_exec;
		break;
#ifdef CONFIG_SCHED_CORE
	case CPUTIME_FORCEIDLE:
		rstatc->bstat.forceidle_sum += delta_exec;
		break;
#endif
	default:
		break;
	}

	cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}

/*
 * compute the cputime for the root cgroup by getting the per cpu data
 * at a global level, then categorizing the fields in a manner consistent
 * with how it is done by __cgroup_account_cputime_field for each bit of
 * cpu time attributed to a cgroup.
 */
static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
{
	struct task_cputime *cputime = &bstat->cputime;
	int i;

	/*
	 * Zero the whole bstat up front: the caller passes an uninitialized
	 * struct and forceidle_sum below is only ever accumulated into.
	 */
	memset(bstat, 0, sizeof(*bstat));
	for_each_possible_cpu(i) {
		struct kernel_cpustat kcpustat;
		u64 *cpustat = kcpustat.cpustat;
		u64 user = 0;
		u64 sys = 0;

		kcpustat_cpu_fetch(&kcpustat, i);

		user += cpustat[CPUTIME_USER];
		user += cpustat[CPUTIME_NICE];
		cputime->utime += user;

		sys += cpustat[CPUTIME_SYSTEM];
		sys += cpustat[CPUTIME_IRQ];
		sys += cpustat[CPUTIME_SOFTIRQ];
		cputime->stime += sys;

		cputime->sum_exec_runtime += user;
		cputime->sum_exec_runtime += sys;
		cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];

#ifdef CONFIG_SCHED_CORE
		bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
#endif
	}
}

void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	u64 usage, utime, stime;
	struct cgroup_base_stat bstat;
#ifdef CONFIG_SCHED_CORE
	u64 forceidle_time;
#endif

	if (cgroup_parent(cgrp)) {
		cgroup_rstat_flush_hold(cgrp);
		usage = cgrp->bstat.cputime.sum_exec_runtime;
		cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
			       &utime, &stime);
#ifdef CONFIG_SCHED_CORE
		forceidle_time = cgrp->bstat.forceidle_sum;
#endif
		cgroup_rstat_flush_release();
	} else {
		root_cgroup_cputime(&bstat);
		usage = bstat.cputime.sum_exec_runtime;
		utime = bstat.cputime.utime;
		stime = bstat.cputime.stime;
#ifdef CONFIG_SCHED_CORE
		forceidle_time = bstat.forceidle_sum;
#endif
	}

	do_div(usage, NSEC_PER_USEC);
	do_div(utime, NSEC_PER_USEC);
	do_div(stime, NSEC_PER_USEC);
#ifdef CONFIG_SCHED_CORE
	do_div(forceidle_time, NSEC_PER_USEC);
#endif

	seq_printf(seq, "usage_usec %llu\n"
		   "user_usec %llu\n"
		   "system_usec %llu\n",
		   usage, utime, stime);

#ifdef CONFIG_SCHED_CORE
	seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time);
#endif
}
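
/*
 * Resulting cpu.stat output (illustrative values):
 *
 *	usage_usec 10543
 *	user_usec 6423
 *	system_usec 4120
 */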

/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
BTF_SET8_START(bpf_rstat_kfunc_ids)
BTF_ID_FLAGS(func, cgroup_rstat_updated)
BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
BTF_SET8_END(bpf_rstat_kfunc_ids)

static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &bpf_rstat_kfunc_ids,
};

static int __init bpf_rstat_kfunc_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &bpf_rstat_kfunc_set);
}
late_initcall(bpf_rstat_kfunc_init);
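
/*
 * Consumption sketch from a sleepable tracing BPF program (the extern
 * declarations follow the usual __ksym convention and are assumptions,
 * not part of this file):
 *
 *	extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
 *	extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
 */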