Commit | Line | Data |
---|---|---|
2e76c24d LZ |
1 | #include <linux/cgroup.h> |
2 | #include <linux/slab.h> | |
3 | #include <linux/percpu.h> | |
4 | #include <linux/spinlock.h> | |
5 | #include <linux/cpumask.h> | |
6 | #include <linux/seq_file.h> | |
7 | #include <linux/rcupdate.h> | |
8 | #include <linux/kernel_stat.h> | |
b329fd5b | 9 | #include <linux/err.h> |
2e76c24d LZ |
10 | |
11 | #include "sched.h" | |
12 | ||
13 | /* | |
14 | * CPU accounting code for task groups. | |
15 | * | |
16 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh | |
17 | * (balbir@in.ibm.com). | |
18 | */ | |
19 | ||
d1712796 LZ |
/*
 * Indices into cpuacct_stat_desc[]: time spent by the tasks of the cpu
 * accounting group executing in ...
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	CPUACCT_STAT_NSTATS,	/* number of entries above */
};
27 | ||
28 | /* track cpu usage of a group of tasks and its child groups */ | |
29 | struct cpuacct { | |
30 | struct cgroup_subsys_state css; | |
31 | /* cpuusage holds pointer to a u64-type object on every cpu */ | |
32 | u64 __percpu *cpuusage; | |
33 | struct kernel_cpustat __percpu *cpustat; | |
34 | }; | |
35 | ||
a7c6d554 TH |
36 | static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) |
37 | { | |
38 | return css ? container_of(css, struct cpuacct, css) : NULL; | |
39 | } | |
40 | ||
d1712796 LZ |
41 | /* return cpu accounting group corresponding to this container */ |
42 | static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) | |
43 | { | |
a7c6d554 | 44 | return css_ca(cgroup_css(cgrp, cpuacct_subsys_id)); |
d1712796 LZ |
45 | } |
46 | ||
47 | /* return cpu accounting group to which this task belongs */ | |
48 | static inline struct cpuacct *task_ca(struct task_struct *tsk) | |
49 | { | |
a7c6d554 | 50 | return css_ca(task_css(tsk, cpuacct_subsys_id)); |
d1712796 LZ |
51 | } |
52 | ||
d1712796 LZ |
53 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) |
54 | { | |
63876986 | 55 | return css_ca(css_parent(&ca->css)); |
d1712796 LZ |
56 | } |
57 | ||
7943e15a | 58 | static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); |
14c6d3c8 LZ |
59 | static struct cpuacct root_cpuacct = { |
60 | .cpustat = &kernel_cpustat, | |
61 | .cpuusage = &root_cpuacct_cpuusage, | |
62 | }; | |
2e76c24d LZ |
63 | |
64 | /* create a new cpu accounting group */ | |
eb95419b TH |
65 | static struct cgroup_subsys_state * |
66 | cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) | |
2e76c24d LZ |
67 | { |
68 | struct cpuacct *ca; | |
69 | ||
eb95419b | 70 | if (!parent_css) |
2e76c24d LZ |
71 | return &root_cpuacct.css; |
72 | ||
73 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | |
74 | if (!ca) | |
75 | goto out; | |
76 | ||
77 | ca->cpuusage = alloc_percpu(u64); | |
78 | if (!ca->cpuusage) | |
79 | goto out_free_ca; | |
80 | ||
81 | ca->cpustat = alloc_percpu(struct kernel_cpustat); | |
82 | if (!ca->cpustat) | |
83 | goto out_free_cpuusage; | |
84 | ||
85 | return &ca->css; | |
86 | ||
87 | out_free_cpuusage: | |
88 | free_percpu(ca->cpuusage); | |
89 | out_free_ca: | |
90 | kfree(ca); | |
91 | out: | |
92 | return ERR_PTR(-ENOMEM); | |
93 | } | |
94 | ||
95 | /* destroy an existing cpu accounting group */ | |
eb95419b | 96 | static void cpuacct_css_free(struct cgroup_subsys_state *css) |
2e76c24d | 97 | { |
eb95419b | 98 | struct cpuacct *ca = css_ca(css); |
2e76c24d LZ |
99 | |
100 | free_percpu(ca->cpustat); | |
101 | free_percpu(ca->cpuusage); | |
102 | kfree(ca); | |
103 | } | |
104 | ||
105 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) | |
106 | { | |
107 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | |
108 | u64 data; | |
109 | ||
110 | #ifndef CONFIG_64BIT | |
111 | /* | |
112 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | |
113 | */ | |
114 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
115 | data = *cpuusage; | |
116 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | |
117 | #else | |
118 | data = *cpuusage; | |
119 | #endif | |
120 | ||
121 | return data; | |
122 | } | |
123 | ||
124 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | |
125 | { | |
126 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | |
127 | ||
128 | #ifndef CONFIG_64BIT | |
129 | /* | |
130 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | |
131 | */ | |
132 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
133 | *cpuusage = val; | |
134 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | |
135 | #else | |
136 | *cpuusage = val; | |
137 | #endif | |
138 | } | |
139 | ||
140 | /* return total cpu usage (in nanoseconds) of a group */ | |
141 | static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | |
142 | { | |
143 | struct cpuacct *ca = cgroup_ca(cgrp); | |
144 | u64 totalcpuusage = 0; | |
145 | int i; | |
146 | ||
147 | for_each_present_cpu(i) | |
148 | totalcpuusage += cpuacct_cpuusage_read(ca, i); | |
149 | ||
150 | return totalcpuusage; | |
151 | } | |
152 | ||
153 | static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype, | |
154 | u64 reset) | |
155 | { | |
156 | struct cpuacct *ca = cgroup_ca(cgrp); | |
157 | int err = 0; | |
158 | int i; | |
159 | ||
160 | if (reset) { | |
161 | err = -EINVAL; | |
162 | goto out; | |
163 | } | |
164 | ||
165 | for_each_present_cpu(i) | |
166 | cpuacct_cpuusage_write(ca, i, 0); | |
167 | ||
168 | out: | |
169 | return err; | |
170 | } | |
171 | ||
172 | static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | |
173 | struct seq_file *m) | |
174 | { | |
175 | struct cpuacct *ca = cgroup_ca(cgroup); | |
176 | u64 percpu; | |
177 | int i; | |
178 | ||
179 | for_each_present_cpu(i) { | |
180 | percpu = cpuacct_cpuusage_read(ca, i); | |
181 | seq_printf(m, "%llu ", (unsigned long long) percpu); | |
182 | } | |
183 | seq_printf(m, "\n"); | |
184 | return 0; | |
185 | } | |
186 | ||
187 | static const char * const cpuacct_stat_desc[] = { | |
188 | [CPUACCT_STAT_USER] = "user", | |
189 | [CPUACCT_STAT_SYSTEM] = "system", | |
190 | }; | |
191 | ||
192 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | |
193 | struct cgroup_map_cb *cb) | |
194 | { | |
195 | struct cpuacct *ca = cgroup_ca(cgrp); | |
196 | int cpu; | |
197 | s64 val = 0; | |
198 | ||
199 | for_each_online_cpu(cpu) { | |
200 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | |
201 | val += kcpustat->cpustat[CPUTIME_USER]; | |
202 | val += kcpustat->cpustat[CPUTIME_NICE]; | |
203 | } | |
204 | val = cputime64_to_clock_t(val); | |
205 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val); | |
206 | ||
207 | val = 0; | |
208 | for_each_online_cpu(cpu) { | |
209 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | |
210 | val += kcpustat->cpustat[CPUTIME_SYSTEM]; | |
211 | val += kcpustat->cpustat[CPUTIME_IRQ]; | |
212 | val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; | |
213 | } | |
214 | ||
215 | val = cputime64_to_clock_t(val); | |
216 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); | |
217 | ||
218 | return 0; | |
219 | } | |
220 | ||
221 | static struct cftype files[] = { | |
222 | { | |
223 | .name = "usage", | |
224 | .read_u64 = cpuusage_read, | |
225 | .write_u64 = cpuusage_write, | |
226 | }, | |
227 | { | |
228 | .name = "usage_percpu", | |
229 | .read_seq_string = cpuacct_percpu_seq_read, | |
230 | }, | |
231 | { | |
232 | .name = "stat", | |
233 | .read_map = cpuacct_stats_show, | |
234 | }, | |
235 | { } /* terminate */ | |
236 | }; | |
237 | ||
238 | /* | |
239 | * charge this task's execution time to its accounting group. | |
240 | * | |
241 | * called with rq->lock held. | |
242 | */ | |
243 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |
244 | { | |
245 | struct cpuacct *ca; | |
246 | int cpu; | |
247 | ||
2e76c24d LZ |
248 | cpu = task_cpu(tsk); |
249 | ||
250 | rcu_read_lock(); | |
251 | ||
252 | ca = task_ca(tsk); | |
253 | ||
543bc0e7 | 254 | while (true) { |
2e76c24d LZ |
255 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
256 | *cpuusage += cputime; | |
543bc0e7 LZ |
257 | |
258 | ca = parent_ca(ca); | |
259 | if (!ca) | |
260 | break; | |
2e76c24d LZ |
261 | } |
262 | ||
263 | rcu_read_unlock(); | |
264 | } | |
265 | ||
1966aaf7 LZ |
266 | /* |
267 | * Add user/system time to cpuacct. | |
268 | * | |
269 | * Note: it's the caller that updates the account of the root cgroup. | |
270 | */ | |
271 | void cpuacct_account_field(struct task_struct *p, int index, u64 val) | |
272 | { | |
273 | struct kernel_cpustat *kcpustat; | |
274 | struct cpuacct *ca; | |
275 | ||
1966aaf7 LZ |
276 | rcu_read_lock(); |
277 | ca = task_ca(p); | |
5f40d804 | 278 | while (ca != &root_cpuacct) { |
1966aaf7 LZ |
279 | kcpustat = this_cpu_ptr(ca->cpustat); |
280 | kcpustat->cpustat[index] += val; | |
63876986 | 281 | ca = parent_ca(ca); |
1966aaf7 LZ |
282 | } |
283 | rcu_read_unlock(); | |
284 | } | |
285 | ||
2e76c24d | 286 | struct cgroup_subsys cpuacct_subsys = { |
621e2de0 LZ |
287 | .name = "cpuacct", |
288 | .css_alloc = cpuacct_css_alloc, | |
289 | .css_free = cpuacct_css_free, | |
290 | .subsys_id = cpuacct_subsys_id, | |
291 | .base_cftypes = files, | |
292 | .early_init = 1, | |
2e76c24d | 293 | }; |