cgroup: add/update accessors which obtain subsys specific data from css
[linux-2.6-block.git] / kernel / sched / cpuacct.c
1 #include <linux/cgroup.h>
2 #include <linux/slab.h>
3 #include <linux/percpu.h>
4 #include <linux/spinlock.h>
5 #include <linux/cpumask.h>
6 #include <linux/seq_file.h>
7 #include <linux/rcupdate.h>
8 #include <linux/kernel_stat.h>
9 #include <linux/err.h>
10
11 #include "sched.h"
12
13 /*
14  * CPU accounting code for task groups.
15  *
16  * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
17  * (balbir@in.ibm.com).
18  */
19
/*
 * Buckets for the time spent executing by the tasks of a cpu accounting
 * group. The values index cpuacct_stat_desc[].
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* time in user mode */
	CPUACCT_STAT_SYSTEM,	/* time in kernel mode */

	CPUACCT_STAT_NSTATS,	/* number of buckets above */
};
27
28 /* track cpu usage of a group of tasks and its child groups */
29 struct cpuacct {
30         struct cgroup_subsys_state css;
31         /* cpuusage holds pointer to a u64-type object on every cpu */
32         u64 __percpu *cpuusage;
33         struct kernel_cpustat __percpu *cpustat;
34 };
35
36 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
37 {
38         return css ? container_of(css, struct cpuacct, css) : NULL;
39 }
40
41 /* return cpu accounting group corresponding to this container */
42 static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
43 {
44         return css_ca(cgroup_css(cgrp, cpuacct_subsys_id));
45 }
46
47 /* return cpu accounting group to which this task belongs */
48 static inline struct cpuacct *task_ca(struct task_struct *tsk)
49 {
50         return css_ca(task_css(tsk, cpuacct_subsys_id));
51 }
52
53 static inline struct cpuacct *__parent_ca(struct cpuacct *ca)
54 {
55         return cgroup_ca(ca->css.cgroup->parent);
56 }
57
58 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
59 {
60         if (!ca->css.cgroup->parent)
61                 return NULL;
62         return cgroup_ca(ca->css.cgroup->parent);
63 }
64
65 static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
66 static struct cpuacct root_cpuacct = {
67         .cpustat        = &kernel_cpustat,
68         .cpuusage       = &root_cpuacct_cpuusage,
69 };
70
71 /* create a new cpu accounting group */
72 static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
73 {
74         struct cpuacct *ca;
75
76         if (!cgrp->parent)
77                 return &root_cpuacct.css;
78
79         ca = kzalloc(sizeof(*ca), GFP_KERNEL);
80         if (!ca)
81                 goto out;
82
83         ca->cpuusage = alloc_percpu(u64);
84         if (!ca->cpuusage)
85                 goto out_free_ca;
86
87         ca->cpustat = alloc_percpu(struct kernel_cpustat);
88         if (!ca->cpustat)
89                 goto out_free_cpuusage;
90
91         return &ca->css;
92
93 out_free_cpuusage:
94         free_percpu(ca->cpuusage);
95 out_free_ca:
96         kfree(ca);
97 out:
98         return ERR_PTR(-ENOMEM);
99 }
100
101 /* destroy an existing cpu accounting group */
102 static void cpuacct_css_free(struct cgroup *cgrp)
103 {
104         struct cpuacct *ca = cgroup_ca(cgrp);
105
106         free_percpu(ca->cpustat);
107         free_percpu(ca->cpuusage);
108         kfree(ca);
109 }
110
111 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
112 {
113         u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
114         u64 data;
115
116 #ifndef CONFIG_64BIT
117         /*
118          * Take rq->lock to make 64-bit read safe on 32-bit platforms.
119          */
120         raw_spin_lock_irq(&cpu_rq(cpu)->lock);
121         data = *cpuusage;
122         raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
123 #else
124         data = *cpuusage;
125 #endif
126
127         return data;
128 }
129
130 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
131 {
132         u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
133
134 #ifndef CONFIG_64BIT
135         /*
136          * Take rq->lock to make 64-bit write safe on 32-bit platforms.
137          */
138         raw_spin_lock_irq(&cpu_rq(cpu)->lock);
139         *cpuusage = val;
140         raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
141 #else
142         *cpuusage = val;
143 #endif
144 }
145
146 /* return total cpu usage (in nanoseconds) of a group */
147 static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
148 {
149         struct cpuacct *ca = cgroup_ca(cgrp);
150         u64 totalcpuusage = 0;
151         int i;
152
153         for_each_present_cpu(i)
154                 totalcpuusage += cpuacct_cpuusage_read(ca, i);
155
156         return totalcpuusage;
157 }
158
159 static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
160                                                                 u64 reset)
161 {
162         struct cpuacct *ca = cgroup_ca(cgrp);
163         int err = 0;
164         int i;
165
166         if (reset) {
167                 err = -EINVAL;
168                 goto out;
169         }
170
171         for_each_present_cpu(i)
172                 cpuacct_cpuusage_write(ca, i, 0);
173
174 out:
175         return err;
176 }
177
178 static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
179                                    struct seq_file *m)
180 {
181         struct cpuacct *ca = cgroup_ca(cgroup);
182         u64 percpu;
183         int i;
184
185         for_each_present_cpu(i) {
186                 percpu = cpuacct_cpuusage_read(ca, i);
187                 seq_printf(m, "%llu ", (unsigned long long) percpu);
188         }
189         seq_printf(m, "\n");
190         return 0;
191 }
192
193 static const char * const cpuacct_stat_desc[] = {
194         [CPUACCT_STAT_USER] = "user",
195         [CPUACCT_STAT_SYSTEM] = "system",
196 };
197
198 static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
199                               struct cgroup_map_cb *cb)
200 {
201         struct cpuacct *ca = cgroup_ca(cgrp);
202         int cpu;
203         s64 val = 0;
204
205         for_each_online_cpu(cpu) {
206                 struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
207                 val += kcpustat->cpustat[CPUTIME_USER];
208                 val += kcpustat->cpustat[CPUTIME_NICE];
209         }
210         val = cputime64_to_clock_t(val);
211         cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
212
213         val = 0;
214         for_each_online_cpu(cpu) {
215                 struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
216                 val += kcpustat->cpustat[CPUTIME_SYSTEM];
217                 val += kcpustat->cpustat[CPUTIME_IRQ];
218                 val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
219         }
220
221         val = cputime64_to_clock_t(val);
222         cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
223
224         return 0;
225 }
226
227 static struct cftype files[] = {
228         {
229                 .name = "usage",
230                 .read_u64 = cpuusage_read,
231                 .write_u64 = cpuusage_write,
232         },
233         {
234                 .name = "usage_percpu",
235                 .read_seq_string = cpuacct_percpu_seq_read,
236         },
237         {
238                 .name = "stat",
239                 .read_map = cpuacct_stats_show,
240         },
241         { }     /* terminate */
242 };
243
244 /*
245  * charge this task's execution time to its accounting group.
246  *
247  * called with rq->lock held.
248  */
249 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
250 {
251         struct cpuacct *ca;
252         int cpu;
253
254         cpu = task_cpu(tsk);
255
256         rcu_read_lock();
257
258         ca = task_ca(tsk);
259
260         while (true) {
261                 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
262                 *cpuusage += cputime;
263
264                 ca = parent_ca(ca);
265                 if (!ca)
266                         break;
267         }
268
269         rcu_read_unlock();
270 }
271
272 /*
273  * Add user/system time to cpuacct.
274  *
275  * Note: it's the caller that updates the account of the root cgroup.
276  */
277 void cpuacct_account_field(struct task_struct *p, int index, u64 val)
278 {
279         struct kernel_cpustat *kcpustat;
280         struct cpuacct *ca;
281
282         rcu_read_lock();
283         ca = task_ca(p);
284         while (ca != &root_cpuacct) {
285                 kcpustat = this_cpu_ptr(ca->cpustat);
286                 kcpustat->cpustat[index] += val;
287                 ca = __parent_ca(ca);
288         }
289         rcu_read_unlock();
290 }
291
292 struct cgroup_subsys cpuacct_subsys = {
293         .name           = "cpuacct",
294         .css_alloc      = cpuacct_css_alloc,
295         .css_free       = cpuacct_css_free,
296         .subsys_id      = cpuacct_subsys_id,
297         .base_cftypes   = files,
298         .early_init     = 1,
299 };