Merge branch 'linus' into xen-64bit
[linux-2.6-block.git] / kernel / trace / trace_sched_wakeup.c
CommitLineData
352ad25a
SR
1/*
2 * trace task wakeup timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/debugfs.h>
15#include <linux/kallsyms.h>
16#include <linux/uaccess.h>
17#include <linux/ftrace.h>
5b82a1b0 18#include <linux/marker.h>
352ad25a
SR
19
20#include "trace.h"
21
22static struct trace_array *wakeup_trace;
23static int __read_mostly tracer_enabled;
24
25static struct task_struct *wakeup_task;
26static int wakeup_cpu;
27static unsigned wakeup_prio = -1;
28
29static DEFINE_SPINLOCK(wakeup_lock);
30
e309b41d 31static void __wakeup_reset(struct trace_array *tr);
352ad25a 32
7e18d8e7
SR
33#ifdef CONFIG_FTRACE
34/*
35 * irqsoff uses its own tracer function to keep the overhead down:
36 */
37static void
38wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
39{
40 struct trace_array *tr = wakeup_trace;
41 struct trace_array_cpu *data;
42 unsigned long flags;
43 long disabled;
44 int resched;
45 int cpu;
46
47 if (likely(!wakeup_task))
48 return;
49
50 resched = need_resched();
51 preempt_disable_notrace();
52
53 cpu = raw_smp_processor_id();
54 data = tr->data[cpu];
55 disabled = atomic_inc_return(&data->disabled);
56 if (unlikely(disabled != 1))
57 goto out;
58
59 spin_lock_irqsave(&wakeup_lock, flags);
60
61 if (unlikely(!wakeup_task))
62 goto unlock;
63
64 /*
65 * The task can't disappear because it needs to
66 * wake up first, and we have the wakeup_lock.
67 */
68 if (task_cpu(wakeup_task) != cpu)
69 goto unlock;
70
71 trace_function(tr, data, ip, parent_ip, flags);
72
73 unlock:
74 spin_unlock_irqrestore(&wakeup_lock, flags);
75
76 out:
77 atomic_dec(&data->disabled);
78
79 /*
80 * To prevent recursion from the scheduler, if the
81 * resched flag was set before we entered, then
82 * don't reschedule.
83 */
84 if (resched)
85 preempt_enable_no_resched_notrace();
86 else
87 preempt_enable_notrace();
88}
89
90static struct ftrace_ops trace_ops __read_mostly =
91{
92 .func = wakeup_tracer_call,
93};
94#endif /* CONFIG_FTRACE */
95
352ad25a
SR
96/*
97 * Should this new latency be reported/recorded?
98 */
e309b41d 99static int report_latency(cycle_t delta)
352ad25a
SR
100{
101 if (tracing_thresh) {
102 if (delta < tracing_thresh)
103 return 0;
104 } else {
105 if (delta <= tracing_max_latency)
106 return 0;
107 }
108 return 1;
109}
110
5b82a1b0
MD
111static void notrace
112wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
113 struct task_struct *next)
352ad25a
SR
114{
115 unsigned long latency = 0, t0 = 0, t1 = 0;
5b82a1b0
MD
116 struct trace_array **ptr = private;
117 struct trace_array *tr = *ptr;
352ad25a
SR
118 struct trace_array_cpu *data;
119 cycle_t T0, T1, delta;
120 unsigned long flags;
121 long disabled;
122 int cpu;
123
124 if (unlikely(!tracer_enabled))
125 return;
126
127 /*
128 * When we start a new trace, we set wakeup_task to NULL
129 * and then set tracer_enabled = 1. We want to make sure
130 * that another CPU does not see the tracer_enabled = 1
131 * and the wakeup_task with an older task, that might
132 * actually be the same as next.
133 */
134 smp_rmb();
135
136 if (next != wakeup_task)
137 return;
138
7e18d8e7 139 /* The task we are waiting for is waking up */
352ad25a
SR
140 data = tr->data[wakeup_cpu];
141
142 /* disable local data, not wakeup_cpu data */
143 cpu = raw_smp_processor_id();
144 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
145 if (likely(disabled != 1))
146 goto out;
147
148 spin_lock_irqsave(&wakeup_lock, flags);
149
150 /* We could race with grabbing wakeup_lock */
151 if (unlikely(!tracer_enabled || next != wakeup_task))
152 goto out_unlock;
153
6fb44b71 154 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
352ad25a
SR
155
156 /*
157 * usecs conversion is slow so we try to delay the conversion
158 * as long as possible:
159 */
160 T0 = data->preempt_timestamp;
750ed1a4 161 T1 = ftrace_now(cpu);
352ad25a
SR
162 delta = T1-T0;
163
164 if (!report_latency(delta))
165 goto out_unlock;
166
167 latency = nsecs_to_usecs(delta);
168
169 tracing_max_latency = delta;
170 t0 = nsecs_to_usecs(T0);
171 t1 = nsecs_to_usecs(T1);
172
173 update_max_tr(tr, wakeup_task, wakeup_cpu);
174
352ad25a
SR
175out_unlock:
176 __wakeup_reset(tr);
177 spin_unlock_irqrestore(&wakeup_lock, flags);
178out:
179 atomic_dec(&tr->data[cpu]->disabled);
180}
181
5b82a1b0
MD
182static notrace void
183sched_switch_callback(void *probe_data, void *call_data,
184 const char *format, va_list *args)
185{
186 struct task_struct *prev;
187 struct task_struct *next;
188 struct rq *__rq;
189
190 /* skip prev_pid %d next_pid %d prev_state %ld */
191 (void)va_arg(*args, int);
192 (void)va_arg(*args, int);
193 (void)va_arg(*args, long);
194 __rq = va_arg(*args, typeof(__rq));
195 prev = va_arg(*args, typeof(prev));
196 next = va_arg(*args, typeof(next));
197
198 tracing_record_cmdline(prev);
199
200 /*
201 * If tracer_switch_func only points to the local
202 * switch func, it still needs the ptr passed to it.
203 */
204 wakeup_sched_switch(probe_data, __rq, prev, next);
205}
206
e309b41d 207static void __wakeup_reset(struct trace_array *tr)
352ad25a
SR
208{
209 struct trace_array_cpu *data;
210 int cpu;
211
212 assert_spin_locked(&wakeup_lock);
213
214 for_each_possible_cpu(cpu) {
215 data = tr->data[cpu];
216 tracing_reset(data);
217 }
218
219 wakeup_cpu = -1;
220 wakeup_prio = -1;
221
222 if (wakeup_task)
223 put_task_struct(wakeup_task);
224
225 wakeup_task = NULL;
226}
227
e309b41d 228static void wakeup_reset(struct trace_array *tr)
352ad25a
SR
229{
230 unsigned long flags;
231
232 spin_lock_irqsave(&wakeup_lock, flags);
233 __wakeup_reset(tr);
234 spin_unlock_irqrestore(&wakeup_lock, flags);
235}
236
e309b41d 237static void
352ad25a
SR
238wakeup_check_start(struct trace_array *tr, struct task_struct *p,
239 struct task_struct *curr)
240{
241 int cpu = smp_processor_id();
242 unsigned long flags;
243 long disabled;
244
245 if (likely(!rt_task(p)) ||
246 p->prio >= wakeup_prio ||
247 p->prio >= curr->prio)
248 return;
249
250 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
251 if (unlikely(disabled != 1))
252 goto out;
253
254 /* interrupts should be off from try_to_wake_up */
255 spin_lock(&wakeup_lock);
256
257 /* check for races. */
258 if (!tracer_enabled || p->prio >= wakeup_prio)
259 goto out_locked;
260
261 /* reset the trace */
262 __wakeup_reset(tr);
263
264 wakeup_cpu = task_cpu(p);
265 wakeup_prio = p->prio;
266
267 wakeup_task = p;
268 get_task_struct(wakeup_task);
269
270 local_save_flags(flags);
271
750ed1a4 272 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
6fb44b71
SR
273 trace_function(tr, tr->data[wakeup_cpu],
274 CALLER_ADDR1, CALLER_ADDR2, flags);
352ad25a
SR
275
276out_locked:
277 spin_unlock(&wakeup_lock);
278out:
279 atomic_dec(&tr->data[cpu]->disabled);
280}
281
5b82a1b0
MD
282static notrace void
283wake_up_callback(void *probe_data, void *call_data,
284 const char *format, va_list *args)
352ad25a 285{
5b82a1b0
MD
286 struct trace_array **ptr = probe_data;
287 struct trace_array *tr = *ptr;
288 struct task_struct *curr;
289 struct task_struct *task;
290 struct rq *__rq;
291
352ad25a
SR
292 if (likely(!tracer_enabled))
293 return;
294
5b82a1b0
MD
295 /* Skip pid %d state %ld */
296 (void)va_arg(*args, int);
297 (void)va_arg(*args, long);
298 /* now get the meat: "rq %p task %p rq->curr %p" */
299 __rq = va_arg(*args, typeof(__rq));
300 task = va_arg(*args, typeof(task));
301 curr = va_arg(*args, typeof(curr));
302
303 tracing_record_cmdline(task);
8ac0fca4 304 tracing_record_cmdline(curr);
352ad25a 305
5b82a1b0 306 wakeup_check_start(tr, task, curr);
352ad25a
SR
307}
308
e309b41d 309static void start_wakeup_tracer(struct trace_array *tr)
352ad25a 310{
5b82a1b0
MD
311 int ret;
312
313 ret = marker_probe_register("kernel_sched_wakeup",
314 "pid %d state %ld ## rq %p task %p rq->curr %p",
315 wake_up_callback,
316 &wakeup_trace);
317 if (ret) {
318 pr_info("wakeup trace: Couldn't add marker"
319 " probe to kernel_sched_wakeup\n");
320 return;
321 }
322
323 ret = marker_probe_register("kernel_sched_wakeup_new",
324 "pid %d state %ld ## rq %p task %p rq->curr %p",
325 wake_up_callback,
326 &wakeup_trace);
327 if (ret) {
328 pr_info("wakeup trace: Couldn't add marker"
329 " probe to kernel_sched_wakeup_new\n");
330 goto fail_deprobe;
331 }
332
333 ret = marker_probe_register("kernel_sched_schedule",
334 "prev_pid %d next_pid %d prev_state %ld "
335 "## rq %p prev %p next %p",
336 sched_switch_callback,
337 &wakeup_trace);
338 if (ret) {
339 pr_info("sched trace: Couldn't add marker"
340 " probe to kernel_sched_schedule\n");
341 goto fail_deprobe_wake_new;
342 }
343
352ad25a
SR
344 wakeup_reset(tr);
345
346 /*
347 * Don't let the tracer_enabled = 1 show up before
348 * the wakeup_task is reset. This may be overkill since
349 * wakeup_reset does a spin_unlock after setting the
350 * wakeup_task to NULL, but I want to be safe.
351 * This is a slow path anyway.
352 */
353 smp_wmb();
354
7e18d8e7 355 register_ftrace_function(&trace_ops);
352ad25a 356
ad591240
SR
357 tracer_enabled = 1;
358
352ad25a 359 return;
5b82a1b0
MD
360fail_deprobe_wake_new:
361 marker_probe_unregister("kernel_sched_wakeup_new",
362 wake_up_callback,
363 &wakeup_trace);
364fail_deprobe:
365 marker_probe_unregister("kernel_sched_wakeup",
366 wake_up_callback,
367 &wakeup_trace);
352ad25a
SR
368}
369
e309b41d 370static void stop_wakeup_tracer(struct trace_array *tr)
352ad25a
SR
371{
372 tracer_enabled = 0;
7e18d8e7 373 unregister_ftrace_function(&trace_ops);
5b82a1b0
MD
374 marker_probe_unregister("kernel_sched_schedule",
375 sched_switch_callback,
376 &wakeup_trace);
377 marker_probe_unregister("kernel_sched_wakeup_new",
378 wake_up_callback,
379 &wakeup_trace);
380 marker_probe_unregister("kernel_sched_wakeup",
381 wake_up_callback,
382 &wakeup_trace);
352ad25a
SR
383}
384
e309b41d 385static void wakeup_tracer_init(struct trace_array *tr)
352ad25a
SR
386{
387 wakeup_trace = tr;
388
389 if (tr->ctrl)
390 start_wakeup_tracer(tr);
391}
392
e309b41d 393static void wakeup_tracer_reset(struct trace_array *tr)
352ad25a
SR
394{
395 if (tr->ctrl) {
396 stop_wakeup_tracer(tr);
397 /* make sure we put back any tasks we are tracing */
398 wakeup_reset(tr);
399 }
400}
401
402static void wakeup_tracer_ctrl_update(struct trace_array *tr)
403{
404 if (tr->ctrl)
405 start_wakeup_tracer(tr);
406 else
407 stop_wakeup_tracer(tr);
408}
409
e309b41d 410static void wakeup_tracer_open(struct trace_iterator *iter)
352ad25a
SR
411{
412 /* stop the trace while dumping */
413 if (iter->tr->ctrl)
414 stop_wakeup_tracer(iter->tr);
415}
416
e309b41d 417static void wakeup_tracer_close(struct trace_iterator *iter)
352ad25a
SR
418{
419 /* forget about any processes we were recording */
420 if (iter->tr->ctrl)
421 start_wakeup_tracer(iter->tr);
422}
423
424static struct tracer wakeup_tracer __read_mostly =
425{
426 .name = "wakeup",
427 .init = wakeup_tracer_init,
428 .reset = wakeup_tracer_reset,
429 .open = wakeup_tracer_open,
430 .close = wakeup_tracer_close,
431 .ctrl_update = wakeup_tracer_ctrl_update,
432 .print_max = 1,
60a11774
SR
433#ifdef CONFIG_FTRACE_SELFTEST
434 .selftest = trace_selftest_startup_wakeup,
435#endif
352ad25a
SR
436};
437
438__init static int init_wakeup_tracer(void)
439{
440 int ret;
441
442 ret = register_tracer(&wakeup_tracer);
443 if (ret)
444 return ret;
445
446 return 0;
447}
448device_initcall(init_wakeup_tracer);