/*
 * trace task wakeup timings
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 William Lee Irwin III
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/marker.h>

#include "trace.h"

static struct trace_array *wakeup_trace;
static int __read_mostly tracer_enabled;

static struct task_struct *wakeup_task;
static int wakeup_cpu;
static unsigned wakeup_prio = -1;

static DEFINE_SPINLOCK(wakeup_lock);
30 | ||
e309b41d | 31 | static void __wakeup_reset(struct trace_array *tr); |
352ad25a SR |
32 | |
33 | /* | |
34 | * Should this new latency be reported/recorded? | |
35 | */ | |
e309b41d | 36 | static int report_latency(cycle_t delta) |
352ad25a SR |
37 | { |
38 | if (tracing_thresh) { | |
39 | if (delta < tracing_thresh) | |
40 | return 0; | |
41 | } else { | |
42 | if (delta <= tracing_max_latency) | |
43 | return 0; | |
44 | } | |
45 | return 1; | |
46 | } | |
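
/*
 * report_latency() gives the tracer two modes: with tracing_thresh set,
 * every latency at or above the threshold is recorded; with it unset,
 * only a latency exceeding the current tracing_max_latency is recorded.
 * A user-space sketch (paths assume debugfs mounted at /debug, as in
 * the ftrace documentation of this period):
 *
 *   # echo 0 > /debug/tracing/tracing_thresh     # track only new maxima
 *   # echo 100 > /debug/tracing/tracing_thresh   # log every latency >= 100 usecs
 */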
47 | ||
5b82a1b0 MD |
48 | static void notrace |
49 | wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, | |
50 | struct task_struct *next) | |
352ad25a SR |
51 | { |
52 | unsigned long latency = 0, t0 = 0, t1 = 0; | |
5b82a1b0 MD |
53 | struct trace_array **ptr = private; |
54 | struct trace_array *tr = *ptr; | |
352ad25a SR |
55 | struct trace_array_cpu *data; |
56 | cycle_t T0, T1, delta; | |
57 | unsigned long flags; | |
58 | long disabled; | |
59 | int cpu; | |
60 | ||
61 | if (unlikely(!tracer_enabled)) | |
62 | return; | |
63 | ||
64 | /* | |
65 | * When we start a new trace, we set wakeup_task to NULL | |
66 | * and then set tracer_enabled = 1. We want to make sure | |
67 | * that another CPU does not see the tracer_enabled = 1 | |
68 | * and the wakeup_task with an older task, that might | |
69 | * actually be the same as next. | |
70 | */ | |
71 | smp_rmb(); | |
72 | ||
73 | if (next != wakeup_task) | |
74 | return; | |
75 | ||
76 | /* The task we are waitng for is waking up */ | |
77 | data = tr->data[wakeup_cpu]; | |
78 | ||
79 | /* disable local data, not wakeup_cpu data */ | |
80 | cpu = raw_smp_processor_id(); | |
81 | disabled = atomic_inc_return(&tr->data[cpu]->disabled); | |
82 | if (likely(disabled != 1)) | |
83 | goto out; | |
84 | ||
85 | spin_lock_irqsave(&wakeup_lock, flags); | |
86 | ||
87 | /* We could race with grabbing wakeup_lock */ | |
88 | if (unlikely(!tracer_enabled || next != wakeup_task)) | |
89 | goto out_unlock; | |
90 | ||
6fb44b71 | 91 | trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); |
352ad25a SR |
92 | |
93 | /* | |
94 | * usecs conversion is slow so we try to delay the conversion | |
95 | * as long as possible: | |
96 | */ | |
97 | T0 = data->preempt_timestamp; | |
750ed1a4 | 98 | T1 = ftrace_now(cpu); |
352ad25a SR |
99 | delta = T1-T0; |
100 | ||
101 | if (!report_latency(delta)) | |
102 | goto out_unlock; | |
103 | ||
104 | latency = nsecs_to_usecs(delta); | |
105 | ||
106 | tracing_max_latency = delta; | |
107 | t0 = nsecs_to_usecs(T0); | |
108 | t1 = nsecs_to_usecs(T1); | |
109 | ||
110 | update_max_tr(tr, wakeup_task, wakeup_cpu); | |
111 | ||
352ad25a SR |
112 | out_unlock: |
113 | __wakeup_reset(tr); | |
114 | spin_unlock_irqrestore(&wakeup_lock, flags); | |
115 | out: | |
116 | atomic_dec(&tr->data[cpu]->disabled); | |
117 | } | |
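
/*
 * The delta computed above spans from the wakeup itself (the
 * preempt_timestamp stamped in wakeup_check_start() below) to the
 * moment the woken task is switched in, i.e. the scheduling latency
 * of the traced RT task.
 */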
118 | ||
5b82a1b0 MD |
119 | static notrace void |
120 | sched_switch_callback(void *probe_data, void *call_data, | |
121 | const char *format, va_list *args) | |
122 | { | |
123 | struct task_struct *prev; | |
124 | struct task_struct *next; | |
125 | struct rq *__rq; | |
126 | ||
127 | /* skip prev_pid %d next_pid %d prev_state %ld */ | |
128 | (void)va_arg(*args, int); | |
129 | (void)va_arg(*args, int); | |
130 | (void)va_arg(*args, long); | |
131 | __rq = va_arg(*args, typeof(__rq)); | |
132 | prev = va_arg(*args, typeof(prev)); | |
133 | next = va_arg(*args, typeof(next)); | |
134 | ||
135 | tracing_record_cmdline(prev); | |
136 | ||
137 | /* | |
138 | * If tracer_switch_func only points to the local | |
139 | * switch func, it still needs the ptr passed to it. | |
140 | */ | |
141 | wakeup_sched_switch(probe_data, __rq, prev, next); | |
142 | } | |
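
/*
 * A marker probe receives its arguments as a va_list laid out in the
 * order of the format string passed to marker_probe_register() (here
 * "prev_pid %d next_pid %d prev_state %ld ## rq %p prev %p next %p"),
 * so the callback above must consume the fields in exactly that order,
 * discarding the leading ones it does not need.
 */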
143 | ||
e309b41d | 144 | static void __wakeup_reset(struct trace_array *tr) |
352ad25a SR |
145 | { |
146 | struct trace_array_cpu *data; | |
147 | int cpu; | |
148 | ||
149 | assert_spin_locked(&wakeup_lock); | |
150 | ||
151 | for_each_possible_cpu(cpu) { | |
152 | data = tr->data[cpu]; | |
153 | tracing_reset(data); | |
154 | } | |
155 | ||
156 | wakeup_cpu = -1; | |
157 | wakeup_prio = -1; | |
158 | ||
159 | if (wakeup_task) | |
160 | put_task_struct(wakeup_task); | |
161 | ||
162 | wakeup_task = NULL; | |
163 | } | |
164 | ||
e309b41d | 165 | static void wakeup_reset(struct trace_array *tr) |
352ad25a SR |
166 | { |
167 | unsigned long flags; | |
168 | ||
169 | spin_lock_irqsave(&wakeup_lock, flags); | |
170 | __wakeup_reset(tr); | |
171 | spin_unlock_irqrestore(&wakeup_lock, flags); | |
172 | } | |
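
/*
 * The usual kernel locking convention applies: __wakeup_reset() must be
 * called with wakeup_lock held (and asserts as much), while
 * wakeup_reset() is the self-locking wrapper for everyone else.
 */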
173 | ||
e309b41d | 174 | static void |
352ad25a SR |
175 | wakeup_check_start(struct trace_array *tr, struct task_struct *p, |
176 | struct task_struct *curr) | |
177 | { | |
178 | int cpu = smp_processor_id(); | |
179 | unsigned long flags; | |
180 | long disabled; | |
181 | ||
182 | if (likely(!rt_task(p)) || | |
183 | p->prio >= wakeup_prio || | |
184 | p->prio >= curr->prio) | |
185 | return; | |
186 | ||
187 | disabled = atomic_inc_return(&tr->data[cpu]->disabled); | |
188 | if (unlikely(disabled != 1)) | |
189 | goto out; | |
190 | ||
191 | /* interrupts should be off from try_to_wake_up */ | |
192 | spin_lock(&wakeup_lock); | |
193 | ||
194 | /* check for races. */ | |
195 | if (!tracer_enabled || p->prio >= wakeup_prio) | |
196 | goto out_locked; | |
197 | ||
198 | /* reset the trace */ | |
199 | __wakeup_reset(tr); | |
200 | ||
201 | wakeup_cpu = task_cpu(p); | |
202 | wakeup_prio = p->prio; | |
203 | ||
204 | wakeup_task = p; | |
205 | get_task_struct(wakeup_task); | |
206 | ||
207 | local_save_flags(flags); | |
208 | ||
750ed1a4 | 209 | tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); |
6fb44b71 SR |
210 | trace_function(tr, tr->data[wakeup_cpu], |
211 | CALLER_ADDR1, CALLER_ADDR2, flags); | |
352ad25a SR |
212 | |
213 | out_locked: | |
214 | spin_unlock(&wakeup_lock); | |
215 | out: | |
216 | atomic_dec(&tr->data[cpu]->disabled); | |
217 | } | |
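
/*
 * A trace window opens only for an RT task whose priority beats both
 * the task currently running and any wakeup already being traced
 * (lower p->prio means higher priority). Tracking one strictly
 * higher-priority candidate at a time is what lets the tracer get by
 * with a single wakeup_task/wakeup_prio pair.
 */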
218 | ||
5b82a1b0 MD |
219 | static notrace void |
220 | wake_up_callback(void *probe_data, void *call_data, | |
221 | const char *format, va_list *args) | |
352ad25a | 222 | { |
5b82a1b0 MD |
223 | struct trace_array **ptr = probe_data; |
224 | struct trace_array *tr = *ptr; | |
225 | struct task_struct *curr; | |
226 | struct task_struct *task; | |
227 | struct rq *__rq; | |
228 | ||
352ad25a SR |
229 | if (likely(!tracer_enabled)) |
230 | return; | |
231 | ||
5b82a1b0 MD |
232 | /* Skip pid %d state %ld */ |
233 | (void)va_arg(*args, int); | |
234 | (void)va_arg(*args, long); | |
235 | /* now get the meat: "rq %p task %p rq->curr %p" */ | |
236 | __rq = va_arg(*args, typeof(__rq)); | |
237 | task = va_arg(*args, typeof(task)); | |
238 | curr = va_arg(*args, typeof(curr)); | |
239 | ||
240 | tracing_record_cmdline(task); | |
8ac0fca4 | 241 | tracing_record_cmdline(curr); |
352ad25a | 242 | |
5b82a1b0 | 243 | wakeup_check_start(tr, task, curr); |
352ad25a SR |
244 | } |
245 | ||
e309b41d | 246 | static void start_wakeup_tracer(struct trace_array *tr) |
352ad25a | 247 | { |
5b82a1b0 MD |
248 | int ret; |
249 | ||
250 | ret = marker_probe_register("kernel_sched_wakeup", | |
251 | "pid %d state %ld ## rq %p task %p rq->curr %p", | |
252 | wake_up_callback, | |
253 | &wakeup_trace); | |
254 | if (ret) { | |
255 | pr_info("wakeup trace: Couldn't add marker" | |
256 | " probe to kernel_sched_wakeup\n"); | |
257 | return; | |
258 | } | |
259 | ||
260 | ret = marker_probe_register("kernel_sched_wakeup_new", | |
261 | "pid %d state %ld ## rq %p task %p rq->curr %p", | |
262 | wake_up_callback, | |
263 | &wakeup_trace); | |
264 | if (ret) { | |
265 | pr_info("wakeup trace: Couldn't add marker" | |
266 | " probe to kernel_sched_wakeup_new\n"); | |
267 | goto fail_deprobe; | |
268 | } | |
269 | ||
270 | ret = marker_probe_register("kernel_sched_schedule", | |
271 | "prev_pid %d next_pid %d prev_state %ld " | |
272 | "## rq %p prev %p next %p", | |
273 | sched_switch_callback, | |
274 | &wakeup_trace); | |
275 | if (ret) { | |
276 | pr_info("sched trace: Couldn't add marker" | |
277 | " probe to kernel_sched_schedule\n"); | |
278 | goto fail_deprobe_wake_new; | |
279 | } | |
280 | ||
352ad25a SR |
281 | wakeup_reset(tr); |
282 | ||
283 | /* | |
284 | * Don't let the tracer_enabled = 1 show up before | |
285 | * the wakeup_task is reset. This may be overkill since | |
286 | * wakeup_reset does a spin_unlock after setting the | |
287 | * wakeup_task to NULL, but I want to be safe. | |
288 | * This is a slow path anyway. | |
289 | */ | |
290 | smp_wmb(); | |
291 | ||
292 | tracer_enabled = 1; | |
293 | ||
294 | return; | |
5b82a1b0 MD |
295 | fail_deprobe_wake_new: |
296 | marker_probe_unregister("kernel_sched_wakeup_new", | |
297 | wake_up_callback, | |
298 | &wakeup_trace); | |
299 | fail_deprobe: | |
300 | marker_probe_unregister("kernel_sched_wakeup", | |
301 | wake_up_callback, | |
302 | &wakeup_trace); | |
352ad25a SR |
303 | } |
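
/*
 * Registration above uses the usual unwind-on-failure pattern: when a
 * later marker_probe_register() fails, the fail_* labels tear down the
 * probes registered so far, in reverse order, so a partially started
 * tracer never leaves a stray hook in the scheduler.
 */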
304 | ||
e309b41d | 305 | static void stop_wakeup_tracer(struct trace_array *tr) |
352ad25a SR |
306 | { |
307 | tracer_enabled = 0; | |
5b82a1b0 MD |
308 | marker_probe_unregister("kernel_sched_schedule", |
309 | sched_switch_callback, | |
310 | &wakeup_trace); | |
311 | marker_probe_unregister("kernel_sched_wakeup_new", | |
312 | wake_up_callback, | |
313 | &wakeup_trace); | |
314 | marker_probe_unregister("kernel_sched_wakeup", | |
315 | wake_up_callback, | |
316 | &wakeup_trace); | |
352ad25a SR |
317 | } |
318 | ||
e309b41d | 319 | static void wakeup_tracer_init(struct trace_array *tr) |
352ad25a SR |
320 | { |
321 | wakeup_trace = tr; | |
322 | ||
323 | if (tr->ctrl) | |
324 | start_wakeup_tracer(tr); | |
325 | } | |
326 | ||
e309b41d | 327 | static void wakeup_tracer_reset(struct trace_array *tr) |
352ad25a SR |
328 | { |
329 | if (tr->ctrl) { | |
330 | stop_wakeup_tracer(tr); | |
331 | /* make sure we put back any tasks we are tracing */ | |
332 | wakeup_reset(tr); | |
333 | } | |
334 | } | |
335 | ||
336 | static void wakeup_tracer_ctrl_update(struct trace_array *tr) | |
337 | { | |
338 | if (tr->ctrl) | |
339 | start_wakeup_tracer(tr); | |
340 | else | |
341 | stop_wakeup_tracer(tr); | |
342 | } | |
343 | ||
e309b41d | 344 | static void wakeup_tracer_open(struct trace_iterator *iter) |
352ad25a SR |
345 | { |
346 | /* stop the trace while dumping */ | |
347 | if (iter->tr->ctrl) | |
348 | stop_wakeup_tracer(iter->tr); | |
349 | } | |
350 | ||
e309b41d | 351 | static void wakeup_tracer_close(struct trace_iterator *iter) |
352ad25a SR |
352 | { |
353 | /* forget about any processes we were recording */ | |
354 | if (iter->tr->ctrl) | |
355 | start_wakeup_tracer(iter->tr); | |
356 | } | |
357 | ||
358 | static struct tracer wakeup_tracer __read_mostly = | |
359 | { | |
360 | .name = "wakeup", | |
361 | .init = wakeup_tracer_init, | |
362 | .reset = wakeup_tracer_reset, | |
363 | .open = wakeup_tracer_open, | |
364 | .close = wakeup_tracer_close, | |
365 | .ctrl_update = wakeup_tracer_ctrl_update, | |
366 | .print_max = 1, | |
60a11774 SR |
367 | #ifdef CONFIG_FTRACE_SELFTEST |
368 | .selftest = trace_selftest_startup_wakeup, | |
369 | #endif | |
352ad25a SR |
370 | }; |
371 | ||
372 | __init static int init_wakeup_tracer(void) | |
373 | { | |
374 | int ret; | |
375 | ||
376 | ret = register_tracer(&wakeup_tracer); | |
377 | if (ret) | |
378 | return ret; | |
379 | ||
380 | return 0; | |
381 | } | |
382 | device_initcall(init_wakeup_tracer); |
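
/*
 * A user-space usage sketch (paths assume debugfs mounted at /debug, as
 * in the ftrace documentation of this period; file names may differ in
 * other kernel versions):
 *
 *   # echo wakeup > /debug/tracing/current_tracer
 *   # echo 1 > /debug/tracing/tracing_enabled
 *   ... run a real-time workload ...
 *   # cat /debug/tracing/tracing_max_latency   # worst-case latency, in usecs
 *   # cat /debug/tracing/latency_trace         # trace of that worst case
 */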