/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
        u64 ret_addr;
        u64 timestamp;
        u64 ref;
        u64 branch_count;
        struct call_path *cp;
        bool no_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
        struct thread_stack_entry *stack;
        size_t cnt;
        size_t sz;
        u64 trace_nr;
        u64 branch_count;
        u64 kernel_start;
        u64 last_time;
        struct call_return_processor *crp;
        struct comm *comm;
};

static int thread_stack__grow(struct thread_stack *ts)
{
        struct thread_stack_entry *new_stack;
        size_t sz, new_sz;

        new_sz = ts->sz + STACK_GROWTH;
        sz = new_sz * sizeof(struct thread_stack_entry);

        new_stack = realloc(ts->stack, sz);
        if (!new_stack)
                return -ENOMEM;

        ts->stack = new_stack;
        ts->sz = new_sz;

        return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread,
                                              struct call_return_processor *crp)
{
        struct thread_stack *ts;

        ts = zalloc(sizeof(struct thread_stack));
        if (!ts)
                return NULL;

        if (thread_stack__grow(ts)) {
                free(ts);
                return NULL;
        }

        if (thread->mg && thread->mg->machine)
                ts->kernel_start = machine__kernel_start(thread->mg->machine);
        else
                ts->kernel_start = 1ULL << 63;
        ts->crp = crp;

        return ts;
}

static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
{
        int err = 0;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err) {
                        pr_warning("Out of memory: discarding thread stack\n");
                        ts->cnt = 0;
                }
        }

        ts->stack[ts->cnt++].ret_addr = ret_addr;

        return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
        size_t i;

        /*
         * In some cases there may be functions which are not seen to return,
         * for example when setjmp / longjmp has been used, or when the perf
         * context switch in the kernel does not stop and start tracing in
         * exactly the same code path.  When that happens the return address
         * will be further down the stack.  If the return address is not found
         * at all, we assume the opposite (i.e. this is a return for a call
         * that wasn't seen for some reason) and leave the stack alone.
         */
        for (i = ts->cnt; i; ) {
                if (ts->stack[--i].ret_addr == ret_addr) {
                        ts->cnt = i;
                        return;
                }
        }
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
        if (!ts->cnt)
                return false;

        return ts->stack[ts->cnt - 1].cp->in_kernel;
}

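/*
 * Report one stack entry to the call/return processor as a call/return pair,
 * filling in the times, the branch count delta and the external references
 * recorded when the entry was pushed.
 */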
static int thread_stack__call_return(struct thread *thread,
                                     struct thread_stack *ts, size_t idx,
                                     u64 timestamp, u64 ref, bool no_return)
{
        struct call_return_processor *crp = ts->crp;
        struct thread_stack_entry *tse;
        struct call_return cr = {
                .thread = thread,
                .comm = ts->comm,
                .db_id = 0,
        };

        tse = &ts->stack[idx];
        cr.cp = tse->cp;
        cr.call_time = tse->timestamp;
        cr.return_time = timestamp;
        cr.branch_count = ts->branch_count - tse->branch_count;
        cr.call_ref = tse->ref;
        cr.return_ref = ref;
        if (tse->no_call)
                cr.flags |= CALL_RETURN_NO_CALL;
        if (no_return)
                cr.flags |= CALL_RETURN_NO_RETURN;

        return crp->process(&cr, crp->data);
}

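/*
 * Empty the stack.  With a call/return processor, each remaining entry is
 * reported as a call with no seen return; without one, the entries are
 * simply discarded.
 */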
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
        struct call_return_processor *crp = ts->crp;
        int err;

        if (!crp) {
                ts->cnt = 0;
                return 0;
        }

        while (ts->cnt) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                ts->last_time, 0, true);
                if (err) {
                        pr_err("Error flushing thread stack!\n");
                        ts->cnt = 0;
                        return err;
                }
        }

        return 0;
}

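/* Flush the stack of the given thread, if it has one */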
int thread_stack__flush(struct thread *thread)
{
        if (thread->ts)
                return __thread_stack__flush(thread, thread->ts);

        return 0;
}

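/*
 * Update the stack from a branch event when no call/return processor is in
 * use: a 'call' pushes the return address (from_ip + insn_len) and a 'return'
 * pops back to the matching return address (to_ip).  A change of trace_nr
 * means the trace is discontinuous, so the stack is flushed first.
 */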
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
                        u64 to_ip, u16 insn_len, u64 trace_nr)
{
        if (!thread)
                return -EINVAL;

        if (!thread->ts) {
                thread->ts = thread_stack__new(thread, NULL);
                if (!thread->ts) {
                        pr_warning("Out of memory: no thread stack\n");
                        return -ENOMEM;
                }
                thread->ts->trace_nr = trace_nr;
        }

        /*
         * When the trace is discontinuous, the trace_nr changes.  In that case
         * the stack might be completely invalid.  Better to report nothing than
         * to report something misleading, so flush the stack.
         */
        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
                        __thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }

        /* Stop here if thread_stack__process() is in use */
        if (thread->ts->crp)
                return 0;

        if (flags & PERF_IP_FLAG_CALL) {
                u64 ret_addr;

                if (!to_ip)
                        return 0;
                ret_addr = from_ip + insn_len;
                if (ret_addr == to_ip)
                        return 0; /* Zero-length calls are excluded */
                return thread_stack__push(thread->ts, ret_addr);
        } else if (flags & PERF_IP_FLAG_RETURN) {
                if (!from_ip)
                        return 0;
                thread_stack__pop(thread->ts, to_ip);
        }

        return 0;
}

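/*
 * Note a new trace number, flushing the stack if it differs from the current
 * one (i.e. the trace is discontinuous).
 */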
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
        if (!thread || !thread->ts)
                return;

        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
                        __thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }
}

void thread_stack__free(struct thread *thread)
{
        if (thread->ts) {
                __thread_stack__flush(thread, thread->ts);
                zfree(&thread->ts->stack);
                zfree(&thread->ts);
        }
}

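/*
 * Synthesize a callchain for a sample: the sample ip goes first, followed by
 * the return addresses currently on the stack, most recent call first,
 * limited to sz entries.
 */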
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
                          size_t sz, u64 ip)
{
        size_t i;

        if (!thread || !thread->ts)
                chain->nr = 1;
        else
                chain->nr = min(sz, thread->ts->cnt + 1);

        chain->ips[0] = ip;

        for (i = 1; i < chain->nr; i++)
                chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
}

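/*
 * Allocate a call/return processor with a new call path root.  @process is
 * called for each call/return pair, with @data passed back to it.
 */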
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
                           void *data)
{
        struct call_return_processor *crp;

        crp = zalloc(sizeof(struct call_return_processor));
        if (!crp)
                return NULL;
        crp->cpr = call_path_root__new();
        if (!crp->cpr)
                goto out_free;
        crp->process = process;
        crp->data = data;
        return crp;

out_free:
        free(crp);
        return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
        if (crp) {
                call_path_root__free(crp->cpr);
                free(crp);
        }
}

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
                                 u64 timestamp, u64 ref, struct call_path *cp,
                                 bool no_call)
{
        struct thread_stack_entry *tse;
        int err;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err)
                        return err;
        }

        tse = &ts->stack[ts->cnt++];
        tse->ret_addr = ret_addr;
        tse->timestamp = timestamp;
        tse->ref = ref;
        tse->branch_count = ts->branch_count;
        tse->cp = cp;
        tse->no_call = no_call;

        return 0;
}

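/*
 * Pop entries, reporting each as a call/return pair.  The entry matching
 * ret_addr (or, for the bottom entry, the symbol) is reported as a normal
 * return; any entries above it are reported as having no seen return.
 * Returns 1 if no matching entry was found and the stack is left untouched.
 */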
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
                                u64 ret_addr, u64 timestamp, u64 ref,
                                struct symbol *sym)
{
        int err;

        if (!ts->cnt)
                return 1;

        if (ts->cnt == 1) {
                struct thread_stack_entry *tse = &ts->stack[0];

                if (tse->cp->sym == sym)
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
        }

        if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
                return thread_stack__call_return(thread, ts, --ts->cnt,
                                                 timestamp, ref, false);
        } else {
                size_t i = ts->cnt - 1;

                while (i--) {
                        if (ts->stack[i].ret_addr != ret_addr)
                                continue;
                        i += 1;
                        while (ts->cnt > i) {
                                err = thread_stack__call_return(thread, ts,
                                                                --ts->cnt,
                                                                timestamp, ref,
                                                                true);
                                if (err)
                                        return err;
                        }
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
                }
        }

        return 1;
}

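/*
 * Put a bottom-of-stack entry on an empty stack, using the 'from' address
 * and symbol if the sample has one, otherwise the 'to' address and symbol.
 * The entry is marked as having no seen call.
 */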
static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
                                struct perf_sample *sample,
                                struct addr_location *from_al,
                                struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        struct symbol *sym;
        u64 ip;

        if (sample->ip) {
                ip = sample->ip;
                sym = from_al->sym;
        } else if (sample->addr) {
                ip = sample->addr;
                sym = to_al->sym;
        } else {
                return 0;
        }

        cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
                                     true);
}

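/*
 * Handle a 'return' for which no 'call' was seen.  A return from the kernel
 * to userspace first pops any kernel entries (and, if that empties the stack,
 * simply records the userspace location).  Otherwise a call path for the
 * 'from' symbol is pushed and popped again, flagged as unmatched, so that the
 * pair is still reported.
 */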
static int thread_stack__no_call_return(struct thread *thread,
                                        struct thread_stack *ts,
                                        struct perf_sample *sample,
                                        struct addr_location *from_al,
                                        struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp, *parent;
        u64 ks = ts->kernel_start;
        int err;

        if (sample->ip >= ks && sample->addr < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }

                /* If the stack is empty, push the userspace address */
                if (!ts->cnt) {
                        cp = call_path__findnew(cpr, &cpr->call_path,
                                                to_al->sym, sample->addr,
                                                ts->kernel_start);
                        if (!cp)
                                return -ENOMEM;
                        return thread_stack__push_cp(ts, 0, sample->time, ref,
                                                     cp, true);
                }
        } else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }
        }

        if (ts->cnt)
                parent = ts->stack[ts->cnt - 1].cp;
        else
                parent = &cpr->call_path;

        /* This 'return' had no 'call', so push and pop top of stack */
        cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
                                    true);
        if (err)
                return err;

        return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
                                    to_al->sym);
}

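/*
 * A 'trace begin' closes a preceding 'trace end' entry (identified by its
 * NULL symbol and zero ip) that is still on top of the stack.
 */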
static int thread_stack__trace_begin(struct thread *thread,
                                     struct thread_stack *ts, u64 timestamp,
                                     u64 ref)
{
        struct thread_stack_entry *tse;
        int err;

        if (!ts->cnt)
                return 0;

        /* Pop trace end */
        tse = &ts->stack[ts->cnt - 1];
        if (tse->cp->sym == NULL && tse->cp->ip == 0) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                timestamp, ref, false);
                if (err)
                        return err;
        }

        return 0;
}

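/*
 * A 'trace end' pushes an entry with a NULL symbol and zero ip, using the
 * address after the 'trace end' branch as the return address, so that a
 * following 'trace begin' can pop it again.
 */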
static int thread_stack__trace_end(struct thread_stack *ts,
                                   struct perf_sample *sample, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        u64 ret_addr;

        /* No point having 'trace end' on the bottom of the stack */
        if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
                return 0;

        cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        ret_addr = sample->ip + sample->insn_len;

        return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
                                     false);
}

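/*
 * Process one branch sample with the call/return processor: calls push a new
 * call path, returns pop back to the matching entry (or are handled as
 * unmatched), and trace begin/end are tracked with dummy entries.  Takes over
 * from thread_stack__event() if that was in use for this thread.
 */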
int thread_stack__process(struct thread *thread, struct comm *comm,
                          struct perf_sample *sample,
                          struct addr_location *from_al,
                          struct addr_location *to_al, u64 ref,
                          struct call_return_processor *crp)
{
        struct thread_stack *ts = thread->ts;
        int err = 0;

        if (ts) {
                if (!ts->crp) {
                        /* Supersede thread_stack__event() */
                        thread_stack__free(thread);
                        thread->ts = thread_stack__new(thread, crp);
                        if (!thread->ts)
                                return -ENOMEM;
                        ts = thread->ts;
                        ts->comm = comm;
                }
        } else {
                thread->ts = thread_stack__new(thread, crp);
                if (!thread->ts)
                        return -ENOMEM;
                ts = thread->ts;
                ts->comm = comm;
        }

        /* Flush stack on exec */
        if (ts->comm != comm && thread->pid_ == thread->tid) {
                err = __thread_stack__flush(thread, ts);
                if (err)
                        return err;
                ts->comm = comm;
        }

        /* If the stack is empty, put the current symbol on the stack */
        if (!ts->cnt) {
                err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
                                           ref);
                if (err)
                        return err;
        }

        ts->branch_count += 1;
        ts->last_time = sample->time;

        if (sample->flags & PERF_IP_FLAG_CALL) {
                struct call_path_root *cpr = ts->crp->cpr;
                struct call_path *cp;
                u64 ret_addr;

                if (!sample->ip || !sample->addr)
                        return 0;

                ret_addr = sample->ip + sample->insn_len;
                if (ret_addr == sample->addr)
                        return 0; /* Zero-length calls are excluded */

                cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
                                        to_al->sym, sample->addr,
                                        ts->kernel_start);
                if (!cp)
                        return -ENOMEM;
                err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
                                            cp, false);
        } else if (sample->flags & PERF_IP_FLAG_RETURN) {
                if (!sample->ip || !sample->addr)
                        return 0;

                err = thread_stack__pop_cp(thread, ts, sample->addr,
                                           sample->time, ref, from_al->sym);
                if (err) {
                        if (err < 0)
                                return err;
                        err = thread_stack__no_call_return(thread, ts, sample,
                                                           from_al, to_al, ref);
                }
        } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
                err = thread_stack__trace_begin(thread, ts, sample->time, ref);
        } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
                err = thread_stack__trace_end(ts, sample, ref);
        }

        return err;
}