Commit | Line | Data |
---|---|---|
00447ccd AH |
1 | /* |
2 | * thread-stack.c: Synthesize a thread's stack using call / return events | |
3 | * Copyright (c) 2014, Intel Corporation. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms and conditions of the GNU General Public License, | |
7 | * version 2, as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope it will be useful, but WITHOUT | |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
12 | * more details. | |
13 | * | |
14 | */ | |
15 | ||
92a9e4f7 AH |
16 | #include <linux/rbtree.h> |
17 | #include <linux/list.h> | |
256d92bc | 18 | #include <linux/log2.h> |
a43783ae | 19 | #include <errno.h> |
00447ccd AH |
20 | #include "thread.h" |
21 | #include "event.h" | |
92a9e4f7 | 22 | #include "machine.h" |
3c0cd952 | 23 | #include "env.h" |
00447ccd AH |
24 | #include "util.h" |
25 | #include "debug.h" | |
92a9e4f7 AH |
26 | #include "symbol.h" |
27 | #include "comm.h" | |
451db126 | 28 | #include "call-path.h" |
00447ccd AH |
29 | #include "thread-stack.h" |
30 | ||
92a9e4f7 AH |
31 | #define STACK_GROWTH 2048 |
32 | ||
/*
 * Retpoline detection state, kept per thread stack.
 *
 * RETPOLINE_NONE:         retpoline detection is not in effect
 * X86_RETPOLINE_POSSIBLE: an x86 retpoline may be encountered
 * X86_RETPOLINE_DETECTED: the possible start of an x86 retpoline was seen
 */
enum retpoline_state_t {
	RETPOLINE_NONE = 0,
	X86_RETPOLINE_POSSIBLE,
	X86_RETPOLINE_DETECTED,
};
45 | ||
92a9e4f7 AH |
46 | /** |
47 | * struct thread_stack_entry - thread stack entry. | |
48 | * @ret_addr: return address | |
49 | * @timestamp: timestamp (if known) | |
50 | * @ref: external reference (e.g. db_id of sample) | |
51 | * @branch_count: the branch count when the entry was created | |
52 | * @cp: call path | |
53 | * @no_call: a 'call' was not seen | |
4d60e5e3 | 54 | * @trace_end: a 'call' but trace ended |
f08046cb | 55 | * @non_call: a branch but not a 'call' to the start of a different symbol |
92a9e4f7 | 56 | */ |
00447ccd AH |
57 | struct thread_stack_entry { |
58 | u64 ret_addr; | |
92a9e4f7 AH |
59 | u64 timestamp; |
60 | u64 ref; | |
61 | u64 branch_count; | |
62 | struct call_path *cp; | |
63 | bool no_call; | |
4d60e5e3 | 64 | bool trace_end; |
f08046cb | 65 | bool non_call; |
00447ccd AH |
66 | }; |
67 | ||
92a9e4f7 AH |
68 | /** |
69 | * struct thread_stack - thread stack constructed from 'call' and 'return' | |
70 | * branch samples. | |
71 | * @stack: array that holds the stack | |
72 | * @cnt: number of entries in the stack | |
73 | * @sz: current maximum stack size | |
74 | * @trace_nr: current trace number | |
75 | * @branch_count: running branch count | |
76 | * @kernel_start: kernel start address | |
77 | * @last_time: last timestamp | |
78 | * @crp: call/return processor | |
79 | * @comm: current comm | |
f6060ac6 | 80 | * @arr_sz: size of array if this is the first element of an array |
3c0cd952 | 81 | * @rstate: used to detect retpolines |
92a9e4f7 | 82 | */ |
00447ccd AH |
83 | struct thread_stack { |
84 | struct thread_stack_entry *stack; | |
85 | size_t cnt; | |
86 | size_t sz; | |
87 | u64 trace_nr; | |
92a9e4f7 AH |
88 | u64 branch_count; |
89 | u64 kernel_start; | |
90 | u64 last_time; | |
91 | struct call_return_processor *crp; | |
92 | struct comm *comm; | |
f6060ac6 | 93 | unsigned int arr_sz; |
3c0cd952 | 94 | enum retpoline_state_t rstate; |
00447ccd AH |
95 | }; |
96 | ||
256d92bc AH |
97 | /* |
98 | * Assume pid == tid == 0 identifies the idle task as defined by | |
99 | * perf_session__register_idle_thread(). The idle task is really 1 task per cpu, | |
100 | * and therefore requires a stack for each cpu. | |
101 | */ | |
102 | static inline bool thread_stack__per_cpu(struct thread *thread) | |
103 | { | |
104 | return !(thread->tid || thread->pid_); | |
105 | } | |
106 | ||
00447ccd AH |
107 | static int thread_stack__grow(struct thread_stack *ts) |
108 | { | |
109 | struct thread_stack_entry *new_stack; | |
110 | size_t sz, new_sz; | |
111 | ||
112 | new_sz = ts->sz + STACK_GROWTH; | |
113 | sz = new_sz * sizeof(struct thread_stack_entry); | |
114 | ||
115 | new_stack = realloc(ts->stack, sz); | |
116 | if (!new_stack) | |
117 | return -ENOMEM; | |
118 | ||
119 | ts->stack = new_stack; | |
120 | ts->sz = new_sz; | |
121 | ||
122 | return 0; | |
123 | } | |
124 | ||
2e9e8688 AH |
125 | static int thread_stack__init(struct thread_stack *ts, struct thread *thread, |
126 | struct call_return_processor *crp) | |
127 | { | |
128 | int err; | |
129 | ||
130 | err = thread_stack__grow(ts); | |
131 | if (err) | |
132 | return err; | |
133 | ||
3c0cd952 AH |
134 | if (thread->mg && thread->mg->machine) { |
135 | struct machine *machine = thread->mg->machine; | |
136 | const char *arch = perf_env__arch(machine->env); | |
137 | ||
138 | ts->kernel_start = machine__kernel_start(machine); | |
139 | if (!strcmp(arch, "x86")) | |
140 | ts->rstate = X86_RETPOLINE_POSSIBLE; | |
141 | } else { | |
2e9e8688 | 142 | ts->kernel_start = 1ULL << 63; |
3c0cd952 | 143 | } |
2e9e8688 AH |
144 | ts->crp = crp; |
145 | ||
146 | return 0; | |
147 | } | |
148 | ||
256d92bc | 149 | static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, |
92a9e4f7 | 150 | struct call_return_processor *crp) |
00447ccd | 151 | { |
139f42f3 AH |
152 | struct thread_stack *ts = thread->ts, *new_ts; |
153 | unsigned int old_sz = ts ? ts->arr_sz : 0; | |
154 | unsigned int new_sz = 1; | |
155 | ||
256d92bc AH |
156 | if (thread_stack__per_cpu(thread) && cpu > 0) |
157 | new_sz = roundup_pow_of_two(cpu + 1); | |
158 | ||
139f42f3 AH |
159 | if (!ts || new_sz > old_sz) { |
160 | new_ts = calloc(new_sz, sizeof(*ts)); | |
161 | if (!new_ts) | |
162 | return NULL; | |
163 | if (ts) | |
164 | memcpy(new_ts, ts, old_sz * sizeof(*ts)); | |
165 | new_ts->arr_sz = new_sz; | |
166 | zfree(&thread->ts); | |
167 | thread->ts = new_ts; | |
168 | ts = new_ts; | |
00447ccd AH |
169 | } |
170 | ||
256d92bc AH |
171 | if (thread_stack__per_cpu(thread) && cpu > 0 && |
172 | (unsigned int)cpu < ts->arr_sz) | |
173 | ts += cpu; | |
174 | ||
139f42f3 AH |
175 | if (!ts->stack && |
176 | thread_stack__init(ts, thread, crp)) | |
177 | return NULL; | |
bd8e68ac | 178 | |
00447ccd AH |
179 | return ts; |
180 | } | |
181 | ||
256d92bc | 182 | static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu) |
bd8e68ac | 183 | { |
256d92bc AH |
184 | struct thread_stack *ts = thread->ts; |
185 | ||
186 | if (cpu < 0) | |
187 | cpu = 0; | |
188 | ||
189 | if (!ts || (unsigned int)cpu >= ts->arr_sz) | |
190 | return NULL; | |
191 | ||
192 | ts += cpu; | |
193 | ||
194 | if (!ts->stack) | |
195 | return NULL; | |
196 | ||
197 | return ts; | |
198 | } | |
199 | ||
200 | static inline struct thread_stack *thread__stack(struct thread *thread, | |
201 | int cpu) | |
202 | { | |
203 | if (!thread) | |
204 | return NULL; | |
205 | ||
206 | if (thread_stack__per_cpu(thread)) | |
207 | return thread__cpu_stack(thread, cpu); | |
208 | ||
209 | return thread->ts; | |
bd8e68ac AH |
210 | } |
211 | ||
4d60e5e3 AH |
212 | static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, |
213 | bool trace_end) | |
00447ccd AH |
214 | { |
215 | int err = 0; | |
216 | ||
217 | if (ts->cnt == ts->sz) { | |
218 | err = thread_stack__grow(ts); | |
219 | if (err) { | |
220 | pr_warning("Out of memory: discarding thread stack\n"); | |
221 | ts->cnt = 0; | |
222 | } | |
223 | } | |
224 | ||
4d60e5e3 | 225 | ts->stack[ts->cnt].trace_end = trace_end; |
00447ccd AH |
226 | ts->stack[ts->cnt++].ret_addr = ret_addr; |
227 | ||
228 | return err; | |
229 | } | |
230 | ||
231 | static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) | |
232 | { | |
233 | size_t i; | |
234 | ||
235 | /* | |
236 | * In some cases there may be functions which are not seen to return. | |
237 | * For example when setjmp / longjmp has been used. Or the perf context | |
238 | * switch in the kernel which doesn't stop and start tracing in exactly | |
239 | * the same code path. When that happens the return address will be | |
240 | * further down the stack. If the return address is not found at all, | |
241 | * we assume the opposite (i.e. this is a return for a call that wasn't | |
242 | * seen for some reason) and leave the stack alone. | |
243 | */ | |
244 | for (i = ts->cnt; i; ) { | |
245 | if (ts->stack[--i].ret_addr == ret_addr) { | |
246 | ts->cnt = i; | |
247 | return; | |
248 | } | |
249 | } | |
250 | } | |
251 | ||
4d60e5e3 AH |
252 | static void thread_stack__pop_trace_end(struct thread_stack *ts) |
253 | { | |
254 | size_t i; | |
255 | ||
256 | for (i = ts->cnt; i; ) { | |
257 | if (ts->stack[--i].trace_end) | |
258 | ts->cnt = i; | |
259 | else | |
260 | return; | |
261 | } | |
262 | } | |
263 | ||
92a9e4f7 AH |
264 | static bool thread_stack__in_kernel(struct thread_stack *ts) |
265 | { | |
266 | if (!ts->cnt) | |
267 | return false; | |
268 | ||
269 | return ts->stack[ts->cnt - 1].cp->in_kernel; | |
270 | } | |
271 | ||
272 | static int thread_stack__call_return(struct thread *thread, | |
273 | struct thread_stack *ts, size_t idx, | |
274 | u64 timestamp, u64 ref, bool no_return) | |
275 | { | |
276 | struct call_return_processor *crp = ts->crp; | |
277 | struct thread_stack_entry *tse; | |
278 | struct call_return cr = { | |
279 | .thread = thread, | |
280 | .comm = ts->comm, | |
281 | .db_id = 0, | |
282 | }; | |
283 | ||
284 | tse = &ts->stack[idx]; | |
285 | cr.cp = tse->cp; | |
286 | cr.call_time = tse->timestamp; | |
287 | cr.return_time = timestamp; | |
288 | cr.branch_count = ts->branch_count - tse->branch_count; | |
289 | cr.call_ref = tse->ref; | |
290 | cr.return_ref = ref; | |
291 | if (tse->no_call) | |
292 | cr.flags |= CALL_RETURN_NO_CALL; | |
293 | if (no_return) | |
294 | cr.flags |= CALL_RETURN_NO_RETURN; | |
f08046cb AH |
295 | if (tse->non_call) |
296 | cr.flags |= CALL_RETURN_NON_CALL; | |
92a9e4f7 AH |
297 | |
298 | return crp->process(&cr, crp->data); | |
299 | } | |
300 | ||
a5499b37 | 301 | static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) |
92a9e4f7 AH |
302 | { |
303 | struct call_return_processor *crp = ts->crp; | |
304 | int err; | |
305 | ||
306 | if (!crp) { | |
307 | ts->cnt = 0; | |
308 | return 0; | |
309 | } | |
310 | ||
311 | while (ts->cnt) { | |
312 | err = thread_stack__call_return(thread, ts, --ts->cnt, | |
313 | ts->last_time, 0, true); | |
314 | if (err) { | |
315 | pr_err("Error flushing thread stack!\n"); | |
316 | ts->cnt = 0; | |
317 | return err; | |
318 | } | |
319 | } | |
320 | ||
321 | return 0; | |
322 | } | |
323 | ||
a5499b37 AH |
324 | int thread_stack__flush(struct thread *thread) |
325 | { | |
bd8e68ac | 326 | struct thread_stack *ts = thread->ts; |
f6060ac6 AH |
327 | unsigned int pos; |
328 | int err = 0; | |
bd8e68ac | 329 | |
f6060ac6 AH |
330 | if (ts) { |
331 | for (pos = 0; pos < ts->arr_sz; pos++) { | |
332 | int ret = __thread_stack__flush(thread, ts + pos); | |
a5499b37 | 333 | |
f6060ac6 AH |
334 | if (ret) |
335 | err = ret; | |
336 | } | |
337 | } | |
338 | ||
339 | return err; | |
a5499b37 AH |
340 | } |
341 | ||
256d92bc | 342 | int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, |
00447ccd AH |
343 | u64 to_ip, u16 insn_len, u64 trace_nr) |
344 | { | |
256d92bc | 345 | struct thread_stack *ts = thread__stack(thread, cpu); |
bd8e68ac | 346 | |
00447ccd AH |
347 | if (!thread) |
348 | return -EINVAL; | |
349 | ||
bd8e68ac | 350 | if (!ts) { |
256d92bc | 351 | ts = thread_stack__new(thread, cpu, NULL); |
bd8e68ac | 352 | if (!ts) { |
00447ccd AH |
353 | pr_warning("Out of memory: no thread stack\n"); |
354 | return -ENOMEM; | |
355 | } | |
bd8e68ac | 356 | ts->trace_nr = trace_nr; |
00447ccd AH |
357 | } |
358 | ||
359 | /* | |
360 | * When the trace is discontinuous, the trace_nr changes. In that case | |
361 | * the stack might be completely invalid. Better to report nothing than | |
92a9e4f7 | 362 | * to report something misleading, so flush the stack. |
00447ccd | 363 | */ |
bd8e68ac AH |
364 | if (trace_nr != ts->trace_nr) { |
365 | if (ts->trace_nr) | |
366 | __thread_stack__flush(thread, ts); | |
367 | ts->trace_nr = trace_nr; | |
00447ccd AH |
368 | } |
369 | ||
92a9e4f7 | 370 | /* Stop here if thread_stack__process() is in use */ |
bd8e68ac | 371 | if (ts->crp) |
92a9e4f7 AH |
372 | return 0; |
373 | ||
00447ccd AH |
374 | if (flags & PERF_IP_FLAG_CALL) { |
375 | u64 ret_addr; | |
376 | ||
377 | if (!to_ip) | |
378 | return 0; | |
379 | ret_addr = from_ip + insn_len; | |
380 | if (ret_addr == to_ip) | |
381 | return 0; /* Zero-length calls are excluded */ | |
bd8e68ac | 382 | return thread_stack__push(ts, ret_addr, |
4d60e5e3 AH |
383 | flags & PERF_IP_FLAG_TRACE_END); |
384 | } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) { | |
385 | /* | |
386 | * If the caller did not change the trace number (which would | |
387 | * have flushed the stack) then try to make sense of the stack. | |
388 | * Possibly, tracing began after returning to the current | |
389 | * address, so try to pop that. Also, do not expect a call made | |
390 | * when the trace ended, to return, so pop that. | |
391 | */ | |
bd8e68ac AH |
392 | thread_stack__pop(ts, to_ip); |
393 | thread_stack__pop_trace_end(ts); | |
4d60e5e3 | 394 | } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) { |
bd8e68ac | 395 | thread_stack__pop(ts, to_ip); |
00447ccd AH |
396 | } |
397 | ||
398 | return 0; | |
399 | } | |
400 | ||
256d92bc | 401 | void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr) |
92a9e4f7 | 402 | { |
256d92bc | 403 | struct thread_stack *ts = thread__stack(thread, cpu); |
bd8e68ac AH |
404 | |
405 | if (!ts) | |
92a9e4f7 AH |
406 | return; |
407 | ||
bd8e68ac AH |
408 | if (trace_nr != ts->trace_nr) { |
409 | if (ts->trace_nr) | |
410 | __thread_stack__flush(thread, ts); | |
411 | ts->trace_nr = trace_nr; | |
92a9e4f7 AH |
412 | } |
413 | } | |
414 | ||
f6060ac6 AH |
415 | static void __thread_stack__free(struct thread *thread, struct thread_stack *ts) |
416 | { | |
417 | __thread_stack__flush(thread, ts); | |
418 | zfree(&ts->stack); | |
419 | } | |
420 | ||
421 | static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) | |
422 | { | |
423 | unsigned int arr_sz = ts->arr_sz; | |
424 | ||
425 | __thread_stack__free(thread, ts); | |
426 | memset(ts, 0, sizeof(*ts)); | |
427 | ts->arr_sz = arr_sz; | |
428 | } | |
429 | ||
00447ccd AH |
430 | void thread_stack__free(struct thread *thread) |
431 | { | |
bd8e68ac | 432 | struct thread_stack *ts = thread->ts; |
f6060ac6 | 433 | unsigned int pos; |
bd8e68ac AH |
434 | |
435 | if (ts) { | |
f6060ac6 AH |
436 | for (pos = 0; pos < ts->arr_sz; pos++) |
437 | __thread_stack__free(thread, ts + pos); | |
00447ccd AH |
438 | zfree(&thread->ts); |
439 | } | |
440 | } | |
441 | ||
24248306 AH |
442 | static inline u64 callchain_context(u64 ip, u64 kernel_start) |
443 | { | |
444 | return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; | |
445 | } | |
446 | ||
256d92bc AH |
447 | void thread_stack__sample(struct thread *thread, int cpu, |
448 | struct ip_callchain *chain, | |
24248306 | 449 | size_t sz, u64 ip, u64 kernel_start) |
00447ccd | 450 | { |
256d92bc | 451 | struct thread_stack *ts = thread__stack(thread, cpu); |
24248306 AH |
452 | u64 context = callchain_context(ip, kernel_start); |
453 | u64 last_context; | |
454 | size_t i, j; | |
00447ccd | 455 | |
24248306 AH |
456 | if (sz < 2) { |
457 | chain->nr = 0; | |
458 | return; | |
459 | } | |
00447ccd | 460 | |
24248306 AH |
461 | chain->ips[0] = context; |
462 | chain->ips[1] = ip; | |
463 | ||
bd8e68ac | 464 | if (!ts) { |
24248306 AH |
465 | chain->nr = 2; |
466 | return; | |
467 | } | |
468 | ||
469 | last_context = context; | |
470 | ||
bd8e68ac AH |
471 | for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) { |
472 | ip = ts->stack[ts->cnt - j].ret_addr; | |
24248306 AH |
473 | context = callchain_context(ip, kernel_start); |
474 | if (context != last_context) { | |
475 | if (i >= sz - 1) | |
476 | break; | |
477 | chain->ips[i++] = context; | |
478 | last_context = context; | |
479 | } | |
480 | chain->ips[i] = ip; | |
481 | } | |
00447ccd | 482 | |
24248306 | 483 | chain->nr = i; |
00447ccd | 484 | } |
92a9e4f7 | 485 | |
92a9e4f7 AH |
486 | struct call_return_processor * |
487 | call_return_processor__new(int (*process)(struct call_return *cr, void *data), | |
488 | void *data) | |
489 | { | |
490 | struct call_return_processor *crp; | |
491 | ||
492 | crp = zalloc(sizeof(struct call_return_processor)); | |
493 | if (!crp) | |
494 | return NULL; | |
495 | crp->cpr = call_path_root__new(); | |
496 | if (!crp->cpr) | |
497 | goto out_free; | |
498 | crp->process = process; | |
499 | crp->data = data; | |
500 | return crp; | |
501 | ||
502 | out_free: | |
503 | free(crp); | |
504 | return NULL; | |
505 | } | |
506 | ||
507 | void call_return_processor__free(struct call_return_processor *crp) | |
508 | { | |
509 | if (crp) { | |
510 | call_path_root__free(crp->cpr); | |
511 | free(crp); | |
512 | } | |
513 | } | |
514 | ||
515 | static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, | |
516 | u64 timestamp, u64 ref, struct call_path *cp, | |
2dcde4e1 | 517 | bool no_call, bool trace_end) |
92a9e4f7 AH |
518 | { |
519 | struct thread_stack_entry *tse; | |
520 | int err; | |
521 | ||
e7a3a055 AH |
522 | if (!cp) |
523 | return -ENOMEM; | |
524 | ||
92a9e4f7 AH |
525 | if (ts->cnt == ts->sz) { |
526 | err = thread_stack__grow(ts); | |
527 | if (err) | |
528 | return err; | |
529 | } | |
530 | ||
531 | tse = &ts->stack[ts->cnt++]; | |
532 | tse->ret_addr = ret_addr; | |
533 | tse->timestamp = timestamp; | |
534 | tse->ref = ref; | |
535 | tse->branch_count = ts->branch_count; | |
536 | tse->cp = cp; | |
537 | tse->no_call = no_call; | |
2dcde4e1 | 538 | tse->trace_end = trace_end; |
f08046cb | 539 | tse->non_call = false; |
92a9e4f7 AH |
540 | |
541 | return 0; | |
542 | } | |
543 | ||
544 | static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, | |
545 | u64 ret_addr, u64 timestamp, u64 ref, | |
546 | struct symbol *sym) | |
547 | { | |
548 | int err; | |
549 | ||
550 | if (!ts->cnt) | |
551 | return 1; | |
552 | ||
553 | if (ts->cnt == 1) { | |
554 | struct thread_stack_entry *tse = &ts->stack[0]; | |
555 | ||
556 | if (tse->cp->sym == sym) | |
557 | return thread_stack__call_return(thread, ts, --ts->cnt, | |
558 | timestamp, ref, false); | |
559 | } | |
560 | ||
f08046cb AH |
561 | if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && |
562 | !ts->stack[ts->cnt - 1].non_call) { | |
92a9e4f7 AH |
563 | return thread_stack__call_return(thread, ts, --ts->cnt, |
564 | timestamp, ref, false); | |
565 | } else { | |
566 | size_t i = ts->cnt - 1; | |
567 | ||
568 | while (i--) { | |
f08046cb AH |
569 | if (ts->stack[i].ret_addr != ret_addr || |
570 | ts->stack[i].non_call) | |
92a9e4f7 AH |
571 | continue; |
572 | i += 1; | |
573 | while (ts->cnt > i) { | |
574 | err = thread_stack__call_return(thread, ts, | |
575 | --ts->cnt, | |
576 | timestamp, ref, | |
577 | true); | |
578 | if (err) | |
579 | return err; | |
580 | } | |
581 | return thread_stack__call_return(thread, ts, --ts->cnt, | |
582 | timestamp, ref, false); | |
583 | } | |
584 | } | |
585 | ||
586 | return 1; | |
587 | } | |
588 | ||
e0b89511 | 589 | static int thread_stack__bottom(struct thread_stack *ts, |
92a9e4f7 AH |
590 | struct perf_sample *sample, |
591 | struct addr_location *from_al, | |
592 | struct addr_location *to_al, u64 ref) | |
593 | { | |
594 | struct call_path_root *cpr = ts->crp->cpr; | |
595 | struct call_path *cp; | |
596 | struct symbol *sym; | |
597 | u64 ip; | |
598 | ||
599 | if (sample->ip) { | |
600 | ip = sample->ip; | |
601 | sym = from_al->sym; | |
602 | } else if (sample->addr) { | |
603 | ip = sample->addr; | |
604 | sym = to_al->sym; | |
605 | } else { | |
606 | return 0; | |
607 | } | |
608 | ||
609 | cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, | |
610 | ts->kernel_start); | |
92a9e4f7 | 611 | |
e0b89511 | 612 | return thread_stack__push_cp(ts, ip, sample->time, ref, cp, |
2dcde4e1 | 613 | true, false); |
92a9e4f7 AH |
614 | } |
615 | ||
616 | static int thread_stack__no_call_return(struct thread *thread, | |
617 | struct thread_stack *ts, | |
618 | struct perf_sample *sample, | |
619 | struct addr_location *from_al, | |
620 | struct addr_location *to_al, u64 ref) | |
621 | { | |
622 | struct call_path_root *cpr = ts->crp->cpr; | |
90c2cda7 AH |
623 | struct call_path *root = &cpr->call_path; |
624 | struct symbol *fsym = from_al->sym; | |
625 | struct symbol *tsym = to_al->sym; | |
92a9e4f7 AH |
626 | struct call_path *cp, *parent; |
627 | u64 ks = ts->kernel_start; | |
90c2cda7 AH |
628 | u64 addr = sample->addr; |
629 | u64 tm = sample->time; | |
630 | u64 ip = sample->ip; | |
92a9e4f7 AH |
631 | int err; |
632 | ||
90c2cda7 | 633 | if (ip >= ks && addr < ks) { |
92a9e4f7 AH |
634 | /* Return to userspace, so pop all kernel addresses */ |
635 | while (thread_stack__in_kernel(ts)) { | |
636 | err = thread_stack__call_return(thread, ts, --ts->cnt, | |
90c2cda7 | 637 | tm, ref, true); |
92a9e4f7 AH |
638 | if (err) |
639 | return err; | |
640 | } | |
641 | ||
642 | /* If the stack is empty, push the userspace address */ | |
643 | if (!ts->cnt) { | |
90c2cda7 AH |
644 | cp = call_path__findnew(cpr, root, tsym, addr, ks); |
645 | return thread_stack__push_cp(ts, 0, tm, ref, cp, true, | |
646 | false); | |
92a9e4f7 | 647 | } |
90c2cda7 | 648 | } else if (thread_stack__in_kernel(ts) && ip < ks) { |
92a9e4f7 AH |
649 | /* Return to userspace, so pop all kernel addresses */ |
650 | while (thread_stack__in_kernel(ts)) { | |
651 | err = thread_stack__call_return(thread, ts, --ts->cnt, | |
90c2cda7 | 652 | tm, ref, true); |
92a9e4f7 AH |
653 | if (err) |
654 | return err; | |
655 | } | |
656 | } | |
657 | ||
658 | if (ts->cnt) | |
659 | parent = ts->stack[ts->cnt - 1].cp; | |
660 | else | |
90c2cda7 | 661 | parent = root; |
92a9e4f7 | 662 | |
1f35cd65 AH |
663 | if (parent->sym == from_al->sym) { |
664 | /* | |
665 | * At the bottom of the stack, assume the missing 'call' was | |
666 | * before the trace started. So, pop the current symbol and push | |
667 | * the 'to' symbol. | |
668 | */ | |
669 | if (ts->cnt == 1) { | |
670 | err = thread_stack__call_return(thread, ts, --ts->cnt, | |
671 | tm, ref, false); | |
672 | if (err) | |
673 | return err; | |
674 | } | |
675 | ||
676 | if (!ts->cnt) { | |
677 | cp = call_path__findnew(cpr, root, tsym, addr, ks); | |
678 | ||
679 | return thread_stack__push_cp(ts, addr, tm, ref, cp, | |
680 | true, false); | |
681 | } | |
682 | ||
683 | /* | |
684 | * Otherwise assume the 'return' is being used as a jump (e.g. | |
685 | * retpoline) and just push the 'to' symbol. | |
686 | */ | |
687 | cp = call_path__findnew(cpr, parent, tsym, addr, ks); | |
688 | ||
689 | err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false); | |
690 | if (!err) | |
691 | ts->stack[ts->cnt - 1].non_call = true; | |
692 | ||
693 | return err; | |
694 | } | |
695 | ||
696 | /* | |
697 | * Assume 'parent' has not yet returned, so push 'to', and then push and | |
698 | * pop 'from'. | |
699 | */ | |
700 | ||
701 | cp = call_path__findnew(cpr, parent, tsym, addr, ks); | |
92a9e4f7 | 702 | |
90c2cda7 | 703 | err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); |
92a9e4f7 AH |
704 | if (err) |
705 | return err; | |
706 | ||
1f35cd65 AH |
707 | cp = call_path__findnew(cpr, cp, fsym, ip, ks); |
708 | ||
709 | err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false); | |
710 | if (err) | |
711 | return err; | |
712 | ||
713 | return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); | |
92a9e4f7 AH |
714 | } |
715 | ||
716 | static int thread_stack__trace_begin(struct thread *thread, | |
717 | struct thread_stack *ts, u64 timestamp, | |
718 | u64 ref) | |
719 | { | |
720 | struct thread_stack_entry *tse; | |
721 | int err; | |
722 | ||
723 | if (!ts->cnt) | |
724 | return 0; | |
725 | ||
726 | /* Pop trace end */ | |
727 | tse = &ts->stack[ts->cnt - 1]; | |
2dcde4e1 | 728 | if (tse->trace_end) { |
92a9e4f7 AH |
729 | err = thread_stack__call_return(thread, ts, --ts->cnt, |
730 | timestamp, ref, false); | |
731 | if (err) | |
732 | return err; | |
733 | } | |
734 | ||
735 | return 0; | |
736 | } | |
737 | ||
738 | static int thread_stack__trace_end(struct thread_stack *ts, | |
739 | struct perf_sample *sample, u64 ref) | |
740 | { | |
741 | struct call_path_root *cpr = ts->crp->cpr; | |
742 | struct call_path *cp; | |
743 | u64 ret_addr; | |
744 | ||
745 | /* No point having 'trace end' on the bottom of the stack */ | |
746 | if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref)) | |
747 | return 0; | |
748 | ||
749 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, | |
750 | ts->kernel_start); | |
92a9e4f7 AH |
751 | |
752 | ret_addr = sample->ip + sample->insn_len; | |
753 | ||
754 | return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, | |
2dcde4e1 | 755 | false, true); |
92a9e4f7 AH |
756 | } |
757 | ||
/*
 * Report whether @name is the name of an x86 retpoline thunk: either it
 * begins with "__x86_indirect_thunk_" or it is exactly
 * "__indirect_thunk_start".
 */
static bool is_x86_retpoline(const char *name)
{
	static const char thunk_prefix[] = "__x86_indirect_thunk_";

	/* Compare only the prefix rather than searching the whole name */
	if (!strncmp(name, thunk_prefix, sizeof(thunk_prefix) - 1))
		return true;

	return !strcmp(name, "__indirect_thunk_start");
}
764 | ||
765 | /* | |
766 | * x86 retpoline functions pollute the call graph. This function removes them. | |
767 | * This does not handle function return thunks, nor is there any improvement | |
768 | * for the handling of inline thunks or extern thunks. | |
769 | */ | |
770 | static int thread_stack__x86_retpoline(struct thread_stack *ts, | |
771 | struct perf_sample *sample, | |
772 | struct addr_location *to_al) | |
773 | { | |
774 | struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; | |
775 | struct call_path_root *cpr = ts->crp->cpr; | |
776 | struct symbol *sym = tse->cp->sym; | |
777 | struct symbol *tsym = to_al->sym; | |
778 | struct call_path *cp; | |
779 | ||
780 | if (sym && is_x86_retpoline(sym->name)) { | |
781 | /* | |
782 | * This is a x86 retpoline fn. It pollutes the call graph by | |
783 | * showing up everywhere there is an indirect branch, but does | |
784 | * not itself mean anything. Here the top-of-stack is removed, | |
785 | * by decrementing the stack count, and then further down, the | |
786 | * resulting top-of-stack is replaced with the actual target. | |
787 | * The result is that the retpoline functions will no longer | |
788 | * appear in the call graph. Note this only affects the call | |
789 | * graph, since all the original branches are left unchanged. | |
790 | */ | |
791 | ts->cnt -= 1; | |
792 | sym = ts->stack[ts->cnt - 2].cp->sym; | |
793 | if (sym && sym == tsym && to_al->addr != tsym->start) { | |
794 | /* | |
795 | * Target is back to the middle of the symbol we came | |
796 | * from so assume it is an indirect jmp and forget it | |
797 | * altogether. | |
798 | */ | |
799 | ts->cnt -= 1; | |
800 | return 0; | |
801 | } | |
802 | } else if (sym && sym == tsym) { | |
803 | /* | |
804 | * Target is back to the symbol we came from so assume it is an | |
805 | * indirect jmp and forget it altogether. | |
806 | */ | |
807 | ts->cnt -= 1; | |
808 | return 0; | |
809 | } | |
810 | ||
811 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, | |
812 | sample->addr, ts->kernel_start); | |
813 | if (!cp) | |
814 | return -ENOMEM; | |
815 | ||
816 | /* Replace the top-of-stack with the actual target */ | |
817 | ts->stack[ts->cnt - 1].cp = cp; | |
818 | ||
819 | return 0; | |
820 | } | |
821 | ||
92a9e4f7 AH |
822 | int thread_stack__process(struct thread *thread, struct comm *comm, |
823 | struct perf_sample *sample, | |
824 | struct addr_location *from_al, | |
825 | struct addr_location *to_al, u64 ref, | |
826 | struct call_return_processor *crp) | |
827 | { | |
256d92bc | 828 | struct thread_stack *ts = thread__stack(thread, sample->cpu); |
3c0cd952 | 829 | enum retpoline_state_t rstate; |
92a9e4f7 AH |
830 | int err = 0; |
831 | ||
03b32cb2 AH |
832 | if (ts && !ts->crp) { |
833 | /* Supersede thread_stack__event() */ | |
f6060ac6 | 834 | thread_stack__reset(thread, ts); |
03b32cb2 AH |
835 | ts = NULL; |
836 | } | |
837 | ||
838 | if (!ts) { | |
256d92bc | 839 | ts = thread_stack__new(thread, sample->cpu, crp); |
bd8e68ac | 840 | if (!ts) |
92a9e4f7 | 841 | return -ENOMEM; |
92a9e4f7 AH |
842 | ts->comm = comm; |
843 | } | |
844 | ||
3c0cd952 AH |
845 | rstate = ts->rstate; |
846 | if (rstate == X86_RETPOLINE_DETECTED) | |
847 | ts->rstate = X86_RETPOLINE_POSSIBLE; | |
848 | ||
92a9e4f7 AH |
849 | /* Flush stack on exec */ |
850 | if (ts->comm != comm && thread->pid_ == thread->tid) { | |
a5499b37 | 851 | err = __thread_stack__flush(thread, ts); |
92a9e4f7 AH |
852 | if (err) |
853 | return err; | |
854 | ts->comm = comm; | |
855 | } | |
856 | ||
857 | /* If the stack is empty, put the current symbol on the stack */ | |
858 | if (!ts->cnt) { | |
e0b89511 | 859 | err = thread_stack__bottom(ts, sample, from_al, to_al, ref); |
92a9e4f7 AH |
860 | if (err) |
861 | return err; | |
862 | } | |
863 | ||
864 | ts->branch_count += 1; | |
865 | ts->last_time = sample->time; | |
866 | ||
867 | if (sample->flags & PERF_IP_FLAG_CALL) { | |
2dcde4e1 | 868 | bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END; |
92a9e4f7 AH |
869 | struct call_path_root *cpr = ts->crp->cpr; |
870 | struct call_path *cp; | |
871 | u64 ret_addr; | |
872 | ||
873 | if (!sample->ip || !sample->addr) | |
874 | return 0; | |
875 | ||
876 | ret_addr = sample->ip + sample->insn_len; | |
877 | if (ret_addr == sample->addr) | |
878 | return 0; /* Zero-length calls are excluded */ | |
879 | ||
880 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, | |
881 | to_al->sym, sample->addr, | |
882 | ts->kernel_start); | |
92a9e4f7 | 883 | err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, |
2dcde4e1 | 884 | cp, false, trace_end); |
3c0cd952 AH |
885 | |
886 | /* | |
887 | * A call to the same symbol but not the start of the symbol, | |
888 | * may be the start of a x86 retpoline. | |
889 | */ | |
890 | if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && | |
891 | from_al->sym == to_al->sym && | |
892 | to_al->addr != to_al->sym->start) | |
893 | ts->rstate = X86_RETPOLINE_DETECTED; | |
894 | ||
92a9e4f7 AH |
895 | } else if (sample->flags & PERF_IP_FLAG_RETURN) { |
896 | if (!sample->ip || !sample->addr) | |
897 | return 0; | |
898 | ||
3c0cd952 AH |
899 | /* x86 retpoline 'return' doesn't match the stack */ |
900 | if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && | |
901 | ts->stack[ts->cnt - 1].ret_addr != sample->addr) | |
902 | return thread_stack__x86_retpoline(ts, sample, to_al); | |
903 | ||
92a9e4f7 AH |
904 | err = thread_stack__pop_cp(thread, ts, sample->addr, |
905 | sample->time, ref, from_al->sym); | |
906 | if (err) { | |
907 | if (err < 0) | |
908 | return err; | |
909 | err = thread_stack__no_call_return(thread, ts, sample, | |
910 | from_al, to_al, ref); | |
911 | } | |
912 | } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) { | |
913 | err = thread_stack__trace_begin(thread, ts, sample->time, ref); | |
914 | } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { | |
915 | err = thread_stack__trace_end(ts, sample, ref); | |
f08046cb AH |
916 | } else if (sample->flags & PERF_IP_FLAG_BRANCH && |
917 | from_al->sym != to_al->sym && to_al->sym && | |
918 | to_al->addr == to_al->sym->start) { | |
919 | struct call_path_root *cpr = ts->crp->cpr; | |
920 | struct call_path *cp; | |
921 | ||
922 | /* | |
923 | * The compiler might optimize a call/ret combination by making | |
924 | * it a jmp. Make that visible by recording on the stack a | |
925 | * branch to the start of a different symbol. Note, that means | |
926 | * when a ret pops the stack, all jmps must be popped off first. | |
927 | */ | |
928 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, | |
929 | to_al->sym, sample->addr, | |
930 | ts->kernel_start); | |
931 | err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, | |
932 | false); | |
933 | if (!err) | |
934 | ts->stack[ts->cnt - 1].non_call = true; | |
92a9e4f7 AH |
935 | } |
936 | ||
937 | return err; | |
938 | } | |
e216708d | 939 | |
256d92bc | 940 | size_t thread_stack__depth(struct thread *thread, int cpu) |
e216708d | 941 | { |
256d92bc | 942 | struct thread_stack *ts = thread__stack(thread, cpu); |
bd8e68ac AH |
943 | |
944 | if (!ts) | |
e216708d | 945 | return 0; |
bd8e68ac | 946 | return ts->cnt; |
e216708d | 947 | } |