Commit | Line | Data |
---|---|---|
2025cf9e | 1 | // SPDX-License-Identifier: GPL-2.0-only |
00447ccd AH |
2 | /* |
3 | * thread-stack.c: Synthesize a thread's stack using call / return events | |
4 | * Copyright (c) 2014, Intel Corporation. | |
00447ccd AH |
5 | */ |
6 | ||
92a9e4f7 AH |
7 | #include <linux/rbtree.h> |
8 | #include <linux/list.h> | |
256d92bc | 9 | #include <linux/log2.h> |
a43783ae | 10 | #include <errno.h> |
00447ccd AH |
11 | #include "thread.h" |
12 | #include "event.h" | |
92a9e4f7 | 13 | #include "machine.h" |
3c0cd952 | 14 | #include "env.h" |
00447ccd AH |
15 | #include "util.h" |
16 | #include "debug.h" | |
92a9e4f7 AH |
17 | #include "symbol.h" |
18 | #include "comm.h" | |
451db126 | 19 | #include "call-path.h" |
00447ccd AH |
20 | #include "thread-stack.h" |
21 | ||
92a9e4f7 AH |
22 | #define STACK_GROWTH 2048 |
23 | ||
3c0cd952 AH |
/*
 * Retpoline detection state held in struct thread_stack (@rstate).
 *
 * RETPOLINE_NONE:         retpoline detection is not active
 * X86_RETPOLINE_POSSIBLE: an x86 retpoline is possible
 * X86_RETPOLINE_DETECTED: an x86 retpoline has been detected
 */
enum retpoline_state_t {
	RETPOLINE_NONE		= 0,
	X86_RETPOLINE_POSSIBLE	= 1,
	X86_RETPOLINE_DETECTED	= 2,
};
36 | ||
92a9e4f7 AH |
37 | /** |
38 | * struct thread_stack_entry - thread stack entry. | |
39 | * @ret_addr: return address | |
40 | * @timestamp: timestamp (if known) | |
41 | * @ref: external reference (e.g. db_id of sample) | |
42 | * @branch_count: the branch count when the entry was created | |
f435887e | 43 | * @db_id: id used for db-export |
92a9e4f7 AH |
44 | * @cp: call path |
45 | * @no_call: a 'call' was not seen | |
4d60e5e3 | 46 | * @trace_end: a 'call' but trace ended |
f08046cb | 47 | * @non_call: a branch but not a 'call' to the start of a different symbol |
92a9e4f7 | 48 | */ |
00447ccd AH |
49 | struct thread_stack_entry { |
50 | u64 ret_addr; | |
92a9e4f7 AH |
51 | u64 timestamp; |
52 | u64 ref; | |
53 | u64 branch_count; | |
f435887e | 54 | u64 db_id; |
92a9e4f7 AH |
55 | struct call_path *cp; |
56 | bool no_call; | |
4d60e5e3 | 57 | bool trace_end; |
f08046cb | 58 | bool non_call; |
00447ccd AH |
59 | }; |
60 | ||
92a9e4f7 AH |
61 | /** |
62 | * struct thread_stack - thread stack constructed from 'call' and 'return' | |
63 | * branch samples. | |
64 | * @stack: array that holds the stack | |
65 | * @cnt: number of entries in the stack | |
66 | * @sz: current maximum stack size | |
67 | * @trace_nr: current trace number | |
68 | * @branch_count: running branch count | |
69 | * @kernel_start: kernel start address | |
70 | * @last_time: last timestamp | |
71 | * @crp: call/return processor | |
72 | * @comm: current comm | |
f6060ac6 | 73 | * @arr_sz: size of array if this is the first element of an array |
3c0cd952 | 74 | * @rstate: used to detect retpolines |
92a9e4f7 | 75 | */ |
00447ccd AH |
76 | struct thread_stack { |
77 | struct thread_stack_entry *stack; | |
78 | size_t cnt; | |
79 | size_t sz; | |
80 | u64 trace_nr; | |
92a9e4f7 AH |
81 | u64 branch_count; |
82 | u64 kernel_start; | |
83 | u64 last_time; | |
84 | struct call_return_processor *crp; | |
85 | struct comm *comm; | |
f6060ac6 | 86 | unsigned int arr_sz; |
3c0cd952 | 87 | enum retpoline_state_t rstate; |
00447ccd AH |
88 | }; |
89 | ||
256d92bc AH |
90 | /* |
91 | * Assume pid == tid == 0 identifies the idle task as defined by | |
92 | * perf_session__register_idle_thread(). The idle task is really 1 task per cpu, | |
93 | * and therefore requires a stack for each cpu. | |
94 | */ | |
95 | static inline bool thread_stack__per_cpu(struct thread *thread) | |
96 | { | |
97 | return !(thread->tid || thread->pid_); | |
98 | } | |
99 | ||
00447ccd AH |
100 | static int thread_stack__grow(struct thread_stack *ts) |
101 | { | |
102 | struct thread_stack_entry *new_stack; | |
103 | size_t sz, new_sz; | |
104 | ||
105 | new_sz = ts->sz + STACK_GROWTH; | |
106 | sz = new_sz * sizeof(struct thread_stack_entry); | |
107 | ||
108 | new_stack = realloc(ts->stack, sz); | |
109 | if (!new_stack) | |
110 | return -ENOMEM; | |
111 | ||
112 | ts->stack = new_stack; | |
113 | ts->sz = new_sz; | |
114 | ||
115 | return 0; | |
116 | } | |
117 | ||
2e9e8688 AH |
118 | static int thread_stack__init(struct thread_stack *ts, struct thread *thread, |
119 | struct call_return_processor *crp) | |
120 | { | |
121 | int err; | |
122 | ||
123 | err = thread_stack__grow(ts); | |
124 | if (err) | |
125 | return err; | |
126 | ||
3c0cd952 AH |
127 | if (thread->mg && thread->mg->machine) { |
128 | struct machine *machine = thread->mg->machine; | |
129 | const char *arch = perf_env__arch(machine->env); | |
130 | ||
131 | ts->kernel_start = machine__kernel_start(machine); | |
132 | if (!strcmp(arch, "x86")) | |
133 | ts->rstate = X86_RETPOLINE_POSSIBLE; | |
134 | } else { | |
2e9e8688 | 135 | ts->kernel_start = 1ULL << 63; |
3c0cd952 | 136 | } |
2e9e8688 AH |
137 | ts->crp = crp; |
138 | ||
139 | return 0; | |
140 | } | |
141 | ||
256d92bc | 142 | static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, |
92a9e4f7 | 143 | struct call_return_processor *crp) |
00447ccd | 144 | { |
139f42f3 AH |
145 | struct thread_stack *ts = thread->ts, *new_ts; |
146 | unsigned int old_sz = ts ? ts->arr_sz : 0; | |
147 | unsigned int new_sz = 1; | |
148 | ||
256d92bc AH |
149 | if (thread_stack__per_cpu(thread) && cpu > 0) |
150 | new_sz = roundup_pow_of_two(cpu + 1); | |
151 | ||
139f42f3 AH |
152 | if (!ts || new_sz > old_sz) { |
153 | new_ts = calloc(new_sz, sizeof(*ts)); | |
154 | if (!new_ts) | |
155 | return NULL; | |
156 | if (ts) | |
157 | memcpy(new_ts, ts, old_sz * sizeof(*ts)); | |
158 | new_ts->arr_sz = new_sz; | |
159 | zfree(&thread->ts); | |
160 | thread->ts = new_ts; | |
161 | ts = new_ts; | |
00447ccd AH |
162 | } |
163 | ||
256d92bc AH |
164 | if (thread_stack__per_cpu(thread) && cpu > 0 && |
165 | (unsigned int)cpu < ts->arr_sz) | |
166 | ts += cpu; | |
167 | ||
139f42f3 AH |
168 | if (!ts->stack && |
169 | thread_stack__init(ts, thread, crp)) | |
170 | return NULL; | |
bd8e68ac | 171 | |
00447ccd AH |
172 | return ts; |
173 | } | |
174 | ||
256d92bc | 175 | static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu) |
bd8e68ac | 176 | { |
256d92bc AH |
177 | struct thread_stack *ts = thread->ts; |
178 | ||
179 | if (cpu < 0) | |
180 | cpu = 0; | |
181 | ||
182 | if (!ts || (unsigned int)cpu >= ts->arr_sz) | |
183 | return NULL; | |
184 | ||
185 | ts += cpu; | |
186 | ||
187 | if (!ts->stack) | |
188 | return NULL; | |
189 | ||
190 | return ts; | |
191 | } | |
192 | ||
193 | static inline struct thread_stack *thread__stack(struct thread *thread, | |
194 | int cpu) | |
195 | { | |
196 | if (!thread) | |
197 | return NULL; | |
198 | ||
199 | if (thread_stack__per_cpu(thread)) | |
200 | return thread__cpu_stack(thread, cpu); | |
201 | ||
202 | return thread->ts; | |
bd8e68ac AH |
203 | } |
204 | ||
4d60e5e3 AH |
205 | static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, |
206 | bool trace_end) | |
00447ccd AH |
207 | { |
208 | int err = 0; | |
209 | ||
210 | if (ts->cnt == ts->sz) { | |
211 | err = thread_stack__grow(ts); | |
212 | if (err) { | |
213 | pr_warning("Out of memory: discarding thread stack\n"); | |
214 | ts->cnt = 0; | |
215 | } | |
216 | } | |
217 | ||
4d60e5e3 | 218 | ts->stack[ts->cnt].trace_end = trace_end; |
00447ccd AH |
219 | ts->stack[ts->cnt++].ret_addr = ret_addr; |
220 | ||
221 | return err; | |
222 | } | |
223 | ||
224 | static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) | |
225 | { | |
226 | size_t i; | |
227 | ||
228 | /* | |
229 | * In some cases there may be functions which are not seen to return. | |
230 | * For example when setjmp / longjmp has been used. Or the perf context | |
231 | * switch in the kernel which doesn't stop and start tracing in exactly | |
232 | * the same code path. When that happens the return address will be | |
233 | * further down the stack. If the return address is not found at all, | |
234 | * we assume the opposite (i.e. this is a return for a call that wasn't | |
235 | * seen for some reason) and leave the stack alone. | |
236 | */ | |
237 | for (i = ts->cnt; i; ) { | |
238 | if (ts->stack[--i].ret_addr == ret_addr) { | |
239 | ts->cnt = i; | |
240 | return; | |
241 | } | |
242 | } | |
243 | } | |
244 | ||
4d60e5e3 AH |
245 | static void thread_stack__pop_trace_end(struct thread_stack *ts) |
246 | { | |
247 | size_t i; | |
248 | ||
249 | for (i = ts->cnt; i; ) { | |
250 | if (ts->stack[--i].trace_end) | |
251 | ts->cnt = i; | |
252 | else | |
253 | return; | |
254 | } | |
255 | } | |
256 | ||
92a9e4f7 AH |
257 | static bool thread_stack__in_kernel(struct thread_stack *ts) |
258 | { | |
259 | if (!ts->cnt) | |
260 | return false; | |
261 | ||
262 | return ts->stack[ts->cnt - 1].cp->in_kernel; | |
263 | } | |
264 | ||
265 | static int thread_stack__call_return(struct thread *thread, | |
266 | struct thread_stack *ts, size_t idx, | |
267 | u64 timestamp, u64 ref, bool no_return) | |
268 | { | |
269 | struct call_return_processor *crp = ts->crp; | |
270 | struct thread_stack_entry *tse; | |
271 | struct call_return cr = { | |
272 | .thread = thread, | |
273 | .comm = ts->comm, | |
274 | .db_id = 0, | |
275 | }; | |
f435887e | 276 | u64 *parent_db_id; |
92a9e4f7 AH |
277 | |
278 | tse = &ts->stack[idx]; | |
279 | cr.cp = tse->cp; | |
280 | cr.call_time = tse->timestamp; | |
281 | cr.return_time = timestamp; | |
282 | cr.branch_count = ts->branch_count - tse->branch_count; | |
f435887e | 283 | cr.db_id = tse->db_id; |
92a9e4f7 AH |
284 | cr.call_ref = tse->ref; |
285 | cr.return_ref = ref; | |
286 | if (tse->no_call) | |
287 | cr.flags |= CALL_RETURN_NO_CALL; | |
288 | if (no_return) | |
289 | cr.flags |= CALL_RETURN_NO_RETURN; | |
f08046cb AH |
290 | if (tse->non_call) |
291 | cr.flags |= CALL_RETURN_NON_CALL; | |
92a9e4f7 | 292 | |
f435887e AH |
293 | /* |
294 | * The parent db_id must be assigned before exporting the child. Note | |
295 | * it is not possible to export the parent first because its information | |
296 | * is not yet complete because its 'return' has not yet been processed. | |
297 | */ | |
298 | parent_db_id = idx ? &(tse - 1)->db_id : NULL; | |
299 | ||
300 | return crp->process(&cr, parent_db_id, crp->data); | |
92a9e4f7 AH |
301 | } |
302 | ||
a5499b37 | 303 | static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) |
92a9e4f7 AH |
304 | { |
305 | struct call_return_processor *crp = ts->crp; | |
306 | int err; | |
307 | ||
308 | if (!crp) { | |
309 | ts->cnt = 0; | |
310 | return 0; | |
311 | } | |
312 | ||
313 | while (ts->cnt) { | |
314 | err = thread_stack__call_return(thread, ts, --ts->cnt, | |
315 | ts->last_time, 0, true); | |
316 | if (err) { | |
317 | pr_err("Error flushing thread stack!\n"); | |
318 | ts->cnt = 0; | |
319 | return err; | |
320 | } | |
321 | } | |
322 | ||
323 | return 0; | |
324 | } | |
325 | ||
a5499b37 AH |
326 | int thread_stack__flush(struct thread *thread) |
327 | { | |
bd8e68ac | 328 | struct thread_stack *ts = thread->ts; |
f6060ac6 AH |
329 | unsigned int pos; |
330 | int err = 0; | |
bd8e68ac | 331 | |
f6060ac6 AH |
332 | if (ts) { |
333 | for (pos = 0; pos < ts->arr_sz; pos++) { | |
334 | int ret = __thread_stack__flush(thread, ts + pos); | |
a5499b37 | 335 | |
f6060ac6 AH |
336 | if (ret) |
337 | err = ret; | |
338 | } | |
339 | } | |
340 | ||
341 | return err; | |
a5499b37 AH |
342 | } |
343 | ||
256d92bc | 344 | int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, |
00447ccd AH |
345 | u64 to_ip, u16 insn_len, u64 trace_nr) |
346 | { | |
256d92bc | 347 | struct thread_stack *ts = thread__stack(thread, cpu); |
bd8e68ac | 348 | |
00447ccd AH |
349 | if (!thread) |
350 | return -EINVAL; | |
351 | ||
bd8e68ac | 352 | if (!ts) { |
256d92bc | 353 | ts = thread_stack__new(thread, cpu, NULL); |
bd8e68ac | 354 | if (!ts) { |
00447ccd AH |
355 | pr_warning("Out of memory: no thread stack\n"); |
356 | return -ENOMEM; | |
357 | } | |
bd8e68ac | 358 | ts->trace_nr = trace_nr; |
00447ccd AH |
359 | } |
360 | ||
361 | /* | |
362 | * When the trace is discontinuous, the trace_nr changes. In that case | |
363 | * the stack might be completely invalid. Better to report nothing than | |
92a9e4f7 | 364 | * to report something misleading, so flush the stack. |
00447ccd | 365 | */ |
bd8e68ac AH |
366 | if (trace_nr != ts->trace_nr) { |
367 | if (ts->trace_nr) | |
368 | __thread_stack__flush(thread, ts); | |
369 | ts->trace_nr = trace_nr; | |
00447ccd AH |
370 | } |
371 | ||
92a9e4f7 | 372 | /* Stop here if thread_stack__process() is in use */ |
bd8e68ac | 373 | if (ts->crp) |
92a9e4f7 AH |
374 | return 0; |
375 | ||
00447ccd AH |
376 | if (flags & PERF_IP_FLAG_CALL) { |
377 | u64 ret_addr; | |
378 | ||
379 | if (!to_ip) | |
380 | return 0; | |
381 | ret_addr = from_ip + insn_len; | |
382 | if (ret_addr == to_ip) | |
383 | return 0; /* Zero-length calls are excluded */ | |
bd8e68ac | 384 | return thread_stack__push(ts, ret_addr, |
4d60e5e3 AH |
385 | flags & PERF_IP_FLAG_TRACE_END); |
386 | } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) { | |
387 | /* | |
388 | * If the caller did not change the trace number (which would | |
389 | * have flushed the stack) then try to make sense of the stack. | |
390 | * Possibly, tracing began after returning to the current | |
391 | * address, so try to pop that. Also, do not expect a call made | |
392 | * when the trace ended, to return, so pop that. | |
393 | */ | |
bd8e68ac AH |
394 | thread_stack__pop(ts, to_ip); |
395 | thread_stack__pop_trace_end(ts); | |
4d60e5e3 | 396 | } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) { |
bd8e68ac | 397 | thread_stack__pop(ts, to_ip); |
00447ccd AH |
398 | } |
399 | ||
400 | return 0; | |
401 | } | |
402 | ||
256d92bc | 403 | void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr) |
92a9e4f7 | 404 | { |
256d92bc | 405 | struct thread_stack *ts = thread__stack(thread, cpu); |
bd8e68ac AH |
406 | |
407 | if (!ts) | |
92a9e4f7 AH |
408 | return; |
409 | ||
bd8e68ac AH |
410 | if (trace_nr != ts->trace_nr) { |
411 | if (ts->trace_nr) | |
412 | __thread_stack__flush(thread, ts); | |
413 | ts->trace_nr = trace_nr; | |
92a9e4f7 AH |
414 | } |
415 | } | |
416 | ||
f6060ac6 AH |
417 | static void __thread_stack__free(struct thread *thread, struct thread_stack *ts) |
418 | { | |
419 | __thread_stack__flush(thread, ts); | |
420 | zfree(&ts->stack); | |
421 | } | |
422 | ||
423 | static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) | |
424 | { | |
425 | unsigned int arr_sz = ts->arr_sz; | |
426 | ||
427 | __thread_stack__free(thread, ts); | |
428 | memset(ts, 0, sizeof(*ts)); | |
429 | ts->arr_sz = arr_sz; | |
430 | } | |
431 | ||
00447ccd AH |
432 | void thread_stack__free(struct thread *thread) |
433 | { | |
bd8e68ac | 434 | struct thread_stack *ts = thread->ts; |
f6060ac6 | 435 | unsigned int pos; |
bd8e68ac AH |
436 | |
437 | if (ts) { | |
f6060ac6 AH |
438 | for (pos = 0; pos < ts->arr_sz; pos++) |
439 | __thread_stack__free(thread, ts + pos); | |
00447ccd AH |
440 | zfree(&thread->ts); |
441 | } | |
442 | } | |
443 | ||
24248306 AH |
444 | static inline u64 callchain_context(u64 ip, u64 kernel_start) |
445 | { | |
446 | return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; | |
447 | } | |
448 | ||
256d92bc AH |
449 | void thread_stack__sample(struct thread *thread, int cpu, |
450 | struct ip_callchain *chain, | |
24248306 | 451 | size_t sz, u64 ip, u64 kernel_start) |
00447ccd | 452 | { |
256d92bc | 453 | struct thread_stack *ts = thread__stack(thread, cpu); |
24248306 AH |
454 | u64 context = callchain_context(ip, kernel_start); |
455 | u64 last_context; | |
456 | size_t i, j; | |
00447ccd | 457 | |
24248306 AH |
458 | if (sz < 2) { |
459 | chain->nr = 0; | |
460 | return; | |
461 | } | |
00447ccd | 462 | |
24248306 AH |
463 | chain->ips[0] = context; |
464 | chain->ips[1] = ip; | |
465 | ||
bd8e68ac | 466 | if (!ts) { |
24248306 AH |
467 | chain->nr = 2; |
468 | return; | |
469 | } | |
470 | ||
471 | last_context = context; | |
472 | ||
bd8e68ac AH |
473 | for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) { |
474 | ip = ts->stack[ts->cnt - j].ret_addr; | |
24248306 AH |
475 | context = callchain_context(ip, kernel_start); |
476 | if (context != last_context) { | |
477 | if (i >= sz - 1) | |
478 | break; | |
479 | chain->ips[i++] = context; | |
480 | last_context = context; | |
481 | } | |
482 | chain->ips[i] = ip; | |
483 | } | |
00447ccd | 484 | |
24248306 | 485 | chain->nr = i; |
00447ccd | 486 | } |
92a9e4f7 | 487 | |
92a9e4f7 | 488 | struct call_return_processor * |
f435887e | 489 | call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data), |
92a9e4f7 AH |
490 | void *data) |
491 | { | |
492 | struct call_return_processor *crp; | |
493 | ||
494 | crp = zalloc(sizeof(struct call_return_processor)); | |
495 | if (!crp) | |
496 | return NULL; | |
497 | crp->cpr = call_path_root__new(); | |
498 | if (!crp->cpr) | |
499 | goto out_free; | |
500 | crp->process = process; | |
501 | crp->data = data; | |
502 | return crp; | |
503 | ||
504 | out_free: | |
505 | free(crp); | |
506 | return NULL; | |
507 | } | |
508 | ||
509 | void call_return_processor__free(struct call_return_processor *crp) | |
510 | { | |
511 | if (crp) { | |
512 | call_path_root__free(crp->cpr); | |
513 | free(crp); | |
514 | } | |
515 | } | |
516 | ||
517 | static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, | |
518 | u64 timestamp, u64 ref, struct call_path *cp, | |
2dcde4e1 | 519 | bool no_call, bool trace_end) |
92a9e4f7 AH |
520 | { |
521 | struct thread_stack_entry *tse; | |
522 | int err; | |
523 | ||
e7a3a055 AH |
524 | if (!cp) |
525 | return -ENOMEM; | |
526 | ||
92a9e4f7 AH |
527 | if (ts->cnt == ts->sz) { |
528 | err = thread_stack__grow(ts); | |
529 | if (err) | |
530 | return err; | |
531 | } | |
532 | ||
533 | tse = &ts->stack[ts->cnt++]; | |
534 | tse->ret_addr = ret_addr; | |
535 | tse->timestamp = timestamp; | |
536 | tse->ref = ref; | |
537 | tse->branch_count = ts->branch_count; | |
538 | tse->cp = cp; | |
539 | tse->no_call = no_call; | |
2dcde4e1 | 540 | tse->trace_end = trace_end; |
f08046cb | 541 | tse->non_call = false; |
f435887e | 542 | tse->db_id = 0; |
92a9e4f7 AH |
543 | |
544 | return 0; | |
545 | } | |
546 | ||
547 | static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, | |
548 | u64 ret_addr, u64 timestamp, u64 ref, | |
549 | struct symbol *sym) | |
550 | { | |
551 | int err; | |
552 | ||
553 | if (!ts->cnt) | |
554 | return 1; | |
555 | ||
556 | if (ts->cnt == 1) { | |
557 | struct thread_stack_entry *tse = &ts->stack[0]; | |
558 | ||
559 | if (tse->cp->sym == sym) | |
560 | return thread_stack__call_return(thread, ts, --ts->cnt, | |
561 | timestamp, ref, false); | |
562 | } | |
563 | ||
f08046cb AH |
564 | if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && |
565 | !ts->stack[ts->cnt - 1].non_call) { | |
92a9e4f7 AH |
566 | return thread_stack__call_return(thread, ts, --ts->cnt, |
567 | timestamp, ref, false); | |
568 | } else { | |
569 | size_t i = ts->cnt - 1; | |
570 | ||
571 | while (i--) { | |
f08046cb AH |
572 | if (ts->stack[i].ret_addr != ret_addr || |
573 | ts->stack[i].non_call) | |
92a9e4f7 AH |
574 | continue; |
575 | i += 1; | |
576 | while (ts->cnt > i) { | |
577 | err = thread_stack__call_return(thread, ts, | |
578 | --ts->cnt, | |
579 | timestamp, ref, | |
580 | true); | |
581 | if (err) | |
582 | return err; | |
583 | } | |
584 | return thread_stack__call_return(thread, ts, --ts->cnt, | |
585 | timestamp, ref, false); | |
586 | } | |
587 | } | |
588 | ||
589 | return 1; | |
590 | } | |
591 | ||
e0b89511 | 592 | static int thread_stack__bottom(struct thread_stack *ts, |
92a9e4f7 AH |
593 | struct perf_sample *sample, |
594 | struct addr_location *from_al, | |
595 | struct addr_location *to_al, u64 ref) | |
596 | { | |
597 | struct call_path_root *cpr = ts->crp->cpr; | |
598 | struct call_path *cp; | |
599 | struct symbol *sym; | |
600 | u64 ip; | |
601 | ||
602 | if (sample->ip) { | |
603 | ip = sample->ip; | |
604 | sym = from_al->sym; | |
605 | } else if (sample->addr) { | |
606 | ip = sample->addr; | |
607 | sym = to_al->sym; | |
608 | } else { | |
609 | return 0; | |
610 | } | |
611 | ||
612 | cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, | |
613 | ts->kernel_start); | |
92a9e4f7 | 614 | |
e0b89511 | 615 | return thread_stack__push_cp(ts, ip, sample->time, ref, cp, |
2dcde4e1 | 616 | true, false); |
92a9e4f7 AH |
617 | } |
618 | ||
619 | static int thread_stack__no_call_return(struct thread *thread, | |
620 | struct thread_stack *ts, | |
621 | struct perf_sample *sample, | |
622 | struct addr_location *from_al, | |
623 | struct addr_location *to_al, u64 ref) | |
624 | { | |
625 | struct call_path_root *cpr = ts->crp->cpr; | |
90c2cda7 AH |
626 | struct call_path *root = &cpr->call_path; |
627 | struct symbol *fsym = from_al->sym; | |
628 | struct symbol *tsym = to_al->sym; | |
92a9e4f7 AH |
629 | struct call_path *cp, *parent; |
630 | u64 ks = ts->kernel_start; | |
90c2cda7 AH |
631 | u64 addr = sample->addr; |
632 | u64 tm = sample->time; | |
633 | u64 ip = sample->ip; | |
92a9e4f7 AH |
634 | int err; |
635 | ||
90c2cda7 | 636 | if (ip >= ks && addr < ks) { |
92a9e4f7 AH |
637 | /* Return to userspace, so pop all kernel addresses */ |
638 | while (thread_stack__in_kernel(ts)) { | |
639 | err = thread_stack__call_return(thread, ts, --ts->cnt, | |
90c2cda7 | 640 | tm, ref, true); |
92a9e4f7 AH |
641 | if (err) |
642 | return err; | |
643 | } | |
644 | ||
645 | /* If the stack is empty, push the userspace address */ | |
646 | if (!ts->cnt) { | |
90c2cda7 AH |
647 | cp = call_path__findnew(cpr, root, tsym, addr, ks); |
648 | return thread_stack__push_cp(ts, 0, tm, ref, cp, true, | |
649 | false); | |
92a9e4f7 | 650 | } |
90c2cda7 | 651 | } else if (thread_stack__in_kernel(ts) && ip < ks) { |
92a9e4f7 AH |
652 | /* Return to userspace, so pop all kernel addresses */ |
653 | while (thread_stack__in_kernel(ts)) { | |
654 | err = thread_stack__call_return(thread, ts, --ts->cnt, | |
90c2cda7 | 655 | tm, ref, true); |
92a9e4f7 AH |
656 | if (err) |
657 | return err; | |
658 | } | |
659 | } | |
660 | ||
661 | if (ts->cnt) | |
662 | parent = ts->stack[ts->cnt - 1].cp; | |
663 | else | |
90c2cda7 | 664 | parent = root; |
92a9e4f7 | 665 | |
1f35cd65 AH |
666 | if (parent->sym == from_al->sym) { |
667 | /* | |
668 | * At the bottom of the stack, assume the missing 'call' was | |
669 | * before the trace started. So, pop the current symbol and push | |
670 | * the 'to' symbol. | |
671 | */ | |
672 | if (ts->cnt == 1) { | |
673 | err = thread_stack__call_return(thread, ts, --ts->cnt, | |
674 | tm, ref, false); | |
675 | if (err) | |
676 | return err; | |
677 | } | |
678 | ||
679 | if (!ts->cnt) { | |
680 | cp = call_path__findnew(cpr, root, tsym, addr, ks); | |
681 | ||
682 | return thread_stack__push_cp(ts, addr, tm, ref, cp, | |
683 | true, false); | |
684 | } | |
685 | ||
686 | /* | |
687 | * Otherwise assume the 'return' is being used as a jump (e.g. | |
688 | * retpoline) and just push the 'to' symbol. | |
689 | */ | |
690 | cp = call_path__findnew(cpr, parent, tsym, addr, ks); | |
691 | ||
692 | err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false); | |
693 | if (!err) | |
694 | ts->stack[ts->cnt - 1].non_call = true; | |
695 | ||
696 | return err; | |
697 | } | |
698 | ||
699 | /* | |
700 | * Assume 'parent' has not yet returned, so push 'to', and then push and | |
701 | * pop 'from'. | |
702 | */ | |
703 | ||
704 | cp = call_path__findnew(cpr, parent, tsym, addr, ks); | |
92a9e4f7 | 705 | |
90c2cda7 | 706 | err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); |
92a9e4f7 AH |
707 | if (err) |
708 | return err; | |
709 | ||
1f35cd65 AH |
710 | cp = call_path__findnew(cpr, cp, fsym, ip, ks); |
711 | ||
712 | err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false); | |
713 | if (err) | |
714 | return err; | |
715 | ||
716 | return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); | |
92a9e4f7 AH |
717 | } |
718 | ||
719 | static int thread_stack__trace_begin(struct thread *thread, | |
720 | struct thread_stack *ts, u64 timestamp, | |
721 | u64 ref) | |
722 | { | |
723 | struct thread_stack_entry *tse; | |
724 | int err; | |
725 | ||
726 | if (!ts->cnt) | |
727 | return 0; | |
728 | ||
729 | /* Pop trace end */ | |
730 | tse = &ts->stack[ts->cnt - 1]; | |
2dcde4e1 | 731 | if (tse->trace_end) { |
92a9e4f7 AH |
732 | err = thread_stack__call_return(thread, ts, --ts->cnt, |
733 | timestamp, ref, false); | |
734 | if (err) | |
735 | return err; | |
736 | } | |
737 | ||
738 | return 0; | |
739 | } | |
740 | ||
741 | static int thread_stack__trace_end(struct thread_stack *ts, | |
742 | struct perf_sample *sample, u64 ref) | |
743 | { | |
744 | struct call_path_root *cpr = ts->crp->cpr; | |
745 | struct call_path *cp; | |
746 | u64 ret_addr; | |
747 | ||
748 | /* No point having 'trace end' on the bottom of the stack */ | |
749 | if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref)) | |
750 | return 0; | |
751 | ||
752 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, | |
753 | ts->kernel_start); | |
92a9e4f7 AH |
754 | |
755 | ret_addr = sample->ip + sample->insn_len; | |
756 | ||
757 | return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, | |
2dcde4e1 | 758 | false, true); |
92a9e4f7 AH |
759 | } |
760 | ||
3c0cd952 AH |
/*
 * Return true if @name is the name of an x86 retpoline thunk: either it
 * begins with "__x86_indirect_thunk_" or it is exactly
 * "__indirect_thunk_start".
 */
static bool is_x86_retpoline(const char *name)
{
	static const char thunk_prefix[] = "__x86_indirect_thunk_";

	if (!strncmp(name, thunk_prefix, sizeof(thunk_prefix) - 1))
		return true;

	return !strcmp(name, "__indirect_thunk_start");
}
767 | ||
768 | /* | |
769 | * x86 retpoline functions pollute the call graph. This function removes them. | |
770 | * This does not handle function return thunks, nor is there any improvement | |
771 | * for the handling of inline thunks or extern thunks. | |
772 | */ | |
773 | static int thread_stack__x86_retpoline(struct thread_stack *ts, | |
774 | struct perf_sample *sample, | |
775 | struct addr_location *to_al) | |
776 | { | |
777 | struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; | |
778 | struct call_path_root *cpr = ts->crp->cpr; | |
779 | struct symbol *sym = tse->cp->sym; | |
780 | struct symbol *tsym = to_al->sym; | |
781 | struct call_path *cp; | |
782 | ||
783 | if (sym && is_x86_retpoline(sym->name)) { | |
784 | /* | |
785 | * This is a x86 retpoline fn. It pollutes the call graph by | |
786 | * showing up everywhere there is an indirect branch, but does | |
787 | * not itself mean anything. Here the top-of-stack is removed, | |
788 | * by decrementing the stack count, and then further down, the | |
789 | * resulting top-of-stack is replaced with the actual target. | |
790 | * The result is that the retpoline functions will no longer | |
791 | * appear in the call graph. Note this only affects the call | |
792 | * graph, since all the original branches are left unchanged. | |
793 | */ | |
794 | ts->cnt -= 1; | |
795 | sym = ts->stack[ts->cnt - 2].cp->sym; | |
796 | if (sym && sym == tsym && to_al->addr != tsym->start) { | |
797 | /* | |
798 | * Target is back to the middle of the symbol we came | |
799 | * from so assume it is an indirect jmp and forget it | |
800 | * altogether. | |
801 | */ | |
802 | ts->cnt -= 1; | |
803 | return 0; | |
804 | } | |
805 | } else if (sym && sym == tsym) { | |
806 | /* | |
807 | * Target is back to the symbol we came from so assume it is an | |
808 | * indirect jmp and forget it altogether. | |
809 | */ | |
810 | ts->cnt -= 1; | |
811 | return 0; | |
812 | } | |
813 | ||
814 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, | |
815 | sample->addr, ts->kernel_start); | |
816 | if (!cp) | |
817 | return -ENOMEM; | |
818 | ||
819 | /* Replace the top-of-stack with the actual target */ | |
820 | ts->stack[ts->cnt - 1].cp = cp; | |
821 | ||
822 | return 0; | |
823 | } | |
824 | ||
92a9e4f7 AH |
825 | int thread_stack__process(struct thread *thread, struct comm *comm, |
826 | struct perf_sample *sample, | |
827 | struct addr_location *from_al, | |
828 | struct addr_location *to_al, u64 ref, | |
829 | struct call_return_processor *crp) | |
830 | { | |
256d92bc | 831 | struct thread_stack *ts = thread__stack(thread, sample->cpu); |
3c0cd952 | 832 | enum retpoline_state_t rstate; |
92a9e4f7 AH |
833 | int err = 0; |
834 | ||
03b32cb2 AH |
835 | if (ts && !ts->crp) { |
836 | /* Supersede thread_stack__event() */ | |
f6060ac6 | 837 | thread_stack__reset(thread, ts); |
03b32cb2 AH |
838 | ts = NULL; |
839 | } | |
840 | ||
841 | if (!ts) { | |
256d92bc | 842 | ts = thread_stack__new(thread, sample->cpu, crp); |
bd8e68ac | 843 | if (!ts) |
92a9e4f7 | 844 | return -ENOMEM; |
92a9e4f7 AH |
845 | ts->comm = comm; |
846 | } | |
847 | ||
3c0cd952 AH |
848 | rstate = ts->rstate; |
849 | if (rstate == X86_RETPOLINE_DETECTED) | |
850 | ts->rstate = X86_RETPOLINE_POSSIBLE; | |
851 | ||
92a9e4f7 AH |
852 | /* Flush stack on exec */ |
853 | if (ts->comm != comm && thread->pid_ == thread->tid) { | |
a5499b37 | 854 | err = __thread_stack__flush(thread, ts); |
92a9e4f7 AH |
855 | if (err) |
856 | return err; | |
857 | ts->comm = comm; | |
858 | } | |
859 | ||
860 | /* If the stack is empty, put the current symbol on the stack */ | |
861 | if (!ts->cnt) { | |
e0b89511 | 862 | err = thread_stack__bottom(ts, sample, from_al, to_al, ref); |
92a9e4f7 AH |
863 | if (err) |
864 | return err; | |
865 | } | |
866 | ||
867 | ts->branch_count += 1; | |
868 | ts->last_time = sample->time; | |
869 | ||
870 | if (sample->flags & PERF_IP_FLAG_CALL) { | |
2dcde4e1 | 871 | bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END; |
92a9e4f7 AH |
872 | struct call_path_root *cpr = ts->crp->cpr; |
873 | struct call_path *cp; | |
874 | u64 ret_addr; | |
875 | ||
876 | if (!sample->ip || !sample->addr) | |
877 | return 0; | |
878 | ||
879 | ret_addr = sample->ip + sample->insn_len; | |
880 | if (ret_addr == sample->addr) | |
881 | return 0; /* Zero-length calls are excluded */ | |
882 | ||
883 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, | |
884 | to_al->sym, sample->addr, | |
885 | ts->kernel_start); | |
92a9e4f7 | 886 | err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, |
2dcde4e1 | 887 | cp, false, trace_end); |
3c0cd952 AH |
888 | |
889 | /* | |
890 | * A call to the same symbol but not the start of the symbol, | |
891 | * may be the start of a x86 retpoline. | |
892 | */ | |
893 | if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && | |
894 | from_al->sym == to_al->sym && | |
895 | to_al->addr != to_al->sym->start) | |
896 | ts->rstate = X86_RETPOLINE_DETECTED; | |
897 | ||
92a9e4f7 AH |
898 | } else if (sample->flags & PERF_IP_FLAG_RETURN) { |
899 | if (!sample->ip || !sample->addr) | |
900 | return 0; | |
901 | ||
3c0cd952 AH |
902 | /* x86 retpoline 'return' doesn't match the stack */ |
903 | if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && | |
904 | ts->stack[ts->cnt - 1].ret_addr != sample->addr) | |
905 | return thread_stack__x86_retpoline(ts, sample, to_al); | |
906 | ||
92a9e4f7 AH |
907 | err = thread_stack__pop_cp(thread, ts, sample->addr, |
908 | sample->time, ref, from_al->sym); | |
909 | if (err) { | |
910 | if (err < 0) | |
911 | return err; | |
912 | err = thread_stack__no_call_return(thread, ts, sample, | |
913 | from_al, to_al, ref); | |
914 | } | |
915 | } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) { | |
916 | err = thread_stack__trace_begin(thread, ts, sample->time, ref); | |
917 | } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { | |
918 | err = thread_stack__trace_end(ts, sample, ref); | |
f08046cb AH |
919 | } else if (sample->flags & PERF_IP_FLAG_BRANCH && |
920 | from_al->sym != to_al->sym && to_al->sym && | |
921 | to_al->addr == to_al->sym->start) { | |
922 | struct call_path_root *cpr = ts->crp->cpr; | |
923 | struct call_path *cp; | |
924 | ||
925 | /* | |
926 | * The compiler might optimize a call/ret combination by making | |
927 | * it a jmp. Make that visible by recording on the stack a | |
928 | * branch to the start of a different symbol. Note, that means | |
929 | * when a ret pops the stack, all jmps must be popped off first. | |
930 | */ | |
931 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, | |
932 | to_al->sym, sample->addr, | |
933 | ts->kernel_start); | |
934 | err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, | |
935 | false); | |
936 | if (!err) | |
937 | ts->stack[ts->cnt - 1].non_call = true; | |
92a9e4f7 AH |
938 | } |
939 | ||
940 | return err; | |
941 | } | |
e216708d | 942 | |
256d92bc | 943 | size_t thread_stack__depth(struct thread *thread, int cpu) |
e216708d | 944 | { |
256d92bc | 945 | struct thread_stack *ts = thread__stack(thread, cpu); |
bd8e68ac AH |
946 | |
947 | if (!ts) | |
e216708d | 948 | return 0; |
bd8e68ac | 949 | return ts->cnt; |
e216708d | 950 | } |