Commit | Line | Data |
---|---|---|
457c8996 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
7c7900f8 | 2 | #include <linux/sched.h> |
29930025 | 3 | #include <linux/sched/task.h> |
68db0cf1 | 4 | #include <linux/sched/task_stack.h> |
a8b7a923 JP |
5 | #include <linux/interrupt.h> |
6 | #include <asm/sections.h> | |
7c7900f8 JP |
7 | #include <asm/ptrace.h> |
8 | #include <asm/bitops.h> | |
9 | #include <asm/stacktrace.h> | |
10 | #include <asm/unwind.h> | |
11 | ||
12 | #define FRAME_HEADER_SIZE (sizeof(long) * 2) | |
13 | ||
ee9f8fce JP |
14 | unsigned long unwind_get_return_address(struct unwind_state *state) |
15 | { | |
16 | if (unwind_done(state)) | |
17 | return 0; | |
18 | ||
19 | return __kernel_text_address(state->ip) ? state->ip : 0; | |
20 | } | |
21 | EXPORT_SYMBOL_GPL(unwind_get_return_address); | |
22 | ||
23 | unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) | |
24 | { | |
25 | if (unwind_done(state)) | |
26 | return NULL; | |
27 | ||
28 | return state->regs ? &state->regs->ip : state->bp + 1; | |
29 | } | |
84936118 | 30 | |
aa4f8534 | 31 | static void unwind_dump(struct unwind_state *state) |
8b5e99f0 JP |
32 | { |
33 | static bool dumped_before = false; | |
34 | bool prev_zero, zero = false; | |
aa4f8534 | 35 | unsigned long word, *sp; |
262fa734 JP |
36 | struct stack_info stack_info = {0}; |
37 | unsigned long visit_mask = 0; | |
8b5e99f0 JP |
38 | |
39 | if (dumped_before) | |
40 | return; | |
41 | ||
42 | dumped_before = true; | |
43 | ||
4ea3d741 | 44 | printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n", |
8b5e99f0 JP |
45 | state->stack_info.type, state->stack_info.next_sp, |
46 | state->stack_mask, state->graph_idx); | |
47 | ||
99bd28a4 JP |
48 | for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp; |
49 | sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { | |
262fa734 JP |
50 | if (get_stack_info(sp, state->task, &stack_info, &visit_mask)) |
51 | break; | |
8b5e99f0 | 52 | |
262fa734 | 53 | for (; sp < stack_info.end; sp++) { |
8b5e99f0 | 54 | |
262fa734 JP |
55 | word = READ_ONCE_NOCHECK(*sp); |
56 | ||
57 | prev_zero = zero; | |
58 | zero = word == 0; | |
8b5e99f0 | 59 | |
262fa734 JP |
60 | if (zero) { |
61 | if (!prev_zero) | |
62 | printk_deferred("%p: %0*x ...\n", | |
63 | sp, BITS_PER_LONG/4, 0); | |
64 | continue; | |
65 | } | |
66 | ||
67 | printk_deferred("%p: %0*lx (%pB)\n", | |
68 | sp, BITS_PER_LONG/4, word, (void *)word); | |
69 | } | |
8b5e99f0 JP |
70 | } |
71 | } | |
72 | ||
a8b7a923 JP |
73 | static bool in_entry_code(unsigned long ip) |
74 | { | |
75 | char *addr = (char *)ip; | |
76 | ||
f0178fc0 | 77 | return addr >= __entry_text_start && addr < __entry_text_end; |
a8b7a923 JP |
78 | } |
79 | ||
b0d50c7b JP |
80 | static inline unsigned long *last_frame(struct unwind_state *state) |
81 | { | |
82 | return (unsigned long *)task_pt_regs(state->task) - 2; | |
83 | } | |
84 | ||
519fb5c3 JP |
85 | static bool is_last_frame(struct unwind_state *state) |
86 | { | |
87 | return state->bp == last_frame(state); | |
88 | } | |
89 | ||
/*
 * Distance, in words, between last_frame() and the frame pointer location
 * checked by is_last_aligned_frame().
 */
#ifdef CONFIG_X86_32
# define GCC_REALIGN_WORDS 3
#else
# define GCC_REALIGN_WORDS 1
#endif

/* last_frame() moved down by GCC_REALIGN_WORDS words. */
static inline unsigned long *last_aligned_frame(struct unwind_state *state)
{
	unsigned long *final_bp = last_frame(state);

	return final_bp - GCC_REALIGN_WORDS;
}
100 | ||
519fb5c3 | 101 | static bool is_last_aligned_frame(struct unwind_state *state) |
acb4608a | 102 | { |
b0d50c7b JP |
103 | unsigned long *last_bp = last_frame(state); |
104 | unsigned long *aligned_bp = last_aligned_frame(state); | |
acb4608a | 105 | |
8023e0e2 | 106 | /* |
519fb5c3 JP |
107 | * GCC can occasionally decide to realign the stack pointer and change |
108 | * the offset of the stack frame in the prologue of a function called | |
109 | * by head/entry code. Examples: | |
87a6b297 JP |
110 | * |
111 | * <start_secondary>: | |
112 | * push %edi | |
113 | * lea 0x8(%esp),%edi | |
114 | * and $0xfffffff8,%esp | |
115 | * pushl -0x4(%edi) | |
116 | * push %ebp | |
117 | * mov %esp,%ebp | |
118 | * | |
119 | * <x86_64_start_kernel>: | |
120 | * lea 0x8(%rsp),%r10 | |
121 | * and $0xfffffffffffffff0,%rsp | |
122 | * pushq -0x8(%r10) | |
123 | * push %rbp | |
124 | * mov %rsp,%rbp | |
125 | * | |
519fb5c3 JP |
126 | * After aligning the stack, it pushes a duplicate copy of the return |
127 | * address before pushing the frame pointer. | |
128 | */ | |
129 | return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1)); | |
130 | } | |
131 | ||
132 | static bool is_last_ftrace_frame(struct unwind_state *state) | |
133 | { | |
134 | unsigned long *last_bp = last_frame(state); | |
135 | unsigned long *last_ftrace_bp = last_bp - 3; | |
136 | ||
137 | /* | |
138 | * When unwinding from an ftrace handler of a function called by entry | |
139 | * code, the stack layout of the last frame is: | |
140 | * | |
141 | * bp | |
142 | * parent ret addr | |
143 | * bp | |
144 | * function ret addr | |
145 | * parent ret addr | |
146 | * pt_regs | |
147 | * ----------------- | |
8023e0e2 | 148 | */ |
519fb5c3 JP |
149 | return (state->bp == last_ftrace_bp && |
150 | *state->bp == *(state->bp + 2) && | |
151 | *(state->bp + 1) == *(state->bp + 4)); | |
152 | } | |
153 | ||
/* True when the current frame matches any of the known last-frame layouts. */
static bool is_last_task_frame(struct unwind_state *state)
{
	if (is_last_frame(state))
		return true;

	if (is_last_aligned_frame(state))
		return true;

	return is_last_ftrace_frame(state);
}
159 | ||
/*
 * Check whether a frame pointer value is really an encoded pointer to
 * pt_regs on the stack, and decode it if so.  See ENCODE_FRAME_POINTER.
 *
 * Returns the decoded pt_regs pointer, or NULL when @bp is not encoded.
 */
#ifdef CONFIG_X86_64
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	/* An encoded value has bit 0 set; clearing it gives the pointer. */
	if (encoded & 0x1)
		return (struct pt_regs *)(encoded & ~0x1);

	return NULL;
}
#else
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	/* An encoded value has bit 31 clear; setting it gives the pointer. */
	if (!(encoded & 0x80000000))
		return (struct pt_regs *)(encoded | 0x80000000);

	return NULL;
}
#endif
946c1911 | 185 | |
37ad4ee8 AP |
186 | /* |
187 | * While walking the stack, KMSAN may stomp on stale locals from other | |
188 | * functions that were marked as uninitialized upon function exit, and | |
189 | * now hold the call frame information for the current function (e.g. the frame | |
190 | * pointer). Because KMSAN does not specifically mark call frames as | |
191 | * initialized, false positive reports are possible. To prevent such reports, | |
192 | * we mark the functions scanning the stack (here and below) with | |
193 | * __no_kmsan_checks. | |
194 | */ | |
195 | __no_kmsan_checks | |
5ed8d8bb JP |
196 | static bool update_stack_state(struct unwind_state *state, |
197 | unsigned long *next_bp) | |
7c7900f8 JP |
198 | { |
199 | struct stack_info *info = &state->stack_info; | |
5ed8d8bb JP |
200 | enum stack_type prev_type = info->type; |
201 | struct pt_regs *regs; | |
6bcdf9d5 | 202 | unsigned long *frame, *prev_frame_end, *addr_p, addr; |
5ed8d8bb JP |
203 | size_t len; |
204 | ||
205 | if (state->regs) | |
3c88c692 | 206 | prev_frame_end = (void *)state->regs + sizeof(*state->regs); |
5ed8d8bb JP |
207 | else |
208 | prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE; | |
209 | ||
210 | /* Is the next frame pointer an encoded pointer to pt_regs? */ | |
211 | regs = decode_frame_pointer(next_bp); | |
212 | if (regs) { | |
213 | frame = (unsigned long *)regs; | |
3c88c692 | 214 | len = sizeof(*regs); |
a8b7a923 | 215 | state->got_irq = true; |
5ed8d8bb JP |
216 | } else { |
217 | frame = next_bp; | |
218 | len = FRAME_HEADER_SIZE; | |
219 | } | |
7c7900f8 JP |
220 | |
221 | /* | |
5ed8d8bb | 222 | * If the next bp isn't on the current stack, switch to the next one. |
7c7900f8 JP |
223 | * |
224 | * We may have to traverse multiple stacks to deal with the possibility | |
5ed8d8bb JP |
225 | * that info->next_sp could point to an empty stack and the next bp |
226 | * could be on a subsequent stack. | |
7c7900f8 | 227 | */ |
5ed8d8bb | 228 | while (!on_stack(info, frame, len)) |
7c7900f8 JP |
229 | if (get_stack_info(info->next_sp, state->task, info, |
230 | &state->stack_mask)) | |
231 | return false; | |
232 | ||
5ed8d8bb JP |
233 | /* Make sure it only unwinds up and doesn't overlap the prev frame: */ |
234 | if (state->orig_sp && state->stack_info.type == prev_type && | |
235 | frame < prev_frame_end) | |
236 | return false; | |
237 | ||
238 | /* Move state to the next frame: */ | |
239 | if (regs) { | |
240 | state->regs = regs; | |
241 | state->bp = NULL; | |
242 | } else { | |
243 | state->bp = next_bp; | |
244 | state->regs = NULL; | |
245 | } | |
246 | ||
6bcdf9d5 JP |
247 | /* Save the return address: */ |
248 | if (state->regs && user_mode(state->regs)) | |
249 | state->ip = 0; | |
250 | else { | |
251 | addr_p = unwind_get_return_address_ptr(state); | |
252 | addr = READ_ONCE_TASK_STACK(state->task, *addr_p); | |
19138af1 | 253 | state->ip = unwind_recover_ret_addr(state, addr, addr_p); |
6bcdf9d5 JP |
254 | } |
255 | ||
5ed8d8bb | 256 | /* Save the original stack pointer for unwind_dump(): */ |
262fa734 | 257 | if (!state->orig_sp) |
5ed8d8bb | 258 | state->orig_sp = frame; |
8b5e99f0 | 259 | |
7c7900f8 JP |
260 | return true; |
261 | } | |
262 | ||
37ad4ee8 | 263 | __no_kmsan_checks |
7c7900f8 JP |
264 | bool unwind_next_frame(struct unwind_state *state) |
265 | { | |
946c1911 | 266 | struct pt_regs *regs; |
5ed8d8bb | 267 | unsigned long *next_bp; |
7c7900f8 JP |
268 | |
269 | if (unwind_done(state)) | |
270 | return false; | |
271 | ||
5ed8d8bb | 272 | /* Have we reached the end? */ |
946c1911 JP |
273 | if (state->regs && user_mode(state->regs)) |
274 | goto the_end; | |
275 | ||
acb4608a JP |
276 | if (is_last_task_frame(state)) { |
277 | regs = task_pt_regs(state->task); | |
278 | ||
279 | /* | |
280 | * kthreads (other than the boot CPU's idle thread) have some | |
281 | * partial regs at the end of their stack which were placed | |
714acdbd | 282 | * there by copy_thread(). But the regs don't have any |
acb4608a JP |
283 | * useful information, so we can skip them. |
284 | * | |
285 | * This user_mode() check is slightly broader than a PF_KTHREAD | |
286 | * check because it also catches the awkward situation where a | |
287 | * newly forked kthread transitions into a user task by calling | |
be619f7f | 288 | * kernel_execve(), which eventually clears PF_KTHREAD. |
acb4608a JP |
289 | */ |
290 | if (!user_mode(regs)) | |
291 | goto the_end; | |
292 | ||
293 | /* | |
294 | * We're almost at the end, but not quite: there's still the | |
295 | * syscall regs frame. Entry code doesn't encode the regs | |
296 | * pointer for syscalls, so we have to set it manually. | |
297 | */ | |
298 | state->regs = regs; | |
299 | state->bp = NULL; | |
6bcdf9d5 | 300 | state->ip = 0; |
acb4608a JP |
301 | return true; |
302 | } | |
303 | ||
5ed8d8bb | 304 | /* Get the next frame pointer: */ |
f4f34e1b JH |
305 | if (state->next_bp) { |
306 | next_bp = state->next_bp; | |
307 | state->next_bp = NULL; | |
308 | } else if (state->regs) { | |
946c1911 | 309 | next_bp = (unsigned long *)state->regs->bp; |
f4f34e1b | 310 | } else { |
5ed8d8bb | 311 | next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp); |
f4f34e1b | 312 | } |
946c1911 | 313 | |
5ed8d8bb | 314 | /* Move to the next frame if it's safe: */ |
a8b7a923 | 315 | if (!update_stack_state(state, next_bp)) |
c32c47c6 | 316 | goto bad_address; |
c32c47c6 | 317 | |
7c7900f8 | 318 | return true; |
946c1911 | 319 | |
c32c47c6 | 320 | bad_address: |
af085d90 JP |
321 | state->error = true; |
322 | ||
900742d8 JP |
323 | /* |
324 | * When unwinding a non-current task, the task might actually be | |
325 | * running on another CPU, in which case it could be modifying its | |
326 | * stack while we're reading it. This is generally not a problem and | |
327 | * can be ignored as long as the caller understands that unwinding | |
328 | * another task will not always succeed. | |
329 | */ | |
330 | if (state->task != current) | |
331 | goto the_end; | |
332 | ||
a8b7a923 JP |
333 | /* |
334 | * Don't warn if the unwinder got lost due to an interrupt in entry | |
b0d50c7b | 335 | * code or in the C handler before the first frame pointer got set up: |
a8b7a923 JP |
336 | */ |
337 | if (state->got_irq && in_entry_code(state->ip)) | |
338 | goto the_end; | |
b0d50c7b JP |
339 | if (state->regs && |
340 | state->regs->sp >= (unsigned long)last_aligned_frame(state) && | |
341 | state->regs->sp < (unsigned long)task_pt_regs(state->task)) | |
342 | goto the_end; | |
a8b7a923 | 343 | |
d4a2d031 JP |
344 | /* |
345 | * There are some known frame pointer issues on 32-bit. Disable | |
346 | * unwinder warnings on 32-bit until it gets objtool support. | |
347 | */ | |
348 | if (IS_ENABLED(CONFIG_X86_32)) | |
349 | goto the_end; | |
350 | ||
b08418b5 JP |
351 | if (state->task != current) |
352 | goto the_end; | |
353 | ||
24d86f59 JP |
354 | if (state->regs) { |
355 | printk_deferred_once(KERN_WARNING | |
356 | "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", | |
357 | state->regs, state->task->comm, | |
5ed8d8bb | 358 | state->task->pid, next_bp); |
aa4f8534 | 359 | unwind_dump(state); |
24d86f59 JP |
360 | } else { |
361 | printk_deferred_once(KERN_WARNING | |
362 | "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n", | |
363 | state->bp, state->task->comm, | |
5ed8d8bb | 364 | state->task->pid, next_bp); |
aa4f8534 | 365 | unwind_dump(state); |
24d86f59 | 366 | } |
946c1911 JP |
367 | the_end: |
368 | state->stack_info.type = STACK_TYPE_UNKNOWN; | |
369 | return false; | |
7c7900f8 JP |
370 | } |
371 | EXPORT_SYMBOL_GPL(unwind_next_frame); | |
372 | ||
373 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | |
374 | struct pt_regs *regs, unsigned long *first_frame) | |
375 | { | |
5ed8d8bb | 376 | unsigned long *bp; |
946c1911 | 377 | |
7c7900f8 JP |
378 | memset(state, 0, sizeof(*state)); |
379 | state->task = task; | |
a8b7a923 | 380 | state->got_irq = (regs); |
7c7900f8 | 381 | |
5ed8d8bb | 382 | /* Don't even attempt to start from user mode regs: */ |
7c7900f8 JP |
383 | if (regs && user_mode(regs)) { |
384 | state->stack_info.type = STACK_TYPE_UNKNOWN; | |
385 | return; | |
386 | } | |
387 | ||
946c1911 | 388 | bp = get_frame_pointer(task, regs); |
7c7900f8 | 389 | |
f4f34e1b JH |
390 | /* |
391 | * If we crash with IP==0, the last successfully executed instruction | |
392 | * was probably an indirect function call with a NULL function pointer. | |
393 | * That means that SP points into the middle of an incomplete frame: | |
394 | * *SP is a return pointer, and *(SP-sizeof(unsigned long)) is where we | |
395 | * would have written a frame pointer if we hadn't crashed. | |
396 | * Pretend that the frame is complete and that BP points to it, but save | |
397 | * the real BP so that we can use it when looking for the next frame. | |
398 | */ | |
3c88c692 | 399 | if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) { |
f4f34e1b | 400 | state->next_bp = bp; |
3c88c692 | 401 | bp = ((unsigned long *)regs->sp) - 1; |
f4f34e1b JH |
402 | } |
403 | ||
5ed8d8bb JP |
404 | /* Initialize stack info and make sure the frame data is accessible: */ |
405 | get_stack_info(bp, state->task, &state->stack_info, | |
7c7900f8 | 406 | &state->stack_mask); |
5ed8d8bb | 407 | update_stack_state(state, bp); |
7c7900f8 JP |
408 | |
409 | /* | |
410 | * The caller can provide the address of the first frame directly | |
411 | * (first_frame) or indirectly (regs->sp) to indicate which stack frame | |
412 | * to start unwinding at. Skip ahead until we reach it. | |
413 | */ | |
414 | while (!unwind_done(state) && | |
415 | (!on_stack(&state->stack_info, first_frame, sizeof(long)) || | |
f4f34e1b | 416 | (state->next_bp == NULL && state->bp < first_frame))) |
7c7900f8 JP |
417 | unwind_next_frame(state); |
418 | } | |
419 | EXPORT_SYMBOL_GPL(__unwind_start); |