Commit | Line | Data |
---|---|---|
142781e1 TG |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef __LINUX_ENTRYCOMMON_H | |
3 | #define __LINUX_ENTRYCOMMON_H | |
4 | ||
5 | #include <linux/tracehook.h> | |
6 | #include <linux/syscalls.h> | |
7 | #include <linux/seccomp.h> | |
8 | #include <linux/sched.h> | |
9 | ||
10 | #include <asm/entry-common.h> | |
11 | ||
12 | /* | |
13 | * Define dummy (zero) _TIF work flags if not defined by the architecture or | |
14 | * for disabled functionality, so they drop out of the work masks below. | |
15 | */ | |
16 | #ifndef _TIF_SYSCALL_EMU | |
17 | # define _TIF_SYSCALL_EMU (0) | |
18 | #endif | |
19 | ||
20 | #ifndef _TIF_SYSCALL_TRACEPOINT | |
21 | # define _TIF_SYSCALL_TRACEPOINT (0) | |
22 | #endif | |
23 | ||
24 | #ifndef _TIF_SECCOMP | |
25 | # define _TIF_SECCOMP (0) | |
26 | #endif | |
27 | ||
28 | #ifndef _TIF_SYSCALL_AUDIT | |
29 | # define _TIF_SYSCALL_AUDIT (0) | |
30 | #endif | |
31 | ||
a9f3a74a TG |
32 | #ifndef _TIF_PATCH_PENDING |
33 | # define _TIF_PATCH_PENDING (0) | |
34 | #endif | |
35 | ||
36 | #ifndef _TIF_UPROBE | |
37 | # define _TIF_UPROBE (0) | |
38 | #endif | |
39 | ||
142781e1 TG |
40 | /* |
41 | * TIF flags handled in syscall_enter_from_user_mode() | |
42 | */ | |
43 | #ifndef ARCH_SYSCALL_ENTER_WORK | |
44 | # define ARCH_SYSCALL_ENTER_WORK (0) | |
45 | #endif | |
46 | ||
47 | #define SYSCALL_ENTER_WORK \ | |
48 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ | |
49 | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \ | |
50 | ARCH_SYSCALL_ENTER_WORK) | |
51 | ||
a9f3a74a TG |
52 | /* |
53 | * TIF flags handled in syscall_exit_to_user_mode() | |
54 | */ | |
55 | #ifndef ARCH_SYSCALL_EXIT_WORK | |
56 | # define ARCH_SYSCALL_EXIT_WORK (0) | |
57 | #endif | |
58 | ||
59 | #define SYSCALL_EXIT_WORK \ | |
60 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
61 | _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK) | |
62 | ||
63 | /* | |
64 | * TIF flags handled in exit_to_user_mode_loop() | |
65 | */ | |
66 | #ifndef ARCH_EXIT_TO_USER_MODE_WORK | |
67 | # define ARCH_EXIT_TO_USER_MODE_WORK (0) | |
68 | #endif | |
69 | ||
70 | #define EXIT_TO_USER_MODE_WORK \ | |
71 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
72 | _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \ | |
73 | ARCH_EXIT_TO_USER_MODE_WORK) | |
74 | ||
142781e1 TG |
75 | /** |
76 | * arch_check_user_regs - Architecture specific sanity check for user mode regs | |
77 | * @regs: Pointer to current's pt_regs | |
78 | * | |
79 | * Defaults to an empty implementation. Can be replaced by architecture | |
80 | * specific code which also defines a macro named arch_check_user_regs. | |
81 | * | |
82 | * Invoked from syscall_enter_from_user_mode() in the non-instrumentable | |
83 | * section. Use __always_inline so the compiler cannot push it out of line | |
84 | * and make it instrumentable. | |
85 | */ | |
86 | static __always_inline void arch_check_user_regs(struct pt_regs *regs); | |
87 | ||
88 | #ifndef arch_check_user_regs | |
89 | static __always_inline void arch_check_user_regs(struct pt_regs *regs) {} | |
90 | #endif | |
91 | ||
92 | /** | |
93 | * arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry() | |
94 | * @regs: Pointer to current's pt_regs | |
95 | * | |
96 | * Returns: 0 on success or an error code to skip the syscall. | |
97 | * | |
98 | * Defaults to tracehook_report_syscall_entry(). Can be replaced by | |
99 | * architecture specific code which also defines a macro of the same name. | |
100 | * | |
101 | * Invoked from syscall_enter_from_user_mode() | |
102 | */ | |
103 | static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs); | |
104 | ||
105 | #ifndef arch_syscall_enter_tracehook | |
106 | static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs) | |
107 | { | |
108 | return tracehook_report_syscall_entry(regs); | |
109 | } | |
110 | #endif | |
111 | ||
112 | /** | |
113 | * syscall_enter_from_user_mode - Check and handle work before invoking | |
114 | * a syscall | |
115 | * @regs: Pointer to current's pt_regs | |
116 | * @syscall: The syscall number | |
117 | * | |
118 | * Invoked from architecture specific syscall entry code with interrupts | |
119 | * disabled. The calling code has to be non-instrumentable. When the | |
120 | * function returns all state is correct and the subsequent functions can be | |
121 | * instrumented. | |
122 | * | |
123 | * Returns: The original or a modified syscall number | |
124 | * | |
125 | * If the returned syscall number is -1 then the syscall should be | |
126 | * skipped. In this case the caller may invoke syscall_set_error() or | |
127 | * syscall_set_return_value() first. If neither of those is called and -1 | |
128 | * is returned, then the syscall will fail with ENOSYS. | |
129 | * | |
130 | * The following functionality is handled here: | |
131 | * | |
132 | * 1) Establish state (lockdep, RCU (context tracking), tracing) | |
133 | * 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(), | |
134 | * __secure_computing(), trace_sys_enter() | |
135 | * 3) Invocation of audit_syscall_entry() | |
136 | */ | |
137 | long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); | |
138 | ||
a9f3a74a TG |
139 | /** |
140 | * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable() | |
141 | * @ti_work: Cached TIF flags gathered with interrupts disabled | |
142 | * | |
143 | * Defaults to local_irq_enable(), ignoring @ti_work. Can be supplied by | |
144 | * architecture specific code which also defines a macro of the same name. | |
145 | */ | |
146 | static inline void local_irq_enable_exit_to_user(unsigned long ti_work); | |
147 | ||
148 | #ifndef local_irq_enable_exit_to_user | |
149 | static inline void local_irq_enable_exit_to_user(unsigned long ti_work) | |
150 | { | |
151 | local_irq_enable(); | |
152 | } | |
153 | #endif | |
154 | ||
155 | /** | |
156 | * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable() | |
157 | * | |
158 | * Defaults to local_irq_disable(). Can be supplied by architecture specific | |
159 | * code which also defines a macro of the same name. | |
160 | */ | |
161 | static inline void local_irq_disable_exit_to_user(void); | |
162 | ||
163 | #ifndef local_irq_disable_exit_to_user | |
164 | static inline void local_irq_disable_exit_to_user(void) | |
165 | { | |
166 | local_irq_disable(); | |
167 | } | |
168 | #endif | |
169 | ||
170 | /** | |
171 | * arch_exit_to_user_mode_work - Architecture specific TIF work for exit | |
172 | * to user mode. | |
173 | * @regs: Pointer to current's pt_regs | |
174 | * @ti_work: Cached TIF flags gathered with interrupts disabled | |
175 | * | |
176 | * Invoked from exit_to_user_mode_loop() with interrupts enabled | |
177 | * | |
178 | * Defaults to NOOP. Can be supplied by architecture specific code. | |
179 | */ | |
180 | static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, | |
181 | unsigned long ti_work); | |
182 | ||
183 | #ifndef arch_exit_to_user_mode_work | |
184 | static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, | |
185 | unsigned long ti_work) | |
186 | { | |
187 | } | |
188 | #endif | |
189 | ||
190 | /** | |
191 | * arch_exit_to_user_mode_prepare - Architecture specific preparation for | |
192 | * exit to user mode. | |
193 | * @regs: Pointer to current's pt_regs | |
194 | * @ti_work: Cached TIF flags gathered with interrupts disabled | |
195 | * | |
196 | * Invoked from exit_to_user_mode_prepare() with interrupts disabled as the last | |
197 | * function before return. Defaults to NOOP. | |
198 | */ | |
199 | static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, | |
200 | unsigned long ti_work); | |
201 | ||
202 | #ifndef arch_exit_to_user_mode_prepare | |
203 | static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, | |
204 | unsigned long ti_work) | |
205 | { | |
206 | } | |
207 | #endif | |
208 | ||
209 | /** | |
210 | * arch_exit_to_user_mode - Architecture specific final work before | |
211 | * exit to user mode. | |
212 | * | |
213 | * Invoked from exit_to_user_mode() with interrupts disabled as the last | |
214 | * function before return. Defaults to NOOP. | |
215 | * | |
216 | * This needs to be __always_inline because it is non-instrumentable code | |
217 | * invoked after context tracking switched to user mode. | |
218 | * | |
219 | * An architecture implementation must not do anything complex, no locking | |
220 | * etc. The main purpose is for speculation mitigations. | |
221 | */ | |
222 | static __always_inline void arch_exit_to_user_mode(void); | |
223 | ||
224 | #ifndef arch_exit_to_user_mode | |
225 | static __always_inline void arch_exit_to_user_mode(void) { } | |
226 | #endif | |
227 | ||
228 | /** | |
229 | * arch_do_signal - Architecture specific signal delivery function | |
230 | * @regs: Pointer to current's pt_regs | |
231 | * | |
232 | * Invoked from exit_to_user_mode_loop(). | |
233 | */ | |
234 | void arch_do_signal(struct pt_regs *regs); | |
235 | ||
236 | /** | |
237 | * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() | |
238 | * @regs: Pointer to current's pt_regs | |
239 | * @step: Indicator for single step | |
240 | * | |
241 | * Defaults to tracehook_report_syscall_exit(). Can be replaced by | |
242 | * architecture specific code which also defines a macro of the same name. | |
243 | * | |
244 | * Invoked from syscall_exit_to_user_mode() | |
245 | */ | |
246 | static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step); | |
247 | ||
248 | #ifndef arch_syscall_exit_tracehook | |
249 | static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) | |
250 | { | |
251 | tracehook_report_syscall_exit(regs, step); | |
252 | } | |
253 | #endif | |
254 | ||
255 | /** | |
256 | * syscall_exit_to_user_mode - Handle work before returning to user mode | |
257 | * @regs: Pointer to current's pt_regs | |
258 | * | |
259 | * Invoked with interrupts enabled and fully valid regs. Returns with all | |
260 | * work handled, interrupts disabled such that the caller can immediately | |
261 | * switch to user mode. Called from architecture specific syscall and ret | |
262 | * from fork code. | |
263 | * | |
264 | * The call order is: | |
265 | * 1) One-time syscall exit work: | |
266 | * - rseq syscall exit | |
267 | * - audit | |
268 | * - syscall tracing | |
269 | * - tracehook (single stepping) | |
270 | * | |
271 | * 2) Preparatory work | |
272 | * - Exit to user mode loop (common TIF handling). Invokes | |
273 | * arch_exit_to_user_mode_work() for architecture specific TIF work | |
274 | * - Architecture specific one time work arch_exit_to_user_mode_prepare() | |
275 | * - Address limit and lockdep checks | |
276 | * | |
277 | * 3) Final transition (lockdep, tracing, context tracking, RCU). Invokes | |
278 | * arch_exit_to_user_mode() to handle e.g. speculation mitigations | |
279 | */ | |
280 | void syscall_exit_to_user_mode(struct pt_regs *regs); | |
281 | ||
142781e1 TG |
282 | /** |
283 | * irqentry_enter_from_user_mode - Establish state before invoking the irq handler | |
284 | * @regs: Pointer to current's pt_regs | |
285 | * | |
286 | * Invoked from architecture specific entry code with interrupts disabled. | |
287 | * Can only be called when the interrupt entry came from user mode. The | |
288 | * calling code must be non-instrumentable. When the function returns all | |
289 | * state is correct and the subsequent functions can be instrumented. | |
290 | * | |
291 | * The function establishes state (lockdep, RCU (context tracking), tracing) | |
292 | */ | |
293 | void irqentry_enter_from_user_mode(struct pt_regs *regs); | |
294 | ||
a9f3a74a TG |
295 | /** |
296 | * irqentry_exit_to_user_mode - Interrupt exit work | |
297 | * @regs: Pointer to current's pt_regs | |
298 | * | |
299 | * Invoked with interrupts disabled and fully valid regs. Returns with all | |
300 | * work handled, interrupts disabled such that the caller can immediately | |
301 | * switch to user mode. Called from architecture specific interrupt | |
302 | * handling code. | |
303 | * | |
304 | * The call order is #2 and #3 as described in syscall_exit_to_user_mode(). | |
305 | * Interrupt exit is not invoking #1 which is the syscall specific one time | |
306 | * work. | |
307 | */ | |
308 | void irqentry_exit_to_user_mode(struct pt_regs *regs); | |
309 | ||
a5497bab TG |
310 | #ifndef irqentry_state /* default state type; skipped if irqentry_state is already defined as a macro */ |
311 | typedef struct irqentry_state { | |
312 | bool exit_rcu; /* NOTE(review): presumably records whether rcu_irq_exit() is needed on exit - confirm */ | |
313 | } irqentry_state_t; | |
314 | #endif | |
315 | ||
316 | /** | |
317 | * irqentry_enter - Handle state tracking on ordinary interrupt entries | |
318 | * @regs: Pointer to pt_regs of interrupted context | |
319 | * | |
320 | * Invokes: | |
321 | * - lockdep irqflag state tracking as low level ASM entry disabled | |
322 | * interrupts. | |
323 | * | |
324 | * - Context tracking if the exception hit user mode. | |
325 | * | |
326 | * - The hardirq tracer to keep the state consistent as low level ASM | |
327 | * entry disabled interrupts. | |
328 | * | |
329 | * As a precondition, this requires that the entry came from user mode, | |
330 | * idle, or a kernel context in which RCU is watching. | |
331 | * | |
332 | * For kernel mode entries RCU handling is done conditionally. If RCU is | |
333 | * watching then the only RCU requirement is to check whether the tick has | |
334 | * to be restarted. If RCU is not watching then rcu_irq_enter() has to be | |
335 | * invoked on entry and rcu_irq_exit() on exit. | |
336 | * | |
337 | * Avoiding the rcu_irq_enter/exit() calls is an optimization but also | |
338 | * solves the problem of kernel mode pagefaults which can schedule, which | |
339 | * is not possible after invoking rcu_irq_enter() without undoing it. | |
340 | * | |
341 | * For user mode entries irqentry_enter_from_user_mode() is invoked to | |
342 | * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit | |
343 | * would not be possible. | |
344 | * | |
345 | * Returns: An opaque object that must be passed to irqentry_exit() | |
346 | */ | |
347 | irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); | |
348 | ||
349 | /** | |
350 | * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt | |
351 | * | |
352 | * Conditionally reschedules, with additional sanity checks. No arguments, no return value. | |
353 | */ | |
354 | void irqentry_exit_cond_resched(void); | |
355 | ||
356 | /** | |
357 | * irqentry_exit - Handle return from exception that used irqentry_enter() | |
358 | * @regs: Pointer to pt_regs (exception entry regs) | |
359 | * @state: Return value from matching call to irqentry_enter() | |
360 | * | |
361 | * Depending on the return target (kernel/user) this runs the necessary | |
362 | * preemption and work checks if possible and required and returns to | |
363 | * the caller with interrupts disabled and no further work pending. | |
364 | * | |
365 | * This is the last action before returning to the low level ASM code which | |
366 | * just needs to return to the appropriate context. | |
367 | * | |
368 | * Counterpart to irqentry_enter(). | |
369 | */ | |
370 | void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state); | |
371 | ||
142781e1 | 372 | #endif |