}
/*
- * We can return 0 to resume the syscall or anything else to go to phase
- * 2. If we resume the syscall, we need to put something appropriate in
- * regs->orig_ax.
- *
- * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
- * are fully functional.
- *
- * For phase 2's benefit, our return value is:
- * 0: resume the syscall
- * 1: go to phase 2; no seccomp phase 2 needed
- * anything else: go to phase 2; pass return value to seccomp
+ * Returns the syscall nr to run (which should match regs->orig_ax) or -1
+ * to skip the syscall.
*/
-unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
+static long syscall_trace_enter(struct pt_regs *regs)
{
+ u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned long ret = 0;
+ bool emulated = false;
u32 work;
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
+ if (unlikely(work & _TIF_SYSCALL_EMU))
+ emulated = true;
+
+ if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
+ tracehook_report_syscall_entry(regs))
+ return -1L;
+
+ if (emulated)
+ return -1L;
+
#ifdef CONFIG_SECCOMP
/*
- * Do seccomp first -- it should minimize exposure of other
- * code, and keeping seccomp fast is probably more valuable
- * than the rest of this.
+ * Do seccomp after ptrace, to catch any tracer changes.
*/
if (work & _TIF_SECCOMP) {
struct seccomp_data sd;
sd.args[5] = regs->bp;
}
- BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
- BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
-
- ret = seccomp_phase1(&sd);
- if (ret == SECCOMP_PHASE1_SKIP) {
- regs->orig_ax = -1;
- ret = 0;
- } else if (ret != SECCOMP_PHASE1_OK) {
- return ret; /* Go directly to phase 2 */
- }
-
- work &= ~_TIF_SECCOMP;
- }
-#endif
-
- /* Do our best to finish without phase 2. */
- if (work == 0)
- return ret; /* seccomp and/or nohz only (ret == 0 here) */
-
-#ifdef CONFIG_AUDITSYSCALL
- if (work == _TIF_SYSCALL_AUDIT) {
- /*
- * If there is no more work to be done except auditing,
- * then audit in phase 1. Phase 2 always audits, so, if
- * we audit here, then we can't go on to phase 2.
- */
- do_audit_syscall_entry(regs, arch);
- return 0;
- }
-#endif
-
- return 1; /* Something is enabled that we can't handle in phase 1 */
-}
-
-/* Returns the syscall nr to run (which should match regs->orig_ax). */
-long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
- unsigned long phase1_result)
-{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
- long ret = 0;
- u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
-
- if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
- BUG_ON(regs != task_pt_regs(current));
-
-#ifdef CONFIG_SECCOMP
- /*
- * Call seccomp_phase2 before running the other hooks so that
- * they can see any changes made by a seccomp tracer.
- */
- if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
- /* seccomp failures shouldn't expose any additional code. */
- return -1;
+ ret = __secure_computing(&sd);
+ if (ret == -1)
+ return ret;
}
#endif
- if (unlikely(work & _TIF_SYSCALL_EMU))
- ret = -1L;
-
- if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
- tracehook_report_syscall_entry(regs))
- ret = -1L;
-
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);
return ret ?: regs->orig_ax;
}
-long syscall_trace_enter(struct pt_regs *regs)
-{
- u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
- unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
-
- if (phase1_result == 0)
- return regs->orig_ax;
- else
- return syscall_trace_enter_phase2(regs, arch, phase1_result);
-}
-
#define EXIT_TO_USERMODE_LOOP_FLAGS \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
* handling, because syscall restart has a fixup for compat
* syscalls. The fixup is exercised by the ptrace_syscall_32
* selftest.
+ *
+ * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer
+ * special case only applies after poking regs and before the
+ * very next return to user mode.
*/
- ti->status &= ~TS_COMPAT;
+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif
user_enter_irqoff();
* have to worry about atomic accesses.
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
-#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
+ #ifdef CONFIG_COMPAT
+ #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
+ #endif
#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK 1
-static inline void set_restore_sigmask(void)
-{
- struct thread_info *ti = current_thread_info();
- ti->status |= TS_RESTORE_SIGMASK;
- WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
- return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
- struct thread_info *ti = current_thread_info();
- if (!(ti->status & TS_RESTORE_SIGMASK))
- return false;
- ti->status &= ~TS_RESTORE_SIGMASK;
- return true;
-}
static inline bool in_ia32_syscall(void)
{
buf = (void __user *)buf_val;
} get_user_catch(err);
- err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32));
+ err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
force_iret();
struct fpu *fpu = ¤t->thread.fpu;
/* redzone */
- if (config_enabled(CONFIG_X86_64))
+ if (IS_ENABLED(CONFIG_X86_64))
sp -= 128;
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(sp) == 0)
sp = current->sas_ss_sp + current->sas_ss_size;
- } else if (config_enabled(CONFIG_X86_32) &&
+ } else if (IS_ENABLED(CONFIG_X86_32) &&
!onsigstack &&
(regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
}
if (fpu->fpstate_active) {
- sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
+ sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
&buf_fx, &math_size);
*fpstate = (void __user *)sp;
}
static inline int is_ia32_compat_frame(void)
{
- return config_enabled(CONFIG_IA32_EMULATION) &&
+ return IS_ENABLED(CONFIG_IA32_EMULATION) &&
test_thread_flag(TIF_IA32);
}
static inline int is_ia32_frame(void)
{
- return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
+ return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame();
}
static inline int is_x32_frame(void)
{
- return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+ return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
}
static int
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
- #ifdef CONFIG_X86_64
- if (in_ia32_syscall())
+ /*
+ * This function is fundamentally broken as currently
+ * implemented.
+ *
+ * The idea is that we want to trigger a call to the
+ * restart_block() syscall and that we want in_ia32_syscall(),
+ * in_x32_syscall(), etc. to match whatever they were in the
+ * syscall being restarted. We assume that the syscall
+ * instruction at (regs->ip - 2) matches whatever syscall
+ * instruction we used to enter in the first place.
+ *
+ * The problem is that we can get here when ptrace pokes
+ * syscall-like values into regs even if we're not in a syscall
+ * at all.
+ *
+ * For now, we maintain historical behavior and guess based on
+ * stored state. We could do better by saving the actual
+ * syscall arch in restart_block or (with caveats on x32) by
+ * checking if regs->ip points to 'int $0x80'. The current
+ * behavior is incorrect if a tracer has a different bitness
+ * than the tracee.
+ */
+ #ifdef CONFIG_IA32_EMULATION
+ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI