| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef _LINUX_PTRACE_H |
| 3 | #define _LINUX_PTRACE_H |
| 4 | |
| 5 | #include <linux/compiler.h> /* For unlikely. */ |
| 6 | #include <linux/sched.h> /* For struct task_struct. */ |
| 7 | #include <linux/sched/signal.h> /* For send_sig(), same_thread_group(), etc. */ |
| 8 | #include <linux/err.h> /* for IS_ERR_VALUE */ |
| 9 | #include <linux/bug.h> /* For BUG_ON. */ |
| 10 | #include <linux/pid_namespace.h> /* For task_active_pid_ns. */ |
| 11 | #include <uapi/linux/ptrace.h> |
| 12 | #include <linux/seccomp.h> |
| 13 | |
/*
 * Syscall description plus a stack pointer value.
 *
 * struct seccomp_data is user API and must not be modified, so the stack
 * pointer is carried next to it in this wrapper rather than added to it.
 */
struct syscall_info {
	__u64			sp;	/* stack pointer value */
	struct seccomp_data	data;	/* unmodified seccomp description */
};
| 19 | |
/*
 * NOTE(review): only the prototype is visible here.  Presumably transfers up
 * to @len bytes between @buf and address @addr in @tsk's address space, with
 * @gup_flags selecting the kind of access -- confirm against the definition.
 */
extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
			    void *buf, int len, unsigned int gup_flags);
| 22 | |
| 23 | /* |
| 24 | * Ptrace flags |
| 25 | * |
 * The ownership rules for task->ptrace, which holds the ptrace
 * flags, are simple.  When a task is running it owns its task->ptrace
 * flags.  When a task is stopped the ptracer owns task->ptrace.
| 29 | */ |
| 30 | |
#define PT_SEIZED	0x00010000	/* SEIZE used, enable new behavior */
#define PT_PTRACED	0x00000001

/* Left shift applied to event numbers and PTRACE_O_* options below. */
#define PT_OPT_FLAG_SHIFT	3
/* PT_TRACE_* event enable flags */
/* Bit in task->ptrace that enables reporting of PTRACE_EVENT_* @event. */
#define PT_EVENT_FLAG(event)	(1 << (PT_OPT_FLAG_SHIFT + (event)))
/* When set, ptrace_report_syscall() ORs 0x80 into its SIGTRAP stop code. */
#define PT_TRACESYSGOOD		PT_EVENT_FLAG(0)
#define PT_TRACE_FORK		PT_EVENT_FLAG(PTRACE_EVENT_FORK)
#define PT_TRACE_VFORK		PT_EVENT_FLAG(PTRACE_EVENT_VFORK)
#define PT_TRACE_CLONE		PT_EVENT_FLAG(PTRACE_EVENT_CLONE)
#define PT_TRACE_EXEC		PT_EVENT_FLAG(PTRACE_EVENT_EXEC)
#define PT_TRACE_VFORK_DONE	PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE)
#define PT_TRACE_EXIT		PT_EVENT_FLAG(PTRACE_EVENT_EXIT)
#define PT_TRACE_SECCOMP	PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)

/* PTRACE_O_* option bits shifted by PT_OPT_FLAG_SHIFT. */
#define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
#define PT_SUSPEND_SECCOMP	(PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
| 48 | |
/*
 * Core ptrace entry points.  Only the prototypes are visible in this header;
 * the notes below describe how this header itself uses them.
 */
extern long arch_ptrace(struct task_struct *child, long request,
			unsigned long addr, unsigned long data);
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
extern void ptrace_disable(struct task_struct *);
extern int ptrace_request(struct task_struct *child, long request,
			  unsigned long addr, unsigned long data);
/*
 * Called by ptrace_event() and ptrace_report_syscall() with the stop code in
 * @exit_code.  ptrace_report_syscall() treats a nonzero return value as a
 * signal number to send to current.
 */
extern int ptrace_notify(int exit_code, unsigned long message);
/*
 * Called by ptrace_init_task() with current->parent as @new_parent and
 * current->ptracer_cred as @ptracer_cred.
 */
extern void __ptrace_link(struct task_struct *child,
			  struct task_struct *new_parent,
			  const struct cred *ptracer_cred);
/* Called by ptrace_unlink() when @child->ptrace is nonzero. */
extern void __ptrace_unlink(struct task_struct *child);
extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
/*
 * Mode bits for ptrace_may_access().  A mode combines an access type
 * (READ or ATTACH) with exactly one credential selector (FSCREDS or
 * REALCREDS); see the kerneldoc on ptrace_may_access() for when each
 * selector applies.
 * NOTE(review): PTRACE_MODE_NOAUDIT presumably suppresses audit records for
 * the check -- confirm against the implementation.
 */
#define PTRACE_MODE_READ	0x01
#define PTRACE_MODE_ATTACH	0x02
#define PTRACE_MODE_NOAUDIT	0x04
#define PTRACE_MODE_FSCREDS	0x08
#define PTRACE_MODE_REALCREDS	0x10

/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
#define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
| 73 | |
/**
 * ptrace_may_access - check whether the caller is permitted to access
 * a target task.
 * @task: target task
 * @mode: selects type of access and caller credentials; built from the
 *        PTRACE_MODE_* flags
 *
 * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must
 * be set in @mode to specify whether the access was requested through
 * a filesystem syscall (should use effective capabilities and fsuid
 * of the caller) or through an explicit syscall such as
 * process_vm_writev or ptrace (and should use the real credentials).
 *
 * Return: true on success, false on denial.
 */
extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
| 89 | |
| 90 | static inline int ptrace_reparented(struct task_struct *child) |
| 91 | { |
| 92 | return !same_thread_group(child->real_parent, child->parent); |
| 93 | } |
| 94 | |
| 95 | static inline void ptrace_unlink(struct task_struct *child) |
| 96 | { |
| 97 | if (unlikely(child->ptrace)) |
| 98 | __ptrace_unlink(child); |
| 99 | } |
| 100 | |
/*
 * NOTE(review): only the prototypes are visible here.  Judging by the names
 * these are shared implementations of the PEEKDATA/POKEDATA requests acting
 * on address @addr of @tsk -- confirm against the definitions.
 */
int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);
int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);
| 105 | |
| 106 | /** |
| 107 | * ptrace_parent - return the task that is tracing the given task |
| 108 | * @task: task to consider |
| 109 | * |
| 110 | * Returns %NULL if no one is tracing @task, or the &struct task_struct |
| 111 | * pointer to its tracer. |
| 112 | * |
| 113 | * Must called under rcu_read_lock(). The pointer returned might be kept |
| 114 | * live only by RCU. During exec, this may be called with task_lock() held |
| 115 | * on @task, still held from when check_unsafe_exec() was called. |
| 116 | */ |
| 117 | static inline struct task_struct *ptrace_parent(struct task_struct *task) |
| 118 | { |
| 119 | if (unlikely(task->ptrace)) |
| 120 | return rcu_dereference(task->parent); |
| 121 | return NULL; |
| 122 | } |
| 123 | |
| 124 | /** |
| 125 | * ptrace_event_enabled - test whether a ptrace event is enabled |
| 126 | * @task: ptracee of interest |
| 127 | * @event: %PTRACE_EVENT_* to test |
| 128 | * |
| 129 | * Test whether @event is enabled for ptracee @task. |
| 130 | * |
| 131 | * Returns %true if @event is enabled, %false otherwise. |
| 132 | */ |
| 133 | static inline bool ptrace_event_enabled(struct task_struct *task, int event) |
| 134 | { |
| 135 | return task->ptrace & PT_EVENT_FLAG(event); |
| 136 | } |
| 137 | |
| 138 | /** |
| 139 | * ptrace_event - possibly stop for a ptrace event notification |
| 140 | * @event: %PTRACE_EVENT_* value to report |
| 141 | * @message: value for %PTRACE_GETEVENTMSG to return |
| 142 | * |
| 143 | * Check whether @event is enabled and, if so, report @event and @message |
| 144 | * to the ptrace parent. |
| 145 | * |
| 146 | * Called without locks. |
| 147 | */ |
| 148 | static inline void ptrace_event(int event, unsigned long message) |
| 149 | { |
| 150 | if (unlikely(ptrace_event_enabled(current, event))) { |
| 151 | ptrace_notify((event << 8) | SIGTRAP, message); |
| 152 | } else if (event == PTRACE_EVENT_EXEC) { |
| 153 | /* legacy EXEC report via SIGTRAP */ |
| 154 | if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED) |
| 155 | send_sig(SIGTRAP, current, 0); |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | /** |
| 160 | * ptrace_event_pid - possibly stop for a ptrace event notification |
| 161 | * @event: %PTRACE_EVENT_* value to report |
| 162 | * @pid: process identifier for %PTRACE_GETEVENTMSG to return |
| 163 | * |
| 164 | * Check whether @event is enabled and, if so, report @event and @pid |
| 165 | * to the ptrace parent. @pid is reported as the pid_t seen from the |
| 166 | * ptrace parent's pid namespace. |
| 167 | * |
| 168 | * Called without locks. |
| 169 | */ |
| 170 | static inline void ptrace_event_pid(int event, struct pid *pid) |
| 171 | { |
| 172 | /* |
| 173 | * FIXME: There's a potential race if a ptracer in a different pid |
| 174 | * namespace than parent attaches between computing message below and |
| 175 | * when we acquire tasklist_lock in ptrace_stop(). If this happens, |
| 176 | * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG. |
| 177 | */ |
| 178 | unsigned long message = 0; |
| 179 | struct pid_namespace *ns; |
| 180 | |
| 181 | rcu_read_lock(); |
| 182 | ns = task_active_pid_ns(rcu_dereference(current->parent)); |
| 183 | if (ns) |
| 184 | message = pid_nr_ns(pid, ns); |
| 185 | rcu_read_unlock(); |
| 186 | |
| 187 | ptrace_event(event, message); |
| 188 | } |
| 189 | |
| 190 | /** |
| 191 | * ptrace_init_task - initialize ptrace state for a new child |
| 192 | * @child: new child task |
| 193 | * @ptrace: true if child should be ptrace'd by parent's tracer |
| 194 | * |
| 195 | * This is called immediately after adding @child to its parent's children |
| 196 | * list. @ptrace is false in the normal case, and true to ptrace @child. |
| 197 | * |
| 198 | * Called with current's siglock and write_lock_irq(&tasklist_lock) held. |
| 199 | */ |
| 200 | static inline void ptrace_init_task(struct task_struct *child, bool ptrace) |
| 201 | { |
| 202 | INIT_LIST_HEAD(&child->ptrace_entry); |
| 203 | INIT_LIST_HEAD(&child->ptraced); |
| 204 | child->jobctl = 0; |
| 205 | child->ptrace = 0; |
| 206 | child->parent = child->real_parent; |
| 207 | |
| 208 | if (unlikely(ptrace) && current->ptrace) { |
| 209 | child->ptrace = current->ptrace; |
| 210 | __ptrace_link(child, current->parent, current->ptracer_cred); |
| 211 | |
| 212 | if (child->ptrace & PT_SEIZED) |
| 213 | task_set_jobctl_pending(child, JOBCTL_TRAP_STOP); |
| 214 | else |
| 215 | sigaddset(&child->pending.signal, SIGSTOP); |
| 216 | } |
| 217 | else |
| 218 | child->ptracer_cred = NULL; |
| 219 | } |
| 220 | |
/**
 * ptrace_release_task - final ptrace-related cleanup of a zombie being reaped
 * @task: task in %EXIT_DEAD state
 *
 * Unlinks @task via ptrace_unlink() and asserts that its ptrace lists are
 * empty around that call.
 *
 * Called with write_lock(&tasklist_lock) held.
 */
static inline void ptrace_release_task(struct task_struct *task)
{
	/* NOTE(review): ->ptraced is presumably the list of tasks @task traces. */
	BUG_ON(!list_empty(&task->ptraced));
	ptrace_unlink(task);
	/* After unlinking, @task must not remain on any ->ptrace_entry list. */
	BUG_ON(!list_empty(&task->ptrace_entry));
}
| 233 | |
#ifndef force_successful_syscall_return
/*
 * System call handlers that, upon successful completion, need to return a
 * negative value should call force_successful_syscall_return() right before
 * returning.  On architectures where the syscall convention provides for a
 * separate error flag (e.g., alpha, ia64, ppc{,64}, sparc{,64}, possibly
 * others), this macro can be used to ensure that the error flag will not get
 * set.  On architectures which do not support a separate error flag, the macro
 * is a no-op and the spurious error condition needs to be filtered out by some
 * other means (e.g., in user-level, by passing an extra argument to the
 * syscall handler, or something along those lines).
 *
 * The definition below is that no-op fallback; it is used only when no
 * definition of the macro has been seen before this point.
 */
#define force_successful_syscall_return() do { } while (0)
#endif
| 248 | |
#ifndef is_syscall_success
/*
 * On most systems we can tell if a syscall is a success based on if the retval
 * is an error value.  On some systems like ia64 and powerpc they have different
 * indicators of success/failure and must define their own.
 *
 * This fallback takes the return value from regs_return_value(@regs) and
 * reports success when IS_ERR_VALUE() does not classify it as an error.
 */
#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
#endif
| 257 | |
| 258 | /* |
| 259 | * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__. |
| 260 | * |
| 261 | * These do-nothing inlines are used when the arch does not |
| 262 | * implement single-step. The kerneldoc comments are here |
| 263 | * to document the interface for all arch definitions. |
| 264 | */ |
| 265 | |
#ifndef arch_has_single_step
/**
 * arch_has_single_step - does this CPU support user-mode single-step?
 *
 * If this is defined, then there must be function declarations or
 * inlines for user_enable_single_step() and user_disable_single_step().
 * arch_has_single_step() should evaluate to nonzero iff the machine
 * supports instruction single-step for user mode.
 * It can be a constant or it can test a CPU feature bit.
 */
#define arch_has_single_step()		(0)

/**
 * user_enable_single_step - single-step in user-mode task
 * @task:  either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_single_step() has returned nonzero.
 * Set @task so that when it returns to user mode, it will trap after the
 * next single instruction executes.  If arch_has_block_step() is defined,
 * this must clear the effects of user_enable_block_step() too.
 */
static inline void user_enable_single_step(struct task_struct *task)
{
	/* Fallback for arches without single-step: reaching this is a bug. */
	BUG();			/* This can never be called. */
}

/**
 * user_disable_single_step - cancel user-mode single-step
 * @task:  either current or a task stopped in %TASK_TRACED
 *
 * Clear @task of the effects of user_enable_single_step() and
 * user_enable_block_step().  This can be called whether or not either
 * of those was ever called on @task, and even if arch_has_single_step()
 * returned zero.
 */
static inline void user_disable_single_step(struct task_struct *task)
{
	/* Nothing to undo: single-step can never have been enabled. */
}
#else
/* The arch defined arch_has_single_step(), so it supplies these itself. */
extern void user_enable_single_step(struct task_struct *);
extern void user_disable_single_step(struct task_struct *);
#endif	/* arch_has_single_step */
| 308 | |
#ifndef arch_has_block_step
/**
 * arch_has_block_step - does this CPU support user-mode block-step?
 *
 * If this is defined, then there must be a function declaration or inline
 * for user_enable_block_step(), and arch_has_single_step() must be defined
 * too.  arch_has_block_step() should evaluate to nonzero iff the machine
 * supports step-until-branch for user mode.  It can be a constant or it
 * can test a CPU feature bit.
 */
#define arch_has_block_step()		(0)

/**
 * user_enable_block_step - step until branch in user-mode task
 * @task:  either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_block_step() has returned nonzero,
 * and will never be called when single-instruction stepping is being used.
 * Set @task so that when it returns to user mode, it will trap after the
 * next branch or trap taken.
 */
static inline void user_enable_block_step(struct task_struct *task)
{
	/* Fallback for arches without block-step: reaching this is a bug. */
	BUG();			/* This can never be called. */
}
#else
/* The arch defined arch_has_block_step(), so it supplies this itself. */
extern void user_enable_block_step(struct task_struct *);
#endif	/* arch_has_block_step */
| 337 | |
| 338 | #ifdef ARCH_HAS_USER_SINGLE_STEP_REPORT |
| 339 | extern void user_single_step_report(struct pt_regs *regs); |
| 340 | #else |
| 341 | static inline void user_single_step_report(struct pt_regs *regs) |
| 342 | { |
| 343 | kernel_siginfo_t info; |
| 344 | clear_siginfo(&info); |
| 345 | info.si_signo = SIGTRAP; |
| 346 | info.si_errno = 0; |
| 347 | info.si_code = SI_USER; |
| 348 | info.si_pid = 0; |
| 349 | info.si_uid = 0; |
| 350 | force_sig_info(&info); |
| 351 | } |
| 352 | #endif |
| 353 | |
#ifndef arch_ptrace_stop_needed
/**
 * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
 *
 * This is called with the siglock held, to decide whether or not it's
 * necessary to release the siglock and call arch_ptrace_stop().  It can be
 * defined to a constant if arch_ptrace_stop() is never required, or always
 * is.  On machines where this makes sense, it should be defined to a quick
 * test to optimize out calling arch_ptrace_stop() when it would be
 * superfluous.  For example, if the thread has not been back to user mode
 * since the last stop, the thread state might indicate that nothing needs
 * to be done.
 *
 * This is guaranteed to be invoked once before a task stops for ptrace and
 * may include arch-specific operations necessary prior to a ptrace stop.
 *
 * The fallback below is the constant 0, i.e. arch_ptrace_stop() is reported
 * as not needed.
 */
#define arch_ptrace_stop_needed()	(0)
#endif
| 372 | |
#ifndef arch_ptrace_stop
/**
 * arch_ptrace_stop - Do machine-specific work before stopping for ptrace
 *
 * This is called with no locks held when arch_ptrace_stop_needed() has
 * just returned nonzero.  It is allowed to block, e.g. for user memory
 * access.  The arch can have machine-specific work to be done before
 * ptrace stops.  On ia64, register backing store gets written back to user
 * memory here.  Since this can be costly (requires dropping the siglock),
 * we only do it when the arch requires it for this particular stop, as
 * indicated by arch_ptrace_stop_needed().
 *
 * The fallback below is an empty statement: there is no such work to do.
 */
#define arch_ptrace_stop()		do { } while (0)
#endif
| 387 | |
/* Fallback: the register state of current, obtained through task_pt_regs(). */
#ifndef current_pt_regs
#define current_pt_regs() task_pt_regs(current)
#endif

/* Fallback: user_stack_pointer() applied to current_pt_regs(). */
#ifndef current_user_stack_pointer
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif

/* Fallback: the exception address is simply instruction_pointer(@x). */
#ifndef exception_ip
#define exception_ip(x) instruction_pointer(x)
#endif
| 399 | |
/*
 * NOTE(review): only the prototype is visible here; presumably fills @info
 * with the syscall @target is currently in -- confirm against the definition.
 */
extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);

/*
 * NOTE(review): only the prototype is visible here; presumably a hook for
 * adjusting @act/@oact for compat ABIs -- confirm against the definition.
 */
extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
| 403 | |
| 404 | /* |
| 405 | * ptrace report for syscall entry and exit looks identical. |
| 406 | */ |
| 407 | static inline int ptrace_report_syscall(unsigned long message) |
| 408 | { |
| 409 | int ptrace = current->ptrace; |
| 410 | int signr; |
| 411 | |
| 412 | if (!(ptrace & PT_PTRACED)) |
| 413 | return 0; |
| 414 | |
| 415 | signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), |
| 416 | message); |
| 417 | |
| 418 | /* |
| 419 | * this isn't the same as continuing with a signal, but it will do |
| 420 | * for normal use. strace only continues with a signal if the |
| 421 | * stopping signal is not SIGTRAP. -brl |
| 422 | */ |
| 423 | if (signr) |
| 424 | send_sig(signr, current, 1); |
| 425 | |
| 426 | return fatal_signal_pending(current); |
| 427 | } |
| 428 | |
| 429 | /** |
| 430 | * ptrace_report_syscall_entry - task is about to attempt a system call |
| 431 | * @regs: user register state of current task |
| 432 | * |
| 433 | * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or |
| 434 | * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just |
| 435 | * entered the kernel for a system call. Full user register state is |
| 436 | * available here. Changing the values in @regs can affect the system |
| 437 | * call number and arguments to be tried. It is safe to block here, |
| 438 | * preventing the system call from beginning. |
| 439 | * |
| 440 | * Returns zero normally, or nonzero if the calling arch code should abort |
| 441 | * the system call. That must prevent normal entry so no system call is |
| 442 | * made. If @task ever returns to user mode after this, its register state |
| 443 | * is unspecified, but should be something harmless like an %ENOSYS error |
| 444 | * return. It should preserve enough information so that syscall_rollback() |
| 445 | * can work (see asm-generic/syscall.h). |
| 446 | * |
| 447 | * Called without locks, just after entering kernel mode. |
| 448 | */ |
| 449 | static inline __must_check int ptrace_report_syscall_entry( |
| 450 | struct pt_regs *regs) |
| 451 | { |
| 452 | return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); |
| 453 | } |
| 454 | |
| 455 | /** |
| 456 | * ptrace_report_syscall_exit - task has just finished a system call |
| 457 | * @regs: user register state of current task |
| 458 | * @step: nonzero if simulating single-step or block-step |
| 459 | * |
| 460 | * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when |
| 461 | * the current task has just finished an attempted system call. Full |
| 462 | * user register state is available here. It is safe to block here, |
| 463 | * preventing signals from being processed. |
| 464 | * |
| 465 | * If @step is nonzero, this report is also in lieu of the normal |
| 466 | * trap that would follow the system call instruction because |
| 467 | * user_enable_block_step() or user_enable_single_step() was used. |
| 468 | * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. |
| 469 | * |
| 470 | * Called without locks, just before checking for pending signals. |
| 471 | */ |
| 472 | static inline void ptrace_report_syscall_exit(struct pt_regs *regs, int step) |
| 473 | { |
| 474 | if (step) |
| 475 | user_single_step_report(regs); |
| 476 | else |
| 477 | ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT); |
| 478 | } |
| 479 | #endif |