// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <linux/rseq.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef ELF_COMPAT
#define ELF_COMPAT 0
#endif

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

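/*
 * For illustration, with ELF_MIN_ALIGN == 4096 (0x1000):
 *
 *	ELF_PAGESTART(0x12345)  == 0x12000	(round down to page start)
 *	ELF_PAGEOFFSET(0x12345) == 0x345	(offset within the page)
 *	ELF_PAGEALIGN(0x12345)  == 0x13000	(round up to the next page)
 */
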
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
#ifdef CONFIG_COREDUMP
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
#endif
};

#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))

static int set_brk(unsigned long start, unsigned long end, int prot)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		/*
		 * Map the last of the bss segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error = vm_brk_flags(start, end - start,
				prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			return error;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) (sp -= len)
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

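/*
 * For illustration: on a grow-down stack with p == 0x7ffffff0,
 * STACK_ALLOC(p, 16) leaves p == 0x7fffffe0 and evaluates to that new,
 * lower address; on a grow-up stack it evaluates to the old address and
 * advances p past the region.  Either way the caller receives the start
 * of a fresh 16-byte allocation.
 */
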
static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
		unsigned long interp_load_addr,
		unsigned long e_entry, unsigned long phdr_addr)
{
	struct mm_struct *mm = current->mm;
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	elf_addr_t flags = 0;
	int ei_index;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		*elf_info++ = id; \
		*elf_info++ = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, phdr_addr);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
		flags |= AT_FLAGS_PRESERVE_ARGV0;
	NEW_AUX_ENT(AT_FLAGS, flags);
	NEW_AUX_ENT(AT_ENTRY, e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->have_execfd) {
		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
	}
#ifdef CONFIG_RSEQ
	NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
	NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
#endif
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(elf_info, 0, (char *)mm->saved_auxv +
			sizeof(mm->saved_auxv) - (char *)elf_info);

	/* And advance past the AT_NULL entry. */
	elf_info += 2;

	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	if (mmap_read_lock_killable(mm))
		return -EINTR;
	vma = find_extend_vma(mm, bprm->p);
	mmap_read_unlock(mm);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = mm->arg_end = mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->arg_end = p;

	/* Populate list of envp pointers back to envp strings. */
	mm->env_end = mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

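/*
 * A sketch of the initial stack this function produces, from lower to
 * higher addresses (the usual System V ELF startup layout):
 *
 *	argc
 *	argv[0] ... argv[argc - 1], NULL
 *	envp[0] ... envp[envc - 1], NULL
 *	auxv id/value pairs, terminated by AT_NULL
 *	(random bytes, platform strings and the arg/env strings were
 *	 STACK_ALLOC'ed above this)
 */
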
static unsigned long elf_map(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	if ((type & MAP_FIXED_NOREPLACE) &&
	    PTR_ERR((void *)map_addr) == -EEXIST)
		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
			task_pid_nr(current), current->comm, (void *)addr);

	return(map_addr);
}

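/*
 * For illustration, a PT_LOAD with p_vaddr == 0x401234, p_offset == 0x1234
 * and p_filesz == 0x2000 becomes a mapping at address 0x401000 of length
 * ELF_PAGEALIGN(0x2000 + 0x234) == 0x3000 at file offset 0x1000: p_vaddr
 * and p_offset must be congruent modulo the page size, and the sub-page
 * slack is absorbed by the ELF_PAGESTART/ELF_PAGEOFFSET adjustments above.
 */
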
static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
{
	elf_addr_t min_addr = -1;
	elf_addr_t max_addr = 0;
	bool pt_load = false;
	int i;

	for (i = 0; i < nr; i++) {
		if (phdr[i].p_type == PT_LOAD) {
			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
			pt_load = true;
		}
	}
	return pt_load ? (max_addr - min_addr) : 0;
}

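/*
 * For illustration, two PT_LOAD entries at p_vaddr 0x0 (p_memsz 0x1000)
 * and p_vaddr 0x3000 (p_memsz 0x500) yield 0x3500: the span from the page
 * start of the lowest segment to the end of the highest one.
 */
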
static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
	ssize_t rv;

	rv = kernel_read(file, buf, len, &pos);
	if (unlikely(rv != len)) {
		return (rv < 0) ? rv : -EIO;
	}
	return 0;
}

static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
{
	unsigned long alignment = 0;
	int i;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			unsigned long p_align = cmds[i].p_align;

			/* skip non-power of two alignments as invalid */
			if (!is_power_of_2(p_align))
				continue;
			alignment = max(alignment, p_align);
		}
	}

	/* ensure we align to at least one page */
	return ELF_PAGEALIGN(alignment);
}

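/*
 * For illustration, PT_LOAD alignments of 0x1000 and 0x200000 (e.g. a
 * segment aligned for 2 MiB pages) yield 0x200000, while a bogus p_align
 * of 0x300 (not a power of two) is simply skipped.
 */
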
/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval = -1;
	unsigned int size;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers... */
	/* ...and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);

out:
	if (retval) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
			    bool has_interp, bool is_interp)
{
	int prot = 0;

	if (p_flags & PF_R)
		prot |= PROT_READ;
	if (p_flags & PF_W)
		prot |= PROT_WRITE;
	if (p_flags & PF_X)
		prot |= PROT_EXEC;

	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
		struct arch_elf_state *arch_state)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	int bss_prot = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE;
			int elf_prot = make_prot(eppnt->p_flags, arch_state,
						 true, true);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
			if (k > last_bss) {
				last_bss = k;
				bss_prot = elf_prot;
			}
		}
	}

	/*
	 * Now fill out the bss section: first pad the last page from
	 * the file up to the page boundary, and zero it from elf_bss
	 * up to the end of the page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * Next, align both the file and mem bss up to the page size,
	 * since this is where elf_bss was just zeroed up to, and where
	 * last_bss will end after the vm_brk_flags() below.
	 */
	elf_bss = ELF_PAGEALIGN(elf_bss);
	last_bss = ELF_PAGEALIGN(last_bss);
	/* Finally, if there is still more bss to allocate, do it. */
	if (last_bss > elf_bss) {
		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
				bss_prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			goto out;
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

static int parse_elf_property(const char *data, size_t *off, size_t datasz,
			      struct arch_elf_state *arch,
			      bool have_prev_type, u32 *prev_type)
{
	size_t o, step;
	const struct gnu_property *pr;
	int ret;

	if (*off == datasz)
		return -ENOENT;

	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
		return -EIO;
	o = *off;
	datasz -= *off;

	if (datasz < sizeof(*pr))
		return -ENOEXEC;
	pr = (const struct gnu_property *)(data + o);
	o += sizeof(*pr);
	datasz -= sizeof(*pr);

	if (pr->pr_datasz > datasz)
		return -ENOEXEC;

	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
	if (step > datasz)
		return -ENOEXEC;

	/* Properties are supposed to be unique and sorted on pr_type: */
	if (have_prev_type && pr->pr_type <= *prev_type)
		return -ENOEXEC;
	*prev_type = pr->pr_type;

	ret = arch_parse_elf_property(pr->pr_type, data + o,
				      pr->pr_datasz, ELF_COMPAT, arch);
	if (ret)
		return ret;

	*off = o + step;
	return 0;
}

#define NOTE_DATA_SZ SZ_1K
#define GNU_PROPERTY_TYPE_0_NAME "GNU"
#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))

static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
				struct arch_elf_state *arch)
{
	union {
		struct elf_note nhdr;
		char data[NOTE_DATA_SZ];
	} note;
	loff_t pos;
	ssize_t n;
	size_t off, datasz;
	int ret;
	bool have_prev_type;
	u32 prev_type;

	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
		return 0;

	/* load_elf_binary() shouldn't call us unless this is true... */
	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
		return -ENOEXEC;

	/* If the properties are crazy large, that's too bad (for now): */
	if (phdr->p_filesz > sizeof(note))
		return -ENOEXEC;

	pos = phdr->p_offset;
	n = kernel_read(f, &note, phdr->p_filesz, &pos);

	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
		return -EIO;

	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
	    strncmp(note.data + sizeof(note.nhdr),
		    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
		return -ENOEXEC;

	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
		       ELF_GNU_PROPERTY_ALIGN);
	if (off > n)
		return -ENOEXEC;

	if (note.nhdr.n_descsz > n - off)
		return -ENOEXEC;
	datasz = off + note.nhdr.n_descsz;

	have_prev_type = false;
	do {
		ret = parse_elf_property(note.data, &off, datasz, arch,
					 have_prev_type, &prev_type);
		have_prev_type = true;
	} while (!ret);

	return ret == -ENOENT ? 0 : ret;
}

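/*
 * A sketch of the note parsed above, as laid out in the file (each part
 * padded to ELF_GNU_PROPERTY_ALIGN):
 *
 *	Elf_Nhdr { n_namesz = 4, n_descsz, n_type = NT_GNU_PROPERTY_TYPE_0 }
 *	"GNU\0"
 *	{ pr_type, pr_datasz } + pr_datasz bytes of data,
 *	repeated, unique and sorted by pr_type
 */
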
static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_bias = 0, phdr_addr = 0;
	int first_pt_load = 1;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	struct elf_phdr *elf_property_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int bss_prot = 0;
	int retval, i;
	unsigned long elf_entry;
	unsigned long e_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
	struct elfhdr *interp_elf_ex = NULL;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
	struct mm_struct *mm;
	struct pt_regs *regs;

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(elf_ex))
		goto out;
	if (elf_check_fdpic(elf_ex))
		goto out;
	if (!bprm->file->f_op->mmap)
		goto out;

	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
		char *elf_interpreter;

		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
			elf_property_phdata = elf_ppnt;
			continue;
		}

		if (elf_ppnt->p_type != PT_INTERP)
			continue;

		/*
		 * This is the program interpreter used for shared libraries -
		 * for now assume that this is an a.out format binary.
		 */
		retval = -ENOEXEC;
		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
			goto out_free_ph;

		retval = -ENOMEM;
		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
		if (!elf_interpreter)
			goto out_free_ph;

		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
				  elf_ppnt->p_offset);
		if (retval < 0)
			goto out_free_interp;
		/* make sure path is NULL terminated */
		retval = -ENOEXEC;
		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
			goto out_free_interp;

		interpreter = open_exec(elf_interpreter);
		kfree(elf_interpreter);
		retval = PTR_ERR(interpreter);
		if (IS_ERR(interpreter))
			goto out_free_ph;

		/*
		 * If the binary is not readable then enforce mm->dumpable = 0
		 * regardless of the interpreter's permissions.
		 */
		would_dump(bprm, interpreter);

		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
		if (!interp_elf_ex) {
			retval = -ENOMEM;
			goto out_free_file;
		}

		/* Get the exec headers */
		retval = elf_read(interpreter, interp_elf_ex,
				  sizeof(*interp_elf_ex), 0);
		if (retval < 0)
			goto out_free_dentry;

		break;

out_free_interp:
		kfree(elf_interpreter);
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(interp_elf_ex) ||
		    elf_check_fdpic(interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_property_phdata = NULL;
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_GNU_PROPERTY:
				elf_property_phdata = elf_ppnt;
				break;

			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	retval = parse_elf_properties(interpreter ?: bprm->file,
				      elf_property_phdata, &arch_state);
	if (retval)
		goto out_free_dentry;

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(elf_ex,
				!!interpreter, interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = begin_new_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY2(*elf_ex, &arch_state);
	if (elf_read_implies_exec(*elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for(i = 0, elf_ppnt = elf_phdata;
	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
		int elf_prot, elf_flags;
		unsigned long k, vaddr;
		unsigned long total_size = 0;
		unsigned long alignment;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias,
					 bss_prot);
			if (retval)
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
				     !!interpreter, false);

		elf_flags = MAP_PRIVATE;

		vaddr = elf_ppnt->p_vaddr;
		/*
		 * The first time through the loop, first_pt_load is true:
		 * layout will be calculated. Once set, use MAP_FIXED since
		 * we know we've already safely mapped the entire region with
		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
		 */
		if (!first_pt_load) {
			elf_flags |= MAP_FIXED;
		} else if (elf_ex->e_type == ET_EXEC) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_EXEC binaries. No special handling
			 * is needed.
			 */
			elf_flags |= MAP_FIXED_NOREPLACE;
		} else if (elf_ex->e_type == ET_DYN) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_DYN binaries to calculate the
			 * randomization (load_bias) for all the LOAD
			 * Program Headers.
			 *
			 * There are effectively two types of ET_DYN
			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
			 * and loaders (ET_DYN without INTERP, since they
			 * _are_ the ELF interpreter). The loaders must
			 * be loaded away from programs since the program
			 * may otherwise collide with the loader (especially
			 * for ET_EXEC which does not have a randomized
			 * position). For example to handle invocations of
			 * "./ld.so someprog" to test out a new version of
			 * the loader, the subsequent program that the
			 * loader loads must avoid the loader itself, so
			 * they cannot share the same load range. Sufficient
			 * room for the brk must be allocated with the
			 * loader as well, since brk must be available with
			 * the loader.
			 *
			 * Therefore, programs are loaded offset from
			 * ELF_ET_DYN_BASE and loaders are loaded into the
			 * independently randomized mmap region (0 load_bias
			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
			 */
			if (interpreter) {
				load_bias = ELF_ET_DYN_BASE;
				if (current->flags & PF_RANDOMIZE)
					load_bias += arch_mmap_rnd();
				alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
				if (alignment)
					load_bias &= ~(alignment - 1);
				elf_flags |= MAP_FIXED_NOREPLACE;
			} else
				load_bias = 0;

			/*
			 * Since load_bias is used for all subsequent loading
			 * calculations, we must lower it by the first vaddr
			 * so that the remaining calculations based on the
			 * ELF vaddrs will be correctly offset. The result
			 * is then page aligned.
			 */
			load_bias = ELF_PAGESTART(load_bias - vaddr);

			/*
			 * Calculate the entire size of the ELF mapping
			 * (total_size), used for the initial mapping,
			 * due to load_addr_set which is set to true later
			 * once the initial mapping is performed.
			 *
			 * Note that this is only sensible when the LOAD
			 * segments are contiguous (or overlapping). If
			 * used for LOADs that are far apart, this would
			 * cause the holes between LOADs to be mapped,
			 * running the risk of having the mapping fail,
			 * as it would be larger than the ELF file itself.
			 *
			 * As a result, only ET_DYN does this, since
			 * some ET_EXEC (e.g. ia64) may have large virtual
			 * memory holes between LOADs.
			 *
			 */
			total_size = total_mapping_size(elf_phdata,
							elf_ex->e_phnum);
			if (!total_size) {
				retval = -EINVAL;
				goto out_free_dentry;
			}
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR_VALUE(error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (first_pt_load) {
			first_pt_load = 0;
			if (elf_ex->e_type == ET_DYN) {
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				reloc_func_desc = load_bias;
			}
		}

		/*
		 * Figure out which segment in the file contains the Program
		 * Header table, and map to the associated memory address.
		 */
		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
				    elf_ppnt->p_vaddr;
		}

		k = elf_ppnt->p_vaddr;
		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk) {
			bss_prot = elf_prot;
			elf_brk = k;
		}
	}

	e_entry = elf_ex->e_entry + load_bias;
	phdr_addr += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk, bss_prot);
	if (retval)
		goto out_free_dentry;
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (interpreter) {
		elf_entry = load_elf_interp(interp_elf_ex,
					    interpreter,
					    load_bias, interp_elf_phdata,
					    &arch_state);
		if (!IS_ERR_VALUE(elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += interp_elf_ex->e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR_VALUE(elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);

		kfree(interp_elf_ex);
		kfree(interp_elf_phdata);
	} else {
		elf_entry = e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
				   e_entry, phdr_addr);
	if (retval < 0)
		goto out;

	mm = current->mm;
	mm->end_code = end_code;
	mm->start_code = start_code;
	mm->start_data = start_data;
	mm->end_data = end_data;
	mm->start_stack = bprm->p;

	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		/*
		 * For architectures with ELF randomization, when executing
		 * a loader directly (i.e. no interpreter listed in ELF
		 * headers), move the brk area out of the mmap region
		 * (since it grows up, and may collide early with the stack
		 * growing down), and into the unused ELF_ET_DYN_BASE region.
		 */
		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
		    elf_ex->e_type == ET_DYN && !interpreter) {
			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
		}

		mm->brk = mm->start_brk = arch_randomize_brk(mm);
#ifdef compat_brk_randomized
		current->brk_randomized = 1;
#endif
	}

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

	regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	finalize_exec(bprm);
	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
	retval = 0;
out:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_ex);
	kfree(interp_elf_phdata);
out_free_file:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

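/*
 * A rough sketch of the resulting address space for a typical PIE binary
 * with an interpreter on a 64-bit arch with ELF randomization (exact
 * placement is architecture-specific), from higher to lower addresses:
 *
 *	stack, grown down from the randomized STACK_TOP set up above
 *	mmap region: ld.so placed by load_elf_interp(), shared libs later
 *	brk/heap, placed past the binary by arch_randomize_brk()
 *	the binary itself at ELF_ET_DYN_BASE plus the random load_bias
 */
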
#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
	if (retval < 0)
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
		goto out;
	if (elf_check_fdpic(&elf_ex))
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
	if (retval < 0)
		goto out_free_ph;

	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED_NOREPLACE | MAP_PRIVATE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
	if (bss > len) {
		error = vm_brk(len, bss - len);
		if (error)
			goto out_free_ph;
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}

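/*
 * For illustration, a note named "CORE" with a 6-byte payload occupies
 * 12 (header) + 8 ("CORE\0" padded to 4) + 8 (payload padded to 4) = 28
 * bytes, matching the layout emitted by writenote() below.
 */
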
static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	return dump_emit(cprm, &en, sizeof(en)) &&
	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus_common *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
	} else {
		u64 utime, stime;

		task_cputime(p, &utime, &stime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
	}

	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;
	unsigned int state;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for(i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	state = READ_ONCE(p->__state);
	i = state ? ffz(~state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	get_task_comm(psinfo->pr_fname, p);

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		const kernel_siginfo_t *siginfo)
{
	copy_siginfo_to_external(csigdata, siginfo);
	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

2aa362c4
DV
1632#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1633/*
1634 * Format of NT_FILE note:
1635 *
1636 * long count -- how many files are mapped
1637 * long page_size -- units for file_ofs
1638 * array of [COUNT] elements of
1639 * long start
1640 * long end
1641 * long file_ofs
1642 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1643 */
static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
{
	unsigned count, size, names_ofs, remaining, n;
	user_long_t *data;
	user_long_t *start_end_ofs;
	char *name_base, *name_curpos;
	int i;

	/* *Estimated* file count and total data size needed */
	count = cprm->vma_count;
	if (count > UINT_MAX / 64)
		return -EINVAL;
	size = count * 64;

	names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
		return -EINVAL;
	size = round_up(size, PAGE_SIZE);
	/*
	 * "size" can legitimately be 0 here. Let the allocation fail with
	 * ENOMEM and omit the NT_FILE note, which would be empty anyway.
	 */
	data = kvmalloc(size, GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(data))
		return -ENOMEM;

	start_end_ofs = data + 2;
	name_base = name_curpos = ((char *)data) + names_ofs;
	remaining = size - names_ofs;
	count = 0;
	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *m = &cprm->vma_meta[i];
		struct file *file;
		const char *filename;

		file = m->file;
		if (!file)
			continue;
		filename = file_path(file, name_curpos, remaining);
		if (IS_ERR(filename)) {
			if (PTR_ERR(filename) == -ENAMETOOLONG) {
				kvfree(data);
				size = size * 5 / 4;
				goto alloc;
			}
			continue;
		}

		/* file_path() fills at the end, move name down */
		/* n = strlen(filename) + 1: */
		n = (name_curpos + remaining) - filename;
		remaining = filename - name_curpos;
		memmove(name_curpos, filename, n);
		name_curpos += n;

		*start_end_ofs++ = m->start;
		*start_end_ofs++ = m->end;
		*start_end_ofs++ = m->pgoff;
		count++;
	}

	/* Now we know the exact count of files, store it */
	data[0] = count;
	data[1] = PAGE_SIZE;
	/*
	 * The final count is usually less than cprm->vma_count,
	 * so we need to move the filenames down.
	 */
	n = cprm->vma_count - count;
	if (n != 0) {
		unsigned shift_bytes = n * 3 * sizeof(data[0]);
		memmove(name_base - shift_bytes, name_base,
			name_curpos - name_base);
		name_curpos -= shift_bytes;
	}

	size = name_curpos - (char *)data;
	fill_note(note, "CORE", NT_FILE, size, data);
	return 0;
}

#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote signote;
	struct memelfnote auxv;
	struct memelfnote files;
	user_siginfo_t csigdata;
	size_t size;
	int thread_notes;
};

#ifdef CORE_DUMP_USE_REGSET
/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory. On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

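/*
 * Architectures whose compat tasks use a different NT_PRSTATUS layout
 * may override the two fallbacks below to size the note and to locate
 * pr_fpvalid within it.
 */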
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE sizeof(struct elf_prstatus)
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
#endif

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, struct elf_note_info *info)
{
	unsigned int note_iter, view_iter;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents. We fill the rest in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus.common, t->task, signr);
	regset_get(t->task, &view->regsets[0],
		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  PRSTATUS_SIZE, &t->prstatus);
	info->size += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each remaining regset might generate a note too. Skip any
	 * regset that has no core_note_type or is inactive.
	 */
	note_iter = 1;
	for (view_iter = 1; view_iter < view->n; ++view_iter) {
		const struct user_regset *regset = &view->regsets[view_iter];
		int note_type = regset->core_note_type;
		bool is_fpreg = note_type == NT_PRFPREG;
		void *data;
		int ret;

		do_thread_regset_writeback(t->task, regset);
		if (!note_type) /* not for coredumps */
			continue;
		if (regset->active && regset->active(t->task, regset) <= 0)
			continue;

		ret = regset_get_alloc(t->task, regset, ~0U, &data);
		if (ret < 0)
			continue;

		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
			break;

		if (is_fpreg)
			SET_PR_FPVALID(&t->prstatus);

		fill_note(&t->notes[note_iter], is_fpreg ? "CORE" : "LINUX",
			  note_type, ret, data);

		info->size += notesize(&t->notes[note_iter]);
		note_iter++;
	}

	return 1;
}
#else
static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, struct elf_note_info *info)
{
	struct task_struct *p = t->task;
	elf_fpregset_t *fpu;

	fill_prstatus(&t->prstatus.common, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	info->size += notesize(&t->notes[0]);

	fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL);
	if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
		kfree(fpu);
		return 1;
	}

	t->prstatus.pr_fpvalid = 1;
	fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
	info->size += notesize(&t->notes[1]);

	return 1;
}
#endif
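/*
 * Either variant above yields the same NT_PRSTATUS-first notes[] layout;
 * the regset path may add further per-thread notes, while the fallback
 * emits at most NT_PRFPREG in addition.
 */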

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  struct coredump_params *cprm)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view;
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		return 0;
	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

#ifdef CORE_DUMP_USE_REGSET
	view = task_user_regset_view(dump_task);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (int i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check. We rely on regset 0 being NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags);
#else
	view = NULL;
	info->thread_notes = 2;
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
#endif

	/*
	 * Allocate a structure for each thread.
	 */
	info->thread = kzalloc(offsetof(struct elf_thread_core_info,
					notes[info->thread_notes]),
			       GFP_KERNEL);
	if (unlikely(!info->thread))
		return 0;

	info->thread->task = dump_task;
	for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		t->next = info->thread->next;
		info->thread->next = t;
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
			return 0;

	/*
	 * Fill in the process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
	info->size += notesize(&info->signote);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	if (fill_files_note(&info->files, cprm) == 0)
		info->size += notesize(&info->files);

	return 1;
}

/*
 * Write all the notes for each thread. When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
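/*
 * With two threads, for example, the resulting note order is:
 *
 *   thread 1 NT_PRSTATUS
 *   NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE   (process-wide, once)
 *   remaining thread 1 notes
 *   thread 2 NT_PRSTATUS
 *   remaining thread 2 notes
 */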
static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], cprm))
			return 0;

		if (first && !writenote(&info->psinfo, cprm))
			return 0;
		if (first && !writenote(&info->signote, cprm))
			return 0;
		if (first && !writenote(&info->auxv, cprm))
			return 0;
		if (first && info->files.data &&
		    !writenote(&info->files, cprm))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], cprm))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
	kvfree(info->files.data);
}

static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
			     elf_addr_t e_shoff, int segs)
{
	elf->e_shoff = e_shoff;
	elf->e_shentsize = sizeof(*shdr4extnum);
	elf->e_shnum = 1;
	elf->e_shstrndx = SHN_UNDEF;

	memset(shdr4extnum, 0, sizeof(*shdr4extnum));

	shdr4extnum->sh_type = SHT_NULL;
	shdr4extnum->sh_size = elf->e_shnum;
	shdr4extnum->sh_link = elf->e_shstrndx;
	shdr4extnum->sh_info = segs;
}
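/*
 * Consumer-side sketch (illustrative only, hence #if 0): when e_phnum
 * reads PN_XNUM, the real program-header count must be recovered from
 * sh_info of the single section header written above:
 */
#if 0
static unsigned int real_phnum(const struct elfhdr *ehdr,
			       const struct elf_shdr *shdr0)
{
	return ehdr->e_phnum == PN_XNUM ? shdr0->sh_info : ehdr->e_phnum;
}
#endif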

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out. If we run out of core limit
 * we just truncate.
 */
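/*
 * Resulting core file layout, in write order:
 *
 *   ELF header
 *   program headers (the PT_NOTE entry first, then one PT_LOAD per vma,
 *     then any arch-specific extra phdrs)
 *   note data (including the cell spufs extra notes, if any)
 *   padding up to the next ELF_EXEC_PAGESIZE boundary
 *   vma contents, in phdr order
 *   arch-specific extra data, then the single extended-numbering
 *     section header when e_phnum == PN_XNUM
 */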
f6151dfe 2026static int elf_core_dump(struct coredump_params *cprm)
1da177e4 2027{
1da177e4 2028 int has_dumped = 0;
95c5436a 2029 int segs, i;
225a3f53 2030 struct elfhdr elf;
cdc3d562 2031 loff_t offset = 0, dataoff;
72023656 2032 struct elf_note_info info = { };
93eb211e 2033 struct elf_phdr *phdr4note = NULL;
8d9032bb
DH
2034 struct elf_shdr *shdr4extnum = NULL;
2035 Elf_Half e_phnum;
2036 elf_addr_t e_shoff;
1da177e4 2037
341c87bf
KH
2038 /*
2039 * The number of segs are recored into ELF header as 16bit value.
2040 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2041 */
19e183b5 2042 segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
f47aef55 2043
8d9032bb
DH
2044 /* for notes section */
2045 segs++;
2046
2047 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2048 * this, kernel supports extended numbering. Have a look at
2049 * include/linux/elf.h for further information. */
2050 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2051
1da177e4 2052 /*
3aba481f
RM
2053 * Collect all the non-memory information about the process for the
2054 * notes. This also sets up the file header.
1da177e4 2055 */
9ec7d323 2056 if (!fill_note_info(&elf, e_phnum, &info, cprm))
d2530b43 2057 goto end_coredump;
1da177e4 2058
3aba481f 2059 has_dumped = 1;
079148b9 2060
70e79866 2061 offset += sizeof(elf); /* ELF header */
8d9032bb 2062 offset += segs * sizeof(struct elf_phdr); /* Program headers */
1da177e4
LT
2063
2064 /* Write notes phdr entry */
2065 {
38ba2f11 2066 size_t sz = info.size;
1da177e4 2067
c39ab6de 2068 /* For cell spufs */
e5501492 2069 sz += elf_coredump_extra_notes_size();
bf1ab978 2070
93eb211e
DH
2071 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2072 if (!phdr4note)
088e7af7 2073 goto end_coredump;
93eb211e
DH
2074
2075 fill_elf_note_phdr(phdr4note, sz, offset);
2076 offset += sz;
1da177e4
LT
2077 }
2078
1da177e4
LT
2079 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2080
95c5436a 2081 offset += cprm->vma_data_size;
19e183b5 2082 offset += elf_core_extra_data_size(cprm);
8d9032bb
DH
2083 e_shoff = offset;
2084
2085 if (e_phnum == PN_XNUM) {
2086 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2087 if (!shdr4extnum)
2088 goto end_coredump;
225a3f53 2089 fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
8d9032bb
DH
2090 }
2091
2092 offset = dataoff;
2093
225a3f53 2094 if (!dump_emit(cprm, &elf, sizeof(elf)))
93eb211e
DH
2095 goto end_coredump;
2096
ecc8c772 2097 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
93eb211e
DH
2098 goto end_coredump;
2099
1da177e4 2100 /* Write program headers for segments dump */
95c5436a
EB
2101 for (i = 0; i < cprm->vma_count; i++) {
2102 struct core_vma_metadata *meta = cprm->vma_meta + i;
1da177e4 2103 struct elf_phdr phdr;
1da177e4
LT
2104
2105 phdr.p_type = PT_LOAD;
2106 phdr.p_offset = offset;
a07279c9 2107 phdr.p_vaddr = meta->start;
1da177e4 2108 phdr.p_paddr = 0;
a07279c9
JH
2109 phdr.p_filesz = meta->dump_size;
2110 phdr.p_memsz = meta->end - meta->start;
1da177e4 2111 offset += phdr.p_filesz;
a07279c9
JH
2112 phdr.p_flags = 0;
2113 if (meta->flags & VM_READ)
2114 phdr.p_flags |= PF_R;
2115 if (meta->flags & VM_WRITE)
f4e5cc2c 2116 phdr.p_flags |= PF_W;
a07279c9 2117 if (meta->flags & VM_EXEC)
f4e5cc2c 2118 phdr.p_flags |= PF_X;
1da177e4
LT
2119 phdr.p_align = ELF_EXEC_PAGESIZE;
2120
ecc8c772 2121 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
088e7af7 2122 goto end_coredump;
1da177e4
LT
2123 }
2124
506f21c5 2125 if (!elf_core_write_extra_phdrs(cprm, offset))
1fcccbac 2126 goto end_coredump;
1da177e4 2127
8f6e3f9e 2128 /* write out the notes section */
ecc8c772 2129 if (!write_note_info(&info, cprm))
3aba481f 2130 goto end_coredump;
1da177e4 2131
c39ab6de 2132 /* For cell spufs */
cdc3d562 2133 if (elf_coredump_extra_notes_write(cprm))
e5501492 2134 goto end_coredump;
bf1ab978 2135
d025c9db 2136 /* Align to page */
d0f1088b 2137 dump_skip_to(cprm, dataoff);
1da177e4 2138
95c5436a
EB
2139 for (i = 0; i < cprm->vma_count; i++) {
2140 struct core_vma_metadata *meta = cprm->vma_meta + i;
a07279c9
JH
2141
2142 if (!dump_user_range(cprm, meta->start, meta->dump_size))
afc63a97 2143 goto end_coredump;
1da177e4
LT
2144 }
2145
aa3e7eaf 2146 if (!elf_core_write_extra_data(cprm))
1fcccbac 2147 goto end_coredump;
1da177e4 2148
8d9032bb 2149 if (e_phnum == PN_XNUM) {
13046ece 2150 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
8d9032bb
DH
2151 goto end_coredump;
2152 }
2153
1da177e4 2154end_coredump:
3aba481f 2155 free_note_info(&info);
8d9032bb 2156 kfree(shdr4extnum);
93eb211e 2157 kfree(phdr4note);
1da177e4 2158 return has_dumped;
1da177e4
LT
2159}

#endif		/* CONFIG_ELF_CORE */

static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);

#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
#include "binfmt_elf_test.c"
#endif