// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines. Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <linux/rseq.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef ELF_COMPAT
#define ELF_COMPAT 0
#endif

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

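/*
 * A worked example of the macros above, assuming ELF_MIN_ALIGN == 4096
 * (0x1000); the address is purely illustrative:
 *
 *	ELF_PAGESTART(0x400789)  == 0x400000	(round down to page start)
 *	ELF_PAGEOFFSET(0x400789) == 0x789	(offset within the page)
 *	ELF_PAGEALIGN(0x400789)  == 0x401000	(round up to the next page)
 */
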
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
#ifdef CONFIG_COREDUMP
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
#endif
};

#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))

/*
 * We need to explicitly zero any trailing portion of the page that follows
 * p_filesz when it ends before the page ends (e.g. bss), otherwise this
 * memory will contain junk from the file that should not be present.
 */
static int padzero(unsigned long address)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(address);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *)address, nbyte))
			return -EFAULT;
	}
	return 0;
}

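/*
 * Illustration of what padzero() clears, assuming ELF_MIN_ALIGN == 4096
 * and a hypothetical segment whose file data ends at 0x601234: the
 * 0xdcc bytes from 0x601234 up to the page boundary at 0x602000 are
 * zeroed with clear_user(), so the start of .bss does not leak stale
 * file bytes mapped in from the page cache.
 */
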
/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) (sp -= len)
#endif

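/*
 * Example for the common grows-down case, with a hypothetical sp of
 * 0x7ffffff0: STACK_ALLOC(sp, 16) moves sp down to 0x7fffffe0 and
 * evaluates to that new, lower address, so the 16 reserved bytes live
 * at 0x7fffffe0..0x7fffffef. On CONFIG_STACK_GROWSUP the old sp is
 * returned instead and the reserved bytes sit above it.
 */
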
#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
		unsigned long interp_load_addr,
		unsigned long e_entry, unsigned long phdr_addr)
{
	struct mm_struct *mm = current->mm;
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	elf_addr_t flags = 0;
	int ei_index;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace. In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		*elf_info++ = id; \
		*elf_info++ = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, phdr_addr);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
		flags |= AT_FLAGS_PRESERVE_ARGV0;
	NEW_AUX_ENT(AT_FLAGS, flags);
	NEW_AUX_ENT(AT_ENTRY, e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
#ifdef ELF_HWCAP3
	NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3);
#endif
#ifdef ELF_HWCAP4
	NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->have_execfd) {
		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
	}
#ifdef CONFIG_RSEQ
	NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
	NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
#endif
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(elf_info, 0, (char *)mm->saved_auxv +
			sizeof(mm->saved_auxv) - (char *)elf_info);

	/* And advance past the AT_NULL entry. */
	elf_info += 2;

	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	if (mmap_write_lock_killable(mm))
		return -EINTR;
	vma = find_extend_vma_locked(mm, bprm->p);
	mmap_write_unlock(mm);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = mm->arg_end = mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->arg_end = p;

	/* Populate list of envp pointers back to envp strings. */
	mm->env_end = mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->env_end = p;

	/* Put the elf_info on the stack in the right place. */
	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

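/*
 * Sketch of the initial stack image that create_elf_tables() builds
 * (lowest address first, for the common grows-down case):
 *
 *	argc
 *	argv[0] .. argv[argc - 1], NULL
 *	envp[0] .. envp[envc - 1], NULL
 *	auxv pairs (AT_PHDR, AT_ENTRY, ..., terminated by AT_NULL)
 *	the argv/envp strings, plus the AT_RANDOM bytes and the
 *	AT_PLATFORM string(s) pushed earlier via STACK_ALLOC()
 */
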
/*
 * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
 * into memory at "addr". (Note that p_filesz is rounded up to the
 * next page, so any extra bytes from the file must be wiped.)
 */
static unsigned long elf_map(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image (since size < total_size).
	 * So we first map the 'big' image and then unmap the remainder
	 * at the end. (The unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	if ((type & MAP_FIXED_NOREPLACE) &&
	    PTR_ERR((void *)map_addr) == -EEXIST)
		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
			task_pid_nr(current), current->comm, (void *)addr);

	return(map_addr);
}

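/*
 * Worked example of the arithmetic above, assuming 4 KiB pages and a
 * hypothetical segment with p_vaddr == 0x401234, p_offset == 0x1234 and
 * p_filesz == 0x100: ELF_PAGEOFFSET(p_vaddr) == 0x234, so the request
 * becomes addr == 0x401000, size == ELF_PAGEALIGN(0x334) == 0x1000 and
 * off == 0x1000 -- the whole page containing the segment, mapped from
 * the page-aligned file offset.
 */
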
/*
 * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
 * into memory at "addr". Memory from "p_filesz" through "p_memsz"
 * rounded up to the next page is zeroed.
 */
static unsigned long elf_load(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long zero_start, zero_end;
	unsigned long map_addr;

	if (eppnt->p_filesz) {
		map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
		if (BAD_ADDR(map_addr))
			return map_addr;
		if (eppnt->p_memsz > eppnt->p_filesz) {
			zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_filesz;
			zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_memsz;

			/*
			 * Zero the end of the last mapped page but ignore
			 * any errors if the segment isn't writable.
			 */
			if (padzero(zero_start) && (prot & PROT_WRITE))
				return -EFAULT;
		}
	} else {
		map_addr = zero_start = ELF_PAGESTART(addr);
		zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
			eppnt->p_memsz;
	}
	if (eppnt->p_memsz > eppnt->p_filesz) {
		/*
		 * Map the last of the segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error;

		zero_start = ELF_PAGEALIGN(zero_start);
		zero_end = ELF_PAGEALIGN(zero_end);

		error = vm_brk_flags(zero_start, zero_end - zero_start,
				     prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			map_addr = error;
	}
	return map_addr;
}


static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
{
	elf_addr_t min_addr = -1;
	elf_addr_t max_addr = 0;
	bool pt_load = false;
	int i;

	for (i = 0; i < nr; i++) {
		if (phdr[i].p_type == PT_LOAD) {
			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
			pt_load = true;
		}
	}
	return pt_load ? (max_addr - min_addr) : 0;
}

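/*
 * Example with two hypothetical PT_LOAD headers and 4 KiB pages: text
 * at p_vaddr 0x400000 and data at p_vaddr 0x600000 with p_memsz 0x800
 * give total_mapping_size() == 0x600800 - 0x400000 == 0x200800: the
 * whole span from the first mapped page to the end of the highest
 * segment, including any hole between the two.
 */
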
static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
	ssize_t rv;

	rv = kernel_read(file, buf, len, &pos);
	if (unlikely(rv != len)) {
		return (rv < 0) ? rv : -EIO;
	}
	return 0;
}

static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
{
	unsigned long alignment = 0;
	int i;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			unsigned long p_align = cmds[i].p_align;

			/* skip non-power-of-two alignments as invalid */
			if (!is_power_of_2(p_align))
				continue;
			alignment = max(alignment, p_align);
		}
	}

	/* ensure we align to at least one page */
	return ELF_PAGEALIGN(alignment);
}

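/*
 * For instance, a binary whose PT_LOAD headers carry p_align values of
 * 0x1000 and 0x200000 (a 2 MiB huge-page request) yields 0x200000 here;
 * the load_bias logic in load_elf_binary() then masks the random base
 * down to a 2 MiB boundary with "load_bias &= ~(alignment - 1)".
 */
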
/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex: ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval = -1;
	unsigned int size;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers... */
	/* ...and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);

out:
	if (retval) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr: The main ELF header
 * @phdr: The program header to check
 * @elf: The open ELF file
 * @is_interp: True if the phdr is from the interpreter of the ELF being
 *             loaded, else false.
 * @state: Architecture-specific state preserved throughout the process
 *         of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr: The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state: Architecture-specific state preserved throughout the process
 *         of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
			    bool has_interp, bool is_interp)
{
	int prot = 0;

	if (p_flags & PF_R)
		prot |= PROT_READ;
	if (p_flags & PF_W)
		prot |= PROT_WRITE;
	if (p_flags & PF_X)
		prot |= PROT_EXEC;

	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate. Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
		struct arch_elf_state *arch_state)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE;
			int elf_prot = make_prot(eppnt->p_flags, arch_state,
						 true, true);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_load(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}
		}
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries. There is no binary dependent code anywhere else.
 */

static int parse_elf_property(const char *data, size_t *off, size_t datasz,
			      struct arch_elf_state *arch,
			      bool have_prev_type, u32 *prev_type)
{
	size_t o, step;
	const struct gnu_property *pr;
	int ret;

	if (*off == datasz)
		return -ENOENT;

	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
		return -EIO;
	o = *off;
	datasz -= *off;

	if (datasz < sizeof(*pr))
		return -ENOEXEC;
	pr = (const struct gnu_property *)(data + o);
	o += sizeof(*pr);
	datasz -= sizeof(*pr);

	if (pr->pr_datasz > datasz)
		return -ENOEXEC;

	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
	if (step > datasz)
		return -ENOEXEC;

	/* Properties are supposed to be unique and sorted on pr_type: */
	if (have_prev_type && pr->pr_type <= *prev_type)
		return -ENOEXEC;
	*prev_type = pr->pr_type;

	ret = arch_parse_elf_property(pr->pr_type, data + o,
				      pr->pr_datasz, ELF_COMPAT, arch);
	if (ret)
		return ret;

	*off = o + step;
	return 0;
}

#define NOTE_DATA_SZ SZ_1K
#define NOTE_NAME_SZ (sizeof(NN_GNU_PROPERTY_TYPE_0))

static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
				struct arch_elf_state *arch)
{
	union {
		struct elf_note nhdr;
		char data[NOTE_DATA_SZ];
	} note;
	loff_t pos;
	ssize_t n;
	size_t off, datasz;
	int ret;
	bool have_prev_type;
	u32 prev_type;

	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
		return 0;

	/* load_elf_binary() shouldn't call us unless this is true... */
	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
		return -ENOEXEC;

	/* If the properties are crazy large, that's too bad (for now): */
	if (phdr->p_filesz > sizeof(note))
		return -ENOEXEC;

	pos = phdr->p_offset;
	n = kernel_read(f, &note, phdr->p_filesz, &pos);

	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
		return -EIO;

	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
	    strncmp(note.data + sizeof(note.nhdr),
		    NN_GNU_PROPERTY_TYPE_0, n - sizeof(note.nhdr)))
		return -ENOEXEC;

	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
		       ELF_GNU_PROPERTY_ALIGN);
	if (off > n)
		return -ENOEXEC;

	if (note.nhdr.n_descsz > n - off)
		return -ENOEXEC;
	datasz = off + note.nhdr.n_descsz;

	have_prev_type = false;
	do {
		ret = parse_elf_property(note.data, &off, datasz, arch,
					 have_prev_type, &prev_type);
		have_prev_type = true;
	} while (!ret);

	return ret == -ENOENT ? 0 : ret;
}

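/*
 * Shape of the note parsed above (illustrative, assuming
 * ELF_GNU_PROPERTY_ALIGN == 8 as on 64-bit arches):
 *
 *	struct elf_note { n_namesz == 4, n_descsz,
 *		n_type == NT_GNU_PROPERTY_TYPE_0 }
 *	"GNU\0" name, padded to an 8-byte boundary
 *	one or more { pr_type, pr_datasz, data[pr_datasz] } records,
 *	each padded to 8 bytes and sorted by pr_type, until n_descsz
 *	bytes have been consumed
 */
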
static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_bias = 0, phdr_addr = 0;
	int first_pt_load = 1;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	struct elf_phdr *elf_property_phdata = NULL;
	unsigned long elf_brk;
	bool brk_moved = false;
	int retval, i;
	unsigned long elf_entry;
	unsigned long e_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
	struct elfhdr *interp_elf_ex = NULL;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
	struct mm_struct *mm;
	struct pt_regs *regs;

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(elf_ex))
		goto out;
	if (elf_check_fdpic(elf_ex))
		goto out;
	if (!bprm->file->f_op->mmap)
		goto out;

	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
		char *elf_interpreter;

		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
			elf_property_phdata = elf_ppnt;
			continue;
		}

		if (elf_ppnt->p_type != PT_INTERP)
			continue;

		/*
		 * This is the program interpreter used for shared libraries -
		 * for now assume that this is an a.out format binary.
		 */
		retval = -ENOEXEC;
		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
			goto out_free_ph;

		retval = -ENOMEM;
		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
		if (!elf_interpreter)
			goto out_free_ph;

		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
				  elf_ppnt->p_offset);
		if (retval < 0)
			goto out_free_interp;
		/* make sure path is NULL terminated */
		retval = -ENOEXEC;
		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
			goto out_free_interp;

		interpreter = open_exec(elf_interpreter);
		kfree(elf_interpreter);
		retval = PTR_ERR(interpreter);
		if (IS_ERR(interpreter))
			goto out_free_ph;

		/*
		 * If the binary is not readable then enforce mm->dumpable = 0
		 * regardless of the interpreter's permissions.
		 */
		would_dump(bprm, interpreter);

		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
		if (!interp_elf_ex) {
			retval = -ENOMEM;
			goto out_free_file;
		}

		/* Get the exec headers */
		retval = elf_read(interpreter, interp_elf_ex,
				  sizeof(*interp_elf_ex), 0);
		if (retval < 0)
			goto out_free_dentry;

		break;

out_free_interp:
		kfree(elf_interpreter);
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(interp_elf_ex) ||
		    elf_check_fdpic(interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_property_phdata = NULL;
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_GNU_PROPERTY:
				elf_property_phdata = elf_ppnt;
				break;

			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	retval = parse_elf_properties(interpreter ?: bprm->file,
				      elf_property_phdata, &arch_state);
	if (retval)
		goto out_free_dentry;

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(elf_ex,
				!!interpreter, interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = begin_new_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY2(*elf_ex, &arch_state);
	if (elf_read_implies_exec(*elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
	if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be. We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	     i < elf_ex->e_phnum; i++, elf_ppnt++) {
		int elf_prot, elf_flags;
		unsigned long k, vaddr;
		unsigned long total_size = 0;
		unsigned long alignment;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
				     !!interpreter, false);

		elf_flags = MAP_PRIVATE;

		vaddr = elf_ppnt->p_vaddr;
		/*
		 * The first time through the loop, first_pt_load is true:
		 * layout will be calculated. Once set, use MAP_FIXED since
		 * we know we've already safely mapped the entire region with
		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
		 */
		if (!first_pt_load) {
			elf_flags |= MAP_FIXED;
		} else if (elf_ex->e_type == ET_EXEC) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_EXEC binaries. No special handling
			 * is needed.
			 */
			elf_flags |= MAP_FIXED_NOREPLACE;
		} else if (elf_ex->e_type == ET_DYN) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_DYN binaries to calculate the
			 * randomization (load_bias) for all the LOAD
			 * Program Headers.
			 */

			/*
			 * Calculate the entire size of the ELF mapping
			 * (total_size), used for the initial mapping,
			 * due to load_addr_set which is set to true later
			 * once the initial mapping is performed.
			 *
			 * Note that this is only sensible when the LOAD
			 * segments are contiguous (or overlapping). If
			 * used for LOADs that are far apart, this would
			 * cause the holes between LOADs to be mapped,
			 * running the risk of having the mapping fail,
			 * as it would be larger than the ELF file itself.
			 *
			 * As a result, only ET_DYN does this, since
			 * some ET_EXEC (e.g. ia64) may have large virtual
			 * memory holes between LOADs.
			 */
			total_size = total_mapping_size(elf_phdata,
							elf_ex->e_phnum);
			if (!total_size) {
				retval = -EINVAL;
				goto out_free_dentry;
			}

			/* Calculate any requested alignment. */
			alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);

			/**
			 * DOC: PIE handling
			 *
			 * There are effectively two types of ET_DYN ELF
			 * binaries: programs (i.e. PIE: ET_DYN with
			 * PT_INTERP) and loaders (i.e. static PIE: ET_DYN
			 * without PT_INTERP, usually the ELF interpreter
			 * itself). Loaders must be loaded away from programs
			 * since the program may otherwise collide with the
			 * loader (especially for ET_EXEC which does not have
			 * a randomized position).
			 *
			 * For example, to handle invocations of
			 * "./ld.so someprog" to test out a new version of
			 * the loader, the subsequent program that the
			 * loader loads must avoid the loader itself, so
			 * they cannot share the same load range. Sufficient
			 * room for the brk must be allocated with the
			 * loader as well, since brk must be available with
			 * the loader.
			 *
			 * Therefore, programs are loaded offset from
			 * ELF_ET_DYN_BASE and loaders are loaded into the
			 * independently randomized mmap region (0 load_bias
			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
			 *
			 * See below for "brk" handling details, which is
			 * also affected by program vs loader and ASLR.
			 */
			if (interpreter) {
				/* On ET_DYN with PT_INTERP, we do the ASLR. */
				load_bias = ELF_ET_DYN_BASE;
				if (current->flags & PF_RANDOMIZE)
					load_bias += arch_mmap_rnd();
				/* Adjust alignment as requested. */
				if (alignment)
					load_bias &= ~(alignment - 1);
				elf_flags |= MAP_FIXED_NOREPLACE;
			} else {
				/*
				 * For ET_DYN without PT_INTERP, we rely on
				 * the architecture's (potentially ASLR) mmap
				 * base address (via a load_bias of 0).
				 *
				 * When a large alignment is requested, we
				 * must do the allocation at address "0" right
				 * now to discover where things will load so
				 * that we can adjust the resulting alignment.
				 * In this case (load_bias != 0), we can use
				 * MAP_FIXED_NOREPLACE to make sure the mapping
				 * doesn't collide with anything.
				 */
				if (alignment > ELF_MIN_ALIGN) {
					load_bias = elf_load(bprm->file, 0, elf_ppnt,
							     elf_prot, elf_flags, total_size);
					if (BAD_ADDR(load_bias)) {
						retval = IS_ERR_VALUE(load_bias) ?
							 PTR_ERR((void*)load_bias) : -EINVAL;
						goto out_free_dentry;
					}
					vm_munmap(load_bias, total_size);
					/* Adjust alignment as requested. */
					if (alignment)
						load_bias &= ~(alignment - 1);
					elf_flags |= MAP_FIXED_NOREPLACE;
				} else
					load_bias = 0;
			}

			/*
			 * Since load_bias is used for all subsequent loading
			 * calculations, we must lower it by the first vaddr
			 * so that the remaining calculations based on the
			 * ELF vaddrs will be correctly offset. The result
			 * is then page aligned.
			 */
			load_bias = ELF_PAGESTART(load_bias - vaddr);
		}

		error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
				 elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR_VALUE(error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (first_pt_load) {
			first_pt_load = 0;
			if (elf_ex->e_type == ET_DYN) {
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				reloc_func_desc = load_bias;
			}
		}

		/*
		 * Figure out which segment in the file contains the Program
		 * Header table, and map to the associated memory address.
		 */
		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
				    elf_ppnt->p_vaddr;
		}

		k = elf_ppnt->p_vaddr;
		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	e_entry = elf_ex->e_entry + load_bias;
	phdr_addr += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	if (interpreter) {
		elf_entry = load_elf_interp(interp_elf_ex,
					    interpreter,
					    load_bias, interp_elf_phdata,
					    &arch_state);
		if (!IS_ERR_VALUE(elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += interp_elf_ex->e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR_VALUE(elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		exe_file_allow_write_access(interpreter);
		fput(interpreter);

		kfree(interp_elf_ex);
		kfree(interp_elf_phdata);
	} else {
		elf_entry = e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
				   e_entry, phdr_addr);
	if (retval < 0)
		goto out;

	mm = current->mm;
	mm->end_code = end_code;
	mm->start_code = start_code;
	mm->start_data = start_data;
	mm->end_data = end_data;
	mm->start_stack = bprm->p;

	/**
	 * DOC: "brk" handling
	 *
	 * For architectures with ELF randomization, when executing a
	 * loader directly (i.e. static PIE: ET_DYN without PT_INTERP),
	 * move the brk area out of the mmap region and into the unused
	 * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
	 * early with the stack growing down or other regions being put
	 * into the mmap region by the kernel (e.g. vdso).
	 *
	 * In the CONFIG_COMPAT_BRK case, though, everything is turned
	 * off because we're not allowed to move the brk at all.
	 */
	if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
	    IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
	    elf_ex->e_type == ET_DYN && !interpreter) {
		elf_brk = ELF_ET_DYN_BASE;
		/* This counts as moving the brk, so let brk(2) know. */
		brk_moved = true;
	}
	mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);

	if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
		/*
		 * If we didn't move the brk to ELF_ET_DYN_BASE (above),
		 * leave a gap between .bss and brk.
		 */
		if (!brk_moved)
			mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;

		mm->brk = mm->start_brk = arch_randomize_brk(mm);
		brk_moved = true;
	}

#ifdef compat_brk_randomized
	if (brk_moved)
		current->brk_randomized = 1;
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);

		retval = do_mseal(0, PAGE_SIZE, 0);
		if (retval)
			pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n",
					    task_pid_nr(current), retval);
	}

	regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example). In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself. This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	finalize_exec(bprm);
	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
	retval = 0;
out:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_ex);
	kfree(interp_elf_phdata);
out_free_file:
	exe_file_allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}

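/*
 * Example of the sizing arithmetic, given that struct elf_note is three
 * 32-bit words (12 bytes): a note named "CORE" (5 bytes with the NUL,
 * rounded to 8) carrying 333 bytes of data (rounded to 336) occupies
 * 12 + 8 + 336 == 356 bytes in the core file.
 */
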
static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	return dump_emit(cprm, &en, sizeof(en)) &&
	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 4;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus_common *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader. It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
	} else {
		u64 utime, stime;

		task_cputime(p, &utime, &stime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
	}

	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;
	unsigned int state;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	state = READ_ONCE(p->__state);
	i = state ? ffz(~state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	get_task_comm(psinfo->pr_fname, p);

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, NN_AUXV, NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		const kernel_siginfo_t *siginfo)
{
	copy_siginfo_to_external(csigdata, siginfo);
	fill_note(note, NN_SIGINFO, NT_SIGINFO, sizeof(*csigdata), csigdata);
}

/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
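/*
 * A hypothetical instance of the layout above, for a process with two
 * file-backed mappings of /bin/cat:
 *
 *	count = 2, page_size = 4096
 *	{ start = 0x400000, end = 0x401000, file_ofs = 0 }
 *	{ start = 0x401000, end = 0x402000, file_ofs = 1 }
 *	"/bin/cat\0/bin/cat\0"
 *
 * file_ofs is expressed in page_size units, matching the pgoff values
 * stored below.
 */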
static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
{
	unsigned count, size, names_ofs, remaining, n;
	user_long_t *data;
	user_long_t *start_end_ofs;
	char *name_base, *name_curpos;
	int i;

	/* *Estimated* file count and total data size needed */
	count = cprm->vma_count;
	if (count > UINT_MAX / 64)
		return -EINVAL;
	size = count * 64;

	names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
	/* paranoia check */
	if (size >= core_file_note_size_limit) {
		pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?)\n",
			     size);
		return -EINVAL;
	}
	size = round_up(size, PAGE_SIZE);
	/*
	 * "size" can legitimately be 0 here.  Let the allocation fail with
	 * -ENOMEM and omit the NT_FILE section, which would be empty anyway.
	 */
	data = kvmalloc(size, GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(data))
		return -ENOMEM;

	start_end_ofs = data + 2;
	name_base = name_curpos = ((char *)data) + names_ofs;
	remaining = size - names_ofs;
	count = 0;
	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *m = &cprm->vma_meta[i];
		struct file *file;
		const char *filename;

		file = m->file;
		if (!file)
			continue;
		filename = file_path(file, name_curpos, remaining);
		if (IS_ERR(filename)) {
			if (PTR_ERR(filename) == -ENAMETOOLONG) {
				kvfree(data);
				size = size * 5 / 4;
				goto alloc;
			}
			continue;
		}

		/* file_path() fills at the end, so move the name down */
		/* n = strlen(filename) + 1: */
		n = (name_curpos + remaining) - filename;
		remaining = filename - name_curpos;
		memmove(name_curpos, filename, n);
		name_curpos += n;

		*start_end_ofs++ = m->start;
		*start_end_ofs++ = m->end;
		*start_end_ofs++ = m->pgoff;
		count++;
	}

	/* Now that we know the exact count of files, we can store it */
	data[0] = count;
	data[1] = PAGE_SIZE;
	/*
	 * The final count is usually less than cprm->vma_count,
	 * so we need to move the filenames down.
	 */
	n = cprm->vma_count - count;
	if (n != 0) {
		unsigned shift_bytes = n * 3 * sizeof(data[0]);
		memmove(name_base - shift_bytes, name_base,
			name_curpos - name_base);
		name_curpos -= shift_bytes;
	}

	size = name_curpos - (char *)data;
	fill_note(note, NN_FILE, NT_FILE, size, data);
	return 0;
}

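/*
 * A hedged sketch of how a userspace consumer (a debugger, say) might
 * decode the NT_FILE payload built above; "desc" stands for the note's
 * data as read from the core file and is this example's assumption, not
 * a kernel interface:
 *
 *	const long *p = desc;
 *	long count = p[0], page_size = p[1];
 *	const char *names = (const char *)(p + 2 + 3 * count);
 *	for (long j = 0; j < count; j++) {
 *		long start = p[2 + 3*j], end = p[3 + 3*j], ofs = p[4 + 3*j];
 *		printf("%lx-%lx @ %lx %s\n", start, end,
 *		       ofs * page_size, names);
 *		names += strlen(names) + 1;	// NUL-separated strings
 *	}
 */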
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote signote;
	struct memelfnote auxv;
	struct memelfnote files;
	user_siginfo_t csigdata;
	size_t size;
	int thread_notes;
};

#ifdef CORE_DUMP_USE_REGSET
/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory. On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE sizeof(struct elf_prstatus)
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
#endif

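/*
 * The two fallbacks above exist because an architecture dumping a compat
 * (32-bit) process may need a differently sized prstatus and a
 * differently placed pr_fpvalid field.  A hedged sketch of what such an
 * arch override could look like (illustrative only; COMPAT_PRSTATUS_SIZE
 * is a hypothetical name, not any architecture's actual definition):
 *
 *	#define PRSTATUS_SIZE (is_compat_task() ? COMPAT_PRSTATUS_SIZE : \
 *			       sizeof(struct elf_prstatus))
 */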
static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, struct elf_note_info *info)
{
	unsigned int note_iter, view_iter;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents. We fill the regset in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus.common, t->task, signr);
	regset_get(t->task, &view->regsets[0],
		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);

	fill_note(&t->notes[0], NN_PRSTATUS, NT_PRSTATUS,
		  PRSTATUS_SIZE, &t->prstatus);
	info->size += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too. For each regset
	 * that has no core_note_type or is inactive, skip it.
	 */
	note_iter = 1;
	for (view_iter = 1; view_iter < view->n; ++view_iter) {
		const struct user_regset *regset = &view->regsets[view_iter];
		int note_type = regset->core_note_type;
		bool is_fpreg = note_type == NT_PRFPREG;
		void *data;
		int ret;

		do_thread_regset_writeback(t->task, regset);
		if (!note_type) // not for coredumps
			continue;
		if (regset->active && regset->active(t->task, regset) <= 0)
			continue;

		ret = regset_get_alloc(t->task, regset, ~0U, &data);
		if (ret < 0)
			continue;

		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
			break;

		if (is_fpreg)
			SET_PR_FPVALID(&t->prstatus);

		fill_note(&t->notes[note_iter], is_fpreg ? NN_PRFPREG : "LINUX",
			  note_type, ret, data);

		info->size += notesize(&t->notes[note_iter]);
		note_iter++;
	}

	return 1;
}
#else
static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, struct elf_note_info *info)
{
	struct task_struct *p = t->task;
	elf_fpregset_t *fpu;

	fill_prstatus(&t->prstatus.common, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], NN_PRSTATUS, NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	info->size += notesize(&t->notes[0]);

	fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL);
	if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
		kfree(fpu);
		return 1;
	}

	t->prstatus.pr_fpvalid = 1;
	fill_note(&t->notes[1], NN_PRFPREG, NT_PRFPREG, sizeof(*fpu), fpu);
	info->size += notesize(&t->notes[1]);

	return 1;
}
#endif

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  struct coredump_params *cprm)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view;
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		return 0;
	fill_note(&info->psinfo, NN_PRPSINFO, NT_PRPSINFO, sizeof(*psinfo), psinfo);

#ifdef CORE_DUMP_USE_REGSET
	view = task_user_regset_view(dump_task);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (int i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check. We rely on regset 0 being NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags);
#else
	view = NULL;
	info->thread_notes = 2;
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
#endif

	/*
	 * Allocate a structure for each thread.
	 */
	info->thread = kzalloc(offsetof(struct elf_thread_core_info,
					notes[info->thread_notes]),
			       GFP_KERNEL);
	if (unlikely(!info->thread))
		return 0;

	info->thread->task = dump_task;
	for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		t->next = info->thread->next;
		info->thread->next = t;
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
			return 0;

	/*
	 * Fill in the process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
	info->size += notesize(&info->signote);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	if (fill_files_note(&info->files, cprm) == 0)
		info->size += notesize(&info->files);

	return 1;
}

/*
 * Write all the notes for each thread. When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], cprm))
			return 0;

		if (first && !writenote(&info->psinfo, cprm))
			return 0;
		if (first && !writenote(&info->signote, cprm))
			return 0;
		if (first && !writenote(&info->auxv, cprm))
			return 0;
		if (first && info->files.data &&
		    !writenote(&info->files, cprm))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], cprm))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}

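/*
 * For a hypothetical two-thread process this yields the following note
 * ordering in the PT_NOTE segment (a sketch, not an exhaustive list of
 * per-thread notes, which vary by architecture):
 *
 *	NT_PRSTATUS (thread 1)		// dumping thread comes first
 *	NT_PRPSINFO			// process-wide, interleaved once
 *	NT_SIGINFO
 *	NT_AUXV
 *	NT_FILE				// only if fill_files_note() succeeded
 *	NT_PRFPREG (thread 1), ...	// remaining thread 1 notes
 *	NT_PRSTATUS (thread 2)
 *	NT_PRFPREG (thread 2), ...
 */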
static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kvfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
	kvfree(info->files.data);
}

static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
			     elf_addr_t e_shoff, int segs)
{
	elf->e_shoff = e_shoff;
	elf->e_shentsize = sizeof(*shdr4extnum);
	elf->e_shnum = 1;
	elf->e_shstrndx = SHN_UNDEF;

	memset(shdr4extnum, 0, sizeof(*shdr4extnum));

	shdr4extnum->sh_type = SHT_NULL;
	shdr4extnum->sh_size = elf->e_shnum;
	shdr4extnum->sh_link = elf->e_shstrndx;
	shdr4extnum->sh_info = segs;
}

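/*
 * With extended numbering, a reader that sees e_phnum == PN_XNUM must
 * fetch the real segment count from sh_info of section header 0.  A
 * hedged userspace sketch (assuming "ehdr" and "shdr0" were already read
 * from the core file):
 *
 *	size_t phnum = ehdr.e_phnum;
 *	if (phnum == PN_XNUM)
 *		phnum = shdr0.sh_info;	// real count stored here
 */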
/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out. If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(struct coredump_params *cprm)
{
	int has_dumped = 0;
	int segs, i;
	struct elfhdr elf;
	loff_t offset = 0, dataoff;
	struct elf_note_info info = { };
	struct elf_phdr *phdr4note = NULL;
	struct elf_shdr *shdr4extnum = NULL;
	Elf_Half e_phnum;
	elf_addr_t e_shoff;

	/*
	 * The number of segs is recorded in the ELF header as a 16-bit value.
	 * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify here.
	 */
	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);

	/* for notes section */
	segs++;

	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
	 * this, the kernel supports extended numbering. Have a look at
	 * include/linux/elf.h for further information. */
	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes. This also sets up the file header.
	 */
	if (!fill_note_info(&elf, e_phnum, &info, cprm))
		goto end_coredump;

	has_dumped = 1;

	offset += sizeof(elf);				/* ELF header */
	offset += segs * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
	{
		size_t sz = info.size;

		/* For cell spufs and x86 xstate */
		sz += elf_coredump_extra_notes_size();

		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
		if (!phdr4note)
			goto end_coredump;

		fill_elf_note_phdr(phdr4note, sz, offset);
		offset += sz;
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	offset += cprm->vma_data_size;
	offset += elf_core_extra_data_size(cprm);
	e_shoff = offset;

	if (e_phnum == PN_XNUM) {
		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
		if (!shdr4extnum)
			goto end_coredump;
		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
	}

	offset = dataoff;

	if (!dump_emit(cprm, &elf, sizeof(elf)))
		goto end_coredump;

	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
		goto end_coredump;

	/* Write program headers for segments dump */
	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *meta = cprm->vma_meta + i;
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = meta->start;
		phdr.p_paddr = 0;
		phdr.p_filesz = meta->dump_size;
		phdr.p_memsz = meta->end - meta->start;
		offset += phdr.p_filesz;
		phdr.p_flags = 0;
		if (meta->flags & VM_READ)
			phdr.p_flags |= PF_R;
		if (meta->flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (meta->flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
			goto end_coredump;
	}

	if (!elf_core_write_extra_phdrs(cprm, offset))
		goto end_coredump;

	/* write out the notes section */
	if (!write_note_info(&info, cprm))
		goto end_coredump;

	/* For cell spufs and x86 xstate */
	if (elf_coredump_extra_notes_write(cprm))
		goto end_coredump;

	/* Align to page */
	dump_skip_to(cprm, dataoff);

	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *meta = cprm->vma_meta + i;

		if (!dump_user_range(cprm, meta->start, meta->dump_size))
			goto end_coredump;
	}

	if (!elf_core_write_extra_data(cprm))
		goto end_coredump;

	if (e_phnum == PN_XNUM) {
		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
			goto end_coredump;
	}

end_coredump:
	free_note_info(&info);
	kfree(shdr4extnum);
	kfree(phdr4note);
	return has_dumped;
}

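/*
 * The resulting file layout, as produced by the two passes above
 * (offsets are illustrative; everything before dataoff is sized in pass
 * one and emitted in pass two):
 *
 *	[ELF header]
 *	[PT_NOTE phdr][PT_LOAD phdrs...][extra phdrs]
 *	[notes: prstatus, prpsinfo, siginfo, auxv, files, ...]
 *	<padding up to dataoff (ELF_EXEC_PAGESIZE aligned)>
 *	[VMA contents, one PT_LOAD's worth at a time]
 *	[extra data]
 *	[extended-numbering section header, if e_phnum == PN_XNUM]
 */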
#endif /* CONFIG_ELF_CORE */

static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);

#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
#include "tests/binfmt_elf_kunit.c"
#endif