// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines. Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

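/*
 * Illustrative example, assuming ELF_MIN_ALIGN == 0x1000:
 *	ELF_PAGESTART(0x12345)  == 0x12000	(round down to page)
 *	ELF_PAGEOFFSET(0x12345) == 0x345	(offset within page)
 *	ELF_PAGEALIGN(0x12345)  == 0x13000	(round up to page)
 */
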
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
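/*
 * Note that BAD_ADDR() also catches error values returned by vm_mmap()
 * (e.g. -EEXIST): small negative numbers cast to unsigned long land far
 * above TASK_SIZE.
 */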

static int set_brk(unsigned long start, unsigned long end, int prot)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		/*
		 * Map the last of the bss segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error = vm_brk_flags(start, end - start,
				prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			return error;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/*
 * We need to explicitly zero any fractional pages after the data
 * section (i.e. bss). This would contain the junk from the file that
 * should not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
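
/*
 * Illustrative example, assuming ELF_MIN_ALIGN == 0x1000: for
 * elf_bss == 0x401234, ELF_PAGEOFFSET() yields 0x234, so the
 * 0x1000 - 0x234 == 0xdcc bytes from 0x401234 up to the page
 * boundary at 0x402000 are cleared.
 */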

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
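
/*
 * In the common grows-down case, STACK_ALLOC() moves sp down by len and
 * returns the new (lower) address; on grows-up stacks it returns the old
 * address and bumps sp past the allocation. Either way the caller gets a
 * len-byte region at the returned pointer, and STACK_ROUND() keeps the
 * final stack pointer 16-byte aligned.
 */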

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr,
		unsigned long e_entry)
{
	struct mm_struct *mm = current->mm;
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace. In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		*elf_info++ = id; \
		*elf_info++ = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(elf_info, 0, (char *)mm->saved_auxv +
			sizeof(mm->saved_auxv) - (char *)elf_info);

	/* And advance past the AT_NULL entry. */
	elf_info += 2;

	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = mm->arg_end = mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, sp++))
		return -EFAULT;
	mm->arg_end = p;

	/* Populate list of envp pointers back to envp strings. */
	mm->env_end = mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, sp++))
		return -EFAULT;
	mm->env_end = p;

	/* Put the elf_info on the stack in the right place. */
	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
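
/*
 * The resulting initial stack, in the common grows-down case, reads
 * upward from the final stack pointer as:
 *
 *	argc
 *	argv[0] ... argv[argc - 1], NULL
 *	envp[0] ... envp[envc - 1], NULL
 *	auxv (id, value) pairs, terminated by AT_NULL
 *
 * with the platform strings, the AT_RANDOM bytes and the argument and
 * environment strings themselves sitting above, nearer the stack top.
 */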

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image (since size < total_size).
	 * So we first map the 'big' image and then unmap the remainder
	 * at the end (that unmap is needed for ELF images with holes).
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	if ((type & MAP_FIXED_NOREPLACE) &&
	    PTR_ERR((void *)map_addr) == -EEXIST)
		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
			task_pid_nr(current), current->comm, (void *)addr);

	return map_addr;
}

#endif /* !elf_map */

static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
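
/*
 * Illustrative example: for two PT_LOAD segments, the first at p_vaddr
 * 0x400000 and the last ending at 0x601000 + 0x500 (p_vaddr + p_memsz),
 * the result is 0x601500 - 0x400000 == 0x201500 bytes: the whole
 * address range the image will occupy, holes included.
 */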

static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
	ssize_t rv;

	rv = kernel_read(file, buf, len, &pos);
	if (unlikely(rv != len)) {
		return (rv < 0) ? rv : -EIO;
	}
	return 0;
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval, err = -1;
	unsigned int size;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
	if (retval < 0) {
		err = retval;
		goto out;
	}

	/* Success! */
	err = 0;
out:
	if (err) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

static inline int make_prot(u32 p_flags)
{
	int prot = 0;

	if (p_flags & PF_R)
		prot |= PROT_READ;
	if (p_flags & PF_W)
		prot |= PROT_WRITE;
	if (p_flags & PF_X)
		prot |= PROT_EXEC;
	return prot;
}
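
/*
 * E.g. a typical text segment with p_flags == (PF_R | PF_X) maps to
 * PROT_READ | PROT_EXEC.
 */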

/*
 * This is much more generalized than the library routine read function,
 * so we keep this separate. Technically the library read function
 * is only provided so that we can read a.out libraries that have
 * an ELF header.
 */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	int bss_prot = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = make_prot(eppnt->p_flags);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED_NOREPLACE;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
			if (k > last_bss) {
				last_bss = k;
				bss_prot = elf_prot;
			}
		}
	}

	/*
	 * Now fill out the bss section: first pad the last page from
	 * the file up to the page boundary, and zero it from elf_bss
	 * up to the end of the page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * Next, align both the file and mem bss up to the page size,
	 * since this is where elf_bss was just zeroed up to, and where
	 * last_bss will end after the vm_brk_flags() below.
	 */
	elf_bss = ELF_PAGEALIGN(elf_bss);
	last_bss = ELF_PAGEALIGN(last_bss);
	/* Finally, if there is still more bss to allocate, do it. */
	if (last_bss > elf_bss) {
		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
				bss_prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			goto out;
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries. There is no binary dependent code anywhere else.
 */

static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int bss_prot = 0;
	int retval, i;
	unsigned long elf_entry;
	unsigned long e_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
	struct {
		struct elfhdr interp_elf_ex;
	} *loc;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
	struct mm_struct *mm;
	struct pt_regs *regs;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(elf_ex))
		goto out;
	if (elf_check_fdpic(elf_ex))
		goto out;
	if (!bprm->file->f_op->mmap)
		goto out;

	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
		char *elf_interpreter;

		if (elf_ppnt->p_type != PT_INTERP)
			continue;

		/*
		 * This is the program interpreter used for shared libraries -
		 * for now assume that this is an a.out format binary.
		 */
		retval = -ENOEXEC;
		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
			goto out_free_ph;

		retval = -ENOMEM;
		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
		if (!elf_interpreter)
			goto out_free_ph;

		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
				  elf_ppnt->p_offset);
		if (retval < 0)
			goto out_free_interp;
		/* make sure path is NULL terminated */
		retval = -ENOEXEC;
		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
			goto out_free_interp;

		interpreter = open_exec(elf_interpreter);
		kfree(elf_interpreter);
		retval = PTR_ERR(interpreter);
		if (IS_ERR(interpreter))
			goto out_free_ph;

		/*
		 * If the binary is not readable then enforce mm->dumpable = 0
		 * regardless of the interpreter's permissions.
		 */
		would_dump(bprm, interpreter);

		/* Get the exec headers */
		retval = elf_read(interpreter, &loc->interp_elf_ex,
				  sizeof(loc->interp_elf_ex), 0);
		if (retval < 0)
			goto out_free_dentry;

		break;

out_free_interp:
		kfree(elf_interpreter);
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex) ||
		    elf_check_fdpic(&loc->interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(elf_ex,
				!!interpreter, &loc->interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality. */
	SET_PERSONALITY2(*elf_ex, &arch_state);
	if (elf_read_implies_exec(*elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);
	install_exec_creds(bprm);

	/* Do this so that we can load the interpreter, if need be. We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	     i < elf_ex->e_phnum; i++, elf_ppnt++) {
		int elf_prot, elf_flags;
		unsigned long k, vaddr;
		unsigned long total_size = 0;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area. */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias,
					 bss_prot);
			if (retval)
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		elf_prot = make_prot(elf_ppnt->p_flags);

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		/*
		 * If we are loading ET_EXEC or we have already performed
		 * the ET_DYN load_addr calculations, proceed normally.
		 */
		if (elf_ex->e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (elf_ex->e_type == ET_DYN) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_DYN binaries to calculate the
			 * randomization (load_bias) for all the LOAD
			 * Program Headers, and to calculate the entire
			 * size of the ELF mapping (total_size). (Note that
			 * load_addr_set is set to true later once the
			 * initial mapping is performed.)
			 *
			 * There are effectively two types of ET_DYN
			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
			 * and loaders (ET_DYN without INTERP, since they
			 * _are_ the ELF interpreter). The loaders must
			 * be loaded away from programs since the program
			 * may otherwise collide with the loader (especially
			 * for ET_EXEC which does not have a randomized
			 * position). For example to handle invocations of
			 * "./ld.so someprog" to test out a new version of
			 * the loader, the subsequent program that the
			 * loader loads must avoid the loader itself, so
			 * they cannot share the same load range. Sufficient
			 * room for the brk must be allocated with the
			 * loader as well, since brk must be available with
			 * the loader.
			 *
			 * Therefore, programs are loaded offset from
			 * ELF_ET_DYN_BASE and loaders are loaded into the
			 * independently randomized mmap region (0 load_bias
			 * without MAP_FIXED).
			 */
			if (interpreter) {
				load_bias = ELF_ET_DYN_BASE;
				if (current->flags & PF_RANDOMIZE)
					load_bias += arch_mmap_rnd();
				elf_flags |= MAP_FIXED;
			} else
				load_bias = 0;

			/*
			 * Since load_bias is used for all subsequent loading
			 * calculations, we must lower it by the first vaddr
			 * so that the remaining calculations based on the
			 * ELF vaddrs will be correctly offset. The result
			 * is then page aligned.
			 */
			load_bias = ELF_PAGESTART(load_bias - vaddr);

			total_size = total_mapping_size(elf_phdata,
							elf_ex->e_phnum);
			if (!total_size) {
				retval = -EINVAL;
				goto out_free_dentry;
			}
		}
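
		/*
		 * Illustrative example: a PIE whose first PT_LOAD has
		 * p_vaddr == 0 ends up with load_bias equal to
		 * ELF_ET_DYN_BASE plus the mmap randomization offset,
		 * so every segment lands at load_bias + p_vaddr.
		 */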

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (elf_ex->e_type == ET_DYN) {
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk) {
			bss_prot = elf_prot;
			elf_brk = k;
		}
	}

	e_entry = elf_ex->e_entry + load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections. We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk, bss_prot);
	if (retval)
		goto out_free_dentry;
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (interpreter) {
		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    load_bias, interp_elf_phdata);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
	} else {
		elf_entry = e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(interp_elf_phdata);
	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	retval = create_elf_tables(bprm, elf_ex,
			  load_addr, interp_load_addr, e_entry);
	if (retval < 0)
		goto out;

	mm = current->mm;
	mm->end_code = end_code;
	mm->start_code = start_code;
	mm->start_data = start_data;
	mm->end_data = end_data;
	mm->start_stack = bprm->p;

	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		/*
		 * For architectures with ELF randomization, when executing
		 * a loader directly (i.e. no interpreter listed in ELF
		 * headers), move the brk area out of the mmap region
		 * (since it grows up, and may collide early with the stack
		 * growing down), and into the unused ELF_ET_DYN_BASE region.
		 */
		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
		    elf_ex->e_type == ET_DYN && !interpreter) {
			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
		}

		mm->brk = mm->start_brk = arch_randomize_brk(mm);
#ifdef compat_brk_randomized
		current->brk_randomized = 1;
#endif
	}

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

	regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example). In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself. This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	finalize_exec(bprm);
	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_phdata);
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
	if (retval < 0)
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
		goto out;
	if (elf_check_fdpic(&elf_ex))
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
	if (retval < 0)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
	if (bss > len) {
		error = vm_brk(len, bss - len);
		if (error)
			goto out_free_ph;
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
	/* Any vsyscall mappings? */
	if (vma == get_gate_vma(vma->vm_mm))
		return true;

	/*
	 * Assume that all vmas with a .name op should always be dumped.
	 * If this changes, a new vm_ops field can easily be added.
	 */
	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
		return true;

	/*
	 * arch_vma_name() returns non-NULL for special architecture mappings,
	 * such as vDSO sections.
	 */
	if (arch_vma_name(vma))
		return true;

	return false;
}

/*
 * Decide what to dump of a segment: part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	if (vma->vm_flags & VM_DONTDUMP)
		return 0;

	/* support for DAX */
	if (vma_is_dax(vma)) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
			goto whole;
		return 0;
	}

	/* Hugetlb memory check */
	if (is_vm_hugetlb_page(vma)) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
		return 0;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO)
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (file_inode(vma->vm_file)->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to. */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header. If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		mm_segment_t fs = get_fs();
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		/*
		 * Switch to the user "segment" for get_user(),
		 * then put back what elf_core_dump() had in place.
		 */
		set_fs(USER_DS);
		if (unlikely(get_user(word, header)))
			word = 0;
		set_fs(fs);
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
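
/*
 * The on-disk layout produced for each note (and what notesize()
 * accounts for): a struct elf_note header (n_namesz, n_descsz, n_type),
 * then the NUL-terminated name padded to a 4-byte boundary, then the
 * descriptor data, also padded to a 4-byte boundary.
 */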

static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	return dump_emit(cprm, &en, sizeof(en)) &&
	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader. It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
	} else {
		u64 utime, stime;

		task_cputime(p, &utime, &stime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
	}

	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		const kernel_siginfo_t *siginfo)
{
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
	set_fs(old_fs);
	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
static int fill_files_note(struct memelfnote *note)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned count, size, names_ofs, remaining, n;
	user_long_t *data;
	user_long_t *start_end_ofs;
	char *name_base, *name_curpos;

	/* *Estimated* file count and total data size needed */
	count = mm->map_count;
	if (count > UINT_MAX / 64)
		return -EINVAL;
	size = count * 64;

	names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
		return -EINVAL;
	size = round_up(size, PAGE_SIZE);
	/*
	 * "size" can be 0 here legitimately.
	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
	 */
	data = kvmalloc(size, GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(data))
		return -ENOMEM;

	start_end_ofs = data + 2;
	name_base = name_curpos = ((char *)data) + names_ofs;
	remaining = size - names_ofs;
	count = 0;
	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct file *file;
		const char *filename;

		file = vma->vm_file;
		if (!file)
			continue;
		filename = file_path(file, name_curpos, remaining);
		if (IS_ERR(filename)) {
			if (PTR_ERR(filename) == -ENAMETOOLONG) {
				kvfree(data);
				size = size * 5 / 4;
				goto alloc;
			}
			continue;
		}

		/* file_path() fills at the end, move name down */
		/* n = strlen(filename) + 1: */
		n = (name_curpos + remaining) - filename;
		remaining = filename - name_curpos;
		memmove(name_curpos, filename, n);
		name_curpos += n;

		*start_end_ofs++ = vma->vm_start;
		*start_end_ofs++ = vma->vm_end;
		*start_end_ofs++ = vma->vm_pgoff;
		count++;
	}

	/* Now we know exact count of files, can store it */
	data[0] = count;
	data[1] = PAGE_SIZE;
	/*
	 * Count usually is less than mm->map_count,
	 * we need to move filenames down.
	 */
	n = mm->map_count - count;
	if (n != 0) {
		unsigned shift_bytes = n * 3 * sizeof(data[0]);
		memmove(name_base - shift_bytes, name_base,
			name_curpos - name_base);
		name_curpos -= shift_bytes;
	}

	size = name_curpos - (char *)data;
	fill_note(note, "CORE", NT_FILE, size, data);
	return 0;
}
1660
4206d3aa
RM
1661#ifdef CORE_DUMP_USE_REGSET
1662#include <linux/regset.h>
1663
1664struct elf_thread_core_info {
1665 struct elf_thread_core_info *next;
1666 struct task_struct *task;
1667 struct elf_prstatus prstatus;
1668 struct memelfnote notes[0];
1669};
1670
1671struct elf_note_info {
1672 struct elf_thread_core_info *thread;
1673 struct memelfnote psinfo;
49ae4d4b 1674 struct memelfnote signote;
4206d3aa 1675 struct memelfnote auxv;
2aa362c4 1676 struct memelfnote files;
49ae4d4b 1677 user_siginfo_t csigdata;
4206d3aa
RM
1678 size_t size;
1679 int thread_notes;
1680};
1681
d31472b6
RM
1682/*
1683 * When a regset has a writeback hook, we call it on each thread before
1684 * dumping user memory. On register window machines, this makes sure the
1685 * user memory backing the register data is up to date before we read it.
1686 */
1687static void do_thread_regset_writeback(struct task_struct *task,
1688 const struct user_regset *regset)
1689{
1690 if (regset->writeback)
1691 regset->writeback(task, regset, 1);
1692}
1693
0953f65d 1694#ifndef PRSTATUS_SIZE
90954e7b 1695#define PRSTATUS_SIZE(S, R) sizeof(S)
0953f65d
L
1696#endif
1697
1698#ifndef SET_PR_FPVALID
90954e7b 1699#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
0953f65d
L
1700#endif
1701
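/*
 * The regset-size argument R is unused by these generic fallbacks; the
 * #ifndef guards exist so an architecture can supply variants that pick
 * a differently sized prstatus based on it (a hedged reading: the
 * overrides themselves live in arch code, not in this file).
 */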
4206d3aa
RM
1702static int fill_thread_core_info(struct elf_thread_core_info *t,
1703 const struct user_regset_view *view,
1704 long signr, size_t *total)
1705{
1706 unsigned int i;
27e64b4b 1707 unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
4206d3aa
RM
1708
1709 /*
1710 * NT_PRSTATUS is the one special case, because the regset data
1711 * goes into the pr_reg field inside the note contents, rather
1712 * than being the whole note contents. We fill the rest in here.
1713 * We assume that regset 0 is NT_PRSTATUS.
1714 */
1715 fill_prstatus(&t->prstatus, t->task, signr);
27e64b4b 1716 (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
90954e7b 1717 &t->prstatus.pr_reg, NULL);
4206d3aa
RM
1718
1719 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
27e64b4b 1720 PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
4206d3aa
RM
1721 *total += notesize(&t->notes[0]);
1722
d31472b6
RM
1723 do_thread_regset_writeback(t->task, &view->regsets[0]);
1724
4206d3aa
RM
1725 /*
1726 * Each of the other regsets might generate a note too. For each regset
1727 * that has no core_note_type or is inactive, we leave t->notes[i]
1728 * all zero and we'll know to skip writing it later.
1729 */
1730 for (i = 1; i < view->n; ++i) {
1731 const struct user_regset *regset = &view->regsets[i];
d31472b6 1732 do_thread_regset_writeback(t->task, regset);
c8e25258 1733 if (regset->core_note_type && regset->get &&
2f819db5 1734 (!regset->active || regset->active(t->task, regset) > 0)) {
4206d3aa 1735 int ret;
27e64b4b 1736 size_t size = regset_size(t->task, regset);
4206d3aa
RM
1737 void *data = kmalloc(size, GFP_KERNEL);
1738 if (unlikely(!data))
1739 return 0;
1740 ret = regset->get(t->task, regset,
1741 0, size, data, NULL);
1742 if (unlikely(ret))
1743 kfree(data);
1744 else {
1745 if (regset->core_note_type != NT_PRFPREG)
1746 fill_note(&t->notes[i], "LINUX",
1747 regset->core_note_type,
1748 size, data);
1749 else {
90954e7b 1750 SET_PR_FPVALID(&t->prstatus,
27e64b4b 1751 1, regset0_size);
4206d3aa
RM
1752 fill_note(&t->notes[i], "CORE",
1753 NT_PRFPREG, size, data);
1754 }
1755 *total += notesize(&t->notes[i]);
1756 }
1757 }
1758 }
1759
1760 return 1;
1761}
1762
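/*
 * Illustrative shape of the per-thread note array built above (regset
 * names beyond NT_PRSTATUS/NT_PRFPREG depend on the architecture's
 * regset view and are only examples here):
 *   t->notes[0] = "CORE"/NT_PRSTATUS, pr_reg filled from regset 0
 *   t->notes[i] = "CORE"/NT_PRFPREG (also sets pr_fpvalid), or
 *                 "LINUX"/<core_note_type>, e.g. NT_X86_XSTATE
 *   inactive regsets leave t->notes[i] zeroed and are skipped on write.
 */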
1763static int fill_note_info(struct elfhdr *elf, int phdrs,
1764 struct elf_note_info *info,
ae7795bc 1765 const kernel_siginfo_t *siginfo, struct pt_regs *regs)
4206d3aa
RM
1766{
1767 struct task_struct *dump_task = current;
1768 const struct user_regset_view *view = task_user_regset_view(dump_task);
1769 struct elf_thread_core_info *t;
1770 struct elf_prpsinfo *psinfo;
83914441 1771 struct core_thread *ct;
4206d3aa
RM
1772 unsigned int i;
1773
1774 info->size = 0;
1775 info->thread = NULL;
1776
1777 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
6899e92d
AC
1778 if (psinfo == NULL) {
1779 info->psinfo.data = NULL; /* So we don't free this wrongly */
4206d3aa 1780 return 0;
6899e92d 1781 }
4206d3aa 1782
e2dbe125
AW
1783 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1784
4206d3aa
RM
1785 /*
1786 * Figure out how many notes we're going to need for each thread.
1787 */
1788 info->thread_notes = 0;
1789 for (i = 0; i < view->n; ++i)
1790 if (view->regsets[i].core_note_type != 0)
1791 ++info->thread_notes;
1792
1793 /*
1794 * Sanity check. We rely on regset 0 being NT_PRSTATUS,
1795 * since it is our one special case.
1796 */
1797 if (unlikely(info->thread_notes == 0) ||
1798 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1799 WARN_ON(1);
1800 return 0;
1801 }
1802
1803 /*
1804 * Initialize the ELF file header.
1805 */
1806 fill_elf_header(elf, phdrs,
d3330cf0 1807 view->e_machine, view->e_flags);
4206d3aa
RM
1808
1809 /*
1810 * Allocate a structure for each thread.
1811 */
83914441
ON
1812 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1813 t = kzalloc(offsetof(struct elf_thread_core_info,
1814 notes[info->thread_notes]),
1815 GFP_KERNEL);
1816 if (unlikely(!t))
1817 return 0;
1818
1819 t->task = ct->task;
1820 if (ct->task == dump_task || !info->thread) {
1821 t->next = info->thread;
1822 info->thread = t;
1823 } else {
1824 /*
1825 * Make sure to keep the original task at
1826 * the head of the list.
1827 */
1828 t->next = info->thread->next;
1829 info->thread->next = t;
4206d3aa 1830 }
83914441 1831 }
4206d3aa
RM
1832
1833 /*
1834 * Now fill in each thread's information.
1835 */
1836 for (t = info->thread; t != NULL; t = t->next)
5ab1c309 1837 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
4206d3aa
RM
1838 return 0;
1839
1840 /*
1841 * Fill in the two process-wide notes.
1842 */
1843 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1844 info->size += notesize(&info->psinfo);
1845
49ae4d4b
DV
1846 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1847 info->size += notesize(&info->signote);
1848
4206d3aa
RM
1849 fill_auxv_note(&info->auxv, current->mm);
1850 info->size += notesize(&info->auxv);
1851
72023656
DA
1852 if (fill_files_note(&info->files) == 0)
1853 info->size += notesize(&info->files);
2aa362c4 1854
4206d3aa
RM
1855 return 1;
1856}
1857
1858static size_t get_note_info_size(struct elf_note_info *info)
1859{
1860 return info->size;
1861}
1862
1863/*
1864 * Write all the notes for each thread. When writing the first thread, the
1865 * process-wide notes are interleaved after the first thread-specific note.
1866 */
1867static int write_note_info(struct elf_note_info *info,
ecc8c772 1868 struct coredump_params *cprm)
4206d3aa 1869{
b219e25f 1870 bool first = true;
4206d3aa
RM
1871 struct elf_thread_core_info *t = info->thread;
1872
1873 do {
1874 int i;
1875
ecc8c772 1876 if (!writenote(&t->notes[0], cprm))
4206d3aa
RM
1877 return 0;
1878
ecc8c772 1879 if (first && !writenote(&info->psinfo, cprm))
4206d3aa 1880 return 0;
ecc8c772 1881 if (first && !writenote(&info->signote, cprm))
49ae4d4b 1882 return 0;
ecc8c772 1883 if (first && !writenote(&info->auxv, cprm))
4206d3aa 1884 return 0;
72023656 1885 if (first && info->files.data &&
ecc8c772 1886 !writenote(&info->files, cprm))
2aa362c4 1887 return 0;
4206d3aa
RM
1888
1889 for (i = 1; i < info->thread_notes; ++i)
1890 if (t->notes[i].data &&
ecc8c772 1891 !writenote(&t->notes[i], cprm))
4206d3aa
RM
1892 return 0;
1893
b219e25f 1894 first = false;
4206d3aa
RM
1895 t = t->next;
1896 } while (t);
1897
1898 return 1;
1899}
1900
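/*
 * Example of the resulting note order for a two-thread dump (follows
 * directly from the loop above): T1 NT_PRSTATUS, NT_PRPSINFO,
 * NT_SIGINFO, NT_AUXV, NT_FILE (if present), T1's remaining regset
 * notes, then T2 NT_PRSTATUS and T2's remaining regset notes.
 */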
1901static void free_note_info(struct elf_note_info *info)
1902{
1903 struct elf_thread_core_info *threads = info->thread;
1904 while (threads) {
1905 unsigned int i;
1906 struct elf_thread_core_info *t = threads;
1907 threads = t->next;
1908 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1909 for (i = 1; i < info->thread_notes; ++i)
1910 kfree(t->notes[i].data);
1911 kfree(t);
1912 }
1913 kfree(info->psinfo.data);
86a2bb5a 1914 kvfree(info->files.data);
4206d3aa
RM
1915}
1916
1917#else
1918
1da177e4
LT
1919/* Here is the structure in which status of each thread is captured. */
1920struct elf_thread_status
1921{
1922 struct list_head list;
1923 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1924 elf_fpregset_t fpu; /* NT_PRFPREG */
1925 struct task_struct *thread;
1926#ifdef ELF_CORE_COPY_XFPREGS
5b20cd80 1927 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1da177e4
LT
1928#endif
1929 struct memelfnote notes[3];
1930 int num_notes;
1931};
1932
1933/*
1934 * In order to add the specific thread information for the elf file format,
f4e5cc2c
JJ
1935 * we need to keep a linked list of every thread's pr_status and then create
1936 * a single section for them in the final core file.
1da177e4
LT
1937 */
1938static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1939{
1940 int sz = 0;
1941 struct task_struct *p = t->thread;
1942 t->num_notes = 0;
1943
1944 fill_prstatus(&t->prstatus, p, signr);
1945 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1946
f4e5cc2c
JJ
1947 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1948 &(t->prstatus));
1da177e4
LT
1949 t->num_notes++;
1950 sz += notesize(&t->notes[0]);
1951
f4e5cc2c
JJ
1952 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1953 &t->fpu))) {
1954 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1955 &(t->fpu));
1da177e4
LT
1956 t->num_notes++;
1957 sz += notesize(&t->notes[1]);
1958 }
1959
1960#ifdef ELF_CORE_COPY_XFPREGS
1961 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
5b20cd80
MN
1962 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1963 sizeof(t->xfpu), &t->xfpu);
1da177e4
LT
1964 t->num_notes++;
1965 sz += notesize(&t->notes[2]);
1966 }
1967#endif
1968 return sz;
1969}
1970
3aba481f
RM
1971struct elf_note_info {
1972 struct memelfnote *notes;
72023656 1973 struct memelfnote *notes_files;
3aba481f
RM
1974 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1975 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1976 struct list_head thread_list;
1977 elf_fpregset_t *fpu;
1978#ifdef ELF_CORE_COPY_XFPREGS
1979 elf_fpxregset_t *xfpu;
1980#endif
49ae4d4b 1981 user_siginfo_t csigdata;
3aba481f
RM
1982 int thread_status_size;
1983 int numnote;
1984};
1985
0cf062d0 1986static int elf_note_info_init(struct elf_note_info *info)
3aba481f 1987{
0cf062d0 1988 memset(info, 0, sizeof(*info));
3aba481f
RM
1989 INIT_LIST_HEAD(&info->thread_list);
1990
49ae4d4b 1991 /* Allocate space for ELF notes */
6da2ec56 1992 info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
3aba481f
RM
1993 if (!info->notes)
1994 return 0;
1995 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1996 if (!info->psinfo)
f34f9d18 1997 return 0;
3aba481f
RM
1998 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1999 if (!info->prstatus)
f34f9d18 2000 return 0;
3aba481f
RM
2001 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2002 if (!info->fpu)
f34f9d18 2003 return 0;
3aba481f
RM
2004#ifdef ELF_CORE_COPY_XFPREGS
2005 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2006 if (!info->xfpu)
f34f9d18 2007 return 0;
3aba481f 2008#endif
0cf062d0 2009 return 1;
0cf062d0
AW
2010}
2011
2012static int fill_note_info(struct elfhdr *elf, int phdrs,
2013 struct elf_note_info *info,
ae7795bc 2014 const kernel_siginfo_t *siginfo, struct pt_regs *regs)
0cf062d0 2015{
afabada9
AV
2016 struct core_thread *ct;
2017 struct elf_thread_status *ets;
0cf062d0
AW
2018
2019 if (!elf_note_info_init(info))
2020 return 0;
3aba481f 2021
afabada9
AV
2022 for (ct = current->mm->core_state->dumper.next;
2023 ct; ct = ct->next) {
2024 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2025 if (!ets)
2026 return 0;
83914441 2027
afabada9
AV
2028 ets->thread = ct->task;
2029 list_add(&ets->list, &info->thread_list);
2030 }
83914441 2031
93f044e2 2032 list_for_each_entry(ets, &info->thread_list, list) {
afabada9 2033 int sz;
3aba481f 2034
afabada9
AV
2035 sz = elf_dump_thread_status(siginfo->si_signo, ets);
2036 info->thread_status_size += sz;
3aba481f
RM
2037 }
2038 /* now collect the dump for the current */
2039 memset(info->prstatus, 0, sizeof(*info->prstatus));
5ab1c309 2040 fill_prstatus(info->prstatus, current, siginfo->si_signo);
3aba481f
RM
2041 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2042
2043 /* Set up header */
d3330cf0 2044 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
3aba481f
RM
2045
2046 /*
2047 * Set up the notes in similar form to SVR4 core dumps made
2048 * with info from their /proc.
2049 */
2050
2051 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2052 sizeof(*info->prstatus), info->prstatus);
2053 fill_psinfo(info->psinfo, current->group_leader, current->mm);
2054 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2055 sizeof(*info->psinfo), info->psinfo);
2056
2aa362c4
DV
2057 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2058 fill_auxv_note(info->notes + 3, current->mm);
72023656 2059 info->numnote = 4;
3aba481f 2060
72023656
DA
2061 if (fill_files_note(info->notes + info->numnote) == 0) {
2062 info->notes_files = info->notes + info->numnote;
2063 info->numnote++;
2064 }
3aba481f
RM
2065
2066 /* Try to dump the FPU. */
2067 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2068 info->fpu);
2069 if (info->prstatus->pr_fpvalid)
2070 fill_note(info->notes + info->numnote++,
2071 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2072#ifdef ELF_CORE_COPY_XFPREGS
2073 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2074 fill_note(info->notes + info->numnote++,
2075 "LINUX", ELF_CORE_XFPREG_TYPE,
2076 sizeof(*info->xfpu), info->xfpu);
2077#endif
2078
2079 return 1;
3aba481f
RM
2080}
2081
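/*
 * Slot usage in info->notes as filled above: 0 = NT_PRSTATUS,
 * 1 = NT_PRPSINFO, 2 = NT_SIGINFO, 3 = NT_AUXV, then optionally
 * NT_FILE, NT_PRFPREG and ELF_CORE_XFPREG_TYPE -- at most 7 of the
 * 8 slots allocated in elf_note_info_init().
 */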
2082static size_t get_note_info_size(struct elf_note_info *info)
2083{
2084 int sz = 0;
2085 int i;
2086
2087 for (i = 0; i < info->numnote; i++)
2088 sz += notesize(info->notes + i);
2089
2090 sz += info->thread_status_size;
2091
2092 return sz;
2093}
2094
2095static int write_note_info(struct elf_note_info *info,
ecc8c772 2096 struct coredump_params *cprm)
3aba481f 2097{
93f044e2 2098 struct elf_thread_status *ets;
3aba481f 2099 int i;
3aba481f
RM
2100
2101 for (i = 0; i < info->numnote; i++)
ecc8c772 2102 if (!writenote(info->notes + i, cprm))
3aba481f
RM
2103 return 0;
2104
2105 /* write out the thread status notes section */
93f044e2
AD
2106 list_for_each_entry(ets, &info->thread_list, list) {
2107 for (i = 0; i < ets->num_notes; i++)
2108 if (!writenote(&ets->notes[i], cprm))
3aba481f
RM
2109 return 0;
2110 }
2111
2112 return 1;
2113}
2114
2115static void free_note_info(struct elf_note_info *info)
2116{
2117 while (!list_empty(&info->thread_list)) {
2118 struct list_head *tmp = info->thread_list.next;
2119 list_del(tmp);
2120 kfree(list_entry(tmp, struct elf_thread_status, list));
2121 }
2122
72023656
DA
2123 /* Free data possibly allocated by fill_files_note(): */
2124 if (info->notes_files)
86a2bb5a 2125 kvfree(info->notes_files->data);
2aa362c4 2126
3aba481f
RM
2127 kfree(info->prstatus);
2128 kfree(info->psinfo);
2129 kfree(info->notes);
2130 kfree(info->fpu);
2131#ifdef ELF_CORE_COPY_XFPREGS
2132 kfree(info->xfpu);
2133#endif
2134}
2135
4206d3aa
RM
2136#endif
2137
f47aef55
RM
2138static struct vm_area_struct *first_vma(struct task_struct *tsk,
2139 struct vm_area_struct *gate_vma)
2140{
2141 struct vm_area_struct *ret = tsk->mm->mmap;
2142
2143 if (ret)
2144 return ret;
2145 return gate_vma;
2146}
2147/*
2148 * Helper function for iterating across a vma list. It ensures that the caller
2149 * will visit `gate_vma' prior to terminating the search.
2150 */
2151static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2152 struct vm_area_struct *gate_vma)
2153{
2154 struct vm_area_struct *ret;
2155
2156 ret = this_vma->vm_next;
2157 if (ret)
2158 return ret;
2159 if (this_vma == gate_vma)
2160 return NULL;
2161 return gate_vma;
2162}
2163
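/*
 * Typical iteration over all dumpable VMAs plus the gate vma, exactly
 * as elf_core_dump() below does it:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *	     vma = next_vma(vma, gate_vma))
 *		...;
 */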
8d9032bb
DH
2164static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2165 elf_addr_t e_shoff, int segs)
2166{
2167 elf->e_shoff = e_shoff;
2168 elf->e_shentsize = sizeof(*shdr4extnum);
2169 elf->e_shnum = 1;
2170 elf->e_shstrndx = SHN_UNDEF;
2171
2172 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2173
2174 shdr4extnum->sh_type = SHT_NULL;
2175 shdr4extnum->sh_size = elf->e_shnum;
2176 shdr4extnum->sh_link = elf->e_shstrndx;
2177 shdr4extnum->sh_info = segs;
2178}
2179
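/*
 * Sketch (not from this file): how a core-file reader would recover the
 * real segment count written by fill_extnum_info(). With extended
 * numbering, e_phnum holds PN_XNUM and the true count sits in section
 * header 0's sh_info.
 */
static unsigned int __maybe_unused real_segment_count(const struct elfhdr *elf,
						      const struct elf_shdr *shdr0)
{
	return elf->e_phnum == PN_XNUM ? shdr0->sh_info : elf->e_phnum;
}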
1da177e4
LT
2180/*
2181 * Actual dumper
2182 *
2183 * This is a two-pass process; first we find the offsets of the bits,
2184 * and then they are actually written out. If we exceed the core file
2185 * size limit, we just truncate.
2186 */
f6151dfe 2187static int elf_core_dump(struct coredump_params *cprm)
1da177e4 2188{
1da177e4
LT
2189 int has_dumped = 0;
2190 mm_segment_t fs;
52f5592e
JL
2191 int segs, i;
2192 size_t vma_data_size = 0;
f47aef55 2193 struct vm_area_struct *vma, *gate_vma;
225a3f53 2194 struct elfhdr elf;
cdc3d562 2195 loff_t offset = 0, dataoff;
72023656 2196 struct elf_note_info info = { };
93eb211e 2197 struct elf_phdr *phdr4note = NULL;
8d9032bb
DH
2198 struct elf_shdr *shdr4extnum = NULL;
2199 Elf_Half e_phnum;
2200 elf_addr_t e_shoff;
52f5592e 2201 elf_addr_t *vma_filesz = NULL;
1da177e4
LT
2202
2203 /*
2204 * We no longer stop all VM operations.
2205 *
f4e5cc2c
JJ
2206 * This is because those processes that could possibly change map_count
2207 * or the mmap / vma pages are now blocked in do_exit until current
2208 * finishes this core dump.
1da177e4
LT
2209 *
2210 * Only ptrace can touch these memory addresses, but it doesn't change
f4e5cc2c 2211 * the map_count or the pages allocated. So no possibility of crashing
1da177e4
LT
2212 * exists while dumping the mm->vm_next areas to the core file.
2213 */
2214
341c87bf
KH
2215 /*
2216 * The number of segs is recorded in the ELF header as a 16-bit value.
2217 * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2218 */
1da177e4 2219 segs = current->mm->map_count;
1fcccbac 2220 segs += elf_core_extra_phdrs();
1da177e4 2221
31db58b3 2222 gate_vma = get_gate_vma(current->mm);
f47aef55
RM
2223 if (gate_vma != NULL)
2224 segs++;
2225
8d9032bb
DH
2226 /* for notes section */
2227 segs++;
2228
2229 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2230 * this, the kernel supports extended numbering. Have a look at
2231 * include/linux/elf.h for further information. */
2232 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2233
1da177e4 2234 /*
3aba481f
RM
2235 * Collect all the non-memory information about the process for the
2236 * notes. This also sets up the file header.
1da177e4 2237 */
225a3f53 2238 if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
3aba481f 2239 goto cleanup;
1da177e4 2240
3aba481f 2241 has_dumped = 1;
079148b9 2242
1da177e4
LT
2243 fs = get_fs();
2244 set_fs(KERNEL_DS);
2245
225a3f53 2246 offset += sizeof(elf); /* Elf header */
8d9032bb 2247 offset += segs * sizeof(struct elf_phdr); /* Program headers */
1da177e4
LT
2248
2249 /* Write notes phdr entry */
2250 {
3aba481f 2251 size_t sz = get_note_info_size(&info);
1da177e4 2252
e5501492 2253 sz += elf_coredump_extra_notes_size();
bf1ab978 2254
93eb211e
DH
2255 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2256 if (!phdr4note)
088e7af7 2257 goto end_coredump;
93eb211e
DH
2258
2259 fill_elf_note_phdr(phdr4note, sz, offset);
2260 offset += sz;
1da177e4
LT
2261 }
2262
1da177e4
LT
2263 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2264
1fbede6e
AD
2265 /*
2266 * A process with zero VMAs will get ZERO_SIZE_PTR here.
2267 * Let the coredump continue so we at least capture register state.
2268 */
86a2bb5a
AD
2269 vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
2270 GFP_KERNEL);
1fbede6e 2271 if (!vma_filesz)
52f5592e
JL
2272 goto end_coredump;
2273
2274 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2275 vma = next_vma(vma, gate_vma)) {
2276 unsigned long dump_size;
2277
2278 dump_size = vma_dump_size(vma, cprm->mm_flags);
2279 vma_filesz[i++] = dump_size;
2280 vma_data_size += dump_size;
2281 }
2282
2283 offset += vma_data_size;
8d9032bb
DH
2284 offset += elf_core_extra_data_size();
2285 e_shoff = offset;
2286
2287 if (e_phnum == PN_XNUM) {
2288 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2289 if (!shdr4extnum)
2290 goto end_coredump;
225a3f53 2291 fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
8d9032bb
DH
2292 }
2293
2294 offset = dataoff;
2295
225a3f53 2296 if (!dump_emit(cprm, &elf, sizeof(elf)))
93eb211e
DH
2297 goto end_coredump;
2298
ecc8c772 2299 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
93eb211e
DH
2300 goto end_coredump;
2301
1da177e4 2302 /* Write program headers for segments dump */
52f5592e 2303 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
f47aef55 2304 vma = next_vma(vma, gate_vma)) {
1da177e4 2305 struct elf_phdr phdr;
1da177e4
LT
2306
2307 phdr.p_type = PT_LOAD;
2308 phdr.p_offset = offset;
2309 phdr.p_vaddr = vma->vm_start;
2310 phdr.p_paddr = 0;
52f5592e 2311 phdr.p_filesz = vma_filesz[i++];
82df3973 2312 phdr.p_memsz = vma->vm_end - vma->vm_start;
1da177e4
LT
2313 offset += phdr.p_filesz;
2314 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
f4e5cc2c
JJ
2315 if (vma->vm_flags & VM_WRITE)
2316 phdr.p_flags |= PF_W;
2317 if (vma->vm_flags & VM_EXEC)
2318 phdr.p_flags |= PF_X;
1da177e4
LT
2319 phdr.p_align = ELF_EXEC_PAGESIZE;
2320
ecc8c772 2321 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
088e7af7 2322 goto end_coredump;
1da177e4
LT
2323 }
2324
506f21c5 2325 if (!elf_core_write_extra_phdrs(cprm, offset))
1fcccbac 2326 goto end_coredump;
1da177e4
LT
2327
2328 /* write out the notes section */
ecc8c772 2329 if (!write_note_info(&info, cprm))
3aba481f 2330 goto end_coredump;
1da177e4 2331
cdc3d562 2332 if (elf_coredump_extra_notes_write(cprm))
e5501492 2333 goto end_coredump;
bf1ab978 2334
d025c9db 2335 /* Align to page */
1607f09c 2336 if (!dump_skip(cprm, dataoff - cprm->pos))
f3e8fccd 2337 goto end_coredump;
1da177e4 2338
52f5592e 2339 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
f47aef55 2340 vma = next_vma(vma, gate_vma)) {
1da177e4 2341 unsigned long addr;
82df3973 2342 unsigned long end;
1da177e4 2343
52f5592e 2344 end = vma->vm_start + vma_filesz[i++];
1da177e4 2345
82df3973 2346 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
f4e5cc2c 2347 struct page *page;
f3e8fccd
HD
2348 int stop;
2349
2350 page = get_dump_page(addr);
2351 if (page) {
2352 void *kaddr = kmap(page);
13046ece 2353 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
f3e8fccd 2354 kunmap(page);
09cbfeaf 2355 put_page(page);
f3e8fccd 2356 } else
9b56d543 2357 stop = !dump_skip(cprm, PAGE_SIZE);
f3e8fccd
HD
2358 if (stop)
2359 goto end_coredump;
1da177e4
LT
2360 }
2361 }
4d22c75d 2362 dump_truncate(cprm);
1da177e4 2363
aa3e7eaf 2364 if (!elf_core_write_extra_data(cprm))
1fcccbac 2365 goto end_coredump;
1da177e4 2366
8d9032bb 2367 if (e_phnum == PN_XNUM) {
13046ece 2368 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
8d9032bb
DH
2369 goto end_coredump;
2370 }
2371
1da177e4
LT
2372end_coredump:
2373 set_fs(fs);
2374
2375cleanup:
3aba481f 2376 free_note_info(&info);
8d9032bb 2377 kfree(shdr4extnum);
86a2bb5a 2378 kvfree(vma_filesz);
93eb211e 2379 kfree(phdr4note);
1da177e4 2380 return has_dumped;
1da177e4
LT
2381}
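/*
 * Resulting core file layout, summarizing the offset bookkeeping above:
 *
 *	[ELF header][notes phdr][one phdr per vma (+ extra phdrs)]
 *	[note data][pad to ELF_EXEC_PAGESIZE][vma contents ...]
 *	[extra data][extnum section header, only when e_phnum == PN_XNUM]
 */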
2382
698ba7b5 2383#endif /* CONFIG_ELF_CORE */
1da177e4
LT
2384
2385static int __init init_elf_binfmt(void)
2386{
8fc3dc5a
AV
2387 register_binfmt(&elf_format);
2388 return 0;
1da177e4
LT
2389}
2390
2391static void __exit exit_elf_binfmt(void)
2392{
2393 /* Remove the COFF and ELF loaders. */
2394 unregister_binfmt(&elf_format);
2395}
2396
2397core_initcall(init_elf_binfmt);
2398module_exit(exit_elf_binfmt);
2399MODULE_LICENSE("GPL");