2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/dax.h>
39 #include <linux/uaccess.h>
40 #include <asm/param.h>
44 #define user_long_t long
46 #ifndef user_siginfo_t
47 #define user_siginfo_t siginfo_t
50 static int load_elf_binary(struct linux_binprm *bprm);
51 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
52 int, int, unsigned long);
55 static int load_elf_library(struct file *);
57 #define load_elf_library NULL
61 * If we don't support core dumping, then supply a NULL so we
64 #ifdef CONFIG_ELF_CORE
65 static int elf_core_dump(struct coredump_params *cprm);
67 #define elf_core_dump NULL
70 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
71 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
73 #define ELF_MIN_ALIGN PAGE_SIZE
76 #ifndef ELF_CORE_EFLAGS
77 #define ELF_CORE_EFLAGS 0
80 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
81 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
82 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
84 static struct linux_binfmt elf_format = {
85 .module = THIS_MODULE,
86 .load_binary = load_elf_binary,
87 .load_shlib = load_elf_library,
88 .core_dump = elf_core_dump,
89 .min_coredump = ELF_EXEC_PAGESIZE,
92 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
/*
 * set_brk() - map the tail of the bss and record the new program break.
 * Both bounds are rounded to ELF_MIN_ALIGN, the resulting anonymous
 * region is mapped with vm_brk_flags() (marked VM_EXEC when the segment
 * header asked for PROT_EXEC - ppc32 needs executable bss), and
 * current->mm's brk/start_brk are advanced to @end.
 * NOTE(review): this listing is gapped; the original's error-return
 * lines around the vm_brk_flags() call are not visible here.
 */
94 static int set_brk(unsigned long start, unsigned long end, int prot)
96 	start = ELF_PAGEALIGN(start);
97 	end = ELF_PAGEALIGN(end);
100 	 * Map the last of the bss segment.
101 	 * If the header is requesting these pages to be
102 	 * executable, honour that (ppc32 needs this).
104 		int error = vm_brk_flags(start, end - start,
105 				prot & PROT_EXEC ? VM_EXEC : 0);
/* The break now sits at the (page-aligned) end of the bss. */
109 	current->mm->start_brk = current->mm->brk = end;
113 /* We need to explicitly zero any fractional pages
114 after the data section (i.e. bss). This would
115 contain the junk from the file that should not
/*
 * padzero() - zero the fractional page following the data segment.
 * Computes the byte offset of @elf_bss within an ELF page and, when
 * non-zero, clear_user()s the remainder of that page so file junk that
 * was mapped along with the data segment is not visible as bss.
 * NOTE(review): gapped listing - the "if (nbyte)" guard and the
 * -EFAULT/0 return lines of the original are not visible here.
 */
118 static int padzero(unsigned long elf_bss)
122 	nbyte = ELF_PAGEOFFSET(elf_bss);
124 		nbyte = ELF_MIN_ALIGN - nbyte;
125 		if (clear_user((void __user *) elf_bss, nbyte))
131 /* Let's use some macros to make this stack manipulation a little clearer */
132 #ifdef CONFIG_STACK_GROWSUP
133 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
134 #define STACK_ROUND(sp, items) \
135 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
136 #define STACK_ALLOC(sp, len) ({ \
137 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
140 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
141 #define STACK_ROUND(sp, items) \
142 (((unsigned long) (sp - items)) &~ 15UL)
143 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
146 #ifndef ELF_BASE_PLATFORM
148 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
149 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
150 * will be copied to the user stack in the same manner as AT_PLATFORM.
152 #define ELF_BASE_PLATFORM NULL
/*
 * create_elf_tables() - build argc/argv/envp and the ELF auxiliary
 * vector on the new process's user stack.
 * Copies the platform string(s) and 16 random bytes (AT_RANDOM seed)
 * to the stack, assembles the auxv in current->mm->saved_auxv, rounds
 * the stack pointer, then writes argc, the argv and envp pointer
 * arrays (fixing up mm->arg_start/arg_end/env_start/env_end as it
 * walks the strings already copied by exec), and finally the auxv.
 * NOTE(review): gapped listing - several guard/return lines of the
 * original body are not visible.
 */
156 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
157 		unsigned long load_addr, unsigned long interp_load_addr)
159 	unsigned long p = bprm->p;
160 	int argc = bprm->argc;
161 	int envc = bprm->envc;
162 	elf_addr_t __user *argv;
163 	elf_addr_t __user *envp;
164 	elf_addr_t __user *sp;
165 	elf_addr_t __user *u_platform;
166 	elf_addr_t __user *u_base_platform;
167 	elf_addr_t __user *u_rand_bytes;
168 	const char *k_platform = ELF_PLATFORM;
169 	const char *k_base_platform = ELF_BASE_PLATFORM;
170 	unsigned char k_rand_bytes[16];
172 	elf_addr_t *elf_info;
174 	const struct cred *cred = current_cred();
175 	struct vm_area_struct *vma;
178 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
179 	 * evictions by the processes running on the same package. One
180 	 * thing we can do is to shuffle the initial stack for them.
183 	p = arch_align_stack(p);
186 	 * If this architecture has a platform capability string, copy it
187 	 * to userspace.  In some cases (Sparc), this info is impossible
188 	 * for userspace to get any other way, in others (i386) it is
193 		size_t len = strlen(k_platform) + 1;
/* STACK_ALLOC moves p past len bytes and yields the user address. */
195 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
196 		if (__copy_to_user(u_platform, k_platform, len))
201 	 * If this architecture has a "base" platform capability
202 	 * string, copy it to userspace.
204 	u_base_platform = NULL;
205 	if (k_base_platform) {
206 		size_t len = strlen(k_base_platform) + 1;
208 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
209 		if (__copy_to_user(u_base_platform, k_base_platform, len))
/* Seed material exposed to userspace via AT_RANDOM (used by glibc). */
214 	 * Generate 16 random bytes for userspace PRNG seeding.
216 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
217 	u_rand_bytes = (elf_addr_t __user *)
218 		STACK_ALLOC(p, sizeof(k_rand_bytes));
219 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
222 	/* Create the ELF interpreter info */
223 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
224 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
225 #define NEW_AUX_ENT(id, val) \
227 		elf_info[ei_index++] = id; \
228 		elf_info[ei_index++] = val; \
233 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
235 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
236 	 * ARCH_DLINFO changes
/* Standard auxv entries consumed by the dynamic linker / libc. */
240 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
241 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
242 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
243 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
244 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
245 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
246 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
247 	NEW_AUX_ENT(AT_FLAGS, 0);
248 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
249 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
250 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
251 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
252 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
253 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
254 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
256 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
258 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
260 		NEW_AUX_ENT(AT_PLATFORM,
261 			    (elf_addr_t)(unsigned long)u_platform);
263 	if (k_base_platform) {
264 		NEW_AUX_ENT(AT_BASE_PLATFORM,
265 			    (elf_addr_t)(unsigned long)u_base_platform);
267 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
268 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
271 	/* AT_NULL is zero; clear the rest too */
272 	memset(&elf_info[ei_index], 0,
273 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
275 	/* And advance past the AT_NULL entry. */
278 	sp = STACK_ADD(p, ei_index);
/* argc + argv pointers + NULL + envp pointers + NULL. */
280 	items = (argc + 1) + (envc + 1) + 1;
281 	bprm->p = STACK_ROUND(sp, items);
283 	/* Point sp at the lowest address on the stack */
284 #ifdef CONFIG_STACK_GROWSUP
285 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
286 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
288 	sp = (elf_addr_t __user *)bprm->p;
293 	 * Grow the stack manually; some architectures have a limit on how
294 	 * far ahead a user-space access may be in order to grow the stack.
296 	vma = find_extend_vma(current->mm, bprm->p);
300 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
301 	if (__put_user(argc, sp++))
304 	envp = argv + argc + 1;
306 	/* Populate argv and envp */
307 	p = current->mm->arg_end = current->mm->arg_start;
310 		if (__put_user((elf_addr_t)p, argv++))
312 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
313 		if (!len || len > MAX_ARG_STRLEN)
317 	if (__put_user(0, argv))
319 	current->mm->arg_end = current->mm->env_start = p;
322 		if (__put_user((elf_addr_t)p, envp++))
324 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
325 		if (!len || len > MAX_ARG_STRLEN)
329 	if (__put_user(0, envp))
331 	current->mm->env_end = p;
333 	/* Put the elf_info on the stack in the right place.  */
334 	sp = (elf_addr_t __user *)envp + 1;
335 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
/*
 * elf_map() - mmap one PT_LOAD segment at its (page-rounded) address.
 * @size/@off fold the segment's in-page offset back in so the mapping
 * starts on an ELF page boundary. When @total_size is non-zero (the
 * first mapping of an interpreter image) the whole image extent is
 * mapped in one go - so randomization cannot land later segments on
 * top of the main binary - and the tail beyond this segment is
 * immediately unmapped again (needed for images with holes).
 */
342 static unsigned long elf_map(struct file *filep, unsigned long addr,
343 		struct elf_phdr *eppnt, int prot, int type,
344 		unsigned long total_size)
346 	unsigned long map_addr;
347 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
348 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
349 	addr = ELF_PAGESTART(addr);
350 	size = ELF_PAGEALIGN(size);
352 	/* mmap() will return -EINVAL if given a zero size, but a
353 	 * segment with zero filesize is perfectly valid */
358 	 * total_size is the size of the ELF (interpreter) image.
359 	 * The _first_ mmap needs to know the full size, otherwise
360 	 * randomization might put this image into an overlapping
361 	 * position with the ELF binary image. (since size < total_size)
362 	 * So we first map the 'big' image - and unmap the remainder at
363 	 * the end. (which unmap is needed for ELF images with holes.)
366 		total_size = ELF_PAGEALIGN(total_size);
367 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
368 		if (!BAD_ADDR(map_addr))
369 			vm_munmap(map_addr+size, total_size-size);
/* Ordinary (non-first) mapping: just this segment's span. */
371 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
376 #endif /* !elf_map */
/*
 * total_mapping_size() - extent, in bytes, from the page-start of the
 * first PT_LOAD segment to the end (p_vaddr + p_memsz) of the last.
 * Scans @cmds[0..nr) remembering the first and last PT_LOAD indices.
 * NOTE(review): gapped listing - the assignments to first_idx/last_idx
 * inside the loop and the "no PT_LOAD found" return are not visible.
 */
378 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
380 	int i, first_idx = -1, last_idx = -1;
382 	for (i = 0; i < nr; i++) {
383 		if (cmds[i].p_type == PT_LOAD) {
392 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
393 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
397  * load_elf_phdrs() - load ELF program headers
398  * @elf_ex:   ELF header of the binary whose program headers should be loaded
399  * @elf_file: the opened ELF binary file
401  * Loads ELF program headers from the binary file elf_file, which has the ELF
402  * header pointed to by elf_ex, into a newly allocated array. The caller is
403  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
405 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
406 				       struct file *elf_file)
408 	struct elf_phdr *elf_phdata = NULL;
409 	int retval, size, err = -1;
412 	 * If the size of this structure has changed, then punt, since
413 	 * we will be doing the wrong thing.
415 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
418 	/* Sanity check the number of program headers... */
419 	if (elf_ex->e_phnum < 1 ||
420 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
/* Bound the table to one ELF page so the kmalloc stays small. */
423 	/* ...and their total size. */
424 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
425 	if (size > ELF_MIN_ALIGN)
428 	elf_phdata = kmalloc(size, GFP_KERNEL);
432 	/* Read in the program headers */
433 	retval = kernel_read(elf_file, elf_ex->e_phoff,
434 			     (char *)elf_phdata, size);
/* Short read: map it to a negative errno (or -EIO) for the caller. */
435 	if (retval != size) {
436 		err = (retval < 0) ? retval : -EIO;
450 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
453  * struct arch_elf_state - arch-specific ELF loading state
455  * This structure is used to preserve architecture specific data during
456  * the loading of an ELF file, throughout the checking of architecture
457  * specific ELF headers & through to the point where the ELF load is
458  * known to be proceeding (ie. SET_PERSONALITY).
460  * This implementation is a dummy for architectures which require no
/* Empty struct + empty initializer: the no-op default used when the
 * architecture does not define its own state (see #ifndef above). */
463 struct arch_elf_state {
466 #define INIT_ARCH_ELF_STATE {}
469  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
470  * @ehdr:	The main ELF header
471  * @phdr:	The program header to check
472  * @elf:	The open ELF file
473  * @is_interp:	True if the phdr is from the interpreter of the ELF being
474  *		loaded, else false.
475  * @state:	Architecture-specific state preserved throughout the process
476  *		of loading the ELF.
478  * Inspects the program header phdr to validate its correctness and/or
479  * suitability for the system. Called once per ELF program header in the
480  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
483  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
484  *         with that return code.
486 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
487 				   struct elf_phdr *phdr,
488 				   struct file *elf, bool is_interp,
489 				   struct arch_elf_state *state)
491 	/* Dummy implementation, always proceed */
496  * arch_check_elf() - check an ELF executable
497  * @ehdr:	The main ELF header
498  * @has_interp:	True if the ELF has an interpreter, else false.
499  * @interp_ehdr: The interpreter's ELF header
500  * @state:	Architecture-specific state preserved throughout the process
501  *		of loading the ELF.
503  * Provides a final opportunity for architecture code to reject the loading
504  * of the ELF & cause an exec syscall to return an error. This is called after
505  * all program headers to be checked by arch_elf_pt_proc have been.
507  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
508  *         with that return code.
510 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
511 				 struct elfhdr *interp_ehdr,
512 				 struct arch_elf_state *state)
514 	/* Dummy implementation, always proceed */
518 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
520 /* This is much more generalized than the library routine read function,
521    so we keep this separate.  Technically the library read function
522    is only provided so that we can read a.out libraries that have
/*
 * load_elf_interp() - map the ELF interpreter (dynamic linker) into the
 * new address space.
 * Validates the interpreter header, maps each PT_LOAD segment (the
 * first mapping reserves the full image extent via total_size so
 * randomization cannot overlap the main binary), tracks the file end
 * (elf_bss) and memory end (last_bss) of the image, then zeroes the
 * bss tail and vm_brk_flags()s any remaining bss pages. On success the
 * returned value is the interpreter's load address (relocation base).
 * NOTE(review): gapped listing - error-path lines and several closing
 * braces of the original are not visible.
 */
525 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
526 		struct file *interpreter, unsigned long *interp_map_addr,
527 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
529 	struct elf_phdr *eppnt;
530 	unsigned long load_addr = 0;
531 	int load_addr_set = 0;
532 	unsigned long last_bss = 0, elf_bss = 0;
534 	unsigned long error = ~0UL;
535 	unsigned long total_size;
538 	/* First of all, some simple consistency checks */
539 	if (interp_elf_ex->e_type != ET_EXEC &&
540 	    interp_elf_ex->e_type != ET_DYN)
542 	if (!elf_check_arch(interp_elf_ex))
544 	if (!interpreter->f_op->mmap)
547 	total_size = total_mapping_size(interp_elf_phdata,
548 					interp_elf_ex->e_phnum);
554 	eppnt = interp_elf_phdata;
555 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
556 		if (eppnt->p_type == PT_LOAD) {
557 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
559 			unsigned long vaddr = 0;
560 			unsigned long k, map_addr;
/* Translate segment PF_* permission flags into mmap PROT_* bits. */
562 			if (eppnt->p_flags & PF_R)
563 				elf_prot = PROT_READ;
564 			if (eppnt->p_flags & PF_W)
565 				elf_prot |= PROT_WRITE;
566 			if (eppnt->p_flags & PF_X)
567 				elf_prot |= PROT_EXEC;
568 			vaddr = eppnt->p_vaddr;
569 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
570 				elf_type |= MAP_FIXED;
571 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
574 			map_addr = elf_map(interpreter, load_addr + vaddr,
575 					eppnt, elf_prot, elf_type, total_size);
577 			if (!*interp_map_addr)
578 				*interp_map_addr = map_addr;
580 			if (BAD_ADDR(map_addr))
/* First ET_DYN segment fixes the relocation base for the rest. */
583 			if (!load_addr_set &&
584 			    interp_elf_ex->e_type == ET_DYN) {
585 				load_addr = map_addr - ELF_PAGESTART(vaddr);
590 			 * Check to see if the section's size will overflow the
591 			 * allowed task size. Note that p_filesz must always be
592 			 * <= p_memsize so it's only necessary to check p_memsz.
594 			k = load_addr + eppnt->p_vaddr;
596 			    eppnt->p_filesz > eppnt->p_memsz ||
597 			    eppnt->p_memsz > TASK_SIZE ||
598 			    TASK_SIZE - eppnt->p_memsz < k) {
604 			 * Find the end of the file mapping for this phdr, and
605 			 * keep track of the largest address we see for this.
607 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
612 			 * Do the same thing for the memory mapping - between
613 			 * elf_bss and last_bss is the bss section.
615 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
624 	 * Now fill out the bss section: first pad the last page from
625 	 * the file up to the page boundary, and zero it from elf_bss
626 	 * up to the end of the page.
628 	if (padzero(elf_bss)) {
633 	 * Next, align both the file and mem bss up to the page size,
634 	 * since this is where elf_bss was just zeroed up to, and where
635 	 * last_bss will end after the vm_brk_flags() below.
637 	elf_bss = ELF_PAGEALIGN(elf_bss);
638 	last_bss = ELF_PAGEALIGN(last_bss);
639 	/* Finally, if there is still more bss to allocate, do it. */
640 	if (last_bss > elf_bss) {
641 		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
642 				bss_prot & PROT_EXEC ? VM_EXEC : 0);
653 * These are the functions used to load ELF style executables and shared
654 * libraries. There is no binary dependent code anywhere else.
657 #ifndef STACK_RND_MASK
658 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
/*
 * randomize_stack_top() - apply ASLR to the initial stack address.
 * When the task has PF_RANDOMIZE and has not opted out via
 * ADDR_NO_RANDOMIZE, perturbs @stack_top by up to STACK_RND_MASK
 * pages; the offset is added on grow-up stacks and subtracted on
 * grow-down ones. Result is always PAGE_ALIGNed.
 * NOTE(review): gapped listing - the #else/#endif pairing for the two
 * return statements is not visible here.
 */
661 static unsigned long randomize_stack_top(unsigned long stack_top)
663 	unsigned long random_variable = 0;
665 	if ((current->flags & PF_RANDOMIZE) &&
666 		!(current->personality & ADDR_NO_RANDOMIZE)) {
667 		random_variable = get_random_long();
668 		random_variable &= STACK_RND_MASK;
669 		random_variable <<= PAGE_SHIFT;
671 #ifdef CONFIG_STACK_GROWSUP
672 	return PAGE_ALIGN(stack_top) + random_variable;
674 	return PAGE_ALIGN(stack_top) - random_variable;
/*
 * load_elf_binary() - the binfmt_elf .load_binary entry point.
 * Validates the ELF header, loads the program headers, reads any
 * PT_INTERP interpreter path, runs GNU_STACK and arch-specific phdr
 * checks, flushes the old executable, sets up the new stack, maps
 * every PT_LOAD segment (applying an ASLR load bias for ET_DYN),
 * establishes the brk/bss, maps the interpreter if there is one,
 * builds the ELF tables on the stack, and finally start_thread()s at
 * the entry point. Cleanup on failure is via the out_free_* goto
 * labels (not visible in this gapped listing).
 * NOTE(review): this listing is heavily gapped - error checks, labels
 * and closing braces of the original are missing throughout.
 */
678 static int load_elf_binary(struct linux_binprm *bprm)
680 	struct file *interpreter = NULL; /* to shut gcc up */
681  	unsigned long load_addr = 0, load_bias = 0;
682 	int load_addr_set = 0;
683 	char * elf_interpreter = NULL;
685 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
686 	unsigned long elf_bss, elf_brk;
689 	unsigned long elf_entry;
690 	unsigned long interp_load_addr = 0;
691 	unsigned long start_code, end_code, start_data, end_data;
692 	unsigned long reloc_func_desc __maybe_unused = 0;
693 	int executable_stack = EXSTACK_DEFAULT;
694 	struct pt_regs *regs = current_pt_regs();
696 		struct elfhdr elf_ex;
697 		struct elfhdr interp_elf_ex;
699 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
701 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
707 	/* Get the exec-header */
708 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
711 	/* First of all, some simple consistency checks */
712 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
715 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
717 	if (!elf_check_arch(&loc->elf_ex))
719 	if (!bprm->file->f_op->mmap)
722 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
726 	elf_ppnt = elf_phdata;
/* Pass 1: find and read the PT_INTERP interpreter path, if any. */
735 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
736 		if (elf_ppnt->p_type == PT_INTERP) {
737 			/* This is the program interpreter used for
738 			 * shared libraries - for now assume that this
739 			 * is an a.out format binary
742 			if (elf_ppnt->p_filesz > PATH_MAX || 
743 			    elf_ppnt->p_filesz < 2)
747 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
749 			if (!elf_interpreter)
752 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
755 			if (retval != elf_ppnt->p_filesz) {
758 				goto out_free_interp;
760 			/* make sure path is NULL terminated */
762 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
763 				goto out_free_interp;
765 			interpreter = open_exec(elf_interpreter);
766 			retval = PTR_ERR(interpreter);
767 			if (IS_ERR(interpreter))
768 				goto out_free_interp;
771 			 * If the binary is not readable then enforce
772 			 * mm->dumpable = 0 regardless of the interpreter's
775 			would_dump(bprm, interpreter);
777 			/* Get the exec headers */
778 			retval = kernel_read(interpreter, 0,
779 					     (void *)&loc->interp_elf_ex,
780 					     sizeof(loc->interp_elf_ex));
781 			if (retval != sizeof(loc->interp_elf_ex)) {
784 				goto out_free_dentry;
/* Pass 2: honour PT_GNU_STACK and arch-specific program headers. */
792 	elf_ppnt = elf_phdata;
793 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
794 		switch (elf_ppnt->p_type) {
796 			if (elf_ppnt->p_flags & PF_X)
797 				executable_stack = EXSTACK_ENABLE_X;
799 				executable_stack = EXSTACK_DISABLE_X;
802 		case PT_LOPROC ... PT_HIPROC:
803 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
807 				goto out_free_dentry;
811 	/* Some simple consistency checks for the interpreter */
812 	if (elf_interpreter) {
814 		/* Not an ELF interpreter */
815 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
816 			goto out_free_dentry;
817 		/* Verify the interpreter has a valid arch */
818 		if (!elf_check_arch(&loc->interp_elf_ex))
819 			goto out_free_dentry;
821 		/* Load the interpreter program headers */
822 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
824 		if (!interp_elf_phdata)
825 			goto out_free_dentry;
827 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
828 		elf_ppnt = interp_elf_phdata;
829 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
830 			switch (elf_ppnt->p_type) {
831 			case PT_LOPROC ... PT_HIPROC:
832 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
833 						elf_ppnt, interpreter,
836 					goto out_free_dentry;
842 	 * Allow arch code to reject the ELF at this point, whilst it's
843 	 * still possible to return an error to the code that invoked
846 	retval = arch_check_elf(&loc->elf_ex,
847 				!!interpreter, &loc->interp_elf_ex,
850 		goto out_free_dentry;
/* Point of no return: the old mm is torn down from here on. */
852 	/* Flush all traces of the currently running executable */
853 	retval = flush_old_exec(bprm);
855 		goto out_free_dentry;
857 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
858 	   may depend on the personality.  */
859 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
860 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
861 		current->personality |= READ_IMPLIES_EXEC;
863 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
864 		current->flags |= PF_RANDOMIZE;
866 	setup_new_exec(bprm);
867 	install_exec_creds(bprm);
869 	/* Do this so that we can load the interpreter, if need be.  We will
870 	   change some of these later */
871 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
874 		goto out_free_dentry;
876 	current->mm->start_stack = bprm->p;
878 	/* Now we do a little grungy work by mmapping the ELF image into
879 	   the correct location in memory. */
880 	for(i = 0, elf_ppnt = elf_phdata;
881 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
882 		int elf_prot = 0, elf_flags;
883 		unsigned long k, vaddr;
884 		unsigned long total_size = 0;
886 		if (elf_ppnt->p_type != PT_LOAD)
889 		if (unlikely (elf_brk > elf_bss)) {
892 			/* There was a PT_LOAD segment with p_memsz > p_filesz
893 			   before this one. Map anonymous pages, if needed,
894 			   and clear the area.  */
895 			retval = set_brk(elf_bss + load_bias,
899 				goto out_free_dentry;
900 			nbyte = ELF_PAGEOFFSET(elf_bss);
902 				nbyte = ELF_MIN_ALIGN - nbyte;
903 				if (nbyte > elf_brk - elf_bss)
904 					nbyte = elf_brk - elf_bss;
905 				if (clear_user((void __user *)elf_bss +
908 					 * This bss-zeroing can fail if the ELF
909 					 * file specifies odd protections. So
910 					 * we don't check the return value
916 		if (elf_ppnt->p_flags & PF_R)
917 			elf_prot |= PROT_READ;
918 		if (elf_ppnt->p_flags & PF_W)
919 			elf_prot |= PROT_WRITE;
920 		if (elf_ppnt->p_flags & PF_X)
921 			elf_prot |= PROT_EXEC;
923 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
925 		vaddr = elf_ppnt->p_vaddr;
926 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
927 			elf_flags |= MAP_FIXED;
928 		} else if (loc->elf_ex.e_type == ET_DYN) {
929 			/* Try and get dynamic programs out of the way of the
930 			 * default mmap base, as well as whatever program they
931 			 * might try to exec.  This is because the brk will
932 			 * follow the loader, and is not movable.  */
933 			load_bias = ELF_ET_DYN_BASE - vaddr;
934 			if (current->flags & PF_RANDOMIZE)
935 				load_bias += arch_mmap_rnd();
936 			load_bias = ELF_PAGESTART(load_bias);
937 			total_size = total_mapping_size(elf_phdata,
938 							loc->elf_ex.e_phnum);
941 				goto out_free_dentry;
945 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
946 				elf_prot, elf_flags, total_size);
947 		if (BAD_ADDR(error)) {
948 			retval = IS_ERR((void *)error) ?
949 				PTR_ERR((void*)error) : -EINVAL;
950 			goto out_free_dentry;
953 		if (!load_addr_set) {
955 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
956 			if (loc->elf_ex.e_type == ET_DYN) {
958 					ELF_PAGESTART(load_bias + vaddr);
959 				load_addr += load_bias;
960 				reloc_func_desc = load_bias;
963 		k = elf_ppnt->p_vaddr;
970 		 * Check to see if the section's size will overflow the
971 		 * allowed task size. Note that p_filesz must always be
972 		 * <= p_memsz so it is only necessary to check p_memsz.
974 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
975 		    elf_ppnt->p_memsz > TASK_SIZE ||
976 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
977 			/* set_brk can never work. Avoid overflows. */
979 			goto out_free_dentry;
982 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
986 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
990 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
/* All segments mapped; relocate every tracked address by the bias. */
997 	loc->elf_ex.e_entry += load_bias;
998 	elf_bss += load_bias;
999 	elf_brk += load_bias;
1000 	start_code += load_bias;
1001 	end_code += load_bias;
1002 	start_data += load_bias;
1003 	end_data += load_bias;
1005 	/* Calling set_brk effectively mmaps the pages that we need
1006 	 * for the bss and break sections.  We must do this before
1007 	 * mapping in the interpreter, to make sure it doesn't wind
1008 	 * up getting placed where the bss needs to go.
1010 	retval = set_brk(elf_bss, elf_brk, bss_prot);
1012 		goto out_free_dentry;
1013 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1014 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1015 		goto out_free_dentry;
1018 	if (elf_interpreter) {
1019 		unsigned long interp_map_addr = 0;
1021 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1024 					    load_bias, interp_elf_phdata);
1025 		if (!IS_ERR((void *)elf_entry)) {
1027 			 * load_elf_interp() returns relocation
1030 			interp_load_addr = elf_entry;
1031 			elf_entry += loc->interp_elf_ex.e_entry;
1033 		if (BAD_ADDR(elf_entry)) {
1034 			retval = IS_ERR((void *)elf_entry) ?
1035 					(int)elf_entry : -EINVAL;
1036 			goto out_free_dentry;
1038 		reloc_func_desc = interp_load_addr;
1040 		allow_write_access(interpreter);
1042 		kfree(elf_interpreter);
/* No interpreter: enter at the binary's own (rebased) entry point. */
1044 		elf_entry = loc->elf_ex.e_entry;
1045 		if (BAD_ADDR(elf_entry)) {
1047 			goto out_free_dentry;
1051 	kfree(interp_elf_phdata);
1054 	set_binfmt(&elf_format);
1056 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1057 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1060 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1062 	retval = create_elf_tables(bprm, &loc->elf_ex,
1063 			  load_addr, interp_load_addr);
1066 	/* N.B. passed_fileno might not be initialized? */
1067 	current->mm->end_code = end_code;
1068 	current->mm->start_code = start_code;
1069 	current->mm->start_data = start_data;
1070 	current->mm->end_data = end_data;
1071 	current->mm->start_stack = bprm->p;
1073 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1074 		current->mm->brk = current->mm->start_brk =
1075 			arch_randomize_brk(current->mm);
1076 #ifdef compat_brk_randomized
1077 		current->brk_randomized = 1;
1081 	if (current->personality & MMAP_PAGE_ZERO) {
1082 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1083 		   and some applications "depend" upon this behavior.
1084 		   Since we do not have the power to recompile these, we
1085 		   emulate the SVr4 behavior. Sigh. */
1086 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1087 				MAP_FIXED | MAP_PRIVATE, 0);
1090 #ifdef ELF_PLAT_INIT
1092 	 * The ABI may specify that certain registers be set up in special
1093 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1094 	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1095 	 * that the e_entry field is the address of the function descriptor
1096 	 * for the startup routine, rather than the address of the startup
1097 	 * routine itself.  This macro performs whatever initialization to
1098 	 * the regs structure is required as well as any relocations to the
1099 	 * function descriptor entries when executing dynamically links apps.
1101 	ELF_PLAT_INIT(regs, reloc_func_desc);
1104 	start_thread(regs, elf_entry, bprm->p);
/* Error-path cleanup (labels not visible in this gapped listing). */
1113 	kfree(interp_elf_phdata);
1114 	allow_write_access(interpreter);
1118 	kfree(elf_interpreter);
1124 #ifdef CONFIG_USELIB
1125 /* This is really simpleminded and specialized - we are loading an
1126    a.out library that is given an ELF header. */
/*
 * load_elf_library() - uselib(2) support: map a shared library whose
 * file starts with an ELF header.
 * Accepts only ET_EXEC images with at most two program headers, finds
 * the single PT_LOAD entry, vm_mmap()s it RWX at its fixed address,
 * zeroes the bss head with padzero(), and vm_brk()s any remaining bss.
 * NOTE(review): gapped listing - several checks/returns of the
 * original are not visible here.
 */
1127 static int load_elf_library(struct file *file)
1129 	struct elf_phdr *elf_phdata;
1130 	struct elf_phdr *eppnt;
1131 	unsigned long elf_bss, bss, len;
1132 	int retval, error, i, j;
1133 	struct elfhdr elf_ex;
1136 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1137 	if (retval != sizeof(elf_ex))
1140 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1143 	/* First of all, some simple consistency checks */
1144 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1145 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1148 	/* Now read in all of the header information */
1150 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1151 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1154 	elf_phdata = kmalloc(j, GFP_KERNEL);
1160 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
/* Exactly one PT_LOAD segment is expected (j counts them). */
1164 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1165 		if ((eppnt + i)->p_type == PT_LOAD)
1170 	while (eppnt->p_type != PT_LOAD)
1173 	/* Now use mmap to map the library into memory. */
1174 	error = vm_mmap(file,
1175 			ELF_PAGESTART(eppnt->p_vaddr),
1177 			ELF_PAGEOFFSET(eppnt->p_vaddr)),
1178 			PROT_READ | PROT_WRITE | PROT_EXEC,
1179 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1181 			ELF_PAGEOFFSET(eppnt->p_vaddr)));
1182 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1185 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1186 	if (padzero(elf_bss)) {
1191 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1193 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1195 		error = vm_brk(len, bss - len);
1206 #endif /* #ifdef CONFIG_USELIB */
1208 #ifdef CONFIG_ELF_CORE
1212 * Modelled on fs/exec.c:aout_core_dump()
1213 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1217  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1218  * that are useful for post-mortem analysis are included in every core dump.
1219  * In that way we ensure that the core dump is fully interpretable later
1220  * without matching up the same kernel and hardware config to see what PC values
1221  * meant. These special mappings include - vDSO, vsyscall, and other
1222  * architecture specific mappings
1224 static bool always_dump_vma(struct vm_area_struct *vma)
1226 	/* Any vsyscall mappings? */
1227 	if (vma == get_gate_vma(vma->vm_mm))
1231 	 * Assume that all vmas with a .name op should always be dumped.
1232 	 * If this changes, a new vm_ops field can easily be added.
1234 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1238 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1239 	 * such as vDSO sections.
1241 	if (arch_vma_name(vma))
1248  * Decide what to dump of a segment, part, all or none.
/*
 * vma_dump_size() - how many bytes of @vma belong in the core dump.
 * Consults the per-mm coredump filter bits in @mm_flags (via FILTER())
 * for each mapping class: always-dump vmas, VM_DONTDUMP, DAX, hugetlb,
 * VM_IO, shared vs private, anonymous vs file-backed, and finally the
 * ELF_HEADERS probe that dumps the first page of what looks like a
 * DSO/executable mapping. Returns 0 (skip), a partial length, or the
 * full vma span.
 * NOTE(review): gapped listing - the per-branch "goto whole"/"return 0"
 * lines of the original are not visible here.
 */
1250 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1251 				   unsigned long mm_flags)
1253 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1255 	/* always dump the vdso and vsyscall sections */
1256 	if (always_dump_vma(vma))
1259 	if (vma->vm_flags & VM_DONTDUMP)
1262 	/* support for DAX */
1263 	if (vma_is_dax(vma)) {
1264 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1266 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1271 	/* Hugetlb memory check */
1272 	if (vma->vm_flags & VM_HUGETLB) {
1273 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1275 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1280 	/* Do not dump I/O mapped devices or special mappings */
1281 	if (vma->vm_flags & VM_IO)
1284 	/* By default, dump shared memory if mapped from an anonymous file. */
1285 	if (vma->vm_flags & VM_SHARED) {
1286 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1287 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1292 	/* Dump segments that have been written to.  */
1293 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1295 	if (vma->vm_file == NULL)
1298 	if (FILTER(MAPPED_PRIVATE))
1302 	 * If this looks like the beginning of a DSO or executable mapping,
1303 	 * check for an ELF header.  If we find one, dump the first page to
1304 	 * aid in determining what was mapped here.
1306 	if (FILTER(ELF_HEADERS) &&
1307 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1308 		u32 __user *header = (u32 __user *) vma->vm_start;
1310 		mm_segment_t fs = get_fs();
1312 		 * Doing it this way gets the constant folded by GCC.
1316 			char elfmag[SELFMAG];
/* Build the 4-byte ELF magic in a union so it compares as one u32. */
1318 		BUILD_BUG_ON(SELFMAG != sizeof word);
1319 		magic.elfmag[EI_MAG0] = ELFMAG0;
1320 		magic.elfmag[EI_MAG1] = ELFMAG1;
1321 		magic.elfmag[EI_MAG2] = ELFMAG2;
1322 		magic.elfmag[EI_MAG3] = ELFMAG3;
1324 		 * Switch to the user "segment" for get_user(),
1325 		 * then put back what elf_core_dump() had in place.
1328 		if (unlikely(get_user(word, header)))
1331 		if (word == magic.cmp)
/* Fallthrough default: dump the whole vma. */
1340 	return vma->vm_end - vma->vm_start;
/* An ELF note in memory */
struct memelfnote
{
	const char *name;	/* note name ("CORE", "LINUX", ...) */
	int type;		/* NT_* note type */
	unsigned int datasz;	/* size of *data in bytes */
	void *data;		/* payload written after the note header */
};
1352 static int notesize(struct memelfnote *en)
1356 sz = sizeof(struct elf_note);
1357 sz += roundup(strlen(en->name) + 1, 4);
1358 sz += roundup(en->datasz, 4);
1363 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1366 en.n_namesz = strlen(men->name) + 1;
1367 en.n_descsz = men->datasz;
1368 en.n_type = men->type;
1370 return dump_emit(cprm, &en, sizeof(en)) &&
1371 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1372 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1375 static void fill_elf_header(struct elfhdr *elf, int segs,
1376 u16 machine, u32 flags)
1378 memset(elf, 0, sizeof(*elf));
1380 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1381 elf->e_ident[EI_CLASS] = ELF_CLASS;
1382 elf->e_ident[EI_DATA] = ELF_DATA;
1383 elf->e_ident[EI_VERSION] = EV_CURRENT;
1384 elf->e_ident[EI_OSABI] = ELF_OSABI;
1386 elf->e_type = ET_CORE;
1387 elf->e_machine = machine;
1388 elf->e_version = EV_CURRENT;
1389 elf->e_phoff = sizeof(struct elfhdr);
1390 elf->e_flags = flags;
1391 elf->e_ehsize = sizeof(struct elfhdr);
1392 elf->e_phentsize = sizeof(struct elf_phdr);
1393 elf->e_phnum = segs;
1398 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1400 phdr->p_type = PT_NOTE;
1401 phdr->p_offset = offset;
1404 phdr->p_filesz = sz;
1411 static void fill_note(struct memelfnote *note, const char *name, int type,
1412 unsigned int sz, void *data)
1422 * fill up all the fields in prstatus from the given task struct, except
1423 * registers which need to be filled up separately.
1425 static void fill_prstatus(struct elf_prstatus *prstatus,
1426 struct task_struct *p, long signr)
1428 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1429 prstatus->pr_sigpend = p->pending.signal.sig[0];
1430 prstatus->pr_sighold = p->blocked.sig[0];
1432 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1434 prstatus->pr_pid = task_pid_vnr(p);
1435 prstatus->pr_pgrp = task_pgrp_vnr(p);
1436 prstatus->pr_sid = task_session_vnr(p);
1437 if (thread_group_leader(p)) {
1438 struct task_cputime cputime;
1441 * This is the record for the group leader. It shows the
1442 * group-wide total, not its individual thread total.
1444 thread_group_cputime(p, &cputime);
1445 prstatus->pr_utime = ns_to_timeval(cputime.utime);
1446 prstatus->pr_stime = ns_to_timeval(cputime.stime);
1450 task_cputime(p, &utime, &stime);
1451 prstatus->pr_utime = ns_to_timeval(utime);
1452 prstatus->pr_stime = ns_to_timeval(stime);
1455 prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1456 prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1459 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1460 struct mm_struct *mm)
1462 const struct cred *cred;
1463 unsigned int i, len;
1465 /* first copy the parameters from user space */
1466 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1468 len = mm->arg_end - mm->arg_start;
1469 if (len >= ELF_PRARGSZ)
1470 len = ELF_PRARGSZ-1;
1471 if (copy_from_user(&psinfo->pr_psargs,
1472 (const char __user *)mm->arg_start, len))
1474 for(i = 0; i < len; i++)
1475 if (psinfo->pr_psargs[i] == 0)
1476 psinfo->pr_psargs[i] = ' ';
1477 psinfo->pr_psargs[len] = 0;
1480 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1482 psinfo->pr_pid = task_pid_vnr(p);
1483 psinfo->pr_pgrp = task_pgrp_vnr(p);
1484 psinfo->pr_sid = task_session_vnr(p);
1486 i = p->state ? ffz(~p->state) + 1 : 0;
1487 psinfo->pr_state = i;
1488 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1489 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1490 psinfo->pr_nice = task_nice(p);
1491 psinfo->pr_flag = p->flags;
1493 cred = __task_cred(p);
1494 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1495 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1497 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1502 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1504 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1508 while (auxv[i - 2] != AT_NULL);
1509 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1512 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1513 const siginfo_t *siginfo)
1515 mm_segment_t old_fs = get_fs();
1517 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1519 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1522 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1524 * Format of NT_FILE note:
1526 * long count -- how many files are mapped
1527 * long page_size -- units for file_ofs
1528 * array of [COUNT] elements of
1532 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1534 static int fill_files_note(struct memelfnote *note)
1536 struct vm_area_struct *vma;
1537 unsigned count, size, names_ofs, remaining, n;
1539 user_long_t *start_end_ofs;
1540 char *name_base, *name_curpos;
1542 /* *Estimated* file count and total data size needed */
1543 count = current->mm->map_count;
1546 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1548 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1550 size = round_up(size, PAGE_SIZE);
1551 data = vmalloc(size);
1555 start_end_ofs = data + 2;
1556 name_base = name_curpos = ((char *)data) + names_ofs;
1557 remaining = size - names_ofs;
1559 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1561 const char *filename;
1563 file = vma->vm_file;
1566 filename = file_path(file, name_curpos, remaining);
1567 if (IS_ERR(filename)) {
1568 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1570 size = size * 5 / 4;
1576 /* file_path() fills at the end, move name down */
1577 /* n = strlen(filename) + 1: */
1578 n = (name_curpos + remaining) - filename;
1579 remaining = filename - name_curpos;
1580 memmove(name_curpos, filename, n);
1583 *start_end_ofs++ = vma->vm_start;
1584 *start_end_ofs++ = vma->vm_end;
1585 *start_end_ofs++ = vma->vm_pgoff;
1589 /* Now we know exact count of files, can store it */
1591 data[1] = PAGE_SIZE;
1593 * Count usually is less than current->mm->map_count,
1594 * we need to move filenames down.
1596 n = current->mm->map_count - count;
1598 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1599 memmove(name_base - shift_bytes, name_base,
1600 name_curpos - name_base);
1601 name_curpos -= shift_bytes;
1604 size = name_curpos - (char *)data;
1605 fill_note(note, "CORE", NT_FILE, size, data);
1609 #ifdef CORE_DUMP_USE_REGSET
1610 #include <linux/regset.h>
1612 struct elf_thread_core_info {
1613 struct elf_thread_core_info *next;
1614 struct task_struct *task;
1615 struct elf_prstatus prstatus;
1616 struct memelfnote notes[0];
1619 struct elf_note_info {
1620 struct elf_thread_core_info *thread;
1621 struct memelfnote psinfo;
1622 struct memelfnote signote;
1623 struct memelfnote auxv;
1624 struct memelfnote files;
1625 user_siginfo_t csigdata;
1631 * When a regset has a writeback hook, we call it on each thread before
1632 * dumping user memory. On register window machines, this makes sure the
1633 * user memory backing the register data is up to date before we read it.
1635 static void do_thread_regset_writeback(struct task_struct *task,
1636 const struct user_regset *regset)
1638 if (regset->writeback)
1639 regset->writeback(task, regset, 1);
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S, R) sizeof(S)
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
#endif
1650 static int fill_thread_core_info(struct elf_thread_core_info *t,
1651 const struct user_regset_view *view,
1652 long signr, size_t *total)
1655 unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1658 * NT_PRSTATUS is the one special case, because the regset data
1659 * goes into the pr_reg field inside the note contents, rather
1660 * than being the whole note contents. We fill the reset in here.
1661 * We assume that regset 0 is NT_PRSTATUS.
1663 fill_prstatus(&t->prstatus, t->task, signr);
1664 (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1665 &t->prstatus.pr_reg, NULL);
1667 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1668 PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1669 *total += notesize(&t->notes[0]);
1671 do_thread_regset_writeback(t->task, &view->regsets[0]);
1674 * Each other regset might generate a note too. For each regset
1675 * that has no core_note_type or is inactive, we leave t->notes[i]
1676 * all zero and we'll know to skip writing it later.
1678 for (i = 1; i < view->n; ++i) {
1679 const struct user_regset *regset = &view->regsets[i];
1680 do_thread_regset_writeback(t->task, regset);
1681 if (regset->core_note_type && regset->get &&
1682 (!regset->active || regset->active(t->task, regset))) {
1684 size_t size = regset->n * regset->size;
1685 void *data = kmalloc(size, GFP_KERNEL);
1686 if (unlikely(!data))
1688 ret = regset->get(t->task, regset,
1689 0, size, data, NULL);
1693 if (regset->core_note_type != NT_PRFPREG)
1694 fill_note(&t->notes[i], "LINUX",
1695 regset->core_note_type,
1698 SET_PR_FPVALID(&t->prstatus,
1700 fill_note(&t->notes[i], "CORE",
1701 NT_PRFPREG, size, data);
1703 *total += notesize(&t->notes[i]);
1711 static int fill_note_info(struct elfhdr *elf, int phdrs,
1712 struct elf_note_info *info,
1713 const siginfo_t *siginfo, struct pt_regs *regs)
1715 struct task_struct *dump_task = current;
1716 const struct user_regset_view *view = task_user_regset_view(dump_task);
1717 struct elf_thread_core_info *t;
1718 struct elf_prpsinfo *psinfo;
1719 struct core_thread *ct;
1723 info->thread = NULL;
1725 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1726 if (psinfo == NULL) {
1727 info->psinfo.data = NULL; /* So we don't free this wrongly */
1731 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1734 * Figure out how many notes we're going to need for each thread.
1736 info->thread_notes = 0;
1737 for (i = 0; i < view->n; ++i)
1738 if (view->regsets[i].core_note_type != 0)
1739 ++info->thread_notes;
1742 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1743 * since it is our one special case.
1745 if (unlikely(info->thread_notes == 0) ||
1746 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1752 * Initialize the ELF file header.
1754 fill_elf_header(elf, phdrs,
1755 view->e_machine, view->e_flags);
1758 * Allocate a structure for each thread.
1760 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1761 t = kzalloc(offsetof(struct elf_thread_core_info,
1762 notes[info->thread_notes]),
1768 if (ct->task == dump_task || !info->thread) {
1769 t->next = info->thread;
1773 * Make sure to keep the original task at
1774 * the head of the list.
1776 t->next = info->thread->next;
1777 info->thread->next = t;
1782 * Now fill in each thread's information.
1784 for (t = info->thread; t != NULL; t = t->next)
1785 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1789 * Fill in the two process-wide notes.
1791 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1792 info->size += notesize(&info->psinfo);
1794 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1795 info->size += notesize(&info->signote);
1797 fill_auxv_note(&info->auxv, current->mm);
1798 info->size += notesize(&info->auxv);
1800 if (fill_files_note(&info->files) == 0)
1801 info->size += notesize(&info->files);
1806 static size_t get_note_info_size(struct elf_note_info *info)
1812 * Write all the notes for each thread. When writing the first thread, the
1813 * process-wide notes are interleaved after the first thread-specific note.
1815 static int write_note_info(struct elf_note_info *info,
1816 struct coredump_params *cprm)
1819 struct elf_thread_core_info *t = info->thread;
1824 if (!writenote(&t->notes[0], cprm))
1827 if (first && !writenote(&info->psinfo, cprm))
1829 if (first && !writenote(&info->signote, cprm))
1831 if (first && !writenote(&info->auxv, cprm))
1833 if (first && info->files.data &&
1834 !writenote(&info->files, cprm))
1837 for (i = 1; i < info->thread_notes; ++i)
1838 if (t->notes[i].data &&
1839 !writenote(&t->notes[i], cprm))
1849 static void free_note_info(struct elf_note_info *info)
1851 struct elf_thread_core_info *threads = info->thread;
1854 struct elf_thread_core_info *t = threads;
1856 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1857 for (i = 1; i < info->thread_notes; ++i)
1858 kfree(t->notes[i].data);
1861 kfree(info->psinfo.data);
1862 vfree(info->files.data);
1867 /* Here is the structure in which status of each thread is captured. */
1868 struct elf_thread_status
1870 struct list_head list;
1871 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1872 elf_fpregset_t fpu; /* NT_PRFPREG */
1873 struct task_struct *thread;
1874 #ifdef ELF_CORE_COPY_XFPREGS
1875 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1877 struct memelfnote notes[3];
1882 * In order to add the specific thread information for the elf file format,
1883 * we need to keep a linked list of every threads pr_status and then create
1884 * a single section for them in the final core file.
1886 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1889 struct task_struct *p = t->thread;
1892 fill_prstatus(&t->prstatus, p, signr);
1893 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1895 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1898 sz += notesize(&t->notes[0]);
1900 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1902 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1905 sz += notesize(&t->notes[1]);
1908 #ifdef ELF_CORE_COPY_XFPREGS
1909 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1910 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1911 sizeof(t->xfpu), &t->xfpu);
1913 sz += notesize(&t->notes[2]);
1919 struct elf_note_info {
1920 struct memelfnote *notes;
1921 struct memelfnote *notes_files;
1922 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1923 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1924 struct list_head thread_list;
1925 elf_fpregset_t *fpu;
1926 #ifdef ELF_CORE_COPY_XFPREGS
1927 elf_fpxregset_t *xfpu;
1929 user_siginfo_t csigdata;
1930 int thread_status_size;
1934 static int elf_note_info_init(struct elf_note_info *info)
1936 memset(info, 0, sizeof(*info));
1937 INIT_LIST_HEAD(&info->thread_list);
1939 /* Allocate space for ELF notes */
1940 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1943 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1946 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1947 if (!info->prstatus)
1949 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1952 #ifdef ELF_CORE_COPY_XFPREGS
1953 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1960 static int fill_note_info(struct elfhdr *elf, int phdrs,
1961 struct elf_note_info *info,
1962 const siginfo_t *siginfo, struct pt_regs *regs)
1964 struct list_head *t;
1965 struct core_thread *ct;
1966 struct elf_thread_status *ets;
1968 if (!elf_note_info_init(info))
1971 for (ct = current->mm->core_state->dumper.next;
1972 ct; ct = ct->next) {
1973 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1977 ets->thread = ct->task;
1978 list_add(&ets->list, &info->thread_list);
1981 list_for_each(t, &info->thread_list) {
1984 ets = list_entry(t, struct elf_thread_status, list);
1985 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1986 info->thread_status_size += sz;
1988 /* now collect the dump for the current */
1989 memset(info->prstatus, 0, sizeof(*info->prstatus));
1990 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1991 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1994 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1997 * Set up the notes in similar form to SVR4 core dumps made
1998 * with info from their /proc.
2001 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2002 sizeof(*info->prstatus), info->prstatus);
2003 fill_psinfo(info->psinfo, current->group_leader, current->mm);
2004 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2005 sizeof(*info->psinfo), info->psinfo);
2007 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2008 fill_auxv_note(info->notes + 3, current->mm);
2011 if (fill_files_note(info->notes + info->numnote) == 0) {
2012 info->notes_files = info->notes + info->numnote;
2016 /* Try to dump the FPU. */
2017 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2019 if (info->prstatus->pr_fpvalid)
2020 fill_note(info->notes + info->numnote++,
2021 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2022 #ifdef ELF_CORE_COPY_XFPREGS
2023 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2024 fill_note(info->notes + info->numnote++,
2025 "LINUX", ELF_CORE_XFPREG_TYPE,
2026 sizeof(*info->xfpu), info->xfpu);
2032 static size_t get_note_info_size(struct elf_note_info *info)
2037 for (i = 0; i < info->numnote; i++)
2038 sz += notesize(info->notes + i);
2040 sz += info->thread_status_size;
2045 static int write_note_info(struct elf_note_info *info,
2046 struct coredump_params *cprm)
2049 struct list_head *t;
2051 for (i = 0; i < info->numnote; i++)
2052 if (!writenote(info->notes + i, cprm))
2055 /* write out the thread status notes section */
2056 list_for_each(t, &info->thread_list) {
2057 struct elf_thread_status *tmp =
2058 list_entry(t, struct elf_thread_status, list);
2060 for (i = 0; i < tmp->num_notes; i++)
2061 if (!writenote(&tmp->notes[i], cprm))
2068 static void free_note_info(struct elf_note_info *info)
2070 while (!list_empty(&info->thread_list)) {
2071 struct list_head *tmp = info->thread_list.next;
2073 kfree(list_entry(tmp, struct elf_thread_status, list));
2076 /* Free data possibly allocated by fill_files_note(): */
2077 if (info->notes_files)
2078 vfree(info->notes_files->data);
2080 kfree(info->prstatus);
2081 kfree(info->psinfo);
2084 #ifdef ELF_CORE_COPY_XFPREGS
2091 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2092 struct vm_area_struct *gate_vma)
2094 struct vm_area_struct *ret = tsk->mm->mmap;
2101 * Helper function for iterating across a vma list. It ensures that the caller
2102 * will visit `gate_vma' prior to terminating the search.
2104 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2105 struct vm_area_struct *gate_vma)
2107 struct vm_area_struct *ret;
2109 ret = this_vma->vm_next;
2112 if (this_vma == gate_vma)
2117 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2118 elf_addr_t e_shoff, int segs)
2120 elf->e_shoff = e_shoff;
2121 elf->e_shentsize = sizeof(*shdr4extnum);
2123 elf->e_shstrndx = SHN_UNDEF;
2125 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2127 shdr4extnum->sh_type = SHT_NULL;
2128 shdr4extnum->sh_size = elf->e_shnum;
2129 shdr4extnum->sh_link = elf->e_shstrndx;
2130 shdr4extnum->sh_info = segs;
2136 * This is a two-pass process; first we find the offsets of the bits,
2137 * and then they are actually written out. If we run out of core limit
2140 static int elf_core_dump(struct coredump_params *cprm)
2145 size_t vma_data_size = 0;
2146 struct vm_area_struct *vma, *gate_vma;
2147 struct elfhdr *elf = NULL;
2148 loff_t offset = 0, dataoff;
2149 struct elf_note_info info = { };
2150 struct elf_phdr *phdr4note = NULL;
2151 struct elf_shdr *shdr4extnum = NULL;
2154 elf_addr_t *vma_filesz = NULL;
2157 * We no longer stop all VM operations.
2159 * This is because those proceses that could possibly change map_count
2160 * or the mmap / vma pages are now blocked in do_exit on current
2161 * finishing this core dump.
2163 * Only ptrace can touch these memory addresses, but it doesn't change
2164 * the map_count or the pages allocated. So no possibility of crashing
2165 * exists while dumping the mm->vm_next areas to the core file.
2168 /* alloc memory for large data structures: too large to be on stack */
2169 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2173 * The number of segs are recored into ELF header as 16bit value.
2174 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2176 segs = current->mm->map_count;
2177 segs += elf_core_extra_phdrs();
2179 gate_vma = get_gate_vma(current->mm);
2180 if (gate_vma != NULL)
2183 /* for notes section */
2186 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2187 * this, kernel supports extended numbering. Have a look at
2188 * include/linux/elf.h for further information. */
2189 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2192 * Collect all the non-memory information about the process for the
2193 * notes. This also sets up the file header.
2195 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2203 offset += sizeof(*elf); /* Elf header */
2204 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2206 /* Write notes phdr entry */
2208 size_t sz = get_note_info_size(&info);
2210 sz += elf_coredump_extra_notes_size();
2212 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2216 fill_elf_note_phdr(phdr4note, sz, offset);
2220 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2222 if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2224 vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2228 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2229 vma = next_vma(vma, gate_vma)) {
2230 unsigned long dump_size;
2232 dump_size = vma_dump_size(vma, cprm->mm_flags);
2233 vma_filesz[i++] = dump_size;
2234 vma_data_size += dump_size;
2237 offset += vma_data_size;
2238 offset += elf_core_extra_data_size();
2241 if (e_phnum == PN_XNUM) {
2242 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2245 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2250 if (!dump_emit(cprm, elf, sizeof(*elf)))
2253 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2256 /* Write program headers for segments dump */
2257 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2258 vma = next_vma(vma, gate_vma)) {
2259 struct elf_phdr phdr;
2261 phdr.p_type = PT_LOAD;
2262 phdr.p_offset = offset;
2263 phdr.p_vaddr = vma->vm_start;
2265 phdr.p_filesz = vma_filesz[i++];
2266 phdr.p_memsz = vma->vm_end - vma->vm_start;
2267 offset += phdr.p_filesz;
2268 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2269 if (vma->vm_flags & VM_WRITE)
2270 phdr.p_flags |= PF_W;
2271 if (vma->vm_flags & VM_EXEC)
2272 phdr.p_flags |= PF_X;
2273 phdr.p_align = ELF_EXEC_PAGESIZE;
2275 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2279 if (!elf_core_write_extra_phdrs(cprm, offset))
2282 /* write out the notes section */
2283 if (!write_note_info(&info, cprm))
2286 if (elf_coredump_extra_notes_write(cprm))
2290 if (!dump_skip(cprm, dataoff - cprm->pos))
2293 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2294 vma = next_vma(vma, gate_vma)) {
2298 end = vma->vm_start + vma_filesz[i++];
2300 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2304 page = get_dump_page(addr);
2306 void *kaddr = kmap(page);
2307 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2311 stop = !dump_skip(cprm, PAGE_SIZE);
2316 dump_truncate(cprm);
2318 if (!elf_core_write_extra_data(cprm))
2321 if (e_phnum == PN_XNUM) {
2322 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2330 free_note_info(&info);
2339 #endif /* CONFIG_ELF_CORE */
2341 static int __init init_elf_binfmt(void)
2343 register_binfmt(&elf_format);
2347 static void __exit exit_elf_binfmt(void)
2349 /* Remove the COFF and ELF loaders. */
2350 unregister_binfmt(&elf_format);
2353 core_initcall(init_elf_binfmt);
2354 module_exit(exit_elf_binfmt);
2355 MODULE_LICENSE("GPL");