/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: ANSI C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

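/*
 * For example, with 4K pages (ELF_MIN_ALIGN == 0x1000):
 *
 *      ELF_PAGESTART(0x12345)  == 0x12000
 *      ELF_PAGEOFFSET(0x12345) == 0x345
 *      ELF_PAGEALIGN(0x12345)  == 0x13000
 */
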
static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would otherwise
   contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
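
/*
 * For example, with ELF_MIN_ALIGN == 0x1000, padzero(0x804a123) clears
 * the 0xedd bytes from 0x804a123 up to the 0x804b000 page boundary.
 */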

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
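
/*
 * On a grows-down stack (the common case), STACK_ALLOC() moves sp down
 * by len bytes and returns the new, lower address, and STACK_ROUND()
 * rounds the final stack pointer down to a 16-byte boundary, which is
 * what most ABIs expect at process startup.
 */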

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
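
/*
 * The initial stack built above ends up laid out roughly as follows,
 * from the final stack pointer upward (on a grows-down stack):
 *
 *      argc
 *      argv[0] ... argv[argc - 1], NULL
 *      envp[0] ... envp[envc - 1], NULL
 *      auxv[]  -- AT_* id/value pairs, terminated by AT_NULL
 *      random bytes, platform strings, argument and environment strings
 */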

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return(map_addr);
}

#endif /* !elf_map */
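
/*
 * For an interpreter whose PT_LOAD segments span, say, 0x201400 bytes in
 * total, the caller passes total_size == 0x201400 on the first call so
 * the whole range is reserved with a single vm_mmap(); the tail beyond
 * the first segment's page-aligned file size is immediately unmapped
 * again, and the remaining segments are mapped with total_size == 0.
 */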

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
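
/*
 * For example, a binary with two PT_LOAD entries, the first at p_vaddr
 * 0x400000 and the last at p_vaddr 0x600e10 with p_memsz 0x5f0, has a
 * total mapping size of 0x600e10 + 0x5f0 - 0x400000 == 0x201400.
 */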


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }
                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
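
/*
 * With the default STACK_RND_MASK above and 4K pages, random_variable
 * is at most 0x7ff << 12 == 0x7ff000, so the stack top moves by up to
 * 8MB - 4KB, in PAGE_SIZE steps.
 */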

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * in runtime via sysctl or explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0)
                goto out;
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;

        /*
         * Assume that all vmas with a .name op should always be dumped.
         * If this changes, a new vm_ops field can easily be added.
         */
        if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
                return true;

        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
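
/*
 * Example: a note named "CORE" costs 12 bytes of struct elf_note header
 * plus roundup(5, 4) == 8 bytes for the name, so its total size is
 * 20 + roundup(datasz, 4) bytes.
 */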

static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        return dump_emit(cprm, &en, sizeof(en)) &&
            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_t utime, stime;

                task_cputime(p, &utime, &stime);
                cputime_to_timeval(utime, &prstatus->pr_utime);
                cputime_to_timeval(stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for(i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
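
/*
 * saved_auxv was filled in by create_elf_tables() and is always
 * terminated by an AT_NULL pair, so the loop above counts the id/value
 * pairs up to and including that terminator.
 */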

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
                const siginfo_t *siginfo)
{
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
        set_fs(old_fs);
        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
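/*
 * For instance, a single mapping of /bin/cat at 0x400000-0x409000 with
 * file offset 0 would be encoded as the longs
 * { 1, PAGE_SIZE, 0x400000, 0x409000, 0 } followed by "/bin/cat\0".
 */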
1391 static int fill_files_note(struct memelfnote *note)
1392 {
1393         struct vm_area_struct *vma;
1394         unsigned count, size, names_ofs, remaining, n;
1395         user_long_t *data;
1396         user_long_t *start_end_ofs;
1397         char *name_base, *name_curpos;
1398
1399         /* *Estimated* file count and total data size needed */
1400         count = current->mm->map_count;
1401         size = count * 64;
1402
1403         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1404  alloc:
1405         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1406                 return -EINVAL;
1407         size = round_up(size, PAGE_SIZE);
1408         data = vmalloc(size);
1409         if (!data)
1410                 return -ENOMEM;
1411
1412         start_end_ofs = data + 2;
1413         name_base = name_curpos = ((char *)data) + names_ofs;
1414         remaining = size - names_ofs;
1415         count = 0;
1416         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1417                 struct file *file;
1418                 const char *filename;
1419
1420                 file = vma->vm_file;
1421                 if (!file)
1422                         continue;
1423                 filename = d_path(&file->f_path, name_curpos, remaining);
1424                 if (IS_ERR(filename)) {
1425                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1426                                 vfree(data);
1427                                 size = size * 5 / 4;
1428                                 goto alloc;
1429                         }
1430                         continue;
1431                 }
1432
1433                 /* d_path() fills the buffer from the end; move the name down */
1434                 /* n = strlen(filename) + 1: */
1435                 n = (name_curpos + remaining) - filename;
1436                 remaining = filename - name_curpos;
1437                 memmove(name_curpos, filename, n);
1438                 name_curpos += n;
1439
1440                 *start_end_ofs++ = vma->vm_start;
1441                 *start_end_ofs++ = vma->vm_end;
1442                 *start_end_ofs++ = vma->vm_pgoff;
1443                 count++;
1444         }
1445
1446         /* Now that we know the exact count of files, store it */
1447         data[0] = count;
1448         data[1] = PAGE_SIZE;
1449         /*
1450          * The final count is usually less than current->mm->map_count,
1451          * so we need to move the filenames down to close the gap.
1452          */
1453         n = current->mm->map_count - count;
1454         if (n != 0) {
1455                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1456                 memmove(name_base - shift_bytes, name_base,
1457                         name_curpos - name_base);
1458                 name_curpos -= shift_bytes;
1459         }
1460
1461         size = name_curpos - (char *)data;
1462         fill_note(note, "CORE", NT_FILE, size, data);
1463         return 0;
1464 }
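
/*
 * Illustrative sketch, not part of the original file: a userspace-style
 * reader for the NT_FILE layout documented above.  "payload" points at the
 * note data, "long" stands in for user_long_t of a native dump, and the
 * helper name is hypothetical; printf()/strlen() assume userspace
 * <stdio.h> and <string.h>.
 */
#if 0   /* example only, never compiled */
static void print_nt_file(const long *payload)
{
        long count = payload[0];
        long page_size = payload[1];
        const long *ent = payload + 2;          /* start/end/file_ofs triples */
        const char *name = (const char *)(ent + 3 * count);
        long i;

        for (i = 0; i < count; i++, ent += 3) {
                printf("%lx-%lx at file offset %lx: %s\n",
                       ent[0], ent[1], ent[2] * page_size, name);
                name += strlen(name) + 1;       /* filenames are NUL-separated */
        }
}
#endif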
1465
1466 #ifdef CORE_DUMP_USE_REGSET
1467 #include <linux/regset.h>
1468
1469 struct elf_thread_core_info {
1470         struct elf_thread_core_info *next;
1471         struct task_struct *task;
1472         struct elf_prstatus prstatus;
1473         struct memelfnote notes[];
1474 };
1475
1476 struct elf_note_info {
1477         struct elf_thread_core_info *thread;
1478         struct memelfnote psinfo;
1479         struct memelfnote signote;
1480         struct memelfnote auxv;
1481         struct memelfnote files;
1482         user_siginfo_t csigdata;
1483         size_t size;
1484         int thread_notes;
1485 };
1486
1487 /*
1488  * When a regset has a writeback hook, we call it on each thread before
1489  * dumping user memory.  On register window machines, this makes sure the
1490  * user memory backing the register data is up to date before we read it.
1491  */
1492 static void do_thread_regset_writeback(struct task_struct *task,
1493                                        const struct user_regset *regset)
1494 {
1495         if (regset->writeback)
1496                 regset->writeback(task, regset, 1);
1497 }
1498
1499 #ifndef PR_REG_SIZE
1500 #define PR_REG_SIZE(S) sizeof(S)
1501 #endif
1502
1503 #ifndef PRSTATUS_SIZE
1504 #define PRSTATUS_SIZE(S) sizeof(S)
1505 #endif
1506
1507 #ifndef PR_REG_PTR
1508 #define PR_REG_PTR(S) (&((S)->pr_reg))
1509 #endif
1510
1511 #ifndef SET_PR_FPVALID
1512 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1513 #endif
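
/*
 * The #ifndef defaults above can be overridden by an architecture whose
 * compat prstatus layout differs from the native one.
 */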
1514
1515 static int fill_thread_core_info(struct elf_thread_core_info *t,
1516                                  const struct user_regset_view *view,
1517                                  long signr, size_t *total)
1518 {
1519         unsigned int i;
1520
1521         /*
1522          * NT_PRSTATUS is the one special case, because the regset data
1523          * goes into the pr_reg field inside the note contents, rather
1524          * than being the whole note contents.  We fill the rest in here.
1525          * We assume that regset 0 is NT_PRSTATUS.
1526          */
1527         fill_prstatus(&t->prstatus, t->task, signr);
1528         (void) view->regsets[0].get(t->task, &view->regsets[0],
1529                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1530                                     PR_REG_PTR(&t->prstatus), NULL);
1531
1532         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1533                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1534         *total += notesize(&t->notes[0]);
1535
1536         do_thread_regset_writeback(t->task, &view->regsets[0]);
1537
1538         /*
1539          * Each other regset might generate a note too.  For each regset
1540          * that has no core_note_type or is inactive, we leave t->notes[i]
1541          * all zero and we'll know to skip writing it later.
1542          */
1543         for (i = 1; i < view->n; ++i) {
1544                 const struct user_regset *regset = &view->regsets[i];
1545                 do_thread_regset_writeback(t->task, regset);
1546                 if (regset->core_note_type && regset->get &&
1547                     (!regset->active || regset->active(t->task, regset))) {
1548                         int ret;
1549                         size_t size = regset->n * regset->size;
1550                         void *data = kmalloc(size, GFP_KERNEL);
1551                         if (unlikely(!data))
1552                                 return 0;
1553                         ret = regset->get(t->task, regset,
1554                                           0, size, data, NULL);
1555                         if (unlikely(ret))
1556                                 kfree(data);
1557                         else {
1558                                 if (regset->core_note_type != NT_PRFPREG)
1559                                         fill_note(&t->notes[i], "LINUX",
1560                                                   regset->core_note_type,
1561                                                   size, data);
1562                                 else {
1563                                         SET_PR_FPVALID(&t->prstatus, 1);
1564                                         fill_note(&t->notes[i], "CORE",
1565                                                   NT_PRFPREG, size, data);
1566                                 }
1567                                 *total += notesize(&t->notes[i]);
1568                         }
1569                 }
1570         }
1571
1572         return 1;
1573 }
1574
1575 static int fill_note_info(struct elfhdr *elf, int phdrs,
1576                           struct elf_note_info *info,
1577                           const siginfo_t *siginfo, struct pt_regs *regs)
1578 {
1579         struct task_struct *dump_task = current;
1580         const struct user_regset_view *view = task_user_regset_view(dump_task);
1581         struct elf_thread_core_info *t;
1582         struct elf_prpsinfo *psinfo;
1583         struct core_thread *ct;
1584         unsigned int i;
1585
1586         info->size = 0;
1587         info->thread = NULL;
1588
1589         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1590         if (psinfo == NULL) {
1591                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1592                 return 0;
1593         }
1594
1595         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1596
1597         /*
1598          * Figure out how many notes we're going to need for each thread.
1599          */
1600         info->thread_notes = 0;
1601         for (i = 0; i < view->n; ++i)
1602                 if (view->regsets[i].core_note_type != 0)
1603                         ++info->thread_notes;
1604
1605         /*
1606          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1607          * since it is our one special case.
1608          */
1609         if (unlikely(info->thread_notes == 0) ||
1610             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1611                 WARN_ON(1);
1612                 return 0;
1613         }
1614
1615         /*
1616          * Initialize the ELF file header.
1617          */
1618         fill_elf_header(elf, phdrs,
1619                         view->e_machine, view->e_flags);
1620
1621         /*
1622          * Allocate a structure for each thread.
1623          */
1624         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1625                 t = kzalloc(offsetof(struct elf_thread_core_info,
1626                                      notes[info->thread_notes]),
1627                             GFP_KERNEL);
1628                 if (unlikely(!t))
1629                         return 0;
1630
1631                 t->task = ct->task;
1632                 if (ct->task == dump_task || !info->thread) {
1633                         t->next = info->thread;
1634                         info->thread = t;
1635                 } else {
1636                         /*
1637                          * Make sure to keep the original task at
1638                          * the head of the list.
1639                          */
1640                         t->next = info->thread->next;
1641                         info->thread->next = t;
1642                 }
1643         }
1644
1645         /*
1646          * Now fill in each thread's information.
1647          */
1648         for (t = info->thread; t != NULL; t = t->next)
1649                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1650                         return 0;
1651
1652         /*
1653          * Fill in the two process-wide notes.
1654          */
1655         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1656         info->size += notesize(&info->psinfo);
1657
1658         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1659         info->size += notesize(&info->signote);
1660
1661         fill_auxv_note(&info->auxv, current->mm);
1662         info->size += notesize(&info->auxv);
1663
1664         if (fill_files_note(&info->files) == 0)
1665                 info->size += notesize(&info->files);
1666
1667         return 1;
1668 }
1669
1670 static size_t get_note_info_size(struct elf_note_info *info)
1671 {
1672         return info->size;
1673 }
1674
1675 /*
1676  * Write all the notes for each thread.  When writing the first thread, the
1677  * process-wide notes are interleaved after the first thread-specific note.
1678  */
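/*
 * Illustrative layout, not in the original comment: with two threads T0
 * (the dump task) and T1, the note data is emitted as
 *
 *      NT_PRSTATUS(T0), NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE,
 *      <other regset notes for T0>,
 *      NT_PRSTATUS(T1), <other regset notes for T1>
 */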
1679 static int write_note_info(struct elf_note_info *info,
1680                            struct coredump_params *cprm)
1681 {
1682         bool first = true;
1683         struct elf_thread_core_info *t = info->thread;
1684
1685         do {
1686                 int i;
1687
1688                 if (!writenote(&t->notes[0], cprm))
1689                         return 0;
1690
1691                 if (first && !writenote(&info->psinfo, cprm))
1692                         return 0;
1693                 if (first && !writenote(&info->signote, cprm))
1694                         return 0;
1695                 if (first && !writenote(&info->auxv, cprm))
1696                         return 0;
1697                 if (first && info->files.data &&
1698                                 !writenote(&info->files, cprm))
1699                         return 0;
1700
1701                 for (i = 1; i < info->thread_notes; ++i)
1702                         if (t->notes[i].data &&
1703                             !writenote(&t->notes[i], cprm))
1704                                 return 0;
1705
1706                 first = false;
1707                 t = t->next;
1708         } while (t);
1709
1710         return 1;
1711 }
1712
1713 static void free_note_info(struct elf_note_info *info)
1714 {
1715         struct elf_thread_core_info *threads = info->thread;
1716         while (threads) {
1717                 unsigned int i;
1718                 struct elf_thread_core_info *t = threads;
1719                 threads = t->next;
1720                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1721                 for (i = 1; i < info->thread_notes; ++i)
1722                         kfree(t->notes[i].data);
1723                 kfree(t);
1724         }
1725         kfree(info->psinfo.data);
1726         vfree(info->files.data);
1727 }
1728
1729 #else
1730
1731 /* Here is the structure in which status of each thread is captured. */
1732 struct elf_thread_status
1733 {
1734         struct list_head list;
1735         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1736         elf_fpregset_t fpu;             /* NT_PRFPREG */
1737         struct task_struct *thread;
1738 #ifdef ELF_CORE_COPY_XFPREGS
1739         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1740 #endif
1741         struct memelfnote notes[3];
1742         int num_notes;
1743 };
1744
1745 /*
1746  * In order to add the per-thread information to the ELF core file,
1747  * we keep a linked list of every thread's pr_status and then create
1748  * a single section for them in the final core file.
1749  */
1750 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1751 {
1752         int sz = 0;
1753         struct task_struct *p = t->thread;
1754         t->num_notes = 0;
1755
1756         fill_prstatus(&t->prstatus, p, signr);
1757         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1758
1759         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1760                   &(t->prstatus));
1761         t->num_notes++;
1762         sz += notesize(&t->notes[0]);
1763
1764         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1765                                                                 &t->fpu))) {
1766                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1767                           &(t->fpu));
1768                 t->num_notes++;
1769                 sz += notesize(&t->notes[1]);
1770         }
1771
1772 #ifdef ELF_CORE_COPY_XFPREGS
1773         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1774                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1775                           sizeof(t->xfpu), &t->xfpu);
1776                 t->num_notes++;
1777                 sz += notesize(&t->notes[2]);
1778         }
1779 #endif
1780         return sz;
1781 }
1782
1783 struct elf_note_info {
1784         struct memelfnote *notes;
1785         struct memelfnote *notes_files;
1786         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1787         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1788         struct list_head thread_list;
1789         elf_fpregset_t *fpu;
1790 #ifdef ELF_CORE_COPY_XFPREGS
1791         elf_fpxregset_t *xfpu;
1792 #endif
1793         user_siginfo_t csigdata;
1794         int thread_status_size;
1795         int numnote;
1796 };
1797
1798 static int elf_note_info_init(struct elf_note_info *info)
1799 {
1800         memset(info, 0, sizeof(*info));
1801         INIT_LIST_HEAD(&info->thread_list);
1802
1803         /* Allocate space for ELF notes */
1804         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1805         if (!info->notes)
1806                 return 0;
1807         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1808         if (!info->psinfo)
1809                 return 0;
1810         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1811         if (!info->prstatus)
1812                 return 0;
1813         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1814         if (!info->fpu)
1815                 return 0;
1816 #ifdef ELF_CORE_COPY_XFPREGS
1817         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1818         if (!info->xfpu)
1819                 return 0;
1820 #endif
1821         return 1;
1822 }
1823
1824 static int fill_note_info(struct elfhdr *elf, int phdrs,
1825                           struct elf_note_info *info,
1826                           const siginfo_t *siginfo, struct pt_regs *regs)
1827 {
1828         struct list_head *t;
1829         struct core_thread *ct;
1830         struct elf_thread_status *ets;
1831
1832         if (!elf_note_info_init(info))
1833                 return 0;
1834
1835         for (ct = current->mm->core_state->dumper.next;
1836                                         ct; ct = ct->next) {
1837                 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1838                 if (!ets)
1839                         return 0;
1840
1841                 ets->thread = ct->task;
1842                 list_add(&ets->list, &info->thread_list);
1843         }
1844
1845         list_for_each(t, &info->thread_list) {
1846                 int sz;
1847
1848                 ets = list_entry(t, struct elf_thread_status, list);
1849                 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1850                 info->thread_status_size += sz;
1851         }
1852         /* now collect the dump for the current task */
1853         memset(info->prstatus, 0, sizeof(*info->prstatus));
1854         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1855         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1856
1857         /* Set up header */
1858         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1859
1860         /*
1861          * Set up the notes in similar form to SVR4 core dumps made
1862          * with info from their /proc.
1863          */
1864
1865         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1866                   sizeof(*info->prstatus), info->prstatus);
1867         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1868         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1869                   sizeof(*info->psinfo), info->psinfo);
1870
1871         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1872         fill_auxv_note(info->notes + 3, current->mm);
1873         info->numnote = 4;
1874
1875         if (fill_files_note(info->notes + info->numnote) == 0) {
1876                 info->notes_files = info->notes + info->numnote;
1877                 info->numnote++;
1878         }
1879
1880         /* Try to dump the FPU. */
1881         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1882                                                                info->fpu);
1883         if (info->prstatus->pr_fpvalid)
1884                 fill_note(info->notes + info->numnote++,
1885                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1886 #ifdef ELF_CORE_COPY_XFPREGS
1887         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1888                 fill_note(info->notes + info->numnote++,
1889                           "LINUX", ELF_CORE_XFPREG_TYPE,
1890                           sizeof(*info->xfpu), info->xfpu);
1891 #endif
1892
1893         return 1;
1894 }
1895
1896 static size_t get_note_info_size(struct elf_note_info *info)
1897 {
1898         int sz = 0;
1899         int i;
1900
1901         for (i = 0; i < info->numnote; i++)
1902                 sz += notesize(info->notes + i);
1903
1904         sz += info->thread_status_size;
1905
1906         return sz;
1907 }
1908
1909 static int write_note_info(struct elf_note_info *info,
1910                            struct coredump_params *cprm)
1911 {
1912         int i;
1913         struct list_head *t;
1914
1915         for (i = 0; i < info->numnote; i++)
1916                 if (!writenote(info->notes + i, cprm))
1917                         return 0;
1918
1919         /* write out the thread status notes section */
1920         list_for_each(t, &info->thread_list) {
1921                 struct elf_thread_status *tmp =
1922                                 list_entry(t, struct elf_thread_status, list);
1923
1924                 for (i = 0; i < tmp->num_notes; i++)
1925                         if (!writenote(&tmp->notes[i], cprm))
1926                                 return 0;
1927         }
1928
1929         return 1;
1930 }
1931
1932 static void free_note_info(struct elf_note_info *info)
1933 {
1934         while (!list_empty(&info->thread_list)) {
1935                 struct list_head *tmp = info->thread_list.next;
1936                 list_del(tmp);
1937                 kfree(list_entry(tmp, struct elf_thread_status, list));
1938         }
1939
1940         /* Free data possibly allocated by fill_files_note(): */
1941         if (info->notes_files)
1942                 vfree(info->notes_files->data);
1943
1944         kfree(info->prstatus);
1945         kfree(info->psinfo);
1946         kfree(info->notes);
1947         kfree(info->fpu);
1948 #ifdef ELF_CORE_COPY_XFPREGS
1949         kfree(info->xfpu);
1950 #endif
1951 }
1952
1953 #endif
1954
1955 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1956                                         struct vm_area_struct *gate_vma)
1957 {
1958         struct vm_area_struct *ret = tsk->mm->mmap;
1959
1960         if (ret)
1961                 return ret;
1962         return gate_vma;
1963 }
1964 /*
1965  * Helper function for iterating across a vma list.  It ensures that the caller
1966  * will visit `gate_vma' prior to terminating the search.
1967  */
1968 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1969                                         struct vm_area_struct *gate_vma)
1970 {
1971         struct vm_area_struct *ret;
1972
1973         ret = this_vma->vm_next;
1974         if (ret)
1975                 return ret;
1976         if (this_vma == gate_vma)
1977                 return NULL;
1978         return gate_vma;
1979 }
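
/*
 * Typical iteration, as used by elf_core_dump() below: visit every vma,
 * then the gate vma, if any:
 *
 *      for (vma = first_vma(current, gate_vma); vma != NULL;
 *           vma = next_vma(vma, gate_vma))
 *              ...
 */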
1980
1981 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1982                              elf_addr_t e_shoff, int segs)
1983 {
1984         elf->e_shoff = e_shoff;
1985         elf->e_shentsize = sizeof(*shdr4extnum);
1986         elf->e_shnum = 1;
1987         elf->e_shstrndx = SHN_UNDEF;
1988
1989         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1990
1991         shdr4extnum->sh_type = SHT_NULL;
1992         shdr4extnum->sh_size = elf->e_shnum;
1993         shdr4extnum->sh_link = elf->e_shstrndx;
1994         shdr4extnum->sh_info = segs;
1995 }
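
/*
 * Illustrative sketch, not part of the original file: how a reader
 * recovers the real segment count when e_phnum == PN_XNUM.  Per the ELF
 * gABI the count is stashed in sh_info of section header 0, which is
 * exactly what fill_extnum_info() stores above.  The helper name is
 * hypothetical.
 */
#if 0   /* example only, never compiled */
static unsigned int real_phnum(const struct elfhdr *elf,
                               const struct elf_shdr *shdr0)
{
        return elf->e_phnum == PN_XNUM ? shdr0->sh_info : elf->e_phnum;
}
#endif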
1996
1997 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1998                                      unsigned long mm_flags)
1999 {
2000         struct vm_area_struct *vma;
2001         size_t size = 0;
2002
2003         for (vma = first_vma(current, gate_vma); vma != NULL;
2004              vma = next_vma(vma, gate_vma))
2005                 size += vma_dump_size(vma, mm_flags);
2006         return size;
2007 }
2008
2009 /*
2010  * Actual dumper
2011  *
2012  * This is a two-pass process; first we find the offsets of the bits,
2013  * and then they are actually written out.  If we run out of core limit
2014  * we just truncate.
2015  */
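/*
 * Resulting file layout, illustrated from the offset bookkeeping below:
 *
 *      ELF header | program headers (segs of them) | notes
 *      | padding up to ELF_EXEC_PAGESIZE | PT_LOAD segment data
 *      | extra (arch) data | extended-numbering section header,
 *        only when e_phnum == PN_XNUM
 */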
2016 static int elf_core_dump(struct coredump_params *cprm)
2017 {
2018         int has_dumped = 0;
2019         mm_segment_t fs;
2020         int segs;
2021         struct vm_area_struct *vma, *gate_vma;
2022         struct elfhdr *elf = NULL;
2023         loff_t offset = 0, dataoff;
2024         struct elf_note_info info = { };
2025         struct elf_phdr *phdr4note = NULL;
2026         struct elf_shdr *shdr4extnum = NULL;
2027         Elf_Half e_phnum;
2028         elf_addr_t e_shoff;
2029
2030         /*
2031          * We no longer stop all VM operations.
2032          *
2033          * This is because those processes that could possibly change the
2034          * map_count or the mmap/vma pages are now blocked in do_exit until
2035          * the current task has finished this core dump.
2036          *
2037          * Only ptrace can touch these memory addresses, and it doesn't change
2038          * the map_count or the pages allocated. So there is no possibility
2039          * of crashing while dumping the mm->vm_next areas to the core file.
2040          */
2041
2042         /* alloc memory for large data structures: too large to be on stack */
2043         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2044         if (!elf)
2045                 goto out;
2046         /*
2047          * The number of segs is recorded in the ELF header as a 16-bit value.
2048          * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2049          */
2050         segs = current->mm->map_count;
2051         segs += elf_core_extra_phdrs();
2052
2053         gate_vma = get_gate_vma(current->mm);
2054         if (gate_vma != NULL)
2055                 segs++;
2056
2057         /* for notes section */
2058         segs++;
2059
2060         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2061          * this, the kernel supports extended numbering. Have a look at
2062          * include/linux/elf.h for further information. */
2063         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2064
2065         /*
2066          * Collect all the non-memory information about the process for the
2067          * notes.  This also sets up the file header.
2068          */
2069         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2070                 goto cleanup;
2071
2072         has_dumped = 1;
2073
2074         fs = get_fs();
2075         set_fs(KERNEL_DS);
2076
2077         offset += sizeof(*elf);                         /* Elf header */
2078         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2079
2080         /* Write notes phdr entry */
2081         {
2082                 size_t sz = get_note_info_size(&info);
2083
2084                 sz += elf_coredump_extra_notes_size();
2085
2086                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2087                 if (!phdr4note)
2088                         goto end_coredump;
2089
2090                 fill_elf_note_phdr(phdr4note, sz, offset);
2091                 offset += sz;
2092         }
2093
2094         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2095
2096         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2097         offset += elf_core_extra_data_size();
2098         e_shoff = offset;
2099
2100         if (e_phnum == PN_XNUM) {
2101                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2102                 if (!shdr4extnum)
2103                         goto end_coredump;
2104                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2105         }
2106
2107         offset = dataoff;
2108
2109         if (!dump_emit(cprm, elf, sizeof(*elf)))
2110                 goto end_coredump;
2111
2112         if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2113                 goto end_coredump;
2114
2115         /* Write program headers for segments dump */
2116         for (vma = first_vma(current, gate_vma); vma != NULL;
2117                         vma = next_vma(vma, gate_vma)) {
2118                 struct elf_phdr phdr;
2119
2120                 phdr.p_type = PT_LOAD;
2121                 phdr.p_offset = offset;
2122                 phdr.p_vaddr = vma->vm_start;
2123                 phdr.p_paddr = 0;
2124                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2125                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2126                 offset += phdr.p_filesz;
2127                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2128                 if (vma->vm_flags & VM_WRITE)
2129                         phdr.p_flags |= PF_W;
2130                 if (vma->vm_flags & VM_EXEC)
2131                         phdr.p_flags |= PF_X;
2132                 phdr.p_align = ELF_EXEC_PAGESIZE;
2133
2134                 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2135                         goto end_coredump;
2136         }
2137
2138         if (!elf_core_write_extra_phdrs(cprm, offset))
2139                 goto end_coredump;
2140
2141         /* write out the notes section */
2142         if (!write_note_info(&info, cprm))
2143                 goto end_coredump;
2144
2145         if (elf_coredump_extra_notes_write(cprm))
2146                 goto end_coredump;
2147
2148         /* Align to page */
2149         if (!dump_skip(cprm, dataoff - cprm->written))
2150                 goto end_coredump;
2151
2152         for (vma = first_vma(current, gate_vma); vma != NULL;
2153                         vma = next_vma(vma, gate_vma)) {
2154                 unsigned long addr;
2155                 unsigned long end;
2156
2157                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2158
2159                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2160                         struct page *page;
2161                         int stop;
2162
2163                         page = get_dump_page(addr);
2164                         if (page) {
2165                                 void *kaddr = kmap(page);
2166                                 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2167                                 kunmap(page);
2168                                 page_cache_release(page);
2169                         } else
2170                                 stop = !dump_skip(cprm, PAGE_SIZE);
2171                         if (stop)
2172                                 goto end_coredump;
2173                 }
2174         }
2175
2176         if (!elf_core_write_extra_data(cprm))
2177                 goto end_coredump;
2178
2179         if (e_phnum == PN_XNUM) {
2180                 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2181                         goto end_coredump;
2182         }
2183
2184 end_coredump:
2185         set_fs(fs);
2186
2187 cleanup:
2188         free_note_info(&info);
2189         kfree(shdr4extnum);
2190         kfree(phdr4note);
2191         kfree(elf);
2192 out:
2193         return has_dumped;
2194 }
2195
2196 #endif          /* CONFIG_ELF_CORE */
2197
2198 static int __init init_elf_binfmt(void)
2199 {
2200         register_binfmt(&elf_format);
2201         return 0;
2202 }
2203
2204 static void __exit exit_elf_binfmt(void)
2205 {
2206         /* Remove the ELF loader. */
2207         unregister_binfmt(&elf_format);
2208 }
2209
2210 core_initcall(init_elf_binfmt);
2211 module_exit(exit_elf_binfmt);
2212 MODULE_LICENSE("GPL");