Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jgarzi...
[sfrench/cifs-2.6.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <asm/uaccess.h>
35 #include <asm/param.h>
36 #include <asm/page.h>
37
/*
 * Forward declarations.  load_elf_binary() and load_elf_library() are the
 * handlers installed in elf_format; elf_map() is declared here so it can be
 * called before its definition.
 */
38 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
39 static int load_elf_library(struct file *);
40 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
41                                 int, int, unsigned long);
42
43 /*
44  * If we don't support core dumping, then supply a NULL so we
45  * don't even try.
46  */
47 #ifdef CONFIG_ELF_CORE
48 static int elf_core_dump(struct coredump_params *cprm);
49 #else
50 #define elf_core_dump   NULL
51 #endif
52
/*
 * ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE; the
 * ELF_PAGE* helpers round addresses to this granularity.
 */
53 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
54 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
55 #else
56 #define ELF_MIN_ALIGN   PAGE_SIZE
57 #endif
58
/* Default to 0 unless ELF_CORE_EFLAGS has already been defined. */
59 #ifndef ELF_CORE_EFLAGS
60 #define ELF_CORE_EFLAGS 0
61 #endif
62
/*
 * Address helpers working at ELF_MIN_ALIGN granularity (ELF_MIN_ALIGN is
 * expected to be a power of two):
 *
 *   ELF_PAGESTART(_v)  - round _v down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET(_v) - offset of _v inside its ELF_MIN_ALIGN-sized page
 *   ELF_PAGEALIGN(_v)  - round _v up to an ELF_MIN_ALIGN boundary
 *
 * Both rounding masks are widened to unsigned long before being inverted,
 * so the upper address bits are preserved even when ELF_MIN_ALIGN has a
 * narrower unsigned type.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
66
/*
 * Binary-format descriptor for ELF.  It points at the load, shared-library
 * and core-dump handlers declared above; .core_dump ends up NULL when
 * CONFIG_ELF_CORE is not set, because elf_core_dump is then defined as NULL.
 */
67 static struct linux_binfmt elf_format = {
68                 .module         = THIS_MODULE,
69                 .load_binary    = load_elf_binary,
70                 .load_shlib     = load_elf_library,
71                 .core_dump      = elf_core_dump,
72                 .min_coredump   = ELF_EXEC_PAGESIZE,
73                 .hasvdso        = 1
74 };
75
/*
 * True when x, converted to unsigned long, is at or above TASK_SIZE.
 * This file uses it to test the values returned by do_brk() and elf_map().
 */
76 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Stack manipulation helpers.  The direction depends on whether the stack
 * grows up (CONFIG_STACK_GROWSUP) or down:
 *
 *   STACK_ADD(sp, items)   - address 'items' elf_addr_t slots further along
 *                            the stack from sp
 *   STACK_ROUND(sp, items) - the address 'items' slots further along from
 *                            sp, rounded to a 16-byte boundary
 *   STACK_ALLOC(sp, len)   - move sp by len and evaluate to the address of
 *                            the space just reserved
 *
 * Every macro argument is parenthesized so that compound expressions such as
 * STACK_ROUND(sp, a + b) expand as intended.  STACK_ALLOC() modifies its
 * first argument, which therefore has to be an lvalue.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif
135
/*
 * create_elf_tables() - build the initial user stack contents for a new image.
 * @bprm:             exec parameters; supplies the current stack position
 *                    (p), argc, envc, exec, interp_flags and interp_data.
 *                    bprm->p is updated to the final, 16-byte rounded stack
 *                    position (and bprm->exec is overwritten when
 *                    CONFIG_STACK_GROWSUP is set).
 * @exec:             ELF header of the image; e_phoff, e_phnum and e_entry
 *                    are reported through the auxiliary vector.
 * @load_addr:        added to e_phoff to form AT_PHDR.
 * @interp_load_addr: reported as AT_BASE.
 *
 * Copies the platform strings and 16 random bytes to the user stack, fills
 * current->mm->saved_auxv with the auxiliary vector, then writes argc, the
 * argv[] and envp[] pointer arrays (each terminated by 0) and finally the
 * auxiliary vector to the stack.  mm->arg_start is used as the start of the
 * argument strings; mm->arg_end, mm->env_start and mm->env_end are set while
 * walking them.
 *
 * Returns 0 on success, -EFAULT when a user-space copy fails or
 * find_extend_vma() returns NULL, and -EINVAL when strnlen_user() reports a
 * string length of 0 or more than MAX_ARG_STRLEN.
 */
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
            /* The vector is assembled in mm->saved_auxv and copied to the
               user stack at the end of this function. */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
254         ei_index += 2;
255
256         sp = STACK_ADD(p, ei_index);
257
            /* Slots needed besides the auxv: argv[] plus its terminator,
               envp[] plus its terminator, and one slot for argc. */
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
            /* p now walks the strings themselves, starting at mm->arg_start;
               each pointer slot receives the address of the next string. */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
295         if (__put_user(0, argv))
296                 return -EFAULT;
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
            /* envp points at the terminator just written, so the auxiliary
               vector starts one slot after it. */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 #ifndef elf_map
319
320 static unsigned long elf_map(struct file *filep, unsigned long addr,
321                 struct elf_phdr *eppnt, int prot, int type,
322                 unsigned long total_size)
323 {
324         unsigned long map_addr;
325         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
326         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
327         addr = ELF_PAGESTART(addr);
328         size = ELF_PAGEALIGN(size);
329
330         /* mmap() will return -EINVAL if given a zero size, but a
331          * segment with zero filesize is perfectly valid */
332         if (!size)
333                 return addr;
334
335         down_write(&current->mm->mmap_sem);
336         /*
337         * total_size is the size of the ELF (interpreter) image.
338         * The _first_ mmap needs to know the full size, otherwise
339         * randomization might put this image into an overlapping
340         * position with the ELF binary image. (since size < total_size)
341         * So we first map the 'big' image - and unmap the remainder at
342         * the end. (which unmap is needed for ELF images with holes.)
343         */
344         if (total_size) {
345                 total_size = ELF_PAGEALIGN(total_size);
346                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
347                 if (!BAD_ADDR(map_addr))
348                         do_munmap(current->mm, map_addr+size, total_size-size);
349         } else
350                 map_addr = do_mmap(filep, addr, size, prot, type, off);
351
352         up_write(&current->mm->mmap_sem);
353         return(map_addr);
354 }
355
356 #endif /* !elf_map */
357
358 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
359 {
360         int i, first_idx = -1, last_idx = -1;
361
362         for (i = 0; i < nr; i++) {
363                 if (cmds[i].p_type == PT_LOAD) {
364                         last_idx = i;
365                         if (first_idx == -1)
366                                 first_idx = i;
367                 }
368         }
369         if (first_idx == -1)
370                 return 0;
371
372         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
373                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
374 }
375
376
377 /* This is much more generalized than the library routine read function,
378    so we keep this separate.  Technically the library read function
379    is only provided so that we can read a.out libraries that have
380    an ELF header */
381
/*
 * load_elf_interp() - map the ELF interpreter into the current process.
 * @interp_elf_ex:   ELF header of the interpreter
 * @interpreter:     open file of the interpreter
 * @interp_map_addr: in/out; if it holds 0, it receives the value returned by
 *                   elf_map() for a PT_LOAD segment (stored before that
 *                   value is checked with BAD_ADDR())
 * @no_base:         when non-zero and the interpreter is ET_DYN, load_addr
 *                   is set to -vaddr before the first mapping, so elf_map()
 *                   is asked for address 0
 *
 * Reads the program headers, maps every PT_LOAD segment with elf_map() (only
 * the first call is given the total image size), validates each segment
 * against TASK_SIZE and, if the memory image extends past the file image,
 * zero-pads the last file page and maps the remaining bss with do_brk().
 *
 * Returns load_addr on success; it is only assigned for ET_DYN interpreters
 * and stays 0 otherwise.  On failure the return value is one of: ~0UL (a
 * header sanity check or the kmalloc() failed), -EIO or the negative result
 * of kernel_read(), -EINVAL (no PT_LOAD segment), -ENOMEM (segment does not
 * fit below TASK_SIZE), -EFAULT (padzero() failed), or the bad address
 * returned by elf_map()/do_brk().
 */
382 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
383                 struct file *interpreter, unsigned long *interp_map_addr,
384                 unsigned long no_base)
385 {
386         struct elf_phdr *elf_phdata;
387         struct elf_phdr *eppnt;
388         unsigned long load_addr = 0;
389         int load_addr_set = 0;
390         unsigned long last_bss = 0, elf_bss = 0;
391         unsigned long error = ~0UL;
392         unsigned long total_size;
393         int retval, i, size;
394
395         /* First of all, some simple consistency checks */
396         if (interp_elf_ex->e_type != ET_EXEC &&
397             interp_elf_ex->e_type != ET_DYN)
398                 goto out;
399         if (!elf_check_arch(interp_elf_ex))
400                 goto out;
401         if (!interpreter->f_op || !interpreter->f_op->mmap)
402                 goto out;
403
404         /*
405          * If the size of this structure has changed, then punt, since
406          * we will be doing the wrong thing.
407          */
408         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
409                 goto out;
410         if (interp_elf_ex->e_phnum < 1 ||
411                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
412                 goto out;
413
414         /* Now read in all of the header information */
415         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
            /* The whole program header table must fit in ELF_MIN_ALIGN bytes. */
416         if (size > ELF_MIN_ALIGN)
417                 goto out;
418         elf_phdata = kmalloc(size, GFP_KERNEL);
419         if (!elf_phdata)
420                 goto out;
421
422         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
423                              (char *)elf_phdata,size);
            /* A short read is reported as -EIO; a negative result is passed on. */
424         error = -EIO;
425         if (retval != size) {
426                 if (retval < 0)
427                         error = retval; 
428                 goto out_close;
429         }
430
431         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
432         if (!total_size) {
433                 error = -EINVAL;
434                 goto out_close;
435         }
436
437         eppnt = elf_phdata;
438         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
439                 if (eppnt->p_type == PT_LOAD) {
440                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
441                         int elf_prot = 0;
442                         unsigned long vaddr = 0;
443                         unsigned long k, map_addr;
444
445                         if (eppnt->p_flags & PF_R)
446                                 elf_prot = PROT_READ;
447                         if (eppnt->p_flags & PF_W)
448                                 elf_prot |= PROT_WRITE;
449                         if (eppnt->p_flags & PF_X)
450                                 elf_prot |= PROT_EXEC;
451                         vaddr = eppnt->p_vaddr;
                            /* ET_EXEC segments, and every segment after the
                               load address is known, go to a fixed address. */
452                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
453                                 elf_type |= MAP_FIXED;
454                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
455                                 load_addr = -vaddr;
456
457                         map_addr = elf_map(interpreter, load_addr + vaddr,
458                                         eppnt, elf_prot, elf_type, total_size);
                            /* Only the first elf_map() call gets the full
                               image size. */
459                         total_size = 0;
460                         if (!*interp_map_addr)
461                                 *interp_map_addr = map_addr;
462                         error = map_addr;
463                         if (BAD_ADDR(map_addr))
464                                 goto out_close;
465
                            /* The first ET_DYN mapping fixes the load address
                               for all following segments. */
466                         if (!load_addr_set &&
467                             interp_elf_ex->e_type == ET_DYN) {
468                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
469                                 load_addr_set = 1;
470                         }
471
472                         /*
473                          * Check to see if the section's size will overflow the
474                          * allowed task size. Note that p_filesz must always be
475                          * <= p_memsize so it's only necessary to check p_memsz.
476                          */
477                         k = load_addr + eppnt->p_vaddr;
478                         if (BAD_ADDR(k) ||
479                             eppnt->p_filesz > eppnt->p_memsz ||
480                             eppnt->p_memsz > TASK_SIZE ||
481                             TASK_SIZE - eppnt->p_memsz < k) {
482                                 error = -ENOMEM;
483                                 goto out_close;
484                         }
485
486                         /*
487                          * Find the end of the file mapping for this phdr, and
488                          * keep track of the largest address we see for this.
489                          */
490                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
491                         if (k > elf_bss)
492                                 elf_bss = k;
493
494                         /*
495                          * Do the same thing for the memory mapping - between
496                          * elf_bss and last_bss is the bss section.
497                          */
498                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
499                         if (k > last_bss)
500                                 last_bss = k;
501                 }
502         }
503
504         if (last_bss > elf_bss) {
505                 /*
506                  * Now fill out the bss section.  First pad the last page up
507                  * to the page boundary, and then perform a mmap to make sure
508                  * that there are zero-mapped pages up to and including the
509                  * last bss page.
510                  */
511                 if (padzero(elf_bss)) {
512                         error = -EFAULT;
513                         goto out_close;
514                 }
515
516                 /* What we have mapped so far */
517                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
518
519                 /* Map the last of the bss segment */
520                 down_write(&current->mm->mmap_sem);
521                 error = do_brk(elf_bss, last_bss - elf_bss);
522                 up_write(&current->mm->mmap_sem);
523                 if (BAD_ADDR(error))
524                         goto out_close;
525         }
526
527         error = load_addr;
528
    /* out_close frees the program header copy; out is used before it exists. */
529 out_close:
530         kfree(elf_phdata);
531 out:
532         return error;
533 }
534
535 /*
536  * These are the functions used to load ELF style executables and shared
537  * libraries.  There is no binary dependent code anywhere else.
538  */
539
/*
 * Interpreter type codes.
 * NOTE(review): no user of these two constants is visible in the reviewed
 * part of this file -- confirm before changing or removing them.
 */
540 #define INTERPRETER_NONE 0
541 #define INTERPRETER_ELF 2
542
/*
 * Fallback used when STACK_RND_MASK has not been defined already.
 * randomize_stack_top() ANDs a random value with this mask and shifts the
 * result left by PAGE_SHIFT to obtain the stack offset.
 */
543 #ifndef STACK_RND_MASK
544 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
545 #endif
546
547 static unsigned long randomize_stack_top(unsigned long stack_top)
548 {
549         unsigned int random_variable = 0;
550
551         if ((current->flags & PF_RANDOMIZE) &&
552                 !(current->personality & ADDR_NO_RANDOMIZE)) {
553                 random_variable = get_random_int() & STACK_RND_MASK;
554                 random_variable <<= PAGE_SHIFT;
555         }
556 #ifdef CONFIG_STACK_GROWSUP
557         return PAGE_ALIGN(stack_top) + random_variable;
558 #else
559         return PAGE_ALIGN(stack_top) - random_variable;
560 #endif
561 }
562
563 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
564 {
565         struct file *interpreter = NULL; /* to shut gcc up */
566         unsigned long load_addr = 0, load_bias = 0;
567         int load_addr_set = 0;
568         char * elf_interpreter = NULL;
569         unsigned long error;
570         struct elf_phdr *elf_ppnt, *elf_phdata;
571         unsigned long elf_bss, elf_brk;
572         int retval, i;
573         unsigned int size;
574         unsigned long elf_entry;
575         unsigned long interp_load_addr = 0;
576         unsigned long start_code, end_code, start_data, end_data;
577         unsigned long reloc_func_desc = 0;
578         int executable_stack = EXSTACK_DEFAULT;
579         unsigned long def_flags = 0;
580         struct {
581                 struct elfhdr elf_ex;
582                 struct elfhdr interp_elf_ex;
583         } *loc;
584
585         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
586         if (!loc) {
587                 retval = -ENOMEM;
588                 goto out_ret;
589         }
590         
591         /* Get the exec-header */
592         loc->elf_ex = *((struct elfhdr *)bprm->buf);
593
594         retval = -ENOEXEC;
595         /* First of all, some simple consistency checks */
596         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
597                 goto out;
598
599         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
600                 goto out;
601         if (!elf_check_arch(&loc->elf_ex))
602                 goto out;
603         if (!bprm->file->f_op||!bprm->file->f_op->mmap)
604                 goto out;
605
606         /* Now read in all of the header information */
607         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
608                 goto out;
609         if (loc->elf_ex.e_phnum < 1 ||
610                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
611                 goto out;
612         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
613         retval = -ENOMEM;
614         elf_phdata = kmalloc(size, GFP_KERNEL);
615         if (!elf_phdata)
616                 goto out;
617
618         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
619                              (char *)elf_phdata, size);
620         if (retval != size) {
621                 if (retval >= 0)
622                         retval = -EIO;
623                 goto out_free_ph;
624         }
625
626         elf_ppnt = elf_phdata;
627         elf_bss = 0;
628         elf_brk = 0;
629
630         start_code = ~0UL;
631         end_code = 0;
632         start_data = 0;
633         end_data = 0;
634
635         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
636                 if (elf_ppnt->p_type == PT_INTERP) {
637                         /* This is the program interpreter used for
638                          * shared libraries - for now assume that this
639                          * is an a.out format binary
640                          */
641                         retval = -ENOEXEC;
642                         if (elf_ppnt->p_filesz > PATH_MAX || 
643                             elf_ppnt->p_filesz < 2)
644                                 goto out_free_ph;
645
646                         retval = -ENOMEM;
647                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
648                                                   GFP_KERNEL);
649                         if (!elf_interpreter)
650                                 goto out_free_ph;
651
652                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
653                                              elf_interpreter,
654                                              elf_ppnt->p_filesz);
655                         if (retval != elf_ppnt->p_filesz) {
656                                 if (retval >= 0)
657                                         retval = -EIO;
658                                 goto out_free_interp;
659                         }
660                         /* make sure path is NULL terminated */
661                         retval = -ENOEXEC;
662                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
663                                 goto out_free_interp;
664
665                         /*
666                          * The early SET_PERSONALITY here is so that the lookup
667                          * for the interpreter happens in the namespace of the 
668                          * to-be-execed image.  SET_PERSONALITY can select an
669                          * alternate root.
670                          *
671                          * However, SET_PERSONALITY is NOT allowed to switch
672                          * this task into the new images's memory mapping
673                          * policy - that is, TASK_SIZE must still evaluate to
674                          * that which is appropriate to the execing application.
675                          * This is because exit_mmap() needs to have TASK_SIZE
676                          * evaluate to the size of the old image.
677                          *
678                          * So if (say) a 64-bit application is execing a 32-bit
679                          * application it is the architecture's responsibility
680                          * to defer changing the value of TASK_SIZE until the
681                          * switch really is going to happen - do this in
682                          * flush_thread().      - akpm
683                          */
684                         SET_PERSONALITY(loc->elf_ex);
685
686                         interpreter = open_exec(elf_interpreter);
687                         retval = PTR_ERR(interpreter);
688                         if (IS_ERR(interpreter))
689                                 goto out_free_interp;
690
691                         /*
692                          * If the binary is not readable then enforce
693                          * mm->dumpable = 0 regardless of the interpreter's
694                          * permissions.
695                          */
696                         if (file_permission(interpreter, MAY_READ) < 0)
697                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
698
699                         retval = kernel_read(interpreter, 0, bprm->buf,
700                                              BINPRM_BUF_SIZE);
701                         if (retval != BINPRM_BUF_SIZE) {
702                                 if (retval >= 0)
703                                         retval = -EIO;
704                                 goto out_free_dentry;
705                         }
706
707                         /* Get the exec headers */
708                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
709                         break;
710                 }
711                 elf_ppnt++;
712         }
713
714         elf_ppnt = elf_phdata;
715         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
716                 if (elf_ppnt->p_type == PT_GNU_STACK) {
717                         if (elf_ppnt->p_flags & PF_X)
718                                 executable_stack = EXSTACK_ENABLE_X;
719                         else
720                                 executable_stack = EXSTACK_DISABLE_X;
721                         break;
722                 }
723
724         /* Some simple consistency checks for the interpreter */
725         if (elf_interpreter) {
726                 retval = -ELIBBAD;
727                 /* Not an ELF interpreter */
728                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
729                         goto out_free_dentry;
730                 /* Verify the interpreter has a valid arch */
731                 if (!elf_check_arch(&loc->interp_elf_ex))
732                         goto out_free_dentry;
733         } else {
734                 /* Executables without an interpreter also need a personality  */
735                 SET_PERSONALITY(loc->elf_ex);
736         }
737
738         /* Flush all traces of the currently running executable */
739         retval = flush_old_exec(bprm);
740         if (retval)
741                 goto out_free_dentry;
742
743         /* OK, This is the point of no return */
744         current->flags &= ~PF_FORKNOEXEC;
745         current->mm->def_flags = def_flags;
746
747         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
748            may depend on the personality.  */
749         SET_PERSONALITY(loc->elf_ex);
750         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
751                 current->personality |= READ_IMPLIES_EXEC;
752
753         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
754                 current->flags |= PF_RANDOMIZE;
755         arch_pick_mmap_layout(current->mm);
756
757         /* Do this so that we can load the interpreter, if need be.  We will
758            change some of these later */
759         current->mm->free_area_cache = current->mm->mmap_base;
760         current->mm->cached_hole_size = 0;
761         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
762                                  executable_stack);
763         if (retval < 0) {
764                 send_sig(SIGKILL, current, 0);
765                 goto out_free_dentry;
766         }
767         
768         current->mm->start_stack = bprm->p;
769
770         /* Now we do a little grungy work by mmapping the ELF image into
771            the correct location in memory. */
772         for(i = 0, elf_ppnt = elf_phdata;
773             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
774                 int elf_prot = 0, elf_flags;
775                 unsigned long k, vaddr;
776
777                 if (elf_ppnt->p_type != PT_LOAD)
778                         continue;
779
780                 if (unlikely (elf_brk > elf_bss)) {
781                         unsigned long nbyte;
782                     
783                         /* There was a PT_LOAD segment with p_memsz > p_filesz
784                            before this one. Map anonymous pages, if needed,
785                            and clear the area.  */
786                         retval = set_brk (elf_bss + load_bias,
787                                           elf_brk + load_bias);
788                         if (retval) {
789                                 send_sig(SIGKILL, current, 0);
790                                 goto out_free_dentry;
791                         }
792                         nbyte = ELF_PAGEOFFSET(elf_bss);
793                         if (nbyte) {
794                                 nbyte = ELF_MIN_ALIGN - nbyte;
795                                 if (nbyte > elf_brk - elf_bss)
796                                         nbyte = elf_brk - elf_bss;
797                                 if (clear_user((void __user *)elf_bss +
798                                                         load_bias, nbyte)) {
799                                         /*
800                                          * This bss-zeroing can fail if the ELF
801                                          * file specifies odd protections. So
802                                          * we don't check the return value
803                                          */
804                                 }
805                         }
806                 }
807
808                 if (elf_ppnt->p_flags & PF_R)
809                         elf_prot |= PROT_READ;
810                 if (elf_ppnt->p_flags & PF_W)
811                         elf_prot |= PROT_WRITE;
812                 if (elf_ppnt->p_flags & PF_X)
813                         elf_prot |= PROT_EXEC;
814
815                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
816
817                 vaddr = elf_ppnt->p_vaddr;
818                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
819                         elf_flags |= MAP_FIXED;
820                 } else if (loc->elf_ex.e_type == ET_DYN) {
821                         /* Try and get dynamic programs out of the way of the
822                          * default mmap base, as well as whatever program they
823                          * might try to exec.  This is because the brk will
824                          * follow the loader, and is not movable.  */
825 #ifdef CONFIG_X86
826                         load_bias = 0;
827 #else
828                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
829 #endif
830                 }
831
832                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
833                                 elf_prot, elf_flags, 0);
834                 if (BAD_ADDR(error)) {
835                         send_sig(SIGKILL, current, 0);
836                         retval = IS_ERR((void *)error) ?
837                                 PTR_ERR((void*)error) : -EINVAL;
838                         goto out_free_dentry;
839                 }
840
841                 if (!load_addr_set) {
842                         load_addr_set = 1;
843                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
844                         if (loc->elf_ex.e_type == ET_DYN) {
845                                 load_bias += error -
846                                              ELF_PAGESTART(load_bias + vaddr);
847                                 load_addr += load_bias;
848                                 reloc_func_desc = load_bias;
849                         }
850                 }
851                 k = elf_ppnt->p_vaddr;
852                 if (k < start_code)
853                         start_code = k;
854                 if (start_data < k)
855                         start_data = k;
856
857                 /*
858                  * Check to see if the section's size will overflow the
859                  * allowed task size. Note that p_filesz must always be
860                  * <= p_memsz so it is only necessary to check p_memsz.
861                  */
862                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
863                     elf_ppnt->p_memsz > TASK_SIZE ||
864                     TASK_SIZE - elf_ppnt->p_memsz < k) {
865                         /* set_brk can never work. Avoid overflows. */
866                         send_sig(SIGKILL, current, 0);
867                         retval = -EINVAL;
868                         goto out_free_dentry;
869                 }
870
871                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
872
873                 if (k > elf_bss)
874                         elf_bss = k;
875                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
876                         end_code = k;
877                 if (end_data < k)
878                         end_data = k;
879                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
880                 if (k > elf_brk)
881                         elf_brk = k;
882         }
883
884         loc->elf_ex.e_entry += load_bias;
885         elf_bss += load_bias;
886         elf_brk += load_bias;
887         start_code += load_bias;
888         end_code += load_bias;
889         start_data += load_bias;
890         end_data += load_bias;
891
892         /* Calling set_brk effectively mmaps the pages that we need
893          * for the bss and break sections.  We must do this before
894          * mapping in the interpreter, to make sure it doesn't wind
895          * up getting placed where the bss needs to go.
896          */
897         retval = set_brk(elf_bss, elf_brk);
898         if (retval) {
899                 send_sig(SIGKILL, current, 0);
900                 goto out_free_dentry;
901         }
902         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
903                 send_sig(SIGSEGV, current, 0);
904                 retval = -EFAULT; /* Nobody gets to see this, but.. */
905                 goto out_free_dentry;
906         }
907
908         if (elf_interpreter) {
909                 unsigned long uninitialized_var(interp_map_addr);
910
911                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
912                                             interpreter,
913                                             &interp_map_addr,
914                                             load_bias);
915                 if (!IS_ERR((void *)elf_entry)) {
916                         /*
917                          * load_elf_interp() returns relocation
918                          * adjustment
919                          */
920                         interp_load_addr = elf_entry;
921                         elf_entry += loc->interp_elf_ex.e_entry;
922                 }
923                 if (BAD_ADDR(elf_entry)) {
924                         force_sig(SIGSEGV, current);
925                         retval = IS_ERR((void *)elf_entry) ?
926                                         (int)elf_entry : -EINVAL;
927                         goto out_free_dentry;
928                 }
929                 reloc_func_desc = interp_load_addr;
930
931                 allow_write_access(interpreter);
932                 fput(interpreter);
933                 kfree(elf_interpreter);
934         } else {
935                 elf_entry = loc->elf_ex.e_entry;
936                 if (BAD_ADDR(elf_entry)) {
937                         force_sig(SIGSEGV, current);
938                         retval = -EINVAL;
939                         goto out_free_dentry;
940                 }
941         }
942
943         kfree(elf_phdata);
944
945         set_binfmt(&elf_format);
946
947 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
948         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
949         if (retval < 0) {
950                 send_sig(SIGKILL, current, 0);
951                 goto out;
952         }
953 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
954
955         install_exec_creds(bprm);
956         current->flags &= ~PF_FORKNOEXEC;
957         retval = create_elf_tables(bprm, &loc->elf_ex,
958                           load_addr, interp_load_addr);
959         if (retval < 0) {
960                 send_sig(SIGKILL, current, 0);
961                 goto out;
962         }
963         /* N.B. passed_fileno might not be initialized? */
964         current->mm->end_code = end_code;
965         current->mm->start_code = start_code;
966         current->mm->start_data = start_data;
967         current->mm->end_data = end_data;
968         current->mm->start_stack = bprm->p;
969
970 #ifdef arch_randomize_brk
971         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
972                 current->mm->brk = current->mm->start_brk =
973                         arch_randomize_brk(current->mm);
974 #endif
975
976         if (current->personality & MMAP_PAGE_ZERO) {
977                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
978                    and some applications "depend" upon this behavior.
979                    Since we do not have the power to recompile these, we
980                    emulate the SVr4 behavior. Sigh. */
981                 down_write(&current->mm->mmap_sem);
982                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
983                                 MAP_FIXED | MAP_PRIVATE, 0);
984                 up_write(&current->mm->mmap_sem);
985         }
986
987 #ifdef ELF_PLAT_INIT
988         /*
989          * The ABI may specify that certain registers be set up in special
990          * ways (on i386 %edx is the address of a DT_FINI function, for
991          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
992          * that the e_entry field is the address of the function descriptor
993          * for the startup routine, rather than the address of the startup
994          * routine itself.  This macro performs whatever initialization to
995          * the regs structure is required as well as any relocations to the
996          * function descriptor entries when executing dynamically links apps.
997          */
998         ELF_PLAT_INIT(regs, reloc_func_desc);
999 #endif
1000
1001         start_thread(regs, elf_entry, bprm->p);
1002         retval = 0;
1003 out:
1004         kfree(loc);
1005 out_ret:
1006         return retval;
1007
1008         /* error cleanup */
1009 out_free_dentry:
1010         allow_write_access(interpreter);
1011         if (interpreter)
1012                 fput(interpreter);
1013 out_free_interp:
1014         kfree(elf_interpreter);
1015 out_free_ph:
1016         kfree(elf_phdata);
1017         goto out;
1018 }
1019
1020 /* This is really simpleminded and specialized - we are loading an
1021    a.out library that is given an ELF header. */
1022 static int load_elf_library(struct file *file)
1023 {
1024         struct elf_phdr *elf_phdata;
1025         struct elf_phdr *eppnt;
1026         unsigned long elf_bss, bss, len;
1027         int retval, error, i, j;
1028         struct elfhdr elf_ex;
1029
1030         error = -ENOEXEC;
1031         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1032         if (retval != sizeof(elf_ex))
1033                 goto out;
1034
1035         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1036                 goto out;
1037
1038         /* First of all, some simple consistency checks */
1039         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1040             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1041                 goto out;
1042
1043         /* Now read in all of the header information */
1044
1045         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1046         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1047
1048         error = -ENOMEM;
1049         elf_phdata = kmalloc(j, GFP_KERNEL);
1050         if (!elf_phdata)
1051                 goto out;
1052
1053         eppnt = elf_phdata;
1054         error = -ENOEXEC;
1055         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1056         if (retval != j)
1057                 goto out_free_ph;
1058
1059         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1060                 if ((eppnt + i)->p_type == PT_LOAD)
1061                         j++;
1062         if (j != 1)
1063                 goto out_free_ph;
1064
1065         while (eppnt->p_type != PT_LOAD)
1066                 eppnt++;
1067
1068         /* Now use mmap to map the library into memory. */
1069         down_write(&current->mm->mmap_sem);
1070         error = do_mmap(file,
1071                         ELF_PAGESTART(eppnt->p_vaddr),
1072                         (eppnt->p_filesz +
1073                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1074                         PROT_READ | PROT_WRITE | PROT_EXEC,
1075                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1076                         (eppnt->p_offset -
1077                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1078         up_write(&current->mm->mmap_sem);
1079         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1080                 goto out_free_ph;
1081
1082         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1083         if (padzero(elf_bss)) {
1084                 error = -EFAULT;
1085                 goto out_free_ph;
1086         }
1087
1088         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1089                             ELF_MIN_ALIGN - 1);
1090         bss = eppnt->p_memsz + eppnt->p_vaddr;
1091         if (bss > len) {
1092                 down_write(&current->mm->mmap_sem);
1093                 do_brk(len, bss - len);
1094                 up_write(&current->mm->mmap_sem);
1095         }
1096         error = 0;
1097
1098 out_free_ph:
1099         kfree(elf_phdata);
1100 out:
1101         return error;
1102 }
1103
1104 #ifdef CONFIG_ELF_CORE
1105 /*
1106  * ELF core dumper
1107  *
1108  * Modelled on fs/exec.c:aout_core_dump()
1109  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1110  */
1111 /*
1112  * These are the only things you should do on a core-file: use only these
1113  * functions to write out all the necessary info.
1114  */
1115 static int dump_write(struct file *file, const void *addr, int nr)
1116 {
1117         return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1118 }
1119
1120 static int dump_seek(struct file *file, loff_t off)
1121 {
1122         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1123                 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1124                         return 0;
1125         } else {
1126                 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1127                 if (!buf)
1128                         return 0;
1129                 while (off > 0) {
1130                         unsigned long n = off;
1131                         if (n > PAGE_SIZE)
1132                                 n = PAGE_SIZE;
1133                         if (!dump_write(file, buf, n))
1134                                 return 0;
1135                         off -= n;
1136                 }
1137                 free_page((unsigned long)buf);
1138         }
1139         return 1;
1140 }
1141
1142 /*
1143  * Decide what to dump of a segment, part, all or none.
1144  */
1145 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1146                                    unsigned long mm_flags)
1147 {
1148 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1149
1150         /* The vma can be set up to tell us the answer directly.  */
1151         if (vma->vm_flags & VM_ALWAYSDUMP)
1152                 goto whole;
1153
1154         /* Hugetlb memory check */
1155         if (vma->vm_flags & VM_HUGETLB) {
1156                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1157                         goto whole;
1158                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1159                         goto whole;
1160         }
1161
1162         /* Do not dump I/O mapped devices or special mappings */
1163         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1164                 return 0;
1165
1166         /* By default, dump shared memory if mapped from an anonymous file. */
1167         if (vma->vm_flags & VM_SHARED) {
1168                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1169                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1170                         goto whole;
1171                 return 0;
1172         }
1173
1174         /* Dump segments that have been written to.  */
1175         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1176                 goto whole;
1177         if (vma->vm_file == NULL)
1178                 return 0;
1179
1180         if (FILTER(MAPPED_PRIVATE))
1181                 goto whole;
1182
1183         /*
1184          * If this looks like the beginning of a DSO or executable mapping,
1185          * check for an ELF header.  If we find one, dump the first page to
1186          * aid in determining what was mapped here.
1187          */
1188         if (FILTER(ELF_HEADERS) &&
1189             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1190                 u32 __user *header = (u32 __user *) vma->vm_start;
1191                 u32 word;
1192                 mm_segment_t fs = get_fs();
1193                 /*
1194                  * Doing it this way gets the constant folded by GCC.
1195                  */
1196                 union {
1197                         u32 cmp;
1198                         char elfmag[SELFMAG];
1199                 } magic;
1200                 BUILD_BUG_ON(SELFMAG != sizeof word);
1201                 magic.elfmag[EI_MAG0] = ELFMAG0;
1202                 magic.elfmag[EI_MAG1] = ELFMAG1;
1203                 magic.elfmag[EI_MAG2] = ELFMAG2;
1204                 magic.elfmag[EI_MAG3] = ELFMAG3;
1205                 /*
1206                  * Switch to the user "segment" for get_user(),
1207                  * then put back what elf_core_dump() had in place.
1208                  */
1209                 set_fs(USER_DS);
1210                 if (unlikely(get_user(word, header)))
1211                         word = 0;
1212                 set_fs(fs);
1213                 if (word == magic.cmp)
1214                         return PAGE_SIZE;
1215         }
1216
1217 #undef  FILTER
1218
1219         return 0;
1220
1221 whole:
1222         return vma->vm_end - vma->vm_start;
1223 }
1224
/* In-memory description of one ELF note before it is written out. */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type (an NT_* value) */
	unsigned int datasz;	/* size of the payload in bytes */
	void *data;		/* payload */
};
1233
1234 static int notesize(struct memelfnote *en)
1235 {
1236         int sz;
1237
1238         sz = sizeof(struct elf_note);
1239         sz += roundup(strlen(en->name) + 1, 4);
1240         sz += roundup(en->datasz, 4);
1241
1242         return sz;
1243 }
1244
/*
 * Write (nr) bytes at (addr) to the local variable 'file' and advance
 * *foffset by the same amount.  Makes the enclosing function return 0 on
 * a failed write.  Note that (nr) is evaluated twice.
 */
#define DUMP_WRITE(addr, nr, foffset)				\
	do {							\
		if (!dump_write(file, (addr), (nr)))		\
			return 0;				\
		*foffset += (nr);				\
	} while (0)
1247
1248 static int alignfile(struct file *file, loff_t *foffset)
1249 {
1250         static const char buf[4] = { 0, };
1251         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1252         return 1;
1253 }
1254
1255 static int writenote(struct memelfnote *men, struct file *file,
1256                         loff_t *foffset)
1257 {
1258         struct elf_note en;
1259         en.n_namesz = strlen(men->name) + 1;
1260         en.n_descsz = men->datasz;
1261         en.n_type = men->type;
1262
1263         DUMP_WRITE(&en, sizeof(en), foffset);
1264         DUMP_WRITE(men->name, en.n_namesz, foffset);
1265         if (!alignfile(file, foffset))
1266                 return 0;
1267         DUMP_WRITE(men->data, men->datasz, foffset);
1268         if (!alignfile(file, foffset))
1269                 return 0;
1270
1271         return 1;
1272 }
#undef DUMP_WRITE

/*
 * Size-limited variant: expects 'size', 'cprm' and an 'end_coredump'
 * label in the expanding function.  Adds (nr) to size and jumps to
 * end_coredump when that exceeds cprm->limit or when the write fails.
 * This expands to a bare 'if' statement and evaluates (nr) twice.
 */
#define DUMP_WRITE(addr, nr)					\
	if ((size += (nr)) > cprm->limit ||			\
	    !dump_write(cprm->file, (addr), (nr)))		\
		goto end_coredump;
1279
1280 static void fill_elf_header(struct elfhdr *elf, int segs,
1281                             u16 machine, u32 flags, u8 osabi)
1282 {
1283         memset(elf, 0, sizeof(*elf));
1284
1285         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1286         elf->e_ident[EI_CLASS] = ELF_CLASS;
1287         elf->e_ident[EI_DATA] = ELF_DATA;
1288         elf->e_ident[EI_VERSION] = EV_CURRENT;
1289         elf->e_ident[EI_OSABI] = ELF_OSABI;
1290
1291         elf->e_type = ET_CORE;
1292         elf->e_machine = machine;
1293         elf->e_version = EV_CURRENT;
1294         elf->e_phoff = sizeof(struct elfhdr);
1295         elf->e_flags = flags;
1296         elf->e_ehsize = sizeof(struct elfhdr);
1297         elf->e_phentsize = sizeof(struct elf_phdr);
1298         elf->e_phnum = segs;
1299
1300         return;
1301 }
1302
1303 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1304 {
1305         phdr->p_type = PT_NOTE;
1306         phdr->p_offset = offset;
1307         phdr->p_vaddr = 0;
1308         phdr->p_paddr = 0;
1309         phdr->p_filesz = sz;
1310         phdr->p_memsz = 0;
1311         phdr->p_flags = 0;
1312         phdr->p_align = 0;
1313         return;
1314 }
1315
1316 static void fill_note(struct memelfnote *note, const char *name, int type, 
1317                 unsigned int sz, void *data)
1318 {
1319         note->name = name;
1320         note->type = type;
1321         note->datasz = sz;
1322         note->data = data;
1323         return;
1324 }
1325
1326 /*
1327  * fill up all the fields in prstatus from the given task struct, except
1328  * registers which need to be filled up separately.
1329  */
1330 static void fill_prstatus(struct elf_prstatus *prstatus,
1331                 struct task_struct *p, long signr)
1332 {
1333         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1334         prstatus->pr_sigpend = p->pending.signal.sig[0];
1335         prstatus->pr_sighold = p->blocked.sig[0];
1336         rcu_read_lock();
1337         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1338         rcu_read_unlock();
1339         prstatus->pr_pid = task_pid_vnr(p);
1340         prstatus->pr_pgrp = task_pgrp_vnr(p);
1341         prstatus->pr_sid = task_session_vnr(p);
1342         if (thread_group_leader(p)) {
1343                 struct task_cputime cputime;
1344
1345                 /*
1346                  * This is the record for the group leader.  It shows the
1347                  * group-wide total, not its individual thread total.
1348                  */
1349                 thread_group_cputime(p, &cputime);
1350                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1351                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1352         } else {
1353                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1354                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1355         }
1356         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1357         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1358 }
1359
1360 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1361                        struct mm_struct *mm)
1362 {
1363         const struct cred *cred;
1364         unsigned int i, len;
1365         
1366         /* first copy the parameters from user space */
1367         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1368
1369         len = mm->arg_end - mm->arg_start;
1370         if (len >= ELF_PRARGSZ)
1371                 len = ELF_PRARGSZ-1;
1372         if (copy_from_user(&psinfo->pr_psargs,
1373                            (const char __user *)mm->arg_start, len))
1374                 return -EFAULT;
1375         for(i = 0; i < len; i++)
1376                 if (psinfo->pr_psargs[i] == 0)
1377                         psinfo->pr_psargs[i] = ' ';
1378         psinfo->pr_psargs[len] = 0;
1379
1380         rcu_read_lock();
1381         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1382         rcu_read_unlock();
1383         psinfo->pr_pid = task_pid_vnr(p);
1384         psinfo->pr_pgrp = task_pgrp_vnr(p);
1385         psinfo->pr_sid = task_session_vnr(p);
1386
1387         i = p->state ? ffz(~p->state) + 1 : 0;
1388         psinfo->pr_state = i;
1389         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1390         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1391         psinfo->pr_nice = task_nice(p);
1392         psinfo->pr_flag = p->flags;
1393         rcu_read_lock();
1394         cred = __task_cred(p);
1395         SET_UID(psinfo->pr_uid, cred->uid);
1396         SET_GID(psinfo->pr_gid, cred->gid);
1397         rcu_read_unlock();
1398         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1399         
1400         return 0;
1401 }
1402
1403 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1404 {
1405         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1406         int i = 0;
1407         do
1408                 i += 2;
1409         while (auxv[i - 2] != AT_NULL);
1410         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1411 }
1412
1413 #ifdef CORE_DUMP_USE_REGSET
1414 #include <linux/regset.h>
1415
1416 struct elf_thread_core_info {
1417         struct elf_thread_core_info *next;
1418         struct task_struct *task;
1419         struct elf_prstatus prstatus;
1420         struct memelfnote notes[0];
1421 };
1422
1423 struct elf_note_info {
1424         struct elf_thread_core_info *thread;
1425         struct memelfnote psinfo;
1426         struct memelfnote auxv;
1427         size_t size;
1428         int thread_notes;
1429 };
1430
1431 /*
1432  * When a regset has a writeback hook, we call it on each thread before
1433  * dumping user memory.  On register window machines, this makes sure the
1434  * user memory backing the register data is up to date before we read it.
1435  */
1436 static void do_thread_regset_writeback(struct task_struct *task,
1437                                        const struct user_regset *regset)
1438 {
1439         if (regset->writeback)
1440                 regset->writeback(task, regset, 1);
1441 }
1442
1443 static int fill_thread_core_info(struct elf_thread_core_info *t,
1444                                  const struct user_regset_view *view,
1445                                  long signr, size_t *total)
1446 {
1447         unsigned int i;
1448
1449         /*
1450          * NT_PRSTATUS is the one special case, because the regset data
1451          * goes into the pr_reg field inside the note contents, rather
1452          * than being the whole note contents.  We fill the reset in here.
1453          * We assume that regset 0 is NT_PRSTATUS.
1454          */
1455         fill_prstatus(&t->prstatus, t->task, signr);
1456         (void) view->regsets[0].get(t->task, &view->regsets[0],
1457                                     0, sizeof(t->prstatus.pr_reg),
1458                                     &t->prstatus.pr_reg, NULL);
1459
1460         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1461                   sizeof(t->prstatus), &t->prstatus);
1462         *total += notesize(&t->notes[0]);
1463
1464         do_thread_regset_writeback(t->task, &view->regsets[0]);
1465
1466         /*
1467          * Each other regset might generate a note too.  For each regset
1468          * that has no core_note_type or is inactive, we leave t->notes[i]
1469          * all zero and we'll know to skip writing it later.
1470          */
1471         for (i = 1; i < view->n; ++i) {
1472                 const struct user_regset *regset = &view->regsets[i];
1473                 do_thread_regset_writeback(t->task, regset);
1474                 if (regset->core_note_type &&
1475                     (!regset->active || regset->active(t->task, regset))) {
1476                         int ret;
1477                         size_t size = regset->n * regset->size;
1478                         void *data = kmalloc(size, GFP_KERNEL);
1479                         if (unlikely(!data))
1480                                 return 0;
1481                         ret = regset->get(t->task, regset,
1482                                           0, size, data, NULL);
1483                         if (unlikely(ret))
1484                                 kfree(data);
1485                         else {
1486                                 if (regset->core_note_type != NT_PRFPREG)
1487                                         fill_note(&t->notes[i], "LINUX",
1488                                                   regset->core_note_type,
1489                                                   size, data);
1490                                 else {
1491                                         t->prstatus.pr_fpvalid = 1;
1492                                         fill_note(&t->notes[i], "CORE",
1493                                                   NT_PRFPREG, size, data);
1494                                 }
1495                                 *total += notesize(&t->notes[i]);
1496                         }
1497                 }
1498         }
1499
1500         return 1;
1501 }
1502
1503 static int fill_note_info(struct elfhdr *elf, int phdrs,
1504                           struct elf_note_info *info,
1505                           long signr, struct pt_regs *regs)
1506 {
1507         struct task_struct *dump_task = current;
1508         const struct user_regset_view *view = task_user_regset_view(dump_task);
1509         struct elf_thread_core_info *t;
1510         struct elf_prpsinfo *psinfo;
1511         struct core_thread *ct;
1512         unsigned int i;
1513
1514         info->size = 0;
1515         info->thread = NULL;
1516
1517         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1518         if (psinfo == NULL)
1519                 return 0;
1520
1521         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1522
1523         /*
1524          * Figure out how many notes we're going to need for each thread.
1525          */
1526         info->thread_notes = 0;
1527         for (i = 0; i < view->n; ++i)
1528                 if (view->regsets[i].core_note_type != 0)
1529                         ++info->thread_notes;
1530
1531         /*
1532          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1533          * since it is our one special case.
1534          */
1535         if (unlikely(info->thread_notes == 0) ||
1536             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1537                 WARN_ON(1);
1538                 return 0;
1539         }
1540
1541         /*
1542          * Initialize the ELF file header.
1543          */
1544         fill_elf_header(elf, phdrs,
1545                         view->e_machine, view->e_flags, view->ei_osabi);
1546
1547         /*
1548          * Allocate a structure for each thread.
1549          */
1550         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1551                 t = kzalloc(offsetof(struct elf_thread_core_info,
1552                                      notes[info->thread_notes]),
1553                             GFP_KERNEL);
1554                 if (unlikely(!t))
1555                         return 0;
1556
1557                 t->task = ct->task;
1558                 if (ct->task == dump_task || !info->thread) {
1559                         t->next = info->thread;
1560                         info->thread = t;
1561                 } else {
1562                         /*
1563                          * Make sure to keep the original task at
1564                          * the head of the list.
1565                          */
1566                         t->next = info->thread->next;
1567                         info->thread->next = t;
1568                 }
1569         }
1570
1571         /*
1572          * Now fill in each thread's information.
1573          */
1574         for (t = info->thread; t != NULL; t = t->next)
1575                 if (!fill_thread_core_info(t, view, signr, &info->size))
1576                         return 0;
1577
1578         /*
1579          * Fill in the two process-wide notes.
1580          */
1581         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1582         info->size += notesize(&info->psinfo);
1583
1584         fill_auxv_note(&info->auxv, current->mm);
1585         info->size += notesize(&info->auxv);
1586
1587         return 1;
1588 }
1589
1590 static size_t get_note_info_size(struct elf_note_info *info)
1591 {
1592         return info->size;
1593 }
1594
1595 /*
1596  * Write all the notes for each thread.  When writing the first thread, the
1597  * process-wide notes are interleaved after the first thread-specific note.
1598  */
1599 static int write_note_info(struct elf_note_info *info,
1600                            struct file *file, loff_t *foffset)
1601 {
1602         bool first = 1;
1603         struct elf_thread_core_info *t = info->thread;
1604
1605         do {
1606                 int i;
1607
1608                 if (!writenote(&t->notes[0], file, foffset))
1609                         return 0;
1610
1611                 if (first && !writenote(&info->psinfo, file, foffset))
1612                         return 0;
1613                 if (first && !writenote(&info->auxv, file, foffset))
1614                         return 0;
1615
1616                 for (i = 1; i < info->thread_notes; ++i)
1617                         if (t->notes[i].data &&
1618                             !writenote(&t->notes[i], file, foffset))
1619                                 return 0;
1620
1621                 first = 0;
1622                 t = t->next;
1623         } while (t);
1624
1625         return 1;
1626 }
1627
1628 static void free_note_info(struct elf_note_info *info)
1629 {
1630         struct elf_thread_core_info *threads = info->thread;
1631         while (threads) {
1632                 unsigned int i;
1633                 struct elf_thread_core_info *t = threads;
1634                 threads = t->next;
1635                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1636                 for (i = 1; i < info->thread_notes; ++i)
1637                         kfree(t->notes[i].data);
1638                 kfree(t);
1639         }
1640         kfree(info->psinfo.data);
1641 }
1642
1643 #else
1644
1645 /* Here is the structure in which status of each thread is captured. */
1646 struct elf_thread_status
1647 {
1648         struct list_head list;
1649         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1650         elf_fpregset_t fpu;             /* NT_PRFPREG */
1651         struct task_struct *thread;
1652 #ifdef ELF_CORE_COPY_XFPREGS
1653         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1654 #endif
1655         struct memelfnote notes[3];
1656         int num_notes;
1657 };
1658
1659 /*
1660  * In order to add the specific thread information for the elf file format,
1661  * we need to keep a linked list of every threads pr_status and then create
1662  * a single section for them in the final core file.
1663  */
1664 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1665 {
1666         int sz = 0;
1667         struct task_struct *p = t->thread;
1668         t->num_notes = 0;
1669
1670         fill_prstatus(&t->prstatus, p, signr);
1671         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1672         
1673         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1674                   &(t->prstatus));
1675         t->num_notes++;
1676         sz += notesize(&t->notes[0]);
1677
1678         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1679                                                                 &t->fpu))) {
1680                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1681                           &(t->fpu));
1682                 t->num_notes++;
1683                 sz += notesize(&t->notes[1]);
1684         }
1685
1686 #ifdef ELF_CORE_COPY_XFPREGS
1687         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1688                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1689                           sizeof(t->xfpu), &t->xfpu);
1690                 t->num_notes++;
1691                 sz += notesize(&t->notes[2]);
1692         }
1693 #endif  
1694         return sz;
1695 }
1696
1697 struct elf_note_info {
1698         struct memelfnote *notes;
1699         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1700         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1701         struct list_head thread_list;
1702         elf_fpregset_t *fpu;
1703 #ifdef ELF_CORE_COPY_XFPREGS
1704         elf_fpxregset_t *xfpu;
1705 #endif
1706         int thread_status_size;
1707         int numnote;
1708 };
1709
1710 static int elf_note_info_init(struct elf_note_info *info)
1711 {
1712         memset(info, 0, sizeof(*info));
1713         INIT_LIST_HEAD(&info->thread_list);
1714
1715         /* Allocate space for six ELF notes */
1716         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1717         if (!info->notes)
1718                 return 0;
1719         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1720         if (!info->psinfo)
1721                 goto notes_free;
1722         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1723         if (!info->prstatus)
1724                 goto psinfo_free;
1725         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1726         if (!info->fpu)
1727                 goto prstatus_free;
1728 #ifdef ELF_CORE_COPY_XFPREGS
1729         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1730         if (!info->xfpu)
1731                 goto fpu_free;
1732 #endif
1733         return 1;
1734 #ifdef ELF_CORE_COPY_XFPREGS
1735  fpu_free:
1736         kfree(info->fpu);
1737 #endif
1738  prstatus_free:
1739         kfree(info->prstatus);
1740  psinfo_free:
1741         kfree(info->psinfo);
1742  notes_free:
1743         kfree(info->notes);
1744         return 0;
1745 }
1746
1747 static int fill_note_info(struct elfhdr *elf, int phdrs,
1748                           struct elf_note_info *info,
1749                           long signr, struct pt_regs *regs)
1750 {
1751         struct list_head *t;
1752
1753         if (!elf_note_info_init(info))
1754                 return 0;
1755
1756         if (signr) {
1757                 struct core_thread *ct;
1758                 struct elf_thread_status *ets;
1759
1760                 for (ct = current->mm->core_state->dumper.next;
1761                                                 ct; ct = ct->next) {
1762                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1763                         if (!ets)
1764                                 return 0;
1765
1766                         ets->thread = ct->task;
1767                         list_add(&ets->list, &info->thread_list);
1768                 }
1769
1770                 list_for_each(t, &info->thread_list) {
1771                         int sz;
1772
1773                         ets = list_entry(t, struct elf_thread_status, list);
1774                         sz = elf_dump_thread_status(signr, ets);
1775                         info->thread_status_size += sz;
1776                 }
1777         }
1778         /* now collect the dump for the current */
1779         memset(info->prstatus, 0, sizeof(*info->prstatus));
1780         fill_prstatus(info->prstatus, current, signr);
1781         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1782
1783         /* Set up header */
1784         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1785
1786         /*
1787          * Set up the notes in similar form to SVR4 core dumps made
1788          * with info from their /proc.
1789          */
1790
1791         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1792                   sizeof(*info->prstatus), info->prstatus);
1793         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1794         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1795                   sizeof(*info->psinfo), info->psinfo);
1796
1797         info->numnote = 2;
1798
1799         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1800
1801         /* Try to dump the FPU. */
1802         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1803                                                                info->fpu);
1804         if (info->prstatus->pr_fpvalid)
1805                 fill_note(info->notes + info->numnote++,
1806                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1807 #ifdef ELF_CORE_COPY_XFPREGS
1808         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1809                 fill_note(info->notes + info->numnote++,
1810                           "LINUX", ELF_CORE_XFPREG_TYPE,
1811                           sizeof(*info->xfpu), info->xfpu);
1812 #endif
1813
1814         return 1;
1815 }
1816
1817 static size_t get_note_info_size(struct elf_note_info *info)
1818 {
1819         int sz = 0;
1820         int i;
1821
1822         for (i = 0; i < info->numnote; i++)
1823                 sz += notesize(info->notes + i);
1824
1825         sz += info->thread_status_size;
1826
1827         return sz;
1828 }
1829
1830 static int write_note_info(struct elf_note_info *info,
1831                            struct file *file, loff_t *foffset)
1832 {
1833         int i;
1834         struct list_head *t;
1835
1836         for (i = 0; i < info->numnote; i++)
1837                 if (!writenote(info->notes + i, file, foffset))
1838                         return 0;
1839
1840         /* write out the thread status notes section */
1841         list_for_each(t, &info->thread_list) {
1842                 struct elf_thread_status *tmp =
1843                                 list_entry(t, struct elf_thread_status, list);
1844
1845                 for (i = 0; i < tmp->num_notes; i++)
1846                         if (!writenote(&tmp->notes[i], file, foffset))
1847                                 return 0;
1848         }
1849
1850         return 1;
1851 }
1852
1853 static void free_note_info(struct elf_note_info *info)
1854 {
1855         while (!list_empty(&info->thread_list)) {
1856                 struct list_head *tmp = info->thread_list.next;
1857                 list_del(tmp);
1858                 kfree(list_entry(tmp, struct elf_thread_status, list));
1859         }
1860
1861         kfree(info->prstatus);
1862         kfree(info->psinfo);
1863         kfree(info->notes);
1864         kfree(info->fpu);
1865 #ifdef ELF_CORE_COPY_XFPREGS
1866         kfree(info->xfpu);
1867 #endif
1868 }
1869
1870 #endif
1871
1872 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1873                                         struct vm_area_struct *gate_vma)
1874 {
1875         struct vm_area_struct *ret = tsk->mm->mmap;
1876
1877         if (ret)
1878                 return ret;
1879         return gate_vma;
1880 }
1881 /*
1882  * Helper function for iterating across a vma list.  It ensures that the caller
1883  * will visit `gate_vma' prior to terminating the search.
1884  */
1885 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1886                                         struct vm_area_struct *gate_vma)
1887 {
1888         struct vm_area_struct *ret;
1889
1890         ret = this_vma->vm_next;
1891         if (ret)
1892                 return ret;
1893         if (this_vma == gate_vma)
1894                 return NULL;
1895         return gate_vma;
1896 }
1897
1898 /*
1899  * Actual dumper
1900  *
1901  * This is a two-pass process; first we find the offsets of the bits,
1902  * and then they are actually written out.  If we run out of core limit
1903  * we just truncate.
1904  */
1905 static int elf_core_dump(struct coredump_params *cprm)
1906 {
1907         int has_dumped = 0;
1908         mm_segment_t fs;
1909         int segs;
1910         size_t size = 0;
1911         struct vm_area_struct *vma, *gate_vma;
1912         struct elfhdr *elf = NULL;
1913         loff_t offset = 0, dataoff, foffset;
1914         unsigned long mm_flags;
1915         struct elf_note_info info;
1916
1917         /*
1918          * We no longer stop all VM operations.
1919          * 
1920          * This is because those proceses that could possibly change map_count
1921          * or the mmap / vma pages are now blocked in do_exit on current
1922          * finishing this core dump.
1923          *
1924          * Only ptrace can touch these memory addresses, but it doesn't change
1925          * the map_count or the pages allocated. So no possibility of crashing
1926          * exists while dumping the mm->vm_next areas to the core file.
1927          */
1928   
1929         /* alloc memory for large data structures: too large to be on stack */
1930         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1931         if (!elf)
1932                 goto out;
1933         /*
1934          * The number of segs are recored into ELF header as 16bit value.
1935          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1936          */
1937         segs = current->mm->map_count;
1938 #ifdef ELF_CORE_EXTRA_PHDRS
1939         segs += ELF_CORE_EXTRA_PHDRS;
1940 #endif
1941
1942         gate_vma = get_gate_vma(current);
1943         if (gate_vma != NULL)
1944                 segs++;
1945
1946         /*
1947          * Collect all the non-memory information about the process for the
1948          * notes.  This also sets up the file header.
1949          */
1950         if (!fill_note_info(elf, segs + 1, /* including notes section */
1951                             &info, cprm->signr, cprm->regs))
1952                 goto cleanup;
1953
1954         has_dumped = 1;
1955         current->flags |= PF_DUMPCORE;
1956   
1957         fs = get_fs();
1958         set_fs(KERNEL_DS);
1959
1960         DUMP_WRITE(elf, sizeof(*elf));
1961         offset += sizeof(*elf);                         /* Elf header */
1962         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1963         foffset = offset;
1964
1965         /* Write notes phdr entry */
1966         {
1967                 struct elf_phdr phdr;
1968                 size_t sz = get_note_info_size(&info);
1969
1970                 sz += elf_coredump_extra_notes_size();
1971
1972                 fill_elf_note_phdr(&phdr, sz, offset);
1973                 offset += sz;
1974                 DUMP_WRITE(&phdr, sizeof(phdr));
1975         }
1976
1977         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1978
1979         /*
1980          * We must use the same mm->flags while dumping core to avoid
1981          * inconsistency between the program headers and bodies, otherwise an
1982          * unusable core file can be generated.
1983          */
1984         mm_flags = current->mm->flags;
1985
1986         /* Write program headers for segments dump */
1987         for (vma = first_vma(current, gate_vma); vma != NULL;
1988                         vma = next_vma(vma, gate_vma)) {
1989                 struct elf_phdr phdr;
1990
1991                 phdr.p_type = PT_LOAD;
1992                 phdr.p_offset = offset;
1993                 phdr.p_vaddr = vma->vm_start;
1994                 phdr.p_paddr = 0;
1995                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
1996                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1997                 offset += phdr.p_filesz;
1998                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1999                 if (vma->vm_flags & VM_WRITE)
2000                         phdr.p_flags |= PF_W;
2001                 if (vma->vm_flags & VM_EXEC)
2002                         phdr.p_flags |= PF_X;
2003                 phdr.p_align = ELF_EXEC_PAGESIZE;
2004
2005                 DUMP_WRITE(&phdr, sizeof(phdr));
2006         }
2007
2008 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2009         ELF_CORE_WRITE_EXTRA_PHDRS;
2010 #endif
2011
2012         /* write out the notes section */
2013         if (!write_note_info(&info, cprm->file, &foffset))
2014                 goto end_coredump;
2015
2016         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2017                 goto end_coredump;
2018
2019         /* Align to page */
2020         if (!dump_seek(cprm->file, dataoff - foffset))
2021                 goto end_coredump;
2022
2023         for (vma = first_vma(current, gate_vma); vma != NULL;
2024                         vma = next_vma(vma, gate_vma)) {
2025                 unsigned long addr;
2026                 unsigned long end;
2027
2028                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2029
2030                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2031                         struct page *page;
2032                         int stop;
2033
2034                         page = get_dump_page(addr);
2035                         if (page) {
2036                                 void *kaddr = kmap(page);
2037                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2038                                         !dump_write(cprm->file, kaddr,
2039                                                     PAGE_SIZE);
2040                                 kunmap(page);
2041                                 page_cache_release(page);
2042                         } else
2043                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2044                         if (stop)
2045                                 goto end_coredump;
2046                 }
2047         }
2048
2049 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2050         ELF_CORE_WRITE_EXTRA_DATA;
2051 #endif
2052
2053 end_coredump:
2054         set_fs(fs);
2055
2056 cleanup:
2057         free_note_info(&info);
2058         kfree(elf);
2059 out:
2060         return has_dumped;
2061 }
2062
2063 #endif          /* CONFIG_ELF_CORE */
2064
2065 static int __init init_elf_binfmt(void)
2066 {
2067         return register_binfmt(&elf_format);
2068 }
2069
2070 static void __exit exit_elf_binfmt(void)
2071 {
2072         /* Remove the COFF and ELF loaders. */
2073         unregister_binfmt(&elf_format);
2074 }
2075
2076 core_initcall(init_elf_binfmt);
2077 module_exit(exit_elf_binfmt);
2078 MODULE_LICENSE("GPL");