elf: Allow core dump-related fields to be overridden
[linux-2.6.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38
/* Forward declarations of the handlers that elf_format refers to. */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
                             struct elf_phdr *eppnt, int prot, int type,
                             unsigned long total_size);

/*
 * Without CONFIG_ELF_CORE there is no core dump support, so the handler
 * name resolves to NULL and no dump is ever attempted.
 */
#if !defined(CONFIG_ELF_CORE)
#define elf_core_dump   NULL
#else
static int elf_core_dump(struct coredump_params *cprm);
#endif
53
/* ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE. */
#if ELF_EXEC_PAGESIZE <= PAGE_SIZE
#define ELF_MIN_ALIGN   PAGE_SIZE
#else
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#endif

/* Fallback used when nothing earlier has defined ELF_CORE_EFLAGS. */
#if !defined(ELF_CORE_EFLAGS)
#define ELF_CORE_EFLAGS 0
#endif

/*
 * Helpers working in units of ELF_MIN_ALIGN:
 *   ELF_PAGESTART  - round an address down to the boundary
 *   ELF_PAGEOFFSET - offset of an address within its ELF_MIN_ALIGN unit
 *   ELF_PAGEALIGN  - round an address up to the boundary
 */
#define ELF_PAGESTART(_addr) ((_addr) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_addr) ((_addr) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_addr) (((_addr) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
67
68 static struct linux_binfmt elf_format = {
69         .module         = THIS_MODULE,
70         .load_binary    = load_elf_binary,
71         .load_shlib     = load_elf_library,
72         .core_dump      = elf_core_dump,
73         .min_coredump   = ELF_EXEC_PAGESIZE,
74 };
75
/* Non-zero when addr, converted to unsigned long, is at or above TASK_SIZE. */
#define BAD_ADDR(addr) ((unsigned long)(addr) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            direction the stack grows
 *   STACK_ROUND(sp, items) - address after moving sp by 'items' slots,
 *                            rounded to a 16-byte boundary
 *   STACK_ALLOC(sp, len)   - reserve 'len' bytes by updating sp in place;
 *                            evaluates to the start of the reserved area
 *
 * Every macro argument is parenthesized so that passing an expression
 * (e.g. STACK_ROUND(sp, a + b)) expands with the intended precedence.
 * The 'sp' argument of STACK_ALLOC must be a modifiable lvalue.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
126
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * When the arch defines ELF_BASE_PLATFORM (in asm/elf.h), its value is
 * copied to the user stack in the same manner as AT_PLATFORM.  Otherwise
 * fall back to NULL here.
 */
#if !defined(ELF_BASE_PLATFORM)
#define ELF_BASE_PLATFORM NULL
#endif
135
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
254         ei_index += 2;
255
256         sp = STACK_ADD(p, ei_index);
257
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
295         if (__put_user(0, argv))
296                 return -EFAULT;
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 static unsigned long elf_map(struct file *filep, unsigned long addr,
319                 struct elf_phdr *eppnt, int prot, int type,
320                 unsigned long total_size)
321 {
322         unsigned long map_addr;
323         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
324         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
325         addr = ELF_PAGESTART(addr);
326         size = ELF_PAGEALIGN(size);
327
328         /* mmap() will return -EINVAL if given a zero size, but a
329          * segment with zero filesize is perfectly valid */
330         if (!size)
331                 return addr;
332
333         down_write(&current->mm->mmap_sem);
334         /*
335         * total_size is the size of the ELF (interpreter) image.
336         * The _first_ mmap needs to know the full size, otherwise
337         * randomization might put this image into an overlapping
338         * position with the ELF binary image. (since size < total_size)
339         * So we first map the 'big' image - and unmap the remainder at
340         * the end. (which unmap is needed for ELF images with holes.)
341         */
342         if (total_size) {
343                 total_size = ELF_PAGEALIGN(total_size);
344                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
345                 if (!BAD_ADDR(map_addr))
346                         do_munmap(current->mm, map_addr+size, total_size-size);
347         } else
348                 map_addr = do_mmap(filep, addr, size, prot, type, off);
349
350         up_write(&current->mm->mmap_sem);
351         return(map_addr);
352 }
353
354 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
355 {
356         int i, first_idx = -1, last_idx = -1;
357
358         for (i = 0; i < nr; i++) {
359                 if (cmds[i].p_type == PT_LOAD) {
360                         last_idx = i;
361                         if (first_idx == -1)
362                                 first_idx = i;
363                 }
364         }
365         if (first_idx == -1)
366                 return 0;
367
368         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
369                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
370 }
371
372
373 /* This is much more generalized than the library routine read function,
374    so we keep this separate.  Technically the library read function
375    is only provided so that we can read a.out libraries that have
376    an ELF header */
377
378 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
379                 struct file *interpreter, unsigned long *interp_map_addr,
380                 unsigned long no_base)
381 {
382         struct elf_phdr *elf_phdata;
383         struct elf_phdr *eppnt;
384         unsigned long load_addr = 0;
385         int load_addr_set = 0;
386         unsigned long last_bss = 0, elf_bss = 0;
387         unsigned long error = ~0UL;
388         unsigned long total_size;
389         int retval, i, size;
390
391         /* First of all, some simple consistency checks */
392         if (interp_elf_ex->e_type != ET_EXEC &&
393             interp_elf_ex->e_type != ET_DYN)
394                 goto out;
395         if (!elf_check_arch(interp_elf_ex))
396                 goto out;
397         if (!interpreter->f_op || !interpreter->f_op->mmap)
398                 goto out;
399
400         /*
401          * If the size of this structure has changed, then punt, since
402          * we will be doing the wrong thing.
403          */
404         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
405                 goto out;
406         if (interp_elf_ex->e_phnum < 1 ||
407                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
408                 goto out;
409
410         /* Now read in all of the header information */
411         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
412         if (size > ELF_MIN_ALIGN)
413                 goto out;
414         elf_phdata = kmalloc(size, GFP_KERNEL);
415         if (!elf_phdata)
416                 goto out;
417
418         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
419                              (char *)elf_phdata, size);
420         error = -EIO;
421         if (retval != size) {
422                 if (retval < 0)
423                         error = retval; 
424                 goto out_close;
425         }
426
427         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
428         if (!total_size) {
429                 error = -EINVAL;
430                 goto out_close;
431         }
432
433         eppnt = elf_phdata;
434         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
435                 if (eppnt->p_type == PT_LOAD) {
436                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
437                         int elf_prot = 0;
438                         unsigned long vaddr = 0;
439                         unsigned long k, map_addr;
440
441                         if (eppnt->p_flags & PF_R)
442                                 elf_prot = PROT_READ;
443                         if (eppnt->p_flags & PF_W)
444                                 elf_prot |= PROT_WRITE;
445                         if (eppnt->p_flags & PF_X)
446                                 elf_prot |= PROT_EXEC;
447                         vaddr = eppnt->p_vaddr;
448                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
449                                 elf_type |= MAP_FIXED;
450                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
451                                 load_addr = -vaddr;
452
453                         map_addr = elf_map(interpreter, load_addr + vaddr,
454                                         eppnt, elf_prot, elf_type, total_size);
455                         total_size = 0;
456                         if (!*interp_map_addr)
457                                 *interp_map_addr = map_addr;
458                         error = map_addr;
459                         if (BAD_ADDR(map_addr))
460                                 goto out_close;
461
462                         if (!load_addr_set &&
463                             interp_elf_ex->e_type == ET_DYN) {
464                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
465                                 load_addr_set = 1;
466                         }
467
468                         /*
469                          * Check to see if the section's size will overflow the
470                          * allowed task size. Note that p_filesz must always be
471                          * <= p_memsize so it's only necessary to check p_memsz.
472                          */
473                         k = load_addr + eppnt->p_vaddr;
474                         if (BAD_ADDR(k) ||
475                             eppnt->p_filesz > eppnt->p_memsz ||
476                             eppnt->p_memsz > TASK_SIZE ||
477                             TASK_SIZE - eppnt->p_memsz < k) {
478                                 error = -ENOMEM;
479                                 goto out_close;
480                         }
481
482                         /*
483                          * Find the end of the file mapping for this phdr, and
484                          * keep track of the largest address we see for this.
485                          */
486                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
487                         if (k > elf_bss)
488                                 elf_bss = k;
489
490                         /*
491                          * Do the same thing for the memory mapping - between
492                          * elf_bss and last_bss is the bss section.
493                          */
494                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
495                         if (k > last_bss)
496                                 last_bss = k;
497                 }
498         }
499
500         if (last_bss > elf_bss) {
501                 /*
502                  * Now fill out the bss section.  First pad the last page up
503                  * to the page boundary, and then perform a mmap to make sure
504                  * that there are zero-mapped pages up to and including the
505                  * last bss page.
506                  */
507                 if (padzero(elf_bss)) {
508                         error = -EFAULT;
509                         goto out_close;
510                 }
511
512                 /* What we have mapped so far */
513                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
514
515                 /* Map the last of the bss segment */
516                 down_write(&current->mm->mmap_sem);
517                 error = do_brk(elf_bss, last_bss - elf_bss);
518                 up_write(&current->mm->mmap_sem);
519                 if (BAD_ADDR(error))
520                         goto out_close;
521         }
522
523         error = load_addr;
524
525 out_close:
526         kfree(elf_phdata);
527 out:
528         return error;
529 }
530
531 /*
532  * These are the functions used to load ELF style executables and shared
533  * libraries.  There is no binary dependent code anywhere else.
534  */
535
/* Interpreter kinds. */
#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

/* Default stack randomization mask, in pages, unless already defined. */
#if !defined(STACK_RND_MASK)
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif
542
543 static unsigned long randomize_stack_top(unsigned long stack_top)
544 {
545         unsigned int random_variable = 0;
546
547         if ((current->flags & PF_RANDOMIZE) &&
548                 !(current->personality & ADDR_NO_RANDOMIZE)) {
549                 random_variable = get_random_int() & STACK_RND_MASK;
550                 random_variable <<= PAGE_SHIFT;
551         }
552 #ifdef CONFIG_STACK_GROWSUP
553         return PAGE_ALIGN(stack_top) + random_variable;
554 #else
555         return PAGE_ALIGN(stack_top) - random_variable;
556 #endif
557 }
558
559 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
560 {
561         struct file *interpreter = NULL; /* to shut gcc up */
562         unsigned long load_addr = 0, load_bias = 0;
563         int load_addr_set = 0;
564         char * elf_interpreter = NULL;
565         unsigned long error;
566         struct elf_phdr *elf_ppnt, *elf_phdata;
567         unsigned long elf_bss, elf_brk;
568         int retval, i;
569         unsigned int size;
570         unsigned long elf_entry;
571         unsigned long interp_load_addr = 0;
572         unsigned long start_code, end_code, start_data, end_data;
573         unsigned long reloc_func_desc __maybe_unused = 0;
574         int executable_stack = EXSTACK_DEFAULT;
575         unsigned long def_flags = 0;
576         struct {
577                 struct elfhdr elf_ex;
578                 struct elfhdr interp_elf_ex;
579         } *loc;
580
581         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
582         if (!loc) {
583                 retval = -ENOMEM;
584                 goto out_ret;
585         }
586         
587         /* Get the exec-header */
588         loc->elf_ex = *((struct elfhdr *)bprm->buf);
589
590         retval = -ENOEXEC;
591         /* First of all, some simple consistency checks */
592         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
593                 goto out;
594
595         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
596                 goto out;
597         if (!elf_check_arch(&loc->elf_ex))
598                 goto out;
599         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
600                 goto out;
601
602         /* Now read in all of the header information */
603         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
604                 goto out;
605         if (loc->elf_ex.e_phnum < 1 ||
606                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
607                 goto out;
608         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
609         retval = -ENOMEM;
610         elf_phdata = kmalloc(size, GFP_KERNEL);
611         if (!elf_phdata)
612                 goto out;
613
614         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
615                              (char *)elf_phdata, size);
616         if (retval != size) {
617                 if (retval >= 0)
618                         retval = -EIO;
619                 goto out_free_ph;
620         }
621
622         elf_ppnt = elf_phdata;
623         elf_bss = 0;
624         elf_brk = 0;
625
626         start_code = ~0UL;
627         end_code = 0;
628         start_data = 0;
629         end_data = 0;
630
631         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
632                 if (elf_ppnt->p_type == PT_INTERP) {
633                         /* This is the program interpreter used for
634                          * shared libraries - for now assume that this
635                          * is an a.out format binary
636                          */
637                         retval = -ENOEXEC;
638                         if (elf_ppnt->p_filesz > PATH_MAX || 
639                             elf_ppnt->p_filesz < 2)
640                                 goto out_free_ph;
641
642                         retval = -ENOMEM;
643                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
644                                                   GFP_KERNEL);
645                         if (!elf_interpreter)
646                                 goto out_free_ph;
647
648                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
649                                              elf_interpreter,
650                                              elf_ppnt->p_filesz);
651                         if (retval != elf_ppnt->p_filesz) {
652                                 if (retval >= 0)
653                                         retval = -EIO;
654                                 goto out_free_interp;
655                         }
656                         /* make sure path is NULL terminated */
657                         retval = -ENOEXEC;
658                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
659                                 goto out_free_interp;
660
661                         interpreter = open_exec(elf_interpreter);
662                         retval = PTR_ERR(interpreter);
663                         if (IS_ERR(interpreter))
664                                 goto out_free_interp;
665
666                         /*
667                          * If the binary is not readable then enforce
668                          * mm->dumpable = 0 regardless of the interpreter's
669                          * permissions.
670                          */
671                         would_dump(bprm, interpreter);
672
673                         retval = kernel_read(interpreter, 0, bprm->buf,
674                                              BINPRM_BUF_SIZE);
675                         if (retval != BINPRM_BUF_SIZE) {
676                                 if (retval >= 0)
677                                         retval = -EIO;
678                                 goto out_free_dentry;
679                         }
680
681                         /* Get the exec headers */
682                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
683                         break;
684                 }
685                 elf_ppnt++;
686         }
687
688         elf_ppnt = elf_phdata;
689         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
690                 if (elf_ppnt->p_type == PT_GNU_STACK) {
691                         if (elf_ppnt->p_flags & PF_X)
692                                 executable_stack = EXSTACK_ENABLE_X;
693                         else
694                                 executable_stack = EXSTACK_DISABLE_X;
695                         break;
696                 }
697
698         /* Some simple consistency checks for the interpreter */
699         if (elf_interpreter) {
700                 retval = -ELIBBAD;
701                 /* Not an ELF interpreter */
702                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
703                         goto out_free_dentry;
704                 /* Verify the interpreter has a valid arch */
705                 if (!elf_check_arch(&loc->interp_elf_ex))
706                         goto out_free_dentry;
707         }
708
709         /* Flush all traces of the currently running executable */
710         retval = flush_old_exec(bprm);
711         if (retval)
712                 goto out_free_dentry;
713
714         /* OK, This is the point of no return */
715         current->flags &= ~PF_FORKNOEXEC;
716         current->mm->def_flags = def_flags;
717
718         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
719            may depend on the personality.  */
720         SET_PERSONALITY(loc->elf_ex);
721         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
722                 current->personality |= READ_IMPLIES_EXEC;
723
724         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
725                 current->flags |= PF_RANDOMIZE;
726
727         setup_new_exec(bprm);
728
729         /* Do this so that we can load the interpreter, if need be.  We will
730            change some of these later */
731         current->mm->free_area_cache = current->mm->mmap_base;
732         current->mm->cached_hole_size = 0;
733         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
734                                  executable_stack);
735         if (retval < 0) {
736                 send_sig(SIGKILL, current, 0);
737                 goto out_free_dentry;
738         }
739         
740         current->mm->start_stack = bprm->p;
741
742         /* Now we do a little grungy work by mmapping the ELF image into
743            the correct location in memory. */
744         for(i = 0, elf_ppnt = elf_phdata;
745             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
746                 int elf_prot = 0, elf_flags;
747                 unsigned long k, vaddr;
748
749                 if (elf_ppnt->p_type != PT_LOAD)
750                         continue;
751
752                 if (unlikely (elf_brk > elf_bss)) {
753                         unsigned long nbyte;
754                     
755                         /* There was a PT_LOAD segment with p_memsz > p_filesz
756                            before this one. Map anonymous pages, if needed,
757                            and clear the area.  */
758                         retval = set_brk(elf_bss + load_bias,
759                                          elf_brk + load_bias);
760                         if (retval) {
761                                 send_sig(SIGKILL, current, 0);
762                                 goto out_free_dentry;
763                         }
764                         nbyte = ELF_PAGEOFFSET(elf_bss);
765                         if (nbyte) {
766                                 nbyte = ELF_MIN_ALIGN - nbyte;
767                                 if (nbyte > elf_brk - elf_bss)
768                                         nbyte = elf_brk - elf_bss;
769                                 if (clear_user((void __user *)elf_bss +
770                                                         load_bias, nbyte)) {
771                                         /*
772                                          * This bss-zeroing can fail if the ELF
773                                          * file specifies odd protections. So
774                                          * we don't check the return value
775                                          */
776                                 }
777                         }
778                 }
779
780                 if (elf_ppnt->p_flags & PF_R)
781                         elf_prot |= PROT_READ;
782                 if (elf_ppnt->p_flags & PF_W)
783                         elf_prot |= PROT_WRITE;
784                 if (elf_ppnt->p_flags & PF_X)
785                         elf_prot |= PROT_EXEC;
786
787                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
788
789                 vaddr = elf_ppnt->p_vaddr;
790                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
791                         elf_flags |= MAP_FIXED;
792                 } else if (loc->elf_ex.e_type == ET_DYN) {
793                         /* Try and get dynamic programs out of the way of the
794                          * default mmap base, as well as whatever program they
795                          * might try to exec.  This is because the brk will
796                          * follow the loader, and is not movable.  */
797 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
798                         /* Memory randomization might have been switched off
799                          * in runtime via sysctl.
800                          * If that is the case, retain the original non-zero
801                          * load_bias value in order to establish proper
802                          * non-randomized mappings.
803                          */
804                         if (current->flags & PF_RANDOMIZE)
805                                 load_bias = 0;
806                         else
807                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
808 #else
809                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
810 #endif
811                 }
812
813                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
814                                 elf_prot, elf_flags, 0);
815                 if (BAD_ADDR(error)) {
816                         send_sig(SIGKILL, current, 0);
817                         retval = IS_ERR((void *)error) ?
818                                 PTR_ERR((void*)error) : -EINVAL;
819                         goto out_free_dentry;
820                 }
821
822                 if (!load_addr_set) {
823                         load_addr_set = 1;
824                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
825                         if (loc->elf_ex.e_type == ET_DYN) {
826                                 load_bias += error -
827                                              ELF_PAGESTART(load_bias + vaddr);
828                                 load_addr += load_bias;
829                                 reloc_func_desc = load_bias;
830                         }
831                 }
832                 k = elf_ppnt->p_vaddr;
833                 if (k < start_code)
834                         start_code = k;
835                 if (start_data < k)
836                         start_data = k;
837
838                 /*
839                  * Check to see if the section's size will overflow the
840                  * allowed task size. Note that p_filesz must always be
841                  * <= p_memsz so it is only necessary to check p_memsz.
842                  */
843                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
844                     elf_ppnt->p_memsz > TASK_SIZE ||
845                     TASK_SIZE - elf_ppnt->p_memsz < k) {
846                         /* set_brk can never work. Avoid overflows. */
847                         send_sig(SIGKILL, current, 0);
848                         retval = -EINVAL;
849                         goto out_free_dentry;
850                 }
851
852                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
853
854                 if (k > elf_bss)
855                         elf_bss = k;
856                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
857                         end_code = k;
858                 if (end_data < k)
859                         end_data = k;
860                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
861                 if (k > elf_brk)
862                         elf_brk = k;
863         }
864
865         loc->elf_ex.e_entry += load_bias;
866         elf_bss += load_bias;
867         elf_brk += load_bias;
868         start_code += load_bias;
869         end_code += load_bias;
870         start_data += load_bias;
871         end_data += load_bias;
872
873         /* Calling set_brk effectively mmaps the pages that we need
874          * for the bss and break sections.  We must do this before
875          * mapping in the interpreter, to make sure it doesn't wind
876          * up getting placed where the bss needs to go.
877          */
878         retval = set_brk(elf_bss, elf_brk);
879         if (retval) {
880                 send_sig(SIGKILL, current, 0);
881                 goto out_free_dentry;
882         }
883         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
884                 send_sig(SIGSEGV, current, 0);
885                 retval = -EFAULT; /* Nobody gets to see this, but.. */
886                 goto out_free_dentry;
887         }
888
889         if (elf_interpreter) {
890                 unsigned long uninitialized_var(interp_map_addr);
891
892                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
893                                             interpreter,
894                                             &interp_map_addr,
895                                             load_bias);
896                 if (!IS_ERR((void *)elf_entry)) {
897                         /*
898                          * load_elf_interp() returns relocation
899                          * adjustment
900                          */
901                         interp_load_addr = elf_entry;
902                         elf_entry += loc->interp_elf_ex.e_entry;
903                 }
904                 if (BAD_ADDR(elf_entry)) {
905                         force_sig(SIGSEGV, current);
906                         retval = IS_ERR((void *)elf_entry) ?
907                                         (int)elf_entry : -EINVAL;
908                         goto out_free_dentry;
909                 }
910                 reloc_func_desc = interp_load_addr;
911
912                 allow_write_access(interpreter);
913                 fput(interpreter);
914                 kfree(elf_interpreter);
915         } else {
916                 elf_entry = loc->elf_ex.e_entry;
917                 if (BAD_ADDR(elf_entry)) {
918                         force_sig(SIGSEGV, current);
919                         retval = -EINVAL;
920                         goto out_free_dentry;
921                 }
922         }
923
924         kfree(elf_phdata);
925
926         set_binfmt(&elf_format);
927
928 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
929         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
930         if (retval < 0) {
931                 send_sig(SIGKILL, current, 0);
932                 goto out;
933         }
934 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
935
936         install_exec_creds(bprm);
937         current->flags &= ~PF_FORKNOEXEC;
938         retval = create_elf_tables(bprm, &loc->elf_ex,
939                           load_addr, interp_load_addr);
940         if (retval < 0) {
941                 send_sig(SIGKILL, current, 0);
942                 goto out;
943         }
944         /* N.B. passed_fileno might not be initialized? */
945         current->mm->end_code = end_code;
946         current->mm->start_code = start_code;
947         current->mm->start_data = start_data;
948         current->mm->end_data = end_data;
949         current->mm->start_stack = bprm->p;
950
951 #ifdef arch_randomize_brk
952         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
953                 current->mm->brk = current->mm->start_brk =
954                         arch_randomize_brk(current->mm);
955 #ifdef CONFIG_COMPAT_BRK
956                 current->brk_randomized = 1;
957 #endif
958         }
959 #endif
960
961         if (current->personality & MMAP_PAGE_ZERO) {
962                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
963                    and some applications "depend" upon this behavior.
964                    Since we do not have the power to recompile these, we
965                    emulate the SVr4 behavior. Sigh. */
966                 down_write(&current->mm->mmap_sem);
967                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
968                                 MAP_FIXED | MAP_PRIVATE, 0);
969                 up_write(&current->mm->mmap_sem);
970         }
971
972 #ifdef ELF_PLAT_INIT
973         /*
974          * The ABI may specify that certain registers be set up in special
975          * ways (on i386 %edx is the address of a DT_FINI function, for
976          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
977          * that the e_entry field is the address of the function descriptor
978          * for the startup routine, rather than the address of the startup
979          * routine itself.  This macro performs whatever initialization to
980          * the regs structure is required as well as any relocations to the
981          * function descriptor entries when executing dynamically links apps.
982          */
983         ELF_PLAT_INIT(regs, reloc_func_desc);
984 #endif
985
986         start_thread(regs, elf_entry, bprm->p);
987         retval = 0;
988 out:
989         kfree(loc);
990 out_ret:
991         return retval;
992
993         /* error cleanup */
994 out_free_dentry:
995         allow_write_access(interpreter);
996         if (interpreter)
997                 fput(interpreter);
998 out_free_interp:
999         kfree(elf_interpreter);
1000 out_free_ph:
1001         kfree(elf_phdata);
1002         goto out;
1003 }
1004
1005 /* This is really simpleminded and specialized - we are loading an
1006    a.out library that is given an ELF header. */
1007 static int load_elf_library(struct file *file)
1008 {
1009         struct elf_phdr *elf_phdata;
1010         struct elf_phdr *eppnt;
1011         unsigned long elf_bss, bss, len;
1012         int retval, error, i, j;
1013         struct elfhdr elf_ex;
1014
1015         error = -ENOEXEC;
1016         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1017         if (retval != sizeof(elf_ex))
1018                 goto out;
1019
1020         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1021                 goto out;
1022
1023         /* First of all, some simple consistency checks */
1024         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1025             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1026                 goto out;
1027
1028         /* Now read in all of the header information */
1029
1030         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1031         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1032
1033         error = -ENOMEM;
1034         elf_phdata = kmalloc(j, GFP_KERNEL);
1035         if (!elf_phdata)
1036                 goto out;
1037
1038         eppnt = elf_phdata;
1039         error = -ENOEXEC;
1040         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1041         if (retval != j)
1042                 goto out_free_ph;
1043
1044         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1045                 if ((eppnt + i)->p_type == PT_LOAD)
1046                         j++;
1047         if (j != 1)
1048                 goto out_free_ph;
1049
1050         while (eppnt->p_type != PT_LOAD)
1051                 eppnt++;
1052
1053         /* Now use mmap to map the library into memory. */
1054         down_write(&current->mm->mmap_sem);
1055         error = do_mmap(file,
1056                         ELF_PAGESTART(eppnt->p_vaddr),
1057                         (eppnt->p_filesz +
1058                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1059                         PROT_READ | PROT_WRITE | PROT_EXEC,
1060                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1061                         (eppnt->p_offset -
1062                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1063         up_write(&current->mm->mmap_sem);
1064         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1065                 goto out_free_ph;
1066
1067         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1068         if (padzero(elf_bss)) {
1069                 error = -EFAULT;
1070                 goto out_free_ph;
1071         }
1072
1073         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1074                             ELF_MIN_ALIGN - 1);
1075         bss = eppnt->p_memsz + eppnt->p_vaddr;
1076         if (bss > len) {
1077                 down_write(&current->mm->mmap_sem);
1078                 do_brk(len, bss - len);
1079                 up_write(&current->mm->mmap_sem);
1080         }
1081         error = 0;
1082
1083 out_free_ph:
1084         kfree(elf_phdata);
1085 out:
1086         return error;
1087 }
1088
1089 #ifdef CONFIG_ELF_CORE
1090 /*
1091  * ELF core dumper
1092  *
1093  * Modelled on fs/exec.c:aout_core_dump()
1094  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1095  */
1096
1097 /*
1098  * Decide what to dump of a segment, part, all or none.
1099  */
1100 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1101                                    unsigned long mm_flags)
1102 {
1103 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1104
1105         /* The vma can be set up to tell us the answer directly.  */
1106         if (vma->vm_flags & VM_ALWAYSDUMP)
1107                 goto whole;
1108
1109         /* Hugetlb memory check */
1110         if (vma->vm_flags & VM_HUGETLB) {
1111                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1112                         goto whole;
1113                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1114                         goto whole;
1115         }
1116
1117         /* Do not dump I/O mapped devices or special mappings */
1118         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1119                 return 0;
1120
1121         /* By default, dump shared memory if mapped from an anonymous file. */
1122         if (vma->vm_flags & VM_SHARED) {
1123                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1124                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1125                         goto whole;
1126                 return 0;
1127         }
1128
1129         /* Dump segments that have been written to.  */
1130         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1131                 goto whole;
1132         if (vma->vm_file == NULL)
1133                 return 0;
1134
1135         if (FILTER(MAPPED_PRIVATE))
1136                 goto whole;
1137
1138         /*
1139          * If this looks like the beginning of a DSO or executable mapping,
1140          * check for an ELF header.  If we find one, dump the first page to
1141          * aid in determining what was mapped here.
1142          */
1143         if (FILTER(ELF_HEADERS) &&
1144             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1145                 u32 __user *header = (u32 __user *) vma->vm_start;
1146                 u32 word;
1147                 mm_segment_t fs = get_fs();
1148                 /*
1149                  * Doing it this way gets the constant folded by GCC.
1150                  */
1151                 union {
1152                         u32 cmp;
1153                         char elfmag[SELFMAG];
1154                 } magic;
1155                 BUILD_BUG_ON(SELFMAG != sizeof word);
1156                 magic.elfmag[EI_MAG0] = ELFMAG0;
1157                 magic.elfmag[EI_MAG1] = ELFMAG1;
1158                 magic.elfmag[EI_MAG2] = ELFMAG2;
1159                 magic.elfmag[EI_MAG3] = ELFMAG3;
1160                 /*
1161                  * Switch to the user "segment" for get_user(),
1162                  * then put back what elf_core_dump() had in place.
1163                  */
1164                 set_fs(USER_DS);
1165                 if (unlikely(get_user(word, header)))
1166                         word = 0;
1167                 set_fs(fs);
1168                 if (word == magic.cmp)
1169                         return PAGE_SIZE;
1170         }
1171
1172 #undef  FILTER
1173
1174         return 0;
1175
1176 whole:
1177         return vma->vm_end - vma->vm_start;
1178 }
1179
/* One ELF note held in memory until it is written to the core file. */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* value stored in the note's n_type */
	unsigned int datasz;	/* number of bytes at data */
	void *data;		/* note contents */
};
1188
1189 static int notesize(struct memelfnote *en)
1190 {
1191         int sz;
1192
1193         sz = sizeof(struct elf_note);
1194         sz += roundup(strlen(en->name) + 1, 4);
1195         sz += roundup(en->datasz, 4);
1196
1197         return sz;
1198 }
1199
1200 #define DUMP_WRITE(addr, nr, foffset)   \
1201         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1202
1203 static int alignfile(struct file *file, loff_t *foffset)
1204 {
1205         static const char buf[4] = { 0, };
1206         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1207         return 1;
1208 }
1209
1210 static int writenote(struct memelfnote *men, struct file *file,
1211                         loff_t *foffset)
1212 {
1213         struct elf_note en;
1214         en.n_namesz = strlen(men->name) + 1;
1215         en.n_descsz = men->datasz;
1216         en.n_type = men->type;
1217
1218         DUMP_WRITE(&en, sizeof(en), foffset);
1219         DUMP_WRITE(men->name, en.n_namesz, foffset);
1220         if (!alignfile(file, foffset))
1221                 return 0;
1222         DUMP_WRITE(men->data, men->datasz, foffset);
1223         if (!alignfile(file, foffset))
1224                 return 0;
1225
1226         return 1;
1227 }
1228 #undef DUMP_WRITE
1229
1230 static void fill_elf_header(struct elfhdr *elf, int segs,
1231                             u16 machine, u32 flags, u8 osabi)
1232 {
1233         memset(elf, 0, sizeof(*elf));
1234
1235         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1236         elf->e_ident[EI_CLASS] = ELF_CLASS;
1237         elf->e_ident[EI_DATA] = ELF_DATA;
1238         elf->e_ident[EI_VERSION] = EV_CURRENT;
1239         elf->e_ident[EI_OSABI] = ELF_OSABI;
1240
1241         elf->e_type = ET_CORE;
1242         elf->e_machine = machine;
1243         elf->e_version = EV_CURRENT;
1244         elf->e_phoff = sizeof(struct elfhdr);
1245         elf->e_flags = flags;
1246         elf->e_ehsize = sizeof(struct elfhdr);
1247         elf->e_phentsize = sizeof(struct elf_phdr);
1248         elf->e_phnum = segs;
1249
1250         return;
1251 }
1252
1253 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1254 {
1255         phdr->p_type = PT_NOTE;
1256         phdr->p_offset = offset;
1257         phdr->p_vaddr = 0;
1258         phdr->p_paddr = 0;
1259         phdr->p_filesz = sz;
1260         phdr->p_memsz = 0;
1261         phdr->p_flags = 0;
1262         phdr->p_align = 0;
1263         return;
1264 }
1265
1266 static void fill_note(struct memelfnote *note, const char *name, int type, 
1267                 unsigned int sz, void *data)
1268 {
1269         note->name = name;
1270         note->type = type;
1271         note->datasz = sz;
1272         note->data = data;
1273         return;
1274 }
1275
1276 /*
1277  * fill up all the fields in prstatus from the given task struct, except
1278  * registers which need to be filled up separately.
1279  */
1280 static void fill_prstatus(struct elf_prstatus *prstatus,
1281                 struct task_struct *p, long signr)
1282 {
1283         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1284         prstatus->pr_sigpend = p->pending.signal.sig[0];
1285         prstatus->pr_sighold = p->blocked.sig[0];
1286         rcu_read_lock();
1287         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1288         rcu_read_unlock();
1289         prstatus->pr_pid = task_pid_vnr(p);
1290         prstatus->pr_pgrp = task_pgrp_vnr(p);
1291         prstatus->pr_sid = task_session_vnr(p);
1292         if (thread_group_leader(p)) {
1293                 struct task_cputime cputime;
1294
1295                 /*
1296                  * This is the record for the group leader.  It shows the
1297                  * group-wide total, not its individual thread total.
1298                  */
1299                 thread_group_cputime(p, &cputime);
1300                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1301                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1302         } else {
1303                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1304                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1305         }
1306         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1307         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1308 }
1309
1310 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1311                        struct mm_struct *mm)
1312 {
1313         const struct cred *cred;
1314         unsigned int i, len;
1315         
1316         /* first copy the parameters from user space */
1317         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1318
1319         len = mm->arg_end - mm->arg_start;
1320         if (len >= ELF_PRARGSZ)
1321                 len = ELF_PRARGSZ-1;
1322         if (copy_from_user(&psinfo->pr_psargs,
1323                            (const char __user *)mm->arg_start, len))
1324                 return -EFAULT;
1325         for(i = 0; i < len; i++)
1326                 if (psinfo->pr_psargs[i] == 0)
1327                         psinfo->pr_psargs[i] = ' ';
1328         psinfo->pr_psargs[len] = 0;
1329
1330         rcu_read_lock();
1331         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1332         rcu_read_unlock();
1333         psinfo->pr_pid = task_pid_vnr(p);
1334         psinfo->pr_pgrp = task_pgrp_vnr(p);
1335         psinfo->pr_sid = task_session_vnr(p);
1336
1337         i = p->state ? ffz(~p->state) + 1 : 0;
1338         psinfo->pr_state = i;
1339         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1340         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1341         psinfo->pr_nice = task_nice(p);
1342         psinfo->pr_flag = p->flags;
1343         rcu_read_lock();
1344         cred = __task_cred(p);
1345         SET_UID(psinfo->pr_uid, cred->uid);
1346         SET_GID(psinfo->pr_gid, cred->gid);
1347         rcu_read_unlock();
1348         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1349         
1350         return 0;
1351 }
1352
1353 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1354 {
1355         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1356         int i = 0;
1357         do
1358                 i += 2;
1359         while (auxv[i - 2] != AT_NULL);
1360         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1361 }
1362
1363 #ifdef CORE_DUMP_USE_REGSET
1364 #include <linux/regset.h>
1365
1366 struct elf_thread_core_info {
1367         struct elf_thread_core_info *next;
1368         struct task_struct *task;
1369         struct elf_prstatus prstatus;
1370         struct memelfnote notes[0];
1371 };
1372
1373 struct elf_note_info {
1374         struct elf_thread_core_info *thread;
1375         struct memelfnote psinfo;
1376         struct memelfnote auxv;
1377         size_t size;
1378         int thread_notes;
1379 };
1380
1381 /*
1382  * When a regset has a writeback hook, we call it on each thread before
1383  * dumping user memory.  On register window machines, this makes sure the
1384  * user memory backing the register data is up to date before we read it.
1385  */
1386 static void do_thread_regset_writeback(struct task_struct *task,
1387                                        const struct user_regset *regset)
1388 {
1389         if (regset->writeback)
1390                 regset->writeback(task, regset, 1);
1391 }
1392
/*
 * Default accessors used when filling in the NT_PRSTATUS note.  Each is
 * defined only if no earlier definition exists, so it can be overridden.
 */

/* Number of bytes of register data to fetch into pr_reg. */
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(regs)	sizeof(regs)
#endif

/* Number of bytes of the prstatus structure placed in the note. */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(status)	sizeof(status)
#endif

/* Address of the register area inside a prstatus structure. */
#ifndef PR_REG_PTR
#define PR_REG_PTR(status)	(&((status)->pr_reg))
#endif

/* Record in a prstatus structure whether FP register data is present. */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(status, val)	((status)->pr_fpvalid = (val))
#endif
1408
1409 static int fill_thread_core_info(struct elf_thread_core_info *t,
1410                                  const struct user_regset_view *view,
1411                                  long signr, size_t *total)
1412 {
1413         unsigned int i;
1414
1415         /*
1416          * NT_PRSTATUS is the one special case, because the regset data
1417          * goes into the pr_reg field inside the note contents, rather
1418          * than being the whole note contents.  We fill the reset in here.
1419          * We assume that regset 0 is NT_PRSTATUS.
1420          */
1421         fill_prstatus(&t->prstatus, t->task, signr);
1422         (void) view->regsets[0].get(t->task, &view->regsets[0],
1423                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1424                                     PR_REG_PTR(&t->prstatus), NULL);
1425
1426         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1427                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1428         *total += notesize(&t->notes[0]);
1429
1430         do_thread_regset_writeback(t->task, &view->regsets[0]);
1431
1432         /*
1433          * Each other regset might generate a note too.  For each regset
1434          * that has no core_note_type or is inactive, we leave t->notes[i]
1435          * all zero and we'll know to skip writing it later.
1436          */
1437         for (i = 1; i < view->n; ++i) {
1438                 const struct user_regset *regset = &view->regsets[i];
1439                 do_thread_regset_writeback(t->task, regset);
1440                 if (regset->core_note_type &&
1441                     (!regset->active || regset->active(t->task, regset))) {
1442                         int ret;
1443                         size_t size = regset->n * regset->size;
1444                         void *data = kmalloc(size, GFP_KERNEL);
1445                         if (unlikely(!data))
1446                                 return 0;
1447                         ret = regset->get(t->task, regset,
1448                                           0, size, data, NULL);
1449                         if (unlikely(ret))
1450                                 kfree(data);
1451                         else {
1452                                 if (regset->core_note_type != NT_PRFPREG)
1453                                         fill_note(&t->notes[i], "LINUX",
1454                                                   regset->core_note_type,
1455                                                   size, data);
1456                                 else {
1457                                         SET_PR_FPVALID(&t->prstatus, 1);
1458                                         fill_note(&t->notes[i], "CORE",
1459                                                   NT_PRFPREG, size, data);
1460                                 }
1461                                 *total += notesize(&t->notes[i]);
1462                         }
1463                 }
1464         }
1465
1466         return 1;
1467 }
1468
1469 static int fill_note_info(struct elfhdr *elf, int phdrs,
1470                           struct elf_note_info *info,
1471                           long signr, struct pt_regs *regs)
1472 {
1473         struct task_struct *dump_task = current;
1474         const struct user_regset_view *view = task_user_regset_view(dump_task);
1475         struct elf_thread_core_info *t;
1476         struct elf_prpsinfo *psinfo;
1477         struct core_thread *ct;
1478         unsigned int i;
1479
1480         info->size = 0;
1481         info->thread = NULL;
1482
1483         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1484         if (psinfo == NULL)
1485                 return 0;
1486
1487         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1488
1489         /*
1490          * Figure out how many notes we're going to need for each thread.
1491          */
1492         info->thread_notes = 0;
1493         for (i = 0; i < view->n; ++i)
1494                 if (view->regsets[i].core_note_type != 0)
1495                         ++info->thread_notes;
1496
1497         /*
1498          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1499          * since it is our one special case.
1500          */
1501         if (unlikely(info->thread_notes == 0) ||
1502             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1503                 WARN_ON(1);
1504                 return 0;
1505         }
1506
1507         /*
1508          * Initialize the ELF file header.
1509          */
1510         fill_elf_header(elf, phdrs,
1511                         view->e_machine, view->e_flags, view->ei_osabi);
1512
1513         /*
1514          * Allocate a structure for each thread.
1515          */
1516         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1517                 t = kzalloc(offsetof(struct elf_thread_core_info,
1518                                      notes[info->thread_notes]),
1519                             GFP_KERNEL);
1520                 if (unlikely(!t))
1521                         return 0;
1522
1523                 t->task = ct->task;
1524                 if (ct->task == dump_task || !info->thread) {
1525                         t->next = info->thread;
1526                         info->thread = t;
1527                 } else {
1528                         /*
1529                          * Make sure to keep the original task at
1530                          * the head of the list.
1531                          */
1532                         t->next = info->thread->next;
1533                         info->thread->next = t;
1534                 }
1535         }
1536
1537         /*
1538          * Now fill in each thread's information.
1539          */
1540         for (t = info->thread; t != NULL; t = t->next)
1541                 if (!fill_thread_core_info(t, view, signr, &info->size))
1542                         return 0;
1543
1544         /*
1545          * Fill in the two process-wide notes.
1546          */
1547         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1548         info->size += notesize(&info->psinfo);
1549
1550         fill_auxv_note(&info->auxv, current->mm);
1551         info->size += notesize(&info->auxv);
1552
1553         return 1;
1554 }
1555
1556 static size_t get_note_info_size(struct elf_note_info *info)
1557 {
1558         return info->size;
1559 }
1560
1561 /*
1562  * Write all the notes for each thread.  When writing the first thread, the
1563  * process-wide notes are interleaved after the first thread-specific note.
1564  */
1565 static int write_note_info(struct elf_note_info *info,
1566                            struct file *file, loff_t *foffset)
1567 {
1568         bool first = 1;
1569         struct elf_thread_core_info *t = info->thread;
1570
1571         do {
1572                 int i;
1573
1574                 if (!writenote(&t->notes[0], file, foffset))
1575                         return 0;
1576
1577                 if (first && !writenote(&info->psinfo, file, foffset))
1578                         return 0;
1579                 if (first && !writenote(&info->auxv, file, foffset))
1580                         return 0;
1581
1582                 for (i = 1; i < info->thread_notes; ++i)
1583                         if (t->notes[i].data &&
1584                             !writenote(&t->notes[i], file, foffset))
1585                                 return 0;
1586
1587                 first = 0;
1588                 t = t->next;
1589         } while (t);
1590
1591         return 1;
1592 }
1593
1594 static void free_note_info(struct elf_note_info *info)
1595 {
1596         struct elf_thread_core_info *threads = info->thread;
1597         while (threads) {
1598                 unsigned int i;
1599                 struct elf_thread_core_info *t = threads;
1600                 threads = t->next;
1601                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1602                 for (i = 1; i < info->thread_notes; ++i)
1603                         kfree(t->notes[i].data);
1604                 kfree(t);
1605         }
1606         kfree(info->psinfo.data);
1607 }
1608
1609 #else
1610
/*
 * Here is the structure in which status of each thread is captured.
 * One instance is allocated per additional thread and linked into the
 * dump's thread list; the register buffers are embedded so that the notes
 * can point straight at them.
 */
struct elf_thread_status
{
        struct list_head list;          /* linkage in the per-dump thread list */
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct task_struct *thread;     /* task this record describes */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
#endif
        struct memelfnote notes[3];     /* notes referring to the buffers above */
        int num_notes;                  /* count set by elf_dump_thread_status() */
};
1624
1625 /*
1626  * In order to add the specific thread information for the elf file format,
1627  * we need to keep a linked list of every threads pr_status and then create
1628  * a single section for them in the final core file.
1629  */
1630 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1631 {
1632         int sz = 0;
1633         struct task_struct *p = t->thread;
1634         t->num_notes = 0;
1635
1636         fill_prstatus(&t->prstatus, p, signr);
1637         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1638         
1639         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1640                   &(t->prstatus));
1641         t->num_notes++;
1642         sz += notesize(&t->notes[0]);
1643
1644         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1645                                                                 &t->fpu))) {
1646                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1647                           &(t->fpu));
1648                 t->num_notes++;
1649                 sz += notesize(&t->notes[1]);
1650         }
1651
1652 #ifdef ELF_CORE_COPY_XFPREGS
1653         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1654                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1655                           sizeof(t->xfpu), &t->xfpu);
1656                 t->num_notes++;
1657                 sz += notesize(&t->notes[2]);
1658         }
1659 #endif  
1660         return sz;
1661 }
1662
/*
 * Everything collected for the note segment of a core dump: the notes and
 * register buffers for the dumping task itself, plus a list of per-thread
 * status records for the other threads.
 */
struct elf_note_info {
        struct memelfnote *notes;       /* array of notes for the dumping task */
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;   /* list of struct elf_thread_status */
        elf_fpregset_t *fpu;            /* NT_PRFPREG buffer */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t *xfpu;          /* ELF_CORE_XFPREG_TYPE buffer */
#endif
        int thread_status_size;         /* summed note sizes of thread_list */
        int numnote;                    /* number of entries used in notes[] */
};
1675
1676 static int elf_note_info_init(struct elf_note_info *info)
1677 {
1678         memset(info, 0, sizeof(*info));
1679         INIT_LIST_HEAD(&info->thread_list);
1680
1681         /* Allocate space for six ELF notes */
1682         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1683         if (!info->notes)
1684                 return 0;
1685         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1686         if (!info->psinfo)
1687                 goto notes_free;
1688         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1689         if (!info->prstatus)
1690                 goto psinfo_free;
1691         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1692         if (!info->fpu)
1693                 goto prstatus_free;
1694 #ifdef ELF_CORE_COPY_XFPREGS
1695         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1696         if (!info->xfpu)
1697                 goto fpu_free;
1698 #endif
1699         return 1;
1700 #ifdef ELF_CORE_COPY_XFPREGS
1701  fpu_free:
1702         kfree(info->fpu);
1703 #endif
1704  prstatus_free:
1705         kfree(info->prstatus);
1706  psinfo_free:
1707         kfree(info->psinfo);
1708  notes_free:
1709         kfree(info->notes);
1710         return 0;
1711 }
1712
1713 static int fill_note_info(struct elfhdr *elf, int phdrs,
1714                           struct elf_note_info *info,
1715                           long signr, struct pt_regs *regs)
1716 {
1717         struct list_head *t;
1718
1719         if (!elf_note_info_init(info))
1720                 return 0;
1721
1722         if (signr) {
1723                 struct core_thread *ct;
1724                 struct elf_thread_status *ets;
1725
1726                 for (ct = current->mm->core_state->dumper.next;
1727                                                 ct; ct = ct->next) {
1728                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1729                         if (!ets)
1730                                 return 0;
1731
1732                         ets->thread = ct->task;
1733                         list_add(&ets->list, &info->thread_list);
1734                 }
1735
1736                 list_for_each(t, &info->thread_list) {
1737                         int sz;
1738
1739                         ets = list_entry(t, struct elf_thread_status, list);
1740                         sz = elf_dump_thread_status(signr, ets);
1741                         info->thread_status_size += sz;
1742                 }
1743         }
1744         /* now collect the dump for the current */
1745         memset(info->prstatus, 0, sizeof(*info->prstatus));
1746         fill_prstatus(info->prstatus, current, signr);
1747         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1748
1749         /* Set up header */
1750         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1751
1752         /*
1753          * Set up the notes in similar form to SVR4 core dumps made
1754          * with info from their /proc.
1755          */
1756
1757         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1758                   sizeof(*info->prstatus), info->prstatus);
1759         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1760         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1761                   sizeof(*info->psinfo), info->psinfo);
1762
1763         info->numnote = 2;
1764
1765         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1766
1767         /* Try to dump the FPU. */
1768         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1769                                                                info->fpu);
1770         if (info->prstatus->pr_fpvalid)
1771                 fill_note(info->notes + info->numnote++,
1772                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1773 #ifdef ELF_CORE_COPY_XFPREGS
1774         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1775                 fill_note(info->notes + info->numnote++,
1776                           "LINUX", ELF_CORE_XFPREG_TYPE,
1777                           sizeof(*info->xfpu), info->xfpu);
1778 #endif
1779
1780         return 1;
1781 }
1782
1783 static size_t get_note_info_size(struct elf_note_info *info)
1784 {
1785         int sz = 0;
1786         int i;
1787
1788         for (i = 0; i < info->numnote; i++)
1789                 sz += notesize(info->notes + i);
1790
1791         sz += info->thread_status_size;
1792
1793         return sz;
1794 }
1795
1796 static int write_note_info(struct elf_note_info *info,
1797                            struct file *file, loff_t *foffset)
1798 {
1799         int i;
1800         struct list_head *t;
1801
1802         for (i = 0; i < info->numnote; i++)
1803                 if (!writenote(info->notes + i, file, foffset))
1804                         return 0;
1805
1806         /* write out the thread status notes section */
1807         list_for_each(t, &info->thread_list) {
1808                 struct elf_thread_status *tmp =
1809                                 list_entry(t, struct elf_thread_status, list);
1810
1811                 for (i = 0; i < tmp->num_notes; i++)
1812                         if (!writenote(&tmp->notes[i], file, foffset))
1813                                 return 0;
1814         }
1815
1816         return 1;
1817 }
1818
1819 static void free_note_info(struct elf_note_info *info)
1820 {
1821         while (!list_empty(&info->thread_list)) {
1822                 struct list_head *tmp = info->thread_list.next;
1823                 list_del(tmp);
1824                 kfree(list_entry(tmp, struct elf_thread_status, list));
1825         }
1826
1827         kfree(info->prstatus);
1828         kfree(info->psinfo);
1829         kfree(info->notes);
1830         kfree(info->fpu);
1831 #ifdef ELF_CORE_COPY_XFPREGS
1832         kfree(info->xfpu);
1833 #endif
1834 }
1835
1836 #endif
1837
1838 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1839                                         struct vm_area_struct *gate_vma)
1840 {
1841         struct vm_area_struct *ret = tsk->mm->mmap;
1842
1843         if (ret)
1844                 return ret;
1845         return gate_vma;
1846 }
1847 /*
1848  * Helper function for iterating across a vma list.  It ensures that the caller
1849  * will visit `gate_vma' prior to terminating the search.
1850  */
1851 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1852                                         struct vm_area_struct *gate_vma)
1853 {
1854         struct vm_area_struct *ret;
1855
1856         ret = this_vma->vm_next;
1857         if (ret)
1858                 return ret;
1859         if (this_vma == gate_vma)
1860                 return NULL;
1861         return gate_vma;
1862 }
1863
1864 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1865                              elf_addr_t e_shoff, int segs)
1866 {
1867         elf->e_shoff = e_shoff;
1868         elf->e_shentsize = sizeof(*shdr4extnum);
1869         elf->e_shnum = 1;
1870         elf->e_shstrndx = SHN_UNDEF;
1871
1872         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1873
1874         shdr4extnum->sh_type = SHT_NULL;
1875         shdr4extnum->sh_size = elf->e_shnum;
1876         shdr4extnum->sh_link = elf->e_shstrndx;
1877         shdr4extnum->sh_info = segs;
1878 }
1879
1880 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1881                                      unsigned long mm_flags)
1882 {
1883         struct vm_area_struct *vma;
1884         size_t size = 0;
1885
1886         for (vma = first_vma(current, gate_vma); vma != NULL;
1887              vma = next_vma(vma, gate_vma))
1888                 size += vma_dump_size(vma, mm_flags);
1889         return size;
1890 }
1891
/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 *
 * Output order: ELF header, note program header, one PT_LOAD program header
 * per vma (plus any extra arch headers), the notes, padding up to the data
 * offset, the vma contents, extra arch data and, when extended numbering is
 * in use, one section header.
 *
 * Returns 1 once the note information has been collected (even if a later
 * write fails or the size limit is reached), 0 otherwise.
 */
static int elf_core_dump(struct coredump_params *cprm)
{
        int has_dumped = 0;
        mm_segment_t fs;
        int segs;
        size_t size = 0;        /* bytes counted against cprm->limit */
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        loff_t offset = 0, dataoff, foffset;
        struct elf_note_info info;
        struct elf_phdr *phdr4note = NULL;
        struct elf_shdr *shdr4extnum = NULL;
        Elf_Half e_phnum;
        elf_addr_t e_shoff;

        /*
         * We no longer stop all VM operations.
         * 
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated. So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */
  
        /* alloc memory for large data structures: too large to be on stack */
        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
        if (!elf)
                goto out;
        /*
         * The number of segs is recorded into the ELF header as a 16bit value.
         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
         */
        segs = current->mm->map_count;
        segs += elf_core_extra_phdrs();

        gate_vma = get_gate_vma(current->mm);
        if (gate_vma != NULL)
                segs++;

        /* for notes section */
        segs++;

        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
         * this, kernel supports extended numbering. Have a look at
         * include/linux/elf.h for further information. */
        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

        /*
         * Collect all the non-memory information about the process for the
         * notes.  This also sets up the file header.
         */
        if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
                goto cleanup;

        has_dumped = 1;
        current->flags |= PF_DUMPCORE;
  
        /* Saved here, restored at end_coredump. */
        fs = get_fs();
        set_fs(KERNEL_DS);

        /* First pass: compute where each part of the file will live. */
        offset += sizeof(*elf);                         /* Elf header */
        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
        foffset = offset;       /* the notes start right after the headers */

        /* Write notes phdr entry */
        {
                size_t sz = get_note_info_size(&info);

                sz += elf_coredump_extra_notes_size();

                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
                if (!phdr4note)
                        goto end_coredump;

                fill_elf_note_phdr(phdr4note, sz, offset);
                offset += sz;
        }

        /* Memory contents start at the next ELF_EXEC_PAGESIZE boundary. */
        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

        /* The section header, if needed, goes after all dumped data. */
        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
        offset += elf_core_extra_data_size();
        e_shoff = offset;

        if (e_phnum == PN_XNUM) {
                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
                if (!shdr4extnum)
                        goto end_coredump;
                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
        }

        /* Rewind: offset now tracks each segment's p_offset. */
        offset = dataoff;

        /* Second pass: write everything out, checking the limit as we go. */
        size += sizeof(*elf);
        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
                goto end_coredump;

        size += sizeof(*phdr4note);
        if (size > cprm->limit
            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
                goto end_coredump;

        /* Write program headers for segments dump */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                struct elf_phdr phdr;

                phdr.p_type = PT_LOAD;
                phdr.p_offset = offset;
                phdr.p_vaddr = vma->vm_start;
                phdr.p_paddr = 0;
                /* p_filesz may be smaller than p_memsz for partly dumped vmas */
                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
                phdr.p_memsz = vma->vm_end - vma->vm_start;
                offset += phdr.p_filesz;
                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
                if (vma->vm_flags & VM_WRITE)
                        phdr.p_flags |= PF_W;
                if (vma->vm_flags & VM_EXEC)
                        phdr.p_flags |= PF_X;
                phdr.p_align = ELF_EXEC_PAGESIZE;

                size += sizeof(phdr);
                if (size > cprm->limit
                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
                        goto end_coredump;
        }

        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
                goto end_coredump;

        /* write out the notes section */
        if (!write_note_info(&info, cprm->file, &foffset))
                goto end_coredump;

        /*
         * NOTE(review): unlike the other writers here, a non-zero return is
         * treated as failure - confirm against the helper's definition.
         */
        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
                goto end_coredump;

        /* Align to page */
        if (!dump_seek(cprm->file, dataoff - foffset))
                goto end_coredump;

        /* Dump the memory contents, one page at a time. */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                unsigned long addr;
                unsigned long end;

                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);

                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
                        struct page *page;
                        int stop;

                        page = get_dump_page(addr);
                        if (page) {
                                void *kaddr = kmap(page);
                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
                                        !dump_write(cprm->file, kaddr,
                                                    PAGE_SIZE);
                                kunmap(page);
                                page_cache_release(page);
                        } else
                                /* no page to dump: seek ahead instead of writing */
                                stop = !dump_seek(cprm->file, PAGE_SIZE);
                        if (stop)
                                goto end_coredump;
                }
        }

        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
                goto end_coredump;

        /* With extended numbering the section header closes the file. */
        if (e_phnum == PN_XNUM) {
                size += sizeof(*shdr4extnum);
                if (size > cprm->limit
                    || !dump_write(cprm->file, shdr4extnum,
                                   sizeof(*shdr4extnum)))
                        goto end_coredump;
        }

end_coredump:
        set_fs(fs);

cleanup:
        /* Pointers that were never allocated are still NULL here. */
        free_note_info(&info);
        kfree(shdr4extnum);
        kfree(phdr4note);
        kfree(elf);
out:
        return has_dumped;
}
2091
2092 #endif          /* CONFIG_ELF_CORE */
2093
2094 static int __init init_elf_binfmt(void)
2095 {
2096         return register_binfmt(&elf_format);
2097 }
2098
/* Module exit hook: undo the registration done by init_elf_binfmt(). */
static void __exit exit_elf_binfmt(void)
{
        /* Remove the ELF loader. */
        unregister_binfmt(&elf_format);
}
2104
/* Hook up the init and exit routines above and declare the module licence. */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");