/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
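/*
 * For illustration, with ELF_MIN_ALIGN == 0x1000:
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x00345  (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to the next page)
 */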

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

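/*
 * Extend the brk region to cover [start, end): map anonymous pages for
 * any whole pages between the two addresses and record the new break.
 * Both addresses are rounded up to ELF_MIN_ALIGN, so any fractional
 * page at 'start' is left for the callers to clear (see padzero()).
 */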
static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  They would otherwise
   contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

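/*
 * create_elf_tables() lays out the new process stack roughly as follows
 * (lowest address first, per the System V ABI):
 *
 *   argc
 *   argv[0] ... argv[argc-1], NULL
 *   envp[0] ... envp[envc-1], NULL
 *   auxv (id, value) pairs, terminated by AT_NULL
 *   ... platform strings, AT_RANDOM bytes, arg/env strings above ...
 */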
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1 cache
         * evictions by processes running on the same package.  One thing
         * we can do is shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.  Update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT()
         * entries in ARCH_DLINFO changes.
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

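/*
 * Map a PT_LOAD segment.  mmap() requires a page-aligned file offset,
 * so the mapping below starts at the page containing the segment; this
 * relies on the ELF rule that p_vaddr and p_offset are congruent
 * modulo the page size.
 */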
static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero file size is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image and then unmap the remainder
         * at the end (the unmap is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */

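/*
 * Total size, in bytes, from the page containing the first PT_LOAD
 * segment to the end of the last one; this is what the first mmap of
 * an ET_DYN image must reserve so the later segments land inside it.
 */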
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

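/*
 * With the default STACK_RND_MASK and 4K pages this randomizes the
 * stack top by up to 0x7ff pages, i.e. just under 8MB of VA.
 */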
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NUL-terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, this is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl or explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

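                /*
                 * load_addr is where the first segment's ELF header would
                 * sit in memory (p_vaddr - p_offset); for ET_DYN it must
                 * also absorb the bias between the address we requested
                 * and where the mapping actually landed.
                 */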
                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386, %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading a
   library through the old uselib() interface that has an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in every
 * core dump.  That way we ensure that the core dump is fully interpretable
 * later without having to match up the same kernel and hardware config to
 * see what the PC values meant.  These special mappings include the vDSO,
 * the vsyscall page, and other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide how much of a segment to dump: all of it, part of it, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
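        /*
         * Each FILTER() test checks an MMF_DUMP_* bit in the per-mm dump
         * filter, which userspace tunes via /proc/<pid>/coredump_filter.
         */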

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

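/*
 * On disk an ELF note is the elf_note header followed by the name and
 * the descriptor, each padded to a 4-byte boundary:
 *
 *   n_namesz  n_descsz  n_type  name... pad  desc... pad
 */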
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * Fill in all the fields in prstatus from the given task struct, except
 * the registers, which need to be filled in separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_t utime, stime;

                task_cputime(p, &utime, &stime);
                cputime_to_timeval(utime, &prstatus->pr_utime);
                cputime_to_timeval(stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

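/*
 * saved_auxv is a series of (id, value) pairs ending with an AT_NULL
 * pair; walk it to find how many bytes the NT_AUXV note must cover.
 */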
static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

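/*
 * copy_siginfo_to_user() expects a user pointer; temporarily switching
 * the address limit to KERNEL_DS lets it fill the kernel-side csigdata
 * buffer that the NT_SIGINFO note then points at.
 */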
static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
                siginfo_t *siginfo)
{
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
        set_fs(old_fs);
        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
1415 static void fill_files_note(struct memelfnote *note)
1416 {
1417         struct vm_area_struct *vma;
1418         unsigned count, size, names_ofs, remaining, n;
1419         user_long_t *data;
1420         user_long_t *start_end_ofs;
1421         char *name_base, *name_curpos;
1422
1423         /* *Estimated* file count and total data size needed */
1424         count = current->mm->map_count;
1425         size = count * 64;
1426
1427         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1428  alloc:
1429         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1430                 return -EINVAL;
1431         size = round_up(size, PAGE_SIZE);
1432         data = vmalloc(size);
1433         if (!data)
1434                 return -ENOMEM;
1435
1436         start_end_ofs = data + 2;
1437         name_base = name_curpos = ((char *)data) + names_ofs;
1438         remaining = size - names_ofs;
1439         count = 0;
1440         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1441                 struct file *file;
1442                 const char *filename;
1443
1444                 file = vma->vm_file;
1445                 if (!file)
1446                         continue;
1447                 filename = d_path(&file->f_path, name_curpos, remaining);
1448                 if (IS_ERR(filename)) {
1449                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1450                                 vfree(data);
1451                                 size = size * 5 / 4;
1452                                 goto alloc;
1453                         }
1454                         continue;
1455                 }
1456
1457                 /* d_path() fills at the end, move name down */
1458                 /* n = strlen(filename) + 1: */
1459                 n = (name_curpos + remaining) - filename;
1460                 remaining = filename - name_curpos;
1461                 memmove(name_curpos, filename, n);
1462                 name_curpos += n;
1463
1464                 *start_end_ofs++ = vma->vm_start;
1465                 *start_end_ofs++ = vma->vm_end;
1466                 *start_end_ofs++ = vma->vm_pgoff;
1467                 count++;
1468         }
1469
1470         /* Now we know the exact count of files and can store it */
1471         data[0] = count;
1472         data[1] = PAGE_SIZE;
1473         /*
1474          * The final count is usually less than current->mm->map_count,
1475          * so we need to move the filenames down accordingly.
1476          */
1477         n = current->mm->map_count - count;
1478         if (n != 0) {
1479                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1480                 memmove(name_base - shift_bytes, name_base,
1481                         name_curpos - name_base);
1482                 name_curpos -= shift_bytes;
1483         }
1484
1485         size = name_curpos - (char *)data;
1486         fill_note(note, "CORE", NT_FILE, size, data);
1487         return 0;
1488 }
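
/*
 * Editorial sketch, not part of the original kernel source: a minimal
 * userspace reader for the NT_FILE layout documented above, assuming the
 * note payload has already been extracted into a buffer.  The name
 * print_nt_file() is hypothetical; the code belongs in a userspace tool
 * built with <stdio.h>/<string.h>, so it is kept compiled-out here.
 */
#if 0
static void print_nt_file(const long *data)
{
        long count = data[0];
        long page_size = data[1];               /* units for file_ofs */
        const long *ent = data + 2;             /* start/end/file_ofs triples */
        const char *name = (const char *)(ent + 3 * count);
        long i;

        for (i = 0; i < count; i++, ent += 3) {
                printf("%lx-%lx at file offset %lx: %s\n",
                       ent[0], ent[1], ent[2] * page_size, name);
                name += strlen(name) + 1;       /* names are NUL-separated */
        }
}
#endif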
1489
1490 #ifdef CORE_DUMP_USE_REGSET
1491 #include <linux/regset.h>
1492
1493 struct elf_thread_core_info {
1494         struct elf_thread_core_info *next;
1495         struct task_struct *task;
1496         struct elf_prstatus prstatus;
1497         struct memelfnote notes[];
1498 };
1499
1500 struct elf_note_info {
1501         struct elf_thread_core_info *thread;
1502         struct memelfnote psinfo;
1503         struct memelfnote signote;
1504         struct memelfnote auxv;
1505         struct memelfnote files;
1506         user_siginfo_t csigdata;
1507         size_t size;
1508         int thread_notes;
1509 };
1510
1511 /*
1512  * When a regset has a writeback hook, we call it on each thread before
1513  * dumping user memory.  On register window machines, this makes sure the
1514  * user memory backing the register data is up to date before we read it.
1515  */
1516 static void do_thread_regset_writeback(struct task_struct *task,
1517                                        const struct user_regset *regset)
1518 {
1519         if (regset->writeback)
1520                 regset->writeback(task, regset, 1);
1521 }
1522
1523 #ifndef PR_REG_SIZE
1524 #define PR_REG_SIZE(S) sizeof(S)
1525 #endif
1526
1527 #ifndef PRSTATUS_SIZE
1528 #define PRSTATUS_SIZE(S) sizeof(S)
1529 #endif
1530
1531 #ifndef PR_REG_PTR
1532 #define PR_REG_PTR(S) (&((S)->pr_reg))
1533 #endif
1534
1535 #ifndef SET_PR_FPVALID
1536 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1537 #endif
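
/*
 * The macros above are override hooks: an architecture may redefine them
 * (for compat tasks, for instance) to change how the prstatus note is
 * sized and filled without modifying this generic code.
 */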
1538
1539 static int fill_thread_core_info(struct elf_thread_core_info *t,
1540                                  const struct user_regset_view *view,
1541                                  long signr, size_t *total)
1542 {
1543         unsigned int i;
1544
1545         /*
1546          * NT_PRSTATUS is the one special case, because the regset data
1547          * goes into the pr_reg field inside the note contents, rather
1548          * than being the whole note contents.  We fill the rest in here.
1549          * We assume that regset 0 is NT_PRSTATUS.
1550          */
1551         fill_prstatus(&t->prstatus, t->task, signr);
1552         (void) view->regsets[0].get(t->task, &view->regsets[0],
1553                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1554                                     PR_REG_PTR(&t->prstatus), NULL);
1555
1556         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1557                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1558         *total += notesize(&t->notes[0]);
1559
1560         do_thread_regset_writeback(t->task, &view->regsets[0]);
1561
1562         /*
1563          * Each of the other regsets may generate a note too.  For each regset
1564          * that has no core_note_type or is inactive, we leave t->notes[i]
1565          * all zero and we'll know to skip writing it later.
1566          */
1567         for (i = 1; i < view->n; ++i) {
1568                 const struct user_regset *regset = &view->regsets[i];
1569                 do_thread_regset_writeback(t->task, regset);
1570                 if (regset->core_note_type && regset->get &&
1571                     (!regset->active || regset->active(t->task, regset))) {
1572                         int ret;
1573                         size_t size = regset->n * regset->size;
1574                         void *data = kmalloc(size, GFP_KERNEL);
1575                         if (unlikely(!data))
1576                                 return 0;
1577                         ret = regset->get(t->task, regset,
1578                                           0, size, data, NULL);
1579                         if (unlikely(ret))
1580                                 kfree(data);
1581                         else {
1582                                 if (regset->core_note_type != NT_PRFPREG)
1583                                         fill_note(&t->notes[i], "LINUX",
1584                                                   regset->core_note_type,
1585                                                   size, data);
1586                                 else {
1587                                         SET_PR_FPVALID(&t->prstatus, 1);
1588                                         fill_note(&t->notes[i], "CORE",
1589                                                   NT_PRFPREG, size, data);
1590                                 }
1591                                 *total += notesize(&t->notes[i]);
1592                         }
1593                 }
1594         }
1595
1596         return 1;
1597 }
1598
1599 static int fill_note_info(struct elfhdr *elf, int phdrs,
1600                           struct elf_note_info *info,
1601                           siginfo_t *siginfo, struct pt_regs *regs)
1602 {
1603         struct task_struct *dump_task = current;
1604         const struct user_regset_view *view = task_user_regset_view(dump_task);
1605         struct elf_thread_core_info *t;
1606         struct elf_prpsinfo *psinfo;
1607         struct core_thread *ct;
1608         unsigned int i;
1609
1610         info->size = 0;
1611         info->thread = NULL;
1612
1613         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1614         if (psinfo == NULL) {
1615                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1616                 return 0;
1617         }
1618
1619         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1620
1621         /*
1622          * Figure out how many notes we're going to need for each thread.
1623          */
1624         info->thread_notes = 0;
1625         for (i = 0; i < view->n; ++i)
1626                 if (view->regsets[i].core_note_type != 0)
1627                         ++info->thread_notes;
1628
1629         /*
1630          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1631          * since it is our one special case.
1632          */
1633         if (unlikely(info->thread_notes == 0) ||
1634             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1635                 WARN_ON(1);
1636                 return 0;
1637         }
1638
1639         /*
1640          * Initialize the ELF file header.
1641          */
1642         fill_elf_header(elf, phdrs,
1643                         view->e_machine, view->e_flags);
1644
1645         /*
1646          * Allocate a structure for each thread.
1647          */
1648         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1649                 t = kzalloc(offsetof(struct elf_thread_core_info,
1650                                      notes[info->thread_notes]),
1651                             GFP_KERNEL);
1652                 if (unlikely(!t))
1653                         return 0;
1654
1655                 t->task = ct->task;
1656                 if (ct->task == dump_task || !info->thread) {
1657                         t->next = info->thread;
1658                         info->thread = t;
1659                 } else {
1660                         /*
1661                          * Make sure to keep the original task at
1662                          * the head of the list.
1663                          */
1664                         t->next = info->thread->next;
1665                         info->thread->next = t;
1666                 }
1667         }
1668
1669         /*
1670          * Now fill in each thread's information.
1671          */
1672         for (t = info->thread; t != NULL; t = t->next)
1673                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1674                         return 0;
1675
1676         /*
1677          * Fill in the two process-wide notes.
1678          */
1679         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1680         info->size += notesize(&info->psinfo);
1681
1682         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1683         info->size += notesize(&info->signote);
1684
1685         fill_auxv_note(&info->auxv, current->mm);
1686         info->size += notesize(&info->auxv);
1687
1688         if (fill_files_note(&info->files) == 0)
1689                 info->size += notesize(&info->files);
1690
1691         return 1;
1692 }
1693
1694 static size_t get_note_info_size(struct elf_note_info *info)
1695 {
1696         return info->size;
1697 }
1698
1699 /*
1700  * Write all the notes for each thread.  When writing the first thread, the
1701  * process-wide notes are interleaved after the first thread-specific note.
1702  */
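/*
 * For threads t1..tN this yields the note order:
 *
 *	t1 PRSTATUS, PSINFO, SIGINFO, AUXV, FILE, t1 extra regset notes,
 *	t2 PRSTATUS, t2 extra regset notes, ..., tN PRSTATUS, tN extra notes.
 */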
1703 static int write_note_info(struct elf_note_info *info,
1704                            struct file *file, loff_t *foffset)
1705 {
1706         bool first = true;
1707         struct elf_thread_core_info *t = info->thread;
1708
1709         do {
1710                 int i;
1711
1712                 if (!writenote(&t->notes[0], file, foffset))
1713                         return 0;
1714
1715                 if (first && !writenote(&info->psinfo, file, foffset))
1716                         return 0;
1717                 if (first && !writenote(&info->signote, file, foffset))
1718                         return 0;
1719                 if (first && !writenote(&info->auxv, file, foffset))
1720                         return 0;
1721                 if (first && info->files.data &&
                         !writenote(&info->files, file, foffset))
1722                         return 0;
1723
1724                 for (i = 1; i < info->thread_notes; ++i)
1725                         if (t->notes[i].data &&
1726                             !writenote(&t->notes[i], file, foffset))
1727                                 return 0;
1728
1729                 first = 0;
1730                 t = t->next;
1731         } while (t);
1732
1733         return 1;
1734 }
1735
1736 static void free_note_info(struct elf_note_info *info)
1737 {
1738         struct elf_thread_core_info *threads = info->thread;
1739         while (threads) {
1740                 unsigned int i;
1741                 struct elf_thread_core_info *t = threads;
1742                 threads = t->next;
1743                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1744                 for (i = 1; i < info->thread_notes; ++i)
1745                         kfree(t->notes[i].data);
1746                 kfree(t);
1747         }
1748         kfree(info->psinfo.data);
1749         vfree(info->files.data);
1750 }
1751
1752 #else
1753
1754 /* Here is the structure in which the status of each thread is captured. */
1755 struct elf_thread_status
1756 {
1757         struct list_head list;
1758         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1759         elf_fpregset_t fpu;             /* NT_PRFPREG */
1760         struct task_struct *thread;
1761 #ifdef ELF_CORE_COPY_XFPREGS
1762         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1763 #endif
1764         struct memelfnote notes[3];
1765         int num_notes;
1766 };
1767
1768 /*
1769  * In order to add the per-thread information to the ELF core file, we
1770  * keep a linked list of every thread's pr_status and then create a
1771  * single note section for them in the final core file.
1772  */
1773 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1774 {
1775         int sz = 0;
1776         struct task_struct *p = t->thread;
1777         t->num_notes = 0;
1778
1779         fill_prstatus(&t->prstatus, p, signr);
1780         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1781         
1782         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1783                   &(t->prstatus));
1784         t->num_notes++;
1785         sz += notesize(&t->notes[0]);
1786
1787         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1788                                                                 &t->fpu))) {
1789                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1790                           &(t->fpu));
1791                 t->num_notes++;
1792                 sz += notesize(&t->notes[1]);
1793         }
1794
1795 #ifdef ELF_CORE_COPY_XFPREGS
1796         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1797                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1798                           sizeof(t->xfpu), &t->xfpu);
1799                 t->num_notes++;
1800                 sz += notesize(&t->notes[2]);
1801         }
1802 #endif  
1803         return sz;
1804 }
1805
1806 struct elf_note_info {
1807         struct memelfnote *notes;
             struct memelfnote *notes_files;  /* NT_FILE entry in notes[], if filled */
1808         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1809         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1810         struct list_head thread_list;
1811         elf_fpregset_t *fpu;
1812 #ifdef ELF_CORE_COPY_XFPREGS
1813         elf_fpxregset_t *xfpu;
1814 #endif
1815         user_siginfo_t csigdata;
1816         int thread_status_size;
1817         int numnote;
1818 };
1819
1820 static int elf_note_info_init(struct elf_note_info *info)
1821 {
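        /*
         * Allocation failures below return 0 without unwinding; the caller
         * is expected to run free_note_info(), which is why the structure
         * is zeroed up front.
         */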
1822         memset(info, 0, sizeof(*info));
1823         INIT_LIST_HEAD(&info->thread_list);
1824
1825         /* Allocate space for ELF notes */
1826         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1827         if (!info->notes)
1828                 return 0;
1829         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1830         if (!info->psinfo)
1831                 return 0;
1832         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1833         if (!info->prstatus)
1834                 return 0;
1835         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1836         if (!info->fpu)
1837                 return 0;
1838 #ifdef ELF_CORE_COPY_XFPREGS
1839         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1840         if (!info->xfpu)
1841                 return 0;
1842 #endif
1843         return 1;
1844 }
1845
1846 static int fill_note_info(struct elfhdr *elf, int phdrs,
1847                           struct elf_note_info *info,
1848                           siginfo_t *siginfo, struct pt_regs *regs)
1849 {
1850         struct list_head *t;
1851
1852         if (!elf_note_info_init(info))
1853                 return 0;
1854
1855         if (siginfo->si_signo) {
1856                 struct core_thread *ct;
1857                 struct elf_thread_status *ets;
1858
1859                 for (ct = current->mm->core_state->dumper.next;
1860                                                 ct; ct = ct->next) {
1861                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1862                         if (!ets)
1863                                 return 0;
1864
1865                         ets->thread = ct->task;
1866                         list_add(&ets->list, &info->thread_list);
1867                 }
1868
1869                 list_for_each(t, &info->thread_list) {
1870                         int sz;
1871
1872                         ets = list_entry(t, struct elf_thread_status, list);
1873                         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1874                         info->thread_status_size += sz;
1875                 }
1876         }
1877         /* now collect the dump for the current task */
1878         memset(info->prstatus, 0, sizeof(*info->prstatus));
1879         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1880         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1881
1882         /* Set up header */
1883         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1884
1885         /*
1886          * Set up the notes in similar form to SVR4 core dumps made
1887          * with info from their /proc.
1888          */
1889
1890         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1891                   sizeof(*info->prstatus), info->prstatus);
1892         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1893         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1894                   sizeof(*info->psinfo), info->psinfo);
1895
1896         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1897         fill_auxv_note(info->notes + 3, current->mm);
1898
1899         info->numnote = 4;

             /* Skip the NT_FILE note entirely if it could not be filled. */
             if (fill_files_note(info->notes + info->numnote) == 0) {
                     info->notes_files = info->notes + info->numnote;
                     info->numnote++;
             }
1901
1902         /* Try to dump the FPU. */
1903         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1904                                                                info->fpu);
1905         if (info->prstatus->pr_fpvalid)
1906                 fill_note(info->notes + info->numnote++,
1907                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1908 #ifdef ELF_CORE_COPY_XFPREGS
1909         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1910                 fill_note(info->notes + info->numnote++,
1911                           "LINUX", ELF_CORE_XFPREG_TYPE,
1912                           sizeof(*info->xfpu), info->xfpu);
1913 #endif
1914
1915         return 1;
1916 }
1917
1918 static size_t get_note_info_size(struct elf_note_info *info)
1919 {
1920         int sz = 0;
1921         int i;
1922
1923         for (i = 0; i < info->numnote; i++)
1924                 sz += notesize(info->notes + i);
1925
1926         sz += info->thread_status_size;
1927
1928         return sz;
1929 }
1930
1931 static int write_note_info(struct elf_note_info *info,
1932                            struct file *file, loff_t *foffset)
1933 {
1934         int i;
1935         struct list_head *t;
1936
1937         for (i = 0; i < info->numnote; i++)
1938                 if (!writenote(info->notes + i, file, foffset))
1939                         return 0;
1940
1941         /* write out the thread status notes section */
1942         list_for_each(t, &info->thread_list) {
1943                 struct elf_thread_status *tmp =
1944                                 list_entry(t, struct elf_thread_status, list);
1945
1946                 for (i = 0; i < tmp->num_notes; i++)
1947                         if (!writenote(&tmp->notes[i], file, foffset))
1948                                 return 0;
1949         }
1950
1951         return 1;
1952 }
1953
1954 static void free_note_info(struct elf_note_info *info)
1955 {
1956         while (!list_empty(&info->thread_list)) {
1957                 struct list_head *tmp = info->thread_list.next;
1958                 list_del(tmp);
1959                 kfree(list_entry(tmp, struct elf_thread_status, list));
1960         }
1961
1962         /* Free data allocated by fill_files_note(), if it was filled: */
1963         if (info->notes_files)
                     vfree(info->notes_files->data);
1964
1965         kfree(info->prstatus);
1966         kfree(info->psinfo);
1967         kfree(info->notes);
1968         kfree(info->fpu);
1969 #ifdef ELF_CORE_COPY_XFPREGS
1970         kfree(info->xfpu);
1971 #endif
1972 }
1973
1974 #endif
1975
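/*
 * Starting point for the dump's vma walk: the first mapped vma when the
 * mm has any, otherwise fall back to the gate vma (e.g. the vsyscall
 * page) so that it is still visited.
 */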
1976 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1977                                         struct vm_area_struct *gate_vma)
1978 {
1979         struct vm_area_struct *ret = tsk->mm->mmap;
1980
1981         if (ret)
1982                 return ret;
1983         return gate_vma;
1984 }
1985 /*
1986  * Helper function for iterating across a vma list.  It ensures that the caller
1987  * will visit `gate_vma' prior to terminating the search.
1988  */
1989 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1990                                         struct vm_area_struct *gate_vma)
1991 {
1992         struct vm_area_struct *ret;
1993
1994         ret = this_vma->vm_next;
1995         if (ret)
1996                 return ret;
1997         if (this_vma == gate_vma)
1998                 return NULL;
1999         return gate_vma;
2000 }
2001
2002 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2003                              elf_addr_t e_shoff, int segs)
2004 {
2005         elf->e_shoff = e_shoff;
2006         elf->e_shentsize = sizeof(*shdr4extnum);
2007         elf->e_shnum = 1;
2008         elf->e_shstrndx = SHN_UNDEF;
2009
2010         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2011
2012         shdr4extnum->sh_type = SHT_NULL;
2013         shdr4extnum->sh_size = elf->e_shnum;
2014         shdr4extnum->sh_link = elf->e_shstrndx;
2015         shdr4extnum->sh_info = segs;
2016 }
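
/*
 * Editorial note: with extended numbering in effect, a consumer of the
 * core file recovers the real segment count from section header 0 rather
 * than from the ELF header itself, roughly:
 *
 *	segs = (ehdr->e_phnum == PN_XNUM) ? shdr0->sh_info : ehdr->e_phnum;
 */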
2017
2018 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2019                                      unsigned long mm_flags)
2020 {
2021         struct vm_area_struct *vma;
2022         size_t size = 0;
2023
2024         for (vma = first_vma(current, gate_vma); vma != NULL;
2025              vma = next_vma(vma, gate_vma))
2026                 size += vma_dump_size(vma, mm_flags);
2027         return size;
2028 }
2029
2030 /*
2031  * Actual dumper
2032  *
2033  * This is a two-pass process; first we compute the file offsets of all
2034  * the pieces, and then they are actually written out.  If we exceed the
2035  * core limit, the dump is simply truncated.
2036  */
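/*
 * The resulting core file layout, in file order:
 *
 *	ELF header | program headers | note segment | page-aligned vma
 *	data | extra (arch) data | extnum section header (PN_XNUM only)
 */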
2037 static int elf_core_dump(struct coredump_params *cprm)
2038 {
2039         int has_dumped = 0;
2040         mm_segment_t fs;
2041         int segs;
2042         size_t size = 0;
2043         struct vm_area_struct *vma, *gate_vma;
2044         struct elfhdr *elf = NULL;
2045         loff_t offset = 0, dataoff, foffset;
2046         struct elf_note_info info = { };
2047         struct elf_phdr *phdr4note = NULL;
2048         struct elf_shdr *shdr4extnum = NULL;
2049         Elf_Half e_phnum;
2050         elf_addr_t e_shoff;
2051
2052         /*
2053          * We no longer stop all VM operations.
2054          * 
2055          * This is because those processes that could possibly change map_count
2056          * or the mmap / vma pages are now blocked in do_exit on current
2057          * finishing this core dump.
2058          *
2059          * Only ptrace can touch these memory addresses, but it doesn't change
2060          * the map_count or the pages allocated. So no possibility of crashing
2061          * exists while dumping the mm->vm_next areas to the core file.
2062          */
2063   
2064         /* alloc memory for large data structures: too large to be on stack */
2065         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2066         if (!elf)
2067                 goto out;
2068         /*
2069          * The number of segs is recorded in the ELF header as a 16-bit value.
2070          * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying here.
2071          */
2072         segs = current->mm->map_count;
2073         segs += elf_core_extra_phdrs();
2074
2075         gate_vma = get_gate_vma(current->mm);
2076         if (gate_vma != NULL)
2077                 segs++;
2078
2079         /* for notes section */
2080         segs++;
2081
2082         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2083          * this, the kernel supports extended numbering. Have a look at
2084          * include/linux/elf.h for further information. */
2085         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2086
2087         /*
2088          * Collect all the non-memory information about the process for the
2089          * notes.  This also sets up the file header.
2090          */
2091         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2092                 goto cleanup;
2093
2094         has_dumped = 1;
2095
2096         fs = get_fs();
2097         set_fs(KERNEL_DS);
2098
2099         offset += sizeof(*elf);                         /* Elf header */
2100         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2101         foffset = offset;
2102
2103         /* Write notes phdr entry */
2104         {
2105                 size_t sz = get_note_info_size(&info);
2106
2107                 sz += elf_coredump_extra_notes_size();
2108
2109                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2110                 if (!phdr4note)
2111                         goto end_coredump;
2112
2113                 fill_elf_note_phdr(phdr4note, sz, offset);
2114                 offset += sz;
2115         }
2116
2117         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2118
2119         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2120         offset += elf_core_extra_data_size();
2121         e_shoff = offset;
2122
2123         if (e_phnum == PN_XNUM) {
2124                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2125                 if (!shdr4extnum)
2126                         goto end_coredump;
2127                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2128         }
2129
2130         offset = dataoff;
2131
2132         size += sizeof(*elf);
2133         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2134                 goto end_coredump;
2135
2136         size += sizeof(*phdr4note);
2137         if (size > cprm->limit
2138             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2139                 goto end_coredump;
2140
2141         /* Write program headers for segments dump */
2142         for (vma = first_vma(current, gate_vma); vma != NULL;
2143                         vma = next_vma(vma, gate_vma)) {
2144                 struct elf_phdr phdr;
2145
2146                 phdr.p_type = PT_LOAD;
2147                 phdr.p_offset = offset;
2148                 phdr.p_vaddr = vma->vm_start;
2149                 phdr.p_paddr = 0;
2150                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2151                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2152                 offset += phdr.p_filesz;
2153                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2154                 if (vma->vm_flags & VM_WRITE)
2155                         phdr.p_flags |= PF_W;
2156                 if (vma->vm_flags & VM_EXEC)
2157                         phdr.p_flags |= PF_X;
2158                 phdr.p_align = ELF_EXEC_PAGESIZE;
2159
2160                 size += sizeof(phdr);
2161                 if (size > cprm->limit
2162                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2163                         goto end_coredump;
2164         }
2165
2166         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2167                 goto end_coredump;
2168
2169         /* write out the notes section */
2170         if (!write_note_info(&info, cprm->file, &foffset))
2171                 goto end_coredump;
2172
2173         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2174                 goto end_coredump;
2175
2176         /* Align to page */
2177         if (!dump_seek(cprm->file, dataoff - foffset))
2178                 goto end_coredump;
2179
2180         for (vma = first_vma(current, gate_vma); vma != NULL;
2181                         vma = next_vma(vma, gate_vma)) {
2182                 unsigned long addr;
2183                 unsigned long end;
2184
2185                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2186
2187                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2188                         struct page *page;
2189                         int stop;
2190
2191                         page = get_dump_page(addr);
2192                         if (page) {
2193                                 void *kaddr = kmap(page);
2194                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2195                                         !dump_write(cprm->file, kaddr,
2196                                                     PAGE_SIZE);
2197                                 kunmap(page);
2198                                 page_cache_release(page);
2199                         } else
2200                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2201                         if (stop)
2202                                 goto end_coredump;
2203                 }
2204         }
2205
2206         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2207                 goto end_coredump;
2208
2209         if (e_phnum == PN_XNUM) {
2210                 size += sizeof(*shdr4extnum);
2211                 if (size > cprm->limit
2212                     || !dump_write(cprm->file, shdr4extnum,
2213                                    sizeof(*shdr4extnum)))
2214                         goto end_coredump;
2215         }
2216
2217 end_coredump:
2218         set_fs(fs);
2219
2220 cleanup:
2221         free_note_info(&info);
2222         kfree(shdr4extnum);
2223         kfree(phdr4note);
2224         kfree(elf);
2225 out:
2226         return has_dumped;
2227 }
2228
2229 #endif          /* CONFIG_ELF_CORE */
2230
2231 static int __init init_elf_binfmt(void)
2232 {
2233         register_binfmt(&elf_format);
2234         return 0;
2235 }
2236
2237 static void __exit exit_elf_binfmt(void)
2238 {
2239         /* Remove the ELF loader. */
2240         unregister_binfmt(&elf_format);
2241 }
2242
2243 core_initcall(init_elf_binfmt);
2244 module_exit(exit_elf_binfmt);
2245 MODULE_LICENSE("GPL");