/* * linux/fs/exec.c * * Copyright (C) 1991, 1992 Linus Torvalds */ /* * #!-checking implemented by tytso. */ /* * Demand-loading implemented 01.12.91 - no need to read anything but * the header into memory. The inode of the executable is put into * "current->executable", and page faults do the actual loading. Clean. * * Once more I can proudly say that linux stood up to being changed: it * was less than 2 hours work to get demand-loading completely implemented. * * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead, * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary * formats. */ #include <linux/fs.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/a.out.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/ptrace.h> #include <linux/user.h> #include <linux/malloc.h> #include <linux/binfmts.h> #include <linux/personality.h> #include <asm/system.h> #include <asm/segment.h> #include <asm/pgtable.h> #include <linux/config.h> #ifdef CONFIG_KERNELD #include <linux/kerneld.h> #endif asmlinkage int sys_exit(int exit_code); asmlinkage int sys_brk(unsigned long); /* * Here are the actual binaries that will be accepted: * add more with "register_binfmt()" if using modules... * * These are defined again for the 'real' modules if you are using a * module definition for these routines. */ static struct linux_binfmt *formats = (struct linux_binfmt *) NULL; void binfmt_setup(void) { #ifdef CONFIG_BINFMT_ELF init_elf_binfmt(); #endif #ifdef CONFIG_BINFMT_AOUT init_aout_binfmt(); #endif #ifdef CONFIG_BINFMT_JAVA init_java_binfmt(); #endif /* This cannot be configured out of the kernel */ init_script_binfmt(); } int register_binfmt(struct linux_binfmt * fmt) { struct linux_binfmt ** tmp = &formats; if (!fmt) return -EINVAL; if (fmt->next) return -EBUSY; while (*tmp) { if (fmt == *tmp) return -EBUSY; tmp = &(*tmp)->next; } fmt->next = formats; formats = fmt; return 0; } #ifdef CONFIG_MODULES int unregister_binfmt(struct linux_binfmt * fmt) { struct linux_binfmt ** tmp = &formats; while (*tmp) { if (fmt == *tmp) { *tmp = fmt->next; return 0; } tmp = &(*tmp)->next; } return -EINVAL; } #endif /* CONFIG_MODULES */ int open_inode(struct inode * inode, int mode) { int fd; if (!inode->i_op || !inode->i_op->default_file_ops) return -EINVAL; fd = get_unused_fd(); if (fd >= 0) { struct file * f = get_empty_filp(); if (!f) { put_unused_fd(fd); return -ENFILE; } f->f_flags = mode; f->f_mode = (mode+1) & O_ACCMODE; f->f_inode = inode; f->f_pos = 0; f->f_reada = 0; f->f_op = inode->i_op->default_file_ops; if (f->f_op->open) { int error = f->f_op->open(inode,f); if (error) { f->f_count--; put_unused_fd(fd); return error; } } current->files->fd[fd] = f; inode->i_count++; } return fd; } /* * Note that a shared library must be both readable and executable due to * security reasons. * * Also note that we take the address to load from from the file itself. */ asmlinkage int sys_uselib(const char * library) { int fd, retval; struct file * file; struct linux_binfmt * fmt; fd = sys_open(library, 0, 0); if (fd < 0) return fd; file = current->files->fd[fd]; retval = -ENOEXEC; if (file && file->f_inode && file->f_op && file->f_op->read) { for (fmt = formats ; fmt ; fmt = fmt->next) { int (*fn)(int) = fmt->load_shlib; if (!fn) continue; retval = fn(fd); if (retval != -ENOEXEC) break; } } sys_close(fd); return retval; } /* * count() counts the number of arguments/envelopes * * We also do some limited EFAULT checking: this isn't complete, but * it does cover most cases. I'll have to do this correctly some day.. */ static int count(char ** argv) { int error, i = 0; char ** tmp, *p; if ((tmp = argv) != NULL) { error = verify_area(VERIFY_READ, tmp, sizeof(char *)); if (error) return error; while ((p = get_user(tmp++)) != NULL) { i++; error = verify_area(VERIFY_READ, p, 1); if (error) return error; } } return i; } /* * 'copy_string()' copies argument/envelope strings from user * memory to free pages in kernel mem. These are in a format ready * to be put directly into the top of new user memory. * * Modified by TYT, 11/24/91 to add the from_kmem argument, which specifies * whether the string and the string array are from user or kernel segments: * * from_kmem argv * argv ** * 0 user space user space * 1 kernel space user space * 2 kernel space kernel space * * We do this by playing games with the fs segment register. Since it * is expensive to load a segment register, we try to avoid calling * set_fs() unless we absolutely have to. */ unsigned long copy_strings(int argc,char ** argv,unsigned long *page, unsigned long p, int from_kmem) { char *tmp, *tmp1, *pag = NULL; int len, offset = 0; unsigned long old_fs, new_fs; if (!p) return 0; /* bullet-proofing */ new_fs = get_ds(); old_fs = get_fs(); if (from_kmem==2) set_fs(new_fs); while (argc-- > 0) { if (from_kmem == 1) set_fs(new_fs); if (!(tmp1 = tmp = get_user(argv+argc))) panic("VFS: argc is wrong"); if (from_kmem == 1) set_fs(old_fs); while (get_user(tmp++)); len = tmp - tmp1; if (p < len) { /* this shouldn't happen - 128kB */ set_fs(old_fs); return 0; } while (len) { --p; --tmp; --len; if (--offset < 0) { offset = p % PAGE_SIZE; if (from_kmem==2) set_fs(old_fs); if (!(pag = (char *) page[p/PAGE_SIZE]) && !(pag = (char *) page[p/PAGE_SIZE] = (unsigned long *) get_free_page(GFP_USER))) return 0; if (from_kmem==2) set_fs(new_fs); } if (len == 0 || offset == 0) *(pag + offset) = get_user(tmp); else { int bytes_to_copy = (len > offset) ? offset : len; tmp -= bytes_to_copy; p -= bytes_to_copy; offset -= bytes_to_copy; len -= bytes_to_copy; memcpy_fromfs(pag + offset, tmp, bytes_to_copy + 1); } } } if (from_kmem==2) set_fs(old_fs); return p; } unsigned long setup_arg_pages(unsigned long p, struct linux_binprm * bprm) { unsigned long stack_base; struct vm_area_struct *mpnt; int i; stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; p += stack_base; if (bprm->loader) bprm->loader += stack_base; bprm->exec += stack_base; mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL); if (mpnt) { mpnt->vm_mm = current->mm; mpnt->vm_start = PAGE_MASK & (unsigned long) p; mpnt->vm_end = STACK_TOP; mpnt->vm_page_prot = PAGE_COPY; mpnt->vm_flags = VM_STACK_FLAGS; mpnt->vm_ops = NULL; mpnt->vm_offset = 0; mpnt->vm_inode = NULL; mpnt->vm_pte = 0; insert_vm_struct(current->mm, mpnt); current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { if (bprm->page[i]) { current->mm->rss++; put_dirty_page(current,bprm->page[i],stack_base); } stack_base += PAGE_SIZE; } return p; } /* * Read in the complete executable. This is used for "-N" files * that aren't on a block boundary, and for files on filesystems * without bmap support. */ int read_exec(struct inode *inode, unsigned long offset, char * addr, unsigned long count, int to_kmem) { struct file file; int result = -ENOEXEC; if (!inode->i_op || !inode->i_op->default_file_ops) goto end_readexec; file.f_mode = 1; file.f_flags = 0; file.f_count = 1; file.f_inode = inode; file.f_pos = 0; file.f_reada = 0; file.f_op = inode->i_op->default_file_ops; if (file.f_op->open) if (file.f_op->open(inode,&file)) goto end_readexec; if (!file.f_op || !file.f_op->read) goto close_readexec; if (file.f_op->lseek) { if (file.f_op->lseek(inode,&file,offset,0) != offset) goto close_readexec; } else file.f_pos = offset; if (to_kmem) { unsigned long old_fs = get_fs(); set_fs(get_ds()); result = file.f_op->read(inode, &file, addr, count); set_fs(old_fs); } else { result = verify_area(VERIFY_WRITE, addr, count); if (result) goto close_readexec; result = file.f_op->read(inode, &file, addr, count); } close_readexec: if (file.f_op->release) file.f_op->release(inode,&file); end_readexec: return result; } static void exec_mmap(void) { /* * The clear_page_tables done later on exec does the right thing * to the page directory when shared, except for graceful abort * (the oom is wrong there, too, IMHO) */ if (current->mm->count > 1) { struct mm_struct *mm = kmalloc(sizeof(*mm), GFP_KERNEL); if (!mm) { /* this is wrong, I think. */ oom(current); return; } *mm = *current->mm; mm->def_flags = 0; /* should future lockings be kept? */ mm->count = 1; mm->mmap = NULL; mm->mmap_avl = NULL; mm->total_vm = 0; mm->rss = 0; current->mm->count--; current->mm = mm; new_page_tables(current); return; } exit_mmap(current->mm); clear_page_tables(current); } /* * These functions flushes out all traces of the currently running executable * so that a new one can be started */ static inline void flush_old_signals(struct signal_struct *sig) { int i; struct sigaction * sa = sig->action; for (i=32 ; i != 0 ; i--) { sa->sa_mask = 0; sa->sa_flags = 0; if (sa->sa_handler != SIG_IGN) sa->sa_handler = NULL; sa++; } } static inline void flush_old_files(struct files_struct * files) { unsigned long j; j = 0; for (;;) { unsigned long set, i; i = j * __NFDBITS; if (i >= NR_OPEN) break; set = files->close_on_exec.fds_bits[j]; files->close_on_exec.fds_bits[j] = 0; j++; for ( ; set ; i++,set >>= 1) { if (set & 1) sys_close(i); } } } void flush_old_exec(struct linux_binprm * bprm) { int i; int ch; char * name; if (current->euid == current->uid && current->egid == current->gid) current->dumpable = 1; name = bprm->filename; for (i=0; (ch = *(name++)) != '\0';) { if (ch == '/') i = 0; else if (i < 15) current->comm[i++] = ch; } current->comm[i] = '\0'; /* Release all of the old mmap stuff. */ exec_mmap(); flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || permission(bprm->inode,MAY_READ)) current->dumpable = 0; flush_old_signals(current->sig); flush_old_files(current->files); } /* * Fill the binprm structure from the inode. * Check permissions, then read the first 512 bytes */ int prepare_binprm(struct linux_binprm *bprm) { int mode; int retval,id_change; mode = bprm->inode->i_mode; if (!S_ISREG(mode)) /* must be regular file */ return -EACCES; if (!(mode & 0111)) /* with at least _one_ execute bit set */ return -EACCES; if (IS_NOEXEC(bprm->inode)) /* FS mustn't be mounted noexec */ return -EACCES; if (!bprm->inode->i_sb) return -EACCES; if ((retval = permission(bprm->inode, MAY_EXEC)) != 0) return retval; /* better not execute files which are being written to */ if (bprm->inode->i_writecount > 0) return -ETXTBSY; bprm->e_uid = current->euid; bprm->e_gid = current->egid; id_change = 0; /* Set-uid? */ if (mode & S_ISUID) { bprm->e_uid = bprm->inode->i_uid; if (bprm->e_uid != current->euid) id_change = 1; } /* Set-gid? */ /* * If setgid is set but no group execute bit then this * is a candidate for mandatory locking, not a setgid * executable. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { bprm->e_gid = bprm->inode->i_gid; if (!in_group_p(bprm->e_gid)) id_change = 1; } if (id_change) { /* We can't suid-execute if we're sharing parts of the executable */ /* or if we're being traced (or if suid execs are not allowed) */ /* (current->mm->count > 1 is ok, as we'll get a new mm anyway) */ if (IS_NOSUID(bprm->inode) || (current->flags & PF_PTRACED) || (current->fs->count > 1) || (current->sig->count > 1) || (current->files->count > 1)) { if (!suser()) return -EPERM; } } memset(bprm->buf,0,sizeof(bprm->buf)); return read_exec(bprm->inode,0,bprm->buf,128,1); } void remove_arg_zero(struct linux_binprm *bprm) { if (bprm->argc) { unsigned long offset; char * page; offset = bprm->p % PAGE_SIZE; page = (char*)bprm->page[bprm->p/PAGE_SIZE]; while(bprm->p++,*(page+offset++)) if(offset==PAGE_SIZE){ offset=0; page = (char*)bprm->page[bprm->p/PAGE_SIZE]; } bprm->argc--; } } /* * cycle the list of binary formats handler, until one recognizes the image */ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { int try,retval=0; struct linux_binfmt *fmt; #ifdef __alpha__ /* handle /sbin/loader.. */ { struct exec * eh = (struct exec *) bprm->buf; if (!bprm->loader && eh->fh.f_magic == 0x183 && (eh->fh.f_flags & 0x3000) == 0x3000) { char * dynloader[] = { "/sbin/loader" }; iput(bprm->inode); bprm->dont_iput = 1; remove_arg_zero(bprm); bprm->p = copy_strings(1, dynloader, bprm->page, bprm->p, 2); bprm->argc++; bprm->loader = bprm->p; retval = open_namei(dynloader[0], 0, 0, &bprm->inode, NULL); if (retval) return retval; bprm->dont_iput = 0; retval = prepare_binprm(bprm); if (retval<0) return retval; /* should call search_binary_handler recursively here, but it does not matter */ } } #endif for (try=0; try<2; try++) { for (fmt = formats ; fmt ; fmt = fmt->next) { int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary; if (!fn) continue; retval = fn(bprm, regs); if (retval >= 0) { if(!bprm->dont_iput) iput(bprm->inode); bprm->dont_iput=1; current->did_exec = 1; return retval; } if (retval != -ENOEXEC) break; if (bprm->dont_iput) /* We don't have the inode anymore*/ return retval; } if (retval != -ENOEXEC) { break; #ifdef CONFIG_KERNELD }else{ #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) char modname[20]; if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && printable(bprm->buf[2]) && printable(bprm->buf[3])) break; /* -ENOEXEC */ sprintf(modname, "binfmt-%hd", *(short*)(&bprm->buf)); request_module(modname); #endif } } return retval; } /* * sys_execve() executes a new program. */ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs) { struct linux_binprm bprm; int retval; int i; bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */ bprm.page[i] = 0; retval = open_namei(filename, 0, 0, &bprm.inode, NULL); if (retval) return retval; bprm.filename = filename; bprm.sh_bang = 0; bprm.loader = 0; bprm.exec = 0; bprm.dont_iput = 0; if ((bprm.argc = count(argv)) < 0) return bprm.argc; if ((bprm.envc = count(envp)) < 0) return bprm.envc; retval = prepare_binprm(&bprm); if(retval>=0) { bprm.p = copy_strings(1, &bprm.filename, bprm.page, bprm.p, 2); bprm.exec = bprm.p; bprm.p = copy_strings(bprm.envc,envp,bprm.page,bprm.p,0); bprm.p = copy_strings(bprm.argc,argv,bprm.page,bprm.p,0); if (!bprm.p) retval = -E2BIG; } if(retval>=0) retval = search_binary_handler(&bprm,regs); if(retval>=0) /* execve success */ return retval; /* Something went wrong, return the inode and free the argument pages*/ if(!bprm.dont_iput) iput(bprm.inode); for (i=0 ; i<MAX_ARG_PAGES ; i++) free_page(bprm.page[i]); return(retval); }