/* * linux/fs/inode.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/stat.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <asm/system.h> #define NR_IHASH 512 /* * Be VERY careful when you access the inode hash table. There * are some rather scary race conditions you need to take care of: * - P1 tries to open file "xx", calls "iget()" with the proper * inode number, but blocks because it's not on the list. * - P2 deletes file "xx", gets the inode (which P1 has just read, * but P1 hasn't woken up to the fact yet) * - P2 iput()'s the inode, which now has i_nlink = 0 * - P1 wakes up and has the inode, but now P2 has made that * inode invalid (but P1 has no way of knowing that). * * The "updating" counter makes sure that when P1 blocks on the * iget(), P2 can't delete the inode from under it because P2 * will wait until P1 has been able to update the inode usage * count so that the inode will stay in use until everybody has * closed it.. */ static struct inode_hash_entry { struct inode * inode; int updating; } hash_table[NR_IHASH]; static struct inode * first_inode; static struct wait_queue * inode_wait = NULL; /* Keep these next two contiguous in memory for sysctl.c */ int nr_inodes = 0, nr_free_inodes = 0; int max_inodes = NR_INODE; static inline int const hashfn(kdev_t dev, unsigned int i) { return (HASHDEV(dev) ^ i) % NR_IHASH; } static inline struct inode_hash_entry * const hash(kdev_t dev, int i) { return hash_table + hashfn(dev, i); } static inline void insert_inode_free(struct inode *inode) { struct inode * prev, * next = first_inode; first_inode = inode; prev = next->i_prev; inode->i_next = next; inode->i_prev = prev; prev->i_next = inode; next->i_prev = inode; } static inline void remove_inode_free(struct inode *inode) { if (first_inode == inode) first_inode = first_inode->i_next; if (inode->i_next) inode->i_next->i_prev = inode->i_prev; if (inode->i_prev) inode->i_prev->i_next = inode->i_next; inode->i_next = inode->i_prev = NULL; } void insert_inode_hash(struct inode *inode) { struct inode_hash_entry *h; h = hash(inode->i_dev, inode->i_ino); inode->i_hash_next = h->inode; inode->i_hash_prev = NULL; if (inode->i_hash_next) inode->i_hash_next->i_hash_prev = inode; h->inode = inode; } static inline void remove_inode_hash(struct inode *inode) { struct inode_hash_entry *h; h = hash(inode->i_dev, inode->i_ino); if (h->inode == inode) h->inode = inode->i_hash_next; if (inode->i_hash_next) inode->i_hash_next->i_hash_prev = inode->i_hash_prev; if (inode->i_hash_prev) inode->i_hash_prev->i_hash_next = inode->i_hash_next; inode->i_hash_prev = inode->i_hash_next = NULL; } static inline void put_last_free(struct inode *inode) { remove_inode_free(inode); inode->i_prev = first_inode->i_prev; inode->i_prev->i_next = inode; inode->i_next = first_inode; inode->i_next->i_prev = inode; } int grow_inodes(void) { struct inode * inode; int i; if (!(inode = (struct inode*) get_free_page(GFP_KERNEL))) return -ENOMEM; i=PAGE_SIZE / sizeof(struct inode); nr_inodes += i; nr_free_inodes += i; if (!first_inode) inode->i_next = inode->i_prev = first_inode = inode++, i--; for ( ; i ; i-- ) insert_inode_free(inode++); return 0; } unsigned long inode_init(unsigned long start, unsigned long end) { memset(hash_table, 0, sizeof(hash_table)); first_inode = NULL; return start; } static void __wait_on_inode(struct inode *); static inline void wait_on_inode(struct inode * inode) { if (inode->i_lock) __wait_on_inode(inode); } static inline void lock_inode(struct inode * inode) { wait_on_inode(inode); inode->i_lock = 1; } static inline void unlock_inode(struct inode * inode) { inode->i_lock = 0; wake_up(&inode->i_wait); } /* * Note that we don't want to disturb any wait-queues when we discard * an inode. * * Argghh. Got bitten by a gcc problem with inlining: no way to tell * the compiler that the inline asm function 'memset' changes 'inode'. * I've been searching for the bug for days, and was getting desperate. * Finally looked at the assembler output... Grrr. * * The solution is the weird use of 'volatile'. Ho humm. Have to report * it to the gcc lists, and hope we can do this more cleanly some day.. */
void clear_inode(struct inode * inode) { struct wait_queue * wait; /* * We can clear inodes either when a last deref to the inode * causes it to be deleted (reference count==1), or when we want to * reuse it (reference count==0). Any other count is an error. */ if (inode->i_count > 1) panic ("clear_inode: Inode still has references"); /* * We are about to zap this inode. This operation may block, * and it's imperative that we don't allow another process to * grab it before it is completely pulled down. The i_count * will prevent reuse of the inode by get_empty_inode(), but the * i_condemned flag will also prevent __iget() from finding the * inode until it is completely dead. */ inode->i_condemned = 1; inode->i_count++; truncate_inode_pages(inode, 0); wait_on_inode(inode); if (IS_WRITABLE(inode)) { if (inode->i_sb && inode->i_sb->dq_op) inode->i_sb->dq_op->drop(inode); } remove_inode_hash(inode); remove_inode_free(inode); wait = ((volatile struct inode *) inode)->i_wait; if (--inode->i_count) nr_free_inodes++; memset(inode,0,sizeof(*inode)); ((volatile struct inode *) inode)->i_wait = wait; insert_inode_free(inode); /* * The inode is now reusable again, and the condemned flag is * clear. Wake up anybody who is waiting on the condemned flag. */ wake_up(&inode->i_wait); } int fs_may_mount(kdev_t dev) { struct inode * inode, * next; int i; next = first_inode; for (i = nr_inodes ; i > 0 ; i--) { inode = next; next = inode->i_next; /* clear_inode() changes the queues.. */ if (inode->i_dev != dev) continue; if (inode->i_count || inode->i_dirt || inode->i_lock) return 0; clear_inode(inode); } return 1; } int fs_may_umount(kdev_t dev, struct inode * mount_root) { struct inode * inode; int i; inode = first_inode; for (i=0 ; i < nr_inodes ; i++, inode = inode->i_next) { if (inode->i_dev != dev || !inode->i_count) continue; if (inode == mount_root && inode->i_count == (inode->i_mount != inode ? 1 : 2)) continue; return 0; } return 1; } int fs_may_remount_ro(kdev_t dev) { struct file * file; int i; /* Check that no files are currently opened for writing. */ for (file = first_file, i=0; i<nr_files; i++, file=file->f_next) { if (!file->f_count || !file->f_inode || file->f_inode->i_dev != dev) continue; if (S_ISREG(file->f_inode->i_mode) && (file->f_mode & 2)) return 0; } return 1; } static void write_inode(struct inode * inode) { if (!inode->i_dirt) return; wait_on_inode(inode); if (!inode->i_dirt) return; if (!inode->i_sb || !inode->i_sb->s_op || !inode->i_sb->s_op->write_inode) { inode->i_dirt = 0; return; } inode->i_lock = 1; inode->i_sb->s_op->write_inode(inode); unlock_inode(inode); } static inline void read_inode(struct inode * inode) { lock_inode(inode); if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->read_inode) inode->i_sb->s_op->read_inode(inode); unlock_inode(inode); } /* POSIX UID/GID verification for setting inode attributes */ int inode_change_ok(struct inode *inode, struct iattr *attr) { /* * If force is set do it anyway. */ if (attr->ia_valid & ATTR_FORCE) return 0; /* Make sure a caller can chown */ if ((attr->ia_valid & ATTR_UID) && (current->fsuid != inode->i_uid || attr->ia_uid != inode->i_uid) && !fsuser()) return -EPERM; /* Make sure caller can chgrp */ if ((attr->ia_valid & ATTR_GID) && (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid) && !fsuser()) return -EPERM; /* Make sure a caller can chmod */ if (attr->ia_valid & ATTR_MODE) { if ((current->fsuid != inode->i_uid) && !fsuser()) return -EPERM; /* Also check the setgid bit! */ if (!fsuser() && !in_group_p((attr->ia_valid & ATTR_GID) ? attr->ia_gid : inode->i_gid)) attr->ia_mode &= ~S_ISGID; } /* Check for setting the inode time */ if ((attr->ia_valid & ATTR_ATIME_SET) && ((current->fsuid != inode->i_uid) && !fsuser())) return -EPERM; if ((attr->ia_valid & ATTR_MTIME_SET) && ((current->fsuid != inode->i_uid) && !fsuser())) return -EPERM; return 0; } /* * Set the appropriate attributes from an attribute structure into * the inode structure. */ void inode_setattr(struct inode *inode, struct iattr *attr) { if (attr->ia_valid & ATTR_UID) inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; if (attr->ia_valid & ATTR_SIZE) inode->i_size = attr->ia_size; if (attr->ia_valid & ATTR_ATIME) inode->i_atime = attr->ia_atime; if (attr->ia_valid & ATTR_MTIME) inode->i_mtime = attr->ia_mtime; if (attr->ia_valid & ATTR_CTIME) inode->i_ctime = attr->ia_ctime; if (attr->ia_valid & ATTR_MODE) { inode->i_mode = attr->ia_mode; if (!fsuser() && !in_group_p(inode->i_gid)) inode->i_mode &= ~S_ISGID; } inode->i_dirt = 1; } /* * notify_change is called for inode-changing operations such as * chown, chmod, utime, and truncate. It is guaranteed (unlike * write_inode) to be called from the context of the user requesting * the change. */ int notify_change(struct inode * inode, struct iattr *attr) { int retval; attr->ia_ctime = CURRENT_TIME; if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME)) { if (!(attr->ia_valid & ATTR_ATIME_SET)) attr->ia_atime = attr->ia_ctime; if (!(attr->ia_valid & ATTR_MTIME_SET)) attr->ia_mtime = attr->ia_ctime; } if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->notify_change) return inode->i_sb->s_op->notify_change(inode, attr); if ((retval = inode_change_ok(inode, attr)) != 0) return retval; inode_setattr(inode, attr); return 0; } /* * bmap is needed for demand-loading and paging: if this function * doesn't exist for a filesystem, then those things are impossible: * executables cannot be run from the filesystem etc... * * This isn't as bad as it sounds: the read-routines might still work, * so the filesystem would be otherwise ok (for example, you might have * a DOS filesystem, which doesn't lend itself to bmap very well, but * you could still transfer files to/from the filesystem) */ int bmap(struct inode * inode, int block) { if (inode->i_op && inode->i_op->bmap) return inode->i_op->bmap(inode,block); return 0; } void invalidate_inodes(kdev_t dev) { struct inode * inode, * next; int i; next = first_inode; for(i = nr_inodes ; i > 0 ; i--) { inode = next; next = inode->i_next; /* clear_inode() changes the queues.. */ if (inode->i_dev != dev) continue; if (inode->i_count || inode->i_dirt || inode->i_lock) { printk("VFS: inode busy on removed device %s\n", kdevname(dev)); continue; } clear_inode(inode); } } void sync_inodes(kdev_t dev) { int i; struct inode * inode; inode = first_inode; for(i = 0; i < nr_inodes*2; i++, inode = inode->i_next) { if (dev && inode->i_dev != dev) continue; wait_on_inode(inode); if (inode->i_dirt) write_inode(inode); } } void iput(struct inode * inode) { if (!inode) return; wait_on_inode(inode); if (!inode->i_count) { printk("VFS: iput: trying to free free inode\n"); printk("VFS: device %s, inode %lu, mode=0%07o\n", kdevname(inode->i_rdev), inode->i_ino, inode->i_mode); return; } if (inode->i_pipe) wake_up_interruptible(&PIPE_WAIT(*inode)); repeat: if (inode->i_count>1) { inode->i_count--; return; } wake_up(&inode_wait); if (inode->i_pipe) { unsigned long page = (unsigned long) PIPE_BASE(*inode); PIPE_BASE(*inode) = NULL; free_page(page); } if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->put_inode) { inode->i_sb->s_op->put_inode(inode); if (!inode->i_nlink) return; } if (inode->i_dirt) { write_inode(inode); /* we can sleep - so do again */ wait_on_inode(inode); goto repeat; } if (IS_WRITABLE(inode)) { if (inode->i_sb && inode->i_sb->dq_op) { /* Here we can sleep also. Let's do it again * Dmitry Gorodchanin 02/11/96 */ inode->i_lock = 1; inode->i_sb->dq_op->drop(inode); unlock_inode(inode); goto repeat; } } inode->i_count--; if (inode->i_count) /* * Huoh, we were supposed to be the last user, but someone has * grabbed it while we were sleeping. Dont destroy inode VM * mappings, it might cause a memory leak. */ return; if (inode->i_mmap) { printk("iput: inode %lu on device %s still has mappings.\n", inode->i_ino, kdevname(inode->i_dev)); inode->i_mmap = NULL; } nr_free_inodes++; return; } struct inode * get_empty_inode(void) { static int ino = 0; struct inode * inode, * best; unsigned long badness; int i; if (nr_inodes < max_inodes && nr_free_inodes < (nr_inodes >> 1)) grow_inodes(); repeat: inode = first_inode; best = NULL; badness = 1000; for (i = nr_inodes/2; i > 0; i--,inode = inode->i_next) { if (!inode->i_count) { unsigned long i = 999; if (!(inode->i_lock || inode->i_dirt)) i = inode->i_nrpages; if (i < badness) { best = inode; if (!i) goto found_good; badness = i; } } } if (nr_inodes < max_inodes) { if (grow_inodes() == 0) goto repeat; best = NULL; } if (!best) { printk("VFS: No free inodes - contact Linus\n"); sleep_on(&inode_wait); goto repeat; } if (best->i_lock) { wait_on_inode(best); goto repeat; } if (best->i_dirt) { write_inode(best); goto repeat; } if (best->i_count) goto repeat; found_good: clear_inode(best); best->i_count = 1; best->i_nlink = 1; best->i_version = ++event; best->i_sem.count = 1; best->i_ino = ++ino; best->i_dev = 0; nr_free_inodes--; if (nr_free_inodes < 0) { printk ("VFS: get_empty_inode: bad free inode count.\n"); nr_free_inodes = 0; } return best; } struct inode * get_pipe_inode(void) { struct inode * inode; extern struct inode_operations pipe_inode_operations; if (!(inode = get_empty_inode())) return NULL; if (!(PIPE_BASE(*inode) = (char*) __get_free_page(GFP_USER))) { iput(inode); return NULL; } inode->i_op = &pipe_inode_operations; inode->i_count = 2; /* sum of readers/writers */ PIPE_WAIT(*inode) = NULL; PIPE_START(*inode) = PIPE_LEN(*inode) = 0; PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0; PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; PIPE_LOCK(*inode) = 0; inode->i_pipe = 1; inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_blksize = PAGE_SIZE; return inode; } struct inode *__iget(struct super_block * sb, int nr, int crossmntp) { static struct wait_queue * update_wait = NULL; struct inode_hash_entry * h; struct inode * inode; struct inode * empty = NULL; if (!sb) panic("VFS: iget with sb==NULL"); h = hash(sb->s_dev, nr); repeat: for (inode = h->inode; inode ; inode = inode->i_hash_next) if (inode->i_dev == sb->s_dev && inode->i_ino == nr) goto found_it; if (!empty) { /* * If we sleep here before we have found an inode * we need to make sure nobody does anything bad * to the inode while we sleep, because otherwise * we may return an inode that is not valid any * more when we wake up.. */ h->updating++; empty = get_empty_inode(); if (!--h->updating) wake_up(&update_wait); if (empty) goto repeat; return (NULL); } inode = empty; inode->i_sb = sb; inode->i_dev = sb->s_dev; inode->i_ino = nr; inode->i_flags = sb->s_flags; put_last_free(inode); insert_inode_hash(inode); read_inode(inode); goto return_it; found_it: /* * The inode may currently be being pulled down by * clear_inode(). Avoid it if so. If we get past this, then * the increment of i_count will prevent the inode's reuse. */ if (inode->i_condemned) { sleep_on(&inode->i_wait); goto repeat; } if (!inode->i_count) nr_free_inodes--; inode->i_count++; wait_on_inode(inode); if (inode->i_dev != sb->s_dev || inode->i_ino != nr) { printk("Whee.. inode changed from under us. Tell Linus\n"); iput(inode); goto repeat; } if (crossmntp && inode->i_mount) { struct inode * tmp = inode->i_mount; tmp->i_count++; iput(inode); inode = tmp; wait_on_inode(inode); } if (empty) iput(empty); return_it: while (h->updating) sleep_on(&update_wait); return inode; } /* * The "new" scheduling primitives (new as of 0.97 or so) allow this to * be done without disabling interrupts (other than in the actual queue * updating things: only a couple of 386 instructions). This should be * much better for interrupt latency. */ static void __wait_on_inode(struct inode * inode) { struct wait_queue wait = { current, NULL }; add_wait_queue(&inode->i_wait, &wait); repeat: current->state = TASK_UNINTERRUPTIBLE; if (inode->i_lock) { schedule(); goto repeat; } remove_wait_queue(&inode->i_wait, &wait); current->state = TASK_RUNNING; }