Leave mangled flag only for setting nd.intent.open.flags
[linux-2.6.git] / fs / namei.c
index d62fdc8..f5e4397 100644 (file)
@@ -689,33 +689,20 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
        set_root(nd);
 
        while(1) {
-               struct vfsmount *parent;
                struct dentry *old = nd->path.dentry;
 
                if (nd->path.dentry == nd->root.dentry &&
                    nd->path.mnt == nd->root.mnt) {
                        break;
                }
-               spin_lock(&dcache_lock);
                if (nd->path.dentry != nd->path.mnt->mnt_root) {
-                       nd->path.dentry = dget(nd->path.dentry->d_parent);
-                       spin_unlock(&dcache_lock);
+                       /* rare case of legitimate dget_parent()... */
+                       nd->path.dentry = dget_parent(nd->path.dentry);
                        dput(old);
                        break;
                }
-               spin_unlock(&dcache_lock);
-               spin_lock(&vfsmount_lock);
-               parent = nd->path.mnt->mnt_parent;
-               if (parent == nd->path.mnt) {
-                       spin_unlock(&vfsmount_lock);
+               if (!follow_up(&nd->path))
                        break;
-               }
-               mntget(parent);
-               nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
-               spin_unlock(&vfsmount_lock);
-               dput(old);
-               mntput(nd->path.mnt);
-               nd->path.mnt = parent;
        }
        follow_mount(&nd->path);
 }
@@ -823,6 +810,17 @@ fail:
 }
 
 /*
+ * This is a temporary kludge to deal with "automount" symlinks; proper
+ * solution is to trigger them on follow_mount(), so that do_lookup()
+ * would do the right thing.  To be killed before 2.6.34-final.
+ */
+static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
+{      /* nonzero: inode set, has ->follow_link, and (LOOKUP_FOLLOW or a dir) */
+       return inode && unlikely(inode->i_op->follow_link) &&
+               ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
+}
+
+/*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
  * the final dentry. We expect 'base' to be positive and a directory.
@@ -942,8 +940,7 @@ last_component:
                if (err)
                        break;
                inode = next.dentry->d_inode;
-               if ((lookup_flags & LOOKUP_FOLLOW)
-                   && inode && inode->i_op->follow_link) {
+               if (follow_on_final(inode, lookup_flags)) {
                        err = do_follow_link(&next, nd);
                        if (err)
                                goto return_err;
@@ -1337,7 +1334,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
                return -ENOENT;
 
        BUG_ON(victim->d_parent->d_inode != dir);
-       audit_inode_child(victim->d_name.name, victim, dir);
+       audit_inode_child(victim, dir);
 
        error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
        if (error)
@@ -1493,7 +1490,7 @@ int may_open(struct path *path, int acc_mode, int flag)
         * An append-only file must be opened in append mode for writing.
         */
        if (IS_APPEND(inode)) {
-               if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
+               if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
                        return -EPERM;
                if (flag & O_TRUNC)
                        return -EPERM;
@@ -1537,7 +1534,7 @@ static int handle_truncate(struct path *path)
  * what get passed to sys_open().
  */
 static int __open_namei_create(struct nameidata *nd, struct path *path,
-                               int flag, int mode)
+                               int open_flag, int mode)
 {
        int error;
        struct dentry *dir = nd->path.dentry;
@@ -1555,7 +1552,7 @@ out_unlock:
        if (error)
                return error;
        /* Don't check for write permission, don't truncate */
-       return may_open(&nd->path, 0, flag & ~O_TRUNC);
+       return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
 }
 
 /*
@@ -1593,6 +1590,163 @@ static int open_will_truncate(int flag, struct inode *inode)
        return (flag & O_TRUNC);
 }
 
+static struct file *finish_open(struct nameidata *nd,
+                               int open_flag, int acc_mode)
+{
+       struct file *filp;
+       int will_truncate;
+       int error;
+       /* Check may_open(), turn nd into a struct file, truncate if required. */
+       will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
+       if (will_truncate) {
+               error = mnt_want_write(nd->path.mnt);   /* dropped below */
+               if (error)
+                       goto exit;
+       }
+       error = may_open(&nd->path, acc_mode, open_flag);
+       if (error) {
+               if (will_truncate)
+                       mnt_drop_write(nd->path.mnt);
+               goto exit;
+       }
+       filp = nameidata_to_filp(nd);
+       if (!IS_ERR(filp)) {
+               error = ima_file_check(filp, acc_mode);
+               if (error) {
+                       fput(filp);     /* undo the open, report the error */
+                       filp = ERR_PTR(error);
+               }
+       }
+       if (!IS_ERR(filp)) {
+               if (acc_mode & MAY_WRITE)
+                       vfs_dq_init(nd->path.dentry->d_inode);
+
+               if (will_truncate) {
+                       error = handle_truncate(&nd->path);
+                       if (error) {
+                               fput(filp);
+                               filp = ERR_PTR(error);
+                       }
+               }
+       }
+       /*
+        * It is now safe to drop the mnt write
+        * because the filp has had a write taken
+        * on its behalf.
+        */
+       if (will_truncate)
+               mnt_drop_write(nd->path.mnt);
+       return filp;    /* valid file or ERR_PTR() */
+
+exit:  /* failed before nameidata_to_filp(): release intent and nd->path */
+       if (!IS_ERR(nd->intent.open.file))
+               release_open_intent(nd);
+       path_put(&nd->path);
+       return ERR_PTR(error);
+}
+
+static struct file *do_last(struct nameidata *nd, struct path *path,
+                           int open_flag, int acc_mode,
+                           int mode, const char *pathname,
+                           int *is_link)
+{
+       struct dentry *dir = nd->path.dentry;   /* its i_mutex guards the lookup */
+       struct file *filp;
+       int error;
+       /* Look up the last component under nd; create it if absent, then open. */
+       *is_link = 0;   /* set to 1 (with a NULL return) if a link must be followed */
+
+       error = -EISDIR;
+       if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
+               goto exit;      /* refuse to creat(2) an obvious directory */
+
+       mutex_lock(&dir->d_inode->i_mutex);
+
+       path->dentry = lookup_hash(nd);
+       path->mnt = nd->path.mnt;
+
+       error = PTR_ERR(path->dentry);
+       if (IS_ERR(path->dentry)) {
+               mutex_unlock(&dir->d_inode->i_mutex);
+               goto exit;
+       }
+
+       if (IS_ERR(nd->intent.open.file)) {
+               error = PTR_ERR(nd->intent.open.file);
+               goto exit_mutex_unlock;
+       }
+
+       /* Negative dentry, just create the file */
+       if (!path->dentry->d_inode) {
+               /*
+                * This write is needed to ensure that a
+                * ro->rw transition does not occur between
+                * the time when the file is created and when
+                * a permanent write count is taken through
+                * the 'struct file' in nameidata_to_filp().
+                */
+               error = mnt_want_write(nd->path.mnt);
+               if (error)
+                       goto exit_mutex_unlock;
+               error = __open_namei_create(nd, path, open_flag, mode);
+               if (error) {
+                       mnt_drop_write(nd->path.mnt);
+                       goto exit;      /* assumes callee dropped i_mutex - confirm */
+               }
+               filp = nameidata_to_filp(nd);
+               mnt_drop_write(nd->path.mnt);
+               if (!IS_ERR(filp)) {
+                       error = ima_file_check(filp, acc_mode);
+                       if (error) {
+                               fput(filp);
+                               filp = ERR_PTR(error);
+                       }
+               }
+               return filp;    /* valid file or ERR_PTR() */
+       }
+
+       /*
+        * It already exists.
+        */
+       mutex_unlock(&dir->d_inode->i_mutex);
+       audit_inode(pathname, path->dentry);
+
+       error = -EEXIST;
+       if (open_flag & O_EXCL)
+               goto exit_dput;
+
+       if (__follow_mount(path)) {
+               error = -ELOOP; /* only reported when O_NOFOLLOW is set */
+               if (open_flag & O_NOFOLLOW)
+                       goto exit_dput;
+       }
+
+       error = -ENOENT;
+       if (!path->dentry->d_inode)
+               goto exit_dput;
+       if (path->dentry->d_inode->i_op->follow_link) {
+               *is_link = 1;
+               return NULL;    /* caller is expected to follow the link */
+       }
+
+       path_to_nameidata(path, nd);
+       error = -EISDIR;
+       if (S_ISDIR(path->dentry->d_inode->i_mode))
+               goto exit;
+       filp = finish_open(nd, open_flag, acc_mode);
+       return filp;
+
+exit_mutex_unlock:
+       mutex_unlock(&dir->d_inode->i_mutex);
+exit_dput:
+       path_put_conditional(path, nd);
+exit:  /* common failure tail: release open intent and nd->path */
+       if (!IS_ERR(nd->intent.open.file))
+               release_open_intent(nd);
+       path_put(&nd->path);
+       return ERR_PTR(error);
+}
+
 /*
  * Note that the low bits of the passed in "open_flag"
  * are not the same as in the local variable "flag". See
@@ -1605,11 +1759,10 @@ struct file *do_filp_open(int dfd, const char *pathname,
        struct nameidata nd;
        int error;
        struct path path;
-       struct dentry *dir;
        int count = 0;
-       int will_truncate;
        int flag = open_to_namei_flags(open_flag);
        int force_reval = 0;
+       int is_link;
 
        /*
         * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
@@ -1624,18 +1777,18 @@ struct file *do_filp_open(int dfd, const char *pathname,
                acc_mode = MAY_OPEN | ACC_MODE(open_flag);
 
        /* O_TRUNC implies we need access checks for write permissions */
-       if (flag & O_TRUNC)
+       if (open_flag & O_TRUNC)
                acc_mode |= MAY_WRITE;
 
        /* Allow the LSM permission hook to distinguish append 
           access from general write access. */
-       if (flag & O_APPEND)
+       if (open_flag & O_APPEND)
                acc_mode |= MAY_APPEND;
 
        /*
         * The simplest case - just a plain lookup.
         */
-       if (!(flag & O_CREAT)) {
+       if (!(open_flag & O_CREAT)) {
                filp = get_empty_filp();
 
                if (filp == NULL)
@@ -1645,7 +1798,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
                nd.intent.open.flags = flag;
                nd.intent.open.create_mode = 0;
                error = do_path_lookup(dfd, pathname,
-                                       lookup_flags(flag)|LOOKUP_OPEN, &nd);
+                                       lookup_flags(open_flag)|LOOKUP_OPEN, &nd);
                if (IS_ERR(nd.intent.open.file)) {
                        if (error == 0) {
                                error = PTR_ERR(nd.intent.open.file);
@@ -1677,13 +1830,8 @@ reval:
                audit_inode(pathname, nd.path.dentry);
 
        /*
-        * We have the parent and last component. First of all, check
-        * that we are not asked to creat(2) an obvious directory - that
-        * will not do.
+        * We have the parent and last component.
         */
-       error = -EISDIR;
-       if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
-               goto exit_parent;
 
        error = -ENFILE;
        filp = get_empty_filp();
@@ -1693,140 +1841,24 @@ reval:
        filp->f_flags = open_flag;
        nd.intent.open.flags = flag;
        nd.intent.open.create_mode = mode;
-       dir = nd.path.dentry;
        nd.flags &= ~LOOKUP_PARENT;
        nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
-       if (flag & O_EXCL)
+       if (open_flag & O_EXCL)
                nd.flags |= LOOKUP_EXCL;
-       mutex_lock(&dir->d_inode->i_mutex);
-       path.dentry = lookup_hash(&nd);
-       path.mnt = nd.path.mnt;
-
-do_last:
-       error = PTR_ERR(path.dentry);
-       if (IS_ERR(path.dentry)) {
-               mutex_unlock(&dir->d_inode->i_mutex);
-               goto exit;
-       }
-
-       if (IS_ERR(nd.intent.open.file)) {
-               error = PTR_ERR(nd.intent.open.file);
-               goto exit_mutex_unlock;
-       }
-
-       /* Negative dentry, just create the file */
-       if (!path.dentry->d_inode) {
-               /*
-                * This write is needed to ensure that a
-                * ro->rw transition does not occur between
-                * the time when the file is created and when
-                * a permanent write count is taken through
-                * the 'struct file' in nameidata_to_filp().
-                */
-               error = mnt_want_write(nd.path.mnt);
-               if (error)
-                       goto exit_mutex_unlock;
-               error = __open_namei_create(&nd, &path, flag, mode);
-               if (error) {
-                       mnt_drop_write(nd.path.mnt);
-                       goto exit;
-               }
-               filp = nameidata_to_filp(&nd);
-               mnt_drop_write(nd.path.mnt);
-               if (nd.root.mnt)
-                       path_put(&nd.root);
-               if (!IS_ERR(filp)) {
-                       error = ima_file_check(filp, acc_mode);
-                       if (error) {
-                               fput(filp);
-                               filp = ERR_PTR(error);
-                       }
-               }
-               return filp;
-       }
-
-       /*
-        * It already exists.
-        */
-       mutex_unlock(&dir->d_inode->i_mutex);
-       audit_inode(pathname, path.dentry);
-
-       error = -EEXIST;
-       if (flag & O_EXCL)
-               goto exit_dput;
-
-       if (__follow_mount(&path)) {
-               error = -ELOOP;
-               if (flag & O_NOFOLLOW)
-                       goto exit_dput;
-       }
-
-       error = -ENOENT;
-       if (!path.dentry->d_inode)
-               goto exit_dput;
-       if (path.dentry->d_inode->i_op->follow_link)
+       filp = do_last(&nd, &path, open_flag, acc_mode, mode,
+                      pathname, &is_link);
+       if (is_link)
                goto do_link;
+       if (nd.root.mnt)
+               path_put(&nd.root);
+       return filp;
 
-       path_to_nameidata(&path, &nd);
-       error = -EISDIR;
-       if (S_ISDIR(path.dentry->d_inode->i_mode))
-               goto exit;
 ok:
-       /*
-        * Consider:
-        * 1. may_open() truncates a file
-        * 2. a rw->ro mount transition occurs
-        * 3. nameidata_to_filp() fails due to
-        *    the ro mount.
-        * That would be inconsistent, and should
-        * be avoided. Taking this mnt write here
-        * ensures that (2) can not occur.
-        */
-       will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
-       if (will_truncate) {
-               error = mnt_want_write(nd.path.mnt);
-               if (error)
-                       goto exit;
-       }
-       error = may_open(&nd.path, acc_mode, flag);
-       if (error) {
-               if (will_truncate)
-                       mnt_drop_write(nd.path.mnt);
-               goto exit;
-       }
-       filp = nameidata_to_filp(&nd);
-       if (!IS_ERR(filp)) {
-               error = ima_file_check(filp, acc_mode);
-               if (error) {
-                       fput(filp);
-                       filp = ERR_PTR(error);
-               }
-       }
-       if (!IS_ERR(filp)) {
-               if (acc_mode & MAY_WRITE)
-                       vfs_dq_init(nd.path.dentry->d_inode);
-
-               if (will_truncate) {
-                       error = handle_truncate(&nd.path);
-                       if (error) {
-                               fput(filp);
-                               filp = ERR_PTR(error);
-                       }
-               }
-       }
-       /*
-        * It is now safe to drop the mnt write
-        * because the filp has had a write taken
-        * on its behalf.
-        */
-       if (will_truncate)
-               mnt_drop_write(nd.path.mnt);
+       filp = finish_open(&nd, open_flag, acc_mode);
        if (nd.root.mnt)
                path_put(&nd.root);
        return filp;
 
-exit_mutex_unlock:
-       mutex_unlock(&dir->d_inode->i_mutex);
 exit_dput:
        path_put_conditional(&path, &nd);
 exit:
@@ -1840,7 +1872,7 @@ exit_parent:
 
 do_link:
        error = -ELOOP;
-       if (flag & O_NOFOLLOW)
+       if ((open_flag & O_NOFOLLOW) || count++ == 32)
                goto exit_dput;
        /*
         * This is subtle. Instead of calling do_follow_link() we do the
@@ -1875,24 +1907,15 @@ do_link:
        nd.flags &= ~LOOKUP_PARENT;
        if (nd.last_type == LAST_BIND)
                goto ok;
-       error = -EISDIR;
-       if (nd.last_type != LAST_NORM)
-               goto exit;
-       if (nd.last.name[nd.last.len]) {
+       filp = do_last(&nd, &path, open_flag, acc_mode, mode,
+                      pathname, &is_link);
+       if (nd.last_type == LAST_NORM)
                __putname(nd.last.name);
-               goto exit;
-       }
-       error = -ELOOP;
-       if (count++==32) {
-               __putname(nd.last.name);
-               goto exit;
-       }
-       dir = nd.path.dentry;
-       mutex_lock(&dir->d_inode->i_mutex);
-       path.dentry = lookup_hash(&nd);
-       path.mnt = nd.path.mnt;
-       __putname(nd.last.name);
-       goto do_last;
+       if (is_link)
+               goto do_link;
+       if (nd.root.mnt)
+               path_put(&nd.root);
+       return filp;
 }
 
 /**
@@ -2265,8 +2288,11 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
                error = -EBUSY;
        else {
                error = security_inode_unlink(dir, dentry);
-               if (!error)
+               if (!error) {
                        error = dir->i_op->unlink(dir, dentry);
+                       if (!error)
+                               dentry->d_inode->i_flags |= S_DEAD;
+               }
        }
        mutex_unlock(&dentry->d_inode->i_mutex);
 
@@ -2619,6 +2645,8 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
        else
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        if (!error) {
+               if (target)
+                       target->i_flags |= S_DEAD;
                if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
                        d_move(old_dentry, new_dentry);
        }
@@ -2661,11 +2689,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
        else
                error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-       if (!error) {
-               const char *new_name = old_dentry->d_name.name;
-               fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
+       if (!error)
+               fsnotify_move(old_dir, new_dir, old_name, is_dir,
                              new_dentry->d_inode, old_dentry);
-       }
        fsnotify_oldname_free(old_name);
 
        return error;