sanitize vfsmount refcounting changes
Al Viro [Sat, 15 Jan 2011 03:30:21 +0000 (22:30 -0500)]
Instead of splitting refcount between (per-cpu) mnt_count
and (SMP-only) mnt_longrefs, make all references contribute
to mnt_count again and keep track of how many are longterm
ones.

Accounting rules for longterm count:
* 1 for each fs_struct.root.mnt
* 1 for each fs_struct.pwd.mnt
* 1 for having non-NULL ->mnt_ns
* decrement to 0 happens only under vfsmount lock exclusive

That allows nice common case for mntput() - since we can't drop the
final reference until after mnt_longterm has reached 0 due to the rules
above, mntput() can grab vfsmount lock shared and check mnt_longterm.
If it turns out to be non-zero (which is the common case), we know
that this is not the final mntput() and can just blindly decrement
percpu mnt_count.  Otherwise we grab vfsmount lock exclusive and
do usual decrement-and-check of percpu mnt_count.

For fs_struct.c we have mnt_make_longterm() and mnt_make_shortterm();
namespace.c uses the latter in places where we don't already hold
vfsmount lock exclusive and opencodes a few remaining spots where
we need to manipulate mnt_longterm.

Note that we mostly revert the code outside of fs/namespace.c back
to what we used to have; in particular, normal code doesn't need
to care about two kinds of references, etc.  And we get to keep
the optimization Nick's variant had bought us...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

drivers/mtd/mtdchar.c
fs/anon_inodes.c
fs/fs_struct.c
fs/internal.h
fs/namei.c
fs/namespace.c
fs/pipe.c
fs/super.c
include/linux/mount.h
include/linux/path.h

index ee4bb33..9824057 100644 (file)
@@ -1201,7 +1201,7 @@ err_unregister_chdev:
 static void __exit cleanup_mtdchar(void)
 {
        unregister_mtd_user(&mtdchar_notifier);
-       mntput_long(mtd_inode_mnt);
+       mntput(mtd_inode_mnt);
        unregister_filesystem(&mtd_inodefs_type);
        __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
 }
index cbe57f3..c5567cb 100644 (file)
@@ -233,7 +233,7 @@ static int __init anon_inode_init(void)
        return 0;
 
 err_mntput:
-       mntput_long(anon_inode_mnt);
+       mntput(anon_inode_mnt);
 err_unregister_filesystem:
        unregister_filesystem(&anon_inode_fs_type);
 err_exit:
index 68ca487..78b519c 100644 (file)
@@ -4,6 +4,19 @@
 #include <linux/path.h>
 #include <linux/slab.h>
 #include <linux/fs_struct.h>
+#include "internal.h"
+
+static inline void path_get_longterm(struct path *path)
+{
+       path_get(path);
+       mnt_make_longterm(path->mnt);
+}
+
+static inline void path_put_longterm(struct path *path)
+{
+       mnt_make_shortterm(path->mnt);
+       path_put(path);
+}
 
 /*
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
        write_seqcount_begin(&fs->seq);
        old_root = fs->root;
        fs->root = *path;
-       path_get_long(path);
+       path_get_longterm(path);
        write_seqcount_end(&fs->seq);
        spin_unlock(&fs->lock);
        if (old_root.dentry)
-               path_put_long(&old_root);
+               path_put_longterm(&old_root);
 }
 
 /*
@@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
        write_seqcount_begin(&fs->seq);
        old_pwd = fs->pwd;
        fs->pwd = *path;
-       path_get_long(path);
+       path_get_longterm(path);
        write_seqcount_end(&fs->seq);
        spin_unlock(&fs->lock);
 
        if (old_pwd.dentry)
-               path_put_long(&old_pwd);
+               path_put_longterm(&old_pwd);
 }
 
 void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
                        write_seqcount_begin(&fs->seq);
                        if (fs->root.dentry == old_root->dentry
                            && fs->root.mnt == old_root->mnt) {
-                               path_get_long(new_root);
+                               path_get_longterm(new_root);
                                fs->root = *new_root;
                                count++;
                        }
                        if (fs->pwd.dentry == old_root->dentry
                            && fs->pwd.mnt == old_root->mnt) {
-                               path_get_long(new_root);
+                               path_get_longterm(new_root);
                                fs->pwd = *new_root;
                                count++;
                        }
@@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);
        while (count--)
-               path_put_long(old_root);
+               path_put_longterm(old_root);
 }
 
 void free_fs_struct(struct fs_struct *fs)
 {
-       path_put_long(&fs->root);
-       path_put_long(&fs->pwd);
+       path_put_longterm(&fs->root);
+       path_put_longterm(&fs->pwd);
        kmem_cache_free(fs_cachep, fs);
 }
 
@@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 
                spin_lock(&old->lock);
                fs->root = old->root;
-               path_get_long(&fs->root);
+               path_get_longterm(&fs->root);
                fs->pwd = old->pwd;
-               path_get_long(&fs->pwd);
+               path_get_longterm(&fs->pwd);
                spin_unlock(&old->lock);
        }
        return fs;
index 4931060..12ccb86 100644 (file)
@@ -73,6 +73,9 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
 extern int do_add_mount(struct vfsmount *, struct path *, int);
 extern void mnt_clear_expiry(struct vfsmount *);
 
+extern void mnt_make_longterm(struct vfsmount *);
+extern void mnt_make_shortterm(struct vfsmount *);
+
 extern void __init mnt_init(void);
 
 DECLARE_BRLOCK(vfsmount_lock);
index c2e3772..8f7b41a 100644 (file)
@@ -368,18 +368,6 @@ void path_get(struct path *path)
 EXPORT_SYMBOL(path_get);
 
 /**
- * path_get_long - get a long reference to a path
- * @path: path to get the reference to
- *
- * Given a path increment the reference count to the dentry and the vfsmount.
- */
-void path_get_long(struct path *path)
-{
-       mntget_long(path->mnt);
-       dget(path->dentry);
-}
-
-/**
  * path_put - put a reference to a path
  * @path: path to put the reference to
  *
@@ -393,18 +381,6 @@ void path_put(struct path *path)
 EXPORT_SYMBOL(path_put);
 
 /**
- * path_put_long - put a long reference to a path
- * @path: path to put the reference to
- *
- * Given a path decrement the reference count to the dentry and the vfsmount.
- */
-void path_put_long(struct path *path)
-{
-       dput(path->dentry);
-       mntput_long(path->mnt);
-}
-
-/**
  * nameidata_drop_rcu - drop this nameidata out of rcu-walk
  * @nd: nameidata pathwalk data to drop
  * Returns: 0 on success, -ECHILD on failure
index d7fc05f..48809e2 100644 (file)
@@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt)
 unsigned int mnt_get_count(struct vfsmount *mnt)
 {
 #ifdef CONFIG_SMP
-       unsigned int count = atomic_read(&mnt->mnt_longrefs);
+       unsigned int count = 0;
        int cpu;
 
        for_each_possible_cpu(cpu) {
@@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
                if (!mnt->mnt_pcp)
                        goto out_free_devname;
 
-               atomic_set(&mnt->mnt_longrefs, 1);
+               this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
 #else
                mnt->mnt_count = 1;
                mnt->mnt_writers = 0;
@@ -624,8 +624,11 @@ static void commit_tree(struct vfsmount *mnt)
        BUG_ON(parent == mnt);
 
        list_add_tail(&head, &mnt->mnt_list);
-       list_for_each_entry(m, &head, mnt_list)
+       list_for_each_entry(m, &head, mnt_list) {
                m->mnt_ns = n;
+               atomic_inc(&m->mnt_longterm);
+       }
+
        list_splice(&head, n->list.prev);
 
        list_add_tail(&mnt->mnt_hash, mount_hashtable +
@@ -734,51 +737,30 @@ static inline void mntfree(struct vfsmount *mnt)
        deactivate_super(sb);
 }
 
-#ifdef CONFIG_SMP
-static inline void __mntput(struct vfsmount *mnt, int longrefs)
+static void mntput_no_expire(struct vfsmount *mnt)
 {
-       if (!longrefs) {
 put_again:
-               br_read_lock(vfsmount_lock);
-               if (likely(atomic_read(&mnt->mnt_longrefs))) {
-                       mnt_dec_count(mnt);
-                       br_read_unlock(vfsmount_lock);
-                       return;
-               }
+#ifdef CONFIG_SMP
+       br_read_lock(vfsmount_lock);
+       if (likely(atomic_read(&mnt->mnt_longterm))) {
+               mnt_dec_count(mnt);
                br_read_unlock(vfsmount_lock);
-       } else {
-               BUG_ON(!atomic_read(&mnt->mnt_longrefs));
-               if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
-                       return;
+               return;
        }
+       br_read_unlock(vfsmount_lock);
 
        br_write_lock(vfsmount_lock);
-       if (!longrefs)
-               mnt_dec_count(mnt);
-       else
-               atomic_dec(&mnt->mnt_longrefs);
+       mnt_dec_count(mnt);
        if (mnt_get_count(mnt)) {
                br_write_unlock(vfsmount_lock);
                return;
        }
-       if (unlikely(mnt->mnt_pinned)) {
-               mnt_add_count(mnt, mnt->mnt_pinned + 1);
-               mnt->mnt_pinned = 0;
-               br_write_unlock(vfsmount_lock);
-               acct_auto_close_mnt(mnt);
-               goto put_again;
-       }
-       br_write_unlock(vfsmount_lock);
-       mntfree(mnt);
-}
 #else
-static inline void __mntput(struct vfsmount *mnt, int longrefs)
-{
-put_again:
        mnt_dec_count(mnt);
        if (likely(mnt_get_count(mnt)))
                return;
        br_write_lock(vfsmount_lock);
+#endif
        if (unlikely(mnt->mnt_pinned)) {
                mnt_add_count(mnt, mnt->mnt_pinned + 1);
                mnt->mnt_pinned = 0;
@@ -789,12 +771,6 @@ put_again:
        br_write_unlock(vfsmount_lock);
        mntfree(mnt);
 }
-#endif
-
-static void mntput_no_expire(struct vfsmount *mnt)
-{
-       __mntput(mnt, 0);
-}
 
 void mntput(struct vfsmount *mnt)
 {
@@ -802,7 +778,7 @@ void mntput(struct vfsmount *mnt)
                /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
                if (unlikely(mnt->mnt_expiry_mark))
                        mnt->mnt_expiry_mark = 0;
-               __mntput(mnt, 0);
+               mntput_no_expire(mnt);
        }
 }
 EXPORT_SYMBOL(mntput);
@@ -815,33 +791,6 @@ struct vfsmount *mntget(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL(mntget);
 
-void mntput_long(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
-       if (mnt) {
-               /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
-               if (unlikely(mnt->mnt_expiry_mark))
-                       mnt->mnt_expiry_mark = 0;
-               __mntput(mnt, 1);
-       }
-#else
-       mntput(mnt);
-#endif
-}
-EXPORT_SYMBOL(mntput_long);
-
-struct vfsmount *mntget_long(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
-       if (mnt)
-               atomic_inc(&mnt->mnt_longrefs);
-       return mnt;
-#else
-       return mntget(mnt);
-#endif
-}
-EXPORT_SYMBOL(mntget_long);
-
 void mnt_pin(struct vfsmount *mnt)
 {
        br_write_lock(vfsmount_lock);
@@ -1216,7 +1165,7 @@ void release_mounts(struct list_head *head)
                        dput(dentry);
                        mntput(m);
                }
-               mntput_long(mnt);
+               mntput(mnt);
        }
 }
 
@@ -1240,6 +1189,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
                list_del_init(&p->mnt_list);
                __touch_mnt_namespace(p->mnt_ns);
                p->mnt_ns = NULL;
+               atomic_dec(&p->mnt_longterm);
                list_del_init(&p->mnt_child);
                if (p->mnt_parent != p) {
                        p->mnt_parent->mnt_ghosts++;
@@ -1969,7 +1919,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
 
 unlock:
        up_write(&namespace_sem);
-       mntput_long(newmnt);
+       mntput(newmnt);
        return err;
 }
 
@@ -2291,6 +2241,20 @@ static struct mnt_namespace *alloc_mnt_ns(void)
        return new_ns;
 }
 
+void mnt_make_longterm(struct vfsmount *mnt)
+{
+       atomic_inc(&mnt->mnt_longterm);
+}
+
+void mnt_make_shortterm(struct vfsmount *mnt)
+{
+       if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
+               return;
+       br_write_lock(vfsmount_lock);
+       atomic_dec(&mnt->mnt_longterm);
+       br_write_unlock(vfsmount_lock);
+}
+
 /*
  * Allocate a new namespace structure and populate it with contents
  * copied from the namespace of the passed in task structure.
@@ -2328,14 +2292,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
        q = new_ns->root;
        while (p) {
                q->mnt_ns = new_ns;
+               atomic_inc(&q->mnt_longterm);
                if (fs) {
                        if (p == fs->root.mnt) {
+                               fs->root.mnt = mntget(q);
+                               atomic_inc(&q->mnt_longterm);
+                               mnt_make_shortterm(p);
                                rootmnt = p;
-                               fs->root.mnt = mntget_long(q);
                        }
                        if (p == fs->pwd.mnt) {
+                               fs->pwd.mnt = mntget(q);
+                               atomic_inc(&q->mnt_longterm);
+                               mnt_make_shortterm(p);
                                pwdmnt = p;
-                               fs->pwd.mnt = mntget_long(q);
                        }
                }
                p = next_mnt(p, mnt_ns->root);
@@ -2344,9 +2313,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
        up_write(&namespace_sem);
 
        if (rootmnt)
-               mntput_long(rootmnt);
+               mntput(rootmnt);
        if (pwdmnt)
-               mntput_long(pwdmnt);
+               mntput(pwdmnt);
 
        return new_ns;
 }
@@ -2379,6 +2348,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
        new_ns = alloc_mnt_ns();
        if (!IS_ERR(new_ns)) {
                mnt->mnt_ns = new_ns;
+               atomic_inc(&mnt->mnt_longterm);
                new_ns->root = mnt;
                list_add(&new_ns->list, &new_ns->root->mnt_list);
        }
index e2e95fb..89e9e19 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void)
 static void __exit exit_pipe_fs(void)
 {
        unregister_filesystem(&pipe_fs_type);
-       mntput_long(pipe_mnt);
+       mntput(pipe_mnt);
 }
 
 fs_initcall(init_pipe_fs);
index 4f6a357..74e149e 100644 (file)
@@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
        return mnt;
 
  err:
-       mntput_long(mnt);
+       mntput(mnt);
        return ERR_PTR(err);
 }
 
index af4765e..604f122 100644 (file)
@@ -60,7 +60,7 @@ struct vfsmount {
        struct super_block *mnt_sb;     /* pointer to superblock */
 #ifdef CONFIG_SMP
        struct mnt_pcp __percpu *mnt_pcp;
-       atomic_t mnt_longrefs;
+       atomic_t mnt_longterm;          /* how many of the refs are longterm */
 #else
        int mnt_count;
        int mnt_writers;
@@ -96,8 +96,6 @@ extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mntput(struct vfsmount *mnt);
 extern struct vfsmount *mntget(struct vfsmount *mnt);
-extern void mntput_long(struct vfsmount *mnt);
-extern struct vfsmount *mntget_long(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
 extern void mnt_unpin(struct vfsmount *mnt);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
index a581e8c..edc98de 100644 (file)
@@ -10,9 +10,7 @@ struct path {
 };
 
 extern void path_get(struct path *);
-extern void path_get_long(struct path *);
 extern void path_put(struct path *);
-extern void path_put_long(struct path *);
 
 static inline int path_equal(const struct path *path1, const struct path *path2)
 {