[PATCH] r/o bind mounts: debugging for missed calls
Dave Hansen [Fri, 15 Feb 2008 22:38:01 +0000 (14:38 -0800)]
There have been a few oopses caused by 'struct file's with NULL f_vfsmnts.
There was also a set of potentially missed mnt_want_write()s from
dentry_open() calls.

This patch provides a very simple debugging framework to catch these kinds of
bugs.  It will WARN_ON() them, but should stop us from having any oopses or
mnt_writer count imbalances.

I'm quite convinced that this is a good thing because it found bugs in the
stuff I was working on as soon as I wrote it.

[hch: made it conditional on a debug option.
      But it's still a little bit too ugly]

[hch: merged forced remount r/o fix from Dave and akpm's fix for the fix]

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

fs/file_table.c
fs/open.c
fs/super.c
include/linux/fs.h
lib/Kconfig.debug

index 71efc70..7a0a9b8 100644 (file)
@@ -42,6 +42,7 @@ static inline void file_free_rcu(struct rcu_head *head)
 static inline void file_free(struct file *f)
 {
        percpu_counter_dec(&nr_files);
+       file_check_state(f);
        call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
 
@@ -207,6 +208,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
         * that we can do debugging checks at __fput()
         */
        if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
+               file_take_write(file);
                error = mnt_want_write(mnt);
                WARN_ON(error);
        }
@@ -237,8 +239,13 @@ void drop_file_write_access(struct file *file)
        struct inode *inode = dentry->d_inode;
 
        put_write_access(inode);
-       if (!special_file(inode->i_mode))
-               mnt_drop_write(mnt);
+
+       if (special_file(inode->i_mode))
+               return;
+       if (file_check_writeable(file) != 0)
+               return;
+       mnt_drop_write(mnt);
+       file_release_write(file);
 }
 EXPORT_SYMBOL_GPL(drop_file_write_access);
 
index e58382d..b70e766 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -806,6 +806,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
                error = __get_file_write_access(inode, mnt);
                if (error)
                        goto cleanup_file;
+               if (!special_file(inode->i_mode))
+                       file_take_write(f);
        }
 
        f->f_mapping = inode->i_mapping;
@@ -847,8 +849,16 @@ cleanup_all:
        fops_put(f->f_op);
        if (f->f_mode & FMODE_WRITE) {
                put_write_access(inode);
-               if (!special_file(inode->i_mode))
+               if (!special_file(inode->i_mode)) {
+                       /*
+                        * We don't consider this a real
+                        * mnt_want/drop_write() pair
+                        * because it all happenend right
+                        * here, so just reset the state.
+                        */
+                       file_reset_write(f);
                        mnt_drop_write(mnt);
+               }
        }
        file_kill(f);
        f->f_path.dentry = NULL;
index 01d5c40..1f8f05e 100644 (file)
@@ -579,6 +579,9 @@ retry:
                if (!(f->f_mode & FMODE_WRITE))
                        continue;
                f->f_mode &= ~FMODE_WRITE;
+               if (file_check_writeable(f) != 0)
+                       continue;
+               file_release_write(f);
                mnt = mntget(f->f_path.mnt);
                file_list_unlock();
                /*
index 013b9c2..d1eeea6 100644 (file)
@@ -776,6 +776,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
                index <  ra->start + ra->size);
 }
 
+#define FILE_MNT_WRITE_TAKEN   1
+#define FILE_MNT_WRITE_RELEASED        2
+
 struct file {
        /*
         * fu_list becomes invalid after file_free is called and queued via
@@ -810,6 +813,9 @@ struct file {
        spinlock_t              f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
        struct address_space    *f_mapping;
+#ifdef CONFIG_DEBUG_WRITECOUNT
+       unsigned long f_mnt_write_state;
+#endif
 };
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
@@ -818,6 +824,49 @@ extern spinlock_t files_lock;
 #define get_file(x)    atomic_inc(&(x)->f_count)
 #define file_count(x)  atomic_read(&(x)->f_count)
 
+#ifdef CONFIG_DEBUG_WRITECOUNT
+static inline void file_take_write(struct file *f)
+{
+       WARN_ON(f->f_mnt_write_state != 0);
+       f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
+}
+static inline void file_release_write(struct file *f)
+{
+       f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
+}
+static inline void file_reset_write(struct file *f)
+{
+       f->f_mnt_write_state = 0;
+}
+static inline void file_check_state(struct file *f)
+{
+       /*
+        * At this point, either both or neither of these bits
+        * should be set.
+        */
+       WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
+       WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
+}
+static inline int file_check_writeable(struct file *f)
+{
+       if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
+               return 0;
+       printk(KERN_WARNING "writeable file with no "
+                           "mnt_want_write()\n");
+       WARN_ON(1);
+       return -EINVAL;
+}
+#else /* !CONFIG_DEBUG_WRITECOUNT */
+static inline void file_take_write(struct file *filp) {}
+static inline void file_release_write(struct file *filp) {}
+static inline void file_reset_write(struct file *filp) {}
+static inline void file_check_state(struct file *filp) {}
+static inline int file_check_writeable(struct file *filp)
+{
+       return 0;
+}
+#endif /* CONFIG_DEBUG_WRITECOUNT */
+
 #define        MAX_NON_LFS     ((1UL<<31) - 1)
 
 /* Page cache limit. The filesystems should put that into their s_maxbytes 
index 95de310..623ef24 100644 (file)
@@ -427,6 +427,16 @@ config DEBUG_VM
 
          If unsure, say N.
 
+config DEBUG_WRITECOUNT
+       bool "Debug filesystem writers count"
+       depends on DEBUG_KERNEL
+       help
+         Enable this to catch wrong use of the writers count in struct
+         vfsmount.  This will increase the size of each file struct by
+         32 bits.
+
+         If unsure, say N.
+
 config DEBUG_LIST
        bool "Debug linked list manipulation"
        depends on DEBUG_KERNEL