[PATCH] proc: Rewrite the proc dentry flush on exit optimization
Eric W. Biederman [Mon, 26 Jun 2006 07:25:48 +0000 (00:25 -0700)]
To keep the dcache from filling up with dead /proc entries we flush them on
process exit.  However, over the years that code has gotten hairy, with a
dentry pointer and a lock in task_struct, and it has been misdocumented as a
correctness feature.

I have rewritten this code to look and see if we have a corresponding entry in
the dcache and if so flush it on process exit.  This removes the extra fields
in the task_struct and allows me to trivially handle the case of a
/proc/<tgid>/task/<pid> entry as well as the current /proc/<pid> entries.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

fs/exec.c
fs/proc/base.c
include/linux/init_task.h
include/linux/proc_fs.h
include/linux/sched.h
kernel/exit.c
kernel/fork.c

index 0b88bf6..8c51960 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk)
         * and to assume its PID:
         */
        if (!thread_group_leader(current)) {
-               struct dentry *proc_dentry1, *proc_dentry2;
-
                /*
                 * Wait for the thread group leader to be a zombie.
                 * It should already be zombie at this point, most
@@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk)
                 */
                current->start_time = leader->start_time;
 
-               spin_lock(&leader->proc_lock);
-               spin_lock(&current->proc_lock);
-               proc_dentry1 = proc_pid_unhash(current);
-               proc_dentry2 = proc_pid_unhash(leader);
                write_lock_irq(&tasklist_lock);
 
                BUG_ON(leader->tgid != current->tgid);
@@ -729,10 +723,6 @@ static int de_thread(struct task_struct *tsk)
                leader->exit_state = EXIT_DEAD;
 
                write_unlock_irq(&tasklist_lock);
-               spin_unlock(&leader->proc_lock);
-               spin_unlock(&current->proc_lock);
-               proc_pid_flush(proc_dentry1);
-               proc_pid_flush(proc_dentry2);
         }
 
        /*
index c863684..f435932 100644 (file)
@@ -1352,16 +1352,6 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
        return 0;
 }
 
-static void pid_base_iput(struct dentry *dentry, struct inode *inode)
-{
-       struct task_struct *task = proc_task(inode);
-       spin_lock(&task->proc_lock);
-       if (task->proc_dentry == dentry)
-               task->proc_dentry = NULL;
-       spin_unlock(&task->proc_lock);
-       iput(inode);
-}
-
 static int pid_delete_dentry(struct dentry * dentry)
 {
        /* Is the task we represent dead?
@@ -1383,13 +1373,6 @@ static struct dentry_operations pid_dentry_operations =
        .d_delete       = pid_delete_dentry,
 };
 
-static struct dentry_operations pid_base_dentry_operations =
-{
-       .d_revalidate   = pid_revalidate,
-       .d_iput         = pid_base_iput,
-       .d_delete       = pid_delete_dentry,
-};
-
 /* Lookups */
 
 static unsigned name_to_int(struct dentry *dentry)
@@ -1859,57 +1842,70 @@ static struct inode_operations proc_self_inode_operations = {
 };
 
 /**
- * proc_pid_unhash -  Unhash /proc/@pid entry from the dcache.
- * @p: task that should be flushed.
+ * proc_flush_task -  Remove dcache entries for @task from the /proc dcache.
+ *
+ * @task: task that should be flushed.
+ *
+ * Looks in the dcache for
+ * /proc/@pid
+ * /proc/@tgid/task/@pid
+ * and, if either directory is present, flushes it and all of its children
+ * from the dcache.
  *
- * Drops the /proc/@pid dcache entry from the hash chains.
+ * It is safe and reasonable to cache /proc entries for a task until
+ * that task exits.  After that they just clog up the dcache with
+ * useless entries, possibly causing useful dcache entries to be
+ * flushed instead.  This routine is provided to flush those useless
+ * dcache entries at process exit time.
  *
- * Dropping /proc/@pid entries and detach_pid must be synchroneous,
- * otherwise e.g. /proc/@pid/exe might point to the wrong executable,
- * if the pid value is immediately reused. This is enforced by
- * - caller must acquire spin_lock(p->proc_lock)
- * - must be called before detach_pid()
- * - proc_pid_lookup acquires proc_lock, and checks that
- *   the target is not dead by looking at the attach count
- *   of PIDTYPE_PID.
+ * NOTE: This routine is just an optimization, so it does not guarantee
+ *       that no dcache entries will exist at process exit time; it
+ *       just makes it very unlikely that any will persist.
  */
-
-struct dentry *proc_pid_unhash(struct task_struct *p)
+void proc_flush_task(struct task_struct *task)
 {
-       struct dentry *proc_dentry;
+       struct dentry *dentry, *leader, *dir;
+       char buf[30];
+       struct qstr name;
+
+       name.name = buf;
+       name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
+       dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+       if (dentry) {
+               shrink_dcache_parent(dentry);
+               d_drop(dentry);
+               dput(dentry);
+       }
 
-       proc_dentry = p->proc_dentry;
-       if (proc_dentry != NULL) {
+       if (thread_group_leader(task))
+               goto out;
 
-               spin_lock(&dcache_lock);
-               spin_lock(&proc_dentry->d_lock);
-               if (!d_unhashed(proc_dentry)) {
-                       dget_locked(proc_dentry);
-                       __d_drop(proc_dentry);
-                       spin_unlock(&proc_dentry->d_lock);
-               } else {
-                       spin_unlock(&proc_dentry->d_lock);
-                       proc_dentry = NULL;
-               }
-               spin_unlock(&dcache_lock);
-       }
-       return proc_dentry;
-}
+       name.name = buf;
+       name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
+       leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+       if (!leader)
+               goto out;
 
-/**
- * proc_pid_flush - recover memory used by stale /proc/@pid/x entries
- * @proc_dentry: directoy to prune.
- *
- * Shrink the /proc directory that was used by the just killed thread.
- */
-       
-void proc_pid_flush(struct dentry *proc_dentry)
-{
-       might_sleep();
-       if(proc_dentry != NULL) {
-               shrink_dcache_parent(proc_dentry);
-               dput(proc_dentry);
+       name.name = "task";
+       name.len = strlen(name.name);
+       dir = d_hash_and_lookup(leader, &name);
+       if (!dir)
+               goto out_put_leader;
+
+       name.name = buf;
+       name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
+       dentry = d_hash_and_lookup(dir, &name);
+       if (dentry) {
+               shrink_dcache_parent(dentry);
+               d_drop(dentry);
+               dput(dentry);
        }
+
+       dput(dir);
+out_put_leader:
+       dput(leader);
+out:
+       return;
 }
 
 /* SMP-safe */
@@ -1919,7 +1915,6 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
        struct inode *inode;
        struct proc_inode *ei;
        unsigned tgid;
-       int died;
 
        if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
                inode = new_inode(dir->i_sb);
@@ -1965,23 +1960,16 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
        inode->i_nlink = 4;
 #endif
 
-       dentry->d_op = &pid_base_dentry_operations;
+       dentry->d_op = &pid_dentry_operations;
 
-       died = 0;
        d_add(dentry, inode);
-       spin_lock(&task->proc_lock);
-       task->proc_dentry = dentry;
        if (!pid_alive(task)) {
-               dentry = proc_pid_unhash(task);
-               died = 1;
+               d_drop(dentry);
+               shrink_dcache_parent(dentry);
+               goto out;
        }
-       spin_unlock(&task->proc_lock);
 
        put_task_struct(task);
-       if (died) {
-               proc_pid_flush(dentry);
-               goto out;
-       }
        return NULL;
 out:
        return ERR_PTR(-ENOENT);
@@ -2024,7 +2012,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
        inode->i_nlink = 3;
 #endif
 
-       dentry->d_op = &pid_base_dentry_operations;
+       dentry->d_op = &pid_dentry_operations;
 
        d_add(dentry, inode);
 
index 41ecbb8..e127ef7 100644 (file)
@@ -119,7 +119,6 @@ extern struct group_info init_groups;
                .signal = {{0}}},                                       \
        .blocked        = {{0}},                                        \
        .alloc_lock     = SPIN_LOCK_UNLOCKED,                           \
-       .proc_lock      = SPIN_LOCK_UNLOCKED,                           \
        .journal_info   = NULL,                                         \
        .cpu_timers     = INIT_CPU_TIMERS(tsk.cpu_timers),              \
        .fs_excl        = ATOMIC_INIT(0),                               \
index 9dd8488..d4d2081 100644 (file)
@@ -99,9 +99,8 @@ extern void proc_misc_init(void);
 
 struct mm_struct;
 
+void proc_flush_task(struct task_struct *task);
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
-struct dentry *proc_pid_unhash(struct task_struct *p);
-void proc_pid_flush(struct dentry *proc_dentry);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
 int task_statm(struct mm_struct *, int *, int *, int *, int *);
@@ -211,8 +210,7 @@ static inline void proc_net_remove(const char *name)
 #define proc_net_create(name, mode, info)      ({ (void)(mode), NULL; })
 static inline void proc_net_remove(const char *name) {}
 
-static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; }
-static inline void proc_pid_flush(struct dentry *proc_dentry) { }
+static inline void proc_flush_task(struct task_struct *task) { }
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
        mode_t mode, struct proc_dir_entry *parent) { return NULL; }
index 8d11d93..122a25c 100644 (file)
@@ -842,8 +842,6 @@ struct task_struct {
        u32 self_exec_id;
 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
        spinlock_t alloc_lock;
-/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
-       spinlock_t proc_lock;
 
 #ifdef CONFIG_DEBUG_MUTEXES
        /* mutex deadlock detection */
@@ -856,7 +854,6 @@ struct task_struct {
 /* VM state */
        struct reclaim_state *reclaim_state;
 
-       struct dentry *proc_dentry;
        struct backing_dev_info *backing_dev_info;
 
        struct io_context *io_context;
index e76bd02..304ef63 100644 (file)
@@ -137,12 +137,8 @@ void release_task(struct task_struct * p)
 {
        int zap_leader;
        task_t *leader;
-       struct dentry *proc_dentry;
-
 repeat:
        atomic_dec(&p->user->processes);
-       spin_lock(&p->proc_lock);
-       proc_dentry = proc_pid_unhash(p);
        write_lock_irq(&tasklist_lock);
        ptrace_unlink(p);
        BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
@@ -171,8 +167,7 @@ repeat:
 
        sched_exit(p);
        write_unlock_irq(&tasklist_lock);
-       spin_unlock(&p->proc_lock);
-       proc_pid_flush(proc_dentry);
+       proc_flush_task(p);
        release_thread(p);
        call_rcu(&p->rcu, delayed_put_task_struct);
 
index dfd10cb..79e9104 100644 (file)
@@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags,
                if (put_user(p->pid, parent_tidptr))
                        goto bad_fork_cleanup;
 
-       p->proc_dentry = NULL;
-
        INIT_LIST_HEAD(&p->children);
        INIT_LIST_HEAD(&p->sibling);
        p->vfork_done = NULL;
        spin_lock_init(&p->alloc_lock);
-       spin_lock_init(&p->proc_lock);
 
        clear_tsk_thread_flag(p, TIF_SIGPENDING);
        init_sigpending(&p->pending);