SUNRPC: Fix a potential race in rpc_wake_up_task()
Trond Myklebust [Mon, 13 Nov 2006 21:23:44 +0000 (16:23 -0500)]
Use RCU to ensure that we can safely call rpc_finish_wakeup after we've
called __rpc_do_wake_up_task. If not, there is a theoretical race, in which
the rpc_task finishes executing, and gets freed first.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

fs/nfs/read.c
fs/nfs/write.c
include/linux/nfs_fs.h
include/linux/sunrpc/sched.h
net/sunrpc/sched.c

index c2e49c3..8b58bbf 100644 (file)
@@ -65,13 +65,19 @@ struct nfs_read_data *nfs_readdata_alloc(size_t len)
        return p;
 }
 
-static void nfs_readdata_free(struct nfs_read_data *p)
+static void nfs_readdata_rcu_free(struct rcu_head *head)
 {
+       struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_rdata_mempool);
 }
 
+static void nfs_readdata_free(struct nfs_read_data *rdata)
+{
+       call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
+}
+
 void nfs_readdata_release(void *data)
 {
         nfs_readdata_free(data);
index 883dd4a..29d8820 100644 (file)
@@ -102,13 +102,19 @@ struct nfs_write_data *nfs_commit_alloc(void)
        return p;
 }
 
-void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_rcu_free(struct rcu_head *head)
 {
+       struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_commit_mempool);
 }
 
+void nfs_commit_free(struct nfs_write_data *wdata)
+{
+       call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
+}
+
 struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
        unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -131,13 +137,19 @@ struct nfs_write_data *nfs_writedata_alloc(size_t len)
        return p;
 }
 
-static void nfs_writedata_free(struct nfs_write_data *p)
+static void nfs_writedata_rcu_free(struct rcu_head *head)
 {
+       struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_wdata_mempool);
 }
 
+static void nfs_writedata_free(struct nfs_write_data *wdata)
+{
+       call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
+}
+
 void nfs_writedata_release(void *wdata)
 {
        nfs_writedata_free(wdata);
@@ -258,7 +270,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 io_error:
        nfs_end_data_update(inode);
        end_page_writeback(page);
-       nfs_writedata_free(wdata);
+       nfs_writedata_release(wdata);
        return written ? written : result;
 }
 
@@ -1043,7 +1055,7 @@ out_bad:
        while (!list_empty(&list)) {
                data = list_entry(list.next, struct nfs_write_data, pages);
                list_del(&data->pages);
-               nfs_writedata_free(data);
+               nfs_writedata_release(data);
        }
        nfs_mark_request_dirty(req);
        nfs_clear_page_writeback(req);
index 625ffea..02f3818 100644 (file)
@@ -428,11 +428,6 @@ extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
 extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
 extern void nfs_writedata_release(void *);
 
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-struct nfs_write_data *nfs_commit_alloc(void);
-void nfs_commit_free(struct nfs_write_data *p);
-#endif
-
 /*
  * Try to write back everything synchronously (but check the
  * return value!)
@@ -440,6 +435,8 @@ void nfs_commit_free(struct nfs_write_data *p);
 extern int  nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 extern int  nfs_commit_inode(struct inode *, int);
+extern struct nfs_write_data *nfs_commit_alloc(void);
+extern void nfs_commit_free(struct nfs_write_data *wdata);
 extern void nfs_commit_release(void *wdata);
 #else
 static inline int
index 9fdb8c9..14fc813 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/timer.h>
 #include <linux/sunrpc/types.h>
+#include <linux/rcupdate.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
@@ -85,6 +86,7 @@ struct rpc_task {
        union {
                struct work_struct      tk_work;        /* Async task work queue */
                struct rpc_wait         tk_wait;        /* RPC wait */
+               struct rcu_head         tk_rcu;         /* for task deletion */
        } u;
 
        unsigned short          tk_timeouts;    /* maj timeouts */
index 66d0136..6b808c0 100644 (file)
@@ -427,16 +427,19 @@ __rpc_default_timer(struct rpc_task *task)
  */
 void rpc_wake_up_task(struct rpc_task *task)
 {
+       rcu_read_lock_bh();
        if (rpc_start_wakeup(task)) {
                if (RPC_IS_QUEUED(task)) {
                        struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
 
-                       spin_lock_bh(&queue->lock);
+                       /* Note: we're already in a bh-safe context */
+                       spin_lock(&queue->lock);
                        __rpc_do_wake_up_task(task);
-                       spin_unlock_bh(&queue->lock);
+                       spin_unlock(&queue->lock);
                }
                rpc_finish_wakeup(task);
        }
+       rcu_read_unlock_bh();
 }
 
 /*
@@ -499,14 +502,16 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
        struct rpc_task *task = NULL;
 
        dprintk("RPC:      wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
-       spin_lock_bh(&queue->lock);
+       rcu_read_lock_bh();
+       spin_lock(&queue->lock);
        if (RPC_IS_PRIORITY(queue))
                task = __rpc_wake_up_next_priority(queue);
        else {
                task_for_first(task, &queue->tasks[0])
                        __rpc_wake_up_task(task);
        }
-       spin_unlock_bh(&queue->lock);
+       spin_unlock(&queue->lock);
+       rcu_read_unlock_bh();
 
        return task;
 }
@@ -522,7 +527,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
        struct rpc_task *task, *next;
        struct list_head *head;
 
-       spin_lock_bh(&queue->lock);
+       rcu_read_lock_bh();
+       spin_lock(&queue->lock);
        head = &queue->tasks[queue->maxpriority];
        for (;;) {
                list_for_each_entry_safe(task, next, head, u.tk_wait.list)
@@ -531,7 +537,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
                        break;
                head--;
        }
-       spin_unlock_bh(&queue->lock);
+       spin_unlock(&queue->lock);
+       rcu_read_unlock_bh();
 }
 
 /**
@@ -546,7 +553,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
        struct rpc_task *task, *next;
        struct list_head *head;
 
-       spin_lock_bh(&queue->lock);
+       rcu_read_lock_bh();
+       spin_lock(&queue->lock);
        head = &queue->tasks[queue->maxpriority];
        for (;;) {
                list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
@@ -557,7 +565,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
                        break;
                head--;
        }
-       spin_unlock_bh(&queue->lock);
+       spin_unlock(&queue->lock);
+       rcu_read_unlock_bh();
 }
 
 static void __rpc_atrun(struct rpc_task *task)
@@ -817,8 +826,9 @@ rpc_alloc_task(void)
        return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
 }
 
-static void rpc_free_task(struct rpc_task *task)
+static void rpc_free_task(struct rcu_head *rcu)
 {
+       struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu);
        dprintk("RPC: %4d freeing task\n", task->tk_pid);
        mempool_free(task, rpc_task_mempool);
 }
@@ -872,7 +882,7 @@ void rpc_put_task(struct rpc_task *task)
                task->tk_client = NULL;
        }
        if (task->tk_flags & RPC_TASK_DYNAMIC)
-               rpc_free_task(task);
+               call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
        if (tk_ops->rpc_release)
                tk_ops->rpc_release(calldata);
 }