[PATCH] RCU signal handling
Ingo Molnar [Sun, 8 Jan 2006 09:01:37 +0000 (01:01 -0800)]
RCU tasklist_lock and RCU signal handling: send signals under
rcu_read_lock() instead of with tasklist_lock read-held.  This is a
scalability improvement on SMP and a preemption-latency improvement under
PREEMPT_RCU.

Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: William Irwin <wli@holomorphy.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

fs/exec.c
include/linux/sched.h
kernel/exit.c
kernel/fork.c
kernel/pid.c
kernel/rcupdate.c
kernel/sched.c
kernel/signal.c

index e75a954..e9650cd 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -760,7 +760,7 @@ no_thread_group:
                spin_lock(&oldsighand->siglock);
                spin_lock(&newsighand->siglock);
 
-               current->sighand = newsighand;
+               rcu_assign_pointer(current->sighand, newsighand);
                recalc_sigpending();
 
                spin_unlock(&newsighand->siglock);
@@ -768,7 +768,7 @@ no_thread_group:
                write_unlock_irq(&tasklist_lock);
 
                if (atomic_dec_and_test(&oldsighand->count))
-                       kmem_cache_free(sighand_cachep, oldsighand);
+                       sighand_free(oldsighand);
        }
 
        BUG_ON(!thread_group_leader(current));
index a746620..a6af77e 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,7 @@
 #include <linux/percpu.h>
 #include <linux/topology.h>
 #include <linux/seccomp.h>
+#include <linux/rcupdate.h>
 
 #include <linux/auxvec.h>      /* For AT_VECTOR_SIZE */
 
@@ -350,8 +351,16 @@ struct sighand_struct {
        atomic_t                count;
        struct k_sigaction      action[_NSIG];
        spinlock_t              siglock;
+       struct rcu_head         rcu;
 };
 
+extern void sighand_free_cb(struct rcu_head *rhp);
+
+static inline void sighand_free(struct sighand_struct *sp)
+{
+       call_rcu(&sp->rcu, sighand_free_cb);
+}
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -844,6 +853,7 @@ struct task_struct {
        int cpuset_mems_generation;
 #endif
        atomic_t fs_excl;       /* holding fs exclusive resources */
+       struct rcu_head rcu;
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
@@ -867,8 +877,26 @@ static inline int pid_alive(struct task_struct *p)
 extern void free_task(struct task_struct *tsk);
 extern void __put_task_struct(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
-#define put_task_struct(tsk) \
-do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
+
+static inline int get_task_struct_rcu(struct task_struct *t)
+{
+       int oldusage;
+
+       do {
+               oldusage = atomic_read(&t->usage);
+               if (oldusage == 0)
+                       return 0;
+       } while (cmpxchg(&t->usage.counter, oldusage, oldusage+1) != oldusage);
+       return 1;
+}
+
+extern void __put_task_struct_cb(struct rcu_head *rhp);
+
+static inline void put_task_struct(struct task_struct *t)
+{
+       if (atomic_dec_and_test(&t->usage))
+               call_rcu(&t->rcu, __put_task_struct_cb);
+}
 
 /*
  * Per process flags
index ee51568..c73a7eb 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,7 +72,6 @@ repeat:
                __ptrace_unlink(p);
        BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
        __exit_signal(p);
-       __exit_sighand(p);
        /*
         * Note that the fastpath in sys_times depends on __exit_signal having
         * updated the counters before a task is removed from the tasklist of
index fb8572a..7fe3adf 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -743,6 +743,14 @@ int unshare_files(void)
 
 EXPORT_SYMBOL(unshare_files);
 
+void sighand_free_cb(struct rcu_head *rhp)
+{
+       struct sighand_struct *sp;
+
+       sp = container_of(rhp, struct sighand_struct, rcu);
+       kmem_cache_free(sighand_cachep, sp);
+}
+
 static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
 {
        struct sighand_struct *sig;
@@ -752,7 +760,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
                return 0;
        }
        sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
-       tsk->sighand = sig;
+       rcu_assign_pointer(tsk->sighand, sig);
        if (!sig)
                return -ENOMEM;
        spin_lock_init(&sig->siglock);
index edba31c..1acc072 100644 (file)
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -136,7 +136,7 @@ struct pid * fastcall find_pid(enum pid_type type, int nr)
        struct hlist_node *elem;
        struct pid *pid;
 
-       hlist_for_each_entry(pid, elem,
+       hlist_for_each_entry_rcu(pid, elem,
                        &pid_hash[type][pid_hashfn(nr)], pid_chain) {
                if (pid->nr == nr)
                        return pid;
@@ -150,15 +150,15 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 
        task_pid = &task->pids[type];
        pid = find_pid(type, nr);
+       task_pid->nr = nr;
        if (pid == NULL) {
-               hlist_add_head(&task_pid->pid_chain,
-                               &pid_hash[type][pid_hashfn(nr)]);
                INIT_LIST_HEAD(&task_pid->pid_list);
+               hlist_add_head_rcu(&task_pid->pid_chain,
+                                  &pid_hash[type][pid_hashfn(nr)]);
        } else {
                INIT_HLIST_NODE(&task_pid->pid_chain);
-               list_add_tail(&task_pid->pid_list, &pid->pid_list);
+               list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
        }
-       task_pid->nr = nr;
 
        return 0;
 }
@@ -170,20 +170,20 @@ static fastcall int __detach_pid(task_t *task, enum pid_type type)
 
        pid = &task->pids[type];
        if (!hlist_unhashed(&pid->pid_chain)) {
-               hlist_del(&pid->pid_chain);
 
-               if (list_empty(&pid->pid_list))
+               if (list_empty(&pid->pid_list)) {
                        nr = pid->nr;
-               else {
+                       hlist_del_rcu(&pid->pid_chain);
+               } else {
                        pid_next = list_entry(pid->pid_list.next,
                                                struct pid, pid_list);
                        /* insert next pid from pid_list to hash */
-                       hlist_add_head(&pid_next->pid_chain,
-                               &pid_hash[type][pid_hashfn(pid_next->nr)]);
+                       hlist_replace_rcu(&pid->pid_chain,
+                                         &pid_next->pid_chain);
                }
        }
 
-       list_del(&pid->pid_list);
+       list_del_rcu(&pid->pid_list);
        pid->nr = 0;
 
        return nr;
index c9afc61..0a669bd 100644 (file)
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
+#include <linux/rcupdate.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <asm/atomic.h>
index 6f46c94..9273309 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -176,6 +176,13 @@ static unsigned int task_timeslice(task_t *p)
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)      \
                                < (long long) (sd)->cache_hot_time)
 
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+       __put_task_struct(container_of(rhp, struct task_struct, rcu));
+}
+
+EXPORT_SYMBOL_GPL(__put_task_struct_cb);
+
 /*
  * These are the runqueue data structures:
  */
index d7611f1..64737c7 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -329,13 +329,20 @@ void __exit_sighand(struct task_struct *tsk)
        /* Ok, we're done with the signal handlers */
        tsk->sighand = NULL;
        if (atomic_dec_and_test(&sighand->count))
-               kmem_cache_free(sighand_cachep, sighand);
+               sighand_free(sighand);
 }
 
 void exit_sighand(struct task_struct *tsk)
 {
        write_lock_irq(&tasklist_lock);
-       __exit_sighand(tsk);
+       rcu_read_lock();
+       if (tsk->sighand != NULL) {
+               struct sighand_struct *sighand = rcu_dereference(tsk->sighand);
+               spin_lock(&sighand->siglock);
+               __exit_sighand(tsk);
+               spin_unlock(&sighand->siglock);
+       }
+       rcu_read_unlock();
        write_unlock_irq(&tasklist_lock);
 }
 
@@ -345,12 +352,14 @@ void exit_sighand(struct task_struct *tsk)
 void __exit_signal(struct task_struct *tsk)
 {
        struct signal_struct * sig = tsk->signal;
-       struct sighand_struct * sighand = tsk->sighand;
+       struct sighand_struct * sighand;
 
        if (!sig)
                BUG();
        if (!atomic_read(&sig->count))
                BUG();
+       rcu_read_lock();
+       sighand = rcu_dereference(tsk->sighand);
        spin_lock(&sighand->siglock);
        posix_cpu_timers_exit(tsk);
        if (atomic_dec_and_test(&sig->count)) {
@@ -358,6 +367,7 @@ void __exit_signal(struct task_struct *tsk)
                if (tsk == sig->curr_target)
                        sig->curr_target = next_thread(tsk);
                tsk->signal = NULL;
+               __exit_sighand(tsk);
                spin_unlock(&sighand->siglock);
                flush_sigqueue(&sig->shared_pending);
        } else {
@@ -389,9 +399,11 @@ void __exit_signal(struct task_struct *tsk)
                sig->nvcsw += tsk->nvcsw;
                sig->nivcsw += tsk->nivcsw;
                sig->sched_time += tsk->sched_time;
+               __exit_sighand(tsk);
                spin_unlock(&sighand->siglock);
                sig = NULL;     /* Marker for below.  */
        }
+       rcu_read_unlock();
        clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
        flush_sigqueue(&tsk->pending);
        if (sig) {
@@ -1080,18 +1092,28 @@ void zap_other_threads(struct task_struct *p)
 }
 
 /*
- * Must be called with the tasklist_lock held for reading!
+ * Must be called under rcu_read_lock() or with tasklist_lock read-held.
  */
 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
        unsigned long flags;
+       struct sighand_struct *sp;
        int ret;
 
+retry:
        ret = check_kill_permission(sig, info, p);
-       if (!ret && sig && p->sighand) {
-               spin_lock_irqsave(&p->sighand->siglock, flags);
+       if (!ret && sig && (sp = p->sighand)) {
+               if (!get_task_struct_rcu(p))
+                       return -ESRCH;
+               spin_lock_irqsave(&sp->siglock, flags);
+               if (p->sighand != sp) {
+                       spin_unlock_irqrestore(&sp->siglock, flags);
+                       put_task_struct(p);
+                       goto retry;
+               }
                ret = __group_send_sig_info(sig, info, p);
-               spin_unlock_irqrestore(&p->sighand->siglock, flags);
+               spin_unlock_irqrestore(&sp->siglock, flags);
+               put_task_struct(p);
        }
 
        return ret;
@@ -1136,14 +1158,21 @@ int
 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 {
        int error;
+       int acquired_tasklist_lock = 0;
        struct task_struct *p;
 
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
+       if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) {
+               read_lock(&tasklist_lock);
+               acquired_tasklist_lock = 1;
+       }
        p = find_task_by_pid(pid);
        error = -ESRCH;
        if (p)
                error = group_send_sig_info(sig, info, p);
-       read_unlock(&tasklist_lock);
+       if (unlikely(acquired_tasklist_lock))
+               read_unlock(&tasklist_lock);
+       rcu_read_unlock();
        return error;
 }
 
@@ -1355,16 +1384,54 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 {
        unsigned long flags;
        int ret = 0;
+       struct sighand_struct *sh;
 
        BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
-       read_lock(&tasklist_lock);
+
+       /*
+        * The rcu based delayed sighand destroy makes it possible to
+        * run this without tasklist lock held. The task struct itself
+        * cannot go away as create_timer did get_task_struct().
+        *
+        * We return -1, when the task is marked exiting, so
+        * posix_timer_event can redirect it to the group leader
+        */
+       rcu_read_lock();
 
        if (unlikely(p->flags & PF_EXITING)) {
                ret = -1;
                goto out_err;
        }
 
-       spin_lock_irqsave(&p->sighand->siglock, flags);
+retry:
+       sh = rcu_dereference(p->sighand);
+
+       spin_lock_irqsave(&sh->siglock, flags);
+       if (p->sighand != sh) {
+               /* We raced with exec() in a multithreaded process... */
+               spin_unlock_irqrestore(&sh->siglock, flags);
+               goto retry;
+       }
+
+       /*
+        * We do the check here again to handle the following scenario:
+        *
+        * CPU 0                CPU 1
+        * send_sigqueue
+        * check PF_EXITING
+        * interrupt            exit code running
+        *                      __exit_signal
+        *                      lock sighand->siglock
+        *                      unlock sighand->siglock
+        * lock sh->siglock
+        * add(tsk->pending)    flush_sigqueue(tsk->pending)
+        *
+        */
+
+       if (unlikely(p->flags & PF_EXITING)) {
+               ret = -1;
+               goto out;
+       }
 
        if (unlikely(!list_empty(&q->list))) {
                /*
@@ -1388,9 +1455,9 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
                signal_wake_up(p, sig == SIGKILL);
 
 out:
-       spin_unlock_irqrestore(&p->sighand->siglock, flags);
+       spin_unlock_irqrestore(&sh->siglock, flags);
 out_err:
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
 
        return ret;
 }
@@ -1402,7 +1469,9 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
        int ret = 0;
 
        BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
+
        read_lock(&tasklist_lock);
+       /* Since it_lock is held, p->sighand cannot be NULL. */
        spin_lock_irqsave(&p->sighand->siglock, flags);
        handle_stop_signal(sig, p);
 
@@ -1436,7 +1505,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 out:
        spin_unlock_irqrestore(&p->sighand->siglock, flags);
        read_unlock(&tasklist_lock);
-       return(ret);
+       return ret;
 }
 
 /*