Merge branch 'master' into next
[linux-3.10.git] / kernel / signal.c
1 /*
2  *  linux/kernel/signal.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  *
6  *  1997-11-02  Modified for POSIX.1b signals by Richard Henderson
7  *
8  *  2003-06-02  Jim Houston - Concurrent Computer Corp.
9  *              Changes to use preallocated sigqueue structures
10  *              to allow signals to be sent reliably.
11  */
12
13 #include <linux/slab.h>
14 #include <linux/module.h>
15 #include <linux/init.h>
16 #include <linux/sched.h>
17 #include <linux/fs.h>
18 #include <linux/tty.h>
19 #include <linux/binfmts.h>
20 #include <linux/security.h>
21 #include <linux/syscalls.h>
22 #include <linux/ptrace.h>
23 #include <linux/signal.h>
24 #include <linux/signalfd.h>
25 #include <linux/tracehook.h>
26 #include <linux/capability.h>
27 #include <linux/freezer.h>
28 #include <linux/pid_namespace.h>
29 #include <linux/nsproxy.h>
30 #include <trace/sched.h>
31
32 #include <asm/param.h>
33 #include <asm/uaccess.h>
34 #include <asm/unistd.h>
35 #include <asm/siginfo.h>
36 #include "audit.h"      /* audit_signal_info() */
37
38 /*
39  * SLAB caches for signal bits.
40  */
41
42 static struct kmem_cache *sigqueue_cachep;	/* cache that __sigqueue_alloc() allocates struct sigqueue from and __sigqueue_free() returns it to */
43
44 DEFINE_TRACE(sched_signal_send);	/* tracepoint fired through trace_sched_signal_send() in __send_signal() */
45
46 static void __user *sig_handler(struct task_struct *t, int sig)
47 {
48         return t->sighand->action[sig - 1].sa.sa_handler;
49 }
50
51 static int sig_handler_ignored(void __user *handler, int sig)
52 {
53         /* Is it explicitly or implicitly ignored? */
54         return handler == SIG_IGN ||
55                 (handler == SIG_DFL && sig_kernel_ignore(sig));
56 }
57
58 static int sig_task_ignored(struct task_struct *t, int sig,
59                 int from_ancestor_ns)
60 {
61         void __user *handler;
62
63         handler = sig_handler(t, sig);
64
65         if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
66                         handler == SIG_DFL && !from_ancestor_ns)
67                 return 1;
68
69         return sig_handler_ignored(handler, sig);
70 }
71
72 static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns)
73 {
74         /*
75          * Blocked signals are never ignored, since the
76          * signal handler may change by the time it is
77          * unblocked.
78          */
79         if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
80                 return 0;
81
82         if (!sig_task_ignored(t, sig, from_ancestor_ns))
83                 return 0;
84
85         /*
86          * Tracers may want to know about even ignored signals.
87          */
88         return !tracehook_consider_ignored_signal(t, sig);
89 }
90
91 /*
92  * Re-calculate pending state from the set of locally pending
93  * signals, globally pending signals, and blocked signals.
94  */
95 static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
96 {
97         unsigned long ready;
98         long i;
99
100         switch (_NSIG_WORDS) {
101         default:
102                 for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
103                         ready |= signal->sig[i] &~ blocked->sig[i];
104                 break;
105
106         case 4: ready  = signal->sig[3] &~ blocked->sig[3];
107                 ready |= signal->sig[2] &~ blocked->sig[2];
108                 ready |= signal->sig[1] &~ blocked->sig[1];
109                 ready |= signal->sig[0] &~ blocked->sig[0];
110                 break;
111
112         case 2: ready  = signal->sig[1] &~ blocked->sig[1];
113                 ready |= signal->sig[0] &~ blocked->sig[0];
114                 break;
115
116         case 1: ready  = signal->sig[0] &~ blocked->sig[0];
117         }
118         return ready != 0;
119 }
120
121 #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
122
123 static int recalc_sigpending_tsk(struct task_struct *t)
124 {
125         if (t->signal->group_stop_count > 0 ||
126             PENDING(&t->pending, &t->blocked) ||
127             PENDING(&t->signal->shared_pending, &t->blocked)) {
128                 set_tsk_thread_flag(t, TIF_SIGPENDING);
129                 return 1;
130         }
131         /*
132          * We must never clear the flag in another thread, or in current
133          * when it's possible the current syscall is returning -ERESTART*.
134          * So we don't clear it here, and only callers who know they should do.
135          */
136         return 0;
137 }
138
/*
 * Recalculate TIF_SIGPENDING for @t and, if it ended up set, make sure
 * the task wakes up to notice it.  When @t is current the wakeup is
 * superfluous but a harmless no-op.
 */
void recalc_sigpending_and_wake(struct task_struct *t)
{
	if (!recalc_sigpending_tsk(t))
		return;

	signal_wake_up(t, 0);
}
148
149 void recalc_sigpending(void)
150 {
151         if (unlikely(tracehook_force_sigpending()))
152                 set_thread_flag(TIF_SIGPENDING);
153         else if (!recalc_sigpending_tsk(current) && !freezing(current))
154                 clear_thread_flag(TIF_SIGPENDING);
155
156 }
157
158 /* Given the mask, find the first available signal that should be serviced. */
159
160 int next_signal(struct sigpending *pending, sigset_t *mask)
161 {
162         unsigned long i, *s, *m, x;
163         int sig = 0;
164         
165         s = pending->signal.sig;
166         m = mask->sig;
167         switch (_NSIG_WORDS) {
168         default:
169                 for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m)
170                         if ((x = *s &~ *m) != 0) {
171                                 sig = ffz(~x) + i*_NSIG_BPW + 1;
172                                 break;
173                         }
174                 break;
175
176         case 2: if ((x = s[0] &~ m[0]) != 0)
177                         sig = 1;
178                 else if ((x = s[1] &~ m[1]) != 0)
179                         sig = _NSIG_BPW + 1;
180                 else
181                         break;
182                 sig += ffz(~x);
183                 break;
184
185         case 1: if ((x = *s &~ *m) != 0)
186                         sig = ffz(~x) + 1;
187                 break;
188         }
189         
190         return sig;
191 }
192
193 /*
194  * allocate a new signal queue record
195  * - this may be called without locks if and only if t == current, otherwise an
196  *   appopriate lock must be held to stop the target task from exiting
197  */
198 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
199                                          int override_rlimit)
200 {
201         struct sigqueue *q = NULL;
202         struct user_struct *user;
203
204         /*
205          * We won't get problems with the target's UID changing under us
206          * because changing it requires RCU be used, and if t != current, the
207          * caller must be holding the RCU readlock (by way of a spinlock) and
208          * we use RCU protection here
209          */
210         user = get_uid(__task_cred(t)->user);
211         atomic_inc(&user->sigpending);
212         if (override_rlimit ||
213             atomic_read(&user->sigpending) <=
214                         t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
215                 q = kmem_cache_alloc(sigqueue_cachep, flags);
216         if (unlikely(q == NULL)) {
217                 atomic_dec(&user->sigpending);
218                 free_uid(user);
219         } else {
220                 INIT_LIST_HEAD(&q->list);
221                 q->flags = 0;
222                 q->user = user;
223         }
224
225         return q;
226 }
227
228 static void __sigqueue_free(struct sigqueue *q)
229 {
230         if (q->flags & SIGQUEUE_PREALLOC)
231                 return;
232         atomic_dec(&q->user->sigpending);
233         free_uid(q->user);
234         kmem_cache_free(sigqueue_cachep, q);
235 }
236
237 void flush_sigqueue(struct sigpending *queue)
238 {
239         struct sigqueue *q;
240
241         sigemptyset(&queue->signal);
242         while (!list_empty(&queue->list)) {
243                 q = list_entry(queue->list.next, struct sigqueue , list);
244                 list_del_init(&q->list);
245                 __sigqueue_free(q);
246         }
247 }
248
249 /*
250  * Flush all pending signals for a task.
251  */
252 void __flush_signals(struct task_struct *t)
253 {
254         clear_tsk_thread_flag(t, TIF_SIGPENDING);
255         flush_sigqueue(&t->pending);
256         flush_sigqueue(&t->signal->shared_pending);
257 }
258
259 void flush_signals(struct task_struct *t)
260 {
261         unsigned long flags;
262
263         spin_lock_irqsave(&t->sighand->siglock, flags);
264         __flush_signals(t);
265         spin_unlock_irqrestore(&t->sighand->siglock, flags);
266 }
267
268 static void __flush_itimer_signals(struct sigpending *pending)
269 {
270         sigset_t signal, retain;
271         struct sigqueue *q, *n;
272
273         signal = pending->signal;
274         sigemptyset(&retain);
275
276         list_for_each_entry_safe(q, n, &pending->list, list) {
277                 int sig = q->info.si_signo;
278
279                 if (likely(q->info.si_code != SI_TIMER)) {
280                         sigaddset(&retain, sig);
281                 } else {
282                         sigdelset(&signal, sig);
283                         list_del_init(&q->list);
284                         __sigqueue_free(q);
285                 }
286         }
287
288         sigorsets(&pending->signal, &signal, &retain);
289 }
290
291 void flush_itimer_signals(void)
292 {
293         struct task_struct *tsk = current;
294         unsigned long flags;
295
296         spin_lock_irqsave(&tsk->sighand->siglock, flags);
297         __flush_itimer_signals(&tsk->pending);
298         __flush_itimer_signals(&tsk->signal->shared_pending);
299         spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
300 }
301
302 void ignore_signals(struct task_struct *t)
303 {
304         int i;
305
306         for (i = 0; i < _NSIG; ++i)
307                 t->sighand->action[i].sa.sa_handler = SIG_IGN;
308
309         flush_signals(t);
310 }
311
312 /*
313  * Flush all handlers for a task.
314  */
315
316 void
317 flush_signal_handlers(struct task_struct *t, int force_default)
318 {
319         int i;
320         struct k_sigaction *ka = &t->sighand->action[0];
321         for (i = _NSIG ; i != 0 ; i--) {
322                 if (force_default || ka->sa.sa_handler != SIG_IGN)
323                         ka->sa.sa_handler = SIG_DFL;
324                 ka->sa.sa_flags = 0;
325                 sigemptyset(&ka->sa.sa_mask);
326                 ka++;
327         }
328 }
329
330 int unhandled_signal(struct task_struct *tsk, int sig)
331 {
332         void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
333         if (is_global_init(tsk))
334                 return 1;
335         if (handler != SIG_IGN && handler != SIG_DFL)
336                 return 0;
337         return !tracehook_consider_fatal_signal(tsk, sig);
338 }
339
340
341 /* Notify the system that a driver wants to block all signals for this
342  * process, and wants to be notified if any signals at all were to be
343  * sent/acted upon.  If the notifier routine returns non-zero, then the
344  * signal will be acted upon after all.  If the notifier routine returns 0,
345  * then then signal will be blocked.  Only one block per process is
346  * allowed.  priv is a pointer to private data that the notifier routine
347  * can use to determine if the signal should be blocked or not.  */
348
349 void
350 block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
351 {
352         unsigned long flags;
353
354         spin_lock_irqsave(&current->sighand->siglock, flags);
355         current->notifier_mask = mask;
356         current->notifier_data = priv;
357         current->notifier = notifier;
358         spin_unlock_irqrestore(&current->sighand->siglock, flags);
359 }
360
361 /* Notify the system that blocking has ended. */
362
363 void
364 unblock_all_signals(void)
365 {
366         unsigned long flags;
367
368         spin_lock_irqsave(&current->sighand->siglock, flags);
369         current->notifier = NULL;
370         current->notifier_data = NULL;
371         recalc_sigpending();
372         spin_unlock_irqrestore(&current->sighand->siglock, flags);
373 }
374
375 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
376 {
377         struct sigqueue *q, *first = NULL;
378
379         /*
380          * Collect the siginfo appropriate to this signal.  Check if
381          * there is another siginfo for the same signal.
382         */
383         list_for_each_entry(q, &list->list, list) {
384                 if (q->info.si_signo == sig) {
385                         if (first)
386                                 goto still_pending;
387                         first = q;
388                 }
389         }
390
391         sigdelset(&list->signal, sig);
392
393         if (first) {
394 still_pending:
395                 list_del_init(&first->list);
396                 copy_siginfo(info, &first->info);
397                 __sigqueue_free(first);
398         } else {
399                 /* Ok, it wasn't in the queue.  This must be
400                    a fast-pathed signal or we must have been
401                    out of queue space.  So zero out the info.
402                  */
403                 info->si_signo = sig;
404                 info->si_errno = 0;
405                 info->si_code = 0;
406                 info->si_pid = 0;
407                 info->si_uid = 0;
408         }
409 }
410
411 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
412                         siginfo_t *info)
413 {
414         int sig = next_signal(pending, mask);
415
416         if (sig) {
417                 if (current->notifier) {
418                         if (sigismember(current->notifier_mask, sig)) {
419                                 if (!(current->notifier)(current->notifier_data)) {
420                                         clear_thread_flag(TIF_SIGPENDING);
421                                         return 0;
422                                 }
423                         }
424                 }
425
426                 collect_signal(sig, pending, info);
427         }
428
429         return sig;
430 }
431
432 /*
433  * Dequeue a signal and return the element to the caller, which is 
434  * expected to free it.
435  *
436  * All callers have to hold the siglock.
437  */
438 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
439 {
440         int signr;
441
442         /* We only dequeue private signals from ourselves, we don't let
443          * signalfd steal them
444          */
445         signr = __dequeue_signal(&tsk->pending, mask, info);
446         if (!signr) {
447                 signr = __dequeue_signal(&tsk->signal->shared_pending,
448                                          mask, info);
449                 /*
450                  * itimer signal ?
451                  *
452                  * itimers are process shared and we restart periodic
453                  * itimers in the signal delivery path to prevent DoS
454                  * attacks in the high resolution timer case. This is
455                  * compliant with the old way of self restarting
456                  * itimers, as the SIGALRM is a legacy signal and only
457                  * queued once. Changing the restart behaviour to
458                  * restart the timer in the signal dequeue path is
459                  * reducing the timer noise on heavy loaded !highres
460                  * systems too.
461                  */
462                 if (unlikely(signr == SIGALRM)) {
463                         struct hrtimer *tmr = &tsk->signal->real_timer;
464
465                         if (!hrtimer_is_queued(tmr) &&
466                             tsk->signal->it_real_incr.tv64 != 0) {
467                                 hrtimer_forward(tmr, tmr->base->get_time(),
468                                                 tsk->signal->it_real_incr);
469                                 hrtimer_restart(tmr);
470                         }
471                 }
472         }
473
474         recalc_sigpending();
475         if (!signr)
476                 return 0;
477
478         if (unlikely(sig_kernel_stop(signr))) {
479                 /*
480                  * Set a marker that we have dequeued a stop signal.  Our
481                  * caller might release the siglock and then the pending
482                  * stop signal it is about to process is no longer in the
483                  * pending bitmasks, but must still be cleared by a SIGCONT
484                  * (and overruled by a SIGKILL).  So those cases clear this
485                  * shared flag after we've set it.  Note that this flag may
486                  * remain set after the signal we return is ignored or
487                  * handled.  That doesn't matter because its only purpose
488                  * is to alert stop-signal processing code when another
489                  * processor has come along and cleared the flag.
490                  */
491                 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
492         }
493         if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
494                 /*
495                  * Release the siglock to ensure proper locking order
496                  * of timer locks outside of siglocks.  Note, we leave
497                  * irqs disabled here, since the posix-timers code is
498                  * about to disable them again anyway.
499                  */
500                 spin_unlock(&tsk->sighand->siglock);
501                 do_schedule_next_timer(info);
502                 spin_lock(&tsk->sighand->siglock);
503         }
504         return signr;
505 }
506
507 /*
508  * Tell a process that it has a new active signal..
509  *
510  * NOTE! we rely on the previous spin_lock to
511  * lock interrupts for us! We can only be called with
512  * "siglock" held, and the local interrupt must
513  * have been disabled when that got acquired!
514  *
515  * No need to set need_resched since signal event passing
516  * goes through ->blocked
517  */
518 void signal_wake_up(struct task_struct *t, int resume)
519 {
520         unsigned int mask;
521
522         set_tsk_thread_flag(t, TIF_SIGPENDING);
523
524         /*
525          * For SIGKILL, we want to wake it up in the stopped/traced/killable
526          * case. We don't check t->state here because there is a race with it
527          * executing another processor and just now entering stopped state.
528          * By using wake_up_state, we ensure the process will wake up and
529          * handle its death signal.
530          */
531         mask = TASK_INTERRUPTIBLE;
532         if (resume)
533                 mask |= TASK_WAKEKILL;
534         if (!wake_up_state(t, mask))
535                 kick_process(t);
536 }
537
538 /*
539  * Remove signals in mask from the pending set and queue.
540  * Returns 1 if any signals were found.
541  *
542  * All callers must be holding the siglock.
543  *
544  * This version takes a sigset mask and looks at all signals,
545  * not just those in the first mask word.
546  */
547 static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
548 {
549         struct sigqueue *q, *n;
550         sigset_t m;
551
552         sigandsets(&m, mask, &s->signal);
553         if (sigisemptyset(&m))
554                 return 0;
555
556         signandsets(&s->signal, &s->signal, mask);
557         list_for_each_entry_safe(q, n, &s->list, list) {
558                 if (sigismember(mask, q->info.si_signo)) {
559                         list_del_init(&q->list);
560                         __sigqueue_free(q);
561                 }
562         }
563         return 1;
564 }
565 /*
566  * Remove signals in mask from the pending set and queue.
567  * Returns 1 if any signals were found.
568  *
569  * All callers must be holding the siglock.
570  */
571 static int rm_from_queue(unsigned long mask, struct sigpending *s)
572 {
573         struct sigqueue *q, *n;
574
575         if (!sigtestsetmask(&s->signal, mask))
576                 return 0;
577
578         sigdelsetmask(&s->signal, mask);
579         list_for_each_entry_safe(q, n, &s->list, list) {
580                 if (q->info.si_signo < SIGRTMIN &&
581                     (mask & sigmask(q->info.si_signo))) {
582                         list_del_init(&q->list);
583                         __sigqueue_free(q);
584                 }
585         }
586         return 1;
587 }
588
589 /*
590  * Bad permissions for sending the signal
591  * - the caller must hold at least the RCU read lock
592  */
593 static int check_kill_permission(int sig, struct siginfo *info,
594                                  struct task_struct *t)
595 {
596         const struct cred *cred = current_cred(), *tcred;
597         struct pid *sid;
598         int error;
599
600         if (!valid_signal(sig))
601                 return -EINVAL;
602
603         if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
604                 return 0;
605
606         error = audit_signal_info(sig, t); /* Let audit system see the signal */
607         if (error)
608                 return error;
609
610         tcred = __task_cred(t);
611         if ((cred->euid ^ tcred->suid) &&
612             (cred->euid ^ tcred->uid) &&
613             (cred->uid  ^ tcred->suid) &&
614             (cred->uid  ^ tcred->uid) &&
615             !capable(CAP_KILL)) {
616                 switch (sig) {
617                 case SIGCONT:
618                         sid = task_session(t);
619                         /*
620                          * We don't return the error if sid == NULL. The
621                          * task was unhashed, the caller must notice this.
622                          */
623                         if (!sid || sid == task_session(current))
624                                 break;
625                 default:
626                         return -EPERM;
627                 }
628         }
629
630         return security_task_kill(t, info, sig, 0);
631 }
632
633 /*
634  * Handle magic process-wide effects of stop/continue signals. Unlike
635  * the signal actions, these happen immediately at signal-generation
636  * time regardless of blocking, ignoring, or handling.  This does the
637  * actual continuing for SIGCONT, but not the actual stopping for stop
638  * signals. The process stop is done as a signal action for SIG_DFL.
639  *
640  * Returns true if the signal should be actually delivered, otherwise
641  * it should be dropped.
642  */
643 static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
644 {
645         struct signal_struct *signal = p->signal;
646         struct task_struct *t;
647
648         if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) {
649                 /*
650                  * The process is in the middle of dying, nothing to do.
651                  */
652         } else if (sig_kernel_stop(sig)) {
653                 /*
654                  * This is a stop signal.  Remove SIGCONT from all queues.
655                  */
656                 rm_from_queue(sigmask(SIGCONT), &signal->shared_pending);
657                 t = p;
658                 do {
659                         rm_from_queue(sigmask(SIGCONT), &t->pending);
660                 } while_each_thread(p, t);
661         } else if (sig == SIGCONT) {
662                 unsigned int why;
663                 /*
664                  * Remove all stop signals from all queues,
665                  * and wake all threads.
666                  */
667                 rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
668                 t = p;
669                 do {
670                         unsigned int state;
671                         rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
672                         /*
673                          * If there is a handler for SIGCONT, we must make
674                          * sure that no thread returns to user mode before
675                          * we post the signal, in case it was the only
676                          * thread eligible to run the signal handler--then
677                          * it must not do anything between resuming and
678                          * running the handler.  With the TIF_SIGPENDING
679                          * flag set, the thread will pause and acquire the
680                          * siglock that we hold now and until we've queued
681                          * the pending signal.
682                          *
683                          * Wake up the stopped thread _after_ setting
684                          * TIF_SIGPENDING
685                          */
686                         state = __TASK_STOPPED;
687                         if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
688                                 set_tsk_thread_flag(t, TIF_SIGPENDING);
689                                 state |= TASK_INTERRUPTIBLE;
690                         }
691                         wake_up_state(t, state);
692                 } while_each_thread(p, t);
693
694                 /*
695                  * Notify the parent with CLD_CONTINUED if we were stopped.
696                  *
697                  * If we were in the middle of a group stop, we pretend it
698                  * was already finished, and then continued. Since SIGCHLD
699                  * doesn't queue we report only CLD_STOPPED, as if the next
700                  * CLD_CONTINUED was dropped.
701                  */
702                 why = 0;
703                 if (signal->flags & SIGNAL_STOP_STOPPED)
704                         why |= SIGNAL_CLD_CONTINUED;
705                 else if (signal->group_stop_count)
706                         why |= SIGNAL_CLD_STOPPED;
707
708                 if (why) {
709                         /*
710                          * The first thread which returns from finish_stop()
711                          * will take ->siglock, notice SIGNAL_CLD_MASK, and
712                          * notify its parent. See get_signal_to_deliver().
713                          */
714                         signal->flags = why | SIGNAL_STOP_CONTINUED;
715                         signal->group_stop_count = 0;
716                         signal->group_exit_code = 0;
717                 } else {
718                         /*
719                          * We are not stopped, but there could be a stop
720                          * signal in the middle of being processed after
721                          * being removed from the queue.  Clear that too.
722                          */
723                         signal->flags &= ~SIGNAL_STOP_DEQUEUED;
724                 }
725         }
726
727         return !sig_ignored(p, sig, from_ancestor_ns);
728 }
729
730 /*
731  * Test if P wants to take SIG.  After we've checked all threads with this,
732  * it's equivalent to finding no threads not blocking SIG.  Any threads not
733  * blocking SIG were ruled out because they are not running and already
734  * have pending signals.  Such threads will dequeue from the shared queue
735  * as soon as they're available, so putting the signal on the shared queue
736  * will be equivalent to sending it to one such thread.
737  */
738 static inline int wants_signal(int sig, struct task_struct *p)
739 {
740         if (sigismember(&p->blocked, sig))
741                 return 0;
742         if (p->flags & PF_EXITING)
743                 return 0;
744         if (sig == SIGKILL)
745                 return 1;
746         if (task_is_stopped_or_traced(p))
747                 return 0;
748         return task_curr(p) || !signal_pending(p);
749 }
750
751 static void complete_signal(int sig, struct task_struct *p, int group)
752 {
753         struct signal_struct *signal = p->signal;
754         struct task_struct *t;
755
756         /*
757          * Now find a thread we can wake up to take the signal off the queue.
758          *
759          * If the main thread wants the signal, it gets first crack.
760          * Probably the least surprising to the average bear.
761          */
762         if (wants_signal(sig, p))
763                 t = p;
764         else if (!group || thread_group_empty(p))
765                 /*
766                  * There is just one thread and it does not need to be woken.
767                  * It will dequeue unblocked signals before it runs again.
768                  */
769                 return;
770         else {
771                 /*
772                  * Otherwise try to find a suitable thread.
773                  */
774                 t = signal->curr_target;
775                 while (!wants_signal(sig, t)) {
776                         t = next_thread(t);
777                         if (t == signal->curr_target)
778                                 /*
779                                  * No thread needs to be woken.
780                                  * Any eligible threads will see
781                                  * the signal in the queue soon.
782                                  */
783                                 return;
784                 }
785                 signal->curr_target = t;
786         }
787
788         /*
789          * Found a killable thread.  If the signal will be fatal,
790          * then start taking the whole group down immediately.
791          */
792         if (sig_fatal(p, sig) &&
793             !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&
794             !sigismember(&t->real_blocked, sig) &&
795             (sig == SIGKILL ||
796              !tracehook_consider_fatal_signal(t, sig))) {
797                 /*
798                  * This signal will be fatal to the whole group.
799                  */
800                 if (!sig_kernel_coredump(sig)) {
801                         /*
802                          * Start a group exit and wake everybody up.
803                          * This way we don't have other threads
804                          * running and doing things after a slower
805                          * thread has the fatal signal pending.
806                          */
807                         signal->flags = SIGNAL_GROUP_EXIT;
808                         signal->group_exit_code = sig;
809                         signal->group_stop_count = 0;
810                         t = p;
811                         do {
812                                 sigaddset(&t->pending.signal, SIGKILL);
813                                 signal_wake_up(t, 1);
814                         } while_each_thread(p, t);
815                         return;
816                 }
817         }
818
819         /*
820          * The signal is already in the shared-pending queue.
821          * Tell the chosen thread to wake up and dequeue it.
822          */
823         signal_wake_up(t, sig == SIGKILL);
824         return;
825 }
826
827 static inline int legacy_queue(struct sigpending *signals, int sig)
828 {
829         return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
830 }
831
832 static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
833                         int group, int from_ancestor_ns)
834 {
835         struct sigpending *pending;
836         struct sigqueue *q;
837
838         trace_sched_signal_send(sig, t);
839
840         assert_spin_locked(&t->sighand->siglock);
841
842         if (!prepare_signal(sig, t, from_ancestor_ns))
843                 return 0;
844
845         pending = group ? &t->signal->shared_pending : &t->pending;
846         /*
847          * Short-circuit ignored signals and support queuing
848          * exactly one non-rt signal, so that we can get more
849          * detailed information about the cause of the signal.
850          */
851         if (legacy_queue(pending, sig))
852                 return 0;
853         /*
854          * fast-pathed signals for kernel-internal things like SIGSTOP
855          * or SIGKILL.
856          */
857         if (info == SEND_SIG_FORCED)
858                 goto out_set;
859
860         /* Real-time signals must be queued if sent by sigqueue, or
861            some other real-time mechanism.  It is implementation
862            defined whether kill() does so.  We attempt to do so, on
863            the principle of least surprise, but since kill is not
864            allowed to fail with EAGAIN when low on memory we just
865            make sure at least one signal gets delivered and don't
866            pass on the info struct.  */
867
868         q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
869                                              (is_si_special(info) ||
870                                               info->si_code >= 0)));
871         if (q) {
872                 list_add_tail(&q->list, &pending->list);
873                 switch ((unsigned long) info) {
874                 case (unsigned long) SEND_SIG_NOINFO:
875                         q->info.si_signo = sig;
876                         q->info.si_errno = 0;
877                         q->info.si_code = SI_USER;
878                         q->info.si_pid = task_tgid_nr_ns(current,
879                                                         task_active_pid_ns(t));
880                         q->info.si_uid = current_uid();
881                         break;
882                 case (unsigned long) SEND_SIG_PRIV:
883                         q->info.si_signo = sig;
884                         q->info.si_errno = 0;
885                         q->info.si_code = SI_KERNEL;
886                         q->info.si_pid = 0;
887                         q->info.si_uid = 0;
888                         break;
889                 default:
890                         copy_siginfo(&q->info, info);
891                         if (from_ancestor_ns)
892                                 q->info.si_pid = 0;
893                         break;
894                 }
895         } else if (!is_si_special(info)) {
896                 if (sig >= SIGRTMIN && info->si_code != SI_USER)
897                 /*
898                  * Queue overflow, abort.  We may abort if the signal was rt
899                  * and sent by user using something other than kill().
900                  */
901                         return -EAGAIN;
902         }
903
904 out_set:
905         signalfd_notify(t, sig);
906         sigaddset(&pending->signal, sig);
907         complete_signal(sig, t, group);
908         return 0;
909 }
910
/*
 * Wrapper around __send_signal() that first works out the
 * from_ancestor_ns argument.  With CONFIG_PID_NS, the flag is set when
 * @info is a real, user-originated siginfo and current has no positive
 * pid number in the active pid namespace of @t.
 */
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
			int group)
{
	int from_ancestor_ns = 0;

#ifdef CONFIG_PID_NS
	if (!is_si_special(info) && SI_FROMUSER(info)) {
		if (task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0)
			from_ancestor_ns = 1;
	}
#endif

	return __send_signal(sig, info, t, group, from_ancestor_ns);
}
924
925 int print_fatal_signals;
926
927 static void print_fatal_signal(struct pt_regs *regs, int signr)
928 {
929         printk("%s/%d: potentially unexpected fatal signal %d.\n",
930                 current->comm, task_pid_nr(current), signr);
931
932 #if defined(__i386__) && !defined(__arch_um__)
933         printk("code at %08lx: ", regs->ip);
934         {
935                 int i;
936                 for (i = 0; i < 16; i++) {
937                         unsigned char insn;
938
939                         __get_user(insn, (unsigned char *)(regs->ip + i));
940                         printk("%02x ", insn);
941                 }
942         }
943 #endif
944         printk("\n");
945         preempt_disable();
946         show_regs(regs);
947         preempt_enable();
948 }
949
950 static int __init setup_print_fatal_signals(char *str)
951 {
952         get_option (&str, &print_fatal_signals);
953
954         return 1;
955 }
956
957 __setup("print-fatal-signals=", setup_print_fatal_signals);
958
/*
 * Send @sig to @p with the group flag set, i.e. through send_signal()
 * targeting the shared pending set.  Returns send_signal()'s result.
 */
int __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
	const int group = 1;

	return send_signal(sig, info, p, group);
}
964
/*
 * Send @sig to the single thread @t: send_signal() with the group flag
 * clear, so the thread's private pending set is used.
 */
static int specific_send_sig_info(int sig, struct siginfo *info,
				  struct task_struct *t)
{
	const int group = 0;

	return send_signal(sig, info, t, group);
}
970
971 /*
972  * Force a signal that the process can't ignore: if necessary
973  * we unblock the signal and change any SIG_IGN to SIG_DFL.
974  *
975  * Note: If we unblock the signal, we always reset it to SIG_DFL,
976  * since we do not want to have a signal handler that was blocked
977  * be invoked when user space had explicitly blocked it.
978  *
979  * We don't want to have recursive SIGSEGV's etc, for example,
980  * that is why we also clear SIGNAL_UNKILLABLE.
981  */
982 int
983 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
984 {
985         unsigned long int flags;
986         int ret, blocked, ignored;
987         struct k_sigaction *action;
988
989         spin_lock_irqsave(&t->sighand->siglock, flags);
990         action = &t->sighand->action[sig-1];
991         ignored = action->sa.sa_handler == SIG_IGN;
992         blocked = sigismember(&t->blocked, sig);
993         if (blocked || ignored) {
994                 action->sa.sa_handler = SIG_DFL;
995                 if (blocked) {
996                         sigdelset(&t->blocked, sig);
997                         recalc_sigpending_and_wake(t);
998                 }
999         }
1000         if (action->sa.sa_handler == SIG_DFL)
1001                 t->signal->flags &= ~SIGNAL_UNKILLABLE;
1002         ret = specific_send_sig_info(sig, info, t);
1003         spin_unlock_irqrestore(&t->sighand->siglock, flags);
1004
1005         return ret;
1006 }
1007
1008 void
1009 force_sig_specific(int sig, struct task_struct *t)
1010 {
1011         force_sig_info(sig, SEND_SIG_FORCED, t);
1012 }
1013
1014 /*
1015  * Nuke all other threads in the group.
1016  */
1017 void zap_other_threads(struct task_struct *p)
1018 {
1019         struct task_struct *t;
1020
1021         p->signal->group_stop_count = 0;
1022
1023         for (t = next_thread(p); t != p; t = next_thread(t)) {
1024                 /*
1025                  * Don't bother with already dead threads
1026                  */
1027                 if (t->exit_state)
1028                         continue;
1029
1030                 /* SIGKILL will be handled before any pending SIGSTOP */
1031                 sigaddset(&t->pending.signal, SIGKILL);
1032                 signal_wake_up(t, 1);
1033         }
1034 }
1035
1036 int __fatal_signal_pending(struct task_struct *tsk)
1037 {
1038         return sigismember(&tsk->pending.signal, SIGKILL);
1039 }
1040 EXPORT_SYMBOL(__fatal_signal_pending);
1041
1042 struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
1043 {
1044         struct sighand_struct *sighand;
1045
1046         rcu_read_lock();
1047         for (;;) {
1048                 sighand = rcu_dereference(tsk->sighand);
1049                 if (unlikely(sighand == NULL))
1050                         break;
1051
1052                 spin_lock_irqsave(&sighand->siglock, *flags);
1053                 if (likely(sighand == tsk->sighand))
1054                         break;
1055                 spin_unlock_irqrestore(&sighand->siglock, *flags);
1056         }
1057         rcu_read_unlock();
1058
1059         return sighand;
1060 }
1061
1062 /*
1063  * send signal info to all the members of a group
1064  * - the caller must hold the RCU read lock at least
1065  */
1066 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1067 {
1068         unsigned long flags;
1069         int ret;
1070
1071         ret = check_kill_permission(sig, info, p);
1072
1073         if (!ret && sig) {
1074                 ret = -ESRCH;
1075                 if (lock_task_sighand(p, &flags)) {
1076                         ret = __group_send_sig_info(sig, info, p);
1077                         unlock_task_sighand(p, &flags);
1078                 }
1079         }
1080
1081         return ret;
1082 }
1083
1084 /*
1085  * __kill_pgrp_info() sends a signal to a process group: this is what the tty
1086  * control characters do (^C, ^Z etc)
1087  * - the caller must hold at least a readlock on tasklist_lock
1088  */
1089 int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
1090 {
1091         struct task_struct *p = NULL;
1092         int retval, success;
1093
1094         success = 0;
1095         retval = -ESRCH;
1096         do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
1097                 int err = group_send_sig_info(sig, info, p);
1098                 success |= !err;
1099                 retval = err;
1100         } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
1101         return success ? 0 : retval;
1102 }
1103
1104 int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
1105 {
1106         int error = -ESRCH;
1107         struct task_struct *p;
1108
1109         rcu_read_lock();
1110 retry:
1111         p = pid_task(pid, PIDTYPE_PID);
1112         if (p) {
1113                 error = group_send_sig_info(sig, info, p);
1114                 if (unlikely(error == -ESRCH))
1115                         /*
1116                          * The task was unhashed in between, try again.
1117                          * If it is dead, pid_task() will return NULL,
1118                          * if we race with de_thread() it will find the
1119                          * new leader.
1120                          */
1121                         goto retry;
1122         }
1123         rcu_read_unlock();
1124
1125         return error;
1126 }
1127
1128 int
1129 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1130 {
1131         int error;
1132         rcu_read_lock();
1133         error = kill_pid_info(sig, info, find_vpid(pid));
1134         rcu_read_unlock();
1135         return error;
1136 }
1137
1138 /* like kill_pid_info(), but doesn't use uid/euid of "current" */
1139 int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1140                       uid_t uid, uid_t euid, u32 secid)
1141 {
1142         int ret = -EINVAL;
1143         struct task_struct *p;
1144         const struct cred *pcred;
1145
1146         if (!valid_signal(sig))
1147                 return ret;
1148
1149         read_lock(&tasklist_lock);
1150         p = pid_task(pid, PIDTYPE_PID);
1151         if (!p) {
1152                 ret = -ESRCH;
1153                 goto out_unlock;
1154         }
1155         pcred = __task_cred(p);
1156         if ((info == SEND_SIG_NOINFO ||
1157              (!is_si_special(info) && SI_FROMUSER(info))) &&
1158             euid != pcred->suid && euid != pcred->uid &&
1159             uid  != pcred->suid && uid  != pcred->uid) {
1160                 ret = -EPERM;
1161                 goto out_unlock;
1162         }
1163         ret = security_task_kill(p, info, sig, secid);
1164         if (ret)
1165                 goto out_unlock;
1166         if (sig && p->sighand) {
1167                 unsigned long flags;
1168                 spin_lock_irqsave(&p->sighand->siglock, flags);
1169                 ret = __send_signal(sig, info, p, 1, 0);
1170                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1171         }
1172 out_unlock:
1173         read_unlock(&tasklist_lock);
1174         return ret;
1175 }
1176 EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
1177
1178 /*
1179  * kill_something_info() interprets pid in interesting ways just like kill(2).
1180  *
1181  * POSIX specifies that kill(-1,sig) is unspecified, but what we have
1182  * is probably wrong.  Should make it like BSD or SYSV.
1183  */
1184
1185 static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1186 {
1187         int ret;
1188
1189         if (pid > 0) {
1190                 rcu_read_lock();
1191                 ret = kill_pid_info(sig, info, find_vpid(pid));
1192                 rcu_read_unlock();
1193                 return ret;
1194         }
1195
1196         read_lock(&tasklist_lock);
1197         if (pid != -1) {
1198                 ret = __kill_pgrp_info(sig, info,
1199                                 pid ? find_vpid(-pid) : task_pgrp(current));
1200         } else {
1201                 int retval = 0, count = 0;
1202                 struct task_struct * p;
1203
1204                 for_each_process(p) {
1205                         if (task_pid_vnr(p) > 1 &&
1206                                         !same_thread_group(p, current)) {
1207                                 int err = group_send_sig_info(sig, info, p);
1208                                 ++count;
1209                                 if (err != -EPERM)
1210                                         retval = err;
1211                         }
1212                 }
1213                 ret = count ? retval : -ESRCH;
1214         }
1215         read_unlock(&tasklist_lock);
1216
1217         return ret;
1218 }
1219
1220 /*
1221  * These are for backward compatibility with the rest of the kernel source.
1222  */
1223
1224 /*
1225  * The caller must ensure the task can't exit.
1226  */
1227 int
1228 send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1229 {
1230         int ret;
1231         unsigned long flags;
1232
1233         /*
1234          * Make sure legacy kernel users don't send in bad values
1235          * (normal paths check this in check_kill_permission).
1236          */
1237         if (!valid_signal(sig))
1238                 return -EINVAL;
1239
1240         spin_lock_irqsave(&p->sighand->siglock, flags);
1241         ret = specific_send_sig_info(sig, info, p);
1242         spin_unlock_irqrestore(&p->sighand->siglock, flags);
1243         return ret;
1244 }
1245
1246 #define __si_special(priv) \
1247         ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO)
1248
1249 int
1250 send_sig(int sig, struct task_struct *p, int priv)
1251 {
1252         return send_sig_info(sig, __si_special(priv), p);
1253 }
1254
1255 void
1256 force_sig(int sig, struct task_struct *p)
1257 {
1258         force_sig_info(sig, SEND_SIG_PRIV, p);
1259 }
1260
1261 /*
1262  * When things go south during signal handling, we
1263  * will force a SIGSEGV. And if the signal that caused
1264  * the problem was already a SIGSEGV, we'll want to
1265  * make sure we don't even try to deliver the signal..
1266  */
1267 int
1268 force_sigsegv(int sig, struct task_struct *p)
1269 {
1270         if (sig == SIGSEGV) {
1271                 unsigned long flags;
1272                 spin_lock_irqsave(&p->sighand->siglock, flags);
1273                 p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
1274                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1275         }
1276         force_sig(SIGSEGV, p);
1277         return 0;
1278 }
1279
1280 int kill_pgrp(struct pid *pid, int sig, int priv)
1281 {
1282         int ret;
1283
1284         read_lock(&tasklist_lock);
1285         ret = __kill_pgrp_info(sig, __si_special(priv), pid);
1286         read_unlock(&tasklist_lock);
1287
1288         return ret;
1289 }
1290 EXPORT_SYMBOL(kill_pgrp);
1291
/*
 * Signal the task identified by @pid with @sig via kill_pid_info().
 * @priv selects SEND_SIG_PRIV (nonzero) or SEND_SIG_NOINFO (zero).
 */
int kill_pid(struct pid *pid, int sig, int priv)
{
	struct siginfo *info = __si_special(priv);

	return kill_pid_info(sig, info, pid);
}
EXPORT_SYMBOL(kill_pid);
1297
1298 /*
1299  * These functions support sending signals using preallocated sigqueue
1300  * structures.  This is needed "because realtime applications cannot
1301  * afford to lose notifications of asynchronous events, like timer
1302  * expirations or I/O completions".  In the case of Posix Timers 
1303  * we allocate the sigqueue structure from the timer_create.  If this
1304  * allocation fails we are able to report the failure to the application
1305  * with an EAGAIN error.
1306  */
1307  
1308 struct sigqueue *sigqueue_alloc(void)
1309 {
1310         struct sigqueue *q;
1311
1312         if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0)))
1313                 q->flags |= SIGQUEUE_PREALLOC;
1314         return(q);
1315 }
1316
1317 void sigqueue_free(struct sigqueue *q)
1318 {
1319         unsigned long flags;
1320         spinlock_t *lock = &current->sighand->siglock;
1321
1322         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1323         /*
1324          * We must hold ->siglock while testing q->list
1325          * to serialize with collect_signal() or with
1326          * __exit_signal()->flush_sigqueue().
1327          */
1328         spin_lock_irqsave(lock, flags);
1329         q->flags &= ~SIGQUEUE_PREALLOC;
1330         /*
1331          * If it is queued it will be freed when dequeued,
1332          * like the "regular" sigqueue.
1333          */
1334         if (!list_empty(&q->list))
1335                 q = NULL;
1336         spin_unlock_irqrestore(lock, flags);
1337
1338         if (q)
1339                 __sigqueue_free(q);
1340 }
1341
1342 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1343 {
1344         int sig = q->info.si_signo;
1345         struct sigpending *pending;
1346         unsigned long flags;
1347         int ret;
1348
1349         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1350
1351         ret = -1;
1352         if (!likely(lock_task_sighand(t, &flags)))
1353                 goto ret;
1354
1355         ret = 1; /* the signal is ignored */
1356         if (!prepare_signal(sig, t, 0))
1357                 goto out;
1358
1359         ret = 0;
1360         if (unlikely(!list_empty(&q->list))) {
1361                 /*
1362                  * If an SI_TIMER entry is already queue just increment
1363                  * the overrun count.
1364                  */
1365                 BUG_ON(q->info.si_code != SI_TIMER);
1366                 q->info.si_overrun++;
1367                 goto out;
1368         }
1369         q->info.si_overrun = 0;
1370
1371         signalfd_notify(t, sig);
1372         pending = group ? &t->signal->shared_pending : &t->pending;
1373         list_add_tail(&q->list, &pending->list);
1374         sigaddset(&pending->signal, sig);
1375         complete_signal(sig, t, group);
1376 out:
1377         unlock_task_sighand(t, &flags);
1378 ret:
1379         return ret;
1380 }
1381
1382 /*
1383  * Wake up any threads in the parent blocked in wait* syscalls.
1384  */
1385 static inline void __wake_up_parent(struct task_struct *p,
1386                                     struct task_struct *parent)
1387 {
1388         wake_up_interruptible_sync(&parent->signal->wait_chldexit);
1389 }
1390
1391 /*
1392  * Let a parent know about the death of a child.
1393  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
1394  *
1395  * Returns -1 if our parent ignored us and so we've switched to
1396  * self-reaping, or else @sig.
1397  */
1398 int do_notify_parent(struct task_struct *tsk, int sig)
1399 {
1400         struct siginfo info;
1401         unsigned long flags;
1402         struct sighand_struct *psig;
1403         int ret = sig;
1404
1405         BUG_ON(sig == -1);
1406
1407         /* do_notify_parent_cldstop should have been called instead.  */
1408         BUG_ON(task_is_stopped_or_traced(tsk));
1409
1410         BUG_ON(!tsk->ptrace &&
1411                (tsk->group_leader != tsk || !thread_group_empty(tsk)));
1412
1413         info.si_signo = sig;
1414         info.si_errno = 0;
1415         /*
1416          * we are under tasklist_lock here so our parent is tied to
1417          * us and cannot exit and release its namespace.
1418          *
1419          * the only it can is to switch its nsproxy with sys_unshare,
1420          * bu uncharing pid namespaces is not allowed, so we'll always
1421          * see relevant namespace
1422          *
1423          * write_lock() currently calls preempt_disable() which is the
1424          * same as rcu_read_lock(), but according to Oleg, this is not
1425          * correct to rely on this
1426          */
1427         rcu_read_lock();
1428         info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1429         info.si_uid = __task_cred(tsk)->uid;
1430         rcu_read_unlock();
1431
1432         info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
1433                                 tsk->signal->utime));
1434         info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
1435                                 tsk->signal->stime));
1436
1437         info.si_status = tsk->exit_code & 0x7f;
1438         if (tsk->exit_code & 0x80)
1439                 info.si_code = CLD_DUMPED;
1440         else if (tsk->exit_code & 0x7f)
1441                 info.si_code = CLD_KILLED;
1442         else {
1443                 info.si_code = CLD_EXITED;
1444                 info.si_status = tsk->exit_code >> 8;
1445         }
1446
1447         psig = tsk->parent->sighand;
1448         spin_lock_irqsave(&psig->siglock, flags);
1449         if (!tsk->ptrace && sig == SIGCHLD &&
1450             (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
1451              (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
1452                 /*
1453                  * We are exiting and our parent doesn't care.  POSIX.1
1454                  * defines special semantics for setting SIGCHLD to SIG_IGN
1455                  * or setting the SA_NOCLDWAIT flag: we should be reaped
1456                  * automatically and not left for our parent's wait4 call.
1457                  * Rather than having the parent do it as a magic kind of
1458                  * signal handler, we just set this to tell do_exit that we
1459                  * can be cleaned up without becoming a zombie.  Note that
1460                  * we still call __wake_up_parent in this case, because a
1461                  * blocked sys_wait4 might now return -ECHILD.
1462                  *
1463                  * Whether we send SIGCHLD or not for SA_NOCLDWAIT
1464                  * is implementation-defined: we do (if you don't want
1465                  * it, just use SIG_IGN instead).
1466                  */
1467                 ret = tsk->exit_signal = -1;
1468                 if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
1469                         sig = -1;
1470         }
1471         if (valid_signal(sig) && sig > 0)
1472                 __group_send_sig_info(sig, &info, tsk->parent);
1473         __wake_up_parent(tsk, tsk->parent);
1474         spin_unlock_irqrestore(&psig->siglock, flags);
1475
1476         return ret;
1477 }
1478
1479 static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1480 {
1481         struct siginfo info;
1482         unsigned long flags;
1483         struct task_struct *parent;
1484         struct sighand_struct *sighand;
1485
1486         if (tsk->ptrace & PT_PTRACED)
1487                 parent = tsk->parent;
1488         else {
1489                 tsk = tsk->group_leader;
1490                 parent = tsk->real_parent;
1491         }
1492
1493         info.si_signo = SIGCHLD;
1494         info.si_errno = 0;
1495         /*
1496          * see comment in do_notify_parent() abot the following 3 lines
1497          */
1498         rcu_read_lock();
1499         info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1500         info.si_uid = __task_cred(tsk)->uid;
1501         rcu_read_unlock();
1502
1503         info.si_utime = cputime_to_clock_t(tsk->utime);
1504         info.si_stime = cputime_to_clock_t(tsk->stime);
1505
1506         info.si_code = why;
1507         switch (why) {
1508         case CLD_CONTINUED:
1509                 info.si_status = SIGCONT;
1510                 break;
1511         case CLD_STOPPED:
1512                 info.si_status = tsk->signal->group_exit_code & 0x7f;
1513                 break;
1514         case CLD_TRAPPED:
1515                 info.si_status = tsk->exit_code & 0x7f;
1516                 break;
1517         default:
1518                 BUG();
1519         }
1520
1521         sighand = parent->sighand;
1522         spin_lock_irqsave(&sighand->siglock, flags);
1523         if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
1524             !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
1525                 __group_send_sig_info(SIGCHLD, &info, parent);
1526         /*
1527          * Even if SIGCHLD is not generated, we must wake up wait4 calls.
1528          */
1529         __wake_up_parent(tsk, parent);
1530         spin_unlock_irqrestore(&sighand->siglock, flags);
1531 }
1532
1533 static inline int may_ptrace_stop(void)
1534 {
1535         if (!likely(current->ptrace & PT_PTRACED))
1536                 return 0;
1537         /*
1538          * Are we in the middle of do_coredump?
1539          * If so and our tracer is also part of the coredump stopping
1540          * is a deadlock situation, and pointless because our tracer
1541          * is dead so don't allow us to stop.
1542          * If SIGKILL was already sent before the caller unlocked
1543          * ->siglock we must see ->core_state != NULL. Otherwise it
1544          * is safe to enter schedule().
1545          */
1546         if (unlikely(current->mm->core_state) &&
1547             unlikely(current->mm == current->parent->mm))
1548                 return 0;
1549
1550         return 1;
1551 }
1552
1553 /*
1554  * Return nonzero if there is a SIGKILL that should be waking us up.
1555  * Called with the siglock held.
1556  */
1557 static int sigkill_pending(struct task_struct *tsk)
1558 {
1559         return  sigismember(&tsk->pending.signal, SIGKILL) ||
1560                 sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
1561 }
1562
1563 /*
1564  * This must be called with current->sighand->siglock held.
1565  *
1566  * This should be the path for all ptrace stops.
1567  * We always set current->last_siginfo while stopped here.
1568  * That makes it a way to test a stopped process for
1569  * being ptrace-stopped vs being job-control-stopped.
1570  *
1571  * If we actually decide not to stop at all because the tracer
1572  * is gone, we keep current->exit_code unless clear_code.
1573  */
1574 static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1575 {
1576         if (arch_ptrace_stop_needed(exit_code, info)) {
1577                 /*
1578                  * The arch code has something special to do before a
1579                  * ptrace stop.  This is allowed to block, e.g. for faults
1580                  * on user stack pages.  We can't keep the siglock while
1581                  * calling arch_ptrace_stop, so we must release it now.
1582                  * To preserve proper semantics, we must do this before
1583                  * any signal bookkeeping like checking group_stop_count.
1584                  * Meanwhile, a SIGKILL could come in before we retake the
1585                  * siglock.  That must prevent us from sleeping in TASK_TRACED.
1586                  * So after regaining the lock, we must check for SIGKILL.
1587                  */
1588                 spin_unlock_irq(&current->sighand->siglock);
1589                 arch_ptrace_stop(exit_code, info);
1590                 spin_lock_irq(&current->sighand->siglock);
1591                 if (sigkill_pending(current))
1592                         return;
1593         }
1594
1595         /*
1596          * If there is a group stop in progress,
1597          * we must participate in the bookkeeping.
1598          */
1599         if (current->signal->group_stop_count > 0)
1600                 --current->signal->group_stop_count;
1601
1602         current->last_siginfo = info;
1603         current->exit_code = exit_code;
1604
1605         /* Let the debugger run.  */
1606         __set_current_state(TASK_TRACED);
1607         spin_unlock_irq(&current->sighand->siglock);
1608         read_lock(&tasklist_lock);
1609         if (may_ptrace_stop()) {
1610                 do_notify_parent_cldstop(current, CLD_TRAPPED);
1611                 /*
1612                  * Don't want to allow preemption here, because
1613                  * sys_ptrace() needs this task to be inactive.
1614                  *
1615                  * XXX: implement read_unlock_no_resched().
1616                  */
1617                 preempt_disable();
1618                 read_unlock(&tasklist_lock);
1619                 preempt_enable_no_resched();
1620                 schedule();
1621         } else {
1622                 /*
1623                  * By the time we got the lock, our tracer went away.
1624                  * Don't drop the lock yet, another tracer may come.
1625                  */
1626                 __set_current_state(TASK_RUNNING);
1627                 if (clear_code)
1628                         current->exit_code = 0;
1629                 read_unlock(&tasklist_lock);
1630         }
1631
1632         /*
1633          * While in TASK_TRACED, we were considered "frozen enough".
1634          * Now that we woke up, it's crucial if we're supposed to be
1635          * frozen that we freeze now before running anything substantial.
1636          */
1637         try_to_freeze();
1638
1639         /*
1640          * We are back.  Now reacquire the siglock before touching
1641          * last_siginfo, so that we are sure to have synchronized with
1642          * any signal-sending on another CPU that wants to examine it.
1643          */
1644         spin_lock_irq(&current->sighand->siglock);
1645         current->last_siginfo = NULL;
1646
1647         /*
1648          * Queued signals ignored us while we were stopped for tracing.
1649          * So check for any that we should take before resuming user mode.
1650          * This sets TIF_SIGPENDING, but never clears it.
1651          */
1652         recalc_sigpending_tsk(current);
1653 }
1654
1655 void ptrace_notify(int exit_code)
1656 {
1657         siginfo_t info;
1658
1659         BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
1660
1661         memset(&info, 0, sizeof info);
1662         info.si_signo = SIGTRAP;
1663         info.si_code = exit_code;
1664         info.si_pid = task_pid_vnr(current);
1665         info.si_uid = current_uid();
1666
1667         /* Let the debugger run.  */
1668         spin_lock_irq(&current->sighand->siglock);
1669         ptrace_stop(exit_code, 1, &info);
1670         spin_unlock_irq(&current->sighand->siglock);
1671 }
1672
1673 static void
1674 finish_stop(int stop_count)
1675 {
1676         /*
1677          * If there are no other threads in the group, or if there is
1678          * a group stop in progress and we are the last to stop,
1679          * report to the parent.  When ptraced, every thread reports itself.
1680          */
1681         if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) {
1682                 read_lock(&tasklist_lock);
1683                 do_notify_parent_cldstop(current, CLD_STOPPED);
1684                 read_unlock(&tasklist_lock);
1685         }
1686
1687         do {
1688                 schedule();
1689         } while (try_to_freeze());
1690         /*
1691          * Now we don't run again until continued.
1692          */
1693         current->exit_code = 0;
1694 }
1695
/*
 * This performs the stopping for SIGSTOP and other stop signals.
 * We have to stop all threads in the thread group.
 *
 * @signr: the stop signal; recorded as the group exit code when this call
 *         initiates a new group stop, unused when joining one already in
 *         progress.
 *
 * Called with the siglock held.
 * Returns nonzero if we've actually stopped and released the siglock.
 * Returns zero if we didn't stop and still hold the siglock.
 */
static int do_signal_stop(int signr)
{
        struct signal_struct *sig = current->signal;
        int stop_count;

        if (sig->group_stop_count > 0) {
                /*
                 * There is a group stop in progress.  We don't need to
                 * start another one.  Just account for ourselves;
                 * stop_count is what remains after we have stopped.
                 */
                stop_count = --sig->group_stop_count;
        } else {
                struct task_struct *t;

                /*
                 * Refuse to start a stop if the stop signal is no longer
                 * marked as dequeued or the whole group is exiting.
                 */
                if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
                    unlikely(signal_group_exit(sig)))
                        return 0;
                /*
                 * There is no group stop already in progress.
                 * We must initiate one now.
                 */
                sig->group_exit_code = signr;

                /* Count and wake every other thread that still has to stop. */
                stop_count = 0;
                for (t = next_thread(current); t != current; t = next_thread(t))
                        /*
                         * Setting state to TASK_STOPPED for a group
                         * stop is always done with the siglock held,
                         * so this check has no races.
                         */
                        if (!(t->flags & PF_EXITING) &&
                            !task_is_stopped_or_traced(t)) {
                                stop_count++;
                                signal_wake_up(t, 0);
                        }
                sig->group_stop_count = stop_count;
        }

        /* Nobody else left to stop: the group as a whole is now stopped. */
        if (stop_count == 0)
                sig->flags = SIGNAL_STOP_STOPPED;
        current->exit_code = sig->group_exit_code;
        __set_current_state(TASK_STOPPED);

        /* finish_stop() notifies the parent and sleeps, without the siglock. */
        spin_unlock_irq(&current->sighand->siglock);
        finish_stop(stop_count);
        return 1;
}
1749
1750 static int ptrace_signal(int signr, siginfo_t *info,
1751                          struct pt_regs *regs, void *cookie)
1752 {
1753         if (!(current->ptrace & PT_PTRACED))
1754                 return signr;
1755
1756         ptrace_signal_deliver(regs, cookie);
1757
1758         /* Let the debugger run.  */
1759         ptrace_stop(signr, 0, info);
1760
1761         /* We're back.  Did the debugger cancel the sig?  */
1762         signr = current->exit_code;
1763         if (signr == 0)
1764                 return signr;
1765
1766         current->exit_code = 0;
1767
1768         /* Update the siginfo structure if the signal has
1769            changed.  If the debugger wanted something
1770            specific in the siginfo structure then it should
1771            have updated *info via PTRACE_SETSIGINFO.  */
1772         if (signr != info->si_signo) {
1773                 info->si_signo = signr;
1774                 info->si_errno = 0;
1775                 info->si_code = SI_USER;
1776                 info->si_pid = task_pid_vnr(current->parent);
1777                 info->si_uid = task_uid(current->parent);
1778         }
1779
1780         /* If the (new) signal is now blocked, requeue it.  */
1781         if (sigismember(&current->blocked, signr)) {
1782                 specific_send_sig_info(signr, info, current);
1783                 signr = 0;
1784         }
1785
1786         return signr;
1787 }
1788
/*
 * Pick the next signal that the current task has to handle.
 *
 * @info:      receives the siginfo of the signal being processed
 * @return_ka: receives a copy of the sigaction when a user handler must run
 * @regs:      passed to the tracing hooks, print_fatal_signal() and
 *             do_coredump()
 * @cookie:    passed through to ptrace_signal()
 *
 * Returns the number of a signal whose user handler should be run (with
 * *return_ka filled in), or 0 when no signal is left to deliver.  Ignored
 * signals are skipped, stop signals stop the task right here, and any
 * other default action ends in do_group_exit(), which does not return.
 */
int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
                          struct pt_regs *regs, void *cookie)
{
        struct sighand_struct *sighand = current->sighand;
        struct signal_struct *signal = current->signal;
        int signr;

relock:
        /*
         * We'll jump back here after any time we were stopped in TASK_STOPPED.
         * While in TASK_STOPPED, we were considered "frozen enough".
         * Now that we woke up, it's crucial if we're supposed to be
         * frozen that we freeze now before running anything substantial.
         */
        try_to_freeze();

        spin_lock_irq(&sighand->siglock);
        /*
         * Every stopped thread goes here after wakeup. Check to see if
         * we should notify the parent, prepare_signal(SIGCONT) encodes
         * the CLD_ si_code into SIGNAL_CLD_MASK bits.
         */
        if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
                int why = (signal->flags & SIGNAL_STOP_CONTINUED)
                                ? CLD_CONTINUED : CLD_STOPPED;
                /* Consume the request so that it is reported only once. */
                signal->flags &= ~SIGNAL_CLD_MASK;
                spin_unlock_irq(&sighand->siglock);

                if (unlikely(!tracehook_notify_jctl(1, why)))
                        goto relock;

                read_lock(&tasklist_lock);
                do_notify_parent_cldstop(current->group_leader, why);
                read_unlock(&tasklist_lock);
                goto relock;
        }

        for (;;) {
                struct k_sigaction *ka;

                /*
                 * Join a group stop that is already in progress.  A nonzero
                 * return means we stopped and the siglock was released.
                 */
                if (unlikely(signal->group_stop_count > 0) &&
                    do_signal_stop(0))
                        goto relock;

                /*
                 * Tracing can induce an artificial signal and choose sigaction.
                 * The return value in @signr determines the default action,
                 * but @info->si_signo is the signal number we will report.
                 */
                signr = tracehook_get_signal(current, regs, info, return_ka);
                if (unlikely(signr < 0))
                        goto relock;
                if (unlikely(signr != 0))
                        ka = return_ka;
                else {
                        signr = dequeue_signal(current, &current->blocked,
                                               info);

                        if (!signr)
                                break; /* will return 0 */

                        /* SIGKILL is never shown to the ptrace debugger. */
                        if (signr != SIGKILL) {
                                signr = ptrace_signal(signr, info,
                                                      regs, cookie);
                                if (!signr)
                                        continue;
                        }

                        ka = &sighand->action[signr-1];
                }

                if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
                        continue;
                if (ka->sa.sa_handler != SIG_DFL) {
                        /* Run the handler.  */
                        *return_ka = *ka;

                        /* One-shot handlers fall back to the default action. */
                        if (ka->sa.sa_flags & SA_ONESHOT)
                                ka->sa.sa_handler = SIG_DFL;

                        break; /* will return non-zero "signr" value */
                }

                /*
                 * Now we are doing the default action for this signal.
                 */
                if (sig_kernel_ignore(signr)) /* Default is nothing. */
                        continue;

                /*
                 * Global init gets no signals it doesn't want.
                 * Container-init gets no signals it doesn't want from same
                 * container.
                 *
                 * Note that if global/container-init sees a sig_kernel_only()
                 * signal here, the signal must have been generated internally
                 * or must have come from an ancestor namespace. In either
                 * case, the signal cannot be dropped.
                 */
                if (unlikely(signal->flags & SIGNAL_UNKILLABLE) &&
                                !sig_kernel_only(signr))
                        continue;

                if (sig_kernel_stop(signr)) {
                        /*
                         * The default action is to stop all threads in
                         * the thread group.  The job control signals
                         * do nothing in an orphaned pgrp, but SIGSTOP
                         * always works.  Note that siglock needs to be
                         * dropped during the call to is_orphaned_pgrp()
                         * because of lock ordering with tasklist_lock.
                         * This allows an intervening SIGCONT to be posted.
                         * We need to check for that and bail out if necessary.
                         */
                        if (signr != SIGSTOP) {
                                spin_unlock_irq(&sighand->siglock);

                                /* signals can be posted during this window */

                                if (is_current_pgrp_orphaned())
                                        goto relock;

                                spin_lock_irq(&sighand->siglock);
                        }

                        if (likely(do_signal_stop(info->si_signo))) {
                                /* It released the siglock.  */
                                goto relock;
                        }

                        /*
                         * We didn't actually stop, due to a race
                         * with SIGCONT or something like that.
                         */
                        continue;
                }

                spin_unlock_irq(&sighand->siglock);

                /*
                 * Anything else is fatal, maybe with a core dump.
                 */
                current->flags |= PF_SIGNALED;

                if (sig_kernel_coredump(signr)) {
                        if (print_fatal_signals)
                                print_fatal_signal(regs, info->si_signo);
                        /*
                         * If it was able to dump core, this kills all
                         * other threads in the group and synchronizes with
                         * their demise.  If we lost the race with another
                         * thread getting here, it set group_exit_code
                         * first and our do_group_exit call below will use
                         * that value and ignore the one we pass it.
                         */
                        do_coredump(info->si_signo, info->si_signo, regs);
                }

                /*
                 * Death signals, no core dump.
                 */
                do_group_exit(info->si_signo);
                /* NOTREACHED */
        }
        /* Both "break" exits above arrive here still holding the siglock. */
        spin_unlock_irq(&sighand->siglock);
        return signr;
}
1956
1957 void exit_signals(struct task_struct *tsk)
1958 {
1959         int group_stop = 0;
1960         struct task_struct *t;
1961
1962         if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
1963                 tsk->flags |= PF_EXITING;
1964                 return;
1965         }
1966
1967         spin_lock_irq(&tsk->sighand->siglock);
1968         /*
1969          * From now this task is not visible for group-wide signals,
1970          * see wants_signal(), do_signal_stop().
1971          */
1972         tsk->flags |= PF_EXITING;
1973         if (!signal_pending(tsk))
1974                 goto out;
1975
1976         /* It could be that __group_complete_signal() choose us to
1977          * notify about group-wide signal. Another thread should be
1978          * woken now to take the signal since we will not.
1979          */
1980         for (t = tsk; (t = next_thread(t)) != tsk; )
1981                 if (!signal_pending(t) && !(t->flags & PF_EXITING))
1982                         recalc_sigpending_and_wake(t);
1983
1984         if (unlikely(tsk->signal->group_stop_count) &&
1985                         !--tsk->signal->group_stop_count) {
1986                 tsk->signal->flags = SIGNAL_STOP_STOPPED;
1987                 group_stop = 1;
1988         }
1989 out:
1990         spin_unlock_irq(&tsk->sighand->siglock);
1991
1992         if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) {
1993                 read_lock(&tasklist_lock);
1994                 do_notify_parent_cldstop(tsk, CLD_STOPPED);
1995                 read_unlock(&tasklist_lock);
1996         }
1997 }
1998
/*
 * Signal helpers exported from this file.  dequeue_signal goes through
 * EXPORT_SYMBOL_GPL; all the others use plain EXPORT_SYMBOL.
 */
EXPORT_SYMBOL(recalc_sigpending);
EXPORT_SYMBOL_GPL(dequeue_signal);
EXPORT_SYMBOL(flush_signals);
EXPORT_SYMBOL(force_sig);
EXPORT_SYMBOL(send_sig);
EXPORT_SYMBOL(send_sig_info);
EXPORT_SYMBOL(sigprocmask);
EXPORT_SYMBOL(block_all_signals);
EXPORT_SYMBOL(unblock_all_signals);
2008
2009
2010 /*
2011  * System call entry points.
2012  */
2013
2014 SYSCALL_DEFINE0(restart_syscall)
2015 {
2016         struct restart_block *restart = &current_thread_info()->restart_block;
2017         return restart->fn(restart);
2018 }
2019
/*
 * Restart callback for system calls that cannot be restarted.
 *
 * @param: the restart block; not examined.
 *
 * Always returns -EINTR.
 */
long do_no_restart_syscall(struct restart_block *param)
{
        return -EINTR;
}
2024
2025 /*
2026  * We don't need to get the kernel lock - this is all local to this
2027  * particular thread.. (and that's good, because this is _heavily_
2028  * used by various programs)
2029  */
2030
2031 /*
2032  * This is also useful for kernel threads that want to temporarily
2033  * (or permanently) block certain signals.
2034  *
2035  * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel
2036  * interface happily blocks "unblockable" signals like SIGKILL
2037  * and friends.
2038  */
2039 int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
2040 {
2041         int error;
2042
2043         spin_lock_irq(&current->sighand->siglock);
2044         if (oldset)
2045                 *oldset = current->blocked;
2046
2047         error = 0;
2048         switch (how) {
2049         case SIG_BLOCK:
2050                 sigorsets(&current->blocked, &current->blocked, set);
2051                 break;
2052         case SIG_UNBLOCK:
2053                 signandsets(&current->blocked, &current->blocked, set);
2054                 break;
2055         case SIG_SETMASK:
2056                 current->blocked = *set;
2057                 break;
2058         default:
2059                 error = -EINVAL;
2060         }
2061         recalc_sigpending();
2062         spin_unlock_irq(&current->sighand->siglock);
2063
2064         return error;
2065 }
2066
2067 SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
2068                 sigset_t __user *, oset, size_t, sigsetsize)
2069 {
2070         int error = -EINVAL;
2071         sigset_t old_set, new_set;
2072
2073         /* XXX: Don't preclude handling different sized sigset_t's.  */
2074         if (sigsetsize != sizeof(sigset_t))
2075                 goto out;
2076
2077         if (set) {
2078                 error = -EFAULT;
2079                 if (copy_from_user(&new_set, set, sizeof(*set)))
2080                         goto out;
2081                 sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
2082
2083                 error = sigprocmask(how, &new_set, &old_set);
2084                 if (error)
2085                         goto out;
2086                 if (oset)
2087                         goto set_old;
2088         } else if (oset) {
2089                 spin_lock_irq(&current->sighand->siglock);
2090                 old_set = current->blocked;
2091                 spin_unlock_irq(&current->sighand->siglock);
2092
2093         set_old:
2094                 error = -EFAULT;
2095                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2096                         goto out;
2097         }
2098         error = 0;
2099 out:
2100         return error;
2101 }
2102
2103 long do_sigpending(void __user *set, unsigned long sigsetsize)
2104 {
2105         long error = -EINVAL;
2106         sigset_t pending;
2107
2108         if (sigsetsize > sizeof(sigset_t))
2109                 goto out;
2110
2111         spin_lock_irq(&current->sighand->siglock);
2112         sigorsets(&pending, &current->pending.signal,
2113                   &current->signal->shared_pending.signal);
2114         spin_unlock_irq(&current->sighand->siglock);
2115
2116         /* Outside the lock because only this thread touches it.  */
2117         sigandsets(&pending, &current->blocked, &pending);
2118
2119         error = -EFAULT;
2120         if (!copy_to_user(set, &pending, sigsetsize))
2121                 error = 0;
2122
2123 out:
2124         return error;
2125 }       
2126
2127 SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
2128 {
2129         return do_sigpending(set, sigsetsize);
2130 }
2131
2132 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
2133
2134 int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
2135 {
2136         int err;
2137
2138         if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
2139                 return -EFAULT;
2140         if (from->si_code < 0)
2141                 return __copy_to_user(to, from, sizeof(siginfo_t))
2142                         ? -EFAULT : 0;
2143         /*
2144          * If you change siginfo_t structure, please be sure
2145          * this code is fixed accordingly.
2146          * Please remember to update the signalfd_copyinfo() function
2147          * inside fs/signalfd.c too, in case siginfo_t changes.
2148          * It should never copy any pad contained in the structure
2149          * to avoid security leaks, but must copy the generic
2150          * 3 ints plus the relevant union member.
2151          */
2152         err = __put_user(from->si_signo, &to->si_signo);
2153         err |= __put_user(from->si_errno, &to->si_errno);
2154         err |= __put_user((short)from->si_code, &to->si_code);
2155         switch (from->si_code & __SI_MASK) {
2156         case __SI_KILL:
2157                 err |= __put_user(from->si_pid, &to->si_pid);
2158                 err |= __put_user(from->si_uid, &to->si_uid);
2159                 break;
2160         case __SI_TIMER:
2161                  err |= __put_user(from->si_tid, &to->si_tid);
2162                  err |= __put_user(from->si_overrun, &to->si_overrun);
2163                  err |= __put_user(from->si_ptr, &to->si_ptr);
2164                 break;
2165         case __SI_POLL:
2166                 err |= __put_user(from->si_band, &to->si_band);
2167                 err |= __put_user(from->si_fd, &to->si_fd);
2168                 break;
2169         case __SI_FAULT:
2170                 err |= __put_user(from->si_addr, &to->si_addr);
2171 #ifdef __ARCH_SI_TRAPNO
2172                 err |= __put_user(from->si_trapno, &to->si_trapno);
2173 #endif
2174                 break;
2175         case __SI_CHLD:
2176                 err |= __put_user(from->si_pid, &to->si_pid);
2177                 err |= __put_user(from->si_uid, &to->si_uid);
2178                 err |= __put_user(from->si_status, &to->si_status);
2179                 err |= __put_user(from->si_utime, &to->si_utime);
2180                 err |= __put_user(from->si_stime, &to->si_stime);
2181                 break;
2182         case __SI_RT: /* This is not generated by the kernel as of now. */
2183         case __SI_MESGQ: /* But this is */
2184                 err |= __put_user(from->si_pid, &to->si_pid);
2185                 err |= __put_user(from->si_uid, &to->si_uid);
2186                 err |= __put_user(from->si_ptr, &to->si_ptr);
2187                 break;
2188         default: /* this is just in case for now ... */
2189                 err |= __put_user(from->si_pid, &to->si_pid);
2190                 err |= __put_user(from->si_uid, &to->si_uid);
2191                 break;
2192         }
2193         return err;
2194 }
2195
2196 #endif
2197
2198 SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
2199                 siginfo_t __user *, uinfo, const struct timespec __user *, uts,
2200                 size_t, sigsetsize)
2201 {
2202         int ret, sig;
2203         sigset_t these;
2204         struct timespec ts;
2205         siginfo_t info;
2206         long timeout = 0;
2207
2208         /* XXX: Don't preclude handling different sized sigset_t's.  */
2209         if (sigsetsize != sizeof(sigset_t))
2210                 return -EINVAL;
2211
2212         if (copy_from_user(&these, uthese, sizeof(these)))
2213                 return -EFAULT;
2214                 
2215         /*
2216          * Invert the set of allowed signals to get those we
2217          * want to block.
2218          */
2219         sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
2220         signotset(&these);
2221
2222         if (uts) {
2223                 if (copy_from_user(&ts, uts, sizeof(ts)))
2224                         return -EFAULT;
2225                 if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
2226                     || ts.tv_sec < 0)
2227                         return -EINVAL;
2228         }
2229
2230         spin_lock_irq(&current->sighand->siglock);
2231         sig = dequeue_signal(current, &these, &info);
2232         if (!sig) {
2233                 timeout = MAX_SCHEDULE_TIMEOUT;
2234                 if (uts)
2235                         timeout = (timespec_to_jiffies(&ts)
2236                                    + (ts.tv_sec || ts.tv_nsec));
2237
2238                 if (timeout) {
2239                         /* None ready -- temporarily unblock those we're
2240                          * interested while we are sleeping in so that we'll
2241                          * be awakened when they arrive.  */
2242                         current->real_blocked = current->blocked;
2243                         sigandsets(&current->blocked, &current->blocked, &these);
2244                         recalc_sigpending();
2245                         spin_unlock_irq(&current->sighand->siglock);
2246
2247                         timeout = schedule_timeout_interruptible(timeout);
2248
2249                         spin_lock_irq(&current->sighand->siglock);
2250                         sig = dequeue_signal(current, &these, &info);
2251                         current->blocked = current->real_blocked;
2252                         siginitset(&current->real_blocked, 0);
2253                         recalc_sigpending();
2254                 }
2255         }
2256         spin_unlock_irq(&current->sighand->siglock);
2257
2258         if (sig) {
2259                 ret = sig;
2260                 if (uinfo) {
2261                         if (copy_siginfo_to_user(uinfo, &info))
2262                                 ret = -EFAULT;
2263                 }
2264         } else {
2265                 ret = -EAGAIN;
2266                 if (timeout)
2267                         ret = -EINTR;
2268         }
2269
2270         return ret;
2271 }
2272
2273 SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
2274 {
2275         struct siginfo info;
2276
2277         info.si_signo = sig;
2278         info.si_errno = 0;
2279         info.si_code = SI_USER;
2280         info.si_pid = task_tgid_vnr(current);
2281         info.si_uid = current_uid();
2282
2283         return kill_something_info(sig, &info, pid);
2284 }
2285
2286 static int do_tkill(pid_t tgid, pid_t pid, int sig)
2287 {
2288         int error;
2289         struct siginfo info;
2290         struct task_struct *p;
2291         unsigned long flags;
2292
2293         error = -ESRCH;
2294         info.si_signo = sig;
2295         info.si_errno = 0;
2296         info.si_code = SI_TKILL;
2297         info.si_pid = task_tgid_vnr(current);
2298         info.si_uid = current_uid();
2299
2300         rcu_read_lock();
2301         p = find_task_by_vpid(pid);
2302         if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
2303                 error = check_kill_permission(sig, &info, p);
2304                 /*
2305                  * The null signal is a permissions and process existence
2306                  * probe.  No signal is actually delivered.
2307                  *
2308                  * If lock_task_sighand() fails we pretend the task dies
2309                  * after receiving the signal. The window is tiny, and the
2310                  * signal is private anyway.
2311                  */
2312                 if (!error && sig && lock_task_sighand(p, &flags)) {
2313                         error = specific_send_sig_info(sig, &info, p);
2314                         unlock_task_sighand(p, &flags);
2315                 }
2316         }
2317         rcu_read_unlock();
2318
2319         return error;
2320 }
2321
2322 /**
2323  *  sys_tgkill - send signal to one specific thread
2324  *  @tgid: the thread group ID of the thread
2325  *  @pid: the PID of the thread
2326  *  @sig: signal to be sent
2327  *
2328  *  This syscall also checks the @tgid and returns -ESRCH even if the PID
2329  *  exists but it's not belonging to the target process anymore. This
2330  *  method solves the problem of threads exiting and PIDs getting reused.
2331  */
2332 SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig)
2333 {
2334         /* This is only valid for single tasks */
2335         if (pid <= 0 || tgid <= 0)
2336                 return -EINVAL;
2337
2338         return do_tkill(tgid, pid, sig);
2339 }
2340
2341 /*
2342  *  Send a signal to only one task, even if it's a CLONE_THREAD task.
2343  */
2344 SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
2345 {
2346         /* This is only valid for single tasks */
2347         if (pid <= 0)
2348                 return -EINVAL;
2349
2350         return do_tkill(0, pid, sig);
2351 }
2352
2353 SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2354                 siginfo_t __user *, uinfo)
2355 {
2356         siginfo_t info;
2357
2358         if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
2359                 return -EFAULT;
2360
2361         /* Not even root can pretend to send signals from the kernel.
2362            Nor can they impersonate a kill(), which adds source info.  */
2363         if (info.si_code >= 0)
2364                 return -EPERM;
2365         info.si_signo = sig;
2366
2367         /* POSIX.1b doesn't mention process groups.  */
2368         return kill_proc_info(sig, &info, pid);
2369 }
2370
2371 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2372 {
2373         struct task_struct *t = current;
2374         struct k_sigaction *k;
2375         sigset_t mask;
2376
2377         if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
2378                 return -EINVAL;
2379
2380         k = &t->sighand->action[sig-1];
2381
2382         spin_lock_irq(&current->sighand->siglock);
2383         if (oact)
2384                 *oact = *k;
2385
2386         if (act) {
2387                 sigdelsetmask(&act->sa.sa_mask,
2388                               sigmask(SIGKILL) | sigmask(SIGSTOP));
2389                 *k = *act;
2390                 /*
2391                  * POSIX 3.3.1.3:
2392                  *  "Setting a signal action to SIG_IGN for a signal that is
2393                  *   pending shall cause the pending signal to be discarded,
2394                  *   whether or not it is blocked."
2395                  *
2396                  *  "Setting a signal action to SIG_DFL for a signal that is
2397                  *   pending and whose default action is to ignore the signal
2398                  *   (for example, SIGCHLD), shall cause the pending signal to
2399                  *   be discarded, whether or not it is blocked"
2400                  */
2401                 if (sig_handler_ignored(sig_handler(t, sig), sig)) {
2402                         sigemptyset(&mask);
2403                         sigaddset(&mask, sig);
2404                         rm_from_queue_full(&mask, &t->signal->shared_pending);
2405                         do {
2406                                 rm_from_queue_full(&mask, &t->pending);
2407                                 t = next_thread(t);
2408                         } while (t != current);
2409                 }
2410         }
2411
2412         spin_unlock_irq(&current->sighand->siglock);
2413         return 0;
2414 }
2415
2416 int 
2417 do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp)
2418 {
2419         stack_t oss;
2420         int error;
2421
2422         if (uoss) {
2423                 oss.ss_sp = (void __user *) current->sas_ss_sp;
2424                 oss.ss_size = current->sas_ss_size;
2425                 oss.ss_flags = sas_ss_flags(sp);
2426         }
2427
2428         if (uss) {
2429                 void __user *ss_sp;
2430                 size_t ss_size;
2431                 int ss_flags;
2432
2433                 error = -EFAULT;
2434                 if (!access_ok(VERIFY_READ, uss, sizeof(*uss))
2435                     || __get_user(ss_sp, &uss->ss_sp)
2436                     || __get_user(ss_flags, &uss->ss_flags)
2437                     || __get_user(ss_size, &uss->ss_size))
2438                         goto out;
2439
2440                 error = -EPERM;
2441                 if (on_sig_stack(sp))
2442                         goto out;
2443
2444                 error = -EINVAL;
2445                 /*
2446                  *
2447                  * Note - this code used to test ss_flags incorrectly
2448                  *        old code may have been written using ss_flags==0
2449                  *        to mean ss_flags==SS_ONSTACK (as this was the only
2450                  *        way that worked) - this fix preserves that older
2451                  *        mechanism
2452                  */
2453                 if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
2454                         goto out;
2455
2456                 if (ss_flags == SS_DISABLE) {
2457                         ss_size = 0;
2458                         ss_sp = NULL;
2459                 } else {
2460                         error = -ENOMEM;
2461                         if (ss_size < MINSIGSTKSZ)
2462                                 goto out;
2463                 }
2464
2465                 current->sas_ss_sp = (unsigned long) ss_sp;
2466                 current->sas_ss_size = ss_size;
2467         }
2468
2469         if (uoss) {
2470                 error = -EFAULT;
2471                 if (copy_to_user(uoss, &oss, sizeof(oss)))
2472                         goto out;
2473         }
2474
2475         error = 0;
2476 out:
2477         return error;
2478 }
2479
#ifdef __ARCH_WANT_SYS_SIGPENDING

/*
 * sigpending: old-style variant of rt_sigpending that reports only as many
 * bytes as fit in an old_sigset_t.
 */
SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
{
        long ret = do_sigpending(set, sizeof(*set));

        return ret;
}

#endif
2488
#ifdef __ARCH_WANT_SYS_SIGPROCMASK
/* Some platforms have their own version with special arguments others
   support only sys_rt_sigprocmask.  */

/*
 * sigprocmask: old-style mask manipulation that works on the first word
 * of the blocked set only.
 *
 * @how:  SIG_BLOCK, SIG_UNBLOCK or SIG_SETMASK (used only with @set)
 * @set:  user pointer to the new mask word, or NULL to change nothing
 * @oset: user pointer that receives the previous mask word, or NULL
 *
 * SIGKILL and SIGSTOP are silently removed from @set.
 * Returns 0, -EINVAL for an unknown @how, or -EFAULT for a bad pointer.
 */
SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
                old_sigset_t __user *, oset)
{
        old_sigset_t old_set, new_set;
        int error;

        if (set) {
                if (copy_from_user(&new_set, set, sizeof(*set)))
                        return -EFAULT;
                new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));

                spin_lock_irq(&current->sighand->siglock);
                old_set = current->blocked.sig[0];

                error = 0;
                switch (how) {
                case SIG_BLOCK:
                        sigaddsetmask(&current->blocked, new_set);
                        break;
                case SIG_UNBLOCK:
                        sigdelsetmask(&current->blocked, new_set);
                        break;
                case SIG_SETMASK:
                        current->blocked.sig[0] = new_set;
                        break;
                default:
                        error = -EINVAL;
                        break;
                }

                recalc_sigpending();
                spin_unlock_irq(&current->sighand->siglock);

                if (error)
                        return error;
        } else if (oset) {
                /* Query only: just read the current mask word. */
                old_set = current->blocked.sig[0];
        }

        if (oset && copy_to_user(oset, &old_set, sizeof(*oset)))
                return -EFAULT;

        return 0;
}
#endif /* __ARCH_WANT_SYS_SIGPROCMASK */
2542
2543 #ifdef __ARCH_WANT_SYS_RT_SIGACTION
2544 SYSCALL_DEFINE4(rt_sigaction, int, sig,
2545                 const struct sigaction __user *, act,
2546                 struct sigaction __user *, oact,
2547                 size_t, sigsetsize)
2548 {
2549         struct k_sigaction new_sa, old_sa;
2550         int ret = -EINVAL;
2551
2552         /* XXX: Don't preclude handling different sized sigset_t's.  */
2553         if (sigsetsize != sizeof(sigset_t))
2554                 goto out;
2555
2556         if (act) {
2557                 if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
2558                         return -EFAULT;
2559         }
2560
2561         ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
2562
2563         if (!ret && oact) {
2564                 if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
2565                         return -EFAULT;
2566         }
2567 out:
2568         return ret;
2569 }
2570 #endif /* __ARCH_WANT_SYS_RT_SIGACTION */
2571
2572 #ifdef __ARCH_WANT_SYS_SGETMASK
2573
2574 /*
2575  * For backwards compatibility.  Functionality superseded by sigprocmask.
2576  */
2577 SYSCALL_DEFINE0(sgetmask)
2578 {
2579         /* SMP safe */
2580         return current->blocked.sig[0];
2581 }
2582
2583 SYSCALL_DEFINE1(ssetmask, int, newmask)
2584 {
2585         int old;
2586
2587         spin_lock_irq(&current->sighand->siglock);
2588         old = current->blocked.sig[0];
2589
2590         siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
2591                                                   sigmask(SIGSTOP)));
2592         recalc_sigpending();
2593         spin_unlock_irq(&current->sighand->siglock);
2594
2595         return old;
2596 }
2597 #endif /* __ARCH_WANT_SYS_SGETMASK */
2598
2599 #ifdef __ARCH_WANT_SYS_SIGNAL
2600 /*
2601  * For backwards compatibility.  Functionality superseded by sigaction.
2602  */
2603 SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
2604 {
2605         struct k_sigaction new_sa, old_sa;
2606         int ret;
2607
2608         new_sa.sa.sa_handler = handler;
2609         new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2610         sigemptyset(&new_sa.sa.sa_mask);
2611
2612         ret = do_sigaction(sig, &new_sa, &old_sa);
2613
2614         return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
2615 }
2616 #endif /* __ARCH_WANT_SYS_SIGNAL */
2617
2618 #ifdef __ARCH_WANT_SYS_PAUSE
2619
2620 SYSCALL_DEFINE0(pause)
2621 {
2622         current->state = TASK_INTERRUPTIBLE;
2623         schedule();
2624         return -ERESTARTNOHAND;
2625 }
2626
2627 #endif
2628
2629 #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
2630 SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
2631 {
2632         sigset_t newset;
2633
2634         /* XXX: Don't preclude handling different sized sigset_t's.  */
2635         if (sigsetsize != sizeof(sigset_t))
2636                 return -EINVAL;
2637
2638         if (copy_from_user(&newset, unewset, sizeof(newset)))
2639                 return -EFAULT;
2640         sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
2641
2642         spin_lock_irq(&current->sighand->siglock);
2643         current->saved_sigmask = current->blocked;
2644         current->blocked = newset;
2645         recalc_sigpending();
2646         spin_unlock_irq(&current->sighand->siglock);
2647
2648         current->state = TASK_INTERRUPTIBLE;
2649         schedule();
2650         set_restore_sigmask();
2651         return -ERESTARTNOHAND;
2652 }
2653 #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
2654
/*
 * Fallback arch_vma_name(): reports no name for any @vma.
 *
 * Declared weak, so a non-weak definition of the same symbol elsewhere
 * takes precedence at link time.
 */
__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
{
	return NULL;
}
2659
2660 void __init signals_init(void)
2661 {
2662         sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
2663 }