kernel/signal.c

   1 /*
   2  *  linux/kernel/signal.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  *
   6  *  1997-11-02  Modified for POSIX.1b signals by Richard Henderson
   7  *
   8  *  2003-06-02  Jim Houston - Concurrent Computer Corp.
   9  *              Changes to use preallocated sigqueue structures
  10  *              to allow signals to be sent reliably.
  11  */
  12
  13 #include <linux/slab.h>
  14 #include <linux/module.h>
  15 #include <linux/init.h>
  16 #include <linux/sched.h>
  17 #include <linux/fs.h>
  18 #include <linux/tty.h>
  19 #include <linux/binfmts.h>
  20 #include <linux/security.h>
  21 #include <linux/syscalls.h>
  22 #include <linux/ptrace.h>
  23 #include <linux/signal.h>
  24 #include <linux/signalfd.h>
  25 #include <linux/tracehook.h>
  26 #include <linux/capability.h>
  27 #include <linux/freezer.h>
  28 #include <linux/pid_namespace.h>
  29 #include <linux/nsproxy.h>
  30 #include <trace/events/sched.h>
  31
  32 #include <asm/param.h>
  33 #include <asm/uaccess.h>
  34 #include <asm/unistd.h>
  35 #include <asm/siginfo.h>
  36 #include "audit.h"      /* audit_signal_info() */
  37
  38 /*
  39  * SLAB caches for signal bits.
  40  */
  41
  42 static struct kmem_cache *sigqueue_cachep;
  43
     /*
      * Return the handler installed for signal @sig in @t's sighand action
      * table.  The table is indexed with "sig - 1", i.e. signal numbers are
      * 1-based while the array is 0-based.
      */
  44 static void __user *sig_handler(struct task_struct *t, int sig)
  45 {
  46         return t->sighand->action[sig - 1].sa.sa_handler;
  47 }
  48
     /*
      * Tell from the handler value alone whether @sig gets discarded.
      *
      * @handler: the handler installed for @sig
      * @sig:     the signal number
      *
      * Returns 1 when the handler is SIG_IGN (explicitly ignored), or when it
      * is SIG_DFL and sig_kernel_ignore(sig) is true (implicitly ignored);
      * returns 0 otherwise.
      */
  49 static int sig_handler_ignored(void __user *handler, int sig)
  50 {
  51         if (handler == SIG_IGN)
  52                 return 1;
  53         return handler == SIG_DFL && sig_kernel_ignore(sig);
  54 }
  55
     /*
      * Decide whether task @t would ignore @sig, looking only at the installed
      * handler and the SIGNAL_UNKILLABLE flag (blocking is not considered here).
      *
      * @from_ancestor_ns: when nonzero, the SIGNAL_UNKILLABLE shortcut below is
      *                    not taken.
      *
      * Returns nonzero if the signal would be ignored, 0 otherwise.
      */
  56 static int sig_task_ignored(struct task_struct *t, int sig,
  57                 int from_ancestor_ns)
  58 {
  59         void __user *handler = sig_handler(t, sig);
  60
  61         /* A SIGNAL_UNKILLABLE task ignores SIG_DFL signals unless from_ancestor_ns. */
  62         if (handler == SIG_DFL && !from_ancestor_ns &&
  63             unlikely(t->signal->flags & SIGNAL_UNKILLABLE))
  64                 return 1;
  65
  66         /* Otherwise the handler alone decides. */
  67         return sig_handler_ignored(handler, sig);
  68 }
  69
     /*
      * Decide whether @sig sent to @t can be dropped at generation time.
      *
      * Returns 1 if the signal is to be ignored, 0 if it must be kept.
      * @from_ancestor_ns is passed through to sig_task_ignored().
      */
  70 static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns)
  71 {
  72         /*
  73          * A blocked signal (->blocked or ->real_blocked) is never ignored:
  74          * its handler may change by the time it is unblocked.
  75          */
  76         if (sigismember(&t->blocked, sig))
  77                 return 0;
  78         if (sigismember(&t->real_blocked, sig))
  79                 return 0;
  80
  81         /*
  82          * Otherwise it is dropped only when the task ignores it and the
  83          * tracehook does not ask for ignored signals to be considered.
  84          */
  85         return sig_task_ignored(t, sig, from_ancestor_ns) &&
  86                 !tracehook_consider_ignored_signal(t, sig);
  87 }
  88
  89 /*
  90  * Re-calculate pending state from the set of locally pending
  91  * signals, globally pending signals, and blocked signals.
      *
      * Returns 1 if at least one signal set in @signal is not set in
      * @blocked, 0 otherwise.
  92  */
  93 static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
  94 {
  95         unsigned long ready;
  96         long i;
  97
             /*
              * The word counts 4, 2 and 1 are written out by hand; any other
              * _NSIG_WORDS value is handled by the generic loop in "default".
              * Each arm ORs together the "pending and not blocked" bits.
              */
  98         switch (_NSIG_WORDS) {
  99         default:
 100                 for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
 101                         ready |= signal->sig[i] &~ blocked->sig[i];
 102                 break;
 103
 104         case 4: ready  = signal->sig[3] &~ blocked->sig[3];
 105                 ready |= signal->sig[2] &~ blocked->sig[2];
 106                 ready |= signal->sig[1] &~ blocked->sig[1];
 107                 ready |= signal->sig[0] &~ blocked->sig[0];
 108                 break;
 109
 110         case 2: ready  = signal->sig[1] &~ blocked->sig[1];
 111                 ready |= signal->sig[0] &~ blocked->sig[0];
 112                 break;
 113
 114         case 1: ready  = signal->sig[0] &~ blocked->sig[0];
 115         }
 116         return ready != 0;
 117 }
 118
     /* True if @p (a struct sigpending) holds a signal that mask @b does not block. */
 119 #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
 120
     /*
      * Set TIF_SIGPENDING on @t when there is something for it to act on:
      * group_stop_count is above zero, or a signal not in t->blocked is
      * pending in either t->pending or t->signal->shared_pending.
      *
      * Returns 1 if the flag was set, 0 otherwise.  The flag is never cleared
      * here; the comment in the body explains why.
      */
 121 static int recalc_sigpending_tsk(struct task_struct *t)
 122 {
 123         if (t->signal->group_stop_count > 0 ||
 124             PENDING(&t->pending, &t->blocked) ||
 125             PENDING(&t->signal->shared_pending, &t->blocked)) {
 126                 set_tsk_thread_flag(t, TIF_SIGPENDING);
 127                 return 1;
 128         }
 129         /*
 130          * We must never clear the flag in another thread, or in current
 131          * when it's possible the current syscall is returning -ERESTART*.
 132          * So we don't clear it here, and only callers who know they should do.
 133          */
 134         return 0;
 135 }
 136
 137 /*
 138  * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
 139  * This is superfluous when called on current, the wakeup is a harmless no-op.
      *
      * signal_wake_up() is called with resume == 0, and only when
      * recalc_sigpending_tsk() returned nonzero.
 140  */
 141 void recalc_sigpending_and_wake(struct task_struct *t)
 142 {
 143         if (recalc_sigpending_tsk(t))
 144                 signal_wake_up(t, 0);
 145 }
 146
     /*
      * Recompute TIF_SIGPENDING for current.
      *
      * The flag is forced on when tracehook_force_sigpending() returns true.
      * Otherwise recalc_sigpending_tsk() sets it if needed, and it is cleared
      * only when that call returned 0 and current is not freezing().
      */
 147 void recalc_sigpending(void)
 148 {
 149         if (unlikely(tracehook_force_sigpending()))
 150                 set_thread_flag(TIF_SIGPENDING);
 151         else if (!recalc_sigpending_tsk(current) && !freezing(current))
 152                 clear_thread_flag(TIF_SIGPENDING);
 153
 154 }
 155
 156 /* Given the mask, find the first available signal that should be serviced. */
 157
     /*
      * @pending: queue whose ->signal bitmap is searched
      * @mask:    signals to skip; a bit set here hides the same bit in @pending
      *
      * Words are scanned from index 0 upwards and the first word with a
      * candidate bit wins.  Returns the signal number found (1-based, hence
      * the "+ 1"), or 0 if no signal is pending outside @mask.
      *
      * NOTE(review): ffz(~x) is presumably the index of the lowest set bit
      * of x, which would make the result the lowest-numbered candidate --
      * confirm against the ffz() definition.
      */
 158 int next_signal(struct sigpending *pending, sigset_t *mask)
 159 {
 160         unsigned long i, *s, *m, x;
 161         int sig = 0;
 162
 163         s = pending->signal.sig;
 164         m = mask->sig;
 165         switch (_NSIG_WORDS) {
 166         default:
 167                 for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m)
 168                         if ((x = *s &~ *m) != 0) {
 169                                 sig = ffz(~x) + i*_NSIG_BPW + 1;
 170                                 break;
 171                         }
 172                 break;
 173
                     /* Two words: sig starts as the base of the word that hit. */
 174         case 2: if ((x = s[0] &~ m[0]) != 0)
 175                         sig = 1;
 176                 else if ((x = s[1] &~ m[1]) != 0)
 177                         sig = _NSIG_BPW + 1;
 178                 else
 179                         break;
 180                 sig += ffz(~x);
 181                 break;
 182
 183         case 1: if ((x = *s &~ *m) != 0)
 184                         sig = ffz(~x) + 1;
 185                 break;
 186         }
 187
 188         return sig;
 189 }
 190
 191 /*
 192  * allocate a new signal queue record
 193  * - this may be called without locks if and only if t == current, otherwise an
 194  *   appropriate lock must be held to stop the target task from exiting
      *
      * @t:               task whose user is charged for the record
      * @flags:           gfp flags handed to kmem_cache_alloc()
      * @override_rlimit: nonzero to allocate even when the user's sigpending
      *                   count is above t's RLIMIT_SIGPENDING soft limit
      *
      * Returns the new record with an empty list head, flags == 0 and ->user
      * set (holding the reference taken by get_uid()), or NULL if the limit
      * was exceeded or the allocation failed.
 195  */
 196 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 197                                          int override_rlimit)
 198 {
 199         struct sigqueue *q = NULL;
 200         struct user_struct *user;
 201
 202         /*
 203          * We won't get problems with the target's UID changing under us
 204          * because changing it requires RCU be used, and if t != current, the
 205          * caller must be holding the RCU readlock (by way of a spinlock) and
 206          * we use RCU protection here
 207          */
 208         user = get_uid(__task_cred(t)->user);
             /* Count the record first; the limit test below sees the new value. */
 209         atomic_inc(&user->sigpending);
 210         if (override_rlimit ||
 211             atomic_read(&user->sigpending) <=
 212                         t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
 213                 q = kmem_cache_alloc(sigqueue_cachep, flags);
 214         if (unlikely(q == NULL)) {
                     /* No record obtained: undo the count and drop the user ref. */
 215                 atomic_dec(&user->sigpending);
 216                 free_uid(user);
 217         } else {
 218                 INIT_LIST_HEAD(&q->list);
 219                 q->flags = 0;
 220                 q->user = user;
 221         }
 222
 223         return q;
 224 }
 225
     /*
      * Release a signal queue record obtained from __sigqueue_alloc().
      *
      * Records flagged SIGQUEUE_PREALLOC are left alone.  For all others the
      * owning user's sigpending count is decremented, the user reference is
      * dropped and the record goes back to sigqueue_cachep.
      */
 226 static void __sigqueue_free(struct sigqueue *q)
 227 {
 228         if (!(q->flags & SIGQUEUE_PREALLOC)) {
 229                 atomic_dec(&q->user->sigpending);
 230                 free_uid(q->user);
 231                 kmem_cache_free(sigqueue_cachep, q);
 232         }
 233 }
 234
     /*
      * Empty @queue: clear its pending bitmap, then unlink every queued record
      * and hand it to __sigqueue_free().
      */
 235 void flush_sigqueue(struct sigpending *queue)
 236 {
 237         struct sigqueue *entry, *next;
 238
 239         sigemptyset(&queue->signal);
 240
 241         list_for_each_entry_safe(entry, next, &queue->list, list) {
 242                 list_del_init(&entry->list);
 243                 __sigqueue_free(entry);
 244         }
 245 }
 246
 247 /*
 248  * Flush all pending signals for a task.
      *
      * Clears TIF_SIGPENDING on @t and empties both t->pending and
      * t->signal->shared_pending.  No lock is taken here; flush_signals()
      * calls this with the siglock held.
 249  */
 250 void __flush_signals(struct task_struct *t)
 251 {
 252         clear_tsk_thread_flag(t, TIF_SIGPENDING);
 253         flush_sigqueue(&t->pending);
 254         flush_sigqueue(&t->signal->shared_pending);
 255 }
 256
     /*
      * Locked variant of __flush_signals(): takes t->sighand->siglock with
      * spin_lock_irqsave() around the flush.
      */
 257 void flush_signals(struct task_struct *t)
 258 {
 259         unsigned long flags;
 260
 261         spin_lock_irqsave(&t->sighand->siglock, flags);
 262         __flush_signals(t);
 263         spin_unlock_irqrestore(&t->sighand->siglock, flags);
 264 }
 265
     /*
      * Remove the queued records whose si_code is SI_TIMER from @pending and
      * fix up the pending bitmap to match.
      *
      * A signal's bit is cleared when a timer record for it is dropped, but it
      * is put back if a non-timer record for the same signal is queued (those
      * signals are collected in "retain").  Bits of signals that have no timer
      * record are left as they were.
      */
 266 static void __flush_itimer_signals(struct sigpending *pending)
 267 {
 268         sigset_t signal, retain;
 269         struct sigqueue *q, *n;
 270
             /* Work on a copy of the bitmap; "retain" starts out empty. */
 271         signal = pending->signal;
 272         sigemptyset(&retain);
 273
 274         list_for_each_entry_safe(q, n, &pending->list, list) {
 275                 int sig = q->info.si_signo;
 276
 277                 if (likely(q->info.si_code != SI_TIMER)) {
 278                         sigaddset(&retain, sig);
 279                 } else {
 280                         sigdelset(&signal, sig);
 281                         list_del_init(&q->list);
 282                         __sigqueue_free(q);
 283                 }
 284         }
 285
             /* New bitmap: surviving bits plus every signal that still has a record. */
 286         sigorsets(&pending->signal, &signal, &retain);
 287 }
 288
     /*
      * Drop the SI_TIMER records from both of current's queues (tsk->pending
      * and tsk->signal->shared_pending) while holding current's siglock.
      */
 289 void flush_itimer_signals(void)
 290 {
 291         struct task_struct *tsk = current;
 292         unsigned long flags;
 293
 294         spin_lock_irqsave(&tsk->sighand->siglock, flags);
 295         __flush_itimer_signals(&tsk->pending);
 296         __flush_itimer_signals(&tsk->signal->shared_pending);
 297         spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
 298 }
 299
     /*
      * Make @t ignore everything: set the handler of each of the _NSIG action
      * entries to SIG_IGN, then discard whatever is pending via
      * flush_signals().
      */
 300 void ignore_signals(struct task_struct *t)
 301 {
 302         struct k_sigaction *ka = &t->sighand->action[0];
 303         int left;
 304
 305         for (left = _NSIG; left > 0; left--, ka++)
 306                 ka->sa.sa_handler = SIG_IGN;
 307         flush_signals(t);
 308 }
 309
 310 /*
 311  * Flush all handlers for a task.
      *
      * Every one of the _NSIG action entries of @t gets its flags zeroed and
      * its mask emptied.  The handler is reset to SIG_DFL, except that an
      * entry set to SIG_IGN keeps SIG_IGN when @force_default is zero.
 312  */
 313 void
 314 flush_signal_handlers(struct task_struct *t, int force_default)
 315 {
 316         int i;
 317
 318         for (i = 0; i < _NSIG; i++) {
 319                 struct k_sigaction *ka = &t->sighand->action[i];
 320
 321                 if (force_default || ka->sa.sa_handler != SIG_IGN)
 322                         ka->sa.sa_handler = SIG_DFL;
 323                 ka->sa.sa_flags = 0;
 324                 sigemptyset(&ka->sa.sa_mask);
 325         }
 326 }
 327
     /*
      * Report whether @sig counts as unhandled for @tsk.
      *
      * Returns 1 for the global init task.  Otherwise returns 0 when a handler
      * other than SIG_IGN/SIG_DFL is installed, and else the negation of
      * tracehook_consider_fatal_signal().
      */
 328 int unhandled_signal(struct task_struct *tsk, int sig)
 329 {
 330         void __user *handler = sig_handler(tsk, sig);
 331         int user_handler = handler != SIG_IGN && handler != SIG_DFL;
 332
 333         if (is_global_init(tsk))
 334                 return 1;
 335         return user_handler ? 0 : !tracehook_consider_fatal_signal(tsk, sig);
 336 }
 337
 338
 339 /* Notify the system that a driver wants to block all signals for this
 340  * process, and wants to be notified if any signals at all were to be
 341  * sent/acted upon.  If the notifier routine returns non-zero, then the
 342  * signal will be acted upon after all.  If the notifier routine returns 0,
 343  * then the signal will be blocked.  Only one block per process is
 344  * allowed.  priv is a pointer to private data that the notifier routine
 345  * can use to determine if the signal should be blocked or not.
      *
      * The three values are stored in current under its siglock.  Only the
      * @mask pointer is stored, not a copy of the set it points to.  */
 346
 347 void
 348 block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
 349 {
 350         unsigned long flags;
 351
 352         spin_lock_irqsave(&current->sighand->siglock, flags);
 353         current->notifier_mask = mask;
 354         current->notifier_data = priv;
 355         current->notifier = notifier;
 356         spin_unlock_irqrestore(&current->sighand->siglock, flags);
 357 }
 358
 359 /* Notify the system that blocking has ended. */
 360
     /*
      * Clears current's notifier and notifier_data under the siglock and then
      * recomputes TIF_SIGPENDING.  notifier_mask is not touched here.
      */
 361 void
 362 unblock_all_signals(void)
 363 {
 364         unsigned long flags;
 365
 366         spin_lock_irqsave(&current->sighand->siglock, flags);
 367         current->notifier = NULL;
 368         current->notifier_data = NULL;
 369         recalc_sigpending();
 370         spin_unlock_irqrestore(&current->sighand->siglock, flags);
 371 }
 372
     /*
      * Take one instance of @sig off @list and fill in @info for it.
      *
      * The first queued record for @sig, if any, is unlinked, copied into
      * @info and freed.  The bit in list->signal is cleared unless a second
      * record for the same signal is queued.  When no record is queued at all,
      * @info gets @sig as si_signo and zero for errno, code, pid and uid.
      */
 373 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 374 {
 375         struct sigqueue *q, *first = NULL;
 376
 377         /*
 378          * Collect the siginfo appropriate to this signal.  Check if
 379          * there is another siginfo for the same signal.
 380         */
 381         list_for_each_entry(q, &list->list, list) {
 382                 if (q->info.si_signo == sig) {
                             /*
                              * A second record exists: jump past the
                              * sigdelset() so the signal stays pending.
                              */
 383                         if (first)
 384                                 goto still_pending;
 385                         first = q;
 386                 }
 387         }
 388
 389         sigdelset(&list->signal, sig);
 390
 391         if (first) {
 392 still_pending:
 393                 list_del_init(&first->list);
 394                 copy_siginfo(info, &first->info);
 395                 __sigqueue_free(first);
 396         } else {
 397                 /* Ok, it wasn't in the queue.  This must be
 398                    a fast-pathed signal or we must have been
 399                    out of queue space.  So zero out the info.
 400                  */
 401                 info->si_signo = sig;
 402                 info->si_errno = 0;
 403                 info->si_code = 0;
 404                 info->si_pid = 0;
 405                 info->si_uid = 0;
 406         }
 407 }
 408
     /*
      * Pick the next signal from @pending that is not in @mask and collect its
      * siginfo into @info.
      *
      * If current has a notifier installed and the signal is a member of
      * current->notifier_mask, the notifier is called first.  When it returns
      * zero, TIF_SIGPENDING is cleared and 0 is returned with the signal left
      * in the queue.
      *
      * Returns the signal number, or 0 if nothing was dequeued.
      */
 409 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
 410                         siginfo_t *info)
 411 {
 412         int sig = next_signal(pending, mask);
 413
 414         if (sig) {
 415                 if (current->notifier) {
 416                         if (sigismember(current->notifier_mask, sig)) {
 417                                 if (!(current->notifier)(current->notifier_data)) {
 418                                         clear_thread_flag(TIF_SIGPENDING);
 419                                         return 0;
 420                                 }
 421                         }
 422                 }
 423
 424                 collect_signal(sig, pending, info);
 425         }
 426
 427         return sig;
 428 }
 429
 430 /*
 431  * Dequeue a signal and return the element to the caller, which is
 432  * expected to free it.
 433  *
 434  * All callers have to hold the siglock.
      *
      * @tsk:  task to dequeue from; its private queue is tried first, then the
      *        shared one
      * @mask: signals that must not be dequeued
      * @info: filled in with the siginfo of the dequeued signal
      *
      * Returns the signal number, or 0 if nothing was dequeued.  Note that the
      * siglock is dropped and retaken around do_schedule_next_timer().
 435  */
 436 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 437 {
 438         int signr;
 439
 440         /* We only dequeue private signals from ourselves, we don't let
 441          * signalfd steal them
 442          */
 443         signr = __dequeue_signal(&tsk->pending, mask, info);
 444         if (!signr) {
 445                 signr = __dequeue_signal(&tsk->signal->shared_pending,
 446                                          mask, info);
 447                 /*
 448                  * itimer signal ?
 449                  *
 450                  * itimers are process shared and we restart periodic
 451                  * itimers in the signal delivery path to prevent DoS
 452                  * attacks in the high resolution timer case. This is
 453                  * compliant with the old way of self restarting
 454                  * itimers, as the SIGALRM is a legacy signal and only
 455                  * queued once. Changing the restart behaviour to
 456                  * restart the timer in the signal dequeue path is
 457                  * reducing the timer noise on heavy loaded !highres
 458                  * systems too.
 459                  */
 460                 if (unlikely(signr == SIGALRM)) {
 461                         struct hrtimer *tmr = &tsk->signal->real_timer;
 462
                             /* Re-arm only an idle timer with a nonzero interval. */
 463                         if (!hrtimer_is_queued(tmr) &&
 464                             tsk->signal->it_real_incr.tv64 != 0) {
 465                                 hrtimer_forward(tmr, tmr->base->get_time(),
 466                                                 tsk->signal->it_real_incr);
 467                                 hrtimer_restart(tmr);
 468                         }
 469                 }
 470         }
 471
             /* Runs whether or not a signal was dequeued. */
 472         recalc_sigpending();
 473         if (!signr)
 474                 return 0;
 475
 476         if (unlikely(sig_kernel_stop(signr))) {
 477                 /*
 478                  * Set a marker that we have dequeued a stop signal.  Our
 479                  * caller might release the siglock and then the pending
 480                  * stop signal it is about to process is no longer in the
 481                  * pending bitmasks, but must still be cleared by a SIGCONT
 482                  * (and overruled by a SIGKILL).  So those cases clear this
 483                  * shared flag after we've set it.  Note that this flag may
 484                  * remain set after the signal we return is ignored or
 485                  * handled.  That doesn't matter because its only purpose
 486                  * is to alert stop-signal processing code when another
 487                  * processor has come along and cleared the flag.
 488                  */
 489                 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
 490         }
 491         if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
 492                 /*
 493                  * Release the siglock to ensure proper locking order
 494                  * of timer locks outside of siglocks.  Note, we leave
 495                  * irqs disabled here, since the posix-timers code is
 496                  * about to disable them again anyway.
 497                  */
 498                 spin_unlock(&tsk->sighand->siglock);
 499                 do_schedule_next_timer(info);
 500                 spin_lock(&tsk->sighand->siglock);
 501         }
 502         return signr;
 503 }
 504
 505 /*
 506  * Tell a process that it has a new active signal..
 507  *
 508  * NOTE! we rely on the previous spin_lock to
 509  * lock interrupts for us! We can only be called with
 510  * "siglock" held, and the local interrupt must
 511  * have been disabled when that got acquired!
 512  *
 513  * No need to set need_resched since signal event passing
 514  * goes through ->blocked
      *
      * @t:      task to notify; TIF_SIGPENDING is set on it unconditionally
      * @resume: when nonzero, TASK_WAKEKILL is added to the wake-up mask next
      *          to TASK_INTERRUPTIBLE
 515  */
 516 void signal_wake_up(struct task_struct *t, int resume)
 517 {
 518         unsigned int mask;
 519
 520         set_tsk_thread_flag(t, TIF_SIGPENDING);
 521
 522         /*
 523          * For SIGKILL, we want to wake it up in the stopped/traced/killable
 524          * case. We don't check t->state here because there is a race with it
 525          * executing on another processor and just now entering stopped state.
 526          * By using wake_up_state, we ensure the process will wake up and
 527          * handle its death signal.
 528          */
 529         mask = TASK_INTERRUPTIBLE;
 530         if (resume)
 531                 mask |= TASK_WAKEKILL;
             /* wake_up_state() returned 0: fall back to kick_process(). */
 532         if (!wake_up_state(t, mask))
 533                 kick_process(t);
 534 }
 535
 536 /*
 537  * Remove the signals in @mask from @s: clear them in the pending set and
 538  * unlink and free every queue entry that carries one of them.
 539  * Returns 1 if any of them was pending, 0 otherwise.
 540  *
 541  * All callers must be holding the siglock.
 542  *
 543  * @mask is a full sigset_t, so this is not limited to the first mask word.
 544  */
 545 static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
 546 {
 547         struct sigqueue *entry, *next;
 548         sigset_t hit;
 549
 550         sigandsets(&hit, mask, &s->signal);
 551         if (sigisemptyset(&hit))
 552                 return 0;
 553
 554         signandsets(&s->signal, &s->signal, mask);
 555         list_for_each_entry_safe(entry, next, &s->list, list) {
 556                 if (!sigismember(mask, entry->info.si_signo))
 557                         continue;
 558                 list_del_init(&entry->list);
 559                 __sigqueue_free(entry);
 560         }
 561         return 1;
 562 }
 563 /*
 564  * Remove the signals in the one-word bitmask @mask from @s, clearing the
 565  * pending bits and freeing the queue entries below SIGRTMIN that match.
 566  * Returns 1 if any of them was pending, 0 otherwise.
 567  * All callers must be holding the siglock.
 568  */
 569 static int rm_from_queue(unsigned long mask, struct sigpending *s)
 570 {
 571         struct sigqueue *entry, *next;
 572
 573         if (!sigtestsetmask(&s->signal, mask))
 574                 return 0;
 575         sigdelsetmask(&s->signal, mask);
 576         list_for_each_entry_safe(entry, next, &s->list, list) {
 577                 int signo = entry->info.si_signo;
 578
 579                 if (signo >= SIGRTMIN || !(mask & sigmask(signo)))
 580                         continue;
 581                 list_del_init(&entry->list);
 582                 __sigqueue_free(entry);
 583         }
 584         return 1;
 585 }
 586
 587 /*
 588  * Bad permissions for sending the signal
 589  * - the caller must hold at least the RCU read lock
      *
      * Returns 0 if current may send @sig to @t, -EINVAL when @sig is not a
      * valid signal, -EPERM when the credential check below fails, or whatever
      * error audit_signal_info() or security_task_kill() reports.
 590  */
 591 static int check_kill_permission(int sig, struct siginfo *info,
 592                                  struct task_struct *t)
 593 {
 594         const struct cred *cred = current_cred(), *tcred;
 595         struct pid *sid;
 596         int error;
 597
 598         if (!valid_signal(sig))
 599                 return -EINVAL;
 600
             /*
              * Apart from SEND_SIG_NOINFO, special info values and info marked
              * SI_FROMKERNEL skip every check below.
              */
 601         if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
 602                 return 0;
 603
 604         error = audit_signal_info(sig, t); /* Let audit system see the signal */
 605         if (error)
 606                 return error;
 607
             /*
              * Each XOR is nonzero when the two ids differ, so the test below is
              * true only if neither the sender's euid nor its uid matches the
              * target's suid or uid, and the sender lacks CAP_KILL.
              */
 608         tcred = __task_cred(t);
 609         if ((cred->euid ^ tcred->suid) &&
 610             (cred->euid ^ tcred->uid) &&
 611             (cred->uid  ^ tcred->suid) &&
 612             (cred->uid  ^ tcred->uid) &&
 613             !capable(CAP_KILL)) {
 614                 switch (sig) {
 615                 case SIGCONT:
 616                         sid = task_session(t);
 617                         /*
 618                          * We don't return the error if sid == NULL. The
 619                          * task was unhashed, the caller must notice this.
 620                          */
 621                         if (!sid || sid == task_session(current))
 622                                 break;
                             /* SIGCONT to another session falls through to -EPERM. */
 623                 default:
 624                         return -EPERM;
 625                 }
 626         }
 627
 628         return security_task_kill(t, info, sig, 0);
 629 }
 630
 631 /*
 632  * Handle magic process-wide effects of stop/continue signals. Unlike
 633  * the signal actions, these happen immediately at signal-generation
 634  * time regardless of blocking, ignoring, or handling.  This does the
 635  * actual continuing for SIGCONT, but not the actual stopping for stop
 636  * signals. The process stop is done as a signal action for SIG_DFL.
 637  *
 638  * Returns true if the signal should be actually delivered, otherwise
 639  * it should be dropped.
      *
      * @sig:              signal being generated
      * @p:                target task; all threads of its group are visited
      * @from_ancestor_ns: only passed on to sig_ignored()
 640  */
 641 static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
 642 {
 643         struct signal_struct *signal = p->signal;
 644         struct task_struct *t;
 645
 646         if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) {
 647                 /*
 648                  * The process is in the middle of dying, nothing to do.
 649                  */
 650         } else if (sig_kernel_stop(sig)) {
 651                 /*
 652                  * This is a stop signal.  Remove SIGCONT from all queues.
 653                  */
 654                 rm_from_queue(sigmask(SIGCONT), &signal->shared_pending);
 655                 t = p;
 656                 do {
 657                         rm_from_queue(sigmask(SIGCONT), &t->pending);
 658                 } while_each_thread(p, t);
 659         } else if (sig == SIGCONT) {
 660                 unsigned int why;
 661                 /*
 662                  * Remove all stop signals from all queues,
 663                  * and wake all threads.
 664                  */
 665                 rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
 666                 t = p;
 667                 do {
 668                         unsigned int state;
 669                         rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
 670                         /*
 671                          * If there is a handler for SIGCONT, we must make
 672                          * sure that no thread returns to user mode before
 673                          * we post the signal, in case it was the only
 674                          * thread eligible to run the signal handler--then
 675                          * it must not do anything between resuming and
 676                          * running the handler.  With the TIF_SIGPENDING
 677                          * flag set, the thread will pause and acquire the
 678                          * siglock that we hold now and until we've queued
 679                          * the pending signal.
 680                          *
 681                          * Wake up the stopped thread _after_ setting
 682                          * TIF_SIGPENDING
 683                          */
 684                         state = __TASK_STOPPED;
 685                         if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
 686                                 set_tsk_thread_flag(t, TIF_SIGPENDING);
 687                                 state |= TASK_INTERRUPTIBLE;
 688                         }
 689                         wake_up_state(t, state);
 690                 } while_each_thread(p, t);
 691
 692                 /*
 693                  * Notify the parent with CLD_CONTINUED if we were stopped.
 694                  *
 695                  * If we were in the middle of a group stop, we pretend it
 696                  * was already finished, and then continued. Since SIGCHLD
 697                  * doesn't queue we report only CLD_STOPPED, as if the next
 698                  * CLD_CONTINUED was dropped.
 699                  */
 700                 why = 0;
 701                 if (signal->flags & SIGNAL_STOP_STOPPED)
 702                         why |= SIGNAL_CLD_CONTINUED;
 703                 else if (signal->group_stop_count)
 704                         why |= SIGNAL_CLD_STOPPED;
 705
 706                 if (why) {
 707                         /*
 708                          * The first thread which returns from do_signal_stop()
 709                          * will take ->siglock, notice SIGNAL_CLD_MASK, and
 710                          * notify its parent. See get_signal_to_deliver().
 711                          */
                             /* Plain assignment: all other signal->flags bits are dropped. */
 712                         signal->flags = why | SIGNAL_STOP_CONTINUED;
 713                         signal->group_stop_count = 0;
 714                         signal->group_exit_code = 0;
 715                 } else {
 716                         /*
 717                          * We are not stopped, but there could be a stop
 718                          * signal in the middle of being processed after
 719                          * being removed from the queue.  Clear that too.
 720                          */
 721                         signal->flags &= ~SIGNAL_STOP_DEQUEUED;
 722                 }
 723         }
 724
             /* The side effects above happen even if the signal ends up ignored. */
 725         return !sig_ignored(p, sig, from_ancestor_ns);
 726 }
 727
 728 /*
 729  * Test if P wants to take SIG.  Once every thread has been checked with
 730  * this, the outcome is the same as finding no thread that does not block
 731  * SIG: threads not blocking it were ruled out because they are not running
 732  * and already have signals pending.  Such threads will dequeue from the
 733  * shared queue as soon as they're available, so putting the signal on the
 734  * shared queue is equivalent to sending it to one such thread.
      *
      * Returns nonzero if @p should be picked for @sig, 0 otherwise.
 735  */
 736 static inline int wants_signal(int sig, struct task_struct *p)
 737 {
 738         /* Never for a blocked signal or an exiting thread. */
 739         if (sigismember(&p->blocked, sig) || (p->flags & PF_EXITING))
 740                 return 0;
 741
 742         /* SIGKILL is wanted regardless of the checks below. */
 743         if (sig == SIGKILL)
 744                 return 1;
 745         return !task_is_stopped_or_traced(p) &&
 746                 (task_curr(p) || !signal_pending(p));
 747 }
 748
     /*
      * Choose a thread to take @sig and wake it up.
      *
      * @sig:   the signal
      * @p:     target task; it is preferred if wants_signal() accepts it
      * @group: when zero, no thread other than @p is considered
      *
      * If the signal is fatal for the group and is not a coredump signal, a
      * group exit is started instead: SIGKILL is added to every thread's
      * private pending set and every thread is woken.
      */
 749 static void complete_signal(int sig, struct task_struct *p, int group)
 750 {
 751         struct signal_struct *signal = p->signal;
 752         struct task_struct *t;
 753
 754         /*
 755          * Now find a thread we can wake up to take the signal off the queue.
 756          *
 757          * If the main thread wants the signal, it gets first crack.
 758          * Probably the least surprising to the average bear.
 759          */
 760         if (wants_signal(sig, p))
 761                 t = p;
 762         else if (!group || thread_group_empty(p))
 763                 /*
 764                  * There is just one thread and it does not need to be woken.
 765                  * It will dequeue unblocked signals before it runs again.
 766                  */
 767                 return;
 768         else {
 769                 /*
 770                  * Otherwise try to find a suitable thread.
 771                  */
                     /* Walk the threads starting at curr_target until one accepts. */
 772                 t = signal->curr_target;
 773                 while (!wants_signal(sig, t)) {
 774                         t = next_thread(t);
 775                         if (t == signal->curr_target)
 776                                 /*
 777                                  * No thread needs to be woken.
 778                                  * Any eligible threads will see
 779                                  * the signal in the queue soon.
 780                                  */
 781                                 return;
 782                 }
                     /* The next search starts from the thread picked this time. */
 783                 signal->curr_target = t;
 784         }
 785
 786         /*
 787          * Found a killable thread.  If the signal will be fatal,
 788          * then start taking the whole group down immediately.
 789          */
 790         if (sig_fatal(p, sig) &&
 791             !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&
 792             !sigismember(&t->real_blocked, sig) &&
 793             (sig == SIGKILL ||
 794              !tracehook_consider_fatal_signal(t, sig))) {
 795                 /*
 796                  * This signal will be fatal to the whole group.
 797                  */
 798                 if (!sig_kernel_coredump(sig)) {
 799                         /*
 800                          * Start a group exit and wake everybody up.
 801                          * This way we don't have other threads
 802                          * running and doing things after a slower
 803                          * thread has the fatal signal pending.
 804                          */
 805                         signal->flags = SIGNAL_GROUP_EXIT;
 806                         signal->group_exit_code = sig;
 807                         signal->group_stop_count = 0;
 808                         t = p;
 809                         do {
 810                                 sigaddset(&t->pending.signal, SIGKILL);
 811                                 signal_wake_up(t, 1);
 812                         } while_each_thread(p, t);
 813                         return;
 814                 }
                     /* Coredump signals take the ordinary wake-up path below. */
 815         }
 816
 817         /*
 818          * The signal is already in the shared-pending queue.
 819          * Tell the chosen thread to wake up and dequeue it.
 820          */
 821         signal_wake_up(t, sig == SIGKILL);
 822         return;
 823 }
 824
     /*
      * Return nonzero if @sig is a non-realtime signal (sig < SIGRTMIN) whose
      * bit is already set in @signals->signal.  __send_signal() uses this to
      * drop a repeated instance of such a signal instead of queueing it again.
      */
 825 static inline int legacy_queue(struct sigpending *signals, int sig)
 826 {
 827         return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
 828 }
 829
     /*
      * Queue @sig for task @t and pick a thread to handle it.
      *
      * @sig:              signal number to send
      * @info:             a real siginfo, or one of the special markers
      *                    SEND_SIG_NOINFO, SEND_SIG_PRIV or SEND_SIG_FORCED
      * @t:                target task; t->sighand->siglock must be held
      *                    (asserted below)
      * @group:            nonzero to use the thread group's shared_pending
      *                    set, zero to use t's private pending set
      * @from_ancestor_ns: handed to prepare_signal(); when set, si_pid of a
      *                    copied siginfo is reported as 0
      *
      * Returns 0 when the signal was made pending, when prepare_signal()
      * rejected it, or when a non-rt instance was already pending.  Returns
      * -EAGAIN when no sigqueue entry could be allocated for a realtime
      * signal whose si_code is not SI_USER.
      */
 830 static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 831                         int group, int from_ancestor_ns)
 832 {
 833         struct sigpending *pending;
 834         struct sigqueue *q;
 835         int override_rlimit;
 836
 837         trace_sched_signal_send(sig, t);
 838
 839         assert_spin_locked(&t->sighand->siglock);
 840
 841         if (!prepare_signal(sig, t, from_ancestor_ns))
 842                 return 0;
 843
 844         pending = group ? &t->signal->shared_pending : &t->pending;
 845         /*
 846          * Short-circuit ignored signals and support queuing
 847          * exactly one non-rt signal, so that we can get more
 848          * detailed information about the cause of the signal.
 849          */
 850         if (legacy_queue(pending, sig))
 851                 return 0;
 852         /*
 853          * fast-pathed signals for kernel-internal things like SIGSTOP
 854          * or SIGKILL.
 855          */
 856         if (info == SEND_SIG_FORCED)
 857                 goto out_set;
 858
 859         /* Real-time signals must be queued if sent by sigqueue, or
 860            some other real-time mechanism.  It is implementation
 861            defined whether kill() does so.  We attempt to do so, on
 862            the principle of least surprise, but since kill is not
 863            allowed to fail with EAGAIN when low on memory we just
 864            make sure at least one signal gets delivered and don't
 865            pass on the info struct.  */
 866
             /*
              * override_rlimit is only ever set for non-rt signals, and then
              * only when @info is a special marker or carries si_code >= 0.
              * It is passed straight through to __sigqueue_alloc().
              */
 867         if (sig < SIGRTMIN)
 868                 override_rlimit = (is_si_special(info) || info->si_code >= 0);
 869         else
 870                 override_rlimit = 0;
 871
 872         q = __sigqueue_alloc(t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
 873                 override_rlimit);
 874         if (q) {
 875                 list_add_tail(&q->list, &pending->list);
                     /* Fill the queued siginfo according to the kind of @info. */
 876                 switch ((unsigned long) info) {
 877                 case (unsigned long) SEND_SIG_NOINFO:
                             /* Sent by current: report it as SI_USER. */
 878                         q->info.si_signo = sig;
 879                         q->info.si_errno = 0;
 880                         q->info.si_code = SI_USER;
 881                         q->info.si_pid = task_tgid_nr_ns(current,
 882                                                         task_active_pid_ns(t));
 883                         q->info.si_uid = current_uid();
 884                         break;
 885                 case (unsigned long) SEND_SIG_PRIV:
                             /* Sent by the kernel: SI_KERNEL with pid/uid 0. */
 886                         q->info.si_signo = sig;
 887                         q->info.si_errno = 0;
 888                         q->info.si_code = SI_KERNEL;
 889                         q->info.si_pid = 0;
 890                         q->info.si_uid = 0;
 891                         break;
 892                 default:
                             /* A real siginfo was supplied: copy it as is. */
 893                         copy_siginfo(&q->info, info);
 894                         if (from_ancestor_ns)
 895                                 q->info.si_pid = 0;
 896                         break;
 897                 }
 898         } else if (!is_si_special(info)) {
 899                 if (sig >= SIGRTMIN && info->si_code != SI_USER)
 900                 /*
 901                  * Queue overflow, abort.  We may abort if the signal was rt
 902                  * and sent by user using something other than kill().
 903                  */
 904                         return -EAGAIN;
 905         }
 906
             /*
              * Reached with or without a queued entry: the pending bit is set
              * either way, so the signal is delivered even if no siginfo
              * could be stored for it.
              */
 907 out_set:
 908         signalfd_notify(t, sig);
 909         sigaddset(&pending->signal, sig);
 910         complete_signal(sig, t, group);
 911         return 0;
 912 }
 913
     /*
      * Wrapper around __send_signal() that works out from_ancestor_ns.
      *
      * With CONFIG_PID_NS, from_ancestor_ns is set when @info is a real
      * siginfo for which SI_FROMUSER() is true and current has no positive
      * pid number in @t's active pid namespace.  Without CONFIG_PID_NS it is
      * always 0.  Returns whatever __send_signal() returns.
      */
 914 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
 915                         int group)
 916 {
 917         int from_ancestor_ns = 0;
 918
 919 #ifdef CONFIG_PID_NS
 920         if (!is_si_special(info) && SI_FROMUSER(info) &&
 921                         task_pid_nr_ns(current, task_active_pid_ns(t)) <= 0)
 922                 from_ancestor_ns = 1;
 923 #endif
 924
 925         return __send_signal(sig, info, t, group, from_ancestor_ns);
 926 }
 927
     /*
      * Set from the "print-fatal-signals=" boot parameter by
      * setup_print_fatal_signals().
      * NOTE(review): presumably gates the calls to print_fatal_signal();
      * confirm at the call site, which is outside this part of the file.
      */
 928 int print_fatal_signals;
 929
     /*
      * Log a diagnostic for a fatal signal taken by the current task.
      *
      * @regs:  register state to dump with show_regs()
      * @signr: number of the fatal signal
      *
      * Prints the task's comm and pid and the signal number.  On i386
      * (outside UML) it also prints up to 16 bytes of code found at
      * regs->ip, and finally dumps the registers with preemption disabled.
      */
 930 static void print_fatal_signal(struct pt_regs *regs, int signr)
 931 {
 932         printk("%s/%d: potentially unexpected fatal signal %d.\n",
 933                 current->comm, task_pid_nr(current), signr);
 934
 935 #if defined(__i386__) && !defined(__arch_um__)
 936         printk("code at %08lx: ", regs->ip);
 937         {
 938                 int i;
 939                 for (i = 0; i < 16; i++) {
 940                         unsigned char insn;
 941
                             /*
                              * regs->ip is not validated anywhere in this
                              * function, so use get_user(), which range
                              * checks the address, rather than __get_user().
                              * Stop at the first byte that cannot be read
                              * instead of printing a value that was never
                              * fetched.
                              */
 942                         if (get_user(insn, (unsigned char *)(regs->ip + i)))
                                     break;
 943                         printk("%02x ", insn);
 944                 }
 945         }
 946 #endif
 947         printk("\n");
 948         preempt_disable();
 949         show_regs(regs);
 950         preempt_enable();
 951 }
 952
     /*
      * Handler for the "print-fatal-signals=" boot parameter (registered
      * with __setup() below).  Parses an integer from @str into
      * print_fatal_signals using get_option().  Always returns 1.
      */
 953 static int __init setup_print_fatal_signals(char *str)
 954 {
 955         get_option (&str, &print_fatal_signals);
 956
 957         return 1;
 958 }
 959
 960 __setup("print-fatal-signals=", setup_print_fatal_signals);
 961
     /*
      * Send @sig to the thread group of @p: calls send_signal() with
      * group = 1, so the signal goes to the shared pending set.
      * p->sighand->siglock must already be held (__send_signal() asserts
      * it).  Returns send_signal()'s result.
      */
 962 int
 963 __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 964 {
 965         return send_signal(sig, info, p, 1);
 966 }
 967
     /*
      * Send @sig to the single thread @t: calls send_signal() with
      * group = 0, so the signal goes to t's private pending set.
      * t->sighand->siglock must already be held (__send_signal() asserts
      * it).  Returns send_signal()'s result.
      */
 968 static int
 969 specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 970 {
 971         return send_signal(sig, info, t, 0);
 972 }
 973
     /*
      * Locked front end to send_signal(): takes @p's sighand lock with
      * lock_task_sighand(), sends @sig, and drops the lock again.
      *
      * @group: true to signal the whole thread group, false for @p only.
      *
      * Returns -ESRCH when the sighand could not be locked (that is,
      * lock_task_sighand() returned NULL), otherwise send_signal()'s result.
      */
 974 int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
 975                         bool group)
 976 {
 977         unsigned long flags;
 978         int ret = -ESRCH;
 979
 980         if (lock_task_sighand(p, &flags)) {
 981                 ret = send_signal(sig, info, p, group);
 982                 unlock_task_sighand(p, &flags);
 983         }
 984
 985         return ret;
 986 }
 987
 988 /*
 989  * Force a signal that the process can't ignore: if necessary
 990  * we unblock the signal and change any SIG_IGN to SIG_DFL.
 991  *
 992  * Note: If we unblock the signal, we always reset it to SIG_DFL,
 993  * since we do not want to have a signal handler that was blocked
 994  * be invoked when user space had explicitly blocked it.
 995  *
 996  * We don't want to have recursive SIGSEGV's etc, for example,
 997  * that is why we also clear SIGNAL_UNKILLABLE.
      *
      * Takes and releases t->sighand->siglock itself.  The signal is sent
      * to the specific thread @t, and the return value is that of
      * specific_send_sig_info().
 998  */
 999 int
1000 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
1001 {
1002         unsigned long int flags;
1003         int ret, blocked, ignored;
1004         struct k_sigaction *action;
1005
1006         spin_lock_irqsave(&t->sighand->siglock, flags);
1007         action = &t->sighand->action[sig-1];
1008         ignored = action->sa.sa_handler == SIG_IGN;
1009         blocked = sigismember(&t->blocked, sig);
             /*
              * Either condition would stop delivery: fall back to the default
              * action, and lift the block if there was one.
              */
1010         if (blocked || ignored) {
1011                 action->sa.sa_handler = SIG_DFL;
1012                 if (blocked) {
1013                         sigdelset(&t->blocked, sig);
1014                         recalc_sigpending_and_wake(t);
1015                 }
1016         }
             /* SIGNAL_UNKILLABLE is only dropped when the default action applies. */
1017         if (action->sa.sa_handler == SIG_DFL)
1018                 t->signal->flags &= ~SIGNAL_UNKILLABLE;
1019         ret = specific_send_sig_info(sig, info, t);
1020         spin_unlock_irqrestore(&t->sighand->siglock, flags);
1021
1022         return ret;
1023 }
1024
     /*
      * Force @sig on thread @t with SEND_SIG_FORCED as the info marker.
      * __send_signal() treats that marker as a fast path that sets the
      * pending bit without allocating a sigqueue entry.  The result of
      * force_sig_info() is discarded.
      */
1025 void
1026 force_sig_specific(int sig, struct task_struct *t)
1027 {
1028         force_sig_info(sig, SEND_SIG_FORCED, t);
1029 }
1030
1031 /*
1032  * Nuke all other threads in the group.
      *
      * Resets the group's group_stop_count to 0, then marks SIGKILL pending
      * on, and wakes, every other thread of @p's thread group that does not
      * already have an exit_state.  @p itself is not touched.
      *
      * No lock is taken here.
      * NOTE(review): presumably the caller holds p->sighand->siglock; confirm
      * at the call sites.
1033  */
1034 void zap_other_threads(struct task_struct *p)
1035 {
1036         struct task_struct *t;
1037
1038         p->signal->group_stop_count = 0;
1039
1040         for (t = next_thread(p); t != p; t = next_thread(t)) {
1041                 /*
1042                  * Don't bother with already dead threads
1043                  */
1044                 if (t->exit_state)
1045                         continue;
1046
1047                 /* SIGKILL will be handled before any pending SIGSTOP */
1048                 sigaddset(&t->pending.signal, SIGKILL);
1049                 signal_wake_up(t, 1);
1050         }
1051 }
1052
     /*
      * Lock the sighand of @tsk, coping with tsk->sighand changing or going
      * away underneath us.
      *
      * Under rcu_read_lock(), tsk->sighand is sampled and its siglock is
      * taken with interrupts saved into *@flags.  The pointer is then
      * re-checked; if it changed in the meantime the lock is dropped and the
      * whole sequence is retried.
      *
      * Returns the locked sighand, or NULL (with nothing locked) when
      * tsk->sighand is NULL.  On success the lock is released with
      * unlock_task_sighand(), as do_send_sig_info() does.
      */
1053 struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
1054 {
1055         struct sighand_struct *sighand;
1056
1057         rcu_read_lock();
1058         for (;;) {
1059                 sighand = rcu_dereference(tsk->sighand);
1060                 if (unlikely(sighand == NULL))
1061                         break;
1062
1063                 spin_lock_irqsave(&sighand->siglock, *flags);
                     /* Still the task's sighand?  Then we hold the right lock. */
1064                 if (likely(sighand == tsk->sighand))
1065                         break;
1066                 spin_unlock_irqrestore(&sighand->siglock, *flags);
1067         }
1068         rcu_read_unlock();
1069
1070         return sighand;
1071 }
1072
1073 /*
1074  * send signal info to all the members of a group
1075  * - the caller must hold the RCU read lock at least
      *
      * check_kill_permission() is consulted first and its error, if any, is
      * returned.  With @sig == 0 only that check is performed and nothing is
      * sent.  Otherwise the result of do_send_sig_info() is returned.
1076  */
1077 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1078 {
1079         int ret = check_kill_permission(sig, info, p);
1080
1081         if (!ret && sig)
1082                 ret = do_send_sig_info(sig, info, p, true);
1083
1084         return ret;
1085 }
1086
1087 /*
1088  * __kill_pgrp_info() sends a signal to a process group: this is what the tty
1089  * control characters do (^C, ^Z etc)
1090  * - the caller must hold at least a readlock on tasklist_lock
      *
      * Returns 0 if the signal could be sent to at least one task of the
      * group.  Otherwise returns the error from the last task tried, or
      * -ESRCH when the loop body never ran.
1091  */
1092 int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
1093 {
1094         struct task_struct *p = NULL;
1095         int retval, success;
1096
1097         success = 0;
1098         retval = -ESRCH;
1099         do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
1100                 int err = group_send_sig_info(sig, info, p);
                     /* Remember any success; keep the most recent error. */
1101                 success |= !err;
1102                 retval = err;
1103         } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
1104         return success ? 0 : retval;
1105 }
1106
     /*
      * Send @sig to the task that @pid refers to (PIDTYPE_PID lookup), as a
      * group-wide signal via group_send_sig_info().
      *
      * The lookup and the send run under rcu_read_lock().  If the send
      * reports -ESRCH the lookup is repeated (see the comment in the loop).
      *
      * Returns -ESRCH when no task is found for @pid, otherwise the result
      * of group_send_sig_info().
      */
1107 int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
1108 {
1109         int error = -ESRCH;
1110         struct task_struct *p;
1111
1112         rcu_read_lock();
1113 retry:
1114         p = pid_task(pid, PIDTYPE_PID);
1115         if (p) {
1116                 error = group_send_sig_info(sig, info, p);
1117                 if (unlikely(error == -ESRCH))
1118                         /*
1119                          * The task was unhashed in between, try again.
1120                          * If it is dead, pid_task() will return NULL,
1121                          * if we race with de_thread() it will find the
1122                          * new leader.
1123                          */
1124                         goto retry;
1125         }
1126         rcu_read_unlock();
1127
1128         return error;
1129 }
1130
     /*
      * Numeric-pid variant of kill_pid_info(): resolves @pid with
      * find_vpid() under rcu_read_lock() and passes the result on.
      * Returns kill_pid_info()'s result.
      */
1131 int
1132 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1133 {
1134         int error;
1135         rcu_read_lock();
1136         error = kill_pid_info(sig, info, find_vpid(pid));
1137         rcu_read_unlock();
1138         return error;
1139 }
1140
1141 /* like kill_pid_info(), but doesn't use uid/euid of "current" */
     /*
      * The permission check uses the caller-supplied @uid and @euid, and
      * @secid is handed to security_task_kill().
      *
      * Returns -EINVAL for an invalid signal number, -ESRCH when @pid has no
      * task, -EPERM when the uid check fails, a nonzero result from
      * security_task_kill(), or otherwise the result of __send_signal().
      * When @sig is 0, or the task has no sighand, nothing is sent and the
      * (zero) result of security_task_kill() is returned.
      */
1142 int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1143                       uid_t uid, uid_t euid, u32 secid)
1144 {
1145         int ret = -EINVAL;
1146         struct task_struct *p;
1147         const struct cred *pcred;
1148
1149         if (!valid_signal(sig))
1150                 return ret;
1151
1152         read_lock(&tasklist_lock);
1153         p = pid_task(pid, PIDTYPE_PID);
1154         if (!p) {
1155                 ret = -ESRCH;
1156                 goto out_unlock;
1157         }
1158         pcred = __task_cred(p);
             /*
              * The uid check only applies to SEND_SIG_NOINFO or to a real
              * siginfo for which SI_FROMUSER() is true.  It fails when
              * neither @euid nor @uid matches the target's suid or uid.
              */
1159         if ((info == SEND_SIG_NOINFO ||
1160              (!is_si_special(info) && SI_FROMUSER(info))) &&
1161             euid != pcred->suid && euid != pcred->uid &&
1162             uid  != pcred->suid && uid  != pcred->uid) {
1163                 ret = -EPERM;
1164                 goto out_unlock;
1165         }
1166         ret = security_task_kill(p, info, sig, secid);
1167         if (ret)
1168                 goto out_unlock;
1169         if (sig && p->sighand) {
1170                 unsigned long flags;
1171                 spin_lock_irqsave(&p->sighand->siglock, flags);
                     /* Group-wide send, with from_ancestor_ns fixed at 0. */
1172                 ret = __send_signal(sig, info, p, 1, 0);
1173                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1174         }
1175 out_unlock:
1176         read_unlock(&tasklist_lock);
1177         return ret;
1178 }
1179 EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
1180
1181 /*
1182  * kill_something_info() interprets pid in interesting ways just like kill(2).
1183  *
1184  * POSIX specifies that kill(-1,sig) is unspecified, but what we have
1185  * is probably wrong.  Should make it like BSD or SYSV.
      *
      * Interpretation of @pid:
      *   pid > 0    the single process with that id
      *   pid == 0   the process group of current
      *   pid == -1  every process whose id is greater than 1 and which is
      *              not in current's thread group
      *   pid < -1   the process group with id -pid
      *
      * Returns the result of the underlying send.  For pid == -1 that is
      * the last error other than -EPERM (0 if there was none), or -ESRCH
      * if no process was eligible.  pid == INT_MIN returns -ESRCH.
1186  */
1187
1188 static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1189 {
1190         int ret;
1191
1192         if (pid > 0) {
1193                 rcu_read_lock();
1194                 ret = kill_pid_info(sig, info, find_vpid(pid));
1195                 rcu_read_unlock();
1196                 return ret;
1197         }
1198
             /*
              * -INT_MIN is not representable in an int, so the negation
              * below would be undefined behaviour.  No process group can
              * have such an id, so report "no such process".
              */
             if (pid == INT_MIN)
                     return -ESRCH;

1199         read_lock(&tasklist_lock);
1200         if (pid != -1) {
1201                 ret = __kill_pgrp_info(sig, info,
1202                                 pid ? find_vpid(-pid) : task_pgrp(current));
1203         } else {
1204                 int retval = 0, count = 0;
1205                 struct task_struct * p;
1206
1207                 for_each_process(p) {
1208                         if (task_pid_vnr(p) > 1 &&
1209                                         !same_thread_group(p, current)) {
1210                                 int err = group_send_sig_info(sig, info, p);
1211                                 ++count;
                                     /* -EPERM on one task must not mask the rest. */
1212                                 if (err != -EPERM)
1213                                         retval = err;
1214                         }
1215                 }
1216                 ret = count ? retval : -ESRCH;
1217         }
1218         read_unlock(&tasklist_lock);
1219
1220         return ret;
1221 }
1222
1223 /*
1224  * These are for backward compatibility with the rest of the kernel source.
1225  */
1226
     /*
      * Send @sig with @info to the single task @p (not the whole thread
      * group).  Returns -EINVAL for an invalid signal number, otherwise the
      * result of do_send_sig_info().
      */
1227 int
1228 send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1229 {
1230         /*
1231          * Make sure legacy kernel users don't send in bad values
1232          * (normal paths check this in check_kill_permission).
1233          */
1234         if (!valid_signal(sig))
1235                 return -EINVAL;
1236
1237         return do_send_sig_info(sig, info, p, false);
1238 }
1239
     /*
      * Map a boolean "priv" flag to a special siginfo marker:
      * SEND_SIG_PRIV when it is nonzero, SEND_SIG_NOINFO otherwise.
      */
1240 #define __si_special(priv) \
1241         ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO)
1242
     /*
      * Send @sig to task @p without a caller-supplied siginfo.  A nonzero
      * @priv selects SEND_SIG_PRIV, zero selects SEND_SIG_NOINFO.  Returns
      * the result of send_sig_info().
      */
1243 int
1244 send_sig(int sig, struct task_struct *p, int priv)
1245 {
1246         return send_sig_info(sig, __si_special(priv), p);
1247 }
1248
     /*
      * Force @sig on task @p as a kernel-originated signal (SEND_SIG_PRIV).
      * The result of force_sig_info() is discarded.
      */
1249 void
1250 force_sig(int sig, struct task_struct *p)
1251 {
1252         force_sig_info(sig, SEND_SIG_PRIV, p);
1253 }
1254
1255 /*
1256  * When things go south during signal handling, we
1257  * will force a SIGSEGV. And if the signal that caused
1258  * the problem was already a SIGSEGV, we'll want to
1259  * make sure we don't even try to deliver the signal..
      *
      * @sig is the signal whose handling failed.  If it is SIGSEGV, @p's
      * SIGSEGV handler is reset to SIG_DFL under siglock first.  SIGSEGV is
      * then forced on @p in every case.  Always returns 0.
1260  */
1261 int
1262 force_sigsegv(int sig, struct task_struct *p)
1263 {
1264         if (sig == SIGSEGV) {
1265                 unsigned long flags;
1266                 spin_lock_irqsave(&p->sighand->siglock, flags);
1267                 p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
1268                 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1269         }
1270         force_sig(SIGSEGV, p);
1271         return 0;
1272 }
1273
     /*
      * Send @sig to every task of the process group @pid.  Takes
      * tasklist_lock for reading around __kill_pgrp_info(), which requires
      * it.  A nonzero @priv selects SEND_SIG_PRIV, zero SEND_SIG_NOINFO.
      * Returns the result of __kill_pgrp_info().
      */
1274 int kill_pgrp(struct pid *pid, int sig, int priv)
1275 {
1276         int ret;
1277
1278         read_lock(&tasklist_lock);
1279         ret = __kill_pgrp_info(sig, __si_special(priv), pid);
1280         read_unlock(&tasklist_lock);
1281
1282         return ret;
1283 }
1284 EXPORT_SYMBOL(kill_pgrp);
1285
     /*
      * Send @sig to the task that @pid refers to.  A nonzero @priv selects
      * SEND_SIG_PRIV, zero SEND_SIG_NOINFO.  Returns the result of
      * kill_pid_info().
      */
1286 int kill_pid(struct pid *pid, int sig, int priv)
1287 {
1288         return kill_pid_info(sig, __si_special(priv), pid);
1289 }
1290 EXPORT_SYMBOL(kill_pid);
1291
1292 /*
1293  * These functions support sending signals using preallocated sigqueue
1294  * structures.  This is needed "because realtime applications cannot
1295  * afford to lose notifications of asynchronous events, like timer
1296  * expirations or I/O completions".  In the case of Posix Timers
1297  * we allocate the sigqueue structure from the timer_create.  If this
1298  * allocation fails we are able to report the failure to the application
1299  * with an EAGAIN error.
1300  */
1301
     /*
      * Allocate a sigqueue entry for current with GFP_KERNEL and mark it
      * SIGQUEUE_PREALLOC.  Returns the entry, or NULL if __sigqueue_alloc()
      * failed.
      */
1302 struct sigqueue *sigqueue_alloc(void)
1303 {
1304         struct sigqueue *q;
1305
1306         if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0)))
1307                 q->flags |= SIGQUEUE_PREALLOC;
1308         return(q);
1309 }
1310
     /*
      * Release a preallocated sigqueue entry.
      *
      * @q must carry SIGQUEUE_PREALLOC (BUG otherwise).  The flag is
      * cleared under current's siglock.  If the entry is not on any list it
      * is freed here; if it is still queued it is left alone and freed when
      * it is dequeued.
      */
1311 void sigqueue_free(struct sigqueue *q)
1312 {
1313         unsigned long flags;
1314         spinlock_t *lock = &current->sighand->siglock;
1315
1316         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1317         /*
1318          * We must hold ->siglock while testing q->list
1319          * to serialize with collect_signal() or with
1320          * __exit_signal()->flush_sigqueue().
1321          */
1322         spin_lock_irqsave(lock, flags);
1323         q->flags &= ~SIGQUEUE_PREALLOC;
1324         /*
1325          * If it is queued it will be freed when dequeued,
1326          * like the "regular" sigqueue.
1327          */
1328         if (!list_empty(&q->list))
1329                 q = NULL;
1330         spin_unlock_irqrestore(lock, flags);
1331
1332         if (q)
1333                 __sigqueue_free(q);
1334 }
1335
     /*
      * Send the signal described by the preallocated entry @q (signal
      * number taken from q->info.si_signo) to @t, or to t's thread group
      * when @group is nonzero.
      *
      * @q must carry SIGQUEUE_PREALLOC (BUG otherwise).
      *
      * Returns:
      *   -1  the sighand of @t could not be locked
      *    1  prepare_signal() reported that the signal is ignored
      *    0  the entry was queued, or it was already queued and only its
      *       overrun count was incremented
      */
1336 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1337 {
1338         int sig = q->info.si_signo;
1339         struct sigpending *pending;
1340         unsigned long flags;
1341         int ret;
1342
1343         BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1344
1345         ret = -1;
1346         if (!likely(lock_task_sighand(t, &flags)))
1347                 goto ret;
1348
1349         ret = 1; /* the signal is ignored */
1350         if (!prepare_signal(sig, t, 0))
1351                 goto out;
1352
1353         ret = 0;
1354         if (unlikely(!list_empty(&q->list))) {
1355                 /*
1356                  * If an SI_TIMER entry is already queued just increment
1357                  * the overrun count.
1358                  */
1359                 BUG_ON(q->info.si_code != SI_TIMER);
1360                 q->info.si_overrun++;
1361                 goto out;
1362         }
1363         q->info.si_overrun = 0;
1364
1365         signalfd_notify(t, sig);
1366         pending = group ? &t->signal->shared_pending : &t->pending;
1367         list_add_tail(&q->list, &pending->list);
1368         sigaddset(&pending->signal, sig);
1369         complete_signal(sig, t, group);
1370 out:
1371         unlock_task_sighand(t, &flags);
1372 ret:
1373         return ret;
1374 }
1375
1376 /*
1377  * Let a parent know about the death of a child.
1378  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
1379  *
1380  * Returns -1 if our parent ignored us and so we've switched to
1381  * self-reaping, or else @sig.
      *
      * The siginfo handed to the parent carries the child's pid as seen in
      * the parent's pid namespace, its uid, accumulated user/system times,
      * and a CLD_DUMPED / CLD_KILLED / CLD_EXITED code derived from
      * tsk->exit_code.
1382  */
1383 int do_notify_parent(struct task_struct *tsk, int sig)
1384 {
1385         struct siginfo info;
1386         unsigned long flags;
1387         struct sighand_struct *psig;
1388         int ret = sig;
1389
1390         BUG_ON(sig == -1);
1391
1392         /* do_notify_parent_cldstop should have been called instead.  */
1393         BUG_ON(task_is_stopped_or_traced(tsk));
1394
1395         BUG_ON(!task_ptrace(tsk) &&
1396                (tsk->group_leader != tsk || !thread_group_empty(tsk)));
1397
1398         info.si_signo = sig;
1399         info.si_errno = 0;
1400         /*
1401          * we are under tasklist_lock here so our parent is tied to
1402          * us and cannot exit and release its namespace.
1403          *
1404          * the only way it can is to switch its nsproxy with sys_unshare,
1405          * but unsharing pid namespaces is not allowed, so we'll always
1406          * see relevant namespace
1407          *
1408          * write_lock() currently calls preempt_disable() which is the
1409          * same as rcu_read_lock(), but according to Oleg, this is not
1410          * correct to rely on this
1411          */
1412         rcu_read_lock();
1413         info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1414         info.si_uid = __task_cred(tsk)->uid;
1415         rcu_read_unlock();
1416
1417         info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
1418                                 tsk->signal->utime));
1419         info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
1420                                 tsk->signal->stime));
1421
             /*
              * Decode exit_code: bit 0x80 means a core was dumped, a nonzero
              * low 7 bits is the killing signal, otherwise the exit status
              * lives in the bits above the low byte.
              */
1422         info.si_status = tsk->exit_code & 0x7f;
1423         if (tsk->exit_code & 0x80)
1424                 info.si_code = CLD_DUMPED;
1425         else if (tsk->exit_code & 0x7f)
1426                 info.si_code = CLD_KILLED;
1427         else {
1428                 info.si_code = CLD_EXITED;
1429                 info.si_status = tsk->exit_code >> 8;
1430         }
1431
1432         psig = tsk->parent->sighand;
1433         spin_lock_irqsave(&psig->siglock, flags);
1434         if (!task_ptrace(tsk) && sig == SIGCHLD &&
1435             (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
1436              (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
1437                 /*
1438                  * We are exiting and our parent doesn't care.  POSIX.1
1439                  * defines special semantics for setting SIGCHLD to SIG_IGN
1440                  * or setting the SA_NOCLDWAIT flag: we should be reaped
1441                  * automatically and not left for our parent's wait4 call.
1442                  * Rather than having the parent do it as a magic kind of
1443                  * signal handler, we just set this to tell do_exit that we
1444                  * can be cleaned up without becoming a zombie.  Note that
1445                  * we still call __wake_up_parent in this case, because a
1446                  * blocked sys_wait4 might now return -ECHILD.
1447                  *
1448                  * Whether we send SIGCHLD or not for SA_NOCLDWAIT
1449                  * is implementation-defined: we do (if you don't want
1450                  * it, just use SIG_IGN instead).
1451                  */
1452                 ret = tsk->exit_signal = -1;
1453                 if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
1454                         sig = -1;
1455         }
             /* sig was set to -1 above when the parent ignores SIGCHLD. */
1456         if (valid_signal(sig) && sig > 0)
1457                 __group_send_sig_info(sig, &info, tsk->parent);
1458         __wake_up_parent(tsk, tsk->parent);
1459         spin_unlock_irqrestore(&psig->siglock, flags);
1460
1461         return ret;
1462 }
1463
     /*
      * Tell the parent about a stop, continue or trap of @tsk.
      *
      * @why must be CLD_CONTINUED, CLD_STOPPED or CLD_TRAPPED; anything
      * else is a BUG.  A ptraced task reports itself to tsk->parent.
      * Otherwise the report is made for the group leader, to its
      * real_parent.
      *
      * SIGCHLD is sent unless the parent ignores SIGCHLD or has set
      * SA_NOCLDSTOP.  The parent is woken with __wake_up_parent() either
      * way.
      */
1464 static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1465 {
1466         struct siginfo info;
1467         unsigned long flags;
1468         struct task_struct *parent;
1469         struct sighand_struct *sighand;
1470
1471         if (task_ptrace(tsk))
1472                 parent = tsk->parent;
1473         else {
1474                 tsk = tsk->group_leader;
1475                 parent = tsk->real_parent;
1476         }
1477
1478         info.si_signo = SIGCHLD;
1479         info.si_errno = 0;
1480         /*
1481          * see comment in do_notify_parent() about the following 3 lines
1482          */
1483         rcu_read_lock();
1484         info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
1485         info.si_uid = __task_cred(tsk)->uid;
1486         rcu_read_unlock();
1487
1488         info.si_utime = cputime_to_clock_t(tsk->utime);
1489         info.si_stime = cputime_to_clock_t(tsk->stime);
1490
1491         info.si_code = why;
             /* si_status depends on the kind of event being reported. */
1492         switch (why) {
1493         case CLD_CONTINUED:
1494                 info.si_status = SIGCONT;
1495                 break;
1496         case CLD_STOPPED:
1497                 info.si_status = tsk->signal->group_exit_code & 0x7f;
1498                 break;
1499         case CLD_TRAPPED:
1500                 info.si_status = tsk->exit_code & 0x7f;
1501                 break;
1502         default:
1503                 BUG();
1504         }
1505
1506         sighand = parent->sighand;
1507         spin_lock_irqsave(&sighand->siglock, flags);
1508         if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
1509             !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
1510                 __group_send_sig_info(SIGCHLD, &info, parent);
1511         /*
1512          * Even if SIGCHLD is not generated, we must wake up wait4 calls.
1513          */
1514         __wake_up_parent(tsk, parent);
1515         spin_unlock_irqrestore(&sighand->siglock, flags);
1516 }
1517
     /*
      * Decide whether current may go to sleep in a ptrace stop.
      *
      * Returns 0 when current is not being ptraced, or when its mm has a
      * core_state set and that mm is shared with its parent.  Returns 1
      * otherwise.
      */
1518 static inline int may_ptrace_stop(void)
1519 {
1520         if (!likely(task_ptrace(current)))
1521                 return 0;
1522         /*
1523          * Are we in the middle of do_coredump?
1524          * If so and our tracer is also part of the coredump stopping
1525          * is a deadlock situation, and pointless because our tracer
1526          * is dead so don't allow us to stop.
1527          * If SIGKILL was already sent before the caller unlocked
1528          * ->siglock we must see ->core_state != NULL. Otherwise it
1529          * is safe to enter schedule().
1530          */
1531         if (unlikely(current->mm->core_state) &&
1532             unlikely(current->mm == current->parent->mm))
1533                 return 0;
1534
1535         return 1;
1536 }
1537
1538 /*
1539  * Return nonzero if there is a SIGKILL that should be waking us up.
1540  * Called with the siglock held.
      *
      * Checks both @tsk's private pending set and the thread group's
      * shared pending set.
1541  */
1542 static int sigkill_pending(struct task_struct *tsk)
1543 {
1544         return  sigismember(&tsk->pending.signal, SIGKILL) ||
1545                 sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
1546 }
1547
1548 /*
1549  * This must be called with current->sighand->siglock held.
1550  *
1551  * This should be the path for all ptrace stops.
1552  * We always set current->last_siginfo while stopped here.
1553  * That makes it a way to test a stopped process for
1554  * being ptrace-stopped vs being job-control-stopped.
1555  *
1556  * If we actually decide not to stop at all because the tracer
1557  * is gone, we keep current->exit_code unless clear_code.
      *
      * @exit_code:  stored in current->exit_code for the duration of the stop
      * @clear_code: if nonzero, current->exit_code is reset to 0 when we
      *              end up not stopping
      * @info:       stored in current->last_siginfo for the duration of the
      *              stop
      *
      * The siglock is dropped while stopped and is held again on return.
1558  */
1559 static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1560 {
1561         if (arch_ptrace_stop_needed(exit_code, info)) {
1562                 /*
1563                  * The arch code has something special to do before a
1564                  * ptrace stop.  This is allowed to block, e.g. for faults
1565                  * on user stack pages.  We can't keep the siglock while
1566                  * calling arch_ptrace_stop, so we must release it now.
1567                  * To preserve proper semantics, we must do this before
1568                  * any signal bookkeeping like checking group_stop_count.
1569                  * Meanwhile, a SIGKILL could come in before we retake the
1570                  * siglock.  That must prevent us from sleeping in TASK_TRACED.
1571                  * So after regaining the lock, we must check for SIGKILL.
1572                  */
1573                 spin_unlock_irq(&current->sighand->siglock);
1574                 arch_ptrace_stop(exit_code, info);
1575                 spin_lock_irq(&current->sighand->siglock);
1576                 if (sigkill_pending(current))
1577                         return;
1578         }
1579
1580         /*
1581          * If there is a group stop in progress,
1582          * we must participate in the bookkeeping.
1583          */
1584         if (current->signal->group_stop_count > 0)
1585                 --current->signal->group_stop_count;
1586
1587         current->last_siginfo = info;
1588         current->exit_code = exit_code;
1589
1590         /* Let the debugger run.  */
1591         __set_current_state(TASK_TRACED);
1592         spin_unlock_irq(&current->sighand->siglock);
1593         read_lock(&tasklist_lock);
1594         if (may_ptrace_stop()) {
1595                 do_notify_parent_cldstop(current, CLD_TRAPPED);
1596                 /*
1597                  * Don't want to allow preemption here, because
1598                  * sys_ptrace() needs this task to be inactive.
1599                  *
1600                  * XXX: implement read_unlock_no_resched().
1601                  */
1602                 preempt_disable();
1603                 read_unlock(&tasklist_lock);
1604                 preempt_enable_no_resched();
1605                 schedule();
1606         } else {
1607                 /*
1608                  * By the time we got the lock, our tracer went away.
1609                  * Don't drop the lock yet, another tracer may come.
1610                  */
1611                 __set_current_state(TASK_RUNNING);
1612                 if (clear_code)
1613                         current->exit_code = 0;
1614                 read_unlock(&tasklist_lock);
1615         }
1616
1617         /*
1618          * While in TASK_TRACED, we were considered "frozen enough".
1619          * Now that we woke up, it's crucial if we're supposed to be
1620          * frozen that we freeze now before running anything substantial.
1621          */
1622         try_to_freeze();
1623
1624         /*
1625          * We are back.  Now reacquire the siglock before touching
1626          * last_siginfo, so that we are sure to have synchronized with
1627          * any signal-sending on another CPU that wants to examine it.
1628          */
1629         spin_lock_irq(&current->sighand->siglock);
1630         current->last_siginfo = NULL;
1631
1632         /*
1633          * Queued signals ignored us while we were stopped for tracing.
1634          * So check for any that we should take before resuming user mode.
1635          * This sets TIF_SIGPENDING, but never clears it.
1636          */
1637         recalc_sigpending_tsk(current);
1638 }
1639
     /*
      * Stop current for its tracer with a SIGTRAP-based @exit_code.
      *
      * @exit_code must have SIGTRAP in its low 7 bits and no bits set above
      * the low 16 (BUG otherwise).  A zeroed siginfo is filled in with
      * SIGTRAP, @exit_code as si_code, and current's pid and uid, then
      * ptrace_stop() is called under siglock with clear_code set.
      */
1640 void ptrace_notify(int exit_code)
1641 {
1642         siginfo_t info;
1643
1644         BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
1645
1646         memset(&info, 0, sizeof info);
1647         info.si_signo = SIGTRAP;
1648         info.si_code = exit_code;
1649         info.si_pid = task_pid_vnr(current);
1650         info.si_uid = current_uid();
1651
1652         /* Let the debugger run.  */
1653         spin_lock_irq(&current->sighand->siglock);
1654         ptrace_stop(exit_code, 1, &info);
1655         spin_unlock_irq(&current->sighand->siglock);
1656 }
1657
1658 /*
1659  * This performs the stopping for SIGSTOP and other stop signals.
1660  * We have to stop all threads in the thread group.
1661  * Returns nonzero if we've actually stopped and released the siglock.
1662  * Returns zero if we didn't stop and still hold the siglock.
1663  */
1664 static int do_signal_stop(int signr)
1665 {
1666         struct signal_struct *sig = current->signal;
1667         int notify;
1668
1669         if (!sig->group_stop_count) {
1670                 struct task_struct *t;
1671
1672                 if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
1673                     unlikely(signal_group_exit(sig)))
1674                         return 0;
1675                 /*
1676                  * There is no group stop already in progress.
1677                  * We must initiate one now.
1678                  */
1679                 sig->group_exit_code = signr;
1680
1681                 sig->group_stop_count = 1;
1682                 for (t = next_thread(current); t != current; t = next_thread(t))
1683                         /*
1684                          * Setting state to TASK_STOPPED for a group
1685                          * stop is always done with the siglock held,
1686                          * so this check has no races.
1687                          */
1688                         if (!(t->flags & PF_EXITING) &&
1689                             !task_is_stopped_or_traced(t)) {
1690                                 sig->group_stop_count++;
1691                                 signal_wake_up(t, 0);
1692                         }
1693         }
1694         /*
1695          * If there are no other threads in the group, or if there is
1696          * a group stop in progress and we are the last to stop, report
1697          * to the parent.  When ptraced, every thread reports itself.
1698          */
1699         notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
1700         notify = tracehook_notify_jctl(notify, CLD_STOPPED);
1701         /*
1702          * tracehook_notify_jctl() can drop and reacquire siglock, so
1703          * we keep ->group_stop_count != 0 before the call. If SIGCONT
1704          * or SIGKILL comes in between ->group_stop_count == 0.
1705          */
1706         if (sig->group_stop_count) {
1707                 if (!--sig->group_stop_count)
1708                         sig->flags = SIGNAL_STOP_STOPPED;
1709                 current->exit_code = sig->group_exit_code;
1710                 __set_current_state(TASK_STOPPED);
1711         }
1712         spin_unlock_irq(&current->sighand->siglock);
1713
1714         if (notify) {
1715                 read_lock(&tasklist_lock);
1716                 do_notify_parent_cldstop(current, notify);
1717                 read_unlock(&tasklist_lock);
1718         }
1719
1720         /* Now we don't run again until woken by SIGCONT or SIGKILL */
1721         do {
1722                 schedule();
1723         } while (try_to_freeze());
1724
1725         tracehook_finish_jctl();
1726         current->exit_code = 0;
1727
1728         return 1;
1729 }
1730
1731 static int ptrace_signal(int signr, siginfo_t *info,
1732                          struct pt_regs *regs, void *cookie)
1733 {
1734         if (!task_ptrace(current))
1735                 return signr;
1736
1737         ptrace_signal_deliver(regs, cookie);
1738
1739         /* Let the debugger run.  */
1740         ptrace_stop(signr, 0, info);
1741
1742         /* We're back.  Did the debugger cancel the sig?  */
1743         signr = current->exit_code;
1744         if (signr == 0)
1745                 return signr;
1746
1747         current->exit_code = 0;
1748
1749         /* Update the siginfo structure if the signal has
1750            changed.  If the debugger wanted something
1751            specific in the siginfo structure then it should
1752            have updated *info via PTRACE_SETSIGINFO.  */
1753         if (signr != info->si_signo) {
1754                 info->si_signo = signr;
1755                 info->si_errno = 0;
1756                 info->si_code = SI_USER;
1757                 info->si_pid = task_pid_vnr(current->parent);
1758                 info->si_uid = task_uid(current->parent);
1759         }
1760
1761         /* If the (new) signal is now blocked, requeue it.  */
1762         if (sigismember(&current->blocked, signr)) {
1763                 specific_send_sig_info(signr, info, current);
1764                 signr = 0;
1765         }
1766
1767         return signr;
1768 }
1769
/*
 * get_signal_to_deliver - pick the next signal the current task must handle.
 * @info:      filled in with the siginfo of the signal that was chosen
 * @return_ka: receives a copy of the sigaction when a user handler must run
 * @regs:      register state, handed on to the tracing hooks, the fatal
 *             signal printer and the core dumper
 * @cookie:    opaque value handed on to ptrace_signal()
 *
 * Loops with siglock held, taking one signal at a time.  Ignored signals
 * (explicitly, or by default action) are dropped, default stop signals go
 * through do_signal_stop(), and any other default action ends in
 * do_group_exit(), which does not return.
 *
 * Returns the number of a signal whose handler must be run, with
 * *return_ka describing that handler, or 0 when no signal is left.
 * siglock is not held on return.
 */
1770 int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
1771                           struct pt_regs *regs, void *cookie)
1772 {
1773         struct sighand_struct *sighand = current->sighand;
1774         struct signal_struct *signal = current->signal;
1775         int signr;
1776
1777 relock:
1778         /*
1779          * We'll jump back here after any time we were stopped in TASK_STOPPED.
1780          * While in TASK_STOPPED, we were considered "frozen enough".
1781          * Now that we woke up, it's crucial if we're supposed to be
1782          * frozen that we freeze now before running anything substantial.
1783          */
1784         try_to_freeze();
1785
1786         spin_lock_irq(&sighand->siglock);
1787         /*
1788          * Every stopped thread goes here after wakeup. Check to see if
1789          * we should notify the parent, prepare_signal(SIGCONT) encodes
1790          * the CLD_ si_code into SIGNAL_CLD_MASK bits.
1791          */
1792         if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
1793                 int why = (signal->flags & SIGNAL_STOP_CONTINUED)
1794                                 ? CLD_CONTINUED : CLD_STOPPED;
1795                 signal->flags &= ~SIGNAL_CLD_MASK;
1796
1797                 why = tracehook_notify_jctl(why, CLD_CONTINUED);
1798                 spin_unlock_irq(&sighand->siglock);
1799
1800                 if (why) {
1801                         read_lock(&tasklist_lock);
1802                         do_notify_parent_cldstop(current->group_leader, why);
1803                         read_unlock(&tasklist_lock);
1804                 }
1805                 goto relock;
1806         }
1807
1808         for (;;) {
1809                 struct k_sigaction *ka;
1810
                 /* Join a group stop that is already in progress first. */
1811                 if (unlikely(signal->group_stop_count > 0) &&
1812                     do_signal_stop(0))
1813                         goto relock;
1814
1815                 /*
1816                  * Tracing can induce an artificial signal and choose sigaction.
1817                  * The return value in @signr determines the default action,
1818                  * but @info->si_signo is the signal number we will report.
1819                  */
1820                 signr = tracehook_get_signal(current, regs, info, return_ka);
1821                 if (unlikely(signr < 0))
1822                         goto relock;
1823                 if (unlikely(signr != 0))
1824                         ka = return_ka;
1825                 else {
1826                         signr = dequeue_signal(current, &current->blocked,
1827                                                info);
1828
1829                         if (!signr)
1830                                 break; /* will return 0 */
1831
                         /* SIGKILL is never passed through ptrace_signal(). */
1832                         if (signr != SIGKILL) {
1833                                 signr = ptrace_signal(signr, info,
1834                                                       regs, cookie);
1835                                 if (!signr)
1836                                         continue;
1837                         }
1838
1839                         ka = &sighand->action[signr-1];
1840                 }
1841
1842                 if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
1843                         continue;
1844                 if (ka->sa.sa_handler != SIG_DFL) {
1845                         /* Run the handler.  */
1846                         *return_ka = *ka;
1847
                         /* A one-shot handler is reset to SIG_DFL once taken. */
1848                         if (ka->sa.sa_flags & SA_ONESHOT)
1849                                 ka->sa.sa_handler = SIG_DFL;
1850
1851                         break; /* will return non-zero "signr" value */
1852                 }
1853
1854                 /*
1855                  * Now we are doing the default action for this signal.
1856                  */
1857                 if (sig_kernel_ignore(signr)) /* Default is nothing. */
1858                         continue;
1859
1860                 /*
1861                  * Global init gets no signals it doesn't want.
1862                  * Container-init gets no signals it doesn't want from same
1863                  * container.
1864                  *
1865                  * Note that if global/container-init sees a sig_kernel_only()
1866                  * signal here, the signal must have been generated internally
1867                  * or must have come from an ancestor namespace. In either
1868                  * case, the signal cannot be dropped.
1869                  */
1870                 if (unlikely(signal->flags & SIGNAL_UNKILLABLE) &&
1871                                 !sig_kernel_only(signr))
1872                         continue;
1873
1874                 if (sig_kernel_stop(signr)) {
1875                         /*
1876                          * The default action is to stop all threads in
1877                          * the thread group.  The job control signals
1878                          * do nothing in an orphaned pgrp, but SIGSTOP
1879                          * always works.  Note that siglock needs to be
1880                          * dropped during the call to is_orphaned_pgrp()
1881                          * because of lock ordering with tasklist_lock.
1882                          * This allows an intervening SIGCONT to be posted.
1883                          * We need to check for that and bail out if necessary.
1884                          */
1885                         if (signr != SIGSTOP) {
1886                                 spin_unlock_irq(&sighand->siglock);
1887
1888                                 /* signals can be posted during this window */
1889
1890                                 if (is_current_pgrp_orphaned())
1891                                         goto relock;
1892
1893                                 spin_lock_irq(&sighand->siglock);
1894                         }
1895
1896                         if (likely(do_signal_stop(info->si_signo))) {
1897                                 /* It released the siglock.  */
1898                                 goto relock;
1899                         }
1900
1901                         /*
1902                          * We didn't actually stop, due to a race
1903                          * with SIGCONT or something like that.
1904                          */
1905                         continue;
1906                 }
1907
1908                 spin_unlock_irq(&sighand->siglock);
1909
1910                 /*
1911                  * Anything else is fatal, maybe with a core dump.
1912                  */
1913                 current->flags |= PF_SIGNALED;
1914
1915                 if (sig_kernel_coredump(signr)) {
1916                         if (print_fatal_signals)
1917                                 print_fatal_signal(regs, info->si_signo);
1918                         /*
1919                          * If it was able to dump core, this kills all
1920                          * other threads in the group and synchronizes with
1921                          * their demise.  If we lost the race with another
1922                          * thread getting here, it set group_exit_code
1923                          * first and our do_group_exit call below will use
1924                          * that value and ignore the one we pass it.
1925                          */
1926                         do_coredump(info->si_signo, info->si_signo, regs);
1927                 }
1928
1929                 /*
1930                  * Death signals, no core dump.
1931                  */
1932                 do_group_exit(info->si_signo);
1933                 /* NOTREACHED */
1934         }
1935         spin_unlock_irq(&sighand->siglock);
1936         return signr;
1937 }
1938
1939 void exit_signals(struct task_struct *tsk)
1940 {
1941         int group_stop = 0;
1942         struct task_struct *t;
1943
1944         if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
1945                 tsk->flags |= PF_EXITING;
1946                 return;
1947         }
1948
1949         spin_lock_irq(&tsk->sighand->siglock);
1950         /*
1951          * From now this task is not visible for group-wide signals,
1952          * see wants_signal(), do_signal_stop().
1953          */
1954         tsk->flags |= PF_EXITING;
1955         if (!signal_pending(tsk))
1956                 goto out;
1957
1958         /* It could be that __group_complete_signal() choose us to
1959          * notify about group-wide signal. Another thread should be
1960          * woken now to take the signal since we will not.
1961          */
1962         for (t = tsk; (t = next_thread(t)) != tsk; )
1963                 if (!signal_pending(t) && !(t->flags & PF_EXITING))
1964                         recalc_sigpending_and_wake(t);
1965
1966         if (unlikely(tsk->signal->group_stop_count) &&
1967                         !--tsk->signal->group_stop_count) {
1968                 tsk->signal->flags = SIGNAL_STOP_STOPPED;
1969                 group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
1970         }
1971 out:
1972         spin_unlock_irq(&tsk->sighand->siglock);
1973
1974         if (unlikely(group_stop)) {
1975                 read_lock(&tasklist_lock);
1976                 do_notify_parent_cldstop(tsk, group_stop);
1977                 read_unlock(&tasklist_lock);
1978         }
1979 }
1980
/*
 * Signal helpers exported from this file.  dequeue_signal() is exported
 * with EXPORT_SYMBOL_GPL; all the others use plain EXPORT_SYMBOL.
 */
1981 EXPORT_SYMBOL(recalc_sigpending);
1982 EXPORT_SYMBOL_GPL(dequeue_signal);
1983 EXPORT_SYMBOL(flush_signals);
1984 EXPORT_SYMBOL(force_sig);
1985 EXPORT_SYMBOL(send_sig);
1986 EXPORT_SYMBOL(send_sig_info);
1987 EXPORT_SYMBOL(sigprocmask);
1988 EXPORT_SYMBOL(block_all_signals);
1989 EXPORT_SYMBOL(unblock_all_signals);
1990
1991
1992 /*
1993  * System call entry points.
1994  */
1995
1996 SYSCALL_DEFINE0(restart_syscall)
1997 {
1998         struct restart_block *restart = &current_thread_info()->restart_block;
1999         return restart->fn(restart);
2000 }
2001
/*
 * Restart handler that performs no restart: it ignores @param and
 * always reports -EINTR.
 */
2002 long do_no_restart_syscall(struct restart_block *param)
2003 {
2004         return -EINTR;
2005 }
2006
2007 /*
2008  * We don't need to get the kernel lock - this is all local to this
2009  * particular thread.. (and that's good, because this is _heavily_
2010  * used by various programs)
2011  */
2012
2013 /*
2014  * This is also useful for kernel threads that want to temporarily
2015  * (or permanently) block certain signals.
2016  *
2017  * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel
2018  * interface happily blocks "unblockable" signals like SIGKILL
2019  * and friends.
2020  */
2021 int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
2022 {
2023         int error;
2024
2025         spin_lock_irq(&current->sighand->siglock);
2026         if (oldset)
2027                 *oldset = current->blocked;
2028
2029         error = 0;
2030         switch (how) {
2031         case SIG_BLOCK:
2032                 sigorsets(&current->blocked, &current->blocked, set);
2033                 break;
2034         case SIG_UNBLOCK:
2035                 signandsets(&current->blocked, &current->blocked, set);
2036                 break;
2037         case SIG_SETMASK:
2038                 current->blocked = *set;
2039                 break;
2040         default:
2041                 error = -EINVAL;
2042         }
2043         recalc_sigpending();
2044         spin_unlock_irq(&current->sighand->siglock);
2045
2046         return error;
2047 }
2048
2049 SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
2050                 sigset_t __user *, oset, size_t, sigsetsize)
2051 {
2052         int error = -EINVAL;
2053         sigset_t old_set, new_set;
2054
2055         /* XXX: Don't preclude handling different sized sigset_t's.  */
2056         if (sigsetsize != sizeof(sigset_t))
2057                 goto out;
2058
2059         if (set) {
2060                 error = -EFAULT;
2061                 if (copy_from_user(&new_set, set, sizeof(*set)))
2062                         goto out;
2063                 sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
2064
2065                 error = sigprocmask(how, &new_set, &old_set);
2066                 if (error)
2067                         goto out;
2068                 if (oset)
2069                         goto set_old;
2070         } else if (oset) {
2071                 spin_lock_irq(&current->sighand->siglock);
2072                 old_set = current->blocked;
2073                 spin_unlock_irq(&current->sighand->siglock);
2074
2075         set_old:
2076                 error = -EFAULT;
2077                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2078                         goto out;
2079         }
2080         error = 0;
2081 out:
2082         return error;
2083 }
2084
2085 long do_sigpending(void __user *set, unsigned long sigsetsize)
2086 {
2087         long error = -EINVAL;
2088         sigset_t pending;
2089
2090         if (sigsetsize > sizeof(sigset_t))
2091                 goto out;
2092
2093         spin_lock_irq(&current->sighand->siglock);
2094         sigorsets(&pending, &current->pending.signal,
2095                   &current->signal->shared_pending.signal);
2096         spin_unlock_irq(&current->sighand->siglock);
2097
2098         /* Outside the lock because only this thread touches it.  */
2099         sigandsets(&pending, &current->blocked, &pending);
2100
2101         error = -EFAULT;
2102         if (!copy_to_user(set, &pending, sigsetsize))
2103                 error = 0;
2104
2105 out:
2106         return error;
2107 }
2108
/*
 * rt_sigpending(): syscall wrapper that passes @set and @sigsetsize
 * straight to do_sigpending() and returns its result.
 */
2109 SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
2110 {
2111         return do_sigpending(set, sigsetsize);
2112 }
2113
2114 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
2115
2116 int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
2117 {
2118         int err;
2119
2120         if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
2121                 return -EFAULT;
2122         if (from->si_code < 0)
2123                 return __copy_to_user(to, from, sizeof(siginfo_t))
2124                         ? -EFAULT : 0;
2125         /*
2126          * If you change siginfo_t structure, please be sure
2127          * this code is fixed accordingly.
2128          * Please remember to update the signalfd_copyinfo() function
2129          * inside fs/signalfd.c too, in case siginfo_t changes.
2130          * It should never copy any pad contained in the structure
2131          * to avoid security leaks, but must copy the generic
2132          * 3 ints plus the relevant union member.
2133          */
2134         err = __put_user(from->si_signo, &to->si_signo);
2135         err |= __put_user(from->si_errno, &to->si_errno);
2136         err |= __put_user((short)from->si_code, &to->si_code);
2137         switch (from->si_code & __SI_MASK) {
2138         case __SI_KILL:
2139                 err |= __put_user(from->si_pid, &to->si_pid);
2140                 err |= __put_user(from->si_uid, &to->si_uid);
2141                 break;
2142         case __SI_TIMER:
2143                  err |= __put_user(from->si_tid, &to->si_tid);
2144                  err |= __put_user(from->si_overrun, &to->si_overrun);
2145                  err |= __put_user(from->si_ptr, &to->si_ptr);
2146                 break;
2147         case __SI_POLL:
2148                 err |= __put_user(from->si_band, &to->si_band);
2149                 err |= __put_user(from->si_fd, &to->si_fd);
2150                 break;
2151         case __SI_FAULT:
2152                 err |= __put_user(from->si_addr, &to->si_addr);
2153 #ifdef __ARCH_SI_TRAPNO
2154                 err |= __put_user(from->si_trapno, &to->si_trapno);
2155 #endif
2156                 break;
2157         case __SI_CHLD:
2158                 err |= __put_user(from->si_pid, &to->si_pid);
2159                 err |= __put_user(from->si_uid, &to->si_uid);
2160                 err |= __put_user(from->si_status, &to->si_status);
2161                 err |= __put_user(from->si_utime, &to->si_utime);
2162                 err |= __put_user(from->si_stime, &to->si_stime);
2163                 break;
2164         case __SI_RT: /* This is not generated by the kernel as of now. */
2165         case __SI_MESGQ: /* But this is */
2166                 err |= __put_user(from->si_pid, &to->si_pid);
2167                 err |= __put_user(from->si_uid, &to->si_uid);
2168                 err |= __put_user(from->si_ptr, &to->si_ptr);
2169                 break;
2170         default: /* this is just in case for now ... */
2171                 err |= __put_user(from->si_pid, &to->si_pid);
2172                 err |= __put_user(from->si_uid, &to->si_uid);
2173                 break;
2174         }
2175         return err;
2176 }
2177
2178 #endif
2179
2180 SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
2181                 siginfo_t __user *, uinfo, const struct timespec __user *, uts,
2182                 size_t, sigsetsize)
2183 {
2184         int ret, sig;
2185         sigset_t these;
2186         struct timespec ts;
2187         siginfo_t info;
2188         long timeout = 0;
2189
2190         /* XXX: Don't preclude handling different sized sigset_t's.  */
2191         if (sigsetsize != sizeof(sigset_t))
2192                 return -EINVAL;
2193
2194         if (copy_from_user(&these, uthese, sizeof(these)))
2195                 return -EFAULT;
2196
2197         /*
2198          * Invert the set of allowed signals to get those we
2199          * want to block.
2200          */
2201         sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
2202         signotset(&these);
2203
2204         if (uts) {
2205                 if (copy_from_user(&ts, uts, sizeof(ts)))
2206                         return -EFAULT;
2207                 if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
2208                     || ts.tv_sec < 0)
2209                         return -EINVAL;
2210         }
2211
2212         spin_lock_irq(&current->sighand->siglock);
2213         sig = dequeue_signal(current, &these, &info);
2214         if (!sig) {
2215                 timeout = MAX_SCHEDULE_TIMEOUT;
2216                 if (uts)
2217                         timeout = (timespec_to_jiffies(&ts)
2218                                    + (ts.tv_sec || ts.tv_nsec));
2219
2220                 if (timeout) {
2221                         /* None ready -- temporarily unblock those we're
2222                          * interested while we are sleeping in so that we'll
2223                          * be awakened when they arrive.  */
2224                         current->real_blocked = current->blocked;
2225                         sigandsets(&current->blocked, &current->blocked, &these);
2226                         recalc_sigpending();
2227                         spin_unlock_irq(&current->sighand->siglock);
2228
2229                         timeout = schedule_timeout_interruptible(timeout);
2230
2231                         spin_lock_irq(&current->sighand->siglock);
2232                         sig = dequeue_signal(current, &these, &info);
2233                         current->blocked = current->real_blocked;
2234                         siginitset(&current->real_blocked, 0);
2235                         recalc_sigpending();
2236                 }
2237         }
2238         spin_unlock_irq(&current->sighand->siglock);
2239
2240         if (sig) {
2241                 ret = sig;
2242                 if (uinfo) {
2243                         if (copy_siginfo_to_user(uinfo, &info))
2244                                 ret = -EFAULT;
2245                 }
2246         } else {
2247                 ret = -EAGAIN;
2248                 if (timeout)
2249                         ret = -EINTR;
2250         }
2251
2252         return ret;
2253 }
2254
2255 SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
2256 {
2257         struct siginfo info;
2258
2259         info.si_signo = sig;
2260         info.si_errno = 0;
2261         info.si_code = SI_USER;
2262         info.si_pid = task_tgid_vnr(current);
2263         info.si_uid = current_uid();
2264
2265         return kill_something_info(sig, &info, pid);
2266 }
2267
2268 static int
2269 do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
2270 {
2271         struct task_struct *p;
2272         int error = -ESRCH;
2273
2274         rcu_read_lock();
2275         p = find_task_by_vpid(pid);
2276         if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
2277                 error = check_kill_permission(sig, info, p);
2278                 /*
2279                  * The null signal is a permissions and process existence
2280                  * probe.  No signal is actually delivered.
2281                  */
2282                 if (!error && sig) {
2283                         error = do_send_sig_info(sig, info, p, false);
2284                         /*
2285                          * If lock_task_sighand() failed we pretend the task
2286                          * dies after receiving the signal. The window is tiny,
2287                          * and the signal is private anyway.
2288                          */
2289                         if (unlikely(error == -ESRCH))
2290                                 error = 0;
2291                 }
2292         }
2293         rcu_read_unlock();
2294
2295         return error;
2296 }
2297
2298 static int do_tkill(pid_t tgid, pid_t pid, int sig)
2299 {
2300         struct siginfo info;
2301
2302         info.si_signo = sig;
2303         info.si_errno = 0;
2304         info.si_code = SI_TKILL;
2305         info.si_pid = task_tgid_vnr(current);
2306         info.si_uid = current_uid();
2307
2308         return do_send_specific(tgid, pid, sig, &info);
2309 }
2310
2311 /**
2312  *  sys_tgkill - send signal to one specific thread
2313  *  @tgid: the thread group ID of the thread
2314  *  @pid: the PID of the thread
2315  *  @sig: signal to be sent
2316  *
2317  *  This syscall also checks the @tgid and returns -ESRCH even if the PID
2318  *  exists but it's not belonging to the target process anymore. This
2319  *  method solves the problem of threads exiting and PIDs getting reused.
2320  */
2321 SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig)
2322 {
2323         /* This is only valid for single tasks */
2324         if (pid <= 0 || tgid <= 0)
2325                 return -EINVAL;
2326
2327         return do_tkill(tgid, pid, sig);
2328 }
2329
2330 /*
2331  *  Send a signal to only one task, even if it's a CLONE_THREAD task.
2332  */
2333 SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
2334 {
2335         /* This is only valid for single tasks */
2336         if (pid <= 0)
2337                 return -EINVAL;
2338
2339         return do_tkill(0, pid, sig);
2340 }
2341
2342 SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2343                 siginfo_t __user *, uinfo)
2344 {
2345         siginfo_t info;
2346
2347         if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
2348                 return -EFAULT;
2349
2350         /* Not even root can pretend to send signals from the kernel.
2351            Nor can they impersonate a kill(), which adds source info.  */
2352         if (info.si_code >= 0)
2353                 return -EPERM;
2354         info.si_signo = sig;
2355
2356         /* POSIX.1b doesn't mention process groups.  */
2357         return kill_proc_info(sig, &info, pid);
2358 }
2359
2360 long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
2361 {
2362         /* This is only valid for single tasks */
2363         if (pid <= 0 || tgid <= 0)
2364                 return -EINVAL;
2365
2366         /* Not even root can pretend to send signals from the kernel.
2367            Nor can they impersonate a kill(), which adds source info.  */
2368         if (info->si_code >= 0)
2369                 return -EPERM;
2370         info->si_signo = sig;
2371
2372         return do_send_specific(tgid, pid, sig, info);
2373 }
2374
2375 SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
2376                 siginfo_t __user *, uinfo)
2377 {
2378         siginfo_t info;
2379
2380         if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
2381                 return -EFAULT;
2382
2383         return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
2384 }
2385
2386 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2387 {
2388         struct task_struct *t = current;
2389         struct k_sigaction *k;
2390         sigset_t mask;
2391
2392         if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
2393                 return -EINVAL;
2394
2395         k = &t->sighand->action[sig-1];
2396
2397         spin_lock_irq(&current->sighand->siglock);
2398         if (oact)
2399                 *oact = *k;
2400
2401         if (act) {
2402                 sigdelsetmask(&act->sa.sa_mask,
2403                               sigmask(SIGKILL) | sigmask(SIGSTOP));
2404                 *k = *act;
2405                 /*
2406                  * POSIX 3.3.1.3:
2407                  *  "Setting a signal action to SIG_IGN for a signal that is
2408                  *   pending shall cause the pending signal to be discarded,
2409                  *   whether or not it is blocked."
2410                  *
2411                  *  "Setting a signal action to SIG_DFL for a signal that is
2412                  *   pending and whose default action is to ignore the signal
2413                  *   (for example, SIGCHLD), shall cause the pending signal to
2414                  *   be discarded, whether or not it is blocked"
2415                  */
2416                 if (sig_handler_ignored(sig_handler(t, sig), sig)) {
2417                         sigemptyset(&mask);
2418                         sigaddset(&mask, sig);
2419                         rm_from_queue_full(&mask, &t->signal->shared_pending);
2420                         do {
2421                                 rm_from_queue_full(&mask, &t->pending);
2422                                 t = next_thread(t);
2423                         } while (t != current);
2424                 }
2425         }
2426
2427         spin_unlock_irq(&current->sighand->siglock);
2428         return 0;
2429 }
2430
2431 int
2432 do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp)
2433 {
2434         stack_t oss;
2435         int error;
2436
2437         oss.ss_sp = (void __user *) current->sas_ss_sp;
2438         oss.ss_size = current->sas_ss_size;
2439         oss.ss_flags = sas_ss_flags(sp);
2440
2441         if (uss) {
2442                 void __user *ss_sp;
2443                 size_t ss_size;
2444                 int ss_flags;
2445
2446                 error = -EFAULT;
2447                 if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
2448                         goto out;
2449                 error = __get_user(ss_sp, &uss->ss_sp) |
2450                         __get_user(ss_flags, &uss->ss_flags) |
2451                         __get_user(ss_size, &uss->ss_size);
2452                 if (error)
2453                         goto out;
2454
2455                 error = -EPERM;
2456                 if (on_sig_stack(sp))
2457                         goto out;
2458
2459                 error = -EINVAL;
2460                 /*
2461                  *
2462                  * Note - this code used to test ss_flags incorrectly
2463                  *        old code may have been written using ss_flags==0
2464                  *        to mean ss_flags==SS_ONSTACK (as this was the only
2465                  *        way that worked) - this fix preserves that older
2466                  *        mechanism
2467                  */
2468                 if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
2469                         goto out;
2470
2471                 if (ss_flags == SS_DISABLE) {
2472                         ss_size = 0;
2473                         ss_sp = NULL;
2474                 } else {
2475                         error = -ENOMEM;
2476                         if (ss_size < MINSIGSTKSZ)
2477                                 goto out;
2478                 }
2479
2480                 current->sas_ss_sp = (unsigned long) ss_sp;
2481                 current->sas_ss_size = ss_size;
2482         }
2483
2484         error = 0;
2485         if (uoss) {
2486                 error = -EFAULT;
2487                 if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss)))
2488                         goto out;
2489                 error = __put_user(oss.ss_sp, &uoss->ss_sp) |
2490                         __put_user(oss.ss_size, &uoss->ss_size) |
2491                         __put_user(oss.ss_flags, &uoss->ss_flags);
2492         }
2493
2494 out:
2495         return error;
2496 }
2497
2498 #ifdef __ARCH_WANT_SYS_SIGPENDING
2499
2500 SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
2501 {
2502         return do_sigpending(set, sizeof(*set));
2503 }
2504
2505 #endif
2506
2507 #ifdef __ARCH_WANT_SYS_SIGPROCMASK
/* Some platforms have their own version with special arguments; others
   support only sys_rt_sigprocmask.  */
2510
2511 SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
2512                 old_sigset_t __user *, oset)
2513 {
2514         int error;
2515         old_sigset_t old_set, new_set;
2516
2517         if (set) {
2518                 error = -EFAULT;
2519                 if (copy_from_user(&new_set, set, sizeof(*set)))
2520                         goto out;
2521                 new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
2522
2523                 spin_lock_irq(&current->sighand->siglock);
2524                 old_set = current->blocked.sig[0];
2525
2526                 error = 0;
2527                 switch (how) {
2528                 default:
2529                         error = -EINVAL;
2530                         break;
2531                 case SIG_BLOCK:
2532                         sigaddsetmask(&current->blocked, new_set);
2533                         break;
2534                 case SIG_UNBLOCK:
2535                         sigdelsetmask(&current->blocked, new_set);
2536                         break;
2537                 case SIG_SETMASK:
2538                         current->blocked.sig[0] = new_set;
2539                         break;
2540                 }
2541
2542                 recalc_sigpending();
2543                 spin_unlock_irq(&current->sighand->siglock);
2544                 if (error)
2545                         goto out;
2546                 if (oset)
2547                         goto set_old;
2548         } else if (oset) {
2549                 old_set = current->blocked.sig[0];
2550         set_old:
2551                 error = -EFAULT;
2552                 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2553                         goto out;
2554         }
2555         error = 0;
2556 out:
2557         return error;
2558 }
2559 #endif /* __ARCH_WANT_SYS_SIGPROCMASK */
2560
2561 #ifdef __ARCH_WANT_SYS_RT_SIGACTION
2562 SYSCALL_DEFINE4(rt_sigaction, int, sig,
2563                 const struct sigaction __user *, act,
2564                 struct sigaction __user *, oact,
2565                 size_t, sigsetsize)
2566 {
2567         struct k_sigaction new_sa, old_sa;
2568         int ret = -EINVAL;
2569
2570         /* XXX: Don't preclude handling different sized sigset_t's.  */
2571         if (sigsetsize != sizeof(sigset_t))
2572                 goto out;
2573
2574         if (act) {
2575                 if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
2576                         return -EFAULT;
2577         }
2578
2579         ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
2580
2581         if (!ret && oact) {
2582                 if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
2583                         return -EFAULT;
2584         }
2585 out:
2586         return ret;
2587 }
2588 #endif /* __ARCH_WANT_SYS_RT_SIGACTION */
2589
2590 #ifdef __ARCH_WANT_SYS_SGETMASK
2591
2592 /*
2593  * For backwards compatibility.  Functionality superseded by sigprocmask.
2594  */
2595 SYSCALL_DEFINE0(sgetmask)
2596 {
2597         /* SMP safe */
2598         return current->blocked.sig[0];
2599 }
2600
2601 SYSCALL_DEFINE1(ssetmask, int, newmask)
2602 {
2603         int old;
2604
2605         spin_lock_irq(&current->sighand->siglock);
2606         old = current->blocked.sig[0];
2607
2608         siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
2609                                                   sigmask(SIGSTOP)));
2610         recalc_sigpending();
2611         spin_unlock_irq(&current->sighand->siglock);
2612
2613         return old;
2614 }
#endif /* __ARCH_WANT_SYS_SGETMASK */
2616
2617 #ifdef __ARCH_WANT_SYS_SIGNAL
2618 /*
2619  * For backwards compatibility.  Functionality superseded by sigaction.
2620  */
2621 SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
2622 {
2623         struct k_sigaction new_sa, old_sa;
2624         int ret;
2625
2626         new_sa.sa.sa_handler = handler;
2627         new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2628         sigemptyset(&new_sa.sa.sa_mask);
2629
2630         ret = do_sigaction(sig, &new_sa, &old_sa);
2631
2632         return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
2633 }
2634 #endif /* __ARCH_WANT_SYS_SIGNAL */
2635
2636 #ifdef __ARCH_WANT_SYS_PAUSE
2637
2638 SYSCALL_DEFINE0(pause)
2639 {
2640         current->state = TASK_INTERRUPTIBLE;
2641         schedule();
2642         return -ERESTARTNOHAND;
2643 }
2644
2645 #endif
2646
2647 #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
2648 SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
2649 {
2650         sigset_t newset;
2651
2652         /* XXX: Don't preclude handling different sized sigset_t's.  */
2653         if (sigsetsize != sizeof(sigset_t))
2654                 return -EINVAL;
2655
2656         if (copy_from_user(&newset, unewset, sizeof(newset)))
2657                 return -EFAULT;
2658         sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
2659
2660         spin_lock_irq(&current->sighand->siglock);
2661         current->saved_sigmask = current->blocked;
2662         current->blocked = newset;
2663         recalc_sigpending();
2664         spin_unlock_irq(&current->sighand->siglock);
2665
2666         current->state = TASK_INTERRUPTIBLE;
2667         schedule();
2668         set_restore_sigmask();
2669         return -ERESTARTNOHAND;
2670 }
2671 #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
2672
/*
 * Weak default: returns NULL for every vma.  Being weak, a non-weak
 * definition of the same symbol elsewhere takes precedence at link time.
 */
__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
{
        const char *no_name = NULL;

        return no_name;
}
2677
2678 void __init signals_init(void)
2679 {
2680         sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
2681 }