nptl/allocatestack.c

   1 /* Copyright (C) 2002, 2003, 2004, 2005
   2    Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 #include <assert.h>
  22 #include <errno.h>
  23 #include <signal.h>
  24 #include <stdint.h>
  25 #include <string.h>
  26 #include <unistd.h>
  27 #include <sys/mman.h>
  28 #include <sys/param.h>
  29 #include <dl-sysdep.h>
  30 #include <tls.h>
  31 #include <lowlevellock.h>
  32
  33
  34 #ifndef NEED_SEPARATE_REGISTER_STACK
  35
  36 /* Most architectures have exactly one stack pointer: one local suffices.  */
  37 # define STACK_VARIABLES void *stackaddr = NULL
  38
  39 /* How to pass the values to the 'create_thread' function.  */
  40 # define STACK_VARIABLES_ARGS stackaddr
  41
  42 /* How to declare a function which gets these parameters.  */
  43 # define STACK_VARIABLES_PARMS void *stackaddr
  44
  45 /* Trailing (output) parameters in the declaration of allocate_stack.  */
  46 # define ALLOCATE_STACK_PARMS void **stack
  47
  48 /* This is how the function is called.  We do it this way to allow
  49    other variants of the function to have more parameters.  It relies
       on a variable declared with STACK_VARIABLES being in scope.  */
  50 # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
  51
  52 #else
  53
  54 /* We need two stacks.  The kernel will place them but we have to tell
  55    the kernel about the size of the reserved address space.  */
  56 # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
  57
  58 /* How to pass the values to the 'create_thread' function.  */
  59 # define STACK_VARIABLES_ARGS stackaddr, stacksize
  60
  61 /* How to declare a function which gets these parameters.  */
  62 # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
  63
  64 /* Trailing (output) parameters in the declaration of allocate_stack.  */
  65 # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
  66
  67 /* This is how the function is called.  We do it this way to allow
  68    other variants of the function to have more parameters.  It relies
       on the variables declared with STACK_VARIABLES being in scope.  */
  69 # define ALLOCATE_STACK(attr, pd) \
  70   allocate_stack (attr, pd, &stackaddr, &stacksize)
  71
  72 #endif
  73
  74
  75 /* Default alignment of stack.  */
  76 #ifndef STACK_ALIGN
  77 # define STACK_ALIGN __alignof__ (long double)
  78 #endif
  79
  80 /* Default value for minimal stack size after allocating thread
  81    descriptor and guard.  */
  82 #ifndef MINIMAL_REST_STACK
  83 # define MINIMAL_REST_STACK     4096
  84 #endif
  85
  86
  87 /* Let the architecture add some flags to the mmap() call used to
  88    allocate stacks.  */
  89 #ifndef ARCH_MAP_FLAGS
  90 # define ARCH_MAP_FLAGS 0
  91 #endif
  92
  93 /* This yields the pointer that TLS support code calls the thread pointer.  */
  94 #if TLS_TCB_AT_TP
  95 # define TLS_TPADJ(pd) (pd)
  96 #elif TLS_DTV_AT_TP
  97 # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
  98 #endif
  99
 100 /* Cache handling for not-yet free stacks.  */
 101
 102 /* Maximum size in bytes of the cache.  */
 103 static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
 104 static size_t stack_cache_actsize; /* Sum of stackblock_size of cached stacks.  */
 105
 106 /* Lock protecting the cache size above and the stack lists below.  */
 107 static lll_lock_t stack_cache_lock = LLL_LOCK_INITIALIZER;
 108
 109 /* List of cached stacks (filled by queue_stack).  */
 110 static LIST_HEAD (stack_cache);
 111
 112 /* List of the stacks in use.  */
 113 static LIST_HEAD (stack_used);
 114
 115 /* List of the threads with user provided stacks in use.  No need to
 116    initialize this, since it's done in __pthread_initialize_minimal.  */
 117 list_t __stack_user __attribute__ ((nocommon));
 118 hidden_data_def (__stack_user)
 119
 120 #if COLORING_INCREMENT != 0
 121 /* Number of freshly mapped stacks so far; drives the coloring offset.  */
 122 static unsigned int nptl_ncreated;
 123 #endif
 124
 125
 126 /* Nonzero if the stack of DESCR is not in use anymore (tid <= 0).  */
 127 #define FREE_P(descr) ((descr)->tid <= 0)
 128
 129
 130 /* We create a double linked list of all cache entries.  Double linked
 131    because this allows removing entries from the end.  */
 132
 133
 134 /* Get a stack frame from the cache.  We have to match by size since
 135    some blocks might be too small or far too large.  */
 136 static struct pthread *
 137 get_cached_stack (size_t *sizep, void **memp)
 138 {
 139   size_t size = *sizep;
 140   struct pthread *result = NULL;
 141   list_t *entry;
 142
 143   lll_lock (stack_cache_lock);
 144
 145   /* Search the cache for a matching entry.  We search for the
 146      smallest stack which has at least the required size.  Note that
 147      in normal situations the size of all allocated stacks is the
 148      same.  As the very least there are only a few different sizes.
 149      Therefore this loop will exit early most of the time with an
 150      exact match.  */
 151   list_for_each (entry, &stack_cache)
 152     {
 153       struct pthread *curr;
 154
 155       curr = list_entry (entry, struct pthread, list);
 156       if (FREE_P (curr) && curr->stackblock_size >= size)
 157         {
 158           if (curr->stackblock_size == size)
 159             {
 160               result = curr;
 161               break;
 162             }
 163
 164           if (result == NULL
 165               || result->stackblock_size > curr->stackblock_size)
 166             result = curr;
 167         }
 168     }
 169
 170   if (__builtin_expect (result == NULL, 0)
 171       /* Make sure the size difference is not too excessive.  In that
 172          case we do not use the block.  */
 173       || __builtin_expect (result->stackblock_size > 4 * size, 0))
 174     {
 175       /* Release the lock.  */
 176       lll_unlock (stack_cache_lock);
 177
 178       return NULL;
 179     }
 180
 181   /* Dequeue the entry.  */
 182   list_del (&result->list);
 183
 184   /* And add to the list of stacks in use.  */
 185   list_add (&result->list, &stack_used);
 186
 187   /* And decrease the cache size.  */
 188   stack_cache_actsize -= result->stackblock_size;
 189
 190   /* Release the lock early.  */
 191   lll_unlock (stack_cache_lock);
 192
 193   /* Report size and location of the stack to the caller.  */
 194   *sizep = result->stackblock_size;
 195   *memp = result->stackblock;
 196
 197   /* Cancellation handling is back to the default.  */
 198   result->cancelhandling = 0;
 199   result->cleanup = NULL;
 200
 201   /* No pending event.  */
 202   result->nextevent = NULL;
 203
 204   /* Clear the DTV.  */
 205   dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
 206   memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
 207
 208   /* Re-initialize the TLS.  */
 209   _dl_allocate_tls_init (TLS_TPADJ (result));
 210
 211   return result;
 212 }
 213
 214
 215 /* Add a stack frame which is not used anymore to the stack.  Must be
 216    called with the cache lock held.  */
 217 static inline void
 218 __attribute ((always_inline))
 219 queue_stack (struct pthread *stack)
 220 {
 221   /* We unconditionally add the stack to the list.  The memory may
 222      still be in use but it will not be reused until the kernel marks
 223      the stack as not used anymore.  */
 224   list_add (&stack->list, &stack_cache);
 225
 226   stack_cache_actsize += stack->stackblock_size;
 227   if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
 228     {
 229       /* We reduce the size of the cache.  Remove the last entries
 230          until the size is below the limit.  */
 231       list_t *entry;
 232       list_t *prev;
 233
 234       /* Search from the end of the list.  */
 235       list_for_each_prev_safe (entry, prev, &stack_cache)
 236         {
 237           struct pthread *curr;
 238
 239           curr = list_entry (entry, struct pthread, list);
 240           if (FREE_P (curr))
 241             {
 242               /* Unlink the block.  */
 243               list_del (entry);
 244
 245               /* Account for the freed memory.  */
 246               stack_cache_actsize -= curr->stackblock_size;
 247
 248               /* Free the memory associated with the ELF TLS.  */
 249               _dl_deallocate_tls (TLS_TPADJ (curr), false);
 250
 251               /* Remove this block.  This should never fail.  If it
 252                  does something is really wrong.  */
 253               if (munmap (curr->stackblock, curr->stackblock_size) != 0)
 254                 abort ();
 255
 256               /* Maybe we have freed enough.  */
 257               if (stack_cache_actsize <= stack_cache_maxsize)
 258                 break;
 259             }
 260         }
 261     }
 262 }
 263
 264
 265 static int
 266 internal_function
 267 change_stack_perm (struct pthread *pd
 268 #ifdef NEED_SEPARATE_REGISTER_STACK
 269                    , size_t pagemask
 270 #endif
 271                    )
 272 {
 273 #ifdef NEED_SEPARATE_REGISTER_STACK
 274   void *stack = (pd->stackblock
 275                  + (((((pd->stackblock_size - pd->guardsize) / 2)
 276                       & pagemask) + pd->guardsize) & pagemask));
 277   size_t len = pd->stackblock + pd->stackblock_size - stack;
 278 #else
 279   void *stack = pd->stackblock + pd->guardsize;
 280   size_t len = pd->stackblock_size - pd->guardsize;
 281 #endif
 282   if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
 283     return errno;
 284
 285   return 0;
 286 }
 287
 288
 289 static int
 290 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 291                 ALLOCATE_STACK_PARMS)
 292 {
 293   struct pthread *pd;
 294   size_t size;
 295   size_t pagesize_m1 = __getpagesize () - 1;
 296   void *stacktop;
 297
 298   assert (attr != NULL);
 299   assert (powerof2 (pagesize_m1 + 1));
 300   assert (TCB_ALIGNMENT >= STACK_ALIGN);
 301
 302   /* Get the stack size from the attribute if it is set.  Otherwise we
 303      use the default we determined at start time.  */
 304   size = attr->stacksize ?: __default_stacksize;
 305
 306   /* Get memory for the stack.  */
 307   if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
 308     {
 309       uintptr_t adj;
 310
 311       /* If the user also specified the size of the stack make sure it
 312          is large enough.  */
 313       if (attr->stacksize != 0
 314           && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
 315         return EINVAL;
 316
 317       /* Adjust stack size for alignment of the TLS block.  */
 318 #if TLS_TCB_AT_TP
 319       adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
 320             & __static_tls_align_m1;
 321       assert (size > adj + TLS_TCB_SIZE);
 322 #elif TLS_DTV_AT_TP
 323       adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
 324             & __static_tls_align_m1;
 325       assert (size > adj);
 326 #endif
 327
 328       /* The user provided some memory.  Let's hope it matches the
 329          size...  We do not allocate guard pages if the user provided
 330          the stack.  It is the user's responsibility to do this if it
 331          is wanted.  */
 332 #if TLS_TCB_AT_TP
 333       pd = (struct pthread *) ((uintptr_t) attr->stackaddr
 334                                - TLS_TCB_SIZE - adj);
 335 #elif TLS_DTV_AT_TP
 336       pd = (struct pthread *) (((uintptr_t) attr->stackaddr
 337                                 - __static_tls_size - adj)
 338                                - TLS_PRE_TCB_SIZE);
 339 #endif
 340
 341       /* The user provided stack memory needs to be cleared.  */
 342       memset (pd, '\0', sizeof (struct pthread));
 343
 344       /* The first TSD block is included in the TCB.  */
 345       pd->specific[0] = pd->specific_1stblock;
 346
 347       /* Remember the stack-related values.  */
 348       pd->stackblock = (char *) attr->stackaddr - size;
 349       pd->stackblock_size = size;
 350
 351       /* This is a user-provided stack.  It will not be queued in the
 352          stack cache nor will the memory (except the TLS memory) be freed.  */
 353       pd->user_stack = true;
 354
 355       /* This is at least the second thread.  */
 356       pd->header.multiple_threads = 1;
 357 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
 358       __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
 359 #endif
 360
 361 #ifdef NEED_DL_SYSINFO
 362       /* Copy the sysinfo value from the parent.  */
 363       THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
 364 #endif
 365
 366       /* The process ID is also the same as that of the caller.  */
 367       pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
 368
 369       /* Allocate the DTV for this thread.  */
 370       if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
 371         {
 372           /* Something went wrong.  */
 373           assert (errno == ENOMEM);
 374           return EAGAIN;
 375         }
 376
 377
 378       /* Prepare to modify global data.  */
 379       lll_lock (stack_cache_lock);
 380
 381       /* And add to the list of stacks in use.  */
 382       list_add (&pd->list, &__stack_user);
 383
 384       lll_unlock (stack_cache_lock);
 385     }
 386   else
 387     {
 388       /* Allocate some anonymous memory.  If possible use the cache.  */
 389       size_t guardsize;
 390       size_t reqsize;
 391       void *mem;
 392       const int prot = (PROT_READ | PROT_WRITE
 393                         | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
 394
 395 #if COLORING_INCREMENT != 0
 396       /* Add one more page for stack coloring.  Don't do it for stacks
 397          with 16 times pagesize or larger.  This might just cause
 398          unnecessary misalignment.  */
 399       if (size <= 16 * pagesize_m1)
 400         size += pagesize_m1 + 1;
 401 #endif
 402
 403       /* Adjust the stack size for alignment.  */
 404       size &= ~__static_tls_align_m1;
 405       assert (size != 0);
 406
 407       /* Make sure the size of the stack is enough for the guard and
 408          eventually the thread descriptor.  */
 409       guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
 410       if (__builtin_expect (size < ((guardsize + __static_tls_size
 411                                      + MINIMAL_REST_STACK + pagesize_m1)
 412                                     & ~pagesize_m1),
 413                             0))
 414         /* The stack is too small (or the guard too large).  */
 415         return EINVAL;
 416
 417       /* Try to get a stack from the cache.  */
 418       reqsize = size;
 419       pd = get_cached_stack (&size, &mem);
 420       if (pd == NULL)
 421         {
 422           /* To avoid aliasing effects on a larger scale than pages we
 423              adjust the allocated stack size if necessary.  This way
 424              allocations directly following each other will not have
 425              aliasing problems.  */
 426 #if MULTI_PAGE_ALIASING != 0
 427           if ((size % MULTI_PAGE_ALIASING) == 0)
 428             size += pagesize_m1 + 1;
 429 #endif
 430
 431           mem = mmap (NULL, size, prot,
 432                       MAP_PRIVATE | MAP_ANONYMOUS | ARCH_MAP_FLAGS, -1, 0);
 433
 434           if (__builtin_expect (mem == MAP_FAILED, 0))
 435             {
 436 #ifdef ARCH_RETRY_MMAP
 437               mem = ARCH_RETRY_MMAP (size);
 438               if (__builtin_expect (mem == MAP_FAILED, 0))
 439 #endif
 440                 return errno;
 441             }
 442
 443           /* SIZE is guaranteed to be greater than zero.
 444              So we can never get a null pointer back from mmap.  */
 445           assert (mem != NULL);
 446
 447 #if COLORING_INCREMENT != 0
 448           /* Atomically increment NCREATED.  */
 449           unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
 450
 451           /* We chose the offset for coloring by incrementing it for
 452              every new thread by a fixed amount.  The offset used
 453              module the page size.  Even if coloring would be better
 454              relative to higher alignment values it makes no sense to
 455              do it since the mmap() interface does not allow us to
 456              specify any alignment for the returned memory block.  */
 457           size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
 458
 459           /* Make sure the coloring offsets does not disturb the alignment
 460              of the TCB and static TLS block.  */
 461           if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
 462             coloring = (((coloring + __static_tls_align_m1)
 463                          & ~(__static_tls_align_m1))
 464                         & ~pagesize_m1);
 465 #else
 466           /* Unless specified we do not make any adjustments.  */
 467 # define coloring 0
 468 #endif
 469
 470           /* Place the thread descriptor at the end of the stack.  */
 471 #if TLS_TCB_AT_TP
 472           pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
 473 #elif TLS_DTV_AT_TP
 474           pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
 475                                     - __static_tls_size)
 476                                     & ~__static_tls_align_m1)
 477                                    - TLS_PRE_TCB_SIZE);
 478 #endif
 479
 480           /* Remember the stack-related values.  */
 481           pd->stackblock = mem;
 482           pd->stackblock_size = size;
 483
 484           /* We allocated the first block thread-specific data array.
 485              This address will not change for the lifetime of this
 486              descriptor.  */
 487           pd->specific[0] = pd->specific_1stblock;
 488
 489           /* This is at least the second thread.  */
 490           pd->header.multiple_threads = 1;
 491 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
 492           __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
 493 #endif
 494
 495 #ifdef NEED_DL_SYSINFO
 496           /* Copy the sysinfo value from the parent.  */
 497           THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
 498 #endif
 499
 500           /* The process ID is also the same as that of the caller.  */
 501           pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
 502
 503           /* Allocate the DTV for this thread.  */
 504           if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
 505             {
 506               /* Something went wrong.  */
 507               assert (errno == ENOMEM);
 508
 509               /* Free the stack memory we just allocated.  */
 510               (void) munmap (mem, size);
 511
 512               return EAGAIN;
 513             }
 514
 515
 516           /* Prepare to modify global data.  */
 517           lll_lock (stack_cache_lock);
 518
 519           /* And add to the list of stacks in use.  */
 520           list_add (&pd->list, &stack_used);
 521
 522           lll_unlock (stack_cache_lock);
 523
 524
 525           /* There might have been a race.  Another thread might have
 526              caused the stacks to get exec permission while this new
 527              stack was prepared.  Detect if this was possible and
 528              change the permission if necessary.  */
 529           if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
 530                                 && (prot & PROT_EXEC) == 0, 0))
 531             {
 532               int err = change_stack_perm (pd
 533 #ifdef NEED_SEPARATE_REGISTER_STACK
 534                                            , ~pagesize_m1
 535 #endif
 536                                            );
 537               if (err != 0)
 538                 {
 539                   /* Free the stack memory we just allocated.  */
 540                   (void) munmap (mem, size);
 541
 542                   return err;
 543                 }
 544             }
 545
 546
 547           /* Note that all of the stack and the thread descriptor is
 548              zeroed.  This means we do not have to initialize fields
 549              with initial value zero.  This is specifically true for
 550              the 'tid' field which is always set back to zero once the
 551              stack is not used anymore and for the 'guardsize' field
 552              which will be read next.  */
 553         }
 554
 555       /* Create or resize the guard area if necessary.  */
 556       if (__builtin_expect (guardsize > pd->guardsize, 0))
 557         {
 558 #ifdef NEED_SEPARATE_REGISTER_STACK
 559           char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
 560 #else
 561           char *guard = mem;
 562 #endif
 563           if (mprotect (guard, guardsize, PROT_NONE) != 0)
 564             {
 565               int err;
 566             mprot_error:
 567               err = errno;
 568
 569               lll_lock (stack_cache_lock);
 570
 571               /* Remove the thread from the list.  */
 572               list_del (&pd->list);
 573
 574               lll_unlock (stack_cache_lock);
 575
 576               /* Get rid of the TLS block we allocated.  */
 577               _dl_deallocate_tls (TLS_TPADJ (pd), false);
 578
 579               /* Free the stack memory regardless of whether the size
 580                  of the cache is over the limit or not.  If this piece
 581                  of memory caused problems we better do not use it
 582                  anymore.  Uh, and we ignore possible errors.  There
 583                  is nothing we could do.  */
 584               (void) munmap (mem, size);
 585
 586               return err;
 587             }
 588
 589           pd->guardsize = guardsize;
 590         }
 591       else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
 592                                  0))
 593         {
 594           /* The old guard area is too large.  */
 595
 596 #ifdef NEED_SEPARATE_REGISTER_STACK
 597           char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
 598           char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
 599
 600           if (oldguard < guard
 601               && mprotect (oldguard, guard - oldguard, prot) != 0)
 602             goto mprot_error;
 603
 604           if (mprotect (guard + guardsize,
 605                         oldguard + pd->guardsize - guard - guardsize,
 606                         prot) != 0)
 607             goto mprot_error;
 608 #else
 609           if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
 610                         prot) != 0)
 611             goto mprot_error;
 612 #endif
 613
 614           pd->guardsize = guardsize;
 615         }
 616       /* The pthread_getattr_np() calls need to get passed the size
 617          requested in the attribute, regardless of how large the
 618          actually used guardsize is.  */
 619       pd->reported_guardsize = guardsize;
 620     }
 621
 622   /* Initialize the lock.  We have to do this unconditionally since the
 623      stillborn thread could be canceled while the lock is taken.  */
 624   pd->lock = LLL_LOCK_INITIALIZER;
 625
 626   /* We place the thread descriptor at the end of the stack.  */
 627   *pdp = pd;
 628
 629 #if TLS_TCB_AT_TP
 630   /* The stack begins before the TCB and the static TLS block.  */
 631   stacktop = ((char *) (pd + 1) - __static_tls_size);
 632 #elif TLS_DTV_AT_TP
 633   stacktop = (char *) (pd - 1);
 634 #endif
 635
 636 #ifdef NEED_SEPARATE_REGISTER_STACK
 637   *stack = pd->stackblock;
 638   *stacksize = stacktop - *stack;
 639 #else
 640   *stack = stacktop;
 641 #endif
 642
 643   return 0;
 644 }
 645
 646
 647 void
 648 internal_function
 649 __deallocate_stack (struct pthread *pd)
 650 {
 651   lll_lock (stack_cache_lock);
 652
 653   /* Remove the thread from the list of threads with user defined
 654      stacks.  */
 655   list_del (&pd->list);
 656
 657   /* Not much to do.  Just free the mmap()ed memory.  Note that we do
 658      not reset the 'used' flag in the 'tid' field.  This is done by
 659      the kernel.  If no thread has been created yet this field is
 660      still zero.  */
 661   if (__builtin_expect (! pd->user_stack, 1))
 662     (void) queue_stack (pd);
 663   else
 664     /* Free the memory associated with the ELF TLS.  */
 665     _dl_deallocate_tls (TLS_TPADJ (pd), false);
 666
 667   lll_unlock (stack_cache_lock);
 668 }
 669
 670
 671 int
 672 internal_function
 673 __make_stacks_executable (void **stack_endp)
 674 {
 675   /* First the main thread's stack.  */
 676   int err = _dl_make_stack_executable (stack_endp);
 677   if (err != 0)
 678     return err;
 679
 680 #ifdef NEED_SEPARATE_REGISTER_STACK
 681   const size_t pagemask = ~(__getpagesize () - 1);
 682 #endif
 683
 684   lll_lock (stack_cache_lock);
 685
 686   list_t *runp;
 687   list_for_each (runp, &stack_used)
 688     {
 689       err = change_stack_perm (list_entry (runp, struct pthread, list)
 690 #ifdef NEED_SEPARATE_REGISTER_STACK
 691                                , pagemask
 692 #endif
 693                                );
 694       if (err != 0)
 695         break;
 696     }
 697
 698   /* Also change the permission for the currently unused stacks.  This
 699      might be wasted time but better spend it here than adding a check
 700      in the fast path.  */
 701   if (err == 0)
 702     list_for_each (runp, &stack_cache)
 703       {
 704         err = change_stack_perm (list_entry (runp, struct pthread, list)
 705 #ifdef NEED_SEPARATE_REGISTER_STACK
 706                                  , pagemask
 707 #endif
 708                                  );
 709         if (err != 0)
 710           break;
 711       }
 712
 713   lll_unlock (stack_cache_lock);
 714
 715   return err;
 716 }
 717
 718
 719 /* In case of a fork() call the memory allocation in the child will be
 720    the same but only one thread is running.  All stacks except that of
 721    the one running thread are not used anymore.  We have to recycle
 722    them.  */
 723 void
 724 __reclaim_stacks (void)
 725 {
 726   struct pthread *self = (struct pthread *) THREAD_SELF;
 727
 728   /* No locking necessary.  The caller is the only stack in use.  */
 729
 730   /* Mark all stacks except the still running one as free.  */
 731   list_t *runp;
 732   list_for_each (runp, &stack_used)
 733     {
 734       struct pthread *curp;
 735
 736       curp = list_entry (runp, struct pthread, list);
 737       if (curp != self)
 738         {
 739           /* This marks the stack as free.  */
 740           curp->tid = 0;
 741
 742           /* The PID field must be initialized for the new process.  */
 743           curp->pid = self->pid;
 744
 745           /* Account for the size of the stack.  */
 746           stack_cache_actsize += curp->stackblock_size;
 747         }
 748     }
 749
 750   /* Add the stack of all running threads to the cache.  */
 751   list_splice (&stack_used, &stack_cache);
 752
 753   /* Remove the entry for the current thread to from the cache list
 754      and add it to the list of running threads.  Which of the two
 755      lists is decided by the user_stack flag.  */
 756   list_del (&self->list);
 757
 758   /* Re-initialize the lists for all the threads.  */
 759   INIT_LIST_HEAD (&stack_used);
 760   INIT_LIST_HEAD (&__stack_user);
 761
 762   if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
 763     list_add (&self->list, &__stack_user);
 764   else
 765     list_add (&self->list, &stack_used);
 766
 767   /* There is one thread running.  */
 768   __nptl_nthreads = 1;
 769
 770   /* Initialize the lock.  */
 771   stack_cache_lock = LLL_LOCK_INITIALIZER;
 772 }
 773
 774
 775 #if HP_TIMING_AVAIL
 776 # undef __find_thread_by_id
 777 /* Find a thread given the thread ID.  */
 778 attribute_hidden
 779 struct pthread *
 780 __find_thread_by_id (pid_t tid)
 781 {
 782   struct pthread *result = NULL;
 783
 784   lll_lock (stack_cache_lock);
 785
 786   /* Iterate over the list with system-allocated threads first.  */
 787   list_t *runp;
 788   list_for_each (runp, &stack_used)
 789     {
 790       struct pthread *curp;
 791
 792       curp = list_entry (runp, struct pthread, list);
 793
 794       if (curp->tid == tid)
 795         {
 796           result = curp;
 797           goto out;
 798         }
 799     }
 800
 801   /* Now the list with threads using user-allocated stacks.  */
 802   list_for_each (runp, &__stack_user)
 803     {
 804       struct pthread *curp;
 805
 806       curp = list_entry (runp, struct pthread, list);
 807
 808       if (curp->tid == tid)
 809         {
 810           result = curp;
 811           goto out;
 812         }
 813     }
 814
 815  out:
 816   lll_unlock (stack_cache_lock);
 817
 818   return result;
 819 }
 820 #endif
 821
 822
 823 static void
 824 internal_function
 825 setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
 826 {
 827   if (! IS_DETACHED (t))
 828     {
 829       int ch;
 830       do
 831         {
 832           ch = t->cancelhandling;
 833
 834           /* If the thread is exiting right now, ignore it.  */
 835           if ((ch & EXITING_BITMASK) != 0)
 836             return;
 837         }
 838       while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
 839                                                    ch | SETXID_BITMASK, ch));
 840     }
 841
 842   int val;
 843   INTERNAL_SYSCALL_DECL (err);
 844 #if __ASSUME_TGKILL
 845   val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
 846                           t->tid, SIGSETXID);
 847 #else
 848 # ifdef __NR_tgkill
 849   val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
 850                           t->tid, SIGSETXID);
 851   if (INTERNAL_SYSCALL_ERROR_P (val, err)
 852       && INTERNAL_SYSCALL_ERRNO (val, err) == ENOSYS)
 853 # endif
 854     val = INTERNAL_SYSCALL (tkill, err, 2, t->tid, SIGSETXID);
 855 #endif
 856
 857   if (!INTERNAL_SYSCALL_ERROR_P (val, err))
 858     atomic_increment (&cmdp->cntr);
 859 }
 860
 861
 862 int
 863 attribute_hidden
 864 __nptl_setxid (struct xid_command *cmdp)
 865 {
 866   int result;
 867   lll_lock (stack_cache_lock);
 868
 869   __xidcmd = cmdp;
 870   cmdp->cntr = 0;
 871
 872   struct pthread *self = THREAD_SELF;
 873
 874   /* Iterate over the list with system-allocated threads first.  */
 875   list_t *runp;
 876   list_for_each (runp, &stack_used)
 877     {
 878       struct pthread *t = list_entry (runp, struct pthread, list);
 879       if (t == self)
 880         continue;
 881
 882       setxid_signal_thread (cmdp, t);
 883     }
 884
 885   /* Now the list with threads using user-allocated stacks.  */
 886   list_for_each (runp, &__stack_user)
 887     {
 888       struct pthread *t = list_entry (runp, struct pthread, list);
 889       if (t == self)
 890         continue;
 891
 892       setxid_signal_thread (cmdp, t);
 893     }
 894
 895   int cur = cmdp->cntr;
 896   while (cur != 0)
 897     {
 898       lll_futex_wait (&cmdp->cntr, cur);
 899       cur = cmdp->cntr;
 900     }
 901
 902   /* This must be last, otherwise the current thread might not have
 903      permissions to send SIGSETXID syscall to the other threads.  */
 904   INTERNAL_SYSCALL_DECL (err);
 905   result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
 906                                  cmdp->id[0], cmdp->id[1], cmdp->id[2]);
 907   if (INTERNAL_SYSCALL_ERROR_P (result, err))
 908     {
 909       __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
 910       result = -1;
 911     }
 912
 913   lll_unlock (stack_cache_lock);
 914   return result;
 915 }
 916
 917 static inline void __attribute__((always_inline))
 918 init_one_static_tls (struct pthread *curp, struct link_map *map)
 919 {
 920   dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
 921 # if TLS_TCB_AT_TP
 922   void *dest = (char *) curp - map->l_tls_offset;
 923 # elif TLS_DTV_AT_TP
 924   void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
 925 # else
 926 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 927 # endif
 928
 929   /* Fill in the DTV slot so that a later LD/GD access will find it.  */
 930   dtv[map->l_tls_modid].pointer.val = dest;
 931   dtv[map->l_tls_modid].pointer.is_static = true;
 932
 933   /* Initialize the memory.  */
 934   memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
 935           '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
 936 }
 937
 938 void
 939 attribute_hidden
 940 __pthread_init_static_tls (struct link_map *map)
 941 {
 942   lll_lock (stack_cache_lock);
 943
 944   /* Iterate over the list with system-allocated threads first.  */
 945   list_t *runp;
 946   list_for_each (runp, &stack_used)
 947     init_one_static_tls (list_entry (runp, struct pthread, list), map);
 948
 949   /* Now the list with threads using user-allocated stacks.  */
 950   list_for_each (runp, &__stack_user)
 951     init_one_static_tls (list_entry (runp, struct pthread, list), map);
 952
 953   lll_unlock (stack_cache_lock);
 954 }