/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <dl-sysdep.h>
#include <tls.h>

#ifndef NEED_SEPARATE_REGISTER_STACK

/* Most architectures have exactly one stack pointer.  Some have more.  */
# define STACK_VARIABLES void *stackaddr

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr

/* How to declare the function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)

#else

# define STACK_VARIABLES void *stackaddr; size_t stacksize
# define STACK_VARIABLES_ARGS stackaddr, stacksize
# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
# define ALLOCATE_STACK(attr, pd) \
  allocate_stack (attr, pd, &stackaddr, &stacksize)

#endif
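
#if 0
/* Illustrative sketch only (not compiled): how a caller such as
   pthread_create is expected to use the macros above.  The names
   'example_create' and 'iattr' are hypothetical; the macros expand to
   the right locals and arguments for either stack model.  */
static int
example_create (const struct pthread_attr *iattr)
{
  STACK_VARIABLES;
  struct pthread *pd;

  int err = ALLOCATE_STACK (iattr, &pd);
  if (err != 0)
    /* Insufficient resources or invalid attributes.  */
    return err;

  /* STACK_VARIABLES_ARGS would now be forwarded to create_thread.  */
  return 0;
}
#endif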


/* Default alignment of stack.  */
#ifndef STACK_ALIGN
# define STACK_ALIGN __alignof__ (long double)
#endif

/* Default value for minimal stack size after allocating thread
   descriptor and guard.  */
#ifndef MINIMAL_REST_STACK
# define MINIMAL_REST_STACK 4096
#endif

/* Let the architecture add some flags to the mmap() call used to
   allocate stacks.  */
#ifndef ARCH_MAP_FLAGS
# define ARCH_MAP_FLAGS 0
#endif

/* This yields the pointer that TLS support code calls the thread pointer.  */
#if TLS_TCB_AT_TP
# define TLS_TPADJ(pd) (pd)
#elif TLS_DTV_AT_TP
# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
#endif
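
/* Illustrative note (not from the original sources): with TLS_TCB_AT_TP
   the thread descriptor doubles as the TCB, so the descriptor address
   itself is what the TLS support code treats as the thread pointer.
   With TLS_DTV_AT_TP the TCB lies TLS_PRE_TCB_SIZE bytes past the
   descriptor, which is why every TLS call below (GET_DTV,
   _dl_allocate_tls, _dl_deallocate_tls, ...) goes through TLS_TPADJ.  */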


/* Cache handling for not-yet freed stacks.  */

/* Maximum size of the cache, in bytes.  */
static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
static size_t stack_cache_actsize;

/* Mutex protecting the cache size variables and the lists below.  */
static lll_lock_t stack_cache_lock = LLL_LOCK_INITIALIZER;

/* List of queued stack frames.  */
static LIST_HEAD (stack_cache);

/* List of the stacks in use.  */
static LIST_HEAD (stack_used);

/* List of the threads with user provided stacks in use.  No need to
   initialize this, since it's done in __pthread_initialize_minimal.  */
list_t __stack_user __attribute__ ((nocommon));
hidden_data_def (__stack_user)

#if COLORING_INCREMENT != 0
/* Number of threads created.  */
static unsigned int nptl_ncreated;
#endif


/* Check whether the stack is still used or not.  */
#define FREE_P(descr) ((descr)->tid <= 0)


/* We create a doubly-linked list of all cache entries.  Doubly linked
   because this allows removing entries from the end.  */
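
#if 0
/* Illustrative sketch only (not compiled): list_entry recovers the
   enclosing struct pthread from the embedded list_t node, in the style
   of the kernel's container_of macro.  Roughly (assuming <stddef.h>'s
   offsetof):  */
# define list_entry_sketch(ptr, type, member) \
  ((type *) ((char *) (ptr) - offsetof (type, member)))
#endif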


/* Get a stack frame from the cache.  We have to match by size since
   some blocks might be too small or far too large.  */
static struct pthread *
get_cached_stack (size_t *sizep, void **memp)
{
  size_t size = *sizep;
  struct pthread *result = NULL;
  list_t *entry;

  lll_lock (stack_cache_lock);

  /* Search the cache for a matching entry.  We search for the
     smallest stack which has at least the required size.  Note that
     in normal situations the size of all allocated stacks is the
     same.  At the very least there are only a few different sizes.
     Therefore this loop will exit early most of the time with an
     exact match.  */
  list_for_each (entry, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr) && curr->stackblock_size >= size)
        {
          if (curr->stackblock_size == size)
            {
              result = curr;
              break;
            }

          if (result == NULL
              || result->stackblock_size > curr->stackblock_size)
            result = curr;
        }
    }

  if (__builtin_expect (result == NULL, 0)
      /* Make sure the size difference is not too excessive.  In that
         case we do not use the block.  */
      || __builtin_expect (result->stackblock_size > 4 * size, 0))
    {
      /* Release the lock.  */
      lll_unlock (stack_cache_lock);

      return NULL;
    }

  /* Dequeue the entry.  */
  list_del (&result->list);

  /* And add to the list of stacks in use.  */
  list_add (&result->list, &stack_used);

  /* And decrease the cache size.  */
  stack_cache_actsize -= result->stackblock_size;

  /* Release the lock early.  */
  lll_unlock (stack_cache_lock);

  /* Report size and location of the stack to the caller.  */
  *sizep = result->stackblock_size;
  *memp = result->stackblock;

  /* Cancellation handling is back to the default.  */
  result->cancelhandling = 0;
  result->cleanup = NULL;

  /* No pending event.  */
  result->nextevent = NULL;

  /* Clear the DTV.  */
  dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
  memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));

  /* Re-initialize the TLS.  */
  _dl_allocate_tls_init (TLS_TPADJ (result));

  return result;
}
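
#if 0
/* Illustrative sketch only (not compiled): the in/out contract of
   get_cached_stack as used by allocate_stack below.  SIZE goes in as
   the rounded request and comes back as the actual block size; MEM
   receives the base of the mapping.  The 512 KiB figure is made up.  */
size_t size = 512 * 1024;
void *mem;
struct pthread *pd = get_cached_stack (&size, &mem);
if (pd == NULL)
  {
    /* Cache miss; fall back to mmap as allocate_stack does.  */
  }
#endif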


/* Add a stack frame which is not used anymore to the stack cache.
   Must be called with the cache lock held.  */
static inline void
__attribute ((always_inline))
queue_stack (struct pthread *stack)
{
  /* We unconditionally add the stack to the list.  The memory may
     still be in use but it will not be reused until the kernel marks
     the stack as not used anymore.  */
  list_add (&stack->list, &stack_cache);

  stack_cache_actsize += stack->stackblock_size;
  if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
    {
      /* We reduce the size of the cache.  Remove the last entries
         until the size is below the limit.  */
      list_t *entry;
      list_t *prev;

      /* Search from the end of the list.  */
      list_for_each_prev_safe (entry, prev, &stack_cache)
        {
          struct pthread *curr;

          curr = list_entry (entry, struct pthread, list);
          if (FREE_P (curr))
            {
              /* Unlink the block.  */
              list_del (entry);

              /* Account for the freed memory.  */
              stack_cache_actsize -= curr->stackblock_size;

              /* Free the memory associated with the ELF TLS.  */
              _dl_deallocate_tls (TLS_TPADJ (curr), false);

              /* Remove this block.  This should never fail.  If it
                 does something is really wrong.  */
              if (munmap (curr->stackblock, curr->stackblock_size) != 0)
                abort ();

              /* Maybe we have freed enough.  */
              if (stack_cache_actsize <= stack_cache_maxsize)
                break;
            }
        }
    }
}
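
#if 0
/* Illustrative sketch only (not compiled): queue_stack assumes the
   caller already owns the cache lock, exactly the pattern used by
   __deallocate_stack below.  PD here is a hypothetical dead thread.  */
lll_lock (stack_cache_lock);
queue_stack (pd);
lll_unlock (stack_cache_lock);
#endif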


static int
internal_function
allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                ALLOCATE_STACK_PARMS)
{
  struct pthread *pd;
  size_t size;
  size_t pagesize_m1 = __getpagesize () - 1;
  void *stacktop;

  assert (attr != NULL);
  assert (powerof2 (pagesize_m1 + 1));
  assert (TCB_ALIGNMENT >= STACK_ALIGN);

  /* Get the stack size from the attribute if it is set.  Otherwise we
     use the default we determined at start time.  */
  size = attr->stacksize ?: __default_stacksize;

  /* Get memory for the stack.  */
  if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
    {
      uintptr_t adj;

      /* If the user also specified the size of the stack make sure it
         is large enough.  */
      if (attr->stacksize != 0
          && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
        return EINVAL;

      /* Adjust stack size for alignment of the TLS block.  */
#if TLS_TCB_AT_TP
      adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
            & __static_tls_align_m1;
      assert (size > adj + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
      adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
            & __static_tls_align_m1;
      assert (size > adj);
#endif

      /* The user provided some memory.  Let's hope it matches the
         size...  We do not allocate guard pages if the user provided
         the stack.  It is the user's responsibility to do this if it
         is wanted.  */
#if TLS_TCB_AT_TP
      pd = (struct pthread *) ((uintptr_t) attr->stackaddr
                               - TLS_TCB_SIZE - adj);
#elif TLS_DTV_AT_TP
      pd = (struct pthread *) (((uintptr_t) attr->stackaddr
                                - __static_tls_size - adj)
                               - TLS_PRE_TCB_SIZE);
#endif

      /* The user provided stack memory needs to be cleared.  */
      memset (pd, '\0', sizeof (struct pthread));

      /* The first TSD block is included in the TCB.  */
      pd->specific[0] = pd->specific_1stblock;

#if defined __ASSUME_CLONE_STOPPED && LLL_LOCK_INITIALIZER != 0
      /* Initialize the lock.  */
      pd->lock = LLL_LOCK_INITIALIZER;
#endif

      /* Remember the stack-related values.  */
      pd->stackblock = (char *) attr->stackaddr - size;
      pd->stackblock_size = size;

      /* This is a user-provided stack.  It will not be queued in the
         stack cache nor will the memory (except the TLS memory) be freed.  */
      pd->user_stack = true;

      /* This is at least the second thread.  */
      pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
      __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifdef NEED_DL_SYSINFO
      /* Copy the sysinfo value from the parent.  */
      pd->header.sysinfo = THREAD_GETMEM (THREAD_SELF, header.sysinfo);
#endif

      /* The process ID is also the same as that of the caller.  */
      pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

      /* Allocate the DTV for this thread.  */
      if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
        /* Something went wrong.  */
        return errno;

      /* Prepare to modify global data.  */
      lll_lock (stack_cache_lock);

      /* And add to the list of stacks in use.  */
      list_add (&pd->list, &__stack_user);

      lll_unlock (stack_cache_lock);
    }
  else
    {
      /* Allocate some anonymous memory.  If possible use the cache.  */
      size_t guardsize;
      size_t reqsize;
      void *mem;
      const int prot = (PROT_READ | PROT_WRITE
                        | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));

#if COLORING_INCREMENT != 0
      /* Add one more page for stack coloring.  Don't do it for stacks
         with 16 times pagesize or larger.  This might just cause
         unnecessary misalignment.  */
      if (size <= 16 * pagesize_m1)
        size += pagesize_m1 + 1;
#endif

      /* Adjust the stack size for alignment.  */
      size &= ~__static_tls_align_m1;
      assert (size != 0);

      /* Make sure the size of the stack is enough for the guard and
         eventually the thread descriptor.  */
      guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
      if (__builtin_expect (size < (guardsize + __static_tls_size
                                    + MINIMAL_REST_STACK + pagesize_m1 + 1),
                            0))
        /* The stack is too small (or the guard too large).  */
        return EINVAL;

      /* Try to get a stack from the cache.  */
      reqsize = size;
      pd = get_cached_stack (&size, &mem);
      if (pd == NULL)
        {
          /* To avoid aliasing effects on a larger scale than pages we
             adjust the allocated stack size if necessary.  This way
             allocations directly following each other will not have
             aliasing problems.  */
#if MULTI_PAGE_ALIASING != 0
          if ((size % MULTI_PAGE_ALIASING) == 0)
            size += pagesize_m1 + 1;
#endif

          mem = mmap (NULL, size, prot,
                      MAP_PRIVATE | MAP_ANONYMOUS | ARCH_MAP_FLAGS, -1, 0);

          if (__builtin_expect (mem == MAP_FAILED, 0))
            {
#ifdef ARCH_RETRY_MMAP
              mem = ARCH_RETRY_MMAP (size);
              if (__builtin_expect (mem == MAP_FAILED, 0))
#endif
                return errno;
            }

          /* SIZE is guaranteed to be greater than zero.
             So we can never get a null pointer back from mmap.  */
          assert (mem != NULL);

#if COLORING_INCREMENT != 0
          /* Atomically increment NCREATED.  */
          unsigned int ncreated = (atomic_exchange_and_add (&nptl_ncreated, 1)
                                   + 1);

          /* We choose the offset for coloring by incrementing it for
             every new thread by a fixed amount.  The offset is used
             modulo the page size.  Even if coloring would be better
             relative to higher alignment values it makes no sense to
             do it since the mmap() interface does not allow us to
             specify any alignment for the returned memory block.  */
          size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;

          /* Make sure the coloring offset does not disturb the alignment
             of the TCB and static TLS block.  */
          if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
            coloring = (((coloring + __static_tls_align_m1)
                         & ~(__static_tls_align_m1))
                        & ~pagesize_m1);
#else
          /* Unless specified we do not make any adjustments.  */
# define coloring 0
#endif

          /* Place the thread descriptor at the end of the stack.  */
#if TLS_TCB_AT_TP
          pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
#elif TLS_DTV_AT_TP
          pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
                                     - __static_tls_size)
                                    & ~__static_tls_align_m1)
                                   - TLS_PRE_TCB_SIZE);
#endif

          /* Remember the stack-related values.  */
          pd->stackblock = mem;
          pd->stackblock_size = size;

          /* We allocated the first block of the thread-specific data
             array.  This address will not change for the lifetime of
             this object.  */
          pd->specific[0] = pd->specific_1stblock;

#if defined __ASSUME_CLONE_STOPPED && LLL_LOCK_INITIALIZER != 0
          /* Initialize the lock.  */
          pd->lock = LLL_LOCK_INITIALIZER;
#endif

          /* This is at least the second thread.  */
          pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
          __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifdef NEED_DL_SYSINFO
          /* Copy the sysinfo value from the parent.  */
          pd->header.sysinfo = THREAD_GETMEM (THREAD_SELF, header.sysinfo);
#endif

          /* The process ID is also the same as that of the caller.  */
          pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

          /* Allocate the DTV for this thread.  */
          if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
            {
              /* Something went wrong.  */
              assert (errno == ENOMEM);

              /* Free the stack memory we just allocated.  */
              (void) munmap (mem, size);

              return EAGAIN;
            }

          /* Prepare to modify global data.  */
          lll_lock (stack_cache_lock);

          /* And add to the list of stacks in use.  */
          list_add (&pd->list, &stack_used);

          lll_unlock (stack_cache_lock);
        }

      /* Note that all of the stack and the thread descriptor is
         zeroed.  This means we do not have to initialize fields
         with initial value zero.  This is specifically true for
         the 'tid' field which is always set back to zero once the
         stack is not used anymore and for the 'guardsize' field
         which will be read next.  */

      /* Create or resize the guard area if necessary.  */
      if (__builtin_expect (guardsize > pd->guardsize, 0))
        {
#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
#else
          char *guard = mem;
#endif
          if (mprotect (guard, guardsize, PROT_NONE) != 0)
            {
              int err;
            mprot_error:
              err = errno;

              lll_lock (stack_cache_lock);

              /* Remove the thread from the list.  */
              list_del (&pd->list);

              lll_unlock (stack_cache_lock);

              /* Get rid of the TLS block we allocated.  */
              _dl_deallocate_tls (TLS_TPADJ (pd), false);

              /* Free the stack memory regardless of whether the size
                 of the cache is over the limit or not.  If this piece
                 of memory caused problems we better do not use it
                 anymore.  Uh, and we ignore possible errors.  There
                 is nothing we could do.  */
              (void) munmap (mem, size);

              return err;
            }

          pd->guardsize = guardsize;
        }
      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
                                 0))
        {
          /* The old guard area is too large.  */

#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
          char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);

          if (oldguard < guard
              && mprotect (oldguard, guard - oldguard, prot) != 0)
            goto mprot_error;

          if (mprotect (guard + guardsize,
                        oldguard + pd->guardsize - guard - guardsize,
                        prot) != 0)
            goto mprot_error;
#else
          if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
                        prot) != 0)
            goto mprot_error;
#endif

          pd->guardsize = guardsize;
        }
    }

#ifndef __ASSUME_CLONE_STOPPED
  /* Initialize the lock.  We have to do this unconditionally if the
     CLONE_STOPPED flag is not available since then the stillborn
     thread could be canceled while the lock is taken.  */
  pd->lock = LLL_LOCK_INITIALIZER;
#endif

  /* We place the thread descriptor at the end of the stack.  */
  *pdp = pd;

#if TLS_TCB_AT_TP
  /* The stack begins before the TCB and the static TLS block.  */
  stacktop = ((char *) (pd + 1) - __static_tls_size);
#elif TLS_DTV_AT_TP
  stacktop = (char *) (pd - 1);
#endif

#ifdef NEED_SEPARATE_REGISTER_STACK
  *stack = pd->stackblock;
  *stacksize = stacktop - *stack;
#else
  *stack = stacktop;
#endif

  return 0;
}
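
/* Illustrative note (not from the original sources): a rough sketch of
   the resulting layout of a library-allocated stack on a TLS_TCB_AT_TP
   machine, addresses growing to the right and sizes not to scale:

     mem                                               mem + size
      | guard (PROT_NONE) | usable stack ... | static TLS | TCB (pd) |
                                             ^
                                          stacktop

   On TLS_DTV_AT_TP machines the descriptor sits below the TCB and the
   static TLS block instead, and stacktop lies just below PD.  */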


void
internal_function
__deallocate_stack (struct pthread *pd)
{
  lll_lock (stack_cache_lock);

  /* Remove the thread from the list of threads with user defined
     stacks.  */
  list_del (&pd->list);

  /* Not much to do.  Just free the mmap()ed memory.  Note that we do
     not reset the 'used' flag in the 'tid' field.  This is done by
     the kernel.  If no thread has been created yet this field is
     still zero.  */
  if (__builtin_expect (! pd->user_stack, 1))
    (void) queue_stack (pd);
  else
    /* Free the memory associated with the ELF TLS.  */
    _dl_deallocate_tls (TLS_TPADJ (pd), false);

  lll_unlock (stack_cache_lock);
}


int
internal_function
__make_stacks_executable (void)
{
#ifdef NEED_SEPARATE_REGISTER_STACK
  const size_t pagemask = ~(__getpagesize () - 1);
#endif

  lll_lock (stack_cache_lock);

  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *const pd = list_entry (runp, struct pthread, list);
#ifdef NEED_SEPARATE_REGISTER_STACK
      void *stack = (pd->stackblock
                     + (((((pd->stackblock_size - pd->guardsize) / 2)
                          & pagemask) + pd->guardsize) & pagemask));
      size_t len = pd->stackblock + pd->stackblock_size - stack;
#else
      void *stack = pd->stackblock + pd->guardsize;
      size_t len = pd->stackblock_size - pd->guardsize;
#endif
      if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
        {
          lll_unlock (stack_cache_lock);
          return errno;
        }
    }

  lll_unlock (stack_cache_lock);

  _dl_make_stack_executable ();

  return 0;
}


/* In case of a fork() call the memory allocation in the child will be
   the same but only one thread is running.  All stacks except that of
   the one running thread are not used anymore.  We have to recycle
   them.  */
void
__reclaim_stacks (void)
{
  struct pthread *self = (struct pthread *) THREAD_SELF;

  /* No locking necessary.  The caller is the only stack in use.  */

  /* Mark all stacks except the still running one as free.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);
      if (curp != self)
        {
          /* This marks the stack as free.  */
          curp->tid = 0;

          /* The PID field must be initialized for the new process.  */
          curp->pid = self->pid;

          /* Account for the size of the stack.  */
          stack_cache_actsize += curp->stackblock_size;
        }
    }

  /* Add the stacks of all running threads to the cache.  */
  list_splice (&stack_used, &stack_cache);

  /* Remove the entry for the current thread from the cache list
     and add it to the list of running threads.  Which of the two
     lists is decided by the user_stack flag.  */
  list_del (&self->list);

  /* Re-initialize the lists for all the threads.  */
  INIT_LIST_HEAD (&stack_used);
  INIT_LIST_HEAD (&__stack_user);

  if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
    list_add (&self->list, &__stack_user);
  else
    list_add (&self->list, &stack_used);

  /* There is one thread running.  */
  __nptl_nthreads = 1;

  /* Initialize the lock.  */
  stack_cache_lock = LLL_LOCK_INITIALIZER;
}
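
#if 0
/* Illustrative sketch only (not compiled): the fork path that makes
   __reclaim_stacks necessary.  The child inherits every thread's stack
   mapping but only the forking thread survives, so the child-side fork
   handling in NPTL calls __reclaim_stacks before returning to user
   code.  */
pid_t child = fork ();
if (child == 0)
  __reclaim_stacks ();
#endif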


/* Find a thread given the thread ID.  */
struct pthread *
__find_thread_by_id (pid_t tid)
{
  struct pthread *result = NULL;

  lll_lock (stack_cache_lock);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          break;
        }
    }

 out:
  lll_unlock (stack_cache_lock);

  return result;
}


static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
  dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

  /* Fill in the DTV slot so that a later LD/GD access will find it.  */
  dtv[map->l_tls_modid].pointer = dest;

  /* Initialize the memory.  __mempcpy returns a pointer just past the
     copied initialization image, so the memset clears the remainder of
     the block to zero.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}

void
attribute_hidden
__pthread_init_static_tls (struct link_map *map)
{
  lll_lock (stack_cache_lock);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  lll_unlock (stack_cache_lock);
}