/* Copyright (C) 2005-2014 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#ifdef HAVE_TLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
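/* Note: the gomp_thread () accessor in libgomp.h resolves to the address
   of gomp_tls_data when HAVE_TLS is defined, and otherwise reads the
   gomp_tls_key slot via pthread_getspecific.  */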


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};
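/* Lifetime note: gomp_team_start below allocates these records with
   gomp_alloca, i.e. on the launching thread's stack.  That is safe
   because each new thread copies the fields it needs in
   gomp_thread_start before its first barrier wait, and the launcher
   does not return from gomp_team_start until it has joined that same
   barrier at do_release.  */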


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

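      /* Non-nested worker: park in the pool.  Each pass through the loop
         below runs one team.  gomp_team_start fills in thr->fn/thr->data
         and releases pool->threads_dock; a NULL fn after the dock wait
         means this thread was not picked for the next team, so it drops
         out of the loop and exits.  */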
      gomp_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}


/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
                                      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);
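  /* Layout of this single allocation (see struct gomp_team in libgomp.h):
     the team itself is followed by NTHREADS implicit_task entries
     (flexible trailing array) and then by NTHREADS ordered_release
     pointers; team->ordered_release is pointed past
     implicit_task[nthreads] below.  */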

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
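  /* team->work_shares is an inline cache of work_share_chunk (8) entries.
     Entry 0 is initialized for immediate use by the team; entries 1..7
     are chained onto the allocation free list so the first few work
     sharing constructs need no further allocation.  */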
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool. */

static struct gomp_thread_pool *gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof(struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */
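
/* For orientation, this is roughly how the #pragma omp parallel entry
   point in parallel.c drives the functions in this file (a sketch, not
   part of team.c itself; see parallel.c for the real code):

     void
     GOMP_parallel (void (*fn) (void *), void *data,
                    unsigned num_threads, unsigned int flags)
     {
       num_threads = gomp_resolve_num_threads (num_threads, 0);
       gomp_team_start (fn, data, num_threads, flags,
                        gomp_new_team (num_threads));
       fn (data);
       GOMP_parallel_end ();
     }

   The master runs FN itself; gomp_team_start only wakes or creates the
   other NUM_THREADS - 1 team members.  */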

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      thr->thread_pool->threads_busy = nthreads;
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
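      /* Rough meaning of the helpers (inferred from the code below):
         P is the 0-based index of the current place, S is either the
         number of threads per place (T > P) or the number of places per
         subpartition (T <= P), REST is the corresponding remainder, and
         K counts how many threads have been assigned to the current
         place; for omp_proc_bind_spread, K == 0 additionally marks the
         T <= P case.  */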
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify pool->threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the pool->threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand pool->threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
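              /* When binding to places, the idle thread sitting in slot I
                 may already be bound to a different place than the one
                 just computed.  In that case the remaining old threads
                 are sorted into per-place buckets (AFFINITY_THR, chained
                 through the ->data field) and a thread bound to the
                 desired place is pulled from its bucket if one exists;
                 otherwise this slot stays empty and a fresh thread is
                 created for it later on.  */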
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_barrier_reinit (&pool->threads_dock,
                                     nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}