|  | /* Copyright (C) 2005-2021 Free Software Foundation, Inc. | 
|  | Contributed by Richard Henderson <rth@redhat.com>. | 
|  |  | 
|  | This file is part of the GNU Offloading and Multi Processing Library | 
|  | (libgomp). | 
|  |  | 
|  | Libgomp is free software; you can redistribute it and/or modify it | 
|  | under the terms of the GNU General Public License as published by | 
|  | the Free Software Foundation; either version 3, or (at your option) | 
|  | any later version. | 
|  |  | 
|  | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
|  | FOR A PARTICULAR PURPOSE.  See the GNU General Public License for | 
|  | more details. | 
|  |  | 
|  | Under Section 7 of GPL version 3, you are granted additional | 
|  | permissions described in the GCC Runtime Library Exception, version | 
|  | 3.1, as published by the Free Software Foundation. | 
|  |  | 
|  | You should have received a copy of the GNU General Public License and | 
|  | a copy of the GCC Runtime Library Exception along with this program; | 
|  | see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see | 
|  | <http://www.gnu.org/licenses/>.  */ | 
|  |  | 
|  | /* This file handles the maintenance of threads in response to team | 
|  | creation and termination.  */ | 
|  |  | 
|  | #include "libgomp.h" | 
|  | #include "pool.h" | 
|  | #include <stdlib.h> | 
|  | #include <string.h> | 
|  |  | 
|  | #ifdef LIBGOMP_USE_PTHREADS | 
/* Default attributes gomp_team_start uses when it creates threads.  */
pthread_attr_t gomp_thread_attr;

/* Key whose destructor callback is gomp_free_thread; it is created in
   initialize_team and deleted in team_destructor.  */
pthread_key_t gomp_thread_destructor;


/* The libgomp per-thread data structure.  With native or emulated TLS it
   is a thread-local object; otherwise each thread's structure is reached
   through the pthread key below.  */
#if defined (HAVE_TLS) || defined (USE_EMUTLS)
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
|  |  | 
|  |  | 
|  | /* This structure is used to communicate across pthread_create.  */ | 
|  |  | 
|  | struct gomp_thread_start_data | 
|  | { | 
|  | void (*fn) (void *); | 
|  | void *fn_data; | 
|  | struct gomp_team_state ts; | 
|  | struct gomp_task *task; | 
|  | struct gomp_thread_pool *thread_pool; | 
|  | unsigned int place; | 
|  | bool nested; | 
|  | pthread_t handle; | 
|  | }; | 
|  |  | 
|  |  | 
|  | /* This function is a pthread_create entry point.  This contains the idle | 
|  | loop in which a thread waits to be called up to become part of a team.  */ | 
|  |  | 
|  | static void * | 
|  | gomp_thread_start (void *xdata) | 
|  | { | 
|  | struct gomp_thread_start_data *data = xdata; | 
|  | struct gomp_thread *thr; | 
|  | struct gomp_thread_pool *pool; | 
|  | void (*local_fn) (void *); | 
|  | void *local_data; | 
|  |  | 
|  | #if defined HAVE_TLS || defined USE_EMUTLS | 
|  | thr = &gomp_tls_data; | 
|  | #else | 
|  | struct gomp_thread local_thr; | 
|  | thr = &local_thr; | 
|  | #endif | 
|  | gomp_sem_init (&thr->release, 0); | 
|  |  | 
|  | /* Extract what we need from data.  */ | 
|  | local_fn = data->fn; | 
|  | local_data = data->fn_data; | 
|  | thr->thread_pool = data->thread_pool; | 
|  | thr->ts = data->ts; | 
|  | thr->task = data->task; | 
|  | thr->place = data->place; | 
|  | #ifdef GOMP_NEEDS_THREAD_HANDLE | 
|  | thr->handle = data->handle; | 
|  | #endif | 
|  | #if !(defined HAVE_TLS || defined USE_EMUTLS) | 
|  | pthread_setspecific (gomp_tls_key, thr); | 
|  | #endif | 
|  |  | 
|  | thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release; | 
|  |  | 
|  | /* Make thread pool local. */ | 
|  | pool = thr->thread_pool; | 
|  |  | 
|  | if (data->nested) | 
|  | { | 
|  | struct gomp_team *team = thr->ts.team; | 
|  | struct gomp_task *task = thr->task; | 
|  |  | 
|  | gomp_barrier_wait (&team->barrier); | 
|  |  | 
|  | local_fn (local_data); | 
|  | gomp_team_barrier_wait_final (&team->barrier); | 
|  | gomp_finish_task (task); | 
|  | gomp_barrier_wait_last (&team->barrier); | 
|  | } | 
|  | else | 
|  | { | 
|  | pool->threads[thr->ts.team_id] = thr; | 
|  |  | 
|  | gomp_simple_barrier_wait (&pool->threads_dock); | 
|  | do | 
|  | { | 
|  | struct gomp_team *team = thr->ts.team; | 
|  | struct gomp_task *task = thr->task; | 
|  |  | 
|  | local_fn (local_data); | 
|  | gomp_team_barrier_wait_final (&team->barrier); | 
|  | gomp_finish_task (task); | 
|  |  | 
|  | gomp_simple_barrier_wait (&pool->threads_dock); | 
|  |  | 
|  | local_fn = thr->fn; | 
|  | local_data = thr->data; | 
|  | thr->fn = NULL; | 
|  | } | 
|  | while (local_fn); | 
|  | } | 
|  |  | 
|  | gomp_sem_destroy (&thr->release); | 
|  | pthread_detach (pthread_self ()); | 
|  | thr->thread_pool = NULL; | 
|  | thr->task = NULL; | 
|  | return NULL; | 
|  | } | 
|  | #endif | 
|  |  | 
|  | static inline struct gomp_team * | 
|  | get_last_team (unsigned nthreads) | 
|  | { | 
|  | struct gomp_thread *thr = gomp_thread (); | 
|  | if (thr->ts.team == NULL) | 
|  | { | 
|  | struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads); | 
|  | struct gomp_team *last_team = pool->last_team; | 
|  | if (last_team != NULL && last_team->nthreads == nthreads) | 
|  | { | 
|  | pool->last_team = NULL; | 
|  | return last_team; | 
|  | } | 
|  | } | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | /* Create a new team data structure.  */ | 
|  |  | 
|  | struct gomp_team * | 
|  | gomp_new_team (unsigned nthreads) | 
|  | { | 
|  | struct gomp_team *team; | 
|  | int i; | 
|  |  | 
|  | team = get_last_team (nthreads); | 
|  | if (team == NULL) | 
|  | { | 
|  | size_t extra = sizeof (team->ordered_release[0]) | 
|  | + sizeof (team->implicit_task[0]); | 
|  | team = team_malloc (sizeof (*team) + nthreads * extra); | 
|  |  | 
|  | #ifndef HAVE_SYNC_BUILTINS | 
|  | gomp_mutex_init (&team->work_share_list_free_lock); | 
|  | #endif | 
|  | gomp_barrier_init (&team->barrier, nthreads); | 
|  | gomp_mutex_init (&team->task_lock); | 
|  |  | 
|  | team->nthreads = nthreads; | 
|  | } | 
|  |  | 
|  | team->work_share_chunk = 8; | 
|  | #ifdef HAVE_SYNC_BUILTINS | 
|  | team->single_count = 0; | 
|  | #endif | 
|  | team->work_shares_to_free = &team->work_shares[0]; | 
|  | gomp_init_work_share (&team->work_shares[0], 0, nthreads); | 
|  | team->work_shares[0].next_alloc = NULL; | 
|  | team->work_share_list_free = NULL; | 
|  | team->work_share_list_alloc = &team->work_shares[1]; | 
|  | for (i = 1; i < 7; i++) | 
|  | team->work_shares[i].next_free = &team->work_shares[i + 1]; | 
|  | team->work_shares[i].next_free = NULL; | 
|  |  | 
|  | gomp_sem_init (&team->master_release, 0); | 
|  | team->ordered_release = (void *) &team->implicit_task[nthreads]; | 
|  | team->ordered_release[0] = &team->master_release; | 
|  |  | 
|  | priority_queue_init (&team->task_queue); | 
|  | team->task_count = 0; | 
|  | team->task_queued_count = 0; | 
|  | team->task_running_count = 0; | 
|  | team->work_share_cancelled = 0; | 
|  | team->team_cancelled = 0; | 
|  |  | 
|  | team->task_detach_count = 0; | 
|  |  | 
|  | return team; | 
|  | } | 
|  |  | 
|  |  | 
|  | /* Free a team data structure.  */ | 
|  |  | 
|  | static void | 
|  | free_team (struct gomp_team *team) | 
|  | { | 
|  | #ifndef HAVE_SYNC_BUILTINS | 
|  | gomp_mutex_destroy (&team->work_share_list_free_lock); | 
|  | #endif | 
|  | gomp_barrier_destroy (&team->barrier); | 
|  | gomp_mutex_destroy (&team->task_lock); | 
|  | priority_queue_free (&team->task_queue); | 
|  | team_free (team); | 
|  | } | 
|  |  | 
|  | static void | 
|  | gomp_free_pool_helper (void *thread_pool) | 
|  | { | 
|  | struct gomp_thread *thr = gomp_thread (); | 
|  | struct gomp_thread_pool *pool | 
|  | = (struct gomp_thread_pool *) thread_pool; | 
|  | gomp_simple_barrier_wait_last (&pool->threads_dock); | 
|  | gomp_sem_destroy (&thr->release); | 
|  | thr->thread_pool = NULL; | 
|  | thr->task = NULL; | 
|  | #ifdef LIBGOMP_USE_PTHREADS | 
|  | pthread_detach (pthread_self ()); | 
|  | pthread_exit (NULL); | 
|  | #elif defined(__nvptx__) | 
|  | asm ("exit;"); | 
|  | #elif defined(__AMDGCN__) | 
|  | asm ("s_dcache_wb\n\t" | 
|  | "s_endpgm"); | 
|  | #else | 
|  | #error gomp_free_pool_helper must terminate the thread | 
|  | #endif | 
|  | } | 
|  |  | 
|  | /* Free a thread pool and release its threads. */ | 
|  |  | 
|  | void | 
|  | gomp_free_thread (void *arg __attribute__((unused))) | 
|  | { | 
|  | struct gomp_thread *thr = gomp_thread (); | 
|  | struct gomp_thread_pool *pool = thr->thread_pool; | 
|  | if (pool) | 
|  | { | 
|  | if (pool->threads_used > 0) | 
|  | { | 
|  | int i; | 
|  | for (i = 1; i < pool->threads_used; i++) | 
|  | { | 
|  | struct gomp_thread *nthr = pool->threads[i]; | 
|  | nthr->fn = gomp_free_pool_helper; | 
|  | nthr->data = pool; | 
|  | } | 
|  | /* This barrier undocks threads docked on pool->threads_dock.  */ | 
|  | gomp_simple_barrier_wait (&pool->threads_dock); | 
|  | /* And this waits till all threads have called gomp_barrier_wait_last | 
|  | in gomp_free_pool_helper.  */ | 
|  | gomp_simple_barrier_wait (&pool->threads_dock); | 
|  | /* Now it is safe to destroy the barrier and free the pool.  */ | 
|  | gomp_simple_barrier_destroy (&pool->threads_dock); | 
|  |  | 
|  | #ifdef HAVE_SYNC_BUILTINS | 
|  | __sync_fetch_and_add (&gomp_managed_threads, | 
|  | 1L - pool->threads_used); | 
|  | #else | 
|  | gomp_mutex_lock (&gomp_managed_threads_lock); | 
|  | gomp_managed_threads -= pool->threads_used - 1L; | 
|  | gomp_mutex_unlock (&gomp_managed_threads_lock); | 
|  | #endif | 
|  | } | 
|  | if (pool->last_team) | 
|  | free_team (pool->last_team); | 
|  | #ifndef __nvptx__ | 
|  | team_free (pool->threads); | 
|  | team_free (pool); | 
|  | #endif | 
|  | thr->thread_pool = NULL; | 
|  | } | 
|  | if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0)) | 
|  | gomp_team_end (); | 
|  | if (thr->task != NULL) | 
|  | { | 
|  | struct gomp_task *task = thr->task; | 
|  | gomp_end_task (); | 
|  | free (task); | 
|  | } | 
|  | } | 
|  |  | 
/* Launch a team.

   The calling thread becomes thread 0 of TEAM (presumably obtained from
   gomp_new_team (NTHREADS) -- the caller is not visible here), and the
   remaining NTHREADS - 1 members are set up to run FN (DATA).  For a
   non-nested team, idle threads docked in the caller's thread pool are
   reused first and only the shortfall is created with pthread_create;
   for a nested team every additional thread is newly created.  The low
   three bits of FLAGS carry the proc_bind request, and TASKGROUP is
   stored in the implicit task of every team member.

   The caller's previous team state is saved in TEAM->prev_ts.  With
   NTHREADS == 1 the function returns right after setting up the calling
   thread; otherwise it returns after the calling thread has passed the
   barrier that releases the other members.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  /* Place computation state: P is the current (0-based) place, S, REST
     and K are helpers whose meaning depends on the proc_bind model, see
     the first switch (bind) below.  */
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  /* Number of old pool threads still parked in AFFINITY_THR.  */
  unsigned int affinity_count = 0;
  /* Per-place lists of old pool threads, chained through their data
     field; only allocated when pool threads have to be permuted.  */
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  /* NESTED is true when the caller is already inside a team.  */
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  /* Use the current task's ICVs, or the global ones if there is no
     task.  */
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      /* Places are in use but this thread has no place recorded yet.  */
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  /* Turn the calling thread into thread 0 of the new team.  */
  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  /* Work out the nthreads-var and bind-var values the team's implicit
     tasks get; the per-level lists, when present, override the
     inherited value for the new level.  */
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  /* BIND is the policy used for placing this team's threads; BIND_VAR
     may additionally be replaced from the per-level list and is what
     the implicit tasks store.  */
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  /* A team of one needs no other threads.  */
  if (nthreads == 1)
    return;

  /* I is the team id of the next thread to set up.  */
  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify pool->threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      /* N is the number of team ids that can be served by threads
         already in the pool.  */
      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the pool->threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand pool->threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              /* Advance P (and the subpartition, for spread) to the
                 place thread I should run on.  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      /* Current place is full: move to the next one,
                         wrapping around inside the original
                         partition.  */
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              /* The pool thread currently in slot I can be used as is
                 only if no permutation has been started yet and its
                 place matches what was just computed.  */
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      /* First mismatch: sort the remaining old pool
                         threads into per-place lists.  The array is
                         heap allocated only for large partitions.  */
                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              /* Push thread J onto the list of its
                                 place, linking through its data
                                 field.  */
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  /* No old threads left to hand out.  */
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      /* Nothing parked on place P.  Only proc_bind true
                         accepts a thread from elsewhere in the
                         subpartition; otherwise slot I stays empty for
                         now.  */
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  /* Pop a thread from the list of place L and put it
                     into slot I.  */
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          /* Prepare the reused thread's state for the new team.  */
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
          nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          /* The docked thread reads FN and DATA once it is released.  */
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      /* Every team slot was filled from the pool: nothing to create.  */
      if (i == nthreads)
        goto do_release;

    }

  /* Account for the threads about to be created (and, temporarily, for
     the AFFINITY_COUNT old threads that have not exited yet).  */
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      /* With no previous pool threads the calling thread itself is among
         the NTHREADS and is not newly created.  */
      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  /* With places in use each new thread needs its own affinity setting,
     so work on a private attribute object that starts out with the
     stack size of the global one.  */
  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  /* One start-up block per remaining team slot.  The blocks stay valid
     until this function returns; the new threads read them in
     gomp_thread_start before the release barrier below completes.  */
  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          /* Same place computation as in the loop over idle threads
             above.  */
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          /* Slot I was already filled with a reused pool thread during
             the permutation pass; only the place bookkeeping needed
             replaying.  */
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
      start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  /* Destroy the private attribute object if it ended up being used.  */
  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  /* Release the team: new nested threads wait on the team barrier,
     pooled threads on the pool's dock.  */
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT if non-zero will be always at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  /* Display affinity when requested, but only if the team is nested, its
     size differs from the previous pool size, or pool threads had to be
     permuted.  */
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              /* Nested threads are not recorded in pool->threads, so
                 rewind to the first start-up block and report from
                 there.  */
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  /* Only the heap-allocated variant of AFFINITY_THR needs freeing.  */
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif
|  |  | 
|  |  | 
|  | /* Terminate the current team.  This is only to be called by the master | 
|  | thread.  We assume that we must wait for the other threads.  */ | 
|  |  | 
|  | void | 
|  | gomp_team_end (void) | 
|  | { | 
|  | struct gomp_thread *thr = gomp_thread (); | 
|  | struct gomp_team *team = thr->ts.team; | 
|  |  | 
|  | /* This barrier handles all pending explicit threads. | 
|  | As #pragma omp cancel parallel might get awaited count in | 
|  | team->barrier in a inconsistent state, we need to use a different | 
|  | counter here.  */ | 
|  | gomp_team_barrier_wait_final (&team->barrier); | 
|  | if (__builtin_expect (team->team_cancelled, 0)) | 
|  | { | 
|  | struct gomp_work_share *ws = team->work_shares_to_free; | 
|  | do | 
|  | { | 
|  | struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws); | 
|  | if (next_ws == NULL) | 
|  | gomp_ptrlock_set (&ws->next_ws, ws); | 
|  | gomp_fini_work_share (ws); | 
|  | ws = next_ws; | 
|  | } | 
|  | while (ws != NULL); | 
|  | } | 
|  | else | 
|  | gomp_fini_work_share (thr->ts.work_share); | 
|  |  | 
|  | gomp_end_task (); | 
|  | thr->ts = team->prev_ts; | 
|  |  | 
|  | if (__builtin_expect (thr->ts.level != 0, 0)) | 
|  | { | 
|  | #ifdef HAVE_SYNC_BUILTINS | 
|  | __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads); | 
|  | #else | 
|  | gomp_mutex_lock (&gomp_managed_threads_lock); | 
|  | gomp_managed_threads -= team->nthreads - 1L; | 
|  | gomp_mutex_unlock (&gomp_managed_threads_lock); | 
|  | #endif | 
|  | /* This barrier has gomp_barrier_wait_last counterparts | 
|  | and ensures the team can be safely destroyed.  */ | 
|  | gomp_barrier_wait (&team->barrier); | 
|  | } | 
|  |  | 
|  | if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0)) | 
|  | { | 
|  | struct gomp_work_share *ws = team->work_shares[0].next_alloc; | 
|  | do | 
|  | { | 
|  | struct gomp_work_share *next_ws = ws->next_alloc; | 
|  | free (ws); | 
|  | ws = next_ws; | 
|  | } | 
|  | while (ws != NULL); | 
|  | } | 
|  | gomp_sem_destroy (&team->master_release); | 
|  |  | 
|  | if (__builtin_expect (thr->ts.team != NULL, 0) | 
|  | || __builtin_expect (team->nthreads == 1, 0)) | 
|  | free_team (team); | 
|  | else | 
|  | { | 
|  | struct gomp_thread_pool *pool = thr->thread_pool; | 
|  | if (pool->last_team) | 
|  | free_team (pool->last_team); | 
|  | pool->last_team = team; | 
|  | gomp_release_thread_pool (pool); | 
|  | } | 
|  | } | 
|  |  | 
|  | #ifdef LIBGOMP_USE_PTHREADS | 
|  |  | 
|  | /* Constructors for this file.  */ | 
|  |  | 
|  | static void __attribute__((constructor)) | 
|  | initialize_team (void) | 
|  | { | 
|  | #if !defined HAVE_TLS && !defined USE_EMUTLS | 
|  | static struct gomp_thread initial_thread_tls_data; | 
|  |  | 
|  | pthread_key_create (&gomp_tls_key, NULL); | 
|  | pthread_setspecific (gomp_tls_key, &initial_thread_tls_data); | 
|  | #endif | 
|  |  | 
|  | if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0) | 
|  | gomp_fatal ("could not create thread pool destructor."); | 
|  | } | 
|  |  | 
|  | static void __attribute__((destructor)) | 
|  | team_destructor (void) | 
|  | { | 
|  | /* Without this dlclose on libgomp could lead to subsequent | 
|  | crashes.  */ | 
|  | pthread_key_delete (gomp_thread_destructor); | 
|  | } | 
|  |  | 
|  | /* Similar to gomp_free_pool_helper, but don't detach itself, | 
|  | gomp_pause_host will pthread_join those threads.  */ | 
|  |  | 
|  | static void | 
|  | gomp_pause_pool_helper (void *thread_pool) | 
|  | { | 
|  | struct gomp_thread *thr = gomp_thread (); | 
|  | struct gomp_thread_pool *pool | 
|  | = (struct gomp_thread_pool *) thread_pool; | 
|  | gomp_simple_barrier_wait_last (&pool->threads_dock); | 
|  | gomp_sem_destroy (&thr->release); | 
|  | thr->thread_pool = NULL; | 
|  | thr->task = NULL; | 
|  | pthread_exit (NULL); | 
|  | } | 
|  |  | 
|  | /* Free a thread pool and release its threads.  Return non-zero on | 
|  | failure.  */ | 
|  |  | 
|  | int | 
|  | gomp_pause_host (void) | 
|  | { | 
|  | struct gomp_thread *thr = gomp_thread (); | 
|  | struct gomp_thread_pool *pool = thr->thread_pool; | 
|  | if (thr->ts.level) | 
|  | return -1; | 
|  | if (pool) | 
|  | { | 
|  | if (pool->threads_used > 0) | 
|  | { | 
|  | int i; | 
|  | pthread_t *thrs | 
|  | = gomp_alloca (sizeof (pthread_t) * pool->threads_used); | 
|  | for (i = 1; i < pool->threads_used; i++) | 
|  | { | 
|  | struct gomp_thread *nthr = pool->threads[i]; | 
|  | nthr->fn = gomp_pause_pool_helper; | 
|  | nthr->data = pool; | 
|  | thrs[i] = gomp_thread_to_pthread_t (nthr); | 
|  | } | 
|  | /* This barrier undocks threads docked on pool->threads_dock.  */ | 
|  | gomp_simple_barrier_wait (&pool->threads_dock); | 
|  | /* And this waits till all threads have called gomp_barrier_wait_last | 
|  | in gomp_pause_pool_helper.  */ | 
|  | gomp_simple_barrier_wait (&pool->threads_dock); | 
|  | /* Now it is safe to destroy the barrier and free the pool.  */ | 
|  | gomp_simple_barrier_destroy (&pool->threads_dock); | 
|  |  | 
|  | #ifdef HAVE_SYNC_BUILTINS | 
|  | __sync_fetch_and_add (&gomp_managed_threads, | 
|  | 1L - pool->threads_used); | 
|  | #else | 
|  | gomp_mutex_lock (&gomp_managed_threads_lock); | 
|  | gomp_managed_threads -= pool->threads_used - 1L; | 
|  | gomp_mutex_unlock (&gomp_managed_threads_lock); | 
|  | #endif | 
|  | for (i = 1; i < pool->threads_used; i++) | 
|  | pthread_join (thrs[i], NULL); | 
|  | } | 
|  | if (pool->last_team) | 
|  | free_team (pool->last_team); | 
|  | #ifndef __nvptx__ | 
|  | team_free (pool->threads); | 
|  | team_free (pool); | 
|  | #endif | 
|  | thr->thread_pool = NULL; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  | #endif | 
|  |  | 
|  | struct gomp_task_icv * | 
|  | gomp_new_icv (void) | 
|  | { | 
|  | struct gomp_thread *thr = gomp_thread (); | 
|  | struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task)); | 
|  | gomp_init_task (task, NULL, &gomp_global_icv); | 
|  | thr->task = task; | 
|  | #ifdef LIBGOMP_USE_PTHREADS | 
|  | pthread_setspecific (gomp_thread_destructor, thr); | 
|  | #endif | 
|  | return &task->icv; | 
|  | } |