#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#include "tsan_annotations.h"
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
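
/* Return the global thread id (gtid) of the calling thread. Depending on
   __kmp_gtid_mode this reads thread-local data (TDATA), keyed TLS, or, as a
   fallback, matches the caller's stack address against the recorded stack
   extents of all registered threads. */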
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
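
/* Check that the calling thread's stack does not overlap the stack of any
   other registered thread. Prints the stack bounds when storage mapping is
   enabled and aborts with a fatal error if an overlap is detected. */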
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
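
/* Print a single "OMP storage map" line describing the address range
   [p1, p2] of the given size for gtid. With KMP_PRINT_DATA_PLACEMENT enabled
   it additionally reports the memory node backing each page of the range. */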
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) /* don't print span if only one page */
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill
  // them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown.
    __kmp_global.g.g_abort = SIGABRT;
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() { __kmp_init_memkind(); }
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_init_bootstrap_lock(lck); // make the lock released
}
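
/* Called on Windows process detach: wait until no registered thread other
   than the detaching one is still alive, then reset the bootstrap locks so
   that later shutdown code cannot block on a lock held by a thread that the
   OS has already terminated. */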
static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i, thread_count;

  // Check that no other threads registered with the library are still alive.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req || gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive)
        ++thread_count;
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume we are alone; now it is safe to check and reset the locks.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
  __kmp_reset_lock(&__kmp_stdio_lock);
}
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved != NULL means the process is terminating; other threads
      // may already have been killed by the OS, so reset the locks first.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
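
/* __kmp_parallel_deo / __kmp_parallel_dxo implement entry and exit of an
   "ordered" construct inside a parallel region: the entering thread waits
   until its tid appears in t_ordered.dt.t_value, and the exiting thread
   passes the turn to the next tid in the team. */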
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
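
/* __kmp_enter_single: returns TRUE if the calling thread should execute the
   body of the "single" construct (it won the atomic race on t_construct),
   FALSE otherwise. When push_ws is set the workshare is also pushed on the
   consistency-check stack. */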
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
  if (status) {
    __kmp_itt_single_start(gtid);
  }
  return status;
}

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
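
/* __kmp_reserve_threads: given the requested team size (set_nthreads), apply
   the dynamic-adjustment mode (load balance, thread limit, or random), the
   device and contention-group thread limits, and the capacity of the threads
   array, and return the number of threads actually granted to the new team
   (1 means the region will be serialized). */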
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }
  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the limit of the current contention group).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}
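
/* __kmp_fork_team_threads: bind the master thread to the new team and,
   unless a hot team is being reused, allocate/initialize the worker threads,
   sync their barrier arrival counters with the team, and propagate
   teams-construct state; finally partition affinity places if enabled. */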
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
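
/* On x86/x86_64 the following helpers propagate the master's floating point
   control state (x87 control word and MXCSR) into the team at fork and
   restore the hardware registers from the team at join; on other
   architectures they compile away to no-ops. */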
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team; avoid writing the team cache line unless the values actually changed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime checks this flag to decide whether to restore
    // the registers at join, so it must be kept up to date.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here, don't write to the cache line unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite: set the hardware registers to the updated values from the
// team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
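
/* __kmp_serialized_parallel: set up (or reuse) the serial team so that the
   calling thread executes a serialized parallel region; nested serialized
   regions simply bump t_serialized and push a fresh dispatch buffer. Most of
   the work is bookkeeping of ICVs, the task state, and OMPT callbacks. */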
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
       implicit task for each serialized task represented by
       team->t.t_serialized? */
    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);
    // Thread value exists in the nested nthreads array for the next nested
    // level.
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();

  } else {
    /* this serialized team is already being used,
       that's fine, just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array.
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level.
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions.
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; content was swapped

    /* OMPT implicit task begin */
    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr), implicit_task_data,
          1, __kmp_tid_from_gtid(global_tid), ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}
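
/* __kmp_fork_call: the main fork point of the runtime. Decides whether the
   parallel region can go parallel (via __kmp_reserve_threads), handles the
   serialized and teams-construct special cases, allocates and populates the
   team, propagates ICVs and FP control, and either invokes the microtask
   directly (GNU interface) or returns TRUE so the caller invokes it. */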
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with */
    /* some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  master_th->th.th_ident = loc;
  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // AC: This is start of parallel that is nested inside teams construct.
    // The team is actual (hot), all workers are ready at the fork barrier.
    // No lock needed to initialize the team a bit, then free workers.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);
    // Increment our nested depth levels, but not increase the serialization
    if (parent_team == master_th->th.th_serial_team) {
      // AC: we are in serialized parallel
      __kmpc_serialized_parallel(loc, gtid);
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

      if (call_context == fork_context_gnu) {
        // AC: need to decrement t_serialized for enquiry functions to work
        // correctly, will restore at join time
        parent_team->t.t_serialized--;
        return TRUE;
      }

      void *dummy;
      void **exit_frame_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      // AC: need to decrement t_serialized for enquiry functions to work
      // correctly, will restore at join time
      parent_team->t.t_serialized--;

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
                               );
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context) | ompt_parallel_team,
              return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lw_taskteam;
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
    }

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // AC: only can reduce number of threads dynamically, cannot increase
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels
      }
      master_th->th.th_set_nproc = 0;
    }

    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means debugger doesn't want to change num threads
        master_set_numthreads = nth;
      }
    }

    if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
         KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3 &&
        parent_team->t.t_active_level == 1 // only report frames at level 1
        && master_th->th.th_teams_size.nteams == 1) {
      kmp_uint64 tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      parent_team->t.t_region_time = tmp_time;
    }
    if (__itt_stack_caller_create_ptr) {
      // create new stack stitching id before entering fork barrier
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    if (call_context == fork_context_gnu)
      return TRUE;

    /* Invoke microtask for MASTER thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));
    KMP_MB(); /* Flush all pending memory write invalidates. */

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  /* determine how many new threads we can use */
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    // No need to take the forkjoin lock for serialized parallel outside of a
    // teams construct.
    if (nthreads > 1) {
      if ((get__max_active_levels(master_th) == 1 &&
           (root->r.r_in_parallel && !enter_teams)) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        // Free the lock here for single-thread execution; for multi-thread
        // execution it is freed after the team is created and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;
  /* create a serialized parallel region? */
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    if (call_context == fork_context_intel) {
      /* TODO this sucks, use the compiler itself to pass args! :) */
      master_th->th.th_serial_team->t.t_ident = loc;
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;
        // Get args from parent team for teams construct

        void *dummy;
        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was swapped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_frame_p = &dummy;
        }

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_frame_p
#endif
                                 );
        }

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }
          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        // team->t.t_pkfn = microtask;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
            *argv++ = va_arg(kmp_va_deref(ap), void *);
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct
            argv[i] = parent_team->t.t_argv[i];
        }
        // AC: revert change made in __kmpc_serialized_parallel()
        //     because initial code in teams should have level=0
        team->t.t_level--;
        // AC: call special invoker for outer "parallel" of teams construct
        invoker(gtid);
        if (ompt_enabled.enabled) {
          ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 0,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
          }
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_league,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
      } else {
        argv = args;
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
        KMP_MB();

        void *dummy;
        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was swapped
          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_frame_p = &dummy;
        }

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                 ,
                                 exit_frame_p
#endif
                                 );
        }

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
      }
    } else if (call_context == fork_context_gnu) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    KMP_MB();
    return FALSE;
  } // if (nthreads == 1)
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind_policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv =
      proc_bind_default; // proc_bind_default means don't update
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use current proc-bind-var for this
      // parallel region.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set master's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set master's task team to team's task team. Unless this is hot team, it
    // should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of master's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store master's task_state on stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore master's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
               __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id =
        __kmp_itt_stack_caller_create(); // create new stack stitching id
    // before entering fork barrier
  }

  // AC: skip __kmp_internal_fork at teams construct, let only master threads
  // execute
  if (ap) {
    __kmp_internal_fork(loc, gtid, team);
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));
  }

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for MASTER thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
  }

#if KMP_STATS_ENABLED
  // If was beginning of a teams construct, then reset thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  return TRUE;
}
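
/* Helpers for the OMPT side of __kmp_join_call: restore the thread state
   after the region and emit the parallel-end callback. */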
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
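
/* __kmp_join_call: the join point matching __kmp_fork_call. Waits for the
   team at the join barrier, emits ITT/OMPT end events, handles the teams
   construct case (keeping the team alive but restoring its size), restores
   the master's ICVs, dispatch, and task state, and frees the team. */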
void __kmp_join_call(ident_t *loc, int gtid,
                     enum fork_context_e fork_context, int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

  void *team_microtask = (void *)team->t.t_pkfn;
  // For the GOMP interface with serialized parallel, we need
  // __kmpc_end_serialized_parallel to call hooks for the OMPT
  // end-implicit-task and end-parallel events.
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        //     so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams, need to increment
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }

    return;
  }
  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    //     But there is barrier for external team (league).
    __kmp_internal_join(loc, gtid, team);
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
  }

  KMP_MB();

  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;

  if (__itt_stack_caller_create_ptr) {
    // destroy the stack stitching id after join barrier
    __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
  }
  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined", not
    // both)
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // AC: We need to leave the team structure intact at the end of parallel
    // inside the teams construct, so that at the next parallel the same (hot)
    // team works; only adjust nesting levels.
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }

    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore number of threads in the team if needed.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }

    return;
  }
2464 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2465 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2467 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2472 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2474 if (!master_th->th.th_teams_microtask ||
2475 team->t.t_level > master_th->th.th_teams_level) {
2477 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2479 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2482 if (ompt_enabled.enabled) {
2483 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2484 if (ompt_enabled.ompt_callback_implicit_task) {
2485 int flags = (team_microtask == (
void *)__kmp_teams_master)
2487 : ompt_task_implicit;
2488 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2489 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2490 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2491 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2493 task_info->frame.exit_frame = ompt_data_none;
2494 task_info->task_data = ompt_data_none;
2498 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2500 __kmp_pop_current_task_from_thread(master_th);
2502 #if KMP_AFFINITY_SUPPORTED
2504 master_th->th.th_first_place = team->t.t_first_place;
2505 master_th->th.th_last_place = team->t.t_last_place;
2506 #endif // KMP_AFFINITY_SUPPORTED
2507 master_th->th.th_def_allocator = team->t.t_def_allocator;
2509 updateHWFPControl(team);
2511 if (root->r.r_active != master_active)
2512 root->r.r_active = master_active;
2514 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2522 master_th->th.th_team = parent_team;
2523 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2524 master_th->th.th_team_master = parent_team->t.t_threads[0];
2525 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2528 if (parent_team->t.t_serialized &&
2529 parent_team != master_th->th.th_serial_team &&
2530 parent_team != root->r.r_root_team) {
2531 __kmp_free_team(root,
2532 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2533 master_th->th.th_serial_team = parent_team;
2536 if (__kmp_tasking_mode != tskm_immediate_exec) {
2537 if (master_th->th.th_task_state_top >
2539 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2541 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2542 master_th->th.th_task_state;
2543 --master_th->th.th_task_state_top;
2545 master_th->th.th_task_state =
2547 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2550 master_th->th.th_task_team =
2551 parent_team->t.t_task_team[master_th->th.th_task_state];
2553 (
"__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2554 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }
#endif

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}

/* Check whether we should push an internal control record onto the
   serial team stack.  If so, do it. */
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
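// The routines below back the user-visible ICV setters/getters.  As an
// illustration only (user code, not part of this file), a call such as
//
//   omp_set_num_threads(4);
//
// reaches __kmp_set_num_threads() through the API entry layer with the gtid of
// the calling thread, updates the nproc ICV via set__nproc(), and may shrink
// the root's hot team immediately so the released workers can be reused.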
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed; the last valid setting is kept.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK: the value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]
  } else {
    // If the input exceeds the upper limit, clamp it to the limit.
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
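// The two build-time asserts above guarantee that kmp_sched_t (the public
// schedule kinds) and enum sched_type (the internal kinds) are both int-sized,
// so the __kmp_sch_map[] lookups below and the storage of the kind in
// td_icvs.sched.r_sched_type remain well-defined conversions.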
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid; correct if needed.
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differentiate static chunked vs. unchunked: chunk should be invalid to
      // indicate an unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
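// Note that __kmp_get_schedule() folds the internal scheduling variants
// (e.g. kmp_sch_static_greedy/balanced, kmp_sch_guided_*_chunked) back to the
// coarse user-visible kinds before reporting them, mirroring the mapping
// applied in __kmp_set_schedule() above.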
2851 int __kmp_get_ancestor_thread_num(
int gtid,
int level) {
2857 KF_TRACE(10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2858 KMP_DEBUG_ASSERT(__kmp_init_serial);
2865 thr = __kmp_threads[gtid];
2866 team = thr->th.th_team;
2867 ii = team->t.t_level;
2871 if (thr->th.th_teams_microtask) {
2873 int tlevel = thr->th.th_teams_level;
2876 KMP_DEBUG_ASSERT(ii >= tlevel);
2888 return __kmp_tid_from_gtid(gtid);
2890 dd = team->t.t_serialized;
2892 while (ii > level) {
2893 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2895 if ((team->t.t_serialized) && (!dd)) {
2896 team = team->t.t_parent;
2900 team = team->t.t_parent;
2901 dd = team->t.t_serialized;
2906 return (dd > 1) ? (0) : (team->t.t_master_tid);
2909 int __kmp_get_team_size(
int gtid,
int level) {
2915 KF_TRACE(10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level));
2916 KMP_DEBUG_ASSERT(__kmp_init_serial);
2923 thr = __kmp_threads[gtid];
2924 team = thr->th.th_team;
2925 ii = team->t.t_level;
2929 if (thr->th.th_teams_microtask) {
2931 int tlevel = thr->th.th_teams_level;
2934 KMP_DEBUG_ASSERT(ii >= tlevel);
2945 while (ii > level) {
2946 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2948 if (team->t.t_serialized && (!dd)) {
2949 team = team->t.t_parent;
2953 team = team->t.t_parent;
2958 return team->t.t_nproc;
/* Build the runtime schedule (kind + chunk) from the global settings. */
kmp_r_sched_t __kmp_get_schedule_global() {
  kmp_r_sched_t r_sched;

  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static; // detailed static (balanced/greedy)
  } else if (s == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided; // detailed guided
  } else { // static_chunked, dynamic_chunked, or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    r_sched.chunk = KMP_DEFAULT_CHUNK; // __kmp_chunk may not have been set
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
/* Allocate (or grow) the team argument list; small argument counts reuse the
   inline buffer inside the team structure. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}

static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
    0, // int serial_nesting_level
    (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
    (kmp_int8)__kmp_env_blocktime, // int bt_set: blocktime explicitly set?
    __kmp_dflt_blocktime, // int blocktime
#if KMP_USE_MONITOR
    __kmp_bt_intervals, // int bt_intervals
#endif
    __kmp_dflt_team_nth, // int nproc: # of threads for next parallel region
    __kmp_cg_max_nth, // int thread_limit
    __kmp_dflt_max_active_levels, // int max_active_levels
    r_sched, // kmp_r_sched_t sched: runtime schedule {sched,chunk} pair
    __kmp_nested_proc_bind.bind_types[0], // kmp_proc_bind_t proc_bind
    __kmp_default_device, // int default_device
    NULL // struct kmp_internal_control *next
  };

  return g_icvs;
}

static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
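// __kmp_get_global_icvs() is the template from which every new root and serial
// team gets its initial ICVs; __kmp_get_x_global_icvs() instead snapshots the
// ICVs of an existing team's master so nested/serialized teams inherit the
// current values rather than the global defaults.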
/* Set up the root, its root team and its hot team. */
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;

  /* setup the root team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
      );
#if USE_DEBUGGER
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
      );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
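// kmp_team_list_t is a simple NULL-terminated linked list used only by the
// debugging routine __kmp_print_structure() below to accumulate the set of
// live teams (each team is inserted at most once, ordered by t_id).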
3241 static void __kmp_print_structure_team_accum(
3242 kmp_team_list_t list,
3243 kmp_team_p
const *team
3253 KMP_DEBUG_ASSERT(list != NULL);
3258 __kmp_print_structure_team_accum(list, team->t.t_parent);
3259 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3263 while (l->next != NULL && l->entry != team) {
3266 if (l->next != NULL) {
3272 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3278 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3279 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3307 void __kmp_print_structure(
void) {
3309 kmp_team_list_t list;
3313 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3317 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3318 "Table\n------------------------------\n");
3321 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3322 __kmp_printf(
"%2d", gtid);
3323 if (__kmp_threads != NULL) {
3324 __kmp_printf(
" %p", __kmp_threads[gtid]);
3326 if (__kmp_root != NULL) {
3327 __kmp_printf(
" %p", __kmp_root[gtid]);
3334 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3336 if (__kmp_threads != NULL) {
3338 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3339 kmp_info_t
const *thread = __kmp_threads[gtid];
3340 if (thread != NULL) {
3341 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3342 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3343 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3344 __kmp_print_structure_team(
" Serial Team: ",
3345 thread->th.th_serial_team);
3346 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3347 __kmp_print_structure_thread(
" Master: ",
3348 thread->th.th_team_master);
3349 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3350 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3351 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3352 __kmp_print_structure_thread(
" Next in pool: ",
3353 thread->th.th_next_pool);
3355 __kmp_print_structure_team_accum(list, thread->th.th_team);
3356 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3360 __kmp_printf(
"Threads array is not allocated.\n");
3364 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3366 if (__kmp_root != NULL) {
3368 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3369 kmp_root_t
const *root = __kmp_root[gtid];
3371 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3372 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3373 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3374 __kmp_print_structure_thread(
" Uber Thread: ",
3375 root->r.r_uber_thread);
3376 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3377 __kmp_printf(
" In Parallel: %2d\n",
3378 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3380 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3381 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3385 __kmp_printf(
"Ubers array is not allocated.\n");
3388 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3390 while (list->next != NULL) {
3391 kmp_team_p
const *team = list->entry;
3393 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3394 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3395 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid);
3396 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3397 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3398 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3399 for (i = 0; i < team->t.t_nproc; ++i) {
3400 __kmp_printf(
" Thread %2d: ", i);
3401 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3403 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3409 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3411 __kmp_print_structure_thread(
"Thread pool: ",
3412 CCAST(kmp_info_t *, __kmp_thread_pool));
3413 __kmp_print_structure_team(
"Team pool: ",
3414 CCAST(kmp_team_t *, __kmp_team_pool));
3418 while (list != NULL) {
3419 kmp_team_list_item_t *item = list;
3421 KMP_INTERNAL_FREE(item);
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
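// Per-thread pseudo-random number generation.  Each thread runs its own linear
// congruential generator
//
//   x_{n+1} = a * x_n + 1   (mod 2^32, via unsigned overflow)
//
// where the multiplier `a` is picked from __kmp_primes[] by thread id in
// __kmp_init_random(), and __kmp_get_random() returns the upper 16 bits of the
// state, which are the better-distributed bits of such a generator.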
/* Advance this thread's generator and return its upper 16 bits. */
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

/* Seed the per-thread random number generator from the thread id. */
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; returns the
   number of entries reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // reclaim only roots that died while
                                      // not active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}

/* Try to create nNeed free entries in __kmp_threads and __kmp_root; returns
   the number of free entries generated. */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
  // resizing __kmp_threads does not need additional protection if foreign
  // threads are present.

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library: reclaim array entries for root threads
     that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to the new value
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
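// Capacity growth policy: __kmp_expand_threads() doubles the combined
// __kmp_threads/__kmp_root allocation until it can satisfy the request,
// clipping at __kmp_sys_max_nth, and resizes the threadprivate cache when the
// new capacity exceeds __kmp_tp_capacity while a cache is in use.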
3593 int __kmp_register_root(
int initial_thread) {
3594 kmp_info_t *root_thread;
3598 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3599 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3616 capacity = __kmp_threads_capacity;
3617 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3622 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3623 if (__kmp_tp_cached) {
3624 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3625 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3626 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3628 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3636 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3640 (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3641 KMP_ASSERT(gtid < __kmp_threads_capacity);
3645 TCW_4(__kmp_nth, __kmp_nth + 1);
3649 if (__kmp_adjust_gtid_mode) {
3650 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3651 if (TCR_4(__kmp_gtid_mode) != 2) {
3652 TCW_4(__kmp_gtid_mode, 2);
3655 if (TCR_4(__kmp_gtid_mode) != 1) {
3656 TCW_4(__kmp_gtid_mode, 1);
3661 #ifdef KMP_ADJUST_BLOCKTIME
3664 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3665 if (__kmp_nth > __kmp_avail_proc) {
3666 __kmp_zero_bt = TRUE;
3672 if (!(root = __kmp_root[gtid])) {
3673 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3674 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3677 #if KMP_STATS_ENABLED
3679 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3680 __kmp_stats_thread_ptr->startLife();
3681 KMP_SET_THREAD_STATE(SERIAL_REGION);
3684 __kmp_initialize_root(root);
3687 if (root->r.r_uber_thread) {
3688 root_thread = root->r.r_uber_thread;
3690 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3691 if (__kmp_storage_map) {
3692 __kmp_print_thread_storage_map(root_thread, gtid);
3694 root_thread->th.th_info.ds.ds_gtid = gtid;
3696 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3698 root_thread->th.th_root = root;
3699 if (__kmp_env_consistency_check) {
3700 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3703 __kmp_initialize_fast_memory(root_thread);
3707 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3708 __kmp_initialize_bget(root_thread);
3710 __kmp_init_random(root_thread);
3714 if (!root_thread->th.th_serial_team) {
3715 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3716 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3717 root_thread->th.th_serial_team = __kmp_allocate_team(
3722 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3724 KMP_ASSERT(root_thread->th.th_serial_team);
3725 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3726 root_thread->th.th_serial_team));
3729 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3731 root->r.r_root_team->t.t_threads[0] = root_thread;
3732 root->r.r_hot_team->t.t_threads[0] = root_thread;
3733 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3735 root_thread->th.th_serial_team->t.t_serialized = 0;
3736 root->r.r_uber_thread = root_thread;
3739 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3740 TCW_4(__kmp_init_gtid, TRUE);
3743 __kmp_gtid_set_specific(gtid);
3746 __kmp_itt_thread_name(gtid);
3749 #ifdef KMP_TDATA_GTID
3752 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3753 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3755 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3757 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3758 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3759 KMP_INIT_BARRIER_STATE));
3762 for (b = 0; b < bs_last_barrier; ++b) {
3763 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3765 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3769 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3770 KMP_INIT_BARRIER_STATE);
3772 #if KMP_AFFINITY_SUPPORTED
3773 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3774 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3775 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3776 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3777 if (TCR_4(__kmp_init_middle)) {
3778 __kmp_affinity_set_init_mask(gtid, TRUE);
3781 root_thread->th.th_def_allocator = __kmp_def_allocator;
3782 root_thread->th.th_prev_level = 0;
3783 root_thread->th.th_prev_num_threads = 1;
3785 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3786 tmp->cg_root = root_thread;
3787 tmp->cg_thread_limit = __kmp_cg_max_nth;
3788 tmp->cg_nthreads = 1;
3789 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3790 " cg_nthreads init to 1\n",
3793 root_thread->th.th_cg_roots = tmp;
3795 __kmp_root_counter++;
3798 if (!initial_thread && ompt_enabled.enabled) {
3800 kmp_info_t *root_thread = ompt_get_thread();
3802 ompt_set_thread_state(root_thread, ompt_state_overhead);
3804 if (ompt_enabled.ompt_callback_thread_begin) {
3805 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3806 ompt_thread_initial, __ompt_get_thread_data_internal());
3808 ompt_data_t *task_data;
3809 ompt_data_t *parallel_data;
3810 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3811 if (ompt_enabled.ompt_callback_implicit_task) {
3812 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3813 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3816 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3821 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
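// With KMP_NESTED_HOT_TEAMS enabled, a thread can keep one hot team per
// nesting level in th_hot_teams; __kmp_free_hot_teams() below walks that array
// recursively and returns how many worker threads were handed back to the
// thread pool.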
#if KMP_NESTED_HOT_TEAMS
/* Free the nested hot teams kept by thread `thr` at nesting depth `level` and
   deeper; returns the number of worker threads released to the pool. */
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // master is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
3855 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
3856 kmp_team_t *root_team = root->r.r_root_team;
3857 kmp_team_t *hot_team = root->r.r_hot_team;
3858 int n = hot_team->t.t_nproc;
3861 KMP_DEBUG_ASSERT(!root->r.r_active);
3863 root->r.r_root_team = NULL;
3864 root->r.r_hot_team = NULL;
3867 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
3868 #if KMP_NESTED_HOT_TEAMS
3869 if (__kmp_hot_teams_max_level >
3871 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3872 kmp_info_t *th = hot_team->t.t_threads[i];
3873 if (__kmp_hot_teams_max_level > 1) {
3874 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3876 if (th->th.th_hot_teams) {
3877 __kmp_free(th->th.th_hot_teams);
3878 th->th.th_hot_teams = NULL;
3883 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
3888 if (__kmp_tasking_mode != tskm_immediate_exec) {
3889 __kmp_wait_to_unref_task_teams();
3895 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3897 (LPVOID) & (root->r.r_uber_thread->th),
3898 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3899 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3903 ompt_data_t *task_data;
3904 ompt_data_t *parallel_data;
3905 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3906 if (ompt_enabled.ompt_callback_implicit_task) {
3907 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3908 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
3910 if (ompt_enabled.ompt_callback_thread_end) {
3911 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3912 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
3918 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
3919 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
3921 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
3922 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
3925 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
3926 root->r.r_uber_thread->th.th_cg_roots->cg_root);
3927 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
3928 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
3929 root->r.r_uber_thread->th.th_cg_roots = NULL;
3931 __kmp_reap_thread(root->r.r_uber_thread, 1);
3935 root->r.r_uber_thread = NULL;
3937 root->r.r_begin = FALSE;
3942 void __kmp_unregister_root_current_thread(
int gtid) {
3943 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3947 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3948 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3949 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
3952 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3955 kmp_root_t *root = __kmp_root[gtid];
3957 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3958 KMP_ASSERT(KMP_UBER_GTID(gtid));
3959 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3960 KMP_ASSERT(root->r.r_active == FALSE);
3964 kmp_info_t *thread = __kmp_threads[gtid];
3965 kmp_team_t *team = thread->th.th_team;
3966 kmp_task_team_t *task_team = thread->th.th_task_team;
3969 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
3972 thread->th.ompt_thread_info.state = ompt_state_undefined;
3974 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3977 __kmp_reset_root(gtid, root);
3980 __kmp_gtid_set_specific(KMP_GTID_DNE);
3981 #ifdef KMP_TDATA_GTID
3982 __kmp_gtid = KMP_GTID_DNE;
3987 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
3989 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3996 static int __kmp_unregister_root_other_thread(
int gtid) {
3997 kmp_root_t *root = __kmp_root[gtid];
4000 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4001 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4002 KMP_ASSERT(KMP_UBER_GTID(gtid));
4003 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4004 KMP_ASSERT(root->r.r_active == FALSE);
4006 r = __kmp_reset_root(gtid, root);
4008 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
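// __kmp_initialize_info() (re)binds a kmp_info_t to a slot in a team: it sets
// the thread's team pointers, tid, dispatch buffer, implicit task and
// contention-group root, allocating the per-thread structures (th_pri_common,
// dispatch buffers, task-state stack) on first use.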
4033 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4034 int tid,
int gtid) {
4038 kmp_info_t *master = team->t.t_threads[0];
4039 KMP_DEBUG_ASSERT(this_thr != NULL);
4040 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4041 KMP_DEBUG_ASSERT(team);
4042 KMP_DEBUG_ASSERT(team->t.t_threads);
4043 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4044 KMP_DEBUG_ASSERT(master);
4045 KMP_DEBUG_ASSERT(master->th.th_root);
4049 TCW_SYNC_PTR(this_thr->th.th_team, team);
4051 this_thr->th.th_info.ds.ds_tid = tid;
4052 this_thr->th.th_set_nproc = 0;
4053 if (__kmp_tasking_mode != tskm_immediate_exec)
4056 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4058 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4059 this_thr->th.th_set_proc_bind = proc_bind_default;
4060 #if KMP_AFFINITY_SUPPORTED
4061 this_thr->th.th_new_place = this_thr->th.th_current_place;
4063 this_thr->th.th_root = master->th.th_root;
4066 this_thr->th.th_team_nproc = team->t.t_nproc;
4067 this_thr->th.th_team_master = master;
4068 this_thr->th.th_team_serialized = team->t.t_serialized;
4069 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4071 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4073 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4074 tid, gtid, this_thr, this_thr->th.th_current_task));
4076 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4079 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4080 tid, gtid, this_thr, this_thr->th.th_current_task));
4085 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4087 this_thr->th.th_local.this_construct = 0;
4089 if (!this_thr->th.th_pri_common) {
4090 this_thr->th.th_pri_common =
4091 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4092 if (__kmp_storage_map) {
4093 __kmp_print_storage_map_gtid(
4094 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4095 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4097 this_thr->th.th_pri_head = NULL;
4100 if (this_thr != master &&
4101 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4103 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4104 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4107 int i = tmp->cg_nthreads--;
4108 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4109 " on node %p of thread %p to %d\n",
4110 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4115 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4117 this_thr->th.th_cg_roots->cg_nthreads++;
4118 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4119 " node %p of thread %p to %d\n",
4120 this_thr, this_thr->th.th_cg_roots,
4121 this_thr->th.th_cg_roots->cg_root,
4122 this_thr->th.th_cg_roots->cg_nthreads));
4123 this_thr->th.th_current_task->td_icvs.thread_limit =
4124 this_thr->th.th_cg_roots->cg_thread_limit;
4129 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4132 sizeof(dispatch_private_info_t) *
4133 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4134 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4135 team->t.t_max_nproc));
4136 KMP_ASSERT(dispatch);
4137 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4138 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4140 dispatch->th_disp_index = 0;
4141 dispatch->th_doacross_buf_idx = 0;
4142 if (!dispatch->th_disp_buffer) {
4143 dispatch->th_disp_buffer =
4144 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4146 if (__kmp_storage_map) {
4147 __kmp_print_storage_map_gtid(
4148 gtid, &dispatch->th_disp_buffer[0],
4149 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4151 : __kmp_dispatch_num_buffers],
4152 disp_size,
"th_%d.th_dispatch.th_disp_buffer "
4153 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4154 gtid, team->t.t_id, gtid);
4157 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4160 dispatch->th_dispatch_pr_current = 0;
4161 dispatch->th_dispatch_sh_current = 0;
4163 dispatch->th_deo_fcn = 0;
4164 dispatch->th_dxo_fcn = 0;
4167 this_thr->th.th_next_pool = NULL;
4169 if (!this_thr->th.th_task_state_memo_stack) {
4171 this_thr->th.th_task_state_memo_stack =
4172 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4173 this_thr->th.th_task_state_top = 0;
4174 this_thr->th.th_task_state_stack_sz = 4;
4175 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4177 this_thr->th.th_task_state_memo_stack[i] = 0;
4180 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4181 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4191 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4193 kmp_team_t *serial_team;
4194 kmp_info_t *new_thr;
4197 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4198 KMP_DEBUG_ASSERT(root && team);
4199 #if !KMP_NESTED_HOT_TEAMS
4200 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4205 if (__kmp_thread_pool) {
4206 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4207 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4208 if (new_thr == __kmp_thread_pool_insert_pt) {
4209 __kmp_thread_pool_insert_pt = NULL;
4211 TCW_4(new_thr->th.th_in_pool, FALSE);
4212 __kmp_suspend_initialize_thread(new_thr);
4213 __kmp_lock_suspend_mx(new_thr);
4214 if (new_thr->th.th_active_in_pool == TRUE) {
4215 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4216 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4217 new_thr->th.th_active_in_pool = FALSE;
4219 __kmp_unlock_suspend_mx(new_thr);
4221 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4222 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4223 KMP_ASSERT(!new_thr->th.th_team);
4224 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4227 __kmp_initialize_info(new_thr, team, new_tid,
4228 new_thr->th.th_info.ds.ds_gtid);
4229 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4231 TCW_4(__kmp_nth, __kmp_nth + 1);
4233 new_thr->th.th_task_state = 0;
4234 new_thr->th.th_task_state_top = 0;
4235 new_thr->th.th_task_state_stack_sz = 4;
4237 #ifdef KMP_ADJUST_BLOCKTIME
4240 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4241 if (__kmp_nth > __kmp_avail_proc) {
4242 __kmp_zero_bt = TRUE;
4251 kmp_balign_t *balign = new_thr->th.th_bar;
4252 for (b = 0; b < bs_last_barrier; ++b)
4253 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4256 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4257 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4264 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4265 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4270 if (!TCR_4(__kmp_init_monitor)) {
4271 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4272 if (!TCR_4(__kmp_init_monitor)) {
4273 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4274 TCW_4(__kmp_init_monitor, 1);
4275 __kmp_create_monitor(&__kmp_monitor);
4276 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4287 while (TCR_4(__kmp_init_monitor) < 2) {
4290 KF_TRACE(10, (
"after monitor thread has started\n"));
4293 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4298 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4299 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4303 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4305 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4307 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4310 __itt_suppress_mark_range(
4311 __itt_suppress_range, __itt_suppress_threading_errors,
4312 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4313 __itt_suppress_mark_range(
4314 __itt_suppress_range, __itt_suppress_threading_errors,
4315 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4317 __itt_suppress_mark_range(
4318 __itt_suppress_range, __itt_suppress_threading_errors,
4319 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4321 __itt_suppress_mark_range(__itt_suppress_range,
4322 __itt_suppress_threading_errors,
4323 &new_thr->th.th_suspend_init_count,
4324 sizeof(new_thr->th.th_suspend_init_count));
4327 __itt_suppress_mark_range(__itt_suppress_range,
4328 __itt_suppress_threading_errors,
4329 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4330 sizeof(new_thr->th.th_bar[0].bb.b_go));
4331 __itt_suppress_mark_range(__itt_suppress_range,
4332 __itt_suppress_threading_errors,
4333 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4334 sizeof(new_thr->th.th_bar[1].bb.b_go));
4335 __itt_suppress_mark_range(__itt_suppress_range,
4336 __itt_suppress_threading_errors,
4337 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4338 sizeof(new_thr->th.th_bar[2].bb.b_go));
4340 if (__kmp_storage_map) {
4341 __kmp_print_thread_storage_map(new_thr, new_gtid);
4346 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4347 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4348 new_thr->th.th_serial_team = serial_team =
4349 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4353 proc_bind_default, &r_icvs,
4354 0 USE_NESTED_HOT_ARG(NULL));
4356 KMP_ASSERT(serial_team);
4357 serial_team->t.t_serialized = 0;
4359 serial_team->t.t_threads[0] = new_thr;
4361 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4365 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4368 __kmp_initialize_fast_memory(new_thr);
4372 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4373 __kmp_initialize_bget(new_thr);
4376 __kmp_init_random(new_thr);
4380 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4381 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4384 kmp_balign_t *balign = new_thr->th.th_bar;
4385 for (b = 0; b < bs_last_barrier; ++b) {
4386 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4387 balign[b].bb.team = NULL;
4388 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4389 balign[b].bb.use_oncore_barrier = 0;
4392 new_thr->th.th_spin_here = FALSE;
4393 new_thr->th.th_next_waiting = 0;
4395 new_thr->th.th_blocking =
false;
4398 #if KMP_AFFINITY_SUPPORTED
4399 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4400 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4401 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4402 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4404 new_thr->th.th_def_allocator = __kmp_def_allocator;
4405 new_thr->th.th_prev_level = 0;
4406 new_thr->th.th_prev_num_threads = 1;
4408 TCW_4(new_thr->th.th_in_pool, FALSE);
4409 new_thr->th.th_active_in_pool = FALSE;
4410 TCW_4(new_thr->th.th_active, TRUE);
4418 if (__kmp_adjust_gtid_mode) {
4419 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4420 if (TCR_4(__kmp_gtid_mode) != 2) {
4421 TCW_4(__kmp_gtid_mode, 2);
4424 if (TCR_4(__kmp_gtid_mode) != 1) {
4425 TCW_4(__kmp_gtid_mode, 1);
4430 #ifdef KMP_ADJUST_BLOCKTIME
4433 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4434 if (__kmp_nth > __kmp_avail_proc) {
4435 __kmp_zero_bt = TRUE;
4442 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4443 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4445 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4447 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
/* Reinitialize a team for reuse.  The hot team path calls this at every fork
   barrier, so keep writes to the team structure to a minimum. */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the master thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}

/* Initialize the team data structure.  This assumes that t_threads and
   t_max_nproc are already set; the arguments are not touched. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* not touching t_parent or t_threads: that would break the hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
/* Sets the full mask for the thread and returns the old mask; no changes to
   the runtime structures. */
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0) {
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
      }
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
4551 #if KMP_AFFINITY_SUPPORTED
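// __kmp_partition_places() implements the proc_bind distribution of a team
// over the master's place partition: proc_bind_master keeps every worker on
// the master's place, proc_bind_close packs threads into the nearest places,
// and proc_bind_spread spaces them out, subdividing the partition that each
// thread will pass on to its own nested teams.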
4557 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4559 kmp_info_t *master_th = team->t.t_threads[0];
4560 KMP_DEBUG_ASSERT(master_th != NULL);
4561 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4562 int first_place = master_th->th.th_first_place;
4563 int last_place = master_th->th.th_last_place;
4564 int masters_place = master_th->th.th_current_place;
4565 team->t.t_first_place = first_place;
4566 team->t.t_last_place = last_place;
4568 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4569 "bound to place %d partition = [%d,%d]\n",
4570 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4571 team->t.t_id, masters_place, first_place, last_place));
4573 switch (proc_bind) {
4575 case proc_bind_default:
4578 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4581 case proc_bind_master: {
4583 int n_th = team->t.t_nproc;
4584 for (f = 1; f < n_th; f++) {
4585 kmp_info_t *th = team->t.t_threads[f];
4586 KMP_DEBUG_ASSERT(th != NULL);
4587 th->th.th_first_place = first_place;
4588 th->th.th_last_place = last_place;
4589 th->th.th_new_place = masters_place;
4590 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4591 team->t.t_display_affinity != 1) {
4592 team->t.t_display_affinity = 1;
4595 KA_TRACE(100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d "
4596 "partition = [%d,%d]\n",
4597 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4598 f, masters_place, first_place, last_place));
4602 case proc_bind_close: {
4604 int n_th = team->t.t_nproc;
4606 if (first_place <= last_place) {
4607 n_places = last_place - first_place + 1;
4609 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4611 if (n_th <= n_places) {
4612 int place = masters_place;
4613 for (f = 1; f < n_th; f++) {
4614 kmp_info_t *th = team->t.t_threads[f];
4615 KMP_DEBUG_ASSERT(th != NULL);
4617 if (place == last_place) {
4618 place = first_place;
4619 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4624 th->th.th_first_place = first_place;
4625 th->th.th_last_place = last_place;
4626 th->th.th_new_place = place;
4627 if (__kmp_display_affinity && place != th->th.th_current_place &&
4628 team->t.t_display_affinity != 1) {
4629 team->t.t_display_affinity = 1;
4632 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4633 "partition = [%d,%d]\n",
4634 __kmp_gtid_from_thread(team->t.t_threads[f]),
4635 team->t.t_id, f, place, first_place, last_place));
4638 int S, rem, gap, s_count;
4639 S = n_th / n_places;
4641 rem = n_th - (S * n_places);
4642 gap = rem > 0 ? n_places / rem : n_places;
4643 int place = masters_place;
4645 for (f = 0; f < n_th; f++) {
4646 kmp_info_t *th = team->t.t_threads[f];
4647 KMP_DEBUG_ASSERT(th != NULL);
4649 th->th.th_first_place = first_place;
4650 th->th.th_last_place = last_place;
4651 th->th.th_new_place = place;
4652 if (__kmp_display_affinity && place != th->th.th_current_place &&
4653 team->t.t_display_affinity != 1) {
4654 team->t.t_display_affinity = 1;
4658 if ((s_count == S) && rem && (gap_ct == gap)) {
4660 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4662 if (place == last_place) {
4663 place = first_place;
4664 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4672 }
else if (s_count == S) {
4673 if (place == last_place) {
4674 place = first_place;
4675 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4685 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4686 "partition = [%d,%d]\n",
4687 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4688 th->th.th_new_place, first_place, last_place));
4690 KMP_DEBUG_ASSERT(place == masters_place);
4694 case proc_bind_spread: {
4696 int n_th = team->t.t_nproc;
4699 if (first_place <= last_place) {
4700 n_places = last_place - first_place + 1;
4702 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4704 if (n_th <= n_places) {
4707 if (n_places !=
static_cast<int>(__kmp_affinity_num_masks)) {
4708 int S = n_places / n_th;
4709 int s_count, rem, gap, gap_ct;
4711 place = masters_place;
4712 rem = n_places - n_th * S;
4713 gap = rem ? n_th / rem : 1;
4716 if (update_master_only == 1)
4718 for (f = 0; f < thidx; f++) {
4719 kmp_info_t *th = team->t.t_threads[f];
4720 KMP_DEBUG_ASSERT(th != NULL);
4722 th->th.th_first_place = place;
4723 th->th.th_new_place = place;
4724 if (__kmp_display_affinity && place != th->th.th_current_place &&
4725 team->t.t_display_affinity != 1) {
4726 team->t.t_display_affinity = 1;
4729 while (s_count < S) {
4730 if (place == last_place) {
4731 place = first_place;
4732 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4739 if (rem && (gap_ct == gap)) {
4740 if (place == last_place) {
4741 place = first_place;
4742 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4750 th->th.th_last_place = place;
4753 if (place == last_place) {
4754 place = first_place;
4755 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4762 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4763 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4764 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4765 f, th->th.th_new_place, th->th.th_first_place,
4766 th->th.th_last_place, __kmp_affinity_num_masks));
4772 double current =
static_cast<double>(masters_place);
4774 (
static_cast<double>(n_places + 1) /
static_cast<double>(n_th));
4779 if (update_master_only == 1)
4781 for (f = 0; f < thidx; f++) {
4782 first =
static_cast<int>(current);
4783 last =
static_cast<int>(current + spacing) - 1;
4784 KMP_DEBUG_ASSERT(last >= first);
4785 if (first >= n_places) {
4786 if (masters_place) {
4789 if (first == (masters_place + 1)) {
4790 KMP_DEBUG_ASSERT(f == n_th);
4793 if (last == masters_place) {
4794 KMP_DEBUG_ASSERT(f == (n_th - 1));
4798 KMP_DEBUG_ASSERT(f == n_th);
4803 if (last >= n_places) {
4804 last = (n_places - 1);
4809 KMP_DEBUG_ASSERT(0 <= first);
4810 KMP_DEBUG_ASSERT(n_places > first);
4811 KMP_DEBUG_ASSERT(0 <= last);
4812 KMP_DEBUG_ASSERT(n_places > last);
4813 KMP_DEBUG_ASSERT(last_place >= first_place);
4814 th = team->t.t_threads[f];
4815 KMP_DEBUG_ASSERT(th);
4816 th->th.th_first_place = first;
4817 th->th.th_new_place = place;
4818 th->th.th_last_place = last;
4819 if (__kmp_display_affinity && place != th->th.th_current_place &&
4820 team->t.t_display_affinity != 1) {
4821 team->t.t_display_affinity = 1;
4824 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4825 "partition = [%d,%d], spacing = %.4f\n",
4826 __kmp_gtid_from_thread(team->t.t_threads[f]),
4827 team->t.t_id, f, th->th.th_new_place,
4828 th->th.th_first_place, th->th.th_last_place, spacing));
4832 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4834 int S, rem, gap, s_count;
4835 S = n_th / n_places;
4837 rem = n_th - (S * n_places);
4838 gap = rem > 0 ? n_places / rem : n_places;
4839 int place = masters_place;
4842 if (update_master_only == 1)
4844 for (f = 0; f < thidx; f++) {
4845 kmp_info_t *th = team->t.t_threads[f];
4846 KMP_DEBUG_ASSERT(th != NULL);
4848 th->th.th_first_place = place;
4849 th->th.th_last_place = place;
4850 th->th.th_new_place = place;
4851 if (__kmp_display_affinity && place != th->th.th_current_place &&
4852 team->t.t_display_affinity != 1) {
4853 team->t.t_display_affinity = 1;
4857 if ((s_count == S) && rem && (gap_ct == gap)) {
4859 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4861 if (place == last_place) {
4862 place = first_place;
4863 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4871 }
else if (s_count == S) {
4872 if (place == last_place) {
4873 place = first_place;
4874 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4883 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4884 "partition = [%d,%d]\n",
4885 __kmp_gtid_from_thread(team->t.t_threads[f]),
4886 team->t.t_id, f, th->th.th_new_place,
4887 th->th.th_first_place, th->th.th_last_place));
4889 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4897 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
4900 #endif // KMP_AFFINITY_SUPPORTED
4905 __kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
4907 ompt_data_t ompt_parallel_data,
4909 kmp_proc_bind_t new_proc_bind,
4910 kmp_internal_control_t *new_icvs,
4911 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4912 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4915 int use_hot_team = !root->r.r_active;
4918 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
4919 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4920 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4923 #if KMP_NESTED_HOT_TEAMS
4924 kmp_hot_team_ptr_t *hot_teams;
4926 team = master->th.th_team;
4927 level = team->t.t_active_level;
4928 if (master->th.th_teams_microtask) {
4929 if (master->th.th_teams_size.nteams > 1 &&
4932 (microtask_t)__kmp_teams_master ||
4933 master->th.th_teams_level <
4939 hot_teams = master->th.th_hot_teams;
4940 if (level < __kmp_hot_teams_max_level && hot_teams &&
4941 hot_teams[level].hot_team) {
4949 KMP_DEBUG_ASSERT(new_nproc == 1);
4953 if (use_hot_team && new_nproc > 1) {
4954 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
4955 #if KMP_NESTED_HOT_TEAMS
4956 team = hot_teams[level].hot_team;
4958 team = root->r.r_hot_team;
4961 if (__kmp_tasking_mode != tskm_immediate_exec) {
4962 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
4963 "task_team[1] = %p before reinit\n",
4964 team->t.t_task_team[0], team->t.t_task_team[1]));
4971 if (team->t.t_nproc == new_nproc) {
4972 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
4975 if (team->t.t_size_changed == -1) {
4976 team->t.t_size_changed = 1;
4978 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4982 kmp_r_sched_t new_sched = new_icvs->sched;
4984 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
4986 __kmp_reinitialize_team(team, new_icvs,
4987 root->r.r_uber_thread->th.th_ident);
4989 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4990 team->t.t_threads[0], team));
4991 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4993 #if KMP_AFFINITY_SUPPORTED
4994 if ((team->t.t_size_changed == 0) &&
4995 (team->t.t_proc_bind == new_proc_bind)) {
4996 if (new_proc_bind == proc_bind_spread) {
4997 __kmp_partition_places(
5000 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5001 "proc_bind = %d, partition = [%d,%d]\n",
5002 team->t.t_id, new_proc_bind, team->t.t_first_place,
5003 team->t.t_last_place));
5005 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5006 __kmp_partition_places(team);
5009 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5011 }
else if (team->t.t_nproc > new_nproc) {
5013 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5016 team->t.t_size_changed = 1;
5017 #if KMP_NESTED_HOT_TEAMS
5018 if (__kmp_hot_teams_mode == 0) {
5021 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5022 hot_teams[level].hot_team_nth = new_nproc;
5023 #endif // KMP_NESTED_HOT_TEAMS
5025 for (f = new_nproc; f < team->t.t_nproc; f++) {
5026 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5027 if (__kmp_tasking_mode != tskm_immediate_exec) {
5030 team->t.t_threads[f]->th.th_task_team = NULL;
5032 __kmp_free_thread(team->t.t_threads[f]);
5033 team->t.t_threads[f] = NULL;
5035 #if KMP_NESTED_HOT_TEAMS
5040 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5041 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5042 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5043 for (
int b = 0; b < bs_last_barrier; ++b) {
5044 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5045 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5047 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5051 #endif // KMP_NESTED_HOT_TEAMS
5052 team->t.t_nproc = new_nproc;
5054 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5055 __kmp_reinitialize_team(team, new_icvs,
5056 root->r.r_uber_thread->th.th_ident);
5059 for (f = 0; f < new_nproc; ++f) {
5060 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5065 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5066 team->t.t_threads[0], team));
5068 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5071 for (f = 0; f < team->t.t_nproc; f++) {
5072 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5073 team->t.t_threads[f]->th.th_team_nproc ==
5078 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5079 #if KMP_AFFINITY_SUPPORTED
5080 __kmp_partition_places(team);
5083 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5084 kmp_affin_mask_t *old_mask;
5085 if (KMP_AFFINITY_CAPABLE()) {
5086 KMP_CPU_ALLOC(old_mask);
5091 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5094 team->t.t_size_changed = 1;
5096 #if KMP_NESTED_HOT_TEAMS
5097 int avail_threads = hot_teams[level].hot_team_nth;
5098 if (new_nproc < avail_threads)
5099 avail_threads = new_nproc;
5100 kmp_info_t **other_threads = team->t.t_threads;
5101 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5105 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5106 for (b = 0; b < bs_last_barrier; ++b) {
5107 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5108 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5110 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5114 if (hot_teams[level].hot_team_nth >= new_nproc) {
5117 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5118 team->t.t_nproc = new_nproc;
5124 hot_teams[level].hot_team_nth = new_nproc;
5125 #endif // KMP_NESTED_HOT_TEAMS
5126 if (team->t.t_max_nproc < new_nproc) {
5128 __kmp_reallocate_team_arrays(team, new_nproc);
5129 __kmp_reinitialize_team(team, new_icvs, NULL);
5132 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5137 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5141 for (f = team->t.t_nproc; f < new_nproc; f++) {
5142 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5143 KMP_DEBUG_ASSERT(new_worker);
5144 team->t.t_threads[f] = new_worker;
5147 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5148 "join=%llu, plain=%llu\n",
5149 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5150 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5151 team->t.t_bar[bs_plain_barrier].b_arrived));
5155 kmp_balign_t *balign = new_worker->th.th_bar;
5156 for (b = 0; b < bs_last_barrier; ++b) {
5157 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5158 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5159 KMP_BARRIER_PARENT_FLAG);
5161 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5167 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5168 if (KMP_AFFINITY_CAPABLE()) {
5170 __kmp_set_system_affinity(old_mask, TRUE);
5171 KMP_CPU_FREE(old_mask);
5174 #if KMP_NESTED_HOT_TEAMS
5176 #endif // KMP_NESTED_HOT_TEAMS
5178 int old_nproc = team->t.t_nproc;
5180 __kmp_initialize_team(team, new_nproc, new_icvs,
5181 root->r.r_uber_thread->th.th_ident);
5184 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5185 for (f = 0; f < team->t.t_nproc; ++f)
5186 __kmp_initialize_info(team->t.t_threads[f], team, f,
5187 __kmp_gtid_from_tid(f, team));
5195 for (f = old_nproc; f < team->t.t_nproc; ++f)
5196 team->t.t_threads[f]->th.th_task_state =
5197 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5200 team->t.t_threads[0]->th.th_task_state;
5201 for (f = old_nproc; f < team->t.t_nproc; ++f)
5202 team->t.t_threads[f]->th.th_task_state = old_state;
5206 for (f = 0; f < team->t.t_nproc; ++f) {
5207 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5208 team->t.t_threads[f]->th.th_team_nproc ==
5213 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5214 #if KMP_AFFINITY_SUPPORTED
5215 __kmp_partition_places(team);
5219 kmp_info_t *master = team->t.t_threads[0];
5220 if (master->th.th_teams_microtask) {
5221 for (f = 1; f < new_nproc; ++f) {
5223 kmp_info_t *thr = team->t.t_threads[f];
5224 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5225 thr->th.th_teams_level = master->th.th_teams_level;
5226 thr->th.th_teams_size = master->th.th_teams_size;
5229 #if KMP_NESTED_HOT_TEAMS
5233 for (f = 1; f < new_nproc; ++f) {
5234 kmp_info_t *thr = team->t.t_threads[f];
5236 kmp_balign_t *balign = thr->th.th_bar;
5237 for (b = 0; b < bs_last_barrier; ++b) {
5238 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5239 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5241 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5246 #endif // KMP_NESTED_HOT_TEAMS
5249 __kmp_alloc_argv_entries(argc, team, TRUE);
5250 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5254 KF_TRACE(10, (
" hot_team = %p\n", team));
5257 if (__kmp_tasking_mode != tskm_immediate_exec) {
5258 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5259 "task_team[1] = %p after reinit\n",
5260 team->t.t_task_team[0], team->t.t_task_team[1]));
5265 __ompt_team_assign_id(team, ompt_parallel_data);
5275 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5278 if (team->t.t_max_nproc >= max_nproc) {
5280 __kmp_team_pool = team->t.t_next_pool;
5283 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5285 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5286 "task_team[1] %p to NULL\n",
5287 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5288 team->t.t_task_team[0] = NULL;
5289 team->t.t_task_team[1] = NULL;
5292 __kmp_alloc_argv_entries(argc, team, TRUE);
5293 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5296 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5297 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5300 for (b = 0; b < bs_last_barrier; ++b) {
5301 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5303 team->t.t_bar[b].b_master_arrived = 0;
5304 team->t.t_bar[b].b_team_arrived = 0;
5309 team->t.t_proc_bind = new_proc_bind;
5311 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5315 __ompt_team_assign_id(team, ompt_parallel_data);
5327 team = __kmp_reap_team(team);
5328 __kmp_team_pool = team;
5333 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5336 team->t.t_max_nproc = max_nproc;
5339 __kmp_allocate_team_arrays(team, max_nproc);
5341 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5342 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5344 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5346 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5347 team->t.t_task_team[0] = NULL;
5349 team->t.t_task_team[1] = NULL;
5352 if (__kmp_storage_map) {
5353 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5357 __kmp_alloc_argv_entries(argc, team, FALSE);
5358 team->t.t_argc = argc;
5361 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5362 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5365 for (b = 0; b < bs_last_barrier; ++b) {
5366 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5368 team->t.t_bar[b].b_master_arrived = 0;
5369 team->t.t_bar[b].b_team_arrived = 0;
5374 team->t.t_proc_bind = new_proc_bind;
5377 __ompt_team_assign_id(team, ompt_parallel_data);
5378 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));
  return team;
}
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
5406 int use_hot_team = team == root->r.r_hot_team;
5407 #if KMP_NESTED_HOT_TEAMS
5409 kmp_hot_team_ptr_t *hot_teams;
5411 level = team->t.t_active_level - 1;
5412 if (master->th.th_teams_microtask) {
5413 if (master->th.th_teams_size.nteams > 1) {
5417 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5418 master->th.th_teams_level == team->t.t_level) {
5423 hot_teams = master->th.th_hot_teams;
5424 if (level < __kmp_hot_teams_max_level) {
5425 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5429 #endif // KMP_NESTED_HOT_TEAMS
5432 TCW_SYNC_PTR(team->t.t_pkfn,
5435 team->t.t_copyin_counter = 0;
5440 if (!use_hot_team) {
5441 if (__kmp_tasking_mode != tskm_immediate_exec) {
5443 for (f = 1; f < team->t.t_nproc; ++f) {
5444 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5445 kmp_info_t *th = team->t.t_threads[f];
5446 volatile kmp_uint32 *state = &th->th.th_reap_state;
5447 while (*state != KMP_SAFE_TO_REAP) {
5451 if (!__kmp_is_thread_alive(th, &ecode)) {
5452 *state = KMP_SAFE_TO_REAP;
5457 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5458 if (fl.is_sleeping())
5459 fl.resume(__kmp_gtid_from_thread(th));
5466 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5467 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5468 if (task_team != NULL) {
5469 for (f = 0; f < team->t.t_nproc; ++f) {
5470 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5471 team->t.t_threads[f]->th.th_task_team = NULL;
5475 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5476 __kmp_get_gtid(), task_team, team->t.t_id));
5477 #if KMP_NESTED_HOT_TEAMS
5478 __kmp_free_task_team(master, task_team);
5480 team->t.t_task_team[tt_idx] = NULL;
5486 team->t.t_parent = NULL;
5487 team->t.t_level = 0;
5488 team->t.t_active_level = 0;
5491 for (f = 1; f < team->t.t_nproc; ++f) {
5492 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5493 __kmp_free_thread(team->t.t_threads[f]);
5494 team->t.t_threads[f] = NULL;
5499 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5500 __kmp_team_pool = (
volatile kmp_team_t *)team;
5503 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5504 team->t.t_threads[1]->th.th_cg_roots);
5505 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5507 for (f = 1; f < team->t.t_nproc; ++f) {
5508 kmp_info_t *thr = team->t.t_threads[f];
5509 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5510 thr->th.th_cg_roots->cg_root == thr);
5512 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5513 thr->th.th_cg_roots = tmp->up;
5514 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5515 " up to node %p. cg_nthreads was %d\n",
5516 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5517 int i = tmp->cg_nthreads--;
5522 if (thr->th.th_cg_roots)
5523 thr->th.th_current_task->td_icvs.thread_limit =
5524 thr->th.th_cg_roots->cg_thread_limit;
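// Reap a team from the team pool: free its arrays, any heap-allocated argv
// storage, and the team descriptor itself.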
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  return next_pool;
}
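// Return a thread to the free pool. The underlying OS thread is not reaped
// here; the thread is reset to wait on its own b_go flag and inserted into
// __kmp_thread_pool in gtid order.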
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving a thread to the pool, switch it to wait on its own b_go flag
  // and detach it from its team.
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL);

  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // this thread is a contention-group root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves the contention group
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If __kmp_thread_pool_insert_pt is already past the new insert point,
  // re-scan the whole pool list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread, keeping the
  // pool sorted by gtid.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the thread and remember the insertion point.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_suspend_initialize_thread(this_th);
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  } else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to the user setting or default if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
}
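// Main loop of a worker thread: wait at the fork barrier for work, invoke the
// team's microtask, then wait at the join barrier, until global shutdown is
// signalled.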
void *__kmp_launch_thread(kmp_info_t *this_thr) {
  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
  }
5713 ompt_data_t *thread_data;
5714 if (ompt_enabled.enabled) {
5715 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5716 *thread_data = ompt_data_none;
5718 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5719 this_thr->th.ompt_thread_info.wait_id = 0;
5720 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5721 this_thr->th.ompt_thread_info.parallel_flags = 0;
5722 if (ompt_enabled.ompt_callback_thread_begin) {
5723 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5724 ompt_thread_worker, thread_data);
5726 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5731 while (!TCR_4(__kmp_global.g.g_done)) {
5732 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5736 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5739 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5742 if (ompt_enabled.enabled) {
5743 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5747 pteam = &this_thr->th.th_team;
5750 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5752 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5755 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5756 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5757 (*pteam)->t.t_pkfn));
5759 updateHWFPControl(*pteam);
5762 if (ompt_enabled.enabled) {
5763 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5767 rc = (*pteam)->t.t_invoke(gtid);
5771 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5772 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5773 (*pteam)->t.t_pkfn));
5776 if (ompt_enabled.enabled) {
5778 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
5780 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5784 __kmp_join_barrier(gtid);
5787 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5790 if (ompt_enabled.ompt_callback_thread_end) {
5791 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
5795 this_thr->th.th_task_team = NULL;
5797 __kmp_common_destroy_gtid(gtid);
5799 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
void __kmp_internal_end_dest(void *specific_gtid) {
#if KMP_COMPILER_ICC
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
// significant bits
#endif
  // The gtid is stored as gtid+1 in thread-specific storage so that 0 can
  // mean "nothing stored".
  int gtid = (kmp_intptr_t)specific_gtid - 1;
#if KMP_COMPILER_ICC
#pragma warning(pop)
#endif
  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  if (gtid >= 0 && KMP_UBER_GTID(gtid))
    __kmp_gtid_set_specific(gtid);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_internal_end_thread(gtid);
}
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
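// Reap a thread: wake it from the fork barrier if necessary, join the OS
// thread, and free all of its per-thread resources.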
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;
5890 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5893 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5897 ANNOTATE_HAPPENS_BEFORE(thread);
5898 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5899 __kmp_release_64(&flag);
5903 __kmp_reap_worker(thread);
5915 if (thread->th.th_active_in_pool) {
5916 thread->th.th_active_in_pool = FALSE;
5917 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5918 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
5922 __kmp_free_implicit_task(thread);
5926 __kmp_free_fast_memory(thread);
5929 __kmp_suspend_uninitialize_thread(thread);
5931 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5932 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5937 #ifdef KMP_ADJUST_BLOCKTIME
5940 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5941 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5942 if (__kmp_nth <= __kmp_avail_proc) {
5943 __kmp_zero_bt = FALSE;
5949 if (__kmp_env_consistency_check) {
5950 if (thread->th.th_cons) {
5951 __kmp_free_cons_stack(thread->th.th_cons);
5952 thread->th.th_cons = NULL;
5956 if (thread->th.th_pri_common != NULL) {
5957 __kmp_free(thread->th.th_pri_common);
5958 thread->th.th_pri_common = NULL;
5961 if (thread->th.th_task_state_memo_stack != NULL) {
5962 __kmp_free(thread->th.th_task_state_memo_stack);
5963 thread->th.th_task_state_memo_stack = NULL;
5967 if (thread->th.th_local.bget_data != NULL) {
5968 __kmp_finalize_bget(thread);
5972 #if KMP_AFFINITY_SUPPORTED
5973 if (thread->th.th_affin_mask != NULL) {
5974 KMP_CPU_FREE(thread->th.th_affin_mask);
5975 thread->th.th_affin_mask = NULL;
5979 #if KMP_USE_HIER_SCHED
5980 if (thread->th.th_hier_bar_data != NULL) {
5981 __kmp_free(thread->th.th_hier_bar_data);
5982 thread->th.th_hier_bar_data = NULL;
  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);
}
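// Common shutdown path shared by __kmp_internal_end_library() and
// __kmp_internal_end_thread(): unregister the library, reap pooled threads,
// teams and task teams, and stop the monitor thread.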
5994 static void __kmp_internal_end(
void) {
5998 __kmp_unregister_library();
6005 __kmp_reclaim_dead_roots();
6009 for (i = 0; i < __kmp_threads_capacity; i++)
6011 if (__kmp_root[i]->r.r_active)
6014 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6016 if (i < __kmp_threads_capacity) {
6028 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6029 if (TCR_4(__kmp_init_monitor)) {
6030 __kmp_reap_monitor(&__kmp_monitor);
6031 TCW_4(__kmp_init_monitor, 0);
6033 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6034 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6035 #endif // KMP_USE_MONITOR
6040 for (i = 0; i < __kmp_threads_capacity; i++) {
6041 if (__kmp_root[i]) {
6044 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6053 while (__kmp_thread_pool != NULL) {
6055 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6056 __kmp_thread_pool = thread->th.th_next_pool;
6058 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6059 thread->th.th_next_pool = NULL;
6060 thread->th.th_in_pool = FALSE;
6061 __kmp_reap_thread(thread, 0);
6063 __kmp_thread_pool_insert_pt = NULL;
6066 while (__kmp_team_pool != NULL) {
6068 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6069 __kmp_team_pool = team->t.t_next_pool;
6071 team->t.t_next_pool = NULL;
6072 __kmp_reap_team(team);
6075 __kmp_reap_task_teams();
6082 for (i = 0; i < __kmp_threads_capacity; i++) {
6083 kmp_info_t *thr = __kmp_threads[i];
6084 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6089 for (i = 0; i < __kmp_threads_capacity; ++i) {
6096 TCW_SYNC_4(__kmp_init_common, FALSE);
6098 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6106 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6107 if (TCR_4(__kmp_init_monitor)) {
6108 __kmp_reap_monitor(&__kmp_monitor);
6109 TCW_4(__kmp_init_monitor, 0);
6111 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6112 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
  TCW_4(__kmp_init_gtid, FALSE);
}
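// Library shutdown entry point (atexit/destructor path). Unregisters the
// calling root if it is inactive, then runs the common shutdown under the
// initialization and fork/join bootstrap locks.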
6124 void __kmp_internal_end_library(
int gtid_req) {
6131 if (__kmp_global.g.g_abort) {
6132 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6136 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6137 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6145 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6147 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6148 if (gtid == KMP_GTID_SHUTDOWN) {
6149 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6150 "already shutdown\n"));
6152 }
else if (gtid == KMP_GTID_MONITOR) {
6153 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6154 "registered, or system shutdown\n"));
6156 }
else if (gtid == KMP_GTID_DNE) {
6157 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6160 }
else if (KMP_UBER_GTID(gtid)) {
6162 if (__kmp_root[gtid]->r.r_active) {
6163 __kmp_global.g.g_abort = -1;
6164 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6166 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6172 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6173 __kmp_unregister_root_current_thread(gtid);
6180 #ifdef DUMP_DEBUG_ON_EXIT
6181 if (__kmp_debug_buf)
6182 __kmp_dump_debug_buffer();
6188 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6191 if (__kmp_global.g.g_abort) {
6192 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6194 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6197 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6198 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6207 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6210 __kmp_internal_end();
6212 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6213 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6215 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6217 #ifdef DUMP_DEBUG_ON_EXIT
6218 if (__kmp_debug_buf)
6219 __kmp_dump_debug_buffer();
6223 __kmp_close_console();
  __kmp_fini_allocator();
}
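// Per-thread shutdown entry point. Worker threads merely detach from their
// task team; uber threads may unregister their root and, if no other active
// roots remain, run the full shutdown.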
6230 void __kmp_internal_end_thread(
int gtid_req) {
6239 if (__kmp_global.g.g_abort) {
6240 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6244 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6245 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6253 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6255 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6256 if (gtid == KMP_GTID_SHUTDOWN) {
6257 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6258 "already shutdown\n"));
6260 }
else if (gtid == KMP_GTID_MONITOR) {
6261 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6262 "registered, or system shutdown\n"));
6264 }
else if (gtid == KMP_GTID_DNE) {
6265 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6269 }
else if (KMP_UBER_GTID(gtid)) {
6271 if (__kmp_root[gtid]->r.r_active) {
6272 __kmp_global.g.g_abort = -1;
6273 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6275 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6279 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6281 __kmp_unregister_root_current_thread(gtid);
6285 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6288 __kmp_threads[gtid]->th.th_task_team = NULL;
6292 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6298 if (__kmp_pause_status != kmp_hard_paused)
6302 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6307 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6310 if (__kmp_global.g.g_abort) {
6311 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6313 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6316 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6317 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6328 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6330 for (i = 0; i < __kmp_threads_capacity; ++i) {
6331 if (KMP_UBER_GTID(i)) {
6334 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6335 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6336 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6343 __kmp_internal_end();
6345 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6346 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6348 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
}
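// Library registration: a per-process environment variable
// (__KMP_REGISTERED_LIB_<pid>) records "<flag address>-<flag value>-<library
// file>" so that a second copy of the runtime loaded into the same process
// can be detected and reported (unless KMP_DUPLICATE_LIB_OK is set).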
static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
}
6372 void __kmp_register_library_startup(
void) {
6374 char *name = __kmp_reg_status_name();
6380 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6381 __kmp_initialize_system_tick();
6383 __kmp_read_system_time(&time.dtime);
6384 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6385 __kmp_registration_str =
6386 __kmp_str_format(
"%p-%lx-%s", &__kmp_registration_flag,
6387 __kmp_registration_flag, KMP_LIBRARY_FILE);
6389 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6390 __kmp_registration_str));
6397 __kmp_env_set(name, __kmp_registration_str, 0);
6399 value = __kmp_env_get(name);
6400 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6410 char *flag_addr_str = NULL;
6411 char *flag_val_str = NULL;
6412 char const *file_name = NULL;
6413 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6414 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6417 long *flag_addr = 0;
6419 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void**, &flag_addr));
6420 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6421 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6425 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6439 file_name =
"unknown library";
6444 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6445 if (!__kmp_str_match_true(duplicate_ok)) {
6447 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6448 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6450 KMP_INTERNAL_FREE(duplicate_ok);
6451 __kmp_duplicate_library_ok = 1;
6456 __kmp_env_unset(name);
6458 default: { KMP_DEBUG_ASSERT(0); }
break;
6461 KMP_INTERNAL_FREE((
void *)value);
6463 KMP_INTERNAL_FREE((
void *)name);
void __kmp_unregister_library(void) {
  char *name = __kmp_reg_status_name();
  char *value = __kmp_env_get(name);

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
    // This is our variable; delete it.
    __kmp_env_unset(name);
  }

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;
}
#if KMP_MIC_SUPPORTED
static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment.
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}
#endif /* KMP_MIC_SUPPORTED */
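// One-time serial initialization: sanity-check basic type sizes, set up locks
// and the allocator, register the library, establish barrier and blocktime
// defaults, allocate the __kmp_threads/__kmp_root arrays, and register the
// initial root thread.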
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  int size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6525 __kmp_validate_locks();
6528 __kmp_init_allocator();
6533 __kmp_register_library_startup();
6536 if (TCR_4(__kmp_global.g.g_done)) {
6537 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6540 __kmp_global.g.g_abort = 0;
6541 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6544 #if KMP_USE_ADAPTIVE_LOCKS
6545 #if KMP_DEBUG_ADAPTIVE_LOCKS
6546 __kmp_init_speculative_stats();
6549 #if KMP_STATS_ENABLED
6552 __kmp_init_lock(&__kmp_global_lock);
6553 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6554 __kmp_init_lock(&__kmp_debug_lock);
6555 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6556 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6557 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6558 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6559 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6560 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6561 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6562 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6563 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6564 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6565 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6566 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6567 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6568 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6569 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6571 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6573 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6577 __kmp_runtime_initialize();
6579 #if KMP_MIC_SUPPORTED
6580 __kmp_check_mic_type();
6587 __kmp_abort_delay = 0;
6591 __kmp_dflt_team_nth_ub = __kmp_xproc;
6592 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6593 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6595 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6596 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6598 __kmp_max_nth = __kmp_sys_max_nth;
6599 __kmp_cg_max_nth = __kmp_sys_max_nth;
6600 __kmp_teams_max_nth = __kmp_xproc;
6601 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6602 __kmp_teams_max_nth = __kmp_sys_max_nth;
6607 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6609 __kmp_monitor_wakeups =
6610 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6611 __kmp_bt_intervals =
6612 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6615 __kmp_library = library_throughput;
6617 __kmp_static = kmp_sch_static_balanced;
6624 #if KMP_FAST_REDUCTION_BARRIER
6625 #define kmp_reduction_barrier_gather_bb ((int)1)
6626 #define kmp_reduction_barrier_release_bb ((int)1)
6627 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6628 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6629 #endif // KMP_FAST_REDUCTION_BARRIER
6630 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6631 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6632 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6633 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6634 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6635 #if KMP_FAST_REDUCTION_BARRIER
6636 if (i == bs_reduction_barrier) {
6638 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6639 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6640 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6641 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6643 #endif // KMP_FAST_REDUCTION_BARRIER
6645 #if KMP_FAST_REDUCTION_BARRIER
6646 #undef kmp_reduction_barrier_release_pat
6647 #undef kmp_reduction_barrier_gather_pat
6648 #undef kmp_reduction_barrier_release_bb
6649 #undef kmp_reduction_barrier_gather_bb
6650 #endif // KMP_FAST_REDUCTION_BARRIER
6651 #if KMP_MIC_SUPPORTED
6652 if (__kmp_mic_type == mic2) {
6654 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6655 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6657 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6658 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6660 #if KMP_FAST_REDUCTION_BARRIER
6661 if (__kmp_mic_type == mic2) {
6662 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6663 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6665 #endif // KMP_FAST_REDUCTION_BARRIER
6666 #endif // KMP_MIC_SUPPORTED
6670 __kmp_env_checks = TRUE;
6672 __kmp_env_checks = FALSE;
6676 __kmp_foreign_tp = TRUE;
6678 __kmp_global.g.g_dynamic = FALSE;
6679 __kmp_global.g.g_dynamic_mode = dynamic_default;
6681 __kmp_env_initialize(NULL);
6685 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
6686 if (__kmp_str_match_true(val)) {
6687 kmp_str_buf_t buffer;
6688 __kmp_str_buf_init(&buffer);
6689 __kmp_i18n_dump_catalog(&buffer);
6690 __kmp_printf(
"%s", buffer.str);
6691 __kmp_str_buf_free(&buffer);
6693 __kmp_env_free(&val);
6696 __kmp_threads_capacity =
6697 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6699 __kmp_tp_capacity = __kmp_default_tp_capacity(
6700 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6705 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6706 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6707 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6708 __kmp_thread_pool = NULL;
6709 __kmp_thread_pool_insert_pt = NULL;
6710 __kmp_team_pool = NULL;
6717 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6719 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6720 __kmp_root = (kmp_root_t **)((
char *)__kmp_threads +
6721 sizeof(kmp_info_t *) * __kmp_threads_capacity);
6724 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6726 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6731 gtid = __kmp_register_root(TRUE);
6732 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
6733 KMP_ASSERT(KMP_UBER_GTID(gtid));
6734 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6738 __kmp_common_initialize();
6742 __kmp_register_atfork();
6745 #if !KMP_DYNAMIC_LIB
6749 int rc = atexit(__kmp_internal_end_atexit);
6751 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
6757 #if KMP_HANDLE_SIGNALS
6763 __kmp_install_signals(FALSE);
6766 __kmp_install_signals(TRUE);
6771 __kmp_init_counter++;
6773 __kmp_init_serial = TRUE;
6775 if (__kmp_settings) {
6779 if (__kmp_display_env || __kmp_display_env_verbose) {
6780 __kmp_env_print_2();
6789 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
6805 static void __kmp_do_middle_initialize(
void) {
6807 int prev_dflt_team_nth;
6809 if (!__kmp_init_serial) {
6810 __kmp_do_serial_initialize();
6813 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
6817 prev_dflt_team_nth = __kmp_dflt_team_nth;
6819 #if KMP_AFFINITY_SUPPORTED
6822 __kmp_affinity_initialize();
6826 for (i = 0; i < __kmp_threads_capacity; i++) {
6827 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6828 __kmp_affinity_set_init_mask(i, TRUE);
6833 KMP_ASSERT(__kmp_xproc > 0);
6834 if (__kmp_avail_proc == 0) {
6835 __kmp_avail_proc = __kmp_xproc;
6841 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6842 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6847 if (__kmp_dflt_team_nth == 0) {
6848 #ifdef KMP_DFLT_NTH_CORES
6850 __kmp_dflt_team_nth = __kmp_ncores;
6851 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6852 "__kmp_ncores (%d)\n",
6853 __kmp_dflt_team_nth));
6856 __kmp_dflt_team_nth = __kmp_avail_proc;
6857 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6858 "__kmp_avail_proc(%d)\n",
6859 __kmp_dflt_team_nth));
6863 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6864 __kmp_dflt_team_nth = KMP_MIN_NTH;
6866 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6867 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6872 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
6874 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6879 for (i = 0; i < __kmp_threads_capacity; i++) {
6880 kmp_info_t *thread = __kmp_threads[i];
6883 if (thread->th.th_current_task->td_icvs.nproc != 0)
6886 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
6891 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6892 __kmp_dflt_team_nth));
6894 #ifdef KMP_ADJUST_BLOCKTIME
6896 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6897 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6898 if (__kmp_nth > __kmp_avail_proc) {
6899 __kmp_zero_bt = TRUE;
6905 TCW_SYNC_4(__kmp_init_middle, TRUE);
6907 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
6923 void __kmp_parallel_initialize(
void) {
6924 int gtid = __kmp_entry_gtid();
6927 if (TCR_4(__kmp_init_parallel))
6929 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6930 if (TCR_4(__kmp_init_parallel)) {
6931 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6936 if (TCR_4(__kmp_global.g.g_done)) {
6939 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
6940 __kmp_infinite_loop();
6946 if (!__kmp_init_middle) {
6947 __kmp_do_middle_initialize();
6949 __kmp_resume_if_hard_paused();
6952 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
6953 KMP_ASSERT(KMP_UBER_GTID(gtid));
6955 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6958 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6959 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6960 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6964 #if KMP_HANDLE_SIGNALS
6966 __kmp_install_signals(TRUE);
6970 __kmp_suspend_initialize();
6972 #if defined(USE_LOAD_BALANCE)
6973 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6974 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6977 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6978 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6982 if (__kmp_version) {
6983 __kmp_print_version_2();
6987 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6990 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
6992 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6997 void __kmp_run_before_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
6999 kmp_disp_t *dispatch;
7004 this_thr->th.th_local.this_construct = 0;
7005 #if KMP_CACHE_MANAGE
7006 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7008 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7009 KMP_DEBUG_ASSERT(dispatch);
7010 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7014 dispatch->th_disp_index = 0;
7015 dispatch->th_doacross_buf_idx = 0;
7016 if (__kmp_env_consistency_check)
7017 __kmp_push_parallel(gtid, team->t.t_ident);
7022 void __kmp_run_after_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
7024 if (__kmp_env_consistency_check)
7025 __kmp_pop_parallel(gtid, team->t.t_ident);
  __kmp_finish_implicit_task(this_thr);
}
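// Invoke the team's microtask (t_pkfn) on the calling thread, bracketed by
// __kmp_run_before/after_invoked_task and optional OMPT/ITT notifications.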
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7038 if (__itt_stack_caller_create_ptr) {
7039 __kmp_itt_stack_callee_enter(
7041 team->t.t_stack_id);
7044 #if INCLUDE_SSC_MARKS
7045 SSC_MARK_INVOKING();
7050 void **exit_frame_p;
7051 ompt_data_t *my_task_data;
7052 ompt_data_t *my_parallel_data;
7055 if (ompt_enabled.enabled) {
7057 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
7059 exit_frame_p = &dummy;
7063 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7064 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7065 if (ompt_enabled.ompt_callback_implicit_task) {
7066 ompt_team_size = team->t.t_nproc;
7067 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7068 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7069 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7070 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7074 #if KMP_STATS_ENABLED
7076 if (previous_state == stats_state_e::TEAMS_REGION) {
7077 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7079 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7081 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7084 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7085 tid, (
int)team->t.t_argc, (
void **)team->t.t_argv
7092 *exit_frame_p = NULL;
7093 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7096 #if KMP_STATS_ENABLED
7097 if (previous_state == stats_state_e::TEAMS_REGION) {
7098 KMP_SET_THREAD_STATE(previous_state);
7100 KMP_POP_PARTITIONED_TIMER();
  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_callee_leave(
        (__itt_caller)team->t.t_stack_id); // inform ittnotify we left user code
  }
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
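// Executed by the master thread of each team in a teams construct: push a
// fresh contention-group root for thread_limit accounting, then fork the
// nested parallel region that runs the teams microtask.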
void __kmp_teams_master(int gtid) {
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7127 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
7130 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7131 tmp->cg_nthreads = 1;
7132 KA_TRACE(100, (
"__kmp_teams_master: Thread %p created node %p and init"
7133 " cg_nthreads to 1\n",
7135 tmp->up = thr->th.th_cg_roots;
7136 thr->th.th_cg_roots = tmp;
7140 #if INCLUDE_SSC_MARKS
7143 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7144 (microtask_t)thr->th.th_teams_microtask,
7145 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7146 #if INCLUDE_SSC_MARKS
7150 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7151 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7154 __kmp_join_call(loc, gtid
7163 int __kmp_invoke_teams_master(
int gtid) {
7164 kmp_info_t *this_thr = __kmp_threads[gtid];
7165 kmp_team_t *team = this_thr->th.th_team;
7167 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7168 KMP_DEBUG_ASSERT((
void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7169 (
void *)__kmp_teams_master);
7171 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7173 int tid = __kmp_tid_from_gtid(gtid);
7174 ompt_data_t *task_data =
7175 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7176 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7177 if (ompt_enabled.ompt_callback_implicit_task) {
7178 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7179 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7181 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  __kmp_teams_master(gtid);
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
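// Remember the value of a num_threads clause for the next parallel region
// started by this thread.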
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
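// Remember the num_teams and thread_limit clauses of a teams construct,
// clamping both so that num_teams * num_threads does not exceed
// __kmp_teams_max_nth.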
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);
7214 if (num_teams > __kmp_teams_max_nth) {
7215 if (!__kmp_reserve_warn) {
7216 __kmp_reserve_warn = 1;
7217 __kmp_msg(kmp_ms_warning,
7218 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7219 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7221 num_teams = __kmp_teams_max_nth;
7225 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7228 if (!TCR_4(__kmp_init_middle))
7229 __kmp_middle_initialize();
7230 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7231 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7232 if (num_threads == 0) {
7233 num_threads = __kmp_avail_proc / num_teams;
7237 if (num_threads > __kmp_dflt_team_nth) {
7238 num_threads = __kmp_dflt_team_nth;
7240 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7241 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7243 if (num_teams * num_threads > __kmp_teams_max_nth) {
7244 num_threads = __kmp_teams_max_nth / num_teams;
7249 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7251 if (num_threads > __kmp_dflt_team_nth) {
7252 num_threads = __kmp_dflt_team_nth;
7254 if (num_teams * num_threads > __kmp_teams_max_nth) {
7255 int new_threads = __kmp_teams_max_nth / num_teams;
7256 if (!__kmp_reserve_warn) {
7257 __kmp_reserve_warn = 1;
7258 __kmp_msg(kmp_ms_warning,
7259 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7260 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7262 num_threads = new_threads;
7265 thr->th.th_teams_size.nth = num_threads;
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
7288 team->t.t_construct = 0;
7289 team->t.t_ordered.dt.t_value =
7293 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7294 if (team->t.t_max_nproc > 1) {
7296 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7297 team->t.t_disp_buffer[i].buffer_index = i;
7298 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7301 team->t.t_disp_buffer[0].buffer_index = 0;
7302 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7306 KMP_ASSERT(this_thr->th.th_team == team);
7309 for (f = 0; f < team->t.t_nproc; f++) {
7310 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7311 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7316 __kmp_fork_barrier(gtid, 0);
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
7330 if (__kmp_threads[gtid] &&
7331 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7332 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7333 __kmp_threads[gtid]);
7334 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7335 "team->t.t_nproc=%d\n",
7336 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7338 __kmp_print_structure();
7340 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7341 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7344 __kmp_join_barrier(gtid);
7346 if (ompt_enabled.enabled &&
7347 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
7348 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7349 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
7350 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
7352 void *codeptr = NULL;
7353 if (KMP_MASTER_TID(ds_tid) &&
7354 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7355 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7356 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7358 if (ompt_enabled.ompt_callback_sync_region_wait) {
7359 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
7360 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7363 if (ompt_enabled.ompt_callback_sync_region) {
7364 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
7365 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7369 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7370 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7371 ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
7377 KMP_ASSERT(this_thr->th.th_team == team);
7382 #ifdef USE_LOAD_BALANCE
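// Count the worker threads of the root's hot team that are currently active
// (with infinite blocktime all workers are assumed active).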
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i, retval = 0;
  kmp_team_t *hot_team;

  if (root->r.r_active)
    return 0;
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
    return hot_team->t.t_nproc - 1; // don't count the master thread
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active)
      retval++;
  }
  return retval;
}
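// Under dynamic load-balance mode, decide how many threads the next parallel
// region should actually use, based on the system load reported by
// __kmp_get_load_balance(); falls back to the thread-limit heuristic if the
// load cannot be determined.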
7411 static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc) {
7414 int hot_team_active;
7415 int team_curr_active;
7418 KB_TRACE(20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7420 KMP_DEBUG_ASSERT(root);
7421 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7422 ->th.th_current_task->td_icvs.dynamic == TRUE);
7423 KMP_DEBUG_ASSERT(set_nproc > 1);
7425 if (set_nproc == 1) {
7426 KB_TRACE(20, (
"__kmp_load_balance_nproc: serial execution.\n"));
7435 pool_active = __kmp_thread_pool_active_nth;
7436 hot_team_active = __kmp_active_hot_team_nproc(root);
7437 team_curr_active = pool_active + hot_team_active + 1;
7440 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7441 KB_TRACE(30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d "
7442 "hot team active = %d\n",
7443 system_active, pool_active, hot_team_active));
7445 if (system_active < 0) {
7449 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7450 KMP_WARNING(CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit");
7453 retval = __kmp_avail_proc - __kmp_nth +
7454 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7455 if (retval > set_nproc) {
7458 if (retval < KMP_MIN_NTH) {
7459 retval = KMP_MIN_NTH;
7462 KB_TRACE(20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7470 if (system_active < team_curr_active) {
7471 system_active = team_curr_active;
7473 retval = __kmp_avail_proc - system_active + team_curr_active;
7474 if (retval > set_nproc) {
7477 if (retval < KMP_MIN_NTH) {
7478 retval = KMP_MIN_NTH;
7481 KB_TRACE(20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval));
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;
  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;
  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
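
// Usage note (illustrative): by default __kmpc_begin()/__kmpc_end() are
// ignored; setting the corresponding variable to a false value makes them do
// real work, e.g.
//
//   $ export KMP_IGNORE_MPPEND=false   # let __kmpc_end() shut the library down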
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;
  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
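
// Usage note (illustrative): this routine backs the kmp_set_library(),
// kmp_set_library_serial(), kmp_set_library_turnaround() and
// kmp_set_library_throughput() entry points; the same modes can be selected
// up front with the environment, e.g.
//
//   $ export KMP_LIBRARY=throughput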
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) { // round up to a multiple of the page size (0x1000)
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;
    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
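
// Usage note (illustrative): the default worker stack size can only be changed
// before the first parallel region, either via the API or the environment:
//
//   kmp_set_stacksize_s((size_t)8 * 1024 * 1024); // 8 MB
//
//   $ export OMP_STACKSIZE=8M   # equivalent environment setting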
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = 200; // default blocktime in milliseconds
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
/* Getting team information common for all team API; returns NULL if the
   calling thread is not inside a teams construct. */
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 0; // teams region is serialized (1 team of 1 thread)
    return team->t.t_master_tid;
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 1;
    return team->t.t_parent->t.t_nproc;
  }
  return 1;
}
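
// Usage note (illustrative): these helpers back the omp_get_team_num() and
// omp_get_num_teams() entry points for code running inside a teams construct:
//
//   #pragma omp teams num_teams(4)
//   {
//     int t = omp_get_team_num();  // 0 .. omp_get_num_teams()-1
//     int n = omp_get_num_teams(); // at most 4 here
//   }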
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec, e.g. 'L' -> nesting level
  const char *long_name; // from spec, e.g. "nesting_level"
  char field_format; // how to format the field in printf style
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
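
// Usage note (illustrative, not present in the original source): the table
// above defines the field specifiers accepted by OMP_AFFINITY_FORMAT and
// omp_set_affinity_format(); long names are written inside braces, e.g.
//
//   $ export OMP_DISPLAY_AFFINITY=true
//   $ export OMP_AFFINITY_FORMAT="host=%H pid=%P tid=%n aff=%{thread_affinity}"
//
// %{thread_affinity} selects the same field as the short form %A.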
// Parse one %-field of the affinity format string starting at *ptr and print
// it into field_buffer.  Returns the number of characters produced.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      int length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to the spec, if an implementation has no info for a field
    // type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Return the number of characters needed to hold the affinity string (not
// including the null byte); the result is printed into buffer.
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
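
// Usage note (illustrative): this routine is the backend for the OpenMP 5.0
// omp_display_affinity() entry point; passing NULL or "" falls back to the
// affinity-format-var ICV.  A sketch of user-level usage:
//
//   #include <omp.h>
//   #pragma omp parallel
//   omp_display_affinity(NULL); // one line per thread, default format
//
//   char buf[256];
//   size_t n = omp_capture_affinity(buf, sizeof(buf), "%{thread_num} on %H");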
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime has been explicitly set */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
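
// Usage note (illustrative): blocktime is normally controlled through
// KMP_BLOCKTIME or the kmp_set_blocktime() entry point that ends up here:
//
//   kmp_set_blocktime(0);        // workers sleep immediately at barriers
//
//   $ export KMP_BLOCKTIME=200   # default: spin ~200 ms before sleeping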
void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
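
// Usage note (illustrative): this is the backend of kmp_set_defaults(), which
// accepts the same "NAME=value" strings as the environment, e.g.
//
//   kmp_set_defaults("KMP_BLOCKTIME=0");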
/* ------------------------------------------------------------------------ */
/* Internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct ( lck != NULL ).
  // If ( reduce_data != NULL && reduce_func != NULL ), the tree_reduce_block
  // method can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic_reduce_block
  // method can be selected by the RTL.
  // Finally, it is up to the OpenMP RTL to pick among the methods generated by
  // the compiler.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // Another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
       // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}

// This function exists for testing the set/get/determine reduce method logic.
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
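
// Usage note (illustrative): the automatic selection above can be overridden
// for experiments with the KMP_FORCE_REDUCTION environment variable, e.g.
//
//   $ export KMP_FORCE_REDUCTION=atomic   # also: critical, tree
//
// The >> 8 in __kmp_get_reduce_method() drops the packed barrier-type bits in
// the low byte and returns only the method number; it exists for testing.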
// Soft pause sets up threads to ignore blocktime and just go to sleep.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}
// Soft resume clears __kmp_pause_status and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// Called via __kmpc_pause_resource.  Returns 0 if the request succeeded.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // runtime is not paused, so there is nothing to resume
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  } else {
    return 1; // invalid level
  }
}
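
// Usage note (illustrative): this routine backs the OpenMP 5.0 pause API, e.g.
//
//   omp_pause_resource_all(omp_pause_soft); // put workers to sleep
//   omp_pause_resource_all(omp_pause_hard); // full runtime shutdown
//
// A non-zero return value means the request was rejected (for example, asking
// to resume when nothing is paused).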
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
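
// Usage note (illustrative): the same report is available without code changes
// through the standard environment variable, e.g.
//
//   $ export OMP_DISPLAY_ENV=true       # or "verbose" for the extended list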