15 #include "kmp_affinity.h" 16 #include "kmp_atomic.h" 17 #include "kmp_environment.h" 18 #include "kmp_error.h" 22 #include "kmp_settings.h" 23 #include "kmp_stats.h" 25 #include "kmp_wait_release.h" 26 #include "kmp_wrapper_getpid.h" 29 #include "ompt-specific.h" 33 #define KMP_USE_PRCTL 0 39 #include "tsan_annotations.h" 41 #if defined(KMP_GOMP_COMPAT) 42 char const __kmp_version_alt_comp[] =
43 KMP_VERSION_PREFIX
"alternative compiler support: yes";
46 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX
"API version: " 58 char const __kmp_version_lock[] =
59 KMP_VERSION_PREFIX
"lock type: run time selectable";
62 #define KMP_MIN(x, y) ((x) < (y) ? (x) : (y)) 66 kmp_info_t __kmp_monitor;
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
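
/* Note on __kmp_get_global_thread_id() below: it resolves the calling
   thread's gtid without registering it. Depending on __kmp_gtid_mode it uses
   thread-local data (TDATA), keyed TLS, or, as a last resort, a scan of the
   registered threads' recorded stack ranges ("internal alg.") to find the one
   that contains the current stack address. */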
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;
      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
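
/* __kmp_check_stack_overlap() optionally prints the thread's stack extent to
   the storage map and, when extensive environment checks are enabled,
   verifies that this thread's stack does not overlap the stack of any other
   registered thread. */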
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}
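
/* __kmp_print_storage_map_gtid() below formats a single "OMP storage map"
   line for the address range [p1, p2] and, when data-placement printing is
   compiled in, additionally reports the memory node(s) backing the range. */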
#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0)
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = p1;
            lastNode = node;
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination.
    __kmp_global.g.g_abort = SIGABRT;
    raise(SIGABRT);
    _exit(3);
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
}

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
}
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
#ifdef KMP_DYNAMIC_LIB

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // Check that there are no other live threads registered with the library.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume we are alone; reset the locks that may still be held.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_DYNAMIC_LIB */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init & 1;

  if (status) {
    __kmp_yield_init |= 1;
  } else {
    __kmp_yield_init &= ~1;
  }

  return old_status;
}
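
/* __kmp_parallel_deo()/__kmp_parallel_dxo() below implement the enter/exit
   protocol for an ordered section inside a parallel region: entry waits for
   the team's ordered ticket to reach this thread's tid, exit passes the
   ticket on to the next thread in the team. */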
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
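
/* __kmp_enter_single() decides which thread of the team executes a SINGLE
   construct: in a serialized team the caller always wins; otherwise the
   threads race on the team's t_construct counter with a compare-and-store. */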
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means this thread got
       the single block */
    if (team->t.t_construct == old_this) {
      status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                           th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
        th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
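
/* __kmp_reserve_threads() decides how many threads a new team may actually
   get: it applies the dynamic adjustment mode (load balance, thread limit, or
   random), then clips the request against KMP_DEVICE_THREAD_LIMIT,
   OMP_THREAD_LIMIT, and the capacity of the __kmp_threads array. A return
   value of 1 means the region will be serialized. */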
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT / KMP_ALL_THREADS.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT.
  if (root->r.r_cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_cg_max_nth) {
    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) {
    int level = team->t.t_active_level - 1;
    if (master_th->th.th_teams_microtask) {
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level;
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level;
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1;
      } else {
        use_hot_team = 0;
        hot_teams[level].hot_team = team;
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}
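
/* The two helpers below copy the master's x87 and MXCSR floating-point
   control settings into the team at fork (propagateFPControl) and restore the
   hardware registers from the team at join (updateHWFPControl); on non-x86
   targets they compile away to no-ops. */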
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector).
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
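
/* __kmp_serialized_parallel() runs a parallel region with a team of one: it
   pushes the calling thread onto its (possibly newly allocated) serial team,
   bumps the serialization/nesting counters, stacks a private dispatch buffer,
   and fires the OMPT parallel-begin/implicit-task callbacks when enabled. */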
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

#if OMP_40_ENABLED
  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified; use the current proc-bind-var.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data;
  ompt_parallel_data.ptr = NULL;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array.
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                     ompt_parallel_data,
#endif
#if OMP_40_ENABLED
                                     proc_bind,
#endif
                                     &this_thr->th.th_current_task->td_icvs,
                                     0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next level.
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

#if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  } else {
    /* this serialized team is already being used; add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array.
    int level = this_thr->th.th_team->t.t_level;
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
#if OMP_40_ENABLED
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
#endif

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; content was swapped

    /* OMPT implicit task begin */
    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
  }
#endif
}
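
/* __kmp_fork_call() does most of the work of starting a parallel region: it
   decides how many threads the region gets (possibly serializing it),
   allocates or reuses a team, copies in the outlined microtask's arguments,
   forks the workers, and then invokes the microtask on the master. It returns
   TRUE when a real (non-serialized) team was formed. */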
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    va_list *ap
#else
                    va_list ap
#endif
                    ) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with */
    /* some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data;
  ompt_parallel_data.ptr = NULL;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif

  // Nested level will be an index in the nested nthreads array.
  level = parent_team->t.t_level;
  active_level = parent_team->t.t_active_level;
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
          OMPT_INVOKER(call_context), return_address);
    }
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // Parallel closely nested inside a teams construct: the hot team is
    // already set up, so initialize it and release the workers.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      *argv++ = va_arg(*ap, void *);
#else
      *argv++ = va_arg(ap, void *);
#endif

    if (parent_team == master_th->th.th_serial_team) {
      // We are in a serialized parallel.
      __kmpc_serialized_parallel(loc, gtid);
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
      // Needed so that enquiry functions work correctly; restored at join.
      parent_team->t.t_serialized--;
#if OMPT_SUPPORT
      void *dummy;
      void **exit_runtime_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = omp_state_work_parallel;
      } else {
        exit_runtime_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_runtime_p
#endif
                               );
      }

#if OMPT_SUPPORT
      *exit_runtime_p = NULL;
      if (ompt_enabled.enabled) {
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              __kmp_tid_from_gtid(gtid));
        }
        __ompt_lw_taskteam_unlink(master_th);

        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context), return_address);
        }
        master_th->th.ompt_thread_info.state = omp_state_overhead;
      }
#endif
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // Only can reduce number of threads dynamically, can't increase.
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
      }
      master_th->th.th_set_nproc = 0;
    }

#if USE_DEBUGGER
    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) {
        master_set_numthreads = nth;
      }
    }
#endif

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    /* Invoke microtask for MASTER thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      if (!parent_team->t.t_invoke(gtid)) {
        KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
      }
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));
    KMP_MB(); /* Flush all pending memory write invalidates. */

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  }
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    // No need for the forkjoin lock for serialized parallel out of teams.
    if (nthreads > 1) {
      if ((!get__nested(master_th) && (root->r.r_in_parallel
                                       && !enter_teams)) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(
          root, parent_team, master_tid, nthreads);
      if (nthreads == 1) {
        // Free lock for single thread execution here; for multi-thread
        // execution it will be freed after the team is created/initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now.
  master_th->th.th_set_nproc = 0;

  /* create a serialized parallel region? */
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    if (call_context == fork_context_intel) {
      master_th->th.th_serial_team->t.t_ident = loc;
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                __kmp_tid_from_gtid(gtid));
          }

          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = omp_state_overhead;
        }
#endif
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg(*ap, void *);
#else
            *argv++ = va_arg(ap, void *);
#endif
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct.
            argv[i] = parent_team->t.t_argv[i];
        }
        // Initial code in teams should have level=0; call the special invoker
        // for the outer "parallel" of the teams construct.
        team->t.t_level--;
        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          invoker(gtid);
        }
      } else {
        argv = args;
        for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
          *argv++ = va_arg(*ap, void *);
#else
          *argv++ = va_arg(ap, void *);
#endif
        KMP_MB();

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          *exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                __kmp_tid_from_gtid(gtid));
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = omp_state_overhead;
        }
#endif
      }
    } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = NULL;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
#endif

      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    KMP_MB();
    return FALSE;
  }

  // Only modify the executing flag in the case when not serialized; the
  // serialized case is handled in __kmpc_serialized_parallel().
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv =
      proc_bind_default; // proc_bind_default means don't update
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use current proc-bind-var.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region.
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0)
      || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // Do not increase parallel level at start of the teams construct.
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set master's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  propagateFPControl(team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set master's task team to team's task team.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of master's task_state.
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero new space
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store master's task_state on the stack.
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore master's nested state if nested hot team.
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      void *new_argv = va_arg(*ap, void *);
#else
      void *new_argv = va_arg(ap, void *);
#endif
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct.
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = omp_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
#if USE_ITT_NOTIFY
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin.
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else
#endif /* USE_ITT_NOTIFY */
        if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
            __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id =
        __kmp_itt_stack_caller_create(); // create new stack stitching id
    // before entering fork barrier
  }
#endif /* USE_ITT_BUILD */

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for MASTER thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  {
    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
    if (!team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
  }
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  return TRUE;
}
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? omp_state_work_serial
                              : omp_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   fork_context_e fork_context, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
        codeptr);
  }

  task_info->frame.enter_frame = NULL;
  __kmp_join_restore_state(thread, team);
}
#endif
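
/* __kmp_join_call() is the counterpart of __kmp_fork_call(): it waits for the
   team at the join barrier, updates ITT/OMPT bookkeeping, restores the
   master's view of the parent team (tid, dispatch, ICVs, task team), and
   frees or shrinks the finished team as appropriate. */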
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
#if OMP_40_ENABLED
                     ,
                     int exit_teams
#endif /* OMP_40_ENABLED */
                     ) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;
  int i;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in a teams construct.
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // We haven't incremented it earlier at the start of the teams
        // construct, so do it here, at the end of the teams construct.
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // We are exiting a parallel inside teams; increment serialization so
        // it can be restored in __kmpc_end_serialized_parallel().
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // No barrier for internal teams at exit from teams construct, but there
    // is a barrier for the external team (league).
    __kmp_internal_join(loc, gtid, team);
  } else {
    master_th->th.th_task_state =
        0; // no tasking in teams (out of any parallel)
  }

  KMP_MB();

#if OMPT_SUPPORT
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_caller_destroy(
        (__itt_caller)team->t
            .t_stack_id); // destroy the stack stitching id after join barrier
  }

  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1
      && !master_th->th.th_teams_microtask) { /* not in teams construct */
    master_th->th.th_ident = loc;
    // Only one notification scheme (either "submit" or "forking/joined").
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // Leave the team structure intact at the end of a parallel inside the
    // teams construct, so the next parallel reuses the same (hot) team; only
    // adjust nesting levels here.

    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);

    /* Restore number of threads in the team if needed */
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of the non-used threads of the team.
      for (i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        int b;
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state.
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
                      codeptr);
    }
#endif

    return;
  }

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* The following lock has REL and ACQ semantics, separating the parallel user
     code from the serial user code that runs after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      int ompt_team_size = team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          __kmp_tid_from_gtid(gtid));
    }

    task_info->frame.exit_frame = NULL;
    task_info->task_data = ompt_data_none;
  }
#endif

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  // Restore master thread's partition.
  master_th->th.th_first_place = team->t.t_first_place;
  master_th->th.th_last_place = team->t.t_last_place;
#endif /* OMP_40_ENABLED */

  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free worker threads

  /* Keep the following inside the critical region; otherwise assertions may
     occasionally fail because the old team may be reallocated and the
     hierarchy appears inconsistent. */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember master's state if we re-use this nested hot team.
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level.
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the master thread.
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
                    codeptr);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
/* Check whether we should push an internal control record onto the
   serial team stack.  If so, do it. */
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
      ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // Threads no longer in the team should unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call.
    hot_team->t.t_size_changed = -1;
  }
}
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // Ignore the call; keep the last valid setting and issue a warning.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK: value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT].
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // If the input exceeds the upper limit, clamp it to the limit.
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid; correct it if needed.
  // Valid kinds fall in one of two intervals, standard or extended.
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // Differ static chunked vs. unchunked: an invalid chunk indicates an
      // unchunked schedule (which is the default).
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

  switch (th_type) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    *chunk = 0; // chunk was not set; signal this via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // We are in a teams region where multiple nested teams share a level.
    int tlevel = thr->th.th_teams_level; // level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // Artificially increase ii to step over the teams league.
      ii += (ii == tlevel) ? 2 : 1;
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // We are in a teams region where multiple nested teams share a level.
    int tlevel = thr->th.th_teams_level; // level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // Artificially increase ii to step over the teams league.
      ii += (ii == tlevel) ? 2 : 1;
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
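// The next routine snapshots the global run-time schedule into a
// kmp_r_sched_t.  The generic kmp_sch_static / kmp_sch_guided_chunked kinds
// are replaced by their detailed variants (__kmp_static, __kmp_guided) so
// later changes to those globals do not retroactively affect existing teams.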
kmp_r_sched_t __kmp_get_schedule_global() {
  kmp_r_sched_t r_sched;

  // Detail the generic STATIC/GUIDED kinds; otherwise use __kmp_sched as-is.
  if (__kmp_sched == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static; // balanced or greedy
  } else if (__kmp_sched == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided; // iterative or analytical
  } else { // STATIC_CHUNKED, DYNAMIC_CHUNKED, or other
    r_sched.r_sched_type = __kmp_sched;
  }

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was never set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
/* Allocate a new argv[] for the team (plus extra slots if needed). */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // serial_nesting_level (value of th_team_serialized)
      (kmp_int8)__kmp_dflt_nested, // nested parallelism (per thread)
      (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
      (kmp_int8)__kmp_env_blocktime, // bt_set: blocktime explicitly set?
      __kmp_dflt_blocktime, // blocktime
#if KMP_USE_MONITOR
      __kmp_bt_intervals, // blocktime intervals
#endif
      __kmp_dflt_team_nth, // nproc for the next parallel region (per thread)
      __kmp_dflt_max_active_levels, // max_active_levels
      r_sched, // runtime schedule {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0],
      __kmp_default_device,
      NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
  root->r.r_nested = __kmp_dflt_nested;
  root->r.r_cg_nthreads = 1;

  /* setup the root team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
                          ompt_data_none, // root parallel id
                          __kmp_nested_proc_bind.bind_types[0],
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
                          ompt_data_none, // root parallel id
                          __kmp_nested_proc_bind.bind_types[0],
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
    ) {

  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }

  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
void __kmp_print_structure(void) {

  kmp_team_list_t list;

  // Initialize list of teams.
  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      __kmp_printf("%2d", gtid);
      if (__kmp_threads != NULL) {
        __kmp_printf(" %p", __kmp_threads[gtid]);
      }
      if (__kmp_root != NULL) {
        __kmp_printf(" %p", __kmp_root[gtid]);
      }
      __kmp_printf("\n");
    }
  }

  // Print out __kmp_threads array.
  __kmp_printf("\n------------------------------\nThreads\n--------------------"
               "----------\n");
  if (__kmp_threads != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf(" Our Root: %p\n", thread->th.th_root);
        __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
        __kmp_print_structure_team(" Serial Team: ",
                                   thread->th.th_serial_team);
        __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread(" Master: ",
                                     thread->th.th_team_master);
        __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
        __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
        __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
        __kmp_print_structure_thread(" Next in pool: ",
                                     thread->th.th_next_pool);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, thread->th.th_team);
        __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
      }
    }
  } else {
    __kmp_printf("Threads array is not allocated.\n");
  }

  // Print out __kmp_root array.
  __kmp_printf("\n------------------------------\nUbers\n----------------------"
               "--------\n");
  if (__kmp_root != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      if (root != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, root);
        __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
        __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
        __kmp_print_structure_thread(" Uber Thread: ",
                                     root->r.r_uber_thread);
        __kmp_printf(" Active?: %2d\n", root->r.r_active);
        __kmp_printf(" Nested?: %2d\n", root->r.r_nested);
        __kmp_printf(" In Parallel: %2d\n", root->r.r_in_parallel);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, root->r.r_root_team);
        __kmp_print_structure_team_accum(list, root->r.r_hot_team);
      }
    }
  } else {
    __kmp_printf("Ubers array is not allocated.\n");
  }

  // Print out accumulated list of teams.
  __kmp_printf("\n------------------------------\nTeams\n----------------------"
               "--------\n");
  while (list->next != NULL) {
    kmp_team_p const *team = list->entry;
    int i;
    __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
    __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
    __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
    __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
    __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
    __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
    for (i = 0; i < team->t.t_nproc; ++i) {
      __kmp_printf(" Thread %2d: ", i);
      __kmp_print_structure_thread("", team->t.t_threads[i]);
    }
    __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
    __kmp_printf("\n");
    list = list->next;
  }

  // Print out the pools.
  __kmp_printf("\n------------------------------\nPools\n----------------------"
               "--------\n");
  __kmp_print_structure_thread("Thread pool: ",
                               CCAST(kmp_info_t *, __kmp_thread_pool));
  __kmp_print_structure_team("Team pool: ",
                             CCAST(kmp_team_t *, __kmp_team_pool));
  __kmp_printf("\n");

  // Free the team list.
  while (list != NULL) {
    kmp_team_list_item_t *item = list;
    list = list->next;
    KMP_INTERNAL_FREE(item);
  }
}
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
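// The per-thread fast random number generator below is a linear congruential
// generator: x_{n+1} = a * x_n + 1 (mod 2^32), where the multiplier 'a' is a
// prime picked per thread from __kmp_primes[].  Only the high 16 bits of x are
// returned, since the low-order bits of an LCG have a short period.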
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; return the
   number of entries reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only reclaim roots that died inactive
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
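// Capacity growth in the next routine: the combined __kmp_threads[] /
// __kmp_root[] allocation is roughly doubled until it can hold the requested
// number of new slots, capped at __kmp_sys_max_nth (or at __kmp_tp_capacity
// once a threadprivate cache exists).  The old entries are copied over, the
// new tail is zeroed, and the new arrays are published while holding
// __kmp_tp_cached_lock.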
static int __kmp_expand_threads(int nWish, int nNeed) {
  int added = 0;
  int old_tp_cached;
  int __kmp_actual_max_nth;

  if (nNeed > nWish) /* normalize the arguments */
    nWish = nNeed;
#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
  /* only for Windows static library: reclaim dead roots first */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
  if (nWish) {
    nWish -= added;
    if (nWish < 0)
      nWish = 0;
  }
#endif
  if (nWish <= 0)
    return added;

  while (1) {
    int nTarget;
    int minimumRequiredCapacity;
    int newCapacity;
    kmp_info_t **newThreads;
    kmp_root_t **newRoot;

    nTarget = nWish;
    old_tp_cached = __kmp_tp_cached;
    __kmp_actual_max_nth =
        old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
    KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);

    /* if possible, expand the size of the threads array */
    if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
      /* can't fulfil nWish, so try nNeed */
      if (nNeed) {
        nTarget = nNeed;
        if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
          /* possible expansion too small -- give up */
          break;
        }
      } else {
        /* best-effort */
        nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
        if (!nTarget) {
          /* cannot expand at all -- give up */
          break;
        }
      }
    }
    minimumRequiredCapacity = __kmp_threads_capacity + nTarget;

    newCapacity = __kmp_threads_capacity;
    do {
      newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1)
                        ? (newCapacity << 1)
                        : __kmp_actual_max_nth;
    } while (newCapacity < minimumRequiredCapacity);
    newThreads = (kmp_info_t **)__kmp_allocate(
        (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity +
        CACHE_LINE);
    newRoot = (kmp_root_t **)((char *)newThreads +
                              sizeof(kmp_info_t *) * newCapacity);
    KMP_MEMCPY(newThreads, __kmp_threads,
               __kmp_threads_capacity * sizeof(kmp_info_t *));
    KMP_MEMCPY(newRoot, __kmp_root,
               __kmp_threads_capacity * sizeof(kmp_root_t *));
    memset(newThreads + __kmp_threads_capacity, 0,
           (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t *));
    memset(newRoot + __kmp_threads_capacity, 0,
           (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t *));

    if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      /* A threadprivate cache was allocated while we were expanding and our
         new capacity exceeds its capacity: free the new arrays and retry.
         This is the first check of a double-check pair. */
      __kmp_free(newThreads);
      continue; /* start over and try again */
    }
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      /* Same check as above, but now under the lock. */
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
      __kmp_free(newThreads);
      continue; /* start over and try again */
    } else {
      /* success: publish the new arrays */
      *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
      *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
      added += newCapacity - __kmp_threads_capacity;
      *(volatile int *)&__kmp_threads_capacity = newCapacity;
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
      break;
    }
  }
  return added;
}
/* Register the current thread as a root thread and obtain our gtid.  We must
   have the __kmp_initz_lock held at this point.  The argument is TRUE only if
   this is the thread that calls from __kmp_do_serial_initialize(). */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
  KMP_MB();

  /* Slot #0 is reserved for the initial thread, so it must not be counted as
     available capacity for a non-initial thread. */
  capacity = __kmp_threads_capacity;
  if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }

  /* see if there are too many threads */
  if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1, 1)) {
    if (__kmp_tp_cached) {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread),
                  KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                  KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
    } else {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
                  __kmp_msg_null);
    }
  }

  /* find an available thread slot; don't reassign slot zero, which is reserved
     for the initial thread */
  for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
       gtid++)
    ;
  KA_TRACE(1,
           ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
  KMP_ASSERT(gtid < __kmp_threads_capacity);

  /* update global accounting */
  TCW_4(__kmp_nth, __kmp_nth + 1);

  // If __kmp_adjust_gtid_mode is set, use stack-pointer search for low numbers
  // of procs and the keyed TLS API for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary (middle initialization might not
     have occurred yet). */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* setup this new hierarchy */
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
    KMP_DEBUG_ASSERT(!root->r.r_root_team);
  }

#if KMP_STATS_ENABLED
  // Initialize stats as soon as possible (right after gtid assignment).
  __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
  KMP_SET_THREAD_STATE(SERIAL_REGION);
#endif
  __kmp_initialize_root(root);

  /* setup new root thread structure */
  if (root->r.r_uber_thread) {
    root_thread = root->r.r_uber_thread;
  } else {
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
    if (__kmp_storage_map) {
      __kmp_print_thread_storage_map(root_thread, gtid);
    }
    root_thread->th.th_info.ds.ds_gtid = gtid;
    root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
    root_thread->th.th_root = root;
    if (__kmp_env_consistency_check) {
      root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
    }
    __kmp_initialize_fast_memory(root_thread);

    KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
    __kmp_initialize_bget(root_thread);

    __kmp_init_random(root_thread); // Initialize random number generator
  }

  /* setup the serial team held in reserve by the root thread */
  if (!root_thread->th.th_serial_team) {
    kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
    root_thread->th.th_serial_team =
        __kmp_allocate_team(root, 1, 1,
                            ompt_data_none, // root parallel id
                            proc_bind_default,
                            &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(root_thread->th.th_serial_team);
  KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
                root_thread->th.th_serial_team));

  /* drop root_thread into place */
  TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);

  root->r.r_root_team->t.t_threads[0] = root_thread;
  root->r.r_hot_team->t.t_threads[0] = root_thread;
  root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
  // The serial team is created in reserve, not for execution.
  root_thread->th.th_serial_team->t.t_serialized = 0;
  root->r.r_uber_thread = root_thread;

  /* initialize the thread, get it ready to go */
  __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
  TCW_4(__kmp_init_gtid, TRUE);

  /* prepare the master thread for get_gtid() */
  __kmp_gtid_set_specific(gtid);

  __kmp_itt_thread_name(gtid);

#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_create_worker(gtid, root_thread, __kmp_stksize);
  KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);

  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
      root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
    }
  }
  KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
                   KMP_INIT_BARRIER_STATE);

#if KMP_AFFINITY_SUPPORTED
  root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;

  if (TCR_4(__kmp_init_middle)) {
    __kmp_affinity_set_init_mask(gtid, TRUE);
  }
#endif /* KMP_AFFINITY_SUPPORTED */

  __kmp_root_counter++;

  if (!initial_thread && ompt_enabled.enabled) {

    ompt_thread_t *root_thread = ompt_get_thread();

    ompt_set_thread_state(root_thread, omp_state_overhead);

    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_initial, __ompt_get_thread_data_internal());
    }
    ompt_data_t *task_data;
    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          NULL, NULL, task_data, ompt_task_initial, 0, NULL);
    }

    ompt_set_thread_state(root_thread, omp_state_work_serial);
  }

  KMP_MB();
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  return gtid;
}
#if KMP_NESTED_HOT_TEAMS
// Free the hot teams nested at the given level (recursively); return the
// number of threads released.
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // master is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
// Reset the root: release the hot team and its workers so that a new root can
// reuse this slot.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so clear r_hot_team before
  // calling it to avoid a confusing assertion.
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level > 0) {
    // Free nested hot teams and their threads, if any.
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before reaping the root thread, make certain that all other threads in the
  // teams that had this root as an ancestor have stopped trying to steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close the handle of the root duplicated in __kmp_create_worker. */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }

  root->r.r_cg_nthreads--;

  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // The root thread cannot be placed in __kmp_thread_pool, so it is reaped
  // instead of freed.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* This lock should be ok, since unregister_root_current_thread is never
     called during an abort, only during a normal close. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // We need to wait for the proxy tasks before finishing the thread.
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
    // The runtime is shutting down, so we won't report any events.
    thread->th.ompt_thread_info.state = omp_state_undefined;
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  /* free up this thread slot */
  __kmp_gtid_set_specific(KMP_GTID_DNE);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_DNE;
#endif

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
/* __kmp_forkjoin_lock must already be held.
   Unregisters a root thread that is not the current thread.  Returns the
   number of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
               "ptask=%p\n",
               gtid, tid, this_thr, team, this_thr->th.th_current_task,
               team->t.t_implicit_task_taskdata[tid].td_parent);
  (void)steam; // serial team pointer kept for debugger inspection
}
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
  /* this_thr->th.th_info.ds.ds_gtid is set up in create_worker;
     this_thr->th.th_serial_team is set up in __kmp_allocate_thread */
  kmp_info_t *master = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(master);
  KMP_DEBUG_ASSERT(master->th.th_root);

  KMP_MB();

  TCW_SYNC_PTR(this_thr->th.th_team, team);

  this_thr->th.th_info.ds.ds_tid = tid;
  this_thr->th.th_set_nproc = 0;
  if (__kmp_tasking_mode != tskm_immediate_exec)
    // When tasking is possible, threads are not safe to reap until they are
    // done tasking; this will be set when tasking code is exited in wait.
    this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  else // no tasking --> always safe to reap
    this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
  this_thr->th.th_set_proc_bind = proc_bind_default;
#if KMP_AFFINITY_SUPPORTED
  this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
  this_thr->th.th_root = master->th.th_root;

  /* setup the thread's cache of the team structure */
  this_thr->th.th_team_nproc = team->t.t_nproc;
  this_thr->th.th_team_master = master;
  this_thr->th.th_team_serialized = team->t.t_serialized;
  TCW_PTR(this_thr->th.th_sleep_loc, NULL);

  KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  this_thr->th.th_dispatch = &team->t.t_dispatch[tid];

  this_thr->th.th_local.this_construct = 0;

  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
    }
    this_thr->th.th_pri_head = NULL;
  }

  /* Initialize dynamic dispatch */
  {
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    // Use team max_nproc since this will never change for the team.
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
    KMP_ASSERT(dispatch);
    KMP_DEBUG_ASSERT(team->t.t_dispatch);
    KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);

    dispatch->th_disp_index = 0;
    dispatch->th_doacross_buf_idx = 0;
    if (!dispatch->th_disp_buffer) {
      dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(disp_size);

      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            gtid, &dispatch->th_disp_buffer[0],
            &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
                                          ? 1
                                          : __kmp_dispatch_num_buffers],
            disp_size, "th_%d.th_dispatch.th_disp_buffer "
                       "(team_%d.t_dispatch[%d].th_disp_buffer)",
            gtid, team->t.t_id, gtid);
      }
    } else {
      memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
    }

    dispatch->th_dispatch_pr_current = 0;
    dispatch->th_dispatch_sh_current = 0;

    dispatch->th_deo_fcn = 0; /* ORDERED     */
    dispatch->th_dxo_fcn = 0; /* END ORDERED */
  }

  this_thr->th.th_next_pool = NULL;

  if (!this_thr->th.th_task_state_memo_stack) {
    size_t i;
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
    this_thr->th.th_task_state_top = 0;
    this_thr->th.th_task_state_stack_sz = 4;
    for (i = 0; i < this_thr->th.th_task_state_stack_sz;
         ++i) // zero init the stack
      this_thr->th.th_task_state_memo_stack[i] = 0;
  }

  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_MB();
}
/* Allocate a new thread for the requesting team.  This is only called from
   within a forkjoin critical section.  First try the thread pool; if none is
   available, fork a new worker. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif
  KMP_MB();

  /* first, try to get one from the thread pool */
  if (__kmp_thread_pool) {

    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    // Don't touch th_active_in_pool or th_active here; the worker thread
    // adjusts those flags as it sleeps/awakens.
    __kmp_thread_pool_nth--;

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
    KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);

    /* setup the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);
    root->r.r_cg_nthreads++;

    new_thr->th.th_task_state = 0;
    new_thr->th.th_task_state_top = 0;
    new_thr->th.th_task_state_stack_sz = 4;

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

    {
      // If the thread entered the pool via __kmp_free_thread, wait_flag should
      // not be KMP_BARRIER_PARENT_FLAG.
      int b;
      kmp_balign_t *balign = new_thr->th.th_bar;
      for (b = 0; b < bs_last_barrier; ++b)
        KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
    }

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no thread available in the pool, so fork a new one */
  KMP_ASSERT(__kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

  // If this is the first worker thread the RTL is creating, the monitor thread
  // also has to be created.
  if (!TCR_4(__kmp_init_monitor)) {
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
      // Wait until the monitor has started before proceeding.
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }

  KMP_MB();
  for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
    KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
  }

  /* allocate space for it. */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  // Add the reserve serial team, which is not used for now.
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
                                          ompt_data_none, // root parallel id
                                          proc_bind_default,
                                          &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // created in reserve, not for execution
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

  __kmp_initialize_fast_memory(new_thr);

  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);

  __kmp_init_random(new_thr); // Initialize random number generator

  /* Initialize these only once when the thread is grabbed for a team */
  KA_TRACE(20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
                __kmp_get_gtid(), KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));

  {
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b) {
      balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
      balign[b].bb.team = NULL;
      balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
      balign[b].bb.use_oncore_barrier = 0;
    }
  }

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  /* adjust the global counters */
  root->r.r_cg_nthreads++;

  // If __kmp_adjust_gtid_mode is set, use stack-pointer search for low numbers
  // of procs and the keyed TLS API for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}
/* Reinitialize team for reuse.  The hot team code calls this at every fork
   barrier, so it must avoid writes that would invalidate shared cache lines. */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the master thread's implicit taskdata.
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
/* Initialize the team data structure.  This assumes that t_threads and
   t_max_nproc are already set. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent is left untouched: clearing it would mess up hot teams */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));

  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
/* Set the full mask for the thread and return its old mask; no changes to
   internal structures. */
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0) {
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
      }
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
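// The next routine distributes the places of the master's partition
// [first_place, last_place] among the team's threads according to proc_bind:
// proc_bind_master keeps every thread on the master's place, proc_bind_close
// packs threads into consecutive places, and proc_bind_spread carves the
// partition into per-thread sub-partitions so nested teams can spread further.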
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Copy the master thread's place partition to the team.
  kmp_info_t *master_th = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master_th != NULL);
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  int first_place = master_th->th.th_first_place;
  int last_place = master_th->th.th_last_place;
  int masters_place = master_th->th.th_current_place;
  team->t.t_first_place = first_place;
  team->t.t_last_place = last_place;

  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));

  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams might have proc_bind_default; the master thread is never
    // rebound, so there is nothing to do.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_master: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      th->th.th_first_place = first_place;
      th->th.th_last_place = last_place;
      th->th.th_new_place = masters_place;

      KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;

  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on the next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add an extra
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;

  case proc_bind_spread: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = -1;

      if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th != NULL);

          th->th.th_first_place = place;
          th->th.th_new_place = place;
          s_count = 1;
          while (s_count < S) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            s_count++;
          }
          if (rem && (gap_ct == gap)) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            rem--;
            gap_ct = 0;
          }
          th->th.th_last_place = place;
          gap_ct++;

          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }

          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, __kmp_affinity_num_masks));
        }
      } else {
        /* With a uniform space of available places, create T partitions of
           roughly P/T places each and put one thread into the first place of
           each partition. */
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
          if (first >= n_places) {
            if (masters_place) {
              first -= n_places;
              last -= n_places;
              if (first == (masters_place + 1)) {
                KMP_DEBUG_ASSERT(f == n_th);
                first--;
              }
              if (last == masters_place) {
                KMP_DEBUG_ASSERT(f == (n_th - 1));
                last--;
              }
            } else {
              KMP_DEBUG_ASSERT(f == n_th);
              first = 0;
              last = 0;
            }
          }
          if (last >= n_places) {
            last = (n_places - 1);
          }
          place = first;
          current += spacing;
          if (f < n_th) {
            KMP_DEBUG_ASSERT(0 <= first);
            KMP_DEBUG_ASSERT(n_places > first);
            KMP_DEBUG_ASSERT(0 <= last);
            KMP_DEBUG_ASSERT(n_places > last);
            KMP_DEBUG_ASSERT(last_place >= first_place);
            th = team->t.t_threads[f];
            KMP_DEBUG_ASSERT(th);
            th->th.th_first_place = first;
            th->th.th_new_place = place;
            th->th.th_last_place = last;

            KA_TRACE(100,
                     ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                      "partition = [%d,%d], spacing = %.4f\n",
                      __kmp_gtid_from_thread(team->t.t_threads[f]),
                      team->t.t_id, f, th->th.th_new_place,
                      th->th.th_first_place, th->th.th_last_place, spacing));
          }
        }
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = place;
        th->th.th_last_place = place;
        th->th.th_new_place = place;
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on the next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add an extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    }
  } break;

  default:
    break;
  }

  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
4849 __kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
4851 ompt_data_t ompt_parallel_data,
4854 kmp_proc_bind_t new_proc_bind,
4856 kmp_internal_control_t *new_icvs,
4857 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4858 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4861 int use_hot_team = !root->r.r_active;
4864 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
4865 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4866 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4869 #if KMP_NESTED_HOT_TEAMS 4870 kmp_hot_team_ptr_t *hot_teams;
4872 team = master->th.th_team;
4873 level = team->t.t_active_level;
4874 if (master->th.th_teams_microtask) {
4875 if (master->th.th_teams_size.nteams > 1 &&
4878 (microtask_t)__kmp_teams_master ||
4879 master->th.th_teams_level <
4885 hot_teams = master->th.th_hot_teams;
4886 if (level < __kmp_hot_teams_max_level && hot_teams &&
4896 if (use_hot_team && new_nproc > 1) {
4897 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
4898 #if KMP_NESTED_HOT_TEAMS 4899 team = hot_teams[level].hot_team;
4901 team = root->r.r_hot_team;
4904 if (__kmp_tasking_mode != tskm_immediate_exec) {
4905 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 4906 "task_team[1] = %p before reinit\n",
4907 team->t.t_task_team[0], team->t.t_task_team[1]));
4914 if (team->t.t_nproc == new_nproc) {
4915 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
4918 if (team->t.t_size_changed == -1) {
4919 team->t.t_size_changed = 1;
4921 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4925 kmp_r_sched_t new_sched = new_icvs->sched;
4927 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
4929 __kmp_reinitialize_team(team, new_icvs,
4930 root->r.r_uber_thread->th.th_ident);
4932 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4933 team->t.t_threads[0], team));
4934 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4937 #if KMP_AFFINITY_SUPPORTED 4938 if ((team->t.t_size_changed == 0) &&
4939 (team->t.t_proc_bind == new_proc_bind)) {
4940 if (new_proc_bind == proc_bind_spread) {
4941 __kmp_partition_places(
4944 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: " 4945 "proc_bind = %d, partition = [%d,%d]\n",
4946 team->t.t_id, new_proc_bind, team->t.t_first_place,
4947 team->t.t_last_place));
4949 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4950 __kmp_partition_places(team);
4953 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4956 }
else if (team->t.t_nproc > new_nproc) {
4958 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
4961 team->t.t_size_changed = 1;
4962 #if KMP_NESTED_HOT_TEAMS 4963 if (__kmp_hot_teams_mode == 0) {
4966 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4967 hot_teams[level].hot_team_nth = new_nproc;
4968 #endif // KMP_NESTED_HOT_TEAMS 4970 for (f = new_nproc; f < team->t.t_nproc; f++) {
4971 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4972 if (__kmp_tasking_mode != tskm_immediate_exec) {
4975 team->t.t_threads[f]->th.th_task_team = NULL;
4977 __kmp_free_thread(team->t.t_threads[f]);
4978 team->t.t_threads[f] = NULL;
4980 #if KMP_NESTED_HOT_TEAMS 4985 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4986 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4987 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4988 for (
int b = 0; b < bs_last_barrier; ++b) {
4989 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4990 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4992 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4996 #endif // KMP_NESTED_HOT_TEAMS 4997 team->t.t_nproc = new_nproc;
4999 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5000 __kmp_reinitialize_team(team, new_icvs,
5001 root->r.r_uber_thread->th.th_ident);
5004 for (f = 0; f < new_nproc; ++f) {
5005 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5009 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5010 team->t.t_threads[0], team));
5012 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5015 for (f = 0; f < team->t.t_nproc; f++) {
5016 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5017 team->t.t_threads[f]->th.th_team_nproc ==
5023 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5024 #if KMP_AFFINITY_SUPPORTED 5025 __kmp_partition_places(team);
5029 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5030 kmp_affin_mask_t *old_mask;
5031 if (KMP_AFFINITY_CAPABLE()) {
5032 KMP_CPU_ALLOC(old_mask);
5037 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5040 team->t.t_size_changed = 1;
5042 #if KMP_NESTED_HOT_TEAMS 5043 int avail_threads = hot_teams[level].hot_team_nth;
5044 if (new_nproc < avail_threads)
5045 avail_threads = new_nproc;
5046 kmp_info_t **other_threads = team->t.t_threads;
5047 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5051 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5052 for (b = 0; b < bs_last_barrier; ++b) {
5053 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5054 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5056 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5060 if (hot_teams[level].hot_team_nth >= new_nproc) {
5063 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5064 team->t.t_nproc = new_nproc;
5070 hot_teams[level].hot_team_nth = new_nproc;
5071 #endif // KMP_NESTED_HOT_TEAMS 5072 if (team->t.t_max_nproc < new_nproc) {
5074 __kmp_reallocate_team_arrays(team, new_nproc);
5075 __kmp_reinitialize_team(team, new_icvs, NULL);
5078 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5083 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5087 for (f = team->t.t_nproc; f < new_nproc; f++) {
5088 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5089 KMP_DEBUG_ASSERT(new_worker);
5090 team->t.t_threads[f] = new_worker;
5093 (
"__kmp_allocate_team: team %d init T#%d arrived: " 5094 "join=%llu, plain=%llu\n",
5095 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5096 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5097 team->t.t_bar[bs_plain_barrier].b_arrived));
5101 kmp_balign_t *balign = new_worker->th.th_bar;
5102 for (b = 0; b < bs_last_barrier; ++b) {
5103 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5104 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5105 KMP_BARRIER_PARENT_FLAG);
5107 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5113 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5114 if (KMP_AFFINITY_CAPABLE()) {
5116 __kmp_set_system_affinity(old_mask, TRUE);
5117 KMP_CPU_FREE(old_mask);
5120 #if KMP_NESTED_HOT_TEAMS
5122 #endif // KMP_NESTED_HOT_TEAMS
5124     int old_nproc = team->t.t_nproc;
5126 __kmp_initialize_team(team, new_nproc, new_icvs,
5127 root->r.r_uber_thread->th.th_ident);
5130 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5131 for (f = 0; f < team->t.t_nproc; ++f)
5132 __kmp_initialize_info(team->t.t_threads[f], team, f,
5133 __kmp_gtid_from_tid(f, team));
5140 for (f = old_nproc; f < team->t.t_nproc; ++f)
5141 team->t.t_threads[f]->th.th_task_state =
5142 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5145 team->t.t_threads[0]->th.th_task_state;
5146 for (f = old_nproc; f < team->t.t_nproc; ++f)
5147 team->t.t_threads[f]->th.th_task_state = old_state;
5151 for (f = 0; f < team->t.t_nproc; ++f) {
5152 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5153 team->t.t_threads[f]->th.th_team_nproc ==
5159 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5160 #if KMP_AFFINITY_SUPPORTED
5161     __kmp_partition_places(team);
5167 kmp_info_t *master = team->t.t_threads[0];
5168 if (master->th.th_teams_microtask) {
5169 for (f = 1; f < new_nproc; ++f) {
5171 kmp_info_t *thr = team->t.t_threads[f];
5172 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5173 thr->th.th_teams_level = master->th.th_teams_level;
5174 thr->th.th_teams_size = master->th.th_teams_size;
5178 #if KMP_NESTED_HOT_TEAMS
5182     for (f = 1; f < new_nproc; ++f) {
5183 kmp_info_t *thr = team->t.t_threads[f];
5185 kmp_balign_t *balign = thr->th.th_bar;
5186 for (b = 0; b < bs_last_barrier; ++b) {
5187 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5188 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5190 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5195 #endif // KMP_NESTED_HOT_TEAMS
5198   __kmp_alloc_argv_entries(argc, team, TRUE);
5199 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5203 KF_TRACE(10, (
" hot_team = %p\n", team));
5206 if (__kmp_tasking_mode != tskm_immediate_exec) {
5207 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 5208 "task_team[1] = %p after reinit\n",
5209 team->t.t_task_team[0], team->t.t_task_team[1]));
5214 __ompt_team_assign_id(team, ompt_parallel_data);
5224 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5227 if (team->t.t_max_nproc >= max_nproc) {
5229 __kmp_team_pool = team->t.t_next_pool;
5232 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5234 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and " 5235 "task_team[1] %p to NULL\n",
5236 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5237 team->t.t_task_team[0] = NULL;
5238 team->t.t_task_team[1] = NULL;
5241 __kmp_alloc_argv_entries(argc, team, TRUE);
5242 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5245 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5246 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5249 for (b = 0; b < bs_last_barrier; ++b) {
5250 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5252 team->t.t_bar[b].b_master_arrived = 0;
5253 team->t.t_bar[b].b_team_arrived = 0;
5259 team->t.t_proc_bind = new_proc_bind;
5262 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5266 __ompt_team_assign_id(team, ompt_parallel_data);
5278 team = __kmp_reap_team(team);
5279 __kmp_team_pool = team;
5284 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5287 team->t.t_max_nproc = max_nproc;
5290 __kmp_allocate_team_arrays(team, max_nproc);
5292 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5293 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5295   KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                     "%p to NULL\n",
5297                 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5298 team->t.t_task_team[0] = NULL;
5300 team->t.t_task_team[1] = NULL;
5303 if (__kmp_storage_map) {
5304 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5308 __kmp_alloc_argv_entries(argc, team, FALSE);
5309 team->t.t_argc = argc;
5312 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5313 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5316 for (b = 0; b < bs_last_barrier; ++b) {
5317 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5319 team->t.t_bar[b].b_master_arrived = 0;
5320 team->t.t_bar[b].b_team_arrived = 0;
5326 team->t.t_proc_bind = new_proc_bind;
5330 __ompt_team_assign_id(team, ompt_parallel_data);
5331 team->t.ompt_serialized_team_info = NULL;
5336 KA_TRACE(20, (
"__kmp_allocate_team: done creating a new team %d.\n",
5347 void __kmp_free_team(kmp_root_t *root,
5348 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5350 KA_TRACE(20, (
"__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5354 KMP_DEBUG_ASSERT(root);
5355 KMP_DEBUG_ASSERT(team);
5356 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5357 KMP_DEBUG_ASSERT(team->t.t_threads);
5359 int use_hot_team = team == root->r.r_hot_team;
5360 #if KMP_NESTED_HOT_TEAMS
5362   kmp_hot_team_ptr_t *hot_teams;
5364 level = team->t.t_active_level - 1;
5365 if (master->th.th_teams_microtask) {
5366 if (master->th.th_teams_size.nteams > 1) {
5370 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5371 master->th.th_teams_level == team->t.t_level) {
5376 hot_teams = master->th.th_hot_teams;
5377 if (level < __kmp_hot_teams_max_level) {
5378 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5382 #endif // KMP_NESTED_HOT_TEAMS 5385 TCW_SYNC_PTR(team->t.t_pkfn,
5387 team->t.t_copyin_counter = 0;
5391 if (!use_hot_team) {
5392 if (__kmp_tasking_mode != tskm_immediate_exec) {
5394 for (f = 1; f < team->t.t_nproc; ++f) {
5395 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5396 kmp_info_t *th = team->t.t_threads[f];
5397 volatile kmp_uint32 *state = &th->th.th_reap_state;
5398 while (*state != KMP_SAFE_TO_REAP) {
5402 if (!__kmp_is_thread_alive(th, &ecode)) {
5403 *state = KMP_SAFE_TO_REAP;
5408 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5409 if (fl.is_sleeping())
5410 fl.resume(__kmp_gtid_from_thread(th));
5417 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5418 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5419 if (task_team != NULL) {
5420 for (f = 0; f < team->t.t_nproc;
5422 team->t.t_threads[f]->th.th_task_team = NULL;
5426 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5427 __kmp_get_gtid(), task_team, team->t.t_id));
5428 #if KMP_NESTED_HOT_TEAMS
5429           __kmp_free_task_team(master, task_team);
5431 team->t.t_task_team[tt_idx] = NULL;
5437 team->t.t_parent = NULL;
5438 team->t.t_level = 0;
5439 team->t.t_active_level = 0;
5442 for (f = 1; f < team->t.t_nproc; ++f) {
5443 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5444 __kmp_free_thread(team->t.t_threads[f]);
5445 team->t.t_threads[f] = NULL;
5450 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5451   __kmp_team_pool = (volatile kmp_team_t *)team;
5458 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5459 kmp_team_t *next_pool = team->t.t_next_pool;
5461 KMP_DEBUG_ASSERT(team);
5462 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5463 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5464 KMP_DEBUG_ASSERT(team->t.t_threads);
5465 KMP_DEBUG_ASSERT(team->t.t_argv);
5470 __kmp_free_team_arrays(team);
5471 if (team->t.t_argv != &team->t.t_inline_argv[0])
5472 __kmp_free((
void *)team->t.t_argv);
5504 void __kmp_free_thread(kmp_info_t *this_th) {
5507 kmp_root_t *root = this_th->th.th_root;
5509 KA_TRACE(20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5510 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5512 KMP_DEBUG_ASSERT(this_th);
5517 kmp_balign_t *balign = this_th->th.th_bar;
5518 for (b = 0; b < bs_last_barrier; ++b) {
5519 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5520 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5521 balign[b].bb.team = NULL;
5522 balign[b].bb.leaf_kids = 0;
5524 this_th->th.th_task_state = 0;
5527 TCW_PTR(this_th->th.th_team, NULL);
5528 TCW_PTR(this_th->th.th_root, NULL);
5529 TCW_PTR(this_th->th.th_dispatch, NULL);
5533 gtid = this_th->th.th_info.ds.ds_gtid;
5534 if (__kmp_thread_pool_insert_pt != NULL) {
5535 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5536 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5537 __kmp_thread_pool_insert_pt = NULL;
5546 if (__kmp_thread_pool_insert_pt != NULL) {
5547 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5549 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5551 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5552 scan = &((*scan)->th.th_next_pool))
5557 TCW_PTR(this_th->th.th_next_pool, *scan);
5558 __kmp_thread_pool_insert_pt = *scan = this_th;
5559 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5560 (this_th->th.th_info.ds.ds_gtid <
5561 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5562 TCW_4(this_th->th.th_in_pool, TRUE);
5563 __kmp_thread_pool_nth++;
5565 TCW_4(__kmp_nth, __kmp_nth - 1);
5566 root->r.r_cg_nthreads--;
5568 #ifdef KMP_ADJUST_BLOCKTIME
5571   if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5572 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5573 if (__kmp_nth <= __kmp_avail_proc) {
5574 __kmp_zero_bt = FALSE;
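Note: the free-thread path above keeps the global thread pool ordered by gtid and caches an insertion point so later frees do not rescan the list from the head. Below is a minimal standalone sketch of that ordering invariant; the node type and global names are simplified stand-ins, not the runtime's kmp_info_t or __kmp_thread_pool.

#include <cassert>
#include <cstdio>

struct PoolThread {                     // stand-in for kmp_info_t
  int gtid;
  PoolThread *next_pool;
};

static PoolThread *pool = nullptr;      // stand-in for __kmp_thread_pool
static PoolThread *insert_pt = nullptr; // stand-in for __kmp_thread_pool_insert_pt

// Insert 'th' so the pool stays sorted by ascending gtid.
static void pool_insert(PoolThread *th) {
  // The cached insertion point is only usable if it precedes the new gtid.
  if (insert_pt != nullptr && insert_pt->gtid > th->gtid)
    insert_pt = nullptr;
  PoolThread **scan = insert_pt ? &insert_pt->next_pool : &pool;
  for (; *scan != nullptr && (*scan)->gtid < th->gtid; scan = &(*scan)->next_pool)
    ;
  th->next_pool = *scan;
  insert_pt = *scan = th;
  assert(th->next_pool == nullptr || th->gtid < th->next_pool->gtid);
}

int main() {
  PoolThread a{3, nullptr}, b{1, nullptr}, c{2, nullptr};
  pool_insert(&a);
  pool_insert(&b);
  pool_insert(&c);
  for (PoolThread *p = pool; p; p = p->next_pool)
    std::printf("gtid %d\n", p->gtid); // prints 1, 2, 3
  return 0;
}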
5584 void *__kmp_launch_thread(kmp_info_t *this_thr) {
5585 int gtid = this_thr->th.th_info.ds.ds_gtid;
5587   kmp_team_t *(*volatile pteam);
5590 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
5592 if (__kmp_env_consistency_check) {
5593 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5597 ompt_data_t *thread_data;
5598 if (ompt_enabled.enabled) {
5599 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5600 thread_data->ptr = NULL;
5602 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5603 this_thr->th.ompt_thread_info.wait_id = 0;
5604 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5605 if (ompt_enabled.ompt_callback_thread_begin) {
5606 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5607 ompt_thread_worker, thread_data);
5613 if (ompt_enabled.enabled) {
5614 this_thr->th.ompt_thread_info.state = omp_state_idle;
5618 while (!TCR_4(__kmp_global.g.g_done)) {
5619 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5623 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5626 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5629 if (ompt_enabled.enabled) {
5630 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5634 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5637 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5639 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5642 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5643 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5644 (*pteam)->t.t_pkfn));
5646 updateHWFPControl(*pteam);
5649 if (ompt_enabled.enabled) {
5650 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
5655 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5656 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5657 rc = (*pteam)->t.t_invoke(gtid);
5662 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5663 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5664 (*pteam)->t.t_pkfn));
5667 if (ompt_enabled.enabled) {
5669 __ompt_get_task_info_object(0)->frame.exit_frame = NULL;
5671 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5672 this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
5676 __kmp_join_barrier(gtid);
5679 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5682 if (ompt_enabled.ompt_callback_thread_end) {
5683 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
5687 this_thr->th.th_task_team = NULL;
5689 __kmp_common_destroy_gtid(gtid);
5691 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
5698 void __kmp_internal_end_dest(void *specific_gtid) {
5699 #if KMP_COMPILER_ICC
5700 #pragma warning(push)
5701 #pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5705   int gtid = (kmp_intptr_t)specific_gtid - 1;
5706 #if KMP_COMPILER_ICC
5707 #pragma warning(pop)
5710   KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5723 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5724 __kmp_gtid_set_specific(gtid);
5725 #ifdef KMP_TDATA_GTID
5728   __kmp_internal_end_thread(gtid);
5731 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5737 __attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5738   __kmp_internal_end_atexit();
}

5741 void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
5747 void __kmp_internal_end_atexit(void) {
5748   KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5772 __kmp_internal_end_library(-1);
5774 __kmp_close_console();
5778 static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5783 KMP_DEBUG_ASSERT(thread != NULL);
5785 gtid = thread->th.th_info.ds.ds_gtid;
5789 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5792 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5796 ANNOTATE_HAPPENS_BEFORE(thread);
5797 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5798 __kmp_release_64(&flag);
5802 __kmp_reap_worker(thread);
5814 if (thread->th.th_active_in_pool) {
5815 thread->th.th_active_in_pool = FALSE;
5816 KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
5817 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
5821 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5822 --__kmp_thread_pool_nth;
5825 __kmp_free_implicit_task(thread);
5829 __kmp_free_fast_memory(thread);
5832 __kmp_suspend_uninitialize_thread(thread);
5834 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5835 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5840 #ifdef KMP_ADJUST_BLOCKTIME
5843   if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5844 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5845 if (__kmp_nth <= __kmp_avail_proc) {
5846 __kmp_zero_bt = FALSE;
5852 if (__kmp_env_consistency_check) {
5853 if (thread->th.th_cons) {
5854 __kmp_free_cons_stack(thread->th.th_cons);
5855 thread->th.th_cons = NULL;
5859 if (thread->th.th_pri_common != NULL) {
5860 __kmp_free(thread->th.th_pri_common);
5861 thread->th.th_pri_common = NULL;
5864 if (thread->th.th_task_state_memo_stack != NULL) {
5865 __kmp_free(thread->th.th_task_state_memo_stack);
5866 thread->th.th_task_state_memo_stack = NULL;
5870 if (thread->th.th_local.bget_data != NULL) {
5871 __kmp_finalize_bget(thread);
5875 #if KMP_AFFINITY_SUPPORTED
5876   if (thread->th.th_affin_mask != NULL) {
5877 KMP_CPU_FREE(thread->th.th_affin_mask);
5878 thread->th.th_affin_mask = NULL;
5882 __kmp_reap_team(thread->th.th_serial_team);
5883 thread->th.th_serial_team = NULL;
5890 static void __kmp_internal_end(void) {
5894 __kmp_unregister_library();
5901 __kmp_reclaim_dead_roots();
5905 for (i = 0; i < __kmp_threads_capacity; i++)
5907 if (__kmp_root[i]->r.r_active)
5910 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5912 if (i < __kmp_threads_capacity) {
5924 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5925 if (TCR_4(__kmp_init_monitor)) {
5926 __kmp_reap_monitor(&__kmp_monitor);
5927 TCW_4(__kmp_init_monitor, 0);
5929 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5930     KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5931 #endif // KMP_USE_MONITOR
5936     for (i = 0; i < __kmp_threads_capacity; i++) {
5937 if (__kmp_root[i]) {
5940 KMP_ASSERT(!__kmp_root[i]->r.r_active);
5949 while (__kmp_thread_pool != NULL) {
5951 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
5952 __kmp_thread_pool = thread->th.th_next_pool;
5954 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5955 thread->th.th_next_pool = NULL;
5956 thread->th.th_in_pool = FALSE;
5957 __kmp_reap_thread(thread, 0);
5959 __kmp_thread_pool_insert_pt = NULL;
5962 while (__kmp_team_pool != NULL) {
5964 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
5965 __kmp_team_pool = team->t.t_next_pool;
5967 team->t.t_next_pool = NULL;
5968 __kmp_reap_team(team);
5971 __kmp_reap_task_teams();
5973 for (i = 0; i < __kmp_threads_capacity; ++i) {
5980 TCW_SYNC_4(__kmp_init_common, FALSE);
5982 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
5990 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5991 if (TCR_4(__kmp_init_monitor)) {
5992 __kmp_reap_monitor(&__kmp_monitor);
5993 TCW_4(__kmp_init_monitor, 0);
5995 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5996 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
5999 TCW_4(__kmp_init_gtid, FALSE);
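Note: __kmp_internal_end above drains the thread and team pools by repeatedly detaching the head element and reaping it. A minimal sketch of that drain pattern over a singly linked pool follows; the item type is a simplified stand-in, not the runtime's structures.

#include <cstdio>

struct PoolItem {
  int id;
  PoolItem *next_pool;
};

// Detach and "reap" every item; the pool head is cleared as a side effect.
static void drain_pool(PoolItem *&pool) {
  while (pool != nullptr) {
    PoolItem *item = pool;        // take the current head
    pool = item->next_pool;       // advance before touching the item
    item->next_pool = nullptr;
    std::printf("reaping item %d\n", item->id); // real code releases OS resources here
  }
}

int main() {
  PoolItem c{3, nullptr}, b{2, &c}, a{1, &b};
  PoolItem *pool = &a;
  drain_pool(pool);               // reaps 1, 2, 3; pool ends up empty
  return 0;
}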
6008 void __kmp_internal_end_library(int gtid_req) {
6015 if (__kmp_global.g.g_abort) {
6016 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6020 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6021 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6029 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6031 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6032   if (gtid == KMP_GTID_SHUTDOWN) {
6033     KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6034                   "already shutdown\n"));
6036   } else if (gtid == KMP_GTID_MONITOR) {
6037     KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6038                   "registered, or system shutdown\n"));
6040   } else if (gtid == KMP_GTID_DNE) {
6041     KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                       "shutdown\n"));
6044   } else if (KMP_UBER_GTID(gtid)) {
6046 if (__kmp_root[gtid]->r.r_active) {
6047 __kmp_global.g.g_abort = -1;
6048 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6050 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6056 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6057 __kmp_unregister_root_current_thread(gtid);
6064 #ifdef DUMP_DEBUG_ON_EXIT
6065     if (__kmp_debug_buf)
6066 __kmp_dump_debug_buffer();
6072 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6075 if (__kmp_global.g.g_abort) {
6076 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6078 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6081 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6082 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6091 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6094 __kmp_internal_end();
6096 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6097 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6099 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6101 #ifdef DUMP_DEBUG_ON_EXIT
6102   if (__kmp_debug_buf)
6103 __kmp_dump_debug_buffer();
6107 __kmp_close_console();
6110 __kmp_fini_allocator();
6114 void __kmp_internal_end_thread(int gtid_req) {
6123 if (__kmp_global.g.g_abort) {
6124 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6128 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6129 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6137 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6139 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6140   if (gtid == KMP_GTID_SHUTDOWN) {
6141     KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6142                   "already shutdown\n"));
6144   } else if (gtid == KMP_GTID_MONITOR) {
6145     KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6146                   "registered, or system shutdown\n"));
6148   } else if (gtid == KMP_GTID_DNE) {
6149     KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                       "shutdown\n"));
6153   } else if (KMP_UBER_GTID(gtid)) {
6155 if (__kmp_root[gtid]->r.r_active) {
6156 __kmp_global.g.g_abort = -1;
6157 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6159 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6163 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6165 __kmp_unregister_root_current_thread(gtid);
6169 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6172 __kmp_threads[gtid]->th.th_task_team = NULL;
6176         ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6181 #if defined KMP_DYNAMIC_LIB
6190   KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6194 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6197 if (__kmp_global.g.g_abort) {
6198 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6200 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6203 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6204 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6215 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6217 for (i = 0; i < __kmp_threads_capacity; ++i) {
6218 if (KMP_UBER_GTID(i)) {
6221 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6222 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6223 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6230 __kmp_internal_end();
6232 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6233 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6235 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6237 #ifdef DUMP_DEBUG_ON_EXIT
6238   if (__kmp_debug_buf)
6239 __kmp_dump_debug_buffer();
6246 static long __kmp_registration_flag = 0;
6248 static char *__kmp_registration_str = NULL;
6251 static inline char *__kmp_reg_status_name() {
6256   return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6259 void __kmp_register_library_startup(void) {
6261 char *name = __kmp_reg_status_name();
6267 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6268   __kmp_initialize_system_tick();
6270 __kmp_read_system_time(&time.dtime);
6271 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6272   __kmp_registration_str =
6273       __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6274                        __kmp_registration_flag, KMP_LIBRARY_FILE);
6276   KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6277                 __kmp_registration_str));
6284 __kmp_env_set(name, __kmp_registration_str, 0);
6286 value = __kmp_env_get(name);
6287 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6297 char *flag_addr_str = NULL;
6298 char *flag_val_str = NULL;
6299 char const *file_name = NULL;
6300       __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6301       __kmp_str_split(tail, '-', &flag_val_str, &tail);
6304       long *flag_addr = 0;
6306       KMP_SSCANF(flag_addr_str, "%p", &flag_addr);
6307       KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6308       if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6312 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6326         file_name = "unknown library";
6330         char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6331 if (!__kmp_str_match_true(duplicate_ok)) {
6333 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6334 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6336 KMP_INTERNAL_FREE(duplicate_ok);
6337 __kmp_duplicate_library_ok = 1;
6342 __kmp_env_unset(name);
6344   default: { KMP_DEBUG_ASSERT(0); } break;
6347   KMP_INTERNAL_FREE((void *)value);
6349   KMP_INTERNAL_FREE((void *)name);
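Note: the registration value written to the environment above is built with the format "%p-%lx-%s" (flag address, flag value, library file name) and parsed back by splitting on '-'. Below is a hedged sketch of that encode/decode round trip using standard C formatting instead of the runtime's __kmp_str_* helpers; all names here are illustrative.

#include <cstdio>
#include <cstring>

int main() {
  long flag = 0xCAFE1234L;        // stand-in for __kmp_registration_flag
  const char *lib = "libomp.so";  // stand-in for KMP_LIBRARY_FILE

  // Encode, as done for __kmp_registration_str.
  char value[256];
  std::snprintf(value, sizeof(value), "%p-%lx-%s", (void *)&flag, flag, lib);
  std::printf("registered: %s\n", value);

  // Decode: split on '-' and read the address and value back.
  char copy[256];
  std::strncpy(copy, value, sizeof(copy) - 1);
  copy[sizeof(copy) - 1] = '\0';
  char *addr_str = std::strtok(copy, "-");
  char *val_str = std::strtok(nullptr, "-");
  char *file_name = std::strtok(nullptr, ""); // remainder: the library name
  void *addr = nullptr;
  unsigned long val = 0;
  if (addr_str && val_str && file_name &&
      std::sscanf(addr_str, "%p", &addr) == 1 &&
      std::sscanf(val_str, "%lx", &val) == 1) {
    // A live owner would be detected when 'addr' is mapped and still holds 'val'.
    std::printf("addr=%p val=%lx file=%s\n", addr, val, file_name);
  }
  return 0;
}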
6353 void __kmp_unregister_library(void) {
6355 char *name = __kmp_reg_status_name();
6356 char *value = __kmp_env_get(name);
6358 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6359 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6360 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6362 __kmp_env_unset(name);
6365 KMP_INTERNAL_FREE(__kmp_registration_str);
6366 KMP_INTERNAL_FREE(value);
6367 KMP_INTERNAL_FREE(name);
6369 __kmp_registration_flag = 0;
6370 __kmp_registration_str = NULL;
6377 #if KMP_MIC_SUPPORTED
6379 static void __kmp_check_mic_type() {
6380 kmp_cpuid_t cpuid_state = {0};
6381 kmp_cpuid_t *cs_p = &cpuid_state;
6382 __kmp_x86_cpuid(1, 0, cs_p);
6384 if ((cs_p->eax & 0xff0) == 0xB10) {
6385 __kmp_mic_type = mic2;
6386   } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6387     __kmp_mic_type = mic3;
6389 __kmp_mic_type = non_mic;
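Note: __kmp_check_mic_type above classifies the processor by masking the family/model signature bits that CPUID leaf 1 returns in EAX. A tiny sketch of the same masking test on a plain integer, with the signature constants taken from the code above; the enum names are stand-ins.

#include <cstdio>

enum mic_type { non_mic, mic2, mic3 };

// Classify a CPUID leaf-1 EAX signature the way the code above does.
static mic_type classify(unsigned eax) {
  if ((eax & 0xff0) == 0xB10)      // family/model bits matched for mic2
    return mic2;
  if ((eax & 0xf0ff0) == 0x50670)  // extended-model + model bits for mic3
    return mic3;
  return non_mic;
}

int main() {
  std::printf("%d %d %d\n", classify(0xB13), classify(0x50671), classify(0x406E3));
  // prints 1 2 0: the first two match the masked signatures, the last does not
  return 0;
}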
6395 static void __kmp_do_serial_initialize(void) {
6399   KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
6401   KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6402   KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6403   KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6404   KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6405   KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6411 __kmp_validate_locks();
6414 __kmp_init_allocator();
6419 __kmp_register_library_startup();
6422 if (TCR_4(__kmp_global.g.g_done)) {
6423 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6426 __kmp_global.g.g_abort = 0;
6427 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6430 #if KMP_USE_ADAPTIVE_LOCKS
6431 #if KMP_DEBUG_ADAPTIVE_LOCKS
6432   __kmp_init_speculative_stats();
6435 #if KMP_STATS_ENABLED
6438   __kmp_init_lock(&__kmp_global_lock);
6439 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6440 __kmp_init_lock(&__kmp_debug_lock);
6441 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6442 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6443 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6444 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6445 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6446 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6447 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6448 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6449 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6450 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6451 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6452 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6453 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6454 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6455 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6457 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6459 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6463 __kmp_runtime_initialize();
6465 #if KMP_MIC_SUPPORTED
6466   __kmp_check_mic_type();
6473 __kmp_abort_delay = 0;
6477 __kmp_dflt_team_nth_ub = __kmp_xproc;
6478 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6479 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6481 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6482 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6484 __kmp_max_nth = __kmp_sys_max_nth;
6485 __kmp_cg_max_nth = __kmp_sys_max_nth;
6486 __kmp_teams_max_nth = __kmp_xproc;
6487 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6488 __kmp_teams_max_nth = __kmp_sys_max_nth;
6493 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6495 __kmp_monitor_wakeups =
6496 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6497 __kmp_bt_intervals =
6498 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6501 __kmp_library = library_throughput;
6503 __kmp_static = kmp_sch_static_balanced;
6510 #if KMP_FAST_REDUCTION_BARRIER
6511 #define kmp_reduction_barrier_gather_bb ((int)1)
6512 #define kmp_reduction_barrier_release_bb ((int)1)
6513 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6514 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6515 #endif // KMP_FAST_REDUCTION_BARRIER
6516   for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6517 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6518 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6519 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6520 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6521 #if KMP_FAST_REDUCTION_BARRIER
6522     if (i == bs_reduction_barrier) {
6524 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6525 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6526 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6527 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6529 #endif // KMP_FAST_REDUCTION_BARRIER
6531 #if KMP_FAST_REDUCTION_BARRIER
6532 #undef kmp_reduction_barrier_release_pat
6533 #undef kmp_reduction_barrier_gather_pat
6534 #undef kmp_reduction_barrier_release_bb
6535 #undef kmp_reduction_barrier_gather_bb
6536 #endif // KMP_FAST_REDUCTION_BARRIER
6537 #if KMP_MIC_SUPPORTED
6538   if (__kmp_mic_type == mic2) {
6540 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6541 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6543 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6544 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6546 #if KMP_FAST_REDUCTION_BARRIER
6547   if (__kmp_mic_type == mic2) {
6548 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6549 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6551 #endif // KMP_FAST_REDUCTION_BARRIER
6552 #endif // KMP_MIC_SUPPORTED
6556   __kmp_env_checks = TRUE;
6558 __kmp_env_checks = FALSE;
6562 __kmp_foreign_tp = TRUE;
6564 __kmp_global.g.g_dynamic = FALSE;
6565 __kmp_global.g.g_dynamic_mode = dynamic_default;
6567 __kmp_env_initialize(NULL);
6571 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
6572 if (__kmp_str_match_true(val)) {
6573 kmp_str_buf_t buffer;
6574 __kmp_str_buf_init(&buffer);
6575 __kmp_i18n_dump_catalog(&buffer);
6576 __kmp_printf(
"%s", buffer.str);
6577 __kmp_str_buf_free(&buffer);
6579 __kmp_env_free(&val);
6582 __kmp_threads_capacity =
6583 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6585 __kmp_tp_capacity = __kmp_default_tp_capacity(
6586 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6591 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6592 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6593 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6594 __kmp_thread_pool = NULL;
6595 __kmp_thread_pool_insert_pt = NULL;
6596 __kmp_team_pool = NULL;
6603   size = (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
         CACHE_LINE;
6605   __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6606   __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6607                                sizeof(kmp_info_t *) * __kmp_threads_capacity);
6610 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6612 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6617 gtid = __kmp_register_root(TRUE);
6618 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
6619 KMP_ASSERT(KMP_UBER_GTID(gtid));
6620 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6624 __kmp_common_initialize();
6628 __kmp_register_atfork();
6631 #if !defined KMP_DYNAMIC_LIB
6635   int rc = atexit(__kmp_internal_end_atexit);
6637     __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6643 #if KMP_HANDLE_SIGNALS
6649   __kmp_install_signals(FALSE);
6652 __kmp_install_signals(TRUE);
6657 __kmp_init_counter++;
6659 __kmp_init_serial = TRUE;
6661 if (__kmp_settings) {
6666 if (__kmp_display_env || __kmp_display_env_verbose) {
6667 __kmp_env_print_2();
6669 #endif // OMP_40_ENABLED
6677   KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
6680 void __kmp_serial_initialize(void) {
6681 if (__kmp_init_serial) {
6684 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6685 if (__kmp_init_serial) {
6686 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6689 __kmp_do_serial_initialize();
6690 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
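Note: __kmp_serial_initialize (and the middle/parallel variants below) all follow the same pattern: test the init flag, take the bootstrap lock, re-test under the lock, then run the real initializer. A minimal sketch of that double-checked pattern, using std::atomic and std::mutex as stand-ins for the runtime's flags and bootstrap locks.

#include <atomic>
#include <cstdio>
#include <mutex>

static std::atomic<bool> init_done{false}; // stand-in for __kmp_init_serial
static std::mutex init_lock;               // stand-in for __kmp_initz_lock

static void do_initialize() { std::printf("initializing once\n"); }

static void ensure_initialized() {
  if (init_done.load(std::memory_order_acquire)) // cheap fast path
    return;
  std::lock_guard<std::mutex> guard(init_lock);
  if (init_done.load(std::memory_order_relaxed)) // re-check under the lock
    return;
  do_initialize();
  init_done.store(true, std::memory_order_release);
}

int main() {
  ensure_initialized();
  ensure_initialized(); // second call returns on the fast path
  return 0;
}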
6693 static void __kmp_do_middle_initialize(void) {
6695 int prev_dflt_team_nth;
6697 if (!__kmp_init_serial) {
6698 __kmp_do_serial_initialize();
6701 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
6705 prev_dflt_team_nth = __kmp_dflt_team_nth;
6707 #if KMP_AFFINITY_SUPPORTED
6710   __kmp_affinity_initialize();
6714 for (i = 0; i < __kmp_threads_capacity; i++) {
6715 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6716 __kmp_affinity_set_init_mask(i, TRUE);
6721 KMP_ASSERT(__kmp_xproc > 0);
6722 if (__kmp_avail_proc == 0) {
6723 __kmp_avail_proc = __kmp_xproc;
6729 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6730 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6735 if (__kmp_dflt_team_nth == 0) {
6736 #ifdef KMP_DFLT_NTH_CORES 6738 __kmp_dflt_team_nth = __kmp_ncores;
6739 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6740 "__kmp_ncores (%d)\n",
6741 __kmp_dflt_team_nth));
6744 __kmp_dflt_team_nth = __kmp_avail_proc;
6745 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6746 "__kmp_avail_proc(%d)\n",
6747 __kmp_dflt_team_nth));
6751 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6752 __kmp_dflt_team_nth = KMP_MIN_NTH;
6754 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6755 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6760 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
6762 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6767 for (i = 0; i < __kmp_threads_capacity; i++) {
6768 kmp_info_t *thread = __kmp_threads[i];
6771 if (thread->th.th_current_task->td_icvs.nproc != 0)
6774 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
6779 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6780 __kmp_dflt_team_nth));
6782 #ifdef KMP_ADJUST_BLOCKTIME
6784   if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6785 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6786 if (__kmp_nth > __kmp_avail_proc) {
6787 __kmp_zero_bt = TRUE;
6793 TCW_SYNC_4(__kmp_init_middle, TRUE);
6795 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
6798 void __kmp_middle_initialize(void) {
6799 if (__kmp_init_middle) {
6802 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6803 if (__kmp_init_middle) {
6804 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6807 __kmp_do_middle_initialize();
6808 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6811 void __kmp_parallel_initialize(void) {
6812 int gtid = __kmp_entry_gtid();
6815 if (TCR_4(__kmp_init_parallel))
6817 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6818 if (TCR_4(__kmp_init_parallel)) {
6819 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6824 if (TCR_4(__kmp_global.g.g_done)) {
6827 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
6828 __kmp_infinite_loop();
6834 if (!__kmp_init_middle) {
6835 __kmp_do_middle_initialize();
6839 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
6840 KMP_ASSERT(KMP_UBER_GTID(gtid));
6842 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6845   __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6846 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6847 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6851 #if KMP_HANDLE_SIGNALS
6853   __kmp_install_signals(TRUE);
6857 __kmp_suspend_initialize();
6859 #if defined(USE_LOAD_BALANCE)
6860   if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6861 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6864 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6865 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6869 if (__kmp_version) {
6870 __kmp_print_version_2();
6874 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6877 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
6879 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6884 void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                       kmp_team_t *team) {
6886   kmp_disp_t *dispatch;
6891 this_thr->th.th_local.this_construct = 0;
6892 #if KMP_CACHE_MANAGE
6893   KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
6895 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6896 KMP_DEBUG_ASSERT(dispatch);
6897 KMP_DEBUG_ASSERT(team->t.t_dispatch);
6901 dispatch->th_disp_index = 0;
6903 dispatch->th_doacross_buf_idx =
6906 if (__kmp_env_consistency_check)
6907 __kmp_push_parallel(gtid, team->t.t_ident);
6912 void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                      kmp_team_t *team) {
6914   if (__kmp_env_consistency_check)
6915 __kmp_pop_parallel(gtid, team->t.t_ident);
6917 __kmp_finish_implicit_task(this_thr);
6920 int __kmp_invoke_task_func(int gtid) {
6922 int tid = __kmp_tid_from_gtid(gtid);
6923 kmp_info_t *this_thr = __kmp_threads[gtid];
6924 kmp_team_t *team = this_thr->th.th_team;
6926 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
6928 if (__itt_stack_caller_create_ptr) {
6929 __kmp_itt_stack_callee_enter(
6931 team->t.t_stack_id);
6934 #if INCLUDE_SSC_MARKS
6935   SSC_MARK_INVOKING();
6940 void **exit_runtime_p;
6941 ompt_data_t *my_task_data;
6942 ompt_data_t *my_parallel_data;
6945 if (ompt_enabled.enabled) {
6947 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame);
6949 exit_runtime_p = &dummy;
6953 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
6954 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
6955 if (ompt_enabled.ompt_callback_implicit_task) {
6956 ompt_team_size = team->t.t_nproc;
6957 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
6958 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
6959 __kmp_tid_from_gtid(gtid));
6964 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6965 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6967 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6968 tid, (
int)team->t.t_argc, (
void **)team->t.t_argv
6975 *exit_runtime_p = NULL;
6980 if (__itt_stack_caller_create_ptr) {
6981 __kmp_itt_stack_callee_leave(
6983 team->t.t_stack_id);
6986 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
6992 void __kmp_teams_master(int gtid) {
6994 kmp_info_t *thr = __kmp_threads[gtid];
6995 kmp_team_t *team = thr->th.th_team;
6996 ident_t *loc = team->t.t_ident;
6997 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6998 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
6999 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7000 KA_TRACE(20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7001 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7004 #if INCLUDE_SSC_MARKS
7007   __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7008 (microtask_t)thr->th.th_teams_microtask,
7009 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7010 #if INCLUDE_SSC_MARKS
7016   __kmp_join_call(loc, gtid
7025 int __kmp_invoke_teams_master(int gtid) {
7026 kmp_info_t *this_thr = __kmp_threads[gtid];
7027 kmp_team_t *team = this_thr->th.th_team;
7029 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7030 KMP_DEBUG_ASSERT((
void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7031 (
void *)__kmp_teams_master);
7033 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7034 __kmp_teams_master(gtid);
7035 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7045 void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7046 kmp_info_t *thr = __kmp_threads[gtid];
7048 if (num_threads > 0)
7049 thr->th.th_set_nproc = num_threads;
7056 void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                              int num_threads) {
7058   kmp_info_t *thr = __kmp_threads[gtid];
7059 KMP_DEBUG_ASSERT(num_teams >= 0);
7060 KMP_DEBUG_ASSERT(num_threads >= 0);
7064 if (num_teams > __kmp_teams_max_nth) {
7065 if (!__kmp_reserve_warn) {
7066 __kmp_reserve_warn = 1;
7067 __kmp_msg(kmp_ms_warning,
7068 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7069 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7071 num_teams = __kmp_teams_max_nth;
7075 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7078 if (num_threads == 0) {
7079 if (!TCR_4(__kmp_init_middle))
7080 __kmp_middle_initialize();
7081 num_threads = __kmp_avail_proc / num_teams;
7082 if (num_teams * num_threads > __kmp_teams_max_nth) {
7084 num_threads = __kmp_teams_max_nth / num_teams;
7087 if (num_teams * num_threads > __kmp_teams_max_nth) {
7088 int new_threads = __kmp_teams_max_nth / num_teams;
7089 if (!__kmp_reserve_warn) {
7090 __kmp_reserve_warn = 1;
7091 __kmp_msg(kmp_ms_warning,
7092 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7093 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7095 num_threads = new_threads;
7098 thr->th.th_teams_size.nth = num_threads;
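Note: __kmp_push_num_teams above caps both the number of teams and the per-team thread count so that num_teams * num_threads never exceeds the teams limit, warning when it has to shrink an explicit request. A hedged sketch of just that arithmetic, with a hypothetical teams_max_nth value passed in instead of the runtime's global.

#include <cstdio>

// Clamp a (teams, threads-per-team) request against a global cap,
// mirroring the shape of the logic above; teams_max_nth is illustrative.
static void clamp_teams(int &num_teams, int &num_threads, int avail_proc,
                        int teams_max_nth) {
  if (num_teams > teams_max_nth)
    num_teams = teams_max_nth;
  if (num_threads == 0) {
    num_threads = avail_proc / num_teams;      // spread procs across teams
    if (num_teams * num_threads > teams_max_nth)
      num_threads = teams_max_nth / num_teams;
  } else if (num_teams * num_threads > teams_max_nth) {
    num_threads = teams_max_nth / num_teams;   // shrink the explicit request
  }
}

int main() {
  int teams = 8, threads = 0;
  clamp_teams(teams, threads, 64, 32);
  std::printf("teams=%d threads=%d\n", teams, threads); // teams=8 threads=4
  teams = 4; threads = 16;
  clamp_teams(teams, threads, 64, 32);
  std::printf("teams=%d threads=%d\n", teams, threads); // teams=4 threads=8
  return 0;
}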
7102 void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7103 kmp_info_t *thr = __kmp_threads[gtid];
7104 thr->th.th_set_proc_bind = proc_bind;
7111 void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7112 kmp_info_t *this_thr = __kmp_threads[gtid];
7118 KMP_DEBUG_ASSERT(team);
7119 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7120 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7123 team->t.t_construct = 0;
7124 team->t.t_ordered.dt.t_value =
7128 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7129 if (team->t.t_max_nproc > 1) {
7131 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7132 team->t.t_disp_buffer[i].buffer_index = i;
7134 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7138 team->t.t_disp_buffer[0].buffer_index = 0;
7140 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7145 KMP_ASSERT(this_thr->th.th_team == team);
7148 for (f = 0; f < team->t.t_nproc; f++) {
7149 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7150 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7155 __kmp_fork_barrier(gtid, 0);
7158 void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7159 kmp_info_t *this_thr = __kmp_threads[gtid];
7161 KMP_DEBUG_ASSERT(team);
7162 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7163 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7169 if (__kmp_threads[gtid] &&
7170 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7171     __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7172                  __kmp_threads[gtid]);
7173     __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7174                  "team->t.t_nproc=%d\n",
7175                  gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                     team->t.t_nproc);
7177     __kmp_print_structure();
7179 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7180 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7183 __kmp_join_barrier(gtid);
7185 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7186 if (this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
7187 ompt_data_t *tId = OMPT_CUR_TASK_DATA(this_thr);
7188 ompt_data_t *pId = OMPT_CUR_TEAM_DATA(this_thr);
7189 this_thr->th.ompt_thread_info.state = omp_state_overhead;
7191 void *codeptr = NULL;
7192 if (KMP_MASTER_TID(ds_tid) &&
7193 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7194 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7195 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7197 if (ompt_enabled.ompt_callback_sync_region_wait) {
7198 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
7199 ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
7201 if (ompt_enabled.ompt_callback_sync_region) {
7202 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
7203 ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
7206 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7207 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7208 ompt_scope_end, NULL, tId, 0, ds_tid);
7211 this_thr->th.ompt_thread_info.state = omp_state_overhead;
7216 KMP_ASSERT(this_thr->th.th_team == team);
7221 #ifdef USE_LOAD_BALANCE
7225 static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7228 kmp_team_t *hot_team;
7230 if (root->r.r_active) {
7233 hot_team = root->r.r_hot_team;
7234 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7235 return hot_team->t.t_nproc - 1;
7240 for (i = 1; i < hot_team->t.t_nproc; i++) {
7241 if (hot_team->t.t_threads[i]->th.th_active) {
7250 static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
7253 int hot_team_active;
7254 int team_curr_active;
7257 KB_TRACE(20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7259 KMP_DEBUG_ASSERT(root);
7260 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7261 ->th.th_current_task->td_icvs.dynamic == TRUE);
7262 KMP_DEBUG_ASSERT(set_nproc > 1);
7264 if (set_nproc == 1) {
7265 KB_TRACE(20, (
"__kmp_load_balance_nproc: serial execution.\n"));
7274 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7275 hot_team_active = __kmp_active_hot_team_nproc(root);
7276 team_curr_active = pool_active + hot_team_active + 1;
7279 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7280 KB_TRACE(30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d " 7281 "hot team active = %d\n",
7282 system_active, pool_active, hot_team_active));
7284 if (system_active < 0) {
7288 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7289 KMP_WARNING(CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit");
7292 retval = __kmp_avail_proc - __kmp_nth +
7293 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7294 if (retval > set_nproc) {
7297 if (retval < KMP_MIN_NTH) {
7298 retval = KMP_MIN_NTH;
7301 KB_TRACE(20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7309 if (system_active < team_curr_active) {
7310 system_active = team_curr_active;
7312 retval = __kmp_avail_proc - system_active + team_curr_active;
7313 if (retval > set_nproc) {
7316 if (retval < KMP_MIN_NTH) {
7317 retval = KMP_MIN_NTH;
7320 KB_TRACE(20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval));
7329 void __kmp_cleanup(void) {
7332 KA_TRACE(10, (
"__kmp_cleanup: enter\n"));
7334 if (TCR_4(__kmp_init_parallel)) {
7335 #if KMP_HANDLE_SIGNALS
7336     __kmp_remove_signals();
7338 TCW_4(__kmp_init_parallel, FALSE);
7341 if (TCR_4(__kmp_init_middle)) {
7342 #if KMP_AFFINITY_SUPPORTED
7343     __kmp_affinity_uninitialize();
7345 __kmp_cleanup_hierarchy();
7346 TCW_4(__kmp_init_middle, FALSE);
7349 KA_TRACE(10, (
"__kmp_cleanup: go serial cleanup\n"));
7351 if (__kmp_init_serial) {
7352 __kmp_runtime_destroy();
7353 __kmp_init_serial = FALSE;
7356 for (f = 0; f < __kmp_threads_capacity; f++) {
7357 if (__kmp_root[f] != NULL) {
7358 __kmp_free(__kmp_root[f]);
7359 __kmp_root[f] = NULL;
7362 __kmp_free(__kmp_threads);
7365 __kmp_threads = NULL;
7367 __kmp_threads_capacity = 0;
7369 #if KMP_USE_DYNAMIC_LOCK
7370   __kmp_cleanup_indirect_user_locks();
7372 __kmp_cleanup_user_locks();
7375 #if KMP_AFFINITY_SUPPORTED
7376   KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
7377 __kmp_cpuinfo_file = NULL;
7380 #if KMP_USE_ADAPTIVE_LOCKS
7381 #if KMP_DEBUG_ADAPTIVE_LOCKS
7382   __kmp_print_speculative_stats();
7385 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7386 __kmp_nested_nth.nth = NULL;
7387 __kmp_nested_nth.size = 0;
7388 __kmp_nested_nth.used = 0;
7389 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7390 __kmp_nested_proc_bind.bind_types = NULL;
7391 __kmp_nested_proc_bind.size = 0;
7392 __kmp_nested_proc_bind.used = 0;
7394 __kmp_i18n_catclose();
7396 #if KMP_STATS_ENABLED
7400   KA_TRACE(10, ("__kmp_cleanup: exit\n"));
7405 int __kmp_ignore_mppbeg(void) {
7408   if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
7409 if (__kmp_str_match_false(env))
7416 int __kmp_ignore_mppend(void) {
7419   if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
7420 if (__kmp_str_match_false(env))
7427 void __kmp_internal_begin(
void) {
7433 gtid = __kmp_entry_gtid();
7434 root = __kmp_threads[gtid]->th.th_root;
7435 KMP_ASSERT(KMP_UBER_GTID(gtid));
7437 if (root->r.r_begin)
7439 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7440 if (root->r.r_begin) {
7441 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7445 root->r.r_begin = TRUE;
7447 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7452 void __kmp_user_set_library(
enum library_type arg) {
7459 gtid = __kmp_entry_gtid();
7460 thread = __kmp_threads[gtid];
7462 root = thread->th.th_root;
7464 KA_TRACE(20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7466 if (root->r.r_in_parallel) {
7468 KMP_WARNING(SetLibraryIncorrectCall);
7473 case library_serial:
7474 thread->th.th_set_nproc = 0;
7475 set__nproc(thread, 1);
7477 case library_turnaround:
7478 thread->th.th_set_nproc = 0;
7479 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7480 : __kmp_dflt_team_nth_ub);
7482 case library_throughput:
7483 thread->th.th_set_nproc = 0;
7484 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7485 : __kmp_dflt_team_nth_ub);
7488 KMP_FATAL(UnknownLibraryType, arg);
7491 __kmp_aux_set_library(arg);
7494 void __kmp_aux_set_stacksize(
size_t arg) {
7495 if (!__kmp_init_serial)
7496 __kmp_serial_initialize();
7499 if (arg & (0x1000 - 1)) {
7500 arg &= ~(0x1000 - 1);
7505 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7508 if (!TCR_4(__kmp_init_parallel)) {
7511 if (value < __kmp_sys_min_stksize)
7512 value = __kmp_sys_min_stksize;
7513 else if (value > KMP_MAX_STKSIZE)
7514 value = KMP_MAX_STKSIZE;
7516 __kmp_stksize = value;
7518 __kmp_env_stksize = TRUE;
7521 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
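Note: __kmp_aux_set_stacksize above rounds a requested stack size down to a 4 KB boundary and clamps it between the system minimum and the maximum before storing it. A short sketch of that rounding and clamping with illustrative bounds standing in for __kmp_sys_min_stksize and KMP_MAX_STKSIZE.

#include <cstddef>
#include <cstdio>

// Illustrative bounds; the runtime takes them from its own globals.
static const size_t min_stksize = 32 * 1024;
static const size_t max_stksize = 256 * 1024 * 1024;

static size_t normalize_stacksize(size_t arg) {
  if (arg & (0x1000 - 1))             // not page aligned?
    arg &= ~(size_t)(0x1000 - 1);     // round down to a 4 KB boundary
  if (arg < min_stksize)
    arg = min_stksize;
  else if (arg > max_stksize)
    arg = max_stksize;
  return arg;
}

int main() {
  std::printf("%zu\n", normalize_stacksize(1 * 1024 * 1024 + 123)); // 1048576
  std::printf("%zu\n", normalize_stacksize(1024));                  // 32768 (min)
  return 0;
}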
7526 void __kmp_aux_set_library(
enum library_type arg) {
7527 __kmp_library = arg;
7529 switch (__kmp_library) {
7530 case library_serial: {
7531 KMP_INFORM(LibraryIsSerial);
7532 (void)__kmp_change_library(TRUE);
7534 case library_turnaround:
7535 (void)__kmp_change_library(TRUE);
7537 case library_throughput:
7538 (void)__kmp_change_library(FALSE);
7541 KMP_FATAL(UnknownLibraryType, arg);
7547 void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
7548 int blocktime = arg;
7554 __kmp_save_internal_controls(thread);
7557 if (blocktime < KMP_MIN_BLOCKTIME)
7558 blocktime = KMP_MIN_BLOCKTIME;
7559 else if (blocktime > KMP_MAX_BLOCKTIME)
7560 blocktime = KMP_MAX_BLOCKTIME;
7562 set__blocktime_team(thread->th.th_team, tid, blocktime);
7563 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
7567 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7569 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
7570 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
7576 set__bt_set_team(thread->th.th_team, tid, bt_set);
7577 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
7579 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " 7580 "bt_intervals=%d, monitor_updates=%d\n",
7581 __kmp_gtid_from_tid(tid, thread->th.th_team),
7582 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7583 __kmp_monitor_wakeups));
7585 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7586 __kmp_gtid_from_tid(tid, thread->th.th_team),
7587 thread->th.th_team->t.t_id, tid, blocktime));
7591 void __kmp_aux_set_defaults(
char const *str,
int len) {
7592 if (!__kmp_init_serial) {
7593 __kmp_serial_initialize();
7595 __kmp_env_initialize(str);
7599 || __kmp_display_env || __kmp_display_env_verbose
7609 PACKED_REDUCTION_METHOD_T
7610 __kmp_determine_reduction_method(
7611     ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
7612     void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7613     kmp_critical_name *lck) {
7624 PACKED_REDUCTION_METHOD_T retval;
7628 KMP_DEBUG_ASSERT(loc);
7629 KMP_DEBUG_ASSERT(lck);
7631 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
7632   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
7633 #define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
7635   retval = critical_reduce_block;
7638 team_size = __kmp_get_team_num_threads(global_tid);
7639 if (team_size == 1) {
7641 retval = empty_reduce_block;
7645 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7646 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7648 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 7650 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \ 7653 int teamsize_cutoff = 4;
7655 #if KMP_MIC_SUPPORTED 7656 if (__kmp_mic_type != non_mic) {
7657 teamsize_cutoff = 8;
7660 if (tree_available) {
7661 if (team_size <= teamsize_cutoff) {
7662 if (atomic_available) {
7663 retval = atomic_reduce_block;
7666 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7668 }
else if (atomic_available) {
7669 retval = atomic_reduce_block;
7672 #error "Unknown or unsupported OS" 7673 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || 7676 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS 7678 #if KMP_OS_LINUX || KMP_OS_WINDOWS 7682 if (atomic_available) {
7683 if (num_vars <= 2) {
7684 retval = atomic_reduce_block;
7690 if (atomic_available && (num_vars <= 3)) {
7691 retval = atomic_reduce_block;
7692     } else if (tree_available) {
7693       if ((reduce_size > (9 * sizeof(kmp_real64))) &&
7694           (reduce_size < (2000 * sizeof(kmp_real64)))) {
7695 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7700 #error "Unknown or unsupported OS" 7704 #error "Unknown or unsupported architecture" 7712 if (__kmp_force_reduction_method != reduction_method_not_defined &&
7715 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
7717 int atomic_available, tree_available;
7719 switch ((forced_retval = __kmp_force_reduction_method)) {
7720 case critical_reduce_block:
7724 case atomic_reduce_block:
7725 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7726 if (!atomic_available) {
7727 KMP_WARNING(RedMethodNotSupported,
"atomic");
7728 forced_retval = critical_reduce_block;
7732 case tree_reduce_block:
7733 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7734 if (!tree_available) {
7735 KMP_WARNING(RedMethodNotSupported,
"tree");
7736 forced_retval = critical_reduce_block;
7738 #if KMP_FAST_REDUCTION_BARRIER
7739       forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7748 retval = forced_retval;
7751 KA_TRACE(10, (
"reduction method selected=%08x\n", retval));
7753 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7754 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7760 kmp_int32 __kmp_get_reduce_method(void) {
7761   return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
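Note: __kmp_determine_reduction_method above picks between an atomic reduction, a tree reduction, and the critical-section fallback based on team size, the number of reduction variables, and what the compiler emitted; __kmp_get_reduce_method then returns the method bits packed into the upper byte. Below is a hedged sketch of that kind of selection and packing, with simplified enums and a simplified ordering rather than the runtime's PACKED_REDUCTION_METHOD_T encoding or exact heuristic.

#include <cstdio>

// Simplified stand-ins for the runtime's reduction method constants.
enum method { critical_block = 1, atomic_block = 2, tree_block = 3, empty_block = 4 };

static int choose_method(int team_size, int num_vars, bool atomic_available,
                         bool tree_available, int teamsize_cutoff) {
  if (team_size == 1)
    return empty_block;       // single thread: nothing to combine
  if (tree_available && team_size > teamsize_cutoff)
    return tree_block;        // large teams: tree/barrier reduction scales better
  if (atomic_available && num_vars <= 2)
    return atomic_block;      // few variables: per-variable atomics are cheap
  if (tree_available)
    return tree_block;
  return critical_block;      // the critical-section fallback always works
}

// Pack the method into the upper byte the way the packed field is read back (>> 8).
static int pack(int m) { return m << 8; }
static int unpack(int packed) { return packed >> 8; }

int main() {
  int m = choose_method(16, 1, true, true, 4);
  std::printf("method=%d unpacked=%d\n", m, unpack(pack(m))); // method=3 unpacked=3
  return 0;
}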