1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22 
23 #define MAX_MESSAGE 512
24 
25 // flags will be used in future, e.g. to implement openmp_strict library
26 // restrictions
27 
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37  // By default __kmpc_begin() is no-op.
38  char *env;
39  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40  __kmp_str_match_true(env)) {
41  __kmp_middle_initialize();
42  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
43  } else if (__kmp_ignore_mppbeg() == FALSE) {
44  // By default __kmp_ignore_mppbeg() returns TRUE.
45  __kmp_internal_begin();
46  KC_TRACE(10, ("__kmpc_begin: called\n"));
47  }
48 }
49 
58 void __kmpc_end(ident_t *loc) {
59  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
60  // __kmpc_end() call a no-op. However, this can be overridden with the
61  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
62  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
63  // root (which can cause library shutdown).
64  if (__kmp_ignore_mppend() == FALSE) {
65  KC_TRACE(10, ("__kmpc_end: called\n"));
66  KA_TRACE(30, ("__kmpc_end\n"));
67 
68  __kmp_internal_end_thread(-1);
69  }
70 #if KMP_OS_WINDOWS && OMPT_SUPPORT
71  // Normal exit process on Windows does not allow worker threads of the final
72  // parallel region to finish reporting their events, so shutting down the
73  // library here fixes the issue at least for the cases where __kmpc_end() is
74  // placed properly.
75  if (ompt_enabled.enabled)
76  __kmp_internal_end_library(__kmp_gtid_get_specific());
77 #endif
78 }
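/* Illustrative sketch (hypothetical ident_t and call site, not what most
   compilers emit today): a front end that wants eager runtime setup/teardown
   can bracket the program with these entry points. With KMP_IGNORE_MPPEND=0
   in the environment, the trailing __kmpc_end() unregisters this root and may
   shut the library down.

       static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;main;1;1;;"};
       int main(void) {
         __kmpc_begin(&loc, 0);
         // ... program, possibly containing parallel regions ...
         __kmpc_end(&loc);
         return 0;
       }
*/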
79 
98 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
99  kmp_int32 gtid = __kmp_entry_gtid();
100 
101  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
102 
103  return gtid;
104 }
105 
120 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
121  KC_TRACE(10,
122  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
123 
124  return TCR_4(__kmp_all_nth);
125 }
126 
133 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
134  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
135  return __kmp_tid_from_gtid(__kmp_entry_gtid());
136 }
137 
143 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
144  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
145 
146  return __kmp_entry_thread()->th.th_team->t.t_nproc;
147 }
148 
155 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
156 #ifndef KMP_DEBUG
157 
158  return TRUE;
159 
160 #else
161 
162  const char *semi2;
163  const char *semi3;
164  int line_no;
165 
166  if (__kmp_par_range == 0) {
167  return TRUE;
168  }
169  semi2 = loc->psource;
170  if (semi2 == NULL) {
171  return TRUE;
172  }
173  semi2 = strchr(semi2, ';');
174  if (semi2 == NULL) {
175  return TRUE;
176  }
177  semi2 = strchr(semi2 + 1, ';');
178  if (semi2 == NULL) {
179  return TRUE;
180  }
181  if (__kmp_par_range_filename[0]) {
182  const char *name = semi2 - 1;
183  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
184  name--;
185  }
186  if ((*name == '/') || (*name == ';')) {
187  name++;
188  }
189  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
190  return __kmp_par_range < 0;
191  }
192  }
193  semi3 = strchr(semi2 + 1, ';');
194  if (__kmp_par_range_routine[0]) {
195  if ((semi3 != NULL) && (semi3 > semi2) &&
196  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
197  return __kmp_par_range < 0;
198  }
199  }
200  if ((semi3 != NULL) && (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1)) {
201  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
202  return __kmp_par_range > 0;
203  }
204  return __kmp_par_range < 0;
205  }
206  return TRUE;
207 
208 #endif /* KMP_DEBUG */
209 }
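/* The filter above assumes ident_t::psource is laid out as
       ";filename;routine;line;...;"
   e.g. a construct in routine bar() of foo.c at line 42 would carry
       ";foo.c;bar;42;0;;"
   The __kmp_par_range_* globals it is matched against come from the
   KMP_PAR_RANGE debug setting (debug builds only; its exact syntax is handled
   in kmp_settings.cpp). */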
210 
217 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
218  return __kmp_entry_thread()->th.th_root->r.r_active;
219 }
220 
230 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
231  kmp_int32 num_threads) {
232  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
233  global_tid, num_threads));
234  __kmp_assert_valid_gtid(global_tid);
235  __kmp_push_num_threads(loc, global_tid, num_threads);
236 }
237 
238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
239  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
240  /* the num_threads are automatically popped */
241 }
242 
243 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
244  kmp_int32 proc_bind) {
245  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
246  proc_bind));
247  __kmp_assert_valid_gtid(global_tid);
248  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
249 }
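/* Illustrative lowering sketch (the outlined function and ident are
   hypothetical): for
       #pragma omp parallel num_threads(4) proc_bind(spread)
   a compiler typically emits, immediately before the fork,
       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_push_num_threads(&loc, gtid, 4);
       __kmpc_push_proc_bind(&loc, gtid, proc_bind_spread);
       __kmpc_fork_call(&loc, /*argc*/ 0, (kmpc_micro)outlined);
   The pushed values apply only to the next parallel region encountered by
   this thread. */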
250 
261 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
262  int gtid = __kmp_entry_gtid();
263 
264 #if (KMP_STATS_ENABLED)
265  // If we were in a serial region, then stop the serial timer, record
266  // the event, and start parallel region timer
267  stats_state_e previous_state = KMP_GET_THREAD_STATE();
268  if (previous_state == stats_state_e::SERIAL_REGION) {
269  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
270  } else {
271  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
272  }
273  int inParallel = __kmpc_in_parallel(loc);
274  if (inParallel) {
275  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
276  } else {
277  KMP_COUNT_BLOCK(OMP_PARALLEL);
278  }
279 #endif
280 
281  // maybe saving thr_state is enough here
282  {
283  va_list ap;
284  va_start(ap, microtask);
285 
286 #if OMPT_SUPPORT
287  ompt_frame_t *ompt_frame;
288  if (ompt_enabled.enabled) {
289  kmp_info_t *master_th = __kmp_threads[gtid];
290  kmp_team_t *parent_team = master_th->th.th_team;
291  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
292  if (lwt)
293  ompt_frame = &(lwt->ompt_task_info.frame);
294  else {
295  int tid = __kmp_tid_from_gtid(gtid);
296  ompt_frame = &(
297  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
298  }
299  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
300  }
301  OMPT_STORE_RETURN_ADDRESS(gtid);
302 #endif
303 
304 #if INCLUDE_SSC_MARKS
305  SSC_MARK_FORKING();
306 #endif
307  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
308  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
309  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
310  kmp_va_addr_of(ap));
311 #if INCLUDE_SSC_MARKS
312  SSC_MARK_JOINING();
313 #endif
314  __kmp_join_call(loc, gtid
315 #if OMPT_SUPPORT
316  ,
317  fork_context_intel
318 #endif
319  );
320 
321  va_end(ap);
322  }
323 
324 #if KMP_STATS_ENABLED
325  if (previous_state == stats_state_e::SERIAL_REGION) {
326  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
327  KMP_SET_THREAD_STATE(previous_state);
328  } else {
329  KMP_POP_PARTITIONED_TIMER();
330  }
331 #endif // KMP_STATS_ENABLED
332 }
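/* Illustrative lowering sketch (names hypothetical): a parallel region with
   one shared variable,
       int x = 0;
       #pragma omp parallel shared(x)
       { use(&x); }
   becomes roughly
       void outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *x) {
         use(x);
       }
       ...
       __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x);
   argc counts the trailing shared arguments; they are forwarded to the
   microtask after the two thread-id parameters. */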
333 
345 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
346  kmp_int32 num_teams, kmp_int32 num_threads) {
347  KA_TRACE(20,
348  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
349  global_tid, num_teams, num_threads));
350  __kmp_assert_valid_gtid(global_tid);
351  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
352 }
353 
364 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
365  ...) {
366  int gtid = __kmp_entry_gtid();
367  kmp_info_t *this_thr = __kmp_threads[gtid];
368  va_list ap;
369  va_start(ap, microtask);
370 
371 #if KMP_STATS_ENABLED
372  KMP_COUNT_BLOCK(OMP_TEAMS);
373  stats_state_e previous_state = KMP_GET_THREAD_STATE();
374  if (previous_state == stats_state_e::SERIAL_REGION) {
375  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
376  } else {
377  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
378  }
379 #endif
380 
381  // remember teams entry point and nesting level
382  this_thr->th.th_teams_microtask = microtask;
383  this_thr->th.th_teams_level =
384  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
385 
386 #if OMPT_SUPPORT
387  kmp_team_t *parent_team = this_thr->th.th_team;
388  int tid = __kmp_tid_from_gtid(gtid);
389  if (ompt_enabled.enabled) {
390  parent_team->t.t_implicit_task_taskdata[tid]
391  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
392  }
393  OMPT_STORE_RETURN_ADDRESS(gtid);
394 #endif
395 
396  // check if __kmpc_push_num_teams was called; set the default number of
397  // teams otherwise
398  if (this_thr->th.th_teams_size.nteams == 0) {
399  __kmp_push_num_teams(loc, gtid, 0, 0);
400  }
401  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
402  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
403  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
404 
405  __kmp_fork_call(
406  loc, gtid, fork_context_intel, argc,
407  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
408  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
409  __kmp_join_call(loc, gtid
410 #if OMPT_SUPPORT
411  ,
412  fork_context_intel
413 #endif
414  );
415 
416  // Pop current CG root off list
417  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
418  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
419  this_thr->th.th_cg_roots = tmp->up;
420  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
421  " to node %p. cg_nthreads was %d\n",
422  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
423  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
424  int i = tmp->cg_nthreads--;
425  if (i == 1) { // check if we are the last thread in CG (not always the case)
426  __kmp_free(tmp);
427  }
428  // Restore current task's thread_limit from CG root
429  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
430  this_thr->th.th_current_task->td_icvs.thread_limit =
431  this_thr->th.th_cg_roots->cg_thread_limit;
432 
433  this_thr->th.th_teams_microtask = NULL;
434  this_thr->th.th_teams_level = 0;
435  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
436  va_end(ap);
437 #if KMP_STATS_ENABLED
438  if (previous_state == stats_state_e::SERIAL_REGION) {
439  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
440  KMP_SET_THREAD_STATE(previous_state);
441  } else {
442  KMP_POP_PARTITIONED_TIMER();
443  }
444 #endif // KMP_STATS_ENABLED
445 }
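/* Illustrative sketch: for
       #pragma omp teams num_teams(2) thread_limit(8)
   a compiler emits roughly
       __kmpc_push_num_teams(&loc, gtid, 2, 8);
       __kmpc_fork_teams(&loc, argc, (kmpc_micro)teams_outlined, ...);
   If __kmpc_push_num_teams was not called, the code above falls back to
   __kmp_push_num_teams(loc, gtid, 0, 0) so that defaults are chosen. */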
446 
447 // I don't think this function should ever have been exported.
448 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
449 // openmp code ever called it, but it's been exported from the RTL for so
450 // long that I'm afraid to remove the definition.
451 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
452 
465 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
466  // The implementation is now in kmp_runtime.cpp so that it can share static
467  // functions with kmp_fork_call since the tasks to be done are similar in
468  // each case.
469  __kmp_assert_valid_gtid(global_tid);
470 #if OMPT_SUPPORT
471  OMPT_STORE_RETURN_ADDRESS(global_tid);
472 #endif
473  __kmp_serialized_parallel(loc, global_tid);
474 }
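/* Illustrative sketch: this pair of entry points brackets a parallel region
   that is executed by the encountering thread only, e.g.
       #pragma omp parallel if(0)
   lowers (roughly, with a hypothetical outlined function) to
       __kmpc_serialized_parallel(&loc, gtid);
       kmp_int32 zero = 0;
       outlined(&gtid, &zero);          // run inline on this thread
       __kmpc_end_serialized_parallel(&loc, gtid);
*/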
475 
483 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
484  kmp_internal_control_t *top;
485  kmp_info_t *this_thr;
486  kmp_team_t *serial_team;
487 
488  KC_TRACE(10,
489  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
490 
491  /* skip all this code for autopar serialized loops since it results in
492  unacceptable overhead */
493  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
494  return;
495 
496  // Not autopar code
497  __kmp_assert_valid_gtid(global_tid);
498  if (!TCR_4(__kmp_init_parallel))
499  __kmp_parallel_initialize();
500 
501  __kmp_resume_if_soft_paused();
502 
503  this_thr = __kmp_threads[global_tid];
504  serial_team = this_thr->th.th_serial_team;
505 
506  kmp_task_team_t *task_team = this_thr->th.th_task_team;
507  // we need to wait for the proxy tasks before finishing the thread
508  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
509  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
510 
511  KMP_MB();
512  KMP_DEBUG_ASSERT(serial_team);
513  KMP_ASSERT(serial_team->t.t_serialized);
514  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
515  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
516  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
517  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
518 
519 #if OMPT_SUPPORT
520  if (ompt_enabled.enabled &&
521  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
522  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
523  if (ompt_enabled.ompt_callback_implicit_task) {
524  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
525  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
526  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
527  }
528 
529  // clear the task id only after unlinking the task
530  ompt_data_t *parent_task_data;
531  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
532 
533  if (ompt_enabled.ompt_callback_parallel_end) {
534  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
535  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
536  ompt_parallel_invoker_program | ompt_parallel_team,
537  OMPT_LOAD_RETURN_ADDRESS(global_tid));
538  }
539  __ompt_lw_taskteam_unlink(this_thr);
540  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
541  }
542 #endif
543 
544  /* If necessary, pop the internal control stack values and replace the team
545  * values */
546  top = serial_team->t.t_control_stack_top;
547  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
548  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
549  serial_team->t.t_control_stack_top = top->next;
550  __kmp_free(top);
551  }
552 
553  // if( serial_team -> t.t_serialized > 1 )
554  serial_team->t.t_level--;
555 
556  /* pop dispatch buffers stack */
557  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
558  {
559  dispatch_private_info_t *disp_buffer =
560  serial_team->t.t_dispatch->th_disp_buffer;
561  serial_team->t.t_dispatch->th_disp_buffer =
562  serial_team->t.t_dispatch->th_disp_buffer->next;
563  __kmp_free(disp_buffer);
564  }
565  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
566 
567  --serial_team->t.t_serialized;
568  if (serial_team->t.t_serialized == 0) {
569 
570 /* return to the parallel section */
571 
572 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
573  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
574  __kmp_clear_x87_fpu_status_word();
575  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
576  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
577  }
578 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
579 
580  this_thr->th.th_team = serial_team->t.t_parent;
581  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
582 
583  /* restore values cached in the thread */
584  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
585  this_thr->th.th_team_master =
586  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
587  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
588 
589  /* TODO the below shouldn't need to be adjusted for serialized teams */
590  this_thr->th.th_dispatch =
591  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
592 
593  __kmp_pop_current_task_from_thread(this_thr);
594 
595  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
596  this_thr->th.th_current_task->td_flags.executing = 1;
597 
598  if (__kmp_tasking_mode != tskm_immediate_exec) {
599  // Copy the task team from the new child / old parent team to the thread.
600  this_thr->th.th_task_team =
601  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
602  KA_TRACE(20,
603  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
604  "team %p\n",
605  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
606  }
607  } else {
608  if (__kmp_tasking_mode != tskm_immediate_exec) {
609  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
610  "depth of serial team %p to %d\n",
611  global_tid, serial_team, serial_team->t.t_serialized));
612  }
613  }
614 
615  if (__kmp_env_consistency_check)
616  __kmp_pop_parallel(global_tid, NULL);
617 #if OMPT_SUPPORT
618  if (ompt_enabled.enabled)
619  this_thr->th.ompt_thread_info.state =
620  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
621  : ompt_state_work_parallel);
622 #endif
623 }
624 
633 void __kmpc_flush(ident_t *loc) {
634  KC_TRACE(10, ("__kmpc_flush: called\n"));
635 
636  /* need an explicit __mf() here since we use volatile instead in the library */
637  KMP_MB(); /* Flush all pending memory write invalidates. */
638 
639 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
640 #if KMP_MIC
641 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
642 // We shouldn't need it, though, since the ABI rules require that
643 // * If the compiler generates NGO stores it also generates the fence
644 // * If users hand-code NGO stores they should insert the fence
645 // therefore no incomplete unordered stores should be visible.
646 #else
647  // C74404
648  // This is to address non-temporal store instructions (sfence needed).
649  // The clflush instruction is also addressed (mfence needed).
650  // Probably the non-temporal load movntdqa instruction should also be
651  // addressed.
652  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
653  if (!__kmp_cpuinfo.initialized) {
654  __kmp_query_cpuid(&__kmp_cpuinfo);
655  }
656  if (!__kmp_cpuinfo.sse2) {
657  // CPU cannot execute SSE2 instructions.
658  } else {
659 #if KMP_COMPILER_ICC
660  _mm_mfence();
661 #elif KMP_COMPILER_MSVC
662  MemoryBarrier();
663 #else
664  __sync_synchronize();
665 #endif // KMP_COMPILER_ICC
666  }
667 #endif // KMP_MIC
668 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
669  KMP_ARCH_RISCV64)
670 // Nothing to see here; move along.
671 #elif KMP_ARCH_PPC64
672 // Nothing needed here (we have a real MB above).
673 #else
674 #error Unknown or unsupported architecture
675 #endif
676 
677 #if OMPT_SUPPORT && OMPT_OPTIONAL
678  if (ompt_enabled.ompt_callback_flush) {
679  ompt_callbacks.ompt_callback(ompt_callback_flush)(
680  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
681  }
682 #endif
683 }
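/* Illustrative sketch:
       #pragma omp flush
   lowers to a single call,
       __kmpc_flush(&loc);
   which issues the memory barrier selected above for the target architecture. */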
684 
685 /* -------------------------------------------------------------------------- */
693 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
694  KMP_COUNT_BLOCK(OMP_BARRIER);
695  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
696  __kmp_assert_valid_gtid(global_tid);
697 
698  if (!TCR_4(__kmp_init_parallel))
699  __kmp_parallel_initialize();
700 
701  __kmp_resume_if_soft_paused();
702 
703  if (__kmp_env_consistency_check) {
704  if (loc == 0) {
705  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
706  }
707  __kmp_check_barrier(global_tid, ct_barrier, loc);
708  }
709 
710 #if OMPT_SUPPORT
711  ompt_frame_t *ompt_frame;
712  if (ompt_enabled.enabled) {
713  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
714  if (ompt_frame->enter_frame.ptr == NULL)
715  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
716  }
717  OMPT_STORE_RETURN_ADDRESS(global_tid);
718 #endif
719  __kmp_threads[global_tid]->th.th_ident = loc;
720  // TODO: explicit barrier_wait_id:
721  // this function is called when the 'barrier' directive is present or at
722  // the implicit barrier at the end of a worksharing construct.
723  // 1) better to add a per-thread barrier counter to a thread data structure
724  // 2) set to 0 when a new team is created
725  // 3) no sync is required
726 
727  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
728 #if OMPT_SUPPORT && OMPT_OPTIONAL
729  if (ompt_enabled.enabled) {
730  ompt_frame->enter_frame = ompt_data_none;
731  }
732 #endif
733 }
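/* Illustrative sketch:
       #pragma omp barrier
   lowers to
       __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
   The same entry point also implements the implicit barrier at the end of
   worksharing constructs that are not marked nowait. */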
734 
735 /* The BARRIER for a MASTER section is always explicit */
742 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
743  int status = 0;
744 
745  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
746  __kmp_assert_valid_gtid(global_tid);
747 
748  if (!TCR_4(__kmp_init_parallel))
749  __kmp_parallel_initialize();
750 
751  __kmp_resume_if_soft_paused();
752 
753  if (KMP_MASTER_GTID(global_tid)) {
754  KMP_COUNT_BLOCK(OMP_MASTER);
755  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
756  status = 1;
757  }
758 
759 #if OMPT_SUPPORT && OMPT_OPTIONAL
760  if (status) {
761  if (ompt_enabled.ompt_callback_masked) {
762  kmp_info_t *this_thr = __kmp_threads[global_tid];
763  kmp_team_t *team = this_thr->th.th_team;
764 
765  int tid = __kmp_tid_from_gtid(global_tid);
766  ompt_callbacks.ompt_callback(ompt_callback_masked)(
767  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
768  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
769  OMPT_GET_RETURN_ADDRESS(0));
770  }
771  }
772 #endif
773 
774  if (__kmp_env_consistency_check) {
775 #if KMP_USE_DYNAMIC_LOCK
776  if (status)
777  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
778  else
779  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
780 #else
781  if (status)
782  __kmp_push_sync(global_tid, ct_master, loc, NULL);
783  else
784  __kmp_check_sync(global_tid, ct_master, loc, NULL);
785 #endif
786  }
787 
788  return status;
789 }
790 
799 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
800  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
801  __kmp_assert_valid_gtid(global_tid);
802  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
803  KMP_POP_PARTITIONED_TIMER();
804 
805 #if OMPT_SUPPORT && OMPT_OPTIONAL
806  kmp_info_t *this_thr = __kmp_threads[global_tid];
807  kmp_team_t *team = this_thr->th.th_team;
808  if (ompt_enabled.ompt_callback_masked) {
809  int tid = __kmp_tid_from_gtid(global_tid);
810  ompt_callbacks.ompt_callback(ompt_callback_masked)(
811  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
812  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
813  OMPT_GET_RETURN_ADDRESS(0));
814  }
815 #endif
816 
817  if (__kmp_env_consistency_check) {
818  if (KMP_MASTER_GTID(global_tid))
819  __kmp_pop_sync(global_tid, ct_master, loc);
820  }
821 }
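/* Illustrative sketch:
       #pragma omp master
       { body(); }
   lowers to
       if (__kmpc_master(&loc, gtid)) {
         body();
         __kmpc_end_master(&loc, gtid);
       }
   Only the thread for which __kmpc_master() returned 1 may call
   __kmpc_end_master(); there is no implied barrier. */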
822 
830 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
831  int cid = 0;
832  kmp_info_t *th;
833  KMP_DEBUG_ASSERT(__kmp_init_serial);
834 
835  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
836  __kmp_assert_valid_gtid(gtid);
837 
838  if (!TCR_4(__kmp_init_parallel))
839  __kmp_parallel_initialize();
840 
841  __kmp_resume_if_soft_paused();
842 
843 #if USE_ITT_BUILD
844  __kmp_itt_ordered_prep(gtid);
845 // TODO: ordered_wait_id
846 #endif /* USE_ITT_BUILD */
847 
848  th = __kmp_threads[gtid];
849 
850 #if OMPT_SUPPORT && OMPT_OPTIONAL
851  kmp_team_t *team;
852  ompt_wait_id_t lck;
853  void *codeptr_ra;
854  OMPT_STORE_RETURN_ADDRESS(gtid);
855  if (ompt_enabled.enabled) {
856  team = __kmp_team_from_gtid(gtid);
857  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
858  /* OMPT state update */
859  th->th.ompt_thread_info.wait_id = lck;
860  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
861 
862  /* OMPT event callback */
863  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
864  if (ompt_enabled.ompt_callback_mutex_acquire) {
865  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
866  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
867  codeptr_ra);
868  }
869  }
870 #endif
871 
872  if (th->th.th_dispatch->th_deo_fcn != 0)
873  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
874  else
875  __kmp_parallel_deo(&gtid, &cid, loc);
876 
877 #if OMPT_SUPPORT && OMPT_OPTIONAL
878  if (ompt_enabled.enabled) {
879  /* OMPT state update */
880  th->th.ompt_thread_info.state = ompt_state_work_parallel;
881  th->th.ompt_thread_info.wait_id = 0;
882 
883  /* OMPT event callback */
884  if (ompt_enabled.ompt_callback_mutex_acquired) {
885  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
886  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
887  }
888  }
889 #endif
890 
891 #if USE_ITT_BUILD
892  __kmp_itt_ordered_start(gtid);
893 #endif /* USE_ITT_BUILD */
894 }
895 
903 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
904  int cid = 0;
905  kmp_info_t *th;
906 
907  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
908  __kmp_assert_valid_gtid(gtid);
909 
910 #if USE_ITT_BUILD
911  __kmp_itt_ordered_end(gtid);
912 // TODO: ordered_wait_id
913 #endif /* USE_ITT_BUILD */
914 
915  th = __kmp_threads[gtid];
916 
917  if (th->th.th_dispatch->th_dxo_fcn != 0)
918  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
919  else
920  __kmp_parallel_dxo(&gtid, &cid, loc);
921 
922 #if OMPT_SUPPORT && OMPT_OPTIONAL
923  OMPT_STORE_RETURN_ADDRESS(gtid);
924  if (ompt_enabled.ompt_callback_mutex_released) {
925  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
926  ompt_mutex_ordered,
927  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
928  ->t.t_ordered.dt.t_value,
929  OMPT_LOAD_RETURN_ADDRESS(gtid));
930  }
931 #endif
932 }
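/* Illustrative sketch: within a loop compiled with an ordered clause,
       #pragma omp ordered
       { body(); }
   lowers to
       __kmpc_ordered(&loc, gtid);
       body();
       __kmpc_end_ordered(&loc, gtid);
   so iterations execute body() in the sequential iteration order. */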
933 
934 #if KMP_USE_DYNAMIC_LOCK
935 
936 static __forceinline void
937 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
938  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
939  // Pointer to the allocated indirect lock is written to crit, while indexing
940  // is ignored.
941  void *idx;
942  kmp_indirect_lock_t **lck;
943  lck = (kmp_indirect_lock_t **)crit;
944  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
945  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
946  KMP_SET_I_LOCK_LOCATION(ilk, loc);
947  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
948  KA_TRACE(20,
949  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
950 #if USE_ITT_BUILD
951  __kmp_itt_critical_creating(ilk->lock, loc);
952 #endif
953  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
954  if (status == 0) {
955 #if USE_ITT_BUILD
956  __kmp_itt_critical_destroyed(ilk->lock);
957 #endif
958  // We don't really need to destroy the unclaimed lock here since it will be
959  // cleaned up at program exit.
960  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
961  }
962  KMP_DEBUG_ASSERT(*lck != NULL);
963 }
964 
965 // Fast-path acquire tas lock
966 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
967  { \
968  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
969  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
970  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
971  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
972  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
973  kmp_uint32 spins; \
974  KMP_FSYNC_PREPARE(l); \
975  KMP_INIT_YIELD(spins); \
976  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
977  do { \
978  if (TCR_4(__kmp_nth) > \
979  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
980  KMP_YIELD(TRUE); \
981  } else { \
982  KMP_YIELD_SPIN(spins); \
983  } \
984  __kmp_spin_backoff(&backoff); \
985  } while ( \
986  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
987  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
988  } \
989  KMP_FSYNC_ACQUIRED(l); \
990  }
991 
992 // Fast-path test tas lock
993 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
994  { \
995  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
996  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
997  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
998  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
999  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1000  }
1001 
1002 // Fast-path release tas lock
1003 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1004  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1005 
1006 #if KMP_USE_FUTEX
1007 
1008 #include <sys/syscall.h>
1009 #include <unistd.h>
1010 #ifndef FUTEX_WAIT
1011 #define FUTEX_WAIT 0
1012 #endif
1013 #ifndef FUTEX_WAKE
1014 #define FUTEX_WAKE 1
1015 #endif
1016 
1017 // Fast-path acquire futex lock
1018 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1019  { \
1020  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1021  kmp_int32 gtid_code = (gtid + 1) << 1; \
1022  KMP_MB(); \
1023  KMP_FSYNC_PREPARE(ftx); \
1024  kmp_int32 poll_val; \
1025  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1026  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1027  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1028  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1029  if (!cond) { \
1030  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1031  poll_val | \
1032  KMP_LOCK_BUSY(1, futex))) { \
1033  continue; \
1034  } \
1035  poll_val |= KMP_LOCK_BUSY(1, futex); \
1036  } \
1037  kmp_int32 rc; \
1038  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1039  NULL, NULL, 0)) != 0) { \
1040  continue; \
1041  } \
1042  gtid_code |= 1; \
1043  } \
1044  KMP_FSYNC_ACQUIRED(ftx); \
1045  }
1046 
1047 // Fast-path test futex lock
1048 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1049  { \
1050  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1051  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1052  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1053  KMP_FSYNC_ACQUIRED(ftx); \
1054  rc = TRUE; \
1055  } else { \
1056  rc = FALSE; \
1057  } \
1058  }
1059 
1060 // Fast-path release futex lock
1061 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1062  { \
1063  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1064  KMP_MB(); \
1065  KMP_FSYNC_RELEASING(ftx); \
1066  kmp_int32 poll_val = \
1067  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1068  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1069  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1070  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1071  } \
1072  KMP_MB(); \
1073  KMP_YIELD_OVERSUB(); \
1074  }
1075 
1076 #endif // KMP_USE_FUTEX
1077 
1078 #else // KMP_USE_DYNAMIC_LOCK
1079 
1080 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1081  ident_t const *loc,
1082  kmp_int32 gtid) {
1083  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1084 
1085  // Because of the double-check, the following load doesn't need to be volatile
1086  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1087 
1088  if (lck == NULL) {
1089  void *idx;
1090 
1091  // Allocate & initialize the lock.
1092  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1093  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1094  __kmp_init_user_lock_with_checks(lck);
1095  __kmp_set_user_lock_location(lck, loc);
1096 #if USE_ITT_BUILD
1097  __kmp_itt_critical_creating(lck);
1098 // __kmp_itt_critical_creating() should be called *before* the first usage
1099 // of the underlying lock. It is the only place where we can guarantee it.
1100 // There is a chance the lock will be destroyed without ever being used, but
1101 // that is not a problem, because this is not a real event seen by the user;
1102 // it merely sets a name for the object (lock). See more details in kmp_itt.h.
1103 #endif /* USE_ITT_BUILD */
1104 
1105  // Use a cmpxchg instruction to slam the start of the critical section with
1106  // the lock pointer. If another thread beat us to it, deallocate the lock,
1107  // and use the lock that the other thread allocated.
1108  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1109 
1110  if (status == 0) {
1111 // Deallocate the lock and reload the value.
1112 #if USE_ITT_BUILD
1113  __kmp_itt_critical_destroyed(lck);
1114 // Let ITT know the lock is destroyed and the same memory location may be reused
1115 // for another purpose.
1116 #endif /* USE_ITT_BUILD */
1117  __kmp_destroy_user_lock_with_checks(lck);
1118  __kmp_user_lock_free(&idx, gtid, lck);
1119  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1120  KMP_DEBUG_ASSERT(lck != NULL);
1121  }
1122  }
1123  return lck;
1124 }
1125 
1126 #endif // KMP_USE_DYNAMIC_LOCK
1127 
1138 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1139  kmp_critical_name *crit) {
1140 #if KMP_USE_DYNAMIC_LOCK
1141 #if OMPT_SUPPORT && OMPT_OPTIONAL
1142  OMPT_STORE_RETURN_ADDRESS(global_tid);
1143 #endif // OMPT_SUPPORT
1144  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1145 #else
1146  KMP_COUNT_BLOCK(OMP_CRITICAL);
1147 #if OMPT_SUPPORT && OMPT_OPTIONAL
1148  ompt_state_t prev_state = ompt_state_undefined;
1149  ompt_thread_info_t ti;
1150 #endif
1151  kmp_user_lock_p lck;
1152 
1153  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1154  __kmp_assert_valid_gtid(global_tid);
1155 
1156  // TODO: add THR_OVHD_STATE
1157 
1158  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1159  KMP_CHECK_USER_LOCK_INIT();
1160 
1161  if ((__kmp_user_lock_kind == lk_tas) &&
1162  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1163  lck = (kmp_user_lock_p)crit;
1164  }
1165 #if KMP_USE_FUTEX
1166  else if ((__kmp_user_lock_kind == lk_futex) &&
1167  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1168  lck = (kmp_user_lock_p)crit;
1169  }
1170 #endif
1171  else { // ticket, queuing or drdpa
1172  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1173  }
1174 
1175  if (__kmp_env_consistency_check)
1176  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1177 
1178 // since the critical directive binds to all threads, not just the current
1179 // team, we have to check this even if we are in a serialized team.
1180 // also, even if we are the uber thread, we still have to acquire the lock,
1181 // as we may have to contend with sibling threads.
1182 
1183 #if USE_ITT_BUILD
1184  __kmp_itt_critical_acquiring(lck);
1185 #endif /* USE_ITT_BUILD */
1186 #if OMPT_SUPPORT && OMPT_OPTIONAL
1187  OMPT_STORE_RETURN_ADDRESS(global_tid);
1188  void *codeptr_ra = NULL;
1189  if (ompt_enabled.enabled) {
1190  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1191  /* OMPT state update */
1192  prev_state = ti.state;
1193  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1194  ti.state = ompt_state_wait_critical;
1195 
1196  /* OMPT event callback */
1197  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1198  if (ompt_enabled.ompt_callback_mutex_acquire) {
1199  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1200  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1201  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1202  }
1203  }
1204 #endif
1205  // The value of 'crit' is suitable for use as the critical_id of the
1206  // critical section directive.
1207  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1208 
1209 #if USE_ITT_BUILD
1210  __kmp_itt_critical_acquired(lck);
1211 #endif /* USE_ITT_BUILD */
1212 #if OMPT_SUPPORT && OMPT_OPTIONAL
1213  if (ompt_enabled.enabled) {
1214  /* OMPT state update */
1215  ti.state = prev_state;
1216  ti.wait_id = 0;
1217 
1218  /* OMPT event callback */
1219  if (ompt_enabled.ompt_callback_mutex_acquired) {
1220  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1221  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1222  }
1223  }
1224 #endif
1225  KMP_POP_PARTITIONED_TIMER();
1226 
1227  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1228  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1229 #endif // KMP_USE_DYNAMIC_LOCK
1230 }
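/* Illustrative sketch:
       #pragma omp critical (name)
       { body(); }
   lowers to
       static kmp_critical_name crit_name;   // zero-initialized, one per name
       ...
       __kmpc_critical(&loc, gtid, &crit_name);
       body();
       __kmpc_end_critical(&loc, gtid, &crit_name);
   The storage behind crit_name is claimed lazily: the first thread to arrive
   installs a lock pointer (or an inlined lock tag) into it. */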
1231 
1232 #if KMP_USE_DYNAMIC_LOCK
1233 
1234 // Converts the given hint to an internal lock implementation
1235 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1236 #if KMP_USE_TSX
1237 #define KMP_TSX_LOCK(seq) lockseq_##seq
1238 #else
1239 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1240 #endif
1241 
1242 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1243 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1244 #else
1245 #define KMP_CPUINFO_RTM 0
1246 #endif
1247 
1248  // Hints that do not require further logic
1249  if (hint & kmp_lock_hint_hle)
1250  return KMP_TSX_LOCK(hle);
1251  if (hint & kmp_lock_hint_rtm)
1252  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1253  if (hint & kmp_lock_hint_adaptive)
1254  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1255 
1256  // Rule out conflicting hints first by returning the default lock
1257  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1258  return __kmp_user_lock_seq;
1259  if ((hint & omp_lock_hint_speculative) &&
1260  (hint & omp_lock_hint_nonspeculative))
1261  return __kmp_user_lock_seq;
1262 
1263  // Do not even consider speculation when it appears to be contended
1264  if (hint & omp_lock_hint_contended)
1265  return lockseq_queuing;
1266 
1267  // Uncontended lock without speculation
1268  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1269  return lockseq_tas;
1270 
1271  // Use RTM lock for speculation
1272  if (hint & omp_lock_hint_speculative)
1273  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1274 
1275  return __kmp_user_lock_seq;
1276 }
1277 
1278 #if OMPT_SUPPORT && OMPT_OPTIONAL
1279 #if KMP_USE_DYNAMIC_LOCK
1280 static kmp_mutex_impl_t
1281 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1282  if (user_lock) {
1283  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1284  case 0:
1285  break;
1286 #if KMP_USE_FUTEX
1287  case locktag_futex:
1288  return kmp_mutex_impl_queuing;
1289 #endif
1290  case locktag_tas:
1291  return kmp_mutex_impl_spin;
1292 #if KMP_USE_TSX
1293  case locktag_hle:
1294  case locktag_rtm_spin:
1295  return kmp_mutex_impl_speculative;
1296 #endif
1297  default:
1298  return kmp_mutex_impl_none;
1299  }
1300  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1301  }
1302  KMP_ASSERT(ilock);
1303  switch (ilock->type) {
1304 #if KMP_USE_TSX
1305  case locktag_adaptive:
1306  case locktag_rtm_queuing:
1307  return kmp_mutex_impl_speculative;
1308 #endif
1309  case locktag_nested_tas:
1310  return kmp_mutex_impl_spin;
1311 #if KMP_USE_FUTEX
1312  case locktag_nested_futex:
1313 #endif
1314  case locktag_ticket:
1315  case locktag_queuing:
1316  case locktag_drdpa:
1317  case locktag_nested_ticket:
1318  case locktag_nested_queuing:
1319  case locktag_nested_drdpa:
1320  return kmp_mutex_impl_queuing;
1321  default:
1322  return kmp_mutex_impl_none;
1323  }
1324 }
1325 #else
1326 // For locks without dynamic binding
1327 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1328  switch (__kmp_user_lock_kind) {
1329  case lk_tas:
1330  return kmp_mutex_impl_spin;
1331 #if KMP_USE_FUTEX
1332  case lk_futex:
1333 #endif
1334  case lk_ticket:
1335  case lk_queuing:
1336  case lk_drdpa:
1337  return kmp_mutex_impl_queuing;
1338 #if KMP_USE_TSX
1339  case lk_hle:
1340  case lk_rtm_queuing:
1341  case lk_rtm_spin:
1342  case lk_adaptive:
1343  return kmp_mutex_impl_speculative;
1344 #endif
1345  default:
1346  return kmp_mutex_impl_none;
1347  }
1348 }
1349 #endif // KMP_USE_DYNAMIC_LOCK
1350 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1351 
1365 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1366  kmp_critical_name *crit, uint32_t hint) {
1367  KMP_COUNT_BLOCK(OMP_CRITICAL);
1368  kmp_user_lock_p lck;
1369 #if OMPT_SUPPORT && OMPT_OPTIONAL
1370  ompt_state_t prev_state = ompt_state_undefined;
1371  ompt_thread_info_t ti;
1372  // This is the case if called from __kmpc_critical:
1373  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1374  if (!codeptr)
1375  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1376 #endif
1377 
1378  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1379  __kmp_assert_valid_gtid(global_tid);
1380 
1381  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1382  // Check if it is initialized.
1383  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1384  if (*lk == 0) {
1385  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1386  if (KMP_IS_D_LOCK(lckseq)) {
1387  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1388  KMP_GET_D_TAG(lckseq));
1389  } else {
1390  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1391  }
1392  }
1393  // Branch for accessing the actual lock object and set operation. This
1394  // branching is inevitable since this lock initialization does not follow the
1395  // normal dispatch path (lock table is not used).
1396  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1397  lck = (kmp_user_lock_p)lk;
1398  if (__kmp_env_consistency_check) {
1399  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1400  __kmp_map_hint_to_lock(hint));
1401  }
1402 #if USE_ITT_BUILD
1403  __kmp_itt_critical_acquiring(lck);
1404 #endif
1405 #if OMPT_SUPPORT && OMPT_OPTIONAL
1406  if (ompt_enabled.enabled) {
1407  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1408  /* OMPT state update */
1409  prev_state = ti.state;
1410  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1411  ti.state = ompt_state_wait_critical;
1412 
1413  /* OMPT event callback */
1414  if (ompt_enabled.ompt_callback_mutex_acquire) {
1415  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1416  ompt_mutex_critical, (unsigned int)hint,
1417  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1418  codeptr);
1419  }
1420  }
1421 #endif
1422 #if KMP_USE_INLINED_TAS
1423  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1424  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1425  } else
1426 #elif KMP_USE_INLINED_FUTEX
1427  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1428  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1429  } else
1430 #endif
1431  {
1432  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1433  }
1434  } else {
1435  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1436  lck = ilk->lock;
1437  if (__kmp_env_consistency_check) {
1438  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1439  __kmp_map_hint_to_lock(hint));
1440  }
1441 #if USE_ITT_BUILD
1442  __kmp_itt_critical_acquiring(lck);
1443 #endif
1444 #if OMPT_SUPPORT && OMPT_OPTIONAL
1445  if (ompt_enabled.enabled) {
1446  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1447  /* OMPT state update */
1448  prev_state = ti.state;
1449  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1450  ti.state = ompt_state_wait_critical;
1451 
1452  /* OMPT event callback */
1453  if (ompt_enabled.ompt_callback_mutex_acquire) {
1454  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1455  ompt_mutex_critical, (unsigned int)hint,
1456  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1457  codeptr);
1458  }
1459  }
1460 #endif
1461  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1462  }
1463  KMP_POP_PARTITIONED_TIMER();
1464 
1465 #if USE_ITT_BUILD
1466  __kmp_itt_critical_acquired(lck);
1467 #endif /* USE_ITT_BUILD */
1468 #if OMPT_SUPPORT && OMPT_OPTIONAL
1469  if (ompt_enabled.enabled) {
1470  /* OMPT state update */
1471  ti.state = prev_state;
1472  ti.wait_id = 0;
1473 
1474  /* OMPT event callback */
1475  if (ompt_enabled.ompt_callback_mutex_acquired) {
1476  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1477  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1478  }
1479  }
1480 #endif
1481 
1482  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1483  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1484 } // __kmpc_critical_with_hint
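/* Illustrative sketch (OpenMP 4.5/5.0 hint clause):
       #pragma omp critical (name) hint(omp_sync_hint_contended)
       { body(); }
   lowers to
       __kmpc_critical_with_hint(&loc, gtid, &crit_name, omp_lock_hint_contended);
       body();
       __kmpc_end_critical(&loc, gtid, &crit_name);
   The hint only influences which lock implementation is chosen the first time
   the named critical is initialized; later entries reuse the lock already
   stored in *crit. */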
1485 
1486 #endif // KMP_USE_DYNAMIC_LOCK
1487 
1497 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1498  kmp_critical_name *crit) {
1499  kmp_user_lock_p lck;
1500 
1501  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1502 
1503 #if KMP_USE_DYNAMIC_LOCK
1504  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1505  lck = (kmp_user_lock_p)crit;
1506  KMP_ASSERT(lck != NULL);
1507  if (__kmp_env_consistency_check) {
1508  __kmp_pop_sync(global_tid, ct_critical, loc);
1509  }
1510 #if USE_ITT_BUILD
1511  __kmp_itt_critical_releasing(lck);
1512 #endif
1513 #if KMP_USE_INLINED_TAS
1514  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1515  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1516  } else
1517 #elif KMP_USE_INLINED_FUTEX
1518  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1519  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1520  } else
1521 #endif
1522  {
1523  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1524  }
1525  } else {
1526  kmp_indirect_lock_t *ilk =
1527  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1528  KMP_ASSERT(ilk != NULL);
1529  lck = ilk->lock;
1530  if (__kmp_env_consistency_check) {
1531  __kmp_pop_sync(global_tid, ct_critical, loc);
1532  }
1533 #if USE_ITT_BUILD
1534  __kmp_itt_critical_releasing(lck);
1535 #endif
1536  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1537  }
1538 
1539 #else // KMP_USE_DYNAMIC_LOCK
1540 
1541  if ((__kmp_user_lock_kind == lk_tas) &&
1542  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1543  lck = (kmp_user_lock_p)crit;
1544  }
1545 #if KMP_USE_FUTEX
1546  else if ((__kmp_user_lock_kind == lk_futex) &&
1547  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1548  lck = (kmp_user_lock_p)crit;
1549  }
1550 #endif
1551  else { // ticket, queuing or drdpa
1552  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1553  }
1554 
1555  KMP_ASSERT(lck != NULL);
1556 
1557  if (__kmp_env_consistency_check)
1558  __kmp_pop_sync(global_tid, ct_critical, loc);
1559 
1560 #if USE_ITT_BUILD
1561  __kmp_itt_critical_releasing(lck);
1562 #endif /* USE_ITT_BUILD */
1563  // The value of 'crit' is suitable for use as the critical_id of the
1564  // critical section directive.
1565  __kmp_release_user_lock_with_checks(lck, global_tid);
1566 
1567 #endif // KMP_USE_DYNAMIC_LOCK
1568 
1569 #if OMPT_SUPPORT && OMPT_OPTIONAL
1570  /* OMPT release event triggers after lock is released; place here to trigger
1571  * for all #if branches */
1572  OMPT_STORE_RETURN_ADDRESS(global_tid);
1573  if (ompt_enabled.ompt_callback_mutex_released) {
1574  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1575  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1576  OMPT_LOAD_RETURN_ADDRESS(0));
1577  }
1578 #endif
1579 
1580  KMP_POP_PARTITIONED_TIMER();
1581  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1582 }
1583 
1593 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1594  int status;
1595  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1596  __kmp_assert_valid_gtid(global_tid);
1597 
1598  if (!TCR_4(__kmp_init_parallel))
1599  __kmp_parallel_initialize();
1600 
1601  __kmp_resume_if_soft_paused();
1602 
1603  if (__kmp_env_consistency_check)
1604  __kmp_check_barrier(global_tid, ct_barrier, loc);
1605 
1606 #if OMPT_SUPPORT
1607  ompt_frame_t *ompt_frame;
1608  if (ompt_enabled.enabled) {
1609  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1610  if (ompt_frame->enter_frame.ptr == NULL)
1611  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1612  }
1613  OMPT_STORE_RETURN_ADDRESS(global_tid);
1614 #endif
1615 #if USE_ITT_NOTIFY
1616  __kmp_threads[global_tid]->th.th_ident = loc;
1617 #endif
1618  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1619 #if OMPT_SUPPORT && OMPT_OPTIONAL
1620  if (ompt_enabled.enabled) {
1621  ompt_frame->enter_frame = ompt_data_none;
1622  }
1623 #endif
1624 
1625  return (status != 0) ? 0 : 1;
1626 }
1627 
1637 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1638  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1639  __kmp_assert_valid_gtid(global_tid);
1640  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1641 }
1642 
1653 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1654  kmp_int32 ret;
1655  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1656  __kmp_assert_valid_gtid(global_tid);
1657 
1658  if (!TCR_4(__kmp_init_parallel))
1659  __kmp_parallel_initialize();
1660 
1661  __kmp_resume_if_soft_paused();
1662 
1663  if (__kmp_env_consistency_check) {
1664  if (loc == 0) {
1665  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1666  }
1667  __kmp_check_barrier(global_tid, ct_barrier, loc);
1668  }
1669 
1670 #if OMPT_SUPPORT
1671  ompt_frame_t *ompt_frame;
1672  if (ompt_enabled.enabled) {
1673  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1674  if (ompt_frame->enter_frame.ptr == NULL)
1675  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1676  }
1677  OMPT_STORE_RETURN_ADDRESS(global_tid);
1678 #endif
1679 #if USE_ITT_NOTIFY
1680  __kmp_threads[global_tid]->th.th_ident = loc;
1681 #endif
1682  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1683 #if OMPT_SUPPORT && OMPT_OPTIONAL
1684  if (ompt_enabled.enabled) {
1685  ompt_frame->enter_frame = ompt_data_none;
1686  }
1687 #endif
1688 
1689  ret = __kmpc_master(loc, global_tid);
1690 
1691  if (__kmp_env_consistency_check) {
1692  /* there's no __kmpc_end_master called; so the (stats) */
1693  /* actions of __kmpc_end_master are done here */
1694  if (ret) {
1695  /* only one thread should do the pop since only */
1696  /* one did the push (see __kmpc_master()) */
1697  __kmp_pop_sync(global_tid, ct_master, loc);
1698  }
1699  }
1700 
1701  return (ret);
1702 }
1703 
1704 /* The BARRIER for a SINGLE process section is always explicit */
1716 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1717  __kmp_assert_valid_gtid(global_tid);
1718  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1719 
1720  if (rc) {
1721  // We are going to execute the single statement, so we should count it.
1722  KMP_COUNT_BLOCK(OMP_SINGLE);
1723  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1724  }
1725 
1726 #if OMPT_SUPPORT && OMPT_OPTIONAL
1727  kmp_info_t *this_thr = __kmp_threads[global_tid];
1728  kmp_team_t *team = this_thr->th.th_team;
1729  int tid = __kmp_tid_from_gtid(global_tid);
1730 
1731  if (ompt_enabled.enabled) {
1732  if (rc) {
1733  if (ompt_enabled.ompt_callback_work) {
1734  ompt_callbacks.ompt_callback(ompt_callback_work)(
1735  ompt_work_single_executor, ompt_scope_begin,
1736  &(team->t.ompt_team_info.parallel_data),
1737  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1738  1, OMPT_GET_RETURN_ADDRESS(0));
1739  }
1740  } else {
1741  if (ompt_enabled.ompt_callback_work) {
1742  ompt_callbacks.ompt_callback(ompt_callback_work)(
1743  ompt_work_single_other, ompt_scope_begin,
1744  &(team->t.ompt_team_info.parallel_data),
1745  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1746  1, OMPT_GET_RETURN_ADDRESS(0));
1747  ompt_callbacks.ompt_callback(ompt_callback_work)(
1748  ompt_work_single_other, ompt_scope_end,
1749  &(team->t.ompt_team_info.parallel_data),
1750  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1751  1, OMPT_GET_RETURN_ADDRESS(0));
1752  }
1753  }
1754  }
1755 #endif
1756 
1757  return rc;
1758 }
1759 
1769 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1770  __kmp_assert_valid_gtid(global_tid);
1771  __kmp_exit_single(global_tid);
1772  KMP_POP_PARTITIONED_TIMER();
1773 
1774 #if OMPT_SUPPORT && OMPT_OPTIONAL
1775  kmp_info_t *this_thr = __kmp_threads[global_tid];
1776  kmp_team_t *team = this_thr->th.th_team;
1777  int tid = __kmp_tid_from_gtid(global_tid);
1778 
1779  if (ompt_enabled.ompt_callback_work) {
1780  ompt_callbacks.ompt_callback(ompt_callback_work)(
1781  ompt_work_single_executor, ompt_scope_end,
1782  &(team->t.ompt_team_info.parallel_data),
1783  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1784  OMPT_GET_RETURN_ADDRESS(0));
1785  }
1786 #endif
1787 }
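/* Illustrative sketch:
       #pragma omp single
       { body(); }
   lowers to
       if (__kmpc_single(&loc, gtid)) {
         body();
         __kmpc_end_single(&loc, gtid);
       }
       __kmpc_barrier(&loc, gtid);   // omitted when nowait is specified
   Only the thread that won the race in __kmp_enter_single executes body() and
   calls __kmpc_end_single(). */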
1788 
1796 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1797  KMP_POP_PARTITIONED_TIMER();
1798  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1799 
1800 #if OMPT_SUPPORT && OMPT_OPTIONAL
1801  if (ompt_enabled.ompt_callback_work) {
1802  ompt_work_t ompt_work_type = ompt_work_loop;
1803  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1804  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1805  // Determine workshare type
1806  if (loc != NULL) {
1807  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1808  ompt_work_type = ompt_work_loop;
1809  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1810  ompt_work_type = ompt_work_sections;
1811  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1812  ompt_work_type = ompt_work_distribute;
1813  } else {
1814  // use default set above.
1815  // a warning about this case is provided in __kmpc_for_static_init
1816  }
1817  KMP_DEBUG_ASSERT(ompt_work_type);
1818  }
1819  ompt_callbacks.ompt_callback(ompt_callback_work)(
1820  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1821  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1822  }
1823 #endif
1824  if (__kmp_env_consistency_check)
1825  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1826 }
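/* Illustrative sketch: a statically scheduled worksharing loop
       #pragma omp for schedule(static)
       for (i = lb; i <= ub; ++i) body(i);
   is bracketed roughly as
       __kmpc_for_static_init_4(&loc, gtid, ...);  // compute this thread's chunk
       for (...) body(i);
       __kmpc_for_static_fini(&loc, gtid);
       __kmpc_barrier(&loc, gtid);                 // omitted when nowait
   (the __kmpc_for_static_init_* entry points live elsewhere in the runtime;
   they are shown here only to place __kmpc_for_static_fini in context). */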
1827 
1828 // User routines which take C-style arguments (call by value),
1829 // unlike the Fortran equivalent routines
1830 
1831 void ompc_set_num_threads(int arg) {
1832  // !!!!! TODO: check the per-task binding
1833  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1834 }
1835 
1836 void ompc_set_dynamic(int flag) {
1837  kmp_info_t *thread;
1838 
1839  /* For the thread-private implementation of the internal controls */
1840  thread = __kmp_entry_thread();
1841 
1842  __kmp_save_internal_controls(thread);
1843 
1844  set__dynamic(thread, flag ? true : false);
1845 }
1846 
1847 void ompc_set_nested(int flag) {
1848  kmp_info_t *thread;
1849 
1850  /* For the thread-private internal controls implementation */
1851  thread = __kmp_entry_thread();
1852 
1853  __kmp_save_internal_controls(thread);
1854 
1855  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1856 }
1857 
1858 void ompc_set_max_active_levels(int max_active_levels) {
1859  /* TO DO */
1860  /* we want per-task implementation of this internal control */
1861 
1862  /* For the per-thread internal controls implementation */
1863  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1864 }
1865 
1866 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1867  // !!!!! TODO: check the per-task binding
1868  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1869 }
1870 
1871 int ompc_get_ancestor_thread_num(int level) {
1872  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1873 }
1874 
1875 int ompc_get_team_size(int level) {
1876  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1877 }
1878 
1879 /* OpenMP 5.0 Affinity Format API */
1880 
1881 void ompc_set_affinity_format(char const *format) {
1882  if (!__kmp_init_serial) {
1883  __kmp_serial_initialize();
1884  }
1885  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1886  format, KMP_STRLEN(format) + 1);
1887 }
1888 
1889 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1890  size_t format_size;
1891  if (!__kmp_init_serial) {
1892  __kmp_serial_initialize();
1893  }
1894  format_size = KMP_STRLEN(__kmp_affinity_format);
1895  if (buffer && size) {
1896  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1897  format_size + 1);
1898  }
1899  return format_size;
1900 }
1901 
1902 void ompc_display_affinity(char const *format) {
1903  int gtid;
1904  if (!TCR_4(__kmp_init_middle)) {
1905  __kmp_middle_initialize();
1906  }
1907  gtid = __kmp_get_gtid();
1908  __kmp_aux_display_affinity(gtid, format);
1909 }
1910 
1911 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1912  char const *format) {
1913  int gtid;
1914  size_t num_required;
1915  kmp_str_buf_t capture_buf;
1916  if (!TCR_4(__kmp_init_middle)) {
1917  __kmp_middle_initialize();
1918  }
1919  gtid = __kmp_get_gtid();
1920  __kmp_str_buf_init(&capture_buf);
1921  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1922  if (buffer && buf_size) {
1923  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1924  capture_buf.used + 1);
1925  }
1926  __kmp_str_buf_free(&capture_buf);
1927  return num_required;
1928 }
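/* Illustrative usage of the affinity-format entry points (user code normally
   reaches them through the omp_* wrappers; the field specifiers follow the
   OpenMP 5.0 affinity-format table):
       omp_set_affinity_format("thread %n binds to %A");
       #pragma omp parallel
       omp_display_affinity(NULL);   // NULL/empty means "use the current format"
*/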
1929 
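// Editorial note: the ompc_*affinity* entries above back the OpenMP 5.0
// affinity-format routines declared in omp.h. A minimal user-level sketch
// (illustrative only; the format fields follow the OMP_AFFINITY_FORMAT
// syntax, and a NULL format selects the current affinity-format-var):
#if 0 // illustrative only
void example_affinity_format(void) {
  char buf[256];
  omp_set_affinity_format("tid=%{thread_num} bound to %{thread_affinity}");
#pragma omp parallel
  {
    omp_display_affinity(NULL); // print one line per thread
    // Capture instead of printing; the return value is the size required,
    // so the call can be retried with a larger buffer if it was truncated.
    size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
    (void)needed;
  }
}
#endif
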
1930 void kmpc_set_stacksize(int arg) {
1931  // __kmp_aux_set_stacksize initializes the library if needed
1932  __kmp_aux_set_stacksize(arg);
1933 }
1934 
1935 void kmpc_set_stacksize_s(size_t arg) {
1936  // __kmp_aux_set_stacksize initializes the library if needed
1937  __kmp_aux_set_stacksize(arg);
1938 }
1939 
1940 void kmpc_set_blocktime(int arg) {
1941  int gtid, tid;
1942  kmp_info_t *thread;
1943 
1944  gtid = __kmp_entry_gtid();
1945  tid = __kmp_tid_from_gtid(gtid);
1946  thread = __kmp_thread_from_gtid(gtid);
1947 
1948  __kmp_aux_set_blocktime(arg, thread, tid);
1949 }
1950 
1951 void kmpc_set_library(int arg) {
1952  // __kmp_user_set_library initializes the library if needed
1953  __kmp_user_set_library((enum library_type)arg);
1954 }
1955 
1956 void kmpc_set_defaults(char const *str) {
1957  // __kmp_aux_set_defaults initializes the library if needed
1958  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1959 }
1960 
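// Editorial note: a sketch of how the kmpc_set_* entries above are typically
// reached through the documented kmp_* extension API. The user-visible names
// and the exact argument forms shown here are assumptions for illustration.
#if 0 // illustrative only
void example_kmp_extensions(void) {
  kmp_set_stacksize_s(4 * 1024 * 1024); // 4 MiB worker stacks
  kmp_set_blocktime(0); // idle workers go to sleep immediately
  kmp_set_defaults("KMP_AFFINITY=compact"); // same syntax as the env variable
}
#endif
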
1961 void kmpc_set_disp_num_buffers(int arg) {
1962  // ignore after initialization because some teams have already
1963  // allocated dispatch buffers
1964  if (__kmp_init_serial == 0 && arg > 0)
1965  __kmp_dispatch_num_buffers = arg;
1966 }
1967 
1968 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1969 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1970  return -1;
1971 #else
1972  if (!TCR_4(__kmp_init_middle)) {
1973  __kmp_middle_initialize();
1974  }
1975  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1976 #endif
1977 }
1978 
1979 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1980 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1981  return -1;
1982 #else
1983  if (!TCR_4(__kmp_init_middle)) {
1984  __kmp_middle_initialize();
1985  }
1986  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1987 #endif
1988 }
1989 
1990 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1991 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1992  return -1;
1993 #else
1994  if (!TCR_4(__kmp_init_middle)) {
1995  __kmp_middle_initialize();
1996  }
1997  return __kmp_aux_get_affinity_mask_proc(proc, mask);
1998 #endif
1999 }
2000 
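// Editorial note: the kmpc_*_affinity_mask_proc entries above sit behind the
// kmp_affinity_mask_t extension API in omp.h. A rough sketch, assuming the
// conventional 0-on-success return for the mask-editing calls:
#if 0 // illustrative only
void example_affinity_mask(void) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  // Add logical processor 0 to the mask, then bind the calling thread to it.
  if (kmp_set_affinity_mask_proc(0, &mask) == 0)
    kmp_set_affinity(&mask);
  kmp_destroy_affinity_mask(&mask);
}
#endif
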
2001 /* -------------------------------------------------------------------------- */
2046 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2047  void *cpy_data, void (*cpy_func)(void *, void *),
2048  kmp_int32 didit) {
2049  void **data_ptr;
2050  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2051  __kmp_assert_valid_gtid(gtid);
2052 
2053  KMP_MB();
2054 
2055  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2056 
2057  if (__kmp_env_consistency_check) {
2058  if (loc == 0) {
2059  KMP_WARNING(ConstructIdentInvalid);
2060  }
2061  }
2062 
2063  // ToDo: Optimize the following two barriers into some kind of split barrier
2064 
2065  if (didit)
2066  *data_ptr = cpy_data;
2067 
2068 #if OMPT_SUPPORT
2069  ompt_frame_t *ompt_frame;
2070  if (ompt_enabled.enabled) {
2071  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2072  if (ompt_frame->enter_frame.ptr == NULL)
2073  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2074  }
2075  OMPT_STORE_RETURN_ADDRESS(gtid);
2076 #endif
2077 /* This barrier is not a barrier region boundary */
2078 #if USE_ITT_NOTIFY
2079  __kmp_threads[gtid]->th.th_ident = loc;
2080 #endif
2081  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2082 
2083  if (!didit)
2084  (*cpy_func)(cpy_data, *data_ptr);
2085 
2086 // Consider next barrier a user-visible barrier for barrier region boundaries
2087 // Nesting checks are already handled by the single construct checks
2088  {
2089 #if OMPT_SUPPORT
2090  OMPT_STORE_RETURN_ADDRESS(gtid);
2091 #endif
2092 #if USE_ITT_NOTIFY
2093  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2094 // tasks can overwrite the location)
2095 #endif
2096  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2097 #if OMPT_SUPPORT && OMPT_OPTIONAL
2098  if (ompt_enabled.enabled) {
2099  ompt_frame->enter_frame = ompt_data_none;
2100  }
2101 #endif
2102  }
2103 }
2104 
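// Editorial note: a rough sketch of the caller-side pattern behind
// "#pragma omp single copyprivate(x)". The copy helper and its packing of the
// data are illustrative, not the actual code-generator output.
#if 0 // illustrative only
static void example_copy_func(void *dst, void *src) { // hypothetical helper
  *(int *)dst = *(int *)src;
}

void example_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 didit = 0;
  if (__kmpc_single(loc, gtid)) {
    *x = 42; // body of the single region, executed by one thread
    __kmpc_end_single(loc, gtid);
    didit = 1;
  }
  // Every thread calls this; the executing thread's *x is broadcast to all.
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, example_copy_func, didit);
}
#endif
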
2105 /* -------------------------------------------------------------------------- */
2106 
2107 #define INIT_LOCK __kmp_init_user_lock_with_checks
2108 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2109 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2110 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2111 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2112 #define ACQUIRE_NESTED_LOCK_TIMED \
2113  __kmp_acquire_nested_user_lock_with_checks_timed
2114 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2115 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2116 #define TEST_LOCK __kmp_test_user_lock_with_checks
2117 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2118 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2119 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2120 
2121 // TODO: Make check abort messages use location info & pass it into
2122 // with_checks routines
2123 
2124 #if KMP_USE_DYNAMIC_LOCK
2125 
2126 // internal lock initializer
2127 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2128  kmp_dyna_lockseq_t seq) {
2129  if (KMP_IS_D_LOCK(seq)) {
2130  KMP_INIT_D_LOCK(lock, seq);
2131 #if USE_ITT_BUILD
2132  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2133 #endif
2134  } else {
2135  KMP_INIT_I_LOCK(lock, seq);
2136 #if USE_ITT_BUILD
2137  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2138  __kmp_itt_lock_creating(ilk->lock, loc);
2139 #endif
2140  }
2141 }
2142 
2143 // internal nest lock initializer
2144 static __forceinline void
2145 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2146  kmp_dyna_lockseq_t seq) {
2147 #if KMP_USE_TSX
2148  // There is no nested lock implementation for speculative locks
2149  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2150  seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2151  seq = __kmp_user_lock_seq;
2152 #endif
2153  switch (seq) {
2154  case lockseq_tas:
2155  seq = lockseq_nested_tas;
2156  break;
2157 #if KMP_USE_FUTEX
2158  case lockseq_futex:
2159  seq = lockseq_nested_futex;
2160  break;
2161 #endif
2162  case lockseq_ticket:
2163  seq = lockseq_nested_ticket;
2164  break;
2165  case lockseq_queuing:
2166  seq = lockseq_nested_queuing;
2167  break;
2168  case lockseq_drdpa:
2169  seq = lockseq_nested_drdpa;
2170  break;
2171  default:
2172  seq = lockseq_nested_queuing;
2173  }
2174  KMP_INIT_I_LOCK(lock, seq);
2175 #if USE_ITT_BUILD
2176  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2177  __kmp_itt_lock_creating(ilk->lock, loc);
2178 #endif
2179 }
2180 
2181 /* initialize the lock with a hint */
2182 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2183  uintptr_t hint) {
2184  KMP_DEBUG_ASSERT(__kmp_init_serial);
2185  if (__kmp_env_consistency_check && user_lock == NULL) {
2186  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2187  }
2188 
2189  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2190 
2191 #if OMPT_SUPPORT && OMPT_OPTIONAL
2192  // This is the case, if called from omp_init_lock_with_hint:
2193  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2194  if (!codeptr)
2195  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2196  if (ompt_enabled.ompt_callback_lock_init) {
2197  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2198  ompt_mutex_lock, (omp_lock_hint_t)hint,
2199  __ompt_get_mutex_impl_type(user_lock),
2200  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2201  }
2202 #endif
2203 }
2204 
2205 /* initialize the lock with a hint */
2206 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2207  void **user_lock, uintptr_t hint) {
2208  KMP_DEBUG_ASSERT(__kmp_init_serial);
2209  if (__kmp_env_consistency_check && user_lock == NULL) {
2210  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2211  }
2212 
2213  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2214 
2215 #if OMPT_SUPPORT && OMPT_OPTIONAL
2216  // This is the case, if called from omp_init_lock_with_hint:
2217  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2218  if (!codeptr)
2219  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2220  if (ompt_enabled.ompt_callback_lock_init) {
2221  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2222  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2223  __ompt_get_mutex_impl_type(user_lock),
2224  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2225  }
2226 #endif
2227 }
2228 
2229 #endif // KMP_USE_DYNAMIC_LOCK
2230 
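// Editorial note: a minimal user-level sketch of the hinted initializer above,
// using the omp.h lock API (the hint is only a request; the runtime maps it to
// an available lock implementation).
#if 0 // illustrative only
void example_lock_with_hint(void) {
  omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
#pragma omp parallel
  {
    omp_set_lock(&l);
    // ... critical work ...
    omp_unset_lock(&l);
  }
  omp_destroy_lock(&l);
}
#endif
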
2231 /* initialize the lock */
2232 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2233 #if KMP_USE_DYNAMIC_LOCK
2234 
2235  KMP_DEBUG_ASSERT(__kmp_init_serial);
2236  if (__kmp_env_consistency_check && user_lock == NULL) {
2237  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2238  }
2239  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2240 
2241 #if OMPT_SUPPORT && OMPT_OPTIONAL
2242  // This is the case, if called from omp_init_lock_with_hint:
2243  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2244  if (!codeptr)
2245  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2246  if (ompt_enabled.ompt_callback_lock_init) {
2247  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2248  ompt_mutex_lock, omp_lock_hint_none,
2249  __ompt_get_mutex_impl_type(user_lock),
2250  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2251  }
2252 #endif
2253 
2254 #else // KMP_USE_DYNAMIC_LOCK
2255 
2256  static char const *const func = "omp_init_lock";
2257  kmp_user_lock_p lck;
2258  KMP_DEBUG_ASSERT(__kmp_init_serial);
2259 
2260  if (__kmp_env_consistency_check) {
2261  if (user_lock == NULL) {
2262  KMP_FATAL(LockIsUninitialized, func);
2263  }
2264  }
2265 
2266  KMP_CHECK_USER_LOCK_INIT();
2267 
2268  if ((__kmp_user_lock_kind == lk_tas) &&
2269  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2270  lck = (kmp_user_lock_p)user_lock;
2271  }
2272 #if KMP_USE_FUTEX
2273  else if ((__kmp_user_lock_kind == lk_futex) &&
2274  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2275  lck = (kmp_user_lock_p)user_lock;
2276  }
2277 #endif
2278  else {
2279  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2280  }
2281  INIT_LOCK(lck);
2282  __kmp_set_user_lock_location(lck, loc);
2283 
2284 #if OMPT_SUPPORT && OMPT_OPTIONAL
2285  // This is the case, if called from omp_init_lock_with_hint:
2286  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2287  if (!codeptr)
2288  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2289  if (ompt_enabled.ompt_callback_lock_init) {
2290  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2291  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2292  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2293  }
2294 #endif
2295 
2296 #if USE_ITT_BUILD
2297  __kmp_itt_lock_creating(lck);
2298 #endif /* USE_ITT_BUILD */
2299 
2300 #endif // KMP_USE_DYNAMIC_LOCK
2301 } // __kmpc_init_lock
2302 
2303 /* initialize the lock */
2304 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2305 #if KMP_USE_DYNAMIC_LOCK
2306 
2307  KMP_DEBUG_ASSERT(__kmp_init_serial);
2308  if (__kmp_env_consistency_check && user_lock == NULL) {
2309  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2310  }
2311  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2312 
2313 #if OMPT_SUPPORT && OMPT_OPTIONAL
2314  // This is the case, if called from omp_init_lock_with_hint:
2315  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2316  if (!codeptr)
2317  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2318  if (ompt_enabled.ompt_callback_lock_init) {
2319  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2320  ompt_mutex_nest_lock, omp_lock_hint_none,
2321  __ompt_get_mutex_impl_type(user_lock),
2322  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2323  }
2324 #endif
2325 
2326 #else // KMP_USE_DYNAMIC_LOCK
2327 
2328  static char const *const func = "omp_init_nest_lock";
2329  kmp_user_lock_p lck;
2330  KMP_DEBUG_ASSERT(__kmp_init_serial);
2331 
2332  if (__kmp_env_consistency_check) {
2333  if (user_lock == NULL) {
2334  KMP_FATAL(LockIsUninitialized, func);
2335  }
2336  }
2337 
2338  KMP_CHECK_USER_LOCK_INIT();
2339 
2340  if ((__kmp_user_lock_kind == lk_tas) &&
2341  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2342  OMP_NEST_LOCK_T_SIZE)) {
2343  lck = (kmp_user_lock_p)user_lock;
2344  }
2345 #if KMP_USE_FUTEX
2346  else if ((__kmp_user_lock_kind == lk_futex) &&
2347  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2348  OMP_NEST_LOCK_T_SIZE)) {
2349  lck = (kmp_user_lock_p)user_lock;
2350  }
2351 #endif
2352  else {
2353  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2354  }
2355 
2356  INIT_NESTED_LOCK(lck);
2357  __kmp_set_user_lock_location(lck, loc);
2358 
2359 #if OMPT_SUPPORT && OMPT_OPTIONAL
2360  // This is the case, if called from omp_init_lock_with_hint:
2361  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2362  if (!codeptr)
2363  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2364  if (ompt_enabled.ompt_callback_lock_init) {
2365  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2366  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2367  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2368  }
2369 #endif
2370 
2371 #if USE_ITT_BUILD
2372  __kmp_itt_lock_creating(lck);
2373 #endif /* USE_ITT_BUILD */
2374 
2375 #endif // KMP_USE_DYNAMIC_LOCK
2376 } // __kmpc_init_nest_lock
2377 
2378 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2379 #if KMP_USE_DYNAMIC_LOCK
2380 
2381 #if USE_ITT_BUILD
2382  kmp_user_lock_p lck;
2383  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2384  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2385  } else {
2386  lck = (kmp_user_lock_p)user_lock;
2387  }
2388  __kmp_itt_lock_destroyed(lck);
2389 #endif
2390 #if OMPT_SUPPORT && OMPT_OPTIONAL
2391  // This is the case, if called from omp_init_lock_with_hint:
2392  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2393  if (!codeptr)
2394  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2395  if (ompt_enabled.ompt_callback_lock_destroy) {
2396  kmp_user_lock_p lck;
2397  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2398  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2399  } else {
2400  lck = (kmp_user_lock_p)user_lock;
2401  }
2402  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2403  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2404  }
2405 #endif
2406  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2407 #else
2408  kmp_user_lock_p lck;
2409 
2410  if ((__kmp_user_lock_kind == lk_tas) &&
2411  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2412  lck = (kmp_user_lock_p)user_lock;
2413  }
2414 #if KMP_USE_FUTEX
2415  else if ((__kmp_user_lock_kind == lk_futex) &&
2416  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2417  lck = (kmp_user_lock_p)user_lock;
2418  }
2419 #endif
2420  else {
2421  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2422  }
2423 
2424 #if OMPT_SUPPORT && OMPT_OPTIONAL
2425  // This is the case, if called from omp_init_lock_with_hint:
2426  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2427  if (!codeptr)
2428  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2429  if (ompt_enabled.ompt_callback_lock_destroy) {
2430  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2431  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2432  }
2433 #endif
2434 
2435 #if USE_ITT_BUILD
2436  __kmp_itt_lock_destroyed(lck);
2437 #endif /* USE_ITT_BUILD */
2438  DESTROY_LOCK(lck);
2439 
2440  if ((__kmp_user_lock_kind == lk_tas) &&
2441  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2442  ;
2443  }
2444 #if KMP_USE_FUTEX
2445  else if ((__kmp_user_lock_kind == lk_futex) &&
2446  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2447  ;
2448  }
2449 #endif
2450  else {
2451  __kmp_user_lock_free(user_lock, gtid, lck);
2452  }
2453 #endif // KMP_USE_DYNAMIC_LOCK
2454 } // __kmpc_destroy_lock
2455 
2456 /* destroy the lock */
2457 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2458 #if KMP_USE_DYNAMIC_LOCK
2459 
2460 #if USE_ITT_BUILD
2461  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2462  __kmp_itt_lock_destroyed(ilk->lock);
2463 #endif
2464 #if OMPT_SUPPORT && OMPT_OPTIONAL
2465  // This is the case, if called from omp_init_lock_with_hint:
2466  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2467  if (!codeptr)
2468  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2469  if (ompt_enabled.ompt_callback_lock_destroy) {
2470  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2471  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2472  }
2473 #endif
2474  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2475 
2476 #else // KMP_USE_DYNAMIC_LOCK
2477 
2478  kmp_user_lock_p lck;
2479 
2480  if ((__kmp_user_lock_kind == lk_tas) &&
2481  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2482  OMP_NEST_LOCK_T_SIZE)) {
2483  lck = (kmp_user_lock_p)user_lock;
2484  }
2485 #if KMP_USE_FUTEX
2486  else if ((__kmp_user_lock_kind == lk_futex) &&
2487  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2488  OMP_NEST_LOCK_T_SIZE)) {
2489  lck = (kmp_user_lock_p)user_lock;
2490  }
2491 #endif
2492  else {
2493  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2494  }
2495 
2496 #if OMPT_SUPPORT && OMPT_OPTIONAL
2497  // This is the case, if called from omp_init_lock_with_hint:
2498  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2499  if (!codeptr)
2500  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2501  if (ompt_enabled.ompt_callback_lock_destroy) {
2502  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2503  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2504  }
2505 #endif
2506 
2507 #if USE_ITT_BUILD
2508  __kmp_itt_lock_destroyed(lck);
2509 #endif /* USE_ITT_BUILD */
2510 
2511  DESTROY_NESTED_LOCK(lck);
2512 
2513  if ((__kmp_user_lock_kind == lk_tas) &&
2514  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2515  OMP_NEST_LOCK_T_SIZE)) {
2516  ;
2517  }
2518 #if KMP_USE_FUTEX
2519  else if ((__kmp_user_lock_kind == lk_futex) &&
2520  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2521  OMP_NEST_LOCK_T_SIZE)) {
2522  ;
2523  }
2524 #endif
2525  else {
2526  __kmp_user_lock_free(user_lock, gtid, lck);
2527  }
2528 #endif // KMP_USE_DYNAMIC_LOCK
2529 } // __kmpc_destroy_nest_lock
2530 
2531 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2532  KMP_COUNT_BLOCK(OMP_set_lock);
2533 #if KMP_USE_DYNAMIC_LOCK
2534  int tag = KMP_EXTRACT_D_TAG(user_lock);
2535 #if USE_ITT_BUILD
2536  __kmp_itt_lock_acquiring(
2537  (kmp_user_lock_p)
2538  user_lock); // itt function will get to the right lock object.
2539 #endif
2540 #if OMPT_SUPPORT && OMPT_OPTIONAL
2541  // This is the case, if called from omp_init_lock_with_hint:
2542  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2543  if (!codeptr)
2544  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2545  if (ompt_enabled.ompt_callback_mutex_acquire) {
2546  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2547  ompt_mutex_lock, omp_lock_hint_none,
2548  __ompt_get_mutex_impl_type(user_lock),
2549  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2550  }
2551 #endif
2552 #if KMP_USE_INLINED_TAS
2553  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2554  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2555  } else
2556 #elif KMP_USE_INLINED_FUTEX
2557  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2558  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2559  } else
2560 #endif
2561  {
2562  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2563  }
2564 #if USE_ITT_BUILD
2565  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2566 #endif
2567 #if OMPT_SUPPORT && OMPT_OPTIONAL
2568  if (ompt_enabled.ompt_callback_mutex_acquired) {
2569  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2570  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2571  }
2572 #endif
2573 
2574 #else // KMP_USE_DYNAMIC_LOCK
2575 
2576  kmp_user_lock_p lck;
2577 
2578  if ((__kmp_user_lock_kind == lk_tas) &&
2579  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2580  lck = (kmp_user_lock_p)user_lock;
2581  }
2582 #if KMP_USE_FUTEX
2583  else if ((__kmp_user_lock_kind == lk_futex) &&
2584  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2585  lck = (kmp_user_lock_p)user_lock;
2586  }
2587 #endif
2588  else {
2589  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2590  }
2591 
2592 #if USE_ITT_BUILD
2593  __kmp_itt_lock_acquiring(lck);
2594 #endif /* USE_ITT_BUILD */
2595 #if OMPT_SUPPORT && OMPT_OPTIONAL
2596  // This is the case, if called from omp_init_lock_with_hint:
2597  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2598  if (!codeptr)
2599  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2600  if (ompt_enabled.ompt_callback_mutex_acquire) {
2601  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2602  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2603  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2604  }
2605 #endif
2606 
2607  ACQUIRE_LOCK(lck, gtid);
2608 
2609 #if USE_ITT_BUILD
2610  __kmp_itt_lock_acquired(lck);
2611 #endif /* USE_ITT_BUILD */
2612 
2613 #if OMPT_SUPPORT && OMPT_OPTIONAL
2614  if (ompt_enabled.ompt_callback_mutex_acquired) {
2615  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2616  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2617  }
2618 #endif
2619 
2620 #endif // KMP_USE_DYNAMIC_LOCK
2621 }
2622 
2623 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2624 #if KMP_USE_DYNAMIC_LOCK
2625 
2626 #if USE_ITT_BUILD
2627  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2628 #endif
2629 #if OMPT_SUPPORT && OMPT_OPTIONAL
2630  // This is the case, if called from omp_init_lock_with_hint:
2631  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2632  if (!codeptr)
2633  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2634  if (ompt_enabled.enabled) {
2635  if (ompt_enabled.ompt_callback_mutex_acquire) {
2636  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2637  ompt_mutex_nest_lock, omp_lock_hint_none,
2638  __ompt_get_mutex_impl_type(user_lock),
2639  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2640  }
2641  }
2642 #endif
2643  int acquire_status =
2644  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2645  (void) acquire_status;
2646 #if USE_ITT_BUILD
2647  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2648 #endif
2649 
2650 #if OMPT_SUPPORT && OMPT_OPTIONAL
2651  if (ompt_enabled.enabled) {
2652  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2653  if (ompt_enabled.ompt_callback_mutex_acquired) {
2654  // lock_first
2655  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2656  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2657  codeptr);
2658  }
2659  } else {
2660  if (ompt_enabled.ompt_callback_nest_lock) {
2661  // lock_next
2662  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2663  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2664  }
2665  }
2666  }
2667 #endif
2668 
2669 #else // KMP_USE_DYNAMIC_LOCK
2670  int acquire_status;
2671  kmp_user_lock_p lck;
2672 
2673  if ((__kmp_user_lock_kind == lk_tas) &&
2674  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2675  OMP_NEST_LOCK_T_SIZE)) {
2676  lck = (kmp_user_lock_p)user_lock;
2677  }
2678 #if KMP_USE_FUTEX
2679  else if ((__kmp_user_lock_kind == lk_futex) &&
2680  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2681  OMP_NEST_LOCK_T_SIZE)) {
2682  lck = (kmp_user_lock_p)user_lock;
2683  }
2684 #endif
2685  else {
2686  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2687  }
2688 
2689 #if USE_ITT_BUILD
2690  __kmp_itt_lock_acquiring(lck);
2691 #endif /* USE_ITT_BUILD */
2692 #if OMPT_SUPPORT && OMPT_OPTIONAL
2693  // This is the case, if called from omp_init_lock_with_hint:
2694  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2695  if (!codeptr)
2696  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2697  if (ompt_enabled.enabled) {
2698  if (ompt_enabled.ompt_callback_mutex_acquire) {
2699  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2700  ompt_mutex_nest_lock, omp_lock_hint_none,
2701  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2702  codeptr);
2703  }
2704  }
2705 #endif
2706 
2707  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2708 
2709 #if USE_ITT_BUILD
2710  __kmp_itt_lock_acquired(lck);
2711 #endif /* USE_ITT_BUILD */
2712 
2713 #if OMPT_SUPPORT && OMPT_OPTIONAL
2714  if (ompt_enabled.enabled) {
2715  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2716  if (ompt_enabled.ompt_callback_mutex_acquired) {
2717  // lock_first
2718  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2719  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2720  }
2721  } else {
2722  if (ompt_enabled.ompt_callback_nest_lock) {
2723  // lock_next
2724  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2725  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2726  }
2727  }
2728  }
2729 #endif
2730 
2731 #endif // KMP_USE_DYNAMIC_LOCK
2732 }
2733 
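// Editorial note: a user-level sketch of the nestable lock handled above. The
// same thread may re-acquire the lock; the first acquisition maps to the
// "lock_first" OMPT event, deeper ones to "lock_next".
#if 0 // illustrative only
static omp_nest_lock_t example_nlock;

static void example_leaf(void) {
  omp_set_nest_lock(&example_nlock); // nesting depth 2
  // ... work ...
  omp_unset_nest_lock(&example_nlock);
}

void example_nest_lock(void) {
  omp_init_nest_lock(&example_nlock);
#pragma omp parallel
  {
    omp_set_nest_lock(&example_nlock); // nesting depth 1
    example_leaf();
    omp_unset_nest_lock(&example_nlock);
  }
  omp_destroy_nest_lock(&example_nlock);
}
#endif
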
2734 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2735 #if KMP_USE_DYNAMIC_LOCK
2736 
2737  int tag = KMP_EXTRACT_D_TAG(user_lock);
2738 #if USE_ITT_BUILD
2739  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2740 #endif
2741 #if KMP_USE_INLINED_TAS
2742  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2743  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2744  } else
2745 #elif KMP_USE_INLINED_FUTEX
2746  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2747  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2748  } else
2749 #endif
2750  {
2751  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2752  }
2753 
2754 #if OMPT_SUPPORT && OMPT_OPTIONAL
2755  // This is the case, if called from omp_init_lock_with_hint:
2756  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2757  if (!codeptr)
2758  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2759  if (ompt_enabled.ompt_callback_mutex_released) {
2760  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2761  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2762  }
2763 #endif
2764 
2765 #else // KMP_USE_DYNAMIC_LOCK
2766 
2767  kmp_user_lock_p lck;
2768 
2769  /* Can't use serial interval since not block structured */
2770  /* release the lock */
2771 
2772  if ((__kmp_user_lock_kind == lk_tas) &&
2773  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2774 #if KMP_OS_LINUX && \
2775  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2776 // "fast" path implemented to fix customer performance issue
2777 #if USE_ITT_BUILD
2778  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2779 #endif /* USE_ITT_BUILD */
2780  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2781  KMP_MB();
2782 
2783 #if OMPT_SUPPORT && OMPT_OPTIONAL
2784  // This is the case, if called from omp_init_lock_with_hint:
2785  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2786  if (!codeptr)
2787  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2788  if (ompt_enabled.ompt_callback_mutex_released) {
2789  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2790  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2791  }
2792 #endif
2793 
2794  return;
2795 #else
2796  lck = (kmp_user_lock_p)user_lock;
2797 #endif
2798  }
2799 #if KMP_USE_FUTEX
2800  else if ((__kmp_user_lock_kind == lk_futex) &&
2801  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2802  lck = (kmp_user_lock_p)user_lock;
2803  }
2804 #endif
2805  else {
2806  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2807  }
2808 
2809 #if USE_ITT_BUILD
2810  __kmp_itt_lock_releasing(lck);
2811 #endif /* USE_ITT_BUILD */
2812 
2813  RELEASE_LOCK(lck, gtid);
2814 
2815 #if OMPT_SUPPORT && OMPT_OPTIONAL
2816  // This is the case, if called from omp_init_lock_with_hint:
2817  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818  if (!codeptr)
2819  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820  if (ompt_enabled.ompt_callback_mutex_released) {
2821  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2822  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2823  }
2824 #endif
2825 
2826 #endif // KMP_USE_DYNAMIC_LOCK
2827 }
2828 
2829 /* release the lock */
2830 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2831 #if KMP_USE_DYNAMIC_LOCK
2832 
2833 #if USE_ITT_BUILD
2834  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2835 #endif
2836  int release_status =
2837  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2838  (void) release_status;
2839 
2840 #if OMPT_SUPPORT && OMPT_OPTIONAL
2841  // This is the case, if called from omp_init_lock_with_hint:
2842  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2843  if (!codeptr)
2844  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2845  if (ompt_enabled.enabled) {
2846  if (release_status == KMP_LOCK_RELEASED) {
2847  if (ompt_enabled.ompt_callback_mutex_released) {
2848  // release_lock_last
2849  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2850  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2851  codeptr);
2852  }
2853  } else if (ompt_enabled.ompt_callback_nest_lock) {
2854  // release_lock_prev
2855  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2856  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2857  }
2858  }
2859 #endif
2860 
2861 #else // KMP_USE_DYNAMIC_LOCK
2862 
2863  kmp_user_lock_p lck;
2864 
2865  /* Can't use serial interval since not block structured */
2866 
2867  if ((__kmp_user_lock_kind == lk_tas) &&
2868  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2869  OMP_NEST_LOCK_T_SIZE)) {
2870 #if KMP_OS_LINUX && \
2871  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2872  // "fast" path implemented to fix customer performance issue
2873  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2874 #if USE_ITT_BUILD
2875  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2876 #endif /* USE_ITT_BUILD */
2877 
2878 #if OMPT_SUPPORT && OMPT_OPTIONAL
2879  int release_status = KMP_LOCK_STILL_HELD;
2880 #endif
2881 
2882  if (--(tl->lk.depth_locked) == 0) {
2883  TCW_4(tl->lk.poll, 0);
2884 #if OMPT_SUPPORT && OMPT_OPTIONAL
2885  release_status = KMP_LOCK_RELEASED;
2886 #endif
2887  }
2888  KMP_MB();
2889 
2890 #if OMPT_SUPPORT && OMPT_OPTIONAL
2891  // This is the case, if called from omp_init_lock_with_hint:
2892  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2893  if (!codeptr)
2894  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2895  if (ompt_enabled.enabled) {
2896  if (release_status == KMP_LOCK_RELEASED) {
2897  if (ompt_enabled.ompt_callback_mutex_released) {
2898  // release_lock_last
2899  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2900  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2901  }
2902  } else if (ompt_enabled.ompt_callback_nest_lock) {
2903  // release_lock_previous
2904  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2905  ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2906  }
2907  }
2908 #endif
2909 
2910  return;
2911 #else
2912  lck = (kmp_user_lock_p)user_lock;
2913 #endif
2914  }
2915 #if KMP_USE_FUTEX
2916  else if ((__kmp_user_lock_kind == lk_futex) &&
2917  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2918  OMP_NEST_LOCK_T_SIZE)) {
2919  lck = (kmp_user_lock_p)user_lock;
2920  }
2921 #endif
2922  else {
2923  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2924  }
2925 
2926 #if USE_ITT_BUILD
2927  __kmp_itt_lock_releasing(lck);
2928 #endif /* USE_ITT_BUILD */
2929 
2930  int release_status;
2931  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2932 #if OMPT_SUPPORT && OMPT_OPTIONAL
2933  // This is the case, if called from omp_init_lock_with_hint:
2934  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2935  if (!codeptr)
2936  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2937  if (ompt_enabled.enabled) {
2938  if (release_status == KMP_LOCK_RELEASED) {
2939  if (ompt_enabled.ompt_callback_mutex_released) {
2940  // release_lock_last
2941  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2942  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2943  }
2944  } else if (ompt_enabled.ompt_callback_nest_lock) {
2945  // release_lock_previous
2946  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2947  ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2948  }
2949  }
2950 #endif
2951 
2952 #endif // KMP_USE_DYNAMIC_LOCK
2953 }
2954 
2955 /* try to acquire the lock */
2956 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2957  KMP_COUNT_BLOCK(OMP_test_lock);
2958 
2959 #if KMP_USE_DYNAMIC_LOCK
2960  int rc;
2961  int tag = KMP_EXTRACT_D_TAG(user_lock);
2962 #if USE_ITT_BUILD
2963  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2964 #endif
2965 #if OMPT_SUPPORT && OMPT_OPTIONAL
2966  // This is the case, if called from omp_init_lock_with_hint:
2967  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2968  if (!codeptr)
2969  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2970  if (ompt_enabled.ompt_callback_mutex_acquire) {
2971  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2972  ompt_mutex_lock, omp_lock_hint_none,
2973  __ompt_get_mutex_impl_type(user_lock),
2974  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2975  }
2976 #endif
2977 #if KMP_USE_INLINED_TAS
2978  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2979  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2980  } else
2981 #elif KMP_USE_INLINED_FUTEX
2982  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2983  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2984  } else
2985 #endif
2986  {
2987  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2988  }
2989  if (rc) {
2990 #if USE_ITT_BUILD
2991  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2992 #endif
2993 #if OMPT_SUPPORT && OMPT_OPTIONAL
2994  if (ompt_enabled.ompt_callback_mutex_acquired) {
2995  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2996  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2997  }
2998 #endif
2999  return FTN_TRUE;
3000  } else {
3001 #if USE_ITT_BUILD
3002  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3003 #endif
3004  return FTN_FALSE;
3005  }
3006 
3007 #else // KMP_USE_DYNAMIC_LOCK
3008 
3009  kmp_user_lock_p lck;
3010  int rc;
3011 
3012  if ((__kmp_user_lock_kind == lk_tas) &&
3013  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3014  lck = (kmp_user_lock_p)user_lock;
3015  }
3016 #if KMP_USE_FUTEX
3017  else if ((__kmp_user_lock_kind == lk_futex) &&
3018  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3019  lck = (kmp_user_lock_p)user_lock;
3020  }
3021 #endif
3022  else {
3023  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3024  }
3025 
3026 #if USE_ITT_BUILD
3027  __kmp_itt_lock_acquiring(lck);
3028 #endif /* USE_ITT_BUILD */
3029 #if OMPT_SUPPORT && OMPT_OPTIONAL
3030  // This is the case, if called from omp_init_lock_with_hint:
3031  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3032  if (!codeptr)
3033  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3034  if (ompt_enabled.ompt_callback_mutex_acquire) {
3035  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3036  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3037  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3038  }
3039 #endif
3040 
3041  rc = TEST_LOCK(lck, gtid);
3042 #if USE_ITT_BUILD
3043  if (rc) {
3044  __kmp_itt_lock_acquired(lck);
3045  } else {
3046  __kmp_itt_lock_cancelled(lck);
3047  }
3048 #endif /* USE_ITT_BUILD */
3049 #if OMPT_SUPPORT && OMPT_OPTIONAL
3050  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3051  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3052  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3053  }
3054 #endif
3055 
3056  return (rc ? FTN_TRUE : FTN_FALSE);
3057 
3058 /* Can't use serial interval since not block structured */
3059 
3060 #endif // KMP_USE_DYNAMIC_LOCK
3061 }
3062 
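// Editorial note: a minimal sketch of non-blocking acquisition through
// omp_test_lock(), which lands in __kmpc_test_lock() above and returns
// non-zero only when the lock was actually taken.
#if 0 // illustrative only
void example_test_lock(omp_lock_t *l) {
  while (!omp_test_lock(l)) {
    // lock is busy: do other useful work instead of blocking
  }
  // ... critical work ...
  omp_unset_lock(l);
}
#endif
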
3063 /* try to acquire the lock */
3064 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3065 #if KMP_USE_DYNAMIC_LOCK
3066  int rc;
3067 #if USE_ITT_BUILD
3068  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3069 #endif
3070 #if OMPT_SUPPORT && OMPT_OPTIONAL
3071  // This is the case, if called from omp_init_lock_with_hint:
3072  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3073  if (!codeptr)
3074  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3075  if (ompt_enabled.ompt_callback_mutex_acquire) {
3076  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3077  ompt_mutex_nest_lock, omp_lock_hint_none,
3078  __ompt_get_mutex_impl_type(user_lock),
3079  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3080  }
3081 #endif
3082  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3083 #if USE_ITT_BUILD
3084  if (rc) {
3085  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3086  } else {
3087  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3088  }
3089 #endif
3090 #if OMPT_SUPPORT && OMPT_OPTIONAL
3091  if (ompt_enabled.enabled && rc) {
3092  if (rc == 1) {
3093  if (ompt_enabled.ompt_callback_mutex_acquired) {
3094  // lock_first
3095  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3096  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3097  codeptr);
3098  }
3099  } else {
3100  if (ompt_enabled.ompt_callback_nest_lock) {
3101  // lock_next
3102  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3103  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3104  }
3105  }
3106  }
3107 #endif
3108  return rc;
3109 
3110 #else // KMP_USE_DYNAMIC_LOCK
3111 
3112  kmp_user_lock_p lck;
3113  int rc;
3114 
3115  if ((__kmp_user_lock_kind == lk_tas) &&
3116  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3117  OMP_NEST_LOCK_T_SIZE)) {
3118  lck = (kmp_user_lock_p)user_lock;
3119  }
3120 #if KMP_USE_FUTEX
3121  else if ((__kmp_user_lock_kind == lk_futex) &&
3122  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3123  OMP_NEST_LOCK_T_SIZE)) {
3124  lck = (kmp_user_lock_p)user_lock;
3125  }
3126 #endif
3127  else {
3128  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3129  }
3130 
3131 #if USE_ITT_BUILD
3132  __kmp_itt_lock_acquiring(lck);
3133 #endif /* USE_ITT_BUILD */
3134 
3135 #if OMPT_SUPPORT && OMPT_OPTIONAL
3136  // This is the case, if called from omp_init_lock_with_hint:
3137  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3138  if (!codeptr)
3139  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3140  if (ompt_enabled.enabled &&
3141  ompt_enabled.ompt_callback_mutex_acquire) {
3142  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3143  ompt_mutex_nest_lock, omp_lock_hint_none,
3144  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3145  codeptr);
3146  }
3147 #endif
3148 
3149  rc = TEST_NESTED_LOCK(lck, gtid);
3150 #if USE_ITT_BUILD
3151  if (rc) {
3152  __kmp_itt_lock_acquired(lck);
3153  } else {
3154  __kmp_itt_lock_cancelled(lck);
3155  }
3156 #endif /* USE_ITT_BUILD */
3157 #if OMPT_SUPPORT && OMPT_OPTIONAL
3158  if (ompt_enabled.enabled && rc) {
3159  if (rc == 1) {
3160  if (ompt_enabled.ompt_callback_mutex_acquired) {
3161  // lock_first
3162  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3163  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3164  }
3165  } else {
3166  if (ompt_enabled.ompt_callback_nest_lock) {
3167  // lock_next
3168  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3169  ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3170  }
3171  }
3172  }
3173 #endif
3174  return rc;
3175 
3176 /* Can't use serial interval since not block structured */
3177 
3178 #endif // KMP_USE_DYNAMIC_LOCK
3179 }
3180 
3181 // Interface to fast scalable reduce methods routines
3182 
3183 // keep the selected method in a thread-local structure for cross-function
3184 // usage: it will be used in the __kmpc_end_reduce* functions;
3185 // another solution would be to re-determine the method once more in the
3186 // __kmpc_end_reduce* functions (a new prototype would be required then)
3187 // AT: which solution is better?
3188 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3189  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3190 
3191 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3192  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3193 
3194 // description of the packed_reduction_method variable: look at the macros in
3195 // kmp.h
3196 
3197 // used in a critical section reduce block
3198 static __forceinline void
3199 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3200  kmp_critical_name *crit) {
3201 
3202  // this lock was visible to a customer and to the threading profile tool as a
3203  // serial overhead span (although it's used for an internal purpose only)
3204  // why was it visible in the previous implementation?
3205  // should we keep it visible in the new reduce block?
3206  kmp_user_lock_p lck;
3207 
3208 #if KMP_USE_DYNAMIC_LOCK
3209 
3210  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3211  // Check if it is initialized.
3212  if (*lk == 0) {
3213  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3214  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3215  KMP_GET_D_TAG(__kmp_user_lock_seq));
3216  } else {
3217  __kmp_init_indirect_csptr(crit, loc, global_tid,
3218  KMP_GET_I_TAG(__kmp_user_lock_seq));
3219  }
3220  }
3221  // Branch for accessing the actual lock object and set operation. This
3222  // branching is inevitable since this lock initialization does not follow the
3223  // normal dispatch path (lock table is not used).
3224  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3225  lck = (kmp_user_lock_p)lk;
3226  KMP_DEBUG_ASSERT(lck != NULL);
3227  if (__kmp_env_consistency_check) {
3228  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3229  }
3230  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3231  } else {
3232  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3233  lck = ilk->lock;
3234  KMP_DEBUG_ASSERT(lck != NULL);
3235  if (__kmp_env_consistency_check) {
3236  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3237  }
3238  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3239  }
3240 
3241 #else // KMP_USE_DYNAMIC_LOCK
3242 
3243  // We know that the fast reduction code is only emitted by Intel compilers
3244  // with 32-byte critical sections. If there isn't enough space, then we
3245  // have to use a pointer.
3246  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3247  lck = (kmp_user_lock_p)crit;
3248  } else {
3249  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3250  }
3251  KMP_DEBUG_ASSERT(lck != NULL);
3252 
3253  if (__kmp_env_consistency_check)
3254  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3255 
3256  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3257 
3258 #endif // KMP_USE_DYNAMIC_LOCK
3259 }
3260 
3261 // used in a critical section reduce block
3262 static __forceinline void
3263 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3264  kmp_critical_name *crit) {
3265 
3266  kmp_user_lock_p lck;
3267 
3268 #if KMP_USE_DYNAMIC_LOCK
3269 
3270  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3271  lck = (kmp_user_lock_p)crit;
3272  if (__kmp_env_consistency_check)
3273  __kmp_pop_sync(global_tid, ct_critical, loc);
3274  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3275  } else {
3276  kmp_indirect_lock_t *ilk =
3277  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3278  if (__kmp_env_consistency_check)
3279  __kmp_pop_sync(global_tid, ct_critical, loc);
3280  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3281  }
3282 
3283 #else // KMP_USE_DYNAMIC_LOCK
3284 
3285  // We know that the fast reduction code is only emitted by Intel compilers
3286  // with 32-byte critical sections. If there isn't enough space, then we have
3287  // to use a pointer.
3288  if (__kmp_base_user_lock_size > 32) {
3289  lck = *((kmp_user_lock_p *)crit);
3290  KMP_ASSERT(lck != NULL);
3291  } else {
3292  lck = (kmp_user_lock_p)crit;
3293  }
3294 
3295  if (__kmp_env_consistency_check)
3296  __kmp_pop_sync(global_tid, ct_critical, loc);
3297 
3298  __kmp_release_user_lock_with_checks(lck, global_tid);
3299 
3300 #endif // KMP_USE_DYNAMIC_LOCK
3301 } // __kmp_end_critical_section_reduce_block
3302 
3303 static __forceinline int
3304 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3305  int *task_state) {
3306  kmp_team_t *team;
3307 
3308  // Check whether we are inside a teams construct.
3309  if (th->th.th_teams_microtask) {
3310  *team_p = team = th->th.th_team;
3311  if (team->t.t_level == th->th.th_teams_level) {
3312  // This is a reduction at the teams construct.
3313  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3314  // Let's swap teams temporarily for the reduction.
3315  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3316  th->th.th_team = team->t.t_parent;
3317  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3318  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3319  *task_state = th->th.th_task_state;
3320  th->th.th_task_state = 0;
3321 
3322  return 1;
3323  }
3324  }
3325  return 0;
3326 }
3327 
3328 static __forceinline void
3329 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3330  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3331  th->th.th_info.ds.ds_tid = 0;
3332  th->th.th_team = team;
3333  th->th.th_team_nproc = team->t.t_nproc;
3334  th->th.th_task_team = team->t.t_task_team[task_state];
3335  __kmp_type_convert(task_state, &(th->th.th_task_state));
3336 }
3337 
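// Editorial note: a user-level sketch of the case the swap/restore helpers
// above handle, i.e. a reduction clause on the teams construct itself
// (t_level == th_teams_level). Host teams is assumed here for brevity.
#if 0 // illustrative only
int example_teams_reduction(int n, const int *a) {
  int sum = 0;
#pragma omp teams num_teams(4) reduction(+ : sum)
  {
    for (int i = omp_get_team_num(); i < n; i += omp_get_num_teams())
      sum += a[i];
  }
  return sum;
}
#endif
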
3338 /* 2.a.i. Reduce Block without a terminating barrier */
3354 kmp_int32
3355 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3356  size_t reduce_size, void *reduce_data,
3357  void (*reduce_func)(void *lhs_data, void *rhs_data),
3358  kmp_critical_name *lck) {
3359 
3360  KMP_COUNT_BLOCK(REDUCE_nowait);
3361  int retval = 0;
3362  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3363  kmp_info_t *th;
3364  kmp_team_t *team;
3365  int teams_swapped = 0, task_state;
3366  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3367  __kmp_assert_valid_gtid(global_tid);
3368 
3369  // why do we need this initialization here at all?
3370  // The reduction clause cannot be used as a stand-alone directive.
3371 
3372  // do not call __kmp_serial_initialize(), it will be called by
3373  // __kmp_parallel_initialize() if needed
3374  // possible detection of false-positive race by the threadchecker ???
3375  if (!TCR_4(__kmp_init_parallel))
3376  __kmp_parallel_initialize();
3377 
3378  __kmp_resume_if_soft_paused();
3379 
3380 // check correctness of reduce block nesting
3381 #if KMP_USE_DYNAMIC_LOCK
3382  if (__kmp_env_consistency_check)
3383  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3384 #else
3385  if (__kmp_env_consistency_check)
3386  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3387 #endif
3388 
3389  th = __kmp_thread_from_gtid(global_tid);
3390  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3391 
3392  // the packed_reduction_method value will be reused by the __kmp_end_reduce*
3393  // function, so the value should be kept in a variable
3394  // the variable should be either a construct-specific or thread-specific
3395  // property, not a team-specific property
3396  // (a thread can reach the next reduce block on the next construct, and the
3397  // reduce method may differ on the next construct)
3398  // an ident_t "loc" parameter could be used as a construct-specific property
3399  // (but what if loc == 0?)
3400  // (if both construct-specific and team-specific variables were shared,
3401  // then unnecessary extra syncs would be needed)
3402  // a thread-specific variable is better regarding the two issues above (next
3403  // construct and extra syncs)
3404  // a thread-specific "th_local.reduction_method" variable is used currently
3405  // each thread executes the 'determine' and 'set' lines (there is no need to
3406  // have only one thread do it, which would add unnecessary extra syncs)
3407 
3408  packed_reduction_method = __kmp_determine_reduction_method(
3409  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3410  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3411 
3412  OMPT_REDUCTION_DECL(th, global_tid);
3413  if (packed_reduction_method == critical_reduce_block) {
3414 
3415  OMPT_REDUCTION_BEGIN;
3416 
3417  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3418  retval = 1;
3419 
3420  } else if (packed_reduction_method == empty_reduce_block) {
3421 
3422  OMPT_REDUCTION_BEGIN;
3423 
3424  // usage: if team size == 1, no synchronization is required ( Intel
3425  // platforms only )
3426  retval = 1;
3427 
3428  } else if (packed_reduction_method == atomic_reduce_block) {
3429 
3430  retval = 2;
3431 
3432  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3433  // won't be called by the code gen)
3434  // (this is not ideal, because the checking block has already been closed
3435  // by this 'pop',
3436  // while the atomic operation has not been executed yet; it will be executed
3437  // slightly later, literally on the next instruction)
3438  if (__kmp_env_consistency_check)
3439  __kmp_pop_sync(global_tid, ct_reduce, loc);
3440 
3441  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3442  tree_reduce_block)) {
3443 
3444 // AT: performance issue: a real barrier here
3445 // AT: (if the master goes slow, other threads are blocked here waiting for
3446 // the master to come and release them)
3447 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3448 // AT: (specifying NOWAIT won't result in any performance improvement; it'll
3449 // just be confusing to a customer)
3450 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3451 // might go faster and be more in line with the sense of NOWAIT
3452 // AT: TO DO: do epcc test and compare times
3453 
3454 // this barrier should be invisible to a customer and to the threading profile
3455 // tool (it's neither a terminating barrier nor customer's code, it's
3456 // used for an internal purpose)
3457 #if OMPT_SUPPORT
3458  // JP: can this barrier potentially lead to task scheduling?
3459  // JP: as long as there is a barrier in the implementation, OMPT should and
3460  // will provide the barrier events,
3461  // so we set up the necessary frame/return addresses.
3462  ompt_frame_t *ompt_frame;
3463  if (ompt_enabled.enabled) {
3464  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3465  if (ompt_frame->enter_frame.ptr == NULL)
3466  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3467  }
3468  OMPT_STORE_RETURN_ADDRESS(global_tid);
3469 #endif
3470 #if USE_ITT_NOTIFY
3471  __kmp_threads[global_tid]->th.th_ident = loc;
3472 #endif
3473  retval =
3474  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3475  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3476  retval = (retval != 0) ? (0) : (1);
3477 #if OMPT_SUPPORT && OMPT_OPTIONAL
3478  if (ompt_enabled.enabled) {
3479  ompt_frame->enter_frame = ompt_data_none;
3480  }
3481 #endif
3482 
3483  // all workers except the master should do this pop here
3484  // (none of the other workers will get to __kmpc_end_reduce_nowait())
3485  if (__kmp_env_consistency_check) {
3486  if (retval == 0) {
3487  __kmp_pop_sync(global_tid, ct_reduce, loc);
3488  }
3489  }
3490 
3491  } else {
3492 
3493  // should never reach this block
3494  KMP_ASSERT(0); // "unexpected method"
3495  }
3496  if (teams_swapped) {
3497  __kmp_restore_swapped_teams(th, team, task_state);
3498  }
3499  KA_TRACE(
3500  10,
3501  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3502  global_tid, packed_reduction_method, retval));
3503 
3504  return retval;
3505 }
3506 
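// Editorial note: a rough sketch of the caller-side protocol described above
// for "reduction(+ : sum)". The return value selects the combine path: 1 means
// combine and call __kmpc_end_reduce_nowait(), 2 means combine atomically, 0
// means nothing to do. Helper names and the packing of reduce_data are
// illustrative, not actual code-generator output.
#if 0 // illustrative only
static void example_reduce_func(void *lhs, void *rhs) {
  *(int *)lhs += *(int *)rhs;
}

void example_reduce_nowait(ident_t *loc, kmp_int32 gtid, int *sum,
                           int local_sum, kmp_critical_name *crit) {
  switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &local_sum,
                               example_reduce_func, crit)) {
  case 1: // critical / tree / serial path: combine, then close the region
    *sum += local_sum;
    __kmpc_end_reduce_nowait(loc, gtid, crit);
    break;
  case 2: // atomic path: combine with an atomic update, no end call
#pragma omp atomic
    *sum += local_sum;
    break;
  default: // 0: this thread has nothing left to do
    break;
  }
}
#endif
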
3515 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3516  kmp_critical_name *lck) {
3517 
3518  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3519 
3520  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3521  __kmp_assert_valid_gtid(global_tid);
3522 
3523  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3524 
3525  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3526 
3527  if (packed_reduction_method == critical_reduce_block) {
3528 
3529  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3530  OMPT_REDUCTION_END;
3531 
3532  } else if (packed_reduction_method == empty_reduce_block) {
3533 
3534  // usage: if team size == 1, no synchronization is required ( on Intel
3535  // platforms only )
3536 
3537  OMPT_REDUCTION_END;
3538 
3539  } else if (packed_reduction_method == atomic_reduce_block) {
3540 
3541  // neither master nor other workers should get here
3542  // (code gen does not generate this call in case 2: atomic reduce block)
3543  // actually it's better to remove this else-if altogether;
3544  // after removal this value will be checked by the 'else' and will assert
3545 
3546  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3547  tree_reduce_block)) {
3548 
3549  // only master gets here
3550  // OMPT: tree reduction is annotated in the barrier code
3551 
3552  } else {
3553 
3554  // should never reach this block
3555  KMP_ASSERT(0); // "unexpected method"
3556  }
3557 
3558  if (__kmp_env_consistency_check)
3559  __kmp_pop_sync(global_tid, ct_reduce, loc);
3560 
3561  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3562  global_tid, packed_reduction_method));
3563 
3564  return;
3565 }
3566 
3567 /* 2.a.ii. Reduce Block with a terminating barrier */
3568 
3584 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3585  size_t reduce_size, void *reduce_data,
3586  void (*reduce_func)(void *lhs_data, void *rhs_data),
3587  kmp_critical_name *lck) {
3588  KMP_COUNT_BLOCK(REDUCE_wait);
3589  int retval = 0;
3590  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3591  kmp_info_t *th;
3592  kmp_team_t *team;
3593  int teams_swapped = 0, task_state;
3594 
3595  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3596  __kmp_assert_valid_gtid(global_tid);
3597 
3598  // why do we need this initialization here at all?
3599  // The reduction clause cannot be a stand-alone directive.
3600 
3601  // do not call __kmp_serial_initialize(), it will be called by
3602  // __kmp_parallel_initialize() if needed
3603  // possible detection of false-positive race by the threadchecker ???
3604  if (!TCR_4(__kmp_init_parallel))
3605  __kmp_parallel_initialize();
3606 
3607  __kmp_resume_if_soft_paused();
3608 
3609 // check correctness of reduce block nesting
3610 #if KMP_USE_DYNAMIC_LOCK
3611  if (__kmp_env_consistency_check)
3612  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3613 #else
3614  if (__kmp_env_consistency_check)
3615  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3616 #endif
3617 
3618  th = __kmp_thread_from_gtid(global_tid);
3619  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3620 
3621  packed_reduction_method = __kmp_determine_reduction_method(
3622  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3623  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3624 
3625  OMPT_REDUCTION_DECL(th, global_tid);
3626 
3627  if (packed_reduction_method == critical_reduce_block) {
3628 
3629  OMPT_REDUCTION_BEGIN;
3630  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3631  retval = 1;
3632 
3633  } else if (packed_reduction_method == empty_reduce_block) {
3634 
3635  OMPT_REDUCTION_BEGIN;
3636  // usage: if team size == 1, no synchronization is required ( Intel
3637  // platforms only )
3638  retval = 1;
3639 
3640  } else if (packed_reduction_method == atomic_reduce_block) {
3641 
3642  retval = 2;
3643 
3644  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3645  tree_reduce_block)) {
3646 
3647 // case tree_reduce_block:
3648 // this barrier should be visible to the user and to the threading profile
3649 // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3650 #if OMPT_SUPPORT
3651  ompt_frame_t *ompt_frame;
3652  if (ompt_enabled.enabled) {
3653  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3654  if (ompt_frame->enter_frame.ptr == NULL)
3655  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3656  }
3657  OMPT_STORE_RETURN_ADDRESS(global_tid);
3658 #endif
3659 #if USE_ITT_NOTIFY
3660  __kmp_threads[global_tid]->th.th_ident =
3661  loc; // needed for correct notification of frames
3662 #endif
3663  retval =
3664  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3665  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3666  retval = (retval != 0) ? (0) : (1);
3667 #if OMPT_SUPPORT && OMPT_OPTIONAL
3668  if (ompt_enabled.enabled) {
3669  ompt_frame->enter_frame = ompt_data_none;
3670  }
3671 #endif
3672 
3673  // all workers except the master should do this pop here
3674  // (no worker other than the master will enter __kmpc_end_reduce())
3675  if (__kmp_env_consistency_check) {
3676  if (retval == 0) { // 0: all other workers; 1: master
3677  __kmp_pop_sync(global_tid, ct_reduce, loc);
3678  }
3679  }
3680 
3681  } else {
3682 
3683  // should never reach this block
3684  KMP_ASSERT(0); // "unexpected method"
3685  }
3686  if (teams_swapped) {
3687  __kmp_restore_swapped_teams(th, team, task_state);
3688  }
3689 
3690  KA_TRACE(10,
3691  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3692  global_tid, packed_reduction_method, retval));
3693  return retval;
3694 }
3695 
3706 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3707  kmp_critical_name *lck) {
3708 
3709  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3710  kmp_info_t *th;
3711  kmp_team_t *team;
3712  int teams_swapped = 0, task_state;
3713 
3714  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3715  __kmp_assert_valid_gtid(global_tid);
3716 
3717  th = __kmp_thread_from_gtid(global_tid);
3718  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3719 
3720  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3721 
3722  // this barrier should be visible to the user and to the threading profile
3723  // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3724  OMPT_REDUCTION_DECL(th, global_tid);
3725 
3726  if (packed_reduction_method == critical_reduce_block) {
3727  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3728 
3729  OMPT_REDUCTION_END;
3730 
3731 // TODO: implicit barrier: should be exposed
3732 #if OMPT_SUPPORT
3733  ompt_frame_t *ompt_frame;
3734  if (ompt_enabled.enabled) {
3735  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3736  if (ompt_frame->enter_frame.ptr == NULL)
3737  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3738  }
3739  OMPT_STORE_RETURN_ADDRESS(global_tid);
3740 #endif
3741 #if USE_ITT_NOTIFY
3742  __kmp_threads[global_tid]->th.th_ident = loc;
3743 #endif
3744  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3745 #if OMPT_SUPPORT && OMPT_OPTIONAL
3746  if (ompt_enabled.enabled) {
3747  ompt_frame->enter_frame = ompt_data_none;
3748  }
3749 #endif
3750 
3751  } else if (packed_reduction_method == empty_reduce_block) {
3752 
3753  OMPT_REDUCTION_END;
3754 
3755 // usage: if team size==1, no synchronization is required (Intel platforms only)
3756 
3757 // TODO: implicit barrier: should be exposed
3758 #if OMPT_SUPPORT
3759  ompt_frame_t *ompt_frame;
3760  if (ompt_enabled.enabled) {
3761  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3762  if (ompt_frame->enter_frame.ptr == NULL)
3763  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3764  }
3765  OMPT_STORE_RETURN_ADDRESS(global_tid);
3766 #endif
3767 #if USE_ITT_NOTIFY
3768  __kmp_threads[global_tid]->th.th_ident = loc;
3769 #endif
3770  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3771 #if OMPT_SUPPORT && OMPT_OPTIONAL
3772  if (ompt_enabled.enabled) {
3773  ompt_frame->enter_frame = ompt_data_none;
3774  }
3775 #endif
3776 
3777  } else if (packed_reduction_method == atomic_reduce_block) {
3778 
3779 #if OMPT_SUPPORT
3780  ompt_frame_t *ompt_frame;
3781  if (ompt_enabled.enabled) {
3782  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3783  if (ompt_frame->enter_frame.ptr == NULL)
3784  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3785  }
3786  OMPT_STORE_RETURN_ADDRESS(global_tid);
3787 #endif
3788 // TODO: implicit barrier: should be exposed
3789 #if USE_ITT_NOTIFY
3790  __kmp_threads[global_tid]->th.th_ident = loc;
3791 #endif
3792  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3793 #if OMPT_SUPPORT && OMPT_OPTIONAL
3794  if (ompt_enabled.enabled) {
3795  ompt_frame->enter_frame = ompt_data_none;
3796  }
3797 #endif
3798 
3799  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3800  tree_reduce_block)) {
3801 
3802  // only the master executes here (the master releases all other workers)
3803  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3804  global_tid);
3805 
3806  } else {
3807 
3808  // should never reach this block
3809  KMP_ASSERT(0); // "unexpected method"
3810  }
3811  if (teams_swapped) {
3812  __kmp_restore_swapped_teams(th, team, task_state);
3813  }
3814 
3815  if (__kmp_env_consistency_check)
3816  __kmp_pop_sync(global_tid, ct_reduce, loc);
3817 
3818  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3819  global_tid, packed_reduction_method));
3820 
3821  return;
3822 }
3823 
3824 #undef __KMP_GET_REDUCTION_METHOD
3825 #undef __KMP_SET_REDUCTION_METHOD
3826 
3827 /* end of interface to fast scalable reduce routines */
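
/* Illustrative sketch, not part of the runtime source: one plausible way a
   compiler could lower a blocking '+' reduction onto the entry points above.
   The names (sum_private, sum_shared, reduce_data, reduce_func, crit_name)
   and the exact shape of the switch are assumptions for illustration only;
   actual code generation is compiler-specific.

     // user code:
     //   #pragma omp parallel for reduction(+ : sum)
     //   for (int i = 0; i < n; ++i) sum += a[i];

     int ret = __kmpc_reduce(loc, gtid, 1, sizeof(sum_private), reduce_data,
                             reduce_func, &crit_name);
     switch (ret) {
     case 1: // critical / empty / tree method: combine privates directly
       sum_shared += sum_private;
       __kmpc_end_reduce(loc, gtid, &crit_name); // unlock and/or barrier
       break;
     case 2: // atomic method: combine with an atomic update
       // #pragma omp atomic
       sum_shared += sum_private;
       __kmpc_end_reduce(loc, gtid, &crit_name); // terminating barrier
       break;
     default: // 0: tree method, non-master threads are already done
       break;
     }

   For the nowait variants, the comments in __kmpc_end_reduce_nowait() above
   note that code generation does not emit the end call in case 2. */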
3828 
3829 kmp_uint64 __kmpc_get_taskid() {
3830 
3831  kmp_int32 gtid;
3832  kmp_info_t *thread;
3833 
3834  gtid = __kmp_get_gtid();
3835  if (gtid < 0) {
3836  return 0;
3837  }
3838  thread = __kmp_thread_from_gtid(gtid);
3839  return thread->th.th_current_task->td_task_id;
3840 
3841 } // __kmpc_get_taskid
3842 
3843 kmp_uint64 __kmpc_get_parent_taskid() {
3844 
3845  kmp_int32 gtid;
3846  kmp_info_t *thread;
3847  kmp_taskdata_t *parent_task;
3848 
3849  gtid = __kmp_get_gtid();
3850  if (gtid < 0) {
3851  return 0;
3852  }
3853  thread = __kmp_thread_from_gtid(gtid);
3854  parent_task = thread->th.th_current_task->td_parent;
3855  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3856 
3857 } // __kmpc_get_parent_taskid
3858 
3870 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3871  const struct kmp_dim *dims) {
3872  __kmp_assert_valid_gtid(gtid);
3873  int j, idx;
3874  kmp_int64 last, trace_count;
3875  kmp_info_t *th = __kmp_threads[gtid];
3876  kmp_team_t *team = th->th.th_team;
3877  kmp_uint32 *flags;
3878  kmp_disp_t *pr_buf = th->th.th_dispatch;
3879  dispatch_shared_info_t *sh_buf;
3880 
3881  KA_TRACE(
3882  20,
3883  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3884  gtid, num_dims, !team->t.t_serialized));
3885  KMP_DEBUG_ASSERT(dims != NULL);
3886  KMP_DEBUG_ASSERT(num_dims > 0);
3887 
3888  if (team->t.t_serialized) {
3889  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3890  return; // no dependencies if team is serialized
3891  }
3892  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3893  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3894  // the next loop
3895  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3896 
3897  // Save bounds info into allocated private buffer
3898  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3899  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3900  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3901  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
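  // Layout of th_doacross_info (4 * num_dims + 1 elements of kmp_int64):
  //   [0]                   number of dimensions
  //   [1]                   address of sh_buf->doacross_num_done
  //   [2] [3] [4]           lo, up, st of dims[0]
  //   [4*j+1] .. [4*j+4]    range_length, lo, up, st of dims[j] for j >= 1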
3902  pr_buf->th_doacross_info[0] =
3903  (kmp_int64)num_dims; // first element is number of dimensions
3904  // Also save the address of num_done so it can be accessed later without
3905  // knowing the buffer index
3906  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3907  pr_buf->th_doacross_info[2] = dims[0].lo;
3908  pr_buf->th_doacross_info[3] = dims[0].up;
3909  pr_buf->th_doacross_info[4] = dims[0].st;
3910  last = 5;
3911  for (j = 1; j < num_dims; ++j) {
3912  kmp_int64
3913  range_length; // range of dims[j], kept in the buffer for all dims except dims[0]
3914  if (dims[j].st == 1) { // most common case
3915  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3916  range_length = dims[j].up - dims[j].lo + 1;
3917  } else {
3918  if (dims[j].st > 0) {
3919  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3920  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3921  } else { // negative increment
3922  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3923  range_length =
3924  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3925  }
3926  }
3927  pr_buf->th_doacross_info[last++] = range_length;
3928  pr_buf->th_doacross_info[last++] = dims[j].lo;
3929  pr_buf->th_doacross_info[last++] = dims[j].up;
3930  pr_buf->th_doacross_info[last++] = dims[j].st;
3931  }
3932 
3933  // Compute total trip count.
3934  // Start with range of dims[0] which we don't need to keep in the buffer.
3935  if (dims[0].st == 1) { // most common case
3936  trace_count = dims[0].up - dims[0].lo + 1;
3937  } else if (dims[0].st > 0) {
3938  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3939  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3940  } else { // negative increment
3941  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3942  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3943  }
3944  for (j = 1; j < num_dims; ++j) {
3945  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3946  }
3947  KMP_DEBUG_ASSERT(trace_count > 0);
3948 
3949  // Check whether the shared buffer is still occupied by another loop
3950  // (i.e., by the loop with buffer index idx - __kmp_dispatch_num_buffers)
3951  if (idx != sh_buf->doacross_buf_idx) {
3952  // Shared buffer is occupied, wait for it to be free
3953  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3954  __kmp_eq_4, NULL);
3955  }
3956 #if KMP_32_BIT_ARCH
3957  // Check if we are the first thread. After the CAS the first thread gets 0,
3958  // others get 1 if initialization is in progress, allocated pointer otherwise.
3959  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3960  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3961  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3962 #else
3963  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3964  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3965 #endif
3966  if (flags == NULL) {
3967  // we are the first thread, allocate the array of flags
3968  size_t size =
3969  (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
3970  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3971  KMP_MB();
3972  sh_buf->doacross_flags = flags;
3973  } else if (flags == (kmp_uint32 *)1) {
3974 #if KMP_32_BIT_ARCH
3975  // initialization is still in progress, need to wait
3976  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3977 #else
3978  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3979 #endif
3980  KMP_YIELD(TRUE);
3981  KMP_MB();
3982  } else {
3983  KMP_MB();
3984  }
3985  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3986  pr_buf->th_doacross_flags =
3987  sh_buf->doacross_flags; // save private copy in order to not
3988  // touch shared buffer on each iteration
3989  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3990 }
3991 
3992 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3993  __kmp_assert_valid_gtid(gtid);
3994  kmp_int64 shft;
3995  size_t num_dims, i;
3996  kmp_uint32 flag;
3997  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3998  kmp_info_t *th = __kmp_threads[gtid];
3999  kmp_team_t *team = th->th.th_team;
4000  kmp_disp_t *pr_buf;
4001  kmp_int64 lo, up, st;
4002 
4003  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4004  if (team->t.t_serialized) {
4005  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4006  return; // no dependencies if team is serialized
4007  }
4008 
4009  // calculate sequential iteration number and check out-of-bounds condition
4010  pr_buf = th->th.th_dispatch;
4011  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4012  num_dims = (size_t)pr_buf->th_doacross_info[0];
4013  lo = pr_buf->th_doacross_info[2];
4014  up = pr_buf->th_doacross_info[3];
4015  st = pr_buf->th_doacross_info[4];
4016 #if OMPT_SUPPORT && OMPT_OPTIONAL
4017  ompt_dependence_t deps[num_dims];
4018 #endif
4019  if (st == 1) { // most common case
4020  if (vec[0] < lo || vec[0] > up) {
4021  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4022  "bounds [%lld,%lld]\n",
4023  gtid, vec[0], lo, up));
4024  return;
4025  }
4026  iter_number = vec[0] - lo;
4027  } else if (st > 0) {
4028  if (vec[0] < lo || vec[0] > up) {
4029  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4030  "bounds [%lld,%lld]\n",
4031  gtid, vec[0], lo, up));
4032  return;
4033  }
4034  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4035  } else { // negative increment
4036  if (vec[0] > lo || vec[0] < up) {
4037  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4038  "bounds [%lld,%lld]\n",
4039  gtid, vec[0], lo, up));
4040  return;
4041  }
4042  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4043  }
4044 #if OMPT_SUPPORT && OMPT_OPTIONAL
4045  deps[0].variable.value = iter_number;
4046  deps[0].dependence_type = ompt_dependence_type_sink;
4047 #endif
4048  for (i = 1; i < num_dims; ++i) {
4049  kmp_int64 iter, ln;
4050  size_t j = i * 4;
4051  ln = pr_buf->th_doacross_info[j + 1];
4052  lo = pr_buf->th_doacross_info[j + 2];
4053  up = pr_buf->th_doacross_info[j + 3];
4054  st = pr_buf->th_doacross_info[j + 4];
4055  if (st == 1) {
4056  if (vec[i] < lo || vec[i] > up) {
4057  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4058  "bounds [%lld,%lld]\n",
4059  gtid, vec[i], lo, up));
4060  return;
4061  }
4062  iter = vec[i] - lo;
4063  } else if (st > 0) {
4064  if (vec[i] < lo || vec[i] > up) {
4065  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4066  "bounds [%lld,%lld]\n",
4067  gtid, vec[i], lo, up));
4068  return;
4069  }
4070  iter = (kmp_uint64)(vec[i] - lo) / st;
4071  } else { // st < 0
4072  if (vec[i] > lo || vec[i] < up) {
4073  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4074  "bounds [%lld,%lld]\n",
4075  gtid, vec[i], lo, up));
4076  return;
4077  }
4078  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4079  }
4080  iter_number = iter + ln * iter_number;
4081 #if OMPT_SUPPORT && OMPT_OPTIONAL
4082  deps[i].variable.value = iter;
4083  deps[i].dependence_type = ompt_dependence_type_sink;
4084 #endif
4085  }
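  // iter_number now holds the linearized ("collapsed") iteration index; its
  // completion bit is bit (iter_number % 32) of 32-bit word (iter_number / 32)
  // in the shared th_doacross_flags array.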
4086  shft = iter_number % 32; // use 32-bit granularity
4087  iter_number >>= 5; // divided by 32
4088  flag = 1 << shft;
4089  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4090  KMP_YIELD(TRUE);
4091  }
4092  KMP_MB();
4093 #if OMPT_SUPPORT && OMPT_OPTIONAL
4094  if (ompt_enabled.ompt_callback_dependences) {
4095  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4096  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4097  }
4098 #endif
4099  KA_TRACE(20,
4100  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4101  gtid, (iter_number << 5) + shft));
4102 }
4103 
4104 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4105  __kmp_assert_valid_gtid(gtid);
4106  kmp_int64 shft;
4107  size_t num_dims, i;
4108  kmp_uint32 flag;
4109  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4110  kmp_info_t *th = __kmp_threads[gtid];
4111  kmp_team_t *team = th->th.th_team;
4112  kmp_disp_t *pr_buf;
4113  kmp_int64 lo, st;
4114 
4115  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4116  if (team->t.t_serialized) {
4117  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4118  return; // no dependencies if team is serialized
4119  }
4120 
4121  // calculate sequential iteration number (same as in "wait" but no
4122  // out-of-bounds checks)
4123  pr_buf = th->th.th_dispatch;
4124  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4125  num_dims = (size_t)pr_buf->th_doacross_info[0];
4126  lo = pr_buf->th_doacross_info[2];
4127  st = pr_buf->th_doacross_info[4];
4128 #if OMPT_SUPPORT && OMPT_OPTIONAL
4129  ompt_dependence_t deps[num_dims];
4130 #endif
4131  if (st == 1) { // most common case
4132  iter_number = vec[0] - lo;
4133  } else if (st > 0) {
4134  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4135  } else { // negative increment
4136  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4137  }
4138 #if OMPT_SUPPORT && OMPT_OPTIONAL
4139  deps[0].variable.value = iter_number;
4140  deps[0].dependence_type = ompt_dependence_type_source;
4141 #endif
4142  for (i = 1; i < num_dims; ++i) {
4143  kmp_int64 iter, ln;
4144  size_t j = i * 4;
4145  ln = pr_buf->th_doacross_info[j + 1];
4146  lo = pr_buf->th_doacross_info[j + 2];
4147  st = pr_buf->th_doacross_info[j + 4];
4148  if (st == 1) {
4149  iter = vec[i] - lo;
4150  } else if (st > 0) {
4151  iter = (kmp_uint64)(vec[i] - lo) / st;
4152  } else { // st < 0
4153  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4154  }
4155  iter_number = iter + ln * iter_number;
4156 #if OMPT_SUPPORT && OMPT_OPTIONAL
4157  deps[i].variable.value = iter;
4158  deps[i].dependence_type = ompt_dependence_type_source;
4159 #endif
4160  }
4161 #if OMPT_SUPPORT && OMPT_OPTIONAL
4162  if (ompt_enabled.ompt_callback_dependences) {
4163  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4164  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4165  }
4166 #endif
4167  shft = iter_number % 32; // use 32-bit granularity
4168  iter_number >>= 5; // divided by 32
4169  flag = 1 << shft;
4170  KMP_MB();
4171  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4172  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4173  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4174  (iter_number << 5) + shft));
4175 }
4176 
4177 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4178  __kmp_assert_valid_gtid(gtid);
4179  kmp_int32 num_done;
4180  kmp_info_t *th = __kmp_threads[gtid];
4181  kmp_team_t *team = th->th.th_team;
4182  kmp_disp_t *pr_buf = th->th.th_dispatch;
4183 
4184  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4185  if (team->t.t_serialized) {
4186  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4187  return; // nothing to do
4188  }
4189  num_done =
4190  KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4191  if (num_done == th->th.th_team_nproc) {
4192  // we are the last thread, need to free shared resources
4193  int idx = pr_buf->th_doacross_buf_idx - 1;
4194  dispatch_shared_info_t *sh_buf =
4195  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4196  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4197  (kmp_int64)&sh_buf->doacross_num_done);
4198  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4199  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4200  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4201  sh_buf->doacross_flags = NULL;
4202  sh_buf->doacross_num_done = 0;
4203  sh_buf->doacross_buf_idx +=
4204  __kmp_dispatch_num_buffers; // free buffer for future re-use
4205  }
4206  // free private resources (the thread's doacross buffer index is kept and never reset)
4207  pr_buf->th_doacross_flags = NULL;
4208  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4209  pr_buf->th_doacross_info = NULL;
4210  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4211 }
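
/* Illustrative sketch, not part of the runtime source: a doacross loop such as
   the one below maps onto the four entry points above; the exact calls emitted
   are compiler-specific, and the mapping shown in the comments is an assumption
   for illustration only.

     #pragma omp parallel for ordered(1)          // -> __kmpc_doacross_init()
     for (int i = 1; i < n; ++i) {
       // ... work with no cross-iteration dependences ...
       #pragma omp ordered depend(sink : i - 1)   // -> __kmpc_doacross_wait()
       a[i] = a[i - 1] + 1;
       #pragma omp ordered depend(source)         // -> __kmpc_doacross_post()
     }                                            // -> __kmpc_doacross_fini()
*/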
4212 
4213 /* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */
4214 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4215  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4216 }
4217 
4218 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4219  return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
4220 }
4221 
4222 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4223  omp_allocator_handle_t free_allocator) {
4224  return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4225  free_allocator);
4226 }
4227 
4228 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4229  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4230 }
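
/* Illustrative usage sketch, not part of this file: the allocator entry points
   above back the OpenMP 5.x memory-management API declared in <omp.h>.
   A minimal user-level example, assuming the predefined
   omp_default_mem_allocator:

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       double *v = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_allocator);
       if (v != NULL) {
         v[0] = 42.0;
         printf("%g\n", v[0]);
         omp_free(v, omp_default_mem_allocator);
       }
       return 0;
     }
*/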
4231 
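// Descriptive note: __kmp_target_offload reflects the runtime's target-offload
// policy as configured via the OMP_TARGET_OFFLOAD environment variable
// (disabled / default / mandatory).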
4232 int __kmpc_get_target_offload(void) {
4233  if (!__kmp_init_serial) {
4234  __kmp_serial_initialize();
4235  }
4236  return __kmp_target_offload;
4237 }
4238 
4239 int __kmpc_pause_resource(kmp_pause_status_t level) {
4240  if (!__kmp_init_serial) {
4241  return 1; // Can't pause if runtime is not initialized
4242  }
4243  return __kmp_pause_resource(level);
4244 }