LLVM OpenMP* Runtime Library
kmp_sched.cpp
1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15  NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16  it may change values between parallel regions. __kmp_max_nth
17  is the largest value __kmp_nth may take, 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
30 #ifdef KMP_DEBUG
31 //-------------------------------------------------------------------------
32 // template for debug prints specification ( d, u, lld, llu )
// Out-of-line definitions of the per-type printf format specifiers
// (traits_t<T>::spec) that the KD_TRACE debug output in this file splices
// into its format strings via __kmp_str_format.
33 char const *traits_t<int>::spec = "d";
34 char const *traits_t<unsigned int>::spec = "u";
35 char const *traits_t<long long>::spec = "lld";
36 char const *traits_t<unsigned long long>::spec = "llu";
37 char const *traits_t<long>::spec = "ld";
38 //-------------------------------------------------------------------------
39 #endif
40 
41 #if KMP_STATS_ENABLED
// Record the iteration count of the just-partitioned loop into statistic
// 'stat' and pop the partitioned timer.  Expects *pupper, *plower and incr
// to be in scope at the expansion site; the four-way branch mirrors the
// trip-count computation used by the init routines below.  (Comments are
// kept outside the macro so they cannot break the '\' continuations.)
42 #define KMP_STATS_LOOP_END(stat) \
43  { \
44  kmp_int64 t; \
45  kmp_int64 u = (kmp_int64)(*pupper); \
46  kmp_int64 l = (kmp_int64)(*plower); \
47  kmp_int64 i = (kmp_int64)incr; \
48  if (i == 1) { \
49  t = u - l + 1; \
50  } else if (i == -1) { \
51  t = l - u + 1; \
52  } else if (i > 0) { \
53  t = (u - l) / i + 1; \
54  } else { \
55  t = (l - u) / (-i) + 1; \
56  } \
57  KMP_COUNT_VALUE(stat, t); \
58  KMP_POP_PARTITIONED_TIMER(); \
59  }
60 #else
61 #define KMP_STATS_LOOP_END(stat) /* Nothing */
62 #endif
63 
64 static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
65 static inline void check_loc(ident_t *&loc) {
66  if (loc == NULL)
67  loc = &loc_stub; // may need to report location info to ittnotify
68 }
69 
/* Partition the iteration space [*plower, *pupper] (step 'incr') among the
   threads of the current team according to 'schedtype'.  On exit
   *plower/*pupper are narrowed to the calling thread's portion, *pstride is
   set for advancing between chunks, and *plastiter is set iff this thread
   executes the last iteration.  Schedule types above kmp_ord_upper denote a
   DISTRIBUTE construct and are partitioned among the parent team instead. */
70 template <typename T>
71 static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
72  kmp_int32 schedtype, kmp_int32 *plastiter,
73  T *plower, T *pupper,
74  typename traits_t<T>::signed_t *pstride,
75  typename traits_t<T>::signed_t incr,
76  typename traits_t<T>::signed_t chunk
77 #if OMPT_SUPPORT && OMPT_OPTIONAL
78  ,
79  void *codeptr
80 #endif
81  ) {
82  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
83  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
84  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
85 
86  typedef typename traits_t<T>::unsigned_t UT;
87  typedef typename traits_t<T>::signed_t ST;
88  /* this all has to be changed back to TID and such.. */
89  kmp_int32 gtid = global_tid;
90  kmp_uint32 tid;
91  kmp_uint32 nth;
92  UT trip_count;
93  kmp_team_t *team;
94  __kmp_assert_valid_gtid(gtid);
95  kmp_info_t *th = __kmp_threads[gtid];
96 
97 #if OMPT_SUPPORT && OMPT_OPTIONAL
98  ompt_team_info_t *team_info = NULL;
99  ompt_task_info_t *task_info = NULL;
100  ompt_work_t ompt_work_type = ompt_work_loop;
101 
102  static kmp_int8 warn = 0;
103 
104  if (ompt_enabled.ompt_callback_work) {
105  // Only fully initialize variables needed by OMPT if OMPT is enabled.
106  team_info = __ompt_get_teaminfo(0, NULL);
107  task_info = __ompt_get_task_info_object(0);
108  // Determine workshare type
109  if (loc != NULL) {
110  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
111  ompt_work_type = ompt_work_loop;
112  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
113  ompt_work_type = ompt_work_sections;
114  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
115  ompt_work_type = ompt_work_distribute;
116  } else {
// Compiler emitted none of the KMP_IDENT_WORK_* flags: warn once
// (the CAS makes sure only the first thread to get here reports it).
117  kmp_int8 bool_res =
118  KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
119  if (bool_res)
120  KMP_WARNING(OmptOutdatedWorkshare);
121  }
122  KMP_DEBUG_ASSERT(ompt_work_type);
123  }
124  }
125 #endif
126 
127  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
128  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
129 #ifdef KMP_DEBUG
130  {
131  char *buff;
132  // create format specifiers before the debug output
133  buff = __kmp_str_format(
134  "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
135  " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
136  traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
137  traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
138  KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
139  *pstride, incr, chunk));
140  __kmp_str_free(&buff);
141  }
142 #endif
143 
144  if (__kmp_env_consistency_check) {
145  __kmp_push_workshare(global_tid, ct_pdo, loc);
146  if (incr == 0) {
147  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
148  loc);
149  }
150  }
151  /* special handling for zero-trip loops */
152  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
153  if (plastiter != NULL)
154  *plastiter = FALSE;
155  /* leave pupper and plower set to entire iteration space */
156  *pstride = incr; /* value should never be used */
157 // *plower = *pupper - incr;
158 // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
159 // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
160 // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
161 #ifdef KMP_DEBUG
162  {
163  char *buff;
164  // create format specifiers before the debug output
165  buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
166  "lower=%%%s upper=%%%s stride = %%%s "
167  "signed?<%s>, loc = %%s\n",
168  traits_t<T>::spec, traits_t<T>::spec,
169  traits_t<ST>::spec, traits_t<T>::spec);
170  KD_TRACE(100,
171  (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
172  __kmp_str_free(&buff);
173  }
174 #endif
175  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
176 
177 #if OMPT_SUPPORT && OMPT_OPTIONAL
178  if (ompt_enabled.ompt_callback_work) {
179  ompt_callbacks.ompt_callback(ompt_callback_work)(
180  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
181  &(task_info->task_data), 0, codeptr);
182  }
183 #endif
184  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
185  return;
186  }
187 
188  // Although there are schedule enumerations above kmp_ord_upper which are not
189  // schedules for "distribute", the only ones which are useful are dynamic, so
190  // cannot be seen here, since this codepath is only executed for static
191  // schedules.
192  if (schedtype > kmp_ord_upper) {
193  // we are in DISTRIBUTE construct
194  schedtype += kmp_sch_static -
195  kmp_distribute_static; // AC: convert to usual schedule type
196  tid = th->th.th_team->t.t_master_tid;
197  team = th->th.th_team->t.t_parent;
198  } else {
199  tid = __kmp_tid_from_gtid(global_tid);
200  team = th->th.th_team;
201  }
202 
203  /* determine if "for" loop is an active worksharing construct */
204  if (team->t.t_serialized) {
205  /* serialized parallel, each thread executes whole iteration space */
206  if (plastiter != NULL)
207  *plastiter = TRUE;
208  /* leave pupper and plower set to entire iteration space */
209  *pstride =
210  (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
211 
212 #ifdef KMP_DEBUG
213  {
214  char *buff;
215  // create format specifiers before the debug output
216  buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
217  "lower=%%%s upper=%%%s stride = %%%s\n",
218  traits_t<T>::spec, traits_t<T>::spec,
219  traits_t<ST>::spec);
220  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
221  __kmp_str_free(&buff);
222  }
223 #endif
224  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
225 
226 #if OMPT_SUPPORT && OMPT_OPTIONAL
227  if (ompt_enabled.ompt_callback_work) {
228  ompt_callbacks.ompt_callback(ompt_callback_work)(
229  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
230  &(task_info->task_data), *pstride, codeptr);
231  }
232 #endif
233  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
234  return;
235  }
// Active parallel region: partition among the team's nth threads.
236  nth = team->t.t_nproc;
// Team of exactly one thread: it owns the whole iteration space.
237  if (nth == 1) {
238  if (plastiter != NULL)
239  *plastiter = TRUE;
240  *pstride =
241  (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
242 #ifdef KMP_DEBUG
243  {
244  char *buff;
245  // create format specifiers before the debug output
246  buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
247  "lower=%%%s upper=%%%s stride = %%%s\n",
248  traits_t<T>::spec, traits_t<T>::spec,
249  traits_t<ST>::spec);
250  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
251  __kmp_str_free(&buff);
252  }
253 #endif
254  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
255 
256 #if OMPT_SUPPORT && OMPT_OPTIONAL
257  if (ompt_enabled.ompt_callback_work) {
258  ompt_callbacks.ompt_callback(ompt_callback_work)(
259  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
260  &(task_info->task_data), *pstride, codeptr);
261  }
262 #endif
263  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
264  return;
265  }
266 
267  /* compute trip count */
268  if (incr == 1) {
269  trip_count = *pupper - *plower + 1;
270  } else if (incr == -1) {
271  trip_count = *plower - *pupper + 1;
272  } else if (incr > 0) {
273  // upper-lower can exceed the limit of signed type
274  trip_count = (UT)(*pupper - *plower) / incr + 1;
275  } else {
276  trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
277  }
278 
279 #if KMP_STATS_ENABLED
280  if (KMP_MASTER_GTID(gtid)) {
281  KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
282  }
283 #endif
284 
285  if (__kmp_env_consistency_check) {
286  /* tripcount overflow? */
287  if (trip_count == 0 && *pupper != *plower) {
288  __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
289  loc);
290  }
291  }
292 
293  /* compute remaining parameters */
294  switch (schedtype) {
295  case kmp_sch_static: {
// Fewer iterations than threads: threads with tid < trip_count get one
// iteration each, the rest get an empty (lower > upper) range.
296  if (trip_count < nth) {
297  KMP_DEBUG_ASSERT(
298  __kmp_static == kmp_sch_static_greedy ||
299  __kmp_static ==
300  kmp_sch_static_balanced); // Unknown static scheduling type.
301  if (tid < trip_count) {
302  *pupper = *plower = *plower + tid * incr;
303  } else {
304  *plower = *pupper + incr;
305  }
306  if (plastiter != NULL)
307  *plastiter = (tid == trip_count - 1);
308  } else {
// Balanced: trip_count/nth iterations per thread, with the first
// 'extras' threads taking one extra iteration each.
309  if (__kmp_static == kmp_sch_static_balanced) {
310  UT small_chunk = trip_count / nth;
311  UT extras = trip_count % nth;
312  *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
313  *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
314  if (plastiter != NULL)
315  *plastiter = (tid == nth - 1);
316  } else {
// Greedy: equal chunks of ceil(trip_count/nth) iterations; later
// threads may fall partly or wholly past the original bound, so the
// computed upper bound is clamped back below.
317  T big_chunk_inc_count =
318  (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
319  T old_upper = *pupper;
320 
321  KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
322  // Unknown static scheduling type.
323 
324  *plower += tid * big_chunk_inc_count;
325  *pupper = *plower + big_chunk_inc_count - incr;
326  if (incr > 0) {
327  if (*pupper < *plower)
328  *pupper = traits_t<T>::max_value;
329  if (plastiter != NULL)
330  *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
331  if (*pupper > old_upper)
332  *pupper = old_upper; // tracker C73258
333  } else {
334  if (*pupper > *plower)
335  *pupper = traits_t<T>::min_value;
336  if (plastiter != NULL)
337  *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
338  if (*pupper < old_upper)
339  *pupper = old_upper; // tracker C73258
340  }
341  }
342  }
343  *pstride = trip_count;
344  break;
345  }
346  case kmp_sch_static_chunked: {
347  ST span;
348  if (chunk < 1) {
349  chunk = 1;
350  }
351  span = chunk * incr;
352  *pstride = span * nth;
353  *plower = *plower + (span * tid);
354  *pupper = *plower + span - incr;
355  if (plastiter != NULL)
356  *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
357  break;
358  }
359  case kmp_sch_static_balanced_chunked: {
360  T old_upper = *pupper;
361  // round up to make sure the chunk is enough to cover all iterations
362  UT span = (trip_count + nth - 1) / nth;
363 
364  // perform chunk adjustment
365  chunk = (span + chunk - 1) & ~(chunk - 1);
366 
367  span = chunk * incr;
368  *plower = *plower + (span * tid);
369  *pupper = *plower + span - incr;
370  if (incr > 0) {
371  if (*pupper > old_upper)
372  *pupper = old_upper;
373  } else if (*pupper < old_upper)
374  *pupper = old_upper;
375 
376  if (plastiter != NULL)
377  *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
378  break;
379  }
380  default:
381  KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
382  break;
383  }
384 
385 #if USE_ITT_BUILD
386  // Report loop metadata
387  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
388  __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
389  team->t.t_active_level == 1) {
390  kmp_uint64 cur_chunk = chunk;
391  check_loc(loc);
392  // Calculate chunk in case it was not specified; it is specified for
393  // kmp_sch_static_chunked
394  if (schedtype == kmp_sch_static) {
395  cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
396  }
397  // 0 - "static" schedule
398  __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
399  }
400 #endif
401 #ifdef KMP_DEBUG
402  {
403  char *buff;
404  // create format specifiers before the debug output
405  buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
406  "upper=%%%s stride = %%%s signed?<%s>\n",
407  traits_t<T>::spec, traits_t<T>::spec,
408  traits_t<ST>::spec, traits_t<T>::spec);
409  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
410  __kmp_str_free(&buff);
411  }
412 #endif
413  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
414 
415 #if OMPT_SUPPORT && OMPT_OPTIONAL
416  if (ompt_enabled.ompt_callback_work) {
417  ompt_callbacks.ompt_callback(ompt_callback_work)(
418  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
419  &(task_info->task_data), trip_count, codeptr);
420  }
421 #endif
422 
423  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
424  return;
425 }
426 
/* Two-level partitioning for "distribute parallel for": first split the
   iteration space among the teams of the league (this team's upper bound
   is returned in *pupperDist), then split this team's chunk among its
   threads (*plower/*pupper).  *plastiter is set iff this thread executes
   the last iteration of the whole loop. */
427 template <typename T>
428 static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
429  kmp_int32 schedule, kmp_int32 *plastiter,
430  T *plower, T *pupper, T *pupperDist,
431  typename traits_t<T>::signed_t *pstride,
432  typename traits_t<T>::signed_t incr,
433  typename traits_t<T>::signed_t chunk) {
434  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
435  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
436  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
437  typedef typename traits_t<T>::unsigned_t UT;
438  typedef typename traits_t<T>::signed_t ST;
439  kmp_uint32 tid;
440  kmp_uint32 nth;
441  kmp_uint32 team_id;
442  kmp_uint32 nteams;
443  UT trip_count;
444  kmp_team_t *team;
445  kmp_info_t *th;
446 
447  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
448  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
449  __kmp_assert_valid_gtid(gtid);
450 #ifdef KMP_DEBUG
451  {
452  char *buff;
453  // create format specifiers before the debug output
454  buff = __kmp_str_format(
455  "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
456  "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
457  traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
458  traits_t<ST>::spec, traits_t<T>::spec);
459  KD_TRACE(100,
460  (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
461  __kmp_str_free(&buff);
462  }
463 #endif
464 
465  if (__kmp_env_consistency_check) {
466  __kmp_push_workshare(gtid, ct_pdo, loc);
467  if (incr == 0) {
468  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
469  loc);
470  }
471  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
472  // The loop is illegal.
473  // Some zero-trip loops maintained by compiler, e.g.:
474  // for(i=10;i<0;++i) // lower >= upper - run-time check
475  // for(i=0;i>10;--i) // lower <= upper - run-time check
476  // for(i=0;i>10;++i) // incr > 0 - compile-time check
477  // for(i=10;i<0;--i) // incr < 0 - compile-time check
478  // Compiler does not check the following illegal loops:
479  // for(i=0;i<10;i+=incr) // where incr<0
480  // for(i=10;i>0;i-=incr) // where incr<0
481  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
482  }
483  }
484  tid = __kmp_tid_from_gtid(gtid);
485  th = __kmp_threads[gtid];
486  nth = th->th.th_team_nproc;
487  team = th->th.th_team;
488  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
489  nteams = th->th.th_teams_size.nteams;
490  team_id = team->t.t_master_tid;
491  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
492 
493  // compute global trip count
494  if (incr == 1) {
495  trip_count = *pupper - *plower + 1;
496  } else if (incr == -1) {
497  trip_count = *plower - *pupper + 1;
498  } else if (incr > 0) {
499  // upper-lower can exceed the limit of signed type
500  trip_count = (UT)(*pupper - *plower) / incr + 1;
501  } else {
502  trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
503  }
504 
505  *pstride = *pupper - *plower; // just in case (can be unused)
506  if (trip_count <= nteams) {
507  KMP_DEBUG_ASSERT(
508  __kmp_static == kmp_sch_static_greedy ||
509  __kmp_static ==
510  kmp_sch_static_balanced); // Unknown static scheduling type.
511  // only masters of some teams get single iteration, other threads get
512  // nothing
513  if (team_id < trip_count && tid == 0) {
514  *pupper = *pupperDist = *plower = *plower + team_id * incr;
515  } else {
516  *pupperDist = *pupper;
517  *plower = *pupper + incr; // compiler should skip loop body
518  }
519  if (plastiter != NULL)
520  *plastiter = (tid == 0 && team_id == trip_count - 1);
521  } else {
522  // Get the team's chunk first (each team gets at most one chunk)
523  if (__kmp_static == kmp_sch_static_balanced) {
524  UT chunkD = trip_count / nteams;
525  UT extras = trip_count % nteams;
526  *plower +=
527  incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
528  *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
529  if (plastiter != NULL)
530  *plastiter = (team_id == nteams - 1);
531  } else {
532  T chunk_inc_count =
533  (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
534  T upper = *pupper;
535  KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
536  // Unknown static scheduling type.
537  *plower += team_id * chunk_inc_count;
538  *pupperDist = *plower + chunk_inc_count - incr;
539  // Check/correct bounds if needed
540  if (incr > 0) {
541  if (*pupperDist < *plower)
542  *pupperDist = traits_t<T>::max_value;
543  if (plastiter != NULL)
544  *plastiter = *plower <= upper && *pupperDist > upper - incr;
545  if (*pupperDist > upper)
546  *pupperDist = upper; // tracker C73258
547  if (*plower > *pupperDist) {
548  *pupper = *pupperDist; // no iterations available for the team
549  goto end;
550  }
551  } else {
552  if (*pupperDist > *plower)
553  *pupperDist = traits_t<T>::min_value;
554  if (plastiter != NULL)
555  *plastiter = *plower >= upper && *pupperDist < upper - incr;
556  if (*pupperDist < upper)
557  *pupperDist = upper; // tracker C73258
558  if (*plower < *pupperDist) {
559  *pupper = *pupperDist; // no iterations available for the team
560  goto end;
561  }
562  }
563  }
564  // Get the parallel loop chunk now (for thread)
565  // compute trip count for team's chunk
566  if (incr == 1) {
567  trip_count = *pupperDist - *plower + 1;
568  } else if (incr == -1) {
569  trip_count = *plower - *pupperDist + 1;
// (incr == 1 / -1 were handled above, so 'incr > 1' here is equivalent
// to the 'incr > 0' test used for the global trip count)
570  } else if (incr > 1) {
571  // upper-lower can exceed the limit of signed type
572  trip_count = (UT)(*pupperDist - *plower) / incr + 1;
573  } else {
574  trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
575  }
576  KMP_DEBUG_ASSERT(trip_count);
577  switch (schedule) {
578  case kmp_sch_static: {
579  if (trip_count <= nth) {
580  KMP_DEBUG_ASSERT(
581  __kmp_static == kmp_sch_static_greedy ||
582  __kmp_static ==
583  kmp_sch_static_balanced); // Unknown static scheduling type.
584  if (tid < trip_count)
585  *pupper = *plower = *plower + tid * incr;
586  else
587  *plower = *pupper + incr; // no iterations available
// Note: *plastiter is only ever cleared here, never set — it must
// already hold the team-level answer computed above.
588  if (plastiter != NULL)
589  if (*plastiter != 0 && !(tid == trip_count - 1))
590  *plastiter = 0;
591  } else {
592  if (__kmp_static == kmp_sch_static_balanced) {
593  UT chunkL = trip_count / nth;
594  UT extras = trip_count % nth;
595  *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
596  *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
597  if (plastiter != NULL)
598  if (*plastiter != 0 && !(tid == nth - 1))
599  *plastiter = 0;
600  } else {
601  T chunk_inc_count =
602  (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
603  T upper = *pupperDist;
604  KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
605  // Unknown static scheduling type.
606  *plower += tid * chunk_inc_count;
607  *pupper = *plower + chunk_inc_count - incr;
608  if (incr > 0) {
609  if (*pupper < *plower)
610  *pupper = traits_t<T>::max_value;
611  if (plastiter != NULL)
612  if (*plastiter != 0 &&
613  !(*plower <= upper && *pupper > upper - incr))
614  *plastiter = 0;
615  if (*pupper > upper)
616  *pupper = upper; // tracker C73258
617  } else {
618  if (*pupper > *plower)
619  *pupper = traits_t<T>::min_value;
620  if (plastiter != NULL)
621  if (*plastiter != 0 &&
622  !(*plower >= upper && *pupper < upper - incr))
623  *plastiter = 0;
624  if (*pupper < upper)
625  *pupper = upper; // tracker C73258
626  }
627  }
628  }
629  break;
630  }
631  case kmp_sch_static_chunked: {
632  ST span;
633  if (chunk < 1)
634  chunk = 1;
635  span = chunk * incr;
636  *pstride = span * nth;
637  *plower = *plower + (span * tid);
638  *pupper = *plower + span - incr;
639  if (plastiter != NULL)
640  if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
641  *plastiter = 0;
642  break;
643  }
644  default:
645  KMP_ASSERT2(0,
646  "__kmpc_dist_for_static_init: unknown loop scheduling type");
647  break;
648  }
649  }
650 end:;
651 #ifdef KMP_DEBUG
652  {
653  char *buff;
654  // create format specifiers before the debug output
655  buff = __kmp_str_format(
656  "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
657  "stride=%%%s signed?<%s>\n",
658  traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
659  traits_t<ST>::spec, traits_t<T>::spec);
660  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
661  __kmp_str_free(&buff);
662  }
663 #endif
664  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
665  KMP_STATS_LOOP_END(OMP_distribute_iterations);
666  return;
667 }
668 
/* On entry *p_lb/*p_ub hold the whole loop bounds; on exit they hold this
   team's first chunk.  *p_st is set to the stride between a team's
   successive chunks (chunk span * nteams), and *p_last to whether this
   team's chunk contains the last iteration. */
669 template <typename T>
670 static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
671  kmp_int32 *p_last, T *p_lb, T *p_ub,
672  typename traits_t<T>::signed_t *p_st,
673  typename traits_t<T>::signed_t incr,
674  typename traits_t<T>::signed_t chunk) {
675  // The routine returns the first chunk distributed to the team and
676  // stride for next chunks calculation.
677  // Last iteration flag set for the team that will execute
678  // the last iteration of the loop.
679  // The routine is called for dist_schedule(static,chunk) only.
680  typedef typename traits_t<T>::unsigned_t UT;
681  typedef typename traits_t<T>::signed_t ST;
682  kmp_uint32 team_id;
683  kmp_uint32 nteams;
684  UT trip_count;
685  T lower;
686  T upper;
687  ST span;
688  kmp_team_t *team;
689  kmp_info_t *th;
690 
691  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
692  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
693  __kmp_assert_valid_gtid(gtid);
694 #ifdef KMP_DEBUG
695  {
696  char *buff;
697  // create format specifiers before the debug output
698  buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
699  "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
700  traits_t<T>::spec, traits_t<T>::spec,
701  traits_t<ST>::spec, traits_t<ST>::spec,
702  traits_t<T>::spec);
703  KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
704  __kmp_str_free(&buff);
705  }
706 #endif
707 
708  lower = *p_lb;
709  upper = *p_ub;
710  if (__kmp_env_consistency_check) {
711  if (incr == 0) {
712  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
713  loc);
714  }
715  if (incr > 0 ? (upper < lower) : (lower < upper)) {
716  // The loop is illegal.
717  // Some zero-trip loops maintained by compiler, e.g.:
718  // for(i=10;i<0;++i) // lower >= upper - run-time check
719  // for(i=0;i>10;--i) // lower <= upper - run-time check
720  // for(i=0;i>10;++i) // incr > 0 - compile-time check
721  // for(i=10;i<0;--i) // incr < 0 - compile-time check
722  // Compiler does not check the following illegal loops:
723  // for(i=0;i<10;i+=incr) // where incr<0
724  // for(i=10;i>0;i-=incr) // where incr<0
725  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
726  }
727  }
728  th = __kmp_threads[gtid];
729  team = th->th.th_team;
730  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
731  nteams = th->th.th_teams_size.nteams;
732  team_id = team->t.t_master_tid;
733  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
734 
735  // compute trip count
736  if (incr == 1) {
737  trip_count = upper - lower + 1;
738  } else if (incr == -1) {
739  trip_count = lower - upper + 1;
740  } else if (incr > 0) {
741  // upper-lower can exceed the limit of signed type
742  trip_count = (UT)(upper - lower) / incr + 1;
743  } else {
744  trip_count = (UT)(lower - upper) / (-incr) + 1;
745  }
746  if (chunk < 1)
747  chunk = 1;
748  span = chunk * incr;
// Round-robin chunk assignment: team 'team_id' takes every nteams-th chunk.
749  *p_st = span * nteams;
750  *p_lb = lower + (span * team_id);
751  *p_ub = *p_lb + span - incr;
752  if (p_last != NULL)
753  *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
754  // Correct upper bound if needed
755  if (incr > 0) {
756  if (*p_ub < *p_lb) // overflow?
757  *p_ub = traits_t<T>::max_value;
758  if (*p_ub > upper)
759  *p_ub = upper; // tracker C73258
760  } else { // incr < 0
761  if (*p_ub > *p_lb)
762  *p_ub = traits_t<T>::min_value;
763  if (*p_ub < upper)
764  *p_ub = upper; // tracker C73258
765  }
766 #ifdef KMP_DEBUG
767  {
768  char *buff;
769  // create format specifiers before the debug output
770  buff =
771  __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
772  "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
773  traits_t<T>::spec, traits_t<T>::spec,
774  traits_t<ST>::spec, traits_t<ST>::spec);
775  KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
776  __kmp_str_free(&buff);
777  }
778 #endif
779 }
780 
781 //------------------------------------------------------------------------------
782 extern "C" {
// Compiler-visible ABI entry points.  Each __kmpc_for_static_init_{4,4u,8,8u}
// below is a thin forwarder to the templated __kmp_for_static_init<> for the
// matching 32/64-bit signed/unsigned iteration type, passing the caller's
// return address for OMPT when that support is compiled in.
804 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
805  kmp_int32 *plastiter, kmp_int32 *plower,
806  kmp_int32 *pupper, kmp_int32 *pstride,
807  kmp_int32 incr, kmp_int32 chunk) {
808  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
809  pupper, pstride, incr, chunk
810 #if OMPT_SUPPORT && OMPT_OPTIONAL
811  ,
812  OMPT_GET_RETURN_ADDRESS(0)
813 #endif
814  );
815 }
816 
// 32-bit unsigned variant.
820 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
821  kmp_int32 schedtype, kmp_int32 *plastiter,
822  kmp_uint32 *plower, kmp_uint32 *pupper,
823  kmp_int32 *pstride, kmp_int32 incr,
824  kmp_int32 chunk) {
825  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
826  pupper, pstride, incr, chunk
827 #if OMPT_SUPPORT && OMPT_OPTIONAL
828  ,
829  OMPT_GET_RETURN_ADDRESS(0)
830 #endif
831  );
832 }
833 
// 64-bit signed variant.
837 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
838  kmp_int32 *plastiter, kmp_int64 *plower,
839  kmp_int64 *pupper, kmp_int64 *pstride,
840  kmp_int64 incr, kmp_int64 chunk) {
841  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
842  pupper, pstride, incr, chunk
843 #if OMPT_SUPPORT && OMPT_OPTIONAL
844  ,
845  OMPT_GET_RETURN_ADDRESS(0)
846 #endif
847  );
848 }
849 
// 64-bit unsigned variant.
853 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
854  kmp_int32 schedtype, kmp_int32 *plastiter,
855  kmp_uint64 *plower, kmp_uint64 *pupper,
856  kmp_int64 *pstride, kmp_int64 incr,
857  kmp_int64 chunk) {
858  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
859  pupper, pstride, incr, chunk
860 #if OMPT_SUPPORT && OMPT_OPTIONAL
861  ,
862  OMPT_GET_RETURN_ADDRESS(0)
863 #endif
864  );
865 }
// __kmpc_dist_for_static_init_{4,4u,8,8u}: forwarders to the templated
// __kmp_dist_for_static_init<> for the distribute-parallel-for construct;
// *pupperD receives the team's (distribute-level) upper bound.
892 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
893  kmp_int32 schedule, kmp_int32 *plastiter,
894  kmp_int32 *plower, kmp_int32 *pupper,
895  kmp_int32 *pupperD, kmp_int32 *pstride,
896  kmp_int32 incr, kmp_int32 chunk) {
897  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
898  pupper, pupperD, pstride, incr, chunk);
899 }
900 
904 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
905  kmp_int32 schedule, kmp_int32 *plastiter,
906  kmp_uint32 *plower, kmp_uint32 *pupper,
907  kmp_uint32 *pupperD, kmp_int32 *pstride,
908  kmp_int32 incr, kmp_int32 chunk) {
909  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
910  pupper, pupperD, pstride, incr, chunk);
911 }
912 
916 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
917  kmp_int32 schedule, kmp_int32 *plastiter,
918  kmp_int64 *plower, kmp_int64 *pupper,
919  kmp_int64 *pupperD, kmp_int64 *pstride,
920  kmp_int64 incr, kmp_int64 chunk) {
921  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
922  pupper, pupperD, pstride, incr, chunk);
923 }
924 
928 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
929  kmp_int32 schedule, kmp_int32 *plastiter,
930  kmp_uint64 *plower, kmp_uint64 *pupper,
931  kmp_uint64 *pupperD, kmp_int64 *pstride,
932  kmp_int64 incr, kmp_int64 chunk) {
933  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
934  pupper, pupperD, pstride, incr, chunk);
935 }
940 //------------------------------------------------------------------------------
941 // Auxiliary routines for Distribute Parallel Loop construct implementation
942 // Transfer call to template< type T >
943 // __kmp_team_static_init( ident_t *loc, int gtid,
944 // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
945 
966 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
967  kmp_int32 *p_lb, kmp_int32 *p_ub,
968  kmp_int32 *p_st, kmp_int32 incr,
969  kmp_int32 chunk) {
970  KMP_DEBUG_ASSERT(__kmp_init_serial);
971  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
972  chunk);
973 }
974 
978 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
979  kmp_uint32 *p_lb, kmp_uint32 *p_ub,
980  kmp_int32 *p_st, kmp_int32 incr,
981  kmp_int32 chunk) {
982  KMP_DEBUG_ASSERT(__kmp_init_serial);
983  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
984  chunk);
985 }
986 
990 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
991  kmp_int64 *p_lb, kmp_int64 *p_ub,
992  kmp_int64 *p_st, kmp_int64 incr,
993  kmp_int64 chunk) {
994  KMP_DEBUG_ASSERT(__kmp_init_serial);
995  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
996  chunk);
997 }
998 
1002 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1003  kmp_uint64 *p_lb, kmp_uint64 *p_ub,
1004  kmp_int64 *p_st, kmp_int64 incr,
1005  kmp_int64 chunk) {
1006  KMP_DEBUG_ASSERT(__kmp_init_serial);
1007  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1008  chunk);
1009 }
1014 } // extern "C"
@ KMP_IDENT_KMPC
Definition: kmp.h:191
@ KMP_IDENT_WORK_LOOP
Definition: kmp.h:209
@ KMP_IDENT_WORK_SECTIONS
Definition: kmp.h:211
@ KMP_IDENT_WORK_DISTRIBUTE
Definition: kmp.h:213
#define KMP_COUNT_VALUE(name, value)
Adds value to specified timer (name).
Definition: kmp_stats.h:888
#define KMP_COUNT_BLOCK(name)
Increments specified counter (name).
Definition: kmp_stats.h:901
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:837
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:916
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_uint32 *plower, kmp_uint32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:820
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:990
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:904
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:1002
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_uint64 *plower, kmp_uint64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:853
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:978
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:892
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:804
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:966
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:928
@ kmp_sch_static
Definition: kmp.h:354
@ kmp_distribute_static
Definition: kmp.h:390
@ kmp_ord_upper
Definition: kmp.h:386
Definition: kmp.h:229
char const * psource
Definition: kmp.h:239
kmp_int32 flags
Definition: kmp.h:231