LLVM OpenMP* Runtime Library
kmp_affinity.h
1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_AFFINITY_H
14 #define KMP_AFFINITY_H
15 
16 #include "kmp.h"
17 #include "kmp_os.h"
18 
19 #if KMP_AFFINITY_SUPPORTED
20 #if KMP_USE_HWLOC
21 class KMPHwlocAffinity : public KMPAffinity {
22 public:
23  class Mask : public KMPAffinity::Mask {
24  hwloc_cpuset_t mask;
25 
26  public:
27  Mask() {
28  mask = hwloc_bitmap_alloc();
29  this->zero();
30  }
31  ~Mask() { hwloc_bitmap_free(mask); }
32  void set(int i) override { hwloc_bitmap_set(mask, i); }
33  bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
34  void clear(int i) override { hwloc_bitmap_clr(mask, i); }
35  void zero() override { hwloc_bitmap_zero(mask); }
36  void copy(const KMPAffinity::Mask *src) override {
37  const Mask *convert = static_cast<const Mask *>(src);
38  hwloc_bitmap_copy(mask, convert->mask);
39  }
40  void bitwise_and(const KMPAffinity::Mask *rhs) override {
41  const Mask *convert = static_cast<const Mask *>(rhs);
42  hwloc_bitmap_and(mask, mask, convert->mask);
43  }
44  void bitwise_or(const KMPAffinity::Mask *rhs) override {
45  const Mask *convert = static_cast<const Mask *>(rhs);
46  hwloc_bitmap_or(mask, mask, convert->mask);
47  }
48  void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
49  int begin() const override { return hwloc_bitmap_first(mask); }
50  int end() const override { return -1; }
51  int next(int previous) const override {
52  return hwloc_bitmap_next(mask, previous);
53  }
54  int get_system_affinity(bool abort_on_error) override {
55  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
56  "Illegal get affinity operation when not capable");
57  long retval =
58  hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
59  if (retval >= 0) {
60  return 0;
61  }
62  int error = errno;
63  if (abort_on_error) {
64  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
65  }
66  return error;
67  }
68  int set_system_affinity(bool abort_on_error) const override {
69  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
70  "Illegal set affinity operation when not capable");
71  long retval =
72  hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
73  if (retval >= 0) {
74  return 0;
75  }
76  int error = errno;
77  if (abort_on_error) {
78  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
79  }
80  return error;
81  }
82 #if KMP_OS_WINDOWS
83  int set_process_affinity(bool abort_on_error) const override {
84  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
85  "Illegal set process affinity operation when not capable");
86  int error = 0;
87  const hwloc_topology_support *support =
88  hwloc_topology_get_support(__kmp_hwloc_topology);
89  if (support->cpubind->set_proc_cpubind) {
90  int retval;
91  retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
92  HWLOC_CPUBIND_PROCESS);
93  if (retval >= 0)
94  return 0;
95  error = errno;
96  if (abort_on_error)
97  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
98  }
99  return error;
100  }
101 #endif
102  int get_proc_group() const override {
103  int group = -1;
104 #if KMP_OS_WINDOWS
105  if (__kmp_num_proc_groups == 1) {
106  return 1;
107  }
108  for (int i = 0; i < __kmp_num_proc_groups; i++) {
109  // On Windows, the long type is always 32 bits
110  unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
111  unsigned long second_32_bits =
112  hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
113  if (first_32_bits == 0 && second_32_bits == 0) {
114  continue;
115  }
116  if (group >= 0) {
117  return -1;
118  }
119  group = i;
120  }
121 #endif /* KMP_OS_WINDOWS */
122  return group;
123  }
124  };
125  void determine_capable(const char *var) override {
126  const hwloc_topology_support *topology_support;
127  if (__kmp_hwloc_topology == NULL) {
128  if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
129  __kmp_hwloc_error = TRUE;
130  if (__kmp_affinity_verbose)
131  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
132  }
133  if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
134  __kmp_hwloc_error = TRUE;
135  if (__kmp_affinity_verbose)
136  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
137  }
138  }
139  topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
140  // Is the system capable of setting/getting this thread's affinity?
141  // Also, is topology discovery possible? (pu indicates ability to discover
142  // processing units). And finally, were there no errors when calling any
143  // hwloc_* API functions?
144  if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
145  topology_support->cpubind->get_thisthread_cpubind &&
146  topology_support->discovery->pu && !__kmp_hwloc_error) {
147  // enables affinity according to KMP_AFFINITY_CAPABLE() macro
148  KMP_AFFINITY_ENABLE(TRUE);
149  } else {
150  // indicate that hwloc didn't work and disable affinity
151  __kmp_hwloc_error = TRUE;
152  KMP_AFFINITY_DISABLE();
153  }
154  }
155  void bind_thread(int which) override {
156  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
157  "Illegal set affinity operation when not capable");
158  KMPAffinity::Mask *mask;
159  KMP_CPU_ALLOC_ON_STACK(mask);
160  KMP_CPU_ZERO(mask);
161  KMP_CPU_SET(which, mask);
162  __kmp_set_system_affinity(mask, TRUE);
163  KMP_CPU_FREE_FROM_STACK(mask);
164  }
165  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
166  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
167  KMPAffinity::Mask *allocate_mask_array(int num) override {
168  return new Mask[num];
169  }
170  void deallocate_mask_array(KMPAffinity::Mask *array) override {
171  Mask *hwloc_array = static_cast<Mask *>(array);
172  delete[] hwloc_array;
173  }
174  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
175  int index) override {
176  Mask *hwloc_array = static_cast<Mask *>(array);
177  return &(hwloc_array[index]);
178  }
179  api_type get_api_type() const override { return HWLOC; }
180 };
181 #endif /* KMP_USE_HWLOC */
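// ---------------------------------------------------------------------------
// Illustrative sketch (editorial addition, not part of the original header):
// how a runtime might select one of the KMPAffinity backends defined in this
// file and pin the calling thread. The selection shown here is a simplified
// assumption; the actual dispatch lives elsewhere in the runtime sources, and
// the local name `affinity_api` is hypothetical.
//
//   KMPAffinity *affinity_api = NULL;
// #if KMP_USE_HWLOC
//   affinity_api = new KMPHwlocAffinity();  // hwloc-based backend (above)
// #else
//   affinity_api = new KMPNativeAffinity(); // native OS backend (below)
// #endif
//   affinity_api->determine_capable("KMP_AFFINITY"); // probe OS support
//   if (KMP_AFFINITY_CAPABLE())
//     affinity_api->bind_thread(0); // bind the calling thread to processor 0
// ---------------------------------------------------------------------------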
182 
183 #if KMP_OS_LINUX || KMP_OS_FREEBSD
184 #if KMP_OS_LINUX
185 /* On some of the older OSes that we build on, these constants aren't present
186  in <asm/unistd.h>, #included from <sys/syscall.h>. They must be the same on
187  all systems of the same arch where they are defined, and they cannot change;
188  they are set in stone forever. */
189 #include <sys/syscall.h>
190 #if KMP_ARCH_X86 || KMP_ARCH_ARM
191 #ifndef __NR_sched_setaffinity
192 #define __NR_sched_setaffinity 241
193 #elif __NR_sched_setaffinity != 241
194 #error Wrong code for setaffinity system call.
195 #endif /* __NR_sched_setaffinity */
196 #ifndef __NR_sched_getaffinity
197 #define __NR_sched_getaffinity 242
198 #elif __NR_sched_getaffinity != 242
199 #error Wrong code for getaffinity system call.
200 #endif /* __NR_sched_getaffinity */
201 #elif KMP_ARCH_AARCH64
202 #ifndef __NR_sched_setaffinity
203 #define __NR_sched_setaffinity 122
204 #elif __NR_sched_setaffinity != 122
205 #error Wrong code for setaffinity system call.
206 #endif /* __NR_sched_setaffinity */
207 #ifndef __NR_sched_getaffinity
208 #define __NR_sched_getaffinity 123
209 #elif __NR_sched_getaffinity != 123
210 #error Wrong code for getaffinity system call.
211 #endif /* __NR_sched_getaffinity */
212 #elif KMP_ARCH_X86_64
213 #ifndef __NR_sched_setaffinity
214 #define __NR_sched_setaffinity 203
215 #elif __NR_sched_setaffinity != 203
216 #error Wrong code for setaffinity system call.
217 #endif /* __NR_sched_setaffinity */
218 #ifndef __NR_sched_getaffinity
219 #define __NR_sched_getaffinity 204
220 #elif __NR_sched_getaffinity != 204
221 #error Wrong code for getaffinity system call.
222 #endif /* __NR_sched_getaffinity */
223 #elif KMP_ARCH_PPC64
224 #ifndef __NR_sched_setaffinity
225 #define __NR_sched_setaffinity 222
226 #elif __NR_sched_setaffinity != 222
227 #error Wrong code for setaffinity system call.
228 #endif /* __NR_sched_setaffinity */
229 #ifndef __NR_sched_getaffinity
230 #define __NR_sched_getaffinity 223
231 #elif __NR_sched_getaffinity != 223
232 #error Wrong code for getaffinity system call.
233 #endif /* __NR_sched_getaffinity */
234 #elif KMP_ARCH_MIPS
235 #ifndef __NR_sched_setaffinity
236 #define __NR_sched_setaffinity 4239
237 #elif __NR_sched_setaffinity != 4239
238 #error Wrong code for setaffinity system call.
239 #endif /* __NR_sched_setaffinity */
240 #ifndef __NR_sched_getaffinity
241 #define __NR_sched_getaffinity 4240
242 #elif __NR_sched_getaffinity != 4240
243 #error Wrong code for getaffinity system call.
244 #endif /* __NR_sched_getaffinity */
245 #elif KMP_ARCH_MIPS64
246 #ifndef __NR_sched_setaffinity
247 #define __NR_sched_setaffinity 5195
248 #elif __NR_sched_setaffinity != 5195
249 #error Wrong code for setaffinity system call.
250 #endif /* __NR_sched_setaffinity */
251 #ifndef __NR_sched_getaffinity
252 #define __NR_sched_getaffinity 5196
253 #elif __NR_sched_getaffinity != 5196
254 #error Wrong code for getaffinity system call.
255 #endif /* __NR_sched_getaffinity */
256 #else
257 #error Unknown or unsupported architecture
258 #endif /* KMP_ARCH_* */
259 #elif KMP_OS_FREEBSD
260 #include <pthread.h>
261 #include <pthread_np.h>
262 #endif
263 class KMPNativeAffinity : public KMPAffinity {
264  class Mask : public KMPAffinity::Mask {
265  typedef unsigned long mask_t;
266  typedef decltype(__kmp_affin_mask_size) mask_size_type;
267  static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
268  static const mask_t ONE = 1;
269  mask_size_type get_num_mask_types() const {
270  return __kmp_affin_mask_size / sizeof(mask_t);
271  }
272 
273  public:
274  mask_t *mask;
275  Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
276  ~Mask() {
277  if (mask)
278  __kmp_free(mask);
279  }
280  void set(int i) override {
281  mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
282  }
283  bool is_set(int i) const override {
284  return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
285  }
286  void clear(int i) override {
287  mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
288  }
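 // Worked example (editorial): with a 64-bit mask_t, BITS_PER_MASK_T == 64,
 // so logical processor 70 lives in word mask[70 / 64] == mask[1] at bit
 // 70 % 64 == 6; set(70), is_set(70) and clear(70) all address that bit.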
289  void zero() override {
290  mask_size_type e = get_num_mask_types();
291  for (mask_size_type i = 0; i < e; ++i)
292  mask[i] = (mask_t)0;
293  }
294  void copy(const KMPAffinity::Mask *src) override {
295  const Mask *convert = static_cast<const Mask *>(src);
296  mask_size_type e = get_num_mask_types();
297  for (mask_size_type i = 0; i < e; ++i)
298  mask[i] = convert->mask[i];
299  }
300  void bitwise_and(const KMPAffinity::Mask *rhs) override {
301  const Mask *convert = static_cast<const Mask *>(rhs);
302  mask_size_type e = get_num_mask_types();
303  for (mask_size_type i = 0; i < e; ++i)
304  mask[i] &= convert->mask[i];
305  }
306  void bitwise_or(const KMPAffinity::Mask *rhs) override {
307  const Mask *convert = static_cast<const Mask *>(rhs);
308  mask_size_type e = get_num_mask_types();
309  for (mask_size_type i = 0; i < e; ++i)
310  mask[i] |= convert->mask[i];
311  }
312  void bitwise_not() override {
313  mask_size_type e = get_num_mask_types();
314  for (mask_size_type i = 0; i < e; ++i)
315  mask[i] = ~(mask[i]);
316  }
317  int begin() const override {
318  int retval = 0;
319  while (retval < end() && !is_set(retval))
320  ++retval;
321  return retval;
322  }
323  int end() const override {
324  int e;
325  __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
326  return e;
327  }
328  int next(int previous) const override {
329  int retval = previous + 1;
330  while (retval < end() && !is_set(retval))
331  ++retval;
332  return retval;
333  }
334  int get_system_affinity(bool abort_on_error) override {
335  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
336  "Illegal get affinity operation when not capable");
337 #if KMP_OS_LINUX
338  long retval =
339  syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
340 #elif KMP_OS_FREEBSD
341  int r =
342  pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
343  int retval = (r == 0 ? 0 : -1);
344 #endif
345  if (retval >= 0) {
346  return 0;
347  }
348  int error = errno;
349  if (abort_on_error) {
350  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
351  }
352  return error;
353  }
354  int set_system_affinity(bool abort_on_error) const override {
355  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
356  "Illegal set affinity operation when not capable");
357 #if KMP_OS_LINUX
358  long retval =
359  syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
360 #elif KMP_OS_FREEBSD
361  int r =
362  pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
363  int retval = (r == 0 ? 0 : -1);
364 #endif
365  if (retval >= 0) {
366  return 0;
367  }
368  int error = errno;
369  if (abort_on_error) {
370  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
371  }
372  return error;
373  }
374  };
375  void determine_capable(const char *env_var) override {
376  __kmp_affinity_determine_capable(env_var);
377  }
378  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
379  KMPAffinity::Mask *allocate_mask() override {
380  KMPNativeAffinity::Mask *retval = new Mask();
381  return retval;
382  }
383  void deallocate_mask(KMPAffinity::Mask *m) override {
384  KMPNativeAffinity::Mask *native_mask =
385  static_cast<KMPNativeAffinity::Mask *>(m);
386  delete native_mask;
387  }
388  KMPAffinity::Mask *allocate_mask_array(int num) override {
389  return new Mask[num];
390  }
391  void deallocate_mask_array(KMPAffinity::Mask *array) override {
392  Mask *linux_array = static_cast<Mask *>(array);
393  delete[] linux_array;
394  }
395  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
396  int index) override {
397  Mask *linux_array = static_cast<Mask *>(array);
398  return &(linux_array[index]);
399  }
400  api_type get_api_type() const override { return NATIVE_OS; }
401 };
402 #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
403 
404 #if KMP_OS_WINDOWS
405 class KMPNativeAffinity : public KMPAffinity {
406  class Mask : public KMPAffinity::Mask {
407  typedef ULONG_PTR mask_t;
408  static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
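 // Editorial note: the runtime keeps one mask_t word per Windows processor
 // group (see the allocation in Mask() below); bit (i % BITS_PER_MASK_T) of
 // word (i / BITS_PER_MASK_T) represents logical processor i across groups.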
409  mask_t *mask;
410 
411  public:
412  Mask() {
413  mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
414  }
415  ~Mask() {
416  if (mask)
417  __kmp_free(mask);
418  }
419  void set(int i) override {
420  mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
421  }
422  bool is_set(int i) const override {
423  return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
424  }
425  void clear(int i) override {
426  mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
427  }
428  void zero() override {
429  for (int i = 0; i < __kmp_num_proc_groups; ++i)
430  mask[i] = 0;
431  }
432  void copy(const KMPAffinity::Mask *src) override {
433  const Mask *convert = static_cast<const Mask *>(src);
434  for (int i = 0; i < __kmp_num_proc_groups; ++i)
435  mask[i] = convert->mask[i];
436  }
437  void bitwise_and(const KMPAffinity::Mask *rhs) override {
438  const Mask *convert = static_cast<const Mask *>(rhs);
439  for (int i = 0; i < __kmp_num_proc_groups; ++i)
440  mask[i] &= convert->mask[i];
441  }
442  void bitwise_or(const KMPAffinity::Mask *rhs) override {
443  const Mask *convert = static_cast<const Mask *>(rhs);
444  for (int i = 0; i < __kmp_num_proc_groups; ++i)
445  mask[i] |= convert->mask[i];
446  }
447  void bitwise_not() override {
448  for (int i = 0; i < __kmp_num_proc_groups; ++i)
449  mask[i] = ~(mask[i]);
450  }
451  int begin() const override {
452  int retval = 0;
453  while (retval < end() && !is_set(retval))
454  ++retval;
455  return retval;
456  }
457  int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
458  int next(int previous) const override {
459  int retval = previous + 1;
460  while (retval < end() && !is_set(retval))
461  ++retval;
462  return retval;
463  }
464  int set_process_affinity(bool abort_on_error) const override {
465  if (__kmp_num_proc_groups <= 1) {
466  if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
467  DWORD error = GetLastError();
468  if (abort_on_error) {
469  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
470  __kmp_msg_null);
471  }
472  return error;
473  }
474  }
475  return 0;
476  }
477  int set_system_affinity(bool abort_on_error) const override {
478  if (__kmp_num_proc_groups > 1) {
479  // Check for a valid mask.
480  GROUP_AFFINITY ga;
481  int group = get_proc_group();
482  if (group < 0) {
483  if (abort_on_error) {
484  KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
485  }
486  return -1;
487  }
488  // Transform the bit vector into a GROUP_AFFINITY struct
489  // and make the system call to set affinity.
490  ga.Group = group;
491  ga.Mask = mask[group];
492  ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
493 
494  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
495  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
496  DWORD error = GetLastError();
497  if (abort_on_error) {
498  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
499  __kmp_msg_null);
500  }
501  return error;
502  }
503  } else {
504  if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
505  DWORD error = GetLastError();
506  if (abort_on_error) {
507  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
508  __kmp_msg_null);
509  }
510  return error;
511  }
512  }
513  return 0;
514  }
515  int get_system_affinity(bool abort_on_error) override {
516  if (__kmp_num_proc_groups > 1) {
517  this->zero();
518  GROUP_AFFINITY ga;
519  KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
520  if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
521  DWORD error = GetLastError();
522  if (abort_on_error) {
523  __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
524  KMP_ERR(error), __kmp_msg_null);
525  }
526  return error;
527  }
528  if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
529  (ga.Mask == 0)) {
530  return -1;
531  }
532  mask[ga.Group] = ga.Mask;
533  } else {
534  mask_t newMask, sysMask, retval;
535  if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
536  DWORD error = GetLastError();
537  if (abort_on_error) {
538  __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
539  KMP_ERR(error), __kmp_msg_null);
540  }
541  return error;
542  }
543  retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
544  if (!retval) {
545  DWORD error = GetLastError();
546  if (abort_on_error) {
547  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
548  KMP_ERR(error), __kmp_msg_null);
549  }
550  return error;
551  }
552  newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
553  if (!newMask) {
554  DWORD error = GetLastError();
555  if (abort_on_error) {
556  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
557  KMP_ERR(error), __kmp_msg_null);
558  }
559  }
560  *mask = retval;
561  }
562  return 0;
563  }
564  int get_proc_group() const override {
565  int group = -1;
566  if (__kmp_num_proc_groups == 1) {
567  return 1;
568  }
569  for (int i = 0; i < __kmp_num_proc_groups; i++) {
570  if (mask[i] == 0)
571  continue;
572  if (group >= 0)
573  return -1;
574  group = i;
575  }
576  return group;
577  }
578  };
579  void determine_capable(const char *env_var) override {
580  __kmp_affinity_determine_capable(env_var);
581  }
582  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
583  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
584  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
585  KMPAffinity::Mask *allocate_mask_array(int num) override {
586  return new Mask[num];
587  }
588  void deallocate_mask_array(KMPAffinity::Mask *array) override {
589  Mask *windows_array = static_cast<Mask *>(array);
590  delete[] windows_array;
591  }
592  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
593  int index) override {
594  Mask *windows_array = static_cast<Mask *>(array);
595  return &(windows_array[index]);
596  }
597  api_type get_api_type() const override { return NATIVE_OS; }
598 };
599 #endif /* KMP_OS_WINDOWS */
600 #endif /* KMP_AFFINITY_SUPPORTED */
601 
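/* Editorial summary: an Address records one thread's position in the machine
   hierarchy as a list of labels ordered from the outermost level (e.g.
   package) at index 0 down to the innermost level (e.g. hardware thread),
   plus the child index at each level; the level names here are illustrative,
   inferred from how labels[] and childNums[] are used below. */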
602 class Address {
603 public:
604  static const unsigned maxDepth = 32;
605  unsigned labels[maxDepth];
606  unsigned childNums[maxDepth];
607  unsigned depth;
608  unsigned leader;
609  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
610  Address &operator=(const Address &b) {
611  depth = b.depth;
612  for (unsigned i = 0; i < depth; i++) {
613  labels[i] = b.labels[i];
614  childNums[i] = b.childNums[i];
615  }
616  leader = FALSE;
617  return *this;
618  }
619  bool operator==(const Address &b) const {
620  if (depth != b.depth)
621  return false;
622  for (unsigned i = 0; i < depth; i++)
623  if (labels[i] != b.labels[i])
624  return false;
625  return true;
626  }
627  bool isClose(const Address &b, int level) const {
628  if (depth != b.depth)
629  return false;
630  if ((unsigned)level >= depth)
631  return true;
632  for (unsigned i = 0; i < (depth - level); i++)
633  if (labels[i] != b.labels[i])
634  return false;
635  return true;
636  }
637  bool operator!=(const Address &b) const { return !operator==(b); }
638  void print() const {
639  unsigned i;
640  printf("Depth: %u --- ", depth);
641  for (i = 0; i < depth; i++) {
642  printf("%u ", labels[i]);
643  }
644  }
645 };
646 
647 class AddrUnsPair {
648 public:
649  Address first;
650  unsigned second;
651  AddrUnsPair(Address _first, unsigned _second)
652  : first(_first), second(_second) {}
653  AddrUnsPair &operator=(const AddrUnsPair &b) {
654  first = b.first;
655  second = b.second;
656  return *this;
657  }
658  void print() const {
659  printf("first = ");
660  first.print();
661  printf(" --- second = %u", second);
662  }
663  bool operator==(const AddrUnsPair &b) const {
664  if (first != b.first)
665  return false;
666  if (second != b.second)
667  return false;
668  return true;
669  }
670  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
671 };
672 
673 static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
674  const Address *aa = &(((const AddrUnsPair *)a)->first);
675  const Address *bb = &(((const AddrUnsPair *)b)->first);
676  unsigned depth = aa->depth;
677  unsigned i;
678  KMP_DEBUG_ASSERT(depth == bb->depth);
679  for (i = 0; i < depth; i++) {
680  if (aa->labels[i] < bb->labels[i])
681  return -1;
682  if (aa->labels[i] > bb->labels[i])
683  return 1;
684  }
685  return 0;
686 }
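/* Editorial note: this comparator is passed to qsort() by
   hierarchy_info::init() below so that the address table is ordered by the
   outermost labels first, leaving threads that share the outer levels of the
   hierarchy adjacent in the table. */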
687 
688 /* A structure for holding machine-specific hierarchy info to be computed once
689  at init. This structure represents a mapping of threads to the actual machine
690  hierarchy, or to our best guess at what the hierarchy might be, for the
691  purpose of performing an efficient barrier. In the worst case, when there is
692  no machine hierarchy information, it produces a tree suitable for a barrier,
693  similar to the tree used in the hyper barrier. */
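/* Illustrative example (editorial): on a machine with 4 packages, 4 cores per
   package and 2 hardware threads per core, the per-level fan-out stored in
   numPerLevel would be {2, 4, 4, 1, ...} (level 0 is the leaves), and
   skipPerLevel, built as skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1]
   with skipPerLevel[0] = 1, would begin {1, 2, 8, 32, ...}; levels past the
   detected depth double the previous skip value to absorb oversubscription. */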
694 class hierarchy_info {
695 public:
696  /* Good default values for number of leaves and branching factor, given no
697  affinity information. Behaves a bit like hyper barrier. */
698  static const kmp_uint32 maxLeaves = 4;
699  static const kmp_uint32 minBranch = 4;
705  kmp_uint32 maxLevels;
706 
711  kmp_uint32 depth;
712  kmp_uint32 base_num_threads;
713  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
714  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
715  // 2=initialization in progress
716  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
717 
722  kmp_uint32 *numPerLevel;
723  kmp_uint32 *skipPerLevel;
724 
725  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
726  int hier_depth = adr2os[0].first.depth;
727  int level = 0;
728  for (int i = hier_depth - 1; i >= 0; --i) {
729  int max = -1;
730  for (int j = 0; j < num_addrs; ++j) {
731  int next = adr2os[j].first.childNums[i];
732  if (next > max)
733  max = next;
734  }
735  numPerLevel[level] = max + 1;
736  ++level;
737  }
738  }
739 
740  hierarchy_info()
741  : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
742 
743  void fini() {
744  if (!uninitialized && numPerLevel) {
745  __kmp_free(numPerLevel);
746  numPerLevel = NULL;
747  uninitialized = not_initialized;
748  }
749  }
750 
751  void init(AddrUnsPair *adr2os, int num_addrs) {
752  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
753  &uninitialized, not_initialized, initializing);
754  if (bool_result == 0) { // Wait for initialization
755  while (TCR_1(uninitialized) != initialized)
756  KMP_CPU_PAUSE();
757  return;
758  }
759  KMP_DEBUG_ASSERT(bool_result == 1);
760 
761  /* Explicitly initialize the data fields here to prevent use of dirty
762  values observed when the static library is re-initialized multiple times
763  (e.g. when a non-OpenMP thread repeatedly launches/joins a thread that
764  uses OpenMP). */
765  depth = 1;
766  resizing = 0;
767  maxLevels = 7;
768  numPerLevel =
769  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
770  skipPerLevel = &(numPerLevel[maxLevels]);
771  for (kmp_uint32 i = 0; i < maxLevels;
772  ++i) { // init numPerLevel[*] to 1 item per level
773  numPerLevel[i] = 1;
774  skipPerLevel[i] = 1;
775  }
776 
777  // Sort table by physical ID
778  if (adr2os) {
779  qsort(adr2os, num_addrs, sizeof(*adr2os),
780  __kmp_affinity_cmp_Address_labels);
781  deriveLevels(adr2os, num_addrs);
782  } else {
783  numPerLevel[0] = maxLeaves;
784  numPerLevel[1] = num_addrs / maxLeaves;
785  if (num_addrs % maxLeaves)
786  numPerLevel[1]++;
787  }
788 
789  base_num_threads = num_addrs;
790  for (int i = maxLevels - 1; i >= 0;
791  --i) // count non-empty levels to get depth
792  if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
793  depth++;
794 
795  kmp_uint32 branch = minBranch;
796  if (numPerLevel[0] == 1)
797  branch = num_addrs / maxLeaves;
798  if (branch < minBranch)
799  branch = minBranch;
800  for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
801  while (numPerLevel[d] > branch ||
802  (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
803  if (numPerLevel[d] & 1)
804  numPerLevel[d]++;
805  numPerLevel[d] = numPerLevel[d] >> 1;
806  if (numPerLevel[d + 1] == 1)
807  depth++;
808  numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
809  }
810  if (numPerLevel[0] == 1) {
811  branch = branch >> 1;
812  if (branch < 4)
813  branch = minBranch;
814  }
815  }
816 
817  for (kmp_uint32 i = 1; i < depth; ++i)
818  skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
819  // Fill in hierarchy in the case of oversubscription
820  for (kmp_uint32 i = depth; i < maxLevels; ++i)
821  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
822 
823  uninitialized = initialized; // One writer
824  }
825 
826  // Resize the hierarchy if nproc changes to something larger than before
827  void resize(kmp_uint32 nproc) {
828  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
829  while (bool_result == 0) { // someone else is trying to resize
830  KMP_CPU_PAUSE();
831  if (nproc <= base_num_threads) // happy with other thread's resize
832  return;
833  else // try to resize
834  bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
835  }
836  KMP_DEBUG_ASSERT(bool_result != 0);
837  if (nproc <= base_num_threads)
838  return; // happy with other thread's resize
839 
840  // Calculate new maxLevels
841  kmp_uint32 old_sz = skipPerLevel[depth - 1];
842  kmp_uint32 incs = 0, old_maxLevels = maxLevels;
843  // First see if old maxLevels is enough to contain new size
844  for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
845  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
846  numPerLevel[i - 1] *= 2;
847  old_sz *= 2;
848  depth++;
849  }
850  if (nproc > old_sz) { // Not enough space, need to expand hierarchy
851  while (nproc > old_sz) {
852  old_sz *= 2;
853  incs++;
854  depth++;
855  }
856  maxLevels += incs;
857 
858  // Resize arrays
859  kmp_uint32 *old_numPerLevel = numPerLevel;
860  kmp_uint32 *old_skipPerLevel = skipPerLevel;
861  numPerLevel = skipPerLevel = NULL;
862  numPerLevel =
863  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
864  skipPerLevel = &(numPerLevel[maxLevels]);
865 
866  // Copy old elements from old arrays
867  for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
868  // carry over the previously computed values
869  numPerLevel[i] = old_numPerLevel[i];
870  skipPerLevel[i] = old_skipPerLevel[i];
871  }
872 
873  // Init new elements in arrays to 1
874  for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
875  // init numPerLevel[*] to 1 item per level
876  numPerLevel[i] = 1;
877  skipPerLevel[i] = 1;
878  }
879 
880  // Free old arrays
881  __kmp_free(old_numPerLevel);
882  }
883 
884  // Fill in oversubscription levels of hierarchy
885  for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
886  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
887 
888  base_num_threads = nproc;
889  resizing = 0; // One writer
890  }
891 };
892 #endif // KMP_AFFINITY_H