libstdc++
barrier
// <barrier> -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
 */

#ifndef _GLIBCXX_BARRIER
#define _GLIBCXX_BARRIER 1

#pragma GCC system_header

#if __cplusplus > 201703L
#include <bits/atomic_base.h>
#if __cpp_lib_atomic_wait && __cpp_aligned_new
#include <bits/std_thread.h>
#include <bits/unique_ptr.h>

#include <array>

#define __cpp_lib_barrier 201907L

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  struct __empty_completion
  {
    _GLIBCXX_ALWAYS_INLINE void
    operator()() noexcept
    { }
  };

/*

The default implementation of __tree_barrier is a classic tree barrier.

It looks different from literature pseudocode for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoid cache line thrashing
    by flattening the tree structure into cache-line sized arrays, that
    are indexed in an efficient way.

*/
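/*
Illustrative walk-through (exposition only, added here for clarity): with
_M_expected == 4, the state holds (4 + 1) >> 1 == 2 tree nodes.  In round 0
each node's ticket accepts two arrivals: the first CAS-es it to the half
step and returns immediately, the second CAS-es it to the full step and
advances.  The two advancing threads meet again on a single node in round 1,
and the one that completes it sees __current_expected <= 1, runs the
completion function, and bumps the published phase by two.
*/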

  enum class __barrier_phase_t : unsigned char { };

  template<typename _CompletionF>
    class __tree_barrier
    {
      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
      static constexpr auto __phase_alignment =
        __atomic_phase_ref_t::required_alignment;

      using __tickets_t = std::array<__barrier_phase_t, 64>;
      struct alignas(64) /* naturally-align the heap state */ __state_t
      {
        alignas(__phase_alignment) __tickets_t __tickets;
      };

      ptrdiff_t _M_expected;
      unique_ptr<__state_t[]> _M_state;
      __atomic_base<ptrdiff_t> _M_expected_adjustment;
      _CompletionF _M_completion;

      alignas(__phase_alignment) __barrier_phase_t _M_phase;

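      // _M_arrive: walk the ticket tree for the phase that began at
      // __old_phase, starting from slot __current.  Returns true if the
      // calling thread was the final arrival and thereby completed the
      // phase (the caller then runs the completion function and publishes
      // the new phase); returns false if another thread will finish the
      // phase instead.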
      bool
      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
      {
        const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
        const auto __half_step =
          static_cast<__barrier_phase_t>(__old_phase_val + 1);
        const auto __full_step =
          static_cast<__barrier_phase_t>(__old_phase_val + 2);

        size_t __current_expected = _M_expected;
        __current %= ((_M_expected + 1) >> 1);

        for (int __round = 0; ; ++__round)
          {
            if (__current_expected <= 1)
              return true;
            size_t const __end_node = ((__current_expected + 1) >> 1),
                         __last_node = __end_node - 1;
            for ( ; ; ++__current)
              {
                if (__current == __end_node)
                  __current = 0;
                auto __expect = __old_phase;
                __atomic_phase_ref_t __phase(_M_state[__current]
                                                .__tickets[__round]);
                if (__current == __last_node && (__current_expected & 1))
                  {
                    if (__phase.compare_exchange_strong(__expect, __full_step,
                                                        memory_order_acq_rel))
                      break; // I'm 1 in 1, go to next __round
                  }
                else if (__phase.compare_exchange_strong(__expect, __half_step,
                                                         memory_order_acq_rel))
                  {
                    return false; // I'm 1 in 2, done with arrival
                  }
                else if (__expect == __half_step)
                  {
                    if (__phase.compare_exchange_strong(__expect, __full_step,
                                                        memory_order_acq_rel))
                      break; // I'm 2 in 2, go to next __round
                  }
              }
            __current_expected = __last_node + 1;
            __current >>= 1;
          }
      }

    public:
      using arrival_token = __barrier_phase_t;

      static constexpr ptrdiff_t
      max() noexcept
      { return __PTRDIFF_MAX__; }

      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
          : _M_expected(__expected), _M_expected_adjustment(0),
            _M_completion(move(__completion)),
            _M_phase(static_cast<__barrier_phase_t>(0))
      {
        size_t const __count = (_M_expected + 1) >> 1;

        _M_state = std::make_unique<__state_t[]>(__count);
      }

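      // arrive: register __update arrivals for the current phase.  The hash
      // of the calling thread's id seeds the starting slot in the ticket
      // arrays.  Returns the phase value observed on entry, which wait()
      // compares against to detect completion of the phase.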
      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update)
      {
        std::hash<std::thread::id> __hasher;
        size_t __current = __hasher(std::this_thread::get_id());
        __atomic_phase_ref_t __phase(_M_phase);
        const auto __old_phase = __phase.load(memory_order_relaxed);
        const auto __cur = static_cast<unsigned char>(__old_phase);
        for(; __update; --__update)
          {
            if(_M_arrive(__old_phase, __current))
              {
                _M_completion();
                _M_expected += _M_expected_adjustment.load(memory_order_relaxed);
                _M_expected_adjustment.store(0, memory_order_relaxed);
                auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
                __phase.store(__new_phase, memory_order_release);
                __phase.notify_all();
              }
          }
        return __old_phase;
      }

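      // wait: block until the published phase has moved past __old_phase.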
      void
      wait(arrival_token&& __old_phase) const
      {
        __atomic_phase_const_ref_t __phase(_M_phase);
        auto const __test_fn = [=]
          {
            return __phase.load(memory_order_acquire) != __old_phase;
          };
        std::__atomic_wait_address(&_M_phase, __test_fn);
      }

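      // arrive_and_drop: arrive once and shrink the expected count for
      // subsequent phases.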
      void
      arrive_and_drop()
      {
        _M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
        (void)arrive(1);
      }
    };

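  // std::barrier (C++20): a reusable synchronization point for a fixed
  // number of threads, optionally running a completion function exactly
  // once per phase.  All work is delegated to __tree_barrier above.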
  template<typename _CompletionF = __empty_completion>
    class barrier
    {
      // Note, we may introduce a "central" barrier algorithm at some point
      // for more space constrained targets
      using __algorithm_t = __tree_barrier<_CompletionF>;
      __algorithm_t _M_b;

    public:
      class arrival_token final
      {
      public:
        arrival_token(arrival_token&&) = default;
        arrival_token& operator=(arrival_token&&) = default;
        ~arrival_token() = default;

      private:
        friend class barrier;
        using __token = typename __algorithm_t::arrival_token;
        explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
        __token _M_tok;
      };

      static constexpr ptrdiff_t
      max() noexcept
      { return __algorithm_t::max(); }

      explicit
      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
      : _M_b(__count, std::move(__completion))
      { }

      barrier(barrier const&) = delete;
      barrier& operator=(barrier const&) = delete;

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update = 1)
      { return arrival_token{_M_b.arrive(__update)}; }

      void
      wait(arrival_token&& __phase) const
      { _M_b.wait(std::move(__phase._M_tok)); }

      void
      arrive_and_wait()
      { wait(arrive()); }

      void
      arrive_and_drop()
      { _M_b.arrive_and_drop(); }
    };

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
#endif // __cplusplus > 201703L
#endif // _GLIBCXX_BARRIER
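A minimal usage sketch (not part of the header above, shown only to
illustrate the public std::barrier API): three worker threads move through
two phases in lock-step, and the completion function runs exactly once per
phase.

// barrier_demo.cc -- compile with: g++ -std=c++20 -pthread barrier_demo.cc
#include <barrier>
#include <cstdio>
#include <thread>
#include <vector>

int main()
{
  // The completion function must be nothrow-invocable.
  auto on_completion = []() noexcept { std::puts("-- phase complete --"); };
  std::barrier sync_point(3, on_completion);

  auto worker = [&](int id)
  {
    std::printf("worker %d: phase 1\n", id);
    sync_point.arrive_and_wait();   // blocks until all three have arrived
    std::printf("worker %d: phase 2\n", id);
    sync_point.arrive_and_wait();
  };

  std::vector<std::thread> threads;
  for (int i = 0; i < 3; ++i)
    threads.emplace_back(worker, i);
  for (auto& t : threads)
    t.join();
}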