barrier 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. // <barrier> -*- C++ -*-
  2. // Copyright (C) 2020-2022 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // You should have received a copy of the GNU General Public License along
  14. // with this library; see the file COPYING3. If not see
  15. // <http://www.gnu.org/licenses/>.
  16. // This implementation is based on libcxx/include/barrier
  17. //===-- barrier.h --------------------------------------------------===//
  18. //
  19. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  20. // See https://llvm.org/LICENSE.txt for license information.
  21. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  22. //
  23. //===---------------------------------------------------------------===//
  24. /** @file include/barrier
  25. * This is a Standard C++ Library header.
  26. */
  27. #ifndef _GLIBCXX_BARRIER
  28. #define _GLIBCXX_BARRIER 1
  29. #pragma GCC system_header
  30. #if __cplusplus > 201703L
  31. #include <bits/atomic_base.h>
  32. #if __cpp_lib_atomic_wait && __cpp_aligned_new
  33. #include <bits/std_thread.h>
  34. #include <bits/unique_ptr.h>
  35. #include <array>
  36. #define __cpp_lib_barrier 201907L
  37. namespace std _GLIBCXX_VISIBILITY(default)
  38. {
  39. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  40. struct __empty_completion
  41. {
  42. _GLIBCXX_ALWAYS_INLINE void
  43. operator()() noexcept
  44. { }
  45. };
  46. /*
  47. The default implementation of __tree_barrier is a classic tree barrier.
  48. It looks different from literature pseudocode for two main reasons:
  49. 1. Threads that call into std::barrier functions do not provide indices,
  50. so a numbering step is added before the actual barrier algorithm,
  51. appearing as an N+1 round to the N rounds of the tree barrier.
  52. 2. A great deal of attention has been paid to avoid cache line thrashing
  53. by flattening the tree structure into cache-line sized arrays, that
  54. are indexed in an efficient way.
  55. */
  56. enum class __barrier_phase_t : unsigned char { };
  57. template<typename _CompletionF>
  58. class __tree_barrier
  59. {
  60. using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
  61. using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
  62. static constexpr auto __phase_alignment =
  63. __atomic_phase_ref_t::required_alignment;
  64. using __tickets_t = std::array<__barrier_phase_t, 64>;
  65. struct alignas(64) /* naturally-align the heap state */ __state_t
  66. {
  67. alignas(__phase_alignment) __tickets_t __tickets;
  68. };
  69. ptrdiff_t _M_expected;
  70. unique_ptr<__state_t[]> _M_state;
  71. __atomic_base<ptrdiff_t> _M_expected_adjustment;
  72. _CompletionF _M_completion;
  73. alignas(__phase_alignment) __barrier_phase_t _M_phase;
  74. bool
  75. _M_arrive(__barrier_phase_t __old_phase, size_t __current)
  76. {
  77. const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
  78. const auto __half_step =
  79. static_cast<__barrier_phase_t>(__old_phase_val + 1);
  80. const auto __full_step =
  81. static_cast<__barrier_phase_t>(__old_phase_val + 2);
  82. size_t __current_expected = _M_expected;
  83. __current %= ((_M_expected + 1) >> 1);
  84. for (int __round = 0; ; ++__round)
  85. {
  86. if (__current_expected <= 1)
  87. return true;
  88. size_t const __end_node = ((__current_expected + 1) >> 1),
  89. __last_node = __end_node - 1;
  90. for ( ; ; ++__current)
  91. {
  92. if (__current == __end_node)
  93. __current = 0;
  94. auto __expect = __old_phase;
  95. __atomic_phase_ref_t __phase(_M_state[__current]
  96. .__tickets[__round]);
  97. if (__current == __last_node && (__current_expected & 1))
  98. {
  99. if (__phase.compare_exchange_strong(__expect, __full_step,
  100. memory_order_acq_rel))
  101. break; // I'm 1 in 1, go to next __round
  102. }
  103. else if (__phase.compare_exchange_strong(__expect, __half_step,
  104. memory_order_acq_rel))
  105. {
  106. return false; // I'm 1 in 2, done with arrival
  107. }
  108. else if (__expect == __half_step)
  109. {
  110. if (__phase.compare_exchange_strong(__expect, __full_step,
  111. memory_order_acq_rel))
  112. break; // I'm 2 in 2, go to next __round
  113. }
  114. }
  115. __current_expected = __last_node + 1;
  116. __current >>= 1;
  117. }
  118. }
  119. public:
  120. using arrival_token = __barrier_phase_t;
  121. static constexpr ptrdiff_t
  122. max() noexcept
  123. { return __PTRDIFF_MAX__; }
  124. __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
  125. : _M_expected(__expected), _M_expected_adjustment(0),
  126. _M_completion(move(__completion)),
  127. _M_phase(static_cast<__barrier_phase_t>(0))
  128. {
  129. size_t const __count = (_M_expected + 1) >> 1;
  130. _M_state = std::make_unique<__state_t[]>(__count);
  131. }
  132. [[nodiscard]] arrival_token
  133. arrive(ptrdiff_t __update)
  134. {
  135. std::hash<std::thread::id> __hasher;
  136. size_t __current = __hasher(std::this_thread::get_id());
  137. __atomic_phase_ref_t __phase(_M_phase);
  138. const auto __old_phase = __phase.load(memory_order_relaxed);
  139. const auto __cur = static_cast<unsigned char>(__old_phase);
  140. for(; __update; --__update)
  141. {
  142. if(_M_arrive(__old_phase, __current))
  143. {
  144. _M_completion();
  145. _M_expected += _M_expected_adjustment.load(memory_order_relaxed);
  146. _M_expected_adjustment.store(0, memory_order_relaxed);
  147. auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
  148. __phase.store(__new_phase, memory_order_release);
  149. __phase.notify_all();
  150. }
  151. }
  152. return __old_phase;
  153. }
  154. void
  155. wait(arrival_token&& __old_phase) const
  156. {
  157. __atomic_phase_const_ref_t __phase(_M_phase);
  158. auto const __test_fn = [=]
  159. {
  160. return __phase.load(memory_order_acquire) != __old_phase;
  161. };
  162. std::__atomic_wait_address(&_M_phase, __test_fn);
  163. }
  164. void
  165. arrive_and_drop()
  166. {
  167. _M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
  168. (void)arrive(1);
  169. }
  170. };
  171. template<typename _CompletionF = __empty_completion>
  172. class barrier
  173. {
  174. // Note, we may introduce a "central" barrier algorithm at some point
  175. // for more space constrained targets
  176. using __algorithm_t = __tree_barrier<_CompletionF>;
  177. __algorithm_t _M_b;
  178. public:
  179. class arrival_token final
  180. {
  181. public:
  182. arrival_token(arrival_token&&) = default;
  183. arrival_token& operator=(arrival_token&&) = default;
  184. ~arrival_token() = default;
  185. private:
  186. friend class barrier;
  187. using __token = typename __algorithm_t::arrival_token;
  188. explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
  189. __token _M_tok;
  190. };
  191. static constexpr ptrdiff_t
  192. max() noexcept
  193. { return __algorithm_t::max(); }
  194. explicit
  195. barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
  196. : _M_b(__count, std::move(__completion))
  197. { }
  198. barrier(barrier const&) = delete;
  199. barrier& operator=(barrier const&) = delete;
  200. [[nodiscard]] arrival_token
  201. arrive(ptrdiff_t __update = 1)
  202. { return arrival_token{_M_b.arrive(__update)}; }
  203. void
  204. wait(arrival_token&& __phase) const
  205. { _M_b.wait(std::move(__phase._M_tok)); }
  206. void
  207. arrive_and_wait()
  208. { wait(arrive()); }
  209. void
  210. arrive_and_drop()
  211. { _M_b.arrive_and_drop(); }
  212. };
  213. _GLIBCXX_END_NAMESPACE_VERSION
  214. } // namespace
  215. #endif // __cpp_lib_atomic_wait && __cpp_aligned_new
  216. #endif // __cplusplus > 201703L
  217. #endif // _GLIBCXX_BARRIER