libstdc++
simd.h
1 // Definition of the public simd interfaces -*- C++ -*-
2 
3 // Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_H
27 
28 #if __cplusplus >= 201703L
29 
30 #include "simd_detail.h"
31 #include "numeric_traits.h"
32 #include <bit>
33 #include <bitset>
34 #ifdef _GLIBCXX_DEBUG_UB
35 #include <cstdio> // for stderr
36 #endif
37 #include <cstring>
38 #include <functional>
39 #include <iosfwd>
40 #include <utility>
41 
42 #if _GLIBCXX_SIMD_X86INTRIN
43 #include <x86intrin.h>
44 #elif _GLIBCXX_SIMD_HAVE_NEON
45 #include <arm_neon.h>
46 #endif
47 
48 /* There are several closely related types, with the following naming
49  * convention:
50  * _Tp: vectorizable (arithmetic) type (or any type)
51  * _TV: __vector_type_t<_Tp, _Np>
52  * _TW: _SimdWrapper<_Tp, _Np>
53  * _TI: __intrinsic_type_t<_Tp, _Np>
54  * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
55  * If one additional type is needed use _U instead of _T.
56  * Otherwise use _T\d, _TV\d, _TW\d, TI\d, _TVT\d.
57  *
58  * More naming conventions:
59  * _Ap or _Abi: An ABI tag from the simd_abi namespace
60  * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
61  * _IV, _IW as for _TV, _TW
62  * _Np: number of elements (not bytes)
63  * _Bytes: number of bytes
64  *
65  * Variable names:
66  * __k: mask object (vector- or bitmask)
67  */
68 _GLIBCXX_SIMD_BEGIN_NAMESPACE
69 
70 #if !_GLIBCXX_SIMD_X86INTRIN
71 using __m128 [[__gnu__::__vector_size__(16)]] = float;
72 using __m128d [[__gnu__::__vector_size__(16)]] = double;
73 using __m128i [[__gnu__::__vector_size__(16)]] = long long;
74 using __m256 [[__gnu__::__vector_size__(32)]] = float;
75 using __m256d [[__gnu__::__vector_size__(32)]] = double;
76 using __m256i [[__gnu__::__vector_size__(32)]] = long long;
77 using __m512 [[__gnu__::__vector_size__(64)]] = float;
78 using __m512d [[__gnu__::__vector_size__(64)]] = double;
79 using __m512i [[__gnu__::__vector_size__(64)]] = long long;
80 #endif
81 
82 namespace simd_abi {
83 // simd_abi forward declarations {{{
84 // implementation details:
85 struct _Scalar;
86 
87 template <int _Np>
88  struct _Fixed;
89 
90 // There are two major ABIs that appear on different architectures.
91 // Both have non-boolean values packed into an N Byte register
92 // -> #elements = N / sizeof(T)
93 // Masks differ:
94 // 1. Use value vector registers for masks (all 0 or all 1)
95 // 2. Use bitmasks (mask registers) with one bit per value in the corresponding
96 // value vector
97 //
98 // Both can be partially used, masking off the rest when doing horizontal
99 // operations or operations that can trap (e.g. FP_INVALID or integer division
100 // by 0). This is encoded as the number of used bytes.
101 template <int _UsedBytes>
102  struct _VecBuiltin;
103 
104 template <int _UsedBytes>
105  struct _VecBltnBtmsk;
106 
107 template <typename _Tp, int _Np>
108  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;
109 
110 template <int _UsedBytes = 16>
111  using _Sse = _VecBuiltin<_UsedBytes>;
112 
113 template <int _UsedBytes = 32>
114  using _Avx = _VecBuiltin<_UsedBytes>;
115 
116 template <int _UsedBytes = 64>
117  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;
118 
119 template <int _UsedBytes = 16>
120  using _Neon = _VecBuiltin<_UsedBytes>;
121 
122 // implementation-defined:
123 using __sse = _Sse<>;
124 using __avx = _Avx<>;
125 using __avx512 = _Avx512<>;
126 using __neon = _Neon<>;
127 using __neon128 = _Neon<16>;
128 using __neon64 = _Neon<8>;
129 
130 // standard:
131 template <typename _Tp, size_t _Np, typename...>
132  struct deduce;
133 
134 template <int _Np>
135  using fixed_size = _Fixed<_Np>;
136 
137 using scalar = _Scalar;
138 
139 // }}}
140 } // namespace simd_abi
141 // forward declarations is_simd(_mask), simd(_mask), simd_size {{{
142 template <typename _Tp>
143  struct is_simd;
144 
145 template <typename _Tp>
146  struct is_simd_mask;
147 
148 template <typename _Tp, typename _Abi>
149  class simd;
150 
151 template <typename _Tp, typename _Abi>
152  class simd_mask;
153 
154 template <typename _Tp, typename _Abi>
155  struct simd_size;
156 
157 // }}}
158 // load/store flags {{{
159 struct element_aligned_tag
160 {
161  template <typename _Tp, typename _Up = typename _Tp::value_type>
162  static constexpr size_t _S_alignment = alignof(_Up);
163 
164  template <typename _Tp, typename _Up>
165  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
166  _S_apply(_Up* __ptr)
167  { return __ptr; }
168 };
169 
170 struct vector_aligned_tag
171 {
172  template <typename _Tp, typename _Up = typename _Tp::value_type>
173  static constexpr size_t _S_alignment
174  = std::__bit_ceil(sizeof(_Up) * _Tp::size());
175 
176  template <typename _Tp, typename _Up>
177  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
178  _S_apply(_Up* __ptr)
179  {
180  return static_cast<_Up*>(
181  __builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>));
182  }
183 };
184 
185 template <size_t _Np> struct overaligned_tag
186 {
187  template <typename _Tp, typename _Up = typename _Tp::value_type>
188  static constexpr size_t _S_alignment = _Np;
189 
190  template <typename _Tp, typename _Up>
191  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
192  _S_apply(_Up* __ptr)
193  { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
194 };
195 
196 inline constexpr element_aligned_tag element_aligned = {};
197 
198 inline constexpr vector_aligned_tag vector_aligned = {};
199 
200 template <size_t _Np>
201  inline constexpr overaligned_tag<_Np> overaligned = {};
202 
203 // }}}
204 template <size_t _X>
205  using _SizeConstant = integral_constant<size_t, _X>;
206 
207 // unrolled/pack execution helpers
208 // __execute_n_times{{{
209 template <typename _Fp, size_t... _I>
210  _GLIBCXX_SIMD_INTRINSIC constexpr void
211  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
212  { ((void)__f(_SizeConstant<_I>()), ...); }
213 
214 template <typename _Fp>
215  _GLIBCXX_SIMD_INTRINSIC constexpr void
216  __execute_on_index_sequence(_Fp&&, index_sequence<>)
217  { }
218 
219 template <size_t _Np, typename _Fp>
220  _GLIBCXX_SIMD_INTRINSIC constexpr void
221  __execute_n_times(_Fp&& __f)
222  {
223  __execute_on_index_sequence(static_cast<_Fp&&>(__f),
224  make_index_sequence<_Np>{});
225  }
226 
227 // }}}
228 // __generate_from_n_evaluations{{{
229 template <typename _R, typename _Fp, size_t... _I>
230  _GLIBCXX_SIMD_INTRINSIC constexpr _R
231  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
232  { return _R{__f(_SizeConstant<_I>())...}; }
233 
234 template <size_t _Np, typename _R, typename _Fp>
235  _GLIBCXX_SIMD_INTRINSIC constexpr _R
236  __generate_from_n_evaluations(_Fp&& __f)
237  {
238  return __execute_on_index_sequence_with_return<_R>(
239  static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
240  }
241 
242 // }}}
243 // __call_with_n_evaluations{{{
244 template <size_t... _I, typename _F0, typename _FArgs>
245  _GLIBCXX_SIMD_INTRINSIC constexpr auto
246  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
247  { return __f0(__fargs(_SizeConstant<_I>())...); }
248 
249 template <size_t _Np, typename _F0, typename _FArgs>
250  _GLIBCXX_SIMD_INTRINSIC constexpr auto
251  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
252  {
253  return __call_with_n_evaluations(make_index_sequence<_Np>{},
254  static_cast<_F0&&>(__f0),
255  static_cast<_FArgs&&>(__fargs));
256  }
257 
258 // }}}
259 // __call_with_subscripts{{{
260 template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
261  _GLIBCXX_SIMD_INTRINSIC constexpr auto
262  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
263  { return __fun(__x[_First + _It]...); }
264 
265 template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
266  _GLIBCXX_SIMD_INTRINSIC constexpr auto
267  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
268  {
269  return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
270  make_index_sequence<_Np>(),
271  static_cast<_Fp&&>(__fun));
272  }
273 
274 // }}}
275 
276 // vvv ---- type traits ---- vvv
277 // integer type aliases{{{
278 using _UChar = unsigned char;
279 using _SChar = signed char;
280 using _UShort = unsigned short;
281 using _UInt = unsigned int;
282 using _ULong = unsigned long;
283 using _ULLong = unsigned long long;
284 using _LLong = long long;
285 
286 //}}}
287 // __first_of_pack{{{
288 template <typename _T0, typename...>
289  struct __first_of_pack
290  { using type = _T0; };
291 
292 template <typename... _Ts>
293  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;
294 
295 //}}}
296 // __value_type_or_identity_t {{{
297 template <typename _Tp>
298  typename _Tp::value_type
299  __value_type_or_identity_impl(int);
300 
301 template <typename _Tp>
302  _Tp
303  __value_type_or_identity_impl(float);
304 
305 template <typename _Tp>
306  using __value_type_or_identity_t
307  = decltype(__value_type_or_identity_impl<_Tp>(int()));
308 
309 // }}}
310 // __is_vectorizable {{{
311 template <typename _Tp>
312  struct __is_vectorizable : public is_arithmetic<_Tp> {};
313 
314 template <>
315  struct __is_vectorizable<bool> : public false_type {};
316 
317 template <typename _Tp>
318  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;
319 
320 // Deduces to a vectorizable type
321 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
322  using _Vectorizable = _Tp;
323 
324 // }}}
325 // _LoadStorePtr / __is_possible_loadstore_conversion {{{
326 template <typename _Ptr, typename _ValueType>
327  struct __is_possible_loadstore_conversion
328  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};
329 
330 template <>
331  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};
332 
333 // Deduces to a type allowed for load/store with the given value type.
334 template <typename _Ptr, typename _ValueType,
335  typename = enable_if_t<
336  __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
337  using _LoadStorePtr = _Ptr;
338 
339 // }}}
340 // __is_bitmask{{{
341 template <typename _Tp, typename = void_t<>>
342  struct __is_bitmask : false_type {};
343 
344 template <typename _Tp>
345  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;
346 
347 // the __mmaskXX case:
348 template <typename _Tp>
349  struct __is_bitmask<_Tp,
350  void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
351  : true_type {};
352 
353 // }}}
354 // __int_for_sizeof{{{
355 #pragma GCC diagnostic push
356 #pragma GCC diagnostic ignored "-Wpedantic"
357 template <size_t _Bytes>
358  constexpr auto
359  __int_for_sizeof()
360  {
361  if constexpr (_Bytes == sizeof(int))
362  return int();
363  #ifdef __clang__
364  else if constexpr (_Bytes == sizeof(char))
365  return char();
366  #else
367  else if constexpr (_Bytes == sizeof(_SChar))
368  return _SChar();
369  #endif
370  else if constexpr (_Bytes == sizeof(short))
371  return short();
372  #ifndef __clang__
373  else if constexpr (_Bytes == sizeof(long))
374  return long();
375  #endif
376  else if constexpr (_Bytes == sizeof(_LLong))
377  return _LLong();
378  #ifdef __SIZEOF_INT128__
379  else if constexpr (_Bytes == sizeof(__int128))
380  return __int128();
381  #endif // __SIZEOF_INT128__
382  else if constexpr (_Bytes % sizeof(int) == 0)
383  {
384  constexpr size_t _Np = _Bytes / sizeof(int);
385  struct _Ip
386  {
387  int _M_data[_Np];
388 
389  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
390  operator&(_Ip __rhs) const
391  {
392  return __generate_from_n_evaluations<_Np, _Ip>(
393  [&](auto __i) { return __rhs._M_data[__i] & _M_data[__i]; });
394  }
395 
396  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
397  operator|(_Ip __rhs) const
398  {
399  return __generate_from_n_evaluations<_Np, _Ip>(
400  [&](auto __i) { return __rhs._M_data[__i] | _M_data[__i]; });
401  }
402 
403  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
404  operator^(_Ip __rhs) const
405  {
406  return __generate_from_n_evaluations<_Np, _Ip>(
407  [&](auto __i) { return __rhs._M_data[__i] ^ _M_data[__i]; });
408  }
409 
410  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
411  operator~() const
412  {
413  return __generate_from_n_evaluations<_Np, _Ip>(
414  [&](auto __i) { return ~_M_data[__i]; });
415  }
416  };
417  return _Ip{};
418  }
419  else
420  static_assert(_Bytes != _Bytes, "this should be unreachable");
421  }
422 #pragma GCC diagnostic pop
423 
424 template <typename _Tp>
425  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());
426 
427 template <size_t _Np>
428  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
429 
430 // }}}
431 // __is_fixed_size_abi{{{
432 template <typename _Tp>
433  struct __is_fixed_size_abi : false_type {};
434 
435 template <int _Np>
436  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};
437 
438 template <typename _Tp>
439  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;
440 
441 // }}}
442 // constexpr feature detection{{{
443 constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
444 constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
445 constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
446 constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
447 constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
448 constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
449 constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
450 constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
451 constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
452 constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
453 constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
454 constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
455 constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
456 constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
457 constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
458 constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
459 constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
460 constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
461 constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
462 constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
463 constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
464 constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
465 constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
466 constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
467 
468 constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
469 constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
470 constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
471 constexpr inline bool __support_neon_float =
472 #if defined __GCC_IEC_559
473  __GCC_IEC_559 == 0;
474 #elif defined __FAST_MATH__
475  true;
476 #else
477  false;
478 #endif
479 
480 #ifdef __POWER9_VECTOR__
481 constexpr inline bool __have_power9vec = true;
482 #else
483 constexpr inline bool __have_power9vec = false;
484 #endif
485 #if defined __POWER8_VECTOR__
486 constexpr inline bool __have_power8vec = true;
487 #else
488 constexpr inline bool __have_power8vec = __have_power9vec;
489 #endif
490 #if defined __VSX__
491 constexpr inline bool __have_power_vsx = true;
492 #else
493 constexpr inline bool __have_power_vsx = __have_power8vec;
494 #endif
495 #if defined __ALTIVEC__
496 constexpr inline bool __have_power_vmx = true;
497 #else
498 constexpr inline bool __have_power_vmx = __have_power_vsx;
499 #endif
500 
501 // }}}
502 // __is_scalar_abi {{{
503 template <typename _Abi>
504  constexpr bool
505  __is_scalar_abi()
506  { return is_same_v<simd_abi::scalar, _Abi>; }
507 
508 // }}}
509 // __abi_bytes_v {{{
510 template <template <int> class _Abi, int _Bytes>
511  constexpr int
512  __abi_bytes_impl(_Abi<_Bytes>*)
513  { return _Bytes; }
514 
515 template <typename _Tp>
516  constexpr int
517  __abi_bytes_impl(_Tp*)
518  { return -1; }
519 
520 template <typename _Abi>
521  inline constexpr int __abi_bytes_v
522  = __abi_bytes_impl(static_cast<_Abi*>(nullptr));
523 
524 // }}}
525 // __is_builtin_bitmask_abi {{{
526 template <typename _Abi>
527  constexpr bool
528  __is_builtin_bitmask_abi()
529  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }
530 
531 // }}}
532 // __is_sse_abi {{{
533 template <typename _Abi>
534  constexpr bool
535  __is_sse_abi()
536  {
537  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
538  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
539  }
540 
541 // }}}
542 // __is_avx_abi {{{
543 template <typename _Abi>
544  constexpr bool
545  __is_avx_abi()
546  {
547  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
548  return _Bytes > 16 && _Bytes <= 32
549  && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
550  }
551 
552 // }}}
553 // __is_avx512_abi {{{
554 template <typename _Abi>
555  constexpr bool
556  __is_avx512_abi()
557  {
558  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
559  return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
560  }
561 
562 // }}}
563 // __is_neon_abi {{{
564 template <typename _Abi>
565  constexpr bool
566  __is_neon_abi()
567  {
568  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
569  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
570  }
571 
572 // }}}
573 // __make_dependent_t {{{
574 template <typename, typename _Up>
575  struct __make_dependent
576  { using type = _Up; };
577 
578 template <typename _Tp, typename _Up>
579  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;
580 
581 // }}}
582 // ^^^ ---- type traits ---- ^^^
583 
584 // __invoke_ub{{{
585 template <typename... _Args>
586  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
587  __invoke_ub([[maybe_unused]] const char* __msg,
588  [[maybe_unused]] const _Args&... __args)
589  {
590 #ifdef _GLIBCXX_DEBUG_UB
591  __builtin_fprintf(stderr, __msg, __args...);
592  __builtin_trap();
593 #else
594  __builtin_unreachable();
595 #endif
596  }
597 
598 // }}}
599 // __assert_unreachable{{{
600 template <typename _Tp>
601  struct __assert_unreachable
602  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };
603 
604 // }}}
605 // __size_or_zero_v {{{
606 template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
607  constexpr size_t
608  __size_or_zero_dispatch(int)
609  { return _Np; }
610 
611 template <typename _Tp, typename _Ap>
612  constexpr size_t
613  __size_or_zero_dispatch(float)
614  { return 0; }
615 
616 template <typename _Tp, typename _Ap>
617  inline constexpr size_t __size_or_zero_v
618  = __size_or_zero_dispatch<_Tp, _Ap>(0);
619 
620 // }}}
621 // __div_roundup {{{
622 inline constexpr size_t
623 __div_roundup(size_t __a, size_t __b)
624 { return (__a + __b - 1) / __b; }
625 
626 // }}}
627 // _ExactBool{{{
628 class _ExactBool
629 {
630  const bool _M_data;
631 
632 public:
633  _GLIBCXX_SIMD_INTRINSIC constexpr _ExactBool(bool __b) : _M_data(__b) {}
634 
635  _ExactBool(int) = delete;
636 
637  _GLIBCXX_SIMD_INTRINSIC constexpr operator bool() const { return _M_data; }
638 };
639 
640 // }}}
641 // __may_alias{{{
642 /**@internal
643  * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
644  * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
645  * that support it).
646  */
647 template <typename _Tp>
648  using __may_alias [[__gnu__::__may_alias__]] = _Tp;
649 
650 // }}}
651 // _UnsupportedBase {{{
652 // simd and simd_mask base for unsupported <_Tp, _Abi>
653 struct _UnsupportedBase
654 {
655  _UnsupportedBase() = delete;
656  _UnsupportedBase(const _UnsupportedBase&) = delete;
657  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
658  ~_UnsupportedBase() = delete;
659 };
660 
661 // }}}
662 // _InvalidTraits {{{
663 /**
664  * @internal
665  * Defines the implementation of __a given <_Tp, _Abi>.
666  *
667  * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
668  * possible. Static assertions in the type definition do not suffice. It is
669  * important that SFINAE works.
670  */
671 struct _InvalidTraits
672 {
673  using _IsValid = false_type;
674  using _SimdBase = _UnsupportedBase;
675  using _MaskBase = _UnsupportedBase;
676 
677  static constexpr size_t _S_full_size = 0;
678  static constexpr bool _S_is_partial = false;
679 
680  static constexpr size_t _S_simd_align = 1;
681  struct _SimdImpl;
682  struct _SimdMember {};
683  struct _SimdCastType;
684 
685  static constexpr size_t _S_mask_align = 1;
686  struct _MaskImpl;
687  struct _MaskMember {};
688  struct _MaskCastType;
689 };
690 
691 // }}}
692 // _SimdTraits {{{
693 template <typename _Tp, typename _Abi, typename = void_t<>>
694  struct _SimdTraits : _InvalidTraits {};
695 
696 // }}}
697 // __private_init, __bitset_init{{{
698 /**
699  * @internal
700  * Tag used for private init constructor of simd and simd_mask
701  */
702 inline constexpr struct _PrivateInit {} __private_init = {};
703 
704 inline constexpr struct _BitsetInit {} __bitset_init = {};
705 
706 // }}}
707 // __is_narrowing_conversion<_From, _To>{{{
708 template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
709  bool = is_arithmetic_v<_To>>
710  struct __is_narrowing_conversion;
711 
712 // ignore "signed/unsigned mismatch" in the following trait.
713 // The implicit conversions will do the right thing here.
714 template <typename _From, typename _To>
715  struct __is_narrowing_conversion<_From, _To, true, true>
716  : public __bool_constant<(
717  __digits_v<_From> > __digits_v<_To>
718  || __finite_max_v<_From> > __finite_max_v<_To>
719  || __finite_min_v<_From> < __finite_min_v<_To>
720  || (is_signed_v<_From> && is_unsigned_v<_To>))> {};
721 
722 template <typename _Tp>
723  struct __is_narrowing_conversion<_Tp, bool, true, true>
724  : public true_type {};
725 
726 template <>
727  struct __is_narrowing_conversion<bool, bool, true, true>
728  : public false_type {};
729 
730 template <typename _Tp>
731  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
732  : public false_type {};
733 
734 template <typename _From, typename _To>
735  struct __is_narrowing_conversion<_From, _To, false, true>
736  : public negation<is_convertible<_From, _To>> {};
737 
738 // }}}
739 // __converts_to_higher_integer_rank{{{
740 template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
741  struct __converts_to_higher_integer_rank : public true_type {};
742 
743 // this may fail for char -> short if sizeof(char) == sizeof(short)
744 template <typename _From, typename _To>
745  struct __converts_to_higher_integer_rank<_From, _To, false>
746  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};
747 
748 // }}}
749 // __data(simd/simd_mask) {{{
750 template <typename _Tp, typename _Ap>
751  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
752  __data(const simd<_Tp, _Ap>& __x);
753 
754 template <typename _Tp, typename _Ap>
755  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
756  __data(simd<_Tp, _Ap>& __x);
757 
758 template <typename _Tp, typename _Ap>
759  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
760  __data(const simd_mask<_Tp, _Ap>& __x);
761 
762 template <typename _Tp, typename _Ap>
763  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
764  __data(simd_mask<_Tp, _Ap>& __x);
765 
766 // }}}
767 // _SimdConverter {{{
768 template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
769  typename = void>
770  struct _SimdConverter;
771 
772 template <typename _Tp, typename _Ap>
773  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
774  {
775  template <typename _Up>
776  _GLIBCXX_SIMD_INTRINSIC const _Up&
777  operator()(const _Up& __x)
778  { return __x; }
779  };
780 
781 // }}}
782 // __to_value_type_or_member_type {{{
783 template <typename _V>
784  _GLIBCXX_SIMD_INTRINSIC constexpr auto
785  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
786  { return __data(__x); }
787 
788 template <typename _V>
789  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
790  __to_value_type_or_member_type(const typename _V::value_type& __x)
791  { return __x; }
792 
793 // }}}
794 // __bool_storage_member_type{{{
795 template <size_t _Size>
796  struct __bool_storage_member_type;
797 
798 template <size_t _Size>
799  using __bool_storage_member_type_t =
800  typename __bool_storage_member_type<_Size>::type;
801 
802 // }}}
803 // _SimdTuple {{{
804 // why not tuple?
805 // 1. tuple gives no guarantee about the storage order, but I require
806 // storage
807 // equivalent to array<_Tp, _Np>
808 // 2. direct access to the element type (first template argument)
809 // 3. enforces equal element type, only different _Abi types are allowed
810 template <typename _Tp, typename... _Abis>
811  struct _SimdTuple;
812 
813 //}}}
814 // __fixed_size_storage_t {{{
815 template <typename _Tp, int _Np>
816  struct __fixed_size_storage;
817 
818 template <typename _Tp, int _Np>
819  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;
820 
821 // }}}
822 // _SimdWrapper fwd decl{{{
823 template <typename _Tp, size_t _Size, typename = void_t<>>
824  struct _SimdWrapper;
825 
826 template <typename _Tp>
827  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
828 template <typename _Tp>
829  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
830 template <typename _Tp>
831  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
832 template <typename _Tp>
833  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
834 
835 // }}}
836 // __is_simd_wrapper {{{
837 template <typename _Tp>
838  struct __is_simd_wrapper : false_type {};
839 
840 template <typename _Tp, size_t _Np>
841  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};
842 
843 template <typename _Tp>
844  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;
845 
846 // }}}
847 // _BitOps {{{
848 struct _BitOps
849 {
850  // _S_bit_iteration {{{
851  template <typename _Tp, typename _Fp>
852  static void
853  _S_bit_iteration(_Tp __mask, _Fp&& __f)
854  {
855  static_assert(sizeof(_ULLong) >= sizeof(_Tp));
856  conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
857  if constexpr (is_convertible_v<_Tp, decltype(__k)>)
858  __k = __mask;
859  else
860  __k = __mask.to_ullong();
861  while(__k)
862  {
863  __f(std::__countr_zero(__k));
864  __k &= (__k - 1);
865  }
866  }
867 
868  //}}}
869 };
870 
871 //}}}
872 // __increment, __decrement {{{
873 template <typename _Tp = void>
874  struct __increment
875  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };
876 
877 template <>
878  struct __increment<void>
879  {
880  template <typename _Tp>
881  constexpr _Tp
882  operator()(_Tp __a) const
883  { return ++__a; }
884  };
885 
886 template <typename _Tp = void>
887  struct __decrement
888  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };
889 
890 template <>
891  struct __decrement<void>
892  {
893  template <typename _Tp>
894  constexpr _Tp
895  operator()(_Tp __a) const
896  { return --__a; }
897  };
898 
899 // }}}
900 // _ValuePreserving(OrInt) {{{
901 template <typename _From, typename _To,
902  typename = enable_if_t<negation<
903  __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
904  using _ValuePreserving = _From;
905 
906 template <typename _From, typename _To,
907  typename _DecayedFrom = __remove_cvref_t<_From>,
908  typename = enable_if_t<conjunction<
909  is_convertible<_From, _To>,
910  disjunction<
911  is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
912  conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
913  negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
914  using _ValuePreservingOrInt = _From;
915 
916 // }}}
917 // __intrinsic_type {{{
918 template <typename _Tp, size_t _Bytes, typename = void_t<>>
919  struct __intrinsic_type;
920 
921 template <typename _Tp, size_t _Size>
922  using __intrinsic_type_t =
923  typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;
924 
925 template <typename _Tp>
926  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
927 template <typename _Tp>
928  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
929 template <typename _Tp>
930  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
931 template <typename _Tp>
932  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
933 template <typename _Tp>
934  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
935 template <typename _Tp>
936  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
937 
938 // }}}
939 // _BitMask {{{
940 template <size_t _Np, bool _Sanitized = false>
941  struct _BitMask;
942 
943 template <size_t _Np, bool _Sanitized>
944  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};
945 
946 template <size_t _Np>
947  using _SanitizedBitMask = _BitMask<_Np, true>;
948 
949 template <size_t _Np, bool _Sanitized>
950  struct _BitMask
951  {
952  static_assert(_Np > 0);
953 
954  static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);
955 
956  using _Tp = conditional_t<_Np == 1, bool,
957  make_unsigned_t<__int_with_sizeof_t<std::min(
958  sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;
959 
960  static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));
961 
962  _Tp _M_bits[_S_array_size];
963 
964  static constexpr int _S_unused_bits
965  = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;
966 
967  static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
968 
969  constexpr _BitMask() noexcept = default;
970 
971  constexpr _BitMask(unsigned long long __x) noexcept
972  : _M_bits{static_cast<_Tp>(__x)} {}
973 
974  _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}
975 
976  constexpr _BitMask(const _BitMask&) noexcept = default;
977 
978  template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
979  && _Sanitized == true>>
980  constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
981  : _BitMask(__rhs._M_sanitized()) {}
982 
983  constexpr operator _SimdWrapper<bool, _Np>() const noexcept
984  {
985  static_assert(_S_array_size == 1);
986  return _M_bits[0];
987  }
988 
989  // precondition: is sanitized
990  constexpr _Tp
991  _M_to_bits() const noexcept
992  {
993  static_assert(_S_array_size == 1);
994  return _M_bits[0];
995  }
996 
997  // precondition: is sanitized
998  constexpr unsigned long long
999  to_ullong() const noexcept
1000  {
1001  static_assert(_S_array_size == 1);
1002  return _M_bits[0];
1003  }
1004 
1005  // precondition: is sanitized
1006  constexpr unsigned long
1007  to_ulong() const noexcept
1008  {
1009  static_assert(_S_array_size == 1);
1010  return _M_bits[0];
1011  }
1012 
1013  constexpr bitset<_Np>
1014  _M_to_bitset() const noexcept
1015  {
1016  static_assert(_S_array_size == 1);
1017  return _M_bits[0];
1018  }
1019 
1020  constexpr decltype(auto)
1021  _M_sanitized() const noexcept
1022  {
1023  if constexpr (_Sanitized)
1024  return *this;
1025  else if constexpr (_Np == 1)
1026  return _SanitizedBitMask<_Np>(_M_bits[0]);
1027  else
1028  {
1029  _SanitizedBitMask<_Np> __r = {};
1030  for (int __i = 0; __i < _S_array_size; ++__i)
1031  __r._M_bits[__i] = _M_bits[__i];
1032  if constexpr (_S_unused_bits > 0)
1033  __r._M_bits[_S_array_size - 1] &= _S_bitmask;
1034  return __r;
1035  }
1036  }
1037 
1038  template <size_t _Mp, bool _LSanitized>
1039  constexpr _BitMask<_Np + _Mp, _Sanitized>
1040  _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
1041  {
1042  constexpr size_t _RN = _Np + _Mp;
1043  using _Rp = _BitMask<_RN, _Sanitized>;
1044  if constexpr (_Rp::_S_array_size == 1)
1045  {
1046  _Rp __r{{_M_bits[0]}};
1047  __r._M_bits[0] <<= _Mp;
1048  __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
1049  return __r;
1050  }
1051  else
1052  __assert_unreachable<_Rp>();
1053  }
1054 
1055  // Return a new _BitMask with size _NewSize while dropping _DropLsb least
1056  // significant bits. If the operation implicitly produces a sanitized bitmask,
1057  // the result type will have _Sanitized set.
1058  template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
1059  constexpr auto
1060  _M_extract() const noexcept
1061  {
1062  static_assert(_Np > _DropLsb);
1063  static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
1064  "not implemented for bitmasks larger than one ullong");
1065  if constexpr (_NewSize == 1)
1066  // must sanitize because the return _Tp is bool
1067  return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
1068  else
1069  return _BitMask<_NewSize,
1070  ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
1071  && _NewSize + _DropLsb <= _Np)
1072  || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
1073  && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
1074  >> _DropLsb);
1075  }
1076 
1077  // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
1078  constexpr bool
1079  all() const noexcept
1080  {
1081  if constexpr (_Np == 1)
1082  return _M_bits[0];
1083  else if constexpr (!_Sanitized)
1084  return _M_sanitized().all();
1085  else
1086  {
1087  constexpr _Tp __allbits = ~_Tp();
1088  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1089  if (_M_bits[__i] != __allbits)
1090  return false;
1091  return _M_bits[_S_array_size - 1] == _S_bitmask;
1092  }
1093  }
1094 
1095  // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
1096  // false.
1097  constexpr bool
1098  any() const noexcept
1099  {
1100  if constexpr (_Np == 1)
1101  return _M_bits[0];
1102  else if constexpr (!_Sanitized)
1103  return _M_sanitized().any();
1104  else
1105  {
1106  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1107  if (_M_bits[__i] != 0)
1108  return true;
1109  return _M_bits[_S_array_size - 1] != 0;
1110  }
1111  }
1112 
1113  // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
1114  constexpr bool
1115  none() const noexcept
1116  {
1117  if constexpr (_Np == 1)
1118  return !_M_bits[0];
1119  else if constexpr (!_Sanitized)
1120  return _M_sanitized().none();
1121  else
1122  {
1123  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1124  if (_M_bits[__i] != 0)
1125  return false;
1126  return _M_bits[_S_array_size - 1] == 0;
1127  }
1128  }
1129 
1130  // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
1131  // false.
1132  constexpr int
1133  count() const noexcept
1134  {
1135  if constexpr (_Np == 1)
1136  return _M_bits[0];
1137  else if constexpr (!_Sanitized)
1138  return _M_sanitized().none();
1139  else
1140  {
1141  int __result = __builtin_popcountll(_M_bits[0]);
1142  for (int __i = 1; __i < _S_array_size; ++__i)
1143  __result += __builtin_popcountll(_M_bits[__i]);
1144  return __result;
1145  }
1146  }
1147 
1148  // Returns the bit at offset __i as bool.
1149  constexpr bool
1150  operator[](size_t __i) const noexcept
1151  {
1152  if constexpr (_Np == 1)
1153  return _M_bits[0];
1154  else if constexpr (_S_array_size == 1)
1155  return (_M_bits[0] >> __i) & 1;
1156  else
1157  {
1158  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1159  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1160  return (_M_bits[__j] >> __shift) & 1;
1161  }
1162  }
1163 
1164  template <size_t __i>
1165  constexpr bool
1166  operator[](_SizeConstant<__i>) const noexcept
1167  {
1168  static_assert(__i < _Np);
1169  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1170  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1171  return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
1172  }
1173 
1174  // Set the bit at offset __i to __x.
1175  constexpr void
1176  set(size_t __i, bool __x) noexcept
1177  {
1178  if constexpr (_Np == 1)
1179  _M_bits[0] = __x;
1180  else if constexpr (_S_array_size == 1)
1181  {
1182  _M_bits[0] &= ~_Tp(_Tp(1) << __i);
1183  _M_bits[0] |= _Tp(_Tp(__x) << __i);
1184  }
1185  else
1186  {
1187  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1188  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1189  _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
1190  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1191  }
1192  }
1193 
1194  template <size_t __i>
1195  constexpr void
1196  set(_SizeConstant<__i>, bool __x) noexcept
1197  {
1198  static_assert(__i < _Np);
1199  if constexpr (_Np == 1)
1200  _M_bits[0] = __x;
1201  else
1202  {
1203  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1204  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1205  constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
1206  _M_bits[__j] &= __mask;
1207  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1208  }
1209  }
1210 
1211  // Inverts all bits. Sanitized input leads to sanitized output.
1212  constexpr _BitMask
1213  operator~() const noexcept
1214  {
1215  if constexpr (_Np == 1)
1216  return !_M_bits[0];
1217  else
1218  {
1219  _BitMask __result{};
1220  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1221  __result._M_bits[__i] = ~_M_bits[__i];
1222  if constexpr (_Sanitized)
1223  __result._M_bits[_S_array_size - 1]
1224  = _M_bits[_S_array_size - 1] ^ _S_bitmask;
1225  else
1226  __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
1227  return __result;
1228  }
1229  }
1230 
1231  constexpr _BitMask&
1232  operator^=(const _BitMask& __b) & noexcept
1233  {
1234  __execute_n_times<_S_array_size>(
1235  [&](auto __i) { _M_bits[__i] ^= __b._M_bits[__i]; });
1236  return *this;
1237  }
1238 
1239  constexpr _BitMask&
1240  operator|=(const _BitMask& __b) & noexcept
1241  {
1242  __execute_n_times<_S_array_size>(
1243  [&](auto __i) { _M_bits[__i] |= __b._M_bits[__i]; });
1244  return *this;
1245  }
1246 
1247  constexpr _BitMask&
1248  operator&=(const _BitMask& __b) & noexcept
1249  {
1250  __execute_n_times<_S_array_size>(
1251  [&](auto __i) { _M_bits[__i] &= __b._M_bits[__i]; });
1252  return *this;
1253  }
1254 
1255  friend constexpr _BitMask
1256  operator^(const _BitMask& __a, const _BitMask& __b) noexcept
1257  {
1258  _BitMask __r = __a;
1259  __r ^= __b;
1260  return __r;
1261  }
1262 
1263  friend constexpr _BitMask
1264  operator|(const _BitMask& __a, const _BitMask& __b) noexcept
1265  {
1266  _BitMask __r = __a;
1267  __r |= __b;
1268  return __r;
1269  }
1270 
1271  friend constexpr _BitMask
1272  operator&(const _BitMask& __a, const _BitMask& __b) noexcept
1273  {
1274  _BitMask __r = __a;
1275  __r &= __b;
1276  return __r;
1277  }
1278 
1279  _GLIBCXX_SIMD_INTRINSIC
1280  constexpr bool
1281  _M_is_constprop() const
1282  {
1283  if constexpr (_S_array_size == 0)
1284  return __builtin_constant_p(_M_bits[0]);
1285  else
1286  {
1287  for (int __i = 0; __i < _S_array_size; ++__i)
1288  if (!__builtin_constant_p(_M_bits[__i]))
1289  return false;
1290  return true;
1291  }
1292  }
1293  };
1294 
1295 // }}}
1296 
1297 // vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
1298 // __min_vector_size {{{
1299 template <typename _Tp = void>
1300  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);
1301 
1302 #if _GLIBCXX_SIMD_HAVE_NEON
1303 template <>
1304  inline constexpr int __min_vector_size<void> = 8;
1305 #else
1306 template <>
1307  inline constexpr int __min_vector_size<void> = 16;
1308 #endif
1309 
1310 // }}}
1311 // __vector_type {{{
1312 template <typename _Tp, size_t _Np, typename = void>
1313  struct __vector_type_n {};
1314 
1315 // substition failure for 0-element case
1316 template <typename _Tp>
1317  struct __vector_type_n<_Tp, 0, void> {};
1318 
1319 // special case 1-element to be _Tp itself
1320 template <typename _Tp>
1321  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
1322  { using type = _Tp; };
1323 
1324 // else, use GNU-style builtin vector types
1325 template <typename _Tp, size_t _Np>
1326  struct __vector_type_n<_Tp, _Np,
1327  enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
1328  {
1329  static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));
1330 
1331  static constexpr size_t _S_Bytes =
1332 #ifdef __i386__
1333  // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
1334  // those objects are passed via MMX registers and nothing ever calls EMMS.
1335  _S_Np2 == 8 ? 16 :
1336 #endif
1337  _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
1338  : _S_Np2;
1339 
1340  using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
1341  };
1342 
1343 template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
1344  struct __vector_type;
1345 
1346 template <typename _Tp, size_t _Bytes>
1347  struct __vector_type<_Tp, _Bytes, 0>
1348  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};
1349 
1350 template <typename _Tp, size_t _Size>
1351  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;
1352 
1353 template <typename _Tp>
1354  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
1355 template <typename _Tp>
1356  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
1357 template <typename _Tp>
1358  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
1359 template <typename _Tp>
1360  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
1361 template <typename _Tp>
1362  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
1363 template <typename _Tp>
1364  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
1365 
1366 // }}}
1367 // __is_vector_type {{{
1368 template <typename _Tp, typename = void_t<>>
1369  struct __is_vector_type : false_type {};
1370 
1371 template <typename _Tp>
1372  struct __is_vector_type<
1373  _Tp, void_t<typename __vector_type<
1374  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1375  : is_same<_Tp, typename __vector_type<
1376  remove_reference_t<decltype(declval<_Tp>()[0])>,
1377  sizeof(_Tp)>::type> {};
1378 
1379 template <typename _Tp>
1380  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
1381 
1382 // }}}
1383 // _VectorTraits{{{
1384 template <typename _Tp, typename = void_t<>>
1385  struct _VectorTraitsImpl;
1386 
1387 template <typename _Tp>
1388  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>>>
1389  {
1390  using type = _Tp;
1391  using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
1392  static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
1393  using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
1394  template <typename _Up, int _W = _S_full_size>
1395  static constexpr bool _S_is
1396  = is_same_v<value_type, _Up> && _W == _S_full_size;
1397  };
1398 
1399 template <typename _Tp, size_t _Np>
1400  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
1401  void_t<__vector_type_t<_Tp, _Np>>>
1402  {
1403  using type = __vector_type_t<_Tp, _Np>;
1404  using value_type = _Tp;
1405  static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
1406  using _Wrapper = _SimdWrapper<_Tp, _Np>;
1407  static constexpr bool _S_is_partial = (_Np == _S_full_size);
1408  static constexpr int _S_partial_width = _Np;
1409  template <typename _Up, int _W = _S_full_size>
1410  static constexpr bool _S_is
1411  = is_same_v<value_type, _Up>&& _W == _S_full_size;
1412  };
1413 
1414 template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
1415  using _VectorTraits = _VectorTraitsImpl<_Tp>;
1416 
1417 // }}}
1418 // __as_vector{{{
1419 template <typename _V>
1420  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1421  __as_vector(_V __x)
1422  {
1423  if constexpr (__is_vector_type_v<_V>)
1424  return __x;
1425  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1426  return __data(__x)._M_data;
1427  else if constexpr (__is_vectorizable_v<_V>)
1428  return __vector_type_t<_V, 2>{__x};
1429  else
1430  return __x._M_data;
1431  }
1432 
1433 // }}}
1434 // __as_wrapper{{{
1435 template <size_t _Np = 0, typename _V>
1436  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1437  __as_wrapper(_V __x)
1438  {
1439  if constexpr (__is_vector_type_v<_V>)
1440  return _SimdWrapper<typename _VectorTraits<_V>::value_type,
1441  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
1442  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1443  {
1444  static_assert(_V::size() == _Np);
1445  return __data(__x);
1446  }
1447  else
1448  {
1449  static_assert(_V::_S_size == _Np);
1450  return __x;
1451  }
1452  }
1453 
1454 // }}}
1455 // __intrin_bitcast{{{
1456 template <typename _To, typename _From>
1457  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1458  __intrin_bitcast(_From __v)
1459  {
1460  static_assert(__is_vector_type_v<_From> && __is_vector_type_v<_To>);
1461  if constexpr (sizeof(_To) == sizeof(_From))
1462  return reinterpret_cast<_To>(__v);
1463  else if constexpr (sizeof(_From) > sizeof(_To))
1464  if constexpr (sizeof(_To) >= 16)
1465  return reinterpret_cast<const __may_alias<_To>&>(__v);
1466  else
1467  {
1468  _To __r;
1469  __builtin_memcpy(&__r, &__v, sizeof(_To));
1470  return __r;
1471  }
1472 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1473  else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
1474  return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
1475  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1476  else if constexpr (__have_avx512f && sizeof(_From) == 16
1477  && sizeof(_To) == 64)
1478  return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
1479  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1480  else if constexpr (__have_avx512f && sizeof(_From) == 32
1481  && sizeof(_To) == 64)
1482  return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
1483  reinterpret_cast<__vector_type_t<float, 8>>(__v)));
1484 #endif // _GLIBCXX_SIMD_X86INTRIN
1485  else if constexpr (sizeof(__v) <= 8)
1486  return reinterpret_cast<_To>(
1487  __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
1488  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
1489  else
1490  {
1491  static_assert(sizeof(_To) > sizeof(_From));
1492  _To __r = {};
1493  __builtin_memcpy(&__r, &__v, sizeof(_From));
1494  return __r;
1495  }
1496  }
1497 
1498 // }}}
1499 // __vector_bitcast{{{
1500 template <typename _To, size_t _NN = 0, typename _From,
1501  typename _FromVT = _VectorTraits<_From>,
1502  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
1503  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1504  __vector_bitcast(_From __x)
1505  {
1506  using _R = __vector_type_t<_To, _Np>;
1507  return __intrin_bitcast<_R>(__x);
1508  }
1509 
1510 template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
1511  size_t _Np
1512  = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
1513  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1514  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
1515  {
1516  static_assert(_Np > 1);
1517  return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
1518  }
1519 
1520 // }}}
1521 // __convert_x86 declarations {{{
1522 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
1523 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1524  _To __convert_x86(_Tp);
1525 
1526 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1527  _To __convert_x86(_Tp, _Tp);
1528 
1529 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1530  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);
1531 
1532 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1533  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);
1534 
1535 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1536  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
1537  _Tp, _Tp, _Tp, _Tp);
1538 #endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1539 
1540 //}}}
1541 // __bit_cast {{{
1542 template <typename _To, typename _From>
1543  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1544  __bit_cast(const _From __x)
1545  {
1546  // TODO: implement with / replace by __builtin_bit_cast ASAP
1547  static_assert(sizeof(_To) == sizeof(_From));
1548  constexpr bool __to_is_vectorizable
1549  = is_arithmetic_v<_To> || is_enum_v<_To>;
1550  constexpr bool __from_is_vectorizable
1551  = is_arithmetic_v<_From> || is_enum_v<_From>;
1552  if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
1553  return reinterpret_cast<_To>(__x);
1554  else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
1555  {
1556  using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
1557  return reinterpret_cast<_To>(_FV{__x});
1558  }
1559  else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
1560  {
1561  using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
1562  using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
1563  return reinterpret_cast<_TV>(_FV{__x})[0];
1564  }
1565  else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
1566  {
1567  using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
1568  return reinterpret_cast<_TV>(__x)[0];
1569  }
1570  else
1571  {
1572  _To __r;
1573  __builtin_memcpy(reinterpret_cast<char*>(&__r),
1574  reinterpret_cast<const char*>(&__x), sizeof(_To));
1575  return __r;
1576  }
1577  }
1578 
1579 // }}}
1580 // __to_intrin {{{
1581 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1582  typename _R
1583  = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
1584  _GLIBCXX_SIMD_INTRINSIC constexpr _R
1585  __to_intrin(_Tp __x)
1586  {
1587  static_assert(sizeof(__x) <= sizeof(_R),
1588  "__to_intrin may never drop values off the end");
1589  if constexpr (sizeof(__x) == sizeof(_R))
1590  return reinterpret_cast<_R>(__as_vector(__x));
1591  else
1592  {
1593  using _Up = __int_for_sizeof_t<_Tp>;
1594  return reinterpret_cast<_R>(
1595  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
1596  }
1597  }
1598 
1599 // }}}
1600 // __make_vector{{{
1601 template <typename _Tp, typename... _Args>
1602  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1603  __make_vector(const _Args&... __args)
1604  {
1605  return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...};
1606  }
1607 
1608 // }}}
1609 // __vector_broadcast{{{
1610 template <size_t _Np, typename _Tp>
1611  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1612  __vector_broadcast(_Tp __x)
1613  {
1614  return __call_with_n_evaluations<_Np>(
1615  [](auto... __xx) { return __vector_type_t<_Tp, _Np>{__xx...}; },
1616  [&__x](int) { return __x; });
1617  }
1618 
1619 // }}}
1620 // __generate_vector{{{
1621  template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1622  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1623  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1624  {
1625  return __vector_type_t<_Tp, _Np>{
1626  static_cast<_Tp>(__gen(_SizeConstant<_I>()))...};
1627  }
1628 
1629 template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1630  _GLIBCXX_SIMD_INTRINSIC constexpr _V
1631  __generate_vector(_Gp&& __gen)
1632  {
1633  if constexpr (__is_vector_type_v<_V>)
1634  return __generate_vector_impl<typename _VVT::value_type,
1635  _VVT::_S_full_size>(
1636  static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1637  else
1638  return __generate_vector_impl<typename _VVT::value_type,
1639  _VVT::_S_partial_width>(
1640  static_cast<_Gp&&>(__gen),
1641  make_index_sequence<_VVT::_S_partial_width>());
1642  }
1643 
1644 template <typename _Tp, size_t _Np, typename _Gp>
1645  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1646  __generate_vector(_Gp&& __gen)
1647  {
1648  return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1649  make_index_sequence<_Np>());
1650  }
1651 
1652 // }}}
1653 // __xor{{{
1654 template <typename _TW>
1655  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1656  __xor(_TW __a, _TW __b) noexcept
1657  {
1658  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1659  {
1660  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1661  _VectorTraitsImpl<_TW>>::value_type;
1662  if constexpr (is_floating_point_v<_Tp>)
1663  {
1664  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1665  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1666  ^ __vector_bitcast<_Ip>(__b));
1667  }
1668  else if constexpr (__is_vector_type_v<_TW>)
1669  return __a ^ __b;
1670  else
1671  return __a._M_data ^ __b._M_data;
1672  }
1673  else
1674  return __a ^ __b;
1675  }
1676 
1677 // }}}
1678 // __or{{{
1679 template <typename _TW>
1680  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1681  __or(_TW __a, _TW __b) noexcept
1682  {
1683  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1684  {
1685  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1686  _VectorTraitsImpl<_TW>>::value_type;
1687  if constexpr (is_floating_point_v<_Tp>)
1688  {
1689  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1690  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1691  | __vector_bitcast<_Ip>(__b));
1692  }
1693  else if constexpr (__is_vector_type_v<_TW>)
1694  return __a | __b;
1695  else
1696  return __a._M_data | __b._M_data;
1697  }
1698  else
1699  return __a | __b;
1700  }
1701 
1702 // }}}
1703 // __and{{{
1704 template <typename _TW>
1705  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1706  __and(_TW __a, _TW __b) noexcept
1707  {
1708  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1709  {
1710  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1711  _VectorTraitsImpl<_TW>>::value_type;
1712  if constexpr (is_floating_point_v<_Tp>)
1713  {
1714  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1715  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1716  & __vector_bitcast<_Ip>(__b));
1717  }
1718  else if constexpr (__is_vector_type_v<_TW>)
1719  return __a & __b;
1720  else
1721  return __a._M_data & __b._M_data;
1722  }
1723  else
1724  return __a & __b;
1725  }
1726 
1727 // }}}
1728 // __andnot{{{
1729 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1730 static constexpr struct
1731 {
1732  _GLIBCXX_SIMD_INTRINSIC __v4sf
1733  operator()(__v4sf __a, __v4sf __b) const noexcept
1734  { return __builtin_ia32_andnps(__a, __b); }
1735 
1736  _GLIBCXX_SIMD_INTRINSIC __v2df
1737  operator()(__v2df __a, __v2df __b) const noexcept
1738  { return __builtin_ia32_andnpd(__a, __b); }
1739 
1740  _GLIBCXX_SIMD_INTRINSIC __v2di
1741  operator()(__v2di __a, __v2di __b) const noexcept
1742  { return __builtin_ia32_pandn128(__a, __b); }
1743 
1744  _GLIBCXX_SIMD_INTRINSIC __v8sf
1745  operator()(__v8sf __a, __v8sf __b) const noexcept
1746  { return __builtin_ia32_andnps256(__a, __b); }
1747 
1748  _GLIBCXX_SIMD_INTRINSIC __v4df
1749  operator()(__v4df __a, __v4df __b) const noexcept
1750  { return __builtin_ia32_andnpd256(__a, __b); }
1751 
1752  _GLIBCXX_SIMD_INTRINSIC __v4di
1753  operator()(__v4di __a, __v4di __b) const noexcept
1754  {
1755  if constexpr (__have_avx2)
1756  return __builtin_ia32_andnotsi256(__a, __b);
1757  else
1758  return reinterpret_cast<__v4di>(
1759  __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1760  reinterpret_cast<__v4df>(__b)));
1761  }
1762 
1763  _GLIBCXX_SIMD_INTRINSIC __v16sf
1764  operator()(__v16sf __a, __v16sf __b) const noexcept
1765  {
1766  if constexpr (__have_avx512dq)
1767  return _mm512_andnot_ps(__a, __b);
1768  else
1769  return reinterpret_cast<__v16sf>(
1770  _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1771  reinterpret_cast<__v8di>(__b)));
1772  }
1773 
1774  _GLIBCXX_SIMD_INTRINSIC __v8df
1775  operator()(__v8df __a, __v8df __b) const noexcept
1776  {
1777  if constexpr (__have_avx512dq)
1778  return _mm512_andnot_pd(__a, __b);
1779  else
1780  return reinterpret_cast<__v8df>(
1781  _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1782  reinterpret_cast<__v8di>(__b)));
1783  }
1784 
1785  _GLIBCXX_SIMD_INTRINSIC __v8di
1786  operator()(__v8di __a, __v8di __b) const noexcept
1787  { return _mm512_andnot_si512(__a, __b); }
1788 } _S_x86_andnot;
1789 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1790 
1791 template <typename _TW>
1792  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1793  __andnot(_TW __a, _TW __b) noexcept
1794  {
1795  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1796  {
1797  using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1798  _VectorTraitsImpl<_TW>>;
1799  using _Tp = typename _TVT::value_type;
1800 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1801  if constexpr (sizeof(_TW) >= 16)
1802  {
1803  const auto __ai = __to_intrin(__a);
1804  const auto __bi = __to_intrin(__b);
1805  if (!__builtin_is_constant_evaluated()
1806  && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
1807  {
1808  const auto __r = _S_x86_andnot(__ai, __bi);
1809  if constexpr (is_convertible_v<decltype(__r), _TW>)
1810  return __r;
1811  else
1812  return reinterpret_cast<typename _TVT::type>(__r);
1813  }
1814  }
1815 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1816  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1817  return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
1818  & __vector_bitcast<_Ip>(__b));
1819  }
1820  else
1821  return ~__a & __b;
1822  }
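
// Example (illustrative sketch): note the operand order: __andnot(__a, __b)
// computes ~__a & __b, following the x86 ANDNPS/PANDN convention, not
// "__a and-not __b". Assumes __vector_broadcast from earlier in this file:
//
//   __vector_type_t<int, 4> __m = __vector_broadcast<4>(0xFF);
//   __vector_type_t<int, 4> __x = __vector_broadcast<4>(0x1234);
//   auto __r = __andnot(__m, __x); // per element: ~0xFF & 0x1234 == 0x1200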
1823 
1824 // }}}
1825 // __not{{{
1826 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1827  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
1828  __not(_Tp __a) noexcept
1829  {
1830  if constexpr (is_floating_point_v<typename _TVT::value_type>)
1831  return reinterpret_cast<typename _TVT::type>(
1832  ~__vector_bitcast<unsigned>(__a));
1833  else
1834  return ~__a;
1835  }
1836 
1837 // }}}
1838 // __concat{{{
1839 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1840  typename _R = __vector_type_t<typename _TVT::value_type,
1841  _TVT::_S_full_size * 2>>
1842  constexpr _R
1843  __concat(_Tp a_, _Tp b_)
1844  {
1845 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
1846  using _W
1847  = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
1848  conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
1849  long long, typename _TVT::value_type>>;
1850  constexpr int input_width = sizeof(_Tp) / sizeof(_W);
1851  const auto __a = __vector_bitcast<_W>(a_);
1852  const auto __b = __vector_bitcast<_W>(b_);
1853  using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
1854 #else
1855  constexpr int input_width = _TVT::_S_full_size;
1856  const _Tp& __a = a_;
1857  const _Tp& __b = b_;
1858  using _Up = _R;
1859 #endif
1860  if constexpr (input_width == 2)
1861  return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
1862  else if constexpr (input_width == 4)
1863  return reinterpret_cast<_R>(
1864  _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
1865  else if constexpr (input_width == 8)
1866  return reinterpret_cast<_R>(
1867  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
1868  __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
1869  else if constexpr (input_width == 16)
1870  return reinterpret_cast<_R>(
1871  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
1872  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
1873  __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
1874  __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
1875  __b[12], __b[13], __b[14], __b[15]});
1876  else if constexpr (input_width == 32)
1877  return reinterpret_cast<_R>(
1878  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
1879  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
1880  __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
1881  __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
1882  __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
1883  __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
1884  __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
1885  __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
1886  __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
1887  __b[31]});
1888  }
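
// Example (illustrative sketch): __concat joins two equally wide vectors into
// one of twice the element count. Assumes __make_vector from earlier in this
// file:
//
//   auto __lo = __make_vector<float>(0.f, 1.f, 2.f, 3.f);
//   auto __hi = __make_vector<float>(4.f, 5.f, 6.f, 7.f);
//   __vector_type_t<float, 8> __full = __concat(__lo, __hi); // {0, ..., 7}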
1889 
1890 // }}}
1891 // __zero_extend {{{
1892 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1893  struct _ZeroExtendProxy
1894  {
1895  using value_type = typename _TVT::value_type;
1896  static constexpr size_t _Np = _TVT::_S_full_size;
1897  const _Tp __x;
1898 
1899  template <typename _To, typename _ToVT = _VectorTraits<_To>,
1900  typename
1901  = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
1902  _GLIBCXX_SIMD_INTRINSIC operator _To() const
1903  {
1904  constexpr size_t _ToN = _ToVT::_S_full_size;
1905  if constexpr (_ToN == _Np)
1906  return __x;
1907  else if constexpr (_ToN == 2 * _Np)
1908  {
1909 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
1910  if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
1911  return __vector_bitcast<value_type>(
1912  _mm256_insertf128_ps(__m256(), __x, 0));
1913  else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
1914  return __vector_bitcast<value_type>(
1915  _mm256_insertf128_pd(__m256d(), __x, 0));
1916  else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
1917  return __vector_bitcast<value_type>(
1918  _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
1919  else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
1920  {
1921  if constexpr (__have_avx512dq)
1922  return __vector_bitcast<value_type>(
1923  _mm512_insertf32x8(__m512(), __x, 0));
1924  else
1925  return reinterpret_cast<__m512>(
1926  _mm512_insertf64x4(__m512d(),
1927  reinterpret_cast<__m256d>(__x), 0));
1928  }
1929  else if constexpr (__have_avx512f
1930  && _TVT::template _S_is<double, 4>)
1931  return __vector_bitcast<value_type>(
1932  _mm512_insertf64x4(__m512d(), __x, 0));
1933  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
1934  return __vector_bitcast<value_type>(
1935  _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
1936 #endif
1937  return __concat(__x, _Tp());
1938  }
1939  else if constexpr (_ToN == 4 * _Np)
1940  {
1941 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
1942  if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
1943  {
1944  return __vector_bitcast<value_type>(
1945  _mm512_insertf64x2(__m512d(), __x, 0));
1946  }
1947  else if constexpr (__have_avx512f
1948  && is_floating_point_v<value_type>)
1949  {
1950  return __vector_bitcast<value_type>(
1951  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
1952  0));
1953  }
1954  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
1955  {
1956  return __vector_bitcast<value_type>(
1957  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
1958  }
1959 #endif
1960  return __concat(__concat(__x, _Tp()),
1961  __vector_type_t<value_type, _Np * 2>());
1962  }
1963  else if constexpr (_ToN == 8 * _Np)
1964  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
1965  __vector_type_t<value_type, _Np * 4>());
1966  else if constexpr (_ToN == 16 * _Np)
1967  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
1968  __vector_type_t<value_type, _Np * 8>());
1969  else
1970  __assert_unreachable<_Tp>();
1971  }
1972  };
1973 
1974 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1975  _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
1976  __zero_extend(_Tp __x)
1977  { return {__x}; }
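
// Example (illustrative sketch): the proxy defers the destination width until
// the conversion target is known, so one call site can zero-extend to any
// larger vector type:
//
//   __vector_type_t<float, 4> __v = __vector_broadcast<4>(1.f);
//   __vector_type_t<float, 8> __w = __zero_extend(__v); // upper half zeroed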
1978 
1979 // }}}
1980 // __extract<_Offset, _SplitBy>{{{
1981 template <int _Offset,
1982  int _SplitBy,
1983  typename _Tp,
1984  typename _TVT = _VectorTraits<_Tp>,
1985  typename _R = __vector_type_t<typename _TVT::value_type,
1986  _TVT::_S_full_size / _SplitBy>>
1987  _GLIBCXX_SIMD_INTRINSIC constexpr _R
1988  __extract(_Tp __in)
1989  {
1990  using value_type = typename _TVT::value_type;
1991 #if _GLIBCXX_SIMD_X86INTRIN // {{{
1992  if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
1993  {
1994  if constexpr (__have_avx512dq && is_same_v<double, value_type>)
1995  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
1996  else if constexpr (is_floating_point_v<value_type>)
1997  return __vector_bitcast<value_type>(
1998  _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
1999  else
2000  return reinterpret_cast<_R>(
2001  _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2002  _Offset));
2003  }
2004  else
2005 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
2006  {
2007 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2008  using _W = conditional_t<
2009  is_floating_point_v<value_type>, double,
2010  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2011  static_assert(sizeof(_R) % sizeof(_W) == 0);
2012  constexpr int __return_width = sizeof(_R) / sizeof(_W);
2013  using _Up = __vector_type_t<_W, __return_width>;
2014  const auto __x = __vector_bitcast<_W>(__in);
2015 #else
2016  constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2017  using _Up = _R;
2018  const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2019  = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2020 #endif
2021  constexpr int _O = _Offset * __return_width;
2022  return __call_with_subscripts<__return_width, _O>(
2023  __x, [](auto... __entries) {
2024  return reinterpret_cast<_R>(_Up{__entries...});
2025  });
2026  }
2027  }
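
// Example (illustrative sketch): __extract<_Offset, _SplitBy> returns the
// _Offset-th of _SplitBy equally sized parts, e.g. the third quarter of an
// 8-element vector:
//
//   __vector_type_t<float, 8> __v = __vector_broadcast<8>(1.f);
//   __vector_type_t<float, 2> __q = __extract<2, 4>(__v); // {__v[4], __v[5]}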
2028 
2029 // }}}
2030 // __lo/__hi64[z]{{{
2031 template <typename _Tp,
2032  typename _R
2033  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2034  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2035  __lo64(_Tp __x)
2036  {
2037  _R __r{};
2038  __builtin_memcpy(&__r, &__x, 8);
2039  return __r;
2040  }
2041 
2042 template <typename _Tp,
2043  typename _R
2044  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2045  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2046  __hi64(_Tp __x)
2047  {
2048  static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2049  _R __r{};
2050  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2051  return __r;
2052  }
2053 
2054 template <typename _Tp,
2055  typename _R
2056  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2057  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2058  __hi64z([[maybe_unused]] _Tp __x)
2059  {
2060  _R __r{};
2061  if constexpr (sizeof(_Tp) == 16)
2062  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2063  return __r;
2064  }
2065 
2066 // }}}
2067 // __lo/__hi128{{{
2068 template <typename _Tp>
2069  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2070  __lo128(_Tp __x)
2071  { return __extract<0, sizeof(_Tp) / 16>(__x); }
2072 
2073 template <typename _Tp>
2074  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2075  __hi128(_Tp __x)
2076  {
2077  static_assert(sizeof(__x) == 32);
2078  return __extract<1, 2>(__x);
2079  }
2080 
2081 // }}}
2082 // __lo/__hi256{{{
2083 template <typename _Tp>
2084  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2085  __lo256(_Tp __x)
2086  {
2087  static_assert(sizeof(__x) == 64);
2088  return __extract<0, 2>(__x);
2089  }
2090 
2091 template <typename _Tp>
2092  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2093  __hi256(_Tp __x)
2094  {
2095  static_assert(sizeof(__x) == 64);
2096  return __extract<1, 2>(__x);
2097  }
2098 
2099 // }}}
2100 // __auto_bitcast{{{
2101 template <typename _Tp>
2102  struct _AutoCast
2103  {
2104  static_assert(__is_vector_type_v<_Tp>);
2105 
2106  const _Tp __x;
2107 
2108  template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2109  _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2110  { return __intrin_bitcast<typename _UVT::type>(__x); }
2111  };
2112 
2113 template <typename _Tp>
2114  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2115  __auto_bitcast(const _Tp& __x)
2116  { return {__x}; }
2117 
2118 template <typename _Tp, size_t _Np>
2119  _GLIBCXX_SIMD_INTRINSIC constexpr
2120  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2121  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2122  { return {__x._M_data}; }
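
// Example (illustrative sketch): like _ZeroExtendProxy above, _AutoCast defers
// the destination type to the initialization context, so one expression can
// serve several target vector types:
//
//   __vector_type_t<int, 4> __bits = __vector_broadcast<4>(0x3f800000);
//   __vector_type_t<float, 4> __f = __auto_bitcast(__bits); // all 1.0f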
2123 
2124 // }}}
2125 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2126 
2127 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
2128 // __bool_storage_member_type{{{
2129 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2130 template <size_t _Size>
2131  struct __bool_storage_member_type
2132  {
2133  static_assert((_Size & (_Size - 1)) != 0,
2134  "This trait may only be used for non-power-of-2 sizes. "
2135  "Power-of-2 sizes must be specialized.");
2136  using type =
2137  typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2138  };
2139 
2140 template <>
2141  struct __bool_storage_member_type<1> { using type = bool; };
2142 
2143 template <>
2144  struct __bool_storage_member_type<2> { using type = __mmask8; };
2145 
2146 template <>
2147  struct __bool_storage_member_type<4> { using type = __mmask8; };
2148 
2149 template <>
2150  struct __bool_storage_member_type<8> { using type = __mmask8; };
2151 
2152 template <>
2153  struct __bool_storage_member_type<16> { using type = __mmask16; };
2154 
2155 template <>
2156  struct __bool_storage_member_type<32> { using type = __mmask32; };
2157 
2158 template <>
2159  struct __bool_storage_member_type<64> { using type = __mmask64; };
2160 #endif // _GLIBCXX_SIMD_HAVE_AVX512F
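
// Example (illustrative sketch): non-power-of-2 sizes are rounded up to the
// next power of 2 by the primary template, so on an AVX512F target
//
//   static_assert(is_same_v<__bool_storage_member_type<12>::type, __mmask16>);
//
// holds: 12 rounds up to 16 via std::__bit_ceil.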
2161 
2162 // }}}
2163 // __intrinsic_type (x86){{{
2164 // the following excludes bool via __is_vectorizable
2165 #if _GLIBCXX_SIMD_HAVE_SSE
2166 template <typename _Tp, size_t _Bytes>
2167  struct __intrinsic_type<_Tp, _Bytes,
2168  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2169  {
2170  static_assert(!is_same_v<_Tp, long double>,
2171  "no __intrinsic_type support for long double on x86");
2172 
2173  static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16
2174  : _Bytes <= 32 ? 32
2175  : 64;
2176 
2177  using type [[__gnu__::__vector_size__(_S_VBytes)]]
2178  = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2179  };
2180 #endif // _GLIBCXX_SIMD_HAVE_SSE
2181 
2182 // }}}
2183 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2184 // __intrinsic_type (ARM){{{
2185 #if _GLIBCXX_SIMD_HAVE_NEON
2186 template <typename _Tp, size_t _Bytes>
2187  struct __intrinsic_type<_Tp, _Bytes,
2188  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2189  {
2190  static constexpr int _S_VBytes = _Bytes <= 8 ? 8 : 16;
2191  using _Ip = __int_for_sizeof_t<_Tp>;
2192  using _Up = conditional_t<
2193  is_floating_point_v<_Tp>, _Tp,
2194  conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2195  using type [[__gnu__::__vector_size__(_S_VBytes)]] = _Up;
2196  };
2197 #endif // _GLIBCXX_SIMD_HAVE_NEON
2198 
2199 // }}}
2200 // __intrinsic_type (PPC){{{
2201 #ifdef __ALTIVEC__
2202 template <typename _Tp>
2203  struct __intrinsic_type_impl;
2204 
2205 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2206  template <> \
2207  struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2208 _GLIBCXX_SIMD_PPC_INTRIN(float);
2209 _GLIBCXX_SIMD_PPC_INTRIN(double);
2210 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
2211 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2212 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
2213 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2214 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
2215 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2216 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
2217 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2218 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2219 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2220 #undef _GLIBCXX_SIMD_PPC_INTRIN
2221 
2222 template <typename _Tp, size_t _Bytes>
2223  struct __intrinsic_type<_Tp, _Bytes,
2224  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2225  {
2226  static_assert(!is_same_v<_Tp, long double>,
2227  "no __intrinsic_type support for long double on PPC");
2228 #ifndef __VSX__
2229  static_assert(!is_same_v<_Tp, double>,
2230  "no __intrinsic_type support for double on PPC w/o VSX");
2231 #endif
2232 #ifndef __POWER8_VECTOR__
2233  static_assert(
2234  !(is_integral_v<_Tp> && sizeof(_Tp) > 4),
2235  "no __intrinsic_type support for integers larger than 4 Bytes "
2236  "on PPC w/o POWER8 vectors");
2237 #endif
2238  using type = typename __intrinsic_type_impl<conditional_t<
2239  is_floating_point_v<_Tp>, _Tp, __int_for_sizeof_t<_Tp>>>::type;
2240  };
2241 #endif // __ALTIVEC__
2242 
2243 // }}}
2244 // _SimdWrapper<bool>{{{1
2245 template <size_t _Width>
2246  struct _SimdWrapper<bool, _Width,
2247  void_t<typename __bool_storage_member_type<_Width>::type>>
2248  {
2249  using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2250  using value_type = bool;
2251 
2252  static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2253 
2254  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2255  __as_full_vector() const { return _M_data; }
2256 
2257  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
2258  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_BuiltinType __k)
2259  : _M_data(__k) {}
2260 
2261  _GLIBCXX_SIMD_INTRINSIC operator const _BuiltinType&() const
2262  { return _M_data; }
2263 
2264  _GLIBCXX_SIMD_INTRINSIC operator _BuiltinType&()
2265  { return _M_data; }
2266 
2267  _GLIBCXX_SIMD_INTRINSIC _BuiltinType __intrin() const
2268  { return _M_data; }
2269 
2270  _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator[](size_t __i) const
2271  { return _M_data & (_BuiltinType(1) << __i); }
2272 
2273  template <size_t __i>
2274  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2275  operator[](_SizeConstant<__i>) const
2276  { return _M_data & (_BuiltinType(1) << __i); }
2277 
2278  _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, value_type __x)
2279  {
2280  if (__x)
2281  _M_data |= (_BuiltinType(1) << __i);
2282  else
2283  _M_data &= ~(_BuiltinType(1) << __i);
2284  }
2285 
2286  _GLIBCXX_SIMD_INTRINSIC
2287  constexpr bool _M_is_constprop() const
2288  { return __builtin_constant_p(_M_data); }
2289 
2290  _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2291  {
2292  if (__builtin_constant_p(_M_data))
2293  {
2294  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2295  constexpr _BuiltinType __active_mask
2296  = ~_BuiltinType() >> (__nbits - _Width);
2297  return (_M_data & __active_mask) == 0;
2298  }
2299  return false;
2300  }
2301 
2302  _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2303  {
2304  if (__builtin_constant_p(_M_data))
2305  {
2306  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2307  constexpr _BuiltinType __active_mask
2308  = ~_BuiltinType() >> (__nbits - _Width);
2309  return (_M_data & __active_mask) == __active_mask;
2310  }
2311  return false;
2312  }
2313 
2314  _BuiltinType _M_data;
2315  };
2316 
2317 // _SimdWrapperBase{{{1
2318 template <bool _MustZeroInitPadding, typename _BuiltinType>
2319  struct _SimdWrapperBase;
2320 
2321 template <typename _BuiltinType>
2322  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2323  {
2324  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() = default;
2325  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2326  : _M_data(__init)
2327  {}
2328 
2329  _BuiltinType _M_data;
2330  };
2331 
2332 template <typename _BuiltinType>
2333  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2334  // never become SNaN
2335  {
2336  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() : _M_data() {}
2337  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2338  : _M_data(__init)
2339  {}
2340 
2341  _BuiltinType _M_data;
2342  };
2343 
2344 // }}}
2345 // _SimdWrapper{{{
2346 template <typename _Tp, size_t _Width>
2347  struct _SimdWrapper<
2348  _Tp, _Width,
2349  void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2350  : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2351  && sizeof(_Tp) * _Width
2352  != sizeof(__vector_type_t<_Tp, _Width>),
2353  __vector_type_t<_Tp, _Width>>
2354  {
2355  using _Base
2356  = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2357  && sizeof(_Tp) * _Width
2358  != sizeof(__vector_type_t<_Tp, _Width>),
2359  __vector_type_t<_Tp, _Width>>;
2360 
2361  static_assert(__is_vectorizable_v<_Tp>);
2362  static_assert(_Width >= 2); // a width of 1 makes no sense, use _Tp directly instead
2363 
2364  using _BuiltinType = __vector_type_t<_Tp, _Width>;
2365  using value_type = _Tp;
2366 
2367  static inline constexpr size_t _S_full_size
2368  = sizeof(_BuiltinType) / sizeof(value_type);
2369  static inline constexpr int _S_size = _Width;
2370  static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2371 
2372  using _Base::_M_data;
2373 
2374  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2375  __as_full_vector() const
2376  { return _M_data; }
2377 
2378  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(initializer_list<_Tp> __init)
2379  : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2380  [&](auto __i) { return __init.begin()[__i.value]; })) {}
2381 
2382  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
2383  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(const _SimdWrapper&)
2384  = default;
2385  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_SimdWrapper&&) = default;
2386 
2387  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2388  operator=(const _SimdWrapper&) = default;
2389  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2390  operator=(_SimdWrapper&&) = default;
2391 
2392  template <typename _V, typename = enable_if_t<disjunction_v<
2393  is_same<_V, __vector_type_t<_Tp, _Width>>,
2394  is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
2395  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_V __x)
2396  // __vector_bitcast can convert e.g. __m128 to __vector(2) float
2397  : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2398 
2399  template <typename... _As,
2400  typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2401  && sizeof...(_As) <= _Width)>>
2402  _GLIBCXX_SIMD_INTRINSIC constexpr
2403  operator _SimdTuple<_Tp, _As...>() const
2404  {
2405  const auto& dd = _M_data; // workaround for GCC7 ICE
2406  return __generate_from_n_evaluations<sizeof...(_As),
2407  _SimdTuple<_Tp, _As...>>([&](
2408  auto __i) constexpr { return dd[int(__i)]; });
2409  }
2410 
2411  _GLIBCXX_SIMD_INTRINSIC constexpr operator const _BuiltinType&() const
2412  { return _M_data; }
2413 
2414  _GLIBCXX_SIMD_INTRINSIC constexpr operator _BuiltinType&()
2415  { return _M_data; }
2416 
2417  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](size_t __i) const
2418  { return _M_data[__i]; }
2419 
2420  template <size_t __i>
2421  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](_SizeConstant<__i>) const
2422  { return _M_data[__i]; }
2423 
2424  _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, _Tp __x)
2425  { _M_data[__i] = __x; }
2426 
2427  _GLIBCXX_SIMD_INTRINSIC
2428  constexpr bool _M_is_constprop() const
2429  { return __builtin_constant_p(_M_data); }
2430 
2431  _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2432  {
2433  if (__builtin_constant_p(_M_data))
2434  {
2435  bool __r = true;
2436  if constexpr (is_floating_point_v<_Tp>)
2437  {
2438  using _Ip = __int_for_sizeof_t<_Tp>;
2439  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2440  __execute_n_times<_Width>(
2441  [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2442  }
2443  else
2444  __execute_n_times<_Width>(
2445  [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2446  return __r;
2447  }
2448  return false;
2449  }
2450 
2451  _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2452  {
2453  if (__builtin_constant_p(_M_data))
2454  {
2455  bool __r = true;
2456  if constexpr (is_floating_point_v<_Tp>)
2457  {
2458  using _Ip = __int_for_sizeof_t<_Tp>;
2459  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2460  __execute_n_times<_Width>(
2461  [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2462  }
2463  else
2464  __execute_n_times<_Width>(
2465  [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2466  return __r;
2467  }
2468  return false;
2469  }
2470  };
2471 
2472 // }}}
2473 
2474 // __vectorized_sizeof {{{
2475 template <typename _Tp>
2476  constexpr size_t
2477  __vectorized_sizeof()
2478  {
2479  if constexpr (!__is_vectorizable_v<_Tp>)
2480  return 0;
2481 
2482  if constexpr (sizeof(_Tp) <= 8)
2483  {
2484  // X86:
2485  if constexpr (__have_avx512bw)
2486  return 64;
2487  if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2488  return 64;
2489  if constexpr (__have_avx2)
2490  return 32;
2491  if constexpr (__have_avx && is_floating_point_v<_Tp>)
2492  return 32;
2493  if constexpr (__have_sse2)
2494  return 16;
2495  if constexpr (__have_sse && is_same_v<_Tp, float>)
2496  return 16;
2497  /* The following is too much trouble because of mixed MMX and x87 code.
2498  * While nothing here explicitly uses MMX instructions or registers,
2499  * they are still emitted but no EMMS cleanup is done.
2500  if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2501  return 8;
2502  */
2503 
2504  // PowerPC:
2505  if constexpr (__have_power8vec
2506  || (__have_power_vmx && (sizeof(_Tp) < 8))
2507  || (__have_power_vsx && is_floating_point_v<_Tp>) )
2508  return 16;
2509 
2510  // ARM:
2511  if constexpr (__have_neon_a64
2512  || (__have_neon_a32 && !is_same_v<_Tp, double>) )
2513  return 16;
2514  if constexpr (__have_neon
2515  && sizeof(_Tp) < 8
2516  // Only allow fp if the user allows non-IEC559 fp (e.g.
2517  // via -ffast-math). ARMv7 NEON fp is not conforming to
2518  // IEC559.
2519  && (__support_neon_float || !is_floating_point_v<_Tp>))
2520  return 16;
2521  }
2522 
2523  return sizeof(_Tp);
2524  }
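
// Example (illustrative sketch): on an AVX2-only x86 target,
// __vectorized_sizeof<float>() yields 32 (one YMM register), so
// simd_abi::native<float> selects a 32-Byte ABI and native_simd<float>
// holds 32 / sizeof(float) == 8 elements.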
2525 
2526 // }}}
2527 namespace simd_abi {
2528 // most of simd_abi is defined in simd_detail.h
2529 template <typename _Tp>
2530  inline constexpr int max_fixed_size
2531  = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2532 
2533 // compatible {{{
2534 #if defined __x86_64__ || defined __aarch64__
2535 template <typename _Tp>
2536  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2537 #elif defined __ARM_NEON
2538 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2539 // ABI?)
2540 template <typename _Tp>
2541  using compatible
2542  = conditional_t<(sizeof(_Tp) < 8
2543  && (__support_neon_float || !is_floating_point_v<_Tp>)),
2544  _VecBuiltin<16>, scalar>;
2545 #else
2546 template <typename>
2547  using compatible = scalar;
2548 #endif
2549 
2550 // }}}
2551 // native {{{
2552 template <typename _Tp>
2553  constexpr auto
2554  __determine_native_abi()
2555  {
2556  constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2557  if constexpr (__bytes == sizeof(_Tp))
2558  return static_cast<scalar*>(nullptr);
2559  else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2560  return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2561  else
2562  return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2563  }
2564 
2565 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2566  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2567 
2568 // }}}
2569 // __default_abi {{{
2570 #if defined _GLIBCXX_SIMD_DEFAULT_ABI
2571 template <typename _Tp>
2572  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2573 #else
2574 template <typename _Tp>
2575  using __default_abi = compatible<_Tp>;
2576 #endif
2577 
2578 // }}}
2579 } // namespace simd_abi
2580 
2581 // traits {{{1
2582 // is_abi_tag {{{2
2583 template <typename _Tp, typename = void_t<>>
2584  struct is_abi_tag : false_type {};
2585 
2586 template <typename _Tp>
2587  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
2588  : public _Tp::_IsValidAbiTag {};
2589 
2590 template <typename _Tp>
2591  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2592 
2593 // is_simd(_mask) {{{2
2594 template <typename _Tp>
2595  struct is_simd : public false_type {};
2596 
2597 template <typename _Tp>
2598  inline constexpr bool is_simd_v = is_simd<_Tp>::value;
2599 
2600 template <typename _Tp>
2601  struct is_simd_mask : public false_type {};
2602 
2603 template <typename _Tp>
2604  inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2605 
2606 // simd_size {{{2
2607 template <typename _Tp, typename _Abi, typename = void>
2608  struct __simd_size_impl {};
2609 
2610 template <typename _Tp, typename _Abi>
2611  struct __simd_size_impl<
2612  _Tp, _Abi,
2613  enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
2614  : _SizeConstant<_Abi::template _S_size<_Tp>> {};
2615 
2616 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2617  struct simd_size : __simd_size_impl<_Tp, _Abi> {};
2618 
2619 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2620  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2621 
2622 // simd_abi::deduce {{{2
2623 template <typename _Tp, size_t _Np, typename = void>
2624  struct __deduce_impl;
2625 
2626 namespace simd_abi {
2627 /**
2628  * @tparam _Tp The requested `value_type` for the elements.
2629  * @tparam _Np The requested number of elements.
2630  * @tparam _Abis This parameter is ignored, since this implementation cannot
2631  * make any use of it. Either a good native ABI is matched and used as `type`
2632  * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
2633  * the best matching native ABIs.
2634  */
2635 template <typename _Tp, size_t _Np, typename...>
2636  struct deduce : __deduce_impl<_Tp, _Np> {};
2637 
2638 template <typename _Tp, size_t _Np, typename... _Abis>
2639  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
2640 } // namespace simd_abi
2641 
2642 // }}}2
2643 // rebind_simd {{{2
2644 template <typename _Tp, typename _V, typename = void>
2645  struct rebind_simd;
2646 
2647 template <typename _Tp, typename _Up, typename _Abi>
2648  struct rebind_simd<
2649  _Tp, simd<_Up, _Abi>,
2650  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2651  {
2652  using type
2653  = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2654  };
2655 
2656 template <typename _Tp, typename _Up, typename _Abi>
2657  struct rebind_simd<
2658  _Tp, simd_mask<_Up, _Abi>,
2659  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2660  {
2661  using type
2662  = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2663  };
2664 
2665 template <typename _Tp, typename _V>
2666  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
2667 
2668 // resize_simd {{{2
2669 template <int _Np, typename _V, typename = void>
2670  struct resize_simd;
2671 
2672 template <int _Np, typename _Tp, typename _Abi>
2673  struct resize_simd<_Np, simd<_Tp, _Abi>,
2674  void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2675  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2676 
2677 template <int _Np, typename _Tp, typename _Abi>
2678  struct resize_simd<_Np, simd_mask<_Tp, _Abi>,
2679  void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2680  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2681 
2682 template <int _Np, typename _V>
2683  using resize_simd_t = typename resize_simd<_Np, _V>::type;
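
// Example (illustrative sketch): rebind_simd swaps the value_type while
// keeping the element count; resize_simd keeps the value_type while changing
// the count. Assumes a target where native_simd<float>::size() == 8:
//
//   using _F8 = native_simd<float>; // 8 floats
//   using _I8 = rebind_simd_t<int, _F8>; // 8 ints
//   using _F4 = resize_simd_t<4, _F8>; // 4 floats
//   static_assert(_I8::size() == _F8::size());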
2684 
2685 // }}}2
2686 // memory_alignment {{{2
2687 template <typename _Tp, typename _Up = typename _Tp::value_type>
2688  struct memory_alignment
2689  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
2690 
2691 template <typename _Tp, typename _Up = typename _Tp::value_type>
2692  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
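
// Example (illustrative sketch): memory_alignment_v is the alignment that
// vector_aligned loads and stores require for a given simd type:
//
//   alignas(memory_alignment_v<native_simd<float>>)
//     float __buf[native_simd<float>::size()] = {};
//   native_simd<float> __v(__buf, vector_aligned); // requirement satisfied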
2693 
2694 // class template simd [simd] {{{1
2695 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2696  class simd;
2697 
2698 template <typename _Tp, typename _Abi>
2699  struct is_simd<simd<_Tp, _Abi>> : public true_type {};
2700 
2701 template <typename _Tp>
2702  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
2703 
2704 template <typename _Tp, int _Np>
2705  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
2706 
2707 template <typename _Tp, size_t _Np>
2708  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2709 
2710 // class template simd_mask [simd_mask] {{{1
2711 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2712  class simd_mask;
2713 
2714 template <typename _Tp, typename _Abi>
2715  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
2716 
2717 template <typename _Tp>
2718  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
2719 
2720 template <typename _Tp, int _Np>
2721  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
2722 
2723 template <typename _Tp, size_t _Np>
2724  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2725 
2726 // casts [simd.casts] {{{1
2727 // static_simd_cast {{{2
2728 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>,
2729  typename = void>
2730  struct __static_simd_cast_return_type;
2731 
2732 template <typename _Tp, typename _A0, typename _Up, typename _Ap>
2733  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false,
2734  void>
2735  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
2736 
2737 template <typename _Tp, typename _Up, typename _Ap>
2738  struct __static_simd_cast_return_type<
2739  _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
2740  { using type = _Tp; };
2741 
2742 template <typename _Tp, typename _Ap>
2743  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
2744 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
2745  enable_if_t<__is_vectorizable_v<_Tp>>
2746 #else
2747  void
2748 #endif
2749  >
2750  { using type = simd<_Tp, _Ap>; };
2751 
2752 template <typename _Tp, typename = void>
2753  struct __safe_make_signed { using type = _Tp; };
2754 
2755 template <typename _Tp>
2756  struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
2757  {
2758  // the extra make_unsigned_t is because of PR85951
2759  using type = make_signed_t<make_unsigned_t<_Tp>>;
2760  };
2761 
2762 template <typename _Tp>
2763  using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
2764 
2765 template <typename _Tp, typename _Up, typename _Ap>
2766  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
2767 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
2768  enable_if_t<__is_vectorizable_v<_Tp>>
2769 #else
2770  void
2771 #endif
2772  >
2773  {
2774  using type = conditional_t<
2775  (is_integral_v<_Up> && is_integral_v<_Tp> &&
2776 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
2777  is_signed_v<_Up> != is_signed_v<_Tp> &&
2778 #endif
2779  is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
2780  simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
2781  };
2782 
2783 template <typename _Tp, typename _Up, typename _Ap,
2784  typename _R
2785  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
2786  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
2787  static_simd_cast(const simd<_Up, _Ap>& __x)
2788  {
2789  if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
2790  return __x;
2791  else
2792  {
2793  _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
2794  __c;
2795  return _R(__private_init, __c(__data(__x)));
2796  }
2797  }
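
// Example (illustrative sketch): static_simd_cast converts element-wise like
// static_cast, keeping the element count and deducing a suitable ABI for the
// new value_type:
//
//   native_simd<int> __i = 3;
//   auto __f = static_simd_cast<float>(__i); // same size(), float elements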
2798 
2799 namespace __proposed {
2800 template <typename _Tp, typename _Up, typename _Ap,
2801  typename _R
2802  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
2803  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
2804  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
2805  {
2806  using _RM = typename _R::mask_type;
2807  return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
2808  typename _RM::simd_type::value_type>(__x)};
2809  }
2810 } // namespace __proposed
2811 
2812 // simd_cast {{{2
2813 template <typename _Tp, typename _Up, typename _Ap,
2814  typename _To = __value_type_or_identity_t<_Tp>>
2815  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
2816  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
2817  -> decltype(static_simd_cast<_Tp>(__x))
2818  { return static_simd_cast<_Tp>(__x); }
2819 
2820 namespace __proposed {
2821 template <typename _Tp, typename _Up, typename _Ap,
2822  typename _To = __value_type_or_identity_t<_Tp>>
2823  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
2824  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
2825  -> decltype(static_simd_cast<_Tp>(__x))
2826  { return static_simd_cast<_Tp>(__x); }
2827 } // namespace __proposed
2828 
2829 // }}}2
2830 // resizing_simd_cast {{{
2831 namespace __proposed {
2832 /* Proposed spec:
2833 
2834 template <class T, class U, class Abi>
2835 T resizing_simd_cast(const simd<U, Abi>& x)
2836 
2837 p1 Constraints:
2838  - is_simd_v<T> is true and
2839  - T::value_type is the same type as U
2840 
2841 p2 Returns:
2842  A simd object with the i^th element initialized to x[i] for all i in the
2843  range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
2844  than simd_size_v<U, Abi>, the remaining elements are value-initialized.
2845 
2846 template <class T, class U, class Abi>
2847 T resizing_simd_cast(const simd_mask<U, Abi>& x)
2848 
2849 p1 Constraints: is_simd_mask_v<T> is true
2850 
2851 p2 Returns:
2852  A simd_mask object with the i^th element initialized to x[i] for all i in
2853  the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
2854  than simd_size_v<U, Abi>, the remaining elements are initialized to false.
2855 
2856  */
2857 
2858 template <typename _Tp, typename _Up, typename _Ap>
2859  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
2860  conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
2861  resizing_simd_cast(const simd<_Up, _Ap>& __x)
2862  {
2863  if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
2864  return __x;
2865  else if constexpr (simd_size_v<_Up, _Ap> == 1)
2866  {
2867  _Tp __r{};
2868  __r[0] = __x[0];
2869  return __r;
2870  }
2871  else if constexpr (_Tp::size() == 1)
2872  return __x[0];
2873  else if constexpr (sizeof(_Tp) == sizeof(__x)
2874  && !__is_fixed_size_abi_v<_Ap>)
2875  return {__private_init,
2876  __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
2877  _Ap::_S_masked(__data(__x))._M_data)};
2878  else
2879  {
2880  _Tp __r{};
2881  __builtin_memcpy(&__data(__r), &__data(__x),
2882  sizeof(_Up)
2883  * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
2884  return __r;
2885  }
2886  }
2887 
2888 template <typename _Tp, typename _Up, typename _Ap>
2889  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
2890  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
2891  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
2892  {
2893  return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
2894  typename _Tp::simd_type::value_type>(__x)};
2895  }
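
// Example (illustrative sketch): resizing_simd_cast keeps the value_type but
// changes the element count, zero-filling (or, for masks, false-filling) any
// elements beyond the source:
//
//   native_simd<float> __v = 1.f; // e.g. 8 elements
//   auto __w = resizing_simd_cast<fixed_size_simd<float, 4>>(__v); // first 4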
2896 } // namespace __proposed
2897 
2898 // }}}
2899 // to_fixed_size {{{2
2900 template <typename _Tp, int _Np>
2901  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
2902  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
2903  { return __x; }
2904 
2905 template <typename _Tp, int _Np>
2906  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
2907  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
2908  { return __x; }
2909 
2910 template <typename _Tp, typename _Ap>
2911  _GLIBCXX_SIMD_INTRINSIC auto
2912  to_fixed_size(const simd<_Tp, _Ap>& __x)
2913  {
2914  return simd<_Tp, simd_abi::fixed_size<simd_size_v<_Tp, _Ap>>>([&__x](
2915  auto __i) constexpr { return __x[__i]; });
2916  }
2917 
2918 template <typename _Tp, typename _Ap>
2919  _GLIBCXX_SIMD_INTRINSIC auto
2920  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
2921  {
2922  constexpr int _Np = simd_mask<_Tp, _Ap>::size();
2923  fixed_size_simd_mask<_Tp, _Np> __r;
2924  __execute_n_times<_Np>([&](auto __i) constexpr { __r[__i] = __x[__i]; });
2925  return __r;
2926  }
2927 
2928 // to_native {{{2
2929 template <typename _Tp, int _Np>
2930  _GLIBCXX_SIMD_INTRINSIC
2931  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
2932  to_native(const fixed_size_simd<_Tp, _Np>& __x)
2933  {
2934  alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
2935  __x.copy_to(__mem, vector_aligned);
2936  return {__mem, vector_aligned};
2937  }
2938 
2939 template <typename _Tp, size_t _Np>
2940  _GLIBCXX_SIMD_INTRINSIC
2941  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
2942  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
2943  {
2944  return native_simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; });
2945  }
2946 
2947 // to_compatible {{{2
2948 template <typename _Tp, size_t _Np>
2949  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
2950  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
2951  {
2952  alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
2953  __x.copy_to(__mem, vector_aligned);
2954  return {__mem, vector_aligned};
2955  }
2956 
2957 template <typename _Tp, size_t _Np>
2958  _GLIBCXX_SIMD_INTRINSIC
2959  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
2960  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
2961  { return simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; }); }
2962 
2963 // masked assignment [simd_mask.where] {{{1
2964 
2965 // where_expression {{{1
2966 // const_where_expression<M, T> {{{2
2967 template <typename _M, typename _Tp>
2968  class const_where_expression
2969  {
2970  using _V = _Tp;
2971  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
2972 
2973  struct _Wrapper { using value_type = _V; };
2974 
2975  protected:
2976  using _Impl = typename _V::_Impl;
2977 
2978  using value_type =
2979  typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
2980 
2981  _GLIBCXX_SIMD_INTRINSIC friend const _M&
2982  __get_mask(const const_where_expression& __x)
2983  { return __x._M_k; }
2984 
2985  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
2986  __get_lvalue(const const_where_expression& __x)
2987  { return __x._M_value; }
2988 
2989  const _M& _M_k;
2990  _Tp& _M_value;
2991 
2992  public:
2993  const_where_expression(const const_where_expression&) = delete;
2994  const_where_expression& operator=(const const_where_expression&) = delete;
2995 
2996  _GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& __kk, const _Tp& dd)
2997  : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
2998 
2999  _GLIBCXX_SIMD_INTRINSIC _V
3000  operator-() const&&
3001  {
3002  return {__private_init,
3003  _Impl::template _S_masked_unary<negate>(__data(_M_k),
3004  __data(_M_value))};
3005  }
3006 
3007  template <typename _Up, typename _Flags>
3008  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3009  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3010  {
3011  return {__private_init,
3012  _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3013  _Flags::template _S_apply<_V>(__mem))};
3014  }
3015 
3016  template <typename _Up, typename _Flags>
3017  _GLIBCXX_SIMD_INTRINSIC void
3018  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3019  {
3020  _Impl::_S_masked_store(__data(_M_value),
3021  _Flags::template _S_apply<_V>(__mem),
3022  __data(_M_k));
3023  }
3024  };
3025 
3026 // const_where_expression<bool, T> {{{2
3027 template <typename _Tp>
3028  class const_where_expression<bool, _Tp>
3029  {
3030  using _M = bool;
3031  using _V = _Tp;
3032 
3033  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3034 
3035  struct _Wrapper { using value_type = _V; };
3036 
3037  protected:
3038  using value_type =
3039  typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3040 
3041  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3042  __get_mask(const const_where_expression& __x)
3043  { return __x._M_k; }
3044 
3045  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3046  __get_lvalue(const const_where_expression& __x)
3047  { return __x._M_value; }
3048 
3049  const bool _M_k;
3050  _Tp& _M_value;
3051 
3052  public:
3053  const_where_expression(const const_where_expression&) = delete;
3054  const_where_expression& operator=(const const_where_expression&) = delete;
3055 
3056  _GLIBCXX_SIMD_INTRINSIC const_where_expression(const bool __kk, const _Tp& dd)
3057  : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3058 
3059  _GLIBCXX_SIMD_INTRINSIC _V operator-() const&&
3060  { return _M_k ? -_M_value : _M_value; }
3061 
3062  template <typename _Up, typename _Flags>
3063  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3064  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3065  { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3066 
3067  template <typename _Up, typename _Flags>
3068  _GLIBCXX_SIMD_INTRINSIC void
3069  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3070  {
3071  if (_M_k)
3072  __mem[0] = _M_value;
3073  }
3074  };
3075 
3076 // where_expression<M, T> {{{2
3077 template <typename _M, typename _Tp>
3078  class where_expression : public const_where_expression<_M, _Tp>
3079  {
3080  using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3081 
3082  static_assert(!is_const<_Tp>::value,
3083  "where_expression may only be instantiated with __a non-const "
3084  "_Tp parameter");
3085 
3086  using typename const_where_expression<_M, _Tp>::value_type;
3087  using const_where_expression<_M, _Tp>::_M_k;
3088  using const_where_expression<_M, _Tp>::_M_value;
3089 
3090  static_assert(
3091  is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3092  static_assert(_M::size() == _Tp::size(), "");
3093 
3094  _GLIBCXX_SIMD_INTRINSIC friend _Tp& __get_lvalue(where_expression& __x)
3095  { return __x._M_value; }
3096 
3097  public:
3098  where_expression(const where_expression&) = delete;
3099  where_expression& operator=(const where_expression&) = delete;
3100 
3101  _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& dd)
3102  : const_where_expression<_M, _Tp>(__kk, dd) {}
3103 
3104  template <typename _Up>
3105  _GLIBCXX_SIMD_INTRINSIC void operator=(_Up&& __x) &&
3106  {
3107  _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3108  __to_value_type_or_member_type<_Tp>(
3109  static_cast<_Up&&>(__x)));
3110  }
3111 
3112 #define _GLIBCXX_SIMD_OP_(__op, __name) \
3113  template <typename _Up> \
3114  _GLIBCXX_SIMD_INTRINSIC void operator __op##=(_Up&& __x)&& \
3115  { \
3116  _Impl::template _S_masked_cassign( \
3117  __data(_M_k), __data(_M_value), \
3118  __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3119  [](auto __impl, auto __lhs, auto __rhs) constexpr { \
3120  return __impl.__name(__lhs, __rhs); \
3121  }); \
3122  } \
3123  static_assert(true)
3124  _GLIBCXX_SIMD_OP_(+, _S_plus);
3125  _GLIBCXX_SIMD_OP_(-, _S_minus);
3126  _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3127  _GLIBCXX_SIMD_OP_(/, _S_divides);
3128  _GLIBCXX_SIMD_OP_(%, _S_modulus);
3129  _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3130  _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3131  _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3132  _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3133  _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3134 #undef _GLIBCXX_SIMD_OP_
3135 
3136  _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3137  {
3138  __data(_M_value)
3139  = _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3140  __data(_M_value));
3141  }
3142 
3143  _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3144  {
3145  __data(_M_value)
3146  = _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3147  __data(_M_value));
3148  }
3149 
3150  _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3151  {
3152  __data(_M_value)
3153  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3154  __data(_M_value));
3155  }
3156 
3157  _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3158  {
3159  __data(_M_value)
3160  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3161  __data(_M_value));
3162  }
3163 
3164  // intentionally hides const_where_expression::copy_from
3165  template <typename _Up, typename _Flags>
3166  _GLIBCXX_SIMD_INTRINSIC void
3167  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3168  {
3169  __data(_M_value)
3170  = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3171  _Flags::template _S_apply<_Tp>(__mem));
3172  }
3173  };
3174 
3175 // where_expression<bool, T> {{{2
3176 template <typename _Tp>
3177  class where_expression<bool, _Tp> : public const_where_expression<bool, _Tp>
3178  {
3179  using _M = bool;
3180  using typename const_where_expression<_M, _Tp>::value_type;
3181  using const_where_expression<_M, _Tp>::_M_k;
3182  using const_where_expression<_M, _Tp>::_M_value;
3183 
3184  public:
3185  where_expression(const where_expression&) = delete;
3186  where_expression& operator=(const where_expression&) = delete;
3187 
3188  _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& dd)
3189  : const_where_expression<_M, _Tp>(__kk, dd) {}
3190 
3191 #define _GLIBCXX_SIMD_OP_(__op) \
3192  template <typename _Up> \
3193  _GLIBCXX_SIMD_INTRINSIC void operator __op(_Up&& __x)&& \
3194  { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3195 
3196  _GLIBCXX_SIMD_OP_(=)
3197  _GLIBCXX_SIMD_OP_(+=)
3198  _GLIBCXX_SIMD_OP_(-=)
3199  _GLIBCXX_SIMD_OP_(*=)
3200  _GLIBCXX_SIMD_OP_(/=)
3201  _GLIBCXX_SIMD_OP_(%=)
3202  _GLIBCXX_SIMD_OP_(&=)
3203  _GLIBCXX_SIMD_OP_(|=)
3204  _GLIBCXX_SIMD_OP_(^=)
3205  _GLIBCXX_SIMD_OP_(<<=)
3206  _GLIBCXX_SIMD_OP_(>>=)
3207 #undef _GLIBCXX_SIMD_OP_
3208 
3209  _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3210  { if (_M_k) ++_M_value; }
3211 
3212  _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3213  { if (_M_k) ++_M_value; }
3214 
3215  _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3216  { if (_M_k) --_M_value; }
3217 
3218  _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3219  { if (_M_k) --_M_value; }
3220 
3221  // intentionally hides const_where_expression::copy_from
3222  template <typename _Up, typename _Flags>
3223  _GLIBCXX_SIMD_INTRINSIC void
3224  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3225  { if (_M_k) _M_value = __mem[0]; }
3226  };
3227 
3228 // where {{{1
3229 template <typename _Tp, typename _Ap>
3230  _GLIBCXX_SIMD_INTRINSIC where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3231  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3232  { return {__k, __value}; }
3233 
3234 template <typename _Tp, typename _Ap>
3235  _GLIBCXX_SIMD_INTRINSIC
3236  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3237  where(const typename simd<_Tp, _Ap>::mask_type& __k,
3238  const simd<_Tp, _Ap>& __value)
3239  { return {__k, __value}; }
3240 
3241 template <typename _Tp, typename _Ap>
3242  _GLIBCXX_SIMD_INTRINSIC
3243  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3244  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3245  simd_mask<_Tp, _Ap>& __value)
3246  { return {__k, __value}; }
3247 
3248 template <typename _Tp, typename _Ap>
3249  _GLIBCXX_SIMD_INTRINSIC
3250  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3251  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3252  const simd_mask<_Tp, _Ap>& __value)
3253  { return {__k, __value}; }
3254 
3255 template <typename _Tp>
3256  _GLIBCXX_SIMD_INTRINSIC where_expression<bool, _Tp>
3257  where(_ExactBool __k, _Tp& __value)
3258  { return {__k, __value}; }
3259 
3260 template <typename _Tp>
3261  _GLIBCXX_SIMD_INTRINSIC const_where_expression<bool, _Tp>
3262  where(_ExactBool __k, const _Tp& __value)
3263  { return {__k, __value}; }
3264 
3265 template <typename _Tp, typename _Ap>
3266  void where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3267 
3268 template <typename _Tp, typename _Ap>
3269  void where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
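
// Example (illustrative sketch): where() selects the elements an assignment
// (or compound assignment) applies to, leaving the rest unchanged, e.g. an
// element-wise absolute value:
//
//   native_simd<float> __x([](int __i) { return __i - 2.f; });
//   where(__x < 0, __x) = -__x; // negate only the negative elements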
3270 
3271 // proposed mask iterations {{{1
3272 namespace __proposed {
3273 template <size_t _Np>
3274  class where_range
3275  {
3276  const bitset<_Np> __bits;
3277 
3278  public:
3279  where_range(bitset<_Np> __b) : __bits(__b) {}
3280 
3281  class iterator
3282  {
3283  size_t __mask;
3284  size_t __bit;
3285 
3286  _GLIBCXX_SIMD_INTRINSIC void __next_bit()
3287  { __bit = __builtin_ctzl(__mask); }
3288 
3289  _GLIBCXX_SIMD_INTRINSIC void __reset_lsb()
3290  {
3291  // 01100100 - 1 = 01100011
3292  __mask &= (__mask - 1);
3293  // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3294  }
3295 
3296  public:
3297  iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3298  iterator(const iterator&) = default;
3299  iterator(iterator&&) = default;
3300 
3301  _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator->() const
3302  { return __bit; }
3303 
3304  _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator*() const
3305  { return __bit; }
3306 
3307  _GLIBCXX_SIMD_ALWAYS_INLINE iterator& operator++()
3308  {
3309  __reset_lsb();
3310  __next_bit();
3311  return *this;
3312  }
3313 
3314  _GLIBCXX_SIMD_ALWAYS_INLINE iterator operator++(int)
3315  {
3316  iterator __tmp = *this;
3317  __reset_lsb();
3318  __next_bit();
3319  return __tmp;
3320  }
3321 
3322  _GLIBCXX_SIMD_ALWAYS_INLINE bool operator==(const iterator& __rhs) const
3323  { return __mask == __rhs.__mask; }
3324 
3325  _GLIBCXX_SIMD_ALWAYS_INLINE bool operator!=(const iterator& __rhs) const
3326  { return __mask != __rhs.__mask; }
3327  };
3328 
3329  iterator begin() const
3330  { return __bits.to_ullong(); }
3331 
3332  iterator end() const
3333  { return 0; }
3334  };
3335 
3336 template <typename _Tp, typename _Ap>
3337  where_range<simd_size_v<_Tp, _Ap>>
3338  where(const simd_mask<_Tp, _Ap>& __k)
3339  { return __k.__to_bitset(); }
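
// Example (illustrative sketch): where_range visits the indices of the set
// mask bits, least significant bit first:
//
//   native_simd<float> __v([](int __i) { return __i - 2.f; });
//   for (size_t __i : __proposed::where(__v < 0))
//     ; // __i is 0, then 1: the indices of the negative elements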
3340 
3341 } // namespace __proposed
3342 
3343 // }}}1
3344 // reductions [simd.reductions] {{{1
3345 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3346  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3347  reduce(const simd<_Tp, _Abi>& __v,
3348  _BinaryOperation __binary_op = _BinaryOperation())
3349  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3350 
3351 template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3352  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3353  reduce(const const_where_expression<_M, _V>& __x,
3354  typename _V::value_type __identity_element,
3355  _BinaryOperation __binary_op)
3356  {
3357  if (__builtin_expect(none_of(__get_mask(__x)), false))
3358  return __identity_element;
3359 
3360  _V __tmp = __identity_element;
3361  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3362  __data(__get_lvalue(__x)));
3363  return reduce(__tmp, __binary_op);
3364  }
3365 
3366 template <typename _M, typename _V>
3367  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3368  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3369  { return reduce(__x, 0, __binary_op); }
3370 
3371 template <typename _M, typename _V>
3372  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3373  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3374  { return reduce(__x, 1, __binary_op); }
3375 
3376 template <typename _M, typename _V>
3377  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3378  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3379  { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3380 
3381 template <typename _M, typename _V>
3382  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3383  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3384  { return reduce(__x, 0, __binary_op); }
3385 
3386 template <typename _M, typename _V>
3387  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3388  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3389  { return reduce(__x, 0, __binary_op); }
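
// Example (illustrative sketch): the masked reduce overloads substitute the
// identity element of the operation for inactive elements, so masked-off
// lanes cannot affect the result:
//
//   native_simd<float> __v([](int __i) { return __i - 2.f; });
//   float __sum = reduce(where(__v > 0, __v)); // adds only positive elements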
3390 
3391 // }}}1
3392 // algorithms [simd.alg] {{{
3393 template <typename _Tp, typename _Ap>
3394  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3395  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3396  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3397 
3398 template <typename _Tp, typename _Ap>
3399  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3400  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3401  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3402 
3403 template <typename _Tp, typename _Ap>
3404  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3405  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3406  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3407  {
3408  const auto __pair_of_members
3409  = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3410  return {simd<_Tp, _Ap>(__private_init, __pair_of_members.first),
3411  simd<_Tp, _Ap>(__private_init, __pair_of_members.second)};
3412  }
3413 
3414 template <typename _Tp, typename _Ap>
3415  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3416  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo,
3417  const simd<_Tp, _Ap>& __hi)
3418  {
3419  using _Impl = typename _Ap::_SimdImpl;
3420  return {__private_init,
3421  _Impl::_S_min(__data(__hi),
3422  _Impl::_S_max(__data(__lo), __data(__v)))};
3423  }
3424 
3425 // }}}
3426 
3427 template <size_t... _Sizes, typename _Tp, typename _Ap,
3428  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3429  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3430  split(const simd<_Tp, _Ap>&);
3431 
3432 // __extract_part {{{
3433 template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3434  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
3435  _SimdWrapper<_Tp, _Np / _Total * _Combine>
3436  __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3437 
3438 template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0,
3439  typename... _As>
3440  _GLIBCXX_SIMD_INTRINSIC auto
3441  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3442 
3443 // }}}
3444 // _SizeList {{{
3445 template <size_t _V0, size_t... _Values>
3446  struct _SizeList
3447  {
3448  template <size_t _I>
3449  static constexpr size_t _S_at(_SizeConstant<_I> = {})
3450  {
3451  if constexpr (_I == 0)
3452  return _V0;
3453  else
3454  return _SizeList<_Values...>::template _S_at<_I - 1>();
3455  }
3456 
3457  template <size_t _I>
3458  static constexpr auto _S_before(_SizeConstant<_I> = {})
3459  {
3460  if constexpr (_I == 0)
3461  return _SizeConstant<0>();
3462  else
3463  return _SizeConstant<
3464  _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
3465  }
3466 
3467  template <size_t _Np>
3468  static constexpr auto _S_pop_front(_SizeConstant<_Np> = {})
3469  {
3470  if constexpr (_Np == 0)
3471  return _SizeList();
3472  else
3473  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
3474  }
3475  };
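// Worked example (comment only): for _SizeList<2, 4, 8>
//   _S_at<1>()        yields 4
//   _S_before<2>()    yields _SizeConstant<6>() (2 + 4 elements precede
//                     index 2)
//   _S_pop_front<1>() yields _SizeList<4, 8>()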
3476 
3477 // }}}
3478 // __extract_center {{{
3479 template <typename _Tp, size_t _Np>
3480  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
3481  __extract_center(_SimdWrapper<_Tp, _Np> __x)
3482  {
3483  static_assert(_Np >= 4);
3484  static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
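// e.g. _Np == 8: the quarters are lanes {0,1}, {2,3}, {4,5}, {6,7}; the
// returned half consists of the two middle quarters, i.e. lanes 2..5.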
3485 #if _GLIBCXX_SIMD_X86INTRIN // {{{
3486  if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
3487  {
3488  const auto __intrin = __to_intrin(__x);
3489  if constexpr (is_integral_v<_Tp>)
3490  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
3491  _mm512_shuffle_i32x4(__intrin, __intrin,
3492  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3493  else if constexpr (sizeof(_Tp) == 4)
3494  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
3495  _mm512_shuffle_f32x4(__intrin, __intrin,
3496  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3497  else if constexpr (sizeof(_Tp) == 8)
3498  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
3499  _mm512_shuffle_f64x2(__intrin, __intrin,
3500  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3501  else
3502  __assert_unreachable<_Tp>();
3503  }
3504  else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
3505  return __vector_bitcast<_Tp>(
3506  _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
3507  __hi128(__vector_bitcast<double>(__x)), 1));
3508  else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
3509  return __vector_bitcast<_Tp>(
3510  _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
3511  __lo128(__vector_bitcast<_LLong>(__x)),
3512  sizeof(_Tp) * _Np / 4));
3513  else
3514 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
3515  {
3516  __vector_type_t<_Tp, _Np / 2> __r;
3517  __builtin_memcpy(&__r,
3518  reinterpret_cast<const char*>(&__x)
3519  + sizeof(_Tp) * _Np / 4,
3520  sizeof(_Tp) * _Np / 2);
3521  return __r;
3522  }
3523  }
3524 
3525 template <typename _Tp, typename _A0, typename... _As>
3526  _GLIBCXX_SIMD_INTRINSIC
3527  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
3528  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
3529  {
3530  if constexpr (sizeof...(_As) == 0)
3531  return __extract_center(__x.first);
3532  else
3533  return __extract_part<1, 4, 2>(__x);
3534  }
3535 
3536 // }}}
3537 // __split_wrapper {{{
3538 template <size_t... _Sizes, typename _Tp, typename... _As>
3539  auto
3540  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
3541  {
3542  return split<_Sizes...>(
3543  fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
3544  __x));
3545  }
3546 
3547 // }}}
3548 
3549 // split<simd>(simd) {{{
3550 template <typename _V, typename _Ap,
3551  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
3552  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
3553  && is_simd_v<_V>, array<_V, _Parts>>
3554  split(const simd<typename _V::value_type, _Ap>& __x)
3555  {
3556  using _Tp = typename _V::value_type;
3557  if constexpr (_Parts == 1)
3558  {
3559  return {simd_cast<_V>(__x)};
3560  }
3561  else if (__x._M_is_constprop())
3562  {
3563  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3564  auto __i) constexpr {
3565  return _V([&](auto __j) constexpr {
3566  return __x[__i * _V::size() + __j];
3567  });
3568  });
3569  }
3570  else if constexpr (
3571  __is_fixed_size_abi_v<_Ap>
3572  && (is_same_v<typename _V::abi_type, simd_abi::scalar>
3573  || (__is_fixed_size_abi_v<typename _V::abi_type>
3574  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
3575  )))
3576  {
3577  // fixed_size -> fixed_size (w/o padding) or scalar
3578 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
3579  const __may_alias<_Tp>* const __element_ptr
3580  = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
3581  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3582  auto __i) constexpr {
3583  return _V(__element_ptr + __i * _V::size(), vector_aligned);
3584  });
3585 #else
3586  const auto& __xx = __data(__x);
3587  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3588  auto __i) constexpr {
3589  [[maybe_unused]] constexpr size_t __offset
3590  = decltype(__i)::value * _V::size();
3591  return _V([&](auto __j) constexpr {
3592  constexpr _SizeConstant<__j + __offset> __k;
3593  return __xx[__k];
3594  });
3595  });
3596 #endif
3597  }
3598  else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
3599  {
3600  // normally memcpy should work here as well
3601  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3602  auto __i) constexpr { return __x[__i]; });
3603  }
3604  else
3605  {
3606  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3607  auto __i) constexpr {
3608  if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
3609  return _V([&](auto __j) constexpr {
3610  return __x[__i * _V::size() + __j];
3611  });
3612  else
3613  return _V(__private_init,
3614  __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
3615  });
3616  }
3617  }
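// Usage sketch (comment only; assumes the ABI of __x provides 8 float
// elements so that it divides evenly into two 4-element parts):
//   using _V4 = fixed_size_simd<float, 4>;
//   fixed_size_simd<float, 8> __x = /* ... */;
//   array<_V4, 2> __parts = split<_V4>(__x);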
3618 
3619 // }}}
3620 // split<simd_mask>(simd_mask) {{{
3621 template <typename _V, typename _Ap,
3622  size_t _Parts
3623  = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
3624  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
3625  _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
3626  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
3627  {
3628  if constexpr (is_same_v<_Ap, typename _V::abi_type>)
3629  return {__x};
3630  else if constexpr (_Parts == 1)
3631  return {__proposed::static_simd_cast<_V>(__x)};
3632  else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
3633  && __is_avx_abi<_Ap>())
3634  return {_V(__private_init, __lo128(__data(__x))),
3635  _V(__private_init, __hi128(__data(__x)))};
3636  else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
3637  {
3638  const bitset __bits = __x.__to_bitset();
3639  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3640  auto __i) constexpr {
3641  constexpr size_t __offset = __i * _V::size();
3642  return _V(__bitset_init, (__bits >> __offset).to_ullong());
3643  });
3644  }
3645  else
3646  {
3647  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3648  auto __i) constexpr {
3649  constexpr size_t __offset = __i * _V::size();
3650  return _V(
3651  __private_init, [&](auto __j) constexpr {
3652  return __x[__j + __offset];
3653  });
3654  });
3655  }
3656  }
3657 
3658 // }}}
3659 // split<_Sizes...>(simd) {{{
3660 template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
3661  _GLIBCXX_SIMD_ALWAYS_INLINE
3662  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3663  split(const simd<_Tp, _Ap>& __x)
3664  {
3665  using _SL = _SizeList<_Sizes...>;
3666  using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
3667  constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
3668  constexpr size_t _N0 = _SL::template _S_at<0>();
3669  using _V = __deduced_simd<_Tp, _N0>;
3670 
3671  if (__x._M_is_constprop())
3672  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3673  auto __i) constexpr {
3674  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3675  constexpr size_t __offset = _SL::_S_before(__i);
3676  return _Vi([&](auto __j) constexpr { return __x[__offset + __j]; });
3677  });
3678  else if constexpr (_Np == _N0)
3679  {
3680  static_assert(sizeof...(_Sizes) == 1);
3681  return {simd_cast<_V>(__x)};
3682  }
3683  else if constexpr // split from fixed_size, such that __x::first.size == _N0
3684  (__is_fixed_size_abi_v<
3685  _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
3686  {
3687  static_assert(
3688  !__is_fixed_size_abi_v<typename _V::abi_type>,
3689  "How can <_Tp, _Np> be __a single _SimdTuple entry but __a "
3690  "fixed_size_simd "
3691  "when deduced?");
3692  // extract first and recurse (__split_wrapper is needed to deduce a new
3693  // _Sizes pack)
3694  return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
3695  __split_wrapper(_SL::template _S_pop_front<1>(),
3696  __data(__x).second));
3697  }
3698  else if constexpr ((!is_same_v<simd_abi::scalar,
3699  simd_abi::deduce_t<_Tp, _Sizes>> && ...)
3700  && (!__is_fixed_size_abi_v<
3701  simd_abi::deduce_t<_Tp, _Sizes>> && ...))
3702  {
3703  if constexpr (((_Sizes * 2 == _Np) && ...))
3704  return {{__private_init, __extract_part<0, 2>(__data(__x))},
3705  {__private_init, __extract_part<1, 2>(__data(__x))}};
3706  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3707  _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
3708  return {{__private_init, __extract_part<0, 3>(__data(__x))},
3709  {__private_init, __extract_part<1, 3>(__data(__x))},
3710  {__private_init, __extract_part<2, 3>(__data(__x))}};
3711  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3712  _SizeList<2 * _Np / 3, _Np / 3>>)
3713  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
3714  {__private_init, __extract_part<2, 3>(__data(__x))}};
3715  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3716  _SizeList<_Np / 3, 2 * _Np / 3>>)
3717  return {{__private_init, __extract_part<0, 3>(__data(__x))},
3718  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
3719  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3720  _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
3721  return {{__private_init, __extract_part<0, 2>(__data(__x))},
3722  {__private_init, __extract_part<2, 4>(__data(__x))},
3723  {__private_init, __extract_part<3, 4>(__data(__x))}};
3724  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3725  _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
3726  return {{__private_init, __extract_part<0, 4>(__data(__x))},
3727  {__private_init, __extract_part<1, 4>(__data(__x))},
3728  {__private_init, __extract_part<1, 2>(__data(__x))}};
3729  else if constexpr (is_same_v<_SizeList<_Sizes...>,
3730  _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
3731  return {{__private_init, __extract_part<0, 4>(__data(__x))},
3732  {__private_init, __extract_center(__data(__x))},
3733  {__private_init, __extract_part<3, 4>(__data(__x))}};
3734  else if constexpr (((_Sizes * 4 == _Np) && ...))
3735  return {{__private_init, __extract_part<0, 4>(__data(__x))},
3736  {__private_init, __extract_part<1, 4>(__data(__x))},
3737  {__private_init, __extract_part<2, 4>(__data(__x))},
3738  {__private_init, __extract_part<3, 4>(__data(__x))}};
3739  // else fall through
3740  }
3741 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
3742  const __may_alias<_Tp>* const __element_ptr
3743  = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
3744  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3745  auto __i) constexpr {
3746  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3747  constexpr size_t __offset = _SL::_S_before(__i);
3748  constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
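// The next three lines compute the largest power of two known to divide
// the byte offset of this part: __a is the distance in bytes up to the
// next __base_align boundary, and ((__a - 1) & __a) ^ __a isolates the
// lowest set bit of __a, which is the provable alignment of
// __element_ptr + __offset (the __b == 0 case is merely defensive).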
3749  constexpr size_t __a
3750  = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
3751  constexpr size_t __b = ((__a - 1) & __a) ^ __a;
3752  constexpr size_t __alignment = __b == 0 ? __a : __b;
3753  return _Vi(__element_ptr + __offset, overaligned<__alignment>);
3754  });
3755 #else
3756  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3757  auto __i) constexpr {
3758  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3759  const auto& __xx = __data(__x);
3760  using _Offset = decltype(_SL::_S_before(__i));
3761  return _Vi([&](auto __j) constexpr {
3762  constexpr _SizeConstant<_Offset::value + __j> __k;
3763  return __xx[__k];
3764  });
3765  });
3766 #endif
3767  }
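// Usage sketch (comment only): splitting a 16-element simd into parts of
// 4, 4, and 8 elements (the _Sizes must add up to the input width):
//   auto [__a, __b, __c] = split<4, 4, 8>(__x);
//   // __a, __b: __deduced_simd<_Tp, 4>; __c: __deduced_simd<_Tp, 8>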
3768 
3769 // }}}
3770 
3771 // __subscript_in_pack {{{
3772 template <size_t _I, typename _Tp, typename _Ap, typename... _As>
3773  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
3774  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
3775  {
3776  if constexpr (_I < simd_size_v<_Tp, _Ap>)
3777  return __x[_I];
3778  else
3779  return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
3780  }
3781 
3782 // }}}
3783 // __store_pack_of_simd {{{
3784 template <typename _Tp, typename _A0, typename... _As>
3785  _GLIBCXX_SIMD_INTRINSIC void
3786  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0,
3787  const simd<_Tp, _As>&... __xs)
3788  {
3789  constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
3790  __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
3791  if constexpr (sizeof...(__xs) > 0)
3792  __store_pack_of_simd(__mem + __n_bytes, __xs...);
3793  }
3794 
3795 // }}}
3796 // concat(simd...) {{{
3797 template <typename _Tp, typename... _As>
3798  inline _GLIBCXX_SIMD_CONSTEXPR
3799  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
3800  concat(const simd<_Tp, _As>&... __xs)
3801  {
3802  using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
3803  if constexpr (sizeof...(__xs) == 1)
3804  return simd_cast<_Rp>(__xs...);
3805  else if ((... && __xs._M_is_constprop()))
3806  return simd<_Tp,
3807  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>([&](
3808  auto __i) constexpr { return __subscript_in_pack<__i>(__xs...); });
3809  else
3810  {
3811  _Rp __r{};
3812  __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
3813  return __r;
3814  }
3815  }
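// concat is the inverse of split (comment-only sketch; 8-element __x
// assumed):
//   auto [__lo, __hi] = split<4, 4>(__x);
//   auto __y = concat(__lo, __hi); // __y[__i] == __x[__i] for all lanes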
3816 
3817 // }}}
3818 // concat(array<simd>) {{{
3819 template <typename _Tp, typename _Abi, size_t _Np>
3820  _GLIBCXX_SIMD_ALWAYS_INLINE
3821  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
3822  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
3823  {
3824  return __call_with_subscripts<_Np>(__x, [](const auto&... __xs) {
3825  return concat(__xs...);
3826  });
3827  }
3828 
3829 // }}}
3830 
3831 // _SmartReference {{{
3832 template <typename _Up, typename _Accessor = _Up,
3833  typename _ValueType = typename _Up::value_type>
3834  class _SmartReference
3835  {
3836  friend _Accessor;
3837  int _M_index;
3838  _Up& _M_obj;
3839 
3840  _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType _M_read() const noexcept
3841  {
3842  if constexpr (is_arithmetic_v<_Up>)
3843  return _M_obj;
3844  else
3845  return _M_obj[_M_index];
3846  }
3847 
3848  template <typename _Tp>
3849  _GLIBCXX_SIMD_INTRINSIC constexpr void _M_write(_Tp&& __x) const
3850  { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
3851 
3852  public:
3853  _GLIBCXX_SIMD_INTRINSIC constexpr
3854  _SmartReference(_Up& __o, int __i) noexcept
3855  : _M_index(__i), _M_obj(__o) {}
3856 
3857  using value_type = _ValueType;
3858 
3859  _GLIBCXX_SIMD_INTRINSIC _SmartReference(const _SmartReference&) = delete;
3860 
3861  _GLIBCXX_SIMD_INTRINSIC constexpr operator value_type() const noexcept
3862  { return _M_read(); }
3863 
3864  template <typename _Tp,
3865  typename
3866  = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
3867  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator=(_Tp&& __x) &&
3868  {
3869  _M_write(static_cast<_Tp&&>(__x));
3870  return {_M_obj, _M_index};
3871  }
3872 
3873 #define _GLIBCXX_SIMD_OP_(__op) \
3874  template <typename _Tp, \
3875  typename _TT \
3876  = decltype(declval<value_type>() __op declval<_Tp>()), \
3877  typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
3878  typename = _ValuePreservingOrInt<_TT, value_type>> \
3879  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
3880  operator __op##=(_Tp&& __x) && \
3881  { \
3882  const value_type& __lhs = _M_read(); \
3883  _M_write(__lhs __op __x); \
3884  return {_M_obj, _M_index}; \
3885  }
3886  _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
3887  _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
3888  _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
3889 #undef _GLIBCXX_SIMD_OP_
3890 
3891  template <typename _Tp = void,
3892  typename
3893  = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
3894  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator++() &&
3895  {
3896  value_type __x = _M_read();
3897  _M_write(++__x);
3898  return {_M_obj, _M_index};
3899  }
3900 
3901  template <typename _Tp = void,
3902  typename
3903  = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
3904  _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator++(int) &&
3905  {
3906  const value_type __r = _M_read();
3907  value_type __x = __r;
3908  _M_write(++__x);
3909  return __r;
3910  }
3911 
3912  template <typename _Tp = void,
3913  typename
3914  = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
3915  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator--() &&
3916  {
3917  value_type __x = _M_read();
3918  _M_write(--__x);
3919  return {_M_obj, _M_index};
3920  }
3921 
3922  template <typename _Tp = void,
3923  typename
3924  = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
3925  _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator--(int) &&
3926  {
3927  const value_type __r = _M_read();
3928  value_type __x = __r;
3929  _M_write(--__x);
3930  return __r;
3931  }
3932 
3933  _GLIBCXX_SIMD_INTRINSIC friend void
3934  swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
3935  conjunction<
3936  is_nothrow_constructible<value_type, _SmartReference&&>,
3937  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
3938  {
3939  value_type __tmp = static_cast<_SmartReference&&>(__a);
3940  static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
3941  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
3942  }
3943 
3944  _GLIBCXX_SIMD_INTRINSIC friend void
3945  swap(value_type& __a, _SmartReference&& __b) noexcept(
3946  conjunction<
3947  is_nothrow_constructible<value_type, value_type&&>,
3948  is_nothrow_assignable<value_type&, value_type&&>,
3949  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
3950  {
3951  value_type __tmp(std::move(__a));
3952  __a = static_cast<value_type>(__b);
3953  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
3954  }
3955 
3956  _GLIBCXX_SIMD_INTRINSIC friend void
3957  swap(_SmartReference&& __a, value_type& __b) noexcept(
3958  conjunction<
3959  is_nothrow_constructible<value_type, _SmartReference&&>,
3960  is_nothrow_assignable<value_type&, value_type&&>,
3961  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
3962  {
3963  value_type __tmp(__a);
3964  static_cast<_SmartReference&&>(__a) = std::move(__b);
3965  __b = std::move(__tmp);
3966  }
3967  };
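// Note (comment only): _SmartReference is the proxy returned from
// simd::operator[] and simd_mask::operator[]. The mutating operators are
// &&-qualified and the copy constructor is deleted, so writes can only go
// through the short-lived temporary:
//   __v[0] = 1.f;       // OK: assigns through the proxy temporary
//   float __x = __v[0]; // OK: reads via operator value_type()
// Holding the proxy in a named lvalue and assigning to it later is
// rejected, because operator= cannot be called on an lvalue proxy.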
3968 
3969 // }}}
3970 // __scalar_abi_wrapper {{{
3971 template <int _Bytes>
3972  struct __scalar_abi_wrapper
3973  {
3974  template <typename _Tp> static constexpr size_t _S_full_size = 1;
3975  template <typename _Tp> static constexpr size_t _S_size = 1;
3976  template <typename _Tp> static constexpr bool _S_is_partial = false;
3977 
3978  template <typename _Tp, typename _Abi = simd_abi::scalar>
3979  static constexpr bool _S_is_valid_v
3980  = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
3981  };
3982 
3983 // }}}
3984 // __decay_abi metafunction {{{
3985 template <typename _Tp>
3986  struct __decay_abi { using type = _Tp; };
3987 
3988 template <int _Bytes>
3989  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
3990  { using type = simd_abi::scalar; };
3991 
3992 // }}}
3993 // __find_next_valid_abi metafunction {{{1
3994 // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
3995 // true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
3996 // recursion at 2 elements in the resulting ABI tag. In this case
3997 // type::_S_is_valid_v<_Tp> may be false.
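// e.g. (illustration): for _Abi = simd_abi::_VecBuiltin, _Bytes = 24, and
// _Tp = float, __bit_ceil(24) / 2 == 16, so _VecBuiltin<16> is tried
// next; if it is valid and not partial for float, type is _VecBuiltin<16>.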
3998 template <template <int> class _Abi, int _Bytes, typename _Tp>
3999  struct __find_next_valid_abi
4000  {
4001  static constexpr auto _S_choose()
4002  {
4003  constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4004  using _NextAbi = _Abi<_NextBytes>;
4005  if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4006  return _Abi<_Bytes>();
4007  else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4008  && _NextAbi::template _S_is_valid_v<_Tp>)
4009  return _NextAbi();
4010  else
4011  return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4012  }
4013 
4014  using type = decltype(_S_choose());
4015  };
4016 
4017 template <int _Bytes, typename _Tp>
4018  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4019  { using type = simd_abi::scalar; };
4020 
4021 // _AbiList {{{1
4022 template <template <int> class...>
4023  struct _AbiList
4024  {
4025  template <typename, int> static constexpr bool _S_has_valid_abi = false;
4026  template <typename, int> using _FirstValidAbi = void;
4027  template <typename, int> using _BestAbi = void;
4028  };
4029 
4030 template <template <int> class _A0, template <int> class... _Rest>
4031  struct _AbiList<_A0, _Rest...>
4032  {
4033  template <typename _Tp, int _Np>
4034  static constexpr bool _S_has_valid_abi
4035  = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4036  _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4037 
4038  template <typename _Tp, int _Np>
4039  using _FirstValidAbi = conditional_t<
4040  _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4041  typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4042  typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4043 
4044  template <typename _Tp, int _Np>
4045  static constexpr auto _S_determine_best_abi()
4046  {
4047  static_assert(_Np >= 1);
4048  constexpr int _Bytes = sizeof(_Tp) * _Np;
4049  if constexpr (_Np == 1)
4050  return __make_dependent_t<_Tp, simd_abi::scalar>{};
4051  else
4052  {
4053  constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4054  // _A0<_Bytes> is good if:
4055  // 1. The ABI tag is valid for _Tp
4056  // 2. The storage overhead is no more than padding to fill the next
4057  // power-of-2 number of bytes
4058  if constexpr (_A0<_Bytes>::template _S_is_valid_v<
4059  _Tp> && __fullsize / 2 < _Np)
4060  return typename __decay_abi<_A0<_Bytes>>::type{};
4061  else
4062  {
4063  using _B =
4064  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4065  if constexpr (_B::template _S_is_valid_v<
4066  _Tp> && _B::template _S_size<_Tp> <= _Np)
4067  return _B{};
4068  else
4069  return
4070  typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4071  }
4072  }
4073  }
4074 
4075  template <typename _Tp, int _Np>
4076  using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4077  };
4078 
4079 // }}}1
4080 
4081 // the following lists all native ABIs, which makes them accessible to
4082 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4083 // matters: Whatever comes first has higher priority.
4084 using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4085  __scalar_abi_wrapper>;
4086 
4087 // valid _SimdTraits specialization {{{1
4088 template <typename _Tp, typename _Abi>
4089  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4090  : _Abi::template __traits<_Tp> {};
4091 
4092 // __deduce_impl specializations {{{1
4093 // try all native ABIs (including scalar) first
4094 template <typename _Tp, size_t _Np>
4095  struct __deduce_impl<
4096  _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4097  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4098 
4099 // fall back to fixed_size only if scalar and native ABIs don't match
4100 template <typename _Tp, size_t _Np, typename = void>
4101  struct __deduce_fixed_size_fallback {};
4102 
4103 template <typename _Tp, size_t _Np>
4104  struct __deduce_fixed_size_fallback<_Tp, _Np,
4105  enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4106  { using type = simd_abi::fixed_size<_Np>; };
4107 
4108 template <typename _Tp, size_t _Np, typename>
4109  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4110 
4111 //}}}1
4112 
4113 // simd_mask {{{
4114 template <typename _Tp, typename _Abi>
4115  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4116  {
4117  // types, tags, and friends {{{
4118  using _Traits = _SimdTraits<_Tp, _Abi>;
4119  using _MemberType = typename _Traits::_MaskMember;
4120 
4121  // We map all masks with equal element sizeof to a single integer type, the
4122  // one given by __int_for_sizeof_t<_Tp>. This is the approach
4123  // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4124  // template specializations in the implementation classes.
4125  using _Ip = __int_for_sizeof_t<_Tp>;
4126  static constexpr _Ip* _S_type_tag = nullptr;
4127 
4128  friend typename _Traits::_MaskBase;
4129  friend class simd<_Tp, _Abi>; // to construct masks on return
4130  friend typename _Traits::_SimdImpl; // to construct masks on return and
4131  // inspect data on masked operations
4132  public:
4133  using _Impl = typename _Traits::_MaskImpl;
4134  friend _Impl;
4135 
4136  // }}}
4137  // member types {{{
4138  using value_type = bool;
4139  using reference = _SmartReference<_MemberType, _Impl, value_type>;
4140  using simd_type = simd<_Tp, _Abi>;
4141  using abi_type = _Abi;
4142 
4143  // }}}
4144  static constexpr size_t size() // {{{
4145  { return __size_or_zero_v<_Tp, _Abi>; }
4146 
4147  // }}}
4148  // constructors & assignment {{{
4149  simd_mask() = default;
4150  simd_mask(const simd_mask&) = default;
4151  simd_mask(simd_mask&&) = default;
4152  simd_mask& operator=(const simd_mask&) = default;
4153  simd_mask& operator=(simd_mask&&) = default;
4154 
4155  // }}}
4156  // access to internal representation (optional feature) {{{
4157  _GLIBCXX_SIMD_ALWAYS_INLINE explicit
4158  simd_mask(typename _Traits::_MaskCastType __init)
4159  : _M_data{__init} {}
4160  // conversions to the internal type are done in _MaskBase
4161 
4162  // }}}
4163  // bitset interface (extension to be proposed) {{{
4164  // TS_FEEDBACK:
4165  // Conversion of simd_mask to and from bitset makes it much easier to
4166  // interface with other facilities. I suggest adding `static
4167  // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4168  _GLIBCXX_SIMD_ALWAYS_INLINE static simd_mask
4169  __from_bitset(bitset<size()> __bs)
4170  { return {__bitset_init, __bs}; }
4171 
4172  _GLIBCXX_SIMD_ALWAYS_INLINE bitset<size()>
4173  __to_bitset() const
4174  { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
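// Comment-only round-trip sketch:
//   simd_mask<float> __k = /* ... */;
//   auto __bits = __k.__to_bitset();                      // bitset<size()>
//   auto __k2 = simd_mask<float>::__from_bitset(__bits);
//   // all_of(__k == __k2) holds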
4175 
4176  // }}}
4177  // explicit broadcast constructor {{{
4178  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4179  simd_mask(value_type __x)
4180  : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4181 
4182  // }}}
4183  // implicit type conversion constructor {{{
4184  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4185  // proposed improvement
4186  template <typename _Up, typename _A2,
4187  typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4188  _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4189  != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4190  simd_mask(const simd_mask<_Up, _A2>& __x)
4191  : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4192  #else
4193  // conforming to ISO/IEC 19570:2018
4194  template <typename _Up, typename = enable_if_t<conjunction<
4195  is_same<abi_type, simd_abi::fixed_size<size()>>,
4196  is_same<_Up, _Up>>::value>>
4197  _GLIBCXX_SIMD_ALWAYS_INLINE
4198  simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4199  : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4200  #endif
4201 
4202  // }}}
4203  // load constructor {{{
4204  template <typename _Flags>
4205  _GLIBCXX_SIMD_ALWAYS_INLINE
4206  simd_mask(const value_type* __mem, _Flags)
4207  : _M_data(_Impl::template _S_load<_Ip>(
4208  _Flags::template _S_apply<simd_mask>(__mem))) {}
4209 
4210  template <typename _Flags>
4211  _GLIBCXX_SIMD_ALWAYS_INLINE
4212  simd_mask(const value_type* __mem, simd_mask __k, _Flags)
4213  : _M_data{}
4214  {
4215  _M_data
4216  = _Impl::_S_masked_load(_M_data, __k._M_data,
4217  _Flags::template _S_apply<simd_mask>(__mem));
4218  }
4219 
4220  // }}}
4221  // loads [simd_mask.load] {{{
4222  template <typename _Flags>
4223  _GLIBCXX_SIMD_ALWAYS_INLINE void
4224  copy_from(const value_type* __mem, _Flags)
4225  {
4226  _M_data = _Impl::template _S_load<_Ip>(
4227  _Flags::template _S_apply<simd_mask>(__mem));
4228  }
4229 
4230  // }}}
4231  // stores [simd_mask.store] {{{
4232  template <typename _Flags>
4233  _GLIBCXX_SIMD_ALWAYS_INLINE void
4234  copy_to(value_type* __mem, _Flags) const
4235  { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4236 
4237  // }}}
4238  // scalar access {{{
4239  _GLIBCXX_SIMD_ALWAYS_INLINE reference
4240  operator[](size_t __i)
4241  {
4242  if (__i >= size())
4243  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4244  return {_M_data, int(__i)};
4245  }
4246 
4247  _GLIBCXX_SIMD_ALWAYS_INLINE value_type
4248  operator[](size_t __i) const
4249  {
4250  if (__i >= size())
4251  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4252  if constexpr (__is_scalar_abi<_Abi>())
4253  return _M_data;
4254  else
4255  return static_cast<bool>(_M_data[__i]);
4256  }
4257 
4258  // }}}
4259  // negation {{{
4260  _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask
4261  operator!() const
4262  { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4263 
4264  // }}}
4265  // simd_mask binary operators [simd_mask.binary] {{{
4266  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4267  // simd_mask<int> && simd_mask<uint> needs disambiguation
4268  template <typename _Up, typename _A2,
4269  typename
4270  = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4271  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4272  operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4273  {
4274  return {__private_init,
4275  _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4276  }
4277 
4278  template <typename _Up, typename _A2,
4279  typename
4280  = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4281  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4282  operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4283  {
4284  return {__private_init,
4285  _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4286  }
4287  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4288 
4289  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4290  operator&&(const simd_mask& __x, const simd_mask& __y)
4291  {
4292  return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)};
4293  }
4294 
4295  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4296  operator||(const simd_mask& __x, const simd_mask& __y)
4297  {
4298  return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)};
4299  }
4300 
4301  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4302  operator&(const simd_mask& __x, const simd_mask& __y)
4303  { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4304 
4305  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4306  operator|(const simd_mask& __x, const simd_mask& __y)
4307  { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4308 
4309  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4310  operator^(const simd_mask& __x, const simd_mask& __y)
4311  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4312 
4313  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4314  operator&=(simd_mask& __x, const simd_mask& __y)
4315  {
4316  __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4317  return __x;
4318  }
4319 
4320  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4321  operator|=(simd_mask& __x, const simd_mask& __y)
4322  {
4323  __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4324  return __x;
4325  }
4326 
4327  _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4328  operator^=(simd_mask& __x, const simd_mask& __y)
4329  {
4330  __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4331  return __x;
4332  }
4333 
4334  // }}}
4335  // simd_mask compares [simd_mask.comparison] {{{
4336  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4337  operator==(const simd_mask& __x, const simd_mask& __y)
4338  { return !operator!=(__x, __y); }
4339 
4340  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4341  operator!=(const simd_mask& __x, const simd_mask& __y)
4342  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4343 
4344  // }}}
4345  // private_init ctor {{{
4346  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4347  simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4348  : _M_data(__init) {}
4349 
4350  // }}}
4351  // private_init generator ctor {{{
4352  template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4353  _GLIBCXX_SIMD_INTRINSIC constexpr
4354  simd_mask(_PrivateInit, _Fp&& __gen)
4355  : _M_data()
4356  {
4357  __execute_n_times<size()>([&](auto __i) constexpr {
4358  _Impl::_S_set(_M_data, __i, __gen(__i));
4359  });
4360  }
4361 
4362  // }}}
4363  // bitset_init ctor {{{
4364  _GLIBCXX_SIMD_INTRINSIC simd_mask(_BitsetInit, bitset<size()> __init)
4365  : _M_data(
4366  _Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4367  {}
4368 
4369  // }}}
4370  // __cvt {{{
4371  // TS_FEEDBACK:
4372  // The conversion operator this implements should be a ctor on simd_mask.
4373  // Once you call .__cvt() on a simd_mask it converts conveniently.
4374  // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4375  struct _CvtProxy
4376  {
4377  template <typename _Up, typename _A2,
4378  typename
4379  = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4380  operator simd_mask<_Up, _A2>() &&
4381  {
4382  using namespace std::experimental::__proposed;
4383  return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4384  }
4385 
4386  const simd_mask<_Tp, _Abi>& _M_data;
4387  };
4388 
4389  _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4390  __cvt() const
4391  { return {*this}; }
4392 
4393  // }}}
4394  // operator?: overloads (suggested extension) {{{
4395  #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4396  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4397  operator?:(const simd_mask& __k, const simd_mask& __where_true,
4398  const simd_mask& __where_false)
4399  {
4400  auto __ret = __where_false;
4401  _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4402  return __ret;
4403  }
4404 
4405  template <typename _U1, typename _U2,
4406  typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4407  typename = enable_if_t<conjunction_v<
4408  is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4409  is_convertible<simd_mask, typename _Rp::mask_type>>>>
4410  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4411  operator?:(const simd_mask& __k, const _U1& __where_true,
4412  const _U2& __where_false)
4413  {
4414  _Rp __ret = __where_false;
4415  _Rp::_Impl::_S_masked_assign(
4416  __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4417  __data(static_cast<_Rp>(__where_true)));
4418  return __ret;
4419  }
4420 
4421  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4422  template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4423  typename = enable_if_t<
4424  conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4425  is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4426  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4427  operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4428  const simd_mask<_Up, _Au>& __where_false)
4429  {
4430  simd_mask __ret = __where_false;
4431  _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4432  __where_true._M_data);
4433  return __ret;
4434  }
4435  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4436  #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4437 
4438  // }}}
4439  // _M_is_constprop {{{
4440  _GLIBCXX_SIMD_INTRINSIC constexpr bool
4441  _M_is_constprop() const
4442  {
4443  if constexpr (__is_scalar_abi<_Abi>())
4444  return __builtin_constant_p(_M_data);
4445  else
4446  return _M_data._M_is_constprop();
4447  }
4448 
4449  // }}}
4450 
4451  private:
4452  friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4453  friend auto& __data<_Tp, abi_type>(simd_mask&);
4454  alignas(_Traits::_S_mask_align) _MemberType _M_data;
4455  };
4456 
4457 // }}}
4458 
4459 // __data(simd_mask) {{{
4460 template <typename _Tp, typename _Ap>
4461  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
4462  __data(const simd_mask<_Tp, _Ap>& __x)
4463  { return __x._M_data; }
4464 
4465 template <typename _Tp, typename _Ap>
4466  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
4467  __data(simd_mask<_Tp, _Ap>& __x)
4468  { return __x._M_data; }
4469 
4470 // }}}
4471 
4472 // simd_mask reductions [simd_mask.reductions] {{{
4473 template <typename _Tp, typename _Abi>
4474  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4475  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4476  {
4477  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4478  {
4479  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4480  if (!__k[__i])
4481  return false;
4482  return true;
4483  }
4484  else
4485  return _Abi::_MaskImpl::_S_all_of(__k);
4486  }
4487 
4488 template <typename _Tp, typename _Abi>
4489  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4490  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4491  {
4492  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4493  {
4494  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4495  if (__k[__i])
4496  return true;
4497  return false;
4498  }
4499  else
4500  return _Abi::_MaskImpl::_S_any_of(__k);
4501  }
4502 
4503 template <typename _Tp, typename _Abi>
4504  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4505  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4506  {
4507  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4508  {
4509  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4510  if (__k[__i])
4511  return false;
4512  return true;
4513  }
4514  else
4515  return _Abi::_MaskImpl::_S_none_of(__k);
4516  }
4517 
4518 template <typename _Tp, typename _Abi>
4519  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4520  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4521  {
4522  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4523  {
4524  for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
4525  if (__k[__i] != __k[__i - 1])
4526  return true;
4527  return false;
4528  }
4529  else
4530  return _Abi::_MaskImpl::_S_some_of(__k);
4531  }
4532 
4533 template <typename _Tp, typename _Abi>
4534  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4535  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
4536  {
4537  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4538  {
4539  const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
4540  __k, [](auto... __elements) { return ((__elements != 0) + ...); });
4541  if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
4542  return __r;
4543  }
4544  return _Abi::_MaskImpl::_S_popcount(__k);
4545  }
4546 
4547 template <typename _Tp, typename _Abi>
4548  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4549  find_first_set(const simd_mask<_Tp, _Abi>& __k)
4550  {
4551  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4552  {
4553  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4554  const size_t _Idx = __call_with_n_evaluations<_Np>(
4555  [](auto... __indexes) { return std::min({__indexes...}); },
4556  [&](auto __i) { return __k[__i] ? +__i : _Np; });
4557  if (_Idx >= _Np)
4558  __invoke_ub("find_first_set(empty mask) is UB");
4559  if (__builtin_constant_p(_Idx))
4560  return _Idx;
4561  }
4562  return _Abi::_MaskImpl::_S_find_first_set(__k);
4563  }
4564 
4565 template <typename _Tp, typename _Abi>
4566  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4567  find_last_set(const simd_mask<_Tp, _Abi>& __k)
4568  {
4569  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4570  {
4571  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4572  const int _Idx = __call_with_n_evaluations<_Np>(
4573  [](auto... __indexes) { return std::max({__indexes...}); },
4574  [&](auto __i) { return __k[__i] ? int(__i) : -1; });
4575  if (_Idx < 0)
4576  __invoke_ub("find_first_set(empty mask) is UB");
4577  if (__builtin_constant_p(_Idx))
4578  return _Idx;
4579  }
4580  return _Abi::_MaskImpl::_S_find_last_set(__k);
4581  }
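// Comment-only example for __k == {false, true, true, false}:
//   all_of(__k) == false   any_of(__k) == true   none_of(__k) == false
//   some_of(__k) == true   popcount(__k) == 2
//   find_first_set(__k) == 1   find_last_set(__k) == 2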
4582 
4583 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4584 all_of(_ExactBool __x) noexcept
4585 { return __x; }
4586 
4587 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4588 any_of(_ExactBool __x) noexcept
4589 { return __x; }
4590 
4591 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4592 none_of(_ExactBool __x) noexcept
4593 { return !__x; }
4594 
4595 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4596 some_of(_ExactBool) noexcept
4597 { return false; }
4598 
4599 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4600 popcount(_ExactBool __x) noexcept
4601 { return __x; }
4602 
4603 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4604 find_first_set(_ExactBool)
4605 { return 0; }
4606 
4607 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4608 find_last_set(_ExactBool)
4609 { return 0; }
4610 
4611 // }}}
4612 
4613 // _SimdIntOperators{{{1
4614 template <typename _V, typename _Impl, bool>
4615  class _SimdIntOperators {};
4616 
4617 template <typename _V, typename _Impl>
4618  class _SimdIntOperators<_V, _Impl, true>
4619  {
4620  _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const
4621  { return *static_cast<const _V*>(this); }
4622 
4623  template <typename _Tp>
4624  _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
4625  _S_make_derived(_Tp&& __d)
4626  { return {__private_init, static_cast<_Tp&&>(__d)}; }
4627 
4628  public:
4629  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x)
4630  { return __lhs = __lhs % __x; }
4631 
4632  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x)
4633  { return __lhs = __lhs & __x; }
4634 
4635  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x)
4636  { return __lhs = __lhs | __x; }
4637 
4638  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x)
4639  { return __lhs = __lhs ^ __x; }
4640 
4641  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x)
4642  { return __lhs = __lhs << __x; }
4643 
4644  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x)
4645  { return __lhs = __lhs >> __x; }
4646 
4647  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x)
4648  { return __lhs = __lhs << __x; }
4649 
4650  _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x)
4651  { return __lhs = __lhs >> __x; }
4652 
4653  _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y)
4654  {
4655  return _SimdIntOperators::_S_make_derived(
4656  _Impl::_S_modulus(__data(__x), __data(__y)));
4657  }
4658 
4659  _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y)
4660  {
4661  return _SimdIntOperators::_S_make_derived(
4662  _Impl::_S_bit_and(__data(__x), __data(__y)));
4663  }
4664 
4665  _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y)
4666  {
4667  return _SimdIntOperators::_S_make_derived(
4668  _Impl::_S_bit_or(__data(__x), __data(__y)));
4669  }
4670 
4671  _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y)
4672  {
4673  return _SimdIntOperators::_S_make_derived(
4674  _Impl::_S_bit_xor(__data(__x), __data(__y)));
4675  }
4676 
4677  _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, const _V& __y)
4678  {
4679  return _SimdIntOperators::_S_make_derived(
4680  _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
4681  }
4682 
4683  _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y)
4684  {
4685  return _SimdIntOperators::_S_make_derived(
4686  _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
4687  }
4688 
4689  template <typename _VV = _V>
4690  _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y)
4691  {
4692  using _Tp = typename _VV::value_type;
4693  if (__y < 0)
4694  __invoke_ub("The behavior is undefined if the right operand of a "
4695  "shift operation is negative. [expr.shift]\nA shift by "
4696  "%d was requested",
4697  __y);
4698  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
4699  __invoke_ub(
4700  "The behavior is undefined if the right operand of a "
4701  "shift operation is greater than or equal to the width of the "
4702  "promoted left operand. [expr.shift]\nA shift by %d was requested",
4703  __y);
4704  return _SimdIntOperators::_S_make_derived(
4705  _Impl::_S_bit_shift_left(__data(__x), __y));
4706  }
4707 
4708  template <typename _VV = _V>
4709  _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y)
4710  {
4711  using _Tp = typename _VV::value_type;
4712  if (__y < 0)
4713  __invoke_ub(
4714  "The behavior is undefined if the right operand of a shift "
4715  "operation is negative. [expr.shift]\nA shift by %d was requested",
4716  __y);
4717  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
4718  __invoke_ub(
4719  "The behavior is undefined if the right operand of a shift "
4720  "operation is greater than or equal to the width of the promoted "
4721  "left operand. [expr.shift]\nA shift by %d was requested",
4722  __y);
4723  return _SimdIntOperators::_S_make_derived(
4724  _Impl::_S_bit_shift_right(__data(__x), __y));
4725  }
4726 
4727  // unary operators (for integral _Tp)
4728  _GLIBCXX_SIMD_CONSTEXPR _V operator~() const
4729  { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
4730  };
4731 
4732 //}}}1
4733 
4734 // simd {{{
4735 template <typename _Tp, typename _Abi>
4736  class simd : public _SimdIntOperators<
4737  simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl,
4738  conjunction<is_integral<_Tp>,
4739  typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
4740  public _SimdTraits<_Tp, _Abi>::_SimdBase
4741  {
4742  using _Traits = _SimdTraits<_Tp, _Abi>;
4743  using _MemberType = typename _Traits::_SimdMember;
4744  using _CastType = typename _Traits::_SimdCastType;
4745  static constexpr _Tp* _S_type_tag = nullptr;
4746  friend typename _Traits::_SimdBase;
4747 
4748  public:
4749  using _Impl = typename _Traits::_SimdImpl;
4750  friend _Impl;
4751  friend _SimdIntOperators<simd, _Impl, true>;
4752 
4753  using value_type = _Tp;
4754  using reference = _SmartReference<_MemberType, _Impl, value_type>;
4755  using mask_type = simd_mask<_Tp, _Abi>;
4756  using abi_type = _Abi;
4757 
4758  static constexpr size_t size()
4759  { return __size_or_zero_v<_Tp, _Abi>; }
4760 
4761  _GLIBCXX_SIMD_CONSTEXPR simd() = default;
4762  _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
4763  _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
4764  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
4765  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
4766 
4767  // implicit broadcast constructor
4768  template <typename _Up,
4769  typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
4770  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4771  simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
4772  : _M_data(
4773  _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
4774  {}
4775 
4776  // implicit type conversion constructor (convert from fixed_size to
4777  // fixed_size)
4778  template <typename _Up>
4779  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4780  simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
4781  enable_if_t<
4782  conjunction<
4783  is_same<simd_abi::fixed_size<size()>, abi_type>,
4784  negation<__is_narrowing_conversion<_Up, value_type>>,
4785  __converts_to_higher_integer_rank<_Up, value_type>>::value,
4786  void*> = nullptr)
4787  : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
4788 
4789  // explicit type conversion constructor
4790 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
4791  template <typename _Up, typename _A2,
4792  typename = decltype(static_simd_cast<simd>(
4793  declval<const simd<_Up, _A2>&>()))>
4794  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4795  simd(const simd<_Up, _A2>& __x)
4796  : simd(static_simd_cast<simd>(__x)) {}
4797 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
4798 
4799  // generator constructor
4800  template <typename _Fp>
4801  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4802  simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
4803  declval<_SizeConstant<0>&>())),
4804  value_type>* = nullptr)
4805  : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
4806 
4807  // load constructor
4808  template <typename _Up, typename _Flags>
4809  _GLIBCXX_SIMD_ALWAYS_INLINE
4810  simd(const _Up* __mem, _Flags)
4811  : _M_data(
4812  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
4813  {}
4814 
4815  // loads [simd.load]
4816  template <typename _Up, typename _Flags>
4817  _GLIBCXX_SIMD_ALWAYS_INLINE void
4818  copy_from(const _Vectorizable<_Up>* __mem, _Flags)
4819  {
4820  _M_data = static_cast<decltype(_M_data)>(
4821  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
4822  }
4823 
4824  // stores [simd.store]
4825  template <typename _Up, typename _Flags>
4826  _GLIBCXX_SIMD_ALWAYS_INLINE void
4827  copy_to(_Vectorizable<_Up>* __mem, _Flags) const
4828  {
4829  _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
4830  _S_type_tag);
4831  }
4832 
4833  // scalar access
4834  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4835  operator[](size_t __i)
4836  { return {_M_data, int(__i)}; }
4837 
4838  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4839  operator[]([[maybe_unused]] size_t __i) const
4840  {
4841  if constexpr (__is_scalar_abi<_Abi>())
4842  {
4843  _GLIBCXX_DEBUG_ASSERT(__i == 0);
4844  return _M_data;
4845  }
4846  else
4847  return _M_data[__i];
4848  }
4849 
4850  // increment and decrement:
4851  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
4852  operator++()
4853  {
4854  _Impl::_S_increment(_M_data);
4855  return *this;
4856  }
4857 
4858  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
4859  operator++(int)
4860  {
4861  simd __r = *this;
4862  _Impl::_S_increment(_M_data);
4863  return __r;
4864  }
4865 
4866  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
4867  operator--()
4868  {
4869  _Impl::_S_decrement(_M_data);
4870  return *this;
4871  }
4872 
4873  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
4874  operator--(int)
4875  {
4876  simd __r = *this;
4877  _Impl::_S_decrement(_M_data);
4878  return __r;
4879  }
4880 
4881  // unary operators (for any _Tp)
4882  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
4883  operator!() const
4884  { return {__private_init, _Impl::_S_negate(_M_data)}; }
4885 
4886  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
4887  operator+() const
4888  { return *this; }
4889 
4890  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
4891  operator-() const
4892  { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
4893 
4894  // access to internal representation (suggested extension)
4895  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4896  simd(_CastType __init) : _M_data(__init) {}
4897 
4898  // compound assignment [simd.cassign]
4899  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
4900  operator+=(simd& __lhs, const simd& __x)
4901  { return __lhs = __lhs + __x; }
4902 
4903  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
4904  operator-=(simd& __lhs, const simd& __x)
4905  { return __lhs = __lhs - __x; }
4906 
4907  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
4908  operator*=(simd& __lhs, const simd& __x)
4909  { return __lhs = __lhs * __x; }
4910 
4911  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
4912  operator/=(simd& __lhs, const simd& __x)
4913  { return __lhs = __lhs / __x; }
4914 
4915  // binary operators [simd.binary]
4916  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
4917  operator+(const simd& __x, const simd& __y)
4918  { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
4919 
4920  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
4921  operator-(const simd& __x, const simd& __y)
4922  { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
4923 
4924  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
4925  operator*(const simd& __x, const simd& __y)
4926  { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
4927 
4928  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
4929  operator/(const simd& __x, const simd& __y)
4930  { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
4931 
    // compares [simd.comparison]
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator==(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator!=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(
        _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
    }

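    // operator> and operator>= reuse _S_less/_S_less_equal with swapped
    // arguments, so each backend only needs two comparison primitives.
    // Comparisons yield a mask_type, which is reduced with the free
    // functions all_of/any_of/none_of/popcount. User-side sketch:
    //   stdx::native_simd<int> v = 1;
    //   auto k = v == 1;              // mask_type, all true here
    //   if (stdx::all_of(k)) { /* taken */ }
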
    // operator?: overloads (suggested extension) {{{
#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator?:(const mask_type& __k, const simd& __where_true,
               const simd& __where_false)
    {
      auto __ret = __where_false;
      _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
      return __ret;
    }

#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
    // }}}

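    // This overload is only compiled when the compiler advertises an
    // overloadable conditional operator, a non-standard extension. The
    // portable way to express the same blend is the where() interface.
    // User-side sketch (stdx alias as above):
    //   stdx::native_simd<int> a = 1, b = 2;
    //   auto k = a < b;               // all true here
    //   stdx::native_simd<int> r = b;
    //   where(k, r) = a;              // r takes a's value where k is true
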
    // "private" because of the first argument's namespace
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    simd(_PrivateInit, const _MemberType& __init)
    : _M_data(__init) {}

    // "private" because of the first argument's namespace
    _GLIBCXX_SIMD_INTRINSIC
    simd(_BitsetInit, bitset<size()> __init) : _M_data()
    { where(mask_type(__bitset_init, __init), *this) = ~*this; }

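    // The _BitsetInit constructor value-initializes _M_data to all zeros
    // and then uses where() to assign ~0 (all bits one) to every element
    // selected by the bitset, i.e. it materializes the bitset as a vector
    // of all-ones/all-zeros elements.
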
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        return __builtin_constant_p(_M_data);
      else
        return _M_data._M_is_constprop();
    }

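    // _M_is_constprop reports whether the value is known to the compiler
    // at compile time (via __builtin_constant_p), which lets the
    // implementation prefer constant-folding-friendly code paths over
    // intrinsics.
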
  private:
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR static mask_type
    _S_make_mask(typename mask_type::_MemberType __k)
    { return {__private_init, __k}; }

    friend const auto& __data<value_type, abi_type>(const simd&);
    friend auto& __data<value_type, abi_type>(simd&);
    alignas(_Traits::_S_simd_align) _MemberType _M_data;
  };

// }}}
// __data {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x)
  { return __x._M_data; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x)
  { return __x._M_data; }

// }}}
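// __data is the implementation's escape hatch: befriended by simd above,
// it exposes the wrapped _MemberType so backend code (and the operator
// definitions in this file) can work on the raw vector without going
// through the element-wise public interface.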
namespace __float_bitwise_operators { //{{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    return {__private_init,
            _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))};
  }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    return {__private_init,
            _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))};
  }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    return {__private_init,
            _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))};
  }
} // namespace __float_bitwise_operators }}}

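// Bitwise operators are not part of the public simd interface for
// floating-point element types; this namespace lets implementation code
// opt in explicitly via a using-directive where bit manipulation of float
// patterns is wanted, e.g. clearing or transferring the sign bit for
// abs()/copysign(). A minimal sketch of the idiom (__absmask is a
// hypothetical all-but-sign-bit constant):
//   using namespace __float_bitwise_operators;
//   return __x & __absmask; // element-wise AND of the float bit patterns
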
_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H

// vim: foldmethod=marker foldmarker={{{,}}}