17#if defined(HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
20#undef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
22#define HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
39template <
class D,
class V>
41template <
class D,
class V>
54template <
class D,
class V>
56template <
class D,
class V>
69template <
class D,
class V>
71template <
class D,
class V>
84template <
class D,
class V>
86template <
class D,
class V>
99template <
class D,
class V>
101template <
class D,
class V>
114template <
class D,
class V>
116template <
class D,
class V>
129template <
class D,
class V>
131template <
class D,
class V>
144template <
class D,
class V>
146template <
class D,
class V>
159template <
class D,
class V>
161template <
class D,
class V>
174template <
class D,
class V>
176template <
class D,
class V>
189template <
class D,
class V>
191template <
class D,
class V>
204template <
class D,
class V>
206template <
class D,
class V>
219template <
class D,
class V>
221template <
class D,
class V>
234template <
class D,
class V>
236template <
class D,
class V>
249template <
class D,
class V>
251template <
class D,
class V>
264template <
class D,
class V>
266template <
class D,
class V>
335 T c6, T c7, T c8, T c9) {
345 T c6, T c7, T c8, T c9, T c10) {
355 T c6, T c7, T c8, T c9, T c10, T c11) {
365 T c6, T c7, T c8, T c9, T c10, T c11,
371 x8,
MulAdd(x4, c12,
MulAdd(x2,
MulAdd(c11, x, c10),
MulAdd(c9, x, c8))),
377 T c6, T c7, T c8, T c9, T c10, T c11,
390 T c6, T c7, T c8, T c9, T c10, T c11,
391 T c12, T c13, T c14) {
403 T c6, T c7, T c8, T c9, T c10, T c11,
404 T c12, T c13, T c14, T c15) {
416 T c6, T c7, T c8, T c9, T c10, T c11,
417 T c12, T c13, T c14, T c15, T c16) {
432 T c6, T c7, T c8, T c9, T c10, T c11,
433 T c12, T c13, T c14, T c15, T c16, T c17) {
448 T c6, T c7, T c8, T c9, T c10, T c11,
449 T c12, T c13, T c14, T c15, T c16, T c17,
464template <
class FloatOrDouble>
466template <
class FloatOrDouble>
468template <
class FloatOrDouble>
470template <
class FloatOrDouble>
472template <
class FloatOrDouble>
478 template <
class D,
class V>
480 const auto k0 =
Set(
d, +0.1666677296f);
481 const auto k1 =
Set(
d, +0.07495029271f);
482 const auto k2 =
Set(
d, +0.04547423869f);
483 const auto k3 =
Set(
d, +0.02424046025f);
484 const auto k4 =
Set(
d, +0.04197454825f);
486 return Estrin(x2, k0, k1, k2, k3, k4);
490#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
493struct AsinImpl<double> {
495 template <
class D,
class V>
497 const auto k0 =
Set(
d, +0.1666666666666497543);
498 const auto k1 =
Set(
d, +0.07500000000378581611);
499 const auto k2 =
Set(
d, +0.04464285681377102438);
500 const auto k3 =
Set(
d, +0.03038195928038132237);
501 const auto k4 =
Set(
d, +0.02237176181932048341);
502 const auto k5 =
Set(
d, +0.01735956991223614604);
503 const auto k6 =
Set(
d, +0.01388715184501609218);
504 const auto k7 =
Set(
d, +0.01215360525577377331);
505 const auto k8 =
Set(
d, +0.006606077476277170610);
506 const auto k9 =
Set(
d, +0.01929045477267910674);
507 const auto k10 =
Set(
d, -0.01581918243329996643);
508 const auto k11 =
Set(
d, +0.03161587650653934628);
510 return Estrin(x2, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11);
519 template <
class D,
class V>
521 const auto k0 =
Set(
d, -0.333331018686294555664062f);
522 const auto k1 =
Set(
d, +0.199926957488059997558594f);
523 const auto k2 =
Set(
d, -0.142027363181114196777344f);
524 const auto k3 =
Set(
d, +0.106347933411598205566406f);
525 const auto k4 =
Set(
d, -0.0748900920152664184570312f);
526 const auto k5 =
Set(
d, +0.0425049886107444763183594f);
527 const auto k6 =
Set(
d, -0.0159569028764963150024414f);
528 const auto k7 =
Set(
d, +0.00282363896258175373077393f);
530 const auto y =
Mul(x, x);
531 return MulAdd(
Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7),
Mul(y, x), x);
535#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
538struct AtanImpl<double> {
540 template <
class D,
class V>
542 const auto k0 =
Set(
d, -0.333333333333311110369124);
543 const auto k1 =
Set(
d, +0.199999999996591265594148);
544 const auto k2 =
Set(
d, -0.14285714266771329383765);
545 const auto k3 =
Set(
d, +0.111111105648261418443745);
546 const auto k4 =
Set(
d, -0.090908995008245008229153);
547 const auto k5 =
Set(
d, +0.0769219538311769618355029);
548 const auto k6 =
Set(
d, -0.0666573579361080525984562);
549 const auto k7 =
Set(
d, +0.0587666392926673580854313);
550 const auto k8 =
Set(
d, -0.0523674852303482457616113);
551 const auto k9 =
Set(
d, +0.0466667150077840625632675);
552 const auto k10 =
Set(
d, -0.0407629191276836500001934);
553 const auto k11 =
Set(
d, +0.0337852580001353069993897);
554 const auto k12 =
Set(
d, -0.0254517624932312641616861);
555 const auto k13 =
Set(
d, +0.016599329773529201970117);
556 const auto k14 =
Set(
d, -0.00889896195887655491740809);
557 const auto k15 =
Set(
d, +0.00370026744188713119232403);
558 const auto k16 =
Set(
d, -0.00110611831486672482563471);
559 const auto k17 =
Set(
d, +0.000209850076645816976906797);
560 const auto k18 =
Set(
d, -1.88796008463073496563746e-5);
562 const auto y =
Mul(x, x);
563 return MulAdd(
Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11,
564 k12, k13, k14, k15, k16, k17, k18),
574 template <
class D,
class V>
579 template <
class D,
class V>
581 const auto k0 =
Set(
d, -1.66666597127914428710938e-1f);
582 const auto k1 =
Set(
d, +8.33307858556509017944336e-3f);
583 const auto k2 =
Set(
d, -1.981069071916863322258e-4f);
584 const auto k3 =
Set(
d, +2.6083159809786593541503e-6f);
586 const auto y =
Mul(x, x);
590 template <
class D,
class V,
class VI32>
593 const V kHalfPiPart0f =
Set(
d, -0.5f * 3.140625f);
594 const V kHalfPiPart1f =
Set(
d, -0.5f * 0.0009670257568359375f);
595 const V kHalfPiPart2f =
Set(
d, -0.5f * 6.2771141529083251953e-7f);
596 const V kHalfPiPart3f =
Set(
d, -0.5f * 1.2154201256553420762e-10f);
600 x =
MulAdd(qf, kHalfPiPart0f, x);
601 x =
MulAdd(qf, kHalfPiPart1f, x);
602 x =
MulAdd(qf, kHalfPiPart2f, x);
603 x =
MulAdd(qf, kHalfPiPart3f, x);
607 template <
class D,
class V,
class VI32>
610 const V kPiPart0f =
Set(
d, -3.140625f);
611 const V kPiPart1f =
Set(
d, -0.0009670257568359375f);
612 const V kPiPart2f =
Set(
d, -6.2771141529083251953e-7f);
613 const V kPiPart3f =
Set(
d, -1.2154201256553420762e-10f);
617 x =
MulAdd(qf, kPiPart0f, x);
618 x =
MulAdd(qf, kPiPart1f, x);
619 x =
MulAdd(qf, kPiPart2f, x);
620 x =
MulAdd(qf, kPiPart3f, x);
625 template <
class D,
class VI32>
632 template <
class D,
class VI32>
639#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
642struct CosSinImpl<double> {
644 template <
class D,
class V>
649 template <
class D,
class V>
651 const auto k0 =
Set(
d, -0.166666666666666657414808);
652 const auto k1 =
Set(
d, +0.00833333333333332974823815);
653 const auto k2 =
Set(
d, -0.000198412698412696162806809);
654 const auto k3 =
Set(
d, +2.75573192239198747630416e-6);
655 const auto k4 =
Set(
d, -2.50521083763502045810755e-8);
656 const auto k5 =
Set(
d, +1.60590430605664501629054e-10);
657 const auto k6 =
Set(
d, -7.64712219118158833288484e-13);
658 const auto k7 =
Set(
d, +2.81009972710863200091251e-15);
659 const auto k8 =
Set(
d, -7.97255955009037868891952e-18);
661 const auto y =
Mul(x, x);
662 return MulAdd(
Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8),
Mul(y, x), x);
665 template <
class D,
class V,
class VI32>
668 const V kHalfPiPart0d =
Set(
d, -0.5 * 3.1415926218032836914);
669 const V kHalfPiPart1d =
Set(
d, -0.5 * 3.1786509424591713469e-8);
670 const V kHalfPiPart2d =
Set(
d, -0.5 * 1.2246467864107188502e-16);
671 const V kHalfPiPart3d =
Set(
d, -0.5 * 1.2736634327021899816e-24);
675 x =
MulAdd(qf, kHalfPiPart0d, x);
676 x =
MulAdd(qf, kHalfPiPart1d, x);
677 x =
MulAdd(qf, kHalfPiPart2d, x);
678 x =
MulAdd(qf, kHalfPiPart3d, x);
682 template <
class D,
class V,
class VI32>
685 const V kPiPart0d =
Set(
d, -3.1415926218032836914);
686 const V kPiPart1d =
Set(
d, -3.1786509424591713469e-8);
687 const V kPiPart2d =
Set(
d, -1.2246467864107188502e-16);
688 const V kPiPart3d =
Set(
d, -1.2736634327021899816e-24);
692 x =
MulAdd(qf, kPiPart0d, x);
693 x =
MulAdd(qf, kPiPart1d, x);
694 x =
MulAdd(qf, kPiPart2d, x);
695 x =
MulAdd(qf, kPiPart3d, x);
700 template <
class D,
class VI32>
701 HWY_INLINE Vec<Rebind<double, D>> CosSignFromQuadrant(D
d, VI32 q) {
702 const VI32 kTwo =
Set(Rebind<int32_t, D>(), 2);
708 template <
class D,
class VI32>
709 HWY_INLINE Vec<Rebind<double, D>> SinSignFromQuadrant(D
d, VI32 q) {
710 const VI32 kOne =
Set(Rebind<int32_t, D>(), 1);
712 d, ShiftLeft<63>(
PromoteTo(Rebind<int64_t, D>(),
And(q, kOne))));
721 template <
class D,
class V>
726 template <
class D,
class V>
728 const auto k0 =
Set(
d, +0.5f);
729 const auto k1 =
Set(
d, +0.166666671633720397949219f);
730 const auto k2 =
Set(
d, +0.0416664853692054748535156f);
731 const auto k3 =
Set(
d, +0.00833336077630519866943359f);
732 const auto k4 =
Set(
d, +0.00139304355252534151077271f);
733 const auto k5 =
Set(
d, +0.000198527617612853646278381f);
739 template <
class D,
class VI32>
742 const VI32 kOffset =
Set(di32, 0x7F);
747 template <
class D,
class V,
class VI32>
749 const VI32 y = ShiftRight<1>(e);
750 return Mul(
Mul(x, Pow2I(
d, y)), Pow2I(
d,
Sub(e, y)));
753 template <
class D,
class V,
class VI32>
756 const V kLn2Part0f =
Set(
d, -0.693145751953125f);
757 const V kLn2Part1f =
Set(
d, -1.428606765330187045e-6f);
761 x =
MulAdd(qf, kLn2Part0f, x);
762 x =
MulAdd(qf, kLn2Part1f, x);
769 template <
class D,
class V>
773 const auto kBias =
Set(di32, 0x7F);
778 template <
class D,
class V>
780 const V k0 =
Set(
d, 0.66666662693f);
781 const V k1 =
Set(
d, 0.40000972152f);
782 const V k2 =
Set(
d, 0.28498786688f);
783 const V k3 =
Set(
d, 0.24279078841f);
785 const V x2 =
Mul(x, x);
786 const V x4 =
Mul(x2, x2);
791#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
793struct ExpImpl<double> {
795 template <
class D,
class V>
800 template <
class D,
class V>
802 const auto k0 =
Set(
d, +0.5);
803 const auto k1 =
Set(
d, +0.166666666666666851703837);
804 const auto k2 =
Set(
d, +0.0416666666666665047591422);
805 const auto k3 =
Set(
d, +0.00833333333331652721664984);
806 const auto k4 =
Set(
d, +0.00138888888889774492207962);
807 const auto k5 =
Set(
d, +0.000198412698960509205564975);
808 const auto k6 =
Set(
d, +2.4801587159235472998791e-5);
809 const auto k7 =
Set(
d, +2.75572362911928827629423e-6);
810 const auto k8 =
Set(
d, +2.75573911234900471893338e-7);
811 const auto k9 =
Set(
d, +2.51112930892876518610661e-8);
812 const auto k10 =
Set(
d, +2.08860621107283687536341e-9);
814 return MulAdd(
Estrin(x, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10),
819 template <
class D,
class VI32>
821 const Rebind<int32_t, D> di32;
822 const Rebind<int64_t, D> di64;
823 const VI32 kOffset =
Set(di32, 0x3FF);
828 template <
class D,
class V,
class VI32>
830 const VI32 y = ShiftRight<1>(e);
831 return Mul(
Mul(x, Pow2I(
d, y)), Pow2I(
d,
Sub(e, y)));
834 template <
class D,
class V,
class VI32>
837 const V kLn2Part0d =
Set(
d, -0.6931471805596629565116018);
838 const V kLn2Part1d =
Set(
d, -0.28235290563031577122588448175e-12);
842 x =
MulAdd(qf, kLn2Part0d, x);
843 x =
MulAdd(qf, kLn2Part1d, x);
849struct LogImpl<double> {
850 template <
class D,
class V>
851 HWY_INLINE Vec<Rebind<int64_t, D>> Log2p1NoSubnormal(D , V x) {
852 const Rebind<int64_t, D> di64;
853 const Rebind<uint64_t, D> du64;
859 template <
class D,
class V>
861 const V k0 =
Set(
d, 0.6666666666666735130);
862 const V k1 =
Set(
d, 0.3999999999940941908);
863 const V k2 =
Set(
d, 0.2857142874366239149);
864 const V k3 =
Set(
d, 0.2222219843214978396);
865 const V k4 =
Set(
d, 0.1818357216161805012);
866 const V k5 =
Set(
d, 0.1531383769920937332);
867 const V k6 =
Set(
d, 0.1479819860511658591);
869 const V x2 =
Mul(x, x);
870 const V x4 =
Mul(x2, x2);
878template <
class D,
class V,
bool kAllowSubnormals = true>
884 constexpr bool kIsF32 = (
sizeof(T) == 4);
887 const V kLn2Hi =
Set(
d, kIsF32 ?
static_cast<T
>(0.69313812256f)
888 :
static_cast<T
>(0.693147180369123816490));
889 const V kLn2Lo =
Set(
d, kIsF32 ?
static_cast<T
>(9.0580006145e-6f)
890 :
static_cast<T
>(1.90821492927058770002e-10));
891 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
892 const V kMinNormal =
Set(
d, kIsF32 ?
static_cast<T
>(1.175494351e-38f)
893 :
static_cast<T
>(2.2250738585072014e-308));
894 const V kScale =
Set(
d, kIsF32 ?
static_cast<T
>(3.355443200e+7f)
895 :
static_cast<T
>(1.8014398509481984e+16));
900 using VI =
decltype(
Zero(di));
901 const VI kLowerBits =
Set(di, kIsF32 ?
static_cast<TI
>(0x00000000L)
902 :
static_cast<TI
>(0xFFFFFFFFLL));
903 const VI kMagic =
Set(di, kIsF32 ?
static_cast<TI
>(0x3F3504F3L)
904 :
static_cast<TI
>(0x3FE6A09E00000000LL));
905 const VI kExpMask =
Set(di, kIsF32 ?
static_cast<TI
>(0x3F800000L)
906 :
static_cast<TI
>(0x3FF0000000000000LL));
908 Set(di, kIsF32 ?
static_cast<TI
>(-25) :
static_cast<TI
>(-54));
909 const VI kManMask =
Set(di, kIsF32 ?
static_cast<TI
>(0x7FFFFFL)
910 :
static_cast<TI
>(0xFFFFF00000000LL));
915 if (kAllowSubnormals ==
true) {
916 const auto is_denormal =
Lt(x, kMinNormal);
924 d,
Add(exp_scale, impl.Log2p1NoSubnormal(
d,
BitCast(
d, exp_bits))));
936 const V ym1 =
Sub(y, kOne);
937 const V z =
Div(ym1,
Add(y, kOne));
946template <
class D,
class V>
950 const V kZero =
Zero(
d);
951 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
952 const V kPi =
Set(
d,
static_cast<T
>(+3.14159265358979323846264));
953 const V kPiOverTwo =
Set(
d,
static_cast<T
>(+1.57079632679489661923132169));
956 const V abs_x =
Xor(x, sign_x);
957 const auto mask =
Lt(abs_x, kHalf);
963 const V t =
Mul(impl.AsinPoly(
d, yy, y),
Mul(y, yy));
965 const V t_plus_y =
Add(t, y);
968 Add(t_plus_y, t_plus_y));
972template <
class D,
class V>
976 const V kLarge =
Set(
d,
static_cast<T
>(268435456.0));
977 const V kLog2 =
Set(
d,
static_cast<T
>(0.693147180559945286227));
978 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
979 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
981 const auto is_x_large =
Gt(x, kLarge);
982 const auto is_x_gt_2 =
Gt(x, kTwo);
984 const V x_minus_1 =
Sub(x, kOne);
992 const auto is_pole =
Eq(y2, kOne);
999template <
class D,
class V>
1003 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1004 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
1005 const V kPiOverTwo =
Set(
d,
static_cast<T
>(+1.57079632679489661923132169));
1008 const V abs_x =
Xor(x, sign_x);
1009 const auto mask =
Lt(abs_x, kHalf);
1015 const V z0 =
MulAdd(impl.AsinPoly(
d, yy, y),
Mul(yy, y), y);
1016 const V z1 =
NegMulAdd(z0, kTwo, kPiOverTwo);
1020template <
class D,
class V>
1024 const V kSmall =
Set(
d,
static_cast<T
>(1.0 / 268435456.0));
1025 const V kLarge =
Set(
d,
static_cast<T
>(268435456.0));
1026 const V kLog2 =
Set(
d,
static_cast<T
>(0.693147180559945286227));
1027 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1028 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
1031 const V abs_x =
Xor(x, sign_x);
1033 const auto is_x_large =
Gt(abs_x, kLarge);
1034 const auto is_x_lt_2 =
Lt(abs_x, kTwo);
1036 const V x2 =
Mul(x, x);
1037 const V sqrt_x2_plus_1 =
Sqrt(
Add(x2, kOne));
1039 const V y0 =
MulAdd(abs_x, kTwo,
Div(kOne,
Add(sqrt_x2_plus_1, abs_x)));
1040 const V y1 =
Add(
Div(x2,
Add(sqrt_x2_plus_1, kOne)), abs_x);
1045 const auto is_pole =
Eq(y2, kOne);
1053template <
class D,
class V>
1057 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1058 const V kPiOverTwo =
Set(
d,
static_cast<T
>(+1.57079632679489661923132169));
1061 const V abs_x =
Xor(x, sign);
1062 const auto mask =
Gt(abs_x, kOne);
1065 const auto divisor =
IfThenElse(mask, abs_x, kOne);
1066 const V y = impl.AtanPoly(
d,
IfThenElse(mask,
Div(kOne, divisor), abs_x));
1070template <
class D,
class V>
1074 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1075 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1078 const V abs_x =
Xor(x, sign);
1083template <
class D,
class V>
1089 const V kOneOverPi =
Set(
d,
static_cast<T
>(0.31830988618379067153));
1093 using VI32 =
decltype(
Zero(di32));
1094 const VI32 kOne =
Set(di32, 1);
1099 const VI32 q =
Add(ShiftLeft<1>(impl.ToInt32(
d,
Mul(y, kOneOverPi))), kOne);
1103 d,
Xor(impl.CosReduce(
d, y, q), impl.CosSignFromQuadrant(
d, q)));
1106template <
class D,
class V>
1110 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1111 const V kLowerBound =
1112 Set(
d,
static_cast<T
>((
sizeof(T) == 4 ? -104.0 : -1000.0)));
1113 const V kNegZero =
Set(
d,
static_cast<T
>(-0.0));
1114 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1115 const V kOneOverLog2 =
Set(
d,
static_cast<T
>(+1.442695040888963407359924681));
1121 impl.ToInt32(
d,
MulAdd(x, kOneOverLog2,
Or(kHalf,
And(x, kNegZero))));
1124 const V y = impl.LoadExpShortRange(
1125 d,
Add(impl.ExpPoly(
d, impl.ExpReduce(
d, x, q)), kOne), q);
1129template <
class D,
class V>
1133 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1134 const V kLowerBound =
1135 Set(
d,
static_cast<T
>((
sizeof(T) == 4 ? -104.0 : -1000.0)));
1136 const V kLn2Over2 =
Set(
d,
static_cast<T
>(+0.346573590279972654708616));
1137 const V kNegOne =
Set(
d,
static_cast<T
>(-1.0));
1138 const V kNegZero =
Set(
d,
static_cast<T
>(-0.0));
1139 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1140 const V kOneOverLog2 =
Set(
d,
static_cast<T
>(+1.442695040888963407359924681));
1146 impl.ToInt32(
d,
MulAdd(x, kOneOverLog2,
Or(kHalf,
And(x, kNegZero))));
1149 const V y = impl.ExpPoly(
d, impl.ExpReduce(
d, x, q));
1151 Sub(impl.LoadExpShortRange(
d,
Add(y, kOne), q), kOne));
1155template <
class D,
class V>
1160template <
class D,
class V>
1163 return Mul(
Log(
d, x),
Set(
d,
static_cast<T
>(0.4342944819032518276511)));
1166template <
class D,
class V>
1169 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1171 const V y =
Add(x, kOne);
1172 const auto is_pole =
Eq(y, kOne);
1174 const auto non_pole =
1175 Mul(impl::Log<D, V, /*kAllowSubnormals=*/false>(
d, y),
Div(x, divisor));
1179template <
class D,
class V>
1182 return Mul(
Log(
d, x),
Set(
d,
static_cast<T
>(1.44269504088896340735992)));
1185template <
class D,
class V>
1191 const V kOneOverPi =
Set(
d,
static_cast<T
>(0.31830988618379067153));
1192 const V kHalf =
Set(
d,
static_cast<T
>(0.5));
1196 using VI32 =
decltype(
Zero(di32));
1198 const V abs_x =
Abs(x);
1199 const V sign_x =
Xor(abs_x, x);
1202 const VI32 q = impl.ToInt32(
d,
MulAdd(abs_x, kOneOverPi, kHalf));
1205 return impl.Poly(
d,
Xor(impl.SinReduce(
d, abs_x, q),
1206 Xor(impl.SinSignFromQuadrant(
d, q), sign_x)));
1209template <
class D,
class V>
1212 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1213 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1214 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
1217 const V abs_x =
Xor(x, sign);
1218 const V y =
Expm1(
d, abs_x);
1220 return Xor(z, sign);
1223template <
class D,
class V>
1226 const V kLimit =
Set(
d,
static_cast<T
>(18.714973875));
1227 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1228 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
1231 const V abs_x =
Xor(x, sign);
1234 return Xor(z, sign);
#define HWY_NOINLINE
Definition: base.h:63
#define HWY_INLINE
Definition: base.h:62
#define HWY_MAYBE_UNUSED
Definition: base.h:73
HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1)
Definition: math-inl.h:281
HWY_INLINE V Log(const D d, V x)
Definition: math-inl.h:879
d
Definition: rvv-inl.h:1742
HWY_NOINLINE V CallSin(const D d, VecArg< V > x)
Definition: math-inl.h:237
V VecArg
Definition: ops/shared-inl.h:306
HWY_NOINLINE V CallAsin(const D d, VecArg< V > x)
Definition: math-inl.h:72
HWY_INLINE V Atan(const D d, V x)
Highway SIMD version of std::atan(x).
Definition: math-inl.h:1054
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6309
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6301
HWY_INLINE V Cos(const D d, V x)
Highway SIMD version of std::cos(x).
Definition: math-inl.h:1084
HWY_NOINLINE V CallAcos(const D d, VecArg< V > x)
Definition: math-inl.h:42
HWY_API auto Gt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6314
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1784
HWY_INLINE V Sin(const D d, V x)
Highway SIMD version of std::sin(x).
Definition: math-inl.h:1186
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1934
HWY_INLINE V Exp(const D d, V x)
Highway SIMD version of std::exp(x).
Definition: math-inl.h:1107
HWY_API auto Ge(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6318
HWY_INLINE V Log10(const D d, V x)
Highway SIMD version of std::log10(x).
Definition: math-inl.h:1161
HWY_INLINE V Log1p(const D d, V x)
Highway SIMD version of std::log1p(x).
Definition: math-inl.h:1167
HWY_NOINLINE V CallExpm1(const D d, VecArg< V > x)
Definition: math-inl.h:162
HWY_NOINLINE V CallLog1p(const D d, VecArg< V > x)
Definition: math-inl.h:207
HWY_INLINE V Atanh(const D d, V x)
Highway SIMD version of std::atanh(x).
Definition: math-inl.h:1071
HWY_NOINLINE V CallLog10(const D d, VecArg< V > x)
Definition: math-inl.h:192
HWY_API Vec128< T, N > IfThenElseZero(const Mask128< T, N > mask, const Vec128< T, N > yes)
Definition: arm_neon-inl.h:2212
HWY_API V Add(V a, V b)
Definition: arm_neon-inl.h:6274
HWY_API Vec128< T, N > IfThenElse(const Mask128< T, N > mask, const Vec128< T, N > yes, const Vec128< T, N > no)
Definition: emu128-inl.h:325
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1983
HWY_NOINLINE V CallLog2(const D d, VecArg< V > x)
Definition: math-inl.h:222
HWY_NOINLINE V CallExp(const D d, VecArg< V > x)
Definition: math-inl.h:147
HWY_NOINLINE V CallAtanh(const D d, VecArg< V > x)
Definition: math-inl.h:117
HWY_API Vec128< float, N > MulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1838
HWY_INLINE V Log2(const D d, V x)
Highway SIMD version of std::log2(x).
Definition: math-inl.h:1180
HWY_INLINE V Acos(const D d, V x)
Highway SIMD version of std::acos(x).
Definition: math-inl.h:947
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition: arm_sve-inl.h:312
HWY_NOINLINE V CallAtan(const D d, VecArg< V > x)
Definition: math-inl.h:102
HWY_API Vec< D > SignBit(D d)
Definition: generic_ops-inl.h:61
HWY_INLINE V Acosh(const D d, V x)
Highway SIMD version of std::acosh(x).
Definition: math-inl.h:973
HWY_NOINLINE V CallLog(const D d, VecArg< V > x)
Definition: math-inl.h:177
HWY_INLINE V Tanh(const D d, V x)
Highway SIMD version of std::tanh(x).
Definition: math-inl.h:1224
HWY_API Vec64< uint16_t > DemoteTo(Full64< uint16_t >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:3091
HWY_INLINE V Log(const D d, V x)
Highway SIMD version of std::log(x).
Definition: math-inl.h:1156
HWY_API Vec128< T, N > BitCast(Simd< T, N, 0 > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition: arm_neon-inl.h:988
HWY_INLINE V Asin(const D d, V x)
Highway SIMD version of std::asin(x).
Definition: math-inl.h:1000
HWY_INLINE V Asinh(const D d, V x)
Highway SIMD version of std::asinh(x).
Definition: math-inl.h:1021
HWY_NOINLINE V CallAsinh(const D d, VecArg< V > x)
Definition: math-inl.h:87
HWY_API V Sub(V a, V b)
Definition: arm_neon-inl.h:6278
typename D::template Rebind< T > Rebind
Definition: ops/shared-inl.h:195
HWY_INLINE V Expm1(const D d, V x)
Highway SIMD version of std::expm1(x).
Definition: math-inl.h:1130
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition: arm_neon-inl.h:1011
HWY_API Vec128< T, N > IfThenZeroElse(const Mask128< T, N > mask, const Vec128< T, N > no)
Definition: arm_neon-inl.h:2219
HWY_API Vec128< T, N > Or(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1971
HWY_API Vec128< float, N > NegMulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1817
HWY_API Vec128< uint16_t > PromoteTo(Full128< uint16_t >, const Vec64< uint8_t > v)
Definition: arm_neon-inl.h:2911
HWY_API Vec128< int8_t > Abs(const Vec128< int8_t > v)
Definition: arm_neon-inl.h:2105
HWY_NOINLINE V CallCos(const D d, VecArg< V > x)
Definition: math-inl.h:132
HWY_API Vec128< float > ConvertTo(Full128< float >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:3273
HWY_API Vec128< float, N > Sqrt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:1898
HWY_NOINLINE V CallSinh(const D d, VecArg< V > x)
Definition: math-inl.h:252
HWY_INLINE V Sinh(const D d, V x)
Highway SIMD version of std::sinh(x).
Definition: math-inl.h:1210
HWY_API Vec128< T, N > AndNot(const Vec128< T, N > not_mask, const Vec128< T, N > mask)
Definition: arm_neon-inl.h:1949
HWY_API V Div(V a, V b)
Definition: arm_neon-inl.h:6287
HWY_API V Mul(V a, V b)
Definition: arm_neon-inl.h:6283
HWY_NOINLINE V CallTanh(const D d, VecArg< V > x)
Definition: math-inl.h:267
typename D::T TFromD
Definition: ops/shared-inl.h:191
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:32
HWY_NOINLINE V CallAcosh(const D d, VecArg< V > x)
Definition: math-inl.h:57
Definition: aligned_allocator.h:27
typename detail::Relations< T >::Signed MakeSigned
Definition: base.h:505
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82
HWY_INLINE V AsinPoly(D d, V x2, V)
Definition: math-inl.h:479
Definition: math-inl.h:465
HWY_INLINE V AtanPoly(D d, V x)
Definition: math-inl.h:520
Definition: math-inl.h:467
HWY_INLINE Vec< Rebind< float, D > > SinSignFromQuadrant(D d, VI32 q)
Definition: math-inl.h:633
HWY_INLINE Vec< Rebind< float, D > > CosSignFromQuadrant(D d, VI32 q)
Definition: math-inl.h:626
HWY_INLINE Vec< Rebind< int32_t, D > > ToInt32(D, V x)
Definition: math-inl.h:575
HWY_INLINE V SinReduce(D d, V x, VI32 q)
Definition: math-inl.h:608
HWY_INLINE V Poly(D d, V x)
Definition: math-inl.h:580
HWY_INLINE V CosReduce(D d, V x, VI32 q)
Definition: math-inl.h:591
Definition: math-inl.h:469
HWY_INLINE Vec< D > Pow2I(D d, VI32 x)
Definition: math-inl.h:740
HWY_INLINE V ExpReduce(D d, V x, VI32 q)
Definition: math-inl.h:754
HWY_INLINE V ExpPoly(D d, V x)
Definition: math-inl.h:727
HWY_INLINE V LoadExpShortRange(D d, V x, VI32 e)
Definition: math-inl.h:748
HWY_INLINE Vec< Rebind< int32_t, D > > ToInt32(D, V x)
Definition: math-inl.h:722
Definition: math-inl.h:471
HWY_INLINE Vec< Rebind< int32_t, D > > Log2p1NoSubnormal(D, V x)
Definition: math-inl.h:770
HWY_INLINE V LogPoly(D d, V x)
Definition: math-inl.h:779
Definition: math-inl.h:473