added vcopysign()

This commit is contained in:
Ahmet Inan 2021-06-26 08:00:52 +02:00
commit 6cddcef660
4 changed files with 73 additions and 0 deletions

22
avx2.hh
View file

@ -598,6 +598,28 @@ inline SIMD<int32_t, 8> vsign(SIMD<int32_t, 8> a, SIMD<int32_t, 8> b)
return tmp;
}
// Lane-wise copysign for eight packed floats: each result lane takes its
// magnitude bits from the matching lane of a and its sign bit from the
// matching lane of b.
template <>
inline SIMD<float, 8> vcopysign(SIMD<float, 8> a, SIMD<float, 8> b)
{
	// -0.f has only the sign bit set, so broadcasting it gives a per-lane sign mask.
	const __m256 sign_mask = _mm256_set1_ps(-0.f);
	// (~sign_mask) & a: everything in a except its sign bit.
	const __m256 magnitude_of_a = _mm256_andnot_ps(sign_mask, a.m);
	// sign_mask & b: nothing but the sign bit of b.
	const __m256 sign_of_b = _mm256_and_ps(sign_mask, b.m);
	SIMD<float, 8> result;
	result.m = _mm256_or_ps(magnitude_of_a, sign_of_b);
	return result;
}
// Lane-wise copysign for four packed doubles: each result lane takes its
// magnitude bits from the matching lane of a and its sign bit from the
// matching lane of b.
template <>
inline SIMD<double, 4> vcopysign(SIMD<double, 4> a, SIMD<double, 4> b)
{
	// -0. has only the sign bit set, so broadcasting it gives a per-lane sign mask.
	const __m256d sign_mask = _mm256_set1_pd(-0.);
	// (~sign_mask) & a: everything in a except its sign bit.
	const __m256d magnitude_of_a = _mm256_andnot_pd(sign_mask, a.m);
	// sign_mask & b: nothing but the sign bit of b.
	const __m256d sign_of_b = _mm256_and_pd(sign_mask, b.m);
	SIMD<double, 4> result;
	result.m = _mm256_or_pd(magnitude_of_a, sign_of_b);
	return result;
}
template <>
inline SIMD<uint8_t, 32> vorr(SIMD<uint8_t, 32> a, SIMD<uint8_t, 32> b)
{