Removed the local negz constants from the vcopysign specializations (the -0 sign mask is now built inline at each use) for consistency

This commit is contained in:
Ahmet Inan 2024-02-13 11:59:44 +01:00
commit 56903fc707
6 changed files with 16 additions and 24 deletions

10
avx2.hh
View file

@ -602,10 +602,9 @@ template <>
// Copy-sign for 8 packed floats: every lane of the result takes its sign bit
// from b and all of its other bits from a.
// NOTE(review): this is a diff hunk whose +/- markers were stripped. Going by
// the commit title ("removed negz constants"), the lines that use `negz` look
// like the removed version and the lines with the inline _mm256_set1_ps(-0.f)
// the added replacement - confirm against the real diff.
inline SIMD<float, 8> vcopysign(SIMD<float, 8> a, SIMD<float, 8> b)
{
SIMD<float, 8> tmp;
// -0.f has only the sign bit set, so broadcasting it gives a per-lane sign mask.
__m256 negz = _mm256_set1_ps(-0.f);
tmp.m = _mm256_or_ps(
// andnot computes (~mask) & a.m: a with its sign bit cleared.
_mm256_andnot_ps(negz, a.m),
// and computes mask & b.m: only the sign bit of b.
_mm256_and_ps(negz, b.m));
// Same two operands, with the mask written inline instead of through negz.
_mm256_andnot_ps(_mm256_set1_ps(-0.f), a.m),
_mm256_and_ps(_mm256_set1_ps(-0.f), b.m));
return tmp;
}
@ -613,10 +612,9 @@ template <>
// Copy-sign for 4 packed doubles: every lane of the result takes its sign bit
// from b and all of its other bits from a.
// NOTE(review): this is a diff hunk whose +/- markers were stripped. Going by
// the commit title ("removed negz constants"), the lines that use `negz` look
// like the removed version and the lines with the inline _mm256_set1_pd(-0.)
// the added replacement - confirm against the real diff.
inline SIMD<double, 4> vcopysign(SIMD<double, 4> a, SIMD<double, 4> b)
{
SIMD<double, 4> tmp;
// -0. has only the sign bit set, so broadcasting it gives a per-lane sign mask.
__m256d negz = _mm256_set1_pd(-0.);
tmp.m = _mm256_or_pd(
// andnot computes (~mask) & a.m: a with its sign bit cleared.
_mm256_andnot_pd(negz, a.m),
// and computes mask & b.m: only the sign bit of b.
_mm256_and_pd(negz, b.m));
// Same two operands, with the mask written inline instead of through negz.
_mm256_andnot_pd(_mm256_set1_pd(-0.), a.m),
_mm256_and_pd(_mm256_set1_pd(-0.), b.m));
return tmp;
}

View file

@ -526,10 +526,9 @@ template <>
// Copy-sign for 4 packed floats (NEON): every lane of the result takes its sign
// bit from b and all of its other bits from a. The float vectors are cast to
// uint32x4_t so the integer bitwise intrinsics can be applied, then cast back.
// NOTE(review): this is a diff hunk whose +/- markers were stripped. Going by
// the commit title ("removed negz constants"), the lines that use `negz` look
// like the removed version and the lines with the inline vdupq_n_f32(-0.f)
// the added replacement - confirm against the real diff.
inline SIMD<float, 4> vcopysign(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
// -0.f has only the sign bit set, so broadcasting it gives a per-lane sign mask.
uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
tmp.m = (float32x4_t)vorrq_u32(
// vbicq_u32(x, mask) computes x & ~mask: a with its sign bit cleared.
vbicq_u32((uint32x4_t)a.m, negz),
// vandq_u32(x, mask) computes x & mask: only the sign bit of b.
vandq_u32((uint32x4_t)b.m, negz));
// Same two operands, with the mask written inline instead of through negz.
vbicq_u32((uint32x4_t)a.m, (uint32x4_t)vdupq_n_f32(-0.f)),
vandq_u32((uint32x4_t)b.m, (uint32x4_t)vdupq_n_f32(-0.f)));
return tmp;
}

View file

@ -577,11 +577,10 @@ template <>
// Copy-sign for 8 packed floats held as two 4-lane NEON chunks (m[0], m[1]):
// every lane of the result takes its sign bit from b and all other bits from a.
// NOTE(review): this is a diff hunk whose +/- markers were stripped. Going by
// the commit title ("removed negz constants"), the lines that use `negz` look
// like the removed version and the lines with the inline vdupq_n_f32(-0.f)
// the added replacement - confirm against the real diff.
inline SIMD<float, 8> vcopysign(SIMD<float, 8> a, SIMD<float, 8> b)
{
SIMD<float, 8> tmp;
// -0.f has only the sign bit set, so broadcasting it gives a per-lane sign mask.
uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
// Apply the same mask-and-merge to each of the two chunks.
for (int i = 0; i < 2; ++i)
tmp.m[i] = (float32x4_t)vorrq_u32(
// vbicq_u32(x, mask) computes x & ~mask: a with its sign bit cleared.
vbicq_u32((uint32x4_t)a.m[i], negz),
// vandq_u32(x, mask) computes x & mask: only the sign bit of b.
vandq_u32((uint32x4_t)b.m[i], negz));
// Same two operands, with the mask written inline instead of through negz.
vbicq_u32((uint32x4_t)a.m[i], (uint32x4_t)vdupq_n_f32(-0.f)),
vandq_u32((uint32x4_t)b.m[i], (uint32x4_t)vdupq_n_f32(-0.f)));
return tmp;
}

View file

@ -577,11 +577,10 @@ template <>
// Copy-sign for 16 packed floats held as four 4-lane NEON chunks (m[0..3]):
// every lane of the result takes its sign bit from b and all other bits from a.
// NOTE(review): this is a diff hunk whose +/- markers were stripped. Going by
// the commit title ("removed negz constants"), the lines that use `negz` look
// like the removed version and the lines with the inline vdupq_n_f32(-0.f)
// the added replacement - confirm against the real diff.
inline SIMD<float, 16> vcopysign(SIMD<float, 16> a, SIMD<float, 16> b)
{
SIMD<float, 16> tmp;
// -0.f has only the sign bit set, so broadcasting it gives a per-lane sign mask.
uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
// Apply the same mask-and-merge to each of the four chunks.
for (int i = 0; i < 4; ++i)
tmp.m[i] = (float32x4_t)vorrq_u32(
// vbicq_u32(x, mask) computes x & ~mask: a with its sign bit cleared.
vbicq_u32((uint32x4_t)a.m[i], negz),
// vandq_u32(x, mask) computes x & mask: only the sign bit of b.
vandq_u32((uint32x4_t)b.m[i], negz));
// Same two operands, with the mask written inline instead of through negz.
vbicq_u32((uint32x4_t)a.m[i], (uint32x4_t)vdupq_n_f32(-0.f)),
vandq_u32((uint32x4_t)b.m[i], (uint32x4_t)vdupq_n_f32(-0.f)));
return tmp;
}

View file

@ -577,11 +577,10 @@ template <>
// Copy-sign for 12 packed floats held as three 4-lane NEON chunks (m[0..2]):
// every lane of the result takes its sign bit from b and all other bits from a.
// NOTE(review): this is a diff hunk whose +/- markers were stripped. Going by
// the commit title ("removed negz constants"), the lines that use `negz` look
// like the removed version and the lines with the inline vdupq_n_f32(-0.f)
// the added replacement - confirm against the real diff.
inline SIMD<float, 12> vcopysign(SIMD<float, 12> a, SIMD<float, 12> b)
{
SIMD<float, 12> tmp;
// -0.f has only the sign bit set, so broadcasting it gives a per-lane sign mask.
uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
// Apply the same mask-and-merge to each of the three chunks.
for (int i = 0; i < 3; ++i)
tmp.m[i] = (float32x4_t)vorrq_u32(
// vbicq_u32(x, mask) computes x & ~mask: a with its sign bit cleared.
vbicq_u32((uint32x4_t)a.m[i], negz),
// vandq_u32(x, mask) computes x & mask: only the sign bit of b.
vandq_u32((uint32x4_t)b.m[i], negz));
// Same two operands, with the mask written inline instead of through negz.
vbicq_u32((uint32x4_t)a.m[i], (uint32x4_t)vdupq_n_f32(-0.f)),
vandq_u32((uint32x4_t)b.m[i], (uint32x4_t)vdupq_n_f32(-0.f)));
return tmp;
}

View file

@ -602,10 +602,9 @@ template <>
// Copy-sign for 4 packed floats (SSE): every lane of the result takes its sign
// bit from b and all of its other bits from a.
// NOTE(review): this is a diff hunk whose +/- markers were stripped. Going by
// the commit title ("removed negz constants"), the lines that use `negz` look
// like the removed version and the lines with the inline _mm_set1_ps(-0.f)
// the added replacement - confirm against the real diff.
inline SIMD<float, 4> vcopysign(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
// -0.f has only the sign bit set, so broadcasting it gives a per-lane sign mask.
__m128 negz = _mm_set1_ps(-0.f);
tmp.m = _mm_or_ps(
// andnot computes (~mask) & a.m: a with its sign bit cleared.
_mm_andnot_ps(negz, a.m),
// and computes mask & b.m: only the sign bit of b.
_mm_and_ps(negz, b.m));
// Same two operands, with the mask written inline instead of through negz.
_mm_andnot_ps(_mm_set1_ps(-0.f), a.m),
_mm_and_ps(_mm_set1_ps(-0.f), b.m));
return tmp;
}
@ -613,10 +612,9 @@ template <>
// Copy-sign for 2 packed doubles (SSE): every lane of the result takes its sign
// bit from b and all of its other bits from a.
// NOTE(review): this is a diff hunk whose +/- markers were stripped. Going by
// the commit title ("removed negz constants"), the lines that use `negz` look
// like the removed version and the lines with the inline _mm_set1_pd(-0.)
// the added replacement - confirm against the real diff.
inline SIMD<double, 2> vcopysign(SIMD<double, 2> a, SIMD<double, 2> b)
{
SIMD<double, 2> tmp;
// -0. has only the sign bit set, so broadcasting it gives a per-lane sign mask.
__m128d negz = _mm_set1_pd(-0.);
tmp.m = _mm_or_pd(
// andnot computes (~mask) & a.m: a with its sign bit cleared.
_mm_andnot_pd(negz, a.m),
// and computes mask & b.m: only the sign bit of b.
_mm_and_pd(negz, b.m));
// Same two operands, with the mask written inline instead of through negz.
_mm_andnot_pd(_mm_set1_pd(-0.), a.m),
_mm_and_pd(_mm_set1_pd(-0.), b.m));
return tmp;
}