From 56903fc7076c2b25e9c3f46ab96056a0e49b44c3 Mon Sep 17 00:00:00 2001
From: Ahmet Inan
Date: Tue, 13 Feb 2024 11:59:44 +0100
Subject: [PATCH] removed negz constants for consistency

---
 avx2.hh           | 10 ++++------
 neon.hh           |  5 ++---
 neon_double.hh    |  5 ++---
 neon_quadruple.hh |  5 ++---
 neon_triple.hh    |  5 ++---
 sse4_1.hh         | 10 ++++------
 6 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/avx2.hh b/avx2.hh
index 9b12334..4e49303 100644
--- a/avx2.hh
+++ b/avx2.hh
@@ -602,10 +602,9 @@ template <>
 inline SIMD vcopysign(SIMD a, SIMD b)
 {
 	SIMD tmp;
-	__m256 negz = _mm256_set1_ps(-0.f);
 	tmp.m = _mm256_or_ps(
-		_mm256_andnot_ps(negz, a.m),
-		_mm256_and_ps(negz, b.m));
+		_mm256_andnot_ps(_mm256_set1_ps(-0.f), a.m),
+		_mm256_and_ps(_mm256_set1_ps(-0.f), b.m));
 	return tmp;
 }
 
@@ -613,10 +612,9 @@ template <>
 inline SIMD vcopysign(SIMD a, SIMD b)
 {
 	SIMD tmp;
-	__m256d negz = _mm256_set1_pd(-0.);
 	tmp.m = _mm256_or_pd(
-		_mm256_andnot_pd(negz, a.m),
-		_mm256_and_pd(negz, b.m));
+		_mm256_andnot_pd(_mm256_set1_pd(-0.), a.m),
+		_mm256_and_pd(_mm256_set1_pd(-0.), b.m));
 	return tmp;
 }
 
diff --git a/neon.hh b/neon.hh
index e7b235e..85d6702 100644
--- a/neon.hh
+++ b/neon.hh
@@ -526,10 +526,9 @@ template <>
 inline SIMD vcopysign(SIMD a, SIMD b)
 {
 	SIMD tmp;
-	uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
 	tmp.m = (float32x4_t)vorrq_u32(
-		vbicq_u32((uint32x4_t)a.m, negz),
-		vandq_u32((uint32x4_t)b.m, negz));
+		vbicq_u32((uint32x4_t)a.m, (uint32x4_t)vdupq_n_f32(-0.f)),
+		vandq_u32((uint32x4_t)b.m, (uint32x4_t)vdupq_n_f32(-0.f)));
 	return tmp;
 }
 
diff --git a/neon_double.hh b/neon_double.hh
index 141b95e..e5ae2fd 100644
--- a/neon_double.hh
+++ b/neon_double.hh
@@ -577,11 +577,10 @@ template <>
 inline SIMD vcopysign(SIMD a, SIMD b)
 {
 	SIMD tmp;
-	uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
 	for (int i = 0; i < 2; ++i)
 		tmp.m[i] = (float32x4_t)vorrq_u32(
-			vbicq_u32((uint32x4_t)a.m[i], negz),
-			vandq_u32((uint32x4_t)b.m[i], negz));
+			vbicq_u32((uint32x4_t)a.m[i], (uint32x4_t)vdupq_n_f32(-0.f)),
+			vandq_u32((uint32x4_t)b.m[i], (uint32x4_t)vdupq_n_f32(-0.f)));
 	return tmp;
 }
 
diff --git a/neon_quadruple.hh b/neon_quadruple.hh
index 943ed71..0585d24 100644
--- a/neon_quadruple.hh
+++ b/neon_quadruple.hh
@@ -577,11 +577,10 @@ template <>
 inline SIMD vcopysign(SIMD a, SIMD b)
 {
 	SIMD tmp;
-	uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
 	for (int i = 0; i < 4; ++i)
 		tmp.m[i] = (float32x4_t)vorrq_u32(
-			vbicq_u32((uint32x4_t)a.m[i], negz),
-			vandq_u32((uint32x4_t)b.m[i], negz));
+			vbicq_u32((uint32x4_t)a.m[i], (uint32x4_t)vdupq_n_f32(-0.f)),
+			vandq_u32((uint32x4_t)b.m[i], (uint32x4_t)vdupq_n_f32(-0.f)));
 	return tmp;
 }
 
diff --git a/neon_triple.hh b/neon_triple.hh
index f22c74b..fc7fffd 100644
--- a/neon_triple.hh
+++ b/neon_triple.hh
@@ -577,11 +577,10 @@ template <>
 inline SIMD vcopysign(SIMD a, SIMD b)
 {
 	SIMD tmp;
-	uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
 	for (int i = 0; i < 3; ++i)
 		tmp.m[i] = (float32x4_t)vorrq_u32(
-			vbicq_u32((uint32x4_t)a.m[i], negz),
-			vandq_u32((uint32x4_t)b.m[i], negz));
+			vbicq_u32((uint32x4_t)a.m[i], (uint32x4_t)vdupq_n_f32(-0.f)),
+			vandq_u32((uint32x4_t)b.m[i], (uint32x4_t)vdupq_n_f32(-0.f)));
 	return tmp;
 }
 
diff --git a/sse4_1.hh b/sse4_1.hh
index 3af6b7d..e5a1f90 100644
--- a/sse4_1.hh
+++ b/sse4_1.hh
@@ -602,10 +602,9 @@ template <>
 inline SIMD vcopysign(SIMD a, SIMD b)
 {
 	SIMD tmp;
-	__m128 negz = _mm_set1_ps(-0.f);
 	tmp.m = _mm_or_ps(
-		_mm_andnot_ps(negz, a.m),
-		_mm_and_ps(negz, b.m));
+		_mm_andnot_ps(_mm_set1_ps(-0.f), a.m),
+		_mm_and_ps(_mm_set1_ps(-0.f), b.m));
 	return tmp;
 }
 
@@ -613,10 +612,9 @@ template <>
 inline SIMD vcopysign(SIMD a, SIMD b)
 {
 	SIMD tmp;
-	__m128d negz = _mm_set1_pd(-0.);
 	tmp.m = _mm_or_pd(
-		_mm_andnot_pd(negz, a.m),
-		_mm_and_pd(negz, b.m));
+		_mm_andnot_pd(_mm_set1_pd(-0.), a.m),
+		_mm_and_pd(_mm_set1_pd(-0.), b.m));
 	return tmp;
 }