removed negz constants for consistency

2026-04-27 14:30:36 +00:00 · 2024-02-13 11:59:44 +01:00 · 2024-02-13 11:59:44 +01:00 · 56903fc707
commit 56903fc707
parent 992c83933d
6 changed files with 16 additions and 24 deletions
--- a/avx2.hh
+++ b/avx2.hh
@@ -602,10 +602,9 @@ template <>
 inline SIMD<float, 8> vcopysign(SIMD<float, 8> a, SIMD<float, 8> b)
 {
 	SIMD<float, 8> tmp;
-	__m256 negz = _mm256_set1_ps(-0.f);
 	tmp.m = _mm256_or_ps(
-		_mm256_andnot_ps(negz, a.m),
-		_mm256_and_ps(negz, b.m));
+		_mm256_andnot_ps(_mm256_set1_ps(-0.f), a.m),
+		_mm256_and_ps(_mm256_set1_ps(-0.f), b.m));
 	return tmp;
 }

@@ -613,10 +612,9 @@ template <>
 inline SIMD<double, 4> vcopysign(SIMD<double, 4> a, SIMD<double, 4> b)
 {
 	SIMD<double, 4> tmp;
-	__m256d negz = _mm256_set1_pd(-0.);
 	tmp.m = _mm256_or_pd(
-		_mm256_andnot_pd(negz, a.m),
-		_mm256_and_pd(negz, b.m));
+		_mm256_andnot_pd(_mm256_set1_pd(-0.), a.m),
+		_mm256_and_pd(_mm256_set1_pd(-0.), b.m));
 	return tmp;
 }

--- a/neon.hh
+++ b/neon.hh
@@ -526,10 +526,9 @@ template <>
 inline SIMD<float, 4> vcopysign(SIMD<float, 4> a, SIMD<float, 4> b)
 {
 	SIMD<float, 4> tmp;
-	uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
 	tmp.m = (float32x4_t)vorrq_u32(
-		vbicq_u32((uint32x4_t)a.m, negz),
-		vandq_u32((uint32x4_t)b.m, negz));
+		vbicq_u32((uint32x4_t)a.m, (uint32x4_t)vdupq_n_f32(-0.f)),
+		vandq_u32((uint32x4_t)b.m, (uint32x4_t)vdupq_n_f32(-0.f)));
 	return tmp;
 }

--- a/neon_double.hh
+++ b/neon_double.hh
@@ -577,11 +577,10 @@ template <>
 inline SIMD<float, 8> vcopysign(SIMD<float, 8> a, SIMD<float, 8> b)
 {
 	SIMD<float, 8> tmp;
-	uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
 	for (int i = 0; i < 2; ++i)
 		tmp.m[i] = (float32x4_t)vorrq_u32(
-			vbicq_u32((uint32x4_t)a.m[i], negz),
-			vandq_u32((uint32x4_t)b.m[i], negz));
+			vbicq_u32((uint32x4_t)a.m[i], (uint32x4_t)vdupq_n_f32(-0.f)),
+			vandq_u32((uint32x4_t)b.m[i], (uint32x4_t)vdupq_n_f32(-0.f)));
 	return tmp;
 }

--- a/neon_quadruple.hh
+++ b/neon_quadruple.hh
@@ -577,11 +577,10 @@ template <>
 inline SIMD<float, 16> vcopysign(SIMD<float, 16> a, SIMD<float, 16> b)
 {
 	SIMD<float, 16> tmp;
-	uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
 	for (int i = 0; i < 4; ++i)
 		tmp.m[i] = (float32x4_t)vorrq_u32(
-			vbicq_u32((uint32x4_t)a.m[i], negz),
-			vandq_u32((uint32x4_t)b.m[i], negz));
+			vbicq_u32((uint32x4_t)a.m[i], (uint32x4_t)vdupq_n_f32(-0.f)),
+			vandq_u32((uint32x4_t)b.m[i], (uint32x4_t)vdupq_n_f32(-0.f)));
 	return tmp;
 }

--- a/neon_triple.hh
+++ b/neon_triple.hh
@@ -577,11 +577,10 @@ template <>
 inline SIMD<float, 12> vcopysign(SIMD<float, 12> a, SIMD<float, 12> b)
 {
 	SIMD<float, 12> tmp;
-	uint32x4_t negz = (uint32x4_t)vdupq_n_f32(-0.f);
 	for (int i = 0; i < 3; ++i)
 		tmp.m[i] = (float32x4_t)vorrq_u32(
-			vbicq_u32((uint32x4_t)a.m[i], negz),
-			vandq_u32((uint32x4_t)b.m[i], negz));
+			vbicq_u32((uint32x4_t)a.m[i], (uint32x4_t)vdupq_n_f32(-0.f)),
+			vandq_u32((uint32x4_t)b.m[i], (uint32x4_t)vdupq_n_f32(-0.f)));
 	return tmp;
 }

--- a/sse4_1.hh
+++ b/sse4_1.hh
@@ -602,10 +602,9 @@ template <>
 inline SIMD<float, 4> vcopysign(SIMD<float, 4> a, SIMD<float, 4> b)
 {
 	SIMD<float, 4> tmp;
-	__m128 negz = _mm_set1_ps(-0.f);
 	tmp.m = _mm_or_ps(
-		_mm_andnot_ps(negz, a.m),
-		_mm_and_ps(negz, b.m));
+		_mm_andnot_ps(_mm_set1_ps(-0.f), a.m),
+		_mm_and_ps(_mm_set1_ps(-0.f), b.m));
 	return tmp;
 }

@@ -613,10 +612,9 @@ template <>
 inline SIMD<double, 2> vcopysign(SIMD<double, 2> a, SIMD<double, 2> b)
 {
 	SIMD<double, 2> tmp;
-	__m128d negz = _mm_set1_pd(-0.);
 	tmp.m = _mm_or_pd(
-		_mm_andnot_pd(negz, a.m),
-		_mm_and_pd(negz, b.m));
+		_mm_andnot_pd(_mm_set1_pd(-0.), a.m),
+		_mm_and_pd(_mm_set1_pd(-0.), b.m));
 	return tmp;
 }