From 41b8abaf83582bebefad298ca2505018ffb09ce6 Mon Sep 17 00:00:00 2001
From: Ahmet Inan
Date: Wed, 14 Feb 2024 10:48:45 +0100
Subject: [PATCH] added more shuffle operations for AVX2

---
 avx2.hh        |  9 +++++++++
 avx2_double.hh | 27 +++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/avx2.hh b/avx2.hh
index 4e49303..b275cee 100644
--- a/avx2.hh
+++ b/avx2.hh
@@ -1178,6 +1178,15 @@ inline SIMD vshuf(SIMD a, SIMD b)
 	return tmp;
 }
 
+// Cross-lane shuffle: 32-bit element k of the result is a[b[k] & 7].
+template <>
+inline SIMD vshuf(SIMD a, SIMD b)
+{
+	SIMD tmp;
+	tmp.m = _mm256_permutevar8x32_epi32(a.m, b.m);
+	return tmp;
+}
+
 template <>
 inline SIMD vshuf(SIMD a, SIMD b)
 {
diff --git a/avx2_double.hh b/avx2_double.hh
index cb96cf7..cd01530 100644
--- a/avx2_double.hh
+++ b/avx2_double.hh
@@ -1321,3 +1321,30 @@ inline SIMD vshuf(SIMD a, SIMD b)
 	return tmp;
 }
 
+// Shuffle across both 256-bit halves: an index below 8 picks from a.m[0],
+// an index above 7 picks from a.m[1]; the signed compares build the select masks.
+template <>
+inline SIMD vshuf(SIMD a, SIMD b)
+{
+	SIMD tmp;
+	for (int i = 0; i < 2; ++i)
+		tmp.m[i] = _mm256_or_si256(
+			_mm256_and_si256(_mm256_permutevar8x32_epi32(a.m[0], b.m[i]), _mm256_cmpgt_epi32(_mm256_set1_epi32(8), b.m[i])),
+			_mm256_and_si256(_mm256_permutevar8x32_epi32(a.m[1], _mm256_sub_epi32(b.m[i], _mm256_set1_epi32(8))), _mm256_cmpgt_epi32(b.m[i], _mm256_set1_epi32(7))));
+	return tmp;
+}
+
+// Two-half shuffle on float data: index below 8 picks from a.m[0], above 7 from a.m[1].
+// The compare results are integer vectors, so they are reinterpreted with
+// _mm256_castsi256_ps (no instruction generated) before masking with _mm256_and_ps.
+template <>
+inline SIMD vshuf(SIMD a, SIMD b)
+{
+	SIMD tmp;
+	for (int i = 0; i < 2; ++i)
+		tmp.m[i] = _mm256_or_ps(
+			_mm256_and_ps(_mm256_permutevar8x32_ps(a.m[0], b.m[i]), _mm256_castsi256_ps(_mm256_cmpgt_epi32(_mm256_set1_epi32(8), b.m[i]))),
+			_mm256_and_ps(_mm256_permutevar8x32_ps(a.m[1], _mm256_sub_epi32(b.m[i], _mm256_set1_epi32(8))), _mm256_castsi256_ps(_mm256_cmpgt_epi32(b.m[i], _mm256_set1_epi32(7)))));
+	return tmp;
+}
+