added vshuf()

This commit is contained in:
Ahmet Inan 2021-06-21 08:18:15 +02:00
commit 778d51ac0f
4 changed files with 160 additions and 0 deletions

32
avx2.hh
View file

@ -1134,3 +1134,35 @@ inline SIMD<int64_t, 4> vclamp(SIMD<int64_t, 4> x, int64_t a, int64_t b)
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vshuf(SIMD<uint8_t, 32> a, SIMD<uint8_t, 32> b)
{
	// Byte-wise table lookup across the whole 256-bit register: output
	// byte i is a[b[i]] for indices 0..31.
	// _mm256_shuffle_epi8 only looks up within each 128-bit lane (low four
	// index bits, result zeroed when index bit 7 is set), so each half of
	// `a` is broadcast to both lanes, looked up separately, and the two
	// partial results are OR-ed together.
	// NOTE(review): presumably indices are always in 0..31; other values
	// are not clamped or masked here - confirm against callers.
	const __m256i lower_half = _mm256_permute2x128_si256(a.m, a.m, 0x00);
	const __m256i upper_half = _mm256_permute2x128_si256(a.m, a.m, 0x11);

	// Indices above 15 become all-ones (bit 7 set), which makes the lookup
	// in the lower half yield zero for them.
	const __m256i above_15 = _mm256_cmpgt_epi8(b.m, _mm256_set1_epi8(15));
	const __m256i lower_idx = _mm256_or_si256(b.m, above_15);

	// Subtracting 16 maps 16..31 onto 0..15 and turns 0..15 into negative
	// bytes (bit 7 set), so the lookup in the upper half yields zero for them.
	const __m256i upper_idx = _mm256_sub_epi8(b.m, _mm256_set1_epi8(16));

	SIMD<uint8_t, 32> result;
	result.m = _mm256_or_si256(
		_mm256_shuffle_epi8(lower_half, lower_idx),
		_mm256_shuffle_epi8(upper_half, upper_idx));
	return result;
}
template <>
inline SIMD<int8_t, 32> vshuf(SIMD<int8_t, 32> a, SIMD<uint8_t, 32> b)
{
	// Signed-byte variant of the 32-entry table lookup: output byte i is
	// a[b[i]] for indices 0..31.
	// The in-lane byte shuffle cannot cross the 128-bit boundary, so each
	// half of `a` is duplicated into both lanes and looked up on its own;
	// the index vectors are arranged so that exactly one of the two
	// lookups is non-zero for every in-range index.
	// NOTE(review): presumably indices are always in 0..31; other values
	// are not clamped or masked here - confirm against callers.
	const __m256i sixteen = _mm256_set1_epi8(16);
	const __m256i fifteen = _mm256_set1_epi8(15);

	// Lookup in bytes 0..15 of `a`: indices above 15 are forced to
	// all-ones so the shuffle zeroes those positions.
	const __m256i table_lo = _mm256_permute2x128_si256(a.m, a.m, 0x00);
	const __m256i index_lo = _mm256_or_si256(b.m, _mm256_cmpgt_epi8(b.m, fifteen));
	const __m256i picked_lo = _mm256_shuffle_epi8(table_lo, index_lo);

	// Lookup in bytes 16..31 of `a`: index - 16 is negative (bit 7 set)
	// for indices 0..15, so the shuffle zeroes those positions.
	const __m256i table_hi = _mm256_permute2x128_si256(a.m, a.m, 0x11);
	const __m256i index_hi = _mm256_sub_epi8(b.m, sixteen);
	const __m256i picked_hi = _mm256_shuffle_epi8(table_hi, index_hi);

	SIMD<int8_t, 32> result;
	result.m = _mm256_or_si256(picked_lo, picked_hi);
	return result;
}
template <>
inline SIMD<float, 8> vshuf(SIMD<float, 8> a, SIMD<uint32_t, 8> b)
{
	// Full-width permute of eight floats: output element i is taken from
	// the element of `a` selected by b[i]. The instruction crosses the
	// 128-bit lane boundary itself, so no lane splitting is needed.
	SIMD<float, 8> permuted;
	permuted.m = _mm256_permutevar8x32_ps(a.m, b.m);
	return permuted;
}