mirror of
https://github.com/aicodix/code.git
synced 2026-04-27 14:30:36 +00:00
added more shuffle operations for AVX2
This commit is contained in:
parent
f1aae95d34
commit
41b8abaf83
2 changed files with 30 additions and 0 deletions
8
avx2.hh
8
avx2.hh
|
|
@ -1178,6 +1178,14 @@ inline SIMD<int8_t, 32> vshuf(SIMD<int8_t, 32> a, SIMD<uint8_t, 32> b)
|
||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline SIMD<uint32_t, 8> vshuf(SIMD<uint32_t, 8> a, SIMD<uint32_t, 8> b)
|
||||||
|
{
|
||||||
|
SIMD<uint32_t, 8> tmp;
|
||||||
|
tmp.m = _mm256_permutevar8x32_epi32(a.m, b.m);
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
inline SIMD<float, 8> vshuf(SIMD<float, 8> a, SIMD<uint32_t, 8> b)
|
inline SIMD<float, 8> vshuf(SIMD<float, 8> a, SIMD<uint32_t, 8> b)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1321,3 +1321,25 @@ inline SIMD<int8_t, 64> vshuf(SIMD<int8_t, 64> a, SIMD<uint8_t, 64> b)
|
||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline SIMD<uint32_t, 16> vshuf(SIMD<uint32_t, 16> a, SIMD<uint32_t, 16> b)
|
||||||
|
{
|
||||||
|
SIMD<uint32_t, 16> tmp;
|
||||||
|
for (int i = 0; i < 2; ++i)
|
||||||
|
tmp.m[i] = _mm256_or_si256(
|
||||||
|
_mm256_and_si256(_mm256_permutevar8x32_epi32(a.m[0], b.m[i]), _mm256_cmpgt_epi32(_mm256_set1_epi32(8), b.m[i])),
|
||||||
|
_mm256_and_si256(_mm256_permutevar8x32_epi32(a.m[1], _mm256_sub_epi32(b.m[i], _mm256_set1_epi32(8))), _mm256_cmpgt_epi32(b.m[i], _mm256_set1_epi32(7))));
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline SIMD<float, 16> vshuf(SIMD<float, 16> a, SIMD<uint32_t, 16> b)
|
||||||
|
{
|
||||||
|
SIMD<float, 16> tmp;
|
||||||
|
for (int i = 0; i < 2; ++i)
|
||||||
|
tmp.m[i] = _mm256_or_ps(
|
||||||
|
_mm256_and_ps(_mm256_permutevar8x32_ps(a.m[0], b.m[i]), _mm256_cmpgt_epi32(_mm256_set1_epi32(8), b.m[i])),
|
||||||
|
_mm256_and_ps(_mm256_permutevar8x32_ps(a.m[1], _mm256_sub_epi32(b.m[i], _mm256_set1_epi32(8))), _mm256_cmpgt_epi32(b.m[i], _mm256_set1_epi32(7))));
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue