mirror of
https://github.com/aicodix/code.git
synced 2026-04-27 14:30:36 +00:00
added SIMD horizontal rotation helper
This commit is contained in:
parent
a10cc6afcb
commit
8de487af88
2 changed files with 112 additions and 0 deletions
|
|
@ -87,3 +87,7 @@ Single instruction, multiple data ([SIMD](https://en.wikipedia.org/wiki/SIMD)) w
|
|||
* [Intel SSE4.1](https://en.wikipedia.org/wiki/SSE4) ([sse4_1.hh](sse4_1.hh))
|
||||
* [Intel AVX2](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) ([avx2.hh](avx2.hh))
|
||||
|
||||
### [rotate.hh](rotate.hh)
|
||||
|
||||
([SIMD](https://en.wikipedia.org/wiki/SIMD)) element wise horizontal rotation
|
||||
|
||||
|
|
|
|||
108
rotate.hh
Normal file
108
rotate.hh
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
SIMD element wise horizontal rotation
|
||||
|
||||
Copyright 2019 Ahmet Inan <inan@aicodix.de>
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "simd.hh"
|
||||
|
||||
namespace CODE {
|
||||
|
||||
template <typename TYPE, int WIDTH>
|
||||
class Rotate
|
||||
{
|
||||
static const int SIZE = TYPE::SIZE;
|
||||
static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size");
|
||||
public:
|
||||
TYPE operator()(TYPE a, int s)
|
||||
{
|
||||
if (s < 0)
|
||||
s += WIDTH;
|
||||
int t = WIDTH - s;
|
||||
TYPE ret;
|
||||
for (int n = 0; n < s; ++n)
|
||||
ret.v[n] = a.v[n+t];
|
||||
for (int n = 0; n < t; ++n)
|
||||
ret.v[n+s] = a.v[n];
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
template <int WIDTH>
|
||||
class Rotate<SIMD<int8_t, 16>, WIDTH>
|
||||
{
|
||||
static const int SIZE = 16;
|
||||
static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size");
|
||||
typedef SIMD<int8_t, SIZE> TYPE;
|
||||
TYPE rot[WIDTH];
|
||||
public:
|
||||
Rotate()
|
||||
{
|
||||
for (int i = 0; i < WIDTH; ++i) {
|
||||
rot[i] = vdup<TYPE>(0x80);
|
||||
for (int j = 0; j < WIDTH; ++j)
|
||||
rot[i].v[j] = (j - i + WIDTH) % WIDTH;
|
||||
}
|
||||
}
|
||||
TYPE operator()(TYPE a, int s)
|
||||
{
|
||||
if (s < 0)
|
||||
s += WIDTH;
|
||||
TYPE ret;
|
||||
ret.m = _mm_shuffle_epi8(a.m, rot[s].m);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
template <int WIDTH>
|
||||
class Rotate<SIMD<int8_t, 32>, WIDTH>
|
||||
{
|
||||
static const int SIZE = 32;
|
||||
static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size");
|
||||
typedef SIMD<int8_t, SIZE> TYPE;
|
||||
TYPE rot0[WIDTH], rot1[WIDTH];
|
||||
public:
|
||||
Rotate()
|
||||
{
|
||||
for (int i = 0; i < WIDTH; ++i) {
|
||||
rot0[i] = vdup<TYPE>(0x80);
|
||||
rot1[i] = vdup<TYPE>(0x80);
|
||||
for (int j = 0; j < WIDTH; ++j) {
|
||||
int pos = (j - i + WIDTH) % WIDTH;
|
||||
if (j < 16) {
|
||||
if (pos < 16) {
|
||||
rot0[i].v[j] = pos;
|
||||
} else {
|
||||
rot1[i].v[j+16] = pos-16;
|
||||
}
|
||||
} else {
|
||||
if (pos < 16) {
|
||||
rot1[i].v[j-16] = pos;
|
||||
} else {
|
||||
rot0[i].v[j] = pos-16;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
TYPE operator()(TYPE a, int s)
|
||||
{
|
||||
if (s < 0)
|
||||
s += WIDTH;
|
||||
__m256i b = _mm256_shuffle_epi8(a.m, rot0[s].m);
|
||||
__m256i c = _mm256_shuffle_epi8(a.m, rot1[s].m);
|
||||
__m256i d = _mm256_permute2x128_si256(c, c, 1);
|
||||
TYPE ret;
|
||||
ret.m = _mm256_or_si256(b, d);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue