/* SIMD element wise horizontal rotation Copyright 2019 Ahmet Inan */ #pragma once #include "simd.hh" namespace CODE { template class Rotate { static const int SIZE = TYPE::SIZE; static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size"); public: TYPE operator()(TYPE a, int s) { if (s < 0) s += WIDTH; int t = WIDTH - s; TYPE ret; for (int n = 0; n < s; ++n) ret.v[n] = a.v[n+t]; for (int n = 0; n < t; ++n) ret.v[n+s] = a.v[n]; return ret; } }; #ifdef __ARM_NEON__ template class Rotate, WIDTH> { static const int SIZE = 16; static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size"); typedef SIMD TYPE; TYPE rot[WIDTH]; public: Rotate() { for (int i = 0; i < WIDTH; ++i) { rot[i] = vdup(0x80); for (int j = 0; j < WIDTH; ++j) rot[i].v[j] = (j - i + WIDTH) % WIDTH; } } TYPE operator()(TYPE a, int s) { if (s < 0) s += WIDTH; int8x8x2_t b { vget_low_s8(a.m), vget_high_s8(a.m) }; int8x8_t c = vtbl2_s8(b, vget_low_s8(rot[s].m)); int8x8_t d = vtbl2_s8(b, vget_high_s8(rot[s].m)); TYPE ret; ret.m = vcombine_s8(c, d); return ret; } }; #endif #ifdef __AVX2__ template class Rotate, WIDTH> { static const int SIZE = 32; static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size"); typedef SIMD TYPE; TYPE rot0[WIDTH], rot1[WIDTH]; public: Rotate() { for (int i = 0; i < WIDTH; ++i) { rot0[i] = vdup(0x80); rot1[i] = vdup(0x80); for (int j = 0; j < WIDTH; ++j) { int pos = (j - i + WIDTH) % WIDTH; if (j < 16) { if (pos < 16) { rot0[i].v[j] = pos; } else { rot1[i].v[j+16] = pos-16; } } else { if (pos < 16) { rot1[i].v[j-16] = pos; } else { rot0[i].v[j] = pos-16; } } } } } TYPE operator()(TYPE a, int s) { if (s < 0) s += WIDTH; __m256i b = _mm256_shuffle_epi8(a.m, rot0[s].m); __m256i c = _mm256_shuffle_epi8(a.m, rot1[s].m); __m256i d = _mm256_permute2x128_si256(c, c, 1); TYPE ret; ret.m = _mm256_or_si256(b, d); return ret; } }; #else #ifdef __SSE4_1__ template class Rotate, WIDTH> { static const int SIZE = 16; static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size"); typedef SIMD TYPE; TYPE rot[WIDTH]; public: Rotate() { for (int i = 0; i < WIDTH; ++i) { rot[i] = vdup(0x80); for (int j = 0; j < WIDTH; ++j) rot[i].v[j] = (j - i + WIDTH) % WIDTH; } } TYPE operator()(TYPE a, int s) { if (s < 0) s += WIDTH; TYPE ret; ret.m = _mm_shuffle_epi8(a.m, rot[s].m); return ret; } }; #endif #endif }