From 8de487af88cf931cc8db1369c12aa1838defe98a Mon Sep 17 00:00:00 2001
From: Ahmet Inan
Date: Tue, 24 Sep 2019 00:05:57 +0200
Subject: [PATCH] added SIMD horizontal rotation helper

---
 README.md |   4 ++
 rotate.hh | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 112 insertions(+)
 create mode 100644 rotate.hh

diff --git a/README.md b/README.md
index 5f4e163..e39d0b0 100644
--- a/README.md
+++ b/README.md
@@ -87,3 +87,7 @@ Single instruction, multiple data ([SIMD](https://en.wikipedia.org/wiki/SIMD)) w
 * [Intel SSE4.1](https://en.wikipedia.org/wiki/SSE4) ([sse4_1.hh](sse4_1.hh))
 * [Intel AVX2](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) ([avx2.hh](avx2.hh))
 
+### [rotate.hh](rotate.hh)
+
+([SIMD](https://en.wikipedia.org/wiki/SIMD)) element wise horizontal rotation
+
diff --git a/rotate.hh b/rotate.hh
new file mode 100644
index 0000000..59d645a
--- /dev/null
+++ b/rotate.hh
@@ -0,0 +1,108 @@
+/*
+SIMD element wise horizontal rotation
+Rotate<TYPE, WIDTH>()(a, s) returns a with lane n moved to lane (n + s) mod WIDTH; a negative s rotates the other way
+Copyright 2019 Ahmet Inan
+*/
+
+#pragma once
+
+#include "simd.hh"
+
+namespace CODE {
+// Portable fallback: needs TYPE::SIZE and an indexable TYPE::v; result lanes at or above WIDTH are not written here.
+template <typename TYPE, int WIDTH>
+class Rotate
+{
+	static const int SIZE = TYPE::SIZE;
+	static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size");
+public:
+	TYPE operator()(TYPE a, int s)
+	{
+		if (s < 0 || s >= WIDTH) // fold any shift into [0, WIDTH) so no lane index leaves the register
+			s = (s % WIDTH + WIDTH) % WIDTH;
+		int t = WIDTH - s;
+		TYPE ret;
+		for (int n = 0; n < s; ++n) // the top s lanes wrap around to the bottom
+			ret.v[n] = a.v[n+t];
+		for (int n = 0; n < t; ++n) // the remaining t lanes move up by s
+			ret.v[n+s] = a.v[n];
+		return ret;
+	}
+};
+// Byte shuffle version: one precomputed control mask per shift, so a rotation is a single _mm_shuffle_epi8.
+#ifdef __SSE4_1__
+template <int WIDTH>
+class Rotate<SIMD<int8_t, 16>, WIDTH> // NOTE(review): element type int8_t inferred from the byte shuffle - confirm against simd.hh
+{
+	static const int SIZE = 16;
+	static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size");
+	typedef SIMD<int8_t, SIZE> TYPE;
+	TYPE rot[WIDTH]; // rot[s] is the shuffle control for a rotation by s
+public:
+	Rotate()
+	{
+		for (int i = 0; i < WIDTH; ++i) {
+			rot[i] = vdup<TYPE>(0x80); // vdup (simd.hh) is expected to fill every lane; a control byte with the top bit set yields zero
+			for (int j = 0; j < WIDTH; ++j)
+				rot[i].v[j] = (j - i + WIDTH) % WIDTH; // output lane j reads input lane j - i, wrapped into [0, WIDTH)
+		}
+	}
+	TYPE operator()(TYPE a, int s)
+	{
+		if (s < 0 || s >= WIDTH) // fold any shift into [0, WIDTH) so rot[s] stays in bounds
+			s = (s % WIDTH + WIDTH) % WIDTH;
+		TYPE ret;
+		ret.m = _mm_shuffle_epi8(a.m, rot[s].m);
+		return ret;
+	}
+};
+#endif
+
+#ifdef __AVX2__
+template <int WIDTH> // AVX2 version: rotates lanes [0, WIDTH) of a 32 byte register using two precomputed shuffle masks per shift
+class Rotate<SIMD<int8_t, 32>, WIDTH> // NOTE(review): element type int8_t inferred from the byte shuffle - confirm against simd.hh
+{
+	static const int SIZE = 32;
+	static_assert(WIDTH <= SIZE, "width must be smaller or equal to SIMD size");
+	typedef SIMD<int8_t, SIZE> TYPE;
+	TYPE rot0[WIDTH], rot1[WIDTH]; // rot0[s]: bytes that stay in their 128 bit half, rot1[s]: bytes that change halves
+public:
+	Rotate()
+	{
+		for (int i = 0; i < WIDTH; ++i) {
+			rot0[i] = vdup<TYPE>(0x80); // vdup (simd.hh) is expected to fill every lane; a control byte with the top bit set yields zero
+			rot1[i] = vdup<TYPE>(0x80);
+			for (int j = 0; j < WIDTH; ++j) {
+				int pos = (j - i + WIDTH) % WIDTH; // input lane that ends up in output lane j
+				if (j < 16) {
+					if (pos < 16) {
+						rot0[i].v[j] = pos;
+					} else {
+						rot1[i].v[j+16] = pos-16; // gathered inside the upper half, moved down by the half swap in operator()
+					}
+				} else {
+					if (pos < 16) {
+						rot1[i].v[j-16] = pos; // gathered inside the lower half, moved up by the half swap in operator()
+					} else {
+						rot0[i].v[j] = pos-16;
+					}
+				}
+			}
+		}
+	}
+	TYPE operator()(TYPE a, int s)
+	{
+		if (s < 0 || s >= WIDTH) // fold any shift into [0, WIDTH) so rot0[s] and rot1[s] stay in bounds
+			s = (s % WIDTH + WIDTH) % WIDTH;
+		__m256i b = _mm256_shuffle_epi8(a.m, rot0[s].m); // the byte shuffle only moves bytes within each 128 bit half
+		__m256i c = _mm256_shuffle_epi8(a.m, rot1[s].m);
+		__m256i d = _mm256_permute2x128_si256(c, c, 1); // swap the two halves of c
+		TYPE ret;
+		ret.m = _mm256_or_si256(b, d); // every byte is zero in at least one of b and d, so OR merges them
+		return ret;
+	}
+};
+#endif
+
+}
+