From e51425e1216b51280b531b0bba6fc486b50f02e7 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Wed, 12 Dec 2018 22:51:28 +0100 Subject: [PATCH] added vclamp() --- avx2.hh | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ neon.hh | 32 ++++++++++++++++++++++++++++++++ simd.hh | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ sse4_1.hh | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+) diff --git a/avx2.hh b/avx2.hh index 7c9808c..afd76dd 100644 --- a/avx2.hh +++ b/avx2.hh @@ -1043,4 +1043,52 @@ inline SIMD vmax(SIMD a, SIMD b) return tmp; } +template <> +inline SIMD vclamp(SIMD x, float a, float b) /* vclamp: limits x to the range given by a and b - max against a broadcast copy of a, then min against a broadcast copy of b (the min against b is applied last) */ +{ + SIMD tmp; + tmp.m = _mm256_min_ps(_mm256_max_ps(x.m, _mm256_set1_ps(a)), _mm256_set1_ps(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, double a, double b) +{ + SIMD tmp; + tmp.m = _mm256_min_pd(_mm256_max_pd(x.m, _mm256_set1_pd(a)), _mm256_set1_pd(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int8_t a, int8_t b) +{ + SIMD tmp; + tmp.m = _mm256_min_epi8(_mm256_max_epi8(x.m, _mm256_set1_epi8(a)), _mm256_set1_epi8(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int16_t a, int16_t b) +{ + SIMD tmp; + tmp.m = _mm256_min_epi16(_mm256_max_epi16(x.m, _mm256_set1_epi16(a)), _mm256_set1_epi16(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int32_t a, int32_t b) +{ + SIMD tmp; + tmp.m = _mm256_min_epi32(_mm256_max_epi32(x.m, _mm256_set1_epi32(a)), _mm256_set1_epi32(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int64_t a, int64_t b) /* NOTE(review): the Intel Intrinsics Guide lists _mm256_min_epi64 and _mm256_max_epi64 under AVX512F + AVX512VL rather than AVX2 - confirm this specialization builds for AVX2-only targets */ +{ + SIMD tmp; + tmp.m = _mm256_min_epi64(_mm256_max_epi64(x.m, _mm256_set1_epi64x(a)), _mm256_set1_epi64x(b)); + return tmp; +} + #endif diff --git a/neon.hh b/neon.hh index ac63a8c..9a48bd5 100644 --- a/neon.hh +++ b/neon.hh @@ -872,4 +872,36 @@ inline SIMD vmax(SIMD a, SIMD b) return tmp; } +template <> +inline SIMD vclamp(SIMD x, float a, float b) /* same min(max(x, a), b) form using vminq/vmaxq with vdupq_n bounds; this hunk adds float, int8_t, int16_t and int32_t variants only */ +{ + SIMD tmp; + tmp.m = vminq_f32(vmaxq_f32(x.m, vdupq_n_f32(a)), 
vdupq_n_f32(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int8_t a, int8_t b) +{ + SIMD tmp; + tmp.m = vminq_s8(vmaxq_s8(x.m, vdupq_n_s8(a)), vdupq_n_s8(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int16_t a, int16_t b) +{ + SIMD tmp; + tmp.m = vminq_s16(vmaxq_s16(x.m, vdupq_n_s16(a)), vdupq_n_s16(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int32_t a, int32_t b) +{ + SIMD tmp; + tmp.m = vminq_s32(vmaxq_s32(x.m, vdupq_n_s32(a)), vdupq_n_s32(b)); + return tmp; +} + #endif diff --git a/simd.hh b/simd.hh index ce24d4f..16ae135 100644 --- a/simd.hh +++ b/simd.hh @@ -1010,6 +1010,60 @@ static inline SIMD vmax(SIMD a, SIMD +static inline SIMD vclamp(SIMD x, float a, float b) /* loop form: for each i below WIDTH stores std::min(std::max(x.v[i], a), b) into tmp.v[i]; NOTE(review): WIDTH is used here but no template parameter list is visible after the template keyword in this copy of the patch - verify against the upstream commit */ +{ + SIMD tmp; + for (int i = 0; i < WIDTH; ++i) + tmp.v[i] = std::min(std::max(x.v[i], a), b); + return tmp; +} + +template +static inline SIMD vclamp(SIMD x, double a, double b) +{ + SIMD tmp; + for (int i = 0; i < WIDTH; ++i) + tmp.v[i] = std::min(std::max(x.v[i], a), b); + return tmp; +} + +template +static inline SIMD vclamp(SIMD x, int8_t a, int8_t b) +{ + SIMD tmp; + for (int i = 0; i < WIDTH; ++i) + tmp.v[i] = std::min(std::max(x.v[i], a), b); + return tmp; +} + +template +static inline SIMD vclamp(SIMD x, int16_t a, int16_t b) +{ + SIMD tmp; + for (int i = 0; i < WIDTH; ++i) + tmp.v[i] = std::min(std::max(x.v[i], a), b); + return tmp; +} + +template +static inline SIMD vclamp(SIMD x, int32_t a, int32_t b) +{ + SIMD tmp; + for (int i = 0; i < WIDTH; ++i) + tmp.v[i] = std::min(std::max(x.v[i], a), b); + return tmp; +} + +template +static inline SIMD vclamp(SIMD x, int64_t a, int64_t b) +{ + SIMD tmp; + for (int i = 0; i < WIDTH; ++i) + tmp.v[i] = std::min(std::max(x.v[i], a), b); + return tmp; +} + template static inline SIMD vadd(SIMD a, SIMD b) { diff --git a/sse4_1.hh b/sse4_1.hh index f9fa5a5..18cde4d 100644 --- a/sse4_1.hh +++ b/sse4_1.hh @@ -1028,4 +1028,44 @@ inline SIMD vmax(SIMD a, SIMD b) return tmp; } +template <> +inline 
SIMD vclamp(SIMD x, float a, float b) /* same min(max(x, a), b) form using _mm_min/_mm_max with _mm_set1 bounds; this hunk adds float, double, int8_t, int16_t and int32_t variants only */ +{ + SIMD tmp; + tmp.m = _mm_min_ps(_mm_max_ps(x.m, _mm_set1_ps(a)), _mm_set1_ps(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, double a, double b) +{ + SIMD tmp; + tmp.m = _mm_min_pd(_mm_max_pd(x.m, _mm_set1_pd(a)), _mm_set1_pd(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int8_t a, int8_t b) +{ + SIMD tmp; + tmp.m = _mm_min_epi8(_mm_max_epi8(x.m, _mm_set1_epi8(a)), _mm_set1_epi8(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int16_t a, int16_t b) +{ + SIMD tmp; + tmp.m = _mm_min_epi16(_mm_max_epi16(x.m, _mm_set1_epi16(a)), _mm_set1_epi16(b)); + return tmp; +} + +template <> +inline SIMD vclamp(SIMD x, int32_t a, int32_t b) +{ + SIMD tmp; + tmp.m = _mm_min_epi32(_mm_max_epi32(x.m, _mm_set1_epi32(a)), _mm_set1_epi32(b)); + return tmp; +} + #endif