added vclamp()

This commit is contained in:
Ahmet Inan 2018-12-12 22:51:28 +01:00
commit e51425e121
4 changed files with 174 additions and 0 deletions

48
avx2.hh
View file

@ -1043,4 +1043,52 @@ inline SIMD<int64_t, 4> vmax(SIMD<int64_t, 4> a, SIMD<int64_t, 4> b)
return tmp;
}
// AVX specialization of vclamp for eight float lanes: lanes of x are first
// raised to at least a (lane-wise max), then lowered to at most b (lane-wise
// min). Both scalar bounds are broadcast to every lane.
template <>
inline SIMD<float, 8> vclamp(SIMD<float, 8> x, float a, float b)
{
	SIMD<float, 8> result;
	// Step 1: apply the lower bound.
	result.m = _mm256_max_ps(x.m, _mm256_set1_ps(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm256_min_ps(result.m, _mm256_set1_ps(b));
	return result;
}
// AVX specialization of vclamp for four double lanes: lane-wise max against
// the broadcast lower bound a, followed by lane-wise min against the
// broadcast upper bound b.
template <>
inline SIMD<double, 4> vclamp(SIMD<double, 4> x, double a, double b)
{
	SIMD<double, 4> result;
	// Step 1: apply the lower bound.
	result.m = _mm256_max_pd(x.m, _mm256_set1_pd(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm256_min_pd(result.m, _mm256_set1_pd(b));
	return result;
}
// AVX2 specialization of vclamp for thirty-two signed 8-bit lanes: lane-wise
// signed max against the broadcast lower bound a, followed by lane-wise
// signed min against the broadcast upper bound b.
template <>
inline SIMD<int8_t, 32> vclamp(SIMD<int8_t, 32> x, int8_t a, int8_t b)
{
	SIMD<int8_t, 32> result;
	// Step 1: apply the lower bound.
	result.m = _mm256_max_epi8(x.m, _mm256_set1_epi8(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm256_min_epi8(result.m, _mm256_set1_epi8(b));
	return result;
}
// AVX2 specialization of vclamp for sixteen signed 16-bit lanes: lane-wise
// signed max against the broadcast lower bound a, followed by lane-wise
// signed min against the broadcast upper bound b.
template <>
inline SIMD<int16_t, 16> vclamp(SIMD<int16_t, 16> x, int16_t a, int16_t b)
{
	SIMD<int16_t, 16> result;
	// Step 1: apply the lower bound.
	result.m = _mm256_max_epi16(x.m, _mm256_set1_epi16(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm256_min_epi16(result.m, _mm256_set1_epi16(b));
	return result;
}
// AVX2 specialization of vclamp for eight signed 32-bit lanes: lane-wise
// signed max against the broadcast lower bound a, followed by lane-wise
// signed min against the broadcast upper bound b.
template <>
inline SIMD<int32_t, 8> vclamp(SIMD<int32_t, 8> x, int32_t a, int32_t b)
{
	SIMD<int32_t, 8> result;
	// Step 1: apply the lower bound.
	result.m = _mm256_max_epi32(x.m, _mm256_set1_epi32(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm256_min_epi32(result.m, _mm256_set1_epi32(b));
	return result;
}
// AVX2 specialization of vclamp for four signed 64-bit lanes.
//
// Computes min(max(x, a), b) per lane, with both scalar bounds broadcast to
// every lane.
//
// AVX2 has no packed 64-bit min/max: _mm256_max_epi64 / _mm256_min_epi64
// belong to AVX-512 (F + VL) and are unavailable on AVX2-only targets. The
// selection is therefore built from the AVX2 signed 64-bit compare
// (_mm256_cmpgt_epi64) and a byte blend driven by the resulting all-ones /
// all-zeros lane mask.
template <>
inline SIMD<int64_t, 4> vclamp(SIMD<int64_t, 4> x, int64_t a, int64_t b)
{
	SIMD<int64_t, 4> tmp;
	const __m256i lo = _mm256_set1_epi64x(a);
	const __m256i hi = _mm256_set1_epi64x(b);
	// max(x, lo): take lo in the lanes where lo > x, keep x elsewhere.
	const __m256i raised = _mm256_blendv_epi8(x.m, lo, _mm256_cmpgt_epi64(lo, x.m));
	// min(raised, hi): take hi in the lanes where raised > hi, keep raised elsewhere.
	tmp.m = _mm256_blendv_epi8(raised, hi, _mm256_cmpgt_epi64(raised, hi));
	return tmp;
}
#endif

32
neon.hh
View file

@ -872,4 +872,36 @@ inline SIMD<int32_t, 4> vmax(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
return tmp;
}
// NEON specialization of vclamp for four float lanes: lane-wise max against
// the duplicated lower bound a, followed by lane-wise min against the
// duplicated upper bound b.
template <>
inline SIMD<float, 4> vclamp(SIMD<float, 4> x, float a, float b)
{
	SIMD<float, 4> result;
	// Step 1: apply the lower bound.
	result.m = vmaxq_f32(x.m, vdupq_n_f32(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = vminq_f32(result.m, vdupq_n_f32(b));
	return result;
}
// NEON specialization of vclamp for sixteen signed 8-bit lanes: lane-wise
// max against the duplicated lower bound a, followed by lane-wise min
// against the duplicated upper bound b.
template <>
inline SIMD<int8_t, 16> vclamp(SIMD<int8_t, 16> x, int8_t a, int8_t b)
{
	SIMD<int8_t, 16> result;
	// Step 1: apply the lower bound.
	result.m = vmaxq_s8(x.m, vdupq_n_s8(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = vminq_s8(result.m, vdupq_n_s8(b));
	return result;
}
// NEON specialization of vclamp for eight signed 16-bit lanes: lane-wise
// max against the duplicated lower bound a, followed by lane-wise min
// against the duplicated upper bound b.
template <>
inline SIMD<int16_t, 8> vclamp(SIMD<int16_t, 8> x, int16_t a, int16_t b)
{
	SIMD<int16_t, 8> result;
	// Step 1: apply the lower bound.
	result.m = vmaxq_s16(x.m, vdupq_n_s16(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = vminq_s16(result.m, vdupq_n_s16(b));
	return result;
}
// NEON specialization of vclamp for four signed 32-bit lanes: lane-wise
// max against the duplicated lower bound a, followed by lane-wise min
// against the duplicated upper bound b.
template <>
inline SIMD<int32_t, 4> vclamp(SIMD<int32_t, 4> x, int32_t a, int32_t b)
{
	SIMD<int32_t, 4> result;
	// Step 1: apply the lower bound.
	result.m = vmaxq_s32(x.m, vdupq_n_s32(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = vminq_s32(result.m, vdupq_n_s32(b));
	return result;
}
#endif

54
simd.hh
View file

@ -1010,6 +1010,60 @@ static inline SIMD<int64_t, WIDTH> vmax(SIMD<int64_t, WIDTH> a, SIMD<int64_t, WI
return tmp;
}
// Portable fallback of vclamp for float vectors of any width: each element
// of x is first raised to at least a with std::max, then lowered to at most
// b with std::min.
template <int WIDTH>
static inline SIMD<float, WIDTH> vclamp(SIMD<float, WIDTH> x, float a, float b)
{
	SIMD<float, WIDTH> result;
	for (int lane = 0; lane < WIDTH; ++lane) {
		// Lower bound first, then upper bound on the intermediate value.
		result.v[lane] = std::max(x.v[lane], a);
		result.v[lane] = std::min(result.v[lane], b);
	}
	return result;
}
// Portable fallback of vclamp for double vectors of any width: each element
// of x is first raised to at least a with std::max, then lowered to at most
// b with std::min.
template <int WIDTH>
static inline SIMD<double, WIDTH> vclamp(SIMD<double, WIDTH> x, double a, double b)
{
	SIMD<double, WIDTH> result;
	for (int lane = 0; lane < WIDTH; ++lane) {
		// Lower bound first, then upper bound on the intermediate value.
		result.v[lane] = std::max(x.v[lane], a);
		result.v[lane] = std::min(result.v[lane], b);
	}
	return result;
}
// Portable fallback of vclamp for int8_t vectors of any width: each element
// of x is first raised to at least a with std::max, then lowered to at most
// b with std::min.
template <int WIDTH>
static inline SIMD<int8_t, WIDTH> vclamp(SIMD<int8_t, WIDTH> x, int8_t a, int8_t b)
{
	SIMD<int8_t, WIDTH> result;
	for (int lane = 0; lane < WIDTH; ++lane) {
		// Lower bound first, then upper bound on the intermediate value.
		result.v[lane] = std::max(x.v[lane], a);
		result.v[lane] = std::min(result.v[lane], b);
	}
	return result;
}
// Portable fallback of vclamp for int16_t vectors of any width: each element
// of x is first raised to at least a with std::max, then lowered to at most
// b with std::min.
template <int WIDTH>
static inline SIMD<int16_t, WIDTH> vclamp(SIMD<int16_t, WIDTH> x, int16_t a, int16_t b)
{
	SIMD<int16_t, WIDTH> result;
	for (int lane = 0; lane < WIDTH; ++lane) {
		// Lower bound first, then upper bound on the intermediate value.
		result.v[lane] = std::max(x.v[lane], a);
		result.v[lane] = std::min(result.v[lane], b);
	}
	return result;
}
// Portable fallback of vclamp for int32_t vectors of any width: each element
// of x is first raised to at least a with std::max, then lowered to at most
// b with std::min.
template <int WIDTH>
static inline SIMD<int32_t, WIDTH> vclamp(SIMD<int32_t, WIDTH> x, int32_t a, int32_t b)
{
	SIMD<int32_t, WIDTH> result;
	for (int lane = 0; lane < WIDTH; ++lane) {
		// Lower bound first, then upper bound on the intermediate value.
		result.v[lane] = std::max(x.v[lane], a);
		result.v[lane] = std::min(result.v[lane], b);
	}
	return result;
}
// Portable fallback of vclamp for int64_t vectors of any width: each element
// of x is first raised to at least a with std::max, then lowered to at most
// b with std::min.
template <int WIDTH>
static inline SIMD<int64_t, WIDTH> vclamp(SIMD<int64_t, WIDTH> x, int64_t a, int64_t b)
{
	SIMD<int64_t, WIDTH> result;
	for (int lane = 0; lane < WIDTH; ++lane) {
		// Lower bound first, then upper bound on the intermediate value.
		result.v[lane] = std::max(x.v[lane], a);
		result.v[lane] = std::min(result.v[lane], b);
	}
	return result;
}
template <int WIDTH>
static inline SIMD<float, WIDTH> vadd(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
{

View file

@ -1028,4 +1028,44 @@ inline SIMD<int32_t, 4> vmax(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
return tmp;
}
// SSE specialization of vclamp for four float lanes: lane-wise max against
// the broadcast lower bound a, followed by lane-wise min against the
// broadcast upper bound b.
template <>
inline SIMD<float, 4> vclamp(SIMD<float, 4> x, float a, float b)
{
	SIMD<float, 4> result;
	// Step 1: apply the lower bound.
	result.m = _mm_max_ps(x.m, _mm_set1_ps(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm_min_ps(result.m, _mm_set1_ps(b));
	return result;
}
// SSE specialization of vclamp for two double lanes: lane-wise max against
// the broadcast lower bound a, followed by lane-wise min against the
// broadcast upper bound b.
template <>
inline SIMD<double, 2> vclamp(SIMD<double, 2> x, double a, double b)
{
	SIMD<double, 2> result;
	// Step 1: apply the lower bound.
	result.m = _mm_max_pd(x.m, _mm_set1_pd(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm_min_pd(result.m, _mm_set1_pd(b));
	return result;
}
// SSE specialization of vclamp for sixteen signed 8-bit lanes: lane-wise
// signed max against the broadcast lower bound a, followed by lane-wise
// signed min against the broadcast upper bound b.
template <>
inline SIMD<int8_t, 16> vclamp(SIMD<int8_t, 16> x, int8_t a, int8_t b)
{
	SIMD<int8_t, 16> result;
	// Step 1: apply the lower bound.
	result.m = _mm_max_epi8(x.m, _mm_set1_epi8(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm_min_epi8(result.m, _mm_set1_epi8(b));
	return result;
}
// SSE specialization of vclamp for eight signed 16-bit lanes: lane-wise
// signed max against the broadcast lower bound a, followed by lane-wise
// signed min against the broadcast upper bound b.
template <>
inline SIMD<int16_t, 8> vclamp(SIMD<int16_t, 8> x, int16_t a, int16_t b)
{
	SIMD<int16_t, 8> result;
	// Step 1: apply the lower bound.
	result.m = _mm_max_epi16(x.m, _mm_set1_epi16(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm_min_epi16(result.m, _mm_set1_epi16(b));
	return result;
}
// SSE specialization of vclamp for four signed 32-bit lanes: lane-wise
// signed max against the broadcast lower bound a, followed by lane-wise
// signed min against the broadcast upper bound b.
template <>
inline SIMD<int32_t, 4> vclamp(SIMD<int32_t, 4> x, int32_t a, int32_t b)
{
	SIMD<int32_t, 4> result;
	// Step 1: apply the lower bound.
	result.m = _mm_max_epi32(x.m, _mm_set1_epi32(a));
	// Step 2: apply the upper bound to the intermediate result.
	result.m = _mm_min_epi32(result.m, _mm_set1_epi32(b));
	return result;
}
#endif