mirror of
https://github.com/aicodix/code.git
synced 2026-04-27 14:30:36 +00:00
added vclamp()
This commit is contained in:
parent
09945a18b5
commit
e51425e121
4 changed files with 174 additions and 0 deletions
48
avx2.hh
48
avx2.hh
|
|
@ -1043,4 +1043,52 @@ inline SIMD<int64_t, 4> vmax(SIMD<int64_t, 4> a, SIMD<int64_t, 4> b)
|
|||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<float, 8> vclamp(SIMD<float, 8> x, float a, float b)
|
||||
{
|
||||
SIMD<float, 8> tmp;
|
||||
tmp.m = _mm256_min_ps(_mm256_max_ps(x.m, _mm256_set1_ps(a)), _mm256_set1_ps(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<double, 4> vclamp(SIMD<double, 4> x, double a, double b)
|
||||
{
|
||||
SIMD<double, 4> tmp;
|
||||
tmp.m = _mm256_min_pd(_mm256_max_pd(x.m, _mm256_set1_pd(a)), _mm256_set1_pd(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<int8_t, 32> vclamp(SIMD<int8_t, 32> x, int8_t a, int8_t b)
|
||||
{
|
||||
SIMD<int8_t, 32> tmp;
|
||||
tmp.m = _mm256_min_epi8(_mm256_max_epi8(x.m, _mm256_set1_epi8(a)), _mm256_set1_epi8(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<int16_t, 16> vclamp(SIMD<int16_t, 16> x, int16_t a, int16_t b)
|
||||
{
|
||||
SIMD<int16_t, 16> tmp;
|
||||
tmp.m = _mm256_min_epi16(_mm256_max_epi16(x.m, _mm256_set1_epi16(a)), _mm256_set1_epi16(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<int32_t, 8> vclamp(SIMD<int32_t, 8> x, int32_t a, int32_t b)
|
||||
{
|
||||
SIMD<int32_t, 8> tmp;
|
||||
tmp.m = _mm256_min_epi32(_mm256_max_epi32(x.m, _mm256_set1_epi32(a)), _mm256_set1_epi32(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<int64_t, 4> vclamp(SIMD<int64_t, 4> x, int64_t a, int64_t b)
|
||||
{
|
||||
SIMD<int64_t, 4> tmp;
|
||||
tmp.m = _mm256_min_epi64(_mm256_max_epi64(x.m, _mm256_set1_epi64x(a)), _mm256_set1_epi64x(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
32
neon.hh
32
neon.hh
|
|
@ -872,4 +872,36 @@ inline SIMD<int32_t, 4> vmax(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
|
|||
return tmp;
|
||||
}
|
||||
|
||||
// Clamp every float lane of x to [a, b]: each lane becomes min(max(lane, a), b).
template <>
inline SIMD<float, 4> vclamp(SIMD<float, 4> x, float a, float b)
{
	SIMD<float, 4> out;
	// raise lanes that are below the duplicated lower bound
	out.m = vmaxq_f32(x.m, vdupq_n_f32(a));
	// cap lanes that are above the duplicated upper bound
	out.m = vminq_f32(out.m, vdupq_n_f32(b));
	return out;
}
|
||||
|
||||
// Clamp every signed 8-bit lane of x to [a, b]: each lane becomes min(max(lane, a), b).
template <>
inline SIMD<int8_t, 16> vclamp(SIMD<int8_t, 16> x, int8_t a, int8_t b)
{
	SIMD<int8_t, 16> out;
	// raise lanes that are below the duplicated lower bound
	out.m = vmaxq_s8(x.m, vdupq_n_s8(a));
	// cap lanes that are above the duplicated upper bound
	out.m = vminq_s8(out.m, vdupq_n_s8(b));
	return out;
}
|
||||
|
||||
// Clamp every signed 16-bit lane of x to [a, b]: each lane becomes min(max(lane, a), b).
template <>
inline SIMD<int16_t, 8> vclamp(SIMD<int16_t, 8> x, int16_t a, int16_t b)
{
	SIMD<int16_t, 8> out;
	// raise lanes that are below the duplicated lower bound
	out.m = vmaxq_s16(x.m, vdupq_n_s16(a));
	// cap lanes that are above the duplicated upper bound
	out.m = vminq_s16(out.m, vdupq_n_s16(b));
	return out;
}
|
||||
|
||||
// Clamp every signed 32-bit lane of x to [a, b]: each lane becomes min(max(lane, a), b).
template <>
inline SIMD<int32_t, 4> vclamp(SIMD<int32_t, 4> x, int32_t a, int32_t b)
{
	SIMD<int32_t, 4> out;
	// raise lanes that are below the duplicated lower bound
	out.m = vmaxq_s32(x.m, vdupq_n_s32(a));
	// cap lanes that are above the duplicated upper bound
	out.m = vminq_s32(out.m, vdupq_n_s32(b));
	return out;
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
54
simd.hh
54
simd.hh
|
|
@ -1010,6 +1010,60 @@ static inline SIMD<int64_t, WIDTH> vmax(SIMD<int64_t, WIDTH> a, SIMD<int64_t, WI
|
|||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<float, WIDTH> vclamp(SIMD<float, WIDTH> x, float a, float b)
|
||||
{
|
||||
SIMD<float, WIDTH> tmp;
|
||||
for (int i = 0; i < WIDTH; ++i)
|
||||
tmp.v[i] = std::min(std::max(x.v[i], a), b);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<double, WIDTH> vclamp(SIMD<double, WIDTH> x, double a, double b)
|
||||
{
|
||||
SIMD<double, WIDTH> tmp;
|
||||
for (int i = 0; i < WIDTH; ++i)
|
||||
tmp.v[i] = std::min(std::max(x.v[i], a), b);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<int8_t, WIDTH> vclamp(SIMD<int8_t, WIDTH> x, int8_t a, int8_t b)
|
||||
{
|
||||
SIMD<int8_t, WIDTH> tmp;
|
||||
for (int i = 0; i < WIDTH; ++i)
|
||||
tmp.v[i] = std::min(std::max(x.v[i], a), b);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<int16_t, WIDTH> vclamp(SIMD<int16_t, WIDTH> x, int16_t a, int16_t b)
|
||||
{
|
||||
SIMD<int16_t, WIDTH> tmp;
|
||||
for (int i = 0; i < WIDTH; ++i)
|
||||
tmp.v[i] = std::min(std::max(x.v[i], a), b);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<int32_t, WIDTH> vclamp(SIMD<int32_t, WIDTH> x, int32_t a, int32_t b)
|
||||
{
|
||||
SIMD<int32_t, WIDTH> tmp;
|
||||
for (int i = 0; i < WIDTH; ++i)
|
||||
tmp.v[i] = std::min(std::max(x.v[i], a), b);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<int64_t, WIDTH> vclamp(SIMD<int64_t, WIDTH> x, int64_t a, int64_t b)
|
||||
{
|
||||
SIMD<int64_t, WIDTH> tmp;
|
||||
for (int i = 0; i < WIDTH; ++i)
|
||||
tmp.v[i] = std::min(std::max(x.v[i], a), b);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<float, WIDTH> vadd(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
|
||||
{
|
||||
|
|
|
|||
40
sse4_1.hh
40
sse4_1.hh
|
|
@ -1028,4 +1028,44 @@ inline SIMD<int32_t, 4> vmax(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
|
|||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<float, 4> vclamp(SIMD<float, 4> x, float a, float b)
|
||||
{
|
||||
SIMD<float, 4> tmp;
|
||||
tmp.m = _mm_min_ps(_mm_max_ps(x.m, _mm_set1_ps(a)), _mm_set1_ps(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<double, 2> vclamp(SIMD<double, 2> x, double a, double b)
|
||||
{
|
||||
SIMD<double, 2> tmp;
|
||||
tmp.m = _mm_min_pd(_mm_max_pd(x.m, _mm_set1_pd(a)), _mm_set1_pd(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<int8_t, 16> vclamp(SIMD<int8_t, 16> x, int8_t a, int8_t b)
|
||||
{
|
||||
SIMD<int8_t, 16> tmp;
|
||||
tmp.m = _mm_min_epi8(_mm_max_epi8(x.m, _mm_set1_epi8(a)), _mm_set1_epi8(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<int16_t, 8> vclamp(SIMD<int16_t, 8> x, int16_t a, int16_t b)
|
||||
{
|
||||
SIMD<int16_t, 8> tmp;
|
||||
tmp.m = _mm_min_epi16(_mm_max_epi16(x.m, _mm_set1_epi16(a)), _mm_set1_epi16(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<int32_t, 4> vclamp(SIMD<int32_t, 4> x, int32_t a, int32_t b)
|
||||
{
|
||||
SIMD<int32_t, 4> tmp;
|
||||
tmp.m = _mm_min_epi32(_mm_max_epi32(x.m, _mm_set1_epi32(a)), _mm_set1_epi32(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue