added vclez()

This commit is contained in:
Ahmet Inan 2018-12-12 12:03:27 +01:00
commit 72d7c6d57f
4 changed files with 198 additions and 0 deletions

56
avx2.hh
View file

@ -891,6 +891,62 @@ inline SIMD<uint64_t, 4> vcltz(SIMD<int64_t, 4> a)
return tmp;
}
// Lane-wise "a <= 0" test for eight packed floats.
// A result lane has every bit set where the comparison holds and is zero
// otherwise. _CMP_LE_OQ is the ordered, non-signaling predicate, so a NaN
// lane produces zero.
template <>
inline SIMD<uint32_t, 8> vclez(SIMD<float, 8> a)
{
	const __m256 zero = _mm256_setzero_ps();
	const __m256 le_zero = _mm256_cmp_ps(a.m, zero, _CMP_LE_OQ);
	SIMD<uint32_t, 8> mask;
	// Reinterpret the float-typed compare mask as integer lanes (bits unchanged).
	mask.m = (__m256i)le_zero;
	return mask;
}
// Lane-wise "a <= 0" test for four packed doubles.
// A result lane has every bit set where the comparison holds and is zero
// otherwise. _CMP_LE_OQ is the ordered, non-signaling predicate, so a NaN
// lane produces zero.
template <>
inline SIMD<uint64_t, 4> vclez(SIMD<double, 4> a)
{
	const __m256d zero = _mm256_setzero_pd();
	const __m256d le_zero = _mm256_cmp_pd(a.m, zero, _CMP_LE_OQ);
	SIMD<uint64_t, 4> mask;
	// Reinterpret the double-typed compare mask as integer lanes (bits unchanged).
	mask.m = (__m256i)le_zero;
	return mask;
}
// Lane-wise "a <= 0" test for 32 signed bytes.
// There is no direct "less or equal" integer compare used here, so the mask
// is built as (a == 0) OR (0 > a). Matching lanes are all ones, others zero.
template <>
inline SIMD<uint8_t, 32> vclez(SIMD<int8_t, 32> a)
{
	const __m256i zero = _mm256_setzero_si256();
	const __m256i is_zero = _mm256_cmpeq_epi8(a.m, zero);
	const __m256i is_negative = _mm256_cmpgt_epi8(zero, a.m);
	SIMD<uint8_t, 32> mask;
	mask.m = _mm256_or_si256(is_zero, is_negative);
	return mask;
}
// Lane-wise "a <= 0" test for 16 signed 16-bit integers.
// The mask is built as (a == 0) OR (0 > a). Matching lanes are all ones,
// others zero.
template <>
inline SIMD<uint16_t, 16> vclez(SIMD<int16_t, 16> a)
{
	const __m256i zero = _mm256_setzero_si256();
	const __m256i is_zero = _mm256_cmpeq_epi16(a.m, zero);
	const __m256i is_negative = _mm256_cmpgt_epi16(zero, a.m);
	SIMD<uint16_t, 16> mask;
	mask.m = _mm256_or_si256(is_zero, is_negative);
	return mask;
}
// Lane-wise "a <= 0" test for eight signed 32-bit integers.
// The mask is built as (a == 0) OR (0 > a). Matching lanes are all ones,
// others zero.
template <>
inline SIMD<uint32_t, 8> vclez(SIMD<int32_t, 8> a)
{
	const __m256i zero = _mm256_setzero_si256();
	const __m256i is_zero = _mm256_cmpeq_epi32(a.m, zero);
	const __m256i is_negative = _mm256_cmpgt_epi32(zero, a.m);
	SIMD<uint32_t, 8> mask;
	mask.m = _mm256_or_si256(is_zero, is_negative);
	return mask;
}
// Lane-wise "a <= 0" test for four signed 64-bit integers.
// The mask is built as (a == 0) OR (0 > a). Matching lanes are all ones,
// others zero.
template <>
inline SIMD<uint64_t, 4> vclez(SIMD<int64_t, 4> a)
{
	const __m256i zero = _mm256_setzero_si256();
	const __m256i is_zero = _mm256_cmpeq_epi64(a.m, zero);
	const __m256i is_negative = _mm256_cmpgt_epi64(zero, a.m);
	SIMD<uint64_t, 4> mask;
	mask.m = _mm256_or_si256(is_zero, is_negative);
	return mask;
}
template <>
inline SIMD<float, 8> vmin(SIMD<float, 8> a, SIMD<float, 8> b)
{

32
neon.hh
View file

@ -776,6 +776,38 @@ inline SIMD<uint32_t, 4> vcltz(SIMD<int32_t, 4> a)
return tmp;
}
// Lane-wise "a <= 0" test for four packed floats.
// Compares every lane against a broadcast 0.f; the compare result is stored
// directly as the unsigned mask.
template <>
inline SIMD<uint32_t, 4> vclez(SIMD<float, 4> a)
{
	const float32x4_t zero = vdupq_n_f32(0.f);
	SIMD<uint32_t, 4> mask;
	mask.m = vcleq_f32(a.m, zero);
	return mask;
}
// Lane-wise "a <= 0" test for 16 signed bytes.
// Compares every lane against a broadcast 0; the compare result is stored
// directly as the unsigned mask.
template <>
inline SIMD<uint8_t, 16> vclez(SIMD<int8_t, 16> a)
{
	const int8x16_t zero = vdupq_n_s8(0);
	SIMD<uint8_t, 16> mask;
	mask.m = vcleq_s8(a.m, zero);
	return mask;
}
// Lane-wise "a <= 0" test for eight signed 16-bit integers.
// Compares every lane against a broadcast 0; the compare result is stored
// directly as the unsigned mask.
template <>
inline SIMD<uint16_t, 8> vclez(SIMD<int16_t, 8> a)
{
	const int16x8_t zero = vdupq_n_s16(0);
	SIMD<uint16_t, 8> mask;
	mask.m = vcleq_s16(a.m, zero);
	return mask;
}
// Lane-wise "a <= 0" test for four signed 32-bit integers.
// Compares every lane against a broadcast 0; the compare result is stored
// directly as the unsigned mask.
template <>
inline SIMD<uint32_t, 4> vclez(SIMD<int32_t, 4> a)
{
	const int32x4_t zero = vdupq_n_s32(0);
	SIMD<uint32_t, 4> mask;
	mask.m = vcleq_s32(a.m, zero);
	return mask;
}
template <>
inline SIMD<float, 4> vmin(SIMD<float, 4> a, SIMD<float, 4> b)
{

54
simd.hh
View file

@ -704,6 +704,60 @@ static inline SIMD<uint64_t, WIDTH> vcltz(SIMD<int64_t, WIDTH> a)
return tmp;
}
// Portable fallback: lane-wise "a <= 0" test for WIDTH floats.
// Each output lane receives the negated comparison result converted to the
// unsigned lane type: all bits set when the input lane is <= 0, zero otherwise.
template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vclez(SIMD<float, WIDTH> a)
{
	SIMD<uint32_t, WIDTH> tmp;
	// Strict '<': the valid lane indices are 0 .. WIDTH-1.
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] <= 0.f);
	return tmp;
}
// Portable fallback: lane-wise "a <= 0" test for WIDTH doubles.
// Each output lane receives the negated comparison result converted to the
// unsigned lane type: all bits set when the input lane is <= 0, zero otherwise.
template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vclez(SIMD<double, WIDTH> a)
{
	SIMD<uint64_t, WIDTH> tmp;
	// Strict '<': the valid lane indices are 0 .. WIDTH-1.
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] <= 0.);
	return tmp;
}
// Portable fallback: lane-wise "a <= 0" test for WIDTH signed bytes.
// Each output lane receives the negated comparison result converted to the
// unsigned lane type: all bits set when the input lane is <= 0, zero otherwise.
template <int WIDTH>
static inline SIMD<uint8_t, WIDTH> vclez(SIMD<int8_t, WIDTH> a)
{
	SIMD<uint8_t, WIDTH> tmp;
	// Strict '<': the valid lane indices are 0 .. WIDTH-1.
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] <= 0);
	return tmp;
}
// Portable fallback: lane-wise "a <= 0" test for WIDTH signed 16-bit integers.
// Each output lane receives the negated comparison result converted to the
// unsigned lane type: all bits set when the input lane is <= 0, zero otherwise.
template <int WIDTH>
static inline SIMD<uint16_t, WIDTH> vclez(SIMD<int16_t, WIDTH> a)
{
	SIMD<uint16_t, WIDTH> tmp;
	// Strict '<': the valid lane indices are 0 .. WIDTH-1.
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] <= 0);
	return tmp;
}
// Portable fallback: lane-wise "a <= 0" test for WIDTH signed 32-bit integers.
// Each output lane receives the negated comparison result converted to the
// unsigned lane type: all bits set when the input lane is <= 0, zero otherwise.
template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vclez(SIMD<int32_t, WIDTH> a)
{
	SIMD<uint32_t, WIDTH> tmp;
	// Strict '<': the valid lane indices are 0 .. WIDTH-1.
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] <= 0);
	return tmp;
}
// Portable fallback: lane-wise "a <= 0" test for WIDTH signed 64-bit integers.
// Each output lane receives the negated comparison result converted to the
// unsigned lane type: all bits set when the input lane is <= 0, zero otherwise.
template <int WIDTH>
static inline SIMD<uint64_t, WIDTH> vclez(SIMD<int64_t, WIDTH> a)
{
	SIMD<uint64_t, WIDTH> tmp;
	// Strict '<': the valid lane indices are 0 .. WIDTH-1.
	for (int i = 0; i < WIDTH; ++i)
		tmp.v[i] = -(a.v[i] <= 0);
	return tmp;
}
template <int WIDTH>
static inline SIMD<uint32_t, WIDTH> vcgt(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
{

View file

@ -892,6 +892,62 @@ inline SIMD<uint64_t, 2> vcltz(SIMD<int64_t, 2> a)
return tmp;
}
// Lane-wise "a <= 0" test for four packed floats.
// A result lane has every bit set where the comparison holds and is zero
// otherwise.
template <>
inline SIMD<uint32_t, 4> vclez(SIMD<float, 4> a)
{
	const __m128 zero = _mm_setzero_ps();
	const __m128 le_zero = _mm_cmple_ps(a.m, zero);
	SIMD<uint32_t, 4> mask;
	// Reinterpret the float-typed compare mask as integer lanes (bits unchanged).
	mask.m = (__m128i)le_zero;
	return mask;
}
// Lane-wise "a <= 0" test for two packed doubles.
// A result lane has every bit set where the comparison holds and is zero
// otherwise.
template <>
inline SIMD<uint64_t, 2> vclez(SIMD<double, 2> a)
{
	const __m128d zero = _mm_setzero_pd();
	const __m128d le_zero = _mm_cmple_pd(a.m, zero);
	SIMD<uint64_t, 2> mask;
	// Reinterpret the double-typed compare mask as integer lanes (bits unchanged).
	mask.m = (__m128i)le_zero;
	return mask;
}
// Lane-wise "a <= 0" test for 16 signed bytes.
// The mask is built as (a == 0) OR (0 > a). Matching lanes are all ones,
// others zero.
template <>
inline SIMD<uint8_t, 16> vclez(SIMD<int8_t, 16> a)
{
	const __m128i zero = _mm_setzero_si128();
	const __m128i is_zero = _mm_cmpeq_epi8(a.m, zero);
	const __m128i is_negative = _mm_cmpgt_epi8(zero, a.m);
	SIMD<uint8_t, 16> mask;
	mask.m = _mm_or_si128(is_zero, is_negative);
	return mask;
}
// Lane-wise "a <= 0" test for eight signed 16-bit integers.
// The mask is built as (a == 0) OR (0 > a). Matching lanes are all ones,
// others zero.
template <>
inline SIMD<uint16_t, 8> vclez(SIMD<int16_t, 8> a)
{
	const __m128i zero = _mm_setzero_si128();
	const __m128i is_zero = _mm_cmpeq_epi16(a.m, zero);
	const __m128i is_negative = _mm_cmpgt_epi16(zero, a.m);
	SIMD<uint16_t, 8> mask;
	mask.m = _mm_or_si128(is_zero, is_negative);
	return mask;
}
// Lane-wise "a <= 0" test for four signed 32-bit integers.
// The mask is built as (a == 0) OR (0 > a). Matching lanes are all ones,
// others zero.
template <>
inline SIMD<uint32_t, 4> vclez(SIMD<int32_t, 4> a)
{
	const __m128i zero = _mm_setzero_si128();
	const __m128i is_zero = _mm_cmpeq_epi32(a.m, zero);
	const __m128i is_negative = _mm_cmpgt_epi32(zero, a.m);
	SIMD<uint32_t, 4> mask;
	mask.m = _mm_or_si128(is_zero, is_negative);
	return mask;
}
// Lane-wise "a <= 0" test for two signed 64-bit integers.
// The mask is built as (a == 0) OR (0 > a). Matching lanes are all ones,
// others zero.
// NOTE(review): _mm_cmpgt_epi64 is an SSE4.2 intrinsic, while _mm_cmpeq_epi64
// is SSE4.1 -- confirm the build enables SSE4.2 for this header.
template <>
inline SIMD<uint64_t, 2> vclez(SIMD<int64_t, 2> a)
{
	const __m128i zero = _mm_setzero_si128();
	const __m128i is_zero = _mm_cmpeq_epi64(a.m, zero);
	const __m128i is_negative = _mm_cmpgt_epi64(zero, a.m);
	SIMD<uint64_t, 2> mask;
	mask.m = _mm_or_si128(is_zero, is_negative);
	return mask;
}
template <>
inline SIMD<float, 4> vmin(SIMD<float, 4> a, SIMD<float, 4> b)
{