mirror of
https://github.com/aicodix/code.git
synced 2026-04-27 22:35:44 +00:00
added vclez()
This commit is contained in:
parent
4a3d113433
commit
72d7c6d57f
4 changed files with 198 additions and 0 deletions
56
avx2.hh
56
avx2.hh
|
|
@ -891,6 +891,62 @@ inline SIMD<uint64_t, 4> vcltz(SIMD<int64_t, 4> a)
|
|||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint32_t, 8> vclez(SIMD<float, 8> a)
|
||||
{
|
||||
SIMD<uint32_t, 8> tmp;
|
||||
tmp.m = (__m256i)_mm256_cmp_ps(a.m, _mm256_setzero_ps(), _CMP_LE_OQ);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint64_t, 4> vclez(SIMD<double, 4> a)
|
||||
{
|
||||
SIMD<uint64_t, 4> tmp;
|
||||
tmp.m = (__m256i)_mm256_cmp_pd(a.m, _mm256_setzero_pd(), _CMP_LE_OQ);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint8_t, 32> vclez(SIMD<int8_t, 32> a)
|
||||
{
|
||||
SIMD<uint8_t, 32> tmp;
|
||||
tmp.m = _mm256_or_si256(
|
||||
_mm256_cmpeq_epi8(a.m, _mm256_setzero_si256()),
|
||||
_mm256_cmpgt_epi8(_mm256_setzero_si256(), a.m));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint16_t, 16> vclez(SIMD<int16_t, 16> a)
|
||||
{
|
||||
SIMD<uint16_t, 16> tmp;
|
||||
tmp.m = _mm256_or_si256(
|
||||
_mm256_cmpeq_epi16(a.m, _mm256_setzero_si256()),
|
||||
_mm256_cmpgt_epi16(_mm256_setzero_si256(), a.m));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint32_t, 8> vclez(SIMD<int32_t, 8> a)
|
||||
{
|
||||
SIMD<uint32_t, 8> tmp;
|
||||
tmp.m = _mm256_or_si256(
|
||||
_mm256_cmpeq_epi32(a.m, _mm256_setzero_si256()),
|
||||
_mm256_cmpgt_epi32(_mm256_setzero_si256(), a.m));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint64_t, 4> vclez(SIMD<int64_t, 4> a)
|
||||
{
|
||||
SIMD<uint64_t, 4> tmp;
|
||||
tmp.m = _mm256_or_si256(
|
||||
_mm256_cmpeq_epi64(a.m, _mm256_setzero_si256()),
|
||||
_mm256_cmpgt_epi64(_mm256_setzero_si256(), a.m));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<float, 8> vmin(SIMD<float, 8> a, SIMD<float, 8> b)
|
||||
{
|
||||
|
|
|
|||
32
neon.hh
32
neon.hh
|
|
@ -776,6 +776,38 @@ inline SIMD<uint32_t, 4> vcltz(SIMD<int32_t, 4> a)
|
|||
return tmp;
|
||||
}
|
||||
|
||||
// Lane-wise "a <= 0" for four packed floats, comparing against a vector
// of zeros with vcleq_f32.
template <>
inline SIMD<uint32_t, 4> vclez(SIMD<float, 4> a)
{
	const float32x4_t zero = vdupq_n_f32(0.f);
	SIMD<uint32_t, 4> mask;
	mask.m = vcleq_f32(a.m, zero);
	return mask;
}
|
||||
|
||||
// Lane-wise "a <= 0" for 16 signed 8-bit integers, comparing against a
// vector of zeros with vcleq_s8.
template <>
inline SIMD<uint8_t, 16> vclez(SIMD<int8_t, 16> a)
{
	const int8x16_t zero = vdupq_n_s8(0);
	SIMD<uint8_t, 16> mask;
	mask.m = vcleq_s8(a.m, zero);
	return mask;
}
|
||||
|
||||
// Lane-wise "a <= 0" for eight signed 16-bit integers, comparing against a
// vector of zeros with vcleq_s16.
template <>
inline SIMD<uint16_t, 8> vclez(SIMD<int16_t, 8> a)
{
	const int16x8_t zero = vdupq_n_s16(0);
	SIMD<uint16_t, 8> mask;
	mask.m = vcleq_s16(a.m, zero);
	return mask;
}
|
||||
|
||||
// Lane-wise "a <= 0" for four signed 32-bit integers, comparing against a
// vector of zeros with vcleq_s32.
template <>
inline SIMD<uint32_t, 4> vclez(SIMD<int32_t, 4> a)
{
	const int32x4_t zero = vdupq_n_s32(0);
	SIMD<uint32_t, 4> mask;
	mask.m = vcleq_s32(a.m, zero);
	return mask;
}
|
||||
|
||||
template <>
|
||||
inline SIMD<float, 4> vmin(SIMD<float, 4> a, SIMD<float, 4> b)
|
||||
{
|
||||
|
|
|
|||
54
simd.hh
54
simd.hh
|
|
@ -704,6 +704,60 @@ static inline SIMD<uint64_t, WIDTH> vcltz(SIMD<int64_t, WIDTH> a)
|
|||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<uint32_t, WIDTH> vclez(SIMD<float, WIDTH> a)
|
||||
{
|
||||
SIMD<uint32_t, WIDTH> tmp;
|
||||
for (int i = 0; i <= WIDTH; ++i)
|
||||
tmp.v[i] = -(a.v[i] <= 0.f);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<uint64_t, WIDTH> vclez(SIMD<double, WIDTH> a)
|
||||
{
|
||||
SIMD<uint64_t, WIDTH> tmp;
|
||||
for (int i = 0; i <= WIDTH; ++i)
|
||||
tmp.v[i] = -(a.v[i] <= 0.);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<uint8_t, WIDTH> vclez(SIMD<int8_t, WIDTH> a)
|
||||
{
|
||||
SIMD<uint8_t, WIDTH> tmp;
|
||||
for (int i = 0; i <= WIDTH; ++i)
|
||||
tmp.v[i] = -(a.v[i] <= 0);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<uint16_t, WIDTH> vclez(SIMD<int16_t, WIDTH> a)
|
||||
{
|
||||
SIMD<uint16_t, WIDTH> tmp;
|
||||
for (int i = 0; i <= WIDTH; ++i)
|
||||
tmp.v[i] = -(a.v[i] <= 0);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<uint32_t, WIDTH> vclez(SIMD<int32_t, WIDTH> a)
|
||||
{
|
||||
SIMD<uint32_t, WIDTH> tmp;
|
||||
for (int i = 0; i <= WIDTH; ++i)
|
||||
tmp.v[i] = -(a.v[i] <= 0);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<uint64_t, WIDTH> vclez(SIMD<int64_t, WIDTH> a)
|
||||
{
|
||||
SIMD<uint64_t, WIDTH> tmp;
|
||||
for (int i = 0; i <= WIDTH; ++i)
|
||||
tmp.v[i] = -(a.v[i] <= 0);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <int WIDTH>
|
||||
static inline SIMD<uint32_t, WIDTH> vcgt(SIMD<float, WIDTH> a, SIMD<float, WIDTH> b)
|
||||
{
|
||||
|
|
|
|||
56
sse4_1.hh
56
sse4_1.hh
|
|
@ -892,6 +892,62 @@ inline SIMD<uint64_t, 2> vcltz(SIMD<int64_t, 2> a)
|
|||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint32_t, 4> vclez(SIMD<float, 4> a)
|
||||
{
|
||||
SIMD<uint32_t, 4> tmp;
|
||||
tmp.m = (__m128i)_mm_cmple_ps(a.m, _mm_setzero_ps());
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint64_t, 2> vclez(SIMD<double, 2> a)
|
||||
{
|
||||
SIMD<uint64_t, 2> tmp;
|
||||
tmp.m = (__m128i)_mm_cmple_pd(a.m, _mm_setzero_pd());
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint8_t, 16> vclez(SIMD<int8_t, 16> a)
|
||||
{
|
||||
SIMD<uint8_t, 16> tmp;
|
||||
tmp.m = _mm_or_si128(
|
||||
_mm_cmpeq_epi8(a.m, _mm_setzero_si128()),
|
||||
_mm_cmpgt_epi8(_mm_setzero_si128(), a.m));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint16_t, 8> vclez(SIMD<int16_t, 8> a)
|
||||
{
|
||||
SIMD<uint16_t, 8> tmp;
|
||||
tmp.m = _mm_or_si128(
|
||||
_mm_cmpeq_epi16(a.m, _mm_setzero_si128()),
|
||||
_mm_cmpgt_epi16(_mm_setzero_si128(), a.m));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint32_t, 4> vclez(SIMD<int32_t, 4> a)
|
||||
{
|
||||
SIMD<uint32_t, 4> tmp;
|
||||
tmp.m = _mm_or_si128(
|
||||
_mm_cmpeq_epi32(a.m, _mm_setzero_si128()),
|
||||
_mm_cmpgt_epi32(_mm_setzero_si128(), a.m));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<uint64_t, 2> vclez(SIMD<int64_t, 2> a)
|
||||
{
|
||||
SIMD<uint64_t, 2> tmp;
|
||||
tmp.m = _mm_or_si128(
|
||||
_mm_cmpeq_epi64(a.m, _mm_setzero_si128()),
|
||||
_mm_cmpgt_epi64(_mm_setzero_si128(), a.m));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline SIMD<float, 4> vmin(SIMD<float, 4> a, SIMD<float, 4> b)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue