From e29e3a161d0d07c4763f72490a599a9538fab385 Mon Sep 17 00:00:00 2001
From: Ahmet Inan
Date: Mon, 12 Feb 2024 20:26:21 +0100
Subject: [PATCH] added doubled vshuf for aarch64

---
Review notes (ignored by git am):
- line structure and the SIMD template arguments were restored by hand;
  confirm them against neon_double.hh before applying
- each half is now a single two-register table lookup (vqtbl2q) instead
  of two vqtbl1q lookups merged with vsubq/vorrq; results are identical
- the index line below still names the blobs of the original commit

 neon_double.hh | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/neon_double.hh b/neon_double.hh
index a34c40c..e559dce 100644
--- a/neon_double.hh
+++ b/neon_double.hh
@@ -1061,3 +1061,34 @@ inline SIMD vclamp(SIMD x, int32_t a, int32_t b)
 	return tmp;
 }
 
+#ifdef __aarch64__
+// Byte shuffle over the whole 32-byte vector: result byte i is byte b[i] of a,
+// where a.m[0] supplies bytes 0..15 and a.m[1] bytes 16..31. Any index of 32
+// or more yields 0, as TBL zeroes out-of-range lanes.
+template <>
+inline SIMD<uint8_t, 32> vshuf(SIMD<uint8_t, 32> a, SIMD<uint8_t, 32> b)
+{
+	// one two-register TBL per half instead of two lookups merged with ORR
+	uint8x16x2_t tbl;
+	tbl.val[0] = a.m[0];
+	tbl.val[1] = a.m[1];
+	SIMD<uint8_t, 32> tmp;
+	tmp.m[0] = vqtbl2q_u8(tbl, b.m[0]);
+	tmp.m[1] = vqtbl2q_u8(tbl, b.m[1]);
+	return tmp;
+}
+
+// Signed-byte variant of the shuffle above; the indices in b remain unsigned.
+template <>
+inline SIMD<int8_t, 32> vshuf(SIMD<int8_t, 32> a, SIMD<uint8_t, 32> b)
+{
+	int8x16x2_t tbl;
+	tbl.val[0] = a.m[0];
+	tbl.val[1] = a.m[1];
+	SIMD<int8_t, 32> tmp;
+	tmp.m[0] = vqtbl2q_s8(tbl, b.m[0]);
+	tmp.m[1] = vqtbl2q_s8(tbl, b.m[1]);
+	return tmp;
+}
+#endif
+