diff --git a/neon_double.hh b/neon_double.hh index 35ea118..0d5ca33 100644 --- a/neon_double.hh +++ b/neon_double.hh @@ -1058,7 +1058,7 @@ inline SIMD vshuf(SIMD a, SIMD b) { SIMD tmp; for (int i = 0; i < 2; ++i) - tmp.m.val[i] = vorrq_u8(vqtbl1q_u8(a.m.val[0], b.m.val[i]), vqtbl1q_u8(a.m.val[1], vsubq_u8(b.m.val[i], vdupq_n_u8(16)))); + tmp.m.val[i] = vqtbl2q_u8(a.m, b.m.val[i]); return tmp; } @@ -1067,7 +1067,7 @@ inline SIMD vshuf(SIMD a, SIMD b) { SIMD tmp; for (int i = 0; i < 2; ++i) - tmp.m.val[i] = vorrq_s8(vqtbl1q_s8(a.m.val[0], b.m.val[i]), vqtbl1q_s8(a.m.val[1], vsubq_u8(b.m.val[i], vdupq_n_u8(16)))); + tmp.m.val[i] = vqtbl2q_s8(a.m, b.m.val[i]); return tmp; } #endif diff --git a/neon_quadruple.hh b/neon_quadruple.hh index 69bde6f..e553dee 100644 --- a/neon_quadruple.hh +++ b/neon_quadruple.hh @@ -1058,10 +1058,7 @@ inline SIMD vshuf(SIMD a, SIMD b) { SIMD tmp; for (int i = 0; i < 4; ++i) - tmp.m.val[i] = vorrq_u8(vorrq_u8(vorrq_u8(vqtbl1q_u8(a.m.val[0], b.m.val[i]), - vqtbl1q_u8(a.m.val[1], vsubq_u8(b.m.val[i], vdupq_n_u8(16)))), - vqtbl1q_u8(a.m.val[2], vsubq_u8(b.m.val[i], vdupq_n_u8(32)))), - vqtbl1q_u8(a.m.val[3], vsubq_u8(b.m.val[i], vdupq_n_u8(48)))); + tmp.m.val[i] = vqtbl4q_u8(a.m, b.m.val[i]); return tmp; } @@ -1070,10 +1067,7 @@ inline SIMD vshuf(SIMD a, SIMD b) { SIMD tmp; for (int i = 0; i < 4; ++i) - tmp.m.val[i] = vorrq_s8(vorrq_s8(vorrq_s8(vqtbl1q_s8(a.m.val[0], b.m.val[i]), - vqtbl1q_s8(a.m.val[1], vsubq_u8(b.m.val[i], vdupq_n_u8(16)))), - vqtbl1q_s8(a.m.val[2], vsubq_u8(b.m.val[i], vdupq_n_u8(32)))), - vqtbl1q_s8(a.m.val[3], vsubq_u8(b.m.val[i], vdupq_n_u8(48)))); + tmp.m.val[i] = vqtbl4q_s8(a.m, b.m.val[i]); return tmp; } #endif diff --git a/neon_triple.hh b/neon_triple.hh index 4dc6e1d..aafe12f 100644 --- a/neon_triple.hh +++ b/neon_triple.hh @@ -1058,9 +1058,7 @@ inline SIMD vshuf(SIMD a, SIMD b) { SIMD tmp; for (int i = 0; i < 3; ++i) - tmp.m.val[i] = vorrq_u8(vorrq_u8(vqtbl1q_u8(a.m.val[0], b.m.val[i]), - vqtbl1q_u8(a.m.val[1], vsubq_u8(b.m.val[i], vdupq_n_u8(16)))), - vqtbl1q_u8(a.m.val[2], vsubq_u8(b.m.val[i], vdupq_n_u8(32)))); + tmp.m.val[i] = vqtbl3q_u8(a.m, b.m.val[i]); return tmp; } @@ -1069,9 +1067,7 @@ inline SIMD vshuf(SIMD a, SIMD b) { SIMD tmp; for (int i = 0; i < 3; ++i) - tmp.m.val[i] = vorrq_s8(vorrq_s8(vqtbl1q_s8(a.m.val[0], b.m.val[i]), - vqtbl1q_s8(a.m.val[1], vsubq_u8(b.m.val[i], vdupq_n_u8(16)))), - vqtbl1q_s8(a.m.val[2], vsubq_u8(b.m.val[i], vdupq_n_u8(32)))); + tmp.m.val[i] = vqtbl3q_s8(a.m, b.m.val[i]); return tmp; } #endif