From fe25fb2a696b3160cbd2230c10c1ef22e6d5c648 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Mon, 5 Jun 2023 12:18:27 +0200 Subject: [PATCH] flatten hot functions --- cauchy_reed_solomon_erasure_coding.hh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cauchy_reed_solomon_erasure_coding.hh b/cauchy_reed_solomon_erasure_coding.hh index a02163c..e598c8f 100644 --- a/cauchy_reed_solomon_erasure_coding.hh +++ b/cauchy_reed_solomon_erasure_coding.hh @@ -27,12 +27,14 @@ struct CauchyReedSolomonErasureCoding typedef typename GF::IndexType IndexType; IndexType row_num, row_den; // $a_{ij} = \frac{1}{x_i + y_j}$ + __attribute__((flatten)) IndexType cauchy_matrix(int i, int j) { ValueType row(i), col(ValueType::N - j); return rcp(index(row + col)); } // $b_{ij} = \frac{\prod_{k=1}^{n}{(x_j + y_k)(x_k + y_i)}}{(x_j + y_i)\prod_{k \ne j}^{n}{(x_j - x_k)}\prod_{k \ne i}^{n}{(y_i - y_k)}}$ + __attribute__((flatten)) IndexType inverse_cauchy_matrix(const ValueType *rows, int i, int j, int n) { #if 0 @@ -80,7 +82,8 @@ struct CauchyReedSolomonErasureCoding } #endif #if defined(__ARM_NEON) || defined(__AVX2__) || defined(__SSE4_1__) - void mac_simd(uint8_t *c, const uint8_t *a, IndexType b, int size, bool init) + __attribute__((flatten)) + static inline void mac_simd(uint8_t *c, const uint8_t *a, IndexType b, int size, bool init) { alignas(16) uint8_t bln[16], bhn[16]; for (int i = 0; i < 16; ++i) { @@ -135,7 +138,8 @@ struct CauchyReedSolomonErasureCoding #endif #endif } - void mac_simd(uint16_t *c, const uint16_t *a, IndexType b, int size, bool init) + __attribute__((flatten)) + static inline void mac_simd(uint16_t *c, const uint16_t *a, IndexType b, int size, bool init) { alignas(16) uint8_t blll[16], bllh[16], blhl[16], blhh[16], bhll[16], bhlh[16], bhhl[16], bhhh[16]; for (int i = 0; i < 16; ++i) { @@ -272,7 +276,8 @@ struct CauchyReedSolomonErasureCoding #endif } #endif - void multiply_accumulate(ValueType *c, const ValueType *a, IndexType b, int len, bool init) + __attribute__((flatten)) + static inline void multiply_accumulate(ValueType *c, const ValueType *a, IndexType b, int len, bool init) { #if defined(__ARM_NEON) || defined(__AVX2__) || defined(__SSE4_1__) #ifdef __AVX2__