From 35451540ca04d74fff9249fdbeeefca161797652 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Mon, 25 Mar 2024 10:42:11 +0100 Subject: [PATCH] added prime field based Cauchy Reed Solomon erasure coding --- cauchy_reed_solomon_erasure_coding2.hh | 90 ++++++++++++++++++++++++++ tests/crs2_regression_test.cc | 81 +++++++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 cauchy_reed_solomon_erasure_coding2.hh create mode 100644 tests/crs2_regression_test.cc diff --git a/cauchy_reed_solomon_erasure_coding2.hh b/cauchy_reed_solomon_erasure_coding2.hh new file mode 100644 index 0000000..63db715 --- /dev/null +++ b/cauchy_reed_solomon_erasure_coding2.hh @@ -0,0 +1,90 @@ +/* +Cauchy Reed Solomon Erasure Coding + +Copyright 2024 Ahmet Inan +*/ + +#pragma once + +namespace CODE { + +template +struct CauchyReedSolomonErasureCoding2 +{ + PF row_num, row_den; + // $a_{ij} = \frac{1}{x_i + y_j}$ + __attribute__((flatten)) + PF cauchy_matrix(int i, int j) + { + PF row(i), col(j); + return rcp(row + col); + } + // $b_{ij} = \frac{\prod_{k=1}^{n}{(x_j + y_k)(x_k + y_i)}}{(x_j + y_i)\prod_{k \ne j}^{n}{(x_j - x_k)}\prod_{k \ne i}^{n}{(y_i - y_k)}}$ + __attribute__((flatten)) + PF inverse_cauchy_matrix(const PF *rows, int i, int j, int n) + { +#if 1 + PF col_i(i); + PF prod_xy(1), prod_x(1), prod_y(1); + for (int k = 0; k < n; k++) { + PF col_k(k); + prod_xy *= (rows[j] + col_k) * (rows[k] + col_i); + if (k != j) + prod_x *= (rows[j] - rows[k]); + if (k != i) + prod_y *= (col_i - col_k); + } + return prod_xy / ((rows[j] + col_i) * prod_x * prod_y); +#else + PF col_i(i); + if (j == 0) { + PF num(1), den(1); + for (int k = 0; k < n; k++) { + PF col_k(k); + num *= (rows[k] + col_i); + if (k != i) + den *= (col_i - col_k); + } + row_num = num; + row_den = den; + } + PF num(row_num), den(row_den); + for (int k = 0; k < n; k++) { + PF col_k(k); + num *= (rows[j] + col_k); + if (k != j) + den *= (rows[j] - rows[k]); + } + return num / ((rows[j] + col_i) * den); +#endif + } + __attribute__((flatten)) + static inline void multiply_accumulate(PF *c, const PF *a, PF b, int len, bool init) + { + if (init) { + for (int i = 0; i < len; i++) + c[i] = b * a[i]; + } else { + for (int i = 0; i < len; i++) + c[i] += b * a[i]; + } + } + void encode(const PF *data, PF *block, int block_id, int block_len, int block_cnt) + { + assert(block_id >= block_cnt && block_id < int(PF::P) / 2); + for (int k = 0; k < block_cnt; k++) { + PF a_ik = cauchy_matrix(block_id, k); + multiply_accumulate(block, data + block_len * k, a_ik, block_len, !k); + } + } + void decode(PF *data, const PF *blocks, const PF *block_ids, int block_idx, int block_len, int block_cnt) + { + for (int k = 0; k < block_cnt; k++) { + PF b_ik = inverse_cauchy_matrix(block_ids, block_idx, k, block_cnt); + multiply_accumulate(data, blocks + block_len * k, b_ik, block_len, !k); + } + } +}; + +} + diff --git a/tests/crs2_regression_test.cc b/tests/crs2_regression_test.cc new file mode 100644 index 0000000..250ca95 --- /dev/null +++ b/tests/crs2_regression_test.cc @@ -0,0 +1,81 @@ +/* +Regression Test for the second Cauchy Reed Solomon Encoder and Decoder + +Copyright 2024 Ahmet Inan +*/ + +#include +#include +#include +#include +#include +#include +#include "prime_field.hh" +#include "cauchy_reed_solomon_erasure_coding2.hh" + +template +void crs_test(int trials) +{ + int value_bits = log2(PRIME); + int value_bytes = value_bits / 8; + typedef CODE::PrimeField PF; + CODE::CauchyReedSolomonErasureCoding2 crs; + std::random_device rd; + std::default_random_engine generator(rd()); + typedef std::uniform_int_distribution distribution; + auto rnd_cnt = std::bind(distribution(1, std::min(PF::P / 4, 256)), generator); + auto rnd_len = std::bind(distribution(1, 1 << 10), generator); + auto rnd_dat = std::bind(distribution(0, (1 << value_bits) - 1), generator); + while (--trials) { + int block_count = rnd_cnt(); + int identifiers_total = PF::P / 2 - block_count; + int block_values = rnd_len(); + int block_bytes = block_values * value_bytes; + int data_values = block_count * block_values; + int data_bytes = data_values * value_bytes; + PF *orig = new PF[data_values]; + PF *data = new PF[data_values]; + PF *blocks = new PF[data_values]; + for (int i = 0; i < data_values; ++i) + orig[i] = PF(rnd_dat()); + auto identifiers = new PF[identifiers_total]; + for (int i = 0; i < identifiers_total; ++i) + identifiers[i] = PF(block_count + i); + for (int i = 0; i < block_count; i++) { + std::uniform_int_distribution hat(i, identifiers_total - 1); + std::swap(identifiers[i], identifiers[hat(generator)]); + } + auto enc_start = std::chrono::system_clock::now(); + for (int i = 0; i < block_count; ++i) + crs.encode(orig, blocks + block_values * i, identifiers[i](), block_values, block_count); + auto enc_end = std::chrono::system_clock::now(); + auto enc_usec = std::chrono::duration_cast(enc_end - enc_start); + double enc_mbs = double(data_bytes) / enc_usec.count(); + auto dec_start = std::chrono::system_clock::now(); + for (int i = 0; i < block_count; ++i) + crs.decode(data + block_values * i, blocks, identifiers, i, block_values, block_count); + auto dec_end = std::chrono::system_clock::now(); + auto dec_usec = std::chrono::duration_cast(dec_end - dec_start); + double dec_mbs = double(data_bytes) / dec_usec.count(); + std::cout << "block count = " << block_count << ", block size = " << block_bytes << " bytes, encoding speed = " << enc_mbs << " megabyte per second, decoding speed = " << dec_mbs << " megabyte per second" << std::endl; + for (int i = 0; i < data_values; ++i) + assert(data[i] == orig[i]); + delete[] identifiers; + delete[] blocks; + delete[] orig; + delete[] data; + } +} + +int main() +{ + if (1) { + crs_test(200); + } + if (1) { + crs_test(100); + } + std::cerr << "Cauchy Reed Solomon Two regression test passed!" << std::endl; + return 0; +} +