diff --git a/cauchy_mersenne_erasure_coding.hh b/cauchy_mersenne_erasure_coding.hh new file mode 100644 index 0000000..3020972 --- /dev/null +++ b/cauchy_mersenne_erasure_coding.hh @@ -0,0 +1,89 @@ +/* +Cauchy Mersenne 2^31-1 Prime Field Erasure Coding + +Copyright 2026 Ahmet Inan +*/ + +#pragma once + +#include "mersenne_31.hh" + +namespace CODE { + +struct CauchyMersenneErasureCoding +{ + typedef Mersenne31 M31; + M31 row_num, row_den; + // $a_{ij} = \frac{1}{x_i + y_j}$ + M31 cauchy_matrix(int i, int j) + { + M31 row(i), col(j); + return rcp(row + col); + } + // $b_{ij} = \frac{\prod_{k=1}^{n}{(x_j + y_k)(x_k + y_i)}}{(x_j + y_i)\prod_{k \ne j}^{n}{(x_j - x_k)}\prod_{k \ne i}^{n}{(y_i - y_k)}}$ + M31 inverse_cauchy_matrix(const int *rows, int i, int j, int n) + { +#if 0 + M31 row_j(rows[j]), col_i(i); + M31 prod_xy(1), prod_x(1), prod_y(1); + for (int k = 0; k < n; k++) { + M31 row_k(rows[k]), col_k(k); + prod_xy *= (row_j + col_k) * (row_k + col_i); + if (k != j) + prod_x *= (row_j - row_k); + if (k != i) + prod_y *= (col_i - col_k); + } + return prod_xy / ((row_j + col_i) * prod_x * prod_y); +#else + M31 row_j(rows[j]), col_i(i); + if (j == 0) { + M31 num(1), den(1); + for (int k = 0; k < n; k++) { + M31 row_k(rows[k]), col_k(k); + num *= row_k + col_i; + if (k != i) + den *= col_i - col_k; + } + row_num = num; + row_den = den; + } + M31 num(row_num), den(row_den); + for (int k = 0; k < n; k++) { + M31 row_k(rows[k]), col_k(k); + num *= row_j + col_k; + if (k != j) + den *= row_j - row_k; + } + return num / ((row_j + col_i) * den); +#endif + } + void mac(M31 *c, const M31 *a, M31 b, int len, bool init) + { + if (init) { + for (int i = 0; i < len; i++) + c[i] = b * a[i]; + } else { + for (int i = 0; i < len; i++) + c[i] += b * a[i]; + } + } + void encode(const M31 *data, M31 *block, int block_id, int block_len, int block_cnt) + { + assert(block_id >= block_cnt && block_id < int(M31::P) / 2); + for (int k = 0; k < block_cnt; k++) { + M31 a_ik = cauchy_matrix(block_id, k); + mac(block, data + block_len * k, a_ik, block_len, !k); + } + } + void decode(M31 *data, const M31 *blocks, const int *block_ids, int block_idx, int block_len, int block_cnt) + { + for (int k = 0; k < block_cnt; k++) { + M31 b_ik = inverse_cauchy_matrix(block_ids, block_idx, k, block_cnt); + mac(data, blocks + block_len * k, b_ik, block_len, !k); + } + } +}; + +} + diff --git a/tests/cme_regression_test.cc b/tests/cme_regression_test.cc new file mode 100644 index 0000000..93296b3 --- /dev/null +++ b/tests/cme_regression_test.cc @@ -0,0 +1,77 @@ +/* +Regression Test for the Cauchy Mersenne 2^31-1 Prime Field Encoder and Decoder + +Copyright 2026 Ahmet Inan +*/ + +#include +#include +#include +#include +#include +#include +#include "mersenne_31.hh" +#include "cauchy_mersenne_erasure_coding.hh" + +void cme_test(int trials) +{ + typedef CODE::Mersenne31 M31; + int value_bytes = sizeof(M31); + //int value_bits = value_bytes * 8; + const int MAX_LEN = 1024; + CODE::CauchyMersenneErasureCoding cme; + std::random_device rd; + std::default_random_engine generator(rd()); + typedef std::uniform_int_distribution distribution; + auto rnd_cnt = std::bind(distribution(1, 256), generator); + auto rnd_len = std::bind(distribution(1, MAX_LEN), generator); + auto rnd_dat = std::bind(distribution(0, M31::P-1), generator); + //auto rnd_dat = std::bind(distribution(0, (1 << value_bits) - 1), generator); + while (--trials) { + int block_count = rnd_cnt(); + int idents_total = 1000000; // M31::P / 2 - block_count; + int block_values = rnd_len(); + int block_bytes = block_values * value_bytes; + int data_values = block_count * block_values; + int data_bytes = data_values * value_bytes; + M31 *orig = new M31[data_values]; + M31 *data = new M31[data_values]; + M31 *blocks = new M31[data_values]; + int *idents = new int[idents_total]; + for (int i = 0; i < data_values; ++i) + orig[i] = M31(rnd_dat()); + for (int i = 0; i < idents_total; ++i) + idents[i] = block_count + i; + for (int i = 0; i < block_count; i++) { + std::uniform_int_distribution hat(i, idents_total - 1); + std::swap(idents[i], idents[hat(generator)]); + } + auto enc_start = std::chrono::system_clock::now(); + for (int i = 0; i < block_count; ++i) + cme.encode(orig, blocks + block_values * i, idents[i], block_values, block_count); + auto enc_end = std::chrono::system_clock::now(); + auto enc_usec = std::chrono::duration_cast(enc_end - enc_start); + double enc_mbs = double(data_bytes) / enc_usec.count(); + auto dec_start = std::chrono::system_clock::now(); + for (int i = 0; i < block_count; ++i) + cme.decode(data + block_values * i, blocks, idents, i, block_values, block_count); + auto dec_end = std::chrono::system_clock::now(); + auto dec_usec = std::chrono::duration_cast(dec_end - dec_start); + double dec_mbs = double(data_bytes) / dec_usec.count(); + std::cout << "block count = " << block_count << ", block size = " << block_bytes << " bytes, encoding speed = " << enc_mbs << " megabyte per second, decoding speed = " << dec_mbs << " megabyte per second" << std::endl; + for (int i = 0; i < data_values; ++i) + assert(data[i] == orig[i]); + delete[] idents; + delete[] blocks; + delete[] orig; + delete[] data; + } +} + +int main() +{ + cme_test(100); + std::cerr << "Cauchy Mersenne 2^31-1 prime field regression test passed!" << std::endl; + return 0; +} +