From 0b371023833612e3626060513d4af03790ffc8a6 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Tue, 1 Oct 2019 22:17:04 +0200 Subject: [PATCH] getting rid of obsolete version --- README.md | 8 +- ldpc_decoder.hh | 271 ++++++++++++++++--------------- ldpc_decoder2.hh | 292 ---------------------------------- tests/ldpc_regression_test.cc | 4 - 4 files changed, 140 insertions(+), 435 deletions(-) delete mode 100644 ldpc_decoder2.hh diff --git a/README.md b/README.md index 89157f4..a7d97c1 100644 --- a/README.md +++ b/README.md @@ -55,13 +55,7 @@ Implemented are the following Encoders and Decoders: ### [ldpc_decoder.hh](ldpc_decoder.hh) -[Low-density parity-check](https://en.wikipedia.org/wiki/Low-density_parity-check_code) layered decoder -This version stores only the first q bit positions and might be faster on low power systems. - -### [ldpc_decoder2.hh](ldpc_decoder2.hh) - -[Low-density parity-check](https://en.wikipedia.org/wiki/Low-density_parity-check_code) layered decoder -This version stores and uses all word positions and might be faster on high performance workstations. +[SIMD](https://en.wikipedia.org/wiki/SIMD) intra-frame accelerated [Low-density parity-check](https://en.wikipedia.org/wiki/Low-density_parity-check_code) layered decoder. ### [exclusive_reduce.hh](exclusive_reduce.hh) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index 6639254..7f18867 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -1,8 +1,6 @@ /* LDPC SISO layered decoder -This version stores only the first q bit positions - Copyright 2018 Ahmet Inan */ @@ -40,15 +38,19 @@ class LDPCDecoder static const int MSG = K/D; static const int CNC = TABLE::LINKS_MAX_CN - 2; static const int BNL = (TABLE::LINKS_TOTAL + D-1) / D; + static const int LOC = (TABLE::LINKS_TOTAL - (2*R-1) + D-1) / D; typedef SIMD TYPE; typedef struct { uint16_t off; uint16_t shi; } Loc; + typedef uint32_t wd_t; + static_assert(sizeof(wd_t) * 8 >= CNC, "write disable mask needs at least as many bits as max check node links"); Rotate rotate; TYPE bnl[BNL]; TYPE msg[MSG]; TYPE pty[PTY]; - uint16_t pos[q * CNC]; + Loc loc[LOC]; + wd_t wd[PTY]; uint8_t cnc[q]; static TYPE eor(TYPE a, TYPE b) @@ -67,33 +69,6 @@ class LDPCDecoder { return orr(eor(a, b), vdup(127)); } - static void cnp(TYPE *out, const TYPE *inp, int cnt) - { - TYPE mags[cnt]; - for (int i = 0; i < cnt; ++i) - mags[i] = vqabs(inp[i]); - - if (BETA) { - auto beta = vunsigned(vdup(BETA)); - for (int i = 0; i < cnt; ++i) - mags[i] = vsigned(vqsub(vunsigned(mags[i]), beta)); - } - - TYPE mins[2]; - mins[0] = vmin(mags[0], mags[1]); - mins[1] = vmax(mags[0], mags[1]); - for (int i = 2; i < cnt; ++i) { - mins[1] = vmin(mins[1], vmax(mins[0], mags[i])); - mins[0] = vmin(mins[0], mags[i]); - } - - TYPE signs = inp[0]; - for (int i = 1; i < cnt; ++i) - signs = eor(signs, inp[i]); - - for (int i = 0; i < cnt; ++i) - out[i] = vsign(other(mags[i], mins[0], mins[1]), mine(signs, inp[i])); - } static TYPE selfcorr(TYPE a, TYPE b) { return vreinterpret(vand(vmask(b), vorr(vceqz(a), veor(vcgtz(a), vcltz(b))))); @@ -101,40 +76,33 @@ class LDPCDecoder bool bad() { + Loc *lo = loc; for (int i = 0; i < q; ++i) { int cnt = cnc[i]; - int offset[cnt], shift[cnt]; - for (int c = 0; c < cnt; ++c) { - shift[c] = pos[CNC*i+c] % M; - offset[c] = pos[CNC*i+c] - shift[c]; - } + int deg = cnt + 2; auto res = vmask(vzero()); for (int j = 0; j < W; ++j) { - Loc lo[cnt]; - for (int c = 0; c < cnt; ++c) { - lo[c].off = offset[c] / D + shift[c] % W; - lo[c].shi = shift[c] / W; - shift[c] = (shift[c] + 1) % M; - } - TYPE par[2]; - if (i) { - par[0] = pty[W*(i-1)+j]; - } else if (j) { - par[0] = pty[W*(q-1)+j-1]; - } else { - par[0] = rotate(pty[PTY-1], 1); - par[0].v[0] = 127; - } - par[1] = pty[W*i+j]; - TYPE mes[cnt]; - for (int c = 0; c < cnt; ++c) - mes[c] = rotate(msg[lo[c].off], -lo[c].shi); TYPE cnv = vdup(1); - for (int c = 0; c < 2; ++c) - cnv = vsign(cnv, par[c]); - for (int c = 0; c < cnt; ++c) - cnv = vsign(cnv, mes[c]); + for (int k = 0; k < deg; ++k) { + TYPE tmp; + if (k < cnt) { + tmp = rotate(msg[lo[k].off], -lo[k].shi); + } else if (k == cnt) { + tmp = pty[W*i+j]; + } else { + if (i) { + tmp = pty[W*(i-1)+j]; + } else if (j) { + tmp = pty[W*(q-1)+j-1]; + } else { + tmp = rotate(pty[PTY-1], 1); + tmp.v[0] = 127; + } + } + cnv = vsign(cnv, tmp); + } res = vorr(res, vclez(cnv)); + lo += cnt; } for (int n = 0; n < D; ++n) if (res.v[n]) @@ -145,87 +113,101 @@ class LDPCDecoder void update() { TYPE *bl = bnl; + Loc *lo = loc; for (int i = 0; i < q; ++i) { int cnt = cnc[i]; - int offset[cnt], shift[cnt]; - for (int c = 0; c < cnt; ++c) { - shift[c] = pos[CNC*i+c] % M; - offset[c] = pos[CNC*i+c] - shift[c]; - } int deg = cnt + 2; for (int j = 0; j < W; ++j) { - Loc lo[cnt]; - for (int c = 0; c < cnt; ++c) { - lo[c].off = offset[c] / D + shift[c] % W; - lo[c].shi = shift[c] / W; - shift[c] = (shift[c] + 1) % M; - } - std::sort(lo, lo + cnt, [](const Loc &a, const Loc &b){ return a.off < b.off; }); - bool wd[deg], repeat = false; - for (int d = 0; d < deg; ++d) - wd[d] = false; - for (int c = 1; c < cnt; ++c) - if (lo[c].off == lo[c-1].off) - wd[c] = repeat = true; - do { - TYPE par[2]; - if (i) { - par[0] = pty[W*(i-1)+j]; - } else if (j) { - par[0] = pty[W*(q-1)+j-1]; + TYPE mags[deg], inps[deg]; + TYPE min0 = vdup(127); + TYPE min1 = vdup(127); + TYPE signs = vdup(127); + + for (int k = 0; k < deg; ++k) { + TYPE tmp; + if (k < cnt) { + tmp = rotate(msg[lo[k].off], -lo[k].shi); + } else if (k == cnt) { + tmp = pty[W*i+j]; } else { - par[0] = rotate(pty[PTY-1], 1); - par[0].v[0] = 127; - } - par[1] = pty[W*i+j]; - TYPE mes[cnt]; - for (int c = 0; c < cnt; ++c) - mes[c] = rotate(msg[lo[c].off], -lo[c].shi); - TYPE inp[deg], out[deg]; - for (int c = 0; c < cnt; ++c) - inp[c] = vqsub(mes[c], bl[c]); - inp[cnt] = vqsub(par[0], bl[cnt]); - inp[cnt+1] = vqsub(par[1], bl[cnt+1]); - cnp(out, inp, deg); - for (int d = 0; d < deg; ++d) - out[d] = vclamp(out[d], -32, 31); - for (int d = 0; d < deg; ++d) - out[d] = selfcorr(bl[d], out[d]); - for (int c = 0; c < cnt; ++c) - mes[c] = vqadd(inp[c], out[c]); - par[0] = vqadd(inp[cnt], out[cnt]); - par[1] = vqadd(inp[cnt+1], out[cnt+1]); - if (i) { - pty[W*(i-1)+j] = par[0]; - } else if (j) { - pty[W*(q-1)+j-1] = par[0]; - } else { - par[0].v[0] = pty[PTY-1].v[D-1]; - pty[PTY-1] = rotate(par[0], -1); - } - pty[W*i+j] = par[1]; - if (repeat) { - for (int c = 0; c < cnt; ++c) - if (!wd[c]) - msg[lo[c].off] = rotate(mes[c], lo[c].shi); - for (int d = 0; d < deg; ++d) - if (!wd[d]) - bl[d] = out[d]; - repeat = false; - for (int c = 1; c < cnt; ++c) { - if (wd[c] && !wd[c-1]) { - wd[c] = false; - repeat = true; - ++c; - } + if (i) { + tmp = pty[W*(i-1)+j]; + } else if (j) { + tmp = pty[W*(q-1)+j-1]; + } else { + tmp = rotate(pty[PTY-1], 1); + tmp.v[0] = 127; } - } else { - for (int c = 0; c < cnt; ++c) - msg[lo[c].off] = rotate(mes[c], lo[c].shi); - for (int d = 0; d < deg; ++d) - bl[d] = out[d]; } - } while (repeat); + + TYPE inp = vqsub(tmp, bl[k]); + + TYPE mag = vqabs(inp); + + if (BETA) { + auto beta = vunsigned(vdup(BETA)); + mag = vsigned(vqsub(vunsigned(mag), beta)); + } + + min1 = vmin(min1, vmax(min0, mag)); + min0 = vmin(min0, mag); + + signs = eor(signs, inp); + + inps[k] = inp; + mags[k] = mag; + } + for (int k = 0; k < deg; ++k) { + TYPE mag = mags[k]; + TYPE inp = inps[k]; + + TYPE out = vsign(other(mag, min0, min1), mine(signs, inp)); + + out = vclamp(out, -32, 31); + + out = selfcorr(bl[k], out); + + TYPE tmp = vqadd(inp, out); + + if (k < cnt) { + if (!((wd[W*i+j]>>k)&1)) { + bl[k] = out; + msg[lo[k].off] = rotate(tmp, lo[k].shi); + } + } else if (k == cnt) { + bl[k] = out; + pty[W*i+j] = tmp; + } else { + bl[k] = out; + if (i) { + pty[W*(i-1)+j] = tmp; + } else if (j) { + pty[W*(q-1)+j-1] = tmp; + } else { + tmp.v[0] = pty[PTY-1].v[D-1]; + pty[PTY-1] = rotate(tmp, -1); + } + } + } + if (wd[W*i+j]) { + for (int first = 0, c = 1; c < cnt; ++c) { + if (lo[first].off != lo[c].off || c == cnt-1) { + int last = c - 1; + if (c == cnt-1) + ++last; + if (last != first) { + int count = last - first + 1; + wd_t mask = ((1 << count) - 1) << first; + wd_t cur = wd[W*i+j]; + wd_t tmp = cur & mask; + wd_t rol = (tmp << 1) | (tmp >> (count-1)); + wd[W*i+j] = (cur & ~mask) | (rol & mask); + } + first = c; + } + } + } + lo += cnt; bl += deg; } } @@ -235,6 +217,7 @@ class LDPCDecoder public: LDPCDecoder() { + uint16_t pos[q * CNC]; for (int i = 0; i < q; ++i) cnc[i] = 0; int bit_pos = 0; @@ -251,6 +234,30 @@ public: bit_pos += M; } } + Loc *lo = loc; + for (int i = 0; i < q; ++i) { + int cnt = cnc[i]; + int offset[cnt], shift[cnt]; + for (int c = 0; c < cnt; ++c) { + shift[c] = pos[CNC*i+c] % M; + offset[c] = pos[CNC*i+c] - shift[c]; + } + for (int j = 0; j < W; ++j) { + for (int c = 0; c < cnt; ++c) { + lo[c].off = offset[c] / D + shift[c] % W; + lo[c].shi = shift[c] / W; + shift[c] = (shift[c] + 1) % M; + } + std::sort(lo, lo + cnt, [](const Loc &a, const Loc &b){ return a.off < b.off; }); + wd[W*i+j] = 0; + for (int c = 1; c < cnt; ++c) + if (lo[c].off == lo[c-1].off) + wd[W*i+j] |= 1 << c; + lo += cnt; + } + } + //assert(lo <= loc + LOC); + //std::cerr << LOC - (lo - loc) << std::endl; } int operator()(int8_t *message, int8_t *parity, int trials = 25) { diff --git a/ldpc_decoder2.hh b/ldpc_decoder2.hh deleted file mode 100644 index 1987162..0000000 --- a/ldpc_decoder2.hh +++ /dev/null @@ -1,292 +0,0 @@ -/* -LDPC SISO layered decoder v2 - -This version stores and uses all word positions - -Copyright 2018 Ahmet Inan -*/ - -#ifndef LDPC_DECODER_HH -#define LDPC_DECODER_HH - -#include -#include "simd.hh" -#include "rotate.hh" - -namespace CODE { - -template -class LDPCDecoder -{ -#ifdef __AVX2__ - static const int SIMD_SIZE = 32; - // M = 360 = 30 * 12 - static const int WORD_SIZE = 30; -#else - static const int SIMD_SIZE = 16; - // M = 360 = 15 * 24 - static const int WORD_SIZE = 15; -#endif - static_assert(TABLE::M % WORD_SIZE == 0, "M must be multiple of word size"); - static_assert(WORD_SIZE <= SIMD_SIZE, "SIMD size must be bigger or equal word size"); - static const int M = TABLE::M; - static const int N = TABLE::N; - static const int K = TABLE::K; - static const int R = N-K; - static const int q = R/M; - static const int D = WORD_SIZE; - static const int W = M/D; - static const int PTY = R/D; - static const int MSG = K/D; - static const int CNC = TABLE::LINKS_MAX_CN - 2; - static const int BNL = (TABLE::LINKS_TOTAL + D-1) / D; - static const int LOC = (TABLE::LINKS_TOTAL - (2*R-1) + D-1) / D; - - typedef SIMD TYPE; - typedef struct { uint16_t off; uint16_t shi; } Loc; - typedef uint32_t wd_t; - static_assert(sizeof(wd_t) * 8 >= CNC, "write disable mask needs at least as many bits as max check node links"); - Rotate rotate; - - TYPE bnl[BNL]; - TYPE msg[MSG]; - TYPE pty[PTY]; - Loc loc[LOC]; - wd_t wd[PTY]; - uint8_t cnc[q]; - - static TYPE eor(TYPE a, TYPE b) - { - return vreinterpret(veor(vmask(a), vmask(b))); - } - static TYPE orr(TYPE a, TYPE b) - { - return vreinterpret(vorr(vmask(a), vmask(b))); - } - static TYPE other(TYPE a, TYPE b, TYPE c) - { - return vreinterpret(vbsl(vceq(a, b), vmask(c), vmask(b))); - } - static TYPE mine(TYPE a, TYPE b) - { - return orr(eor(a, b), vdup(127)); - } - static TYPE selfcorr(TYPE a, TYPE b) - { - return vreinterpret(vand(vmask(b), vorr(vceqz(a), veor(vcgtz(a), vcltz(b))))); - } - - bool bad() - { - Loc *lo = loc; - for (int i = 0; i < q; ++i) { - int cnt = cnc[i]; - int deg = cnt + 2; - auto res = vmask(vzero()); - for (int j = 0; j < W; ++j) { - TYPE cnv = vdup(1); - for (int k = 0; k < deg; ++k) { - TYPE tmp; - if (k < cnt) { - tmp = rotate(msg[lo[k].off], -lo[k].shi); - } else if (k == cnt) { - tmp = pty[W*i+j]; - } else { - if (i) { - tmp = pty[W*(i-1)+j]; - } else if (j) { - tmp = pty[W*(q-1)+j-1]; - } else { - tmp = rotate(pty[PTY-1], 1); - tmp.v[0] = 127; - } - } - cnv = vsign(cnv, tmp); - } - res = vorr(res, vclez(cnv)); - lo += cnt; - } - for (int n = 0; n < D; ++n) - if (res.v[n]) - return true; - } - return false; - } - void update() - { - TYPE *bl = bnl; - Loc *lo = loc; - for (int i = 0; i < q; ++i) { - int cnt = cnc[i]; - int deg = cnt + 2; - for (int j = 0; j < W; ++j) { - TYPE mags[deg], inps[deg]; - TYPE min0 = vdup(127); - TYPE min1 = vdup(127); - TYPE signs = vdup(127); - - for (int k = 0; k < deg; ++k) { - TYPE tmp; - if (k < cnt) { - tmp = rotate(msg[lo[k].off], -lo[k].shi); - } else if (k == cnt) { - tmp = pty[W*i+j]; - } else { - if (i) { - tmp = pty[W*(i-1)+j]; - } else if (j) { - tmp = pty[W*(q-1)+j-1]; - } else { - tmp = rotate(pty[PTY-1], 1); - tmp.v[0] = 127; - } - } - - TYPE inp = vqsub(tmp, bl[k]); - - TYPE mag = vqabs(inp); - - if (BETA) { - auto beta = vunsigned(vdup(BETA)); - mag = vsigned(vqsub(vunsigned(mag), beta)); - } - - min1 = vmin(min1, vmax(min0, mag)); - min0 = vmin(min0, mag); - - signs = eor(signs, inp); - - inps[k] = inp; - mags[k] = mag; - } - for (int k = 0; k < deg; ++k) { - TYPE mag = mags[k]; - TYPE inp = inps[k]; - - TYPE out = vsign(other(mag, min0, min1), mine(signs, inp)); - - out = vclamp(out, -32, 31); - - out = selfcorr(bl[k], out); - - TYPE tmp = vqadd(inp, out); - - if (k < cnt) { - if (!((wd[W*i+j]>>k)&1)) { - bl[k] = out; - msg[lo[k].off] = rotate(tmp, lo[k].shi); - } - } else if (k == cnt) { - bl[k] = out; - pty[W*i+j] = tmp; - } else { - bl[k] = out; - if (i) { - pty[W*(i-1)+j] = tmp; - } else if (j) { - pty[W*(q-1)+j-1] = tmp; - } else { - tmp.v[0] = pty[PTY-1].v[D-1]; - pty[PTY-1] = rotate(tmp, -1); - } - } - } - if (wd[W*i+j]) { - for (int first = 0, c = 1; c < cnt; ++c) { - if (lo[first].off != lo[c].off || c == cnt-1) { - int last = c - 1; - if (c == cnt-1) - ++last; - if (last != first) { - int count = last - first + 1; - wd_t mask = ((1 << count) - 1) << first; - wd_t cur = wd[W*i+j]; - wd_t tmp = cur & mask; - wd_t rol = (tmp << 1) | (tmp >> (count-1)); - wd[W*i+j] = (cur & ~mask) | (rol & mask); - } - first = c; - } - } - } - lo += cnt; - bl += deg; - } - } - //assert(bl <= bnl + BNL); - //std::cerr << BNL - (bl - bnl) << std::endl; - } -public: - LDPCDecoder() - { - uint16_t pos[q * CNC]; - for (int i = 0; i < q; ++i) - cnc[i] = 0; - int bit_pos = 0; - const int *row_ptr = TABLE::POS; - for (int g = 0; TABLE::LEN[g]; ++g) { - int bit_deg = TABLE::DEG[g]; - for (int r = 0; r < TABLE::LEN[g]; ++r) { - for (int d = 0; d < bit_deg; ++d) { - int n = row_ptr[d] % q; - int m = row_ptr[d] / q; - pos[CNC*n+cnc[n]++] = bit_pos + (M - m) % M; - } - row_ptr += bit_deg; - bit_pos += M; - } - } - Loc *lo = loc; - for (int i = 0; i < q; ++i) { - int cnt = cnc[i]; - int offset[cnt], shift[cnt]; - for (int c = 0; c < cnt; ++c) { - shift[c] = pos[CNC*i+c] % M; - offset[c] = pos[CNC*i+c] - shift[c]; - } - for (int j = 0; j < W; ++j) { - for (int c = 0; c < cnt; ++c) { - lo[c].off = offset[c] / D + shift[c] % W; - lo[c].shi = shift[c] / W; - shift[c] = (shift[c] + 1) % M; - } - std::sort(lo, lo + cnt, [](const Loc &a, const Loc &b){ return a.off < b.off; }); - wd[W*i+j] = 0; - for (int c = 1; c < cnt; ++c) - if (lo[c].off == lo[c-1].off) - wd[W*i+j] |= 1 << c; - lo += cnt; - } - } - //assert(lo <= loc + LOC); - //std::cerr << LOC - (lo - loc) << std::endl; - } - int operator()(int8_t *message, int8_t *parity, int trials = 25) - { - for (int i = 0; i < BNL; ++i) - bnl[i] = vzero(); - for (int i = 0; i < K/M; ++i) - for (int j = 0; j < W; ++j) - for (int n = 0; n < D; ++n) - msg[W*i+j].v[n] = message[M*i+W*n+j]; - for (int i = 0; i < q; ++i) - for (int j = 0; j < W; ++j) - for (int n = 0; n < D; ++n) - pty[W*i+j].v[n] = parity[q*(W*n+j)+i]; - while (bad() && --trials >= 0) - update(); - for (int i = 0; i < K/M; ++i) - for (int j = 0; j < W; ++j) - for (int n = 0; n < D; ++n) - message[M*i+W*n+j] = msg[W*i+j].v[n]; - for (int i = 0; i < q; ++i) - for (int j = 0; j < W; ++j) - for (int n = 0; n < D; ++n) - parity[q*(W*n+j)+i] = pty[W*i+j].v[n]; - return trials; - } -}; - -} - -#endif diff --git a/tests/ldpc_regression_test.cc b/tests/ldpc_regression_test.cc index 4add71a..efdb9a1 100644 --- a/tests/ldpc_regression_test.cc +++ b/tests/ldpc_regression_test.cc @@ -15,11 +15,7 @@ Copyright 2018 Ahmet Inan #include #include #include "ldpc_encoder.hh" -#if 0 #include "ldpc_decoder.hh" -#else -#include "ldpc_decoder2.hh" -#endif struct DVB_T2_TABLE_A1 {