From 1f4da1bed3cb1277edf7fb02b20540b279e4059e Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Thu, 3 Oct 2019 10:08:00 +0200 Subject: [PATCH 01/14] only need write disable masks for write conflicts --- ldpc_decoder.hh | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index ba84d97..0d26d27 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -42,15 +42,15 @@ class LDPCDecoder typedef SIMD TYPE; typedef struct { uint16_t off; uint16_t shi; } Loc; - typedef uint32_t wd_t; - static_assert(sizeof(wd_t) * 8 >= CNC, "write disable mask needs at least as many bits as max check node links"); + typedef uint32_t wdm_t; + static_assert(sizeof(wdm_t) * 8 >= CNC, "write disable mask needs at least as many bits as max check node links"); Rotate rotate; TYPE bnl[BNL]; TYPE msg[MSG]; TYPE pty[PTY]; Loc loc[LOC]; - wd_t wd[PTY]; + wdm_t wdm[PTY]; uint8_t cnc[q]; static TYPE eor(TYPE a, TYPE b) @@ -114,6 +114,7 @@ class LDPCDecoder { TYPE *bl = bnl; Loc *lo = loc; + wdm_t *wd = wdm; for (int i = 0; i < q; ++i) { int cnt = cnc[i]; int deg = cnt + 2; @@ -122,11 +123,18 @@ class LDPCDecoder TYPE min0 = vdup(127); TYPE min1 = vdup(127); TYPE signs = vdup(127); + bool write_conflict = false; + int last_offset = -1; for (int k = 0; k < deg; ++k) { TYPE tmp; if (k < cnt) { - tmp = rotate(msg[lo[k].off], -lo[k].shi); + int offset = lo[k].off; + int shift = -lo[k].shi; + tmp = rotate(msg[offset], shift); + if (last_offset == offset) + write_conflict = true; + last_offset = offset; } else if (k == cnt) { tmp = pty[W*i+j]; } else { @@ -170,7 +178,7 @@ class LDPCDecoder TYPE tmp = vqadd(inp, out); if (k < cnt) { - if (!((wd[W*i+j]>>k)&1)) { + if (!write_conflict || !((*wd>>k)&1)) { bl[k] = out; msg[lo[k].off] = rotate(tmp, lo[k].shi); } @@ -189,7 +197,7 @@ class LDPCDecoder } } } - if (wd[W*i+j]) { + if (write_conflict) { for (int first = 0, c = 1; c < cnt; ++c) { if (lo[first].off != lo[c].off || c == cnt-1) { int last = c - 1; @@ -197,15 +205,16 @@ class LDPCDecoder ++last; if (last != first) { int count = last - first + 1; - wd_t mask = ((1 << count) - 1) << first; - wd_t cur = wd[W*i+j]; - wd_t tmp = cur & mask; - wd_t ror = (tmp >> 1) | (tmp << (count-1)); - wd[W*i+j] = (cur & ~mask) | (ror & mask); + wdm_t mask = ((1 << count) - 1) << first; + wdm_t cur = *wd; + wdm_t tmp = cur & mask; + wdm_t ror = (tmp >> 1) | (tmp << (count-1)); + *wd = (cur & ~mask) | (ror & mask); } first = c; } } + ++wd; } lo += cnt; bl += deg; @@ -235,6 +244,7 @@ public: } } Loc *lo = loc; + wdm_t *wd = wdm; for (int i = 0; i < q; ++i) { int cnt = cnc[i]; int offset[cnt], shift[cnt]; @@ -249,10 +259,12 @@ public: shift[c] = (shift[c] + 1) % M; } std::sort(lo, lo + cnt, [](const Loc &a, const Loc &b){ return a.off < b.off; }); - wd[W*i+j] = 0; + wdm_t tmp = 0; for (int c = 0; c < cnt-1; ++c) if (lo[c].off == lo[c+1].off) - wd[W*i+j] |= 1 << c; + tmp |= 1 << c; + if (tmp) + *wd++ = tmp; lo += cnt; } } From 87153003e947495cc68d36d55f6cd6c1ffc03578 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Tue, 24 Sep 2019 19:58:46 +0200 Subject: [PATCH 02/14] instead of rotating back, store current shift value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit and compute the delta shift needed to achieve the same result. Idea taken from: Conflict Resolution by Matrix Reordering for DVB-T2 LDPC Decoders By Cédric Marchand, Jean-Baptiste Doré, Laura Conde-Canencia, Emmanuel Boutillon - 2009 --- ldpc_decoder.hh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index 0d26d27..aed39aa 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -51,6 +51,7 @@ class LDPCDecoder TYPE pty[PTY]; Loc loc[LOC]; wdm_t wdm[PTY]; + int16_t csh[MSG]; uint8_t cnc[q]; static TYPE eor(TYPE a, TYPE b) @@ -86,7 +87,11 @@ class LDPCDecoder for (int k = 0; k < deg; ++k) { TYPE tmp; if (k < cnt) { - tmp = rotate(msg[lo[k].off], -lo[k].shi); + int offset = lo[k].off; + int shift = -lo[k].shi; + shift -= csh[offset]; + shift %= D; + tmp = rotate(msg[offset], shift); } else if (k == cnt) { tmp = pty[W*i+j]; } else { @@ -131,6 +136,8 @@ class LDPCDecoder if (k < cnt) { int offset = lo[k].off; int shift = -lo[k].shi; + shift -= csh[offset]; + shift %= D; tmp = rotate(msg[offset], shift); if (last_offset == offset) write_conflict = true; @@ -180,7 +187,10 @@ class LDPCDecoder if (k < cnt) { if (!write_conflict || !((*wd>>k)&1)) { bl[k] = out; - msg[lo[k].off] = rotate(tmp, lo[k].shi); + int offset = lo[k].off; + int shift = -lo[k].shi; + msg[offset] = tmp; + csh[offset] = shift; } } else if (k == cnt) { bl[k] = out; @@ -275,6 +285,8 @@ public: { for (int i = 0; i < BNL; ++i) bnl[i] = vzero(); + for (int i = 0; i < MSG; ++i) + csh[i] = 0; for (int i = 0; i < K/M; ++i) for (int j = 0; j < W; ++j) for (int n = 0; n < D; ++n) @@ -285,6 +297,8 @@ public: pty[W*i+j].v[n] = parity[q*(W*n+j)+i]; while (bad() && --trials >= 0) update(); + for (int i = 0; i < MSG; ++i) + msg[i] = rotate(msg[i], -csh[i]); for (int i = 0; i < K/M; ++i) for (int j = 0; j < W; ++j) for (int n = 0; n < D; ++n) From 422b405596ff1f3beca2da3e96052840940dbe02 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Thu, 3 Oct 2019 20:53:49 +0200 Subject: [PATCH 03/14] simplified by merging pty and msg arrays into var while using location array for parity as well --- ldpc_decoder.hh | 159 +++++++++++++++++++++--------------------------- 1 file changed, 70 insertions(+), 89 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index aed39aa..8a6d2df 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -36,22 +36,22 @@ class LDPCDecoder static const int W = M/D; static const int PTY = R/D; static const int MSG = K/D; + static const int VAR = N/D; static const int CNC = TABLE::LINKS_MAX_CN - 2; static const int BNL = (TABLE::LINKS_TOTAL + D-1) / D; - static const int LOC = (TABLE::LINKS_TOTAL - (2*R-1) + D-1) / D; + static const int LOC = (TABLE::LINKS_TOTAL + D-1) / D; typedef SIMD TYPE; typedef struct { uint16_t off; uint16_t shi; } Loc; typedef uint32_t wdm_t; - static_assert(sizeof(wdm_t) * 8 >= CNC, "write disable mask needs at least as many bits as max check node links"); + static_assert(sizeof(wdm_t) * 8 >= TABLE::LINKS_MAX_CN, "write disable mask needs at least as many bits as max check node links"); Rotate rotate; TYPE bnl[BNL]; - TYPE msg[MSG]; - TYPE pty[PTY]; + TYPE var[VAR]; Loc loc[LOC]; wdm_t wdm[PTY]; - int16_t csh[MSG]; + int16_t csh[VAR]; uint8_t cnc[q]; static TYPE eor(TYPE a, TYPE b) @@ -79,35 +79,22 @@ class LDPCDecoder { Loc *lo = loc; for (int i = 0; i < q; ++i) { - int cnt = cnc[i]; - int deg = cnt + 2; + int deg = cnc[i] + 2; auto res = vmask(vzero()); for (int j = 0; j < W; ++j) { TYPE cnv = vdup(1); for (int k = 0; k < deg; ++k) { - TYPE tmp; - if (k < cnt) { - int offset = lo[k].off; - int shift = -lo[k].shi; - shift -= csh[offset]; - shift %= D; - tmp = rotate(msg[offset], shift); - } else if (k == cnt) { - tmp = pty[W*i+j]; - } else { - if (i) { - tmp = pty[W*(i-1)+j]; - } else if (j) { - tmp = pty[W*(q-1)+j-1]; - } else { - tmp = rotate(pty[PTY-1], 1); - tmp.v[0] = 127; - } - } + int offset = lo[k].off; + int shift = lo[k].shi; + shift -= csh[offset]; + shift %= D; + TYPE tmp = rotate(var[offset], shift); + if (i == 0 && j == 0 && offset == VAR-1) + tmp.v[0] = 127; cnv = vsign(cnv, tmp); } res = vorr(res, vclez(cnv)); - lo += cnt; + lo += deg; } for (int n = 0; n < D; ++n) if (res.v[n]) @@ -121,8 +108,7 @@ class LDPCDecoder Loc *lo = loc; wdm_t *wd = wdm; for (int i = 0; i < q; ++i) { - int cnt = cnc[i]; - int deg = cnt + 2; + int deg = cnc[i] + 2; for (int j = 0; j < W; ++j) { TYPE mags[deg], inps[deg]; TYPE min0 = vdup(127); @@ -130,31 +116,23 @@ class LDPCDecoder TYPE signs = vdup(127); bool write_conflict = false; int last_offset = -1; + int8_t prev_val = 0; for (int k = 0; k < deg; ++k) { - TYPE tmp; - if (k < cnt) { - int offset = lo[k].off; - int shift = -lo[k].shi; - shift -= csh[offset]; - shift %= D; - tmp = rotate(msg[offset], shift); - if (last_offset == offset) - write_conflict = true; - last_offset = offset; - } else if (k == cnt) { - tmp = pty[W*i+j]; - } else { - if (i) { - tmp = pty[W*(i-1)+j]; - } else if (j) { - tmp = pty[W*(q-1)+j-1]; - } else { - tmp = rotate(pty[PTY-1], 1); - tmp.v[0] = 127; - } + int offset = lo[k].off; + int shift = lo[k].shi; + shift -= csh[offset]; + shift %= D; + TYPE tmp = rotate(var[offset], shift); + if (i == 0 && j == 0 && offset == VAR-1) { + prev_val = tmp.v[0]; + tmp.v[0] = 127; } + if (last_offset == offset) + write_conflict = true; + last_offset = offset; + TYPE inp = vqsub(tmp, bl[k]); TYPE mag = vqabs(inp); @@ -184,34 +162,23 @@ class LDPCDecoder TYPE tmp = vqadd(inp, out); - if (k < cnt) { - if (!write_conflict || !((*wd>>k)&1)) { - bl[k] = out; - int offset = lo[k].off; - int shift = -lo[k].shi; - msg[offset] = tmp; - csh[offset] = shift; - } - } else if (k == cnt) { + int offset = lo[k].off; + int shift = lo[k].shi; + + if (i == 0 && j == 0 && offset == VAR-1) + tmp.v[0] = prev_val; + + if (!write_conflict || !((*wd>>k)&1)) { bl[k] = out; - pty[W*i+j] = tmp; - } else { - bl[k] = out; - if (i) { - pty[W*(i-1)+j] = tmp; - } else if (j) { - pty[W*(q-1)+j-1] = tmp; - } else { - tmp.v[0] = pty[PTY-1].v[D-1]; - pty[PTY-1] = rotate(tmp, -1); - } + var[offset] = tmp; + csh[offset] = shift; } } if (write_conflict) { - for (int first = 0, c = 1; c < cnt; ++c) { - if (lo[first].off != lo[c].off || c == cnt-1) { - int last = c - 1; - if (c == cnt-1) + for (int first = 0, k = 1; k < deg; ++k) { + if (lo[first].off != lo[k].off || k == deg-1) { + int last = k - 1; + if (k == deg-1) ++last; if (last != first) { int count = last - first + 1; @@ -221,12 +188,12 @@ class LDPCDecoder wdm_t ror = (tmp >> 1) | (tmp << (count-1)); *wd = (cur & ~mask) | (ror & mask); } - first = c; + first = k; } } ++wd; } - lo += cnt; + lo += deg; bl += deg; } } @@ -257,6 +224,7 @@ public: wdm_t *wd = wdm; for (int i = 0; i < q; ++i) { int cnt = cnc[i]; + int deg = cnt + 2; int offset[cnt], shift[cnt]; for (int c = 0; c < cnt; ++c) { shift[c] = pos[CNC*i+c] % M; @@ -265,17 +233,30 @@ public: for (int j = 0; j < W; ++j) { for (int c = 0; c < cnt; ++c) { lo[c].off = offset[c] / D + shift[c] % W; - lo[c].shi = shift[c] / W; + lo[c].shi = (D - shift[c] / W) % D; shift[c] = (shift[c] + 1) % M; } - std::sort(lo, lo + cnt, [](const Loc &a, const Loc &b){ return a.off < b.off; }); + if (i) { + lo[cnt].off = MSG+W*(i-1)+j; + lo[cnt].shi = 0; + } else if (j) { + lo[cnt].off = MSG+W*(q-1)+j-1; + lo[cnt].shi = 0; + } else { + lo[cnt].off = VAR-1; + lo[cnt].shi = 1; + } + lo[cnt+1].off = MSG+W*i+j; + lo[cnt+1].shi = 0; + + std::sort(lo, lo + deg, [](const Loc &a, const Loc &b){ return a.off < b.off; }); wdm_t tmp = 0; - for (int c = 0; c < cnt-1; ++c) - if (lo[c].off == lo[c+1].off) - tmp |= 1 << c; + for (int d = 0; d < deg-1; ++d) + if (lo[d].off == lo[d+1].off) + tmp |= 1 << d; if (tmp) *wd++ = tmp; - lo += cnt; + lo += deg; } } //assert(lo <= loc + LOC); @@ -285,28 +266,28 @@ public: { for (int i = 0; i < BNL; ++i) bnl[i] = vzero(); - for (int i = 0; i < MSG; ++i) + for (int i = 0; i < VAR; ++i) csh[i] = 0; for (int i = 0; i < K/M; ++i) for (int j = 0; j < W; ++j) for (int n = 0; n < D; ++n) - msg[W*i+j].v[n] = message[M*i+W*n+j]; + var[W*i+j].v[n] = message[M*i+W*n+j]; for (int i = 0; i < q; ++i) for (int j = 0; j < W; ++j) for (int n = 0; n < D; ++n) - pty[W*i+j].v[n] = parity[q*(W*n+j)+i]; + var[MSG+W*i+j].v[n] = parity[q*(W*n+j)+i]; while (bad() && --trials >= 0) update(); - for (int i = 0; i < MSG; ++i) - msg[i] = rotate(msg[i], -csh[i]); + for (int i = 0; i < VAR; ++i) + var[i] = rotate(var[i], -csh[i]); for (int i = 0; i < K/M; ++i) for (int j = 0; j < W; ++j) for (int n = 0; n < D; ++n) - message[M*i+W*n+j] = msg[W*i+j].v[n]; + message[M*i+W*n+j] = var[W*i+j].v[n]; for (int i = 0; i < q; ++i) for (int j = 0; j < W; ++j) for (int n = 0; n < D; ++n) - parity[q*(W*n+j)+i] = pty[W*i+j].v[n]; + parity[q*(W*n+j)+i] = var[MSG+W*i+j].v[n]; return trials; } }; From 838c02ae9f37eeb18739a2cbf055bc1920509671 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Thu, 3 Oct 2019 22:05:41 +0200 Subject: [PATCH 04/14] merged bad() into update() --- ldpc_decoder.hh | 45 +++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index 8a6d2df..fdd2893 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -75,38 +75,12 @@ class LDPCDecoder return vreinterpret(vand(vmask(b), vorr(vceqz(a), veor(vcgtz(a), vcltz(b))))); } - bool bad() - { - Loc *lo = loc; - for (int i = 0; i < q; ++i) { - int deg = cnc[i] + 2; - auto res = vmask(vzero()); - for (int j = 0; j < W; ++j) { - TYPE cnv = vdup(1); - for (int k = 0; k < deg; ++k) { - int offset = lo[k].off; - int shift = lo[k].shi; - shift -= csh[offset]; - shift %= D; - TYPE tmp = rotate(var[offset], shift); - if (i == 0 && j == 0 && offset == VAR-1) - tmp.v[0] = 127; - cnv = vsign(cnv, tmp); - } - res = vorr(res, vclez(cnv)); - lo += deg; - } - for (int n = 0; n < D; ++n) - if (res.v[n]) - return true; - } - return false; - } - void update() + bool update() { TYPE *bl = bnl; Loc *lo = loc; wdm_t *wd = wdm; + auto bad = vmask(vzero()); for (int i = 0; i < q; ++i) { int deg = cnc[i] + 2; for (int j = 0; j < W; ++j) { @@ -114,6 +88,7 @@ class LDPCDecoder TYPE min0 = vdup(127); TYPE min1 = vdup(127); TYPE signs = vdup(127); + TYPE cnv = vdup(127); bool write_conflict = false; int last_offset = -1; int8_t prev_val = 0; @@ -162,6 +137,8 @@ class LDPCDecoder TYPE tmp = vqadd(inp, out); + cnv = vsign(cnv, tmp); + int offset = lo[k].off; int shift = lo[k].shi; @@ -174,6 +151,8 @@ class LDPCDecoder csh[offset] = shift; } } + bad = vorr(bad, vclez(cnv)); + if (write_conflict) { for (int first = 0, k = 1; k < deg; ++k) { if (lo[first].off != lo[k].off || k == deg-1) { @@ -199,6 +178,10 @@ class LDPCDecoder } //assert(bl <= bnl + BNL); //std::cerr << BNL - (bl - bnl) << std::endl; + for (int n = 0; n < D; ++n) + if (bad.v[n]) + return true; + return false; } public: LDPCDecoder() @@ -276,10 +259,12 @@ public: for (int j = 0; j < W; ++j) for (int n = 0; n < D; ++n) var[MSG+W*i+j].v[n] = parity[q*(W*n+j)+i]; - while (bad() && --trials >= 0) - update(); + + while (--trials >= 0 && update()); + for (int i = 0; i < VAR; ++i) var[i] = rotate(var[i], -csh[i]); + for (int i = 0; i < K/M; ++i) for (int j = 0; j < W; ++j) for (int n = 0; n < D; ++n) From 79ddb18876bbcf8f5279db7e83dace077b407a6c Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Tue, 5 Nov 2019 09:05:57 +0100 Subject: [PATCH 05/14] simplified by removing loop over blocks --- ldpc_decoder.hh | 174 ++++++++++++++++++++++++------------------------ 1 file changed, 88 insertions(+), 86 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index fdd2893..1679011 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -52,7 +52,7 @@ class LDPCDecoder Loc loc[LOC]; wdm_t wdm[PTY]; int16_t csh[VAR]; - uint8_t cnc[q]; + uint8_t cnt[PTY]; static TYPE eor(TYPE a, TYPE b) { @@ -81,100 +81,98 @@ class LDPCDecoder Loc *lo = loc; wdm_t *wd = wdm; auto bad = vmask(vzero()); - for (int i = 0; i < q; ++i) { - int deg = cnc[i] + 2; - for (int j = 0; j < W; ++j) { - TYPE mags[deg], inps[deg]; - TYPE min0 = vdup(127); - TYPE min1 = vdup(127); - TYPE signs = vdup(127); - TYPE cnv = vdup(127); - bool write_conflict = false; - int last_offset = -1; - int8_t prev_val = 0; + for (int i = 0; i < PTY; ++i) { + int deg = cnt[i]; + TYPE mags[deg], inps[deg]; + TYPE min0 = vdup(127); + TYPE min1 = vdup(127); + TYPE signs = vdup(127); + TYPE cnv = vdup(127); + bool write_conflict = false; + int last_offset = -1; + int8_t prev_val = 0; - for (int k = 0; k < deg; ++k) { - int offset = lo[k].off; - int shift = lo[k].shi; - shift -= csh[offset]; - shift %= D; - TYPE tmp = rotate(var[offset], shift); - if (i == 0 && j == 0 && offset == VAR-1) { - prev_val = tmp.v[0]; - tmp.v[0] = 127; - } - - if (last_offset == offset) - write_conflict = true; - last_offset = offset; - - TYPE inp = vqsub(tmp, bl[k]); - - TYPE mag = vqabs(inp); - - if (BETA) { - auto beta = vunsigned(vdup(BETA)); - mag = vsigned(vqsub(vunsigned(mag), beta)); - } - - min1 = vmin(min1, vmax(min0, mag)); - min0 = vmin(min0, mag); - - signs = eor(signs, inp); - - inps[k] = inp; - mags[k] = mag; + for (int k = 0; k < deg; ++k) { + int offset = lo[k].off; + int shift = lo[k].shi; + shift -= csh[offset]; + shift %= D; + TYPE tmp = rotate(var[offset], shift); + if (i == 0 && offset == VAR-1) { + prev_val = tmp.v[0]; + tmp.v[0] = 127; } - for (int k = 0; k < deg; ++k) { - TYPE mag = mags[k]; - TYPE inp = inps[k]; - TYPE out = vsign(other(mag, min0, min1), mine(signs, inp)); + if (last_offset == offset) + write_conflict = true; + last_offset = offset; - out = vclamp(out, -32, 31); + TYPE inp = vqsub(tmp, bl[k]); - out = selfcorr(bl[k], out); + TYPE mag = vqabs(inp); - TYPE tmp = vqadd(inp, out); - - cnv = vsign(cnv, tmp); - - int offset = lo[k].off; - int shift = lo[k].shi; - - if (i == 0 && j == 0 && offset == VAR-1) - tmp.v[0] = prev_val; - - if (!write_conflict || !((*wd>>k)&1)) { - bl[k] = out; - var[offset] = tmp; - csh[offset] = shift; - } + if (BETA) { + auto beta = vunsigned(vdup(BETA)); + mag = vsigned(vqsub(vunsigned(mag), beta)); } - bad = vorr(bad, vclez(cnv)); - if (write_conflict) { - for (int first = 0, k = 1; k < deg; ++k) { - if (lo[first].off != lo[k].off || k == deg-1) { - int last = k - 1; - if (k == deg-1) - ++last; - if (last != first) { - int count = last - first + 1; - wdm_t mask = ((1 << count) - 1) << first; - wdm_t cur = *wd; - wdm_t tmp = cur & mask; - wdm_t ror = (tmp >> 1) | (tmp << (count-1)); - *wd = (cur & ~mask) | (ror & mask); - } - first = k; - } - } - ++wd; - } - lo += deg; - bl += deg; + min1 = vmin(min1, vmax(min0, mag)); + min0 = vmin(min0, mag); + + signs = eor(signs, inp); + + inps[k] = inp; + mags[k] = mag; } + for (int k = 0; k < deg; ++k) { + TYPE mag = mags[k]; + TYPE inp = inps[k]; + + TYPE out = vsign(other(mag, min0, min1), mine(signs, inp)); + + out = vclamp(out, -32, 31); + + out = selfcorr(bl[k], out); + + TYPE tmp = vqadd(inp, out); + + cnv = vsign(cnv, tmp); + + int offset = lo[k].off; + int shift = lo[k].shi; + + if (i == 0 && offset == VAR-1) + tmp.v[0] = prev_val; + + if (!write_conflict || !((*wd>>k)&1)) { + bl[k] = out; + var[offset] = tmp; + csh[offset] = shift; + } + } + bad = vorr(bad, vclez(cnv)); + + if (write_conflict) { + for (int first = 0, k = 1; k < deg; ++k) { + if (lo[first].off != lo[k].off || k == deg-1) { + int last = k - 1; + if (k == deg-1) + ++last; + if (last != first) { + int count = last - first + 1; + wdm_t mask = ((1 << count) - 1) << first; + wdm_t cur = *wd; + wdm_t tmp = cur & mask; + wdm_t ror = (tmp >> 1) | (tmp << (count-1)); + *wd = (cur & ~mask) | (ror & mask); + } + first = k; + } + } + ++wd; + } + lo += deg; + bl += deg; } //assert(bl <= bnl + BNL); //std::cerr << BNL - (bl - bnl) << std::endl; @@ -187,6 +185,7 @@ public: LDPCDecoder() { uint16_t pos[q * CNC]; + uint8_t cnc[q]; for (int i = 0; i < q; ++i) cnc[i] = 0; int bit_pos = 0; @@ -203,6 +202,9 @@ public: bit_pos += M; } } + for (int i = 0; i < q; ++i) + for (int j = 0; j < W; ++j) + cnt[W*i+j] = cnc[i] + 2; Loc *lo = loc; wdm_t *wd = wdm; for (int i = 0; i < q; ++i) { From b6313b90873994cf97bbd418029f4f0eba352900 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Tue, 5 Nov 2019 09:44:37 +0100 Subject: [PATCH 06/14] added code to print offset and shift tables --- ldpc_decoder.hh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index 1679011..c00f04e 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -241,6 +241,14 @@ public: tmp |= 1 << d; if (tmp) *wd++ = tmp; + +#if 0 + std::cout << deg; + for (int d = 0; d < deg; ++d) + std::cout << '\t' << (int)lo[d].off << ':' << (int)lo[d].shi; + std::cout << std::endl; +#endif + lo += deg; } } From 1eef4c3df5689647a04b9b217b80c2e788f84cfb Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Tue, 5 Nov 2019 09:59:55 +0100 Subject: [PATCH 07/14] detect special parity word using offset and shift --- ldpc_decoder.hh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index c00f04e..b02d090 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -95,10 +95,9 @@ class LDPCDecoder for (int k = 0; k < deg; ++k) { int offset = lo[k].off; int shift = lo[k].shi; - shift -= csh[offset]; - shift %= D; - TYPE tmp = rotate(var[offset], shift); - if (i == 0 && offset == VAR-1) { + int dshift = (shift - csh[offset]) % D; + TYPE tmp = rotate(var[offset], dshift); + if (offset == VAR-1 && shift == 1) { prev_val = tmp.v[0]; tmp.v[0] = 127; } @@ -141,7 +140,7 @@ class LDPCDecoder int offset = lo[k].off; int shift = lo[k].shi; - if (i == 0 && offset == VAR-1) + if (offset == VAR-1 && shift == 1) tmp.v[0] = prev_val; if (!write_conflict || !((*wd>>k)&1)) { From 3b5a4088fc1c91777a329d3d385d9bb663bedf45 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Wed, 6 Nov 2019 17:43:12 +0100 Subject: [PATCH 08/14] create write disable mask for next visit on the fly --- ldpc_decoder.hh | 48 +++++++++++++++++------------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index b02d090..bbef88e 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -79,7 +79,6 @@ class LDPCDecoder { TYPE *bl = bnl; Loc *lo = loc; - wdm_t *wd = wdm; auto bad = vmask(vzero()); for (int i = 0; i < PTY; ++i) { int deg = cnt[i]; @@ -88,8 +87,9 @@ class LDPCDecoder TYPE min1 = vdup(127); TYPE signs = vdup(127); TYPE cnv = vdup(127); - bool write_conflict = false; - int last_offset = -1; + wdm_t first_wdb = 0; + wdm_t next_wdm = 0; + int last_offset = 0; int8_t prev_val = 0; for (int k = 0; k < deg; ++k) { @@ -102,8 +102,17 @@ class LDPCDecoder tmp.v[0] = 127; } - if (last_offset == offset) - write_conflict = true; + wdm_t this_wdb = (wdm[i]>>k)&1; + if (k) { + if (last_offset == offset) { + next_wdm |= this_wdb<<(k-1); + } else { + next_wdm |= first_wdb<<(k-1); + first_wdb = this_wdb; + } + } else { + first_wdb = this_wdb; + } last_offset = offset; TYPE inp = vqsub(tmp, bl[k]); @@ -143,33 +152,14 @@ class LDPCDecoder if (offset == VAR-1 && shift == 1) tmp.v[0] = prev_val; - if (!write_conflict || !((*wd>>k)&1)) { + if (!((wdm[i]>>k)&1)) { bl[k] = out; var[offset] = tmp; csh[offset] = shift; } } bad = vorr(bad, vclez(cnv)); - - if (write_conflict) { - for (int first = 0, k = 1; k < deg; ++k) { - if (lo[first].off != lo[k].off || k == deg-1) { - int last = k - 1; - if (k == deg-1) - ++last; - if (last != first) { - int count = last - first + 1; - wdm_t mask = ((1 << count) - 1) << first; - wdm_t cur = *wd; - wdm_t tmp = cur & mask; - wdm_t ror = (tmp >> 1) | (tmp << (count-1)); - *wd = (cur & ~mask) | (ror & mask); - } - first = k; - } - } - ++wd; - } + wdm[i] = next_wdm; lo += deg; bl += deg; } @@ -205,7 +195,6 @@ public: for (int j = 0; j < W; ++j) cnt[W*i+j] = cnc[i] + 2; Loc *lo = loc; - wdm_t *wd = wdm; for (int i = 0; i < q; ++i) { int cnt = cnc[i]; int deg = cnt + 2; @@ -238,16 +227,13 @@ public: for (int d = 0; d < deg-1; ++d) if (lo[d].off == lo[d+1].off) tmp |= 1 << d; - if (tmp) - *wd++ = tmp; - + wdm[W*i+j] = tmp; #if 0 std::cout << deg; for (int d = 0; d < deg; ++d) std::cout << '\t' << (int)lo[d].off << ':' << (int)lo[d].shi; std::cout << std::endl; #endif - lo += deg; } } From da9d47b95d0cd7de3f1292ea188de8ea74befae3 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Wed, 6 Nov 2019 21:04:55 +0100 Subject: [PATCH 09/14] don't forget the last bit, even though last two are parity vectors --- ldpc_decoder.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index bbef88e..d66be23 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -132,6 +132,7 @@ class LDPCDecoder inps[k] = inp; mags[k] = mag; } + next_wdm |= first_wdb<<(deg-1); for (int k = 0; k < deg; ++k) { TYPE mag = mags[k]; TYPE inp = inps[k]; From 5ab424e0245f6d5b5d8cbe976b50a579828415d3 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Tue, 12 Nov 2019 11:32:30 +0100 Subject: [PATCH 10/14] output write disable mask in binary form --- ldpc_decoder.hh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index d66be23..8b3c86b 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -8,6 +8,7 @@ Copyright 2018 Ahmet Inan #define LDPC_DECODER_HH #include +//#include #include "simd.hh" #include "rotate.hh" @@ -231,6 +232,7 @@ public: wdm[W*i+j] = tmp; #if 0 std::cout << deg; + std::cout << '\t' << std::bitset(tmp); for (int d = 0; d < deg; ++d) std::cout << '\t' << (int)lo[d].off << ':' << (int)lo[d].shi; std::cout << std::endl; From 4054363b2e42821b3f5cac4668a81eff84be861f Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Fri, 15 Nov 2019 22:22:01 +0100 Subject: [PATCH 11/14] moved write disable bit into Loc struct --- ldpc_decoder.hh | 48 +++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index 8b3c86b..c0b3c39 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -8,7 +8,6 @@ Copyright 2018 Ahmet Inan #define LDPC_DECODER_HH #include -//#include #include "simd.hh" #include "rotate.hh" @@ -43,15 +42,12 @@ class LDPCDecoder static const int LOC = (TABLE::LINKS_TOTAL + D-1) / D; typedef SIMD TYPE; - typedef struct { uint16_t off; uint16_t shi; } Loc; - typedef uint32_t wdm_t; - static_assert(sizeof(wdm_t) * 8 >= TABLE::LINKS_MAX_CN, "write disable mask needs at least as many bits as max check node links"); + typedef struct { uint16_t off; uint16_t shi; bool wd; } Loc; Rotate rotate; TYPE bnl[BNL]; TYPE var[VAR]; Loc loc[LOC]; - wdm_t wdm[PTY]; int16_t csh[VAR]; uint8_t cnt[PTY]; @@ -88,8 +84,7 @@ class LDPCDecoder TYPE min1 = vdup(127); TYPE signs = vdup(127); TYPE cnv = vdup(127); - wdm_t first_wdb = 0; - wdm_t next_wdm = 0; + bool first_wd; int last_offset = 0; int8_t prev_val = 0; @@ -103,19 +98,6 @@ class LDPCDecoder tmp.v[0] = 127; } - wdm_t this_wdb = (wdm[i]>>k)&1; - if (k) { - if (last_offset == offset) { - next_wdm |= this_wdb<<(k-1); - } else { - next_wdm |= first_wdb<<(k-1); - first_wdb = this_wdb; - } - } else { - first_wdb = this_wdb; - } - last_offset = offset; - TYPE inp = vqsub(tmp, bl[k]); TYPE mag = vqabs(inp); @@ -133,7 +115,6 @@ class LDPCDecoder inps[k] = inp; mags[k] = mag; } - next_wdm |= first_wdb<<(deg-1); for (int k = 0; k < deg; ++k) { TYPE mag = mags[k]; TYPE inp = inps[k]; @@ -154,14 +135,26 @@ class LDPCDecoder if (offset == VAR-1 && shift == 1) tmp.v[0] = prev_val; - if (!((wdm[i]>>k)&1)) { + bool this_wd = lo[k].wd; + if (!this_wd) { bl[k] = out; var[offset] = tmp; csh[offset] = shift; } + if (k) { + if (last_offset == offset) { + lo[k-1].wd = this_wd; + } else { + lo[k-1].wd = first_wd; + first_wd = this_wd; + } + } else { + first_wd = this_wd; + } + last_offset = offset; } + lo[deg-1].wd = first_wd; bad = vorr(bad, vclez(cnv)); - wdm[i] = next_wdm; lo += deg; bl += deg; } @@ -225,16 +218,13 @@ public: lo[cnt+1].shi = 0; std::sort(lo, lo + deg, [](const Loc &a, const Loc &b){ return a.off < b.off; }); - wdm_t tmp = 0; for (int d = 0; d < deg-1; ++d) - if (lo[d].off == lo[d+1].off) - tmp |= 1 << d; - wdm[W*i+j] = tmp; + lo[d].wd = lo[d].off == lo[d+1].off; + lo[deg-1].wd = false; #if 0 std::cout << deg; - std::cout << '\t' << std::bitset(tmp); for (int d = 0; d < deg; ++d) - std::cout << '\t' << (int)lo[d].off << ':' << (int)lo[d].shi; + std::cout << ' ' << (int)lo[d].off << ':' << (int)lo[d].shi << ':' << lo[d].wd; std::cout << std::endl; #endif lo += deg; From 19a18d52e78f5c813e663f974f62efb64505a084 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Sat, 16 Nov 2019 00:43:48 +0100 Subject: [PATCH 12/14] better keep RW wd separate from RO loc --- ldpc_decoder.hh | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index c0b3c39..ce3d45a 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -38,16 +38,16 @@ class LDPCDecoder static const int MSG = K/D; static const int VAR = N/D; static const int CNC = TABLE::LINKS_MAX_CN - 2; - static const int BNL = (TABLE::LINKS_TOTAL + D-1) / D; - static const int LOC = (TABLE::LINKS_TOTAL + D-1) / D; + static const int BNL = (TABLE::LINKS_TOTAL + 1) / D; typedef SIMD TYPE; - typedef struct { uint16_t off; uint16_t shi; bool wd; } Loc; + typedef struct { uint16_t off; uint16_t shi; } Loc; Rotate rotate; TYPE bnl[BNL]; TYPE var[VAR]; - Loc loc[LOC]; + Loc loc[BNL]; + bool wds[BNL]; int16_t csh[VAR]; uint8_t cnt[PTY]; @@ -76,6 +76,7 @@ class LDPCDecoder { TYPE *bl = bnl; Loc *lo = loc; + bool *wd = wds; auto bad = vmask(vzero()); for (int i = 0; i < PTY; ++i) { int deg = cnt[i]; @@ -135,7 +136,7 @@ class LDPCDecoder if (offset == VAR-1 && shift == 1) tmp.v[0] = prev_val; - bool this_wd = lo[k].wd; + bool this_wd = wd[k]; if (!this_wd) { bl[k] = out; var[offset] = tmp; @@ -143,9 +144,9 @@ class LDPCDecoder } if (k) { if (last_offset == offset) { - lo[k-1].wd = this_wd; + wd[k-1] = this_wd; } else { - lo[k-1].wd = first_wd; + wd[k-1] = first_wd; first_wd = this_wd; } } else { @@ -153,10 +154,11 @@ class LDPCDecoder } last_offset = offset; } - lo[deg-1].wd = first_wd; + wd[deg-1] = first_wd; bad = vorr(bad, vclez(cnv)); lo += deg; bl += deg; + wd += deg; } //assert(bl <= bnl + BNL); //std::cerr << BNL - (bl - bnl) << std::endl; @@ -190,6 +192,7 @@ public: for (int j = 0; j < W; ++j) cnt[W*i+j] = cnc[i] + 2; Loc *lo = loc; + bool *wd = wds; for (int i = 0; i < q; ++i) { int cnt = cnc[i]; int deg = cnt + 2; @@ -219,19 +222,20 @@ public: std::sort(lo, lo + deg, [](const Loc &a, const Loc &b){ return a.off < b.off; }); for (int d = 0; d < deg-1; ++d) - lo[d].wd = lo[d].off == lo[d+1].off; - lo[deg-1].wd = false; + wd[d] = lo[d].off == lo[d+1].off; + wd[deg-1] = false; #if 0 std::cout << deg; for (int d = 0; d < deg; ++d) - std::cout << ' ' << (int)lo[d].off << ':' << (int)lo[d].shi << ':' << lo[d].wd; + std::cout << ' ' << (int)lo[d].off << ':' << (int)lo[d].shi << ':' << wd[d]; std::cout << std::endl; #endif lo += deg; + wd += deg; } } - //assert(lo <= loc + LOC); - //std::cerr << LOC - (lo - loc) << std::endl; + //assert(lo <= loc + BNL); + //std::cerr << BNL - (lo - loc) << std::endl; } int operator()(int8_t *message, int8_t *parity, int trials = 25) { From 97dd7e22265e710727866fc434425281d705b8f8 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Sat, 16 Nov 2019 09:17:35 +0100 Subject: [PATCH 13/14] initialize bit node links on first iteration --- ldpc_decoder.hh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index ce3d45a..17f334b 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -50,6 +50,7 @@ class LDPCDecoder bool wds[BNL]; int16_t csh[VAR]; uint8_t cnt[PTY]; + bool start; static TYPE eor(TYPE a, TYPE b) { @@ -100,6 +101,8 @@ class LDPCDecoder } TYPE inp = vqsub(tmp, bl[k]); + if (start) + inp = tmp; TYPE mag = vqabs(inp); @@ -124,7 +127,8 @@ class LDPCDecoder out = vclamp(out, -32, 31); - out = selfcorr(bl[k], out); + if (!start) + out = selfcorr(bl[k], out); TYPE tmp = vqadd(inp, out); @@ -141,6 +145,8 @@ class LDPCDecoder bl[k] = out; var[offset] = tmp; csh[offset] = shift; + } else if (start) { + bl[k] = vzero(); } if (k) { if (last_offset == offset) { @@ -239,8 +245,6 @@ public: } int operator()(int8_t *message, int8_t *parity, int trials = 25) { - for (int i = 0; i < BNL; ++i) - bnl[i] = vzero(); for (int i = 0; i < VAR; ++i) csh[i] = 0; for (int i = 0; i < K/M; ++i) @@ -252,7 +256,9 @@ public: for (int n = 0; n < D; ++n) var[MSG+W*i+j].v[n] = parity[q*(W*n+j)+i]; - while (--trials >= 0 && update()); + start = true; + while (--trials >= 0 && update()) + start = false; for (int i = 0; i < VAR; ++i) var[i] = rotate(var[i], -csh[i]); From 59b10af83578773ca75b42d63eb0972e57772c62 Mon Sep 17 00:00:00 2001 From: Ahmet Inan Date: Sat, 16 Nov 2019 09:40:17 +0100 Subject: [PATCH 14/14] initialize write disable flags on first iteration --- ldpc_decoder.hh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/ldpc_decoder.hh b/ldpc_decoder.hh index 17f334b..dac88c8 100644 --- a/ldpc_decoder.hh +++ b/ldpc_decoder.hh @@ -141,6 +141,12 @@ class LDPCDecoder tmp.v[0] = prev_val; bool this_wd = wd[k]; + if (start) { + if (k) + this_wd = offset == last_offset; + else + this_wd = false; + } if (!this_wd) { bl[k] = out; var[offset] = tmp; @@ -149,12 +155,12 @@ class LDPCDecoder bl[k] = vzero(); } if (k) { - if (last_offset == offset) { - wd[k-1] = this_wd; - } else { - wd[k-1] = first_wd; + bool next_wd = this_wd; + if (last_offset != offset) { + next_wd = first_wd; first_wd = this_wd; } + wd[k-1] = next_wd; } else { first_wd = this_wd; } @@ -198,7 +204,6 @@ public: for (int j = 0; j < W; ++j) cnt[W*i+j] = cnc[i] + 2; Loc *lo = loc; - bool *wd = wds; for (int i = 0; i < q; ++i) { int cnt = cnc[i]; int deg = cnt + 2; @@ -227,17 +232,13 @@ public: lo[cnt+1].shi = 0; std::sort(lo, lo + deg, [](const Loc &a, const Loc &b){ return a.off < b.off; }); - for (int d = 0; d < deg-1; ++d) - wd[d] = lo[d].off == lo[d+1].off; - wd[deg-1] = false; #if 0 std::cout << deg; for (int d = 0; d < deg; ++d) - std::cout << ' ' << (int)lo[d].off << ':' << (int)lo[d].shi << ':' << wd[d]; + std::cout << '\t' << (int)lo[d].off << ':' << (int)lo[d].shi; std::cout << std::endl; #endif lo += deg; - wd += deg; } } //assert(lo <= loc + BNL);