Add forward error correction code to the udp sender/receiver.

[paraslash.git] / fec.c
diff --git a/fec.c b/fec.c

new file mode 100644 (file)

index 0000000..e543aed
--- /dev/null
+++ b/fec.c
@@ -0,0 +1,598 @@
+/*
+ * fec.c -- forward error correction based on Vandermonde matrices
+ * 980624
+ * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
+ *
+ * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
+ * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
+ * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ */
+
+#include "para.h"
+#include "error.h"
+#include "portable_io.h"
+#include "string.h"
+#include "fec.h"
+
+#define GF_BITS  8 /* code over GF(256) */
+#define        GF_SIZE ((1 << GF_BITS) - 1)
+
+/*
+ * To speed up computations, we have tables for logarithm, exponent and inverse
+ * of a number. We use a table for multiplication as well (it takes 64K, no big
+ * deal even on a PDA, especially because it can be pre-initialized an put into
+ * a ROM!). The macro gf_mul(x,y) takes care of multiplications.
+ */
+static unsigned char gf_exp[2 * GF_SIZE]; /* index->poly form conversion table */
+static int gf_log[GF_SIZE + 1];        /* Poly->index form conversion table    */
+static unsigned char inverse[GF_SIZE + 1]; /* inverse of field elem. */
+static unsigned char gf_mul_table[GF_SIZE + 1][GF_SIZE + 1];
+/* Multiply two numbers. */
+#define gf_mul(x,y) gf_mul_table[x][y]
+
+/* Compute x % GF_SIZE without a slow divide. */
+static inline unsigned char modnn(int x)
+{
+       while (x >= GF_SIZE) {
+               x -= GF_SIZE;
+               x = (x >> GF_BITS) + (x & GF_SIZE);
+       }
+       return x;
+}
+
+static void init_mul_table(void)
+{
+       int i, j;
+       for (i = 0; i < GF_SIZE + 1; i++)
+               for (j = 0; j < GF_SIZE + 1; j++)
+                       gf_mul_table[i][j] =
+                               gf_exp[modnn(gf_log[i] + gf_log[j])];
+
+       for (j = 0; j < GF_SIZE + 1; j++)
+               gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
+}
+
+static unsigned char *alloc_matrix(int rows, int cols)
+{
+       return para_malloc(rows * cols);
+}
+
+/*
+ * Initialize the data structures used for computations in GF.
+ *
+ * This generates GF(2**GF_BITS) from the irreducible polynomial p(X) in
+ * p[0]..p[m].
+ *
+ * Lookup tables:
+ *     index->polynomial form          gf_exp[] contains j= \alpha^i;
+ *     polynomial form -> index form   gf_log[ j = \alpha^i ] = i
+ * \alpha=x is the primitive element of GF(2^m)
+ *
+ * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
+ * multiplication of two numbers can be resolved without calling modnn
+ */
+static void generate_gf(void)
+{
+       int i;
+       unsigned char mask = 1;
+       char *pp = "101110001"; /* The primitive polynomial 1+x^2+x^3+x^4+x^8 */
+       gf_exp[GF_BITS] = 0; /* will be updated at the end of the 1st loop */
+
+       /*
+        * first, generate the (polynomial representation of) powers of \alpha,
+        * which are stored in gf_exp[i] = \alpha ** i .
+        * At the same time build gf_log[gf_exp[i]] = i .
+        * The first GF_BITS powers are simply bits shifted to the left.
+        */
+       for (i = 0; i < GF_BITS; i++, mask <<= 1) {
+               gf_exp[i] = mask;
+               gf_log[gf_exp[i]] = i;
+               /*
+                * If pp[i] == 1 then \alpha ** i occurs in poly-repr
+                * gf_exp[GF_BITS] = \alpha ** GF_BITS
+                */
+               if (pp[i] == '1')
+                       gf_exp[GF_BITS] ^= mask;
+       }
+       /*
+        * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so can also
+        * compute its inverse.
+        */
+       gf_log[gf_exp[GF_BITS]] = GF_BITS;
+       /*
+        * Poly-repr of \alpha ** (i+1) is given by poly-repr of \alpha ** i
+        * shifted left one-bit and accounting for any \alpha ** GF_BITS term
+        * that may occur when poly-repr of \alpha ** i is shifted.
+        */
+       mask = 1 << (GF_BITS - 1);
+       for (i = GF_BITS + 1; i < GF_SIZE; i++) {
+               if (gf_exp[i - 1] >= mask)
+                       gf_exp[i] =
+                               gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
+               else
+                       gf_exp[i] = gf_exp[i - 1] << 1;
+               gf_log[gf_exp[i]] = i;
+       }
+       /*
+        * log(0) is not defined, so use a special value
+        */
+       gf_log[0] = GF_SIZE;
+       /* set the extended gf_exp values for fast multiply */
+       for (i = 0; i < GF_SIZE; i++)
+               gf_exp[i + GF_SIZE] = gf_exp[i];
+
+       inverse[0] = 0; /* 0 has no inverse. */
+       inverse[1] = 1;
+       for (i = 2; i <= GF_SIZE; i++)
+               inverse[i] = gf_exp[GF_SIZE - gf_log[i]];
+}
+
+/*
+ * Compute dst[] = dst[] + c * src[]
+ *
+ * This is used often, so better optimize it! Currently the loop is unrolled 16
+ * times. The case c=0 is also optimized, whereas c=1 is not.
+ */
+#define UNROLL 16
+static void addmul(unsigned char *dst1, const unsigned char const *src1,
+       unsigned char c, int sz)
+{
+       if (c == 0)
+               return;
+       unsigned char *dst = dst1, *lim = &dst[sz - UNROLL + 1],
+               *col = gf_mul_table[c];
+       const unsigned char const *src = src1;
+
+       for (; dst < lim; dst += UNROLL, src += UNROLL) {
+               dst[0] ^= col[src[0]];
+               dst[1] ^= col[src[1]];
+               dst[2] ^= col[src[2]];
+               dst[3] ^= col[src[3]];
+               dst[4] ^= col[src[4]];
+               dst[5] ^= col[src[5]];
+               dst[6] ^= col[src[6]];
+               dst[7] ^= col[src[7]];
+               dst[8] ^= col[src[8]];
+               dst[9] ^= col[src[9]];
+               dst[10] ^= col[src[10]];
+               dst[11] ^= col[src[11]];
+               dst[12] ^= col[src[12]];
+               dst[13] ^= col[src[13]];
+               dst[14] ^= col[src[14]];
+               dst[15] ^= col[src[15]];
+       }
+       lim += UNROLL - 1;
+       for (; dst < lim; dst++, src++) /* final components */
+               *dst ^= col[*src];
+}
+
+/*
+ * Compute C = AB where A is n*k, B is k*m, C is n*m
+ */
+static void matmul(unsigned char *a, unsigned char *b, unsigned char *c,
+               int n, int k, int m)
+{
+       int row, col, i;
+
+       for (row = 0; row < n; row++) {
+               for (col = 0; col < m; col++) {
+                       unsigned char *pa = &a[row * k], *pb = &b[col], acc = 0;
+                       for (i = 0; i < k; i++, pa++, pb += m)
+                               acc ^= gf_mul(*pa, *pb);
+                       c[row * m + col] = acc;
+               }
+       }
+}
+
+#define FEC_SWAP(a,b) {typeof(a) tmp = a; a = b; b = tmp;}
+
+/*
+ * Compute the inverse of a matrix.
+ *
+ * k is the size of the matrix 'src' (Gauss-Jordan, adapted from Numerical
+ * Recipes in C). Returns -1 if 'src' is singular.
+ */
+static int invert_mat(unsigned char *src, int k)
+{
+       int irow, icol, row, col, ix, error;
+       int *indxc = para_malloc(k * sizeof(int));
+       int *indxr = para_malloc(k * sizeof(int));
+       int *ipiv = para_malloc(k * sizeof(int)); /* elements used as pivots */
+       unsigned char c, *p, *id_row = alloc_matrix(1, k),
+               *temp_row = alloc_matrix(1, k);
+
+       memset(id_row, 0, k);
+       memset(ipiv, 0, k * sizeof(int));
+
+       for (col = 0; col < k; col++) {
+               unsigned char *pivot_row;
+               /*
+                * Zeroing column 'col', look for a non-zero element.
+                * First try on the diagonal, if it fails, look elsewhere.
+                */
+               irow = icol = -1;
+               if (ipiv[col] != 1 && src[col * k + col] != 0) {
+                       irow = col;
+                       icol = col;
+                       goto found_piv;
+               }
+               for (row = 0; row < k; row++) {
+                       if (ipiv[row] != 1) {
+                               for (ix = 0; ix < k; ix++) {
+                                       if (ipiv[ix] == 0) {
+                                               if (src[row * k + ix] != 0) {
+                                                       irow = row;
+                                                       icol = ix;
+                                                       goto found_piv;
+                                               }
+                                       } else if (ipiv[ix] > 1) {
+                                               error = -E_FEC_PIVOT;
+                                               goto fail;
+                                       }
+                               }
+                       }
+               }
+               error = -E_FEC_PIVOT;
+               if (icol == -1)
+                       goto fail;
+found_piv:
+               ++(ipiv[icol]);
+               /*
+                * swap rows irow and icol, so afterwards the diagonal element
+                * will be correct. Rarely done, not worth optimizing.
+                */
+               if (irow != icol)
+                       for (ix = 0; ix < k; ix++)
+                               FEC_SWAP(src[irow * k + ix], src[icol * k + ix]);
+               indxr[col] = irow;
+               indxc[col] = icol;
+               pivot_row = &src[icol * k];
+               error = -E_FEC_SINGULAR;
+               c = pivot_row[icol];
+               if (c == 0)
+                       goto fail;
+               if (c != 1) { /* otherwise this is a NOP */
+                       /*
+                        * this is done often , but optimizing is not so
+                        * fruitful, at least in the obvious ways (unrolling)
+                        */
+                       c = inverse[c];
+                       pivot_row[icol] = 1;
+                       for (ix = 0; ix < k; ix++)
+                               pivot_row[ix] = gf_mul(c, pivot_row[ix]);
+               }
+               /*
+                * from all rows, remove multiples of the selected row to zero
+                * the relevant entry (in fact, the entry is not zero because
+                * we know it must be zero).  (Here, if we know that the
+                * pivot_row is the identity, we can optimize the addmul).
+                */
+               id_row[icol] = 1;
+               if (memcmp(pivot_row, id_row, k) != 0) {
+                       for (p = src, ix = 0; ix < k; ix++, p += k) {
+                               if (ix != icol) {
+                                       c = p[icol];
+                                       p[icol] = 0;
+                                       addmul(p, pivot_row, c, k);
+                               }
+                       }
+               }
+               id_row[icol] = 0;
+       }
+       for (col = k - 1; col >= 0; col--) {
+               if (indxr[col] < 0 || indxr[col] >= k)
+                       PARA_CRIT_LOG("AARGH, indxr[col] %d\n", indxr[col]);
+               else if (indxc[col] < 0 || indxc[col] >= k)
+                       PARA_CRIT_LOG("AARGH, indxc[col] %d\n", indxc[col]);
+               else if (indxr[col] != indxc[col]) {
+                       for (row = 0; row < k; row++) {
+                               FEC_SWAP(src[row * k + indxr[col]],
+                                       src[row * k + indxc[col]]);
+                       }
+               }
+       }
+       error = 0;
+fail:
+       free(indxc);
+       free(indxr);
+       free(ipiv);
+       free(id_row);
+       free(temp_row);
+       return error;
+}
+
+/*
+ * Invert a Vandermonde matrix.
+ *
+ * It assumes that the matrix is not singular and _IS_ a Vandermonde matrix.
+ * Only uses the second column of the matrix, containing the p_i's.
+ *
+ * Algorithm borrowed from "Numerical recipes in C" -- sec.2.8, but largely
+ * revised for GF purposes.
+ */
+static void invert_vdm(unsigned char *src, int k)
+{
+       int i, j, row, col;
+       unsigned char *b, *c, *p, t, xx;
+
+       if (k == 1) /* degenerate */
+               return;
+       /*
+        * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
+        * b holds the coefficient for the matrix inversion
+        */
+       c = para_malloc(k);
+       b = para_malloc(k);
+       p = para_malloc(k);
+
+       for (j = 1, i = 0; i < k; i++, j += k) {
+               c[i] = 0;
+               p[i] = src[j];
+       }
+       /*
+        * construct coeffs recursively. We know c[k] = 1 (implicit) and start
+        * P_0 = x - p_0, then at each stage multiply by x - p_i generating P_i
+        * = x P_{i-1} - p_i P_{i-1} After k steps we are done.
+        */
+       c[k - 1] = p[0]; /* really -p(0), but x = -x in GF(2^m) */
+       for (i = 1; i < k; i++) {
+               unsigned char p_i = p[i];
+               for (j = k - 1 - (i - 1); j < k - 1; j++)
+                       c[j] ^= gf_mul(p_i, c[j + 1]);
+               c[k - 1] ^= p_i;
+       }
+
+       for (row = 0; row < k; row++) {
+               /*
+                * synthetic division etc.
+                */
+               xx = p[row];
+               t = 1;
+               b[k - 1] = 1; /* this is in fact c[k] */
+               for (i = k - 2; i >= 0; i--) {
+                       b[i] = c[i + 1] ^ gf_mul(xx, b[i + 1]);
+                       t = gf_mul(xx, t) ^ b[i];
+               }
+               for (col = 0; col < k; col++)
+                       src[col * k + row] = gf_mul(inverse[t], b[col]);
+       }
+       free(c);
+       free(b);
+       free(p);
+}
+
+static int fec_initialized;
+
+static void init_fec(void)
+{
+       generate_gf();
+       init_mul_table();
+       fec_initialized = 1;
+}
+
+struct fec_parms {
+       int k, n; /* parameters of the code */
+       unsigned char *enc_matrix;
+};
+
+/**
+ * Deallocate a fec params structure.
+ *
+ * \param p The structure to free.
+ */
+void fec_free(struct fec_parms *p)
+{
+       if (!p)
+               return;
+       free(p->enc_matrix);
+       free(p);
+}
+
+/**
+ * Create a new encoder and return an opaque descriptor to it.
+ *
+ * \param k Number of input slices.
+ * \param n Number of output slices.
+ * \param result On success the Fec descriptor is returned here.
+ *
+ * \return Standard.
+ *
+ * This creates the k*n encoding matrix.  It is computed starting with a
+ * Vandermonde matrix, and then transformed into a systematic matrix.
+ */
+int fec_new(int k, int n, struct fec_parms **result)
+{
+       int row, col;
+       unsigned char *p, *tmp_m;
+       struct fec_parms *parms;
+
+       if (!fec_initialized)
+               init_fec();
+
+       if (k < 1 || k > GF_SIZE + 1 || n > GF_SIZE + 1 || k > n)
+               return -E_FEC_PARMS;
+       parms = para_malloc(sizeof(struct fec_parms));
+       parms->k = k;
+       parms->n = n;
+       parms->enc_matrix = alloc_matrix(n, k);
+       tmp_m = alloc_matrix(n, k);
+       /*
+        * fill the matrix with powers of field elements, starting from 0.
+        * The first row is special, cannot be computed with exp. table.
+        */
+       tmp_m[0] = 1;
+       for (col = 1; col < k; col++)
+               tmp_m[col] = 0;
+       for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k) {
+               for (col = 0; col < k; col++)
+                       p[col] = gf_exp[modnn(row * col)];
+       }
+
+       /*
+        * quick code to build systematic matrix: invert the top
+        * k*k vandermonde matrix, multiply right the bottom n-k rows
+        * by the inverse, and construct the identity matrix at the top.
+        */
+       invert_vdm(tmp_m, k); /* much faster than invert_mat */
+       matmul(tmp_m + k * k, tmp_m, parms->enc_matrix + k * k, n - k, k, k);
+       /*
+        * the upper matrix is I so do not bother with a slow multiply
+        */
+       memset(parms->enc_matrix, 0, k * k);
+       for (p = parms->enc_matrix, col = 0; col < k; col++, p += k + 1)
+               *p = 1;
+       free(tmp_m);
+       *result = parms;
+       return 0;
+}
+
+/**
+ * Compute one encoded slice of the given input.
+ *
+ * \param parms The fec parameters returned earlier by fec_new().
+ * \param src The \a k data slices to encode.
+ * \param dst Result pointer.
+ * \param idx The index of the slice to compute.
+ * \param sz The size of the input data packets.
+ *
+ * Encode the \a k slices of size \a sz given by \a src and store the output
+ * slice number \a idx in \dst.
+ */
+void fec_encode(struct fec_parms *parms, const unsigned char * const *src,
+               unsigned char *dst, int idx, int sz)
+{
+       int i, k = parms->k;
+       unsigned char *p;
+
+       assert(idx <= parms->n);
+
+       if (idx < k) {
+               memcpy(dst, src[idx], sz);
+               return;
+       }
+       p = &(parms->enc_matrix[idx * k]);
+       memset(dst, 0, sz);
+       for (i = 0; i < k; i++)
+               addmul(dst, src[i], p[i], sz);
+}
+
+/* Move src packets in their position. */
+static int shuffle(unsigned char **data, int *idx, int k)
+{
+       int i;
+
+       for (i = 0; i < k;) {
+               if (idx[i] >= k || idx[i] == i)
+                       i++;
+               else { /* put index and data at the right position */
+                       int c = idx[i];
+
+                       if (idx[c] == c) /* conflict */
+                               return -E_FEC_BAD_IDX;
+                       FEC_SWAP(idx[i], idx[c]);
+                       FEC_SWAP(data[i], data[c]);
+               }
+       }
+       return 0;
+}
+
+/*
+ * Construct the decoding matrix given the indices. The encoding matrix must
+ * already be allocated.
+ */
+static int build_decode_matrix(struct fec_parms *parms, int *idx,
+               unsigned char **result)
+{
+       int ret = -E_FEC_BAD_IDX, i, k = parms->k;
+       unsigned char *p, *matrix = alloc_matrix(k, k);
+
+       for (i = 0, p = matrix; i < k; i++, p += k) {
+               if (idx[i] >= parms->n) /* invalid index */
+                       goto err;
+               if (idx[i] < k) {
+                       memset(p, 0, k);
+                       p[i] = 1;
+               } else
+                       memcpy(p, &(parms->enc_matrix[idx[i] * k]), k);
+       }
+       ret = invert_mat(matrix, k);
+       if (ret < 0)
+               goto err;
+       *result = matrix;
+       return 0;
+err:
+       free(matrix);
+       *result = NULL;
+       return ret;
+}
+
+/**
+ * Decode one slice from the group of received slices.
+ *
+ * \param code Pointer to fec params structure.
+ * \param data Pointers to received packets.
+ * \param idx Pointer to packet indices (gets modified).
+ * \param sz Size of each packet.
+ *
+ * \return Zero on success, -1 on errors.
+ *
+ * The \a data vector of received slices and the indices of slices are used to
+ * produce the correct output slice. The data slices are modified in-place.
+ */
+int fec_decode(struct fec_parms *parms, unsigned char **data, int *idx,
+               int sz)
+{
+       unsigned char *m_dec, **slice;
+       int ret, row, col, k = parms->k;
+
+       ret = shuffle(data, idx, k);
+       if (ret < 0)
+               return ret;
+       ret = build_decode_matrix(parms, idx, &m_dec);
+       if (ret < 0)
+               return ret;
+       /* do the actual decoding */
+       slice = para_malloc(k * sizeof(unsigned char *));
+       for (row = 0; row < k; row++) {
+               if (idx[row] >= k) {
+                       slice[row] = para_calloc(sz);
+                       for (col = 0; col < k; col++)
+                               addmul(slice[row], data[col],
+                                       m_dec[row * k + col], sz);
+               }
+       }
+       /* move slices to their final destination */
+       for (row = 0; row < k; row++) {
+               if (idx[row] >= k) {
+                       memcpy(data[row], slice[row], sz);
+                       free(slice[row]);
+               }
+       }
+       free(slice);
+       free(m_dec);
+       return 0;
+}