fec.c

   1 /*
   2  * fec.c -- forward error correction based on Vandermonde matrices
   3  * 980624
   4  * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
   5  *
   6  * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
   7  * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
   8  * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  *
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above
  17  *    copyright notice, this list of conditions and the following
  18  *    disclaimer in the documentation and/or other materials
  19  *    provided with the distribution.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  24  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
  25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  26  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  28  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
  30  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
  32  * OF SUCH DAMAGE.
  33  */
  34
  35 #include "para.h"
  36 #include "error.h"
  37 #include "portable_io.h"
  38 #include "string.h"
  39 #include "fec.h"
  40
  41 #define GF_BITS  8 /* code over GF(256) */
  42 #define GF_SIZE ((1 << GF_BITS) - 1)
  43
  44 /*
  45  * To speed up computations, we have tables for logarithm, exponent and inverse
  46  * of a number. We use a table for multiplication as well (it takes 64K, no big
  47  * deal even on a PDA, especially because it can be pre-initialized an put into
  48  * a ROM!). The macro gf_mul(x,y) takes care of multiplications.
  49  */
  50 static unsigned char gf_exp[2 * GF_SIZE]; /* index->poly form conversion table */
  51 static int gf_log[GF_SIZE + 1]; /* Poly->index form conversion table    */
  52 static unsigned char inverse[GF_SIZE + 1]; /* inverse of field elem. */
  53 static unsigned char gf_mul_table[GF_SIZE + 1][GF_SIZE + 1];
  54 /* Multiply two numbers. */
  55 #define gf_mul(x,y) gf_mul_table[x][y]
  56
  57 /* Compute x % GF_SIZE without a slow divide. */
  58 static inline unsigned char modnn(int x)
  59 {
  60         while (x >= GF_SIZE) {
  61                 x -= GF_SIZE;
  62                 x = (x >> GF_BITS) + (x & GF_SIZE);
  63         }
  64         return x;
  65 }
  66
  67 static void init_mul_table(void)
  68 {
  69         int i, j;
  70         for (i = 0; i < GF_SIZE + 1; i++)
  71                 for (j = 0; j < GF_SIZE + 1; j++)
  72                         gf_mul_table[i][j] =
  73                                 gf_exp[modnn(gf_log[i] + gf_log[j])];
  74
  75         for (j = 0; j < GF_SIZE + 1; j++)
  76                 gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
  77 }
  78
  79 static unsigned char *alloc_matrix(int rows, int cols)
  80 {
  81         return para_malloc(rows * cols);
  82 }
  83
  84 /*
  85  * Initialize the data structures used for computations in GF.
  86  *
  87  * This generates GF(2**GF_BITS) from the irreducible polynomial p(X) in
  88  * p[0]..p[m].
  89  *
  90  * Lookup tables:
  91  *     index->polynomial form           gf_exp[] contains j= \alpha^i;
  92  *     polynomial form -> index form    gf_log[ j = \alpha^i ] = i
  93  * \alpha=x is the primitive element of GF(2^m)
  94  *
  95  * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
  96  * multiplication of two numbers can be resolved without calling modnn
  97  */
  98 static void generate_gf(void)
  99 {
 100         int i;
 101         unsigned char mask = 1;
 102         char *pp = "101110001"; /* The primitive polynomial 1+x^2+x^3+x^4+x^8 */
 103         gf_exp[GF_BITS] = 0; /* will be updated at the end of the 1st loop */
 104
 105         /*
 106          * first, generate the (polynomial representation of) powers of \alpha,
 107          * which are stored in gf_exp[i] = \alpha ** i .
 108          * At the same time build gf_log[gf_exp[i]] = i .
 109          * The first GF_BITS powers are simply bits shifted to the left.
 110          */
 111         for (i = 0; i < GF_BITS; i++, mask <<= 1) {
 112                 gf_exp[i] = mask;
 113                 gf_log[gf_exp[i]] = i;
 114                 /*
 115                  * If pp[i] == 1 then \alpha ** i occurs in poly-repr
 116                  * gf_exp[GF_BITS] = \alpha ** GF_BITS
 117                  */
 118                 if (pp[i] == '1')
 119                         gf_exp[GF_BITS] ^= mask;
 120         }
 121         /*
 122          * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so can also
 123          * compute its inverse.
 124          */
 125         gf_log[gf_exp[GF_BITS]] = GF_BITS;
 126         /*
 127          * Poly-repr of \alpha ** (i+1) is given by poly-repr of \alpha ** i
 128          * shifted left one-bit and accounting for any \alpha ** GF_BITS term
 129          * that may occur when poly-repr of \alpha ** i is shifted.
 130          */
 131         mask = 1 << (GF_BITS - 1);
 132         for (i = GF_BITS + 1; i < GF_SIZE; i++) {
 133                 if (gf_exp[i - 1] >= mask)
 134                         gf_exp[i] =
 135                                 gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
 136                 else
 137                         gf_exp[i] = gf_exp[i - 1] << 1;
 138                 gf_log[gf_exp[i]] = i;
 139         }
 140         /*
 141          * log(0) is not defined, so use a special value
 142          */
 143         gf_log[0] = GF_SIZE;
 144         /* set the extended gf_exp values for fast multiply */
 145         for (i = 0; i < GF_SIZE; i++)
 146                 gf_exp[i + GF_SIZE] = gf_exp[i];
 147
 148         inverse[0] = 0; /* 0 has no inverse. */
 149         inverse[1] = 1;
 150         for (i = 2; i <= GF_SIZE; i++)
 151                 inverse[i] = gf_exp[GF_SIZE - gf_log[i]];
 152 }
 153
 154 /*
 155  * Compute dst[] = dst[] + c * src[]
 156  *
 157  * This is used often, so better optimize it! Currently the loop is unrolled 16
 158  * times. The case c=0 is also optimized, whereas c=1 is not.
 159  */
 160 #define UNROLL 16
 161 static void addmul(unsigned char *dst1, const unsigned char const *src1,
 162         unsigned char c, int sz)
 163 {
 164         if (c == 0)
 165                 return;
 166         unsigned char *dst = dst1, *lim = &dst[sz - UNROLL + 1],
 167                 *col = gf_mul_table[c];
 168         const unsigned char const *src = src1;
 169
 170         for (; dst < lim; dst += UNROLL, src += UNROLL) {
 171                 dst[0] ^= col[src[0]];
 172                 dst[1] ^= col[src[1]];
 173                 dst[2] ^= col[src[2]];
 174                 dst[3] ^= col[src[3]];
 175                 dst[4] ^= col[src[4]];
 176                 dst[5] ^= col[src[5]];
 177                 dst[6] ^= col[src[6]];
 178                 dst[7] ^= col[src[7]];
 179                 dst[8] ^= col[src[8]];
 180                 dst[9] ^= col[src[9]];
 181                 dst[10] ^= col[src[10]];
 182                 dst[11] ^= col[src[11]];
 183                 dst[12] ^= col[src[12]];
 184                 dst[13] ^= col[src[13]];
 185                 dst[14] ^= col[src[14]];
 186                 dst[15] ^= col[src[15]];
 187         }
 188         lim += UNROLL - 1;
 189         for (; dst < lim; dst++, src++) /* final components */
 190                 *dst ^= col[*src];
 191 }
 192
 193 /*
 194  * Compute C = AB where A is n*k, B is k*m, C is n*m
 195  */
 196 static void matmul(unsigned char *a, unsigned char *b, unsigned char *c,
 197                 int n, int k, int m)
 198 {
 199         int row, col, i;
 200
 201         for (row = 0; row < n; row++) {
 202                 for (col = 0; col < m; col++) {
 203                         unsigned char *pa = &a[row * k], *pb = &b[col], acc = 0;
 204                         for (i = 0; i < k; i++, pa++, pb += m)
 205                                 acc ^= gf_mul(*pa, *pb);
 206                         c[row * m + col] = acc;
 207                 }
 208         }
 209 }
 210
 211 #define FEC_SWAP(a,b) {typeof(a) tmp = a; a = b; b = tmp;}
 212
 213 /*
 214  * Compute the inverse of a matrix.
 215  *
 216  * k is the size of the matrix 'src' (Gauss-Jordan, adapted from Numerical
 217  * Recipes in C). Returns -1 if 'src' is singular.
 218  */
 219 static int invert_mat(unsigned char *src, int k)
 220 {
 221         int irow, icol, row, col, ix, error;
 222         int *indxc = para_malloc(k * sizeof(int));
 223         int *indxr = para_malloc(k * sizeof(int));
 224         int *ipiv = para_malloc(k * sizeof(int)); /* elements used as pivots */
 225         unsigned char c, *p, *id_row = alloc_matrix(1, k),
 226                 *temp_row = alloc_matrix(1, k);
 227
 228         memset(id_row, 0, k);
 229         memset(ipiv, 0, k * sizeof(int));
 230
 231         for (col = 0; col < k; col++) {
 232                 unsigned char *pivot_row;
 233                 /*
 234                  * Zeroing column 'col', look for a non-zero element.
 235                  * First try on the diagonal, if it fails, look elsewhere.
 236                  */
 237                 irow = icol = -1;
 238                 if (ipiv[col] != 1 && src[col * k + col] != 0) {
 239                         irow = col;
 240                         icol = col;
 241                         goto found_piv;
 242                 }
 243                 for (row = 0; row < k; row++) {
 244                         if (ipiv[row] != 1) {
 245                                 for (ix = 0; ix < k; ix++) {
 246                                         if (ipiv[ix] == 0) {
 247                                                 if (src[row * k + ix] != 0) {
 248                                                         irow = row;
 249                                                         icol = ix;
 250                                                         goto found_piv;
 251                                                 }
 252                                         } else if (ipiv[ix] > 1) {
 253                                                 error = -E_FEC_PIVOT;
 254                                                 goto fail;
 255                                         }
 256                                 }
 257                         }
 258                 }
 259                 error = -E_FEC_PIVOT;
 260                 if (icol == -1)
 261                         goto fail;
 262 found_piv:
 263                 ++(ipiv[icol]);
 264                 /*
 265                  * swap rows irow and icol, so afterwards the diagonal element
 266                  * will be correct. Rarely done, not worth optimizing.
 267                  */
 268                 if (irow != icol)
 269                         for (ix = 0; ix < k; ix++)
 270                                 FEC_SWAP(src[irow * k + ix], src[icol * k + ix]);
 271                 indxr[col] = irow;
 272                 indxc[col] = icol;
 273                 pivot_row = &src[icol * k];
 274                 error = -E_FEC_SINGULAR;
 275                 c = pivot_row[icol];
 276                 if (c == 0)
 277                         goto fail;
 278                 if (c != 1) { /* otherwise this is a NOP */
 279                         /*
 280                          * this is done often , but optimizing is not so
 281                          * fruitful, at least in the obvious ways (unrolling)
 282                          */
 283                         c = inverse[c];
 284                         pivot_row[icol] = 1;
 285                         for (ix = 0; ix < k; ix++)
 286                                 pivot_row[ix] = gf_mul(c, pivot_row[ix]);
 287                 }
 288                 /*
 289                  * from all rows, remove multiples of the selected row to zero
 290                  * the relevant entry (in fact, the entry is not zero because
 291                  * we know it must be zero).  (Here, if we know that the
 292                  * pivot_row is the identity, we can optimize the addmul).
 293                  */
 294                 id_row[icol] = 1;
 295                 if (memcmp(pivot_row, id_row, k) != 0) {
 296                         for (p = src, ix = 0; ix < k; ix++, p += k) {
 297                                 if (ix != icol) {
 298                                         c = p[icol];
 299                                         p[icol] = 0;
 300                                         addmul(p, pivot_row, c, k);
 301                                 }
 302                         }
 303                 }
 304                 id_row[icol] = 0;
 305         }
 306         for (col = k - 1; col >= 0; col--) {
 307                 if (indxr[col] < 0 || indxr[col] >= k)
 308                         PARA_CRIT_LOG("AARGH, indxr[col] %d\n", indxr[col]);
 309                 else if (indxc[col] < 0 || indxc[col] >= k)
 310                         PARA_CRIT_LOG("AARGH, indxc[col] %d\n", indxc[col]);
 311                 else if (indxr[col] != indxc[col]) {
 312                         for (row = 0; row < k; row++) {
 313                                 FEC_SWAP(src[row * k + indxr[col]],
 314                                         src[row * k + indxc[col]]);
 315                         }
 316                 }
 317         }
 318         error = 0;
 319 fail:
 320         free(indxc);
 321         free(indxr);
 322         free(ipiv);
 323         free(id_row);
 324         free(temp_row);
 325         return error;
 326 }
 327
 328 /*
 329  * Invert a Vandermonde matrix.
 330  *
 331  * It assumes that the matrix is not singular and _IS_ a Vandermonde matrix.
 332  * Only uses the second column of the matrix, containing the p_i's.
 333  *
 334  * Algorithm borrowed from "Numerical recipes in C" -- sec.2.8, but largely
 335  * revised for GF purposes.
 336  */
 337 static void invert_vdm(unsigned char *src, int k)
 338 {
 339         int i, j, row, col;
 340         unsigned char *b, *c, *p, t, xx;
 341
 342         if (k == 1) /* degenerate */
 343                 return;
 344         /*
 345          * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
 346          * b holds the coefficient for the matrix inversion
 347          */
 348         c = para_malloc(k);
 349         b = para_malloc(k);
 350         p = para_malloc(k);
 351
 352         for (j = 1, i = 0; i < k; i++, j += k) {
 353                 c[i] = 0;
 354                 p[i] = src[j];
 355         }
 356         /*
 357          * construct coeffs recursively. We know c[k] = 1 (implicit) and start
 358          * P_0 = x - p_0, then at each stage multiply by x - p_i generating P_i
 359          * = x P_{i-1} - p_i P_{i-1} After k steps we are done.
 360          */
 361         c[k - 1] = p[0]; /* really -p(0), but x = -x in GF(2^m) */
 362         for (i = 1; i < k; i++) {
 363                 unsigned char p_i = p[i];
 364                 for (j = k - 1 - (i - 1); j < k - 1; j++)
 365                         c[j] ^= gf_mul(p_i, c[j + 1]);
 366                 c[k - 1] ^= p_i;
 367         }
 368
 369         for (row = 0; row < k; row++) {
 370                 /*
 371                  * synthetic division etc.
 372                  */
 373                 xx = p[row];
 374                 t = 1;
 375                 b[k - 1] = 1; /* this is in fact c[k] */
 376                 for (i = k - 2; i >= 0; i--) {
 377                         b[i] = c[i + 1] ^ gf_mul(xx, b[i + 1]);
 378                         t = gf_mul(xx, t) ^ b[i];
 379                 }
 380                 for (col = 0; col < k; col++)
 381                         src[col * k + row] = gf_mul(inverse[t], b[col]);
 382         }
 383         free(c);
 384         free(b);
 385         free(p);
 386 }
 387
 388 static int fec_initialized;
 389
 390 static void init_fec(void)
 391 {
 392         generate_gf();
 393         init_mul_table();
 394         fec_initialized = 1;
 395 }
 396
 397 struct fec_parms {
 398         int k, n; /* parameters of the code */
 399         unsigned char *enc_matrix;
 400 };
 401
 402 /**
 403  * Deallocate a fec params structure.
 404  *
 405  * \param p The structure to free.
 406  */
 407 void fec_free(struct fec_parms *p)
 408 {
 409         if (!p)
 410                 return;
 411         free(p->enc_matrix);
 412         free(p);
 413 }
 414
 415 /**
 416  * Create a new encoder and return an opaque descriptor to it.
 417  *
 418  * \param k Number of input slices.
 419  * \param n Number of output slices.
 420  * \param result On success the Fec descriptor is returned here.
 421  *
 422  * \return Standard.
 423  *
 424  * This creates the k*n encoding matrix.  It is computed starting with a
 425  * Vandermonde matrix, and then transformed into a systematic matrix.
 426  */
 427 int fec_new(int k, int n, struct fec_parms **result)
 428 {
 429         int row, col;
 430         unsigned char *p, *tmp_m;
 431         struct fec_parms *parms;
 432
 433         if (!fec_initialized)
 434                 init_fec();
 435
 436         if (k < 1 || k > GF_SIZE + 1 || n > GF_SIZE + 1 || k > n)
 437                 return -E_FEC_PARMS;
 438         parms = para_malloc(sizeof(struct fec_parms));
 439         parms->k = k;
 440         parms->n = n;
 441         parms->enc_matrix = alloc_matrix(n, k);
 442         tmp_m = alloc_matrix(n, k);
 443         /*
 444          * fill the matrix with powers of field elements, starting from 0.
 445          * The first row is special, cannot be computed with exp. table.
 446          */
 447         tmp_m[0] = 1;
 448         for (col = 1; col < k; col++)
 449                 tmp_m[col] = 0;
 450         for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k) {
 451                 for (col = 0; col < k; col++)
 452                         p[col] = gf_exp[modnn(row * col)];
 453         }
 454
 455         /*
 456          * quick code to build systematic matrix: invert the top
 457          * k*k vandermonde matrix, multiply right the bottom n-k rows
 458          * by the inverse, and construct the identity matrix at the top.
 459          */
 460         invert_vdm(tmp_m, k); /* much faster than invert_mat */
 461         matmul(tmp_m + k * k, tmp_m, parms->enc_matrix + k * k, n - k, k, k);
 462         /*
 463          * the upper matrix is I so do not bother with a slow multiply
 464          */
 465         memset(parms->enc_matrix, 0, k * k);
 466         for (p = parms->enc_matrix, col = 0; col < k; col++, p += k + 1)
 467                 *p = 1;
 468         free(tmp_m);
 469         *result = parms;
 470         return 0;
 471 }
 472
 473 /**
 474  * Compute one encoded slice of the given input.
 475  *
 476  * \param parms The fec parameters returned earlier by fec_new().
 477  * \param src The \a k data slices to encode.
 478  * \param dst Result pointer.
 479  * \param idx The index of the slice to compute.
 480  * \param sz The size of the input data packets.
 481  *
 482  * Encode the \a k slices of size \a sz given by \a src and store the output
 483  * slice number \a idx in \dst.
 484  */
 485 void fec_encode(struct fec_parms *parms, const unsigned char * const *src,
 486                 unsigned char *dst, int idx, int sz)
 487 {
 488         int i, k = parms->k;
 489         unsigned char *p;
 490
 491         assert(idx <= parms->n);
 492
 493         if (idx < k) {
 494                 memcpy(dst, src[idx], sz);
 495                 return;
 496         }
 497         p = &(parms->enc_matrix[idx * k]);
 498         memset(dst, 0, sz);
 499         for (i = 0; i < k; i++)
 500                 addmul(dst, src[i], p[i], sz);
 501 }
 502
 503 /* Move src packets in their position. */
 504 static int shuffle(unsigned char **data, int *idx, int k)
 505 {
 506         int i;
 507
 508         for (i = 0; i < k;) {
 509                 if (idx[i] >= k || idx[i] == i)
 510                         i++;
 511                 else { /* put index and data at the right position */
 512                         int c = idx[i];
 513
 514                         if (idx[c] == c) /* conflict */
 515                                 return -E_FEC_BAD_IDX;
 516                         FEC_SWAP(idx[i], idx[c]);
 517                         FEC_SWAP(data[i], data[c]);
 518                 }
 519         }
 520         return 0;
 521 }
 522
 523 /*
 524  * Construct the decoding matrix given the indices. The encoding matrix must
 525  * already be allocated.
 526  */
 527 static int build_decode_matrix(struct fec_parms *parms, int *idx,
 528                 unsigned char **result)
 529 {
 530         int ret = -E_FEC_BAD_IDX, i, k = parms->k;
 531         unsigned char *p, *matrix = alloc_matrix(k, k);
 532
 533         for (i = 0, p = matrix; i < k; i++, p += k) {
 534                 if (idx[i] >= parms->n) /* invalid index */
 535                         goto err;
 536                 if (idx[i] < k) {
 537                         memset(p, 0, k);
 538                         p[i] = 1;
 539                 } else
 540                         memcpy(p, &(parms->enc_matrix[idx[i] * k]), k);
 541         }
 542         ret = invert_mat(matrix, k);
 543         if (ret < 0)
 544                 goto err;
 545         *result = matrix;
 546         return 0;
 547 err:
 548         free(matrix);
 549         *result = NULL;
 550         return ret;
 551 }
 552
 553 /**
 554  * Decode one slice from the group of received slices.
 555  *
 556  * \param code Pointer to fec params structure.
 557  * \param data Pointers to received packets.
 558  * \param idx Pointer to packet indices (gets modified).
 559  * \param sz Size of each packet.
 560  *
 561  * \return Zero on success, -1 on errors.
 562  *
 563  * The \a data vector of received slices and the indices of slices are used to
 564  * produce the correct output slice. The data slices are modified in-place.
 565  */
 566 int fec_decode(struct fec_parms *parms, unsigned char **data, int *idx,
 567                 int sz)
 568 {
 569         unsigned char *m_dec, **slice;
 570         int ret, row, col, k = parms->k;
 571
 572         ret = shuffle(data, idx, k);
 573         if (ret < 0)
 574                 return ret;
 575         ret = build_decode_matrix(parms, idx, &m_dec);
 576         if (ret < 0)
 577                 return ret;
 578         /* do the actual decoding */
 579         slice = para_malloc(k * sizeof(unsigned char *));
 580         for (row = 0; row < k; row++) {
 581                 if (idx[row] >= k) {
 582                         slice[row] = para_calloc(sz);
 583                         for (col = 0; col < k; col++)
 584                                 addmul(slice[row], data[col],
 585                                         m_dec[row * k + col], sz);
 586                 }
 587         }
 588         /* move slices to their final destination */
 589         for (row = 0; row < k; row++) {
 590                 if (idx[row] >= k) {
 591                         memcpy(data[row], slice[row], sz);
 592                         free(slice[row]);
 593                 }
 594         }
 595         free(slice);
 596         free(m_dec);
 597         return 0;
 598 }