fec.c

   1 /** \file fec.c Forward error correction based on Vandermonde matrices. */
   2
   3 /*
   4  * 980624
   5  * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
   6  *
   7  * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
   8  * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
   9  * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  *
  15  * 1. Redistributions of source code must retain the above copyright
  16  *    notice, this list of conditions and the following disclaimer.
  17  * 2. Redistributions in binary form must reproduce the above
  18  *    copyright notice, this list of conditions and the following
  19  *    disclaimer in the documentation and/or other materials
  20  *    provided with the distribution.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  25  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
  26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  27  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  28  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  29  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
  31  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
  33  * OF SUCH DAMAGE.
  34  */
  35
  36 #include <regex.h>
  37
  38 #include "para.h"
  39 #include "error.h"
  40 #include "portable_io.h"
  41 #include "string.h"
  42 #include "fec.h"
  43
  44 /** Code over GF(256). */
  45 #define GF_BITS  8
  46 /** The largest number in GF(256) */
  47 #define GF_SIZE ((1 << GF_BITS) - 1)
  48
  49 /*
  50  * To speed up computations, we have tables for logarithm, exponent and inverse
  51  * of a number.
  52  */
  53
  54 /** Index->poly form conversion table. */
  55 static unsigned char gf_exp[2 * GF_SIZE];
  56
  57 /** Poly->index form conversion table. */
  58 static int gf_log[GF_SIZE + 1];
  59
  60 /** Inverse of a field element. */
  61 static unsigned char inverse[GF_SIZE + 1];
  62
  63 /**
  64  * The multiplication table.
  65  *
  66  * We use a table for multiplication as well. It takes 64K, no big deal even on
  67  * a PDA, especially because it can be pre-initialized and put into a ROM.
  68  *
  69  * \sa \ref gf_mul.
  70  */
  71 static unsigned char gf_mul_table[GF_SIZE + 1][GF_SIZE + 1];
  72
  73 /** Multiply two GF numbers. */
  74 #define gf_mul(x,y) gf_mul_table[x][y]
  75
  76 /* Compute x % GF_SIZE without a slow divide. */
  77 __a_const static inline unsigned char modnn(int x)
  78 {
  79         while (x >= GF_SIZE) {
  80                 x -= GF_SIZE;
  81                 x = (x >> GF_BITS) + (x & GF_SIZE);
  82         }
  83         return x;
  84 }
  85
  86 static void init_mul_table(void)
  87 {
  88         int i, j;
  89         for (i = 0; i < GF_SIZE + 1; i++)
  90                 for (j = 0; j < GF_SIZE + 1; j++)
  91                         gf_mul_table[i][j] =
  92                                 gf_exp[modnn(gf_log[i] + gf_log[j])];
  93
  94         for (j = 0; j < GF_SIZE + 1; j++)
  95                 gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
  96 }
  97
  98 static unsigned char *alloc_matrix(int rows, int cols)
  99 {
 100         return para_malloc(rows * cols);
 101 }
 102
 103 /*
 104  * Initialize the data structures used for computations in GF.
 105  *
 106  * This generates GF(2**GF_BITS) from the irreducible polynomial p(X) in
 107  * p[0]..p[m].
 108  *
 109  * Lookup tables:
 110  *     index->polynomial form           gf_exp[] contains j= \alpha^i;
 111  *     polynomial form -> index form    gf_log[ j = \alpha^i ] = i
 112  * \alpha=x is the primitive element of GF(2^m)
 113  *
 114  * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
 115  * multiplication of two numbers can be resolved without calling modnn
 116  */
 117 static void generate_gf(void)
 118 {
 119         int i;
 120         unsigned char mask = 1;
 121         char *pp = "101110001"; /* The primitive polynomial 1+x^2+x^3+x^4+x^8 */
 122         gf_exp[GF_BITS] = 0; /* will be updated at the end of the 1st loop */
 123
 124         /*
 125          * first, generate the (polynomial representation of) powers of \alpha,
 126          * which are stored in gf_exp[i] = \alpha ** i .
 127          * At the same time build gf_log[gf_exp[i]] = i .
 128          * The first GF_BITS powers are simply bits shifted to the left.
 129          */
 130         for (i = 0; i < GF_BITS; i++, mask <<= 1) {
 131                 gf_exp[i] = mask;
 132                 gf_log[gf_exp[i]] = i;
 133                 /*
 134                  * If pp[i] == 1 then \alpha ** i occurs in poly-repr
 135                  * gf_exp[GF_BITS] = \alpha ** GF_BITS
 136                  */
 137                 if (pp[i] == '1')
 138                         gf_exp[GF_BITS] ^= mask;
 139         }
 140         /*
 141          * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so can also
 142          * compute its inverse.
 143          */
 144         gf_log[gf_exp[GF_BITS]] = GF_BITS;
 145         /*
 146          * Poly-repr of \alpha ** (i+1) is given by poly-repr of \alpha ** i
 147          * shifted left one-bit and accounting for any \alpha ** GF_BITS term
 148          * that may occur when poly-repr of \alpha ** i is shifted.
 149          */
 150         mask = 1 << (GF_BITS - 1);
 151         for (i = GF_BITS + 1; i < GF_SIZE; i++) {
 152                 if (gf_exp[i - 1] >= mask)
 153                         gf_exp[i] =
 154                                 gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
 155                 else
 156                         gf_exp[i] = gf_exp[i - 1] << 1;
 157                 gf_log[gf_exp[i]] = i;
 158         }
 159         /*
 160          * log(0) is not defined, so use a special value
 161          */
 162         gf_log[0] = GF_SIZE;
 163         /* set the extended gf_exp values for fast multiply */
 164         for (i = 0; i < GF_SIZE; i++)
 165                 gf_exp[i + GF_SIZE] = gf_exp[i];
 166
 167         inverse[0] = 0; /* 0 has no inverse. */
 168         inverse[1] = 1;
 169         for (i = 2; i <= GF_SIZE; i++)
 170                 inverse[i] = gf_exp[GF_SIZE - gf_log[i]];
 171 }
 172
 173 /** How often the loop is unrolled. */
 174 #define UNROLL 16
 175
 176 /*
 177  * Compute dst[] = dst[] + c * src[]
 178  *
 179  * This is used often, so better optimize it! Currently the loop is unrolled 16
 180  * times. The case c=0 is also optimized, whereas c=1 is not.
 181  */
 182 static void addmul(unsigned char *dst1, const unsigned char *src1,
 183         unsigned char c, int sz)
 184 {
 185         unsigned char *dst, *lim, *col;
 186         const unsigned char *src = src1;
 187
 188         if (c == 0)
 189                 return;
 190
 191         dst = dst1;
 192         lim = &dst[sz - UNROLL + 1];
 193         col = gf_mul_table[c];
 194
 195         for (; dst < lim; dst += UNROLL, src += UNROLL) {
 196                 dst[0] ^= col[src[0]];
 197                 dst[1] ^= col[src[1]];
 198                 dst[2] ^= col[src[2]];
 199                 dst[3] ^= col[src[3]];
 200                 dst[4] ^= col[src[4]];
 201                 dst[5] ^= col[src[5]];
 202                 dst[6] ^= col[src[6]];
 203                 dst[7] ^= col[src[7]];
 204                 dst[8] ^= col[src[8]];
 205                 dst[9] ^= col[src[9]];
 206                 dst[10] ^= col[src[10]];
 207                 dst[11] ^= col[src[11]];
 208                 dst[12] ^= col[src[12]];
 209                 dst[13] ^= col[src[13]];
 210                 dst[14] ^= col[src[14]];
 211                 dst[15] ^= col[src[15]];
 212         }
 213         lim += UNROLL - 1;
 214         for (; dst < lim; dst++, src++) /* final components */
 215                 *dst ^= col[*src];
 216 }
 217
 218 /*
 219  * Compute C = AB where A is n*k, B is k*m, C is n*m
 220  */
 221 static void matmul(unsigned char *a, unsigned char *b, unsigned char *c,
 222                 int n, int k, int m)
 223 {
 224         int row, col, i;
 225
 226         for (row = 0; row < n; row++) {
 227                 for (col = 0; col < m; col++) {
 228                         unsigned char *pa = &a[row * k], *pb = &b[col], acc = 0;
 229                         for (i = 0; i < k; i++, pa++, pb += m)
 230                                 acc ^= gf_mul(*pa, *pb);
 231                         c[row * m + col] = acc;
 232                 }
 233         }
 234 }
 235
 236 /** Swap two numbers. */
 237 #define FEC_SWAP(a,b) {typeof(a) tmp = a; a = b; b = tmp;}
 238
 239 /*
 240  * Compute the inverse of a matrix.
 241  *
 242  * k is the size of the matrix 'src' (Gauss-Jordan, adapted from Numerical
 243  * Recipes in C). Returns negative on errors.
 244  */
 245 static int invert_mat(unsigned char *src, int k)
 246 {
 247         int irow, icol, row, col, ix, error;
 248         int *indxc = para_malloc(k * sizeof(int));
 249         int *indxr = para_malloc(k * sizeof(int));
 250         int *ipiv = para_malloc(k * sizeof(int)); /* elements used as pivots */
 251         unsigned char c, *p, *id_row = alloc_matrix(1, k),
 252                 *temp_row = alloc_matrix(1, k);
 253
 254         memset(id_row, 0, k);
 255         memset(ipiv, 0, k * sizeof(int));
 256
 257         for (col = 0; col < k; col++) {
 258                 unsigned char *pivot_row;
 259                 /*
 260                  * Zeroing column 'col', look for a non-zero element.
 261                  * First try on the diagonal, if it fails, look elsewhere.
 262                  */
 263                 irow = icol = -1;
 264                 if (ipiv[col] != 1 && src[col * k + col] != 0) {
 265                         irow = col;
 266                         icol = col;
 267                         goto found_piv;
 268                 }
 269                 for (row = 0; row < k; row++) {
 270                         if (ipiv[row] != 1) {
 271                                 for (ix = 0; ix < k; ix++) {
 272                                         if (ipiv[ix] == 0) {
 273                                                 if (src[row * k + ix] != 0) {
 274                                                         irow = row;
 275                                                         icol = ix;
 276                                                         goto found_piv;
 277                                                 }
 278                                         } else if (ipiv[ix] > 1) {
 279                                                 error = -E_FEC_PIVOT;
 280                                                 goto fail;
 281                                         }
 282                                 }
 283                         }
 284                 }
 285                 error = -E_FEC_PIVOT;
 286                 if (icol == -1)
 287                         goto fail;
 288 found_piv:
 289                 ++(ipiv[icol]);
 290                 /*
 291                  * swap rows irow and icol, so afterwards the diagonal element
 292                  * will be correct. Rarely done, not worth optimizing.
 293                  */
 294                 if (irow != icol)
 295                         for (ix = 0; ix < k; ix++)
 296                                 FEC_SWAP(src[irow * k + ix], src[icol * k + ix]);
 297                 indxr[col] = irow;
 298                 indxc[col] = icol;
 299                 pivot_row = &src[icol * k];
 300                 error = -E_FEC_SINGULAR;
 301                 c = pivot_row[icol];
 302                 if (c == 0)
 303                         goto fail;
 304                 if (c != 1) { /* otherwise this is a NOP */
 305                         /*
 306                          * this is done often , but optimizing is not so
 307                          * fruitful, at least in the obvious ways (unrolling)
 308                          */
 309                         c = inverse[c];
 310                         pivot_row[icol] = 1;
 311                         for (ix = 0; ix < k; ix++)
 312                                 pivot_row[ix] = gf_mul(c, pivot_row[ix]);
 313                 }
 314                 /*
 315                  * from all rows, remove multiples of the selected row to zero
 316                  * the relevant entry (in fact, the entry is not zero because
 317                  * we know it must be zero).  (Here, if we know that the
 318                  * pivot_row is the identity, we can optimize the addmul).
 319                  */
 320                 id_row[icol] = 1;
 321                 if (memcmp(pivot_row, id_row, k) != 0) {
 322                         for (p = src, ix = 0; ix < k; ix++, p += k) {
 323                                 if (ix != icol) {
 324                                         c = p[icol];
 325                                         p[icol] = 0;
 326                                         addmul(p, pivot_row, c, k);
 327                                 }
 328                         }
 329                 }
 330                 id_row[icol] = 0;
 331         }
 332         for (col = k - 1; col >= 0; col--) {
 333                 if (indxr[col] < 0 || indxr[col] >= k)
 334                         PARA_CRIT_LOG("AARGH, indxr[col] %d\n", indxr[col]);
 335                 else if (indxc[col] < 0 || indxc[col] >= k)
 336                         PARA_CRIT_LOG("AARGH, indxc[col] %d\n", indxc[col]);
 337                 else if (indxr[col] != indxc[col]) {
 338                         for (row = 0; row < k; row++) {
 339                                 FEC_SWAP(src[row * k + indxr[col]],
 340                                         src[row * k + indxc[col]]);
 341                         }
 342                 }
 343         }
 344         error = 0;
 345 fail:
 346         free(indxc);
 347         free(indxr);
 348         free(ipiv);
 349         free(id_row);
 350         free(temp_row);
 351         return error;
 352 }
 353
 354 /*
 355  * Invert a Vandermonde matrix.
 356  *
 357  * It assumes that the matrix is not singular and _IS_ a Vandermonde matrix.
 358  * Only uses the second column of the matrix, containing the p_i's.
 359  *
 360  * Algorithm borrowed from "Numerical recipes in C" -- sec.2.8, but largely
 361  * revised for GF purposes.
 362  */
 363 static void invert_vdm(unsigned char *src, int k)
 364 {
 365         int i, j, row, col;
 366         unsigned char *b, *c, *p, t, xx;
 367
 368         if (k == 1) /* degenerate */
 369                 return;
 370         /*
 371          * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
 372          * b holds the coefficient for the matrix inversion
 373          */
 374         c = para_malloc(k);
 375         b = para_malloc(k);
 376         p = para_malloc(k);
 377
 378         for (j = 1, i = 0; i < k; i++, j += k) {
 379                 c[i] = 0;
 380                 p[i] = src[j];
 381         }
 382         /*
 383          * construct coeffs recursively. We know c[k] = 1 (implicit) and start
 384          * P_0 = x - p_0, then at each stage multiply by x - p_i generating P_i
 385          * = x P_{i-1} - p_i P_{i-1} After k steps we are done.
 386          */
 387         c[k - 1] = p[0]; /* really -p(0), but x = -x in GF(2^m) */
 388         for (i = 1; i < k; i++) {
 389                 unsigned char p_i = p[i];
 390                 for (j = k - 1 - (i - 1); j < k - 1; j++)
 391                         c[j] ^= gf_mul(p_i, c[j + 1]);
 392                 c[k - 1] ^= p_i;
 393         }
 394
 395         for (row = 0; row < k; row++) {
 396                 /*
 397                  * synthetic division etc.
 398                  */
 399                 xx = p[row];
 400                 t = 1;
 401                 b[k - 1] = 1; /* this is in fact c[k] */
 402                 for (i = k - 2; i >= 0; i--) {
 403                         b[i] = c[i + 1] ^ gf_mul(xx, b[i + 1]);
 404                         t = gf_mul(xx, t) ^ b[i];
 405                 }
 406                 for (col = 0; col < k; col++)
 407                         src[col * k + row] = gf_mul(inverse[t], b[col]);
 408         }
 409         free(c);
 410         free(b);
 411         free(p);
 412 }
 413
 414 static int fec_initialized;
 415
 416 static void init_fec(void)
 417 {
 418         generate_gf();
 419         init_mul_table();
 420         fec_initialized = 1;
 421 }
 422
 423 /** Internal FEC parameters. */
 424 struct fec_parms {
 425         /** Number of data slices. */
 426         int k;
 427         /** Number of slices (including redundant slices). */
 428         int n;
 429         /** The encoding matrix, computed by init_fec(). */
 430         unsigned char *enc_matrix;
 431 };
 432
 433 /**
 434  * Deallocate a fec params structure.
 435  *
 436  * \param p The structure to free.
 437  */
 438 void fec_free(struct fec_parms *p)
 439 {
 440         if (!p)
 441                 return;
 442         free(p->enc_matrix);
 443         free(p);
 444 }
 445
 446 /**
 447  * Create a new encoder and return an opaque descriptor to it.
 448  *
 449  * \param k Number of input slices.
 450  * \param n Number of output slices.
 451  * \param result On success the Fec descriptor is returned here.
 452  *
 453  * \return Standard.
 454  *
 455  * This creates the k*n encoding matrix.  It is computed starting with a
 456  * Vandermonde matrix, and then transformed into a systematic matrix.
 457  */
 458 int fec_new(int k, int n, struct fec_parms **result)
 459 {
 460         int row, col;
 461         unsigned char *p, *tmp_m;
 462         struct fec_parms *parms;
 463
 464         if (!fec_initialized)
 465                 init_fec();
 466
 467         if (k < 1 || k > GF_SIZE + 1 || n > GF_SIZE + 1 || k > n)
 468                 return -E_FEC_PARMS;
 469         parms = para_malloc(sizeof(struct fec_parms));
 470         parms->k = k;
 471         parms->n = n;
 472         parms->enc_matrix = alloc_matrix(n, k);
 473         tmp_m = alloc_matrix(n, k);
 474         /*
 475          * fill the matrix with powers of field elements, starting from 0.
 476          * The first row is special, cannot be computed with exp. table.
 477          */
 478         tmp_m[0] = 1;
 479         for (col = 1; col < k; col++)
 480                 tmp_m[col] = 0;
 481         for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k) {
 482                 for (col = 0; col < k; col++)
 483                         p[col] = gf_exp[modnn(row * col)];
 484         }
 485
 486         /*
 487          * quick code to build systematic matrix: invert the top
 488          * k*k vandermonde matrix, multiply right the bottom n-k rows
 489          * by the inverse, and construct the identity matrix at the top.
 490          */
 491         invert_vdm(tmp_m, k); /* much faster than invert_mat */
 492         matmul(tmp_m + k * k, tmp_m, parms->enc_matrix + k * k, n - k, k, k);
 493         /*
 494          * the upper matrix is I so do not bother with a slow multiply
 495          */
 496         memset(parms->enc_matrix, 0, k * k);
 497         for (p = parms->enc_matrix, col = 0; col < k; col++, p += k + 1)
 498                 *p = 1;
 499         free(tmp_m);
 500         *result = parms;
 501         return 0;
 502 }
 503
 504 /**
 505  * Compute one encoded slice of the given input.
 506  *
 507  * \param parms The fec parameters returned earlier by fec_new().
 508  * \param src The \a k data slices to encode.
 509  * \param dst Result pointer.
 510  * \param idx The index of the slice to compute.
 511  * \param sz The size of the input data packets.
 512  *
 513  * Encode the \a k slices of size \a sz given by \a src and store the output
 514  * slice number \a idx in \a dst.
 515  */
 516 void fec_encode(struct fec_parms *parms, const unsigned char * const *src,
 517                 unsigned char *dst, int idx, int sz)
 518 {
 519         int i, k = parms->k;
 520         unsigned char *p;
 521
 522         assert(idx <= parms->n);
 523
 524         if (idx < k) {
 525                 memcpy(dst, src[idx], sz);
 526                 return;
 527         }
 528         p = &(parms->enc_matrix[idx * k]);
 529         memset(dst, 0, sz);
 530         for (i = 0; i < k; i++)
 531                 addmul(dst, src[i], p[i], sz);
 532 }
 533
 534 /* Move src packets in their position. */
 535 static int shuffle(unsigned char **data, int *idx, int k)
 536 {
 537         int i;
 538
 539         for (i = 0; i < k;) {
 540                 if (idx[i] >= k || idx[i] == i)
 541                         i++;
 542                 else { /* put index and data at the right position */
 543                         int c = idx[i];
 544
 545                         if (idx[c] == c) /* conflict */
 546                                 return -E_FEC_BAD_IDX;
 547                         FEC_SWAP(idx[i], idx[c]);
 548                         FEC_SWAP(data[i], data[c]);
 549                 }
 550         }
 551         return 0;
 552 }
 553
 554 /*
 555  * Construct the decoding matrix given the indices. The encoding matrix must
 556  * already be allocated.
 557  */
 558 static int build_decode_matrix(struct fec_parms *parms, int *idx,
 559                 unsigned char **result)
 560 {
 561         int ret = -E_FEC_BAD_IDX, i, k = parms->k;
 562         unsigned char *p, *matrix = alloc_matrix(k, k);
 563
 564         for (i = 0, p = matrix; i < k; i++, p += k) {
 565                 if (idx[i] >= parms->n) /* invalid index */
 566                         goto err;
 567                 if (idx[i] < k) {
 568                         memset(p, 0, k);
 569                         p[i] = 1;
 570                 } else
 571                         memcpy(p, &(parms->enc_matrix[idx[i] * k]), k);
 572         }
 573         ret = invert_mat(matrix, k);
 574         if (ret < 0)
 575                 goto err;
 576         *result = matrix;
 577         return 0;
 578 err:
 579         free(matrix);
 580         *result = NULL;
 581         return ret;
 582 }
 583
 584 /**
 585  * Decode one slice from the group of received slices.
 586  *
 587  * \param parms Pointer to fec params structure.
 588  * \param data Pointers to received packets.
 589  * \param idx Pointer to packet indices (gets modified).
 590  * \param sz Size of each packet.
 591  *
 592  * \return Zero on success, -1 on errors.
 593  *
 594  * The \a data vector of received slices and the indices of slices are used to
 595  * produce the correct output slice. The data slices are modified in-place.
 596  */
 597 int fec_decode(struct fec_parms *parms, unsigned char **data, int *idx,
 598                 int sz)
 599 {
 600         unsigned char *m_dec, **slice;
 601         int ret, row, col, k = parms->k;
 602
 603         ret = shuffle(data, idx, k);
 604         if (ret < 0)
 605                 return ret;
 606         ret = build_decode_matrix(parms, idx, &m_dec);
 607         if (ret < 0)
 608                 return ret;
 609         /* do the actual decoding */
 610         slice = para_malloc(k * sizeof(unsigned char *));
 611         for (row = 0; row < k; row++) {
 612                 if (idx[row] >= k) {
 613                         slice[row] = para_calloc(sz);
 614                         for (col = 0; col < k; col++)
 615                                 addmul(slice[row], data[col],
 616                                         m_dec[row * k + col], sz);
 617                 }
 618         }
 619         /* move slices to their final destination */
 620         for (row = 0; row < k; row++) {
 621                 if (idx[row] >= k) {
 622                         memcpy(data[row], slice[row], sz);
 623                         free(slice[row]);
 624                 }
 625         }
 626         free(slice);
 627         free(m_dec);
 628         return 0;
 629 }