com_ff(): Depreciate "n-" syntax.
[paraslash.git] / fec.c
1 /** \file fec.c Forward error correction based on Vandermonde matrices. */
2
3 /*
4 * 980624
5 * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
6 *
7 * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
8 * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
9 * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials
20 * provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
25 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
27 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
29 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
31 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
33 * OF SUCH DAMAGE.
34 */
35
36 #include <regex.h>
37
38 #include "para.h"
39 #include "error.h"
40 #include "portable_io.h"
41 #include "string.h"
42 #include "fec.h"
43
/** Code over GF(256). */
#define GF_BITS 8
/** The largest number in GF(256). */
#define GF_SIZE ((1 << GF_BITS) - 1)

/*
 * To speed up computations, we have tables for logarithm, exponent and inverse
 * of a number.
 */

/** Index->poly form conversion table: gf_exp[i] is the poly form of alpha^i. */
static unsigned char gf_exp[2 * GF_SIZE];

/** Poly->index form conversion table: gf_log[alpha^i] = i. */
static int gf_log[GF_SIZE + 1];

/** Inverse of a field element (entry 0 is unused since 0 has no inverse). */
static unsigned char inverse[GF_SIZE + 1];

/**
 * The multiplication table.
 *
 * We use a table for multiplication as well. It takes 64K, no big deal even on
 * a PDA, especially because it can be pre-initialized and put into a ROM.
 *
 * \sa \ref gf_mul.
 */
static unsigned char gf_mul_table[GF_SIZE + 1][GF_SIZE + 1];

/** Multiply two GF numbers (table lookup, constant time). */
#define gf_mul(x,y) gf_mul_table[x][y]
76 /* Compute x % GF_SIZE without a slow divide. */
77 __a_const static inline unsigned char modnn(int x)
78 {
79 while (x >= GF_SIZE) {
80 x -= GF_SIZE;
81 x = (x >> GF_BITS) + (x & GF_SIZE);
82 }
83 return x;
84 }
85
86 static void init_mul_table(void)
87 {
88 int i, j;
89 for (i = 0; i < GF_SIZE + 1; i++)
90 for (j = 0; j < GF_SIZE + 1; j++)
91 gf_mul_table[i][j] =
92 gf_exp[modnn(gf_log[i] + gf_log[j])];
93
94 for (j = 0; j < GF_SIZE + 1; j++)
95 gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
96 }
97
/*
 * Allocate a rows x cols matrix of GF(256) elements (one byte each).
 *
 * The operands are promoted to size_t before multiplying so the product
 * cannot overflow in int arithmetic. para_malloc() never returns NULL
 * (it aborts on allocation failure), so callers need not check the result.
 */
static unsigned char *alloc_matrix(int rows, int cols)
{
	return para_malloc((size_t)rows * (size_t)cols);
}
102
103 /*
104 * Initialize the data structures used for computations in GF.
105 *
106 * This generates GF(2**GF_BITS) from the irreducible polynomial p(X) in
107 * p[0]..p[m].
108 *
109 * Lookup tables:
110 * index->polynomial form gf_exp[] contains j= \alpha^i;
111 * polynomial form -> index form gf_log[ j = \alpha^i ] = i
112 * \alpha=x is the primitive element of GF(2^m)
113 *
114 * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
115 * multiplication of two numbers can be resolved without calling modnn
116 */
117 static void generate_gf(void)
118 {
119 int i;
120 unsigned char mask = 1;
121 char *pp = "101110001"; /* The primitive polynomial 1+x^2+x^3+x^4+x^8 */
122 gf_exp[GF_BITS] = 0; /* will be updated at the end of the 1st loop */
123
124 /*
125 * first, generate the (polynomial representation of) powers of \alpha,
126 * which are stored in gf_exp[i] = \alpha ** i .
127 * At the same time build gf_log[gf_exp[i]] = i .
128 * The first GF_BITS powers are simply bits shifted to the left.
129 */
130 for (i = 0; i < GF_BITS; i++, mask <<= 1) {
131 gf_exp[i] = mask;
132 gf_log[gf_exp[i]] = i;
133 /*
134 * If pp[i] == 1 then \alpha ** i occurs in poly-repr
135 * gf_exp[GF_BITS] = \alpha ** GF_BITS
136 */
137 if (pp[i] == '1')
138 gf_exp[GF_BITS] ^= mask;
139 }
140 /*
141 * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so can also
142 * compute its inverse.
143 */
144 gf_log[gf_exp[GF_BITS]] = GF_BITS;
145 /*
146 * Poly-repr of \alpha ** (i+1) is given by poly-repr of \alpha ** i
147 * shifted left one-bit and accounting for any \alpha ** GF_BITS term
148 * that may occur when poly-repr of \alpha ** i is shifted.
149 */
150 mask = 1 << (GF_BITS - 1);
151 for (i = GF_BITS + 1; i < GF_SIZE; i++) {
152 if (gf_exp[i - 1] >= mask)
153 gf_exp[i] =
154 gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
155 else
156 gf_exp[i] = gf_exp[i - 1] << 1;
157 gf_log[gf_exp[i]] = i;
158 }
159 /*
160 * log(0) is not defined, so use a special value
161 */
162 gf_log[0] = GF_SIZE;
163 /* set the extended gf_exp values for fast multiply */
164 for (i = 0; i < GF_SIZE; i++)
165 gf_exp[i + GF_SIZE] = gf_exp[i];
166
167 inverse[0] = 0; /* 0 has no inverse. */
168 inverse[1] = 1;
169 for (i = 2; i <= GF_SIZE; i++)
170 inverse[i] = gf_exp[GF_SIZE - gf_log[i]];
171 }
172
173 /** How often the loop is unrolled. */
174 #define UNROLL 16
175
176 /*
177 * Compute dst[] = dst[] + c * src[]
178 *
179 * This is used often, so better optimize it! Currently the loop is unrolled 16
180 * times. The case c=0 is also optimized, whereas c=1 is not.
181 */
182 static void addmul(unsigned char *dst1, const unsigned char *src1,
183 unsigned char c, int sz)
184 {
185 unsigned char *dst, *lim, *col;
186 const unsigned char *src = src1;
187
188 if (c == 0)
189 return;
190
191 dst = dst1;
192 lim = &dst[sz - UNROLL + 1];
193 col = gf_mul_table[c];
194
195 for (; dst < lim; dst += UNROLL, src += UNROLL) {
196 dst[0] ^= col[src[0]];
197 dst[1] ^= col[src[1]];
198 dst[2] ^= col[src[2]];
199 dst[3] ^= col[src[3]];
200 dst[4] ^= col[src[4]];
201 dst[5] ^= col[src[5]];
202 dst[6] ^= col[src[6]];
203 dst[7] ^= col[src[7]];
204 dst[8] ^= col[src[8]];
205 dst[9] ^= col[src[9]];
206 dst[10] ^= col[src[10]];
207 dst[11] ^= col[src[11]];
208 dst[12] ^= col[src[12]];
209 dst[13] ^= col[src[13]];
210 dst[14] ^= col[src[14]];
211 dst[15] ^= col[src[15]];
212 }
213 lim += UNROLL - 1;
214 for (; dst < lim; dst++, src++) /* final components */
215 *dst ^= col[*src];
216 }
217
/*
 * Matrix multiplication over GF(256): compute c = a * b,
 * where a is n x k, b is k x m and c is n x m (all row-major).
 */
static void matmul(unsigned char *a, unsigned char *b, unsigned char *c,
		int n, int k, int m)
{
	int i, j, t;

	for (i = 0; i < n; i++) {
		const unsigned char *arow = &a[i * k];

		for (j = 0; j < m; j++) {
			unsigned char sum = 0; /* GF sum: XOR accumulates */

			for (t = 0; t < k; t++)
				sum ^= gf_mul(arow[t], b[t * m + j]);
			c[i * m + j] = sum;
		}
	}
}
235
236 /** Swap two numbers. */
237 #define FEC_SWAP(a,b) {typeof(a) tmp = a; a = b; b = tmp;}
238
239 /*
240 * Compute the inverse of a matrix.
241 *
242 * k is the size of the matrix 'src' (Gauss-Jordan, adapted from Numerical
243 * Recipes in C). Returns negative on errors.
244 */
245 static int invert_mat(unsigned char *src, int k)
246 {
247 int irow, icol, row, col, ix, error;
248 int *indxc = para_malloc(k * sizeof(int));
249 int *indxr = para_malloc(k * sizeof(int));
250 int *ipiv = para_malloc(k * sizeof(int)); /* elements used as pivots */
251 unsigned char c, *p, *id_row = alloc_matrix(1, k),
252 *temp_row = alloc_matrix(1, k);
253
254 memset(id_row, 0, k);
255 memset(ipiv, 0, k * sizeof(int));
256
257 for (col = 0; col < k; col++) {
258 unsigned char *pivot_row;
259 /*
260 * Zeroing column 'col', look for a non-zero element.
261 * First try on the diagonal, if it fails, look elsewhere.
262 */
263 irow = icol = -1;
264 if (ipiv[col] != 1 && src[col * k + col] != 0) {
265 irow = col;
266 icol = col;
267 goto found_piv;
268 }
269 for (row = 0; row < k; row++) {
270 if (ipiv[row] != 1) {
271 for (ix = 0; ix < k; ix++) {
272 if (ipiv[ix] == 0) {
273 if (src[row * k + ix] != 0) {
274 irow = row;
275 icol = ix;
276 goto found_piv;
277 }
278 } else if (ipiv[ix] > 1) {
279 error = -E_FEC_PIVOT;
280 goto fail;
281 }
282 }
283 }
284 }
285 error = -E_FEC_PIVOT;
286 if (icol == -1)
287 goto fail;
288 found_piv:
289 ++(ipiv[icol]);
290 /*
291 * swap rows irow and icol, so afterwards the diagonal element
292 * will be correct. Rarely done, not worth optimizing.
293 */
294 if (irow != icol)
295 for (ix = 0; ix < k; ix++)
296 FEC_SWAP(src[irow * k + ix], src[icol * k + ix]);
297 indxr[col] = irow;
298 indxc[col] = icol;
299 pivot_row = &src[icol * k];
300 error = -E_FEC_SINGULAR;
301 c = pivot_row[icol];
302 if (c == 0)
303 goto fail;
304 if (c != 1) { /* otherwise this is a NOP */
305 /*
306 * this is done often , but optimizing is not so
307 * fruitful, at least in the obvious ways (unrolling)
308 */
309 c = inverse[c];
310 pivot_row[icol] = 1;
311 for (ix = 0; ix < k; ix++)
312 pivot_row[ix] = gf_mul(c, pivot_row[ix]);
313 }
314 /*
315 * from all rows, remove multiples of the selected row to zero
316 * the relevant entry (in fact, the entry is not zero because
317 * we know it must be zero). (Here, if we know that the
318 * pivot_row is the identity, we can optimize the addmul).
319 */
320 id_row[icol] = 1;
321 if (memcmp(pivot_row, id_row, k) != 0) {
322 for (p = src, ix = 0; ix < k; ix++, p += k) {
323 if (ix != icol) {
324 c = p[icol];
325 p[icol] = 0;
326 addmul(p, pivot_row, c, k);
327 }
328 }
329 }
330 id_row[icol] = 0;
331 }
332 for (col = k - 1; col >= 0; col--) {
333 if (indxr[col] < 0 || indxr[col] >= k)
334 PARA_CRIT_LOG("AARGH, indxr[col] %d\n", indxr[col]);
335 else if (indxc[col] < 0 || indxc[col] >= k)
336 PARA_CRIT_LOG("AARGH, indxc[col] %d\n", indxc[col]);
337 else if (indxr[col] != indxc[col]) {
338 for (row = 0; row < k; row++) {
339 FEC_SWAP(src[row * k + indxr[col]],
340 src[row * k + indxc[col]]);
341 }
342 }
343 }
344 error = 0;
345 fail:
346 free(indxc);
347 free(indxr);
348 free(ipiv);
349 free(id_row);
350 free(temp_row);
351 return error;
352 }
353
/*
 * Invert a Vandermonde matrix in place.
 *
 * It assumes that the matrix is not singular and _IS_ a Vandermonde matrix.
 * Only uses the second column of the matrix, containing the p_i's (a
 * Vandermonde matrix is fully determined by these values).
 *
 * Algorithm borrowed from "Numerical recipes in C" -- sec.2.8, but largely
 * revised for GF purposes.
 */
static void invert_vdm(unsigned char *src, int k)
{
	int i, j, row, col;
	unsigned char *b, *c, *p, t, xx;

	if (k == 1) /* degenerate: a 1x1 Vandermonde matrix is (1) */
		return;
	/*
	 * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
	 * b holds the coefficient for the matrix inversion
	 */
	c = para_malloc(k);
	b = para_malloc(k);
	p = para_malloc(k);

	/* Extract the p_i's from the second column (stride k, offset 1). */
	for (j = 1, i = 0; i < k; i++, j += k) {
		c[i] = 0;
		p[i] = src[j];
	}
	/*
	 * construct coeffs recursively. We know c[k] = 1 (implicit) and start
	 * P_0 = x - p_0, then at each stage multiply by x - p_i generating P_i
	 * = x P_{i-1} - p_i P_{i-1} After k steps we are done.
	 */
	c[k - 1] = p[0]; /* really -p(0), but x = -x in GF(2^m) */
	for (i = 1; i < k; i++) {
		unsigned char p_i = p[i];
		for (j = k - 1 - (i - 1); j < k - 1; j++)
			c[j] ^= gf_mul(p_i, c[j + 1]);
		c[k - 1] ^= p_i;
	}

	for (row = 0; row < k; row++) {
		/*
		 * synthetic division of P(x) by (x - p_row): b[] receives the
		 * quotient coefficients and t the evaluation P'(p_row).
		 */
		xx = p[row];
		t = 1;
		b[k - 1] = 1; /* this is in fact c[k] */
		for (i = k - 2; i >= 0; i--) {
			b[i] = c[i + 1] ^ gf_mul(xx, b[i + 1]);
			t = gf_mul(xx, t) ^ b[i];
		}
		/* Row 'row' of the inverse becomes column 'row' of src. */
		for (col = 0; col < k; col++)
			src[col * k + row] = gf_mul(inverse[t], b[col]);
	}
	free(c);
	free(b);
	free(p);
}
413
/* Non-zero once the GF(256) lookup tables have been built. */
static int fec_initialized;

/* Build the GF(256) log/exp/inverse and multiplication tables (once). */
static void init_fec(void)
{
	generate_gf();
	init_mul_table();
	fec_initialized = 1;
}
422
/** Internal FEC parameters. */
struct fec_parms {
	/** Number of data slices. */
	int k;
	/** Number of slices (including redundant slices). */
	int n;
	/** The n * k encoding matrix, computed by fec_new(). */
	unsigned char *enc_matrix;
};
432
433 /**
434 * Deallocate a fec params structure.
435 *
436 * \param p The structure to free.
437 */
438 void fec_free(struct fec_parms *p)
439 {
440 if (!p)
441 return;
442 free(p->enc_matrix);
443 free(p);
444 }
445
446 /**
447 * Create a new encoder and return an opaque descriptor to it.
448 *
449 * \param k Number of input slices.
450 * \param n Number of output slices.
451 * \param result On success the Fec descriptor is returned here.
452 *
453 * \return Standard.
454 *
455 * This creates the k*n encoding matrix. It is computed starting with a
456 * Vandermonde matrix, and then transformed into a systematic matrix.
457 */
458 int fec_new(int k, int n, struct fec_parms **result)
459 {
460 int row, col;
461 unsigned char *p, *tmp_m;
462 struct fec_parms *parms;
463
464 if (!fec_initialized)
465 init_fec();
466
467 if (k < 1 || k > GF_SIZE + 1 || n > GF_SIZE + 1 || k > n)
468 return -E_FEC_PARMS;
469 parms = para_malloc(sizeof(struct fec_parms));
470 parms->k = k;
471 parms->n = n;
472 parms->enc_matrix = alloc_matrix(n, k);
473 tmp_m = alloc_matrix(n, k);
474 /*
475 * fill the matrix with powers of field elements, starting from 0.
476 * The first row is special, cannot be computed with exp. table.
477 */
478 tmp_m[0] = 1;
479 for (col = 1; col < k; col++)
480 tmp_m[col] = 0;
481 for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k) {
482 for (col = 0; col < k; col++)
483 p[col] = gf_exp[modnn(row * col)];
484 }
485
486 /*
487 * quick code to build systematic matrix: invert the top
488 * k*k vandermonde matrix, multiply right the bottom n-k rows
489 * by the inverse, and construct the identity matrix at the top.
490 */
491 invert_vdm(tmp_m, k); /* much faster than invert_mat */
492 matmul(tmp_m + k * k, tmp_m, parms->enc_matrix + k * k, n - k, k, k);
493 /*
494 * the upper matrix is I so do not bother with a slow multiply
495 */
496 memset(parms->enc_matrix, 0, k * k);
497 for (p = parms->enc_matrix, col = 0; col < k; col++, p += k + 1)
498 *p = 1;
499 free(tmp_m);
500 *result = parms;
501 return 0;
502 }
503
504 /**
505 * Compute one encoded slice of the given input.
506 *
507 * \param parms The fec parameters returned earlier by fec_new().
508 * \param src The \a k data slices to encode.
509 * \param dst Result pointer.
510 * \param idx The index of the slice to compute.
511 * \param sz The size of the input data packets.
512 *
513 * Encode the \a k slices of size \a sz given by \a src and store the output
514 * slice number \a idx in \a dst.
515 */
516 void fec_encode(struct fec_parms *parms, const unsigned char * const *src,
517 unsigned char *dst, int idx, int sz)
518 {
519 int i, k = parms->k;
520 unsigned char *p;
521
522 assert(idx <= parms->n);
523
524 if (idx < k) {
525 memcpy(dst, src[idx], sz);
526 return;
527 }
528 p = &(parms->enc_matrix[idx * k]);
529 memset(dst, 0, sz);
530 for (i = 0; i < k; i++)
531 addmul(dst, src[i], p[i], sz);
532 }
533
534 /* Move src packets in their position. */
535 static int shuffle(unsigned char **data, int *idx, int k)
536 {
537 int i;
538
539 for (i = 0; i < k;) {
540 if (idx[i] >= k || idx[i] == i)
541 i++;
542 else { /* put index and data at the right position */
543 int c = idx[i];
544
545 if (idx[c] == c) /* conflict */
546 return -E_FEC_BAD_IDX;
547 FEC_SWAP(idx[i], idx[c]);
548 FEC_SWAP(data[i], data[c]);
549 }
550 }
551 return 0;
552 }
553
554 /*
555 * Construct the decoding matrix given the indices. The encoding matrix must
556 * already be allocated.
557 */
558 static int build_decode_matrix(struct fec_parms *parms, int *idx,
559 unsigned char **result)
560 {
561 int ret = -E_FEC_BAD_IDX, i, k = parms->k;
562 unsigned char *p, *matrix = alloc_matrix(k, k);
563
564 for (i = 0, p = matrix; i < k; i++, p += k) {
565 if (idx[i] >= parms->n) /* invalid index */
566 goto err;
567 if (idx[i] < k) {
568 memset(p, 0, k);
569 p[i] = 1;
570 } else
571 memcpy(p, &(parms->enc_matrix[idx[i] * k]), k);
572 }
573 ret = invert_mat(matrix, k);
574 if (ret < 0)
575 goto err;
576 *result = matrix;
577 return 0;
578 err:
579 free(matrix);
580 *result = NULL;
581 return ret;
582 }
583
584 /**
585 * Decode one slice from the group of received slices.
586 *
587 * \param parms Pointer to fec params structure.
588 * \param data Pointers to received packets.
589 * \param idx Pointer to packet indices (gets modified).
590 * \param sz Size of each packet.
591 *
592 * \return Zero on success, -1 on errors.
593 *
594 * The \a data vector of received slices and the indices of slices are used to
595 * produce the correct output slice. The data slices are modified in-place.
596 */
597 int fec_decode(struct fec_parms *parms, unsigned char **data, int *idx,
598 int sz)
599 {
600 unsigned char *m_dec, **slice;
601 int ret, row, col, k = parms->k;
602
603 ret = shuffle(data, idx, k);
604 if (ret < 0)
605 return ret;
606 ret = build_decode_matrix(parms, idx, &m_dec);
607 if (ret < 0)
608 return ret;
609 /* do the actual decoding */
610 slice = para_malloc(k * sizeof(unsigned char *));
611 for (row = 0; row < k; row++) {
612 if (idx[row] >= k) {
613 slice[row] = para_calloc(sz);
614 for (col = 0; col < k; col++)
615 addmul(slice[row], data[col],
616 m_dec[row * k + col], sz);
617 }
618 }
619 /* move slices to their final destination */
620 for (row = 0; row < k; row++) {
621 if (idx[row] >= k) {
622 memcpy(data[row], slice[row], sz);
623 free(slice[row]);
624 }
625 }
626 free(slice);
627 free(m_dec);
628 return 0;
629 }