/*
 * fec.c -- forward error correction based on Vandermonde matrices
 * 980624
 * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
 *
 * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
 * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
 * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 */

#include "para.h"
#include "error.h"
#include "portable_io.h"
#include "string.h"
#include "fec.h"

#define GF_BITS 8 /* code over GF(256) */
#define GF_SIZE ((1 << GF_BITS) - 1)

/*
 * To speed up computations, we have tables for the logarithm, exponent and
 * inverse of a number. We use a table for multiplication as well (it takes
 * 64K, no big deal even on a PDA, especially because it can be pre-initialized
 * and put into a ROM). The macro gf_mul(x, y) takes care of multiplications.
 */
static unsigned char gf_exp[2 * GF_SIZE]; /* index->poly form conversion table */
static int gf_log[GF_SIZE + 1]; /* poly->index form conversion table */
static unsigned char inverse[GF_SIZE + 1]; /* inverse of a field element */
static unsigned char gf_mul_table[GF_SIZE + 1][GF_SIZE + 1];

/* Multiply two numbers. */
#define gf_mul(x,y) gf_mul_table[x][y]

/* Compute x % GF_SIZE without a slow divide. */
static inline unsigned char modnn(int x)
{
	while (x >= GF_SIZE) {
		x -= GF_SIZE;
		x = (x >> GF_BITS) + (x & GF_SIZE);
	}
	return x;
}
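
/*
 * Illustration (not part of the original code): a multiplication in GF(2^8)
 * can also be computed directly from the log/exp tables set up below. A
 * minimal sketch, with slow_gf_mul() being a hypothetical helper shown only
 * for comparison:
 *
 *	static unsigned char slow_gf_mul(unsigned char x, unsigned char y)
 *	{
 *		if (x == 0 || y == 0)
 *			return 0;
 *		return gf_exp[modnn(gf_log[x] + gf_log[y])];
 *	}
 *
 * init_mul_table() caches exactly this value for every pair (x, y) in the
 * 64K gf_mul_table, so gf_mul() costs a single table lookup.
 */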

static void init_mul_table(void)
{
	int i, j;
	for (i = 0; i < GF_SIZE + 1; i++)
		for (j = 0; j < GF_SIZE + 1; j++)
			gf_mul_table[i][j] =
				gf_exp[modnn(gf_log[i] + gf_log[j])];

	for (j = 0; j < GF_SIZE + 1; j++)
		gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
}

static unsigned char *alloc_matrix(int rows, int cols)
{
	return para_malloc(rows * cols);
}

/*
 * Initialize the data structures used for computations in GF.
 *
 * This generates GF(2**GF_BITS) from the irreducible polynomial p(X) in
 * p[0]..p[m].
 *
 * Lookup tables:
 *	index->polynomial form: gf_exp[] contains j = \alpha^i;
 *	polynomial form -> index form: gf_log[j = \alpha^i] = i
 * where \alpha = x is the primitive element of GF(2^m).
 *
 * For efficiency, gf_exp[] has size 2 * GF_SIZE, so that a simple
 * multiplication of two numbers can be resolved without calling modnn().
 */
static void generate_gf(void)
{
	int i;
	unsigned char mask = 1;
	const char *pp = "101110001"; /* The primitive polynomial 1+x^2+x^3+x^4+x^8 */
	gf_exp[GF_BITS] = 0; /* will be updated at the end of the first loop */

	/*
	 * First, generate the (polynomial representation of the) powers of
	 * \alpha, which are stored in gf_exp[i] = \alpha ** i.
	 * At the same time build gf_log[gf_exp[i]] = i.
	 * The first GF_BITS powers are simply bits shifted to the left.
	 */
	for (i = 0; i < GF_BITS; i++, mask <<= 1) {
		gf_exp[i] = mask;
		gf_log[gf_exp[i]] = i;
		/*
		 * If pp[i] == '1', then \alpha ** i occurs in the polynomial
		 * representation of gf_exp[GF_BITS] = \alpha ** GF_BITS.
		 */
		if (pp[i] == '1')
			gf_exp[GF_BITS] ^= mask;
	}
	/*
	 * Now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so its
	 * logarithm can be recorded as well.
	 */
	gf_log[gf_exp[GF_BITS]] = GF_BITS;
	/*
	 * The polynomial representation of \alpha ** (i + 1) is given by the
	 * polynomial representation of \alpha ** i shifted left by one bit,
	 * accounting for any \alpha ** GF_BITS term that may occur when the
	 * polynomial representation of \alpha ** i is shifted.
	 */
	mask = 1 << (GF_BITS - 1);
	for (i = GF_BITS + 1; i < GF_SIZE; i++) {
		if (gf_exp[i - 1] >= mask)
			gf_exp[i] =
				gf_exp[GF_BITS] ^ ((gf_exp[i - 1] ^ mask) << 1);
		else
			gf_exp[i] = gf_exp[i - 1] << 1;
		gf_log[gf_exp[i]] = i;
	}
	/* log(0) is not defined, so use a special value. */
	gf_log[0] = GF_SIZE;
	/* Set the extended gf_exp values for fast multiply. */
	for (i = 0; i < GF_SIZE; i++)
		gf_exp[i + GF_SIZE] = gf_exp[i];

	inverse[0] = 0; /* 0 has no inverse */
	inverse[1] = 1;
	for (i = 2; i <= GF_SIZE; i++)
		inverse[i] = gf_exp[GF_SIZE - gf_log[i]];
}
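
/*
 * Worked example (illustrative only): with pp = "101110001", i.e.
 * p(x) = 1 + x^2 + x^3 + x^4 + x^8, the first loop above yields
 *
 *	gf_exp[0..7] = 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
 *
 * and accumulates gf_exp[8] = 0x01 ^ 0x04 ^ 0x08 ^ 0x10 = 0x1d, which is the
 * polynomial representation of \alpha^8 = 1 + x^2 + x^3 + x^4 obtained by
 * reducing x^8 modulo p(x). All higher powers follow from the shift-and-reduce
 * step in the second loop.
 */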

/*
 * Compute dst[] = dst[] + c * src[].
 *
 * This is used often, so it is worth optimizing. Currently the loop is
 * unrolled 16 times. The case c = 0 is also optimized, whereas c = 1 is not.
 */
#define UNROLL 16
static void addmul(unsigned char *dst1, const unsigned char *src1,
		unsigned char c, int sz)
{
	if (c == 0)
		return;
	unsigned char *dst = dst1, *lim = &dst[sz - UNROLL + 1],
		*col = gf_mul_table[c];
	const unsigned char *src = src1;

	for (; dst < lim; dst += UNROLL, src += UNROLL) {
		dst[0] ^= col[src[0]];
		dst[1] ^= col[src[1]];
		dst[2] ^= col[src[2]];
		dst[3] ^= col[src[3]];
		dst[4] ^= col[src[4]];
		dst[5] ^= col[src[5]];
		dst[6] ^= col[src[6]];
		dst[7] ^= col[src[7]];
		dst[8] ^= col[src[8]];
		dst[9] ^= col[src[9]];
		dst[10] ^= col[src[10]];
		dst[11] ^= col[src[11]];
		dst[12] ^= col[src[12]];
		dst[13] ^= col[src[13]];
		dst[14] ^= col[src[14]];
		dst[15] ^= col[src[15]];
	}
	lim += UNROLL - 1;
	for (; dst < lim; dst++, src++) /* final components */
		*dst ^= col[*src];
}
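
/*
 * For reference (sketch only, not used by the code): apart from the manual
 * unrolling, addmul() is equivalent to the plain loop
 *
 *	for (int i = 0; i < sz; i++)
 *		dst1[i] ^= gf_mul(c, src1[i]);
 *
 * i.e. it adds the GF(2^8) multiple c * src[] to dst[], addition in the field
 * being bitwise XOR.
 */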

/*
 * Compute C = AB where A is n*k, B is k*m, C is n*m.
 */
static void matmul(unsigned char *a, unsigned char *b, unsigned char *c,
		int n, int k, int m)
{
	int row, col, i;

	for (row = 0; row < n; row++) {
		for (col = 0; col < m; col++) {
			unsigned char *pa = &a[row * k], *pb = &b[col], acc = 0;
			for (i = 0; i < k; i++, pa++, pb += m)
				acc ^= gf_mul(*pa, *pb);
			c[row * m + col] = acc;
		}
	}
}

#define FEC_SWAP(a,b) {typeof(a) tmp = a; a = b; b = tmp;}

/*
 * Compute the inverse of a matrix.
 *
 * k is the size of the k * k matrix 'src' (Gauss-Jordan, adapted from
 * Numerical Recipes in C). Returns a negative error code if 'src' is
 * singular.
 */
static int invert_mat(unsigned char *src, int k)
{
	int irow, icol, row, col, ix, error;
	int *indxc = para_malloc(k * sizeof(int));
	int *indxr = para_malloc(k * sizeof(int));
	int *ipiv = para_malloc(k * sizeof(int)); /* elements used as pivots */
	unsigned char c, *p, *id_row = alloc_matrix(1, k),
		*temp_row = alloc_matrix(1, k);

	memset(id_row, 0, k);
	memset(ipiv, 0, k * sizeof(int));

	for (col = 0; col < k; col++) {
		unsigned char *pivot_row;
		/*
		 * To zero column 'col', look for a non-zero element (the
		 * pivot). First try the diagonal; if that fails, look
		 * elsewhere.
		 */
		irow = icol = -1;
		if (ipiv[col] != 1 && src[col * k + col] != 0) {
			irow = col;
			icol = col;
			goto found_piv;
		}
		for (row = 0; row < k; row++) {
			if (ipiv[row] != 1) {
				for (ix = 0; ix < k; ix++) {
					if (ipiv[ix] == 0) {
						if (src[row * k + ix] != 0) {
							irow = row;
							icol = ix;
							goto found_piv;
						}
					} else if (ipiv[ix] > 1) {
						error = -E_FEC_PIVOT;
						goto fail;
					}
				}
			}
		}
		error = -E_FEC_PIVOT;
		if (icol == -1)
			goto fail;
found_piv:
		++(ipiv[icol]);
		/*
		 * Swap rows irow and icol, so that afterwards the diagonal
		 * element is correct. Rarely done, not worth optimizing.
		 */
		if (irow != icol)
			for (ix = 0; ix < k; ix++)
				FEC_SWAP(src[irow * k + ix], src[icol * k + ix]);
		indxr[col] = irow;
		indxc[col] = icol;
		pivot_row = &src[icol * k];
		error = -E_FEC_SINGULAR;
		c = pivot_row[icol];
		if (c == 0)
			goto fail;
		if (c != 1) { /* otherwise this is a NOP */
			/*
			 * This is done often, but optimizing it is not very
			 * fruitful, at least in the obvious ways (unrolling).
			 */
			c = inverse[c];
			pivot_row[icol] = 1;
			for (ix = 0; ix < k; ix++)
				pivot_row[ix] = gf_mul(c, pivot_row[ix]);
		}
		/*
		 * From all other rows, subtract the appropriate multiple of
		 * the pivot row to eliminate column icol. The eliminated
		 * entry is not actually computed (we know it would be zero);
		 * as in the in-place Gauss-Jordan of Numerical Recipes, its
		 * slot is reused to accumulate the corresponding entry of the
		 * inverse matrix. (If the pivot_row equals the identity row,
		 * the memcmp() check below skips the addmul entirely.)
		 */
		id_row[icol] = 1;
		if (memcmp(pivot_row, id_row, k) != 0) {
			for (p = src, ix = 0; ix < k; ix++, p += k) {
				if (ix != icol) {
					c = p[icol];
					p[icol] = 0;
					addmul(p, pivot_row, c, k);
				}
			}
		}
		id_row[icol] = 0;
	}
	for (col = k - 1; col >= 0; col--) {
		if (indxr[col] < 0 || indxr[col] >= k)
			PARA_CRIT_LOG("AARGH, indxr[col] %d\n", indxr[col]);
		else if (indxc[col] < 0 || indxc[col] >= k)
			PARA_CRIT_LOG("AARGH, indxc[col] %d\n", indxc[col]);
		else if (indxr[col] != indxc[col]) {
			for (row = 0; row < k; row++) {
				FEC_SWAP(src[row * k + indxr[col]],
					src[row * k + indxc[col]]);
			}
		}
	}
	error = 0;
fail:
	free(indxc);
	free(indxr);
	free(ipiv);
	free(id_row);
	free(temp_row);
	return error;
}
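
/*
 * Side note (illustration only): over a field of characteristic 2 such as
 * GF(2^8), subtraction equals addition, so e.g. the 2x2 matrix
 *
 *	1 1
 *	0 1
 *
 * is its own inverse: multiplying it by itself gives an off-diagonal entry of
 * 1 ^ 1 = 0 and diagonal entries of 1, i.e. the identity matrix.
 */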

/*
 * Invert a Vandermonde matrix.
 *
 * This assumes that the matrix is not singular and that it _is_ a Vandermonde
 * matrix. Only the second column of the matrix, containing the p_i's, is
 * used.
 *
 * The algorithm is borrowed from "Numerical Recipes in C" (section 2.8), but
 * largely revised for GF purposes.
 */
static void invert_vdm(unsigned char *src, int k)
{
	int i, j, row, col;
	unsigned char *b, *c, *p, t, xx;

	if (k == 1) /* degenerate case */
		return;
	/*
	 * c holds the coefficients of P(x) = Prod (x - p_i), i = 0..k-1,
	 * b holds the coefficients for the matrix inversion.
	 */
	c = para_malloc(k);
	b = para_malloc(k);
	p = para_malloc(k);

	for (j = 1, i = 0; i < k; i++, j += k) {
		c[i] = 0;
		p[i] = src[j];
	}
	/*
	 * Construct the coefficients recursively. We know c[k] = 1 (implicit)
	 * and start with P_0 = x - p_0; at each stage we multiply by x - p_i,
	 * generating P_i = x P_{i-1} - p_i P_{i-1}. After k steps we are done.
	 */
	c[k - 1] = p[0]; /* really -p[0], but x = -x in GF(2^m) */
	for (i = 1; i < k; i++) {
		unsigned char p_i = p[i];
		for (j = k - 1 - (i - 1); j < k - 1; j++)
			c[j] ^= gf_mul(p_i, c[j + 1]);
		c[k - 1] ^= p_i;
	}

	for (row = 0; row < k; row++) {
		/*
		 * Synthetic division etc.
		 */
		xx = p[row];
		t = 1;
		b[k - 1] = 1; /* this is in fact c[k] */
		for (i = k - 2; i >= 0; i--) {
			b[i] = c[i + 1] ^ gf_mul(xx, b[i + 1]);
			t = gf_mul(xx, t) ^ b[i];
		}
		for (col = 0; col < k; col++)
			src[col * k + row] = gf_mul(inverse[t], b[col]);
	}
	free(c);
	free(b);
	free(p);
}
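
/*
 * Background (illustration only): column j of the resulting inverse holds the
 * coefficients of the Lagrange basis polynomial
 *
 *	L_j(x) = Prod_{i != j} (x - p_i) / (p_j - p_i),
 *
 * because evaluating L_j at p_i yields 1 for i == j and 0 otherwise. The
 * synthetic division above computes the coefficients of Prod_{i != j}
 * (x - p_i) in b[], while t accumulates (via Horner's rule) its value at p_j,
 * i.e. the denominator.
 */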

static int fec_initialized;

static void init_fec(void)
{
	generate_gf();
	init_mul_table();
	fec_initialized = 1;
}

struct fec_parms {
	int k, n; /* parameters of the code */
	unsigned char *enc_matrix;
};

/**
 * Deallocate a fec params structure.
 *
 * \param p The structure to free.
 */
void fec_free(struct fec_parms *p)
{
	if (!p)
		return;
	free(p->enc_matrix);
	free(p);
}

/**
 * Create a new encoder and return an opaque descriptor to it.
 *
 * \param k Number of input slices.
 * \param n Number of output slices.
 * \param result On success the fec descriptor is returned here.
 *
 * \return Standard.
 *
 * This creates the n * k encoding matrix. It is computed starting with a
 * Vandermonde matrix, which is then transformed into a systematic matrix.
 */
int fec_new(int k, int n, struct fec_parms **result)
{
	int row, col;
	unsigned char *p, *tmp_m;
	struct fec_parms *parms;

	if (!fec_initialized)
		init_fec();

	if (k < 1 || k > GF_SIZE + 1 || n > GF_SIZE + 1 || k > n)
		return -E_FEC_PARMS;
	parms = para_malloc(sizeof(struct fec_parms));
	parms->k = k;
	parms->n = n;
	parms->enc_matrix = alloc_matrix(n, k);
	tmp_m = alloc_matrix(n, k);
	/*
	 * Fill the matrix with powers of field elements, starting from 0.
	 * The first row is special and cannot be computed with the exp table.
	 */
	tmp_m[0] = 1;
	for (col = 1; col < k; col++)
		tmp_m[col] = 0;
	for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k) {
		for (col = 0; col < k; col++)
			p[col] = gf_exp[modnn(row * col)];
	}

	/*
	 * Quick code to build the systematic matrix: invert the top k * k
	 * Vandermonde matrix, right-multiply the bottom n - k rows by the
	 * inverse, and construct the identity matrix at the top.
	 */
	invert_vdm(tmp_m, k); /* much faster than invert_mat() */
	matmul(tmp_m + k * k, tmp_m, parms->enc_matrix + k * k, n - k, k, k);
	/* The upper matrix is the identity, so do not bother with a slow multiply. */
	memset(parms->enc_matrix, 0, k * k);
	for (p = parms->enc_matrix, col = 0; col < k; col++, p += k + 1)
		*p = 1;
	free(tmp_m);
	*result = parms;
	return 0;
}
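
/*
 * Layout after fec_new() (illustration only): for example, with k = 3 and
 * n = 5, enc_matrix consists of the 3 x 3 identity matrix on top (so the
 * first k output slices are verbatim copies of the data slices), followed by
 * 2 redundancy rows; each redundancy row holds the GF(2^8) coefficients that
 * fec_encode() combines the k data slices with.
 */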

/**
 * Compute one encoded slice of the given input.
 *
 * \param parms The fec parameters returned earlier by fec_new().
 * \param src The \a k data slices to encode.
 * \param dst Result pointer.
 * \param idx The index of the slice to compute.
 * \param sz The size of the input data packets.
 *
 * Encode the \a k slices of size \a sz given by \a src and store the output
 * slice number \a idx in \a dst.
 */
void fec_encode(struct fec_parms *parms, const unsigned char * const *src,
		unsigned char *dst, int idx, int sz)
{
	int i, k = parms->k;
	unsigned char *p;

	assert(idx < parms->n);

	if (idx < k) {
		memcpy(dst, src[idx], sz);
		return;
	}
	p = &(parms->enc_matrix[idx * k]);
	memset(dst, 0, sz);
	for (i = 0; i < k; i++)
		addmul(dst, src[i], p[i], sz);
}
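
/*
 * In other words (sketch only): for a redundancy slice (idx >= k), the output
 * is the linear combination
 *
 *	dst = sum_{i < k} enc_matrix[idx][i] * src[i]
 *
 * over GF(2^8), computed byte by byte via addmul(); for idx < k the code is
 * systematic and the data slice is returned unchanged.
 */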

/*
 * Move the src packets into their position: after this, a received slice
 * with original index i < k is stored at data[i].
 */
static int shuffle(unsigned char **data, int *idx, int k)
{
	int i;

	for (i = 0; i < k;) {
		if (idx[i] >= k || idx[i] == i)
			i++;
		else { /* put index and data at the right position */
			int c = idx[i];

			if (idx[c] == c) /* conflict */
				return -E_FEC_BAD_IDX;
			FEC_SWAP(idx[i], idx[c]);
			FEC_SWAP(data[i], data[c]);
		}
	}
	return 0;
}
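
/*
 * Example (illustration only): with k = 3 and received slice indices
 * idx = {3, 0, 1}, shuffle() swaps entries until idx = {0, 1, 3}, i.e. the
 * data slices 0 and 1 sit at positions 0 and 1 while the redundancy slice
 * (index 3) fills the remaining gap at position 2. A duplicated index would
 * be detected as a conflict and rejected with -E_FEC_BAD_IDX.
 */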

/*
 * Construct the decoding matrix given the indices. The encoding matrix must
 * already be allocated.
 */
static int build_decode_matrix(struct fec_parms *parms, int *idx,
		unsigned char **result)
{
	int ret = -E_FEC_BAD_IDX, i, k = parms->k;
	unsigned char *p, *matrix = alloc_matrix(k, k);

	for (i = 0, p = matrix; i < k; i++, p += k) {
		if (idx[i] >= parms->n) /* invalid index */
			goto err;
		if (idx[i] < k) {
			memset(p, 0, k);
			p[i] = 1;
		} else
			memcpy(p, &(parms->enc_matrix[idx[i] * k]), k);
	}
	ret = invert_mat(matrix, k);
	if (ret < 0)
		goto err;
	*result = matrix;
	return 0;
err:
	free(matrix);
	*result = NULL;
	return ret;
}

/**
 * Decode one group of received slices.
 *
 * \param parms Pointer to fec params structure.
 * \param data Pointers to received packets.
 * \param idx Pointer to packet indices (gets modified).
 * \param sz Size of each packet.
 *
 * \return Zero on success, a negative error code on errors.
 *
 * The \a data vector of received slices and the indices of the slices are
 * used to reconstruct the original data slices. The data slices are modified
 * in place.
 */
int fec_decode(struct fec_parms *parms, unsigned char **data, int *idx,
		int sz)
{
	unsigned char *m_dec, **slice;
	int ret, row, col, k = parms->k;

	ret = shuffle(data, idx, k);
	if (ret < 0)
		return ret;
	ret = build_decode_matrix(parms, idx, &m_dec);
	if (ret < 0)
		return ret;
	/* Do the actual decoding. */
	slice = para_malloc(k * sizeof(unsigned char *));
	for (row = 0; row < k; row++) {
		if (idx[row] >= k) {
			slice[row] = para_calloc(sz);
			for (col = 0; col < k; col++)
				addmul(slice[row], data[col],
					m_dec[row * k + col], sz);
		}
	}
	/* Move the decoded slices to their final destination. */
	for (row = 0; row < k; row++) {
		if (idx[row] >= k) {
			memcpy(data[row], slice[row], sz);
			free(slice[row]);
		}
	}
	free(slice);
	free(m_dec);
	return 0;
}
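
/*
 * Typical usage (a minimal sketch, not part of the original code; error
 * checking and the transport of slices are omitted, and SZ, K, N as well as
 * the buffers src, dst, received and indices are hypothetical, with K <= N):
 *
 *	struct fec_parms *parms;
 *
 *	fec_new(K, N, &parms);
 *	// sender: produce N slices from K data slices of SZ bytes each
 *	for (int i = 0; i < N; i++)
 *		fec_encode(parms, src, dst[i], i, SZ);
 *	// receiver: any K received slices, together with their indices,
 *	// are fed to fec_decode(), which restores the K data slices in place
 *	fec_decode(parms, received, indices, SZ);
 *	fec_free(parms);
 */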