1 /** \file fec.c Forward error correction based on Vandermonde matrices. */
5 * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
7 * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
8 * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
9 * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials
20 * provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
25 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
27 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
29 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
31 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
40 #include "portable_io.h"
/** Code over GF(2^8). */
#define GF_BITS 8
/** The largest number in GF(256). */
#define GF_SIZE ((1 << GF_BITS) - 1)

/*
 * To speed up computations, we have tables for logarithm, exponent and
 * inverse of a number.
 */

/**
 * Index->poly form conversion table.
 *
 * Sized 2 * GF_SIZE so that gf_exp[log(a) + log(b)] can be read without
 * first reducing the sum mod GF_SIZE (see generate_gf()).
 */
static unsigned char gf_exp[2 * GF_SIZE];

/** Poly->index form conversion table (gf_log[0] holds a sentinel). */
static int gf_log[GF_SIZE + 1];

/** Inverse of a field element (inverse[0] is unused, 0 has no inverse). */
static unsigned char inverse[GF_SIZE + 1];

/**
 * The multiplication table.
 *
 * We use a table for multiplication as well. It takes 64K, no big deal even on
 * a PDA, especially because it can be pre-initialized and put into a ROM.
 */
static unsigned char gf_mul_table[GF_SIZE + 1][GF_SIZE + 1];

/** Multiply two GF numbers. */
#define gf_mul(x,y) gf_mul_table[x][y]
76 /* Compute x % GF_SIZE without a slow divide. */
77 __a_const
static inline unsigned char modnn(int x
)
79 while (x
>= GF_SIZE
) {
81 x
= (x
>> GF_BITS
) + (x
& GF_SIZE
);
86 static void init_mul_table(void)
89 for (i
= 0; i
< GF_SIZE
+ 1; i
++)
90 for (j
= 0; j
< GF_SIZE
+ 1; j
++)
92 gf_exp
[modnn(gf_log
[i
] + gf_log
[j
])];
94 for (j
= 0; j
< GF_SIZE
+ 1; j
++)
95 gf_mul_table
[0][j
] = gf_mul_table
[j
][0] = 0;
/* Allocate space for a rows x cols matrix of bytes. */
static unsigned char *alloc_matrix(int rows, int cols)
{
	int num_bytes = rows * cols;

	/* NOTE(review): para_malloc presumably aborts on OOM -- verify its contract */
	return para_malloc(num_bytes);
}
104 * Initialize the data structures used for computations in GF.
106 * This generates GF(2**GF_BITS) from the irreducible polynomial p(X) in
110 * index->polynomial form gf_exp[] contains j= \alpha^i;
111 * polynomial form -> index form gf_log[ j = \alpha^i ] = i
112 * \alpha=x is the primitive element of GF(2^m)
114 * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
115 * multiplication of two numbers can be resolved without calling modnn
117 static void generate_gf(void)
120 unsigned char mask
= 1;
121 char *pp
= "101110001"; /* The primitive polynomial 1+x^2+x^3+x^4+x^8 */
122 gf_exp
[GF_BITS
] = 0; /* will be updated at the end of the 1st loop */
125 * first, generate the (polynomial representation of) powers of \alpha,
126 * which are stored in gf_exp[i] = \alpha ** i .
127 * At the same time build gf_log[gf_exp[i]] = i .
128 * The first GF_BITS powers are simply bits shifted to the left.
130 for (i
= 0; i
< GF_BITS
; i
++, mask
<<= 1) {
132 gf_log
[gf_exp
[i
]] = i
;
134 * If pp[i] == 1 then \alpha ** i occurs in poly-repr
135 * gf_exp[GF_BITS] = \alpha ** GF_BITS
138 gf_exp
[GF_BITS
] ^= mask
;
141 * now gf_exp[GF_BITS] = \alpha ** GF_BITS is complete, so can also
142 * compute its inverse.
144 gf_log
[gf_exp
[GF_BITS
]] = GF_BITS
;
146 * Poly-repr of \alpha ** (i+1) is given by poly-repr of \alpha ** i
147 * shifted left one-bit and accounting for any \alpha ** GF_BITS term
148 * that may occur when poly-repr of \alpha ** i is shifted.
150 mask
= 1 << (GF_BITS
- 1);
151 for (i
= GF_BITS
+ 1; i
< GF_SIZE
; i
++) {
152 if (gf_exp
[i
- 1] >= mask
)
154 gf_exp
[GF_BITS
] ^ ((gf_exp
[i
- 1] ^ mask
) << 1);
156 gf_exp
[i
] = gf_exp
[i
- 1] << 1;
157 gf_log
[gf_exp
[i
]] = i
;
160 * log(0) is not defined, so use a special value
163 /* set the extended gf_exp values for fast multiply */
164 for (i
= 0; i
< GF_SIZE
; i
++)
165 gf_exp
[i
+ GF_SIZE
] = gf_exp
[i
];
167 inverse
[0] = 0; /* 0 has no inverse. */
169 for (i
= 2; i
<= GF_SIZE
; i
++)
170 inverse
[i
] = gf_exp
[GF_SIZE
- gf_log
[i
]];
173 /** How often the loop is unrolled. */
177 * Compute dst[] = dst[] + c * src[]
179 * This is used often, so better optimize it! Currently the loop is unrolled 16
180 * times. The case c=0 is also optimized, whereas c=1 is not.
182 static void addmul(unsigned char *dst1
, const unsigned char *src1
,
183 unsigned char c
, int sz
)
185 unsigned char *dst
, *lim
, *col
;
186 const unsigned char *src
= src1
;
192 lim
= &dst
[sz
- UNROLL
+ 1];
193 col
= gf_mul_table
[c
];
195 for (; dst
< lim
; dst
+= UNROLL
, src
+= UNROLL
) {
196 dst
[0] ^= col
[src
[0]];
197 dst
[1] ^= col
[src
[1]];
198 dst
[2] ^= col
[src
[2]];
199 dst
[3] ^= col
[src
[3]];
200 dst
[4] ^= col
[src
[4]];
201 dst
[5] ^= col
[src
[5]];
202 dst
[6] ^= col
[src
[6]];
203 dst
[7] ^= col
[src
[7]];
204 dst
[8] ^= col
[src
[8]];
205 dst
[9] ^= col
[src
[9]];
206 dst
[10] ^= col
[src
[10]];
207 dst
[11] ^= col
[src
[11]];
208 dst
[12] ^= col
[src
[12]];
209 dst
[13] ^= col
[src
[13]];
210 dst
[14] ^= col
[src
[14]];
211 dst
[15] ^= col
[src
[15]];
214 for (; dst
< lim
; dst
++, src
++) /* final components */
/**
 * Compute C = AB where A is n*k, B is k*m, C is n*m.
 *
 * All matrices are stored row-major; arithmetic is in GF(2^8), so the
 * accumulation uses XOR instead of addition.
 */
static void matmul(unsigned char *a, unsigned char *b, unsigned char *c,
		int n, int k, int m)
{
	int row, col, i;

	for (row = 0; row < n; row++) {
		for (col = 0; col < m; col++) {
			/* pa walks row 'row' of A, pb walks column 'col' of B */
			unsigned char *pa = &a[row * k], *pb = &b[col], acc = 0;

			for (i = 0; i < k; i++, pa++, pb += m)
				acc ^= gf_mul(*pa, *pb);
			c[row * m + col] = acc;
		}
	}
}
/**
 * Swap two numbers.
 *
 * do-while(0) makes the macro behave as a single statement (safe before an
 * 'else'); __typeof__ is the form accepted by gcc and clang even under
 * strict -std= modes.
 */
#define FEC_SWAP(a,b) do {__typeof__(a) tmp = a; a = b; b = tmp;} while (0)
240 * Compute the inverse of a matrix.
242 * k is the size of the matrix 'src' (Gauss-Jordan, adapted from Numerical
243 * Recipes in C). Returns negative on errors.
245 static int invert_mat(unsigned char *src
, int k
)
247 int irow
, icol
, row
, col
, ix
, error
;
248 int *indxc
= para_malloc(k
* sizeof(int));
249 int *indxr
= para_malloc(k
* sizeof(int));
250 int *ipiv
= para_malloc(k
* sizeof(int)); /* elements used as pivots */
251 unsigned char c
, *p
, *id_row
= alloc_matrix(1, k
),
252 *temp_row
= alloc_matrix(1, k
);
254 memset(id_row
, 0, k
);
255 memset(ipiv
, 0, k
* sizeof(int));
257 for (col
= 0; col
< k
; col
++) {
258 unsigned char *pivot_row
;
260 * Zeroing column 'col', look for a non-zero element.
261 * First try on the diagonal, if it fails, look elsewhere.
264 if (ipiv
[col
] != 1 && src
[col
* k
+ col
] != 0) {
269 for (row
= 0; row
< k
; row
++) {
270 if (ipiv
[row
] != 1) {
271 for (ix
= 0; ix
< k
; ix
++) {
273 if (src
[row
* k
+ ix
] != 0) {
278 } else if (ipiv
[ix
] > 1) {
279 error
= -E_FEC_PIVOT
;
285 error
= -E_FEC_PIVOT
;
291 * swap rows irow and icol, so afterwards the diagonal element
292 * will be correct. Rarely done, not worth optimizing.
295 for (ix
= 0; ix
< k
; ix
++)
296 FEC_SWAP(src
[irow
* k
+ ix
], src
[icol
* k
+ ix
]);
299 pivot_row
= &src
[icol
* k
];
300 error
= -E_FEC_SINGULAR
;
304 if (c
!= 1) { /* otherwise this is a NOP */
306 * this is done often , but optimizing is not so
307 * fruitful, at least in the obvious ways (unrolling)
311 for (ix
= 0; ix
< k
; ix
++)
312 pivot_row
[ix
] = gf_mul(c
, pivot_row
[ix
]);
315 * from all rows, remove multiples of the selected row to zero
316 * the relevant entry (in fact, the entry is not zero because
317 * we know it must be zero). (Here, if we know that the
318 * pivot_row is the identity, we can optimize the addmul).
321 if (memcmp(pivot_row
, id_row
, k
) != 0) {
322 for (p
= src
, ix
= 0; ix
< k
; ix
++, p
+= k
) {
326 addmul(p
, pivot_row
, c
, k
);
332 for (col
= k
- 1; col
>= 0; col
--) {
333 if (indxr
[col
] < 0 || indxr
[col
] >= k
)
334 PARA_CRIT_LOG("AARGH, indxr[col] %d\n", indxr
[col
]);
335 else if (indxc
[col
] < 0 || indxc
[col
] >= k
)
336 PARA_CRIT_LOG("AARGH, indxc[col] %d\n", indxc
[col
]);
337 else if (indxr
[col
] != indxc
[col
]) {
338 for (row
= 0; row
< k
; row
++) {
339 FEC_SWAP(src
[row
* k
+ indxr
[col
]],
340 src
[row
* k
+ indxc
[col
]]);
355 * Invert a Vandermonde matrix.
357 * It assumes that the matrix is not singular and _IS_ a Vandermonde matrix.
358 * Only uses the second column of the matrix, containing the p_i's.
360 * Algorithm borrowed from "Numerical recipes in C" -- sec.2.8, but largely
361 * revised for GF purposes.
363 static void invert_vdm(unsigned char *src
, int k
)
366 unsigned char *b
, *c
, *p
, t
, xx
;
368 if (k
== 1) /* degenerate */
371 * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
372 * b holds the coefficient for the matrix inversion
378 for (j
= 1, i
= 0; i
< k
; i
++, j
+= k
) {
383 * construct coeffs recursively. We know c[k] = 1 (implicit) and start
384 * P_0 = x - p_0, then at each stage multiply by x - p_i generating P_i
385 * = x P_{i-1} - p_i P_{i-1} After k steps we are done.
387 c
[k
- 1] = p
[0]; /* really -p(0), but x = -x in GF(2^m) */
388 for (i
= 1; i
< k
; i
++) {
389 unsigned char p_i
= p
[i
];
390 for (j
= k
- 1 - (i
- 1); j
< k
- 1; j
++)
391 c
[j
] ^= gf_mul(p_i
, c
[j
+ 1]);
395 for (row
= 0; row
< k
; row
++) {
397 * synthetic division etc.
401 b
[k
- 1] = 1; /* this is in fact c[k] */
402 for (i
= k
- 2; i
>= 0; i
--) {
403 b
[i
] = c
[i
+ 1] ^ gf_mul(xx
, b
[i
+ 1]);
404 t
= gf_mul(xx
, t
) ^ b
[i
];
406 for (col
= 0; col
< k
; col
++)
407 src
[col
* k
+ row
] = gf_mul(inverse
[t
], b
[col
]);
/** Non-zero once the GF tables below have been computed. */
static int fec_initialized;

/* One-time setup of the log/exp/inverse and multiplication tables. */
static void init_fec(void)
{
	generate_gf();
	init_mul_table();
	fec_initialized = 1;
}
/** Internal FEC parameters. */
struct fec_parms {
	/** Number of data slices. */
	int k;
	/** Number of slices (including redundant slices). */
	int n;
	/** The n * k encoding matrix, computed by fec_new(). */
	unsigned char *enc_matrix;
};
434 * Deallocate a fec params structure.
436 * \param p The structure to free.
438 void fec_free(struct fec_parms
*p
)
447 * Create a new encoder and return an opaque descriptor to it.
449 * \param k Number of input slices.
450 * \param n Number of output slices.
451 * \param result On success the Fec descriptor is returned here.
455 * This creates the k*n encoding matrix. It is computed starting with a
456 * Vandermonde matrix, and then transformed into a systematic matrix.
458 int fec_new(int k
, int n
, struct fec_parms
**result
)
461 unsigned char *p
, *tmp_m
;
462 struct fec_parms
*parms
;
464 if (!fec_initialized
)
467 if (k
< 1 || k
> GF_SIZE
+ 1 || n
> GF_SIZE
+ 1 || k
> n
)
469 parms
= para_malloc(sizeof(struct fec_parms
));
472 parms
->enc_matrix
= alloc_matrix(n
, k
);
473 tmp_m
= alloc_matrix(n
, k
);
475 * fill the matrix with powers of field elements, starting from 0.
476 * The first row is special, cannot be computed with exp. table.
479 for (col
= 1; col
< k
; col
++)
481 for (p
= tmp_m
+ k
, row
= 0; row
< n
- 1; row
++, p
+= k
) {
482 for (col
= 0; col
< k
; col
++)
483 p
[col
] = gf_exp
[modnn(row
* col
)];
487 * quick code to build systematic matrix: invert the top
488 * k*k vandermonde matrix, multiply right the bottom n-k rows
489 * by the inverse, and construct the identity matrix at the top.
491 invert_vdm(tmp_m
, k
); /* much faster than invert_mat */
492 matmul(tmp_m
+ k
* k
, tmp_m
, parms
->enc_matrix
+ k
* k
, n
- k
, k
, k
);
494 * the upper matrix is I so do not bother with a slow multiply
496 memset(parms
->enc_matrix
, 0, k
* k
);
497 for (p
= parms
->enc_matrix
, col
= 0; col
< k
; col
++, p
+= k
+ 1)
505 * Compute one encoded slice of the given input.
507 * \param parms The fec parameters returned earlier by fec_new().
508 * \param src The \a k data slices to encode.
509 * \param dst Result pointer.
510 * \param idx The index of the slice to compute.
511 * \param sz The size of the input data packets.
513 * Encode the \a k slices of size \a sz given by \a src and store the output
514 * slice number \a idx in \a dst.
516 void fec_encode(struct fec_parms
*parms
, const unsigned char * const *src
,
517 unsigned char *dst
, int idx
, int sz
)
522 assert(idx
<= parms
->n
);
525 memcpy(dst
, src
[idx
], sz
);
528 p
= &(parms
->enc_matrix
[idx
* k
]);
530 for (i
= 0; i
< k
; i
++)
531 addmul(dst
, src
[i
], p
[i
], sz
);
534 /* Move src packets in their position. */
535 static int shuffle(unsigned char **data
, int *idx
, int k
)
539 for (i
= 0; i
< k
;) {
540 if (idx
[i
] >= k
|| idx
[i
] == i
)
542 else { /* put index and data at the right position */
545 if (idx
[c
] == c
) /* conflict */
546 return -E_FEC_BAD_IDX
;
547 FEC_SWAP(idx
[i
], idx
[c
]);
548 FEC_SWAP(data
[i
], data
[c
]);
555 * Construct the decoding matrix given the indices. The encoding matrix must
556 * already be allocated.
558 static int build_decode_matrix(struct fec_parms
*parms
, int *idx
,
559 unsigned char **result
)
561 int ret
= -E_FEC_BAD_IDX
, i
, k
= parms
->k
;
562 unsigned char *p
, *matrix
= alloc_matrix(k
, k
);
564 for (i
= 0, p
= matrix
; i
< k
; i
++, p
+= k
) {
565 if (idx
[i
] >= parms
->n
) /* invalid index */
571 memcpy(p
, &(parms
->enc_matrix
[idx
[i
] * k
]), k
);
573 ret
= invert_mat(matrix
, k
);
585 * Decode one slice from the group of received slices.
587 * \param parms Pointer to fec params structure.
588 * \param data Pointers to received packets.
589 * \param idx Pointer to packet indices (gets modified).
590 * \param sz Size of each packet.
592 * \return Zero on success, -1 on errors.
594 * The \a data vector of received slices and the indices of slices are used to
595 * produce the correct output slice. The data slices are modified in-place.
597 int fec_decode(struct fec_parms
*parms
, unsigned char **data
, int *idx
,
600 unsigned char *m_dec
, **slice
;
601 int ret
, row
, col
, k
= parms
->k
;
603 ret
= shuffle(data
, idx
, k
);
606 ret
= build_decode_matrix(parms
, idx
, &m_dec
);
609 /* do the actual decoding */
610 slice
= para_malloc(k
* sizeof(unsigned char *));
611 for (row
= 0; row
< k
; row
++) {
613 slice
[row
] = para_calloc(sz
);
614 for (col
= 0; col
< k
; col
++)
615 addmul(slice
[row
], data
[col
],
616 m_dec
[row
* k
+ col
], sz
);
619 /* move slices to their final destination */
620 for (row
= 0; row
< k
; row
++) {
622 memcpy(data
[row
], slice
[row
], sz
);