X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=imdct.c;h=5f48ba44c11b1186f6ebea4d5cec33b34e622bcc;hp=f77da7bb7228c6a982dff314ba3fd459f3006d32;hb=e7e55d0ad711a46e4768066be197bb41caa5c5eb;hpb=6e3e6ad58782bbd5b6249700e6c52fc8e2a16809;ds=sidebyside diff --git a/imdct.c b/imdct.c index f77da7bb..5f48ba44 100644 --- a/imdct.c +++ b/imdct.c @@ -29,12 +29,19 @@ typedef float fftsample_t; +/** Canonical representation of a complex number. */ struct fft_complex { - fftsample_t re, im; + /** Real part. */ + fftsample_t re; + /** Imaginary part. */ + fftsample_t im; }; +/** FFT Lookup table. */ struct fft_context { + /** Number of bits of this instance of the FFT. */ int nbits; + /** The lookup table for cosine values. */ uint16_t *revtab; }; @@ -52,7 +59,7 @@ struct mdct_context { }; /** cos(2 * pi * x / n) for 0 <= x <= n / 4, followed by its reverse */ -#define COSINE_TAB(n) fftsample_t ff_cos_ ## n[n / 2] __aligned(16) +#define COSINE_TAB(n) fftsample_t cos_ ## n[n / 2] __a_aligned(16) COSINE_TAB(16); COSINE_TAB(32); @@ -68,10 +75,9 @@ COSINE_TAB(16384); COSINE_TAB(32768); COSINE_TAB(65536); -static fftsample_t *ff_cos_tabs[] = { - ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, - ff_cos_512, ff_cos_1024, ff_cos_2048, ff_cos_4096, ff_cos_8192, - ff_cos_16384, ff_cos_32768, ff_cos_65536, +static fftsample_t *cos_tabs[] = { + cos_16, cos_32, cos_64, cos_128, cos_256, cos_512, cos_1024, cos_2048, + cos_4096, cos_8192, cos_16384, cos_32768, cos_65536, }; static int split_radix_permutation(int i, int n) @@ -89,86 +95,86 @@ static int split_radix_permutation(int i, int n) return split_radix_permutation(i, m) * 4 - 1; } -/** 1 / sqrt(2). */ -#define SQRTHALF (float)0.70710678118654752440 - -#define BF(x,y,a,b) {\ - x = a - b;\ - y = a + b;\ +#define BF(x, y, a, b) {\ + x = a - b;\ + y = a + b;\ } -#define BUTTERFLIES(a0,a1,a2,a3) {\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, a0.re, t5);\ - BF(a3.im, a1.im, a1.im, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, a1.re, t4);\ - BF(a2.im, a0.im, a0.im, t6);\ +#define BUTTERFLIES(a0, a1, a2, a3) {\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, a0.re, t5);\ + BF(a3.im, a1.im, a1.im, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, a1.re, t4);\ + BF(a2.im, a0.im, a0.im, t6);\ } -// force loading all the inputs before storing any. -// this is slightly slower for small data, but avoids store->load aliasing -// for addresses separated by large powers of 2. -#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ - fftsample_t r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, r0, t5);\ - BF(a3.im, a1.im, i1, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, r1, t4);\ - BF(a2.im, a0.im, i0, t6);\ +/* + * Force loading all the inputs before storing any. This is slightly slower for + * small data, but avoids store->load aliasing for addresses separated by large + * powers of 2. + */ +#define BUTTERFLIES_BIG(a0, a1, a2, a3) {\ + fftsample_t r0 = a0.re, i0 = a0.im, r1 = a1.re, i1 = a1.im;\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, r0, t5);\ + BF(a3.im, a1.im, i1, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, r1, t4);\ + BF(a2.im, a0.im, i0, t6);\ } -#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ - t1 = a2.re * wre + a2.im * wim;\ - t2 = a2.im * wre - a2.re * wim;\ - t5 = a3.re * wre - a3.im * wim;\ - t6 = a3.im * wre + a3.re * wim;\ - BUTTERFLIES(a0,a1,a2,a3)\ +#define TRANSFORM(a0, a1, a2, a3, wre,wim) {\ + t1 = a2.re * wre + a2.im * wim;\ + t2 = a2.im * wre - a2.re * wim;\ + t5 = a3.re * wre - a3.im * wim;\ + t6 = a3.im * wre + a3.re * wim;\ + BUTTERFLIES(a0, a1, a2, a3)\ } -#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ - t1 = a2.re;\ - t2 = a2.im;\ - t5 = a3.re;\ - t6 = a3.im;\ - BUTTERFLIES(a0,a1,a2,a3)\ +#define TRANSFORM_ZERO(a0, a1, a2, a3) {\ + t1 = a2.re;\ + t2 = a2.im;\ + t5 = a3.re;\ + t6 = a3.im;\ + BUTTERFLIES(a0, a1, a2, a3)\ } -/* z[0...8n-1], w[1...2n-1] */ +/* z[0...8n - 1], w[1...2n - 1] */ #define PASS(name)\ static void name(struct fft_complex *z, const fftsample_t *wre, unsigned int n)\ {\ - fftsample_t t1, t2, t3, t4, t5, t6;\ - int o1 = 2*n;\ - int o2 = 4*n;\ - int o3 = 6*n;\ - const fftsample_t *wim = wre+o1;\ - n--;\ + fftsample_t t1, t2, t3, t4, t5, t6;\ + int o1 = 2 * n;\ + int o2 = 4 * n;\ + int o3 = 6 * n;\ + const fftsample_t *wim = wre + o1;\ + n--;\ \ - TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - do {\ - z += 2;\ - wre += 2;\ - wim -= 2;\ - TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - } while(--n);\ + TRANSFORM_ZERO(z[0], z[o1], z[o2], z[o3]);\ + TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], wre[1], wim[-1]);\ + do {\ + z += 2;\ + wre += 2;\ + wim -= 2;\ + TRANSFORM(z[0], z[o1], z[o2], z[o3], wre[0], wim[0]);\ + TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], wre[1], wim[-1]);\ + } while (--n);\ } PASS(pass) #undef BUTTERFLIES #define BUTTERFLIES BUTTERFLIES_BIG -#define DECL_FFT(n,n2,n4)\ +#define DECL_FFT(n, n2, n4)\ static void fft##n(struct fft_complex *z)\ {\ - fft##n2(z);\ - fft##n4(z+n4*2);\ - fft##n4(z+n4*3);\ - pass(z,ff_cos_##n,n4/2);\ + fft ## n2(z);\ + fft ## n4(z + n4 * 2);\ + fft ## n4(z + n4 * 3);\ + pass(z, cos_ ## n, n4 / 2);\ } + static void fft4(struct fft_complex *z) { fftsample_t t1, t2, t3, t4, t5, t6, t7, t8; @@ -200,7 +206,7 @@ static void fft8(struct fft_complex *z) BF(z[6].re, z[2].re, z[2].re, t7); BF(z[6].im, z[2].im, z[2].im, t8); - TRANSFORM(z[1], z[3], z[5], z[7], SQRTHALF, SQRTHALF); + TRANSFORM(z[1], z[3], z[5], z[7], M_SQRT1_2, M_SQRT1_2); } static void fft16(struct fft_complex *z) @@ -212,9 +218,9 @@ static void fft16(struct fft_complex *z) fft4(z + 12); TRANSFORM_ZERO(z[0], z[4], z[8], z[12]); - TRANSFORM(z[2], z[6], z[10], z[14], SQRTHALF, SQRTHALF); - TRANSFORM(z[1], z[5], z[9], z[13], ff_cos_16[1], ff_cos_16[3]); - TRANSFORM(z[3], z[7], z[11], z[15], ff_cos_16[3], ff_cos_16[1]); + TRANSFORM(z[2], z[6], z[10], z[14], M_SQRT1_2, M_SQRT1_2); + TRANSFORM(z[1], z[5], z[9], z[13], cos_16[1], cos_16[3]); + TRANSFORM(z[3], z[7], z[11], z[15], cos_16[3], cos_16[1]); } DECL_FFT(32, 16, 8) @@ -338,7 +344,7 @@ static int fft_init(struct fft_context *s, int nbits) for (j = 4; j <= nbits; j++) { int k = 1 << j; double freq = 2 * M_PI / k; - fftsample_t *tab = ff_cos_tabs[j - 4]; + fftsample_t *tab = cos_tabs[j - 4]; for (i = 0; i <= k / 4; i++) tab[i] = cos(i * freq); for (i = 1; i < k / 4; i++)