X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=imdct.c;h=5f48ba44c11b1186f6ebea4d5cec33b34e622bcc;hp=6e0360b8b9018d951c7f1062b6411d4d48c80f76;hb=5587494468627e20fe622b6055689717262d09ab;hpb=f30c765f3c837b42d4c335315621631945636643 diff --git a/imdct.c b/imdct.c index 6e0360b8..5f48ba44 100644 --- a/imdct.c +++ b/imdct.c @@ -59,7 +59,7 @@ struct mdct_context { }; /** cos(2 * pi * x / n) for 0 <= x <= n / 4, followed by its reverse */ -#define COSINE_TAB(n) fftsample_t cos_ ## n[n / 2] __aligned(16) +#define COSINE_TAB(n) fftsample_t cos_ ## n[n / 2] __a_aligned(16) COSINE_TAB(16); COSINE_TAB(32); @@ -95,86 +95,86 @@ static int split_radix_permutation(int i, int n) return split_radix_permutation(i, m) * 4 - 1; } -/** 1 / sqrt(2). */ -#define SQRTHALF (float)0.70710678118654752440 - -#define BF(x,y,a,b) {\ - x = a - b;\ - y = a + b;\ +#define BF(x, y, a, b) {\ + x = a - b;\ + y = a + b;\ } -#define BUTTERFLIES(a0,a1,a2,a3) {\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, a0.re, t5);\ - BF(a3.im, a1.im, a1.im, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, a1.re, t4);\ - BF(a2.im, a0.im, a0.im, t6);\ +#define BUTTERFLIES(a0, a1, a2, a3) {\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, a0.re, t5);\ + BF(a3.im, a1.im, a1.im, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, a1.re, t4);\ + BF(a2.im, a0.im, a0.im, t6);\ } -// force loading all the inputs before storing any. -// this is slightly slower for small data, but avoids store->load aliasing -// for addresses separated by large powers of 2. -#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ - fftsample_t r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, r0, t5);\ - BF(a3.im, a1.im, i1, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, r1, t4);\ - BF(a2.im, a0.im, i0, t6);\ +/* + * Force loading all the inputs before storing any. This is slightly slower for + * small data, but avoids store->load aliasing for addresses separated by large + * powers of 2. + */ +#define BUTTERFLIES_BIG(a0, a1, a2, a3) {\ + fftsample_t r0 = a0.re, i0 = a0.im, r1 = a1.re, i1 = a1.im;\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, r0, t5);\ + BF(a3.im, a1.im, i1, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, r1, t4);\ + BF(a2.im, a0.im, i0, t6);\ } -#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ - t1 = a2.re * wre + a2.im * wim;\ - t2 = a2.im * wre - a2.re * wim;\ - t5 = a3.re * wre - a3.im * wim;\ - t6 = a3.im * wre + a3.re * wim;\ - BUTTERFLIES(a0,a1,a2,a3)\ +#define TRANSFORM(a0, a1, a2, a3, wre,wim) {\ + t1 = a2.re * wre + a2.im * wim;\ + t2 = a2.im * wre - a2.re * wim;\ + t5 = a3.re * wre - a3.im * wim;\ + t6 = a3.im * wre + a3.re * wim;\ + BUTTERFLIES(a0, a1, a2, a3)\ } -#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ - t1 = a2.re;\ - t2 = a2.im;\ - t5 = a3.re;\ - t6 = a3.im;\ - BUTTERFLIES(a0,a1,a2,a3)\ +#define TRANSFORM_ZERO(a0, a1, a2, a3) {\ + t1 = a2.re;\ + t2 = a2.im;\ + t5 = a3.re;\ + t6 = a3.im;\ + BUTTERFLIES(a0, a1, a2, a3)\ } -/* z[0...8n-1], w[1...2n-1] */ +/* z[0...8n - 1], w[1...2n - 1] */ #define PASS(name)\ static void name(struct fft_complex *z, const fftsample_t *wre, unsigned int n)\ {\ - fftsample_t t1, t2, t3, t4, t5, t6;\ - int o1 = 2*n;\ - int o2 = 4*n;\ - int o3 = 6*n;\ - const fftsample_t *wim = wre+o1;\ - n--;\ + fftsample_t t1, t2, t3, t4, t5, t6;\ + int o1 = 2 * n;\ + int o2 = 4 * n;\ + int o3 = 6 * n;\ + const fftsample_t *wim = wre + o1;\ + n--;\ \ - TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - do {\ - z += 2;\ - wre += 2;\ - wim -= 2;\ - TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - } while(--n);\ + TRANSFORM_ZERO(z[0], z[o1], z[o2], z[o3]);\ + TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], wre[1], wim[-1]);\ + do {\ + z += 2;\ + wre += 2;\ + wim -= 2;\ + TRANSFORM(z[0], z[o1], z[o2], z[o3], wre[0], wim[0]);\ + TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], wre[1], wim[-1]);\ + } while (--n);\ } PASS(pass) #undef BUTTERFLIES #define BUTTERFLIES BUTTERFLIES_BIG -#define DECL_FFT(n,n2,n4)\ +#define DECL_FFT(n, n2, n4)\ static void fft##n(struct fft_complex *z)\ {\ - fft##n2(z);\ - fft##n4(z+n4*2);\ - fft##n4(z+n4*3);\ - pass(z,cos_##n,n4/2);\ + fft ## n2(z);\ + fft ## n4(z + n4 * 2);\ + fft ## n4(z + n4 * 3);\ + pass(z, cos_ ## n, n4 / 2);\ } + static void fft4(struct fft_complex *z) { fftsample_t t1, t2, t3, t4, t5, t6, t7, t8; @@ -206,7 +206,7 @@ static void fft8(struct fft_complex *z) BF(z[6].re, z[2].re, z[2].re, t7); BF(z[6].im, z[2].im, z[2].im, t8); - TRANSFORM(z[1], z[3], z[5], z[7], SQRTHALF, SQRTHALF); + TRANSFORM(z[1], z[3], z[5], z[7], M_SQRT1_2, M_SQRT1_2); } static void fft16(struct fft_complex *z) @@ -218,7 +218,7 @@ static void fft16(struct fft_complex *z) fft4(z + 12); TRANSFORM_ZERO(z[0], z[4], z[8], z[12]); - TRANSFORM(z[2], z[6], z[10], z[14], SQRTHALF, SQRTHALF); + TRANSFORM(z[2], z[6], z[10], z[14], M_SQRT1_2, M_SQRT1_2); TRANSFORM(z[1], z[5], z[9], z[13], cos_16[1], cos_16[3]); TRANSFORM(z[3], z[7], z[11], z[15], cos_16[3], cos_16[1]); }