X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=imdct.c;h=5f48ba44c11b1186f6ebea4d5cec33b34e622bcc;hp=db68bc834764b125f1929785069de49131acfc7b;hb=1e012cf40238883621692051a22fb9c7cad5e944;hpb=537de45f0091a6129bc26602496384b464117863 diff --git a/imdct.c b/imdct.c index db68bc83..5f48ba44 100644 --- a/imdct.c +++ b/imdct.c @@ -29,45 +29,55 @@ typedef float fftsample_t; +/** Canonical representation of a complex number. */ struct fft_complex { - fftsample_t re, im; + /** Real part. */ + fftsample_t re; + /** Imaginary part. */ + fftsample_t im; }; +/** FFT Lookup table. */ struct fft_context { + /** Number of bits of this instance of the FFT. */ int nbits; + /** The lookup table for cosine values. */ uint16_t *revtab; }; struct mdct_context { - /** Size of MDCT (i.e. number of input data * 2). */ + /** Size of MDCT (number of input data * 2). */ int n; /** n = 2^n bits. */ int nbits; - /** pre/post rotation tables */ + /** Cosine table for pre/post rotation. */ fftsample_t *tcos; + /** Sine table for pre/post rotation. */ fftsample_t *tsin; + /** The context for the underlying fast Fourier transform. */ struct fft_context fft; }; -/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ -DECLARE_ALIGNED_16(fftsample_t, ff_cos_16[8]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_32[16]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_64[32]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_128[64]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_256[128]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_512[256]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_1024[512]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_2048[1024]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_4096[2048]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_8192[4096]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_16384[8192]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_32768[16384]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_65536[32768]); - -static fftsample_t *ff_cos_tabs[] = { - ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, - ff_cos_512, ff_cos_1024, ff_cos_2048, ff_cos_4096, ff_cos_8192, - ff_cos_16384, ff_cos_32768, ff_cos_65536, +/** cos(2 * pi * x / n) for 0 <= x <= n / 4, followed by its reverse */ +#define COSINE_TAB(n) fftsample_t cos_ ## n[n / 2] __a_aligned(16) + +COSINE_TAB(16); +COSINE_TAB(32); +COSINE_TAB(64); +COSINE_TAB(128); +COSINE_TAB(256); +COSINE_TAB(512); +COSINE_TAB(1024); +COSINE_TAB(2048); +COSINE_TAB(4096); +COSINE_TAB(8192); +COSINE_TAB(16384); +COSINE_TAB(32768); +COSINE_TAB(65536); + +static fftsample_t *cos_tabs[] = { + cos_16, cos_32, cos_64, cos_128, cos_256, cos_512, cos_1024, cos_2048, + cos_4096, cos_8192, cos_16384, cos_32768, cos_65536, }; static int split_radix_permutation(int i, int n) @@ -85,85 +95,86 @@ static int split_radix_permutation(int i, int n) return split_radix_permutation(i, m) * 4 - 1; } -#define SQRTHALF (float)0.70710678118654752440 /* 1/sqrt(2) */ - -#define BF(x,y,a,b) {\ - x = a - b;\ - y = a + b;\ +#define BF(x, y, a, b) {\ + x = a - b;\ + y = a + b;\ } -#define BUTTERFLIES(a0,a1,a2,a3) {\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, a0.re, t5);\ - BF(a3.im, a1.im, a1.im, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, a1.re, t4);\ - BF(a2.im, a0.im, a0.im, t6);\ +#define BUTTERFLIES(a0, a1, a2, a3) {\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, a0.re, t5);\ + BF(a3.im, a1.im, a1.im, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, a1.re, t4);\ + BF(a2.im, a0.im, a0.im, t6);\ } -// force loading all the inputs before storing any. -// this is slightly slower for small data, but avoids store->load aliasing -// for addresses separated by large powers of 2. -#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ - fftsample_t r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, r0, t5);\ - BF(a3.im, a1.im, i1, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, r1, t4);\ - BF(a2.im, a0.im, i0, t6);\ +/* + * Force loading all the inputs before storing any. This is slightly slower for + * small data, but avoids store->load aliasing for addresses separated by large + * powers of 2. + */ +#define BUTTERFLIES_BIG(a0, a1, a2, a3) {\ + fftsample_t r0 = a0.re, i0 = a0.im, r1 = a1.re, i1 = a1.im;\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, r0, t5);\ + BF(a3.im, a1.im, i1, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, r1, t4);\ + BF(a2.im, a0.im, i0, t6);\ } -#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ - t1 = a2.re * wre + a2.im * wim;\ - t2 = a2.im * wre - a2.re * wim;\ - t5 = a3.re * wre - a3.im * wim;\ - t6 = a3.im * wre + a3.re * wim;\ - BUTTERFLIES(a0,a1,a2,a3)\ +#define TRANSFORM(a0, a1, a2, a3, wre,wim) {\ + t1 = a2.re * wre + a2.im * wim;\ + t2 = a2.im * wre - a2.re * wim;\ + t5 = a3.re * wre - a3.im * wim;\ + t6 = a3.im * wre + a3.re * wim;\ + BUTTERFLIES(a0, a1, a2, a3)\ } -#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ - t1 = a2.re;\ - t2 = a2.im;\ - t5 = a3.re;\ - t6 = a3.im;\ - BUTTERFLIES(a0,a1,a2,a3)\ +#define TRANSFORM_ZERO(a0, a1, a2, a3) {\ + t1 = a2.re;\ + t2 = a2.im;\ + t5 = a3.re;\ + t6 = a3.im;\ + BUTTERFLIES(a0, a1, a2, a3)\ } -/* z[0...8n-1], w[1...2n-1] */ +/* z[0...8n - 1], w[1...2n - 1] */ #define PASS(name)\ static void name(struct fft_complex *z, const fftsample_t *wre, unsigned int n)\ {\ - fftsample_t t1, t2, t3, t4, t5, t6;\ - int o1 = 2*n;\ - int o2 = 4*n;\ - int o3 = 6*n;\ - const fftsample_t *wim = wre+o1;\ - n--;\ + fftsample_t t1, t2, t3, t4, t5, t6;\ + int o1 = 2 * n;\ + int o2 = 4 * n;\ + int o3 = 6 * n;\ + const fftsample_t *wim = wre + o1;\ + n--;\ \ - TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - do {\ - z += 2;\ - wre += 2;\ - wim -= 2;\ - TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - } while(--n);\ + TRANSFORM_ZERO(z[0], z[o1], z[o2], z[o3]);\ + TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], wre[1], wim[-1]);\ + do {\ + z += 2;\ + wre += 2;\ + wim -= 2;\ + TRANSFORM(z[0], z[o1], z[o2], z[o3], wre[0], wim[0]);\ + TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], wre[1], wim[-1]);\ + } while (--n);\ } PASS(pass) #undef BUTTERFLIES #define BUTTERFLIES BUTTERFLIES_BIG -#define DECL_FFT(n,n2,n4)\ +#define DECL_FFT(n, n2, n4)\ static void fft##n(struct fft_complex *z)\ {\ - fft##n2(z);\ - fft##n4(z+n4*2);\ - fft##n4(z+n4*3);\ - pass(z,ff_cos_##n,n4/2);\ + fft ## n2(z);\ + fft ## n4(z + n4 * 2);\ + fft ## n4(z + n4 * 3);\ + pass(z, cos_ ## n, n4 / 2);\ } + static void fft4(struct fft_complex *z) { fftsample_t t1, t2, t3, t4, t5, t6, t7, t8; @@ -195,7 +206,7 @@ static void fft8(struct fft_complex *z) BF(z[6].re, z[2].re, z[2].re, t7); BF(z[6].im, z[2].im, z[2].im, t8); - TRANSFORM(z[1], z[3], z[5], z[7], SQRTHALF, SQRTHALF); + TRANSFORM(z[1], z[3], z[5], z[7], M_SQRT1_2, M_SQRT1_2); } static void fft16(struct fft_complex *z) @@ -207,9 +218,9 @@ static void fft16(struct fft_complex *z) fft4(z + 12); TRANSFORM_ZERO(z[0], z[4], z[8], z[12]); - TRANSFORM(z[2], z[6], z[10], z[14], SQRTHALF, SQRTHALF); - TRANSFORM(z[1], z[5], z[9], z[13], ff_cos_16[1], ff_cos_16[3]); - TRANSFORM(z[3], z[7], z[11], z[15], ff_cos_16[3], ff_cos_16[1]); + TRANSFORM(z[2], z[6], z[10], z[14], M_SQRT1_2, M_SQRT1_2); + TRANSFORM(z[1], z[5], z[9], z[13], cos_16[1], cos_16[3]); + TRANSFORM(z[3], z[7], z[11], z[15], cos_16[3], cos_16[1]); } DECL_FFT(32, 16, 8) @@ -297,19 +308,22 @@ static void imdct_half(struct mdct_context *s, fftsample_t *output, } /** - * Compute the inverse MDCT of size N = 2^nbits. + * Compute the inverse MDCT. * + * \param ctx The initialized context structure. * \param output N samples. * \param input N/2 samples. + * + * \sa \ref imdct_init(). */ -void imdct(struct mdct_context *s, float *output, const float *input) +void imdct(struct mdct_context *ctx, float *output, const float *input) { int k; - int n = 1 << s->nbits; + int n = 1 << ctx->nbits; int n2 = n >> 1; int n4 = n >> 2; - imdct_half(s, output + n4, input); + imdct_half(ctx, output + n4, input); for (k = 0; k < n4; k++) { output[k] = -output[n2 - k - 1]; @@ -330,7 +344,7 @@ static int fft_init(struct fft_context *s, int nbits) for (j = 4; j <= nbits; j++) { int k = 1 << j; double freq = 2 * M_PI / k; - fftsample_t *tab = ff_cos_tabs[j - 4]; + fftsample_t *tab = cos_tabs[j - 4]; for (i = 0; i <= k / 4; i++) tab[i] = cos(i * freq); for (i = 1; i < k / 4; i++) @@ -343,6 +357,12 @@ static int fft_init(struct fft_context *s, int nbits) /** * Initialize the inverse modified cosine transform. + * + * \param nbits The number of bits to use (4 <= \a nbits <= 18). + * + * \param result Opaque structure that must be passed to \ref imdct(). + * + * \return Standard. */ int imdct_init(int nbits, struct mdct_context **result) { @@ -375,10 +395,15 @@ fail: return ret; } +/** + * Deallocate imdct resources. + * + * \param ctx The pointer obtained by imdct_init(). + */ void imdct_end(struct mdct_context *ctx) { - freep(&ctx->tcos); - freep(&ctx->tsin); - freep(&ctx->fft.revtab); + free(ctx->tcos); + free(ctx->tsin); + free(ctx->fft.revtab); free(ctx); }