X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=imdct.c;h=32928487b724653535d604adbf690e013a7eb516;hp=222fff1559c34ef8b8e5eabefa5e957b4b8ff993;hb=c0abcee0da53a6b399c3d16a62830aaa9ae21349;hpb=3375451ddfd7bc95e6100b418a2436bfe12f1354 diff --git a/imdct.c b/imdct.c index 222fff15..32928487 100644 --- a/imdct.c +++ b/imdct.c @@ -29,148 +29,152 @@ typedef float fftsample_t; -#define DECLARE_ALIGNED(n,t,v) t v __attribute__ ((aligned (n))) -#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v) -#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ - +/** Canonical representation of a complex number. */ struct fft_complex { - fftsample_t re, im; + /** Real part. */ + fftsample_t re; + /** Imaginary part. */ + fftsample_t im; }; +/** FFT Lookup table. */ struct fft_context { + /** Number of bits of this instance of the FFT. */ int nbits; - int inverse; + /** The permutation table, built from split_radix_permutation(). */ uint16_t *revtab; - struct fft_complex *exptab; - struct fft_complex *tmp_buf; }; struct mdct_context { - /** Size of MDCT (i.e. number of input data * 2). */ + /** Size of MDCT (number of input data * 2). */ int n; /** n = 2^nbits. */ int nbits; - /** pre/post rotation tables */ + /** Cosine table for pre/post rotation. */ fftsample_t *tcos; + /** Sine table for pre/post rotation. */ fftsample_t *tsin; + /** The context for the underlying fast Fourier transform. 
*/ struct fft_context fft; }; -/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ -DECLARE_ALIGNED_16(fftsample_t, ff_cos_16[8]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_32[16]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_64[32]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_128[64]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_256[128]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_512[256]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_1024[512]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_2048[1024]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_4096[2048]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_8192[4096]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_16384[8192]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_32768[16384]); -DECLARE_ALIGNED_16(fftsample_t, ff_cos_65536[32768]); - -static fftsample_t *ff_cos_tabs[] = { - ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, - ff_cos_512, ff_cos_1024, ff_cos_2048, ff_cos_4096, ff_cos_8192, - ff_cos_16384, ff_cos_32768, ff_cos_65536, +/** cos(2 * pi * x / n) for 0 <= x <= n / 4, followed by its reverse */ +#define COSINE_TAB(n) static fftsample_t cos_ ## n[n / 2] __a_aligned(16) + +COSINE_TAB(16); +COSINE_TAB(32); +COSINE_TAB(64); +COSINE_TAB(128); +COSINE_TAB(256); +COSINE_TAB(512); +COSINE_TAB(1024); +COSINE_TAB(2048); +COSINE_TAB(4096); +COSINE_TAB(8192); +COSINE_TAB(16384); +COSINE_TAB(32768); +COSINE_TAB(65536); + +static fftsample_t *cos_tabs[] = { + cos_16, cos_32, cos_64, cos_128, cos_256, cos_512, cos_1024, cos_2048, + cos_4096, cos_8192, cos_16384, cos_32768, cos_65536, }; -static int split_radix_permutation(int i, int n, int inverse) +__a_const static int split_radix_permutation(int i, int n) { int m; if (n <= 2) return i & 1; m = n >> 1; - if (!(i & m)) - return split_radix_permutation(i, m, inverse) * 2; + if ((i & m) == 0) + return split_radix_permutation(i, m) * 2; m >>= 1; - if (inverse == !(i & m)) - return split_radix_permutation(i, m, inverse) * 4 + 1; + if ((i & m) == 0) + return split_radix_permutation(i, m) * 4 + 1; 
else - return split_radix_permutation(i, m, inverse) * 4 - 1; + return split_radix_permutation(i, m) * 4 - 1; } -#define sqrthalf (float)M_SQRT1_2 - -#define BF(x,y,a,b) {\ - x = a - b;\ - y = a + b;\ +#define BF(x, y, a, b) {\ + x = a - b;\ + y = a + b;\ } -#define BUTTERFLIES(a0,a1,a2,a3) {\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, a0.re, t5);\ - BF(a3.im, a1.im, a1.im, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, a1.re, t4);\ - BF(a2.im, a0.im, a0.im, t6);\ +#define BUTTERFLIES(a0, a1, a2, a3) {\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, a0.re, t5);\ + BF(a3.im, a1.im, a1.im, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, a1.re, t4);\ + BF(a2.im, a0.im, a0.im, t6);\ } -// force loading all the inputs before storing any. -// this is slightly slower for small data, but avoids store->load aliasing -// for addresses separated by large powers of 2. -#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ - fftsample_t r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, r0, t5);\ - BF(a3.im, a1.im, i1, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, r1, t4);\ - BF(a2.im, a0.im, i0, t6);\ +/* + * Force loading all the inputs before storing any. This is slightly slower for + * small data, but avoids store->load aliasing for addresses separated by large + * powers of 2. 
+ */ +#define BUTTERFLIES_BIG(a0, a1, a2, a3) {\ + fftsample_t r0 = a0.re, i0 = a0.im, r1 = a1.re, i1 = a1.im;\ + BF(t3, t5, t5, t1);\ + BF(a2.re, a0.re, r0, t5);\ + BF(a3.im, a1.im, i1, t3);\ + BF(t4, t6, t2, t6);\ + BF(a3.re, a1.re, r1, t4);\ + BF(a2.im, a0.im, i0, t6);\ } -#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ - t1 = a2.re * wre + a2.im * wim;\ - t2 = a2.im * wre - a2.re * wim;\ - t5 = a3.re * wre - a3.im * wim;\ - t6 = a3.im * wre + a3.re * wim;\ - BUTTERFLIES(a0,a1,a2,a3)\ +#define TRANSFORM(a0, a1, a2, a3, wre,wim) {\ + t1 = a2.re * wre + a2.im * wim;\ + t2 = a2.im * wre - a2.re * wim;\ + t5 = a3.re * wre - a3.im * wim;\ + t6 = a3.im * wre + a3.re * wim;\ + BUTTERFLIES(a0, a1, a2, a3)\ } -#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ - t1 = a2.re;\ - t2 = a2.im;\ - t5 = a3.re;\ - t6 = a3.im;\ - BUTTERFLIES(a0,a1,a2,a3)\ +#define TRANSFORM_ZERO(a0, a1, a2, a3) {\ + t1 = a2.re;\ + t2 = a2.im;\ + t5 = a3.re;\ + t6 = a3.im;\ + BUTTERFLIES(a0, a1, a2, a3)\ } -/* z[0...8n-1], w[1...2n-1] */ +/* z[0...8n - 1], w[1...2n - 1] */ #define PASS(name)\ static void name(struct fft_complex *z, const fftsample_t *wre, unsigned int n)\ {\ - fftsample_t t1, t2, t3, t4, t5, t6;\ - int o1 = 2*n;\ - int o2 = 4*n;\ - int o3 = 6*n;\ - const fftsample_t *wim = wre+o1;\ - n--;\ + fftsample_t t1, t2, t3, t4, t5, t6;\ + int o1 = 2 * n;\ + int o2 = 4 * n;\ + int o3 = 6 * n;\ + const fftsample_t *wim = wre + o1;\ + n--;\ \ - TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - do {\ - z += 2;\ - wre += 2;\ - wim -= 2;\ - TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - } while(--n);\ + TRANSFORM_ZERO(z[0], z[o1], z[o2], z[o3]);\ + TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], wre[1], wim[-1]);\ + do {\ + z += 2;\ + wre += 2;\ + wim -= 2;\ + TRANSFORM(z[0], z[o1], z[o2], z[o3], wre[0], wim[0]);\ + TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], wre[1], wim[-1]);\ + } while 
(--n);\ } PASS(pass) #undef BUTTERFLIES #define BUTTERFLIES BUTTERFLIES_BIG -#define DECL_FFT(n,n2,n4)\ +#define DECL_FFT(n, n2, n4)\ static void fft##n(struct fft_complex *z)\ {\ - fft##n2(z);\ - fft##n4(z+n4*2);\ - fft##n4(z+n4*3);\ - pass(z,ff_cos_##n,n4/2);\ + fft ## n2(z);\ + fft ## n4(z + n4 * 2);\ + fft ## n4(z + n4 * 3);\ + pass(z, cos_ ## n, n4 / 2);\ } + static void fft4(struct fft_complex *z) { fftsample_t t1, t2, t3, t4, t5, t6, t7, t8; @@ -202,7 +206,7 @@ static void fft8(struct fft_complex *z) BF(z[6].re, z[2].re, z[2].re, t7); BF(z[6].im, z[2].im, z[2].im, t8); - TRANSFORM(z[1], z[3], z[5], z[7], sqrthalf, sqrthalf); + TRANSFORM(z[1], z[3], z[5], z[7], M_SQRT1_2, M_SQRT1_2); } static void fft16(struct fft_complex *z) @@ -214,9 +218,9 @@ static void fft16(struct fft_complex *z) fft4(z + 12); TRANSFORM_ZERO(z[0], z[4], z[8], z[12]); - TRANSFORM(z[2], z[6], z[10], z[14], sqrthalf, sqrthalf); - TRANSFORM(z[1], z[5], z[9], z[13], ff_cos_16[1], ff_cos_16[3]); - TRANSFORM(z[3], z[7], z[11], z[15], ff_cos_16[3], ff_cos_16[1]); + TRANSFORM(z[2], z[6], z[10], z[14], M_SQRT1_2, M_SQRT1_2); + TRANSFORM(z[1], z[5], z[9], z[13], cos_16[1], cos_16[3]); + TRANSFORM(z[3], z[7], z[11], z[15], cos_16[3], cos_16[1]); } DECL_FFT(32, 16, 8) @@ -304,19 +308,22 @@ static void imdct_half(struct mdct_context *s, fftsample_t *output, } /** - * Compute the inverse MDCT of size N = 2^nbits. + * Compute the inverse MDCT. * + * \param ctx The initialized context structure. * \param output N samples. * \param input N/2 samples. + * + * \sa \ref imdct_init(). 
*/ -void imdct(struct mdct_context *s, float *output, const float *input) +void imdct(struct mdct_context *ctx, float *output, const float *input) { int k; - int n = 1 << s->nbits; + int n = 1 << ctx->nbits; int n2 = n >> 1; int n4 = n >> 2; - imdct_half(s, output + n4, input); + imdct_half(ctx, output + n4, input); for (k = 0; k < n4; k++) { output[k] = -output[n2 - k - 1]; @@ -324,7 +331,7 @@ void imdct(struct mdct_context *s, float *output, const float *input) } } -static int fft_init(struct fft_context *s, int nbits, int inverse) +static int fft_init(struct fft_context *s, int nbits) { int i, j, n; @@ -333,59 +340,31 @@ static int fft_init(struct fft_context *s, int nbits, int inverse) s->nbits = nbits; n = 1 << nbits; - s->tmp_buf = NULL; - s->exptab = para_malloc((n / 2) * sizeof(struct fft_complex)); s->revtab = para_malloc(n * sizeof(uint16_t)); - s->inverse = inverse; - for (j = 4; j <= nbits; j++) { int k = 1 << j; double freq = 2 * M_PI / k; - fftsample_t *tab = ff_cos_tabs[j - 4]; + fftsample_t *tab = cos_tabs[j - 4]; for (i = 0; i <= k / 4; i++) tab[i] = cos(i * freq); for (i = 1; i < k / 4; i++) tab[k / 2 - i] = tab[i]; } for (i = 0; i < n; i++) - s->revtab[-split_radix_permutation( - i, n, s->inverse) & (n - 1)] = i; - s->tmp_buf = para_malloc(n * sizeof(struct fft_complex)); + s->revtab[-split_radix_permutation(i, n) & (n - 1)] = i; return 0; } -static void fft_end(struct fft_context *ctx) -{ - freep(&ctx->revtab); - freep(&ctx->exptab); - freep(&ctx->tmp_buf); -} - -DECLARE_ALIGNED(16, float, ff_sine_128[128]); -DECLARE_ALIGNED(16, float, ff_sine_256[256]); -DECLARE_ALIGNED(16, float, ff_sine_512[512]); -DECLARE_ALIGNED(16, float, ff_sine_1024[1024]); -DECLARE_ALIGNED(16, float, ff_sine_2048[2048]); -DECLARE_ALIGNED(16, float, ff_sine_4096[4096]); - -float *ff_sine_windows[6] = { - ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, - ff_sine_2048, ff_sine_4096 -}; - -// Generate a sine window. 
-void sine_window_init(float *window, int n) -{ - int i; - - for (i = 0; i < n; i++) - window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n))); -} - /** - * Init MDCT or IMDCT computation. + * Initialize the inverse modified discrete cosine transform (IMDCT). + * + * \param nbits The number of bits to use (4 <= \a nbits <= 18). + * + * \param result On success, the opaque structure that must be passed to \ref imdct() is returned here. + * + * \return Standard. */ -int imdct_init(int nbits, int inverse, struct mdct_context **result) +int imdct_init(int nbits, struct mdct_context **result) { int ret, n, n4, i; double alpha; @@ -404,7 +383,7 @@ int imdct_init(int nbits, int inverse, struct mdct_context **result) s->tcos[i] = -cos(alpha); s->tsin[i] = -sin(alpha); } - ret = fft_init(&s->fft, s->nbits - 2, inverse); + ret = fft_init(&s->fft, s->nbits - 2); if (ret < 0) goto fail; *result = s; @@ -416,10 +395,15 @@ fail: return ret; } +/** + * Deallocate imdct resources. + * + * \param ctx The pointer obtained by imdct_init(). + */ void imdct_end(struct mdct_context *ctx) { - freep(&ctx->tcos); - freep(&ctx->tsin); - fft_end(&ctx->fft); + free(ctx->tcos); + free(ctx->tsin); + free(ctx->fft.revtab); free(ctx); }