NEWS update.
[paraslash.git] / wmadec_filter.c
index 8298296..fdf3da9 100644 (file)
@@ -9,7 +9,7 @@
  * For licencing details see COPYING.LIB.
  */
 
-/** * \file wmadec_filter.c paraslash's WMA decoder.  */
+/** \file wmadec_filter.c paraslash's WMA decoder. */
 
 /*
  * This decoder handles Microsoft Windows Media Audio data version 2.
 
 #include <sys/time.h>
 #include <inttypes.h>
-#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <string.h>
 #include <regex.h>
+#include <sys/select.h>
 
 #include "para.h"
 #include "error.h"
@@ -34,7 +34,7 @@
 #include "sched.h"
 #include "filter.h"
 #include "bitstream.h"
-#include "mdct.h"
+#include "imdct.h"
 #include "wma.h"
 #include "wmadata.h"
 
 struct private_wmadec_data {
        struct asf_header_info ahi;
        struct getbit_context gb;
+       /** Whether to use the bit reservoir. */
        int use_bit_reservoir;
+       /** Whether to use variable block length. */
        int use_variable_block_len;
-       int use_exp_vlc;        ///< exponent coding: 0 = lsp, 1 = vlc + delta
-       int use_noise_coding;   ///< true if perceptual noise is added
+       /** Exponent coding method: 0 = LSP, 1 = VLC + delta. */
+       int use_exp_vlc;
+       /** Whether perceptual noise is added. */
+       int use_noise_coding;
        int byte_offset_bits;
        struct vlc exp_vlc;
        int exponent_sizes[BLOCK_NB_SIZES];
        uint16_t exponent_bands[BLOCK_NB_SIZES][25];
-       int high_band_start[BLOCK_NB_SIZES];    ///< index of first coef in high band
-       int coefs_start;        ///< first coded coef
+       /** The index of the first coef in high band. */
+       int high_band_start[BLOCK_NB_SIZES];
        int coefs_end[BLOCK_NB_SIZES];  ///< max number of coded coefficients
        int exponent_high_sizes[BLOCK_NB_SIZES];
        int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE];
@@ -84,19 +88,18 @@ struct private_wmadec_data {
        struct vlc coef_vlc[2];
        uint16_t *run_table[2];
        uint16_t *level_table[2];
-       uint16_t *int_table[2];
        const struct coef_vlc_table *coef_vlcs[2];
        /* frame info */
        int frame_len;          ///< frame length in samples
        int frame_len_bits;     ///< frame_len = 1 << frame_len_bits
-       int nb_block_sizes;     ///< number of block sizes
+       /** Number of block sizes. */
+       int nb_block_sizes;
        /* block info */
        int reset_block_lengths;
        int block_len_bits;     ///< log2 of current block length
        int next_block_len_bits;        ///< log2 of next block length
        int prev_block_len_bits;        ///< log2 of prev block length
        int block_len;          ///< block length in samples
-       int block_num;          ///< block number in current frame
        int block_pos;          ///< current position in frame
        uint8_t ms_stereo;      ///< true if mid/side stereo mode
        uint8_t channel_coded[MAX_CHANNELS];    ///< true if channel is coded
@@ -125,57 +128,71 @@ struct private_wmadec_data {
 };
 
 #define EXPVLCBITS 8
-#define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS)
+#define EXPMAX ((19 + EXPVLCBITS - 1) / EXPVLCBITS)
 
 #define HGAINVLCBITS 9
-#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS)
+#define HGAINMAX ((13 + HGAINVLCBITS - 1) / HGAINVLCBITS)
 
 #define VLCBITS 9
-#define VLCMAX ((22+VLCBITS-1)/VLCBITS)
+#define VLCMAX ((22 + VLCBITS - 1) / VLCBITS)
+
+#define SINE_WINDOW(x) float sine_ ## x[x] __aligned(16)
+
+SINE_WINDOW(128);
+SINE_WINDOW(256);
+SINE_WINDOW(512);
+SINE_WINDOW(1024);
+SINE_WINDOW(2048);
+SINE_WINDOW(4096);
+
+static float *sine_windows[6] = {
+       sine_128, sine_256, sine_512, sine_1024, sine_2048, sine_4096
+};
 
-static int wmadec_cleanup(struct private_wmadec_data *s)
+/* Generate a sine window. */
+static void sine_window_init(float *window, int n)
 {
        int i;
 
-       for (i = 0; i < s->nb_block_sizes; i++)
-               mdct_end(s->mdct_ctx[i]);
+       for (i = 0; i < n; i++)
+               window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n)));
+}
+
+static void wmadec_cleanup(struct private_wmadec_data *pwd)
+{
+       int i;
 
-       if (s->use_exp_vlc)
-               free_vlc(&s->exp_vlc);
-       if (s->use_noise_coding)
-               free_vlc(&s->hgain_vlc);
+       for (i = 0; i < pwd->nb_block_sizes; i++)
+               imdct_end(pwd->mdct_ctx[i]);
+       if (pwd->use_exp_vlc)
+               free_vlc(&pwd->exp_vlc);
+       if (pwd->use_noise_coding)
+               free_vlc(&pwd->hgain_vlc);
        for (i = 0; i < 2; i++) {
-               free_vlc(&s->coef_vlc[i]);
-               free(s->run_table[i]);
-               free(s->level_table[i]);
-               free(s->int_table[i]);
+               free_vlc(&pwd->coef_vlc[i]);
+               free(pwd->run_table[i]);
+               free(pwd->level_table[i]);
        }
-       return 0;
 }
 
-/* XXX: use same run/length optimization as mpeg decoders */
-//FIXME maybe split decode / encode or pass flag
 static void init_coef_vlc(struct vlc *vlc, uint16_t **prun_table,
-               uint16_t **plevel_table, uint16_t **pint_table,
-               const struct coef_vlc_table *vlc_table)
+               uint16_t **plevel_table, const struct coef_vlc_table *vlc_table)
 {
        int n = vlc_table->n;
        const uint8_t *table_bits = vlc_table->huffbits;
        const uint32_t *table_codes = vlc_table->huffcodes;
        const uint16_t *levels_table = vlc_table->levels;
-       uint16_t *run_table, *level_table, *int_table;
+       uint16_t *run_table, *level_table;
        int i, l, j, k, level;
 
-       init_vlc(vlc, VLCBITS, n, table_bits, 1, 1, table_codes, 4, 4);
+       init_vlc(vlc, VLCBITS, n, table_bits, table_codes, 4);
 
-       run_table = para_malloc(n * sizeof (uint16_t));
-       level_table = para_malloc(n * sizeof (uint16_t));
-       int_table = para_malloc(n * sizeof (uint16_t));
+       run_table = para_malloc(n * sizeof(uint16_t));
+       level_table = para_malloc(n * sizeof(uint16_t));
        i = 2;
        level = 1;
        k = 0;
        while (i < n) {
-               int_table[k] = i;
                l = levels_table[k++];
                for (j = 0; j < l; j++) {
                        run_table[i] = j;
@@ -186,23 +203,21 @@ static void init_coef_vlc(struct vlc *vlc, uint16_t **prun_table,
        }
        *prun_table = run_table;
        *plevel_table = level_table;
-       *pint_table = int_table;
 }
 
 /* compute the scale factor band sizes for each MDCT block size */
-static void compute_scale_factor_band_sizes(struct private_wmadec_data *s,
+static void compute_scale_factor_band_sizes(struct private_wmadec_data *pwd,
        float high_freq)
 {
-       struct asf_header_info *ahi = &s->ahi;
+       struct asf_header_info *ahi = &pwd->ahi;
        int a, b, pos, lpos, k, block_len, i, j, n;
        const uint8_t *table;
 
-       s->coefs_start = 0;
-       for (k = 0; k < s->nb_block_sizes; k++) {
-               block_len = s->frame_len >> k;
+       for (k = 0; k < pwd->nb_block_sizes; k++) {
+               block_len = pwd->frame_len >> k;
 
                table = NULL;
-               a = s->frame_len_bits - BLOCK_MIN_BITS - k;
+               a = pwd->frame_len_bits - BLOCK_MIN_BITS - k;
                if (a < 3) {
                        if (ahi->sample_rate >= 44100)
                                table = exponent_band_44100[a];
@@ -214,8 +229,8 @@ static void compute_scale_factor_band_sizes(struct private_wmadec_data *s,
                if (table) {
                        n = *table++;
                        for (i = 0; i < n; i++)
-                               s->exponent_bands[k][i] = table[i];
-                       s->exponent_sizes[k] = n;
+                               pwd->exponent_bands[k][i] = table[i];
+                       pwd->exponent_sizes[k] = n;
                } else {
                        j = 0;
                        lpos = 0;
@@ -227,45 +242,47 @@ static void compute_scale_factor_band_sizes(struct private_wmadec_data *s,
                                if (pos > block_len)
                                        pos = block_len;
                                if (pos > lpos)
-                                       s->exponent_bands[k][j++] = pos - lpos;
+                                       pwd->exponent_bands[k][j++] = pos - lpos;
                                if (pos >= block_len)
                                        break;
                                lpos = pos;
                        }
-                       s->exponent_sizes[k] = j;
+                       pwd->exponent_sizes[k] = j;
                }
 
                /* max number of coefs */
-               s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k;
+               pwd->coefs_end[k] = (pwd->frame_len - ((pwd->frame_len * 9) / 100)) >> k;
                /* high freq computation */
-               s->high_band_start[k] = (int) ((block_len * 2 * high_freq)
+               pwd->high_band_start[k] = (int) ((block_len * 2 * high_freq)
                        / ahi->sample_rate + 0.5);
-               n = s->exponent_sizes[k];
+               n = pwd->exponent_sizes[k];
                j = 0;
                pos = 0;
                for (i = 0; i < n; i++) {
                        int start, end;
                        start = pos;
-                       pos += s->exponent_bands[k][i];
+                       pos += pwd->exponent_bands[k][i];
                        end = pos;
-                       if (start < s->high_band_start[k])
-                               start = s->high_band_start[k];
-                       if (end > s->coefs_end[k])
-                               end = s->coefs_end[k];
+                       if (start < pwd->high_band_start[k])
+                               start = pwd->high_band_start[k];
+                       if (end > pwd->coefs_end[k])
+                               end = pwd->coefs_end[k];
                        if (end > start)
-                               s->exponent_high_bands[k][j++] = end - start;
+                               pwd->exponent_high_bands[k][j++] = end - start;
                }
-               s->exponent_high_sizes[k] = j;
+               pwd->exponent_high_sizes[k] = j;
        }
 }
 
-static int wma_init(struct private_wmadec_data *s, int flags2, struct asf_header_info *ahi)
+static int wma_init(struct private_wmadec_data *pwd)
 {
        int i;
        float bps1, high_freq;
        volatile float bps;
        int sample_rate1;
        int coef_vlc_table;
+       struct asf_header_info *ahi = &pwd->ahi;
+       int flags2 = ahi->flags2;
 
        if (ahi->sample_rate <= 0 || ahi->sample_rate > 50000
                || ahi->channels <= 0 || ahi->channels > 8
@@ -273,29 +290,27 @@ static int wma_init(struct private_wmadec_data *s, int flags2, struct asf_header
                return -E_WMA_BAD_PARAMS;
 
        /* compute MDCT block size */
-       if (ahi->sample_rate <= 16000) {
-               s->frame_len_bits = 9;
-       } else if (ahi->sample_rate <= 22050) {
-               s->frame_len_bits = 10;
-       } else {
-               s->frame_len_bits = 11;
-       }
-       s->frame_len = 1 << s->frame_len_bits;
-       if (s->use_variable_block_len) {
+       if (ahi->sample_rate <= 16000)
+               pwd->frame_len_bits = 9;
+       else if (ahi->sample_rate <= 22050)
+               pwd->frame_len_bits = 10;
+       else
+               pwd->frame_len_bits = 11;
+       pwd->frame_len = 1 << pwd->frame_len_bits;
+       if (pwd->use_variable_block_len) {
                int nb_max, nb;
                nb = ((flags2 >> 3) & 3) + 1;
                if ((ahi->bit_rate / ahi->channels) >= 32000)
                        nb += 2;
-               nb_max = s->frame_len_bits - BLOCK_MIN_BITS;
+               nb_max = pwd->frame_len_bits - BLOCK_MIN_BITS;
                if (nb > nb_max)
                        nb = nb_max;
-               s->nb_block_sizes = nb + 1;
-       } else {
-               s->nb_block_sizes = 1;
-       }
+               pwd->nb_block_sizes = nb + 1;
+       } else
+               pwd->nb_block_sizes = 1;
 
        /* init rate dependent parameters */
-       s->use_noise_coding = 1;
+       pwd->use_noise_coding = 1;
        high_freq = ahi->sample_rate * 0.5;
 
        /* wma2 rates are normalized */
@@ -312,7 +327,7 @@ static int wma_init(struct private_wmadec_data *s, int flags2, struct asf_header
                sample_rate1 = 8000;
 
        bps = (float) ahi->bit_rate / (float) (ahi->channels * ahi->sample_rate);
-       s->byte_offset_bits = wma_log2((int) (bps * s->frame_len / 8.0 + 0.5)) + 2;
+       pwd->byte_offset_bits = wma_log2((int) (bps * pwd->frame_len / 8.0 + 0.5)) + 2;
        /*
         * Compute high frequency value and choose if noise coding should be
         * activated.
@@ -322,12 +337,12 @@ static int wma_init(struct private_wmadec_data *s, int flags2, struct asf_header
                bps1 = bps * 1.6;
        if (sample_rate1 == 44100) {
                if (bps1 >= 0.61)
-                       s->use_noise_coding = 0;
+                       pwd->use_noise_coding = 0;
                else
                        high_freq = high_freq * 0.4;
        } else if (sample_rate1 == 22050) {
                if (bps1 >= 1.16)
-                       s->use_noise_coding = 0;
+                       pwd->use_noise_coding = 0;
                else if (bps1 >= 0.72)
                        high_freq = high_freq * 0.7;
                else
@@ -337,24 +352,22 @@ static int wma_init(struct private_wmadec_data *s, int flags2, struct asf_header
                        high_freq = high_freq * 0.5;
                else
                        high_freq = high_freq * 0.3;
-       } else if (sample_rate1 == 11025) {
+       } else if (sample_rate1 == 11025)
                high_freq = high_freq * 0.7;
-       else if (sample_rate1 == 8000) {
-               if (bps <= 0.625) {
+       else if (sample_rate1 == 8000) {
+               if (bps <= 0.625)
                        high_freq = high_freq * 0.5;
-               } else if (bps > 0.75) {
-                       s->use_noise_coding = 0;
-               } else {
+               else if (bps > 0.75)
+                       pwd->use_noise_coding = 0;
+               else
                        high_freq = high_freq * 0.65;
-               }
        } else {
-               if (bps >= 0.8) {
+               if (bps >= 0.8)
                        high_freq = high_freq * 0.75;
-               } else if (bps >= 0.6) {
+               else if (bps >= 0.6)
                        high_freq = high_freq * 0.6;
-               } else {
+               else
                        high_freq = high_freq * 0.5;
-               }
        }
        PARA_INFO_LOG("channels=%d sample_rate=%d "
                "bitrate=%d block_align=%d\n",
@@ -362,37 +375,37 @@ static int wma_init(struct private_wmadec_data *s, int flags2, struct asf_header
                ahi->bit_rate, ahi->block_align);
        PARA_INFO_LOG("frame_len=%d, bps=%f bps1=%f "
                "high_freq=%f bitoffset=%d\n",
-               s->frame_len, bps, bps1,
-               high_freq, s->byte_offset_bits);
+               pwd->frame_len, bps, bps1,
+               high_freq, pwd->byte_offset_bits);
        PARA_INFO_LOG("use_noise_coding=%d use_exp_vlc=%d nb_block_sizes=%d\n",
-               s->use_noise_coding, s->use_exp_vlc, s->nb_block_sizes);
+               pwd->use_noise_coding, pwd->use_exp_vlc, pwd->nb_block_sizes);
 
-       compute_scale_factor_band_sizes(s, high_freq);
+       compute_scale_factor_band_sizes(pwd, high_freq);
        /* init MDCT windows : simple sinus window */
-       for (i = 0; i < s->nb_block_sizes; i++) {
+       for (i = 0; i < pwd->nb_block_sizes; i++) {
                int n;
-               n = 1 << (s->frame_len_bits - i);
-               sine_window_init(ff_sine_windows[s->frame_len_bits - i - 7], n);
-               s->windows[i] = ff_sine_windows[s->frame_len_bits - i - 7];
+               n = 1 << (pwd->frame_len_bits - i);
+               sine_window_init(sine_windows[pwd->frame_len_bits - i - 7], n);
+               pwd->windows[i] = sine_windows[pwd->frame_len_bits - i - 7];
        }
 
-       s->reset_block_lengths = 1;
+       pwd->reset_block_lengths = 1;
 
-       if (s->use_noise_coding) {
+       if (pwd->use_noise_coding) {
                /* init the noise generator */
-               if (s->use_exp_vlc)
-                       s->noise_mult = 0.02;
+               if (pwd->use_exp_vlc)
+                       pwd->noise_mult = 0.02;
                else
-                       s->noise_mult = 0.04;
+                       pwd->noise_mult = 0.04;
 
                {
                        unsigned int seed;
                        float norm;
                        seed = 1;
-                       norm = (1.0 / (float) (1LL << 31)) * sqrt(3) * s->noise_mult;
+                       norm = (1.0 / (float) (1LL << 31)) * sqrt(3) * pwd->noise_mult;
                        for (i = 0; i < NOISE_TAB_SIZE; i++) {
                                seed = seed * 314159 + 1;
-                               s->noise_table[i] = (float) ((int) seed) * norm;
+                               pwd->noise_table[i] = (float) ((int) seed) * norm;
                        }
                }
        }
@@ -405,28 +418,28 @@ static int wma_init(struct private_wmadec_data *s, int flags2, struct asf_header
                else if (bps1 < 1.16)
                        coef_vlc_table = 1;
        }
-       s->coef_vlcs[0] = &coef_vlcs[coef_vlc_table * 2];
-       s->coef_vlcs[1] = &coef_vlcs[coef_vlc_table * 2 + 1];
-       init_coef_vlc(&s->coef_vlc[0], &s->run_table[0], &s->level_table[0],
-               &s->int_table[0], s->coef_vlcs[0]);
-       init_coef_vlc(&s->coef_vlc[1], &s->run_table[1], &s->level_table[1],
-               &s->int_table[1], s->coef_vlcs[1]);
+       pwd->coef_vlcs[0] = &coef_vlcs[coef_vlc_table * 2];
+       pwd->coef_vlcs[1] = &coef_vlcs[coef_vlc_table * 2 + 1];
+       init_coef_vlc(&pwd->coef_vlc[0], &pwd->run_table[0], &pwd->level_table[0],
+               pwd->coef_vlcs[0]);
+       init_coef_vlc(&pwd->coef_vlc[1], &pwd->run_table[1], &pwd->level_table[1],
+               pwd->coef_vlcs[1]);
        return 0;
 }
 
-static void wma_lsp_to_curve_init(struct private_wmadec_data *s, int frame_len)
+static void wma_lsp_to_curve_init(struct private_wmadec_data *pwd, int frame_len)
 {
        float wdel, a, b;
        int i, e, m;
 
        wdel = M_PI / frame_len;
        for (i = 0; i < frame_len; i++)
-               s->lsp_cos_table[i] = 2.0f * cos(wdel * i);
+               pwd->lsp_cos_table[i] = 2.0f * cos(wdel * i);
 
        /* tables for x^-0.25 computation */
        for (i = 0; i < 256; i++) {
                e = i - 126;
-               s->lsp_pow_e_table[i] = pow(2.0, e * -0.25);
+               pwd->lsp_pow_e_table[i] = pow(2.0, e * -0.25);
        }
 
        /* These two tables are needed to avoid two operations in pow_m1_4. */
@@ -435,57 +448,56 @@ static void wma_lsp_to_curve_init(struct private_wmadec_data *s, int frame_len)
                m = (1 << LSP_POW_BITS) + i;
                a = (float) m *(0.5 / (1 << LSP_POW_BITS));
                a = pow(a, -0.25);
-               s->lsp_pow_m_table1[i] = 2 * a - b;
-               s->lsp_pow_m_table2[i] = b - a;
+               pwd->lsp_pow_m_table1[i] = 2 * a - b;
+               pwd->lsp_pow_m_table2[i] = b - a;
                b = a;
        }
 }
 
 static int wma_decode_init(char *initial_buf, int len, struct private_wmadec_data **result)
 {
-       struct private_wmadec_data *s;
+       struct private_wmadec_data *pwd;
        int ret, i;
 
-       if (len < 18)
-               return 0;
-
        PARA_NOTICE_LOG("initial buf: %d bytes\n", len);
-       s = para_calloc(sizeof(*s));
-       ret = read_asf_header(initial_buf, len, &s->ahi);
-       if (ret < 0)
+       pwd = para_calloc(sizeof(*pwd));
+       ret = read_asf_header(initial_buf, len, &pwd->ahi);
+       if (ret <= 0) {
+               free(pwd);
                return ret;
+       }
 
-       s->use_exp_vlc = s->ahi.flags2 & 0x0001;
-       s->use_bit_reservoir = s->ahi.flags2 & 0x0002;
-       s->use_variable_block_len = s->ahi.flags2 & 0x0004;
+       pwd->use_exp_vlc = pwd->ahi.flags2 & 0x0001;
+       pwd->use_bit_reservoir = pwd->ahi.flags2 & 0x0002;
+       pwd->use_variable_block_len = pwd->ahi.flags2 & 0x0004;
 
-       ret = wma_init(s, s->ahi.flags2, &s->ahi);
+       ret = wma_init(pwd);
        if (ret < 0)
                return ret;
        /* init MDCT */
-       for (i = 0; i < s->nb_block_sizes; i++) {
-               ret = mdct_init(s->frame_len_bits - i + 1, 1, &s->mdct_ctx[i]);
+       for (i = 0; i < pwd->nb_block_sizes; i++) {
+               ret = imdct_init(pwd->frame_len_bits - i + 1, &pwd->mdct_ctx[i]);
                if (ret < 0)
                        return ret;
        }
-       if (s->use_noise_coding) {
+       if (pwd->use_noise_coding) {
                PARA_INFO_LOG("using noise coding\n");
-               init_vlc(&s->hgain_vlc, HGAINVLCBITS,
-                       sizeof (ff_wma_hgain_huffbits), ff_wma_hgain_huffbits,
-                       1, 1, ff_wma_hgain_huffcodes, 2, 2);
+               init_vlc(&pwd->hgain_vlc, HGAINVLCBITS,
+                       sizeof(wma_hgain_huffbits), wma_hgain_huffbits,
+                       wma_hgain_huffcodes, 2);
        }
 
-       if (s->use_exp_vlc) {
+       if (pwd->use_exp_vlc) {
                PARA_INFO_LOG("using exp_vlc\n");
-               init_vlc(&s->exp_vlc, EXPVLCBITS,
-               sizeof (ff_wma_scale_huffbits), ff_wma_scale_huffbits,
-               1, 1, ff_wma_scale_huffcodes, 4, 4);
+               init_vlc(&pwd->exp_vlc, EXPVLCBITS,
+               sizeof(wma_scale_huffbits), wma_scale_huffbits,
+               wma_scale_huffcodes, 4);
        } else {
                PARA_INFO_LOG("using curve\n");
-               wma_lsp_to_curve_init(s, s->frame_len);
+               wma_lsp_to_curve_init(pwd, pwd->frame_len);
        }
-       *result = s;
-       return s->ahi.header_len;
+       *result = pwd;
+       return pwd->ahi.header_len;
 }
 
 /**
@@ -494,7 +506,7 @@ static int wma_decode_init(char *initial_buf, int len, struct private_wmadec_dat
  * expense (linear interpolation approximately doubles the number of
  * bits of precision).
  */
-static inline float pow_m1_4(struct private_wmadec_data *s, float x)
+static inline float pow_m1_4(struct private_wmadec_data *pwd, float x)
 {
        union {
                float f;
@@ -508,12 +520,12 @@ static inline float pow_m1_4(struct private_wmadec_data *s, float x)
        m = (u.v >> (23 - LSP_POW_BITS)) & ((1 << LSP_POW_BITS) - 1);
        /* build interpolation scale: 1 <= t < 2. */
        t.v = ((u.v << LSP_POW_BITS) & ((1 << 23) - 1)) | (127 << 23);
-       a = s->lsp_pow_m_table1[m];
-       b = s->lsp_pow_m_table2[m];
-       return s->lsp_pow_e_table[e] * (a + b * t.f);
+       a = pwd->lsp_pow_m_table1[m];
+       b = pwd->lsp_pow_m_table2[m];
+       return pwd->lsp_pow_e_table[e] * (a + b * t.f);
 }
 
-static void wma_lsp_to_curve(struct private_wmadec_data *s,
+static void wma_lsp_to_curve(struct private_wmadec_data *pwd,
                float *out, float *val_max_ptr, int n, float *lsp)
 {
        int i, j;
@@ -523,7 +535,7 @@ static void wma_lsp_to_curve(struct private_wmadec_data *s,
        for (i = 0; i < n; i++) {
                p = 0.5f;
                q = 0.5f;
-               w = s->lsp_cos_table[i];
+               w = pwd->lsp_cos_table[i];
                for (j = 1; j < NB_LSP_COEFS; j += 2) {
                        q *= w - lsp[j - 1];
                        p *= w - lsp[j];
@@ -531,7 +543,7 @@ static void wma_lsp_to_curve(struct private_wmadec_data *s,
                p *= p * (2.0f - w);
                q *= q * (2.0f + w);
                v = p + q;
-               v = pow_m1_4(s, v);
+               v = pow_m1_4(pwd, v);
                if (v > val_max)
                        val_max = v;
                out[i] = v;
@@ -540,62 +552,41 @@ static void wma_lsp_to_curve(struct private_wmadec_data *s,
 }
 
 /* Decode exponents coded with LSP coefficients (same idea as Vorbis). */
-static void decode_exp_lsp(struct private_wmadec_data *s, int ch)
+static void decode_exp_lsp(struct private_wmadec_data *pwd, int ch)
 {
        float lsp_coefs[NB_LSP_COEFS];
        int val, i;
 
        for (i = 0; i < NB_LSP_COEFS; i++) {
                if (i == 0 || i >= 8)
-                       val = get_bits(&s->gb, 3);
+                       val = get_bits(&pwd->gb, 3);
                else
-                       val = get_bits(&s->gb, 4);
-               lsp_coefs[i] = ff_wma_lsp_codebook[i][val];
+                       val = get_bits(&pwd->gb, 4);
+               lsp_coefs[i] = wma_lsp_codebook[i][val];
        }
 
-       wma_lsp_to_curve(s, s->exponents[ch], &s->max_exponent[ch],
-                        s->block_len, lsp_coefs);
-}
-
-/*
- * Parse a vlc code, faster then get_vlc().
- *
- * \param bits The number of bits which will be read at once, must be
- * identical to nb_bits in init_vlc()
- *
- * \param max_depth The number of times bits bits must be read to completely
- * read the longest vlc code = (max_vlc_length + bits - 1) / bits.
- */
-static int get_vlc2(struct getbit_context *s, VLC_TYPE(*table)[2],
-               int bits, int max_depth)
-{
-       int code;
-
-       OPEN_READER(re, s)
-       UPDATE_CACHE(re, s)
-       GET_VLC(code, re, s, table, bits, max_depth)
-       CLOSE_READER(re, s)
-       return code;
+       wma_lsp_to_curve(pwd, pwd->exponents[ch], &pwd->max_exponent[ch],
+               pwd->block_len, lsp_coefs);
 }
 
 /* Decode exponents coded with VLC codes. */
-static int decode_exp_vlc(struct private_wmadec_data *s, int ch)
+static int decode_exp_vlc(struct private_wmadec_data *pwd, int ch)
 {
        int last_exp, n, code;
        const uint16_t *ptr, *band_ptr;
        float v, *q, max_scale, *q_end;
 
-       band_ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
+       band_ptr = pwd->exponent_bands[pwd->frame_len_bits - pwd->block_len_bits];
        ptr = band_ptr;
-       q = s->exponents[ch];
-       q_end = q + s->block_len;
+       q = pwd->exponents[ch];
+       q_end = q + pwd->block_len;
        max_scale = 0;
        last_exp = 36;
 
        while (q < q_end) {
-               code = get_vlc2(&s->gb, s->exp_vlc.table, EXPVLCBITS, EXPMAX);
+               code = get_vlc(&pwd->gb, pwd->exp_vlc.table, EXPVLCBITS, EXPMAX);
                if (code < 0)
-                       return -1;
+                       return code;
                /* NOTE: this offset is the same as MPEG4 AAC ! */
                last_exp += code - 60;
                /* XXX: use a table */
@@ -607,22 +598,25 @@ static int decode_exp_vlc(struct private_wmadec_data *s, int ch)
                        *q++ = v;
                } while (--n);
        }
-       s->max_exponent[ch] = max_scale;
+       pwd->max_exponent[ch] = max_scale;
        return 0;
 }
 
-static void vector_fmul_add(float *dst, const float *src0, const float *src1,
-               const float *src2, int src3, int len, int step)
+/* compute src0 * src1 + src2 */
+static inline void vector_mult_add(float *dst, const float *src0, const float *src1,
+               const float *src2, int len)
 {
        int i;
+
        for (i = 0; i < len; i++)
-               dst[i * step] = src0[i] * src1[i] + src2[i] + src3;
+               dst[i] = src0[i] * src1[i] + src2[i];
 }
 
-static void vector_fmul_reverse_c(float *dst, const float *src0,
+static inline void vector_mult_reverse(float *dst, const float *src0,
                const float *src1, int len)
 {
        int i;
+
        src1 += len - 1;
        for (i = 0; i < len; i++)
                dst[i] = src0[i] * src1[-i];
@@ -634,52 +628,40 @@ static void vector_fmul_reverse_c(float *dst, const float *src0,
  * We ensure that when the windows overlap their squared sum
  * is always 1 (MDCT reconstruction rule).
  */
-static void wma_window(struct private_wmadec_data *s, float *out)
+static void wma_window(struct private_wmadec_data *pwd, float *out)
 {
-       float *in = s->output;
+       float *in = pwd->output;
        int block_len, bsize, n;
 
        /* left part */
-       if (s->block_len_bits <= s->prev_block_len_bits) {
-               block_len = s->block_len;
-               bsize = s->frame_len_bits - s->block_len_bits;
-
-               vector_fmul_add(out, in, s->windows[bsize],
-                                        out, 0, block_len, 1);
-
+       if (pwd->block_len_bits <= pwd->prev_block_len_bits) {
+               block_len = pwd->block_len;
+               bsize = pwd->frame_len_bits - pwd->block_len_bits;
+               vector_mult_add(out, in, pwd->windows[bsize], out, block_len);
        } else {
-               block_len = 1 << s->prev_block_len_bits;
-               n = (s->block_len - block_len) / 2;
-               bsize = s->frame_len_bits - s->prev_block_len_bits;
-
-               vector_fmul_add(out + n, in + n, s->windows[bsize],
-                                        out + n, 0, block_len, 1);
-
+               block_len = 1 << pwd->prev_block_len_bits;
+               n = (pwd->block_len - block_len) / 2;
+               bsize = pwd->frame_len_bits - pwd->prev_block_len_bits;
+               vector_mult_add(out + n, in + n, pwd->windows[bsize], out + n,
+                       block_len);
                memcpy(out + n + block_len, in + n + block_len,
-                      n * sizeof (float));
+                       n * sizeof(float));
        }
-
-       out += s->block_len;
-       in += s->block_len;
-
+       out += pwd->block_len;
+       in += pwd->block_len;
        /* right part */
-       if (s->block_len_bits <= s->next_block_len_bits) {
-               block_len = s->block_len;
-               bsize = s->frame_len_bits - s->block_len_bits;
-
-               vector_fmul_reverse_c(out, in, s->windows[bsize], block_len);
-
+       if (pwd->block_len_bits <= pwd->next_block_len_bits) {
+               block_len = pwd->block_len;
+               bsize = pwd->frame_len_bits - pwd->block_len_bits;
+               vector_mult_reverse(out, in, pwd->windows[bsize], block_len);
        } else {
-               block_len = 1 << s->next_block_len_bits;
-               n = (s->block_len - block_len) / 2;
-               bsize = s->frame_len_bits - s->next_block_len_bits;
-
-               memcpy(out, in, n * sizeof (float));
-
-               vector_fmul_reverse_c(out + n, in + n, s->windows[bsize],
-                                     block_len);
-
-               memset(out + n + block_len, 0, n * sizeof (float));
+               block_len = 1 << pwd->next_block_len_bits;
+               n = (pwd->block_len - block_len) / 2;
+               bsize = pwd->frame_len_bits - pwd->next_block_len_bits;
+               memcpy(out, in, n * sizeof(float));
+               vector_mult_reverse(out + n, in + n, pwd->windows[bsize],
+                       block_len);
+               memset(out + n + block_len, 0, n * sizeof(float));
        }
 }
 
@@ -697,74 +679,220 @@ static int wma_total_gain_to_bits(int total_gain)
                return 9;
 }
 
+static int compute_high_band_values(struct private_wmadec_data *pwd,
+               int bsize, int nb_coefs[MAX_CHANNELS])
+{
+       int ch;
+
+       if (!pwd->use_noise_coding)
+               return 0;
+       for (ch = 0; ch < pwd->ahi.channels; ch++) {
+               int i, m, a;
+               if (!pwd->channel_coded[ch])
+                       continue;
+               m = pwd->exponent_high_sizes[bsize];
+               for (i = 0; i < m; i++) {
+                       a = get_bit(&pwd->gb);
+                       pwd->high_band_coded[ch][i] = a;
+                       if (!a)
+                               continue;
+                       nb_coefs[ch] -= pwd->exponent_high_bands[bsize][i];
+               }
+       }
+       for (ch = 0; ch < pwd->ahi.channels; ch++) {
+               int i, n, val;
+               if (!pwd->channel_coded[ch])
+                       continue;
+               n = pwd->exponent_high_sizes[bsize];
+               val = (int)0x80000000;
+               for (i = 0; i < n; i++) {
+                       if (!pwd->high_band_coded[ch][i])
+                               continue;
+                       if (val == (int)0x80000000)
+                               val = get_bits(&pwd->gb, 7) - 19;
+                       else {
+                               int code = get_vlc(&pwd->gb,
+                                       pwd->hgain_vlc.table, HGAINVLCBITS,
+                                       HGAINMAX);
+                               if (code < 0)
+                                       return code;
+                               val += code - 18;
+                       }
+                       pwd->high_band_values[ch][i] = val;
+               }
+       }
+       return 1;
+}
+
+static void compute_mdct_coefficients(struct private_wmadec_data *pwd,
+               int bsize, int total_gain, int nb_coefs[MAX_CHANNELS])
+{
+       int ch;
+       float mdct_norm = 1.0 / (pwd->block_len / 2);
+
+       for (ch = 0; ch < pwd->ahi.channels; ch++) {
+               int16_t *coefs1;
+               float *coefs, *exponents, mult, mult1, noise;
+               int i, j, n, n1, last_high_band, esize;
+               float exp_power[HIGH_BAND_MAX_SIZE];
+
+               if (!pwd->channel_coded[ch])
+                       continue;
+               coefs1 = pwd->coefs1[ch];
+               exponents = pwd->exponents[ch];
+               esize = pwd->exponents_bsize[ch];
+               mult = pow(10, total_gain * 0.05) / pwd->max_exponent[ch];
+               mult *= mdct_norm;
+               coefs = pwd->coefs[ch];
+               if (!pwd->use_noise_coding) {
+                       /* XXX: optimize more */
+                       n = nb_coefs[ch];
+                       for (i = 0; i < n; i++)
+                               *coefs++ = coefs1[i] *
+                                       exponents[i << bsize >> esize] * mult;
+                       n = pwd->block_len - pwd->coefs_end[bsize];
+                       for (i = 0; i < n; i++)
+                               *coefs++ = 0.0;
+                       continue;
+               }
+               mult1 = mult;
+               n1 = pwd->exponent_high_sizes[bsize];
+               /* compute power of high bands */
+               exponents = pwd->exponents[ch] +
+                       (pwd->high_band_start[bsize] << bsize);
+               last_high_band = 0; /* avoid warning */
+               for (j = 0; j < n1; j++) {
+                       n = pwd->exponent_high_bands[
+                               pwd->frame_len_bits - pwd->block_len_bits][j];
+                       if (pwd->high_band_coded[ch][j]) {
+                               float e2, val;
+                               e2 = 0;
+                               for (i = 0; i < n; i++) {
+                                       val = exponents[i << bsize >> esize];
+                                       e2 += val * val;
+                               }
+                               exp_power[j] = e2 / n;
+                               last_high_band = j;
+                       }
+                       exponents += n << bsize;
+               }
+               /* main freqs and high freqs */
+               exponents = pwd->exponents[ch];
+               for (j = -1; j < n1; j++) {
+                       if (j < 0)
+                               n = pwd->high_band_start[bsize];
+                       else
+                               n = pwd->exponent_high_bands[pwd->frame_len_bits
+                                       - pwd->block_len_bits][j];
+                       if (j >= 0 && pwd->high_band_coded[ch][j]) {
+                               /* use noise with specified power */
+                               mult1 = sqrt(exp_power[j]
+                                       / exp_power[last_high_band]);
+                               /* XXX: use a table */
+                               mult1 = mult1 * pow(10,
+                                       pwd->high_band_values[ch][j] * 0.05);
+                               mult1 /= (pwd->max_exponent[ch] * pwd->noise_mult);
+                               mult1 *= mdct_norm;
+                               for (i = 0; i < n; i++) {
+                                       noise = pwd->noise_table[pwd->noise_index];
+                                       pwd->noise_index = (pwd->noise_index + 1)
+                                               & (NOISE_TAB_SIZE - 1);
+                                       *coefs++ = noise * exponents[
+                                               i << bsize >> esize] * mult1;
+                               }
+                               exponents += n << bsize;
+                       } else {
+                               /* coded values + small noise */
+                               for (i = 0; i < n; i++) {
+                                       noise = pwd->noise_table[pwd->noise_index];
+                                       pwd->noise_index = (pwd->noise_index + 1)
+                                               & (NOISE_TAB_SIZE - 1);
+                                       *coefs++ = ((*coefs1++) + noise) *
+                                               exponents[i << bsize >> esize]
+                                               * mult;
+                               }
+                               exponents += n << bsize;
+                       }
+               }
+               /* very high freqs: noise */
+               n = pwd->block_len - pwd->coefs_end[bsize];
+               mult1 = mult * exponents[((-1 << bsize)) >> esize];
+               for (i = 0; i < n; i++) {
+                       *coefs++ = pwd->noise_table[pwd->noise_index] * mult1;
+                       pwd->noise_index = (pwd->noise_index + 1)
+                               & (NOISE_TAB_SIZE - 1);
+               }
+       }
+}
+
 /**
- * @return 0 if OK. 1 if last block of frame. return -1 if
- * unrecorrable error.
+ * Returns 0 if OK, 1 if last block of frame, negative on uncorrectable
+ * errors.
  */
-static int wma_decode_block(struct private_wmadec_data *s)
+static int wma_decode_block(struct private_wmadec_data *pwd)
 {
-       int n, v, ch, code, bsize;
+       int ret, n, v, ch, code, bsize;
        int coef_nb_bits, total_gain;
        int nb_coefs[MAX_CHANNELS];
-       float mdct_norm;
 
        /* compute current block length */
-       if (s->use_variable_block_len) {
-               n = wma_log2(s->nb_block_sizes - 1) + 1;
-
-               if (s->reset_block_lengths) {
-                       s->reset_block_lengths = 0;
-                       v = get_bits(&s->gb, n);
-                       if (v >= s->nb_block_sizes)
-                               return -1;
-                       s->prev_block_len_bits = s->frame_len_bits - v;
-                       v = get_bits(&s->gb, n);
-                       if (v >= s->nb_block_sizes)
-                               return -1;
-                       s->block_len_bits = s->frame_len_bits - v;
+       if (pwd->use_variable_block_len) {
+               n = wma_log2(pwd->nb_block_sizes - 1) + 1;
+
+               if (pwd->reset_block_lengths) {
+                       pwd->reset_block_lengths = 0;
+                       v = get_bits(&pwd->gb, n);
+                       if (v >= pwd->nb_block_sizes)
+                               return -E_WMA_BLOCK_SIZE;
+                       pwd->prev_block_len_bits = pwd->frame_len_bits - v;
+                       v = get_bits(&pwd->gb, n);
+                       if (v >= pwd->nb_block_sizes)
+                               return -E_WMA_BLOCK_SIZE;
+                       pwd->block_len_bits = pwd->frame_len_bits - v;
                } else {
                        /* update block lengths */
-                       s->prev_block_len_bits = s->block_len_bits;
-                       s->block_len_bits = s->next_block_len_bits;
+                       pwd->prev_block_len_bits = pwd->block_len_bits;
+                       pwd->block_len_bits = pwd->next_block_len_bits;
                }
-               v = get_bits(&s->gb, n);
-               if (v >= s->nb_block_sizes)
-                       return -1;
-               s->next_block_len_bits = s->frame_len_bits - v;
+               v = get_bits(&pwd->gb, n);
+               if (v >= pwd->nb_block_sizes)
+                       return -E_WMA_BLOCK_SIZE;
+               pwd->next_block_len_bits = pwd->frame_len_bits - v;
        } else {
                /* fixed block len */
-               s->next_block_len_bits = s->frame_len_bits;
-               s->prev_block_len_bits = s->frame_len_bits;
-               s->block_len_bits = s->frame_len_bits;
+               pwd->next_block_len_bits = pwd->frame_len_bits;
+               pwd->prev_block_len_bits = pwd->frame_len_bits;
+               pwd->block_len_bits = pwd->frame_len_bits;
        }
 
        /* now check if the block length is coherent with the frame length */
-       s->block_len = 1 << s->block_len_bits;
-       if ((s->block_pos + s->block_len) > s->frame_len)
+       pwd->block_len = 1 << pwd->block_len_bits;
+       if ((pwd->block_pos + pwd->block_len) > pwd->frame_len)
                return -E_INCOHERENT_BLOCK_LEN;
 
-       if (s->ahi.channels == 2) {
-               s->ms_stereo = get_bits1(&s->gb);
-       }
+       if (pwd->ahi.channels == 2)
+               pwd->ms_stereo = get_bit(&pwd->gb);
        v = 0;
-       for (ch = 0; ch < s->ahi.channels; ch++) {
-               int a = get_bits1(&s->gb);
-               s->channel_coded[ch] = a;
+       for (ch = 0; ch < pwd->ahi.channels; ch++) {
+               int a = get_bit(&pwd->gb);
+               pwd->channel_coded[ch] = a;
                v |= a;
        }
 
-       bsize = s->frame_len_bits - s->block_len_bits;
+       bsize = pwd->frame_len_bits - pwd->block_len_bits;
 
        /* if no channel coded, no need to go further */
        /* XXX: fix potential framing problems */
        if (!v)
                goto next;
 
-       /* read total gain and extract corresponding number of bits for
-          coef escape coding */
+       /*
+        * Read total gain and extract corresponding number of bits for coef
+        * escape coding.
+        */
        total_gain = 1;
        for (;;) {
-               int a = get_bits(&s->gb, 7);
+               int a = get_bits(&pwd->gb, 7);
                total_gain += a;
                if (a != 127)
                        break;
@@ -773,335 +901,118 @@ static int wma_decode_block(struct private_wmadec_data *s)
        coef_nb_bits = wma_total_gain_to_bits(total_gain);
 
        /* compute number of coefficients */
-       n = s->coefs_end[bsize] - s->coefs_start;
-       for (ch = 0; ch < s->ahi.channels; ch++)
+       n = pwd->coefs_end[bsize];
+       for (ch = 0; ch < pwd->ahi.channels; ch++)
                nb_coefs[ch] = n;
 
-       /* complex coding */
-       if (s->use_noise_coding) {
-
-               for (ch = 0; ch < s->ahi.channels; ch++) {
-                       if (s->channel_coded[ch]) {
-                               int i, m, a;
-                               m = s->exponent_high_sizes[bsize];
-                               for (i = 0; i < m; i++) {
-                                       a = get_bits1(&s->gb);
-                                       s->high_band_coded[ch][i] = a;
-                                       /* if noise coding, the coefficients are not transmitted */
-                                       if (a)
-                                               nb_coefs[ch] -=
-                                                   s->
-                                                   exponent_high_bands[bsize]
-                                                   [i];
-                               }
-                       }
-               }
-               for (ch = 0; ch < s->ahi.channels; ch++) {
-                       if (s->channel_coded[ch]) {
-                               int i, val;
-
-                               n = s->exponent_high_sizes[bsize];
-                               val = (int) 0x80000000;
-                               for (i = 0; i < n; i++) {
-                                       if (s->high_band_coded[ch][i]) {
-                                               if (val == (int) 0x80000000) {
-                                                       val =
-                                                           get_bits(&s->gb,
-                                                                    7) - 19;
-                                               } else {
-                                                       code =
-                                                           get_vlc2(&s->gb,
-                                                                    s->
-                                                                    hgain_vlc.
-                                                                    table,
-                                                                    HGAINVLCBITS,
-                                                                    HGAINMAX);
-                                                       if (code < 0)
-                                                               return -1;
-                                                       val += code - 18;
-                                               }
-                                               s->high_band_values[ch][i] =
-                                                   val;
-                                       }
-                               }
-                       }
-               }
-       }
+       ret = compute_high_band_values(pwd, bsize, nb_coefs);
+       if (ret < 0)
+               return ret;
 
        /* exponents can be reused in short blocks. */
-       if ((s->block_len_bits == s->frame_len_bits) || get_bits1(&s->gb)) {
-               for (ch = 0; ch < s->ahi.channels; ch++) {
-                       if (s->channel_coded[ch]) {
-                               if (s->use_exp_vlc) {
-                                       if (decode_exp_vlc(s, ch) < 0)
-                                               return -1;
-                               } else {
-                                       decode_exp_lsp(s, ch);
-                               }
-                               s->exponents_bsize[ch] = bsize;
+       if ((pwd->block_len_bits == pwd->frame_len_bits) || get_bit(&pwd->gb)) {
+               for (ch = 0; ch < pwd->ahi.channels; ch++) {
+                       if (pwd->channel_coded[ch]) {
+                               if (pwd->use_exp_vlc) {
+                                       ret = decode_exp_vlc(pwd, ch);
+                                       if (ret < 0)
+                                               return ret;
+                               } else
+                                       decode_exp_lsp(pwd, ch);
+                               pwd->exponents_bsize[ch] = bsize;
                        }
                }
        }
 
        /* parse spectral coefficients : just RLE encoding */
-       for (ch = 0; ch < s->ahi.channels; ch++) {
-               if (s->channel_coded[ch]) {
-                       struct vlc *coef_vlc;
-                       int level, run, sign, tindex;
-                       int16_t *ptr, *eptr;
-                       const uint16_t *level_table, *run_table;
-
-                       /* special VLC tables are used for ms stereo because
-                          there is potentially less energy there */
-                       tindex = (ch == 1 && s->ms_stereo);
-                       coef_vlc = &s->coef_vlc[tindex];
-                       run_table = s->run_table[tindex];
-                       level_table = s->level_table[tindex];
-                       /* XXX: optimize */
-                       ptr = &s->coefs1[ch][0];
-                       eptr = ptr + nb_coefs[ch];
-                       memset(ptr, 0, s->block_len * sizeof(int16_t));
-                       for (;;) {
-                               code =
-                                   get_vlc2(&s->gb, coef_vlc->table, VLCBITS,
-                                            VLCMAX);
-                               if (code < 0)
-                                       return -1;
-                               if (code == 1) {
-                                       /* EOB */
-                                       break;
-                               } else if (code == 0) {
-                                       /* escape */
-                                       level = get_bits(&s->gb, coef_nb_bits);
-                                       /* NOTE: this is rather suboptimal. reading
-                                          block_len_bits would be better */
-                                       run =
-                                           get_bits(&s->gb, s->frame_len_bits);
-                               } else {
-                                       /* normal code */
-                                       run = run_table[code];
-                                       level = level_table[code];
-                               }
-                               sign = get_bits1(&s->gb);
-                               if (!sign)
-                                       level = -level;
-                               ptr += run;
-                               if (ptr >= eptr) {
-                                       PARA_ERROR_LOG("overflow in spectral RLE, ignoring\n");
-                                       break;
-                               }
-                               *ptr++ = level;
-                               /* NOTE: EOB can be omitted */
-                               if (ptr >= eptr)
-                                       break;
+       for (ch = 0; ch < pwd->ahi.channels; ch++) {
+               struct vlc *coef_vlc;
+               int level, run, tindex;
+               int16_t *ptr, *eptr;
+               const uint16_t *level_table, *run_table;
+
+               if (!pwd->channel_coded[ch])
+                       continue;
+               /*
+                * special VLC tables are used for ms stereo because there is
+                * potentially less energy there
+                */
+               tindex = (ch == 1 && pwd->ms_stereo);
+               coef_vlc = &pwd->coef_vlc[tindex];
+               run_table = pwd->run_table[tindex];
+               level_table = pwd->level_table[tindex];
+               /* XXX: optimize */
+               ptr = &pwd->coefs1[ch][0];
+               eptr = ptr + nb_coefs[ch];
+               memset(ptr, 0, pwd->block_len * sizeof(int16_t));
+               for (;;) {
+                       code = get_vlc(&pwd->gb, coef_vlc->table,
+                               VLCBITS, VLCMAX);
+                       if (code < 0)
+                               return code;
+                       if (code == 1) /* EOB */
+                               break;
+                       if (code == 0) { /* escape */
+                               level = get_bits(&pwd->gb, coef_nb_bits);
+                               /* reading block_len_bits would be better */
+                               run = get_bits(&pwd->gb, pwd->frame_len_bits);
+                       } else { /* normal code */
+                               run = run_table[code];
+                               level = level_table[code];
                        }
-               }
-       }
-
-       /* normalize */
-       {
-               int n4 = s->block_len / 2;
-               mdct_norm = 1.0 / (float) n4;
-       }
-
-       /* finally compute the MDCT coefficients */
-       for (ch = 0; ch < s->ahi.channels; ch++) {
-               if (s->channel_coded[ch]) {
-                       int16_t *coefs1;
-                       float *coefs, *exponents, mult, mult1, noise;
-                       int i, j, n1, last_high_band, esize;
-                       float exp_power[HIGH_BAND_MAX_SIZE];
-
-                       coefs1 = s->coefs1[ch];
-                       exponents = s->exponents[ch];
-                       esize = s->exponents_bsize[ch];
-                       mult = pow(10, total_gain * 0.05) / s->max_exponent[ch];
-                       mult *= mdct_norm;
-                       coefs = s->coefs[ch];
-                       if (s->use_noise_coding) {
-                               mult1 = mult;
-                               /* very low freqs : noise */
-                               for (i = 0; i < s->coefs_start; i++) {
-                                       *coefs++ =
-                                           s->noise_table[s->noise_index] *
-                                           exponents[i << bsize >> esize] *
-                                           mult1;
-                                       s->noise_index =
-                                           (s->noise_index +
-                                            1) & (NOISE_TAB_SIZE - 1);
-                               }
-
-                               n1 = s->exponent_high_sizes[bsize];
-
-                               /* compute power of high bands */
-                               exponents = s->exponents[ch] +
-                                   (s->high_band_start[bsize] << bsize);
-                               last_high_band = 0;     /* avoid warning */
-                               for (j = 0; j < n1; j++) {
-                                       n = s->exponent_high_bands[s->
-                                                                  frame_len_bits
-                                                                  -
-                                                                  s->
-                                                                  block_len_bits]
-                                           [j];
-                                       if (s->high_band_coded[ch][j]) {
-                                               float e2, val;
-                                               e2 = 0;
-                                               for (i = 0; i < n; i++) {
-                                                       val = exponents[i << bsize
-                                                                     >> esize];
-                                                       e2 += val * val;
-                                               }
-                                               exp_power[j] = e2 / n;
-                                               last_high_band = j;
-                                       }
-                                       exponents += n << bsize;
-                               }
-
-                               /* main freqs and high freqs */
-                               exponents =
-                                   s->exponents[ch] +
-                                   (s->coefs_start << bsize);
-                               for (j = -1; j < n1; j++) {
-                                       if (j < 0) {
-                                               n = s->high_band_start[bsize] -
-                                                   s->coefs_start;
-                                       } else {
-                                               n = s->exponent_high_bands[s->
-                                                                          frame_len_bits
-                                                                          -
-                                                                          s->
-                                                                          block_len_bits]
-                                                   [j];
-                                       }
-                                       if (j >= 0 && s->high_band_coded[ch][j]) {
-                                               /* use noise with specified power */
-                                               mult1 =
-                                                   sqrt(exp_power[j] /
-                                                        exp_power
-                                                        [last_high_band]);
-                                               /* XXX: use a table */
-                                               mult1 =
-                                                   mult1 * pow(10,
-                                                               s->
-                                                               high_band_values
-                                                               [ch][j] * 0.05);
-                                               mult1 =
-                                                   mult1 /
-                                                   (s->max_exponent[ch] *
-                                                    s->noise_mult);
-                                               mult1 *= mdct_norm;
-                                               for (i = 0; i < n; i++) {
-                                                       noise =
-                                                           s->noise_table[s->
-                                                                          noise_index];
-                                                       s->noise_index =
-                                                           (s->noise_index +
-                                                            1) &
-                                                           (NOISE_TAB_SIZE -
-                                                            1);
-                                                       *coefs++ =
-                                                           noise *
-                                                           exponents[i << bsize
-                                                                     >> esize]
-                                                           * mult1;
-                                               }
-                                               exponents += n << bsize;
-                                       } else {
-                                               /* coded values + small noise */
-                                               for (i = 0; i < n; i++) {
-                                                       noise =
-                                                           s->noise_table[s->
-                                                                          noise_index];
-                                                       s->noise_index =
-                                                           (s->noise_index +
-                                                            1) &
-                                                           (NOISE_TAB_SIZE -
-                                                            1);
-                                                       *coefs++ =
-                                                           ((*coefs1++) +
-                                                            noise) *
-                                                           exponents[i << bsize
-                                                                     >> esize]
-                                                           * mult;
-                                               }
-                                               exponents += n << bsize;
-                                       }
-                               }
-
-                               /* very high freqs : noise */
-                               n = s->block_len - s->coefs_end[bsize];
-                               mult1 =
-                                   mult * exponents[((-1 << bsize)) >> esize];
-                               for (i = 0; i < n; i++) {
-                                       *coefs++ =
-                                           s->noise_table[s->noise_index] *
-                                           mult1;
-                                       s->noise_index =
-                                           (s->noise_index +
-                                            1) & (NOISE_TAB_SIZE - 1);
-                               }
-                       } else {
-                               /* XXX: optimize more */
-                               for (i = 0; i < s->coefs_start; i++)
-                                       *coefs++ = 0.0;
-                               n = nb_coefs[ch];
-                               for (i = 0; i < n; i++) {
-                                       *coefs++ =
-                                           coefs1[i] *
-                                           exponents[i << bsize >> esize] *
-                                           mult;
-                               }
-                               n = s->block_len - s->coefs_end[bsize];
-                               for (i = 0; i < n; i++)
-                                       *coefs++ = 0.0;
+                       if (!get_bit(&pwd->gb))
+                               level = -level;
+                       ptr += run;
+                       if (ptr >= eptr) {
+                               PARA_ERROR_LOG("overflow in spectral RLE, ignoring\n");
+                               break;
                        }
+                       *ptr++ = level;
+                       if (ptr >= eptr) /* EOB can be omitted */
+                               break;
                }
        }
-
-       if (s->ms_stereo && s->channel_coded[1]) {
+       compute_mdct_coefficients(pwd, bsize, total_gain, nb_coefs);
+       if (pwd->ms_stereo && pwd->channel_coded[1]) {
                float a, b;
                int i;
-
                /*
                 * Nominal case for ms stereo: we do it before mdct.
                 *
                 * No need to optimize this case because it should almost never
                 * happen.
                 */
-               if (!s->channel_coded[0]) {
+               if (!pwd->channel_coded[0]) {
                        PARA_NOTICE_LOG("rare ms-stereo\n");
-                       memset(s->coefs[0], 0, sizeof(float) * s->block_len);
-                       s->channel_coded[0] = 1;
+                       memset(pwd->coefs[0], 0, sizeof(float) * pwd->block_len);
+                       pwd->channel_coded[0] = 1;
                }
-               for (i = 0; i < s->block_len; i++) {
-                       a = s->coefs[0][i];
-                       b = s->coefs[1][i];
-                       s->coefs[0][i] = a + b;
-                       s->coefs[1][i] = a - b;
+               for (i = 0; i < pwd->block_len; i++) {
+                       a = pwd->coefs[0][i];
+                       b = pwd->coefs[1][i];
+                       pwd->coefs[0][i] = a + b;
+                       pwd->coefs[1][i] = a - b;
                }
        }
-
 next:
-       for (ch = 0; ch < s->ahi.channels; ch++) {
+       for (ch = 0; ch < pwd->ahi.channels; ch++) {
                int n4, index;
 
-               n = s->block_len;
-               n4 = s->block_len / 2;
-               if (s->channel_coded[ch])
-                       imdct(s->mdct_ctx[bsize], s->output, s->coefs[ch]);
-               else if (!(s->ms_stereo && ch == 1))
-                       memset(s->output, 0, sizeof (s->output));
+               n = pwd->block_len;
+               n4 = pwd->block_len / 2;
+               if (pwd->channel_coded[ch])
+                       imdct(pwd->mdct_ctx[bsize], pwd->output, pwd->coefs[ch]);
+               else if (!(pwd->ms_stereo && ch == 1))
+                       memset(pwd->output, 0, sizeof(pwd->output));
 
                /* multiply by the window and add in the frame */
-               index = (s->frame_len / 2) + s->block_pos - n4;
-               wma_window(s, &s->frame_out[ch][index]);
+               index = (pwd->frame_len / 2) + pwd->block_pos - n4;
+               wma_window(pwd, &pwd->frame_out[ch][index]);
        }
 
        /* update block number */
-       s->block_num++;
-       s->block_pos += s->block_len;
-       if (s->block_pos >= s->frame_len)
+       pwd->block_pos += pwd->block_len;
+       if (pwd->block_pos >= pwd->frame_len)
                return 1;
        else
                return 0;
@@ -1123,186 +1034,189 @@ static inline int16_t av_clip_int16(int a)
 }
 
 /* Decode a frame of frame_len samples. */
-static int wma_decode_frame(struct private_wmadec_data *s, int16_t * samples)
+static int wma_decode_frame(struct private_wmadec_data *pwd, int16_t *samples)
 {
        int ret, i, n, ch, incr;
        int16_t *ptr;
        float *iptr;
 
        /* read each block */
-       s->block_num = 0;
-       s->block_pos = 0;
+       pwd->block_pos = 0;
        for (;;) {
-               ret = wma_decode_block(s);
+               ret = wma_decode_block(pwd);
                if (ret < 0)
-                       return -1;
+                       return ret;
                if (ret)
                        break;
        }
 
        /* convert frame to integer */
-       n = s->frame_len;
-       incr = s->ahi.channels;
-       for (ch = 0; ch < s->ahi.channels; ch++) {
+       n = pwd->frame_len;
+       incr = pwd->ahi.channels;
+       for (ch = 0; ch < pwd->ahi.channels; ch++) {
                ptr = samples + ch;
-               iptr = s->frame_out[ch];
+               iptr = pwd->frame_out[ch];
 
                for (i = 0; i < n; i++) {
                        *ptr = av_clip_int16(lrintf(*iptr++));
                        ptr += incr;
                }
                /* prepare for next block */
-               memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
-                       s->frame_len * sizeof (float));
+               memmove(&pwd->frame_out[ch][0], &pwd->frame_out[ch][pwd->frame_len],
+                       pwd->frame_len * sizeof(float));
        }
        return 0;
 }
 
-static int wma_decode_superframe(struct private_wmadec_data *s, void *data,
+static int wma_decode_superframe(struct private_wmadec_data *pwd, void *data,
                int *data_size, const uint8_t *buf, int buf_size)
 {
-       int ret, nb_frames, bit_offset, i, pos, len;
-       uint8_t *q;
+       int ret;
        int16_t *samples;
-       static int frame_count;
 
        if (buf_size == 0) {
-               s->last_superframe_len = 0;
+               pwd->last_superframe_len = 0;
                return 0;
        }
-       if (buf_size < s->ahi.block_align)
+       if (buf_size < pwd->ahi.block_align)
                return 0;
-       buf_size = s->ahi.block_align;
+       buf_size = pwd->ahi.block_align;
        samples = data;
-       init_get_bits(&s->gb, buf, buf_size * 8);
-       if (s->use_bit_reservoir) {
+       init_get_bits(&pwd->gb, buf, buf_size);
+       if (pwd->use_bit_reservoir) {
+               int i, nb_frames, bit_offset, pos, len;
+               uint8_t *q;
+
                /* read super frame header */
-               skip_bits(&s->gb, 4);   /* super frame index */
-               nb_frames = get_bits(&s->gb, 4) - 1;
+               skip_bits(&pwd->gb, 4); /* super frame index */
+               nb_frames = get_bits(&pwd->gb, 4) - 1;
                // PARA_DEBUG_LOG("have %d frames\n", nb_frames);
                ret = -E_WMA_OUTPUT_SPACE;
-               if ((nb_frames + 1) * s->ahi.channels * s->frame_len
+               if ((nb_frames + 1) * pwd->ahi.channels * pwd->frame_len
                                * sizeof(int16_t) > *data_size)
                        goto fail;
 
-               bit_offset = get_bits(&s->gb, s->byte_offset_bits + 3);
+               bit_offset = get_bits(&pwd->gb, pwd->byte_offset_bits + 3);
 
-               if (s->last_superframe_len > 0) {
+               if (pwd->last_superframe_len > 0) {
                        /* add bit_offset bits to last frame */
                        ret = -E_WMA_BAD_SUPERFRAME;
-                       if ((s->last_superframe_len + ((bit_offset + 7) >> 3)) >
+                       if ((pwd->last_superframe_len + ((bit_offset + 7) >> 3)) >
                                        MAX_CODED_SUPERFRAME_SIZE)
                                goto fail;
-                       q = s->last_superframe + s->last_superframe_len;
+                       q = pwd->last_superframe + pwd->last_superframe_len;
                        len = bit_offset;
                        while (len > 7) {
-                               *q++ = get_bits(&s->gb, 8);
+                               *q++ = get_bits(&pwd->gb, 8);
                                len -= 8;
                        }
-                       if (len > 0) {
-                               *q++ = get_bits(&s->gb, len) << (8 - len);
-                       }
+                       if (len > 0)
+                               *q++ = get_bits(&pwd->gb, len) << (8 - len);
 
                        /* XXX: bit_offset bits into last frame */
-                       init_get_bits(&s->gb, s->last_superframe,
-                               MAX_CODED_SUPERFRAME_SIZE * 8);
+                       init_get_bits(&pwd->gb, pwd->last_superframe,
+                               MAX_CODED_SUPERFRAME_SIZE);
                        /* skip unused bits */
-                       if (s->last_bitoffset > 0)
-                               skip_bits(&s->gb, s->last_bitoffset);
+                       if (pwd->last_bitoffset > 0)
+                               skip_bits(&pwd->gb, pwd->last_bitoffset);
                        /*
                         * This frame is stored in the last superframe and in
                         * the current one.
                         */
-                       ret = -E_WMA_DECODE;
-                       if (wma_decode_frame(s, samples) < 0)
+                       ret = wma_decode_frame(pwd, samples);
+                       if (ret < 0)
                                goto fail;
-                       frame_count++;
-                       samples += s->ahi.channels * s->frame_len;
+                       samples += pwd->ahi.channels * pwd->frame_len;
                }
 
                /* read each frame starting from bit_offset */
-               pos = bit_offset + 4 + 4 + s->byte_offset_bits + 3;
-               init_get_bits(&s->gb, buf + (pos >> 3),
-                       (MAX_CODED_SUPERFRAME_SIZE - (pos >> 3)) * 8);
+               pos = bit_offset + 4 + 4 + pwd->byte_offset_bits + 3;
+               init_get_bits(&pwd->gb, buf + (pos >> 3),
+                       (MAX_CODED_SUPERFRAME_SIZE - (pos >> 3)));
                len = pos & 7;
                if (len > 0)
-                       skip_bits(&s->gb, len);
+                       skip_bits(&pwd->gb, len);
 
-               s->reset_block_lengths = 1;
+               pwd->reset_block_lengths = 1;
                for (i = 0; i < nb_frames; i++) {
-                       ret = -E_WMA_DECODE;
-                       if (wma_decode_frame(s, samples) < 0)
+                       ret = wma_decode_frame(pwd, samples);
+                       if (ret < 0)
                                goto fail;
-                       frame_count++;
-                       samples += s->ahi.channels * s->frame_len;
+                       samples += pwd->ahi.channels * pwd->frame_len;
                }
 
                /* we copy the end of the frame in the last frame buffer */
-               pos = get_bits_count(&s->gb) +
-                       ((bit_offset + 4 + 4 + s->byte_offset_bits + 3) & ~7);
-               s->last_bitoffset = pos & 7;
+               pos = get_bits_count(&pwd->gb) +
+                       ((bit_offset + 4 + 4 + pwd->byte_offset_bits + 3) & ~7);
+               pwd->last_bitoffset = pos & 7;
                pos >>= 3;
                len = buf_size - pos;
                ret = -E_WMA_BAD_SUPERFRAME;
-               if (len > MAX_CODED_SUPERFRAME_SIZE || len < 0) {
+               if (len > MAX_CODED_SUPERFRAME_SIZE || len < 0)
                        goto fail;
-               }
-               s->last_superframe_len = len;
-               memcpy(s->last_superframe, buf + pos, len);
+               pwd->last_superframe_len = len;
+               memcpy(pwd->last_superframe, buf + pos, len);
        } else {
                PARA_DEBUG_LOG("not using bit reservoir\n");
                ret = -E_WMA_OUTPUT_SPACE;
-               if (s->ahi.channels * s->frame_len * sizeof(int16_t) > *data_size)
+               if (pwd->ahi.channels * pwd->frame_len * sizeof(int16_t) > *data_size)
                        goto fail;
                /* single frame decode */
-               ret = -E_WMA_DECODE;
-               if (wma_decode_frame(s, samples) < 0)
+               ret = wma_decode_frame(pwd, samples);
+               if (ret < 0)
                        goto fail;
-               frame_count++;
-               samples += s->ahi.channels * s->frame_len;
+               samples += pwd->ahi.channels * pwd->frame_len;
        }
-       PARA_DEBUG_LOG("frame_count: %d frame_len: %d, block_len: %d, "
-               "outbytes: %d, eaten: %d\n",
-               frame_count, s->frame_len, s->block_len,
-               (int8_t *) samples - (int8_t *) data, s->ahi.block_align);
+       PARA_DEBUG_LOG("frame_len: %d, block_len: %d, outbytes: %zd, eaten: %d\n",
+               pwd->frame_len, pwd->block_len,
+               (int8_t *) samples - (int8_t *) data, pwd->ahi.block_align);
        *data_size = (int8_t *)samples - (int8_t *)data;
-       return s->ahi.block_align;
+       return pwd->ahi.block_align;
 fail:
        /* reset the bit reservoir on errors */
-       s->last_superframe_len = 0;
+       pwd->last_superframe_len = 0;
        return ret;
 }
 
 static ssize_t wmadec_convert(char *inbuffer, size_t len,
                struct filter_node *fn)
 {
-       int ret, out_size = fn->bufsize - fn->loaded;
+       int ret, converted = 0;
        struct private_wmadec_data *pwd = fn->private_data;
 
-       if (out_size < 128 * 1024)
+       if (len <= WMA_FRAME_SKIP)
                return 0;
        if (!pwd) {
                ret = wma_decode_init(inbuffer, len, &pwd);
                if (ret <= 0)
                        return ret;
                fn->private_data = pwd;
+               fn->fc->channels = pwd->ahi.channels;
+               fn->fc->samplerate = pwd->ahi.sample_rate;
                return pwd->ahi.header_len;
        }
-       /* skip 31 bytes */
-       if (len <= WMA_FRAME_SKIP + pwd->ahi.block_align)
-               return 0;
-       ret = wma_decode_superframe(pwd, fn->buf + fn->loaded,
-               &out_size, (uint8_t *)inbuffer + WMA_FRAME_SKIP,
-               len - WMA_FRAME_SKIP);
-       if (ret < 0)
-               return ret;
-       fn->loaded += out_size;
-       return ret + WMA_FRAME_SKIP;
+       for (;;) {
+               int out_size;
+               if (converted + WMA_FRAME_SKIP + pwd->ahi.block_align > len)
+                       break;
+               out_size = fn->bufsize - fn->loaded;
+               if (out_size < 128 * 1024)
+                       break;
+               ret = wma_decode_superframe(pwd, fn->buf + fn->loaded,
+                       &out_size, (uint8_t *)inbuffer + converted + WMA_FRAME_SKIP,
+                       len - WMA_FRAME_SKIP);
+               if (ret < 0)
+                       return ret;
+               fn->loaded += out_size;
+               converted += ret + WMA_FRAME_SKIP;
+       }
+       return converted;
 }
 
 static void wmadec_close(struct filter_node *fn)
 {
        struct private_wmadec_data *pwd = fn->private_data;
+
        if (!pwd)
                return;
        wmadec_cleanup(pwd);