Merge branch 'refs/heads/t/compress'
author Andre Noll <maan@tuebingen.mpg.de>
Sat, 24 Aug 2019 11:38:29 +0000 (13:38 +0200)
committer Andre Noll <maan@tuebingen.mpg.de>
Sat, 24 Aug 2019 11:39:16 +0000 (13:39 +0200)
A short series which overhauls the algorithm behind the compress
filter and its documentation.

Cooking for almost a year.

* refs/heads/t/compress:
  compress: Overhaul the meaning of --aggressiveness.
  compress: Apply damping later.
  compress: Warn when samples are clipped.
  compress: Document and sanity-check command line options.
  compress: Fix off by one in help of --target-level.

NEWS.md
compress_filter.c
m4/lls/filter_cmd.suite.m4

diff --git a/NEWS.md b/NEWS.md
index d8813b3..50ec0a3 100644 (file)
--- a/NEWS.md
+++ b/NEWS.md
@@ -10,9 +10,11 @@ NEWS
   stream. The old syntax (e.g., "ff 30-") is still supported but it
   is deprecated and no longer documented. The compatibility code is
   sheduled for removal after 0.7.0.
-
 - para_afh: New option: --preserve to reset the modification time to
   the value of the original file after meta data modification.
+- Overhaul of the compress filter code. The refined algorithm should
+  reduce clipping. The meaning of --aggressiveness has changed, see the
+  updated and extended documentation of the compress filter for details.
 
 --------------------------------------
 0.6.2 (2018-06-30) "elastic diversity"
diff --git a/compress_filter.c b/compress_filter.c
index 69a982e..15bed6d 100644 (file)
--- a/compress_filter.c
+++ b/compress_filter.c
@@ -48,8 +48,7 @@ static int compress_post_select(__a_unused struct sched *s, void *context)
        size_t length, i;
        int16_t *ip, *op;
        uint32_t inertia = U32_OPTVAL(INERTIA, fn->lpr);
-       unsigned gain_shift = inertia + U32_OPTVAL(DAMP, fn->lpr),
-               mask = (1U << U32_OPTVAL(BLOCKSIZE, fn->lpr)) - 1U;
+       unsigned mask = (1U << U32_OPTVAL(BLOCKSIZE, fn->lpr)) - 1U;
        //inplace = false;
 next_buffer:
        ret = btr_node_status(btrn, fn->min_iqs, BTR_NT_INTERNAL);
@@ -78,19 +77,22 @@ next_buffer:
                        neg = true;
                }
                sample *= pcd->current_gain;
-               sample >>= gain_shift;
+               sample >>= inertia + 1;
                if (sample > 32767) { /* clip */
+                       PARA_WARNING_LOG("clip: %d\n", sample);
                        sample = 32767;
                        pcd->current_gain = (3 * pcd->current_gain +
                                (1 << inertia)) / 4;
                        pcd->peak = 0;
                } else if (sample > pcd->peak)
                        pcd->peak = sample;
+               sample >>= U32_OPTVAL(DAMP, fn->lpr);
                op[i] = neg? -sample : sample;
                if (++pcd->num_samples & mask)
                        continue;
 //             PARA_DEBUG_LOG("gain: %u, peak: %u\n", pcd->current_gain,
 //                     pcd->peak);
+
                if (pcd->peak < U32_OPTVAL(TARGET_LEVEL, fn->lpr)) {
                        if (pcd->current_gain < pcd->max_gain)
                                pcd->current_gain++;
@@ -121,10 +123,43 @@ static void compress_open(struct filter_node *fn)
        fn->private_data = pcd;
        fn->min_iqs = 2; /* 16 bit audio */
        pcd->current_gain = 1U << inertia;
-       pcd->max_gain = 1U << (inertia + aggressiveness);
+       pcd->max_gain = (1U << inertia) * (1.0 + 3.0 * aggressiveness / 10.0);
+}
+
+static void *compress_setup(const struct lls_parse_result *lpr)
+{
+       uint32_t val;
+
+       val = U32_OPTVAL(BLOCKSIZE, lpr);
+       if (val == 0 || val > 31) {
+               PARA_EMERG_LOG("blocksize (%u) out of range\n", val);
+               exit(EXIT_FAILURE);
+       }
+       val = U32_OPTVAL(AGGRESSIVENESS, lpr);
+       if (val > 10) {
+               PARA_EMERG_LOG("aggressiveness (%u) out of range\n", val);
+               exit(EXIT_FAILURE);
+       }
+       val = U32_OPTVAL(INERTIA, lpr);
+       if (val == 0 || val > 14) {
+               PARA_EMERG_LOG("inertia (%u) out of range\n", val);
+               exit(EXIT_FAILURE);
+       }
+       val = U32_OPTVAL(TARGET_LEVEL, lpr);
+       if (val > 32767) {
+               PARA_EMERG_LOG("target-level (%u) out of range\n", val);
+               exit(EXIT_FAILURE);
+       }
+       val = U32_OPTVAL(DAMP, lpr);
+       if (val > 16) {
+               PARA_EMERG_LOG("damp (%u) out of range\n", val);
+               exit(EXIT_FAILURE);
+       }
+       return NULL; /* no need for a config structure */
 }
 
 const struct filter lsg_filter_cmd_com_compress_user_data = {
+       .setup = compress_setup,
        .open = compress_open,
        .close = compress_close,
        .pre_select = generic_filter_pre_select,
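diff --git a/m4/lls/filter_cmd.suite.m4 b/m4/lls/filter_cmd.suite.m4
index d269d23..c026a62 100644 (file)
--- a/m4/lls/filter_cmd.suite.m4
+++ b/m4/lls/filter_cmd.suite.m4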
@@ -25,7 +25,7 @@ caption = filters
        purpose = dynamically adjust the volume of an audio stream
        [option blocksize]
                short_opt = b
-               summary = use blocks of size 2**bits
+               summary = adjust volume after each block of size 2**bits (1-31)
                typestr = bits
                arg_info = required_arg
                arg_type = uint32
@@ -35,32 +35,52 @@ caption = filters
                [/help]
        [option aggressiveness]
                short_opt = a
-               summary = controls the maximum amount to amplify by
+               summary = controls the maximum amount to amplify by (0-10)
                typestr = bits
                arg_info = required_arg
                arg_type = uint32
                default_val = 4
+               [help]
+                       This controls the maximal gain factor. Zero means to not amplify
+                       at all while the value 10 corresponds to maximal gain factor which
+                       results in a 4-fold increase in volume.
+               [/help]
        [option inertia]
                short_opt = i
-               summary = how much inertia ramping has
+               summary = how much inertia ramping has (1-14)
                typestr = bits
                arg_info = required_arg
                arg_type = uint32
                default_val = 6
+               [help]
+                       Larger values cause smaller volume adjustments.
+               [/help]
        [option target-level]
                short_opt = t
-               summary = target signal level (0-32768)
+               summary = target signal level (0-32767)
                typestr = level
                arg_info = required_arg
                arg_type = uint32
-               default_val = 20000
+               default_val = 16384
+               [help]
+                       If the peak of the previous block is less than the target level,
+                       volume is increased slightly for the next block. Otherwise it is
+                       decreased. The default value is chosen to minimize clipping. There
+                       is usually no reason to change it.
+               [/help]
        [option damp]
                short_opt = d
-               summary = if non-zero, scale down after normalizing
+               summary = if non-zero, scale down after normalizing (0-16)
                typestr = bits
                arg_info = required_arg
                arg_type = uint32
                default_val = 0
+               [help]
+                       This scales down the volume of the audio stream by factor 2**bits.
+                       This is mostly useful if another audio application (e.g., a video
+                       game) is running in parallel and the relative volume of the audio
+                       stream is too high.
+               [/help]
 [subcommand fecdec]
        purpose = decode a (lossy) input stream using forward error correction
 [subcommand flacdec]