From: Andre Noll <maan@tuebingen.mpg.de>
Date: Sat, 24 Aug 2019 11:38:29 +0000 (+0200)
Subject: Merge branch 'refs/heads/t/compress'
X-Git-Tag: v0.6.3~42
X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=commitdiff_plain;h=742be1f7334570492615fdf89ce46123e3f71886;hp=a1c4cc4b26f72c19e12100d47a9da3c449aab3e7

Merge branch 'refs/heads/t/compress'

A short series which overhauls the algorithm behind the compress
filter and its documentation.

Cooking for almost a year.

* refs/heads/t/compress:
  compress: Overhaul the meaning of --aggressiveness.
  compress: Apply damping later.
  compress: Warn when samples are clipped.
  compress: Document and sanity-check command line options.
  compress: Fix off by one in help of --target-level.
---

diff --git a/NEWS.md b/NEWS.md
index d8813b3f..50ec0a3a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -10,9 +10,11 @@ NEWS
   stream. The old syntax (e.g., "ff 30-") is still supported but it
   is deprecated and no longer documented. The compatibility code is
   sheduled for removal after 0.7.0.
-
 - para_afh: New option: --preserve to reset the modification time to
   the value of the original file after meta data modification.
+- Overhaul of the compress filter code. The refined algorithm should
+  reduce clipping. The meaning of --aggressiveness has changed; see the
+  updated and extended documentation of the compress filter for details.
 
 --------------------------------------
 0.6.2 (2018-06-30) "elastic diversity"
diff --git a/compress_filter.c b/compress_filter.c
index 69a982ef..15bed6df 100644
--- a/compress_filter.c
+++ b/compress_filter.c
@@ -48,8 +48,7 @@ static int compress_post_select(__a_unused struct sched *s, void *context)
 	size_t length, i;
 	int16_t *ip, *op;
 	uint32_t inertia = U32_OPTVAL(INERTIA, fn->lpr);
-	unsigned gain_shift = inertia + U32_OPTVAL(DAMP, fn->lpr),
-		mask = (1U << U32_OPTVAL(BLOCKSIZE, fn->lpr)) - 1U;
+	unsigned mask = (1U << U32_OPTVAL(BLOCKSIZE, fn->lpr)) - 1U;
 	//inplace = false;
 next_buffer:
 	ret = btr_node_status(btrn, fn->min_iqs, BTR_NT_INTERNAL);
@@ -78,19 +77,22 @@ next_buffer:
 			neg = true;
 		}
 		sample *= pcd->current_gain;
-		sample >>= gain_shift;
+		sample >>= inertia + 1;
 		if (sample > 32767) { /* clip */
+			PARA_WARNING_LOG("clip: %d\n", sample);
 			sample = 32767;
 			pcd->current_gain = (3 * pcd->current_gain +
 				(1 << inertia)) / 4;
 			pcd->peak = 0;
 		} else if (sample > pcd->peak)
 			pcd->peak = sample;
+		sample >>= U32_OPTVAL(DAMP, fn->lpr);
 		op[i] = neg? -sample : sample;
 		if (++pcd->num_samples & mask)
 			continue;
 //		PARA_DEBUG_LOG("gain: %u, peak: %u\n", pcd->current_gain,
 //			pcd->peak);
+
 		if (pcd->peak < U32_OPTVAL(TARGET_LEVEL, fn->lpr)) {
 			if (pcd->current_gain < pcd->max_gain)
 				pcd->current_gain++;
@@ -121,10 +123,43 @@ static void compress_open(struct filter_node *fn)
 	fn->private_data = pcd;
 	fn->min_iqs = 2; /* 16 bit audio */
 	pcd->current_gain = 1U << inertia;
-	pcd->max_gain = 1U << (inertia + aggressiveness);
+	pcd->max_gain = (1U << inertia) * (1.0 + 3.0 * aggressiveness / 10.0);
+}
+
+static void *compress_setup(const struct lls_parse_result *lpr)
+{ /* Range-check each compress option; log and exit on the first bad value. */
+	uint32_t val; /* value of the option currently being checked */
+
+	val = U32_OPTVAL(BLOCKSIZE, lpr);
+	if (val == 0 || val > 31) { /* accepted: 1-31; used as shift count of 1U */
+		PARA_EMERG_LOG("blocksize (%u) out of range\n", val);
+		exit(EXIT_FAILURE);
+	}
+	val = U32_OPTVAL(AGGRESSIVENESS, lpr);
+	if (val > 10) { /* accepted: 0-10 */
+		PARA_EMERG_LOG("aggressiveness (%u) out of range\n", val);
+		exit(EXIT_FAILURE);
+	}
+	val = U32_OPTVAL(INERTIA, lpr);
+	if (val == 0 || val > 14) { /* accepted: 1-14; used as a shift count */
+		PARA_EMERG_LOG("inertia (%u) out of range\n", val);
+		exit(EXIT_FAILURE);
+	}
+	val = U32_OPTVAL(TARGET_LEVEL, lpr);
+	if (val > 32767) { /* accepted: 0-32767 */
+		PARA_EMERG_LOG("target-level (%u) out of range\n", val);
+		exit(EXIT_FAILURE);
+	}
+	val = U32_OPTVAL(DAMP, lpr);
+	if (val > 16) { /* accepted: 0-16; used as a right-shift count */
+		PARA_EMERG_LOG("damp (%u) out of range\n", val);
+		exit(EXIT_FAILURE);
+	}
+	return NULL; /* no need for a config structure */
 }
 
 const struct filter lsg_filter_cmd_com_compress_user_data = {
+	.setup = compress_setup,
 	.open = compress_open,
 	.close = compress_close,
 	.pre_select = generic_filter_pre_select,
diff --git a/m4/lls/filter_cmd.suite.m4 b/m4/lls/filter_cmd.suite.m4
index d269d237..c026a628 100644
--- a/m4/lls/filter_cmd.suite.m4
+++ b/m4/lls/filter_cmd.suite.m4
@@ -25,7 +25,7 @@ caption = filters
 	purpose = dynamically adjust the volume of an audio stream
 	[option blocksize]
 		short_opt = b
-		summary = use blocks of size 2**bits
+		summary = adjust volume after each block of size 2**bits (1-31)
 		typestr = bits
 		arg_info = required_arg
 		arg_type = uint32
@@ -35,32 +35,52 @@ caption = filters
 		[/help]
 	[option aggressiveness]
 		short_opt = a
-		summary = controls the maximum amount to amplify by
+		summary = controls the maximum amount to amplify by (0-10)
 		typestr = bits
 		arg_info = required_arg
 		arg_type = uint32
 		default_val = 4
+		[help]
+			This controls the maximal gain factor. Zero means no amplification
+			at all, while the value 10 corresponds to the maximal gain factor,
+			which results in a 4-fold increase in volume.
+		[/help]
 	[option inertia]
 		short_opt = i
-		summary = how much inertia ramping has
+		summary = how much inertia ramping has (1-14)
 		typestr = bits
 		arg_info = required_arg
 		arg_type = uint32
 		default_val = 6
+		[help]
+			Larger values cause smaller volume adjustments.
+		[/help]
 	[option target-level]
 		short_opt = t
-		summary = target signal level (0-32768)
+		summary = target signal level (0-32767)
 		typestr = level
 		arg_info = required_arg
 		arg_type = uint32
-		default_val = 20000
+		default_val = 16384
+		[help]
+			If the peak of the previous block is less than the target level,
+			volume is increased slightly for the next block. Otherwise it is
+			decreased. The default value is chosen to minimize clipping. There
+			is usually no reason to change it.
+		[/help]
 	[option damp]
 		short_opt = d
-		summary = if non-zero, scale down after normalizing
+		summary = if non-zero, scale down after normalizing (0-16)
 		typestr = bits
 		arg_info = required_arg
 		arg_type = uint32
 		default_val = 0
+		[help]
+			This scales down the volume of the audio stream by a factor of
+			2**bits. This is mostly useful if another audio application (e.g.,
+			a video game) is running in parallel and the relative volume of
+			the audio stream is too high.
+		[/help]
 [subcommand fecdec]
 	purpose = decode a (lossy) input stream using forward error correction
 [subcommand flacdec]