From 86b5aba882056a6ff6d8645684e59222ba74a818 Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@tuebingen.mpg.de>
Date: Mon, 25 Nov 2019 21:22:35 +0100
Subject: [PATCH] Rework score formula.

Currently the two scales for the num_played and the last_played
components of the score value are computed independently of each
other. There is, however, a natural link between the two scales:
a file with the best possible num_played value (zero) and the worst
possible last_played value (now) should receive the average score, zero.

This patch employs this idea to rescale the two components. See the
new comment above compute_score() for details about the implementation.
---
 mood.c | 123 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 98 insertions(+), 25 deletions(-)

diff --git a/mood.c b/mood.c
index a63d4d2a..a5d2b025 100644
--- a/mood.c
+++ b/mood.c
@@ -39,10 +39,16 @@ struct afs_statistics {
 	int64_t num_played_qd;
 	/** Quadratic deviation of last played time. */
 	int64_t last_played_qd;
+	/** Correction factor for the num played score. */
+	int64_t num_played_correction;
+	/** Correction factor for the last played score. */
+	int64_t last_played_correction;
+	/** Common divisor of the correction factors. */
+	int64_t normalization_divisor;
 	/** Number of admissible files */
 	unsigned num;
 };
-static struct afs_statistics statistics;
+static struct afs_statistics statistics = {.normalization_divisor = 1};
 
 /**
  * Each line of the current mood corresponds to a mood_item.
@@ -499,20 +505,59 @@ int mood_check_callback(struct afs_callback_arg *aca)
 		check_mood));
 }
 
-static int64_t normalized_value(int64_t x, int64_t n, int64_t sum, int64_t qd)
-{
-	if (!n || !qd)
-		return 0;
-	return 100 * (n * x - sum) / (int64_t)int_sqrt(n) / (int64_t)int_sqrt(qd);
-}
-
+/*
+ * The normalized num_played and last_played values are defined as
+ *
+ *	nn := -(np - mean_n) / sigma_n and nl := -(lp - mean_l) / sigma_l
+ *
+ * For a (hypothetical) file with np = 0 and lp = now we thus have
+ *
+ *	nn =  mean_n / sigma_n =: hn > 0
+ *	nl = -(now - mean_l) / sigma_l =: hl < 0
+ *
+ * We design the score function so that both contributions get the same
+ * weight. Define the np and lp score of an arbitrary file as
+ *
+ *	sn := nn * -hl and sl := nl * hn
+ *
+ * Example:
+ *	num_played mean/sigma: 87/14
+ *	last_played mean/sigma: 45/32 days
+ *
+ *	We have hn = 87 / 14 = 6.21 and hl = -45 / 32 = -1.41. Multiplying
+ *	nn of every file with the correction factor 1.41 and nl with
+ *	6.21 makes the weight of the two contributions equal.
+ *
+ * The total score s := sn + sl has the representation
+ *
+ *	s = -cn * (np - mean_n) - cl * (lp - mean_l)
+ *
+ * with positive correction factors
+ *
+ *	cn = (now - mean_l) / (sqrt(ql) * sqrt(qn) / n)
+ *	cl = mean_n / (sqrt(ql) * sqrt(qn) / n)
+ *
+ * where ql and qn are the quadratic deviations stored in the statistics
+ * structure and n is the number of admissible files. To limit overflows and
+ * rounding errors, the common divisor of both factors is stored separately,
+ * divided by 100 so that the resulting scores are scaled up by that factor.
+ */
 static long compute_score(struct afs_info *afsi, long mood_score)
 {
-	mood_score -= normalized_value(afsi->num_played, statistics.num,
-		statistics.num_played_sum, statistics.num_played_qd);
-	mood_score -= normalized_value(afsi->last_played, statistics.num,
-		statistics.last_played_sum, statistics.last_played_qd);
-	return mood_score / 3;
+	const struct afs_statistics *s = &statistics;
+	int64_t dev_n, dev_l, sum;
+
+	assert(s->normalization_divisor > 0);
+	assert(s->num > 0);
+	/* Deviation of this file from the mean over all admissible files. */
+	dev_n = (int64_t)afsi->num_played - s->num_played_sum / s->num;
+	dev_l = (int64_t)afsi->last_played - s->last_played_sum / s->num;
+
+	/* Negated: with positive factors, below-average values score higher. */
+	sum = mood_score;
+	sum += -dev_n * s->num_played_correction / s->normalization_divisor;
+	sum += -dev_l * s->last_played_correction / s->normalization_divisor;
+	return sum / 3;
 }
 
 static int add_afs_statistics(const struct osl_row *row)
@@ -556,6 +601,7 @@ static int del_afs_statistics(const struct osl_row *row)
 	assert(n);
 	if (n == 1) {
 		memset(&statistics, 0, sizeof(statistics));
+		statistics.normalization_divisor = 1;
 		return 1;
 	}
 
@@ -804,15 +850,11 @@ static int mood_update_audio_file(const struct osl_row *aft_row,
 	return score_update(aft_row, percent);
 }
 
-static void log_statistics(void)
+/* sse: seconds since epoch. */
+static void log_statistics(int64_t sse)
 {
 	unsigned n = statistics.num;
 	int mean_days, sigma_days;
-	/*
-	 * We can not use the "now" pointer from sched.c here because we are
-	 * called before schedule(), which initializes "now".
-	 */
-	struct timeval rnow;
 
 	assert(current_mood);
 	PARA_NOTICE_LOG("loaded mood %s\n", current_mood->name?
@@ -822,13 +864,18 @@ static void log_statistics(void)
 		return;
 	}
 	PARA_NOTICE_LOG("%u admissible files\n", statistics.num);
-	clock_get_realtime(&rnow);
-	mean_days = (rnow.tv_sec - statistics.last_played_sum / n) / 3600 / 24;
+	mean_days = (sse - statistics.last_played_sum / n) / 3600 / 24;
 	sigma_days = int_sqrt(statistics.last_played_qd / n) / 3600 / 24;
 	PARA_NOTICE_LOG("last_played mean/sigma: %d/%d days\n", mean_days, sigma_days);
-	PARA_NOTICE_LOG("num_played mean/sigma: %llu/%llu\n",
-		(long long unsigned)statistics.num_played_sum / n,
-		(long long unsigned)int_sqrt(statistics.num_played_qd / n));
+	PARA_NOTICE_LOG("num_played mean/sigma: %" PRId64 "/%" PRIu64 "\n",
+		statistics.num_played_sum / n,
+		int_sqrt(statistics.num_played_qd / n));
+	PARA_NOTICE_LOG("num_played correction factor: %" PRId64 "\n",
+		statistics.num_played_correction);
+	PARA_NOTICE_LOG("last_played correction factor: %" PRId64 "\n",
+		statistics.last_played_correction);
+	PARA_NOTICE_LOG("normalization divisor: %" PRId64 "\n",
+		statistics.normalization_divisor);
 }
 
 /**
@@ -841,6 +888,25 @@ void close_current_mood(void)
 	destroy_mood(current_mood);
 	current_mood = NULL;
 	memset(&statistics, 0, sizeof(statistics));
+	statistics.normalization_divisor = 1;
+}
+
+static void compute_correction_factors(int64_t sse)
+{
+	struct afs_statistics *s = &statistics;
+
+	if (s->num > 0) { /* the means need at least one file */
+		s->last_played_correction = s->num_played_sum / s->num;
+		s->num_played_correction = sse - s->last_played_sum / s->num;
+		s->normalization_divisor = int_sqrt(s->last_played_qd)
+			* int_sqrt(s->num_played_qd) / s->num / 100;
+	}
+	if (!s->normalization_divisor) /* compute_score() divides by it */
+		s->normalization_divisor = 1;
+	if (!s->last_played_correction)
+		s->last_played_correction = 1;
+	if (!s->num_played_correction)
+		s->num_played_correction = 1;
+}
 
 /**
@@ -869,6 +935,11 @@ int change_current_mood(const char *mood_name, char **errmsg)
 		.size = 0,
 		.array = NULL
 	};
+	/*
+	 * We can not use the "now" pointer from sched.c here because we are
+	 * called before schedule(), which initializes "now".
+	 */
+	struct timeval rnow;
 
 	if (mood_name) {
 		struct mood *m;
@@ -901,6 +972,9 @@ int change_current_mood(const char *mood_name, char **errmsg)
 			*errmsg = make_message("audio file loop failed");
 		return ret;
 	}
+	clock_get_realtime(&rnow);
+	compute_correction_factors(rnow.tv_sec);
+	log_statistics(rnow.tv_sec);
 	for (i = 0; i < statistics.num; i++) {
 		struct admissible_file_info *a = aa.array + i;
 		ret = add_to_score_table(a->aft_row, a->score);
@@ -911,7 +985,6 @@ int change_current_mood(const char *mood_name, char **errmsg)
 			goto out;
 		}
 	}
-	log_statistics();
 	ret = statistics.num;
 out:
 	free(aa.array);
-- 
2.39.5