rework score formula
author Andre Noll <maan@tuebingen.mpg.de>
Mon, 25 Nov 2019 20:22:35 +0000 (21:22 +0100)
committer Andre Noll <maan@tuebingen.mpg.de>
Sun, 8 Dec 2019 09:03:44 +0000 (10:03 +0100)
mood.c

diff --git a/mood.c b/mood.c
index a63d4d2..919c765 100644 (file)
--- a/mood.c
+++ b/mood.c
@@ -39,10 +39,16 @@ struct afs_statistics {
        int64_t num_played_qd;
        /** Quadratic deviation of last played time. */
        int64_t last_played_qd;
+       /** Correction factor for the num played score. */
+       int64_t num_played_correction;
+       /** Correction factor for the last played score. */
+       int64_t last_played_correction;
+       /** Common divisor of the correction factors. */
+       int64_t normalization_divisor;
        /** Number of admissible files */
        unsigned num;
 };
-static struct afs_statistics statistics;
+static struct afs_statistics statistics = {.normalization_divisor = 1};
 
 /**
  * Each line of the current mood corresponds to a mood_item.
@@ -499,20 +505,78 @@ int mood_check_callback(struct afs_callback_arg *aca)
                check_mood));
 }
 
-static int64_t normalized_value(int64_t x, int64_t n, int64_t sum, int64_t qd)
-{
-       if (!n || !qd)
-               return 0;
-       return 100 * (n * x - sum) / (int64_t)int_sqrt(n) / (int64_t)int_sqrt(qd);
-}
+/*
+ * The normalized num_played (np) and last_played (lp) values are defined as
+ *
+ *     nn := -(np - mean_n) / sigma_n and nl := -(lp - mean_l) / sigma_l
+ *
+ * For a (hypothetical) file with np = 0 and lp = now, we thus have
+ *
+ *     nn =  mean_n / sigma_n =: hn > 0
+ *     nl = -(now - mean_l) / sigma_l =: hl < 0
+ *
+ * We design the score function so that both contributions get the same
+ * weight. Define the np and lp score of an arbitrary file as
+ *
+ *     sn := nn * -hl and sl := nl * hn
+ *
+ * Example:
+ *     num_played mean/sigma: 87/14
+ *     last_played mean/sigma: 45/32 days
+ *
+ *     We have hn = 87 / 14 = 6.21 and hl = -45 / 32 = -1.41. Multiplying
+ *     nn of every file with the correction factor 1.41 and nl with
+ *     6.21 makes the weight of the two contributions equal.
+ *
+ * The total score s := sn + sl has the representation
+ *
+ *     s = -cn * (np - mean_n) - cl * (lp - mean_l)
+ *
+ * with positive correction factors
+ *
+ *     cn = (now - mean_l) / (sqrt(ql) * sqrt(qn) / n)
+ *     cl = mean_n / (sqrt(ql) * sqrt(qn) / n)
+ *
+ * where ql and qn are the quadratic deviations stored in the statistics
+ * structure and n is the number of admissible files. To avoid integer
+ * overflows and rounding errors, we store the common divisor of the
+ * correction factors separately, divided by 100, which scales the scores by 100.
+ */
 
 static long compute_score(struct afs_info *afsi, long mood_score)
 {
-       mood_score -= normalized_value(afsi->num_played, statistics.num,
-               statistics.num_played_sum, statistics.num_played_qd);
-       mood_score -= normalized_value(afsi->last_played, statistics.num,
-               statistics.last_played_sum, statistics.last_played_qd);
-       return mood_score / 3;
+//     int64_t n, sqrt_n, mean_n, mean_l, sigma_n, sigma_l, score_n, score_l;
+       int64_t mean_n, mean_l,score_n, score_l;
+//     struct timeval rnow;
+//     clock_get_realtime(&rnow);
+
+//     n = statistics.num;
+//     sqrt_n = int_sqrt(n);
+       assert(statistics.normalization_divisor > 0);
+       assert(statistics.num > 0);
+       mean_n = statistics.num_played_sum / statistics.num;
+       mean_l = statistics.last_played_sum / statistics.num;
+//     sigma_n = int_sqrt(statistics.num_played_qd) / sqrt_n;
+//     if (sigma_n == 0)
+//             sigma_n = 1;
+//     sigma_l = int_sqrt(statistics.last_played_qd) / sqrt_n;
+//     if (sigma_l == 0)
+//             sigma_l = 1;
+//     score_l = (rnow.tv_sec - mean_l) / sigma_l;
+//     score_n = mean_n / sigma_n;
+
+       score_n = -((int64_t)afsi->num_played - mean_n)
+               * statistics.num_played_correction
+               / statistics.normalization_divisor;
+//     score_n = -(int64_t)100 * (afsi->num_played - mean_n) / sigma_n * (rnow.tv_sec - mean_l) / sigma_l;
+//     PARA_CRIT_LOG("this score nold: %lli\n", score_n);
+       score_l = -((int64_t)afsi->last_played - mean_l)
+               * statistics.last_played_correction
+               / statistics.normalization_divisor;
+//     PARA_CRIT_LOG("this score lnew: %lli\n", score_l);
+//     score_l = -(int64_t)100 * ((int64_t)afsi->last_played - mean_l) / sigma_l * mean_n / sigma_n;
+//     PARA_CRIT_LOG("this score lold: %lli\n", score_l);
+       return (mood_score + score_n + score_l) / 3;
 }
 
 static int add_afs_statistics(const struct osl_row *row)
@@ -556,6 +620,7 @@ static int del_afs_statistics(const struct osl_row *row)
        assert(n);
        if (n == 1) {
                memset(&statistics, 0, sizeof(statistics));
+               statistics.normalization_divisor = 1;
                return 1;
        }
 
@@ -804,15 +869,11 @@ static int mood_update_audio_file(const struct osl_row *aft_row,
        return score_update(aft_row, percent);
 }
 
-static void log_statistics(void)
+/* sse: seconds since epoch. */
+static void log_statistics(int64_t sse)
 {
        unsigned n = statistics.num;
        int mean_days, sigma_days;
-       /*
-        * We can not use the "now" pointer from sched.c here because we are
-        * called before schedule(), which initializes "now".
-        */
-       struct timeval rnow;
 
        assert(current_mood);
        PARA_NOTICE_LOG("loaded mood %s\n", current_mood->name?
@@ -822,13 +883,18 @@ static void log_statistics(void)
                return;
        }
        PARA_NOTICE_LOG("%u admissible files\n", statistics.num);
-       clock_get_realtime(&rnow);
-       mean_days = (rnow.tv_sec - statistics.last_played_sum / n) / 3600 / 24;
+       mean_days = (sse - statistics.last_played_sum / n) / 3600 / 24;
        sigma_days = int_sqrt(statistics.last_played_qd / n) / 3600 / 24;
        PARA_NOTICE_LOG("last_played mean/sigma: %d/%d days\n", mean_days, sigma_days);
-       PARA_NOTICE_LOG("num_played mean/sigma: %llu/%llu\n",
-               (long long unsigned)statistics.num_played_sum / n,
-               (long long unsigned)int_sqrt(statistics.num_played_qd / n));
+       PARA_NOTICE_LOG("num_played mean/sigma: %" PRId64 "/%" PRIu64 "\n",
+               statistics.num_played_sum / n,
+               int_sqrt(statistics.num_played_qd / n));
+       PARA_NOTICE_LOG("num_played correction factor: %" PRId64 "\n",
+               statistics.num_played_correction);
+       PARA_NOTICE_LOG("last_played correction factor: %" PRId64 "\n",
+               statistics.last_played_correction);
+       PARA_NOTICE_LOG("normalization divisor: %" PRId64 "\n",
+               statistics.normalization_divisor);
 }
 
 /**
@@ -841,6 +907,7 @@ void close_current_mood(void)
        destroy_mood(current_mood);
        current_mood = NULL;
        memset(&statistics, 0, sizeof(statistics));
+       statistics.normalization_divisor = 1;
 }
 
 /**
@@ -869,6 +936,11 @@ int change_current_mood(const char *mood_name, char **errmsg)
                .size = 0,
                .array = NULL
        };
+       /*
+        * We can not use the "now" pointer from sched.c here because we are
+        * called before schedule(), which initializes "now".
+        */
+       struct timeval rnow;
 
        if (mood_name) {
                struct mood *m;
@@ -901,6 +973,21 @@ int change_current_mood(const char *mood_name, char **errmsg)
                        *errmsg = make_message("audio file loop failed");
                return ret;
        }
+       /* compute correction factors for score function */
+       statistics.normalization_divisor = int_sqrt(statistics.last_played_qd)
+               * int_sqrt(statistics.num_played_qd) / statistics.num / 100;
+       if (statistics.normalization_divisor == 0)
+               statistics.normalization_divisor = 1;
+       clock_get_realtime(&rnow);
+       statistics.num_played_correction =
+               (int64_t)rnow.tv_sec - statistics.last_played_sum / statistics.num;
+       if (statistics.num_played_correction == 0)
+               statistics.num_played_correction = 1;
+       statistics.last_played_correction =
+               statistics.num_played_sum / statistics.num;
+       if (statistics.last_played_correction == 0)
+               statistics.last_played_correction = 1;
+       log_statistics(rnow.tv_sec);
        for (i = 0; i < statistics.num; i++) {
                struct admissible_file_info *a = aa.array + i;
                ret = add_to_score_table(a->aft_row, a->score);
@@ -911,7 +998,6 @@ int change_current_mood(const char *mood_name, char **errmsg)
                        goto out;
                }
        }
-       log_statistics();
        ret = statistics.num;
 out:
        free(aa.array);