--- /dev/null
+/*
+ * Copyright (C) 2017 Andre Noll <maan@tuebingen.mpg.de>
+ *
+ * Licensed under the GPL v2. For licencing details see COPYING.
+ */
+
+/**
+ * \file mp.c Mood parser helper functions.
+ *
+ * This file contains the public and the private API of the flex/bison based
+ * mood parser.
+ *
+ * The public API (at the bottom of the file) allows parsing the same mood
+ * definition many times in an efficient manner.
+ *
+ * The first function to call is \ref mp_init(), which analyzes the given mood
+ * definition syntactically. It returns the abstract syntax tree of the mood
+ * definition and pre-compiles all regular expression patterns to make later
+ * pattern matching efficient.
+ *
+ * Semantic analysis is performed in \ref mp_eval_row(). This function is
+ * called from \ref mood.c once for each file in the audio file table. It
+ * utilizes the abstract syntax tree and the pre-compiled regular expressions
+ * to determine the set of admissible audio files.
+ *
+ * If the mood is no longer needed, \ref mp_shutdown() should be called to free
+ * the resources.
+ *
+ * The internal API is described in \ref mp.h.
+ */
+
+#include "para.h"
+
+#include <regex.h>
+#include <fnmatch.h>
+#include <osl.h>
+#include <lopsub.h>
+
+#include "string.h"
+#include "error.h"
+#include "afh.h"
+#include "afs.h"
+#include "mp.h"
+#include "mp.bison.h"
+
+struct mp_context {
+ /* global context: set up by mp_init() and kept until the context is
+  * released */
+ char *errmsg; /* first parse error, NULL if none was recorded */
+ struct mp_ast_node *ast; /* filled in by mp_yyparse(), freed by mp_shutdown() */
+ /* per audio file context: reset by mp_eval_row() for every row that is
+  * evaluated */
+ const struct osl_row *aft_row; /* row of the audio file being evaluated */
+ char *path; /* looked up on demand by mp_path(), NULL until then */
+ bool have_afsi; /* true once afsi below has been loaded for this row */
+ struct afs_info afsi;
+ bool have_afhi; /* true once afhi below has been loaded for this row */
+ struct afh_info afhi;
+};
+
+/**
+ * Parse a (generalized) string literal.
+ *
+ * \param src The string to parse.
+ * \param quote_chars Opening and closing quote characters.
+ * \param result The corresponding C string is returned here.
+ *
+ * This function turns a generalized C99 string literal like "xyz\n" into a C
+ * string (containing the three characters 'x', 'y' and 'z', followed by a
+ * newline character and the terminating zero byte). The function allows the
+ * caller to specify different quote characters so that, for example, regular
+ * expression patterns enclosed in '/' can be parsed as well. To parse a proper
+ * string literal, one has to pass two double quotes as the second argument.
+ *
+ * The function strips off the opening and closing quote characters, replaces
+ * double backslashes by single backslashes and handles the usual escapes like
+ * \n and \".
+ *
+ * The caller must make sure that the input is well-formed. The function simply
+ * aborts if the input is not a valid C99 string literal (modulo the quote
+ * characters).
+ *
+ * \return Offset of the first character after the closing quote. For proper
+ * string literals this will be the terminating zero byte of the input string,
+ * for regular expression patterns it is the beginning of the flags which
+ * modify the matching behaviour.
+ *
+ * \sa \ref mp_parse_regex_pattern(), \ref mp_parse_wildcard_pattern().
+ */
+unsigned parse_quoted_string(const char *src, const char quote_chars[2],
+		char **result)
+{
+	/* Characters which may follow a backslash, and what they stand for. */
+	static const char esc_names[] = "\\abfnrtv";
+	static const char esc_values[] = "\\\a\b\f\n\r\t\v";
+	const char closing = quote_chars[1];
+	size_t srclen = strlen(src), pos = 1, used = 0;
+	bool escaped = false;
+	char *buf;
+
+	assert(srclen >= 2);
+	assert(src[0] == quote_chars[0]);
+	/*
+	 * Both quote characters are dropped, so srclen - 1 bytes always
+	 * suffice for the payload plus the terminating zero byte.
+	 */
+	buf = para_malloc(srclen - 1);
+	for (;; pos++) {
+		const char *hit;
+		char ch;
+
+		assert(pos < srclen);
+		ch = src[pos];
+		if (escaped) {
+			escaped = false;
+			/* An escaped closing quote takes precedence. */
+			if (ch == closing) {
+				buf[used++] = closing;
+				continue;
+			}
+			hit = memchr(esc_names, ch, sizeof(esc_names) - 1);
+			if (hit)
+				buf[used++] = esc_values[hit - esc_names];
+			else /* unknown escape sequence */
+				assert(false);
+			continue;
+		}
+		if (ch == '\\') {
+			escaped = true;
+			continue;
+		}
+		if (ch == closing) /* unescaped closing quote: done */
+			break;
+		buf[used++] = ch;
+	}
+	assert(src[pos] == quote_chars[1]);
+	buf[used] = '\0';
+	*result = buf;
+	/* offset of the first character behind the closing quote */
+	return pos + 1;
+}
+
+/**
+ * Parse and compile an extended regular expression pattern, including flags.
+ *
+ * \param src The pattern to parse.
+ * \param result The compiled pattern and the flags are returned here.
+ *
+ * A regex pattern is identical to a C99 string literal except (a) it is
+ * enclosed in '/' characters rather than double quotes, (b) double quote
+ * characters which are part of the pattern do not need to be quoted with
+ * backslashes, but slashes must be quoted in this way, and (c) the closing
+ * slash may be followed by one or more flag characters which modify the
+ * matching behaviour.
+ *
+ * The only flags which are currently supported are 'i' to ignore case in match
+ * (REG_ICASE) and 'n' to change the handling of newline characters
+ * (REG_NEWLINE).
+ *
+ * \return Standard. This function calls \ref parse_quoted_string(), hence it
+ * aborts if the input string is malformed. However, errors from \ref
+ * para_regcomp are returned without aborting the process. The rationale behind
+ * this difference is that passing a malformed string must be considered an
+ * implementation bug because malformed strings should be rejected earlier by
+ * the lexer.
+ *
+ * \sa \ref mp_parse_wildcard_pattern(), \ref parse_quoted_string(),
+ * \ref para_regcomp(), regex(3).
+ */
+int mp_parse_regex_pattern(const char *src, struct mp_re_pattern *result)
+{
+	char *pattern;
+	const char *flag;
+	int ret;
+
+	/* The flag characters start right behind the closing slash. */
+	flag = src + parse_quoted_string(src, "//", &pattern);
+	result->flags = 0;
+	while (*flag) {
+		if (*flag == 'i')
+			result->flags |= REG_ICASE;
+		else if (*flag == 'n')
+			result->flags |= REG_NEWLINE;
+		else /* the lexer should have rejected this */
+			assert(false);
+		flag++;
+	}
+	ret = para_regcomp(&result->preg, pattern, result->flags);
+	/* Only the compiled form is handed back, the string is dropped. */
+	free(pattern);
+	return ret;
+}
+
+/**
+ * Parse a wildcard pattern, including flags.
+ *
+ * \param src The pattern to parse.
+ * \param result C-string and flags are returned here.
+ *
+ * This function parses a shell wildcard pattern. It is similar to \ref
+ * mp_parse_regex_pattern(), so the remarks mentioned there apply to this
+ * function as well.
+ *
+ * Wildcard patterns differ from regular expression patterns in that (a) they
+ * must be enclosed in '|' characters, (b) they support different flags for
+ * modifying matching behaviour, and (c) there is no cache for them.
+ *
+ * The following flags, whose meaning is explained in fnmatch(3), are currently
+ * supported: 'n' (FNM_NOESCAPE), 'p' (FNM_PATHNAME), 'P' (FNM_PERIOD), 'l'
+ * (FNM_LEADING_DIR), 'i' (FNM_CASEFOLD), 'e' (FNM_EXTMATCH). The last flag is
+ * a GNU extension. It is silently ignored on non GNU systems.
+ *
+ * \sa \ref parse_quoted_string(), \ref mp_parse_regex_pattern(), fnmatch(3).
+ */
+void mp_parse_wildcard_pattern(const char *src, struct mp_wc_pattern *result)
+{
+	const char *flag;
+
+	/* The unquoted pattern goes straight into the result structure. */
+	flag = src + parse_quoted_string(src, "||", &result->pat);
+	result->flags = 0;
+	for (; *flag != '\0'; flag++) {
+		switch (*flag) {
+		case 'n':
+			result->flags |= FNM_NOESCAPE;
+			break;
+		case 'p':
+			result->flags |= FNM_PATHNAME;
+			break;
+		case 'P':
+			result->flags |= FNM_PERIOD;
+			break;
+		case 'l':
+			/* not POSIX, but both FreeBSD and NetBSD have it */
+			result->flags |= FNM_LEADING_DIR;
+			break;
+		case 'i':
+			result->flags |= FNM_CASEFOLD;
+			break;
+		case 'e':
+#ifdef HAVE_FNM_EXTMATCH
+			/* GNU only */
+			result->flags |= FNM_EXTMATCH;
+#endif
+			/* elsewhere the extglob flag is silently ignored */
+			break;
+		default:
+			assert(false);
+		}
+	}
+}
+
+/**
+ * Store an error message in the parser context and log it.
+ *
+ * \param line The number of the input line which caused the error.
+ * \param ctx The error message is stored here.
+ * \param fmt Usual format string.
+ *
+ * This is called if the lexer or the parser detect an error in the mood
+ * definition. Only the first error is stored and logged (with a severity of
+ * "warn").
+ */
+__printf_3_4 void mp_parse_error(int line, struct mp_context *ctx,
+		const char *fmt, ...)
+{
+	if (!ctx->errmsg) { /* later errors are dropped */
+		va_list args;
+		char *msg;
+
+		va_start(args, fmt);
+		xvasprintf(&msg, fmt, args);
+		va_end(args);
+		/* Prefix the formatted text with the line number. */
+		xasprintf(&ctx->errmsg, "line %d: %s", line, msg);
+		free(msg);
+		PARA_WARNING_LOG("%s\n", ctx->errmsg);
+	}
+}
+
+/*
+ * Load the afs info of the current row unless it was loaded already.
+ *
+ * Returns zero if the cached copy was used, one if the info had to be
+ * fetched, and the negative return value of get_afsi_of_row() on errors.
+ */
+static int get_afsi(struct mp_context *ctx)
+{
+	int ret = 0;
+
+	if (!ctx->have_afsi) {
+		ret = get_afsi_of_row(ctx->aft_row, &ctx->afsi);
+		if (ret >= 0) {
+			ctx->have_afsi = true;
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Load the afh info of the current row unless it was loaded already.
+ *
+ * Returns zero if the cached copy was used, one if the info had to be
+ * fetched, and the negative return value of get_afhi_of_row() on errors.
+ */
+static int get_afhi(struct mp_context *ctx)
+{
+	int ret = 0;
+
+	if (!ctx->have_afhi) {
+		ret = get_afhi_of_row(ctx->aft_row, &ctx->afhi);
+		if (ret >= 0) {
+			ctx->have_afhi = true;
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+/**
+ * Return the full path to the audio file.
+ *
+ * \param ctx Contains a reference to the row of the audio file table which
+ * corresponds to the current audio file. The path of the audio file, the
+ * afs_info and the afh_info structures (which contain the tag information) can
+ * be retrieved through this reference.
+ *
+ * \return A reference to the path. Must not be freed by the caller.
+ *
+ * \sa \ref get_audio_file_path_of_row().
+ */
+char *mp_path(struct mp_context *ctx)
+{
+	/* The lookup is done at most once per successfully resolved row. */
+	if (ctx->path)
+		return ctx->path;
+	get_audio_file_path_of_row(ctx->aft_row, &ctx->path);
+	return ctx->path;
+}
+
+/**
+ * Check whether the given attribute is set for the current audio file.
+ *
+ * \param attr The string to look up in the attribute table.
+ * \param ctx See \ref mp_path().
+ *
+ * First, determine the bit number which corresponds to the attribute, then
+ * check if this bit is set in the ->attributes field of the afs_info structure
+ * of the audio file.
+ *
+ * \return True if the attribute is set, false if it is not. On errors, for
+ * example if the given string is no attribute, the function returns false.
+ *
+ * \sa \ref get_attribute_bitnum_by_name().
+ */
+bool mp_is_set(const char *attr, struct mp_context *ctx)
+{
+	unsigned char bit;
+	uint64_t mask;
+
+	/* Unknown attribute names count as "not set". */
+	if (get_attribute_bitnum_by_name(attr, &bit) < 0)
+		return false;
+	if (get_afsi(ctx) < 0)
+		return false;
+	mask = (uint64_t)1 << bit;
+	return mask & ctx->afsi.attributes;
+}
+
+/**
+ * Count the number of attributes set.
+ *
+ * \param ctx See \ref mp_path().
+ *
+ * \return The number of bits which are set in the ->attributes field of the
+ * afs_info structure of the current audio file.
+ */
+int64_t mp_num_attributes_set(struct mp_context *ctx)
+{
+	/*
+	 * Masks for the parallel bit count described at
+	 * https://graphics.stanford.edu/~seander/bithacks.html
+	 */
+	const uint64_t pairs = UINT64_C(0x5555555555555555);
+	const uint64_t nibbles = UINT64_C(0x3333333333333333);
+	const uint64_t bytes = UINT64_C(0x0f0f0f0f0f0f0f0f);
+	const uint64_t ones = UINT64_C(0x0101010101010101);
+	uint64_t bits;
+
+	if (get_afsi(ctx) < 0)
+		return 0;
+	bits = ctx->afsi.attributes;
+	/* count within pairs, then nibbles, then bytes */
+	bits -= (bits >> 1) & pairs;
+	bits = (bits & nibbles) + ((bits >> 2) & nibbles);
+	bits = (bits + (bits >> 4)) & bytes;
+	/* the multiplication sums all byte counts into the top byte */
+	bits = (bits * ones) >> 56;
+	assert(bits <= 64);
+	return bits;
+}
+
+/**
+ * Define a function which returns a field of the afs_info structure.
+ *
+ * \param _name The name of the field.
+ *
+ * The defined function casts the value to int64_t. On errors, zero is returned.
+ */
+#define MP_AFSI(_name) \
+	int64_t mp_ ## _name(struct mp_context *ctx) \
+	{ \
+		/* zero is reported if the afs info can not be loaded */ \
+		return get_afsi(ctx) < 0? 0 : ctx->afsi._name; \
+	}
+/** \cond MP_AFSI */
+MP_AFSI(num_played)
+MP_AFSI(image_id)
+MP_AFSI(lyrics_id)
+/** \endcond */
+
+/**
+ * Define a function which returns a field of the afh_info structure.
+ *
+ * \param _name The name of the field.
+ *
+ * The defined function casts the value to int64_t. On errors, zero is returned.
+ */
+#define MP_AFHI(_name) \
+	int64_t mp_ ## _name(struct mp_context *ctx) \
+	{ \
+		/* zero is reported if the afh info can not be loaded */ \
+		return get_afhi(ctx) < 0? 0 : ctx->afhi._name; \
+	}
+/** \cond MP_AFHI */
+MP_AFHI(bitrate)
+MP_AFHI(frequency)
+MP_AFHI(channels)
+/** \endcond */
+
+/**
+ * Define a function which extracts and returns the value of a meta tag.
+ *
+ * \param _name The name of the tag (artist, title, ...).
+ *
+ * The function will return a pointer to memory owned by the audio file
+ * selector. On errors, or if the current audio file has no tag of the given
+ * name, the function returns the empty string. The caller must not attempt to
+ * free the returned string.
+ */
+#define MP_TAG(_name) \
+	char *mp_ ## _name (struct mp_context *ctx) \
+	{ \
+		/* fall back to the empty string if the afh info is missing */ \
+		return get_afhi(ctx) < 0? "" : ctx->afhi.tags._name; \
+	}
+/** \cond MP_TAG */
+MP_TAG(artist)
+MP_TAG(title)
+MP_TAG(album)
+MP_TAG(comment)
+/** \endcond */
+
+/**
+ * Parse and return the value of the year tag.
+ *
+ * \param ctx See \ref mp_path().
+ *
+ * \return If the year tag is not present, can not be parsed, or its value is
+ * less than zero, the function returns 0. If the value is less than 100, we
+ * add 1900.
+ */
+int64_t mp_year(struct mp_context *ctx)
+{
+	int64_t value;
+
+	if (get_afhi(ctx) < 0)
+		return 0;
+	assert(ctx->afhi.tags.year);
+	/* unparsable and negative values both map to zero */
+	if (para_atoi64(ctx->afhi.tags.year, &value) < 0 || value < 0)
+		return 0;
+	/* two-digit years are taken to be in the 20th century */
+	return value < 100? value + 1900 : value;
+}
+
+/*
+ * Ideally, these functions should be declared in a header file which is
+ * created by flex with the --header-file option. However, for flex-2.6.x
+ * (2017) this option is broken: if --reentrant is also given, the generated
+ * header file contains syntax errors. As a workaround we declare the functions
+ * here.
+ */
+/** \cond flex_workaround */
+int mp_yylex_init(mp_yyscan_t *yyscanner); /* mp_init() expects this to return 0 */
+struct yy_buffer_state *mp_yy_scan_bytes(const char *buf, int len,
+ mp_yyscan_t yyscanner); /* the returned buffer state is passed to mp_yy_delete_buffer() */
+void mp_yy_delete_buffer(struct yy_buffer_state *bs, mp_yyscan_t yyscanner);
+int mp_yylex_destroy(mp_yyscan_t yyscanner); /* called by mp_init() once parsing is done */
+void mp_yyset_lineno(int lineno, mp_yyscan_t scanner); /* mp_init() starts at line 1 */
+/** \endcond */
+
+/* Public API */
+
+/**
+ * Initialize the mood parser.
+ *
+ * This allocates and sets up the internal structures of the mood parser
+ * and creates an abstract syntax tree from the given mood definition.
+ * It must be called before \ref mp_eval_row() can be called.
+ *
+ * The context pointer returned by this function may be passed to \ref
+ * mp_eval_row() to determine whether an audio file is admissible.
+ *
+ * \param definition A reference to the mood definition.
+ * \param nbytes The size of the mood definition.
+ * \param result Opaque context pointer is returned here.
+ * \param errmsg Optional error message is returned here.
+ *
+ * It's OK to pass a NULL pointer or a zero sized buffer as the mood
+ * definition. This corresponds to the "dummy" mood for which all audio files
+ * are admissible.
+ *
+ * The error message pointer may also be NULL in which case no error message
+ * is returned. Otherwise, the caller must free the returned string.
+ *
+ * \return Standard. On success *errmsg is set to NULL.
+ */
+int mp_init(const char *definition, int nbytes, struct mp_context **result,
+		char **errmsg)
+{
+	struct yy_buffer_state *input;
+	struct mp_context *mpc;
+	mp_yyscan_t lexer;
+	int ret;
+
+	if (!definition || nbytes == 0) {
+		/* dummy mood: there is nothing to parse and no context */
+		if (errmsg)
+			*errmsg = NULL;
+		*result = NULL;
+		return 0;
+	}
+	mpc = para_calloc(sizeof(*mpc));
+	mpc->errmsg = NULL;
+	mpc->ast = NULL;
+
+	ret = mp_yylex_init(&lexer);
+	assert(ret == 0);
+	input = mp_yy_scan_bytes(definition, nbytes, lexer);
+	mp_yyset_lineno(1, lexer);
+	PARA_NOTICE_LOG("creating abstract syntax tree\n");
+	ret = mp_yyparse(mpc, &mpc->ast, lexer);
+	/* The scanner is needed only while the definition is parsed. */
+	mp_yy_delete_buffer(input, lexer);
+	mp_yylex_destroy(lexer);
+	if (!mpc->errmsg) { /* success */
+		if (errmsg)
+			*errmsg = NULL;
+		*result = mpc;
+		return 1;
+	}
+	/* Parse error: pass the message to the caller or discard it. */
+	if (errmsg)
+		*errmsg = mpc->errmsg;
+	else
+		free(mpc->errmsg);
+	free(mpc);
+	return -E_MOOD_PARSE;
+}
+
+/**
+ * Determine whether the given audio file is admissible.
+ *
+ * \param aft_row The audio file to check for admissibility.
+ * \param ctx As returned from \ref mp_init().
+ *
+ * \return Whether the audio file is admissible.
+ *
+ * If the mood parser was set up without an input buffer (dummy mood), this
+ * function returns true (without looking at the audio file metadata) to
+ * indicate that the given audio file should be considered admissible.
+ *
+ * \sa \ref change_current_mood(), \ref mp_eval_ast().
+ */
+bool mp_eval_row(const struct osl_row *aft_row, struct mp_context *ctx)
+{
+	if (ctx == NULL) /* dummy mood: every audio file is admissible */
+		return true;
+	assert(aft_row);
+	/* Forget whatever was cached for the previously evaluated row. */
+	ctx->path = NULL;
+	ctx->have_afhi = false;
+	ctx->have_afsi = false;
+	ctx->aft_row = aft_row;
+	return mp_eval_ast(ctx->ast, ctx);
+}
+
+/**
+ * Deallocate the resources of a mood parser.
+ *
+ * This function frees the abstract syntax tree which was created by \ref
+ * mp_init().
+ *
+ * \param ctx As returned from \ref mp_init().
+ *
+ * It's OK to pass a NULL pointer, in which case the function does nothing.
+ */
+void mp_shutdown(struct mp_context *ctx)
+{
+	if (ctx) { /* a NULL context (dummy mood) owns nothing */
+		mp_free_ast(ctx->ast);
+		free(ctx);
+	}
+}