X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=mp.c;fp=mp.c;h=12fe336ef682823df8de17ead3669f1b7d120ec6;hp=0000000000000000000000000000000000000000;hb=3d3a2f50a05501cf27f1155629799953f952bd4b;hpb=a61e862450dc1e74394bf5bbf7002d9947f98d31 diff --git a/mp.c b/mp.c new file mode 100644 index 00000000..12fe336e --- /dev/null +++ b/mp.c @@ -0,0 +1,572 @@ +/* + * Copyright (C) 2017 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** + * \file mp.c Mood parser helper functions. + * + * This file contains the public and the private API of the flex/bison based + * mood parser. + * + * The public API (at the bottom of the file) allows to parse the same mood + * definition many times in an efficient manner. + * + * The first function to all is \ref mp_init(), which analyzes the given mood + * definition syntactically. It returns the abstract syntax tree of the mood + * definition and pre-compiles all regular expression patterns to make later + * pattern matching efficient. + * + * Semantic analysis is performed in \ref mp_eval_row(). This function is + * called from \ref mood.c once for each file in the audio file table. It + * utilizes the abstract syntax tree and the pre-compiled regular expressions + * to determine the set of admissible audio files. + * + * If the mood is no longer needed, \ref mp_shutdown() should be called to free + * the resources. + * + * The internal API is described in \ref mp.h. + */ + +#include "para.h" + +#include +#include +#include +#include + +#include "string.h" +#include "error.h" +#include "afh.h" +#include "afs.h" +#include "mp.h" +#include "mp.bison.h" + +struct mp_context { + /* global context */ + char *errmsg; + struct mp_ast_node *ast; + /* per audio file context */ + const struct osl_row *aft_row; + char *path; + bool have_afsi; + struct afs_info afsi; + bool have_afhi; + struct afh_info afhi; +}; + +/** + * Parse a (generalized) string literal. 
 *
 * \param src The string to parse.
 * \param quote_chars Opening and closing quote characters.
 * \param result The corresponding C string is returned here.
 *
 * This function turns a generalized C99 string literal like "xyz\n" into a C
 * string (containing the three characters 'x', 'y' and 'z', followed by a
 * newline character and the terminating zero byte). The function allows
 * specifying different quote characters so that, for example, regular
 * expression patterns enclosed in '/' can be parsed as well. To parse a proper
 * string literal, one has to pass two double quotes as the second argument.
 *
 * The function strips off the opening and closing quote characters, replaces
 * double backslashes by single backslashes and handles the usual escapes like
 * \n and \".
 *
 * The caller must make sure that the input is well-formed. The function simply
 * aborts if the input is not a valid C99 string literal (modulo the quote
 * characters).
 *
 * \return Offset of the first character after the closing quote. For proper
 * string literals this will be the terminating zero byte of the input string,
 * for regular expression patterns it is the beginning of the flags which
 * modify the matching behaviour.
 *
 * \sa \ref mp_parse_regex_pattern(), \ref mp_parse_wildcard_pattern().
+ */ +unsigned parse_quoted_string(const char *src, const char quote_chars[2], + char **result) +{ + size_t n, len = strlen(src); + char *dst, *p; + bool backslash; + + assert(len >= 2); + assert(src[0] == quote_chars[0]); + p = dst = para_malloc(len - 1); + backslash = false; + for (n = 1;; n++) { + char c; + assert(n < len); + c = src[n]; + if (!backslash) { + if (c == '\\') { + backslash = true; + continue; + } + if (c == quote_chars[1]) + break; + *p++ = c; + continue; + } + if (c == quote_chars[1]) + *p++ = quote_chars[1]; + else switch (c) { + case '\\': *p++ = '\\'; break; + case 'a': *p++ = '\a'; break; + case 'b': *p++ = '\b'; break; + case 'f': *p++ = '\f'; break; + case 'n': *p++ = '\n'; break; + case 'r': *p++ = '\r'; break; + case 't': *p++ = '\t'; break; + case 'v': *p++ = '\v'; break; + default: assert(false); + } + backslash = false; + } + assert(src[n] == quote_chars[1]); + *p = '\0'; + *result = dst; + return n + 1; +} + +/** + * Parse and compile an extended regular expression pattern, including flags. + * + * \param src The pattern to parse. + * \param result C-string and flags are returned here. + * + * A regex pattern is identical to a C99 string literal except (a) it is + * enclosed in '/' characters rather than double quotes, (b) double quote + * characters which are part of the pattern do not need to be quoted with + * backslashes, but slashes must be quoted in this way, and (c) the closing + * slash may be followed by one or more flag characters which modify the + * matching behaviour. + * + * The only flags which are currently supported are 'i' to ignore case in match + * (REG_ICASE) and 'n' to change the handling of newline characters + * (REG_NEWLINE). + * + * \return Standard. This function calls \ref parse_quoted_string(), hence it + * aborts if the input string is malformed. However, errors from \ref + * para_regcomp are returned without aborting the process. 
The rationale behind + * this difference is that passing a malformed string must be considered an + * implementation bug because malformed strings should be rejected earlier by + * the lexer. + * + * \sa \ref mp_parse_wildcard_pattern(), \ref parse_quoted_string(), + * \ref para_regcomp(), regex(3). + */ +int mp_parse_regex_pattern(const char *src, struct mp_re_pattern *result) +{ + int ret; + char *pat; + unsigned n = parse_quoted_string(src, "//", &pat); + + result->flags = 0; + for (; src[n]; n++) { + switch (src[n]) { + case 'i': result->flags |= REG_ICASE; break; + case 'n': result->flags |= REG_NEWLINE; break; + default: assert(false); + } + } + ret = para_regcomp(&result->preg, pat, result->flags); + free(pat); + return ret; +} + +/** + * Parse a wildcard pattern, including flags. + * + * \param src The pattern to parse. + * \param result C-string and flags are returned here. + * + * This function parses a shell wildcard pattern. It is similar to \ref + * mp_parse_regex_pattern(), so the remarks mentioned there apply to this + * function as well. + * + * Wildcard patterns differ from regular expression patterns in that (a) they + * must be enclosed in '|' characters, (b) they support different flags for + * modifying matching behaviour, and (c) there is no cache for them. + * + * The following flags, whose meaning is explained in fnmatch(3), are currently + * supported: 'n' (FNM_NOESCAPE), 'p' (FNM_PATHNAME), 'P' (FNM_PERIOD), 'l' + * (FNM_LEADING_DIR), 'i' (FNM_CASEFOLD), 'e' (FNM_EXTMATCH). The last flag is + * a GNU extension. It is silently ignored on non GNU systems. + * + * \sa \ref parse_quoted_string(), \ref mp_parse_regex_pattern(), fnmatch(3). 
+ */ +void mp_parse_wildcard_pattern(const char *src, struct mp_wc_pattern *result) +{ + unsigned n = parse_quoted_string(src, "||", &result->pat); + + result->flags = 0; + for (; src[n]; n++) { + switch (src[n]) { + case 'n': result->flags |= FNM_NOESCAPE; break; + case 'p': result->flags |= FNM_PATHNAME; break; + case 'P': result->flags |= FNM_PERIOD; break; + /* not POSIX, but both FreeBSD and NetBSD have it */ + case 'l': result->flags |= FNM_LEADING_DIR; break; + case 'i': result->flags |= FNM_CASEFOLD; break; + /* GNU only */ +#ifdef HAVE_FNM_EXTMATCH + case 'e': result->flags |= FNM_EXTMATCH; break; +#else /* silently ignore extglob flag */ + case 'e': break; +#endif + default: assert(false); + } + } +} + +/** + * Set the error bit in the parser context and log a message. + * + * \param line The number of the input line which caused the error. + * \param ctx Contains the error bit. + * \param fmt Usual format string. + * + * This is called if the lexer or the parser detect an error in the mood + * definition. Only the first error is logged (with a severity of "warn"). + */ +__printf_3_4 void mp_parse_error(int line, struct mp_context *ctx, + const char *fmt, ...) +{ + va_list ap; + char *tmp; + + if (ctx->errmsg) /* we already printed an error message */ + return; + va_start(ap, fmt); + xvasprintf(&tmp, fmt, ap); + va_end(ap); + xasprintf(&ctx->errmsg, "line %d: %s", line, tmp); + free(tmp); + PARA_WARNING_LOG("%s\n", ctx->errmsg); +} + +static int get_afsi(struct mp_context *ctx) +{ + int ret; + + if (ctx->have_afsi) + return 0; + ret = get_afsi_of_row(ctx->aft_row, &ctx->afsi); + if (ret < 0) + return ret; + ctx->have_afsi = true; + return 1; +} + +static int get_afhi(struct mp_context *ctx) +{ + int ret; + + if (ctx->have_afhi) + return 0; + ret = get_afhi_of_row(ctx->aft_row, &ctx->afhi); + if (ret < 0) + return ret; + ctx->have_afhi = true; + return 1; +} + +/** + * Return the full path to the audio file. 
+ * + * \param ctx Contains a reference to the row of the audio file table which + * corresponds to the current audio file. The path of the audio file, the + * afs_info and the afh_info structures (which contain the tag information) can + * be retrieved through this reference. + * + * \return A reference to the path. Must not be freed by the caller. + * + * \sa \ref get_audio_file_path_of_row(). + */ +char *mp_path(struct mp_context *ctx) +{ + if (!ctx->path) + get_audio_file_path_of_row(ctx->aft_row, &ctx->path); + return ctx->path; +} + +/** + * Check whether the given attribute is set for the current audio file. + * + * \param attr The string to look up in the attribute table. + * \param ctx See \ref mp_path(). + * + * First, determine the bit number which corresponds to the attribute, then + * check if this bit is set in the ->attributes field of the afs_info structure + * of the audio file. + * + * \return True if the attribute is set, false if it is not. On errors, for + * example if the given string is no attribute, the function returns false. + * + * \sa \ref get_attribute_bitnum_by_name(). + */ +bool mp_is_set(const char *attr, struct mp_context *ctx) +{ + int ret; + unsigned char bitnum; + const uint64_t one = 1; + + ret = get_attribute_bitnum_by_name(attr, &bitnum); + if (ret < 0) /* treat invalid attributes as not set */ + return false; + ret = get_afsi(ctx); + if (ret < 0) + return false; + return (one << bitnum) & ctx->afsi.attributes; +} + +/** + * Count the number of attributes set. + * + * \param ctx See \ref mp_path(). + * + * \return The number of bits which are set in the ->attributes field of the + * afs_info structure of the current audio file. 
+ */ +int64_t mp_num_attributes_set(struct mp_context *ctx) +{ + const uint64_t m = ~(uint64_t)0; + int ret; + uint64_t v; + + ret = get_afsi(ctx); + if (ret < 0) + return 0; + + v = ctx->afsi.attributes; + /* taken from https://graphics.stanford.edu/~seander/bithacks.html */ + v = v - ((v >> 1) & m / 3); + v = (v & m / 15 * 3) + ((v >> 2) & m / 15 * 3); + v = (v + (v >> 4)) & m / 255 * 15; + v = (v * (m / 255)) >> 56; + assert(v <= 64); + return v; +} + +/** + * Define a function which returns a field of the afs_info structure. + * + * \param _name The name of the field. + * + * The defined function casts the value to int64_t. On errors, zero is returned. + */ +#define MP_AFSI(_name) \ + int64_t mp_ ## _name(struct mp_context *ctx) \ + { \ + int ret = get_afsi(ctx); \ + if (ret < 0) \ + return 0; \ + return ctx->afsi._name; \ + } +/** \cond MP_AFSI */ +MP_AFSI(num_played) +MP_AFSI(image_id) +MP_AFSI(lyrics_id) +/** \endcond */ + +/** + * Define a function which returns a field of the afh_info structure. + * + * \param _name The name of the field. + * + * The defined function casts the value to int64_t. On errors, zero is returned. + */ +#define MP_AFHI(_name) \ + int64_t mp_ ## _name(struct mp_context *ctx) \ + { \ + int ret = get_afhi(ctx); \ + if (ret < 0) \ + return 0; \ + return ctx->afhi._name; \ + } +/** \cond MP_AFHI */ +MP_AFHI(bitrate) +MP_AFHI(frequency) +MP_AFHI(channels) +/** \endcond */ + +/** + * Define a function which extracts and returns the value of a meta tag. + * + * \param _name The name of the tag (artist, title, ...). + * + * The function will return a pointer to memory owned by the audio file + * selector. On errors, or if the current audio file has no tag of the given + * name, the function returns the empty string. The caller must not attempt to + * free the returned string. 
+ */ +#define MP_TAG(_name) \ + char *mp_ ## _name (struct mp_context *ctx) \ + { \ + int ret = get_afhi(ctx); \ + if (ret < 0) \ + return ""; \ + return ctx->afhi.tags._name; \ + } +/** \cond MP_TAG */ +MP_TAG(artist) +MP_TAG(title) +MP_TAG(album) +MP_TAG(comment) +/** \endcond */ + +/** + * Parse and return the value of the year tag. + * + * \param ctx See \ref mp_path(). + * + * \return If the year tag is not present, can not be parsed, or its value is + * less than zero, the function returns 0. If the value is less than 100, we + * add 1900. + */ +int64_t mp_year(struct mp_context *ctx) +{ + int64_t year; + int ret = get_afhi(ctx); + + if (ret < 0) + return 0; + assert(ctx->afhi.tags.year); + ret = para_atoi64(ctx->afhi.tags.year, &year); + if (ret < 0) + return 0; + if (year < 0) + return 0; + if (year < 100) + year += 1900; + return year; +} + +/* + * Ideally, these functions should be declared in a header file which is + * created by flex with the --header-file option. However, for flex-2.6.x + * (2017) this option is borken: if --reentrant is also given, the generated + * header file contains syntax errors. As a workaround we declare the functions + * here. + */ +/** \cond flex_workaround */ +int mp_yylex_init(mp_yyscan_t *yyscanner); +struct yy_buffer_state *mp_yy_scan_bytes(const char *buf, int len, + mp_yyscan_t yyscanner); +void mp_yy_delete_buffer(struct yy_buffer_state *bs, mp_yyscan_t yyscanner); +int mp_yylex_destroy(mp_yyscan_t yyscanner); +void mp_yyset_lineno(int lineno, mp_yyscan_t scanner); +/** \endcond */ + +/* Public API */ + +/** + * Initialize the mood parser. + * + * This allocates and sets up the internal structures of the mood parser + * and creates an abstract syntax tree from the given mood definition. + * It must be called before \ref mp_eval_row() can be called. + * + * The context pointer returned by this function may be passed to \ref + * mp_eval_row() to determine whether an audio file is admissible. 
+ * + * \param definition A reference to the mood definition. + * \param nbytes The size of the mood definition. + * \param result Opaque context pointer is returned here. + * \param errmsg Optional error message is returned here. + * + * It's OK to pass a NULL pointer or a zero sized buffer as the mood + * definition. This corresponds to the "dummy" mood for which all audio files + * are admissible. + * + * The error message pointer may also be NULL in which case no error message + * is returned. Otherwise, the caller must free the returned string. + * + * \return Standard. On success *errmsg is set to NULL. + */ +int mp_init(const char *definition, int nbytes, struct mp_context **result, + char **errmsg) +{ + int ret; + mp_yyscan_t scanner; + struct mp_context *ctx; + struct yy_buffer_state *buffer_state; + + if (!definition || nbytes == 0) { /* dummy mood */ + if (errmsg) + *errmsg = NULL; + *result = NULL; + return 0; + } + ctx = para_calloc(sizeof(*ctx)); + ctx->errmsg = NULL; + ctx->ast = NULL; + + ret = mp_yylex_init(&scanner); + assert(ret == 0); + buffer_state = mp_yy_scan_bytes(definition, nbytes, scanner); + mp_yyset_lineno(1, scanner); + PARA_NOTICE_LOG("creating abstract syntax tree\n"); + ret = mp_yyparse(ctx, &ctx->ast, scanner); + mp_yy_delete_buffer(buffer_state, scanner); + mp_yylex_destroy(scanner); + if (ctx->errmsg) { /* parse error */ + if (errmsg) + *errmsg = ctx->errmsg; + else + free(ctx->errmsg); + free(ctx); + return -E_MOOD_PARSE; + } + if (errmsg) + *errmsg = NULL; + *result = ctx; + return 1; +} + +/** + * Determine whether the given audio file is admissible. + * + * \param aft_row The audio file to check for admissibility. + * \param ctx As returned from \ref mp_init(). + * + * \return Whether the audio file is admissible. 
+ * + * If the mood parser was set up without an input buffer (dummy mood), this + * function returns true (without looking at the audio file metadata) to + * indicate that the given audio file should be considered admissible. + * + * \sa \ref change_current_mood(), \ref mp_eval_ast(). + */ +bool mp_eval_row(const struct osl_row *aft_row, struct mp_context *ctx) +{ + if (!ctx) /* dummy mood */ + return true; + assert(aft_row); + ctx->aft_row = aft_row; + ctx->have_afsi = false; + ctx->have_afhi = false; + ctx->path = NULL; + return mp_eval_ast(ctx->ast, ctx); +} + +/** + * Deallocate the resources of a mood parser. + * + * This function frees the abstract syntax tree which was created by \ref + * mp_init(). + * + * \param ctx As returned from \ref mp_init(). + * + * It's OK to pass a NULL pointer, in which case the function does nothing. + */ +void mp_shutdown(struct mp_context *ctx) +{ + if (!ctx) + return; + mp_free_ast(ctx->ast); + free(ctx); +}