From: Andre Noll Date: Fri, 22 Sep 2017 08:06:45 +0000 (+0200) Subject: Merge branch 'refs/heads/t/mp' X-Git-Tag: v0.6.1~7 X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=commitdiff_plain;h=177ea8ea46918a925c0d2d8a07e7fbe9f478a40c;hp=ee7c57f8dc4edfdc91f2f8657b6429d90ab13e79 Merge branch 'refs/heads/t/mp' This pile contains the new version 2 mood parser which depends on flex and bison. Cooking for a month. * refs/heads/t/mp: Version 2 moods. --- diff --git a/Makefile.in b/Makefile.in index 556a926c..d4a83a77 100644 --- a/Makefile.in +++ b/Makefile.in @@ -8,6 +8,8 @@ datarootdir := @datarootdir@ PACKAGE_TARNAME := @PACKAGE_TARNAME@ PACKAGE_VERSION := @PACKAGE_VERSION@ +FLEX := @FLEX@ +BISON := @BISON@ M4 := @M4@ LOPSUBGEN := @LOPSUBGEN@ diff --git a/Makefile.real b/Makefile.real index 3631a5c9..b60c5698 100644 --- a/Makefile.real +++ b/Makefile.real @@ -32,12 +32,15 @@ m4depdir := $(build_dir)/m4deps lls_suite_dir := $(build_dir)/lls lls_m4_dir := m4/lls test_dir := t +yy_src_dir = yy +yy_build_dir = $(build_dir)/yy # sort removes duplicate words, which is all we need here all_objs := $(sort $(recv_objs) $(filter_objs) $(client_objs) $(gui_objs) \ $(audiod_objs) $(audioc_objs) $(mixer_objs) $(server_objs) \ $(write_objs) $(afh_objs) $(play_objs)) deps := $(addprefix $(dep_dir)/, $(all_objs:.o=.d)) +deps += $(addprefix $(dep_dir)/, mp.bison.d mp.flex.d) afh_objs += afh.lsg.o audioc_objs += audioc.lsg.o @@ -57,6 +60,12 @@ suites := $(addprefix $(lls_suite_dir)/, $(cmd_suites) $(executables)) m4_lls_deps := $(addsuffix .m4d, $(suites)) lsg_h := $(addsuffix .lsg.h, $(suites)) +# flex/bison objects and headers are only needed if para_server is built +ifeq ("$(findstring server, $(executables))", "server") + server_objs += mp.flex.o mp.bison.o + yy_h := $(yy_build_dir)/mp.bison.h +endif + # now prefix all objects with object dir recv_objs := $(addprefix $(object_dir)/, $(recv_objs)) filter_objs := $(addprefix $(object_dir)/, $(filter_objs)) @@ -85,12 +94,14 @@ 
man: $(man_pages) include $(lls_m4_dir)/makefile include $(test_dir)/makefile.test +include $(yy_src_dir)/makefile ifeq ($(findstring clean, $(MAKECMDGOALS)),) -include $(deps) -include $(m4_lls_deps) endif -$(object_dir) $(man_dir) $(dep_dir) $(m4depdir) $(lls_suite_dir): +$(object_dir) $(man_dir) $(dep_dir) $(m4depdir) $(lls_suite_dir) \ + $(yy_build_dir): $(Q) $(MKDIR_P) $@ CPPFLAGS += -DBINDIR='"$(bindir)"' @@ -101,6 +112,7 @@ CPPFLAGS += -DUNAME_RS='"$(uname_rs)"' CPPFLAGS += -DCC_VERSION='"$(cc_version)"' CPPFLAGS += -I/usr/local/include CPPFLAGS += -I$(lls_suite_dir) +CPPFLAGS += -I$(yy_build_dir) CPPFLAGS += $(lopsub_cppflags) STRICT_CFLAGS += -fno-strict-aliasing @@ -234,7 +246,7 @@ $(object_dir)/mm.o \ $(object_dir)/compress_filter.o: CFLAGS += -O3 -$(object_dir)/%.o: %.c | $(object_dir) $(dep_dir) $(lsg_h) +$(object_dir)/%.o: %.c | $(object_dir) $(dep_dir) $(lsg_h) $(yy_h) @[ -z "$(Q)" ] || echo 'CC $<' $(Q) $(CC) -c -o $@ -MMD -MF $(dep_dir)/$(*F).d -MT $@ $(CPPFLAGS) \ $(STRICT_CFLAGS) $(CFLAGS) $< diff --git a/NEWS.md b/NEWS.md index 6724317d..b87ae52c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,20 @@ NEWS ==== + --------------------- current master branch --------------------- +The highlight of this release is the version 2 mood parser. But there +is a lot more than that, as summarized in the list below. And of +course we have many small usability improvements and bug fixes not +mentioned here. + +- A more intuitive syntax for moods ("version 2 moods"). The + traditional version 1 moods are still supported but are deprecated + now. Removal of the version 1 mood parser is scheduled for the next + major release. +- Flex and bison are now required to build para_server. - New sort order for the ls command: -s=h sorts the ls output by hash value of the audio file. - autogen.sh now runs the test suite after a successful build. 
diff --git a/afs.c b/afs.c index ef05a473..5623d7e9 100644 --- a/afs.c +++ b/afs.c @@ -466,23 +466,30 @@ no_admissible_files: } /* Never fails if arg == NULL */ -static int activate_mood_or_playlist(const char *arg, int *num_admissible) +static int activate_mood_or_playlist(const char *arg, int *num_admissible, + char **errmsg) { enum play_mode mode; int ret; if (!arg) { - ret = change_current_mood(NULL); /* always successful */ + ret = change_current_mood(NULL, NULL); /* always successful */ mode = PLAY_MODE_MOOD; } else { if (!strncmp(arg, "p/", 2)) { ret = playlist_open(arg + 2); + if (ret < 0 && errmsg) + *errmsg = make_message( "could not open %s", + arg); mode = PLAY_MODE_PLAYLIST; } else if (!strncmp(arg, "m/", 2)) { - ret = change_current_mood(arg + 2); + ret = change_current_mood(arg + 2, errmsg); mode = PLAY_MODE_MOOD; - } else + } else { + if (errmsg) + *errmsg = make_message("%s: parse error", arg); return -ERRNO_TO_PARA_ERROR(EINVAL); + } if (ret < 0) return ret; } @@ -564,6 +571,7 @@ static int com_select_callback(struct afs_callback_arg *aca) const struct lls_command *cmd = SERVER_CMD_CMD_PTR(SELECT); const char *arg; int num_admissible, ret; + char *errmsg; ret = lls_deserialize_parse_result(aca->query.data, cmd, &aca->lpr); assert(ret >= 0); @@ -577,22 +585,27 @@ static int com_select_callback(struct afs_callback_arg *aca) close_current_mood(); else playlist_close(); - ret = activate_mood_or_playlist(arg, &num_admissible); + ret = activate_mood_or_playlist(arg, &num_admissible, &errmsg); if (ret >= 0) goto out; /* ignore subsequent errors (but log them) */ + para_printf(&aca->pbout, "%s\n", errmsg); + free(errmsg); para_printf(&aca->pbout, "could not activate %s\n", arg); if (current_mop && strcmp(current_mop, arg) != 0) { int ret2; para_printf(&aca->pbout, "switching back to %s\n", current_mop); - ret2 = activate_mood_or_playlist(current_mop, &num_admissible); + ret2 = activate_mood_or_playlist(current_mop, &num_admissible, + &errmsg); if (ret2 >= 
0) goto out; + para_printf(&aca->pbout, "%s\n", errmsg); + free(errmsg); para_printf(&aca->pbout, "could not reactivate %s: %s\n", current_mop, para_strerror(-ret2)); } para_printf(&aca->pbout, "activating dummy mood\n"); - activate_mood_or_playlist(NULL, &num_admissible); + activate_mood_or_playlist(NULL, &num_admissible, NULL); out: para_printf(&aca->pbout, "activated %s (%d admissible files)\n", current_mop? current_mop : "dummy mood", num_admissible); @@ -617,12 +630,12 @@ EXPORT_SERVER_CMD_HANDLER(select); static void init_admissible_files(const char *arg) { - int ret = activate_mood_or_playlist(arg, NULL); + int ret = activate_mood_or_playlist(arg, NULL, NULL); if (ret < 0) { assert(arg); PARA_WARNING_LOG("could not activate %s: %s\n", arg, para_strerror(-ret)); - activate_mood_or_playlist(NULL, NULL); /* always successful */ + activate_mood_or_playlist(NULL, NULL, NULL); } } diff --git a/configure.ac b/configure.ac index 83375559..1a375e9f 100644 --- a/configure.ac +++ b/configure.ac @@ -50,6 +50,9 @@ AC_DEFUN([LIB_SUBST_FLAGS], [ AC_USE_SYSTEM_EXTENSIONS AC_C_BIGENDIAN() +AC_PATH_PROG([BISON], [bison]) +AC_PATH_PROG([FLEX], [flex]) + AC_PATH_PROG([M4], [m4]) test -z "$M4" && AC_MSG_ERROR( [The m4 macro processor is required to build this package]) @@ -169,6 +172,17 @@ AC_CHECK_TYPE([struct ucred], [ #include #include ]) +################################################################### FNM_EXTMATCH +AC_MSG_CHECKING(for extended wildcard pattern matching) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + #include +]], [[ + unsigned n = FNM_EXTMATCH; +]])], [have_fnm_extmatch=yes], [have_fnm_extmatch=no]) +AC_MSG_RESULT($have_fnm_extmatch) +if test $have_fnm_extmatch = yes; then + AC_DEFINE(HAVE_FNM_EXTMATCH, 1, define to 1 if FNM_EXTMATCH is defined) +fi ########################################################################### curses STASH_FLAGS LIB_ARG_WITH([curses], []) @@ -334,7 +348,8 @@ AC_CHECK_LIB([samplerate], [src_process], [], HAVE_SAMPLERATE=no) 
LIB_SUBST_FLAGS(samplerate) UNSTASH_FLAGS ######################################################################### server -if test -n "$CRYPTOLIB" && test $HAVE_OSL = yes; then +if test -n "$CRYPTOLIB" && test $HAVE_OSL = yes && test -n "$BISON" && \ + test -n "$FLEX"; then build_server="yes" executables="$executables server" server_errlist_objs=" @@ -361,6 +376,7 @@ if test -n "$CRYPTOLIB" && test $HAVE_OSL = yes; then afs aft mood + mp score attribute blob diff --git a/error.h b/error.h index e792b058..7afd772b 100644 --- a/error.h +++ b/error.h @@ -137,6 +137,7 @@ PARA_ERROR(MAX_CLIENTS, "maximal number of clients exceeded"), \ PARA_ERROR(MISSING_COLON, "syntax error: missing colon"), \ PARA_ERROR(MOOD_SYNTAX, "mood syntax error"), \ + PARA_ERROR(MOOD_PARSE, "mood parse error"), \ PARA_ERROR(MP3DEC_CORRUPT, "too many corrupt frames"), \ PARA_ERROR(MP3DEC_EOF, "mp3dec: end of file"), \ PARA_ERROR(MP3_INFO, "could not read mp3 info"), \ diff --git a/mood.c b/mood.c index bbe3a8ae..c06f695c 100644 --- a/mood.c +++ b/mood.c @@ -19,6 +19,16 @@ #include "mm.h" #include "mood.h" +/* + * Mood parser API. It's overkill to have an own header file for + * these declarations as they are only needed in this .c file. + */ +struct mp_context; +int mp_init(const char *definition, int nbytes, struct mp_context **result, + char **errmsg); +bool mp_eval_row(const struct osl_row *aft_row, struct mp_context *ctx); +void mp_shutdown(struct mp_context *ctx); + /** * Contains statistical data of the currently admissible audio files. * @@ -73,6 +83,8 @@ struct mood { struct list_head deny_list; /** The list of mood items of type \p score. */ struct list_head score_list; + /* Only used for version 2 moods. 
*/ + struct mp_context *parser_context; }; /* @@ -178,6 +190,10 @@ static int row_is_admissible(const struct osl_row *aft_row, struct mood *m, if (!m) return -E_NO_MOOD; + if (m->parser_context) { + *scorep = 0; + return mp_eval_row(aft_row, m->parser_context); + } ret = get_afsi_of_row(aft_row, &afsi); if (ret < 0) return ret; @@ -242,6 +258,7 @@ static void destroy_mood(struct mood *m) list_for_each_entry_safe(item, tmp, &m->score_list, mood_item_node) cleanup_list_entry(item); free(m->name); + mp_shutdown(m->parser_context); free(m); } @@ -395,7 +412,8 @@ out: return ret; } -static int load_mood(const struct osl_row *mood_row, struct mood **m) +static int load_mood(const struct osl_row *mood_row, struct mood **m, + char **errmsg) { char *mood_name; struct osl_object mood_def; @@ -411,15 +429,21 @@ static int load_mood(const struct osl_row *mood_row, struct mood **m) mlpd.m = alloc_new_mood(mood_name); ret = for_each_line(FELF_READ_ONLY, mood_def.data, mood_def.size, parse_mood_line, &mlpd); - osl_close_disk_object(&mood_def); if (ret < 0) { - PARA_ERROR_LOG("unable to load mood %s: %s\n", mlpd.m->name, - para_strerror(-ret)); - destroy_mood(mlpd.m); - return ret; + PARA_INFO_LOG("opening version 2 mood %s\n", mlpd.m->name); + ret = mp_init(mood_def.data, mood_def.size, &mlpd.m->parser_context, + errmsg); + if (ret < 0) + destroy_mood(mlpd.m); + } else { + PARA_WARNING_LOG("loaded version 1 mood %s\n", mlpd.m->name); + PARA_WARNING_LOG("please convert to version 2\n"); + ret = 1; } - *m = mlpd.m; - return 1; + osl_close_disk_object(&mood_def); + if (ret >= 0) + *m = mlpd.m; + return ret; } static int check_mood(struct osl_row *mood_row, void *data) @@ -437,12 +461,24 @@ static int check_mood(struct osl_row *mood_row, void *data) } if (!*mood_name) /* ignore dummy row */ goto out; - para_printf(pb, "checking mood %s...\n", mood_name); ret = for_each_line(FELF_READ_ONLY, mood_def.data, mood_def.size, parse_mood_line, &mlpd); - if (ret < 0) - para_printf(pb, "mood 
%s: error in line %u: %s\n", mood_name, - mlpd.line_num, para_strerror(-ret)); + if (ret < 0) { + char *errmsg; + struct mood *m = alloc_new_mood("check"); + ret = mp_init(mood_def.data, mood_def.size, &m->parser_context, + &errmsg); + if (ret < 0) { + para_printf(pb, "%s: %s\n", mood_name, errmsg); + free(errmsg); + para_printf(pb, "%s\n", para_strerror(-ret)); + } else + destroy_mood(m); + } else { + para_printf(pb, "%s: v1 mood, please convert to v2\n", + mood_name); + + } ret = 1; /* don't fail the loop on invalid mood definitions */ out: osl_close_disk_object(&mood_def); @@ -812,18 +848,22 @@ void close_current_mood(void) * Change the current mood. * * \param mood_name The name of the mood to open. + * \param errmsg Error description is returned here. * * If \a mood_name is \a NULL, load the dummy mood that accepts every audio file * and uses a scoring method based only on the \a last_played information. * + * The errmsg pointer may be NULL, in which case no error message will be + * returned. If a non-NULL pointer is given, the caller must free *errmsg. + * * If there is already an open mood, it will be closed first. * * \return Positive on success, negative on errors. Loading the dummy mood * always succeeds. * - * \sa struct \ref afs_info::last_played. + * \sa struct \ref afs_info::last_played, \ref mp_eval_row(). 
*/ -int change_current_mood(const char *mood_name) +int change_current_mood(const char *mood_name, char **errmsg) { int i, ret; struct admissible_array aa = { @@ -843,7 +883,7 @@ int change_current_mood(const char *mood_name) PARA_NOTICE_LOG("no such mood: %s\n", mood_name); return ret; } - ret = load_mood(row, &m); + ret = load_mood(row, &m, errmsg); if (ret < 0) return ret; close_current_mood(); @@ -894,7 +934,7 @@ static int reload_current_mood(void) if (current_mood->name) mood_name = para_strdup(current_mood->name); close_current_mood(); - ret = change_current_mood(mood_name); + ret = change_current_mood(mood_name, NULL); free(mood_name); return ret; } diff --git a/mood.h b/mood.h index f7055753..87050142 100644 --- a/mood.h +++ b/mood.h @@ -6,6 +6,6 @@ /** \file mood.h Public functions of mood.c. */ -int change_current_mood(const char *mood_name); +int change_current_mood(const char *mood_name, char **errmsg); void close_current_mood(void); int mood_check_callback(struct afs_callback_arg *aca); diff --git a/mp.c b/mp.c new file mode 100644 index 00000000..12fe336e --- /dev/null +++ b/mp.c @@ -0,0 +1,572 @@ +/* + * Copyright (C) 2017 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** + * \file mp.c Mood parser helper functions. + * + * This file contains the public and the private API of the flex/bison based + * mood parser. + * + * The public API (at the bottom of the file) allows to parse the same mood + * definition many times in an efficient manner. + * + * The first function to call is \ref mp_init(), which analyzes the given mood + * definition syntactically. It returns the abstract syntax tree of the mood + * definition and pre-compiles all regular expression patterns to make later + * pattern matching efficient. + * + * Semantic analysis is performed in \ref mp_eval_row(). This function is + * called from \ref mood.c once for each file in the audio file table.
It + * utilizes the abstract syntax tree and the pre-compiled regular expressions + * to determine the set of admissible audio files. + * + * If the mood is no longer needed, \ref mp_shutdown() should be called to free + * the resources. + * + * The internal API is described in \ref mp.h. + */ + +#include "para.h" + +#include +#include +#include +#include + +#include "string.h" +#include "error.h" +#include "afh.h" +#include "afs.h" +#include "mp.h" +#include "mp.bison.h" + +struct mp_context { + /* global context */ + char *errmsg; + struct mp_ast_node *ast; + /* per audio file context */ + const struct osl_row *aft_row; + char *path; + bool have_afsi; + struct afs_info afsi; + bool have_afhi; + struct afh_info afhi; +}; + +/** + * Parse a (generalized) string literal. + * + * \param src The string to parse. + * \param quote_chars Opening and closing quote characters. + * \param result The corresponding C string is returned here. + * + * This function turns a generalized C99 string literal like "xyz\n" into a C + * string (containing the three characters 'x', 'y' and 'z', followed by a + * newline character and the terminating zero byte). The function allows to + * specify different quote characters so that, for example, regular expression + * patterns enclosed in '/' can be parsed as well. To parse a proper string + * literal, one has to pass two double quotes as the second argument. + * + * The function strips off the opening and closing quote characters, replaces + * double backslashes by single backslashes and handles the usual escapes like + * \n and \". + * + * The caller must make sure that the input is well-formed. The function simply + * aborts if the input is not a valid C99 string literal (modulo the quote + * characters). + * + * \return Offset of the first character after the closing quote.
For proper + * string literals this will be the terminating zero byte of the input string, + * for regular expression patterns it is the beginning of the flags which + * modify the matching behaviour. + * + * \sa \ref mp_parse_regex_pattern(), \ref mp_parse_wildcard_pattern(). + */ +unsigned parse_quoted_string(const char *src, const char quote_chars[2], + char **result) +{ + size_t n, len = strlen(src); + char *dst, *p; + bool backslash; + + assert(len >= 2); + assert(src[0] == quote_chars[0]); + p = dst = para_malloc(len - 1); + backslash = false; + for (n = 1;; n++) { + char c; + assert(n < len); + c = src[n]; + if (!backslash) { + if (c == '\\') { + backslash = true; + continue; + } + if (c == quote_chars[1]) + break; + *p++ = c; + continue; + } + if (c == quote_chars[1]) + *p++ = quote_chars[1]; + else switch (c) { + case '\\': *p++ = '\\'; break; + case 'a': *p++ = '\a'; break; + case 'b': *p++ = '\b'; break; + case 'f': *p++ = '\f'; break; + case 'n': *p++ = '\n'; break; + case 'r': *p++ = '\r'; break; + case 't': *p++ = '\t'; break; + case 'v': *p++ = '\v'; break; + default: assert(false); + } + backslash = false; + } + assert(src[n] == quote_chars[1]); + *p = '\0'; + *result = dst; + return n + 1; +} + +/** + * Parse and compile an extended regular expression pattern, including flags. + * + * \param src The pattern to parse. + * \param result The compiled regex and the flags are returned here. + * + * A regex pattern is identical to a C99 string literal except (a) it is + * enclosed in '/' characters rather than double quotes, (b) double quote + * characters which are part of the pattern do not need to be quoted with + * backslashes, but slashes must be quoted in this way, and (c) the closing + * slash may be followed by one or more flag characters which modify the + * matching behaviour. + * + * The only flags which are currently supported are 'i' to ignore case in match + * (REG_ICASE) and 'n' to change the handling of newline characters + * (REG_NEWLINE).
+ * + * \return Standard. This function calls \ref parse_quoted_string(), hence it + * aborts if the input string is malformed. However, errors from \ref + * para_regcomp are returned without aborting the process. The rationale behind + * this difference is that passing a malformed string must be considered an + * implementation bug because malformed strings should be rejected earlier by + * the lexer. + * + * \sa \ref mp_parse_wildcard_pattern(), \ref parse_quoted_string(), + * \ref para_regcomp(), regex(3). + */ +int mp_parse_regex_pattern(const char *src, struct mp_re_pattern *result) +{ + int ret; + char *pat; + unsigned n = parse_quoted_string(src, "//", &pat); + + result->flags = 0; + for (; src[n]; n++) { + switch (src[n]) { + case 'i': result->flags |= REG_ICASE; break; + case 'n': result->flags |= REG_NEWLINE; break; + default: assert(false); + } + } + ret = para_regcomp(&result->preg, pat, result->flags); + free(pat); + return ret; +} + +/** + * Parse a wildcard pattern, including flags. + * + * \param src The pattern to parse. + * \param result C-string and flags are returned here. + * + * This function parses a shell wildcard pattern. It is similar to \ref + * mp_parse_regex_pattern(), so the remarks mentioned there apply to this + * function as well. + * + * Wildcard patterns differ from regular expression patterns in that (a) they + * must be enclosed in '|' characters, (b) they support different flags for + * modifying matching behaviour, and (c) there is no cache for them. + * + * The following flags, whose meaning is explained in fnmatch(3), are currently + * supported: 'n' (FNM_NOESCAPE), 'p' (FNM_PATHNAME), 'P' (FNM_PERIOD), 'l' + * (FNM_LEADING_DIR), 'i' (FNM_CASEFOLD), 'e' (FNM_EXTMATCH). The last flag is + * a GNU extension. It is silently ignored on non GNU systems. + * + * \sa \ref parse_quoted_string(), \ref mp_parse_regex_pattern(), fnmatch(3). 
+ */ +void mp_parse_wildcard_pattern(const char *src, struct mp_wc_pattern *result) +{ + unsigned n = parse_quoted_string(src, "||", &result->pat); + + result->flags = 0; + for (; src[n]; n++) { + switch (src[n]) { + case 'n': result->flags |= FNM_NOESCAPE; break; + case 'p': result->flags |= FNM_PATHNAME; break; + case 'P': result->flags |= FNM_PERIOD; break; + /* not POSIX, but both FreeBSD and NetBSD have it */ + case 'l': result->flags |= FNM_LEADING_DIR; break; + case 'i': result->flags |= FNM_CASEFOLD; break; + /* GNU only */ +#ifdef HAVE_FNM_EXTMATCH + case 'e': result->flags |= FNM_EXTMATCH; break; +#else /* silently ignore extglob flag */ + case 'e': break; +#endif + default: assert(false); + } + } +} + +/** + * Set the error bit in the parser context and log a message. + * + * \param line The number of the input line which caused the error. + * \param ctx Contains the error bit. + * \param fmt Usual format string. + * + * This is called if the lexer or the parser detect an error in the mood + * definition. Only the first error is logged (with a severity of "warn"). + */ +__printf_3_4 void mp_parse_error(int line, struct mp_context *ctx, + const char *fmt, ...) +{ + va_list ap; + char *tmp; + + if (ctx->errmsg) /* we already printed an error message */ + return; + va_start(ap, fmt); + xvasprintf(&tmp, fmt, ap); + va_end(ap); + xasprintf(&ctx->errmsg, "line %d: %s", line, tmp); + free(tmp); + PARA_WARNING_LOG("%s\n", ctx->errmsg); +} + +static int get_afsi(struct mp_context *ctx) +{ + int ret; + + if (ctx->have_afsi) + return 0; + ret = get_afsi_of_row(ctx->aft_row, &ctx->afsi); + if (ret < 0) + return ret; + ctx->have_afsi = true; + return 1; +} + +static int get_afhi(struct mp_context *ctx) +{ + int ret; + + if (ctx->have_afhi) + return 0; + ret = get_afhi_of_row(ctx->aft_row, &ctx->afhi); + if (ret < 0) + return ret; + ctx->have_afhi = true; + return 1; +} + +/** + * Return the full path to the audio file. 
+ * + * \param ctx Contains a reference to the row of the audio file table which + * corresponds to the current audio file. The path of the audio file, the + * afs_info and the afh_info structures (which contain the tag information) can + * be retrieved through this reference. + * + * \return A reference to the path. Must not be freed by the caller. + * + * \sa \ref get_audio_file_path_of_row(). + */ +char *mp_path(struct mp_context *ctx) +{ + if (!ctx->path) + get_audio_file_path_of_row(ctx->aft_row, &ctx->path); + return ctx->path; +} + +/** + * Check whether the given attribute is set for the current audio file. + * + * \param attr The string to look up in the attribute table. + * \param ctx See \ref mp_path(). + * + * First, determine the bit number which corresponds to the attribute, then + * check if this bit is set in the ->attributes field of the afs_info structure + * of the audio file. + * + * \return True if the attribute is set, false if it is not. On errors, for + * example if the given string is no attribute, the function returns false. + * + * \sa \ref get_attribute_bitnum_by_name(). + */ +bool mp_is_set(const char *attr, struct mp_context *ctx) +{ + int ret; + unsigned char bitnum; + const uint64_t one = 1; + + ret = get_attribute_bitnum_by_name(attr, &bitnum); + if (ret < 0) /* treat invalid attributes as not set */ + return false; + ret = get_afsi(ctx); + if (ret < 0) + return false; + return (one << bitnum) & ctx->afsi.attributes; +} + +/** + * Count the number of attributes set. + * + * \param ctx See \ref mp_path(). + * + * \return The number of bits which are set in the ->attributes field of the + * afs_info structure of the current audio file. 
+ */ +int64_t mp_num_attributes_set(struct mp_context *ctx) +{ + const uint64_t m = ~(uint64_t)0; + int ret; + uint64_t v; + + ret = get_afsi(ctx); + if (ret < 0) + return 0; + + v = ctx->afsi.attributes; + /* taken from https://graphics.stanford.edu/~seander/bithacks.html */ + v = v - ((v >> 1) & m / 3); + v = (v & m / 15 * 3) + ((v >> 2) & m / 15 * 3); + v = (v + (v >> 4)) & m / 255 * 15; + v = (v * (m / 255)) >> 56; + assert(v <= 64); + return v; +} + +/** + * Define a function which returns a field of the afs_info structure. + * + * \param _name The name of the field. + * + * The defined function casts the value to int64_t. On errors, zero is returned. + */ +#define MP_AFSI(_name) \ + int64_t mp_ ## _name(struct mp_context *ctx) \ + { \ + int ret = get_afsi(ctx); \ + if (ret < 0) \ + return 0; \ + return ctx->afsi._name; \ + } +/** \cond MP_AFSI */ +MP_AFSI(num_played) +MP_AFSI(image_id) +MP_AFSI(lyrics_id) +/** \endcond */ + +/** + * Define a function which returns a field of the afh_info structure. + * + * \param _name The name of the field. + * + * The defined function casts the value to int64_t. On errors, zero is returned. + */ +#define MP_AFHI(_name) \ + int64_t mp_ ## _name(struct mp_context *ctx) \ + { \ + int ret = get_afhi(ctx); \ + if (ret < 0) \ + return 0; \ + return ctx->afhi._name; \ + } +/** \cond MP_AFHI */ +MP_AFHI(bitrate) +MP_AFHI(frequency) +MP_AFHI(channels) +/** \endcond */ + +/** + * Define a function which extracts and returns the value of a meta tag. + * + * \param _name The name of the tag (artist, title, ...). + * + * The function will return a pointer to memory owned by the audio file + * selector. On errors, or if the current audio file has no tag of the given + * name, the function returns the empty string. The caller must not attempt to + * free the returned string. 
+ */ +#define MP_TAG(_name) \ + char *mp_ ## _name (struct mp_context *ctx) \ + { \ + int ret = get_afhi(ctx); \ + if (ret < 0) \ + return ""; \ + return ctx->afhi.tags._name; \ + } +/** \cond MP_TAG */ +MP_TAG(artist) +MP_TAG(title) +MP_TAG(album) +MP_TAG(comment) +/** \endcond */ + +/** + * Parse and return the value of the year tag. + * + * \param ctx See \ref mp_path(). + * + * \return If the year tag is not present, can not be parsed, or its value is + * less than zero, the function returns 0. If the value is less than 100, we + * add 1900. + */ +int64_t mp_year(struct mp_context *ctx) +{ + int64_t year; + int ret = get_afhi(ctx); + + if (ret < 0) + return 0; + assert(ctx->afhi.tags.year); + ret = para_atoi64(ctx->afhi.tags.year, &year); + if (ret < 0) + return 0; + if (year < 0) + return 0; + if (year < 100) + year += 1900; + return year; +} + +/* + * Ideally, these functions should be declared in a header file which is + * created by flex with the --header-file option. However, for flex-2.6.x + * (2017) this option is broken: if --reentrant is also given, the generated + * header file contains syntax errors. As a workaround we declare the functions + * here. + */ +/** \cond flex_workaround */ +int mp_yylex_init(mp_yyscan_t *yyscanner); +struct yy_buffer_state *mp_yy_scan_bytes(const char *buf, int len, + mp_yyscan_t yyscanner); +void mp_yy_delete_buffer(struct yy_buffer_state *bs, mp_yyscan_t yyscanner); +int mp_yylex_destroy(mp_yyscan_t yyscanner); +void mp_yyset_lineno(int lineno, mp_yyscan_t scanner); +/** \endcond */ + +/* Public API */ + +/** + * Initialize the mood parser. + * + * This allocates and sets up the internal structures of the mood parser + * and creates an abstract syntax tree from the given mood definition. + * It must be called before \ref mp_eval_row() can be called. + * + * The context pointer returned by this function may be passed to \ref + * mp_eval_row() to determine whether an audio file is admissible.
+ * + * \param definition A reference to the mood definition. + * \param nbytes The size of the mood definition. + * \param result Opaque context pointer is returned here. + * \param errmsg Optional error message is returned here. + * + * It's OK to pass a NULL pointer or a zero sized buffer as the mood + * definition. This corresponds to the "dummy" mood for which all audio files + * are admissible. + * + * The error message pointer may also be NULL in which case no error message + * is returned. Otherwise, the caller must free the returned string. + * + * \return Standard. On success *errmsg is set to NULL. + */ +int mp_init(const char *definition, int nbytes, struct mp_context **result, + char **errmsg) +{ + int ret; + mp_yyscan_t scanner; + struct mp_context *ctx; + struct yy_buffer_state *buffer_state; + + if (!definition || nbytes == 0) { /* dummy mood */ + if (errmsg) + *errmsg = NULL; + *result = NULL; + return 0; + } + ctx = para_calloc(sizeof(*ctx)); + ctx->errmsg = NULL; + ctx->ast = NULL; + + ret = mp_yylex_init(&scanner); + assert(ret == 0); + buffer_state = mp_yy_scan_bytes(definition, nbytes, scanner); + mp_yyset_lineno(1, scanner); + PARA_NOTICE_LOG("creating abstract syntax tree\n"); + ret = mp_yyparse(ctx, &ctx->ast, scanner); + mp_yy_delete_buffer(buffer_state, scanner); + mp_yylex_destroy(scanner); + if (ctx->errmsg) { /* parse error */ + if (errmsg) + *errmsg = ctx->errmsg; + else + free(ctx->errmsg); + free(ctx); + return -E_MOOD_PARSE; + } + if (errmsg) + *errmsg = NULL; + *result = ctx; + return 1; +} + +/** + * Determine whether the given audio file is admissible. + * + * \param aft_row The audio file to check for admissibility. + * \param ctx As returned from \ref mp_init(). + * + * \return Whether the audio file is admissible. 
+ * + * If the mood parser was set up without an input buffer (dummy mood), this + * function returns true (without looking at the audio file metadata) to + * indicate that the given audio file should be considered admissible. + * + * \sa \ref change_current_mood(), \ref mp_eval_ast(). + */ +bool mp_eval_row(const struct osl_row *aft_row, struct mp_context *ctx) +{ + if (!ctx) /* dummy mood */ + return true; + assert(aft_row); + ctx->aft_row = aft_row; + ctx->have_afsi = false; + ctx->have_afhi = false; + ctx->path = NULL; + return mp_eval_ast(ctx->ast, ctx); +} + +/** + * Deallocate the resources of a mood parser. + * + * This function frees the abstract syntax tree which was created by \ref + * mp_init(). + * + * \param ctx As returned from \ref mp_init(). + * + * It's OK to pass a NULL pointer, in which case the function does nothing. + */ +void mp_shutdown(struct mp_context *ctx) +{ + if (!ctx) + return; + mp_free_ast(ctx->ast); + free(ctx); +} diff --git a/mp.h b/mp.h new file mode 100644 index 00000000..93bbab3e --- /dev/null +++ b/mp.h @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2017 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** + * \file mp.h Internal mood parser API (backend). + * + * This header is included from the lexer, the parser, and from \ref mp.c, but + * not from \ref mood.c, the only user of the mood parser front end. It + * contains structures and function prototypes which are considered + * implementation details. + * + * There is one function for each keyword in the context-free grammar of the + * parser. These functions return the semantic value of the keyword. + * + * The functions declared here are defined either in mp.c or in mp.y. + */ + +/** Opaque, only known to mp.c. Passed to the generated mp_yyparse(). */ +struct mp_context; + +/** + * Since we use a reentrant lexer, all functions generated by flex(1) + * receive an additional argument of this type. 
+ */ +typedef void *mp_yyscan_t; + +/** Parsed regex pattern. */ +struct mp_re_pattern { + regex_t preg; /**< Pre-compiled regex. **/ + unsigned flags; /**< Subset of the cflags described in regex(3). */ +}; + +/** Parsed wildcard pattern. */ +struct mp_wc_pattern { + char *pat; /**< Unescaped C string (without quotes and flags). */ + unsigned flags; /**< For modifying matching behaviour. */ +}; + +/** + * The possible values of a node in the abstract syntax tree (AST). + * + * Constant semantic values (string literals, numeric constants, wildcard and + * regex patterns which are part of the mood definition) are determined during + * \ref mp_init() while values which depend on the audio file (path, bitrate, + * etc.) are determined during mp_eval_row(). + * + * This union, and the \ref mp_ast_node structure below are used extensively in + * mp.y. However, both need to be public because the lexer must be able to + * create AST nodes for the constant semantic values. + */ +union mp_semantic_value { + bool boolval; /**< Comparators, =~ and =|. */ + char *strval; /**< String literals, tags, path. */ + int64_t intval; /**< Constants, bitrate, frequency, etc. */ + struct mp_wc_pattern wc_pattern; /**< Right-hand side operand of =|. */ + struct mp_re_pattern re_pattern; /**< Right-hand side operand of =~. */ +}; + +/** + * Describes one node of the abstract syntax tree. + * + * A node is either interior or a leaf node. Interior nodes have at least one + * child while leaf nodes have a semantic value and no children. + * + * Examples: (a) STRING_LITERAL has a semantic value (the unescaped string + * literal) and no children, (b) NEG (unary minus) has no semantic value but + * one child (the numeric expression that is to be negated), (c) LESS_OR_EQUAL + * has no semantic value and two children (the two numeric expressions being + * compared). + */ +struct mp_ast_node { + /** Corresponds to a token type, for example LESS_OR_EQUAL. 
*/ + int id; + union { + /** Pointers to the child nodes (interior nodes only). */ + struct mp_ast_node **children; + /** Leaf nodes only. */ + union mp_semantic_value sv; + }; + /** + * The number of children is implicitly given by the id, but we include + * it here to avoid having to maintain a lookup table. The AST is + * usually small, so we can afford to waste a byte per node. + */ + uint8_t num_children; +}; + +/* Called from both the lexer and the parser. */ +__printf_3_4 void mp_parse_error(int line, struct mp_context *ctx, + const char *fmt, ...); + +/* Helper functions for the lexer. */ +unsigned parse_quoted_string(const char *src, const char quote_chars[2], + char **result); +int mp_parse_regex_pattern(const char *src, struct mp_re_pattern *result); +void mp_parse_wildcard_pattern(const char *src, struct mp_wc_pattern *result); + +/* + * The functions below are implemented in mp.y. They are documented here + * because mp.y is not doxyfied. + */ + +/** + * Allocate a new leaf node for the abstract syntax tree. + * + * \param id Initial value for the ->id field of the new node + * + * \return Pointer to a node whose ->num_children field is initialized to zero. + * The caller is expected to initialize the ->sv field. + */ +struct mp_ast_node *mp_new_ast_leaf_node(int id); + +/** + * Evaluate an abstract syntax tree, starting at the root node. + * + * \param root As returned from \ref mp_init() via the context pointer. + * \param ctx Contains the aft row to evaluate. + * + * \return True if the AST evaluates to true, a non-empty string, or a + * non-zero number. False otherwise. + * + * \sa mp_eval_row(). + */ +bool mp_eval_ast(struct mp_ast_node *root, struct mp_context *ctx); + +/** + * Deallocate an abstract syntax tree. 
+ * + * This frees the memory occupied by the nodes of the AST, the child pointers + * of the internal nodes and the (constant) semantic values of the leaf nodes + * (string literals, unescaped wildcard patterns and pre-compiled regular + * expressions). + * + * \param root It's OK to pass NULL here. + */ +void mp_free_ast(struct mp_ast_node *root); + +/* Helper functions for the parser. */ +bool mp_is_set(const char *attr, struct mp_context *ctx); +char *mp_path(struct mp_context *ctx); +int64_t mp_year(struct mp_context *ctx); +int64_t mp_num_attributes_set(struct mp_context *ctx); + +/* Generated with MP_AFSI() */ +/** \cond MP_AFSI */ +int64_t mp_num_played(struct mp_context *ctx); +int64_t mp_image_id(struct mp_context *ctx); +int64_t mp_lyrics_id(struct mp_context *ctx); +/** \endcond */ + +/* Generated with MP_AFHI() */ +/** \cond MP_AFHI */ +int64_t mp_bitrate(struct mp_context *ctx); +int64_t mp_frequency(struct mp_context *ctx); +int64_t mp_channels(struct mp_context *ctx); +/** \endcond */ + +/* Generated with MP_TAG() */ +/** \cond MP_TAG */ +char *mp_artist(struct mp_context *ctx); +char *mp_title(struct mp_context *ctx); +char *mp_album(struct mp_context *ctx); +char *mp_comment(struct mp_context *ctx); +/** \endcond */ diff --git a/web/manual.md b/web/manual.md index 5e411742..fb3a05f7 100644 --- a/web/manual.md +++ b/web/manual.md @@ -338,6 +338,11 @@ libgcrypt are usually shipped with the distro, but you might have to install the development package (`libssl-dev` or `libgcrypt-dev` on debian systems) as well. +- [flex](https://github.com/westes/flex) and +[bison](https://www.gnu.org/software/bison) are needed to build the +mood parser of para_server. The build system will skip para_server +if these tools are not installed. + - [libmad](http://www.underbit.com/products/mad/). To compile in MP3 support for paraslash, the development package must be installed. It is called `libmad0-dev` on debian-based systems. 
Note that libmad is @@ -976,124 +981,140 @@ the score table (but not from the playlist).

Moods

-A mood consists of a unique name and its *mood definition*, which is -a set of *mood lines* containing expressions in terms of attributes -and other data contained in the database. - -At any time at most one mood can be *active* which means that -para_server is going to select only files from that subset of -admissible files. - -So in order to create a mood definition one has to write a set of -mood lines. Mood lines come in three flavours: Accept lines, deny -lines and score lines. - -The general syntax of the three types of mood lines is - - - accept [with score <score>] [if] [not] <mood_method> [options] - deny [with score <score>] [if] [not] <mood_method> [options] - score <score> [if] [not] <mood_method> [options] - - -Here <score> is either an integer or the string "random" which assigns -a random score to all matching files. The score value changes the -order in which admissible files are going to be selected, but is of -minor importance for this introduction. - -So we concentrate on the first two forms, i.e. accept and deny -lines. As usual, everything in square brackets is optional, i.e. -accept/deny lines take the following form when ignoring scores: - - accept [if] [not] <mood_method> [options] - -and analogously for the deny case. The "if" keyword is only syntactic -sugar and has no function. The "not" keyword just inverts the result, -so the essence of a mood line is the mood method part and the options -following thereafter. - -A *mood method* is realized as a function which takes an audio file -and computes a number from the data contained in the database. -If this number is non-negative, we say the file *matches* the mood -method. The file matches the full mood line if it either - - - matches the mood method and the "not" keyword is not given, -or - - does not match the mood method, but the "not" keyword is given. - -The set of admissible files for the whole mood is now defined as those -files which match at least one accept mood line, but no deny mood line.
-More formally, an audio file F is admissible if and only if - - (F ~ AL1 or F ~ AL2...) and not (F ~ DL1 or F ~ DN2 ...) - -where AL1, AL2... are the accept lines, DL1, DL2... are the deny -lines and "~" means "matches". - -The cases where no mood lines of accept/deny type are defined need -special treatment: - - - Neither accept nor deny lines: This treats all files as - admissible (in fact, that is the definition of the dummy mood - which is activated automatically if no moods are available). - - - Only accept lines: A file is admissible iff it matches at - least one accept line: - - F ~ AL1 or F ~ AL2 or ... - - - Only deny lines: A file is admissible iff it matches no - deny line: - - not (F ~ DL1 or F ~ DN2 ...) - - - -

List of mood_methods

- - no_attributes_set - -Takes no arguments and matches an audio file if and only if no -attributes are set. - - is_set <attribute_name> - -Takes the name of an attribute and matches iff that attribute is set. - - path_matches <pattern> - -Takes a filename pattern and matches iff the path of the audio file -matches the pattern. - - artist_matches <pattern> - album_matches <pattern> - title_matches <pattern> - comment_matches <pattern> - -Takes an extended regular expression and matches iff the text of the -corresponding tag of the audio file matches the pattern. If the tag -is not set, the empty string is matched against the pattern. - - year ~ <num> - bitrate ~ <num> - frequency ~ <num> - channels ~ <num> - num_played ~ <num> - image_id ~ <num> - lyrics_id ~ <num> - -Takes a comparator ~ of the set {<, =, <=, >, >=, !=} and a number -<num>. Matches an audio file iff the condition <val> ~ <num> is -satisfied where val is the corresponding value of the audio file -(value of the year tag, bitrate in kbit/s, etc.). - -The year tag is special as its value is undefined if the audio file -has no year tag or the content of the year tag is not a number. Such -audio files never match. Another difference is the special treatment -if the year tag is a two-digit number. In this case either 1900 or -2000 is added to the tag value, depending on whether the number is -greater than 2000 plus the current year. - +A mood consists of a unique name and a definition. The definition +is an expression which describes which audio files are considered +admissible. At any time at most one mood can be active, meaning +that para_server will only stream files which are admissible for the +active mood. + +The expression may refer to attributes and other metadata stored in +the database. Expressions may be combined by means of logical and +arithmetical operators in a natural way. Moreover, string matching +based on regular expression or wildcard patterns is supported. + +The set of admissible files is determined by applying the expression +to each audio file in turn.
For a mood definition to be valid, its +expression must evaluate to a number, a string or a boolean value +("true" or "false"). For numbers, any value other than zero means the +file is admissible. For strings, any non-empty string indicates an +admissible file. For boolean values, true means admissible and false +means not admissible. As a special case, the empty expression treats +all files as admissible. + +

Mood grammar

+ +Expressions are based on a context-free grammar which distinguishes +between several types for syntactic units or groupings. The grammar +defines a set of keywords which have a type and a corresponding +semantic value, as shown in the following table. + +Keyword | Type | Semantic value +:--------------------|--------:|:---------------------------------- +`path` | string | Full path of the current audio file +`artist` | string | Content of the artist meta tag +`title` | string | Content of the title meta tag +`album` | string | Content of the album meta tag +`comment` | string | Content of the comment meta tag +`num_attributes_set` | integer | Number of attributes which are set +`year` | integer | Content of the year meta tag [\*] +`num_played` | integer | How many times the file has been streamed +`image_id` | integer | The identifier of the (cover art) image +`lyrics_id` | integer | The identifier of the lyrics blob +`bitrate` | integer | The average bitrate +`frequency` | integer | The output sample rate +`channels` | integer | The number of channels +`is_set("foo")` | boolean | True if attribute "foo" is set. + +[\*] For most audio formats, the year tag is stored as a string. It +is converted to an integer by the mood parser. If the audio file +has no year tag or the content of the year tag is not a number, the +semantic value is zero. A special convention applies if the year tag +is a one-digit or a two-digit number. In this case 1900 is added to +the tag value. + +Expressions may be grouped using parentheses, logical and +arithmetical operators or string matching operators. The following +table lists the available operators. + +Token | Meaning +:------|:------- +`\|\|` | Logical Or +`&&` | Logical And +`!` | Logical Not +`==` | Equal (can be applied to all types) +`!=` | Not equal.
Likewise +`<` | Less than +`<=` | Less or equal +`>` | Greater than +`>=` | Greater or equal +`+` | Arithmetical plus +`-` | Binary/unary minus +`*` | Multiplication +`/` | Division +`=~` | Regular expression match +`=\|` | Filename match + +Besides integers, strings and booleans there is an additional type +which describes regular expression or wildcard patterns. Patterns +are not just strings because they also include a list of flags which +modify matching behaviour. + +Regular expression patterns are of the form `/pattern/[flags]`. That +is, the pattern is delimited by slashes, and is followed by zero or +more characters, each specifying a flag according to the following +table + +Flag | POSIX name | Meaning +:----|--------------:|-------- +`i` | `REG_ICASE` | Ignore case in match +`n` | `REG_NEWLINE` | Do not treat newline as an ordinary character + +Note that only extended regular expression patterns are supported. See +regex(3) for details. + +Wildcard patterns are similar, but the pattern must be delimited by +`'|'` characters rather than slashes. For wildcard patterns different +flags exist, as shown below. + +Flag | POSIX name | Meaning +:----|-----------------------:|-------- +`n` | `FNM_NOESCAPE` | Treat backslash as an ordinary character +`p` | `FNM_PATHNAME` | Match a slash only with a slash in pattern +`P` | `FNM_PERIOD` | Leading period has to be matched exactly +`l` | `FNM_LEADING_DIR` [\*] | Ignore "/\*" rest after successful matching +`i` | `FNM_CASEFOLD` [\*] | Ignore case in match +`e` | `FNM_EXTMATCH` [\*\*] | Enable extended pattern matching + +[\*] Not in POSIX, but both FreeBSD and NetBSD have it. + +[\*\*] GNU extension, silently ignored on non GNU systems. + +See fnmatch(3) for details. + +Mood definitions may contain arbitrary whitespace and comments. +A comment is a word beginning with #. This word and all remaining +characters of the line are ignored. + +

Example moods

+ +* Files with no/invalid year tag: `year == 0` + +* Only oldies: `year != 0 && year < 1980` + +* Only 80's Rock or Metal: `(year >= 1980 && year < 1990) && + (is_set("rock") || is_set("metal"))` + +* Files with incomplete tags: `artist == "" || title == "" || album == +"" || comment == "" || year == 0` + +* Files with no attributes defined so far: `num_attributes_set == 0` + +* Only newly added files: `num_played == 0` + +* Only poor quality files: `bitrate < 96` + +* Cope with different spellings of Motörhead: `artist =~ /mot(ö|oe{0,1})rhead/i` + +* The same with extended wildcard patterns: `artist =| |mot+(o\|oe\|ö)rhead|ie`

Mood usage

@@ -1122,27 +1143,6 @@ if the "-a" switch is given: para ls -a - -

Example mood definition

- -Suppose you have defined attributes "punk" and "rock" and want to define -a mood containing only Punk-Rock songs. That is, an audio file should be -admissible if and only if both attributes are set. Since - - punk and rock - -is obviously the same as - - not (not punk or not rock) - -(de Morgan's rule), a mood definition that selects only Punk-Rock -songs is - - deny if not is_set punk - deny if not is_set rock - - - File renames and content changes -------------------------------- diff --git a/yy/makefile b/yy/makefile new file mode 100644 index 00000000..ed70d655 --- /dev/null +++ b/yy/makefile @@ -0,0 +1,17 @@ +.PRECIOUS: $(yy_build_dir)/%.flex.c $(yy_build_dir)/%.bison.c \ + $(yy_build_dir)/%.bison.h + +# Use $(Q) rather than a hard-coded @ so that verbose builds echo the commands. +$(yy_build_dir)/%.flex.c: $(yy_src_dir)/%.lex | $(yy_build_dir) + @[ -z "$(Q)" ] || echo 'FLEX $<' + $(Q) $(FLEX) -o $@ $< + +$(yy_build_dir)/%.bison.c $(yy_build_dir)/%.bison.h: $(yy_src_dir)/%.y \ + | $(yy_build_dir) + @[ -z "$(Q)" ] || echo 'BISON $<' + $(Q) $(BISON) --defines=$(yy_build_dir)/$(notdir $(<:.y=.bison.h)) \ + --output=$(yy_build_dir)/$(notdir $(<:.y=.bison.c)) $< + +# The generated sources include mp.bison.h and the recipe writes to $(dep_dir), +# so both must exist before compiling (matters for parallel builds). +$(object_dir)/%.o: $(yy_build_dir)/%.c | $(object_dir) $(dep_dir) $(yy_h) + @[ -z "$(Q)" ] || echo 'CC $<' + $(Q) $(CC) -g -c -o $@ $(CPPFLAGS) -MMD -MF $(dep_dir)/$(*F).d \ + -MT $@ -iquote . -Wno-unused-macros $< diff --git a/yy/mp.lex b/yy/mp.lex new file mode 100644 index 00000000..1e06b8dc --- /dev/null +++ b/yy/mp.lex @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2017 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + + /* + * Since we do not supply yywrap(), we use noyywrap to instruct the scanner to + * behave as though yywrap() returned 1. + */ +%option noyywrap + + /* + * We don't want symbols to clash with those of other flex users, particularly + * lopsub. + */ +%option prefix="mp_yy" + + /* + * Generate a scanner that maintains the number of the current line read from + * its input in the yylineno variable.
+ */ +%option yylineno + + /* Generate a bison-compatible scanner. */ +%option bison-bridge bison-locations + + /* + * Warn (in particular) if the default rule can be matched but no default rule + * has been given. + */ +%option warn + + /* + * Generate a scanner which is portable and safe to use in one or more threads + * of control. + */ +%option reentrant + + /* + * Generate a scanner which always looks one extra character ahead. This is a + * bit faster than an interactive scanner for which look ahead happens only + * when necessary. + */ +%option never-interactive + +%{ +#include <regex.h> +#include "para.h" +#include "string.h" +#include "mp.h" +#include "error.h" + +#define YYSTYPE MP_YYSTYPE +#define YYLTYPE MP_YYLTYPE +#define YY_DECL int mp_yylex(MP_YYSTYPE *yylval_param, MP_YYLTYPE *yylloc_param, \ + struct mp_context *ctx, struct mp_ast_node **ast, mp_yyscan_t yyscanner) +#include "mp.bison.h" +#define MP_YY_USER_ACTION do {mp_yylloc->first_line = mp_yylineno;} while (0); +%} +DECIMAL_CONSTANT (0|([[:digit:]]{-}[0])[[:digit:]]*) +STRING_LITERAL \"([^\"\\\n]|(\\[\"\\abfnrtv]))*\" +REGEX_PATTERN \/([^\/\\\n]|(\\[\/\\abfnrtv]))*\/([in])* +WILDCARD_PATTERN \|([^\|\\\n]|(\\[\|\\abfnrtv]))*\|([npPlie])* +%% + +is_set {return IS_SET;} +num_attributes_set {return NUM_ATTRIBUTES_SET;} +path {return PATH;} +artist {return ARTIST;} +title {return TITLE;} +album {return ALBUM;} +comment {return COMMENT;} +year {return YEAR;} +num_played {return NUM_PLAYED;} +image_id {return IMAGE_ID;} +lyrics_id {return LYRICS_ID;} +bitrate {return BITRATE;} +frequency {return FREQUENCY;} +channels {return CHANNELS;} +true {return TRUE;} +false {return FALSE;} + +[[:space:]]+|#.*\n /* skip comments and whitespace */ + +"("|")"|","|"+"|"-"|"*"|"/"|"<"|">" {return yytext[0];} + +"||" {return OR;} +"&&" {return AND;} +"!"
{return NOT;} +"==" {return EQUAL;} +"!=" {return NOT_EQUAL;} +"<=" {return LESS_OR_EQUAL;} +">=" {return GREATER_OR_EQUAL;} +"=~" {return REGEX_MATCH;} +"=|" {return FILENAME_MATCH;} + +{DECIMAL_CONSTANT} { + int ret; + yylval->node = mp_new_ast_leaf_node(NUM); + ret = para_atoi64(yytext, &yylval->node->sv.intval); + if (ret < 0) { + free(yylval->node); + mp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext, + para_strerror(-ret)); + return -E_MOOD_PARSE; + } + return NUM; +} + +{STRING_LITERAL} { + yylval->node = mp_new_ast_leaf_node(STRING_LITERAL); + parse_quoted_string(yytext, "\"\"", &yylval->node->sv.strval); + return STRING_LITERAL; +} + +{REGEX_PATTERN} { + int ret; + yylval->node = mp_new_ast_leaf_node(REGEX_PATTERN); + ret = mp_parse_regex_pattern(yytext, &yylval->node->sv.re_pattern); + if (ret < 0) { + /* The node never becomes part of the AST, so release it here. */ + free(yylval->node); + mp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext, + para_strerror(-ret)); + return -E_MOOD_PARSE; + } + return REGEX_PATTERN; +} + +{WILDCARD_PATTERN} { + yylval->node = mp_new_ast_leaf_node(WILDCARD_PATTERN); + mp_parse_wildcard_pattern(yytext, &yylval->node->sv.wc_pattern); + return WILDCARD_PATTERN; +} + +. { + mp_parse_error(yylloc->first_line, ctx, "unrecognized text: %s", + yytext); + return -E_MOOD_PARSE; +} diff --git a/yy/mp.y b/yy/mp.y new file mode 100644 index 00000000..82ef5140 --- /dev/null +++ b/yy/mp.y @@ -0,0 +1,419 @@ +/* + * Copyright (C) 2017 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/* + * Provide more verbose and specific error messages instead of just "syntax + * error". + */ +%define parse.error verbose + +/* + * Verbose error messages may contain incorrect information if LAC (Lookahead + * Correction) is not enabled. + */ +%define parse.lac full + +/* Avoid symbol clashes (lopsub might also expose yy* symbols).
*/ +%define api.prefix {mp_yy} + +/* + * Although locations are automatically enabled as soon as the grammar uses the + * special @N tokens, specifying %locations explicitly allows for more accurate + * syntax error messages. + */ +%locations + +/* + * Generate a pure (reentrant) parser. With this option enabled, yylval and + * yylloc become local variables in yyparse(), and a different calling + * convention is used for yylex(). + */ +%define api.pure full + +/* Additional arguments to yylex(), yyparse() and yyerror() */ +%param {struct mp_context *ctx} +%param {struct mp_ast_node **ast} +%param {mp_yyscan_t yyscanner} /* reentrant lexers */ + +%{ +#include <fnmatch.h> +#include <regex.h> + +#include "para.h" +#include "string.h" +#include "mp.h" +#include "mp.bison.h" +#include "error.h" + +int yylex(MP_YYSTYPE *lvalp, MP_YYLTYPE *llocp, struct mp_context *ctx, + struct mp_ast_node **ast, mp_yyscan_t yyscanner); +static void yyerror(YYLTYPE *llocp, struct mp_context *ctx, + struct mp_ast_node **ast, mp_yyscan_t yyscanner, const char *msg); + +/* Returned by eval_node() to tell which member of the result union is valid. */ +enum semantic_types { + ST_STRVAL, + ST_INTVAL, + ST_BOOLVAL, + ST_REGEX_PATTERN, + ST_WC_PATTERN +}; + +/* Allocate a node and set only its id. All other fields are left to the caller. */ +static struct mp_ast_node *ast_node_raw(int id) +{ + struct mp_ast_node *node = para_malloc(sizeof(struct mp_ast_node)); + node->id = id; + return node; +} + +/* This is non-static because it is also called from the lexer.
*/ +struct mp_ast_node *mp_new_ast_leaf_node(int id) +{ + struct mp_ast_node *node = ast_node_raw(id); + node->num_children = 0; + return node; +} + +static struct mp_ast_node *ast_node_new_unary(int id, struct mp_ast_node *child) +{ + struct mp_ast_node *node = ast_node_raw(id); + node->num_children = 1; + node->children = para_malloc(sizeof(struct mp_ast_node *)); + node->children[0] = child; + return node; +} + +static struct mp_ast_node *ast_node_new_binary(int id, struct mp_ast_node *left, + struct mp_ast_node *right) +{ + struct mp_ast_node *node = ast_node_raw(id); + node->num_children = 2; + node->children = para_malloc(2 * sizeof(struct mp_ast_node *)); + node->children[0] = left; + node->children[1] = right; + return node; +} + +void mp_free_ast(struct mp_ast_node *root) +{ + if (!root) + return; + if (root->num_children > 0) { + int i; + for (i = 0; i < root->num_children; i++) + mp_free_ast(root->children[i]); + free(root->children); + } else { + union mp_semantic_value *sv = &root->sv; + switch (root->id) { + case STRING_LITERAL: + free(sv->strval); + break; + case REGEX_PATTERN: + regfree(&sv->re_pattern.preg); + break; + case WILDCARD_PATTERN: + free(sv->wc_pattern.pat); + break; + } + } + free(root); +} + +static int eval_node(struct mp_ast_node *node, struct mp_context *ctx, + union mp_semantic_value *result); + +static void eval_binary_op(struct mp_ast_node *node, struct mp_context *ctx, + union mp_semantic_value *v1, union mp_semantic_value *v2) +{ + eval_node(node->children[0], ctx, v1); + eval_node(node->children[1], ctx, v2); +} + +static int eval_node(struct mp_ast_node *node, struct mp_context *ctx, + union mp_semantic_value *result) +{ + int ret; + char *arg; + union mp_semantic_value v1, v2; + + switch (node->id) { + /* strings */ + case STRING_LITERAL: + result->strval = node->sv.strval; + return ST_STRVAL; + case PATH: + result->strval = mp_path(ctx); + return ST_STRVAL; + case ARTIST: + result->strval = mp_artist(ctx); + return 
ST_STRVAL; + case TITLE: + result->strval = mp_title(ctx); + return ST_STRVAL; + case ALBUM: + result->strval = mp_album(ctx); + return ST_STRVAL; + case COMMENT: + result->strval = mp_comment(ctx); + return ST_STRVAL; + /* integers */ + case NUM: + result->intval = node->sv.intval; + return ST_INTVAL; + case '+': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval + v2.intval; + return ST_INTVAL; + case '-': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval - v2.intval; + return ST_INTVAL; + case '*': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval * v2.intval; + return ST_INTVAL; + case '/': + eval_binary_op(node, ctx, &v1, &v2); + if (v2.intval == 0) { + static bool warned; + if (!warned) + PARA_ERROR_LOG("division by zero\n"); + warned = true; + result->intval = 0; + } else + result->intval = v1.intval / v2.intval; + return ST_INTVAL; + case NEG: + eval_node(node->children[0], ctx, &v1); + result->intval = -v1.intval; + return ST_INTVAL; + case YEAR: + result->intval = mp_year(ctx); + return ST_INTVAL; + case NUM_ATTRIBUTES_SET: + result->intval = mp_num_attributes_set(ctx); + return ST_INTVAL; + case NUM_PLAYED: + result->intval = mp_num_played(ctx); + return ST_INTVAL; + case IMAGE_ID: + result->intval = mp_image_id(ctx); + return ST_INTVAL; + case LYRICS_ID: + result->intval = mp_lyrics_id(ctx); + return ST_INTVAL; + case BITRATE: + result->intval = mp_bitrate(ctx); + return ST_INTVAL; + case FREQUENCY: + result->intval = mp_frequency(ctx); + return ST_INTVAL; + case CHANNELS: + result->intval= mp_channels(ctx); + return ST_INTVAL; + /* bools */ + case IS_SET: + arg = node->children[0]->sv.strval; + result->boolval = mp_is_set(arg, ctx); + return ST_BOOLVAL; + case TRUE: + result->boolval = true; + return ST_BOOLVAL; + case FALSE: + result->boolval = false; + return ST_BOOLVAL; + case OR: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.boolval || v2.boolval; + return ST_BOOLVAL; + case AND: 
+ eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.boolval && v2.boolval; + return ST_BOOLVAL; + case NOT: + eval_node(node->children[0], ctx, &v1); + result->boolval = !v1.boolval; + return ST_BOOLVAL; + case EQUAL: + ret = eval_node(node->children[0], ctx, &v1); + eval_node(node->children[1], ctx, &v2); + if (ret == ST_STRVAL) + result->boolval = !strcmp(v1.strval, v2.strval); + else + result->boolval = v1.intval == v2.intval; + return ST_BOOLVAL; + case NOT_EQUAL: + ret = eval_node(node->children[0], ctx, &v1); + eval_node(node->children[1], ctx, &v2); + if (ret == ST_STRVAL) + result->boolval = strcmp(v1.strval, v2.strval); + else + result->boolval = v1.intval != v2.intval; + return ST_BOOLVAL; + case '<': + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval < v2.intval; + return ST_BOOLVAL; + case '>': + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval > v2.intval; + return ST_BOOLVAL; + case LESS_OR_EQUAL: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval <= v2.intval; + return ST_BOOLVAL; + case GREATER_OR_EQUAL: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval >= v2.intval; + return ST_BOOLVAL; + case FILENAME_MATCH: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = fnmatch(v2.wc_pattern.pat, v1.strval, + v2.wc_pattern.flags) == 0; + return ST_BOOLVAL; + case REGEX_MATCH: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = regexec(&v2.re_pattern.preg, v1.strval, + 0, NULL, 0) == 0; + return ST_BOOLVAL; + case REGEX_PATTERN: + result->re_pattern = node->sv.re_pattern; + return ST_REGEX_PATTERN; + case WILDCARD_PATTERN: + result->wc_pattern = node->sv.wc_pattern; + return ST_WC_PATTERN; + default: + PARA_EMERG_LOG("bug: invalid node id %d\n", node->id); + exit(EXIT_FAILURE); + } +} + +bool mp_eval_ast(struct mp_ast_node *root, struct mp_context *ctx) +{ + union mp_semantic_value v; + int ret = eval_node(root, ctx, &v); + + if (ret == ST_INTVAL) + return 
v.intval != 0; + if (ret == ST_STRVAL) + return v.strval[0] != 0; + if (ret == ST_BOOLVAL) + return v.boolval; + /* The grammar only yields strings, numbers and booleans at the root. */ + assert(false); + return false; /* keeps builds with NDEBUG well-defined */ +} + +%} + +%union { + struct mp_ast_node *node; +} + +/* terminals */ +%token <node> NUM +%token <node> STRING_LITERAL +%token <node> REGEX_PATTERN +%token <node> WILDCARD_PATTERN + +/* keywords with semantic value */ +%token <node> PATH +%token <node> ARTIST +%token <node> TITLE +%token <node> ALBUM +%token <node> COMMENT +%token <node> YEAR +%token <node> NUM_ATTRIBUTES_SET +%token <node> NUM_PLAYED +%token <node> IMAGE_ID +%token <node> LYRICS_ID +%token <node> BITRATE +%token <node> FREQUENCY +%token <node> CHANNELS +%token <node> FALSE TRUE + +/* keywords without semantic value */ +%token IS_SET + +/* operators, ordered by precedence */ +%left OR +%left AND +%left EQUAL NOT_EQUAL +%left LESS_THAN LESS_OR_EQUAL GREATER_OR_EQUAL REGEX_MATCH FILENAME_MATCH +%left '-' '+' +%left '*' '/' +%right NOT NEG /* negation (unary minus) */ + +/* nonterminals */ +%type <node> string +%type <node> exp +%type <node> boolexp + +%% + +program: + /* empty */ {*ast = NULL; return 0;} + | string {*ast = $1; return 0;} + | exp {*ast = $1; return 0;} + | boolexp {*ast = $1; return 0;} + +string: STRING_LITERAL {$$ = $1;} + | PATH {$$ = mp_new_ast_leaf_node(PATH);} + | ARTIST {$$ = mp_new_ast_leaf_node(ARTIST);} + | TITLE {$$ = mp_new_ast_leaf_node(TITLE);} + | ALBUM {$$ = mp_new_ast_leaf_node(ALBUM);} + | COMMENT {$$ = mp_new_ast_leaf_node(COMMENT);} +; + +exp: NUM {$$ = $1;} + | exp '+' exp {$$ = ast_node_new_binary('+', $1, $3);} + | exp '-' exp {$$ = ast_node_new_binary('-', $1, $3);} + | exp '*' exp {$$ = ast_node_new_binary('*', $1, $3);} + | exp '/' exp {$$ = ast_node_new_binary('/', $1, $3);} + | '-' exp %prec NEG {$$ = ast_node_new_unary(NEG, $2);} + | '(' exp ')' {$$ = $2;} + | YEAR {$$ = mp_new_ast_leaf_node(YEAR);} + | NUM_ATTRIBUTES_SET {$$ = mp_new_ast_leaf_node(NUM_ATTRIBUTES_SET);} + | NUM_PLAYED {$$ = mp_new_ast_leaf_node(NUM_PLAYED);} + | IMAGE_ID {$$ = mp_new_ast_leaf_node(IMAGE_ID);} + | LYRICS_ID {$$ = mp_new_ast_leaf_node(LYRICS_ID);} + | BITRATE {$$ =
mp_new_ast_leaf_node(BITRATE);} + | FREQUENCY {$$ = mp_new_ast_leaf_node(FREQUENCY);} + | CHANNELS {$$ = mp_new_ast_leaf_node(CHANNELS);} +; + +boolexp: IS_SET '(' STRING_LITERAL ')' {$$ = ast_node_new_unary(IS_SET, $3);} + | TRUE {$$ = mp_new_ast_leaf_node(TRUE);} + | FALSE {$$ = mp_new_ast_leaf_node(FALSE);} + | '(' boolexp ')' {$$ = $2;} + | boolexp OR boolexp {$$ = ast_node_new_binary(OR, $1, $3);} + | boolexp AND boolexp {$$ = ast_node_new_binary(AND, $1, $3);} + | NOT boolexp {$$ = ast_node_new_unary(NOT, $2);} + | exp EQUAL exp {$$ = ast_node_new_binary(EQUAL, $1, $3);} + | exp NOT_EQUAL exp {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);} + | exp '<' exp {$$ = ast_node_new_binary('<', $1, $3);} + | exp '>' exp {$$ = ast_node_new_binary('>', $1, $3);} + | exp LESS_OR_EQUAL exp { + $$ = ast_node_new_binary(LESS_OR_EQUAL, $1, $3); + } + | exp GREATER_OR_EQUAL exp { + $$ = ast_node_new_binary(GREATER_OR_EQUAL, $1, $3); + } + | string REGEX_MATCH REGEX_PATTERN { + $$ = ast_node_new_binary(REGEX_MATCH, $1, $3); + } + | string FILENAME_MATCH WILDCARD_PATTERN { + $$ = ast_node_new_binary(FILENAME_MATCH, $1, $3); + } + | string EQUAL string {$$ = ast_node_new_binary(EQUAL, $1, $3);} + | string NOT_EQUAL string {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);} +; +%% + +/* Called by yyparse() on error */ +static void yyerror(YYLTYPE *llocp, struct mp_context *ctx, + struct mp_ast_node **ast, mp_yyscan_t yyscanner, const char *msg) +{ + mp_parse_error(llocp->first_line, ctx, "%s", msg); +}