1 /* Copyright (C) 2017 Andre Noll <maan@tuebingen.mpg.de>, see file COPYING. */
4 * \file mp.c Mood parser helper functions.
6 * This file contains the public and the private API of the flex/bison based
9 * The public API (at the bottom of the file) allows parsing the same mood
10 * definition many times in an efficient manner.
12 * The first function to call is \ref mp_init(), which analyzes the given mood
13 * definition syntactically. It returns the abstract syntax tree of the mood
14 * definition and pre-compiles all regular expression patterns to make later
15 * pattern matching efficient.
17 * Semantic analysis is performed in \ref mp_eval_row(). This function is
18 * called from \ref mood.c once for each file in the audio file table. It
19 * utilizes the abstract syntax tree and the pre-compiled regular expressions
20 * to determine the set of admissible audio files.
22 * If the mood is no longer needed, \ref mp_shutdown() should be called to free
25 * The internal API is described in \ref mp.h.
45 struct mp_ast_node *ast; /* written by mp_yyparse() in mp_init(), released with mp_free_ast() */
46 /* per audio file context */
47 const struct osl_row *aft_row; /* set by mp_eval_row() before the syntax tree is evaluated */
56 * Parse a (generalized) string literal.
58 * \param src The string to parse.
59 * \param quote_chars Opening and closing quote characters.
60 * \param result The corresponding C string is returned here.
62 * This function turns a generalized C99 string literal like "xyz\n" into a C
63 * string (containing the three characters 'x', 'y' and 'z', followed by a
64 * newline character and the terminating zero byte). The function receives
65 * quote characters as an argument so that, for example, regular expression
66 * patterns enclosed in '/' can be parsed as well. To parse a proper string
67 * literal, one has to pass two double quotes as the second argument.
69 * The function strips off the opening and closing quote characters, replaces
70 * double backslashes by single backslashes and handles the usual escapes like
73 * The caller must make sure that the input is well-formed. The function simply
74 * aborts if the input is not a valid C99 string literal (modulo the quote
77 * \return Offset of the first character after the closing quote. For proper
78 * string literals this will be the terminating zero byte of the input string,
79 * for regular expression patterns it is the beginning of the flags which
80 * modify the matching behaviour.
82 * \sa \ref mp_parse_regex_pattern(), \ref mp_parse_wildcard_pattern().
84 unsigned parse_quoted_string(const char *src, const char quote_chars[2],
87 size_t n, len = strlen(src); /* len includes the opening and the closing quote character */
92 assert(src[0] == quote_chars[0]); /* the input must start with the opening quote */
93 p = dst = alloc(len - 1); /* room for the input without its two quotes, plus the terminating zero byte */
104 if (c == quote_chars[1])
109 if (c == quote_chars[1])
110 *p++ = quote_chars[1]; /* store a literal closing quote character in the output */
112 case '\\': *p++ = '\\'; break; /* each escape letter below yields the one character it denotes */
113 case 'a': *p++ = '\a'; break;
114 case 'b': *p++ = '\b'; break;
115 case 'f': *p++ = '\f'; break;
116 case 'n': *p++ = '\n'; break;
117 case 'r': *p++ = '\r'; break;
118 case 't': *p++ = '\t'; break;
119 case 'v': *p++ = '\v'; break;
120 default: assert(false); /* any other character trips the assertion: the input is malformed */
124 assert(src[n] == quote_chars[1]); /* n must now index the closing quote */
131 * Parse and compile an extended regular expression pattern, including flags.
133 * \param src The pattern to parse.
134 * \param result C-string and flags are returned here.
136 * A regex pattern is identical to a C99 string literal except (a) it is
137 * enclosed in '/' characters rather than double quotes, (b) double quote
138 * characters which are part of the pattern do not need to be quoted with
139 * backslashes, but slashes must be quoted in this way, and (c) the closing
140 * slash may be followed by one or more flag characters which modify the
141 * matching behaviour.
143 * The only flags which are currently supported are 'i' to ignore case when matching
144 * (REG_ICASE) and 'n' to change the handling of newline characters
147 * \return Standard. This function calls \ref parse_quoted_string(), hence it
148 * aborts if the input string is malformed. However, errors from \ref
149 * para_regcomp are returned without aborting the process. The rationale behind
150 * this difference is that passing a malformed string must be considered an
151 * implementation bug because malformed strings should be rejected earlier by
154 * \sa \ref mp_parse_wildcard_pattern(), \ref parse_quoted_string(),
155 * \ref para_regcomp(), regex(3).
157 int mp_parse_regex_pattern(const char *src, struct mp_re_pattern *result)
161 unsigned n = parse_quoted_string(src, "//", &pat); /* n is the offset of the first character after the closing slash */
164 for (; src[n]; n++) { /* visit every flag character up to the terminating zero byte */
166 case 'i': result->flags |= REG_ICASE; break;
167 case 'n': result->flags |= REG_NEWLINE; break;
168 default: assert(false); /* unsupported flag character: trips the assertion */
171 ret = para_regcomp(&result->preg, pat, result->flags); /* pat and the collected flags go in, result->preg is the output argument */
177 * Parse a wildcard pattern, including flags.
179 * \param src The pattern to parse.
180 * \param result C-string and flags are returned here.
182 * This function parses a shell wildcard pattern. It is similar to \ref
183 * mp_parse_regex_pattern(), so the remarks mentioned there apply to this
186 * Wildcard patterns differ from regular expression patterns in that (a) they
187 * must be enclosed in '|' characters, (b) they support different flags for
188 * modifying matching behaviour, and (c) there is no cache for them.
190 * The following flags, whose meaning is explained in fnmatch(3), are currently
191 * supported: 'n' (FNM_NOESCAPE), 'p' (FNM_PATHNAME), 'P' (FNM_PERIOD), 'l'
192 * (FNM_LEADING_DIR), 'i' (FNM_CASEFOLD), 'e' (FNM_EXTMATCH). The last flag is
193 * a GNU extension. It is silently ignored on non-GNU systems.
195 * \sa \ref parse_quoted_string(), \ref mp_parse_regex_pattern(), fnmatch(3).
197 void mp_parse_wildcard_pattern(const char *src, struct mp_wc_pattern *result)
199 unsigned n = parse_quoted_string(src, "||", &result->pat); /* unquoted pattern is stored in result->pat; n is the offset just past the closing '|' */
202 for (; src[n]; n++) { /* visit every flag character up to the terminating zero byte */
204 case 'n': result->flags |= FNM_NOESCAPE; break;
205 case 'p': result->flags |= FNM_PATHNAME; break;
206 case 'P': result->flags |= FNM_PERIOD; break;
207 /* not POSIX, but both FreeBSD and NetBSD have it */
208 case 'l': result->flags |= FNM_LEADING_DIR; break;
209 case 'i': result->flags |= FNM_CASEFOLD; break;
211 #ifdef HAVE_FNM_EXTMATCH
212 case 'e': result->flags |= FNM_EXTMATCH; break;
213 #else /* silently ignore extglob flag */
216 default: assert(false); /* unsupported flag character: trips the assertion */
222 * Set the error bit in the parser context and log a message.
224 * \param line The number of the input line which caused the error.
225 * \param ctx Contains the error bit.
226 * \param fmt Usual format string.
228 * This is called if the lexer or the parser detect an error in the mood
229 * definition. Only the first error is logged (with a severity of "warn").
231 __printf_3_4 void mp_parse_error(int line, struct mp_context *ctx,
232 const char *fmt, ...)
237 if (ctx->errmsg) /* we already printed an error message */
240 xvasprintf(&tmp, fmt, ap); /* tmp receives the message built from fmt and the variable arguments */
242 xasprintf(&ctx->errmsg, "line %d: %s", line, tmp); /* prepend the line number; a non-NULL ctx->errmsg doubles as the error bit */
244 PARA_WARNING_LOG("%s\n", ctx->errmsg);
247 static int get_afsi(struct mp_context *ctx)
253 ret = get_afsi_of_row(ctx->aft_row, &ctx->afsi); /* afs_info of the current row is stored in the context */
256 ctx->have_afsi = true; /* mp_eval_row() resets this to false for every row */
260 static int get_afhi(struct mp_context *ctx)
266 ret = get_afhi_of_row(ctx->aft_row, &ctx->afhi); /* afh_info of the current row is stored in the context */
269 ctx->have_afhi = true; /* mp_eval_row() resets this to false for every row */
274 * Return the full path to the audio file.
276 * \param ctx Contains a reference to the row of the audio file table which
277 * corresponds to the current audio file. The path of the audio file, the
278 * afs_info and the afh_info structures (which contain the tag information) can
279 * be retrieved through this reference.
281 * \return A reference to the path. Must not be freed by the caller.
283 * \sa \ref get_audio_file_path_of_row().
285 char *mp_path(struct mp_context *ctx)
288 get_audio_file_path_of_row(ctx->aft_row, &ctx->path); /* the path of the current row is stored in ctx->path */
293 * Check whether the given attribute is set for the current audio file.
295 * \param attr The string to look up in the attribute table.
296 * \param ctx See \ref mp_path().
298 * First, determine the bit number which corresponds to the attribute, then
299 * check if this bit is set in the ->attributes field of the afs_info structure
302 * \return True if the attribute is set, false if it is not. On errors, for
303 * example if the given string is no attribute, the function returns false.
305 * \sa \ref get_attribute_bitnum_by_name().
307 bool mp_is_set(const char *attr, struct mp_context *ctx)
310 unsigned char bitnum;
311 const uint64_t one = 1; /* 64 bits wide so that the shift below is carried out in uint64_t rather than int */
313 ret = get_attribute_bitnum_by_name(attr, &bitnum); /* bitnum receives the bit position which belongs to attr */
314 if (ret < 0) /* treat invalid attributes as not set */
319 return (one << bitnum) & ctx->afsi.attributes; /* non-zero iff the bit is set; becomes true/false through the bool return type */
323 * Count the number of attributes set.
325 * \param ctx See \ref mp_path().
327 * \return The number of bits which are set in the ->attributes field of the
328 * afs_info structure of the current audio file.
330 int64_t mp_num_attributes_set(struct mp_context *ctx)
332 const uint64_t m = ~(uint64_t)0; /* all 64 bits set; the masks below are derived from it by division */
340 v = ctx->afsi.attributes;
341 /* taken from https://graphics.stanford.edu/~seander/bithacks.html */
342 v = v - ((v >> 1) & m / 3); /* m / 3 is 0x5555...: every 2-bit field now holds the number of bits set in it */
343 v = (v & m / 15 * 3) + ((v >> 2) & m / 15 * 3); /* m / 15 * 3 is 0x3333...: add adjacent fields to get per-nibble counts */
344 v = (v + (v >> 4)) & m / 255 * 15; /* m / 255 * 15 is 0x0f0f...: add adjacent nibbles to get per-byte counts */
345 v = (v * (m / 255)) >> 56; /* m / 255 is 0x0101...: the product sums all byte counts in the top byte (assuming v is 64 bits wide) */
351 * Define a function which returns a field of the afs_info structure.
353 * \param _name The name of the field.
355 * The defined function casts the value to int64_t. On errors, zero is returned.
357 #define MP_AFSI(_name) \
358 int64_t mp_ ## _name(struct mp_context *ctx) \
360 int ret = get_afsi(ctx); /* load the afs_info into ctx->afsi */ \
363 return ctx->afsi._name; /* converted to the int64_t return type */ \
372 * Define a function which returns a field of the afh_info structure.
374 * \param _name The name of the field.
376 * The defined function casts the value to int64_t. On errors, zero is returned.
378 #define MP_AFHI(_name) \
379 int64_t mp_ ## _name(struct mp_context *ctx) \
381 int ret = get_afhi(ctx); /* load the afh_info into ctx->afhi */ \
384 return ctx->afhi._name; /* converted to the int64_t return type */ \
393 * Return the duration of the audio file from the afh info structure.
395 * \param ctx See \ref mp_path().
397 * The duration is computed by multiplying the number of chunks and the
398 * duration of one chunk.
400 * \return The approximate number of milliseconds.
402 int64_t mp_duration(struct mp_context *ctx)
405 int ret = get_afhi(ctx); /* load the afh_info into ctx->afhi */
409 tv_scale(ctx->afhi.chunks_total, &ctx->afhi.chunk_tv, &tmp); /* tmp = number of chunks times the duration of one chunk */
414 * Define a function which extracts and returns the value of a meta tag.
416 * \param _name The name of the tag (artist, title, ...).
418 * The function will return a pointer to memory owned by the audio file
419 * selector. On errors, or if the current audio file has no tag of the given
420 * name, the function returns the empty string. The caller must not attempt to
421 * free the returned string.
423 #define MP_TAG(_name) \
424 char *mp_ ## _name (struct mp_context *ctx) \
426 int ret = get_afhi(ctx); /* the tags are part of the afh_info in ctx->afhi */ \
429 return ctx->afhi.tags._name; /* pointer into the context, not a copy */ \
439 * Parse and return the value of the year tag.
441 * \param ctx See \ref mp_path().
443 * \return If the year tag is not present, cannot be parsed, or its value is
444 * less than zero, the function returns 0. If the value is less than 100, we
447 int64_t mp_year(struct mp_context *ctx)
450 int ret = get_afhi(ctx); /* the year tag is part of the afh_info in ctx->afhi */
454 assert(ctx->afhi.tags.year); /* the year tag pointer is expected to be non-NULL at this point */
455 ret = para_atoi64(ctx->afhi.tags.year, &year); /* convert the tag text; year is the output argument */
466 * Ideally, these functions should be declared in a header file which is
467 * created by flex with the --header-file option. However, for flex-2.6.x
468 * (2017) this option is broken: if --reentrant is also given, the generated
469 * header file contains syntax errors. As a workaround we declare the functions
472 /** \cond flex_workaround */
473 int mp_yylex_init(mp_yyscan_t *yyscanner); /* called first by mp_init() to set up the scanner */
474 struct yy_buffer_state *mp_yy_scan_bytes(const char *buf, int len,
475 mp_yyscan_t yyscanner); /* mp_init() passes the mood definition and its size */
476 void mp_yy_delete_buffer(struct yy_buffer_state *bs, mp_yyscan_t yyscanner); /* receives the buffer state returned by mp_yy_scan_bytes() */
477 int mp_yylex_destroy(mp_yyscan_t yyscanner); /* called by mp_init() once parsing is done */
478 void mp_yyset_lineno(int lineno, mp_yyscan_t scanner); /* mp_init() sets the initial line number to 1 */
484 * Initialize the mood parser.
486 * This allocates and sets up the internal structures of the mood parser
487 * and creates an abstract syntax tree from the given mood definition.
488 * It must be called before \ref mp_eval_row() can be called.
490 * The context pointer returned by this function may be passed to \ref
491 * mp_eval_row() to determine whether an audio file is admissible.
493 * \param definition A reference to the mood definition.
494 * \param nbytes The size of the mood definition.
495 * \param result Opaque context pointer is returned here.
496 * \param errmsg Optional error message is returned here.
498 * It's OK to pass a NULL pointer or a zero sized buffer as the mood
499 * definition. This corresponds to the "dummy" mood for which all audio files
502 * The error message pointer may also be NULL in which case no error message
503 * is returned. Otherwise, the caller must free the returned string.
505 * \return Standard. On success *errmsg is set to NULL.
507 int mp_init(const char *definition, int nbytes, struct mp_context **result,
512 struct mp_context *ctx;
513 struct yy_buffer_state *buffer_state;
516 if (!definition || nbytes == 0) { /* dummy mood */
521 ctx = zalloc(sizeof(*ctx)); /* presumably zero-filled, so ctx->errmsg and ctx->ast start out as NULL -- confirm in zalloc() */
525 ret = mp_yylex_init(&scanner);
527 buffer_state = mp_yy_scan_bytes(definition, nbytes, scanner); /* the scanner reads from the definition buffer */
528 mp_yyset_lineno(1, scanner); /* line numbers start at 1 */
529 PARA_NOTICE_LOG("creating abstract syntax tree\n");
530 ret = mp_yyparse(ctx, &ctx->ast, scanner); /* the syntax tree is returned in ctx->ast */
531 mp_yy_delete_buffer(buffer_state, scanner); /* buffer and scanner are released before the parse result is examined */
532 mp_yylex_destroy(scanner);
533 if (ctx->errmsg) { /* parse error */
534 mp_free_ast(ctx->ast); /* drop whatever part of the tree was built */
536 *errmsg = ctx->errmsg; /* the caller becomes responsible for freeing the message */
540 return -E_MOOD_PARSE;
549 * Determine whether the given audio file is admissible.
551 * \param aft_row The audio file to check for admissibility.
552 * \param ctx As returned from \ref mp_init().
554 * \return Whether the audio file is admissible.
556 * If the mood parser was set up without an input buffer (dummy mood), this
557 * function returns true (without looking at the audio file metadata) to
558 * indicate that the given audio file should be considered admissible.
560 * \sa \ref change_current_mood(), \ref mp_eval_ast().
562 bool mp_eval_row(const struct osl_row *aft_row, struct mp_context *ctx)
564 if (!ctx) /* dummy mood */
566 if (!ctx->ast) /* empty mood */
569 ctx->aft_row = aft_row; /* the accessors (mp_path(), get_afsi(), get_afhi()) read the row from here */
570 ctx->have_afsi = false; /* no afs_info has been loaded for this row yet; get_afsi() sets the flag */
571 ctx->have_afhi = false; /* likewise for the afh_info and get_afhi() */
573 return mp_eval_ast(ctx->ast, ctx);
577 * Deallocate the resources of a mood parser.
579 * This function frees the abstract syntax tree which was created by \ref
582 * \param ctx As returned from \ref mp_init().
584 * It's OK to pass a NULL pointer, in which case the function does nothing.
586 void mp_shutdown(struct mp_context *ctx)
590 mp_free_ast(ctx->ast); /* release the tree which mp_yyparse() stored in ctx->ast during mp_init() */