1 /* SPDX-License-Identifier: GPL-3.0-only */
15 struct txp_ast_node *ast;
19 * Set the error bit in the parser context and log a message.
21 * This is called if the lexer or the parser detects an error. Only the first
22 * error is logged (with a severity of "warn").
24 __attribute__ ((format (printf, 3, 4))) /* fmt is argument 3, its variadic arguments start at argument 4 */
25 void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...)
30 if (ctx->errmsg) /* we already printed an error message */
33 xvasprintf(&tmp, fmt, ap); /* expand the caller's format into tmp (presumably heap-allocated; confirm against xvasprintf()) */
35 xasprintf(&ctx->errmsg, "line %d: %s", line, tmp); /* a non-NULL ->errmsg doubles as the "error already seen" flag tested above */
37 WARNING_LOG("%s\n", ctx->errmsg); /* logged once, with the line number prefix */
41 * Parse a (generalized) string literal.
43 * This function turns the generalized C99 string literal given by src into a C
44 * string. For example, the string literal "xyz\n" is transformed into an
45 * array containing the three characters 'x', 'y' and 'z', followed by a
46 * newline character and the terminating zero byte. The caller may
47 * specify different quote characters so that, for example, regular expression
48 * patterns enclosed in '/' can be parsed as well. To parse a proper string
49 * literal, one has to pass two double quotes as the second argument.
51 * The function strips off the opening and closing quote characters, replaces
52 * double backslashes by single backslashes and handles the usual escapes like
55 * The caller must make sure that the input is well-formed. The function simply
56 * aborts if the input is not a valid C99 string literal (modulo the quote
59 * The return value is the offset of the first character after the closing
60 * quote. For proper string literals this will be the terminating zero byte of
61 * the input string, for regular expression patterns it is the beginning of the
62 * flags which modify the matching behaviour.
64 unsigned parse_quoted_string(const char *src, const char quote_chars[2],
67 size_t n, len = strlen(src); /* len counts both quote characters and anything after the closing quote */
72 assert(src[0] == quote_chars[0]); /* input must begin with the opening quote */
73 p = dst = xmalloc(len - 1); /* both quotes are dropped, so at most len - 2 payload bytes; the extra byte is presumably for the terminating zero */
84 if (c == quote_chars[1]) /* NOTE(review): first of two tests against the closing quote; presumably the unescaped case that ends the scan */
89 if (c == quote_chars[1]) /* NOTE(review): presumably reached after a backslash, i.e. an escaped quote character */
90 *p++ = quote_chars[1]; /* copy the quote character itself into the output */
92 case '\\': *p++ = '\\'; break; /* each escape letter below is replaced by the single character it denotes */
93 case 'a': *p++ = '\a'; break;
94 case 'b': *p++ = '\b'; break;
95 case 'f': *p++ = '\f'; break;
96 case 'n': *p++ = '\n'; break;
97 case 'r': *p++ = '\r'; break;
98 case 't': *p++ = '\t'; break;
99 case 'v': *p++ = '\v'; break;
100 default: assert(false); /* unknown escape: abort; note that assert() is compiled out when NDEBUG is defined */
104 assert(src[n] == quote_chars[1]); /* at this point n must index the closing quote */
111 * Parse and compile an extended regular expression pattern, including flags.
113 * A regex pattern is identical to a C99 string literal except (a) it is
114 * enclosed in '/' characters rather than double quotes, (b) double quote
115 * characters which are part of the pattern do not need to be quoted with
116 * backslashes, but slashes must be quoted in this way, and (c) the closing
117 * slash may be followed by one or more flag characters which modify the
118 * matching behaviour.
120 * The only flags which are currently supported are 'i' to ignore case when matching
121 * (REG_ICASE) and 'n' to change the handling of newline characters
124 * This function calls parse_quoted_string(), hence it aborts if the input
125 * string is malformed. However, errors from regcomp(3) are returned without
126 * aborting the process. The rationale behind this difference is that passing a
127 * malformed string must be considered an implementation bug because malformed
128 * strings should be rejected earlier by the lexer.
130 int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result)
134 unsigned n = parse_quoted_string(src, "//", &pat); /* '/' serves as both opening and closing quote; n is the offset just past the closing slash */
137 for (; src[n]; n++) { /* everything after the closing slash is a flag character */
139 case 'i': result->flags |= REG_ICASE; break;
140 case 'n': result->flags |= REG_NEWLINE; break;
141 default: assert(false); /* unsupported flag: abort; a no-op when NDEBUG is defined */
144 ret = xregcomp(&result->preg, pat, result->flags); /* compile the unquoted pattern with the collected flags */
/*
 * Allocate one AST node. The public constructors in this file call this
 * helper and then set ->num_children (and ->children) themselves.
 * NOTE(review): presumably also stores id in ->id; that assignment is not
 * visible in this excerpt.
 */
149 static struct txp_ast_node *ast_node_raw(int id)
151 struct txp_ast_node *node = xmalloc(sizeof(*node)); /* sized from the pointee, not from the type name */
157 * Allocate a new leaf node for the abstract syntax tree.
159 * This returns a pointer to a node whose ->num_children field is initialized
160 * to zero. The ->id field is initialized with the given id. The caller is
161 * expected to initialize the ->sv field.
163 * This has to be non-static because it is also called from the lexer.
165 struct txp_ast_node *txp_new_ast_leaf_node(int id)
167 struct txp_ast_node *node = ast_node_raw(id);
168 node->num_children = 0; /* a leaf has no children; ->children is not touched on the visible lines */
/*
 * Create an internal AST node with exactly one child. The child pointer is
 * stored as given (the subtree is not copied); txp_free_ast() later recurses
 * into ->children.
 */
172 struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child)
174 struct txp_ast_node *node = ast_node_raw(id);
175 node->num_children = 1;
176 node->children = xmalloc(sizeof(struct txp_ast_node *)); /* array holding a single child pointer */
177 node->children[0] = child;
/*
 * Create an internal AST node with two children. Both pointers are stored
 * as given (the subtrees are not copied): left becomes children[0], right
 * becomes children[1].
 */
181 struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left,
182 struct txp_ast_node *right)
184 struct txp_ast_node *node = ast_node_raw(id);
185 node->num_children = 2;
186 node->children = xmalloc(2 * sizeof(struct txp_ast_node *)); /* array holding two child pointers */
187 node->children[0] = left;
188 node->children[1] = right;
193 * Deallocate an abstract syntax tree.
195 * This frees the memory occupied by the nodes of the AST, the child pointers
196 * of the internal nodes and the (constant) semantic values of the leaf nodes
197 * (string literals and pre-compiled regular expressions).
199 static void txp_free_ast(struct txp_ast_node *root)
203 if (root->num_children > 0) { /* internal node */
205 for (i = 0; i < root->num_children; i++)
206 txp_free_ast(root->children[i]); /* release each subtree first */
207 free(root->children); /* then the array of child pointers */
209 union txp_semantic_value *sv = &root->sv; /* presumably the leaf branch: release the node's semantic value */
215 regfree(&sv->re_pattern.preg); /* release the compiled regex held by the node */
/*
 * Release the abstract syntax tree referenced by the given parser context.
 * Any further cleanup of ctx is not visible in this excerpt.
 */
222 void txp_free(struct txp_context *ctx)
224 txp_free_ast(ctx->ast);
/* Forward declaration: eval_binary_op() calls eval_node() ahead of its definition. */
228 static int eval_node(const struct txp_ast_node *node,
229 const struct txp_context *ctx,
230 const struct epi_properties *props,
231 union txp_semantic_value *result);
/*
 * Evaluate both operands of a binary node: children[0] into *v1 and
 * children[1] into *v2. The values returned by eval_node() are discarded,
 * so callers must already know which union member to read.
 */
233 static void eval_binary_op(const struct txp_ast_node *node,
234 const struct txp_context *ctx,
235 const struct epi_properties *props,
236 union txp_semantic_value *v1, union txp_semantic_value *v2)
238 eval_node(node->children[0], ctx, props, v1);
239 eval_node(node->children[1], ctx, props, v2);
/*
 * Recursively evaluate the subtree rooted at node and store its value in
 * *result. The int return value tells the caller which member of *result is
 * meaningful: ST_STRVAL and ST_REGEX_PATTERN are tested or returned below,
 * and txp_eval_ast() additionally tests for ST_INTVAL and ST_BOOLVAL.
 * NOTE(review): the code appears to dispatch on node->id, but most case
 * labels and return statements are not visible in this excerpt.
 */
242 static int eval_node(const struct txp_ast_node *node,
243 const struct txp_context *ctx,
244 const struct epi_properties *props,
245 union txp_semantic_value *result)
248 union txp_semantic_value v1, v2; /* scratch values for the operands */
254 result->strval = node->sv.strval; /* hands out the node's own string pointer, no copy */
257 result->strval = epi_text(props);
261 result->intval = node->sv.intval;
264 eval_binary_op(node, ctx, props, &v1, &v2);
265 result->intval = v1.intval + v2.intval; /* NOTE(review): no overflow check is visible for the integer arithmetic */
268 eval_binary_op(node, ctx, props, &v1, &v2);
269 result->intval = v1.intval - v2.intval;
272 eval_binary_op(node, ctx, props, &v1, &v2);
273 result->intval = v1.intval * v2.intval;
276 eval_binary_op(node, ctx, props, &v1, &v2);
277 if (v2.intval == 0) { /* guard: never divide by zero */
280 ERROR_LOG("division by zero\n");
284 result->intval = v1.intval / v2.intval;
287 eval_node(node->children[0], ctx, props, &v1);
288 result->intval = -v1.intval; /* unary minus */
291 result->intval = epi_len(props);
295 eval_node(node->children[0], ctx, props, &v1);
296 result->boolval = epi_has_tag(node->children[0]->sv.strval, /* the tag is read directly from the child's ->sv on this line, not from v1 */
300 result->boolval = true;
303 result->boolval = false;
306 eval_binary_op(node, ctx, props, &v1, &v2);
307 result->boolval = v1.boolval || v2.boolval; /* both subtrees were already evaluated above, so || does not skip evaluating the right subtree */
310 eval_binary_op(node, ctx, props, &v1, &v2);
311 result->boolval = v1.boolval && v2.boolval; /* likewise: both subtrees are always evaluated */
314 eval_node(node->children[0], ctx, props, &v1);
315 result->boolval = !v1.boolval;
318 ret = eval_node(node->children[0], ctx, props, &v1); /* the type of the left operand selects the kind of comparison */
319 eval_node(node->children[1], ctx, props, &v2); /* the type of the right operand is not checked here */
320 if (ret == ST_STRVAL)
321 result->boolval = !strcmp(v1.strval, v2.strval); /* strcmp() returns 0 for equal strings */
323 result->boolval = v1.intval == v2.intval;
326 ret = eval_node(node->children[0], ctx, props, &v1); /* same scheme as for equality: left operand decides */
327 eval_node(node->children[1], ctx, props, &v2);
328 if (ret == ST_STRVAL)
329 result->boolval = strcmp(v1.strval, v2.strval); /* any non-zero strcmp() result means the strings differ */
331 result->boolval = v1.intval != v2.intval;
334 eval_binary_op(node, ctx, props, &v1, &v2);
335 result->boolval = v1.intval < v2.intval;
338 eval_binary_op(node, ctx, props, &v1, &v2);
339 result->boolval = v1.intval > v2.intval;
342 eval_binary_op(node, ctx, props, &v1, &v2);
343 result->boolval = v1.intval <= v2.intval;
345 case GREATER_OR_EQUAL:
346 eval_binary_op(node, ctx, props, &v1, &v2);
347 result->boolval = v1.intval >= v2.intval;
350 eval_binary_op(node, ctx, props, &v1, &v2);
351 result->boolval = regexec(&v2.re_pattern.preg, v1.strval, /* left operand is the string, right operand the compiled pattern */
355 result->re_pattern = node->sv.re_pattern; /* structure assignment: the node's pattern is copied by value into *result */
356 return ST_REGEX_PATTERN;
358 EMERG_LOG("bug: invalid node id %d\n", node->id); /* an id not handled above is treated as an implementation bug */
364 * Evaluate an abstract syntax tree, starting at the root node.
366 * The ctx argument should be the pointer that was returned from an earlier
367 * call to txp_init(). The cookie properties structure contains the information
370 * Returns true if the AST evaluates to true, a non-empty string, or a non-zero
371 * number, false otherwise.
373 bool txp_eval_ast(const struct txp_context *ctx,
374 const struct epi_properties *props)
376 union txp_semantic_value v; /* receives the value of the root node */
381 ret = eval_node(ctx->ast, ctx, props, &v); /* ret tells which member of v is meaningful */
383 if (ret == ST_INTVAL)
384 return v.intval != 0; /* numbers: true iff non-zero */
385 if (ret == ST_STRVAL)
386 return v.strval[0] != 0; /* strings: true iff non-empty */
387 if (ret == ST_BOOLVAL) /* the statement for this case is not visible in this excerpt */
/*
 * Prototypes of the lexer functions called by txp_init().
 * NOTE(review): presumably these are emitted by the scanner generator under
 * a txp_yy prefix and declared here by hand; confirm against the lexer.
 */
392 int txp_yylex_init(txp_yyscan_t *yyscanner);
393 struct yy_buffer_state *txp_yy_scan_bytes(const char *buf, int len,
394 txp_yyscan_t yyscanner);
395 void txp_yy_delete_buffer(struct yy_buffer_state *bs, txp_yyscan_t yyscanner);
396 int txp_yylex_destroy(txp_yyscan_t yyscanner);
397 void txp_yyset_lineno(int lineno, txp_yyscan_t scanner);
400 * Initialize the tag expression parser.
402 * This allocates and sets up the internal structures of the tag expression
403 * parser and creates an abstract syntax tree from the given epigram (including
404 * the tags). It must be called before txp_eval_ast() can be called.
406 * The context pointer returned by this function may be passed to txp_eval_ast()
407 * to determine whether an epigram is admissible.
409 * The error message pointer may be NULL in which case no error message is
410 * returned. Otherwise, the caller must free the returned string.
412 int txp_init(const struct iovec *definition, struct txp_context **result,
416 txp_yyscan_t scanner;
417 struct txp_context *ctx;
418 struct yy_buffer_state *buffer_state;
420 ctx = xcalloc(sizeof(*ctx));
421 ret = txp_yylex_init(&scanner);
423 buffer_state = txp_yy_scan_bytes(definition->iov_base,
424 definition->iov_len, scanner);
425 txp_yyset_lineno(1, scanner);
426 NOTICE_LOG("creating abstract syntax tree from tag expression\n");
427 ret = txp_yyparse(ctx, &ctx->ast, scanner);
428 txp_yy_delete_buffer(buffer_state, scanner);
429 txp_yylex_destroy(scanner);
430 if (ctx->errmsg) { /* parse error */
432 *errmsg = ctx->errmsg;