1 /* SPDX-License-Identifier: GPL-2.0 */
16 struct txp_ast_node *ast;
17 /* per tag expression context */
24 * Set the error bit in the parser context and log a message.
26 * This is called if the lexer or the parser detects an error. Only the first
27 * error is logged (with a severity of "warn").
/*
 * printf-style checking: fmt is the third parameter and the variable
 * arguments start at the fourth.
 */
29 __attribute__ ((format (printf, 3, 4)))
30 void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...)
/* A non-NULL errmsg in the context means an earlier error was recorded. */
35 if (ctx->errmsg) /* we already printed an error message */
/*
 * Expand fmt and its arguments into tmp. xvasprintf() is a project helper;
 * presumably it allocates the result -- confirm who frees tmp.
 */
38 xvasprintf(&tmp, fmt, ap);
/* Store the message, prefixed with the line number, in the context. */
40 xasprintf(&ctx->errmsg, "line %d: %s", line, tmp);
/* Log the stored message through WARNING_LOG. */
42 WARNING_LOG("%s\n", ctx->errmsg);
46 * Parse a (generalized) string literal.
48 * This function turns the generalized C99 string literal given by src into a C
49 * string. For example, the string literal "xyz\n" is transformed into an
50 * array containing the three characters 'x', 'y' and 'z', followed by a
51 * newline character and the terminating zero byte. The function lets callers
52 * specify different quote characters so that, for example, regular expression
53 * patterns enclosed in '/' can be parsed as well. To parse a proper string
54 * literal, one has to pass two double quotes as the second argument.
56 * The function strips off the opening and closing quote characters, replaces
57 * double backslashes by single backslashes and handles the usual escapes like
60 * The caller must make sure that the input is well-formed. The function simply
61 * aborts if the input is not a valid C99 string literal (modulo the quote
64 * The return value is the offset of the first character after the closing
65 * quote. For proper string literals this will be the terminating zero byte of
66 * the input string, for regular expression patterns it is the beginning of the
67 * flags which modify the matching behaviour.
69 unsigned parse_quoted_string(const char *src, const char quote_chars[2],
72 size_t n, len = strlen(src);
/* The input has to start with the opening quote character. */
77 assert(src[0] == quote_chars[0]);
/*
 * dst is the output buffer and p the write position within it.
 * NOTE(review): len - 1 bytes presumably suffice because both quote
 * characters are dropped and a terminating zero byte is appended -- confirm
 * against the copy loop.
 */
78 p = dst = xmalloc(len - 1);
/*
 * NOTE(review): presumably an unescaped closing quote terminates the copy
 * loop here -- confirm.
 */
89 if (c == quote_chars[1])
/*
 * The closing quote character is copied to the output as is.
 * NOTE(review): presumably this is reached only after a backslash -- confirm.
 */
94 if (c == quote_chars[1])
95 *p++ = quote_chars[1];
/* Single-character escapes: store the character which the escape denotes. */
97 case '\\': *p++ = '\\'; break;
98 case 'a': *p++ = '\a'; break;
99 case 'b': *p++ = '\b'; break;
100 case 'f': *p++ = '\f'; break;
101 case 'n': *p++ = '\n'; break;
102 case 'r': *p++ = '\r'; break;
103 case 't': *p++ = '\t'; break;
104 case 'v': *p++ = '\v'; break;
/* Any other character aborts: the caller must pass well-formed input. */
105 default: assert(false);
/* The character at offset n has to be the closing quote. */
109 assert(src[n] == quote_chars[1]);
116 * Parse and compile an extended regular expression pattern, including flags.
118 * A regex pattern is identical to a C99 string literal except (a) it is
119 * enclosed in '/' characters rather than double quotes, (b) double quote
120 * characters which are part of the pattern do not need to be quoted with
121 * backslashes, but slashes must be quoted in this way, and (c) the closing
122 * slash may be followed by one or more flag characters which modify the
123 * matching behaviour.
125 * The only flags which are currently supported are 'i' to ignore case in match
126 * (REG_ICASE) and 'n' to change the handling of newline characters
129 * This function calls parse_quoted_string(), hence it aborts if the input
130 * string is malformed. However, errors from regcomp(3) are returned without
131 * aborting the process. The rationale behind this difference is that passing a
132 * malformed string must be considered an implementation bug because malformed
133 * strings should be rejected earlier by the lexer.
135 int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result)
/*
 * Decode the pattern between the slashes into pat. n receives the return
 * value of parse_quoted_string(), documented above as the offset of the first
 * character after the closing quote.
 */
139 unsigned n = parse_quoted_string(src, "//", &pat);
/* Each remaining character up to the terminating zero byte is a flag. */
142 for (; src[n]; n++) {
144 case 'i': result->flags |= REG_ICASE; break;
145 case 'n': result->flags |= REG_NEWLINE; break;
/* Unknown flag characters abort the process. */
146 default: assert(false);
/* Compile pat with the collected flags; the outcome is stored in ret. */
149 ret = xregcomp(&result->preg, pat, result->flags);
/*
 * Allocate an AST node with xmalloc(). This is the common first step of the
 * node constructors below, which set num_children and children themselves.
 * NOTE(review): presumably id is stored in the new node -- confirm.
 */
154 static struct txp_ast_node *ast_node_raw(int id)
156 struct txp_ast_node *node = xmalloc(sizeof(*node));
/* Create a node which has no children. */
161 /* This is non-static because it is also called from the lexer. */
162 struct txp_ast_node *txp_new_ast_leaf_node(int id)
164 struct txp_ast_node *node = ast_node_raw(id);
/* txp_free_ast() only touches the children array if this is positive. */
165 node->num_children = 0;
/*
 * Create a node with exactly one child. The node refers to the given child
 * pointer; no copy of the child is made.
 */
169 struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child)
171 struct txp_ast_node *node = ast_node_raw(id);
172 node->num_children = 1;
/* The children array has room for a single pointer. */
173 node->children = xmalloc(sizeof(struct txp_ast_node *));
174 node->children[0] = child;
/*
 * Create a node with two children. The left operand becomes children[0], the
 * right operand children[1]; the node refers to the given pointers, no copies
 * are made.
 */
178 struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left,
179 struct txp_ast_node *right)
181 struct txp_ast_node *node = ast_node_raw(id);
182 node->num_children = 2;
/* The children array has room for two pointers. */
183 node->children = xmalloc(2 * sizeof(struct txp_ast_node *));
184 node->children[0] = left;
185 node->children[1] = right;
/* Recursively release the tree rooted at the given node. */
189 void txp_free_ast(struct txp_ast_node *root)
/* Node with children: free each subtree first, then the pointer array. */
193 if (root->num_children > 0) {
195 for (i = 0; i < root->num_children; i++)
196 txp_free_ast(root->children[i]);
197 free(root->children);
/*
 * NOTE(review): presumably the branch for nodes without children, which
 * releases whatever the semantic value owns -- confirm.
 */
199 union txp_semantic_value *sv = &root->sv;
/* A compiled regular expression is released with regfree(). */
205 regfree(&sv->re_pattern.preg);
/* Forward declaration: eval_binary_op() below calls eval_node(). */
212 static int eval_node(struct txp_ast_node *node, struct txp_context *ctx,
213 union txp_semantic_value *result);

/*
 * Evaluate both children of a node: the value of children[0] is stored in v1,
 * the value of children[1] in v2. The return values of eval_node() are
 * ignored, and both children are always evaluated.
 */
215 static void eval_binary_op(struct txp_ast_node *node, struct txp_context *ctx,
216 union txp_semantic_value *v1, union txp_semantic_value *v2)
218 eval_node(node->children[0], ctx, v1);
219 eval_node(node->children[1], ctx, v2);
/*
 * Evaluate the subtree rooted at node and store its value in result.
 *
 * The return value identifies the kind of value which was stored: callers in
 * this file compare it against ST_STRVAL, ST_INTVAL and ST_BOOLVAL, and
 * pattern nodes return ST_REGEX_PATTERN.
 */
222 static int eval_node(struct txp_ast_node *node, struct txp_context *ctx,
223 union txp_semantic_value *result)
/* Scratch values for the operands of unary and binary operators. */
226 union txp_semantic_value v1, v2;
/* String node: hand out the node's string pointer, no copy is made. */
231 result->strval = node->sv.strval;
/* Integer node. */
235 result->intval = node->sv.intval;
/* Integer addition. */
238 eval_binary_op(node, ctx, &v1, &v2);
239 result->intval = v1.intval + v2.intval;
/* Integer subtraction. */
242 eval_binary_op(node, ctx, &v1, &v2);
243 result->intval = v1.intval - v2.intval;
/* Integer multiplication. */
246 eval_binary_op(node, ctx, &v1, &v2);
247 result->intval = v1.intval * v2.intval;
/* Integer division: a zero divisor is detected and logged. */
250 eval_binary_op(node, ctx, &v1, &v2);
251 if (v2.intval == 0) {
254 ERROR_LOG("division by zero\n");
258 result->intval = v1.intval / v2.intval;
/* Arithmetic negation of the single operand. */
261 eval_node(node->children[0], ctx, &v1);
262 result->intval = -v1.intval;
/* The integer value is taken from the num_lines field of the context. */
265 result->intval = ctx->num_lines;
/* Boolean constants. */
269 result->boolval = true;
272 result->boolval = false;
/*
 * Logical or/and. Both operands have already been evaluated by
 * eval_binary_op() when the C operator is applied, so the second subtree is
 * evaluated even if the first one determines the result.
 */
275 eval_binary_op(node, ctx, &v1, &v2);
276 result->boolval = v1.boolval || v2.boolval;
279 eval_binary_op(node, ctx, &v1, &v2);
280 result->boolval = v1.boolval && v2.boolval;
/* Logical negation of the single operand. */
283 eval_node(node->children[0], ctx, &v1);
284 result->boolval = !v1.boolval;
/*
 * Equality. Only the kind of the left operand is examined: if it is a
 * string, the operands are compared with strcmp(). The integer comparison
 * below is presumably the else branch -- confirm.
 */
287 ret = eval_node(node->children[0], ctx, &v1);
288 eval_node(node->children[1], ctx, &v2);
289 if (ret == ST_STRVAL)
290 result->boolval = !strcmp(v1.strval, v2.strval);
292 result->boolval = v1.intval == v2.intval;
/*
 * Inequality, decided like equality. The strcmp() result is assigned
 * directly: it is non-zero exactly if the strings differ.
 */
295 ret = eval_node(node->children[0], ctx, &v1);
296 eval_node(node->children[1], ctx, &v2);
297 if (ret == ST_STRVAL)
298 result->boolval = strcmp(v1.strval, v2.strval);
300 result->boolval = v1.intval != v2.intval;
/* Integer comparisons: less than, greater than, less or equal. */
303 eval_binary_op(node, ctx, &v1, &v2);
304 result->boolval = v1.intval < v2.intval;
307 eval_binary_op(node, ctx, &v1, &v2);
308 result->boolval = v1.intval > v2.intval;
311 eval_binary_op(node, ctx, &v1, &v2);
312 result->boolval = v1.intval <= v2.intval;
314 case GREATER_OR_EQUAL:
315 eval_binary_op(node, ctx, &v1, &v2);
316 result->boolval = v1.intval >= v2.intval;
/*
 * Regex match: the left operand is the string, the right operand is the
 * compiled pattern.
 */
319 eval_binary_op(node, ctx, &v1, &v2);
320 result->boolval = regexec(&v2.re_pattern.preg, v1.strval,
/* Pattern node: the pattern structure is copied by assignment. */
324 result->re_pattern = node->sv.re_pattern;
325 return ST_REGEX_PATTERN;
/* A node id which is not handled above is reported as a bug. */
327 EMERG_LOG("bug: invalid node id %d\n", node->id);
/*
 * Evaluate the tree rooted at root and reduce the outcome to a boolean.
 *
 * An integer result counts as true if it is non-zero, a string result if the
 * string is not empty.
 */
332 bool txp_eval_ast(struct txp_ast_node *root, struct txp_context *ctx)
334 union txp_semantic_value v;
/* ret tells which member of v was set by eval_node(). */
335 int ret = eval_node(root, ctx, &v);
337 if (ret == ST_INTVAL)
338 return v.intval != 0;
/* Only the first byte is examined, so any non-empty string is true. */
339 if (ret == ST_STRVAL)
340 return v.strval[0] != 0;
/* NOTE(review): presumably the boolean value is returned as is -- confirm. */
341 if (ret == ST_BOOLVAL)
/*
 * Declarations of the scanner functions which txp_init() calls. Their
 * definitions are not part of this file section. NOTE(review): the txp_yy
 * prefix suggests they come from the generated lexer -- confirm.
 */
346 int txp_yylex_init(txp_yyscan_t *yyscanner);
347 struct yy_buffer_state *txp_yy_scan_bytes(const char *buf, int len,
348 txp_yyscan_t yyscanner);
349 void txp_yy_delete_buffer(struct yy_buffer_state *bs, txp_yyscan_t yyscanner);
350 int txp_yylex_destroy(txp_yyscan_t yyscanner);
351 void txp_yyset_lineno(int lineno, txp_yyscan_t scanner);
354 * Initialize the tag expression parser.
356 * This allocates and sets up the internal structures of the tag expression
357 * parser and creates an abstract syntax tree from the given epigram (including
358 * the tags). It must be called before txp_eval_ast() can be called.
360 * The context pointer returned by this function may be passed to txp_eval_ast()
361 * to determine whether an epigram is admissible.
363 * The error message pointer may be NULL in which case no error message is
364 * returned. Otherwise, the caller must free the returned string.
366 int txp_init(const char *definition, int nbytes, struct txp_context **result,
370 txp_yyscan_t scanner;
371 struct txp_context *ctx;
372 struct yy_buffer_state *buffer_state;
374 ctx = xcalloc(sizeof(*ctx));
375 ret = txp_yylex_init(&scanner);
377 buffer_state = txp_yy_scan_bytes(definition, nbytes, scanner);
378 txp_yyset_lineno(1, scanner);
379 NOTICE_LOG("creating abstract syntax tree from tag expression\n");
380 ret = txp_yyparse(ctx, &ctx->ast, scanner);
381 txp_yy_delete_buffer(buffer_state, scanner);
382 txp_yylex_destroy(scanner);
383 if (ctx->errmsg) { /* parse error */
385 *errmsg = ctx->errmsg;