ast.c

   1 /* SPDX-License-Identifier: GPL-3.0-only */
   2
   3 #include "tf.h"
   4 #include "txp.bison.h"
   5
   6 enum semantic_types {
   7         ST_STRVAL,
   8         ST_INTVAL,
   9         ST_BOOLVAL,
  10         ST_REGEX_PATTERN,
  11 };
  12
  13 struct txp_context {
  14         char *errmsg;
  15         struct txp_ast_node *ast;
  16 };
  17
  18 /*
  19  * Set the error bit in the parser context and log a message.
  20  *
  21  * This is called if the lexer or the parser detect an error. Only the first
  22  * error is logged (with a severity of "warn").
  23  */
  24 __attribute__ ((format (printf, 3, 4)))
  25 void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...)
  26 {
  27         va_list ap;
  28         char *tmp;
  29
  30         if (ctx->errmsg) /* we already printed an error message */
  31                 return;
  32         va_start(ap, fmt);
  33         xvasprintf(&tmp, fmt, ap);
  34         va_end(ap);
  35         xasprintf(&ctx->errmsg, "line %d: %s", line, tmp);
  36         free(tmp);
  37         WARNING_LOG("%s\n", ctx->errmsg);
  38 }
  39
  40 /*
  41  * Parse a (generalized) string literal.
  42  *
  43  * This function turns the generalized C99 string literal given by src into a C
  44  * string.  For example, the string literal "xyz\n" is transformed into an
  45  * array containing the three characters 'x', 'y' and 'z', followed by a
  46  * newline character and the terminating zero byte. The function allows to
  47  * specify different quote characters so that, for example, regular expression
  48  * patterns enclosed in '/' can be parsed as well. To parse a proper string
  49  * literal, one has to pass two double quotes as the second argument.
  50  *
  51  * The function strips off the opening and leading quote characters, replaces
  52  * double backslashes by single backslashes and handles the usual escapes like
  53  * \n and \".
  54  *
  55  * The caller must make sure that the input is well-formed. The function simply
  56  * aborts if the input is not a valid C99 string literal (modulo the quote
  57  * characters).
  58  *
  59  * The return value is the offset of the first character after the closing
  60  * quote. For proper string literals this will be the terminating zero byte of
  61  * the input string, for regular expression patterns it is the beginning of the
  62  * flags which modify the matching behaviour.
  63  */
  64 unsigned parse_quoted_string(const char *src, const char quote_chars[2],
  65                 char **result)
  66 {
  67         size_t n, len = strlen(src);
  68         char *dst, *p;
  69         bool backslash;
  70
  71         assert(len >= 2);
  72         assert(src[0] == quote_chars[0]);
  73         p = dst = xmalloc(len - 1);
  74         backslash = false;
  75         for (n = 1;; n++) {
  76                 char c;
  77                 assert(n < len);
  78                 c = src[n];
  79                 if (!backslash) {
  80                         if (c == '\\') {
  81                                 backslash = true;
  82                                 continue;
  83                         }
  84                         if (c == quote_chars[1])
  85                                 break;
  86                         *p++ = c;
  87                         continue;
  88                 }
  89                 if (c == quote_chars[1])
  90                         *p++ = quote_chars[1];
  91                 else switch (c) {
  92                         case '\\': *p++ = '\\'; break;
  93                         case 'a': *p++ = '\a'; break;
  94                         case 'b': *p++ = '\b'; break;
  95                         case 'f': *p++ = '\f'; break;
  96                         case 'n': *p++ = '\n'; break;
  97                         case 'r': *p++ = '\r'; break;
  98                         case 't': *p++ = '\t'; break;
  99                         case 'v': *p++ = '\v'; break;
 100                         default: assert(false);
 101                 }
 102                 backslash = false;
 103         }
 104         assert(src[n] == quote_chars[1]);
 105         *p = '\0';
 106         *result = dst;
 107         return n + 1;
 108 }
 109
 110 /*
 111  * Parse and compile an extended regular expression pattern, including flags.
 112  *
 113  * A regex pattern is identical to a C99 string literal except (a) it is
 114  * enclosed in '/' characters rather than double quotes, (b) double quote
 115  * characters which are part of the pattern do not need to be quoted with
 116  * backslashes, but slashes must be quoted in this way, and (c) the closing
 117  * slash may be followed by one or more flag characters which modify the
 118  * matching behaviour.
 119  *
 120  * The only flags which are currently supported are 'i' to ignore case in match
 121  * (REG_ICASE) and 'n' to change the handling of newline characters
 122  * (REG_NEWLINE).
 123  *
 124  * This function calls parse_quoted_string(), hence it aborts if the input
 125  * string is malformed. However, errors from regcomp(3) are returned without
 126  * aborting the process. The rationale behind this difference is that passing a
 127  * malformed string must be considered an implementation bug because malformed
 128  * strings should be rejected earlier by the lexer.
 129  */
 130 int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result)
 131 {
 132         int ret;
 133         char *pat;
 134         unsigned n = parse_quoted_string(src, "//", &pat);
 135
 136         result->flags = 0;
 137         for (; src[n]; n++) {
 138                 switch (src[n]) {
 139                 case 'i': result->flags |= REG_ICASE; break;
 140                 case 'n': result->flags |= REG_NEWLINE; break;
 141                 default: assert(false);
 142                 }
 143         }
 144         ret = xregcomp(&result->preg, pat, result->flags);
 145         free(pat);
 146         return ret;
 147 }
 148
 149 static struct txp_ast_node *ast_node_raw(int id)
 150 {
 151         struct txp_ast_node *node = xmalloc(sizeof(*node));
 152         node->id = id;
 153         return node;
 154 }
 155
 156 /*
 157  * Allocate a new leaf node for the abstract syntax tree.
 158  *
 159  * This returns a pointer to a node whose ->num_children field is initialized
 160  * to zero. The ->id field is initialized with the given id.  The caller is
 161  * expected to initialize the ->sv field.
 162  *
 163  * This has to be non-static because it is also called from the lexer.
 164  */
 165 struct txp_ast_node *txp_new_ast_leaf_node(int id)
 166 {
 167         struct txp_ast_node *node = ast_node_raw(id);
 168         node->num_children = 0;
 169         return node;
 170 }
 171
 172 struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child)
 173 {
 174         struct txp_ast_node *node = ast_node_raw(id);
 175         node->num_children = 1;
 176         node->children = xmalloc(sizeof(struct txp_ast_node *));
 177         node->children[0] = child;
 178         return node;
 179 }
 180
 181 struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left,
 182                 struct txp_ast_node *right)
 183 {
 184         struct txp_ast_node *node = ast_node_raw(id);
 185         node->num_children = 2;
 186         node->children = xmalloc(2 * sizeof(struct txp_ast_node *));
 187         node->children[0] = left;
 188         node->children[1] = right;
 189         return node;
 190 }
 191
 192 /*
 193  * Deallocate an abstract syntax tree.
 194  *
 195  * This frees the memory occupied by the nodes of the AST, the child pointers
 196  * of the internal nodes and the (constant) semantic values of the leaf nodes
 197  * (string literals and pre-compiled regular expressions).
 198  */
 199 static void txp_free_ast(struct txp_ast_node *root)
 200 {
 201         if (!root)
 202                 return;
 203         if (root->num_children > 0) {
 204                 int i;
 205                 for (i = 0; i < root->num_children; i++)
 206                         txp_free_ast(root->children[i]);
 207                 free(root->children);
 208         } else {
 209                 union txp_semantic_value *sv = &root->sv;
 210                 switch (root->id) {
 211                 case STRING_LITERAL:
 212                         free(sv->strval);
 213                         break;
 214                 case REGEX_PATTERN:
 215                         regfree(&sv->re_pattern.preg);
 216                         break;
 217                 }
 218         }
 219         free(root);
 220 }
 221
 222 void txp_free(struct txp_context *ctx)
 223 {
 224         txp_free_ast(ctx->ast);
 225         free(ctx);
 226 }
 227
 228 static int eval_node(const struct txp_ast_node *node,
 229                 const struct txp_context *ctx,
 230                 const struct epi_properties *props,
 231                 union txp_semantic_value *result);
 232
 233 static void eval_binary_op(const struct txp_ast_node *node,
 234                 const struct txp_context *ctx,
 235                 const struct epi_properties *props,
 236                 union txp_semantic_value *v1, union txp_semantic_value *v2)
 237 {
 238         eval_node(node->children[0], ctx, props, v1);
 239         eval_node(node->children[1], ctx, props, v2);
 240 }
 241
 242 static int eval_node(const struct txp_ast_node *node,
 243                 const struct txp_context *ctx,
 244                 const struct epi_properties *props,
 245                 union txp_semantic_value *result)
 246 {
 247         int ret;
 248         union txp_semantic_value v1, v2;
 249
 250         assert(node);
 251         switch (node->id) {
 252         /* strings */
 253         case STRING_LITERAL:
 254                 result->strval = node->sv.strval;
 255                 return ST_STRVAL;
 256         case TEXT:
 257                 result->strval = epi_text(props);
 258                 return ST_STRVAL;
 259         /* integers */
 260         case NUM:
 261                 result->intval = node->sv.intval;
 262                 return ST_INTVAL;
 263         case '+':
 264                 eval_binary_op(node, ctx, props, &v1, &v2);
 265                 result->intval = v1.intval + v2.intval;
 266                 return ST_INTVAL;
 267         case '-':
 268                 eval_binary_op(node, ctx, props, &v1, &v2);
 269                 result->intval = v1.intval - v2.intval;
 270                 return ST_INTVAL;
 271         case '*':
 272                 eval_binary_op(node, ctx, props, &v1, &v2);
 273                 result->intval = v1.intval * v2.intval;
 274                 return ST_INTVAL;
 275         case '/':
 276                 eval_binary_op(node, ctx, props, &v1, &v2);
 277                 if (v2.intval == 0) {
 278                         static bool warned;
 279                         if (!warned)
 280                                 ERROR_LOG("division by zero\n");
 281                         warned = true;
 282                         result->intval = 0;
 283                 } else
 284                         result->intval = v1.intval / v2.intval;
 285                 return ST_INTVAL;
 286         case NEG:
 287                 eval_node(node->children[0], ctx, props, &v1);
 288                 result->intval = -v1.intval;
 289                 return ST_INTVAL;
 290         case LEN:
 291                 result->intval = epi_len(props);
 292                 return ST_INTVAL;
 293         /* bools */
 294         case TAG:
 295                 eval_node(node->children[0], ctx, props, &v1);
 296                 result->boolval = epi_has_tag(node->children[0]->sv.strval,
 297                         props);
 298                 return ST_BOOLVAL;
 299         case TRUE:
 300                 result->boolval = true;
 301                 return ST_BOOLVAL;
 302         case FALSE:
 303                 result->boolval = false;
 304                 return ST_BOOLVAL;
 305         case OR:
 306                 eval_binary_op(node, ctx, props, &v1, &v2);
 307                 result->boolval = v1.boolval || v2.boolval;
 308                 return ST_BOOLVAL;
 309         case AND:
 310                 eval_binary_op(node, ctx, props, &v1, &v2);
 311                 result->boolval = v1.boolval && v2.boolval;
 312                 return ST_BOOLVAL;
 313         case NOT:
 314                 eval_node(node->children[0], ctx, props, &v1);
 315                 result->boolval = !v1.boolval;
 316                 return ST_BOOLVAL;
 317         case EQUAL:
 318                 ret = eval_node(node->children[0], ctx, props, &v1);
 319                 eval_node(node->children[1], ctx, props, &v2);
 320                 if (ret == ST_STRVAL)
 321                         result->boolval = !strcmp(v1.strval, v2.strval);
 322                 else
 323                         result->boolval = v1.intval == v2.intval;
 324                 return ST_BOOLVAL;
 325         case NOT_EQUAL:
 326                 ret = eval_node(node->children[0], ctx, props, &v1);
 327                 eval_node(node->children[1], ctx, props, &v2);
 328                 if (ret == ST_STRVAL)
 329                         result->boolval = strcmp(v1.strval, v2.strval);
 330                 else
 331                         result->boolval = v1.intval != v2.intval;
 332                 return ST_BOOLVAL;
 333         case '<':
 334                 eval_binary_op(node, ctx, props, &v1, &v2);
 335                 result->boolval = v1.intval < v2.intval;
 336                 return ST_BOOLVAL;
 337         case '>':
 338                 eval_binary_op(node, ctx, props, &v1, &v2);
 339                 result->boolval = v1.intval > v2.intval;
 340                 return ST_BOOLVAL;
 341         case LESS_OR_EQUAL:
 342                 eval_binary_op(node, ctx, props, &v1, &v2);
 343                 result->boolval = v1.intval <= v2.intval;
 344                 return ST_BOOLVAL;
 345         case GREATER_OR_EQUAL:
 346                 eval_binary_op(node, ctx, props, &v1, &v2);
 347                 result->boolval = v1.intval >= v2.intval;
 348                 return ST_BOOLVAL;
 349         case REGEX_MATCH:
 350                 eval_binary_op(node, ctx, props, &v1, &v2);
 351                 result->boolval = regexec(&v2.re_pattern.preg, v1.strval,
 352                          0, NULL, 0) == 0;
 353                 return ST_BOOLVAL;
 354         case REGEX_PATTERN:
 355                 result->re_pattern = node->sv.re_pattern;
 356                 return ST_REGEX_PATTERN;
 357         default:
 358                 EMERG_LOG("bug: invalid node id %d\n", node->id);
 359                 exit(EXIT_FAILURE);
 360         }
 361 }
 362
 363 /*
 364  * Evaluate an abstract syntax tree, starting at the root node.
 365  *
 366  * The ctx argument should be the pointer that was returned from an earlier
 367  * call to txp_init(). The cookie properties structure contains the information
 368  * about the epigram.
 369  *
 370  * Returns true if the AST evaluates to true, a non-empty string, or a non-zero
 371  * number, false otherwise.
 372  */
 373 bool txp_eval_ast(const struct txp_context *ctx,
 374                 const struct epi_properties *props)
 375 {
 376         union txp_semantic_value v;
 377         int ret;
 378
 379         if (!ctx->ast)
 380                 return true;
 381         ret = eval_node(ctx->ast, ctx, props, &v);
 382
 383         if (ret == ST_INTVAL)
 384                 return v.intval != 0;
 385         if (ret == ST_STRVAL)
 386                 return v.strval[0] != 0;
 387         if (ret == ST_BOOLVAL)
 388                 return v.boolval;
 389         assert(false);
 390 }
 391
 392 int txp_yylex_init(txp_yyscan_t *yyscanner);
 393 struct yy_buffer_state *txp_yy_scan_bytes(const char *buf, int len,
 394         txp_yyscan_t yyscanner);
 395 void txp_yy_delete_buffer(struct yy_buffer_state *bs, txp_yyscan_t yyscanner);
 396 int txp_yylex_destroy(txp_yyscan_t yyscanner);
 397 void txp_yyset_lineno(int lineno, txp_yyscan_t scanner);
 398
 399 /*
 400  * Initialize the tag expression parser.
 401  *
 402  * This allocates and sets up the internal structures of the tag expression
 403  * parser and creates an abstract syntax tree from the given epigram (including
 404  * the tags). It must be called before txp_eval_ast() can be called.
 405  *
 406  * The context pointer returned by this function may be passed to mp_eval_ast()
 407  * to determine whether an epigram is admissible.
 408  *
 409  * The error message pointer may be NULL in which case no error message is
 410  * returned. Otherwise, the caller must free the returned string.
 411  */
 412 int txp_init(const struct iovec *definition, struct txp_context **result,
 413                  char **errmsg)
 414 {
 415         int ret;
 416         txp_yyscan_t scanner;
 417         struct txp_context *ctx;
 418         struct yy_buffer_state *buffer_state;
 419
 420         ctx = xcalloc(sizeof(*ctx));
 421         ret = txp_yylex_init(&scanner);
 422         assert(ret == 0);
 423         buffer_state = txp_yy_scan_bytes(definition->iov_base,
 424                 definition->iov_len, scanner);
 425         txp_yyset_lineno(1, scanner);
 426         NOTICE_LOG("creating abstract syntax tree from tag expression\n");
 427         ret = txp_yyparse(ctx, &ctx->ast, scanner);
 428         txp_yy_delete_buffer(buffer_state, scanner);
 429         txp_yylex_destroy(scanner);
 430         if (ctx->errmsg) { /* parse error */
 431                 if (errmsg)
 432                         *errmsg = ctx->errmsg;
 433                 else
 434                         free(ctx->errmsg);
 435                 free(ctx);
 436                 return -E_TXP;
 437         }
 438         if (errmsg)
 439                 *errmsg = NULL;
 440         *result = ctx;
 441         return 1;
 442 }