ast.c

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2
   3 #include "tf.h"
   4 #include "txp.bison.h"
   5
   6 enum semantic_types {
   7         ST_STRVAL,
   8         ST_INTVAL,
   9         ST_BOOLVAL,
  10         ST_REGEX_PATTERN,
  11 };
  12
  13 struct txp_context {
  14         /* global context */
  15         char *errmsg;
  16         struct txp_ast_node *ast;
  17         /* per tag expression context */
  18         unsigned num_lines;
  19         unsigned num_tags;
  20         char **tags;
  21 };
  22
  23 /*
  24  * Set the error bit in the parser context and log a message.
  25  *
  26  * This is called if the lexer or the parser detect an error. Only the first
  27  * error is logged (with a severity of "warn").
  28  */
  29 __attribute__ ((format (printf, 3, 4)))
  30 void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...)
  31 {
  32         va_list ap;
  33         char *tmp;
  34
  35         if (ctx->errmsg) /* we already printed an error message */
  36                 return;
  37         va_start(ap, fmt);
  38         xvasprintf(&tmp, fmt, ap);
  39         va_end(ap);
  40         xasprintf(&ctx->errmsg, "line %d: %s", line, tmp);
  41         free(tmp);
  42         WARNING_LOG("%s\n", ctx->errmsg);
  43 }
  44
  45 /*
  46  * Parse a (generalized) string literal.
  47  *
  48  * This function turns the generalized C99 string literal given by src into a C
  49  * string.  For example, the string literal "xyz\n" is transformed into an
  50  * array containing the three characters 'x', 'y' and 'z', followed by a
  51  * newline character and the terminating zero byte. The function allows to
  52  * specify different quote characters so that, for example, regular expression
  53  * patterns enclosed in '/' can be parsed as well. To parse a proper string
  54  * literal, one has to pass two double quotes as the second argument.
  55  *
  56  * The function strips off the opening and leading quote characters, replaces
  57  * double backslashes by single backslashes and handles the usual escapes like
  58  * \n and \".
  59  *
  60  * The caller must make sure that the input is well-formed. The function simply
  61  * aborts if the input is not a valid C99 string literal (modulo the quote
  62  * characters).
  63  *
  64  * The return value is the offset of the first character after the closing
  65  * quote. For proper string literals this will be the terminating zero byte of
  66  * the input string, for regular expression patterns it is the beginning of the
  67  * flags which modify the matching behaviour.
  68  */
  69 unsigned parse_quoted_string(const char *src, const char quote_chars[2],
  70                 char **result)
  71 {
  72         size_t n, len = strlen(src);
  73         char *dst, *p;
  74         bool backslash;
  75
  76         assert(len >= 2);
  77         assert(src[0] == quote_chars[0]);
  78         p = dst = xmalloc(len - 1);
  79         backslash = false;
  80         for (n = 1;; n++) {
  81                 char c;
  82                 assert(n < len);
  83                 c = src[n];
  84                 if (!backslash) {
  85                         if (c == '\\') {
  86                                 backslash = true;
  87                                 continue;
  88                         }
  89                         if (c == quote_chars[1])
  90                                 break;
  91                         *p++ = c;
  92                         continue;
  93                 }
  94                 if (c == quote_chars[1])
  95                         *p++ = quote_chars[1];
  96                 else switch (c) {
  97                         case '\\': *p++ = '\\'; break;
  98                         case 'a': *p++ = '\a'; break;
  99                         case 'b': *p++ = '\b'; break;
 100                         case 'f': *p++ = '\f'; break;
 101                         case 'n': *p++ = '\n'; break;
 102                         case 'r': *p++ = '\r'; break;
 103                         case 't': *p++ = '\t'; break;
 104                         case 'v': *p++ = '\v'; break;
 105                         default: assert(false);
 106                 }
 107                 backslash = false;
 108         }
 109         assert(src[n] == quote_chars[1]);
 110         *p = '\0';
 111         *result = dst;
 112         return n + 1;
 113 }
 114
 115 /*
 116  * Parse and compile an extended regular expression pattern, including flags.
 117  *
 118  * A regex pattern is identical to a C99 string literal except (a) it is
 119  * enclosed in '/' characters rather than double quotes, (b) double quote
 120  * characters which are part of the pattern do not need to be quoted with
 121  * backslashes, but slashes must be quoted in this way, and (c) the closing
 122  * slash may be followed by one or more flag characters which modify the
 123  * matching behaviour.
 124  *
 125  * The only flags which are currently supported are 'i' to ignore case in match
 126  * (REG_ICASE) and 'n' to change the handling of newline characters
 127  * (REG_NEWLINE).
 128  *
 129  * This function calls parse_quoted_string(), hence it aborts if the input
 130  * string is malformed. However, errors from regcomp(3) are returned without
 131  * aborting the process. The rationale behind this difference is that passing a
 132  * malformed string must be considered an implementation bug because malformed
 133  * strings should be rejected earlier by the lexer.
 134  */
 135 int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result)
 136 {
 137         int ret;
 138         char *pat;
 139         unsigned n = parse_quoted_string(src, "//", &pat);
 140
 141         result->flags = 0;
 142         for (; src[n]; n++) {
 143                 switch (src[n]) {
 144                 case 'i': result->flags |= REG_ICASE; break;
 145                 case 'n': result->flags |= REG_NEWLINE; break;
 146                 default: assert(false);
 147                 }
 148         }
 149         ret = xregcomp(&result->preg, pat, result->flags);
 150         free(pat);
 151         return ret;
 152 }
 153
 154 static struct txp_ast_node *ast_node_raw(int id)
 155 {
 156         struct txp_ast_node *node = xmalloc(sizeof(*node));
 157         node->id = id;
 158         return node;
 159 }
 160
 161 /* This is non-static because it is also called from the lexer. */
 162 struct txp_ast_node *txp_new_ast_leaf_node(int id)
 163 {
 164         struct txp_ast_node *node = ast_node_raw(id);
 165         node->num_children = 0;
 166         return node;
 167 }
 168
 169 struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child)
 170 {
 171         struct txp_ast_node *node = ast_node_raw(id);
 172         node->num_children = 1;
 173         node->children = xmalloc(sizeof(struct txp_ast_node *));
 174         node->children[0] = child;
 175         return node;
 176 }
 177
 178 struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left,
 179                 struct txp_ast_node *right)
 180 {
 181         struct txp_ast_node *node = ast_node_raw(id);
 182         node->num_children = 2;
 183         node->children = xmalloc(2 * sizeof(struct txp_ast_node *));
 184         node->children[0] = left;
 185         node->children[1] = right;
 186         return node;
 187 }
 188
 189 void txp_free_ast(struct txp_ast_node *root)
 190 {
 191         if (!root)
 192                 return;
 193         if (root->num_children > 0) {
 194                 int i;
 195                 for (i = 0; i < root->num_children; i++)
 196                         txp_free_ast(root->children[i]);
 197                 free(root->children);
 198         } else {
 199                 union txp_semantic_value *sv = &root->sv;
 200                 switch (root->id) {
 201                 case STRING_LITERAL:
 202                         free(sv->strval);
 203                         break;
 204                 case REGEX_PATTERN:
 205                         regfree(&sv->re_pattern.preg);
 206                         break;
 207                 }
 208         }
 209         free(root);
 210 }
 211
 212 static int eval_node(struct txp_ast_node *node, struct txp_context *ctx,
 213                 union txp_semantic_value *result);
 214
 215 static void eval_binary_op(struct txp_ast_node *node, struct txp_context *ctx,
 216                 union txp_semantic_value *v1, union txp_semantic_value *v2)
 217 {
 218         eval_node(node->children[0], ctx, v1);
 219         eval_node(node->children[1], ctx, v2);
 220 }
 221
 222 static int eval_node(struct txp_ast_node *node, struct txp_context *ctx,
 223                 union txp_semantic_value *result)
 224 {
 225         int ret;
 226         union txp_semantic_value v1, v2;
 227
 228         switch (node->id) {
 229         /* strings */
 230         case STRING_LITERAL:
 231                 result->strval = node->sv.strval;
 232                 return ST_STRVAL;
 233         /* integers */
 234         case NUM:
 235                 result->intval = node->sv.intval;
 236                 return ST_INTVAL;
 237         case '+':
 238                 eval_binary_op(node, ctx, &v1, &v2);
 239                 result->intval = v1.intval + v2.intval;
 240                 return ST_INTVAL;
 241         case '-':
 242                 eval_binary_op(node, ctx, &v1, &v2);
 243                 result->intval = v1.intval - v2.intval;
 244                 return ST_INTVAL;
 245         case '*':
 246                 eval_binary_op(node, ctx, &v1, &v2);
 247                 result->intval = v1.intval * v2.intval;
 248                 return ST_INTVAL;
 249         case '/':
 250                 eval_binary_op(node, ctx, &v1, &v2);
 251                 if (v2.intval == 0) {
 252                         static bool warned;
 253                         if (!warned)
 254                                 ERROR_LOG("division by zero\n");
 255                         warned = true;
 256                         result->intval = 0;
 257                 } else
 258                         result->intval = v1.intval / v2.intval;
 259                 return ST_INTVAL;
 260         case NEG:
 261                 eval_node(node->children[0], ctx, &v1);
 262                 result->intval = -v1.intval;
 263                 return ST_INTVAL;
 264         case NUM_LINES:
 265                 result->intval = ctx->num_lines;
 266                 return ST_INTVAL;
 267         /* bools */
 268         case TRUE:
 269                 result->boolval = true;
 270                 return ST_BOOLVAL;
 271         case FALSE:
 272                 result->boolval = false;
 273                 return ST_BOOLVAL;
 274         case OR:
 275                 eval_binary_op(node, ctx, &v1, &v2);
 276                 result->boolval = v1.boolval || v2.boolval;
 277                 return ST_BOOLVAL;
 278         case AND:
 279                 eval_binary_op(node, ctx, &v1, &v2);
 280                 result->boolval = v1.boolval && v2.boolval;
 281                 return ST_BOOLVAL;
 282         case NOT:
 283                 eval_node(node->children[0], ctx, &v1);
 284                 result->boolval = !v1.boolval;
 285                 return ST_BOOLVAL;
 286         case EQUAL:
 287                 ret = eval_node(node->children[0], ctx, &v1);
 288                 eval_node(node->children[1], ctx, &v2);
 289                 if (ret == ST_STRVAL)
 290                         result->boolval = !strcmp(v1.strval, v2.strval);
 291                 else
 292                         result->boolval = v1.intval == v2.intval;
 293                 return ST_BOOLVAL;
 294         case NOT_EQUAL:
 295                 ret = eval_node(node->children[0], ctx, &v1);
 296                 eval_node(node->children[1], ctx, &v2);
 297                 if (ret == ST_STRVAL)
 298                         result->boolval = strcmp(v1.strval, v2.strval);
 299                 else
 300                         result->boolval = v1.intval != v2.intval;
 301                 return ST_BOOLVAL;
 302         case '<':
 303                 eval_binary_op(node, ctx, &v1, &v2);
 304                 result->boolval = v1.intval < v2.intval;
 305                 return ST_BOOLVAL;
 306         case '>':
 307                 eval_binary_op(node, ctx, &v1, &v2);
 308                 result->boolval = v1.intval > v2.intval;
 309                 return ST_BOOLVAL;
 310         case LESS_OR_EQUAL:
 311                 eval_binary_op(node, ctx, &v1, &v2);
 312                 result->boolval = v1.intval <= v2.intval;
 313                 return ST_BOOLVAL;
 314         case GREATER_OR_EQUAL:
 315                 eval_binary_op(node, ctx, &v1, &v2);
 316                 result->boolval = v1.intval >= v2.intval;
 317                 return ST_BOOLVAL;
 318         case REGEX_MATCH:
 319                 eval_binary_op(node, ctx, &v1, &v2);
 320                 result->boolval = regexec(&v2.re_pattern.preg, v1.strval,
 321                          0, NULL, 0) == 0;
 322                 return ST_BOOLVAL;
 323         case REGEX_PATTERN:
 324                 result->re_pattern = node->sv.re_pattern;
 325                 return ST_REGEX_PATTERN;
 326         default:
 327                 EMERG_LOG("bug: invalid node id %d\n", node->id);
 328                 exit(EXIT_FAILURE);
 329         }
 330 }
 331
 332 bool txp_eval_ast(struct txp_ast_node *root, struct txp_context *ctx)
 333 {
 334         union txp_semantic_value v;
 335         int ret = eval_node(root, ctx, &v);
 336
 337         if (ret == ST_INTVAL)
 338                 return v.intval != 0;
 339         if (ret == ST_STRVAL)
 340                 return v.strval[0] != 0;
 341         if (ret == ST_BOOLVAL)
 342                 return v.boolval;
 343         assert(false);
 344 }
 345
 346 int txp_yylex_init(txp_yyscan_t *yyscanner);
 347 struct yy_buffer_state *txp_yy_scan_bytes(const char *buf, int len,
 348         txp_yyscan_t yyscanner);
 349 void txp_yy_delete_buffer(struct yy_buffer_state *bs, txp_yyscan_t yyscanner);
 350 int txp_yylex_destroy(txp_yyscan_t yyscanner);
 351 void txp_yyset_lineno(int lineno, txp_yyscan_t scanner);
 352
 353 /*
 354  * Initialize the tag expression parser.
 355  *
 356  * This allocates and sets up the internal structures of the tag expression
 357  * parser and creates an abstract syntax tree from the given epigram (including
 358  * the tags). It must be called before txp_eval_ast() can be called.
 359  *
 360  * The context pointer returned by this function may be passed to mp_eval_ast()
 361  * to determine whether an epigram is admissible.
 362  *
 363  * The error message pointer may be NULL in which case no error message is
 364  * returned. Otherwise, the caller must free the returned string.
 365  */
 366 int txp_init(const char *definition, int nbytes, struct txp_context **result,
 367                  char **errmsg)
 368 {
 369         int ret;
 370         txp_yyscan_t scanner;
 371         struct txp_context *ctx;
 372         struct yy_buffer_state *buffer_state;
 373
 374         ctx = xcalloc(sizeof(*ctx));
 375         ret = txp_yylex_init(&scanner);
 376         assert(ret == 0);
 377         buffer_state = txp_yy_scan_bytes(definition, nbytes, scanner);
 378         txp_yyset_lineno(1, scanner);
 379         NOTICE_LOG("creating abstract syntax tree from tag expression\n");
 380         ret = txp_yyparse(ctx, &ctx->ast, scanner);
 381         txp_yy_delete_buffer(buffer_state, scanner);
 382         txp_yylex_destroy(scanner);
 383         if (ctx->errmsg) { /* parse error */
 384                 if (errmsg)
 385                         *errmsg = ctx->errmsg;
 386                 else
 387                         free(ctx->errmsg);
 388                 free(ctx);
 389                 return -E_TXP;
 390         }
 391         if (errmsg)
 392                 *errmsg = NULL;
 393         *result = ctx;
 394         return 1;
 395 }