Initial commit.

author Andre Noll <maan@tuebingen.mpg.de>

Mon, 19 Mar 2018 17:37:28 +0000 (18:37 +0100)

committer Andre Noll <maan@tuebingen.mpg.de>

Mon, 19 Mar 2018 18:09:43 +0000 (19:09 +0100)
author Andre Noll <maan@tuebingen.mpg.de>
Mon, 19 Mar 2018 17:37:28 +0000 (18:37 +0100)
committer Andre Noll <maan@tuebingen.mpg.de>
Mon, 19 Mar 2018 18:09:43 +0000 (19:09 +0100)
diff --git a/.gitignore b/.gitignore

new file mode 100644 (file)

index 0000000..442c3ef
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,16 @@
+*.d
+*.o
+*.html
+autom4te.cache
+config.h*
+config.mak
+config.log
+config.status
+configure.sh
+*.1
+*.lsg.*
+*.flex.*
+*.bison.*
+version.c
+tfortune
+tfortune.suite
diff --git a/INSTALL b/INSTALL

new file mode 100644 (file)

index 0000000..52a1fd7
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1 @@
+Run "make README".
diff --git a/Makefile b/Makefile

new file mode 100644 (file)

index 0000000..9500595
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,229 @@
+# SPDX-License-Identifier: GPL-3.0-only
+
+.SUFFIXES:
+MAKEFLAGS += -Rr
+.ONESHELL:
+.SHELLFLAGS := -ec
+
+RM := rm -f
+MKDIR_P := mkdir -p
+CHMOD := chmod
+
+ifeq ("$(origin CC)", "default")
+        CC := cc
+endif
+ifeq ("$(origin V)", "command line")
+       SAY =
+else
+       SAY = @echo '$(strip $(1))'
+endif
+
+AUTHOR := Andre Noll
+COPYRIGHT_YEAR := 2018
+CLONE_URL := git://git.tuebingen.mpg.de/tfortune
+GITWEB_URL := http://git.tuebingen.mpg.de/tfortune.git
+LICENSE := GNU GPL version 3
+LICENSE_URL := https://www.gnu.org/licenses/gpl-3.0-standalone.html
+
+define DESCRIPTION1 :=
+       Like fortune(1), tfortune is a Unix command line utility which prints
+       a random epigram. Epigrams are stored as plain text files, but they
+       must be annotated with tags to make full use of the features which
+       tfortune offers over other implementations.
+endef
+
+define DESCRIPTION2 :=
+       Tfortune has a built-in matching language for epigrams. User-supplied
+       tag expressions define subsets of admissible epigrams. If a tag
+       expression is given, epigrams are picked from the admissible subset
+       only.
+endef
+
+define DESCRIPTION3 :=
+       Besides printing random epigrams, tfortune supports a number of other
+       subcommands which help to maintain epigrams and tag expressions. There
+       are subcommands for listing and for editing the input files, and for
+       printing statistics about epigrams, expressions and tags. A subcommand
+       that enables bash command line completion is also included.
+endef
+LOGLEVELS := LL_DEBUG,LL_INFO,LL_NOTICE,LL_WARNING,LL_ERROR,LL_CRIT,LL_EMERG
+GIT_VERSION := $(shell ./version-gen.sh)
+cc_version := $(shell $(CC) --version | head -n 1)
+build_date := $(shell date)
+uname_rs := $(shell uname -rs)
+
+all := tfortune tfortune.1
+all: $(all)
+
+deps := txp.bison.d txp.flex.d ast.d tfortune.d util.d txp.flex.d \
+       tfortune.lsg.d version.d linhash.d
+
+ifeq ($(findstring clean, $(MAKECMDGOALS)),)
+-include $(deps)
+include config.mak
+endif
+
+config.h.in: configure.ac
+       $(call SAY, AH $<)
+       autoheader -f
+config.mak config.h: config.status config.mak.in config.h.in
+       $(call SAY, CS $@)
+       ./config.status -q
+       test -f config.h && touch config.h
+config.status: configure.sh
+       $(call SAY, CFG)
+       if test -x config.status; then \
+               ./config.status --recheck; \
+       else \
+               ./configure.sh --no-create; \
+       fi
+configure.sh: configure.ac
+       $(call SAY, AC $<)
+       autoconf configure.ac > $@
+       $(CHMOD) 755 $@
+
+.PRECIOUS: %.flex.c %.bison.c %.bison.h %.lsg.h %.lsg.c %.lsg.h
+
+# created by version-gen.sh
+version.c:
+
+index.html tfortune.suite: %: %.m4
+       $(call SAY, M4 $<)
+       $(M4) -D "AUTHOR=$(AUTHOR)" -D "COPYRIGHT_YEAR=$(COPYRIGHT_YEAR)" \
+               -D "PACKAGE_BUGREPORT=$(PACKAGE_BUGREPORT)" \
+               -D "PACKAGE_URL=$(PACKAGE_URL)" \
+               -D "CLONE_URL=$(CLONE_URL)" \
+               -D "GITWEB_URL=$(GITWEB_URL)" \
+               -D "LICENSE=$(LICENSE)" \
+               -D "LICENSE_URL=$(LICENSE_URL)" \
+               -D "DESCRIPTION1=$(DESCRIPTION1)" \
+               -D "DESCRIPTION2=$(DESCRIPTION2)" \
+               -D "DESCRIPTION3=$(DESCRIPTION3)" $< > $@
+%.lsg.c: %.suite
+       $(call SAY, LSGC $<)
+       $(LOPSUBGEN) --gen-c < $<
+
+%.lsg.h: %.suite
+       $(call SAY, LSGH $<)
+       $(LOPSUBGEN) --gen-header < $<
+
+%.1: %.suite version.c
+       $(call SAY, LSGM $<)
+       $(LOPSUBGEN) --gen-man=$@ --version-string $(GIT_VERSION) < $<
+
+%.flex.c: %.lex
+       $(call SAY, FLEX $<)
+       $(FLEX) -o $@ $<
+
+%.bison.c %.bison.h: %.y
+       $(call SAY, BISON $<)
+       $(BISON) --defines=$(notdir $(<:.y=.bison.h)) \
+               --output=$(notdir $(<:.y=.bison.c)) $<
+
+TF_CPPFLAGS += -Wunused-macros
+TF_CPPFLAGS += -DCOPYRIGHT_YEAR='"$(COPYRIGHT_YEAR)"'
+TF_CPPFLAGS += -DAUTHOR='"$(AUTHOR)"'
+TF_CPPFLAGS += -DLOGLEVELS='$(LOGLEVELS)'
+TF_CPPFLAGS += -DBUILD_DATE='"$(build_date)"'
+TF_CPPFLAGS += -DCC_VERSION='"$(cc_version)"'
+TF_CPPFLAGS += -DUNAME_RS='"$(uname_rs)"'
+TF_CPPFLAGS += -DLICENSE='"$(LICENSE)"'
+TF_CPPFLAGS += -DLICENSE_URL='"$(LICENSE_URL)"'
+TF_CPPFLAGS += -I/usr/local/include
+
+TF_CFLAGS += -g
+TF_CFLAGS += -O2
+TF_CFLAGS += -Wall
+TF_CFLAGS += -Wundef -W -Wuninitialized
+TF_CFLAGS += -Wchar-subscripts
+TF_CFLAGS += -Werror-implicit-function-declaration
+TF_CFLAGS += -Wmissing-noreturn
+TF_CFLAGS += -Wbad-function-cast
+TF_CFLAGS += -Wredundant-decls
+TF_CFLAGS += -Wdeclaration-after-statement
+TF_CFLAGS += -Wformat -Wformat-security -Wmissing-format-attribute
+#TF_CFLAGS += -fsanitize=address
+
+%.flex.o: TF_CFLAGS += -Wno-all -Wno-sign-compare -Wno-unused-parameter
+%.flex.o %.bison.o: TF_CPPFLAGS += -Wno-unused-macros
+
+%.o: %.c tfortune.lsg.h txp.bison.h
+       $(call SAY, CC $<)
+       $(CC) \
+               -o $@ -c -MMD -MF $(*F).d \
+               -MT $@ $(TF_CPPFLAGS) $(CPPFLAGS) $(TF_CFLAGS) $(CFLAGS) $<
+
+TF_LDFLAGS = -llopsub
+#TF_LDFLAGS += -fsanitize=address
+
+tfortune: $(deps:.d=.o)
+       $(call SAY, LD $@)
+       $(CC) $^ -o $@ $(TF_LDFLAGS) $(LDFLAGS)
+
+.PHONY: all mostlyclean clean distclan maintainer-clean install \
+       install-strip README
+
+mostlyclean:
+       $(RM) tfortune *.o *.d
+clean: mostlyclean
+       $(RM) *.lsg.* *.flex.* *.bison.* *.1 *.suite
+distclean: clean
+       $(RM) config.mak config.status config.log config.h config.h.in version.c
+       $(RM) -r autom4te.cache
+maintainer-clean:
+       git clean -dfqx > /dev/null 2>&1
+
+mandir := $(datarootdir)/man/man1
+INSTALL ?= install
+INSTALL_PROGRAM ?= $(INSTALL) -m 755
+INSTALL_DATA ?= $(INSTALL) -m 644
+ifneq ($(findstring strip, $(MAKECMDGOALS)),)
+       strip_option := -s
+endif
+
+install install-strip: all
+       $(MKDIR_P) $(DESTDIR)$(bindir) $(DESTDIR)$(mandir)
+       $(INSTALL_PROGRAM) $(strip_option) tfortune $(DESTDIR)$(bindir)
+       $(INSTALL_DATA) tfortune.1 $(DESTDIR)$(mandir)
+
+define README :=
+tfortune - fortune cookies with tags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+$(DESCRIPTION1)
+
+$(DESCRIPTION2)
+
+$(DESCRIPTION3)
+
+Resources
+~~~~~~~~~
+|      git clone URL: $(CLONE_URL)
+|      web page: $(PACKAGE_URL)
+|      email: $(AUTHOR) <$(PACKAGE_BUGREPORT)>
+
+Documentation
+~~~~~~~~~~~~~
+See tfortune.suite. Or build the man page with \"make tfortune.1\"
+and run \"man -l tfortune.1\".
+
+Dependencies
+~~~~~~~~~~~~
+This package requires m4, autoconf, gnu make, flex, bison, gcc or
+clang, and lopsub. The configure script checks if all required
+dependencies are installed and prints a meaningful error message if
+one of them is missing.
+
+Building
+~~~~~~~~
+Run \"make\" to build tfortune with the default settings. Run
+\"./configure -h\" to list configuration options.
+
+Installation
+~~~~~~~~~~~~
+Run \"sudo make install\" to install to /usr/local. To install to
+/somewhere/else, run \"./configure --prefix /somewhere/else && make\"
+first.
+endef
+
+README:
+       @printf '%s\n' "$(README)"
diff --git a/README b/README

new file mode 100644 (file)

index 0000000..52a1fd7
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+Run "make README".
diff --git a/ast.c b/ast.c

new file mode 100644 (file)

index 0000000..1320964
--- /dev/null
+++ b/ast.c
@@ -0,0 +1,442 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#include "tf.h"
+#include "txp.bison.h"
+
+enum semantic_types {
+       ST_STRVAL,
+       ST_INTVAL,
+       ST_BOOLVAL,
+       ST_REGEX_PATTERN,
+};
+
+struct txp_context {
+       char *errmsg;
+       struct txp_ast_node *ast;
+};
+
+/*
+ * Set the error bit in the parser context and log a message.
+ *
+ * This is called if the lexer or the parser detect an error. Only the first
+ * error is logged (with a severity of "warn").
+ */
+__attribute__ ((format (printf, 3, 4)))
+void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...)
+{
+       va_list ap;
+       char *tmp;
+
+       if (ctx->errmsg) /* we already printed an error message */
+               return;
+       va_start(ap, fmt);
+       xvasprintf(&tmp, fmt, ap);
+       va_end(ap);
+       xasprintf(&ctx->errmsg, "line %d: %s", line, tmp);
+       free(tmp);
+       WARNING_LOG("%s\n", ctx->errmsg);
+}
+
+/*
+ * Parse a (generalized) string literal.
+ *
+ * This function turns the generalized C99 string literal given by src into a C
+ * string.  For example, the string literal "xyz\n" is transformed into an
+ * array containing the three characters 'x', 'y' and 'z', followed by a
+ * newline character and the terminating zero byte. The function allows to
+ * specify different quote characters so that, for example, regular expression
+ * patterns enclosed in '/' can be parsed as well. To parse a proper string
+ * literal, one has to pass two double quotes as the second argument.
+ *
+ * The function strips off the opening and leading quote characters, replaces
+ * double backslashes by single backslashes and handles the usual escapes like
+ * \n and \".
+ *
+ * The caller must make sure that the input is well-formed. The function simply
+ * aborts if the input is not a valid C99 string literal (modulo the quote
+ * characters).
+ *
+ * The return value is the offset of the first character after the closing
+ * quote. For proper string literals this will be the terminating zero byte of
+ * the input string, for regular expression patterns it is the beginning of the
+ * flags which modify the matching behaviour.
+ */
+unsigned parse_quoted_string(const char *src, const char quote_chars[2],
+               char **result)
+{
+       size_t n, len = strlen(src);
+       char *dst, *p;
+       bool backslash;
+
+       assert(len >= 2);
+       assert(src[0] == quote_chars[0]);
+       p = dst = xmalloc(len - 1);
+       backslash = false;
+       for (n = 1;; n++) {
+               char c;
+               assert(n < len);
+               c = src[n];
+               if (!backslash) {
+                       if (c == '\\') {
+                               backslash = true;
+                               continue;
+                       }
+                       if (c == quote_chars[1])
+                               break;
+                       *p++ = c;
+                       continue;
+               }
+               if (c == quote_chars[1])
+                       *p++ = quote_chars[1];
+               else switch (c) {
+                       case '\\': *p++ = '\\'; break;
+                       case 'a': *p++ = '\a'; break;
+                       case 'b': *p++ = '\b'; break;
+                       case 'f': *p++ = '\f'; break;
+                       case 'n': *p++ = '\n'; break;
+                       case 'r': *p++ = '\r'; break;
+                       case 't': *p++ = '\t'; break;
+                       case 'v': *p++ = '\v'; break;
+                       default: assert(false);
+               }
+               backslash = false;
+       }
+       assert(src[n] == quote_chars[1]);
+       *p = '\0';
+       *result = dst;
+       return n + 1;
+}
+
+/*
+ * Parse and compile an extended regular expression pattern, including flags.
+ *
+ * A regex pattern is identical to a C99 string literal except (a) it is
+ * enclosed in '/' characters rather than double quotes, (b) double quote
+ * characters which are part of the pattern do not need to be quoted with
+ * backslashes, but slashes must be quoted in this way, and (c) the closing
+ * slash may be followed by one or more flag characters which modify the
+ * matching behaviour.
+ *
+ * The only flags which are currently supported are 'i' to ignore case in match
+ * (REG_ICASE) and 'n' to change the handling of newline characters
+ * (REG_NEWLINE).
+ *
+ * This function calls parse_quoted_string(), hence it aborts if the input
+ * string is malformed. However, errors from regcomp(3) are returned without
+ * aborting the process. The rationale behind this difference is that passing a
+ * malformed string must be considered an implementation bug because malformed
+ * strings should be rejected earlier by the lexer.
+ */
+int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result)
+{
+       int ret;
+       char *pat;
+       unsigned n = parse_quoted_string(src, "//", &pat);
+
+       result->flags = 0;
+       for (; src[n]; n++) {
+               switch (src[n]) {
+               case 'i': result->flags |= REG_ICASE; break;
+               case 'n': result->flags |= REG_NEWLINE; break;
+               default: assert(false);
+               }
+       }
+       ret = xregcomp(&result->preg, pat, result->flags);
+       free(pat);
+       return ret;
+}
+
+static struct txp_ast_node *ast_node_raw(int id)
+{
+       struct txp_ast_node *node = xmalloc(sizeof(*node));
+       node->id = id;
+       return node;
+}
+
+/*
+ * Allocate a new leaf node for the abstract syntax tree.
+ *
+ * This returns a pointer to a node whose ->num_children field is initialized
+ * to zero. The ->id field is initialized with the given id.  The caller is
+ * expected to initialize the ->sv field.
+ *
+ * This has to be non-static because it is also called from the lexer.
+ */
+struct txp_ast_node *txp_new_ast_leaf_node(int id)
+{
+       struct txp_ast_node *node = ast_node_raw(id);
+       node->num_children = 0;
+       return node;
+}
+
+struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child)
+{
+       struct txp_ast_node *node = ast_node_raw(id);
+       node->num_children = 1;
+       node->children = xmalloc(sizeof(struct txp_ast_node *));
+       node->children[0] = child;
+       return node;
+}
+
+struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left,
+               struct txp_ast_node *right)
+{
+       struct txp_ast_node *node = ast_node_raw(id);
+       node->num_children = 2;
+       node->children = xmalloc(2 * sizeof(struct txp_ast_node *));
+       node->children[0] = left;
+       node->children[1] = right;
+       return node;
+}
+
+/*
+ * Deallocate an abstract syntax tree.
+ *
+ * This frees the memory occupied by the nodes of the AST, the child pointers
+ * of the internal nodes and the (constant) semantic values of the leaf nodes
+ * (string literals and pre-compiled regular expressions).
+ */
+static void txp_free_ast(struct txp_ast_node *root)
+{
+       if (!root)
+               return;
+       if (root->num_children > 0) {
+               int i;
+               for (i = 0; i < root->num_children; i++)
+                       txp_free_ast(root->children[i]);
+               free(root->children);
+       } else {
+               union txp_semantic_value *sv = &root->sv;
+               switch (root->id) {
+               case STRING_LITERAL:
+                       free(sv->strval);
+                       break;
+               case REGEX_PATTERN:
+                       regfree(&sv->re_pattern.preg);
+                       break;
+               }
+       }
+       free(root);
+}
+
+void txp_free(struct txp_context *ctx)
+{
+       txp_free_ast(ctx->ast);
+       free(ctx);
+}
+
+static int eval_node(const struct txp_ast_node *node,
+               const struct txp_context *ctx,
+               const struct epi_properties *props,
+               union txp_semantic_value *result);
+
+static void eval_binary_op(const struct txp_ast_node *node,
+               const struct txp_context *ctx,
+               const struct epi_properties *props,
+               union txp_semantic_value *v1, union txp_semantic_value *v2)
+{
+       eval_node(node->children[0], ctx, props, v1);
+       eval_node(node->children[1], ctx, props, v2);
+}
+
+static int eval_node(const struct txp_ast_node *node,
+               const struct txp_context *ctx,
+               const struct epi_properties *props,
+               union txp_semantic_value *result)
+{
+       int ret;
+       union txp_semantic_value v1, v2;
+
+       assert(node);
+       switch (node->id) {
+       /* strings */
+       case STRING_LITERAL:
+               result->strval = node->sv.strval;
+               return ST_STRVAL;
+       case TEXT:
+               result->strval = epi_text(props);
+               return ST_STRVAL;
+       /* integers */
+       case NUM:
+               result->intval = node->sv.intval;
+               return ST_INTVAL;
+       case '+':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->intval = v1.intval + v2.intval;
+               return ST_INTVAL;
+       case '-':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->intval = v1.intval - v2.intval;
+               return ST_INTVAL;
+       case '*':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->intval = v1.intval * v2.intval;
+               return ST_INTVAL;
+       case '/':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               if (v2.intval == 0) {
+                       static bool warned;
+                       if (!warned)
+                               ERROR_LOG("division by zero\n");
+                       warned = true;
+                       result->intval = 0;
+               } else
+                       result->intval = v1.intval / v2.intval;
+               return ST_INTVAL;
+       case NEG:
+               eval_node(node->children[0], ctx, props, &v1);
+               result->intval = -v1.intval;
+               return ST_INTVAL;
+       case LEN:
+               result->intval = epi_len(props);
+               return ST_INTVAL;
+       /* bools */
+       case TAG:
+               eval_node(node->children[0], ctx, props, &v1);
+               result->boolval = epi_has_tag(node->children[0]->sv.strval,
+                       props);
+               return ST_BOOLVAL;
+       case TRUE:
+               result->boolval = true;
+               return ST_BOOLVAL;
+       case FALSE:
+               result->boolval = false;
+               return ST_BOOLVAL;
+       case OR:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.boolval || v2.boolval;
+               return ST_BOOLVAL;
+       case AND:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.boolval && v2.boolval;
+               return ST_BOOLVAL;
+       case NOT:
+               eval_node(node->children[0], ctx, props, &v1);
+               result->boolval = !v1.boolval;
+               return ST_BOOLVAL;
+       case EQUAL:
+               ret = eval_node(node->children[0], ctx, props, &v1);
+               eval_node(node->children[1], ctx, props, &v2);
+               if (ret == ST_STRVAL)
+                       result->boolval = !strcmp(v1.strval, v2.strval);
+               else
+                       result->boolval = v1.intval == v2.intval;
+               return ST_BOOLVAL;
+       case NOT_EQUAL:
+               ret = eval_node(node->children[0], ctx, props, &v1);
+               eval_node(node->children[1], ctx, props, &v2);
+               if (ret == ST_STRVAL)
+                       result->boolval = strcmp(v1.strval, v2.strval);
+               else
+                       result->boolval = v1.intval != v2.intval;
+               return ST_BOOLVAL;
+       case '<':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.intval < v2.intval;
+               return ST_BOOLVAL;
+       case '>':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.intval > v2.intval;
+               return ST_BOOLVAL;
+       case LESS_OR_EQUAL:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.intval <= v2.intval;
+               return ST_BOOLVAL;
+       case GREATER_OR_EQUAL:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.intval >= v2.intval;
+               return ST_BOOLVAL;
+       case REGEX_MATCH:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = regexec(&v2.re_pattern.preg, v1.strval,
+                        0, NULL, 0) == 0;
+               return ST_BOOLVAL;
+       case REGEX_PATTERN:
+               result->re_pattern = node->sv.re_pattern;
+               return ST_REGEX_PATTERN;
+       default:
+               EMERG_LOG("bug: invalid node id %d\n", node->id);
+               exit(EXIT_FAILURE);
+       }
+}
+
+/*
+ * Evaluate an abstract syntax tree, starting at the root node.
+ *
+ * The ctx argument should be the pointer that was returned from an earlier
+ * call to txp_init(). The cookie properties structure contains the information
+ * about the epigram.
+ *
+ * Returns true if the AST evaluates to true, a non-empty string, or a non-zero
+ * number, false otherwise.
+ */
+bool txp_eval_ast(const struct txp_context *ctx,
+               const struct epi_properties *props)
+{
+       union txp_semantic_value v;
+       int ret;
+
+       if (!ctx->ast)
+               return true;
+       ret = eval_node(ctx->ast, ctx, props, &v);
+
+       if (ret == ST_INTVAL)
+               return v.intval != 0;
+       if (ret == ST_STRVAL)
+               return v.strval[0] != 0;
+       if (ret == ST_BOOLVAL)
+               return v.boolval;
+       assert(false);
+}
+
+int txp_yylex_init(txp_yyscan_t *yyscanner);
+struct yy_buffer_state *txp_yy_scan_bytes(const char *buf, int len,
+       txp_yyscan_t yyscanner);
+void txp_yy_delete_buffer(struct yy_buffer_state *bs, txp_yyscan_t yyscanner);
+int txp_yylex_destroy(txp_yyscan_t yyscanner);
+void txp_yyset_lineno(int lineno, txp_yyscan_t scanner);
+
+/*
+ * Initialize the tag expression parser.
+ *
+ * This allocates and sets up the internal structures of the tag expression
+ * parser and creates an abstract syntax tree from the given epigram (including
+ * the tags). It must be called before txp_eval_ast() can be called.
+ *
+ * The context pointer returned by this function may be passed to mp_eval_ast()
+ * to determine whether an epigram is admissible.
+ *
+ * The error message pointer may be NULL in which case no error message is
+ * returned. Otherwise, the caller must free the returned string.
+ */
+int txp_init(const struct iovec *definition, struct txp_context **result,
+                char **errmsg)
+{
+       int ret;
+       txp_yyscan_t scanner;
+       struct txp_context *ctx;
+       struct yy_buffer_state *buffer_state;
+
+       ctx = xcalloc(sizeof(*ctx));
+       ret = txp_yylex_init(&scanner);
+       assert(ret == 0);
+       buffer_state = txp_yy_scan_bytes(definition->iov_base,
+               definition->iov_len, scanner);
+       txp_yyset_lineno(1, scanner);
+       NOTICE_LOG("creating abstract syntax tree from tag expression\n");
+       ret = txp_yyparse(ctx, &ctx->ast, scanner);
+       txp_yy_delete_buffer(buffer_state, scanner);
+       txp_yylex_destroy(scanner);
+       if (ctx->errmsg) { /* parse error */
+               if (errmsg)
+                       *errmsg = ctx->errmsg;
+               else
+                       free(ctx->errmsg);
+               free(ctx);
+               return -E_TXP;
+       }
+       if (errmsg)
+               *errmsg = NULL;
+       *result = ctx;
+       return 1;
+}
diff --git a/config.mak.in b/config.mak.in

new file mode 100644 (file)

index 0000000..b42c106
--- /dev/null
+++ b/config.mak.in
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-3.0-only
+
+prefix := @prefix@
+exec_prefix := @exec_prefix@
+
+# These two use prefix and exec_prefix
+bindir := @bindir@
+datarootdir := @datarootdir@
+
+PACKAGE_TARNAME := @PACKAGE_TARNAME@
+PACKAGE_VERSION := @PACKAGE_VERSION@
+PACKAGE_BUGREPORT := @PACKAGE_BUGREPORT@
+PACKAGE_URL := @PACKAGE_URL@
+
+FLEX := @FLEX@
+BISON := @BISON@
+M4 := @M4@
+LOPSUBGEN := @LOPSUBGEN@
diff --git a/configure b/configure

new file mode 100755 (executable)

index 0000000..b2f719f
--- /dev/null
+++ b/configure
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-3.0-only
+
+set -e
+
+if ! test -x configure.sh; then
+       autoconf configure.ac > configure.sh
+       chmod 755 configure.sh
+       autoheader
+fi
+
+sh configure.sh "$@"
diff --git a/configure.ac b/configure.ac

new file mode 100644 (file)

index 0000000..58925f1
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-3.0-only
+
+AC_PREREQ([2.61])
+AC_INIT([tfortune], [m4_esyscmd_s(./version-gen.sh)],
+       [maan@tuebingen.mpg.de], [], [http://people.tuebingen.mpg.de/maan/tfortune/])
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_FILES([config.mak])
+AC_USE_SYSTEM_EXTENSIONS
+AC_PROG_CC
+AC_PROG_CPP
+
+AC_DEFUN([REQUIRE_EXECUTABLE], [
+       AC_PATH_PROG(m4_toupper([$1]), [$1])
+       test -z "$m4_toupper([$1])" && AC_MSG_ERROR([$1 is required])
+])
+REQUIRE_EXECUTABLE([m4])
+REQUIRE_EXECUTABLE([flex])
+REQUIRE_EXECUTABLE([bison])
+REQUIRE_EXECUTABLE([lopsubgen])
+
+HAVE_LOPSUB=yes
+AC_CHECK_HEADER(lopsub.h, [], [HAVE_LOPSUB=no])
+AC_CHECK_LIB([lopsub], [lls_merge], [], [HAVE_LOPSUB=no])
+if test $HAVE_LOPSUB == no; then AC_MSG_ERROR([
+       The lopsub library is required to build this software, but
+       the above checks indicate it is not installed on your system.
+       Run the following command to download a copy.
+               git clone git://git.tuebingen.mpg.de/lopsub.git
+       Install the library, then run this configure script again.
+
+       If you installed lopsub at a non-standard location, make sure to set
+       PATH, CPPFLAGS and LDFLAGS accordingly. For example:
+
+               pfx=/prefix/where/lopsub/is/installed
+               export PATH=\$pfx/bin:\$PATH
+               export CPPFLAGS=-I\$pfx/include
+               export LDFLAGS=-L\$pfx/lib
+])
+fi
+AC_OUTPUT
diff --git a/err.h b/err.h

new file mode 100644 (file)

index 0000000..76988bd
--- /dev/null
+++ b/err.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#define TF_ERRORS \
+       TF_ERROR(SUCCESS, "success"), \
+       TF_ERROR(ATOI_OVERFLOW, "value too large"), \
+       TF_ERROR(ATOI_NO_DIGITS, "no digits found in string"), \
+       TF_ERROR(ATOI_JUNK_AT_END, "further characters after number"), \
+       TF_ERROR(LOPSUB, "lopsub error"), \
+       TF_ERROR(REGEX, "regular expression error"), \
+       TF_ERROR(TXP, "tag expression parse error"), \
+       TF_ERROR(LH_EXIST, "item already exists in hash table"), \
+
+/*
+ * This is temporarily defined to expand to its first argument (prefixed by
+ * 'E_') and gets later redefined to expand to the error text only
+ */
+#define TF_ERROR(err, msg) E_ ## err
+enum tf_error_codes {TF_ERRORS};
+#undef TF_ERROR
+#define TF_ERROR(err, msg) msg
+#define DEFINE_TF_ERRLIST char *tf_errlist[] = {TF_ERRORS}
+
+extern char *tf_errlist[];
+
+/**
+ * This bit indicates whether a number is considered a system error number
+ * If yes, the system errno is just the result of clearing this bit from
+ * the given number.
+ */
+#define SYSTEM_ERROR_BIT 30
+
+/** Check whether the system error bit is set. */
+#define IS_SYSTEM_ERROR(num) (!!((num) & (1 << SYSTEM_ERROR_BIT)))
+
+/** Set the system error bit for the given number. */
+#define ERRNO_TO_TF_ERROR(num) ((num) | (1 << SYSTEM_ERROR_BIT))
+
+static inline char *tf_strerror(int num)
+{
+       assert(num > 0);
+       if (IS_SYSTEM_ERROR(num))
+               return strerror((num) & ((1 << SYSTEM_ERROR_BIT) - 1));
+       else
+               return tf_errlist[num];
+}
+
diff --git a/index.html.m4 b/index.html.m4

new file mode 100644 (file)

index 0000000..e1b5f5e
--- /dev/null
+++ b/index.html.m4
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'
+'http://www.w3.org/TR/html4/loose.dtd'>
+
+<!-- SPDX-License-Identifier: GPL-3.0-only */ -->
+<html>
+       <head>
+               <meta
+                       http-equiv='Content-Type';
+                       content='text/html';
+                       charset=utf-8;
+               >
+               <title>Tfortune</title>
+               <style type='text/css'>
+                       a {
+                               color: #003355;
+                       }
+                       p {
+                               font-size: 120%;
+                       }
+               </style>
+       </head>
+       <body>
+               <table width="100%">
+                       <tr>
+                               <td>
+                                       <h1 align="left">
+                                                tfortune - Fortune Cookies with Tags
+                                       </h1>
+                               </td>
+                               <td align="right">
+                                       <img
+                                               src="logo.svg"
+                                               alt="tfortune logo"
+                                               title="The text on the paper strip says:
+&quot;You will soon use a different fortune program&quot;"
+
+                                       >
+                               </td>
+                       </tr>
+               </table>
+               <p> DESCRIPTION1() </p>
+               <p> DESCRIPTION2() </p>
+               <p> DESCRIPTION3() </p>
+
+               <h2> Resources </h2>
+               <ul>
+                       <li> git clone URL: CLONE_URL() </li>
+                       <li>
+                               gitweb: <a href="GITWEB_URL()">GITWEB_URL()</a>
+                       </li>
+                       <li>
+                               Email: AUTHOR() &lt;<a
+                               href="mailto:PACKAGE_BUGREPORT()">PACKAGE_BUGREPORT()</a>&gt;
+                       </li>
+               </ul>
+
+               <h2> License </h2>
+               Open source, licensed under the <a
+               href="LICENSE_URL()">LICENSE()</a>
+               (GNU General Public License, Version 3)
+
+               <h2> Documentation </h2>
+               All parts of tfortune are well documented and illustrated with
+               examples. See the manual page for details.
+
+               <h2> Programming Language </h2>
+               Plain C.
+
+               <h2> Dependencies </h2>
+               A working C compiler and a couple of other dependencies, most of which
+               are standard (autoconf, make, m4, flex, bison). The notable exception
+               is the <a href="http://people.tuebingen.mpg.de/maan/lopsub/">lopsub</a>
+               library.
+       </body>
+</html>
diff --git a/linhash.c b/linhash.c

new file mode 100644 (file)

index 0000000..28c67ed
--- /dev/null
+++ b/linhash.c
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+/*
+ * Simple linear hashing implementation
+ *
+ * Linear hashing is a hash table algorithm by Witold Litwin (1980)
+ * which grows the table on demand without rehashing existing objects.
+ *
+ * This implementation employs a conventional hash function C whose
+ * output length is assumed to be greater than the order of the table,
+ * i.e. the base-2 logarithm of the number of slots. Hash collisions
+ * are resolved by chaining.
+ *
+ * The hash table consists in fact of two tables which are consecutive
+ * in memory. Each table represents N = 1 << n many slots of the
+ * table. Example (n = 2, N = 4):
+ *
+ *     [....][....]
+ *      0123  4567
+ *
+ * In what follows, we refer to the two tables as the lower table and
+ * the higher table.
+ *
+ * C gives rise to a sequence of hash functions C_k via C_k(key) :=
+ * lower k bits of C(key). In terms of C_k the lower and higher hash
+ * functions, h and H, are defined by h = C_n and H = C_{n+1}. Hence
+ * h(key) always corresponds to a slot in the lower table while H(key)
+ * corresponds to a slot in either table. Both functions are trivial to
+ * compute given C(key).
+ *
+ * The algorithm also maintains a split position s which corresponds to
+ * a slot of the lower table and starts out at zero.
+ *
+ *     [s...][....]
+ *      0123  4567
+ *
+ * The linear hash function L is defined by L(key) = H(key) if h(key)
+ * < s, and L(key) = h(key) otherwise. In other words, L(key) is either
+ * the lower or the higher hash value, depending on the relation between
+ * the lower hash value and s. The higher hash function applies for all
+ * keys whose lower hash value is smaller than s. Initially we have L = h.
+ *
+ * On insertion, if the table load is large, the chain of entries which
+ * hash to s is split by transitioning from h to H.
+ *
+ * For an existing key, we have either H(key) = h(key) or H(key) =
+ * h(key) + 1 << n. That is, the most significant bit of H(key) tells
+ * whether this entry must be moved from its slot in the low table to the
+ * corresponding slot in the high table. Approximately half the entries
+ * will be moved this way. To reflect the fact that from now on L(key)
+ * = H(key) for all keys with h(key) = s, s is increased by one.
+ *
+ * Each time s reaches 1 << n, the upper bound of the lower table, we
+ * have L = H. In this case the hash table size is doubled, and the split
+ * position is reset to zero. Moreover, h and H are redefined as h :=
+ * C_{n+1} (i.e., the new h becomes the former H), and H := C_{n+2}. This
+ * means the table order n has increased by one, and a new round begins.
+ *
+ * This implementation includes an API for iterating over all entries
+ * in the hash table which avoids callback functions. Instead an opaque
+ * iterator structure is provided together with a set of functions that
+ * create or operate on such a structure.
+ *
+ * While shrinking a linear hash table can performed essentially
+ * by reversing the steps for growing described above, this is not
+ * implemented since it is not needed for tfortune.
+ */
+
+#include "tf.h"
+
+struct lh_slot {
+       struct linhash_item item;
+       uint32_t hash;
+       struct lh_slot *next;
+};
+
+struct linhash_table {
+       struct lh_slot **slots;
+       unsigned order;
+       uint32_t num_items; /* how many items have been inserted so far. */
+       uint32_t split_position;
+};
+
+struct linhash_iterator {
+       /* these are used regardless of whether comp is NULL */
+       struct linhash_table *t;
+       uint32_t idx;
+       /* comp == NULL means: iterate in hash order */
+       linhash_comparator *comp;
+       /* only used if comp == NULL */
+       struct lh_slot *head, *prev, *next;
+       /* only used if comp != NULL */
+       struct linhash_item **items;
+       bool reverse;
+};
+
+static uint32_t lh_num_slots_low(const struct linhash_table *t)
+{
+       return (uint32_t)1 << t->order;
+}
+
+static uint32_t lh_num_slots_high(const struct linhash_table *t)
+{
+       return lh_num_slots_low(t) * 2;
+}
+
+static uint32_t lh_index_low(uint32_t hash, const struct linhash_table *t)
+{
+       return hash % lh_num_slots_low(t);
+}
+
+static uint32_t lh_index_high(uint32_t hash, const struct linhash_table *t)
+{
+       return hash % lh_num_slots_high(t);
+}
+
+static bool lh_must_grow(const struct linhash_table *t)
+{
+       return t->num_items >= lh_num_slots_high(t) / 2;
+}
+
+/* The simple DJB (Daniel J Bernstein) hash is good enough here. */
+static uint32_t lh_hash(const char *data)
+{
+       uint32_t h = 5381;
+       const unsigned char *c = (typeof(c))data;
+
+       while (*(c++))
+               h = h * 33 + *c;
+       return h;
+}
+
+/*
+ * Create a new table for linear hashing.
+ *
+ * The order argument determines the initial number of slots: it is given by 2
+ * << order. The hash table grows on demand, and the point of linear hashing is
+ * the ability to cheaply grow the table, so specifying anything greater than
+ * zero is probably not necessary.
+ *
+ * The functtion returns an opaque pointer which serves as the handle to the
+ * newly created hash table.  Most functions of the linhash API take such a
+ * pointer to know the table to operate on. This function either succeeds or
+ * terminates, it never returns NULL.
+ */
+struct linhash_table *linhash_new(uint32_t order)
+{
+       struct linhash_table *t;
+       uint32_t ns;
+
+       DEBUG_LOG("creating order %u hash table\n", order);
+       t = xmalloc(sizeof(*t));
+       t->order = order;
+       t->num_items = 0;
+       t->split_position = 0;
+       ns = lh_num_slots_high(t);
+       t->slots = xcalloc(ns * sizeof(*t->slots));
+       return t;
+}
+
+static uint32_t lh_index(uint32_t hash, const struct linhash_table *t)
+{
+       uint32_t low = lh_index_low(hash, t);
+
+       if (low >= t->split_position)
+               return low;
+       return lh_index_high(hash, t);
+}
+
+static void lh_split_slot(struct linhash_table *t)
+{
+       struct lh_slot *prev, *next, *s;
+       uint32_t sp = t->split_position;
+
+       DEBUG_LOG("splitting slot %u\n", sp);
+       for (prev = NULL, s = t->slots[sp]; s;) {
+               uint32_t idx = lh_index_high(s->hash, t);
+               if (idx == sp) {
+                       prev = s;
+                       s = s->next;
+                       continue;
+               }
+               DEBUG_LOG("moving %s up from slot %u to slot %u\n",
+                       s->item.key, sp, idx);
+               if (t->slots[sp] == s)
+                       t->slots[sp] = s->next;
+               if (prev)
+                       prev->next = s->next;
+               next = s->next;
+               s->next = t->slots[idx];
+               t->slots[idx] = s;
+               s = next;
+       }
+       DEBUG_LOG("slot %u has been split\n", sp);
+       if (!t->slots[sp])
+               DEBUG_LOG("slot #%u has become empty\n", sp);
+}
+
+static void lh_grow_table(struct linhash_table *t)
+{
+       uint32_t idx, ns;
+
+       DEBUG_LOG("growing hash table to order %u\n", ++t->order);
+       ns = lh_num_slots_high(t);
+       t->slots = xrealloc(t->slots, ns * sizeof(*t->slots));
+       idx = lh_num_slots_low(t);
+       memset(t->slots + idx, 0, (ns - idx) * sizeof(*t->slots));
+}
+
+static struct lh_slot *lh_lookup(const char *key,
+               const struct linhash_table *t, uint32_t *hashp, uint32_t *idxp,
+               struct lh_slot **prevp)
+{
+       struct lh_slot *s, *prev;
+       uint32_t hash, idx;
+
+       if (!t)
+               return NULL;
+       hash = lh_hash(key);
+       idx = lh_index(hash, t);
+       //DEBUG_LOG("key %s, hash: %u, idx: %u\n", key, hash, idx);
+       if (hashp)
+               *hashp = hash;
+       if (idxp)
+               *idxp = idx;
+       for (s = t->slots[idx], prev = NULL; s; prev = s, s = s->next) {
+               //DEBUG_LOG("comparing %s vs. %s\n", key, s->item.key);
+               if (strcmp(s->item.key, key))
+                       continue;
+               /* found it */
+               if (prevp)
+                       *prevp = prev;
+               return s;
+       }
+       if (prevp)
+               *prevp = NULL;
+       return NULL;
+}
+
+/**
+ * Find the entry identified by a key.
+ *
+ * \param key The key to look up.
+ * \param t Where to look up the key.
+ */
+struct linhash_item *linhash_lookup(const char *key,
+               const struct linhash_table *t)
+{
+       struct lh_slot *s = lh_lookup(key, t, NULL, NULL, NULL);
+
+       if (!s)
+               return NULL;
+       return &s->item;
+}
+
+static void *lh_remove(struct lh_slot *s, struct lh_slot *prev, uint32_t idx, struct linhash_table *t)
+{
+       void *obj;
+
+       if (!s)
+               return NULL;
+       t->num_items--;
+       obj = s->item.object;
+       if (prev)
+               prev->next = s->next;
+       else
+               t->slots[idx] = s->next;
+       free(s);
+       return obj;
+}
+
+/* Note: This renders all existing iterators stale. */
+void *linhash_remove(const char *key, struct linhash_table *t)
+{
+       uint32_t idx;
+       struct lh_slot *prev, *s = lh_lookup(key, t, NULL, &idx, &prev);
+
+       return lh_remove(s, prev, idx, t);
+}
+
+static void *lh_iterator_remove_current(struct linhash_iterator *iter)
+{
+       void *obj;
+
+       assert(!iter->comp);
+       if (!iter->head)
+               return NULL;
+       obj = lh_remove(iter->head, iter->prev, iter->idx, iter->t);
+       iter->head = iter->prev;
+       return obj;
+}
+
+static struct lh_slot *lh_first_nonempty_slot(uint32_t *idxp,
+               const struct linhash_table *t)
+{
+       uint32_t ns = lh_num_slots_high(t);
+
+       for (; *idxp < ns; (*idxp)++)
+               if (t->slots[*idxp])
+                       return t->slots[*idxp];
+       return NULL;
+}
+
+static void lh_iter_init(struct linhash_iterator *iter, uint32_t idx)
+{
+       iter->idx = idx;
+       iter->prev = NULL;
+       iter->head = lh_first_nonempty_slot(&iter->idx, iter->t);
+       if (iter->head)
+               iter->next = iter->head->next;
+}
+
+/*
+ * Normally iter->head points to the current head. However, if this head was
+ * removed with lh_iterator_remove_current(), iter->head points to its
+ * predecessor in the list.
+ */
+void linhash_iterator_next(struct linhash_iterator *iter)
+{
+       if (iter->comp) {
+               if (iter->reverse)
+                       iter->idx--;
+               else
+                       iter->idx++;
+               return;
+       }
+       if (iter->next) {
+               iter->prev = iter->head;
+               iter->head = iter->next;
+               iter->next = iter->next->next;
+               return;
+       }
+       lh_iter_init(iter, iter->idx + 1);
+}
+
+struct linhash_item *linhash_iterator_item(const struct linhash_iterator *iter)
+{
+       if (iter->comp) {
+               if (iter->idx >= iter->t->num_items)
+                       return NULL;
+               return iter->items[iter->idx];
+       }
+       if (!iter->head)
+               return NULL;
+       return &iter->head->item;
+}
+
+void linhash_iterator_free(struct linhash_iterator *iter)
+{
+       if (!iter)
+               return;
+       if (iter->comp)
+               free(iter->items);
+       free(iter);
+}
+
+/* always succeeds. reverse order is only respected if a comparator is given. */
+struct linhash_iterator *linhash_iterator_new(struct linhash_table *t,
+               linhash_comparator *comp, bool reverse_sort_order)
+{
+       struct linhash_iterator *iter = xmalloc(sizeof(*iter)), *iter2;
+       struct linhash_item *item;
+       unsigned n;
+
+       iter->t = t;
+       iter->comp = comp;
+       if (!comp) {
+               lh_iter_init(iter, 0);
+               return iter;
+       }
+       iter->reverse = reverse_sort_order;
+       iter2 = linhash_iterator_new(t, NULL, false);
+       iter->items = xmalloc(t->num_items * sizeof(struct linhash_item *));
+       for (
+               n = 0;
+               (item = linhash_iterator_item(iter2));
+               linhash_iterator_next(iter2)
+       )
+               iter->items[n++] = item;
+       linhash_iterator_free(iter2);
+       qsort(iter->items, t->num_items, sizeof(struct linhash_iter *),
+               (int (*)(const void *, const void *))comp);
+       iter->idx = reverse_sort_order? t->num_items - 1 : 0;
+       return iter;
+}
+
+/* Deallocate the resources occupied by the given hash table. */
+void linhash_free(struct linhash_table *t)
+{
+       struct linhash_iterator *iter;
+       struct linhash_item *itemp;
+
+       if (!t)
+               return;
+       for (
+               iter = linhash_iterator_new(t, NULL, false);
+               (itemp = linhash_iterator_item(iter));
+               linhash_iterator_next(iter)
+       )
+               lh_iterator_remove_current(iter);
+       linhash_iterator_free(iter);
+       assert(t->num_items == 0);
+       free(t->slots);
+       free(t);
+}
+
+/* returns item in first arg if key already exists */
+int linhash_insert(struct linhash_item *item, struct linhash_table *t,
+               void ***object)
+{
+       struct lh_slot *s;
+       uint32_t idx, hash;
+
+       s = lh_lookup(item->key, t, &hash, &idx, NULL);
+       if (s) {
+               if (object)
+                       *object = &s->item.object;
+               return -E_LH_EXIST;
+       }
+       s = xmalloc(sizeof(*s));
+       s->item = *item;
+       s->hash = hash;
+       DEBUG_LOG("inserting item #%u, key: %s, hash: %u, idx: %u\n",
+               t->num_items, item->key, hash, idx);
+       s->next = t->slots[idx];
+       t->slots[idx] = s;
+       t->num_items++;
+
+       if (!lh_must_grow(t)) {
+               DEBUG_LOG("no need to grow\n");
+               return 0;
+       }
+       lh_split_slot(t);
+       t->split_position++;
+       if (t->split_position < lh_num_slots_low(t))
+               return 1;
+       t->split_position = 0;
+       lh_grow_table(t);
+       return 2;
+}
+
+uint32_t linhash_num_items(const struct linhash_table *t)
+{
+       return t->num_items;
+}
+
+char *linhash_statistics(const struct linhash_table *t)
+{
+       uint32_t min_fill = -1, max_fill = 0, n, idx, ns;
+       uint32_t fill_count[11] = {0};
+       char *result;
+
+       ns = lh_num_slots_low(t) + t->split_position;
+       for (idx = 0; idx < ns; idx++) {
+               struct lh_slot *s;
+               for (n = 0, s = t->slots[idx]; s; s = s->next, n++)
+                       ; /* nothing */
+               min_fill = MIN(min_fill, n);
+               max_fill = MAX(max_fill, n);
+               fill_count[n < 10? n : 10]++;
+       }
+       xasprintf(&result,
+               "order............... %2u\n"
+               "num slots........... %u\n"
+               "num items (table)... %u\n"
+               "load factor........ %3u%%\n"
+               "min fill............ %2u\n"
+               "max fill............ %2u\n"
+               "max count[0]....... %3u%% (%u)\n"
+               "max count[1]....... %3u%% (%u)\n"
+               "max count[2]....... %3u%% (%u)\n"
+               "max count[3]....... %3u%% (%u)\n"
+               "max count[4]....... %3u%% (%u)\n"
+               "max count[5]....... %3u%% (%u)\n"
+               "max count[6]....... %3u%% (%u)\n"
+               "max count[7]....... %3u%% (%u)\n"
+               "max count[8]....... %3u%% (%u)\n"
+               "max count[9]....... %3u%% (%u)\n"
+               "max count[10+]..... %3u%% (%u)\n"
+               ,
+               t->order,
+               ns,
+               t->num_items,
+               (t->num_items * 100 + (ns / 2)) / ns,
+               min_fill,
+               max_fill,
+               100 * fill_count[0] / ns, fill_count[0],
+               100 * fill_count[1] / ns, fill_count[1],
+               100 * fill_count[2] / ns, fill_count[2],
+               100 * fill_count[3] / ns, fill_count[3],
+               100 * fill_count[4] / ns, fill_count[4],
+               100 * fill_count[5] / ns, fill_count[5],
+               100 * fill_count[6] / ns, fill_count[6],
+               100 * fill_count[7] / ns, fill_count[7],
+               100 * fill_count[8] / ns, fill_count[8],
+               100 * fill_count[9] / ns, fill_count[9],
+               100 * fill_count[10] / ns, fill_count[10]
+       );
+       return result;
+}
diff --git a/logo.svg b/logo.svg

new file mode 100644 (file)

index 0000000..d4a70a6
--- /dev/null
+++ b/logo.svg
@@ -0,0 +1,27 @@
+<!-- SPDX-License-Identifier: GPL-3.0-only -->
+
+<svg xmlns="http://www.w3.org/2000/svg" height="70" width="100" >
+       <g fill="none" stroke-width="4" linecap="round">
+               <path
+                        d="
+                               M 40,25 c 6,-20 20,-10 20,5
+                               M 3,48 s 5,-30 41,-30
+                               M 56,17 s 30,10 20,33
+                               M 59,24 s 32,10 -5,35
+                               M 56,58 s -6,1 -3,10
+                               M 66,49 s 10,-2 20,10
+                               m 0,-1 s -2,2 10,5
+                               M 60,55 s 5,-5 25,15
+                               M 85,70 s -2,-2 11,-7
+                               M 72,58 s -2,10 -20,10
+                               M 53,68 s -20,0 -5,-28
+                               m 0,0 s 7,-10 3,-20
+                               m 0,0 s 5,5 -7,15
+                               M 45,45 s -10,-4 -30,20
+                               m 0,0 s -3,-28 -12,-17
+                               M 16,65 s -14,4 -13,-17
+                       "
+                       stroke="#000"
+               />
+       </g>
+</svg>
diff --git a/tf.h b/tf.h

new file mode 100644 (file)

index 0000000..b937d07
--- /dev/null
+++ b/tf.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <fnmatch.h>
+#include <regex.h>
+#include <assert.h>
+#include <lopsub.h>
+#include <sys/uio.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <pwd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <grp.h>
+
+#include "err.h"
+
+/** Compute the minimum of \a x and \a y. */
+#define MIN(x, y) ({ \
+       typeof(x) _min1 = (x); \
+       typeof(y) _min2 = (y); \
+       (void) (&_min1 == &_min2); \
+       _min1 < _min2 ? _min1 : _min2; })
+
+/** Compute the maximum of \a x and \a y. */
+#define MAX(x, y) ({ \
+       typeof(x) _max1 = (x); \
+       typeof(y) _max2 = (y); \
+       (void) (&_max1 == &_max2); \
+       _max1 < _max2 ? _max2 : _max1; })
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+/* util.c */
+
+extern int loglevel_arg_val;
+enum loglevels {LOGLEVELS, NUM_LOGLEVELS};
+void tf_log(int ll, const char* fmt,...);
+#define DEBUG_LOG(f,...) tf_log(LL_DEBUG, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define INFO_LOG(f,...) tf_log(LL_INFO, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define NOTICE_LOG(f,...) tf_log(LL_NOTICE, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define WARNING_LOG(f,...) tf_log(LL_WARNING, "%s: " f, __FUNCTION__, ##  __VA_ARGS__)
+#define ERROR_LOG(f,...) tf_log(LL_ERROR, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define CRIT_LOG(f,...) tf_log(LL_CRIT, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define EMERG_LOG(f,...) tf_log(LL_EMERG, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+
+int atoi64(const char *str, int64_t *value);
+unsigned xvasprintf(char **result, const char *fmt, va_list ap);
+unsigned xasprintf(char **result, const char *fmt, ...);
+void *xrealloc(void *p, size_t size);
+void *xmalloc(size_t size);
+void *xcalloc(size_t size);
+char *xstrdup(const char *str);
+char *get_homedir(void);
+int xregcomp(regex_t *preg, const char *regex, int cflags);
+void mmap_file(const char *path, struct iovec *iov);
+int fd2buf(int fd, struct iovec *result);
+
+struct regfile_iter; /* opaque */
+void regfile_iter_new(const char *dirname, struct regfile_iter **result);
+bool regfile_iter_map(const struct regfile_iter *iter, struct iovec *result);
+const char *regfile_iter_basename(const struct regfile_iter *iter);
+const struct stat *regfile_iter_stat(const struct regfile_iter *iter);
+void regfile_iter_next(struct regfile_iter *iter);
+void regfile_iter_free(struct regfile_iter *iter);
+
+/* tfortune.c */
+struct epi_properties; /* opaque */
+unsigned epi_len(const struct epi_properties *props);
+bool epi_has_tag(const char *tag, const struct epi_properties *props);
+char *epi_text(const struct epi_properties *props);
+
+/* version.c */
+const char *tf_version(void);
+
+/* tag expression parser (ast.c, txp.lex, txp.y) */
+
+/* Opaque, only known to ast.c. Passed to the generated txp_yyparse(). */
+struct txp_context;
+
+int txp_init(const struct iovec *definition, struct txp_context **result,
+                char **errmsg);
+bool txp_eval_ast(const struct txp_context *ctx,
+               const struct epi_properties *props);
+void txp_free(struct txp_context *ctx);
+
+/* non-public API of the tag expression parser */
+
+/*
+ * Since we use a reentrant lexer, all functions generated by flex(1)
+ * receive an additional argument of this type.
+ */
+typedef void *txp_yyscan_t;
+
+/* Parsed regex pattern. */
+struct txp_re_pattern {
+       regex_t preg; /* Pre-compiled regex. */
+       unsigned flags; /* Subset of the cflags described in regex(3). */
+};
+
+/*
+ * The possible values of a node in the abstract syntax tree (AST).
+ *
+ * Constant semantic values (string literals, numeric constants and regex
+ * patterns which are part of the tag expression) are determined during
+ * txp_init() while values which depend on the epigram (tags, number of lines,
+ * etc.) are determined during txp_eval_row().
+ *
+ * This union, and the txp_ast_node structure below are used extensively in
+ * txp.y. However, both need to be public because the lexer must be able to
+ * create AST nodes for the constant semantic values.
+ */
+union txp_semantic_value {
+       bool boolval; /* Comparators, =~ and =|. */
+       char *strval; /* String literals (e.g., argument of tag()) */
+       int64_t intval; /* Constants, num_lines, etc. */
+       struct txp_re_pattern re_pattern; /* Right-hand side operand of =~. */
+};
+
+/*
+ * A node is either interior or a leaf node. Interior nodes have at least one
+ * child while leaf nodes have a semantic value and no children.
+ *
+ * Examples: (a) STRING_LITERAL has a semantic value (the unescaped string
+ * literal) and no children, (b) NEG (unary minus) has no semantic value but
+ * one child (the numeric expression that is to be negated), (c) LESS_OR_EQUAL
+ * has no semantic value and two children (the two numeric expressions being
+ * compared).
+ */
+struct txp_ast_node {
+       /* Corresponds to a token type, for example LESS_OR_EQUAL. */
+       int id;
+       union {
+               /* Pointers to the child nodes (interior nodes only). */
+               struct txp_ast_node **children;
+               /* Leaf nodes only. */
+               union txp_semantic_value sv;
+       };
+       /*
+        * The number of children is implicitly given by the id, but we include
+        * it here to avoid having to maintain a lookup table. The AST is
+        * usually small, so we can afford to waste a byte per node.
+        */
+       uint8_t num_children;
+};
+
+/* Called from both the lexer and the parser. */
+__attribute__ ((format (printf, 3, 4)))
+void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...);
+
+/* Helper functions for the lexer. */
+unsigned parse_quoted_string(const char *src, const char quote_chars[2],
+               char **result);
+int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result);
+
+struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child);
+struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left,
+               struct txp_ast_node *right);
+
+struct txp_ast_node *txp_new_ast_leaf_node(int id);
+
+/* linhash.c */
+
+struct linhash_item {
+       const char *key;
+       void *object;
+};
+
+typedef int linhash_comparator(const struct linhash_item **a,
+               const struct linhash_item **b);
+
+struct linhash_table;
+struct linhash_iterator;
+
+struct linhash_table *linhash_new(uint32_t order);
+int linhash_insert(struct linhash_item *item, struct linhash_table *t,
+               void ***object);
+struct linhash_item *linhash_lookup(const char *key,
+               const struct linhash_table *t);
+void *linhash_remove(const char *key, struct linhash_table *t);
+void linhash_free(struct linhash_table *t);
+
+struct linhash_iterator *linhash_iterator_new(struct linhash_table *t,
+               linhash_comparator *comp, bool reverse);
+struct linhash_item *linhash_iterator_item(const struct linhash_iterator *iter);
+void linhash_iterator_next(struct linhash_iterator *iter);
+void linhash_iterator_free(struct linhash_iterator *iter);
+
+char *linhash_statistics(const struct linhash_table *t);
+uint32_t linhash_num_items(const struct linhash_table *t);
diff --git a/tfortune.c b/tfortune.c

new file mode 100644 (file)

index 0000000..e5b2061
--- /dev/null
+++ b/tfortune.c
@@ -0,0 +1,1549 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <lopsub.h>
+
+#include "tf.h"
+#include "tfortune.lsg.h"
+
+#define TF_SEP "---- "
+
+static struct lls_parse_result *lpr, *sublpr;
+#define CMD_PTR(_cname) lls_cmd(LSG_TFORTUNE_CMD_ ## _cname, tfortune_suite)
+#define OPT_RESULT(_cname, _oname) (lls_opt_result(\
+       LSG_TFORTUNE_ ## _cname ## _OPT_ ## _oname, \
+       (CMD_PTR(_cname) == CMD_PTR(TFORTUNE))? lpr : sublpr))
+#define OPT_GIVEN(_cname, _oname) (lls_opt_given(OPT_RESULT(_cname, _oname)))
+#define OPT_STRING_VAL(_cname, _oname) (lls_string_val(0, \
+       OPT_RESULT(_cname, _oname)))
+#define OPT_UINT32_VAL(_cname, _oname) (lls_uint32_val(0, \
+               OPT_RESULT(_cname, _oname)))
+
+int loglevel_arg_val;
+
+struct tf_user_data {int (*handler)(void);};
+#define EXPORT_CMD_HANDLER(_cmd) const struct tf_user_data \
+       lsg_tfortune_com_ ## _cmd ## _user_data = { \
+               .handler = com_ ## _cmd \
+       };
+
+struct map_chunk {
+       const char *base;
+       size_t len;
+};
+
+struct epigram {
+       struct map_chunk epi, tags;
+};
+
+static int lopsub_error(int lopsub_ret, char **errctx)
+{
+       const char *msg = lls_strerror(-lopsub_ret);
+       if (*errctx)
+               ERROR_LOG("%s: %s\n", *errctx, msg);
+       else
+               ERROR_LOG("%s\n", msg);
+       free(*errctx);
+       *errctx = NULL;
+       return -E_LOPSUB;
+}
+
+/* per epigram context for the tag expression parser */
+struct epi_properties {
+       const struct map_chunk *chunk; /* only the epigram */
+       struct linhash_table *tagtab;
+       unsigned num_tags;
+};
+
+unsigned epi_len(const struct epi_properties *props)
+{
+       return props->chunk->len;
+}
+
+char *epi_text(const struct epi_properties *props)
+{
+       const char *txt = props->chunk->base;
+       char *result = malloc(props->chunk->len + 1);
+       memcpy(result, txt, props->chunk->len);
+       result[props->chunk->len] = '\0';
+       return result;
+}
+
+bool epi_has_tag(const char *tag, const struct epi_properties *props)
+{
+       return linhash_lookup(tag, props->tagtab);
+}
+
+struct tag_iter {
+       char *str;
+       char *saveptr; /* for strtok_r(3) */
+       char *token;
+};
+
+static struct tag_iter *tag_iter_new(const struct map_chunk *tags)
+{
+       struct tag_iter *titer = xmalloc(sizeof(*titer));
+
+       titer->str = xmalloc(tags->len + 1);
+       memcpy(titer->str, tags->base, tags->len);
+       titer->str[tags->len] = '\0';
+       titer->token = strtok_r(titer->str, ",", &titer->saveptr);
+       return titer;
+}
+
+static const char *tag_iter_get(const struct tag_iter *titer)
+{
+       return titer->token;
+}
+
+static void tag_iter_next(struct tag_iter *titer)
+{
+       titer->token = strtok_r(NULL, ",", &titer->saveptr);
+}
+
+static void tag_iter_free(struct tag_iter *titer)
+{
+       if (!titer)
+               return;
+       free(titer->str);
+       free(titer);
+}
+
+static bool epi_admissible(const struct epigram *epi,
+               const struct txp_context *ast)
+{
+       bool admissible;
+       const char *p;
+       struct epi_properties props = {
+               .chunk = &epi->epi,
+               .tagtab = linhash_new(3),
+               .num_tags = 0,
+       };
+       struct tag_iter *titer;
+
+
+       for (
+               titer = tag_iter_new(&epi->tags);
+               (p = tag_iter_get(titer));
+               tag_iter_next(titer)
+       ) {
+               struct linhash_item item = {.key = p};
+               linhash_insert(&item, props.tagtab, NULL);
+               props.num_tags++;
+       }
+
+       admissible = txp_eval_ast(ast, &props);
+       linhash_free(props.tagtab);
+       tag_iter_free(titer);
+       return admissible;
+}
+
+static void print_epigram(const struct epigram *epi, bool print_tags)
+{
+       printf("%.*s", (int)epi->epi.len, epi->epi.base);
+       if (print_tags)
+               printf(TF_SEP "%.*s\n", (int)epi->tags.len, epi->tags.base);
+}
+
+static void print_admissible_epigrams(const struct epigram *epis,
+               unsigned num_epis)
+{
+       unsigned n;
+
+       for (n = 0; n < num_epis; n++)
+               print_epigram(epis + n, OPT_GIVEN(PRINT, TAGS));
+}
+
+static void print_random_epigram(const struct epigram *epis, unsigned num_epis)
+{
+       long unsigned r;
+       const struct epigram *epi;
+       struct timeval tv;
+
+       if (num_epis == 0) {
+               ERROR_LOG("no matching epigram\n");
+               return;
+       }
+       gettimeofday(&tv, NULL);
+       srandom((unsigned)tv.tv_usec);
+       r = (num_epis + 0.0) * (random() / (RAND_MAX + 1.0));
+       assert(r < num_epis);
+       epi = epis + r;
+       print_epigram(epi, OPT_GIVEN(PRINT, TAGS));
+}
+
+static int read_tag_expression(struct iovec *result)
+{
+       const char *tx = OPT_STRING_VAL(PRINT, EXPRESSION);
+       int ret, fd;
+
+       assert(tx);
+       if (strcmp(tx, "-")) {
+               char *filename;
+               if (tx[0] != '/') {
+                       char *home = get_homedir();
+                       xasprintf(&filename, "%s/.tfortune/expressions/%s",
+                               home, tx);
+                       free(home);
+               } else
+                       filename = xstrdup(tx);
+               ret = open(filename, O_RDONLY);
+               if (ret < 0) {
+                       ret = -ERRNO_TO_TF_ERROR(errno);
+                       ERROR_LOG("could not open %s\n", filename);
+               }
+               free(filename);
+               if (ret < 0)
+                       return ret;
+               fd = ret;
+       } else
+               fd = STDIN_FILENO;
+       ret = fd2buf(fd, result);
+       close(fd);
+       return ret;
+}
+
+static int tx2ast(struct iovec *tx, struct txp_context **ast)
+{
+       int ret;
+       char *errmsg;
+
+       ret = txp_init(tx, ast, &errmsg);
+       if (ret < 0) {
+               ERROR_LOG("could not parse tag expression: %s\n", errmsg);
+               free(errmsg);
+               return ret;
+       }
+       return 1;
+}
+
+struct epi_iter {
+       struct iovec *maps;
+       unsigned num_maps;
+       unsigned map_num;
+       struct epigram epi;
+       unsigned num_epis;
+};
+
+static bool get_next_epi(struct epi_iter *eiter)
+{
+       const char *epi_start = NULL;
+
+       for (; eiter->map_num < eiter->num_maps; eiter->map_num++) {
+               struct iovec *iov = eiter->maps + eiter->map_num;
+               const char *buf, *end = iov->iov_base + iov->iov_len;
+
+               if (!epi_start && eiter->epi.tags.base)
+                       epi_start = eiter->epi.tags.base
+                               + eiter->epi.tags.len + 1;
+               else
+                       epi_start = iov->iov_base;
+               buf = epi_start;
+               while (buf < end) {
+                       const size_t sep_len = strlen(TF_SEP);
+                       const char *p, *cr, *tags;
+                       size_t tag_len;
+
+                       cr = memchr(buf, '\n', end - buf);
+                       if (!cr)
+                               break;
+                       p = cr + 1;
+                       if (p + sep_len >= end)
+                               break;
+                       if (strncmp(p, TF_SEP, sep_len) != 0) {
+                               buf = p;
+                               continue;
+                       }
+                       tags = p + sep_len;
+                       cr = memchr(tags, '\n', end - tags);
+                       if (cr)
+                               tag_len = cr - tags;
+                       else
+                               tag_len = end - tags;
+                       eiter->epi.epi.base = epi_start;
+                       eiter->epi.epi.len = p - epi_start;
+                       eiter->epi.tags.base = tags;
+                       eiter->epi.tags.len = tag_len;
+                       eiter->num_epis++;
+                       return true;
+               }
+       }
+       eiter->epi.epi.base = NULL;
+       eiter->epi.epi.len = 0;
+       eiter->epi.tags.base = NULL;
+       eiter->epi.tags.len = 0;
+       return false;
+}
+
+static char *get_basedir(void)
+{
+       char *home, *basedir;
+       if (OPT_GIVEN(TFORTUNE, BASEDIR))
+               return xstrdup(OPT_STRING_VAL(TFORTUNE, BASEDIR));
+       home = get_homedir();
+       xasprintf(&basedir, "%s/.tfortune", home);
+       free(home);
+       return basedir;
+}
+
+static char *get_epidir(void)
+{
+       char *basedir, *epidir;
+       basedir = get_basedir();
+       xasprintf(&epidir, "%s/epigrams", basedir);
+       free(basedir);
+       return epidir;
+}
+
+static char *get_xdir(void)
+{
+       char *basedir = get_basedir(), *xdir;
+       xasprintf(&xdir, "%s/expressions", basedir);
+       free(basedir);
+       return xdir;
+}
+
+static struct epi_iter *epi_iter_new(void)
+{
+       struct epi_iter *eiter = xmalloc(sizeof(*eiter));
+       unsigned num_inputs = lls_num_inputs(sublpr);
+
+       if (num_inputs == 0) {
+               struct regfile_iter *riter;
+               struct iovec iov;
+               char *epidir = get_epidir();
+
+               regfile_iter_new(epidir, &riter);
+               free(epidir);
+               eiter->maps = NULL;
+               eiter->num_maps = 0;
+               for (;
+                       regfile_iter_map(riter, &iov);
+                       regfile_iter_next(riter)
+               ) {
+                       eiter->num_maps++;
+                       eiter->maps = realloc(eiter->maps,
+                               eiter->num_maps * sizeof(*eiter->maps));
+                       eiter->maps[eiter->num_maps - 1] = iov;
+               }
+               regfile_iter_free(riter);
+       } else {
+               unsigned n;
+               eiter->maps = xmalloc(num_inputs * sizeof(*eiter->maps));
+               for (n = 0; n < num_inputs; n++)
+                       mmap_file(lls_input(n, sublpr), eiter->maps + n);
+               eiter->num_maps = num_inputs;
+       }
+       eiter->map_num = 0;
+       eiter->epi.epi.base = NULL;
+       eiter->epi.epi.len = 0;
+       eiter->epi.tags.base = NULL;
+       eiter->epi.tags.len = 0;
+       eiter->num_epis = 0;
+       get_next_epi(eiter);
+       return eiter;
+}
+
+static const struct epigram *epi_iter_get(const struct epi_iter *eiter)
+{
+       return (eiter->epi.epi.base && eiter->epi.tags.base)?
+               &eiter->epi : NULL;
+}
+
+static unsigned epi_iter_num_maps(const struct epi_iter *eiter)
+{
+       return eiter->num_maps;
+}
+
+static unsigned epi_iter_num_epis(const struct epi_iter *eiter)
+{
+       return eiter->num_epis;
+}
+
+static void epi_iter_next(struct epi_iter *eiter)
+{
+       get_next_epi(eiter);
+}
+
+static void epi_iter_free(struct epi_iter *eiter)
+{
+       unsigned n;
+
+       if (!eiter)
+               return;
+       for (n = 0; n < eiter->num_maps; n++)
+               munmap(eiter->maps[n].iov_base, eiter->maps[n].iov_len);
+       free(eiter->maps);
+       free(eiter);
+}
+
+static int com_print(void)
+{
+       int ret;
+       struct epigram *epis = NULL;
+       unsigned epis_sz = 0, nae = 0; /* number of admissible epis */
+       struct iovec tx;
+       struct txp_context *ast;
+       struct epi_iter *eiter;
+       const struct epigram *epi;
+
+       ret = read_tag_expression(&tx);
+       if (ret < 0)
+               return ret;
+       ret = tx2ast(&tx, &ast);
+       if (ret < 0)
+               goto free_tx;
+       for (
+               eiter = epi_iter_new();
+               (epi = epi_iter_get(eiter));
+               epi_iter_next(eiter)
+       ) {
+               if (!epi_admissible(epi, ast))
+                       continue;
+               if (nae >= epis_sz) {
+                       epis_sz = 2 * epis_sz + 1;
+                       epis = xrealloc(epis, epis_sz * sizeof(*epis));
+               }
+               epis[nae++] = *epi;
+       }
+       if (OPT_GIVEN(PRINT, ALL))
+               print_admissible_epigrams(epis, nae);
+       else
+               print_random_epigram(epis, nae);
+       epi_iter_free(eiter);
+       free(epis);
+       txp_free(ast);
+       ret = 1;
+free_tx:
+       free(tx.iov_base);
+       return ret;
+}
+EXPORT_CMD_HANDLER(print);
+
+static char *get_editor(void)
+{
+       char *val = getenv("TFORTUNE_EDITOR");
+
+       if (val && val[0])
+               return xstrdup(val);
+       val = getenv("EDITOR");
+       if (val && val[0])
+               return xstrdup(val);
+       return xstrdup("vi");
+}
+
+static void open_editor(const char *dir)
+{
+       char *editor;
+       char **argv;
+       pid_t pid;
+       unsigned n, num_inputs = lls_num_inputs(sublpr);
+
+       if ((pid = fork()) < 0) {
+               EMERG_LOG("fork error: %s\n", strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       if (pid) { /* parent */
+               wait(NULL);
+               return;
+       }
+       editor = get_editor();
+       argv = xmalloc((num_inputs + 2) * sizeof(*argv));
+       argv[0] = editor;
+       for (n = 0; n < num_inputs; n++)
+               xasprintf(&argv[n + 1], "%s/%s", dir, lls_input(n, sublpr));
+       argv[num_inputs + 1] = NULL;
+       execvp(editor, argv);
+       EMERG_LOG("execvp error: %s\n", strerror(errno));
+       _exit(EXIT_FAILURE);
+}
+
+static int create_dir(const char *path)
+{
+       int ret;
+
+       ret = mkdir(path, 0777); /* rely on umask */
+       if (ret < 0) {
+               if (errno == EEXIST)
+                       return 0;
+               ERROR_LOG("could not create %s\n", path);
+               return -ERRNO_TO_TF_ERROR(errno);
+       }
+       NOTICE_LOG("created directory %s\n", path);
+       return 1;
+}
+
+static int create_basedir(void)
+{
+       char *basedir;
+       int ret;
+
+       basedir = get_basedir();
+       ret = create_dir(basedir);
+       free(basedir);
+       return ret;
+}
+
+static int generic_edit(const char *dir)
+{
+       char *errctx;
+       int ret;
+       bool basedir_given = OPT_GIVEN(TFORTUNE, BASEDIR);
+
+       ret = lls_check_arg_count(sublpr, 1, INT_MAX, &errctx);
+       if (ret < 0) {
+               ret = lopsub_error(ret, &errctx);
+               return ret;
+       }
+       if (!basedir_given) {
+               ret = create_basedir();
+               if (ret < 0)
+                       return ret;
+               ret = create_dir(dir);
+               if (ret < 0)
+                       return ret;
+       }
+       open_editor(dir);
+       ret = 1;
+       return ret;
+}
+
+static int com_ede(void)
+{
+       int ret;
+       char *epidir = get_epidir();
+       ret = generic_edit(epidir);
+       free(epidir);
+       return ret;
+}
+EXPORT_CMD_HANDLER(ede);
+
+static int com_edx(void)
+{
+       int ret;
+       char *xdir = get_xdir();
+       ret = generic_edit(xdir);
+       free(xdir);
+       return ret;
+}
+EXPORT_CMD_HANDLER(edx);
+
+static int item_alpha_compare(const struct linhash_item **i1,
+               const struct linhash_item **i2)
+{
+       return strcmp((*i1)->key, (*i2)->key);
+}
+
+static int item_num_compare(const struct linhash_item **i1,
+               const struct linhash_item **i2)
+{
+       long unsigned v1 = (long unsigned)(*i1)->object;
+       long unsigned v2 = (long unsigned)(*i2)->object;
+
+       return v1 < v2? -1 : (v1 == v2? 0 : 1);
+}
+
+struct dentry {
+       char mode[11];
+       nlink_t nlink;
+       char *user, *group;
+       off_t size;
+       uint64_t mtime;
+       char *name;
+};
+
+static void make_dentry(const char *name, const struct stat *stat,
+               struct dentry *d)
+{
+       mode_t m = stat->st_mode;
+       struct group *g;
+       struct passwd *pwentry;
+
+       sprintf(d->mode, "----------");
+       if (S_ISREG(m))
+               d->mode[0] = '-';
+       else if (S_ISDIR(m))
+               d->mode[0] = 'd';
+       else if (S_ISCHR(m))
+               d->mode[0] = 'c';
+       else if ((S_ISBLK(m)))
+               d->mode[0] = 'b';
+       else if (S_ISLNK(m))
+               d->mode[0] = 'l';
+       else if (S_ISFIFO(m))
+               d->mode[0] = 'p';
+       else if ((S_ISSOCK(m)))
+               d->mode[0] = 's';
+       else
+               d->mode[0] = '?';
+
+       if (m & S_IRUSR)
+               d->mode[1] = 'r';
+       if (m & S_IWUSR)
+               d->mode[2] = 'w';
+       if (m & S_IXUSR) {
+               if (m & S_ISUID)
+                       d->mode[3] = 's';
+               else
+                       d->mode[3] = 'x';
+       } else if (m & S_ISUID)
+               d->mode[3] = 'S';
+
+       if (m & S_IRGRP)
+               d->mode[4] = 'r';
+       if (m & S_IWGRP)
+               d->mode[5] = 'w';
+       if (m & S_IXGRP) {
+               if (m & S_ISGID)
+                       d->mode[6] = 's';
+               else
+                       d->mode[6] = 'x';
+       } else if (m & S_ISGID)
+               d->mode[6] = 'S';
+
+       if (m & S_IROTH)
+               d->mode[7] = 'r';
+       if (m & S_IWOTH)
+               d->mode[8] = 'w';
+       if (m & S_IXOTH) {
+               if (m & S_ISVTX)
+                       d->mode[9] = 't';
+               else
+                       d->mode[9] = 'x';
+       } else if (m & S_ISVTX)
+               d->mode[9] = 'T';
+
+       d->nlink = stat->st_nlink;
+
+       pwentry = getpwuid(stat->st_uid);
+       if (pwentry && pwentry->pw_name)
+               d->user = xstrdup(pwentry->pw_name);
+       else
+               xasprintf(&d->user, "%u", stat->st_uid);
+
+       g = getgrgid(stat->st_gid);
+       if (g && g->gr_name)
+               d->group = xstrdup(g->gr_name);
+       else
+               xasprintf(&d->group, "%u", stat->st_gid);
+       d->size = stat->st_size;
+       d->mtime = stat->st_mtime;
+       d->name = xstrdup(name);
+}
+
+static int num_digits(uint64_t x)
+{
+       unsigned n = 1;
+
+       if (x != 0)
+               while (x > 9) {
+                       x /= 10;
+                       n++;
+               }
+       return n;
+}
+
+enum var_length_dentry_fields {
+       VLDF_NLINKS,
+       VLDF_USER,
+       VLDF_GROUP,
+       VLDF_SIZE,
+       NUM_VLDF
+};
+
+static void update_field_field_widths(int field_widths[NUM_VLDF],
+               const struct dentry *d)
+{
+       int *w, n;
+
+       w = field_widths + VLDF_NLINKS;
+       n = num_digits(d->nlink);
+       *w = MAX(*w, n);
+
+       w = field_widths + VLDF_USER;
+       n = strlen(d->user);
+       *w = MAX(*w, n);
+
+       w = field_widths + VLDF_GROUP;
+       n = strlen(d->group);
+       *w = MAX(*w, n);
+
+       w = field_widths + VLDF_SIZE;
+       n = num_digits(d->size);
+       *w = MAX(*w, n);
+}
+
+static void format_time(uint64_t seconds, uint64_t now, struct iovec *result)
+{
+       struct tm *tm;
+       const uint64_t m = 6 * 30 * 24 * 3600; /* six months */
+       size_t nbytes;
+
+       tm = localtime((time_t *)&seconds);
+       assert(tm);
+
+       if (seconds > now - m && seconds < now + m) {
+               nbytes = strftime(result->iov_base, result->iov_len,
+                       "%b %e %k:%M", tm);
+               assert(nbytes > 0);
+       } else {
+               nbytes = strftime(result->iov_base, result->iov_len,
+                       "%b %e  %Y", tm);
+               assert(nbytes > 0);
+       }
+}
+
+static int list_directory(const char *dir, bool long_listing)
+{
+       struct regfile_iter *riter;
+       const char *basename;
+       int ret, field_widths[NUM_VLDF] = {0};
+       struct dentry *dentries = NULL;
+       unsigned n, num_dentries = 0, dentries_size = 0;
+       struct timespec now;
+
+       for (
+               regfile_iter_new(dir, &riter);
+               (basename = regfile_iter_basename(riter));
+               regfile_iter_next(riter)
+       ) {
+               const struct stat *stat;
+               struct dentry *dentry;
+               if (!long_listing) {
+                       printf("%s\n", basename);
+                       continue;
+               }
+               num_dentries++;
+               if (num_dentries > dentries_size) {
+                       dentries_size = 2 * dentries_size + 1;
+                       dentries = xrealloc(dentries,
+                               dentries_size * sizeof(*dentries));
+               }
+               dentry = dentries + num_dentries - 1;
+               stat = regfile_iter_stat(riter);
+               make_dentry(basename, stat, dentry);
+               update_field_field_widths(field_widths, dentry);
+       }
+       regfile_iter_free(riter);
+       if (!long_listing)
+               return 0;
+       ret = clock_gettime(CLOCK_REALTIME, &now);
+       assert(ret == 0);
+       for (n = 0; n < num_dentries; n++) {
+               struct dentry *d = dentries + n;
+               char buf[30];
+               struct iovec iov = {.iov_base = buf, .iov_len = sizeof(buf)};
+
+               format_time(d->mtime, now.tv_sec, &iov);
+               printf("%s %*lu %*s %*s %*" PRIu64 " %s %s\n",
+                       d->mode,
+                       field_widths[VLDF_NLINKS], (long unsigned)d->nlink,
+                       field_widths[VLDF_USER], d->user,
+                       field_widths[VLDF_GROUP], d->group,
+                       field_widths[VLDF_SIZE], (uint64_t)d->size,
+                       buf,
+                       d->name
+               );
+               free(d->user);
+               free(d->group);
+               free(d->name);
+       }
+       free(dentries);
+       return 1;
+}
+
+static int com_lse(void)
+{
+       int ret;
+       char *dir = get_epidir();
+
+       ret = list_directory(dir, OPT_GIVEN(LSE, LONG));
+       free(dir);
+       return ret;
+}
+EXPORT_CMD_HANDLER(lse);
+
+static int com_lsx(void)
+{
+       int ret;
+       char *dir = get_xdir();
+
+       ret = list_directory(dir, OPT_GIVEN(LSE, LONG));
+       free(dir);
+       return ret;
+}
+EXPORT_CMD_HANDLER(lsx);
+
+static struct linhash_table *hash_tags(unsigned *num_epi_files,
+               unsigned *num_epis)
+{
+       struct linhash_table *tagtab = linhash_new(3);
+       struct epi_iter *eiter;
+       const struct epigram *epi;
+
+       for (
+               eiter = epi_iter_new();
+               (epi = epi_iter_get(eiter));
+               epi_iter_next(eiter)
+       ) {
+               struct tag_iter *titer;
+               const char *tag;
+               for (
+                       titer = tag_iter_new(&epi->tags);
+                       (tag = tag_iter_get(titer));
+                       tag_iter_next(titer)
+               ) {
+                       struct linhash_item item = {
+                               .key = xstrdup(tag),
+                               .object = (void *)1LU
+                       };
+                       void **object;
+                       if (linhash_insert(&item, tagtab, &object) < 0) {
+                               long unsigned val = (long unsigned)*object;
+                               val++;
+                               *object = (void *)val;
+                               free((char *)item.key);
+                       }
+               }
+               tag_iter_free(titer);
+       }
+       if (num_epi_files)
+               *num_epi_files = epi_iter_num_maps(eiter);
+       if (num_epis)
+               *num_epis = epi_iter_num_epis(eiter);
+       epi_iter_free(eiter);
+       return tagtab;
+}
+
+static int com_lst(void)
+{
+       struct linhash_table *tagtab;
+       struct linhash_iterator *liter;
+       struct linhash_item *itemp;
+       linhash_comparator *comp = OPT_GIVEN(LST, SORT_BY_COUNT)?
+               item_num_compare : item_alpha_compare;
+       bool reverse = OPT_GIVEN(LST, REVERSE);
+
+       tagtab = hash_tags(NULL, NULL);
+       for (
+               liter = linhash_iterator_new(tagtab, comp, reverse);
+               (itemp = linhash_iterator_item(liter));
+               linhash_iterator_next(liter)
+       ) {
+               if (OPT_GIVEN(LST, LONG))
+                       printf("%lu\t%s\n", (long unsigned)itemp->object,
+                               itemp->key);
+               else
+                       printf("%s\n", itemp->key);
+               free((char *)itemp->key);
+       }
+       linhash_iterator_free(liter);
+       linhash_free(tagtab);
+       return 0;
+}
+EXPORT_CMD_HANDLER(lst);
+
+static int com_stats(void)
+{
+       struct linhash_table *tagtab;
+       struct linhash_iterator *liter;
+       struct linhash_item *itemp;
+       unsigned num_epi_files, num_epis, num_unique_tags, num_x = 0;
+       long unsigned num_tags = 0;
+       char *xdir, *lh_stats;
+       struct regfile_iter *riter;
+       bool verbose = OPT_GIVEN(STATS, VERBOSE);
+
+       tagtab = hash_tags(&num_epi_files, &num_epis);
+       for (
+               liter = linhash_iterator_new(tagtab, NULL, false);
+               (itemp = linhash_iterator_item(liter));
+               linhash_iterator_next(liter)
+       )
+               num_tags += (long unsigned)itemp->object;
+       num_unique_tags = linhash_num_items(tagtab);
+       linhash_iterator_free(liter);
+       if (verbose)
+               lh_stats = linhash_statistics(tagtab);
+       linhash_free(tagtab);
+
+       xdir = get_xdir();
+       for (
+               regfile_iter_new(xdir, &riter);
+               regfile_iter_basename(riter);
+               regfile_iter_next(riter)
+       )
+               num_x++;
+       regfile_iter_free(riter);
+       free(xdir);
+       printf("number of tag expressions.......... %5u\n", num_x);
+       printf("number of epigram files............ %5u\n", num_epi_files);
+       printf("number of epigrams................. %5u\n", num_epis);
+       printf("number of tags..................... %5lu\n", num_tags);
+       printf("number of unique tags.............. %5u\n", num_unique_tags);
+       printf("average number of epigrams per file %8.02f\n",
+               (float)num_epis / num_epi_files);
+       printf("average number of tags per epigram. %8.02f\n",
+               (float)num_tags / num_epis);
+       printf("average number of tag recurrence... %8.02f\n",
+               (float)num_tags / num_unique_tags);
+       if (verbose) {
+               printf("\nlinear hashing statistics:\n%s\n", lh_stats);
+               free(lh_stats);
+       }
+       return 1;
+}
+EXPORT_CMD_HANDLER(stats);
+
+#define LSG_TFORTUNE_CMD(_name) #_name
+static const char * const subcommand_names[] = {LSG_TFORTUNE_SUBCOMMANDS NULL};
+#undef LSG_TFORTUNE_CMD
+
+static void show_subcommand_summary(bool verbose)
+{
+       int i;
+
+       printf("Available subcommands:\n");
+       if (verbose) {
+               const struct lls_command *cmd;
+               for (i = 1; (cmd = lls_cmd(i, tfortune_suite)); i++) {
+                       const char *purpose = lls_purpose(cmd);
+                       const char *name = lls_command_name(cmd);
+                       printf("%-11s%s\n", name, purpose);
+               }
+       } else {
+               unsigned n = 8;
+               printf("\t");
+               for (i = 0; i < LSG_NUM_TFORTUNE_SUBCOMMANDS; i++) {
+                       if (i > 0)
+                               n += printf(", ");
+                       n += printf("%s", subcommand_names[i]);
+                       if (n > 70) {
+                               printf("\n\t");
+                               n = 8;
+                       }
+               }
+               printf("\n");
+       }
+}
+
+static int com_help(void)
+{
+       int ret;
+       char *errctx, *help;
+       const char *arg;
+       const struct lls_command *cmd;
+
+       ret = lls_check_arg_count(sublpr, 0, 1, &errctx);
+       if (ret < 0)
+               return lopsub_error(ret, &errctx);
+       if (lls_num_inputs(sublpr) == 0) {
+               show_subcommand_summary(OPT_GIVEN(HELP, LONG));
+               return 0;
+       }
+       arg = lls_input(0, sublpr);
+       ret = lls_lookup_subcmd(arg, tfortune_suite, &errctx);
+       if (ret < 0)
+               return lopsub_error(ret, &errctx);
+       cmd = lls_cmd(ret, tfortune_suite);
+       if (OPT_GIVEN(HELP, LONG))
+               help = lls_long_help(cmd);
+       else
+               help = lls_short_help(cmd);
+       printf("%s\n", help);
+       free(help);
+       return 1;
+}
+EXPORT_CMD_HANDLER(help);
+
+static void handle_help_and_version(void)
+{
+       int i;
+       char *help;
+       const struct lls_command *cmd;
+
+       if (OPT_GIVEN(TFORTUNE, VERSION)) {
+               printf("tfortune %s\n"
+                       "Copyright (C) " COPYRIGHT_YEAR " " AUTHOR ".\n"
+                       "License " LICENSE ": <" LICENSE_URL ">.\n"
+                       "This is free software: you are free to change and redistribute it.\n"
+                       "There is NO WARRANTY, to the extent permitted by law.\n"
+                       "Report bugs to " AUTHOR " <" PACKAGE_BUGREPORT ">.\n"
+                       ,
+                       tf_version()
+               );
+               exit(EXIT_SUCCESS);
+       }
+       cmd = CMD_PTR(TFORTUNE);
+       if (OPT_GIVEN(TFORTUNE, DETAILED_HELP))
+               help = lls_long_help(cmd);
+       else if (OPT_GIVEN(TFORTUNE, HELP))
+               help = lls_short_help(cmd);
+       else
+               return;
+       printf("%s\n", help);
+       free(help);
+       if (OPT_GIVEN(TFORTUNE, DETAILED_HELP))
+               for (i = 1; (cmd = lls_cmd(i, tfortune_suite)); i++) {
+                       help = lls_short_help(cmd);
+                       printf("%s\n---\n", help);
+                       free(help);
+               }
+       else
+               show_subcommand_summary(true /* verbose */);
+       exit(EXIT_SUCCESS);
+}
+
+enum tf_word_type {
+       WT_COMMAND_NAME,
+       WT_DOUBLE_DASH, /* -- */
+       WT_SHORT_OPT_WITH_ARG, /* -l */
+       WT_SHORT_OPT_WITHOUT_ARG, /* -V, -abc=d */
+       WT_LONG_OPT_WITH_ARG, /* --loglevel */
+       WT_LONG_OPT_WITHOUT_ARG, /* --foo=bar --help */
+       WT_OPTION_ARG,
+       WT_NON_OPTION_ARG,
+       WT_DUNNO,
+};
+
+static bool is_short_opt(const char *word)
+{
+       if (word[0] != '-')
+               return false;
+       if (word[1] == '-')
+               return false;
+       if (word[1] == '\0')
+               return false;
+       return true;
+}
+
+static bool is_long_opt(const char *word)
+{
+       if (word[0] != '-')
+               return false;
+       if (word[1] != '-')
+               return false;
+       if (word[2] == '\0')
+               return false;
+       return true;
+}
+
+/* whether the next word will be an arg to this short opt */
+static int short_opt_needs_arg(const char *word,
+               const char * const *short_opts)
+{
+       size_t n, len;
+
+       if (strchr(word, '='))
+               return false;
+       len = strlen(word);
+       for (n = 0; short_opts[n]; n++) {
+               const char *opt = short_opts[n];
+               if (word[len - 1] != opt[1])
+                       continue;
+               if (opt[2] == '=')
+                       return true;
+               else
+                       return false;
+       }
+       return -1;
+}
+
+/* whether the next word will be an arg to this long opt */
+static int long_opt_needs_arg(const char *word,
+               const char * const *long_opts)
+{
+       size_t n;
+
+       if (strchr(word, '='))
+               return false;
+       for (n = 0; long_opts[n]; n++) {
+               const char *opt = long_opts[n];
+               size_t len = strlen(opt);
+
+               if (opt[len - 1] == '=')
+                       len--;
+               if (strncmp(word + 2, opt + 2, len - 2))
+                       continue;
+               if (opt[len] == '=')
+                       return true;
+               else
+                       return false;
+       }
+       return -1;
+}
+
+static bool get_word_types(unsigned cword, unsigned arg0,
+               const char * const *short_opts, const char * const *long_opts,
+               enum tf_word_type *result)
+{
+       const char *word;
+       unsigned n;
+       bool have_dd = false;
+       int ret;
+
+       /* index zero is always the command name */
+       assert(cword > arg0);
+       result[arg0] = WT_COMMAND_NAME;
+       for (n = arg0 + 1; n < cword; n++) {
+               enum tf_word_type prev_type = result[n - 1];
+
+               if (have_dd) {
+                       result[n] = WT_NON_OPTION_ARG;
+                       continue;
+               }
+               if (prev_type == WT_SHORT_OPT_WITH_ARG) {
+                       result[n] = WT_OPTION_ARG;
+                       continue;
+               }
+               if (prev_type == WT_LONG_OPT_WITH_ARG) {
+                       result[n] = WT_OPTION_ARG;
+                       continue;
+               }
+               word = lls_input(n, sublpr);
+               if (strcmp(word, "--") == 0) {
+                       result[n] = WT_DOUBLE_DASH;
+                       have_dd = true;
+                       continue;
+               }
+               if (is_short_opt(word)) {
+                       ret = short_opt_needs_arg(word, short_opts);
+                       if (ret < 0)
+                               goto dunno;
+                       if (ret > 0)
+                               result[n] = WT_SHORT_OPT_WITH_ARG;
+                       else
+                               result[n] = WT_SHORT_OPT_WITHOUT_ARG;
+                       continue;
+               }
+               if (is_long_opt(word)) {
+                       ret = long_opt_needs_arg(word, long_opts);
+                       if (ret < 0)
+                               goto dunno;
+                       if (ret > 0)
+                               result[n] = WT_LONG_OPT_WITH_ARG;
+                       else
+                               result[n] = WT_LONG_OPT_WITHOUT_ARG;
+                       continue;
+               }
+               result[n] = WT_NON_OPTION_ARG;
+       }
+       return have_dd;
+dunno:
+       for (; n <= cword; n++)
+               result[n] = WT_DUNNO;
+       return false;
+}
+
+#define DUMMY_COMPLETER(_name) static char **complete_ ## _name( \
+       __attribute__ ((unused)) uint32_t cword, \
+       __attribute__ ((unused)) unsigned arg0, \
+       __attribute__ ((unused)) bool have_dd \
+       ) {return NULL;}
+
+DUMMY_COMPLETER(tfortune)
+DUMMY_COMPLETER(compgen)
+DUMMY_COMPLETER(completer)
+
+static const char * const supercmd_opts[] = {LSG_TFORTUNE_TFORTUNE_OPTS, NULL};
+
+static void print_zero_terminated_list(const char * const *list)
+{
+       const char * const *c;
+       for (c = list; *c; c++)
+               printf("%s%c", *c, '\0');
+}
+
+static void print_option_list(const char * const *opts)
+{
+       const char * const *c;
+
+       for (c = opts; *c; c++) {
+               int len = strlen(*c);
+               assert(len > 0);
+               if ((*c)[len - 1] == '=')
+                       len--;
+               printf("%.*s%c", len, *c, '\0');
+       }
+}
+
+static void activate_dirname_completion(void)
+{
+       printf("%c", '\0');
+       printf("-o dirnames%c", '\0');
+}
+
+static void complete_loglevels(void)
+{
+       unsigned n;
+       const struct lls_option *opt = lls_opt(LSG_TFORTUNE_TFORTUNE_OPT_LOGLEVEL,
+               CMD_PTR(TFORTUNE));
+
+       for (n = 0; n < LSG_NUM_TFORTUNE_TFORTUNE_LOGLEVEL_VALUES; n++) {
+               const char *v = lls_enum_string_val(n, opt);
+               printf("%s%c", v, '\0');
+       }
+}
+
+static char **complete_dentries(const char *dir)
+{
+       const char *bn;
+       struct regfile_iter *riter;
+       unsigned n;
+       char **result = NULL;
+
+       regfile_iter_new(dir, &riter);
+       for (
+               n = 0;
+               (bn = regfile_iter_basename(riter));
+               regfile_iter_next(riter), n++
+       ) {
+               result = xrealloc(result, (n + 2) * sizeof(*result));
+               result[n] = xstrdup(bn);
+               result[n + 1] = NULL;
+       }
+       return result;
+}
+
+static char **complete_ede(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0,
+               __attribute__ ((unused)) bool have_dd)
+{
+       char **result, *epidir = get_epidir();
+
+       result = complete_dentries(epidir);
+       free(epidir);
+       return result;
+}
+
+static char **complete_edx(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0,
+               __attribute__ ((unused)) bool have_dd)
+{
+       char **result, *xdir = get_xdir();
+
+       result = complete_dentries(xdir);
+       free(xdir);
+       return result;
+}
+
+static char **complete_std_opts(bool have_dd, const char * const *opts)
+{
+       if (have_dd)
+               print_option_list(opts);
+       else
+               print_option_list(supercmd_opts);
+       return NULL;
+}
+
+static char **complete_stats(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_STATS_OPTS, NULL};
+       return complete_std_opts(have_dd, opts);
+}
+
+static char **complete_lse(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_LSE_OPTS, NULL};
+       return complete_std_opts(have_dd, opts);
+}
+
+static char **complete_lst(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_LST_OPTS, NULL};
+       return complete_std_opts(have_dd, opts);
+}
+
+static char **complete_lsx(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_LSX_OPTS, NULL};
+       return complete_std_opts(have_dd, opts);
+}
+
+static char **complete_help(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_HELP_OPTS, NULL};
+
+       if (!have_dd)
+               print_option_list(supercmd_opts);
+       else
+               print_option_list(opts);
+       print_zero_terminated_list(subcommand_names);
+       return NULL;
+}
+
+static char **complete_print(uint32_t cword, unsigned arg0, bool have_dd)
+{
+       const char * const short_opts[] = {LSG_TFORTUNE_PRINT_SHORT_OPTS, NULL};
+       const char * const long_opts[] = {LSG_TFORTUNE_PRINT_LONG_OPTS, NULL};
+       const char * const opts[] = {LSG_TFORTUNE_PRINT_OPTS, NULL};
+       enum tf_word_type *word_types, prev_type;
+       const char *prev;
+       char **result, *xdir;
+
+       word_types = xmalloc(cword * sizeof(*word_types));
+       get_word_types(cword, arg0, short_opts, long_opts, word_types);
+       prev = lls_input(cword - 1, sublpr);
+       prev_type = word_types[cword - 1];
+       free(word_types);
+       switch (prev_type) {
+               case WT_COMMAND_NAME:
+               case WT_SHORT_OPT_WITHOUT_ARG:
+               case WT_OPTION_ARG:
+               case WT_LONG_OPT_WITHOUT_ARG:
+               case WT_DOUBLE_DASH:
+                       if (!have_dd)
+                               print_option_list(supercmd_opts);
+                       else
+                               print_option_list(opts);
+                       return NULL;
+               case WT_SHORT_OPT_WITH_ARG:
+                       if (strcmp(prev, "-x") == 0)
+                               goto complete_expression;
+                       break;
+               case WT_LONG_OPT_WITH_ARG:
+                       if (strcmp(prev, "--expression") == 0)
+                               goto complete_expression;
+                       break;
+               default:
+                       return NULL;
+       }
+complete_expression:
+       xdir = get_xdir();
+       result = complete_dentries(xdir);
+       free(xdir);
+       return result;
+}
+
+typedef char **(*completer)(uint32_t cword, unsigned arg0, bool have_dd);
+
+#define LSG_TFORTUNE_CMD(_name) complete_ ## _name
+static const completer completers[] = {LSG_TFORTUNE_COMMANDS};
+#undef LSG_TFORTUNE_CMD
+
+static int call_subcmd_completer(unsigned cmd_num, int arg0, uint32_t cword,
+               bool have_dd)
+{
+       char **c, **candidates = completers[cmd_num](cword, arg0, have_dd);
+
+       if (!candidates)
+               return 0;
+       for (c = candidates; *c; c++) {
+               printf("%s%c", *c, '\0');
+               free(*c);
+       }
+       free(candidates);
+       return 1;
+}
+
+static bool need_subcommand_completer(uint32_t cword, unsigned subcmd_idx,
+               const enum tf_word_type *word_types, bool have_dd)
+{
+       enum tf_word_type prev_type;
+       const char *word;
+
+       if (subcmd_idx == 0)
+               return false;
+       if (have_dd)
+               return true;
+       prev_type = word_types[cword - 1];
+       assert(prev_type != WT_COMMAND_NAME);
+       switch (prev_type) {
+               case WT_SHORT_OPT_WITH_ARG:
+               case WT_LONG_OPT_WITH_ARG:
+               case WT_DUNNO:
+                       return false;
+               default:
+                       break;
+       }
+       word = lls_input(cword, sublpr);
+       if (is_short_opt(word))
+               return false;
+       if (is_long_opt(word))
+               return false;
+       return true;
+}
+
+static int com_compgen(void)
+{
+       unsigned n;
+       uint32_t cword = OPT_UINT32_VAL(COMPGEN, CURRENT_WORD_INDEX);
+       int ret;
+       unsigned subcmd_idx;
+       const char *word, *prev;
+       const char * const short_opts[] = {LSG_TFORTUNE_TFORTUNE_SHORT_OPTS, NULL};
+       const char * const long_opts[] = {LSG_TFORTUNE_TFORTUNE_LONG_OPTS, NULL};
+       enum tf_word_type *word_types, prev_type;
+       bool have_dd;
+
+       if (cword == 0 || cword > lls_num_inputs(sublpr)) {
+               ERROR_LOG("current word index == %u!?\n", cword);
+               return -ERRNO_TO_TF_ERROR(EINVAL);
+       }
+       word_types = xmalloc(cword * sizeof(*word_types));
+       have_dd = get_word_types(cword, 0, short_opts, long_opts, word_types);
+       /*
+        * Locate the subcommand argument, if present. It is always the first
+        * non-option argument.
+        */
+       subcmd_idx = 0;
+       for (n = 1; n < cword; n++) {
+               if (word_types[n] != WT_NON_OPTION_ARG)
+                       continue;
+               subcmd_idx = n;
+               break;
+       }
+       if (need_subcommand_completer(cword, subcmd_idx, word_types, have_dd)) {
+               free(word_types);
+               word = lls_input(subcmd_idx, sublpr);
+               ret = lls_lookup_subcmd(word, tfortune_suite, NULL);
+               if (ret < 0) /* invalid subcommand */
+                       return 0;
+               return call_subcmd_completer(ret, subcmd_idx, cword, have_dd);
+       }
+       /* no subcommand */
+       prev_type = word_types[cword - 1];
+       prev = lls_input(cword - 1, sublpr);
+       free(word_types);
+       switch (prev_type) {
+               case WT_DUNNO:
+                       return 0;
+               case WT_COMMAND_NAME:
+               case WT_SHORT_OPT_WITHOUT_ARG:
+               case WT_OPTION_ARG:
+               case WT_NON_OPTION_ARG:
+               case WT_LONG_OPT_WITHOUT_ARG:
+                       if (!have_dd)
+                               print_option_list(supercmd_opts);
+                       /* fall through */
+               case WT_DOUBLE_DASH:
+                       print_zero_terminated_list(subcommand_names);
+                       break;
+               case WT_SHORT_OPT_WITH_ARG:
+                       if (strcmp(prev, "-b") == 0) {
+                               activate_dirname_completion();
+                               return 1;
+                       }
+                       if (strcmp(prev, "-l") == 0) {
+                               complete_loglevels();
+                               return 1;
+                       }
+                       break;
+               case WT_LONG_OPT_WITH_ARG:
+                       if (strcmp(prev, "--basename") == 0) {
+                               activate_dirname_completion();
+                               return 1;
+                       }
+                       if (strcmp(prev, "--loglevel") == 0) {
+                               complete_loglevels();
+                               return 1;
+                       }
+                       break;
+       }
+       return 0;
+}
+EXPORT_CMD_HANDLER(compgen);
+
+static int com_completer(void)
+{
+       printf("%s\n",
+               "_tfortune() \n"
+               "{ \n"
+                       "local -i i offset=${TF_OFFSET:-0} \n"
+                       "local w compopts= have_empty=false\n"
+                       "local cur=\"${COMP_WORDS[$COMP_CWORD]}\" \n"
+
+                       "i=0 \n"
+                       "COMPREPLY=() \n"
+                       "while read -d '' w; do \n"
+                               "[[ -z \"$w\" ]] && { have_empty=true; continue; }\n"
+                               "if [[ $have_empty == true ]]; then\n"
+                                       "compopt $w\n"
+                               "else \n"
+                                       "[[ \"$w\" != \"$cur\"* ]] && continue \n"
+                                       "COMPREPLY[i]=\"$w\" \n"
+                                       "let i++ \n"
+                               "fi \n"
+                       "done < <(tfortune -- compgen --current-word-index \\\n"
+                       "\"$((COMP_CWORD + offset))\" -- $TF_EXTRA \"${COMP_WORDS[@]}\")\n"
+               "} \n"
+               "complete -F _tfortune tfortune \n"
+       );
+       if (OPT_GIVEN(COMPLETER, ALIAS)) {
+               const char *ali = OPT_STRING_VAL(PRINT, EXPRESSION);
+               printf("alias %s=\"tfortune --\"\n", ali);
+               printf("_%s() { \n"
+                               "COMP_WORDS[0]='--'\n"
+                               "TF_EXTRA='tf' \n"
+                               "TF_OFFSET=1 \n"
+                               "_tfortune \"$@\" \n"
+                               "unset TF_EXTRA TF_OFFSET\n"
+                       "}\n",
+                       ali
+               );
+               printf("complete -F _%s %s \n", ali, ali);
+       }
+       return 1;
+}
+EXPORT_CMD_HANDLER(completer);
+
+int main(int argc, char **argv)
+{
+       char *errctx;
+       int ret;
+       const struct lls_command *cmd = CMD_PTR(TFORTUNE), *subcmd;
+       const struct tf_user_data *ud;
+       unsigned num_inputs;
+
+       ret = lls_parse(argc, argv, cmd, &lpr, &errctx);
+       if (ret < 0) {
+               lopsub_error(ret, &errctx);
+               exit(EXIT_FAILURE);
+       }
+       loglevel_arg_val = OPT_UINT32_VAL(TFORTUNE, LOGLEVEL);
+       handle_help_and_version();
+       num_inputs = lls_num_inputs(lpr);
+       if (num_inputs == 0) {
+               show_subcommand_summary(true /* verbose */);
+               ret = 0;
+               goto free_lpr;
+       }
+       ret = lls_lookup_subcmd(argv[argc - num_inputs], tfortune_suite, &errctx);
+       if (ret < 0) {
+               ret = lopsub_error(ret, &errctx);
+               goto free_lpr;
+       }
+       subcmd = lls_cmd(ret, tfortune_suite);
+       ret = lls_parse(num_inputs, argv + argc - num_inputs, subcmd,
+               &sublpr, &errctx);
+       if (ret < 0) {
+               ret = lopsub_error(ret, &errctx);
+               goto free_lpr;
+       }
+       ud = lls_user_data(subcmd);
+       ret = ud->handler();
+       lls_free_parse_result(sublpr, subcmd);
+       if (ret < 0)
+               ERROR_LOG("%s\n", tf_strerror(-ret));
+free_lpr:
+       lls_free_parse_result(lpr, cmd);
+       exit(ret >= 0? EXIT_SUCCESS : EXIT_FAILURE);
+}
diff --git a/tfortune.suite.m4 b/tfortune.suite.m4

new file mode 100644 (file)

index 0000000..f3e288d
--- /dev/null
+++ b/tfortune.suite.m4
@@ -0,0 +1,418 @@
+# SPDX-License-Identifier: GPL-3.0-only
+
+[suite tfortune]
+caption = Subcommands
+[supercommand tfortune]
+       purpose = fortune cookies with tags
+       [description]
+               DESCRIPTION1()
+
+               DESCRIPTION2()
+
+               DESCRIPTION3()
+       [/description]
+       synopsis = [global-options...] [--] [<subcommand> [subcommand-options...]]
+       [option help]
+               summary = print help and exit
+               short_opt = h
+       [option detailed-help]
+               summary = print help, including all details, and exit
+       [option version]
+               summary = print version and exit
+               short_opt = V
+       [option loglevel]
+               summary = control amount of logging
+               short_opt = l
+               arg_info = required_arg
+               arg_type = string
+               typestr = severity
+               values = {
+                       LSGLL_DEBUG = "debug",
+                       LSGLL_INFO = "info",
+                       LSGLL_NOTICE = "notice",
+                       LSGLL_WARNING = "warning",
+                       LSGLL_ERROR = "error",
+                       LSGLL_CRIT = "crit",
+                       LSGLL_EMERG = "emerg"
+               }
+               default_val = warning
+               [help]
+                       Log only messages with severity greater or equal than the given
+                       value. Possible values:
+
+                       debug: Produces really noisy output.
+                       info: Still noisy, but won't fill up the disk quickly.
+                       notice: Indicates normal, but significant event.
+                       warning: Unexpected events that can be handled.
+                       error: Unhandled error condition.
+                       crit: System might be unreliable.
+                       emerg: Last message before exit.
+               [/help]
+       [option basedir]
+               summary = where to look for input files
+               short_opt = b
+               arg_info = required_arg
+               arg_type = string
+               typestr = directory
+               default_val = "~/.tfortune"
+               [help]
+                       This is used to locate epigrams and named tag expressions.
+                       If any filename argument does not start with a slash, the
+                       filename is interpreted as relative to this base directory.
+
+                       Epigrams are expected in the "epigrams" subdirectory of the base
+                       directory while tag expressions are expected to be stored below
+                       "expressions".
+               [/help]
+
+[subcommand compgen]
+       purpose = perform bash command line completion
+       non-opts-name = arg...
+       [description]
+               This subcommand is executed from the bash completer for tfortune. The
+               completer sets the non-option arguments for the subcommand to the
+               words of the current command line. It obtains these words from the
+               elements of the special COMP_WORDS array which is maintained by bash.
+
+               The compgen subcommand writes all possible completions to stdout. The
+               completer reads in the completions and builds the COMPREPLY array
+               containing the matching entries. Bash examines the elements of this
+               array and completes the command line if there is a single matching
+               completion, or prints out the list of completions in case of ambiguity.
+
+       [/description]
+       [option current-word-index]
+               short_opt = c
+               summary = index of the current word in the command line
+               typestr = num
+               arg_info = required_arg
+               arg_type = uint32
+               [help]
+                       An index into the argument vector of the word containing the current
+                       cursor position. See the description of the $CWORD special variable
+                       in the bash manual.
+               [/help]
+
+[subcommand completer]
+       purpose = print the bash completer to stdout
+       [description]
+               The output of this command is designed to be re-used as input for bash.
+               Specifically, bash completion for tfortune can be activated by adding
+               the following to .bashrc: eval "$(tfortune completer)".
+       [/description]
+       [option alias]
+               summary = also add an alias and a completer for it
+               short_opt = a
+               arg_info = required_arg
+               arg_type = string
+               typestr = name
+               [help]
+                       Specify this to define a bash alias for tfortune along with the
+                       completer. Unlike the regular tfortune program, the alias will
+                       contain the double dash argument which separates the subcommand and
+                       its options from the options to tfortune itself.
+               [/help]
+
+[subcommand ede]
+       purpose = edit epigrams
+       non-opts-name = basename...
+       [description]
+               Opens the named epigram file an interactive editor. The executable
+               of the editor is determined as follows: First the contents of the
+               environment variable TFORTUNE_EDITOR is examined. If this variable
+               is empty or unset, EDITOR is tried. If EDITOR is also unset, vi
+               is executed.
+
+               The given basename is interpreted as described in the help text of the
+               --basedir option above. If --basedir is not given and the "epigrams"
+               directory does not exist, it is created.
+       [/description]
+
+[subcommand edx]
+       purpose = edit tag expressions
+       non-opts-name = basename...
+       [description]
+               Opens the named tag expression file an interactive editor. The editor
+               to execute is determined in the same way as for the "ede" subcommand.
+               Also, a non-existing "expressions" subdirectory is handled in the same
+               way.
+       [/description]
+[subcommand help]
+       purpose = list available subcommands or print command-specific help
+       non-opts-name = [command]
+       [description]
+               Without any arguments, help prints the list of available commands. When
+               called with a command name argument, it prints the help text of the
+               given command.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = show the long help text
+       [help]
+               If the optional argument is supplied, the long help text contains the
+               synopsis, the purpose and the description of the specified command,
+               followed by the option list including summary and help text of each
+               option. Without --long, the short help is shown instead. This omits
+               the description of the command and the option help.
+
+               If no command is supplied but --long is given, the list contains the
+               purpose of each command.
+       [/help]
+
+[subcommand lse]
+       purpose = list epigram files
+       [description]
+               Print the list of all epigram files.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = long listing
+               [help]
+                       This is similar to the long output of the standard ls(1) command.
+               [/help]
+
+[subcommand lst]
+       purpose = list tags
+       [description]
+               This lists all tags contained in any of the given input files.
+       [/description]
+       non-opts-name = <file>...
+       [option long]
+               short_opt = l
+               summary = long listing
+               [help]
+                       Also show how many times this tag appears.
+               [/help]
+       [option sort-by-count]
+               short_opt = c
+               summary = sort by occurrence count rather than alphabetically.
+       [option reverse]
+               short_opt = r
+               summary = reverse sort order
+
+[subcommand lsx]
+       purpose = list tag expressions
+       [description]
+               Print the list of all named tag expressions.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = long listing
+               [help]
+                       This is similar to the long output of the standard ls(1) command.
+               [/help]
+[subcommand print]
+       purpose = print epigram(s)
+       [description]
+               Unless --all is given, this picks an epigram by random from the
+               given file(s) which is admissible with respect to the given named
+               tag expression. If no file is given, all files are taken into account.
+       [/description]
+       non-opts-name = [file...]
+       [option expression]
+               short_opt = x
+               summary = name of the tag expression
+               arg_info = required_arg
+               arg_type = string
+               typestr = filename
+               default_val = /dev/null
+               [help]
+                       Use the tag expression stored in the given file to define the
+                       admissible epigrams. The special string "-" means to read the tag
+                       expression from stdin. The default value corresponds to the empty
+                       tag expression for which all epigrams are admissible.
+               [/help]
+       [option all]
+               short_opt = a
+               summary = print all admissible epigrams, not just a random one.
+       [option tags]
+               short_opt = t
+               summary = print also the tags of the selected epigram
+[subcommand stats]
+       purpose = show statistics
+       [description]
+               This prints several counts and averages about the epigrams, tags and
+               tag expressions.
+       [/description]
+       [option verbose]
+               short_opt = v
+               summary = include statistics about hash table utilization
+
+[section Input file format]
+       Input files may contain arbitrary many epigrams. The end of each
+       epigram must be marked with a "tag" line. The tag line consists of
+       four dashes, a space character, and a comma separated list of tags.
+       Tags may span multiple words, but no comma is allowed. The following
+       is an example input file for tfortune. It contains a single epigram
+       with two tags.
+
+       .RS
+       .EX
+       Anyone who attempts to generate random numbers by deterministic means
+       is, of course, living in a state of sin.          -- John von Neumann
+       ---- math,religion
+       .EE
+       .RE
+[/section]
+
+[section Tag Expressions]
+       Tag expressions are based on a context-free grammar in which the
+       following keywords are defined:
+       .IP \(bu 4
+       .B tag("foo")
+       evaluates to
+       .I true
+       if the epigram contains a tag named
+       .IR foo ,
+       .I false
+       otherwise.
+       .IP \(bu 4
+       .B len
+       evaluates to the number of bytes of the epigram.
+       .IP \(bu 4
+       .B text
+       evaluates to contents of the epigram. This is useful for pattern
+       matching.
+       .P
+
+       The grammar admits the following operators and relations:
+       .IP \(bu 4
+       .BR && ,
+       .BR || ,
+       .BR !:
+       logical operators for
+       .IR and ,
+       .IR or ,
+       and
+       .IR not .
+       .IP \(bu 4
+       .BR + ,
+       .BR - ,
+       .BR * ,
+       .BR + :
+       arithmetic operators for addition, negation or subtraction (unary
+       or binary minus), multiplication and division. Arithmetic is always
+       performed on 64 bit signed integers.
+       .IP \(bu 4
+       .BR < ,
+       .BR > ,
+       .BR <=,
+       .BR >=,
+       .BR ==,
+       .BR != :
+       .BR =~ :
+       less than, greater than, less or equal than, greater or equal than,
+       equal to, not equal to, regular expression match.
+       Regular expression patterns are of the form
+       .IR /pattern/[flags] .
+       That is, the pattern is delimited by slashes, and is followed by
+       zero or more characters, each specifying a flag according to the
+       following list
+       .RS
+       .IP \(bu 4
+       .BR i :
+       Ignore case in match
+       .RI ( REG_ICASE )
+       .IP \(bu 4
+       .BR n :
+       Treat newline as an ordinary character
+       .RI ( REG_NEWLINE )
+
+       .RE
+       .RS
+       Note that only extended regular expression patterns are supported. See
+       regex(3) for details.
+       .RE
+
+       The above operators obey the usual associativity and precedence
+       rules. Parentheses can be used to change precedence.
+
+       A
+       .I tag expression
+       is an expression in this grammar which evaluates to either
+       .I true
+       or
+       .IR false .
+       Epigrams for which the expression is
+       .I true
+       are called
+       .IR admissible .
+
+       For example, the above epigram is admissible for the tag expression
+
+       .RS
+       .EX
+               (tag("math") || tag("physics")) && len < 1000 && text =~ /neumann/i
+       .EE
+       .RE
+
+       It is not admissible for
+
+       .RS
+       .EX
+               (tag("math") && tag("physics")) || len > 1000 || text =~ /neumann/
+       .EE
+       .RE
+[/section]
+
+[section Examples]
+       .IP \(bu 2
+       Print a random epigram:
+
+       .RS 6
+       .EX
+               tfortune print
+       .EE
+       .RE
+       .IP \(bu 2
+       Print a random short (less than 100 bytes) epigram:
+
+       .RS 6
+       .EX
+               echo 'len < 100' | tfortune -l debug -- print -x -
+       .EE
+       .RE
+       .IP \(bu 2
+       List tags, including usage counts, sort by count in descending order:
+
+       .RS 6
+       .EX
+               tfortune -- lst -lcr
+       .EE
+       .RE
+       .IP \(bu 2
+       Activate bash completion and define the
+       .I tf
+       alias:
+
+       .RS 6
+       .EX
+       eval "$(tfortune completer -a tf)"
+       .EE
+       .RE
+
+[/section]
+
+[section copyright]
+       Written by AUTHOR()
+       .br
+       Copyright (C) COPYRIGHT_YEAR() AUTHOR()
+       .br
+       License: LICENSE()
+       .br
+       This is free software: you are free to change and redistribute it.
+       .br
+       There is NO WARRANTY, to the extent permitted by law.
+       .br
+       Report bugs to
+       .MT <PACKAGE_BUGREPORT()>
+       AUTHOR()
+       .ME
+       .br
+       Homepage:
+       .UR PACKAGE_URL()
+       .UE
+[/section]
+[section see also]
+       .BR fortune (6)
+[/section]
diff --git a/txp.lex b/txp.lex

new file mode 100644 (file)

index 0000000..b51002f
--- /dev/null
+++ b/txp.lex
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+ /*
+  * Since we do not supply yywrap(), we use noyywrap to instruct the scanner to
+  * behave as though yywrap() returned 1.
+  */
+%option noyywrap
+
+ /*
+  * We don't want symbols to clash with those of other flex users, particularly
+  * lopsub.
+  */
+%option prefix="txp_yy"
+
+ /*
+  * Generate a scanner that maintains the number of the current line read from
+  * its input in the yylineno variable.
+  */
+%option yylineno
+
+ /* Generate a bison-compatible scanner. */
+%option bison-bridge bison-locations
+
+ /*
+  * Warn (in particular) if the default rule can be matched but no default rule
+  * has been given.
+  */
+%option warn
+
+ /*
+  * Generate a scanner which is portable and safe to use in one or more threads
+  * of control.
+  */
+%option reentrant
+
+ /*
+  * Generate a scanner which always looks one extra character ahead. This is a
+  * bit faster than an interactive scanner for which look ahead happens only
+  * when necessary.
+  */
+%option never-interactive
+
+%{
+#include "tf.h"
+
+#define YYSTYPE TXP_YYSTYPE
+#define YYLTYPE TXP_YYLTYPE
+#define YY_DECL int txp_yylex(TXP_YYSTYPE *yylval_param, TXP_YYLTYPE *yylloc_param, \
+       struct txp_context *ctx, struct txp_ast_node **ast, txp_yyscan_t yyscanner)
+#include "txp.bison.h"
+#define TXP_YY_USER_ACTION do {txp_yylloc->first_line = txp_yylineno;} while (0);
+%}
+DECIMAL_CONSTANT  (0|([[:digit:]]{-}[0])[[:digit:]]*)
+STRING_LITERAL    \"([^\"\\\n]|(\\[\"\\abfnrtv]))*\"
+REGEX_PATTERN     \/([^\/\\\n]|(\\[\/\\abfnrtv]))*\/([in])*
+%%
+
+tag {return TAG;}
+len {return LEN;}
+text {return TEXT;}
+true {return TRUE;}
+false {return FALSE;}
+
+[[:space:]]+|#.*\n /* skip comments and whitespace */
+
+"("|")"|","|"+"|"-"|"*"|"/"|"<"|">" {return yytext[0];}
+
+"||" {return OR;}
+"&&" {return AND;}
+"!" {return NOT;}
+"==" {return EQUAL;}
+"!=" {return NOT_EQUAL;}
+"<=" {return LESS_OR_EQUAL;}
+">=" {return GREATER_OR_EQUAL;}
+"=~" {return REGEX_MATCH;}
+
+{DECIMAL_CONSTANT} {
+       int ret;
+       yylval->node = txp_new_ast_leaf_node(NUM);
+       ret = atoi64(yytext, &yylval->node->sv.intval);
+       if (ret < 0) {
+               free(yylval->node);
+               txp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext,
+                       tf_strerror(-ret));
+               return -E_TXP;
+       }
+       return NUM;
+}
+
+{STRING_LITERAL} {
+       yylval->node = txp_new_ast_leaf_node(STRING_LITERAL);
+       parse_quoted_string(yytext, "\"\"", &yylval->node->sv.strval);
+       return STRING_LITERAL;
+}
+
+{REGEX_PATTERN} {
+       int ret;
+       yylval->node = txp_new_ast_leaf_node(REGEX_PATTERN);
+       ret = txp_parse_regex_pattern(yytext, &yylval->node->sv.re_pattern);
+       if (ret < 0) {
+               txp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext,
+                       tf_strerror(-ret));
+               return -E_TXP;
+       }
+       return REGEX_PATTERN;
+}
+
+. {
+       txp_parse_error(yylloc->first_line, ctx, "unrecognized text: %s",
+               yytext);
+       return -E_TXP;
+}
diff --git a/txp.y b/txp.y

new file mode 100644 (file)

index 0000000..ee9080d
--- /dev/null
+++ b/txp.y
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+/*
+ * Provide more verbose and specific error messages instead of just "syntax
+ * error".
+ */
+%define parse.error verbose
+
+/*
+ * Verbose error messages may contain incorrect information if LAC (Lookahead
+ * Correction) is not enabled.
+ */
+%define parse.lac full
+
+/* Avoid symbol clashes (lopsub might also expose yy* symbols). */
+%define api.prefix {txp_yy}
+
+/*
+ * Although locations are automatically enabled as soon as the grammar uses the
+ * special @N tokens, specifying %locations explicitly allows for more accurate
+ * syntax error messages.
+ */
+%locations
+
+/*
+ * Generate a pure (reentrant) parser. With this option enabled, yylval and
+ * yylloc become local variables in yyparse(), and a different calling
+ * convention is used for yylex().
+ */
+%define api.pure full
+
+/* Additional arguments to yylex(), yyparse() and yyerror() */
+%param {struct txp_context *ctx}
+%param {struct txp_ast_node **ast}
+%param {txp_yyscan_t yyscanner} /* reentrant lexers */
+
+%{
+#include "tf.h"
+#include "txp.bison.h"
+
+int yylex(TXP_YYSTYPE *lvalp, TXP_YYLTYPE *llocp, struct txp_context *ctx,
+               struct txp_ast_node **ast, txp_yyscan_t yyscanner);
+static void yyerror(YYLTYPE *llocp, struct txp_context *ctx,
+               struct txp_ast_node **ast, txp_yyscan_t yyscanner, const char *msg);
+
+%}
+
+%union {
+       struct txp_ast_node *node;
+}
+
+/* terminals */
+%token <node> NUM
+%token <node> STRING_LITERAL
+%token <node> REGEX_PATTERN
+
+/* keywords with semantic value */
+%token <node> LEN
+%token <node> FALSE TRUE
+%token <node> TEXT
+
+/* keywords without semantic value */
+%token TAG
+
+/* operators, ordered by precendence */
+%left OR
+%left AND
+%left EQUAL NOT_EQUAL
+%left LESS_THAN LESS_OR_EQUAL GREATER_OR_EQUAL REGEX_MATCH FILENAME_MATCH
+%left '-' '+'
+%left '*' '/'
+%right NOT NEG /* negation (unary minus) */
+
+/* nonterminals */
+%type <node> string
+%type <node> exp
+%type <node> boolexp
+
+%%
+
+program:
+        /* empty */ {*ast = NULL; return 0;}
+        | string {*ast = $1; return 0;}
+        | exp {*ast = $1; return 0;}
+       | boolexp {*ast = $1; return 0;}
+
+string: STRING_LITERAL {$$ = $1;}
+       | TEXT {$$ = txp_new_ast_leaf_node(TEXT);}
+;
+
+exp: NUM {$$ = $1;}
+        | exp '+' exp {$$ = ast_node_new_binary('+', $1, $3);}
+        | exp '-' exp {$$ = ast_node_new_binary('-', $1, $3);}
+        | exp '*' exp {$$ = ast_node_new_binary('*', $1, $3);}
+        | exp '/' exp {$$ = ast_node_new_binary('/', $1, $3);}
+        | '-' exp %prec NEG {$$ = ast_node_new_unary(NEG, $2);}
+        | '(' exp ')' {$$ = $2;}
+       | LEN {$$ = txp_new_ast_leaf_node(LEN);}
+;
+
+boolexp: TAG '(' STRING_LITERAL ')' {$$ = ast_node_new_unary(TAG, $3);}
+       | TRUE {$$ = txp_new_ast_leaf_node(TRUE);}
+       | FALSE {$$ = txp_new_ast_leaf_node(FALSE);}
+       | '(' boolexp ')' {$$ = $2;}
+       | boolexp OR boolexp {$$ = ast_node_new_binary(OR, $1, $3);}
+       | boolexp AND boolexp {$$ = ast_node_new_binary(AND, $1, $3);}
+       | NOT boolexp {$$ = ast_node_new_unary(NOT, $2);}
+       | exp EQUAL exp {$$ = ast_node_new_binary(EQUAL, $1, $3);}
+       | exp NOT_EQUAL exp {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);}
+       | exp '<' exp {$$ = ast_node_new_binary('<', $1, $3);}
+       | exp '>' exp {$$ = ast_node_new_binary('>', $1, $3);}
+       | exp LESS_OR_EQUAL exp {
+               $$ = ast_node_new_binary(LESS_OR_EQUAL, $1, $3);
+       }
+       | exp GREATER_OR_EQUAL exp {
+               $$ = ast_node_new_binary(GREATER_OR_EQUAL, $1, $3);
+       }
+       | string REGEX_MATCH REGEX_PATTERN {
+               $$ = ast_node_new_binary(REGEX_MATCH, $1, $3);
+       }
+       | string EQUAL string {$$ = ast_node_new_binary(EQUAL, $1, $3);}
+       | string NOT_EQUAL string {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);}
+;
+%%
+
+/* Called by yyparse() on error */
+static void yyerror(YYLTYPE *llocp, struct txp_context *ctx,
+               __attribute__ ((unused)) struct txp_ast_node **ast,
+               __attribute__ ((unused)) txp_yyscan_t yyscanner,
+               const char *msg)
+{
+       txp_parse_error(llocp->first_line, ctx, "%s", msg);
+}
diff --git a/util.c b/util.c

new file mode 100644 (file)

index 0000000..69c759e
--- /dev/null
+++ b/util.c
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#include "tf.h"
+
+DEFINE_TF_ERRLIST;
+
+int atoi64(const char *str, int64_t *value)
+{
+       char *endptr;
+       long long tmp;
+
+       errno = 0; /* To distinguish success/failure after call */
+       tmp = strtoll(str, &endptr, 10);
+       if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
+               return -E_ATOI_OVERFLOW;
+       /*
+        * If there were no digits at all, strtoll() stores the original value
+        * of str in *endptr.
+        */
+       if (endptr == str)
+               return -E_ATOI_NO_DIGITS;
+       /*
+        * The implementation may also set errno and return 0 in case no
+        * conversion was performed.
+        */
+       if (errno != 0 && tmp == 0)
+               return -E_ATOI_NO_DIGITS;
+       if (*endptr != '\0') /* Further characters after number */
+               return -E_ATOI_JUNK_AT_END;
+       *value = tmp;
+       return 1;
+}
+
+__attribute__ ((warn_unused_result))
+void *xrealloc(void *p, size_t size)
+{
+       /*
+        * No need to check for NULL pointers: If p is NULL, the call
+        * to realloc is equivalent to malloc(size)
+        */
+       assert(size);
+       if (!(p = realloc(p, size))) {
+               EMERG_LOG("realloc failed (size = %zu), aborting\n", size);
+               exit(EXIT_FAILURE);
+       }
+       return p;
+}
+
+__attribute__ ((warn_unused_result))
+void *xmalloc(size_t size)
+{
+       return xrealloc(NULL, size);
+}
+
+__attribute__ ((warn_unused_result))
+void *xcalloc(size_t size)
+{
+       void *p = xmalloc(size);
+       memset(p, 0, size);
+       return p;
+}
+
+__attribute__ ((warn_unused_result))
+char *xstrdup(const char *str)
+{
+       char *p = strdup(str);
+
+       if (p)
+               return p;
+       EMERG_LOG("strdup failed, aborting\n");
+       exit(EXIT_FAILURE);
+}
+
+/*
+ * Get the home directory of the current user. Returns a dynamically allocated
+ * string that must be freed by the caller.
+ */
+__attribute__ ((warn_unused_result))
+char *get_homedir(void)
+{
+       struct passwd *pw = getpwuid(getuid());
+
+       if (!pw) {
+               EMERG_LOG("could not get home directory: %s\n",
+                       strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       return xstrdup(pw->pw_dir);
+}
+
+/*
+ * Print a formated message to a dynamically allocated string.
+ *
+ * This function is similar to vasprintf(), a GNU extension which is not in C
+ * or POSIX. It allocates a string large enough to hold the output including
+ * the terminating null byte. The allocated string is returned via the first
+ * argument and must be freed by the caller. However, unlike vasprintf(), this
+ * function calls exit() if insufficient memory is available, while vasprintf()
+ * returns -1 in this case.
+ *
+ * It returns the number of bytes written, not including the terminating \p
+ * NULL character.
+ */
+__attribute__ ((format (printf, 2, 0)))
+unsigned xvasprintf(char **result, const char *fmt, va_list ap)
+{
+       int ret;
+       size_t size = 150;
+       va_list aq;
+
+       *result = xmalloc(size + 1);
+       va_copy(aq, ap);
+       ret = vsnprintf(*result, size, fmt, aq);
+       va_end(aq);
+       assert(ret >= 0);
+       if ((size_t)ret < size)
+               return ret;
+       size = ret + 1;
+       *result = xrealloc(*result, size);
+       va_copy(aq, ap);
+       ret = vsnprintf(*result, size, fmt, aq);
+       va_end(aq);
+       assert(ret >= 0 && (size_t)ret < size);
+       return ret;
+}
+
+__attribute__ ((format (printf, 2, 3)))
+/* Print to a dynamically allocated string, variable number of arguments.  */
+unsigned xasprintf(char **result, const char *fmt, ...)
+{
+       va_list ap;
+       unsigned ret;
+
+       va_start(ap, fmt);
+       ret = xvasprintf(result, fmt, ap);
+       va_end(ap);
+       return ret;
+}
+
+/*
+ * Compile a regular expression.
+ *
+ * This simple wrapper calls regcomp(3) and logs a message on errors.
+ */
+int xregcomp(regex_t *preg, const char *regex, int cflags)
+{
+       char *buf;
+       size_t size;
+       int ret = regcomp(preg, regex, cflags);
+
+       if (ret == 0)
+               return 1;
+       size = regerror(ret, preg, NULL, 0);
+       buf = xmalloc(size);
+       regerror(ret, preg, buf, size);
+       ERROR_LOG("%s\n", buf);
+       free(buf);
+       return -E_REGEX;
+}
+
+/* Write input from fd to dynamically allocated buffer. */
+int fd2buf(int fd, struct iovec *result)
+{
+       const size_t max_size = 32 * 1024;
+       size_t loaded = 0;
+       int ret;
+       struct iovec iov;
+
+       iov.iov_len = 100; /* guess that's sufficient */
+       iov.iov_base = xmalloc(iov.iov_len);
+       for (;;) {
+               ret = read(fd, iov.iov_base + loaded,  iov.iov_len - loaded);
+               if (ret < 0)
+                       ret = -ERRNO_TO_TF_ERROR(errno);
+               if (ret <= 0)
+                       break;
+               loaded += ret;
+               if (loaded >= iov.iov_len) {
+                       iov.iov_len *= 2;
+                       ret = -ERRNO_TO_TF_ERROR(EOVERFLOW);
+                       if (iov.iov_len >= max_size)
+                               break;
+                       iov.iov_base = xrealloc(iov.iov_base, iov.iov_len);
+               }
+       };
+       if (ret < 0) {
+               free(iov.iov_base);
+               result->iov_base = NULL;
+               result->iov_len = 0;
+               return ret;
+       }
+       iov.iov_len = loaded;
+       iov.iov_base = xrealloc(iov.iov_base, loaded + 1);
+       *((char *)iov.iov_base + loaded) = '\0';
+       *result = iov;
+       return 1;
+}
+
+__attribute__ ((format (printf, 2, 3)))
+void tf_log(int ll, const char* fmt,...)
+{
+       va_list argp;
+       if (ll < loglevel_arg_val)
+               return;
+       va_start(argp, fmt);
+       vfprintf(stderr, fmt, argp);
+       va_end(argp);
+}
+
+struct regfile_iter {
+       DIR *dir;
+       int dfd;
+       struct dirent *entry;
+       struct stat stat;
+};
+
+void regfile_iter_next(struct regfile_iter *iter)
+{
+       for (;;) {
+               iter->entry = readdir(iter->dir);
+               if (!iter->entry)
+                       return;
+               if (!strcmp(iter->entry->d_name, "."))
+                       continue;
+               if (!strcmp(iter->entry->d_name, ".."))
+                       continue;
+               if (fstatat(iter->dfd, iter->entry->d_name, &iter->stat, 0) == -1)
+                       continue;
+               if (!S_ISREG(iter->stat.st_mode))
+                       continue;
+               if (iter->stat.st_size == 0) /* skip over empty files */
+                       continue;
+               return;
+       }
+}
+
+void regfile_iter_new(const char *dirname, struct regfile_iter **result)
+{
+       struct regfile_iter *iter = xmalloc(sizeof(*iter));
+
+       iter->dir = opendir(dirname);
+       if (!iter->dir) {
+               EMERG_LOG("opendir %s: %s\n", dirname, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       iter->dfd = dirfd(iter->dir);
+       assert(iter->dfd >= 0);
+       regfile_iter_next(iter);
+       *result = iter;
+}
+
+static void *xmmap(size_t sz, int fd, const char *path)
+{
+       void *result = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+
+       if (result == MAP_FAILED) {
+               EMERG_LOG("mmap %s: %s\n", path, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       return result;
+}
+
+void mmap_file(const char *path, struct iovec *iov)
+{
+       int fd, ret;
+
+       ret = open(path, 0);
+       if (ret < 0) {
+               EMERG_LOG("could not open %s: %s\n", path, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       fd = ret;
+       iov->iov_len = lseek(fd, 0, SEEK_END);
+       if (iov->iov_len == (size_t)(off_t)-1) {
+               EMERG_LOG("lseek %s: %s\n", path, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       iov->iov_base = xmmap(iov->iov_len, fd, path);
+       close(fd);
+}
+
+bool regfile_iter_map(const struct regfile_iter *iter, struct iovec *result)
+{
+       int ret, fd;
+       void *map;
+       const char *path;
+
+       if (!iter->entry)
+               return false;
+       path = iter->entry->d_name;
+       ret = openat(iter->dfd, path, O_RDONLY, 0);
+       if (ret < 0) {
+               EMERG_LOG("could not open %s: %s\n", path, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       fd = ret;
+       map = xmmap(iter->stat.st_size, fd, path);
+       close(fd);
+       result->iov_len = iter->stat.st_size;
+       result->iov_base = map;
+       return true;
+}
+
+const char *regfile_iter_basename(const struct regfile_iter *iter)
+{
+       if (!iter->entry)
+               return NULL;
+       return iter->entry->d_name;
+}
+
+const struct stat *regfile_iter_stat(const struct regfile_iter *iter)
+{
+       return &iter->stat;
+}
+
+void regfile_iter_free(struct regfile_iter *iter)
+{
+       closedir(iter->dir);
+       free(iter);
+}
diff --git a/version-gen.sh b/version-gen.sh

new file mode 100755 (executable)

index 0000000..a2c9075
--- /dev/null
+++ b/version-gen.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# SPDX-License-Identifier: GPL-3.0-only
+
+version_file='version.c'
+ver='unnamed_version'
+# First try git, then gitweb, then default.
+if [ -e '.git' -o -e '../.git' ]; then
+       git_ver=$(git describe --abbrev=4 HEAD 2>/dev/null)
+       [ -z "$git_ver" ] && git_ver="$ver"
+       # update stat information in index to match working tree
+       git update-index -q --refresh > /dev/null
+       # if there are differences (exit code 1), the working tree is dirty
+       git diff-index --quiet HEAD || git_ver=$git_ver-dirty
+       ver=$git_ver
+elif [ "${PWD%%-*}" = 'tfortune-' ]; then
+       ver=${PWD##*/tfortune-}
+fi
+ver=${ver#v}
+
+echo "$ver"
+
+# update version file if necessary
+content="const char *tf_version(void) {return \"$ver\";};"
+[ -r "$version_file" ] && echo "$content" | cmp -s - $version_file && exit 0
+echo >&2 "new git version: $ver"
+echo "$content" > $version_file
author	Andre Noll <maan@tuebingen.mpg.de>
	Mon, 19 Mar 2018 17:37:28 +0000 (18:37 +0100)
committer	Andre Noll <maan@tuebingen.mpg.de>
	Mon, 19 Mar 2018 18:09:43 +0000 (19:09 +0100)
.gitignore	[new file with mode: 0644]	patch \| blob
INSTALL	[new file with mode: 0644]	patch \| blob
Makefile	[new file with mode: 0644]	patch \| blob
README	[new file with mode: 0644]	patch \| blob
ast.c	[new file with mode: 0644]	patch \| blob
config.mak.in	[new file with mode: 0644]	patch \| blob
configure	[new file with mode: 0755]	patch \| blob
configure.ac	[new file with mode: 0644]	patch \| blob
err.h	[new file with mode: 0644]	patch \| blob
index.html.m4	[new file with mode: 0644]	patch \| blob
linhash.c	[new file with mode: 0644]	patch \| blob
logo.svg	[new file with mode: 0644]	patch \| blob
tf.h	[new file with mode: 0644]	patch \| blob
tfortune.c	[new file with mode: 0644]	patch \| blob
tfortune.suite.m4	[new file with mode: 0644]	patch \| blob
txp.lex	[new file with mode: 0644]	patch \| blob
txp.y	[new file with mode: 0644]	patch \| blob
util.c	[new file with mode: 0644]	patch \| blob
version-gen.sh	[new file with mode: 0755]	patch \| blob