]> git.tuebingen.mpg.de Git - tfortune.git/commitdiff
Initial commit. v1.0.0
authorAndre Noll <maan@tuebingen.mpg.de>
Mon, 19 Mar 2018 17:37:28 +0000 (18:37 +0100)
committerAndre Noll <maan@tuebingen.mpg.de>
Mon, 19 Mar 2018 18:09:43 +0000 (19:09 +0100)
Tfortune was maintained in a non-public git repository between 2016-04
and 2018-01. By then it was moved to a dedicated repository, rewritten
to support the tag expression grammar and made scalable by introducing
linear hashing. At the same time command line parsing was switched
to the lopsub library, the command line options were redesigned and
properly documented, and the nifty logo was added.

In 2018-03 tfortune reached version 1.0 and was finally made public. All
commits that led to version 1.0 have been discarded, so this repository
contains only the final result as a single commit.

Many thanks to Effie Symeonidi who gave valuable feedback regarding the
installation instructions.

19 files changed:
.gitignore [new file with mode: 0644]
INSTALL [new file with mode: 0644]
Makefile [new file with mode: 0644]
README [new file with mode: 0644]
ast.c [new file with mode: 0644]
config.mak.in [new file with mode: 0644]
configure [new file with mode: 0755]
configure.ac [new file with mode: 0644]
err.h [new file with mode: 0644]
index.html.m4 [new file with mode: 0644]
linhash.c [new file with mode: 0644]
logo.svg [new file with mode: 0644]
tf.h [new file with mode: 0644]
tfortune.c [new file with mode: 0644]
tfortune.suite.m4 [new file with mode: 0644]
txp.lex [new file with mode: 0644]
txp.y [new file with mode: 0644]
util.c [new file with mode: 0644]
version-gen.sh [new file with mode: 0755]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..442c3ef
--- /dev/null
@@ -0,0 +1,16 @@
+*.d
+*.o
+*.html
+autom4te.cache
+config.h*
+config.mak
+config.log
+config.status
+configure.sh
+*.1
+*.lsg.*
+*.flex.*
+*.bison.*
+version.c
+tfortune
+tfortune.suite
diff --git a/INSTALL b/INSTALL
new file mode 100644 (file)
index 0000000..52a1fd7
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1 @@
+Run "make README".
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..9500595
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,229 @@
+# SPDX-License-Identifier: GPL-3.0-only
+
+.SUFFIXES:
+MAKEFLAGS += -Rr
+.ONESHELL:
+.SHELLFLAGS := -ec
+
+RM := rm -f
+MKDIR_P := mkdir -p
+CHMOD := chmod
+
+ifeq ("$(origin CC)", "default")
+        CC := cc
+endif
+ifeq ("$(origin V)", "command line")
+       SAY =
+else
+       SAY = @echo '$(strip $(1))'
+endif
+
+AUTHOR := Andre Noll
+COPYRIGHT_YEAR := 2018
+CLONE_URL := git://git.tuebingen.mpg.de/tfortune
+GITWEB_URL := http://git.tuebingen.mpg.de/tfortune.git
+LICENSE := GNU GPL version 3
+LICENSE_URL := https://www.gnu.org/licenses/gpl-3.0-standalone.html
+
+define DESCRIPTION1 :=
+       Like fortune(1), tfortune is a Unix command line utility which prints
+       a random epigram. Epigrams are stored as plain text files, but they
+       must be annotated with tags to make full use of the features which
+       tfortune offers over other implementations.
+endef
+
+define DESCRIPTION2 :=
+       Tfortune has a built-in matching language for epigrams. User-supplied
+       tag expressions define subsets of admissible epigrams. If a tag
+       expression is given, epigrams are picked from the admissible subset
+       only.
+endef
+
+define DESCRIPTION3 :=
+       Besides printing random epigrams, tfortune supports a number of other
+       subcommands which help to maintain epigrams and tag expressions. There
+       are subcommands for listing and for editing the input files, and for
+       printing statistics about epigrams, expressions and tags. A subcommand
+       that enables bash command line completion is also included.
+endef
+LOGLEVELS := LL_DEBUG,LL_INFO,LL_NOTICE,LL_WARNING,LL_ERROR,LL_CRIT,LL_EMERG
+GIT_VERSION := $(shell ./version-gen.sh)
+cc_version := $(shell $(CC) --version | head -n 1)
+build_date := $(shell date)
+uname_rs := $(shell uname -rs)
+
+all := tfortune tfortune.1
+all: $(all)
+
+deps := txp.bison.d txp.flex.d ast.d tfortune.d util.d txp.flex.d \
+       tfortune.lsg.d version.d linhash.d
+
+ifeq ($(findstring clean, $(MAKECMDGOALS)),)
+-include $(deps)
+include config.mak
+endif
+
+config.h.in: configure.ac
+       $(call SAY, AH $<)
+       autoheader -f
+config.mak config.h: config.status config.mak.in config.h.in
+       $(call SAY, CS $@)
+       ./config.status -q
+       test -f config.h && touch config.h
+config.status: configure.sh
+       $(call SAY, CFG)
+       if test -x config.status; then \
+               ./config.status --recheck; \
+       else \
+               ./configure.sh --no-create; \
+       fi
+configure.sh: configure.ac
+       $(call SAY, AC $<)
+       autoconf configure.ac > $@
+       $(CHMOD) 755 $@
+
+.PRECIOUS: %.flex.c %.bison.c %.bison.h %.lsg.h %.lsg.c %.lsg.h
+
+# created by version-gen.sh
+version.c:
+
+index.html tfortune.suite: %: %.m4
+       $(call SAY, M4 $<)
+       $(M4) -D "AUTHOR=$(AUTHOR)" -D "COPYRIGHT_YEAR=$(COPYRIGHT_YEAR)" \
+               -D "PACKAGE_BUGREPORT=$(PACKAGE_BUGREPORT)" \
+               -D "PACKAGE_URL=$(PACKAGE_URL)" \
+               -D "CLONE_URL=$(CLONE_URL)" \
+               -D "GITWEB_URL=$(GITWEB_URL)" \
+               -D "LICENSE=$(LICENSE)" \
+               -D "LICENSE_URL=$(LICENSE_URL)" \
+               -D "DESCRIPTION1=$(DESCRIPTION1)" \
+               -D "DESCRIPTION2=$(DESCRIPTION2)" \
+               -D "DESCRIPTION3=$(DESCRIPTION3)" $< > $@
+%.lsg.c: %.suite
+       $(call SAY, LSGC $<)
+       $(LOPSUBGEN) --gen-c < $<
+
+%.lsg.h: %.suite
+       $(call SAY, LSGH $<)
+       $(LOPSUBGEN) --gen-header < $<
+
+%.1: %.suite version.c
+       $(call SAY, LSGM $<)
+       $(LOPSUBGEN) --gen-man=$@ --version-string $(GIT_VERSION) < $<
+
+%.flex.c: %.lex
+       $(call SAY, FLEX $<)
+       $(FLEX) -o $@ $<
+
+%.bison.c %.bison.h: %.y
+       $(call SAY, BISON $<)
+       $(BISON) --defines=$(notdir $(<:.y=.bison.h)) \
+               --output=$(notdir $(<:.y=.bison.c)) $<
+
+TF_CPPFLAGS += -Wunused-macros
+TF_CPPFLAGS += -DCOPYRIGHT_YEAR='"$(COPYRIGHT_YEAR)"'
+TF_CPPFLAGS += -DAUTHOR='"$(AUTHOR)"'
+TF_CPPFLAGS += -DLOGLEVELS='$(LOGLEVELS)'
+TF_CPPFLAGS += -DBUILD_DATE='"$(build_date)"'
+TF_CPPFLAGS += -DCC_VERSION='"$(cc_version)"'
+TF_CPPFLAGS += -DUNAME_RS='"$(uname_rs)"'
+TF_CPPFLAGS += -DLICENSE='"$(LICENSE)"'
+TF_CPPFLAGS += -DLICENSE_URL='"$(LICENSE_URL)"'
+TF_CPPFLAGS += -I/usr/local/include
+
+TF_CFLAGS += -g
+TF_CFLAGS += -O2
+TF_CFLAGS += -Wall
+TF_CFLAGS += -Wundef -W -Wuninitialized
+TF_CFLAGS += -Wchar-subscripts
+TF_CFLAGS += -Werror-implicit-function-declaration
+TF_CFLAGS += -Wmissing-noreturn
+TF_CFLAGS += -Wbad-function-cast
+TF_CFLAGS += -Wredundant-decls
+TF_CFLAGS += -Wdeclaration-after-statement
+TF_CFLAGS += -Wformat -Wformat-security -Wmissing-format-attribute
+#TF_CFLAGS += -fsanitize=address
+
+%.flex.o: TF_CFLAGS += -Wno-all -Wno-sign-compare -Wno-unused-parameter
+%.flex.o %.bison.o: TF_CPPFLAGS += -Wno-unused-macros
+
+%.o: %.c tfortune.lsg.h txp.bison.h
+       $(call SAY, CC $<)
+       $(CC) \
+               -o $@ -c -MMD -MF $(*F).d \
+               -MT $@ $(TF_CPPFLAGS) $(CPPFLAGS) $(TF_CFLAGS) $(CFLAGS) $<
+
+TF_LDFLAGS = -llopsub
+#TF_LDFLAGS += -fsanitize=address
+
+tfortune: $(deps:.d=.o)
+       $(call SAY, LD $@)
+       $(CC) $^ -o $@ $(TF_LDFLAGS) $(LDFLAGS)
+
+.PHONY: all mostlyclean clean distclan maintainer-clean install \
+       install-strip README
+
+mostlyclean:
+       $(RM) tfortune *.o *.d
+clean: mostlyclean
+       $(RM) *.lsg.* *.flex.* *.bison.* *.1 *.suite
+distclean: clean
+       $(RM) config.mak config.status config.log config.h config.h.in version.c
+       $(RM) -r autom4te.cache
+maintainer-clean:
+       git clean -dfqx > /dev/null 2>&1
+
+mandir := $(datarootdir)/man/man1
+INSTALL ?= install
+INSTALL_PROGRAM ?= $(INSTALL) -m 755
+INSTALL_DATA ?= $(INSTALL) -m 644
+ifneq ($(findstring strip, $(MAKECMDGOALS)),)
+       strip_option := -s
+endif
+
+install install-strip: all
+       $(MKDIR_P) $(DESTDIR)$(bindir) $(DESTDIR)$(mandir)
+       $(INSTALL_PROGRAM) $(strip_option) tfortune $(DESTDIR)$(bindir)
+       $(INSTALL_DATA) tfortune.1 $(DESTDIR)$(mandir)
+
+define README :=
+tfortune - fortune cookies with tags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+$(DESCRIPTION1)
+
+$(DESCRIPTION2)
+
+$(DESCRIPTION3)
+
+Resources
+~~~~~~~~~
+|      git clone URL: $(CLONE_URL)
+|      web page: $(PACKAGE_URL)
+|      email: $(AUTHOR) <$(PACKAGE_BUGREPORT)>
+
+Documentation
+~~~~~~~~~~~~~
+See tfortune.suite. Or build the man page with \"make tfortune.1\"
+and run \"man -l tfortune.1\".
+
+Dependencies
+~~~~~~~~~~~~
+This package requires m4, autoconf, gnu make, flex, bison, gcc or
+clang, and lopsub. The configure script checks if all required
+dependencies are installed and prints a meaningful error message if
+one of them is missing.
+
+Building
+~~~~~~~~
+Run \"make\" to build tfortune with the default settings. Run
+\"./configure -h\" to list configuration options.
+
+Installation
+~~~~~~~~~~~~
+Run \"sudo make install\" to install to /usr/local. To install to
+/somewhere/else, run \"./configure --prefix /somewhere/else && make\"
+first.
+endef
+
+README:
+       @printf '%s\n' "$(README)"
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..52a1fd7
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+Run "make README".
diff --git a/ast.c b/ast.c
new file mode 100644 (file)
index 0000000..1320964
--- /dev/null
+++ b/ast.c
@@ -0,0 +1,442 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#include "tf.h"
+#include "txp.bison.h"
+
+enum semantic_types {
+       ST_STRVAL,
+       ST_INTVAL,
+       ST_BOOLVAL,
+       ST_REGEX_PATTERN,
+};
+
+struct txp_context {
+       char *errmsg;
+       struct txp_ast_node *ast;
+};
+
+/*
+ * Set the error bit in the parser context and log a message.
+ *
+ * This is called if the lexer or the parser detect an error. Only the first
+ * error is logged (with a severity of "warn").
+ */
+__attribute__ ((format (printf, 3, 4)))
+void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...)
+{
+       va_list ap;
+       char *tmp;
+
+       if (ctx->errmsg) /* we already printed an error message */
+               return;
+       va_start(ap, fmt);
+       xvasprintf(&tmp, fmt, ap);
+       va_end(ap);
+       xasprintf(&ctx->errmsg, "line %d: %s", line, tmp);
+       free(tmp);
+       WARNING_LOG("%s\n", ctx->errmsg);
+}
+
+/*
+ * Parse a (generalized) string literal.
+ *
+ * This function turns the generalized C99 string literal given by src into a C
+ * string.  For example, the string literal "xyz\n" is transformed into an
+ * array containing the three characters 'x', 'y' and 'z', followed by a
+ * newline character and the terminating zero byte. The function allows to
+ * specify different quote characters so that, for example, regular expression
+ * patterns enclosed in '/' can be parsed as well. To parse a proper string
+ * literal, one has to pass two double quotes as the second argument.
+ *
+ * The function strips off the opening and leading quote characters, replaces
+ * double backslashes by single backslashes and handles the usual escapes like
+ * \n and \".
+ *
+ * The caller must make sure that the input is well-formed. The function simply
+ * aborts if the input is not a valid C99 string literal (modulo the quote
+ * characters).
+ *
+ * The return value is the offset of the first character after the closing
+ * quote. For proper string literals this will be the terminating zero byte of
+ * the input string, for regular expression patterns it is the beginning of the
+ * flags which modify the matching behaviour.
+ */
+unsigned parse_quoted_string(const char *src, const char quote_chars[2],
+               char **result)
+{
+       size_t n, len = strlen(src);
+       char *dst, *p;
+       bool backslash;
+
+       assert(len >= 2);
+       assert(src[0] == quote_chars[0]);
+       p = dst = xmalloc(len - 1);
+       backslash = false;
+       for (n = 1;; n++) {
+               char c;
+               assert(n < len);
+               c = src[n];
+               if (!backslash) {
+                       if (c == '\\') {
+                               backslash = true;
+                               continue;
+                       }
+                       if (c == quote_chars[1])
+                               break;
+                       *p++ = c;
+                       continue;
+               }
+               if (c == quote_chars[1])
+                       *p++ = quote_chars[1];
+               else switch (c) {
+                       case '\\': *p++ = '\\'; break;
+                       case 'a': *p++ = '\a'; break;
+                       case 'b': *p++ = '\b'; break;
+                       case 'f': *p++ = '\f'; break;
+                       case 'n': *p++ = '\n'; break;
+                       case 'r': *p++ = '\r'; break;
+                       case 't': *p++ = '\t'; break;
+                       case 'v': *p++ = '\v'; break;
+                       default: assert(false);
+               }
+               backslash = false;
+       }
+       assert(src[n] == quote_chars[1]);
+       *p = '\0';
+       *result = dst;
+       return n + 1;
+}
+
+/*
+ * Parse and compile an extended regular expression pattern, including flags.
+ *
+ * A regex pattern is identical to a C99 string literal except (a) it is
+ * enclosed in '/' characters rather than double quotes, (b) double quote
+ * characters which are part of the pattern do not need to be quoted with
+ * backslashes, but slashes must be quoted in this way, and (c) the closing
+ * slash may be followed by one or more flag characters which modify the
+ * matching behaviour.
+ *
+ * The only flags which are currently supported are 'i' to ignore case in match
+ * (REG_ICASE) and 'n' to change the handling of newline characters
+ * (REG_NEWLINE).
+ *
+ * This function calls parse_quoted_string(), hence it aborts if the input
+ * string is malformed. However, errors from regcomp(3) are returned without
+ * aborting the process. The rationale behind this difference is that passing a
+ * malformed string must be considered an implementation bug because malformed
+ * strings should be rejected earlier by the lexer.
+ */
+int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result)
+{
+       int ret;
+       char *pat;
+       unsigned n = parse_quoted_string(src, "//", &pat);
+
+       result->flags = 0;
+       for (; src[n]; n++) {
+               switch (src[n]) {
+               case 'i': result->flags |= REG_ICASE; break;
+               case 'n': result->flags |= REG_NEWLINE; break;
+               default: assert(false);
+               }
+       }
+       ret = xregcomp(&result->preg, pat, result->flags);
+       free(pat);
+       return ret;
+}
+
+static struct txp_ast_node *ast_node_raw(int id)
+{
+       struct txp_ast_node *node = xmalloc(sizeof(*node));
+       node->id = id;
+       return node;
+}
+
+/*
+ * Allocate a new leaf node for the abstract syntax tree.
+ *
+ * This returns a pointer to a node whose ->num_children field is initialized
+ * to zero. The ->id field is initialized with the given id.  The caller is
+ * expected to initialize the ->sv field.
+ *
+ * This has to be non-static because it is also called from the lexer.
+ */
+struct txp_ast_node *txp_new_ast_leaf_node(int id)
+{
+       struct txp_ast_node *node = ast_node_raw(id);
+       node->num_children = 0;
+       return node;
+}
+
+struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child)
+{
+       struct txp_ast_node *node = ast_node_raw(id);
+       node->num_children = 1;
+       node->children = xmalloc(sizeof(struct txp_ast_node *));
+       node->children[0] = child;
+       return node;
+}
+
+struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left,
+               struct txp_ast_node *right)
+{
+       struct txp_ast_node *node = ast_node_raw(id);
+       node->num_children = 2;
+       node->children = xmalloc(2 * sizeof(struct txp_ast_node *));
+       node->children[0] = left;
+       node->children[1] = right;
+       return node;
+}
+
+/*
+ * Deallocate an abstract syntax tree.
+ *
+ * This frees the memory occupied by the nodes of the AST, the child pointers
+ * of the internal nodes and the (constant) semantic values of the leaf nodes
+ * (string literals and pre-compiled regular expressions).
+ */
+static void txp_free_ast(struct txp_ast_node *root)
+{
+       if (!root)
+               return;
+       if (root->num_children > 0) {
+               int i;
+               for (i = 0; i < root->num_children; i++)
+                       txp_free_ast(root->children[i]);
+               free(root->children);
+       } else {
+               union txp_semantic_value *sv = &root->sv;
+               switch (root->id) {
+               case STRING_LITERAL:
+                       free(sv->strval);
+                       break;
+               case REGEX_PATTERN:
+                       regfree(&sv->re_pattern.preg);
+                       break;
+               }
+       }
+       free(root);
+}
+
+void txp_free(struct txp_context *ctx)
+{
+       txp_free_ast(ctx->ast);
+       free(ctx);
+}
+
+static int eval_node(const struct txp_ast_node *node,
+               const struct txp_context *ctx,
+               const struct epi_properties *props,
+               union txp_semantic_value *result);
+
+static void eval_binary_op(const struct txp_ast_node *node,
+               const struct txp_context *ctx,
+               const struct epi_properties *props,
+               union txp_semantic_value *v1, union txp_semantic_value *v2)
+{
+       eval_node(node->children[0], ctx, props, v1);
+       eval_node(node->children[1], ctx, props, v2);
+}
+
+static int eval_node(const struct txp_ast_node *node,
+               const struct txp_context *ctx,
+               const struct epi_properties *props,
+               union txp_semantic_value *result)
+{
+       int ret;
+       union txp_semantic_value v1, v2;
+
+       assert(node);
+       switch (node->id) {
+       /* strings */
+       case STRING_LITERAL:
+               result->strval = node->sv.strval;
+               return ST_STRVAL;
+       case TEXT:
+               result->strval = epi_text(props);
+               return ST_STRVAL;
+       /* integers */
+       case NUM:
+               result->intval = node->sv.intval;
+               return ST_INTVAL;
+       case '+':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->intval = v1.intval + v2.intval;
+               return ST_INTVAL;
+       case '-':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->intval = v1.intval - v2.intval;
+               return ST_INTVAL;
+       case '*':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->intval = v1.intval * v2.intval;
+               return ST_INTVAL;
+       case '/':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               if (v2.intval == 0) {
+                       static bool warned;
+                       if (!warned)
+                               ERROR_LOG("division by zero\n");
+                       warned = true;
+                       result->intval = 0;
+               } else
+                       result->intval = v1.intval / v2.intval;
+               return ST_INTVAL;
+       case NEG:
+               eval_node(node->children[0], ctx, props, &v1);
+               result->intval = -v1.intval;
+               return ST_INTVAL;
+       case LEN:
+               result->intval = epi_len(props);
+               return ST_INTVAL;
+       /* bools */
+       case TAG:
+               eval_node(node->children[0], ctx, props, &v1);
+               result->boolval = epi_has_tag(node->children[0]->sv.strval,
+                       props);
+               return ST_BOOLVAL;
+       case TRUE:
+               result->boolval = true;
+               return ST_BOOLVAL;
+       case FALSE:
+               result->boolval = false;
+               return ST_BOOLVAL;
+       case OR:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.boolval || v2.boolval;
+               return ST_BOOLVAL;
+       case AND:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.boolval && v2.boolval;
+               return ST_BOOLVAL;
+       case NOT:
+               eval_node(node->children[0], ctx, props, &v1);
+               result->boolval = !v1.boolval;
+               return ST_BOOLVAL;
+       case EQUAL:
+               ret = eval_node(node->children[0], ctx, props, &v1);
+               eval_node(node->children[1], ctx, props, &v2);
+               if (ret == ST_STRVAL)
+                       result->boolval = !strcmp(v1.strval, v2.strval);
+               else
+                       result->boolval = v1.intval == v2.intval;
+               return ST_BOOLVAL;
+       case NOT_EQUAL:
+               ret = eval_node(node->children[0], ctx, props, &v1);
+               eval_node(node->children[1], ctx, props, &v2);
+               if (ret == ST_STRVAL)
+                       result->boolval = strcmp(v1.strval, v2.strval);
+               else
+                       result->boolval = v1.intval != v2.intval;
+               return ST_BOOLVAL;
+       case '<':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.intval < v2.intval;
+               return ST_BOOLVAL;
+       case '>':
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.intval > v2.intval;
+               return ST_BOOLVAL;
+       case LESS_OR_EQUAL:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.intval <= v2.intval;
+               return ST_BOOLVAL;
+       case GREATER_OR_EQUAL:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = v1.intval >= v2.intval;
+               return ST_BOOLVAL;
+       case REGEX_MATCH:
+               eval_binary_op(node, ctx, props, &v1, &v2);
+               result->boolval = regexec(&v2.re_pattern.preg, v1.strval,
+                        0, NULL, 0) == 0;
+               return ST_BOOLVAL;
+       case REGEX_PATTERN:
+               result->re_pattern = node->sv.re_pattern;
+               return ST_REGEX_PATTERN;
+       default:
+               EMERG_LOG("bug: invalid node id %d\n", node->id);
+               exit(EXIT_FAILURE);
+       }
+}
+
+/*
+ * Evaluate an abstract syntax tree, starting at the root node.
+ *
+ * The ctx argument should be the pointer that was returned from an earlier
+ * call to txp_init(). The cookie properties structure contains the information
+ * about the epigram.
+ *
+ * Returns true if the AST evaluates to true, a non-empty string, or a non-zero
+ * number, false otherwise.
+ */
+bool txp_eval_ast(const struct txp_context *ctx,
+               const struct epi_properties *props)
+{
+       union txp_semantic_value v;
+       int ret;
+
+       if (!ctx->ast)
+               return true;
+       ret = eval_node(ctx->ast, ctx, props, &v);
+
+       if (ret == ST_INTVAL)
+               return v.intval != 0;
+       if (ret == ST_STRVAL)
+               return v.strval[0] != 0;
+       if (ret == ST_BOOLVAL)
+               return v.boolval;
+       assert(false);
+}
+
+int txp_yylex_init(txp_yyscan_t *yyscanner);
+struct yy_buffer_state *txp_yy_scan_bytes(const char *buf, int len,
+       txp_yyscan_t yyscanner);
+void txp_yy_delete_buffer(struct yy_buffer_state *bs, txp_yyscan_t yyscanner);
+int txp_yylex_destroy(txp_yyscan_t yyscanner);
+void txp_yyset_lineno(int lineno, txp_yyscan_t scanner);
+
+/*
+ * Initialize the tag expression parser.
+ *
+ * This allocates and sets up the internal structures of the tag expression
+ * parser and creates an abstract syntax tree from the given epigram (including
+ * the tags). It must be called before txp_eval_ast() can be called.
+ *
+ * The context pointer returned by this function may be passed to mp_eval_ast()
+ * to determine whether an epigram is admissible.
+ *
+ * The error message pointer may be NULL in which case no error message is
+ * returned. Otherwise, the caller must free the returned string.
+ */
+int txp_init(const struct iovec *definition, struct txp_context **result,
+                char **errmsg)
+{
+       int ret;
+       txp_yyscan_t scanner;
+       struct txp_context *ctx;
+       struct yy_buffer_state *buffer_state;
+
+       ctx = xcalloc(sizeof(*ctx));
+       ret = txp_yylex_init(&scanner);
+       assert(ret == 0);
+       buffer_state = txp_yy_scan_bytes(definition->iov_base,
+               definition->iov_len, scanner);
+       txp_yyset_lineno(1, scanner);
+       NOTICE_LOG("creating abstract syntax tree from tag expression\n");
+       ret = txp_yyparse(ctx, &ctx->ast, scanner);
+       txp_yy_delete_buffer(buffer_state, scanner);
+       txp_yylex_destroy(scanner);
+       if (ctx->errmsg) { /* parse error */
+               if (errmsg)
+                       *errmsg = ctx->errmsg;
+               else
+                       free(ctx->errmsg);
+               free(ctx);
+               return -E_TXP;
+       }
+       if (errmsg)
+               *errmsg = NULL;
+       *result = ctx;
+       return 1;
+}
diff --git a/config.mak.in b/config.mak.in
new file mode 100644 (file)
index 0000000..b42c106
--- /dev/null
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-3.0-only
+
+prefix := @prefix@
+exec_prefix := @exec_prefix@
+
+# These two use prefix and exec_prefix
+bindir := @bindir@
+datarootdir := @datarootdir@
+
+PACKAGE_TARNAME := @PACKAGE_TARNAME@
+PACKAGE_VERSION := @PACKAGE_VERSION@
+PACKAGE_BUGREPORT := @PACKAGE_BUGREPORT@
+PACKAGE_URL := @PACKAGE_URL@
+
+FLEX := @FLEX@
+BISON := @BISON@
+M4 := @M4@
+LOPSUBGEN := @LOPSUBGEN@
diff --git a/configure b/configure
new file mode 100755 (executable)
index 0000000..b2f719f
--- /dev/null
+++ b/configure
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-3.0-only
+
+set -e
+
+if ! test -x configure.sh; then
+       autoconf configure.ac > configure.sh
+       chmod 755 configure.sh
+       autoheader
+fi
+
+sh configure.sh "$@"
diff --git a/configure.ac b/configure.ac
new file mode 100644 (file)
index 0000000..58925f1
--- /dev/null
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-3.0-only
+
+AC_PREREQ([2.61])
+AC_INIT([tfortune], [m4_esyscmd_s(./version-gen.sh)],
+       [maan@tuebingen.mpg.de], [], [http://people.tuebingen.mpg.de/maan/tfortune/])
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_FILES([config.mak])
+AC_USE_SYSTEM_EXTENSIONS
+AC_PROG_CC
+AC_PROG_CPP
+
+AC_DEFUN([REQUIRE_EXECUTABLE], [
+       AC_PATH_PROG(m4_toupper([$1]), [$1])
+       test -z "$m4_toupper([$1])" && AC_MSG_ERROR([$1 is required])
+])
+REQUIRE_EXECUTABLE([m4])
+REQUIRE_EXECUTABLE([flex])
+REQUIRE_EXECUTABLE([bison])
+REQUIRE_EXECUTABLE([lopsubgen])
+
+HAVE_LOPSUB=yes
+AC_CHECK_HEADER(lopsub.h, [], [HAVE_LOPSUB=no])
+AC_CHECK_LIB([lopsub], [lls_merge], [], [HAVE_LOPSUB=no])
+if test $HAVE_LOPSUB == no; then AC_MSG_ERROR([
+       The lopsub library is required to build this software, but
+       the above checks indicate it is not installed on your system.
+       Run the following command to download a copy.
+               git clone git://git.tuebingen.mpg.de/lopsub.git
+       Install the library, then run this configure script again.
+
+       If you installed lopsub at a non-standard location, make sure to set
+       PATH, CPPFLAGS and LDFLAGS accordingly. For example:
+
+               pfx=/prefix/where/lopsub/is/installed
+               export PATH=\$pfx/bin:\$PATH
+               export CPPFLAGS=-I\$pfx/include
+               export LDFLAGS=-L\$pfx/lib
+])
+fi
+AC_OUTPUT
diff --git a/err.h b/err.h
new file mode 100644 (file)
index 0000000..76988bd
--- /dev/null
+++ b/err.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#define TF_ERRORS \
+       TF_ERROR(SUCCESS, "success"), \
+       TF_ERROR(ATOI_OVERFLOW, "value too large"), \
+       TF_ERROR(ATOI_NO_DIGITS, "no digits found in string"), \
+       TF_ERROR(ATOI_JUNK_AT_END, "further characters after number"), \
+       TF_ERROR(LOPSUB, "lopsub error"), \
+       TF_ERROR(REGEX, "regular expression error"), \
+       TF_ERROR(TXP, "tag expression parse error"), \
+       TF_ERROR(LH_EXIST, "item already exists in hash table"), \
+
+/*
+ * This is temporarily defined to expand to its first argument (prefixed by
+ * 'E_') and gets later redefined to expand to the error text only
+ */
+#define TF_ERROR(err, msg) E_ ## err
+enum tf_error_codes {TF_ERRORS};
+#undef TF_ERROR
+#define TF_ERROR(err, msg) msg
+#define DEFINE_TF_ERRLIST char *tf_errlist[] = {TF_ERRORS}
+
+extern char *tf_errlist[];
+
+/**
+ * This bit indicates whether a number is considered a system error number
+ * If yes, the system errno is just the result of clearing this bit from
+ * the given number.
+ */
+#define SYSTEM_ERROR_BIT 30
+
+/** Check whether the system error bit is set. */
+#define IS_SYSTEM_ERROR(num) (!!((num) & (1 << SYSTEM_ERROR_BIT)))
+
+/** Set the system error bit for the given number. */
+#define ERRNO_TO_TF_ERROR(num) ((num) | (1 << SYSTEM_ERROR_BIT))
+
+static inline char *tf_strerror(int num)
+{
+       assert(num > 0);
+       if (IS_SYSTEM_ERROR(num))
+               return strerror((num) & ((1 << SYSTEM_ERROR_BIT) - 1));
+       else
+               return tf_errlist[num];
+}
+
diff --git a/index.html.m4 b/index.html.m4
new file mode 100644 (file)
index 0000000..e1b5f5e
--- /dev/null
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'
+'http://www.w3.org/TR/html4/loose.dtd'>
+
+<!-- SPDX-License-Identifier: GPL-3.0-only */ -->
+<html>
+       <head>
+               <meta
+                       http-equiv='Content-Type';
+                       content='text/html';
+                       charset=utf-8;
+               >
+               <title>Tfortune</title>
+               <style type='text/css'>
+                       a {
+                               color: #003355;
+                       }
+                       p {
+                               font-size: 120%;
+                       }
+               </style>
+       </head>
+       <body>
+               <table width="100%">
+                       <tr>
+                               <td>
+                                       <h1 align="left">
+                                                tfortune - Fortune Cookies with Tags
+                                       </h1>
+                               </td>
+                               <td align="right">
+                                       <img
+                                               src="logo.svg"
+                                               alt="tfortune logo"
+                                               title="The text on the paper strip says:
+&quot;You will soon use a different fortune program&quot;"
+
+                                       >
+                               </td>
+                       </tr>
+               </table>
+               <p> DESCRIPTION1() </p>
+               <p> DESCRIPTION2() </p>
+               <p> DESCRIPTION3() </p>
+
+               <h2> Resources </h2>
+               <ul>
+                       <li> git clone URL: CLONE_URL() </li>
+                       <li>
+                               gitweb: <a href="GITWEB_URL()">GITWEB_URL()</a>
+                       </li>
+                       <li>
+                               Email: AUTHOR() &lt;<a
+                               href="mailto:PACKAGE_BUGREPORT()">PACKAGE_BUGREPORT()</a>&gt;
+                       </li>
+               </ul>
+
+               <h2> License </h2>
+               Open source, licensed under the <a
+               href="LICENSE_URL()">LICENSE()</a>
+               (GNU General Public License, Version 3)
+
+               <h2> Documentation </h2>
+               All parts of tfortune are well documented and illustrated with
+               examples. See the manual page for details.
+
+               <h2> Programming Language </h2>
+               Plain C.
+
+               <h2> Dependencies </h2>
+               A working C compiler and a couple of other dependencies, most of which
+               are standard (autoconf, make, m4, flex, bison). The notable exception
+               is the <a href="http://people.tuebingen.mpg.de/maan/lopsub/">lopsub</a>
+               library.
+       </body>
+</html>
diff --git a/linhash.c b/linhash.c
new file mode 100644 (file)
index 0000000..28c67ed
--- /dev/null
+++ b/linhash.c
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+/*
+ * Simple linear hashing implementation
+ *
+ * Linear hashing is a hash table algorithm by Witold Litwin (1980)
+ * which grows the table on demand without rehashing existing objects.
+ *
+ * This implementation employs a conventional hash function C whose
+ * output length is assumed to be greater than the order of the table,
+ * i.e. the base-2 logarithm of the number of slots. Hash collisions
+ * are resolved by chaining.
+ *
+ * The hash table consists in fact of two tables which are consecutive
+ * in memory. Each table represents N = 1 << n many slots of the
+ * table. Example (n = 2, N = 4):
+ *
+ *     [....][....]
+ *      0123  4567
+ *
+ * In what follows, we refer to the two tables as the lower table and
+ * the higher table.
+ *
+ * C gives rise to a sequence of hash functions C_k via C_k(key) :=
+ * lower k bits of C(key). In terms of C_k the lower and higher hash
+ * functions, h and H, are defined by h = C_n and H = C_{n+1}. Hence
+ * h(key) always corresponds to a slot in the lower table while H(key)
+ * corresponds to a slot in either table. Both functions are trivial to
+ * compute given C(key).
+ *
+ * The algorithm also maintains a split position s which corresponds to
+ * a slot of the lower table and starts out at zero.
+ *
+ *     [s...][....]
+ *      0123  4567
+ *
+ * The linear hash function L is defined by L(key) = H(key) if h(key)
+ * < s, and L(key) = h(key) otherwise. In other words, L(key) is either
+ * the lower or the higher hash value, depending on the relation between
+ * the lower hash value and s. The higher hash function applies for all
+ * keys whose lower hash value is smaller than s. Initially we have L = h.
+ *
+ * On insertion, if the table load is large, the chain of entries which
+ * hash to s is split by transitioning from h to H.
+ *
+ * For an existing key, we have either H(key) = h(key) or H(key) =
+ * h(key) + 1 << n. That is, the most significant bit of H(key) tells
+ * whether this entry must be moved from its slot in the low table to the
+ * corresponding slot in the high table. Approximately half the entries
+ * will be moved this way. To reflect the fact that from now on L(key)
+ * = H(key) for all keys with h(key) = s, s is increased by one.
+ *
+ * Each time s reaches 1 << n, the upper bound of the lower table, we
+ * have L = H. In this case the hash table size is doubled, and the split
+ * position is reset to zero. Moreover, h and H are redefined as h :=
+ * C_{n+1} (i.e., the new h becomes the former H), and H := C_{n+2}. This
+ * means the table order n has increased by one, and a new round begins.
+ *
+ * This implementation includes an API for iterating over all entries
+ * in the hash table which avoids callback functions. Instead an opaque
+ * iterator structure is provided together with a set of functions that
+ * create or operate on such a structure.
+ *
+ * While shrinking a linear hash table can performed essentially
+ * by reversing the steps for growing described above, this is not
+ * implemented since it is not needed for tfortune.
+ */
+
+#include "tf.h"
+
+struct lh_slot {
+       struct linhash_item item;
+       uint32_t hash;
+       struct lh_slot *next;
+};
+
+struct linhash_table {
+       struct lh_slot **slots;
+       unsigned order;
+       uint32_t num_items; /* how many items have been inserted so far. */
+       uint32_t split_position;
+};
+
+struct linhash_iterator {
+       /* these are used regardless of whether comp is NULL */
+       struct linhash_table *t;
+       uint32_t idx;
+       /* comp == NULL means: iterate in hash order */
+       linhash_comparator *comp;
+       /* only used if comp == NULL */
+       struct lh_slot *head, *prev, *next;
+       /* only used if comp != NULL */
+       struct linhash_item **items;
+       bool reverse;
+};
+
+static uint32_t lh_num_slots_low(const struct linhash_table *t)
+{
+       return (uint32_t)1 << t->order;
+}
+
+static uint32_t lh_num_slots_high(const struct linhash_table *t)
+{
+       return lh_num_slots_low(t) * 2;
+}
+
+static uint32_t lh_index_low(uint32_t hash, const struct linhash_table *t)
+{
+       return hash % lh_num_slots_low(t);
+}
+
+static uint32_t lh_index_high(uint32_t hash, const struct linhash_table *t)
+{
+       return hash % lh_num_slots_high(t);
+}
+
+static bool lh_must_grow(const struct linhash_table *t)
+{
+       return t->num_items >= lh_num_slots_high(t) / 2;
+}
+
+/* The simple DJB (Daniel J Bernstein) hash is good enough here. */
+static uint32_t lh_hash(const char *data)
+{
+       uint32_t h = 5381;
+       const unsigned char *c = (typeof(c))data;
+
+       while (*(c++))
+               h = h * 33 + *c;
+       return h;
+}
+
+/*
+ * Create a new table for linear hashing.
+ *
+ * The order argument determines the initial number of slots: it is given by 2
+ * << order. The hash table grows on demand, and the point of linear hashing is
+ * the ability to cheaply grow the table, so specifying anything greater than
+ * zero is probably not necessary.
+ *
+ * The functtion returns an opaque pointer which serves as the handle to the
+ * newly created hash table.  Most functions of the linhash API take such a
+ * pointer to know the table to operate on. This function either succeeds or
+ * terminates, it never returns NULL.
+ */
+struct linhash_table *linhash_new(uint32_t order)
+{
+       struct linhash_table *t;
+       uint32_t ns;
+
+       DEBUG_LOG("creating order %u hash table\n", order);
+       t = xmalloc(sizeof(*t));
+       t->order = order;
+       t->num_items = 0;
+       t->split_position = 0;
+       ns = lh_num_slots_high(t);
+       t->slots = xcalloc(ns * sizeof(*t->slots));
+       return t;
+}
+
+static uint32_t lh_index(uint32_t hash, const struct linhash_table *t)
+{
+       uint32_t low = lh_index_low(hash, t);
+
+       if (low >= t->split_position)
+               return low;
+       return lh_index_high(hash, t);
+}
+
+static void lh_split_slot(struct linhash_table *t)
+{
+       struct lh_slot *prev, *next, *s;
+       uint32_t sp = t->split_position;
+
+       DEBUG_LOG("splitting slot %u\n", sp);
+       for (prev = NULL, s = t->slots[sp]; s;) {
+               uint32_t idx = lh_index_high(s->hash, t);
+               if (idx == sp) {
+                       prev = s;
+                       s = s->next;
+                       continue;
+               }
+               DEBUG_LOG("moving %s up from slot %u to slot %u\n",
+                       s->item.key, sp, idx);
+               if (t->slots[sp] == s)
+                       t->slots[sp] = s->next;
+               if (prev)
+                       prev->next = s->next;
+               next = s->next;
+               s->next = t->slots[idx];
+               t->slots[idx] = s;
+               s = next;
+       }
+       DEBUG_LOG("slot %u has been split\n", sp);
+       if (!t->slots[sp])
+               DEBUG_LOG("slot #%u has become empty\n", sp);
+}
+
+static void lh_grow_table(struct linhash_table *t)
+{
+       uint32_t idx, ns;
+
+       DEBUG_LOG("growing hash table to order %u\n", ++t->order);
+       ns = lh_num_slots_high(t);
+       t->slots = xrealloc(t->slots, ns * sizeof(*t->slots));
+       idx = lh_num_slots_low(t);
+       memset(t->slots + idx, 0, (ns - idx) * sizeof(*t->slots));
+}
+
+static struct lh_slot *lh_lookup(const char *key,
+               const struct linhash_table *t, uint32_t *hashp, uint32_t *idxp,
+               struct lh_slot **prevp)
+{
+       struct lh_slot *s, *prev;
+       uint32_t hash, idx;
+
+       if (!t)
+               return NULL;
+       hash = lh_hash(key);
+       idx = lh_index(hash, t);
+       //DEBUG_LOG("key %s, hash: %u, idx: %u\n", key, hash, idx);
+       if (hashp)
+               *hashp = hash;
+       if (idxp)
+               *idxp = idx;
+       for (s = t->slots[idx], prev = NULL; s; prev = s, s = s->next) {
+               //DEBUG_LOG("comparing %s vs. %s\n", key, s->item.key);
+               if (strcmp(s->item.key, key))
+                       continue;
+               /* found it */
+               if (prevp)
+                       *prevp = prev;
+               return s;
+       }
+       if (prevp)
+               *prevp = NULL;
+       return NULL;
+}
+
+/**
+ * Find the entry identified by a key.
+ *
+ * \param key The key to look up.
+ * \param t Where to look up the key.
+ */
+struct linhash_item *linhash_lookup(const char *key,
+               const struct linhash_table *t)
+{
+       struct lh_slot *s = lh_lookup(key, t, NULL, NULL, NULL);
+
+       if (!s)
+               return NULL;
+       return &s->item;
+}
+
+static void *lh_remove(struct lh_slot *s, struct lh_slot *prev, uint32_t idx, struct linhash_table *t)
+{
+       void *obj;
+
+       if (!s)
+               return NULL;
+       t->num_items--;
+       obj = s->item.object;
+       if (prev)
+               prev->next = s->next;
+       else
+               t->slots[idx] = s->next;
+       free(s);
+       return obj;
+}
+
+/* Note: This renders all existing iterators stale. */
+void *linhash_remove(const char *key, struct linhash_table *t)
+{
+       uint32_t idx;
+       struct lh_slot *prev, *s = lh_lookup(key, t, NULL, &idx, &prev);
+
+       return lh_remove(s, prev, idx, t);
+}
+
+static void *lh_iterator_remove_current(struct linhash_iterator *iter)
+{
+       void *obj;
+
+       assert(!iter->comp);
+       if (!iter->head)
+               return NULL;
+       obj = lh_remove(iter->head, iter->prev, iter->idx, iter->t);
+       iter->head = iter->prev;
+       return obj;
+}
+
+static struct lh_slot *lh_first_nonempty_slot(uint32_t *idxp,
+               const struct linhash_table *t)
+{
+       uint32_t ns = lh_num_slots_high(t);
+
+       for (; *idxp < ns; (*idxp)++)
+               if (t->slots[*idxp])
+                       return t->slots[*idxp];
+       return NULL;
+}
+
+static void lh_iter_init(struct linhash_iterator *iter, uint32_t idx)
+{
+       iter->idx = idx;
+       iter->prev = NULL;
+       iter->head = lh_first_nonempty_slot(&iter->idx, iter->t);
+       if (iter->head)
+               iter->next = iter->head->next;
+}
+
+/*
+ * Normally iter->head points to the current head. However, if this head was
+ * removed with lh_iterator_remove_current(), iter->head points to its
+ * predecessor in the list.
+ */
+void linhash_iterator_next(struct linhash_iterator *iter)
+{
+       if (iter->comp) {
+               if (iter->reverse)
+                       iter->idx--;
+               else
+                       iter->idx++;
+               return;
+       }
+       if (iter->next) {
+               iter->prev = iter->head;
+               iter->head = iter->next;
+               iter->next = iter->next->next;
+               return;
+       }
+       lh_iter_init(iter, iter->idx + 1);
+}
+
+struct linhash_item *linhash_iterator_item(const struct linhash_iterator *iter)
+{
+       if (iter->comp) {
+               if (iter->idx >= iter->t->num_items)
+                       return NULL;
+               return iter->items[iter->idx];
+       }
+       if (!iter->head)
+               return NULL;
+       return &iter->head->item;
+}
+
+void linhash_iterator_free(struct linhash_iterator *iter)
+{
+       if (!iter)
+               return;
+       if (iter->comp)
+               free(iter->items);
+       free(iter);
+}
+
+/* always succeeds. reverse order is only respected if a comparator is given. */
+struct linhash_iterator *linhash_iterator_new(struct linhash_table *t,
+               linhash_comparator *comp, bool reverse_sort_order)
+{
+       struct linhash_iterator *iter = xmalloc(sizeof(*iter)), *iter2;
+       struct linhash_item *item;
+       unsigned n;
+
+       iter->t = t;
+       iter->comp = comp;
+       if (!comp) {
+               lh_iter_init(iter, 0);
+               return iter;
+       }
+       iter->reverse = reverse_sort_order;
+       iter2 = linhash_iterator_new(t, NULL, false);
+       iter->items = xmalloc(t->num_items * sizeof(struct linhash_item *));
+       for (
+               n = 0;
+               (item = linhash_iterator_item(iter2));
+               linhash_iterator_next(iter2)
+       )
+               iter->items[n++] = item;
+       linhash_iterator_free(iter2);
+       qsort(iter->items, t->num_items, sizeof(struct linhash_iter *),
+               (int (*)(const void *, const void *))comp);
+       iter->idx = reverse_sort_order? t->num_items - 1 : 0;
+       return iter;
+}
+
+/* Deallocate the resources occupied by the given hash table. */
+void linhash_free(struct linhash_table *t)
+{
+       struct linhash_iterator *iter;
+       struct linhash_item *itemp;
+
+       if (!t)
+               return;
+       for (
+               iter = linhash_iterator_new(t, NULL, false);
+               (itemp = linhash_iterator_item(iter));
+               linhash_iterator_next(iter)
+       )
+               lh_iterator_remove_current(iter);
+       linhash_iterator_free(iter);
+       assert(t->num_items == 0);
+       free(t->slots);
+       free(t);
+}
+
+/* returns item in first arg if key already exists */
+int linhash_insert(struct linhash_item *item, struct linhash_table *t,
+               void ***object)
+{
+       struct lh_slot *s;
+       uint32_t idx, hash;
+
+       s = lh_lookup(item->key, t, &hash, &idx, NULL);
+       if (s) {
+               if (object)
+                       *object = &s->item.object;
+               return -E_LH_EXIST;
+       }
+       s = xmalloc(sizeof(*s));
+       s->item = *item;
+       s->hash = hash;
+       DEBUG_LOG("inserting item #%u, key: %s, hash: %u, idx: %u\n",
+               t->num_items, item->key, hash, idx);
+       s->next = t->slots[idx];
+       t->slots[idx] = s;
+       t->num_items++;
+
+       if (!lh_must_grow(t)) {
+               DEBUG_LOG("no need to grow\n");
+               return 0;
+       }
+       lh_split_slot(t);
+       t->split_position++;
+       if (t->split_position < lh_num_slots_low(t))
+               return 1;
+       t->split_position = 0;
+       lh_grow_table(t);
+       return 2;
+}
+
+uint32_t linhash_num_items(const struct linhash_table *t)
+{
+       return t->num_items;
+}
+
+char *linhash_statistics(const struct linhash_table *t)
+{
+       uint32_t min_fill = -1, max_fill = 0, n, idx, ns;
+       uint32_t fill_count[11] = {0};
+       char *result;
+
+       ns = lh_num_slots_low(t) + t->split_position;
+       for (idx = 0; idx < ns; idx++) {
+               struct lh_slot *s;
+               for (n = 0, s = t->slots[idx]; s; s = s->next, n++)
+                       ; /* nothing */
+               min_fill = MIN(min_fill, n);
+               max_fill = MAX(max_fill, n);
+               fill_count[n < 10? n : 10]++;
+       }
+       xasprintf(&result,
+               "order............... %2u\n"
+               "num slots........... %u\n"
+               "num items (table)... %u\n"
+               "load factor........ %3u%%\n"
+               "min fill............ %2u\n"
+               "max fill............ %2u\n"
+               "max count[0]....... %3u%% (%u)\n"
+               "max count[1]....... %3u%% (%u)\n"
+               "max count[2]....... %3u%% (%u)\n"
+               "max count[3]....... %3u%% (%u)\n"
+               "max count[4]....... %3u%% (%u)\n"
+               "max count[5]....... %3u%% (%u)\n"
+               "max count[6]....... %3u%% (%u)\n"
+               "max count[7]....... %3u%% (%u)\n"
+               "max count[8]....... %3u%% (%u)\n"
+               "max count[9]....... %3u%% (%u)\n"
+               "max count[10+]..... %3u%% (%u)\n"
+               ,
+               t->order,
+               ns,
+               t->num_items,
+               (t->num_items * 100 + (ns / 2)) / ns,
+               min_fill,
+               max_fill,
+               100 * fill_count[0] / ns, fill_count[0],
+               100 * fill_count[1] / ns, fill_count[1],
+               100 * fill_count[2] / ns, fill_count[2],
+               100 * fill_count[3] / ns, fill_count[3],
+               100 * fill_count[4] / ns, fill_count[4],
+               100 * fill_count[5] / ns, fill_count[5],
+               100 * fill_count[6] / ns, fill_count[6],
+               100 * fill_count[7] / ns, fill_count[7],
+               100 * fill_count[8] / ns, fill_count[8],
+               100 * fill_count[9] / ns, fill_count[9],
+               100 * fill_count[10] / ns, fill_count[10]
+       );
+       return result;
+}
diff --git a/logo.svg b/logo.svg
new file mode 100644 (file)
index 0000000..d4a70a6
--- /dev/null
+++ b/logo.svg
@@ -0,0 +1,27 @@
+<!-- SPDX-License-Identifier: GPL-3.0-only -->
+
+<svg xmlns="http://www.w3.org/2000/svg" height="70" width="100" >
+       <g fill="none" stroke-width="4" linecap="round">
+               <path
+                        d="
+                               M 40,25 c 6,-20 20,-10 20,5
+                               M 3,48 s 5,-30 41,-30
+                               M 56,17 s 30,10 20,33
+                               M 59,24 s 32,10 -5,35
+                               M 56,58 s -6,1 -3,10
+                               M 66,49 s 10,-2 20,10
+                               m 0,-1 s -2,2 10,5
+                               M 60,55 s 5,-5 25,15
+                               M 85,70 s -2,-2 11,-7
+                               M 72,58 s -2,10 -20,10
+                               M 53,68 s -20,0 -5,-28
+                               m 0,0 s 7,-10 3,-20
+                               m 0,0 s 5,5 -7,15
+                               M 45,45 s -10,-4 -30,20
+                               m 0,0 s -3,-28 -12,-17
+                               M 16,65 s -14,4 -13,-17
+                       "
+                       stroke="#000"
+               />
+       </g>
+</svg>
diff --git a/tf.h b/tf.h
new file mode 100644 (file)
index 0000000..b937d07
--- /dev/null
+++ b/tf.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <fnmatch.h>
+#include <regex.h>
+#include <assert.h>
+#include <lopsub.h>
+#include <sys/uio.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <pwd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <grp.h>
+
+#include "err.h"
+
+/** Compute the minimum of \a x and \a y. */
+#define MIN(x, y) ({ \
+       typeof(x) _min1 = (x); \
+       typeof(y) _min2 = (y); \
+       (void) (&_min1 == &_min2); \
+       _min1 < _min2 ? _min1 : _min2; })
+
+/** Compute the maximum of \a x and \a y. */
+#define MAX(x, y) ({ \
+       typeof(x) _max1 = (x); \
+       typeof(y) _max2 = (y); \
+       (void) (&_max1 == &_max2); \
+       _max1 < _max2 ? _max2 : _max1; })
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+/* util.c */
+
+extern int loglevel_arg_val;
+enum loglevels {LOGLEVELS, NUM_LOGLEVELS};
+void tf_log(int ll, const char* fmt,...);
+#define DEBUG_LOG(f,...) tf_log(LL_DEBUG, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define INFO_LOG(f,...) tf_log(LL_INFO, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define NOTICE_LOG(f,...) tf_log(LL_NOTICE, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define WARNING_LOG(f,...) tf_log(LL_WARNING, "%s: " f, __FUNCTION__, ##  __VA_ARGS__)
+#define ERROR_LOG(f,...) tf_log(LL_ERROR, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define CRIT_LOG(f,...) tf_log(LL_CRIT, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define EMERG_LOG(f,...) tf_log(LL_EMERG, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+
+int atoi64(const char *str, int64_t *value);
+unsigned xvasprintf(char **result, const char *fmt, va_list ap);
+unsigned xasprintf(char **result, const char *fmt, ...);
+void *xrealloc(void *p, size_t size);
+void *xmalloc(size_t size);
+void *xcalloc(size_t size);
+char *xstrdup(const char *str);
+char *get_homedir(void);
+int xregcomp(regex_t *preg, const char *regex, int cflags);
+void mmap_file(const char *path, struct iovec *iov);
+int fd2buf(int fd, struct iovec *result);
+
+struct regfile_iter; /* opaque */
+void regfile_iter_new(const char *dirname, struct regfile_iter **result);
+bool regfile_iter_map(const struct regfile_iter *iter, struct iovec *result);
+const char *regfile_iter_basename(const struct regfile_iter *iter);
+const struct stat *regfile_iter_stat(const struct regfile_iter *iter);
+void regfile_iter_next(struct regfile_iter *iter);
+void regfile_iter_free(struct regfile_iter *iter);
+
+/* tfortune.c */
+struct epi_properties; /* opaque */
+unsigned epi_len(const struct epi_properties *props);
+bool epi_has_tag(const char *tag, const struct epi_properties *props);
+char *epi_text(const struct epi_properties *props);
+
+/* version.c */
+const char *tf_version(void);
+
+/* tag expression parser (ast.c, txp.lex, txp.y) */
+
+/* Opaque, only known to ast.c. Passed to the generated txp_yyparse(). */
+struct txp_context;
+
+int txp_init(const struct iovec *definition, struct txp_context **result,
+                char **errmsg);
+bool txp_eval_ast(const struct txp_context *ctx,
+               const struct epi_properties *props);
+void txp_free(struct txp_context *ctx);
+
+/* non-public API of the tag expression parser */
+
+/*
+ * Since we use a reentrant lexer, all functions generated by flex(1)
+ * receive an additional argument of this type.
+ */
+typedef void *txp_yyscan_t;
+
+/* Parsed regex pattern. */
+struct txp_re_pattern {
+       regex_t preg; /* Pre-compiled regex. */
+       unsigned flags; /* Subset of the cflags described in regex(3). */
+};
+
+/*
+ * The possible values of a node in the abstract syntax tree (AST).
+ *
+ * Constant semantic values (string literals, numeric constants and regex
+ * patterns which are part of the tag expression) are determined during
+ * txp_init() while values which depend on the epigram (tags, number of lines,
+ * etc.) are determined during txp_eval_row().
+ *
+ * This union, and the txp_ast_node structure below are used extensively in
+ * txp.y. However, both need to be public because the lexer must be able to
+ * create AST nodes for the constant semantic values.
+ */
+union txp_semantic_value {
+       bool boolval; /* Comparators, =~ and =|. */
+       char *strval; /* String literals (e.g., argument of tag()) */
+       int64_t intval; /* Constants, num_lines, etc. */
+       struct txp_re_pattern re_pattern; /* Right-hand side operand of =~. */
+};
+
+/*
+ * A node is either interior or a leaf node. Interior nodes have at least one
+ * child while leaf nodes have a semantic value and no children.
+ *
+ * Examples: (a) STRING_LITERAL has a semantic value (the unescaped string
+ * literal) and no children, (b) NEG (unary minus) has no semantic value but
+ * one child (the numeric expression that is to be negated), (c) LESS_OR_EQUAL
+ * has no semantic value and two children (the two numeric expressions being
+ * compared).
+ */
+struct txp_ast_node {
+       /* Corresponds to a token type, for example LESS_OR_EQUAL. */
+       int id;
+       union {
+               /* Pointers to the child nodes (interior nodes only). */
+               struct txp_ast_node **children;
+               /* Leaf nodes only. */
+               union txp_semantic_value sv;
+       };
+       /*
+        * The number of children is implicitly given by the id, but we include
+        * it here to avoid having to maintain a lookup table. The AST is
+        * usually small, so we can afford to waste a byte per node.
+        */
+       uint8_t num_children;
+};
+
+/* Called from both the lexer and the parser. */
+__attribute__ ((format (printf, 3, 4)))
+void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...);
+
+/* Helper functions for the lexer. */
+unsigned parse_quoted_string(const char *src, const char quote_chars[2],
+               char **result);
+int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result);
+
+struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child);
+struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left,
+               struct txp_ast_node *right);
+
+struct txp_ast_node *txp_new_ast_leaf_node(int id);
+
+/* linhash.c */
+
+struct linhash_item {
+       const char *key;
+       void *object;
+};
+
+typedef int linhash_comparator(const struct linhash_item **a,
+               const struct linhash_item **b);
+
+struct linhash_table;
+struct linhash_iterator;
+
+struct linhash_table *linhash_new(uint32_t order);
+int linhash_insert(struct linhash_item *item, struct linhash_table *t,
+               void ***object);
+struct linhash_item *linhash_lookup(const char *key,
+               const struct linhash_table *t);
+void *linhash_remove(const char *key, struct linhash_table *t);
+void linhash_free(struct linhash_table *t);
+
+struct linhash_iterator *linhash_iterator_new(struct linhash_table *t,
+               linhash_comparator *comp, bool reverse);
+struct linhash_item *linhash_iterator_item(const struct linhash_iterator *iter);
+void linhash_iterator_next(struct linhash_iterator *iter);
+void linhash_iterator_free(struct linhash_iterator *iter);
+
+char *linhash_statistics(const struct linhash_table *t);
+uint32_t linhash_num_items(const struct linhash_table *t);
diff --git a/tfortune.c b/tfortune.c
new file mode 100644 (file)
index 0000000..e5b2061
--- /dev/null
@@ -0,0 +1,1549 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <lopsub.h>
+
+#include "tf.h"
+#include "tfortune.lsg.h"
+
+#define TF_SEP "---- "
+
+static struct lls_parse_result *lpr, *sublpr;
+#define CMD_PTR(_cname) lls_cmd(LSG_TFORTUNE_CMD_ ## _cname, tfortune_suite)
+#define OPT_RESULT(_cname, _oname) (lls_opt_result(\
+       LSG_TFORTUNE_ ## _cname ## _OPT_ ## _oname, \
+       (CMD_PTR(_cname) == CMD_PTR(TFORTUNE))? lpr : sublpr))
+#define OPT_GIVEN(_cname, _oname) (lls_opt_given(OPT_RESULT(_cname, _oname)))
+#define OPT_STRING_VAL(_cname, _oname) (lls_string_val(0, \
+       OPT_RESULT(_cname, _oname)))
+#define OPT_UINT32_VAL(_cname, _oname) (lls_uint32_val(0, \
+               OPT_RESULT(_cname, _oname)))
+
+int loglevel_arg_val;
+
+struct tf_user_data {int (*handler)(void);};
+#define EXPORT_CMD_HANDLER(_cmd) const struct tf_user_data \
+       lsg_tfortune_com_ ## _cmd ## _user_data = { \
+               .handler = com_ ## _cmd \
+       };
+
+struct map_chunk {
+       const char *base;
+       size_t len;
+};
+
+struct epigram {
+       struct map_chunk epi, tags;
+};
+
+static int lopsub_error(int lopsub_ret, char **errctx)
+{
+       const char *msg = lls_strerror(-lopsub_ret);
+       if (*errctx)
+               ERROR_LOG("%s: %s\n", *errctx, msg);
+       else
+               ERROR_LOG("%s\n", msg);
+       free(*errctx);
+       *errctx = NULL;
+       return -E_LOPSUB;
+}
+
+/* per epigram context for the tag expression parser */
+struct epi_properties {
+       const struct map_chunk *chunk; /* only the epigram */
+       struct linhash_table *tagtab;
+       unsigned num_tags;
+};
+
+unsigned epi_len(const struct epi_properties *props)
+{
+       return props->chunk->len;
+}
+
+char *epi_text(const struct epi_properties *props)
+{
+       const char *txt = props->chunk->base;
+       char *result = malloc(props->chunk->len + 1);
+       memcpy(result, txt, props->chunk->len);
+       result[props->chunk->len] = '\0';
+       return result;
+}
+
+bool epi_has_tag(const char *tag, const struct epi_properties *props)
+{
+       return linhash_lookup(tag, props->tagtab);
+}
+
+struct tag_iter {
+       char *str;
+       char *saveptr; /* for strtok_r(3) */
+       char *token;
+};
+
+static struct tag_iter *tag_iter_new(const struct map_chunk *tags)
+{
+       struct tag_iter *titer = xmalloc(sizeof(*titer));
+
+       titer->str = xmalloc(tags->len + 1);
+       memcpy(titer->str, tags->base, tags->len);
+       titer->str[tags->len] = '\0';
+       titer->token = strtok_r(titer->str, ",", &titer->saveptr);
+       return titer;
+}
+
+static const char *tag_iter_get(const struct tag_iter *titer)
+{
+       return titer->token;
+}
+
+static void tag_iter_next(struct tag_iter *titer)
+{
+       titer->token = strtok_r(NULL, ",", &titer->saveptr);
+}
+
+static void tag_iter_free(struct tag_iter *titer)
+{
+       if (!titer)
+               return;
+       free(titer->str);
+       free(titer);
+}
+
+static bool epi_admissible(const struct epigram *epi,
+               const struct txp_context *ast)
+{
+       bool admissible;
+       const char *p;
+       struct epi_properties props = {
+               .chunk = &epi->epi,
+               .tagtab = linhash_new(3),
+               .num_tags = 0,
+       };
+       struct tag_iter *titer;
+
+
+       for (
+               titer = tag_iter_new(&epi->tags);
+               (p = tag_iter_get(titer));
+               tag_iter_next(titer)
+       ) {
+               struct linhash_item item = {.key = p};
+               linhash_insert(&item, props.tagtab, NULL);
+               props.num_tags++;
+       }
+
+       admissible = txp_eval_ast(ast, &props);
+       linhash_free(props.tagtab);
+       tag_iter_free(titer);
+       return admissible;
+}
+
+static void print_epigram(const struct epigram *epi, bool print_tags)
+{
+       printf("%.*s", (int)epi->epi.len, epi->epi.base);
+       if (print_tags)
+               printf(TF_SEP "%.*s\n", (int)epi->tags.len, epi->tags.base);
+}
+
+static void print_admissible_epigrams(const struct epigram *epis,
+               unsigned num_epis)
+{
+       unsigned n;
+
+       for (n = 0; n < num_epis; n++)
+               print_epigram(epis + n, OPT_GIVEN(PRINT, TAGS));
+}
+
+static void print_random_epigram(const struct epigram *epis, unsigned num_epis)
+{
+       long unsigned r;
+       const struct epigram *epi;
+       struct timeval tv;
+
+       if (num_epis == 0) {
+               ERROR_LOG("no matching epigram\n");
+               return;
+       }
+       gettimeofday(&tv, NULL);
+       srandom((unsigned)tv.tv_usec);
+       r = (num_epis + 0.0) * (random() / (RAND_MAX + 1.0));
+       assert(r < num_epis);
+       epi = epis + r;
+       print_epigram(epi, OPT_GIVEN(PRINT, TAGS));
+}
+
+static int read_tag_expression(struct iovec *result)
+{
+       const char *tx = OPT_STRING_VAL(PRINT, EXPRESSION);
+       int ret, fd;
+
+       assert(tx);
+       if (strcmp(tx, "-")) {
+               char *filename;
+               if (tx[0] != '/') {
+                       char *home = get_homedir();
+                       xasprintf(&filename, "%s/.tfortune/expressions/%s",
+                               home, tx);
+                       free(home);
+               } else
+                       filename = xstrdup(tx);
+               ret = open(filename, O_RDONLY);
+               if (ret < 0) {
+                       ret = -ERRNO_TO_TF_ERROR(errno);
+                       ERROR_LOG("could not open %s\n", filename);
+               }
+               free(filename);
+               if (ret < 0)
+                       return ret;
+               fd = ret;
+       } else
+               fd = STDIN_FILENO;
+       ret = fd2buf(fd, result);
+       close(fd);
+       return ret;
+}
+
+static int tx2ast(struct iovec *tx, struct txp_context **ast)
+{
+       int ret;
+       char *errmsg;
+
+       ret = txp_init(tx, ast, &errmsg);
+       if (ret < 0) {
+               ERROR_LOG("could not parse tag expression: %s\n", errmsg);
+               free(errmsg);
+               return ret;
+       }
+       return 1;
+}
+
+struct epi_iter {
+       struct iovec *maps;
+       unsigned num_maps;
+       unsigned map_num;
+       struct epigram epi;
+       unsigned num_epis;
+};
+
+static bool get_next_epi(struct epi_iter *eiter)
+{
+       const char *epi_start = NULL;
+
+       for (; eiter->map_num < eiter->num_maps; eiter->map_num++) {
+               struct iovec *iov = eiter->maps + eiter->map_num;
+               const char *buf, *end = iov->iov_base + iov->iov_len;
+
+               if (!epi_start && eiter->epi.tags.base)
+                       epi_start = eiter->epi.tags.base
+                               + eiter->epi.tags.len + 1;
+               else
+                       epi_start = iov->iov_base;
+               buf = epi_start;
+               while (buf < end) {
+                       const size_t sep_len = strlen(TF_SEP);
+                       const char *p, *cr, *tags;
+                       size_t tag_len;
+
+                       cr = memchr(buf, '\n', end - buf);
+                       if (!cr)
+                               break;
+                       p = cr + 1;
+                       if (p + sep_len >= end)
+                               break;
+                       if (strncmp(p, TF_SEP, sep_len) != 0) {
+                               buf = p;
+                               continue;
+                       }
+                       tags = p + sep_len;
+                       cr = memchr(tags, '\n', end - tags);
+                       if (cr)
+                               tag_len = cr - tags;
+                       else
+                               tag_len = end - tags;
+                       eiter->epi.epi.base = epi_start;
+                       eiter->epi.epi.len = p - epi_start;
+                       eiter->epi.tags.base = tags;
+                       eiter->epi.tags.len = tag_len;
+                       eiter->num_epis++;
+                       return true;
+               }
+       }
+       eiter->epi.epi.base = NULL;
+       eiter->epi.epi.len = 0;
+       eiter->epi.tags.base = NULL;
+       eiter->epi.tags.len = 0;
+       return false;
+}
+
+static char *get_basedir(void)
+{
+       char *home, *basedir;
+       if (OPT_GIVEN(TFORTUNE, BASEDIR))
+               return xstrdup(OPT_STRING_VAL(TFORTUNE, BASEDIR));
+       home = get_homedir();
+       xasprintf(&basedir, "%s/.tfortune", home);
+       free(home);
+       return basedir;
+}
+
+static char *get_epidir(void)
+{
+       char *basedir, *epidir;
+       basedir = get_basedir();
+       xasprintf(&epidir, "%s/epigrams", basedir);
+       free(basedir);
+       return epidir;
+}
+
+static char *get_xdir(void)
+{
+       char *basedir = get_basedir(), *xdir;
+       xasprintf(&xdir, "%s/expressions", basedir);
+       free(basedir);
+       return xdir;
+}
+
+static struct epi_iter *epi_iter_new(void)
+{
+       struct epi_iter *eiter = xmalloc(sizeof(*eiter));
+       unsigned num_inputs = lls_num_inputs(sublpr);
+
+       if (num_inputs == 0) {
+               struct regfile_iter *riter;
+               struct iovec iov;
+               char *epidir = get_epidir();
+
+               regfile_iter_new(epidir, &riter);
+               free(epidir);
+               eiter->maps = NULL;
+               eiter->num_maps = 0;
+               for (;
+                       regfile_iter_map(riter, &iov);
+                       regfile_iter_next(riter)
+               ) {
+                       eiter->num_maps++;
+                       eiter->maps = realloc(eiter->maps,
+                               eiter->num_maps * sizeof(*eiter->maps));
+                       eiter->maps[eiter->num_maps - 1] = iov;
+               }
+               regfile_iter_free(riter);
+       } else {
+               unsigned n;
+               eiter->maps = xmalloc(num_inputs * sizeof(*eiter->maps));
+               for (n = 0; n < num_inputs; n++)
+                       mmap_file(lls_input(n, sublpr), eiter->maps + n);
+               eiter->num_maps = num_inputs;
+       }
+       eiter->map_num = 0;
+       eiter->epi.epi.base = NULL;
+       eiter->epi.epi.len = 0;
+       eiter->epi.tags.base = NULL;
+       eiter->epi.tags.len = 0;
+       eiter->num_epis = 0;
+       get_next_epi(eiter);
+       return eiter;
+}
+
+static const struct epigram *epi_iter_get(const struct epi_iter *eiter)
+{
+       return (eiter->epi.epi.base && eiter->epi.tags.base)?
+               &eiter->epi : NULL;
+}
+
+static unsigned epi_iter_num_maps(const struct epi_iter *eiter)
+{
+       return eiter->num_maps;
+}
+
+static unsigned epi_iter_num_epis(const struct epi_iter *eiter)
+{
+       return eiter->num_epis;
+}
+
+static void epi_iter_next(struct epi_iter *eiter)
+{
+       get_next_epi(eiter);
+}
+
+static void epi_iter_free(struct epi_iter *eiter)
+{
+       unsigned n;
+
+       if (!eiter)
+               return;
+       for (n = 0; n < eiter->num_maps; n++)
+               munmap(eiter->maps[n].iov_base, eiter->maps[n].iov_len);
+       free(eiter->maps);
+       free(eiter);
+}
+
+static int com_print(void)
+{
+       int ret;
+       struct epigram *epis = NULL;
+       unsigned epis_sz = 0, nae = 0; /* number of admissible epis */
+       struct iovec tx;
+       struct txp_context *ast;
+       struct epi_iter *eiter;
+       const struct epigram *epi;
+
+       ret = read_tag_expression(&tx);
+       if (ret < 0)
+               return ret;
+       ret = tx2ast(&tx, &ast);
+       if (ret < 0)
+               goto free_tx;
+       for (
+               eiter = epi_iter_new();
+               (epi = epi_iter_get(eiter));
+               epi_iter_next(eiter)
+       ) {
+               if (!epi_admissible(epi, ast))
+                       continue;
+               if (nae >= epis_sz) {
+                       epis_sz = 2 * epis_sz + 1;
+                       epis = xrealloc(epis, epis_sz * sizeof(*epis));
+               }
+               epis[nae++] = *epi;
+       }
+       if (OPT_GIVEN(PRINT, ALL))
+               print_admissible_epigrams(epis, nae);
+       else
+               print_random_epigram(epis, nae);
+       epi_iter_free(eiter);
+       free(epis);
+       txp_free(ast);
+       ret = 1;
+free_tx:
+       free(tx.iov_base);
+       return ret;
+}
+EXPORT_CMD_HANDLER(print);
+
+static char *get_editor(void)
+{
+       char *val = getenv("TFORTUNE_EDITOR");
+
+       if (val && val[0])
+               return xstrdup(val);
+       val = getenv("EDITOR");
+       if (val && val[0])
+               return xstrdup(val);
+       return xstrdup("vi");
+}
+
+static void open_editor(const char *dir)
+{
+       char *editor;
+       char **argv;
+       pid_t pid;
+       unsigned n, num_inputs = lls_num_inputs(sublpr);
+
+       if ((pid = fork()) < 0) {
+               EMERG_LOG("fork error: %s\n", strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       if (pid) { /* parent */
+               wait(NULL);
+               return;
+       }
+       editor = get_editor();
+       argv = xmalloc((num_inputs + 2) * sizeof(*argv));
+       argv[0] = editor;
+       for (n = 0; n < num_inputs; n++)
+               xasprintf(&argv[n + 1], "%s/%s", dir, lls_input(n, sublpr));
+       argv[num_inputs + 1] = NULL;
+       execvp(editor, argv);
+       EMERG_LOG("execvp error: %s\n", strerror(errno));
+       _exit(EXIT_FAILURE);
+}
+
+static int create_dir(const char *path)
+{
+       int ret;
+
+       ret = mkdir(path, 0777); /* rely on umask */
+       if (ret < 0) {
+               if (errno == EEXIST)
+                       return 0;
+               ERROR_LOG("could not create %s\n", path);
+               return -ERRNO_TO_TF_ERROR(errno);
+       }
+       NOTICE_LOG("created directory %s\n", path);
+       return 1;
+}
+
+static int create_basedir(void)
+{
+       char *basedir;
+       int ret;
+
+       basedir = get_basedir();
+       ret = create_dir(basedir);
+       free(basedir);
+       return ret;
+}
+
+static int generic_edit(const char *dir)
+{
+       char *errctx;
+       int ret;
+       bool basedir_given = OPT_GIVEN(TFORTUNE, BASEDIR);
+
+       ret = lls_check_arg_count(sublpr, 1, INT_MAX, &errctx);
+       if (ret < 0) {
+               ret = lopsub_error(ret, &errctx);
+               return ret;
+       }
+       if (!basedir_given) {
+               ret = create_basedir();
+               if (ret < 0)
+                       return ret;
+               ret = create_dir(dir);
+               if (ret < 0)
+                       return ret;
+       }
+       open_editor(dir);
+       ret = 1;
+       return ret;
+}
+
+static int com_ede(void)
+{
+       int ret;
+       char *epidir = get_epidir();
+       ret = generic_edit(epidir);
+       free(epidir);
+       return ret;
+}
+EXPORT_CMD_HANDLER(ede);
+
+static int com_edx(void)
+{
+       int ret;
+       char *xdir = get_xdir();
+       ret = generic_edit(xdir);
+       free(xdir);
+       return ret;
+}
+EXPORT_CMD_HANDLER(edx);
+
+static int item_alpha_compare(const struct linhash_item **i1,
+               const struct linhash_item **i2)
+{
+       return strcmp((*i1)->key, (*i2)->key);
+}
+
+static int item_num_compare(const struct linhash_item **i1,
+               const struct linhash_item **i2)
+{
+       long unsigned v1 = (long unsigned)(*i1)->object;
+       long unsigned v2 = (long unsigned)(*i2)->object;
+
+       return v1 < v2? -1 : (v1 == v2? 0 : 1);
+}
+
+struct dentry {
+       char mode[11];
+       nlink_t nlink;
+       char *user, *group;
+       off_t size;
+       uint64_t mtime;
+       char *name;
+};
+
+static void make_dentry(const char *name, const struct stat *stat,
+               struct dentry *d)
+{
+       mode_t m = stat->st_mode;
+       struct group *g;
+       struct passwd *pwentry;
+
+       sprintf(d->mode, "----------");
+       if (S_ISREG(m))
+               d->mode[0] = '-';
+       else if (S_ISDIR(m))
+               d->mode[0] = 'd';
+       else if (S_ISCHR(m))
+               d->mode[0] = 'c';
+       else if ((S_ISBLK(m)))
+               d->mode[0] = 'b';
+       else if (S_ISLNK(m))
+               d->mode[0] = 'l';
+       else if (S_ISFIFO(m))
+               d->mode[0] = 'p';
+       else if ((S_ISSOCK(m)))
+               d->mode[0] = 's';
+       else
+               d->mode[0] = '?';
+
+       if (m & S_IRUSR)
+               d->mode[1] = 'r';
+       if (m & S_IWUSR)
+               d->mode[2] = 'w';
+       if (m & S_IXUSR) {
+               if (m & S_ISUID)
+                       d->mode[3] = 's';
+               else
+                       d->mode[3] = 'x';
+       } else if (m & S_ISUID)
+               d->mode[3] = 'S';
+
+       if (m & S_IRGRP)
+               d->mode[4] = 'r';
+       if (m & S_IWGRP)
+               d->mode[5] = 'w';
+       if (m & S_IXGRP) {
+               if (m & S_ISGID)
+                       d->mode[6] = 's';
+               else
+                       d->mode[6] = 'x';
+       } else if (m & S_ISGID)
+               d->mode[6] = 'S';
+
+       if (m & S_IROTH)
+               d->mode[7] = 'r';
+       if (m & S_IWOTH)
+               d->mode[8] = 'w';
+       if (m & S_IXOTH) {
+               if (m & S_ISVTX)
+                       d->mode[9] = 't';
+               else
+                       d->mode[9] = 'x';
+       } else if (m & S_ISVTX)
+               d->mode[9] = 'T';
+
+       d->nlink = stat->st_nlink;
+
+       pwentry = getpwuid(stat->st_uid);
+       if (pwentry && pwentry->pw_name)
+               d->user = xstrdup(pwentry->pw_name);
+       else
+               xasprintf(&d->user, "%u", stat->st_uid);
+
+       g = getgrgid(stat->st_gid);
+       if (g && g->gr_name)
+               d->group = xstrdup(g->gr_name);
+       else
+               xasprintf(&d->group, "%u", stat->st_gid);
+       d->size = stat->st_size;
+       d->mtime = stat->st_mtime;
+       d->name = xstrdup(name);
+}
+
+static int num_digits(uint64_t x)
+{
+       unsigned n = 1;
+
+       if (x != 0)
+               while (x > 9) {
+                       x /= 10;
+                       n++;
+               }
+       return n;
+}
+
+enum var_length_dentry_fields {
+       VLDF_NLINKS,
+       VLDF_USER,
+       VLDF_GROUP,
+       VLDF_SIZE,
+       NUM_VLDF
+};
+
+static void update_field_field_widths(int field_widths[NUM_VLDF],
+               const struct dentry *d)
+{
+       int *w, n;
+
+       w = field_widths + VLDF_NLINKS;
+       n = num_digits(d->nlink);
+       *w = MAX(*w, n);
+
+       w = field_widths + VLDF_USER;
+       n = strlen(d->user);
+       *w = MAX(*w, n);
+
+       w = field_widths + VLDF_GROUP;
+       n = strlen(d->group);
+       *w = MAX(*w, n);
+
+       w = field_widths + VLDF_SIZE;
+       n = num_digits(d->size);
+       *w = MAX(*w, n);
+}
+
+static void format_time(uint64_t seconds, uint64_t now, struct iovec *result)
+{
+       struct tm *tm;
+       const uint64_t m = 6 * 30 * 24 * 3600; /* six months */
+       size_t nbytes;
+
+       tm = localtime((time_t *)&seconds);
+       assert(tm);
+
+       if (seconds > now - m && seconds < now + m) {
+               nbytes = strftime(result->iov_base, result->iov_len,
+                       "%b %e %k:%M", tm);
+               assert(nbytes > 0);
+       } else {
+               nbytes = strftime(result->iov_base, result->iov_len,
+                       "%b %e  %Y", tm);
+               assert(nbytes > 0);
+       }
+}
+
+static int list_directory(const char *dir, bool long_listing)
+{
+       struct regfile_iter *riter;
+       const char *basename;
+       int ret, field_widths[NUM_VLDF] = {0};
+       struct dentry *dentries = NULL;
+       unsigned n, num_dentries = 0, dentries_size = 0;
+       struct timespec now;
+
+       for (
+               regfile_iter_new(dir, &riter);
+               (basename = regfile_iter_basename(riter));
+               regfile_iter_next(riter)
+       ) {
+               const struct stat *stat;
+               struct dentry *dentry;
+               if (!long_listing) {
+                       printf("%s\n", basename);
+                       continue;
+               }
+               num_dentries++;
+               if (num_dentries > dentries_size) {
+                       dentries_size = 2 * dentries_size + 1;
+                       dentries = xrealloc(dentries,
+                               dentries_size * sizeof(*dentries));
+               }
+               dentry = dentries + num_dentries - 1;
+               stat = regfile_iter_stat(riter);
+               make_dentry(basename, stat, dentry);
+               update_field_field_widths(field_widths, dentry);
+       }
+       regfile_iter_free(riter);
+       if (!long_listing)
+               return 0;
+       ret = clock_gettime(CLOCK_REALTIME, &now);
+       assert(ret == 0);
+       for (n = 0; n < num_dentries; n++) {
+               struct dentry *d = dentries + n;
+               char buf[30];
+               struct iovec iov = {.iov_base = buf, .iov_len = sizeof(buf)};
+
+               format_time(d->mtime, now.tv_sec, &iov);
+               printf("%s %*lu %*s %*s %*" PRIu64 " %s %s\n",
+                       d->mode,
+                       field_widths[VLDF_NLINKS], (long unsigned)d->nlink,
+                       field_widths[VLDF_USER], d->user,
+                       field_widths[VLDF_GROUP], d->group,
+                       field_widths[VLDF_SIZE], (uint64_t)d->size,
+                       buf,
+                       d->name
+               );
+               free(d->user);
+               free(d->group);
+               free(d->name);
+       }
+       free(dentries);
+       return 1;
+}
+
+static int com_lse(void)
+{
+       int ret;
+       char *dir = get_epidir();
+
+       ret = list_directory(dir, OPT_GIVEN(LSE, LONG));
+       free(dir);
+       return ret;
+}
+EXPORT_CMD_HANDLER(lse);
+
+static int com_lsx(void)
+{
+       int ret;
+       char *dir = get_xdir();
+
+       ret = list_directory(dir, OPT_GIVEN(LSE, LONG));
+       free(dir);
+       return ret;
+}
+EXPORT_CMD_HANDLER(lsx);
+
+static struct linhash_table *hash_tags(unsigned *num_epi_files,
+               unsigned *num_epis)
+{
+       struct linhash_table *tagtab = linhash_new(3);
+       struct epi_iter *eiter;
+       const struct epigram *epi;
+
+       for (
+               eiter = epi_iter_new();
+               (epi = epi_iter_get(eiter));
+               epi_iter_next(eiter)
+       ) {
+               struct tag_iter *titer;
+               const char *tag;
+               for (
+                       titer = tag_iter_new(&epi->tags);
+                       (tag = tag_iter_get(titer));
+                       tag_iter_next(titer)
+               ) {
+                       struct linhash_item item = {
+                               .key = xstrdup(tag),
+                               .object = (void *)1LU
+                       };
+                       void **object;
+                       if (linhash_insert(&item, tagtab, &object) < 0) {
+                               long unsigned val = (long unsigned)*object;
+                               val++;
+                               *object = (void *)val;
+                               free((char *)item.key);
+                       }
+               }
+               tag_iter_free(titer);
+       }
+       if (num_epi_files)
+               *num_epi_files = epi_iter_num_maps(eiter);
+       if (num_epis)
+               *num_epis = epi_iter_num_epis(eiter);
+       epi_iter_free(eiter);
+       return tagtab;
+}
+
+static int com_lst(void)
+{
+       struct linhash_table *tagtab;
+       struct linhash_iterator *liter;
+       struct linhash_item *itemp;
+       linhash_comparator *comp = OPT_GIVEN(LST, SORT_BY_COUNT)?
+               item_num_compare : item_alpha_compare;
+       bool reverse = OPT_GIVEN(LST, REVERSE);
+
+       tagtab = hash_tags(NULL, NULL);
+       for (
+               liter = linhash_iterator_new(tagtab, comp, reverse);
+               (itemp = linhash_iterator_item(liter));
+               linhash_iterator_next(liter)
+       ) {
+               if (OPT_GIVEN(LST, LONG))
+                       printf("%lu\t%s\n", (long unsigned)itemp->object,
+                               itemp->key);
+               else
+                       printf("%s\n", itemp->key);
+               free((char *)itemp->key);
+       }
+       linhash_iterator_free(liter);
+       linhash_free(tagtab);
+       return 0;
+}
+EXPORT_CMD_HANDLER(lst);
+
+static int com_stats(void)
+{
+       struct linhash_table *tagtab;
+       struct linhash_iterator *liter;
+       struct linhash_item *itemp;
+       unsigned num_epi_files, num_epis, num_unique_tags, num_x = 0;
+       long unsigned num_tags = 0;
+       char *xdir, *lh_stats;
+       struct regfile_iter *riter;
+       bool verbose = OPT_GIVEN(STATS, VERBOSE);
+
+       tagtab = hash_tags(&num_epi_files, &num_epis);
+       for (
+               liter = linhash_iterator_new(tagtab, NULL, false);
+               (itemp = linhash_iterator_item(liter));
+               linhash_iterator_next(liter)
+       )
+               num_tags += (long unsigned)itemp->object;
+       num_unique_tags = linhash_num_items(tagtab);
+       linhash_iterator_free(liter);
+       if (verbose)
+               lh_stats = linhash_statistics(tagtab);
+       linhash_free(tagtab);
+
+       xdir = get_xdir();
+       for (
+               regfile_iter_new(xdir, &riter);
+               regfile_iter_basename(riter);
+               regfile_iter_next(riter)
+       )
+               num_x++;
+       regfile_iter_free(riter);
+       free(xdir);
+       printf("number of tag expressions.......... %5u\n", num_x);
+       printf("number of epigram files............ %5u\n", num_epi_files);
+       printf("number of epigrams................. %5u\n", num_epis);
+       printf("number of tags..................... %5lu\n", num_tags);
+       printf("number of unique tags.............. %5u\n", num_unique_tags);
+       printf("average number of epigrams per file %8.02f\n",
+               (float)num_epis / num_epi_files);
+       printf("average number of tags per epigram. %8.02f\n",
+               (float)num_tags / num_epis);
+       printf("average number of tag recurrence... %8.02f\n",
+               (float)num_tags / num_unique_tags);
+       if (verbose) {
+               printf("\nlinear hashing statistics:\n%s\n", lh_stats);
+               free(lh_stats);
+       }
+       return 1;
+}
+EXPORT_CMD_HANDLER(stats);
+
+#define LSG_TFORTUNE_CMD(_name) #_name
+static const char * const subcommand_names[] = {LSG_TFORTUNE_SUBCOMMANDS NULL};
+#undef LSG_TFORTUNE_CMD
+
+static void show_subcommand_summary(bool verbose)
+{
+       int i;
+
+       printf("Available subcommands:\n");
+       if (verbose) {
+               const struct lls_command *cmd;
+               for (i = 1; (cmd = lls_cmd(i, tfortune_suite)); i++) {
+                       const char *purpose = lls_purpose(cmd);
+                       const char *name = lls_command_name(cmd);
+                       printf("%-11s%s\n", name, purpose);
+               }
+       } else {
+               unsigned n = 8;
+               printf("\t");
+               for (i = 0; i < LSG_NUM_TFORTUNE_SUBCOMMANDS; i++) {
+                       if (i > 0)
+                               n += printf(", ");
+                       n += printf("%s", subcommand_names[i]);
+                       if (n > 70) {
+                               printf("\n\t");
+                               n = 8;
+                       }
+               }
+               printf("\n");
+       }
+}
+
+static int com_help(void)
+{
+       int ret;
+       char *errctx, *help;
+       const char *arg;
+       const struct lls_command *cmd;
+
+       ret = lls_check_arg_count(sublpr, 0, 1, &errctx);
+       if (ret < 0)
+               return lopsub_error(ret, &errctx);
+       if (lls_num_inputs(sublpr) == 0) {
+               show_subcommand_summary(OPT_GIVEN(HELP, LONG));
+               return 0;
+       }
+       arg = lls_input(0, sublpr);
+       ret = lls_lookup_subcmd(arg, tfortune_suite, &errctx);
+       if (ret < 0)
+               return lopsub_error(ret, &errctx);
+       cmd = lls_cmd(ret, tfortune_suite);
+       if (OPT_GIVEN(HELP, LONG))
+               help = lls_long_help(cmd);
+       else
+               help = lls_short_help(cmd);
+       printf("%s\n", help);
+       free(help);
+       return 1;
+}
+EXPORT_CMD_HANDLER(help);
+
+static void handle_help_and_version(void)
+{
+       int i;
+       char *help;
+       const struct lls_command *cmd;
+
+       if (OPT_GIVEN(TFORTUNE, VERSION)) {
+               printf("tfortune %s\n"
+                       "Copyright (C) " COPYRIGHT_YEAR " " AUTHOR ".\n"
+                       "License " LICENSE ": <" LICENSE_URL ">.\n"
+                       "This is free software: you are free to change and redistribute it.\n"
+                       "There is NO WARRANTY, to the extent permitted by law.\n"
+                       "Report bugs to " AUTHOR " <" PACKAGE_BUGREPORT ">.\n"
+                       ,
+                       tf_version()
+               );
+               exit(EXIT_SUCCESS);
+       }
+       cmd = CMD_PTR(TFORTUNE);
+       if (OPT_GIVEN(TFORTUNE, DETAILED_HELP))
+               help = lls_long_help(cmd);
+       else if (OPT_GIVEN(TFORTUNE, HELP))
+               help = lls_short_help(cmd);
+       else
+               return;
+       printf("%s\n", help);
+       free(help);
+       if (OPT_GIVEN(TFORTUNE, DETAILED_HELP))
+               for (i = 1; (cmd = lls_cmd(i, tfortune_suite)); i++) {
+                       help = lls_short_help(cmd);
+                       printf("%s\n---\n", help);
+                       free(help);
+               }
+       else
+               show_subcommand_summary(true /* verbose */);
+       exit(EXIT_SUCCESS);
+}
+
+enum tf_word_type {
+       WT_COMMAND_NAME,
+       WT_DOUBLE_DASH, /* -- */
+       WT_SHORT_OPT_WITH_ARG, /* -l */
+       WT_SHORT_OPT_WITHOUT_ARG, /* -V, -abc=d */
+       WT_LONG_OPT_WITH_ARG, /* --loglevel */
+       WT_LONG_OPT_WITHOUT_ARG, /* --foo=bar --help */
+       WT_OPTION_ARG,
+       WT_NON_OPTION_ARG,
+       WT_DUNNO,
+};
+
+static bool is_short_opt(const char *word)
+{
+       if (word[0] != '-')
+               return false;
+       if (word[1] == '-')
+               return false;
+       if (word[1] == '\0')
+               return false;
+       return true;
+}
+
+static bool is_long_opt(const char *word)
+{
+       if (word[0] != '-')
+               return false;
+       if (word[1] != '-')
+               return false;
+       if (word[2] == '\0')
+               return false;
+       return true;
+}
+
+/* whether the next word will be an arg to this short opt */
+static int short_opt_needs_arg(const char *word,
+               const char * const *short_opts)
+{
+       size_t n, len;
+
+       if (strchr(word, '='))
+               return false;
+       len = strlen(word);
+       for (n = 0; short_opts[n]; n++) {
+               const char *opt = short_opts[n];
+               if (word[len - 1] != opt[1])
+                       continue;
+               if (opt[2] == '=')
+                       return true;
+               else
+                       return false;
+       }
+       return -1;
+}
+
+/* whether the next word will be an arg to this long opt */
+static int long_opt_needs_arg(const char *word,
+               const char * const *long_opts)
+{
+       size_t n;
+
+       if (strchr(word, '='))
+               return false;
+       for (n = 0; long_opts[n]; n++) {
+               const char *opt = long_opts[n];
+               size_t len = strlen(opt);
+
+               if (opt[len - 1] == '=')
+                       len--;
+               if (strncmp(word + 2, opt + 2, len - 2))
+                       continue;
+               if (opt[len] == '=')
+                       return true;
+               else
+                       return false;
+       }
+       return -1;
+}
+
+static bool get_word_types(unsigned cword, unsigned arg0,
+               const char * const *short_opts, const char * const *long_opts,
+               enum tf_word_type *result)
+{
+       const char *word;
+       unsigned n;
+       bool have_dd = false;
+       int ret;
+
+       /* index zero is always the command name */
+       assert(cword > arg0);
+       result[arg0] = WT_COMMAND_NAME;
+       for (n = arg0 + 1; n < cword; n++) {
+               enum tf_word_type prev_type = result[n - 1];
+
+               if (have_dd) {
+                       result[n] = WT_NON_OPTION_ARG;
+                       continue;
+               }
+               if (prev_type == WT_SHORT_OPT_WITH_ARG) {
+                       result[n] = WT_OPTION_ARG;
+                       continue;
+               }
+               if (prev_type == WT_LONG_OPT_WITH_ARG) {
+                       result[n] = WT_OPTION_ARG;
+                       continue;
+               }
+               word = lls_input(n, sublpr);
+               if (strcmp(word, "--") == 0) {
+                       result[n] = WT_DOUBLE_DASH;
+                       have_dd = true;
+                       continue;
+               }
+               if (is_short_opt(word)) {
+                       ret = short_opt_needs_arg(word, short_opts);
+                       if (ret < 0)
+                               goto dunno;
+                       if (ret > 0)
+                               result[n] = WT_SHORT_OPT_WITH_ARG;
+                       else
+                               result[n] = WT_SHORT_OPT_WITHOUT_ARG;
+                       continue;
+               }
+               if (is_long_opt(word)) {
+                       ret = long_opt_needs_arg(word, long_opts);
+                       if (ret < 0)
+                               goto dunno;
+                       if (ret > 0)
+                               result[n] = WT_LONG_OPT_WITH_ARG;
+                       else
+                               result[n] = WT_LONG_OPT_WITHOUT_ARG;
+                       continue;
+               }
+               result[n] = WT_NON_OPTION_ARG;
+       }
+       return have_dd;
+dunno:
+       for (; n <= cword; n++)
+               result[n] = WT_DUNNO;
+       return false;
+}
+
+#define DUMMY_COMPLETER(_name) static char **complete_ ## _name( \
+       __attribute__ ((unused)) uint32_t cword, \
+       __attribute__ ((unused)) unsigned arg0, \
+       __attribute__ ((unused)) bool have_dd \
+       ) {return NULL;}
+
+DUMMY_COMPLETER(tfortune)
+DUMMY_COMPLETER(compgen)
+DUMMY_COMPLETER(completer)
+
+static const char * const supercmd_opts[] = {LSG_TFORTUNE_TFORTUNE_OPTS, NULL};
+
+static void print_zero_terminated_list(const char * const *list)
+{
+       const char * const *c;
+       for (c = list; *c; c++)
+               printf("%s%c", *c, '\0');
+}
+
+static void print_option_list(const char * const *opts)
+{
+       const char * const *c;
+
+       for (c = opts; *c; c++) {
+               int len = strlen(*c);
+               assert(len > 0);
+               if ((*c)[len - 1] == '=')
+                       len--;
+               printf("%.*s%c", len, *c, '\0');
+       }
+}
+
+static void activate_dirname_completion(void)
+{
+       printf("%c", '\0');
+       printf("-o dirnames%c", '\0');
+}
+
+static void complete_loglevels(void)
+{
+       unsigned n;
+       const struct lls_option *opt = lls_opt(LSG_TFORTUNE_TFORTUNE_OPT_LOGLEVEL,
+               CMD_PTR(TFORTUNE));
+
+       for (n = 0; n < LSG_NUM_TFORTUNE_TFORTUNE_LOGLEVEL_VALUES; n++) {
+               const char *v = lls_enum_string_val(n, opt);
+               printf("%s%c", v, '\0');
+       }
+}
+
+static char **complete_dentries(const char *dir)
+{
+       const char *bn;
+       struct regfile_iter *riter;
+       unsigned n;
+       char **result = NULL;
+
+       regfile_iter_new(dir, &riter);
+       for (
+               n = 0;
+               (bn = regfile_iter_basename(riter));
+               regfile_iter_next(riter), n++
+       ) {
+               result = xrealloc(result, (n + 2) * sizeof(*result));
+               result[n] = xstrdup(bn);
+               result[n + 1] = NULL;
+       }
+       return result;
+}
+
+static char **complete_ede(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0,
+               __attribute__ ((unused)) bool have_dd)
+{
+       char **result, *epidir = get_epidir();
+
+       result = complete_dentries(epidir);
+       free(epidir);
+       return result;
+}
+
+static char **complete_edx(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0,
+               __attribute__ ((unused)) bool have_dd)
+{
+       char **result, *xdir = get_xdir();
+
+       result = complete_dentries(xdir);
+       free(xdir);
+       return result;
+}
+
+static char **complete_std_opts(bool have_dd, const char * const *opts)
+{
+       if (have_dd)
+               print_option_list(opts);
+       else
+               print_option_list(supercmd_opts);
+       return NULL;
+}
+
+static char **complete_stats(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_STATS_OPTS, NULL};
+       return complete_std_opts(have_dd, opts);
+}
+
+static char **complete_lse(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_LSE_OPTS, NULL};
+       return complete_std_opts(have_dd, opts);
+}
+
+static char **complete_lst(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_LST_OPTS, NULL};
+       return complete_std_opts(have_dd, opts);
+}
+
+static char **complete_lsx(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_LSX_OPTS, NULL};
+       return complete_std_opts(have_dd, opts);
+}
+
+static char **complete_help(__attribute__ ((unused)) uint32_t cword,
+               __attribute__ ((unused)) unsigned arg0, bool have_dd)
+{
+       const char * const opts[] = {LSG_TFORTUNE_HELP_OPTS, NULL};
+
+       if (!have_dd)
+               print_option_list(supercmd_opts);
+       else
+               print_option_list(opts);
+       print_zero_terminated_list(subcommand_names);
+       return NULL;
+}
+
+static char **complete_print(uint32_t cword, unsigned arg0, bool have_dd)
+{
+       const char * const short_opts[] = {LSG_TFORTUNE_PRINT_SHORT_OPTS, NULL};
+       const char * const long_opts[] = {LSG_TFORTUNE_PRINT_LONG_OPTS, NULL};
+       const char * const opts[] = {LSG_TFORTUNE_PRINT_OPTS, NULL};
+       enum tf_word_type *word_types, prev_type;
+       const char *prev;
+       char **result, *xdir;
+
+       word_types = xmalloc(cword * sizeof(*word_types));
+       get_word_types(cword, arg0, short_opts, long_opts, word_types);
+       prev = lls_input(cword - 1, sublpr);
+       prev_type = word_types[cword - 1];
+       free(word_types);
+       switch (prev_type) {
+               case WT_COMMAND_NAME:
+               case WT_SHORT_OPT_WITHOUT_ARG:
+               case WT_OPTION_ARG:
+               case WT_LONG_OPT_WITHOUT_ARG:
+               case WT_DOUBLE_DASH:
+                       if (!have_dd)
+                               print_option_list(supercmd_opts);
+                       else
+                               print_option_list(opts);
+                       return NULL;
+               case WT_SHORT_OPT_WITH_ARG:
+                       if (strcmp(prev, "-x") == 0)
+                               goto complete_expression;
+                       break;
+               case WT_LONG_OPT_WITH_ARG:
+                       if (strcmp(prev, "--expression") == 0)
+                               goto complete_expression;
+                       break;
+               default:
+                       return NULL;
+       }
+complete_expression:
+       xdir = get_xdir();
+       result = complete_dentries(xdir);
+       free(xdir);
+       return result;
+}
+
+typedef char **(*completer)(uint32_t cword, unsigned arg0, bool have_dd);
+
+#define LSG_TFORTUNE_CMD(_name) complete_ ## _name
+static const completer completers[] = {LSG_TFORTUNE_COMMANDS};
+#undef LSG_TFORTUNE_CMD
+
+static int call_subcmd_completer(unsigned cmd_num, int arg0, uint32_t cword,
+               bool have_dd)
+{
+       char **c, **candidates = completers[cmd_num](cword, arg0, have_dd);
+
+       if (!candidates)
+               return 0;
+       for (c = candidates; *c; c++) {
+               printf("%s%c", *c, '\0');
+               free(*c);
+       }
+       free(candidates);
+       return 1;
+}
+
+static bool need_subcommand_completer(uint32_t cword, unsigned subcmd_idx,
+               const enum tf_word_type *word_types, bool have_dd)
+{
+       enum tf_word_type prev_type;
+       const char *word;
+
+       if (subcmd_idx == 0)
+               return false;
+       if (have_dd)
+               return true;
+       prev_type = word_types[cword - 1];
+       assert(prev_type != WT_COMMAND_NAME);
+       switch (prev_type) {
+               case WT_SHORT_OPT_WITH_ARG:
+               case WT_LONG_OPT_WITH_ARG:
+               case WT_DUNNO:
+                       return false;
+               default:
+                       break;
+       }
+       word = lls_input(cword, sublpr);
+       if (is_short_opt(word))
+               return false;
+       if (is_long_opt(word))
+               return false;
+       return true;
+}
+
+static int com_compgen(void)
+{
+       unsigned n;
+       uint32_t cword = OPT_UINT32_VAL(COMPGEN, CURRENT_WORD_INDEX);
+       int ret;
+       unsigned subcmd_idx;
+       const char *word, *prev;
+       const char * const short_opts[] = {LSG_TFORTUNE_TFORTUNE_SHORT_OPTS, NULL};
+       const char * const long_opts[] = {LSG_TFORTUNE_TFORTUNE_LONG_OPTS, NULL};
+       enum tf_word_type *word_types, prev_type;
+       bool have_dd;
+
+       if (cword == 0 || cword > lls_num_inputs(sublpr)) {
+               ERROR_LOG("current word index == %u!?\n", cword);
+               return -ERRNO_TO_TF_ERROR(EINVAL);
+       }
+       word_types = xmalloc(cword * sizeof(*word_types));
+       have_dd = get_word_types(cword, 0, short_opts, long_opts, word_types);
+       /*
+        * Locate the subcommand argument, if present. It is always the first
+        * non-option argument.
+        */
+       subcmd_idx = 0;
+       for (n = 1; n < cword; n++) {
+               if (word_types[n] != WT_NON_OPTION_ARG)
+                       continue;
+               subcmd_idx = n;
+               break;
+       }
+       if (need_subcommand_completer(cword, subcmd_idx, word_types, have_dd)) {
+               free(word_types);
+               word = lls_input(subcmd_idx, sublpr);
+               ret = lls_lookup_subcmd(word, tfortune_suite, NULL);
+               if (ret < 0) /* invalid subcommand */
+                       return 0;
+               return call_subcmd_completer(ret, subcmd_idx, cword, have_dd);
+       }
+       /* no subcommand */
+       prev_type = word_types[cword - 1];
+       prev = lls_input(cword - 1, sublpr);
+       free(word_types);
+       switch (prev_type) {
+               case WT_DUNNO:
+                       return 0;
+               case WT_COMMAND_NAME:
+               case WT_SHORT_OPT_WITHOUT_ARG:
+               case WT_OPTION_ARG:
+               case WT_NON_OPTION_ARG:
+               case WT_LONG_OPT_WITHOUT_ARG:
+                       if (!have_dd)
+                               print_option_list(supercmd_opts);
+                       /* fall through */
+               case WT_DOUBLE_DASH:
+                       print_zero_terminated_list(subcommand_names);
+                       break;
+               case WT_SHORT_OPT_WITH_ARG:
+                       if (strcmp(prev, "-b") == 0) {
+                               activate_dirname_completion();
+                               return 1;
+                       }
+                       if (strcmp(prev, "-l") == 0) {
+                               complete_loglevels();
+                               return 1;
+                       }
+                       break;
+               case WT_LONG_OPT_WITH_ARG:
+                       if (strcmp(prev, "--basename") == 0) {
+                               activate_dirname_completion();
+                               return 1;
+                       }
+                       if (strcmp(prev, "--loglevel") == 0) {
+                               complete_loglevels();
+                               return 1;
+                       }
+                       break;
+       }
+       return 0;
+}
+EXPORT_CMD_HANDLER(compgen);
+
+static int com_completer(void)
+{
+       printf("%s\n",
+               "_tfortune() \n"
+               "{ \n"
+                       "local -i i offset=${TF_OFFSET:-0} \n"
+                       "local w compopts= have_empty=false\n"
+                       "local cur=\"${COMP_WORDS[$COMP_CWORD]}\" \n"
+
+                       "i=0 \n"
+                       "COMPREPLY=() \n"
+                       "while read -d '' w; do \n"
+                               "[[ -z \"$w\" ]] && { have_empty=true; continue; }\n"
+                               "if [[ $have_empty == true ]]; then\n"
+                                       "compopt $w\n"
+                               "else \n"
+                                       "[[ \"$w\" != \"$cur\"* ]] && continue \n"
+                                       "COMPREPLY[i]=\"$w\" \n"
+                                       "let i++ \n"
+                               "fi \n"
+                       "done < <(tfortune -- compgen --current-word-index \\\n"
+                       "\"$((COMP_CWORD + offset))\" -- $TF_EXTRA \"${COMP_WORDS[@]}\")\n"
+               "} \n"
+               "complete -F _tfortune tfortune \n"
+       );
+       if (OPT_GIVEN(COMPLETER, ALIAS)) {
+               const char *ali = OPT_STRING_VAL(PRINT, EXPRESSION);
+               printf("alias %s=\"tfortune --\"\n", ali);
+               printf("_%s() { \n"
+                               "COMP_WORDS[0]='--'\n"
+                               "TF_EXTRA='tf' \n"
+                               "TF_OFFSET=1 \n"
+                               "_tfortune \"$@\" \n"
+                               "unset TF_EXTRA TF_OFFSET\n"
+                       "}\n",
+                       ali
+               );
+               printf("complete -F _%s %s \n", ali, ali);
+       }
+       return 1;
+}
+EXPORT_CMD_HANDLER(completer);
+
+int main(int argc, char **argv)
+{
+       char *errctx;
+       int ret;
+       const struct lls_command *cmd = CMD_PTR(TFORTUNE), *subcmd;
+       const struct tf_user_data *ud;
+       unsigned num_inputs;
+
+       ret = lls_parse(argc, argv, cmd, &lpr, &errctx);
+       if (ret < 0) {
+               lopsub_error(ret, &errctx);
+               exit(EXIT_FAILURE);
+       }
+       loglevel_arg_val = OPT_UINT32_VAL(TFORTUNE, LOGLEVEL);
+       handle_help_and_version();
+       num_inputs = lls_num_inputs(lpr);
+       if (num_inputs == 0) {
+               show_subcommand_summary(true /* verbose */);
+               ret = 0;
+               goto free_lpr;
+       }
+       ret = lls_lookup_subcmd(argv[argc - num_inputs], tfortune_suite, &errctx);
+       if (ret < 0) {
+               ret = lopsub_error(ret, &errctx);
+               goto free_lpr;
+       }
+       subcmd = lls_cmd(ret, tfortune_suite);
+       ret = lls_parse(num_inputs, argv + argc - num_inputs, subcmd,
+               &sublpr, &errctx);
+       if (ret < 0) {
+               ret = lopsub_error(ret, &errctx);
+               goto free_lpr;
+       }
+       ud = lls_user_data(subcmd);
+       ret = ud->handler();
+       lls_free_parse_result(sublpr, subcmd);
+       if (ret < 0)
+               ERROR_LOG("%s\n", tf_strerror(-ret));
+free_lpr:
+       lls_free_parse_result(lpr, cmd);
+       exit(ret >= 0? EXIT_SUCCESS : EXIT_FAILURE);
+}
diff --git a/tfortune.suite.m4 b/tfortune.suite.m4
new file mode 100644 (file)
index 0000000..f3e288d
--- /dev/null
@@ -0,0 +1,418 @@
+# SPDX-License-Identifier: GPL-3.0-only
+
+[suite tfortune]
+caption = Subcommands
+[supercommand tfortune]
+       purpose = fortune cookies with tags
+       [description]
+               DESCRIPTION1()
+
+               DESCRIPTION2()
+
+               DESCRIPTION3()
+       [/description]
+       synopsis = [global-options...] [--] [<subcommand> [subcommand-options...]]
+       [option help]
+               summary = print help and exit
+               short_opt = h
+       [option detailed-help]
+               summary = print help, including all details, and exit
+       [option version]
+               summary = print version and exit
+               short_opt = V
+       [option loglevel]
+               summary = control amount of logging
+               short_opt = l
+               arg_info = required_arg
+               arg_type = string
+               typestr = severity
+               values = {
+                       LSGLL_DEBUG = "debug",
+                       LSGLL_INFO = "info",
+                       LSGLL_NOTICE = "notice",
+                       LSGLL_WARNING = "warning",
+                       LSGLL_ERROR = "error",
+                       LSGLL_CRIT = "crit",
+                       LSGLL_EMERG = "emerg"
+               }
+               default_val = warning
+               [help]
+                       Log only messages with severity greater or equal than the given
+                       value. Possible values:
+
+                       debug: Produces really noisy output.
+                       info: Still noisy, but won't fill up the disk quickly.
+                       notice: Indicates normal, but significant event.
+                       warning: Unexpected events that can be handled.
+                       error: Unhandled error condition.
+                       crit: System might be unreliable.
+                       emerg: Last message before exit.
+               [/help]
+       [option basedir]
+               summary = where to look for input files
+               short_opt = b
+               arg_info = required_arg
+               arg_type = string
+               typestr = directory
+               default_val = "~/.tfortune"
+               [help]
+                       This is used to locate epigrams and named tag expressions.
+                       If any filename argument does not start with a slash, the
+                       filename is interpreted as relative to this base directory.
+
+                       Epigrams are expected in the "epigrams" subdirectory of the base
+                       directory while tag expressions are expected to be stored below
+                       "expressions".
+               [/help]
+
+[subcommand compgen]
+       purpose = perform bash command line completion
+       non-opts-name = arg...
+       [description]
+               This subcommand is executed from the bash completer for tfortune. The
+               completer sets the non-option arguments for the subcommand to the
+               words of the current command line. It obtains these words from the
+               elements of the special COMP_WORDS array which is maintained by bash.
+
+               The compgen subcommand writes all possible completions to stdout. The
+               completer reads in the completions and builds the COMPREPLY array
+               containing the matching entries. Bash examines the elements of this
+               array and completes the command line if there is a single matching
+               completion, or prints out the list of completions in case of ambiguity.
+
+       [/description]
+       [option current-word-index]
+               short_opt = c
+               summary = index of the current word in the command line
+               typestr = num
+               arg_info = required_arg
+               arg_type = uint32
+               [help]
+                       An index into the argument vector of the word containing the current
+                       cursor position. See the description of the $CWORD special variable
+                       in the bash manual.
+               [/help]
+
+[subcommand completer]
+       purpose = print the bash completer to stdout
+       [description]
+               The output of this command is designed to be re-used as input for bash.
+               Specifically, bash completion for tfortune can be activated by adding
+               the following to .bashrc: eval "$(tfortune completer)".
+       [/description]
+       [option alias]
+               summary = also add an alias and a completer for it
+               short_opt = a
+               arg_info = required_arg
+               arg_type = string
+               typestr = name
+               [help]
+                       Specify this to define a bash alias for tfortune along with the
+                       completer. Unlike the regular tfortune program, the alias will
+                       contain the double dash argument which separates the subcommand and
+                       its options from the options to tfortune itself.
+               [/help]
+
+[subcommand ede]
+       purpose = edit epigrams
+       non-opts-name = basename...
+       [description]
+               Opens the named epigram file an interactive editor. The executable
+               of the editor is determined as follows: First the contents of the
+               environment variable TFORTUNE_EDITOR is examined. If this variable
+               is empty or unset, EDITOR is tried. If EDITOR is also unset, vi
+               is executed.
+
+               The given basename is interpreted as described in the help text of the
+               --basedir option above. If --basedir is not given and the "epigrams"
+               directory does not exist, it is created.
+       [/description]
+
+[subcommand edx]
+       purpose = edit tag expressions
+       non-opts-name = basename...
+       [description]
+               Opens the named tag expression file an interactive editor. The editor
+               to execute is determined in the same way as for the "ede" subcommand.
+               Also, a non-existing "expressions" subdirectory is handled in the same
+               way.
+       [/description]
+[subcommand help]
+       purpose = list available subcommands or print command-specific help
+       non-opts-name = [command]
+       [description]
+               Without any arguments, help prints the list of available commands. When
+               called with a command name argument, it prints the help text of the
+               given command.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = show the long help text
+       [help]
+               If the optional argument is supplied, the long help text contains the
+               synopsis, the purpose and the description of the specified command,
+               followed by the option list including summary and help text of each
+               option. Without --long, the short help is shown instead. This omits
+               the description of the command and the option help.
+
+               If no command is supplied but --long is given, the list contains the
+               purpose of each command.
+       [/help]
+
+[subcommand lse]
+       purpose = list epigram files
+       [description]
+               Print the list of all epigram files.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = long listing
+               [help]
+                       This is similar to the long output of the standard ls(1) command.
+               [/help]
+
+[subcommand lst]
+       purpose = list tags
+       [description]
+               This lists all tags contained in any of the given input files.
+       [/description]
+       non-opts-name = <file>...
+       [option long]
+               short_opt = l
+               summary = long listing
+               [help]
+                       Also show how many times this tag appears.
+               [/help]
+       [option sort-by-count]
+               short_opt = c
+               summary = sort by occurrence count rather than alphabetically.
+       [option reverse]
+               short_opt = r
+               summary = reverse sort order
+
+[subcommand lsx]
+       purpose = list tag expressions
+       [description]
+               Print the list of all named tag expressions.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = long listing
+               [help]
+                       This is similar to the long output of the standard ls(1) command.
+               [/help]
+[subcommand print]
+       purpose = print epigram(s)
+       [description]
+               Unless --all is given, this picks an epigram by random from the
+               given file(s) which is admissible with respect to the given named
+               tag expression. If no file is given, all files are taken into account.
+       [/description]
+       non-opts-name = [file...]
+       [option expression]
+               short_opt = x
+               summary = name of the tag expression
+               arg_info = required_arg
+               arg_type = string
+               typestr = filename
+               default_val = /dev/null
+               [help]
+                       Use the tag expression stored in the given file to define the
+                       admissible epigrams. The special string "-" means to read the tag
+                       expression from stdin. The default value corresponds to the empty
+                       tag expression for which all epigrams are admissible.
+               [/help]
+       [option all]
+               short_opt = a
+               summary = print all admissible epigrams, not just a random one.
+       [option tags]
+               short_opt = t
+               summary = print also the tags of the selected epigram
+[subcommand stats]
+       purpose = show statistics
+       [description]
+               This prints several counts and averages about the epigrams, tags and
+               tag expressions.
+       [/description]
+       [option verbose]
+               short_opt = v
+               summary = include statistics about hash table utilization
+
+[section Input file format]
+       Input files may contain arbitrary many epigrams. The end of each
+       epigram must be marked with a "tag" line. The tag line consists of
+       four dashes, a space character, and a comma separated list of tags.
+       Tags may span multiple words, but no comma is allowed. The following
+       is an example input file for tfortune. It contains a single epigram
+       with two tags.
+
+       .RS
+       .EX
+       Anyone who attempts to generate random numbers by deterministic means
+       is, of course, living in a state of sin.          -- John von Neumann
+       ---- math,religion
+       .EE
+       .RE
+[/section]
+
+[section Tag Expressions]
+       Tag expressions are based on a context-free grammar in which the
+       following keywords are defined:
+       .IP \(bu 4
+       .B tag("foo")
+       evaluates to
+       .I true
+       if the epigram contains a tag named
+       .IR foo ,
+       .I false
+       otherwise.
+       .IP \(bu 4
+       .B len
+       evaluates to the number of bytes of the epigram.
+       .IP \(bu 4
+       .B text
+       evaluates to contents of the epigram. This is useful for pattern
+       matching.
+       .P
+
+       The grammar admits the following operators and relations:
+       .IP \(bu 4
+       .BR && ,
+       .BR || ,
+       .BR !:
+       logical operators for
+       .IR and ,
+       .IR or ,
+       and
+       .IR not .
+       .IP \(bu 4
+       .BR + ,
+       .BR - ,
+       .BR * ,
+       .BR + :
+       arithmetic operators for addition, negation or subtraction (unary
+       or binary minus), multiplication and division. Arithmetic is always
+       performed on 64 bit signed integers.
+       .IP \(bu 4
+       .BR < ,
+       .BR > ,
+       .BR <=,
+       .BR >=,
+       .BR ==,
+       .BR != :
+       .BR =~ :
+       less than, greater than, less or equal than, greater or equal than,
+       equal to, not equal to, regular expression match.
+       Regular expression patterns are of the form
+       .IR /pattern/[flags] .
+       That is, the pattern is delimited by slashes, and is followed by
+       zero or more characters, each specifying a flag according to the
+       following list
+       .RS
+       .IP \(bu 4
+       .BR i :
+       Ignore case in match
+       .RI ( REG_ICASE )
+       .IP \(bu 4
+       .BR n :
+       Treat newline as an ordinary character
+       .RI ( REG_NEWLINE )
+
+       .RE
+       .RS
+       Note that only extended regular expression patterns are supported. See
+       regex(3) for details.
+       .RE
+
+       The above operators obey the usual associativity and precedence
+       rules. Parentheses can be used to change precedence.
+
+       A
+       .I tag expression
+       is an expression in this grammar which evaluates to either
+       .I true
+       or
+       .IR false .
+       Epigrams for which the expression is
+       .I true
+       are called
+       .IR admissible .
+
+       For example, the above epigram is admissible for the tag expression
+
+       .RS
+       .EX
+               (tag("math") || tag("physics")) && len < 1000 && text =~ /neumann/i
+       .EE
+       .RE
+
+       It is not admissible for
+
+       .RS
+       .EX
+               (tag("math") && tag("physics")) || len > 1000 || text =~ /neumann/
+       .EE
+       .RE
+[/section]
+
+[section Examples]
+       .IP \(bu 2
+       Print a random epigram:
+
+       .RS 6
+       .EX
+               tfortune print
+       .EE
+       .RE
+       .IP \(bu 2
+       Print a random short (less than 100 bytes) epigram:
+
+       .RS 6
+       .EX
+               echo 'len < 100' | tfortune -l debug -- print -x -
+       .EE
+       .RE
+       .IP \(bu 2
+       List tags, including usage counts, sort by count in descending order:
+
+       .RS 6
+       .EX
+               tfortune -- lst -lcr
+       .EE
+       .RE
+       .IP \(bu 2
+       Activate bash completion and define the
+       .I tf
+       alias:
+
+       .RS 6
+       .EX
+       eval "$(tfortune completer -a tf)"
+       .EE
+       .RE
+
+[/section]
+
+[section copyright]
+       Written by AUTHOR()
+       .br
+       Copyright (C) COPYRIGHT_YEAR() AUTHOR()
+       .br
+       License: LICENSE()
+       .br
+       This is free software: you are free to change and redistribute it.
+       .br
+       There is NO WARRANTY, to the extent permitted by law.
+       .br
+       Report bugs to
+       .MT <PACKAGE_BUGREPORT()>
+       AUTHOR()
+       .ME
+       .br
+       Homepage:
+       .UR PACKAGE_URL()
+       .UE
+[/section]
+[section see also]
+       .BR fortune (6)
+[/section]
diff --git a/txp.lex b/txp.lex
new file mode 100644 (file)
index 0000000..b51002f
--- /dev/null
+++ b/txp.lex
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+ /*
+  * Since we do not supply yywrap(), we use noyywrap to instruct the scanner to
+  * behave as though yywrap() returned 1.
+  */
+%option noyywrap
+
+ /*
+  * We don't want symbols to clash with those of other flex users, particularly
+  * lopsub.
+  */
+%option prefix="txp_yy"
+
+ /*
+  * Generate a scanner that maintains the number of the current line read from
+  * its input in the yylineno variable.
+  */
+%option yylineno
+
+ /* Generate a bison-compatible scanner. */
+%option bison-bridge bison-locations
+
+ /*
+  * Warn (in particular) if the default rule can be matched but no default rule
+  * has been given.
+  */
+%option warn
+
+ /*
+  * Generate a scanner which is portable and safe to use in one or more threads
+  * of control.
+  */
+%option reentrant
+
+ /*
+  * Generate a scanner which always looks one extra character ahead. This is a
+  * bit faster than an interactive scanner for which look ahead happens only
+  * when necessary.
+  */
+%option never-interactive
+
+%{
+#include "tf.h"
+
+#define YYSTYPE TXP_YYSTYPE
+#define YYLTYPE TXP_YYLTYPE
+#define YY_DECL int txp_yylex(TXP_YYSTYPE *yylval_param, TXP_YYLTYPE *yylloc_param, \
+       struct txp_context *ctx, struct txp_ast_node **ast, txp_yyscan_t yyscanner)
+#include "txp.bison.h"
+#define TXP_YY_USER_ACTION do {txp_yylloc->first_line = txp_yylineno;} while (0);
+%}
+DECIMAL_CONSTANT  (0|([[:digit:]]{-}[0])[[:digit:]]*)
+STRING_LITERAL    \"([^\"\\\n]|(\\[\"\\abfnrtv]))*\"
+REGEX_PATTERN     \/([^\/\\\n]|(\\[\/\\abfnrtv]))*\/([in])*
+%%
+
+tag {return TAG;}
+len {return LEN;}
+text {return TEXT;}
+true {return TRUE;}
+false {return FALSE;}
+
+[[:space:]]+|#.*\n /* skip comments and whitespace */
+
+"("|")"|","|"+"|"-"|"*"|"/"|"<"|">" {return yytext[0];}
+
+"||" {return OR;}
+"&&" {return AND;}
+"!" {return NOT;}
+"==" {return EQUAL;}
+"!=" {return NOT_EQUAL;}
+"<=" {return LESS_OR_EQUAL;}
+">=" {return GREATER_OR_EQUAL;}
+"=~" {return REGEX_MATCH;}
+
+{DECIMAL_CONSTANT} {
+       int ret;
+       yylval->node = txp_new_ast_leaf_node(NUM);
+       ret = atoi64(yytext, &yylval->node->sv.intval);
+       if (ret < 0) {
+               free(yylval->node);
+               txp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext,
+                       tf_strerror(-ret));
+               return -E_TXP;
+       }
+       return NUM;
+}
+
+{STRING_LITERAL} {
+       yylval->node = txp_new_ast_leaf_node(STRING_LITERAL);
+       parse_quoted_string(yytext, "\"\"", &yylval->node->sv.strval);
+       return STRING_LITERAL;
+}
+
+{REGEX_PATTERN} {
+       int ret;
+       yylval->node = txp_new_ast_leaf_node(REGEX_PATTERN);
+       ret = txp_parse_regex_pattern(yytext, &yylval->node->sv.re_pattern);
+       if (ret < 0) {
+               txp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext,
+                       tf_strerror(-ret));
+               return -E_TXP;
+       }
+       return REGEX_PATTERN;
+}
+
+. {
+       txp_parse_error(yylloc->first_line, ctx, "unrecognized text: %s",
+               yytext);
+       return -E_TXP;
+}
diff --git a/txp.y b/txp.y
new file mode 100644 (file)
index 0000000..ee9080d
--- /dev/null
+++ b/txp.y
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+/*
+ * Provide more verbose and specific error messages instead of just "syntax
+ * error".
+ */
+%define parse.error verbose
+
+/*
+ * Verbose error messages may contain incorrect information if LAC (Lookahead
+ * Correction) is not enabled.
+ */
+%define parse.lac full
+
+/* Avoid symbol clashes (lopsub might also expose yy* symbols). */
+%define api.prefix {txp_yy}
+
+/*
+ * Although locations are automatically enabled as soon as the grammar uses the
+ * special @N tokens, specifying %locations explicitly allows for more accurate
+ * syntax error messages.
+ */
+%locations
+
+/*
+ * Generate a pure (reentrant) parser. With this option enabled, yylval and
+ * yylloc become local variables in yyparse(), and a different calling
+ * convention is used for yylex().
+ */
+%define api.pure full
+
+/* Additional arguments to yylex(), yyparse() and yyerror() */
+%param {struct txp_context *ctx}
+%param {struct txp_ast_node **ast}
+%param {txp_yyscan_t yyscanner} /* reentrant lexers */
+
+%{
+#include "tf.h"
+#include "txp.bison.h"
+
+int yylex(TXP_YYSTYPE *lvalp, TXP_YYLTYPE *llocp, struct txp_context *ctx,
+               struct txp_ast_node **ast, txp_yyscan_t yyscanner);
+static void yyerror(YYLTYPE *llocp, struct txp_context *ctx,
+               struct txp_ast_node **ast, txp_yyscan_t yyscanner, const char *msg);
+
+%}
+
+%union {
+       struct txp_ast_node *node;
+}
+
+/* terminals */
+%token <node> NUM
+%token <node> STRING_LITERAL
+%token <node> REGEX_PATTERN
+
+/* keywords with semantic value */
+%token <node> LEN
+%token <node> FALSE TRUE
+%token <node> TEXT
+
+/* keywords without semantic value */
+%token TAG
+
+/* operators, ordered by precendence */
+%left OR
+%left AND
+%left EQUAL NOT_EQUAL
+%left LESS_THAN LESS_OR_EQUAL GREATER_OR_EQUAL REGEX_MATCH FILENAME_MATCH
+%left '-' '+'
+%left '*' '/'
+%right NOT NEG /* negation (unary minus) */
+
+/* nonterminals */
+%type <node> string
+%type <node> exp
+%type <node> boolexp
+
+%%
+
+program:
+        /* empty */ {*ast = NULL; return 0;}
+        | string {*ast = $1; return 0;}
+        | exp {*ast = $1; return 0;}
+       | boolexp {*ast = $1; return 0;}
+
+string: STRING_LITERAL {$$ = $1;}
+       | TEXT {$$ = txp_new_ast_leaf_node(TEXT);}
+;
+
+exp: NUM {$$ = $1;}
+        | exp '+' exp {$$ = ast_node_new_binary('+', $1, $3);}
+        | exp '-' exp {$$ = ast_node_new_binary('-', $1, $3);}
+        | exp '*' exp {$$ = ast_node_new_binary('*', $1, $3);}
+        | exp '/' exp {$$ = ast_node_new_binary('/', $1, $3);}
+        | '-' exp %prec NEG {$$ = ast_node_new_unary(NEG, $2);}
+        | '(' exp ')' {$$ = $2;}
+       | LEN {$$ = txp_new_ast_leaf_node(LEN);}
+;
+
+boolexp: TAG '(' STRING_LITERAL ')' {$$ = ast_node_new_unary(TAG, $3);}
+       | TRUE {$$ = txp_new_ast_leaf_node(TRUE);}
+       | FALSE {$$ = txp_new_ast_leaf_node(FALSE);}
+       | '(' boolexp ')' {$$ = $2;}
+       | boolexp OR boolexp {$$ = ast_node_new_binary(OR, $1, $3);}
+       | boolexp AND boolexp {$$ = ast_node_new_binary(AND, $1, $3);}
+       | NOT boolexp {$$ = ast_node_new_unary(NOT, $2);}
+       | exp EQUAL exp {$$ = ast_node_new_binary(EQUAL, $1, $3);}
+       | exp NOT_EQUAL exp {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);}
+       | exp '<' exp {$$ = ast_node_new_binary('<', $1, $3);}
+       | exp '>' exp {$$ = ast_node_new_binary('>', $1, $3);}
+       | exp LESS_OR_EQUAL exp {
+               $$ = ast_node_new_binary(LESS_OR_EQUAL, $1, $3);
+       }
+       | exp GREATER_OR_EQUAL exp {
+               $$ = ast_node_new_binary(GREATER_OR_EQUAL, $1, $3);
+       }
+       | string REGEX_MATCH REGEX_PATTERN {
+               $$ = ast_node_new_binary(REGEX_MATCH, $1, $3);
+       }
+       | string EQUAL string {$$ = ast_node_new_binary(EQUAL, $1, $3);}
+       | string NOT_EQUAL string {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);}
+;
+%%
+
+/* Called by yyparse() on error */
+static void yyerror(YYLTYPE *llocp, struct txp_context *ctx,
+               __attribute__ ((unused)) struct txp_ast_node **ast,
+               __attribute__ ((unused)) txp_yyscan_t yyscanner,
+               const char *msg)
+{
+       txp_parse_error(llocp->first_line, ctx, "%s", msg);
+}
diff --git a/util.c b/util.c
new file mode 100644 (file)
index 0000000..69c759e
--- /dev/null
+++ b/util.c
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: GPL-3.0-only */
+
+#include "tf.h"
+
+DEFINE_TF_ERRLIST;
+
+int atoi64(const char *str, int64_t *value)
+{
+       char *endptr;
+       long long tmp;
+
+       errno = 0; /* To distinguish success/failure after call */
+       tmp = strtoll(str, &endptr, 10);
+       if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
+               return -E_ATOI_OVERFLOW;
+       /*
+        * If there were no digits at all, strtoll() stores the original value
+        * of str in *endptr.
+        */
+       if (endptr == str)
+               return -E_ATOI_NO_DIGITS;
+       /*
+        * The implementation may also set errno and return 0 in case no
+        * conversion was performed.
+        */
+       if (errno != 0 && tmp == 0)
+               return -E_ATOI_NO_DIGITS;
+       if (*endptr != '\0') /* Further characters after number */
+               return -E_ATOI_JUNK_AT_END;
+       *value = tmp;
+       return 1;
+}
+
+__attribute__ ((warn_unused_result))
+void *xrealloc(void *p, size_t size)
+{
+       /*
+        * No need to check for NULL pointers: If p is NULL, the call
+        * to realloc is equivalent to malloc(size)
+        */
+       assert(size);
+       if (!(p = realloc(p, size))) {
+               EMERG_LOG("realloc failed (size = %zu), aborting\n", size);
+               exit(EXIT_FAILURE);
+       }
+       return p;
+}
+
+__attribute__ ((warn_unused_result))
+void *xmalloc(size_t size)
+{
+       return xrealloc(NULL, size);
+}
+
+__attribute__ ((warn_unused_result))
+void *xcalloc(size_t size)
+{
+       void *p = xmalloc(size);
+       memset(p, 0, size);
+       return p;
+}
+
+__attribute__ ((warn_unused_result))
+char *xstrdup(const char *str)
+{
+       char *p = strdup(str);
+
+       if (p)
+               return p;
+       EMERG_LOG("strdup failed, aborting\n");
+       exit(EXIT_FAILURE);
+}
+
+/*
+ * Get the home directory of the current user. Returns a dynamically allocated
+ * string that must be freed by the caller.
+ */
+__attribute__ ((warn_unused_result))
+char *get_homedir(void)
+{
+       struct passwd *pw = getpwuid(getuid());
+
+       if (!pw) {
+               EMERG_LOG("could not get home directory: %s\n",
+                       strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       return xstrdup(pw->pw_dir);
+}
+
+/*
+ * Print a formated message to a dynamically allocated string.
+ *
+ * This function is similar to vasprintf(), a GNU extension which is not in C
+ * or POSIX. It allocates a string large enough to hold the output including
+ * the terminating null byte. The allocated string is returned via the first
+ * argument and must be freed by the caller. However, unlike vasprintf(), this
+ * function calls exit() if insufficient memory is available, while vasprintf()
+ * returns -1 in this case.
+ *
+ * It returns the number of bytes written, not including the terminating \p
+ * NULL character.
+ */
+__attribute__ ((format (printf, 2, 0)))
+unsigned xvasprintf(char **result, const char *fmt, va_list ap)
+{
+       int ret;
+       size_t size = 150;
+       va_list aq;
+
+       *result = xmalloc(size + 1);
+       va_copy(aq, ap);
+       ret = vsnprintf(*result, size, fmt, aq);
+       va_end(aq);
+       assert(ret >= 0);
+       if ((size_t)ret < size)
+               return ret;
+       size = ret + 1;
+       *result = xrealloc(*result, size);
+       va_copy(aq, ap);
+       ret = vsnprintf(*result, size, fmt, aq);
+       va_end(aq);
+       assert(ret >= 0 && (size_t)ret < size);
+       return ret;
+}
+
+__attribute__ ((format (printf, 2, 3)))
+/* Print to a dynamically allocated string, variable number of arguments.  */
+unsigned xasprintf(char **result, const char *fmt, ...)
+{
+       va_list ap;
+       unsigned ret;
+
+       va_start(ap, fmt);
+       ret = xvasprintf(result, fmt, ap);
+       va_end(ap);
+       return ret;
+}
+
+/*
+ * Compile a regular expression.
+ *
+ * This simple wrapper calls regcomp(3) and logs a message on errors.
+ */
+int xregcomp(regex_t *preg, const char *regex, int cflags)
+{
+       char *buf;
+       size_t size;
+       int ret = regcomp(preg, regex, cflags);
+
+       if (ret == 0)
+               return 1;
+       size = regerror(ret, preg, NULL, 0);
+       buf = xmalloc(size);
+       regerror(ret, preg, buf, size);
+       ERROR_LOG("%s\n", buf);
+       free(buf);
+       return -E_REGEX;
+}
+
+/* Write input from fd to dynamically allocated buffer. */
+int fd2buf(int fd, struct iovec *result)
+{
+       const size_t max_size = 32 * 1024;
+       size_t loaded = 0;
+       int ret;
+       struct iovec iov;
+
+       iov.iov_len = 100; /* guess that's sufficient */
+       iov.iov_base = xmalloc(iov.iov_len);
+       for (;;) {
+               ret = read(fd, iov.iov_base + loaded,  iov.iov_len - loaded);
+               if (ret < 0)
+                       ret = -ERRNO_TO_TF_ERROR(errno);
+               if (ret <= 0)
+                       break;
+               loaded += ret;
+               if (loaded >= iov.iov_len) {
+                       iov.iov_len *= 2;
+                       ret = -ERRNO_TO_TF_ERROR(EOVERFLOW);
+                       if (iov.iov_len >= max_size)
+                               break;
+                       iov.iov_base = xrealloc(iov.iov_base, iov.iov_len);
+               }
+       };
+       if (ret < 0) {
+               free(iov.iov_base);
+               result->iov_base = NULL;
+               result->iov_len = 0;
+               return ret;
+       }
+       iov.iov_len = loaded;
+       iov.iov_base = xrealloc(iov.iov_base, loaded + 1);
+       *((char *)iov.iov_base + loaded) = '\0';
+       *result = iov;
+       return 1;
+}
+
+__attribute__ ((format (printf, 2, 3)))
+void tf_log(int ll, const char* fmt,...)
+{
+       va_list argp;
+       if (ll < loglevel_arg_val)
+               return;
+       va_start(argp, fmt);
+       vfprintf(stderr, fmt, argp);
+       va_end(argp);
+}
+
+struct regfile_iter {
+       DIR *dir;
+       int dfd;
+       struct dirent *entry;
+       struct stat stat;
+};
+
+void regfile_iter_next(struct regfile_iter *iter)
+{
+       for (;;) {
+               iter->entry = readdir(iter->dir);
+               if (!iter->entry)
+                       return;
+               if (!strcmp(iter->entry->d_name, "."))
+                       continue;
+               if (!strcmp(iter->entry->d_name, ".."))
+                       continue;
+               if (fstatat(iter->dfd, iter->entry->d_name, &iter->stat, 0) == -1)
+                       continue;
+               if (!S_ISREG(iter->stat.st_mode))
+                       continue;
+               if (iter->stat.st_size == 0) /* skip over empty files */
+                       continue;
+               return;
+       }
+}
+
+void regfile_iter_new(const char *dirname, struct regfile_iter **result)
+{
+       struct regfile_iter *iter = xmalloc(sizeof(*iter));
+
+       iter->dir = opendir(dirname);
+       if (!iter->dir) {
+               EMERG_LOG("opendir %s: %s\n", dirname, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       iter->dfd = dirfd(iter->dir);
+       assert(iter->dfd >= 0);
+       regfile_iter_next(iter);
+       *result = iter;
+}
+
+static void *xmmap(size_t sz, int fd, const char *path)
+{
+       void *result = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+
+       if (result == MAP_FAILED) {
+               EMERG_LOG("mmap %s: %s\n", path, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       return result;
+}
+
+void mmap_file(const char *path, struct iovec *iov)
+{
+       int fd, ret;
+
+       ret = open(path, 0);
+       if (ret < 0) {
+               EMERG_LOG("could not open %s: %s\n", path, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       fd = ret;
+       iov->iov_len = lseek(fd, 0, SEEK_END);
+       if (iov->iov_len == (size_t)(off_t)-1) {
+               EMERG_LOG("lseek %s: %s\n", path, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       iov->iov_base = xmmap(iov->iov_len, fd, path);
+       close(fd);
+}
+
+bool regfile_iter_map(const struct regfile_iter *iter, struct iovec *result)
+{
+       int ret, fd;
+       void *map;
+       const char *path;
+
+       if (!iter->entry)
+               return false;
+       path = iter->entry->d_name;
+       ret = openat(iter->dfd, path, O_RDONLY, 0);
+       if (ret < 0) {
+               EMERG_LOG("could not open %s: %s\n", path, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+       fd = ret;
+       map = xmmap(iter->stat.st_size, fd, path);
+       close(fd);
+       result->iov_len = iter->stat.st_size;
+       result->iov_base = map;
+       return true;
+}
+
+const char *regfile_iter_basename(const struct regfile_iter *iter)
+{
+       if (!iter->entry)
+               return NULL;
+       return iter->entry->d_name;
+}
+
+const struct stat *regfile_iter_stat(const struct regfile_iter *iter)
+{
+       return &iter->stat;
+}
+
+void regfile_iter_free(struct regfile_iter *iter)
+{
+       closedir(iter->dir);
+       free(iter);
+}
diff --git a/version-gen.sh b/version-gen.sh
new file mode 100755 (executable)
index 0000000..a2c9075
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# SPDX-License-Identifier: GPL-3.0-only
+
+version_file='version.c'
+ver='unnamed_version'
+# First try git, then gitweb, then default.
+if [ -e '.git' -o -e '../.git' ]; then
+       git_ver=$(git describe --abbrev=4 HEAD 2>/dev/null)
+       [ -z "$git_ver" ] && git_ver="$ver"
+       # update stat information in index to match working tree
+       git update-index -q --refresh > /dev/null
+       # if there are differences (exit code 1), the working tree is dirty
+       git diff-index --quiet HEAD || git_ver=$git_ver-dirty
+       ver=$git_ver
+elif [ "${PWD%%-*}" = 'tfortune-' ]; then
+       ver=${PWD##*/tfortune-}
+fi
+ver=${ver#v}
+
+echo "$ver"
+
+# update version file if necessary
+content="const char *tf_version(void) {return \"$ver\";};"
+[ -r "$version_file" ] && echo "$content" | cmp -s - $version_file && exit 0
+echo >&2 "new git version: $ver"
+echo "$content" > $version_file