]> git.tuebingen.mpg.de Git - misma.git/commitdiff
Initial commit. v1.0.0
authorAndre Noll <maan@tuebingen.mpg.de>
Sun, 14 Jan 2024 22:55:26 +0000 (23:55 +0100)
committerAndre Noll <maan@tuebingen.mpg.de>
Sun, 14 Jan 2024 22:56:01 +0000 (23:56 +0100)
This project was started in late 2018. A first usable version was
ready by 2019-02 and was deployed on a single server. Until 2022-05,
the project languished but the trial version had been running all
the time. This identified the major design and usability shortcomings
which have been addressed subsequently. This work resulted in a much
improved but incompatible version.

Starting in late 2022, the improved version was deployed on several
storage servers, which revealed a few minor bugs and usability issues,
all of which have been addressed.

As of this commit the project has been made public. All commits that
led to the improved version have been discarded, so the repository
contains only the final result as a single initial commit.

From now on, backward incompatible changes are avoided if possible,
and require a deprecation period and a major version change otherwise.
Also subsequent changes will be documented properly.

12 files changed:
.gitignore [new file with mode: 0644]
Makefile [new file with mode: 0644]
README [new file with mode: 0644]
config.mak.in [new file with mode: 0644]
configure [new file with mode: 0755]
configure.ac [new file with mode: 0644]
index.html.m4 [new file with mode: 0644]
misma.c [new file with mode: 0644]
misma.h [new file with mode: 0644]
misma.suite.m4 [new file with mode: 0644]
util.c [new file with mode: 0644]
version-gen.sh [new file with mode: 0755]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..7a9c605
--- /dev/null
@@ -0,0 +1,5 @@
+misma
+misma.8
+build
+*.swp
+Makefile.local
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..32b2c10
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,261 @@
+# SPDX-License-Identifier: GPL-2.0+
+# Clear the suffix list and disable make's built-in rules and variables.
+.SUFFIXES:
+MAKEFLAGS += -Rr
+# Replace CC only while it still has make's built-in default value.
+ifeq ("$(origin CC)", "default")
+        CC := cc
+endif
+# SAY prints a short progress tag from within a recipe. It expands to nothing
+# when V is given on the command line.
+ifeq ("$(origin V)", "command line")
+       SAY =
+else
+       SAY = @echo '$(strip $(1))'
+endif
+
+# Run each recipe in a single shell invocation which exits on the first
+# failing command. Recipes below rely on this, e.g. for "cd" lines.
+.ONESHELL:
+.SHELLFLAGS := -ec
+PREFIX ?= /usr/local
+INSTALL ?= install
+MKDIR_P := mkdir -p
+RM := rm -f
+CHMOD := chmod
+GROFF := groff
+# B is the build directory, $(all) are the default build products.
+B := build
+all := misma misma.8
+all: $(all)
+
+# Package metadata. These values are passed to the C preprocessor via
+# XCPPFLAGS and to m4 via M4_CMD, and are used in the README text.
+PACKAGE := misma
+SLOGAN := the minimal snapshot manager
+AUTHOR := Andre Noll
+EMAIL := maan@tuebingen.mpg.de
+COPYRIGHT_YEAR := 2024
+URL := http://people.tuebingen.mpg.de/maan/$(PACKAGE)/
+CLONE_URL := git://git.tuebingen.mpg.de/$(PACKAGE)
+GITWEB_URL := http://git.tuebingen.mpg.de/$(PACKAGE).git
+HOME_URL := http://people.tuebingen.mpg.de/maan/
+# NOTE(review): LICENSE says GPL-2.0+ while LICENSE_URL points to the GPL-3.0
+# text. Confirm that this is intended.
+LICENSE := GPL-2.0+
+LICENSE_URL := https://www.gnu.org/licenses/gpl-3.0-standalone.html
+# Comma-separated list of log level identifiers, passed to C as -DLOGLEVELS.
+LOGLEVELS := LL_DEBUG,LL_INFO,LL_NOTICE,LL_WARNING,LL_ERROR,LL_CRIT,LL_EMERG
+
+# Each unit has one object file and one dependency file in the build directory.
+units := misma util version misma.lsg
+deps := $(addprefix $(B)/, $(addsuffix .d, $(units)))
+objs := $(addprefix $(B)/, $(addsuffix .o, $(units)))
+
+# Do not read the dependency files and config.mak if any goal contains
+# "clean" or "README". The leading "-" ignores files which do not exist.
+ifeq ($(findstring clean, $(MAKECMDGOALS)),)
+ifeq ($(findstring README, $(MAKECMDGOALS)),)
+-include $(deps)
+-include $(B)/config.mak
+endif
+endif
+
+# Mandatory preprocessor flags. They are kept apart from CPPFLAGS and are
+# placed before it on the compiler command line, see CC_CMD.
+XCPPFLAGS :=
+XCPPFLAGS += -I$(B)
+XCPPFLAGS += -Wunused-macros
+XCPPFLAGS += -DCOPYRIGHT_YEAR='"$(COPYRIGHT_YEAR)"'
+XCPPFLAGS += -DPACKAGE='"$(PACKAGE)"'
+XCPPFLAGS += -DAUTHOR='"$(AUTHOR)"'
+XCPPFLAGS += -DEMAIL='"$(EMAIL)"'
+XCPPFLAGS += -DURL='"$(URL)"'
+XCPPFLAGS += -DCLONE_URL='"$(CLONE_URL)"'
+XCPPFLAGS += -DGITWEB_URL='"$(GITWEB_URL)"'
+XCPPFLAGS += -DHOME_URL='"$(HOME_URL)"'
+XCPPFLAGS += -DGET_VERSION='$(PACKAGE)_version'
+XCPPFLAGS += -DLOGLEVELS='$(LOGLEVELS)'
+XCPPFLAGS += -DBUILD_DATE='"$(build_date)"'
+XCPPFLAGS += -DCC_VERSION='"$(cc_version)"'
+XCPPFLAGS += -DUNAME_RS='"$(uname_rs)"'
+XCPPFLAGS += -DLICENSE='"$(LICENSE)"'
+XCPPFLAGS += -DLICENSE_URL='"$(LICENSE_URL)"'
+
+# Mandatory compiler flags, placed before CFLAGS on the command line.
+XCFLAGS :=
+XCFLAGS += -fno-strict-aliasing
+XCFLAGS += -g
+XCFLAGS += -Os
+XCFLAGS += -Wundef -W -Wuninitialized
+XCFLAGS += -Wchar-subscripts
+XCFLAGS += -Werror-implicit-function-declaration
+XCFLAGS += -Wmissing-noreturn
+XCFLAGS += -Wbad-function-cast
+XCFLAGS += -Wredundant-decls
+XCFLAGS += -Wno-sign-compare -Wno-unknown-pragmas
+XCFLAGS += -Wdeclaration-after-statement
+XCFLAGS += -Wformat -Wformat-security -Wmissing-format-attribute
+XCFLAGS += -fdata-sections -ffunction-sections
+XCFLAGS += -Wstrict-prototypes
+XCFLAGS += -Wshadow
+XCFLAGS += -Wunused -Wall
+XCFLAGS += -Wformat-signedness
+XCFLAGS += -Wdiscarded-qualifiers
+
+XLDFLAGS := -Wl,--gc-sections
+# The shell command runs whenever the Makefile is parsed: it creates the build
+# directory and calls version-gen.sh with the package name and the path of
+# the version file. The output of the script becomes GIT_VERSION.
+version_file := $(B)/version.c
+GIT_VERSION := $(shell $(MKDIR_P) $(B) && ./version-gen.sh $(PACKAGE) $(version_file))
+
+# Compile one source file to $@ and write the dependency file for the stem
+# to the build directory.
+CC_CMD = $(CC) -c -o $@ $(XCPPFLAGS) $(CPPFLAGS) \
+       $(XCFLAGS) $(CFLAGS) -MMD -MF $(B)/$(*F).d -MT $@
+
+# All objects are rebuilt if the main header, the generated lopsub header or
+# this Makefile changes.
+$(objs): misma.h $(B)/misma.lsg.h Makefile
+
+$(B):
+       @$(MKDIR_P) $@
+
+# The recipes below change into the build directory on a line of its own,
+# which works because .ONESHELL is in effect.
+$(B)/config.h.in: configure.ac | $(B)
+       $(call SAY, AH $<)
+       cd $(B)
+       autoheader -f ../configure.ac
+$(B)/configure.sh: configure.ac | $(B)
+       $(call SAY, AC $<)
+       cd $(B)
+       autoconf ../configure.ac > configure.sh
+       $(CHMOD) 755 configure.sh
+# Re-check an existing configuration, or run the configure script without
+# creating output files if config.status does not exist yet.
+$(B)/config.status: $(B)/configure.sh | $(B)
+       $(call SAY, SH $<)
+       cd $(B)
+       if test -x config.status; then \
+               ./config.status --quiet --recheck; \
+       else \
+               ./configure.sh --no-create; \
+       fi
+# NOTE(review): two targets share this recipe, so a parallel make may run it
+# once per target. Confirm that this is harmless.
+$(B)/config.mak $(B)/config.h: $(B)/config.status config.mak.in $(B)/config.h.in
+       $(call SAY, CS $@)
+       cd $(B)
+       ln -f ../config.mak.in
+       ./config.status -q
+       test -f config.h && touch config.h
+# The three description paragraphs are used in the README text and are passed
+# to m4 as macro definitions by M4_CMD. PACKAGE() is left for m4 to expand.
+define DESCRIPTION1 :=
+       PACKAGE() is an open source application which maintains snapshots of one
+       or more thin provisioned logical volumes on Linux systems.
+endef
+
+define DESCRIPTION2 :=
+       Snapshots are created and removed automatically according to the
+       configured schedule. Old snapshots are replaced so that the time
+       between two consecutive snapshots doubles at each step. To prevent data
+       or metadata space exhaustion, the available space of the underlying
+       thin pools is monitored periodically and snapshots are removed early
+       when space gets tight.
+endef
+
+define DESCRIPTION3 :=
+       Besides the run subcommand which implements snapshot scheduling and
+       free space monitoring, PACKAGE() supports additional subcommands
+       to list existing snapshots and the utilization of the thin pools,
+       or to create/remove snapshots manually.
+endef
+
+# Common m4 invocation which defines the package metadata as m4 macros. The
+# rules which use it append further options, the input file and the output
+# redirection. M4 is set in config.mak.in.
+define M4_CMD =
+       $(call SAY, M4 $<)
+       $(M4) -D "AUTHOR=$(AUTHOR)" -D "COPYRIGHT_YEAR=$(COPYRIGHT_YEAR)" \
+               -D "PACKAGE=$(PACKAGE)" \
+               -D "SLOGAN=$(SLOGAN)" \
+               -D "EMAIL=$(EMAIL)" \
+               -D "URL=$(URL)" \
+               -D "CLONE_URL=$(CLONE_URL)" \
+               -D "GITWEB_URL=$(GITWEB_URL)" \
+               -D "HOME_URL=$(HOME_URL)" \
+               -D "LICENSE=$(LICENSE)" \
+               -D "LICENSE_URL=$(LICENSE_URL)" \
+               -D "DESCRIPTION1=$(DESCRIPTION1)" \
+               -D "DESCRIPTION2=$(DESCRIPTION2)" \
+               -D "DESCRIPTION3=$(DESCRIPTION3)"
+endef
+
+# Files generated by m4. The logo is made from the same template as the web
+# page, with MODE set to svg.
+$(B)/logo.svg: index.html.m4 Makefile
+       $(M4_CMD) -D MODE=svg $< > $@
+$(B)/index.html: index.html.m4 Makefile
+       $(M4_CMD) $< > $@
+$(B)/misma.suite: misma.suite.m4 Makefile
+       $(M4_CMD) $< > $@
+# Files generated by lopsubgen from the suite: C source, header, man page.
+# LOPSUBGEN is set in config.mak.in.
+$(B)/%.lsg.c: $(B)/%.suite
+       $(call SAY, LSGC $<)
+       $(LOPSUBGEN) --gen-c --output-dir $(B) < $<
+$(B)/%.lsg.h: $(B)/%.suite
+       $(call SAY, LSGH $<)
+       $(LOPSUBGEN) --gen-header --output-dir $(B) < $<
+%.8: $(B)/%.suite $(B)/version.c
+       $(call SAY, LSGM $<)
+       $(LOPSUBGEN) --gen-man=$(*F).8 --version-string $(GIT_VERSION) < $<
+$(B)/%.8.html: %.8
+       $(GROFF) -m man -Thtml -Wbreak < $< > $@
+
+# Objects are compiled from sources in the top level directory or from
+# generated sources in the build directory.
+$(B)/%.o: %.c | $(B)
+       $(call SAY, CC $<)
+       $(CC_CMD) $<
+$(B)/%.o: $(B)/%.c
+       $(call SAY, CC $<)
+       $(CC_CMD) $<
+# The executable in the top level directory is linked with the default
+# options, the one in the build directory is linked with -static.
+$(PACKAGE): $(objs)
+       $(call SAY, LD $@)
+       $(CC) -o $@ $^ $(XLDFLAGS) $(LDFLAGS) -llopsub -lm
+$(B)/$(PACKAGE): $(objs)
+       $(call SAY, LD-STATIC $@)
+       $(CC) -static -o $@ $^ $(XLDFLAGS) $(LDFLAGS) -llopsub -lm
+
+# datarootdir and sbindir are set in config.mak.in.
+mandir := $(datarootdir)/man/man8
+INSTALL ?= install
+INSTALL_PROGRAM ?= $(INSTALL) -m 755
+INSTALL_DATA ?= $(INSTALL) -m 644
+# Pass -s to the install command if any goal contains "strip".
+ifneq ($(findstring strip, $(MAKECMDGOALS)),)
+       strip_option := -s
+endif
+# Install the executable and the man page below $(DESTDIR).
+install install-strip: all
+       $(MKDIR_P) $(DESTDIR)$(sbindir) $(DESTDIR)$(mandir)
+       $(INSTALL_PROGRAM) $(strip_option) misma $(DESTDIR)$(sbindir)
+       $(INSTALL_DATA) misma.8 $(DESTDIR)$(mandir)
+
+# clean removes the objects and the default build products, distclean also
+# removes the whole build directory, and maintainer-clean runs git clean with
+# -dfqx and discards its output.
+clean:
+       $(RM) $(B)/*.o $(all)
+distclean: clean
+       $(RM) -r $(B)
+maintainer-clean:
+       git clean -dfqx > /dev/null 2>&1
+
+define README :=
+$(PACKAGE) -  $(SLOGAN)
+
+$(DESCRIPTION1)
+
+$(DESCRIPTION2)
+
+$(DESCRIPTION3)
+
+Resources
+~~~~~~~~~
+|      web page: $(URL)
+|      git clone URL: $(CLONE_URL)
+|      gitweb: $(GITWEB_URL)
+|      author's home page: $(HOME_URL)
+|      Send feedback to: $(AUTHOR) <$(EMAIL)>
+
+License
+~~~~~~~
+Open source, licensed under the $(LICENSE) license.
+
+Documentation
+~~~~~~~~~~~~~
+See misma.suite.m4. Or build the man page with \"make\" and run
+\"man -l misma.8\".
+
+Dependencies
+~~~~~~~~~~~~
+This package requires m4, autoconf, gnu make, gcc or clang, and
+lopsub. The configure script checks if all dependencies are installed
+and prints a meaningful error message if one of them is missing.
+
+Building
+~~~~~~~~
+Run \"make\" to build the package with the default settings. Run
+\"./configure -h\" to list configuration options.
+
+Installation
+~~~~~~~~~~~~
+Run \"sudo make install\" to install to /usr/local. To install to
+/somewhere/else, run \"./configure --prefix /somewhere/else && make\"
+first.
+endef
+
+# Print the README text to stdout. The text is expanded inside double quotes,
+# which is why the double quotes in the text itself are backslash-escaped.
+README:
+       @printf '%s\n' "$(README)"
+
+# Do not delete these generated files automatically.
+.PRECIOUS: $(B)/%.lsg.c $(B)/%.lsg.h $(B)/%.8
+# All command targets, including install-strip, are phony so that a file of
+# the same name can not prevent their recipes from running.
+.PHONY: all clean install install-strip distclean maintainer-clean README
+# Optional local additions, ignored if the file does not exist.
+-include Makefile.local
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..52a1fd7
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+Run "make README".
diff --git a/config.mak.in b/config.mak.in
new file mode 100644 (file)
index 0000000..592d9e6
--- /dev/null
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Template for config.mak, which is created by config.status and included
+# by the Makefile.
+
+prefix := @prefix@
+exec_prefix := @exec_prefix@
+
+# These two use prefix and exec_prefix
+sbindir := @sbindir@
+datarootdir := @datarootdir@
+
+# Paths of the required executables, as found by the configure script.
+LOPSUBGEN := @LOPSUBGEN@
+M4 := @M4@
diff --git a/configure b/configure
new file mode 100755 (executable)
index 0000000..d59262d
--- /dev/null
+++ b/configure
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Generate the real configure script and the config header template in the
+# build directory, then run the generated script with the given arguments.
+
+set -e
+
+builddir=build
+srcdir=..
+
+mkdir -p "$builddir"
+cd "$builddir"
+autoconf "$srcdir/configure.ac" > configure.sh
+chmod 755 configure.sh
+ln -f "$srcdir/config.mak.in"
+autoheader "$srcdir/configure.ac"
+sh configure.sh "$@"
diff --git a/configure.ac b/configure.ac
new file mode 100644 (file)
index 0000000..00e0f09
--- /dev/null
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+AC_PREREQ([2.61])
+# only for configure -h, see Makefile
+AC_INIT([software], [packages])
+# Output files created by config.status.
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_FILES([config.mak])
+AC_USE_SYSTEM_EXTENSIONS
+AC_PROG_CC
+AC_PROG_CPP
+
+# Text of the error message which is shown if lopsub was not found.
+AC_DEFUN([LOPSUB_NOT_FOUND], [
+The lopsub library is required to build this software, but the checks
+indicate it is not installed on your system.  Run the following
+command to download a copy.
+       git clone git://git.tuebingen.mpg.de/lopsub.git
+Install the library, then run this configure script again.
+
+If you installed lopsub at a non-standard location, make sure to set
+PATH, CPPFLAGS and LDFLAGS accordingly. For example:
+
+       pfx=/prefix/where/lopsub/is/installed
+       export PATH=\$pfx/bin:\$PATH
+       export CPPFLAGS=-I\$pfx/include
+       export LDFLAGS=-L\$pfx/lib
+])
+
+# Look up the executable named by the first argument and store its path in
+# the variable named like the executable, but in upper case. Abort with the
+# second argument as the error message if the executable was not found.
+AC_DEFUN([REQUIRE_EXECUTABLE], [
+       AC_PATH_PROG(m4_toupper([$1]), [$1])
+       test -z "$m4_toupper([$1])" && AC_MSG_ERROR([$2])
+])
+REQUIRE_EXECUTABLE([lopsubgen], [LOPSUB_NOT_FOUND])
+REQUIRE_EXECUTABLE([m4], [m4 is required to build this package])
+
+# Both the header and the library must be usable.
+HAVE_LOPSUB=yes
+AC_CHECK_HEADER(lopsub.h, [], [HAVE_LOPSUB=no])
+AC_CHECK_LIB([lopsub], [lls_merge], [], [HAVE_LOPSUB=no])
+if test $HAVE_LOPSUB = no; then AC_MSG_ERROR([LOPSUB_NOT_FOUND()]); fi
+AC_OUTPUT
diff --git a/index.html.m4 b/index.html.m4
new file mode 100644 (file)
index 0000000..47e8c79
--- /dev/null
@@ -0,0 +1,113 @@
+dnl SPDX-License-Identifier: GPL-2.0+
+define(`SVG', `dnl
+<svg xmlns="http://www.w3.org/2000/svg" height="50" width="100">
+       <path stroke-width="3" stroke="black" fill="none"
+               d="
+                       M 5 23
+                       l 6,0
+                       l 0,-6 l 6,0 l 0,6
+                       l 6,0
+                       l 0,-12 l 6,0 l 0,12
+                       l 6,0
+                       l 0,-6 l 6,0 l 0,6
+                       l 6,0
+                       l 0,-18 l 6,0 l 0,18
+                       l 6,0
+                       l 0,-6 l 6,0 l 0,6
+                       l 6,0
+                       l 0,-12 l 6,0 l 0,12
+                       l 6,0
+                       l 0,-6 l 6,0 l 0,6
+                       l 6,0
+               "
+       />
+       <path stroke-width="4" stroke="blue" fill="none"
+               d="
+                       M 10 46
+                       l 0,-15 l 6,0 l 0,10 l 6,0 l 0,-10 l 6,0 l 0,15
+                       m 6,0
+                       l 0,-13 l 0,-4 m 0,15
+                       m 5,0
+                       l 9,0 l 0,-6 l -8,0 l 0,-7 l 9,0 m 0,15
+                       m 5,0
+                       l 0,-15 l 6,0 l 0,10 l 6,0 l 0,-10 l 6,0 l 0,15
+                       m 6,-2
+                       l 0,-13 l 10,0 l 0,13 l -12,0 m 12,0 l 4,0
+               "
+       />
+</svg>
+')dnl
+ifelse(MODE(), `svg', `SVG() m4exit')
+
+<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'
+'http://www.w3.org/TR/html4/loose.dtd'>
+
+<html>
+       <head>
+               <!-- Content type and character set share one content attribute. -->
+               <meta
+                       http-equiv='Content-Type'
+                       content='text/html; charset=utf-8'
+               >
+               <title>PACKAGE()</title>
+               <style type='text/css'>
+                       body {
+                               text-align: justify;
+                               padding: 0px 30px 0px 30px;
+                               font-size: 130%;
+                       }
+                       a {
+                               color: #01c;
+                       }
+                       pre,code {
+                               font-size: 110%;
+                       }
+               </style>
+       </head>
+       <body>
+               <table width="100%">
+                       <tr>
+                               <td>
+                                       <h2 align="left">
+                                                PACKAGE() - SLOGAN()
+                                       </h2>
+                               </td>
+                               <td align="right"> SVG() </td>
+                       </tr>
+               </table>
+               <p> DESCRIPTION1() </p>
+               <p> DESCRIPTION2() </p>
+               <p> DESCRIPTION3() </p>
+
+               <h3> Installation </h3>
+
+               <p> PACKAGE() is easy to install and easy to configure. To build from
+               source, a number of dependencies must be installed. The following
+               should work on Debian/Ubuntu: </p>
+
+               <pre>
+       sudo apt-get install gcc git autoconf m4 make liblopsub-dev
+       git clone CLONE_URL()
+       cd misma
+       ./configure &amp;&amp; make &amp;&amp; sudo make install
+               </pre>
+
+               <p> Alternatively, download this pre-compiled <a
+               href="PACKAGE()">static binary</a> for x86, which should work fine
+               on all Linux distributions. </p>
+
+               Run <code>PACKAGE() help</code> to display the subcommands and
+               <code>man PACKAGE()</code> for the manual page. The examples included
+               in the manual illustrate how to create thin logical volumes and how
+               to snapshot them with PACKAGE().
+
+               <h3> Resources </h3> <ul>
+                       <li> Clone `URL': <code>CLONE_URL()</code> </li>
+                       <li> <a href="GITWEB_URL()">Gitweb</a> </li>
+                       <li> <a href="PACKAGE().8.html">manual page</a> </li>
+                       <li> The author's <a href="HOME_URL()">home page</a> </li>
+                       <li> Send feedback to <a href="mailto:EMAIL()">AUTHOR()</a> </li>
+               </ul>
+
+       </body>
+</html>
diff --git a/misma.c b/misma.c
new file mode 100644 (file)
index 0000000..fc9ab3d
--- /dev/null
+++ b/misma.c
@@ -0,0 +1,1543 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#include "misma.h"
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <lopsub.h>
+#include <sys/mman.h>
+#include <math.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/ioctl.h>
+
+#include "misma.lsg.h"
+
+/* The kinds of configurable intervals. The last enumerator is the count. */
+enum interval_type {
+       IT_CREATE,
+       IT_TRIM,
+       IT_MAX_AGE,
+       NUM_INTERVAL_TYPES
+};
+
+/*
+ * One set of configuration values. Instances exist at global scope and as
+ * members of the volume group, thin pool and origin structures.
+ */
+struct snapshot_config {
+       struct percentage_pair thresholds;
+       /* In seconds, indexed by interval type. */
+       unsigned interval[NUM_INTERVAL_TYPES];
+};
+/* The built-in defaults of the global scope. */
+static struct snapshot_config global_config = {
+       .thresholds = {.data = 95, .meta = 95},
+       .interval = {
+               [IT_CREATE] = 6 * 3600,
+               [IT_TRIM] = 0,
+               [IT_MAX_AGE] = 86400 * 365
+       }
+};
+
+/* The kinds of events. The last enumerator is the count. */
+enum event_type {ET_CREATE, ET_CHECK, ET_TRIM, NUM_EVENT_TYPES};
+
+/* A volume group is identified by its index into the volume_group array. */
+struct volume_group {
+       char *name;
+       struct snapshot_config config;
+};
+static unsigned num_vgs;
+static struct volume_group *volume_group; /* num_vgs elements */
+
+/* Return the name of the volume group with the given ID. No range check. */
+static const char *vgname(unsigned vgid)
+{
+       const struct volume_group *vg = volume_group + vgid;
+
+       return vg->name;
+}
+
+/*
+ * Return the ID of the volume group of the given name, or ~0U if there is
+ * none. Sequential search is good enough.
+ */
+static unsigned get_vgid(const char *name)
+{
+       unsigned vgid = 0;
+
+       while (vgid < num_vgs) {
+               if (strcmp(volume_group[vgid].name, name) == 0)
+                       return vgid;
+               vgid++;
+       }
+       return ~0U;
+}
+
+/*
+ * Register a volume group and return its ID. If a volume group of this name
+ * exists already, nothing is inserted and the existing ID is returned.
+ */
+static unsigned insert_vg(const char *name)
+{
+       unsigned id = get_vgid(name);
+       struct volume_group *slot;
+
+       if (id != ~0U)
+               return id;
+       id = num_vgs;
+       INFO_LOG("vg #%u: %s\n", id, name);
+       num_vgs = id + 1;
+       volume_group = xrealloc(volume_group,
+               num_vgs * sizeof(*volume_group));
+       slot = &volume_group[id];
+       memset(slot, 0, sizeof(*slot));
+       slot->name = xstrdup(name);
+       return id;
+}
+
+/* A thin pool is identified by its index into the thin_pool array. */
+struct thin_pool {
+       char *name;
+       unsigned vgid; /* index into the volume_group array */
+       struct snapshot_config config;
+       struct percentage_pair utilization;
+       enum lvm_scope threshold_scope;
+};
+static unsigned num_pools;
+static struct thin_pool *thin_pool; /* num_pools elements */
+
+/*
+ * Return the ID of the pool of the given name in the given volume group,
+ * or ~0U if there is none.
+ */
+static unsigned get_poolid(const char *name, const char *vg_name)
+{
+       unsigned id;
+
+       for (id = 0; id < num_pools; id++) {
+               const struct thin_pool *tp = &thin_pool[id];
+
+               if (strcmp(name, tp->name) != 0)
+                       continue;
+               if (strcmp(vg_name, vgname(tp->vgid)) == 0)
+                       return id;
+       }
+       return ~0U;
+}
+
+/*
+ * Register a thin pool and return its ID. If the pool exists already, its
+ * ID is returned and nothing is inserted. The volume group of the pool must
+ * have been inserted already, otherwise the function dies.
+ */
+static unsigned insert_pool(const char *name, const char *vgname)
+{
+       unsigned id = get_poolid(name, vgname);
+       struct thin_pool *tp;
+
+       if (id != ~0U)
+               return id;
+       id = num_pools++;
+       INFO_LOG("pool #%u: %s/%s\n", id, vgname, name);
+       thin_pool = xrealloc(thin_pool, num_pools * sizeof(*thin_pool));
+       tp = &thin_pool[id];
+       memset(tp, 0, sizeof(*tp));
+       tp->name = xstrdup(name);
+       tp->vgid = get_vgid(vgname);
+       if (tp->vgid == ~0U)
+               die("invalid vg: %s", vgname);
+       return id;
+}
+
+/* A sequence number of zero marks an unused slot, see slot_is_used(). */
+struct snapshot {
+       unsigned seq;
+       uint64_t epoch;
+};
+
+/* An origin is identified by its index into the origin array. */
+struct origin {
+       char *name;
+       unsigned vgid; /* index into the volume_group array */
+       unsigned poolid; /* index into the thin_pool array */
+       struct snapshot_config config;
+       enum lvm_scope iscope[NUM_INTERVAL_TYPES]; /* interval scopes */
+       uint64_t last_event[NUM_EVENT_TYPES]; /* epochs */
+       unsigned last_seq;
+       unsigned num_slots;
+       struct snapshot *snapshot; /* indexed by slot number */
+};
+static unsigned num_origins;
+static struct origin *origin;
+/* Iterate over the indices of the origin array. */
+#define FOR_EACH_ORIGIN(_n) for (_n = 0; _n < num_origins; _n++)
+
+static unsigned check_seconds = 60;
+
+/*
+ * Return the length in seconds of the given interval for an origin.
+ *
+ * The value is taken from the configuration of the scope which is stored in
+ * the origin for this interval type: global, volume group, pool or origin.
+ */
+static unsigned interval_length(enum interval_type it, const struct origin *o)
+{
+       switch (o->iscope[it]) {
+               case LS_GLOBAL: return global_config.interval[it];
+               case LS_VG: return volume_group[o->vgid].config.interval[it];
+               case LS_POOL: return thin_pool[o->poolid].config.interval[it];
+               case LS_ORIGIN: return o->config.interval[it];
+               default: assert(0);
+       }
+       /*
+        * Only reached for an invalid scope when assertions are compiled
+        * out. Return the global default rather than falling off the end of
+        * the function.
+        */
+       return global_config.interval[it];
+}
+
+/*
+ * Return the ID of the origin of the given name in the given volume group,
+ * or ~0U if there is none.
+ */
+static unsigned get_oid(const char *name, const char *vg_name)
+{
+       unsigned id;
+
+       FOR_EACH_ORIGIN(id) {
+               const struct origin *org = &origin[id];
+
+               if (strcmp(name, org->name) != 0)
+                       continue;
+               if (strcmp(vg_name, vgname(org->vgid)) == 0)
+                       return id;
+       }
+       return ~0U;
+}
+
+/*
+ * Register a new origin and return its ID.
+ *
+ * The origin must not exist yet, and both its volume group and its pool must
+ * have been inserted already. All three conditions are only asserted.
+ */
+static unsigned insert_origin(const char *name, const char *vgname,
+               const char *poolname)
+{
+       unsigned oid = get_oid(name, vgname), id = num_origins;
+       struct origin *org;
+
+       assert(oid == ~0U);
+       INFO_LOG("origin #%u: %s/%s, pool: %s\n", id, vgname, name,
+               poolname);
+       num_origins = id + 1;
+       origin = xrealloc(origin, num_origins * sizeof(*origin));
+       org = &origin[id];
+       memset(org, 0, sizeof(*org));
+       org->name = xstrdup(name);
+       org->vgid = get_vgid(vgname);
+       assert(org->vgid != ~0U);
+       org->poolid = get_poolid(poolname, vgname);
+       assert(org->poolid != ~0U);
+       return id;
+}
+
+/* An event of the given type for one origin, with an epoch as timestamp. */
+struct event {
+       enum event_type type;
+       uint64_t epoch;
+       struct origin *origin;
+};
+
+/*
+ * Comparison function for sorting events by epoch in descending order, so
+ * the event with the largest epoch comes first.
+ */
+static int event_compare(const void *d1, const void *d2)
+{
+       const struct event *x = d1, *y = d2;
+
+       if (x->epoch == y->epoch)
+               return 0;
+       return x->epoch < y->epoch? 1 : -1;
+}
+
+static char *config_file;
+
+/*
+ * Iterate over the slot numbers of an origin, from the highest down to zero.
+ * The loop ends when the unsigned counter wraps around, so the body is not
+ * executed at all if the origin has no slots.
+ */
+#define FOR_EACH_SLOT_REVERSE(_j, _o) for ( \
+       unsigned _j = _o->num_slots - 1; _j != -1U; _j--)
+
+static unsigned loglevel_arg_val = LL_WARNING;
+
+/* lopsub */
+static const struct lls_command *subcmd;
+static struct lls_parse_result *lpr, *sublpr;
+/* Pointer to the command of the misma suite with the given name. */
+#define CMD_PTR(_cname) lls_cmd(LSG_MISMA_CMD_ ## _cname, misma_suite)
+/*
+ * The result of an option of a command. Options of the MISMA command are
+ * looked up in lpr, the options of all other commands in sublpr.
+ */
+#define OPT_RESULT(_cname, _oname) (lls_opt_result(\
+       LSG_MISMA_ ## _cname ## _OPT_ ## _oname, \
+       (CMD_PTR(_cname) == CMD_PTR(MISMA))? lpr : sublpr))
+/* Shortcuts for querying an option and for getting at its value(s). */
+#define OPT_GIVEN(_cname, _oname) (lls_opt_given(OPT_RESULT(_cname, _oname)))
+#define OPT_UINT32_VAL(_cname, _oname) (lls_uint32_val(0, \
+               OPT_RESULT(_cname, _oname)))
+#define OPT_STRING_VAL_N(_n, _cname, _oname) (lls_string_val(_n, \
+       OPT_RESULT(_cname, _oname)))
+#define OPT_STRING_VAL(_cname, _oname) (OPT_STRING_VAL_N(0, _cname, _oname))
+
+/*
+ * Per-command user data which holds a pointer to the handler function.
+ * EXPORT_CMD_HANDLER(foo) defines lsg_misma_com_foo_user_data with com_foo
+ * as the handler.
+ */
+struct misma_user_data {bool (*handler)(void);};
+#define EXPORT_CMD_HANDLER(_cmd) const struct misma_user_data \
+       lsg_misma_com_ ## _cmd ## _user_data = { \
+               .handler = com_ ## _cmd \
+       };
+
+/*
+ * Write a log message to stderr.
+ *
+ * Nothing is printed if the given log level is lower than the configured
+ * log level. If the active subcommand is "run", the message is prefixed with
+ * the local time. The remaining arguments are as for printf(3).
+ *
+ * does not allocate memory
+ */
+void misma_log(int ll, const char* fmt,...)
+{
+       va_list argp;
+
+       if (ll < loglevel_arg_val)
+               return;
+       if (subcmd == CMD_PTR(RUN)) {
+               char str[255] = "";
+               time_t t1;
+               struct tm *tm;
+
+               time(&t1);
+               tm = localtime(&t1);
+               /*
+                * localtime() returns NULL on errors. Omit the time stamp
+                * in this case rather than passing NULL to strftime().
+                */
+               if (tm) {
+                       strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
+                       fprintf(stderr, "%s ", str);
+               }
+       }
+       va_start(argp, fmt);
+       vfprintf(stderr, fmt, argp);
+       va_end(argp);
+}
+static const char *exit_hook;
+
+/*
+ * Terminate with EXIT_FAILURE. If an exit hook is set, it is run first via
+ * /bin/sh -c, with the given string appended in single quotes.
+ */
+__attribute__ ((noreturn))
+static void run_exit_hook_and_die(const char *str)
+{
+       const char *hook = exit_hook;
+
+       if (hook) {
+               char *argv[] = {"/bin/sh", "-c", NULL, NULL};
+
+               /*
+                * Clear the hook before running it. Otherwise a helper which
+                * calls die() or die_errno() would bring us back here, and
+                * the endless call stack would result in a crash.
+                */
+               exit_hook = NULL;
+               argv[2] = msg("%s '%s'", hook, str);
+               xexec(argv, NULL);
+       }
+       exit(EXIT_FAILURE);
+}
+
+/*
+ * Log the formatted message with level LL_EMERG, run the exit hook with the
+ * message as its argument and exit. The arguments are as for printf(3).
+ */
+void die(const char *fmt, ...)
+{
+       va_list ap;
+       char *text;
+       int len;
+
+       va_start(ap, fmt);
+       len = vasprintf(&text, fmt, ap);
+       va_end(ap);
+       if (len >= 0) {
+               misma_log(LL_EMERG, "%s\n", text);
+               run_exit_hook_and_die(text); /* does not return */
+       }
+       /* The message could not be allocated, give up. */
+       EMERG_LOG("OOM\n");
+       exit(EXIT_FAILURE);
+}
+
+/*
+ * Like die(), but the logged message is followed by the description of the
+ * error code which was stored in errno when the function was called. The
+ * exit hook receives the message without this description.
+ */
+void die_errno(const char *fmt, ...)
+{
+       const int err = errno; /* the calls below may change errno */
+       va_list ap;
+       char *text;
+       int len;
+
+       va_start(ap, fmt);
+       len = vasprintf(&text, fmt, ap);
+       va_end(ap);
+       if (len >= 0) {
+               misma_log(LL_EMERG, "%s: %s\n", text, strerror(err));
+               run_exit_hook_and_die(text); /* does not return */
+       }
+       /* The message could not be allocated, give up. */
+       EMERG_LOG("OOM\n");
+       exit(EXIT_FAILURE);
+}
+
+/*
+ * Find the first zero: return the index of the least significant bit of v
+ * which is not set. At least one bit of v must be clear.
+ */
+__attribute__ ((const))
+static uint32_t ffz(uint32_t v)
+{
+       uint32_t pos = 0;
+       unsigned width;
+
+       assert(v != (uint32_t)-1);
+       /*
+        * Binary search: if the lowest width bits are all set, the first
+        * zero bit must be located above them.
+        */
+       for (width = 16; width > 0; width /= 2) {
+               const uint32_t mask = ((uint32_t)1 << width) - 1;
+
+               if ((v & mask) != mask)
+                       continue;
+               pos += width;
+               v >>= width;
+       }
+       return pos;
+}
+
+/* A slot is in use if and only if its sequence number is non-zero. */
+static bool slot_is_used(unsigned slot, const struct origin *o)
+{
+       const struct snapshot *snap = o->snapshot + slot;
+
+       return snap->seq != 0;
+}
+
+/* Mark a slot as unused by resetting its sequence number to zero. */
+static void mark_slot_unused(unsigned slot, struct origin *o)
+{
+       struct snapshot *snap = o->snapshot + slot;
+
+       snap->seq = 0;
+}
+
+/*
+ * Return the slot for the snapshot with the given sequence number.
+ *
+ * Use highest numbered unused slot, or default if all slots are used. The
+ * default is the index of the lowest clear bit of the sequence number modulo
+ * 2^num_slots - 1. Since this remainder is smaller than 2^num_slots - 1, one
+ * of its num_slots low bits is clear, so the result is a valid slot number.
+ */
+static unsigned get_slot(unsigned seq, const struct origin *o)
+{
+       unsigned mod;
+       FOR_EACH_SLOT_REVERSE(sl, o)
+               if (!slot_is_used(sl, o))
+                       return sl;
+       /* all slots used */
+       /* Shift an unsigned one, as shifting a signed one by 31 overflows. */
+       mod = (1U << o->num_slots) - 1;
+       return ffz(seq % mod);
+}
+
+/*
+ * Remove the snapshot which is stored in the given slot by running lvremove
+ * and mark the slot as unused on success. In dry-run mode only a message is
+ * printed and the slot is left alone. Returns false if the command failed.
+ *
+ * We specify --autobackup n to avoid filling up /etc/lvm/archive with tons of
+ * useless backup configurations.
+ */
+static bool remove_snapshot(unsigned sl, struct origin *o, bool dry_run)
+{
+       const struct snapshot *victim = &o->snapshot[sl];
+       char *lv = msg("%s/misma-%s.%u", vgname(o->vgid), o->name,
+               victim->seq);
+       char *argv[] = {
+               "lvremove", "--yes", "--quiet", "--quiet",
+               "--autobackup", "n",
+               lv, NULL
+       };
+       bool ok = true;
+
+       if (dry_run)
+               printf("dry-run: would remove snapshot %s\n", lv);
+       else {
+               NOTICE_LOG("removing snapshot %s\n", lv);
+               ok = xexec(argv, NULL);
+               if (ok)
+                       mark_slot_unused(sl, o);
+       }
+       free(lv);
+       return ok;
+}
+
+/*
+ * Comparison function for sort_slots().
+ *
+ * Unused slots come first, followed by the used slots in descending order
+ * of their sequence numbers. Two unused slots compare equal, which keeps
+ * the result independent of the order of the arguments.
+ *
+ * The third argument is not needed because a slot is unused if and only if
+ * its sequence number is zero.
+ */
+static int slot_compare(const void *a, const void *b, void *data)
+{
+       const struct snapshot *s1 = a, *s2 = b;
+       bool used1 = s1->seq != 0, used2 = s2->seq != 0;
+
+       (void)data;
+       if (!used1 || !used2)
+               return (int)used1 - (int)used2;
+       if (s1->seq < s2->seq)
+               return 1;
+       if (s1->seq > s2->seq)
+               return -1;
+       return 0;
+}
+
+/* Sort the slots of an origin in the order defined by slot_compare(). */
+static void sort_slots(struct origin *o)
+{
+       struct snapshot *slots = o->snapshot;
+
+       qsort_r(slots, o->num_slots, sizeof(*slots), slot_compare, o);
+}
+
+/*
+ * sleazy (adj.): 1640s, "downy, fuzzy," later "flimsy, unsubstantial" (1660s).
+ *
+ * A sleazy snapshot is one whose distance (with respect to creation time) to
+ * its sibling snapshots is minimal.
+ *
+ * Each snapshot is given a score and the snapshot with the lowest score is
+ * removed. The slots are sorted before and after the removal. Returns false
+ * if there are no snapshots or if the removal failed.
+ */
+static bool remove_sleazy_snapshot(struct origin *o, bool dry_run)
+{
+       unsigned sl, victim = 0;
+       uint64_t score = 0;
+       bool have_victim = false;
+       struct snapshot *prev = NULL, *next = NULL;
+
+       sort_slots(o);
+       /* Unused slots come first after sorting, skip them. */
+       for (sl = 0; sl < o->num_slots; sl++)
+               if (slot_is_used(sl, o))
+                       break;
+       /* The remaining slots are sorted by descending sequence number. */
+       for (; sl < o->num_slots; prev = o->snapshot + sl, sl++) {
+               uint64_t dist;
+               struct snapshot *s = o->snapshot + sl;
+
+               assert(slot_is_used(sl, o));
+               next = sl == o->num_slots - 1? NULL : s + 1;
+               /*
+                * With two siblings the score is the difference between
+                * their epochs. With only one sibling it is ten times the
+                * difference to that sibling. A lone snapshot scores 1.
+                * Presumably epochs decrease as sequence numbers decrease,
+                * so that these unsigned differences do not wrap around.
+                */
+               if (!prev && !next)
+                       dist = 1;
+               else if (!prev)
+                       dist = 10 * (s->epoch - next->epoch);
+               else if (!next)
+                       dist = 10 * (prev->epoch - s->epoch);
+               else
+                       dist = prev->epoch - next->epoch;
+               DEBUG_LOG("seq %u, slot %u, epoch %" PRIu64 ", score %" PRIu64"\n",
+                       s->seq, sl, s->epoch, dist);
+               /* On ties the snapshot which was examined first wins. */
+               if (!have_victim || dist < score) {
+                       have_victim = true;
+                       victim = sl;
+                       score = dist;
+               }
+       }
+       if (!have_victim) {
+               INFO_LOG("no snapshots\n");
+               return false;
+       }
+       NOTICE_LOG("victim: seq %u, slot %u, score %" PRIu64 "\n",
+               o->snapshot[victim].seq, victim, score);
+       if (!remove_snapshot(victim, o, dry_run))
+               return false;
+       sort_slots(o);
+       return true;
+}
+
+/*
+ * Store the length of an interval at the scope given by the lvmspec of the
+ * time argument.
+ *
+ * For the global scope only the global default is changed. For all other
+ * scopes the value is stored in the configuration of the volume group, pool
+ * or origin, and the scope of each matching origin is narrowed accordingly.
+ * The function dies if the lvmspec refers to a volume group, pool or origin
+ * which does not exist.
+ */
+static void set_interval(enum interval_type it, const struct time_arg *ta)
+{
+       enum lvm_scope scope = ta->lvmspec.scope;
+       unsigned vgid, poolid, oid, n;
+
+       if (scope == LS_GLOBAL) {
+               NOTICE_LOG("default interval #%u: %u seconds\n", it,
+                       ta->seconds);
+               global_config.interval[it] = ta->seconds;
+               return;
+       }
+       /* All other scopes refer to a volume group. */
+       vgid = get_vgid(ta->lvmspec.vg);
+       if (vgid == ~0U)
+               die("invalid vg in lvmspec: %s", ta->lvmspec.vg);
+       switch (scope) {
+       case LS_VG:
+               volume_group[vgid].config.interval[it] = ta->seconds;
+               break;
+       case LS_POOL:
+               poolid = get_poolid(ta->lvmspec.pool, vgname(vgid));
+               if (poolid == ~0U)
+                       die("invalid pool in lvmspec: %s", ta->lvmspec.pool);
+               thin_pool[poolid].config.interval[it] = ta->seconds;
+               break;
+       case LS_ORIGIN:
+               oid = get_oid(ta->lvmspec.tlv, vgname(vgid));
+               if (oid == ~0U)
+                       die("invalid tlv in lvmspec: %s", ta->lvmspec.tlv);
+               origin[oid].config.interval[it] = ta->seconds;
+               break;
+       default:
+               assert(0);
+       }
+       /*
+        * Narrow the scope of all matching origins for which it is currently
+        * set to a wider scope.
+        */
+       FOR_EACH_ORIGIN(n) {
+               struct origin *o = origin + n;
+               if (o->iscope[it] >= scope)
+                       continue; /* already set to more narrow scope */
+               /* poolid and oid are only set for their own scope. */
+               switch (scope) {
+               case LS_ORIGIN:
+                       if (n != oid)
+                               continue;
+                       break;
+               case LS_POOL:
+                       if (poolid != o->poolid || vgid != o->vgid)
+                               continue;
+                       break;
+               case LS_VG:
+                       if (vgid != o->vgid)
+                               continue;
+                       break;
+               default:
+                       assert(0);
+               }
+               NOTICE_LOG("interval #%u for %s/%s: %u seconds\n", it,
+                       vgname(o->vgid), o->name, ta->seconds);
+               o->iscope[it] = scope;
+       }
+}
+
+/* The fields of one line of lvs output, as filled in by parse_lvs_line(). */
+struct lv_info {
+       /* Heap-allocated copies of the four string fields. */
+       char *vg, *lv, *pool, *origin;
+       /* The numeric fifth field, stored as the snapshot epoch by callers. */
+       uint64_t time;
+};
+
+/*
+ * Release the four strings of an lv_info structure. The structure itself is
+ * not freed and the pointers are left untouched.
+ */
+static void free_lv_info(struct lv_info *lv)
+{
+       free(lv->vg);
+       free(lv->lv);
+       free(lv->pool);
+       free(lv->origin);
+}
+
+/*
+ * Split one line of lvs output into its five comma-separated fields.
+ *
+ * line: Two leading characters which are skipped, followed by
+ *     vg,lv,pool,origin,time. The vg field must not be empty.
+ * result: Receives freshly allocated copies of the four string fields and the
+ *     time value. The caller releases the strings with free_lv_info().
+ *
+ * A line which does not have this layout terminates the program through
+ * die(). This check does not depend on assertions being compiled in.
+ */
+static void parse_lvs_line(const char *line, struct lv_info *result)
+{
+       char **field[] = {&result->vg, &result->lv, &result->pool,
+               &result->origin};
+       char *tmp, *p;
+       unsigned n;
+
+       /* Do not step over the terminating NUL byte of a short line. */
+       if (!line[0] || !line[1])
+               die("cannot parse lvs line: %s", line);
+       tmp = xstrdup(line);
+       p = tmp + 2;
+       for (n = 0; n < 4; n++) {
+               char *comma = strchr(p, ',');
+
+               /* Only the first field (the vg) is required to be non-empty. */
+               if (!comma || (n == 0 && comma == p))
+                       die("cannot parse lvs line: %s", line);
+               *comma = '\0';
+               *field[n] = xstrdup(p);
+               p = comma + 1;
+       }
+       /*
+        * The conversion must not live inside assert() because it would be
+        * dropped together with the assertion when NDEBUG is defined,
+        * leaving result->time uninitialized.
+        */
+       if (sscanf(p, "%" PRIu64, &result->time) != 1)
+               die("cannot parse lvs line: %s", line);
+       free(tmp);
+}
+
+/*
+ * Run lvs for all origins given via --origin and fill the tables of volume
+ * groups, thin pools, origins and snapshots from its output.
+ *
+ * The lvs output is walked three times, each time on a private copy:
+ * first to insert the vgs and pools of all origins, then to insert the
+ * origins themselves, and finally to record the sequence number and creation
+ * time of each snapshot. Between the second and the third pass it is checked
+ * that each origin given on the command line was actually reported by lvs.
+ *
+ * The function calls die() if --origin was not given, if an argument to
+ * --origin is not of the form vg/tlv, if lvs fails, if an origin is not a
+ * thin LV or does not exist, or if a snapshot name can not be parsed.
+ */
+static void init_origins(void)
+{
+       unsigned n, oid;
+       char *argv[] = {
+               "lvs",
+               "--select", NULL,
+               "--noheading",
+               "--separator", ",",
+               "--readonly",
+               "--unquoted",
+               "-o", "vgname,lvname,pool_lv,origin,lvtime",
+               "-O", "-lv_time",
+               "--config", "report/time_format=%s",
+               NULL
+       };
+       char *buf, *tmp, *line, *select_string = NULL;
+       struct line_iter liter;
+       struct lv_info lv;
+
+       if (OPT_GIVEN(MISMA, ORIGIN) == 0)
+               die("--origin not given");
+
+       /* create argument to --select */
+       for (n = 0; n < OPT_GIVEN(MISMA, ORIGIN); n++) {
+               char *tmp2, *slash;
+               const char *arg = OPT_STRING_VAL_N(n, MISMA, ORIGIN);
+
+               tmp = xstrdup(arg);
+               slash = strchr(tmp, '/');
+               if (!slash || slash == tmp || !slash[1])
+                       die("--origin arg must be of the form vg/tlv");
+               *slash = '\0';
+               /* Select the origin itself and all of its misma snapshots. */
+               tmp2 = msg("%s%s (vg_name=%s && (lv_name=%s ||"
+                       "(origin=%s && lv_name =~ misma-%s.[0-9]+)))",
+                       select_string? select_string : "",
+                       select_string? " || " : "" ,
+                       tmp, slash + 1, slash + 1, slash + 1
+               );
+               free(tmp);
+               free(select_string);
+               select_string = tmp2;
+       }
+       argv[2] = select_string;
+       if (!xexec(argv, &buf))
+               die("lvs failure");
+       /* The selection string is not needed any more once lvs has run. */
+       free(select_string);
+       tmp = xstrdup(buf);
+       line_iter_init(&liter, tmp);
+       /* insert vgs and pools */
+       while ((line = line_iter_get(&liter))) {
+               parse_lvs_line(line, &lv);
+               DEBUG_LOG("vg: %s, lv: %s, pool: %s, origin: %s, "
+                       "time: %" PRIu64"\n",
+                       lv.vg, lv.lv, lv.pool, lv.origin, lv.time);
+               if (lv.origin[0] == '\0') { /* origin */
+                       insert_vg(lv.vg);
+                       if (lv.pool[0] == '\0')
+                               die("%s/%s is no thin LV", lv.vg, lv.lv);
+                       insert_pool(lv.pool, lv.vg);
+               }
+               free_lv_info(&lv);
+       }
+       free(tmp);
+       tmp = xstrdup(buf);
+       line_iter_init(&liter, tmp);
+       /* insert origins */
+       while ((line = line_iter_get(&liter))) {
+               parse_lvs_line(line, &lv);
+               if (lv.origin[0] == '\0')
+                       insert_origin(lv.lv, lv.vg, lv.pool);
+               free_lv_info(&lv);
+       }
+       free(tmp);
+       /* check that all given origins exist */
+       for (n = 0; n < OPT_GIVEN(MISMA, ORIGIN); n++) {
+               const char *arg = OPT_STRING_VAL_N(n, MISMA, ORIGIN);
+               char *slash;
+
+               tmp = xstrdup(arg);
+               /* Can not be NULL, the form vg/tlv was verified above. */
+               slash = strchr(tmp, '/');
+               *slash = '\0';
+               oid = get_oid(slash + 1, tmp);
+               free(tmp);
+               if (oid == ~0U)
+                       die("origin %s does not exist", arg);
+       }
+       tmp = xstrdup(buf);
+       line_iter_init(&liter, tmp);
+       /* allocate and init snapshot arrays */
+       while ((line = line_iter_get(&liter))) {
+               char *fmt;
+               struct snapshot *s;
+               struct origin *o;
+
+               parse_lvs_line(line, &lv);
+               if (lv.origin[0] == '\0') { /* no snapshot */
+                       free_lv_info(&lv);
+                       continue;
+               }
+               oid = get_oid(lv.origin, lv.vg);
+               assert(oid != ~0U);
+               o = origin + oid;
+               /* Grow the snapshot array by one slot for this snapshot. */
+               o->num_slots++;
+               o->snapshot = xrealloc(o->snapshot, o->num_slots
+                       * sizeof(struct snapshot));
+               s = o->snapshot + o->num_slots - 1;
+               fmt = msg("misma-%s.%%u", lv.origin);
+               if (sscanf(lv.lv, fmt, &s->seq) != 1)
+                       die("parse error: %s", lv.lv);
+               free(fmt);
+               s->epoch = lv.time;
+               /* Track the highest sequence number and creation time. */
+               if (s->seq > o->last_seq)
+                       o->last_seq = s->seq;
+               if (s->epoch > o->last_event[ET_CREATE])
+                       o->last_event[ET_CREATE] = s->epoch;
+               free_lv_info(&lv);
+       }
+       free(tmp);
+       /* Release the lvs output, like get_utilization() does. */
+       free(buf);
+}
+
+/*
+ * Report a lopsub library error and terminate.
+ *
+ * lopsub_ret: The negative return value of the failed lopsub call.
+ * errctx: Address of the optional error context string. If it is set, it is
+ *     logged in front of the error text. It is freed and reset to NULL.
+ */
+static void die_lopsub(int lopsub_ret, char **errctx)
+{
+       const char *reason = lls_strerror(-lopsub_ret);
+       char *ctx = *errctx;
+
+       if (!ctx)
+               ERROR_LOG("%s\n", reason);
+       else
+               ERROR_LOG("%s: %s\n", ctx, reason);
+       *errctx = NULL;
+       free(ctx);
+       die("lopsub error");
+}
+
+/*
+ * Parse the command line and merge it with the options of the config file.
+ *
+ * argc, argv: The argument vector to parse.
+ * cmd: The lopsub command the arguments belong to.
+ * lprp: Receives the parse result. If a non-empty config file exists, the
+ *     result is the merge of the command line and the config file options.
+ *
+ * The config file path is taken from the global config_file if that is
+ * already set, otherwise from --config-file, otherwise it is
+ * $HOME/.mismarc. A missing config file is an error only if --config-file
+ * was given. All other failures terminate the program.
+ */
+static void parse_options(int argc, char **argv, const struct lls_command *cmd,
+               struct lls_parse_result **lprp)
+{
+       int ret, fd = -1;
+       struct stat statbuf;
+       void *map;
+       size_t sz;
+       int cf_argc;
+       char **cf_argv, *errctx = NULL;
+       const char *subcmd_name;
+       struct lls_parse_result *merged_lpr, *cf_lpr;
+
+       ret = lls_parse(argc, argv, cmd, lprp, &errctx);
+       if (ret < 0)
+               die_lopsub(ret, &errctx);
+       /* Determine the path only once, it is kept in a global variable. */
+       if (!config_file) {
+               if (OPT_GIVEN(MISMA, CONFIG_FILE))
+                       config_file = xstrdup(OPT_STRING_VAL(MISMA,
+                               CONFIG_FILE));
+               else {
+                       const char *home = getenv("HOME");
+                       if (!home || !*home)
+                               die("fatal: HOME is unset or empty");
+                       config_file = msg("%s/.mismarc", home);
+               }
+       }
+       ret = open(config_file, O_RDONLY);
+       if (ret < 0) {
+               /* Only a missing default config file is tolerated. */
+               if (errno != ENOENT || OPT_GIVEN(MISMA, CONFIG_FILE))
+                       die_errno("can not open config file %s", config_file);
+               /* no config file -- nothing to do */
+               ret = 0;
+               goto success;
+       }
+       fd = ret;
+       ret = fstat(fd, &statbuf);
+       if (ret < 0)
+               die_errno("failed to stat config file %s", config_file);
+       sz = statbuf.st_size;
+       if (sz == 0) { /* config file is empty -- nothing to do */
+               ret = 0;
+               goto success;
+       }
+       map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+       if (map == MAP_FAILED)
+               die_errno("failed to mmap config file %s", config_file);
+       /* No subcommand name is passed for the misma supercommand. */
+       subcmd_name = (cmd == CMD_PTR(MISMA))? NULL : lls_command_name(cmd);
+       ret = lls_convert_config(map, sz, subcmd_name, &cf_argv,
+               &errctx);
+       /* The mapping is not referenced after the conversion. */
+       munmap(map, sz);
+       if (ret < 0) {
+               ERROR_LOG("failed to convert config file %s\n", config_file);
+               die_lopsub(ret, &errctx);
+       }
+       cf_argc = ret;
+       ret = lls_parse(cf_argc, cf_argv, cmd, &cf_lpr, &errctx);
+       lls_free_argv(cf_argv);
+       if (ret < 0)
+               die_lopsub(ret, &errctx);
+       /* command line options override config file options */
+       ret = lls_merge(*lprp, cf_lpr, cmd, &merged_lpr, &errctx);
+       if (ret < 0)
+               die_lopsub(ret, &errctx);
+       /* Both inputs of the merge are replaced by the merged result. */
+       lls_free_parse_result(cf_lpr, cmd);
+       lls_free_parse_result(*lprp, cmd);
+       *lprp = merged_lpr;
+success:
+       /* fd is still -1 if the config file could not be opened. */
+       if (fd >= 0)
+               close(fd);
+}
+
+/*
+ * Ask lvs for the data and metadata usage of all known thin pools and store
+ * the rounded percentages in the utilization field of each pool.
+ *
+ * The function calls die() if lvs fails or if a line of its output does not
+ * consist of the four expected fields vg, lv, data percentage and metadata
+ * percentage.
+ */
+static void get_utilization(void)
+{
+       char *select_string = NULL, *buf, *line;
+       unsigned n;
+       char *argv[] = {
+               "lvs",
+               "--select", NULL,
+               "--noheading", "--unquoted",
+               "-o", "vgname,lvname,data_percent,metadata_percent",
+               NULL
+       };
+       struct line_iter liter;
+
+       /* Build one selection clause per pool, joined by "||". */
+       for (n = 0; n < num_pools; n++) {
+               const struct thin_pool *pool = thin_pool + n;
+               char *tmp = msg("%s%s (vg_name = %s && lv_name = %s)",
+                       (n == 0)? "" : select_string, (n == 0)? "" : "||",
+                       vgname(pool->vgid), pool->name);
+               free(select_string);
+               select_string = tmp;
+       }
+       argv[2] = select_string;
+       if (!xexec(argv, &buf))
+               die("lvs failure");
+       free(select_string);
+       line_iter_init(&liter, buf);
+       while ((line = line_iter_get(&liter))) {
+               struct percentage_pair *u;
+               struct thin_pool *pool;
+               unsigned poolid;
+               float data, meta;
+               /*
+                * A word of the line can be as long as the line itself, so
+                * reserve one more byte for the terminating NUL which %s
+                * appends. This also avoids zero-sized allocations.
+                */
+               size_t len = strlen(line) + 1;
+               char *vg = xmalloc(len), *lv = xmalloc(len);
+               if (sscanf(line, "%s %s %f %f", vg, lv, &data, &meta) != 4)
+                       die("cannot parse lvs line: %s", line);
+               poolid = get_poolid(lv, vg);
+               free(vg);
+               free(lv);
+               assert(poolid != ~0U);
+               pool = thin_pool + poolid;
+               u = &pool->utilization;
+               /* Adding 0.5 rounds to the nearest integer on conversion. */
+               u->data = data + 0.5;
+               u->meta = meta + 0.5;
+               INFO_LOG("pool %s/%s utilization: %u/%u\n",
+                       vgname(pool->vgid), pool->name, u->data, u->meta);
+       }
+       free(buf);
+}
+
+/*
+ * Tell whether the utilization of a thin pool exceeds its thresholds.
+ *
+ * The thresholds are looked up in the global config, the config of the
+ * volume group or the config of the pool, depending on the threshold scope
+ * of the pool. The pool counts as full if either its data or its metadata
+ * utilization is greater than the corresponding threshold. In this case a
+ * notice and a warning are logged.
+ */
+static bool pool_is_full(const struct thin_pool *pool)
+{
+       const struct percentage_pair used = pool->utilization;
+       struct percentage_pair limit;
+
+       switch (pool->threshold_scope) {
+       case LS_GLOBAL:
+               limit = global_config.thresholds;
+               break;
+       case LS_VG:
+               limit = volume_group[pool->vgid].config.thresholds;
+               break;
+       default:
+               limit = pool->config.thresholds;
+               break;
+       }
+       if (used.data <= limit.data && used.meta <= limit.meta)
+               return false;
+       NOTICE_LOG("pool %s/%s utilization: %u/%u, threshold: %u/%u\n",
+               vgname(pool->vgid), pool->name,
+               used.data, used.meta, limit.data, limit.meta);
+       WARNING_LOG("pool %s/%s exceeds utilization thresholds\n",
+               vgname(pool->vgid), pool->name);
+       return true;
+}
+
+/*
+ * Remove snapshots until no thin pool exceeds its thresholds any more.
+ *
+ * Each round refreshes the utilization of all pools and asks every origin of
+ * every full pool to give up one snapshot. The rounds stop when no pool is
+ * full, or when a round did not manage to remove any snapshot.
+ */
+static void check_utilization(void)
+{
+       for (;;) {
+               bool any_full = false, any_removed = false;
+               unsigned p, k;
+
+               get_utilization();
+               for (p = 0; p < num_pools; p++) {
+                       if (!pool_is_full(thin_pool + p))
+                               continue;
+                       any_full = true;
+                       FOR_EACH_ORIGIN(k) {
+                               struct origin *o = origin + k;
+                               if (o->poolid == p
+                                               && remove_sleazy_snapshot(o, false))
+                                       any_removed = true;
+                       }
+               }
+               if (!any_full)
+                       return;
+               if (!any_removed)
+                       break;
+       }
+       INFO_LOG("full pool found, but nothing to remove\n");
+}
+
+/*
+ * Create the next snapshot of an origin by running lvcreate.
+ *
+ * The snapshot is named misma-<origin>.<seq> where seq is one more than the
+ * last sequence number of the origin. In dry-run mode the function only
+ * prints what it would do. A failing lvcreate terminates the program, so the
+ * return value is always true.
+ */
+static bool create_snapshot(struct origin *o, bool dry_run)
+{
+       const unsigned next = o->last_seq + 1;
+       char *snap = msg("misma-%s.%u", o->name, next);
+       char *target = msg("%s/%s", vgname(o->vgid), o->name);
+       char *cmd[] = {
+               "lvcreate", "--type", "thin", "--quiet", "--quiet", "-s",
+               "--autobackup", "n", "-n", snap, target, NULL
+       };
+
+       if (dry_run) {
+               printf("dry-run: would create snapshot #%u of origin %s\n",
+                       next, target);
+       } else {
+               NOTICE_LOG("creating snapshot %s/%s\n", vgname(o->vgid), snap);
+               if (!xexec(cmd, NULL))
+                       die("could not create snapshot");
+       }
+       free(snap);
+       free(target);
+       return true;
+}
+
+/*
+ * Terminate the process through die(), naming the signal that was caught.
+ * com_run() installs this handler for SIGINT, SIGTERM and SIGHUP.
+ *
+ * NOTE(review): die() is defined elsewhere. Whether everything it does is
+ * safe to call from signal context can not be told from here -- confirm.
+ */
+static void signal_handler(int signo)
+{
+       die("caught signal %d, terminating", signo);
+}
+
+/* Fallback definitions for systems whose headers do not provide FITRIM. */
+#ifndef FITRIM
+struct fstrim_range {uint64_t start; uint64_t len; uint64_t minlen;};
+#define FITRIM _IOWR('X', 121, struct fstrim_range)
+#endif
+/*
+ * Discard the unused blocks of the filesystem which lives on an origin.
+ *
+ * The block device /dev/<vg>/<origin> is looked up in /proc/self/mountinfo
+ * by its major and minor number, and the FITRIM ioctl is issued on the first
+ * mount point found. In dry-run mode only the mount point is printed.
+ *
+ * Returns false, after logging a warning, if the device can not be examined,
+ * is not a block device, is not mounted, or if the ioctl fails. No failure
+ * terminates the program.
+ */
+static bool trim_filesystem(struct origin *o, bool dry_run)
+{
+       struct stat sb;
+       char *dev;
+       unsigned majo, mino;
+       int fd;
+       char *buf;
+       struct line_iter liter;
+       char *line, *mp = NULL;
+       struct fstrim_range range = {.len = ULLONG_MAX};
+
+       dev = msg("/dev/%s/%s", vgname(o->vgid), o->name);
+       if (stat(dev, &sb) < 0) {
+               WARNING_LOG("stat(%s): %m\n", dev);
+               free(dev);
+               return false;
+       }
+       if ((sb.st_mode & S_IFMT) != S_IFBLK) {
+               WARNING_LOG("not a block device: %s\n", dev);
+               free(dev);
+               return false;
+       }
+       free(dev);
+       majo = major(sb.st_rdev);
+       mino = minor(sb.st_rdev);
+       fd = open("/proc/self/mountinfo", O_RDONLY);
+       if (fd < 0) {
+               WARNING_LOG("open(/proc/self/mountinfo): %m\n");
+               return false;
+       }
+       if (!fd2buf(fd, &buf)) {
+               WARNING_LOG("fd2buf error\n");
+               close(fd);
+               return false;
+       }
+       close(fd);
+       line_iter_init(&liter, buf);
+       /* 13 15 0:5 / /proc */
+       while ((line = line_iter_get(&liter))) {
+               unsigned id, parent, mmajo, mmino;
+               size_t len = strlen(line);
+               char *mountroot = xmalloc(len), *target = xmalloc(len);
+
+               if (sscanf(line, "%u %u %u:%u %s %s", &id, &parent, &mmajo,
+                               &mmino, mountroot, target) != 6) {
+                       WARNING_LOG("parse mountinfo line: %s\n", line);
+                       free(mountroot);
+                       free(target);
+                       /*
+                        * Do not leak the contents of the mountinfo file:
+                        * this function is called over and over again from
+                        * the event loop of the run subcommand.
+                        */
+                       free(buf);
+                       return false;
+               }
+               free(mountroot);
+               if (mmajo == majo && mmino == mino) {
+                       /* Ownership of target moves to mp. */
+                       mp = target;
+                       break;
+               }
+               free(target);
+       }
+       free(buf);
+       if (!mp) {
+               WARNING_LOG("unable to find mountpoint of origin\n");
+               return false;
+       }
+       if (dry_run) {
+               printf("%s\n", mp);
+               free(mp);
+               return true;
+       }
+       fd = open(mp, O_RDONLY);
+       if (fd < 0) {
+               WARNING_LOG("open(%s): %m\n", mp);
+               free(mp);
+               return false;
+       }
+       if (ioctl(fd, FITRIM, &range)) {
+               WARNING_LOG("ioctl(FITRIM, %s): %m\n", mp);
+               close(fd);
+               free(mp);
+               return false;
+       }
+       close(fd);
+       NOTICE_LOG("trimmed %s\n", mp);
+       free(mp);
+       return true;
+}
+
+/*
+ * Store a pair of utilization thresholds at the scope named by the lvmspec
+ * of a threshold argument and narrow the threshold scope of all pools this
+ * affects.
+ *
+ * For the global scope only global_config is updated. Otherwise the scope
+ * must be LS_VG or LS_POOL, and die() is called if the vg or the pool of the
+ * lvmspec is unknown.
+ */
+static void set_threshold(const struct threshold_arg *ta)
+{
+       enum lvm_scope scope = ta->lvmspec.scope;
+       /* poolid is only meaningful for LS_POOL. */
+       unsigned poolid = 0, vgid;
+
+       if (scope == LS_GLOBAL) {
+               global_config.thresholds = ta->threshold;
+               return;
+       }
+       vgid = get_vgid(ta->lvmspec.vg);
+       if (vgid == ~0U)
+               die("invalid vg in lvmspec: %s", ta->lvmspec.vg);
+       if (scope == LS_VG) {
+               volume_group[vgid].config.thresholds = ta->threshold;
+       } else {
+               assert(scope == LS_POOL);
+               poolid = get_poolid(ta->lvmspec.pool, vgname(vgid));
+               if (poolid == ~0U)
+                       die("invalid pool in lvmspec: %s", ta->lvmspec.pool);
+               thin_pool[poolid].config.thresholds = ta->threshold;
+       }
+       /*
+        * Narrow the scope of all matching pools for which it is currently
+        * set to a wider scope.
+        */
+       for (unsigned n = 0; n < num_pools; n++) {
+               struct thin_pool *p = thin_pool + n;
+               if (p->threshold_scope >= scope)
+                       continue; /* already set to more narrow scope */
+               if (vgid != p->vgid)
+                       continue;
+               /* A pool-scoped threshold applies to the named pool only. */
+               if (scope == LS_POOL && poolid != n)
+                       continue;
+               NOTICE_LOG("threshold for pool %s/%s: %u/%u\n",
+                       vgname(vgid), p->name, ta->threshold.data,
+                       ta->threshold.meta);
+               p->threshold_scope = scope;
+       }
+}
+
+/*
+ * Log one event at debug level. Events which are bound to an origin are
+ * printed with the origin name and the event type, events without origin
+ * are labeled as utilization events. The argument is an untyped pointer so
+ * that the function can be handed to heap_dump().
+ */
+static void log_event(const void *d)
+{
+       const struct event *ev = d;
+
+       if (!ev->origin) {
+               DEBUG_LOG("(utilization): %" PRIu64 "\n", ev->epoch);
+               return;
+       }
+       DEBUG_LOG("(%s,%u): %" PRIu64 "\n", ev->origin->name,
+               ev->type, ev->epoch);
+}
+
+/*
+ * Validate and apply the options of the run subcommand and size the
+ * snapshot array of each origin.
+ *
+ * The thresholds and the check, trim, create and max-age intervals are
+ * parsed, range-checked and stored. Then the number of slots of each origin
+ * is computed from the ratio of its max-age and create intervals, and the
+ * snapshot array is enlarged and zero-filled as necessary.
+ *
+ * Returns the number of events the caller needs: one check event, one create
+ * event per origin and one trim event per origin with a non-zero trim
+ * interval.
+ */
+static unsigned check_run_options(void)
+{
+       struct time_arg ta;
+       const char *arg;
+       unsigned n, num_events = 0;
+
+       for (n = 0; n < OPT_GIVEN(RUN, THRESHOLD); n++) {
+               struct threshold_arg tha;
+               arg = OPT_STRING_VAL_N(n, RUN, THRESHOLD);
+               parse_threshold_arg(arg,"--threshold", &tha);
+               set_threshold(&tha);
+               free_lvmspec(&tha.lvmspec);
+       }
+       if (OPT_GIVEN(RUN, CHECK_INTERVAL)) {
+               arg = OPT_STRING_VAL(RUN, CHECK_INTERVAL);
+               check_seconds = parse_timespec(arg, "check-interval");
+               check_range(check_seconds, 10, 86400, "check-interval");
+       }
+       for (n = 0; n < OPT_GIVEN(RUN, TRIM_INTERVAL); n++) {
+               arg = OPT_STRING_VAL_N(n, RUN, TRIM_INTERVAL);
+               parse_time_arg(arg, "--trim-interval", &ta);
+               /* A trim interval of zero is exempt from the range check. */
+               if (ta.seconds > 0)
+                       check_range(ta.seconds, 60, ~0U, "trim-interval");
+               set_interval(IT_TRIM, &ta);
+               free_lvmspec(&ta.lvmspec);
+       }
+       for (n = 0; n < OPT_GIVEN(RUN, CREATE_INTERVAL); n++) {
+               arg = OPT_STRING_VAL_N(n, RUN, CREATE_INTERVAL);
+               parse_time_arg(arg, "--create-interval", &ta);
+               check_range(ta.seconds, 60, 86400 * 365, "create-interval");
+               set_interval(IT_CREATE, &ta);
+               free_lvmspec(&ta.lvmspec);
+       }
+       for (n = 0; n < OPT_GIVEN(RUN, MAX_AGE); n++) {
+               arg = OPT_STRING_VAL_N(n, RUN, MAX_AGE);
+               parse_time_arg(arg, "--max-age", &ta);
+               check_range(ta.seconds, 86400, 86400 * 20 * 365, "max-age");
+               set_interval(IT_MAX_AGE, &ta);
+               free_lvmspec(&ta.lvmspec);
+       }
+       FOR_EACH_ORIGIN(n) {
+               struct origin *o = origin + n;
+               uint32_t ma, cr, max_slots; /* max age, create interval */
+
+               INFO_LOG("found %u snapshots of origin %s/%s\n",
+                       o->num_slots, vgname(o->vgid), o->name);
+               /* set number of slots */
+               ma = interval_length(IT_MAX_AGE, o);
+               cr = interval_length(IT_CREATE, o);
+               /* The max age must be at least three create intervals. */
+               if (ma / 3 < cr)
+                       die("%s/%s: max-age/create ratio too small",
+                               vgname(o->vgid), o->name);
+               max_slots = 1 + ceil(log2((double)ma / cr + 1));
+               assert(max_slots > 2);
+               assert(max_slots < 30);
+               if (o->num_slots > max_slots)
+                       die("%s/%s: too many snapshots", vgname(o->vgid),
+                               o->name);
+               /* Append zeroed slots until the array has max_slots entries. */
+               if (o->num_slots < max_slots) {
+                       unsigned diff = max_slots - o->num_slots;
+                       o->snapshot = xrealloc(o->snapshot, max_slots
+                               * sizeof(struct snapshot));
+                       memset(o->snapshot + o->num_slots, 0,
+                               diff * sizeof(struct snapshot));
+                       o->num_slots = max_slots;
+               }
+               INFO_LOG("%s/%s: using %u slots\n", vgname(o->vgid), o->name,
+                       o->num_slots);
+               /* Only origins with a non-zero trim interval get a trim event. */
+               if (interval_length(IT_TRIM, o) > 0)
+                       num_events++;
+       }
+       /* Add the check event and one create event per origin. */
+       return num_events + 1 + num_origins;
+}
+
+/*
+ * Handle a create event: make room for and create the next snapshot of an
+ * origin.
+ *
+ * Nothing is created while the thin pool of the origin exceeds its
+ * thresholds. Otherwise the slot which belongs to the next sequence number
+ * is freed if it is in use, the snapshot is created, and the slot, the last
+ * sequence number and the time of the last create event are updated. The
+ * function calls die() if the slot can not be freed.
+ */
+static void dispatch_create_event(struct origin *o)
+{
+       unsigned seq, sl;
+       const struct thin_pool *pool;
+       uint64_t now;
+
+       pool = thin_pool + o->poolid;
+       if (pool_is_full(pool)) {
+               WARNING_LOG("%s/%s: creation suspended\n", vgname(o->vgid),
+                       o->name);
+               return;
+       }
+       seq = o->last_seq + 1;
+       sl = get_slot(seq, o);
+       /* No trailing newline here, in line with all other calls to die(). */
+       if (slot_is_used(sl, o) && !remove_snapshot(sl, o, false))
+               die("%s/%s: unable to free slot", vgname(o->vgid), o->name);
+       /* Sample the time before lvcreate runs. */
+       now = time(NULL);
+       create_snapshot(o, false);
+       o->snapshot[sl].seq = seq;
+       o->snapshot[sl].epoch = now;
+       o->last_seq = seq;
+       o->last_event[ET_CREATE] = now;
+}
+
+/*
+ * Open /dev/null and export the resulting file descriptor number in the
+ * LVM_ERR_FD environment variable, overwriting any previous value.
+ *
+ * The descriptor is returned and intentionally never closed. This leak is
+ * harmless provided the function is called only once.
+ */
+static int silence_lvm(void)
+{
+       char *fdstr;
+       int nullfd;
+
+       nullfd = open("/dev/null", O_RDWR);
+       if (nullfd < 0)
+               die_errno("open(/dev/null)");
+       fdstr = msg("%d", nullfd);
+       setenv("LVM_ERR_FD", fdstr, true /* overwrite */);
+       free(fdstr);
+       return nullfd;
+}
+
+/*
+ * The run subcommand: build the event queue and process events forever.
+ *
+ * There is one check event, one create event per origin and one trim event
+ * per origin with a non-zero trim interval. The events are kept in a heap.
+ * The main loop sleeps until the event at the top of the heap is due,
+ * handles it, computes its next due time and puts it back.
+ *
+ * The loop has no exit, hence the noreturn attribute although the return
+ * type is bool.
+ */
+__attribute__ ((noreturn))
+static bool com_run(void)
+{
+       int fd = -1;
+       unsigned n, num_events;
+       struct event **ep;
+       struct event **event; /* At most 2 * num_origins + 1 */
+       struct heap *event_heap;
+       uint64_t now = time(NULL);
+
+       num_events = check_run_options();
+       event = xmalloc(num_events * sizeof(struct event *));
+       ep = event;
+       /* The check event has epoch zero, so it is due right away. */
+       (*ep) = xmalloc(sizeof(struct event));
+       (*ep)->type = ET_CHECK;
+       (*ep)->origin = NULL;
+       (*ep)->epoch = 0;
+       log_event(*ep);
+       ep++;
+       FOR_EACH_ORIGIN(n) {
+               struct origin *o = origin + n;
+               /* Next creation: one create interval after the last one. */
+               (*ep) = xmalloc(sizeof(struct event));
+               (*ep)->type = ET_CREATE;
+               (*ep)->origin = o;
+               (*ep)->epoch = o->last_event[ET_CREATE]
+                       + interval_length(IT_CREATE, o);
+               log_event(*ep);
+               ep++;
+               /* No trim event for origins with a zero trim interval. */
+               if (interval_length(IT_TRIM, o) == 0)
+                       continue;
+               (*ep) = xmalloc(sizeof(struct event));
+               (*ep)->type = ET_TRIM;
+               (*ep)->origin = o;
+               (*ep)->epoch = now + interval_length(IT_TRIM, o);
+               log_event(*ep);
+               ep++;
+       }
+       event_heap = heap_init(&event, num_events, event_compare);
+       if (get_misma_pid(config_file) > 0)
+               die("already running");
+       /* daemonize() hands back a descriptor which is written to below. */
+       if (OPT_GIVEN(RUN, DAEMON))
+               fd = daemonize(OPT_STRING_VAL(RUN, LOGFILE));
+       if (!misma_lock(config_file))
+               die("already running");
+       if (signal(SIGINT, &signal_handler) == SIG_ERR)
+               die_errno("signal handler for SIGINT");
+       if (signal(SIGTERM, &signal_handler) == SIG_ERR)
+               die_errno("signal handler for SIGTERM");
+       if (signal(SIGHUP, &signal_handler) == SIG_ERR)
+               die_errno("signal handler for SIGHUP");
+       /*
+        * Write a single byte to the descriptor obtained from daemonize().
+        * NOTE(review): presumably this tells the parent process that the
+        * startup has succeeded -- confirm against daemonize().
+        */
+       if (fd >= 0) {
+               if (write(fd, "\0", 1) < 0)
+                       die_errno("write");
+               close(fd);
+       }
+       exit_hook = OPT_STRING_VAL(RUN, EXIT_HOOK);
+       if (OPT_GIVEN(RUN, SUPPRESS_LVM_WARNINGS))
+               silence_lvm();
+       for (;;) {
+               struct event *e = heap_min(event_heap);
+               struct origin *o;
+
+               now = time(NULL);
+               /* Not due yet: sleep, then look at the heap again. */
+               if (e->epoch > now) {
+                       INFO_LOG("sleeping %" PRIu64 " seconds\n",
+                               e->epoch - now);
+                       sleep(e->epoch - now);
+                       continue;
+               }
+               e = heap_extract_min(event_heap);
+               o = e->origin;
+               switch (e->type) {
+               case ET_CHECK:
+                       INFO_LOG("next event: check\n");
+                       check_utilization();
+                       /* Schedule relative to the end of the check. */
+                       now = time(NULL);
+                       e->epoch = now + check_seconds;
+                       break;
+               case ET_TRIM:
+                       INFO_LOG("next event: trim %s/%s\n",
+                               vgname(o->vgid), o->name);
+                       trim_filesystem(o, false /* dry-run */);
+                       /* now was sampled before the trim operation. */
+                       e->origin->last_event[ET_TRIM] = now;
+                       e->epoch = now + interval_length(IT_TRIM, o);
+                       break;
+               case ET_CREATE:
+                       INFO_LOG("next event: create %s/%s\n", vgname(o->vgid),
+                               o->name);
+                       dispatch_create_event(o);
+                       /* now was sampled before the snapshot was created. */
+                       e->epoch = now + interval_length(IT_CREATE, o);
+                       break;
+               default: assert(0);
+               }
+               /* Put the event back with its new due time. */
+               heap_insert(e, event_heap);
+               heap_dump(event_heap, log_event);
+               sleep(3);
+       }
+}
+EXPORT_CMD_HANDLER(run);
+
+/*
+ * Format a time span as a short human readable string.
+ *
+ * diff: The length of the time span in seconds.
+ * buf: Receives the result. The 32 byte buffer which list_snapshots()
+ *     passes is large enough for any value of diff.
+ *
+ * The largest unit of which the span contains more than two is used. Years
+ * count as 365 days and months as 30 days. The result is the number,
+ * right-aligned in a field of at least three characters, followed by the
+ * unit.
+ */
+static void seconds_to_human(int64_t diff, char *buf)
+{
+       if (diff > 2 * 86400 * 365)
+               sprintf(buf, "%3" PRId64 " years  ", diff / (86400 * 365));
+       /* A month is 30 days. Dividing by 60 days would halve the number. */
+       else if (diff > 2 * 86400 * 30)
+               sprintf(buf, "%3" PRId64 " months ", diff / (86400 * 30));
+       else if (diff > 2 * 86400 * 7)
+               sprintf(buf, "%3" PRId64 " weeks  ", diff / (86400 * 7));
+       else if (diff > 2 * 86400)
+               sprintf(buf, "%3" PRId64 " days   ", diff / 86400);
+       else if (diff > 2 * 3600)
+               sprintf(buf, "%3" PRId64 " hours  ", diff / 3600);
+       else if (diff > 2 * 60)
+               sprintf(buf, "%3" PRId64 " minutes", diff / 60);
+       else
+               sprintf(buf, "%3" PRId64 " second%s", diff, diff == 1? "" : "s");
+}
+
+/*
+ * Decide whether an origin is covered by an lvmspec.
+ *
+ * A spec of global scope covers every origin. All other specs require the
+ * volume group to match. On top of that, a spec of origin scope requires
+ * the name of the origin to match, and any remaining scope other than
+ * LS_VG requires the name of the thin pool of the origin to match.
+ */
+static bool origin_matches_lvmspec(const struct origin *o,
+               const struct lvmspec *spec)
+{
+       if (spec->scope == LS_GLOBAL)
+               return true;
+       if (strcmp(spec->vg, vgname(o->vgid)) != 0)
+               return false;
+       switch (spec->scope) {
+       case LS_VG:
+               return true;
+       case LS_ORIGIN:
+               return strcmp(spec->tlv, o->name) == 0;
+       default:
+               return strcmp(spec->pool, thin_pool[o->poolid].name) == 0;
+       }
+}
+
+/*
+ * Call a function for each origin which matches the arguments of the
+ * current subcommand.
+ *
+ * func: Called with each matching origin and the dry_run flag. Its return
+ *     value is ignored.
+ * dry_run: Passed through to func unmodified.
+ *
+ * Each non-option argument of the subcommand is parsed as an lvmspec. An
+ * origin matches if it is covered by at least one of them. If no arguments
+ * were given, all origins match.
+ *
+ * Returns true if func was called at least once. If arguments were given
+ * but nothing matched, "no matches" is printed.
+ */
+static bool for_each_matching_origin(bool (*func)(struct origin *, bool),
+               bool dry_run)
+{
+       unsigned k, n, num_args = lls_num_inputs(sublpr);
+       struct lvmspec *spec = NULL; /* STFU gcc-12.3.0 */
+       bool match = false;
+
+       if (num_args > 0)
+               spec = xmalloc(num_args * sizeof(*spec));
+       for (k = 0; k < num_args; k++)
+               parse_lvmspec(lls_input(k, sublpr), "create/rm", spec + k);
+       FOR_EACH_ORIGIN(n) {
+               struct origin *o = origin + n;
+               /* k < num_args after the loop means some spec matched. */
+               for (k = 0; k < num_args; k++)
+                       if (origin_matches_lvmspec(o, spec + k))
+                               break;
+               if (num_args == 0 || k < num_args) {
+                       func(o, dry_run);
+                       match = true;
+               }
+       }
+       /*
+        * Release the contents of each parsed lvmspec before the array
+        * itself, as check_run_options() does for the specs it parses.
+        */
+       for (k = 0; k < num_args; k++)
+               free_lvmspec(spec + k);
+       free(spec);
+       if (!match && num_args > 0)
+               printf("no matches\n");
+       return match;
+}
+
+/*
+ * Print the snapshots of an origin, one per line.
+ *
+ * In short format a header line with the name of the origin is printed
+ * first and each snapshot is shown by its sequence number. In long format
+ * (l_given) there is no header, and each line starts with the device path
+ * of the snapshot and its creation time. In both formats the line ends with
+ * the age of the snapshot in human readable form. Always returns true.
+ */
+static bool list_snapshots(struct origin *o, bool l_given)
+{
+       if (!l_given)
+               printf("%s/%s:\n", vgname(o->vgid), o->name);
+       FOR_EACH_SLOT_REVERSE(sl, o) {
+               const struct snapshot *snap = &o->snapshot[sl];
+               char text[32];
+               time_t when;
+
+               assert(slot_is_used(sl, o));
+               if (!l_given) {
+                       printf("%8u ", snap->seq);
+               } else {
+                       printf("/dev/%s/misma-%s.%u\t", vgname(o->vgid),
+                               o->name, snap->seq);
+                       when = snap->epoch;
+                       strftime(text, sizeof(text), "%F %R",
+                               localtime(&when));
+                       printf("%s", text);
+               }
+               /* The buffer is reused for the age of the snapshot. */
+               when = time(NULL);
+               seconds_to_human(when - snap->epoch, text);
+               printf("  %s\n", text);
+       }
+       return true;
+}
+
+/*
+ * The ls subcommand: list the snapshots of all matching origins. The flag
+ * argument of for_each_matching_origin() carries the --long option here,
+ * which list_snapshots() receives as l_given.
+ */
+static bool com_ls(void)
+{
+       return for_each_matching_origin(list_snapshots,
+               OPT_GIVEN(LS, LONG));
+}
+EXPORT_CMD_HANDLER(ls);
+
+/*
+ * The create subcommand: create one snapshot of each matching origin, or
+ * only print what would be created if --dry-run was given. The command
+ * dies with "already running" if misma_lock() fails.
+ */
+static bool com_create(void)
+{
+       if (!misma_lock(config_file))
+               die("already running");
+       return for_each_matching_origin(create_snapshot,
+               OPT_GIVEN(CREATE, DRY_RUN));
+}
+EXPORT_CMD_HANDLER(create);
+
+/*
+ * The rm subcommand: call remove_sleazy_snapshot() for each matching
+ * origin, passing on the --dry-run option. The command dies with "already
+ * running" if misma_lock() fails.
+ */
+static bool com_rm(void)
+{
+       if (!misma_lock(config_file))
+               die("already running");
+       return for_each_matching_origin(remove_sleazy_snapshot,
+               OPT_GIVEN(RM, DRY_RUN));
+}
+EXPORT_CMD_HANDLER(rm);
+
+/*
+ * The kill subcommand: send a signal to the running misma process.
+ *
+ * The pid is obtained from get_misma_pid() and the signal number from the
+ * --signal option. Without --wait the function returns true as soon as the
+ * signal was sent. With --wait it polls the process with exponentially
+ * growing sleep times, starting at 32 milliseconds, and returns true once
+ * the process is gone. It returns false if the process still exists when the
+ * sleep time reaches five seconds, or if sleeping or polling fails.
+ *
+ * The function calls die() if there is no process to signal or if the
+ * signal can not be sent.
+ */
+static bool com_kill(void)
+{
+       pid_t pid;
+       unsigned sig = OPT_UINT32_VAL(KILL, SIGNAL);
+       unsigned ms = 32;
+
+       pid = get_misma_pid(config_file);
+       /*
+        * Refuse non-positive pids altogether: kill(2) interprets zero and
+        * negative values as process groups or as "all processes". This also
+        * matches the "> 0" test in com_run().
+        */
+       if (pid <= 0)
+               die("no misma run process to send signal to");
+       NOTICE_LOG("sending signal %u to pid %d\n", sig, pid);
+       if (kill(pid, sig) < 0)
+               die_errno("kill");
+       if (!OPT_GIVEN(KILL, WAIT))
+               return true;
+       while (ms < 5000) {
+               struct timespec ts = {
+                       .tv_sec = ms / 1000,
+                       .tv_nsec = (ms % 1000) * 1000 * 1000
+               };
+               if (nanosleep(&ts, NULL) < 0)
+                       return false;
+               /* Signal zero only checks whether the process exists. */
+               if (kill(pid, 0) < 0)
+                       return errno == ESRCH;
+               ms *= 2;
+       }
+       return false;
+}
+EXPORT_CMD_HANDLER(kill);
+
+/*
+ * NULL-terminated table of subcommand names. The stringifying definition of
+ * LSG_MISMA_CMD is only in effect while the initializer is expanded.
+ * NOTE(review): LSG_MISMA_SUBCOMMANDS is not defined in this file; presumably
+ * it is generated from misma.suite.m4 and expands to one comma-terminated
+ * LSG_MISMA_CMD(name) entry per subcommand -- confirm.
+ */
+#define LSG_MISMA_CMD(_name) #_name
+static const char * const subcommand_names[] = {LSG_MISMA_SUBCOMMANDS NULL};
+#undef LSG_MISMA_CMD
+
+/*
+ * Print the list of subcommands to stdout.
+ *
+ * In verbose mode one line per subcommand is printed: the name, padded to
+ * twelve characters, followed by the purpose text. Otherwise the names are
+ * printed as a comma-separated list, indented by a tab, and a new line is
+ * started whenever the running column count exceeds 70.
+ */
+static void show_subcommand_summary(bool verbose)
+{
+       unsigned column;
+       int idx;
+
+       printf("Available subcommands:\n");
+       if (verbose) {
+               const struct lls_command *c;
+
+               /* Walk the suite from index one until lls_cmd() returns NULL. */
+               idx = 1;
+               while ((c = lls_cmd(idx, misma_suite))) {
+                       const char *what = lls_purpose(c);
+                       const char *who = lls_command_name(c);
+                       printf("%-12s%s\n", who, what);
+                       idx++;
+               }
+               return;
+       }
+       /* The leading tab is counted as eight columns. */
+       column = 8;
+       printf("\t");
+       for (idx = 0; idx < LSG_NUM_MISMA_SUBCOMMANDS; idx++) {
+               if (idx != 0)
+                       column += printf(", ");
+               if (column > 70) {
+                       printf("\n\t");
+                       column = 8;
+               }
+               column += printf("%s", subcommand_names[idx]);
+       }
+       printf("\n");
+}
+
+/*
+ * Handler of the "trim" subcommand. Dies with "already running" if the lock
+ * cannot be taken, otherwise runs trim_filesystem() via
+ * for_each_matching_origin(), with the --dry-run flag passed along.
+ */
+static bool com_trim(void)
+{
+       if (misma_lock(config_file))
+               return for_each_matching_origin(trim_filesystem,
+                       OPT_GIVEN(TRIM, DRY_RUN));
+       /* die() does not return. */
+       die("already running");
+}
+EXPORT_CMD_HANDLER(trim);
+
+/*
+ * Handler of the "help" subcommand.
+ *
+ * At most one non-option argument is accepted. Without an argument the
+ * subcommand summary is printed, verbosely if --long was given. With an
+ * argument, the named subcommand is looked up in the suite and its long or
+ * short help text (depending on --long) is printed. Lookup and argument
+ * count errors are handed to die_lopsub().
+ */
+static bool com_help(void)
+{
+       const struct lls_command *target;
+       char *errctx, *text;
+       bool want_long;
+       int rc;
+
+       rc = lls_check_arg_count(sublpr, 0, 1, &errctx);
+       if (rc < 0)
+               die_lopsub(rc, &errctx);
+       want_long = OPT_GIVEN(HELP, LONG);
+       if (lls_num_inputs(sublpr) == 0) {
+               show_subcommand_summary(want_long);
+               return true;
+       }
+       rc = lls_lookup_subcmd(lls_input(0, sublpr), misma_suite, &errctx);
+       if (rc < 0)
+               die_lopsub(rc, &errctx);
+       target = lls_cmd(rc, misma_suite);
+       text = want_long? lls_long_help(target) : lls_short_help(target);
+       printf("%s\n", text);
+       /* The help text is handed to free() once it has been printed. */
+       free(text);
+       return true;
+}
+EXPORT_CMD_HANDLER(help);
+
+/*
+ * Handler of the "configtest" subcommand. The handler itself only prints
+ * the success message and always reports success.
+ */
+static bool com_configtest(void)
+{
+       fputs("Syntax Ok\n", stdout);
+       return true;
+}
+EXPORT_CMD_HANDLER(configtest);
+
+/*
+ * Handler of the "utilization" subcommand: call get_utilization(), then
+ * print one "vg/pool: data%/meta%" line for each of the num_pools entries
+ * of the thin_pool array. Always reports success.
+ */
+static bool com_utilization(void)
+{
+       unsigned idx = 0;
+
+       get_utilization();
+       while (idx < num_pools) {
+               struct thin_pool *pool = &thin_pool[idx++];
+
+               printf("%s/%s: %u%%/%u%%\n",
+                       vgname(pool->vgid), pool->name,
+                       pool->utilization.data, pool->utilization.meta);
+       }
+       return true;
+}
+EXPORT_CMD_HANDLER(utilization);
+
+const char *GET_VERSION(void);
+/*
+ * Deal with the --version, --detailed-help and --help options of the
+ * supercommand. If one of them was given, the corresponding text is printed
+ * to stdout and the process exits successfully. --version takes precedence
+ * over --detailed-help, which takes precedence over --help. The function
+ * returns only if none of the three options was given.
+ */
+static void handle_version_and_help(void)
+{
+       char *text;
+
+       if (OPT_GIVEN(MISMA, VERSION)) {
+               printf(PACKAGE " %s\n"
+                       "Copyright (C) " COPYRIGHT_YEAR " " AUTHOR ".\n"
+                       "License: " LICENSE ": <" LICENSE_URL ">.\n"
+                       "This is free software: you are free to change and redistribute it.\n"
+                       "There is NO WARRANTY, to the extent permitted by law.\n"
+                       "\n"
+                       "Web page: " URL "\n"
+                       "Clone URL: " CLONE_URL "\n"
+                       "Gitweb: " GITWEB_URL "\n"
+                       "Author's Home Page: " HOME_URL "\n"
+                       "Send feedback to: " AUTHOR " <" EMAIL ">\n"
+                       ,
+                       GET_VERSION()
+               );
+               exit(EXIT_SUCCESS);
+       }
+       if (OPT_GIVEN(MISMA, DETAILED_HELP)) {
+               text = lls_long_help(CMD_PTR(MISMA));
+       } else {
+               /* Nothing to do unless plain --help was given. */
+               if (!OPT_GIVEN(MISMA, HELP))
+                       return;
+               text = lls_short_help(CMD_PTR(MISMA));
+       }
+       printf("%s\n", text);
+       free(text);
+       exit(EXIT_SUCCESS);
+}
+
+/*
+ * Program entry point.
+ *
+ * The global options are parsed first. The non-option arguments which
+ * remain form the command line of the subcommand: the first one is the
+ * subcommand name, which is looked up in the suite, and the whole tail of
+ * argv is then parsed against that subcommand. Without any non-option
+ * argument the verbose subcommand summary is printed and the program exits
+ * successfully. For every subcommand other than "help", init_origins() is
+ * called before the handler stored in the user data of the subcommand
+ * runs. The exit status reflects the boolean result of the handler.
+ */
+int main(int argc, char **argv)
+{
+       const struct misma_user_data *handler_data;
+       char **sub_argv;
+       char *errctx;
+       unsigned sub_argc;
+       int cmd_num;
+
+       valid_fd012();
+       parse_options(argc, argv, CMD_PTR(MISMA), &lpr);
+       loglevel_arg_val = OPT_UINT32_VAL(MISMA, LOGLEVEL);
+       handle_version_and_help();
+       sub_argc = lls_num_inputs(lpr);
+       if (sub_argc == 0) {
+               show_subcommand_summary(true /* verbose */);
+               exit(EXIT_SUCCESS);
+       }
+       /* The last sub_argc words of argv are the subcommand and its args. */
+       sub_argv = argv + argc - sub_argc;
+       cmd_num = lls_lookup_subcmd(sub_argv[0], misma_suite, &errctx);
+       if (cmd_num < 0)
+               die_lopsub(cmd_num, &errctx);
+       subcmd = lls_cmd(cmd_num, misma_suite);
+       parse_options(sub_argc, sub_argv, subcmd, &sublpr);
+       if (subcmd != CMD_PTR(HELP))
+               init_origins();
+       handler_data = lls_user_data(subcmd);
+       if (handler_data->handler())
+               exit(EXIT_SUCCESS);
+       exit(EXIT_FAILURE);
+}
diff --git a/misma.h b/misma.h
new file mode 100644 (file)
index 0000000..fceab34
--- /dev/null
+++ b/misma.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <time.h>
+
+#include "config.h"
+
+/*
+ * Allocation wrappers, implemented in util.c. They check the result of the
+ * underlying libc call with assert(), so a failed allocation trips the
+ * assertion rather than handing NULL back to the caller.
+ */
+
+/* Like realloc(3). The size must be greater than zero. */
+__attribute__ ((warn_unused_result))
+void *xrealloc(void *p, size_t size);
+
+/* Allocate size bytes by calling xrealloc() with a NULL pointer. */
+__attribute__ ((warn_unused_result))
+void *xmalloc(size_t size);
+
+/* Like xmalloc(), but the returned buffer is filled with zeros. */
+__attribute__ ((warn_unused_result))
+void *xzmalloc(size_t size);
+
+/* Like strdup(3). A NULL argument yields a copy of the empty string. */
+void *xstrdup(const char *s);
+
+/* Return the printf-formatted message in a newly allocated buffer. */
+__attribute__ ((format (printf, 1, 2))) __attribute__ ((warn_unused_result))
+char *msg(const char *fmt, ...);
+
+/*
+ * NOTE(review): the four functions below are not defined in the part of
+ * util.c shown here. Judging by the names, the die_*() functions report a
+ * bad argument of option opt and terminate, check_range() verifies
+ * min <= val <= max, and xexec() runs argv and captures output in *buf --
+ * confirm against the implementation.
+ */
+__attribute__ ((noreturn))
+void die_empty_arg(const char *opt);
+
+__attribute__ ((noreturn))
+void die_range(const char *opt);
+
+void check_range(uint32_t val, uint32_t min, uint32_t max, const char *opt);
+bool xexec(char * const argv[], char **buf);
+
+/*
+ * Scope of an lvm specifier.
+ * NOTE(review): presumably the four values correspond to the forms listed
+ * in the --create-interval help text of misma.suite.m4 (no specifier, <vg>,
+ * <vg|pool>, <vg/tlv>) -- confirm against parse_lvmspec().
+ */
+enum lvm_scope {
+       LS_GLOBAL, LS_VG, LS_POOL, LS_ORIGIN
+};
+/*
+ * A parsed lvm specifier.
+ * NOTE(review): which of the three strings are set for which scope, and
+ * whether free_lvmspec() releases them, is not visible here -- confirm.
+ */
+struct lvmspec {
+       enum lvm_scope scope;
+       char *vg, *pool, *tlv;
+};
+void parse_lvmspec(const char *arg, const char *context,
+               struct lvmspec *result);
+void free_lvmspec(struct lvmspec *spec);
+/* One value for the data LV and one for the metadata LV of a thin pool. */
+struct percentage_pair {
+       uint8_t data, meta;
+};
+/* Parsed form of a [lvmspec:]data_threshold,meta_threshold argument. */
+struct threshold_arg {
+       struct lvmspec lvmspec;
+       struct percentage_pair threshold;
+};
+void parse_threshold_arg(const char *arg, const char *context,
+               struct threshold_arg *result);
+/* Parsed form of a [lvmspec:]timespec argument. */
+struct time_arg {
+       struct lvmspec lvmspec;
+       uint32_t seconds;
+};
+/* NOTE(review): presumably returns the time span in seconds -- confirm. */
+unsigned parse_timespec(const char *spec, const char *context);
+void parse_time_arg(const char *arg, const char *context,
+                struct time_arg *result);
+
+/* NOTE(review): valid_fd012() and daemonize() are not defined in view. */
+void valid_fd012(void);
+int daemonize(const char *logfile);
+/*
+ * The subcommand handlers of misma.c pass the config file path to these
+ * two functions. They treat a false return of misma_lock() as "already
+ * running" and a zero return of get_misma_pid() as "no misma run process".
+ */
+bool misma_lock(const char *string);
+pid_t get_misma_pid(const char *string);
+/* NOTE(review): presumably iterates over the lines of a text buffer. */
+struct line_iter {
+       char *base;
+       char *line;
+};
+void line_iter_init(struct line_iter *liter, char *text);
+char *line_iter_get(struct line_iter *liter);
+/*
+ * Read from fd until end of file into a newly allocated, NUL-terminated
+ * buffer which is stored in *buf. Returns false on read errors.
+ */
+bool fd2buf(int fd, char **buf);
+
+/*
+ * NOTE(review): LOGLEVELS is not defined in this header; presumably it
+ * comes from config.h and lists the LL_* constants used below -- confirm.
+ */
+enum loglevels {LOGLEVELS, NUM_LOGLEVELS};
+
+/* Log the printf-formatted message with severity ll. */
+__attribute__ ((format (printf, 2, 3)))
+void misma_log(int ll, const char* fmt,...);
+
+/*
+ * One convenience macro per severity. Each of them prepends the name of
+ * the calling function and ": " to the message.
+ */
+#define DEBUG_LOG(f,...) misma_log(LL_DEBUG, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define INFO_LOG(f,...) misma_log(LL_INFO, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define NOTICE_LOG(f,...) misma_log(LL_NOTICE, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define WARNING_LOG(f,...) misma_log(LL_WARNING, "%s: " f, __FUNCTION__, ##  __VA_ARGS__)
+#define ERROR_LOG(f,...) misma_log(LL_ERROR, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define CRIT_LOG(f,...) misma_log(LL_CRIT, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define EMERG_LOG(f,...) misma_log(LL_EMERG, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+
+/* Neither variant of die() returns to the caller. */
+__attribute__ ((noreturn))
+__attribute__ ((format (printf, 1, 2)))
+void die(const char *fmt, ...);
+
+/* NOTE(review): presumably also reports strerror(errno) -- confirm. */
+__attribute__ ((noreturn))
+__attribute__ ((format (printf, 1, 2)))
+void die_errno(const char *fmt, ...);
+
+/*
+ * Opaque heap type.
+ * NOTE(review): the implementation is not part of this header. Judging by
+ * the function names this is a min-heap which is ordered by the compare
+ * callback and operates on the array passed to heap_init() -- confirm.
+ */
+struct heap;
+struct heap *heap_init(void *array, unsigned num_elements,
+       int (*compare)(const void *data1, const void *data2));
+unsigned heap_num_elements(const struct heap *h);
+void heap_insert(void *new_element, struct heap *h);
+void *heap_min(const struct heap *h);
+void *heap_extract_min(struct heap *h);
+void heap_dump(const struct heap *h, void (*dumper)(const void *));
diff --git a/misma.suite.m4 b/misma.suite.m4
new file mode 100644 (file)
index 0000000..35c702a
--- /dev/null
@@ -0,0 +1,614 @@
+# SPDX-License-Identifier: GPL-2.0+
+[suite misma]
+       caption = Subcommands
+       mansect = 8
+       manual_title = System Manager's Manual
+[supercommand misma]
+       [description]
+               DESCRIPTION1()
+
+               DESCRIPTION2()
+
+               DESCRIPTION3()
+       [/description]
+       synopsis = [global-options...] [--] [<subcommand> [subcommand-options...]]
+       purpose = SLOGAN()
+
+       [option title-text]
+               summary = General options
+               flag ignored
+       [option help]
+               summary = print help and exit
+               short_opt = h
+       [option detailed-help]
+               summary = print help, including all details, and exit
+       [option version]
+               summary = print version and exit
+               short_opt = V
+       [option loglevel]
+               summary = control amount of logging
+               short_opt = l
+               arg_info = required_arg
+               arg_type = string
+               typestr = severity
+               values = {
+                       LSGLL_DEBUG = "debug",
+                       LSGLL_INFO = "info",
+                       LSGLL_NOTICE = "notice",
+                       LSGLL_WARNING = "warning",
+                       LSGLL_ERROR = "error",
+                       LSGLL_CRIT = "crit",
+                       LSGLL_EMERG = "emerg"
+               }
+               default_val = warning
+               [help]
+                       Log only messages with severity greater than or equal to the given
+                       value. Possible values:
+
+                       debug: produces really noisy output.
+                       info: still noisy, but won't fill up the disk quickly.
+                       notice: indicates normal, but significant event.
+                       warning: unexpected events that can be handled.
+                       error: unhandled error condition.
+                       crit: system might be unreliable.
+                       emerg: last message before exit.
+               [/help]
+       [option config-file]
+               short_opt = c
+               summary = use alternative config file (default: ~/.mismarc)
+               typestr = path
+               arg_info = required_arg
+               arg_type = string
+               [help]
+                       Options may be given at the command line or in the configuration
+                       file. As usual, if an option is given both at the command line and
+                       in the configuration file, the command line option takes precedence.
+
+                       The config file may contain global options as well as options for
+                       any subcommand, but subcommand specific options must be placed in a
+                       separate section. See the Examples section of the man page.
+               [/help]
+
+       [option title-text]
+               summary = LVM options
+               flag ignored
+       [option origin]
+               summary = the VG and the thin LV to snapshot
+               typestr = vg/tlv
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               [help]
+                       The named volume group must exist and it must contain the named thin
+                       logical volume. This option may be given multiple times where each
+                       instance corresponds to one origin to snapshot.
+               [/help]
+[introduction]
+       Misma supports the subcommands described below. If no subcommand is
+       given, the list of available subcommands is shown and the program
+       terminates successfully without performing any further action.
+[/introduction]
+
+[subcommand run]
+       purpose = create and prune snapshots, discard unused blocks
+       [description]
+               This is the main mode of operation. Snapshots are created and pruned
+               periodically, the thin pool utilization is monitored and filesystem
+               trims are scheduled as configured. The subcommand terminates only on
+               fatal errors or after a terminating signal was received.
+       [/description]
+       [option daemon]
+               short_opt = d
+               summary = run as background daemon
+               [help]
+                       If this option is given, the process detaches from the console and
+                       continues to run in the background.
+               [/help]
+       [option logfile]
+               short_opt = l
+               summary = where to write log output
+               arg_info = required_arg
+               arg_type = string
+               typestr = path
+               default_val = /dev/null
+               [help]
+                       This option is only honored if --daemon is given, in which case
+                       log messages go to the given file. Otherwise the option is silently
+                       ignored and log output is written to stderr.
+               [/help]
+       [option create-interval]
+               summary = Time span between two subsequent snapshots
+               typestr = [lvmspec:]timespec
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               default_val = 6h
+               [help]
+                       The lvm specifier determines to which origins this instance of the
+                       option applies. If no specifier is given, the option applies to all
+                       origins. Otherwise the specifier may be in one of the following forms:
+                       <vg>: applies to all origins in VG vg, <vg|pool>: applies to all
+                       origins in thin pool <pool> of VG vg, or <vg/tlv>: applies to origin
+                       tlv of vg only. If more than one specifier match a particular origin,
+                       the narrowest scoped one applies. The order of precedence is therefore
+                       <vg/tlv>, <vg|pool>, <vg>, <global>.
+
+                       The time specifier is an unsigned integer which is followed by a time
+                       unit, a single character of the set {s,m,h,d,y} for seconds, minutes,
+                       hours, days, and years.
+               [/help]
+       [option max-age]
+               summary = age of the oldest snapshot to keep
+               typestr = [lvmspec:]timespec
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               default_val = 1y
+               [help]
+                       See --create-interval for the format of the lvm and time specifiers.
+               [/help]
+       [option check-interval]
+               summary = the time period between two utilization checks
+               typestr = timespec
+               arg_info = required_arg
+               arg_type = string
+               default_val = 1m
+               [help]
+                       The utilization of all thin pools which contain at least one thin
+                       logical volume specified as an argument to --origin are checked
+                       periodically. See --create-interval for the format of the time
+                       specifier.
+               [/help]
+       [option threshold]
+               summary = high watermarks for snapshot removal (1-99)
+               typestr = [lvmspec:]data_threshold,meta_threshold
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               default_val = 95,95
+               [help]
+                       The threshold part of the argument is a comma-separated pair of
+                       percentages between 1 and 99, inclusively. If the percentage of used
+                       space in the data/metadata logical volume of the thin pool exceeds
+                       the corresponding threshold value, forced snapshot removal kicks in
+                       to bring back the utilization below the thresholds.
+
+                       The format of the lvm specifier is described in the help text of
+                       --create-interval. However, since the utilization is a property
+                       of the pool, arguments of the form <vg/tlv> make no sense and are
+                       therefore rejected.
+               [/help]
+       [option trim-interval]
+               summary = discard unused blocks periodically
+               typestr = [lvmspec:]timespec
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               default_val = 0
+               [help]
+                       The argument specifies the duration between two successive trims. The
+                       default value of zero deactivates this feature.
+
+                       Trimming is performed in the same way as for the trim subcommand.
+                       Errors related to trimming are logged but are otherwise ignored.
+
+                       See --create-interval for the format of the specifiers.
+               [/help]
+       [option exit-hook]
+               summary = command to be executed before exit
+               typestr = command
+               arg_info = required_arg
+               arg_type = string
+               default_val = true
+               [help]
+                       One possible application for this hook is to inform the system manager
+                       that no more snapshots are going to be created.
+
+                       A (quoted) string which describes the error that caused the termination is
+                       appended to the given command and the resulting string is passed as a single
+                       argument to /bin/sh -c.
+               [/help]
+       [option suppress-lvm-warnings]
+               summary = quieten lvcreate(8) and lvremove(8)
+               [help]
+                       Suppress warnings from lvcreate(8) and lvremove(8).
+               [/help]
+[subcommand create]
+       purpose = create a snapshot of each matching origin
+       non-opts-name = [<lvmspec>]...
+       [description]
+               This creates one snapshot of each origin which matches the given lvm
+               specifier, ignoring creation intervals, maximal age and utilization
+               thresholds. If no specifiers are given, all origins are regarded as
+               matching so that one snapshot of each configured origin is created.
+
+               The subcommand fails if another "run", "create", or "rm" command
+               is currently running.
+       [/description]
+       [option dry-run]
+               short_opt = n
+               summary = just print which snapshot would be created
+[subcommand rm]
+       purpose = remove one snapshot of each matching origin
+       non-opts-name = [<lvmspec>]...
+       [description]
+               The remarks stated in the description of the "create" subcommand apply
+               for this subcommand as well.
+       [/description]
+       [option dry-run]
+               short_opt = n
+               summary = just print which snapshot would get removed
+[subcommand ls]
+       purpose = print the snapshot list of each origin
+       non-opts-name = [<lvmspec>]...
+       [description]
+               The list is sorted by snapshot creation date.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = use long listing format
+               [help]
+                       The default output mode lists only the sequence number and the age
+                       of each snapshot as human readable text. This option adds additional
+                       output.
+               [/help]
+[subcommand kill]
+       purpose = signal another misma process
+       [description]
+               This sends a signal to the misma "run" process.
+       [/description]
+       [option signal]
+               short_opt = s
+               summary = send the given signal rather than SIGTERM
+               typestr = signal_number
+               arg_info = required_arg
+               arg_type = uint32
+               default_val = 15
+               [help]
+                       The standard Unix semantics apply if the specified signal number
+                       is zero. That is, no signal is actually sent, and the subcommand
+                       exits successfully only if a misma "run" process exists.
+               [/help]
+       [option wait]
+               short_opt = w
+               summary = wait until the signalled process has terminated
+               [help]
+                       This option is handy for system shutdown scripts which would like
+                       to terminate the misma daemon process.
+
+                       Without --wait the misma process which executes the kill subcommand
+                       exits right after the kill(2) system call returns. At this point the
+                       signalled process might still be alive (even if SIGKILL was sent).
+                       If --wait is given, the process waits until the signalled process
+                       has terminated or the timeout expires.
+
+                       If --wait is not given, the kill subcommand exits successfully if
+                       and only if the signal was sent (i.e., if there exists another misma
+                       process to receive the signal). With --wait it exits successfully
+                       if, additionally, the signalled process has terminated before the
+                       timeout expires.
+
+                       It only makes sense to use this option for signals which terminate
+                       the misma process.
+               [/help]
+[subcommand trim]
+       purpose = discard unused blocks of origin LVs
+       non-opts-name = [<lvmspec>]...
+       [description]
+               Each matching origin LV is expected to contain a mounted and writable
+               filesystem. The subcommand is equivalent to running fstrim(8) on
+               the mountpoints of these filesystems. The full block range of each
+               origin LV is taken into account and the default minimal block size for
+               discards is used. This corresponds to the default values of fstrim(8).
+       [/description]
+       [option dry-run]
+               short_opt = n
+               summary = print the mount points, but do not trim
+               [help]
+                       In dry-run mode the mount points are determined as usual, but the
+                       command exits without starting any trim operation.
+               [/help]
+[subcommand help]
+       purpose = list available subcommands or print subcommand-specific help
+       non-opts-name = [subcommand]
+       [description]
+               Without any arguments, help prints the list of available
+               subcommands. When called with a subcommand name argument, it prints
+               the help text of the given subcommand.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = show the long help text
+               [help]
+                       If the optional argument is supplied, the long help text contains the
+                       synopsis, the purpose and the description of the specified subcommand,
+                       followed by the option list including summary and help text of each
+                       option. Without --long, the short help is shown instead. This omits
+                       the description of the subcommand and the option help.
+
+                       If no subcommand is supplied but --long is given, the list contains the
+                       purpose of each subcommand.
+               [/help]
+[subcommand utilization]
+       purpose = show thin pool utilization
+       [description]
+               This prints the percentage of used blocks in the data and metadata
+               logical volumes of each pool.
+       [/description]
+[subcommand configtest]
+       purpose = run a configuration file syntax test
+       [description]
+               This subcommand checks the command line options and the configuration
+               file for syntactic correctness. It either reports "Syntax Ok" and
+               exits successfully or prints information about the first syntax error
+               detected and terminates with exit code 1.
+       [/description]
+
+[section Notes]
+.SS Naming
+       Snapshots created by misma are named
+       .IR misma-origin.seq ,
+       where
+       .I origin
+       is the name of the thin logical volume (i.e., the second component
+       of the argument to
+       .I --origin)
+       and
+       .I seq
+       is a sequence number.
+.SS Snapshot Replacement Strategy
+       Assume that the arguments
+       to
+       .I --create-interval
+       and
+       .I --max-age
+       correspond to
+       .I d
+       minutes and
+       .I m
+       days, respectively. These two quantities determine the length
+       .I n
+       of a sequence of snapshots such that
+       .IP \(bu 2
+               the first two snapshots are
+       .I d
+       minutes apart,
+       .IP \(bu 2
+               the difference of the creation times between two consecutive snapshots
+               doubles at each step,
+       .IP \(bu 2
+               the first and the last snapshot are at least
+       .I m
+       days apart.
+       .P
+       At startup,
+       .B misma
+       maps each existing snapshot to a slot in an array
+       of length
+       .IR n .
+       When a new snapshot has to be created and not all slots are mapped
+       yet, the new snapshot is mapped to an unmapped slot. If all slots
+       are mapped, an existing snapshot is removed first and its slot is
+       reused. The slot number of the snapshot to be replaced is computed as
+       .B ffz(seq % (2^n - 1)),
+       where
+       .I seq
+       is the sequence number of the new snapshot, and
+       .B ffz(x)
+       is the first zero in
+       the binary representation of
+       .IR x .
+       By properties of the
+       .B ffz()
+       function, the frequency at which a slot gets reused halves at each
+       step: the snapshot in slot 0 gets reused (roughly) every second time,
+       the snapshot in slot one every fourth time, and so on.
+.SS Forced Snapshot Removal
+       In addition to the normal snapshot removal which takes place when a
+       slot gets reused as described above, snapshots are
+       .I force-removed
+       when the utilization of a thin pool exceeds its configured
+       thresholds. One snapshot is removed from each affected origin until
+       the utilization drops below the thresholds. If the utilization still
+       exceeds the thresholds after all snapshots have been removed, snapshot
+       creation is suspended.
+       .P
+       Forced removal
+       reliably prevents data and metadata exhaustion if the pool is
+       not overbooked, that is, if the sum of the (virtual) sizes of the
+       non-snapshot logical volumes is smaller than the pool size.
+.SS Trimming
+       The trim operation instructs a mounted filesystem to identify blocks
+       which are currently not in use and to pass this information to the
+       underlying block device driver. For a configured misma origin, this
+       driver is
+       .BR dm-thin ,
+       which keeps track of the used and unused blocks of each thin pool.
+       The blocks which are freed by the trim operation become available
+       for subsequent snapshots.
+
+       A one-shot trim operation is started by invoking the
+       .B trim
+       subcommand while periodic trims may be configured via the
+       .I --trim-interval
+       option of the
+       .B run
+       subcommand.
+
+       Trimming is implemented by issuing the
+       .I FITRIM
+       ioctl on the mount point, which is identical to how the
+       .BR fstrim (8)
+       command works. The mount point is determined from the major and minor
+       device numbers of the block special of the origin by parsing
+       .IR /proc/self/mountinfo .
+.SS Activating and Mounting Snapshots
+       Since thin provisioned snapshots have the
+       .I activation-skip
+       flag set, one must first
+       .I activate
+       the snapshot logical volume to create the corresponding device node.
+
+       Moreover, the XFS filesystem driver refuses to mount a block device
+       which contains a UUID that is identical to the UUID of an already
+       mounted filesystem. To mount a snapshot of an XFS filesystem, one
+       must therefore tell XFS to skip the UUID check.
+
+       See the examples below for suitable command line options for
+       .BR lvchange (8)
+       and
+       .BR mount (8).
+
+       Since logical volumes which contain a mounted filesystem cannot be
+       removed, a thin pool which is not overbooked may still run out of
+       space when one of its snapshot logical volumes is still mounted. It
+       is therefore good practice to activate and mount snapshots only for
+       as long as necessary.
+[/section]
+
+[section Examples]
+       .IP \(bu 2
+       Create a 1T large thin pool named
+       .I tp
+       in the volume group
+       .IR vg :
+
+       .RS 6
+       .EX
+               .B lvcreate \-\-type thin\-pool \-L 1T \-\-poolmetadatasize 16G \-n tp vg
+       .EE
+       .RE
+       .IP \(bu 2
+       Create the thin logical volume
+       .I tlv
+       of virtual size 100G in the thin pool
+       .IR tp :
+
+       .RS 6
+       .EX
+               .B lvcreate \-\-thin \-n tlv \-\-virtualsize 100G \-\-thinpool vg/tp
+       .EE
+       .RE
+       .IP \(bu 2
+       Run
+       .B misma
+       to create snapshots of the logical volume
+       .IR tlv ,
+       using default values:
+
+       .RS 6
+       .EX
+               .B misma \-\-origin vg/tlv run
+       .EE
+       .RE
+       .IP \(bu 2
+       Same as before, but run
+       .B misma
+       as a background daemon to create a snapshot every hour:
+
+       .RS 6
+       .EX
+               .B misma \-\-origin vg/tlv \-\-create-interval 1h \-\- run \-d
+       .EE
+       .RE
+       .IP \(bu 2
+       List all snapshots created so far:
+
+       .RS 6
+       .EX
+               .B misma \-\-origin vg/tlv \-\- ls \-l
+       .EE
+       .RE
+       .IP \(bu 2
+       Run
+       .B lvs
+       to print similar information:
+
+       .RS 6
+       .EX
+               .B vg=vg; o=tlv
+               .B lvs -o 'lv_path,lv_attr,lv_time,origin' \[rs]
+               .B \~ \-S \[dq]vg_name = $vg && origin = $o\[dq] \[rs]
+               .B \~ \-\-config \[dq]report/time_format='%F %R'\[dq]
+       .EE
+       .RE
+       .IP \(bu 2
+       Activate snapshot number 42:
+
+       .RS 6
+       .EX
+               .B lvchange \-\-ignoreactivationskip \-\-activate y vg/misma-tlv.42
+       .EE
+       .RE
+       .IP \(bu 2
+       Mount an active snapshot which contains an XFS filesystem:
+
+       .RS 6
+       .EX
+               .B mount /dev/vg/misma-tlv.42 \-o nouuid /mnt
+       .EE
+       .RE
+       .IP \(bu 2
+       Terminate the
+       .B misma
+       daemon process:
+
+       .RS 6
+       .EX
+               .B misma \-\-origin vg/tlv kill
+       .EE
+       .RE
+       .IP \(bu 2
+       A simple config file:
+
+       .RS 6
+       .EX
+               # global options
+               origin vg/tlv
+               loglevel info
+               # an option for the "run" subcommand
+               [run]
+                   logfile /var/log/misma.log
+       .EE
+       .RE
+
+[/section]
+
+[section copyright]
+       Written by AUTHOR()
+       .br
+       Copyright (C) COPYRIGHT_YEAR() AUTHOR()
+       .br
+       License: LICENSE()
+       .br
+       This is free software: you are free to change and redistribute it.
+       .br
+       There is NO WARRANTY, to the extent permitted by law.
+       .P
+       Web page:
+       .UR URL()
+       .UE
+       .br
+       Git clone `URL':
+       .UR CLONE_URL()
+       .UE
+       .br
+       Gitweb:
+       .UR GITWEB_URL()
+       .UE
+       .br
+       Author's home page:
+       .UR HOME_URL()
+       .UE
+       .br
+       Report bugs to
+       .MT EMAIL()
+       AUTHOR()
+       .ME
+[/section]
+[section see also]
+       .BR lvm (8),
+       .BR fstrim (8),
+       .BR lvmthin (7),
+       .BR dss (1)
+[/section]
diff --git a/util.c b/util.c
new file mode 100644 (file)
index 0000000..10972da
--- /dev/null
+++ b/util.c
@@ -0,0 +1,562 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#include "misma.h"
+
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+/*
+ * Resize the allocation at p to size bytes and abort on failure.
+ *
+ * size must be positive. Passing NULL for p allocates a new buffer. The
+ * realloc() call is deliberately kept out of the assert() expression: if
+ * NDEBUG is defined, assert() expands to nothing and the allocation would
+ * silently be skipped.
+ */
+void *xrealloc(void *p, size_t size)
+{
+       assert(size > 0);
+       p = realloc(p, size);
+       assert(p);
+       if (!p) /* only reached if assertions are compiled out */
+               abort();
+       return p;
+}
+
+/* Allocate size bytes of uninitialized memory by way of xrealloc(). */
+void *xmalloc(size_t size)
+{
+       void *mem = xrealloc(NULL, size);
+
+       return mem;
+}
+
+/* Allocate size bytes by way of xrealloc() and fill them with zero bytes. */
+void *xzmalloc(size_t size)
+{
+       /* memset() returns its first argument, i.e. the new buffer */
+       return memset(xrealloc(NULL, size), 0, size);
+}
+
+/*
+ * Return a heap-allocated copy of s, where NULL is treated like the empty
+ * string. A failing strdup() trips the assertion.
+ */
+void *xstrdup(const char *s)
+{
+       char *copy;
+
+       if (!s)
+               s = "";
+       copy = strdup(s);
+       assert(copy);
+       return copy;
+}
+
+/*
+ * Format a message printf-style into a freshly allocated buffer.
+ *
+ * The first attempt uses a 100 byte buffer. If the output does not fit, the
+ * buffer is resized to exactly the required size and the formatting is
+ * repeated. The caller owns the returned string.
+ *
+ * An output error of vsnprintf() (negative return value) is fatal. It used to
+ * be compared against the unsigned buffer size, which turned it into a bogus
+ * allocation request.
+ */
+char *msg(const char *fmt, ...)
+{
+       size_t size = 100;
+       char *m = xmalloc(size);
+
+       while (1) {
+               int n;
+               va_list ap;
+
+               /* Try to print in the allocated space. */
+               va_start(ap, fmt);
+               n = vsnprintf(m, size, fmt, ap);
+               va_end(ap);
+               if (n < 0) /* output error, there is nothing to return */
+                       abort();
+               /* If that worked, return the string. */
+               if ((size_t)n < size)
+                       return m;
+               /* Else try again with more space. */
+               size = (size_t)n + 1; /* precisely what is needed */
+               m = xrealloc(m, size);
+       }
+}
+
+/*
+ * Read from fd until end of file and store the result as a NUL-terminated
+ * string in a newly allocated buffer.
+ *
+ * On success, true is returned and *buf points to the buffer, which the
+ * caller must free. On read errors other than EAGAIN and EINTR, the error is
+ * logged, the buffer is freed, *buf is set to NULL and false is returned, so
+ * the caller neither leaks memory nor sees a dangling pointer.
+ */
+bool fd2buf(int fd, char **buf)
+{
+       ssize_t ret, nread = 0, sz = 100;
+
+       *buf = xmalloc(sz);
+       for (;;) {
+               /* always leave room for the terminating NUL byte */
+               ret = read(fd, *buf + nread, sz - nread - 1);
+               if (ret < 0) {
+                       if (errno == EAGAIN || errno == EINTR)
+                               continue;
+                       ERROR_LOG("read error: %s\n", strerror(errno));
+                       free(*buf);
+                       *buf = NULL;
+                       return false;
+               }
+               if (ret == 0) { /* end of file */
+                       (*buf)[nread] = '\0';
+                       return true;
+               }
+               nread += ret;
+               if (nread >= sz - 1) { /* buffer is full, double its size */
+                       sz *= 2;
+                       *buf = xrealloc(*buf, sz);
+               }
+       }
+}
+
+/*
+ * Run the command given by the NULL-terminated argv array in a child process
+ * and wait for its termination.
+ *
+ * If buf is not NULL, the standard output of the child is redirected to a
+ * pipe and everything the child writes there is collected with fd2buf() and
+ * stored in *buf. xexec() itself never frees *buf.
+ *
+ * Returns true only if reading the output succeeded (or was not requested)
+ * and the child exited normally with status EXIT_SUCCESS. Failures of
+ * pipe(), fork() and waitpid() are reported through die_errno().
+ */
+bool xexec(char * const argv[], char **buf)
+{
+       pid_t pid;
+       int pipefd[2] = {-1, -1};
+       unsigned n;
+
+       for (n = 0; argv[n]; n++)
+               DEBUG_LOG("argv[%u]=%s\n", n, argv[n]);
+       /* The pipe is only needed if the caller wants the output. */
+       if (buf) {
+               if (pipe(pipefd) < 0)
+                       die_errno("pipe");
+       }
+       if ((pid = fork()) < 0)
+               die_errno("fork");
+       if (pid > 0) { /* parent */
+               int wstatus;
+               bool success = true;
+               if (buf) {
+                       /*
+                        * Close the write end first, or the read side would
+                        * never see end of file.
+                        */
+                       close(pipefd[1]);
+                       success = fd2buf(pipefd[0], buf);
+                       close(pipefd[0]);
+               }
+               /* The child is reaped even if reading its output failed. */
+               if (waitpid(pid, &wstatus, 0) < 0)
+                       die_errno("waitp");
+               if (!success)
+                       return false;
+               if (!WIFEXITED(wstatus))
+                       return false;
+               if (WEXITSTATUS(wstatus) != EXIT_SUCCESS)
+                       return false;
+               return true;
+       }
+       /* child: both descriptors are still -1 if no pipe was created */
+       if (pipefd[0] >= 0)
+               close(pipefd[0]);
+       if (pipefd[1] >= 0 && pipefd[1] != STDOUT_FILENO) {
+               /* make the write end of the pipe the stdout of the command */
+               if (dup2(pipefd[1], STDOUT_FILENO) < 0)
+                       die_errno("dup2()");
+               close(pipefd[1]);
+       }
+       execvp(argv[0], argv);
+       /* execvp() only returns on errors */
+       EMERG_LOG("execvp error: %s\n", strerror(errno));
+       _exit(EXIT_FAILURE);
+}
+
+/* Call die() with a message stating that the argument to --opt is empty. */
+void die_empty_arg(const char *opt)
+{
+       die("argument to --%s must not be empty", opt);
+}
+
+/* Call die() with a message stating that the argument to --opt is out of range. */
+void die_range(const char *opt)
+{
+       die("argument to --%s is out of range", opt);
+}
+
+/* Call die_range() for option opt unless min <= val <= max holds. */
+void check_range(uint32_t val, uint32_t min, uint32_t max, const char *opt)
+{
+       if (min <= val && val <= max)
+               return;
+       die_range(opt);
+}
+
+/*
+ * Convert the decimal number in str to an unsigned 32 bit integer.
+ *
+ * Every problem is reported through one of the die functions, using opt as
+ * the option name: values which are negative or too large, a string without
+ * any digits, and extra characters after the number.
+ */
+static uint32_t atou32(const char *str, const char *opt)
+{
+       const long long limit = (uint32_t)-1;
+       long long num;
+       char *end;
+
+       errno = 0; /* strtoll() only sets errno on failure */
+       num = strtoll(str, &end, 10);
+       if (errno == ERANGE && (num == LLONG_MAX || num == LLONG_MIN))
+               die_range(opt);
+       if (num < 0 || num > limit)
+               die_range(opt);
+       /* Without any digits, strtoll() sets the end pointer to str. */
+       if (end == str)
+               die_empty_arg(opt);
+       /*
+        * Some implementations set errno and return zero if no conversion
+        * was performed.
+        */
+       if (errno != 0 && num == 0)
+               die_empty_arg(opt);
+       if (*end != '\0') /* something follows the number */
+               die("--%s: trailing characters after number", opt);
+       return num;
+}
+
+/*
+ * Split an argument of the form [prefix:]suffix at the first colon.
+ *
+ * Both *prefix and *suffix receive newly allocated strings, except that
+ * *prefix is set to NULL if arg contains no colon. An empty arg, an empty
+ * prefix and an empty suffix are all rejected by calling a die function with
+ * the given context.
+ */
+static void split_arg(const char *arg, const char *context,
+               char **prefix, char **suffix)
+{
+       char *copy = xstrdup(arg);
+       char *sep;
+
+       if (copy[0] == '\0')
+               die_empty_arg(context);
+       sep = strchr(copy, ':');
+       if (sep) {
+               *sep = '\0';
+               if (sep == copy || sep[1] == '\0')
+                       die("%s: invalid argument", context);
+               *prefix = xstrdup(copy);
+               *suffix = xstrdup(sep + 1);
+               free(copy);
+               return;
+       }
+       /* No colon: the copy as a whole becomes the suffix. */
+       *prefix = NULL;
+       *suffix = copy;
+}
+
+/*
+ * Parse an lvmspec string into *result.
+ *
+ * "vg/tlv" yields scope LS_ORIGIN, "vg|pool" yields scope LS_POOL and any
+ * other string is taken as a volume group name with scope LS_VG. The slash
+ * is looked for first. All strings stored in *result are newly allocated.
+ * If either side of the separator is empty, die() is called with the given
+ * context.
+ */
+void parse_lvmspec(const char *arg, const char *context,
+               struct lvmspec *result)
+{
+       char *copy = xstrdup(arg);
+       char *sep;
+
+       if ((sep = strchr(copy, '/'))) {
+               if (sep == copy || !sep[1])
+                       die("%s: invalid argument", context);
+               *sep = '\0';
+               result->scope = LS_ORIGIN;
+               result->tlv = xstrdup(sep + 1);
+       } else if ((sep = strchr(copy, '|'))) {
+               if (sep == copy || !sep[1])
+                       die("%s: invalid argument", context);
+               *sep = '\0';
+               result->scope = LS_POOL;
+               result->pool = xstrdup(sep + 1);
+       } else
+               result->scope = LS_VG;
+       /* The copy was cut at the separator, if there was one. */
+       result->vg = xstrdup(copy);
+       free(copy);
+}
+
+/*
+ * Free the strings of an lvmspec. Which members are freed depends on the
+ * scope: nothing for LS_GLOBAL, otherwise the volume group name plus the
+ * pool name (LS_POOL) or the thin volume name (LS_ORIGIN).
+ */
+void free_lvmspec(struct lvmspec *spec)
+{
+       switch (spec->scope) {
+       case LS_GLOBAL:
+               return;
+       case LS_POOL:
+               free(spec->pool);
+               break;
+       case LS_ORIGIN:
+               free(spec->tlv);
+               break;
+       default:
+               break;
+       }
+       free(spec->vg);
+}
+
+/*
+ * Parse a threshold argument of the form [lvmspec:]data,meta.
+ *
+ * Without lvmspec, the scope is set to LS_GLOBAL. An lvmspec of scope
+ * LS_ORIGIN is rejected. Both numbers must be between 1 and 99. All errors
+ * end up in one of the die functions.
+ */
+void parse_threshold_arg(const char *arg, const char *context,
+               struct threshold_arg *result)
+{
+       char *spec, *numbers, *sep;
+       uint32_t percent;
+
+       split_arg(arg, context, &spec, &numbers);
+       if (!spec)
+               result->lvmspec.scope = LS_GLOBAL;
+       else {
+               parse_lvmspec(spec, context, &result->lvmspec);
+               if (result->lvmspec.scope == LS_ORIGIN)
+                       die("invalid scope for threshold lvmspec");
+       }
+       free(spec);
+       sep = strchr(numbers, ',');
+       if (!sep)
+               die("%s: invalid argument", context);
+       *sep = '\0'; /* the data threshold ends here */
+       percent = atou32(numbers, context);
+       check_range(percent, 1, 99, context);
+       result->threshold.data = percent;
+       percent = atou32(sep + 1, context);
+       check_range(percent, 1, 99, context);
+       result->threshold.meta = percent;
+       free(numbers);
+}
+
+/*
+ * Convert a time specification to seconds.
+ *
+ * A time specification is a decimal number followed by exactly one unit
+ * character: s (seconds), m (minutes), h (hours), d (days) or y (365 days).
+ * A missing or unknown unit, characters after the unit, a missing number and
+ * results which do not fit into 32 bits are all reported through the die
+ * functions, using the given context.
+ */
+unsigned parse_timespec(const char *spec, const char *context)
+{
+       char *p, *tmp = xstrdup(spec);
+       uint64_t val, multiplier;
+
+       /* isdigit() is undefined for negative values, hence the cast. */
+       for (p = tmp; isdigit((unsigned char)*p); p++)
+               ;
+       if (*p == '\0')
+               die("%s: timespec lacks trailing time unit", context);
+       switch (*p) {
+       case 's': multiplier = 1; break;
+       case 'm': multiplier = 60; break;
+       case 'h': multiplier = 3600; break;
+       case 'd': multiplier = 86400; break;
+       case 'y': multiplier = 365 * 86400; break;
+       default:
+               die("%s: invalid time unit in timespec argument", context);
+       }
+       *p = '\0'; /* cut off the unit so that only the number remains */
+       if (p[1])
+               die("%s: trailing characters after time unit", context);
+       /* 64 bit multiplication, so the overflow check below is reliable */
+       val = atou32(tmp, context) * multiplier;
+       free(tmp);
+       if (val > (uint32_t)-1)
+               die_range(context);
+       return val;
+}
+
+/*
+ * Parse an argument of the form [lvmspec:]timespec into *result. Without
+ * lvmspec, the scope is set to LS_GLOBAL.
+ */
+void parse_time_arg(const char *arg, const char *context,
+                struct time_arg *result)
+{
+       char *spec, *duration;
+
+       split_arg(arg, context, &spec, &duration);
+       if (!spec)
+               result->lvmspec.scope = LS_GLOBAL;
+       else
+               parse_lvmspec(spec, context, &result->lvmspec);
+       free(spec);
+       result->seconds = parse_timespec(duration, context);
+       free(duration);
+}
+
+/* Let the iterator start at the beginning of text. */
+void line_iter_init(struct line_iter *liter, char *text)
+{
+       liter->base = text;
+       liter->line = text;
+}
+
+/*
+ * Return the next line of the text, or NULL if there are no more lines.
+ *
+ * The text is modified in place: the newline character which terminates the
+ * returned line is overwritten with a NUL byte.
+ */
+char *line_iter_get(struct line_iter *liter)
+{
+       char *start = liter->line, *nl;
+
+       if (start == NULL || *start == '\0')
+               return NULL;
+       nl = strchr(start, '\n');
+       if (nl == NULL) { /* last line lacks the trailing newline */
+               liter->line = NULL;
+               return start;
+       }
+       *nl = '\0';
+       liter->line = nl + 1;
+       return start;
+}
+
+/*
+ * Ensure that file descriptors 0, 1, and 2 are valid.
+ *
+ * open() returns the lowest unused descriptor, so /dev/null is opened until
+ * a descriptor greater than two comes back. Only that last descriptor is
+ * closed again.
+ */
+void valid_fd012(void)
+{
+       int fd;
+
+       do {
+               fd = open("/dev/null", O_RDWR);
+               if (fd < 0)
+                       die_errno("open");
+       } while (fd <= 2);
+       close(fd);
+}
+
+/*
+ * Detach from the calling process and continue in a child process.
+ *
+ * The calling process forks and then blocks until it can read one byte from
+ * a pipe which is shared with the child. If a byte arrives, it exits
+ * successfully, otherwise it calls die(). Hence this function only returns
+ * in the child.
+ *
+ * The child becomes a session leader, redirects stdin to /dev/null and both
+ * stdout and stderr to logfile (/dev/null if logfile is NULL), and changes
+ * its working directory to the root directory.
+ *
+ * Returns the write end of the pipe. Presumably the caller writes a byte to
+ * it once startup has succeeded; that code is not part of this file.
+ */
+int daemonize(const char *logfile)
+{
+       pid_t pid;
+       int nullfd, logfd, pipefd[2];
+
+       if (pipe(pipefd) < 0)
+               die_errno("pipe");
+       if ((pid = fork()) < 0)
+               die_errno("fork");
+       if (pid) { /* parent exits after reading from the pipe */
+               char c;
+               close(pipefd[1]);
+               /* zero means EOF: the child went away without writing */
+               if (read(pipefd[0], &c, 1) <= 0)
+                       die("child terminated unsuccessfully");
+               exit(EXIT_SUCCESS);
+       }
+       close(pipefd[0]);
+       /* become session leader */
+       if (setsid() < 0)
+               die_errno("setsid");
+       if ((nullfd = open("/dev/null", O_RDWR)) < 0)
+               die_errno("open /dev/null");
+       logfile = logfile? logfile : "/dev/null";
+       if ((logfd = open(logfile, O_WRONLY | O_APPEND | O_CREAT, 0666)) < 0)
+               die_errno("open %s", logfile);
+       /* logged before stdout and stderr are redirected below */
+       INFO_LOG("subsequent log messages go to %s\n", logfile);
+       if (dup2(nullfd, STDIN_FILENO) < 0)
+               die_errno("dup2");
+       close(nullfd);
+       if (dup2(logfd, STDOUT_FILENO) < 0)
+               die_errno("dup2");
+       if (dup2(logfd, STDERR_FILENO) < 0)
+               die_errno("dup2");
+       close(logfd);
+       valid_fd012();
+       if (chdir("/") < 0)
+               die_errno("chdir");
+       return pipefd[1];
+}
+
+/*
+ * Compute a non-negative 31 bit hash value of a NUL-terminated string.
+ *
+ * Four rounds are performed which differ only in the value added to the
+ * first character. Each round contributes the low byte of its result, and
+ * the final right shift clears the sign bit.
+ *
+ * The inner loop starts at the second character. For the empty string this
+ * would read beyond the terminating NUL byte, so the loop is skipped in this
+ * case. Hash values of non-empty strings are unaffected.
+ */
+static int super_dull_hash(const char *input)
+{
+       const uint8_t *x = (const uint8_t *)input;
+       const unsigned p1 = 16777619, p2 = 2971215073;
+       unsigned n, m, h, result = 0;
+
+       for (n = 0; n < 4; n++) {
+               h = p1 * (x[0] + n);
+               /* x[0] == 0 means empty string: x[1] is out of bounds */
+               for (m = 1; x[0] != 0 && x[m] != 0; m++)
+                       h = p2 * (h ^ x[m]);
+               result = (result << 8) | (h % 256);
+       }
+       return result >> 1;
+}
+
+/**
+ * We use a semaphore set with two semaphores. The first semaphore is modified
+ * in both misma_lock() and get_misma_pid() while the second one is modified
+ * only in misma_lock(). This allows us to obtain the PID of the running misma
+ * process by querying the PID that last performed an operation on the second
+ * semaphore. This is achieved by passing GETPID as the control operation to
+ * semctl().
+ */
+
+/*
+ * Try to acquire the lock which corresponds to the given string.
+ *
+ * The key of the semaphore set is the hash value of the string. The set is
+ * created if it does not exist. For each of the two semaphores a single
+ * semop() call checks that the value is zero and increments it. IPC_NOWAIT
+ * makes the call fail rather than block, and SEM_UNDO reverts the
+ * increments when the process terminates.
+ *
+ * Returns true if the lock was acquired.
+ */
+bool misma_lock(const char *string)
+{
+       struct sembuf sops[4] = {
+               {.sem_num = 0, .sem_op = 0, .sem_flg = SEM_UNDO | IPC_NOWAIT},
+               {.sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO | IPC_NOWAIT},
+               {.sem_num = 1, .sem_op = 0, .sem_flg = SEM_UNDO | IPC_NOWAIT},
+               {.sem_num = 1, .sem_op = 1, .sem_flg = SEM_UNDO | IPC_NOWAIT},
+       };
+       key_t key = super_dull_hash(string);
+       int semid = semget(key, 2, IPC_CREAT | 0600);
+
+       if (semid < 0)
+               return false;
+       DEBUG_LOG("key: 0x%0x, semid: %d\n", (unsigned)key, semid);
+       return semop(semid, sops, 4) >= 0;
+}
+
+/*
+ * Return the PID of the process which holds the lock that corresponds to the
+ * given string. Zero is returned if misma is not running, i.e. if the
+ * semaphore set does not exist or the first semaphore is zero, and also if
+ * the PID cannot be obtained.
+ */
+pid_t get_misma_pid(const char *string)
+{
+       struct sembuf wait_zero = {
+               .sem_num = 0,
+               .sem_op = 0,
+               .sem_flg = SEM_UNDO | IPC_NOWAIT
+       };
+       key_t key = super_dull_hash(string);
+       int semid, pid;
+
+       semid = semget(key, 2, 0);
+       if (semid < 0)
+               return 0;
+       DEBUG_LOG("key: 0x%0x, semid: %d\n", (unsigned)key, semid);
+       /* If this succeeds, the semaphore is zero: nobody holds the lock. */
+       if (semop(semid, &wait_zero, 1) >= 0)
+               return 0;
+       /* PID of the process which last operated on the second semaphore */
+       pid = semctl(semid, 1, GETPID);
+       if (pid < 0)
+               return 0;
+       return pid;
+}
+
+/*
+ * Simplistic min-heap implementation (see e.g. Cormen et al. Chapter 6)
+ *
+ * The heap does not own a private array. It operates on an array of element
+ * pointers through the address of the variable which points to that array,
+ * because inserting and extracting elements reallocates the array.
+ */
+struct heap {
+       void ***aa; /* array address */
+       unsigned n; /* num elements */
+       /*
+        * Ordering function. The heap code moves an element towards the root
+        * if compare(element, other) returns a positive value.
+        */
+       int (*compare)(const void *data1, const void *data2);
+};
+
+/* Array index of the parent of the node at index idx (idx must be non-zero). */
+static unsigned heap_parent(unsigned idx)
+{
+       unsigned pos = idx + 1; /* one-based position of the node */
+
+       return pos / 2 - 1;
+}
+
+/* Array index of the left child of the node at index idx. */
+static unsigned heap_left(unsigned idx)
+{
+       return 2 * idx + 1;
+}
+
+/* Array index of the right child of the node at index idx. */
+static unsigned heap_right(unsigned idx)
+{
+       return 2 * idx + 2;
+}
+
+/*
+ * Restore the heap property for the subtree rooted at index idx, assuming
+ * that both subtrees below idx are heaps already.
+ *
+ * The node is exchanged with the child which the compare function ranks
+ * highest, and this is repeated one level further down until neither child
+ * ranks higher than the node.
+ */
+static void heapify(struct heap *h, unsigned idx)
+{
+       void **array = *(h->aa);
+
+       assert(idx < h->n);
+       for (;;) {
+               unsigned l = heap_left(idx), r = heap_right(idx), top = idx;
+               void *tmp;
+
+               if (l < h->n && h->compare(array[l], array[top]) > 0)
+                       top = l;
+               if (r < h->n && h->compare(array[r], array[top]) > 0)
+                       top = r;
+               if (top == idx) /* heap property holds */
+                       return;
+               tmp = array[idx];
+               array[idx] = array[top];
+               array[top] = tmp;
+               idx = top;
+       }
+}
+
+/*
+ * Turn an existing array of element pointers into a heap.
+ *
+ * aa is the address of the variable which points to the array, num_elements
+ * is the number of pointers in the array, and compare defines the order.
+ * The array is reordered in place. The returned heap structure is newly
+ * allocated.
+ */
+struct heap *heap_init(void *aa, unsigned num_elements,
+       int (*compare)(const void *data1, const void *data2))
+{
+       struct heap *h = xmalloc(sizeof(*h));
+       unsigned j;
+
+       INFO_LOG("creating heap with %u elements\n", num_elements);
+       h->aa = aa;
+       h->n = num_elements;
+       h->compare = compare;
+       /* Sift down all nodes which have children, bottom up. */
+       for (j = h->n / 2; j > 0; j--)
+               heapify(h, j - 1);
+       return h;
+}
+
+/* Return the element at the root of the heap, which must not be empty. */
+void *heap_min(const struct heap *h)
+{
+       void **array;
+
+       assert(h->n > 0);
+       array = *(h->aa);
+       return array[0];
+}
+
+/* Return the number of elements which are currently stored in the heap. */
+unsigned heap_num_elements(const struct heap *h)
+{
+       return h->n;
+}
+
+/*
+ * Remove the element at the root of the heap and return it.
+ *
+ * The heap must not be empty. The last element of the array takes the place
+ * of the root, the array is shrunk by one pointer and the heap property is
+ * restored.
+ *
+ * If the extracted element was the only one, there is nothing to move or to
+ * reorder, and the array can not be shrunk because xrealloc() rejects a size
+ * of zero. The array keeps its single slot in this case; heap_insert()
+ * resizes it as usual.
+ */
+void *heap_extract_min(struct heap *h)
+{
+       void *smallest = heap_min(h);
+       void **array = *(h->aa);
+
+       h->n--;
+       if (h->n == 0) /* the heap is empty now */
+               return smallest;
+       array[0] = array[h->n];
+       *(h->aa) = xrealloc(array, h->n * sizeof(void *));
+       heapify(h, 0);
+       return smallest;
+}
+
+/*
+ * Add an element to the heap.
+ *
+ * The array grows by one pointer. The new element is stored in the last slot
+ * and then exchanged with its parent for as long as the compare function
+ * ranks it higher than the parent.
+ */
+void heap_insert(void *new_element, struct heap *h)
+{
+       void **array;
+       unsigned pos, up;
+
+       h->n++;
+       array = xrealloc(*(h->aa), h->n * sizeof(void *));
+       *(h->aa) = array;
+       pos = h->n - 1;
+       array[pos] = new_element;
+       while (pos > 0) {
+               void *tmp;
+
+               up = heap_parent(pos);
+               if (h->compare(array[pos], array[up]) <= 0)
+                       return; /* the parent ranks at least as high */
+               tmp = array[pos];
+               array[pos] = array[up];
+               array[up] = tmp;
+               pos = up;
+       }
+}
+
+/* Call dumper for each element of the heap, in array order. */
+void heap_dump(const struct heap *h, void (*dumper)(const void *))
+{
+       void **array = *(h->aa);
+       unsigned j = 0;
+
+       while (j < h->n) {
+               dumper(array[j]);
+               j++;
+       }
+}
diff --git a/version-gen.sh b/version-gen.sh
new file mode 100755 (executable)
index 0000000..29134b3
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+
+package="$1"
+version_file="$2"
+
+ver='unnamed_version'
+# First try git, then gitweb, then default.
+if [ -e '.git' -o -e '../.git' ]; then
+       git_ver=$(git describe --abbrev=4 HEAD 2>/dev/null)
+       [ -z "$git_ver" ] && git_ver="$ver"
+       # update stat information in index to match working tree
+       git update-index -q --refresh > /dev/null
+       # if there are differences (exit code 1), the working tree is dirty
+       git diff-index --quiet HEAD || git_ver=$git_ver-dirty
+       ver=$git_ver
+elif [ "${PWD%%-*}" = $package- ]; then
+       ver=${PWD##*/$package-}
+fi
+ver=${ver#v}
+
+echo "$ver"
+[ -z "${version_file}" ] && exit 0
+# update version file if necessary
+content="const char *${package}_version(void) {return \"$ver\";};"
+[ -r "$version_file" ] && echo "$content" | cmp -s - $version_file && exit 0
+echo "$content" > $version_file