git.tuebingen.mpg.de Git - dss.git/commitdiff
Merge branch 'refs/heads/t/signal_handler_improvement'
author Andre Noll <maan@tuebingen.mpg.de>
Wed, 5 Aug 2015 10:52:47 +0000 (12:52 +0200)
committer Andre Noll <maan@tuebingen.mpg.de>
Wed, 5 Aug 2015 10:52:47 +0000 (12:52 +0200)
A single commit that was cooking for over a month.

INSTALL
NEWS
daemon.c
dss.c
dss.ggo
err.h

diff --git a/INSTALL b/INSTALL
index a99abead6bc3ef9a5bdbafb64c82c42b21a6d1e0..182d58ca1cced093ad5875071933b7949bae0124 100644 (file)
--- a/INSTALL
+++ b/INSTALL
@@ -6,8 +6,8 @@ in the dss source directory to build the dss executable and copy it to
 some directory that is included in your PATH, e.g. to $HOME/bin or to
 /usr/local/bin.
 
-Note that you'll likely need a recent version of
-ftp://ftp.gnu.org/pub/gnu/gengetopt/ (gnu gengetopt) to compile dss.
+Note that https://www.gnu.org/software/gengetopt/gengetopt.html (gnu gengetopt)
+is required to compile dss.
 
 Optionally, type
 
diff --git a/NEWS b/NEWS
index db77b4591b3775b458bb8b86a0a882cae5b4d432..e1ae41e69c6b4756bc94ea18aecdfef31da5773f 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -2,8 +2,13 @@
 0.1.6 (to be announced)
 -----------------------
 
-- New option --min-complete
-- New home page URL, email address
+ - New option --min-complete to specify the minimal number of snapshots
+   to keep.
+
+ - Improved handling of rsync errors. The new --max-rsync-errors option
+ tells dss to terminate after the given number of rsync failures.
+
+ - New home page URL, email address
 
 ------------------
 0.1.5 (2014-01-14)
@@ -57,7 +62,7 @@ snapshot in the post_create_hook.
 0.1.1 (2008-11-13)
 ~~~~~~~~~~~~~~~~~~
 This release prevents busy loops on rsync exit code 13. It ignores
-any snapshot directory with creation time &gt; completion time. It
+any snapshot directory with creation time >= completion time. It
 opens /dev/null for reading and writing when executing rsync. It shows
 human readable snapshot creation duration when listing snapshots. It
 restarts the rsync process if it returned with exit code 13.
diff --git a/daemon.c b/daemon.c
index b63e5ff57b408a79e62d44349cb9fd9d342b956e..24bbfe5cdaaa22b03924fd4aba0cf379a1de22e1 100644 (file)
--- a/daemon.c
+++ b/daemon.c
@@ -48,7 +48,7 @@ void daemon_init(void)
        if (chdir("/") < 0)
                goto err;
        umask(0);
-       null = open("/dev/null", O_RDONLY);
+       null = open("/dev/null", O_RDWR);
        if (null < 0)
                goto err;
        if (dup2(null, STDIN_FILENO) < 0)
diff --git a/dss.c b/dss.c
index c64156188bc753f7ee264f7434db1e254bbbc8f2..07a60425170ddab063ee6071d4901993a9db8c62 100644 (file)
--- a/dss.c
+++ b/dss.c
@@ -45,6 +45,8 @@ static int signal_pipe;
 static pid_t create_pid;
 /** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
 static int create_process_stopped;
+/** How many times in a row the rsync command failed. */
+static int num_consecutive_rsync_errors;
 /** Process id of current pre-remove/rm/post-remove process. */
 static pid_t remove_pid;
 /** When the next snapshot is due. */
@@ -125,6 +127,7 @@ static void dump_dss_config(const char *msg)
                "reference_snapshot: %s\n"
                "snapshot_creation_status: %s\n"
                "snapshot_removal_status: %s\n"
+               "num_consecutive_rsync_errors: %d\n"
                ,
                (int) getpid(),
                logfile? conf.logfile_arg : "stderr",
@@ -135,7 +138,8 @@ static void dump_dss_config(const char *msg)
                name_of_reference_snapshot?
                        name_of_reference_snapshot : "(none)",
                hook_status_description[snapshot_creation_status],
-               hook_status_description[snapshot_removal_status]
+               hook_status_description[snapshot_removal_status],
+               num_consecutive_rsync_errors
        );
        if (create_pid != 0)
                fprintf(log,
@@ -844,23 +848,23 @@ static int handle_rsync_exit(int status)
        es = WEXITSTATUS(status);
        /*
         * Restart rsync on non-fatal errors:
-        * 12: Error in rsync protocol data stream
-        * 13: Errors with program diagnostics
+        * 24: Partial transfer due to vanished source files
         */
-       if (es == 12 || es == 13) {
-               DSS_WARNING_LOG(("rsync process %d returned %d -- restarting\n",
-                       (int)create_pid, es));
+       if (es != 0 && es != 24) {
+               DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
+                       es, ++num_consecutive_rsync_errors));
+               if (num_consecutive_rsync_errors > conf.max_rsync_errors_arg) {
+                       ret = -E_TOO_MANY_RSYNC_ERRORS;
+                       snapshot_creation_status = HS_READY;
+                       goto out;
+               }
+               DSS_WARNING_LOG(("restarting rsync process\n"));
                snapshot_creation_status = HS_NEEDS_RESTART;
                next_snapshot_time = get_current_time() + 60;
                ret = 1;
                goto out;
        }
-       if (es != 0 && es != 23 && es != 24) {
-               DSS_ERROR_LOG(("rsync process %d returned %d\n", (int)create_pid, es));
-               ret = -E_BAD_EXIT_CODE;
-               snapshot_creation_status = HS_READY;
-               goto out;
-       }
+       num_consecutive_rsync_errors = 0;
        ret = rename_incomplete_snapshot(current_snapshot_creation_time);
        if (ret < 0)
                goto out;
diff --git a/dss.ggo b/dss.ggo
index db119015e9a64c49f0be1edcc4cc88d4ff5c3816..11529956891db25e5110e03009b88ff5ca36c0c6 100644 (file)
--- a/dss.ggo
+++ b/dss.ggo
@@ -244,6 +244,26 @@ details="
                --rsync-option --exclude --rsync-option /proc
 "
 
+option "max-rsync-errors" -
+"Terminate after this many rsync failures"
+int typestr="count"
+default="10"
+optional
+details="
+       Only relevant when operating in --run mode (see above). If
+       the rsync process exits with a fatal error, dss restarts
+       the command in the hope that the problem is transient
+       and subsequent rsync runs succeed. After the given number
+       of consecutive rsync error exits, however, dss gives up,
+       executes the exit hook and terminates. Set this to zero if
+       dss should exit immediately on the first rsync error.
+
+       The only non-fatal error is when rsync exits with code 24. This
+       indicates a partial transfer due to vanished source files
+       and happens frequently when snapshotting a directory which
+       is concurrently being modified.
+"
+
 ###################
 section "Intervals"
 ###################
diff --git a/err.h b/err.h
index e651d9ca06f082eff6eee583ada6ee73ffacdf61..e7aced084542cb8d05fec76d49839e6d25b4cef8 100644 (file)
--- a/err.h
+++ b/err.h
@@ -55,7 +55,8 @@ static inline char *dss_strerror(int num)
        DSS_ERROR(SIGNAL_SIG_ERR, "signal() returned SIG_ERR"), \
        DSS_ERROR(SIGNAL, "caught terminating signal"), \
        DSS_ERROR(BUG, "values of beta might cause dom!"), \
-       DSS_ERROR(NOT_RUNNING, "dss not running")
+       DSS_ERROR(NOT_RUNNING, "dss not running"), \
+       DSS_ERROR(TOO_MANY_RSYNC_ERRORS, "too many consecutive rsync errors")
 
 /**
  * This is temporarily defined to expand to its first argument (prefixed by