A single commit that was cooking for over a month.
some directory that is included in your PATH, e.g. to $HOME/bin or to
/usr/local/bin.
-Note that you'll likely need a recent version of
-ftp://ftp.gnu.org/pub/gnu/gengetopt/ (gnu gengetopt) to compile dss.
+Note that https://www.gnu.org/software/gengetopt/gengetopt.html (gnu gengetopt)
+is required to compile dss.
Optionally, type
0.1.6 (to be announced)
-----------------------
-- New option --min-complete
-- New home page URL, email address
+ - New option --min-complete to specify the minimal number of snapshots
+ to keep.
+
+ - Improved handling of rsync errors. The new --max-rsync-errors option
+ tells dss to terminate after the given number of rsync failures.
+
+ - New home page URL, email address
------------------
0.1.5 (2014-01-14)
0.1.1 (2008-11-13)
~~~~~~~~~~~~~~~~~~
This release prevents busy loops on rsync exit code 13. It ignores
-any snapshot directory with creation time > completion time. It
+any snapshot directory with creation time >= completion time. It
opens /dev/null for reading and writing when executing rsync. It shows
human readable snapshot creation duration when listing snapshots. It
restarts the rsync process if it returned with exit code 13.
if (chdir("/") < 0)
goto err;
umask(0);
- null = open("/dev/null", O_RDONLY);
+ null = open("/dev/null", O_RDWR);
if (null < 0)
goto err;
if (dup2(null, STDIN_FILENO) < 0)
static pid_t create_pid;
/** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
static int create_process_stopped;
+/** How many times in a row the rsync command failed. */
+static int num_consecutive_rsync_errors;
/** Process id of current pre-remove/rm/post-remove process. */
static pid_t remove_pid;
/** When the next snapshot is due. */
"reference_snapshot: %s\n"
"snapshot_creation_status: %s\n"
"snapshot_removal_status: %s\n"
+ "num_consecutive_rsync_errors: %d\n"
,
(int) getpid(),
logfile? conf.logfile_arg : "stderr",
name_of_reference_snapshot?
name_of_reference_snapshot : "(none)",
hook_status_description[snapshot_creation_status],
- hook_status_description[snapshot_removal_status]
+ hook_status_description[snapshot_removal_status],
+ num_consecutive_rsync_errors
);
if (create_pid != 0)
fprintf(log,
es = WEXITSTATUS(status);
/*
* Restart rsync on any error except the following non-fatal exit code:
- * 12: Error in rsync protocol data stream
- * 13: Errors with program diagnostics
+ * 24: Partial transfer due to vanished source files
*/
- if (es == 12 || es == 13) {
- DSS_WARNING_LOG(("rsync process %d returned %d -- restarting\n",
- (int)create_pid, es));
+ if (es != 0 && es != 24) {
+ DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
+ es, ++num_consecutive_rsync_errors));
+ if (num_consecutive_rsync_errors > conf.max_rsync_errors_arg) {
+ ret = -E_TOO_MANY_RSYNC_ERRORS;
+ snapshot_creation_status = HS_READY;
+ goto out;
+ }
+ DSS_WARNING_LOG(("restarting rsync process\n"));
snapshot_creation_status = HS_NEEDS_RESTART;
next_snapshot_time = get_current_time() + 60;
ret = 1;
goto out;
}
- if (es != 0 && es != 23 && es != 24) {
- DSS_ERROR_LOG(("rsync process %d returned %d\n", (int)create_pid, es));
- ret = -E_BAD_EXIT_CODE;
- snapshot_creation_status = HS_READY;
- goto out;
- }
+ num_consecutive_rsync_errors = 0;
ret = rename_incomplete_snapshot(current_snapshot_creation_time);
if (ret < 0)
goto out;
--rsync-option --exclude --rsync-option /proc
"
+option "max-rsync-errors" -
+"Terminate after this many rsync failures"
+int typestr="count"
+default="10"
+optional
+details="
+ Only relevant when operating in --run mode (see above). If
+ the rsync process exits with a fatal error, dss restarts
+ the command in the hope that the problem is transient
+ and subsequent rsync runs succeed. After the given number
+ of consecutive rsync error exits, however, dss gives up,
+ executes the exit hook and terminates. Set this to zero if
+ dss should exit immediately on the first rsync error.
+
+ The only non-fatal error is when rsync exits with code 24. This
+ indicates a partial transfer due to vanished source files
+ and happens frequently when snapshotting a directory which
+ is concurrently being modified.
+"
+
###################
section "Intervals"
###################
DSS_ERROR(SIGNAL_SIG_ERR, "signal() returned SIG_ERR"), \
DSS_ERROR(SIGNAL, "caught terminating signal"), \
DSS_ERROR(BUG, "values of beta might cause dom!"), \
- DSS_ERROR(NOT_RUNNING, "dss not running")
+ DSS_ERROR(NOT_RUNNING, "dss not running"), \
+ DSS_ERROR(TOO_MANY_RSYNC_ERRORS, "too many consecutive rsync errors")
/**
* This is temporarily defined to expand to its first argument (prefixed by