/*
- * Copyright (C) 2008-2011 Andre Noll <maan@systemlinux.org>
+ * Copyright (C) 2008-2011 Andre Noll <maan@tuebingen.mpg.de>
*
* Licensed under the GPL v2. For licencing details see COPYING.
*/
static pid_t create_pid;
/** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
static int create_process_stopped;
+/** How many times in a row the rsync command failed. */
+static int num_consecutive_rsync_errors;
/** Process id of current pre-remove/rm/post-remove process. */
static pid_t remove_pid;
/** When the next snapshot is due. */
"reference_snapshot: %s\n"
"snapshot_creation_status: %s\n"
"snapshot_removal_status: %s\n"
+ "num_consecutive_rsync_errors: %d\n"
,
(int) getpid(),
logfile? conf.logfile_arg : "stderr",
name_of_reference_snapshot?
name_of_reference_snapshot : "(none)",
hook_status_description[snapshot_creation_status],
- hook_status_description[snapshot_removal_status]
+ hook_status_description[snapshot_removal_status],
+ num_consecutive_rsync_errors
);
if (create_pid != 0)
fprintf(log,
static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
{
- int i;
- struct snapshot *s;
+ int i, num_complete;
+ struct snapshot *s, *ref = NULL;
+
+ /* Never go below the configured minimum of complete snapshots. */
+ num_complete = num_complete_snapshots(sl);
+ if (num_complete <= conf.min_complete_arg)
+ return NULL;
FOR_EACH_SNAPSHOT(s, i, sl) {
if (snapshot_is_being_created(s))
continue;
- if (is_reference_snapshot(s))
+ if (is_reference_snapshot(s)) { /* avoid this one */
+ ref = s;
continue;
+ }
DSS_INFO_LOG(("oldest removable snapshot: %s\n", s->name));
return s;
}
- return NULL;
+ /*
+ * No other candidate was found, so fall back to the reference
+ * snapshot. Do not assert() on it: the check vanishes with NDEBUG
+ * (leaving a NULL dereference below) and aborts the process
+ * otherwise, although returning NULL is already part of this
+ * function's contract.
+ */
+ if (!ref)
+ return NULL;
+ DSS_WARNING_LOG(("removing reference snapshot %s\n", ref->name));
+ return ref;
}
static int rename_incomplete_snapshot(int64_t start)
{
char *old_name;
int ret;
+ int64_t now;
+ /*
+ * We don't want the dss_rename() below to fail with EEXIST because the
+ * last complete snapshot was created (and completed) in the same
+ * second as this one.
+ */
+ while ((now = get_current_time()) == start)
+ sleep(1);
free(path_to_last_complete_snapshot);
- ret = complete_name(start, get_current_time(),
- &path_to_last_complete_snapshot);
+ ret = complete_name(start, now, &path_to_last_complete_snapshot);
if (ret < 0)
return ret;
old_name = incomplete_name(start);
if (next_snapshot_is_due())
return 0;
}
+ /*
+ * Idle and --keep_redundant not given, or low disk space. Look at
+ * existing snapshots.
+ */
dss_get_snapshot_list(&sl);
ret = 0;
if (!low_disk_space && sl.num_snapshots <= 1)
es = WEXITSTATUS(status);
/*
* Restart rsync on non-fatal errors:
- * 12: Error in rsync protocol data stream
- * 13: Errors with program diagnostics
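+ * Any non-zero exit code triggers a restart, unless the error
+ * count exceeds conf.max_rsync_errors_arg. The one exception is
+ * 24: Partial transfer due to vanished source files
+ * which is treated like a successful exit.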
*/
- if (es == 12 || es == 13) {
- DSS_WARNING_LOG(("rsync process %d returned %d -- restarting\n",
- (int)create_pid, es));
+ if (es != 0 && es != 24) {
+ DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
+ es, ++num_consecutive_rsync_errors));
+ if (num_consecutive_rsync_errors > conf.max_rsync_errors_arg) {
+ ret = -E_TOO_MANY_RSYNC_ERRORS;
+ snapshot_creation_status = HS_READY;
+ goto out;
+ }
+ DSS_WARNING_LOG(("restarting rsync process\n"));
snapshot_creation_status = HS_NEEDS_RESTART;
next_snapshot_time = get_current_time() + 60;
ret = 1;
goto out;
}
- if (es != 0 && es != 23 && es != 24) {
- DSS_ERROR_LOG(("rsync process %d returned %d\n", (int)create_pid, es));
- ret = -E_BAD_EXIT_CODE;
- snapshot_creation_status = HS_READY;
- goto out;
- }
+ num_consecutive_rsync_errors = 0;
ret = rename_incomplete_snapshot(current_snapshot_creation_time);
if (ret < 0)
goto out;
return change_to_dest_dir();
}
+/**
+ * Send SIGTERM to the create process and to the remove process.
+ *
+ * restart_create_process() is called first; presumably it resumes a
+ * stopped create process so that the signal gets acted upon -- confirm
+ * against its definition. The return values of dss_kill() are ignored.
+ */
+static void kill_children(void)
+{
+ restart_create_process();
+ dss_kill(create_pid, SIGTERM, NULL);
+ dss_kill(remove_pid, SIGTERM, NULL);
+}
+
static int handle_signal(void)
{
int sig, ret = next_signal();
switch (sig) {
case SIGINT:
case SIGTERM:
- restart_create_process();
- dss_kill(create_pid, SIGTERM, NULL);
- dss_kill(remove_pid, SIGTERM, NULL);
+ kill_children();
ret = -E_SIGNAL;
break;
case SIGHUP:
*argv = dss_malloc((15 + conf.rsync_option_given) * sizeof(char *));
(*argv)[i++] = dss_strdup("rsync");
- (*argv)[i++] = dss_strdup("-aq");
+ (*argv)[i++] = dss_strdup("-a");
(*argv)[i++] = dss_strdup("--delete");
for (j = 0; j < conf.rsync_option_given; j++)
(*argv)[i++] = dss_strdup(conf.rsync_option_arg[j]);
ret = select_loop();
if (ret >= 0) /* impossible */
ret = -E_BUG;
+ kill_children();
exit_hook(ret);
return ret;
}
if (s->flags & SS_COMPLETE)
d = (s->completion_time - s->creation_time) / 60;
dss_msg("%u\t%s\t%3" PRId64 ":%02" PRId64 "\n", s->interval, s->name, d/60, d%60);
- };
+ }
free_snapshot_list(&sl);
return 1;
}
if (ret < 0)
goto out;
ret = call_command_handler();
+ signal_shutdown();
out:
if (ret < 0)
DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));