/*
- * Copyright (C) 2008-2011 Andre Noll <maan@systemlinux.org>
+ * Copyright (C) 2008-2011 Andre Noll <maan@tuebingen.mpg.de>
*
* Licensed under the GPL v2. For licencing details see COPYING.
*/
#include <string.h>
#include <stdlib.h>
+#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <errno.h>
#include <sys/types.h>
#include <signal.h>
#include <ctype.h>
+#include <stdbool.h>
#include <sys/stat.h>
#include <unistd.h>
#include <inttypes.h>
static pid_t create_pid;
/** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
static int create_process_stopped;
+/** How many times in a row the rsync command failed. */
+static int num_consecutive_rsync_errors;
/** Process id of current pre-remove/rm/post-remove process. */
static pid_t remove_pid;
/** When the next snapshot is due. */
"reference_snapshot: %s\n"
"snapshot_creation_status: %s\n"
"snapshot_removal_status: %s\n"
+ "num_consecutive_rsync_errors: %d\n"
,
(int) getpid(),
logfile? conf.logfile_arg : "stderr",
name_of_reference_snapshot?
name_of_reference_snapshot : "(none)",
hook_status_description[snapshot_creation_status],
- hook_status_description[snapshot_removal_status]
+ hook_status_description[snapshot_removal_status],
+ num_consecutive_rsync_errors
);
if (create_pid != 0)
fprintf(log,
int64_t x = 0, now = get_current_time(), unit_interval
= 24 * 3600 * conf.unit_interval_arg, ret;
unsigned wanted = desired_number_of_snapshots(0, conf.num_intervals_arg),
- num_complete_snapshots = 0;
+ num_complete = 0;
int i;
struct snapshot *s = NULL;
struct snapshot_list sl;
FOR_EACH_SNAPSHOT(s, i, &sl) {
if (!(s->flags & SS_COMPLETE))
continue;
- num_complete_snapshots++;
+ num_complete++;
x += s->completion_time - s->creation_time;
}
assert(x >= 0);
ret = now;
- if (num_complete_snapshots == 0)
+ if (num_complete == 0)
goto out;
- x /= num_complete_snapshots; /* avg time to create one snapshot */
+ x /= num_complete; /* avg time to create one snapshot */
if (unit_interval < x * wanted) /* oops, no sleep at all */
goto out;
ret = s->completion_time + unit_interval / wanted - x;
static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
{
- int i;
- struct snapshot *s;
+ int i, num_complete; /* Purpose: pick the first snapshot in sl that may be removed, or NULL if none should be. */
+ struct snapshot *s, *ref = NULL; /* ref: the reference snapshot, kept only as a last-resort candidate */
+
+ num_complete = num_complete_snapshots(sl); /* presumably the number of complete snapshots in sl -- helper not shown, confirm */
+ if (num_complete <= conf.min_complete_arg) /* not more than the configured minimum: report nothing as removable */
+ return NULL;
FOR_EACH_SNAPSHOT(s, i, sl) {
if (snapshot_is_being_created(s))
continue;
- if (is_reference_snapshot(s))
+ if (is_reference_snapshot(s)) { /* avoid this one */
+ ref = s; /* remember it; it is returned below only if the loop finds nothing else */
continue;
+ }
DSS_INFO_LOG(("oldest removable snapshot: %s\n", s->name));
return s;
}
- return NULL;
+ assert(ref); /* reached only when the loop returned no candidate; NOTE(review): with NDEBUG a NULL ref would be dereferenced on the next line -- confirm it cannot be NULL here */
+ DSS_WARNING_LOG(("removing reference snapshot %s\n", ref->name));
+ return ref; /* fall back to the reference snapshot */
}
static int rename_incomplete_snapshot(int64_t start)
{
char *old_name;
int ret;
+ int64_t now;
+ /*
+ * We don't want the dss_rename() below to fail with EEXIST because the
+ * last complete snapshot was created (and completed) in the same
+ * second as this one.
+ */
+ while ((now = get_current_time()) == start)
+ sleep(1);
free(path_to_last_complete_snapshot);
- ret = complete_name(start, get_current_time(),
- &path_to_last_complete_snapshot);
+ ret = complete_name(start, now, &path_to_last_complete_snapshot);
if (ret < 0)
return ret;
old_name = incomplete_name(start);
if (next_snapshot_is_due())
return 0;
}
+ /*
+ * Idle and --keep_redundant not given, or low disk space. Look at
+ * existing snapshots.
+ */
dss_get_snapshot_list(&sl);
ret = 0;
- if (!low_disk_space && sl.num_snapshots <= 1)
+ /*
+ * Don't remove anything if there is free space and we have fewer
+ * snapshots than configured, plus one. This way there is always one
+ * snapshot that can be recycled.
+ */
+ if (!low_disk_space && sl.num_snapshots <= 1 << conf.num_intervals_arg)
goto out;
why = "outdated";
victim = find_outdated_snapshot(&sl);
victim = find_redundant_snapshot(&sl);
if (victim)
goto remove;
- /* try harder only if disk space is low */
- if (!low_disk_space)
- goto out;
why = "orphaned";
victim = find_orphaned_snapshot(&sl);
if (victim)
goto remove;
+ /* try harder only if disk space is low */
+ if (!low_disk_space)
+ goto out;
DSS_WARNING_LOG(("disk space low and nothing obvious to remove\n"));
victim = find_oldest_removable_snapshot(&sl);
if (victim)
es = WEXITSTATUS(status);
/*
* Restart rsync on non-fatal errors:
- * 12: Error in rsync protocol data stream
- * 13: Errors with program diagnostics
+ * 24: Partial transfer due to vanished source files. This exit code is
+ * tolerated like 0; every other non-zero code counts as an error.
*/
- if (es == 12 || es == 13) {
- DSS_WARNING_LOG(("rsync process %d returned %d -- restarting\n",
- (int)create_pid, es));
+ if (es != 0 && es != 24) {
+ DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
+ es, ++num_consecutive_rsync_errors));
+ if (conf.create_given) {
+ ret = -E_BAD_EXIT_CODE;
+ goto out;
+ }
+ if (num_consecutive_rsync_errors > conf.max_rsync_errors_arg) {
+ ret = -E_TOO_MANY_RSYNC_ERRORS;
+ snapshot_creation_status = HS_READY;
+ goto out;
+ }
+ DSS_WARNING_LOG(("restarting rsync process\n"));
snapshot_creation_status = HS_NEEDS_RESTART;
next_snapshot_time = get_current_time() + 60;
ret = 1;
goto out;
}
- if (es != 0 && es != 23 && es != 24) {
- DSS_ERROR_LOG(("rsync process %d returned %d\n", (int)create_pid, es));
- ret = -E_BAD_EXIT_CODE;
- snapshot_creation_status = HS_READY;
- goto out;
- }
+ num_consecutive_rsync_errors = 0;
ret = rename_incomplete_snapshot(current_snapshot_creation_time);
if (ret < 0)
goto out;
* Returns < 0 on errors, 0 if no config file is given and > 0 if the config
* file was read successfully.
*/
-static int parse_config_file(int override)
+static int parse_config_file(bool sighup)
{
int ret, config_file_exists;
char *config_file = get_config_file_name();
char *old_logfile_arg = NULL;
int old_daemon_given = 0;
- if (override) { /* SIGHUP */
+ if (sighup) {
if (conf.logfile_given)
old_logfile_arg = dss_strdup(conf.logfile_arg);
old_daemon_given = conf.daemon_given;
}
if (config_file_exists) {
struct cmdline_parser_params params;
- params.override = override;
+ params.override = sighup;
params.initialize = 0;
params.check_required = 1;
params.check_ambiguity = 0;
params.print_errors = 1;
- if (override) { /* invalidate all rsync options */
+ if (sighup) { /* invalidate all rsync options */
int i;
for (i = 0; i < conf.rsync_option_given; i++) {
ret = check_config();
if (ret < 0)
goto out;
- if (override) {
+ if (sighup) {
/* don't change daemon mode on SIGHUP */
conf.daemon_given = old_daemon_given;
close_log(logfile);
DSS_NOTICE_LOG(("SIGHUP, re-reading config\n"));
dump_dss_config("old");
- ret = parse_config_file(1);
+ ret = parse_config_file(true /* SIGHUP */);
if (ret < 0)
return ret;
dump_dss_config("reloaded");
return change_to_dest_dir();
}
+static void kill_children(void) /* Sends SIGTERM to the create and the remove process via dss_kill(). */
+{
+ restart_create_process(); /* presumably resumes a stopped create process so that it can act on the signal -- helper not shown, confirm */
+ dss_kill(create_pid, SIGTERM, NULL); /* NOTE(review): create_pid is compared against 0 elsewhere in this file; confirm dss_kill() copes with a zero pid */
+ dss_kill(remove_pid, SIGTERM, NULL); /* remove_pid: current pre-remove/rm/post-remove process */
+}
+
static int handle_signal(void)
{
int sig, ret = next_signal();
switch (sig) {
case SIGINT:
case SIGTERM:
- restart_create_process();
- dss_kill(create_pid, SIGTERM, NULL);
- dss_kill(remove_pid, SIGTERM, NULL);
+ kill_children();
ret = -E_SIGNAL;
break;
case SIGHUP:
s = find_orphaned_snapshot(&sl);
out:
if (s) {
- DSS_INFO_LOG(("reusing %s snapshot %s\n", why, s->name));
+ DSS_NOTICE_LOG(("recycling %s snapshot %s\n", why, s->name));
ret = dss_rename(s->name, new_name);
}
if (ret >= 0)
- DSS_NOTICE_LOG(("creating new snapshot %s\n", new_name));
+ DSS_NOTICE_LOG(("creating %s\n", new_name));
free(new_name);
free_snapshot_list(&sl);
return ret;
*argv = dss_malloc((15 + conf.rsync_option_given) * sizeof(char *));
(*argv)[i++] = dss_strdup("rsync");
- (*argv)[i++] = dss_strdup("-aq");
+ (*argv)[i++] = dss_strdup("-a");
(*argv)[i++] = dss_strdup("--delete");
for (j = 0; j < conf.rsync_option_given; j++)
(*argv)[i++] = dss_strdup(conf.rsync_option_arg[j]);
ret = select_loop();
if (ret >= 0) /* impossible */
ret = -E_BUG;
+ kill_children();
exit_hook(ret);
return ret;
}
if (s->flags & SS_COMPLETE)
d = (s->completion_time - s->creation_time) / 60;
dss_msg("%u\t%s\t%3" PRId64 ":%02" PRId64 "\n", s->interval, s->name, d/60, d%60);
- };
+ }
free_snapshot_list(&sl);
return 1;
}
return install_sighandler(SIGCHLD);
}
-/**
- * The main function of dss.
- *
- * \param argc Usual argument count.
- * \param argv Usual argument vector.
- */
int main(int argc, char **argv)
{
int ret;
cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */
ret = parse_config_file(0);
+ ret = parse_config_file(false /* no SIGHUP */);
if (ret < 0)
goto out;
if (ret == 0) { /* no config file given */
* Parse the command line options again, but this time check
* that all required options are given.
*/
- struct cmdline_parser_params params;
params.override = 1;
params.initialize = 1;
params.check_required = 1;
if (ret < 0)
goto out;
ret = call_command_handler();
+ signal_shutdown();
out:
if (ret < 0)
DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));