X-Git-Url: http://git.tuebingen.mpg.de/?p=dss.git;a=blobdiff_plain;f=dss.c;h=07a60425170ddab063ee6071d4901993a9db8c62;hp=237208b7da0a0f3a8f14c2e86c4cbce7cb1fe735;hb=05e75054398c9d39f62f8c4b9be7b874a2019a3c;hpb=20f8c7d5e2b7985137e89143005f4b7b15ae59d4 diff --git a/dss.c b/dss.c index 237208b..07a6042 100644 --- a/dss.c +++ b/dss.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008-2011 Andre Noll + * Copyright (C) 2008-2011 Andre Noll * * Licensed under the GPL v2. For licencing details see COPYING. */ @@ -45,6 +45,8 @@ static int signal_pipe; static pid_t create_pid; /** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */ static int create_process_stopped; +/** How many times in a row the rsync command failed. */ +static int num_consecutive_rsync_errors; /** Process id of current pre-remove/rm/post-remove process. */ static pid_t remove_pid; /** When the next snapshot is due. */ @@ -125,6 +127,7 @@ static void dump_dss_config(const char *msg) "reference_snapshot: %s\n" "snapshot_creation_status: %s\n" "snapshot_removal_status: %s\n" + "num_consecutive_rsync_errors: %d\n" , (int) getpid(), logfile? conf.logfile_arg : "stderr", @@ -135,7 +138,8 @@ static void dump_dss_config(const char *msg) name_of_reference_snapshot? name_of_reference_snapshot : "(none)", hook_status_description[snapshot_creation_status], - hook_status_description[snapshot_removal_status] + hook_status_description[snapshot_removal_status], + num_consecutive_rsync_errors ); if (create_pid != 0) fprintf(log, @@ -531,17 +535,25 @@ static struct snapshot *find_outdated_snapshot(struct snapshot_list *sl) static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl) { - int i; - struct snapshot *s; + int i, num_complete; + struct snapshot *s, *ref = NULL; + + num_complete = num_complete_snapshots(sl); + if (num_complete <= conf.min_complete_arg) + return NULL; FOR_EACH_SNAPSHOT(s, i, sl) { if (snapshot_is_being_created(s)) continue; - if (is_reference_snapshot(s)) + if (is_reference_snapshot(s)) { /* avoid this one */ + ref = s; continue; + } DSS_INFO_LOG(("oldest removable snapshot: %s\n", s->name)); return s; } - return NULL; + assert(ref); + DSS_WARNING_LOG(("removing reference snapshot %s\n", ref->name)); + return ref; } static int rename_incomplete_snapshot(int64_t start) @@ -836,23 +848,23 @@ static int handle_rsync_exit(int status) es = WEXITSTATUS(status); /* * Restart rsync on non-fatal errors: - * 12: Error in rsync protocol data stream - * 13: Errors with program diagnostics + * 24: Partial transfer due to vanished source files */ - if (es == 12 || es == 13) { - DSS_WARNING_LOG(("rsync process %d returned %d -- restarting\n", - (int)create_pid, es)); + if (es != 0 && es != 24) { + DSS_WARNING_LOG(("rsync exit code %d, error count %d\n", + es, ++num_consecutive_rsync_errors)); + if (num_consecutive_rsync_errors > conf.max_rsync_errors_arg) { + ret = -E_TOO_MANY_RSYNC_ERRORS; + snapshot_creation_status = HS_READY; + goto out; + } + DSS_WARNING_LOG(("restarting rsync process\n")); snapshot_creation_status = HS_NEEDS_RESTART; next_snapshot_time = get_current_time() + 60; ret = 1; goto out; } - if (es != 0 && es != 23 && es != 24) { - DSS_ERROR_LOG(("rsync process %d returned %d\n", (int)create_pid, es)); - ret = -E_BAD_EXIT_CODE; - snapshot_creation_status = HS_READY; - goto out; - } + num_consecutive_rsync_errors = 0; ret = rename_incomplete_snapshot(current_snapshot_creation_time); if (ret < 0) goto out; @@ -1038,6 +1050,13 @@ static int handle_sighup(void) return change_to_dest_dir(); } +static void kill_children(void) +{ + restart_create_process(); + dss_kill(create_pid, SIGTERM, NULL); + dss_kill(remove_pid, SIGTERM, NULL); +} + static int handle_signal(void) { int sig, ret = next_signal(); @@ -1048,9 +1067,7 @@ static int handle_signal(void) switch (sig) { case SIGINT: case SIGTERM: - restart_create_process(); - dss_kill(create_pid, SIGTERM, NULL); - dss_kill(remove_pid, SIGTERM, NULL); + kill_children(); ret = -E_SIGNAL; break; case SIGHUP: @@ -1319,6 +1336,7 @@ static int com_run(void) ret = select_loop(); if (ret >= 0) /* impossible */ ret = -E_BUG; + kill_children(); exit_hook(ret); return ret; } @@ -1506,6 +1524,7 @@ int main(int argc, char **argv) if (ret < 0) goto out; ret = call_command_handler(); + signal_shutdown(); out: if (ret < 0) DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));