X-Git-Url: http://git.tuebingen.mpg.de/?p=dss.git;a=blobdiff_plain;f=dss.c;h=0bc891373bd7e6096063ebc45aa953582433d68c;hp=006cd27c333e097231ae140374fffdf3b8d689f6;hb=bf4beb6878cfcf692755807486f95e835f807b72;hpb=15eb99dcf9a30f8c8d488d5bc3c92130cf828d60

diff --git a/dss.c b/dss.c
index 006cd27..0bc8913 100644
--- a/dss.c
+++ b/dss.c
@@ -45,6 +45,8 @@ static int signal_pipe;
 static pid_t create_pid;
 /** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
 static int create_process_stopped;
+/** How many times in a row the rsync command failed. */
+static int num_consecutive_rsync_errors;
 /** Process id of current pre-remove/rm/post-remove process. */
 static pid_t remove_pid;
 /** When the next snapshot is due. */
@@ -125,6 +127,7 @@ static void dump_dss_config(const char *msg)
 		"reference_snapshot: %s\n"
 		"snapshot_creation_status: %s\n"
 		"snapshot_removal_status: %s\n"
+		"num_consecutive_rsync_errors: %d\n"
 		,
 		(int) getpid(),
 		logfile? conf.logfile_arg : "stderr",
@@ -135,7 +138,8 @@ static void dump_dss_config(const char *msg)
 		name_of_reference_snapshot?
 			name_of_reference_snapshot : "(none)",
 		hook_status_description[snapshot_creation_status],
-		hook_status_description[snapshot_removal_status]
+		hook_status_description[snapshot_removal_status],
+		num_consecutive_rsync_errors
 	);
 	if (create_pid != 0)
 		fprintf(log,
@@ -602,9 +606,18 @@ static int try_to_free_disk_space(void)
 		if (next_snapshot_is_due())
 			return 0;
 	}
+	/*
+	 * Idle and --keep_redundant not given, or low disk space. Look at
+	 * existing snapshots.
+	 */
 	dss_get_snapshot_list(&sl);
 	ret = 0;
-	if (!low_disk_space && sl.num_snapshots <= 1)
+	/*
+	 * Don't remove anything if there is free space and we have fewer
+	 * snapshots than configured, plus one. This way there is always one
+	 * snapshot that can be recycled.
+	 */
+	if (!low_disk_space && sl.num_snapshots <= 1 << conf.num_intervals_arg)
 		goto out;
 	why = "outdated";
 	victim = find_outdated_snapshot(&sl);
@@ -614,13 +627,13 @@ static int try_to_free_disk_space(void)
 	victim = find_redundant_snapshot(&sl);
 	if (victim)
 		goto remove;
-	/* try harder only if disk space is low */
-	if (!low_disk_space)
-		goto out;
 	why = "orphaned";
 	victim = find_orphaned_snapshot(&sl);
 	if (victim)
 		goto remove;
+	/* try harder only if disk space is low */
+	if (!low_disk_space)
+		goto out;
 	DSS_WARNING_LOG(("disk space low and nothing obvious to remove\n"));
 	victim = find_oldest_removable_snapshot(&sl);
 	if (victim)
@@ -844,23 +857,27 @@ static int handle_rsync_exit(int status)
 	es = WEXITSTATUS(status);
 	/*
 	 * Restart rsync on non-fatal errors:
-	 * 12: Error in rsync protocol data stream
-	 * 13: Errors with program diagnostics
+	 * 24: Partial transfer due to vanished source files
 	 */
-	if (es == 12 || es == 13) {
-		DSS_WARNING_LOG(("rsync process %d returned %d -- restarting\n",
-			(int)create_pid, es));
+	if (es != 0 && es != 24) {
+		DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
+			es, ++num_consecutive_rsync_errors));
+		if (conf.create_given) {
+			ret = -E_BAD_EXIT_CODE;
+			goto out;
+		}
+		if (num_consecutive_rsync_errors > conf.max_rsync_errors_arg) {
+			ret = -E_TOO_MANY_RSYNC_ERRORS;
+			snapshot_creation_status = HS_READY;
+			goto out;
+		}
+		DSS_WARNING_LOG(("restarting rsync process\n"));
 		snapshot_creation_status = HS_NEEDS_RESTART;
 		next_snapshot_time = get_current_time() + 60;
 		ret = 1;
 		goto out;
 	}
-	if (es != 0 && es != 23 && es != 24) {
-		DSS_ERROR_LOG(("rsync process %d returned %d\n", (int)create_pid, es));
-		ret = -E_BAD_EXIT_CODE;
-		snapshot_creation_status = HS_READY;
-		goto out;
-	}
+	num_consecutive_rsync_errors = 0;
 	ret = rename_incomplete_snapshot(current_snapshot_creation_time);
 	if (ret < 0)
 		goto out;
@@ -1476,12 +1493,6 @@ static int setup_signal_handling(void)
 	return install_sighandler(SIGCHLD);
 }
 
-/**
- * The main function of dss.
- *
- * \param argc Usual argument count.
- * \param argv Usual argument vector.
- */
 int main(int argc, char **argv)
 {
 	int ret;
@@ -1520,6 +1531,7 @@ int main(int argc, char **argv)
 	if (ret < 0)
 		goto out;
 	ret = call_command_handler();
+	signal_shutdown();
 out:
 	if (ret < 0)
 		DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));