X-Git-Url: http://git.tuebingen.mpg.de/?p=dss.git;a=blobdiff_plain;f=dss.c;h=4bf01e961ad1d2b3d32313edd70668511f899b37;hp=1fa6d569895f36df3eaf7d8cde34b0bc5ef0e513;hb=ed109024b978f48399a5f4451f1b9f46d6f25ede;hpb=985ee856a76b6cf415b2342dcfb44ac9192e5483 diff --git a/dss.c b/dss.c index 1fa6d56..4bf01e9 100644 --- a/dss.c +++ b/dss.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -25,12 +26,25 @@ #include "daemon.h" #include "signal.h" #include "df.h" +#include "time.h" struct gengetopt_args_info conf; char *dss_error_txt = NULL; static FILE *logfile; -int signal_pipe; +static int signal_pipe; + +/** Process id of current rsync process. */ +static pid_t rsync_pid; +/** Whether the rsync process is currently stopped */ +static int rsync_stopped; +/** Process id of current rm process. */ +static pid_t rm_pid; +/** When the next snapshot is due. */ +struct timeval next_snapshot_time; +/* Creation time of the snapshot currently being created. */ +int64_t current_snapshot_creation_time; + DEFINE_DSS_ERRLIST; @@ -61,7 +75,9 @@ int call_command_handler(void) * incomplete, being deleted: 1204565370-incomplete.being_deleted */ enum snapshot_status_flags { + /** The rsync process terminated successfully. */ SS_COMPLETE = 1, + /** The rm process is running to remove this snapshot. */ SS_BEING_DELETED = 2, }; @@ -73,6 +89,19 @@ struct snapshot { unsigned interval; }; +/* + * An edge snapshot is either the oldest one or the newest one. + * + * We need to find either of them occasionally: The create code + * needs to know the newest snapshot because that is the one + * used as the link destination dir. The pruning code needs to + * find the oldest one in case disk space becomes low. + */ +struct edge_snapshot_data { + int64_t now; + struct snapshot snap; +}; + __printf_2_3 void dss_log(int ll, const char* fmt,...) { va_list argp; @@ -107,6 +136,22 @@ __printf_1_2 void dss_msg(const char* fmt,...) 
va_end(argp); } +/** + * Return the desired number of snapshots of an interval. + */ +unsigned num_snapshots(int interval) +{ + unsigned n; + + assert(interval >= 0); + + if (interval >= conf.num_intervals_arg) + return 0; + n = conf.num_intervals_arg - interval - 1; + return 1 << n; +} + +/* return: Whether dirname is a snapshot directory (0: no, 1: yes) */ int is_snapshot(const char *dirname, int64_t now, struct snapshot *s) { int i, ret; @@ -164,7 +209,7 @@ int is_snapshot(const char *dirname, int64_t now, struct snapshot *s) return 0; s->completion_time = num; s->flags = SS_COMPLETE; - if (strcmp(dot + 1, "being_deleted")) + if (!strcmp(dot + 1, "being_deleted")) s->flags |= SS_BEING_DELETED; success: s->name = dss_strdup(dirname); @@ -298,6 +343,22 @@ void free_snapshot_list(struct snapshot_list *sl) free(sl->snapshots); } +void stop_rsync_process(void) +{ + if (!rsync_pid || rsync_stopped) + return; + kill(SIGSTOP, rsync_pid); + rsync_stopped = 1; +} + +void restart_rsync_process(void) +{ + if (!rsync_pid || !rsync_stopped) + return; + kill (SIGCONT, rsync_pid); + rsync_stopped = 0; +} + /** * Print a log message about the exit status of a child. 
*/ @@ -321,7 +382,7 @@ int wait_for_process(pid_t pid, int *status) for (;;) { pause(); ret = next_signal(); - if (ret < 0) + if (ret < 0) break; if (!ret) continue; @@ -329,23 +390,26 @@ int wait_for_process(pid_t pid, int *status) ret = waitpid(pid, status, 0); if (ret >= 0) break; - if (errno != EINTR) /* error */ + if (errno != EINTR) { /* error */ + ret = -ERRNO_TO_DSS_ERROR(errno); break; + } } + /* SIGINT or SIGTERM */ DSS_WARNING_LOG("sending SIGTERM to pid %d\n", (int)pid); kill(pid, SIGTERM); } - if (ret < 0) { - ret = -ERRNO_TO_DSS_ERROR(errno); + if (ret < 0) make_err_msg("failed to wait for process %d", (int)pid); - } else + else log_termination_msg(pid, *status); return ret; } -int remove_snapshot(struct snapshot *s, pid_t *pid) +int remove_snapshot(struct snapshot *s) { int fds[3] = {0, 0, 0}; + assert(!rm_pid); char *new_name = being_deleted_name(s); int ret = dss_rename(s->name, new_name); char *argv[] = {"rm", "-rf", new_name, NULL}; @@ -353,13 +417,17 @@ int remove_snapshot(struct snapshot *s, pid_t *pid) if (ret < 0) goto out; DSS_NOTICE_LOG("removing %s (interval = %i)\n", s->name, s->interval); - ret = dss_exec(pid, argv[0], argv, fds); + stop_rsync_process(); + ret = dss_exec(&rm_pid, argv[0], argv, fds); out: free(new_name); return ret; } -int remove_redundant_snapshot(struct snapshot_list *sl, pid_t *pid) +/* + * return: 0: no redundant snapshots, 1: rm process started, negative: error + */ +int remove_redundant_snapshot(struct snapshot_list *sl) { int ret, i, interval; struct snapshot *s; @@ -367,7 +435,7 @@ int remove_redundant_snapshot(struct snapshot_list *sl, pid_t *pid) DSS_INFO_LOG("looking for intervals containing too many snapshots\n"); for (interval = conf.num_intervals_arg - 1; interval >= 0; interval--) { - unsigned keep = 1<<(conf.num_intervals_arg - interval - 1); + unsigned keep = num_snapshots(interval); unsigned num = sl->interval_count[interval]; struct snapshot *victim = NULL, *prev = NULL; int64_t score = LONG_MAX; @@ 
-382,7 +450,7 @@ int remove_redundant_snapshot(struct snapshot_list *sl, pid_t *pid) FOR_EACH_SNAPSHOT(s, i, sl) { int64_t this_score; - DSS_DEBUG_LOG("checking %s\n", s->name); + //DSS_DEBUG_LOG("checking %s\n", s->name); if (s->interval > interval) { prev = s; continue; @@ -398,7 +466,7 @@ int remove_redundant_snapshot(struct snapshot_list *sl, pid_t *pid) /* check if s is a better victim */ this_score = s->creation_time - prev->creation_time; assert(this_score >= 0); - DSS_DEBUG_LOG("%s: score %lli\n", s->name, (long long)score); + //DSS_DEBUG_LOG("%s: score %lli\n", s->name, (long long)score); if (this_score < score) { score = this_score; victim = s; @@ -411,13 +479,13 @@ int remove_redundant_snapshot(struct snapshot_list *sl, pid_t *pid) victim->name, victim->interval); continue; } - ret = remove_snapshot(victim, pid); + ret = remove_snapshot(victim); return ret < 0? ret : 1; } return 0; } -int remove_old_snapshot(struct snapshot_list *sl, pid_t *pid) +int remove_outdated_snapshot(struct snapshot_list *sl) { int i, ret; struct snapshot *s; @@ -432,7 +500,7 @@ int remove_old_snapshot(struct snapshot_list *sl, pid_t *pid) s->name, s->interval); continue; } - ret = remove_snapshot(s, pid); + ret = remove_snapshot(s); if (ret < 0) return ret; return 1; @@ -440,116 +508,114 @@ int remove_old_snapshot(struct snapshot_list *sl, pid_t *pid) return 0; } -int wait_for_rm_process(pid_t pid) +int handle_rm_exit(int status) { - int status, es, ret = wait_for_process(pid, &status); - if (ret < 0) - return ret; + int es, ret; + if (!WIFEXITED(status)) { - ret = E_INVOLUNTARY_EXIT; - make_err_msg("rm process %d died involuntary", (int)pid); - return ret; + make_err_msg("rm process %d died involuntary", (int)rm_pid); + ret = -E_INVOLUNTARY_EXIT; + goto out; } es = WEXITSTATUS(status); if (es) { + make_err_msg("rm process %d returned %d", (int)rm_pid, es); ret = -E_BAD_EXIT_CODE; - make_err_msg("rm process %d returned %d", (int)pid, es); - return ret; + goto out; } - return 1; 
+ ret = 1; + rm_pid = 0; +out: + return ret; } -int com_run(void) +int wait_for_rm_process(void) { - int ret; + int status, ret = wait_for_process(rm_pid, &status); - if (conf.dry_run_given) { - make_err_msg("dry_run not supported by this command"); - return -E_SYNTAX; - } - ret = install_sighandler(SIGHUP); if (ret < 0) return ret; - return 42; + return handle_rm_exit(status); } -void log_disk_space(struct disk_space *ds) +void kill_process(pid_t pid) { - DSS_INFO_LOG("free: %uM/%uM (%u%%), %u%% inodes unused\n", - ds->free_mb, ds->total_mb, ds->percent_free, - ds->percent_free_inodes); + if (!pid) + return; + DSS_WARNING_LOG("sending SIGTERM to pid %d\n", (int)pid); + kill(pid, SIGTERM); } -int com_prune(void) +void handle_sighup() { + DSS_INFO_LOG("FIXME: no sighup handling yet\n"); +} + +int rename_incomplete_snapshot(int64_t start) +{ + char *old_name, *new_name; int ret; - struct snapshot_list sl; - pid_t pid; - struct disk_space ds; - ret = get_disk_space(".", &ds); + ret = complete_name(start, get_current_time(), &new_name); if (ret < 0) return ret; - log_disk_space(&ds); - for (;;) { - get_snapshot_list(&sl); - ret = remove_old_snapshot(&sl, &pid); - free_snapshot_list(&sl); - if (ret < 0) - return ret; - if (!ret) - break; - ret = wait_for_rm_process(pid); - if (ret < 0) - goto out; + old_name = incomplete_name(start); + ret = dss_rename(old_name, new_name); + if (ret >= 0) + DSS_NOTICE_LOG("%s -> %s\n", old_name, new_name); + free(old_name); + free(new_name); + return ret; +} + +int handle_rsync_exit(int status) +{ + int es, ret; + + if (!WIFEXITED(status)) { + make_err_msg("rsync process %d died involuntary", (int)rsync_pid); + ret = -E_INVOLUNTARY_EXIT; + goto out; } - for (;;) { - get_snapshot_list(&sl); - ret = remove_redundant_snapshot(&sl, &pid); - free_snapshot_list(&sl); - if (ret < 0) - return ret; - if (!ret) - break; - ret = wait_for_rm_process(pid); - if (ret < 0) - goto out; + es = WEXITSTATUS(status); + if (es != 0 && es != 23 && es != 24) 
{ + make_err_msg("rsync process %d returned %d", (int)rsync_pid, es); + ret = -E_BAD_EXIT_CODE; + goto out; } - return 1; + ret = rename_incomplete_snapshot(current_snapshot_creation_time); out: + rsync_pid = 0; + current_snapshot_creation_time = 0; + rsync_stopped = 0; return ret; } -struct newest_snapshot_data { - char * newest_name; - int64_t newest_creation_time; - int64_t now; -}; - int get_newest_complete(const char *dirname, void *private) { - struct newest_snapshot_data *nsd = private; + struct edge_snapshot_data *esd = private; struct snapshot s; - int ret = is_snapshot(dirname, nsd->now, &s); + int ret = is_snapshot(dirname, esd->now, &s); if (ret <= 0) return 1; - if (s.creation_time < nsd->newest_creation_time) + if (s.flags != SS_COMPLETE) /* incomplete or being deleted */ + return 1; + if (s.creation_time < esd->snap.creation_time) return 1; - nsd->newest_creation_time = s.creation_time; - free(nsd->newest_name); - nsd->newest_name = s.name; + free(esd->snap.name); + esd->snap = s; return 1; } __malloc char *name_of_newest_complete_snapshot(void) { - struct newest_snapshot_data nsd = { + struct edge_snapshot_data esd = { .now = get_current_time(), - .newest_creation_time = -1 + .snap = {.creation_time = -1} }; - for_each_subdir(get_newest_complete, &nsd); - return nsd.newest_name; + for_each_subdir(get_newest_complete, &esd); + return esd.snap.name; } void create_rsync_argv(char ***argv, int64_t *num) @@ -568,7 +634,7 @@ void create_rsync_argv(char ***argv, int64_t *num) (*argv)[i++] = make_message("--link-dest=../%s", newest); free(newest); } else - DSS_INFO_LOG("no previous snapshot found"); + DSS_INFO_LOG("no previous snapshot found\n"); if (conf.exclude_patterns_given) { (*argv)[i++] = dss_strdup("--exclude-from"); (*argv)[i++] = dss_strdup(conf.exclude_patterns_arg); @@ -597,38 +663,279 @@ void free_rsync_argv(char **argv) free(argv); } -int create_snapshot(char **argv, pid_t *pid) +int create_snapshot(char **argv) { int fds[3] = {0, 0, 0}; + 
char *name = incomplete_name(current_snapshot_creation_time); - return dss_exec(pid, argv[0], argv, fds); + DSS_NOTICE_LOG("creating new snapshot %s\n", name); + free(name); + return dss_exec(&rsync_pid, argv[0], argv, fds); } -int rename_incomplete_snapshot(int64_t start) +void compute_next_snapshot_time(struct snapshot_list *sl) +{ + struct timeval now, unit_interval = {.tv_sec = 24 * 3600 * conf.unit_interval_arg}, + tmp, diff; + int64_t x = 0; + unsigned wanted = num_snapshots(0), num_complete_snapshots = 0; + int i, ret; + struct snapshot *s; + + gettimeofday(&now, NULL); + FOR_EACH_SNAPSHOT(s, i, sl) { + if (!(s->flags & SS_COMPLETE)) + continue; + num_complete_snapshots++; + x += s->completion_time - s->creation_time; + } + assert(x >= 0); + if (num_complete_snapshots) + x /= num_complete_snapshots; /* avg time to create one snapshot */ + x *= wanted; /* time to create all snapshots in interval 0 */ + tmp.tv_sec = x; + tmp.tv_usec = 0; + ret = tv_diff(&unit_interval, &tmp, &diff); /* time between creation */ + if (ret < 0) { + next_snapshot_time = now; + return; + } + tv_divide(wanted, &diff, &tmp); + tv_add(&now, &tmp, &next_snapshot_time); +} + +void handle_signal(struct snapshot_list *sl) +{ + int sig, ret = next_signal(); + + if (ret <= 0) + goto out; + sig = ret; + switch (sig) { + int status; + pid_t pid; + case SIGINT: + case SIGTERM: + restart_rsync_process(); + kill_process(rsync_pid); + kill_process(rm_pid); + exit(EXIT_FAILURE); + case SIGHUP: + handle_sighup(); + break; + case SIGCHLD: + ret = reap_child(&pid, &status); + if (ret <= 0) + break; + assert(pid == rsync_pid || pid == rm_pid); + if (pid == rsync_pid) + ret = handle_rsync_exit(status); + else + ret = handle_rm_exit(status); + free_snapshot_list(sl); + get_snapshot_list(sl); + compute_next_snapshot_time(sl); + } +out: + if (ret < 0) + log_err_msg(ERROR, -ret); +} + +int get_oldest(const char *dirname, void *private) +{ + struct edge_snapshot_data *esd = private; + struct snapshot s; + 
int ret = is_snapshot(dirname, esd->now, &s); + + if (ret <= 0) + return 1; + if (s.creation_time > esd->snap.creation_time) + return 1; + free(esd->snap.name); + esd->snap = s; + return 1; +} + +int remove_oldest_snapshot() { - char *old_name, *new_name; int ret; + struct edge_snapshot_data esd = { + .now = get_current_time(), + .snap = {.creation_time = LLONG_MAX} + }; + for_each_subdir(get_oldest, &esd); + if (!esd.snap.name) /* no snapshot found */ + return 0; + DSS_INFO_LOG("oldest snapshot: %s\n", esd.snap.name); + ret = 0; + if (esd.snap.creation_time == current_snapshot_creation_time) + goto out; /* do not remove the snapshot currently being created */ + ret = remove_snapshot(&esd.snap); +out: + free(esd.snap.name); + return ret; +} + +/* TODO: Also consider number of inodes. */ +int disk_space_low(void) +{ + struct disk_space ds; + int ret = get_disk_space(".", &ds); - ret = complete_name(start, get_current_time(), &new_name); if (ret < 0) return ret; - old_name = incomplete_name(start); - ret = dss_rename(old_name, new_name); - if (ret >= 0) - DSS_NOTICE_LOG("%s -> %s\n", old_name, new_name); - free(old_name); - free(new_name); + if (conf.min_free_mb_arg) + if (ds.free_mb < conf.min_free_mb_arg) + return 1; + if (conf.min_free_percent_arg) + if (ds.percent_free < conf.min_free_percent_arg) + return 1; + return 0; +} + +int try_to_free_disk_space(int low_disk_space, struct snapshot_list *sl) +{ + int ret; + + ret = remove_outdated_snapshot(sl); + if (ret) /* error, or we are removing something */ + return ret; + /* no outdated snapshot */ + ret = remove_redundant_snapshot(sl); + if (ret) + return ret; + if (!low_disk_space) + return 0; + DSS_WARNING_LOG("disk space low and nothing obvious to remove\n"); + ret = remove_oldest_snapshot(); + if (ret) + return ret; + make_err_msg("uhuhu: not enough disk space for a single snapshot"); + return -ENOSPC; +} + +int select_loop(void) +{ + int ret; + struct timeval tv = {.tv_sec = 0, .tv_usec = 0}; + struct 
snapshot_list sl = {.num_snapshots = 0}; + + get_snapshot_list(&sl); + compute_next_snapshot_time(&sl); + for (;;) { + struct timeval now, *tvp = &tv; + fd_set rfds; + int low_disk_space; + char **rsync_argv; + + FD_ZERO(&rfds); + FD_SET(signal_pipe, &rfds); + if (rsync_pid) + tv.tv_sec = 60; + else if (rm_pid) + tvp = NULL; + ret = dss_select(signal_pipe + 1, &rfds, NULL, tvp); + if (ret < 0) + return ret; + if (FD_ISSET(signal_pipe, &rfds)) + handle_signal(&sl); + if (rm_pid) + continue; + ret = disk_space_low(); + if (ret < 0) + break; + low_disk_space = ret; + if (low_disk_space) + stop_rsync_process(); + ret = try_to_free_disk_space(low_disk_space, &sl); + if (ret < 0) + break; + if (rm_pid) + continue; + if (rsync_pid) { + restart_rsync_process(); + continue; + } + /* neither rsync nor rm are running. Start rsync? */ + gettimeofday(&now, NULL); + if (tv_diff(&next_snapshot_time, &now, &tv) > 0) + continue; + create_rsync_argv(&rsync_argv, &current_snapshot_creation_time); + ret = create_snapshot(rsync_argv); + free_rsync_argv(rsync_argv); + if (ret < 0) + break; + } + free_snapshot_list(&sl); + return ret; +} + +int com_run(void) +{ + int ret; + + if (conf.dry_run_given) { + make_err_msg("dry_run not supported by this command"); + return -E_SYNTAX; + } + ret = install_sighandler(SIGHUP); + if (ret < 0) + return ret; + return select_loop(); +} + +void log_disk_space(struct disk_space *ds) +{ + DSS_INFO_LOG("free: %uM/%uM (%u%%), %u%% inodes unused\n", + ds->free_mb, ds->total_mb, ds->percent_free, + ds->percent_free_inodes); +} + +int com_prune(void) +{ + int ret; + struct snapshot_list sl; + struct disk_space ds; + + ret = get_disk_space(".", &ds); + if (ret < 0) + return ret; + log_disk_space(&ds); + for (;;) { + get_snapshot_list(&sl); + ret = remove_outdated_snapshot(&sl); + free_snapshot_list(&sl); + if (ret < 0) + return ret; + if (!ret) + break; + ret = wait_for_rm_process(); + if (ret < 0) + goto out; + } + for (;;) { + get_snapshot_list(&sl); + ret = 
remove_redundant_snapshot(&sl); + free_snapshot_list(&sl); + if (ret < 0) + return ret; + if (!ret) + break; + ret = wait_for_rm_process(); + if (ret < 0) + goto out; + } + return 1; +out: return ret; } int com_create(void) { - int ret, status, es; + int ret, status; char **rsync_argv; - int64_t snapshot_num; - pid_t pid; - create_rsync_argv(&rsync_argv, &snapshot_num); + create_rsync_argv(&rsync_argv, &current_snapshot_creation_time); if (conf.dry_run_given) { int i; char *msg = NULL; @@ -642,25 +949,13 @@ int com_create(void) free(msg); return 1; } - DSS_NOTICE_LOG("creating snapshot %lli\n", (long long)snapshot_num); - ret = create_snapshot(rsync_argv, &pid); + ret = create_snapshot(rsync_argv); if (ret < 0) goto out; - ret = wait_for_process(pid, &status); + ret = wait_for_process(rsync_pid, &status); if (ret < 0) goto out; - if (!WIFEXITED(status)) { - ret = E_INVOLUNTARY_EXIT; - make_err_msg("rsync process %d died involuntary", (int)pid); - goto out; - } - es = WEXITSTATUS(status); - if (es != 0 && es != 23 && es != 24) { - ret = -E_BAD_EXIT_CODE; - make_err_msg("rsync process %d returned %d", (int)pid, es); - goto out; - } - ret = rename_incomplete_snapshot(snapshot_num); + ret = handle_rsync_exit(status); out: free_rsync_argv(rsync_argv); return ret; @@ -678,10 +973,8 @@ int com_ls(void) return 1; } -/* TODO: Unlink pid file */ __noreturn void clean_exit(int status) { - //kill(0, SIGTERM); free(dss_error_txt); exit(status); } @@ -744,7 +1037,7 @@ static void setup_signal_handling(void) { int ret; - DSS_NOTICE_LOG("setting up signal handlers\n"); + DSS_INFO_LOG("setting up signal handlers\n"); signal_pipe = signal_init(); /* always successful */ ret = install_sighandler(SIGINT); if (ret < 0) @@ -761,17 +1054,11 @@ err: exit(EXIT_FAILURE); } - int main(int argc, char **argv) { int ret; cmdline_parser(argc, argv, &conf); /* aborts on errors */ - if (conf.inputs_num) { - ret = -E_SYNTAX; - make_err_msg("additional non-options given"); - goto out; - } ret = 
read_config_file(); if (ret < 0) goto out;