X-Git-Url: http://git.tuebingen.mpg.de/?p=dss.git;a=blobdiff_plain;f=dss.c;h=217dfafd25c55741e564c0feb13c1ccfbad317d0;hp=24d77eb79dbd2ab0862b2b7012b73babde542c93;hb=HEAD;hpb=07ab37b55e1bf4ad7ef65fd25db84fa2a5f50f56 diff --git a/dss.c b/dss.c index 24d77eb..0992ec6 100644 --- a/dss.c +++ b/dss.c @@ -1,8 +1,4 @@ -/* - * Copyright (C) 2008-2011 Andre Noll - * - * Licensed under the GPL v2. For licencing details see COPYING. - */ +/* SPDX-License-Identifier: GPL-2.0 */ #include #include #include @@ -85,7 +81,7 @@ static int64_t next_snapshot_time; static struct timeval next_removal_check; /** Creation time of the snapshot currently being created. */ static int64_t current_snapshot_creation_time; -/** The snapshot currently being removed. */ +/* Set by the pre-rm hook, cleared by handle_remove_exit(). */ struct snapshot *snapshot_currently_being_removed; /** Needed by the post-create hook. */ static char *path_to_last_complete_snapshot; @@ -162,7 +158,7 @@ static void dump_dss_config(const char *msg) fprintf(log, "\n*** internal state ***\n\n"); fprintf(log, "pid: %d\n" - "logile: %s\n" + "logfile: %s\n" "snapshot_currently_being_removed: %s\n" "path_to_last_complete_snapshot: %s\n" "reference_snapshot: %s\n" @@ -287,11 +283,13 @@ static char *get_config_file_name(void) return config_file; } -static int send_signal(int sig) +static int send_signal(int sig, bool wait) { pid_t pid; char *config_file = get_config_file_name(); int ret = get_dss_pid(config_file, &pid); + unsigned ms = 32; + struct timespec ts; free(config_file); if (ret < 0) @@ -304,7 +302,23 @@ static int send_signal(int sig) ret = kill(pid, sig); if (ret < 0) return -ERRNO_TO_DSS_ERROR(errno); - return 1; + if (!wait) + return 1; + while (ms < 5000) { + ts.tv_sec = ms / 1000; + ts.tv_nsec = (ms % 1000) * 1000 * 1000; + ret = nanosleep(&ts, NULL); + if (ret < 0) + return -ERRNO_TO_DSS_ERROR(errno); + ret = kill(pid, 0); + if (ret < 0) { + if (errno != ESRCH) + return -ERRNO_TO_DSS_ERROR(errno); + return 1; + } + ms *= 2; + } + return -E_KILL_TIMEOUT; } struct signal_info { @@ -363,6 +377,7 @@ static const struct signal_info signal_table[] = { static int com_kill(void) { + bool w_given = OPT_GIVEN(KILL, WAIT); const char *arg = OPT_STRING_VAL(KILL, SIGNAL); int ret, i; @@ -373,17 +388,17 @@ static int com_kill(void) return ret; if (val < 0 || val > SIGRTMAX) return -ERRNO_TO_DSS_ERROR(EINVAL); - return send_signal(val); + return send_signal(val, w_given); } if (strncasecmp(arg, "sig", 3) == 0) arg += 3; if (strcasecmp(arg, "CLD") == 0) - return send_signal(SIGCHLD); + return send_signal(SIGCHLD, w_given); if (strcasecmp(arg, "IOT") == 0) - return send_signal(SIGABRT); + return send_signal(SIGABRT, w_given); for (i = 0; i < SIGNAL_TABLE_SIZE; i++) if (strcasecmp(arg, signal_table[i].name) == 0) - return send_signal(signal_table[i].num); + return send_signal(signal_table[i].num, w_given); DSS_ERROR_LOG(("invalid sigspec: %s\n", arg)); return -ERRNO_TO_DSS_ERROR(EINVAL); } @@ -398,7 +413,8 @@ static void dss_get_snapshot_list(struct snapshot_list *sl) static int64_t compute_next_snapshot_time(void) { int64_t x = 0, now = get_current_time(), unit_interval - = 24 * 3600 * OPT_UINT32_VAL(DSS, UNIT_INTERVAL), ret; + = 24 * 3600 * OPT_UINT32_VAL(DSS, UNIT_INTERVAL), ret, + last_completion_time; unsigned wanted = desired_number_of_snapshots(0, OPT_UINT32_VAL(DSS, NUM_INTERVALS)), num_complete = 0; @@ -412,6 +428,7 @@ static int64_t compute_next_snapshot_time(void) continue; num_complete++; x += s->completion_time - s->creation_time; + last_completion_time = s->completion_time; } assert(x >= 0); @@ -421,7 +438,7 @@ static int64_t compute_next_snapshot_time(void) x /= num_complete; /* avg time to create one snapshot */ if (unit_interval < x * wanted) /* oops, no sleep at all */ goto out; - ret = s->completion_time + unit_interval / wanted - x; + ret = last_completion_time + unit_interval / wanted - x; out: free_snapshot_list(&sl); return ret; @@ -557,9 +574,6 @@ static int is_reference_snapshot(struct snapshot *s) return strcmp(s->name, name_of_reference_snapshot)? 0 : 1; } -/* - * return: 0: no redundant snapshots, 1: rm process started, negative: error - */ static struct snapshot *find_redundant_snapshot(struct snapshot_list *sl) { int i, interval; @@ -637,6 +651,7 @@ static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl) int i, num_complete; struct snapshot *s, *ref = NULL; + DSS_DEBUG_LOG(("picking snapshot with earliest creation time\n")); num_complete = num_complete_snapshots(sl); if (num_complete <= OPT_UINT32_VAL(DSS, MIN_COMPLETE)) return NULL; @@ -647,7 +662,6 @@ static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl) ref = s; continue; } - DSS_INFO_LOG(("oldest removable snapshot: %s\n", s->name)); return s; } assert(ref); @@ -655,6 +669,50 @@ static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl) return ref; } +/* returns NULL <==> *reason is set to NULL */ +static struct snapshot *find_removable_snapshot(struct snapshot_list *sl, + bool try_hard, char **reason) +{ + struct snapshot *victim; + + /* + * Don't remove anything if there is free space and we have fewer + * snapshots than configured, plus one. This way there is always one + * snapshot that can be recycled. + */ + if (!try_hard && sl->num_snapshots <= + 1 << OPT_UINT32_VAL(DSS, NUM_INTERVALS)) + goto nope; + victim = find_orphaned_snapshot(sl); + if (victim) { + *reason = make_message("orphaned"); + return victim; + } + victim = find_outdated_snapshot(sl); + if (victim) { + *reason = make_message("outdated"); + return victim; + } + if (!OPT_GIVEN(DSS, KEEP_REDUNDANT)) { + victim = find_redundant_snapshot(sl); + if (victim) { + *reason = make_message("redundant"); + return victim; + } + } + if (!try_hard) + goto nope; + DSS_WARNING_LOG(("nothing obvious to remove\n")); + victim = find_oldest_removable_snapshot(sl); + if (victim) { + *reason = make_message("oldest"); + return victim; + } +nope: + *reason = NULL; + return NULL; +} + static int rename_incomplete_snapshot(int64_t start) { char *old_name; @@ -687,7 +745,7 @@ static int try_to_free_disk_space(void) struct snapshot_list sl; struct snapshot *victim; struct timeval now; - const char *why; + char *why; int low_disk_space; ret = disk_space_low(NULL); @@ -698,55 +756,25 @@ static int try_to_free_disk_space(void) if (tv_diff(&next_removal_check, &now, NULL) > 0) return 0; if (!low_disk_space) { - if (OPT_GIVEN(DSS, KEEP_REDUNDANT)) - return 0; if (snapshot_creation_status != HS_READY) return 0; if (next_snapshot_is_due()) return 0; } - /* - * Idle and --keep_redundant not given, or low disk space. Look at - * existing snapshots. - */ + /* Idle or low disk space, look at existing snapshots. */ dss_get_snapshot_list(&sl); - ret = 0; - /* - * Don't remove anything if there is free space and we have fewer - * snapshots than configured, plus one. This way there is always one - * snapshot that can be recycled. - */ - if (!low_disk_space && sl.num_snapshots <= - 1 << OPT_UINT32_VAL(DSS, NUM_INTERVALS)) - goto out; - why = "outdated"; - victim = find_outdated_snapshot(&sl); - if (victim) - goto remove; - why = "redundant"; - victim = find_redundant_snapshot(&sl); - if (victim) - goto remove; - why = "orphaned"; - victim = find_orphaned_snapshot(&sl); + victim = find_removable_snapshot(&sl, low_disk_space, &why); + if (victim) { + pre_remove_hook(victim, why); + free(why); + } + free_snapshot_list(&sl); if (victim) - goto remove; - /* try harder only if disk space is low */ + return 1; if (!low_disk_space) - goto out; - DSS_WARNING_LOG(("disk space low and nothing obvious to remove\n")); - why = "oldest"; - victim = find_oldest_removable_snapshot(&sl); - if (victim) - goto remove; + return 0; DSS_CRIT_LOG(("uhuhu: disk space low and nothing to remove\n")); - ret = -ERRNO_TO_DSS_ERROR(ENOSPC); - goto out; -remove: - pre_remove_hook(victim, why); -out: - free_snapshot_list(&sl); - return ret; + return -ERRNO_TO_DSS_ERROR(ENOSPC); } static void post_create_hook(void) @@ -1094,7 +1122,6 @@ static int change_to_dest_dir(void) static int check_config(void) { - int ret; uint32_t unit_interval = OPT_UINT32_VAL(DSS, UNIT_INTERVAL); uint32_t num_intervals = OPT_UINT32_VAL(DSS, NUM_INTERVALS); @@ -1119,9 +1146,6 @@ static int check_config(void) DSS_ERROR_LOG(("--dest-dir required\n")); return -E_SYNTAX; } - ret = change_to_dest_dir(); - if (ret < 0) - return ret; } DSS_DEBUG_LOG(("number of intervals: %i\n", num_intervals)); return 1; @@ -1235,8 +1259,6 @@ close_fd: close(fd); out: free(config_file); - if (ret < 0) - DSS_EMERG_LOG(("%s\n", dss_strerror(-ret))); return ret; } @@ -1284,9 +1306,7 @@ static int handle_signal(void) switch (sig) { case SIGINT: case SIGTERM: - kill_children(); - ret = -E_SIGNAL; - break; + return -E_SIGNAL; case SIGHUP: ret = handle_sighup(); break; @@ -1371,7 +1391,7 @@ out: static void create_rsync_argv(char ***argv, int64_t *num) { char *logname; - int i = 0, j, N = OPT_GIVEN(DSS, RSYNC_OPTION); + int i = 0, j, N; struct snapshot_list sl; static bool seeded; @@ -1380,7 +1400,13 @@ static void create_rsync_argv(char ***argv, int64_t *num) name_of_reference_snapshot = name_of_newest_complete_snapshot(&sl); free_snapshot_list(&sl); - *argv = dss_malloc((15 + N) * sizeof(char *)); + /* + * We specify up to 6 arguments, one argument per given rsync option + * and one argument per given source dir. We also need space for the + * terminating NULL pointer. + */ + N = OPT_GIVEN(DSS, RSYNC_OPTION) + OPT_GIVEN(DSS, SOURCE_DIR); + *argv = dss_malloc((7 + N) * sizeof(char *)); (*argv)[i++] = dss_strdup("rsync"); (*argv)[i++] = dss_strdup("-a"); (*argv)[i++] = dss_strdup("--delete"); @@ -1392,7 +1418,7 @@ static void create_rsync_argv(char ***argv, int64_t *num) DSS_NOTICE_LOG(("adding --checksum to rsync options\n")); (*argv)[i++] = dss_strdup("--checksum"); } - for (j = 0; j < N; j++) + for (j = 0; j < OPT_GIVEN(DSS, RSYNC_OPTION); j++) (*argv)[i++] = dss_strdup(lls_string_val(j, OPT_RESULT(DSS, RSYNC_OPTION))); if (name_of_reference_snapshot) { @@ -1402,14 +1428,34 @@ static void create_rsync_argv(char ***argv, int64_t *num) } else DSS_INFO_LOG(("no suitable reference snapshot found\n")); logname = dss_logname(); - if (use_rsync_locally(logname)) - (*argv)[i++] = dss_strdup(OPT_STRING_VAL(DSS, SOURCE_DIR)); - else - (*argv)[i++] = make_message("%s@%s:%s/", - OPT_GIVEN(DSS, REMOTE_USER)? - OPT_STRING_VAL(DSS, REMOTE_USER) : logname, - OPT_STRING_VAL(DSS, REMOTE_HOST), - OPT_STRING_VAL(DSS, SOURCE_DIR)); + if (use_rsync_locally(logname)) { + for (j = 0; j < OPT_GIVEN(DSS, SOURCE_DIR); j++) + (*argv)[i++] = dss_strdup(lls_string_val(j, + OPT_RESULT(DSS, SOURCE_DIR))); + } else { + /* + * dss-1.0 and earlier did not support multiple source + * directories. These versions appended a slash to the end of + * the source directory to make sure that only the contents of + * the single source directory, but not the directory itself, + * are copied to the destination. For multiple source + * directories, however, this is not a good idea because the + * source directories may well contain identical file names, + * which would then be copied to the same location on the + * destination, overwriting each other. Moreover, we want the + * directory on the destination match the source. To preserve + * the old behaviour, we thus have to special-case N=1. + */ + for (j = 0; j < OPT_GIVEN(DSS, SOURCE_DIR); j++) { + (*argv)[i++] = make_message("%s@%s:%s%s", + OPT_GIVEN(DSS, REMOTE_USER)? + OPT_STRING_VAL(DSS, REMOTE_USER) : logname, + OPT_STRING_VAL(DSS, REMOTE_HOST), + lls_string_val(j, OPT_RESULT(DSS, SOURCE_DIR)), + OPT_GIVEN(DSS, SOURCE_DIR) == 1? "/" : "" + ); + } + } free(logname); *num = get_current_time(); (*argv)[i++] = incomplete_name(*num); @@ -1433,6 +1479,7 @@ static int create_snapshot(char **argv) { int ret; + assert(argv); ret = rename_resume_snap(current_snapshot_creation_time); if (ret < 0) return ret; @@ -1526,15 +1573,18 @@ out: static void exit_hook(int exit_code) { - const char *argv[3]; pid_t pid; - - argv[0] = OPT_STRING_VAL(DSS, EXIT_HOOK); - argv[1] = dss_strerror(-exit_code); - argv[2] = NULL; - - DSS_NOTICE_LOG(("executing %s %s\n", argv[0], argv[1])); - dss_exec(&pid, argv[0], (char **)argv); + char **argv, *tmp = dss_strdup(OPT_STRING_VAL(DSS, EXIT_HOOK)); + unsigned n = split_args(tmp, &argv); + + n++; + argv = dss_realloc(argv, (n + 1) * sizeof(char *)); + argv[n - 1] = dss_strdup(dss_strerror(-exit_code)); + argv[n] = NULL; + dss_exec(&pid, argv[0], argv); + free(argv[n - 1]); + free(argv); + free(tmp); } static void lock_dss_or_die(void) @@ -1566,12 +1616,23 @@ static int com_run(void) DSS_ERROR_LOG(("pid %d\n", (int)pid)); return -E_ALREADY_RUNNING; } + /* + * Order is important here: Since daemon_init() forks, it would drop + * the lock if it had been acquired already. Changing the cwd before + * grabbing the lock causes stat(2) to fail in case a relative config + * file path was given, which results in a different key ID for + * locking. Therefore we must first daemonize, then lock, then change + * the cwd. + */ if (OPT_GIVEN(RUN, DAEMON)) { fd = daemon_init(); daemonized = true; logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE)); } lock_dss_or_die(); + ret = change_to_dest_dir(); + if (ret < 0) + return ret; dump_dss_config("startup"); ret = install_sighandler(SIGHUP); if (ret < 0) @@ -1591,6 +1652,8 @@ static int com_run(void) ret = -E_BUG; kill_children(); exit_hook(ret); + while (wait(NULL) >= 0 || errno != ECHILD) + ; /* still have children to wait for */ return ret; } EXPORT_CMD_HANDLER(run); @@ -1601,55 +1664,58 @@ static int com_prune(void) struct snapshot_list sl; struct snapshot *victim; struct disk_space ds; - const char *why; + char *why; + bool try_hard; lock_dss_or_die(); - ret = get_disk_space(".", &ds); + ret = change_to_dest_dir(); if (ret < 0) return ret; - log_disk_space(&ds); + switch (OPT_UINT32_VAL(PRUNE, DISK_SPACE)) { + case FDS_LOW: try_hard = true; break; + case FDS_HIGH: try_hard = false; break; + default: + ret = get_disk_space(".", &ds); + if (ret < 0) + return ret; + log_disk_space(&ds); + try_hard = disk_space_low(&ds); + } dss_get_snapshot_list(&sl); - why = "outdated"; - victim = find_outdated_snapshot(&sl); - if (victim) - goto rm; - why = "redundant"; - victim = find_redundant_snapshot(&sl); - if (victim) - goto rm; - ret = 0; - goto out; -rm: + victim = find_removable_snapshot(&sl, try_hard, &why); + if (!victim) { + dss_msg("nothing to prune\n"); + ret = 0; + goto free_sl; + } if (OPT_GIVEN(DSS, DRY_RUN)) { - dss_msg("%s snapshot %s (interval = %i)\n", + dss_msg("picking %s snapshot %s (interval = %i)\n", why, victim->name, victim->interval); ret = 0; - goto out; + goto free_why; } pre_remove_hook(victim, why); if (snapshot_removal_status == HS_PRE_RUNNING) { ret = wait_for_remove_process(); if (ret < 0) - goto out; + goto free_why; + ret = -E_HOOK_FAILED; if (snapshot_removal_status != HS_PRE_SUCCESS) - goto out; + goto free_why; } ret = exec_rm(); if (ret < 0) - goto out; + goto free_why; ret = wait_for_remove_process(); if (ret < 0) - goto out; - if (snapshot_removal_status != HS_SUCCESS) - goto out; + goto free_why; + assert(snapshot_removal_status == HS_SUCCESS); post_remove_hook(); - if (snapshot_removal_status != HS_POST_RUNNING) - goto out; + assert(snapshot_removal_status == HS_POST_RUNNING); ret = wait_for_remove_process(); - if (ret < 0) - goto out; - ret = 1; -out: +free_why: + free(why); +free_sl: free_snapshot_list(&sl); return ret; } @@ -1661,6 +1727,9 @@ static int com_create(void) char **rsync_argv; lock_dss_or_die(); + ret = change_to_dest_dir(); + if (ret < 0) + return ret; if (OPT_GIVEN(DSS, DRY_RUN)) { int i; char *msg = NULL; @@ -1706,11 +1775,14 @@ EXPORT_CMD_HANDLER(create); static int com_ls(void) { - int i; + int i, ret; struct snapshot_list sl; struct snapshot *s; int64_t now = get_current_time(); + ret = change_to_dest_dir(); + if (ret < 0) + return ret; dss_get_snapshot_list(&sl); FOR_EACH_SNAPSHOT(s, i, &sl) { int64_t d; @@ -1757,7 +1829,7 @@ static void handle_version_and_help(void) else if (OPT_GIVEN(DSS, HELP)) txt = lls_short_help(CMD_PTR(DSS)); else if (OPT_GIVEN(DSS, VERSION)) - txt = dss_strdup(VERSION_STRING); + txt = make_message("%s\n", VERSION_STRING); else return; printf("%s", txt);