+ FD_ZERO(&rfds);
+ FD_SET(signal_pipe, &rfds);
+ ret = dss_select(signal_pipe + 1, &rfds, NULL, NULL);
+ if (ret < 0)
+ break;
+ ret = next_signal();
+ if (!ret)
+ continue;
+ if (ret == SIGCHLD) {
+ ret = waitpid(pid, status, 0);
+ if (ret >= 0)
+ break;
+ if (errno != EINTR) { /* error */
+ ret = -ERRNO_TO_DSS_ERROR(errno);
+ break;
+ }
+ }
+ /* SIGINT or SIGTERM */
+ dss_kill(pid, SIGTERM, "killing child process");
+ }
+ if (ret < 0)
+ DSS_ERROR_LOG(("failed to wait for process %d\n", (int)pid));
+ else
+ log_termination_msg(pid, *status);
+ return ret;
+}
+
+static void handle_pre_remove_exit(int status)
+{
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ snapshot_removal_status = HS_READY;
+ gettimeofday(&next_removal_check, NULL);
+ next_removal_check.tv_sec += 60;
+ return;
+ }
+ snapshot_removal_status = HS_PRE_SUCCESS;
+}
+
+static int handle_rm_exit(int status)
+{
+ if (!WIFEXITED(status)) {
+ snapshot_removal_status = HS_READY;
+ return -E_INVOLUNTARY_EXIT;
+ }
+ if (WEXITSTATUS(status)) {
+ snapshot_removal_status = HS_READY;
+ return -E_BAD_EXIT_CODE;
+ }
+ snapshot_removal_status = HS_SUCCESS;
+ return 1;
+}
+
+static void handle_post_remove_exit(void)
+{
+ snapshot_removal_status = HS_READY;
+}
+
+static int handle_remove_exit(int status)
+{
+ int ret;
+ struct snapshot *s = snapshot_currently_being_removed;
+
+ assert(s);
+ switch (snapshot_removal_status) {
+ case HS_PRE_RUNNING:
+ handle_pre_remove_exit(status);
+ ret = 1;
+ break;
+ case HS_RUNNING:
+ ret = handle_rm_exit(status);
+ break;
+ case HS_POST_RUNNING:
+ handle_post_remove_exit();
+ ret = 1;
+ break;
+ default:
+ ret = -E_BUG;
+ }
+ if (snapshot_removal_status == HS_READY) {
+ free(s->name);
+ free(s);
+ snapshot_currently_being_removed = NULL;
+ }
+ remove_pid = 0;
+ return ret;
+}
+
+static int wait_for_remove_process(void)
+{
+ int status, ret;
+
+ assert(remove_pid);
+ assert(
+ snapshot_removal_status == HS_PRE_RUNNING ||
+ snapshot_removal_status == HS_RUNNING ||
+ snapshot_removal_status == HS_POST_RUNNING
+ );
+ ret = wait_for_process(remove_pid, &status);
+ if (ret < 0)
+ return ret;
+ return handle_remove_exit(status);
+}
+
+static int handle_rsync_exit(int status)
+{
+ int es, ret;
+
+ if (!WIFEXITED(status)) {
+ DSS_ERROR_LOG(("rsync process %d died involuntary\n", (int)create_pid));
+ ret = -E_INVOLUNTARY_EXIT;
+ snapshot_creation_status = HS_READY;
+ goto out;
+ }
+ es = WEXITSTATUS(status);
+ /*
+ * Restart rsync on non-fatal errors:
+ * 24: Partial transfer due to vanished source files
+ */
+ if (es != 0 && es != 24) {
+ DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
+ es, ++num_consecutive_rsync_errors));
+ if (!logfile) { /* called by com_run() */
+ ret = -E_BAD_EXIT_CODE;
+ goto out;
+ }
+ if (num_consecutive_rsync_errors >
+ OPT_UINT32_VAL(RUN, MAX_RSYNC_ERRORS)) {
+ ret = -E_TOO_MANY_RSYNC_ERRORS;
+ snapshot_creation_status = HS_READY;
+ goto out;
+ }
+ DSS_WARNING_LOG(("restarting rsync process\n"));
+ snapshot_creation_status = HS_NEEDS_RESTART;
+ next_snapshot_time = get_current_time() + 60;
+ ret = 1;
+ goto out;
+ }
+ num_consecutive_rsync_errors = 0;
+ ret = rename_incomplete_snapshot(current_snapshot_creation_time);
+ if (ret < 0)
+ goto out;
+ snapshot_creation_status = HS_SUCCESS;
+ free(name_of_reference_snapshot);
+ name_of_reference_snapshot = NULL;
+out:
+ create_process_stopped = 0;
+ return ret;
+}
+
+static int handle_pre_create_hook_exit(int status)
+{
+ int es, ret;
+ static int warn_count;
+
+ if (!WIFEXITED(status)) {
+ snapshot_creation_status = HS_READY;
+ ret = -E_INVOLUNTARY_EXIT;
+ goto out;
+ }
+ es = WEXITSTATUS(status);
+ if (es) {
+ if (!warn_count--) {
+ DSS_NOTICE_LOG(("pre_create_hook %s returned %d\n",
+ OPT_STRING_VAL(DSS, PRE_CREATE_HOOK), es));
+ DSS_NOTICE_LOG(("deferring snapshot creation...\n"));
+ warn_count = 60; /* warn only once per hour */
+ }
+ next_snapshot_time = get_current_time() + 60;
+ snapshot_creation_status = HS_READY;
+ ret = 0;
+ goto out;
+ }
+ warn_count = 0;
+ snapshot_creation_status = HS_PRE_SUCCESS;
+ ret = 1;
+out:
+ return ret;
+}
+
+static int handle_sigchld(void)
+{
+ pid_t pid;
+ int status, ret = reap_child(&pid, &status);
+
+ if (ret <= 0)
+ return ret;
+
+ if (pid == create_pid) {
+ switch (snapshot_creation_status) {
+ case HS_PRE_RUNNING:
+ ret = handle_pre_create_hook_exit(status);
+ break;
+ case HS_RUNNING:
+ ret = handle_rsync_exit(status);
+ break;
+ case HS_POST_RUNNING:
+ snapshot_creation_status = HS_READY;
+ ret = 1;
+ break;
+ default:
+ DSS_EMERG_LOG(("BUG: create can't die in status %d\n",
+ snapshot_creation_status));
+ return -E_BUG;
+ }
+ create_pid = 0;
+ return ret;
+ }
+ if (pid == remove_pid) {
+ ret = handle_remove_exit(status);
+ if (ret < 0)
+ return ret;
+ return ret;
+ }
+ DSS_EMERG_LOG(("BUG: unknown process %d died\n", (int)pid));
+ return -E_BUG;
+}
+
+/* also checks if . is a mountpoint, if --mountpoint was given */
+static int change_to_dest_dir(void)
+{
+ int ret;
+ const char *dd = OPT_STRING_VAL(DSS, DEST_DIR);
+ struct stat dot, dotdot;
+
+ DSS_INFO_LOG(("changing cwd to %s\n", dd));
+ if (chdir(dd) < 0) {
+ ret = -ERRNO_TO_DSS_ERROR(errno);
+ DSS_ERROR_LOG(("could not change cwd to %s\n", dd));
+ return ret;
+ }
+ if (!OPT_GIVEN(DSS, MOUNTPOINT))
+ return 0;
+ if (stat(".", &dot) < 0) {
+ ret = -ERRNO_TO_DSS_ERROR(errno);
+ DSS_ERROR_LOG(("could not stat .\n"));
+ return ret;
+ }
+ if (stat("..", &dotdot) < 0) {
+ ret = -ERRNO_TO_DSS_ERROR(errno);
+ DSS_ERROR_LOG(("could not stat ..\n"));
+ return ret;
+ }
+ if (dot.st_dev == dotdot.st_dev && dot.st_ino != dotdot.st_ino) {
+ DSS_ERROR_LOG(("mountpoint check failed for %s\n", dd));
+ return -E_MOUNTPOINT;
+ }
+ return 1;
+}
+
+static int check_config(void)
+{
+ int ret;
+ uint32_t unit_interval = OPT_UINT32_VAL(DSS, UNIT_INTERVAL);
+ uint32_t num_intervals = OPT_UINT32_VAL(DSS, NUM_INTERVALS);
+
+ if (unit_interval == 0) {
+ DSS_ERROR_LOG(("bad unit interval: %i\n", unit_interval));
+ return -E_INVALID_NUMBER;
+ }
+ DSS_DEBUG_LOG(("unit interval: %i day(s)\n", unit_interval));
+
+ if (num_intervals == 0 || num_intervals > 30) {
+ DSS_ERROR_LOG(("bad number of intervals: %i\n", num_intervals));
+ return -E_INVALID_NUMBER;
+ }
+ if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE))
+ if (!OPT_GIVEN(DSS, SOURCE_DIR)) {
+ DSS_ERROR_LOG(("--source-dir required\n"));
+ return -E_SYNTAX;
+ }
+ if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE)
+ || subcmd == CMD_PTR(LS) || subcmd == CMD_PTR(PRUNE)) {
+ if (!OPT_GIVEN(DSS, DEST_DIR)) {
+ DSS_ERROR_LOG(("--dest-dir required\n"));
+ return -E_SYNTAX;
+ }
+ ret = change_to_dest_dir();
+ if (ret < 0)
+ return ret;
+ }
+ DSS_DEBUG_LOG(("number of intervals: %i\n", num_intervals));
+ return 1;
+}
+
+static int lopsub_error(int lopsub_ret, char **errctx)
+{
+ const char *msg = lls_strerror(-lopsub_ret);
+ if (*errctx)
+ DSS_ERROR_LOG(("%s: %s\n", *errctx, msg));
+ else
+ DSS_ERROR_LOG(("%s\n", msg));
+ free(*errctx);
+ *errctx = NULL;
+ return -E_LOPSUB;
+}
+
+static int parse_config_file(bool sighup, const struct lls_command *cmd)
+{
+ int ret, fd = -1;
+ char *config_file = get_config_file_name();
+ struct stat statbuf;
+ void *map;
+ size_t sz;
+ int cf_argc;
+ char **cf_argv, *errctx = NULL;
+ struct lls_parse_result *cf_lpr, *merged_lpr, *clpr;
+ const char *subcmd_name;
+
+ ret = open(config_file, O_RDONLY);
+ if (ret < 0) {
+ if (errno != ENOENT || OPT_GIVEN(DSS, CONFIG_FILE)) {
+ ret = -ERRNO_TO_DSS_ERROR(errno);
+ DSS_ERROR_LOG(("config file %s can not be opened\n",
+ config_file));
+ goto out;
+ }
+ /* no config file -- nothing to do */
+ ret = 0;
+ goto success;
+ }
+ fd = ret;
+ ret = fstat(fd, &statbuf);
+ if (ret < 0) {
+ ret = -ERRNO_TO_DSS_ERROR(errno);
+ DSS_ERROR_LOG(("failed to stat config file %s\n", config_file));
+ goto close_fd;
+ }
+ sz = statbuf.st_size;
+ if (sz == 0) { /* config file is empty -- nothing to do */
+ ret = 0;
+ goto success;
+ }
+ map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (map == MAP_FAILED) {
+ ret = -ERRNO_TO_DSS_ERROR(errno);
+ DSS_ERROR_LOG(("failed to mmap config file %s\n",
+ config_file));
+ goto close_fd;
+ }
+ if (cmd == CMD_PTR(DSS))
+ subcmd_name = NULL;
+ else
+ subcmd_name = lls_command_name(cmd);
+ ret = lls_convert_config(map, sz, subcmd_name, &cf_argv, &errctx);
+ munmap(map, sz);
+ if (ret < 0) {
+ DSS_ERROR_LOG(("failed to convert config file %s\n",
+ config_file));
+ ret = lopsub_error(ret, &errctx);
+ goto close_fd;
+ }
+ cf_argc = ret;
+ ret = lls_parse(cf_argc, cf_argv, cmd, &cf_lpr, &errctx);
+ lls_free_argv(cf_argv);
+ if (ret < 0) {
+ ret = lopsub_error(ret, &errctx);
+ goto close_fd;
+ }
+ clpr = cmd == CMD_PTR(DSS)? cmdline_lpr : cmdline_sublpr;
+ if (sighup) /* config file overrides command line */
+ ret = lls_merge(cf_lpr, clpr, cmd, &merged_lpr, &errctx);
+ else /* command line options overrride config file options */
+ ret = lls_merge(clpr, cf_lpr, cmd, &merged_lpr, &errctx);
+ lls_free_parse_result(cf_lpr, cmd);
+ if (ret < 0) {
+ ret = lopsub_error(ret, &errctx);
+ goto close_fd;
+ }
+ ret = 1;
+success:
+ assert(ret >= 0);
+ DSS_DEBUG_LOG(("loglevel: %d\n", OPT_UINT32_VAL(DSS, LOGLEVEL)));
+ if (cmd != CMD_PTR(DSS)) {
+ if (ret > 0) {
+ if (sublpr != cmdline_sublpr)
+ lls_free_parse_result(sublpr, cmd);
+ sublpr = merged_lpr;
+ } else
+ sublpr = cmdline_sublpr;
+ } else {
+ if (ret > 0) {
+ if (lpr != cmdline_lpr)
+ lls_free_parse_result(lpr, cmd);
+ lpr = merged_lpr;
+ } else
+ lpr = cmdline_lpr;
+ }
+close_fd:
+ if (fd >= 0)
+ close(fd);
+out:
+ free(config_file);
+ if (ret < 0)
+ DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));
+ return ret;
+}
+
+static int handle_sighup(void)
+{
+ int ret;
+
+ DSS_NOTICE_LOG(("SIGHUP, re-reading config\n"));
+ dump_dss_config("old");
+ ret = parse_config_file(true /* SIGHUP */, CMD_PTR(DSS));
+ if (ret < 0)
+ return ret;
+ ret = parse_config_file(true /* SIGHUP */, CMD_PTR(RUN));
+ if (ret < 0)
+ return ret;
+ ret = check_config();
+ if (ret < 0)
+ return ret;
+ close_log(logfile);
+ logfile = NULL;
+ if (OPT_GIVEN(RUN, DAEMON) || daemonized) {
+ logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE));
+ log_welcome(OPT_UINT32_VAL(DSS, LOGLEVEL));
+ daemonized = true;
+ }
+ dump_dss_config("reloaded");
+ invalidate_next_snapshot_time();
+ return 1;
+}
+
+static void kill_children(void)
+{
+ restart_create_process();
+ dss_kill(create_pid, SIGTERM, NULL);
+ dss_kill(remove_pid, SIGTERM, NULL);
+}
+
+static int handle_signal(void)
+{
+ int sig, ret = next_signal();
+
+ if (ret <= 0)
+ goto out;
+ sig = ret;
+ switch (sig) {
+ case SIGINT:
+ case SIGTERM:
+ kill_children();
+ ret = -E_SIGNAL;
+ break;
+ case SIGHUP:
+ ret = handle_sighup();
+ break;
+ case SIGCHLD:
+ ret = handle_sigchld();
+ break;
+ }
+out:
+ if (ret < 0)
+ DSS_ERROR_LOG(("%s\n", dss_strerror(-ret)));
+ return ret;
+}
+
+/*
+ * We can not use rsync locally if the local user is different from the remote
+ * user or if the src dir is not on the local host (or both).
+ */
+static int use_rsync_locally(char *logname)
+{
+ const char *h = OPT_STRING_VAL(DSS, REMOTE_HOST);
+
+ if (strcmp(h, "localhost") && strcmp(h, "127.0.0.1"))
+ return 0;
+ if (OPT_GIVEN(DSS, REMOTE_USER) &&
+ strcmp(OPT_STRING_VAL(DSS, REMOTE_USER), logname))
+ return 0;
+ return 1;
+}
+
+static int rename_resume_snap(int64_t creation_time)
+{
+ struct snapshot_list sl;
+ struct snapshot *s = NULL;
+ char *new_name = incomplete_name(creation_time);
+ int ret;
+ const char *why;
+
+ sl.num_snapshots = 0;
+
+ ret = 0;
+ dss_get_snapshot_list(&sl);
+ /*
+ * Snapshot recycling: We first look at the newest snapshot. If this
+ * snapshot happens to be incomplete, the last rsync process was
+ * aborted and we reuse this one. Otherwise we look at snapshots which
+ * could be removed (outdated and redundant snapshots) as candidates
+ * for recycling. If no outdated/redundant snapshot exists, we check if
+ * there is an orphaned snapshot, which likely is useless anyway.
+ *
+ * Only if no existing snapshot is suitable for recycling, we bite the
+ * bullet and create a new one.
+ */
+ s = get_newest_snapshot(&sl);
+ if (!s) /* no snapshots at all */
+ goto out;
+ /* re-use last snapshot if it is incomplete */
+ why = "aborted";
+ if ((s->flags & SS_COMPLETE) == 0)
+ goto out;
+ why = "outdated";
+ s = find_outdated_snapshot(&sl);
+ if (s)
+ goto out;
+ why = "redundant";
+ s = find_redundant_snapshot(&sl);
+ if (s)
+ goto out;
+ why = "orphaned";
+ s = find_orphaned_snapshot(&sl);
+out:
+ if (s) {
+ DSS_NOTICE_LOG(("recycling %s snapshot %s\n", why, s->name));
+ ret = dss_rename(s->name, new_name);
+ }
+ if (ret >= 0)
+ DSS_NOTICE_LOG(("creating %s\n", new_name));
+ free(new_name);
+ free_snapshot_list(&sl);
+ return ret;
+}
+
+static void create_rsync_argv(char ***argv, int64_t *num)
+{
+ char *logname;
+ int i = 0, j, N = OPT_GIVEN(DSS, RSYNC_OPTION);
+ struct snapshot_list sl;
+ static bool seeded;
+
+ dss_get_snapshot_list(&sl);
+ assert(!name_of_reference_snapshot);
+ name_of_reference_snapshot = name_of_newest_complete_snapshot(&sl);
+ free_snapshot_list(&sl);
+
+ *argv = dss_malloc((15 + N) * sizeof(char *));
+ (*argv)[i++] = dss_strdup("rsync");
+ (*argv)[i++] = dss_strdup("-a");
+ (*argv)[i++] = dss_strdup("--delete");
+ if (!seeded) {
+ srandom((unsigned)time(NULL)); /* no need to be fancy here */
+ seeded = true;
+ }
+ if (1000 * (random() / (RAND_MAX + 1.0)) < OPT_UINT32_VAL(DSS, CHECKSUM)) {
+ DSS_NOTICE_LOG(("adding --checksum to rsync options\n"));
+ (*argv)[i++] = dss_strdup("--checksum");
+ }
+ for (j = 0; j < N; j++)
+ (*argv)[i++] = dss_strdup(lls_string_val(j,
+ OPT_RESULT(DSS, RSYNC_OPTION)));
+ if (name_of_reference_snapshot) {
+ DSS_INFO_LOG(("using %s as reference\n", name_of_reference_snapshot));
+ (*argv)[i++] = make_message("--link-dest=../%s",
+ name_of_reference_snapshot);
+ } else
+ DSS_INFO_LOG(("no suitable reference snapshot found\n"));
+ logname = dss_logname();
+ if (use_rsync_locally(logname))
+ (*argv)[i++] = dss_strdup(OPT_STRING_VAL(DSS, SOURCE_DIR));
+ else
+ (*argv)[i++] = make_message("%s@%s:%s/",
+ OPT_GIVEN(DSS, REMOTE_USER)?
+ OPT_STRING_VAL(DSS, REMOTE_USER) : logname,
+ OPT_STRING_VAL(DSS, REMOTE_HOST),
+ OPT_STRING_VAL(DSS, SOURCE_DIR));
+ free(logname);
+ *num = get_current_time();
+ (*argv)[i++] = incomplete_name(*num);
+ (*argv)[i++] = NULL;
+ for (j = 0; j < i; j++)
+ DSS_DEBUG_LOG(("argv[%d] = %s\n", j, (*argv)[j]));
+}
+
+static void free_rsync_argv(char **argv)
+{
+ int i;
+
+ if (!argv)
+ return;
+ for (i = 0; argv[i]; i++)
+ free(argv[i]);
+ free(argv);
+}
+
+static int create_snapshot(char **argv)
+{
+ int ret;
+
+ ret = rename_resume_snap(current_snapshot_creation_time);
+ if (ret < 0)
+ return ret;
+ dss_exec(&create_pid, argv[0], argv);
+ snapshot_creation_status = HS_RUNNING;
+ return ret;
+}
+
+static int select_loop(void)
+{
+ int ret;
+ /* check every 60 seconds for free disk space */
+ struct timeval tv;
+ char **rsync_argv = NULL;
+
+ for (;;) {
+ fd_set rfds;
+ struct timeval *tvp;
+
+ if (remove_pid)
+ tvp = NULL; /* sleep until rm hook/process dies */
+ else { /* sleep one minute */
+ tv.tv_sec = 60;
+ tv.tv_usec = 0;
+ tvp = &tv;
+ }
+ FD_ZERO(&rfds);
+ FD_SET(signal_pipe, &rfds);
+ ret = dss_select(signal_pipe + 1, &rfds, NULL, tvp);
+ if (ret < 0)
+ goto out;
+ if (FD_ISSET(signal_pipe, &rfds)) {
+ ret = handle_signal();
+ if (ret < 0)
+ goto out;
+ }
+ if (remove_pid)
+ continue;
+ if (snapshot_removal_status == HS_PRE_SUCCESS) {
+ ret = exec_rm();
+ if (ret < 0)
+ goto out;
+ continue;
+ }
+ if (snapshot_removal_status == HS_SUCCESS) {
+ post_remove_hook();
+ continue;
+ }
+ ret = try_to_free_disk_space();
+ if (ret < 0)
+ goto out;
+ if (snapshot_removal_status != HS_READY) {
+ stop_create_process();
+ continue;
+ }
+ restart_create_process();
+ switch (snapshot_creation_status) {
+ case HS_READY:
+ if (!next_snapshot_is_due())
+ continue;
+ pre_create_hook();
+ continue;
+ case HS_PRE_RUNNING:
+ case HS_RUNNING:
+ case HS_POST_RUNNING:
+ continue;
+ case HS_PRE_SUCCESS:
+ if (!name_of_reference_snapshot) {
+ free_rsync_argv(rsync_argv);
+ create_rsync_argv(&rsync_argv, ¤t_snapshot_creation_time);
+ }
+ ret = create_snapshot(rsync_argv);
+ if (ret < 0)
+ goto out;
+ continue;
+ case HS_NEEDS_RESTART:
+ if (!next_snapshot_is_due())
+ continue;
+ ret = create_snapshot(rsync_argv);
+ if (ret < 0)
+ goto out;
+ continue;
+ case HS_SUCCESS:
+ post_create_hook();
+ continue;
+ }
+ }
+out:
+ return ret;
+}
+
+static void exit_hook(int exit_code)
+{
+ pid_t pid;
+ char **argv, *tmp = dss_strdup(OPT_STRING_VAL(DSS, EXIT_HOOK));
+ unsigned n = split_args(tmp, &argv, " \t");
+
+ n++;
+ argv = dss_realloc(argv, (n + 1) * sizeof(char *));
+ argv[n - 1] = dss_strdup(dss_strerror(-exit_code));
+ argv[n] = NULL;
+ dss_exec(&pid, argv[0], argv);
+ free(argv[n - 1]);
+ free(argv);
+ free(tmp);
+}
+
+static void lock_dss_or_die(void)
+{
+ char *config_file = get_config_file_name();
+ int ret = lock_dss(config_file);
+
+ free(config_file);
+ if (ret < 0) {
+ DSS_EMERG_LOG(("failed to lock: %s\n", dss_strerror(-ret)));
+ exit(EXIT_FAILURE);
+ }
+}
+
+static int com_run(void)
+{
+ int ret, fd = -1;
+ char *config_file;