+ const char *signame, *process_name;
+
+ if (pid == 0)
+ return;
+ switch (sig) {
+ case SIGTERM: signame = "TERM"; break;
+ case SIGSTOP: signame = "STOP"; break;
+ case SIGCONT: signame = "CONT"; break;
+ default: signame = "????";
+ }
+
+ if (pid == create_pid)
+ process_name = "create";
+ else if (pid == remove_pid)
+ process_name = "remove";
+ else process_name = "??????";
+
+ if (msg)
+ DSS_INFO_LOG("%s\n", msg);
+ DSS_DEBUG_LOG("sending signal %d (%s) to pid %d (%s process)\n",
+ sig, signame, (int)pid, process_name);
+ if (kill(pid, sig) >= 0)
+ return;
+ DSS_INFO_LOG("failed to send signal %d (%s) to pid %d (%s process)\n",
+ sig, signame, (int)pid, process_name);
+}
+
+/**
+ * Suspend the create process by sending it SIGSTOP.
+ *
+ * Nothing happens if the create process is already flagged as stopped.
+ * Otherwise the signal is sent through dss_kill() and the flag is set
+ * unconditionally afterwards.
+ */
+static void stop_create_process(void)
+{
+	if (!create_process_stopped) {
+		dss_kill(create_pid, SIGSTOP, "suspending create process");
+		create_process_stopped = 1;
+	}
+}
+
+/**
+ * Resume a previously suspended create process by sending it SIGCONT.
+ *
+ * Nothing happens unless the create process is flagged as stopped.
+ * Otherwise the signal is sent through dss_kill() and the flag is cleared
+ * unconditionally afterwards.
+ */
+static void restart_create_process(void)
+{
+	if (create_process_stopped) {
+		dss_kill(create_pid, SIGCONT, "resuming create process");
+		create_process_stopped = 0;
+	}
+}
+
+/**
+ * Print a log message about the exit status of a child.
+ *
+ * pid is only used for the text of the message. status is a wait status
+ * value such as the one filled in by waitpid(2).
+ *
+ * A regular exit is logged at info level together with the exit code, a
+ * death by signal at notice level together with the signal number, and
+ * everything else at warning level.
+ */
+static void log_termination_msg(pid_t pid, int status)
+{
+	/* Braces keep the else branches attached however the macros expand. */
+	if (WIFEXITED(status)) {
+		DSS_INFO_LOG("child %i exited. Exit status: %i\n", (int)pid,
+			WEXITSTATUS(status));
+	} else if (WIFSIGNALED(status)) {
+		DSS_NOTICE_LOG("child %i was killed by signal %i\n", (int)pid,
+			WTERMSIG(status));
+	} else {
+		DSS_WARNING_LOG("child %i terminated abnormally\n", (int)pid);
+	}
+}
+
+/**
+ * Block until the given child has terminated.
+ *
+ * The function loops on the signal pipe. Each time dss_select() reports
+ * activity, the pending signal is fetched with next_signal(). On SIGCHLD the
+ * child is collected with a blocking waitpid(). Every other non-zero value
+ * returned by next_signal() causes SIGTERM to be sent to the child, after
+ * which waiting continues.
+ *
+ * On success the wait status is stored in *status, the termination is
+ * logged and the non-negative return value of waitpid() is returned. On
+ * failure the negative return value of dss_select(), or the negated DSS
+ * error code derived from errno after a failed waitpid(), is returned.
+ */
+static int wait_for_process(pid_t pid, int *status)
+{
+	int ret;
+
+	DSS_DEBUG_LOG("Waiting for process %d to terminate\n", (int)pid);
+	while (1) {
+		fd_set rfds;
+		int signum;
+
+		FD_ZERO(&rfds);
+		FD_SET(signal_pipe, &rfds);
+		ret = dss_select(signal_pipe + 1, &rfds, NULL, NULL);
+		if (ret < 0)
+			break;
+		signum = next_signal();
+		if (signum == 0) /* nothing to handle, select again */
+			continue;
+		if (signum == SIGCHLD) {
+			ret = waitpid(pid, status, 0);
+			if (ret >= 0)
+				break;
+			if (errno != EINTR) { /* error */
+				ret = -ERRNO_TO_DSS_ERROR(errno);
+				break;
+			}
+			/* An interrupted waitpid() falls through to the kill. */
+		}
+		/* SIGINT or SIGTERM */
+		dss_kill(pid, SIGTERM, "killing child process");
+	}
+	if (ret >= 0)
+		log_termination_msg(pid, *status);
+	else
+		DSS_ERROR_LOG("failed to wait for process %d\n", (int)pid);
+	return ret;
+}
+
+/**
+ * Update the removal state after the process run in the HS_PRE_RUNNING
+ * state has terminated.
+ *
+ * A regular exit with code zero moves the removal state to HS_PRE_SUCCESS.
+ * In all other cases the state is reset to HS_READY and next_removal_check
+ * is set to the current time plus 60 seconds.
+ */
+static void handle_pre_remove_exit(int status)
+{
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+		snapshot_removal_status = HS_PRE_SUCCESS;
+		return;
+	}
+	snapshot_removal_status = HS_READY;
+	gettimeofday(&next_removal_check, NULL);
+	next_removal_check.tv_sec += 60;
+}
+
+/**
+ * Update the removal state after the process run in the HS_RUNNING state
+ * has terminated.
+ *
+ * Returns 1 and sets the removal state to HS_SUCCESS if the process exited
+ * regularly with code zero. Otherwise the state is reset to HS_READY and
+ * -E_INVOLUNTARY_EXIT (no regular exit) or -E_BAD_EXIT_CODE (non-zero exit
+ * code) is returned.
+ */
+static int handle_rm_exit(int status)
+{
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+		snapshot_removal_status = HS_SUCCESS;
+		return 1;
+	}
+	snapshot_removal_status = HS_READY;
+	return WIFEXITED(status) ? -E_BAD_EXIT_CODE : -E_INVOLUNTARY_EXIT;
+}
+
+/**
+ * Reset the removal state to HS_READY.
+ *
+ * Called by handle_remove_exit() when a child terminates while the removal
+ * state is HS_POST_RUNNING. The exit status of that child is not examined.
+ */
+static void handle_post_remove_exit(void)
+{
+ snapshot_removal_status = HS_READY;
+}
+
+/**
+ * Process the termination of the remove process.
+ *
+ * Dispatches on the current removal state to the matching exit handler.
+ * If the removal state is HS_READY afterwards, the snapshot currently
+ * being removed is freed and the pointer to it is cleared. remove_pid is
+ * reset to zero in every case.
+ *
+ * Returns 1 for the HS_PRE_RUNNING and HS_POST_RUNNING states, the return
+ * value of handle_rm_exit() for HS_RUNNING, and -E_BUG for any other state.
+ */
+static int handle_remove_exit(int status)
+{
+	struct snapshot *s = snapshot_currently_being_removed;
+	int ret = 1;
+
+	assert(s);
+	if (snapshot_removal_status == HS_PRE_RUNNING)
+		handle_pre_remove_exit(status);
+	else if (snapshot_removal_status == HS_RUNNING)
+		ret = handle_rm_exit(status);
+	else if (snapshot_removal_status == HS_POST_RUNNING)
+		handle_post_remove_exit();
+	else
+		ret = -E_BUG;
+
+	/* The handlers above may have reset the state to HS_READY. */
+	if (snapshot_removal_status == HS_READY) {
+		free(s->name);
+		free(s);
+		snapshot_currently_being_removed = NULL;
+	}
+	remove_pid = 0;
+	return ret;
+}
+
+/**
+ * Wait for the running remove process and process its exit status.
+ *
+ * Asserts that remove_pid is non-zero and that the removal state is one of
+ * HS_PRE_RUNNING, HS_RUNNING or HS_POST_RUNNING.
+ *
+ * Returns the negative value from wait_for_process() if waiting failed,
+ * otherwise the return value of handle_remove_exit().
+ */
+static int wait_for_remove_process(void)
+{
+	int status;
+	int ret;
+
+	assert(remove_pid);
+	assert(snapshot_removal_status == HS_PRE_RUNNING ||
+		snapshot_removal_status == HS_RUNNING ||
+		snapshot_removal_status == HS_POST_RUNNING);
+	ret = wait_for_process(remove_pid, &status);
+	return ret < 0 ? ret : handle_remove_exit(status);
+}
+
+/**
+ * Update the creation state after the rsync process has terminated.
+ *
+ * - No regular exit: state becomes HS_READY, returns -E_INVOLUNTARY_EXIT.
+ * - Exit code 12 or 13: state becomes HS_NEEDS_RESTART, next_snapshot_time
+ *   is set to the current time plus 60, returns 1.
+ * - Exit code 0, 23 or 24: the incomplete snapshot is renamed. On success
+ *   the state becomes HS_SUCCESS and the name of the reference snapshot is
+ *   freed. The return value is that of rename_incomplete_snapshot().
+ * - Any other exit code: state becomes HS_READY, returns -E_BAD_EXIT_CODE.
+ *
+ * In all cases create_process_stopped is cleared before returning.
+ */
+static int handle_rsync_exit(int status)
+{
+	int ret;
+
+	if (!WIFEXITED(status)) {
+		DSS_ERROR_LOG("rsync process %d died involuntary\n", (int)create_pid);
+		ret = -E_INVOLUNTARY_EXIT;
+		snapshot_creation_status = HS_READY;
+	} else {
+		int es = WEXITSTATUS(status);
+
+		switch (es) {
+		/*
+		 * Restart rsync on non-fatal errors:
+		 * 12: Error in rsync protocol data stream
+		 * 13: Errors with program diagnostics
+		 */
+		case 12:
+		case 13:
+			DSS_WARNING_LOG("rsync process %d returned %d -- restarting\n",
+				(int)create_pid, es);
+			snapshot_creation_status = HS_NEEDS_RESTART;
+			next_snapshot_time = get_current_time() + 60;
+			ret = 1;
+			break;
+		/* These exit codes are treated as a completed snapshot. */
+		case 0:
+		case 23:
+		case 24:
+			ret = rename_incomplete_snapshot(current_snapshot_creation_time);
+			if (ret >= 0) {
+				snapshot_creation_status = HS_SUCCESS;
+				free(name_of_reference_snapshot);
+				name_of_reference_snapshot = NULL;
+			}
+			break;
+		default:
+			DSS_ERROR_LOG("rsync process %d returned %d\n", (int)create_pid, es);
+			ret = -E_BAD_EXIT_CODE;
+			snapshot_creation_status = HS_READY;
+		}
+	}
+	create_process_stopped = 0;
+	return ret;
+}
+
+/**
+ * Update the creation state after the pre-create hook has terminated.
+ *
+ * - No regular exit: state becomes HS_READY, returns -E_INVOLUNTARY_EXIT.
+ * - Exit code zero: state becomes HS_PRE_SUCCESS, returns 1.
+ * - Non-zero exit code: snapshot creation is deferred by setting
+ *   next_snapshot_time to the current time plus 60, the state becomes
+ *   HS_READY and zero is returned.
+ *
+ * The notice about a failing hook is rate limited through a static counter
+ * which is reset whenever the hook succeeds.
+ */
+static int handle_pre_create_hook_exit(int status)
+{
+	static int warn_count;
+	int es;
+
+	if (!WIFEXITED(status)) {
+		snapshot_creation_status = HS_READY;
+		return -E_INVOLUNTARY_EXIT;
+	}
+	es = WEXITSTATUS(status);
+	if (es == 0) {
+		warn_count = 0;
+		snapshot_creation_status = HS_PRE_SUCCESS;
+		return 1;
+	}
+	if (warn_count == 0) {
+		DSS_NOTICE_LOG("pre_create_hook %s returned %d\n",
+			conf.pre_create_hook_arg, es);
+		DSS_NOTICE_LOG("deferring snapshot creation...\n");
+		warn_count = 60; /* warn only once per hour */
+	} else {
+		warn_count--;
+	}
+	next_snapshot_time = get_current_time() + 60;
+	snapshot_creation_status = HS_READY;
+	return 0;
+}
+
+/**
+ * Reap a terminated child and dispatch to the matching exit handler.
+ *
+ * A non-positive return value of reap_child() is passed through unchanged.
+ * If the reaped child is the create process, the handler is selected by the
+ * current creation state and create_pid is reset to zero afterwards. If it
+ * is the remove process, handle_remove_exit() takes over.
+ *
+ * Returns the value of the selected handler, or -E_BUG if the child is
+ * neither the create nor the remove process, or if the create process died
+ * in a state in which no create child should be running. In the latter
+ * case create_pid is left untouched.
+ */
+static int handle_sigchld(void)
+{
+	pid_t pid;
+	int status;
+	int ret;
+
+	ret = reap_child(&pid, &status);
+	if (ret <= 0)
+		return ret;
+
+	if (pid != create_pid) {
+		if (pid == remove_pid)
+			return handle_remove_exit(status);
+		DSS_EMERG_LOG("BUG: unknown process %d died\n", (int)pid);
+		return -E_BUG;
+	}
+
+	switch (snapshot_creation_status) {
+	case HS_PRE_RUNNING:
+		ret = handle_pre_create_hook_exit(status);
+		break;
+	case HS_RUNNING:
+		ret = handle_rsync_exit(status);
+		break;
+	case HS_POST_RUNNING:
+		snapshot_creation_status = HS_READY;
+		ret = 1;
+		break;
+	default:
+		DSS_EMERG_LOG("BUG: create can't die in status %d\n",
+			snapshot_creation_status);
+		return -E_BUG;
+	}
+	create_pid = 0;
+	return ret;
+}
+
+static int check_config(void)
+{
+ if (conf.unit_interval_arg <= 0) {
+ DSS_ERROR_LOG("bad unit interval: %i\n", conf.unit_interval_arg);
+ return -E_INVALID_NUMBER;
+ }
+ DSS_DEBUG_LOG("unit interval: %i day(s)\n", conf.unit_interval_arg);