]> git.tuebingen.mpg.de Git - dss.git/blobdiff - dss.c
Merge branch 'master' into rm_hook
[dss.git] / dss.c
diff --git a/dss.c b/dss.c
index 82cdc94abd157eb750f7f050bc494e86fa5ef95d..6dbe11f6ba7e5fd3541a0364228b6fc81562b793 100644 (file)
--- a/dss.c
+++ b/dss.c
@@ -40,24 +40,22 @@ static struct gengetopt_args_info conf;
 static FILE *logfile;
 /** The read end of the signal pipe */
 static int signal_pipe;
-/** Process id of current rsync process. */
-static pid_t rsync_pid;
-/** Whether the rsync process is currently stopped */
-static int rsync_stopped;
-/** Process id of current rm process. */
-static pid_t rm_pid;
+/** Process id of current pre-create-hook/rsync/post-create-hook process. */
+static pid_t create_pid;
+/** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
+static int create_process_stopped;
+/** Process id of current pre-remove/rm/post-remove process. */
+static pid_t remove_pid;
 /** When the next snapshot is due. */
 static struct timeval next_snapshot_time;
-/** The pid of the pre-create hook. */
-static pid_t pre_create_hook_pid;
-/** The pid of the post-create hook. */
-static pid_t post_create_hook_pid;
 /** Creation time of the snapshot currently being created. */
 static int64_t current_snapshot_creation_time;
 /** Needed by the post-create hook. */
 static char *path_to_last_complete_snapshot;
 /** \sa \ref snap.h for details. */
 static unsigned snapshot_creation_status;
+/** \sa \ref snap.h for details. */
+static unsigned snapshot_removal_status;
 
 
 DEFINE_DSS_ERRLIST;
@@ -192,18 +190,19 @@ out:
 }
 
 
-static int remove_snapshot(struct snapshot *s)
+static int remove_snapshot(struct snapshot *s, char *why)
 {
        int fds[3] = {0, 0, 0};
-       assert(!rm_pid);
+       assert(!remove_pid);
        char *new_name = being_deleted_name(s);
        int ret = dss_rename(s->name, new_name);
        char *argv[] = {"rm", "-rf", new_name, NULL};
 
        if (ret < 0)
                goto out;
-       DSS_NOTICE_LOG("removing %s (interval = %i)\n", s->name, s->interval);
-       ret = dss_exec(&rm_pid, argv[0], argv, fds);
+       DSS_NOTICE_LOG("removing %s snapshot %s (interval = %i)\n",
+               why, s->name, s->interval);
+       ret = dss_exec(&remove_pid, argv[0], argv, fds);
 out:
        free(new_name);
        return ret;
@@ -271,7 +270,7 @@ static int remove_redundant_snapshot(struct snapshot_list *sl)
                                victim->name, victim->interval);
                        continue;
                }
-               ret = remove_snapshot(victim);
+               ret = remove_snapshot(victim, "redundant");
                return ret < 0? ret : 1;
        }
        return 0;
@@ -294,7 +293,7 @@ static int remove_outdated_snapshot(struct snapshot_list *sl)
                                s->name, s->interval);
                        continue;
                }
-               ret = remove_snapshot(s);
+               ret = remove_snapshot(s, "outdated");
                if (ret < 0)
                        return ret;
                return 1;
@@ -311,7 +310,7 @@ static int remove_oldest_snapshot(struct snapshot_list *sl)
        DSS_INFO_LOG("oldest snapshot: %s\n", s->name);
        if (snapshot_is_being_created(s))
                return 0;
-       return remove_snapshot(s);
+       return remove_snapshot(s, "oldest");
 }
 
 static int rename_incomplete_snapshot(int64_t start)
@@ -370,8 +369,8 @@ static int pre_create_hook(void)
                snapshot_creation_status = SCS_PRE_HOOK_SUCCESS;
                return 0;
        }
-       DSS_NOTICE_LOG("executing %s\n", conf.pre_create_hook_arg);
-       ret = dss_exec_cmdline_pid(&pre_create_hook_pid,
+       DSS_DEBUG_LOG("executing %s\n", conf.pre_create_hook_arg);
+       ret = dss_exec_cmdline_pid(&create_pid,
                conf.pre_create_hook_arg, fds);
        if (ret < 0)
                return ret;
@@ -392,7 +391,7 @@ static int post_create_hook(void)
        cmd = make_message("%s %s/%s", conf.post_create_hook_arg,
                conf.dest_dir_arg, path_to_last_complete_snapshot);
        DSS_NOTICE_LOG("executing %s\n", cmd);
-       ret = dss_exec_cmdline_pid(&post_create_hook_pid, cmd, fds);
+       ret = dss_exec_cmdline_pid(&create_pid, cmd, fds);
        free(cmd);
        if (ret < 0)
                return ret;
@@ -408,23 +407,22 @@ static void kill_process(pid_t pid)
        kill(pid, SIGTERM);
 }
 
-static void stop_rsync_process(void)
+static void stop_create_process(void)
 {
-       if (!rsync_pid || rsync_stopped)
+       if (!create_pid || create_process_stopped) /* nothing running, or already stopped */
                return;
-       kill(SIGSTOP, rsync_pid);
-       rsync_stopped = 1;
+       kill(create_pid, SIGSTOP); /* kill(2) takes the pid first, then the signal */
+       create_process_stopped = 1;
 }
 
-static void restart_rsync_process(void)
+static void restart_create_process(void)
 {
-       if (!rsync_pid || !rsync_stopped)
+       if (!create_pid || !create_process_stopped) /* nothing running, or not stopped */
                return;
-       kill (SIGCONT, rsync_pid);
-       rsync_stopped = 0;
+       kill(create_pid, SIGCONT); /* kill(2) takes the pid first, then the signal */
+       create_process_stopped = 0;
 }
 
-
 /**
  * Print a log message about the exit status of a child.
  */
@@ -478,7 +476,7 @@ static int wait_for_process(pid_t pid, int *status)
 
 static int handle_rm_exit(int status)
 {
-       rm_pid = 0;
+       remove_pid = 0;
        if (!WIFEXITED(status))
                return -E_INVOLUNTARY_EXIT;
        if (WEXITSTATUS(status))
@@ -488,7 +486,7 @@ static int handle_rm_exit(int status)
 
 static int wait_for_rm_process(void)
 {
-       int status, ret = wait_for_process(rm_pid, &status);
+       int status, ret = wait_for_process(remove_pid, &status);
 
        if (ret < 0)
                return ret;
@@ -500,7 +498,7 @@ static int handle_rsync_exit(int status)
        int es, ret;
 
        if (!WIFEXITED(status)) {
-               DSS_ERROR_LOG("rsync process %d died involuntary\n", (int)rsync_pid);
+               DSS_ERROR_LOG("rsync process %d died involuntary\n", (int)create_pid);
                ret = -E_INVOLUNTARY_EXIT;
                snapshot_creation_status = SCS_READY;
                compute_next_snapshot_time();
@@ -509,7 +507,7 @@ static int handle_rsync_exit(int status)
        es = WEXITSTATUS(status);
        if (es == 13) { /* Errors with program diagnostics */
                DSS_WARNING_LOG("rsync process %d returned %d -- restarting\n",
-                       (int)rsync_pid, es);
+                       (int)create_pid, es);
                snapshot_creation_status = SCS_RSYNC_NEEDS_RESTART;
                gettimeofday(&next_snapshot_time, NULL);
                next_snapshot_time.tv_sec += 60;
@@ -517,7 +515,7 @@ static int handle_rsync_exit(int status)
                goto out;
        }
        if (es != 0 && es != 23 && es != 24) {
-               DSS_ERROR_LOG("rsync process %d returned %d\n", (int)rsync_pid, es);
+               DSS_ERROR_LOG("rsync process %d returned %d\n", (int)create_pid, es);
                ret = -E_BAD_EXIT_CODE;
                snapshot_creation_status = SCS_READY;
                compute_next_snapshot_time();
@@ -528,14 +526,15 @@ static int handle_rsync_exit(int status)
                goto out;
        snapshot_creation_status = SCS_RSYNC_SUCCESS;
 out:
-       rsync_pid = 0;
-       rsync_stopped = 0;
+       create_pid = 0;
+       create_process_stopped = 0;
        return ret;
 }
 
 static int handle_pre_create_hook_exit(int status)
 {
        int es, ret;
+       static int warn_count;
 
        if (!WIFEXITED(status)) {
                snapshot_creation_status = SCS_READY;
@@ -545,15 +544,22 @@ static int handle_pre_create_hook_exit(int status)
        }
        es = WEXITSTATUS(status);
        if (es) {
+               if (!warn_count--) {
+                       DSS_NOTICE_LOG("pre_create_hook %s returned %d\n",
+                               conf.pre_create_hook_arg, es);
+                       DSS_NOTICE_LOG("deferring snapshot creation...\n");
+                       warn_count = 60; /* warn only once per hour */
+               }
                snapshot_creation_status = SCS_READY;
                compute_next_snapshot_time();
-               ret = -E_BAD_EXIT_CODE;
+               ret = 0;
                goto out;
        }
+       warn_count = 0;
        snapshot_creation_status = SCS_PRE_HOOK_SUCCESS;
        ret = 1;
 out:
-       pre_create_hook_pid = 0;
+       create_pid = 0;
        return ret;
 }
 
@@ -564,17 +570,25 @@ static int handle_sigchld(void)
 
        if (ret <= 0)
                return ret;
-       if (pid == rsync_pid)
-               return handle_rsync_exit(status);
-       if (pid == rm_pid)
-               return handle_rm_exit(status);
-       if (pid == pre_create_hook_pid)
-               return handle_pre_create_hook_exit(status);
-       if (pid == post_create_hook_pid) {
-               snapshot_creation_status = SCS_READY;
-               compute_next_snapshot_time();
-               return 1;
+
+       if (pid == create_pid) {
+               switch (snapshot_creation_status) {
+               case SCS_PRE_HOOK_RUNNING:
+                       return handle_pre_create_hook_exit(status);
+               case SCS_RSYNC_RUNNING:
+                       return handle_rsync_exit(status);
+               case SCS_POST_HOOK_RUNNING:
+                       snapshot_creation_status = SCS_READY;
+                       compute_next_snapshot_time();
+                       return 1;
+               default:
+                       DSS_EMERG_LOG("BUG: create can't die in status %d\n",
+                               snapshot_creation_status);
+                       return -E_BUG;
+               }
        }
+       if (pid == remove_pid)
+               return handle_rm_exit(status);
        DSS_EMERG_LOG("BUG: unknown process %d died\n", (int)pid);
        return -E_BUG;
 }
@@ -695,9 +709,9 @@ static int handle_signal(void)
        switch (sig) {
        case SIGINT:
        case SIGTERM:
-               restart_rsync_process();
-               kill_process(rsync_pid);
-               kill_process(rm_pid);
+               restart_create_process();
+               kill_process(create_pid);
+               kill_process(remove_pid);
                ret = -E_SIGNAL;
                break;
        case SIGHUP:
@@ -784,7 +798,7 @@ static int create_snapshot(char **argv)
        name = incomplete_name(current_snapshot_creation_time);
        DSS_NOTICE_LOG("creating new snapshot %s\n", name);
        free(name);
-       ret = dss_exec(&rsync_pid, argv[0], argv, fds);
+       ret = dss_exec(&create_pid, argv[0], argv, fds);
        if (ret < 0)
                return ret;
        snapshot_creation_status = SCS_RSYNC_RUNNING;
@@ -803,7 +817,7 @@ static int select_loop(void)
                int low_disk_space;
                struct timeval now, *tvp;
 
-               if (rm_pid)
+               if (remove_pid)
                        tvp = NULL; /* sleep until rm process dies */
                else { /* sleep one minute */
                        tv.tv_sec = 60;
@@ -821,7 +835,7 @@ static int select_loop(void)
                        if (ret < 0)
                                goto out;
                }
-               if (rm_pid)
+               if (remove_pid)
                        continue;
                ret = disk_space_low();
                if (ret < 0)
@@ -830,11 +844,11 @@ static int select_loop(void)
                ret = try_to_free_disk_space(low_disk_space);
                if (ret < 0)
                        goto out;
-               if (rm_pid) {
-                       stop_rsync_process();
+               if (remove_pid) {
+                       stop_create_process();
                        continue;
                }
-               restart_rsync_process();
+               restart_create_process();
                gettimeofday(&now, NULL);
                if (tv_diff(&next_snapshot_time, &now, NULL) > 0)
                        continue;
@@ -966,27 +980,27 @@ static int com_create(void)
        ret = pre_create_hook();
        if (ret < 0)
                return ret;
-       if (pre_create_hook_pid) {
-               ret = wait_for_process(pre_create_hook_pid, &status);
+       if (create_pid) {
+               ret = wait_for_process(create_pid, &status);
                if (ret < 0)
                        return ret;
                ret = handle_pre_create_hook_exit(status);
-               if (ret < 0)
+               if (ret <= 0) /* error, or pre-create failed */
                        return ret;
        }
        create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
        ret = create_snapshot(rsync_argv);
        if (ret < 0)
                goto out;
-       ret = wait_for_process(rsync_pid, &status);
+       ret = wait_for_process(create_pid, &status);
        if (ret < 0)
                goto out;
        ret = handle_rsync_exit(status);
        if (ret < 0)
                goto out;
        post_create_hook();
-       if (post_create_hook_pid)
-               ret = wait_for_process(post_create_hook_pid, &status);
+       if (create_pid)
+               ret = wait_for_process(create_pid, &status);
 out:
        free_rsync_argv(rsync_argv);
        return ret;