Merge branch 'refs/heads/t/kill-w'
author Andre Noll <maan@tuebingen.mpg.de>
Sat, 18 Nov 2017 14:54:03 +0000 (15:54 +0100)
committer Andre Noll <maan@tuebingen.mpg.de>
Sat, 18 Nov 2017 14:56:55 +0000 (15:56 +0100)
Two patches which make life easier for shutdown scripts which need
to terminate the dss process, but would like to wait until the exit
hook has completed.

The merge conflicted in dss.suite, but this was trivial to fix.

Cooking for a week.

* refs/heads/t/kill-w:
  kill: New option --wait.
  run: Wait for children to die.

NEWS
dss.c
dss.suite
err.h

diff --git a/NEWS b/NEWS
index 09e7209..0f9b6d2 100644 (file)
--- a/NEWS
+++ b/NEWS
 
  - New option --checksum to let rsync compute checksums occasionally.
 
+ - The kill subcommand gained the new --wait option which instructs dss
+ to wait until the signalled process has terminated.
+
  - The --no-resume option has been removed.
 
+ - On exit, the run subcommand now waits for any previously spawned
+ rsync or rm processes to terminate.
+
  - The ls subcommand now shows the age of incomplete snapshots rather
  than 0:00.
 
diff --git a/dss.c b/dss.c
index d60f00b..491b8aa 100644 (file)
--- a/dss.c
+++ b/dss.c
@@ -287,11 +287,13 @@ static char *get_config_file_name(void)
        return config_file;
 }
 
-static int send_signal(int sig)
+static int send_signal(int sig, bool wait)
 {
        pid_t pid;
        char *config_file = get_config_file_name();
        int ret = get_dss_pid(config_file, &pid);
+       unsigned ms = 32;
+       struct timespec ts;
 
        free(config_file);
        if (ret < 0)
@@ -304,7 +306,23 @@ static int send_signal(int sig)
        ret = kill(pid, sig);
        if (ret < 0)
                return -ERRNO_TO_DSS_ERROR(errno);
-       return 1;
+       if (!wait)
+               return 1;
+       while (ms < 5000) {
+               ts.tv_sec = ms / 1000;
+               ts.tv_nsec = (ms % 1000) * 1000 * 1000;
+               ret = nanosleep(&ts, NULL);
+               if (ret < 0)
+                       return -ERRNO_TO_DSS_ERROR(errno);
+               ret = kill(pid, 0);
+               if (ret < 0) {
+                       if (errno != ESRCH)
+                               return -ERRNO_TO_DSS_ERROR(errno);
+                       return 1;
+               }
+               ms *= 2;
+       }
+       return -E_KILL_TIMEOUT;
 }
 
 struct signal_info {
@@ -363,6 +381,7 @@ static const struct signal_info signal_table[] = {
 
 static int com_kill(void)
 {
+       bool w_given = OPT_GIVEN(KILL, WAIT);
        const char *arg = OPT_STRING_VAL(KILL, SIGNAL);
        int ret, i;
 
@@ -373,17 +392,17 @@ static int com_kill(void)
                        return ret;
                if (val < 0 || val > SIGRTMAX)
                        return -ERRNO_TO_DSS_ERROR(EINVAL);
-               return send_signal(val);
+               return send_signal(val, w_given);
        }
        if (strncasecmp(arg, "sig", 3) == 0)
                arg += 3;
        if (strcasecmp(arg, "CLD") == 0)
-               return send_signal(SIGCHLD);
+               return send_signal(SIGCHLD, w_given);
        if (strcasecmp(arg, "IOT") == 0)
-               return send_signal(SIGABRT);
+               return send_signal(SIGABRT, w_given);
        for (i = 0; i < SIGNAL_TABLE_SIZE; i++)
                if (strcasecmp(arg, signal_table[i].name) == 0)
-                       return send_signal(signal_table[i].num);
+                       return send_signal(signal_table[i].num, w_given);
        DSS_ERROR_LOG(("invalid sigspec: %s\n", arg));
        return -ERRNO_TO_DSS_ERROR(EINVAL);
 }
@@ -1594,6 +1613,8 @@ static int com_run(void)
                ret = -E_BUG;
        kill_children();
        exit_hook(ret);
+       while (wait(NULL) >= 0 || errno != ECHILD)
+               ; /* still have children to wait for */
        return ret;
 }
 EXPORT_CMD_HANDLER(run);
diff --git a/dss.suite b/dss.suite
index 1e79870..f02c825 100644 (file)
--- a/dss.suite
+++ b/dss.suite
@@ -487,6 +487,27 @@ caption = Subcommands
 
                        Sending SIGHUP causes the running dss process to reload its config file.
                [/help]
+       [option wait]
+               short_opt = w
+               summary = wait until the signalled process has terminated
+               [help]
+                       This option is handy for system shutdown scripts which would like
+                       to terminate the dss daemon process.
+
+                       Without --wait the dss process which executes the kill subcommand
+                       exits right after the kill(2) system call returns. At this point the
+                       signalled process might still be alive (even if SIGKILL was sent).
+                       If --wait is given, the process waits until the signalled process
+                       has terminated or the timeout expires.
+
+                       If --wait is not given, the kill subcommand exits successfully if
+                       and only if the signal was sent (i.e., if there exists another dss
+                       process to receive the signal). With --wait it exits successfully
+                       if, additionally, the signalled process has terminated before the
+                       timeout expires.
+
+                       It makes only sense to use the option for signals which terminate dss.
+               [/help]
 [subcommand configtest]
        purpose = run a configuration file syntax test
        [description]
diff --git a/err.h b/err.h
index e747bf3..2280010 100644 (file)
--- a/err.h
+++ b/err.h
@@ -55,6 +55,7 @@ static inline char *dss_strerror(int num)
        DSS_ERROR(BAD_EXIT_CODE, "unexpected exit code"), \
        DSS_ERROR(SIGNAL_SIG_ERR, "signal() returned SIG_ERR"), \
        DSS_ERROR(SIGNAL, "caught terminating signal"), \
+       DSS_ERROR(KILL_TIMEOUT, "signal timeout expired"), \
        DSS_ERROR(BUG, "values of beta might cause dom!"), \
        DSS_ERROR(NOT_RUNNING, "dss not running"), \
        DSS_ERROR(ALREADY_RUNNING, "dss already running"), \