server: Exit cleanly on SIGINT/SIGTERM.

author Andre Noll <maan@tuebingen.mpg.de>

Mon, 7 Aug 2017 19:41:00 +0000 (21:41 +0200)

committer Andre Noll <maan@tuebingen.mpg.de>

Tue, 13 Mar 2018 02:28:10 +0000 (03:28 +0100)
author Andre Noll <maan@tuebingen.mpg.de>
Mon, 7 Aug 2017 19:41:00 +0000 (21:41 +0200)
committer Andre Noll <maan@tuebingen.mpg.de>
Tue, 13 Mar 2018 02:28:10 +0000 (03:28 +0100)
diff --git a/error.h b/error.h

index 0c7412681d4f9b637ab24a136d9461b83860842c..02f42246c97c9fda934a3b675008566eec351bf3 100644 (file)
--- a/error.h
+++ b/error.h
@@ -225,6 +225,7 @@
         PARA_ERROR(TARGET_EXISTS, "requested target is already present"),\
         PARA_ERROR(TARGET_NOT_FOUND, "requested target not found"), \
         PARA_ERROR(TASK_STARTED, "task started"), \
+       PARA_ERROR(DEADLY_SIGNAL, "termination request by signal"), \
         PARA_ERROR(TOO_MANY_CLIENTS, "maximal number of stat clients exceeded"), \
         PARA_ERROR(UCRED_PERM, "permission denied"), \
         PARA_ERROR(UDP_OVERRUN, "output buffer overrun"), \
diff --git a/server.c b/server.c

index dd0acdadec94384b5c62ea878b617be52ab02b0d..13c8c85f98339023b90415125e9df275a28c98f3 100644 (file)
--- a/server.c
+++ b/server.c
@@ -325,27 +325,22 @@ static int signal_post_select(struct sched *s, __a_unused void *context)
                 PARA_EMERG_LOG("terminating on signal %d\n", signum);
                 kill(0, SIGTERM);
                 /*
-                * We must wait for afs because afs catches SIGINT/SIGTERM.
-                * Before reacting to the signal, afs might want to use the
+                * We must wait for all of our children to die because the afs
+                * process or a command handler might want to use the
                  * shared memory area and the mmd mutex.  If we destroy this
                  * mutex too early and afs tries to lock the shared memory
                  * area, the call to mutex_lock() will fail and terminate the
                  * afs process. This leads to dirty osl tables.
-                *
-                * There's no such problem with the other children of the
-                * server process (the command handlers) as these reset their
-                * SIGINT/SIGTERM handlers to the default action, i.e.  these
-                * processes get killed immediately by the above kill().
                  */
-               PARA_INFO_LOG("waiting for afs (pid %d) to die\n",
-                       (int)afs_pid);
-               waitpid(afs_pid, NULL, 0);
+               PARA_INFO_LOG("waiting for child processes to die\n");
+               mutex_unlock(mmd_mutex);
+               while (wait(NULL) != -1 || errno != ECHILD)
+                       ; /* still at least one child alive */
+               mutex_lock(mmd_mutex);
  cleanup:
                 free(mmd->afd.afhi.chunk_table);
-               close_listed_fds();
-               mutex_destroy(mmd_mutex);
-               shm_detach(mmd);
-               exit(EXIT_FAILURE);
+               task_notify_all(s, E_DEADLY_SIGNAL);
+               return -E_DEADLY_SIGNAL;
         }
         return 0;
  }
@@ -649,22 +644,25 @@ int main(int argc, char *argv[])
         server_init(argc, argv, sct);
         mutex_lock(mmd_mutex);
         ret = schedule(&sched);
+       /*
+        * We hold the mmd lock: it was re-acquired in server_select()
+        * after the select call.
+        */
+       mutex_unlock(mmd_mutex);
         sched_shutdown(&sched);
         signal_shutdown(signal_task);
         if (!process_is_command_handler()) { /* parent (server) */
+               mutex_destroy(mmd_mutex);
+               shm_detach(mmd);
                 if (ret < 0)
                         PARA_EMERG_LOG("%s\n", para_strerror(-ret));
         } else {
-               /*
-                * We hold the mmd lock: it was re-acquired in server_select()
-                * after the select call.
-                */
-               mutex_unlock(mmd_mutex);
                 alarm(ALARM_TIMEOUT);
                 close_listed_fds();
                 ret = handle_connect(sct->child_fd);
         }
         vss_shutdown();
+       shm_detach(mmd);
         lls_free_parse_result(server_lpr, CMD_PTR);
         if (server_lpr != cmdline_lpr)
                 lls_free_parse_result(cmdline_lpr, CMD_PTR);
author	Andre Noll <maan@tuebingen.mpg.de>
	Mon, 7 Aug 2017 19:41:00 +0000 (21:41 +0200)
committer	Andre Noll <maan@tuebingen.mpg.de>
	Tue, 13 Mar 2018 02:28:10 +0000 (03:28 +0100)