Currently signal_post_select() kills all child processes and then
calls exit(3) if SIGNINT or SIGTERM was received. This leaves all file
descriptors open and memory blocks allocated, which makes debugging
memory leaks difficult because the valgrind output is hard to read.
This patch changes the server to cleanly shutdown the scheduler and
deallocate resources (close file descriptors, free memory, destroy
locks and shared memory areas) before exit(3) is called.
PARA_ERROR(TARGET_EXISTS, "requested target is already present"),\
PARA_ERROR(TARGET_NOT_FOUND, "requested target not found"), \
PARA_ERROR(TASK_STARTED, "task started"), \
PARA_ERROR(TARGET_EXISTS, "requested target is already present"),\
PARA_ERROR(TARGET_NOT_FOUND, "requested target not found"), \
PARA_ERROR(TASK_STARTED, "task started"), \
+ PARA_ERROR(DEADLY_SIGNAL, "termination request by signal"), \
PARA_ERROR(TOO_MANY_CLIENTS, "maximal number of stat clients exceeded"), \
PARA_ERROR(UCRED_PERM, "permission denied"), \
PARA_ERROR(UDP_OVERRUN, "output buffer overrun"), \
PARA_ERROR(TOO_MANY_CLIENTS, "maximal number of stat clients exceeded"), \
PARA_ERROR(UCRED_PERM, "permission denied"), \
PARA_ERROR(UDP_OVERRUN, "output buffer overrun"), \
PARA_EMERG_LOG("terminating on signal %d\n", signum);
kill(0, SIGTERM);
/*
PARA_EMERG_LOG("terminating on signal %d\n", signum);
kill(0, SIGTERM);
/*
- * We must wait for afs because afs catches SIGINT/SIGTERM.
- * Before reacting to the signal, afs might want to use the
+ * We must wait for all of our children to die. For the afs
+ * process or a command handler might want to use the
* shared memory area and the mmd mutex. If we destroy this
* mutex too early and afs tries to lock the shared memory
* area, the call to mutex_lock() will fail and terminate the
* afs process. This leads to dirty osl tables.
* shared memory area and the mmd mutex. If we destroy this
* mutex too early and afs tries to lock the shared memory
* area, the call to mutex_lock() will fail and terminate the
* afs process. This leads to dirty osl tables.
- *
- * There's no such problem with the other children of the
- * server process (the command handlers) as these reset their
- * SIGINT/SIGTERM handlers to the default action, i.e. these
- * processes get killed immediately by the above kill().
- PARA_INFO_LOG("waiting for afs (pid %d) to die\n",
- (int)afs_pid);
- waitpid(afs_pid, NULL, 0);
+ PARA_INFO_LOG("waiting for child processes to die\n");
+ mutex_unlock(mmd_mutex);
+ while (wait(NULL) != -1 || errno != ECHILD)
+ ; /* still at least one child alive */
+ mutex_lock(mmd_mutex);
cleanup:
free(mmd->afd.afhi.chunk_table);
cleanup:
free(mmd->afd.afhi.chunk_table);
- close_listed_fds();
- mutex_destroy(mmd_mutex);
- shm_detach(mmd);
- exit(EXIT_FAILURE);
+ task_notify_all(s, E_DEADLY_SIGNAL);
+ return -E_DEADLY_SIGNAL;
server_init(argc, argv, sct);
mutex_lock(mmd_mutex);
ret = schedule(&sched);
server_init(argc, argv, sct);
mutex_lock(mmd_mutex);
ret = schedule(&sched);
+ /*
+ * We hold the mmd lock: it was re-acquired in server_select()
+ * after the select call.
+ */
+ mutex_unlock(mmd_mutex);
sched_shutdown(&sched);
signal_shutdown(signal_task);
if (!process_is_command_handler()) { /* parent (server) */
sched_shutdown(&sched);
signal_shutdown(signal_task);
if (!process_is_command_handler()) { /* parent (server) */
+ mutex_destroy(mmd_mutex);
+ shm_detach(mmd);
if (ret < 0)
PARA_EMERG_LOG("%s\n", para_strerror(-ret));
} else {
if (ret < 0)
PARA_EMERG_LOG("%s\n", para_strerror(-ret));
} else {
- /*
- * We hold the mmd lock: it was re-acquired in server_select()
- * after the select call.
- */
- mutex_unlock(mmd_mutex);
alarm(ALARM_TIMEOUT);
close_listed_fds();
ret = handle_connect(sct->child_fd);
}
vss_shutdown();
alarm(ALARM_TIMEOUT);
close_listed_fds();
ret = handle_connect(sct->child_fd);
}
vss_shutdown();
lls_free_parse_result(server_lpr, CMD_PTR);
if (server_lpr != cmdline_lpr)
lls_free_parse_result(cmdline_lpr, CMD_PTR);
lls_free_parse_result(server_lpr, CMD_PTR);
if (server_lpr != cmdline_lpr)
lls_free_parse_result(cmdline_lpr, CMD_PTR);