From 17c140cc24838c2b7049e2b4c17621f4284d53dc Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 7 Aug 2017 03:44:07 +0200 Subject: [PATCH] server: Shutdown the scheduler before handling commands. Currently the command handlers are called from the ->post_select method of the command task after the main server process has forked. When the command handler is done, it exits with the scheduler still active. This is not a problem per se, and it has been like this for more than a decade, but it does make it harder to debug memory leaks because we leak some resources. Valgrind complains about this, cluttering the output with pointless warnings. This commit cleans up the memory handling of the child process on exit. The command task of the child process notifies all tasks in its ->post_select method and then returns the new pseudo error code -E_CHILD_CONTEXT. This causes the scheduler to return to main(). The main() function checks via the new ->child_fd of struct server_command_task whether it is running in child context after schedule() has returned and calls the command handler in this case. --- command.c | 4 ++-- error.h | 1 + server.c | 52 ++++++++++++++++++++++++++++++++++++++++++---------- server.h | 2 +- vss.c | 3 +++ 5 files changed, 49 insertions(+), 13 deletions(-) diff --git a/command.c b/command.c index cd24c035..84b7d943 100644 --- a/command.c +++ b/command.c @@ -877,7 +877,7 @@ static int run_command(struct command_context *cc, struct iovec *iov) * * \sa alarm(2), \ref crypt.c, \ref crypt.h. */ -__noreturn void handle_connect(int fd) +int handle_connect(int fd) { int ret; unsigned char rand_buf[CHALLENGE_SIZE + 2 * SESSION_KEY_LEN]; @@ -987,5 +987,5 @@ out: } sc_free(cc->scc.recv); sc_free(cc->scc.send); - exit(ret < 0? EXIT_FAILURE : EXIT_SUCCESS); + return ret; } diff --git a/error.h b/error.h index 1904a6d6..0c741268 100644 --- a/error.h +++ b/error.h @@ -70,6 +70,7 @@ PARA_ERROR(BTR_EOF, "buffer tree: end of file"), \ PARA_ERROR(BTR_NAVAIL, "btr node: value currently unavailable"), \ PARA_ERROR(BTR_NO_CHILD, "btr node has no children"), \ + PARA_ERROR(CHILD_CONTEXT, "now running in child context"), \ PARA_ERROR(CHMOD, "failed to set socket mode"), \ PARA_ERROR(CLIENT_SYNTAX, "syntax error"), \ PARA_ERROR(CLIENT_WRITE, "client write error"), \ diff --git a/server.c b/server.c index 13082e78..4f1d5708 100644 --- a/server.c +++ b/server.c @@ -110,6 +110,8 @@ pid_t afs_pid = 0; struct server_command_task { /** TCP port on which para_server listens for connections. */ int listen_fd; + /* File descriptor for the accepted socket. */ + int child_fd; /** Copied from para_server's main function. */ int argc; /** Argument vector passed to para_server's main function. */ @@ -274,8 +276,12 @@ static void handle_sighup(void) static int signal_post_select(struct sched *s, __a_unused void *context) { - int signum = para_next_signal(&s->rfds); + int ret, signum; + ret = task_get_notification(signal_task->task); + if (ret < 0) + return ret; + signum = para_next_signal(&s->rfds); switch (signum) { case 0: return 0; @@ -285,7 +291,7 @@ static int signal_post_select(struct sched *s, __a_unused void *context) case SIGCHLD: for (;;) { pid_t pid; - int ret = para_reap_child(&pid); + ret = para_reap_child(&pid); if (ret <= 0) break; if (pid != afs_pid) @@ -353,12 +359,14 @@ static void command_pre_select(struct sched *s, void *context) static int command_post_select(struct sched *s, void *context) { struct server_command_task *sct = context; - int new_fd, ret, i; char *peer_name; pid_t child_pid; uint32_t *chunk_table; + ret = task_get_notification(sct->task); + if (ret < 0) + return ret; ret = para_accept(sct->listen_fd, &s->rfds, NULL, 0, &new_fd); if (ret <= 0) goto out; @@ -391,9 +399,7 @@ static int command_post_select(struct sched *s, void *context) PARA_INFO_LOG("accepted connection from %s\n", peer_name); /* mmd might already have changed at this point */ free(chunk_table); - alarm(ALARM_TIMEOUT); - close_listed_fds(); - signal_shutdown(signal_task); + sct->child_fd = new_fd; /* * put info on who we are serving into argv[0] to make * client ip visible in top/ps @@ -402,8 +408,21 @@ static int command_post_select(struct sched *s, void *context) memset(sct->argv[i], 0, strlen(sct->argv[i])); i = sct->argc - 1 - lls_num_inputs(cmdline_lpr); sprintf(sct->argv[i], "para_server (serving %s)", peer_name); - handle_connect(new_fd); - /* never reached*/ + /* ask other tasks to terminate */ + task_notify_all(s, E_CHILD_CONTEXT); + /* + * After we return, the scheduler calls server_select() with a minimal + * timeout value, because the remaining tasks have a notification + * pending. Next it calls the ->post_select method of these tasks, + * which will return negative in view of the notification. This causes + * schedule() to return as there are no more runnable tasks. + * + * Note that semaphores are not inherited across a fork(), so we don't + * hold the lock at this point. Since server_select() drops the lock + * prior to calling para_select(), we need to acquire it here. + */ + mutex_lock(mmd_mutex); + return -E_CHILD_CONTEXT; out: if (ret < 0) PARA_CRIT_LOG("%s\n", para_strerror(-ret)); @@ -416,6 +435,7 @@ static void init_server_command_task(struct server_command_task *sct, int ret; PARA_NOTICE_LOG("initializing tcp command socket\n"); + sct->child_fd = -1; sct->argc = argc; sct->argv = argv; ret = para_listen_simple(IPPROTO_TCP, OPT_UINT32_VAL(PORT)); @@ -611,10 +631,22 @@ int main(int argc, char *argv[]) mutex_lock(mmd_mutex); ret = schedule(&sched); sched_shutdown(&sched); + signal_shutdown(signal_task); + if (sct->child_fd < 0) { /* parent (server) */ + if (ret < 0) + PARA_EMERG_LOG("%s\n", para_strerror(-ret)); + } else { /* child (command handler) */ + /* + * We hold the lock: it was re-acquired in server_select() + * after the select call. + */ + mutex_unlock(mmd_mutex); + alarm(ALARM_TIMEOUT); + close_listed_fds(); + ret = handle_connect(sct->child_fd); + } lls_free_parse_result(server_lpr, CMD_PTR); if (server_lpr != cmdline_lpr) lls_free_parse_result(cmdline_lpr, CMD_PTR); - if (ret < 0) - PARA_EMERG_LOG("%s\n", para_strerror(-ret)); exit(ret < 0? EXIT_FAILURE : EXIT_SUCCESS); } diff --git a/server.h b/server.h index 1852dd50..fb92beff 100644 --- a/server.h +++ b/server.h @@ -113,6 +113,6 @@ extern struct lls_parse_result *server_lpr; #define ENUM_STRING_VAL(_name) (lls_enum_string_val(OPT_UINT32_VAL(_name), \ lls_opt(LSG_SERVER_PARA_SERVER_OPT_ ## _name, CMD_PTR))) -__noreturn void handle_connect(int fd); +int handle_connect(int fd); void parse_config_or_die(bool reload); char *server_get_tasks(void); diff --git a/vss.c b/vss.c index 84f5ada5..fbc35e1a 100644 --- a/vss.c +++ b/vss.c @@ -1083,6 +1083,9 @@ static int vss_post_select(struct sched *s, void *context) int ret, i; struct vss_task *vsst = context; + ret = task_get_notification(vsst->task); + if (ret < 0) + return ret; if (!vsst->map || vss_next() || vss_paused() || vss_repos()) { /* shut down senders and fec clients */ struct fec_client *fc, *tmp; -- 2.39.2