X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=server.c;h=95ff25cf77afb9f772a4336626e2bf340a2da868;hp=79e67818c87a1573b312c00091c8199f41c286ed;hb=0dd69d3988a677aeb8d0d3aea8364c664ac35fb9;hpb=b27f3e8dd53af7209626a9f0e98eb4770602c2e7 diff --git a/server.c b/server.c index 79e67818..95ff25cf 100644 --- a/server.c +++ b/server.c @@ -59,7 +59,7 @@ * - Ring buffer: \ref ringbuffer.c, \ref ringbuffer.h, * - Hashing: \ref hash.h, \ref sha1.h, \ref sha1.c, * - Crypto: \ref crypt.c. - * + * - Forward error correction: \ref fec.c */ #include @@ -291,15 +291,32 @@ static void signal_post_select(struct sched *s, struct task *t) if (pid != mmd->afs_pid) continue; PARA_EMERG_LOG("fatal: afs died\n"); - goto genocide; + kill(0, SIGTERM); + goto cleanup; } break; /* die on sigint/sigterm. Kill all children too. */ case SIGINT: case SIGTERM: PARA_EMERG_LOG("terminating on signal %d\n", st->signum); -genocide: kill(0, SIGTERM); + /* + * We must wait for afs because afs catches SIGINT/SIGTERM. + * Before reacting to the signal, afs might want to use the + * shared memory area and the mmd mutex. If we destroy this + * mutex too early and afs tries to lock the shared memory + * area, the call to mutex_lock() will fail and terminate the + * afs process. This leads to dirty osl tables. + * + * There's no such problem with the other children of the + * server process (the command handlers) as these reset their + * SIGINT/SIGTERM handlers to the default action, i.e. these + * processes get killed immediately by the above kill(). + */ + PARA_INFO_LOG("waiting for afs (pid %d) to die\n", + (int)mmd->afs_pid); + waitpid(mmd->afs_pid, NULL, 0); +cleanup: free(mmd->afd.afhi.chunk_table); free(mmd->afd.afhi.info_string); close_listed_fds(); @@ -342,6 +359,8 @@ static void command_post_select(struct sched *s, struct task *t) int new_fd, ret, i; char *peer_name; pid_t child_pid; + uint32_t *chunk_table; + char *info_string; if (!FD_ISSET(sct->listen_fd, &s->rfds)) return; @@ -354,6 +373,16 @@ static void command_post_select(struct sched *s, struct task *t) mmd->num_connects++; mmd->active_connections++; random(); + /* The chunk table and the info_string are pointers located in the + * mmd struct that point to dynamically allocated memory that must be + * freed by the parent and the child. However, as the mmd struct is in + * a shared memory area, there's no guarantee that after the fork these + * pointers are still valid in child context. As these two pointers are + * not used in the child anyway, we save them to local variables and + * free the memory via that copy in the child. + */ + info_string = mmd->afd.afhi.info_string; + chunk_table = mmd->afd.afhi.chunk_table; child_pid = fork(); if (child_pid < 0) { ret = -ERRNO_TO_PARA_ERROR(errno); @@ -364,6 +393,9 @@ static void command_post_select(struct sched *s, struct task *t) /* parent keeps accepting connections */ return; } + /* mmd might already have changed at this point */ + free(info_string); + free(chunk_table); alarm(ALARM_TIMEOUT); close_listed_fds(); para_signal_shutdown(); @@ -438,18 +470,20 @@ err: static int init_afs(void) { int ret, afs_server_socket[2]; + pid_t afs_pid; ret = socketpair(PF_UNIX, SOCK_DGRAM, 0, afs_server_socket); if (ret < 0) exit(EXIT_FAILURE); afs_socket_cookie = para_random((uint32_t)-1); - mmd->afs_pid = fork(); - if (mmd->afs_pid < 0) + afs_pid = fork(); + if (afs_pid < 0) exit(EXIT_FAILURE); - if (!mmd->afs_pid) { /* child (afs) */ + if (afs_pid == 0) { /* child (afs) */ close(afs_server_socket[0]); afs_init(afs_socket_cookie, afs_server_socket[1]); } + mmd->afs_pid = afs_pid; close(afs_server_socket[1]); ret = mark_fd_nonblocking(afs_server_socket[0]); if (ret < 0)