From 4ebaf62d6bbd0498d93e19bc4f0a3c9564b1a8f5 Mon Sep 17 00:00:00 2001
From: Gerrit Renker
Date: Sun, 30 May 2010 18:10:42 +0200
Subject: [PATCH] udp_send: add a time window for errors

This implements a grace period during which ICMP Destination Unreachable /
Port Unreachable messages from the peer are ignored, with the purpose of
catching
 * synchronisation problems (e.g., receiver started after sender);
 * unforeseen events (e.g. delays, reboot, reconfiguration).

To avoid receiving persistent errors from unicast UDP clients, two time
windows are used:

 (a) The 'error-allowed' period t_A
     This time window starts when the first ECONNREFUSED error is seen and
     ends after t_A seconds. The value of t_A is a guess which should cover
     the expected time needed to sort any receiver problems out.

 (b) The 'error-free' period t_B
     During the t_B seconds following the interval t_A, no further connection
     errors are accepted; if an ECONNREFUSED is seen it will cause the target
     to be evicted from the list.

This windowing process restarts itself, i.e. the first error seen after
t_A+t_B will reset the counters.

The following examples illustrate the algorithm, where 'x' indicates receipt
of an ICMP error message.

 1) Some errors received during initial receiver setup

    |-x-x-x-x-x-x-x--|------------|------....
    |
    t_0          t_0 + t_A   t_0+t_A+t_B

    Since no errors are received after t_0 + t_A, no action will be taken.

 2) Persistent errors

    |-x-x-x-x-x-x-x-x|-x-x-x-x-x-x|
    |
    t_0          t_0 + t_A   t_0+t_A+t_B

    The first error received after t_0+t_A evicts the target.

 3) Recurring short errors

    |-x-x-x-x-x-x-x--|-//--|--------//---|-x-x-x-x-x-x-x--|------------|---...
    |
    t_0               t_0+t_A+t_B       t_1            t_1+t_A    t_1+t_A+t_B

    Here the counter is reset at t_1, the time of the first error seen after
    t_0+t_A+t_B. Since no more errors were seen after t_1+t_A, streaming
    continues.

For simplicity, the implementation uses t_A = t_B = 30 seconds.

The behaviour with an unavailable receiver is now:

 May 24 18:08:16 (0) (2702) vss_send: sending 123:0 (548 bytes)
 May 24 18:08:18 (0) (2702) vss_send: sending 132:2 (548 bytes)
 May 24 18:08:18 (2) (2702) udp_check_socket_state: Evicting 10.0.0.2#8000 after 31 seconds of connection errors.

 May 24 19:34:55 (0) (2702) vss_send: sending 5:14 (1232 bytes)
 May 24 19:35:10 (0) (2702) vss_send: sending 11:3 (1232 bytes)
 May 24 19:35:10 (2) (2702) udp_check_socket_state: Evicting 3ffe::2#8000 after 31 seconds of connection errors.
---
 udp_send.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 5 deletions(-)

diff --git a/udp_send.c b/udp_send.c
index 08429aa4..24ebf584 100644
--- a/udp_send.c
+++ b/udp_send.c
@@ -32,12 +32,21 @@
 #include "close_on_fork.h"
 #include "chunk_queue.h"
 
+/**
+ * Time window during which ICMP Destination/Port Unreachable messages are
+ * ignored, covering transient receiver problems such as restarting the
+ * client, rebooting, reconfiguration, or handover.
+ */
+#define UDP_MAX_UNREACHABLE_TIME 30
+
 /** Describes one entry in the list of targets for the udp sender. */
 struct udp_target {
 	/** The hostname (DNS name or IPv4/v6 address string). */
 	char host[MAX_HOSTLEN];
 	/** The UDP port. */
 	int port;
+	/** Start (seconds) of the current ICMP Port Unreachable error window. */
+	time_t last_unreachable;
 	/** Common sender client data */
 	struct sender_client *sc;
 	/** The opaque structure returned by vss_add_fec_client(). */
@@ -219,6 +228,38 @@ static int udp_init_fec(struct sender_client *sc)
 	return mps;
 }
 
+/** Check and clear SO_ERROR. Returns 0 if OK/tolerated, negative on error. */
+static int udp_check_socket_state(struct udp_target *ut)
+{
+	int ret;
+	socklen_t errlen = sizeof(ret);
+
+	if (getsockopt(ut->sc->fd, SOL_SOCKET, SO_ERROR, &ret, &errlen) < 0) {
+		PARA_ERROR_LOG("SO_ERROR failed: %s\n", strerror(errno));
+		return 0;
+	} else if (ret == 0) {
+		return 0;
+	} else if (ret == ECONNREFUSED) {
+		time_t dist = now->tv_sec - ut->last_unreachable;
+
+		if (dist <= UDP_MAX_UNREACHABLE_TIME) {
+			return 0;
+		} else if (dist > 2 * UDP_MAX_UNREACHABLE_TIME) {
+			ut->last_unreachable = now->tv_sec;
+			return 0;
+		} else {
+			/*
+			 * UDP_MAX_UNREACHABLE_TIME < dist <= 2 * UDP_MAX_UNREACHABLE_TIME
+			 * No errors are allowed in this window: report the error.
+			 */
+			PARA_NOTICE_LOG("Evicting %s#%d after %d seconds "
+				"of connection errors.\n",
+				ut->host, ut->port, (int)dist);
+		}
+	}
+	return -ERRNO_TO_PARA_ERROR(ret);
+}
+
 static int udp_send_fec(struct sender_client *sc, char *buf, size_t len)
 {
 	struct udp_target *ut = sc->private_data;
@@ -226,21 +267,27 @@ static int udp_send_fec(struct sender_client *sc, char *buf, size_t len)
 
 	if (sender_status == SENDER_OFF)
 		return 0;
+	if (len == 0 && !cq_peek(ut->sc->cq))
+		return 0;
+	ret = udp_check_socket_state(ut);
+	if (ret < 0)
+		goto fail;
 	ret = send_queued_chunks(sc->fd, sc->cq);
-	if (ret == -ERRNO_TO_PARA_ERROR(ECONNREFUSED))
-		ret = 0;
 	if (ret < 0)
 		goto fail;
-	if (!len)
-		return 0;
 	if (!ret) { /* still data left in the queue */
 		ret = cq_force_enqueue(sc->cq, buf, len);
 		assert(ret >= 0);
 		return 0;
 	}
 	ret = write_nonblock(sc->fd, buf, len);
-	if (ret == -ERRNO_TO_PARA_ERROR(ECONNREFUSED))
+	if (ret == -ERRNO_TO_PARA_ERROR(ECONNREFUSED)) {
+		/*
+		 * Happens if meanwhile an ICMP Destination / Port Unreachable
+		 * has arrived. Ignore, persistent errors will be caught above.
+		 */
 		ret = 0;
+	}
 	if (ret < 0)
 		goto fail;
 	if (ret != len) {
-- 
2.30.2