+/**
+ * Map a transport-layer protocol onto the matching socket type.
+ *
+ * \param l4type The layer-4 type (\p IPPROTO_xxx).
+ *
+ * \return \p SOCK_DGRAM, \p SOCK_STREAM or \p SOCK_DCCP for UDP, TCP and DCCP,
+ * respectively, and -1 for every other protocol.
+ */
+static inline int sock_type(const unsigned l4type)
+{
+	if (l4type == IPPROTO_UDP)
+		return SOCK_DGRAM;
+	if (l4type == IPPROTO_TCP)
+		return SOCK_STREAM;
+	if (l4type == IPPROTO_DCCP)
+		return SOCK_DCCP;
+	return -1; /* not supported here */
+}
+
+/**
+ * Pretty-print transport-layer name.
+ *
+ * \param l4type The layer-4 type (\p IPPROTO_xxx).
+ *
+ * \return Constant string "UDP", "TCP" or "DCCP", or "UNKNOWN PROTOCOL" if
+ * \a l4type is none of these three.
+ */
+static const char *layer4_name(const unsigned l4type)
+{
+	if (l4type == IPPROTO_UDP)
+		return "UDP";
+	if (l4type == IPPROTO_TCP)
+		return "TCP";
+	if (l4type == IPPROTO_DCCP)
+		return "DCCP";
+	return "UNKNOWN PROTOCOL";
+}
+
+/**
+ * Flowopts: Transport-layer independent encapsulation of socket options.
+ *
+ * These collect individual socket options into a queue, which is disposed of
+ * by makesock() once it is done with it. The 'pre_conn_opt' structure is for
+ * internal use only and should not be visible elsewhere.
+ *
+ * Each entry owns private heap copies of its name and value: flowopt_add()
+ * duplicates both, and flowopt_cleanup() frees them together with the entry.
+ *
+ * \sa setsockopt(2), makesock()
+ */
+struct pre_conn_opt {
+ int sock_level; /**< Second argument to setsockopt() */
+ int sock_option; /**< Third argument to setsockopt() */
+ char *opt_name; /**< Stringified \a sock_option, printed if setting it fails */
+ void *opt_val; /**< Fourth argument to setsockopt(), NULL if no value was given */
+ socklen_t opt_len; /**< Fifth argument to setsockopt(), 0 if no value was given */
+
+ struct list_head node; /**< FIFO, as sockopt order matters. */
+};
+
+/**
+ * FIFO list of pre-connection socket options to be set.
+ *
+ * Allocated by flowopt_new(), filled by flowopt_add() and released by
+ * makesock().
+ */
+struct flowopts {
+ struct list_head sockopts; /**< Head of the queue of \p pre_conn_opt entries */
+};
+
+/**
+ * Allocate a flowopt queue that holds no options yet.
+ *
+ * \return Pointer to the new, empty queue. makesock() releases the queue that
+ * is handed to it.
+ */
+struct flowopts *flowopt_new(void)
+{
+	struct flowopts *fo = para_malloc(sizeof(struct flowopts));
+
+	INIT_LIST_HEAD(&fo->sockopts);
+	return fo;
+}
+
+/**
+ * Append new socket option to flowopt queue.
+ *
+ * The queue entry stores its own copies of \a name and \a val, so the caller
+ * keeps ownership of both arguments.
+ *
+ * \param fo   The flowopt queue to append to.
+ * \param lev  Level at which \a opt resides.
+ * \param opt  New option to add.
+ * \param name Stringified name of \a opt.
+ * \param val  The value to set \a opt to. If NULL, the entry carries no value
+ *             and a length of zero, regardless of \a len.
+ * \param len  Length of \a val in bytes.
+ *
+ * \sa setsockopt(2)
+ */
+void flowopt_add(struct flowopts *fo, int lev, int opt,
+		char *name, const void *val, int len)
+{
+	struct pre_conn_opt *entry = para_malloc(sizeof(*entry));
+
+	entry->sock_level = lev;
+	entry->sock_option = opt;
+	entry->opt_name = para_strdup(name);
+
+	/* Start out without a value, then attach a private copy if given. */
+	entry->opt_val = NULL;
+	entry->opt_len = 0;
+	if (val != NULL) {
+		entry->opt_val = para_malloc(len);
+		entry->opt_len = len;
+		memcpy(entry->opt_val, val, len);
+	}
+
+	/* Tail insertion keeps the options in the order they were added. */
+	list_add_tail(&entry->node, &fo->sockopts);
+}
+
+/**
+ * Append a boolean socket option to the flowopt queue.
+ *
+ * \param fo        The flowopt queue to append to.
+ * \param lev       Level at which \a opt resides.
+ * \param opt       New option to add.
+ * \param optname   Stringified name of \a opt.
+ * \param on_or_off Whether to switch the option on or off.
+ *
+ * \sa flowopt_add()
+ */
+void flowopt_add_bool(struct flowopts *fo, int lev, int opt,
+		char *optname, bool on_or_off)
+{
+	/* The kernel takes an 'int', so widen the flag before queueing it. */
+	const int flag = on_or_off ? 1 : 0;
+
+	flowopt_add(fo, lev, opt, optname, &flag, sizeof(flag));
+}
+
+/**
+ * Set the entire bunch of pre-connection options at once.
+ *
+ * \param sockfd The socket to apply the options to.
+ * \param fo     The queued options, applied in FIFO order. May be NULL, in
+ *               which case nothing is done.
+ *
+ * A failing setsockopt(2) is treated as fatal: the error is logged and the
+ * process exits with \p EXIT_FAILURE.
+ */
+static void flowopt_setopts(int sockfd, struct flowopts *fo)
+{
+	struct pre_conn_opt *pc;
+
+	if (fo == NULL)
+		return;
+
+	list_for_each_entry(pc, &fo->sockopts, node) {
+		if (setsockopt(sockfd, pc->sock_level, pc->sock_option,
+				pc->opt_val, pc->opt_len) < 0) {
+			/* Newline-terminated like every other log message here. */
+			PARA_EMERG_LOG("Can not set %s socket option: %s\n",
+				pc->opt_name, strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
+/**
+ * Release a flowopt queue together with all options queued on it.
+ *
+ * \param fo The queue to free. May be NULL, in which case nothing is done.
+ */
+static void flowopt_cleanup(struct flowopts *fo)
+{
+	struct pre_conn_opt *opt, *tmp;
+
+	if (!fo)
+		return;
+
+	/* The _safe variant is needed because each entry is freed while iterating. */
+	list_for_each_entry_safe(opt, tmp, &fo->sockopts, node) {
+		free(opt->opt_val);
+		free(opt->opt_name);
+		free(opt);
+	}
+	free(fo);
+}
+
+/**
+ * Resolve IPv4/IPv6 address and create a ready-to-use active or passive socket.
+ *
+ * \param l4type The layer-4 type (\p IPPROTO_xxx).
+ * \param passive Whether this is a passive (1) or active (0) socket.
+ * \param host Remote or local hostname or IPv/6 address string.
+ * \param port_number Decimal port number.
+ * \param fo Socket options to be set before making the connection.
+ *
+ * This creates a ready-made IPv4/v6 socket structure after looking up the
+ * necessary parameters. The interpretation of \a host depends on the value of
+ * \a passive:
+ *	- on a passive socket host is interpreted as an interface IPv4/6 address
+ *	  (can be left NULL);
+ *	- on an active socket, \a host is the peer DNS name or IPv4/6 address
+ *	  to connect to;
+ *	- \a port_number is in either case the numeric port number (not service
+ *	  string).
+ *
+ * Furthermore, bind(2) is called on passive sockets, and connect(2) on active
+ * sockets. The algorithm tries all possible address combinations until it
+ * succeeds.
+ *
+ * If \a fo is supplied, its options are set on every socket that is tried.
+ * The queue is released before this function returns, on success as well as
+ * on every failure path, so the caller must not use it afterwards. Failure to
+ * set one of the queued options terminates the process, see
+ * flowopt_setopts().
+ *
+ * \return The socket descriptor on success. On errors, \a -E_ADDRESS_LOOKUP
+ * is returned if the address could not be resolved, and the negated paraslash
+ * error code of the failing system call otherwise (with details in the error
+ * log).
+ *
+ * \sa ipv6(7), getaddrinfo(3), bind(2), connect(2).
+ */
+int makesock(unsigned l4type, bool passive,
+		const char *host, uint16_t port_number,
+		struct flowopts *fo)
+{
+	struct addrinfo *local = NULL, *src,
+			*remote = NULL, *dst, hints;
+	unsigned int l3type = AF_UNSPEC;
+	int rc, on = 1, sockfd = -1,
+		socktype = sock_type(l4type);
+	/*
+	 * errno of the most recent failed attempt. It is captured right at the
+	 * point of failure because close(), freeaddrinfo() and free() may
+	 * modify errno. The default only applies if no system call was tried
+	 * at all, i.e. if every address combination was skipped.
+	 */
+	int saved_errno = EAFNOSUPPORT;
+	char port[6]; /* port number has at most 5 digits */
+
+	snprintf(port, sizeof(port), "%u", port_number);
+	/* Set up address hint structure */
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = l3type;
+	hints.ai_socktype = socktype;
+	/*
+	 * getaddrinfo does not support SOCK_DCCP, so for the sake of lookup
+	 * (and only then) pretend to be UDP.
+	 */
+	if (l4type == IPPROTO_DCCP)
+		hints.ai_socktype = SOCK_DGRAM;
+
+	/* only use addresses available on the host */
+	hints.ai_flags = AI_ADDRCONFIG;
+	if (l3type == AF_INET6)
+		/* use v4-mapped-v6 if no v6 addresses found */
+		hints.ai_flags |= AI_V4MAPPED | AI_ALL;
+
+	if (passive && host == NULL)
+		hints.ai_flags |= AI_PASSIVE;
+
+	/* Obtain local/remote address information */
+	if ((rc = getaddrinfo(host, port, &hints, passive ? &local : &remote))) {
+		PARA_ERROR_LOG("can not resolve %s address %s#%s: %s.\n",
+			layer4_name(l4type),
+			host? host : (passive? "[loopback]" : "[localhost]"),
+			port, gai_strerror(rc));
+		rc = -E_ADDRESS_LOOKUP;
+		goto out; /* still need to release fo */
+	}
+
+	/* Iterate over all src/dst combination, exhausting dst first */
+	for (src = local, dst = remote; src != NULL || dst != NULL; /* no op */ ) {
+		if (src && dst && src->ai_family == AF_INET
+				&& dst->ai_family == AF_INET6)
+			goto get_next_dst; /* v4 -> v6 is not possible */
+
+		sockfd = socket(src ? src->ai_family : dst->ai_family,
+				socktype, l4type);
+		if (sockfd < 0) {
+			saved_errno = errno;
+			goto get_next_dst;
+		}
+
+		/*
+		 * Reuse the address on passive sockets to avoid failure on
+		 * restart (protocols using listen()) and when creating
+		 * multiple listener instances (UDP multicast).
+		 */
+		if (passive && setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR,
+				&on, sizeof(on)) == -1) {
+			rc = errno;
+			close(sockfd);
+			PARA_ERROR_LOG("can not set SO_REUSEADDR: %s\n",
+				strerror(rc));
+			rc = -ERRNO_TO_PARA_ERROR(rc);
+			goto out; /* release address lists and fo */
+		}
+		flowopt_setopts(sockfd, fo);
+
+		if (src) {
+			if (bind(sockfd, src->ai_addr, src->ai_addrlen) < 0) {
+				saved_errno = errno;
+				close(sockfd);
+				goto get_next_src;
+			}
+			if (!dst) /* bind-only completed successfully */
+				break;
+		}
+
+		if (dst && connect(sockfd, dst->ai_addr, dst->ai_addrlen) == 0)
+			break; /* connection completed successfully */
+		saved_errno = errno;
+		close(sockfd);
+get_next_dst:
+		if (dst && (dst = dst->ai_next))
+			continue;
+get_next_src:
+		if (src && (src = src->ai_next)) /* restart inner loop */
+			dst = remote;
+	}
+
+	if (src == NULL && dst == NULL) {
+		/* All combinations exhausted without success. */
+		PARA_ERROR_LOG("can not create %s socket %s#%s.\n",
+			layer4_name(l4type), host? host : (passive?
+			"[loopback]" : "[localhost]"), port);
+		rc = -ERRNO_TO_PARA_ERROR(saved_errno);
+	} else
+		rc = sockfd;
+out:
+	if (local)
+		freeaddrinfo(local);
+	if (remote)
+		freeaddrinfo(remote);
+	flowopt_cleanup(fo);
+	return rc;
+}
+
+/**
+ * Create a passive / listening socket.
+ *
+ * \param l4type The transport-layer type (\p IPPROTO_xxx).
+ * \param port The decimal port number to listen on.
+ * \param fo Flowopts (if any) to set before starting to listen.
+ *
+ * The socket is created and bound by makesock() and then put into listening
+ * state. If listen(2) fails, the socket is closed again.
+ *
+ * \return Non-negative integer (socket descriptor) on success, negative value
+ * otherwise.
+ *
+ * \sa makesock(), ip(7), ipv6(7), bind(2), listen(2).
+ */
+int para_listen(unsigned l4type, uint16_t port, struct flowopts *fo)
+{
+	int ret, fd = makesock(l4type, 1, NULL, port, fo);
+
+	/* Zero is a valid descriptor, only negative values indicate failure. */
+	if (fd < 0)
+		return fd;
+
+	ret = listen(fd, BACKLOG);
+	if (ret < 0) {
+		ret = errno; /* save before close() can modify it */
+		close(fd);
+		return -ERRNO_TO_PARA_ERROR(ret);
+	}
+	PARA_INFO_LOG("listening on %s port %u, fd %d\n",
+		layer4_name(l4type), port, fd);
+	return fd;
+}
+
+/**
+ * Determine IPv4/v6 socket address length.
+ *
+ * \param sa Container of IPv4 or IPv6 address. Any other address family
+ * triggers an assertion failure.
+ *
+ * \return Size of \p struct sockaddr_in6 for IPv6 addresses, size of
+ * \p struct sockaddr_in otherwise.
+ */
+static socklen_t salen(const struct sockaddr *sa)
+{
+	assert(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+
+	if (sa->sa_family == AF_INET6)
+		return sizeof(struct sockaddr_in6);
+	return sizeof(struct sockaddr_in);
+}
+
+/**
+ * Check for a v6-mapped-v4 address (RFC 4291, 2.5.5.2).
+ *
+ * \param ss Container of the address to examine.
+ *
+ * \return True if \a ss is of family \p AF_INET6 and holds a v6-mapped-v4
+ * address, false otherwise.
+ */
+static bool SS_IS_ADDR_V4MAPPED(const struct sockaddr_storage *ss)
+{
+	const struct sockaddr_in6 *sin6;
+
+	/* Only IPv6 containers can carry a mapped address. */
+	if (ss->ss_family != AF_INET6)
+		return false;
+
+	sin6 = (const struct sockaddr_in6 *)ss;
+	return IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr);
+}
+
+/**
+ * Process IPv4/v6 address, turn v6-mapped-v4 address into normal IPv4 address.
+ *
+ * \param ss Container of IPv4/6 address. Any other address family triggers an
+ * assertion failure.
+ *
+ * \return For a v6-mapped-v4 address, a pointer to a function-local static
+ * IPv4 address, which is overwritten by the next such call. Otherwise \a ss
+ * itself is returned.
+ *
+ * \sa RFC 3493
+ */
+static const struct sockaddr *
+normalize_ip_address(const struct sockaddr_storage *ss)
+{
+	static struct sockaddr_in v4;
+	const struct sockaddr_in6 *v6;
+
+	assert(ss->ss_family == AF_INET || ss->ss_family == AF_INET6);
+
+	/* Anything that is not a mapped address is passed through as is. */
+	if (!SS_IS_ADDR_V4MAPPED(ss))
+		return (const struct sockaddr *)ss;
+
+	v6 = (const struct sockaddr_in6 *)ss;
+	v4.sin_family = AF_INET;
+	v4.sin_port = v6->sin6_port;
+	/* The IPv4 address occupies the last four bytes of the IPv6 address. */
+	memcpy(&v4.sin_addr.s_addr, &(v6->sin6_addr.s6_addr[12]), 4);
+	return (const struct sockaddr *)&v4;
+}
+
+/**
+ * Generic/fallback MTU values
+ *
+ * These are taken from RFC 1122, RFC 2460, and RFC 5405.
+ * - RFC 1122, 3.3.3 defines EMTU_S ("Effective MTU for sending") and recommends
+ *   to use an EMTU_S size of 576 bytes if the IPv4 path MTU is unknown;
+ * - RFC 2460, 5. requires a minimum IPv6 MTU of 1280 bytes;
+ * - RFC 5405, 3.2 recommends that if path MTU discovery is not done,
+ *   UDP senders should use the respective minimum values of EMTU_S.
+ *
+ * \param af_type The address family (\p AF_xxx).
+ *
+ * \return 1280 for \p AF_INET6, 576 for everything else.
+ */
+static inline int generic_mtu(const int af_type)
+{
+	if (af_type == AF_INET6)
+		return 1280;
+	return 576;
+}
+
+/**
+ * Crude approximation of IP header overhead - neglecting options.
+ *
+ * \param af_type The address family (\p AF_xxx).
+ *
+ * \return 40 for \p AF_INET6, 20 for everything else.
+ */
+static inline int estimated_header_overhead(const int af_type)
+{
+	if (af_type == AF_INET6)
+		return 40;
+	return 20;
+}
+
+/**
+ * Maximum transport-layer message size (MMS_S) as per RFC 1122, 3.3.3
+ * Socket must be connected.
+ *
+ * \param sockfd The connected socket whose peer address family is examined.
+ *
+ * The result is the generic MTU of the peer's address family minus the
+ * estimated IP header overhead. The IPv4 values are used for v6-mapped-v4
+ * peers and also if the peer address can not be determined, in which case an
+ * error is logged.
+ *
+ * \return The estimated maximum message size in bytes.
+ */
+int generic_max_transport_msg_size(int sockfd)
+{
+	struct sockaddr_storage peer;
+	socklen_t peer_len = sizeof(peer);
+	int family = AF_INET; /* fallback if the lookup below fails */
+	int rc = getpeername(sockfd, (struct sockaddr *)&peer, &peer_len);
+
+	if (rc < 0) {
+		PARA_ERROR_LOG("can not determine remote address type: %s\n",
+				strerror(errno));
+	} else if (!SS_IS_ADDR_V4MAPPED(&peer)) {
+		family = peer.ss_family;
+	}
+	return generic_mtu(family) - estimated_header_overhead(family);
+}
+
+/**
+ * Print numeric host and port number (beware - uses static char).
+ *
+ * \param ss The IPv4/IPv6 socket address to use.
+ *
+ * \return Host string in numeric host:port format, \sa parse_url().
+ * \sa getnameinfo(3), services(5), nsswitch.conf(5)
+ */
+static char *host_and_port(const struct sockaddr_storage *ss)