/*
- * Copyright (C) 2005-2007 Andre Noll <maan@systemlinux.org>
+ * Copyright (C) 2005 Andre Noll <maan@tuebingen.mpg.de>
*
* Licensed under the GPL v2. For licencing details see COPYING.
*/
-/** \file net.c networking-related helper functions */
+/** \file net.c Networking-related helper functions. */
#include "para.h"
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/un.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+
+/* At least NetBSD needs these. */
+#ifndef AI_V4MAPPED
+#define AI_V4MAPPED 0
+#endif
+#ifndef AI_ALL
+#define AI_ALL 0
+#endif
+#ifndef AI_ADDRCONFIG
+#define AI_ADDRCONFIG 0
+#endif
+
+#include <regex.h>
+
#include "error.h"
#include "net.h"
#include "string.h"
+#include "list.h"
+#include "fd.h"
+/**
+ * Parse and validate IPv4 address/netmask string.
+ *
+ * \param cidr Address in CIDR notation.
+ * \param addr Buffer which receives a copy of the IPv4 address part of \a cidr.
+ * \param addrlen Size of \a addr in bytes, including the terminating NUL byte.
+ * \param netmask Value of the netmask part in \a cidr or the
+ * default of 32 if not specified.
+ *
+ * On failure \a addr is set to the empty string, provided \a addrlen is at
+ * least one. If \a addrlen is smaller than one, \a addr is not touched at all.
+ *
+ * \return Pointer to \a addr if successful, NULL on error.
+ * \sa RFC 4632
+ */
+char *parse_cidr(const char *cidr,
+		char *addr, ssize_t addrlen,
+		int32_t *netmask)
+{
+	const char *o;
+	char *c = addr, *end;
+
+	*netmask = 0x20;
+
+	/* Without room for the terminating NUL byte, addr must not be written. */
+	if (addrlen < 1)
+		return NULL;
+	if (cidr == NULL)
+		goto failed;
+	/* Last writable byte of addr; computed only after addrlen is validated. */
+	end = addr + (addrlen - 1);
+
+	/* Copy up to the slash or the end of the string, whichever comes first. */
+	for (o = cidr; (*c = *o == '/'? '\0' : *o); c++, o++)
+		if (c == end)
+			goto failed;
+
+	if (*o == '/')
+		if (para_atoi32(++o, netmask) < 0 ||
+				*netmask < 0 || *netmask > 0x20)
+			goto failed;
+
+	if (is_valid_ipv4_address(addr))
+		return addr;
+failed:
+	*addr = '\0';
+	return NULL;
+}
-/** Information about one encrypted connection. */
-struct crypt_data {
- /** Function used to decrypt received data. */
- crypt_function *recv;
- /** Function used to encrypt data to be sent. */
- crypt_function *send;
- /**
- * Context-dependent data (crypt keys), passed verbatim to the above
- * crypt functions.
- */
- void *private_data;
-};
-/** Array holding per fd crypt data. */
-static struct crypt_data *crypt_data_array;
-/** Current size of the crypt data array. */
-static unsigned cda_size = 0;
/**
- * activate encryption for one file descriptor
- *
- * \param fd the file descriptor
- * \param recv_f the function used for decrypting received data
- * \param send_f the function used for encrypting before sending
- * \param private_data user data supplied by the caller
- */
-void enable_crypt(int fd, crypt_function *recv_f, crypt_function *send_f,
- void *private_data)
-{
- if (fd + 1 > cda_size) {
- crypt_data_array = para_realloc(crypt_data_array,
- (fd + 1) * sizeof(struct crypt_data));
- memset(crypt_data_array + cda_size, 0,
- (fd + 1 - cda_size) * sizeof(struct crypt_data));
- cda_size = fd + 1;
+ * Match string as a candidate IPv4 address.
+ *
+ * \param address The string to match.
+ * \return True if \a address has "dot-quad" format.
+ */
+static bool is_v4_dot_quad(const char *address)
+{
+	bool result;
+	regex_t r;
+	/*
+	 * The pattern is compiled outside of assert() because the argument of
+	 * assert() is not evaluated when NDEBUG is defined. Compiling inside
+	 * the assertion would leave r uninitialized for regexec() and
+	 * regfree() in such builds.
+	 */
+	int ret = para_regcomp(&r, "^([0-9]+\\.){3}[0-9]+$",
+		REG_EXTENDED | REG_NOSUB);
+
+	assert(ret >= 0);
+	if (ret < 0) /* only reachable if assertions are compiled out */
+		return false;
+	result = regexec(&r, address, 0, NULL, 0) == 0;
+	regfree(&r);
+	return result;
+}
+
+/**
+ * Basic syntax check for the host part of a URL.
+ *
+ * The checks are applied in this order:
+ *
+ * - a string containing ':' can be neither an IPv4 address nor a DNS name,
+ *   so it is validated as an IPv6 address;
+ * - a string in dot-quad format is validated as an IPv4 address (the
+ *   first-match-wins algorithm from RFC 3986);
+ * - anything else is taken to be a DNS name, to be resolved later.
+ *
+ * \param host The host string to check.
+ * \return True if \a host passes the syntax checks.
+ *
+ * \sa RFC 3986, 3.2.2; RFC 1123, 2.1; RFC 1034, 3.5
+ */
+static bool host_string_ok(const char *host)
+{
+	if (!host || !*host)
+		return false;
+	if (strchr(host, ':'))
+		return is_valid_ipv6_address(host);
+	/* Only strings that look like a dot-quad need to be a valid IPv4 address. */
+	return !is_v4_dot_quad(host) || is_valid_ipv4_address(host);
+}
+
+/**
+ * Parse and validate URL string.
+ *
+ * The URL syntax is loosely based on RFC 3986, supporting one of
+ * - "["host"]"[:port] for native IPv6 addresses and
+ * - host[:port] for IPv4 hostnames and DNS names.
+ *
+ * Native IPv6 addresses must be enclosed in square brackets, since
+ * otherwise there is an ambiguity with the port separator `:'.
+ * The 'port' part is always considered to be a number; if absent,
+ * it is set to -1, to indicate that a default port is to be used.
+ *
+ * The following are valid examples:
+ * - 10.10.1.1
+ * - 10.10.1.2:8000
+ * - localhost
+ * - localhost:8001
+ * - [::1]:8000
+ * - [badc0de::1]
+ *
+ * \param url The URL string to take apart.
+ * \param host To return the copied host part of \a url.
+ * \param hostlen The size of \a host in bytes, including the terminating
+ * NUL byte.
+ * \param port To return the port number (if any) of \a url.
+ *
+ * \return Pointer to \a host, or \p NULL if failed. If \p NULL is returned,
+ * \a host and \a port are undefined. If no port number was present in \a url,
+ * \a port is set to -1. A host part which does not fit into \a host,
+ * including its terminating NUL byte, is rejected. If \a hostlen is smaller
+ * than one, \a host is not touched at all.
+ *
+ * \sa RFC 3986, 3.2.2/3.2.3
+ */
+char *parse_url(const char *url,
+		char *host, ssize_t hostlen,
+		int32_t *port)
+{
+	const char *o = url;
+	char *c = host, *end;
+
+	*port = -1;
+
+	/* Without room for the terminating NUL byte, host must not be written. */
+	if (hostlen < 1)
+		return NULL;
+	if (o == NULL)
+		goto failed;
+	/* Last writable byte of host; computed only after hostlen is validated. */
+	end = host + (hostlen - 1);
+
+	if (*o == '[') {
+		for (++o; (*c = *o == ']' ? '\0' : *o); c++, o++)
+			if (c == end)
+				goto failed;
+
+		if (*o++ != ']' || (*o != '\0' && *o != ':'))
+			goto failed;
+	} else {
+		for (; (*c = *o == ':'? '\0' : *o); c++, o++) {
+			/*
+			 * A non-NUL byte in the last slot leaves no room for
+			 * the terminator: the next iteration would store it
+			 * one byte past the end of the buffer.
+			 */
+			if (c == end)
+				goto failed;
+		}
}
- crypt_data_array[fd].recv = recv_f;
- crypt_data_array[fd].send = send_f;
- crypt_data_array[fd].private_data = private_data;
- PARA_INFO_LOG("rc4 encryption activated for fd %d\n", fd);
+
+	if (*o == ':')
+		if (para_atoi32(++o, port) < 0 || *port < 0 || *port > 0xffff)
+			goto failed;
+	if (host_string_ok(host))
+		return host;
+failed:
+	*host = '\0';
+	return NULL;
}
/**
- * deactivate encryption for a given fd
+ * Stringify port number, resolve into service name where defined.
*
- * \param fd the file descriptor
+ * \param port 2-byte port number, in host-byte-order.
+ * \param transport Transport protocol name (e.g. "udp", "tcp"), or NULL.
+ * \return Pointer to static result buffer.
*
- * This must be called if and only if \p fd was activated via enable_crypt().
+ * \sa getservent(3), services(5), nsswitch.conf(5)
*/
-void disable_crypt(int fd)
+const char *stringify_port(int port, const char *transport)
{
- if (cda_size < fd + 1)
- return;
- crypt_data_array[fd].recv = NULL;
- crypt_data_array[fd].send = NULL;
- crypt_data_array[fd].private_data = NULL;
+ static char service[NI_MAXSERV];
+
+ if (port < 0 || port > 0xFFFF) {
+ snprintf(service, sizeof(service), "undefined (%d)", port);
+ } else {
+ struct servent *se = getservbyport(htons(port), transport);
+
+ if (se == NULL)
+ snprintf(service, sizeof(service), "%d", port);
+ else
+ snprintf(service, sizeof(service), "%s", se->s_name);
+ }
+ return service;
+}
+
+/**
+ * Map a layer-4 protocol to the matching socket type.
+ *
+ * \param l4type The transport-layer protocol (\p IPPROTO_xxx).
+ *
+ * \return \p SOCK_DGRAM for UDP, \p SOCK_STREAM for TCP, \p SOCK_DCCP for
+ * DCCP, and -1 for any other protocol.
+ *
+ * \sa ip(7), socket(2)
+ */
+static inline int sock_type(const unsigned l4type)
+{
+	if (l4type == IPPROTO_UDP)
+		return SOCK_DGRAM;
+	if (l4type == IPPROTO_TCP)
+		return SOCK_STREAM;
+	if (l4type == IPPROTO_DCCP)
+		return SOCK_DCCP;
+	return -1; /* not supported here */
}
+/**
+ * Pretty-print transport-layer name.
+ *
+ * \param l4type The transport-layer protocol (\p IPPROTO_xxx).
+ *
+ * \return A string literal: "UDP", "TCP" or "DCCP" for the supported
+ * protocols, "UNKNOWN PROTOCOL" for everything else.
+ */
+static const char *layer4_name(const unsigned l4type)
+{
+	if (l4type == IPPROTO_UDP)
+		return "UDP";
+	if (l4type == IPPROTO_TCP)
+		return "TCP";
+	if (l4type == IPPROTO_DCCP)
+		return "DCCP";
+	return "UNKNOWN PROTOCOL";
+}
/**
- * initialize a struct sockaddr_in
+ * Flowopts: Transport-layer independent encapsulation of socket options.
*
- * \param addr A pointer to the struct to be initialized
- * \param port The port number to use
- * \param he The address to use
+ * These collect individual socket options into a queue, which is disposed of
+ * directly after makesock(). The 'pre_conn_opt' structure is for internal use
+ * only and should not be visible elsewhere.
*
- * If \a he is null (server mode), \a addr->sin_addr is initialized with \p
- * INADDR_ANY. Otherwise, the address given by \a he is copied to addr.
+ * \sa setsockopt(2), makesock()
*/
-void init_sockaddr(struct sockaddr_in *addr, int port, const struct hostent *he)
+struct pre_conn_opt {
+ int sock_level; /**< Second argument to setsockopt() */
+ int sock_option; /**< Third argument to setsockopt() */
+ char *opt_name; /**< Stringified \a sock_option (private copy, freed in flowopt_cleanup()) */
+ void *opt_val; /**< Fourth argument to setsockopt() (private copy, or NULL if no value was given) */
+ socklen_t opt_len; /**< Fifth argument to setsockopt() (zero if \a opt_val is NULL) */
+
+ struct list_head node; /**< FIFO, as sockopt order matters. */
+};
+
+/** FIFO list of pre-connection socket options to be set */
+struct flowopts {
+ /** Queue head; flowopt_add() appends struct pre_conn_opt entries via their \a node member. */
+ struct list_head sockopts;
+};
+
+/**
+ * Allocate and initialize a flowopt queue.
+ *
+ * \return A new structure to be passed to \ref flowopt_add(). It is
+ * automatically deallocated in \ref makesock().
+ */
+struct flowopts *flowopt_new(void)
{
- /* host byte order */
- addr->sin_family = AF_INET;
- /* short, network byte order */
- addr->sin_port = htons(port);
- if (he)
- addr->sin_addr = *((struct in_addr *)he->h_addr);
- else
- addr->sin_addr.s_addr = INADDR_ANY;
- /* zero the rest of the struct */
- memset(&addr->sin_zero, '\0', 8);
+ struct flowopts *new = para_malloc(sizeof(*new));
+
+ INIT_LIST_HEAD(&new->sockopts);
+ return new;
}
-/*
- * send out a buffer, resend on short writes
+/**
+ * Append new socket option to flowopt queue.
*
- * \param fd the file descriptor
- * \param buf The buffer to be sent
- * \param len The length of \a buf
+ * \param fo The flowopt queue to append to.
+ * \param lev Level at which \a opt resides.
+ * \param opt New option to add.
+ * \param name Stringified name of \a opt.
+ * \param val The value to set \a opt to.
+ * \param len Length of \a val.
+ *
+ * \sa setsockopt(2)
+ */
+void flowopt_add(struct flowopts *fo, int lev, int opt,
+		const char *name, const void *val, int len)
+{
+	struct pre_conn_opt *entry = para_malloc(sizeof(*entry));
+
+	entry->sock_level = lev;
+	entry->sock_option = opt;
+	/* The entry keeps private copies of both the name and the value. */
+	entry->opt_name = para_strdup(name);
+	entry->opt_val = NULL;
+	entry->opt_len = 0;
+	if (val != NULL) {
+		entry->opt_val = para_malloc(len);
+		memcpy(entry->opt_val, val, len);
+		entry->opt_len = len;
+	}
+	/* Append at the tail so that options are later set in insertion order. */
+	list_add_tail(&entry->node, &fo->sockopts);
+}
+
+/**
+ * Set the entire bunch of pre-connection options at once.
+ *
+ * \param sockfd The socket to apply the options to.
+ * \param fo The option queue. If \p NULL, the function does nothing.
+ *
+ * The options are applied in queue order. If setsockopt(2) fails for any of
+ * them, the error is logged and the process exits with \p EXIT_FAILURE.
+ */
+static void flowopt_setopts(int sockfd, struct flowopts *fo)
+{
+	struct pre_conn_opt *pc;
+
+	if (fo == NULL)
+		return;
+
+	list_for_each_entry(pc, &fo->sockopts, node) {
+		if (setsockopt(sockfd, pc->sock_level, pc->sock_option,
+				pc->opt_val, pc->opt_len) >= 0)
+			continue;
+		/* Terminate the message with a newline like all other log calls. */
+		PARA_EMERG_LOG("Can not set %s socket option: %s\n",
+			pc->opt_name, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+}
+
+/**
+ * Deallocate all resources of a flowopts structure.
*
- * Due to circumstances beyond your control, the kernel might not send all the
- * data out in one chunk, and now, my friend, it's up to us to get the data out
- * there (Beej's Guide to Network Programming).
+ * \param fo A pointer as returned from flowopt_new().
*
- * \return This function returns 1 on success and \a -E_SEND on errors. The
- * number of bytes actually sent is stored upon successful return in \a len.
+ * It's OK to pass \p NULL here in which case the function does nothing.
*/
-static int sendall(int fd, const char *buf, size_t *len)
+void flowopt_cleanup(struct flowopts *fo)
{
- size_t total = 0, bytesleft = *len; /* how many we have left to send */
- int n = -1;
+ struct pre_conn_opt *cur, *next;
- while (total < *len) {
- n = send(fd, buf + total, bytesleft, 0);
- if (n == -1)
- break;
- total += n;
- bytesleft -= n;
- if (total < *len)
- PARA_DEBUG_LOG("short write (%zd byte(s) left)\n",
- *len - total);
+ if (fo == NULL)
+ return;
+
+ list_for_each_entry_safe(cur, next, &fo->sockopts, node) {
+ free(cur->opt_name);
+ free(cur->opt_val);
+ free(cur);
}
- *len = total; /* return number actually sent here */
- return n == -1? -E_SEND : 1; /* return 1 on success */
+ free(fo);
}
/**
- * Encrypt and send a binary buffer.
+ * Resolve an IPv4/IPv6 address.
*
- * \param fd The file descriptor.
- * \param buf The buffer to be encrypted and sent.
- * \param len The length of \a buf.
+ * \param l4type The layer-4 type (\p IPPROTO_xxx).
+ * \param passive Whether \p AI_PASSIVE should be included as hint.
+ * \param host Remote or local hostname or IPv4/6 address string.
+ * \param port_number Used to set the port in each returned address structure.
+ * \param result addrinfo structures are returned here.
*
- * Check if encryption is available. If yes, encrypt the given buffer. Send
- * out the buffer, encrypted or not, and try to resend the remaing part in case
- * of short writes.
+ * The interpretation of \a host depends on the value of \a passive. On a
+ * passive socket host is interpreted as an interface IPv4/6 address (can be
+ * left NULL). On an active socket, \a host is the peer DNS name or IPv4/6
+ * address to connect to.
*
- * \return Positive on success, \p -E_SEND on errors.
+ * \return Standard.
+ *
+ * \sa getaddrinfo(3).
*/
-int send_bin_buffer(int fd, const char *buf, size_t len)
+int lookup_address(unsigned l4type, bool passive, const char *host,
+ int port_number, struct addrinfo **result)
{
int ret;
- crypt_function *cf = NULL;
-
- if (!len)
- PARA_CRIT_LOG("%s", "len == 0\n");
- if (fd + 1 <= cda_size)
- cf = crypt_data_array[fd].send;
- if (cf) {
- void *private = crypt_data_array[fd].private_data;
- /* RC4 may write more than len to the output buffer */
- unsigned char *outbuf = para_malloc(ROUND_UP(len, 8));
- (*cf)(len, (unsigned char *)buf, outbuf, private);
- ret = sendall(fd, (char *)outbuf, &len);
- free(outbuf);
- } else
- ret = sendall(fd, buf, &len);
+ char port[6]; /* port number has at most 5 digits */
+ struct addrinfo *addr = NULL, hints;
+
+ *result = NULL;
+ sprintf(port, "%d", port_number & 0xffff);
+ /* Set up address hint structure */
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = sock_type(l4type);
+ /*
+ * getaddrinfo does not support SOCK_DCCP, so for the sake of lookup
+ * (and only then) pretend to be UDP.
+ */
+ if (l4type == IPPROTO_DCCP)
+ hints.ai_socktype = SOCK_DGRAM;
+ /* only use addresses available on the host */
+ hints.ai_flags = AI_ADDRCONFIG;
+ if (passive && host == NULL)
+ hints.ai_flags |= AI_PASSIVE;
+ /* Obtain local/remote address information */
+ ret = getaddrinfo(host, port, &hints, &addr);
+ if (ret != 0) {
+ PARA_ERROR_LOG("can not resolve %s address %s#%s: %s\n",
+ layer4_name(l4type),
+ host? host : (passive? "[loopback]" : "[localhost]"),
+ port, gai_strerror(ret));
+ return -E_ADDRESS_LOOKUP;
+ }
+ *result = addr;
+ return 1;
+}
+
+/**
+ * Create an active or passive socket.
+ *
+ * \param l4type \p IPPROTO_TCP, \p IPPROTO_UDP, or \p IPPROTO_DCCP.
+ * \param passive Whether to call bind(2) or connect(2).
+ * \param ai Address information as obtained from \ref lookup_address().
+ * \param fo Socket options to be set before making the connection.
+ *
+ * Each entry of \a ai is tried in turn until one succeeds: bind(2) is called
+ * on passive sockets, and connect(2) on active sockets. If \a fo is supplied,
+ * options are set but cleanup must be performed in the caller.
+ *
+ * \return File descriptor on success, \p -E_MAKESOCK on errors.
+ *
+ * \sa \ref lookup_address(), \ref makesock(), ip(7), ipv6(7), bind(2),
+ * connect(2).
+ */
+int makesock_addrinfo(unsigned l4type, bool passive, struct addrinfo *ai,
+		struct flowopts *fo)
+{
+	const int reuse = 1;
+
+	for (; ai; ai = ai->ai_next) {
+		int fd = socket(ai->ai_family, sock_type(l4type), l4type);
+
+		if (fd < 0)
+			continue;
+		flowopt_setopts(fd, fo);
+		if (!passive) {
+			if (connect(fd, ai->ai_addr, ai->ai_addrlen) == 0)
+				return fd;
+		} else if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse,
+				sizeof(reuse)) != -1 &&
+				bind(fd, ai->ai_addr, ai->ai_addrlen) >= 0) {
+			/*
+			 * The address is reused on passive sockets to avoid
+			 * failure on restart (protocols using listen()) and
+			 * when creating multiple listener instances (UDP
+			 * multicast).
+			 */
+			return fd;
+		}
+		/* This address did not work out, try the next one. */
+		close(fd);
+	}
+	return -E_MAKESOCK;
+}
+
+/**
+ * Resolve IPv4/IPv6 address and create a ready-to-use active or passive socket.
+ *
+ * \param l4type The layer-4 type (\p IPPROTO_xxx).
+ * \param passive Whether this is a passive or active socket.
+ * \param host Passed to \ref lookup_address().
+ * \param port_number Passed to \ref lookup_address().
+ * \param fo Passed to \ref makesock_addrinfo().
+ *
+ * The function first calls \ref lookup_address() and, if the lookup succeeded,
+ * hands the resulting address list to makesock_addrinfo() to create and
+ * initialize the socket. The address list is freed before returning and any
+ * failure is logged.
+ *
+ * \return The newly created file descriptor on success, a negative error code
+ * on failure.
+ *
+ * \sa \ref lookup_address(), \ref makesock_addrinfo().
+ */
+int makesock(unsigned l4type, bool passive, const char *host, uint16_t port_number,
+		struct flowopts *fo)
+{
+	struct addrinfo *ai;
+	int ret;
+
+	ret = lookup_address(l4type, passive, host, port_number, &ai);
+	if (ret >= 0)
+		ret = makesock_addrinfo(l4type, passive, ai, fo);
+	/* lookup_address() stores NULL in ai unless the lookup succeeded. */
+	if (ai)
+		freeaddrinfo(ai);
+	if (ret < 0) {
+		const char *name = host;
+
+		if (!name)
+			name = passive? "[loopback]" : "[localhost]";
+		PARA_ERROR_LOG("can not create %s socket %s#%d.\n",
+			layer4_name(l4type), name, port_number);
+	}
return ret;
}
/**
- * encrypt and send null terminated buffer.
+ * Create a passive / listening socket.
+ *
+ * \param l4type The transport-layer type (\p IPPROTO_xxx).
+ * \param port The decimal port number to listen on.
+ * \param fo Flowopts (if any) to set before starting to listen.
+ *
+ * \return Positive integer (socket descriptor) on success, negative value
+ * otherwise.
*
- * \param fd the file descriptor
- * \param buf the null-terminated buffer to be send
+ * \sa makesock(), ip(7), ipv6(7), bind(2), listen(2).
+ */
+int para_listen(unsigned l4type, uint16_t port, struct flowopts *fo)
+{
+	int ret, fd = makesock(l4type, true, NULL, port, fo);
+
+	/*
+	 * Only negative values indicate failure. Zero is a valid descriptor
+	 * and must not be returned without having called listen(2) on it.
+	 */
+	if (fd < 0)
+		return fd;
+	if (listen(fd, BACKLOG) < 0) {
+		/* Save errno, as close() may overwrite it. */
+		ret = errno;
+		close(fd);
+		return -ERRNO_TO_PARA_ERROR(ret);
+	}
+	PARA_INFO_LOG("listening on %s port %u, fd %d\n",
+		layer4_name(l4type), port, fd);
+	return fd;
+}
+
+/**
+ * Determine IPv4/v6 socket address length.
+ *
+ * \param sa Container of IPv4 or IPv6 address. Any other address family
+ * triggers an assertion failure.
+ *
+ * \return The size of struct sockaddr_in6 for \p AF_INET6 addresses, the size
+ * of struct sockaddr_in otherwise.
+ */
+static socklen_t salen(const struct sockaddr *sa)
+{
+	assert(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+
+	if (sa->sa_family == AF_INET6)
+		return sizeof(struct sockaddr_in6);
+	return sizeof(struct sockaddr_in);
+}
+
+/** True if \a ss holds a v6-mapped-v4 address (RFC 4291, 2.5.5.2). */
+static bool SS_IS_ADDR_V4MAPPED(const struct sockaddr_storage *ss)
+{
+	const struct sockaddr_in6 *ia6;
+
+	/* Only IPv6 containers can carry a mapped address. */
+	if (ss->ss_family != AF_INET6)
+		return false;
+	ia6 = (const struct sockaddr_in6 *)ss;
+	return IN6_IS_ADDR_V4MAPPED(&ia6->sin6_addr);
+}
+
+/**
+ * Process IPv4/v6 address, turn v6-mapped-v4 address into normal IPv4 address.
+ * \param ss Container of IPv4/6 address.
+ * \return Pointer to normalized address (may be static storage).
*
- * This is equivalent to send_bin_buffer(fd, buf, strlen(buf)).
+ * \sa RFC 3493
+ */
+static const struct sockaddr *
+normalize_ip_address(const struct sockaddr_storage *ss)
+{
+	/* Result buffer for mapped addresses, overwritten by each such call. */
+	static struct sockaddr_in ia;
+	const struct sockaddr_in6 *ia6 = (const struct sockaddr_in6 *)ss;
+
+	assert(ss->ss_family == AF_INET || ss->ss_family == AF_INET6);
+
+	/* Anything but a v6-mapped-v4 address is handed back unmodified. */
+	if (!SS_IS_ADDR_V4MAPPED(ss))
+		return (const struct sockaddr *)ss;
+
+	ia.sin_family = AF_INET;
+	ia.sin_port = ia6->sin6_port;
+	/* The embedded IPv4 address occupies the last four bytes. */
+	memcpy(&ia.sin_addr.s_addr, &(ia6->sin6_addr.s6_addr[12]), 4);
+	return (const struct sockaddr *)&ia;
+}
+
+/**
+ * Generic/fallback MTU values
*
- * \return Positive on success, \p -E_SEND on errors.
+ * These are taken from RFC 1122, RFC 2460, and RFC 5405.
+ * - RFC 1122, 3.3.3 defines EMTU_S ("Effective MTU for sending") and recommends
+ * to use an EMTU_S size of 576 bytes if the IPv4 path MTU is unknown;
+ * - RFC 2460, 5. requires a minimum IPv6 MTU of 1280 bytes;
+ * - RFC 5405, 3.2 recommends that if path MTU discovery is not done,
+ * UDP senders should use the respective minimum values of EMTU_S.
*/
-int send_buffer(int fd, const char *buf)
+static inline int generic_mtu(const int af_type)
{
- return send_bin_buffer(fd, buf, strlen(buf));
+ return af_type == AF_INET6 ? 1280 : 576;
}
+/**
+ * Crude approximation of IP header overhead - neglecting options.
+ *
+ * \param af_type The address family.
+ *
+ * \return 40 for \p AF_INET6, 20 for any other address family.
+ */
+static inline int estimated_header_overhead(const int af_type)
+{
+	if (af_type == AF_INET6)
+		return 40;
+	return 20;
+}
/**
- * send and encrypt a buffer given by a format string
+ * Get the maximum transport-layer message size (MMS_S).
+ *
+ * \param sockfd The socket file descriptor.
*
- * \param fd the file descriptor
- * \param fmt a format string
+ * The socket must be connected. See RFC 1122, 3.3.3. If the protocol family
+ * could not be determined, \p AF_INET is assumed.
*
- * \return Positive on success, \p -E_SEND on errors.
+ * \return The maximum message size of the address family type.
*/
-__printf_2_3 int send_va_buffer(int fd, const char *fmt, ...)
+int generic_max_transport_msg_size(int sockfd)
{
- char *msg;
+ struct sockaddr_storage ss;
+ socklen_t sslen = sizeof(ss);
+ int af_type = AF_INET;
+
+ if (getpeername(sockfd, (struct sockaddr *)&ss, &sslen) < 0) {
+ PARA_ERROR_LOG("can not determine remote address type: %s\n",
+ strerror(errno));
+ } else if (!SS_IS_ADDR_V4MAPPED(&ss)) {
+ af_type = ss.ss_family;
+ }
+ return generic_mtu(af_type) - estimated_header_overhead(af_type);
+}
+
+/**
+ * Look up the remote side of a connected socket structure.
+ *
+ * \param fd The socket descriptor of the connected socket.
+ *
+ * \return A static character string identifying hostname and port of the
+ * chosen side in numeric host:port format.
+ *
+ * \sa getsockname(2), getpeername(2), parse_url(), getnameinfo(3),
+ * services(5), nsswitch.conf(5).
+ */
+char *remote_name(int fd)
+{
+ struct sockaddr_storage ss;
+ const struct sockaddr *sa;
+ socklen_t sslen = sizeof(ss);
+ char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
+ static char output[sizeof(hbuf) + sizeof(sbuf) + 4];
int ret;
- PARA_VSPRINTF(fmt, msg);
- ret = send_buffer(fd, msg);
- free(msg);
- return ret;
+ if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+ PARA_ERROR_LOG("can not determine address from fd %d: %s\n",
+ fd, strerror(errno));
+ snprintf(output, sizeof(output), "(unknown)");
+ return output;
+ }
+ sa = normalize_ip_address(&ss);
+ ret = getnameinfo(sa, salen(sa), hbuf, sizeof(hbuf), sbuf,
+ sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret) {
+ PARA_WARNING_LOG("hostname lookup error (%s).\n",
+ gai_strerror(ret));
+ snprintf(output, sizeof(output), "(lookup error)");
+ } else if (sa->sa_family == AF_INET6)
+ snprintf(output, sizeof(output), "[%s]:%s", hbuf, sbuf);
+ else
+ snprintf(output, sizeof(output), "%s:%s", hbuf, sbuf);
+ return output;
}
/**
- * receive and decrypt.
+ * Extract IPv4 or IPv6-mapped-IPv4 address from sockaddr_storage.
*
- * \param fd the file descriptor
- * \param buf the buffer to write the decrypted data to
- * \param size the size of \a buf
+ * \param ss Container of IPv4/6 address.
+ * \param ia Extracted IPv4 address (different from 0) or 0 if unsuccessful.
*
- * Receive at most \a size bytes from file descriptor \a fd. If encryption is
- * available, decrypt the received buffer.
+ * \sa RFC 3493.
+ */
+void extract_v4_addr(const struct sockaddr_storage *ss, struct in_addr *ia)
+{
+	const struct sockaddr *sa = normalize_ip_address(ss);
+
+	if (sa->sa_family == AF_INET) {
+		*ia = ((struct sockaddr_in *)sa)->sin_addr;
+		return;
+	}
+	/* Not an IPv4 or v6-mapped-v4 address: report the all-zero address. */
+	memset(ia, 0, sizeof(*ia));
+}
+
+/**
+ * Compare the address part of IPv4/6 addresses.
*
- * \return The number of bytes received on success, negative on errors.
+ * \param sa1 First address.
+ * \param sa2 Second address.
*
- * \sa recv(2)
+ * \return True iff the IP address of \a sa1 and \a sa2 match.
+ */
+bool sockaddr_equal(const struct sockaddr *sa1, const struct sockaddr *sa2)
+{
+	if (!sa1 || !sa2)
+		return false;
+	if (sa1->sa_family != sa2->sa_family)
+		return false;
+	if (sa1->sa_family == AF_INET) {
+		const struct sockaddr_in *a1 = (const struct sockaddr_in *)sa1,
+			*a2 = (const struct sockaddr_in *)sa2;
+
+		return a1->sin_addr.s_addr == a2->sin_addr.s_addr;
+	}
+	if (sa1->sa_family == AF_INET6) {
+		const struct sockaddr_in6 *a1 = (const struct sockaddr_in6 *)sa1,
+			*a2 = (const struct sockaddr_in6 *)sa2;
+
+		/*
+		 * Like in the IPv4 case, only the address part is compared.
+		 * Port, flow information and scope ID must not influence the
+		 * result.
+		 */
+		return !memcmp(&a1->sin6_addr, &a2->sin6_addr,
+			sizeof(a1->sin6_addr));
+	}
+	/* Addresses of other families are never considered equal. */
+	return false;
+}
+
+/**
+ * Receive data from a file descriptor.
+ *
+ * \param fd The file descriptor.
+ * \param buf The buffer to write the data to.
+ * \param size The size of \a buf.
+ *
+ * Receive at most \a size bytes from file descriptor \a fd.
+ *
+ * \return The number of bytes received on success, negative on errors, zero if
+ * the peer has performed an orderly shutdown.
+ *
+ * \sa recv(2).
*/
__must_check int recv_bin_buffer(int fd, char *buf, size_t size)
{
ssize_t n;
- crypt_function *cf = NULL;
-
- if (fd + 1 <= cda_size)
- cf = crypt_data_array[fd].recv;
- if (cf) {
- unsigned char *tmp = para_malloc(size);
- void *private = crypt_data_array[fd].private_data;
- n = recv(fd, tmp, size, 0);
- if (n > 0) {
- size_t numbytes = n;
- unsigned char *b = (unsigned char *)buf;
- (*cf)(numbytes, tmp, b, private);
- }
- free(tmp);
- } else
- n = recv(fd, buf, size, 0);
+
+ n = recv(fd, buf, size, 0);
if (n == -1)
return -ERRNO_TO_PARA_ERROR(errno);
return n;
}
/**
- * receive, decrypt and write terminating NULL byte
+ * Receive and write terminating NULL byte.
*
- * \param fd the file descriptor
- * \param buf the buffer to write the decrypted data to
- * \param size the size of \a buf
+ * \param fd The file descriptor.
+ * \param buf The buffer to write the data to.
+ * \param size The size of \a buf.
*
- * Read and decrypt at most \a size - 1 bytes from file descriptor \a fd and
+ * Read at most \a size - 1 bytes from file descriptor \a fd and
* write a NULL byte at the end of the received data.
*
- * \return: The return value of the underlying call to \a recv_bin_buffer().
+ * \return The return value of the underlying call to \a recv_bin_buffer().
*
* \sa recv_bin_buffer()
*/
}
/**
- * wrapper around gethostbyname
- *
- * \param host hostname or IPv4 address
- * \param ret the hostent structure is returned here
- *
- * \return positive on success, negative on errors. On success, \a ret
- * contains the return value of the underlying gethostbyname() call.
- *
- * \sa gethostbyname(2)
- */
-int get_host_info(char *host, struct hostent **ret)
-{
- PARA_INFO_LOG("getting host info of %s\n", host);
- /* FIXME: gethostbyname() is obsolete */
- *ret = gethostbyname(host);
- return *ret? 1 : -E_HOST_INFO;
-}
-
-/**
- * A wrapper around socket(2).
+ * Wrapper around the accept system call.
*
- * \param domain The communication domain that selects the protocol family.
+ * \param fd The listening socket.
+ * \param rfds An optional fd_set pointer.
+ * \param addr Structure which is filled in with the address of the peer socket.
+ * \param size Should contain the size of the structure pointed to by \a addr.
+ * \param new_fd Result pointer.
*
- * \return The socket fd on success, -E_SOCKET on errors.
+ * Accept incoming connections on \a fd, retry if interrupted. If \a rfds is
+ * not \p NULL, return 0 if \a fd is not set in \a rfds without calling accept().
*
- * Create an IPv4 socket for sequenced, reliable, two-way, connection-based
- * byte streams.
+ * \return Negative on errors, zero if no connections are present to be accepted,
+ * one otherwise.
*
- * \sa socket(2).
+ * \sa accept(2).
*/
-int get_stream_socket(int domain)
+int para_accept(int fd, fd_set *rfds, void *addr, socklen_t size, int *new_fd)
{
- int socket_fd;
+ int ret;
- if ((socket_fd = socket(domain, SOCK_STREAM, 0)) == -1)
- return -E_SOCKET;
- return socket_fd;
+ if (rfds && !FD_ISSET(fd, rfds))
+ return 0;
+ do
+ ret = accept(fd, (struct sockaddr *) addr, &size);
+ while (ret < 0 && errno == EINTR);
+
+ if (ret >= 0) {
+ *new_fd = ret;
+ return 1;
+ }
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ return 0;
+ return -ERRNO_TO_PARA_ERROR(errno);
}
/**
- * paraslash's wrapper around the accept system call
- *
- * \param fd the listening socket
- * \param addr structure which is filled in with the address of the peer socket
- * \param size should contain the size of the structure pointed to by \a addr
- *
- * Accept incoming connections on \a addr. Retry if interrupted.
+ * Probe the list of DCCP CCIDs configured on this host.
+ * \param ccid_array Pointer to return statically allocated array in.
+ * \return Number of elements returned in \a ccid_array or error.
*
- * \return The new file descriptor on success, \a -E_ACCEPT on errors.
- *
- * \sa accept(2).
+ * NB: This feature is only available on Linux > 2.6.30; on older kernels
+ * ENOPROTOOPT ("Protocol not available") will be returned.
*/
-int para_accept(int fd, void *addr, socklen_t size)
+int dccp_available_ccids(uint8_t **ccid_array)
{
- int new_fd;
+ static uint8_t ccids[DCCP_MAX_HOST_CCIDS];
+ socklen_t nccids = sizeof(ccids);
+ int ret, fd;
- do
- new_fd = accept(fd, (struct sockaddr *) addr, &size);
- while (new_fd < 0 && errno == EINTR);
- return new_fd < 0? -E_ACCEPT : new_fd;
-}
+ ret = fd = makesock(IPPROTO_DCCP, 1, NULL, 0, NULL);
+ if (ret < 0)
+ return ret;
-static int setserversockopts(int socket_fd)
-{
- int yes = 1;
+ if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_AVAILABLE_CCIDS,
+ ccids, &nccids) < 0) {
+ ret = errno;
+ close(fd);
+ PARA_ERROR_LOG("No DCCP_SOCKOPT_AVAILABLE_CCIDS: %s\n",
+ strerror(ret));
+ return -ERRNO_TO_PARA_ERROR(ret);
+ }
- if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &yes,
- sizeof(int)) == -1)
- return -E_SETSOCKOPT;
- return 1;
+ close(fd);
+ *ccid_array = ccids;
+ return nccids;
}
/**
- * prepare a structure for \p AF_UNIX socket addresses
+ * Prepare a structure for \p AF_UNIX socket addresses.
*
- * \param u pointer to the struct to be prepared
- * \param name the socket pathname
+ * \param u Pointer to the struct to be prepared.
+ * \param name The socket pathname.
*
* This just copies \a name to the sun_path component of \a u.
*
* \return Positive on success, \p -E_NAME_TOO_LONG if \a name is longer
* than \p UNIX_PATH_MAX.
*/
-int init_unix_addr(struct sockaddr_un *u, const char *name)
+static int init_unix_addr(struct sockaddr_un *u, const char *name,
+ bool abstract)
{
- if (strlen(name) >= UNIX_PATH_MAX)
+ if (strlen(name) + abstract >= UNIX_PATH_MAX)
return -E_NAME_TOO_LONG;
memset(u->sun_path, 0, UNIX_PATH_MAX);
u->sun_family = PF_UNIX;
- strcpy(u->sun_path, name);
+ strcpy(u->sun_path + abstract, name);
return 1;
}
/**
- * Prepare, create, and bind a socket for local communication.
+ * Create a socket for local communication and listen on it.
*
* \param name The socket pathname.
- * \param unix_addr Pointer to the \p AF_UNIX socket structure.
- * \param mode The desired mode of the socket.
+ * \param mode The desired permissions of the socket.
*
- * This functions creates a local socket for sequenced, reliable,
- * two-way, connection-based byte streams.
+ * This function creates a passive local socket for sequenced, reliable,
+ * two-way, connection-based byte streams. The socket file descriptor is set to
+ * nonblocking mode and listen(2) is called to prepare the socket for
+ * accepting incoming connection requests.
*
- * \return The file descriptor, on success, negative on errors.
+ * If \a mode is zero, an abstract socket (a non-portable Linux extension) is
+ * created. In this case the socket name has no connection with filesystem
+ * pathnames and chmod(2) is not called.
*
- * \sa socket(2)
- * \sa bind(2)
- * \sa chmod(2)
+ * \return The file descriptor on success, negative error code on failure.
+ * A failing chmod(2) yields \p -E_CHMOD, while failures of socket(2), bind(2)
+ * and listen(2) are reported through ERRNO_TO_PARA_ERROR(). Errors of
+ * init_unix_addr() and mark_fd_nonblocking() are passed through unchanged.
+ * Once the socket exists, every error path closes it before returning.
+ *
+ * \sa socket(2), bind(2), chmod(2), listen(2), unix(7).
*/
-int create_local_socket(const char *name, struct sockaddr_un *unix_addr,
- mode_t mode)
+int create_local_socket(const char *name, mode_t mode)
{
+ struct sockaddr_un unix_addr;
int fd, ret;
+ bool abstract = mode == 0;
- ret = init_unix_addr(unix_addr, name);
+ ret = init_unix_addr(&unix_addr, name, abstract);
if (ret < 0)
return ret;
+ ret = socket(PF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0)
+ return -ERRNO_TO_PARA_ERROR(errno);
+ fd = ret;
+ ret = mark_fd_nonblocking(fd);
+ if (ret < 0)
+ goto err;
+ ret = bind(fd, (struct sockaddr *)&unix_addr, sizeof(unix_addr));
+ if (ret < 0) {
+ ret = -ERRNO_TO_PARA_ERROR(errno);
+ goto err;
+ }
+ /* Permissions are applied to pathname sockets only. */
+ if (!abstract) {
+ ret = -E_CHMOD;
+ if (chmod(name, mode) < 0)
+ goto err;
+ }
+ if (listen(fd , 5) < 0) {
+ ret = -ERRNO_TO_PARA_ERROR(errno);
+ goto err;
+ }
+ return fd;
+err:
+ close(fd);
+ return ret;
+}
+
+/**
+ * Prepare, create, and connect to a Unix domain socket for local communication.
+ *
+ * \param name The socket pathname.
+ *
+ * This function creates a local socket for sequenced, reliable, two-way,
+ * connection-based byte streams. It first tries to connect to the abstract
+ * socket named \a name and, if that fails, to the pathname socket \a name.
+ * Both attempts use the same file descriptor.
+ *
+ * \return The file descriptor of the connected socket on success, negative on
+ * errors. If both attempts fail, the error of the second connect(2) call is
+ * returned. If init_unix_addr() fails, its return value is passed through.
+ * In either case the socket is closed before returning.
+ *
+ * \sa create_local_socket(), unix(7), connect(2).
+ */
+int connect_local_socket(const char *name)
+{
+ struct sockaddr_un unix_addr;
+ int fd, ret;
+
+ PARA_DEBUG_LOG("connecting to %s\n", name);
fd = socket(PF_UNIX, SOCK_STREAM, 0);
if (fd < 0)
- return -E_SOCKET;
- ret = -E_BIND;
- if (bind(fd, (struct sockaddr *) unix_addr, UNIX_PATH_MAX) < 0)
+ return -ERRNO_TO_PARA_ERROR(errno);
+ /* first try (linux-only) abstract socket */
+ ret = init_unix_addr(&unix_addr, name, true);
+ if (ret < 0)
goto err;
- ret = -E_CHMOD;
- if (chmod(name, mode) < 0)
+ if (connect(fd, (struct sockaddr *)&unix_addr, sizeof(unix_addr)) != -1)
+ return fd;
+ /* next try pathname socket */
+ ret = init_unix_addr(&unix_addr, name, false);
+ if (ret < 0)
goto err;
- return fd;
+ /*
+ * NOTE(review): POSIX leaves the state of a socket unspecified after a
+ * failed connect(2). Reusing fd here relies on platform behavior; confirm
+ * on all supported systems.
+ */
+ if (connect(fd, (struct sockaddr *)&unix_addr, sizeof(unix_addr)) != -1)
+ return fd;
+ ret = -ERRNO_TO_PARA_ERROR(errno);
err:
close(fd);
return ret;
#ifndef HAVE_UCRED
+/* Fallback without HAVE_UCRED: send only the buffer, no credentials. */
ssize_t send_cred_buffer(int sock, char *buf)
{
- return send_buffer(sock, buf);
+ return write_buffer(sock, buf);
}
+/*
+ * Fallback without HAVE_UCRED: receive only the buffer. Returns 1 if
+ * recv_buffer() returned a positive value and -E_RECVMSG otherwise.
+ */
int recv_cred_buffer(int fd, char *buf, size_t size)
{
return recv_buffer(fd, buf, size) > 0? 1 : -E_RECVMSG;
}
#else /* HAVE_UCRED */
+
/**
- * send NULL terminated buffer and Unix credentials of the current process
+ * Send a buffer and the credentials of the current process to a socket.
*
- * \param sock the socket file descriptor
- * \param buf the buffer to be sent
+ * \param sock The file descriptor of the sending socket.
+ * \param buf The zero-terminated buffer to send.
*
- * \return On success, this call returns the number of characters sent. On
- * error, \p -E_SENDMSG ist returned.
+ * \return On success, this call returns the number of bytes sent. On errors,
+ * \p -E_SENDMSG is returned.
*
- * \sa okir's Black Hats Manual
- * \sa sendmsg(2)
+ * \sa \ref recv_cred_buffer, sendmsg(2), socket(7), unix(7), okir's Black Hats
+ * Manual.
*/
ssize_t send_cred_buffer(int sock, char *buf)
{
- char control[sizeof(struct cmsghdr) + 10];
+ char control[sizeof(struct cmsghdr) + sizeof(struct ucred)];
struct msghdr msg;
struct cmsghdr *cmsg;
static struct iovec iov;
/* Response data */
iov.iov_base = buf;
- iov.iov_len = strlen(buf);
+ iov.iov_len = strlen(buf);
c.pid = getpid();
c.uid = getuid();
c.gid = getgid();
*(struct ucred *)CMSG_DATA(cmsg) = c;
msg.msg_controllen = cmsg->cmsg_len;
ret = sendmsg(sock, &msg, 0);
- if (ret < 0)
+ if (ret < 0)
ret = -E_SENDMSG;
return ret;
}
}
/**
- * receive a buffer and the Unix credentials of the sending process
+ * Receive a buffer and the Unix credentials of the sending process.
*
- * \param fd the socket file descriptor
- * \param buf the buffer to store the message
- * \param size the size of \a buffer
+ * \param fd The file descriptor of the receiving socket.
+ * \param buf The buffer to store the received message.
+ * \param size The length of \a buf in bytes.
*
- * \return negative on errors, the user id on success.
+ * \return Negative on errors, the user id of the sending process on success.
*
- * \sa okir's Black Hats Manual
- * \sa recvmsg(2)
+ * \sa \ref send_cred_buffer and the references given there.
*/
int recv_cred_buffer(int fd, char *buf, size_t size)
{
- char control[255];
+ char control[255] __a_aligned(8);
struct msghdr msg;
struct cmsghdr *cmsg;
struct iovec iov;
} else
if (cmsg->cmsg_level == SOL_SOCKET
&& cmsg->cmsg_type == SCM_RIGHTS) {
- dispose_fds((int *) CMSG_DATA(cmsg),
+ dispose_fds((int *)CMSG_DATA(cmsg),
(cmsg->cmsg_len - CMSG_LEN(0))
/ sizeof(int));
}
return result;
}
#endif /* HAVE_UCRED */
-
-/** how many pending connections queue will hold */
-#define BACKLOG 10
-
-/**
- * create a socket, bind it and listen
- *
- * \param port the tcp port to listen on
- *
- * \return The file descriptor of the created socket, negative
- * on errors.
- *
- * \sa get_stream_socket()
- * \sa setsockopt(2)
- * \sa bind(2)
- * \sa listen(2)
- */
-int init_tcp_socket(int port)
-{
- struct sockaddr_in my_addr;
- int fd, ret = get_stream_socket(AF_INET);
-
- if (ret < 0)
- return ret;
- fd = ret;
- ret = setserversockopts(fd);
- if (ret < 0)
- goto err;
- init_sockaddr(&my_addr, port, NULL);
- ret = -E_BIND;
- if (bind(fd, (struct sockaddr *)&my_addr,
- sizeof(struct sockaddr)) == -1) {
- PARA_CRIT_LOG("bind error: %s\n", strerror(errno));
- goto err;
- }
- ret = -E_LISTEN;
- if (listen(fd, BACKLOG) == -1)
- goto err;
- PARA_INFO_LOG("listening on port %d, fd %d\n", port, fd);
- return fd;
-err:
- close(fd);
- return ret;
-}
-
-/**
- * receive a buffer and check for a pattern
- *
- * \param fd the file descriptor to receive from
- * \param pattern the expected pattern
- * \param bufsize the size of the internal buffer
- *
- * \return Positive if \a pattern was received, negative otherwise.
- *
- * This function creates a buffer of size \a bufsize and tries
- * to receive at most \a bufsize bytes from file descriptor \a fd.
- * If at least \p strlen(\a pattern) bytes were received, the beginning of
- * the received buffer is compared with \a pattern, ignoring case.
- *
- * \sa recv_buffer()
- * \sa strncasecmp(3)
- */
-int recv_pattern(int fd, const char *pattern, size_t bufsize)
-{
- size_t len = strlen(pattern);
- char *buf = para_malloc(bufsize + 1);
- int ret = -E_RECV_PATTERN, n = recv_buffer(fd, buf, bufsize);
-
- if (n < len)
- goto out;
- if (strncasecmp(buf, pattern, len))
- goto out;
- ret = 1;
-out:
- if (ret < 0) {
- PARA_NOTICE_LOG("n = %d, did not receive pattern '%s'\n", n, pattern);
- if (n > 0)
- PARA_NOTICE_LOG("recvd: %s\n", buf);
- }
- free(buf);
- return ret;
-}