fd.c

   1 /* Copyright (C) 2006 Andre Noll <maan@tuebingen.mpg.de>, see file COPYING. */
   2
   3 /** \file fd.c Helper functions for file descriptor handling. */
   4
   5 #include <regex.h>
   6 #include <sys/types.h>
   7 #include <dirent.h>
   8 #include <sys/mman.h>
   9
  10 #include "para.h"
  11 #include "error.h"
  12 #include "string.h"
  13 #include "fd.h"
  14
  15 /**
  16  * Change the name or location of a file.
  17  *
  18  * \param oldpath File to be moved.
  19  * \param newpath Destination.
  20  *
  21  * This is just a simple wrapper for the rename(2) system call which returns a
  22  * paraslash error code and prints an error message on failure.
  23  *
  24  * \return Standard.
  25  *
  26  * \sa rename(2).
  27  */
  28 int xrename(const char *oldpath, const char *newpath)
  29 {
  30         int ret = rename(oldpath, newpath);
  31
  32         if (ret >= 0)
  33                 return 1;
  34         ret = -ERRNO_TO_PARA_ERROR(errno);
  35         PARA_ERROR_LOG("failed to rename %s -> %s\n", oldpath, newpath);
  36         return ret;
  37 }
  38
  39 /**
  40  * Write an array of buffers, handling non-fatal errors.
  41  *
  42  * \param fd The file descriptor to write to.
  43  * \param iov Pointer to one or more buffers.
  44  * \param iovcnt The number of buffers.
  45  *
  46  * EAGAIN, EWOULDBLOCK and EINTR are not considered error conditions. If a
  47  * write operation fails with EAGAIN or EWOULDBLOCK, the number of bytes that
  48  * have been written so far is returned. In the EINTR case the operation is
  49  * retried. Short writes are handled by issuing a subsequent write operation
  50  * for the remaining part.
  51  *
  52  * \return Negative on fatal errors, number of bytes written else.
  53  *
  54  * For blocking file descriptors, this function returns either the sum of all
  55  * buffer sizes or a negative error code which indicates the fatal error that
  56  * caused a write call to fail.
  57  *
  58  * For nonblocking file descriptors there is a third possibility: Any
  59  * non-negative return value less than the sum of the buffer sizes indicates
  60  * that a write operation returned EAGAIN/EWOULDBLOCK.
  61  *
  62  * \sa writev(2), \ref xwrite().
  63  */
  64 int xwritev(int fd, struct iovec *iov, int iovcnt)
  65 {
  66         size_t written = 0;
  67         int i;
  68         struct iovec saved_iov, *curiov;
  69
  70         i = 0;
  71         curiov = iov;
  72         saved_iov = *curiov;
  73         while (i < iovcnt && curiov->iov_len > 0) {
  74                 ssize_t ret = writev(fd, curiov, iovcnt - i);
  75                 if (ret >= 0) {
  76                         written += ret;
  77                         while (ret > 0) {
  78                                 if (ret < curiov->iov_len) {
  79                                         curiov->iov_base += ret;
  80                                         curiov->iov_len -= ret;
  81                                         break;
  82                                 }
  83                                 ret -= curiov->iov_len;
  84                                 *curiov = saved_iov;
  85                                 i++;
  86                                 if (i >= iovcnt)
  87                                         return written;
  88                                 curiov++;
  89                                 saved_iov = *curiov;
  90                         }
  91                         continue;
  92                 }
  93                 if (errno == EINTR)
  94                         /*
  95                          * The write() call was interrupted by a signal before
  96                          * any data was written. Try again.
  97                          */
  98                         continue;
  99                 if (errno == EAGAIN || errno == EWOULDBLOCK)
 100                         /*
 101                          * We don't consider this an error. Note that POSIX
 102                          * allows either error to be returned, and does not
 103                          * require these constants to have the same value.
 104                          */
 105                         return written;
 106                 /* fatal error */
 107                 return -ERRNO_TO_PARA_ERROR(errno);
 108         }
 109         return written;
 110 }
 111
 112 /**
 113  * Write a buffer to a file descriptor, re-writing on short writes.
 114  *
 115  * \param fd The file descriptor.
 116  * \param buf The buffer to write.
 117  * \param len The number of bytes to write.
 118  *
 119  * This is a simple wrapper for \ref xwritev().
 120  *
 121  * \return The return value of the underlying call to \ref xwritev().
 122  */
 123 int xwrite(int fd, const char *buf, size_t len)
 124 {
 125         struct iovec iov = {.iov_base = (void *)buf, .iov_len = len};
 126         return xwritev(fd, &iov, 1);
 127 }
 128
 129 /**
 130  * Write to a file descriptor, fail on short writes.
 131  *
 132  * \param fd The file descriptor.
 133  * \param buf The buffer to be written.
 134  * \param len The length of the buffer.
 135  *
 136  * For blocking file descriptors this function behaves identical to \ref
 137  * xwrite(). For non-blocking file descriptors it returns -E_SHORT_WRITE
 138  * (rather than a value less than len) if not all data could be written.
 139  *
 140  * \return Number of bytes written on success, negative error code else.
 141  */
 142 int write_all(int fd, const char *buf, size_t len)
 143 {
 144         int ret = xwrite(fd, buf, len);
 145
 146         if (ret < 0)
 147                 return ret;
 148         if (ret != len)
 149                 return -E_SHORT_WRITE;
 150         return ret;
 151 }
 152
 153 /**
 154  * A fprintf-like function for raw file descriptors.
 155  *
 156  * This function creates a string buffer according to the given format and
 157  * writes this buffer to a file descriptor.
 158  *
 159  * \param fd The file descriptor.
 160  * \param fmt A format string.
 161  *
 162  * The difference to fprintf(3) is that the first argument is a file
 163  * descriptor, not a FILE pointer. This function does not rely on stdio.
 164  *
 165  * \return The return value of the underlying call to \ref write_all().
 166  *
 167  * \sa fprintf(3), \ref xvasprintf().
 168  */
 169 __printf_2_3 int write_va_buffer(int fd, const char *fmt, ...)
 170 {
 171         char *msg;
 172         int ret;
 173         va_list ap;
 174
 175         va_start(ap, fmt);
 176         ret = xvasprintf(&msg, fmt, ap);
 177         va_end(ap);
 178         ret = write_all(fd, msg, ret);
 179         free(msg);
 180         return ret;
 181 }
 182
 183 /**
 184  * Read from a non-blocking file descriptor into multiple buffers.
 185  *
 186  * \param fd The file descriptor to read from.
 187  * \param iov Scatter/gather array used in readv().
 188  * \param iovcnt Number of elements in \a iov.
 189  * \param num_bytes Result pointer. Contains the number of bytes read from \a fd.
 190  *
 191  * This function tries to read up to sz bytes from fd, where sz is the sum of
 192  * the lengths of all vectors in iov. Like \ref xwrite(), EAGAIN and EINTR are
 193  * not considered error conditions. However, EOF is.
 194  *
 195  * \return Zero or a negative error code. If the underlying call to readv(2)
 196  * returned zero (indicating an end of file condition) or failed for some
 197  * reason other than EAGAIN or EINTR, a negative error code is returned.
 198  *
 199  * In any case, \a num_bytes contains the number of bytes that have been
 200  * successfully read from \a fd (zero if the first readv() call failed with
 201  * EAGAIN). Note that even if the function returns negative, some data might
 202  * have been read before the error occurred. In this case \a num_bytes is
 203  * positive.
 204  *
 205  * \sa \ref xwrite(), read(2), readv(2).
 206  */
 207 int readv_nonblock(int fd, struct iovec *iov, int iovcnt, size_t *num_bytes)
 208 {
 209         int ret, i, j;
 210
 211         *num_bytes = 0;
 212         for (i = 0, j = 0; i < iovcnt;) {
 213                 /* fix up the first iov */
 214                 assert(j < iov[i].iov_len);
 215                 iov[i].iov_base += j;
 216                 iov[i].iov_len -= j;
 217                 ret = readv(fd, iov + i, iovcnt - i);
 218                 iov[i].iov_base -= j;
 219                 iov[i].iov_len += j;
 220
 221                 if (ret == 0)
 222                         return -E_EOF;
 223                 if (ret < 0) {
 224                         if (errno == EAGAIN || errno == EINTR)
 225                                 return 0;
 226                         return -ERRNO_TO_PARA_ERROR(errno);
 227                 }
 228                 *num_bytes += ret;
 229                 while (ret > 0) {
 230                         if (ret < iov[i].iov_len - j) {
 231                                 j += ret;
 232                                 break;
 233                         }
 234                         ret -= iov[i].iov_len - j;
 235                         j = 0;
 236                         if (++i >= iovcnt)
 237                                 break;
 238                 }
 239         }
 240         return 0;
 241 }
 242
 243 /**
 244  * Read from a non-blocking file descriptor into a single buffer.
 245  *
 246  * \param fd The file descriptor to read from.
 247  * \param buf The buffer to read data to.
 248  * \param sz The size of \a buf.
 249  * \param num_bytes \see \ref readv_nonblock().
 250  *
 251  * This is a simple wrapper for readv_nonblock() which uses an iovec with a single
 252  * buffer.
 253  *
 254  * \return The return value of the underlying call to readv_nonblock().
 255  */
 256 int read_nonblock(int fd, void *buf, size_t sz, size_t *num_bytes)
 257 {
 258         struct iovec iov = {.iov_base = buf, .iov_len = sz};
 259         return readv_nonblock(fd, &iov, 1, num_bytes);
 260 }
 261
 262 /**
 263  * Read a buffer and compare its contents to a string, ignoring case.
 264  *
 265  * \param fd The file descriptor to read from.
 266  * \param expectation The expected string to compare to.
 267  *
 268  * The given file descriptor is expected to be in non-blocking mode. The string
 269  * comparison is performed using strncasecmp(3).
 270  *
 271  * \return Zero if no data was available, positive if a buffer was read whose
 272  * contents compare as equal to the expected string, negative otherwise.
 273  * Possible errors: (a) not enough data was read, (b) the buffer contents
 274  * compared as non-equal, (c) a read error occurred. In the first two cases,
 275  * -E_READ_PATTERN is returned. In the read error case the (negative) return
 276  * value of the underlying call to \ref read_nonblock() is returned.
 277  */
 278 int read_and_compare(int fd, const char *expectation)
 279 {
 280         size_t n, len = strlen(expectation);
 281         char *buf = alloc(len + 1);
 282         int ret = read_nonblock(fd, buf, len, &n);
 283
 284         if (ret < 0)
 285                 goto out;
 286         buf[n] = '\0';
 287         ret = 0;
 288         if (n == 0)
 289                 goto out;
 290         ret = -E_READ_PATTERN;
 291         if (n < len)
 292                 goto out;
 293         if (strncasecmp(buf, expectation, len) != 0)
 294                 goto out;
 295         ret = 1;
 296 out:
 297         free(buf);
 298         return ret;
 299 }
 300
 301 /**
 302  * Set a file descriptor to blocking mode.
 303  *
 304  * \param fd The file descriptor.
 305  *
 306  * \return Standard.
 307  */
 308 __must_check int mark_fd_blocking(int fd)
 309 {
 310         int flags = fcntl(fd, F_GETFL);
 311         if (flags < 0)
 312                 return -ERRNO_TO_PARA_ERROR(errno);
 313         flags = fcntl(fd, F_SETFL, ((long)flags) & ~O_NONBLOCK);
 314         if (flags < 0)
 315                 return -ERRNO_TO_PARA_ERROR(errno);
 316         return 1;
 317 }
 318
 319 /**
 320  * Set a file descriptor to non-blocking mode.
 321  *
 322  * \param fd The file descriptor.
 323  *
 324  * \return Standard.
 325  */
 326 __must_check int mark_fd_nonblocking(int fd)
 327 {
 328         int flags = fcntl(fd, F_GETFL);
 329         if (flags < 0)
 330                 return -ERRNO_TO_PARA_ERROR(errno);
 331         flags = fcntl(fd, F_SETFL, ((long)flags) | O_NONBLOCK);
 332         if (flags < 0)
 333                 return -ERRNO_TO_PARA_ERROR(errno);
 334         return 1;
 335 }
 336
 337 /**
 338  * Paraslash's wrapper for mmap.
 339  *
 340  * \param length Number of bytes to mmap.
 341  * \param prot Either PROT_NONE or the bitwise OR of one or more of
 342  * PROT_EXEC PROT_READ PROT_WRITE.
 343  * \param flags Exactly one of MAP_SHARED and MAP_PRIVATE.
 344  * \param fd The file to mmap from.
 345  * \param map Result pointer.
 346  *
 347  * \return Standard.
 348  *
 349  * \sa mmap(2).
 350  */
 351 int para_mmap(size_t length, int prot, int flags, int fd, void *map)
 352 {
 353         void **m = map;
 354
 355         errno = EINVAL;
 356         if (!length)
 357                 goto err;
 358         *m = mmap(NULL, length, prot, flags, fd, (off_t)0);
 359         if (*m != MAP_FAILED)
 360                 return 1;
 361 err:
 362         *m = NULL;
 363         return -ERRNO_TO_PARA_ERROR(errno);
 364 }
 365
 366 /**
 367  * Wrapper for the open(2) system call.
 368  *
 369  * \param path The filename.
 370  * \param flags The usual open(2) flags.
 371  * \param mode Specifies the permissions to use.
 372  *
 373  * The mode parameter must be specified when O_CREAT is in the flags, and is
 374  * ignored otherwise.
 375  *
 376  * \return The file descriptor on success, negative on errors.
 377  *
 378  * \sa open(2).
 379  */
 380 int para_open(const char *path, int flags, mode_t mode)
 381 {
 382         int ret = open(path, flags, mode);
 383
 384         if (ret >= 0)
 385                 return ret;
 386         return -ERRNO_TO_PARA_ERROR(errno);
 387 }
 388
 389 /**
 390  * Create a directory, don't fail if it already exists.
 391  *
 392  * \param path Name of the directory to create.
 393  *
 394  * This function passes the fixed mode value 0777 to mkdir(3) (which consults
 395  * the file creation mask and restricts this value).
 396  *
 397  * \return Zero if the path already existed as a directory or as a symbolic
 398  * link which leads to a directory, one if the path did not exist and the
 399  * directory has been created successfully, negative error code else.
 400  */
 401 int para_mkdir(const char *path)
 402 {
 403         /*
 404          * We call opendir(3) rather than relying on stat(2) because this way
 405          * we don't need extra code to get the symlink case right.
 406          */
 407         DIR *dir = opendir(path);
 408
 409         if (dir) {
 410                 closedir(dir);
 411                 return 0;
 412         }
 413         if (errno != ENOENT)
 414                 return -ERRNO_TO_PARA_ERROR(errno);
 415         return mkdir(path, 0777) == 0? 1 : -ERRNO_TO_PARA_ERROR(errno);
 416 }
 417
 418 /**
 419  * Open a file and map it into memory.
 420  *
 421  * \param path Name of the regular file to map.
 422  * \param open_mode Either \p O_RDONLY or \p O_RDWR.
 423  * \param map On success, the mapping is returned here.
 424  * \param size size of the mapping.
 425  * \param fd_ptr The file descriptor of the mapping.
 426  *
 427  * If \a fd_ptr is \p NULL, the file descriptor resulting from the underlying
 428  * open call is closed after mmap().  Otherwise the file is kept open and the
 429  * file descriptor is returned in \a fd_ptr.
 430  *
 431  * \return Standard.
 432  *
 433  * \sa para_open(), mmap(2).
 434  */
 435 int mmap_full_file(const char *path, int open_mode, void **map,
 436                 size_t *size, int *fd_ptr)
 437 {
 438         int fd, ret, mmap_prot, mmap_flags;
 439         struct stat file_status;
 440
 441         if (open_mode == O_RDONLY) {
 442                 mmap_prot = PROT_READ;
 443                 mmap_flags = MAP_PRIVATE;
 444         } else {
 445                 mmap_prot = PROT_READ | PROT_WRITE;
 446                 mmap_flags = MAP_SHARED;
 447         }
 448         ret = para_open(path, open_mode, 0);
 449         if (ret < 0)
 450                 return ret;
 451         fd = ret;
 452         if (fstat(fd, &file_status) < 0) {
 453                 ret = -ERRNO_TO_PARA_ERROR(errno);
 454                 goto out;
 455         }
 456         *size = file_status.st_size;
 457         /*
 458          * If the file is empty, *size is zero and mmap() would return EINVAL
 459          * (Invalid argument). This error is common enough to spend an extra
 460          * error code which explicitly states the problem.
 461          */
 462         ret = -E_EMPTY;
 463         if (*size == 0)
 464                 goto out;
 465         /*
 466          * If fd refers to a directory, mmap() returns ENODEV (No such device),
 467          * at least on Linux. "Is a directory" seems to be more to the point.
 468          */
 469         ret = -ERRNO_TO_PARA_ERROR(EISDIR);
 470         if (S_ISDIR(file_status.st_mode))
 471                 goto out;
 472
 473         ret = para_mmap(*size, mmap_prot, mmap_flags, fd, map);
 474 out:
 475         if (ret < 0 || !fd_ptr)
 476                 close(fd);
 477         else
 478                 *fd_ptr = fd;
 479         return ret;
 480 }
 481
 482 /**
 483  * A wrapper for munmap(2).
 484  *
 485  * \param start The start address of the memory mapping.
 486  * \param length The size of the mapping.
 487  *
 488  * If NULL is passed as the start address, the length value is ignored and the
 489  * function does nothing.
 490  *
 491  * \return Zero if NULL was passed, one if the memory area was successfully
 492  * unmapped, a negative error code otherwise.
 493  *
 494  * \sa munmap(2), \ref mmap_full_file().
 495  */
 496 int para_munmap(void *start, size_t length)
 497 {
 498         if (!start)
 499                 return 0;
 500         if (munmap(start, length) >= 0)
 501                 return 1;
 502         return -ERRNO_TO_PARA_ERROR(errno);
 503 }
 504
 505 /**
 506  * Simple wrapper for poll(2).
 507  *
 508  * It calls poll(2) and starts over if the call was interrupted by a signal.
 509  *
 510  * \param fds See poll(2).
 511  * \param nfds See poll(2).
 512  * \param timeout See poll(2).
 513  *
 514  * \return The return value of the underlying poll() call on success, the
 515  * negative paraslash error code on errors.
 516  *
 517  * All arguments are passed verbatim to poll(2).
 518  */
 519 int xpoll(struct pollfd *fds, nfds_t nfds, int timeout)
 520 {
 521         int ret;
 522
 523         do
 524                 ret = poll(fds, nfds, timeout);
 525         while (ret < 0 && errno == EINTR);
 526         return ret < 0? -ERRNO_TO_PARA_ERROR(errno) : ret;
 527 }
 528
 529 /**
 530  * Check a file descriptor for readability.
 531  *
 532  * \param fd The file descriptor.
 533  *
 534  * \return positive if fd is ready for reading, zero if it isn't, negative if
 535  * an error occurred.
 536  *
 537  * \sa \ref write_ok().
 538  */
 539 int read_ok(int fd)
 540 {
 541         struct pollfd pfd = {.fd = fd, .events = POLLIN};
 542         int ret = xpoll(&pfd, 1, 0);
 543         return ret < 0? ret : pfd.revents & POLLIN;
 544 }
 545
 546 /**
 547  * Check a file descriptor for writability.
 548  *
 549  * \param fd The file descriptor.
 550  *
 551  * \return positive if fd is ready for writing, zero if it isn't, negative if
 552  * an error occurred.
 553  *
 554  * \sa \ref read_ok().
 555  */
 556 int write_ok(int fd)
 557 {
 558         struct pollfd pfd = {.fd = fd, .events = POLLOUT};
 559         int ret = xpoll(&pfd, 1, 0);
 560         return ret < 0? ret : pfd.revents & POLLOUT;
 561 }
 562
 563 /**
 564  * Ensure that file descriptors 0, 1, and 2 are valid.
 565  *
 566  * Common approach that opens /dev/null until it gets a file descriptor greater
 567  * than two.
 568  */
 569 void valid_fd_012(void)
 570 {
 571         while (1) {
 572                 int fd = open("/dev/null", O_RDWR);
 573                 if (fd < 0)
 574                         exit(EXIT_FAILURE);
 575                 if (fd > 2) {
 576                         close(fd);
 577                         break;
 578                 }
 579         }
 580 }