string.c

   1 /* Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>, see file COPYING. */
   2
   3 /** \file string.c Memory allocation and string handling functions. */
   4
   5 #include "para.h"
   6
   7 #include <pwd.h>
   8 #include <sys/utsname.h> /* uname() */
   9 #include <regex.h>
  10 #include <langinfo.h>
  11 #include <wchar.h>
  12 #include <wctype.h>
  13
  14 #include "string.h"
  15 #include "error.h"
  16
  17 /**
  18  * Paraslash's version of realloc().
  19  *
  20  * \param p Pointer to the memory block, may be \p NULL.
  21  * \param size The desired new size.
  22  *
  23  * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
  24  * i.e. there is no need to check the return value in the caller.
  25  *
  26  * \return A pointer to newly allocated memory which is suitably aligned for
  27  * any kind of variable and may be different from \a p.
  28  *
  29  * \sa realloc(3).
  30  */
  31 __must_check void *para_realloc(void *p, size_t size)
  32 {
  33         /*
  34          * No need to check for NULL pointers: If p is NULL, the call
  35          * to realloc is equivalent to malloc(size)
  36          */
  37         assert(size);
  38         if (!(p = realloc(p, size))) {
  39                 PARA_EMERG_LOG("realloc failed (size = %zu), aborting\n",
  40                         size);
  41                 exit(EXIT_FAILURE);
  42         }
  43         return p;
  44 }
  45
  46 /**
  47  * Paraslash's version of malloc().
  48  *
  49  * \param size The desired new size.
  50  *
  51  * A wrapper for malloc(3) which exits on errors.
  52  *
  53  * \return A pointer to the allocated memory, which is suitably aligned for any
  54  * kind of variable.
  55  *
  56  * \sa malloc(3).
  57  */
  58 __must_check __malloc void *para_malloc(size_t size)
  59 {
  60         void *p;
  61
  62         assert(size);
  63         p = malloc(size);
  64         if (!p) {
  65                 PARA_EMERG_LOG("malloc failed (size = %zu), aborting\n",
  66                         size);
  67                 exit(EXIT_FAILURE);
  68         }
  69         return p;
  70 }
  71
  72 /**
  73  * Paraslash's version of calloc().
  74  *
  75  * \param size The desired new size.
  76  *
  77  * A wrapper for calloc(3) which exits on errors.
  78  *
  79  * \return A pointer to the allocated and zeroed-out memory, which is suitably
  80  * aligned for any kind of variable.
  81  *
  82  * \sa calloc(3)
  83  */
  84 __must_check __malloc void *para_calloc(size_t size)
  85 {
  86         void *ret = para_malloc(size);
  87
  88         memset(ret, 0, size);
  89         return ret;
  90 }
  91
  92 /**
  93  * Paraslash's version of strdup().
  94  *
  95  * \param s The string to be duplicated.
  96  *
  97  * A wrapper for strdup(3). It calls \p exit(EXIT_FAILURE) on errors, i.e.
  98  * there is no need to check the return value in the caller.
  99  *
 100  * \return A pointer to the duplicated string. If \a s was the \p NULL pointer,
 101  * an pointer to an empty string is returned.
 102  *
 103  * \sa strdup(3)
 104  */
 105 __must_check __malloc char *para_strdup(const char *s)
 106 {
 107         char *ret;
 108
 109         if ((ret = strdup(s? s: "")))
 110                 return ret;
 111         PARA_EMERG_LOG("strdup failed, aborting\n");
 112         exit(EXIT_FAILURE);
 113 }
 114
 115 /**
 116  * Print a formatted message to a dynamically allocated string.
 117  *
 118  * \param result The formatted string is returned here.
 119  * \param fmt The format string.
 120  * \param ap Initialized list of arguments.
 121  *
 122  * This function is similar to vasprintf(), a GNU extension which is not in C
 123  * or POSIX. It allocates a string large enough to hold the output including
 124  * the terminating null byte. The allocated string is returned via the first
 125  * argument and must be freed by the caller. However, unlike vasprintf(), this
 126  * function calls exit() if insufficient memory is available, while vasprintf()
 127  * returns -1 in this case.
 128  *
 129  * \return Number of bytes written, not including the terminating \p NULL
 130  * character.
 131  *
 132  * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
 133  */
 134 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
 135 {
 136         int ret;
 137         size_t size = 150;
 138         va_list aq;
 139
 140         *result = para_malloc(size + 1);
 141         va_copy(aq, ap);
 142         ret = vsnprintf(*result, size, fmt, aq);
 143         va_end(aq);
 144         assert(ret >= 0);
 145         if (ret < size) /* OK */
 146                 return ret;
 147         size = ret + 1;
 148         *result = para_realloc(*result, size);
 149         va_copy(aq, ap);
 150         ret = vsnprintf(*result, size, fmt, aq);
 151         va_end(aq);
 152         assert(ret >= 0 && ret < size);
 153         return ret;
 154 }
 155
 156 /**
 157  * Print to a dynamically allocated string, variable number of arguments.
 158  *
 159  * \param result See \ref xvasprintf().
 160  * \param fmt Usual format string.
 161  *
 162  * \return The return value of the underlying call to \ref xvasprintf().
 163  *
 164  * \sa \ref xvasprintf() and the references mentioned there.
 165  */
 166 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
 167 {
 168         va_list ap;
 169         unsigned ret;
 170
 171         va_start(ap, fmt);
 172         ret = xvasprintf(result, fmt, ap);
 173         va_end(ap);
 174         return ret;
 175 }
 176
 177 /**
 178  * Allocate a sufficiently large string and print into it.
 179  *
 180  * \param fmt A usual format string.
 181  *
 182  * Produce output according to \p fmt. No artificial bound on the length of the
 183  * resulting string is imposed.
 184  *
 185  * \return This function either returns a pointer to a string that must be
 186  * freed by the caller or aborts without returning.
 187  *
 188  * \sa printf(3), \ref xasprintf().
 189  */
 190 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
 191 {
 192         char *msg;
 193         va_list ap;
 194
 195         va_start(ap, fmt);
 196         xvasprintf(&msg, fmt, ap);
 197         va_end(ap);
 198         return msg;
 199 }
 200
 201 /**
 202  * Free the content of a pointer and set it to NULL.
 203  *
 204  * \param arg A pointer to the pointer whose content should be freed.
 205  *
 206  * If arg is NULL, the function returns immediately. Otherwise it frees the
 207  * memory pointed to by *arg and sets *arg to NULL. Hence callers have to pass
 208  * the *address* of the pointer variable that points to the memory which should
 209  * be freed.
 210  */
 211 void freep(void *arg)
 212 {
 213         if (arg) {
 214                 void **ptr = arg;
 215                 free(*ptr);
 216                 *ptr = NULL;
 217         }
 218 }
 219
 220 /**
 221  * Paraslash's version of strcat().
 222  *
 223  * \param a String to be appended to.
 224  * \param b String to append.
 225  *
 226  * Append \p b to \p a.
 227  *
 228  * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
 229  * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
 230  * return \a a without making a copy of \a a.  Otherwise, construct the
 231  * concatenation \a c, free \a a (but not \a b) and return \a c.
 232  *
 233  * \sa strcat(3).
 234  */
 235 __must_check __malloc char *para_strcat(char *a, const char *b)
 236 {
 237         char *tmp;
 238
 239         if (!a)
 240                 return para_strdup(b);
 241         if (!b)
 242                 return a;
 243         tmp = make_message("%s%s", a, b);
 244         free(a);
 245         return tmp;
 246 }
 247
 248 /**
 249  * Paraslash's version of dirname().
 250  *
 251  * \param name Pointer to the full path.
 252  *
 253  * Compute the directory component of \p name.
 254  *
 255  * \return If \a name is \p NULL or the empty string, return \p NULL.
 256  * Otherwise, Make a copy of \a name and return its directory component. Caller
 257  * is responsible to free the result.
 258  */
 259 __must_check __malloc char *para_dirname(const char *name)
 260 {
 261         char *p, *ret;
 262
 263         if (!name || !*name)
 264                 return NULL;
 265         ret = para_strdup(name);
 266         p = strrchr(ret, '/');
 267         if (!p)
 268                 *ret = '\0';
 269         else
 270                 *p = '\0';
 271         return ret;
 272 }
 273
 274 /**
 275  * Paraslash's version of basename().
 276  *
 277  * \param name Pointer to the full path.
 278  *
 279  * Compute the filename component of \a name.
 280  *
 281  * \return \p NULL if (a) \a name is the empty string or \p NULL, or (b) name
 282  * ends with a slash.  Otherwise, a pointer within \a name is returned.  Caller
 283  * must not free the result.
 284  */
 285 __must_check char *para_basename(const char *name)
 286 {
 287         char *ret;
 288
 289         if (!name || !*name)
 290                 return NULL;
 291         ret = strrchr(name, '/');
 292         if (!ret)
 293                 return (char *)name;
 294         ret++;
 295         return ret;
 296 }
 297
 298 /**
 299  * Get the logname of the current user.
 300  *
 301  * \return A dynamically allocated string that must be freed by the caller. On
 302  * errors, the string "unknown_user" is returned, i.e. this function never
 303  * returns \p NULL.
 304  *
 305  * \sa getpwuid(3).
 306  */
 307 __must_check __malloc char *para_logname(void)
 308 {
 309         struct passwd *pw = getpwuid(getuid());
 310         return para_strdup(pw? pw->pw_name : "unknown_user");
 311 }
 312
 313 /**
 314  * Get the home directory of the current user.
 315  *
 316  * \return A dynamically allocated string that must be freed by the caller. If
 317  * the home directory could not be found, this function returns "/tmp".
 318  */
 319 __must_check __malloc char *para_homedir(void)
 320 {
 321         struct passwd *pw = getpwuid(getuid());
 322         return para_strdup(pw? pw->pw_dir : "/tmp");
 323 }
 324
 325 /**
 326  * Get the own hostname.
 327  *
 328  * \return A dynamically allocated string containing the hostname.
 329  *
 330  * \sa uname(2).
 331  */
 332 __malloc char *para_hostname(void)
 333 {
 334         struct utsname u;
 335
 336         uname(&u);
 337         return para_strdup(u.nodename);
 338 }
 339
 340 /**
 341  * Call a custom function for each complete line.
 342  *
 343  * \param flags Any combination of flags defined in \ref for_each_line_flags.
 344  * \param buf The buffer containing data separated by newlines.
 345  * \param size The number of bytes in \a buf.
 346  * \param line_handler The custom function.
 347  * \param private_data Pointer passed to \a line_handler.
 348  *
 349  * For each complete line in \p buf, \p line_handler is called. The first
 350  * argument to \p line_handler is (a copy of) the current line, and \p
 351  * private_data is passed as the second argument.  If the \p FELF_READ_ONLY
 352  * flag is unset, a pointer into \a buf is passed to the line handler,
 353  * otherwise a pointer to a copy of the current line is passed instead. This
 354  * copy is freed immediately after the line handler returns.
 355  *
 356  * The function returns if \p line_handler returns a negative value or no more
 357  * lines are in the buffer.  The rest of the buffer (last chunk containing an
 358  * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
 359  * unset.
 360  *
 361  * \return On success this function returns the number of bytes not handled to
 362  * \p line_handler. The only possible error is a negative return value from the
 363  * line handler. In this case processing stops and the return value of the line
 364  * handler is returned to indicate failure.
 365  *
 366  * \sa \ref for_each_line_flags.
 367  */
 368 int for_each_line(unsigned flags, char *buf, size_t size,
 369                 line_handler_t *line_handler, void *private_data)
 370 {
 371         char *start = buf, *end;
 372         int ret, i, num_lines = 0;
 373
 374 //      PARA_NOTICE_LOG("buf: %s\n", buf);
 375         while (start < buf + size) {
 376                 char *next_null;
 377                 char *next_cr;
 378
 379                 next_cr = memchr(start, '\n', buf + size - start);
 380                 next_null = memchr(start, '\0', next_cr?
 381                         next_cr - start : buf + size - start);
 382                 if (!next_cr && !next_null)
 383                         break;
 384                 if (next_null)
 385                         end = next_null;
 386                 else
 387                         end = next_cr;
 388                 num_lines++;
 389                 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
 390                         if (flags & FELF_READ_ONLY) {
 391                                 size_t s = end - start;
 392                                 char *b = para_malloc(s + 1);
 393                                 memcpy(b, start, s);
 394                                 b[s] = '\0';
 395                                 ret = line_handler(b, private_data);
 396                                 free(b);
 397                         } else {
 398                                 *end = '\0';
 399                                 ret = line_handler(start, private_data);
 400                         }
 401                         if (ret < 0)
 402                                 return ret;
 403                 }
 404                 start = ++end;
 405         }
 406         i = buf + size - start;
 407         if (i && i != size && !(flags & FELF_READ_ONLY))
 408                 memmove(buf, start, i);
 409         return i;
 410 }
 411
 412 /** Return the hex characters of the lower 4 bits. */
 413 #define hex(a) (hexchar[(a) & 15])
 414
 415 static void write_size_header(char *buf, int n)
 416 {
 417         static char hexchar[] = "0123456789abcdef";
 418
 419         buf[0] = hex(n >> 12);
 420         buf[1] = hex(n >> 8);
 421         buf[2] = hex(n >> 4);
 422         buf[3] = hex(n);
 423         buf[4] = ' ';
 424 }
 425
 426 /**
 427  * Read a four-byte hex-number and return its value.
 428  *
 429  * Each status item sent by para_server is prefixed with such a hex number in
 430  * ASCII which describes the size of the status item.
 431  *
 432  * \param buf The buffer which must be at least four bytes long.
 433  *
 434  * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
 435  * buffer did not contain only hex digits.
 436  */
 437 int read_size_header(const char *buf)
 438 {
 439         int i, len = 0;
 440
 441         for (i = 0; i < 4; i++) {
 442                 unsigned char c = buf[i];
 443                 len <<= 4;
 444                 if (c >= '0' && c <= '9') {
 445                         len += c - '0';
 446                         continue;
 447                 }
 448                 if (c >= 'a' && c <= 'f') {
 449                         len += c - 'a' + 10;
 450                         continue;
 451                 }
 452                 return -E_SIZE_PREFIX;
 453         }
 454         if (buf[4] != ' ')
 455                 return -E_SIZE_PREFIX;
 456         return len;
 457 }
 458
 459 /**
 460  * Safely print into a buffer at a given offset.
 461  *
 462  * \param b Determines the buffer, its size, and the offset.
 463  * \param fmt The format string.
 464  *
 465  * This function prints into the buffer given by \a b at the offset which is
 466  * also given by \a b. If there is not enough space to hold the result, the
 467  * buffer size is doubled until the underlying call to vsnprintf() succeeds
 468  * or the size of the buffer exceeds the maximal size specified in \a b.
 469  *
 470  * In the latter case the unmodified \a buf and \a offset values as well as the
 471  * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
 472  * If this function succeeds, i.e. returns a non-negative value, the offset of
 473  * \a b is reset to zero and the given data is written to the beginning of the
 474  * buffer. If \a max_size_handler() returns a negative value, this value is
 475  * returned by \a para_printf().
 476  *
 477  * Upon return, the offset of \a b is adjusted accordingly so that subsequent
 478  * calls to this function append data to what is already contained in the
 479  * buffer.
 480  *
 481  * It's OK to call this function with \p b->buf being \p NULL. In this case, an
 482  * initial buffer is allocated.
 483  *
 484  * \return The number of bytes printed into the buffer (not including the
 485  * terminating \p NULL byte) on success, negative on errors. If there is no
 486  * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
 487  * fails.
 488  *
 489  * \sa make_message(), vsnprintf(3).
 490  */
 491 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
 492 {
 493         int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
 494
 495         if (!b->buf) {
 496                 b->buf = para_malloc(128);
 497                 b->size = 128;
 498                 b->offset = 0;
 499         }
 500         while (1) {
 501                 char *p = b->buf + b->offset;
 502                 size_t size = b->size - b->offset;
 503                 va_list ap;
 504
 505                 if (size > sz_off) {
 506                         va_start(ap, fmt);
 507                         ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
 508                         va_end(ap);
 509                         if (ret > -1 && ret < size - sz_off) { /* success */
 510                                 b->offset += ret + sz_off;
 511                                 if (sz_off)
 512                                         write_size_header(p, ret);
 513                                 return ret + sz_off;
 514                         }
 515                 }
 516                 /* check if we may grow the buffer */
 517                 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
 518                         /* try again with more space */
 519                         b->size *= 2;
 520                         b->buf = para_realloc(b->buf, b->size);
 521                         continue;
 522                 }
 523                 /* can't grow buffer */
 524                 if (!b->offset || !b->max_size_handler) /* message too large */
 525                         return -ERRNO_TO_PARA_ERROR(ENOSPC);
 526                 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
 527                 if (ret < 0)
 528                         return ret;
 529                 b->offset = 0;
 530         }
 531 }
 532
 533 /** \cond llong_minmax */
 534 /* LLONG_MAX and LLONG_MIN might not be defined. */
 535 #ifndef LLONG_MAX
 536 #define LLONG_MAX 9223372036854775807LL
 537 #endif
 538 #ifndef LLONG_MIN
 539 #define LLONG_MIN (-LLONG_MAX - 1LL)
 540 #endif
 541 /** \endcond llong_minmax */
 542
 543 /**
 544  * Convert a string to a 64-bit signed integer value.
 545  *
 546  * \param str The string to be converted.
 547  * \param value Result pointer.
 548  *
 549  * \return Standard.
 550  *
 551  * \sa \ref para_atoi32(), strtol(3), atoi(3).
 552  */
 553 int para_atoi64(const char *str, int64_t *value)
 554 {
 555         char *endptr;
 556         long long tmp;
 557
 558         errno = 0; /* To distinguish success/failure after call */
 559         tmp = strtoll(str, &endptr, 10);
 560         if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
 561                 return -E_ATOI_OVERFLOW;
 562         /*
 563          * If there were no digits at all, strtoll() stores the original value
 564          * of str in *endptr.
 565          */
 566         if (endptr == str)
 567                 return -E_ATOI_NO_DIGITS;
 568         /*
 569          * The implementation may also set errno and return 0 in case no
 570          * conversion was performed.
 571          */
 572         if (errno != 0 && tmp == 0)
 573                 return -E_ATOI_NO_DIGITS;
 574         if (*endptr != '\0') /* Further characters after number */
 575                 return -E_ATOI_JUNK_AT_END;
 576         *value = tmp;
 577         return 1;
 578 }
 579
 580 /**
 581  * Convert a string to a 32-bit signed integer value.
 582  *
 583  * \param str The string to be converted.
 584  * \param value Result pointer.
 585  *
 586  * \return Standard.
 587  *
 588  * \sa \ref para_atoi64().
 589 */
 590 int para_atoi32(const char *str, int32_t *value)
 591 {
 592         int64_t tmp;
 593         int ret;
 594         const int32_t max = 2147483647;
 595
 596         ret = para_atoi64(str, &tmp);
 597         if (ret < 0)
 598                 return ret;
 599         if (tmp > max || tmp < -max - 1)
 600                 return -E_ATOI_OVERFLOW;
 601         *value = tmp;
 602         return 1;
 603 }
 604
 605 static int get_next_word(const char *buf, const char *delim, char **word)
 606 {
 607         enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
 608                 LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
 609         const char *in;
 610         char *out;
 611         int ret, state = 0;
 612
 613         out = para_malloc(strlen(buf) + 1);
 614         *out = '\0';
 615         *word = out;
 616         for (in = buf; *in; in++) {
 617                 const char *p;
 618
 619                 switch (*in) {
 620                 case '\\':
 621                         if (state & LSF_BACKSLASH) /* \\ */
 622                                 goto copy_char;
 623                         state |= LSF_BACKSLASH;
 624                         state |= LSF_HAVE_WORD;
 625                         continue;
 626                 case 'n':
 627                 case 't':
 628                         if (state & LSF_BACKSLASH) { /* \n or \t */
 629                                 *out++ = (*in == 'n')? '\n' : '\t';
 630                                 state &= ~LSF_BACKSLASH;
 631                                 continue;
 632                         }
 633                         goto copy_char;
 634                 case '"':
 635                         if (state & LSF_BACKSLASH) /* \" */
 636                                 goto copy_char;
 637                         if (state & LSF_SINGLE_QUOTE) /* '" */
 638                                 goto copy_char;
 639                         if (state & LSF_DOUBLE_QUOTE) {
 640                                 state &= ~LSF_DOUBLE_QUOTE;
 641                                 continue;
 642                         }
 643                         state |= LSF_HAVE_WORD;
 644                         state |= LSF_DOUBLE_QUOTE;
 645                         continue;
 646                 case '\'':
 647                         if (state & LSF_BACKSLASH) /* \' */
 648                                 goto copy_char;
 649                         if (state & LSF_DOUBLE_QUOTE) /* "' */
 650                                 goto copy_char;
 651                         if (state & LSF_SINGLE_QUOTE) {
 652                                 state &= ~LSF_SINGLE_QUOTE;
 653                                 continue;
 654                         }
 655                         state |= LSF_HAVE_WORD;
 656                         state |= LSF_SINGLE_QUOTE;
 657                         continue;
 658                 }
 659                 for (p = delim; *p; p++) {
 660                         if (*in != *p)
 661                                 continue;
 662                         if (state & LSF_BACKSLASH)
 663                                 goto copy_char;
 664                         if (state & LSF_SINGLE_QUOTE)
 665                                 goto copy_char;
 666                         if (state & LSF_DOUBLE_QUOTE)
 667                                 goto copy_char;
 668                         if (state & LSF_HAVE_WORD)
 669                                 goto success;
 670                         break;
 671                 }
 672                 if (*p) /* ignore delimiter at the beginning */
 673                         continue;
 674 copy_char:
 675                 state |= LSF_HAVE_WORD;
 676                 *out++ = *in;
 677                 state &= ~LSF_BACKSLASH;
 678         }
 679         ret = 0;
 680         if (!(state & LSF_HAVE_WORD))
 681                 goto out;
 682         ret = -ERRNO_TO_PARA_ERROR(EINVAL);
 683         if (state & LSF_BACKSLASH) {
 684                 PARA_ERROR_LOG("trailing backslash\n");
 685                 goto out;
 686         }
 687         if ((state & LSF_SINGLE_QUOTE) || (state & LSF_DOUBLE_QUOTE)) {
 688                 PARA_ERROR_LOG("unmatched quote character\n");
 689                 goto out;
 690         }
 691 success:
 692         *out = '\0';
 693         return in - buf;
 694 out:
 695         free(*word);
 696         *word = NULL;
 697         return ret;
 698 }
 699
 700 /**
 701  * Get the number of the word the cursor is on.
 702  *
 703  * \param buf The zero-terminated line buffer.
 704  * \param delim Characters that separate words.
 705  * \param point The cursor position.
 706  *
 707  * \return Zero-based word number.
 708  */
 709 int compute_word_num(const char *buf, const char *delim, int point)
 710 {
 711         int ret, num_words;
 712         const char *p;
 713         char *word;
 714
 715         for (p = buf, num_words = 0; ; p += ret, num_words++) {
 716                 ret = get_next_word(p, delim, &word);
 717                 if (ret <= 0)
 718                         break;
 719                 free(word);
 720                 if (p + ret >= buf + point)
 721                         break;
 722         }
 723         return num_words;
 724 }
 725
 726 /**
 727  * Free an array of words created by create_argv() or create_shifted_argv().
 728  *
 729  * \param argv A pointer previously obtained by \ref create_argv().
 730  */
 731 void free_argv(char **argv)
 732 {
 733         int i;
 734
 735         if (!argv)
 736                 return;
 737         for (i = 0; argv[i]; i++)
 738                 free(argv[i]);
 739         free(argv);
 740 }
 741
 742 static int create_argv_offset(int offset, const char *buf, const char *delim,
 743                 char ***result)
 744 {
 745         char *word, **argv = para_malloc((offset + 1) * sizeof(char *));
 746         const char *p;
 747         int i, ret;
 748
 749         for (i = 0; i < offset; i++)
 750                 argv[i] = NULL;
 751         for (p = buf; p && *p; p += ret, i++) {
 752                 ret = get_next_word(p, delim, &word);
 753                 if (ret < 0)
 754                         goto err;
 755                 if (!ret)
 756                         break;
 757                 argv = para_realloc(argv, (i + 2) * sizeof(char*));
 758                 argv[i] = word;
 759         }
 760         argv[i] = NULL;
 761         *result = argv;
 762         return i;
 763 err:
 764         while (i > 0)
 765                 free(argv[--i]);
 766         free(argv);
 767         *result = NULL;
 768         return ret;
 769 }
 770
 771 /**
 772  * Split a buffer into words.
 773  *
 774  * This parser honors single and double quotes, backslash-escaped characters
 775  * and special characters like \\n. The result contains pointers to copies of
 776  * the words contained in buf and has to be freed by using \ref free_argv().
 777  *
 778  * \param buf The buffer to be split.
 779  * \param delim Each character in this string is treated as a separator.
 780  * \param result The array of words is returned here.
 781  *
 782  * It's OK to pass NULL as the buffer argument. This is equivalent to passing
 783  * the empty string.
 784  *
 785  * \return Number of words in buf, negative on errors. The array returned
 786  * through the result pointer is NULL terminated.
 787  */
 788 int create_argv(const char *buf, const char *delim, char ***result)
 789 {
 790         return create_argv_offset(0, buf, delim, result);
 791 }
 792
 793 /**
 794  * Split a buffer into words, offset one.
 795  *
 796  * This is similar to \ref create_argv() but the returned array is one element
 797  * larger, words start at index one and element zero is initialized to \p NULL.
 798  * Callers must set element zero to a non-NULL value before calling free_argv()
 799  * on the returned array to avoid a memory leak.
 800  *
 801  * \param buf See \ref create_argv().
 802  * \param delim See \ref create_argv().
 803  * \param result See \ref create_argv().
 804  *
 805  * \return Number of words plus one on success, negative on errors.
 806  */
 807 int create_shifted_argv(const char *buf, const char *delim, char ***result)
 808 {
 809         return create_argv_offset(1, buf, delim, result);
 810 }
 811
 812 /**
 813  * Find out if the given string is contained in the arg vector.
 814  *
 815  * \param arg The string to look for.
 816  * \param argv The array to search.
 817  *
 818  * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
 819  * arg was not found in \a argv.
 820  */
 821 int find_arg(const char *arg, char **argv)
 822 {
 823         int i;
 824
 825         if (!argv)
 826                 return -E_ARG_NOT_FOUND;
 827         for (i = 0; argv[i]; i++)
 828                 if (strcmp(arg, argv[i]) == 0)
 829                         return i;
 830         return -E_ARG_NOT_FOUND;
 831 }
 832
 833 /**
 834  * Compile a regular expression.
 835  *
 836  * This simple wrapper calls regcomp() and logs a message on errors.
 837  *
 838  * \param preg See regcomp(3).
 839  * \param regex See regcomp(3).
 840  * \param cflags See regcomp(3).
 841  *
 842  * \return Standard.
 843  */
 844 int para_regcomp(regex_t *preg, const char *regex, int cflags)
 845 {
 846         char *buf;
 847         size_t size;
 848         int ret = regcomp(preg, regex, cflags);
 849
 850         if (ret == 0)
 851                 return 1;
 852         size = regerror(ret, preg, NULL, 0);
 853         buf = para_malloc(size);
 854         regerror(ret, preg, buf, size);
 855         PARA_ERROR_LOG("%s\n", buf);
 856         free(buf);
 857         return -E_REGEX;
 858 }
 859
 860 /**
 861  * strdup() for not necessarily zero-terminated strings.
 862  *
 863  * \param src The source buffer.
 864  * \param len The number of bytes to be copied.
 865  *
 866  * \return A 0-terminated buffer of length \a len + 1.
 867  *
 868  * This is similar to strndup(), which is a GNU extension. However, one
 869  * difference is that strndup() returns \p NULL if insufficient memory was
 870  * available while this function aborts in this case.
 871  *
 872  * \sa strdup(), \ref para_strdup().
 873  */
 874 char *safe_strdup(const char *src, size_t len)
 875 {
 876         char *p;
 877
 878         assert(len < (size_t)-1);
 879         p = para_malloc(len + 1);
 880         if (len > 0)
 881                 memcpy(p, src, len);
 882         p[len] = '\0';
 883         return p;
 884 }
 885
 886 /**
 887  * Copy the value of a key=value pair.
 888  *
 889  * This checks whether the given buffer starts with "key=", ignoring case. If
 890  * yes, a copy of the value is returned. The source buffer may not be
 891  * zero-terminated.
 892  *
 893  * \param src The source buffer.
 894  * \param len The number of bytes of the tag.
 895  * \param key Only copy if it is the value of this key.
 896  *
 897  * \return A zero-terminated buffer, or \p NULL if the key was
 898  * not of the given type.
 899  */
 900 char *key_value_copy(const char *src, size_t len, const char *key)
 901 {
 902         int keylen = strlen(key);
 903
 904         if (len <= keylen)
 905                 return NULL;
 906         if (strncasecmp(src, key, keylen))
 907                 return NULL;
 908         if (src[keylen] != '=')
 909                 return NULL;
 910         return safe_strdup(src + keylen + 1, len - keylen - 1);
 911 }
 912
 913 static bool utf8_mode(void)
 914 {
 915         static bool initialized, have_utf8;
 916
 917         if (!initialized) {
 918                 char *info = nl_langinfo(CODESET);
 919                 have_utf8 = (info && strcmp(info, "UTF-8") == 0);
 920                 initialized = true;
 921                 PARA_INFO_LOG("%susing UTF-8 character encoding\n",
 922                         have_utf8? "" : "not ");
 923         }
 924         return have_utf8;
 925 }
 926
 927 static int xwcwidth(wchar_t wc, size_t pos)
 928 {
 929         int n;
 930
 931         /* special-case for tab */
 932         if (wc == 0x09) /* tab */
 933                 return (pos | 7) + 1 - pos;
 934         n = wcwidth(wc);
 935         /* wcswidth() returns -1 for non-printable characters */
 936         return n >= 0? n : 1;
 937 }
 938
 939 static size_t xwcswidth(const wchar_t *s, size_t n)
 940 {
 941         size_t w = 0;
 942
 943         while (n--)
 944                 w += xwcwidth(*s++, w);
 945         return w;
 946 }
 947
 948 /**
 949  * Skip a given number of cells at the beginning of a string.
 950  *
 951  * \param s The input string.
 952  * \param cells_to_skip Desired number of cells that should be skipped.
 953  * \param bytes_to_skip Result.
 954  *
 955  * This function computes how many input bytes must be skipped to advance a
 956  * string by the given width. If the current character encoding is not UTF-8,
 957  * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
 958  * \a s is treated as a multibyte string and on successful return, \a s +
 959  * bytes_to_skip points to the start of a multibyte string such that the total
 960  * width of the multibyte characters that are skipped by advancing \a s that
 961  * many bytes equals at least \a cells_to_skip.
 962  *
 963  * \return Standard.
 964  */
 965 int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
 966 {
 967         wchar_t wc;
 968         mbstate_t ps;
 969         size_t n, bytes_parsed, cells_skipped;
 970
 971         *bytes_to_skip = 0;
 972         if (cells_to_skip == 0)
 973                 return 0;
 974         if (!utf8_mode()) {
 975                 *bytes_to_skip = cells_to_skip;
 976                 return 0;
 977         }
 978         bytes_parsed = cells_skipped = 0;
 979         memset(&ps, 0, sizeof(ps));
 980         n = strlen(s);
 981         while (cells_to_skip > cells_skipped) {
 982                 size_t mbret;
 983
 984                 mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps);
 985                 assert(mbret != 0);
 986                 if (mbret == (size_t)-1 || mbret == (size_t)-2)
 987                         return -ERRNO_TO_PARA_ERROR(EILSEQ);
 988                 bytes_parsed += mbret;
 989                 cells_skipped += xwcwidth(wc, cells_skipped);
 990         }
 991         *bytes_to_skip = bytes_parsed;
 992         return 1;
 993 }
 994
 995 /**
 996  * Compute the width of an UTF-8 string.
 997  *
 998  * \param s The string.
 999  * \param result The width of \a s is returned here.
1000  *
1001  * If not in UTF8-mode. this function is just a wrapper for strlen(3).
1002  * Otherwise \a s is treated as an UTF-8 string and its display width is
1003  * computed. Note that this function may fail if the underlying call to
1004  * mbsrtowcs(3) fails, so the caller must check the return value.
1005  *
1006  * \sa nl_langinfo(3), wcswidth(3).
1007  *
1008  * \return Standard.
1009  */
1010 __must_check int strwidth(const char *s, size_t *result)
1011 {
1012         const char *src = s;
1013         mbstate_t state;
1014         static wchar_t *dest;
1015         size_t num_wchars;
1016
1017         /*
1018          * Never call any log function here. This may result in an endless loop
1019          * as para_gui's para_log() calls this function.
1020          */
1021
1022         if (!utf8_mode()) {
1023                 *result = strlen(s);
1024                 return 0;
1025         }
1026         memset(&state, 0, sizeof(state));
1027         *result = 0;
1028         num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1029         if (num_wchars == (size_t)-1)
1030                 return -ERRNO_TO_PARA_ERROR(errno);
1031         if (num_wchars == 0)
1032                 return 0;
1033         dest = para_malloc((num_wchars + 1) * sizeof(*dest));
1034         src = s;
1035         memset(&state, 0, sizeof(state));
1036         num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1037         assert(num_wchars > 0 && num_wchars != (size_t)-1);
1038         *result = xwcswidth(dest, num_wchars);
1039         free(dest);
1040         return 1;
1041 }
1042
1043 /**
1044  * Truncate and sanitize a (wide character) string.
1045  *
1046  * This replaces all non-printable characters by spaces and makes sure that the
1047  * modified string does not exceed the given maximal width.
1048  *
1049  * \param src The source string in multi-byte form.
1050  * \param max_width The maximal number of cells the result may occupy.
1051  * \param result Sanitized multi-byte string, must be freed by caller.
1052  * \param width The width of the sanitized string, always <= max_width.
1053  *
1054  * The function is wide-character aware but falls back to C strings for
1055  * non-UTF-8 locales.
1056  *
1057  * \return Standard. On success, *result points to a sanitized copy of the
1058  * given string. This copy was allocated with malloc() and should hence be
1059  * freed when the caller is no longer interested in the result.
1060  *
1061  * The function fails if the given string contains an invalid multibyte
1062  * sequence. In this case, *result is set to NULL, and *width to zero.
1063  */
1064 __must_check int sanitize_str(const char *src, size_t max_width,
1065                 char **result, size_t *width)
1066 {
1067         mbstate_t state;
1068         static wchar_t *wcs;
1069         size_t num_wchars, n;
1070
1071         if (!utf8_mode()) {
1072                 *result = para_strdup(src);
1073                 /* replace non-printable characters by spaces */
1074                 for (n = 0; n < max_width && src[n]; n++) {
1075                         if (!isprint((unsigned char)src[n]))
1076                                 (*result)[n] = ' ';
1077                 }
1078                 (*result)[n] = '\0';
1079                 *width = n;
1080                 return 0;
1081         }
1082         *result = NULL;
1083         *width = 0;
1084         memset(&state, 0, sizeof(state));
1085         num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1086         if (num_wchars == (size_t)-1)
1087                 return -ERRNO_TO_PARA_ERROR(errno);
1088         wcs = para_malloc((num_wchars + 1) * sizeof(*wcs));
1089         memset(&state, 0, sizeof(state));
1090         num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
1091         assert(num_wchars != (size_t)-1);
1092         for (n = 0; n < num_wchars && *width < max_width; n++) {
1093                 if (!iswprint(wcs[n]))
1094                         wcs[n] = L' ';
1095                 *width += xwcwidth(wcs[n], *width);
1096         }
1097         wcs[n] = L'\0';
1098         n = wcstombs(NULL, wcs, 0) + 1;
1099         *result = para_malloc(n);
1100         num_wchars = wcstombs(*result, wcs, n);
1101         assert(num_wchars != (size_t)-1);
1102         free(wcs);
1103         return 1;
1104 }