string.c

   1 /* Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>, see file COPYING. */
   2
   3 /** \file string.c Memory allocation and string handling functions. */
   4
   5 #include "para.h"
   6
   7 #include <pwd.h>
   8 #include <sys/utsname.h> /* uname() */
   9 #include <regex.h>
  10 #include <langinfo.h>
  11 #include <wchar.h>
  12 #include <wctype.h>
  13
  14 #include "string.h"
  15 #include "error.h"
  16
  17 /**
  18  * Reallocate an array, abort on failure or bugs.
  19  *
  20  * \param ptr Pointer to the memory block, may be NULL.
  21  * \param nmemb Number of elements.
  22  * \param size The size of one element in bytes.
  23  *
  24  * A wrapper for realloc(3) which aborts on invalid arguments or integer
  25  * overflow. The wrapper also terminates the current process on allocation
  26  * errors, so the caller does not need to check for failure.
  27  *
  28  * \return A pointer to newly allocated memory which is suitably aligned for
  29  * any kind of variable and may be different from ptr.
  30  *
  31  * \sa realloc(3).
  32  */
  33 __must_check void *arr_realloc(void *ptr, size_t nmemb, size_t size)
  34 {
  35         size_t pr;
  36
  37         assert(size > 0);
  38         assert(nmemb > 0);
  39         assert(!__builtin_mul_overflow(nmemb, size, &pr));
  40         assert(pr != 0);
  41         ptr = realloc(ptr, pr);
  42         assert(ptr);
  43         return ptr;
  44 }
  45
  46 /**
  47  * Allocate an array, abort on failure or bugs.
  48  *
  49  * \param nmemb See \ref arr_realloc().
  50  * \param size See \ref arr_realloc().
  51  *
  52  * Like \ref arr_realloc(), this aborts on invalid arguments, integer overflow
  53  * and allocation errors.
  54  *
  55  * \return A pointer to newly allocated memory which is suitably aligned for
  56  * any kind of variable.
  57  *
  58  * \sa See \ref arr_realloc().
  59  */
  60 __must_check __malloc void *arr_alloc(size_t nmemb, size_t size)
  61 {
  62         return arr_realloc(NULL, nmemb, size);
  63 }
  64
  65 /**
  66  * Allocate and initialize an array, abort on failure or bugs.
  67  *
  68  * \param nmemb See \ref arr_realloc().
  69  * \param size See \ref arr_realloc().
  70  *
  71  * This calls \ref arr_alloc() and zeroes-out the array.
  72  *
  73  * \return See \ref arr_alloc().
  74  */
  75 __must_check __malloc void *arr_zalloc(size_t nmemb, size_t size)
  76 {
  77         void *ptr = arr_alloc(nmemb, size);
  78
  79         /*
  80          * This multiplication can not overflow because the above call to \ref
  81          * arr_alloc() aborts on overflow.
  82          */
  83         memset(ptr, 0, nmemb * size);
  84         return ptr;
  85 }
  86
  87 /**
  88  * Allocate and initialize memory.
  89  *
  90  * \param size The desired new size.
  91  *
  92  * \return A pointer to the allocated and zeroed-out memory, which is suitably
  93  * aligned for any kind of variable.
  94  *
  95  * \sa \ref alloc(), calloc(3).
  96  */
  97 __must_check void *zalloc(size_t size)
  98 {
  99         return arr_zalloc(1, size);
 100 }
 101
 102 /**
 103  * Paraslash's version of realloc().
 104  *
 105  * \param p Pointer to the memory block, may be \p NULL.
 106  * \param size The desired new size.
 107  *
 108  * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
 109  * i.e. there is no need to check the return value in the caller.
 110  *
 111  * \return A pointer to newly allocated memory which is suitably aligned for
 112  * any kind of variable and may be different from \a p.
 113  *
 114  * \sa realloc(3).
 115  */
 116 __must_check void *para_realloc(void *p, size_t size)
 117 {
 118         return arr_realloc(p, 1, size);
 119 }
 120
 121 /**
 122  * Paraslash's version of malloc().
 123  *
 124  * \param size The desired new size.
 125  *
 126  * A wrapper for malloc(3) which exits on errors.
 127  *
 128  * \return A pointer to the allocated memory, which is suitably aligned for any
 129  * kind of variable.
 130  *
 131  * \sa malloc(3).
 132  */
 133 __must_check __malloc void *alloc(size_t size)
 134 {
 135         return arr_alloc(1, size);
 136 }
 137
 138 /**
 139  * Paraslash's version of strdup().
 140  *
 141  * \param s The string to be duplicated.
 142  *
 143  * A strdup(3)-like function which aborts if insufficient memory was available
 144  * to allocate the duplicated string, absolving the caller from the
 145  * responsibility to check for failure.
 146  *
 147  * \return A pointer to the duplicated string. Unlike strdup(3), the caller may
 148  * pass NULL, in which case the function returns a pointer to an empty string.
 149  * Regardless of whether or not NULL was passed, the returned string is
 150  * allocated on the heap and has to be freed by the caller.
 151  *
 152  * \sa strdup(3).
 153  */
 154 __must_check __malloc char *para_strdup(const char *s)
 155 {
 156         char *dupped_string = strdup(s? s: "");
 157
 158         assert(dupped_string);
 159         return dupped_string;
 160 }
 161
 162 /**
 163  * Print a formatted message to a dynamically allocated string.
 164  *
 165  * \param result The formatted string is returned here.
 166  * \param fmt The format string.
 167  * \param ap Initialized list of arguments.
 168  *
 169  * This function is similar to vasprintf(), a GNU extension which is not in C
 170  * or POSIX. It allocates a string large enough to hold the output including
 171  * the terminating null byte. The allocated string is returned via the first
 172  * argument and must be freed by the caller. However, unlike vasprintf(), this
 173  * function calls exit() if insufficient memory is available, while vasprintf()
 174  * returns -1 in this case.
 175  *
 176  * \return Number of bytes written, not including the terminating \p NULL
 177  * character.
 178  *
 179  * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
 180  */
 181 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
 182 {
 183         int ret;
 184         size_t size = 150;
 185         va_list aq;
 186
 187         *result = alloc(size + 1);
 188         va_copy(aq, ap);
 189         ret = vsnprintf(*result, size, fmt, aq);
 190         va_end(aq);
 191         assert(ret >= 0);
 192         if (ret < size) /* OK */
 193                 return ret;
 194         size = ret + 1;
 195         *result = para_realloc(*result, size);
 196         va_copy(aq, ap);
 197         ret = vsnprintf(*result, size, fmt, aq);
 198         va_end(aq);
 199         assert(ret >= 0 && ret < size);
 200         return ret;
 201 }
 202
 203 /**
 204  * Print to a dynamically allocated string, variable number of arguments.
 205  *
 206  * \param result See \ref xvasprintf().
 207  * \param fmt Usual format string.
 208  *
 209  * \return The return value of the underlying call to \ref xvasprintf().
 210  *
 211  * \sa \ref xvasprintf() and the references mentioned there.
 212  */
 213 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
 214 {
 215         va_list ap;
 216         unsigned ret;
 217
 218         va_start(ap, fmt);
 219         ret = xvasprintf(result, fmt, ap);
 220         va_end(ap);
 221         return ret;
 222 }
 223
 224 /**
 225  * Allocate a sufficiently large string and print into it.
 226  *
 227  * \param fmt A usual format string.
 228  *
 229  * Produce output according to \p fmt. No artificial bound on the length of the
 230  * resulting string is imposed.
 231  *
 232  * \return This function either returns a pointer to a string that must be
 233  * freed by the caller or aborts without returning.
 234  *
 235  * \sa printf(3), \ref xasprintf().
 236  */
 237 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
 238 {
 239         char *msg;
 240         va_list ap;
 241
 242         va_start(ap, fmt);
 243         xvasprintf(&msg, fmt, ap);
 244         va_end(ap);
 245         return msg;
 246 }
 247
 248 /**
 249  * Free the content of a pointer and set it to NULL.
 250  *
 251  * \param arg A pointer to the pointer whose content should be freed.
 252  *
 253  * If arg is NULL, the function returns immediately. Otherwise it frees the
 254  * memory pointed to by *arg and sets *arg to NULL. Hence callers have to pass
 255  * the *address* of the pointer variable that points to the memory which should
 256  * be freed.
 257  */
 258 void freep(void *arg)
 259 {
 260         if (arg) {
 261                 void **ptr = arg;
 262                 free(*ptr);
 263                 *ptr = NULL;
 264         }
 265 }
 266
 267 /**
 268  * Paraslash's version of strcat().
 269  *
 270  * \param a String to be appended to.
 271  * \param b String to append.
 272  *
 273  * Append \p b to \p a.
 274  *
 275  * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
 276  * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
 277  * return \a a without making a copy of \a a.  Otherwise, construct the
 278  * concatenation \a c, free \a a (but not \a b) and return \a c.
 279  *
 280  * \sa strcat(3).
 281  */
 282 __must_check __malloc char *para_strcat(char *a, const char *b)
 283 {
 284         char *tmp;
 285
 286         if (!a)
 287                 return para_strdup(b);
 288         if (!b)
 289                 return a;
 290         tmp = make_message("%s%s", a, b);
 291         free(a);
 292         return tmp;
 293 }
 294
 295 /**
 296  * Get the logname of the current user.
 297  *
 298  * \return A dynamically allocated string that must be freed by the caller. On
 299  * errors, the string "unknown_user" is returned, i.e. this function never
 300  * returns \p NULL.
 301  *
 302  * \sa getpwuid(3).
 303  */
 304 __must_check __malloc char *para_logname(void)
 305 {
 306         struct passwd *pw = getpwuid(getuid());
 307         return para_strdup(pw? pw->pw_name : "unknown_user");
 308 }
 309
 310 /**
 311  * Get the home directory of the calling user.
 312  *
 313  * \return A dynamically allocated string that must be freed by the caller. If
 314  * no entry is found which matches the UID of the calling process, or any other
 315  * error occurs, the function prints an error message and aborts.
 316  *
 317  * \sa getpwuid(3), getuid(2).
 318  */
 319 __must_check __malloc char *para_homedir(void)
 320 {
 321         struct passwd *pw;
 322
 323         /*
 324          * To distinguish between the error case and the "not found" case we
 325          * have to check errno after getpwuid(3). The manual page recommends to
 326          * set it to zero before the call.
 327          */
 328         errno = 0;
 329         pw = getpwuid(getuid());
 330         if (pw)
 331                 return para_strdup(pw->pw_dir);
 332         if (errno != 0)
 333                 PARA_EMERG_LOG("getpwuid error: %s\n", strerror(errno));
 334         else
 335                 PARA_EMERG_LOG("no pw entry for uid %u\n", (unsigned)getuid());
 336         exit(EXIT_FAILURE);
 337 }
 338
 339 /**
 340  * Get the own hostname.
 341  *
 342  * \return A dynamically allocated string containing the hostname.
 343  *
 344  * \sa uname(2).
 345  */
 346 __malloc char *para_hostname(void)
 347 {
 348         struct utsname u;
 349
 350         uname(&u);
 351         return para_strdup(u.nodename);
 352 }
 353
 354 /**
 355  * Call a custom function for each complete line.
 356  *
 357  * \param flags Any combination of flags defined in \ref for_each_line_flags.
 358  * \param buf The buffer containing data separated by newlines.
 359  * \param size The number of bytes in \a buf.
 360  * \param line_handler The custom function.
 361  * \param private_data Pointer passed to \a line_handler.
 362  *
 363  * For each complete line in \p buf, \p line_handler is called. The first
 364  * argument to \p line_handler is (a copy of) the current line, and \p
 365  * private_data is passed as the second argument.  If the \p FELF_READ_ONLY
 366  * flag is unset, a pointer into \a buf is passed to the line handler,
 367  * otherwise a pointer to a copy of the current line is passed instead. This
 368  * copy is freed immediately after the line handler returns.
 369  *
 370  * The function returns if \p line_handler returns a negative value or no more
 371  * lines are in the buffer.  The rest of the buffer (last chunk containing an
 372  * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
 373  * unset.
 374  *
 375  * \return On success this function returns the number of bytes not handled to
 376  * \p line_handler. The only possible error is a negative return value from the
 377  * line handler. In this case processing stops and the return value of the line
 378  * handler is returned to indicate failure.
 379  *
 380  * \sa \ref for_each_line_flags.
 381  */
 382 int for_each_line(unsigned flags, char *buf, size_t size,
 383                 line_handler_t *line_handler, void *private_data)
 384 {
 385         char *start = buf, *end;
 386         int ret, i, num_lines = 0;
 387
 388 //      PARA_NOTICE_LOG("buf: %s\n", buf);
 389         while (start < buf + size) {
 390                 char *next_null;
 391                 char *next_cr;
 392
 393                 next_cr = memchr(start, '\n', buf + size - start);
 394                 next_null = memchr(start, '\0', next_cr?
 395                         next_cr - start : buf + size - start);
 396                 if (!next_cr && !next_null)
 397                         break;
 398                 if (next_null)
 399                         end = next_null;
 400                 else
 401                         end = next_cr;
 402                 num_lines++;
 403                 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
 404                         if (flags & FELF_READ_ONLY) {
 405                                 size_t s = end - start;
 406                                 char *b = alloc(s + 1);
 407                                 memcpy(b, start, s);
 408                                 b[s] = '\0';
 409                                 ret = line_handler(b, private_data);
 410                                 free(b);
 411                         } else {
 412                                 *end = '\0';
 413                                 ret = line_handler(start, private_data);
 414                         }
 415                         if (ret < 0)
 416                                 return ret;
 417                 }
 418                 start = ++end;
 419         }
 420         i = buf + size - start;
 421         if (i && i != size && !(flags & FELF_READ_ONLY))
 422                 memmove(buf, start, i);
 423         return i;
 424 }
 425
 426 /** Return the hex characters of the lower 4 bits. */
 427 #define hex(a) (hexchar[(a) & 15])
 428
 429 static void write_size_header(char *buf, int n)
 430 {
 431         static char hexchar[] = "0123456789abcdef";
 432
 433         buf[0] = hex(n >> 12);
 434         buf[1] = hex(n >> 8);
 435         buf[2] = hex(n >> 4);
 436         buf[3] = hex(n);
 437         buf[4] = ' ';
 438 }
 439
 440 /**
 441  * Read a four-byte hex-number and return its value.
 442  *
 443  * Each status item sent by para_server is prefixed with such a hex number in
 444  * ASCII which describes the size of the status item.
 445  *
 446  * \param buf The buffer which must be at least four bytes long.
 447  *
 448  * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
 449  * buffer did not contain only hex digits.
 450  */
 451 int read_size_header(const char *buf)
 452 {
 453         int i, len = 0;
 454
 455         for (i = 0; i < 4; i++) {
 456                 unsigned char c = buf[i];
 457                 len <<= 4;
 458                 if (c >= '0' && c <= '9') {
 459                         len += c - '0';
 460                         continue;
 461                 }
 462                 if (c >= 'a' && c <= 'f') {
 463                         len += c - 'a' + 10;
 464                         continue;
 465                 }
 466                 return -E_SIZE_PREFIX;
 467         }
 468         if (buf[4] != ' ')
 469                 return -E_SIZE_PREFIX;
 470         return len;
 471 }
 472
 473 /**
 474  * Safely print into a buffer at a given offset.
 475  *
 476  * \param b Determines the buffer, its size, and the offset.
 477  * \param fmt The format string.
 478  *
 479  * This function prints into the buffer given by \a b at the offset which is
 480  * also given by \a b. If there is not enough space to hold the result, the
 481  * buffer size is doubled until the underlying call to vsnprintf() succeeds
 482  * or the size of the buffer exceeds the maximal size specified in \a b.
 483  *
 484  * In the latter case the unmodified \a buf and \a offset values as well as the
 485  * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
 486  * If this function succeeds, i.e. returns a non-negative value, the offset of
 487  * \a b is reset to zero and the given data is written to the beginning of the
 488  * buffer. If \a max_size_handler() returns a negative value, this value is
 489  * returned by \a para_printf().
 490  *
 491  * Upon return, the offset of \a b is adjusted accordingly so that subsequent
 492  * calls to this function append data to what is already contained in the
 493  * buffer.
 494  *
 495  * It's OK to call this function with \p b->buf being \p NULL. In this case, an
 496  * initial buffer is allocated.
 497  *
 498  * \return The number of bytes printed into the buffer (not including the
 499  * terminating \p NULL byte) on success, negative on errors. If there is no
 500  * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
 501  * fails.
 502  *
 503  * \sa make_message(), vsnprintf(3).
 504  */
 505 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
 506 {
 507         int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
 508
 509         if (!b->buf) {
 510                 b->buf = alloc(128);
 511                 b->size = 128;
 512                 b->offset = 0;
 513         }
 514         while (1) {
 515                 char *p = b->buf + b->offset;
 516                 size_t size = b->size - b->offset;
 517                 va_list ap;
 518
 519                 if (size > sz_off) {
 520                         va_start(ap, fmt);
 521                         ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
 522                         va_end(ap);
 523                         if (ret > -1 && ret < size - sz_off) { /* success */
 524                                 b->offset += ret + sz_off;
 525                                 if (sz_off)
 526                                         write_size_header(p, ret);
 527                                 return ret + sz_off;
 528                         }
 529                 }
 530                 /* check if we may grow the buffer */
 531                 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
 532                         /* try again with more space */
 533                         b->size *= 2;
 534                         b->buf = para_realloc(b->buf, b->size);
 535                         continue;
 536                 }
 537                 /* can't grow buffer */
 538                 if (!b->offset || !b->max_size_handler) /* message too large */
 539                         return -ERRNO_TO_PARA_ERROR(ENOSPC);
 540                 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
 541                 if (ret < 0)
 542                         return ret;
 543                 b->offset = 0;
 544         }
 545 }
 546
 547 /** \cond llong_minmax */
 548 /* LLONG_MAX and LLONG_MIN might not be defined. */
 549 #ifndef LLONG_MAX
 550 #define LLONG_MAX 9223372036854775807LL
 551 #endif
 552 #ifndef LLONG_MIN
 553 #define LLONG_MIN (-LLONG_MAX - 1LL)
 554 #endif
 555 /** \endcond llong_minmax */
 556
 557 /**
 558  * Convert a string to a 64-bit signed integer value.
 559  *
 560  * \param str The string to be converted.
 561  * \param value Result pointer.
 562  *
 563  * \return Standard.
 564  *
 565  * \sa \ref para_atoi32(), strtol(3), atoi(3).
 566  */
 567 int para_atoi64(const char *str, int64_t *value)
 568 {
 569         char *endptr;
 570         long long tmp;
 571
 572         errno = 0; /* To distinguish success/failure after call */
 573         tmp = strtoll(str, &endptr, 10);
 574         if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
 575                 return -E_ATOI_OVERFLOW;
 576         /*
 577          * If there were no digits at all, strtoll() stores the original value
 578          * of str in *endptr.
 579          */
 580         if (endptr == str)
 581                 return -E_ATOI_NO_DIGITS;
 582         /*
 583          * The implementation may also set errno and return 0 in case no
 584          * conversion was performed.
 585          */
 586         if (errno != 0 && tmp == 0)
 587                 return -E_ATOI_NO_DIGITS;
 588         if (*endptr != '\0') /* Further characters after number */
 589                 return -E_ATOI_JUNK_AT_END;
 590         *value = tmp;
 591         return 1;
 592 }
 593
 594 /**
 595  * Convert a string to a 32-bit signed integer value.
 596  *
 597  * \param str The string to be converted.
 598  * \param value Result pointer.
 599  *
 600  * \return Standard.
 601  *
 602  * \sa \ref para_atoi64().
 603 */
 604 int para_atoi32(const char *str, int32_t *value)
 605 {
 606         int64_t tmp;
 607         int ret;
 608         const int32_t max = 2147483647;
 609
 610         ret = para_atoi64(str, &tmp);
 611         if (ret < 0)
 612                 return ret;
 613         if (tmp > max || tmp < -max - 1)
 614                 return -E_ATOI_OVERFLOW;
 615         *value = tmp;
 616         return 1;
 617 }
 618
 619 static int get_next_word(const char *buf, const char *delim, char **word)
 620 {
 621         enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
 622                 LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
 623         const char *in;
 624         char *out;
 625         int ret, state = 0;
 626
 627         out = alloc(strlen(buf) + 1);
 628         *out = '\0';
 629         *word = out;
 630         for (in = buf; *in; in++) {
 631                 const char *p;
 632
 633                 switch (*in) {
 634                 case '\\':
 635                         if (state & LSF_BACKSLASH) /* \\ */
 636                                 goto copy_char;
 637                         state |= LSF_BACKSLASH;
 638                         state |= LSF_HAVE_WORD;
 639                         continue;
 640                 case 'n':
 641                 case 't':
 642                         if (state & LSF_BACKSLASH) { /* \n or \t */
 643                                 *out++ = (*in == 'n')? '\n' : '\t';
 644                                 state &= ~LSF_BACKSLASH;
 645                                 continue;
 646                         }
 647                         goto copy_char;
 648                 case '"':
 649                         if (state & LSF_BACKSLASH) /* \" */
 650                                 goto copy_char;
 651                         if (state & LSF_SINGLE_QUOTE) /* '" */
 652                                 goto copy_char;
 653                         if (state & LSF_DOUBLE_QUOTE) {
 654                                 state &= ~LSF_DOUBLE_QUOTE;
 655                                 continue;
 656                         }
 657                         state |= LSF_HAVE_WORD;
 658                         state |= LSF_DOUBLE_QUOTE;
 659                         continue;
 660                 case '\'':
 661                         if (state & LSF_BACKSLASH) /* \' */
 662                                 goto copy_char;
 663                         if (state & LSF_DOUBLE_QUOTE) /* "' */
 664                                 goto copy_char;
 665                         if (state & LSF_SINGLE_QUOTE) {
 666                                 state &= ~LSF_SINGLE_QUOTE;
 667                                 continue;
 668                         }
 669                         state |= LSF_HAVE_WORD;
 670                         state |= LSF_SINGLE_QUOTE;
 671                         continue;
 672                 }
 673                 for (p = delim; *p; p++) {
 674                         if (*in != *p)
 675                                 continue;
 676                         if (state & LSF_BACKSLASH)
 677                                 goto copy_char;
 678                         if (state & LSF_SINGLE_QUOTE)
 679                                 goto copy_char;
 680                         if (state & LSF_DOUBLE_QUOTE)
 681                                 goto copy_char;
 682                         if (state & LSF_HAVE_WORD)
 683                                 goto success;
 684                         break;
 685                 }
 686                 if (*p) /* ignore delimiter at the beginning */
 687                         continue;
 688 copy_char:
 689                 state |= LSF_HAVE_WORD;
 690                 *out++ = *in;
 691                 state &= ~LSF_BACKSLASH;
 692         }
 693         ret = 0;
 694         if (!(state & LSF_HAVE_WORD))
 695                 goto out;
 696         ret = -ERRNO_TO_PARA_ERROR(EINVAL);
 697         if (state & LSF_BACKSLASH) {
 698                 PARA_ERROR_LOG("trailing backslash\n");
 699                 goto out;
 700         }
 701         if ((state & LSF_SINGLE_QUOTE) || (state & LSF_DOUBLE_QUOTE)) {
 702                 PARA_ERROR_LOG("unmatched quote character\n");
 703                 goto out;
 704         }
 705 success:
 706         *out = '\0';
 707         return in - buf;
 708 out:
 709         free(*word);
 710         *word = NULL;
 711         return ret;
 712 }
 713
 714 /**
 715  * Get the number of the word the cursor is on.
 716  *
 717  * \param buf The zero-terminated line buffer.
 718  * \param delim Characters that separate words.
 719  * \param point The cursor position.
 720  *
 721  * \return Zero-based word number.
 722  */
 723 int compute_word_num(const char *buf, const char *delim, int point)
 724 {
 725         int ret, num_words;
 726         const char *p;
 727         char *word;
 728
 729         for (p = buf, num_words = 0; ; p += ret, num_words++) {
 730                 ret = get_next_word(p, delim, &word);
 731                 if (ret <= 0)
 732                         break;
 733                 free(word);
 734                 if (p + ret >= buf + point)
 735                         break;
 736         }
 737         return num_words;
 738 }
 739
 740 /**
 741  * Free an array of words created by create_argv() or create_shifted_argv().
 742  *
 743  * \param argv A pointer previously obtained by \ref create_argv().
 744  */
 745 void free_argv(char **argv)
 746 {
 747         int i;
 748
 749         if (!argv)
 750                 return;
 751         for (i = 0; argv[i]; i++)
 752                 free(argv[i]);
 753         free(argv);
 754 }
 755
 756 static int create_argv_offset(int offset, const char *buf, const char *delim,
 757                 char ***result)
 758 {
 759         char *word, **argv = arr_zalloc(offset + 1, sizeof(char *));
 760         const char *p;
 761         int i, ret;
 762
 763         for (p = buf, i = offset; p && *p; p += ret, i++) {
 764                 ret = get_next_word(p, delim, &word);
 765                 if (ret < 0)
 766                         goto err;
 767                 if (!ret)
 768                         break;
 769                 argv = arr_realloc(argv, i + 2, sizeof(char*));
 770                 argv[i] = word;
 771         }
 772         argv[i] = NULL;
 773         *result = argv;
 774         return i;
 775 err:
 776         while (i > 0)
 777                 free(argv[--i]);
 778         free(argv);
 779         *result = NULL;
 780         return ret;
 781 }
 782
 783 /**
 784  * Split a buffer into words.
 785  *
 786  * This parser honors single and double quotes, backslash-escaped characters
 787  * and special characters like \\n. The result contains pointers to copies of
 788  * the words contained in buf and has to be freed by using \ref free_argv().
 789  *
 790  * \param buf The buffer to be split.
 791  * \param delim Each character in this string is treated as a separator.
 792  * \param result The array of words is returned here.
 793  *
 794  * It's OK to pass NULL as the buffer argument. This is equivalent to passing
 795  * the empty string.
 796  *
 797  * \return Number of words in buf, negative on errors. The array returned
 798  * through the result pointer is NULL terminated.
 799  */
 800 int create_argv(const char *buf, const char *delim, char ***result)
 801 {
 802         return create_argv_offset(0, buf, delim, result);
 803 }
 804
 805 /**
 806  * Split a buffer into words, offset one.
 807  *
 808  * This is similar to \ref create_argv() but the returned array is one element
 809  * larger, words start at index one and element zero is initialized to \p NULL.
 810  * Callers must set element zero to a non-NULL value before calling free_argv()
 811  * on the returned array to avoid a memory leak.
 812  *
 813  * \param buf See \ref create_argv().
 814  * \param delim See \ref create_argv().
 815  * \param result See \ref create_argv().
 816  *
 817  * \return Number of words plus one on success, negative on errors.
 818  */
 819 int create_shifted_argv(const char *buf, const char *delim, char ***result)
 820 {
 821         return create_argv_offset(1, buf, delim, result);
 822 }
 823
 824 /**
 825  * Compile a regular expression.
 826  *
 827  * This simple wrapper calls regcomp() and logs a message on errors.
 828  *
 829  * \param preg See regcomp(3).
 830  * \param regex See regcomp(3).
 831  * \param cflags See regcomp(3).
 832  *
 833  * \return Standard.
 834  */
 835 int para_regcomp(regex_t *preg, const char *regex, int cflags)
 836 {
 837         char *buf;
 838         size_t size;
 839         int ret = regcomp(preg, regex, cflags);
 840
 841         if (ret == 0)
 842                 return 1;
 843         size = regerror(ret, preg, NULL, 0);
 844         buf = alloc(size);
 845         regerror(ret, preg, buf, size);
 846         PARA_ERROR_LOG("%s\n", buf);
 847         free(buf);
 848         return -E_REGEX;
 849 }
 850
 851 /**
 852  * strdup() for not necessarily zero-terminated strings.
 853  *
 854  * \param src The source buffer.
 855  * \param len The number of bytes to be copied.
 856  *
 857  * \return A 0-terminated buffer of length \a len + 1.
 858  *
 859  * This is similar to strndup(), which is a GNU extension. However, one
 860  * difference is that strndup() returns \p NULL if insufficient memory was
 861  * available while this function aborts in this case.
 862  *
 863  * \sa strdup(), \ref para_strdup().
 864  */
 865 char *safe_strdup(const char *src, size_t len)
 866 {
 867         char *p;
 868
 869         assert(len < (size_t)-1);
 870         p = alloc(len + 1);
 871         if (len > 0)
 872                 memcpy(p, src, len);
 873         p[len] = '\0';
 874         return p;
 875 }
 876
 877 /**
 878  * Copy the value of a key=value pair.
 879  *
 880  * This checks whether the given buffer starts with "key=", ignoring case. If
 881  * yes, a copy of the value is returned. The source buffer may not be
 882  * zero-terminated.
 883  *
 884  * \param src The source buffer.
 885  * \param len The number of bytes of the tag.
 886  * \param key Only copy if it is the value of this key.
 887  *
 888  * \return A zero-terminated buffer, or \p NULL if the key was
 889  * not of the given type.
 890  */
 891 char *key_value_copy(const char *src, size_t len, const char *key)
 892 {
 893         int keylen = strlen(key);
 894
 895         if (len <= keylen)
 896                 return NULL;
 897         if (strncasecmp(src, key, keylen))
 898                 return NULL;
 899         if (src[keylen] != '=')
 900                 return NULL;
 901         return safe_strdup(src + keylen + 1, len - keylen - 1);
 902 }
 903
 904 static bool utf8_mode(void)
 905 {
 906         static bool initialized, have_utf8;
 907
 908         if (!initialized) {
 909                 char *info = nl_langinfo(CODESET);
 910                 have_utf8 = (info && strcmp(info, "UTF-8") == 0);
 911                 initialized = true;
 912                 PARA_INFO_LOG("%susing UTF-8 character encoding\n",
 913                         have_utf8? "" : "not ");
 914         }
 915         return have_utf8;
 916 }
 917
 918 static int xwcwidth(wchar_t wc, size_t pos)
 919 {
 920         int n;
 921
 922         /* special-case for tab */
 923         if (wc == 0x09) /* tab */
 924                 return (pos | 7) + 1 - pos;
 925         n = wcwidth(wc);
 926         /* wcswidth() returns -1 for non-printable characters */
 927         return n >= 0? n : 1;
 928 }
 929
 930 static size_t xwcswidth(const wchar_t *s, size_t n)
 931 {
 932         size_t w = 0;
 933
 934         while (n--)
 935                 w += xwcwidth(*s++, w);
 936         return w;
 937 }
 938
 939 /**
 940  * Skip a given number of cells at the beginning of a string.
 941  *
 942  * \param s The input string.
 943  * \param cells_to_skip Desired number of cells that should be skipped.
 944  * \param bytes_to_skip Result.
 945  *
 946  * This function computes how many input bytes must be skipped to advance a
 947  * string by the given width. If the current character encoding is not UTF-8,
 948  * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
 949  * \a s is treated as a multibyte string and on successful return, \a s +
 950  * bytes_to_skip points to the start of a multibyte string such that the total
 951  * width of the multibyte characters that are skipped by advancing \a s that
 952  * many bytes equals at least \a cells_to_skip.
 953  *
 954  * \return Standard.
 955  */
 956 int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
 957 {
 958         wchar_t wc;
 959         mbstate_t ps;
 960         size_t n, bytes_parsed, cells_skipped;
 961
 962         *bytes_to_skip = 0;
 963         if (cells_to_skip == 0)
 964                 return 0;
 965         if (!utf8_mode()) {
 966                 *bytes_to_skip = cells_to_skip;
 967                 return 0;
 968         }
 969         bytes_parsed = cells_skipped = 0;
 970         memset(&ps, 0, sizeof(ps));
 971         n = strlen(s);
 972         while (cells_to_skip > cells_skipped) {
 973                 size_t mbret;
 974
 975                 mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps);
 976                 assert(mbret != 0);
 977                 if (mbret == (size_t)-1 || mbret == (size_t)-2)
 978                         return -ERRNO_TO_PARA_ERROR(EILSEQ);
 979                 bytes_parsed += mbret;
 980                 cells_skipped += xwcwidth(wc, cells_skipped);
 981         }
 982         *bytes_to_skip = bytes_parsed;
 983         return 1;
 984 }
 985
 986 /**
 987  * Compute the width of an UTF-8 string.
 988  *
 989  * \param s The string.
 990  * \param result The width of \a s is returned here.
 991  *
 992  * If not in UTF8-mode. this function is just a wrapper for strlen(3).
 993  * Otherwise \a s is treated as an UTF-8 string and its display width is
 994  * computed. Note that this function may fail if the underlying call to
 995  * mbsrtowcs(3) fails, so the caller must check the return value.
 996  *
 997  * \sa nl_langinfo(3), wcswidth(3).
 998  *
 999  * \return Standard.
1000  */
1001 __must_check int strwidth(const char *s, size_t *result)
1002 {
1003         const char *src = s;
1004         mbstate_t state;
1005         static wchar_t *dest;
1006         size_t num_wchars;
1007
1008         /*
1009          * Never call any log function here. This may result in an endless loop
1010          * as para_gui's para_log() calls this function.
1011          */
1012
1013         if (!utf8_mode()) {
1014                 *result = strlen(s);
1015                 return 0;
1016         }
1017         memset(&state, 0, sizeof(state));
1018         *result = 0;
1019         num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1020         if (num_wchars == (size_t)-1)
1021                 return -ERRNO_TO_PARA_ERROR(errno);
1022         if (num_wchars == 0)
1023                 return 0;
1024         dest = arr_alloc(num_wchars + 1, sizeof(*dest));
1025         src = s;
1026         memset(&state, 0, sizeof(state));
1027         num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1028         assert(num_wchars > 0 && num_wchars != (size_t)-1);
1029         *result = xwcswidth(dest, num_wchars);
1030         free(dest);
1031         return 1;
1032 }
1033
1034 /**
1035  * Truncate and sanitize a (wide character) string.
1036  *
1037  * This replaces all non-printable characters by spaces and makes sure that the
1038  * modified string does not exceed the given maximal width.
1039  *
1040  * \param src The source string in multi-byte form.
1041  * \param max_width The maximal number of cells the result may occupy.
1042  * \param result Sanitized multi-byte string, must be freed by caller.
1043  * \param width The width of the sanitized string, always <= max_width.
1044  *
1045  * The function is wide-character aware but falls back to C strings for
1046  * non-UTF-8 locales.
1047  *
1048  * \return Standard. On success, *result points to a sanitized copy of the
1049  * given string. This copy was allocated with malloc() and should hence be
1050  * freed when the caller is no longer interested in the result.
1051  *
1052  * The function fails if the given string contains an invalid multibyte
1053  * sequence. In this case, *result is set to NULL, and *width to zero.
1054  */
1055 __must_check int sanitize_str(const char *src, size_t max_width,
1056                 char **result, size_t *width)
1057 {
1058         mbstate_t state;
1059         static wchar_t *wcs;
1060         size_t num_wchars, n;
1061
1062         if (!utf8_mode()) {
1063                 *result = para_strdup(src);
1064                 /* replace non-printable characters by spaces */
1065                 for (n = 0; n < max_width && src[n]; n++) {
1066                         if (!isprint((unsigned char)src[n]))
1067                                 (*result)[n] = ' ';
1068                 }
1069                 (*result)[n] = '\0';
1070                 *width = n;
1071                 return 0;
1072         }
1073         *result = NULL;
1074         *width = 0;
1075         memset(&state, 0, sizeof(state));
1076         num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1077         if (num_wchars == (size_t)-1)
1078                 return -ERRNO_TO_PARA_ERROR(errno);
1079         wcs = arr_alloc(num_wchars + 1, sizeof(*wcs));
1080         memset(&state, 0, sizeof(state));
1081         num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
1082         assert(num_wchars != (size_t)-1);
1083         for (n = 0; n < num_wchars && *width < max_width; n++) {
1084                 if (!iswprint(wcs[n]))
1085                         wcs[n] = L' ';
1086                 *width += xwcwidth(wcs[n], *width);
1087         }
1088         wcs[n] = L'\0';
1089         n = wcstombs(NULL, wcs, 0) + 1;
1090         *result = alloc(n);
1091         num_wchars = wcstombs(*result, wcs, n);
1092         assert(num_wchars != (size_t)-1);
1093         free(wcs);
1094         return 1;
1095 }