string.c

   1 /*
   2  * Copyright (C) 2004-2013 Andre Noll <maan@systemlinux.org>
   3  *
   4  * Licensed under the GPL v2. For licencing details see COPYING.
   5  */
   6
   7 /** \file string.c Memory allocation and string handling functions. */
   8
   9 #define _GNU_SOURCE
  10
  11 #include <pwd.h>
  12 #include <sys/utsname.h> /* uname() */
  13
  14 #include <string.h>
  15 #include <regex.h>
  16
  17 #include <langinfo.h>
  18 #include <wchar.h>
  19 #include <wctype.h>
  20
  21 #include "para.h"
  22 #include "string.h"
  23 #include "error.h"
  24
  25 /**
  26  * Paraslash's version of realloc().
  27  *
  28  * \param p Pointer to the memory block, may be \p NULL.
  29  * \param size The desired new size.
  30  *
  31  * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
  32  * i.e. there is no need to check the return value in the caller.
  33  *
  34  * \return A pointer to  the newly allocated memory, which is suitably aligned
  35  * for any kind of variable and may be different from \a p.
  36  *
  37  * \sa realloc(3).
  38  */
  39 __must_check __malloc void *para_realloc(void *p, size_t size)
  40 {
  41         /*
  42          * No need to check for NULL pointers: If p is NULL, the call
  43          * to realloc is equivalent to malloc(size)
  44          */
  45         assert(size);
  46         if (!(p = realloc(p, size))) {
  47                 PARA_EMERG_LOG("realloc failed (size = %zu), aborting\n",
  48                         size);
  49                 exit(EXIT_FAILURE);
  50         }
  51         return p;
  52 }
  53
  54 /**
  55  * Paraslash's version of malloc().
  56  *
  57  * \param size The desired new size.
  58  *
  59  * A wrapper for malloc(3) which exits on errors.
  60  *
  61  * \return A pointer to the allocated memory, which is suitably aligned for any
  62  * kind of variable.
  63  *
  64  * \sa malloc(3).
  65  */
  66 __must_check __malloc void *para_malloc(size_t size)
  67 {
  68         void *p;
  69
  70         assert(size);
  71         p = malloc(size);
  72         if (!p) {
  73                 PARA_EMERG_LOG("malloc failed (size = %zu), aborting\n",
  74                         size);
  75                 exit(EXIT_FAILURE);
  76         }
  77         return p;
  78 }
  79
  80 /**
  81  * Paraslash's version of calloc().
  82  *
  83  * \param size The desired new size.
  84  *
  85  * A wrapper for calloc(3) which exits on errors.
  86  *
  87  * \return A pointer to the allocated and zeroed-out memory, which is suitably
  88  * aligned for any kind of variable.
  89  *
  90  * \sa calloc(3)
  91  */
  92 __must_check __malloc void *para_calloc(size_t size)
  93 {
  94         void *ret = para_malloc(size);
  95
  96         memset(ret, 0, size);
  97         return ret;
  98 }
  99
 100 /**
 101  * Paraslash's version of strdup().
 102  *
 103  * \param s The string to be duplicated.
 104  *
 105  * A wrapper for strdup(3). It calls \p exit(EXIT_FAILURE) on errors, i.e.
 106  * there is no need to check the return value in the caller.
 107  *
 108  * \return A pointer to the duplicated string. If \a s was the \p NULL pointer,
 109  * an pointer to an empty string is returned.
 110  *
 111  * \sa strdup(3)
 112  */
 113 __must_check __malloc char *para_strdup(const char *s)
 114 {
 115         char *ret;
 116
 117         if ((ret = strdup(s? s: "")))
 118                 return ret;
 119         PARA_EMERG_LOG("strdup failed, aborting\n");
 120         exit(EXIT_FAILURE);
 121 }
 122
 123 /**
 124  * Print a formated message to a dynamically allocated string.
 125  *
 126  * \param result The formated string is returned here.
 127  * \param fmt The format string.
 128  * \param ap Initialized list of arguments.
 129  *
 130  * This function is similar to vasprintf(), a GNU extension which is not in C
 131  * or POSIX. It allocates a string large enough to hold the output including
 132  * the terminating null byte. The allocated string is returned via the first
 133  * argument and must be freed by the caller. However, unlike vasprintf(), this
 134  * function calls exit() if insufficient memory is available, while vasprintf()
 135  * returns -1 in this case.
 136  *
 137  * \return Number of bytes written, not including the terminating \p NULL
 138  * character.
 139  *
 140  * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
 141  */
 142 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
 143 {
 144         int ret;
 145         size_t size = 150;
 146         va_list aq;
 147
 148         *result = para_malloc(size + 1);
 149         va_copy(aq, ap);
 150         ret = vsnprintf(*result, size, fmt, aq);
 151         va_end(aq);
 152         assert(ret >= 0);
 153         if (ret < size) /* OK */
 154                 return ret;
 155         size = ret + 1;
 156         *result = para_realloc(*result, size);
 157         va_copy(aq, ap);
 158         ret = vsnprintf(*result, size, fmt, aq);
 159         va_end(aq);
 160         assert(ret >= 0 && ret < size);
 161         return ret;
 162 }
 163
 164 /**
 165  * Print to a dynamically allocated string, variable number of arguments.
 166  *
 167  * \param result See \ref xvasprintf().
 168  * \param fmt Usual format string.
 169  *
 170  * \return The return value of the underlying call to \ref xvasprintf().
 171  *
 172  * \sa \ref xvasprintf() and the references mentioned there.
 173  */
 174 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
 175 {
 176         va_list ap;
 177         unsigned ret;
 178
 179         va_start(ap, fmt);
 180         ret = xvasprintf(result, fmt, ap);
 181         va_end(ap);
 182         return ret;
 183 }
 184
 185 /**
 186  * Allocate a sufficiently large string and print into it.
 187  *
 188  * \param fmt A usual format string.
 189  *
 190  * Produce output according to \p fmt. No artificial bound on the length of the
 191  * resulting string is imposed.
 192  *
 193  * \return This function either returns a pointer to a string that must be
 194  * freed by the caller or aborts without returning.
 195  *
 196  * \sa printf(3), xasprintf().
 197  */
 198 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
 199 {
 200         char *msg;
 201         va_list ap;
 202
 203         va_start(ap, fmt);
 204         xvasprintf(&msg, fmt, ap);
 205         va_end(ap);
 206         return msg;
 207 }
 208
 /**
  * Free the memory a pointer refers to and reset the pointer to \p NULL.
  *
  * Equivalent to "free(*arg); *arg = NULL;".
  *
  * \param arg Address of the pointer whose target should be freed. It is
  * declared as \p void * so that the address of any pointer type can be
  * passed without a cast.
  */
 void freep(void *arg)
 {
         void **pp = arg;

         free(*pp);
         *pp = NULL;
 }
 222
 223 /**
 224  * Paraslash's version of strcat().
 225  *
 226  * \param a String to be appended to.
 227  * \param b String to append.
 228  *
 229  * Append \p b to \p a.
 230  *
 231  * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
 232  * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
 233  * return \a a without making a copy of \a a.  Otherwise, construct the
 234  * concatenation \a c, free \a a (but not \a b) and return \a c.
 235  *
 236  * \sa strcat(3)
 237  */
 238 __must_check __malloc char *para_strcat(char *a, const char *b)
 239 {
 240         char *tmp;
 241
 242         if (!a)
 243                 return para_strdup(b);
 244         if (!b)
 245                 return a;
 246         tmp = make_message("%s%s", a, b);
 247         free(a);
 248         return tmp;
 249 }
 250
 251 /**
 252  * Paraslash's version of dirname().
 253  *
 254  * \param name Pointer to the full path.
 255  *
 256  * Compute the directory component of \p name.
 257  *
 258  * \return If \a name is \p NULL or the empty string, return \p NULL.
 259  * Otherwise, Make a copy of \a name and return its directory component. Caller
 260  * is responsible to free the result.
 261  */
 262 __must_check __malloc char *para_dirname(const char *name)
 263 {
 264         char *p, *ret;
 265
 266         if (!name || !*name)
 267                 return NULL;
 268         ret = para_strdup(name);
 269         p = strrchr(ret, '/');
 270         if (!p)
 271                 *ret = '\0';
 272         else
 273                 *p = '\0';
 274         return ret;
 275 }
 276
 277 /**
 278  * Paraslash's version of basename().
 279  *
 280  * \param name Pointer to the full path.
 281  *
 282  * Compute the filename component of \a name.
 283  *
 284  * \return \p NULL if (a) \a name is the empty string or \p NULL, or (b) name
 285  * ends with a slash.  Otherwise, a pointer within \a name is returned.  Caller
 286  * must not free the result.
 287  */
 288 __must_check char *para_basename(const char *name)
 289 {
 290         char *ret;
 291
 292         if (!name || !*name)
 293                 return NULL;
 294         ret = strrchr(name, '/');
 295         if (!ret)
 296                 return (char *)name;
 297         ret++;
 298         return ret;
 299 }
 300
 /**
  * Cut trailing newline.
  *
  * \param buf The zero-terminated string to be chopped.
  *
  * If the last character of \p buf is a newline, it is overwritten with a
  * null byte. At most one newline is removed. The empty string is left
  * untouched.
  */
 void chop(char *buf)
 {
         /* size_t rather than int: strlen() may exceed INT_MAX. */
         size_t len = strlen(buf);

         if (len > 0 && buf[len - 1] == '\n')
                 buf[len - 1] = '\0';
 }
 318
 319 /**
 320  * Get the logname of the current user.
 321  *
 322  * \return A dynamically allocated string that must be freed by the caller. On
 323  * errors, the string "unknown_user" is returned, i.e. this function never
 324  * returns \p NULL.
 325  *
 326  * \sa getpwuid(3).
 327  */
 328 __must_check __malloc char *para_logname(void)
 329 {
 330         struct passwd *pw = getpwuid(getuid());
 331         return para_strdup(pw? pw->pw_name : "unknown_user");
 332 }
 333
 334 /**
 335  * Get the home directory of the current user.
 336  *
 337  * \return A dynamically allocated string that must be freed by the caller. If
 338  * the home directory could not be found, this function returns "/tmp".
 339  */
 340 __must_check __malloc char *para_homedir(void)
 341 {
 342         struct passwd *pw = getpwuid(getuid());
 343         return para_strdup(pw? pw->pw_dir : "/tmp");
 344 }
 345
 346 /**
 347  * Get the own hostname.
 348  *
 349  * \return A dynamically allocated string containing the hostname.
 350  *
 351  * \sa uname(2).
 352  */
 353 __malloc char *para_hostname(void)
 354 {
 355         struct utsname u;
 356
 357         uname(&u);
 358         return para_strdup(u.nodename);
 359 }
 360
 361 /**
 362  * Call a custom function for each complete line.
 363  *
 364  * \param flags Any combination of flags defined in \ref for_each_line_flags.
 365  * \param buf The buffer containing data separated by newlines.
 366  * \param size The number of bytes in \a buf.
 367  * \param line_handler The custom function.
 368  * \param private_data Pointer passed to \a line_handler.
 369  *
 370  * For each complete line in \p buf, \p line_handler is called. The first
 371  * argument to \p line_handler is (a copy of) the current line, and \p
 372  * private_data is passed as the second argument.  If the \p FELF_READ_ONLY
 373  * flag is unset, a pointer into \a buf is passed to the line handler,
 374  * otherwise a pointer to a copy of the current line is passed instead. This
 375  * copy is freed immediately after the line handler returns.
 376  *
 377  * The function returns if \p line_handler returns a negative value or no more
 378  * lines are in the buffer.  The rest of the buffer (last chunk containing an
 379  * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
 380  * unset.
 381  *
 382  * \return On success this function returns the number of bytes not handled to
 383  * \p line_handler. The only possible error is a negative return value from the
 384  * line handler. In this case processing stops and the return value of the line
 385  * handler is returned to indicate failure.
 386  *
 387  * \sa \ref for_each_line_flags.
 388  */
 389 int for_each_line(unsigned flags, char *buf, size_t size,
 390                 line_handler_t *line_handler, void *private_data)
 391 {
 392         char *start = buf, *end;
 393         int ret, i, num_lines = 0;
 394
 395 //      PARA_NOTICE_LOG("buf: %s\n", buf);
 396         while (start < buf + size) {
 397                 char *next_null;
 398                 char *next_cr;
 399
 400                 next_cr = memchr(start, '\n', buf + size - start);
 401                 next_null = memchr(start, '\0', buf + size - start);
 402                 if (!next_cr && !next_null)
 403                         break;
 404                 if (next_cr && next_null) {
 405                         end = next_cr < next_null? next_cr : next_null;
 406                 } else if (next_null) {
 407                         end = next_null;
 408                 } else
 409                         end = next_cr;
 410                 num_lines++;
 411                 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
 412                         if (flags & FELF_READ_ONLY) {
 413                                 size_t s = end - start;
 414                                 char *b = para_malloc(s + 1);
 415                                 memcpy(b, start, s);
 416                                 b[s] = '\0';
 417                                 ret = line_handler(b, private_data);
 418                                 free(b);
 419                         } else {
 420                                 *end = '\0';
 421                                 ret = line_handler(start, private_data);
 422                         }
 423                         if (ret < 0)
 424                                 return ret;
 425                 }
 426                 start = ++end;
 427         }
 428         i = buf + size - start;
 429         if (i && i != size && !(flags & FELF_READ_ONLY))
 430                 memmove(buf, start, i);
 431         return i;
 432 }
 433
 /**
  * Map the lower 4 bits of \a a to the corresponding lowercase hex digit.
  * Expands to a lookup in an array named \p hexchar, which must be in scope.
  */
 #define hex(a) (hexchar[(a) & 15])

 /*
  * Store the lowest 16 bits of n as four lowercase hex digits, most
  * significant digit first, followed by a single space. Exactly five bytes of
  * buf are written and no terminating null byte is added.
  */
 static void write_size_header(char *buf, int n)
 {
         static char hexchar[] = "0123456789abcdef";
         int shift, pos = 0;

         for (shift = 12; shift >= 0; shift -= 4)
                 buf[pos++] = hex(n >> shift);
         buf[pos] = ' ';
 }
 447
 448 /**
 449  * Read a four-byte hex-number and return its value.
 450  *
 451  * Each status item sent by para_server is prefixed with such a hex number in
 452  * ASCII which describes the size of the status item.
 453  *
 454  * \param buf The buffer which must be at least four bytes long.
 455  *
 456  * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
 457  * buffer did not contain only hex digits.
 458  */
 459 int read_size_header(const char *buf)
 460 {
 461         int i, len = 0;
 462
 463         for (i = 0; i < 4; i++) {
 464                 unsigned char c = buf[i];
 465                 len <<= 4;
 466                 if (c >= '0' && c <= '9') {
 467                         len += c - '0';
 468                         continue;
 469                 }
 470                 if (c >= 'a' && c <= 'f') {
 471                         len += c - 'a' + 10;
 472                         continue;
 473                 }
 474                 return -E_SIZE_PREFIX;
 475         }
 476         if (buf[4] != ' ')
 477                 return -E_SIZE_PREFIX;
 478         return len;
 479 }
 480
 /**
  * Safely print into a buffer at a given offset.
  *
  * \param b Determines the buffer, its size, and the offset.
  * \param fmt The format string.
  *
  * This function prints into the buffer given by \a b at the offset which is
  * also given by \a b. If there is not enough space to hold the result, the
  * buffer size is doubled until the underlying call to vsnprintf() succeeds
  * or doubling the buffer once more would reach or exceed the maximal size
  * specified in \a b.
  *
  * In the latter case the \a buf and \a offset values as well as the
  * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
  * If this function succeeds, i.e. returns a non-negative value, the offset of
  * \a b is reset to zero and the given data is written to the beginning of the
  * buffer. If \a max_size_handler() returns a negative value, this value is
  * returned by \a para_printf().
  *
  * If the \p PBF_SIZE_PREFIX flag is set in \a b, the printed text is preceded
  * by a five byte size header (see write_size_header()) which describes the
  * length of the text.
  *
  * Upon return, the offset of \a b is adjusted accordingly so that subsequent
  * calls to this function append data to what is already contained in the
  * buffer.
  *
  * It's OK to call this function with \p b->buf being \p NULL. In this case, an
  * initial buffer of 128 bytes is allocated.
  *
  * \return The number of bytes printed into the buffer on success, including
  * the size header, if any, but not including the terminating null byte. On
  * errors a negative value is returned: either the return value of the
  * \a max_size_handler or an \p ENOSPC based error code if the buffer can not
  * be grown and either is empty already or no handler is set.
  *
  * \sa make_message(), vsnprintf(3).
  */
 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
 {
         /* sz_off: room reserved in front of the text for the size header */
         int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;

         if (!b->buf) {
                 b->buf = para_malloc(128);
                 b->size = 128;
                 b->offset = 0;
         }
         while (1) {
                 /* p and size describe the unused tail of the buffer */
                 char *p = b->buf + b->offset;
                 size_t size = b->size - b->offset;
                 va_list ap;

                 if (size > sz_off) {
                         /* the list is re-initialized for each attempt */
                         va_start(ap, fmt);
                         ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
                         va_end(ap);
                         if (ret > -1 && ret < size - sz_off) { /* success */
                                 b->offset += ret + sz_off;
                                 /*
                                  * NOTE(review): write_size_header() encodes
                                  * only the low 16 bits of ret, so the prefix
                                  * looks wrong for texts of 65536 bytes or
                                  * more. Confirm that such sizes cannot occur.
                                  */
                                 if (sz_off)
                                         write_size_header(p, ret);
                                 return ret + sz_off;
                         }
                 }
                 /* check if we may grow the buffer */
                 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
                         /* try again with more space */
                         b->size *= 2;
                         b->buf = para_realloc(b->buf, b->size);
                         continue;
                 }
                 /* can't grow buffer */
                 if (!b->offset || !b->max_size_handler) /* message too large */
                         return -ERRNO_TO_PARA_ERROR(ENOSPC);
                 /* let the handler consume what has been collected so far */
                 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
                 if (ret < 0)
                         return ret;
                 /* start over at the beginning of the buffer */
                 b->offset = 0;
         }
 }
 554
 555 /** \cond llong_minmax */
 556 /* LLONG_MAX and LLONG_MIN might not be defined. */
 557 #ifndef LLONG_MAX
 558 #define LLONG_MAX 9223372036854775807LL
 559 #endif
 560 #ifndef LLONG_MIN
 561 #define LLONG_MIN (-LLONG_MAX - 1LL)
 562 #endif
 563 /** \endcond llong_minmax */
 564
 565 /**
 566  * Convert a string to a 64-bit signed integer value.
 567  *
 568  * \param str The string to be converted.
 569  * \param value Result pointer.
 570  *
 571  * \return Standard.
 572  *
 573  * \sa para_atoi32(), strtol(3), atoi(3).
 574  */
 575 int para_atoi64(const char *str, int64_t *value)
 576 {
 577         char *endptr;
 578         long long tmp;
 579
 580         errno = 0; /* To distinguish success/failure after call */
 581         tmp = strtoll(str, &endptr, 10);
 582         if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
 583                 return -E_ATOI_OVERFLOW;
 584         if (errno != 0 && tmp == 0) /* other error */
 585                 return -E_STRTOLL;
 586         if (endptr == str)
 587                 return -E_ATOI_NO_DIGITS;
 588         if (*endptr != '\0') /* Further characters after number */
 589                 return -E_ATOI_JUNK_AT_END;
 590         *value = tmp;
 591         return 1;
 592 }
 593
 594 /**
 595  * Convert a string to a 32-bit signed integer value.
 596  *
 597  * \param str The string to be converted.
 598  * \param value Result pointer.
 599  *
 600  * \return Standard.
 601  *
 602  * \sa para_atoi64().
 603 */
 604 int para_atoi32(const char *str, int32_t *value)
 605 {
 606         int64_t tmp;
 607         int ret;
 608         const int32_t max = 2147483647;
 609
 610         ret = para_atoi64(str, &tmp);
 611         if (ret < 0)
 612                 return ret;
 613         if (tmp > max || tmp < -max - 1)
 614                 return -E_ATOI_OVERFLOW;
 615         *value = tmp;
 616         return 1;
 617 }
 618
 /*
  * Check whether arg starts with the loglevel name ll, ignoring case. Only
  * the first strlen(ll) characters are compared, so trailing characters in
  * arg do not prevent a match. Returns non-zero on match, zero otherwise.
  */
 static inline int loglevel_equal(const char *arg, const char * const ll)
 {
         const size_t n = strlen(ll);

         return strncasecmp(arg, ll, n) == 0;
 }
 623
 624 /**
 625  * Compute the loglevel number from its name.
 626  *
 627  * \param txt The name of the loglevel (debug, info, ...).
 628  *
 629  * \return The numeric representation of the loglevel name.
 630  */
 631 int get_loglevel_by_name(const char *txt)
 632 {
 633         if (loglevel_equal(txt, "debug"))
 634                 return LL_DEBUG;
 635         if (loglevel_equal(txt, "info"))
 636                 return LL_INFO;
 637         if (loglevel_equal(txt, "notice"))
 638                 return LL_NOTICE;
 639         if (loglevel_equal(txt, "warning"))
 640                 return LL_WARNING;
 641         if (loglevel_equal(txt, "error"))
 642                 return LL_ERROR;
 643         if (loglevel_equal(txt, "crit"))
 644                 return LL_CRIT;
 645         if (loglevel_equal(txt, "emerg"))
 646                 return LL_EMERG;
 647         return -1;
 648 }
 649
 /*
  * Extract the first word of buf, honoring quotes and backslash escapes.
  *
  * buf: The zero-terminated input.
  * delim: Each character of this string separates words.
  * word: Result pointer.
  *
  * Delimiters in front of the word are skipped. Within single or double
  * quotes, and directly after a backslash, delimiters are copied to the word
  * like ordinary characters. The quote characters and the escaping backslash
  * are not copied. The sequences \n and \t are translated to the newline and
  * the tab character, respectively.
  *
  * Returns a positive number if a word was found. This is the offset into buf
  * at which parsing stopped, i.e. the offset of the delimiter which terminated
  * the word, or the offset of the terminating null byte of buf. In this case
  * *word points to a newly allocated copy of the word which the caller must
  * free. Returns zero if buf contains no word and a negative error code if buf
  * ends with a backslash or contains an unmatched quote. In both cases *word
  * is set to NULL.
  */
 static int get_next_word(const char *buf, const char *delim, char **word)
 {
         enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
                 LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
         const char *in;
         char *out;
         int ret, state = 0;

         /* the word can not be longer than the input */
         out = para_malloc(strlen(buf) + 1);
         *out = '\0';
         *word = out;
         for (in = buf; *in; in++) {
                 const char *p;

                 switch (*in) {
                 case '\\':
                         if (state & LSF_BACKSLASH) /* \\ */
                                 goto copy_char;
                         /* remember the backslash, the next char decides */
                         state |= LSF_BACKSLASH;
                         state |= LSF_HAVE_WORD;
                         continue;
                 case 'n':
                 case 't':
                         if (state & LSF_BACKSLASH) { /* \n or \t */
                                 *out++ = (*in == 'n')? '\n' : '\t';
                                 state &= ~LSF_BACKSLASH;
                                 continue;
                         }
                         goto copy_char;
                 case '"':
                         if (state & LSF_BACKSLASH) /* \" */
                                 goto copy_char;
                         if (state & LSF_SINGLE_QUOTE) /* '" */
                                 goto copy_char;
                         if (state & LSF_DOUBLE_QUOTE) {
                                 /* closing quote */
                                 state &= ~LSF_DOUBLE_QUOTE;
                                 continue;
                         }
                         /* opening quote, "" counts as an (empty) word */
                         state |= LSF_HAVE_WORD;
                         state |= LSF_DOUBLE_QUOTE;
                         continue;
                 case '\'':
                         if (state & LSF_BACKSLASH) /* \' */
                                 goto copy_char;
                         if (state & LSF_DOUBLE_QUOTE) /* "' */
                                 goto copy_char;
                         if (state & LSF_SINGLE_QUOTE) {
                                 /* closing quote */
                                 state &= ~LSF_SINGLE_QUOTE;
                                 continue;
                         }
                         /* opening quote, '' counts as an (empty) word */
                         state |= LSF_HAVE_WORD;
                         state |= LSF_SINGLE_QUOTE;
                         continue;
                 }
                 /* check whether the current character is a delimiter */
                 for (p = delim; *p; p++) {
                         if (*in != *p)
                                 continue;
                         /* escaped or quoted delimiters are part of the word */
                         if (state & LSF_BACKSLASH)
                                 goto copy_char;
                         if (state & LSF_SINGLE_QUOTE)
                                 goto copy_char;
                         if (state & LSF_DOUBLE_QUOTE)
                                 goto copy_char;
                         /* an unquoted delimiter terminates the word */
                         if (state & LSF_HAVE_WORD)
                                 goto success;
                         break;
                 }
                 /* *p is non-zero only if the loop was left via break */
                 if (*p) /* ignore delimiter at the beginning */
                         continue;
 copy_char:
                 state |= LSF_HAVE_WORD;
                 *out++ = *in;
                 state &= ~LSF_BACKSLASH;
         }
         /* end of input reached */
         ret = 0;
         if (!(state & LSF_HAVE_WORD))
                 goto out;
         ret = -ERRNO_TO_PARA_ERROR(EINVAL);
         if (state & LSF_BACKSLASH) {
                 PARA_ERROR_LOG("trailing backslash\n");
                 goto out;
         }
         if ((state & LSF_SINGLE_QUOTE) || (state & LSF_DOUBLE_QUOTE)) {
                 PARA_ERROR_LOG("unmatched quote character\n");
                 goto out;
         }
 success:
         *out = '\0';
         return in - buf;
 out:
         /* no word or error: do not hand out the buffer */
         free(*word);
         *word = NULL;
         return ret;
 }
 744
 /**
  * Get the number of the word the cursor is on.
  *
  * \param buf The zero-terminated line buffer.
  * \param delim Characters that separate words.
  * \param point The cursor position, given as an offset into \a buf.
  *
  * The words of \a buf are parsed one after another until a word is found
  * which ends at or after \a point, or until no further word can be parsed.
  *
  * \return Zero-based word number.
  */
 int compute_word_num(const char *buf, const char *delim, int point)
 {
         const char *p = buf;
         int num_words = 0;

         for (;;) {
                 char *word;
                 int len = get_next_word(p, delim, &word);

                 if (len <= 0) /* no more words, or parse error */
                         break;
                 free(word); /* only the length of the word matters */
                 p += len;
                 if (p >= buf + point)
                         break;
                 num_words++;
         }
         return num_words;
 }
 770
 /**
  * Free an array of words created by create_argv() or create_shifted_argv().
  *
  * \param argv A pointer previously obtained by \ref create_argv(), may be
  * \p NULL in which case the function does nothing.
  *
  * All elements up to the first \p NULL element are freed, then the array
  * itself.
  */
 void free_argv(char **argv)
 {
         char **p;

         if (!argv)
                 return;
         for (p = argv; *p; p++)
                 free(*p);
         free(argv);
 }
 786
 /*
  * Split buf into words and store them in a NULL-terminated array.
  *
  * The first offset elements of the array are set to NULL, the words found by
  * get_next_word() follow. On success the array is returned via result and
  * the return value is the index of the terminating NULL element, i.e. offset
  * plus the number of words. If get_next_word() fails, everything allocated so
  * far is freed, *result is set to NULL and the negative error code is
  * returned.
  */
 static int create_argv_offset(int offset, const char *buf, const char *delim,
                 char ***result)
 {
         char **argv = para_malloc((offset + 1) * sizeof(char *));
         const char *p = buf;
         int n = 0, ret;

         while (n < offset)
                 argv[n++] = NULL;
         while (p && *p) {
                 char *word;

                 ret = get_next_word(p, delim, &word);
                 if (ret < 0)
                         goto fail;
                 if (ret == 0) /* only delimiters left */
                         break;
                 /* make room for the new word and the terminating NULL */
                 argv = para_realloc(argv, (n + 2) * sizeof(char*));
                 argv[n++] = word;
                 p += ret;
         }
         argv[n] = NULL;
         *result = argv;
         return n;
 fail:
         while (n > 0)
                 free(argv[--n]);
         free(argv);
         *result = NULL;
         return ret;
 }
 815
 /**
  * Split a buffer into words.
  *
  * The parser honors single and double quotes, backslash-escaped characters
  * and special characters like \p \\n. The resulting array is terminated by a
  * \p NULL pointer and contains pointers to copies of the words of \a buf. It
  * has to be freed with \ref free_argv().
  *
  * \param buf The buffer to be split.
  * \param delim Each character in this string is treated as a separator.
  * \param result The array of words is returned here. It is set to \p NULL on
  * errors.
  *
  * \return Number of words in \a buf, negative on errors.
  */
 int create_argv(const char *buf, const char *delim, char ***result)
 {
         const int first_word_index = 0; /* no reserved leading elements */

         return create_argv_offset(first_word_index, buf, delim, result);
 }
 834
 /**
  * Split a buffer into words, offset one.
  *
  * This works like \ref create_argv(), but the returned array is one element
  * larger: element zero is initialized to \p NULL and the words start at
  * index one. Since free_argv() stops at the first \p NULL element, callers
  * must set element zero to a non-NULL value before passing the array to
  * free_argv(), or the words are leaked.
  *
  * \param buf See \ref create_argv().
  * \param delim See \ref create_argv().
  * \param result See \ref create_argv().
  *
  * \return Number of words plus one on success, negative on errors.
  */
 int create_shifted_argv(const char *buf, const char *delim, char ***result)
 {
         const int first_word_index = 1; /* element zero is left to the caller */

         return create_argv_offset(first_word_index, buf, delim, result);
 }
 853
 854 /**
 855  * Find out if the given string is contained in the arg vector.
 856  *
 857  * \param arg The string to look for.
 858  * \param argv The array to search.
 859  *
 860  * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
 861  * arg was not found in \a argv.
 862  */
 863 int find_arg(const char *arg, char **argv)
 864 {
 865         int i;
 866
 867         if (!argv)
 868                 return -E_ARG_NOT_FOUND;
 869         for (i = 0; argv[i]; i++)
 870                 if (strcmp(arg, argv[i]) == 0)
 871                         return i;
 872         return -E_ARG_NOT_FOUND;
 873 }
 874
 875 /**
 876  * Compile a regular expression.
 877  *
 878  * This simple wrapper calls regcomp() and logs a message on errors.
 879  *
 880  * \param preg See regcomp(3).
 881  * \param regex See regcomp(3).
 882  * \param cflags See regcomp(3).
 883  *
 884  * \return Standard.
 885  */
 886 int para_regcomp(regex_t *preg, const char *regex, int cflags)
 887 {
 888         char *buf;
 889         size_t size;
 890         int ret = regcomp(preg, regex, cflags);
 891
 892         if (ret == 0)
 893                 return 1;
 894         size = regerror(ret, preg, NULL, 0);
 895         buf = para_malloc(size);
 896         regerror(ret, preg, buf, size);
 897         PARA_ERROR_LOG("%s\n", buf);
 898         free(buf);
 899         return -E_REGEX;
 900 }
 901
 /**
  * strdup() for not necessarily zero-terminated strings.
  *
  * \param src The source buffer. It is not accessed if \a len is zero.
  * \param len The number of bytes to be copied.
  *
  * \return A newly allocated buffer of \a len + 1 bytes which contains the
  * first \a len bytes of \a src, followed by a terminating null byte.
  *
  * This is similar to strndup(), which is a GNU extension. However, the
  * buffer is obtained from para_malloc(), so this function aborts where
  * strndup() would return \p NULL due to insufficient memory.
  *
  * \sa strdup(), \ref para_strdup().
  */
 char *safe_strdup(const char *src, size_t len)
 {
         char *copy;

         /* len + 1 must not wrap around to zero */
         assert(len < (size_t)-1);
         copy = para_malloc(len + 1);
         copy[len] = '\0';
         if (len)
                 memcpy(copy, src, len);
         return copy;
 }
 927
 928 /**
 929  * Copy the value of a key=value pair.
 930  *
 931  * This checks whether the given buffer starts with "key=", ignoring case. If
 932  * yes, a copy of the value is returned. The source buffer may not be
 933  * zero-terminated.
 934  *
 935  * \param src The source buffer.
 936  * \param len The number of bytes of the tag.
 937  * \param key Only copy if it is the value of this key.
 938  *
 939  * \return A zero-terminated buffer, or \p NULL if the key was
 940  * not of the given type.
 941  */
 942 char *key_value_copy(const char *src, size_t len, const char *key)
 943 {
 944         int keylen = strlen(key);
 945
 946         if (len <= keylen)
 947                 return NULL;
 948         if (strncasecmp(src, key, keylen))
 949                 return NULL;
 950         if (src[keylen] != '=')
 951                 return NULL;
 952         return safe_strdup(src + keylen + 1, len - keylen - 1);
 953 }
 954
 955 static bool utf8_mode(void)
 956 {
 957         static bool initialized, have_utf8;
 958
 959         if (!initialized) {
 960                 char *info = nl_langinfo(CODESET);
 961                 have_utf8 = (info && strcmp(info, "UTF-8") == 0);
 962                 initialized = true;
 963                 PARA_INFO_LOG("%susing UTF-8 character encoding\n",
 964                         have_utf8? "" : "not ");
 965         }
 966         return have_utf8;
 967 }
 968
 969 /*
 970  * glibc's wcswidth returns -1 if the string contains a tab character, which
 971  * makes the function next to useless. The two functions below are taken from
 972  * mutt.
 973  */
 974
 975 #define IsWPrint(wc) (iswprint(wc) || wc >= 0xa0)
 976
 977 static int mutt_wcwidth(wchar_t wc, size_t pos)
 978 {
 979         int n;
 980
 981         if (wc == 0x09) /* tab */
 982                 return (pos | 7) + 1 - pos;
 983         n = wcwidth(wc);
 984         if (IsWPrint(wc) && n > 0)
 985                 return n;
 986         if (!(wc & ~0x7f))
 987                 return 2;
 988         if (!(wc & ~0xffff))
 989                 return 6;
 990         return 10;
 991 }
 992
 993 static size_t mutt_wcswidth(const wchar_t *s, size_t n)
 994 {
 995         size_t w = 0;
 996
 997         while (n--)
 998                 w += mutt_wcwidth(*s++, w);
 999         return w;
1000 }
1001
/**
 * Translate a number of screen cells into a number of bytes.
 *
 * \param s The input string.
 * \param cells_to_skip The width, in cells, by which \a s should be advanced.
 * \param bytes_to_skip Result pointer, always initialized to zero first.
 *
 * If the character encoding is not UTF-8, one byte is assumed to occupy one
 * cell, so the result is simply \a cells_to_skip. The length of \a s is not
 * examined in this case.
 *
 * In UTF-8 mode \a s is decoded as a multibyte string, one character at a
 * time, until the accumulated width of the decoded characters is no smaller
 * than \a cells_to_skip. On success \a s + \a bytes_to_skip points to the
 * start of the next multibyte character. Since a single character may occupy
 * several cells, the skipped width may exceed \a cells_to_skip.
 *
 * \return Zero if \a cells_to_skip is zero or UTF-8 mode is off, one if the
 * byte count was computed by decoding \a s. If an invalid or incomplete
 * multibyte sequence is hit, which includes running out of input before
 * enough cells were skipped, \p -ERRNO_TO_PARA_ERROR(EILSEQ) is returned and
 * \a bytes_to_skip is zero.
 */
int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
{
        mbstate_t state;
        wchar_t wc;
        size_t len, offset = 0, width = 0;

        *bytes_to_skip = 0;
        if (cells_to_skip == 0)
                return 0;
        if (!utf8_mode()) {
                *bytes_to_skip = cells_to_skip;
                return 0;
        }
        memset(&state, 0, sizeof(state));
        len = strlen(s);
        /* cells_to_skip is positive here, so at least one pass is needed. */
        do {
                size_t mbret = mbrtowc(&wc, s + offset, len - offset, &state);

                /* The terminating zero byte is never offered to mbrtowc(). */
                assert(mbret != 0);
                if (mbret == (size_t)-1 || mbret == (size_t)-2)
                        return -ERRNO_TO_PARA_ERROR(EILSEQ);
                offset += mbret;
                width += mutt_wcwidth(wc, width);
        } while (width < cells_to_skip);
        *bytes_to_skip = offset;
        return 1;
}
1048
1049 /**
1050  * Compute the width of an UTF-8 string.
1051  *
1052  * \param s The string.
1053  * \param result The width of \a s is returned here.
1054  *
1055  * If not in UTF8-mode. this function is just a wrapper for strlen(3).
1056  * Otherwise \a s is treated as an UTF-8 string and its display width is
1057  * computed. Note that this function may fail if the underlying call to
1058  * mbsrtowcs(3) fails, so the caller must check the return value.
1059  *
1060  * \sa nl_langinfo(3), wcswidth(3).
1061  *
1062  * \return Standard.
1063  */
1064 __must_check int strwidth(const char *s, size_t *result)
1065 {
1066         const char *src = s;
1067         mbstate_t state;
1068         static wchar_t *dest;
1069         size_t num_wchars;
1070
1071         /*
1072          * Never call any log function here. This may result in an endless loop
1073          * as para_gui's para_log() calls this function.
1074          */
1075
1076         if (!utf8_mode()) {
1077                 *result = strlen(s);
1078                 return 0;
1079         }
1080         memset(&state, 0, sizeof(state));
1081         *result = 0;
1082         num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1083         if (num_wchars == (size_t)-1)
1084                 return -ERRNO_TO_PARA_ERROR(errno);
1085         if (num_wchars == 0)
1086                 return 0;
1087         dest = para_malloc(num_wchars * sizeof(*dest));
1088         src = s;
1089         memset(&state, 0, sizeof(state));
1090         num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1091         assert(num_wchars > 0 && num_wchars != (size_t)-1);
1092         *result = mutt_wcswidth(dest, num_wchars);
1093         free(dest);
1094         return 1;
1095 }