string.c

   1 /* Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>, see file COPYING. */
   2
   3 /** \file string.c Memory allocation and string handling functions. */
   4
   5 #include "para.h"
   6
   7 #include <pwd.h>
   8 #include <sys/utsname.h> /* uname() */
   9 #include <regex.h>
  10 #include <langinfo.h>
  11 #include <wchar.h>
  12 #include <wctype.h>
  13
  14 #include "string.h"
  15 #include "error.h"
  16
  17 /**
  18  * Paraslash's version of realloc().
  19  *
  20  * \param p Pointer to the memory block, may be \p NULL.
  21  * \param size The desired new size.
  22  *
  23  * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
  24  * i.e. there is no need to check the return value in the caller.
  25  *
  26  * \return A pointer to newly allocated memory which is suitably aligned for
  27  * any kind of variable and may be different from \a p.
  28  *
  29  * \sa realloc(3).
  30  */
  31 __must_check void *para_realloc(void *p, size_t size)
  32 {
  33         /*
  34          * No need to check for NULL pointers: If p is NULL, the call
  35          * to realloc is equivalent to malloc(size)
  36          */
  37         assert(size);
  38         if (!(p = realloc(p, size))) {
  39                 PARA_EMERG_LOG("realloc failed (size = %zu), aborting\n",
  40                         size);
  41                 exit(EXIT_FAILURE);
  42         }
  43         return p;
  44 }
  45
  46 /**
  47  * Paraslash's version of malloc().
  48  *
  49  * \param size The desired new size.
  50  *
  51  * A wrapper for malloc(3) which exits on errors.
  52  *
  53  * \return A pointer to the allocated memory, which is suitably aligned for any
  54  * kind of variable.
  55  *
  56  * \sa malloc(3).
  57  */
  58 __must_check __malloc void *alloc(size_t size)
  59 {
  60         void *p;
  61
  62         assert(size);
  63         p = malloc(size);
  64         if (!p) {
  65                 PARA_EMERG_LOG("malloc failed (size = %zu), aborting\n",
  66                         size);
  67                 exit(EXIT_FAILURE);
  68         }
  69         return p;
  70 }
  71
  72 /**
  73  * Paraslash's version of calloc().
  74  *
  75  * \param size The desired new size.
  76  *
  77  * A wrapper for calloc(3) which exits on errors.
  78  *
  79  * \return A pointer to the allocated and zeroed-out memory, which is suitably
  80  * aligned for any kind of variable.
  81  *
  82  * \sa calloc(3)
  83  */
  84 __must_check __malloc void *para_calloc(size_t size)
  85 {
  86         void *ret = alloc(size);
  87
  88         memset(ret, 0, size);
  89         return ret;
  90 }
  91
  92 /**
  93  * Paraslash's version of strdup().
  94  *
  95  * \param s The string to be duplicated.
  96  *
  97  * A strdup(3)-like function which aborts if insufficient memory was available
  98  * to allocate the duplicated string, absolving the caller from the
  99  * responsibility to check for failure.
 100  *
 101  * \return A pointer to the duplicated string. Unlike strdup(3), the caller may
 102  * pass NULL, in which case the function returns a pointer to an empty string.
 103  * Regardless of whether or not NULL was passed, the returned string is
 104  * allocated on the heap and has to be freed by the caller.
 105  *
 106  * \sa strdup(3).
 107  */
 108 __must_check __malloc char *para_strdup(const char *s)
 109 {
 110         char *dupped_string = strdup(s? s: "");
 111
 112         assert(dupped_string);
 113         return dupped_string;
 114 }
 115
 116 /**
 117  * Print a formatted message to a dynamically allocated string.
 118  *
 119  * \param result The formatted string is returned here.
 120  * \param fmt The format string.
 121  * \param ap Initialized list of arguments.
 122  *
 123  * This function is similar to vasprintf(), a GNU extension which is not in C
 124  * or POSIX. It allocates a string large enough to hold the output including
 125  * the terminating null byte. The allocated string is returned via the first
 126  * argument and must be freed by the caller. However, unlike vasprintf(), this
 127  * function calls exit() if insufficient memory is available, while vasprintf()
 128  * returns -1 in this case.
 129  *
 130  * \return Number of bytes written, not including the terminating \p NULL
 131  * character.
 132  *
 133  * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
 134  */
 135 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
 136 {
 137         int ret;
 138         size_t size = 150;
 139         va_list aq;
 140
 141         *result = alloc(size + 1);
 142         va_copy(aq, ap);
 143         ret = vsnprintf(*result, size, fmt, aq);
 144         va_end(aq);
 145         assert(ret >= 0);
 146         if (ret < size) /* OK */
 147                 return ret;
 148         size = ret + 1;
 149         *result = para_realloc(*result, size);
 150         va_copy(aq, ap);
 151         ret = vsnprintf(*result, size, fmt, aq);
 152         va_end(aq);
 153         assert(ret >= 0 && ret < size);
 154         return ret;
 155 }
 156
 157 /**
 158  * Print to a dynamically allocated string, variable number of arguments.
 159  *
 160  * \param result See \ref xvasprintf().
 161  * \param fmt Usual format string.
 162  *
 163  * \return The return value of the underlying call to \ref xvasprintf().
 164  *
 165  * \sa \ref xvasprintf() and the references mentioned there.
 166  */
 167 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
 168 {
 169         va_list ap;
 170         unsigned ret;
 171
 172         va_start(ap, fmt);
 173         ret = xvasprintf(result, fmt, ap);
 174         va_end(ap);
 175         return ret;
 176 }
 177
 178 /**
 179  * Allocate a sufficiently large string and print into it.
 180  *
 181  * \param fmt A usual format string.
 182  *
 183  * Produce output according to \p fmt. No artificial bound on the length of the
 184  * resulting string is imposed.
 185  *
 186  * \return This function either returns a pointer to a string that must be
 187  * freed by the caller or aborts without returning.
 188  *
 189  * \sa printf(3), \ref xasprintf().
 190  */
 191 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
 192 {
 193         char *msg;
 194         va_list ap;
 195
 196         va_start(ap, fmt);
 197         xvasprintf(&msg, fmt, ap);
 198         va_end(ap);
 199         return msg;
 200 }
 201
 202 /**
 203  * Free the content of a pointer and set it to NULL.
 204  *
 205  * \param arg A pointer to the pointer whose content should be freed.
 206  *
 207  * If arg is NULL, the function returns immediately. Otherwise it frees the
 208  * memory pointed to by *arg and sets *arg to NULL. Hence callers have to pass
 209  * the *address* of the pointer variable that points to the memory which should
 210  * be freed.
 211  */
 212 void freep(void *arg)
 213 {
 214         if (arg) {
 215                 void **ptr = arg;
 216                 free(*ptr);
 217                 *ptr = NULL;
 218         }
 219 }
 220
 221 /**
 222  * Paraslash's version of strcat().
 223  *
 224  * \param a String to be appended to.
 225  * \param b String to append.
 226  *
 227  * Append \p b to \p a.
 228  *
 229  * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
 230  * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
 231  * return \a a without making a copy of \a a.  Otherwise, construct the
 232  * concatenation \a c, free \a a (but not \a b) and return \a c.
 233  *
 234  * \sa strcat(3).
 235  */
 236 __must_check __malloc char *para_strcat(char *a, const char *b)
 237 {
 238         char *tmp;
 239
 240         if (!a)
 241                 return para_strdup(b);
 242         if (!b)
 243                 return a;
 244         tmp = make_message("%s%s", a, b);
 245         free(a);
 246         return tmp;
 247 }
 248
 249 /**
 250  * Get the logname of the current user.
 251  *
 252  * \return A dynamically allocated string that must be freed by the caller. On
 253  * errors, the string "unknown_user" is returned, i.e. this function never
 254  * returns \p NULL.
 255  *
 256  * \sa getpwuid(3).
 257  */
 258 __must_check __malloc char *para_logname(void)
 259 {
 260         struct passwd *pw = getpwuid(getuid());
 261         return para_strdup(pw? pw->pw_name : "unknown_user");
 262 }
 263
 264 /**
 265  * Get the home directory of the current user.
 266  *
 267  * \return A dynamically allocated string that must be freed by the caller. If
 268  * the home directory could not be found, this function returns "/tmp".
 269  */
 270 __must_check __malloc char *para_homedir(void)
 271 {
 272         struct passwd *pw = getpwuid(getuid());
 273         return para_strdup(pw? pw->pw_dir : "/tmp");
 274 }
 275
 276 /**
 277  * Get the own hostname.
 278  *
 279  * \return A dynamically allocated string containing the hostname.
 280  *
 281  * \sa uname(2).
 282  */
 283 __malloc char *para_hostname(void)
 284 {
 285         struct utsname u;
 286
 287         uname(&u);
 288         return para_strdup(u.nodename);
 289 }
 290
 291 /**
 292  * Call a custom function for each complete line.
 293  *
 294  * \param flags Any combination of flags defined in \ref for_each_line_flags.
 295  * \param buf The buffer containing data separated by newlines.
 296  * \param size The number of bytes in \a buf.
 297  * \param line_handler The custom function.
 298  * \param private_data Pointer passed to \a line_handler.
 299  *
 300  * For each complete line in \p buf, \p line_handler is called. The first
 301  * argument to \p line_handler is (a copy of) the current line, and \p
 302  * private_data is passed as the second argument.  If the \p FELF_READ_ONLY
 303  * flag is unset, a pointer into \a buf is passed to the line handler,
 304  * otherwise a pointer to a copy of the current line is passed instead. This
 305  * copy is freed immediately after the line handler returns.
 306  *
 307  * The function returns if \p line_handler returns a negative value or no more
 308  * lines are in the buffer.  The rest of the buffer (last chunk containing an
 309  * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
 310  * unset.
 311  *
 312  * \return On success this function returns the number of bytes not handled to
 313  * \p line_handler. The only possible error is a negative return value from the
 314  * line handler. In this case processing stops and the return value of the line
 315  * handler is returned to indicate failure.
 316  *
 317  * \sa \ref for_each_line_flags.
 318  */
 319 int for_each_line(unsigned flags, char *buf, size_t size,
 320                 line_handler_t *line_handler, void *private_data)
 321 {
 322         char *start = buf, *end;
 323         int ret, i, num_lines = 0;
 324
 325 //      PARA_NOTICE_LOG("buf: %s\n", buf);
 326         while (start < buf + size) {
 327                 char *next_null;
 328                 char *next_cr;
 329
 330                 next_cr = memchr(start, '\n', buf + size - start);
 331                 next_null = memchr(start, '\0', next_cr?
 332                         next_cr - start : buf + size - start);
 333                 if (!next_cr && !next_null)
 334                         break;
 335                 if (next_null)
 336                         end = next_null;
 337                 else
 338                         end = next_cr;
 339                 num_lines++;
 340                 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
 341                         if (flags & FELF_READ_ONLY) {
 342                                 size_t s = end - start;
 343                                 char *b = alloc(s + 1);
 344                                 memcpy(b, start, s);
 345                                 b[s] = '\0';
 346                                 ret = line_handler(b, private_data);
 347                                 free(b);
 348                         } else {
 349                                 *end = '\0';
 350                                 ret = line_handler(start, private_data);
 351                         }
 352                         if (ret < 0)
 353                                 return ret;
 354                 }
 355                 start = ++end;
 356         }
 357         i = buf + size - start;
 358         if (i && i != size && !(flags & FELF_READ_ONLY))
 359                 memmove(buf, start, i);
 360         return i;
 361 }
 362
 363 /** Return the hex characters of the lower 4 bits. */
 364 #define hex(a) (hexchar[(a) & 15])
 365
 366 static void write_size_header(char *buf, int n)
 367 {
 368         static char hexchar[] = "0123456789abcdef";
 369
 370         buf[0] = hex(n >> 12);
 371         buf[1] = hex(n >> 8);
 372         buf[2] = hex(n >> 4);
 373         buf[3] = hex(n);
 374         buf[4] = ' ';
 375 }
 376
 377 /**
 378  * Read a four-byte hex-number and return its value.
 379  *
 380  * Each status item sent by para_server is prefixed with such a hex number in
 381  * ASCII which describes the size of the status item.
 382  *
 383  * \param buf The buffer which must be at least four bytes long.
 384  *
 385  * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
 386  * buffer did not contain only hex digits.
 387  */
 388 int read_size_header(const char *buf)
 389 {
 390         int i, len = 0;
 391
 392         for (i = 0; i < 4; i++) {
 393                 unsigned char c = buf[i];
 394                 len <<= 4;
 395                 if (c >= '0' && c <= '9') {
 396                         len += c - '0';
 397                         continue;
 398                 }
 399                 if (c >= 'a' && c <= 'f') {
 400                         len += c - 'a' + 10;
 401                         continue;
 402                 }
 403                 return -E_SIZE_PREFIX;
 404         }
 405         if (buf[4] != ' ')
 406                 return -E_SIZE_PREFIX;
 407         return len;
 408 }
 409
 410 /**
 411  * Safely print into a buffer at a given offset.
 412  *
 413  * \param b Determines the buffer, its size, and the offset.
 414  * \param fmt The format string.
 415  *
 416  * This function prints into the buffer given by \a b at the offset which is
 417  * also given by \a b. If there is not enough space to hold the result, the
 418  * buffer size is doubled until the underlying call to vsnprintf() succeeds
 419  * or the size of the buffer exceeds the maximal size specified in \a b.
 420  *
 421  * In the latter case the unmodified \a buf and \a offset values as well as the
 422  * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
 423  * If this function succeeds, i.e. returns a non-negative value, the offset of
 424  * \a b is reset to zero and the given data is written to the beginning of the
 425  * buffer. If \a max_size_handler() returns a negative value, this value is
 426  * returned by \a para_printf().
 427  *
 428  * Upon return, the offset of \a b is adjusted accordingly so that subsequent
 429  * calls to this function append data to what is already contained in the
 430  * buffer.
 431  *
 432  * It's OK to call this function with \p b->buf being \p NULL. In this case, an
 433  * initial buffer is allocated.
 434  *
 435  * \return The number of bytes printed into the buffer (not including the
 436  * terminating \p NULL byte) on success, negative on errors. If there is no
 437  * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
 438  * fails.
 439  *
 440  * \sa make_message(), vsnprintf(3).
 441  */
 442 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
 443 {
 444         int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
 445
 446         if (!b->buf) {
 447                 b->buf = alloc(128);
 448                 b->size = 128;
 449                 b->offset = 0;
 450         }
 451         while (1) {
 452                 char *p = b->buf + b->offset;
 453                 size_t size = b->size - b->offset;
 454                 va_list ap;
 455
 456                 if (size > sz_off) {
 457                         va_start(ap, fmt);
 458                         ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
 459                         va_end(ap);
 460                         if (ret > -1 && ret < size - sz_off) { /* success */
 461                                 b->offset += ret + sz_off;
 462                                 if (sz_off)
 463                                         write_size_header(p, ret);
 464                                 return ret + sz_off;
 465                         }
 466                 }
 467                 /* check if we may grow the buffer */
 468                 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
 469                         /* try again with more space */
 470                         b->size *= 2;
 471                         b->buf = para_realloc(b->buf, b->size);
 472                         continue;
 473                 }
 474                 /* can't grow buffer */
 475                 if (!b->offset || !b->max_size_handler) /* message too large */
 476                         return -ERRNO_TO_PARA_ERROR(ENOSPC);
 477                 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
 478                 if (ret < 0)
 479                         return ret;
 480                 b->offset = 0;
 481         }
 482 }
 483
 484 /** \cond llong_minmax */
 485 /* LLONG_MAX and LLONG_MIN might not be defined. */
 486 #ifndef LLONG_MAX
 487 #define LLONG_MAX 9223372036854775807LL
 488 #endif
 489 #ifndef LLONG_MIN
 490 #define LLONG_MIN (-LLONG_MAX - 1LL)
 491 #endif
 492 /** \endcond llong_minmax */
 493
 494 /**
 495  * Convert a string to a 64-bit signed integer value.
 496  *
 497  * \param str The string to be converted.
 498  * \param value Result pointer.
 499  *
 500  * \return Standard.
 501  *
 502  * \sa \ref para_atoi32(), strtol(3), atoi(3).
 503  */
 504 int para_atoi64(const char *str, int64_t *value)
 505 {
 506         char *endptr;
 507         long long tmp;
 508
 509         errno = 0; /* To distinguish success/failure after call */
 510         tmp = strtoll(str, &endptr, 10);
 511         if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
 512                 return -E_ATOI_OVERFLOW;
 513         /*
 514          * If there were no digits at all, strtoll() stores the original value
 515          * of str in *endptr.
 516          */
 517         if (endptr == str)
 518                 return -E_ATOI_NO_DIGITS;
 519         /*
 520          * The implementation may also set errno and return 0 in case no
 521          * conversion was performed.
 522          */
 523         if (errno != 0 && tmp == 0)
 524                 return -E_ATOI_NO_DIGITS;
 525         if (*endptr != '\0') /* Further characters after number */
 526                 return -E_ATOI_JUNK_AT_END;
 527         *value = tmp;
 528         return 1;
 529 }
 530
 531 /**
 532  * Convert a string to a 32-bit signed integer value.
 533  *
 534  * \param str The string to be converted.
 535  * \param value Result pointer.
 536  *
 537  * \return Standard.
 538  *
 539  * \sa \ref para_atoi64().
 540 */
 541 int para_atoi32(const char *str, int32_t *value)
 542 {
 543         int64_t tmp;
 544         int ret;
 545         const int32_t max = 2147483647;
 546
 547         ret = para_atoi64(str, &tmp);
 548         if (ret < 0)
 549                 return ret;
 550         if (tmp > max || tmp < -max - 1)
 551                 return -E_ATOI_OVERFLOW;
 552         *value = tmp;
 553         return 1;
 554 }
 555
 556 static inline int loglevel_equal(const char *arg, const char * const ll)
 557 {
 558         return !strncasecmp(arg, ll, strlen(ll));
 559 }
 560
 561 /**
 562  * Compute the loglevel number from its name.
 563  *
 564  * \param txt The name of the loglevel (debug, info, ...).
 565  *
 566  * \return The numeric representation of the loglevel name.
 567  */
 568 int get_loglevel_by_name(const char *txt)
 569 {
 570         if (loglevel_equal(txt, "debug"))
 571                 return LL_DEBUG;
 572         if (loglevel_equal(txt, "info"))
 573                 return LL_INFO;
 574         if (loglevel_equal(txt, "notice"))
 575                 return LL_NOTICE;
 576         if (loglevel_equal(txt, "warning"))
 577                 return LL_WARNING;
 578         if (loglevel_equal(txt, "error"))
 579                 return LL_ERROR;
 580         if (loglevel_equal(txt, "crit"))
 581                 return LL_CRIT;
 582         if (loglevel_equal(txt, "emerg"))
 583                 return LL_EMERG;
 584         return -E_BAD_LL;
 585 }
 586
 587 static int get_next_word(const char *buf, const char *delim, char **word)
 588 {
 589         enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
 590                 LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
 591         const char *in;
 592         char *out;
 593         int ret, state = 0;
 594
 595         out = alloc(strlen(buf) + 1);
 596         *out = '\0';
 597         *word = out;
 598         for (in = buf; *in; in++) {
 599                 const char *p;
 600
 601                 switch (*in) {
 602                 case '\\':
 603                         if (state & LSF_BACKSLASH) /* \\ */
 604                                 goto copy_char;
 605                         state |= LSF_BACKSLASH;
 606                         state |= LSF_HAVE_WORD;
 607                         continue;
 608                 case 'n':
 609                 case 't':
 610                         if (state & LSF_BACKSLASH) { /* \n or \t */
 611                                 *out++ = (*in == 'n')? '\n' : '\t';
 612                                 state &= ~LSF_BACKSLASH;
 613                                 continue;
 614                         }
 615                         goto copy_char;
 616                 case '"':
 617                         if (state & LSF_BACKSLASH) /* \" */
 618                                 goto copy_char;
 619                         if (state & LSF_SINGLE_QUOTE) /* '" */
 620                                 goto copy_char;
 621                         if (state & LSF_DOUBLE_QUOTE) {
 622                                 state &= ~LSF_DOUBLE_QUOTE;
 623                                 continue;
 624                         }
 625                         state |= LSF_HAVE_WORD;
 626                         state |= LSF_DOUBLE_QUOTE;
 627                         continue;
 628                 case '\'':
 629                         if (state & LSF_BACKSLASH) /* \' */
 630                                 goto copy_char;
 631                         if (state & LSF_DOUBLE_QUOTE) /* "' */
 632                                 goto copy_char;
 633                         if (state & LSF_SINGLE_QUOTE) {
 634                                 state &= ~LSF_SINGLE_QUOTE;
 635                                 continue;
 636                         }
 637                         state |= LSF_HAVE_WORD;
 638                         state |= LSF_SINGLE_QUOTE;
 639                         continue;
 640                 }
 641                 for (p = delim; *p; p++) {
 642                         if (*in != *p)
 643                                 continue;
 644                         if (state & LSF_BACKSLASH)
 645                                 goto copy_char;
 646                         if (state & LSF_SINGLE_QUOTE)
 647                                 goto copy_char;
 648                         if (state & LSF_DOUBLE_QUOTE)
 649                                 goto copy_char;
 650                         if (state & LSF_HAVE_WORD)
 651                                 goto success;
 652                         break;
 653                 }
 654                 if (*p) /* ignore delimiter at the beginning */
 655                         continue;
 656 copy_char:
 657                 state |= LSF_HAVE_WORD;
 658                 *out++ = *in;
 659                 state &= ~LSF_BACKSLASH;
 660         }
 661         ret = 0;
 662         if (!(state & LSF_HAVE_WORD))
 663                 goto out;
 664         ret = -ERRNO_TO_PARA_ERROR(EINVAL);
 665         if (state & LSF_BACKSLASH) {
 666                 PARA_ERROR_LOG("trailing backslash\n");
 667                 goto out;
 668         }
 669         if ((state & LSF_SINGLE_QUOTE) || (state & LSF_DOUBLE_QUOTE)) {
 670                 PARA_ERROR_LOG("unmatched quote character\n");
 671                 goto out;
 672         }
 673 success:
 674         *out = '\0';
 675         return in - buf;
 676 out:
 677         free(*word);
 678         *word = NULL;
 679         return ret;
 680 }
 681
 682 /**
 683  * Get the number of the word the cursor is on.
 684  *
 685  * \param buf The zero-terminated line buffer.
 686  * \param delim Characters that separate words.
 687  * \param point The cursor position.
 688  *
 689  * \return Zero-based word number.
 690  */
 691 int compute_word_num(const char *buf, const char *delim, int point)
 692 {
 693         int ret, num_words;
 694         const char *p;
 695         char *word;
 696
 697         for (p = buf, num_words = 0; ; p += ret, num_words++) {
 698                 ret = get_next_word(p, delim, &word);
 699                 if (ret <= 0)
 700                         break;
 701                 free(word);
 702                 if (p + ret >= buf + point)
 703                         break;
 704         }
 705         return num_words;
 706 }
 707
 708 /**
 709  * Free an array of words created by create_argv() or create_shifted_argv().
 710  *
 711  * \param argv A pointer previously obtained by \ref create_argv().
 712  */
 713 void free_argv(char **argv)
 714 {
 715         int i;
 716
 717         if (!argv)
 718                 return;
 719         for (i = 0; argv[i]; i++)
 720                 free(argv[i]);
 721         free(argv);
 722 }
 723
 724 static int create_argv_offset(int offset, const char *buf, const char *delim,
 725                 char ***result)
 726 {
 727         char *word, **argv = alloc((offset + 1) * sizeof(char *));
 728         const char *p;
 729         int i, ret;
 730
 731         for (i = 0; i < offset; i++)
 732                 argv[i] = NULL;
 733         for (p = buf; p && *p; p += ret, i++) {
 734                 ret = get_next_word(p, delim, &word);
 735                 if (ret < 0)
 736                         goto err;
 737                 if (!ret)
 738                         break;
 739                 argv = para_realloc(argv, (i + 2) * sizeof(char*));
 740                 argv[i] = word;
 741         }
 742         argv[i] = NULL;
 743         *result = argv;
 744         return i;
 745 err:
 746         while (i > 0)
 747                 free(argv[--i]);
 748         free(argv);
 749         *result = NULL;
 750         return ret;
 751 }
 752
 753 /**
 754  * Split a buffer into words.
 755  *
 756  * This parser honors single and double quotes, backslash-escaped characters
 757  * and special characters like \\n. The result contains pointers to copies of
 758  * the words contained in buf and has to be freed by using \ref free_argv().
 759  *
 760  * \param buf The buffer to be split.
 761  * \param delim Each character in this string is treated as a separator.
 762  * \param result The array of words is returned here.
 763  *
 764  * It's OK to pass NULL as the buffer argument. This is equivalent to passing
 765  * the empty string.
 766  *
 767  * \return Number of words in buf, negative on errors. The array returned
 768  * through the result pointer is NULL terminated.
 769  */
 770 int create_argv(const char *buf, const char *delim, char ***result)
 771 {
 772         return create_argv_offset(0, buf, delim, result);
 773 }
 774
 775 /**
 776  * Split a buffer into words, offset one.
 777  *
 778  * This is similar to \ref create_argv() but the returned array is one element
 779  * larger, words start at index one and element zero is initialized to \p NULL.
 780  * Callers must set element zero to a non-NULL value before calling free_argv()
 781  * on the returned array to avoid a memory leak.
 782  *
 783  * \param buf See \ref create_argv().
 784  * \param delim See \ref create_argv().
 785  * \param result See \ref create_argv().
 786  *
 787  * \return Number of words plus one on success, negative on errors.
 788  */
 789 int create_shifted_argv(const char *buf, const char *delim, char ***result)
 790 {
 791         return create_argv_offset(1, buf, delim, result);
 792 }
 793
 794 /**
 795  * Find out if the given string is contained in the arg vector.
 796  *
 797  * \param arg The string to look for.
 798  * \param argv The array to search.
 799  *
 800  * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
 801  * arg was not found in \a argv.
 802  */
 803 int find_arg(const char *arg, char **argv)
 804 {
 805         int i;
 806
 807         if (!argv)
 808                 return -E_ARG_NOT_FOUND;
 809         for (i = 0; argv[i]; i++)
 810                 if (strcmp(arg, argv[i]) == 0)
 811                         return i;
 812         return -E_ARG_NOT_FOUND;
 813 }
 814
 815 /**
 816  * Compile a regular expression.
 817  *
 818  * This simple wrapper calls regcomp() and logs a message on errors.
 819  *
 820  * \param preg See regcomp(3).
 821  * \param regex See regcomp(3).
 822  * \param cflags See regcomp(3).
 823  *
 824  * \return Standard.
 825  */
 826 int para_regcomp(regex_t *preg, const char *regex, int cflags)
 827 {
 828         char *buf;
 829         size_t size;
 830         int ret = regcomp(preg, regex, cflags);
 831
 832         if (ret == 0)
 833                 return 1;
 834         size = regerror(ret, preg, NULL, 0);
 835         buf = alloc(size);
 836         regerror(ret, preg, buf, size);
 837         PARA_ERROR_LOG("%s\n", buf);
 838         free(buf);
 839         return -E_REGEX;
 840 }
 841
 842 /**
 843  * strdup() for not necessarily zero-terminated strings.
 844  *
 845  * \param src The source buffer.
 846  * \param len The number of bytes to be copied.
 847  *
 848  * \return A 0-terminated buffer of length \a len + 1.
 849  *
 850  * This is similar to strndup(), which is a GNU extension. However, one
 851  * difference is that strndup() returns \p NULL if insufficient memory was
 852  * available while this function aborts in this case.
 853  *
 854  * \sa strdup(), \ref para_strdup().
 855  */
 856 char *safe_strdup(const char *src, size_t len)
 857 {
 858         char *p;
 859
 860         assert(len < (size_t)-1);
 861         p = alloc(len + 1);
 862         if (len > 0)
 863                 memcpy(p, src, len);
 864         p[len] = '\0';
 865         return p;
 866 }
 867
 868 /**
 869  * Copy the value of a key=value pair.
 870  *
 871  * This checks whether the given buffer starts with "key=", ignoring case. If
 872  * yes, a copy of the value is returned. The source buffer may not be
 873  * zero-terminated.
 874  *
 875  * \param src The source buffer.
 876  * \param len The number of bytes of the tag.
 877  * \param key Only copy if it is the value of this key.
 878  *
 879  * \return A zero-terminated buffer, or \p NULL if the key was
 880  * not of the given type.
 881  */
 882 char *key_value_copy(const char *src, size_t len, const char *key)
 883 {
 884         int keylen = strlen(key);
 885
 886         if (len <= keylen)
 887                 return NULL;
 888         if (strncasecmp(src, key, keylen))
 889                 return NULL;
 890         if (src[keylen] != '=')
 891                 return NULL;
 892         return safe_strdup(src + keylen + 1, len - keylen - 1);
 893 }
 894
 895 static bool utf8_mode(void)
 896 {
 897         static bool initialized, have_utf8;
 898
 899         if (!initialized) {
 900                 char *info = nl_langinfo(CODESET);
 901                 have_utf8 = (info && strcmp(info, "UTF-8") == 0);
 902                 initialized = true;
 903                 PARA_INFO_LOG("%susing UTF-8 character encoding\n",
 904                         have_utf8? "" : "not ");
 905         }
 906         return have_utf8;
 907 }
 908
 909 static int xwcwidth(wchar_t wc, size_t pos)
 910 {
 911         int n;
 912
 913         /* special-case for tab */
 914         if (wc == 0x09) /* tab */
 915                 return (pos | 7) + 1 - pos;
 916         n = wcwidth(wc);
 917         /* wcswidth() returns -1 for non-printable characters */
 918         return n >= 0? n : 1;
 919 }
 920
 921 static size_t xwcswidth(const wchar_t *s, size_t n)
 922 {
 923         size_t w = 0;
 924
 925         while (n--)
 926                 w += xwcwidth(*s++, w);
 927         return w;
 928 }
 929
 930 /**
 931  * Skip a given number of cells at the beginning of a string.
 932  *
 933  * \param s The input string.
 934  * \param cells_to_skip Desired number of cells that should be skipped.
 935  * \param bytes_to_skip Result.
 936  *
 937  * This function computes how many input bytes must be skipped to advance a
 938  * string by the given width. If the current character encoding is not UTF-8,
 939  * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
 940  * \a s is treated as a multibyte string and on successful return, \a s +
 941  * bytes_to_skip points to the start of a multibyte string such that the total
 942  * width of the multibyte characters that are skipped by advancing \a s that
 943  * many bytes equals at least \a cells_to_skip.
 944  *
 945  * \return Standard.
 946  */
 947 int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
 948 {
 949         wchar_t wc;
 950         mbstate_t ps;
 951         size_t n, bytes_parsed, cells_skipped;
 952
 953         *bytes_to_skip = 0;
 954         if (cells_to_skip == 0)
 955                 return 0;
 956         if (!utf8_mode()) {
 957                 *bytes_to_skip = cells_to_skip;
 958                 return 0;
 959         }
 960         bytes_parsed = cells_skipped = 0;
 961         memset(&ps, 0, sizeof(ps));
 962         n = strlen(s);
 963         while (cells_to_skip > cells_skipped) {
 964                 size_t mbret;
 965
 966                 mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps);
 967                 assert(mbret != 0);
 968                 if (mbret == (size_t)-1 || mbret == (size_t)-2)
 969                         return -ERRNO_TO_PARA_ERROR(EILSEQ);
 970                 bytes_parsed += mbret;
 971                 cells_skipped += xwcwidth(wc, cells_skipped);
 972         }
 973         *bytes_to_skip = bytes_parsed;
 974         return 1;
 975 }
 976
 977 /**
 978  * Compute the width of an UTF-8 string.
 979  *
 980  * \param s The string.
 981  * \param result The width of \a s is returned here.
 982  *
 983  * If not in UTF8-mode. this function is just a wrapper for strlen(3).
 984  * Otherwise \a s is treated as an UTF-8 string and its display width is
 985  * computed. Note that this function may fail if the underlying call to
 986  * mbsrtowcs(3) fails, so the caller must check the return value.
 987  *
 988  * \sa nl_langinfo(3), wcswidth(3).
 989  *
 990  * \return Standard.
 991  */
 992 __must_check int strwidth(const char *s, size_t *result)
 993 {
 994         const char *src = s;
 995         mbstate_t state;
 996         static wchar_t *dest;
 997         size_t num_wchars;
 998
 999         /*
1000          * Never call any log function here. This may result in an endless loop
1001          * as para_gui's para_log() calls this function.
1002          */
1003
1004         if (!utf8_mode()) {
1005                 *result = strlen(s);
1006                 return 0;
1007         }
1008         memset(&state, 0, sizeof(state));
1009         *result = 0;
1010         num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1011         if (num_wchars == (size_t)-1)
1012                 return -ERRNO_TO_PARA_ERROR(errno);
1013         if (num_wchars == 0)
1014                 return 0;
1015         dest = alloc((num_wchars + 1) * sizeof(*dest));
1016         src = s;
1017         memset(&state, 0, sizeof(state));
1018         num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1019         assert(num_wchars > 0 && num_wchars != (size_t)-1);
1020         *result = xwcswidth(dest, num_wchars);
1021         free(dest);
1022         return 1;
1023 }
1024
1025 /**
1026  * Truncate and sanitize a (wide character) string.
1027  *
1028  * This replaces all non-printable characters by spaces and makes sure that the
1029  * modified string does not exceed the given maximal width.
1030  *
1031  * \param src The source string in multi-byte form.
1032  * \param max_width The maximal number of cells the result may occupy.
1033  * \param result Sanitized multi-byte string, must be freed by caller.
1034  * \param width The width of the sanitized string, always <= max_width.
1035  *
1036  * The function is wide-character aware but falls back to C strings for
1037  * non-UTF-8 locales.
1038  *
1039  * \return Standard. On success, *result points to a sanitized copy of the
1040  * given string. This copy was allocated with malloc() and should hence be
1041  * freed when the caller is no longer interested in the result.
1042  *
1043  * The function fails if the given string contains an invalid multibyte
1044  * sequence. In this case, *result is set to NULL, and *width to zero.
1045  */
1046 __must_check int sanitize_str(const char *src, size_t max_width,
1047                 char **result, size_t *width)
1048 {
1049         mbstate_t state;
1050         static wchar_t *wcs;
1051         size_t num_wchars, n;
1052
1053         if (!utf8_mode()) {
1054                 *result = para_strdup(src);
1055                 /* replace non-printable characters by spaces */
1056                 for (n = 0; n < max_width && src[n]; n++) {
1057                         if (!isprint((unsigned char)src[n]))
1058                                 (*result)[n] = ' ';
1059                 }
1060                 (*result)[n] = '\0';
1061                 *width = n;
1062                 return 0;
1063         }
1064         *result = NULL;
1065         *width = 0;
1066         memset(&state, 0, sizeof(state));
1067         num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1068         if (num_wchars == (size_t)-1)
1069                 return -ERRNO_TO_PARA_ERROR(errno);
1070         wcs = alloc((num_wchars + 1) * sizeof(*wcs));
1071         memset(&state, 0, sizeof(state));
1072         num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
1073         assert(num_wchars != (size_t)-1);
1074         for (n = 0; n < num_wchars && *width < max_width; n++) {
1075                 if (!iswprint(wcs[n]))
1076                         wcs[n] = L' ';
1077                 *width += xwcwidth(wcs[n], *width);
1078         }
1079         wcs[n] = L'\0';
1080         n = wcstombs(NULL, wcs, 0) + 1;
1081         *result = alloc(n);
1082         num_wchars = wcstombs(*result, wcs, n);
1083         assert(num_wchars != (size_t)-1);
1084         free(wcs);
1085         return 1;
1086 }