string.c

   1 /* Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>, see file COPYING. */
   2
   3 /** \file string.c Memory allocation and string handling functions. */
   4
   5 #include "para.h"
   6
   7 #include <pwd.h>
   8 #include <sys/utsname.h> /* uname() */
   9 #include <regex.h>
  10 #include <langinfo.h>
  11 #include <wchar.h>
  12 #include <wctype.h>
  13
  14 #include "string.h"
  15 #include "error.h"
  16
  17 /**
  18  * Reallocate an array, abort on failure or bugs.
  19  *
  20  * \param ptr Pointer to the memory block, may be NULL.
  21  * \param nmemb Number of elements.
  22  * \param size The size of one element in bytes.
  23  *
  24  * A wrapper for realloc(3) which aborts on invalid arguments or integer
  25  * overflow. The wrapper also terminates the current process on allocation
  26  * errors, so the caller does not need to check for failure.
  27  *
  28  * \return A pointer to newly allocated memory which is suitably aligned for
  29  * any kind of variable and may be different from ptr.
  30  *
  31  * \sa realloc(3).
  32  */
  33 __must_check void *arr_realloc(void *ptr, size_t nmemb, size_t size)
  34 {
  35         size_t pr;
  36
  37         assert(size > 0);
  38         assert(nmemb > 0);
  39         assert(!__builtin_mul_overflow(nmemb, size, &pr));
  40         assert(pr != 0);
  41         ptr = realloc(ptr, pr);
  42         assert(ptr);
  43         return ptr;
  44 }
  45
  46 /**
  47  * Allocate an array, abort on failure or bugs.
  48  *
  49  * \param nmemb See \ref arr_realloc().
  50  * \param size See \ref arr_realloc().
  51  *
  52  * Like \ref arr_realloc(), this aborts on invalid arguments, integer overflow
  53  * and allocation errors.
  54  *
  55  * \return A pointer to newly allocated memory which is suitably aligned for
  56  * any kind of variable.
  57  *
  58  * \sa See \ref arr_realloc().
  59  */
  60 __must_check __malloc void *arr_alloc(size_t nmemb, size_t size)
  61 {
  62         return arr_realloc(NULL, nmemb, size);
  63 }
  64
  65 /**
  66  * Paraslash's version of realloc().
  67  *
  68  * \param p Pointer to the memory block, may be \p NULL.
  69  * \param size The desired new size.
  70  *
  71  * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
  72  * i.e. there is no need to check the return value in the caller.
  73  *
  74  * \return A pointer to newly allocated memory which is suitably aligned for
  75  * any kind of variable and may be different from \a p.
  76  *
  77  * \sa realloc(3).
  78  */
  79 __must_check void *para_realloc(void *p, size_t size)
  80 {
  81         return arr_realloc(p, 1, size);
  82 }
  83
  84 /**
  85  * Paraslash's version of malloc().
  86  *
  87  * \param size The desired new size.
  88  *
  89  * A wrapper for malloc(3) which exits on errors.
  90  *
  91  * \return A pointer to the allocated memory, which is suitably aligned for any
  92  * kind of variable.
  93  *
  94  * \sa malloc(3).
  95  */
  96 __must_check __malloc void *alloc(size_t size)
  97 {
  98         return arr_alloc(1, size);
  99 }
 100
 101 /**
 102  * Allocate and initialize memory.
 103  *
 104  * \param size The desired new size.
 105  *
 106  * \return A pointer to the allocated and zeroed-out memory, which is suitably
 107  * aligned for any kind of variable.
 108  *
 109  * \sa \ref alloc(), calloc(3).
 110  */
 111 __must_check __malloc void *zalloc(size_t size)
 112 {
 113         void *ret = alloc(size);
 114
 115         memset(ret, 0, size);
 116         return ret;
 117 }
 118
 119 /**
 120  * Paraslash's version of strdup().
 121  *
 122  * \param s The string to be duplicated.
 123  *
 124  * A strdup(3)-like function which aborts if insufficient memory was available
 125  * to allocate the duplicated string, absolving the caller from the
 126  * responsibility to check for failure.
 127  *
 128  * \return A pointer to the duplicated string. Unlike strdup(3), the caller may
 129  * pass NULL, in which case the function returns a pointer to an empty string.
 130  * Regardless of whether or not NULL was passed, the returned string is
 131  * allocated on the heap and has to be freed by the caller.
 132  *
 133  * \sa strdup(3).
 134  */
 135 __must_check __malloc char *para_strdup(const char *s)
 136 {
 137         char *dupped_string = strdup(s? s: "");
 138
 139         assert(dupped_string);
 140         return dupped_string;
 141 }
 142
 143 /**
 144  * Print a formatted message to a dynamically allocated string.
 145  *
 146  * \param result The formatted string is returned here.
 147  * \param fmt The format string.
 148  * \param ap Initialized list of arguments.
 149  *
 150  * This function is similar to vasprintf(), a GNU extension which is not in C
 151  * or POSIX. It allocates a string large enough to hold the output including
 152  * the terminating null byte. The allocated string is returned via the first
 153  * argument and must be freed by the caller. However, unlike vasprintf(), this
 154  * function calls exit() if insufficient memory is available, while vasprintf()
 155  * returns -1 in this case.
 156  *
 157  * \return Number of bytes written, not including the terminating \p NULL
 158  * character.
 159  *
 160  * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
 161  */
 162 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
 163 {
 164         int ret;
 165         size_t size = 150;
 166         va_list aq;
 167
 168         *result = alloc(size + 1);
 169         va_copy(aq, ap);
 170         ret = vsnprintf(*result, size, fmt, aq);
 171         va_end(aq);
 172         assert(ret >= 0);
 173         if (ret < size) /* OK */
 174                 return ret;
 175         size = ret + 1;
 176         *result = para_realloc(*result, size);
 177         va_copy(aq, ap);
 178         ret = vsnprintf(*result, size, fmt, aq);
 179         va_end(aq);
 180         assert(ret >= 0 && ret < size);
 181         return ret;
 182 }
 183
 184 /**
 185  * Print to a dynamically allocated string, variable number of arguments.
 186  *
 187  * \param result See \ref xvasprintf().
 188  * \param fmt Usual format string.
 189  *
 190  * \return The return value of the underlying call to \ref xvasprintf().
 191  *
 192  * \sa \ref xvasprintf() and the references mentioned there.
 193  */
 194 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
 195 {
 196         va_list ap;
 197         unsigned ret;
 198
 199         va_start(ap, fmt);
 200         ret = xvasprintf(result, fmt, ap);
 201         va_end(ap);
 202         return ret;
 203 }
 204
 205 /**
 206  * Allocate a sufficiently large string and print into it.
 207  *
 208  * \param fmt A usual format string.
 209  *
 210  * Produce output according to \p fmt. No artificial bound on the length of the
 211  * resulting string is imposed.
 212  *
 213  * \return This function either returns a pointer to a string that must be
 214  * freed by the caller or aborts without returning.
 215  *
 216  * \sa printf(3), \ref xasprintf().
 217  */
 218 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
 219 {
 220         char *msg;
 221         va_list ap;
 222
 223         va_start(ap, fmt);
 224         xvasprintf(&msg, fmt, ap);
 225         va_end(ap);
 226         return msg;
 227 }
 228
 229 /**
 230  * Free the content of a pointer and set it to NULL.
 231  *
 232  * \param arg A pointer to the pointer whose content should be freed.
 233  *
 234  * If arg is NULL, the function returns immediately. Otherwise it frees the
 235  * memory pointed to by *arg and sets *arg to NULL. Hence callers have to pass
 236  * the *address* of the pointer variable that points to the memory which should
 237  * be freed.
 238  */
 239 void freep(void *arg)
 240 {
 241         if (arg) {
 242                 void **ptr = arg;
 243                 free(*ptr);
 244                 *ptr = NULL;
 245         }
 246 }
 247
 248 /**
 249  * Paraslash's version of strcat().
 250  *
 251  * \param a String to be appended to.
 252  * \param b String to append.
 253  *
 254  * Append \p b to \p a.
 255  *
 256  * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
 257  * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
 258  * return \a a without making a copy of \a a.  Otherwise, construct the
 259  * concatenation \a c, free \a a (but not \a b) and return \a c.
 260  *
 261  * \sa strcat(3).
 262  */
 263 __must_check __malloc char *para_strcat(char *a, const char *b)
 264 {
 265         char *tmp;
 266
 267         if (!a)
 268                 return para_strdup(b);
 269         if (!b)
 270                 return a;
 271         tmp = make_message("%s%s", a, b);
 272         free(a);
 273         return tmp;
 274 }
 275
 276 /**
 277  * Get the logname of the current user.
 278  *
 279  * \return A dynamically allocated string that must be freed by the caller. On
 280  * errors, the string "unknown_user" is returned, i.e. this function never
 281  * returns \p NULL.
 282  *
 283  * \sa getpwuid(3).
 284  */
 285 __must_check __malloc char *para_logname(void)
 286 {
 287         struct passwd *pw = getpwuid(getuid());
 288         return para_strdup(pw? pw->pw_name : "unknown_user");
 289 }
 290
 291 /**
 292  * Get the home directory of the current user.
 293  *
 294  * \return A dynamically allocated string that must be freed by the caller. If
 295  * the home directory could not be found, this function returns "/tmp".
 296  */
 297 __must_check __malloc char *para_homedir(void)
 298 {
 299         struct passwd *pw = getpwuid(getuid());
 300         return para_strdup(pw? pw->pw_dir : "/tmp");
 301 }
 302
 303 /**
 304  * Get the own hostname.
 305  *
 306  * \return A dynamically allocated string containing the hostname.
 307  *
 308  * \sa uname(2).
 309  */
 310 __malloc char *para_hostname(void)
 311 {
 312         struct utsname u;
 313
 314         uname(&u);
 315         return para_strdup(u.nodename);
 316 }
 317
 318 /**
 319  * Call a custom function for each complete line.
 320  *
 321  * \param flags Any combination of flags defined in \ref for_each_line_flags.
 322  * \param buf The buffer containing data separated by newlines.
 323  * \param size The number of bytes in \a buf.
 324  * \param line_handler The custom function.
 325  * \param private_data Pointer passed to \a line_handler.
 326  *
 327  * For each complete line in \p buf, \p line_handler is called. The first
 328  * argument to \p line_handler is (a copy of) the current line, and \p
 329  * private_data is passed as the second argument.  If the \p FELF_READ_ONLY
 330  * flag is unset, a pointer into \a buf is passed to the line handler,
 331  * otherwise a pointer to a copy of the current line is passed instead. This
 332  * copy is freed immediately after the line handler returns.
 333  *
 334  * The function returns if \p line_handler returns a negative value or no more
 335  * lines are in the buffer.  The rest of the buffer (last chunk containing an
 336  * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
 337  * unset.
 338  *
 339  * \return On success this function returns the number of bytes not handled to
 340  * \p line_handler. The only possible error is a negative return value from the
 341  * line handler. In this case processing stops and the return value of the line
 342  * handler is returned to indicate failure.
 343  *
 344  * \sa \ref for_each_line_flags.
 345  */
 346 int for_each_line(unsigned flags, char *buf, size_t size,
 347                 line_handler_t *line_handler, void *private_data)
 348 {
 349         char *start = buf, *end;
 350         int ret, i, num_lines = 0;
 351
 352 //      PARA_NOTICE_LOG("buf: %s\n", buf);
 353         while (start < buf + size) {
 354                 char *next_null;
 355                 char *next_cr;
 356
 357                 next_cr = memchr(start, '\n', buf + size - start);
 358                 next_null = memchr(start, '\0', next_cr?
 359                         next_cr - start : buf + size - start);
 360                 if (!next_cr && !next_null)
 361                         break;
 362                 if (next_null)
 363                         end = next_null;
 364                 else
 365                         end = next_cr;
 366                 num_lines++;
 367                 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
 368                         if (flags & FELF_READ_ONLY) {
 369                                 size_t s = end - start;
 370                                 char *b = alloc(s + 1);
 371                                 memcpy(b, start, s);
 372                                 b[s] = '\0';
 373                                 ret = line_handler(b, private_data);
 374                                 free(b);
 375                         } else {
 376                                 *end = '\0';
 377                                 ret = line_handler(start, private_data);
 378                         }
 379                         if (ret < 0)
 380                                 return ret;
 381                 }
 382                 start = ++end;
 383         }
 384         i = buf + size - start;
 385         if (i && i != size && !(flags & FELF_READ_ONLY))
 386                 memmove(buf, start, i);
 387         return i;
 388 }
 389
 390 /** Return the hex characters of the lower 4 bits. */
 391 #define hex(a) (hexchar[(a) & 15])
 392
 393 static void write_size_header(char *buf, int n)
 394 {
 395         static char hexchar[] = "0123456789abcdef";
 396
 397         buf[0] = hex(n >> 12);
 398         buf[1] = hex(n >> 8);
 399         buf[2] = hex(n >> 4);
 400         buf[3] = hex(n);
 401         buf[4] = ' ';
 402 }
 403
 404 /**
 405  * Read a four-byte hex-number and return its value.
 406  *
 407  * Each status item sent by para_server is prefixed with such a hex number in
 408  * ASCII which describes the size of the status item.
 409  *
 410  * \param buf The buffer which must be at least four bytes long.
 411  *
 412  * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
 413  * buffer did not contain only hex digits.
 414  */
 415 int read_size_header(const char *buf)
 416 {
 417         int i, len = 0;
 418
 419         for (i = 0; i < 4; i++) {
 420                 unsigned char c = buf[i];
 421                 len <<= 4;
 422                 if (c >= '0' && c <= '9') {
 423                         len += c - '0';
 424                         continue;
 425                 }
 426                 if (c >= 'a' && c <= 'f') {
 427                         len += c - 'a' + 10;
 428                         continue;
 429                 }
 430                 return -E_SIZE_PREFIX;
 431         }
 432         if (buf[4] != ' ')
 433                 return -E_SIZE_PREFIX;
 434         return len;
 435 }
 436
 437 /**
 438  * Safely print into a buffer at a given offset.
 439  *
 440  * \param b Determines the buffer, its size, and the offset.
 441  * \param fmt The format string.
 442  *
 443  * This function prints into the buffer given by \a b at the offset which is
 444  * also given by \a b. If there is not enough space to hold the result, the
 445  * buffer size is doubled until the underlying call to vsnprintf() succeeds
 446  * or the size of the buffer exceeds the maximal size specified in \a b.
 447  *
 448  * In the latter case the unmodified \a buf and \a offset values as well as the
 449  * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
 450  * If this function succeeds, i.e. returns a non-negative value, the offset of
 451  * \a b is reset to zero and the given data is written to the beginning of the
 452  * buffer. If \a max_size_handler() returns a negative value, this value is
 453  * returned by \a para_printf().
 454  *
 455  * Upon return, the offset of \a b is adjusted accordingly so that subsequent
 456  * calls to this function append data to what is already contained in the
 457  * buffer.
 458  *
 459  * It's OK to call this function with \p b->buf being \p NULL. In this case, an
 460  * initial buffer is allocated.
 461  *
 462  * \return The number of bytes printed into the buffer (not including the
 463  * terminating \p NULL byte) on success, negative on errors. If there is no
 464  * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
 465  * fails.
 466  *
 467  * \sa make_message(), vsnprintf(3).
 468  */
 469 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
 470 {
 471         int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
 472
 473         if (!b->buf) {
 474                 b->buf = alloc(128);
 475                 b->size = 128;
 476                 b->offset = 0;
 477         }
 478         while (1) {
 479                 char *p = b->buf + b->offset;
 480                 size_t size = b->size - b->offset;
 481                 va_list ap;
 482
 483                 if (size > sz_off) {
 484                         va_start(ap, fmt);
 485                         ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
 486                         va_end(ap);
 487                         if (ret > -1 && ret < size - sz_off) { /* success */
 488                                 b->offset += ret + sz_off;
 489                                 if (sz_off)
 490                                         write_size_header(p, ret);
 491                                 return ret + sz_off;
 492                         }
 493                 }
 494                 /* check if we may grow the buffer */
 495                 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
 496                         /* try again with more space */
 497                         b->size *= 2;
 498                         b->buf = para_realloc(b->buf, b->size);
 499                         continue;
 500                 }
 501                 /* can't grow buffer */
 502                 if (!b->offset || !b->max_size_handler) /* message too large */
 503                         return -ERRNO_TO_PARA_ERROR(ENOSPC);
 504                 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
 505                 if (ret < 0)
 506                         return ret;
 507                 b->offset = 0;
 508         }
 509 }
 510
 511 /** \cond llong_minmax */
 512 /* LLONG_MAX and LLONG_MIN might not be defined. */
 513 #ifndef LLONG_MAX
 514 #define LLONG_MAX 9223372036854775807LL
 515 #endif
 516 #ifndef LLONG_MIN
 517 #define LLONG_MIN (-LLONG_MAX - 1LL)
 518 #endif
 519 /** \endcond llong_minmax */
 520
 521 /**
 522  * Convert a string to a 64-bit signed integer value.
 523  *
 524  * \param str The string to be converted.
 525  * \param value Result pointer.
 526  *
 527  * \return Standard.
 528  *
 529  * \sa \ref para_atoi32(), strtol(3), atoi(3).
 530  */
 531 int para_atoi64(const char *str, int64_t *value)
 532 {
 533         char *endptr;
 534         long long tmp;
 535
 536         errno = 0; /* To distinguish success/failure after call */
 537         tmp = strtoll(str, &endptr, 10);
 538         if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
 539                 return -E_ATOI_OVERFLOW;
 540         /*
 541          * If there were no digits at all, strtoll() stores the original value
 542          * of str in *endptr.
 543          */
 544         if (endptr == str)
 545                 return -E_ATOI_NO_DIGITS;
 546         /*
 547          * The implementation may also set errno and return 0 in case no
 548          * conversion was performed.
 549          */
 550         if (errno != 0 && tmp == 0)
 551                 return -E_ATOI_NO_DIGITS;
 552         if (*endptr != '\0') /* Further characters after number */
 553                 return -E_ATOI_JUNK_AT_END;
 554         *value = tmp;
 555         return 1;
 556 }
 557
 558 /**
 559  * Convert a string to a 32-bit signed integer value.
 560  *
 561  * \param str The string to be converted.
 562  * \param value Result pointer.
 563  *
 564  * \return Standard.
 565  *
 566  * \sa \ref para_atoi64().
 567 */
 568 int para_atoi32(const char *str, int32_t *value)
 569 {
 570         int64_t tmp;
 571         int ret;
 572         const int32_t max = 2147483647;
 573
 574         ret = para_atoi64(str, &tmp);
 575         if (ret < 0)
 576                 return ret;
 577         if (tmp > max || tmp < -max - 1)
 578                 return -E_ATOI_OVERFLOW;
 579         *value = tmp;
 580         return 1;
 581 }
 582
 583 static inline int loglevel_equal(const char *arg, const char * const ll)
 584 {
 585         return !strncasecmp(arg, ll, strlen(ll));
 586 }
 587
 588 /**
 589  * Compute the loglevel number from its name.
 590  *
 591  * \param txt The name of the loglevel (debug, info, ...).
 592  *
 593  * \return The numeric representation of the loglevel name.
 594  */
 595 int get_loglevel_by_name(const char *txt)
 596 {
 597         if (loglevel_equal(txt, "debug"))
 598                 return LL_DEBUG;
 599         if (loglevel_equal(txt, "info"))
 600                 return LL_INFO;
 601         if (loglevel_equal(txt, "notice"))
 602                 return LL_NOTICE;
 603         if (loglevel_equal(txt, "warning"))
 604                 return LL_WARNING;
 605         if (loglevel_equal(txt, "error"))
 606                 return LL_ERROR;
 607         if (loglevel_equal(txt, "crit"))
 608                 return LL_CRIT;
 609         if (loglevel_equal(txt, "emerg"))
 610                 return LL_EMERG;
 611         return -E_BAD_LL;
 612 }
 613
 614 static int get_next_word(const char *buf, const char *delim, char **word)
 615 {
 616         enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
 617                 LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
 618         const char *in;
 619         char *out;
 620         int ret, state = 0;
 621
 622         out = alloc(strlen(buf) + 1);
 623         *out = '\0';
 624         *word = out;
 625         for (in = buf; *in; in++) {
 626                 const char *p;
 627
 628                 switch (*in) {
 629                 case '\\':
 630                         if (state & LSF_BACKSLASH) /* \\ */
 631                                 goto copy_char;
 632                         state |= LSF_BACKSLASH;
 633                         state |= LSF_HAVE_WORD;
 634                         continue;
 635                 case 'n':
 636                 case 't':
 637                         if (state & LSF_BACKSLASH) { /* \n or \t */
 638                                 *out++ = (*in == 'n')? '\n' : '\t';
 639                                 state &= ~LSF_BACKSLASH;
 640                                 continue;
 641                         }
 642                         goto copy_char;
 643                 case '"':
 644                         if (state & LSF_BACKSLASH) /* \" */
 645                                 goto copy_char;
 646                         if (state & LSF_SINGLE_QUOTE) /* '" */
 647                                 goto copy_char;
 648                         if (state & LSF_DOUBLE_QUOTE) {
 649                                 state &= ~LSF_DOUBLE_QUOTE;
 650                                 continue;
 651                         }
 652                         state |= LSF_HAVE_WORD;
 653                         state |= LSF_DOUBLE_QUOTE;
 654                         continue;
 655                 case '\'':
 656                         if (state & LSF_BACKSLASH) /* \' */
 657                                 goto copy_char;
 658                         if (state & LSF_DOUBLE_QUOTE) /* "' */
 659                                 goto copy_char;
 660                         if (state & LSF_SINGLE_QUOTE) {
 661                                 state &= ~LSF_SINGLE_QUOTE;
 662                                 continue;
 663                         }
 664                         state |= LSF_HAVE_WORD;
 665                         state |= LSF_SINGLE_QUOTE;
 666                         continue;
 667                 }
 668                 for (p = delim; *p; p++) {
 669                         if (*in != *p)
 670                                 continue;
 671                         if (state & LSF_BACKSLASH)
 672                                 goto copy_char;
 673                         if (state & LSF_SINGLE_QUOTE)
 674                                 goto copy_char;
 675                         if (state & LSF_DOUBLE_QUOTE)
 676                                 goto copy_char;
 677                         if (state & LSF_HAVE_WORD)
 678                                 goto success;
 679                         break;
 680                 }
 681                 if (*p) /* ignore delimiter at the beginning */
 682                         continue;
 683 copy_char:
 684                 state |= LSF_HAVE_WORD;
 685                 *out++ = *in;
 686                 state &= ~LSF_BACKSLASH;
 687         }
 688         ret = 0;
 689         if (!(state & LSF_HAVE_WORD))
 690                 goto out;
 691         ret = -ERRNO_TO_PARA_ERROR(EINVAL);
 692         if (state & LSF_BACKSLASH) {
 693                 PARA_ERROR_LOG("trailing backslash\n");
 694                 goto out;
 695         }
 696         if ((state & LSF_SINGLE_QUOTE) || (state & LSF_DOUBLE_QUOTE)) {
 697                 PARA_ERROR_LOG("unmatched quote character\n");
 698                 goto out;
 699         }
 700 success:
 701         *out = '\0';
 702         return in - buf;
 703 out:
 704         free(*word);
 705         *word = NULL;
 706         return ret;
 707 }
 708
 709 /**
 710  * Get the number of the word the cursor is on.
 711  *
 712  * \param buf The zero-terminated line buffer.
 713  * \param delim Characters that separate words.
 714  * \param point The cursor position.
 715  *
 716  * \return Zero-based word number.
 717  */
 718 int compute_word_num(const char *buf, const char *delim, int point)
 719 {
 720         int ret, num_words;
 721         const char *p;
 722         char *word;
 723
 724         for (p = buf, num_words = 0; ; p += ret, num_words++) {
 725                 ret = get_next_word(p, delim, &word);
 726                 if (ret <= 0)
 727                         break;
 728                 free(word);
 729                 if (p + ret >= buf + point)
 730                         break;
 731         }
 732         return num_words;
 733 }
 734
 735 /**
 736  * Free an array of words created by create_argv() or create_shifted_argv().
 737  *
 738  * \param argv A pointer previously obtained by \ref create_argv().
 739  */
 740 void free_argv(char **argv)
 741 {
 742         int i;
 743
 744         if (!argv)
 745                 return;
 746         for (i = 0; argv[i]; i++)
 747                 free(argv[i]);
 748         free(argv);
 749 }
 750
 751 static int create_argv_offset(int offset, const char *buf, const char *delim,
 752                 char ***result)
 753 {
 754         char *word, **argv = arr_alloc(offset + 1, sizeof(char *));
 755         const char *p;
 756         int i, ret;
 757
 758         for (i = 0; i < offset; i++)
 759                 argv[i] = NULL;
 760         for (p = buf; p && *p; p += ret, i++) {
 761                 ret = get_next_word(p, delim, &word);
 762                 if (ret < 0)
 763                         goto err;
 764                 if (!ret)
 765                         break;
 766                 argv = arr_realloc(argv, i + 2, sizeof(char*));
 767                 argv[i] = word;
 768         }
 769         argv[i] = NULL;
 770         *result = argv;
 771         return i;
 772 err:
 773         while (i > 0)
 774                 free(argv[--i]);
 775         free(argv);
 776         *result = NULL;
 777         return ret;
 778 }
 779
 780 /**
 781  * Split a buffer into words.
 782  *
 783  * This parser honors single and double quotes, backslash-escaped characters
 784  * and special characters like \\n. The result contains pointers to copies of
 785  * the words contained in buf and has to be freed by using \ref free_argv().
 786  *
 787  * \param buf The buffer to be split.
 788  * \param delim Each character in this string is treated as a separator.
 789  * \param result The array of words is returned here.
 790  *
 791  * It's OK to pass NULL as the buffer argument. This is equivalent to passing
 792  * the empty string.
 793  *
 794  * \return Number of words in buf, negative on errors. The array returned
 795  * through the result pointer is NULL terminated.
 796  */
 797 int create_argv(const char *buf, const char *delim, char ***result)
 798 {
 799         return create_argv_offset(0, buf, delim, result);
 800 }
 801
 802 /**
 803  * Split a buffer into words, offset one.
 804  *
 805  * This is similar to \ref create_argv() but the returned array is one element
 806  * larger, words start at index one and element zero is initialized to \p NULL.
 807  * Callers must set element zero to a non-NULL value before calling free_argv()
 808  * on the returned array to avoid a memory leak.
 809  *
 810  * \param buf See \ref create_argv().
 811  * \param delim See \ref create_argv().
 812  * \param result See \ref create_argv().
 813  *
 814  * \return Number of words plus one on success, negative on errors.
 815  */
 816 int create_shifted_argv(const char *buf, const char *delim, char ***result)
 817 {
 818         return create_argv_offset(1, buf, delim, result);
 819 }
 820
 821 /**
 822  * Find out if the given string is contained in the arg vector.
 823  *
 824  * \param arg The string to look for.
 825  * \param argv The array to search.
 826  *
 827  * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
 828  * arg was not found in \a argv.
 829  */
 830 int find_arg(const char *arg, char **argv)
 831 {
 832         int i;
 833
 834         if (!argv)
 835                 return -E_ARG_NOT_FOUND;
 836         for (i = 0; argv[i]; i++)
 837                 if (strcmp(arg, argv[i]) == 0)
 838                         return i;
 839         return -E_ARG_NOT_FOUND;
 840 }
 841
 842 /**
 843  * Compile a regular expression.
 844  *
 845  * This simple wrapper calls regcomp() and logs a message on errors.
 846  *
 847  * \param preg See regcomp(3).
 848  * \param regex See regcomp(3).
 849  * \param cflags See regcomp(3).
 850  *
 851  * \return Standard.
 852  */
 853 int para_regcomp(regex_t *preg, const char *regex, int cflags)
 854 {
 855         char *buf;
 856         size_t size;
 857         int ret = regcomp(preg, regex, cflags);
 858
 859         if (ret == 0)
 860                 return 1;
 861         size = regerror(ret, preg, NULL, 0);
 862         buf = alloc(size);
 863         regerror(ret, preg, buf, size);
 864         PARA_ERROR_LOG("%s\n", buf);
 865         free(buf);
 866         return -E_REGEX;
 867 }
 868
 869 /**
 870  * strdup() for not necessarily zero-terminated strings.
 871  *
 872  * \param src The source buffer.
 873  * \param len The number of bytes to be copied.
 874  *
 875  * \return A 0-terminated buffer of length \a len + 1.
 876  *
 877  * This is similar to strndup(), which is a GNU extension. However, one
 878  * difference is that strndup() returns \p NULL if insufficient memory was
 879  * available while this function aborts in this case.
 880  *
 881  * \sa strdup(), \ref para_strdup().
 882  */
 883 char *safe_strdup(const char *src, size_t len)
 884 {
 885         char *p;
 886
 887         assert(len < (size_t)-1);
 888         p = alloc(len + 1);
 889         if (len > 0)
 890                 memcpy(p, src, len);
 891         p[len] = '\0';
 892         return p;
 893 }
 894
 895 /**
 896  * Copy the value of a key=value pair.
 897  *
 898  * This checks whether the given buffer starts with "key=", ignoring case. If
 899  * yes, a copy of the value is returned. The source buffer may not be
 900  * zero-terminated.
 901  *
 902  * \param src The source buffer.
 903  * \param len The number of bytes of the tag.
 904  * \param key Only copy if it is the value of this key.
 905  *
 906  * \return A zero-terminated buffer, or \p NULL if the key was
 907  * not of the given type.
 908  */
 909 char *key_value_copy(const char *src, size_t len, const char *key)
 910 {
 911         int keylen = strlen(key);
 912
 913         if (len <= keylen)
 914                 return NULL;
 915         if (strncasecmp(src, key, keylen))
 916                 return NULL;
 917         if (src[keylen] != '=')
 918                 return NULL;
 919         return safe_strdup(src + keylen + 1, len - keylen - 1);
 920 }
 921
 922 static bool utf8_mode(void)
 923 {
 924         static bool initialized, have_utf8;
 925
 926         if (!initialized) {
 927                 char *info = nl_langinfo(CODESET);
 928                 have_utf8 = (info && strcmp(info, "UTF-8") == 0);
 929                 initialized = true;
 930                 PARA_INFO_LOG("%susing UTF-8 character encoding\n",
 931                         have_utf8? "" : "not ");
 932         }
 933         return have_utf8;
 934 }
 935
 936 static int xwcwidth(wchar_t wc, size_t pos)
 937 {
 938         int n;
 939
 940         /* special-case for tab */
 941         if (wc == 0x09) /* tab */
 942                 return (pos | 7) + 1 - pos;
 943         n = wcwidth(wc);
 944         /* wcswidth() returns -1 for non-printable characters */
 945         return n >= 0? n : 1;
 946 }
 947
 948 static size_t xwcswidth(const wchar_t *s, size_t n)
 949 {
 950         size_t w = 0;
 951
 952         while (n--)
 953                 w += xwcwidth(*s++, w);
 954         return w;
 955 }
 956
 957 /**
 958  * Skip a given number of cells at the beginning of a string.
 959  *
 960  * \param s The input string.
 961  * \param cells_to_skip Desired number of cells that should be skipped.
 962  * \param bytes_to_skip Result.
 963  *
 964  * This function computes how many input bytes must be skipped to advance a
 965  * string by the given width. If the current character encoding is not UTF-8,
 966  * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
 967  * \a s is treated as a multibyte string and on successful return, \a s +
 968  * bytes_to_skip points to the start of a multibyte string such that the total
 969  * width of the multibyte characters that are skipped by advancing \a s that
 970  * many bytes equals at least \a cells_to_skip.
 971  *
 972  * \return Standard.
 973  */
 974 int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
 975 {
 976         wchar_t wc;
 977         mbstate_t ps;
 978         size_t n, bytes_parsed, cells_skipped;
 979
 980         *bytes_to_skip = 0;
 981         if (cells_to_skip == 0)
 982                 return 0;
 983         if (!utf8_mode()) {
 984                 *bytes_to_skip = cells_to_skip;
 985                 return 0;
 986         }
 987         bytes_parsed = cells_skipped = 0;
 988         memset(&ps, 0, sizeof(ps));
 989         n = strlen(s);
 990         while (cells_to_skip > cells_skipped) {
 991                 size_t mbret;
 992
 993                 mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps);
 994                 assert(mbret != 0);
 995                 if (mbret == (size_t)-1 || mbret == (size_t)-2)
 996                         return -ERRNO_TO_PARA_ERROR(EILSEQ);
 997                 bytes_parsed += mbret;
 998                 cells_skipped += xwcwidth(wc, cells_skipped);
 999         }
1000         *bytes_to_skip = bytes_parsed;
1001         return 1;
1002 }
1003
1004 /**
1005  * Compute the width of an UTF-8 string.
1006  *
1007  * \param s The string.
1008  * \param result The width of \a s is returned here.
1009  *
1010  * If not in UTF8-mode. this function is just a wrapper for strlen(3).
1011  * Otherwise \a s is treated as an UTF-8 string and its display width is
1012  * computed. Note that this function may fail if the underlying call to
1013  * mbsrtowcs(3) fails, so the caller must check the return value.
1014  *
1015  * \sa nl_langinfo(3), wcswidth(3).
1016  *
1017  * \return Standard.
1018  */
1019 __must_check int strwidth(const char *s, size_t *result)
1020 {
1021         const char *src = s;
1022         mbstate_t state;
1023         static wchar_t *dest;
1024         size_t num_wchars;
1025
1026         /*
1027          * Never call any log function here. This may result in an endless loop
1028          * as para_gui's para_log() calls this function.
1029          */
1030
1031         if (!utf8_mode()) {
1032                 *result = strlen(s);
1033                 return 0;
1034         }
1035         memset(&state, 0, sizeof(state));
1036         *result = 0;
1037         num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1038         if (num_wchars == (size_t)-1)
1039                 return -ERRNO_TO_PARA_ERROR(errno);
1040         if (num_wchars == 0)
1041                 return 0;
1042         dest = arr_alloc(num_wchars + 1, sizeof(*dest));
1043         src = s;
1044         memset(&state, 0, sizeof(state));
1045         num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1046         assert(num_wchars > 0 && num_wchars != (size_t)-1);
1047         *result = xwcswidth(dest, num_wchars);
1048         free(dest);
1049         return 1;
1050 }
1051
1052 /**
1053  * Truncate and sanitize a (wide character) string.
1054  *
1055  * This replaces all non-printable characters by spaces and makes sure that the
1056  * modified string does not exceed the given maximal width.
1057  *
1058  * \param src The source string in multi-byte form.
1059  * \param max_width The maximal number of cells the result may occupy.
1060  * \param result Sanitized multi-byte string, must be freed by caller.
1061  * \param width The width of the sanitized string, always <= max_width.
1062  *
1063  * The function is wide-character aware but falls back to C strings for
1064  * non-UTF-8 locales.
1065  *
1066  * \return Standard. On success, *result points to a sanitized copy of the
1067  * given string. This copy was allocated with malloc() and should hence be
1068  * freed when the caller is no longer interested in the result.
1069  *
1070  * The function fails if the given string contains an invalid multibyte
1071  * sequence. In this case, *result is set to NULL, and *width to zero.
1072  */
1073 __must_check int sanitize_str(const char *src, size_t max_width,
1074                 char **result, size_t *width)
1075 {
1076         mbstate_t state;
1077         static wchar_t *wcs;
1078         size_t num_wchars, n;
1079
1080         if (!utf8_mode()) {
1081                 *result = para_strdup(src);
1082                 /* replace non-printable characters by spaces */
1083                 for (n = 0; n < max_width && src[n]; n++) {
1084                         if (!isprint((unsigned char)src[n]))
1085                                 (*result)[n] = ' ';
1086                 }
1087                 (*result)[n] = '\0';
1088                 *width = n;
1089                 return 0;
1090         }
1091         *result = NULL;
1092         *width = 0;
1093         memset(&state, 0, sizeof(state));
1094         num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1095         if (num_wchars == (size_t)-1)
1096                 return -ERRNO_TO_PARA_ERROR(errno);
1097         wcs = arr_alloc(num_wchars + 1, sizeof(*wcs));
1098         memset(&state, 0, sizeof(state));
1099         num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
1100         assert(num_wchars != (size_t)-1);
1101         for (n = 0; n < num_wchars && *width < max_width; n++) {
1102                 if (!iswprint(wcs[n]))
1103                         wcs[n] = L' ';
1104                 *width += xwcwidth(wcs[n], *width);
1105         }
1106         wcs[n] = L'\0';
1107         n = wcstombs(NULL, wcs, 0) + 1;
1108         *result = alloc(n);
1109         num_wchars = wcstombs(*result, wcs, n);
1110         assert(num_wchars != (size_t)-1);
1111         free(wcs);
1112         return 1;
1113 }