afh.h: Fix typo in prototype of ->get_file_info().

[paraslash.git] / string.c
diff --git a/string.c b/string.c

index aa3bcbddabe425cc823e3ad5e567f707f70dbccb..6033a008dbf154953de673f85048eb78812d1722 100644 (file)
--- a/string.c
+++ b/string.c
@@ -1,18 +1,21 @@
  /*
- * Copyright (C) 2004-2012 Andre Noll <maan@systemlinux.org>
+ * Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>
   *
   * Licensed under the GPL v2. For licencing details see COPYING.
   */
  
  /** \file string.c Memory allocation and string handling functions. */
  
-#include <sys/time.h> /* gettimeofday */
+#include "para.h"
+
  #include <pwd.h>
  #include <sys/utsname.h> /* uname() */
  #include <string.h>
  #include <regex.h>
+#include <langinfo.h>
+#include <wchar.h>
+#include <wctype.h>
  
-#include "para.h"
  #include "string.h"
  #include "error.h"
  
@@ -25,12 +28,12 @@
   * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
   * i.e. there is no need to check the return value in the caller.
   *
- * \return A pointer to  the newly allocated memory, which is suitably aligned
- * for any kind of variable and may be different from \a p.
+ * \return A pointer to newly allocated memory which is suitably aligned for
+ * any kind of variable and may be different from \a p.
   *
   * \sa realloc(3).
   */
-__must_check __malloc void *para_realloc(void *p, size_t size)
+__must_check void *para_realloc(void *p, size_t size)
  {
         /*
          * No need to check for NULL pointers: If p is NULL, the call
@@ -136,15 +139,18 @@ __must_check __malloc char *para_strdup(const char *s)
  __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
  {
         int ret;
-       size_t size;
+       size_t size = 150;
         va_list aq;
  
+       *result = para_malloc(size + 1);
         va_copy(aq, ap);
-       ret = vsnprintf(NULL, 0, fmt, aq);
+       ret = vsnprintf(*result, size, fmt, aq);
         va_end(aq);
         assert(ret >= 0);
+       if (ret < size) /* OK */
+               return ret;
         size = ret + 1;
-       *result = para_malloc(size);
+       *result = para_realloc(*result, size);
         va_copy(aq, ap);
         ret = vsnprintf(*result, size, fmt, aq);
         va_end(aq);
@@ -289,24 +295,6 @@ __must_check char *para_basename(const char *name)
         return ret;
  }
  
-/**
- * Cut trailing newline.
- *
- * \param buf The string to be chopped.
- *
- * Replace the last character in \p buf by zero if it is equal to
- * the newline character.
- */
-void chop(char *buf)
-{
-       int n = strlen(buf);
-
-       if (!n)
-               return;
-       if (buf[n - 1] == '\n')
-               buf[n - 1] = '\0';
-}
-
  /**
   * Get the logname of the current user.
   *
@@ -350,19 +338,35 @@ __malloc char *para_hostname(void)
  }
  
  /**
- * Used to distinguish between read-only and read-write mode.
+ * Call a custom function for each complete line.
+ *
+ * \param flags Any combination of flags defined in \ref for_each_line_flags.
+ * \param buf The buffer containing data separated by newlines.
+ * \param size The number of bytes in \a buf.
+ * \param line_handler The custom function.
+ * \param private_data Pointer passed to \a line_handler.
   *
- * \sa for_each_line(), for_each_line_ro().
+ * For each complete line in \p buf, \p line_handler is called. The first
+ * argument to \p line_handler is (a copy of) the current line, and \p
+ * private_data is passed as the second argument.  If the \p FELF_READ_ONLY
+ * flag is unset, a pointer into \a buf is passed to the line handler,
+ * otherwise a pointer to a copy of the current line is passed instead. This
+ * copy is freed immediately after the line handler returns.
+ *
+ * The function returns if \p line_handler returns a negative value or no more
+ * lines are in the buffer.  The rest of the buffer (last chunk containing an
+ * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
+ * unset.
+ *
+ * \return On success this function returns the number of bytes not handed to
+ * \p line_handler. The only possible error is a negative return value from the
+ * line handler. In this case processing stops and the return value of the line
+ * handler is returned to indicate failure.
+ *
+ * \sa \ref for_each_line_flags.
   */
-enum for_each_line_modes{
-       /** Activate read-only mode. */
-       LINE_MODE_RO,
-       /** Activate read-write mode. */
-       LINE_MODE_RW
-};
-
-static int for_each_complete_line(enum for_each_line_modes mode, char *buf,
-               size_t size, line_handler_t *line_handler, void *private_data)
+int for_each_line(unsigned flags, char *buf, size_t size,
+               line_handler_t *line_handler, void *private_data)
  {
         char *start = buf, *end;
         int ret, i, num_lines = 0;
@@ -373,95 +377,38 @@ static int for_each_complete_line(enum for_each_line_modes mode, char *buf,
                 char *next_cr;
  
                 next_cr = memchr(start, '\n', buf + size - start);
-               next_null = memchr(start, '\0', buf + size - start);
+               next_null = memchr(start, '\0', next_cr?
+                       next_cr - start : buf + size - start);
                 if (!next_cr && !next_null)
                         break;
-               if (next_cr && next_null) {
-                       end = next_cr < next_null? next_cr : next_null;
-               } else if (next_null) {
+               if (next_null)
                         end = next_null;
-               } else
+               else
                         end = next_cr;
                 num_lines++;
-               if (!line_handler) {
-                       start = ++end;
-                       continue;
-               }
-               if (mode == LINE_MODE_RO) {
-                       size_t s = end - start;
-                       char *b = para_malloc(s + 1);
-                       memcpy(b, start, s);
-                       b[s] = '\0';
-//                     PARA_NOTICE_LOG("b: %s, start: %s\n", b, start);
-                       ret = line_handler(b, private_data);
-                       free(b);
-               } else {
-                       *end = '\0';
-                       ret = line_handler(start, private_data);
+               if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
+                       if (flags & FELF_READ_ONLY) {
+                               size_t s = end - start;
+                               char *b = para_malloc(s + 1);
+                               memcpy(b, start, s);
+                               b[s] = '\0';
+                               ret = line_handler(b, private_data);
+                               free(b);
+                       } else {
+                               *end = '\0';
+                               ret = line_handler(start, private_data);
+                       }
+                       if (ret < 0)
+                               return ret;
                 }
-               if (ret < 0)
-                       return ret;
                 start = ++end;
         }
-       if (!line_handler || mode == LINE_MODE_RO)
-               return num_lines;
         i = buf + size - start;
-       if (i && i != size)
+       if (i && i != size && !(flags & FELF_READ_ONLY))
                 memmove(buf, start, i);
         return i;
  }
  
-/**
- * Call a custom function for each complete line.
- *
- * \param buf The buffer containing data separated by newlines.
- * \param size The number of bytes in \a buf.
- * \param line_handler The custom function.
- * \param private_data Pointer passed to \a line_handler.
- *
- * If \p line_handler is \p NULL, the function returns the number of complete
- * lines in \p buf.  Otherwise, \p line_handler is called for each complete
- * line in \p buf.  The first argument to \p line_handler is the current line,
- * and \p private_data is passed as the second argument.  The function returns
- * if \p line_handler returns a negative value or no more lines are in the
- * buffer.  The rest of the buffer (last chunk containing an incomplete line)
- * is moved to the beginning of the buffer.
- *
- * \return If \p line_handler is not \p NULL, this function returns the number
- * of bytes not handled to \p line_handler on success, or the negative return
- * value of the \p line_handler on errors.
- *
- * \sa for_each_line_ro().
- */
-int for_each_line(char *buf, size_t size, line_handler_t *line_handler,
-               void *private_data)
-{
-       return for_each_complete_line(LINE_MODE_RW, buf, size, line_handler,
-               private_data);
-}
-
-/**
- * Call a custom function for each complete line.
- *
- * \param buf Same meaning as in \p for_each_line().
- * \param size Same meaning as in \p for_each_line().
- * \param line_handler Same meaning as in \p for_each_line().
- * \param private_data Same meaning as in \p for_each_line().
- *
- * This function behaves like \p for_each_line(), but \a buf is left unchanged.
- *
- * \return On success, the function returns the number of complete lines in \p
- * buf, otherwise the (negative) return value of \p line_handler is returned.
- *
- * \sa for_each_line().
- */
-int for_each_line_ro(char *buf, size_t size, line_handler_t *line_handler,
-               void *private_data)
-{
-       return for_each_complete_line(LINE_MODE_RO, buf, size, line_handler,
-               private_data);
-}
-
  /** Return the hex characters of the lower 4 bits. */
  #define hex(a) (hexchar[(a) & 15])
  
@@ -612,10 +559,18 @@ int para_atoi64(const char *str, int64_t *value)
         tmp = strtoll(str, &endptr, 10);
         if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
                 return -E_ATOI_OVERFLOW;
-       if (errno != 0 && tmp == 0) /* other error */
-               return -E_STRTOLL;
+       /*
+        * If there were no digits at all, strtoll() stores the original value
+        * of str in *endptr.
+        */
         if (endptr == str)
                 return -E_ATOI_NO_DIGITS;
+       /*
+        * The implementation may also set errno and return 0 in case no
+        * conversion was performed.
+        */
+       if (errno != 0 && tmp == 0)
+               return -E_ATOI_NO_DIGITS;
         if (*endptr != '\0') /* Further characters after number */
                 return -E_ATOI_JUNK_AT_END;
         *value = tmp;
@@ -675,7 +630,7 @@ int get_loglevel_by_name(const char *txt)
                 return LL_CRIT;
         if (loglevel_equal(txt, "emerg"))
                 return LL_EMERG;
-       return -1;
+       return -E_BAD_LL;
  }
  
  static int get_next_word(const char *buf, const char *delim, char **word)
@@ -800,7 +755,7 @@ int compute_word_num(const char *buf, const char *delim, int point)
  }
  
  /**
- * Free an array of words created by create_argv().
+ * Free an array of words created by create_argv() or create_shifted_argv().
   *
   * \param argv A pointer previously obtained by \ref create_argv().
   */
@@ -815,46 +770,97 @@ void free_argv(char **argv)
         free(argv);
  }
  
-/**
- * Split a buffer into words.
- *
- * This parser honors single and double quotes, backslash-escaped characters
- * and special characters like \p \\n. The result contains pointers to copies
- * of the words contained in \a buf and has to be freed by using \ref
- * free_argv().
- *
- * \param buf The buffer to be split.
- * \param delim Each character in this string is treated as a separator.
- * \param result The array of words is returned here.
- *
- * \return Number of words in \a buf, negative on errors.
- */
-int create_argv(const char *buf, const char *delim, char ***result)
+static int create_argv_offset(int offset, const char *buf, const char *delim,
+               char ***result)
  {
-       char *word, **argv = para_malloc(2 * sizeof(char *));
+       char *word, **argv = para_malloc((offset + 1) * sizeof(char *));
         const char *p;
-       int ret, num_words;
+       int i, ret;
  
-       for (p = buf, num_words = 0; ; p += ret, num_words++) {
+       for (i = 0; i < offset; i++)
+               argv[i] = NULL;
+       for (p = buf; p && *p; p += ret, i++) {
                 ret = get_next_word(p, delim, &word);
                 if (ret < 0)
                         goto err;
                 if (!ret)
                         break;
-               argv = para_realloc(argv, (num_words + 2) * sizeof(char*));
-               argv[num_words] = word;
+               argv = para_realloc(argv, (i + 2) * sizeof(char*));
+               argv[i] = word;
         }
-       argv[num_words] = NULL;
+       argv[i] = NULL;
         *result = argv;
-       return num_words;
+       return i;
  err:
-       while (num_words > 0)
-               free(argv[--num_words]);
+       while (i > 0)
+               free(argv[--i]);
         free(argv);
         *result = NULL;
         return ret;
  }
  
+/**
+ * Split a buffer into words.
+ *
+ * This parser honors single and double quotes, backslash-escaped characters
+ * and special characters like \\n. The result contains pointers to copies of
+ * the words contained in buf and has to be freed by using \ref free_argv().
+ *
+ * \param buf The buffer to be split.
+ * \param delim Each character in this string is treated as a separator.
+ * \param result The array of words is returned here.
+ *
+ * It's OK to pass NULL as the buffer argument. This is equivalent to passing
+ * the empty string.
+ *
+ * \return Number of words in buf, negative on errors. The array returned
+ * through the result pointer is NULL terminated.
+ */
+int create_argv(const char *buf, const char *delim, char ***result)
+{
+       return create_argv_offset(0, buf, delim, result);
+}
+
+/**
+ * Split a buffer into words, offset one.
+ *
+ * This is similar to \ref create_argv() but the returned array is one element
+ * larger, words start at index one and element zero is initialized to \p NULL.
+ * Callers must set element zero to a non-NULL value before calling free_argv()
+ * on the returned array to avoid a memory leak.
+ *
+ * \param buf See \ref create_argv().
+ * \param delim See \ref create_argv().
+ * \param result See \ref create_argv().
+ *
+ * \return Number of words plus one on success, negative on errors.
+ */
+int create_shifted_argv(const char *buf, const char *delim, char ***result)
+{
+       return create_argv_offset(1, buf, delim, result);
+}
+
+/**
+ * Find out if the given string is contained in the arg vector.
+ *
+ * \param arg The string to look for.
+ * \param argv The array to search.
+ *
+ * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
+ * \a arg was not found in \a argv.
+ */
+int find_arg(const char *arg, char **argv)
+{
+       int i;
+
+       if (!argv)
+               return -E_ARG_NOT_FOUND;
+       for (i = 0; argv[i]; i++)
+               if (strcmp(arg, argv[i]) == 0)
+                       return i;
+       return -E_ARG_NOT_FOUND;
+}
+
  /**
   * Compile a regular expression.
   *
@@ -934,3 +940,196 @@ char *key_value_copy(const char *src, size_t len, const char *key)
                 return NULL;
         return safe_strdup(src + keylen + 1, len - keylen - 1);
  }
+
+static bool utf8_mode(void)
+{
+       static bool initialized, have_utf8;
+
+       if (!initialized) {
+               char *info = nl_langinfo(CODESET);
+               have_utf8 = (info && strcmp(info, "UTF-8") == 0);
+               initialized = true;
+               PARA_INFO_LOG("%susing UTF-8 character encoding\n",
+                       have_utf8? "" : "not ");
+       }
+       return have_utf8;
+}
+
+static int xwcwidth(wchar_t wc, size_t pos)
+{
+       int n;
+
+       /* special-case for tab */
+       if (wc == 0x09) /* tab */
+               return (pos | 7) + 1 - pos;
+       n = wcwidth(wc);
+       /* wcwidth() returns -1 for non-printable characters */
+       return n >= 0? n : 1;
+}
+
+static size_t xwcswidth(const wchar_t *s, size_t n)
+{
+       size_t w = 0;
+
+       while (n--)
+               w += xwcwidth(*s++, w);
+       return w;
+}
+
+/**
+ * Skip a given number of cells at the beginning of a string.
+ *
+ * \param s The input string.
+ * \param cells_to_skip Desired number of cells that should be skipped.
+ * \param bytes_to_skip Result.
+ *
+ * This function computes how many input bytes must be skipped to advance a
+ * string by the given width. If the current character encoding is not UTF-8,
+ * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
+ * \a s is treated as a multibyte string and on successful return, \a s +
+ * bytes_to_skip points to the start of a multibyte string such that the total
+ * width of the multibyte characters that are skipped by advancing \a s that
+ * many bytes is at least \a cells_to_skip.
+ *
+ * \return Standard.
+ */
+int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
+{
+       wchar_t wc;
+       mbstate_t ps;
+       size_t n, bytes_parsed, cells_skipped;
+
+       *bytes_to_skip = 0;
+       if (cells_to_skip == 0)
+               return 0;
+       if (!utf8_mode()) {
+               *bytes_to_skip = cells_to_skip;
+               return 0;
+       }
+       bytes_parsed = cells_skipped = 0;
+       memset(&ps, 0, sizeof(ps));
+       n = strlen(s);
+       while (cells_to_skip > cells_skipped) {
+               size_t mbret;
+
+               mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps);
+               assert(mbret != 0);
+               if (mbret == (size_t)-1 || mbret == (size_t)-2)
+                       return -ERRNO_TO_PARA_ERROR(EILSEQ);
+               bytes_parsed += mbret;
+               cells_skipped += xwcwidth(wc, cells_skipped);
+       }
+       *bytes_to_skip = bytes_parsed;
+       return 1;
+}
+
+/**
+ * Compute the width of a UTF-8 string.
+ *
+ * \param s The string.
+ * \param result The width of \a s is returned here.
+ *
+ * If not in UTF-8 mode, this function is just a wrapper for strlen(3).
+ * Otherwise \a s is treated as a UTF-8 string and its display width is
+ * computed. Note that this function may fail if the underlying call to
+ * mbsrtowcs(3) fails, so the caller must check the return value.
+ *
+ * \sa nl_langinfo(3), wcswidth(3).
+ *
+ * \return Standard.
+ */
+__must_check int strwidth(const char *s, size_t *result)
+{
+       const char *src = s;
+       mbstate_t state;
+       static wchar_t *dest;
+       size_t num_wchars;
+
+       /*
+        * Never call any log function here. This may result in an endless loop
+        * as para_gui's para_log() calls this function.
+        */
+
+       if (!utf8_mode()) {
+               *result = strlen(s);
+               return 0;
+       }
+       memset(&state, 0, sizeof(state));
+       *result = 0;
+       num_wchars = mbsrtowcs(NULL, &src, 0, &state);
+       if (num_wchars == (size_t)-1)
+               return -ERRNO_TO_PARA_ERROR(errno);
+       if (num_wchars == 0)
+               return 0;
+       dest = para_malloc((num_wchars + 1) * sizeof(*dest));
+       src = s;
+       memset(&state, 0, sizeof(state));
+       num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
+       assert(num_wchars > 0 && num_wchars != (size_t)-1);
+       *result = xwcswidth(dest, num_wchars);
+       free(dest);
+       return 1;
+}
+
+/**
+ * Truncate and sanitize a (wide character) string.
+ *
+ * This replaces all non-printable characters by spaces and makes sure that the
+ * modified string does not exceed the given maximal width.
+ *
+ * \param src The source string in multi-byte form.
+ * \param max_width The maximal number of cells the result may occupy.
+ * \param result Sanitized multi-byte string, must be freed by caller.
+ * \param width The width of the sanitized string, always <= max_width.
+ *
+ * The function is wide-character aware but falls back to C strings for
+ * non-UTF-8 locales.
+ *
+ * \return Standard. On success, *result points to a sanitized copy of the
+ * given string. This copy was allocated with malloc() and should hence be
+ * freed when the caller is no longer interested in the result.
+ *
+ * The function fails if the given string contains an invalid multibyte
+ * sequence. In this case, *result is set to NULL, and *width to zero.
+ */
+__must_check int sanitize_str(const char *src, size_t max_width,
+               char **result, size_t *width)
+{
+       mbstate_t state;
+       static wchar_t *wcs;
+       size_t num_wchars, n;
+
+       if (!utf8_mode()) {
+               *result = para_strdup(src);
+               /* replace non-printable characters by spaces */
+               for (n = 0; n < max_width && src[n]; n++) {
+                       if (!isprint((unsigned char)src[n]))
+                               (*result)[n] = ' ';
+               }
+               (*result)[n] = '\0';
+               *width = n;
+               return 0;
+       }
+       *result = NULL;
+       *width = 0;
+       memset(&state, 0, sizeof(state));
+       num_wchars = mbsrtowcs(NULL, &src, 0, &state);
+       if (num_wchars == (size_t)-1)
+               return -ERRNO_TO_PARA_ERROR(errno);
+       wcs = para_malloc((num_wchars + 1) * sizeof(*wcs));
+       memset(&state, 0, sizeof(state));
+       num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
+       assert(num_wchars != (size_t)-1);
+       for (n = 0; n < num_wchars && *width < max_width; n++) {
+               if (!iswprint(wcs[n]))
+                       wcs[n] = L' ';
+               *width += xwcwidth(wcs[n], *width);
+       }
+       wcs[n] = L'\0';
+       n = wcstombs(NULL, wcs, 0) + 1;
+       *result = para_malloc(n);
+       num_wchars = wcstombs(*result, wcs, n);
+       assert(num_wchars != (size_t)-1);
+       free(wcs);
+       return 1;
+}