X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=string.c;h=e675502cf6707240a61be523c6fb510692991acd;hp=b844fb381dc25eb50c47fa04c1c60a9d91f6ff0c;hb=6811b2f8ea8b7a8c77046285c9432aee6327da80;hpb=a946afda2284a7b7f5f6020b27027fd543c054b2

diff --git a/string.c b/string.c
index b844fb38..e675502c 100644
--- a/string.c
+++ b/string.c
@@ -1,25 +1,20 @@
 /*
- * Copyright (C) 2004-2012 Andre Noll <maan@systemlinux.org>
+ * Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>
  *
  * Licensed under the GPL v2. For licencing details see COPYING.
  */
 
 /** \file string.c Memory allocation and string handling functions. */
 
-#define _GNU_SOURCE
+#include "para.h"
 
-#include <sys/time.h> /* gettimeofday */
 #include <pwd.h>
 #include <sys/utsname.h> /* uname() */
-
-#include <string.h>
 #include <regex.h>
-
 #include <langinfo.h>
 #include <wchar.h>
 #include <wctype.h>
 
-#include "para.h"
 #include "string.h"
 #include "error.h"
 
@@ -32,12 +27,12 @@
  * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
  * i.e. there is no need to check the return value in the caller.
  *
- * \return A pointer to  the newly allocated memory, which is suitably aligned
- * for any kind of variable and may be different from \a p.
+ * \return A pointer to newly allocated memory which is suitably aligned for
+ * any kind of variable and may be different from \a p.
  *
  * \sa realloc(3).
  */
-__must_check __malloc void *para_realloc(void *p, size_t size)
+__must_check void *para_realloc(void *p, size_t size)
 {
 	/*
 	 * No need to check for NULL pointers: If p is NULL, the call
@@ -143,15 +138,18 @@ __must_check __malloc char *para_strdup(const char *s)
 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
 {
 	int ret;
-	size_t size;
+	size_t size = 150;
 	va_list aq;
 
+	*result = para_malloc(size + 1);
 	va_copy(aq, ap);
-	ret = vsnprintf(NULL, 0, fmt, aq);
+	ret = vsnprintf(*result, size, fmt, aq);
 	va_end(aq);
 	assert(ret >= 0);
+	if (ret < size) /* OK */
+		return ret;
 	size = ret + 1;
-	*result = para_malloc(size);
+	*result = para_realloc(*result, size);
 	va_copy(aq, ap);
 	ret = vsnprintf(*result, size, fmt, aq);
 	va_end(aq);
@@ -191,7 +189,7 @@ __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
  * \return This function either returns a pointer to a string that must be
  * freed by the caller or aborts without returning.
  *
- * \sa printf(3), xasprintf().
+ * \sa printf(3), \ref xasprintf().
  */
 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
 {
@@ -231,7 +229,7 @@ void freep(void *arg)
  * return \a a without making a copy of \a a.  Otherwise, construct the
  * concatenation \a c, free \a a (but not \a b) and return \a c.
  *
- * \sa strcat(3)
+ * \sa strcat(3).
  */
 __must_check __malloc char *para_strcat(char *a, const char *b)
 {
@@ -296,24 +294,6 @@ __must_check char *para_basename(const char *name)
 	return ret;
 }
 
-/**
- * Cut trailing newline.
- *
- * \param buf The string to be chopped.
- *
- * Replace the last character in \p buf by zero if it is equal to
- * the newline character.
- */
-void chop(char *buf)
-{
-	int n = strlen(buf);
-
-	if (!n)
-		return;
-	if (buf[n - 1] == '\n')
-		buf[n - 1] = '\0';
-}
-
 /**
  * Get the logname of the current user.
  *
@@ -357,19 +337,35 @@ __malloc char *para_hostname(void)
 }
 
 /**
- * Used to distinguish between read-only and read-write mode.
+ * Call a custom function for each complete line.
+ *
+ * \param flags Any combination of flags defined in \ref for_each_line_flags.
+ * \param buf The buffer containing data separated by newlines.
+ * \param size The number of bytes in \a buf.
+ * \param line_handler The custom function.
+ * \param private_data Pointer passed to \a line_handler.
  *
- * \sa for_each_line(), for_each_line_ro().
+ * For each complete line in \p buf, \p line_handler is called. The first
+ * argument to \p line_handler is (a copy of) the current line, and \p
+ * private_data is passed as the second argument.  If the \p FELF_READ_ONLY
+ * flag is unset, a pointer into \a buf is passed to the line handler,
+ * otherwise a pointer to a copy of the current line is passed instead. This
+ * copy is freed immediately after the line handler returns.
+ *
+ * The function returns if \p line_handler returns a negative value or no more
+ * lines are in the buffer.  The rest of the buffer (last chunk containing an
+ * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
+ * unset.
+ *
+ * \return On success this function returns the number of bytes not handed to
+ * \p line_handler. The only possible error is a negative return value from the
+ * line handler. In this case processing stops and the return value of the line
+ * handler is returned to indicate failure.
+ *
+ * \sa \ref for_each_line_flags.
  */
-enum for_each_line_modes{
-	/** Activate read-only mode. */
-	LINE_MODE_RO,
-	/** Activate read-write mode. */
-	LINE_MODE_RW
-};
-
-static int for_each_complete_line(enum for_each_line_modes mode, char *buf,
-		size_t size, line_handler_t *line_handler, void *private_data)
+int for_each_line(unsigned flags, char *buf, size_t size,
+		line_handler_t *line_handler, void *private_data)
 {
 	char *start = buf, *end;
 	int ret, i, num_lines = 0;
@@ -380,95 +376,38 @@ static int for_each_complete_line(enum for_each_line_modes mode, char *buf,
 		char *next_cr;
 
 		next_cr = memchr(start, '\n', buf + size - start);
-		next_null = memchr(start, '\0', buf + size - start);
+		next_null = memchr(start, '\0', next_cr?
+			next_cr - start : buf + size - start);
 		if (!next_cr && !next_null)
 			break;
-		if (next_cr && next_null) {
-			end = next_cr < next_null? next_cr : next_null;
-		} else if (next_null) {
+		if (next_null)
 			end = next_null;
-		} else
+		else
 			end = next_cr;
 		num_lines++;
-		if (!line_handler) {
-			start = ++end;
-			continue;
-		}
-		if (mode == LINE_MODE_RO) {
-			size_t s = end - start;
-			char *b = para_malloc(s + 1);
-			memcpy(b, start, s);
-			b[s] = '\0';
-//			PARA_NOTICE_LOG("b: %s, start: %s\n", b, start);
-			ret = line_handler(b, private_data);
-			free(b);
-		} else {
-			*end = '\0';
-			ret = line_handler(start, private_data);
+		if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
+			if (flags & FELF_READ_ONLY) {
+				size_t s = end - start;
+				char *b = para_malloc(s + 1);
+				memcpy(b, start, s);
+				b[s] = '\0';
+				ret = line_handler(b, private_data);
+				free(b);
+			} else {
+				*end = '\0';
+				ret = line_handler(start, private_data);
+			}
+			if (ret < 0)
+				return ret;
 		}
-		if (ret < 0)
-			return ret;
 		start = ++end;
 	}
-	if (!line_handler || mode == LINE_MODE_RO)
-		return num_lines;
 	i = buf + size - start;
-	if (i && i != size)
+	if (i && i != size && !(flags & FELF_READ_ONLY))
 		memmove(buf, start, i);
 	return i;
 }
 
-/**
- * Call a custom function for each complete line.
- *
- * \param buf The buffer containing data separated by newlines.
- * \param size The number of bytes in \a buf.
- * \param line_handler The custom function.
- * \param private_data Pointer passed to \a line_handler.
- *
- * If \p line_handler is \p NULL, the function returns the number of complete
- * lines in \p buf.  Otherwise, \p line_handler is called for each complete
- * line in \p buf.  The first argument to \p line_handler is the current line,
- * and \p private_data is passed as the second argument.  The function returns
- * if \p line_handler returns a negative value or no more lines are in the
- * buffer.  The rest of the buffer (last chunk containing an incomplete line)
- * is moved to the beginning of the buffer.
- *
- * \return If \p line_handler is not \p NULL, this function returns the number
- * of bytes not handled to \p line_handler on success, or the negative return
- * value of the \p line_handler on errors.
- *
- * \sa for_each_line_ro().
- */
-int for_each_line(char *buf, size_t size, line_handler_t *line_handler,
-		void *private_data)
-{
-	return for_each_complete_line(LINE_MODE_RW, buf, size, line_handler,
-		private_data);
-}
-
-/**
- * Call a custom function for each complete line.
- *
- * \param buf Same meaning as in \p for_each_line().
- * \param size Same meaning as in \p for_each_line().
- * \param line_handler Same meaning as in \p for_each_line().
- * \param private_data Same meaning as in \p for_each_line().
- *
- * This function behaves like \p for_each_line(), but \a buf is left unchanged.
- *
- * \return On success, the function returns the number of complete lines in \p
- * buf, otherwise the (negative) return value of \p line_handler is returned.
- *
- * \sa for_each_line().
- */
-int for_each_line_ro(char *buf, size_t size, line_handler_t *line_handler,
-		void *private_data)
-{
-	return for_each_complete_line(LINE_MODE_RO, buf, size, line_handler,
-		private_data);
-}
-
 /** Return the hex characters of the lower 4 bits. */
 #define hex(a) (hexchar[(a) & 15])
 
@@ -608,7 +547,7 @@ __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
  *
  * \return Standard.
  *
- * \sa para_atoi32(), strtol(3), atoi(3).
+ * \sa \ref para_atoi32(), strtol(3), atoi(3).
  */
 int para_atoi64(const char *str, int64_t *value)
 {
@@ -619,10 +558,18 @@ int para_atoi64(const char *str, int64_t *value)
 	tmp = strtoll(str, &endptr, 10);
 	if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
 		return -E_ATOI_OVERFLOW;
-	if (errno != 0 && tmp == 0) /* other error */
-		return -E_STRTOLL;
+	/*
+	 * If there were no digits at all, strtoll() stores the original value
+	 * of str in *endptr.
+	 */
 	if (endptr == str)
 		return -E_ATOI_NO_DIGITS;
+	/*
+	 * The implementation may also set errno and return 0 in case no
+	 * conversion was performed.
+	 */
+	if (errno != 0 && tmp == 0)
+		return -E_ATOI_NO_DIGITS;
 	if (*endptr != '\0') /* Further characters after number */
 		return -E_ATOI_JUNK_AT_END;
 	*value = tmp;
@@ -637,7 +584,7 @@ int para_atoi64(const char *str, int64_t *value)
  *
  * \return Standard.
  *
- * \sa para_atoi64().
+ * \sa \ref para_atoi64().
 */
 int para_atoi32(const char *str, int32_t *value)
 {
@@ -682,7 +629,7 @@ int get_loglevel_by_name(const char *txt)
 		return LL_CRIT;
 	if (loglevel_equal(txt, "emerg"))
 		return LL_EMERG;
-	return -1;
+	return -E_BAD_LL;
 }
 
 static int get_next_word(const char *buf, const char *delim, char **word)
@@ -855,15 +802,18 @@ err:
  * Split a buffer into words.
  *
  * This parser honors single and double quotes, backslash-escaped characters
- * and special characters like \p \\n. The result contains pointers to copies
- * of the words contained in \a buf and has to be freed by using \ref
- * free_argv().
+ * and special characters like \\n. The result contains pointers to copies of
+ * the words contained in buf and has to be freed by using \ref free_argv().
  *
  * \param buf The buffer to be split.
  * \param delim Each character in this string is treated as a separator.
  * \param result The array of words is returned here.
  *
- * \return Number of words in \a buf, negative on errors.
+ * It's OK to pass NULL as the buffer argument. This is equivalent to passing
+ * the empty string.
+ *
+ * \return Number of words in buf, negative on errors. The array returned
+ * through the result pointer is NULL terminated.
  */
 int create_argv(const char *buf, const char *delim, char ***result)
 {
@@ -1004,36 +954,24 @@ static bool utf8_mode(void)
 	return have_utf8;
 }
 
-/*
- * glibc's wcswidth returns -1 if the string contains a tab character, which
- * makes the function next to useless. The two functions below are taken from
- * mutt.
- */
-
-#define IsWPrint(wc) (iswprint(wc) || wc >= 0xa0)
-
-static int mutt_wcwidth(wchar_t wc, size_t pos)
+static int xwcwidth(wchar_t wc, size_t pos)
 {
 	int n;
 
+	/* A tab advances to the next multiple of eight cells. */
 	if (wc == 0x09) /* tab */
 		return (pos | 7) + 1 - pos;
 	n = wcwidth(wc);
-	if (IsWPrint(wc) && n > 0)
-		return n;
-	if (!(wc & ~0x7f))
-		return 2;
-	if (!(wc & ~0xffff))
-		return 6;
-	return 10;
+	/* wcwidth() returns -1 for non-printable characters */
+	return n >= 0? n : 1;
 }
 
-static size_t mutt_wcswidth(const wchar_t *s, size_t n)
+static size_t xwcswidth(const wchar_t *s, size_t n)
 {
 	size_t w = 0;
 
 	while (n--)
-		w += mutt_wcwidth(*s++, w);
+		w += xwcwidth(*s++, w);
 	return w;
 }
 
@@ -1078,7 +1016,7 @@ int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
 		if (mbret == (size_t)-1 || mbret == (size_t)-2)
 			return -ERRNO_TO_PARA_ERROR(EILSEQ);
 		bytes_parsed += mbret;
-		cells_skipped += mutt_wcwidth(wc, cells_skipped);
+		cells_skipped += xwcwidth(wc, cells_skipped);
 	}
 	*bytes_to_skip = bytes_parsed;
 	return 1;
@@ -1122,12 +1060,87 @@ __must_check int strwidth(const char *s, size_t *result)
 		return -ERRNO_TO_PARA_ERROR(errno);
 	if (num_wchars == 0)
 		return 0;
-	dest = para_malloc(num_wchars * sizeof(*dest));
+	dest = para_malloc((num_wchars + 1) * sizeof(*dest));
 	src = s;
 	memset(&state, 0, sizeof(state));
 	num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
 	assert(num_wchars > 0 && num_wchars != (size_t)-1);
-	*result = mutt_wcswidth(dest, num_wchars);
+	*result = xwcswidth(dest, num_wchars);
 	free(dest);
 	return 1;
 }
+
+/**
+ * Truncate and sanitize a (wide character) string.
+ *
+ * This replaces all non-printable characters by spaces and makes sure that the
+ * modified string does not exceed the given maximal width.
+ *
+ * \param src The source string in multi-byte form.
+ * \param max_width The maximal number of cells the result may occupy.
+ * \param result Sanitized multi-byte string, must be freed by caller.
+ * \param width The width of the sanitized string, always <= max_width.
+ *
+ * The function is wide-character aware but falls back to C strings for
+ * non-UTF-8 locales. In the fallback case each byte is assumed to occupy
+ * exactly one cell.
+ *
+ * In UTF-8 mode the string is truncated before the first character which would
+ * make the result wider than \a max_width. In particular, a double-width
+ * character is dropped if only a single cell is left.
+ *
+ * \return Standard. On success, *result points to a sanitized copy of the
+ * given string. This copy was allocated with malloc() and should hence be
+ * freed when the caller is no longer interested in the result.
+ *
+ * The function fails if the given string contains an invalid multibyte
+ * sequence. In this case, *result is set to NULL, and *width to zero.
+ */
+__must_check int sanitize_str(const char *src, size_t max_width,
+		char **result, size_t *width)
+{
+	mbstate_t state;
+	wchar_t *wcs;
+	size_t num_wchars, n;
+
+	if (!utf8_mode()) {
+		*result = para_strdup(src);
+		/* replace non-printable characters by spaces */
+		for (n = 0; n < max_width && src[n]; n++) {
+			if (!isprint((unsigned char)src[n]))
+				(*result)[n] = ' ';
+		}
+		(*result)[n] = '\0';
+		*width = n;
+		return 0;
+	}
+	*result = NULL;
+	*width = 0;
+	memset(&state, 0, sizeof(state));
+	/* First pass: only count the wide characters, src is left untouched. */
+	num_wchars = mbsrtowcs(NULL, &src, 0, &state);
+	if (num_wchars == (size_t)-1)
+		return -ERRNO_TO_PARA_ERROR(errno);
+	wcs = para_malloc((num_wchars + 1) * sizeof(*wcs));
+	memset(&state, 0, sizeof(state));
+	num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
+	assert(num_wchars != (size_t)-1);
+	for (n = 0; n < num_wchars && *width < max_width; n++) {
+		int w;
+
+		if (!iswprint(wcs[n]))
+			wcs[n] = L' ';
+		w = xwcwidth(wcs[n], *width);
+		/* Stop rather than exceed max_width with a wide character. */
+		if (*width + w > max_width)
+			break;
+		*width += w;
+	}
+	wcs[n] = L'\0';
+	n = wcstombs(NULL, wcs, 0) + 1;
+	*result = para_malloc(n);
+	num_wchars = wcstombs(*result, wcs, n);
+	assert(num_wchars != (size_t)-1);
+	free(wcs);
+	return 1;
+}