X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=string.c;h=dfcfa2cdf6eb776c63afdaec981aed55147a31b1;hp=7123ba1ae5e00dfc13e1417bfbafd102c3946175;hb=7007aedb78262af262e7e7db8d010c6498e79290;hpb=02dd632ab2a6696aff7b6c6d108069704cfe871a

diff --git a/string.c b/string.c
index 7123ba1a..dfcfa2cd 100644
--- a/string.c
+++ b/string.c
@@ -1,17 +1,23 @@
 /*
- * Copyright (C) 2004-2012 Andre Noll <maan@systemlinux.org>
+ * Copyright (C) 2004-2013 Andre Noll <maan@systemlinux.org>
  *
  * Licensed under the GPL v2. For licencing details see COPYING.
  */
 
 /** \file string.c Memory allocation and string handling functions. */
 
-#include <sys/time.h> /* gettimeofday */
+#define _GNU_SOURCE
+
 #include <pwd.h>
 #include <sys/utsname.h> /* uname() */
+
 #include <string.h>
 #include <regex.h>
 
+#include <langinfo.h>
+#include <wchar.h>
+#include <wctype.h>
+
 #include "para.h"
 #include "string.h"
 #include "error.h"
@@ -800,7 +806,7 @@ int compute_word_num(const char *buf, const char *delim, int point)
 }
 
 /**
- * Free an array of words created by create_argv().
+ * Free an array of words created by create_argv() or create_shifted_argv().
  *
  * \param argv A pointer previously obtained by \ref create_argv().
  */
@@ -815,6 +821,35 @@ void free_argv(char **argv)
 	free(argv);
 }
 
+static int create_argv_offset(int offset, const char *buf, const char *delim,
+		char ***result) /* split buf into words, stored from index offset on */
+{
+	char *word, **argv = para_malloc((offset + 1) * sizeof(char *)); /* offset slots plus the terminating NULL */
+	const char *p;
+	int i, ret;
+
+	for (i = 0; i < offset; i++)
+		argv[i] = NULL; /* reserved leading slots */
+	for (p = buf; p && *p; p += ret, i++) { /* a NULL or empty buf yields no words */
+		ret = get_next_word(p, delim, &word);
+		if (ret < 0)
+			goto err;
+		if (!ret) /* presumably: no further word in the buffer */
+			break;
+		argv = para_realloc(argv, (i + 2) * sizeof(char*)); /* slot i for the word, slot i + 1 for the terminator */
+		argv[i] = word;
+	}
+	argv[i] = NULL;
+	*result = argv;
+	return i; /* offset plus the number of words stored */
+err:
+	while (i > 0)
+		free(argv[--i]); /* the leading offset slots are NULL, which free() ignores */
+	free(argv);
+	*result = NULL;
+	return ret; /* the negative value returned by get_next_word() */
+}
+
 /**
  * Split a buffer into words.
  *
@@ -831,28 +866,26 @@ void free_argv(char **argv)
  */
 int create_argv(const char *buf, const char *delim, char ***result)
 {
-	char *word, **argv = para_malloc(2 * sizeof(char *));
-	const char *p;
-	int ret, num_words;
+	return create_argv_offset(0, buf, delim, result); /* offset 0: no reserved leading slots */
+}
 
-	for (p = buf, num_words = 0; ; p += ret, num_words++) {
-		ret = get_next_word(p, delim, &word);
-		if (ret < 0)
-			goto err;
-		if (!ret)
-			break;
-		argv = para_realloc(argv, (num_words + 2) * sizeof(char*));
-		argv[num_words] = word;
-	}
-	argv[num_words] = NULL;
-	*result = argv;
-	return num_words;
-err:
-	while (num_words > 0)
-		free(argv[--num_words]);
-	free(argv);
-	*result = NULL;
-	return ret;
+/**
+ * Split a buffer into words, leaving element zero of the result unused.
+ *
+ * This is similar to \ref create_argv() but the returned array is one element
+ * larger, words start at index one and element zero is initialized to \p NULL.
+ * Callers must set element zero to a non-NULL value before calling free_argv()
+ * on the returned array to avoid a memory leak.
+ *
+ * \param buf See \ref create_argv().
+ * \param delim See \ref create_argv().
+ * \param result See \ref create_argv().
+ *
+ * \return Words plus one on success, negative (and \p NULL result) on errors.
+ */
+int create_shifted_argv(const char *buf, const char *delim, char ***result)
+{
+	return create_argv_offset(1, buf, delim, result); /* offset 1: slot zero stays NULL */
 }
 
 /**
@@ -955,3 +988,145 @@ char *key_value_copy(const char *src, size_t len, const char *key)
 		return NULL;
 	return safe_strdup(src + keylen + 1, len - keylen - 1);
 }
+
+static bool utf8_mode(void) /* true iff nl_langinfo(CODESET) reports UTF-8 */
+{
+	static bool initialized, have_utf8; /* the codeset is looked up once and cached */
+
+	if (!initialized) {
+		char *info = nl_langinfo(CODESET); /* NOTE(review): depends on the current locale; assumes setlocale(3) ran earlier -- confirm */
+		have_utf8 = (info && strcmp(info, "UTF-8") == 0);
+		initialized = true; /* NOTE(review): presumably set before logging so that a logger which ends up here again skips this branch */
+		PARA_INFO_LOG("%susing UTF-8 character encoding\n",
+			have_utf8? "" : "not ");
+	}
+	return have_utf8;
+}
+
+/*
+ * glibc's wcswidth(3) returns -1 if the string contains a tab character, so
+ * the two width functions below, taken from mutt, are used instead. IsWPrint()
+ * treats every value >= 0xa0 as printable, whatever iswprint(3) reports.
+ */
+
+#define IsWPrint(wc) (iswprint(wc) || wc >= 0xa0) /* NOTE(review): wc is not parenthesized; pass plain identifiers only */
+
+static int mutt_wcwidth(wchar_t wc, size_t pos) /* cells taken by wc when it starts in column pos */
+{
+	int n;
+
+	if (wc == 0x09) /* tab */
+		return (pos | 7) + 1 - pos; /* distance to the next multiple of eight */
+	n = wcwidth(wc);
+	if (IsWPrint(wc) && n > 0)
+		return n; /* printable with a positive width: trust wcwidth(3) */
+	if (!(wc & ~0x7f)) /* fits in seven bits */
+		return 2; /* presumably the width of a ^X style replacement -- confirm against mutt */
+	if (!(wc & ~0xffff)) /* fits in sixteen bits */
+		return 6; /* presumably the width of a \uXXXX style replacement -- confirm */
+	return 10; /* presumably the width of a \UXXXXXXXX style replacement -- confirm */
+}
+
+static size_t mutt_wcswidth(const wchar_t *s, size_t n) /* total width of the first n wide characters of s */
+{
+	size_t w = 0;
+
+	while (n--)
+		w += mutt_wcwidth(*s++, w); /* the running width doubles as the column, which tabs depend on */
+	return w;
+}
+
+/**
+ * Skip a given number of cells at the beginning of a string.
+ *
+ * \param s The input string.
+ * \param cells_to_skip Desired number of cells that should be skipped.
+ * \param bytes_to_skip Result.
+ *
+ * This function computes how many input bytes must be skipped to advance a
+ * string by the given width. If the current character encoding is not UTF-8,
+ * this is simply \a cells_to_skip, and the length of \a s is not checked.
+ * Otherwise \a s is treated as a multibyte string and, on success, the
+ * multibyte characters in the first \a bytes_to_skip bytes of \a s have a
+ * total width of at least \a cells_to_skip.
+ *
+ * \return Negative if mbrtowc(3) reports an invalid or incomplete sequence
+ * before enough cells were skipped, zero if nothing was parsed, one otherwise.
+ */
+int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
+{
+	wchar_t wc;
+	mbstate_t ps;
+	size_t n, bytes_parsed, cells_skipped;
+
+	*bytes_to_skip = 0;
+	if (cells_to_skip == 0)
+		return 0;
+	if (!utf8_mode()) {
+		*bytes_to_skip = cells_to_skip; /* one byte per cell */
+		return 0;
+	}
+	bytes_parsed = cells_skipped = 0;
+	memset(&ps, 0, sizeof(ps)); /* start in the initial conversion state */
+	n = strlen(s);
+	while (cells_to_skip > cells_skipped) {
+		size_t mbret;
+
+		mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps);
+		assert(mbret != 0); /* the byte count passed above excludes the terminating NUL */
+		if (mbret == (size_t)-1 || mbret == (size_t)-2) /* NOTE(review): a too short s presumably ends up here as well -- confirm */
+			return -ERRNO_TO_PARA_ERROR(EILSEQ);
+		bytes_parsed += mbret;
+		cells_skipped += mutt_wcwidth(wc, cells_skipped); /* the width of a tab depends on the column */
+	}
+	*bytes_to_skip = bytes_parsed;
+	return 1;
+}
+
+/**
+ * Compute the width of a UTF-8 string.
+ *
+ * \param s The string.
+ * \param result The width of \a s is returned here.
+ *
+ * If not in UTF-8 mode, this function is just a wrapper for strlen(3).
+ * Otherwise \a s is treated as a UTF-8 string and its display width is
+ * computed. Note that this function may fail if the underlying call to
+ * mbsrtowcs(3) fails, so the caller must check the return value.
+ *
+ * \sa nl_langinfo(3), wcswidth(3).
+ *
+ * \return Negative if the conversion fails, non-negative on success.
+ */
+__must_check int strwidth(const char *s, size_t *result)
+{
+	const char *src = s;
+	mbstate_t state;
+	static wchar_t *dest; /* NOTE(review): allocated and freed on every call, static looks unnecessary */
+	size_t num_wchars;
+
+	/*
+	 * Never call any log function here. This may result in an endless loop
+	 * as para_gui's para_log() calls this function.
+	 */
+
+	if (!utf8_mode()) { /* NOTE(review): utf8_mode() itself logs on its first call, see the warning above */
+		*result = strlen(s);
+		return 0;
+	}
+	memset(&state, 0, sizeof(state));
+	*result = 0;
+	num_wchars = mbsrtowcs(NULL, &src, 0, &state); /* NULL destination: only count the wide characters */
+	if (num_wchars == (size_t)-1)
+		return -ERRNO_TO_PARA_ERROR(errno);
+	if (num_wchars == 0)
+		return 0; /* empty string, width zero */
+	dest = para_malloc(num_wchars * sizeof(*dest)); /* exactly num_wchars elements, no room for a terminator */
+	src = s;
+	memset(&state, 0, sizeof(state));
+	num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
+	assert(num_wchars > 0 && num_wchars != (size_t)-1); /* the same input was converted successfully above */
+	*result = mutt_wcswidth(dest, num_wchars);
+	free(dest);
+	return 1;
+}