X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=string.c;h=dfcfa2cdf6eb776c63afdaec981aed55147a31b1;hp=8c8c50ac6ef30e51ec86751837e44e0e2a4ef95f;hb=7007aedb78262af262e7e7db8d010c6498e79290;hpb=9de1287d67c9562e9140c6dc7deb0c01c4e10cc0 diff --git a/string.c b/string.c index 8c8c50ac..dfcfa2cd 100644 --- a/string.c +++ b/string.c @@ -1,17 +1,23 @@ /* - * Copyright (C) 2004-2012 Andre Noll + * Copyright (C) 2004-2013 Andre Noll * * Licensed under the GPL v2. For licencing details see COPYING. */ /** \file string.c Memory allocation and string handling functions. */ -#include /* gettimeofday */ +#define _GNU_SOURCE + #include #include /* uname() */ + #include #include +#include +#include +#include + #include "para.h" #include "string.h" #include "error.h" @@ -114,6 +120,65 @@ __must_check __malloc char *para_strdup(const char *s) exit(EXIT_FAILURE); } +/** + * Print a formated message to a dynamically allocated string. + * + * \param result The formated string is returned here. + * \param fmt The format string. + * \param ap Initialized list of arguments. + * + * This function is similar to vasprintf(), a GNU extension which is not in C + * or POSIX. It allocates a string large enough to hold the output including + * the terminating null byte. The allocated string is returned via the first + * argument and must be freed by the caller. However, unlike vasprintf(), this + * function calls exit() if insufficient memory is available, while vasprintf() + * returns -1 in this case. + * + * \return Number of bytes written, not including the terminating \p NULL + * character. + * + * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf(). + */ +__printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap) +{ + int ret; + size_t size; + va_list aq; + + va_copy(aq, ap); + ret = vsnprintf(NULL, 0, fmt, aq); + va_end(aq); + assert(ret >= 0); + size = ret + 1; + *result = para_malloc(size); + va_copy(aq, ap); + ret = vsnprintf(*result, size, fmt, aq); + va_end(aq); + assert(ret >= 0 && ret < size); + return ret; +} + +/** + * Print to a dynamically allocated string, variable number of arguments. + * + * \param result See \ref xvasprintf(). + * \param fmt Usual format string. + * + * \return The return value of the underlying call to \ref xvasprintf(). + * + * \sa \ref xvasprintf() and the references mentioned there. + */ +__printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...) +{ + va_list ap; + unsigned ret; + + va_start(ap, fmt); + ret = xvasprintf(result, fmt, ap); + va_end(ap); + return ret; +} + /** * Allocate a sufficiently large string and print into it. * @@ -125,13 +190,16 @@ __must_check __malloc char *para_strdup(const char *s) * \return This function either returns a pointer to a string that must be * freed by the caller or aborts without returning. * - * \sa printf(3). + * \sa printf(3), xasprintf(). */ __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...) { char *msg; + va_list ap; - PARA_VSPRINTF(fmt, msg); + va_start(ap, fmt); + xvasprintf(&msg, fmt, ap); + va_end(ap); return msg; } @@ -738,7 +806,7 @@ int compute_word_num(const char *buf, const char *delim, int point) } /** - * Free an array of words created by create_argv(). + * Free an array of words created by create_argv() or create_shifted_argv(). * * \param argv A pointer previously obtained by \ref create_argv(). */ @@ -753,6 +821,35 @@ void free_argv(char **argv) free(argv); } +static int create_argv_offset(int offset, const char *buf, const char *delim, + char ***result) +{ + char *word, **argv = para_malloc((offset + 1) * sizeof(char *)); + const char *p; + int i, ret; + + for (i = 0; i < offset; i++) + argv[i] = NULL; + for (p = buf; p && *p; p += ret, i++) { + ret = get_next_word(p, delim, &word); + if (ret < 0) + goto err; + if (!ret) + break; + argv = para_realloc(argv, (i + 2) * sizeof(char*)); + argv[i] = word; + } + argv[i] = NULL; + *result = argv; + return i; +err: + while (i > 0) + free(argv[--i]); + free(argv); + *result = NULL; + return ret; +} + /** * Split a buffer into words. * @@ -769,28 +866,47 @@ void free_argv(char **argv) */ int create_argv(const char *buf, const char *delim, char ***result) { - char *word, **argv = para_malloc(2 * sizeof(char *)); - const char *p; - int ret, num_words; + return create_argv_offset(0, buf, delim, result); +} - for (p = buf, num_words = 0; ; p += ret, num_words++) { - ret = get_next_word(p, delim, &word); - if (ret < 0) - goto err; - if (!ret) - break; - argv = para_realloc(argv, (num_words + 2) * sizeof(char*)); - argv[num_words] = word; - } - argv[num_words] = NULL; - *result = argv; - return num_words; -err: - while (num_words > 0) - free(argv[--num_words]); - free(argv); - *result = NULL; - return ret; +/** + * Split a buffer into words, offset one. + * + * This is similar to \ref create_argv() but the returned array is one element + * larger, words start at index one and element zero is initialized to \p NULL. + * Callers must set element zero to a non-NULL value before calling free_argv() + * on the returned array to avoid a memory leak. + * + * \param buf See \ref create_argv(). + * \param delim See \ref create_argv(). + * \param result See \ref create_argv(). + * + * \return Number of words plus one on success, negative on errors. + */ +int create_shifted_argv(const char *buf, const char *delim, char ***result) +{ + return create_argv_offset(1, buf, delim, result); +} + +/** + * Find out if the given string is contained in the arg vector. + * + * \param arg The string to look for. + * \param argv The array to search. + * + * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if + * arg was not found in \a argv. + */ +int find_arg(const char *arg, char **argv) +{ + int i; + + if (!argv) + return -E_ARG_NOT_FOUND; + for (i = 0; argv[i]; i++) + if (strcmp(arg, argv[i]) == 0) + return i; + return -E_ARG_NOT_FOUND; } /** @@ -872,3 +988,145 @@ char *key_value_copy(const char *src, size_t len, const char *key) return NULL; return safe_strdup(src + keylen + 1, len - keylen - 1); } + +static bool utf8_mode(void) +{ + static bool initialized, have_utf8; + + if (!initialized) { + char *info = nl_langinfo(CODESET); + have_utf8 = (info && strcmp(info, "UTF-8") == 0); + initialized = true; + PARA_INFO_LOG("%susing UTF-8 character encoding\n", + have_utf8? "" : "not "); + } + return have_utf8; +} + +/* + * glibc's wcswidth returns -1 if the string contains a tab character, which + * makes the function next to useless. The two functions below are taken from + * mutt. + */ + +#define IsWPrint(wc) (iswprint(wc) || wc >= 0xa0) + +static int mutt_wcwidth(wchar_t wc, size_t pos) +{ + int n; + + if (wc == 0x09) /* tab */ + return (pos | 7) + 1 - pos; + n = wcwidth(wc); + if (IsWPrint(wc) && n > 0) + return n; + if (!(wc & ~0x7f)) + return 2; + if (!(wc & ~0xffff)) + return 6; + return 10; +} + +static size_t mutt_wcswidth(const wchar_t *s, size_t n) +{ + size_t w = 0; + + while (n--) + w += mutt_wcwidth(*s++, w); + return w; +} + +/** + * Skip a given number of cells at the beginning of a string. + * + * \param s The input string. + * \param cells_to_skip Desired number of cells that should be skipped. + * \param bytes_to_skip Result. + * + * This function computes how many input bytes must be skipped to advance a + * string by the given width. If the current character encoding is not UTF-8, + * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise, + * \a s is treated as a multibyte string and on successful return, \a s + + * bytes_to_skip points to the start of a multibyte string such that the total + * width of the multibyte characters that are skipped by advancing \a s that + * many bytes equals at least \a cells_to_skip. + * + * \return Standard. + */ +int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip) +{ + wchar_t wc; + mbstate_t ps; + size_t n, bytes_parsed, cells_skipped; + + *bytes_to_skip = 0; + if (cells_to_skip == 0) + return 0; + if (!utf8_mode()) { + *bytes_to_skip = cells_to_skip; + return 0; + } + bytes_parsed = cells_skipped = 0; + memset(&ps, 0, sizeof(ps)); + n = strlen(s); + while (cells_to_skip > cells_skipped) { + size_t mbret; + + mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps); + assert(mbret != 0); + if (mbret == (size_t)-1 || mbret == (size_t)-2) + return -ERRNO_TO_PARA_ERROR(EILSEQ); + bytes_parsed += mbret; + cells_skipped += mutt_wcwidth(wc, cells_skipped); + } + *bytes_to_skip = bytes_parsed; + return 1; +} + +/** + * Compute the width of an UTF-8 string. + * + * \param s The string. + * \param result The width of \a s is returned here. + * + * If not in UTF8-mode. this function is just a wrapper for strlen(3). + * Otherwise \a s is treated as an UTF-8 string and its display width is + * computed. Note that this function may fail if the underlying call to + * mbsrtowcs(3) fails, so the caller must check the return value. + * + * \sa nl_langinfo(3), wcswidth(3). + * + * \return Standard. + */ +__must_check int strwidth(const char *s, size_t *result) +{ + const char *src = s; + mbstate_t state; + static wchar_t *dest; + size_t num_wchars; + + /* + * Never call any log function here. This may result in an endless loop + * as para_gui's para_log() calls this function. + */ + + if (!utf8_mode()) { + *result = strlen(s); + return 0; + } + memset(&state, 0, sizeof(state)); + *result = 0; + num_wchars = mbsrtowcs(NULL, &src, 0, &state); + if (num_wchars == (size_t)-1) + return -ERRNO_TO_PARA_ERROR(errno); + if (num_wchars == 0) + return 0; + dest = para_malloc(num_wchars * sizeof(*dest)); + src = s; + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(dest, &src, num_wchars, &state); + assert(num_wchars > 0 && num_wchars != (size_t)-1); + *result = mutt_wcswidth(dest, num_wchars); + free(dest); + return 1; +}