X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=string.c;h=6033a008dbf154953de673f85048eb78812d1722;hp=d9dcc62d2ea45efc6a3c2899185a2033d5e74339;hb=534a94f441767947874cb15d18211edf758e9277;hpb=5971e3303cf800603622c2475e07fdccc33e4915 diff --git a/string.c b/string.c index d9dcc62d..6033a008 100644 --- a/string.c +++ b/string.c @@ -6,19 +6,16 @@ /** \file string.c Memory allocation and string handling functions. */ -#define _GNU_SOURCE +#include "para.h" #include #include /* uname() */ - #include #include - #include #include #include -#include "para.h" #include "string.h" #include "error.h" @@ -806,15 +803,18 @@ err: * Split a buffer into words. * * This parser honors single and double quotes, backslash-escaped characters - * and special characters like \p \\n. The result contains pointers to copies - * of the words contained in \a buf and has to be freed by using \ref - * free_argv(). + * and special characters like \\n. The result contains pointers to copies of + * the words contained in buf and has to be freed by using \ref free_argv(). * * \param buf The buffer to be split. * \param delim Each character in this string is treated as a separator. * \param result The array of words is returned here. * - * \return Number of words in \a buf, negative on errors. + * It's OK to pass NULL as the buffer argument. This is equivalent to passing + * the empty string. + * + * \return Number of words in buf, negative on errors. The array returned + * through the result pointer is NULL terminated. */ int create_argv(const char *buf, const char *delim, char ***result) { @@ -1070,3 +1070,66 @@ __must_check int strwidth(const char *s, size_t *result) free(dest); return 1; } + +/** + * Truncate and sanitize a (wide character) string. + * + * This replaces all non-printable characters by spaces and makes sure that the + * modified string does not exceed the given maximal width. + * + * \param src The source string in multi-byte form. + * \param max_width The maximal number of cells the result may occupy. + * \param result Sanitized multi-byte string, must be freed by caller. + * \param width The width of the sanitized string, always <= max_width. + * + * The function is wide-character aware but falls back to C strings for + * non-UTF-8 locales. + * + * \return Standard. On success, *result points to a sanitized copy of the + * given string. This copy was allocated with malloc() and should hence be + * freed when the caller is no longer interested in the result. + * + * The function fails if the given string contains an invalid multibyte + * sequence. In this case, *result is set to NULL, and *width to zero. + */ +__must_check int sanitize_str(const char *src, size_t max_width, + char **result, size_t *width) +{ + mbstate_t state; + static wchar_t *wcs; + size_t num_wchars, n; + + if (!utf8_mode()) { + *result = para_strdup(src); + /* replace non-printable characters by spaces */ + for (n = 0; n < max_width && src[n]; n++) { + if (!isprint((unsigned char)src[n])) + (*result)[n] = ' '; + } + (*result)[n] = '\0'; + *width = n; + return 0; + } + *result = NULL; + *width = 0; + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(NULL, &src, 0, &state); + if (num_wchars == (size_t)-1) + return -ERRNO_TO_PARA_ERROR(errno); + wcs = para_malloc((num_wchars + 1) * sizeof(*wcs)); + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state); + assert(num_wchars != (size_t)-1); + for (n = 0; n < num_wchars && *width < max_width; n++) { + if (!iswprint(wcs[n])) + wcs[n] = L' '; + *width += xwcwidth(wcs[n], *width); + } + wcs[n] = L'\0'; + n = wcstombs(NULL, wcs, 0) + 1; + *result = para_malloc(n); + num_wchars = wcstombs(*result, wcs, n); + assert(num_wchars != (size_t)-1); + free(wcs); + return 1; +}