From: Andre Noll Date: Mon, 29 Feb 2016 21:10:05 +0000 (+0100) Subject: Introduce sanitize_str(). X-Git-Tag: v0.5.6~23^2~8 X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=commitdiff_plain;h=8daabd03bb649ae04d6b2dd9e46640ca1e514d3d Introduce sanitize_str(). Currently we sanitize the status item strings for para_gui in align_str() of gui.c. This implementation is flawed because it does not work for wide character strings. This patch provides an abstraction in string.c which works in both the UFT-8 and non-UTF-8 case. The flawed implementation of this function in gui.c is removed and the surrounding code is changed to call the new function instead. --- diff --git a/gui.c b/gui.c index 9d96e70f..14214890 100644 --- a/gui.c +++ b/gui.c @@ -264,41 +264,35 @@ static void add_spaces(WINDOW* win, unsigned int num) * print aligned string to curses window. This function always prints * exactly len chars. */ -static int align_str(WINDOW* win, char *str, unsigned int len, +static int align_str(WINDOW* win, const char *str, unsigned int len, unsigned int align) { - int ret, i, num; /* of spaces */ + int ret, num; /* of spaces */ size_t width; + char *sstr; /* sanitized string */ if (!win || !str) return 0; - ret = strwidth(str, &width); + ret = sanitize_str(str, len, &sstr, &width); if (ret < 0) { PARA_ERROR_LOG("%s\n", para_strerror(-ret)); width = 0; - str[0] = '\0'; + sstr = para_strdup(NULL); } + assert(width <= len); num = len - width; - if (num < 0) { - str[len] = '\0'; - num = 0; - } - /* replace control characters by spaces */ - for (i = 0; i < len && str[i]; i++) { - if (str[i] == '\n' || str[i] == '\r' || str[i] == '\f') - str[i] = ' '; - } if (align == LEFT) { - waddstr(win, str); + waddstr(win, sstr); add_spaces(win, num); } else if (align == RIGHT) { add_spaces(win, num); - waddstr(win, str); + waddstr(win, sstr); } else { add_spaces(win, num / 2); - waddstr(win, str); + waddstr(win, sstr); add_spaces(win, num - num / 2); } + free(sstr); return 1; } diff --git a/string.c b/string.c index d9dcc62d..e731bb49 100644 --- a/string.c +++ b/string.c @@ -1070,3 +1070,66 @@ __must_check int strwidth(const char *s, size_t *result) free(dest); return 1; } + +/** + * Truncate and sanitize a (wide character) string. + * + * This replaces all non-printable characters by spaces and makes sure that the + * modified string does not exceed the given maximal width. + * + * \param src The source string in multi-byte form. + * \param max_width The maximal number of cells the result may occupy. + * \param result Sanitized multi-byte string, must be freed by caller. + * \param width The width of the sanitized string, always <= max_width. + * + * The function is wide-character aware but falls back to C strings for + * non-UTF-8 locales. + * + * \return Standard. On success, *result points to a sanitized copy of the + * given string. This copy was allocated with malloc() and should hence be + * freed when the caller is no longer interested in the result. + * + * The function fails if the given string contains an invalid multibyte + * sequence. In this case, *result is set to NULL, and *width to zero. + */ +__must_check int sanitize_str(const char *src, size_t max_width, + char **result, size_t *width) +{ + mbstate_t state; + static wchar_t *wcs; + size_t num_wchars, n; + + if (!utf8_mode()) { + *result = para_strdup(src); + /* replace non-printable characters by spaces */ + for (n = 0; n < max_width && src[n]; n++) { + if (!isprint((unsigned char)src[n])) + (*result)[n] = ' '; + } + (*result)[n] = '\0'; + *width = n; + return 0; + } + *result = NULL; + *width = 0; + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(NULL, &src, 0, &state); + if (num_wchars == (size_t)-1) + return -ERRNO_TO_PARA_ERROR(errno); + wcs = para_malloc((num_wchars + 1) * sizeof(*wcs)); + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state); + assert(num_wchars != (size_t)-1); + for (n = 0; n < num_wchars && *width < max_width; n++) { + if (!iswprint(wcs[n])) + wcs[n] = L' '; + *width += xwcwidth(wcs[n], *width); + } + wcs[n] = L'\0'; + n = wcstombs(NULL, wcs, 0) + 1; + *result = para_malloc(n); + num_wchars = wcstombs(*result, wcs, n); + assert(num_wchars != (size_t)-1); + free(wcs); + return 1; +} diff --git a/string.h b/string.h index 61bb7c25..aa8292fd 100644 --- a/string.h +++ b/string.h @@ -101,3 +101,5 @@ char *safe_strdup(const char *src, size_t len); char *key_value_copy(const char *src, size_t len, const char *key); int skip_cells(const char *s, size_t cells_to_skip, size_t *result); __must_check int strwidth(const char *s, size_t *result); +__must_check int sanitize_str(const char *src, size_t max_width, + char **result, size_t *width);