+
+static bool utf8_mode(void)
+{
+ static bool initialized, have_utf8;
+
+ if (!initialized) {
+ char *info = nl_langinfo(CODESET);
+ have_utf8 = (info && strcmp(info, "UTF-8") == 0);
+ initialized = true;
+ PARA_INFO_LOG("%susing UTF-8 character encoding\n",
+ have_utf8? "" : "not ");
+ }
+ return have_utf8;
+}
+
+static int xwcwidth(wchar_t wc, size_t pos)
+{
+ int n;
+
+ /* special-case for tab */
+ if (wc == 0x09) /* tab */
+ return (pos | 7) + 1 - pos;
+ n = wcwidth(wc);
+ /* wcswidth() returns -1 for non-printable characters */
+ return n >= 0? n : 1;
+}
+
+static size_t xwcswidth(const wchar_t *s, size_t n)
+{
+ size_t w = 0;
+
+ while (n--)
+ w += xwcwidth(*s++, w);
+ return w;
+}
+
+/**
+ * Skip a given number of cells at the beginning of a string.
+ *
+ * \param s The input string.
+ * \param cells_to_skip Desired number of cells that should be skipped.
+ * \param bytes_to_skip Result.
+ *
+ * This function computes how many input bytes must be skipped to advance a
+ * string by the given width. If the current character encoding is not UTF-8,
+ * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
+ * \a s is treated as a multibyte string and on successful return, \a s +
+ * bytes_to_skip points to the start of a multibyte string such that the total
+ * width of the multibyte characters that are skipped by advancing \a s that
+ * many bytes equals at least \a cells_to_skip.
+ *
+ * \return Standard.
+ */
+int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
+{
+ wchar_t wc;
+ mbstate_t ps;
+ size_t n, bytes_parsed, cells_skipped;
+
+ *bytes_to_skip = 0;
+ if (cells_to_skip == 0)
+ return 0;
+ if (!utf8_mode()) {
+ *bytes_to_skip = cells_to_skip;
+ return 0;
+ }
+ bytes_parsed = cells_skipped = 0;
+ memset(&ps, 0, sizeof(ps));
+ n = strlen(s);
+ while (cells_to_skip > cells_skipped) {
+ size_t mbret;
+
+ mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps);
+ assert(mbret != 0);
+ if (mbret == (size_t)-1 || mbret == (size_t)-2)
+ return -ERRNO_TO_PARA_ERROR(EILSEQ);
+ bytes_parsed += mbret;
+ cells_skipped += xwcwidth(wc, cells_skipped);
+ }
+ *bytes_to_skip = bytes_parsed;
+ return 1;
+}
+
+/**
+ * Compute the width of an UTF-8 string.
+ *
+ * \param s The string.
+ * \param result The width of \a s is returned here.
+ *
+ * If not in UTF8-mode. this function is just a wrapper for strlen(3).
+ * Otherwise \a s is treated as an UTF-8 string and its display width is
+ * computed. Note that this function may fail if the underlying call to
+ * mbsrtowcs(3) fails, so the caller must check the return value.
+ *
+ * \sa nl_langinfo(3), wcswidth(3).
+ *
+ * \return Standard.
+ */
+__must_check int strwidth(const char *s, size_t *result)
+{
+ const char *src = s;
+ mbstate_t state;
+ static wchar_t *dest;
+ size_t num_wchars;
+
+ /*
+ * Never call any log function here. This may result in an endless loop
+ * as para_gui's para_log() calls this function.
+ */
+
+ if (!utf8_mode()) {
+ *result = strlen(s);
+ return 0;
+ }
+ memset(&state, 0, sizeof(state));
+ *result = 0;
+ num_wchars = mbsrtowcs(NULL, &src, 0, &state);
+ if (num_wchars == (size_t)-1)
+ return -ERRNO_TO_PARA_ERROR(errno);
+ if (num_wchars == 0)
+ return 0;
+ dest = para_malloc((num_wchars + 1) * sizeof(*dest));
+ src = s;
+ memset(&state, 0, sizeof(state));
+ num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
+ assert(num_wchars > 0 && num_wchars != (size_t)-1);
+ *result = xwcswidth(dest, num_wchars);
+ free(dest);
+ return 1;
+}
+
+/**
+ * Truncate and sanitize a (wide character) string.
+ *
+ * This replaces all non-printable characters by spaces and makes sure that the
+ * modified string does not exceed the given maximal width.
+ *
+ * \param src The source string in multi-byte form.
+ * \param max_width The maximal number of cells the result may occupy.
+ * \param result Sanitized multi-byte string, must be freed by caller.
+ * \param width The width of the sanitized string, always <= max_width.
+ *
+ * The function is wide-character aware but falls back to C strings for
+ * non-UTF-8 locales.
+ *
+ * \return Standard. On success, *result points to a sanitized copy of the
+ * given string. This copy was allocated with malloc() and should hence be
+ * freed when the caller is no longer interested in the result.
+ *
+ * The function fails if the given string contains an invalid multibyte
+ * sequence. In this case, *result is set to NULL, and *width to zero.
+ */
+__must_check int sanitize_str(const char *src, size_t max_width,
+ char **result, size_t *width)
+{
+ mbstate_t state;
+ static wchar_t *wcs;
+ size_t num_wchars, n;
+
+ if (!utf8_mode()) {
+ *result = para_strdup(src);
+ /* replace non-printable characters by spaces */
+ for (n = 0; n < max_width && src[n]; n++) {
+ if (!isprint((unsigned char)src[n]))
+ (*result)[n] = ' ';
+ }
+ (*result)[n] = '\0';
+ *width = n;
+ return 0;
+ }
+ *result = NULL;
+ *width = 0;
+ memset(&state, 0, sizeof(state));
+ num_wchars = mbsrtowcs(NULL, &src, 0, &state);
+ if (num_wchars == (size_t)-1)
+ return -ERRNO_TO_PARA_ERROR(errno);
+ wcs = para_malloc((num_wchars + 1) * sizeof(*wcs));
+ memset(&state, 0, sizeof(state));
+ num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
+ assert(num_wchars != (size_t)-1);
+ for (n = 0; n < num_wchars && *width < max_width; n++) {
+ if (!iswprint(wcs[n]))
+ wcs[n] = L' ';
+ *width += xwcwidth(wcs[n], *width);
+ }
+ wcs[n] = L'\0';
+ n = wcstombs(NULL, wcs, 0) + 1;
+ *result = para_malloc(n);
+ num_wchars = wcstombs(*result, wcs, n);
+ assert(num_wchars != (size_t)-1);
+ free(wcs);
+ return 1;
+}