/** \file string.c Memory allocation and string handling functions. */
-#define _GNU_SOURCE
+#include "para.h"
#include <pwd.h>
#include <sys/utsname.h> /* uname() */
-
#include <string.h>
#include <regex.h>
-
#include <langinfo.h>
#include <wchar.h>
#include <wctype.h>
-#include "para.h"
#include "string.h"
#include "error.h"
* Split a buffer into words.
*
* This parser honors single and double quotes, backslash-escaped characters
- * and special characters like \p \\n. The result contains pointers to copies
- * of the words contained in \a buf and has to be freed by using \ref
- * free_argv().
+ * and special characters like \\n. The result contains pointers to copies of
+ * the words contained in buf and has to be freed by using \ref free_argv().
*
* \param buf The buffer to be split.
* \param delim Each character in this string is treated as a separator.
* \param result The array of words is returned here.
*
- * \return Number of words in \a buf, negative on errors.
+ * It's OK to pass NULL as the buffer argument. This is equivalent to passing
+ * the empty string.
+ *
+ * \return Number of words in buf, negative on errors. The array returned
+ * through the result pointer is NULL terminated.
*/
int create_argv(const char *buf, const char *delim, char ***result)
{
free(dest);
return 1;
}
+
+/**
+ * Truncate and sanitize a (wide character) string.
+ *
+ * This replaces all non-printable characters by spaces and makes sure that the
+ * modified string does not exceed the given maximal width.
+ *
+ * \param src The source string in multi-byte form.
+ * \param max_width The maximal number of cells the result may occupy.
+ * \param result Sanitized multi-byte string, must be freed by caller.
+ * \param width The width of the sanitized string, always <= max_width.
+ *
+ * The function is wide-character aware but falls back to C strings for
+ * non-UTF-8 locales. In the fallback case each byte counts as one cell.
+ *
+ * In UTF-8 mode a character which occupies more than one cell is only kept if
+ * it fits completely into the remaining space. Otherwise the string is
+ * truncated in front of that character, so the width of the result never
+ * exceeds max_width.
+ *
+ * \return Standard. On success, *result points to a sanitized copy of the
+ * given string. This copy was allocated with malloc() and should hence be
+ * freed when the caller is no longer interested in the result. The return
+ * value on success is zero in the non-UTF-8 case and one in UTF-8 mode.
+ *
+ * In UTF-8 mode the function fails if the given string contains an invalid
+ * multibyte sequence. In this case, *result is set to NULL, and *width to
+ * zero.
+ */
+__must_check int sanitize_str(const char *src, size_t max_width,
+		char **result, size_t *width)
+{
+	mbstate_t state;
+	wchar_t *wcs; /* scratch buffer, allocated and freed per call */
+	size_t num_wchars, n;
+
+	if (!utf8_mode()) {
+		*result = para_strdup(src);
+		/* replace non-printable characters by spaces */
+		for (n = 0; n < max_width && src[n]; n++) {
+			if (!isprint((unsigned char)src[n]))
+				(*result)[n] = ' ';
+		}
+		/* truncate the copy after at most max_width bytes */
+		(*result)[n] = '\0';
+		*width = n;
+		return 0;
+	}
+	*result = NULL;
+	*width = 0;
+	/* First pass with a NULL destination only counts the wide characters. */
+	memset(&state, 0, sizeof(state));
+	num_wchars = mbsrtowcs(NULL, &src, 0, &state);
+	if (num_wchars == (size_t)-1)
+		return -ERRNO_TO_PARA_ERROR(errno);
+	wcs = para_malloc((num_wchars + 1) * sizeof(*wcs));
+	/* Second pass performs the conversion, starting from the initial state. */
+	memset(&state, 0, sizeof(state));
+	num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
+	assert(num_wchars != (size_t)-1);
+	for (n = 0; n < num_wchars && *width < max_width; n++) {
+		size_t w;
+
+		if (!iswprint(wcs[n]))
+			wcs[n] = L' ';
+		/* NOTE(review): assumes xwcwidth() never returns a negative value */
+		w = xwcwidth(wcs[n], *width);
+		/*
+		 * A character which needs more cells than are left must not be
+		 * included, or the result would become wider than max_width.
+		 * No underflow here because *width < max_width holds.
+		 */
+		if (w > max_width - *width)
+			break;
+		*width += w;
+	}
+	wcs[n] = L'\0';
+	/* Check for failure before adding one, as (size_t)-1 + 1 wraps to zero. */
+	n = wcstombs(NULL, wcs, 0);
+	assert(n != (size_t)-1);
+	n++; /* room for the terminating NUL byte */
+	*result = para_malloc(n);
+	num_wchars = wcstombs(*result, wcs, n);
+	assert(num_wchars != (size_t)-1);
+	free(wcs);
+	return 1;
+}