X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=string.c;h=e731bb496b4b0d0f2f5a7dac417cbbeab7b9f3f6;hp=813999bbf1ced122ea8aa1347929ee03ce045c77;hb=06d0c50525fc14e8127916481a74c14a2f7098af;hpb=7584638594109184f329bead008f1dcdd9030767 diff --git a/string.c b/string.c index 813999bb..e731bb49 100644 --- a/string.c +++ b/string.c @@ -1,17 +1,23 @@ /* - * Copyright (C) 2004-2011 Andre Noll + * Copyright (C) 2004 Andre Noll * * Licensed under the GPL v2. For licencing details see COPYING. */ /** \file string.c Memory allocation and string handling functions. */ -#include /* gettimeofday */ +#define _GNU_SOURCE + #include #include /* uname() */ + #include #include +#include +#include +#include + #include "para.h" #include "string.h" #include "error.h" @@ -25,12 +31,12 @@ * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors, * i.e. there is no need to check the return value in the caller. * - * \return A pointer to the newly allocated memory, which is suitably aligned - * for any kind of variable and may be different from \a p. + * \return A pointer to newly allocated memory which is suitably aligned for + * any kind of variable and may be different from \a p. * * \sa realloc(3). */ -__must_check __malloc void *para_realloc(void *p, size_t size) +__must_check void *para_realloc(void *p, size_t size) { /* * No need to check for NULL pointers: If p is NULL, the call @@ -114,6 +120,68 @@ __must_check __malloc char *para_strdup(const char *s) exit(EXIT_FAILURE); } +/** + * Print a formatted message to a dynamically allocated string. + * + * \param result The formatted string is returned here. + * \param fmt The format string. + * \param ap Initialized list of arguments. + * + * This function is similar to vasprintf(), a GNU extension which is not in C + * or POSIX. It allocates a string large enough to hold the output including + * the terminating null byte.
The allocated string is returned via the first + * argument and must be freed by the caller. However, unlike vasprintf(), this + * function calls exit() if insufficient memory is available, while vasprintf() + * returns -1 in this case. + * + * \return Number of bytes written, not including the terminating \p NULL + * character. + * + * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf(). + */ +__printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap) +{ + int ret; + size_t size = 150; + va_list aq; + + *result = para_malloc(size + 1); + va_copy(aq, ap); + ret = vsnprintf(*result, size, fmt, aq); + va_end(aq); + assert(ret >= 0); + if (ret < size) /* OK */ + return ret; + size = ret + 1; + *result = para_realloc(*result, size); + va_copy(aq, ap); + ret = vsnprintf(*result, size, fmt, aq); + va_end(aq); + assert(ret >= 0 && ret < size); + return ret; +} + +/** + * Print to a dynamically allocated string, variable number of arguments. + * + * \param result See \ref xvasprintf(). + * \param fmt Usual format string. + * + * \return The return value of the underlying call to \ref xvasprintf(). + * + * \sa \ref xvasprintf() and the references mentioned there. + */ +__printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...) +{ + va_list ap; + unsigned ret; + + va_start(ap, fmt); + ret = xvasprintf(result, fmt, ap); + va_end(ap); + return ret; +} + /** * Allocate a sufficiently large string and print into it. * @@ -125,13 +193,16 @@ __must_check __malloc char *para_strdup(const char *s) * \return This function either returns a pointer to a string that must be * freed by the caller or aborts without returning. * - * \sa printf(3). + * \sa printf(3), xasprintf(). */ __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...) 
{ char *msg; + va_list ap; - PARA_VSPRINTF(fmt, msg); + va_start(ap, fmt); + xvasprintf(&msg, fmt, ap); + va_end(ap); return msg; } @@ -227,24 +298,6 @@ __must_check char *para_basename(const char *name) return ret; } -/** - * Cut trailing newline. - * - * \param buf The string to be chopped. - * - * Replace the last character in \p buf by zero if it is equal to - * the newline character. - */ -void chop(char *buf) -{ - int n = strlen(buf); - - if (!n) - return; - if (buf[n - 1] == '\n') - buf[n - 1] = '\0'; -} - /** * Get the logname of the current user. * @@ -288,19 +341,35 @@ __malloc char *para_hostname(void) } /** - * Used to distinguish between read-only and read-write mode. + * Call a custom function for each complete line. + * + * \param flags Any combination of flags defined in \ref for_each_line_flags. + * \param buf The buffer containing data separated by newlines. + * \param size The number of bytes in \a buf. + * \param line_handler The custom function. + * \param private_data Pointer passed to \a line_handler. + * + * For each complete line in \p buf, \p line_handler is called. The first + * argument to \p line_handler is (a copy of) the current line, and \p + * private_data is passed as the second argument. If the \p FELF_READ_ONLY + * flag is unset, a pointer into \a buf is passed to the line handler, + * otherwise a pointer to a copy of the current line is passed instead. This + * copy is freed immediately after the line handler returns. * - * \sa for_each_line(), for_each_line_ro(). + * The function returns if \p line_handler returns a negative value or no more + * lines are in the buffer. The rest of the buffer (last chunk containing an + * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is + * unset. + * + * \return On success this function returns the number of bytes not handled to + * \p line_handler. The only possible error is a negative return value from the + * line handler. 
In this case processing stops and the return value of the line + * handler is returned to indicate failure. + * + * \sa \ref for_each_line_flags. */ -enum for_each_line_modes{ - /** Activate read-only mode. */ - LINE_MODE_RO, - /** Activate read-write mode. */ - LINE_MODE_RW -}; - -static int for_each_complete_line(enum for_each_line_modes mode, char *buf, - size_t size, line_handler_t *line_handler, void *private_data) +int for_each_line(unsigned flags, char *buf, size_t size, + line_handler_t *line_handler, void *private_data) { char *start = buf, *end; int ret, i, num_lines = 0; @@ -311,95 +380,38 @@ static int for_each_complete_line(enum for_each_line_modes mode, char *buf, char *next_cr; next_cr = memchr(start, '\n', buf + size - start); - next_null = memchr(start, '\0', buf + size - start); + next_null = memchr(start, '\0', next_cr? + next_cr - start : buf + size - start); if (!next_cr && !next_null) break; - if (next_cr && next_null) { - end = next_cr < next_null? next_cr : next_null; - } else if (next_null) { + if (next_null) end = next_null; - } else + else end = next_cr; num_lines++; - if (!line_handler) { - start = ++end; - continue; - } - if (mode == LINE_MODE_RO) { - size_t s = end - start; - char *b = para_malloc(s + 1); - memcpy(b, start, s); - b[s] = '\0'; -// PARA_NOTICE_LOG("b: %s, start: %s\n", b, start); - ret = line_handler(b, private_data); - free(b); - } else { - *end = '\0'; - ret = line_handler(start, private_data); + if (!(flags & FELF_DISCARD_FIRST) || start != buf) { + if (flags & FELF_READ_ONLY) { + size_t s = end - start; + char *b = para_malloc(s + 1); + memcpy(b, start, s); + b[s] = '\0'; + ret = line_handler(b, private_data); + free(b); + } else { + *end = '\0'; + ret = line_handler(start, private_data); + } + if (ret < 0) + return ret; } - if (ret < 0) - return ret; start = ++end; } - if (!line_handler || mode == LINE_MODE_RO) - return num_lines; i = buf + size - start; - if (i && i != size) + if (i && i != size && !(flags & 
FELF_READ_ONLY)) memmove(buf, start, i); return i; } -/** - * Call a custom function for each complete line. - * - * \param buf The buffer containing data separated by newlines. - * \param size The number of bytes in \a buf. - * \param line_handler The custom function. - * \param private_data Pointer passed to \a line_handler. - * - * If \p line_handler is \p NULL, the function returns the number of complete - * lines in \p buf. Otherwise, \p line_handler is called for each complete - * line in \p buf. The first argument to \p line_handler is the current line, - * and \p private_data is passed as the second argument. The function returns - * if \p line_handler returns a negative value or no more lines are in the - * buffer. The rest of the buffer (last chunk containing an incomplete line) - * is moved to the beginning of the buffer. - * - * \return If \p line_handler is not \p NULL, this function returns the number - * of bytes not handled to \p line_handler on success, or the negative return - * value of the \p line_handler on errors. - * - * \sa for_each_line_ro(). - */ -int for_each_line(char *buf, size_t size, line_handler_t *line_handler, - void *private_data) -{ - return for_each_complete_line(LINE_MODE_RW, buf, size, line_handler, - private_data); -} - -/** - * Call a custom function for each complete line. - * - * \param buf Same meaning as in \p for_each_line(). - * \param size Same meaning as in \p for_each_line(). - * \param line_handler Same meaning as in \p for_each_line(). - * \param private_data Same meaning as in \p for_each_line(). - * - * This function behaves like \p for_each_line(), but \a buf is left unchanged. - * - * \return On success, the function returns the number of complete lines in \p - * buf, otherwise the (negative) return value of \p line_handler is returned. - * - * \sa for_each_line(). 
- */ -int for_each_line_ro(char *buf, size_t size, line_handler_t *line_handler, - void *private_data) -{ - return for_each_complete_line(LINE_MODE_RO, buf, size, line_handler, - private_data); -} - /** Return the hex characters of the lower 4 bits. */ #define hex(a) (hexchar[(a) & 15]) @@ -521,14 +533,15 @@ __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...) } } -/** \cond LLONG_MAX and LLONG_MIN might not be defined. */ +/** \cond llong_minmax */ +/* LLONG_MAX and LLONG_MIN might not be defined. */ #ifndef LLONG_MAX #define LLONG_MAX 9223372036854775807LL #endif #ifndef LLONG_MIN #define LLONG_MIN (-LLONG_MAX - 1LL) #endif -/** \endcond */ +/** \endcond llong_minmax */ /** * Convert a string to a 64-bit signed integer value. @@ -549,10 +562,18 @@ int para_atoi64(const char *str, int64_t *value) tmp = strtoll(str, &endptr, 10); if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN)) return -E_ATOI_OVERFLOW; - if (errno != 0 && tmp == 0) /* other error */ - return -E_STRTOLL; + /* + * If there were no digits at all, strtoll() stores the original value + * of str in *endptr. + */ if (endptr == str) return -E_ATOI_NO_DIGITS; + /* + * The implementation may also set errno and return 0 in case no + * conversion was performed. + */ + if (errno != 0 && tmp == 0) + return -E_ATOI_NO_DIGITS; if (*endptr != '\0') /* Further characters after number */ return -E_ATOI_JUNK_AT_END; *value = tmp; @@ -612,10 +633,10 @@ int get_loglevel_by_name(const char *txt) return LL_CRIT; if (loglevel_equal(txt, "emerg")) return LL_EMERG; - return -1; + return -E_BAD_LL; } -static int get_next_word(const char *buf, const char *delim, char **word) +static int get_next_word(const char *buf, const char *delim, char **word) { enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2, LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8}; @@ -711,7 +732,33 @@ out: } /** - * Free an array of words created by create_argv(). + * Get the number of the word the cursor is on. 
+ * + * \param buf The zero-terminated line buffer. + * \param delim Characters that separate words. + * \param point The cursor position. + * + * \return Zero-based word number. + */ +int compute_word_num(const char *buf, const char *delim, int point) +{ + int ret, num_words; + const char *p; + char *word; + + for (p = buf, num_words = 0; ; p += ret, num_words++) { + ret = get_next_word(p, delim, &word); + if (ret <= 0) + break; + free(word); + if (p + ret >= buf + point) + break; + } + return num_words; +} + +/** + * Free an array of words created by create_argv() or create_shifted_argv(). * * \param argv A pointer previously obtained by \ref create_argv(). */ @@ -719,11 +766,42 @@ void free_argv(char **argv) { int i; + if (!argv) + return; for (i = 0; argv[i]; i++) free(argv[i]); free(argv); } +static int create_argv_offset(int offset, const char *buf, const char *delim, + char ***result) +{ + char *word, **argv = para_malloc((offset + 1) * sizeof(char *)); + const char *p; + int i, ret; + + for (i = 0; i < offset; i++) + argv[i] = NULL; + for (p = buf; p && *p; p += ret, i++) { + ret = get_next_word(p, delim, &word); + if (ret < 0) + goto err; + if (!ret) + break; + argv = para_realloc(argv, (i + 2) * sizeof(char*)); + argv[i] = word; + } + argv[i] = NULL; + *result = argv; + return i; +err: + while (i > 0) + free(argv[--i]); + free(argv); + *result = NULL; + return ret; +} + /** * Split a buffer into words. 
* @@ -740,27 +818,47 @@ void free_argv(char **argv) */ int create_argv(const char *buf, const char *delim, char ***result) { - char *word, **argv = para_malloc(2 * sizeof(char *)); - const char *p; - int ret, num_words; + return create_argv_offset(0, buf, delim, result); +} - for (p = buf, num_words = 0; ; p += ret, num_words++) { - ret = get_next_word(p, delim, &word); - if (ret < 0) - goto err; - if (!ret) - break; - argv = para_realloc(argv, (num_words + 2) * sizeof(char*)); - argv[num_words] = word; - } - argv[num_words] = NULL; - *result = argv; - return num_words; -err: - while (num_words > 0) - free(argv[--num_words]); - free(argv); - return ret; +/** + * Split a buffer into words, offset one. + * + * This is similar to \ref create_argv() but the returned array is one element + * larger, words start at index one and element zero is initialized to \p NULL. + * Callers must set element zero to a non-NULL value before calling free_argv() + * on the returned array to avoid a memory leak. + * + * \param buf See \ref create_argv(). + * \param delim See \ref create_argv(). + * \param result See \ref create_argv(). + * + * \return Number of words plus one on success, negative on errors. + */ +int create_shifted_argv(const char *buf, const char *delim, char ***result) +{ + return create_argv_offset(1, buf, delim, result); +} + +/** + * Find out if the given string is contained in the arg vector. + * + * \param arg The string to look for. + * \param argv The array to search. + * + * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if + * arg was not found in \a argv. 
+ */ +int find_arg(const char *arg, char **argv) +{ + int i; + + if (!argv) + return -E_ARG_NOT_FOUND; + for (i = 0; argv[i]; i++) + if (strcmp(arg, argv[i]) == 0) + return i; + return -E_ARG_NOT_FOUND; } /** @@ -789,3 +887,249 @@ int para_regcomp(regex_t *preg, const char *regex, int cflags) free(buf); return -E_REGEX; } + +/** + * strdup() for not necessarily zero-terminated strings. + * + * \param src The source buffer. + * \param len The number of bytes to be copied. + * + * \return A 0-terminated buffer of length \a len + 1. + * + * This is similar to strndup(), which is a GNU extension. However, one + * difference is that strndup() returns \p NULL if insufficient memory was + * available while this function aborts in this case. + * + * \sa strdup(), \ref para_strdup(). + */ +char *safe_strdup(const char *src, size_t len) +{ + char *p; + + assert(len < (size_t)-1); + p = para_malloc(len + 1); + if (len > 0) + memcpy(p, src, len); + p[len] = '\0'; + return p; +} + +/** + * Copy the value of a key=value pair. + * + * This checks whether the given buffer starts with "key=", ignoring case. If + * yes, a copy of the value is returned. The source buffer may not be + * zero-terminated. + * + * \param src The source buffer. + * \param len The number of bytes of the tag. + * \param key Only copy if it is the value of this key. + * + * \return A zero-terminated buffer, or \p NULL if the key was + * not of the given type. 
+ */ +char *key_value_copy(const char *src, size_t len, const char *key) +{ + int keylen = strlen(key); + + if (len <= keylen) + return NULL; + if (strncasecmp(src, key, keylen)) + return NULL; + if (src[keylen] != '=') + return NULL; + return safe_strdup(src + keylen + 1, len - keylen - 1); +} + +static bool utf8_mode(void) +{ + static bool initialized, have_utf8; + + if (!initialized) { + char *info = nl_langinfo(CODESET); + have_utf8 = (info && strcmp(info, "UTF-8") == 0); + initialized = true; + PARA_INFO_LOG("%susing UTF-8 character encoding\n", + have_utf8? "" : "not "); + } + return have_utf8; +} + +static int xwcwidth(wchar_t wc, size_t pos) +{ + int n; + + /* special-case for tab */ + if (wc == 0x09) /* tab */ + return (pos | 7) + 1 - pos; + n = wcwidth(wc); + /* wcwidth() returns -1 for non-printable characters */ + return n >= 0? n : 1; +} + +static size_t xwcswidth(const wchar_t *s, size_t n) +{ + size_t w = 0; + + while (n--) + w += xwcwidth(*s++, w); + return w; +} + +/** + * Skip a given number of cells at the beginning of a string. + * + * \param s The input string. + * \param cells_to_skip Desired number of cells that should be skipped. + * \param bytes_to_skip Result. + * + * This function computes how many input bytes must be skipped to advance a + * string by the given width. If the current character encoding is not UTF-8, + * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise, + * \a s is treated as a multibyte string and on successful return, \a s + + * bytes_to_skip points to the start of a multibyte string such that the total + * width of the multibyte characters that are skipped by advancing \a s that + * many bytes equals at least \a cells_to_skip. + * + * \return Standard.
+ */ +int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip) +{ + wchar_t wc; + mbstate_t ps; + size_t n, bytes_parsed, cells_skipped; + + *bytes_to_skip = 0; + if (cells_to_skip == 0) + return 0; + if (!utf8_mode()) { + *bytes_to_skip = cells_to_skip; + return 0; + } + bytes_parsed = cells_skipped = 0; + memset(&ps, 0, sizeof(ps)); + n = strlen(s); + while (cells_to_skip > cells_skipped) { + size_t mbret; + + mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps); + assert(mbret != 0); + if (mbret == (size_t)-1 || mbret == (size_t)-2) + return -ERRNO_TO_PARA_ERROR(EILSEQ); + bytes_parsed += mbret; + cells_skipped += xwcwidth(wc, cells_skipped); + } + *bytes_to_skip = bytes_parsed; + return 1; +} + +/** + * Compute the width of a UTF-8 string. + * + * \param s The string. + * \param result The width of \a s is returned here. + * + * If not in UTF-8 mode, this function is just a wrapper for strlen(3). + * Otherwise \a s is treated as a UTF-8 string and its display width is + * computed. Note that this function may fail if the underlying call to + * mbsrtowcs(3) fails, so the caller must check the return value. + * + * \sa nl_langinfo(3), wcswidth(3). + * + * \return Standard. + */ +__must_check int strwidth(const char *s, size_t *result) +{ + const char *src = s; + mbstate_t state; + static wchar_t *dest; + size_t num_wchars; + + /* + * Never call any log function here. This may result in an endless loop + * as para_gui's para_log() calls this function.
+ */ + + if (!utf8_mode()) { + *result = strlen(s); + return 0; + } + memset(&state, 0, sizeof(state)); + *result = 0; + num_wchars = mbsrtowcs(NULL, &src, 0, &state); + if (num_wchars == (size_t)-1) + return -ERRNO_TO_PARA_ERROR(errno); + if (num_wchars == 0) + return 0; + dest = para_malloc((num_wchars + 1) * sizeof(*dest)); + src = s; + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(dest, &src, num_wchars, &state); + assert(num_wchars > 0 && num_wchars != (size_t)-1); + *result = xwcswidth(dest, num_wchars); + free(dest); + return 1; +} + +/** + * Truncate and sanitize a (wide character) string. + * + * This replaces all non-printable characters by spaces and makes sure that the + * modified string does not exceed the given maximal width. + * + * \param src The source string in multi-byte form. + * \param max_width The maximal number of cells the result may occupy. + * \param result Sanitized multi-byte string, must be freed by caller. + * \param width The width of the sanitized string, always <= max_width. + * + * The function is wide-character aware but falls back to C strings for + * non-UTF-8 locales. + * + * \return Standard. On success, *result points to a sanitized copy of the + * given string. This copy was allocated with malloc() and should hence be + * freed when the caller is no longer interested in the result. + * + * The function fails if the given string contains an invalid multibyte + * sequence. In this case, *result is set to NULL, and *width to zero. 
+ */ +__must_check int sanitize_str(const char *src, size_t max_width, + char **result, size_t *width) +{ + mbstate_t state; + static wchar_t *wcs; + size_t num_wchars, n; + + if (!utf8_mode()) { + *result = para_strdup(src); + /* replace non-printable characters by spaces */ + for (n = 0; n < max_width && src[n]; n++) { + if (!isprint((unsigned char)src[n])) + (*result)[n] = ' '; + } + (*result)[n] = '\0'; + *width = n; + return 0; + } + *result = NULL; + *width = 0; + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(NULL, &src, 0, &state); + if (num_wchars == (size_t)-1) + return -ERRNO_TO_PARA_ERROR(errno); + wcs = para_malloc((num_wchars + 1) * sizeof(*wcs)); + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state); + assert(num_wchars != (size_t)-1); + for (n = 0; n < num_wchars && *width < max_width; n++) { + if (!iswprint(wcs[n])) + wcs[n] = L' '; + *width += xwcwidth(wcs[n], *width); + } + wcs[n] = L'\0'; + n = wcstombs(NULL, wcs, 0) + 1; + *result = para_malloc(n); + num_wchars = wcstombs(*result, wcs, n); + assert(num_wchars != (size_t)-1); + free(wcs); + return 1; +}