X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=string.c;h=f8b64b77c08d0c109c62129bdfeac04b775b3d6b;hp=4d8b8b747201950ab4eb23caf85d8599dee94a26;hb=9a67f9e1e37589b548fc1823a21ffdf0b6faf4ea;hpb=bf1831886b93258ffcec63c47d6737fffff53aae diff --git a/string.c b/string.c index 4d8b8b74..f8b64b77 100644 --- a/string.c +++ b/string.c @@ -1,17 +1,23 @@ /* - * Copyright (C) 2004-2013 Andre Noll + * Copyright (C) 2004 Andre Noll * * Licensed under the GPL v2. For licencing details see COPYING. */ /** \file string.c Memory allocation and string handling functions. */ -#include /* gettimeofday */ +#define _GNU_SOURCE + #include #include /* uname() */ + #include #include +#include +#include +#include + #include "para.h" #include "string.h" #include "error.h" @@ -25,12 +31,12 @@ * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors, * i.e. there is no need to check the return value in the caller. * - * \return A pointer to the newly allocated memory, which is suitably aligned - * for any kind of variable and may be different from \a p. + * \return A pointer to newly allocated memory which is suitably aligned for + * any kind of variable and may be different from \a p. * * \sa realloc(3). */ -__must_check __malloc void *para_realloc(void *p, size_t size) +__must_check void *para_realloc(void *p, size_t size) { /* * No need to check for NULL pointers: If p is NULL, the call @@ -292,24 +298,6 @@ __must_check char *para_basename(const char *name) return ret; } -/** - * Cut trailing newline. - * - * \param buf The string to be chopped. - * - * Replace the last character in \p buf by zero if it is equal to - * the newline character. - */ -void chop(char *buf) -{ - int n = strlen(buf); - - if (!n) - return; - if (buf[n - 1] == '\n') - buf[n - 1] = '\0'; -} - /** * Get the logname of the current user. * @@ -392,14 +380,13 @@ int for_each_line(unsigned flags, char *buf, size_t size, char *next_cr; next_cr = memchr(start, '\n', buf + size - start); - next_null = memchr(start, '\0', buf + size - start); + next_null = memchr(start, '\0', next_cr? + next_cr - start : buf + size - start); if (!next_cr && !next_null) break; - if (next_cr && next_null) { - end = next_cr < next_null? next_cr : next_null; - } else if (next_null) { + if (next_null) end = next_null; - } else + else end = next_cr; num_lines++; if (!(flags & FELF_DISCARD_FIRST) || start != buf) { @@ -945,3 +932,145 @@ char *key_value_copy(const char *src, size_t len, const char *key) return NULL; return safe_strdup(src + keylen + 1, len - keylen - 1); } + +static bool utf8_mode(void) +{ + static bool initialized, have_utf8; + + if (!initialized) { + char *info = nl_langinfo(CODESET); + have_utf8 = (info && strcmp(info, "UTF-8") == 0); + initialized = true; + PARA_INFO_LOG("%susing UTF-8 character encoding\n", + have_utf8? "" : "not "); + } + return have_utf8; +} + +/* + * glibc's wcswidth returns -1 if the string contains a tab character, which + * makes the function next to useless. The two functions below are taken from + * mutt. + */ + +#define IsWPrint(wc) (iswprint(wc) || wc >= 0xa0) + +static int mutt_wcwidth(wchar_t wc, size_t pos) +{ + int n; + + if (wc == 0x09) /* tab */ + return (pos | 7) + 1 - pos; + n = wcwidth(wc); + if (IsWPrint(wc) && n > 0) + return n; + if (!(wc & ~0x7f)) + return 2; + if (!(wc & ~0xffff)) + return 6; + return 10; +} + +static size_t mutt_wcswidth(const wchar_t *s, size_t n) +{ + size_t w = 0; + + while (n--) + w += mutt_wcwidth(*s++, w); + return w; +} + +/** + * Skip a given number of cells at the beginning of a string. + * + * \param s The input string. + * \param cells_to_skip Desired number of cells that should be skipped. + * \param bytes_to_skip Result. + * + * This function computes how many input bytes must be skipped to advance a + * string by the given width. If the current character encoding is not UTF-8, + * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise, + * \a s is treated as a multibyte string and on successful return, \a s + + * bytes_to_skip points to the start of a multibyte string such that the total + * width of the multibyte characters that are skipped by advancing \a s that + * many bytes equals at least \a cells_to_skip. + * + * \return Standard. + */ +int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip) +{ + wchar_t wc; + mbstate_t ps; + size_t n, bytes_parsed, cells_skipped; + + *bytes_to_skip = 0; + if (cells_to_skip == 0) + return 0; + if (!utf8_mode()) { + *bytes_to_skip = cells_to_skip; + return 0; + } + bytes_parsed = cells_skipped = 0; + memset(&ps, 0, sizeof(ps)); + n = strlen(s); + while (cells_to_skip > cells_skipped) { + size_t mbret; + + mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps); + assert(mbret != 0); + if (mbret == (size_t)-1 || mbret == (size_t)-2) + return -ERRNO_TO_PARA_ERROR(EILSEQ); + bytes_parsed += mbret; + cells_skipped += mutt_wcwidth(wc, cells_skipped); + } + *bytes_to_skip = bytes_parsed; + return 1; +} + +/** + * Compute the width of an UTF-8 string. + * + * \param s The string. + * \param result The width of \a s is returned here. + * + * If not in UTF8-mode. this function is just a wrapper for strlen(3). + * Otherwise \a s is treated as an UTF-8 string and its display width is + * computed. Note that this function may fail if the underlying call to + * mbsrtowcs(3) fails, so the caller must check the return value. + * + * \sa nl_langinfo(3), wcswidth(3). + * + * \return Standard. + */ +__must_check int strwidth(const char *s, size_t *result) +{ + const char *src = s; + mbstate_t state; + static wchar_t *dest; + size_t num_wchars; + + /* + * Never call any log function here. This may result in an endless loop + * as para_gui's para_log() calls this function. + */ + + if (!utf8_mode()) { + *result = strlen(s); + return 0; + } + memset(&state, 0, sizeof(state)); + *result = 0; + num_wchars = mbsrtowcs(NULL, &src, 0, &state); + if (num_wchars == (size_t)-1) + return -ERRNO_TO_PARA_ERROR(errno); + if (num_wchars == 0) + return 0; + dest = para_malloc(num_wchars * sizeof(*dest)); + src = s; + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(dest, &src, num_wchars, &state); + assert(num_wchars > 0 && num_wchars != (size_t)-1); + *result = mutt_wcswidth(dest, num_wchars); + free(dest); + return 1; +}