X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=blobdiff_plain;f=string.c;h=bb63120184c79b44270a4ae1ba671e6dd74de5e1;hp=51985404817d032261d0f19ba2ebce35682156dd;hb=57e0c93e87138fdf1c8793c3d1f3d2880d717548;hpb=f31f8ceeaff0db558b63a46559e7d7464859d0c4;ds=sidebyside diff --git a/string.c b/string.c index 51985404..bb631201 100644 --- a/string.c +++ b/string.c @@ -6,14 +6,14 @@ /** \file string.c Memory allocation and string handling functions. */ -#include "para.h" -#include "string.h" - #include /* gettimeofday */ #include #include /* uname() */ #include +#include +#include "para.h" +#include "string.h" #include "error.h" /** @@ -258,56 +258,6 @@ __must_check __malloc char *para_homedir(void) return para_strdup(pw? pw->pw_dir : "/tmp"); } -/** - * Split string and return pointers to its parts. - * - * \param args The string to be split. - * \param argv_ptr Pointer to the list of substrings. - * \param delim Delimiter. - * - * This function modifies \a args by replacing each occurrence of \a delim by - * zero. A \p NULL-terminated array of pointers to char* is allocated dynamically - * and these pointers are initialized to point to the broken-up substrings - * within \a args. A pointer to this array is returned via \a argv_ptr. - * - * \return The number of substrings found in \a args. - */ -unsigned split_args(char *args, char *** const argv_ptr, const char *delim) -{ - char *p; - char **argv; - size_t n = 0, i, j; - - p = args + strspn(args, delim); - for (;;) { - i = strcspn(p, delim); - if (!i) - break; - p += i; - n++; - p += strspn(p, delim); - } - *argv_ptr = para_malloc((n + 1) * sizeof(char *)); - argv = *argv_ptr; - i = 0; - p = args + strspn(args, delim); - while (p) { - argv[i] = p; - j = strcspn(p, delim); - if (!j) - break; - p += strcspn(p, delim); - if (*p) { - *p = '\0'; - p++; - p += strspn(p, delim); - } - i++; - } - argv[n] = NULL; - return n; -} - /** * Get the own hostname. * @@ -637,3 +587,167 @@ int get_loglevel_by_name(const char *txt) return LL_EMERG; return -1; } + +static int get_next_word(const char *buf, const char *delim, char **word) +{ + enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2, + LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8}; + const char *in; + char *out; + int ret, state = 0; + + out = para_malloc(strlen(buf) + 1); + *out = '\0'; + *word = out; + for (in = buf; *in; in++) { + const char *p; + + switch (*in) { + case '\\': + if (state & LSF_BACKSLASH) /* \\ */ + goto copy_char; + state |= LSF_BACKSLASH; + state |= LSF_HAVE_WORD; + continue; + case 'n': + case 't': + if (state & LSF_BACKSLASH) { /* \n or \t */ + *out++ = (*in == 'n')? '\n' : '\t'; + state &= ~LSF_BACKSLASH; + continue; + } + goto copy_char; + case '"': + if (state & LSF_BACKSLASH) /* \" */ + goto copy_char; + if (state & LSF_SINGLE_QUOTE) /* '" */ + goto copy_char; + if (state & LSF_DOUBLE_QUOTE) { + state &= ~LSF_DOUBLE_QUOTE; + continue; + } + state |= LSF_HAVE_WORD; + state |= LSF_DOUBLE_QUOTE; + continue; + case '\'': + if (state & LSF_BACKSLASH) /* \' */ + goto copy_char; + if (state & LSF_DOUBLE_QUOTE) /* "' */ + goto copy_char; + if (state & LSF_SINGLE_QUOTE) { + state &= ~LSF_SINGLE_QUOTE; + continue; + } + state |= LSF_HAVE_WORD; + state |= LSF_SINGLE_QUOTE; + continue; + } + for (p = delim; *p; p++) { + if (*in != *p) + continue; + if (state & LSF_BACKSLASH) + goto copy_char; + if (state & LSF_SINGLE_QUOTE) + goto copy_char; + if (state & LSF_DOUBLE_QUOTE) + goto copy_char; + if (state & LSF_HAVE_WORD) + goto success; + break; + } + if (*p) /* ignore delimiter at the beginning */ + continue; +copy_char: + state |= LSF_HAVE_WORD; + *out++ = *in; + state &= ~LSF_BACKSLASH; + } + ret = 0; + if (!(state & LSF_HAVE_WORD)) + goto out; + ret = -ERRNO_TO_PARA_ERROR(EINVAL); + if (state & LSF_BACKSLASH) { + PARA_ERROR_LOG("trailing backslash\n"); + goto out; + } + if ((state & LSF_SINGLE_QUOTE) || (state & LSF_DOUBLE_QUOTE)) { + PARA_ERROR_LOG("unmatched quote character\n"); + goto out; + } +success: + *out = '\0'; + return in - buf; +out: + free(*word); + *word = NULL; + return ret; +} + +/** + * Free an array of words created by create_argv(). + * + * \param argv A pointer previously obtained by \ref create_argv(). + */ +void free_argv(char **argv) +{ + int i; + + for (i = 0; argv[i]; i++) + free(argv[i]); + free(argv); +} + +/** + * Split a buffer into words. + * + * This parser honors single and double quotes, backslash-escaped characters + * and special characters like \p \\n. The result contains pointers to copies + * of the words contained in \a buf and has to be freed by using \ref + * free_argv(). + * + * \param buf The buffer to be split. + * \param delim Each character in this string is treated as a separator. + * \param result The array of words is returned here. + * + * \return Number of words in \a buf, negative on errors. + */ +int create_argv(const char *buf, const char *delim, char ***result) +{ + char *word, **argv = para_malloc(2 * sizeof(char *)); + const char *p; + int ret, num_words; + + for (p = buf, num_words = 0; ; p += ret, num_words++) { + ret = get_next_word(p, delim, &word); + if (ret < 0) + goto err; + if (!ret) + break; + argv = para_realloc(argv, (num_words + 2) * sizeof(char*)); + argv[num_words] = word; + } + argv[num_words] = NULL; + *result = argv; + return num_words; +err: + while (num_words > 0) + free(argv[--num_words]); + free(argv); + return ret; +} + +int para_regcomp(regex_t *preg, const char *regex, int cflags) +{ + char *buf; + size_t size; + int ret = regcomp(preg, regex, cflags); + + if (ret == 0) + return 1; + size = regerror(ret, preg, NULL, 0); + buf = para_malloc(size); + regerror(ret, preg, buf, size); + PARA_ERROR_LOG("%s\n", buf); + free(buf); + return -E_REGEX; +}