string.c: Handle invalid loglevels gracefully.
[paraslash.git] / string.c
1 /*
2 * Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>
3 *
4 * Licensed under the GPL v2. For licensing details see COPYING.
5 */
6
7 /** \file string.c Memory allocation and string handling functions. */
8
9 #define _GNU_SOURCE
10
11 #include <pwd.h>
12 #include <sys/utsname.h> /* uname() */
13
14 #include <string.h>
15 #include <regex.h>
16
17 #include <langinfo.h>
18 #include <wchar.h>
19 #include <wctype.h>
20
21 #include "para.h"
22 #include "string.h"
23 #include "error.h"
24
25 /**
26 * Paraslash's version of realloc().
27 *
28 * \param p Pointer to the memory block, may be \p NULL.
29 * \param size The desired new size.
30 *
31 * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
32 * i.e. there is no need to check the return value in the caller.
33 *
34 * \return A pointer to newly allocated memory which is suitably aligned for
35 * any kind of variable and may be different from \a p.
36 *
37 * \sa realloc(3).
38 */
39 __must_check void *para_realloc(void *p, size_t size)
40 {
41 /*
42 * No need to check for NULL pointers: If p is NULL, the call
43 * to realloc is equivalent to malloc(size)
44 */
45 assert(size);
46 if (!(p = realloc(p, size))) {
47 PARA_EMERG_LOG("realloc failed (size = %zu), aborting\n",
48 size);
49 exit(EXIT_FAILURE);
50 }
51 return p;
52 }
53
54 /**
55 * Paraslash's version of malloc().
56 *
57 * \param size The desired new size.
58 *
59 * A wrapper for malloc(3) which exits on errors.
60 *
61 * \return A pointer to the allocated memory, which is suitably aligned for any
62 * kind of variable.
63 *
64 * \sa malloc(3).
65 */
66 __must_check __malloc void *para_malloc(size_t size)
67 {
68 void *p;
69
70 assert(size);
71 p = malloc(size);
72 if (!p) {
73 PARA_EMERG_LOG("malloc failed (size = %zu), aborting\n",
74 size);
75 exit(EXIT_FAILURE);
76 }
77 return p;
78 }
79
80 /**
81 * Paraslash's version of calloc().
82 *
83 * \param size The desired new size.
84 *
85 * A wrapper for calloc(3) which exits on errors.
86 *
87 * \return A pointer to the allocated and zeroed-out memory, which is suitably
88 * aligned for any kind of variable.
89 *
90 * \sa calloc(3)
91 */
92 __must_check __malloc void *para_calloc(size_t size)
93 {
94 void *ret = para_malloc(size);
95
96 memset(ret, 0, size);
97 return ret;
98 }
99
100 /**
101 * Paraslash's version of strdup().
102 *
103 * \param s The string to be duplicated.
104 *
105 * A wrapper for strdup(3). It calls \p exit(EXIT_FAILURE) on errors, i.e.
106 * there is no need to check the return value in the caller.
107 *
108 * \return A pointer to the duplicated string. If \a s was the \p NULL pointer,
109 * an pointer to an empty string is returned.
110 *
111 * \sa strdup(3)
112 */
113 __must_check __malloc char *para_strdup(const char *s)
114 {
115 char *ret;
116
117 if ((ret = strdup(s? s: "")))
118 return ret;
119 PARA_EMERG_LOG("strdup failed, aborting\n");
120 exit(EXIT_FAILURE);
121 }
122
123 /**
124 * Print a formated message to a dynamically allocated string.
125 *
126 * \param result The formated string is returned here.
127 * \param fmt The format string.
128 * \param ap Initialized list of arguments.
129 *
130 * This function is similar to vasprintf(), a GNU extension which is not in C
131 * or POSIX. It allocates a string large enough to hold the output including
132 * the terminating null byte. The allocated string is returned via the first
133 * argument and must be freed by the caller. However, unlike vasprintf(), this
134 * function calls exit() if insufficient memory is available, while vasprintf()
135 * returns -1 in this case.
136 *
137 * \return Number of bytes written, not including the terminating \p NULL
138 * character.
139 *
140 * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
141 */
142 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
143 {
144 int ret;
145 size_t size = 150;
146 va_list aq;
147
148 *result = para_malloc(size + 1);
149 va_copy(aq, ap);
150 ret = vsnprintf(*result, size, fmt, aq);
151 va_end(aq);
152 assert(ret >= 0);
153 if (ret < size) /* OK */
154 return ret;
155 size = ret + 1;
156 *result = para_realloc(*result, size);
157 va_copy(aq, ap);
158 ret = vsnprintf(*result, size, fmt, aq);
159 va_end(aq);
160 assert(ret >= 0 && ret < size);
161 return ret;
162 }
163
164 /**
165 * Print to a dynamically allocated string, variable number of arguments.
166 *
167 * \param result See \ref xvasprintf().
168 * \param fmt Usual format string.
169 *
170 * \return The return value of the underlying call to \ref xvasprintf().
171 *
172 * \sa \ref xvasprintf() and the references mentioned there.
173 */
174 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
175 {
176 va_list ap;
177 unsigned ret;
178
179 va_start(ap, fmt);
180 ret = xvasprintf(result, fmt, ap);
181 va_end(ap);
182 return ret;
183 }
184
185 /**
186 * Allocate a sufficiently large string and print into it.
187 *
188 * \param fmt A usual format string.
189 *
190 * Produce output according to \p fmt. No artificial bound on the length of the
191 * resulting string is imposed.
192 *
193 * \return This function either returns a pointer to a string that must be
194 * freed by the caller or aborts without returning.
195 *
196 * \sa printf(3), xasprintf().
197 */
198 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
199 {
200 char *msg;
201 va_list ap;
202
203 va_start(ap, fmt);
204 xvasprintf(&msg, fmt, ap);
205 va_end(ap);
206 return msg;
207 }
208
/**
 * Free the memory a pointer refers to and reset the pointer to \p NULL.
 *
 * \param arg Address of the pointer whose target should be freed.
 *
 * Equivalent to "free(*arg); *arg = NULL;". The argument is declared as a
 * plain void pointer so that the address of any pointer variable can be
 * passed without a cast.
 */
void freep(void *arg)
{
	void **slot = arg;

	free(*slot);
	*slot = NULL;
}
222
223 /**
224 * Paraslash's version of strcat().
225 *
226 * \param a String to be appended to.
227 * \param b String to append.
228 *
229 * Append \p b to \p a.
230 *
231 * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
232 * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
233 * return \a a without making a copy of \a a. Otherwise, construct the
234 * concatenation \a c, free \a a (but not \a b) and return \a c.
235 *
236 * \sa strcat(3)
237 */
238 __must_check __malloc char *para_strcat(char *a, const char *b)
239 {
240 char *tmp;
241
242 if (!a)
243 return para_strdup(b);
244 if (!b)
245 return a;
246 tmp = make_message("%s%s", a, b);
247 free(a);
248 return tmp;
249 }
250
251 /**
252 * Paraslash's version of dirname().
253 *
254 * \param name Pointer to the full path.
255 *
256 * Compute the directory component of \p name.
257 *
258 * \return If \a name is \p NULL or the empty string, return \p NULL.
259 * Otherwise, Make a copy of \a name and return its directory component. Caller
260 * is responsible to free the result.
261 */
262 __must_check __malloc char *para_dirname(const char *name)
263 {
264 char *p, *ret;
265
266 if (!name || !*name)
267 return NULL;
268 ret = para_strdup(name);
269 p = strrchr(ret, '/');
270 if (!p)
271 *ret = '\0';
272 else
273 *p = '\0';
274 return ret;
275 }
276
277 /**
278 * Paraslash's version of basename().
279 *
280 * \param name Pointer to the full path.
281 *
282 * Compute the filename component of \a name.
283 *
284 * \return \p NULL if (a) \a name is the empty string or \p NULL, or (b) name
285 * ends with a slash. Otherwise, a pointer within \a name is returned. Caller
286 * must not free the result.
287 */
288 __must_check char *para_basename(const char *name)
289 {
290 char *ret;
291
292 if (!name || !*name)
293 return NULL;
294 ret = strrchr(name, '/');
295 if (!ret)
296 return (char *)name;
297 ret++;
298 return ret;
299 }
300
301 /**
302 * Get the logname of the current user.
303 *
304 * \return A dynamically allocated string that must be freed by the caller. On
305 * errors, the string "unknown_user" is returned, i.e. this function never
306 * returns \p NULL.
307 *
308 * \sa getpwuid(3).
309 */
310 __must_check __malloc char *para_logname(void)
311 {
312 struct passwd *pw = getpwuid(getuid());
313 return para_strdup(pw? pw->pw_name : "unknown_user");
314 }
315
316 /**
317 * Get the home directory of the current user.
318 *
319 * \return A dynamically allocated string that must be freed by the caller. If
320 * the home directory could not be found, this function returns "/tmp".
321 */
322 __must_check __malloc char *para_homedir(void)
323 {
324 struct passwd *pw = getpwuid(getuid());
325 return para_strdup(pw? pw->pw_dir : "/tmp");
326 }
327
328 /**
329 * Get the own hostname.
330 *
331 * \return A dynamically allocated string containing the hostname.
332 *
333 * \sa uname(2).
334 */
335 __malloc char *para_hostname(void)
336 {
337 struct utsname u;
338
339 uname(&u);
340 return para_strdup(u.nodename);
341 }
342
343 /**
344 * Call a custom function for each complete line.
345 *
346 * \param flags Any combination of flags defined in \ref for_each_line_flags.
347 * \param buf The buffer containing data separated by newlines.
348 * \param size The number of bytes in \a buf.
349 * \param line_handler The custom function.
350 * \param private_data Pointer passed to \a line_handler.
351 *
352 * For each complete line in \p buf, \p line_handler is called. The first
353 * argument to \p line_handler is (a copy of) the current line, and \p
354 * private_data is passed as the second argument. If the \p FELF_READ_ONLY
355 * flag is unset, a pointer into \a buf is passed to the line handler,
356 * otherwise a pointer to a copy of the current line is passed instead. This
357 * copy is freed immediately after the line handler returns.
358 *
359 * The function returns if \p line_handler returns a negative value or no more
360 * lines are in the buffer. The rest of the buffer (last chunk containing an
361 * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
362 * unset.
363 *
364 * \return On success this function returns the number of bytes not handled to
365 * \p line_handler. The only possible error is a negative return value from the
366 * line handler. In this case processing stops and the return value of the line
367 * handler is returned to indicate failure.
368 *
369 * \sa \ref for_each_line_flags.
370 */
371 int for_each_line(unsigned flags, char *buf, size_t size,
372 line_handler_t *line_handler, void *private_data)
373 {
374 char *start = buf, *end;
375 int ret, i, num_lines = 0;
376
377 // PARA_NOTICE_LOG("buf: %s\n", buf);
378 while (start < buf + size) {
379 char *next_null;
380 char *next_cr;
381
382 next_cr = memchr(start, '\n', buf + size - start);
383 next_null = memchr(start, '\0', next_cr?
384 next_cr - start : buf + size - start);
385 if (!next_cr && !next_null)
386 break;
387 if (next_null)
388 end = next_null;
389 else
390 end = next_cr;
391 num_lines++;
392 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
393 if (flags & FELF_READ_ONLY) {
394 size_t s = end - start;
395 char *b = para_malloc(s + 1);
396 memcpy(b, start, s);
397 b[s] = '\0';
398 ret = line_handler(b, private_data);
399 free(b);
400 } else {
401 *end = '\0';
402 ret = line_handler(start, private_data);
403 }
404 if (ret < 0)
405 return ret;
406 }
407 start = ++end;
408 }
409 i = buf + size - start;
410 if (i && i != size && !(flags & FELF_READ_ONLY))
411 memmove(buf, start, i);
412 return i;
413 }
414
/** Map the lower 4 bits of the argument to the corresponding hex digit. */
#define hex(a) (hexchar[(a) & 15])

/*
 * Write the lower 16 bits of n as four lowercase hex digits, followed by a
 * space character, to buf. Exactly five bytes are written and no terminating
 * null byte is appended.
 */
static void write_size_header(char *buf, int n)
{
	static char hexchar[] = "0123456789abcdef";
	int shift, pos = 0;

	/* Most significant nibble first. */
	for (shift = 12; shift >= 0; shift -= 4)
		buf[pos++] = hex(n >> shift);
	buf[4] = ' ';
}
428
429 /**
430 * Read a four-byte hex-number and return its value.
431 *
432 * Each status item sent by para_server is prefixed with such a hex number in
433 * ASCII which describes the size of the status item.
434 *
435 * \param buf The buffer which must be at least four bytes long.
436 *
437 * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
438 * buffer did not contain only hex digits.
439 */
440 int read_size_header(const char *buf)
441 {
442 int i, len = 0;
443
444 for (i = 0; i < 4; i++) {
445 unsigned char c = buf[i];
446 len <<= 4;
447 if (c >= '0' && c <= '9') {
448 len += c - '0';
449 continue;
450 }
451 if (c >= 'a' && c <= 'f') {
452 len += c - 'a' + 10;
453 continue;
454 }
455 return -E_SIZE_PREFIX;
456 }
457 if (buf[4] != ' ')
458 return -E_SIZE_PREFIX;
459 return len;
460 }
461
462 /**
463 * Safely print into a buffer at a given offset.
464 *
465 * \param b Determines the buffer, its size, and the offset.
466 * \param fmt The format string.
467 *
468 * This function prints into the buffer given by \a b at the offset which is
469 * also given by \a b. If there is not enough space to hold the result, the
470 * buffer size is doubled until the underlying call to vsnprintf() succeeds
471 * or the size of the buffer exceeds the maximal size specified in \a b.
472 *
473 * In the latter case the unmodified \a buf and \a offset values as well as the
474 * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
475 * If this function succeeds, i.e. returns a non-negative value, the offset of
476 * \a b is reset to zero and the given data is written to the beginning of the
477 * buffer. If \a max_size_handler() returns a negative value, this value is
478 * returned by \a para_printf().
479 *
480 * Upon return, the offset of \a b is adjusted accordingly so that subsequent
481 * calls to this function append data to what is already contained in the
482 * buffer.
483 *
484 * It's OK to call this function with \p b->buf being \p NULL. In this case, an
485 * initial buffer is allocated.
486 *
487 * \return The number of bytes printed into the buffer (not including the
488 * terminating \p NULL byte) on success, negative on errors. If there is no
489 * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
490 * fails.
491 *
492 * \sa make_message(), vsnprintf(3).
493 */
494 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
495 {
496 int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
497
498 if (!b->buf) {
499 b->buf = para_malloc(128);
500 b->size = 128;
501 b->offset = 0;
502 }
503 while (1) {
504 char *p = b->buf + b->offset;
505 size_t size = b->size - b->offset;
506 va_list ap;
507
508 if (size > sz_off) {
509 va_start(ap, fmt);
510 ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
511 va_end(ap);
512 if (ret > -1 && ret < size - sz_off) { /* success */
513 b->offset += ret + sz_off;
514 if (sz_off)
515 write_size_header(p, ret);
516 return ret + sz_off;
517 }
518 }
519 /* check if we may grow the buffer */
520 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
521 /* try again with more space */
522 b->size *= 2;
523 b->buf = para_realloc(b->buf, b->size);
524 continue;
525 }
526 /* can't grow buffer */
527 if (!b->offset || !b->max_size_handler) /* message too large */
528 return -ERRNO_TO_PARA_ERROR(ENOSPC);
529 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
530 if (ret < 0)
531 return ret;
532 b->offset = 0;
533 }
534 }
535
536 /** \cond llong_minmax */
537 /* LLONG_MAX and LLONG_MIN might not be defined. */
538 #ifndef LLONG_MAX
539 #define LLONG_MAX 9223372036854775807LL
540 #endif
541 #ifndef LLONG_MIN
542 #define LLONG_MIN (-LLONG_MAX - 1LL)
543 #endif
544 /** \endcond llong_minmax */
545
546 /**
547 * Convert a string to a 64-bit signed integer value.
548 *
549 * \param str The string to be converted.
550 * \param value Result pointer.
551 *
552 * \return Standard.
553 *
554 * \sa para_atoi32(), strtol(3), atoi(3).
555 */
556 int para_atoi64(const char *str, int64_t *value)
557 {
558 char *endptr;
559 long long tmp;
560
561 errno = 0; /* To distinguish success/failure after call */
562 tmp = strtoll(str, &endptr, 10);
563 if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
564 return -E_ATOI_OVERFLOW;
565 /*
566 * If there were no digits at all, strtoll() stores the original value
567 * of str in *endptr.
568 */
569 if (endptr == str)
570 return -E_ATOI_NO_DIGITS;
571 /*
572 * The implementation may also set errno and return 0 in case no
573 * conversion was performed.
574 */
575 if (errno != 0 && tmp == 0)
576 return -E_ATOI_NO_DIGITS;
577 if (*endptr != '\0') /* Further characters after number */
578 return -E_ATOI_JUNK_AT_END;
579 *value = tmp;
580 return 1;
581 }
582
583 /**
584 * Convert a string to a 32-bit signed integer value.
585 *
586 * \param str The string to be converted.
587 * \param value Result pointer.
588 *
589 * \return Standard.
590 *
591 * \sa para_atoi64().
592 */
593 int para_atoi32(const char *str, int32_t *value)
594 {
595 int64_t tmp;
596 int ret;
597 const int32_t max = 2147483647;
598
599 ret = para_atoi64(str, &tmp);
600 if (ret < 0)
601 return ret;
602 if (tmp > max || tmp < -max - 1)
603 return -E_ATOI_OVERFLOW;
604 *value = tmp;
605 return 1;
606 }
607
/*
 * Check whether arg starts with the loglevel name ll, ignoring case. Only
 * the first strlen(ll) characters are compared, so trailing characters in
 * arg do not prevent a match.
 */
static inline int loglevel_equal(const char *arg, const char * const ll)
{
	const size_t n = strlen(ll);

	return strncasecmp(arg, ll, n) == 0;
}
612
613 /**
614 * Compute the loglevel number from its name.
615 *
616 * \param txt The name of the loglevel (debug, info, ...).
617 *
618 * \return The numeric representation of the loglevel name.
619 */
620 int get_loglevel_by_name(const char *txt)
621 {
622 if (loglevel_equal(txt, "debug"))
623 return LL_DEBUG;
624 if (loglevel_equal(txt, "info"))
625 return LL_INFO;
626 if (loglevel_equal(txt, "notice"))
627 return LL_NOTICE;
628 if (loglevel_equal(txt, "warning"))
629 return LL_WARNING;
630 if (loglevel_equal(txt, "error"))
631 return LL_ERROR;
632 if (loglevel_equal(txt, "crit"))
633 return LL_CRIT;
634 if (loglevel_equal(txt, "emerg"))
635 return LL_EMERG;
636 return -E_BAD_LL;
637 }
638
/*
 * Extract the first word of buf.
 *
 * Words are separated by the characters contained in delim. Delimiters in
 * front of the word are skipped. A delimiter does not end the word if it is
 * preceded by a backslash or enclosed in single or double quotes. The quote
 * characters themselves and the escaping backslash are not copied to the
 * word, and the sequences \n and \t are replaced by a newline and a tab
 * character, respectively. A pair of quotes counts as a word even if nothing
 * is enclosed.
 *
 * On success a newly allocated copy of the word is returned via the word
 * pointer and the return value is the number of bytes of buf that were
 * consumed. If buf contains no word, zero is returned. If buf ends with a
 * backslash or within a quoted section, an error is logged and
 * -ERRNO_TO_PARA_ERROR(EINVAL) is returned. In both cases *word is set to
 * NULL.
 */
static int get_next_word(const char *buf, const char *delim, char **word)
{
	enum {
		HAVE_WORD = 1,	/* the word has started */
		ESCAPED = 2,	/* previous character was an unescaped backslash */
		IN_SQUOTE = 4,	/* within single quotes */
		IN_DQUOTE = 8	/* within double quotes */
	};
	const char *src;
	char *dst;
	int ret, st = 0;

	/* The word can not be longer than the input. */
	dst = para_malloc(strlen(buf) + 1);
	*dst = '\0';
	*word = dst;
	for (src = buf; *src; src++) {
		const char c = *src;

		if (c == '\\') {
			if (!(st & ESCAPED)) {
				st |= ESCAPED | HAVE_WORD;
				continue;
			}
			/* \\: copy a single backslash below */
		} else if (c == 'n' || c == 't') {
			if (st & ESCAPED) { /* \n or \t */
				*dst++ = (c == 'n')? '\n' : '\t';
				st &= ~ESCAPED;
				continue;
			}
		} else if (c == '"') {
			/* Literal if escaped or within single quotes. */
			if (!(st & (ESCAPED | IN_SQUOTE))) {
				if (st & IN_DQUOTE)
					st &= ~IN_DQUOTE;
				else
					st |= HAVE_WORD | IN_DQUOTE;
				continue;
			}
		} else if (c == '\'') {
			/* Literal if escaped or within double quotes. */
			if (!(st & (ESCAPED | IN_DQUOTE))) {
				if (st & IN_SQUOTE)
					st &= ~IN_SQUOTE;
				else
					st |= HAVE_WORD | IN_SQUOTE;
				continue;
			}
		} else if (strchr(delim, c)
				&& !(st & (ESCAPED | IN_SQUOTE | IN_DQUOTE))) {
			if (st & HAVE_WORD) /* delimiter terminates the word */
				break;
			continue; /* ignore delimiter at the beginning */
		}
		/* Ordinary character, or special character taken literally. */
		st |= HAVE_WORD;
		*dst++ = c;
		st &= ~ESCAPED;
	}
	if (!(st & HAVE_WORD)) {
		ret = 0;
		goto fail;
	}
	ret = -ERRNO_TO_PARA_ERROR(EINVAL);
	if (st & ESCAPED) {
		PARA_ERROR_LOG("trailing backslash\n");
		goto fail;
	}
	if (st & (IN_SQUOTE | IN_DQUOTE)) {
		PARA_ERROR_LOG("unmatched quote character\n");
		goto fail;
	}
	*dst = '\0';
	return src - buf;
fail:
	free(*word);
	*word = NULL;
	return ret;
}
733
/**
 * Get the number of the word the cursor is on.
 *
 * \param buf The zero-terminated line buffer.
 * \param delim Characters that separate words.
 * \param point The cursor position, given as a byte offset into \a buf.
 *
 * Words are extracted from \a buf one after another until the end of a word
 * reaches or exceeds the cursor position, or until no further word can be
 * extracted.
 *
 * \return Zero-based word number.
 */
int compute_word_num(const char *buf, const char *delim, int point)
{
	const char *pos = buf;
	int count = 0;

	for (;;) {
		char *word;
		int len = get_next_word(pos, delim, &word);

		if (len <= 0) /* no more words, or parse error */
			break;
		free(word); /* only the length matters */
		pos += len;
		if (pos >= buf + point)
			break;
		count++;
	}
	return count;
}
759
/**
 * Free an array of words created by create_argv() or create_shifted_argv().
 *
 * \param argv A pointer previously obtained by \ref create_argv(), may be
 * \p NULL in which case nothing happens.
 *
 * All elements up to the first \p NULL element are freed, then the array
 * itself.
 */
void free_argv(char **argv)
{
	char **p;

	if (!argv)
		return;
	for (p = argv; *p; p++)
		free(*p);
	free(argv);
}
775
/*
 * Split buf into words and store copies of them in a NULL-terminated array.
 *
 * The first offset elements of the array are set to NULL, the words follow.
 * On success the array is returned via result and the return value is the
 * index of the terminating NULL element, i.e. offset plus the number of
 * words. If a word can not be parsed, everything is freed, *result is set to
 * NULL and the negative return value of get_next_word() is returned.
 */
static int create_argv_offset(int offset, const char *buf, const char *delim,
		char ***result)
{
	char **argv = para_malloc((offset + 1) * sizeof(char *));
	const char *pos = buf;
	int n, ret;

	for (n = 0; n < offset; n++)
		argv[n] = NULL;
	while (pos && *pos) {
		char *word;

		ret = get_next_word(pos, delim, &word);
		if (ret < 0)
			goto fail;
		if (ret == 0) /* only delimiters are left */
			break;
		/* Make room for the new word and the terminating NULL. */
		argv = para_realloc(argv, (n + 2) * sizeof(char *));
		argv[n++] = word;
		pos += ret;
	}
	argv[n] = NULL;
	*result = argv;
	return n;
fail:
	while (n > 0)
		free(argv[--n]);
	free(argv);
	*result = NULL;
	return ret;
}
804
/**
 * Split a buffer into words.
 *
 * The parser honors single and double quotes, backslash-escaped characters
 * and the special sequences \p \\n and \p \\t. The result is a \p
 * NULL-terminated array of pointers to copies of the words contained in \a
 * buf. It has to be freed with \ref free_argv().
 *
 * \param buf The buffer to be split.
 * \param delim Each character in this string is treated as a separator.
 * \param result The array of words is returned here.
 *
 * \return Number of words in \a buf, negative on errors.
 */
int create_argv(const char *buf, const char *delim, char ***result)
{
	const int leading_slots = 0; /* words start at index zero */

	return create_argv_offset(leading_slots, buf, delim, result);
}
823
/**
 * Split a buffer into words, offset one.
 *
 * This works like \ref create_argv(), but the returned array is one element
 * larger: element zero is initialized to \p NULL and the words start at
 * index one. Since free_argv() stops at the first \p NULL element, callers
 * must set element zero to a non-NULL value before passing the array to
 * free_argv(), or the words will leak.
 *
 * \param buf See \ref create_argv().
 * \param delim See \ref create_argv().
 * \param result See \ref create_argv().
 *
 * \return Number of words plus one on success, negative on errors.
 */
int create_shifted_argv(const char *buf, const char *delim, char ***result)
{
	const int leading_slots = 1; /* keep element zero free for the caller */

	return create_argv_offset(leading_slots, buf, delim, result);
}
842
843 /**
844 * Find out if the given string is contained in the arg vector.
845 *
846 * \param arg The string to look for.
847 * \param argv The array to search.
848 *
849 * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
850 * arg was not found in \a argv.
851 */
852 int find_arg(const char *arg, char **argv)
853 {
854 int i;
855
856 if (!argv)
857 return -E_ARG_NOT_FOUND;
858 for (i = 0; argv[i]; i++)
859 if (strcmp(arg, argv[i]) == 0)
860 return i;
861 return -E_ARG_NOT_FOUND;
862 }
863
864 /**
865 * Compile a regular expression.
866 *
867 * This simple wrapper calls regcomp() and logs a message on errors.
868 *
869 * \param preg See regcomp(3).
870 * \param regex See regcomp(3).
871 * \param cflags See regcomp(3).
872 *
873 * \return Standard.
874 */
875 int para_regcomp(regex_t *preg, const char *regex, int cflags)
876 {
877 char *buf;
878 size_t size;
879 int ret = regcomp(preg, regex, cflags);
880
881 if (ret == 0)
882 return 1;
883 size = regerror(ret, preg, NULL, 0);
884 buf = para_malloc(size);
885 regerror(ret, preg, buf, size);
886 PARA_ERROR_LOG("%s\n", buf);
887 free(buf);
888 return -E_REGEX;
889 }
890
/**
 * strdup() for not necessarily zero-terminated strings.
 *
 * \param src The source buffer.
 * \param len The number of bytes to be copied.
 *
 * Exactly \a len bytes are copied from \a src, then a terminating null byte
 * is appended. If \a len is zero, \a src is not accessed at all.
 *
 * \return A 0-terminated buffer of length \a len + 1.
 *
 * This is similar to strndup(), which is a GNU extension. However, since the
 * memory is obtained from \ref para_malloc(), this function aborts if
 * insufficient memory is available while strndup() returns \p NULL in this
 * case.
 *
 * \sa strdup(), \ref para_strdup().
 */
char *safe_strdup(const char *src, size_t len)
{
	char *copy;

	/* len + 1 must not wrap around to zero. */
	assert(len != (size_t)-1);
	copy = para_malloc(len + 1);
	if (len)
		memcpy(copy, src, len);
	copy[len] = '\0';
	return copy;
}
916
/**
 * Copy the value of a key=value pair.
 *
 * This checks whether the given buffer starts with "key=", ignoring case. If
 * yes, a copy of the value is returned. The source buffer may not be
 * zero-terminated.
 *
 * \param src The source buffer.
 * \param len The number of valid bytes in \a src.
 * \param key Only copy if it is the value of this key.
 *
 * \return A zero-terminated, dynamically allocated copy of the value, which
 * is empty if nothing follows the equal sign. \p NULL if \a src is too short
 * to contain "key=" or does not start with it.
 */
char *key_value_copy(const char *src, size_t len, const char *key)
{
	/* Keep the length unsigned so it compares cleanly against len. */
	const size_t keylen = strlen(key);

	/* At least the key and the equal sign must fit into the buffer. */
	if (len <= keylen)
		return NULL;
	if (strncasecmp(src, key, keylen) != 0)
		return NULL;
	if (src[keylen] != '=')
		return NULL;
	return safe_strdup(src + keylen + 1, len - keylen - 1);
}
943
/*
 * Tell whether the codeset of the current locale is UTF-8.
 *
 * The answer is computed and logged on the first call only. Subsequent calls
 * return the cached value.
 */
static bool utf8_mode(void)
{
	static bool initialized, have_utf8;
	const char *codeset;

	if (initialized)
		return have_utf8;
	codeset = nl_langinfo(CODESET);
	have_utf8 = codeset && !strcmp(codeset, "UTF-8");
	/*
	 * NOTE(review): the flag is set before the log call. This looks
	 * deliberate since strwidth() warns that para_gui's log function
	 * calls back into this code. Keep this order.
	 */
	initialized = true;
	PARA_INFO_LOG("%susing UTF-8 character encoding\n",
		have_utf8? "" : "not ");
	return have_utf8;
}
957
958 /*
959 * glibc's wcswidth returns -1 if the string contains a tab character, which
960 * makes the function next to useless. The two functions below are taken from
961 * mutt.
962 */
963
/* True if wc is printable in the current locale or not below 0xa0. */
#define IsWPrint(wc) (iswprint(wc) || (wc) >= 0xa0)

/*
 * Compute the number of cells needed to display wc at column pos.
 *
 * A tab advances to the next multiple of eight. For printable characters of
 * positive width the result of wcwidth(3) is returned. Everything else is
 * accounted for with a fixed width of 2, 6 or 10 cells, depending on the
 * magnitude of wc (presumably the width of an escaped representation as
 * used by mutt, TODO confirm).
 */
static int mutt_wcwidth(wchar_t wc, size_t pos)
{
	int width;

	if (wc == 0x09) /* tab */
		return (pos | 7) + 1 - pos;
	width = wcwidth(wc);
	if (width > 0 && IsWPrint(wc))
		return width;
	if ((wc & ~0x7f) == 0) /* fits into 7 bits */
		return 2;
	if ((wc & ~0xffff) == 0) /* fits into 16 bits */
		return 6;
	return 10;
}
981
/*
 * Compute the display width of the first n wide characters of s.
 *
 * The characters are laid out from column zero, and the width of each
 * character is computed relative to the column reached so far, which matters
 * for tabs.
 */
static size_t mutt_wcswidth(const wchar_t *s, size_t n)
{
	size_t i, col = 0;

	for (i = 0; i < n; i++)
		col += mutt_wcwidth(s[i], col);
	return col;
}
990
/**
 * Skip a given number of cells at the beginning of a string.
 *
 * \param s The input string.
 * \param cells_to_skip Desired number of cells that should be skipped.
 * \param bytes_to_skip Result.
 *
 * This function computes how many input bytes must be skipped to advance a
 * string by the given width. If the current character encoding is not UTF-8,
 * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
 * \a s is treated as a multibyte string, and multibyte characters are
 * consumed until their total width is at least \a cells_to_skip. On
 * successful return, \a s + bytes_to_skip points to the first character
 * which was not consumed.
 *
 * \return Zero if \a cells_to_skip is zero or if not in UTF-8 mode, one if
 * the number of bytes was computed from the multibyte characters of \a s. If
 * mbrtowc(3) reports an invalid or incomplete multibyte sequence, \p
 * -ERRNO_TO_PARA_ERROR(EILSEQ) is returned and \a bytes_to_skip is zero.
 */
int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
{
	mbstate_t ps;
	size_t len, consumed = 0, width = 0;

	*bytes_to_skip = 0;
	if (cells_to_skip == 0)
		return 0;
	if (!utf8_mode()) {
		/* One byte per cell. */
		*bytes_to_skip = cells_to_skip;
		return 0;
	}
	memset(&ps, 0, sizeof(ps));
	len = strlen(s);
	while (width < cells_to_skip) {
		wchar_t wc;
		size_t mbret = mbrtowc(&wc, s + consumed, len - consumed, &ps);

		assert(mbret != 0);
		if (mbret == (size_t)-1 || mbret == (size_t)-2)
			return -ERRNO_TO_PARA_ERROR(EILSEQ);
		consumed += mbret;
		width += mutt_wcwidth(wc, width);
	}
	*bytes_to_skip = consumed;
	return 1;
}
1037
1038 /**
1039 * Compute the width of an UTF-8 string.
1040 *
1041 * \param s The string.
1042 * \param result The width of \a s is returned here.
1043 *
1044 * If not in UTF8-mode. this function is just a wrapper for strlen(3).
1045 * Otherwise \a s is treated as an UTF-8 string and its display width is
1046 * computed. Note that this function may fail if the underlying call to
1047 * mbsrtowcs(3) fails, so the caller must check the return value.
1048 *
1049 * \sa nl_langinfo(3), wcswidth(3).
1050 *
1051 * \return Standard.
1052 */
1053 __must_check int strwidth(const char *s, size_t *result)
1054 {
1055 const char *src = s;
1056 mbstate_t state;
1057 static wchar_t *dest;
1058 size_t num_wchars;
1059
1060 /*
1061 * Never call any log function here. This may result in an endless loop
1062 * as para_gui's para_log() calls this function.
1063 */
1064
1065 if (!utf8_mode()) {
1066 *result = strlen(s);
1067 return 0;
1068 }
1069 memset(&state, 0, sizeof(state));
1070 *result = 0;
1071 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1072 if (num_wchars == (size_t)-1)
1073 return -ERRNO_TO_PARA_ERROR(errno);
1074 if (num_wchars == 0)
1075 return 0;
1076 dest = para_malloc(num_wchars * sizeof(*dest));
1077 src = s;
1078 memset(&state, 0, sizeof(state));
1079 num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1080 assert(num_wchars > 0 && num_wchars != (size_t)-1);
1081 *result = mutt_wcswidth(dest, num_wchars);
1082 free(dest);
1083 return 1;
1084 }