e675502cf6707240a61be523c6fb510692991acd
[paraslash.git] / string.c
1 /*
2 * Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6
7 /** \file string.c Memory allocation and string handling functions. */
8
9 #include "para.h"
10
11 #include <pwd.h>
12 #include <sys/utsname.h> /* uname() */
13 #include <regex.h>
14 #include <langinfo.h>
15 #include <wchar.h>
16 #include <wctype.h>
17
18 #include "string.h"
19 #include "error.h"
20
21 /**
22 * Paraslash's version of realloc().
23 *
24 * \param p Pointer to the memory block, may be \p NULL.
25 * \param size The desired new size.
26 *
27 * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
28 * i.e. there is no need to check the return value in the caller.
29 *
30 * \return A pointer to newly allocated memory which is suitably aligned for
31 * any kind of variable and may be different from \a p.
32 *
33 * \sa realloc(3).
34 */
35 __must_check void *para_realloc(void *p, size_t size)
36 {
37 /*
38 * No need to check for NULL pointers: If p is NULL, the call
39 * to realloc is equivalent to malloc(size)
40 */
41 assert(size);
42 if (!(p = realloc(p, size))) {
43 PARA_EMERG_LOG("realloc failed (size = %zu), aborting\n",
44 size);
45 exit(EXIT_FAILURE);
46 }
47 return p;
48 }
49
50 /**
51 * Paraslash's version of malloc().
52 *
53 * \param size The desired new size.
54 *
55 * A wrapper for malloc(3) which exits on errors.
56 *
57 * \return A pointer to the allocated memory, which is suitably aligned for any
58 * kind of variable.
59 *
60 * \sa malloc(3).
61 */
62 __must_check __malloc void *para_malloc(size_t size)
63 {
64 void *p;
65
66 assert(size);
67 p = malloc(size);
68 if (!p) {
69 PARA_EMERG_LOG("malloc failed (size = %zu), aborting\n",
70 size);
71 exit(EXIT_FAILURE);
72 }
73 return p;
74 }
75
76 /**
77 * Paraslash's version of calloc().
78 *
79 * \param size The desired new size.
80 *
81 * A wrapper for calloc(3) which exits on errors.
82 *
83 * \return A pointer to the allocated and zeroed-out memory, which is suitably
84 * aligned for any kind of variable.
85 *
86 * \sa calloc(3)
87 */
88 __must_check __malloc void *para_calloc(size_t size)
89 {
90 void *ret = para_malloc(size);
91
92 memset(ret, 0, size);
93 return ret;
94 }
95
96 /**
97 * Paraslash's version of strdup().
98 *
99 * \param s The string to be duplicated.
100 *
101 * A wrapper for strdup(3). It calls \p exit(EXIT_FAILURE) on errors, i.e.
102 * there is no need to check the return value in the caller.
103 *
104 * \return A pointer to the duplicated string. If \a s was the \p NULL pointer,
105 * an pointer to an empty string is returned.
106 *
107 * \sa strdup(3)
108 */
109 __must_check __malloc char *para_strdup(const char *s)
110 {
111 char *ret;
112
113 if ((ret = strdup(s? s: "")))
114 return ret;
115 PARA_EMERG_LOG("strdup failed, aborting\n");
116 exit(EXIT_FAILURE);
117 }
118
119 /**
120 * Print a formated message to a dynamically allocated string.
121 *
122 * \param result The formated string is returned here.
123 * \param fmt The format string.
124 * \param ap Initialized list of arguments.
125 *
126 * This function is similar to vasprintf(), a GNU extension which is not in C
127 * or POSIX. It allocates a string large enough to hold the output including
128 * the terminating null byte. The allocated string is returned via the first
129 * argument and must be freed by the caller. However, unlike vasprintf(), this
130 * function calls exit() if insufficient memory is available, while vasprintf()
131 * returns -1 in this case.
132 *
133 * \return Number of bytes written, not including the terminating \p NULL
134 * character.
135 *
136 * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
137 */
138 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
139 {
140 int ret;
141 size_t size = 150;
142 va_list aq;
143
144 *result = para_malloc(size + 1);
145 va_copy(aq, ap);
146 ret = vsnprintf(*result, size, fmt, aq);
147 va_end(aq);
148 assert(ret >= 0);
149 if (ret < size) /* OK */
150 return ret;
151 size = ret + 1;
152 *result = para_realloc(*result, size);
153 va_copy(aq, ap);
154 ret = vsnprintf(*result, size, fmt, aq);
155 va_end(aq);
156 assert(ret >= 0 && ret < size);
157 return ret;
158 }
159
160 /**
161 * Print to a dynamically allocated string, variable number of arguments.
162 *
163 * \param result See \ref xvasprintf().
164 * \param fmt Usual format string.
165 *
166 * \return The return value of the underlying call to \ref xvasprintf().
167 *
168 * \sa \ref xvasprintf() and the references mentioned there.
169 */
170 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
171 {
172 va_list ap;
173 unsigned ret;
174
175 va_start(ap, fmt);
176 ret = xvasprintf(result, fmt, ap);
177 va_end(ap);
178 return ret;
179 }
180
181 /**
182 * Allocate a sufficiently large string and print into it.
183 *
184 * \param fmt A usual format string.
185 *
186 * Produce output according to \p fmt. No artificial bound on the length of the
187 * resulting string is imposed.
188 *
189 * \return This function either returns a pointer to a string that must be
190 * freed by the caller or aborts without returning.
191 *
192 * \sa printf(3), \ref xasprintf().
193 */
194 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
195 {
196 char *msg;
197 va_list ap;
198
199 va_start(ap, fmt);
200 xvasprintf(&msg, fmt, ap);
201 va_end(ap);
202 return msg;
203 }
204
/**
 * Free the memory a pointer refers to and reset the pointer to \p NULL.
 *
 * \param arg The address of the pointer whose target should be freed.
 *
 * This is equivalent to "free(*arg); *arg = NULL;". The argument is declared
 * as a plain void pointer so that the address of any pointer variable can be
 * passed without a cast.
 */
void freep(void *arg)
{
	void **slot = arg;

	free(*slot);
	*slot = NULL;
}
218
219 /**
220 * Paraslash's version of strcat().
221 *
222 * \param a String to be appended to.
223 * \param b String to append.
224 *
225 * Append \p b to \p a.
226 *
227 * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
228 * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
229 * return \a a without making a copy of \a a. Otherwise, construct the
230 * concatenation \a c, free \a a (but not \a b) and return \a c.
231 *
232 * \sa strcat(3).
233 */
234 __must_check __malloc char *para_strcat(char *a, const char *b)
235 {
236 char *tmp;
237
238 if (!a)
239 return para_strdup(b);
240 if (!b)
241 return a;
242 tmp = make_message("%s%s", a, b);
243 free(a);
244 return tmp;
245 }
246
247 /**
248 * Paraslash's version of dirname().
249 *
250 * \param name Pointer to the full path.
251 *
252 * Compute the directory component of \p name.
253 *
254 * \return If \a name is \p NULL or the empty string, return \p NULL.
255 * Otherwise, Make a copy of \a name and return its directory component. Caller
256 * is responsible to free the result.
257 */
258 __must_check __malloc char *para_dirname(const char *name)
259 {
260 char *p, *ret;
261
262 if (!name || !*name)
263 return NULL;
264 ret = para_strdup(name);
265 p = strrchr(ret, '/');
266 if (!p)
267 *ret = '\0';
268 else
269 *p = '\0';
270 return ret;
271 }
272
273 /**
274 * Paraslash's version of basename().
275 *
276 * \param name Pointer to the full path.
277 *
278 * Compute the filename component of \a name.
279 *
280 * \return \p NULL if (a) \a name is the empty string or \p NULL, or (b) name
281 * ends with a slash. Otherwise, a pointer within \a name is returned. Caller
282 * must not free the result.
283 */
284 __must_check char *para_basename(const char *name)
285 {
286 char *ret;
287
288 if (!name || !*name)
289 return NULL;
290 ret = strrchr(name, '/');
291 if (!ret)
292 return (char *)name;
293 ret++;
294 return ret;
295 }
296
297 /**
298 * Get the logname of the current user.
299 *
300 * \return A dynamically allocated string that must be freed by the caller. On
301 * errors, the string "unknown_user" is returned, i.e. this function never
302 * returns \p NULL.
303 *
304 * \sa getpwuid(3).
305 */
306 __must_check __malloc char *para_logname(void)
307 {
308 struct passwd *pw = getpwuid(getuid());
309 return para_strdup(pw? pw->pw_name : "unknown_user");
310 }
311
312 /**
313 * Get the home directory of the current user.
314 *
315 * \return A dynamically allocated string that must be freed by the caller. If
316 * the home directory could not be found, this function returns "/tmp".
317 */
318 __must_check __malloc char *para_homedir(void)
319 {
320 struct passwd *pw = getpwuid(getuid());
321 return para_strdup(pw? pw->pw_dir : "/tmp");
322 }
323
324 /**
325 * Get the own hostname.
326 *
327 * \return A dynamically allocated string containing the hostname.
328 *
329 * \sa uname(2).
330 */
331 __malloc char *para_hostname(void)
332 {
333 struct utsname u;
334
335 uname(&u);
336 return para_strdup(u.nodename);
337 }
338
339 /**
340 * Call a custom function for each complete line.
341 *
342 * \param flags Any combination of flags defined in \ref for_each_line_flags.
343 * \param buf The buffer containing data separated by newlines.
344 * \param size The number of bytes in \a buf.
345 * \param line_handler The custom function.
346 * \param private_data Pointer passed to \a line_handler.
347 *
348 * For each complete line in \p buf, \p line_handler is called. The first
349 * argument to \p line_handler is (a copy of) the current line, and \p
350 * private_data is passed as the second argument. If the \p FELF_READ_ONLY
351 * flag is unset, a pointer into \a buf is passed to the line handler,
352 * otherwise a pointer to a copy of the current line is passed instead. This
353 * copy is freed immediately after the line handler returns.
354 *
355 * The function returns if \p line_handler returns a negative value or no more
356 * lines are in the buffer. The rest of the buffer (last chunk containing an
357 * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
358 * unset.
359 *
360 * \return On success this function returns the number of bytes not handled to
361 * \p line_handler. The only possible error is a negative return value from the
362 * line handler. In this case processing stops and the return value of the line
363 * handler is returned to indicate failure.
364 *
365 * \sa \ref for_each_line_flags.
366 */
367 int for_each_line(unsigned flags, char *buf, size_t size,
368 line_handler_t *line_handler, void *private_data)
369 {
370 char *start = buf, *end;
371 int ret, i, num_lines = 0;
372
373 // PARA_NOTICE_LOG("buf: %s\n", buf);
374 while (start < buf + size) {
375 char *next_null;
376 char *next_cr;
377
378 next_cr = memchr(start, '\n', buf + size - start);
379 next_null = memchr(start, '\0', next_cr?
380 next_cr - start : buf + size - start);
381 if (!next_cr && !next_null)
382 break;
383 if (next_null)
384 end = next_null;
385 else
386 end = next_cr;
387 num_lines++;
388 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
389 if (flags & FELF_READ_ONLY) {
390 size_t s = end - start;
391 char *b = para_malloc(s + 1);
392 memcpy(b, start, s);
393 b[s] = '\0';
394 ret = line_handler(b, private_data);
395 free(b);
396 } else {
397 *end = '\0';
398 ret = line_handler(start, private_data);
399 }
400 if (ret < 0)
401 return ret;
402 }
403 start = ++end;
404 }
405 i = buf + size - start;
406 if (i && i != size && !(flags & FELF_READ_ONLY))
407 memmove(buf, start, i);
408 return i;
409 }
410
/** Return the hex character of the lower 4 bits. */
#define hex(a) (hexchar[(a) & 15])

/*
 * Store the low 16 bits of n as four lowercase hex digits, most significant
 * digit first, followed by a single space. Exactly five bytes are written to
 * buf and no terminating null byte is appended.
 */
static void write_size_header(char *buf, int n)
{
	static char hexchar[] = "0123456789abcdef";
	int i;

	for (i = 0; i < 4; i++)
		buf[i] = hex(n >> (12 - 4 * i));
	buf[4] = ' ';
}
424
425 /**
426 * Read a four-byte hex-number and return its value.
427 *
428 * Each status item sent by para_server is prefixed with such a hex number in
429 * ASCII which describes the size of the status item.
430 *
431 * \param buf The buffer which must be at least four bytes long.
432 *
433 * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
434 * buffer did not contain only hex digits.
435 */
436 int read_size_header(const char *buf)
437 {
438 int i, len = 0;
439
440 for (i = 0; i < 4; i++) {
441 unsigned char c = buf[i];
442 len <<= 4;
443 if (c >= '0' && c <= '9') {
444 len += c - '0';
445 continue;
446 }
447 if (c >= 'a' && c <= 'f') {
448 len += c - 'a' + 10;
449 continue;
450 }
451 return -E_SIZE_PREFIX;
452 }
453 if (buf[4] != ' ')
454 return -E_SIZE_PREFIX;
455 return len;
456 }
457
458 /**
459 * Safely print into a buffer at a given offset.
460 *
461 * \param b Determines the buffer, its size, and the offset.
462 * \param fmt The format string.
463 *
464 * This function prints into the buffer given by \a b at the offset which is
465 * also given by \a b. If there is not enough space to hold the result, the
466 * buffer size is doubled until the underlying call to vsnprintf() succeeds
467 * or the size of the buffer exceeds the maximal size specified in \a b.
468 *
469 * In the latter case the unmodified \a buf and \a offset values as well as the
470 * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
471 * If this function succeeds, i.e. returns a non-negative value, the offset of
472 * \a b is reset to zero and the given data is written to the beginning of the
473 * buffer. If \a max_size_handler() returns a negative value, this value is
474 * returned by \a para_printf().
475 *
476 * Upon return, the offset of \a b is adjusted accordingly so that subsequent
477 * calls to this function append data to what is already contained in the
478 * buffer.
479 *
480 * It's OK to call this function with \p b->buf being \p NULL. In this case, an
481 * initial buffer is allocated.
482 *
483 * \return The number of bytes printed into the buffer (not including the
484 * terminating \p NULL byte) on success, negative on errors. If there is no
485 * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
486 * fails.
487 *
488 * \sa make_message(), vsnprintf(3).
489 */
490 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
491 {
492 int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
493
494 if (!b->buf) {
495 b->buf = para_malloc(128);
496 b->size = 128;
497 b->offset = 0;
498 }
499 while (1) {
500 char *p = b->buf + b->offset;
501 size_t size = b->size - b->offset;
502 va_list ap;
503
504 if (size > sz_off) {
505 va_start(ap, fmt);
506 ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
507 va_end(ap);
508 if (ret > -1 && ret < size - sz_off) { /* success */
509 b->offset += ret + sz_off;
510 if (sz_off)
511 write_size_header(p, ret);
512 return ret + sz_off;
513 }
514 }
515 /* check if we may grow the buffer */
516 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
517 /* try again with more space */
518 b->size *= 2;
519 b->buf = para_realloc(b->buf, b->size);
520 continue;
521 }
522 /* can't grow buffer */
523 if (!b->offset || !b->max_size_handler) /* message too large */
524 return -ERRNO_TO_PARA_ERROR(ENOSPC);
525 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
526 if (ret < 0)
527 return ret;
528 b->offset = 0;
529 }
530 }
531
532 /** \cond llong_minmax */
533 /* LLONG_MAX and LLONG_MIN might not be defined. */
534 #ifndef LLONG_MAX
535 #define LLONG_MAX 9223372036854775807LL
536 #endif
537 #ifndef LLONG_MIN
538 #define LLONG_MIN (-LLONG_MAX - 1LL)
539 #endif
540 /** \endcond llong_minmax */
541
542 /**
543 * Convert a string to a 64-bit signed integer value.
544 *
545 * \param str The string to be converted.
546 * \param value Result pointer.
547 *
548 * \return Standard.
549 *
550 * \sa \ref para_atoi32(), strtol(3), atoi(3).
551 */
552 int para_atoi64(const char *str, int64_t *value)
553 {
554 char *endptr;
555 long long tmp;
556
557 errno = 0; /* To distinguish success/failure after call */
558 tmp = strtoll(str, &endptr, 10);
559 if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
560 return -E_ATOI_OVERFLOW;
561 /*
562 * If there were no digits at all, strtoll() stores the original value
563 * of str in *endptr.
564 */
565 if (endptr == str)
566 return -E_ATOI_NO_DIGITS;
567 /*
568 * The implementation may also set errno and return 0 in case no
569 * conversion was performed.
570 */
571 if (errno != 0 && tmp == 0)
572 return -E_ATOI_NO_DIGITS;
573 if (*endptr != '\0') /* Further characters after number */
574 return -E_ATOI_JUNK_AT_END;
575 *value = tmp;
576 return 1;
577 }
578
579 /**
580 * Convert a string to a 32-bit signed integer value.
581 *
582 * \param str The string to be converted.
583 * \param value Result pointer.
584 *
585 * \return Standard.
586 *
587 * \sa \ref para_atoi64().
588 */
589 int para_atoi32(const char *str, int32_t *value)
590 {
591 int64_t tmp;
592 int ret;
593 const int32_t max = 2147483647;
594
595 ret = para_atoi64(str, &tmp);
596 if (ret < 0)
597 return ret;
598 if (tmp > max || tmp < -max - 1)
599 return -E_ATOI_OVERFLOW;
600 *value = tmp;
601 return 1;
602 }
603
/*
 * Check whether arg starts with the loglevel name ll, ignoring case. Only the
 * first strlen(ll) characters of arg are compared, so trailing characters in
 * arg do not prevent a match.
 */
static inline int loglevel_equal(const char *arg, const char * const ll)
{
	const size_t name_len = strlen(ll);

	return strncasecmp(arg, ll, name_len) == 0;
}
608
609 /**
610 * Compute the loglevel number from its name.
611 *
612 * \param txt The name of the loglevel (debug, info, ...).
613 *
614 * \return The numeric representation of the loglevel name.
615 */
616 int get_loglevel_by_name(const char *txt)
617 {
618 if (loglevel_equal(txt, "debug"))
619 return LL_DEBUG;
620 if (loglevel_equal(txt, "info"))
621 return LL_INFO;
622 if (loglevel_equal(txt, "notice"))
623 return LL_NOTICE;
624 if (loglevel_equal(txt, "warning"))
625 return LL_WARNING;
626 if (loglevel_equal(txt, "error"))
627 return LL_ERROR;
628 if (loglevel_equal(txt, "crit"))
629 return LL_CRIT;
630 if (loglevel_equal(txt, "emerg"))
631 return LL_EMERG;
632 return -E_BAD_LL;
633 }
634
/*
 * Extract the first word of a buffer.
 *
 * Leading delimiters are skipped. Single and double quotes protect
 * delimiters, a backslash protects the character that follows it, and the
 * sequences \n and \t are translated to newline and tab. The word is stored
 * in a newly allocated buffer which is returned via the word pointer.
 *
 * Returns the number of input bytes consumed if a word was found. In this
 * case the caller must free the word. Returns zero if the buffer contains no
 * word, and a negative error code on a trailing backslash or an unmatched
 * quote. In the last two cases the word pointer is set to NULL.
 */
static int get_next_word(const char *buf, const char *delim, char **word)
{
	enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
		LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
	/* States in which a delimiter is taken literally. */
	const int protected_mask = LSF_BACKSLASH | LSF_SINGLE_QUOTE
		| LSF_DOUBLE_QUOTE;
	const char *src;
	char *dst;
	int ret, state = 0;

	/* The word can never be longer than the input. */
	dst = para_malloc(strlen(buf) + 1);
	*dst = '\0';
	*word = dst;
	for (src = buf; *src; src++) {
		switch (*src) {
		case '\\':
			if (state & LSF_BACKSLASH) /* \\ */
				goto copy_char;
			state |= LSF_BACKSLASH | LSF_HAVE_WORD;
			continue;
		case 'n':
		case 't':
			if (!(state & LSF_BACKSLASH))
				goto copy_char;
			/* \n or \t */
			*dst++ = (*src == 'n')? '\n' : '\t';
			state &= ~LSF_BACKSLASH;
			continue;
		case '"':
			/* \" and '" are ordinary characters */
			if (state & (LSF_BACKSLASH | LSF_SINGLE_QUOTE))
				goto copy_char;
			if (state & LSF_DOUBLE_QUOTE)
				state &= ~LSF_DOUBLE_QUOTE;
			else
				state |= LSF_HAVE_WORD | LSF_DOUBLE_QUOTE;
			continue;
		case '\'':
			/* \' and "' are ordinary characters */
			if (state & (LSF_BACKSLASH | LSF_DOUBLE_QUOTE))
				goto copy_char;
			if (state & LSF_SINGLE_QUOTE)
				state &= ~LSF_SINGLE_QUOTE;
			else
				state |= LSF_HAVE_WORD | LSF_SINGLE_QUOTE;
			continue;
		}
		if (strchr(delim, *src)) {
			if (state & protected_mask)
				goto copy_char;
			if (state & LSF_HAVE_WORD)
				goto success;
			continue; /* ignore delimiter at the beginning */
		}
copy_char:
		state |= LSF_HAVE_WORD;
		*dst++ = *src;
		state &= ~LSF_BACKSLASH;
	}
	if (!(state & LSF_HAVE_WORD)) {
		ret = 0;
		goto out;
	}
	ret = -ERRNO_TO_PARA_ERROR(EINVAL);
	if (state & LSF_BACKSLASH) {
		PARA_ERROR_LOG("trailing backslash\n");
		goto out;
	}
	if (state & (LSF_SINGLE_QUOTE | LSF_DOUBLE_QUOTE)) {
		PARA_ERROR_LOG("unmatched quote character\n");
		goto out;
	}
success:
	*dst = '\0';
	return src - buf;
out:
	free(*word);
	*word = NULL;
	return ret;
}
729
/**
 * Get the number of the word the cursor is on.
 *
 * \param buf The zero-terminated line buffer.
 * \param delim Characters that separate words.
 * \param point The cursor position.
 *
 * Words are extracted from \a buf one after another until either no further
 * word can be extracted or the end of the current word lies at or behind the
 * cursor position.
 *
 * \return Zero-based word number.
 */
int compute_word_num(const char *buf, const char *delim, int point)
{
	const char *pos = buf;
	int count = 0;

	for (;;) {
		char *word;
		int consumed = get_next_word(pos, delim, &word);

		if (consumed <= 0) /* no more words, or parse error */
			break;
		free(word);
		if (pos + consumed >= buf + point)
			break;
		pos += consumed;
		count++;
	}
	return count;
}
755
/**
 * Free an array of words created by create_argv() or create_shifted_argv().
 *
 * \param argv A pointer previously obtained by \ref create_argv(), may be
 * \p NULL.
 *
 * Every element up to, but not including, the first \p NULL element is
 * freed, followed by the array itself.
 */
void free_argv(char **argv)
{
	char **elem;

	if (!argv)
		return;
	for (elem = argv; *elem; elem++)
		free(*elem);
	free(argv);
}
771
/*
 * Split a buffer into words, leaving the first offset array elements empty.
 *
 * The first offset elements of the result are set to NULL and the words of
 * buf are stored behind them, followed by a terminating NULL element. A NULL
 * buffer is treated like the empty string.
 *
 * Returns offset plus the number of words on success. On errors, everything
 * allocated so far is freed, the result pointer is set to NULL and the
 * negative error code of get_next_word() is returned.
 */
static int create_argv_offset(int offset, const char *buf, const char *delim,
		char ***result)
{
	char **argv = para_malloc((offset + 1) * sizeof(char *));
	const char *pos = buf;
	int n, ret;

	for (n = 0; n < offset; n++)
		argv[n] = NULL;
	while (pos && *pos) {
		char *word;

		ret = get_next_word(pos, delim, &word);
		if (ret < 0)
			goto fail;
		if (ret == 0) /* only delimiters left */
			break;
		/* Make room for the new word and the terminating NULL. */
		argv = para_realloc(argv, (n + 2) * sizeof(char*));
		argv[n++] = word;
		pos += ret;
	}
	argv[n] = NULL;
	*result = argv;
	return n;
fail:
	while (n > 0)
		free(argv[--n]);
	free(argv);
	*result = NULL;
	return ret;
}
800
/**
 * Split a buffer into words.
 *
 * This parser honors single and double quotes, backslash-escaped characters
 * and special characters like \\n. The result contains pointers to copies of
 * the words contained in buf and has to be freed by using \ref free_argv().
 *
 * \param buf The buffer to be split.
 * \param delim Each character in this string is treated as a separator.
 * \param result The array of words is returned here.
 *
 * It's OK to pass NULL as the buffer argument. This is equivalent to passing
 * the empty string.
 *
 * \return Number of words in buf, negative on errors. The array returned
 * through the result pointer is NULL terminated. On errors, the result
 * pointer is set to NULL.
 */
int create_argv(const char *buf, const char *delim, char ***result)
{
	const int first_word_index = 0;

	return create_argv_offset(first_word_index, buf, delim, result);
}
822
/**
 * Split a buffer into words, offset one.
 *
 * This is similar to \ref create_argv() but the returned array is one element
 * larger, words start at index one and element zero is initialized to \p NULL.
 * Since free_argv() stops at the first \p NULL element, callers must set
 * element zero to a non-NULL value before calling free_argv() on the returned
 * array to avoid a memory leak.
 *
 * \param buf See \ref create_argv().
 * \param delim See \ref create_argv().
 * \param result See \ref create_argv().
 *
 * \return Number of words plus one on success, negative on errors.
 */
int create_shifted_argv(const char *buf, const char *delim, char ***result)
{
	const int first_word_index = 1;

	return create_argv_offset(first_word_index, buf, delim, result);
}
841
842 /**
843 * Find out if the given string is contained in the arg vector.
844 *
845 * \param arg The string to look for.
846 * \param argv The array to search.
847 *
848 * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
849 * arg was not found in \a argv.
850 */
851 int find_arg(const char *arg, char **argv)
852 {
853 int i;
854
855 if (!argv)
856 return -E_ARG_NOT_FOUND;
857 for (i = 0; argv[i]; i++)
858 if (strcmp(arg, argv[i]) == 0)
859 return i;
860 return -E_ARG_NOT_FOUND;
861 }
862
863 /**
864 * Compile a regular expression.
865 *
866 * This simple wrapper calls regcomp() and logs a message on errors.
867 *
868 * \param preg See regcomp(3).
869 * \param regex See regcomp(3).
870 * \param cflags See regcomp(3).
871 *
872 * \return Standard.
873 */
874 int para_regcomp(regex_t *preg, const char *regex, int cflags)
875 {
876 char *buf;
877 size_t size;
878 int ret = regcomp(preg, regex, cflags);
879
880 if (ret == 0)
881 return 1;
882 size = regerror(ret, preg, NULL, 0);
883 buf = para_malloc(size);
884 regerror(ret, preg, buf, size);
885 PARA_ERROR_LOG("%s\n", buf);
886 free(buf);
887 return -E_REGEX;
888 }
889
/**
 * strdup() for not necessarily zero-terminated strings.
 *
 * \param src The source buffer.
 * \param len The number of bytes to be copied.
 *
 * Exactly \a len bytes are copied from \a src, and a terminating null byte
 * is appended. If \a len is zero, \a src is not accessed.
 *
 * \return A 0-terminated buffer of length \a len + 1 which must be freed by
 * the caller.
 *
 * This is similar to strndup(), which is a GNU extension. However, one
 * difference is that strndup() returns \p NULL if insufficient memory was
 * available while this function aborts in this case.
 *
 * \sa strdup(), \ref para_strdup().
 */
char *safe_strdup(const char *src, size_t len)
{
	char *copy;

	/* len + 1 must not wrap around to zero. */
	assert(len < (size_t)-1);
	copy = para_malloc(len + 1);
	if (len)
		memcpy(copy, src, len);
	copy[len] = '\0';
	return copy;
}
915
/**
 * Copy the value of a key=value pair.
 *
 * This checks whether the given buffer starts with "key=", ignoring case. If
 * yes, a copy of the value is returned. The source buffer need not be
 * zero-terminated.
 *
 * \param src The source buffer.
 * \param len The number of bytes of the tag.
 * \param key Only copy if it is the value of this key.
 *
 * \return A zero-terminated buffer that must be freed by the caller, or \p
 * NULL if the buffer is too short to contain "key=" or does not start with
 * it. An empty value yields an empty string rather than \p NULL.
 */
char *key_value_copy(const char *src, size_t len, const char *key)
{
	/* size_t avoids truncation and a signed/unsigned comparison. */
	const size_t keylen = strlen(key);

	if (len <= keylen) /* no room for the equal sign */
		return NULL;
	if (strncasecmp(src, key, keylen))
		return NULL;
	if (src[keylen] != '=')
		return NULL;
	return safe_strdup(src + keylen + 1, len - keylen - 1);
}
942
/*
 * Tell whether the codeset of the current locale is UTF-8.
 *
 * The codeset is queried on the first call only. The answer is cached and an
 * info message is logged once.
 */
static bool utf8_mode(void)
{
	static bool initialized, have_utf8;
	const char *codeset;

	if (initialized)
		return have_utf8;
	codeset = nl_langinfo(CODESET);
	have_utf8 = codeset && !strcmp(codeset, "UTF-8");
	/*
	 * Mark the result as valid before logging so that a nested call to
	 * this function during the log call returns the cached value.
	 */
	initialized = true;
	PARA_INFO_LOG("%susing UTF-8 character encoding\n",
		have_utf8? "" : "not ");
	return have_utf8;
}
956
/*
 * Compute the number of cells a wide character occupies at a given column.
 *
 * A tab advances to the next multiple of eight. For all other characters the
 * result of wcwidth() is used, except that characters for which wcwidth()
 * reports a negative width are counted as one cell.
 */
static int xwcwidth(wchar_t wc, size_t pos)
{
	int cells;

	if (wc == 0x09) /* tab: fill up to the next tab stop */
		return 8 - (pos & 7);
	cells = wcwidth(wc);
	/* wcwidth() returns -1 for non-printable characters */
	return cells < 0? 1 : cells;
}
968
/*
 * Compute the total number of cells occupied by the first n wide characters
 * of s, assuming the string starts at column zero. The running total is
 * handed to xwcwidth() as the column so that tabs are expanded correctly.
 */
static size_t xwcswidth(const wchar_t *s, size_t n)
{
	size_t total = 0, i;

	for (i = 0; i < n; i++)
		total += xwcwidth(s[i], total);
	return total;
}
977
/**
 * Skip a given number of cells at the beginning of a string.
 *
 * \param s The input string.
 * \param cells_to_skip Desired number of cells that should be skipped.
 * \param bytes_to_skip Result.
 *
 * This function computes how many input bytes must be skipped to advance a
 * string by the given width. If the current character encoding is not UTF-8,
 * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
 * \a s is treated as a multibyte string and on successful return, \a s +
 * bytes_to_skip points to the start of a multibyte string such that the total
 * width of the multibyte characters that are skipped by advancing \a s that
 * many bytes equals at least \a cells_to_skip.
 *
 * \return Zero if nothing had to be computed, i.e. if \a cells_to_skip is
 * zero or the encoding is not UTF-8, one if the multibyte string was parsed
 * successfully. If mbrtowc(3) reports an invalid or incomplete multibyte
 * sequence, the EILSEQ error is returned and \a bytes_to_skip is zero.
 */
int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
{
	mbstate_t ps;
	size_t len, offset = 0, cells = 0;

	*bytes_to_skip = 0;
	if (cells_to_skip == 0)
		return 0;
	if (!utf8_mode()) {
		/* One byte per cell. */
		*bytes_to_skip = cells_to_skip;
		return 0;
	}
	memset(&ps, 0, sizeof(ps));
	len = strlen(s);
	while (cells < cells_to_skip) {
		wchar_t wc;
		size_t nbytes = mbrtowc(&wc, s + offset, len - offset, &ps);

		/* The terminating null byte is never handed to mbrtowc(). */
		assert(nbytes != 0);
		if (nbytes == (size_t)-1 || nbytes == (size_t)-2)
			return -ERRNO_TO_PARA_ERROR(EILSEQ);
		offset += nbytes;
		cells += xwcwidth(wc, cells);
	}
	*bytes_to_skip = offset;
	return 1;
}
1024
1025 /**
1026 * Compute the width of an UTF-8 string.
1027 *
1028 * \param s The string.
1029 * \param result The width of \a s is returned here.
1030 *
1031 * If not in UTF8-mode. this function is just a wrapper for strlen(3).
1032 * Otherwise \a s is treated as an UTF-8 string and its display width is
1033 * computed. Note that this function may fail if the underlying call to
1034 * mbsrtowcs(3) fails, so the caller must check the return value.
1035 *
1036 * \sa nl_langinfo(3), wcswidth(3).
1037 *
1038 * \return Standard.
1039 */
1040 __must_check int strwidth(const char *s, size_t *result)
1041 {
1042 const char *src = s;
1043 mbstate_t state;
1044 static wchar_t *dest;
1045 size_t num_wchars;
1046
1047 /*
1048 * Never call any log function here. This may result in an endless loop
1049 * as para_gui's para_log() calls this function.
1050 */
1051
1052 if (!utf8_mode()) {
1053 *result = strlen(s);
1054 return 0;
1055 }
1056 memset(&state, 0, sizeof(state));
1057 *result = 0;
1058 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1059 if (num_wchars == (size_t)-1)
1060 return -ERRNO_TO_PARA_ERROR(errno);
1061 if (num_wchars == 0)
1062 return 0;
1063 dest = para_malloc((num_wchars + 1) * sizeof(*dest));
1064 src = s;
1065 memset(&state, 0, sizeof(state));
1066 num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1067 assert(num_wchars > 0 && num_wchars != (size_t)-1);
1068 *result = xwcswidth(dest, num_wchars);
1069 free(dest);
1070 return 1;
1071 }
1072
1073 /**
1074 * Truncate and sanitize a (wide character) string.
1075 *
1076 * This replaces all non-printable characters by spaces and makes sure that the
1077 * modified string does not exceed the given maximal width.
1078 *
1079 * \param src The source string in multi-byte form.
1080 * \param max_width The maximal number of cells the result may occupy.
1081 * \param result Sanitized multi-byte string, must be freed by caller.
1082 * \param width The width of the sanitized string, always <= max_width.
1083 *
1084 * The function is wide-character aware but falls back to C strings for
1085 * non-UTF-8 locales.
1086 *
1087 * \return Standard. On success, *result points to a sanitized copy of the
1088 * given string. This copy was allocated with malloc() and should hence be
1089 * freed when the caller is no longer interested in the result.
1090 *
1091 * The function fails if the given string contains an invalid multibyte
1092 * sequence. In this case, *result is set to NULL, and *width to zero.
1093 */
1094 __must_check int sanitize_str(const char *src, size_t max_width,
1095 char **result, size_t *width)
1096 {
1097 mbstate_t state;
1098 static wchar_t *wcs;
1099 size_t num_wchars, n;
1100
1101 if (!utf8_mode()) {
1102 *result = para_strdup(src);
1103 /* replace non-printable characters by spaces */
1104 for (n = 0; n < max_width && src[n]; n++) {
1105 if (!isprint((unsigned char)src[n]))
1106 (*result)[n] = ' ';
1107 }
1108 (*result)[n] = '\0';
1109 *width = n;
1110 return 0;
1111 }
1112 *result = NULL;
1113 *width = 0;
1114 memset(&state, 0, sizeof(state));
1115 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1116 if (num_wchars == (size_t)-1)
1117 return -ERRNO_TO_PARA_ERROR(errno);
1118 wcs = para_malloc((num_wchars + 1) * sizeof(*wcs));
1119 memset(&state, 0, sizeof(state));
1120 num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
1121 assert(num_wchars != (size_t)-1);
1122 for (n = 0; n < num_wchars && *width < max_width; n++) {
1123 if (!iswprint(wcs[n]))
1124 wcs[n] = L' ';
1125 *width += xwcwidth(wcs[n], *width);
1126 }
1127 wcs[n] = L'\0';
1128 n = wcstombs(NULL, wcs, 0) + 1;
1129 *result = para_malloc(n);
1130 num_wchars = wcstombs(*result, wcs, n);
1131 assert(num_wchars != (size_t)-1);
1132 free(wcs);
1133 return 1;
1134 }