Merge branch 'refs/heads/t/format-signedness'
[paraslash.git] / string.c
1 /*
2 * Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6
7 /** \file string.c Memory allocation and string handling functions. */
8
9 #include "para.h"
10
11 #include <pwd.h>
12 #include <sys/utsname.h> /* uname() */
13 #include <string.h>
14 #include <regex.h>
15 #include <langinfo.h>
16 #include <wchar.h>
17 #include <wctype.h>
18
19 #include "string.h"
20 #include "error.h"
21
22 /**
23 * Paraslash's version of realloc().
24 *
25 * \param p Pointer to the memory block, may be \p NULL.
26 * \param size The desired new size.
27 *
28 * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
29 * i.e. there is no need to check the return value in the caller.
30 *
31 * \return A pointer to newly allocated memory which is suitably aligned for
32 * any kind of variable and may be different from \a p.
33 *
34 * \sa realloc(3).
35 */
36 __must_check void *para_realloc(void *p, size_t size)
37 {
38 /*
39 * No need to check for NULL pointers: If p is NULL, the call
40 * to realloc is equivalent to malloc(size)
41 */
42 assert(size);
43 if (!(p = realloc(p, size))) {
44 PARA_EMERG_LOG("realloc failed (size = %zu), aborting\n",
45 size);
46 exit(EXIT_FAILURE);
47 }
48 return p;
49 }
50
51 /**
52 * Paraslash's version of malloc().
53 *
54 * \param size The desired new size.
55 *
56 * A wrapper for malloc(3) which exits on errors.
57 *
58 * \return A pointer to the allocated memory, which is suitably aligned for any
59 * kind of variable.
60 *
61 * \sa malloc(3).
62 */
63 __must_check __malloc void *para_malloc(size_t size)
64 {
65 void *p;
66
67 assert(size);
68 p = malloc(size);
69 if (!p) {
70 PARA_EMERG_LOG("malloc failed (size = %zu), aborting\n",
71 size);
72 exit(EXIT_FAILURE);
73 }
74 return p;
75 }
76
77 /**
78 * Paraslash's version of calloc().
79 *
80 * \param size The desired new size.
81 *
82 * A wrapper for calloc(3) which exits on errors.
83 *
84 * \return A pointer to the allocated and zeroed-out memory, which is suitably
85 * aligned for any kind of variable.
86 *
87 * \sa calloc(3)
88 */
89 __must_check __malloc void *para_calloc(size_t size)
90 {
91 void *ret = para_malloc(size);
92
93 memset(ret, 0, size);
94 return ret;
95 }
96
97 /**
98 * Paraslash's version of strdup().
99 *
100 * \param s The string to be duplicated.
101 *
102 * A wrapper for strdup(3). It calls \p exit(EXIT_FAILURE) on errors, i.e.
103 * there is no need to check the return value in the caller.
104 *
105 * \return A pointer to the duplicated string. If \a s was the \p NULL pointer,
106 * an pointer to an empty string is returned.
107 *
108 * \sa strdup(3)
109 */
110 __must_check __malloc char *para_strdup(const char *s)
111 {
112 char *ret;
113
114 if ((ret = strdup(s? s: "")))
115 return ret;
116 PARA_EMERG_LOG("strdup failed, aborting\n");
117 exit(EXIT_FAILURE);
118 }
119
120 /**
121 * Print a formated message to a dynamically allocated string.
122 *
123 * \param result The formated string is returned here.
124 * \param fmt The format string.
125 * \param ap Initialized list of arguments.
126 *
127 * This function is similar to vasprintf(), a GNU extension which is not in C
128 * or POSIX. It allocates a string large enough to hold the output including
129 * the terminating null byte. The allocated string is returned via the first
130 * argument and must be freed by the caller. However, unlike vasprintf(), this
131 * function calls exit() if insufficient memory is available, while vasprintf()
132 * returns -1 in this case.
133 *
134 * \return Number of bytes written, not including the terminating \p NULL
135 * character.
136 *
137 * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
138 */
139 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
140 {
141 int ret;
142 size_t size = 150;
143 va_list aq;
144
145 *result = para_malloc(size + 1);
146 va_copy(aq, ap);
147 ret = vsnprintf(*result, size, fmt, aq);
148 va_end(aq);
149 assert(ret >= 0);
150 if (ret < size) /* OK */
151 return ret;
152 size = ret + 1;
153 *result = para_realloc(*result, size);
154 va_copy(aq, ap);
155 ret = vsnprintf(*result, size, fmt, aq);
156 va_end(aq);
157 assert(ret >= 0 && ret < size);
158 return ret;
159 }
160
161 /**
162 * Print to a dynamically allocated string, variable number of arguments.
163 *
164 * \param result See \ref xvasprintf().
165 * \param fmt Usual format string.
166 *
167 * \return The return value of the underlying call to \ref xvasprintf().
168 *
169 * \sa \ref xvasprintf() and the references mentioned there.
170 */
171 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
172 {
173 va_list ap;
174 unsigned ret;
175
176 va_start(ap, fmt);
177 ret = xvasprintf(result, fmt, ap);
178 va_end(ap);
179 return ret;
180 }
181
182 /**
183 * Allocate a sufficiently large string and print into it.
184 *
185 * \param fmt A usual format string.
186 *
187 * Produce output according to \p fmt. No artificial bound on the length of the
188 * resulting string is imposed.
189 *
190 * \return This function either returns a pointer to a string that must be
191 * freed by the caller or aborts without returning.
192 *
193 * \sa printf(3), xasprintf().
194 */
195 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
196 {
197 char *msg;
198 va_list ap;
199
200 va_start(ap, fmt);
201 xvasprintf(&msg, fmt, ap);
202 va_end(ap);
203 return msg;
204 }
205
/**
 * Release the memory a pointer refers to and reset the pointer.
 *
 * \param arg The address of the pointer whose target should be freed. It is
 * declared as \p void * so that the address of any pointer type can be
 * passed without a cast.
 *
 * The effect is the same as "free(*arg); *arg = NULL;".
 */
void freep(void *arg)
{
	void **slot = arg;
	void *mem = *slot;

	*slot = NULL;
	free(mem);
}
219
220 /**
221 * Paraslash's version of strcat().
222 *
223 * \param a String to be appended to.
224 * \param b String to append.
225 *
226 * Append \p b to \p a.
227 *
228 * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
229 * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
230 * return \a a without making a copy of \a a. Otherwise, construct the
231 * concatenation \a c, free \a a (but not \a b) and return \a c.
232 *
233 * \sa strcat(3)
234 */
235 __must_check __malloc char *para_strcat(char *a, const char *b)
236 {
237 char *tmp;
238
239 if (!a)
240 return para_strdup(b);
241 if (!b)
242 return a;
243 tmp = make_message("%s%s", a, b);
244 free(a);
245 return tmp;
246 }
247
248 /**
249 * Paraslash's version of dirname().
250 *
251 * \param name Pointer to the full path.
252 *
253 * Compute the directory component of \p name.
254 *
255 * \return If \a name is \p NULL or the empty string, return \p NULL.
256 * Otherwise, Make a copy of \a name and return its directory component. Caller
257 * is responsible to free the result.
258 */
259 __must_check __malloc char *para_dirname(const char *name)
260 {
261 char *p, *ret;
262
263 if (!name || !*name)
264 return NULL;
265 ret = para_strdup(name);
266 p = strrchr(ret, '/');
267 if (!p)
268 *ret = '\0';
269 else
270 *p = '\0';
271 return ret;
272 }
273
274 /**
275 * Paraslash's version of basename().
276 *
277 * \param name Pointer to the full path.
278 *
279 * Compute the filename component of \a name.
280 *
281 * \return \p NULL if (a) \a name is the empty string or \p NULL, or (b) name
282 * ends with a slash. Otherwise, a pointer within \a name is returned. Caller
283 * must not free the result.
284 */
285 __must_check char *para_basename(const char *name)
286 {
287 char *ret;
288
289 if (!name || !*name)
290 return NULL;
291 ret = strrchr(name, '/');
292 if (!ret)
293 return (char *)name;
294 ret++;
295 return ret;
296 }
297
298 /**
299 * Get the logname of the current user.
300 *
301 * \return A dynamically allocated string that must be freed by the caller. On
302 * errors, the string "unknown_user" is returned, i.e. this function never
303 * returns \p NULL.
304 *
305 * \sa getpwuid(3).
306 */
307 __must_check __malloc char *para_logname(void)
308 {
309 struct passwd *pw = getpwuid(getuid());
310 return para_strdup(pw? pw->pw_name : "unknown_user");
311 }
312
313 /**
314 * Get the home directory of the current user.
315 *
316 * \return A dynamically allocated string that must be freed by the caller. If
317 * the home directory could not be found, this function returns "/tmp".
318 */
319 __must_check __malloc char *para_homedir(void)
320 {
321 struct passwd *pw = getpwuid(getuid());
322 return para_strdup(pw? pw->pw_dir : "/tmp");
323 }
324
325 /**
326 * Get the own hostname.
327 *
328 * \return A dynamically allocated string containing the hostname.
329 *
330 * \sa uname(2).
331 */
332 __malloc char *para_hostname(void)
333 {
334 struct utsname u;
335
336 uname(&u);
337 return para_strdup(u.nodename);
338 }
339
340 /**
341 * Call a custom function for each complete line.
342 *
343 * \param flags Any combination of flags defined in \ref for_each_line_flags.
344 * \param buf The buffer containing data separated by newlines.
345 * \param size The number of bytes in \a buf.
346 * \param line_handler The custom function.
347 * \param private_data Pointer passed to \a line_handler.
348 *
349 * For each complete line in \p buf, \p line_handler is called. The first
350 * argument to \p line_handler is (a copy of) the current line, and \p
351 * private_data is passed as the second argument. If the \p FELF_READ_ONLY
352 * flag is unset, a pointer into \a buf is passed to the line handler,
353 * otherwise a pointer to a copy of the current line is passed instead. This
354 * copy is freed immediately after the line handler returns.
355 *
356 * The function returns if \p line_handler returns a negative value or no more
357 * lines are in the buffer. The rest of the buffer (last chunk containing an
358 * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
359 * unset.
360 *
361 * \return On success this function returns the number of bytes not handled to
362 * \p line_handler. The only possible error is a negative return value from the
363 * line handler. In this case processing stops and the return value of the line
364 * handler is returned to indicate failure.
365 *
366 * \sa \ref for_each_line_flags.
367 */
368 int for_each_line(unsigned flags, char *buf, size_t size,
369 line_handler_t *line_handler, void *private_data)
370 {
371 char *start = buf, *end;
372 int ret, i, num_lines = 0;
373
374 // PARA_NOTICE_LOG("buf: %s\n", buf);
375 while (start < buf + size) {
376 char *next_null;
377 char *next_cr;
378
379 next_cr = memchr(start, '\n', buf + size - start);
380 next_null = memchr(start, '\0', next_cr?
381 next_cr - start : buf + size - start);
382 if (!next_cr && !next_null)
383 break;
384 if (next_null)
385 end = next_null;
386 else
387 end = next_cr;
388 num_lines++;
389 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
390 if (flags & FELF_READ_ONLY) {
391 size_t s = end - start;
392 char *b = para_malloc(s + 1);
393 memcpy(b, start, s);
394 b[s] = '\0';
395 ret = line_handler(b, private_data);
396 free(b);
397 } else {
398 *end = '\0';
399 ret = line_handler(start, private_data);
400 }
401 if (ret < 0)
402 return ret;
403 }
404 start = ++end;
405 }
406 i = buf + size - start;
407 if (i && i != size && !(flags & FELF_READ_ONLY))
408 memmove(buf, start, i);
409 return i;
410 }
411
/** Return the hex characters of the lower 4 bits. */
#define hex(a) (hexchar[(a) & 15])

/*
 * Write the lower 16 bits of n as four lowercase hex digits, followed by a
 * space character, to buf. Exactly five bytes are written, no terminating NUL
 * byte is stored.
 */
static void write_size_header(char *buf, int n)
{
	static char hexchar[] = "0123456789abcdef";
	int shift, k = 0;

	/* Most significant nibble first. */
	for (shift = 12; shift >= 0; shift -= 4)
		buf[k++] = hex(n >> shift);
	buf[k] = ' ';
}
425
426 /**
427 * Read a four-byte hex-number and return its value.
428 *
429 * Each status item sent by para_server is prefixed with such a hex number in
430 * ASCII which describes the size of the status item.
431 *
432 * \param buf The buffer which must be at least four bytes long.
433 *
434 * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
435 * buffer did not contain only hex digits.
436 */
437 int read_size_header(const char *buf)
438 {
439 int i, len = 0;
440
441 for (i = 0; i < 4; i++) {
442 unsigned char c = buf[i];
443 len <<= 4;
444 if (c >= '0' && c <= '9') {
445 len += c - '0';
446 continue;
447 }
448 if (c >= 'a' && c <= 'f') {
449 len += c - 'a' + 10;
450 continue;
451 }
452 return -E_SIZE_PREFIX;
453 }
454 if (buf[4] != ' ')
455 return -E_SIZE_PREFIX;
456 return len;
457 }
458
/**
 * Safely print into a buffer at a given offset.
 *
 * \param b Determines the buffer, its size, and the offset.
 * \param fmt The format string.
 *
 * This function prints into the buffer given by \a b at the offset which is
 * also given by \a b. If there is not enough space to hold the result, the
 * buffer size is doubled until the underlying call to vsnprintf() succeeds
 * or doubling the buffer once more would reach or exceed the maximal size
 * specified in \a b.
 *
 * In the latter case the unmodified \a buf and \a offset values as well as the
 * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
 * If this function succeeds, i.e. returns a non-negative value, the offset of
 * \a b is reset to zero and the given data is written to the beginning of the
 * buffer. If \a max_size_handler() returns a negative value, this value is
 * returned by \a para_printf(). If the buffer is already empty, or if no \a
 * max_size_handler is set, the message is too large and an \p ENOSPC error is
 * returned.
 *
 * If \p PBF_SIZE_PREFIX is set in the flags of \a b, five bytes are reserved
 * in front of the formatted text and filled with a size header (four hex
 * digits and a space) which describes the length of the text.
 *
 * Upon return, the offset of \a b is adjusted accordingly so that subsequent
 * calls to this function append data to what is already contained in the
 * buffer.
 *
 * It's OK to call this function with \p b->buf being \p NULL. In this case, an
 * initial buffer of 128 bytes is allocated.
 *
 * \return The number of bytes printed into the buffer (including the size
 * header, if any, but not including the terminating NUL byte) on success,
 * negative on errors. If there is no size-bound on \a b, i.e. if \p
 * b->max_size is zero, this function never fails.
 *
 * \sa make_message(), vsnprintf(3).
 */
__printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
{
	/* sz_off: number of bytes reserved for the size header, if any */
	int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;

	if (!b->buf) {
		b->buf = para_malloc(128);
		b->size = 128;
		b->offset = 0;
	}
	while (1) {
		/* start and size of the unused part of the buffer */
		char *p = b->buf + b->offset;
		size_t size = b->size - b->offset;
		va_list ap;

		/* only try to print if the size header fits */
		if (size > sz_off) {
			/*
			 * The argument list is consumed by vsnprintf(), so it
			 * has to be set up afresh for each attempt.
			 */
			va_start(ap, fmt);
			ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
			va_end(ap);
			if (ret > -1 && ret < size - sz_off) { /* success */
				b->offset += ret + sz_off;
				if (sz_off)
					write_size_header(p, ret);
				return ret + sz_off;
			}
		}
		/* check if we may grow the buffer */
		if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
			/* try again with more space */
			b->size *= 2;
			b->buf = para_realloc(b->buf, b->size);
			continue;
		}
		/* can't grow buffer */
		if (!b->offset || !b->max_size_handler) /* message too large */
			return -ERRNO_TO_PARA_ERROR(ENOSPC);
		/* hand the current buffer contents to the handler */
		ret = b->max_size_handler(b->buf, b->offset, b->private_data);
		if (ret < 0)
			return ret;
		/* the buffer is free again, retry at the beginning */
		b->offset = 0;
	}
}
532
533 /** \cond llong_minmax */
534 /* LLONG_MAX and LLONG_MIN might not be defined. */
535 #ifndef LLONG_MAX
536 #define LLONG_MAX 9223372036854775807LL
537 #endif
538 #ifndef LLONG_MIN
539 #define LLONG_MIN (-LLONG_MAX - 1LL)
540 #endif
541 /** \endcond llong_minmax */
542
543 /**
544 * Convert a string to a 64-bit signed integer value.
545 *
546 * \param str The string to be converted.
547 * \param value Result pointer.
548 *
549 * \return Standard.
550 *
551 * \sa para_atoi32(), strtol(3), atoi(3).
552 */
553 int para_atoi64(const char *str, int64_t *value)
554 {
555 char *endptr;
556 long long tmp;
557
558 errno = 0; /* To distinguish success/failure after call */
559 tmp = strtoll(str, &endptr, 10);
560 if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
561 return -E_ATOI_OVERFLOW;
562 /*
563 * If there were no digits at all, strtoll() stores the original value
564 * of str in *endptr.
565 */
566 if (endptr == str)
567 return -E_ATOI_NO_DIGITS;
568 /*
569 * The implementation may also set errno and return 0 in case no
570 * conversion was performed.
571 */
572 if (errno != 0 && tmp == 0)
573 return -E_ATOI_NO_DIGITS;
574 if (*endptr != '\0') /* Further characters after number */
575 return -E_ATOI_JUNK_AT_END;
576 *value = tmp;
577 return 1;
578 }
579
580 /**
581 * Convert a string to a 32-bit signed integer value.
582 *
583 * \param str The string to be converted.
584 * \param value Result pointer.
585 *
586 * \return Standard.
587 *
588 * \sa para_atoi64().
589 */
590 int para_atoi32(const char *str, int32_t *value)
591 {
592 int64_t tmp;
593 int ret;
594 const int32_t max = 2147483647;
595
596 ret = para_atoi64(str, &tmp);
597 if (ret < 0)
598 return ret;
599 if (tmp > max || tmp < -max - 1)
600 return -E_ATOI_OVERFLOW;
601 *value = tmp;
602 return 1;
603 }
604
/*
 * Check whether arg starts with the loglevel name ll, ignoring case. Only the
 * first strlen(ll) characters are compared, so any string which has ll as a
 * prefix matches. Returns one on match, zero otherwise.
 */
static inline int loglevel_equal(const char *arg, const char * const ll)
{
	const size_t n = strlen(ll);

	return strncasecmp(arg, ll, n) == 0;
}
609
610 /**
611 * Compute the loglevel number from its name.
612 *
613 * \param txt The name of the loglevel (debug, info, ...).
614 *
615 * \return The numeric representation of the loglevel name.
616 */
617 int get_loglevel_by_name(const char *txt)
618 {
619 if (loglevel_equal(txt, "debug"))
620 return LL_DEBUG;
621 if (loglevel_equal(txt, "info"))
622 return LL_INFO;
623 if (loglevel_equal(txt, "notice"))
624 return LL_NOTICE;
625 if (loglevel_equal(txt, "warning"))
626 return LL_WARNING;
627 if (loglevel_equal(txt, "error"))
628 return LL_ERROR;
629 if (loglevel_equal(txt, "crit"))
630 return LL_CRIT;
631 if (loglevel_equal(txt, "emerg"))
632 return LL_EMERG;
633 return -E_BAD_LL;
634 }
635
/*
 * Extract the first word of a buffer.
 *
 * buf: The zero-terminated input.
 * delim: Each character of this string separates words.
 * word: The result pointer.
 *
 * Delimiters at the beginning of buf are skipped. A word extends up to the
 * next delimiter which is neither preceded by a backslash nor enclosed in
 * single or double quotes, or up to the end of buf. The quote characters and
 * the escaping backslashes are not part of the word. The sequences \n and \t
 * are replaced by a newline and a tab character, respectively. Any other
 * character preceded by a backslash is copied literally.
 *
 * On success, *word points to a dynamically allocated copy of the word which
 * must be freed by the caller, and the number of bytes of buf which were
 * consumed is returned. This is the offset of the delimiter that terminated
 * the word, or the length of buf if the end of the buffer was reached.
 *
 * If buf contains no word, zero is returned. If buf ends with a backslash or
 * within a quoted section, an error message is logged and a negative error
 * code (EINVAL) is returned. In both cases *word is set to NULL.
 */
static int get_next_word(const char *buf, const char *delim, char **word)
{
	enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
		LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
	const char *in;
	char *out;
	int ret, state = 0;

	/* The word can not be longer than the input. */
	out = para_malloc(strlen(buf) + 1);
	*out = '\0';
	*word = out;
	for (in = buf; *in; in++) {
		const char *p;

		switch (*in) {
		case '\\':
			if (state & LSF_BACKSLASH) /* \\ */
				goto copy_char;
			state |= LSF_BACKSLASH;
			state |= LSF_HAVE_WORD;
			continue;
		case 'n':
		case 't':
			if (state & LSF_BACKSLASH) { /* \n or \t */
				*out++ = (*in == 'n')? '\n' : '\t';
				state &= ~LSF_BACKSLASH;
				continue;
			}
			goto copy_char;
		case '"':
			if (state & LSF_BACKSLASH) /* \" */
				goto copy_char;
			if (state & LSF_SINGLE_QUOTE) /* '" */
				goto copy_char;
			if (state & LSF_DOUBLE_QUOTE) {
				/* closing quote */
				state &= ~LSF_DOUBLE_QUOTE;
				continue;
			}
			/* opening quote, an empty quoted string is a word too */
			state |= LSF_HAVE_WORD;
			state |= LSF_DOUBLE_QUOTE;
			continue;
		case '\'':
			if (state & LSF_BACKSLASH) /* \' */
				goto copy_char;
			if (state & LSF_DOUBLE_QUOTE) /* "' */
				goto copy_char;
			if (state & LSF_SINGLE_QUOTE) {
				/* closing quote */
				state &= ~LSF_SINGLE_QUOTE;
				continue;
			}
			/* opening quote, an empty quoted string is a word too */
			state |= LSF_HAVE_WORD;
			state |= LSF_SINGLE_QUOTE;
			continue;
		}
		/* Check whether the current character is a delimiter. */
		for (p = delim; *p; p++) {
			if (*in != *p)
				continue;
			/* Escaped and quoted delimiters are part of the word. */
			if (state & LSF_BACKSLASH)
				goto copy_char;
			if (state & LSF_SINGLE_QUOTE)
				goto copy_char;
			if (state & LSF_DOUBLE_QUOTE)
				goto copy_char;
			/* A delimiter after the word terminates the word. */
			if (state & LSF_HAVE_WORD)
				goto success;
			break;
		}
		if (*p) /* ignore delimiter at the beginning */
			continue;
copy_char:
		state |= LSF_HAVE_WORD;
		*out++ = *in;
		state &= ~LSF_BACKSLASH;
	}
	/* End of input reached. */
	ret = 0;
	if (!(state & LSF_HAVE_WORD))
		goto out;
	ret = -ERRNO_TO_PARA_ERROR(EINVAL);
	if (state & LSF_BACKSLASH) {
		PARA_ERROR_LOG("trailing backslash\n");
		goto out;
	}
	if ((state & LSF_SINGLE_QUOTE) || (state & LSF_DOUBLE_QUOTE)) {
		PARA_ERROR_LOG("unmatched quote character\n");
		goto out;
	}
success:
	*out = '\0';
	return in - buf;
out:
	free(*word);
	*word = NULL;
	return ret;
}
730
/**
 * Determine the index of the word at the cursor position.
 *
 * \param buf The zero-terminated line buffer.
 * \param delim Characters that separate words.
 * \param point The cursor position, given as a byte offset into \a buf.
 *
 * The words of \a buf are parsed one after another until a word is found
 * which ends at or behind the cursor position, or until no further word can
 * be extracted.
 *
 * \return The zero-based word number, i.e. the number of words which end in
 * front of the cursor position.
 */
int compute_word_num(const char *buf, const char *delim, int point)
{
	const char *pos = buf;
	int count = 0;

	while (1) {
		char *word;
		int len = get_next_word(pos, delim, &word);

		if (len <= 0) /* no more words, or parse error */
			break;
		/* Only the length of the word is of interest. */
		free(word);
		pos += len;
		if (pos >= buf + point)
			break;
		count++;
	}
	return count;
}
756
/**
 * Release a word array created by create_argv() or create_shifted_argv().
 *
 * \param argv A pointer previously obtained by \ref create_argv(). It is OK
 * to pass \p NULL here, in which case the function does nothing.
 *
 * Each word up to, but not including, the first \p NULL entry is freed, then
 * the array itself is freed.
 */
void free_argv(char **argv)
{
	char **w;

	if (!argv)
		return;
	for (w = argv; *w; w++)
		free(*w);
	free(argv);
}
772
/*
 * Split buf into words and store them in a NULL-terminated array, starting at
 * index offset. The array elements in front of that index are set to NULL.
 *
 * On success the array is returned through result and the return value is the
 * index of the terminating NULL entry, i.e. offset plus the number of words.
 * If a word can not be parsed, everything allocated so far is freed, *result
 * is set to NULL and the negative error code of get_next_word() is returned.
 */
static int create_argv_offset(int offset, const char *buf, const char *delim,
		char ***result)
{
	char **argv = para_malloc((offset + 1) * sizeof(char *));
	const char *p = buf;
	char *word;
	int n = 0, ret;

	while (n < offset)
		argv[n++] = NULL;
	while (p && *p) {
		ret = get_next_word(p, delim, &word);
		if (ret < 0)
			goto err;
		if (ret == 0) /* only delimiters left */
			break;
		/* Make room for the new word and the terminating NULL. */
		argv = para_realloc(argv, (n + 2) * sizeof(char *));
		argv[n++] = word;
		p += ret;
	}
	argv[n] = NULL;
	*result = argv;
	return n;
err:
	while (n > 0)
		free(argv[--n]);
	free(argv);
	*result = NULL;
	return ret;
}
801
/**
 * Break a buffer up into an array of words.
 *
 * The parser understands single and double quotes, backslash-escaped
 * characters and special sequences like \\n. The resulting array holds
 * pointers to copies of the words found in the buffer and must be released
 * with \ref free_argv().
 *
 * \param buf The buffer to be split.
 * \param delim Each character in this string is treated as a separator.
 * \param result The array of words is returned here.
 *
 * Passing \p NULL as the buffer argument is allowed and has the same effect
 * as passing the empty string.
 *
 * \return The number of words in the buffer, negative on errors. The array
 * returned through the result pointer is \p NULL terminated.
 */
int create_argv(const char *buf, const char *delim, char ***result)
{
	const int no_shift = 0;

	return create_argv_offset(no_shift, buf, delim, result);
}
823
/**
 * Break a buffer up into an array of words, leaving slot zero unused.
 *
 * This works like \ref create_argv(), except that the array has one more
 * element: the words are stored from index one on, and element zero is
 * initialized to \p NULL. Since free_argv() stops at the first \p NULL
 * entry, callers must store a non-NULL value in element zero before they
 * call free_argv() on the array. Otherwise the words are leaked.
 *
 * \param buf See \ref create_argv().
 * \param delim See \ref create_argv().
 * \param result See \ref create_argv().
 *
 * \return The number of words plus one on success, negative on errors.
 */
int create_shifted_argv(const char *buf, const char *delim, char ***result)
{
	const int shift = 1;

	return create_argv_offset(shift, buf, delim, result);
}
842
843 /**
844 * Find out if the given string is contained in the arg vector.
845 *
846 * \param arg The string to look for.
847 * \param argv The array to search.
848 *
849 * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
850 * arg was not found in \a argv.
851 */
852 int find_arg(const char *arg, char **argv)
853 {
854 int i;
855
856 if (!argv)
857 return -E_ARG_NOT_FOUND;
858 for (i = 0; argv[i]; i++)
859 if (strcmp(arg, argv[i]) == 0)
860 return i;
861 return -E_ARG_NOT_FOUND;
862 }
863
864 /**
865 * Compile a regular expression.
866 *
867 * This simple wrapper calls regcomp() and logs a message on errors.
868 *
869 * \param preg See regcomp(3).
870 * \param regex See regcomp(3).
871 * \param cflags See regcomp(3).
872 *
873 * \return Standard.
874 */
875 int para_regcomp(regex_t *preg, const char *regex, int cflags)
876 {
877 char *buf;
878 size_t size;
879 int ret = regcomp(preg, regex, cflags);
880
881 if (ret == 0)
882 return 1;
883 size = regerror(ret, preg, NULL, 0);
884 buf = para_malloc(size);
885 regerror(ret, preg, buf, size);
886 PARA_ERROR_LOG("%s\n", buf);
887 free(buf);
888 return -E_REGEX;
889 }
890
/**
 * Copy a buffer which need not be zero-terminated into a new string.
 *
 * \param src The source buffer.
 * \param len The number of bytes to be copied.
 *
 * Exactly \a len bytes are copied from \a src and a terminating zero byte is
 * appended. If \a len is zero, \a src is not accessed at all.
 *
 * \return A zero-terminated buffer of size \a len + 1.
 *
 * This resembles strndup(), which is a GNU extension. One difference is that
 * strndup() returns \p NULL if memory is short, while this function aborts in
 * this case.
 *
 * \sa strdup(), \ref para_strdup().
 */
char *safe_strdup(const char *src, size_t len)
{
	char *copy;

	/* len + 1 must not wrap around. */
	assert(len < (size_t)-1);
	copy = para_malloc(len + 1);
	copy[len] = '\0';
	if (len)
		memcpy(copy, src, len);
	return copy;
}
916
/**
 * Extract the value of a key=value pair.
 *
 * The function checks whether the given buffer starts with "key=", where the
 * key is compared case insensitively. If it does, the rest of the buffer is
 * copied and returned. The source buffer need not be zero-terminated.
 *
 * \param src The source buffer.
 * \param len The number of bytes in \a src.
 * \param key Only copy if it is the value of this key.
 *
 * \return A zero-terminated copy of the value, or \p NULL if the buffer is
 * not longer than the key or does not start with the key followed by an
 * equal sign.
 */
char *key_value_copy(const char *src, size_t len, const char *key)
{
	const size_t keylen = strlen(key);

	/* There must be room for at least the key and the equal sign. */
	if (len <= keylen)
		return NULL;
	if (strncasecmp(src, key, keylen) != 0 || src[keylen] != '=')
		return NULL;
	return safe_strdup(src + keylen + 1, len - keylen - 1);
}
943
/*
 * Tell whether the character encoding of the current locale is UTF-8.
 *
 * The codeset is looked up on the first call only. The result is cached in a
 * static variable and returned by all subsequent calls.
 */
static bool utf8_mode(void)
{
	static bool initialized, have_utf8;
	const char *codeset;

	if (initialized)
		return have_utf8;
	codeset = nl_langinfo(CODESET);
	have_utf8 = codeset && !strcmp(codeset, "UTF-8");
	/*
	 * Set the flag before logging so that a log function which happens
	 * to call back into this function finds the cached value.
	 */
	initialized = true;
	PARA_INFO_LOG("%susing UTF-8 character encoding\n",
		have_utf8? "" : "not ");
	return have_utf8;
}
957
/*
 * Compute the number of cells a wide character occupies at a given column.
 *
 * A tab advances to the next multiple of eight, so its width depends on the
 * column pos. Characters for which wcwidth() reports a negative width are
 * counted as one cell.
 */
static int xwcwidth(wchar_t wc, size_t pos)
{
	int cells;

	if (wc == 0x09) /* tab */
		return (pos | 7) + 1 - pos;
	cells = wcwidth(wc);
	/* wcwidth() returns -1 for non-printable characters */
	if (cells < 0)
		cells = 1;
	return cells;
}
969
/*
 * Compute the number of cells occupied by the first n wide characters of s,
 * assuming that the string starts at column zero. The running total is passed
 * to xwcwidth() as the column so that tabs are expanded correctly.
 */
static size_t xwcswidth(const wchar_t *s, size_t n)
{
	size_t total = 0, k;

	for (k = 0; k < n; k++)
		total += xwcwidth(s[k], total);
	return total;
}
978
/**
 * Translate a number of cells into a number of bytes.
 *
 * \param s The input string.
 * \param cells_to_skip Desired number of cells that should be skipped.
 * \param bytes_to_skip Result.
 *
 * This function computes how many bytes of \a s must be skipped in order to
 * advance the string by the given width. If the current character encoding is
 * not UTF-8, the result is simply \a cells_to_skip. Otherwise \a s is parsed
 * as a multibyte string, one character at a time, until the accumulated width
 * of the parsed characters is at least \a cells_to_skip. On successful
 * return, \a s + bytes_to_skip points to the start of the multibyte character
 * which follows the skipped characters.
 *
 * \return Zero if \a cells_to_skip is zero or if the encoding is not UTF-8,
 * one if the string was parsed successfully. If the string can not be parsed
 * as far as required, an \p EILSEQ error is returned and \a bytes_to_skip is
 * set to zero.
 */
int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
{
	wchar_t wc;
	mbstate_t ps;
	size_t len, off = 0, cells = 0;

	*bytes_to_skip = 0;
	if (cells_to_skip == 0)
		return 0;
	if (!utf8_mode()) {
		/* One byte per cell. */
		*bytes_to_skip = cells_to_skip;
		return 0;
	}
	memset(&ps, 0, sizeof(ps));
	len = strlen(s);
	for (; cells < cells_to_skip; cells += xwcwidth(wc, cells)) {
		size_t mbret = mbrtowc(&wc, s + off, len - off, &ps);

		assert(mbret != 0);
		if (mbret == (size_t)-1 || mbret == (size_t)-2)
			return -ERRNO_TO_PARA_ERROR(EILSEQ);
		off += mbret;
	}
	*bytes_to_skip = off;
	return 1;
}
1025
1026 /**
1027 * Compute the width of an UTF-8 string.
1028 *
1029 * \param s The string.
1030 * \param result The width of \a s is returned here.
1031 *
1032 * If not in UTF8-mode. this function is just a wrapper for strlen(3).
1033 * Otherwise \a s is treated as an UTF-8 string and its display width is
1034 * computed. Note that this function may fail if the underlying call to
1035 * mbsrtowcs(3) fails, so the caller must check the return value.
1036 *
1037 * \sa nl_langinfo(3), wcswidth(3).
1038 *
1039 * \return Standard.
1040 */
1041 __must_check int strwidth(const char *s, size_t *result)
1042 {
1043 const char *src = s;
1044 mbstate_t state;
1045 static wchar_t *dest;
1046 size_t num_wchars;
1047
1048 /*
1049 * Never call any log function here. This may result in an endless loop
1050 * as para_gui's para_log() calls this function.
1051 */
1052
1053 if (!utf8_mode()) {
1054 *result = strlen(s);
1055 return 0;
1056 }
1057 memset(&state, 0, sizeof(state));
1058 *result = 0;
1059 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1060 if (num_wchars == (size_t)-1)
1061 return -ERRNO_TO_PARA_ERROR(errno);
1062 if (num_wchars == 0)
1063 return 0;
1064 dest = para_malloc((num_wchars + 1) * sizeof(*dest));
1065 src = s;
1066 memset(&state, 0, sizeof(state));
1067 num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1068 assert(num_wchars > 0 && num_wchars != (size_t)-1);
1069 *result = xwcswidth(dest, num_wchars);
1070 free(dest);
1071 return 1;
1072 }
1073
1074 /**
1075 * Truncate and sanitize a (wide character) string.
1076 *
1077 * This replaces all non-printable characters by spaces and makes sure that the
1078 * modified string does not exceed the given maximal width.
1079 *
1080 * \param src The source string in multi-byte form.
1081 * \param max_width The maximal number of cells the result may occupy.
1082 * \param result Sanitized multi-byte string, must be freed by caller.
1083 * \param width The width of the sanitized string, always <= max_width.
1084 *
1085 * The function is wide-character aware but falls back to C strings for
1086 * non-UTF-8 locales.
1087 *
1088 * \return Standard. On success, *result points to a sanitized copy of the
1089 * given string. This copy was allocated with malloc() and should hence be
1090 * freed when the caller is no longer interested in the result.
1091 *
1092 * The function fails if the given string contains an invalid multibyte
1093 * sequence. In this case, *result is set to NULL, and *width to zero.
1094 */
1095 __must_check int sanitize_str(const char *src, size_t max_width,
1096 char **result, size_t *width)
1097 {
1098 mbstate_t state;
1099 static wchar_t *wcs;
1100 size_t num_wchars, n;
1101
1102 if (!utf8_mode()) {
1103 *result = para_strdup(src);
1104 /* replace non-printable characters by spaces */
1105 for (n = 0; n < max_width && src[n]; n++) {
1106 if (!isprint((unsigned char)src[n]))
1107 (*result)[n] = ' ';
1108 }
1109 (*result)[n] = '\0';
1110 *width = n;
1111 return 0;
1112 }
1113 *result = NULL;
1114 *width = 0;
1115 memset(&state, 0, sizeof(state));
1116 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1117 if (num_wchars == (size_t)-1)
1118 return -ERRNO_TO_PARA_ERROR(errno);
1119 wcs = para_malloc((num_wchars + 1) * sizeof(*wcs));
1120 memset(&state, 0, sizeof(state));
1121 num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
1122 assert(num_wchars != (size_t)-1);
1123 for (n = 0; n < num_wchars && *width < max_width; n++) {
1124 if (!iswprint(wcs[n]))
1125 wcs[n] = L' ';
1126 *width += xwcwidth(wcs[n], *width);
1127 }
1128 wcs[n] = L'\0';
1129 n = wcstombs(NULL, wcs, 0) + 1;
1130 *result = para_malloc(n);
1131 num_wchars = wcstombs(*result, wcs, n);
1132 assert(num_wchars != (size_t)-1);
1133 free(wcs);
1134 return 1;
1135 }