Add homepage link.
[paraslash.git] / string.c
1 /* Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>, see file COPYING. */
2
3 /** \file string.c Memory allocation and string handling functions. */
4
5 #include "para.h"
6
7 #include <pwd.h>
8 #include <sys/utsname.h> /* uname() */
9 #include <regex.h>
10 #include <langinfo.h>
11 #include <wchar.h>
12 #include <wctype.h>
13
14 #include "string.h"
15 #include "error.h"
16
17 /**
18 * Paraslash's version of realloc().
19 *
20 * \param p Pointer to the memory block, may be \p NULL.
21 * \param size The desired new size.
22 *
23 * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
24 * i.e. there is no need to check the return value in the caller.
25 *
26 * \return A pointer to newly allocated memory which is suitably aligned for
27 * any kind of variable and may be different from \a p.
28 *
29 * \sa realloc(3).
30 */
31 __must_check void *para_realloc(void *p, size_t size)
32 {
33 /*
34 * No need to check for NULL pointers: If p is NULL, the call
35 * to realloc is equivalent to malloc(size)
36 */
37 assert(size);
38 if (!(p = realloc(p, size))) {
39 PARA_EMERG_LOG("realloc failed (size = %zu), aborting\n",
40 size);
41 exit(EXIT_FAILURE);
42 }
43 return p;
44 }
45
46 /**
47 * Paraslash's version of malloc().
48 *
49 * \param size The desired new size.
50 *
51 * A wrapper for malloc(3) which exits on errors.
52 *
53 * \return A pointer to the allocated memory, which is suitably aligned for any
54 * kind of variable.
55 *
56 * \sa malloc(3).
57 */
58 __must_check __malloc void *para_malloc(size_t size)
59 {
60 void *p;
61
62 assert(size);
63 p = malloc(size);
64 if (!p) {
65 PARA_EMERG_LOG("malloc failed (size = %zu), aborting\n",
66 size);
67 exit(EXIT_FAILURE);
68 }
69 return p;
70 }
71
72 /**
73 * Paraslash's version of calloc().
74 *
75 * \param size The desired new size.
76 *
77 * A wrapper for calloc(3) which exits on errors.
78 *
79 * \return A pointer to the allocated and zeroed-out memory, which is suitably
80 * aligned for any kind of variable.
81 *
82 * \sa calloc(3)
83 */
84 __must_check __malloc void *para_calloc(size_t size)
85 {
86 void *ret = para_malloc(size);
87
88 memset(ret, 0, size);
89 return ret;
90 }
91
92 /**
93 * Paraslash's version of strdup().
94 *
95 * \param s The string to be duplicated.
96 *
97 * A wrapper for strdup(3). It calls \p exit(EXIT_FAILURE) on errors, i.e.
98 * there is no need to check the return value in the caller.
99 *
100 * \return A pointer to the duplicated string. If \a s was the \p NULL pointer,
101 * an pointer to an empty string is returned.
102 *
103 * \sa strdup(3)
104 */
105 __must_check __malloc char *para_strdup(const char *s)
106 {
107 char *ret;
108
109 if ((ret = strdup(s? s: "")))
110 return ret;
111 PARA_EMERG_LOG("strdup failed, aborting\n");
112 exit(EXIT_FAILURE);
113 }
114
115 /**
116 * Print a formatted message to a dynamically allocated string.
117 *
118 * \param result The formatted string is returned here.
119 * \param fmt The format string.
120 * \param ap Initialized list of arguments.
121 *
122 * This function is similar to vasprintf(), a GNU extension which is not in C
123 * or POSIX. It allocates a string large enough to hold the output including
124 * the terminating null byte. The allocated string is returned via the first
125 * argument and must be freed by the caller. However, unlike vasprintf(), this
126 * function calls exit() if insufficient memory is available, while vasprintf()
127 * returns -1 in this case.
128 *
129 * \return Number of bytes written, not including the terminating \p NULL
130 * character.
131 *
132 * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
133 */
134 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
135 {
136 int ret;
137 size_t size = 150;
138 va_list aq;
139
140 *result = para_malloc(size + 1);
141 va_copy(aq, ap);
142 ret = vsnprintf(*result, size, fmt, aq);
143 va_end(aq);
144 assert(ret >= 0);
145 if (ret < size) /* OK */
146 return ret;
147 size = ret + 1;
148 *result = para_realloc(*result, size);
149 va_copy(aq, ap);
150 ret = vsnprintf(*result, size, fmt, aq);
151 va_end(aq);
152 assert(ret >= 0 && ret < size);
153 return ret;
154 }
155
156 /**
157 * Print to a dynamically allocated string, variable number of arguments.
158 *
159 * \param result See \ref xvasprintf().
160 * \param fmt Usual format string.
161 *
162 * \return The return value of the underlying call to \ref xvasprintf().
163 *
164 * \sa \ref xvasprintf() and the references mentioned there.
165 */
166 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
167 {
168 va_list ap;
169 unsigned ret;
170
171 va_start(ap, fmt);
172 ret = xvasprintf(result, fmt, ap);
173 va_end(ap);
174 return ret;
175 }
176
177 /**
178 * Allocate a sufficiently large string and print into it.
179 *
180 * \param fmt A usual format string.
181 *
182 * Produce output according to \p fmt. No artificial bound on the length of the
183 * resulting string is imposed.
184 *
185 * \return This function either returns a pointer to a string that must be
186 * freed by the caller or aborts without returning.
187 *
188 * \sa printf(3), \ref xasprintf().
189 */
190 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
191 {
192 char *msg;
193 va_list ap;
194
195 va_start(ap, fmt);
196 xvasprintf(&msg, fmt, ap);
197 va_end(ap);
198 return msg;
199 }
200
/**
 * Free the memory a pointer refers to and reset the pointer to \p NULL.
 *
 * This is equivalent to "free(*arg); *arg = NULL;".
 *
 * \param arg The address of the pointer whose target should be freed. It is
 * declared as \p void * so that the address of any pointer type can be passed
 * without a cast.
 */
void freep(void *arg)
{
	void **pp = arg;

	free(*pp);
	*pp = NULL;
}
214
215 /**
216 * Paraslash's version of strcat().
217 *
218 * \param a String to be appended to.
219 * \param b String to append.
220 *
221 * Append \p b to \p a.
222 *
223 * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
224 * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
225 * return \a a without making a copy of \a a. Otherwise, construct the
226 * concatenation \a c, free \a a (but not \a b) and return \a c.
227 *
228 * \sa strcat(3).
229 */
230 __must_check __malloc char *para_strcat(char *a, const char *b)
231 {
232 char *tmp;
233
234 if (!a)
235 return para_strdup(b);
236 if (!b)
237 return a;
238 tmp = make_message("%s%s", a, b);
239 free(a);
240 return tmp;
241 }
242
243 /**
244 * Paraslash's version of dirname().
245 *
246 * \param name Pointer to the full path.
247 *
248 * Compute the directory component of \p name.
249 *
250 * \return If \a name is \p NULL or the empty string, return \p NULL.
251 * Otherwise, Make a copy of \a name and return its directory component. Caller
252 * is responsible to free the result.
253 */
254 __must_check __malloc char *para_dirname(const char *name)
255 {
256 char *p, *ret;
257
258 if (!name || !*name)
259 return NULL;
260 ret = para_strdup(name);
261 p = strrchr(ret, '/');
262 if (!p)
263 *ret = '\0';
264 else
265 *p = '\0';
266 return ret;
267 }
268
269 /**
270 * Paraslash's version of basename().
271 *
272 * \param name Pointer to the full path.
273 *
274 * Compute the filename component of \a name.
275 *
276 * \return \p NULL if (a) \a name is the empty string or \p NULL, or (b) name
277 * ends with a slash. Otherwise, a pointer within \a name is returned. Caller
278 * must not free the result.
279 */
280 __must_check char *para_basename(const char *name)
281 {
282 char *ret;
283
284 if (!name || !*name)
285 return NULL;
286 ret = strrchr(name, '/');
287 if (!ret)
288 return (char *)name;
289 ret++;
290 return ret;
291 }
292
293 /**
294 * Get the logname of the current user.
295 *
296 * \return A dynamically allocated string that must be freed by the caller. On
297 * errors, the string "unknown_user" is returned, i.e. this function never
298 * returns \p NULL.
299 *
300 * \sa getpwuid(3).
301 */
302 __must_check __malloc char *para_logname(void)
303 {
304 struct passwd *pw = getpwuid(getuid());
305 return para_strdup(pw? pw->pw_name : "unknown_user");
306 }
307
308 /**
309 * Get the home directory of the current user.
310 *
311 * \return A dynamically allocated string that must be freed by the caller. If
312 * the home directory could not be found, this function returns "/tmp".
313 */
314 __must_check __malloc char *para_homedir(void)
315 {
316 struct passwd *pw = getpwuid(getuid());
317 return para_strdup(pw? pw->pw_dir : "/tmp");
318 }
319
320 /**
321 * Get the own hostname.
322 *
323 * \return A dynamically allocated string containing the hostname.
324 *
325 * \sa uname(2).
326 */
327 __malloc char *para_hostname(void)
328 {
329 struct utsname u;
330
331 uname(&u);
332 return para_strdup(u.nodename);
333 }
334
335 /**
336 * Call a custom function for each complete line.
337 *
338 * \param flags Any combination of flags defined in \ref for_each_line_flags.
339 * \param buf The buffer containing data separated by newlines.
340 * \param size The number of bytes in \a buf.
341 * \param line_handler The custom function.
342 * \param private_data Pointer passed to \a line_handler.
343 *
344 * For each complete line in \p buf, \p line_handler is called. The first
345 * argument to \p line_handler is (a copy of) the current line, and \p
346 * private_data is passed as the second argument. If the \p FELF_READ_ONLY
347 * flag is unset, a pointer into \a buf is passed to the line handler,
348 * otherwise a pointer to a copy of the current line is passed instead. This
349 * copy is freed immediately after the line handler returns.
350 *
351 * The function returns if \p line_handler returns a negative value or no more
352 * lines are in the buffer. The rest of the buffer (last chunk containing an
353 * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
354 * unset.
355 *
356 * \return On success this function returns the number of bytes not handled to
357 * \p line_handler. The only possible error is a negative return value from the
358 * line handler. In this case processing stops and the return value of the line
359 * handler is returned to indicate failure.
360 *
361 * \sa \ref for_each_line_flags.
362 */
363 int for_each_line(unsigned flags, char *buf, size_t size,
364 line_handler_t *line_handler, void *private_data)
365 {
366 char *start = buf, *end;
367 int ret, i, num_lines = 0;
368
369 // PARA_NOTICE_LOG("buf: %s\n", buf);
370 while (start < buf + size) {
371 char *next_null;
372 char *next_cr;
373
374 next_cr = memchr(start, '\n', buf + size - start);
375 next_null = memchr(start, '\0', next_cr?
376 next_cr - start : buf + size - start);
377 if (!next_cr && !next_null)
378 break;
379 if (next_null)
380 end = next_null;
381 else
382 end = next_cr;
383 num_lines++;
384 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
385 if (flags & FELF_READ_ONLY) {
386 size_t s = end - start;
387 char *b = para_malloc(s + 1);
388 memcpy(b, start, s);
389 b[s] = '\0';
390 ret = line_handler(b, private_data);
391 free(b);
392 } else {
393 *end = '\0';
394 ret = line_handler(start, private_data);
395 }
396 if (ret < 0)
397 return ret;
398 }
399 start = ++end;
400 }
401 i = buf + size - start;
402 if (i && i != size && !(flags & FELF_READ_ONLY))
403 memmove(buf, start, i);
404 return i;
405 }
406
/** Return the hex character of the lower 4 bits. */
#define hex(a) (hexchar[(a) & 15])

/*
 * Write the size prefix: the lower 16 bits of n as four lowercase hex digits,
 * most significant digit first, followed by a single space. Exactly five
 * bytes of buf are written and no terminating null byte is added.
 */
static void write_size_header(char *buf, int n)
{
	static char hexchar[] = "0123456789abcdef";
	int pos, shift;

	for (pos = 0, shift = 12; pos < 4; pos++, shift -= 4)
		buf[pos] = hex(n >> shift);
	buf[4] = ' ';
}
420
421 /**
422 * Read a four-byte hex-number and return its value.
423 *
424 * Each status item sent by para_server is prefixed with such a hex number in
425 * ASCII which describes the size of the status item.
426 *
427 * \param buf The buffer which must be at least four bytes long.
428 *
429 * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
430 * buffer did not contain only hex digits.
431 */
432 int read_size_header(const char *buf)
433 {
434 int i, len = 0;
435
436 for (i = 0; i < 4; i++) {
437 unsigned char c = buf[i];
438 len <<= 4;
439 if (c >= '0' && c <= '9') {
440 len += c - '0';
441 continue;
442 }
443 if (c >= 'a' && c <= 'f') {
444 len += c - 'a' + 10;
445 continue;
446 }
447 return -E_SIZE_PREFIX;
448 }
449 if (buf[4] != ' ')
450 return -E_SIZE_PREFIX;
451 return len;
452 }
453
454 /**
455 * Safely print into a buffer at a given offset.
456 *
457 * \param b Determines the buffer, its size, and the offset.
458 * \param fmt The format string.
459 *
460 * This function prints into the buffer given by \a b at the offset which is
461 * also given by \a b. If there is not enough space to hold the result, the
462 * buffer size is doubled until the underlying call to vsnprintf() succeeds
463 * or the size of the buffer exceeds the maximal size specified in \a b.
464 *
465 * In the latter case the unmodified \a buf and \a offset values as well as the
466 * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
467 * If this function succeeds, i.e. returns a non-negative value, the offset of
468 * \a b is reset to zero and the given data is written to the beginning of the
469 * buffer. If \a max_size_handler() returns a negative value, this value is
470 * returned by \a para_printf().
471 *
472 * Upon return, the offset of \a b is adjusted accordingly so that subsequent
473 * calls to this function append data to what is already contained in the
474 * buffer.
475 *
476 * It's OK to call this function with \p b->buf being \p NULL. In this case, an
477 * initial buffer is allocated.
478 *
479 * \return The number of bytes printed into the buffer (not including the
480 * terminating \p NULL byte) on success, negative on errors. If there is no
481 * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
482 * fails.
483 *
484 * \sa make_message(), vsnprintf(3).
485 */
486 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
487 {
488 int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
489
490 if (!b->buf) {
491 b->buf = para_malloc(128);
492 b->size = 128;
493 b->offset = 0;
494 }
495 while (1) {
496 char *p = b->buf + b->offset;
497 size_t size = b->size - b->offset;
498 va_list ap;
499
500 if (size > sz_off) {
501 va_start(ap, fmt);
502 ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
503 va_end(ap);
504 if (ret > -1 && ret < size - sz_off) { /* success */
505 b->offset += ret + sz_off;
506 if (sz_off)
507 write_size_header(p, ret);
508 return ret + sz_off;
509 }
510 }
511 /* check if we may grow the buffer */
512 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
513 /* try again with more space */
514 b->size *= 2;
515 b->buf = para_realloc(b->buf, b->size);
516 continue;
517 }
518 /* can't grow buffer */
519 if (!b->offset || !b->max_size_handler) /* message too large */
520 return -ERRNO_TO_PARA_ERROR(ENOSPC);
521 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
522 if (ret < 0)
523 return ret;
524 b->offset = 0;
525 }
526 }
527
528 /** \cond llong_minmax */
529 /* LLONG_MAX and LLONG_MIN might not be defined. */
530 #ifndef LLONG_MAX
531 #define LLONG_MAX 9223372036854775807LL
532 #endif
533 #ifndef LLONG_MIN
534 #define LLONG_MIN (-LLONG_MAX - 1LL)
535 #endif
536 /** \endcond llong_minmax */
537
538 /**
539 * Convert a string to a 64-bit signed integer value.
540 *
541 * \param str The string to be converted.
542 * \param value Result pointer.
543 *
544 * \return Standard.
545 *
546 * \sa \ref para_atoi32(), strtol(3), atoi(3).
547 */
548 int para_atoi64(const char *str, int64_t *value)
549 {
550 char *endptr;
551 long long tmp;
552
553 errno = 0; /* To distinguish success/failure after call */
554 tmp = strtoll(str, &endptr, 10);
555 if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
556 return -E_ATOI_OVERFLOW;
557 /*
558 * If there were no digits at all, strtoll() stores the original value
559 * of str in *endptr.
560 */
561 if (endptr == str)
562 return -E_ATOI_NO_DIGITS;
563 /*
564 * The implementation may also set errno and return 0 in case no
565 * conversion was performed.
566 */
567 if (errno != 0 && tmp == 0)
568 return -E_ATOI_NO_DIGITS;
569 if (*endptr != '\0') /* Further characters after number */
570 return -E_ATOI_JUNK_AT_END;
571 *value = tmp;
572 return 1;
573 }
574
575 /**
576 * Convert a string to a 32-bit signed integer value.
577 *
578 * \param str The string to be converted.
579 * \param value Result pointer.
580 *
581 * \return Standard.
582 *
583 * \sa \ref para_atoi64().
584 */
585 int para_atoi32(const char *str, int32_t *value)
586 {
587 int64_t tmp;
588 int ret;
589 const int32_t max = 2147483647;
590
591 ret = para_atoi64(str, &tmp);
592 if (ret < 0)
593 return ret;
594 if (tmp > max || tmp < -max - 1)
595 return -E_ATOI_OVERFLOW;
596 *value = tmp;
597 return 1;
598 }
599
/*
 * Check whether arg starts with the loglevel name ll, ignoring case. Only the
 * first strlen(ll) characters are compared, so trailing characters in arg do
 * not prevent a match.
 */
static inline int loglevel_equal(const char *arg, const char * const ll)
{
	const size_t prefix_len = strlen(ll);

	return strncasecmp(arg, ll, prefix_len) == 0;
}
604
605 /**
606 * Compute the loglevel number from its name.
607 *
608 * \param txt The name of the loglevel (debug, info, ...).
609 *
610 * \return The numeric representation of the loglevel name.
611 */
612 int get_loglevel_by_name(const char *txt)
613 {
614 if (loglevel_equal(txt, "debug"))
615 return LL_DEBUG;
616 if (loglevel_equal(txt, "info"))
617 return LL_INFO;
618 if (loglevel_equal(txt, "notice"))
619 return LL_NOTICE;
620 if (loglevel_equal(txt, "warning"))
621 return LL_WARNING;
622 if (loglevel_equal(txt, "error"))
623 return LL_ERROR;
624 if (loglevel_equal(txt, "crit"))
625 return LL_CRIT;
626 if (loglevel_equal(txt, "emerg"))
627 return LL_EMERG;
628 return -E_BAD_LL;
629 }
630
/*
 * Extract the first word of buf into a newly allocated string.
 *
 * Leading delimiters are skipped. A backslash escapes the next character,
 * with \n and \t being translated to newline and tab. Single and double
 * quotes group characters (including delimiters) into one word. The
 * characters backslash, quote, 'n' and 't' are handled before the delimiter
 * check and are therefore never treated as delimiters.
 *
 * On success, the word is returned via the word pointer and the return value
 * is the number of bytes of buf that were consumed, i.e. the offset of the
 * delimiter which ended the word, or of the terminating null byte. If buf
 * contains no word, zero is returned. A trailing backslash or an unmatched
 * quote results in a negative return value. In the latter two cases no
 * memory is handed out and *word is set to NULL.
 */
static int get_next_word(const char *buf, const char *delim, char **word)
{
	enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
		LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
	/* In any of these states a delimiter is an ordinary character. */
	const int protected = LSF_BACKSLASH | LSF_SINGLE_QUOTE
		| LSF_DOUBLE_QUOTE;
	const char *in;
	char *out;
	int ret, state = 0;

	/* The word can not be longer than the input. */
	out = para_malloc(strlen(buf) + 1);
	*out = '\0';
	*word = out;
	for (in = buf; *in; in++) {
		switch (*in) {
		case '\\':
			if (state & LSF_BACKSLASH) /* \\ */
				goto copy_char;
			state |= LSF_BACKSLASH | LSF_HAVE_WORD;
			continue;
		case 'n':
		case 't':
			if (!(state & LSF_BACKSLASH))
				goto copy_char;
			/* \n or \t */
			*out++ = (*in == 'n')? '\n' : '\t';
			state &= ~LSF_BACKSLASH;
			continue;
		case '"':
			/* \" or '" */
			if (state & (LSF_BACKSLASH | LSF_SINGLE_QUOTE))
				goto copy_char;
			if (state & LSF_DOUBLE_QUOTE) { /* closing quote */
				state &= ~LSF_DOUBLE_QUOTE;
				continue;
			}
			state |= LSF_HAVE_WORD | LSF_DOUBLE_QUOTE;
			continue;
		case '\'':
			/* \' or "' */
			if (state & (LSF_BACKSLASH | LSF_DOUBLE_QUOTE))
				goto copy_char;
			if (state & LSF_SINGLE_QUOTE) { /* closing quote */
				state &= ~LSF_SINGLE_QUOTE;
				continue;
			}
			state |= LSF_HAVE_WORD | LSF_SINGLE_QUOTE;
			continue;
		}
		if (strchr(delim, *in)) {
			if (state & protected)
				goto copy_char;
			if (state & LSF_HAVE_WORD)
				goto success;
			continue; /* ignore delimiter at the beginning */
		}
copy_char:
		state |= LSF_HAVE_WORD;
		*out++ = *in;
		state &= ~LSF_BACKSLASH;
	}
	/* End of input reached. */
	ret = 0;
	if (!(state & LSF_HAVE_WORD))
		goto out;
	ret = -ERRNO_TO_PARA_ERROR(EINVAL);
	if (state & LSF_BACKSLASH) {
		PARA_ERROR_LOG("trailing backslash\n");
		goto out;
	}
	if (state & (LSF_SINGLE_QUOTE | LSF_DOUBLE_QUOTE)) {
		PARA_ERROR_LOG("unmatched quote character\n");
		goto out;
	}
success:
	*out = '\0';
	return in - buf;
out:
	free(*word);
	*word = NULL;
	return ret;
}
725
/**
 * Get the number of the word the cursor is on.
 *
 * \param buf The zero-terminated line buffer.
 * \param delim Characters that separate words.
 * \param point The cursor position, as a byte offset into \a buf.
 *
 * The words of \a buf are parsed one by one until either parsing stops (end
 * of buffer or parse error) or a word ends at or beyond the cursor position.
 *
 * \return Zero-based word number.
 */
int compute_word_num(const char *buf, const char *delim, int point)
{
	const char *pos = buf;
	int count = 0;

	for (;;) {
		char *word;
		int len = get_next_word(pos, delim, &word);

		if (len <= 0)
			break;
		/* Only the length of the word is of interest here. */
		free(word);
		if (pos + len >= buf + point)
			break;
		pos += len;
		count++;
	}
	return count;
}
751
/**
 * Free an array of words created by create_argv() or create_shifted_argv().
 *
 * \param argv A pointer previously obtained by \ref create_argv(). It's OK to
 * pass \p NULL here, in which case the function does nothing.
 *
 * All strings up to the first \p NULL entry are freed, then the array itself.
 */
void free_argv(char **argv)
{
	char **p;

	if (argv == NULL)
		return;
	for (p = argv; *p != NULL; p++)
		free(*p);
	free(argv);
}
767
/*
 * Split buf into words and store them in a NULL-terminated array, starting at
 * index offset. The first offset elements of the array are set to NULL. On
 * success the array is returned via result and the return value is the index
 * of the terminating NULL entry, i.e. offset plus the number of words. On
 * parse errors everything is freed, *result is set to NULL and the negative
 * return value of get_next_word() is returned.
 */
static int create_argv_offset(int offset, const char *buf, const char *delim,
		char ***result)
{
	char **argv = para_malloc((offset + 1) * sizeof(char *));
	const char *p = buf;
	int n, ret;

	for (n = 0; n < offset; n++)
		argv[n] = NULL;
	while (p && *p) {
		char *word;

		ret = get_next_word(p, delim, &word);
		if (ret < 0)
			goto fail;
		if (ret == 0) /* no more words */
			break;
		/* Make room for the new word and the terminating NULL. */
		argv = para_realloc(argv, (n + 2) * sizeof(char*));
		argv[n++] = word;
		p += ret;
	}
	argv[n] = NULL;
	*result = argv;
	return n;
fail:
	while (n > 0)
		free(argv[--n]);
	free(argv);
	*result = NULL;
	return ret;
}
796
/**
 * Split a buffer into words.
 *
 * This parser honors single and double quotes, backslash-escaped characters
 * and special characters like \\n. The result contains pointers to copies of
 * the words contained in buf and has to be freed by using \ref free_argv().
 *
 * \param buf The buffer to be split.
 * \param delim Each character in this string is treated as a separator.
 * \param result The array of words is returned here.
 *
 * It's OK to pass NULL as the buffer argument. This is equivalent to passing
 * the empty string.
 *
 * \return Number of words in buf, negative on errors. The array returned
 * through the result pointer is NULL terminated. On errors, \p NULL is
 * returned through the result pointer.
 */
int create_argv(const char *buf, const char *delim, char ***result)
{
	const int first_word_index = 0;

	return create_argv_offset(first_word_index, buf, delim, result);
}
818
/**
 * Split a buffer into words, offset one.
 *
 * This is similar to \ref create_argv() but the returned array is one element
 * larger, words start at index one and element zero is initialized to \p NULL.
 * Since free_argv() stops at the first \p NULL entry, callers must set
 * element zero to a non-NULL value before calling free_argv() on the returned
 * array to avoid a memory leak.
 *
 * \param buf See \ref create_argv().
 * \param delim See \ref create_argv().
 * \param result See \ref create_argv().
 *
 * \return Number of words plus one on success, negative on errors.
 */
int create_shifted_argv(const char *buf, const char *delim, char ***result)
{
	const int first_word_index = 1;

	return create_argv_offset(first_word_index, buf, delim, result);
}
837
838 /**
839 * Find out if the given string is contained in the arg vector.
840 *
841 * \param arg The string to look for.
842 * \param argv The array to search.
843 *
844 * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
845 * arg was not found in \a argv.
846 */
847 int find_arg(const char *arg, char **argv)
848 {
849 int i;
850
851 if (!argv)
852 return -E_ARG_NOT_FOUND;
853 for (i = 0; argv[i]; i++)
854 if (strcmp(arg, argv[i]) == 0)
855 return i;
856 return -E_ARG_NOT_FOUND;
857 }
858
859 /**
860 * Compile a regular expression.
861 *
862 * This simple wrapper calls regcomp() and logs a message on errors.
863 *
864 * \param preg See regcomp(3).
865 * \param regex See regcomp(3).
866 * \param cflags See regcomp(3).
867 *
868 * \return Standard.
869 */
870 int para_regcomp(regex_t *preg, const char *regex, int cflags)
871 {
872 char *buf;
873 size_t size;
874 int ret = regcomp(preg, regex, cflags);
875
876 if (ret == 0)
877 return 1;
878 size = regerror(ret, preg, NULL, 0);
879 buf = para_malloc(size);
880 regerror(ret, preg, buf, size);
881 PARA_ERROR_LOG("%s\n", buf);
882 free(buf);
883 return -E_REGEX;
884 }
885
/**
 * strdup() for not necessarily zero-terminated strings.
 *
 * \param src The source buffer. It is not accessed if \a len is zero.
 * \param len The number of bytes to be copied.
 *
 * \return A 0-terminated buffer of length \a len + 1 which contains the first
 * \a len bytes of \a src.
 *
 * This is similar to strndup(), which is a GNU extension. However, one
 * difference is that strndup() returns \p NULL if insufficient memory was
 * available while this function aborts in this case. Moreover, exactly \a
 * len bytes are copied, regardless of any null bytes in \a src.
 *
 * \sa strdup(), \ref para_strdup().
 */
char *safe_strdup(const char *src, size_t len)
{
	char *copy;

	/* Make sure that len + 1 does not wrap around. */
	assert(len < (size_t)-1);
	copy = para_malloc(len + 1);
	copy[len] = '\0';
	if (len > 0)
		memcpy(copy, src, len);
	return copy;
}
911
/**
 * Copy the value of a key=value pair.
 *
 * This checks whether the given buffer starts with "key=", ignoring the case
 * of the key. If yes, a copy of the value is returned. The source buffer need
 * not be zero-terminated.
 *
 * \param src The source buffer.
 * \param len The number of bytes in \a src.
 * \param key Only copy if it is the value of this key.
 *
 * \return A zero-terminated buffer that was allocated by \ref safe_strdup(),
 * or \p NULL if \a src is too short or does not start with "key=".
 */
char *key_value_copy(const char *src, size_t len, const char *key)
{
	int keylen = strlen(key);

	if (len <= keylen || strncasecmp(src, key, keylen) != 0
			|| src[keylen] != '=')
		return NULL;
	/* Skip the key and the equal sign. */
	return safe_strdup(src + keylen + 1, len - keylen - 1);
}
938
/*
 * Tell whether the codeset of the current locale is UTF-8.
 *
 * The codeset is queried only once. The result is cached in a static variable
 * and a log message is emitted on the first call.
 */
static bool utf8_mode(void)
{
	static bool initialized, have_utf8;
	const char *codeset;

	if (initialized)
		return have_utf8;
	codeset = nl_langinfo(CODESET);
	have_utf8 = codeset != NULL && !strcmp(codeset, "UTF-8");
	/*
	 * NOTE(review): the flag is set before the log call, presumably so
	 * that a log function which ends up here again sees the cached value.
	 */
	initialized = true;
	PARA_INFO_LOG("%susing UTF-8 character encoding\n",
		have_utf8? "" : "not ");
	return have_utf8;
}
952
/*
 * Compute the number of cells a wide character occupies at the given column.
 *
 * A tab advances to the next multiple of eight, so its width depends on pos.
 * All other characters have the width reported by wcwidth(), except that
 * characters for which wcwidth() fails count as one cell.
 */
static int xwcwidth(wchar_t wc, size_t pos)
{
	int cells;

	if (wc == 0x09) /* tab */
		return (pos | 7) + 1 - pos;
	cells = wcwidth(wc);
	/* wcwidth() returns -1 for non-printable characters */
	if (cells < 0)
		return 1;
	return cells;
}
964
/*
 * Compute the total width of the first n wide characters of s. The running
 * total is passed on as the column so that tabs are expanded properly.
 */
static size_t xwcswidth(const wchar_t *s, size_t n)
{
	size_t width = 0, i;

	for (i = 0; i < n; i++)
		width += xwcwidth(s[i], width);
	return width;
}
973
/**
 * Skip a given number of cells at the beginning of a string.
 *
 * \param s The input string.
 * \param cells_to_skip Desired number of cells that should be skipped.
 * \param bytes_to_skip Result.
 *
 * This function computes how many input bytes must be skipped to advance a
 * string by the given width. If the current character encoding is not UTF-8,
 * this is simply the given number of cells, i.e. \a cells_to_skip. Note that
 * the length of \a s is not checked in this case. Otherwise, \a s is treated
 * as a multibyte string and on successful return, \a s + bytes_to_skip points
 * to the start of a multibyte string such that the total width of the
 * multibyte characters that are skipped by advancing \a s that many bytes
 * equals at least \a cells_to_skip.
 *
 * \return Zero if nothing needs to be skipped or if not in UTF-8 mode, one if
 * the multibyte string was parsed successfully. If an invalid or incomplete
 * multibyte sequence is encountered, an \p EILSEQ based error code is
 * returned and the result value is zero. NOTE(review): presumably this is
 * also the outcome if \a s is narrower than \a cells_to_skip, because
 * mbrtowc() is then called with no bytes left to examine.
 */
int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
{
	mbstate_t ps;
	size_t len, nbytes = 0, ncells = 0;

	*bytes_to_skip = 0;
	if (cells_to_skip == 0)
		return 0;
	if (!utf8_mode()) {
		/* One byte per cell. */
		*bytes_to_skip = cells_to_skip;
		return 0;
	}
	memset(&ps, 0, sizeof(ps));
	len = strlen(s);
	do {
		wchar_t wc;
		size_t mbret = mbrtowc(&wc, s + nbytes, len - nbytes, &ps);

		/* The terminating null byte is never passed to mbrtowc(). */
		assert(mbret != 0);
		if (mbret == (size_t)-1 || mbret == (size_t)-2)
			return -ERRNO_TO_PARA_ERROR(EILSEQ);
		nbytes += mbret;
		ncells += xwcwidth(wc, ncells);
	} while (ncells < cells_to_skip);
	*bytes_to_skip = nbytes;
	return 1;
}
1020
1021 /**
1022 * Compute the width of an UTF-8 string.
1023 *
1024 * \param s The string.
1025 * \param result The width of \a s is returned here.
1026 *
1027 * If not in UTF8-mode. this function is just a wrapper for strlen(3).
1028 * Otherwise \a s is treated as an UTF-8 string and its display width is
1029 * computed. Note that this function may fail if the underlying call to
1030 * mbsrtowcs(3) fails, so the caller must check the return value.
1031 *
1032 * \sa nl_langinfo(3), wcswidth(3).
1033 *
1034 * \return Standard.
1035 */
1036 __must_check int strwidth(const char *s, size_t *result)
1037 {
1038 const char *src = s;
1039 mbstate_t state;
1040 static wchar_t *dest;
1041 size_t num_wchars;
1042
1043 /*
1044 * Never call any log function here. This may result in an endless loop
1045 * as para_gui's para_log() calls this function.
1046 */
1047
1048 if (!utf8_mode()) {
1049 *result = strlen(s);
1050 return 0;
1051 }
1052 memset(&state, 0, sizeof(state));
1053 *result = 0;
1054 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1055 if (num_wchars == (size_t)-1)
1056 return -ERRNO_TO_PARA_ERROR(errno);
1057 if (num_wchars == 0)
1058 return 0;
1059 dest = para_malloc((num_wchars + 1) * sizeof(*dest));
1060 src = s;
1061 memset(&state, 0, sizeof(state));
1062 num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1063 assert(num_wchars > 0 && num_wchars != (size_t)-1);
1064 *result = xwcswidth(dest, num_wchars);
1065 free(dest);
1066 return 1;
1067 }
1068
1069 /**
1070 * Truncate and sanitize a (wide character) string.
1071 *
1072 * This replaces all non-printable characters by spaces and makes sure that the
1073 * modified string does not exceed the given maximal width.
1074 *
1075 * \param src The source string in multi-byte form.
1076 * \param max_width The maximal number of cells the result may occupy.
1077 * \param result Sanitized multi-byte string, must be freed by caller.
1078 * \param width The width of the sanitized string, always <= max_width.
1079 *
1080 * The function is wide-character aware but falls back to C strings for
1081 * non-UTF-8 locales.
1082 *
1083 * \return Standard. On success, *result points to a sanitized copy of the
1084 * given string. This copy was allocated with malloc() and should hence be
1085 * freed when the caller is no longer interested in the result.
1086 *
1087 * The function fails if the given string contains an invalid multibyte
1088 * sequence. In this case, *result is set to NULL, and *width to zero.
1089 */
1090 __must_check int sanitize_str(const char *src, size_t max_width,
1091 char **result, size_t *width)
1092 {
1093 mbstate_t state;
1094 static wchar_t *wcs;
1095 size_t num_wchars, n;
1096
1097 if (!utf8_mode()) {
1098 *result = para_strdup(src);
1099 /* replace non-printable characters by spaces */
1100 for (n = 0; n < max_width && src[n]; n++) {
1101 if (!isprint((unsigned char)src[n]))
1102 (*result)[n] = ' ';
1103 }
1104 (*result)[n] = '\0';
1105 *width = n;
1106 return 0;
1107 }
1108 *result = NULL;
1109 *width = 0;
1110 memset(&state, 0, sizeof(state));
1111 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1112 if (num_wchars == (size_t)-1)
1113 return -ERRNO_TO_PARA_ERROR(errno);
1114 wcs = para_malloc((num_wchars + 1) * sizeof(*wcs));
1115 memset(&state, 0, sizeof(state));
1116 num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
1117 assert(num_wchars != (size_t)-1);
1118 for (n = 0; n < num_wchars && *width < max_width; n++) {
1119 if (!iswprint(wcs[n]))
1120 wcs[n] = L' ';
1121 *width += xwcwidth(wcs[n], *width);
1122 }
1123 wcs[n] = L'\0';
1124 n = wcstombs(NULL, wcs, 0) + 1;
1125 *result = para_malloc(n);
1126 num_wchars = wcstombs(*result, wcs, n);
1127 assert(num_wchars != (size_t)-1);
1128 free(wcs);
1129 return 1;
1130 }