Merge topic branch t/openssl-3 into master
[paraslash.git] / string.c
1 /* Copyright (C) 2004 Andre Noll <maan@tuebingen.mpg.de>, see file COPYING. */
2
3 /** \file string.c Memory allocation and string handling functions. */
4
5 #include "para.h"
6
7 #include <pwd.h>
8 #include <sys/utsname.h> /* uname() */
9 #include <regex.h>
10 #include <langinfo.h>
11 #include <wchar.h>
12 #include <wctype.h>
13
14 #include "string.h"
15 #include "error.h"
16
/**
 * Resize an array, terminating the process on failure or misuse.
 *
 * \param ptr Pointer to the memory block, may be NULL.
 * \param nmemb Number of elements, must be positive.
 * \param size The size of one element in bytes, must be positive.
 *
 * A wrapper for realloc(3) which aborts if either argument is zero or if the
 * product of the two overflows. The wrapper also terminates the current
 * process on allocation errors, so the caller does not need to check for
 * failure.
 *
 * The overflow-checked multiplication is performed outside of assert() on
 * purpose: the argument of assert() is not evaluated when NDEBUG is defined,
 * which would leave the byte count uninitialized. For the same reason all
 * conditions are re-checked explicitly after the assertions.
 *
 * \return A pointer to newly allocated memory which is suitably aligned for
 * any kind of variable and may be different from ptr.
 *
 * \sa realloc(3).
 */
__must_check void *arr_realloc(void *ptr, size_t nmemb, size_t size)
{
	size_t pr = 0;
	int overflow = __builtin_mul_overflow(nmemb, size, &pr);

	/* The assertions give a helpful diagnostic in debug builds. */
	assert(size > 0);
	assert(nmemb > 0);
	assert(!overflow);
	/* Enforce the same conditions if assertions are compiled out. */
	if (size == 0 || nmemb == 0 || overflow)
		abort();
	ptr = realloc(ptr, pr);
	assert(ptr);
	if (!ptr)
		abort();
	return ptr;
}
45
46 /**
47 * Allocate an array, abort on failure or bugs.
48 *
49 * \param nmemb See \ref arr_realloc().
50 * \param size See \ref arr_realloc().
51 *
52 * Like \ref arr_realloc(), this aborts on invalid arguments, integer overflow
53 * and allocation errors.
54 *
55 * \return A pointer to newly allocated memory which is suitably aligned for
56 * any kind of variable.
57 *
58 * \sa See \ref arr_realloc().
59 */
60 __must_check __malloc void *arr_alloc(size_t nmemb, size_t size)
61 {
62 return arr_realloc(NULL, nmemb, size);
63 }
64
65 /**
66 * Allocate and initialize an array, abort on failure or bugs.
67 *
68 * \param nmemb See \ref arr_realloc().
69 * \param size See \ref arr_realloc().
70 *
71 * This calls \ref arr_alloc() and zeroes-out the array.
72 *
73 * \return See \ref arr_alloc().
74 */
75 __must_check __malloc void *arr_zalloc(size_t nmemb, size_t size)
76 {
77 void *ptr = arr_alloc(nmemb, size);
78
79 /*
80 * This multiplication can not overflow because the above call to \ref
81 * arr_alloc() aborts on overflow.
82 */
83 memset(ptr, 0, nmemb * size);
84 return ptr;
85 }
86
87 /**
88 * Allocate and initialize memory.
89 *
90 * \param size The desired new size.
91 *
92 * \return A pointer to the allocated and zeroed-out memory, which is suitably
93 * aligned for any kind of variable.
94 *
95 * \sa \ref alloc(), calloc(3).
96 */
97 __must_check void *zalloc(size_t size)
98 {
99 return arr_zalloc(1, size);
100 }
101
102 /**
103 * Paraslash's version of realloc().
104 *
105 * \param p Pointer to the memory block, may be \p NULL.
106 * \param size The desired new size.
107 *
108 * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
109 * i.e. there is no need to check the return value in the caller.
110 *
111 * \return A pointer to newly allocated memory which is suitably aligned for
112 * any kind of variable and may be different from \a p.
113 *
114 * \sa realloc(3).
115 */
116 __must_check void *para_realloc(void *p, size_t size)
117 {
118 return arr_realloc(p, 1, size);
119 }
120
121 /**
122 * Paraslash's version of malloc().
123 *
124 * \param size The desired new size.
125 *
126 * A wrapper for malloc(3) which exits on errors.
127 *
128 * \return A pointer to the allocated memory, which is suitably aligned for any
129 * kind of variable.
130 *
131 * \sa malloc(3).
132 */
133 __must_check __malloc void *alloc(size_t size)
134 {
135 return arr_alloc(1, size);
136 }
137
/**
 * Paraslash's version of strdup().
 *
 * \param s The string to be duplicated, may be NULL.
 *
 * A strdup(3)-like function which asserts that the duplication succeeded, so
 * the caller does not have to check for failure.
 *
 * \return A pointer to the duplicated string. Unlike strdup(3), the caller may
 * pass NULL, in which case a copy of the empty string is returned. In either
 * case the returned string lives on the heap and has to be freed by the
 * caller.
 *
 * \sa strdup(3).
 */
__must_check __malloc char *para_strdup(const char *s)
{
	const char *src = s;
	char *copy;

	if (!src)
		src = "";
	copy = strdup(src);
	assert(copy);
	return copy;
}
161
162 /**
163 * Print a formatted message to a dynamically allocated string.
164 *
165 * \param result The formatted string is returned here.
166 * \param fmt The format string.
167 * \param ap Initialized list of arguments.
168 *
169 * This function is similar to vasprintf(), a GNU extension which is not in C
170 * or POSIX. It allocates a string large enough to hold the output including
171 * the terminating null byte. The allocated string is returned via the first
172 * argument and must be freed by the caller. However, unlike vasprintf(), this
173 * function calls exit() if insufficient memory is available, while vasprintf()
174 * returns -1 in this case.
175 *
176 * \return Number of bytes written, not including the terminating \p NULL
177 * character.
178 *
179 * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
180 */
181 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
182 {
183 int ret;
184 size_t size = 150;
185 va_list aq;
186
187 *result = alloc(size + 1);
188 va_copy(aq, ap);
189 ret = vsnprintf(*result, size, fmt, aq);
190 va_end(aq);
191 assert(ret >= 0);
192 if (ret < size) /* OK */
193 return ret;
194 size = ret + 1;
195 *result = para_realloc(*result, size);
196 va_copy(aq, ap);
197 ret = vsnprintf(*result, size, fmt, aq);
198 va_end(aq);
199 assert(ret >= 0 && ret < size);
200 return ret;
201 }
202
203 /**
204 * Print to a dynamically allocated string, variable number of arguments.
205 *
206 * \param result See \ref xvasprintf().
207 * \param fmt Usual format string.
208 *
209 * \return The return value of the underlying call to \ref xvasprintf().
210 *
211 * \sa \ref xvasprintf() and the references mentioned there.
212 */
213 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
214 {
215 va_list ap;
216 unsigned ret;
217
218 va_start(ap, fmt);
219 ret = xvasprintf(result, fmt, ap);
220 va_end(ap);
221 return ret;
222 }
223
224 /**
225 * Allocate a sufficiently large string and print into it.
226 *
227 * \param fmt A usual format string.
228 *
229 * Produce output according to \p fmt. No artificial bound on the length of the
230 * resulting string is imposed.
231 *
232 * \return This function either returns a pointer to a string that must be
233 * freed by the caller or aborts without returning.
234 *
235 * \sa printf(3), \ref xasprintf().
236 */
237 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
238 {
239 char *msg;
240 va_list ap;
241
242 va_start(ap, fmt);
243 xvasprintf(&msg, fmt, ap);
244 va_end(ap);
245 return msg;
246 }
247
/**
 * Release the memory a pointer refers to and reset the pointer to NULL.
 *
 * \param arg The address of the pointer variable, or NULL.
 *
 * Nothing happens if arg is NULL. Otherwise arg is interpreted as the
 * *address* of a pointer: the memory this pointer refers to is freed and the
 * pointer itself is set to NULL.
 */
void freep(void *arg)
{
	void **slot = arg;

	if (!slot)
		return;
	free(*slot);
	*slot = NULL;
}
266
267 /**
268 * Paraslash's version of strcat().
269 *
270 * \param a String to be appended to.
271 * \param b String to append.
272 *
273 * Append \p b to \p a.
274 *
275 * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
276 * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
277 * return \a a without making a copy of \a a. Otherwise, construct the
278 * concatenation \a c, free \a a (but not \a b) and return \a c.
279 *
280 * \sa strcat(3).
281 */
282 __must_check __malloc char *para_strcat(char *a, const char *b)
283 {
284 char *tmp;
285
286 if (!a)
287 return para_strdup(b);
288 if (!b)
289 return a;
290 tmp = make_message("%s%s", a, b);
291 free(a);
292 return tmp;
293 }
294
295 /**
296 * Get the logname of the current user.
297 *
298 * \return A dynamically allocated string that must be freed by the caller. On
299 * errors, the string "unknown_user" is returned, i.e. this function never
300 * returns \p NULL.
301 *
302 * \sa getpwuid(3).
303 */
304 __must_check __malloc char *para_logname(void)
305 {
306 struct passwd *pw = getpwuid(getuid());
307 return para_strdup(pw? pw->pw_name : "unknown_user");
308 }
309
310 /**
311 * Get the home directory of the current user.
312 *
313 * \return A dynamically allocated string that must be freed by the caller. If
314 * the home directory could not be found, this function returns "/tmp".
315 */
316 __must_check __malloc char *para_homedir(void)
317 {
318 struct passwd *pw = getpwuid(getuid());
319 return para_strdup(pw? pw->pw_dir : "/tmp");
320 }
321
322 /**
323 * Get the own hostname.
324 *
325 * \return A dynamically allocated string containing the hostname.
326 *
327 * \sa uname(2).
328 */
329 __malloc char *para_hostname(void)
330 {
331 struct utsname u;
332
333 uname(&u);
334 return para_strdup(u.nodename);
335 }
336
337 /**
338 * Call a custom function for each complete line.
339 *
340 * \param flags Any combination of flags defined in \ref for_each_line_flags.
341 * \param buf The buffer containing data separated by newlines.
342 * \param size The number of bytes in \a buf.
343 * \param line_handler The custom function.
344 * \param private_data Pointer passed to \a line_handler.
345 *
346 * For each complete line in \p buf, \p line_handler is called. The first
347 * argument to \p line_handler is (a copy of) the current line, and \p
348 * private_data is passed as the second argument. If the \p FELF_READ_ONLY
349 * flag is unset, a pointer into \a buf is passed to the line handler,
350 * otherwise a pointer to a copy of the current line is passed instead. This
351 * copy is freed immediately after the line handler returns.
352 *
353 * The function returns if \p line_handler returns a negative value or no more
354 * lines are in the buffer. The rest of the buffer (last chunk containing an
355 * incomplete line) is moved to the beginning of the buffer if FELF_READ_ONLY is
356 * unset.
357 *
358 * \return On success this function returns the number of bytes not handled to
359 * \p line_handler. The only possible error is a negative return value from the
360 * line handler. In this case processing stops and the return value of the line
361 * handler is returned to indicate failure.
362 *
363 * \sa \ref for_each_line_flags.
364 */
365 int for_each_line(unsigned flags, char *buf, size_t size,
366 line_handler_t *line_handler, void *private_data)
367 {
368 char *start = buf, *end;
369 int ret, i, num_lines = 0;
370
371 // PARA_NOTICE_LOG("buf: %s\n", buf);
372 while (start < buf + size) {
373 char *next_null;
374 char *next_cr;
375
376 next_cr = memchr(start, '\n', buf + size - start);
377 next_null = memchr(start, '\0', next_cr?
378 next_cr - start : buf + size - start);
379 if (!next_cr && !next_null)
380 break;
381 if (next_null)
382 end = next_null;
383 else
384 end = next_cr;
385 num_lines++;
386 if (!(flags & FELF_DISCARD_FIRST) || start != buf) {
387 if (flags & FELF_READ_ONLY) {
388 size_t s = end - start;
389 char *b = alloc(s + 1);
390 memcpy(b, start, s);
391 b[s] = '\0';
392 ret = line_handler(b, private_data);
393 free(b);
394 } else {
395 *end = '\0';
396 ret = line_handler(start, private_data);
397 }
398 if (ret < 0)
399 return ret;
400 }
401 start = ++end;
402 }
403 i = buf + size - start;
404 if (i && i != size && !(flags & FELF_READ_ONLY))
405 memmove(buf, start, i);
406 return i;
407 }
408
/** Return the hex character of the lower 4 bits. */
#define hex(a) (hexchar[(a) & 15])

/*
 * Write the lower 16 bits of n as four lowercase hex digits, most significant
 * digit first, followed by a single space. Exactly five bytes are stored to
 * buf and no terminating NUL byte is written.
 */
static void write_size_header(char *buf, int n)
{
	static char hexchar[] = "0123456789abcdef";
	int shift, pos = 0;

	for (shift = 12; shift >= 0; shift -= 4)
		buf[pos++] = hex(n >> shift);
	buf[pos] = ' ';
}
422
/**
 * Parse a size prefix: four hex digits followed by a space.
 *
 * Each status item sent by para_server is prefixed with such a hex number in
 * ASCII which describes the size of the status item.
 *
 * \param buf The buffer to parse. Five bytes are examined: the four digits
 * and the space character which must follow them.
 *
 * Only the digits 0-9 and the lowercase letters a-f are accepted.
 *
 * \return The value of the hex number on success, \p -E_SIZE_PREFIX if one of
 * the first four bytes is not such a digit or if the fifth byte is not a
 * space.
 */
int read_size_header(const char *buf)
{
	const char *p;
	int value = 0;

	for (p = buf; p < buf + 4; p++) {
		unsigned char c = *p;
		int digit;

		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'f')
			digit = c - 'a' + 10;
		else
			return -E_SIZE_PREFIX;
		value = (value << 4) + digit;
	}
	if (buf[4] != ' ')
		return -E_SIZE_PREFIX;
	return value;
}
455
456 /**
457 * Safely print into a buffer at a given offset.
458 *
459 * \param b Determines the buffer, its size, and the offset.
460 * \param fmt The format string.
461 *
462 * This function prints into the buffer given by \a b at the offset which is
463 * also given by \a b. If there is not enough space to hold the result, the
464 * buffer size is doubled until the underlying call to vsnprintf() succeeds
465 * or the size of the buffer exceeds the maximal size specified in \a b.
466 *
467 * In the latter case the unmodified \a buf and \a offset values as well as the
468 * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
469 * If this function succeeds, i.e. returns a non-negative value, the offset of
470 * \a b is reset to zero and the given data is written to the beginning of the
471 * buffer. If \a max_size_handler() returns a negative value, this value is
472 * returned by \a para_printf().
473 *
474 * Upon return, the offset of \a b is adjusted accordingly so that subsequent
475 * calls to this function append data to what is already contained in the
476 * buffer.
477 *
478 * It's OK to call this function with \p b->buf being \p NULL. In this case, an
479 * initial buffer is allocated.
480 *
481 * \return The number of bytes printed into the buffer (not including the
482 * terminating \p NULL byte) on success, negative on errors. If there is no
483 * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
484 * fails.
485 *
486 * \sa make_message(), vsnprintf(3).
487 */
488 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
489 {
490 int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
491
492 if (!b->buf) {
493 b->buf = alloc(128);
494 b->size = 128;
495 b->offset = 0;
496 }
497 while (1) {
498 char *p = b->buf + b->offset;
499 size_t size = b->size - b->offset;
500 va_list ap;
501
502 if (size > sz_off) {
503 va_start(ap, fmt);
504 ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
505 va_end(ap);
506 if (ret > -1 && ret < size - sz_off) { /* success */
507 b->offset += ret + sz_off;
508 if (sz_off)
509 write_size_header(p, ret);
510 return ret + sz_off;
511 }
512 }
513 /* check if we may grow the buffer */
514 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
515 /* try again with more space */
516 b->size *= 2;
517 b->buf = para_realloc(b->buf, b->size);
518 continue;
519 }
520 /* can't grow buffer */
521 if (!b->offset || !b->max_size_handler) /* message too large */
522 return -ERRNO_TO_PARA_ERROR(ENOSPC);
523 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
524 if (ret < 0)
525 return ret;
526 b->offset = 0;
527 }
528 }
529
530 /** \cond llong_minmax */
531 /* LLONG_MAX and LLONG_MIN might not be defined. */
532 #ifndef LLONG_MAX
533 #define LLONG_MAX 9223372036854775807LL
534 #endif
535 #ifndef LLONG_MIN
536 #define LLONG_MIN (-LLONG_MAX - 1LL)
537 #endif
538 /** \endcond llong_minmax */
539
/**
 * Parse a decimal string into a 64-bit signed integer.
 *
 * \param str The string to be converted.
 * \param value Result pointer, only written to on success.
 *
 * The conversion is performed by strtoll(3) with base 10. The whole string
 * must be consumed by the conversion.
 *
 * \return One on success. Otherwise \p -E_ATOI_OVERFLOW if the number is out
 * of range, \p -E_ATOI_NO_DIGITS if no conversion was performed, or \p
 * -E_ATOI_JUNK_AT_END if further characters follow the number.
 *
 * \sa \ref para_atoi32(), strtol(3), atoi(3).
 */
int para_atoi64(const char *str, int64_t *value)
{
	char *stop;
	long long parsed;
	int conv_errno;

	errno = 0; /* strtoll() only sets errno on failure */
	parsed = strtoll(str, &stop, 10);
	conv_errno = errno;
	if (conv_errno == ERANGE && (parsed == LLONG_MAX || parsed == LLONG_MIN))
		return -E_ATOI_OVERFLOW;
	/* Without any digits, strtoll() hands back the start of the string. */
	if (stop == str)
		return -E_ATOI_NO_DIGITS;
	/*
	 * Implementations are also allowed to report "no conversion" by
	 * setting errno and returning zero.
	 */
	if (conv_errno != 0 && parsed == 0)
		return -E_ATOI_NO_DIGITS;
	if (*stop != '\0') /* trailing garbage */
		return -E_ATOI_JUNK_AT_END;
	*value = parsed;
	return 1;
}
576
577 /**
578 * Convert a string to a 32-bit signed integer value.
579 *
580 * \param str The string to be converted.
581 * \param value Result pointer.
582 *
583 * \return Standard.
584 *
585 * \sa \ref para_atoi64().
586 */
587 int para_atoi32(const char *str, int32_t *value)
588 {
589 int64_t tmp;
590 int ret;
591 const int32_t max = 2147483647;
592
593 ret = para_atoi64(str, &tmp);
594 if (ret < 0)
595 return ret;
596 if (tmp > max || tmp < -max - 1)
597 return -E_ATOI_OVERFLOW;
598 *value = tmp;
599 return 1;
600 }
601
/*
 * Extract the first word of buf, honoring quotes and backslash escapes.
 *
 * A buffer large enough for the whole input is allocated and the unquoted,
 * unescaped word is copied into it. A backslash makes the next character
 * literal, except that \n and \t are translated to newline and tab. Single
 * quotes are literal within double quotes and vice versa. The characters 'n',
 * 't', quotes and backslashes are never treated as delimiters. Delimiters in
 * front of the word are skipped.
 *
 * Returns the number of input bytes consumed if a word was found. In this
 * case *word points to the NUL-terminated word which must be freed by the
 * caller. Returns zero if buf contains no word, negative if the input ends
 * with a backslash or within a quoted section. In both cases *word is set to
 * NULL.
 */
static int get_next_word(const char *buf, const char *delim, char **word)
{
	enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
		LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
	const char *in;
	char *out;
	int ret, state = 0;

	out = alloc(strlen(buf) + 1);
	*out = '\0';
	*word = out;
	for (in = buf; *in; in++) {
		const char c = *in;
		const int escaped = state & LSF_BACKSLASH;

		if (c == '\\') {
			if (!escaped) { /* start of an escape sequence */
				state |= LSF_BACKSLASH | LSF_HAVE_WORD;
				continue;
			}
			/* \\ is a literal backslash */
		} else if (c == 'n' || c == 't') {
			if (escaped) { /* \n or \t */
				*out++ = (c == 'n')? '\n' : '\t';
				state &= ~LSF_BACKSLASH;
				continue;
			}
		} else if (c == '"' || c == '\'') {
			const int own = (c == '"')?
				LSF_DOUBLE_QUOTE : LSF_SINGLE_QUOTE;
			const int other = (c == '"')?
				LSF_SINGLE_QUOTE : LSF_DOUBLE_QUOTE;

			/*
			 * The quote is literal if it is escaped or enclosed in
			 * quotes of the other kind. Otherwise it opens or
			 * closes a quoted section.
			 */
			if (!escaped && !(state & other)) {
				if (state & own)
					state &= ~own;
				else
					state |= LSF_HAVE_WORD | own;
				continue;
			}
		} else if (strchr(delim, c)) {
			const int literal = state & (LSF_BACKSLASH
				| LSF_SINGLE_QUOTE | LSF_DOUBLE_QUOTE);

			if (!literal) {
				if (state & LSF_HAVE_WORD) /* end of word */
					goto success;
				continue; /* ignore delimiter at the beginning */
			}
		}
		/* copy the character verbatim */
		state |= LSF_HAVE_WORD;
		*out++ = c;
		state &= ~LSF_BACKSLASH;
	}
	ret = 0;
	if (!(state & LSF_HAVE_WORD))
		goto out;
	ret = -ERRNO_TO_PARA_ERROR(EINVAL);
	if (state & LSF_BACKSLASH) {
		PARA_ERROR_LOG("trailing backslash\n");
		goto out;
	}
	if (state & (LSF_SINGLE_QUOTE | LSF_DOUBLE_QUOTE)) {
		PARA_ERROR_LOG("unmatched quote character\n");
		goto out;
	}
success:
	*out = '\0';
	return in - buf;
out:
	free(*word);
	*word = NULL;
	return ret;
}
696
/**
 * Determine the index of the word under the cursor.
 *
 * \param buf The zero-terminated line buffer.
 * \param delim Characters that separate words.
 * \param point The cursor position.
 *
 * Words are consumed from the start of the line until the cursor position is
 * reached or no further word can be extracted.
 *
 * \return Zero-based word number.
 */
int compute_word_num(const char *buf, const char *delim, int point)
{
	const char *pos = buf;
	int count = 0;

	for (;;) {
		char *word;
		int consumed = get_next_word(pos, delim, &word);

		if (consumed <= 0) /* no more words, or parse error */
			break;
		free(word);
		pos += consumed;
		if (pos >= buf + point) /* this word contains the cursor */
			break;
		count++;
	}
	return count;
}
722
/**
 * Release a word array created by create_argv() or create_shifted_argv().
 *
 * \param argv A pointer previously obtained by \ref create_argv(), or NULL.
 *
 * All strings up to the first NULL entry are freed, followed by the array
 * itself. Passing NULL is a no-op.
 */
void free_argv(char **argv)
{
	char **entry;

	if (argv == NULL)
		return;
	for (entry = argv; *entry != NULL; entry++)
		free(*entry);
	free(argv);
}
738
/*
 * Split buf into words and store them in a newly allocated, NULL-terminated
 * array, leaving the first offset entries of the array set to NULL.
 *
 * A NULL buffer is treated like the empty string. Returns offset plus the
 * number of words on success. If a word can not be parsed, everything
 * allocated so far is freed, *result is set to NULL and the negative return
 * value of get_next_word() is returned.
 */
static int create_argv_offset(int offset, const char *buf, const char *delim,
		char ***result)
{
	char **vec = arr_alloc(offset + 1, sizeof(char *));
	const char *cursor = buf;
	int n;

	for (n = 0; n < offset; n++)
		vec[n] = NULL;
	while (cursor && *cursor) {
		char *word;
		int ret = get_next_word(cursor, delim, &word);

		if (ret < 0) {
			while (n > 0)
				free(vec[--n]);
			free(vec);
			*result = NULL;
			return ret;
		}
		if (ret == 0) /* only delimiters are left */
			break;
		/* make room for the new word and the terminating NULL */
		vec = arr_realloc(vec, n + 2, sizeof(char *));
		vec[n++] = word;
		cursor += ret;
	}
	vec[n] = NULL;
	*result = vec;
	return n;
}
767
/**
 * Break a buffer up into an array of words.
 *
 * This parser honors single and double quotes, backslash-escaped characters
 * and special characters like \\n. The result contains pointers to copies of
 * the words contained in buf and has to be freed by using \ref free_argv().
 *
 * \param buf The buffer to be split.
 * \param delim Each character in this string is treated as a separator.
 * \param result The array of words is returned here.
 *
 * It's OK to pass NULL as the buffer argument. This is equivalent to passing
 * the empty string.
 *
 * \return Number of words in buf, negative on errors. The array returned
 * through the result pointer is NULL terminated.
 */
int create_argv(const char *buf, const char *delim, char ***result)
{
	const int first_word_index = 0;

	return create_argv_offset(first_word_index, buf, delim, result);
}
789
/**
 * Break a buffer up into an array of words, leaving slot zero unused.
 *
 * This is similar to \ref create_argv() but the returned array is one element
 * larger, words start at index one and element zero is initialized to \p NULL.
 * Since free_argv() stops at the first \p NULL entry, callers must set
 * element zero to a non-NULL value before calling free_argv() on the returned
 * array to avoid a memory leak.
 *
 * \param buf See \ref create_argv().
 * \param delim See \ref create_argv().
 * \param result See \ref create_argv().
 *
 * \return Number of words plus one on success, negative on errors.
 */
int create_shifted_argv(const char *buf, const char *delim, char ***result)
{
	const int first_word_index = 1;

	return create_argv_offset(first_word_index, buf, delim, result);
}
808
809 /**
810 * Compile a regular expression.
811 *
812 * This simple wrapper calls regcomp() and logs a message on errors.
813 *
814 * \param preg See regcomp(3).
815 * \param regex See regcomp(3).
816 * \param cflags See regcomp(3).
817 *
818 * \return Standard.
819 */
820 int para_regcomp(regex_t *preg, const char *regex, int cflags)
821 {
822 char *buf;
823 size_t size;
824 int ret = regcomp(preg, regex, cflags);
825
826 if (ret == 0)
827 return 1;
828 size = regerror(ret, preg, NULL, 0);
829 buf = alloc(size);
830 regerror(ret, preg, buf, size);
831 PARA_ERROR_LOG("%s\n", buf);
832 free(buf);
833 return -E_REGEX;
834 }
835
/**
 * Duplicate a buffer which is not necessarily zero-terminated.
 *
 * \param src The source buffer.
 * \param len The number of bytes to be copied.
 *
 * \return A newly allocated buffer of \a len + 1 bytes which holds the first
 * \a len bytes of \a src, followed by a terminating zero byte.
 *
 * This is similar to strndup(), which is a GNU extension. However, one
 * difference is that strndup() returns \p NULL if insufficient memory was
 * available while this function aborts in this case.
 *
 * \sa strdup(), \ref para_strdup().
 */
char *safe_strdup(const char *src, size_t len)
{
	char *copy;

	/* len + 1 must not wrap around */
	assert(len < (size_t)-1);
	copy = alloc(len + 1);
	copy[len] = '\0';
	if (len > 0)
		memcpy(copy, src, len);
	return copy;
}
861
/**
 * Extract the value of a key=value pair.
 *
 * This checks whether the given buffer starts with "key=", where the key is
 * compared without regard to case. If yes, a copy of the value is returned.
 * The source buffer need not be zero-terminated.
 *
 * \param src The source buffer.
 * \param len The number of bytes of the tag.
 * \param key Only copy if it is the value of this key.
 *
 * \return A zero-terminated copy of the value, or \p NULL if the buffer is
 * not longer than the key or does not start with "key=".
 */
char *key_value_copy(const char *src, size_t len, const char *key)
{
	int keylen = strlen(key);
	const char *value;

	if (len <= keylen || strncasecmp(src, key, keylen) != 0
			|| src[keylen] != '=')
		return NULL;
	value = src + keylen + 1;
	return safe_strdup(value, len - keylen - 1);
}
888
/*
 * Tell whether the codeset of the current locale is UTF-8.
 *
 * The codeset is queried only on the first call. The answer is logged once
 * and cached in a static variable for all subsequent calls.
 */
static bool utf8_mode(void)
{
	static bool initialized, have_utf8;
	const char *codeset;

	if (initialized)
		return have_utf8;
	codeset = nl_langinfo(CODESET);
	have_utf8 = codeset && !strcmp(codeset, "UTF-8");
	initialized = true;
	PARA_INFO_LOG("%susing UTF-8 character encoding\n",
		have_utf8? "" : "not ");
	return have_utf8;
}
902
/*
 * Compute the number of cells a wide character occupies at position pos.
 *
 * A tab advances to the next multiple of eight. For all other characters the
 * result of wcwidth() is used, with non-printable characters counted as one
 * cell.
 */
static int xwcwidth(wchar_t wc, size_t pos)
{
	int cells;

	if (wc == 0x09) /* tab: distance to the next tab stop */
		return 8 - (pos & 7);
	cells = wcwidth(wc);
	/* wcwidth() returns -1 for non-printable characters */
	if (cells < 0)
		return 1;
	return cells;
}
914
/*
 * Sum up the cell widths of the first n wide characters of s. The running
 * total is passed to xwcwidth() as the position of each character.
 */
static size_t xwcswidth(const wchar_t *s, size_t n)
{
	size_t width = 0, i;

	for (i = 0; i < n; i++)
		width += xwcwidth(s[i], width);
	return width;
}
923
/**
 * Translate a number of display cells into a number of bytes.
 *
 * \param s The input string.
 * \param cells_to_skip Desired number of cells that should be skipped.
 * \param bytes_to_skip Result.
 *
 * This function computes how many input bytes must be skipped to advance a
 * string by the given width. If the current character encoding is not UTF-8,
 * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
 * \a s is treated as a multibyte string and on successful return, \a s +
 * bytes_to_skip points to the start of a multibyte string such that the total
 * width of the multibyte characters that are skipped by advancing \a s that
 * many bytes equals at least \a cells_to_skip.
 *
 * It is asserted that the end of the string is not reached before the
 * requested number of cells has been skipped.
 *
 * \return Zero if there is nothing to skip or the encoding is not UTF-8, one
 * if \a s was parsed as a multibyte string, negative if \a s contains an
 * invalid or incomplete multibyte sequence.
 */
int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
{
	mbstate_t state;
	size_t len, offset = 0, cells = 0;

	*bytes_to_skip = 0;
	if (cells_to_skip == 0)
		return 0;
	if (!utf8_mode()) {
		*bytes_to_skip = cells_to_skip;
		return 0;
	}
	memset(&state, 0, sizeof(state));
	len = strlen(s);
	while (cells < cells_to_skip) {
		wchar_t wc;
		size_t consumed = mbrtowc(&wc, s + offset, len - offset, &state);

		assert(consumed != 0);
		if (consumed == (size_t)-1 || consumed == (size_t)-2)
			return -ERRNO_TO_PARA_ERROR(EILSEQ);
		offset += consumed;
		cells += xwcwidth(wc, cells);
	}
	*bytes_to_skip = offset;
	return 1;
}
970
971 /**
972 * Compute the width of an UTF-8 string.
973 *
974 * \param s The string.
975 * \param result The width of \a s is returned here.
976 *
977 * If not in UTF8-mode. this function is just a wrapper for strlen(3).
978 * Otherwise \a s is treated as an UTF-8 string and its display width is
979 * computed. Note that this function may fail if the underlying call to
980 * mbsrtowcs(3) fails, so the caller must check the return value.
981 *
982 * \sa nl_langinfo(3), wcswidth(3).
983 *
984 * \return Standard.
985 */
986 __must_check int strwidth(const char *s, size_t *result)
987 {
988 const char *src = s;
989 mbstate_t state;
990 static wchar_t *dest;
991 size_t num_wchars;
992
993 /*
994 * Never call any log function here. This may result in an endless loop
995 * as para_gui's para_log() calls this function.
996 */
997
998 if (!utf8_mode()) {
999 *result = strlen(s);
1000 return 0;
1001 }
1002 memset(&state, 0, sizeof(state));
1003 *result = 0;
1004 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1005 if (num_wchars == (size_t)-1)
1006 return -ERRNO_TO_PARA_ERROR(errno);
1007 if (num_wchars == 0)
1008 return 0;
1009 dest = arr_alloc(num_wchars + 1, sizeof(*dest));
1010 src = s;
1011 memset(&state, 0, sizeof(state));
1012 num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1013 assert(num_wchars > 0 && num_wchars != (size_t)-1);
1014 *result = xwcswidth(dest, num_wchars);
1015 free(dest);
1016 return 1;
1017 }
1018
1019 /**
1020 * Truncate and sanitize a (wide character) string.
1021 *
1022 * This replaces all non-printable characters by spaces and makes sure that the
1023 * modified string does not exceed the given maximal width.
1024 *
1025 * \param src The source string in multi-byte form.
1026 * \param max_width The maximal number of cells the result may occupy.
1027 * \param result Sanitized multi-byte string, must be freed by caller.
1028 * \param width The width of the sanitized string, always <= max_width.
1029 *
1030 * The function is wide-character aware but falls back to C strings for
1031 * non-UTF-8 locales.
1032 *
1033 * \return Standard. On success, *result points to a sanitized copy of the
1034 * given string. This copy was allocated with malloc() and should hence be
1035 * freed when the caller is no longer interested in the result.
1036 *
1037 * The function fails if the given string contains an invalid multibyte
1038 * sequence. In this case, *result is set to NULL, and *width to zero.
1039 */
1040 __must_check int sanitize_str(const char *src, size_t max_width,
1041 char **result, size_t *width)
1042 {
1043 mbstate_t state;
1044 static wchar_t *wcs;
1045 size_t num_wchars, n;
1046
1047 if (!utf8_mode()) {
1048 *result = para_strdup(src);
1049 /* replace non-printable characters by spaces */
1050 for (n = 0; n < max_width && src[n]; n++) {
1051 if (!isprint((unsigned char)src[n]))
1052 (*result)[n] = ' ';
1053 }
1054 (*result)[n] = '\0';
1055 *width = n;
1056 return 0;
1057 }
1058 *result = NULL;
1059 *width = 0;
1060 memset(&state, 0, sizeof(state));
1061 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1062 if (num_wchars == (size_t)-1)
1063 return -ERRNO_TO_PARA_ERROR(errno);
1064 wcs = arr_alloc(num_wchars + 1, sizeof(*wcs));
1065 memset(&state, 0, sizeof(state));
1066 num_wchars = mbsrtowcs(wcs, &src, num_wchars + 1, &state);
1067 assert(num_wchars != (size_t)-1);
1068 for (n = 0; n < num_wchars && *width < max_width; n++) {
1069 if (!iswprint(wcs[n]))
1070 wcs[n] = L' ';
1071 *width += xwcwidth(wcs[n], *width);
1072 }
1073 wcs[n] = L'\0';
1074 n = wcstombs(NULL, wcs, 0) + 1;
1075 *result = alloc(n);
1076 num_wchars = wcstombs(*result, wcs, n);
1077 assert(num_wchars != (size_t)-1);
1078 free(wcs);
1079 return 1;
1080 }