UTF-8 support for para_gui.
[paraslash.git] / string.c
1 /*
2 * Copyright (C) 2004-2012 Andre Noll <maan@systemlinux.org>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6
7 /** \file string.c Memory allocation and string handling functions. */
8
9 #define _GNU_SOURCE
10
11 #include <sys/time.h> /* gettimeofday */
12 #include <pwd.h>
13 #include <sys/utsname.h> /* uname() */
14
15 #include <string.h>
16 #include <regex.h>
17
18 #include <langinfo.h>
19 #include <wchar.h>
20 #include <wctype.h>
21
22 #include "para.h"
23 #include "string.h"
24 #include "error.h"
25
26 /**
27 * Paraslash's version of realloc().
28 *
29 * \param p Pointer to the memory block, may be \p NULL.
30 * \param size The desired new size.
31 *
32 * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
33 * i.e. there is no need to check the return value in the caller.
34 *
35 * \return A pointer to the newly allocated memory, which is suitably aligned
36 * for any kind of variable and may be different from \a p.
37 *
38 * \sa realloc(3).
39 */
40 __must_check __malloc void *para_realloc(void *p, size_t size)
41 {
42 /*
43 * No need to check for NULL pointers: If p is NULL, the call
44 * to realloc is equivalent to malloc(size)
45 */
46 assert(size);
47 if (!(p = realloc(p, size))) {
48 PARA_EMERG_LOG("realloc failed (size = %zu), aborting\n",
49 size);
50 exit(EXIT_FAILURE);
51 }
52 return p;
53 }
54
55 /**
56 * Paraslash's version of malloc().
57 *
58 * \param size The desired new size.
59 *
60 * A wrapper for malloc(3) which exits on errors.
61 *
62 * \return A pointer to the allocated memory, which is suitably aligned for any
63 * kind of variable.
64 *
65 * \sa malloc(3).
66 */
67 __must_check __malloc void *para_malloc(size_t size)
68 {
69 void *p;
70
71 assert(size);
72 p = malloc(size);
73 if (!p) {
74 PARA_EMERG_LOG("malloc failed (size = %zu), aborting\n",
75 size);
76 exit(EXIT_FAILURE);
77 }
78 return p;
79 }
80
81 /**
82 * Paraslash's version of calloc().
83 *
84 * \param size The desired new size.
85 *
86 * A wrapper for calloc(3) which exits on errors.
87 *
88 * \return A pointer to the allocated and zeroed-out memory, which is suitably
89 * aligned for any kind of variable.
90 *
91 * \sa calloc(3)
92 */
93 __must_check __malloc void *para_calloc(size_t size)
94 {
95 void *ret = para_malloc(size);
96
97 memset(ret, 0, size);
98 return ret;
99 }
100
101 /**
102 * Paraslash's version of strdup().
103 *
104 * \param s The string to be duplicated.
105 *
106 * A wrapper for strdup(3). It calls \p exit(EXIT_FAILURE) on errors, i.e.
107 * there is no need to check the return value in the caller.
108 *
109 * \return A pointer to the duplicated string. If \a s was the \p NULL pointer,
110 * an pointer to an empty string is returned.
111 *
112 * \sa strdup(3)
113 */
114 __must_check __malloc char *para_strdup(const char *s)
115 {
116 char *ret;
117
118 if ((ret = strdup(s? s: "")))
119 return ret;
120 PARA_EMERG_LOG("strdup failed, aborting\n");
121 exit(EXIT_FAILURE);
122 }
123
124 /**
125 * Print a formated message to a dynamically allocated string.
126 *
127 * \param result The formated string is returned here.
128 * \param fmt The format string.
129 * \param ap Initialized list of arguments.
130 *
131 * This function is similar to vasprintf(), a GNU extension which is not in C
132 * or POSIX. It allocates a string large enough to hold the output including
133 * the terminating null byte. The allocated string is returned via the first
134 * argument and must be freed by the caller. However, unlike vasprintf(), this
135 * function calls exit() if insufficient memory is available, while vasprintf()
136 * returns -1 in this case.
137 *
138 * \return Number of bytes written, not including the terminating \p NULL
139 * character.
140 *
141 * \sa printf(3), vsnprintf(3), va_start(3), vasprintf(3), \ref xasprintf().
142 */
143 __printf_2_0 unsigned xvasprintf(char **result, const char *fmt, va_list ap)
144 {
145 int ret;
146 size_t size;
147 va_list aq;
148
149 va_copy(aq, ap);
150 ret = vsnprintf(NULL, 0, fmt, aq);
151 va_end(aq);
152 assert(ret >= 0);
153 size = ret + 1;
154 *result = para_malloc(size);
155 va_copy(aq, ap);
156 ret = vsnprintf(*result, size, fmt, aq);
157 va_end(aq);
158 assert(ret >= 0 && ret < size);
159 return ret;
160 }
161
162 /**
163 * Print to a dynamically allocated string, variable number of arguments.
164 *
165 * \param result See \ref xvasprintf().
166 * \param fmt Usual format string.
167 *
168 * \return The return value of the underlying call to \ref xvasprintf().
169 *
170 * \sa \ref xvasprintf() and the references mentioned there.
171 */
172 __printf_2_3 unsigned xasprintf(char **result, const char *fmt, ...)
173 {
174 va_list ap;
175 unsigned ret;
176
177 va_start(ap, fmt);
178 ret = xvasprintf(result, fmt, ap);
179 va_end(ap);
180 return ret;
181 }
182
183 /**
184 * Allocate a sufficiently large string and print into it.
185 *
186 * \param fmt A usual format string.
187 *
188 * Produce output according to \p fmt. No artificial bound on the length of the
189 * resulting string is imposed.
190 *
191 * \return This function either returns a pointer to a string that must be
192 * freed by the caller or aborts without returning.
193 *
194 * \sa printf(3), xasprintf().
195 */
196 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
197 {
198 char *msg;
199 va_list ap;
200
201 va_start(ap, fmt);
202 xvasprintf(&msg, fmt, ap);
203 va_end(ap);
204 return msg;
205 }
206
/**
 * Free the content of a pointer and set it to \p NULL.
 *
 * This is equivalent to "free(*arg); *arg = NULL;".
 *
 * \param arg Address of the pointer whose content should be freed. It is
 * declared as \p void * so that the address of any pointer type can be
 * passed without a cast.
 */
void freep(void *arg)
{
	void **pp = arg;

	free(*pp);
	*pp = NULL;
}
220
221 /**
222 * Paraslash's version of strcat().
223 *
224 * \param a String to be appended to.
225 * \param b String to append.
226 *
227 * Append \p b to \p a.
228 *
229 * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
230 * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
231 * return \a a without making a copy of \a a. Otherwise, construct the
232 * concatenation \a c, free \a a (but not \a b) and return \a c.
233 *
234 * \sa strcat(3)
235 */
236 __must_check __malloc char *para_strcat(char *a, const char *b)
237 {
238 char *tmp;
239
240 if (!a)
241 return para_strdup(b);
242 if (!b)
243 return a;
244 tmp = make_message("%s%s", a, b);
245 free(a);
246 return tmp;
247 }
248
249 /**
250 * Paraslash's version of dirname().
251 *
252 * \param name Pointer to the full path.
253 *
254 * Compute the directory component of \p name.
255 *
256 * \return If \a name is \p NULL or the empty string, return \p NULL.
257 * Otherwise, Make a copy of \a name and return its directory component. Caller
258 * is responsible to free the result.
259 */
260 __must_check __malloc char *para_dirname(const char *name)
261 {
262 char *p, *ret;
263
264 if (!name || !*name)
265 return NULL;
266 ret = para_strdup(name);
267 p = strrchr(ret, '/');
268 if (!p)
269 *ret = '\0';
270 else
271 *p = '\0';
272 return ret;
273 }
274
275 /**
276 * Paraslash's version of basename().
277 *
278 * \param name Pointer to the full path.
279 *
280 * Compute the filename component of \a name.
281 *
282 * \return \p NULL if (a) \a name is the empty string or \p NULL, or (b) name
283 * ends with a slash. Otherwise, a pointer within \a name is returned. Caller
284 * must not free the result.
285 */
286 __must_check char *para_basename(const char *name)
287 {
288 char *ret;
289
290 if (!name || !*name)
291 return NULL;
292 ret = strrchr(name, '/');
293 if (!ret)
294 return (char *)name;
295 ret++;
296 return ret;
297 }
298
/**
 * Cut trailing newline.
 *
 * \param buf The zero-terminated string to be chopped.
 *
 * Replace the last character in \p buf by zero if it is equal to
 * the newline character. At most one newline is removed, and the empty
 * string is left untouched.
 */
void chop(char *buf)
{
	/* size_t rather than int: the length of a string need not fit an int. */
	size_t len = strlen(buf);

	if (len > 0 && buf[len - 1] == '\n')
		buf[len - 1] = '\0';
}
316
317 /**
318 * Get the logname of the current user.
319 *
320 * \return A dynamically allocated string that must be freed by the caller. On
321 * errors, the string "unknown_user" is returned, i.e. this function never
322 * returns \p NULL.
323 *
324 * \sa getpwuid(3).
325 */
326 __must_check __malloc char *para_logname(void)
327 {
328 struct passwd *pw = getpwuid(getuid());
329 return para_strdup(pw? pw->pw_name : "unknown_user");
330 }
331
332 /**
333 * Get the home directory of the current user.
334 *
335 * \return A dynamically allocated string that must be freed by the caller. If
336 * the home directory could not be found, this function returns "/tmp".
337 */
338 __must_check __malloc char *para_homedir(void)
339 {
340 struct passwd *pw = getpwuid(getuid());
341 return para_strdup(pw? pw->pw_dir : "/tmp");
342 }
343
344 /**
345 * Get the own hostname.
346 *
347 * \return A dynamically allocated string containing the hostname.
348 *
349 * \sa uname(2).
350 */
351 __malloc char *para_hostname(void)
352 {
353 struct utsname u;
354
355 uname(&u);
356 return para_strdup(u.nodename);
357 }
358
/**
 * Used to distinguish between read-only and read-write mode.
 *
 * \sa for_each_line(), for_each_line_ro().
 */
enum for_each_line_modes {
	/** Read-only mode: each line is handed out as a temporary copy. */
	LINE_MODE_RO = 0,
	/** Read-write mode: lines are terminated in place within the buffer. */
	LINE_MODE_RW = 1
};
370
371 static int for_each_complete_line(enum for_each_line_modes mode, char *buf,
372 size_t size, line_handler_t *line_handler, void *private_data)
373 {
374 char *start = buf, *end;
375 int ret, i, num_lines = 0;
376
377 // PARA_NOTICE_LOG("buf: %s\n", buf);
378 while (start < buf + size) {
379 char *next_null;
380 char *next_cr;
381
382 next_cr = memchr(start, '\n', buf + size - start);
383 next_null = memchr(start, '\0', buf + size - start);
384 if (!next_cr && !next_null)
385 break;
386 if (next_cr && next_null) {
387 end = next_cr < next_null? next_cr : next_null;
388 } else if (next_null) {
389 end = next_null;
390 } else
391 end = next_cr;
392 num_lines++;
393 if (!line_handler) {
394 start = ++end;
395 continue;
396 }
397 if (mode == LINE_MODE_RO) {
398 size_t s = end - start;
399 char *b = para_malloc(s + 1);
400 memcpy(b, start, s);
401 b[s] = '\0';
402 // PARA_NOTICE_LOG("b: %s, start: %s\n", b, start);
403 ret = line_handler(b, private_data);
404 free(b);
405 } else {
406 *end = '\0';
407 ret = line_handler(start, private_data);
408 }
409 if (ret < 0)
410 return ret;
411 start = ++end;
412 }
413 if (!line_handler || mode == LINE_MODE_RO)
414 return num_lines;
415 i = buf + size - start;
416 if (i && i != size)
417 memmove(buf, start, i);
418 return i;
419 }
420
421 /**
422 * Call a custom function for each complete line.
423 *
424 * \param buf The buffer containing data separated by newlines.
425 * \param size The number of bytes in \a buf.
426 * \param line_handler The custom function.
427 * \param private_data Pointer passed to \a line_handler.
428 *
429 * If \p line_handler is \p NULL, the function returns the number of complete
430 * lines in \p buf. Otherwise, \p line_handler is called for each complete
431 * line in \p buf. The first argument to \p line_handler is the current line,
432 * and \p private_data is passed as the second argument. The function returns
433 * if \p line_handler returns a negative value or no more lines are in the
434 * buffer. The rest of the buffer (last chunk containing an incomplete line)
435 * is moved to the beginning of the buffer.
436 *
437 * \return If \p line_handler is not \p NULL, this function returns the number
438 * of bytes not handled to \p line_handler on success, or the negative return
439 * value of the \p line_handler on errors.
440 *
441 * \sa for_each_line_ro().
442 */
443 int for_each_line(char *buf, size_t size, line_handler_t *line_handler,
444 void *private_data)
445 {
446 return for_each_complete_line(LINE_MODE_RW, buf, size, line_handler,
447 private_data);
448 }
449
450 /**
451 * Call a custom function for each complete line.
452 *
453 * \param buf Same meaning as in \p for_each_line().
454 * \param size Same meaning as in \p for_each_line().
455 * \param line_handler Same meaning as in \p for_each_line().
456 * \param private_data Same meaning as in \p for_each_line().
457 *
458 * This function behaves like \p for_each_line(), but \a buf is left unchanged.
459 *
460 * \return On success, the function returns the number of complete lines in \p
461 * buf, otherwise the (negative) return value of \p line_handler is returned.
462 *
463 * \sa for_each_line().
464 */
465 int for_each_line_ro(char *buf, size_t size, line_handler_t *line_handler,
466 void *private_data)
467 {
468 return for_each_complete_line(LINE_MODE_RO, buf, size, line_handler,
469 private_data);
470 }
471
/** Return the hex characters of the lower 4 bits. */
#define hex(a) (hexchar[(a) & 15])

/*
 * Write the lower 16 bits of n as four lowercase hex digits, most significant
 * digit first, followed by a space. Exactly five bytes are written to buf and
 * no terminating zero byte is appended.
 */
static void write_size_header(char *buf, int n)
{
	static char hexchar[] = "0123456789abcdef";
	int shift, pos = 0;

	for (shift = 12; shift >= 0; shift -= 4)
		buf[pos++] = hex(n >> shift);
	buf[pos] = ' ';
}
485
486 /**
487 * Read a four-byte hex-number and return its value.
488 *
489 * Each status item sent by para_server is prefixed with such a hex number in
490 * ASCII which describes the size of the status item.
491 *
492 * \param buf The buffer which must be at least four bytes long.
493 *
494 * \return The value of the hex number on success, \p -E_SIZE_PREFIX if the
495 * buffer did not contain only hex digits.
496 */
497 int read_size_header(const char *buf)
498 {
499 int i, len = 0;
500
501 for (i = 0; i < 4; i++) {
502 unsigned char c = buf[i];
503 len <<= 4;
504 if (c >= '0' && c <= '9') {
505 len += c - '0';
506 continue;
507 }
508 if (c >= 'a' && c <= 'f') {
509 len += c - 'a' + 10;
510 continue;
511 }
512 return -E_SIZE_PREFIX;
513 }
514 if (buf[4] != ' ')
515 return -E_SIZE_PREFIX;
516 return len;
517 }
518
519 /**
520 * Safely print into a buffer at a given offset.
521 *
522 * \param b Determines the buffer, its size, and the offset.
523 * \param fmt The format string.
524 *
525 * This function prints into the buffer given by \a b at the offset which is
526 * also given by \a b. If there is not enough space to hold the result, the
527 * buffer size is doubled until the underlying call to vsnprintf() succeeds
528 * or the size of the buffer exceeds the maximal size specified in \a b.
529 *
530 * In the latter case the unmodified \a buf and \a offset values as well as the
531 * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
532 * If this function succeeds, i.e. returns a non-negative value, the offset of
533 * \a b is reset to zero and the given data is written to the beginning of the
534 * buffer. If \a max_size_handler() returns a negative value, this value is
535 * returned by \a para_printf().
536 *
537 * Upon return, the offset of \a b is adjusted accordingly so that subsequent
538 * calls to this function append data to what is already contained in the
539 * buffer.
540 *
541 * It's OK to call this function with \p b->buf being \p NULL. In this case, an
542 * initial buffer is allocated.
543 *
544 * \return The number of bytes printed into the buffer (not including the
545 * terminating \p NULL byte) on success, negative on errors. If there is no
546 * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
547 * fails.
548 *
549 * \sa make_message(), vsnprintf(3).
550 */
551 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
552 {
553 int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
554
555 if (!b->buf) {
556 b->buf = para_malloc(128);
557 b->size = 128;
558 b->offset = 0;
559 }
560 while (1) {
561 char *p = b->buf + b->offset;
562 size_t size = b->size - b->offset;
563 va_list ap;
564
565 if (size > sz_off) {
566 va_start(ap, fmt);
567 ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
568 va_end(ap);
569 if (ret > -1 && ret < size - sz_off) { /* success */
570 b->offset += ret + sz_off;
571 if (sz_off)
572 write_size_header(p, ret);
573 return ret + sz_off;
574 }
575 }
576 /* check if we may grow the buffer */
577 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
578 /* try again with more space */
579 b->size *= 2;
580 b->buf = para_realloc(b->buf, b->size);
581 continue;
582 }
583 /* can't grow buffer */
584 if (!b->offset || !b->max_size_handler) /* message too large */
585 return -ERRNO_TO_PARA_ERROR(ENOSPC);
586 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
587 if (ret < 0)
588 return ret;
589 b->offset = 0;
590 }
591 }
592
593 /** \cond llong_minmax */
594 /* LLONG_MAX and LLONG_MIN might not be defined. */
595 #ifndef LLONG_MAX
596 #define LLONG_MAX 9223372036854775807LL
597 #endif
598 #ifndef LLONG_MIN
599 #define LLONG_MIN (-LLONG_MAX - 1LL)
600 #endif
601 /** \endcond llong_minmax */
602
603 /**
604 * Convert a string to a 64-bit signed integer value.
605 *
606 * \param str The string to be converted.
607 * \param value Result pointer.
608 *
609 * \return Standard.
610 *
611 * \sa para_atoi32(), strtol(3), atoi(3).
612 */
613 int para_atoi64(const char *str, int64_t *value)
614 {
615 char *endptr;
616 long long tmp;
617
618 errno = 0; /* To distinguish success/failure after call */
619 tmp = strtoll(str, &endptr, 10);
620 if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
621 return -E_ATOI_OVERFLOW;
622 if (errno != 0 && tmp == 0) /* other error */
623 return -E_STRTOLL;
624 if (endptr == str)
625 return -E_ATOI_NO_DIGITS;
626 if (*endptr != '\0') /* Further characters after number */
627 return -E_ATOI_JUNK_AT_END;
628 *value = tmp;
629 return 1;
630 }
631
632 /**
633 * Convert a string to a 32-bit signed integer value.
634 *
635 * \param str The string to be converted.
636 * \param value Result pointer.
637 *
638 * \return Standard.
639 *
640 * \sa para_atoi64().
641 */
642 int para_atoi32(const char *str, int32_t *value)
643 {
644 int64_t tmp;
645 int ret;
646 const int32_t max = 2147483647;
647
648 ret = para_atoi64(str, &tmp);
649 if (ret < 0)
650 return ret;
651 if (tmp > max || tmp < -max - 1)
652 return -E_ATOI_OVERFLOW;
653 *value = tmp;
654 return 1;
655 }
656
/*
 * Check whether arg starts with the loglevel name ll, ignoring case. Returns
 * one if it does, zero otherwise. Characters of arg beyond the length of ll
 * are not examined.
 */
static inline int loglevel_equal(const char *arg, const char * const ll)
{
	const size_t ll_len = strlen(ll);

	return strncasecmp(arg, ll, ll_len) == 0;
}
661
662 /**
663 * Compute the loglevel number from its name.
664 *
665 * \param txt The name of the loglevel (debug, info, ...).
666 *
667 * \return The numeric representation of the loglevel name.
668 */
669 int get_loglevel_by_name(const char *txt)
670 {
671 if (loglevel_equal(txt, "debug"))
672 return LL_DEBUG;
673 if (loglevel_equal(txt, "info"))
674 return LL_INFO;
675 if (loglevel_equal(txt, "notice"))
676 return LL_NOTICE;
677 if (loglevel_equal(txt, "warning"))
678 return LL_WARNING;
679 if (loglevel_equal(txt, "error"))
680 return LL_ERROR;
681 if (loglevel_equal(txt, "crit"))
682 return LL_CRIT;
683 if (loglevel_equal(txt, "emerg"))
684 return LL_EMERG;
685 return -1;
686 }
687
/*
 * Extract the first word of buf into a newly allocated string.
 *
 * Leading delimiters are skipped. A word ends at the first delimiter which is
 * neither preceded by a backslash nor enclosed in single or double quotes.
 * Quote characters and the escaping backslash are not copied to the word, and
 * the sequences \n and \t are replaced by a newline and a tab character.
 *
 * On success the word is returned via the last argument and the return value
 * is the number of bytes of buf which were consumed. If buf contains no word,
 * zero is returned. A trailing backslash or an unmatched quote character
 * yields a negative error code. In both non-success cases *word is set to
 * NULL.
 */
static int get_next_word(const char *buf, const char *delim, char **word)
{
	enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
		LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
	const int quoted = LSF_SINGLE_QUOTE | LSF_DOUBLE_QUOTE;
	const char *src;
	char *dst;
	int err, flags = 0;

	/* The word can never be longer than the input. */
	dst = para_malloc(strlen(buf) + 1);
	*dst = '\0';
	*word = dst;
	for (src = buf; *src; src++) {
		const char c = *src;

		if (c == '\\') {
			if (!(flags & LSF_BACKSLASH)) {
				/* start of an escape sequence */
				flags |= LSF_BACKSLASH | LSF_HAVE_WORD;
				continue;
			}
			/* \\ is a literal backslash */
		} else if (c == 'n' || c == 't') {
			if (flags & LSF_BACKSLASH) { /* \n or \t */
				*dst++ = (c == 'n')? '\n' : '\t';
				flags &= ~LSF_BACKSLASH;
				continue;
			}
		} else if (c == '"' || c == '\'') {
			const int own = (c == '"')?
				LSF_DOUBLE_QUOTE : LSF_SINGLE_QUOTE;
			const int other = quoted & ~own;

			/*
			 * A quote character is literal if it is escaped or
			 * appears within the other kind of quotes. Otherwise
			 * it opens or closes a quoted section.
			 */
			if (!(flags & (LSF_BACKSLASH | other))) {
				if (flags & own)
					flags &= ~own;
				else
					flags |= LSF_HAVE_WORD | own;
				continue;
			}
		} else if (strchr(delim, c)
				&& !(flags & (LSF_BACKSLASH | quoted))) {
			/* unescaped, unquoted delimiter */
			if (flags & LSF_HAVE_WORD)
				goto success;
			continue; /* ignore delimiter at the beginning */
		}
		/* ordinary character */
		flags |= LSF_HAVE_WORD;
		*dst++ = c;
		flags &= ~LSF_BACKSLASH;
	}
	if (!(flags & LSF_HAVE_WORD)) {
		err = 0;
		goto fail;
	}
	err = -ERRNO_TO_PARA_ERROR(EINVAL);
	if (flags & LSF_BACKSLASH) {
		PARA_ERROR_LOG("trailing backslash\n");
		goto fail;
	}
	if (flags & quoted) {
		PARA_ERROR_LOG("unmatched quote character\n");
		goto fail;
	}
success:
	*dst = '\0';
	return src - buf;
fail:
	free(*word);
	*word = NULL;
	return err;
}
782
/**
 * Get the number of the word the cursor is on.
 *
 * \param buf The zero-terminated line buffer.
 * \param delim Characters that separate words.
 * \param point The cursor position, given as a byte offset into \a buf.
 *
 * Words are extracted one after another until the end of the current word
 * reaches the cursor position, no more words are found, or a parse error
 * occurs.
 *
 * \return Zero-based word number.
 */
int compute_word_num(const char *buf, const char *delim, int point)
{
	const char *pos = buf, *cursor = buf + point;
	int count = 0;

	for (;;) {
		char *w;
		int len = get_next_word(pos, delim, &w);

		if (len <= 0) /* no further word, or parse error */
			break;
		free(w); /* only the length of the word is of interest */
		if (pos + len >= cursor)
			break;
		pos += len;
		count++;
	}
	return count;
}
808
/**
 * Free an array of words created by create_argv() or create_shifted_argv().
 *
 * \param argv A pointer previously obtained by \ref create_argv(), may be
 * \p NULL in which case the function does nothing.
 *
 * All strings up to the first \p NULL entry are freed, then the array itself.
 */
void free_argv(char **argv)
{
	char **p;

	if (!argv)
		return;
	for (p = argv; *p; p++)
		free(*p);
	free(argv);
}
824
/*
 * Split buf into words, storing them from index offset onwards.
 *
 * The first offset entries of the resulting array are set to NULL and the
 * array is terminated by another NULL pointer. Returns the index of the
 * terminating NULL pointer, i.e. offset plus the number of words. On parse
 * errors all words are freed, *result is set to NULL and the negative error
 * code of get_next_word() is returned.
 */
static int create_argv_offset(int offset, const char *buf, const char *delim,
		char ***result)
{
	char **argv = para_malloc((offset + 1) * sizeof(char *));
	const char *p = buf;
	int n, ret;

	for (n = 0; n < offset; n++)
		argv[n] = NULL;
	while (p && *p) {
		char *word;

		ret = get_next_word(p, delim, &word);
		if (ret < 0)
			goto fail;
		if (ret == 0) /* only delimiters left */
			break;
		/* one slot for the new word, one for the terminating NULL */
		argv = para_realloc(argv, (n + 2) * sizeof(char*));
		argv[n++] = word;
		p += ret;
	}
	argv[n] = NULL;
	*result = argv;
	return n;
fail:
	while (n > 0)
		free(argv[--n]);
	free(argv);
	*result = NULL;
	return ret;
}
853
/**
 * Split a buffer into words.
 *
 * This parser honors single and double quotes, backslash-escaped characters
 * and special characters like \p \\n. The result contains pointers to copies
 * of the words contained in \a buf and has to be freed by using \ref
 * free_argv().
 *
 * \param buf The buffer to be split.
 * \param delim Each character in this string is treated as a separator.
 * \param result The \p NULL-terminated array of words is returned here. On
 * errors it is set to \p NULL.
 *
 * \return Number of words in \a buf, negative on errors.
 */
int create_argv(const char *buf, const char *delim, char ***result)
{
	const int first_word_index = 0;

	return create_argv_offset(first_word_index, buf, delim, result);
}
872
/**
 * Split a buffer into words, offset one.
 *
 * This is similar to \ref create_argv() but the returned array is one element
 * larger, words start at index one and element zero is initialized to \p NULL.
 * Callers must set element zero to a non-NULL value before calling free_argv()
 * on the returned array to avoid a memory leak, because free_argv() stops at
 * the first \p NULL entry.
 *
 * \param buf See \ref create_argv().
 * \param delim See \ref create_argv().
 * \param result See \ref create_argv().
 *
 * \return Number of words plus one on success, negative on errors.
 */
int create_shifted_argv(const char *buf, const char *delim, char ***result)
{
	const int first_word_index = 1;

	return create_argv_offset(first_word_index, buf, delim, result);
}
891
892 /**
893 * Find out if the given string is contained in the arg vector.
894 *
895 * \param arg The string to look for.
896 * \param argv The array to search.
897 *
898 * \return The first index whose value equals \a arg, or \p -E_ARG_NOT_FOUND if
899 * arg was not found in \a argv.
900 */
901 int find_arg(const char *arg, char **argv)
902 {
903 int i;
904
905 if (!argv)
906 return -E_ARG_NOT_FOUND;
907 for (i = 0; argv[i]; i++)
908 if (strcmp(arg, argv[i]) == 0)
909 return i;
910 return -E_ARG_NOT_FOUND;
911 }
912
913 /**
914 * Compile a regular expression.
915 *
916 * This simple wrapper calls regcomp() and logs a message on errors.
917 *
918 * \param preg See regcomp(3).
919 * \param regex See regcomp(3).
920 * \param cflags See regcomp(3).
921 *
922 * \return Standard.
923 */
924 int para_regcomp(regex_t *preg, const char *regex, int cflags)
925 {
926 char *buf;
927 size_t size;
928 int ret = regcomp(preg, regex, cflags);
929
930 if (ret == 0)
931 return 1;
932 size = regerror(ret, preg, NULL, 0);
933 buf = para_malloc(size);
934 regerror(ret, preg, buf, size);
935 PARA_ERROR_LOG("%s\n", buf);
936 free(buf);
937 return -E_REGEX;
938 }
939
/**
 * strdup() for not necessarily zero-terminated strings.
 *
 * \param src The source buffer. It is not accessed if \a len is zero.
 * \param len The number of bytes to be copied.
 *
 * \return A 0-terminated buffer of length \a len + 1 which must be freed by
 * the caller.
 *
 * This is similar to strndup(), which is a GNU extension. However, one
 * difference is that strndup() returns \p NULL if insufficient memory was
 * available while this function aborts in this case. Moreover, exactly \a len
 * bytes are copied, regardless of zero bytes contained in \a src.
 *
 * \sa strdup(), \ref para_strdup().
 */
char *safe_strdup(const char *src, size_t len)
{
	char *copy;

	/* len + 1 must not wrap around. */
	assert(len < (size_t)-1);
	copy = para_malloc(len + 1);
	if (len)
		memcpy(copy, src, len);
	copy[len] = '\0';
	return copy;
}
965
/**
 * Copy the value of a key=value pair.
 *
 * This checks whether the given buffer starts with "key=", ignoring case. If
 * yes, a copy of the value is returned. The source buffer may not be
 * zero-terminated.
 *
 * \param src The source buffer.
 * \param len The number of bytes of the tag, i.e. the size of \a src.
 * \param key Only copy if it is the value of this key.
 *
 * \return A zero-terminated buffer that must be freed by the caller, or \p
 * NULL if the key was not of the given type or \a src is too short to contain
 * the key and the equal sign.
 */
char *key_value_copy(const char *src, size_t len, const char *key)
{
	int keylen = strlen(key);
	const char *val;

	if (len <= keylen || strncasecmp(src, key, keylen)
			|| src[keylen] != '=')
		return NULL;
	val = src + keylen + 1; /* skip "key=" */
	return safe_strdup(val, len - keylen - 1);
}
992
/*
 * Tell whether the codeset of the current locale is UTF-8.
 *
 * The codeset is queried only on the first call and the result is cached in
 * static variables for all subsequent calls.
 */
static bool utf8_mode(void)
{
	static bool initialized, have_utf8;
	const char *codeset;

	if (initialized)
		return have_utf8;
	codeset = nl_langinfo(CODESET);
	have_utf8 = codeset && !strcmp(codeset, "UTF-8");
	/*
	 * Mark the result as valid before logging: strwidth() calls this
	 * function and may itself be called from the log function, so the
	 * flag must already be set when the message below is printed.
	 */
	initialized = true;
	PARA_INFO_LOG("%susing UTF-8 character encoding\n",
		have_utf8? "" : "not ");
	return have_utf8;
}
1006
/*
 * glibc's wcswidth returns -1 if the string contains a tab character, which
 * makes the function next to useless. The two functions below are taken from
 * mutt.
 */

#define IsWPrint(wc) (iswprint(wc) || (wc) >= 0xa0)

/*
 * Compute the number of cells occupied by the wide character wc when it is
 * printed at column pos.
 *
 * A tab advances to the next multiple of eight. Printable characters with a
 * positive wcwidth() occupy that many cells. For all other characters a fixed
 * width of 2, 6 or 10 is returned, depending on the magnitude of wc
 * (presumably the widths of the escaped representations used by mutt; verify
 * against the mutt sources).
 */
static int mutt_wcwidth(wchar_t wc, size_t pos)
{
	int width;

	if (wc == 0x09) /* tab */
		return (pos | 7) + 1 - pos;
	width = wcwidth(wc);
	if (width > 0 && IsWPrint(wc))
		return width;
	if ((wc & ~0x7f) == 0)
		return 2;
	if ((wc & ~0xffff) == 0)
		return 6;
	return 10;
}
1030
/*
 * Compute the total width of the first n wide characters of s. The width
 * accumulated so far is passed to mutt_wcwidth() as the column of the next
 * character, since the width of a tab depends on its position.
 */
static size_t mutt_wcswidth(const wchar_t *s, size_t n)
{
	size_t total = 0, i;

	for (i = 0; i < n; i++)
		total += mutt_wcwidth(s[i], total);
	return total;
}
1039
/**
 * Skip a given number of cells at the beginning of a string.
 *
 * \param s The input string.
 * \param cells_to_skip Desired number of cells that should be skipped.
 * \param bytes_to_skip Result.
 *
 * This function computes how many input bytes must be skipped to advance a
 * string by the given width. If the current character encoding is not UTF-8,
 * this is simply the given number of cells, i.e. \a cells_to_skip. Otherwise,
 * \a s is treated as a multibyte string and on successful return, \a s +
 * bytes_to_skip points to the start of a multibyte string such that the total
 * width of the multibyte characters that are skipped by advancing \a s that
 * many bytes equals at least \a cells_to_skip.
 *
 * \return Zero if \a cells_to_skip is zero or the encoding is not UTF-8, one
 * if the string was parsed successfully in UTF-8 mode. If an invalid or
 * incomplete multibyte sequence is encountered, a negative error code is
 * returned and \a bytes_to_skip is set to zero.
 */
int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip)
{
	mbstate_t ps;
	size_t len, consumed = 0, cells = 0;

	*bytes_to_skip = 0;
	if (cells_to_skip == 0)
		return 0;
	if (!utf8_mode()) {
		*bytes_to_skip = cells_to_skip;
		return 0;
	}
	memset(&ps, 0, sizeof(ps));
	len = strlen(s);
	while (cells < cells_to_skip) {
		wchar_t wc;
		size_t mbret = mbrtowc(&wc, s + consumed, len - consumed, &ps);

		/* The terminating zero byte is never passed to mbrtowc(). */
		assert(mbret != 0);
		if (mbret == (size_t)-1 || mbret == (size_t)-2)
			return -ERRNO_TO_PARA_ERROR(EILSEQ);
		consumed += mbret;
		cells += mutt_wcwidth(wc, cells);
	}
	*bytes_to_skip = consumed;
	return 1;
}
1086
1087 /**
1088 * Compute the width of an UTF-8 string.
1089 *
1090 * \param s The string.
1091 * \param result The width of \a s is returned here.
1092 *
1093 * If not in UTF8-mode. this function is just a wrapper for strlen(3).
1094 * Otherwise \s is treated as an UTF-8 string and its display width is
1095 * computed. Note that this function may fail if the underlying call to
1096 * mbsrtowcs(3) fails, so the caller must check the return value.
1097 *
1098 * \sa nl_langinfo(3), wcswidth(3).
1099 *
1100 * \return Standard.
1101 */
1102 __must_check int strwidth(const char *s, size_t *result)
1103 {
1104 const char *src = s;
1105 mbstate_t state;
1106 static wchar_t *dest;
1107 size_t num_wchars;
1108
1109 /*
1110 * Never call any log function here. This may result in an endless loop
1111 * as para_gui's para_log() calls this function.
1112 */
1113
1114 if (!utf8_mode()) {
1115 *result = strlen(s);
1116 return 0;
1117 }
1118 memset(&state, 0, sizeof(state));
1119 *result = 0;
1120 num_wchars = mbsrtowcs(NULL, &src, 0, &state);
1121 if (num_wchars == (size_t)-1)
1122 return -ERRNO_TO_PARA_ERROR(errno);
1123 if (num_wchars == 0)
1124 return 0;
1125 dest = para_malloc(num_wchars * sizeof(*dest));
1126 src = s;
1127 memset(&state, 0, sizeof(state));
1128 num_wchars = mbsrtowcs(dest, &src, num_wchars, &state);
1129 assert(num_wchars > 0 && num_wchars != (size_t)-1);
1130 *result = mutt_wcswidth(dest, num_wchars);
1131 free(dest);
1132 return 1;
1133 }