Merge branch 'master' into next
[paraslash.git] / string.c
1 /*
2 * Copyright (C) 2004-2009 Andre Noll <maan@systemlinux.org>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6
7 /** \file string.c Memory allocation and string handling functions. */
8
9 #include <sys/time.h> /* gettimeofday */
10 #include <pwd.h>
11 #include <sys/utsname.h> /* uname() */
12 #include <string.h>
13 #include <regex.h>
14
15 #include "para.h"
16 #include "string.h"
17 #include "error.h"
18
19 /**
20 * Paraslash's version of realloc().
21 *
22 * \param p Pointer to the memory block, may be \p NULL.
23 * \param size The desired new size.
24 *
25 * A wrapper for realloc(3). It calls \p exit(\p EXIT_FAILURE) on errors,
26 * i.e. there is no need to check the return value in the caller.
27 *
28 * \return A pointer to the newly allocated memory, which is suitably aligned
29 * for any kind of variable and may be different from \a p.
30 *
31 * \sa realloc(3).
32 */
33 __must_check __malloc void *para_realloc(void *p, size_t size)
34 {
35 /*
36 * No need to check for NULL pointers: If p is NULL, the call
37 * to realloc is equivalent to malloc(size)
38 */
39 assert(size);
40 if (!(p = realloc(p, size))) {
41 PARA_EMERG_LOG("realloc failed (size = %zu), aborting\n",
42 size);
43 exit(EXIT_FAILURE);
44 }
45 return p;
46 }
47
48 /**
49 * Paraslash's version of malloc().
50 *
51 * \param size The desired new size.
52 *
53 * A wrapper for malloc(3) which exits on errors.
54 *
55 * \return A pointer to the allocated memory, which is suitably aligned for any
56 * kind of variable.
57 *
58 * \sa malloc(3).
59 */
60 __must_check __malloc void *para_malloc(size_t size)
61 {
62 void *p;
63
64 assert(size);
65 p = malloc(size);
66 if (!p) {
67 PARA_EMERG_LOG("malloc failed (size = %zu), aborting\n",
68 size);
69 exit(EXIT_FAILURE);
70 }
71 return p;
72 }
73
74 /**
75 * Paraslash's version of calloc().
76 *
77 * \param size The desired new size.
78 *
79 * A wrapper for calloc(3) which exits on errors.
80 *
81 * \return A pointer to the allocated and zeroed-out memory, which is suitably
82 * aligned for any kind of variable.
83 *
84 * \sa calloc(3)
85 */
86 __must_check __malloc void *para_calloc(size_t size)
87 {
88 void *ret = para_malloc(size);
89
90 memset(ret, 0, size);
91 return ret;
92 }
93
94 /**
95 * Paraslash's version of strdup().
96 *
97 * \param s The string to be duplicated.
98 *
99 * A wrapper for strdup(3). It calls \p exit(EXIT_FAILURE) on errors, i.e.
100 * there is no need to check the return value in the caller.
101 *
102 * \return A pointer to the duplicated string. If \a s was the \p NULL pointer,
103 * an pointer to an empty string is returned.
104 *
105 * \sa strdup(3)
106 */
107 __must_check __malloc char *para_strdup(const char *s)
108 {
109 char *ret;
110
111 if ((ret = strdup(s? s: "")))
112 return ret;
113 PARA_EMERG_LOG("strdup failed, aborting\n");
114 exit(EXIT_FAILURE);
115 }
116
117 /**
118 * Allocate a sufficiently large string and print into it.
119 *
120 * \param fmt A usual format string.
121 *
122 * Produce output according to \p fmt. No artificial bound on the length of the
123 * resulting string is imposed.
124 *
125 * \return This function either returns a pointer to a string that must be
126 * freed by the caller or aborts without returning.
127 *
128 * \sa printf(3).
129 */
130 __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...)
131 {
132 char *msg;
133
134 PARA_VSPRINTF(fmt, msg);
135 return msg;
136 }
137
138 /**
139 * Paraslash's version of strcat().
140 *
141 * \param a String to be appended to.
142 * \param b String to append.
143 *
144 * Append \p b to \p a.
145 *
146 * \return If \a a is \p NULL, return a pointer to a copy of \a b, i.e.
147 * para_strcat(NULL, b) is equivalent to para_strdup(b). If \a b is \p NULL,
148 * return \a a without making a copy of \a a. Otherwise, construct the
149 * concatenation \a c, free \a a (but not \a b) and return \a c.
150 *
151 * \sa strcat(3)
152 */
153 __must_check __malloc char *para_strcat(char *a, const char *b)
154 {
155 char *tmp;
156
157 if (!a)
158 return para_strdup(b);
159 if (!b)
160 return a;
161 tmp = make_message("%s%s", a, b);
162 free(a);
163 return tmp;
164 }
165
166 /**
167 * Paraslash's version of dirname().
168 *
169 * \param name Pointer to the full path.
170 *
171 * Compute the directory component of \p name.
172 *
173 * \return If \a name is \p NULL or the empty string, return \p NULL.
174 * Otherwise, Make a copy of \a name and return its directory component. Caller
175 * is responsible to free the result.
176 */
177 __must_check __malloc char *para_dirname(const char *name)
178 {
179 char *p, *ret;
180
181 if (!name || !*name)
182 return NULL;
183 ret = para_strdup(name);
184 p = strrchr(ret, '/');
185 if (!p)
186 *ret = '\0';
187 else
188 *p = '\0';
189 return ret;
190 }
191
192 /**
193 * Paraslash's version of basename().
194 *
195 * \param name Pointer to the full path.
196 *
197 * Compute the filename component of \a name.
198 *
199 * \return \p NULL if (a) \a name is the empty string or \p NULL, or (b) name
200 * ends with a slash. Otherwise, a pointer within \a name is returned. Caller
201 * must not free the result.
202 */
203 __must_check char *para_basename(const char *name)
204 {
205 char *ret;
206
207 if (!name || !*name)
208 return NULL;
209 ret = strrchr(name, '/');
210 if (!ret)
211 return (char *)name;
212 ret++;
213 return ret;
214 }
215
/**
 * Cut trailing newline.
 *
 * \param buf The NUL-terminated string to be chopped, must not be \p NULL.
 *
 * If the last character of \p buf is a newline, it is overwritten with a
 * NUL byte. At most one newline is removed. The empty string is left alone.
 */
void chop(char *buf)
{
	/* size_t rather than int, so very long strings are not truncated. */
	size_t n = strlen(buf);

	if (n > 0 && buf[n - 1] == '\n')
		buf[n - 1] = '\0';
}
233
234 /**
235 * Get the logname of the current user.
236 *
237 * \return A dynamically allocated string that must be freed by the caller. On
238 * errors, the string "unknown_user" is returned, i.e. this function never
239 * returns \p NULL.
240 *
241 * \sa getpwuid(3).
242 */
243 __must_check __malloc char *para_logname(void)
244 {
245 struct passwd *pw = getpwuid(getuid());
246 return para_strdup(pw? pw->pw_name : "unknown_user");
247 }
248
249 /**
250 * Get the home directory of the current user.
251 *
252 * \return A dynamically allocated string that must be freed by the caller. If
253 * the home directory could not be found, this function returns "/tmp".
254 */
255 __must_check __malloc char *para_homedir(void)
256 {
257 struct passwd *pw = getpwuid(getuid());
258 return para_strdup(pw? pw->pw_dir : "/tmp");
259 }
260
261 /**
262 * Get the own hostname.
263 *
264 * \return A dynamically allocated string containing the hostname.
265 *
266 * \sa uname(2).
267 */
268 __malloc char *para_hostname(void)
269 {
270 struct utsname u;
271
272 uname(&u);
273 return para_strdup(u.nodename);
274 }
275
276 /**
277 * Used to distinguish between read-only and read-write mode.
278 *
279 * \sa for_each_line(), for_each_line_ro().
280 */
281 enum for_each_line_modes{
282 /** Activate read-only mode. */
283 LINE_MODE_RO,
284 /** Activate read-write mode. */
285 LINE_MODE_RW
286 };
287
288 static int for_each_complete_line(enum for_each_line_modes mode, char *buf,
289 size_t size, line_handler_t *line_handler, void *private_data)
290 {
291 char *start = buf, *end;
292 int ret, i, num_lines = 0;
293
294 // PARA_NOTICE_LOG("buf: %s\n", buf);
295 while (start < buf + size) {
296 char *next_null;
297 char *next_cr;
298
299 next_cr = memchr(start, '\n', buf + size - start);
300 next_null = memchr(start, '\0', buf + size - start);
301 if (!next_cr && !next_null)
302 break;
303 if (next_cr && next_null) {
304 end = next_cr < next_null? next_cr : next_null;
305 } else if (next_null) {
306 end = next_null;
307 } else
308 end = next_cr;
309 num_lines++;
310 if (!line_handler) {
311 start = ++end;
312 continue;
313 }
314 if (mode == LINE_MODE_RO) {
315 size_t s = end - start;
316 char *b = para_malloc(s + 1);
317 memcpy(b, start, s);
318 b[s] = '\0';
319 // PARA_NOTICE_LOG("b: %s, start: %s\n", b, start);
320 ret = line_handler(b, private_data);
321 free(b);
322 } else {
323 *end = '\0';
324 ret = line_handler(start, private_data);
325 }
326 if (ret < 0)
327 return ret;
328 start = ++end;
329 }
330 if (!line_handler || mode == LINE_MODE_RO)
331 return num_lines;
332 i = buf + size - start;
333 if (i && i != size)
334 memmove(buf, start, i);
335 return i;
336 }
337
338 /**
339 * Call a custom function for each complete line.
340 *
341 * \param buf The buffer containing data separated by newlines.
342 * \param size The number of bytes in \a buf.
343 * \param line_handler The custom function.
344 * \param private_data Pointer passed to \a line_handler.
345 *
346 * If \p line_handler is \p NULL, the function returns the number of complete
347 * lines in \p buf. Otherwise, \p line_handler is called for each complete
348 * line in \p buf. The first argument to \p line_handler is the current line,
349 * and \p private_data is passed as the second argument. The function returns
350 * if \p line_handler returns a negative value or no more lines are in the
351 * buffer. The rest of the buffer (last chunk containing an incomplete line)
352 * is moved to the beginning of the buffer.
353 *
354 * \return If \p line_handler is not \p NULL, this function returns the number
355 * of bytes not handled to \p line_handler on success, or the negative return
356 * value of the \p line_handler on errors.
357 *
358 * \sa for_each_line_ro().
359 */
360 int for_each_line(char *buf, size_t size, line_handler_t *line_handler,
361 void *private_data)
362 {
363 return for_each_complete_line(LINE_MODE_RW, buf, size, line_handler,
364 private_data);
365 }
366
367 /**
368 * Call a custom function for each complete line.
369 *
370 * \param buf Same meaning as in \p for_each_line().
371 * \param size Same meaning as in \p for_each_line().
372 * \param line_handler Same meaning as in \p for_each_line().
373 * \param private_data Same meaning as in \p for_each_line().
374 *
375 * This function behaves like \p for_each_line(), but \a buf is left unchanged.
376 *
377 * \return On success, the function returns the number of complete lines in \p
378 * buf, otherwise the (negative) return value of \p line_handler is returned.
379 *
380 * \sa for_each_line().
381 */
382 int for_each_line_ro(char *buf, size_t size, line_handler_t *line_handler,
383 void *private_data)
384 {
385 return for_each_complete_line(LINE_MODE_RO, buf, size, line_handler,
386 private_data);
387 }
388
/* Map the low four bits of a to a lowercase hex digit. Needs hexchar in scope. */
#define hex(a) (hexchar[(a) & 15])
/*
 * Write a five byte size prefix to buf: the low 16 bits of n as four
 * lowercase hex digits, most significant digit first, followed by a single
 * space. No terminating NUL byte is written.
 */
static void write_size_header(char *buf, int n)
{
	static char hexchar[] = "0123456789abcdef";
	int shift, pos = 0;

	for (shift = 12; shift >= 0; shift -= 4)
		buf[pos++] = hex(n >> shift);
	buf[pos] = ' ';
}
400
401 int read_size_header(const char *buf)
402 {
403 int i, len = 0;
404
405 for (i = 0; i < 4; i++) {
406 unsigned char c = buf[i];
407 len <<= 4;
408 if (c >= '0' && c <= '9') {
409 len += c - '0';
410 continue;
411 }
412 if (c >= 'a' && c <= 'f') {
413 len += c - 'a' + 10;
414 continue;
415 }
416 return -E_SIZE_PREFIX;
417 }
418 if (buf[4] != ' ')
419 return -E_SIZE_PREFIX;
420 return len;
421 }
422
423 /**
424 * Safely print into a buffer at a given offset.
425 *
426 * \param b Determines the buffer, its size, and the offset.
427 * \param fmt The format string.
428 *
429 * This function prints into the buffer given by \a b at the offset which is
430 * also given by \a b. If there is not enough space to hold the result, the
431 * buffer size is doubled until the underlying call to vsnprintf() succeeds
432 * or the size of the buffer exceeds the maximal size specified in \a b.
433 *
434 * In the latter case the unmodified \a buf and \a offset values as well as the
435 * private_data pointer of \a b are passed to the \a max_size_handler of \a b.
436 * If this function succeeds, i.e. returns a non-negative value, the offset of
437 * \a b is reset to zero and the given data is written to the beginning of the
438 * buffer. If \a max_size_handler() returns a negative value, this value is
439 * returned by \a para_printf().
440 *
441 * Upon return, the offset of \a b is adjusted accordingly so that subsequent
442 * calls to this function append data to what is already contained in the
443 * buffer.
444 *
445 * It's OK to call this function with \p b->buf being \p NULL. In this case, an
446 * initial buffer is allocated.
447 *
448 * \return The number of bytes printed into the buffer (not including the
449 * terminating \p NULL byte) on success, negative on errors. If there is no
450 * size-bound on \a b, i.e. if \p b->max_size is zero, this function never
451 * fails.
452 *
453 * \sa make_message(), vsnprintf(3).
454 */
455 __printf_2_3 int para_printf(struct para_buffer *b, const char *fmt, ...)
456 {
457 int ret, sz_off = (b->flags & PBF_SIZE_PREFIX)? 5 : 0;
458
459 if (!b->buf) {
460 b->buf = para_malloc(128);
461 b->size = 128;
462 b->offset = 0;
463 }
464 while (1) {
465 char *p = b->buf + b->offset;
466 size_t size = b->size - b->offset;
467 va_list ap;
468
469 if (size > sz_off) {
470 va_start(ap, fmt);
471 ret = vsnprintf(p + sz_off, size - sz_off, fmt, ap);
472 va_end(ap);
473 if (ret > -1 && ret < size - sz_off) { /* success */
474 b->offset += ret + sz_off;
475 if (sz_off)
476 write_size_header(p, ret);
477 return ret + sz_off;
478 }
479 }
480 /* check if we may grow the buffer */
481 if (!b->max_size || 2 * b->size < b->max_size) { /* yes */
482 /* try again with more space */
483 b->size *= 2;
484 b->buf = para_realloc(b->buf, b->size);
485 continue;
486 }
487 /* can't grow buffer */
488 if (!b->offset || !b->max_size_handler) /* message too large */
489 return -ERRNO_TO_PARA_ERROR(ENOSPC);
490 ret = b->max_size_handler(b->buf, b->offset, b->private_data);
491 if (ret < 0)
492 return ret;
493 b->offset = 0;
494 }
495 }
496
/** \cond LLONG_MAX and LLONG_MIN might not be defined. */
#ifndef LLONG_MAX
/* Fallback: the largest value of a 64-bit signed integer, 2^63 - 1. */
#define LLONG_MAX 9223372036854775807LL
#endif
#ifndef LLONG_MIN
/* Written as -MAX - 1 because the literal 2^63 does not fit in long long. */
#define LLONG_MIN (-LLONG_MAX - 1LL)
#endif
/** \endcond */
505
506 /**
507 * Convert a string to a 64-bit signed integer value.
508 *
509 * \param str The string to be converted.
510 * \param value Result pointer.
511 *
512 * \return Standard.
513 *
514 * \sa para_atoi32(), strtol(3), atoi(3).
515 */
516 int para_atoi64(const char *str, int64_t *value)
517 {
518 char *endptr;
519 long long tmp;
520
521 errno = 0; /* To distinguish success/failure after call */
522 tmp = strtoll(str, &endptr, 10);
523 if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN))
524 return -E_ATOI_OVERFLOW;
525 if (errno != 0 && tmp == 0) /* other error */
526 return -E_STRTOLL;
527 if (endptr == str)
528 return -E_ATOI_NO_DIGITS;
529 if (*endptr != '\0') /* Further characters after number */
530 return -E_ATOI_JUNK_AT_END;
531 *value = tmp;
532 return 1;
533 }
534
535 /**
536 * Convert a string to a 32-bit signed integer value.
537 *
538 * \param str The string to be converted.
539 * \param value Result pointer.
540 *
541 * \return Standard.
542 *
543 * \sa para_atoi64().
544 */
545 int para_atoi32(const char *str, int32_t *value)
546 {
547 int64_t tmp;
548 int ret;
549 const int32_t max = 2147483647;
550
551 ret = para_atoi64(str, &tmp);
552 if (ret < 0)
553 return ret;
554 if (tmp > max || tmp < -max - 1)
555 return -E_ATOI_OVERFLOW;
556 *value = tmp;
557 return 1;
558 }
559
/*
 * Check whether arg starts with the loglevel name ll, ignoring case.
 *
 * Only the first strlen(ll) characters are compared, so trailing characters
 * in arg are accepted. Returns one on match, zero otherwise.
 */
static inline int loglevel_equal(const char *arg, const char * const ll)
{
	const size_t prefix_len = strlen(ll);

	return strncasecmp(arg, ll, prefix_len) == 0;
}
564
565 /**
566 * Compute the loglevel number from its name.
567 *
568 * \param txt The name of the loglevel (debug, info, ...).
569 *
570 * \return The numeric representation of the loglevel name.
571 */
572 int get_loglevel_by_name(const char *txt)
573 {
574 if (loglevel_equal(txt, "debug"))
575 return LL_DEBUG;
576 if (loglevel_equal(txt, "info"))
577 return LL_INFO;
578 if (loglevel_equal(txt, "notice"))
579 return LL_NOTICE;
580 if (loglevel_equal(txt, "warning"))
581 return LL_WARNING;
582 if (loglevel_equal(txt, "error"))
583 return LL_ERROR;
584 if (loglevel_equal(txt, "crit"))
585 return LL_CRIT;
586 if (loglevel_equal(txt, "emerg"))
587 return LL_EMERG;
588 return -1;
589 }
590
/*
 * Extract the first word of buf into a newly allocated string.
 *
 * Words are separated by any character contained in delim. Delimiters in
 * front of the word are skipped. Single and double quotes group characters
 * (including delimiters) into one word and are removed from the output. A
 * backslash makes the next character literal, except that \n and \t are
 * translated to newline and tab.
 *
 * Returns the number of bytes of buf that were consumed if a word was found,
 * in which case *word points to the word and must be freed by the caller.
 * Returns zero if buf contains no word, and a negative error code on a
 * trailing backslash or an unmatched quote. In both of these cases *word is
 * set to NULL.
 */
static int get_next_word(const char *buf, const char *delim, char **word)
{
	enum line_state_flags {LSF_HAVE_WORD = 1, LSF_BACKSLASH = 2,
		LSF_SINGLE_QUOTE = 4, LSF_DOUBLE_QUOTE = 8};
	const char *src;
	char *dst;
	int ret, flags = 0;

	/* The word can never be longer than the input. */
	dst = para_malloc(strlen(buf) + 1);
	*dst = '\0';
	*word = dst;
	for (src = buf; *src; src++) {
		switch (*src) {
		case '\\':
			if (flags & LSF_BACKSLASH) /* \\ */
				goto copy_char;
			flags |= LSF_BACKSLASH | LSF_HAVE_WORD;
			continue;
		case 'n':
		case 't':
			if (!(flags & LSF_BACKSLASH))
				goto copy_char;
			/* \n or \t */
			*dst++ = (*src == 'n')? '\n' : '\t';
			flags &= ~LSF_BACKSLASH;
			continue;
		case '"':
			/* \" and '" are literal */
			if (flags & (LSF_BACKSLASH | LSF_SINGLE_QUOTE))
				goto copy_char;
			if (flags & LSF_DOUBLE_QUOTE)
				flags &= ~LSF_DOUBLE_QUOTE;
			else
				flags |= LSF_HAVE_WORD | LSF_DOUBLE_QUOTE;
			continue;
		case '\'':
			/* \' and "' are literal */
			if (flags & (LSF_BACKSLASH | LSF_DOUBLE_QUOTE))
				goto copy_char;
			if (flags & LSF_SINGLE_QUOTE)
				flags &= ~LSF_SINGLE_QUOTE;
			else
				flags |= LSF_HAVE_WORD | LSF_SINGLE_QUOTE;
			continue;
		}
		if (strchr(delim, *src)) {
			/* escaped or quoted delimiters belong to the word */
			if (flags & (LSF_BACKSLASH | LSF_SINGLE_QUOTE
					| LSF_DOUBLE_QUOTE))
				goto copy_char;
			if (flags & LSF_HAVE_WORD)
				goto success;
			continue; /* ignore delimiter at the beginning */
		}
copy_char:
		*dst++ = *src;
		flags |= LSF_HAVE_WORD;
		flags &= ~LSF_BACKSLASH;
	}
	ret = 0;
	if (!(flags & LSF_HAVE_WORD))
		goto discard;
	ret = -ERRNO_TO_PARA_ERROR(EINVAL);
	if (flags & LSF_BACKSLASH) {
		PARA_ERROR_LOG("trailing backslash\n");
		goto discard;
	}
	if (flags & (LSF_SINGLE_QUOTE | LSF_DOUBLE_QUOTE)) {
		PARA_ERROR_LOG("unmatched quote character\n");
		goto discard;
	}
success:
	*dst = '\0';
	return src - buf;
discard:
	free(*word);
	*word = NULL;
	return ret;
}
685
/**
 * Free an array of words created by create_argv().
 *
 * \param argv A pointer previously obtained by \ref create_argv(), or
 * \p NULL.
 *
 * Each word of the \p NULL-terminated array is freed, then the array itself.
 * Like free(3), this function does nothing if \a argv is \p NULL.
 */
void free_argv(char **argv)
{
	int i;

	if (!argv)
		return;
	for (i = 0; argv[i]; i++)
		free(argv[i]);
	free(argv);
}
699
/**
 * Split a buffer into words.
 *
 * This parser honors single and double quotes, backslash-escaped characters
 * and special characters like \p \\n. The result contains pointers to copies
 * of the words contained in \a buf and has to be freed by using \ref
 * free_argv().
 *
 * \param buf The buffer to be split.
 * \param delim Each character in this string is treated as a separator.
 * \param result The \p NULL-terminated array of words is returned here. On
 * errors, \p NULL is stored.
 *
 * \return Number of words in \a buf, negative on errors.
 */
int create_argv(const char *buf, const char *delim, char ***result)
{
	char *word, **argv = para_malloc(2 * sizeof(char *));
	const char *p;
	int ret, num_words;

	for (p = buf, num_words = 0; ; p += ret, num_words++) {
		ret = get_next_word(p, delim, &word);
		if (ret < 0)
			goto err;
		if (!ret)
			break;
		/* Keep one spare slot for the terminating NULL pointer. */
		argv = para_realloc(argv, (num_words + 2) * sizeof(char*));
		argv[num_words] = word;
	}
	argv[num_words] = NULL;
	*result = argv;
	return num_words;
err:
	while (num_words > 0)
		free(argv[--num_words]);
	free(argv);
	/* Do not leave the caller with an indeterminate pointer. */
	*result = NULL;
	return ret;
}
738
739 int para_regcomp(regex_t *preg, const char *regex, int cflags)
740 {
741 char *buf;
742 size_t size;
743 int ret = regcomp(preg, regex, cflags);
744
745 if (ret == 0)
746 return 1;
747 size = regerror(ret, preg, NULL, 0);
748 buf = para_malloc(size);
749 regerror(ret, preg, buf, size);
750 PARA_ERROR_LOG("%s\n", buf);
751 free(buf);
752 return -E_REGEX;
753 }