Use semaphore locking to avoid starting dss multiple times.
[dss.git] / dss.c
1 /*
2 * Copyright (C) 2008-2010 Andre Noll <maan@systemlinux.org>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6 #include <string.h>
7 #include <stdlib.h>
8 #include <stdarg.h>
9 #include <assert.h>
10 #include <errno.h>
11 #include <sys/types.h>
12 #include <signal.h>
13 #include <ctype.h>
14 #include <sys/stat.h>
15 #include <unistd.h>
16 #include <inttypes.h>
17 #include <sys/time.h>
18 #include <time.h>
19 #include <sys/wait.h>
20 #include <fnmatch.h>
21 #include <limits.h>
22
23
24 #include "gcc-compat.h"
25 #include "cmdline.h"
26 #include "log.h"
27 #include "string.h"
28 #include "error.h"
29 #include "fd.h"
30 #include "exec.h"
31 #include "daemon.h"
32 #include "signal.h"
33 #include "df.h"
34 #include "time.h"
35 #include "snap.h"
36 #include "ipc.h"
37
38 /** Command line and config file options. */
39 static struct gengetopt_args_info conf;
40 /** Non-NULL if we log to a file. */
41 static FILE *logfile;
42 /** The read end of the signal pipe */
43 static int signal_pipe;
44 /** Process id of current pre-create-hook/rsync/post-create-hook process. */
45 static pid_t create_pid;
46 /** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
47 static int create_process_stopped;
48 /** Process id of current pre-remove/rm/post-remove process. */
49 static pid_t remove_pid;
50 /** When the next snapshot is due. */
51 static int64_t next_snapshot_time;
52 /** When to try to remove something. */
53 static struct timeval next_removal_check;
54 /** Creation time of the snapshot currently being created. */
55 static int64_t current_snapshot_creation_time;
56 /** The snapshot currently being removed. */
57 struct snapshot *snapshot_currently_being_removed;
58 /** Needed by the post-create hook. */
59 static char *path_to_last_complete_snapshot;
60 static char *name_of_reference_snapshot;
61 /** \sa \ref snap.h for details. */
62 enum hook_status snapshot_creation_status;
63 /** \sa \ref snap.h for details. */
64 enum hook_status snapshot_removal_status;
65
66
67 DEFINE_DSS_ERRLIST;
68 static const char const *hook_status_description[] = {HOOK_STATUS_ARRAY};
69
70 /* may be called with ds == NULL. */
71 static int disk_space_low(struct disk_space *ds)
72 {
73 struct disk_space ds_struct;
74
75 if (!ds) {
76 int ret = get_disk_space(".", &ds_struct);
77 if (ret < 0)
78 return ret;
79 ds = &ds_struct;
80 }
81 if (conf.min_free_mb_arg)
82 if (ds->free_mb < conf.min_free_mb_arg)
83 return 1;
84 if (conf.min_free_percent_arg)
85 if (ds->percent_free < conf.min_free_percent_arg)
86 return 1;
87 if (conf.min_free_percent_inodes_arg)
88 if (ds->percent_free_inodes < conf.min_free_percent_inodes_arg)
89 return 1;
90 return 0;
91 }
92
93 static void dump_dss_config(const char *msg)
94 {
95 const char dash[] = "-----------------------------";
96 int ret;
97 FILE *log = logfile? logfile : stderr;
98 struct disk_space ds;
99 int64_t now = get_current_time();
100
101 if (conf.loglevel_arg > INFO)
102 return;
103
104 fprintf(log, "%s <%s config> %s\n", dash, msg, dash);
105 fprintf(log, "\n*** disk space ***\n\n");
106 ret = get_disk_space(".", &ds);
107 if (ret >= 0) {
108 DSS_INFO_LOG("disk space low: %s\n", disk_space_low(&ds)?
109 "yes" : "no");
110 log_disk_space(&ds);
111 } else
112 DSS_ERROR_LOG("can not get free disk space: %s\n",
113 dss_strerror(-ret));
114
115 /* we continue on errors from get_disk_space */
116
117 fprintf(log, "\n*** command line and config file options ***\n\n");
118 cmdline_parser_dump(log, &conf);
119 fprintf(log, "\n*** internal state ***\n\n");
120 fprintf(log,
121 "pid: %d\n"
122 "logile: %s\n"
123 "snapshot_currently_being_removed: %s\n"
124 "path_to_last_complete_snapshot: %s\n"
125 "reference_snapshot: %s\n"
126 "snapshot_creation_status: %s\n"
127 "snapshot_removal_status: %s\n"
128 ,
129 (int) getpid(),
130 logfile? conf.logfile_arg : "stderr",
131 snapshot_currently_being_removed?
132 snapshot_currently_being_removed->name : "(none)",
133 path_to_last_complete_snapshot?
134 path_to_last_complete_snapshot : "(none)",
135 name_of_reference_snapshot?
136 name_of_reference_snapshot : "(none)",
137 hook_status_description[snapshot_creation_status],
138 hook_status_description[snapshot_removal_status]
139 );
140 if (create_pid != 0)
141 fprintf(log,
142 "create_pid: %" PRId32 "\n"
143 "create process is %sstopped\n"
144 ,
145 create_pid,
146 create_process_stopped? "" : "not "
147 );
148 if (remove_pid != 0)
149 fprintf(log, "remove_pid: %" PRId32 "\n", remove_pid);
150 if (next_snapshot_time != 0)
151 fprintf(log, "next snapshot due in %" PRId64 " seconds\n",
152 next_snapshot_time - now);
153 if (current_snapshot_creation_time != 0)
154 fprintf(log, "current_snapshot_creation_time: %"
155 PRId64 " (%" PRId64 " seconds ago)\n",
156 current_snapshot_creation_time,
157 now - current_snapshot_creation_time
158 );
159 if (next_removal_check.tv_sec != 0) {
160 fprintf(log, "next removal check: %llu (%llu seconds ago)\n",
161 (long long unsigned)next_removal_check.tv_sec,
162 now - (long long unsigned)next_removal_check.tv_sec
163 );
164
165 }
166 fprintf(log, "%s </%s config> %s\n", dash, msg, dash);
167 }
168
169 /* a litte cpp magic helps to DRY */
170 #define COMMANDS \
171 COMMAND(ls) \
172 COMMAND(create) \
173 COMMAND(prune) \
174 COMMAND(run)
175 #define COMMAND(x) static int com_ ##x(void);
176 COMMANDS
177 #undef COMMAND
178 #define COMMAND(x) if (conf.x ##_given) return com_ ##x();
179 static int call_command_handler(void)
180 {
181 COMMANDS
182 DSS_EMERG_LOG("BUG: did not find command handler\n");
183 return -E_BUG;
184 }
185 #undef COMMAND
186 #undef COMMANDS
187
188 /**
189 * The log function of dss.
190 *
191 * \param ll Loglevel.
192 * \param fml Usual format string.
193 *
194 * All DSS_XXX_LOG() macros use this function.
195 */
196 __printf_2_3 void dss_log(int ll, const char* fmt,...)
197 {
198 va_list argp;
199 FILE *outfd;
200 struct tm *tm;
201 time_t t1;
202 char str[255] = "";
203
204 if (ll < conf.loglevel_arg)
205 return;
206 outfd = logfile? logfile : stderr;
207 time(&t1);
208 tm = localtime(&t1);
209 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
210 fprintf(outfd, "%s ", str);
211 if (conf.loglevel_arg <= INFO)
212 fprintf(outfd, "%i: ", ll);
213 va_start(argp, fmt);
214 vfprintf(outfd, fmt, argp);
215 va_end(argp);
216 }
217
218 /**
219 * Print a message either to stdout or to the log file.
220 */
221 static __printf_1_2 void dss_msg(const char* fmt,...)
222 {
223 FILE *outfd = conf.daemon_given? logfile : stdout;
224 va_list argp;
225 va_start(argp, fmt);
226 vfprintf(outfd, fmt, argp);
227 va_end(argp);
228 }
229
230 static char *get_config_file_name(void)
231 {
232 char *home, *config_file;
233
234 if (conf.config_file_given)
235 return dss_strdup(conf.config_file_arg);
236 home = get_homedir();
237 config_file = make_message("%s/.dssrc", home);
238 free(home);
239 return config_file;
240 }
241
242 static void dss_get_snapshot_list(struct snapshot_list *sl)
243 {
244 get_snapshot_list(sl, conf.unit_interval_arg, conf.num_intervals_arg);
245 }
246
247 static int64_t compute_next_snapshot_time(void)
248 {
249 int64_t x = 0, now = get_current_time(), unit_interval
250 = 24 * 3600 * conf.unit_interval_arg, ret;
251 unsigned wanted = desired_number_of_snapshots(0, conf.num_intervals_arg),
252 num_complete_snapshots = 0;
253 int i;
254 struct snapshot *s = NULL;
255 struct snapshot_list sl;
256
257 dss_get_snapshot_list(&sl);
258 FOR_EACH_SNAPSHOT(s, i, &sl) {
259 if (!(s->flags & SS_COMPLETE))
260 continue;
261 num_complete_snapshots++;
262 x += s->completion_time - s->creation_time;
263 }
264 assert(x >= 0);
265
266 ret = now;
267 if (num_complete_snapshots == 0)
268 goto out;
269 x /= num_complete_snapshots; /* avg time to create one snapshot */
270 if (unit_interval < x * wanted) /* oops, no sleep at all */
271 goto out;
272 ret = s->completion_time + unit_interval / wanted - x;
273 out:
274 free_snapshot_list(&sl);
275 return ret;
276 }
277
278 static inline void invalidate_next_snapshot_time(void)
279 {
280 next_snapshot_time = 0;
281 }
282
283 static inline int next_snapshot_time_is_valid(void)
284 {
285 return next_snapshot_time != 0;
286 }
287
288 static int next_snapshot_is_due(void)
289 {
290 int64_t now = get_current_time();
291
292 if (!next_snapshot_time_is_valid())
293 next_snapshot_time = compute_next_snapshot_time();
294 if (next_snapshot_time <= now) {
295 DSS_DEBUG_LOG("next snapshot: now\n");
296 return 1;
297 }
298 DSS_DEBUG_LOG("next snapshot due in %" PRId64 " seconds\n",
299 next_snapshot_time - now);
300 return 0;
301 }
302
303 static int pre_create_hook(void)
304 {
305 int ret, fds[3] = {0, 0, 0};
306
307 assert(snapshot_creation_status == HS_READY);
308 /* make sure that the next snapshot time will be recomputed */
309 invalidate_next_snapshot_time();
310 DSS_DEBUG_LOG("executing %s\n", conf.pre_create_hook_arg);
311 ret = dss_exec_cmdline_pid(&create_pid,
312 conf.pre_create_hook_arg, fds);
313 if (ret < 0)
314 return ret;
315 snapshot_creation_status = HS_PRE_RUNNING;
316 return ret;
317 }
318
319 static int pre_remove_hook(struct snapshot *s, const char *why)
320 {
321 int ret, fds[3] = {0, 0, 0};
322 char *cmd;
323
324 if (!s)
325 return 0;
326 DSS_DEBUG_LOG("%s snapshot %s\n", why, s->name);
327 assert(snapshot_removal_status == HS_READY);
328 assert(remove_pid == 0);
329 assert(!snapshot_currently_being_removed);
330
331 snapshot_currently_being_removed = dss_malloc(sizeof(struct snapshot));
332 *snapshot_currently_being_removed = *s;
333 snapshot_currently_being_removed->name = dss_strdup(s->name);
334
335 cmd = make_message("%s %s/%s", conf.pre_remove_hook_arg,
336 conf.dest_dir_arg, s->name);
337 DSS_DEBUG_LOG("executing %s\n", cmd);
338 ret = dss_exec_cmdline_pid(&remove_pid, cmd, fds);
339 free(cmd);
340 if (ret < 0)
341 return ret;
342 snapshot_removal_status = HS_PRE_RUNNING;
343 return ret;
344 }
345
346 static int exec_rm(void)
347 {
348 struct snapshot *s = snapshot_currently_being_removed;
349 int fds[3] = {0, 0, 0};
350 char *new_name = being_deleted_name(s);
351 char *argv[] = {"rm", "-rf", new_name, NULL};
352 int ret;
353
354 assert(snapshot_removal_status == HS_PRE_SUCCESS);
355 assert(remove_pid == 0);
356
357 DSS_NOTICE_LOG("removing %s (interval = %i)\n", s->name, s->interval);
358 ret = dss_rename(s->name, new_name);
359 if (ret < 0)
360 goto out;
361 ret = dss_exec(&remove_pid, argv[0], argv, fds);
362 if (ret < 0)
363 goto out;
364 snapshot_removal_status = HS_RUNNING;
365 out:
366 free(new_name);
367 return ret;
368 }
369
370 static int snapshot_is_being_created(struct snapshot *s)
371 {
372 return s->creation_time == current_snapshot_creation_time;
373 }
374
375 static struct snapshot *find_orphaned_snapshot(struct snapshot_list *sl)
376 {
377 struct snapshot *s;
378 int i;
379
380 DSS_DEBUG_LOG("looking for orphaned snapshots\n");
381 FOR_EACH_SNAPSHOT(s, i, sl) {
382 if (snapshot_is_being_created(s))
383 continue;
384 /*
385 * We know that no rm is currently running, so if s is marked
386 * as being deleted, a previously started rm must have failed.
387 */
388 if (s->flags & SS_BEING_DELETED)
389 return s;
390
391 if (s->flags & SS_COMPLETE) /* good snapshot */
392 continue;
393 /*
394 * This snapshot is incomplete and it is not the snapshot
395 * currently being created. However, we must not remove it if
396 * rsync is about to be restarted. As only the newest snapshot
397 * can be restarted, this snapshot is orphaned if it is not the
398 * newest snapshot or if we are not about to restart rsync.
399 */
400 if (get_newest_snapshot(sl) != s)
401 return s;
402 if (snapshot_creation_status != HS_NEEDS_RESTART)
403 return s;
404 }
405 /* no orphaned snapshots */
406 return NULL;
407 }
408
409 static int is_reference_snapshot(struct snapshot *s)
410 {
411 if (!name_of_reference_snapshot)
412 return 0;
413 return strcmp(s->name, name_of_reference_snapshot)? 0 : 1;
414 }
415
416 /*
417 * return: 0: no redundant snapshots, 1: rm process started, negative: error
418 */
419 static struct snapshot *find_redundant_snapshot(struct snapshot_list *sl)
420 {
421 int i, interval;
422 struct snapshot *s;
423 unsigned missing = 0;
424
425 DSS_DEBUG_LOG("looking for intervals containing too many snapshots\n");
426 for (interval = conf.num_intervals_arg - 1; interval >= 0; interval--) {
427 unsigned keep = desired_number_of_snapshots(interval, conf.num_intervals_arg);
428 unsigned num = sl->interval_count[interval];
429 struct snapshot *victim = NULL, *prev = NULL;
430 int64_t score = LONG_MAX;
431
432 if (keep >= num)
433 missing += keep - num;
434 // DSS_DEBUG_LOG("interval %i: keep: %u, have: %u, missing: %u\n",
435 // interval, keep, num, missing);
436 if (keep + missing >= num)
437 continue;
438 /* redundant snapshot in this interval, pick snapshot with lowest score */
439 FOR_EACH_SNAPSHOT(s, i, sl) {
440 int64_t this_score;
441
442 if (snapshot_is_being_created(s))
443 continue;
444 if (is_reference_snapshot(s))
445 continue;
446 //DSS_DEBUG_LOG("checking %s\n", s->name);
447 if (s->interval > interval) {
448 prev = s;
449 continue;
450 }
451 if (s->interval < interval)
452 break;
453 if (!victim) {
454 victim = s;
455 prev = s;
456 continue;
457 }
458 assert(prev);
459 /* check if s is a better victim */
460 this_score = s->creation_time - prev->creation_time;
461 assert(this_score >= 0);
462 //DSS_DEBUG_LOG("%s: score %lli\n", s->name, (long long)score);
463 if (this_score < score) {
464 score = this_score;
465 victim = s;
466 }
467 prev = s;
468 }
469 assert(victim);
470 return victim;
471 }
472 return NULL;
473 }
474
475 static struct snapshot *find_outdated_snapshot(struct snapshot_list *sl)
476 {
477 int i;
478 struct snapshot *s;
479
480 DSS_DEBUG_LOG("looking for snapshots belonging to intervals >= %d\n",
481 conf.num_intervals_arg);
482 FOR_EACH_SNAPSHOT(s, i, sl) {
483 if (snapshot_is_being_created(s))
484 continue;
485 if (is_reference_snapshot(s))
486 continue;
487 if (s->interval < conf.num_intervals_arg)
488 continue;
489 return s;
490 }
491 return NULL;
492 }
493
494 struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
495 {
496 int i;
497 struct snapshot *s;
498 FOR_EACH_SNAPSHOT(s, i, sl) {
499 if (snapshot_is_being_created(s))
500 continue;
501 if (is_reference_snapshot(s))
502 continue;
503 DSS_INFO_LOG("oldest removable snapshot: %s\n", s->name);
504 return s;
505 }
506 return NULL;
507 }
508
509 static int rename_incomplete_snapshot(int64_t start)
510 {
511 char *old_name;
512 int ret;
513
514 free(path_to_last_complete_snapshot);
515 ret = complete_name(start, get_current_time(),
516 &path_to_last_complete_snapshot);
517 if (ret < 0)
518 return ret;
519 old_name = incomplete_name(start);
520 ret = dss_rename(old_name, path_to_last_complete_snapshot);
521 if (ret >= 0)
522 DSS_NOTICE_LOG("%s -> %s\n", old_name,
523 path_to_last_complete_snapshot);
524 free(old_name);
525 return ret;
526 }
527
528 static int try_to_free_disk_space(void)
529 {
530 int ret;
531 struct snapshot_list sl;
532 struct snapshot *victim;
533 struct timeval now;
534 const char *why;
535 int low_disk_space;
536
537 ret = disk_space_low(NULL);
538 if (ret < 0)
539 return ret;
540 low_disk_space = ret;
541 gettimeofday(&now, NULL);
542 if (tv_diff(&next_removal_check, &now, NULL) > 0)
543 return 0;
544 if (!low_disk_space) {
545 if (conf.keep_redundant_given)
546 return 0;
547 if (snapshot_creation_status != HS_READY)
548 return 0;
549 if (next_snapshot_is_due())
550 return 0;
551 }
552 dss_get_snapshot_list(&sl);
553 ret = 0;
554 if (!low_disk_space && sl.num_snapshots <= 1)
555 goto out;
556 why = "outdated";
557 victim = find_outdated_snapshot(&sl);
558 if (victim)
559 goto remove;
560 why = "redundant";
561 victim = find_redundant_snapshot(&sl);
562 if (victim)
563 goto remove;
564 /* try harder only if disk space is low */
565 if (!low_disk_space)
566 goto out;
567 why = "orphaned";
568 victim = find_orphaned_snapshot(&sl);
569 if (victim)
570 goto remove;
571 DSS_WARNING_LOG("disk space low and nothing obvious to remove\n");
572 victim = find_oldest_removable_snapshot(&sl);
573 if (victim)
574 goto remove;
575 DSS_CRIT_LOG("uhuhu: disk space low and nothing to remove\n");
576 ret = -ERRNO_TO_DSS_ERROR(ENOSPC);
577 goto out;
578 remove:
579 ret = pre_remove_hook(victim, why);
580 out:
581 free_snapshot_list(&sl);
582 return ret;
583 }
584
585 static int post_create_hook(void)
586 {
587 int ret, fds[3] = {0, 0, 0};
588 char *cmd;
589
590 cmd = make_message("%s %s/%s", conf.post_create_hook_arg,
591 conf.dest_dir_arg, path_to_last_complete_snapshot);
592 DSS_NOTICE_LOG("executing %s\n", cmd);
593 ret = dss_exec_cmdline_pid(&create_pid, cmd, fds);
594 free(cmd);
595 if (ret < 0)
596 return ret;
597 snapshot_creation_status = HS_POST_RUNNING;
598 return ret;
599 }
600
601 static int post_remove_hook(void)
602 {
603 int ret, fds[3] = {0, 0, 0};
604 char *cmd;
605 struct snapshot *s = snapshot_currently_being_removed;
606
607 assert(s);
608
609 cmd = make_message("%s %s/%s", conf.post_remove_hook_arg,
610 conf.dest_dir_arg, s->name);
611 DSS_NOTICE_LOG("executing %s\n", cmd);
612 ret = dss_exec_cmdline_pid(&remove_pid, cmd, fds);
613 free(cmd);
614 if (ret < 0)
615 return ret;
616 snapshot_removal_status = HS_POST_RUNNING;
617 return ret;
618 }
619
620 static void dss_kill(pid_t pid, int sig, const char *msg)
621 {
622 const char *signame, *process_name;
623
624 if (pid == 0)
625 return;
626 switch (sig) {
627 case SIGTERM: signame = "TERM"; break;
628 case SIGSTOP: signame = "STOP"; break;
629 case SIGCONT: signame = "CONT"; break;
630 default: signame = "????";
631 }
632
633 if (pid == create_pid)
634 process_name = "create";
635 else if (pid == remove_pid)
636 process_name = "remove";
637 else process_name = "??????";
638
639 if (msg)
640 DSS_INFO_LOG("%s\n", msg);
641 DSS_DEBUG_LOG("sending signal %d (%s) to pid %d (%s process)\n",
642 sig, signame, (int)pid, process_name);
643 if (kill(pid, sig) >= 0)
644 return;
645 DSS_INFO_LOG("failed to send signal %d (%s) to pid %d (%s process)\n",
646 sig, signame, (int)pid, process_name);
647 }
648
649 static void stop_create_process(void)
650 {
651 if (create_process_stopped)
652 return;
653 dss_kill(create_pid, SIGSTOP, "suspending create process");
654 create_process_stopped = 1;
655 }
656
657 static void restart_create_process(void)
658 {
659 if (!create_process_stopped)
660 return;
661 dss_kill(create_pid, SIGCONT, "resuming create process");
662 create_process_stopped = 0;
663 }
664
/**
 * Print a log message about the exit status of a child.
 *
 * \param pid The process id of the child.
 * \param status The wait status as filled in by waitpid().
 *
 * A regular exit is logged at INFO level, death by a signal at NOTICE
 * level, and everything else at WARNING level.
 */
static void log_termination_msg(pid_t pid, int status)
{
	if (WIFEXITED(status))
		DSS_INFO_LOG("child %i exited. Exit status: %i\n", (int)pid,
			WEXITSTATUS(status));
	else if (WIFSIGNALED(status))
		DSS_NOTICE_LOG("child %i was killed by signal %i\n", (int)pid,
			WTERMSIG(status));
	else
		DSS_WARNING_LOG("child %i terminated abnormally\n", (int)pid);
}
679
680 static int wait_for_process(pid_t pid, int *status)
681 {
682 int ret;
683
684 DSS_DEBUG_LOG("Waiting for process %d to terminate\n", (int)pid);
685 for (;;) {
686 fd_set rfds;
687
688 FD_ZERO(&rfds);
689 FD_SET(signal_pipe, &rfds);
690 ret = dss_select(signal_pipe + 1, &rfds, NULL, NULL);
691 if (ret < 0)
692 break;
693 ret = next_signal();
694 if (!ret)
695 continue;
696 if (ret == SIGCHLD) {
697 ret = waitpid(pid, status, 0);
698 if (ret >= 0)
699 break;
700 if (errno != EINTR) { /* error */
701 ret = -ERRNO_TO_DSS_ERROR(errno);
702 break;
703 }
704 }
705 /* SIGINT or SIGTERM */
706 dss_kill(pid, SIGTERM, "killing child process");
707 }
708 if (ret < 0)
709 DSS_ERROR_LOG("failed to wait for process %d\n", (int)pid);
710 else
711 log_termination_msg(pid, *status);
712 return ret;
713 }
714
715 static void handle_pre_remove_exit(int status)
716 {
717 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
718 snapshot_removal_status = HS_READY;
719 gettimeofday(&next_removal_check, NULL);
720 next_removal_check.tv_sec += 60;
721 return;
722 }
723 snapshot_removal_status = HS_PRE_SUCCESS;
724 }
725
726 static int handle_rm_exit(int status)
727 {
728 if (!WIFEXITED(status)) {
729 snapshot_removal_status = HS_READY;
730 return -E_INVOLUNTARY_EXIT;
731 }
732 if (WEXITSTATUS(status)) {
733 snapshot_removal_status = HS_READY;
734 return -E_BAD_EXIT_CODE;
735 }
736 snapshot_removal_status = HS_SUCCESS;
737 return 1;
738 }
739
740 static void handle_post_remove_exit(void)
741 {
742 snapshot_removal_status = HS_READY;
743 }
744
745 static int handle_remove_exit(int status)
746 {
747 int ret;
748 struct snapshot *s = snapshot_currently_being_removed;
749
750 assert(s);
751 switch (snapshot_removal_status) {
752 case HS_PRE_RUNNING:
753 handle_pre_remove_exit(status);
754 ret = 1;
755 break;
756 case HS_RUNNING:
757 ret = handle_rm_exit(status);
758 break;
759 case HS_POST_RUNNING:
760 handle_post_remove_exit();
761 ret = 1;
762 break;
763 default:
764 ret = -E_BUG;
765 }
766 if (snapshot_removal_status == HS_READY) {
767 free(s->name);
768 free(s);
769 snapshot_currently_being_removed = NULL;
770 }
771 remove_pid = 0;
772 return ret;
773 }
774
775 static int wait_for_remove_process(void)
776 {
777 int status, ret;
778
779 assert(remove_pid);
780 assert(
781 snapshot_removal_status == HS_PRE_RUNNING ||
782 snapshot_removal_status == HS_RUNNING ||
783 snapshot_removal_status == HS_POST_RUNNING
784 );
785 ret = wait_for_process(remove_pid, &status);
786 if (ret < 0)
787 return ret;
788 return handle_remove_exit(status);
789 }
790
791 static int handle_rsync_exit(int status)
792 {
793 int es, ret;
794
795 if (!WIFEXITED(status)) {
796 DSS_ERROR_LOG("rsync process %d died involuntary\n", (int)create_pid);
797 ret = -E_INVOLUNTARY_EXIT;
798 snapshot_creation_status = HS_READY;
799 goto out;
800 }
801 es = WEXITSTATUS(status);
802 /*
803 * Restart rsync on non-fatal errors:
804 * 12: Error in rsync protocol data stream
805 * 13: Errors with program diagnostics
806 */
807 if (es == 12 || es == 13) {
808 DSS_WARNING_LOG("rsync process %d returned %d -- restarting\n",
809 (int)create_pid, es);
810 snapshot_creation_status = HS_NEEDS_RESTART;
811 next_snapshot_time = get_current_time() + 60;
812 ret = 1;
813 goto out;
814 }
815 if (es != 0 && es != 23 && es != 24) {
816 DSS_ERROR_LOG("rsync process %d returned %d\n", (int)create_pid, es);
817 ret = -E_BAD_EXIT_CODE;
818 snapshot_creation_status = HS_READY;
819 goto out;
820 }
821 ret = rename_incomplete_snapshot(current_snapshot_creation_time);
822 if (ret < 0)
823 goto out;
824 snapshot_creation_status = HS_SUCCESS;
825 free(name_of_reference_snapshot);
826 name_of_reference_snapshot = NULL;
827 out:
828 create_process_stopped = 0;
829 return ret;
830 }
831
832 static int handle_pre_create_hook_exit(int status)
833 {
834 int es, ret;
835 static int warn_count;
836
837 if (!WIFEXITED(status)) {
838 snapshot_creation_status = HS_READY;
839 ret = -E_INVOLUNTARY_EXIT;
840 goto out;
841 }
842 es = WEXITSTATUS(status);
843 if (es) {
844 if (!warn_count--) {
845 DSS_NOTICE_LOG("pre_create_hook %s returned %d\n",
846 conf.pre_create_hook_arg, es);
847 DSS_NOTICE_LOG("deferring snapshot creation...\n");
848 warn_count = 60; /* warn only once per hour */
849 }
850 next_snapshot_time = get_current_time() + 60;
851 snapshot_creation_status = HS_READY;
852 ret = 0;
853 goto out;
854 }
855 warn_count = 0;
856 snapshot_creation_status = HS_PRE_SUCCESS;
857 ret = 1;
858 out:
859 return ret;
860 }
861
862 static int handle_sigchld(void)
863 {
864 pid_t pid;
865 int status, ret = reap_child(&pid, &status);
866
867 if (ret <= 0)
868 return ret;
869
870 if (pid == create_pid) {
871 switch (snapshot_creation_status) {
872 case HS_PRE_RUNNING:
873 ret = handle_pre_create_hook_exit(status);
874 break;
875 case HS_RUNNING:
876 ret = handle_rsync_exit(status);
877 break;
878 case HS_POST_RUNNING:
879 snapshot_creation_status = HS_READY;
880 ret = 1;
881 break;
882 default:
883 DSS_EMERG_LOG("BUG: create can't die in status %d\n",
884 snapshot_creation_status);
885 return -E_BUG;
886 }
887 create_pid = 0;
888 return ret;
889 }
890 if (pid == remove_pid) {
891 ret = handle_remove_exit(status);
892 if (ret < 0)
893 return ret;
894 return ret;
895 }
896 DSS_EMERG_LOG("BUG: unknown process %d died\n", (int)pid);
897 return -E_BUG;
898 }
899
900 static int check_config(void)
901 {
902 if (conf.unit_interval_arg <= 0) {
903 DSS_ERROR_LOG("bad unit interval: %i\n", conf.unit_interval_arg);
904 return -E_INVALID_NUMBER;
905 }
906 DSS_DEBUG_LOG("unit interval: %i day(s)\n", conf.unit_interval_arg);
907 if (conf.num_intervals_arg <= 0) {
908 DSS_ERROR_LOG("bad number of intervals %i\n", conf.num_intervals_arg);
909 return -E_INVALID_NUMBER;
910 }
911 DSS_DEBUG_LOG("number of intervals: %i\n", conf.num_intervals_arg);
912 return 1;
913 }
914
915 /*
916 * Returns < 0 on errors, 0 if no config file is given and > 0 if the config
917 * file was read successfully.
918 */
919 static int parse_config_file(int override)
920 {
921 int ret, config_file_exists;
922 char *config_file = get_config_file_name();
923 struct stat statbuf;
924 char *old_logfile_arg = NULL;
925 int old_daemon_given = 0;
926
927 if (override) { /* SIGHUP */
928 if (conf.logfile_given)
929 old_logfile_arg = dss_strdup(conf.logfile_arg);
930 old_daemon_given = conf.daemon_given;
931 }
932
933 config_file_exists = !stat(config_file, &statbuf);
934 if (!config_file_exists && conf.config_file_given) {
935 ret = -ERRNO_TO_DSS_ERROR(errno);
936 DSS_ERROR_LOG("failed to stat config file %s\n", config_file);
937 goto out;
938 }
939 if (config_file_exists) {
940 struct cmdline_parser_params params = {
941 .override = override,
942 .initialize = 0,
943 .check_required = 1,
944 .check_ambiguity = 0,
945 .print_errors = 1
946 };
947 if (override) { /* invalidate all rsync options */
948 int i;
949
950 for (i = 0; i < conf.rsync_option_given; i++) {
951 free(conf.rsync_option_arg[i]);
952 conf.rsync_option_arg[i] = NULL;
953 }
954 conf.rsync_option_given = 0;
955 }
956 cmdline_parser_config_file(config_file, &conf, &params);
957 }
958 ret = check_config();
959 if (ret < 0)
960 goto out;
961 if (override) {
962 /* don't change daemon mode on SIGHUP */
963 conf.daemon_given = old_daemon_given;
964 close_log(logfile);
965 logfile = NULL;
966 if (conf.logfile_given)
967 free(old_logfile_arg);
968 else if (conf.daemon_given) { /* re-use old logfile */
969 conf.logfile_arg = old_logfile_arg;
970 conf.logfile_given = 1;
971 }
972 }
973 if (conf.logfile_given && conf.run_given && conf.daemon_given) {
974 logfile = open_log(conf.logfile_arg);
975 log_welcome(conf.loglevel_arg);
976 }
977 DSS_DEBUG_LOG("loglevel: %d\n", conf.loglevel_arg);
978 ret = config_file_exists;
979 out:
980 free(config_file);
981 if (ret < 0)
982 DSS_EMERG_LOG("%s\n", dss_strerror(-ret));
983 return ret;
984 }
985
986 static int change_to_dest_dir(void)
987 {
988 DSS_INFO_LOG("changing cwd to %s\n", conf.dest_dir_arg);
989 return dss_chdir(conf.dest_dir_arg);
990 }
991
/*
 * React to SIGHUP: dump the old configuration, re-read the config file,
 * dump the new configuration, invalidate the next snapshot time and change
 * to the (possibly new) destination directory.
 *
 * Returns the negative return value of parse_config_file() on errors and
 * the return value of change_to_dest_dir() otherwise.
 */
static int handle_sighup(void)
{
	int ret;

	DSS_NOTICE_LOG("SIGHUP, re-reading config\n");
	dump_dss_config("old");
	ret = parse_config_file(1);
	if (ret >= 0) {
		dump_dss_config("reloaded");
		invalidate_next_snapshot_time();
		ret = change_to_dest_dir();
	}
	return ret;
}
1005
1006 static int handle_signal(void)
1007 {
1008 int sig, ret = next_signal();
1009
1010 if (ret <= 0)
1011 goto out;
1012 sig = ret;
1013 switch (sig) {
1014 case SIGINT:
1015 case SIGTERM:
1016 restart_create_process();
1017 dss_kill(create_pid, SIGTERM, NULL);
1018 dss_kill(remove_pid, SIGTERM, NULL);
1019 ret = -E_SIGNAL;
1020 break;
1021 case SIGHUP:
1022 ret = handle_sighup();
1023 break;
1024 case SIGCHLD:
1025 ret = handle_sigchld();
1026 break;
1027 }
1028 out:
1029 if (ret < 0)
1030 DSS_ERROR_LOG("%s\n", dss_strerror(-ret));
1031 return ret;
1032 }
1033
1034 /*
1035 * We can not use rsync locally if the local user is different from the remote
1036 * user or if the src dir is not on the local host (or both).
1037 */
1038 static int use_rsync_locally(char *logname)
1039 {
1040 char *h = conf.remote_host_arg;
1041
1042 if (strcmp(h, "localhost") && strcmp(h, "127.0.0.1"))
1043 return 0;
1044 if (conf.remote_user_given && strcmp(conf.remote_user_arg, logname))
1045 return 0;
1046 return 1;
1047 }
1048
1049 static int rename_resume_snap(int64_t creation_time)
1050 {
1051 struct snapshot_list sl = {.num_snapshots = 0};
1052 struct snapshot *s = NULL;
1053 char *new_name = incomplete_name(creation_time);
1054 int ret;
1055 const char *why;
1056
1057 ret = 0;
1058 if (conf.no_resume_given)
1059 goto out;
1060 dss_get_snapshot_list(&sl);
1061 /*
1062 * Snapshot recycling: We first look at the newest snapshot. If this
1063 * snapshot happens to be incomplete, the last rsync process was
1064 * aborted and we reuse this one. Otherwise we look at snapshots which
1065 * could be removed (outdated and redundant snapshots) as candidates
1066 * for recycling. If no outdated/redundant snapshot exists, we check if
1067 * there is an orphaned snapshot, which likely is useless anyway.
1068 *
1069 * Only if no existing snapshot is suitable for recycling, we bite the
1070 * bullet and create a new one.
1071 */
1072 s = get_newest_snapshot(&sl);
1073 if (!s) /* no snapshots at all */
1074 goto out;
1075 /* re-use last snapshot if it is incomplete */
1076 why = "aborted";
1077 if ((s->flags & SS_COMPLETE) == 0)
1078 goto out;
1079 why = "outdated";
1080 s = find_outdated_snapshot(&sl);
1081 if (s)
1082 goto out;
1083 why = "redundant";
1084 s = find_redundant_snapshot(&sl);
1085 if (s)
1086 goto out;
1087 why = "orphaned";
1088 s = find_orphaned_snapshot(&sl);
1089 out:
1090 if (s) {
1091 DSS_INFO_LOG("reusing %s snapshot %s\n", why, s->name);
1092 ret = dss_rename(s->name, new_name);
1093 }
1094 if (ret >= 0)
1095 DSS_NOTICE_LOG("creating new snapshot %s\n", new_name);
1096 free(new_name);
1097 free_snapshot_list(&sl);
1098 return ret;
1099 }
1100
1101 static void create_rsync_argv(char ***argv, int64_t *num)
1102 {
1103 char *logname;
1104 int i = 0, j;
1105 struct snapshot_list sl;
1106
1107 dss_get_snapshot_list(&sl);
1108 assert(!name_of_reference_snapshot);
1109 name_of_reference_snapshot = name_of_newest_complete_snapshot(&sl);
1110 free_snapshot_list(&sl);
1111
1112 *argv = dss_malloc((15 + conf.rsync_option_given) * sizeof(char *));
1113 (*argv)[i++] = dss_strdup("rsync");
1114 (*argv)[i++] = dss_strdup("-aq");
1115 (*argv)[i++] = dss_strdup("--delete");
1116 for (j = 0; j < conf.rsync_option_given; j++)
1117 (*argv)[i++] = dss_strdup(conf.rsync_option_arg[j]);
1118 if (name_of_reference_snapshot) {
1119 DSS_INFO_LOG("using %s as reference\n", name_of_reference_snapshot);
1120 (*argv)[i++] = make_message("--link-dest=../%s",
1121 name_of_reference_snapshot);
1122 } else
1123 DSS_INFO_LOG("no suitable reference snapshot found\n");
1124 logname = dss_logname();
1125 if (use_rsync_locally(logname))
1126 (*argv)[i++] = dss_strdup(conf.source_dir_arg);
1127 else
1128 (*argv)[i++] = make_message("%s@%s:%s/", conf.remote_user_given?
1129 conf.remote_user_arg : logname,
1130 conf.remote_host_arg, conf.source_dir_arg);
1131 free(logname);
1132 *num = get_current_time();
1133 (*argv)[i++] = incomplete_name(*num);
1134 (*argv)[i++] = NULL;
1135 for (j = 0; j < i; j++)
1136 DSS_DEBUG_LOG("argv[%d] = %s\n", j, (*argv)[j]);
1137 }
1138
/*
 * Free a NULL-terminated argument vector together with all of its strings.
 * Passing NULL is allowed and does nothing.
 */
static void free_rsync_argv(char **argv)
{
	char **p;

	if (argv == NULL)
		return;
	for (p = argv; *p != NULL; p++)
		free(*p);
	free(argv);
}
1149
1150 static int create_snapshot(char **argv)
1151 {
1152 int ret, fds[3] = {0, 0, 0};
1153
1154 ret = rename_resume_snap(current_snapshot_creation_time);
1155 if (ret < 0)
1156 return ret;
1157 ret = dss_exec(&create_pid, argv[0], argv, fds);
1158 if (ret < 0)
1159 return ret;
1160 snapshot_creation_status = HS_RUNNING;
1161 return ret;
1162 }
1163
1164 static int select_loop(void)
1165 {
1166 int ret;
1167 /* check every 60 seconds for free disk space */
1168 struct timeval tv;
1169 char **rsync_argv = NULL;
1170
1171 for (;;) {
1172 fd_set rfds;
1173 struct timeval *tvp;
1174
1175 if (remove_pid)
1176 tvp = NULL; /* sleep until rm hook/process dies */
1177 else { /* sleep one minute */
1178 tv.tv_sec = 60;
1179 tv.tv_usec = 0;
1180 tvp = &tv;
1181 }
1182 FD_ZERO(&rfds);
1183 FD_SET(signal_pipe, &rfds);
1184 ret = dss_select(signal_pipe + 1, &rfds, NULL, tvp);
1185 if (ret < 0)
1186 goto out;
1187 if (FD_ISSET(signal_pipe, &rfds)) {
1188 ret = handle_signal();
1189 if (ret < 0)
1190 goto out;
1191 }
1192 if (remove_pid)
1193 continue;
1194 if (snapshot_removal_status == HS_PRE_SUCCESS) {
1195 ret = exec_rm();
1196 if (ret < 0)
1197 goto out;
1198 continue;
1199 }
1200 if (snapshot_removal_status == HS_SUCCESS) {
1201 ret = post_remove_hook();
1202 if (ret < 0)
1203 goto out;
1204 continue;
1205 }
1206 ret = try_to_free_disk_space();
1207 if (ret < 0)
1208 goto out;
1209 if (snapshot_removal_status != HS_READY) {
1210 stop_create_process();
1211 continue;
1212 }
1213 restart_create_process();
1214 switch (snapshot_creation_status) {
1215 case HS_READY:
1216 if (!next_snapshot_is_due())
1217 continue;
1218 ret = pre_create_hook();
1219 if (ret < 0)
1220 goto out;
1221 continue;
1222 case HS_PRE_RUNNING:
1223 case HS_RUNNING:
1224 case HS_POST_RUNNING:
1225 continue;
1226 case HS_PRE_SUCCESS:
1227 if (!name_of_reference_snapshot) {
1228 free_rsync_argv(rsync_argv);
1229 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1230 }
1231 ret = create_snapshot(rsync_argv);
1232 if (ret < 0)
1233 goto out;
1234 continue;
1235 case HS_NEEDS_RESTART:
1236 if (!next_snapshot_is_due())
1237 continue;
1238 ret = create_snapshot(rsync_argv);
1239 if (ret < 0)
1240 goto out;
1241 continue;
1242 case HS_SUCCESS:
1243 ret = post_create_hook();
1244 if (ret < 0)
1245 goto out;
1246 continue;
1247 }
1248 }
1249 out:
1250 return ret;
1251 }
1252
1253 static void exit_hook(int exit_code)
1254 {
1255 int fds[3] = {0, 0, 0};
1256 char *argv[] = {conf.exit_hook_arg, dss_strerror(-exit_code), NULL};
1257 pid_t pid;
1258
1259 DSS_NOTICE_LOG("executing %s %s\n", argv[0], argv[1]);
1260 dss_exec(&pid, conf.exit_hook_arg, argv, fds);
1261 }
1262
/**
 * Acquire the dss lock or terminate the process.
 *
 * The lock is obtained by passing the name of the config file to lock_dss().
 * If this fails, an error message is logged and the process exits with
 * EXIT_FAILURE.
 */
static void lock_dss_or_die(void)
{
	char *config_file = get_config_file_name();
	int ret = lock_dss(config_file);

	free(config_file);
	if (ret >= 0)
		return;
	DSS_EMERG_LOG("failed to lock: %s\n", dss_strerror(-ret));
	exit(EXIT_FAILURE);
}
1274
1275 static int com_run(void)
1276 {
1277 int ret;
1278
1279 lock_dss_or_die();
1280 if (conf.dry_run_given) {
1281 DSS_ERROR_LOG("dry_run not supported by this command\n");
1282 return -E_SYNTAX;
1283 }
1284 ret = install_sighandler(SIGHUP);
1285 if (ret < 0)
1286 return ret;
1287 ret = select_loop();
1288 if (ret >= 0) /* impossible */
1289 ret = -E_BUG;
1290 exit_hook(ret);
1291 return ret;
1292 }
1293
1294 static int com_prune(void)
1295 {
1296 int ret;
1297 struct snapshot_list sl;
1298 struct snapshot *victim;
1299 struct disk_space ds;
1300 const char *why;
1301
1302 lock_dss_or_die();
1303 ret = get_disk_space(".", &ds);
1304 if (ret < 0)
1305 return ret;
1306 log_disk_space(&ds);
1307 dss_get_snapshot_list(&sl);
1308 why = "outdated";
1309 victim = find_outdated_snapshot(&sl);
1310 if (victim)
1311 goto rm;
1312 why = "redundant";
1313 victim = find_redundant_snapshot(&sl);
1314 if (victim)
1315 goto rm;
1316 ret = 0;
1317 goto out;
1318 rm:
1319 if (conf.dry_run_given) {
1320 dss_msg("%s snapshot %s (interval = %i)\n",
1321 why, victim->name, victim->interval);
1322 ret = 0;
1323 goto out;
1324 }
1325 ret = pre_remove_hook(victim, why);
1326 if (ret < 0)
1327 goto out;
1328 if (snapshot_removal_status == HS_PRE_RUNNING) {
1329 ret = wait_for_remove_process();
1330 if (ret < 0)
1331 goto out;
1332 if (snapshot_removal_status != HS_PRE_SUCCESS)
1333 goto out;
1334 }
1335 ret = exec_rm();
1336 if (ret < 0)
1337 goto out;
1338 ret = wait_for_remove_process();
1339 if (ret < 0)
1340 goto out;
1341 if (snapshot_removal_status != HS_SUCCESS)
1342 goto out;
1343 ret = post_remove_hook();
1344 if (ret < 0)
1345 goto out;
1346 if (snapshot_removal_status != HS_POST_RUNNING)
1347 goto out;
1348 ret = wait_for_remove_process();
1349 if (ret < 0)
1350 goto out;
1351 ret = 1;
1352 out:
1353 free_snapshot_list(&sl);
1354 return ret;
1355 }
1356
1357 static int com_create(void)
1358 {
1359 int ret, status;
1360 char **rsync_argv;
1361
1362 lock_dss_or_die();
1363 if (conf.dry_run_given) {
1364 int i;
1365 char *msg = NULL;
1366 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1367 for (i = 0; rsync_argv[i]; i++) {
1368 char *tmp = msg;
1369 msg = make_message("%s%s%s", tmp? tmp : "",
1370 tmp? " " : "", rsync_argv[i]);
1371 free(tmp);
1372 }
1373 free_rsync_argv(rsync_argv);
1374 dss_msg("%s\n", msg);
1375 free(msg);
1376 return 1;
1377 }
1378 ret = pre_create_hook();
1379 if (ret < 0)
1380 return ret;
1381 if (create_pid) {
1382 ret = wait_for_process(create_pid, &status);
1383 if (ret < 0)
1384 return ret;
1385 ret = handle_pre_create_hook_exit(status);
1386 if (ret <= 0) /* error, or pre-create failed */
1387 return ret;
1388 }
1389 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1390 ret = create_snapshot(rsync_argv);
1391 if (ret < 0)
1392 goto out;
1393 ret = wait_for_process(create_pid, &status);
1394 if (ret < 0)
1395 goto out;
1396 ret = handle_rsync_exit(status);
1397 if (ret < 0)
1398 goto out;
1399 post_create_hook();
1400 if (create_pid)
1401 ret = wait_for_process(create_pid, &status);
1402 out:
1403 free_rsync_argv(rsync_argv);
1404 return ret;
1405 }
1406
1407 static int com_ls(void)
1408 {
1409 int i;
1410 struct snapshot_list sl;
1411 struct snapshot *s;
1412
1413 dss_get_snapshot_list(&sl);
1414 FOR_EACH_SNAPSHOT(s, i, &sl) {
1415 int64_t d = 0;
1416 if (s->flags & SS_COMPLETE)
1417 d = (s->completion_time - s->creation_time) / 60;
1418 dss_msg("%u\t%s\t%3" PRId64 ":%02" PRId64 "\n", s->interval, s->name, d/60, d%60);
1419 };
1420 free_snapshot_list(&sl);
1421 return 1;
1422 }
1423
1424 static int setup_signal_handling(void)
1425 {
1426 int ret;
1427
1428 DSS_INFO_LOG("setting up signal handlers\n");
1429 signal_pipe = signal_init(); /* always successful */
1430 ret = install_sighandler(SIGINT);
1431 if (ret < 0)
1432 return ret;
1433 ret = install_sighandler(SIGTERM);
1434 if (ret < 0)
1435 return ret;
1436 return install_sighandler(SIGCHLD);
1437 }
1438
1439 /**
1440 * The main function of dss.
1441 *
1442 * \param argc Usual argument count.
1443 * \param argv Usual argument vector.
1444 */
1445 int main(int argc, char **argv)
1446 {
1447 int ret;
1448 struct cmdline_parser_params params = {
1449 .override = 0,
1450 .initialize = 1,
1451 .check_required = 0,
1452 .check_ambiguity = 0,
1453 .print_errors = 1
1454 };
1455
1456 cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */
1457 ret = parse_config_file(0);
1458 if (ret < 0)
1459 goto out;
1460 if (ret == 0) { /* no config file given */
1461 /*
1462 * Parse the command line options again, but this time check
1463 * that all required options are given.
1464 */
1465 params = (struct cmdline_parser_params) {
1466 .override = 1,
1467 .initialize = 1,
1468 .check_required = 1,
1469 .check_ambiguity = 1,
1470 .print_errors = 1
1471 };
1472 cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */
1473 }
1474 if (conf.daemon_given)
1475 daemon_init();
1476 ret = change_to_dest_dir();
1477 if (ret < 0)
1478 goto out;
1479 dump_dss_config("startup");
1480 ret = setup_signal_handling();
1481 if (ret < 0)
1482 goto out;
1483 ret = call_command_handler();
1484 out:
1485 if (ret < 0)
1486 DSS_EMERG_LOG("%s\n", dss_strerror(-ret));
1487 exit(ret >= 0? EXIT_SUCCESS : EXIT_FAILURE);
1488 }