Mark find_oldest_removable_snapshot() as static.
[dss.git] / dss.c
1 /*
2 * Copyright (C) 2008-2011 Andre Noll <maan@systemlinux.org>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6 #include <string.h>
7 #include <stdlib.h>
8 #include <stdarg.h>
9 #include <assert.h>
10 #include <errno.h>
11 #include <sys/types.h>
12 #include <signal.h>
13 #include <ctype.h>
14 #include <sys/stat.h>
15 #include <unistd.h>
16 #include <inttypes.h>
17 #include <sys/time.h>
18 #include <time.h>
19 #include <sys/wait.h>
20 #include <fnmatch.h>
21 #include <limits.h>
22
23
24 #include "gcc-compat.h"
25 #include "cmdline.h"
26 #include "log.h"
27 #include "string.h"
28 #include "error.h"
29 #include "fd.h"
30 #include "exec.h"
31 #include "daemon.h"
32 #include "signal.h"
33 #include "df.h"
34 #include "time.h"
35 #include "snap.h"
36 #include "ipc.h"
37
38 /** Command line and config file options. */
39 static struct gengetopt_args_info conf;
40 /** Non-NULL if we log to a file. */
41 static FILE *logfile;
42 /** The read end of the signal pipe */
43 static int signal_pipe;
44 /** Process id of current pre-create-hook/rsync/post-create-hook process. */
45 static pid_t create_pid;
46 /** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
47 static int create_process_stopped;
48 /** Process id of current pre-remove/rm/post-remove process. */
49 static pid_t remove_pid;
50 /** When the next snapshot is due. */
51 static int64_t next_snapshot_time;
52 /** When to try to remove something. */
53 static struct timeval next_removal_check;
54 /** Creation time of the snapshot currently being created. */
55 static int64_t current_snapshot_creation_time;
56 /** The snapshot currently being removed. */
57 struct snapshot *snapshot_currently_being_removed;
58 /** Needed by the post-create hook. */
59 static char *path_to_last_complete_snapshot;
60 static char *name_of_reference_snapshot;
61 /** \sa \ref snap.h for details. */
62 enum hook_status snapshot_creation_status;
63 /** \sa \ref snap.h for details. */
64 enum hook_status snapshot_removal_status;
65
66
67 DEFINE_DSS_ERRLIST;
68 static const char *hook_status_description[] = {HOOK_STATUS_ARRAY};
69
70 /* may be called with ds == NULL. */
71 static int disk_space_low(struct disk_space *ds)
72 {
73 struct disk_space ds_struct;
74
75 if (!ds) {
76 int ret = get_disk_space(".", &ds_struct);
77 if (ret < 0)
78 return ret;
79 ds = &ds_struct;
80 }
81 if (conf.min_free_mb_arg)
82 if (ds->free_mb < conf.min_free_mb_arg)
83 return 1;
84 if (conf.min_free_percent_arg)
85 if (ds->percent_free < conf.min_free_percent_arg)
86 return 1;
87 if (conf.min_free_percent_inodes_arg)
88 if (ds->percent_free_inodes < conf.min_free_percent_inodes_arg)
89 return 1;
90 return 0;
91 }
92
93 static void dump_dss_config(const char *msg)
94 {
95 const char dash[] = "-----------------------------";
96 int ret;
97 FILE *log = logfile? logfile : stderr;
98 struct disk_space ds;
99 int64_t now = get_current_time();
100
101 if (conf.loglevel_arg > INFO)
102 return;
103
104 fprintf(log, "%s <%s config> %s\n", dash, msg, dash);
105 fprintf(log, "\n*** disk space ***\n\n");
106 ret = get_disk_space(".", &ds);
107 if (ret >= 0) {
108 DSS_INFO_LOG("disk space low: %s\n", disk_space_low(&ds)?
109 "yes" : "no");
110 log_disk_space(&ds);
111 } else
112 DSS_ERROR_LOG("can not get free disk space: %s\n",
113 dss_strerror(-ret));
114
115 /* we continue on errors from get_disk_space */
116
117 fprintf(log, "\n*** command line and config file options ***\n\n");
118 cmdline_parser_dump(log, &conf);
119 fprintf(log, "\n*** internal state ***\n\n");
120 fprintf(log,
121 "pid: %d\n"
122 "logile: %s\n"
123 "snapshot_currently_being_removed: %s\n"
124 "path_to_last_complete_snapshot: %s\n"
125 "reference_snapshot: %s\n"
126 "snapshot_creation_status: %s\n"
127 "snapshot_removal_status: %s\n"
128 ,
129 (int) getpid(),
130 logfile? conf.logfile_arg : "stderr",
131 snapshot_currently_being_removed?
132 snapshot_currently_being_removed->name : "(none)",
133 path_to_last_complete_snapshot?
134 path_to_last_complete_snapshot : "(none)",
135 name_of_reference_snapshot?
136 name_of_reference_snapshot : "(none)",
137 hook_status_description[snapshot_creation_status],
138 hook_status_description[snapshot_removal_status]
139 );
140 if (create_pid != 0)
141 fprintf(log,
142 "create_pid: %" PRId32 "\n"
143 "create process is %sstopped\n"
144 ,
145 create_pid,
146 create_process_stopped? "" : "not "
147 );
148 if (remove_pid != 0)
149 fprintf(log, "remove_pid: %" PRId32 "\n", remove_pid);
150 if (next_snapshot_time != 0)
151 fprintf(log, "next snapshot due in %" PRId64 " seconds\n",
152 next_snapshot_time - now);
153 if (current_snapshot_creation_time != 0)
154 fprintf(log, "current_snapshot_creation_time: %"
155 PRId64 " (%" PRId64 " seconds ago)\n",
156 current_snapshot_creation_time,
157 now - current_snapshot_creation_time
158 );
159 if (next_removal_check.tv_sec != 0) {
160 fprintf(log, "next removal check: %llu (%llu seconds ago)\n",
161 (long long unsigned)next_removal_check.tv_sec,
162 now - (long long unsigned)next_removal_check.tv_sec
163 );
164
165 }
166 fprintf(log, "%s </%s config> %s\n", dash, msg, dash);
167 }
168
169 /* a litte cpp magic helps to DRY */
170 #define COMMANDS \
171 COMMAND(ls) \
172 COMMAND(create) \
173 COMMAND(prune) \
174 COMMAND(run) \
175 COMMAND(kill) \
176 COMMAND(reload) \
177
178 #define COMMAND(x) static int com_ ##x(void);
179 COMMANDS
180 #undef COMMAND
181 #define COMMAND(x) if (conf.x ##_given) return com_ ##x();
182 static int call_command_handler(void)
183 {
184 COMMANDS
185 DSS_EMERG_LOG("BUG: did not find command handler\n");
186 return -E_BUG;
187 }
188 #undef COMMAND
189 #undef COMMANDS
190
191 /**
192 * The log function of dss.
193 *
194 * \param ll Loglevel.
195 * \param fml Usual format string.
196 *
197 * All DSS_XXX_LOG() macros use this function.
198 */
199 __printf_2_3 void dss_log(int ll, const char* fmt,...)
200 {
201 va_list argp;
202 FILE *outfd;
203 struct tm *tm;
204 time_t t1;
205 char str[255] = "";
206
207 if (ll < conf.loglevel_arg)
208 return;
209 outfd = logfile? logfile : stderr;
210 time(&t1);
211 tm = localtime(&t1);
212 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
213 fprintf(outfd, "%s ", str);
214 if (conf.loglevel_arg <= INFO)
215 fprintf(outfd, "%i: ", ll);
216 va_start(argp, fmt);
217 vfprintf(outfd, fmt, argp);
218 va_end(argp);
219 }
220
221 /**
222 * Print a message either to stdout or to the log file.
223 */
224 static __printf_1_2 void dss_msg(const char* fmt,...)
225 {
226 FILE *outfd = conf.daemon_given? logfile : stdout;
227 va_list argp;
228 va_start(argp, fmt);
229 vfprintf(outfd, fmt, argp);
230 va_end(argp);
231 }
232
233 static char *get_config_file_name(void)
234 {
235 char *home, *config_file;
236
237 if (conf.config_file_given)
238 return dss_strdup(conf.config_file_arg);
239 home = get_homedir();
240 config_file = make_message("%s/.dssrc", home);
241 free(home);
242 return config_file;
243 }
244
245 static int send_signal(int sig)
246 {
247 pid_t pid;
248 char *config_file = get_config_file_name();
249 int ret = get_dss_pid(config_file, &pid);
250
251 free(config_file);
252 if (ret < 0)
253 return ret;
254 if (conf.dry_run_given) {
255 dss_msg("%d\n", (int)pid);
256 return 0;
257 }
258 ret = kill(pid, sig);
259 if (ret < 0)
260 return -ERRNO_TO_DSS_ERROR(errno);
261 return 1;
262 }
263
/* The kill subcommand: send SIGTERM to the dss process. */
static int com_kill(void)
{
	int ret = send_signal(SIGTERM);

	return ret;
}
268
/* The reload subcommand: send SIGHUP to the dss process. */
static int com_reload(void)
{
	int ret = send_signal(SIGHUP);

	return ret;
}
273
274 static void dss_get_snapshot_list(struct snapshot_list *sl)
275 {
276 get_snapshot_list(sl, conf.unit_interval_arg, conf.num_intervals_arg);
277 }
278
279 static int64_t compute_next_snapshot_time(void)
280 {
281 int64_t x = 0, now = get_current_time(), unit_interval
282 = 24 * 3600 * conf.unit_interval_arg, ret;
283 unsigned wanted = desired_number_of_snapshots(0, conf.num_intervals_arg),
284 num_complete_snapshots = 0;
285 int i;
286 struct snapshot *s = NULL;
287 struct snapshot_list sl;
288
289 dss_get_snapshot_list(&sl);
290 FOR_EACH_SNAPSHOT(s, i, &sl) {
291 if (!(s->flags & SS_COMPLETE))
292 continue;
293 num_complete_snapshots++;
294 x += s->completion_time - s->creation_time;
295 }
296 assert(x >= 0);
297
298 ret = now;
299 if (num_complete_snapshots == 0)
300 goto out;
301 x /= num_complete_snapshots; /* avg time to create one snapshot */
302 if (unit_interval < x * wanted) /* oops, no sleep at all */
303 goto out;
304 ret = s->completion_time + unit_interval / wanted - x;
305 out:
306 free_snapshot_list(&sl);
307 return ret;
308 }
309
310 static inline void invalidate_next_snapshot_time(void)
311 {
312 next_snapshot_time = 0;
313 }
314
315 static inline int next_snapshot_time_is_valid(void)
316 {
317 return next_snapshot_time != 0;
318 }
319
320 static int next_snapshot_is_due(void)
321 {
322 int64_t now = get_current_time();
323
324 if (!next_snapshot_time_is_valid())
325 next_snapshot_time = compute_next_snapshot_time();
326 if (next_snapshot_time <= now) {
327 DSS_DEBUG_LOG("next snapshot: now\n");
328 return 1;
329 }
330 DSS_DEBUG_LOG("next snapshot due in %" PRId64 " seconds\n",
331 next_snapshot_time - now);
332 return 0;
333 }
334
335 static void pre_create_hook(void)
336 {
337 assert(snapshot_creation_status == HS_READY);
338 /* make sure that the next snapshot time will be recomputed */
339 invalidate_next_snapshot_time();
340 DSS_DEBUG_LOG("executing %s\n", conf.pre_create_hook_arg);
341 dss_exec_cmdline_pid(&create_pid, conf.pre_create_hook_arg);
342 snapshot_creation_status = HS_PRE_RUNNING;
343 }
344
345 static void pre_remove_hook(struct snapshot *s, const char *why)
346 {
347 char *cmd;
348
349 if (!s)
350 return;
351 DSS_DEBUG_LOG("%s snapshot %s\n", why, s->name);
352 assert(snapshot_removal_status == HS_READY);
353 assert(remove_pid == 0);
354 assert(!snapshot_currently_being_removed);
355
356 snapshot_currently_being_removed = dss_malloc(sizeof(struct snapshot));
357 *snapshot_currently_being_removed = *s;
358 snapshot_currently_being_removed->name = dss_strdup(s->name);
359
360 cmd = make_message("%s %s/%s", conf.pre_remove_hook_arg,
361 conf.dest_dir_arg, s->name);
362 DSS_DEBUG_LOG("executing %s\n", cmd);
363 dss_exec_cmdline_pid(&remove_pid, cmd);
364 free(cmd);
365 snapshot_removal_status = HS_PRE_RUNNING;
366 }
367
368 static int exec_rm(void)
369 {
370 struct snapshot *s = snapshot_currently_being_removed;
371 char *new_name = being_deleted_name(s);
372 char *argv[4];
373 int ret;
374
375 argv[0] = "rm";
376 argv[1] = "-rf";
377 argv[2] = new_name;
378 argv[3] = NULL;
379
380 assert(snapshot_removal_status == HS_PRE_SUCCESS);
381 assert(remove_pid == 0);
382
383 DSS_NOTICE_LOG("removing %s (interval = %i)\n", s->name, s->interval);
384 ret = dss_rename(s->name, new_name);
385 if (ret < 0)
386 goto out;
387 dss_exec(&remove_pid, argv[0], argv);
388 snapshot_removal_status = HS_RUNNING;
389 out:
390 free(new_name);
391 return ret;
392 }
393
394 static int snapshot_is_being_created(struct snapshot *s)
395 {
396 return s->creation_time == current_snapshot_creation_time;
397 }
398
399 static struct snapshot *find_orphaned_snapshot(struct snapshot_list *sl)
400 {
401 struct snapshot *s;
402 int i;
403
404 DSS_DEBUG_LOG("looking for orphaned snapshots\n");
405 FOR_EACH_SNAPSHOT(s, i, sl) {
406 if (snapshot_is_being_created(s))
407 continue;
408 /*
409 * We know that no rm is currently running, so if s is marked
410 * as being deleted, a previously started rm must have failed.
411 */
412 if (s->flags & SS_BEING_DELETED)
413 return s;
414
415 if (s->flags & SS_COMPLETE) /* good snapshot */
416 continue;
417 /*
418 * This snapshot is incomplete and it is not the snapshot
419 * currently being created. However, we must not remove it if
420 * rsync is about to be restarted. As only the newest snapshot
421 * can be restarted, this snapshot is orphaned if it is not the
422 * newest snapshot or if we are not about to restart rsync.
423 */
424 if (get_newest_snapshot(sl) != s)
425 return s;
426 if (snapshot_creation_status != HS_NEEDS_RESTART)
427 return s;
428 }
429 /* no orphaned snapshots */
430 return NULL;
431 }
432
433 static int is_reference_snapshot(struct snapshot *s)
434 {
435 if (!name_of_reference_snapshot)
436 return 0;
437 return strcmp(s->name, name_of_reference_snapshot)? 0 : 1;
438 }
439
440 /*
441 * return: 0: no redundant snapshots, 1: rm process started, negative: error
442 */
443 static struct snapshot *find_redundant_snapshot(struct snapshot_list *sl)
444 {
445 int i, interval;
446 struct snapshot *s;
447 unsigned missing = 0;
448
449 DSS_DEBUG_LOG("looking for intervals containing too many snapshots\n");
450 for (interval = conf.num_intervals_arg - 1; interval >= 0; interval--) {
451 unsigned keep = desired_number_of_snapshots(interval, conf.num_intervals_arg);
452 unsigned num = sl->interval_count[interval];
453 struct snapshot *victim = NULL, *prev = NULL;
454 int64_t score = LONG_MAX;
455
456 if (keep >= num)
457 missing += keep - num;
458 if (keep + missing >= num)
459 continue;
460 /* redundant snapshot in this interval, pick snapshot with lowest score */
461 FOR_EACH_SNAPSHOT(s, i, sl) {
462 int64_t this_score;
463
464 if (snapshot_is_being_created(s))
465 continue;
466 if (is_reference_snapshot(s))
467 continue;
468 if (s->interval > interval) {
469 prev = s;
470 continue;
471 }
472 if (s->interval < interval)
473 break;
474 if (!victim) {
475 victim = s;
476 prev = s;
477 continue;
478 }
479 assert(prev);
480 /* check if s is a better victim */
481 this_score = s->creation_time - prev->creation_time;
482 assert(this_score >= 0);
483 if (this_score < score) {
484 score = this_score;
485 victim = s;
486 }
487 prev = s;
488 }
489 assert(victim);
490 return victim;
491 }
492 return NULL;
493 }
494
495 static struct snapshot *find_outdated_snapshot(struct snapshot_list *sl)
496 {
497 int i;
498 struct snapshot *s;
499
500 DSS_DEBUG_LOG("looking for snapshots belonging to intervals >= %d\n",
501 conf.num_intervals_arg);
502 FOR_EACH_SNAPSHOT(s, i, sl) {
503 if (snapshot_is_being_created(s))
504 continue;
505 if (is_reference_snapshot(s))
506 continue;
507 if (s->interval < conf.num_intervals_arg)
508 continue;
509 return s;
510 }
511 return NULL;
512 }
513
514 static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
515 {
516 int i;
517 struct snapshot *s;
518 FOR_EACH_SNAPSHOT(s, i, sl) {
519 if (snapshot_is_being_created(s))
520 continue;
521 if (is_reference_snapshot(s))
522 continue;
523 DSS_INFO_LOG("oldest removable snapshot: %s\n", s->name);
524 return s;
525 }
526 return NULL;
527 }
528
529 static int rename_incomplete_snapshot(int64_t start)
530 {
531 char *old_name;
532 int ret;
533
534 free(path_to_last_complete_snapshot);
535 ret = complete_name(start, get_current_time(),
536 &path_to_last_complete_snapshot);
537 if (ret < 0)
538 return ret;
539 old_name = incomplete_name(start);
540 ret = dss_rename(old_name, path_to_last_complete_snapshot);
541 if (ret >= 0)
542 DSS_NOTICE_LOG("%s -> %s\n", old_name,
543 path_to_last_complete_snapshot);
544 free(old_name);
545 return ret;
546 }
547
548 static int try_to_free_disk_space(void)
549 {
550 int ret;
551 struct snapshot_list sl;
552 struct snapshot *victim;
553 struct timeval now;
554 const char *why;
555 int low_disk_space;
556
557 ret = disk_space_low(NULL);
558 if (ret < 0)
559 return ret;
560 low_disk_space = ret;
561 gettimeofday(&now, NULL);
562 if (tv_diff(&next_removal_check, &now, NULL) > 0)
563 return 0;
564 if (!low_disk_space) {
565 if (conf.keep_redundant_given)
566 return 0;
567 if (snapshot_creation_status != HS_READY)
568 return 0;
569 if (next_snapshot_is_due())
570 return 0;
571 }
572 dss_get_snapshot_list(&sl);
573 ret = 0;
574 if (!low_disk_space && sl.num_snapshots <= 1)
575 goto out;
576 why = "outdated";
577 victim = find_outdated_snapshot(&sl);
578 if (victim)
579 goto remove;
580 why = "redundant";
581 victim = find_redundant_snapshot(&sl);
582 if (victim)
583 goto remove;
584 /* try harder only if disk space is low */
585 if (!low_disk_space)
586 goto out;
587 why = "orphaned";
588 victim = find_orphaned_snapshot(&sl);
589 if (victim)
590 goto remove;
591 DSS_WARNING_LOG("disk space low and nothing obvious to remove\n");
592 victim = find_oldest_removable_snapshot(&sl);
593 if (victim)
594 goto remove;
595 DSS_CRIT_LOG("uhuhu: disk space low and nothing to remove\n");
596 ret = -ERRNO_TO_DSS_ERROR(ENOSPC);
597 goto out;
598 remove:
599 pre_remove_hook(victim, why);
600 out:
601 free_snapshot_list(&sl);
602 return ret;
603 }
604
605 static void post_create_hook(void)
606 {
607 char *cmd = make_message("%s %s/%s", conf.post_create_hook_arg,
608 conf.dest_dir_arg, path_to_last_complete_snapshot);
609 DSS_NOTICE_LOG("executing %s\n", cmd);
610 dss_exec_cmdline_pid(&create_pid, cmd);
611 free(cmd);
612 snapshot_creation_status = HS_POST_RUNNING;
613 }
614
615 static void post_remove_hook(void)
616 {
617 char *cmd;
618 struct snapshot *s = snapshot_currently_being_removed;
619
620 assert(s);
621
622 cmd = make_message("%s %s/%s", conf.post_remove_hook_arg,
623 conf.dest_dir_arg, s->name);
624 DSS_NOTICE_LOG("executing %s\n", cmd);
625 dss_exec_cmdline_pid(&remove_pid, cmd);
626 free(cmd);
627 snapshot_removal_status = HS_POST_RUNNING;
628 }
629
630 static void dss_kill(pid_t pid, int sig, const char *msg)
631 {
632 const char *signame, *process_name;
633
634 if (pid == 0)
635 return;
636 switch (sig) {
637 case SIGTERM: signame = "TERM"; break;
638 case SIGSTOP: signame = "STOP"; break;
639 case SIGCONT: signame = "CONT"; break;
640 default: signame = "????";
641 }
642
643 if (pid == create_pid)
644 process_name = "create";
645 else if (pid == remove_pid)
646 process_name = "remove";
647 else process_name = "??????";
648
649 if (msg)
650 DSS_INFO_LOG("%s\n", msg);
651 DSS_DEBUG_LOG("sending signal %d (%s) to pid %d (%s process)\n",
652 sig, signame, (int)pid, process_name);
653 if (kill(pid, sig) >= 0)
654 return;
655 DSS_INFO_LOG("failed to send signal %d (%s) to pid %d (%s process)\n",
656 sig, signame, (int)pid, process_name);
657 }
658
659 static void stop_create_process(void)
660 {
661 if (create_process_stopped)
662 return;
663 dss_kill(create_pid, SIGSTOP, "suspending create process");
664 create_process_stopped = 1;
665 }
666
667 static void restart_create_process(void)
668 {
669 if (!create_process_stopped)
670 return;
671 dss_kill(create_pid, SIGCONT, "resuming create process");
672 create_process_stopped = 0;
673 }
674
/**
 * Print a log message about the exit status of a child.
 *
 * \param pid The pid of the child, only used for the message.
 * \param status The wait status of the child.
 *
 * A regular exit is logged with the exit code, death by signal with the
 * signal number. Any other status yields a warning.
 */
static void log_termination_msg(pid_t pid, int status)
{
	if (WIFEXITED(status))
		DSS_INFO_LOG("child %i exited. Exit status: %i\n", (int)pid,
			WEXITSTATUS(status));
	else if (WIFSIGNALED(status))
		DSS_NOTICE_LOG("child %i was killed by signal %i\n", (int)pid,
			WTERMSIG(status));
	else
		/* the message used to read "abormally" */
		DSS_WARNING_LOG("child %i terminated abnormally\n", (int)pid);
}
689
690 static int wait_for_process(pid_t pid, int *status)
691 {
692 int ret;
693
694 DSS_DEBUG_LOG("Waiting for process %d to terminate\n", (int)pid);
695 for (;;) {
696 fd_set rfds;
697
698 FD_ZERO(&rfds);
699 FD_SET(signal_pipe, &rfds);
700 ret = dss_select(signal_pipe + 1, &rfds, NULL, NULL);
701 if (ret < 0)
702 break;
703 ret = next_signal();
704 if (!ret)
705 continue;
706 if (ret == SIGCHLD) {
707 ret = waitpid(pid, status, 0);
708 if (ret >= 0)
709 break;
710 if (errno != EINTR) { /* error */
711 ret = -ERRNO_TO_DSS_ERROR(errno);
712 break;
713 }
714 }
715 /* SIGINT or SIGTERM */
716 dss_kill(pid, SIGTERM, "killing child process");
717 }
718 if (ret < 0)
719 DSS_ERROR_LOG("failed to wait for process %d\n", (int)pid);
720 else
721 log_termination_msg(pid, *status);
722 return ret;
723 }
724
725 static void handle_pre_remove_exit(int status)
726 {
727 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
728 snapshot_removal_status = HS_READY;
729 gettimeofday(&next_removal_check, NULL);
730 next_removal_check.tv_sec += 60;
731 return;
732 }
733 snapshot_removal_status = HS_PRE_SUCCESS;
734 }
735
736 static int handle_rm_exit(int status)
737 {
738 if (!WIFEXITED(status)) {
739 snapshot_removal_status = HS_READY;
740 return -E_INVOLUNTARY_EXIT;
741 }
742 if (WEXITSTATUS(status)) {
743 snapshot_removal_status = HS_READY;
744 return -E_BAD_EXIT_CODE;
745 }
746 snapshot_removal_status = HS_SUCCESS;
747 return 1;
748 }
749
750 static void handle_post_remove_exit(void)
751 {
752 snapshot_removal_status = HS_READY;
753 }
754
755 static int handle_remove_exit(int status)
756 {
757 int ret;
758 struct snapshot *s = snapshot_currently_being_removed;
759
760 assert(s);
761 switch (snapshot_removal_status) {
762 case HS_PRE_RUNNING:
763 handle_pre_remove_exit(status);
764 ret = 1;
765 break;
766 case HS_RUNNING:
767 ret = handle_rm_exit(status);
768 break;
769 case HS_POST_RUNNING:
770 handle_post_remove_exit();
771 ret = 1;
772 break;
773 default:
774 ret = -E_BUG;
775 }
776 if (snapshot_removal_status == HS_READY) {
777 free(s->name);
778 free(s);
779 snapshot_currently_being_removed = NULL;
780 }
781 remove_pid = 0;
782 return ret;
783 }
784
785 static int wait_for_remove_process(void)
786 {
787 int status, ret;
788
789 assert(remove_pid);
790 assert(
791 snapshot_removal_status == HS_PRE_RUNNING ||
792 snapshot_removal_status == HS_RUNNING ||
793 snapshot_removal_status == HS_POST_RUNNING
794 );
795 ret = wait_for_process(remove_pid, &status);
796 if (ret < 0)
797 return ret;
798 return handle_remove_exit(status);
799 }
800
801 static int handle_rsync_exit(int status)
802 {
803 int es, ret;
804
805 if (!WIFEXITED(status)) {
806 DSS_ERROR_LOG("rsync process %d died involuntary\n", (int)create_pid);
807 ret = -E_INVOLUNTARY_EXIT;
808 snapshot_creation_status = HS_READY;
809 goto out;
810 }
811 es = WEXITSTATUS(status);
812 /*
813 * Restart rsync on non-fatal errors:
814 * 12: Error in rsync protocol data stream
815 * 13: Errors with program diagnostics
816 */
817 if (es == 12 || es == 13) {
818 DSS_WARNING_LOG("rsync process %d returned %d -- restarting\n",
819 (int)create_pid, es);
820 snapshot_creation_status = HS_NEEDS_RESTART;
821 next_snapshot_time = get_current_time() + 60;
822 ret = 1;
823 goto out;
824 }
825 if (es != 0 && es != 23 && es != 24) {
826 DSS_ERROR_LOG("rsync process %d returned %d\n", (int)create_pid, es);
827 ret = -E_BAD_EXIT_CODE;
828 snapshot_creation_status = HS_READY;
829 goto out;
830 }
831 ret = rename_incomplete_snapshot(current_snapshot_creation_time);
832 if (ret < 0)
833 goto out;
834 snapshot_creation_status = HS_SUCCESS;
835 free(name_of_reference_snapshot);
836 name_of_reference_snapshot = NULL;
837 out:
838 create_process_stopped = 0;
839 return ret;
840 }
841
842 static int handle_pre_create_hook_exit(int status)
843 {
844 int es, ret;
845 static int warn_count;
846
847 if (!WIFEXITED(status)) {
848 snapshot_creation_status = HS_READY;
849 ret = -E_INVOLUNTARY_EXIT;
850 goto out;
851 }
852 es = WEXITSTATUS(status);
853 if (es) {
854 if (!warn_count--) {
855 DSS_NOTICE_LOG("pre_create_hook %s returned %d\n",
856 conf.pre_create_hook_arg, es);
857 DSS_NOTICE_LOG("deferring snapshot creation...\n");
858 warn_count = 60; /* warn only once per hour */
859 }
860 next_snapshot_time = get_current_time() + 60;
861 snapshot_creation_status = HS_READY;
862 ret = 0;
863 goto out;
864 }
865 warn_count = 0;
866 snapshot_creation_status = HS_PRE_SUCCESS;
867 ret = 1;
868 out:
869 return ret;
870 }
871
872 static int handle_sigchld(void)
873 {
874 pid_t pid;
875 int status, ret = reap_child(&pid, &status);
876
877 if (ret <= 0)
878 return ret;
879
880 if (pid == create_pid) {
881 switch (snapshot_creation_status) {
882 case HS_PRE_RUNNING:
883 ret = handle_pre_create_hook_exit(status);
884 break;
885 case HS_RUNNING:
886 ret = handle_rsync_exit(status);
887 break;
888 case HS_POST_RUNNING:
889 snapshot_creation_status = HS_READY;
890 ret = 1;
891 break;
892 default:
893 DSS_EMERG_LOG("BUG: create can't die in status %d\n",
894 snapshot_creation_status);
895 return -E_BUG;
896 }
897 create_pid = 0;
898 return ret;
899 }
900 if (pid == remove_pid) {
901 ret = handle_remove_exit(status);
902 if (ret < 0)
903 return ret;
904 return ret;
905 }
906 DSS_EMERG_LOG("BUG: unknown process %d died\n", (int)pid);
907 return -E_BUG;
908 }
909
910 static int check_config(void)
911 {
912 if (conf.unit_interval_arg <= 0) {
913 DSS_ERROR_LOG("bad unit interval: %i\n", conf.unit_interval_arg);
914 return -E_INVALID_NUMBER;
915 }
916 DSS_DEBUG_LOG("unit interval: %i day(s)\n", conf.unit_interval_arg);
917 if (conf.num_intervals_arg <= 0) {
918 DSS_ERROR_LOG("bad number of intervals %i\n", conf.num_intervals_arg);
919 return -E_INVALID_NUMBER;
920 }
921 DSS_DEBUG_LOG("number of intervals: %i\n", conf.num_intervals_arg);
922 return 1;
923 }
924
925 /*
926 * Returns < 0 on errors, 0 if no config file is given and > 0 if the config
927 * file was read successfully.
928 */
929 static int parse_config_file(int override)
930 {
931 int ret, config_file_exists;
932 char *config_file = get_config_file_name();
933 struct stat statbuf;
934 char *old_logfile_arg = NULL;
935 int old_daemon_given = 0;
936
937 if (override) { /* SIGHUP */
938 if (conf.logfile_given)
939 old_logfile_arg = dss_strdup(conf.logfile_arg);
940 old_daemon_given = conf.daemon_given;
941 }
942
943 config_file_exists = !stat(config_file, &statbuf);
944 if (!config_file_exists && conf.config_file_given) {
945 ret = -ERRNO_TO_DSS_ERROR(errno);
946 DSS_ERROR_LOG("failed to stat config file %s\n", config_file);
947 goto out;
948 }
949 if (config_file_exists) {
950 struct cmdline_parser_params params;
951 params.override = override;
952 params.initialize = 0;
953 params.check_required = 1;
954 params.check_ambiguity = 0;
955 params.print_errors = 1;
956 if (override) { /* invalidate all rsync options */
957 int i;
958
959 for (i = 0; i < conf.rsync_option_given; i++) {
960 free(conf.rsync_option_arg[i]);
961 conf.rsync_option_arg[i] = NULL;
962 }
963 conf.rsync_option_given = 0;
964 }
965 cmdline_parser_config_file(config_file, &conf, &params);
966 }
967 ret = check_config();
968 if (ret < 0)
969 goto out;
970 if (override) {
971 /* don't change daemon mode on SIGHUP */
972 conf.daemon_given = old_daemon_given;
973 close_log(logfile);
974 logfile = NULL;
975 if (conf.logfile_given)
976 free(old_logfile_arg);
977 else if (conf.daemon_given) { /* re-use old logfile */
978 conf.logfile_arg = old_logfile_arg;
979 conf.logfile_given = 1;
980 }
981 }
982 if (conf.logfile_given && conf.run_given && conf.daemon_given) {
983 logfile = open_log(conf.logfile_arg);
984 log_welcome(conf.loglevel_arg);
985 }
986 DSS_DEBUG_LOG("loglevel: %d\n", conf.loglevel_arg);
987 ret = config_file_exists;
988 out:
989 free(config_file);
990 if (ret < 0)
991 DSS_EMERG_LOG("%s\n", dss_strerror(-ret));
992 return ret;
993 }
994
995 static int change_to_dest_dir(void)
996 {
997 DSS_INFO_LOG("changing cwd to %s\n", conf.dest_dir_arg);
998 return dss_chdir(conf.dest_dir_arg);
999 }
1000
/*
 * Re-read the config file on SIGHUP.
 *
 * The configuration is dumped before and after the re-read. On success the
 * next snapshot time is invalidated and the cwd is changed to the (possibly
 * new) destination directory. Returns the error of parse_config_file() on
 * failure and the result of change_to_dest_dir() otherwise.
 */
static int handle_sighup(void)
{
	int ret;

	DSS_NOTICE_LOG("SIGHUP, re-reading config\n");
	dump_dss_config("old");
	ret = parse_config_file(1);
	if (ret >= 0) {
		dump_dss_config("reloaded");
		invalidate_next_snapshot_time();
		ret = change_to_dest_dir();
	}
	return ret;
}
1014
1015 static int handle_signal(void)
1016 {
1017 int sig, ret = next_signal();
1018
1019 if (ret <= 0)
1020 goto out;
1021 sig = ret;
1022 switch (sig) {
1023 case SIGINT:
1024 case SIGTERM:
1025 restart_create_process();
1026 dss_kill(create_pid, SIGTERM, NULL);
1027 dss_kill(remove_pid, SIGTERM, NULL);
1028 ret = -E_SIGNAL;
1029 break;
1030 case SIGHUP:
1031 ret = handle_sighup();
1032 break;
1033 case SIGCHLD:
1034 ret = handle_sigchld();
1035 break;
1036 }
1037 out:
1038 if (ret < 0)
1039 DSS_ERROR_LOG("%s\n", dss_strerror(-ret));
1040 return ret;
1041 }
1042
1043 /*
1044 * We can not use rsync locally if the local user is different from the remote
1045 * user or if the src dir is not on the local host (or both).
1046 */
1047 static int use_rsync_locally(char *logname)
1048 {
1049 char *h = conf.remote_host_arg;
1050
1051 if (strcmp(h, "localhost") && strcmp(h, "127.0.0.1"))
1052 return 0;
1053 if (conf.remote_user_given && strcmp(conf.remote_user_arg, logname))
1054 return 0;
1055 return 1;
1056 }
1057
1058 static int rename_resume_snap(int64_t creation_time)
1059 {
1060 struct snapshot_list sl;
1061 struct snapshot *s = NULL;
1062 char *new_name = incomplete_name(creation_time);
1063 int ret;
1064 const char *why;
1065
1066 sl.num_snapshots = 0;
1067
1068 ret = 0;
1069 if (conf.no_resume_given)
1070 goto out;
1071 dss_get_snapshot_list(&sl);
1072 /*
1073 * Snapshot recycling: We first look at the newest snapshot. If this
1074 * snapshot happens to be incomplete, the last rsync process was
1075 * aborted and we reuse this one. Otherwise we look at snapshots which
1076 * could be removed (outdated and redundant snapshots) as candidates
1077 * for recycling. If no outdated/redundant snapshot exists, we check if
1078 * there is an orphaned snapshot, which likely is useless anyway.
1079 *
1080 * Only if no existing snapshot is suitable for recycling, we bite the
1081 * bullet and create a new one.
1082 */
1083 s = get_newest_snapshot(&sl);
1084 if (!s) /* no snapshots at all */
1085 goto out;
1086 /* re-use last snapshot if it is incomplete */
1087 why = "aborted";
1088 if ((s->flags & SS_COMPLETE) == 0)
1089 goto out;
1090 why = "outdated";
1091 s = find_outdated_snapshot(&sl);
1092 if (s)
1093 goto out;
1094 why = "redundant";
1095 s = find_redundant_snapshot(&sl);
1096 if (s)
1097 goto out;
1098 why = "orphaned";
1099 s = find_orphaned_snapshot(&sl);
1100 out:
1101 if (s) {
1102 DSS_INFO_LOG("reusing %s snapshot %s\n", why, s->name);
1103 ret = dss_rename(s->name, new_name);
1104 }
1105 if (ret >= 0)
1106 DSS_NOTICE_LOG("creating new snapshot %s\n", new_name);
1107 free(new_name);
1108 free_snapshot_list(&sl);
1109 return ret;
1110 }
1111
1112 static void create_rsync_argv(char ***argv, int64_t *num)
1113 {
1114 char *logname;
1115 int i = 0, j;
1116 struct snapshot_list sl;
1117
1118 dss_get_snapshot_list(&sl);
1119 assert(!name_of_reference_snapshot);
1120 name_of_reference_snapshot = name_of_newest_complete_snapshot(&sl);
1121 free_snapshot_list(&sl);
1122
1123 *argv = dss_malloc((15 + conf.rsync_option_given) * sizeof(char *));
1124 (*argv)[i++] = dss_strdup("rsync");
1125 (*argv)[i++] = dss_strdup("-aq");
1126 (*argv)[i++] = dss_strdup("--delete");
1127 for (j = 0; j < conf.rsync_option_given; j++)
1128 (*argv)[i++] = dss_strdup(conf.rsync_option_arg[j]);
1129 if (name_of_reference_snapshot) {
1130 DSS_INFO_LOG("using %s as reference\n", name_of_reference_snapshot);
1131 (*argv)[i++] = make_message("--link-dest=../%s",
1132 name_of_reference_snapshot);
1133 } else
1134 DSS_INFO_LOG("no suitable reference snapshot found\n");
1135 logname = dss_logname();
1136 if (use_rsync_locally(logname))
1137 (*argv)[i++] = dss_strdup(conf.source_dir_arg);
1138 else
1139 (*argv)[i++] = make_message("%s@%s:%s/", conf.remote_user_given?
1140 conf.remote_user_arg : logname,
1141 conf.remote_host_arg, conf.source_dir_arg);
1142 free(logname);
1143 *num = get_current_time();
1144 (*argv)[i++] = incomplete_name(*num);
1145 (*argv)[i++] = NULL;
1146 for (j = 0; j < i; j++)
1147 DSS_DEBUG_LOG("argv[%d] = %s\n", j, (*argv)[j]);
1148 }
1149
/*
 * Release a NULL-terminated argument vector together with all of its
 * strings. A NULL vector is accepted and ignored.
 */
static void free_rsync_argv(char **argv)
{
	char **arg;

	if (argv == NULL)
		return;
	for (arg = argv; *arg != NULL; arg++)
		free(*arg);
	free(argv);
}
1160
1161 static int create_snapshot(char **argv)
1162 {
1163 int ret;
1164
1165 ret = rename_resume_snap(current_snapshot_creation_time);
1166 if (ret < 0)
1167 return ret;
1168 dss_exec(&create_pid, argv[0], argv);
1169 snapshot_creation_status = HS_RUNNING;
1170 return ret;
1171 }
1172
1173 static int select_loop(void)
1174 {
1175 int ret;
1176 /* check every 60 seconds for free disk space */
1177 struct timeval tv;
1178 char **rsync_argv = NULL;
1179
1180 for (;;) {
1181 fd_set rfds;
1182 struct timeval *tvp;
1183
1184 if (remove_pid)
1185 tvp = NULL; /* sleep until rm hook/process dies */
1186 else { /* sleep one minute */
1187 tv.tv_sec = 60;
1188 tv.tv_usec = 0;
1189 tvp = &tv;
1190 }
1191 FD_ZERO(&rfds);
1192 FD_SET(signal_pipe, &rfds);
1193 ret = dss_select(signal_pipe + 1, &rfds, NULL, tvp);
1194 if (ret < 0)
1195 goto out;
1196 if (FD_ISSET(signal_pipe, &rfds)) {
1197 ret = handle_signal();
1198 if (ret < 0)
1199 goto out;
1200 }
1201 if (remove_pid)
1202 continue;
1203 if (snapshot_removal_status == HS_PRE_SUCCESS) {
1204 ret = exec_rm();
1205 if (ret < 0)
1206 goto out;
1207 continue;
1208 }
1209 if (snapshot_removal_status == HS_SUCCESS) {
1210 post_remove_hook();
1211 continue;
1212 }
1213 ret = try_to_free_disk_space();
1214 if (ret < 0)
1215 goto out;
1216 if (snapshot_removal_status != HS_READY) {
1217 stop_create_process();
1218 continue;
1219 }
1220 restart_create_process();
1221 switch (snapshot_creation_status) {
1222 case HS_READY:
1223 if (!next_snapshot_is_due())
1224 continue;
1225 pre_create_hook();
1226 continue;
1227 case HS_PRE_RUNNING:
1228 case HS_RUNNING:
1229 case HS_POST_RUNNING:
1230 continue;
1231 case HS_PRE_SUCCESS:
1232 if (!name_of_reference_snapshot) {
1233 free_rsync_argv(rsync_argv);
1234 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1235 }
1236 ret = create_snapshot(rsync_argv);
1237 if (ret < 0)
1238 goto out;
1239 continue;
1240 case HS_NEEDS_RESTART:
1241 if (!next_snapshot_is_due())
1242 continue;
1243 ret = create_snapshot(rsync_argv);
1244 if (ret < 0)
1245 goto out;
1246 continue;
1247 case HS_SUCCESS:
1248 post_create_hook();
1249 continue;
1250 }
1251 }
1252 out:
1253 return ret;
1254 }
1255
1256 static void exit_hook(int exit_code)
1257 {
1258 char *argv[3];
1259 pid_t pid;
1260
1261 argv[0] = conf.exit_hook_arg;
1262 argv[1] = dss_strerror(-exit_code);
1263 argv[2] = NULL;
1264
1265 DSS_NOTICE_LOG("executing %s %s\n", argv[0], argv[1]);
1266 dss_exec(&pid, conf.exit_hook_arg, argv);
1267 }
1268
/**
 * Acquire the dss lock or terminate the process.
 *
 * The lock is taken by passing the config file name to lock_dss(). If that
 * fails, the error is logged and the process exits with EXIT_FAILURE.
 */
static void lock_dss_or_die(void)
{
	int ret;
	char *config_file;

	config_file = get_config_file_name();
	ret = lock_dss(config_file);
	free(config_file);
	if (ret >= 0)
		return;
	DSS_EMERG_LOG("failed to lock: %s\n", dss_strerror(-ret));
	exit(EXIT_FAILURE);
}
1280
1281 static int com_run(void)
1282 {
1283 int ret;
1284
1285 lock_dss_or_die();
1286 if (conf.dry_run_given) {
1287 DSS_ERROR_LOG("dry_run not supported by this command\n");
1288 return -E_SYNTAX;
1289 }
1290 ret = install_sighandler(SIGHUP);
1291 if (ret < 0)
1292 return ret;
1293 ret = select_loop();
1294 if (ret >= 0) /* impossible */
1295 ret = -E_BUG;
1296 exit_hook(ret);
1297 return ret;
1298 }
1299
1300 static int com_prune(void)
1301 {
1302 int ret;
1303 struct snapshot_list sl;
1304 struct snapshot *victim;
1305 struct disk_space ds;
1306 const char *why;
1307
1308 lock_dss_or_die();
1309 ret = get_disk_space(".", &ds);
1310 if (ret < 0)
1311 return ret;
1312 log_disk_space(&ds);
1313 dss_get_snapshot_list(&sl);
1314 why = "outdated";
1315 victim = find_outdated_snapshot(&sl);
1316 if (victim)
1317 goto rm;
1318 why = "redundant";
1319 victim = find_redundant_snapshot(&sl);
1320 if (victim)
1321 goto rm;
1322 ret = 0;
1323 goto out;
1324 rm:
1325 if (conf.dry_run_given) {
1326 dss_msg("%s snapshot %s (interval = %i)\n",
1327 why, victim->name, victim->interval);
1328 ret = 0;
1329 goto out;
1330 }
1331 pre_remove_hook(victim, why);
1332 if (snapshot_removal_status == HS_PRE_RUNNING) {
1333 ret = wait_for_remove_process();
1334 if (ret < 0)
1335 goto out;
1336 if (snapshot_removal_status != HS_PRE_SUCCESS)
1337 goto out;
1338 }
1339 ret = exec_rm();
1340 if (ret < 0)
1341 goto out;
1342 ret = wait_for_remove_process();
1343 if (ret < 0)
1344 goto out;
1345 if (snapshot_removal_status != HS_SUCCESS)
1346 goto out;
1347 post_remove_hook();
1348 if (snapshot_removal_status != HS_POST_RUNNING)
1349 goto out;
1350 ret = wait_for_remove_process();
1351 if (ret < 0)
1352 goto out;
1353 ret = 1;
1354 out:
1355 free_snapshot_list(&sl);
1356 return ret;
1357 }
1358
1359 static int com_create(void)
1360 {
1361 int ret, status;
1362 char **rsync_argv;
1363
1364 lock_dss_or_die();
1365 if (conf.dry_run_given) {
1366 int i;
1367 char *msg = NULL;
1368 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1369 for (i = 0; rsync_argv[i]; i++) {
1370 char *tmp = msg;
1371 msg = make_message("%s%s%s", tmp? tmp : "",
1372 tmp? " " : "", rsync_argv[i]);
1373 free(tmp);
1374 }
1375 free_rsync_argv(rsync_argv);
1376 dss_msg("%s\n", msg);
1377 free(msg);
1378 return 1;
1379 }
1380 pre_create_hook();
1381 if (create_pid) {
1382 ret = wait_for_process(create_pid, &status);
1383 if (ret < 0)
1384 return ret;
1385 ret = handle_pre_create_hook_exit(status);
1386 if (ret <= 0) /* error, or pre-create failed */
1387 return ret;
1388 }
1389 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1390 ret = create_snapshot(rsync_argv);
1391 if (ret < 0)
1392 goto out;
1393 ret = wait_for_process(create_pid, &status);
1394 if (ret < 0)
1395 goto out;
1396 ret = handle_rsync_exit(status);
1397 if (ret < 0)
1398 goto out;
1399 post_create_hook();
1400 if (create_pid)
1401 ret = wait_for_process(create_pid, &status);
1402 out:
1403 free_rsync_argv(rsync_argv);
1404 return ret;
1405 }
1406
1407 static int com_ls(void)
1408 {
1409 int i;
1410 struct snapshot_list sl;
1411 struct snapshot *s;
1412
1413 dss_get_snapshot_list(&sl);
1414 FOR_EACH_SNAPSHOT(s, i, &sl) {
1415 int64_t d = 0;
1416 if (s->flags & SS_COMPLETE)
1417 d = (s->completion_time - s->creation_time) / 60;
1418 dss_msg("%u\t%s\t%3" PRId64 ":%02" PRId64 "\n", s->interval, s->name, d/60, d%60);
1419 };
1420 free_snapshot_list(&sl);
1421 return 1;
1422 }
1423
1424 static int setup_signal_handling(void)
1425 {
1426 int ret;
1427
1428 DSS_INFO_LOG("setting up signal handlers\n");
1429 signal_pipe = signal_init(); /* always successful */
1430 ret = install_sighandler(SIGINT);
1431 if (ret < 0)
1432 return ret;
1433 ret = install_sighandler(SIGTERM);
1434 if (ret < 0)
1435 return ret;
1436 return install_sighandler(SIGCHLD);
1437 }
1438
1439 /**
1440 * The main function of dss.
1441 *
1442 * \param argc Usual argument count.
1443 * \param argv Usual argument vector.
1444 */
1445 int main(int argc, char **argv)
1446 {
1447 int ret;
1448 struct cmdline_parser_params params;
1449
1450 params.override = 0;
1451 params.initialize = 1;
1452 params.check_required = 0;
1453 params.check_ambiguity = 0;
1454 params.print_errors = 1;
1455
1456 cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */
1457 ret = parse_config_file(0);
1458 if (ret < 0)
1459 goto out;
1460 if (ret == 0) { /* no config file given */
1461 /*
1462 * Parse the command line options again, but this time check
1463 * that all required options are given.
1464 */
1465 struct cmdline_parser_params params;
1466 params.override = 1;
1467 params.initialize = 1;
1468 params.check_required = 1;
1469 params.check_ambiguity = 1;
1470 params.print_errors = 1;
1471 cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */
1472 }
1473 if (conf.daemon_given)
1474 daemon_init();
1475 ret = change_to_dest_dir();
1476 if (ret < 0)
1477 goto out;
1478 dump_dss_config("startup");
1479 ret = setup_signal_handling();
1480 if (ret < 0)
1481 goto out;
1482 ret = call_command_handler();
1483 out:
1484 if (ret < 0)
1485 DSS_EMERG_LOG("%s\n", dss_strerror(-ret));
1486 exit(ret >= 0? EXIT_SUCCESS : EXIT_FAILURE);
1487 }