24d77eb79dbd2ab0862b2b7012b73babde542c93
[dss.git] / dss.c
1 /*
2  * Copyright (C) 2008-2011 Andre Noll <maan@tuebingen.mpg.de>
3  *
4  * Licensed under the GPL v2. For licencing details see COPYING.
5  */
6 #include <string.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <assert.h>
11 #include <errno.h>
12 #include <sys/types.h>
13 #include <signal.h>
14 #include <ctype.h>
15 #include <stdbool.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 #include <inttypes.h>
19 #include <sys/time.h>
20 #include <time.h>
21 #include <sys/wait.h>
22 #include <fnmatch.h>
23 #include <limits.h>
24 #include <fcntl.h>
25 #include <lopsub.h>
26 #include <sys/mman.h>
27
28 #include "gcc-compat.h"
29 #include "log.h"
30 #include "str.h"
31 #include "err.h"
32 #include "file.h"
33 #include "exec.h"
34 #include "daemon.h"
35 #include "sig.h"
36 #include "df.h"
37 #include "tv.h"
38 #include "snap.h"
39 #include "ipc.h"
40 #include "dss.lsg.h"
41
42 #define CMD_PTR(_cname) lls_cmd(LSG_DSS_CMD_ ## _cname, dss_suite) /* command _cname of dss_suite, as returned by lls_cmd() */
43 #define OPT_RESULT(_cname, _oname) (lls_opt_result(\
44         LSG_DSS_ ## _cname ## _OPT_ ## _oname, (CMD_PTR(_cname) == CMD_PTR(DSS))? lpr : sublpr)) /* result of option _oname: looked up in lpr for the DSS command, in sublpr for all others */
45 #define OPT_GIVEN(_cname, _oname) (lls_opt_given(OPT_RESULT(_cname, _oname))) /* lls_opt_given() applied to that option result */
46 #define OPT_STRING_VAL(_cname, _oname) (lls_string_val(0, \
47         OPT_RESULT(_cname, _oname))) /* string value at index 0 of the option */
48 #define OPT_UINT32_VAL(_cname, _oname) (lls_uint32_val(0, \
49                 OPT_RESULT(_cname, _oname))) /* uint32 value at index 0 of the option */
50
51 struct dss_user_data {int (*handler)(void);}; /* per-command data: the function implementing the command */
52 #define EXPORT_CMD_HANDLER(_cmd) const struct dss_user_data \
53         lsg_dss_com_ ## _cmd ## _user_data = { \
54                 .handler = com_ ## _cmd \
55         }; /* defines lsg_dss_com_<cmd>_user_data with com_<cmd>() as its handler */
56
57 /*
58  * Command line and active options. We need to keep a copy of the parsed
59  * command line options for the SIGHUP case where we merge the command line
60  * options and the new config file options.
61  */
62 static struct lls_parse_result *cmdline_lpr, *lpr;
63
64 /** Parsed subcommand options (used by OPT_RESULT() for all commands but DSS). */
65 static struct lls_parse_result *cmdline_sublpr, *sublpr;
66 /* The executing subcommand (NULL at startup). */
67 static const struct lls_command *subcmd;
68 /** Whether daemon_init() was called. */
69 static bool daemonized;
70 /** Non-NULL if we log to a file. */
71 static FILE *logfile;
72 /** The read end of the signal pipe */
73 static int signal_pipe;
74 /** Process id of current pre-create-hook/rsync/post-create-hook process. */
75 static pid_t create_pid;
76 /** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
77 static int create_process_stopped;
78 /** How many times in a row the rsync command failed. */
79 static int num_consecutive_rsync_errors;
80 /** Process id of current pre-remove/rm/post-remove process. */
81 static pid_t remove_pid;
82 /** When the next snapshot is due. Zero means: not yet computed. */
83 static int64_t next_snapshot_time;
84 /** When to try to remove something. */
85 static struct timeval next_removal_check;
86 /** Creation time of the snapshot currently being created. */
87 static int64_t current_snapshot_creation_time;
88 /** The snapshot currently being removed (heap-allocated copy, NULL if none). */
89 struct snapshot *snapshot_currently_being_removed;
90 /** Needed by the post-create hook. */
91 static char *path_to_last_complete_snapshot;
92 static char *name_of_reference_snapshot; /* NULL if there is no reference snapshot */
93 /** \sa \ref snap.h for details. */
94 enum hook_status snapshot_creation_status;
95 /** \sa \ref snap.h for details. */
96 enum hook_status snapshot_removal_status;
97
98
99 DEFINE_DSS_ERRLIST;
100 static const char *hook_status_description[] = {HOOK_STATUS_ARRAY}; /* indexed by enum hook_status values */
101
102 /* may be called with ds == NULL. */
103 static int disk_space_low(struct disk_space *ds)
104 {
105         struct disk_space ds_struct;
106         uint32_t val;
107
108         if (!ds) {
109                 int ret = get_disk_space(".", &ds_struct);
110                 if (ret < 0)
111                         return ret;
112                 ds = &ds_struct;
113         }
114         val = OPT_UINT32_VAL(DSS, MIN_FREE_MB);
115         if (val != 0)
116                 if (ds->free_mb < val)
117                         return 1;
118         val = OPT_UINT32_VAL(DSS, MIN_FREE_PERCENT);
119         if (val != 0)
120                 if (ds->percent_free < val)
121                         return 1;
122         val = OPT_UINT32_VAL(DSS, MIN_FREE_PERCENT_INODES);
123         if (val != 0)
124                 if (ds->percent_free_inodes < val)
125                         return 1;
126         return 0;
127 }
128
129 static void dump_dss_config(const char *msg)
130 {
131         const char dash[] = "-----------------------------";
132         char *lopsub_dump;
133         int ret;
134         FILE *log = logfile? logfile : stderr;
135         struct disk_space ds;
136         int64_t now = get_current_time();
137
138         if (OPT_UINT32_VAL(DSS, LOGLEVEL) > INFO)
139                 return;
140
141         fprintf(log, "%s <%s config> %s\n", dash, msg, dash);
142         fprintf(log, "\n*** disk space ***\n\n");
143         ret = get_disk_space(".", &ds);
144         if (ret >= 0) {
145                 DSS_INFO_LOG(("disk space low: %s\n", disk_space_low(&ds)?
146                         "yes" : "no"));
147                 log_disk_space(&ds);
148         } else
149                 DSS_ERROR_LOG(("can not get free disk space: %s\n",
150                         dss_strerror(-ret)));
151
152         /* we continue on errors from get_disk_space */
153
154         fprintf(log, "\n*** non-default options ***\n\n");
155         lopsub_dump = lls_dump_parse_result(lpr, CMD_PTR(DSS), true);
156         fprintf(log, "%s", lopsub_dump);
157         free(lopsub_dump);
158         fprintf(log, "\n*** non-default options for \"run\" ***\n\n");
159         lopsub_dump = lls_dump_parse_result(lpr, CMD_PTR(RUN), true);
160         fprintf(log, "%s", lopsub_dump);
161         free(lopsub_dump);
162         fprintf(log, "\n*** internal state ***\n\n");
163         fprintf(log,
164                 "pid: %d\n"
165                 "logile: %s\n"
166                 "snapshot_currently_being_removed: %s\n"
167                 "path_to_last_complete_snapshot: %s\n"
168                 "reference_snapshot: %s\n"
169                 "snapshot_creation_status: %s\n"
170                 "snapshot_removal_status: %s\n"
171                 "num_consecutive_rsync_errors: %d\n"
172                 ,
173                 (int) getpid(),
174                 logfile? OPT_STRING_VAL(RUN, LOGFILE) : "stderr",
175                 snapshot_currently_being_removed?
176                         snapshot_currently_being_removed->name : "(none)",
177                 path_to_last_complete_snapshot?
178                         path_to_last_complete_snapshot : "(none)",
179                 name_of_reference_snapshot?
180                         name_of_reference_snapshot : "(none)",
181                 hook_status_description[snapshot_creation_status],
182                 hook_status_description[snapshot_removal_status],
183                 num_consecutive_rsync_errors
184         );
185         if (create_pid != 0)
186                 fprintf(log,
187                         "create_pid: %" PRId32 "\n"
188                         "create process is %sstopped\n"
189                         ,
190                         create_pid,
191                         create_process_stopped? "" : "not "
192                 );
193         if (remove_pid != 0)
194                 fprintf(log, "remove_pid: %" PRId32 "\n", remove_pid);
195         if (next_snapshot_time != 0)
196                 fprintf(log, "next snapshot due in %" PRId64 " seconds\n",
197                         next_snapshot_time - now);
198         if (current_snapshot_creation_time != 0)
199                 fprintf(log, "current_snapshot_creation_time: %"
200                         PRId64 " (%" PRId64 " seconds ago)\n",
201                         current_snapshot_creation_time,
202                         now - current_snapshot_creation_time
203                 );
204         if (next_removal_check.tv_sec != 0) {
205                 fprintf(log, "next removal check: %llu (%llu seconds ago)\n",
206                         (long long unsigned)next_removal_check.tv_sec,
207                         now - (long long unsigned)next_removal_check.tv_sec
208                 );
209
210         }
211         fprintf(log, "%s </%s config> %s\n", dash, msg, dash);
212 }
213
/*
 * Severity and source code location of the next log message. These are
 * written by dss_log_set_params() and read by dss_log().
 */
static int loglevel = -1;
static const char *location_file = NULL;
static int location_line = -1;
static const char *location_func = NULL;

/**
 * Store the parameters for the next call to dss_log().
 *
 * \param ll Severity of the message.
 * \param file Name of the source file which emits the message.
 * \param line Line number within that file.
 * \param func Name of the function which emits the message.
 *
 * The two strings are not copied, only the pointers are stored.
 */
void dss_log_set_params(int ll, const char *file, int line, const char *func)
{
	location_func = func;
	location_line = line;
	location_file = file;
	loglevel = ll;
}
226
227 /**
228  * The log function of dss.
229  *
230  * \param ll Loglevel.
231  * \param fml Usual format string.
232  *
233  * All DSS_XXX_LOG() macros use this function.
234  */
235 __printf_1_2 void dss_log(const char* fmt,...)
236 {
237         va_list argp;
238         FILE *outfd;
239         struct tm *tm;
240         time_t t1;
241         char str[255] = "";
242         int lpr_ll = lpr? OPT_UINT32_VAL(DSS, LOGLEVEL) : WARNING;
243
244         if (loglevel < lpr_ll)
245                 return;
246         outfd = logfile? logfile : stderr;
247         if (subcmd == CMD_PTR(RUN)) {
248                 time(&t1);
249                 tm = localtime(&t1);
250                 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
251                 fprintf(outfd, "%s ", str);
252                 if (lpr_ll <= INFO)
253                         fprintf(outfd, "%i: ", loglevel);
254         }
255         if (subcmd == CMD_PTR(RUN))
256 #ifdef DSS_NO_FUNC_NAMES
257                 fprintf(outfd, "%s:%d: ", location_file, location_line);
258 #else
259                 fprintf(outfd, "%s: ", location_func);
260 #endif
261         va_start(argp, fmt);
262         vfprintf(outfd, fmt, argp);
263         va_end(argp);
264 }
265
266 /**
267  * Print a message either to stdout or to the log file.
268  */
269 static __printf_1_2 void dss_msg(const char* fmt,...)
270 {
271         FILE *outfd = logfile? logfile : stdout;
272         va_list argp;
273         va_start(argp, fmt);
274         vfprintf(outfd, fmt, argp);
275         va_end(argp);
276 }
277
278 static char *get_config_file_name(void)
279 {
280         char *home, *config_file;
281
282         if (OPT_GIVEN(DSS, CONFIG_FILE))
283                 return dss_strdup(OPT_STRING_VAL(DSS, CONFIG_FILE));
284         home = get_homedir();
285         config_file = make_message("%s/.dssrc", home);
286         free(home);
287         return config_file;
288 }
289
290 static int send_signal(int sig)
291 {
292         pid_t pid;
293         char *config_file = get_config_file_name();
294         int ret = get_dss_pid(config_file, &pid);
295
296         free(config_file);
297         if (ret < 0)
298                 return ret;
299         if (OPT_GIVEN(DSS, DRY_RUN)) {
300                 dss_msg("%d\n", (int)pid);
301                 return 0;
302         }
303         DSS_NOTICE_LOG(("sending signal %d to pid %d\n", sig, (int)pid));
304         ret = kill(pid, sig);
305         if (ret < 0)
306                 return -ERRNO_TO_DSS_ERROR(errno);
307         return 1;
308 }
309
/* Maps the name of a signal (without the "SIG" prefix) to its number. */
struct signal_info {
	const char * const name;
	int num;
};

/*
 * The table below was taken in 2016 from proc/sig.c of procps-3.2.8.
 * Copyright 1998-2003 by Albert Cahalan, GPLv2.
 *
 * Signals which do not exist on all systems are guarded by preprocessor
 * conditionals.
 */
static const struct signal_info signal_table[] = {
	{.name = "ABRT",   .num = SIGABRT},  /* IOT */
	{.name = "ALRM",   .num = SIGALRM},
	{.name = "BUS",    .num = SIGBUS},
	{.name = "CHLD",   .num = SIGCHLD},  /* CLD */
	{.name = "CONT",   .num = SIGCONT},
	{.name = "FPE",    .num = SIGFPE},
	{.name = "HUP",    .num = SIGHUP},
	{.name = "ILL",    .num = SIGILL},
	{.name = "INT",    .num = SIGINT},
	{.name = "KILL",   .num = SIGKILL},
	{.name = "PIPE",   .num = SIGPIPE},
#ifdef SIGPOLL
	{.name = "POLL",   .num = SIGPOLL},  /* IO */
#endif
	{.name = "PROF",   .num = SIGPROF},
#ifdef SIGPWR
	{.name = "PWR",    .num = SIGPWR},
#endif
	{.name = "QUIT",   .num = SIGQUIT},
	{.name = "SEGV",   .num = SIGSEGV},
#ifdef SIGSTKFLT
	{.name = "STKFLT", .num = SIGSTKFLT},
#endif
	{.name = "STOP",   .num = SIGSTOP},
	{.name = "SYS",    .num = SIGSYS},   /* UNUSED */
	{.name = "TERM",   .num = SIGTERM},
	{.name = "TRAP",   .num = SIGTRAP},
	{.name = "TSTP",   .num = SIGTSTP},
	{.name = "TTIN",   .num = SIGTTIN},
	{.name = "TTOU",   .num = SIGTTOU},
	{.name = "URG",    .num = SIGURG},
	{.name = "USR1",   .num = SIGUSR1},
	{.name = "USR2",   .num = SIGUSR2},
	{.name = "VTALRM", .num = SIGVTALRM},
	{.name = "WINCH",  .num = SIGWINCH},
	{.name = "XCPU",   .num = SIGXCPU},
	{.name = "XFSZ",   .num = SIGXFSZ}
};

/* Number of entries of the signal table. */
#define SIGNAL_TABLE_SIZE (sizeof(signal_table) / sizeof(signal_table[0]))
/* Fallback for systems whose signal.h does not define SIGRTMAX. */
#ifndef SIGRTMAX
#define SIGRTMAX 64
#endif
363
364 static int com_kill(void)
365 {
366         const char *arg = OPT_STRING_VAL(KILL, SIGNAL);
367         int ret, i;
368
369         if (*arg >= '0' && *arg <= '9') {
370                 int64_t val;
371                 ret = dss_atoi64(arg, &val);
372                 if (ret < 0)
373                         return ret;
374                 if (val < 0 || val > SIGRTMAX)
375                         return -ERRNO_TO_DSS_ERROR(EINVAL);
376                 return send_signal(val);
377         }
378         if (strncasecmp(arg, "sig", 3) == 0)
379                 arg += 3;
380         if (strcasecmp(arg, "CLD") == 0)
381                 return send_signal(SIGCHLD);
382         if (strcasecmp(arg, "IOT") == 0)
383                 return send_signal(SIGABRT);
384         for (i = 0; i < SIGNAL_TABLE_SIZE; i++)
385                 if (strcasecmp(arg, signal_table[i].name) == 0)
386                         return send_signal(signal_table[i].num);
387         DSS_ERROR_LOG(("invalid sigspec: %s\n", arg));
388         return -ERRNO_TO_DSS_ERROR(EINVAL);
389 }
390 EXPORT_CMD_HANDLER(kill);
391
392 static void dss_get_snapshot_list(struct snapshot_list *sl)
393 {
394         get_snapshot_list(sl, OPT_UINT32_VAL(DSS, UNIT_INTERVAL),
395                 OPT_UINT32_VAL(DSS, NUM_INTERVALS));
396 }
397
398 static int64_t compute_next_snapshot_time(void)
399 {
400         int64_t x = 0, now = get_current_time(), unit_interval
401                 = 24 * 3600 * OPT_UINT32_VAL(DSS, UNIT_INTERVAL), ret;
402         unsigned wanted = desired_number_of_snapshots(0,
403                 OPT_UINT32_VAL(DSS, NUM_INTERVALS)),
404                 num_complete = 0;
405         int i;
406         struct snapshot *s = NULL;
407         struct snapshot_list sl;
408
409         dss_get_snapshot_list(&sl);
410         FOR_EACH_SNAPSHOT(s, i, &sl) {
411                 if (!(s->flags & SS_COMPLETE))
412                         continue;
413                 num_complete++;
414                 x += s->completion_time - s->creation_time;
415         }
416         assert(x >= 0);
417
418         ret = now;
419         if (num_complete == 0)
420                 goto out;
421         x /= num_complete; /* avg time to create one snapshot */
422         if (unit_interval < x * wanted) /* oops, no sleep at all */
423                 goto out;
424         ret = s->completion_time + unit_interval / wanted - x;
425 out:
426         free_snapshot_list(&sl);
427         return ret;
428 }
429
430 static inline void invalidate_next_snapshot_time(void)
431 {
432         next_snapshot_time = 0;
433 }
434
435 static inline int next_snapshot_time_is_valid(void)
436 {
437         return next_snapshot_time != 0;
438 }
439
440 static int next_snapshot_is_due(void)
441 {
442         int64_t now = get_current_time();
443
444         if (!next_snapshot_time_is_valid())
445                 next_snapshot_time = compute_next_snapshot_time();
446         if (next_snapshot_time <= now) {
447                 DSS_DEBUG_LOG(("next snapshot: now\n"));
448                 return 1;
449         }
450         DSS_DEBUG_LOG(("next snapshot due in %" PRId64 " seconds\n",
451                 next_snapshot_time - now));
452         return 0;
453 }
454
455 static void pre_create_hook(void)
456 {
457         assert(snapshot_creation_status == HS_READY);
458         /* make sure that the next snapshot time will be recomputed */
459         invalidate_next_snapshot_time();
460         DSS_DEBUG_LOG(("executing %s\n", OPT_STRING_VAL(DSS, PRE_CREATE_HOOK)));
461         dss_exec_cmdline_pid(&create_pid, OPT_STRING_VAL(DSS, PRE_CREATE_HOOK));
462         snapshot_creation_status = HS_PRE_RUNNING;
463 }
464
465 static void pre_remove_hook(struct snapshot *s, const char *why)
466 {
467         char *cmd;
468
469         if (!s)
470                 return;
471         DSS_DEBUG_LOG(("%s snapshot %s\n", why, s->name));
472         assert(snapshot_removal_status == HS_READY);
473         assert(remove_pid == 0);
474         assert(!snapshot_currently_being_removed);
475
476         snapshot_currently_being_removed = dss_malloc(sizeof(struct snapshot));
477         *snapshot_currently_being_removed = *s;
478         snapshot_currently_being_removed->name = dss_strdup(s->name);
479
480         cmd = make_message("%s %s/%s", OPT_STRING_VAL(DSS, PRE_REMOVE_HOOK),
481                 OPT_STRING_VAL(DSS, DEST_DIR), s->name);
482         DSS_DEBUG_LOG(("executing %s\n", cmd));
483         dss_exec_cmdline_pid(&remove_pid, cmd);
484         free(cmd);
485         snapshot_removal_status = HS_PRE_RUNNING;
486 }
487
488 static int exec_rm(void)
489 {
490         struct snapshot *s = snapshot_currently_being_removed;
491         char *new_name = being_deleted_name(s);
492         char *argv[4];
493         int ret;
494
495         argv[0] = "rm";
496         argv[1] = "-rf";
497         argv[2] = new_name;
498         argv[3] = NULL;
499
500         assert(snapshot_removal_status == HS_PRE_SUCCESS);
501         assert(remove_pid == 0);
502
503         DSS_NOTICE_LOG(("removing %s (interval = %i)\n", s->name, s->interval));
504         ret = dss_rename(s->name, new_name);
505         if (ret < 0)
506                 goto out;
507         dss_exec(&remove_pid, argv[0], argv);
508         snapshot_removal_status = HS_RUNNING;
509 out:
510         free(new_name);
511         return ret;
512 }
513
514 static int snapshot_is_being_created(struct snapshot *s)
515 {
516         return s->creation_time == current_snapshot_creation_time;
517 }
518
519 static struct snapshot *find_orphaned_snapshot(struct snapshot_list *sl)
520 {
521         struct snapshot *s;
522         int i;
523
524         DSS_DEBUG_LOG(("looking for old incomplete snapshots\n"));
525         FOR_EACH_SNAPSHOT(s, i, sl) {
526                 if (snapshot_is_being_created(s))
527                         continue;
528                 /*
529                  * We know that no rm is currently running, so if s is marked
530                  * as being deleted, a previously started rm must have failed.
531                  */
532                 if (s->flags & SS_BEING_DELETED)
533                         return s;
534
535                 if (s->flags & SS_COMPLETE) /* good snapshot */
536                         continue;
537                 /*
538                  * This snapshot is incomplete and it is not the snapshot
539                  * currently being created. However, we must not remove it if
540                  * rsync is about to be restarted. As only the newest snapshot
541                  * can be restarted, this snapshot is orphaned if it is not the
542                  * newest snapshot or if we are not about to restart rsync.
543                  */
544                 if (get_newest_snapshot(sl) != s)
545                         return s;
546                 if (snapshot_creation_status != HS_NEEDS_RESTART)
547                         return s;
548         }
549         /* no orphaned snapshots */
550         return NULL;
551 }
552
553 static int is_reference_snapshot(struct snapshot *s)
554 {
555         if (!name_of_reference_snapshot)
556                 return 0;
557         return strcmp(s->name, name_of_reference_snapshot)? 0 : 1;
558 }
559
560 /*
561  * return: 0: no redundant snapshots, 1: rm process started, negative: error
562  */
563 static struct snapshot *find_redundant_snapshot(struct snapshot_list *sl)
564 {
565         int i, interval;
566         struct snapshot *s;
567         unsigned missing = 0;
568         uint32_t N = OPT_UINT32_VAL(DSS, NUM_INTERVALS);
569
570         DSS_DEBUG_LOG(("looking for intervals containing too many snapshots\n"));
571         for (interval = N - 1; interval >= 0; interval--) {
572                 unsigned keep = desired_number_of_snapshots(interval, N);
573                 unsigned num = sl->interval_count[interval];
574                 struct snapshot *victim = NULL, *prev = NULL;
575                 int64_t score = LONG_MAX;
576
577                 if (keep >= num)
578                         missing += keep - num;
579                 if (keep + missing >= num)
580                         continue;
581                 /* redundant snapshot in this interval, pick snapshot with lowest score */
582                 FOR_EACH_SNAPSHOT(s, i, sl) {
583                         int64_t this_score;
584
585                         if (snapshot_is_being_created(s))
586                                 continue;
587                         if (is_reference_snapshot(s))
588                                 continue;
589                         if (s->interval > interval) {
590                                 prev = s;
591                                 continue;
592                         }
593                         if (s->interval < interval)
594                                 break;
595                         if (!victim) {
596                                 victim = s;
597                                 prev = s;
598                                 continue;
599                         }
600                         assert(prev);
601                         /* check if s is a better victim */
602                         this_score = s->creation_time - prev->creation_time;
603                         assert(this_score >= 0);
604                         if (this_score < score) {
605                                 score = this_score;
606                                 victim = s;
607                         }
608                         prev = s;
609                 }
610                 assert(victim);
611                 return victim;
612         }
613         return NULL;
614 }
615
616 static struct snapshot *find_outdated_snapshot(struct snapshot_list *sl)
617 {
618         int i;
619         struct snapshot *s;
620
621         DSS_DEBUG_LOG(("looking for snapshots belonging to intervals >= %d\n",
622                 OPT_UINT32_VAL(DSS, NUM_INTERVALS)));
623         FOR_EACH_SNAPSHOT(s, i, sl) {
624                 if (snapshot_is_being_created(s))
625                         continue;
626                 if (is_reference_snapshot(s))
627                         continue;
628                 if (s->interval < OPT_UINT32_VAL(DSS, NUM_INTERVALS))
629                         continue;
630                 return s;
631         }
632         return NULL;
633 }
634
635 static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
636 {
637         int i, num_complete;
638         struct snapshot *s, *ref = NULL;
639
640         num_complete = num_complete_snapshots(sl);
641         if (num_complete <= OPT_UINT32_VAL(DSS, MIN_COMPLETE))
642                 return NULL;
643         FOR_EACH_SNAPSHOT(s, i, sl) {
644                 if (snapshot_is_being_created(s))
645                         continue;
646                 if (is_reference_snapshot(s)) { /* avoid this one */
647                         ref = s;
648                         continue;
649                 }
650                 DSS_INFO_LOG(("oldest removable snapshot: %s\n", s->name));
651                 return s;
652         }
653         assert(ref);
654         DSS_WARNING_LOG(("removing reference snapshot %s\n", ref->name));
655         return ref;
656 }
657
658 static int rename_incomplete_snapshot(int64_t start)
659 {
660         char *old_name;
661         int ret;
662         int64_t now;
663
664         /*
665          * We don't want the dss_rename() below to fail with EEXIST because the
666          * last complete snapshot was created (and completed) in the same
667          * second as this one.
668          */
669         while ((now = get_current_time()) == start)
670                 sleep(1);
671         free(path_to_last_complete_snapshot);
672         ret = complete_name(start, now, &path_to_last_complete_snapshot);
673         if (ret < 0)
674                 return ret;
675         old_name = incomplete_name(start);
676         ret = dss_rename(old_name, path_to_last_complete_snapshot);
677         if (ret >= 0)
678                 DSS_NOTICE_LOG(("%s -> %s\n", old_name,
679                         path_to_last_complete_snapshot));
680         free(old_name);
681         return ret;
682 }
683
684 static int try_to_free_disk_space(void)
685 {
686         int ret;
687         struct snapshot_list sl;
688         struct snapshot *victim;
689         struct timeval now;
690         const char *why;
691         int low_disk_space;
692
693         ret = disk_space_low(NULL);
694         if (ret < 0)
695                 return ret;
696         low_disk_space = ret;
697         gettimeofday(&now, NULL);
698         if (tv_diff(&next_removal_check, &now, NULL) > 0)
699                 return 0;
700         if (!low_disk_space) {
701                 if (OPT_GIVEN(DSS, KEEP_REDUNDANT))
702                         return 0;
703                 if (snapshot_creation_status != HS_READY)
704                         return 0;
705                 if (next_snapshot_is_due())
706                         return 0;
707         }
708         /*
709          * Idle and --keep_redundant not given, or low disk space. Look at
710          * existing snapshots.
711          */
712         dss_get_snapshot_list(&sl);
713         ret = 0;
714         /*
715          * Don't remove anything if there is free space and we have fewer
716          * snapshots than configured, plus one. This way there is always one
717          * snapshot that can be recycled.
718          */
719         if (!low_disk_space && sl.num_snapshots <=
720                         1 << OPT_UINT32_VAL(DSS, NUM_INTERVALS))
721                 goto out;
722         why = "outdated";
723         victim = find_outdated_snapshot(&sl);
724         if (victim)
725                 goto remove;
726         why = "redundant";
727         victim = find_redundant_snapshot(&sl);
728         if (victim)
729                 goto remove;
730         why = "orphaned";
731         victim = find_orphaned_snapshot(&sl);
732         if (victim)
733                 goto remove;
734         /* try harder only if disk space is low */
735         if (!low_disk_space)
736                 goto out;
737         DSS_WARNING_LOG(("disk space low and nothing obvious to remove\n"));
738         why = "oldest";
739         victim = find_oldest_removable_snapshot(&sl);
740         if (victim)
741                 goto remove;
742         DSS_CRIT_LOG(("uhuhu: disk space low and nothing to remove\n"));
743         ret = -ERRNO_TO_DSS_ERROR(ENOSPC);
744         goto out;
745 remove:
746         pre_remove_hook(victim, why);
747 out:
748         free_snapshot_list(&sl);
749         return ret;
750 }
751
752 static void post_create_hook(void)
753 {
754         char *cmd = make_message("%s %s/%s",
755                 OPT_STRING_VAL(DSS, POST_CREATE_HOOK),
756                 OPT_STRING_VAL(DSS, DEST_DIR), path_to_last_complete_snapshot);
757         DSS_NOTICE_LOG(("executing %s\n", cmd));
758         dss_exec_cmdline_pid(&create_pid, cmd);
759         free(cmd);
760         snapshot_creation_status = HS_POST_RUNNING;
761 }
762
763 static void post_remove_hook(void)
764 {
765         char *cmd;
766         struct snapshot *s = snapshot_currently_being_removed;
767
768         assert(s);
769
770         cmd = make_message("%s %s/%s", OPT_STRING_VAL(DSS, POST_REMOVE_HOOK),
771                 OPT_STRING_VAL(DSS, DEST_DIR), s->name);
772         DSS_NOTICE_LOG(("executing %s\n", cmd));
773         dss_exec_cmdline_pid(&remove_pid, cmd);
774         free(cmd);
775         snapshot_removal_status = HS_POST_RUNNING;
776 }
777
778 static void dss_kill(pid_t pid, int sig, const char *msg)
779 {
780         const char *signame, *process_name;
781
782         if (pid == 0)
783                 return;
784         switch (sig) {
785         case SIGTERM: signame = "TERM"; break;
786         case SIGSTOP: signame = "STOP"; break;
787         case SIGCONT: signame = "CONT"; break;
788         default: signame = "????";
789         }
790
791         if (pid == create_pid)
792                 process_name = "create";
793         else if (pid == remove_pid)
794                 process_name = "remove";
795         else process_name = "??????";
796
797         if (msg)
798                 DSS_INFO_LOG(("%s\n", msg));
799         DSS_DEBUG_LOG(("sending signal %d (%s) to pid %d (%s process)\n",
800                 sig, signame, (int)pid, process_name));
801         if (kill(pid, sig) >= 0)
802                 return;
803         DSS_INFO_LOG(("failed to send signal %d (%s) to pid %d (%s process)\n",
804                 sig, signame, (int)pid, process_name));
805 }
806
807 static void stop_create_process(void)
808 {
809         if (create_process_stopped)
810                 return;
811         dss_kill(create_pid, SIGSTOP, "suspending create process");
812         create_process_stopped = 1;
813 }
814
815 static void restart_create_process(void)
816 {
817         if (!create_process_stopped)
818                 return;
819         dss_kill(create_pid, SIGCONT, "resuming create process");
820         create_process_stopped = 0;
821 }
822
/**
 * Print a log message about the exit status of a child.
 *
 * \param pid The process id of the child, only used for the message.
 * \param status The wait status of the child, as stored by waitpid(2).
 *
 * Normal exits are logged with severity INFO, deaths by a signal with
 * severity NOTICE, and everything else with severity WARNING.
 */
static void log_termination_msg(pid_t pid, int status)
{
	if (WIFEXITED(status))
		DSS_INFO_LOG(("child %i exited. Exit status: %i\n", (int)pid,
			WEXITSTATUS(status)));
	else if (WIFSIGNALED(status))
		DSS_NOTICE_LOG(("child %i was killed by signal %i\n", (int)pid,
			WTERMSIG(status)));
	else /* the message used to be misspelled ("abormally") */
		DSS_WARNING_LOG(("child %i terminated abnormally\n", (int)pid));
}
837
/*
 * Wait until the child with the given pid has terminated.
 *
 * The function sleeps in dss_select() on the signal pipe and examines each
 * signal number returned by next_signal(). On SIGCHLD the child is reaped with
 * waitpid(2), which stores the exit status in *status. Any other signal
 * causes SIGTERM to be sent to the child, after which the loop continues.
 *
 * On success the non-negative return value of waitpid(2) is returned and the
 * termination message is logged. If dss_select() fails, its negative return
 * value is passed on. If waitpid(2) fails with an error other than EINTR, the
 * negated dss error code derived from errno is returned.
 */
static int wait_for_process(pid_t pid, int *status)
{
        int ret;

        DSS_DEBUG_LOG(("Waiting for process %d to terminate\n", (int)pid));
        for (;;) {
                fd_set rfds;

                FD_ZERO(&rfds);
                FD_SET(signal_pipe, &rfds);
                /* no timeout: block until a signal arrives */
                ret = dss_select(signal_pipe + 1, &rfds, NULL, NULL);
                if (ret < 0)
                        break;
                ret = next_signal();
                if (!ret) /* no signal pending, go back to sleep */
                        continue;
                if (ret == SIGCHLD) {
                        ret = waitpid(pid, status, 0);
                        if (ret >= 0)
                                break;
                        if (errno != EINTR) { /* error */
                                ret = -ERRNO_TO_DSS_ERROR(errno);
                                break;
                        }
                }
                /*
                 * SIGINT or SIGTERM. Note that this point is also reached if
                 * waitpid() was interrupted (EINTR).
                 */
                dss_kill(pid, SIGTERM, "killing child process");
        }
        if (ret < 0)
                DSS_ERROR_LOG(("failed to wait for process %d\n", (int)pid));
        else
                log_termination_msg(pid, *status);
        return ret;
}
872
873 static void handle_pre_remove_exit(int status)
874 {
875         if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
876                 snapshot_removal_status = HS_READY;
877                 gettimeofday(&next_removal_check, NULL);
878                 next_removal_check.tv_sec += 60;
879                 return;
880         }
881         snapshot_removal_status = HS_PRE_SUCCESS;
882 }
883
884 static int handle_rm_exit(int status)
885 {
886         if (!WIFEXITED(status)) {
887                 snapshot_removal_status = HS_READY;
888                 return -E_INVOLUNTARY_EXIT;
889         }
890         if (WEXITSTATUS(status)) {
891                 snapshot_removal_status = HS_READY;
892                 return -E_BAD_EXIT_CODE;
893         }
894         snapshot_removal_status = HS_SUCCESS;
895         return 1;
896 }
897
898 static void handle_post_remove_exit(void)
899 {
900         snapshot_removal_status = HS_READY;
901 }
902
903 static int handle_remove_exit(int status)
904 {
905         int ret;
906         struct snapshot *s = snapshot_currently_being_removed;
907
908         assert(s);
909         switch (snapshot_removal_status) {
910         case HS_PRE_RUNNING:
911                 handle_pre_remove_exit(status);
912                 ret = 1;
913                 break;
914         case HS_RUNNING:
915                 ret = handle_rm_exit(status);
916                 break;
917         case HS_POST_RUNNING:
918                 handle_post_remove_exit();
919                 ret = 1;
920                 break;
921         default:
922                 ret = -E_BUG;
923         }
924         if (snapshot_removal_status == HS_READY) {
925                 free(s->name);
926                 free(s);
927                 snapshot_currently_being_removed = NULL;
928         }
929         remove_pid = 0;
930         return ret;
931 }
932
933 static int wait_for_remove_process(void)
934 {
935         int status, ret;
936
937         assert(remove_pid);
938         assert(
939                 snapshot_removal_status == HS_PRE_RUNNING ||
940                 snapshot_removal_status == HS_RUNNING ||
941                 snapshot_removal_status == HS_POST_RUNNING
942         );
943         ret = wait_for_process(remove_pid, &status);
944         if (ret < 0)
945                 return ret;
946         return handle_remove_exit(status);
947 }
948
949 static int handle_rsync_exit(int status)
950 {
951         int es, ret;
952
953         if (!WIFEXITED(status)) {
954                 DSS_ERROR_LOG(("rsync process %d died involuntary\n", (int)create_pid));
955                 ret = -E_INVOLUNTARY_EXIT;
956                 snapshot_creation_status = HS_READY;
957                 goto out;
958         }
959         es = WEXITSTATUS(status);
960         /*
961          * Restart rsync on non-fatal errors:
962          * 24: Partial transfer due to vanished source files
963          */
964         if (es != 0 && es != 24) {
965                 DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
966                         es, ++num_consecutive_rsync_errors));
967                 if (!logfile) { /* called by com_run() */
968                         ret = -E_BAD_EXIT_CODE;
969                         goto out;
970                 }
971                 if (num_consecutive_rsync_errors >
972                                 OPT_UINT32_VAL(RUN, MAX_RSYNC_ERRORS)) {
973                         ret = -E_TOO_MANY_RSYNC_ERRORS;
974                         snapshot_creation_status = HS_READY;
975                         goto out;
976                 }
977                 DSS_WARNING_LOG(("restarting rsync process\n"));
978                 snapshot_creation_status = HS_NEEDS_RESTART;
979                 next_snapshot_time = get_current_time() + 60;
980                 ret = 1;
981                 goto out;
982         }
983         num_consecutive_rsync_errors = 0;
984         ret = rename_incomplete_snapshot(current_snapshot_creation_time);
985         if (ret < 0)
986                 goto out;
987         snapshot_creation_status = HS_SUCCESS;
988         free(name_of_reference_snapshot);
989         name_of_reference_snapshot = NULL;
990 out:
991         create_process_stopped = 0;
992         return ret;
993 }
994
995 static int handle_pre_create_hook_exit(int status)
996 {
997         int es, ret;
998         static int warn_count;
999
1000         if (!WIFEXITED(status)) {
1001                 snapshot_creation_status = HS_READY;
1002                 ret = -E_INVOLUNTARY_EXIT;
1003                 goto out;
1004         }
1005         es = WEXITSTATUS(status);
1006         if (es) {
1007                 if (!warn_count--) {
1008                         DSS_NOTICE_LOG(("pre_create_hook %s returned %d\n",
1009                                 OPT_STRING_VAL(DSS, PRE_CREATE_HOOK), es));
1010                         DSS_NOTICE_LOG(("deferring snapshot creation...\n"));
1011                         warn_count = 60; /* warn only once per hour */
1012                 }
1013                 next_snapshot_time = get_current_time() + 60;
1014                 snapshot_creation_status = HS_READY;
1015                 ret = 0;
1016                 goto out;
1017         }
1018         warn_count = 0;
1019         snapshot_creation_status = HS_PRE_SUCCESS;
1020         ret = 1;
1021 out:
1022         return ret;
1023 }
1024
1025 static int handle_sigchld(void)
1026 {
1027         pid_t pid;
1028         int status, ret = reap_child(&pid, &status);
1029
1030         if (ret <= 0)
1031                 return ret;
1032
1033         if (pid == create_pid) {
1034                 switch (snapshot_creation_status) {
1035                 case HS_PRE_RUNNING:
1036                         ret = handle_pre_create_hook_exit(status);
1037                         break;
1038                 case HS_RUNNING:
1039                         ret = handle_rsync_exit(status);
1040                         break;
1041                 case HS_POST_RUNNING:
1042                         snapshot_creation_status = HS_READY;
1043                         ret = 1;
1044                         break;
1045                 default:
1046                         DSS_EMERG_LOG(("BUG: create can't die in status %d\n",
1047                                 snapshot_creation_status));
1048                         return -E_BUG;
1049                 }
1050                 create_pid = 0;
1051                 return ret;
1052         }
1053         if (pid == remove_pid) {
1054                 ret = handle_remove_exit(status);
1055                 if (ret < 0)
1056                         return ret;
1057                 return ret;
1058         }
1059         DSS_EMERG_LOG(("BUG: unknown process %d died\n", (int)pid));
1060         return -E_BUG;
1061 }
1062
1063 /* also checks if . is a mountpoint, if --mountpoint was given */
1064 static int change_to_dest_dir(void)
1065 {
1066         int ret;
1067         const char *dd = OPT_STRING_VAL(DSS, DEST_DIR);
1068         struct stat dot, dotdot;
1069
1070         DSS_INFO_LOG(("changing cwd to %s\n", dd));
1071         if (chdir(dd) < 0) {
1072                 ret = -ERRNO_TO_DSS_ERROR(errno);
1073                 DSS_ERROR_LOG(("could not change cwd to %s\n", dd));
1074                 return ret;
1075         }
1076         if (!OPT_GIVEN(DSS, MOUNTPOINT))
1077                 return 0;
1078         if (stat(".", &dot) < 0) {
1079                 ret = -ERRNO_TO_DSS_ERROR(errno);
1080                 DSS_ERROR_LOG(("could not stat .\n"));
1081                 return ret;
1082         }
1083         if (stat("..", &dotdot) < 0) {
1084                 ret = -ERRNO_TO_DSS_ERROR(errno);
1085                 DSS_ERROR_LOG(("could not stat ..\n"));
1086                 return ret;
1087         }
1088         if (dot.st_dev == dotdot.st_dev && dot.st_ino != dotdot.st_ino) {
1089                 DSS_ERROR_LOG(("mountpoint check failed for %s\n", dd));
1090                 return -E_MOUNTPOINT;
1091         }
1092         return 1;
1093 }
1094
1095 static int check_config(void)
1096 {
1097         int ret;
1098         uint32_t unit_interval = OPT_UINT32_VAL(DSS, UNIT_INTERVAL);
1099         uint32_t num_intervals = OPT_UINT32_VAL(DSS, NUM_INTERVALS);
1100
1101         if (unit_interval == 0) {
1102                 DSS_ERROR_LOG(("bad unit interval: %i\n", unit_interval));
1103                 return -E_INVALID_NUMBER;
1104         }
1105         DSS_DEBUG_LOG(("unit interval: %i day(s)\n", unit_interval));
1106
1107         if (num_intervals == 0 || num_intervals > 30) {
1108                 DSS_ERROR_LOG(("bad number of intervals: %i\n", num_intervals));
1109                 return -E_INVALID_NUMBER;
1110         }
1111         if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE))
1112                 if (!OPT_GIVEN(DSS, SOURCE_DIR)) {
1113                         DSS_ERROR_LOG(("--source-dir required\n"));
1114                         return -E_SYNTAX;
1115                 }
1116         if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE)
1117                         || subcmd == CMD_PTR(LS) || subcmd == CMD_PTR(PRUNE)) {
1118                 if (!OPT_GIVEN(DSS, DEST_DIR)) {
1119                         DSS_ERROR_LOG(("--dest-dir required\n"));
1120                         return -E_SYNTAX;
1121                 }
1122                 ret = change_to_dest_dir();
1123                 if (ret < 0)
1124                         return ret;
1125         }
1126         DSS_DEBUG_LOG(("number of intervals: %i\n", num_intervals));
1127         return 1;
1128 }
1129
1130 static int lopsub_error(int lopsub_ret, char **errctx)
1131 {
1132         const char *msg = lls_strerror(-lopsub_ret);
1133         if (*errctx)
1134                 DSS_ERROR_LOG(("%s: %s\n", *errctx, msg));
1135         else
1136                 DSS_ERROR_LOG(("%s\n", msg));
1137         free(*errctx);
1138         *errctx = NULL;
1139         return -E_LOPSUB;
1140 }
1141
1142 static int parse_config_file(bool sighup, const struct lls_command *cmd)
1143 {
1144         int ret, fd = -1;
1145         char *config_file = get_config_file_name();
1146         struct stat statbuf;
1147         void *map;
1148         size_t sz;
1149         int cf_argc;
1150         char **cf_argv, *errctx = NULL;
1151         struct lls_parse_result *cf_lpr, *merged_lpr, *clpr;
1152         const char *subcmd_name;
1153
1154         ret = open(config_file, O_RDONLY);
1155         if (ret < 0) {
1156                 if (errno != ENOENT || OPT_GIVEN(DSS, CONFIG_FILE)) {
1157                         ret = -ERRNO_TO_DSS_ERROR(errno);
1158                         DSS_ERROR_LOG(("config file %s can not be opened\n",
1159                                 config_file));
1160                         goto out;
1161                 }
1162                 /* no config file -- nothing to do */
1163                 ret = 0;
1164                 goto success;
1165         }
1166         fd = ret;
1167         ret = fstat(fd, &statbuf);
1168         if (ret < 0) {
1169                 ret = -ERRNO_TO_DSS_ERROR(errno);
1170                 DSS_ERROR_LOG(("failed to stat config file %s\n", config_file));
1171                 goto close_fd;
1172         }
1173         sz = statbuf.st_size;
1174         if (sz == 0) { /* config file is empty -- nothing to do */
1175                 ret = 0;
1176                 goto success;
1177         }
1178         map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
1179         if (map == MAP_FAILED) {
1180                 ret = -ERRNO_TO_DSS_ERROR(errno);
1181                 DSS_ERROR_LOG(("failed to mmap config file %s\n",
1182                         config_file));
1183                 goto close_fd;
1184         }
1185         if (cmd == CMD_PTR(DSS))
1186                 subcmd_name = NULL;
1187         else
1188                 subcmd_name = lls_command_name(cmd);
1189         ret = lls_convert_config(map, sz, subcmd_name, &cf_argv, &errctx);
1190         munmap(map, sz);
1191         if (ret < 0) {
1192                 DSS_ERROR_LOG(("failed to convert config file %s\n",
1193                         config_file));
1194                 ret = lopsub_error(ret, &errctx);
1195                 goto close_fd;
1196         }
1197         cf_argc = ret;
1198         ret = lls_parse(cf_argc, cf_argv, cmd, &cf_lpr, &errctx);
1199         lls_free_argv(cf_argv);
1200         if (ret < 0) {
1201                 ret = lopsub_error(ret, &errctx);
1202                 goto close_fd;
1203         }
1204         clpr = cmd == CMD_PTR(DSS)? cmdline_lpr : cmdline_sublpr;
1205         if (sighup) /* config file overrides command line */
1206                 ret = lls_merge(cf_lpr, clpr, cmd, &merged_lpr, &errctx);
1207         else /* command line options overrride config file options */
1208                 ret = lls_merge(clpr, cf_lpr, cmd, &merged_lpr, &errctx);
1209         lls_free_parse_result(cf_lpr, cmd);
1210         if (ret < 0) {
1211                 ret = lopsub_error(ret, &errctx);
1212                 goto close_fd;
1213         }
1214         ret = 1;
1215 success:
1216         assert(ret >= 0);
1217         DSS_DEBUG_LOG(("loglevel: %d\n", OPT_UINT32_VAL(DSS, LOGLEVEL)));
1218         if (cmd != CMD_PTR(DSS)) {
1219                 if (ret > 0) {
1220                         if (sublpr != cmdline_sublpr)
1221                                 lls_free_parse_result(sublpr, cmd);
1222                         sublpr = merged_lpr;
1223                 } else
1224                         sublpr = cmdline_sublpr;
1225         } else {
1226                 if (ret > 0) {
1227                         if (lpr != cmdline_lpr)
1228                                 lls_free_parse_result(lpr, cmd);
1229                         lpr = merged_lpr;
1230                 } else
1231                         lpr = cmdline_lpr;
1232         }
1233 close_fd:
1234         if (fd >= 0)
1235                 close(fd);
1236 out:
1237         free(config_file);
1238         if (ret < 0)
1239                 DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));
1240         return ret;
1241 }
1242
1243 static int handle_sighup(void)
1244 {
1245         int ret;
1246
1247         DSS_NOTICE_LOG(("SIGHUP, re-reading config\n"));
1248         dump_dss_config("old");
1249         ret = parse_config_file(true /* SIGHUP */, CMD_PTR(DSS));
1250         if (ret < 0)
1251                 return ret;
1252         ret = parse_config_file(true /* SIGHUP */, CMD_PTR(RUN));
1253         if (ret < 0)
1254                 return ret;
1255         ret = check_config();
1256         if (ret < 0)
1257                 return ret;
1258         close_log(logfile);
1259         logfile = NULL;
1260         if (OPT_GIVEN(RUN, DAEMON) || daemonized) {
1261                 logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE));
1262                 log_welcome(OPT_UINT32_VAL(DSS, LOGLEVEL));
1263                 daemonized = true;
1264         }
1265         dump_dss_config("reloaded");
1266         invalidate_next_snapshot_time();
1267         return 1;
1268 }
1269
1270 static void kill_children(void)
1271 {
1272         restart_create_process();
1273         dss_kill(create_pid, SIGTERM, NULL);
1274         dss_kill(remove_pid, SIGTERM, NULL);
1275 }
1276
1277 static int handle_signal(void)
1278 {
1279         int sig, ret = next_signal();
1280
1281         if (ret <= 0)
1282                 goto out;
1283         sig = ret;
1284         switch (sig) {
1285         case SIGINT:
1286         case SIGTERM:
1287                 kill_children();
1288                 ret = -E_SIGNAL;
1289                 break;
1290         case SIGHUP:
1291                 ret = handle_sighup();
1292                 break;
1293         case SIGCHLD:
1294                 ret = handle_sigchld();
1295                 break;
1296         }
1297 out:
1298         if (ret < 0)
1299                 DSS_ERROR_LOG(("%s\n", dss_strerror(-ret)));
1300         return ret;
1301 }
1302
1303 /*
1304  * We can not use rsync locally if the local user is different from the remote
1305  * user or if the src dir is not on the local host (or both).
1306  */
1307 static int use_rsync_locally(char *logname)
1308 {
1309         const char *h = OPT_STRING_VAL(DSS, REMOTE_HOST);
1310
1311         if (strcmp(h, "localhost") && strcmp(h, "127.0.0.1"))
1312                 return 0;
1313         if (OPT_GIVEN(DSS, REMOTE_USER) &&
1314                         strcmp(OPT_STRING_VAL(DSS, REMOTE_USER), logname))
1315                 return 0;
1316         return 1;
1317 }
1318
1319 static int rename_resume_snap(int64_t creation_time)
1320 {
1321         struct snapshot_list sl;
1322         struct snapshot *s = NULL;
1323         char *new_name = incomplete_name(creation_time);
1324         int ret;
1325         const char *why;
1326
1327         sl.num_snapshots = 0;
1328
1329         ret = 0;
1330         dss_get_snapshot_list(&sl);
1331         /*
1332          * Snapshot recycling: We first look at the newest snapshot. If this
1333          * snapshot happens to be incomplete, the last rsync process was
1334          * aborted and we reuse this one. Otherwise we look at snapshots which
1335          * could be removed (outdated and redundant snapshots) as candidates
1336          * for recycling. If no outdated/redundant snapshot exists, we check if
1337          * there is an orphaned snapshot, which likely is useless anyway.
1338          *
1339          * Only if no existing snapshot is suitable for recycling, we bite the
1340          * bullet and create a new one.
1341          */
1342         s = get_newest_snapshot(&sl);
1343         if (!s) /* no snapshots at all */
1344                 goto out;
1345         /* re-use last snapshot if it is incomplete */
1346         why = "aborted";
1347         if ((s->flags & SS_COMPLETE) == 0)
1348                 goto out;
1349         why = "outdated";
1350         s = find_outdated_snapshot(&sl);
1351         if (s)
1352                 goto out;
1353         why = "redundant";
1354         s = find_redundant_snapshot(&sl);
1355         if (s)
1356                 goto out;
1357         why = "orphaned";
1358         s = find_orphaned_snapshot(&sl);
1359 out:
1360         if (s) {
1361                 DSS_NOTICE_LOG(("recycling %s snapshot %s\n", why, s->name));
1362                 ret = dss_rename(s->name, new_name);
1363         }
1364         if (ret >= 0)
1365                 DSS_NOTICE_LOG(("creating %s\n", new_name));
1366         free(new_name);
1367         free_snapshot_list(&sl);
1368         return ret;
1369 }
1370
1371 static void create_rsync_argv(char ***argv, int64_t *num)
1372 {
1373         char *logname;
1374         int i = 0, j, N = OPT_GIVEN(DSS, RSYNC_OPTION);
1375         struct snapshot_list sl;
1376         static bool seeded;
1377
1378         dss_get_snapshot_list(&sl);
1379         assert(!name_of_reference_snapshot);
1380         name_of_reference_snapshot = name_of_newest_complete_snapshot(&sl);
1381         free_snapshot_list(&sl);
1382
1383         *argv = dss_malloc((15 + N) * sizeof(char *));
1384         (*argv)[i++] = dss_strdup("rsync");
1385         (*argv)[i++] = dss_strdup("-a");
1386         (*argv)[i++] = dss_strdup("--delete");
1387         if (!seeded) {
1388                 srandom((unsigned)time(NULL)); /* no need to be fancy here */
1389                 seeded = true;
1390         }
1391         if (1000 * (random() / (RAND_MAX + 1.0)) < OPT_UINT32_VAL(DSS, CHECKSUM)) {
1392                 DSS_NOTICE_LOG(("adding --checksum to rsync options\n"));
1393                 (*argv)[i++] = dss_strdup("--checksum");
1394         }
1395         for (j = 0; j < N; j++)
1396                 (*argv)[i++] = dss_strdup(lls_string_val(j,
1397                         OPT_RESULT(DSS, RSYNC_OPTION)));
1398         if (name_of_reference_snapshot) {
1399                 DSS_INFO_LOG(("using %s as reference\n", name_of_reference_snapshot));
1400                 (*argv)[i++] = make_message("--link-dest=../%s",
1401                         name_of_reference_snapshot);
1402         } else
1403                 DSS_INFO_LOG(("no suitable reference snapshot found\n"));
1404         logname = dss_logname();
1405         if (use_rsync_locally(logname))
1406                 (*argv)[i++] = dss_strdup(OPT_STRING_VAL(DSS, SOURCE_DIR));
1407         else
1408                 (*argv)[i++] = make_message("%s@%s:%s/",
1409                         OPT_GIVEN(DSS, REMOTE_USER)?
1410                                 OPT_STRING_VAL(DSS, REMOTE_USER) : logname,
1411                         OPT_STRING_VAL(DSS, REMOTE_HOST),
1412                         OPT_STRING_VAL(DSS, SOURCE_DIR));
1413         free(logname);
1414         *num = get_current_time();
1415         (*argv)[i++] = incomplete_name(*num);
1416         (*argv)[i++] = NULL;
1417         for (j = 0; j < i; j++)
1418                 DSS_DEBUG_LOG(("argv[%d] = %s\n", j, (*argv)[j]));
1419 }
1420
/*
 * Free a NULL-terminated argument vector together with all of its strings.
 *
 * It is OK to pass a NULL pointer, in which case the function does nothing.
 */
static void free_rsync_argv(char **argv)
{
        char **arg;

        if (argv == NULL)
                return;
        for (arg = argv; *arg != NULL; arg++)
                free(*arg);
        free(argv);
}
1431
1432 static int create_snapshot(char **argv)
1433 {
1434         int ret;
1435
1436         ret = rename_resume_snap(current_snapshot_creation_time);
1437         if (ret < 0)
1438                 return ret;
1439         dss_exec(&create_pid, argv[0], argv);
1440         snapshot_creation_status = HS_RUNNING;
1441         return ret;
1442 }
1443
/*
 * The main loop of the run subcommand.
 *
 * Each iteration sleeps in dss_select() on the signal pipe, handles a pending
 * signal, and then advances the removal and the creation state machines by at
 * most one step. Snapshot removal takes precedence: as long as the removal
 * state is not HS_READY, the create process is kept stopped.
 *
 * The loop is only left via the error path, so the return value is always
 * negative.
 */
static int select_loop(void)
{
        int ret;
        /* check every 60 seconds for free disk space */
        struct timeval tv;
        /* built when entering HS_PRE_SUCCESS, reused for restarts of rsync */
        char **rsync_argv = NULL;

        for (;;) {
                fd_set rfds;
                struct timeval *tvp;

                if (remove_pid)
                        tvp = NULL; /* sleep until rm hook/process dies */
                else { /* sleep one minute */
                        tv.tv_sec = 60;
                        tv.tv_usec = 0;
                        tvp = &tv;
                }
                FD_ZERO(&rfds);
                FD_SET(signal_pipe, &rfds);
                ret = dss_select(signal_pipe + 1, &rfds, NULL, tvp);
                if (ret < 0)
                        goto out;
                if (FD_ISSET(signal_pipe, &rfds)) {
                        ret = handle_signal();
                        if (ret < 0)
                                goto out;
                }
                /* a remove hook/process is still running: nothing to do */
                if (remove_pid)
                        continue;
                /* state HS_PRE_SUCCESS of the removal: proceed with exec_rm() */
                if (snapshot_removal_status == HS_PRE_SUCCESS) {
                        ret = exec_rm();
                        if (ret < 0)
                                goto out;
                        continue;
                }
                /* state HS_SUCCESS of the removal: proceed with the post hook */
                if (snapshot_removal_status == HS_SUCCESS) {
                        post_remove_hook();
                        continue;
                }
                ret = try_to_free_disk_space();
                if (ret < 0)
                        goto out;
                /* a removal is in progress: pause snapshot creation */
                if (snapshot_removal_status != HS_READY) {
                        stop_create_process();
                        continue;
                }
                restart_create_process();
                switch (snapshot_creation_status) {
                case HS_READY:
                        if (!next_snapshot_is_due())
                                continue;
                        pre_create_hook();
                        continue;
                case HS_PRE_RUNNING:
                case HS_RUNNING:
                case HS_POST_RUNNING:
                        /* wait for the child to terminate (SIGCHLD) */
                        continue;
                case HS_PRE_SUCCESS:
                        /*
                         * Only build a new argument vector if no reference
                         * snapshot is set. create_rsync_argv() asserts this.
                         */
                        if (!name_of_reference_snapshot) {
                                free_rsync_argv(rsync_argv);
                                create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
                        }
                        ret = create_snapshot(rsync_argv);
                        if (ret < 0)
                                goto out;
                        continue;
                case HS_NEEDS_RESTART:
                        /* rerun rsync with the same arguments once it is due */
                        if (!next_snapshot_is_due())
                                continue;
                        ret = create_snapshot(rsync_argv);
                        if (ret < 0)
                                goto out;
                        continue;
                case HS_SUCCESS:
                        post_create_hook();
                        continue;
                }
        }
out:
        return ret;
}
1526
1527 static void exit_hook(int exit_code)
1528 {
1529         const char *argv[3];
1530         pid_t pid;
1531
1532         argv[0] = OPT_STRING_VAL(DSS, EXIT_HOOK);
1533         argv[1] = dss_strerror(-exit_code);
1534         argv[2] = NULL;
1535
1536         DSS_NOTICE_LOG(("executing %s %s\n", argv[0], argv[1]));
1537         dss_exec(&pid, argv[0], (char **)argv);
1538 }
1539
/*
 * Call lock_dss() with the name of the config file and terminate the process
 * with EXIT_FAILURE if this fails.
 */
static void lock_dss_or_die(void)
{
        char *config_file = get_config_file_name();
        int ret = lock_dss(config_file);

        free(config_file);
        if (ret >= 0)
                return;
        DSS_EMERG_LOG(("failed to lock: %s\n", dss_strerror(-ret)));
        exit(EXIT_FAILURE);
}
1551
1552 static int com_run(void)
1553 {
1554         int ret, fd = -1;
1555         char *config_file;
1556         pid_t pid;
1557
1558         if (OPT_GIVEN(DSS, DRY_RUN)) {
1559                 DSS_ERROR_LOG(("dry run not supported by this command\n"));
1560                 return -E_SYNTAX;
1561         }
1562         config_file = get_config_file_name();
1563         ret = get_dss_pid(config_file, &pid);
1564         free(config_file);
1565         if (ret >= 0) {
1566                 DSS_ERROR_LOG(("pid %d\n", (int)pid));
1567                 return -E_ALREADY_RUNNING;
1568         }
1569         if (OPT_GIVEN(RUN, DAEMON)) {
1570                 fd = daemon_init();
1571                 daemonized = true;
1572                 logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE));
1573         }
1574         lock_dss_or_die();
1575         dump_dss_config("startup");
1576         ret = install_sighandler(SIGHUP);
1577         if (ret < 0)
1578                 return ret;
1579         if (fd >= 0) {
1580                 ret = write(fd, "\0", 1);
1581                 if (ret != 1) {
1582                         DSS_ERROR_LOG(("write to daemon pipe returned %d\n",
1583                                 ret));
1584                         if (ret < 0)
1585                                 return -ERRNO_TO_DSS_ERROR(errno);
1586                         return -E_BUG;
1587                 }
1588         }
1589         ret = select_loop();
1590         if (ret >= 0) /* impossible */
1591                 ret = -E_BUG;
1592         kill_children();
1593         exit_hook(ret);
1594         return ret;
1595 }
1596 EXPORT_CMD_HANDLER(run);
1597
1598 static int com_prune(void)
1599 {
1600         int ret;
1601         struct snapshot_list sl;
1602         struct snapshot *victim;
1603         struct disk_space ds;
1604         const char *why;
1605
1606         lock_dss_or_die();
1607         ret = get_disk_space(".", &ds);
1608         if (ret < 0)
1609                 return ret;
1610         log_disk_space(&ds);
1611         dss_get_snapshot_list(&sl);
1612         why = "outdated";
1613         victim = find_outdated_snapshot(&sl);
1614         if (victim)
1615                 goto rm;
1616         why = "redundant";
1617         victim = find_redundant_snapshot(&sl);
1618         if (victim)
1619                 goto rm;
1620         ret = 0;
1621         goto out;
1622 rm:
1623         if (OPT_GIVEN(DSS, DRY_RUN)) {
1624                 dss_msg("%s snapshot %s (interval = %i)\n",
1625                         why, victim->name, victim->interval);
1626                 ret = 0;
1627                 goto out;
1628         }
1629         pre_remove_hook(victim, why);
1630         if (snapshot_removal_status == HS_PRE_RUNNING) {
1631                 ret = wait_for_remove_process();
1632                 if (ret < 0)
1633                         goto out;
1634                 if (snapshot_removal_status != HS_PRE_SUCCESS)
1635                         goto out;
1636         }
1637         ret = exec_rm();
1638         if (ret < 0)
1639                 goto out;
1640         ret = wait_for_remove_process();
1641         if (ret < 0)
1642                 goto out;
1643         if (snapshot_removal_status != HS_SUCCESS)
1644                 goto out;
1645         post_remove_hook();
1646         if (snapshot_removal_status != HS_POST_RUNNING)
1647                 goto out;
1648         ret = wait_for_remove_process();
1649         if (ret < 0)
1650                 goto out;
1651         ret = 1;
1652 out:
1653         free_snapshot_list(&sl);
1654         return ret;
1655 }
1656 EXPORT_CMD_HANDLER(prune);
1657
1658 static int com_create(void)
1659 {
1660         int ret, status;
1661         char **rsync_argv;
1662
1663         lock_dss_or_die();
1664         if (OPT_GIVEN(DSS, DRY_RUN)) {
1665                 int i;
1666                 char *msg = NULL;
1667                 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1668                 for (i = 0; rsync_argv[i]; i++) {
1669                         char *tmp = msg;
1670                         msg = make_message("%s%s%s", tmp? tmp : "",
1671                                 tmp? " " : "", rsync_argv[i]);
1672                         free(tmp);
1673                 }
1674                 free_rsync_argv(rsync_argv);
1675                 dss_msg("%s\n", msg);
1676                 free(msg);
1677                 return 1;
1678         }
1679         pre_create_hook();
1680         if (create_pid) {
1681                 ret = wait_for_process(create_pid, &status);
1682                 if (ret < 0)
1683                         return ret;
1684                 ret = handle_pre_create_hook_exit(status);
1685                 if (ret <= 0) /* error, or pre-create failed */
1686                         return ret;
1687         }
1688         create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1689         ret = create_snapshot(rsync_argv);
1690         if (ret < 0)
1691                 goto out;
1692         ret = wait_for_process(create_pid, &status);
1693         if (ret < 0)
1694                 goto out;
1695         ret = handle_rsync_exit(status);
1696         if (ret < 0)
1697                 goto out;
1698         post_create_hook();
1699         if (create_pid)
1700                 ret = wait_for_process(create_pid, &status);
1701 out:
1702         free_rsync_argv(rsync_argv);
1703         return ret;
1704 }
1705 EXPORT_CMD_HANDLER(create);
1706
1707 static int com_ls(void)
1708 {
1709         int i;
1710         struct snapshot_list sl;
1711         struct snapshot *s;
1712         int64_t now = get_current_time();
1713
1714         dss_get_snapshot_list(&sl);
1715         FOR_EACH_SNAPSHOT(s, i, &sl) {
1716                 int64_t d;
1717                 if (s->flags & SS_COMPLETE)
1718                         d = (s->completion_time - s->creation_time) / 60;
1719                 else
1720                         d = (now - s->creation_time) / 60;
1721                 dss_msg("%u\t%s\t%3" PRId64 ":%02" PRId64 "\n", s->interval,
1722                         s->name, d / 60, d % 60);
1723         }
1724         free_snapshot_list(&sl);
1725         return 1;
1726 }
1727 EXPORT_CMD_HANDLER(ls);
1728
/*
 * Handler of the "configtest" subcommand.
 *
 * Only reports success on stdout; it performs no checks of its own.
 *
 * Always returns 0.
 */
static int com_configtest(void)
{
        /* puts() appends the trailing newline itself. */
        puts("Syntax Ok");
        return 0;
}
1734 EXPORT_CMD_HANDLER(configtest);
1735
1736 static int setup_signal_handling(void)
1737 {
1738         int ret;
1739
1740         DSS_INFO_LOG(("setting up signal handlers\n"));
1741         signal_pipe = signal_init(); /* always successful */
1742         ret = install_sighandler(SIGINT);
1743         if (ret < 0)
1744                 return ret;
1745         ret = install_sighandler(SIGTERM);
1746         if (ret < 0)
1747                 return ret;
1748         return install_sighandler(SIGCHLD);
1749 }
1750
1751 static void handle_version_and_help(void)
1752 {
1753         char *txt;
1754
1755         if (OPT_GIVEN(DSS, DETAILED_HELP))
1756                 txt = lls_long_help(CMD_PTR(DSS));
1757         else if (OPT_GIVEN(DSS, HELP))
1758                 txt = lls_short_help(CMD_PTR(DSS));
1759         else if (OPT_GIVEN(DSS, VERSION))
1760                 txt = dss_strdup(VERSION_STRING);
1761         else
1762                 return;
1763         printf("%s", txt);
1764         free(txt);
1765         exit(EXIT_SUCCESS);
1766 }
1767
1768 static void show_subcommand_summary(void)
1769 {
1770         const struct lls_command *cmd;
1771         int i;
1772
1773         printf("Available subcommands:\n");
1774         for (i = 1; (cmd = lls_cmd(i, dss_suite)); i++) {
1775                 const char *name = lls_command_name(cmd);
1776                 const char *purpose = lls_purpose(cmd);
1777                 printf("%-11s%s\n", name, purpose);
1778         }
1779         exit(EXIT_SUCCESS);
1780 }
1781
1782 int main(int argc, char **argv)
1783 {
1784         int ret;
1785         char *errctx = NULL;
1786         unsigned num_inputs;
1787         const struct dss_user_data *ud;
1788
1789         ret = lls_parse(argc, argv, CMD_PTR(DSS), &cmdline_lpr, &errctx);
1790         if (ret < 0) {
1791                 ret = lopsub_error(ret, &errctx);
1792                 goto out;
1793         }
1794         lpr = cmdline_lpr;
1795         ret = parse_config_file(false /* no SIGHUP */, CMD_PTR(DSS));
1796         if (ret < 0)
1797                 goto out;
1798         handle_version_and_help();
1799         num_inputs = lls_num_inputs(lpr);
1800         if (num_inputs == 0)
1801                 show_subcommand_summary();
1802         ret = lls_lookup_subcmd(argv[argc - num_inputs], dss_suite, &errctx);
1803         if (ret < 0) {
1804                 ret = lopsub_error(ret, &errctx);
1805                 goto out;
1806         }
1807         subcmd = lls_cmd(ret, dss_suite);
1808         ret = lls_parse(num_inputs, argv + argc - num_inputs, subcmd,
1809                 &cmdline_sublpr, &errctx);
1810         if (ret < 0) {
1811                 ret = lopsub_error(ret, &errctx);
1812                 goto out;
1813         }
1814         sublpr = cmdline_sublpr;
1815         ret = parse_config_file(false /* no SIGHUP */, subcmd);
1816         if (ret < 0)
1817                 goto out;
1818         ret = check_config();
1819         if (ret < 0)
1820                 goto out;
1821         ret = setup_signal_handling();
1822         if (ret < 0)
1823                 goto out;
1824         ud = lls_user_data(subcmd);
1825         ret = ud->handler();
1826         signal_shutdown();
1827 out:
1828         if (ret < 0) {
1829                 if (errctx)
1830                         DSS_ERROR_LOG(("%s\n", errctx));
1831                 DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));
1832         }
1833         free(errctx);
1834         lls_free_parse_result(lpr, CMD_PTR(DSS));
1835         if (lpr != cmdline_lpr)
1836                 lls_free_parse_result(cmdline_lpr, CMD_PTR(DSS));
1837         lls_free_parse_result(sublpr, subcmd);
1838         if (sublpr != cmdline_sublpr)
1839                 lls_free_parse_result(cmdline_sublpr, subcmd);
1840         exit(ret >= 0? EXIT_SUCCESS : EXIT_FAILURE);
1841 }