Subcommand sensitive logging.
[dss.git] / dss.c
1 /*
2  * Copyright (C) 2008-2011 Andre Noll <maan@tuebingen.mpg.de>
3  *
4  * Licensed under the GPL v2. For licencing details see COPYING.
5  */
6 #include <string.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <assert.h>
11 #include <errno.h>
12 #include <sys/types.h>
13 #include <signal.h>
14 #include <ctype.h>
15 #include <stdbool.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 #include <inttypes.h>
19 #include <sys/time.h>
20 #include <time.h>
21 #include <sys/wait.h>
22 #include <fnmatch.h>
23 #include <limits.h>
24 #include <fcntl.h>
25 #include <lopsub.h>
26 #include <sys/mman.h>
27
28 #include "gcc-compat.h"
29 #include "log.h"
30 #include "str.h"
31 #include "err.h"
32 #include "file.h"
33 #include "exec.h"
34 #include "daemon.h"
35 #include "sig.h"
36 #include "df.h"
37 #include "tv.h"
38 #include "snap.h"
39 #include "ipc.h"
40 #include "dss.lsg.h"
41
42 #define CMD_PTR(_cname) lls_cmd(LSG_DSS_CMD_ ## _cname, dss_suite)
43 #define OPT_RESULT(_cname, _oname) (lls_opt_result(\
44         LSG_DSS_ ## _cname ## _OPT_ ## _oname, (CMD_PTR(_cname) == CMD_PTR(DSS))? lpr : sublpr))
45 #define OPT_GIVEN(_cname, _oname) (lls_opt_given(OPT_RESULT(_cname, _oname)))
46 #define OPT_STRING_VAL(_cname, _oname) (lls_string_val(0, \
47         OPT_RESULT(_cname, _oname)))
48 #define OPT_UINT32_VAL(_cname, _oname) (lls_uint32_val(0, \
49                 OPT_RESULT(_cname, _oname)))
50
51 struct dss_user_data {int (*handler)(void);};
52 #define EXPORT_CMD_HANDLER(_cmd) const struct dss_user_data \
53         lsg_dss_com_ ## _cmd ## _user_data = { \
54                 .handler = com_ ## _cmd \
55         };
56
57 /*
58  * Command line and active options. We need to keep a copy of the parsed
59  * command line options for the SIGHUP case where we merge the command line
60  * options and the new config file options.
61  */
62 static struct lls_parse_result *cmdline_lpr, *lpr;
63
64 /** Parsed subcommand options. */
65 static struct lls_parse_result *cmdline_sublpr, *sublpr;
66 /* The executing subcommand (NULL at startup). */
67 static const struct lls_command *subcmd;
68 /** Whether daemon_init() was called. */
69 static bool daemonized;
70 /** Non-NULL if we log to a file. */
71 static FILE *logfile;
72 /** The read end of the signal pipe */
73 static int signal_pipe;
74 /** Process id of current pre-create-hook/rsync/post-create-hook process. */
75 static pid_t create_pid;
76 /** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
77 static int create_process_stopped;
78 /** How many times in a row the rsync command failed. */
79 static int num_consecutive_rsync_errors;
80 /** Process id of current pre-remove/rm/post-remove process. */
81 static pid_t remove_pid;
82 /** When the next snapshot is due. */
83 static int64_t next_snapshot_time;
84 /** When to try to remove something. */
85 static struct timeval next_removal_check;
86 /** Creation time of the snapshot currently being created. */
87 static int64_t current_snapshot_creation_time;
88 /** The snapshot currently being removed. */
89 struct snapshot *snapshot_currently_being_removed;
90 /** Needed by the post-create hook. */
91 static char *path_to_last_complete_snapshot;
92 static char *name_of_reference_snapshot;
93 /** \sa \ref snap.h for details. */
94 enum hook_status snapshot_creation_status;
95 /** \sa \ref snap.h for details. */
96 enum hook_status snapshot_removal_status;
97
98
99 DEFINE_DSS_ERRLIST;
100 static const char *hook_status_description[] = {HOOK_STATUS_ARRAY};
101
102 /* may be called with ds == NULL. */
103 static int disk_space_low(struct disk_space *ds)
104 {
105         struct disk_space ds_struct;
106         uint32_t val;
107
108         if (!ds) {
109                 int ret = get_disk_space(".", &ds_struct);
110                 if (ret < 0)
111                         return ret;
112                 ds = &ds_struct;
113         }
114         val = OPT_UINT32_VAL(DSS, MIN_FREE_MB);
115         if (val != 0)
116                 if (ds->free_mb < val)
117                         return 1;
118         val = OPT_UINT32_VAL(DSS, MIN_FREE_PERCENT);
119         if (val != 0)
120                 if (ds->percent_free < val)
121                         return 1;
122         val = OPT_UINT32_VAL(DSS, MIN_FREE_PERCENT_INODES);
123         if (val != 0)
124                 if (ds->percent_free_inodes < val)
125                         return 1;
126         return 0;
127 }
128
129 static void dump_dss_config(const char *msg)
130 {
131         const char dash[] = "-----------------------------";
132         char *lopsub_dump;
133         int ret;
134         FILE *log = logfile? logfile : stderr;
135         struct disk_space ds;
136         int64_t now = get_current_time();
137
138         if (OPT_UINT32_VAL(DSS, LOGLEVEL) > INFO)
139                 return;
140
141         fprintf(log, "%s <%s config> %s\n", dash, msg, dash);
142         fprintf(log, "\n*** disk space ***\n\n");
143         ret = get_disk_space(".", &ds);
144         if (ret >= 0) {
145                 DSS_INFO_LOG(("disk space low: %s\n", disk_space_low(&ds)?
146                         "yes" : "no"));
147                 log_disk_space(&ds);
148         } else
149                 DSS_ERROR_LOG(("can not get free disk space: %s\n",
150                         dss_strerror(-ret)));
151
152         /* we continue on errors from get_disk_space */
153
154         fprintf(log, "\n*** non-default options ***\n\n");
155         lopsub_dump = lls_dump_parse_result(lpr, CMD_PTR(DSS), true);
156         fprintf(log, "%s", lopsub_dump);
157         free(lopsub_dump);
158         fprintf(log, "\n*** non-default options for \"run\" ***\n\n");
159         lopsub_dump = lls_dump_parse_result(lpr, CMD_PTR(RUN), true);
160         fprintf(log, "%s", lopsub_dump);
161         free(lopsub_dump);
162         fprintf(log, "\n*** internal state ***\n\n");
163         fprintf(log,
164                 "pid: %d\n"
165                 "logile: %s\n"
166                 "snapshot_currently_being_removed: %s\n"
167                 "path_to_last_complete_snapshot: %s\n"
168                 "reference_snapshot: %s\n"
169                 "snapshot_creation_status: %s\n"
170                 "snapshot_removal_status: %s\n"
171                 "num_consecutive_rsync_errors: %d\n"
172                 ,
173                 (int) getpid(),
174                 logfile? OPT_STRING_VAL(RUN, LOGFILE) : "stderr",
175                 snapshot_currently_being_removed?
176                         snapshot_currently_being_removed->name : "(none)",
177                 path_to_last_complete_snapshot?
178                         path_to_last_complete_snapshot : "(none)",
179                 name_of_reference_snapshot?
180                         name_of_reference_snapshot : "(none)",
181                 hook_status_description[snapshot_creation_status],
182                 hook_status_description[snapshot_removal_status],
183                 num_consecutive_rsync_errors
184         );
185         if (create_pid != 0)
186                 fprintf(log,
187                         "create_pid: %" PRId32 "\n"
188                         "create process is %sstopped\n"
189                         ,
190                         create_pid,
191                         create_process_stopped? "" : "not "
192                 );
193         if (remove_pid != 0)
194                 fprintf(log, "remove_pid: %" PRId32 "\n", remove_pid);
195         if (next_snapshot_time != 0)
196                 fprintf(log, "next snapshot due in %" PRId64 " seconds\n",
197                         next_snapshot_time - now);
198         if (current_snapshot_creation_time != 0)
199                 fprintf(log, "current_snapshot_creation_time: %"
200                         PRId64 " (%" PRId64 " seconds ago)\n",
201                         current_snapshot_creation_time,
202                         now - current_snapshot_creation_time
203                 );
204         if (next_removal_check.tv_sec != 0) {
205                 fprintf(log, "next removal check: %llu (%llu seconds ago)\n",
206                         (long long unsigned)next_removal_check.tv_sec,
207                         now - (long long unsigned)next_removal_check.tv_sec
208                 );
209
210         }
211         fprintf(log, "%s </%s config> %s\n", dash, msg, dash);
212 }
213
/*
 * Level and source location of the next log message. These are written by
 * dss_log_set_params() and read by dss_log().
 */
static int loglevel = -1;
static const char *location_file = NULL;
static int         location_line = -1;
static const char *location_func = NULL;

/*
 * Remember the log level and the source location (file, line, function) for
 * the message which is about to be logged.
 */
void dss_log_set_params(int ll, const char *file, int line, const char *func)
{
	location_func = func;
	location_line = line;
	location_file = file;
	loglevel = ll;
}
226
227 /**
228  * The log function of dss.
229  *
230  * \param ll Loglevel.
231  * \param fml Usual format string.
232  *
233  * All DSS_XXX_LOG() macros use this function.
234  */
235 __printf_1_2 void dss_log(const char* fmt,...)
236 {
237         va_list argp;
238         FILE *outfd;
239         struct tm *tm;
240         time_t t1;
241         char str[255] = "";
242         int lpr_ll = lpr? OPT_UINT32_VAL(DSS, LOGLEVEL) : WARNING;
243
244         if (loglevel < lpr_ll)
245                 return;
246         outfd = logfile? logfile : stderr;
247         if (subcmd == CMD_PTR(RUN)) {
248                 time(&t1);
249                 tm = localtime(&t1);
250                 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
251                 fprintf(outfd, "%s ", str);
252                 if (lpr_ll <= INFO)
253                         fprintf(outfd, "%i: ", loglevel);
254         }
255         if (subcmd == CMD_PTR(RUN))
256 #ifdef DSS_NO_FUNC_NAMES
257                 fprintf(outfd, "%s:%d: ", location_file, location_line);
258 #else
259                 fprintf(outfd, "%s: ", location_func);
260 #endif
261         va_start(argp, fmt);
262         vfprintf(outfd, fmt, argp);
263         va_end(argp);
264 }
265
266 /**
267  * Print a message either to stdout or to the log file.
268  */
269 static __printf_1_2 void dss_msg(const char* fmt,...)
270 {
271         FILE *outfd = logfile? logfile : stdout;
272         va_list argp;
273         va_start(argp, fmt);
274         vfprintf(outfd, fmt, argp);
275         va_end(argp);
276 }
277
278 static char *get_config_file_name(void)
279 {
280         char *home, *config_file;
281
282         if (OPT_GIVEN(DSS, CONFIG_FILE))
283                 return dss_strdup(OPT_STRING_VAL(DSS, CONFIG_FILE));
284         home = get_homedir();
285         config_file = make_message("%s/.dssrc", home);
286         free(home);
287         return config_file;
288 }
289
290 static int send_signal(int sig)
291 {
292         pid_t pid;
293         char *config_file = get_config_file_name();
294         int ret = get_dss_pid(config_file, &pid);
295
296         free(config_file);
297         if (ret < 0)
298                 return ret;
299         if (OPT_GIVEN(DSS, DRY_RUN)) {
300                 dss_msg("%d\n", (int)pid);
301                 return 0;
302         }
303         ret = kill(pid, sig);
304         if (ret < 0)
305                 return -ERRNO_TO_DSS_ERROR(errno);
306         return 1;
307 }
308
309 struct signal_info {
310         const char * const name;
311         int num;
312 };
313
314 /*
315  * The table below was taken 2016 from proc/sig.c of procps-3.2.8. Copyright
316  * 1998-2003 by Albert Cahalan, GPLv2.
317  */
318 static const struct signal_info signal_table[] = {
319         {"ABRT",   SIGABRT},  /* IOT */
320         {"ALRM",   SIGALRM},
321         {"BUS",    SIGBUS},
322         {"CHLD",   SIGCHLD},  /* CLD */
323         {"CONT",   SIGCONT},
324         {"FPE",    SIGFPE},
325         {"HUP",    SIGHUP},
326         {"ILL",    SIGILL},
327         {"INT",    SIGINT},
328         {"KILL",   SIGKILL},
329         {"PIPE",   SIGPIPE},
330 #ifdef SIGPOLL
331         {"POLL",   SIGPOLL},  /* IO */
332 #endif
333         {"PROF",   SIGPROF},
334 #ifdef SIGPWR
335         {"PWR",    SIGPWR},
336 #endif
337         {"QUIT",   SIGQUIT},
338         {"SEGV",   SIGSEGV},
339 #ifdef SIGSTKFLT
340         {"STKFLT", SIGSTKFLT},
341 #endif
342         {"STOP",   SIGSTOP},
343         {"SYS",    SIGSYS},   /* UNUSED */
344         {"TERM",   SIGTERM},
345         {"TRAP",   SIGTRAP},
346         {"TSTP",   SIGTSTP},
347         {"TTIN",   SIGTTIN},
348         {"TTOU",   SIGTTOU},
349         {"URG",    SIGURG},
350         {"USR1",   SIGUSR1},
351         {"USR2",   SIGUSR2},
352         {"VTALRM", SIGVTALRM},
353         {"WINCH",  SIGWINCH},
354         {"XCPU",   SIGXCPU},
355         {"XFSZ",   SIGXFSZ}
356 };
357
358 #define SIGNAL_TABLE_SIZE (sizeof(signal_table) / sizeof(signal_table[0]))
359 #ifndef SIGRTMAX
360 #define SIGRTMAX 64
361 #endif
362
363 static int com_kill(void)
364 {
365         const char *arg = OPT_STRING_VAL(KILL, SIGNAL);
366         int ret, i;
367
368         if (*arg >= '0' && *arg <= '9') {
369                 int64_t val;
370                 ret = dss_atoi64(arg, &val);
371                 if (ret < 0)
372                         return ret;
373                 if (val < 0 || val > SIGRTMAX)
374                         return -ERRNO_TO_DSS_ERROR(EINVAL);
375                 return send_signal(val);
376         }
377         if (strncasecmp(arg, "sig", 3) == 0)
378                 arg += 3;
379         if (strcasecmp(arg, "CLD") == 0)
380                 return send_signal(SIGCHLD);
381         if (strcasecmp(arg, "IOT") == 0)
382                 return send_signal(SIGABRT);
383         for (i = 0; i < SIGNAL_TABLE_SIZE; i++)
384                 if (strcasecmp(arg, signal_table[i].name) == 0)
385                         return send_signal(signal_table[i].num);
386         DSS_ERROR_LOG(("invalid sigspec: %s\n", arg));
387         return -ERRNO_TO_DSS_ERROR(EINVAL);
388 }
389 EXPORT_CMD_HANDLER(kill);
390
391 static void dss_get_snapshot_list(struct snapshot_list *sl)
392 {
393         get_snapshot_list(sl, OPT_UINT32_VAL(DSS, UNIT_INTERVAL),
394                 OPT_UINT32_VAL(DSS, NUM_INTERVALS));
395 }
396
397 static int64_t compute_next_snapshot_time(void)
398 {
399         int64_t x = 0, now = get_current_time(), unit_interval
400                 = 24 * 3600 * OPT_UINT32_VAL(DSS, UNIT_INTERVAL), ret;
401         unsigned wanted = desired_number_of_snapshots(0,
402                 OPT_UINT32_VAL(DSS, NUM_INTERVALS)),
403                 num_complete = 0;
404         int i;
405         struct snapshot *s = NULL;
406         struct snapshot_list sl;
407
408         dss_get_snapshot_list(&sl);
409         FOR_EACH_SNAPSHOT(s, i, &sl) {
410                 if (!(s->flags & SS_COMPLETE))
411                         continue;
412                 num_complete++;
413                 x += s->completion_time - s->creation_time;
414         }
415         assert(x >= 0);
416
417         ret = now;
418         if (num_complete == 0)
419                 goto out;
420         x /= num_complete; /* avg time to create one snapshot */
421         if (unit_interval < x * wanted) /* oops, no sleep at all */
422                 goto out;
423         ret = s->completion_time + unit_interval / wanted - x;
424 out:
425         free_snapshot_list(&sl);
426         return ret;
427 }
428
429 static inline void invalidate_next_snapshot_time(void)
430 {
431         next_snapshot_time = 0;
432 }
433
434 static inline int next_snapshot_time_is_valid(void)
435 {
436         return next_snapshot_time != 0;
437 }
438
439 static int next_snapshot_is_due(void)
440 {
441         int64_t now = get_current_time();
442
443         if (!next_snapshot_time_is_valid())
444                 next_snapshot_time = compute_next_snapshot_time();
445         if (next_snapshot_time <= now) {
446                 DSS_DEBUG_LOG(("next snapshot: now\n"));
447                 return 1;
448         }
449         DSS_DEBUG_LOG(("next snapshot due in %" PRId64 " seconds\n",
450                 next_snapshot_time - now));
451         return 0;
452 }
453
454 static void pre_create_hook(void)
455 {
456         assert(snapshot_creation_status == HS_READY);
457         /* make sure that the next snapshot time will be recomputed */
458         invalidate_next_snapshot_time();
459         DSS_DEBUG_LOG(("executing %s\n", OPT_STRING_VAL(DSS, PRE_CREATE_HOOK)));
460         dss_exec_cmdline_pid(&create_pid, OPT_STRING_VAL(DSS, PRE_CREATE_HOOK));
461         snapshot_creation_status = HS_PRE_RUNNING;
462 }
463
464 static void pre_remove_hook(struct snapshot *s, const char *why)
465 {
466         char *cmd;
467
468         if (!s)
469                 return;
470         DSS_DEBUG_LOG(("%s snapshot %s\n", why, s->name));
471         assert(snapshot_removal_status == HS_READY);
472         assert(remove_pid == 0);
473         assert(!snapshot_currently_being_removed);
474
475         snapshot_currently_being_removed = dss_malloc(sizeof(struct snapshot));
476         *snapshot_currently_being_removed = *s;
477         snapshot_currently_being_removed->name = dss_strdup(s->name);
478
479         cmd = make_message("%s %s/%s", OPT_STRING_VAL(DSS, PRE_REMOVE_HOOK),
480                 OPT_STRING_VAL(DSS, DEST_DIR), s->name);
481         DSS_DEBUG_LOG(("executing %s\n", cmd));
482         dss_exec_cmdline_pid(&remove_pid, cmd);
483         free(cmd);
484         snapshot_removal_status = HS_PRE_RUNNING;
485 }
486
487 static int exec_rm(void)
488 {
489         struct snapshot *s = snapshot_currently_being_removed;
490         char *new_name = being_deleted_name(s);
491         char *argv[4];
492         int ret;
493
494         argv[0] = "rm";
495         argv[1] = "-rf";
496         argv[2] = new_name;
497         argv[3] = NULL;
498
499         assert(snapshot_removal_status == HS_PRE_SUCCESS);
500         assert(remove_pid == 0);
501
502         DSS_NOTICE_LOG(("removing %s (interval = %i)\n", s->name, s->interval));
503         ret = dss_rename(s->name, new_name);
504         if (ret < 0)
505                 goto out;
506         dss_exec(&remove_pid, argv[0], argv);
507         snapshot_removal_status = HS_RUNNING;
508 out:
509         free(new_name);
510         return ret;
511 }
512
513 static int snapshot_is_being_created(struct snapshot *s)
514 {
515         return s->creation_time == current_snapshot_creation_time;
516 }
517
518 static struct snapshot *find_orphaned_snapshot(struct snapshot_list *sl)
519 {
520         struct snapshot *s;
521         int i;
522
523         DSS_DEBUG_LOG(("looking for orphaned snapshots\n"));
524         FOR_EACH_SNAPSHOT(s, i, sl) {
525                 if (snapshot_is_being_created(s))
526                         continue;
527                 /*
528                  * We know that no rm is currently running, so if s is marked
529                  * as being deleted, a previously started rm must have failed.
530                  */
531                 if (s->flags & SS_BEING_DELETED)
532                         return s;
533
534                 if (s->flags & SS_COMPLETE) /* good snapshot */
535                         continue;
536                 /*
537                  * This snapshot is incomplete and it is not the snapshot
538                  * currently being created. However, we must not remove it if
539                  * rsync is about to be restarted. As only the newest snapshot
540                  * can be restarted, this snapshot is orphaned if it is not the
541                  * newest snapshot or if we are not about to restart rsync.
542                  */
543                 if (get_newest_snapshot(sl) != s)
544                         return s;
545                 if (snapshot_creation_status != HS_NEEDS_RESTART)
546                         return s;
547         }
548         /* no orphaned snapshots */
549         return NULL;
550 }
551
552 static int is_reference_snapshot(struct snapshot *s)
553 {
554         if (!name_of_reference_snapshot)
555                 return 0;
556         return strcmp(s->name, name_of_reference_snapshot)? 0 : 1;
557 }
558
559 /*
560  * return: 0: no redundant snapshots, 1: rm process started, negative: error
561  */
562 static struct snapshot *find_redundant_snapshot(struct snapshot_list *sl)
563 {
564         int i, interval;
565         struct snapshot *s;
566         unsigned missing = 0;
567         uint32_t N = OPT_UINT32_VAL(DSS, NUM_INTERVALS);
568
569         DSS_DEBUG_LOG(("looking for intervals containing too many snapshots\n"));
570         for (interval = N - 1; interval >= 0; interval--) {
571                 unsigned keep = desired_number_of_snapshots(interval, N);
572                 unsigned num = sl->interval_count[interval];
573                 struct snapshot *victim = NULL, *prev = NULL;
574                 int64_t score = LONG_MAX;
575
576                 if (keep >= num)
577                         missing += keep - num;
578                 if (keep + missing >= num)
579                         continue;
580                 /* redundant snapshot in this interval, pick snapshot with lowest score */
581                 FOR_EACH_SNAPSHOT(s, i, sl) {
582                         int64_t this_score;
583
584                         if (snapshot_is_being_created(s))
585                                 continue;
586                         if (is_reference_snapshot(s))
587                                 continue;
588                         if (s->interval > interval) {
589                                 prev = s;
590                                 continue;
591                         }
592                         if (s->interval < interval)
593                                 break;
594                         if (!victim) {
595                                 victim = s;
596                                 prev = s;
597                                 continue;
598                         }
599                         assert(prev);
600                         /* check if s is a better victim */
601                         this_score = s->creation_time - prev->creation_time;
602                         assert(this_score >= 0);
603                         if (this_score < score) {
604                                 score = this_score;
605                                 victim = s;
606                         }
607                         prev = s;
608                 }
609                 assert(victim);
610                 return victim;
611         }
612         return NULL;
613 }
614
615 static struct snapshot *find_outdated_snapshot(struct snapshot_list *sl)
616 {
617         int i;
618         struct snapshot *s;
619
620         DSS_DEBUG_LOG(("looking for snapshots belonging to intervals >= %d\n",
621                 OPT_UINT32_VAL(DSS, NUM_INTERVALS)));
622         FOR_EACH_SNAPSHOT(s, i, sl) {
623                 if (snapshot_is_being_created(s))
624                         continue;
625                 if (is_reference_snapshot(s))
626                         continue;
627                 if (s->interval < OPT_UINT32_VAL(DSS, NUM_INTERVALS))
628                         continue;
629                 return s;
630         }
631         return NULL;
632 }
633
634 static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
635 {
636         int i, num_complete;
637         struct snapshot *s, *ref = NULL;
638
639         num_complete = num_complete_snapshots(sl);
640         if (num_complete <= OPT_UINT32_VAL(DSS, MIN_COMPLETE))
641                 return NULL;
642         FOR_EACH_SNAPSHOT(s, i, sl) {
643                 if (snapshot_is_being_created(s))
644                         continue;
645                 if (is_reference_snapshot(s)) { /* avoid this one */
646                         ref = s;
647                         continue;
648                 }
649                 DSS_INFO_LOG(("oldest removable snapshot: %s\n", s->name));
650                 return s;
651         }
652         assert(ref);
653         DSS_WARNING_LOG(("removing reference snapshot %s\n", ref->name));
654         return ref;
655 }
656
657 static int rename_incomplete_snapshot(int64_t start)
658 {
659         char *old_name;
660         int ret;
661         int64_t now;
662
663         /*
664          * We don't want the dss_rename() below to fail with EEXIST because the
665          * last complete snapshot was created (and completed) in the same
666          * second as this one.
667          */
668         while ((now = get_current_time()) == start)
669                 sleep(1);
670         free(path_to_last_complete_snapshot);
671         ret = complete_name(start, now, &path_to_last_complete_snapshot);
672         if (ret < 0)
673                 return ret;
674         old_name = incomplete_name(start);
675         ret = dss_rename(old_name, path_to_last_complete_snapshot);
676         if (ret >= 0)
677                 DSS_NOTICE_LOG(("%s -> %s\n", old_name,
678                         path_to_last_complete_snapshot));
679         free(old_name);
680         return ret;
681 }
682
683 static int try_to_free_disk_space(void)
684 {
685         int ret;
686         struct snapshot_list sl;
687         struct snapshot *victim;
688         struct timeval now;
689         const char *why;
690         int low_disk_space;
691
692         ret = disk_space_low(NULL);
693         if (ret < 0)
694                 return ret;
695         low_disk_space = ret;
696         gettimeofday(&now, NULL);
697         if (tv_diff(&next_removal_check, &now, NULL) > 0)
698                 return 0;
699         if (!low_disk_space) {
700                 if (OPT_GIVEN(DSS, KEEP_REDUNDANT))
701                         return 0;
702                 if (snapshot_creation_status != HS_READY)
703                         return 0;
704                 if (next_snapshot_is_due())
705                         return 0;
706         }
707         /*
708          * Idle and --keep_redundant not given, or low disk space. Look at
709          * existing snapshots.
710          */
711         dss_get_snapshot_list(&sl);
712         ret = 0;
713         /*
714          * Don't remove anything if there is free space and we have fewer
715          * snapshots than configured, plus one. This way there is always one
716          * snapshot that can be recycled.
717          */
718         if (!low_disk_space && sl.num_snapshots <=
719                         1 << OPT_UINT32_VAL(DSS, NUM_INTERVALS))
720                 goto out;
721         why = "outdated";
722         victim = find_outdated_snapshot(&sl);
723         if (victim)
724                 goto remove;
725         why = "redundant";
726         victim = find_redundant_snapshot(&sl);
727         if (victim)
728                 goto remove;
729         why = "orphaned";
730         victim = find_orphaned_snapshot(&sl);
731         if (victim)
732                 goto remove;
733         /* try harder only if disk space is low */
734         if (!low_disk_space)
735                 goto out;
736         DSS_WARNING_LOG(("disk space low and nothing obvious to remove\n"));
737         victim = find_oldest_removable_snapshot(&sl);
738         if (victim)
739                 goto remove;
740         DSS_CRIT_LOG(("uhuhu: disk space low and nothing to remove\n"));
741         ret = -ERRNO_TO_DSS_ERROR(ENOSPC);
742         goto out;
743 remove:
744         pre_remove_hook(victim, why);
745 out:
746         free_snapshot_list(&sl);
747         return ret;
748 }
749
750 static void post_create_hook(void)
751 {
752         char *cmd = make_message("%s %s/%s",
753                 OPT_STRING_VAL(DSS, POST_CREATE_HOOK),
754                 OPT_STRING_VAL(DSS, DEST_DIR), path_to_last_complete_snapshot);
755         DSS_NOTICE_LOG(("executing %s\n", cmd));
756         dss_exec_cmdline_pid(&create_pid, cmd);
757         free(cmd);
758         snapshot_creation_status = HS_POST_RUNNING;
759 }
760
761 static void post_remove_hook(void)
762 {
763         char *cmd;
764         struct snapshot *s = snapshot_currently_being_removed;
765
766         assert(s);
767
768         cmd = make_message("%s %s/%s", OPT_STRING_VAL(DSS, POST_REMOVE_HOOK),
769                 OPT_STRING_VAL(DSS, DEST_DIR), s->name);
770         DSS_NOTICE_LOG(("executing %s\n", cmd));
771         dss_exec_cmdline_pid(&remove_pid, cmd);
772         free(cmd);
773         snapshot_removal_status = HS_POST_RUNNING;
774 }
775
776 static void dss_kill(pid_t pid, int sig, const char *msg)
777 {
778         const char *signame, *process_name;
779
780         if (pid == 0)
781                 return;
782         switch (sig) {
783         case SIGTERM: signame = "TERM"; break;
784         case SIGSTOP: signame = "STOP"; break;
785         case SIGCONT: signame = "CONT"; break;
786         default: signame = "????";
787         }
788
789         if (pid == create_pid)
790                 process_name = "create";
791         else if (pid == remove_pid)
792                 process_name = "remove";
793         else process_name = "??????";
794
795         if (msg)
796                 DSS_INFO_LOG(("%s\n", msg));
797         DSS_DEBUG_LOG(("sending signal %d (%s) to pid %d (%s process)\n",
798                 sig, signame, (int)pid, process_name));
799         if (kill(pid, sig) >= 0)
800                 return;
801         DSS_INFO_LOG(("failed to send signal %d (%s) to pid %d (%s process)\n",
802                 sig, signame, (int)pid, process_name));
803 }
804
805 static void stop_create_process(void)
806 {
807         if (create_process_stopped)
808                 return;
809         dss_kill(create_pid, SIGSTOP, "suspending create process");
810         create_process_stopped = 1;
811 }
812
813 static void restart_create_process(void)
814 {
815         if (!create_process_stopped)
816                 return;
817         dss_kill(create_pid, SIGCONT, "resuming create process");
818         create_process_stopped = 0;
819 }
820
/**
 * Print a log message about the exit status of a child.
 *
 * \param pid The process id of the child.
 * \param status The status value as stored by waitpid().
 *
 * A regular exit is logged at info level, a termination by a signal at
 * notice level, and anything else as a warning.
 */
static void log_termination_msg(pid_t pid, int status)
{
	if (WIFEXITED(status))
		DSS_INFO_LOG(("child %i exited. Exit status: %i\n", (int)pid,
			WEXITSTATUS(status)));
	else if (WIFSIGNALED(status))
		DSS_NOTICE_LOG(("child %i was killed by signal %i\n", (int)pid,
			WTERMSIG(status)));
	else
		DSS_WARNING_LOG(("child %i terminated abnormally\n", (int)pid));
}
835
836 static int wait_for_process(pid_t pid, int *status)
837 {
838         int ret;
839
840         DSS_DEBUG_LOG(("Waiting for process %d to terminate\n", (int)pid));
841         for (;;) {
842                 fd_set rfds;
843
844                 FD_ZERO(&rfds);
845                 FD_SET(signal_pipe, &rfds);
846                 ret = dss_select(signal_pipe + 1, &rfds, NULL, NULL);
847                 if (ret < 0)
848                         break;
849                 ret = next_signal();
850                 if (!ret)
851                         continue;
852                 if (ret == SIGCHLD) {
853                         ret = waitpid(pid, status, 0);
854                         if (ret >= 0)
855                                 break;
856                         if (errno != EINTR) { /* error */
857                                 ret = -ERRNO_TO_DSS_ERROR(errno);
858                                 break;
859                         }
860                 }
861                 /* SIGINT or SIGTERM */
862                 dss_kill(pid, SIGTERM, "killing child process");
863         }
864         if (ret < 0)
865                 DSS_ERROR_LOG(("failed to wait for process %d\n", (int)pid));
866         else
867                 log_termination_msg(pid, *status);
868         return ret;
869 }
870
871 static void handle_pre_remove_exit(int status)
872 {
873         if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
874                 snapshot_removal_status = HS_READY;
875                 gettimeofday(&next_removal_check, NULL);
876                 next_removal_check.tv_sec += 60;
877                 return;
878         }
879         snapshot_removal_status = HS_PRE_SUCCESS;
880 }
881
882 static int handle_rm_exit(int status)
883 {
884         if (!WIFEXITED(status)) {
885                 snapshot_removal_status = HS_READY;
886                 return -E_INVOLUNTARY_EXIT;
887         }
888         if (WEXITSTATUS(status)) {
889                 snapshot_removal_status = HS_READY;
890                 return -E_BAD_EXIT_CODE;
891         }
892         snapshot_removal_status = HS_SUCCESS;
893         return 1;
894 }
895
896 static void handle_post_remove_exit(void)
897 {
898         snapshot_removal_status = HS_READY;
899 }
900
901 static int handle_remove_exit(int status)
902 {
903         int ret;
904         struct snapshot *s = snapshot_currently_being_removed;
905
906         assert(s);
907         switch (snapshot_removal_status) {
908         case HS_PRE_RUNNING:
909                 handle_pre_remove_exit(status);
910                 ret = 1;
911                 break;
912         case HS_RUNNING:
913                 ret = handle_rm_exit(status);
914                 break;
915         case HS_POST_RUNNING:
916                 handle_post_remove_exit();
917                 ret = 1;
918                 break;
919         default:
920                 ret = -E_BUG;
921         }
922         if (snapshot_removal_status == HS_READY) {
923                 free(s->name);
924                 free(s);
925                 snapshot_currently_being_removed = NULL;
926         }
927         remove_pid = 0;
928         return ret;
929 }
930
931 static int wait_for_remove_process(void)
932 {
933         int status, ret;
934
935         assert(remove_pid);
936         assert(
937                 snapshot_removal_status == HS_PRE_RUNNING ||
938                 snapshot_removal_status == HS_RUNNING ||
939                 snapshot_removal_status == HS_POST_RUNNING
940         );
941         ret = wait_for_process(remove_pid, &status);
942         if (ret < 0)
943                 return ret;
944         return handle_remove_exit(status);
945 }
946
947 static int handle_rsync_exit(int status)
948 {
949         int es, ret;
950
951         if (!WIFEXITED(status)) {
952                 DSS_ERROR_LOG(("rsync process %d died involuntary\n", (int)create_pid));
953                 ret = -E_INVOLUNTARY_EXIT;
954                 snapshot_creation_status = HS_READY;
955                 goto out;
956         }
957         es = WEXITSTATUS(status);
958         /*
959          * Restart rsync on non-fatal errors:
960          * 24: Partial transfer due to vanished source files
961          */
962         if (es != 0 && es != 24) {
963                 DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
964                         es, ++num_consecutive_rsync_errors));
965                 if (!logfile) { /* called by com_run() */
966                         ret = -E_BAD_EXIT_CODE;
967                         goto out;
968                 }
969                 if (num_consecutive_rsync_errors >
970                                 OPT_UINT32_VAL(RUN, MAX_RSYNC_ERRORS)) {
971                         ret = -E_TOO_MANY_RSYNC_ERRORS;
972                         snapshot_creation_status = HS_READY;
973                         goto out;
974                 }
975                 DSS_WARNING_LOG(("restarting rsync process\n"));
976                 snapshot_creation_status = HS_NEEDS_RESTART;
977                 next_snapshot_time = get_current_time() + 60;
978                 ret = 1;
979                 goto out;
980         }
981         num_consecutive_rsync_errors = 0;
982         ret = rename_incomplete_snapshot(current_snapshot_creation_time);
983         if (ret < 0)
984                 goto out;
985         snapshot_creation_status = HS_SUCCESS;
986         free(name_of_reference_snapshot);
987         name_of_reference_snapshot = NULL;
988 out:
989         create_process_stopped = 0;
990         return ret;
991 }
992
993 static int handle_pre_create_hook_exit(int status)
994 {
995         int es, ret;
996         static int warn_count;
997
998         if (!WIFEXITED(status)) {
999                 snapshot_creation_status = HS_READY;
1000                 ret = -E_INVOLUNTARY_EXIT;
1001                 goto out;
1002         }
1003         es = WEXITSTATUS(status);
1004         if (es) {
1005                 if (!warn_count--) {
1006                         DSS_NOTICE_LOG(("pre_create_hook %s returned %d\n",
1007                                 OPT_STRING_VAL(DSS, PRE_CREATE_HOOK), es));
1008                         DSS_NOTICE_LOG(("deferring snapshot creation...\n"));
1009                         warn_count = 60; /* warn only once per hour */
1010                 }
1011                 next_snapshot_time = get_current_time() + 60;
1012                 snapshot_creation_status = HS_READY;
1013                 ret = 0;
1014                 goto out;
1015         }
1016         warn_count = 0;
1017         snapshot_creation_status = HS_PRE_SUCCESS;
1018         ret = 1;
1019 out:
1020         return ret;
1021 }
1022
1023 static int handle_sigchld(void)
1024 {
1025         pid_t pid;
1026         int status, ret = reap_child(&pid, &status);
1027
1028         if (ret <= 0)
1029                 return ret;
1030
1031         if (pid == create_pid) {
1032                 switch (snapshot_creation_status) {
1033                 case HS_PRE_RUNNING:
1034                         ret = handle_pre_create_hook_exit(status);
1035                         break;
1036                 case HS_RUNNING:
1037                         ret = handle_rsync_exit(status);
1038                         break;
1039                 case HS_POST_RUNNING:
1040                         snapshot_creation_status = HS_READY;
1041                         ret = 1;
1042                         break;
1043                 default:
1044                         DSS_EMERG_LOG(("BUG: create can't die in status %d\n",
1045                                 snapshot_creation_status));
1046                         return -E_BUG;
1047                 }
1048                 create_pid = 0;
1049                 return ret;
1050         }
1051         if (pid == remove_pid) {
1052                 ret = handle_remove_exit(status);
1053                 if (ret < 0)
1054                         return ret;
1055                 return ret;
1056         }
1057         DSS_EMERG_LOG(("BUG: unknown process %d died\n", (int)pid));
1058         return -E_BUG;
1059 }
1060
1061 static int change_to_dest_dir(void)
1062 {
1063         int ret;
1064         const char *dd = OPT_STRING_VAL(DSS, DEST_DIR);
1065
1066         DSS_INFO_LOG(("changing cwd to %s\n", dd));
1067         if (chdir(dd) >= 0)
1068                 return 1;
1069         ret = -ERRNO_TO_DSS_ERROR(errno);
1070         DSS_ERROR_LOG(("could not change cwd to %s\n", dd));
1071         return ret;
1072 }
1073
1074 static int check_config(void)
1075 {
1076         int ret;
1077         uint32_t unit_interval = OPT_UINT32_VAL(DSS, UNIT_INTERVAL);
1078         uint32_t num_intervals = OPT_UINT32_VAL(DSS, NUM_INTERVALS);
1079
1080         if (unit_interval == 0) {
1081                 DSS_ERROR_LOG(("bad unit interval: %i\n", unit_interval));
1082                 return -E_INVALID_NUMBER;
1083         }
1084         DSS_DEBUG_LOG(("unit interval: %i day(s)\n", unit_interval));
1085
1086         if (num_intervals == 0 || num_intervals > 30) {
1087                 DSS_ERROR_LOG(("bad number of intervals: %i\n", num_intervals));
1088                 return -E_INVALID_NUMBER;
1089         }
1090         if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE))
1091                 if (!OPT_GIVEN(DSS, SOURCE_DIR)) {
1092                         DSS_ERROR_LOG(("--source-dir required\n"));
1093                         return -E_SYNTAX;
1094                 }
1095         if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE)
1096                         || subcmd == CMD_PTR(LS) || subcmd == CMD_PTR(PRUNE)) {
1097                 if (!OPT_GIVEN(DSS, DEST_DIR)) {
1098                         DSS_ERROR_LOG(("--dest-dir required\n"));
1099                         return -E_SYNTAX;
1100                 }
1101                 ret = change_to_dest_dir();
1102                 if (ret < 0)
1103                         return ret;
1104         }
1105         DSS_DEBUG_LOG(("number of intervals: %i\n", num_intervals));
1106         return 1;
1107 }
1108
1109 static int lopsub_error(int lopsub_ret, char **errctx)
1110 {
1111         const char *msg = lls_strerror(-lopsub_ret);
1112         if (*errctx)
1113                 DSS_ERROR_LOG(("%s: %s\n", *errctx, msg));
1114         else
1115                 DSS_ERROR_LOG(("%s\n", msg));
1116         free(*errctx);
1117         *errctx = NULL;
1118         return -E_LOPSUB;
1119 }
1120
1121 static int parse_config_file(bool sighup, const struct lls_command *cmd)
1122 {
1123         int ret, fd = -1;
1124         char *config_file = get_config_file_name();
1125         struct stat statbuf;
1126         void *map;
1127         size_t sz;
1128         int cf_argc;
1129         char **cf_argv, *errctx = NULL;
1130         struct lls_parse_result *cf_lpr, *merged_lpr, *clpr;
1131         const char *subcmd_name;
1132
1133         ret = open(config_file, O_RDONLY);
1134         if (ret < 0) {
1135                 if (errno != ENOENT || OPT_GIVEN(DSS, CONFIG_FILE)) {
1136                         ret = -ERRNO_TO_DSS_ERROR(errno);
1137                         DSS_ERROR_LOG(("config file %s can not be opened\n",
1138                                 config_file));
1139                         goto out;
1140                 }
1141                 /* no config file -- nothing to do */
1142                 ret = 0;
1143                 goto success;
1144         }
1145         fd = ret;
1146         ret = fstat(fd, &statbuf);
1147         if (ret < 0) {
1148                 ret = -ERRNO_TO_DSS_ERROR(errno);
1149                 DSS_ERROR_LOG(("failed to stat config file %s\n", config_file));
1150                 goto close_fd;
1151         }
1152         sz = statbuf.st_size;
1153         if (sz == 0) { /* config file is empty -- nothing to do */
1154                 ret = 0;
1155                 goto success;
1156         }
1157         map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
1158         if (map == MAP_FAILED) {
1159                 ret = -ERRNO_TO_DSS_ERROR(errno);
1160                 DSS_ERROR_LOG(("failed to mmap config file %s\n",
1161                         config_file));
1162                 goto close_fd;
1163         }
1164         if (cmd == CMD_PTR(DSS))
1165                 subcmd_name = NULL;
1166         else
1167                 subcmd_name = lls_command_name(cmd);
1168         ret = lls_convert_config(map, sz, subcmd_name, &cf_argv, &errctx);
1169         munmap(map, sz);
1170         if (ret < 0) {
1171                 DSS_ERROR_LOG(("failed to convert config file %s\n",
1172                         config_file));
1173                 ret = lopsub_error(ret, &errctx);
1174                 goto close_fd;
1175         }
1176         cf_argc = ret;
1177         ret = lls_parse(cf_argc, cf_argv, cmd, &cf_lpr, &errctx);
1178         lls_free_argv(cf_argv);
1179         if (ret < 0) {
1180                 ret = lopsub_error(ret, &errctx);
1181                 goto close_fd;
1182         }
1183         clpr = cmd == CMD_PTR(DSS)? cmdline_lpr : cmdline_sublpr;
1184         if (sighup) /* config file overrides command line */
1185                 ret = lls_merge(cf_lpr, clpr, cmd, &merged_lpr, &errctx);
1186         else /* command line options overrride config file options */
1187                 ret = lls_merge(clpr, cf_lpr, cmd, &merged_lpr, &errctx);
1188         lls_free_parse_result(cf_lpr, cmd);
1189         if (ret < 0) {
1190                 ret = lopsub_error(ret, &errctx);
1191                 goto close_fd;
1192         }
1193         ret = 1;
1194 success:
1195         assert(ret >= 0);
1196         DSS_DEBUG_LOG(("loglevel: %d\n", OPT_UINT32_VAL(DSS, LOGLEVEL)));
1197         if (cmd != CMD_PTR(DSS)) {
1198                 if (ret > 0) {
1199                         if (sublpr != cmdline_sublpr)
1200                                 lls_free_parse_result(sublpr, cmd);
1201                         sublpr = merged_lpr;
1202                 } else
1203                         sublpr = cmdline_sublpr;
1204         } else {
1205                 if (ret > 0) {
1206                         if (lpr != cmdline_lpr)
1207                                 lls_free_parse_result(lpr, cmd);
1208                         lpr = merged_lpr;
1209                 } else
1210                         lpr = cmdline_lpr;
1211         }
1212 close_fd:
1213         if (fd >= 0)
1214                 close(fd);
1215 out:
1216         free(config_file);
1217         if (ret < 0)
1218                 DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));
1219         return ret;
1220 }
1221
1222 static int handle_sighup(void)
1223 {
1224         int ret;
1225
1226         DSS_NOTICE_LOG(("SIGHUP, re-reading config\n"));
1227         dump_dss_config("old");
1228         ret = parse_config_file(true /* SIGHUP */, CMD_PTR(DSS));
1229         if (ret < 0)
1230                 return ret;
1231         ret = parse_config_file(true /* SIGHUP */, CMD_PTR(RUN));
1232         if (ret < 0)
1233                 return ret;
1234         ret = check_config();
1235         if (ret < 0)
1236                 return ret;
1237         close_log(logfile);
1238         logfile = NULL;
1239         if (OPT_GIVEN(RUN, DAEMON) || daemonized) {
1240                 logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE));
1241                 log_welcome(OPT_UINT32_VAL(DSS, LOGLEVEL));
1242                 daemonized = true;
1243         }
1244         dump_dss_config("reloaded");
1245         invalidate_next_snapshot_time();
1246         return 1;
1247 }
1248
1249 static void kill_children(void)
1250 {
1251         restart_create_process();
1252         dss_kill(create_pid, SIGTERM, NULL);
1253         dss_kill(remove_pid, SIGTERM, NULL);
1254 }
1255
1256 static int handle_signal(void)
1257 {
1258         int sig, ret = next_signal();
1259
1260         if (ret <= 0)
1261                 goto out;
1262         sig = ret;
1263         switch (sig) {
1264         case SIGINT:
1265         case SIGTERM:
1266                 kill_children();
1267                 ret = -E_SIGNAL;
1268                 break;
1269         case SIGHUP:
1270                 ret = handle_sighup();
1271                 break;
1272         case SIGCHLD:
1273                 ret = handle_sigchld();
1274                 break;
1275         }
1276 out:
1277         if (ret < 0)
1278                 DSS_ERROR_LOG(("%s\n", dss_strerror(-ret)));
1279         return ret;
1280 }
1281
1282 /*
1283  * We can not use rsync locally if the local user is different from the remote
1284  * user or if the src dir is not on the local host (or both).
1285  */
1286 static int use_rsync_locally(char *logname)
1287 {
1288         const char *h = OPT_STRING_VAL(DSS, REMOTE_HOST);
1289
1290         if (strcmp(h, "localhost") && strcmp(h, "127.0.0.1"))
1291                 return 0;
1292         if (OPT_GIVEN(DSS, REMOTE_USER) &&
1293                         strcmp(OPT_STRING_VAL(DSS, REMOTE_USER), logname))
1294                 return 0;
1295         return 1;
1296 }
1297
1298 static int rename_resume_snap(int64_t creation_time)
1299 {
1300         struct snapshot_list sl;
1301         struct snapshot *s = NULL;
1302         char *new_name = incomplete_name(creation_time);
1303         int ret;
1304         const char *why;
1305
1306         sl.num_snapshots = 0;
1307
1308         ret = 0;
1309         dss_get_snapshot_list(&sl);
1310         /*
1311          * Snapshot recycling: We first look at the newest snapshot. If this
1312          * snapshot happens to be incomplete, the last rsync process was
1313          * aborted and we reuse this one. Otherwise we look at snapshots which
1314          * could be removed (outdated and redundant snapshots) as candidates
1315          * for recycling. If no outdated/redundant snapshot exists, we check if
1316          * there is an orphaned snapshot, which likely is useless anyway.
1317          *
1318          * Only if no existing snapshot is suitable for recycling, we bite the
1319          * bullet and create a new one.
1320          */
1321         s = get_newest_snapshot(&sl);
1322         if (!s) /* no snapshots at all */
1323                 goto out;
1324         /* re-use last snapshot if it is incomplete */
1325         why = "aborted";
1326         if ((s->flags & SS_COMPLETE) == 0)
1327                 goto out;
1328         why = "outdated";
1329         s = find_outdated_snapshot(&sl);
1330         if (s)
1331                 goto out;
1332         why = "redundant";
1333         s = find_redundant_snapshot(&sl);
1334         if (s)
1335                 goto out;
1336         why = "orphaned";
1337         s = find_orphaned_snapshot(&sl);
1338 out:
1339         if (s) {
1340                 DSS_NOTICE_LOG(("recycling %s snapshot %s\n", why, s->name));
1341                 ret = dss_rename(s->name, new_name);
1342         }
1343         if (ret >= 0)
1344                 DSS_NOTICE_LOG(("creating %s\n", new_name));
1345         free(new_name);
1346         free_snapshot_list(&sl);
1347         return ret;
1348 }
1349
1350 static void create_rsync_argv(char ***argv, int64_t *num)
1351 {
1352         char *logname;
1353         int i = 0, j, N = OPT_GIVEN(DSS, RSYNC_OPTION);
1354         struct snapshot_list sl;
1355         static bool seeded;
1356
1357         dss_get_snapshot_list(&sl);
1358         assert(!name_of_reference_snapshot);
1359         name_of_reference_snapshot = name_of_newest_complete_snapshot(&sl);
1360         free_snapshot_list(&sl);
1361
1362         *argv = dss_malloc((15 + N) * sizeof(char *));
1363         (*argv)[i++] = dss_strdup("rsync");
1364         (*argv)[i++] = dss_strdup("-a");
1365         (*argv)[i++] = dss_strdup("--delete");
1366         if (!seeded) {
1367                 srandom((unsigned)time(NULL)); /* no need to be fancy here */
1368                 seeded = true;
1369         }
1370         if (1000 * (random() / (RAND_MAX + 1.0)) < OPT_UINT32_VAL(DSS, CHECKSUM)) {
1371                 DSS_NOTICE_LOG(("adding --checksum to rsync options\n"));
1372                 (*argv)[i++] = dss_strdup("--checksum");
1373         }
1374         for (j = 0; j < N; j++)
1375                 (*argv)[i++] = dss_strdup(lls_string_val(j,
1376                         OPT_RESULT(DSS, RSYNC_OPTION)));
1377         if (name_of_reference_snapshot) {
1378                 DSS_INFO_LOG(("using %s as reference\n", name_of_reference_snapshot));
1379                 (*argv)[i++] = make_message("--link-dest=../%s",
1380                         name_of_reference_snapshot);
1381         } else
1382                 DSS_INFO_LOG(("no suitable reference snapshot found\n"));
1383         logname = dss_logname();
1384         if (use_rsync_locally(logname))
1385                 (*argv)[i++] = dss_strdup(OPT_STRING_VAL(DSS, SOURCE_DIR));
1386         else
1387                 (*argv)[i++] = make_message("%s@%s:%s/",
1388                         OPT_GIVEN(DSS, REMOTE_USER)?
1389                                 OPT_STRING_VAL(DSS, REMOTE_USER) : logname,
1390                         OPT_STRING_VAL(DSS, REMOTE_HOST),
1391                         OPT_STRING_VAL(DSS, SOURCE_DIR));
1392         free(logname);
1393         *num = get_current_time();
1394         (*argv)[i++] = incomplete_name(*num);
1395         (*argv)[i++] = NULL;
1396         for (j = 0; j < i; j++)
1397                 DSS_DEBUG_LOG(("argv[%d] = %s\n", j, (*argv)[j]));
1398 }
1399
/*
 * Release an argument vector and all strings it contains.
 *
 * argv is a NULL-terminated array of allocated strings, or NULL, in which
 * case the function does nothing.
 */
static void free_rsync_argv(char **argv)
{
	char **p;

	if (argv == NULL)
		return;
	for (p = argv; *p != NULL; p++)
		free(*p);
	free(argv);
}
1410
1411 static int create_snapshot(char **argv)
1412 {
1413         int ret;
1414
1415         ret = rename_resume_snap(current_snapshot_creation_time);
1416         if (ret < 0)
1417                 return ret;
1418         dss_exec(&create_pid, argv[0], argv);
1419         snapshot_creation_status = HS_RUNNING;
1420         return ret;
1421 }
1422
1423 static int select_loop(void)
1424 {
1425         int ret;
1426         /* check every 60 seconds for free disk space */
1427         struct timeval tv;
1428         char **rsync_argv = NULL;
1429
1430         for (;;) {
1431                 fd_set rfds;
1432                 struct timeval *tvp;
1433
1434                 if (remove_pid)
1435                         tvp = NULL; /* sleep until rm hook/process dies */
1436                 else { /* sleep one minute */
1437                         tv.tv_sec = 60;
1438                         tv.tv_usec = 0;
1439                         tvp = &tv;
1440                 }
1441                 FD_ZERO(&rfds);
1442                 FD_SET(signal_pipe, &rfds);
1443                 ret = dss_select(signal_pipe + 1, &rfds, NULL, tvp);
1444                 if (ret < 0)
1445                         goto out;
1446                 if (FD_ISSET(signal_pipe, &rfds)) {
1447                         ret = handle_signal();
1448                         if (ret < 0)
1449                                 goto out;
1450                 }
1451                 if (remove_pid)
1452                         continue;
1453                 if (snapshot_removal_status == HS_PRE_SUCCESS) {
1454                         ret = exec_rm();
1455                         if (ret < 0)
1456                                 goto out;
1457                         continue;
1458                 }
1459                 if (snapshot_removal_status == HS_SUCCESS) {
1460                         post_remove_hook();
1461                         continue;
1462                 }
1463                 ret = try_to_free_disk_space();
1464                 if (ret < 0)
1465                         goto out;
1466                 if (snapshot_removal_status != HS_READY) {
1467                         stop_create_process();
1468                         continue;
1469                 }
1470                 restart_create_process();
1471                 switch (snapshot_creation_status) {
1472                 case HS_READY:
1473                         if (!next_snapshot_is_due())
1474                                 continue;
1475                         pre_create_hook();
1476                         continue;
1477                 case HS_PRE_RUNNING:
1478                 case HS_RUNNING:
1479                 case HS_POST_RUNNING:
1480                         continue;
1481                 case HS_PRE_SUCCESS:
1482                         if (!name_of_reference_snapshot) {
1483                                 free_rsync_argv(rsync_argv);
1484                                 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1485                         }
1486                         ret = create_snapshot(rsync_argv);
1487                         if (ret < 0)
1488                                 goto out;
1489                         continue;
1490                 case HS_NEEDS_RESTART:
1491                         if (!next_snapshot_is_due())
1492                                 continue;
1493                         ret = create_snapshot(rsync_argv);
1494                         if (ret < 0)
1495                                 goto out;
1496                         continue;
1497                 case HS_SUCCESS:
1498                         post_create_hook();
1499                         continue;
1500                 }
1501         }
1502 out:
1503         return ret;
1504 }
1505
1506 static void exit_hook(int exit_code)
1507 {
1508         const char *argv[3];
1509         pid_t pid;
1510
1511         argv[0] = OPT_STRING_VAL(DSS, EXIT_HOOK);
1512         argv[1] = dss_strerror(-exit_code);
1513         argv[2] = NULL;
1514
1515         DSS_NOTICE_LOG(("executing %s %s\n", argv[0], argv[1]));
1516         dss_exec(&pid, argv[0], (char **)argv);
1517 }
1518
/*
 * Acquire the dss lock that belongs to the config file. If the lock can not
 * be obtained, the error is logged and the process exits with failure.
 */
static void lock_dss_or_die(void)
{
	char *config_file = get_config_file_name();
	int ret = lock_dss(config_file);

	free(config_file);
	if (ret >= 0)
		return;
	DSS_EMERG_LOG(("failed to lock: %s\n", dss_strerror(-ret)));
	exit(EXIT_FAILURE);
}
1530
1531 static int com_run(void)
1532 {
1533         int ret, fd = -1;
1534         char *config_file;
1535         pid_t pid;
1536
1537         if (OPT_GIVEN(DSS, DRY_RUN)) {
1538                 DSS_ERROR_LOG(("dry run not supported by this command\n"));
1539                 return -E_SYNTAX;
1540         }
1541         config_file = get_config_file_name();
1542         ret = get_dss_pid(config_file, &pid);
1543         free(config_file);
1544         if (ret >= 0) {
1545                 DSS_ERROR_LOG(("pid %d\n", (int)pid));
1546                 return -E_ALREADY_RUNNING;
1547         }
1548         if (OPT_GIVEN(RUN, DAEMON)) {
1549                 fd = daemon_init();
1550                 daemonized = true;
1551                 logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE));
1552         }
1553         lock_dss_or_die();
1554         dump_dss_config("startup");
1555         ret = install_sighandler(SIGHUP);
1556         if (ret < 0)
1557                 return ret;
1558         if (fd >= 0) {
1559                 ret = write(fd, "\0", 1);
1560                 if (ret != 1) {
1561                         DSS_ERROR_LOG(("write to daemon pipe returned %d\n",
1562                                 ret));
1563                         if (ret < 0)
1564                                 return -ERRNO_TO_DSS_ERROR(errno);
1565                         return -E_BUG;
1566                 }
1567         }
1568         ret = select_loop();
1569         if (ret >= 0) /* impossible */
1570                 ret = -E_BUG;
1571         kill_children();
1572         exit_hook(ret);
1573         return ret;
1574 }
1575 EXPORT_CMD_HANDLER(run);
1576
1577 static int com_prune(void)
1578 {
1579         int ret;
1580         struct snapshot_list sl;
1581         struct snapshot *victim;
1582         struct disk_space ds;
1583         const char *why;
1584
1585         lock_dss_or_die();
1586         ret = get_disk_space(".", &ds);
1587         if (ret < 0)
1588                 return ret;
1589         log_disk_space(&ds);
1590         dss_get_snapshot_list(&sl);
1591         why = "outdated";
1592         victim = find_outdated_snapshot(&sl);
1593         if (victim)
1594                 goto rm;
1595         why = "redundant";
1596         victim = find_redundant_snapshot(&sl);
1597         if (victim)
1598                 goto rm;
1599         ret = 0;
1600         goto out;
1601 rm:
1602         if (OPT_GIVEN(DSS, DRY_RUN)) {
1603                 dss_msg("%s snapshot %s (interval = %i)\n",
1604                         why, victim->name, victim->interval);
1605                 ret = 0;
1606                 goto out;
1607         }
1608         pre_remove_hook(victim, why);
1609         if (snapshot_removal_status == HS_PRE_RUNNING) {
1610                 ret = wait_for_remove_process();
1611                 if (ret < 0)
1612                         goto out;
1613                 if (snapshot_removal_status != HS_PRE_SUCCESS)
1614                         goto out;
1615         }
1616         ret = exec_rm();
1617         if (ret < 0)
1618                 goto out;
1619         ret = wait_for_remove_process();
1620         if (ret < 0)
1621                 goto out;
1622         if (snapshot_removal_status != HS_SUCCESS)
1623                 goto out;
1624         post_remove_hook();
1625         if (snapshot_removal_status != HS_POST_RUNNING)
1626                 goto out;
1627         ret = wait_for_remove_process();
1628         if (ret < 0)
1629                 goto out;
1630         ret = 1;
1631 out:
1632         free_snapshot_list(&sl);
1633         return ret;
1634 }
1635 EXPORT_CMD_HANDLER(prune);
1636
1637 static int com_create(void)
1638 {
1639         int ret, status;
1640         char **rsync_argv;
1641
1642         lock_dss_or_die();
1643         if (OPT_GIVEN(DSS, DRY_RUN)) {
1644                 int i;
1645                 char *msg = NULL;
1646                 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1647                 for (i = 0; rsync_argv[i]; i++) {
1648                         char *tmp = msg;
1649                         msg = make_message("%s%s%s", tmp? tmp : "",
1650                                 tmp? " " : "", rsync_argv[i]);
1651                         free(tmp);
1652                 }
1653                 free_rsync_argv(rsync_argv);
1654                 dss_msg("%s\n", msg);
1655                 free(msg);
1656                 return 1;
1657         }
1658         pre_create_hook();
1659         if (create_pid) {
1660                 ret = wait_for_process(create_pid, &status);
1661                 if (ret < 0)
1662                         return ret;
1663                 ret = handle_pre_create_hook_exit(status);
1664                 if (ret <= 0) /* error, or pre-create failed */
1665                         return ret;
1666         }
1667         create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1668         ret = create_snapshot(rsync_argv);
1669         if (ret < 0)
1670                 goto out;
1671         ret = wait_for_process(create_pid, &status);
1672         if (ret < 0)
1673                 goto out;
1674         ret = handle_rsync_exit(status);
1675         if (ret < 0)
1676                 goto out;
1677         post_create_hook();
1678         if (create_pid)
1679                 ret = wait_for_process(create_pid, &status);
1680 out:
1681         free_rsync_argv(rsync_argv);
1682         return ret;
1683 }
1684 EXPORT_CMD_HANDLER(create);
1685
1686 static int com_ls(void)
1687 {
1688         int i;
1689         struct snapshot_list sl;
1690         struct snapshot *s;
1691
1692         dss_get_snapshot_list(&sl);
1693         FOR_EACH_SNAPSHOT(s, i, &sl) {
1694                 int64_t d = 0;
1695                 if (s->flags & SS_COMPLETE)
1696                         d = (s->completion_time - s->creation_time) / 60;
1697                 dss_msg("%u\t%s\t%3" PRId64 ":%02" PRId64 "\n", s->interval, s->name, d/60, d%60);
1698         }
1699         free_snapshot_list(&sl);
1700         return 1;
1701 }
1702 EXPORT_CMD_HANDLER(ls);
1703
1704 static int setup_signal_handling(void)
1705 {
1706         int ret;
1707
1708         DSS_INFO_LOG(("setting up signal handlers\n"));
1709         signal_pipe = signal_init(); /* always successful */
1710         ret = install_sighandler(SIGINT);
1711         if (ret < 0)
1712                 return ret;
1713         ret = install_sighandler(SIGTERM);
1714         if (ret < 0)
1715                 return ret;
1716         return install_sighandler(SIGCHLD);
1717 }
1718
1719 static void handle_version_and_help(void)
1720 {
1721         char *txt;
1722
1723         if (OPT_GIVEN(DSS, DETAILED_HELP))
1724                 txt = lls_long_help(CMD_PTR(DSS));
1725         else if (OPT_GIVEN(DSS, HELP))
1726                 txt = lls_short_help(CMD_PTR(DSS));
1727         else if (OPT_GIVEN(DSS, VERSION))
1728                 txt = dss_strdup(VERSION_STRING);
1729         else
1730                 return;
1731         printf("%s", txt);
1732         free(txt);
1733         exit(EXIT_SUCCESS);
1734 }
1735
1736 static void show_subcommand_summary(void)
1737 {
1738         const struct lls_command *cmd;
1739         int i;
1740
1741         printf("Available subcommands:\n");
1742         for (i = 1; (cmd = lls_cmd(i, dss_suite)); i++) {
1743                 const char *name = lls_command_name(cmd);
1744                 const char *purpose = lls_purpose(cmd);
1745                 printf("%-10s%s\n", name, purpose);
1746         }
1747         exit(EXIT_SUCCESS);
1748 }
1749
1750 int main(int argc, char **argv)
1751 {
1752         int ret;
1753         char *errctx = NULL;
1754         unsigned num_inputs;
1755         const struct dss_user_data *ud;
1756
1757         ret = lls_parse(argc, argv, CMD_PTR(DSS), &cmdline_lpr, &errctx);
1758         if (ret < 0) {
1759                 ret = lopsub_error(ret, &errctx);
1760                 goto out;
1761         }
1762         lpr = cmdline_lpr;
1763         ret = parse_config_file(false /* no SIGHUP */, CMD_PTR(DSS));
1764         if (ret < 0)
1765                 goto out;
1766         handle_version_and_help();
1767         num_inputs = lls_num_inputs(lpr);
1768         if (num_inputs == 0)
1769                 show_subcommand_summary();
1770         ret = lls_lookup_subcmd(argv[argc - num_inputs], dss_suite, &errctx);
1771         if (ret < 0) {
1772                 ret = lopsub_error(ret, &errctx);
1773                 goto out;
1774         }
1775         subcmd = lls_cmd(ret, dss_suite);
1776         ret = lls_parse(num_inputs, argv + argc - num_inputs, subcmd,
1777                 &cmdline_sublpr, &errctx);
1778         if (ret < 0) {
1779                 ret = lopsub_error(ret, &errctx);
1780                 goto out;
1781         }
1782         sublpr = cmdline_sublpr;
1783         ret = parse_config_file(false /* no SIGHUP */, subcmd);
1784         if (ret < 0)
1785                 goto out;
1786         ret = check_config();
1787         if (ret < 0)
1788                 goto out;
1789         ret = setup_signal_handling();
1790         if (ret < 0)
1791                 goto out;
1792         ud = lls_user_data(subcmd);
1793         ret = ud->handler();
1794         signal_shutdown();
1795 out:
1796         if (ret < 0) {
1797                 if (errctx)
1798                         DSS_ERROR_LOG(("%s\n", errctx));
1799                 DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));
1800         }
1801         free(errctx);
1802         lls_free_parse_result(lpr, CMD_PTR(DSS));
1803         if (lpr != cmdline_lpr)
1804                 lls_free_parse_result(cmdline_lpr, CMD_PTR(DSS));
1805         lls_free_parse_result(sublpr, subcmd);
1806         if (sublpr != cmdline_sublpr)
1807                 lls_free_parse_result(cmdline_sublpr, subcmd);
1808         exit(ret >= 0? EXIT_SUCCESS : EXIT_FAILURE);
1809 }