]> git.tuebingen.mpg.de Git - dss.git/blob - dss.c
f7919c0dfd267f21bad649845642f5003e0a3c83
[dss.git] / dss.c
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <string.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <stdarg.h>
6 #include <assert.h>
7 #include <errno.h>
8 #include <sys/types.h>
9 #include <signal.h>
10 #include <ctype.h>
11 #include <stdbool.h>
12 #include <sys/stat.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15 #include <sys/time.h>
16 #include <time.h>
17 #include <sys/wait.h>
18 #include <fnmatch.h>
19 #include <limits.h>
20 #include <fcntl.h>
21 #include <lopsub.h>
22 #include <sys/mman.h>
23
24 #include "gcc-compat.h"
25 #include "log.h"
26 #include "str.h"
27 #include "err.h"
28 #include "file.h"
29 #include "exec.h"
30 #include "daemon.h"
31 #include "sig.h"
32 #include "df.h"
33 #include "tv.h"
34 #include "snap.h"
35 #include "ipc.h"
36 #include "dss.lsg.h"
37
/* Look up the lopsub command structure of the named command in dss_suite. */
#define CMD_PTR(_cname) lls_cmd(LSG_DSS_CMD_ ## _cname, dss_suite)
/*
 * Fetch the parse result of option _oname of command _cname. Options of the
 * DSS command are looked up in lpr, options of all other commands in sublpr.
 */
#define OPT_RESULT(_cname, _oname) (lls_opt_result(\
        LSG_DSS_ ## _cname ## _OPT_ ## _oname, (CMD_PTR(_cname) == CMD_PTR(DSS))? lpr : sublpr))
/* Pass the parse result of the option to lls_opt_given(). */
#define OPT_GIVEN(_cname, _oname) (lls_opt_given(OPT_RESULT(_cname, _oname)))
/* The string value with index 0 of the option. */
#define OPT_STRING_VAL(_cname, _oname) (lls_string_val(0, \
        OPT_RESULT(_cname, _oname)))
/* The uint32 value with index 0 of the option. */
#define OPT_UINT32_VAL(_cname, _oname) (lls_uint32_val(0, \
                OPT_RESULT(_cname, _oname)))
46
/* User data of a subcommand: the function which implements it. */
struct dss_user_data {int (*handler)(void);};
/*
 * Define the object lsg_dss_com_<cmd>_user_data whose handler member points
 * to the function com_<cmd>().
 */
#define EXPORT_CMD_HANDLER(_cmd) const struct dss_user_data \
        lsg_dss_com_ ## _cmd ## _user_data = { \
                .handler = com_ ## _cmd \
        };
52
/*
 * Command line and active options. We need to keep a copy of the parsed
 * command line options for the SIGHUP case where we merge the command line
 * options and the new config file options.
 */
static struct lls_parse_result *cmdline_lpr, *lpr;

/** Parsed subcommand options. */
static struct lls_parse_result *cmdline_sublpr, *sublpr;
/* The executing subcommand (NULL at startup). */
static const struct lls_command *subcmd;
/** Whether daemon_init() was called. */
static bool daemonized;
/** Non-NULL if we log to a file. */
static FILE *logfile;
/* Realpath of the config file. */
static char *config_file;
/** The read end of the signal pipe */
static int signal_pipe;
/** Process id of current pre-create-hook/rsync/post-create-hook process. */
static pid_t create_pid;
/** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
static int create_process_stopped;
/** How many times in a row the rsync command failed. */
static int num_consecutive_rsync_errors;
/** Process id of current pre-remove/rm/post-remove process. */
static pid_t remove_pid;
/** When the next snapshot is due. Zero means: not computed yet. */
static int64_t next_snapshot_time;
/** When to try to remove something. */
static struct timeval next_removal_check;
/** Creation time of the snapshot currently being created. */
static int64_t current_snapshot_creation_time;
/* Set by the pre-rm hook, cleared by handle_remove_exit(). */
struct snapshot *snapshot_currently_being_removed;
/** Needed by the post-create hook. */
static char *path_to_last_complete_snapshot;
/* Name of the reference snapshot, NULL if there is none. */
static char *name_of_reference_snapshot;
/** \sa \ref snap.h for details. */
enum hook_status snapshot_creation_status;
/** \sa \ref snap.h for details. */
enum hook_status snapshot_removal_status;


DEFINE_DSS_ERRLIST;
/* Textual descriptions of the hook states, indexed by enum hook_status. */
static const char *hook_status_description[] = {HOOK_STATUS_ARRAY};
99
100 /* may be called with ds == NULL. */
101 static int disk_space_low(struct disk_space *ds)
102 {
103         struct disk_space ds_struct;
104         uint32_t val;
105
106         if (!ds) {
107                 int ret = get_disk_space(".", &ds_struct);
108                 if (ret < 0)
109                         return ret;
110                 ds = &ds_struct;
111         }
112         val = OPT_UINT32_VAL(DSS, MIN_FREE_MB);
113         if (val != 0)
114                 if (ds->free_mb < val)
115                         return 1;
116         val = OPT_UINT32_VAL(DSS, MIN_FREE_PERCENT);
117         if (val != 0)
118                 if (ds->percent_free < val)
119                         return 1;
120         val = OPT_UINT32_VAL(DSS, MIN_FREE_PERCENT_INODES);
121         if (val != 0)
122                 if (ds->percent_free_inodes < val)
123                         return 1;
124         return 0;
125 }
126
127 static void dump_dss_config(const char *msg)
128 {
129         const char dash[] = "-----------------------------";
130         char *lopsub_dump;
131         int ret;
132         FILE *log = logfile? logfile : stderr;
133         struct disk_space ds;
134         int64_t now = get_current_time();
135
136         if (OPT_UINT32_VAL(DSS, LOGLEVEL) > INFO)
137                 return;
138
139         fprintf(log, "%s <%s config> %s\n", dash, msg, dash);
140         fprintf(log, "\n*** disk space ***\n\n");
141         ret = get_disk_space(".", &ds);
142         if (ret >= 0) {
143                 DSS_INFO_LOG(("disk space low: %s\n", disk_space_low(&ds)?
144                         "yes" : "no"));
145                 log_disk_space(&ds);
146         } else
147                 DSS_ERROR_LOG(("can not get free disk space: %s\n",
148                         dss_strerror(-ret)));
149
150         /* we continue on errors from get_disk_space */
151
152         fprintf(log, "\n*** non-default options ***\n\n");
153         lopsub_dump = lls_dump_parse_result(lpr, CMD_PTR(DSS), true);
154         fprintf(log, "%s", lopsub_dump);
155         free(lopsub_dump);
156         fprintf(log, "\n*** non-default options for \"run\" ***\n\n");
157         lopsub_dump = lls_dump_parse_result(lpr, CMD_PTR(RUN), true);
158         fprintf(log, "%s", lopsub_dump);
159         free(lopsub_dump);
160         fprintf(log, "\n*** internal state ***\n\n");
161         fprintf(log,
162                 "pid: %d\n"
163                 "logfile: %s\n"
164                 "snapshot_currently_being_removed: %s\n"
165                 "path_to_last_complete_snapshot: %s\n"
166                 "reference_snapshot: %s\n"
167                 "snapshot_creation_status: %s\n"
168                 "snapshot_removal_status: %s\n"
169                 "num_consecutive_rsync_errors: %d\n"
170                 ,
171                 (int) getpid(),
172                 logfile? OPT_STRING_VAL(RUN, LOGFILE) : "stderr",
173                 snapshot_currently_being_removed?
174                         snapshot_currently_being_removed->name : "(none)",
175                 path_to_last_complete_snapshot?
176                         path_to_last_complete_snapshot : "(none)",
177                 name_of_reference_snapshot?
178                         name_of_reference_snapshot : "(none)",
179                 hook_status_description[snapshot_creation_status],
180                 hook_status_description[snapshot_removal_status],
181                 num_consecutive_rsync_errors
182         );
183         if (create_pid != 0)
184                 fprintf(log,
185                         "create_pid: %" PRId32 "\n"
186                         "create process is %sstopped\n"
187                         ,
188                         create_pid,
189                         create_process_stopped? "" : "not "
190                 );
191         if (remove_pid != 0)
192                 fprintf(log, "remove_pid: %" PRId32 "\n", remove_pid);
193         if (next_snapshot_time != 0)
194                 fprintf(log, "next snapshot due in %" PRId64 " seconds\n",
195                         next_snapshot_time - now);
196         if (current_snapshot_creation_time != 0)
197                 fprintf(log, "current_snapshot_creation_time: %"
198                         PRId64 " (%" PRId64 " seconds ago)\n",
199                         current_snapshot_creation_time,
200                         now - current_snapshot_creation_time
201                 );
202         if (next_removal_check.tv_sec != 0) {
203                 fprintf(log, "next removal check: %llu (%llu seconds ago)\n",
204                         (long long unsigned)next_removal_check.tv_sec,
205                         now - (long long unsigned)next_removal_check.tv_sec
206                 );
207
208         }
209         fprintf(log, "%s </%s config> %s\n", dash, msg, dash);
210 }
211
/*
 * Severity and source code location of a log message. These are stored by
 * dss_log_set_params() and read by dss_log().
 */
static const char *location_file = NULL;
static const char *location_func = NULL;
static int location_line = -1;
static int loglevel = -1;

/*
 * Store the log level ll and the call site (file, line, func) in the
 * variables above.
 */
void dss_log_set_params(int ll, const char *file, int line, const char *func)
{
        location_func = func;
        location_line = line;
        location_file = file;
        loglevel = ll;
}
224
225 /**
226  * The log function of dss.
227  *
228  * \param ll Loglevel.
229  * \param fml Usual format string.
230  *
231  * All DSS_XXX_LOG() macros use this function.
232  */
233 __printf_1_2 void dss_log(const char* fmt,...)
234 {
235         va_list argp;
236         FILE *outfd;
237         struct tm *tm;
238         time_t t1;
239         char str[255] = "";
240         int lpr_ll = lpr? OPT_UINT32_VAL(DSS, LOGLEVEL) : WARNING;
241
242         if (loglevel < lpr_ll)
243                 return;
244         outfd = logfile? logfile : stderr;
245         if (subcmd == CMD_PTR(RUN)) {
246                 time(&t1);
247                 tm = localtime(&t1);
248                 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
249                 fprintf(outfd, "%s ", str);
250                 if (lpr_ll <= INFO)
251                         fprintf(outfd, "%i: ", loglevel);
252         }
253         if (subcmd == CMD_PTR(RUN))
254 #ifdef DSS_NO_FUNC_NAMES
255                 fprintf(outfd, "%s:%d: ", location_file, location_line);
256 #else
257                 fprintf(outfd, "%s: ", location_func);
258 #endif
259         va_start(argp, fmt);
260         vfprintf(outfd, fmt, argp);
261         va_end(argp);
262 }
263
264 /**
265  * Print a message either to stdout or to the log file.
266  */
267 static __printf_1_2 void dss_msg(const char* fmt,...)
268 {
269         FILE *outfd = logfile? logfile : stdout;
270         va_list argp;
271         va_start(argp, fmt);
272         vfprintf(outfd, fmt, argp);
273         va_end(argp);
274 }
275
276 static void set_config_file_name(void)
277 {
278
279         if (OPT_GIVEN(DSS, CONFIG_FILE)) {
280                 const char *arg = OPT_STRING_VAL(DSS, CONFIG_FILE);
281                 config_file = realpath(arg, NULL);
282                 if (!config_file) {
283                         DSS_EMERG_LOG(("could not resolve path %s: %s\n", arg,
284                                 strerror(errno)));
285                         exit(EXIT_FAILURE);
286                 }
287         } else {
288                 char *home = get_homedir();
289                 char *arg = make_message("%s/.dssrc", home);
290                 free(home);
291                 config_file = realpath(arg, NULL);
292                 if (config_file)
293                         free(arg);
294                 else /* not fatal */
295                         config_file = arg;
296         }
297         DSS_DEBUG_LOG(("config file: %s\n", config_file));
298 }
299
300 static int send_signal(int sig, bool wait)
301 {
302         pid_t pid;
303         int ret = get_dss_pid(config_file, &pid);
304         unsigned ms = 32;
305         struct timespec ts;
306
307         if (ret < 0)
308                 return ret;
309         if (OPT_GIVEN(DSS, DRY_RUN)) {
310                 dss_msg("%d\n", (int)pid);
311                 return 0;
312         }
313         DSS_NOTICE_LOG(("sending signal %d to pid %d\n", sig, (int)pid));
314         ret = kill(pid, sig);
315         if (ret < 0)
316                 return -ERRNO_TO_DSS_ERROR(errno);
317         if (!wait)
318                 return 1;
319         while (ms < 5000) {
320                 ts.tv_sec = ms / 1000;
321                 ts.tv_nsec = (ms % 1000) * 1000 * 1000;
322                 ret = nanosleep(&ts, NULL);
323                 if (ret < 0)
324                         return -ERRNO_TO_DSS_ERROR(errno);
325                 ret = kill(pid, 0);
326                 if (ret < 0) {
327                         if (errno != ESRCH)
328                                 return -ERRNO_TO_DSS_ERROR(errno);
329                         return 1;
330                 }
331                 ms *= 2;
332         }
333         return -E_KILL_TIMEOUT;
334 }
335
/* Maps the name of a signal (without the SIG prefix) to its number. */
struct signal_info {
        const char * const name;
        int num;
};

/*
 * The table below was taken 2016 from proc/sig.c of procps-3.2.8. Copyright
 * 1998-2003 by Albert Cahalan, GPLv2.
 *
 * Signals which are not available on all systems are only included if the
 * corresponding SIGXXX macro is defined.
 */
static const struct signal_info signal_table[] = {
        {"ABRT",   SIGABRT},  /* IOT */
        {"ALRM",   SIGALRM},
        {"BUS",    SIGBUS},
        {"CHLD",   SIGCHLD},  /* CLD */
        {"CONT",   SIGCONT},
        {"FPE",    SIGFPE},
        {"HUP",    SIGHUP},
        {"ILL",    SIGILL},
        {"INT",    SIGINT},
        {"KILL",   SIGKILL},
        {"PIPE",   SIGPIPE},
#ifdef SIGPOLL
        {"POLL",   SIGPOLL},  /* IO */
#endif
        {"PROF",   SIGPROF},
#ifdef SIGPWR
        {"PWR",    SIGPWR},
#endif
        {"QUIT",   SIGQUIT},
        {"SEGV",   SIGSEGV},
#ifdef SIGSTKFLT
        {"STKFLT", SIGSTKFLT},
#endif
        {"STOP",   SIGSTOP},
        {"SYS",    SIGSYS},   /* UNUSED */
        {"TERM",   SIGTERM},
        {"TRAP",   SIGTRAP},
        {"TSTP",   SIGTSTP},
        {"TTIN",   SIGTTIN},
        {"TTOU",   SIGTTOU},
        {"URG",    SIGURG},
        {"USR1",   SIGUSR1},
        {"USR2",   SIGUSR2},
        {"VTALRM", SIGVTALRM},
        {"WINCH",  SIGWINCH},
        {"XCPU",   SIGXCPU},
        {"XFSZ",   SIGXFSZ}
};

/* Number of entries of the signal table. */
#define SIGNAL_TABLE_SIZE (sizeof(signal_table) / sizeof(signal_table[0]))
/* Fallback for systems which do not define SIGRTMAX. */
#ifndef SIGRTMAX
#define SIGRTMAX 64
#endif
389
390 static int com_kill(void)
391 {
392         bool w_given = OPT_GIVEN(KILL, WAIT);
393         const char *arg = OPT_STRING_VAL(KILL, SIGNAL);
394         int ret, i;
395
396         if (*arg >= '0' && *arg <= '9') {
397                 int64_t val;
398                 ret = dss_atoi64(arg, &val);
399                 if (ret < 0)
400                         return ret;
401                 if (val < 0 || val > SIGRTMAX)
402                         return -ERRNO_TO_DSS_ERROR(EINVAL);
403                 return send_signal(val, w_given);
404         }
405         if (strncasecmp(arg, "sig", 3) == 0)
406                 arg += 3;
407         if (strcasecmp(arg, "CLD") == 0)
408                 return send_signal(SIGCHLD, w_given);
409         if (strcasecmp(arg, "IOT") == 0)
410                 return send_signal(SIGABRT, w_given);
411         for (i = 0; i < SIGNAL_TABLE_SIZE; i++)
412                 if (strcasecmp(arg, signal_table[i].name) == 0)
413                         return send_signal(signal_table[i].num, w_given);
414         DSS_ERROR_LOG(("invalid sigspec: %s\n", arg));
415         return -ERRNO_TO_DSS_ERROR(EINVAL);
416 }
417 EXPORT_CMD_HANDLER(kill);
418
419 static void dss_get_snapshot_list(struct snapshot_list *sl)
420 {
421         get_snapshot_list(sl, OPT_UINT32_VAL(DSS, UNIT_INTERVAL),
422                 OPT_UINT32_VAL(DSS, NUM_INTERVALS));
423 }
424
425 static int64_t compute_next_snapshot_time(void)
426 {
427         int64_t x = 0, now = get_current_time(), unit_interval
428                 = 24 * 3600 * OPT_UINT32_VAL(DSS, UNIT_INTERVAL), ret,
429                 last_completion_time;
430         unsigned wanted = desired_number_of_snapshots(0,
431                 OPT_UINT32_VAL(DSS, NUM_INTERVALS)),
432                 num_complete = 0;
433         int i;
434         struct snapshot *s = NULL;
435         struct snapshot_list sl;
436
437         dss_get_snapshot_list(&sl);
438         FOR_EACH_SNAPSHOT(s, i, &sl) {
439                 if (!(s->flags & SS_COMPLETE))
440                         continue;
441                 num_complete++;
442                 x += s->completion_time - s->creation_time;
443                 last_completion_time = s->completion_time;
444         }
445         assert(x >= 0);
446
447         ret = now;
448         if (num_complete == 0)
449                 goto out;
450         x /= num_complete; /* avg time to create one snapshot */
451         if (unit_interval < x * wanted) /* oops, no sleep at all */
452                 goto out;
453         ret = last_completion_time + unit_interval / wanted - x;
454 out:
455         free_snapshot_list(&sl);
456         return ret;
457 }
458
459 static inline void invalidate_next_snapshot_time(void)
460 {
461         next_snapshot_time = 0;
462 }
463
464 static inline int next_snapshot_time_is_valid(void)
465 {
466         return next_snapshot_time != 0;
467 }
468
469 static int next_snapshot_is_due(void)
470 {
471         int64_t now = get_current_time();
472
473         if (!next_snapshot_time_is_valid())
474                 next_snapshot_time = compute_next_snapshot_time();
475         if (next_snapshot_time <= now) {
476                 DSS_DEBUG_LOG(("next snapshot: now\n"));
477                 return 1;
478         }
479         DSS_DEBUG_LOG(("next snapshot due in %" PRId64 " seconds\n",
480                 next_snapshot_time - now));
481         return 0;
482 }
483
484 static void pre_create_hook(void)
485 {
486         assert(snapshot_creation_status == HS_READY);
487         /* make sure that the next snapshot time will be recomputed */
488         invalidate_next_snapshot_time();
489         DSS_DEBUG_LOG(("executing %s\n", OPT_STRING_VAL(DSS, PRE_CREATE_HOOK)));
490         dss_exec_cmdline_pid(&create_pid, OPT_STRING_VAL(DSS, PRE_CREATE_HOOK));
491         snapshot_creation_status = HS_PRE_RUNNING;
492 }
493
494 static void pre_remove_hook(struct snapshot *s, const char *why)
495 {
496         char *cmd;
497
498         if (!s)
499                 return;
500         DSS_DEBUG_LOG(("%s snapshot %s\n", why, s->name));
501         assert(snapshot_removal_status == HS_READY);
502         assert(remove_pid == 0);
503         assert(!snapshot_currently_being_removed);
504
505         snapshot_currently_being_removed = dss_malloc(sizeof(struct snapshot));
506         *snapshot_currently_being_removed = *s;
507         snapshot_currently_being_removed->name = dss_strdup(s->name);
508
509         cmd = make_message("%s %s/%s", OPT_STRING_VAL(DSS, PRE_REMOVE_HOOK),
510                 OPT_STRING_VAL(DSS, DEST_DIR), s->name);
511         DSS_DEBUG_LOG(("executing %s\n", cmd));
512         dss_exec_cmdline_pid(&remove_pid, cmd);
513         free(cmd);
514         snapshot_removal_status = HS_PRE_RUNNING;
515 }
516
517 static int exec_rm(void)
518 {
519         struct snapshot *s = snapshot_currently_being_removed;
520         char *new_name = being_deleted_name(s);
521         char *argv[4];
522         int ret;
523
524         argv[0] = "rm";
525         argv[1] = "-rf";
526         argv[2] = new_name;
527         argv[3] = NULL;
528
529         assert(snapshot_removal_status == HS_PRE_SUCCESS);
530         assert(remove_pid == 0);
531
532         DSS_NOTICE_LOG(("removing %s (interval = %i)\n", s->name, s->interval));
533         ret = dss_rename(s->name, new_name);
534         if (ret < 0)
535                 goto out;
536         dss_exec(&remove_pid, argv[0], argv);
537         snapshot_removal_status = HS_RUNNING;
538 out:
539         free(new_name);
540         return ret;
541 }
542
543 static int snapshot_is_being_created(struct snapshot *s)
544 {
545         return s->creation_time == current_snapshot_creation_time;
546 }
547
548 static struct snapshot *find_orphaned_snapshot(struct snapshot_list *sl)
549 {
550         struct snapshot *s;
551         int i;
552
553         DSS_DEBUG_LOG(("looking for old incomplete snapshots\n"));
554         FOR_EACH_SNAPSHOT(s, i, sl) {
555                 if (snapshot_is_being_created(s))
556                         continue;
557                 /*
558                  * We know that no rm is currently running, so if s is marked
559                  * as being deleted, a previously started rm must have failed.
560                  */
561                 if (s->flags & SS_BEING_DELETED)
562                         return s;
563
564                 if (s->flags & SS_COMPLETE) /* good snapshot */
565                         continue;
566                 /*
567                  * This snapshot is incomplete and it is not the snapshot
568                  * currently being created. However, we must not remove it if
569                  * rsync is about to be restarted. As only the newest snapshot
570                  * can be restarted, this snapshot is orphaned if it is not the
571                  * newest snapshot or if we are not about to restart rsync.
572                  */
573                 if (get_newest_snapshot(sl) != s)
574                         return s;
575                 if (snapshot_creation_status != HS_NEEDS_RESTART)
576                         return s;
577         }
578         /* no orphaned snapshots */
579         return NULL;
580 }
581
582 static int is_reference_snapshot(struct snapshot *s)
583 {
584         if (!name_of_reference_snapshot)
585                 return 0;
586         return strcmp(s->name, name_of_reference_snapshot)? 0 : 1;
587 }
588
589 static struct snapshot *find_redundant_snapshot(struct snapshot_list *sl)
590 {
591         int i, interval;
592         struct snapshot *s;
593         unsigned missing = 0;
594         uint32_t N = OPT_UINT32_VAL(DSS, NUM_INTERVALS);
595
596         DSS_DEBUG_LOG(("looking for intervals containing too many snapshots\n"));
597         for (interval = N - 1; interval >= 0; interval--) {
598                 unsigned keep = desired_number_of_snapshots(interval, N);
599                 unsigned num = sl->interval_count[interval];
600                 struct snapshot *victim = NULL, *prev = NULL;
601                 int64_t score = LONG_MAX;
602
603                 if (keep >= num)
604                         missing += keep - num;
605                 if (keep + missing >= num)
606                         continue;
607                 /* redundant snapshot in this interval, pick snapshot with lowest score */
608                 FOR_EACH_SNAPSHOT(s, i, sl) {
609                         int64_t this_score;
610
611                         if (snapshot_is_being_created(s))
612                                 continue;
613                         if (is_reference_snapshot(s))
614                                 continue;
615                         if (s->interval > interval) {
616                                 prev = s;
617                                 continue;
618                         }
619                         if (s->interval < interval)
620                                 break;
621                         if (!victim) {
622                                 victim = s;
623                                 prev = s;
624                                 continue;
625                         }
626                         assert(prev);
627                         /* check if s is a better victim */
628                         this_score = s->creation_time - prev->creation_time;
629                         assert(this_score >= 0);
630                         if (this_score < score) {
631                                 score = this_score;
632                                 victim = s;
633                         }
634                         prev = s;
635                 }
636                 assert(victim);
637                 return victim;
638         }
639         return NULL;
640 }
641
642 static struct snapshot *find_outdated_snapshot(struct snapshot_list *sl)
643 {
644         int i;
645         struct snapshot *s;
646
647         DSS_DEBUG_LOG(("looking for snapshots belonging to intervals >= %d\n",
648                 OPT_UINT32_VAL(DSS, NUM_INTERVALS)));
649         FOR_EACH_SNAPSHOT(s, i, sl) {
650                 if (snapshot_is_being_created(s))
651                         continue;
652                 if (is_reference_snapshot(s))
653                         continue;
654                 if (s->interval < OPT_UINT32_VAL(DSS, NUM_INTERVALS))
655                         continue;
656                 return s;
657         }
658         return NULL;
659 }
660
661 static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
662 {
663         int i, num_complete;
664         struct snapshot *s, *ref = NULL;
665
666         DSS_DEBUG_LOG(("picking snapshot with earliest creation time\n"));
667         num_complete = num_complete_snapshots(sl);
668         if (num_complete <= OPT_UINT32_VAL(DSS, MIN_COMPLETE))
669                 return NULL;
670         FOR_EACH_SNAPSHOT(s, i, sl) {
671                 if (snapshot_is_being_created(s))
672                         continue;
673                 if (is_reference_snapshot(s)) { /* avoid this one */
674                         ref = s;
675                         continue;
676                 }
677                 return s;
678         }
679         assert(ref);
680         DSS_WARNING_LOG(("removing reference snapshot %s\n", ref->name));
681         return ref;
682 }
683
684 /* returns NULL <==> *reason is set to NULL */
685 static struct snapshot *find_removable_snapshot(struct snapshot_list *sl,
686                 bool try_hard, char **reason)
687 {
688         struct snapshot *victim;
689
690         /*
691          * Don't remove anything if there is free space and we have fewer
692          * snapshots than configured, plus one. This way there is always one
693          * snapshot that can be recycled.
694          */
695         if (!try_hard && sl->num_snapshots <=
696                         1 << OPT_UINT32_VAL(DSS, NUM_INTERVALS))
697                 goto nope;
698         victim = find_orphaned_snapshot(sl);
699         if (victim) {
700                 *reason = make_message("orphaned");
701                 return victim;
702         }
703         victim = find_outdated_snapshot(sl);
704         if (victim) {
705                 *reason = make_message("outdated");
706                 return victim;
707         }
708         if (!OPT_GIVEN(DSS, KEEP_REDUNDANT)) {
709                 victim = find_redundant_snapshot(sl);
710                 if (victim) {
711                         *reason = make_message("redundant");
712                         return victim;
713                 }
714         }
715         if (!try_hard)
716                 goto nope;
717         DSS_WARNING_LOG(("nothing obvious to remove\n"));
718         victim = find_oldest_removable_snapshot(sl);
719         if (victim) {
720                 *reason = make_message("oldest");
721                 return victim;
722         }
723 nope:
724         *reason = NULL;
725         return NULL;
726 }
727
728 static int rename_incomplete_snapshot(int64_t start)
729 {
730         char *old_name;
731         int ret;
732         int64_t now;
733
734         /*
735          * We don't want the dss_rename() below to fail with EEXIST because the
736          * last complete snapshot was created (and completed) in the same
737          * second as this one.
738          */
739         while ((now = get_current_time()) == start)
740                 sleep(1);
741         free(path_to_last_complete_snapshot);
742         ret = complete_name(start, now, &path_to_last_complete_snapshot);
743         if (ret < 0)
744                 return ret;
745         old_name = incomplete_name(start);
746         ret = dss_rename(old_name, path_to_last_complete_snapshot);
747         if (ret >= 0)
748                 DSS_NOTICE_LOG(("%s -> %s\n", old_name,
749                         path_to_last_complete_snapshot));
750         free(old_name);
751         return ret;
752 }
753
754 static int try_to_free_disk_space(void)
755 {
756         int ret;
757         struct snapshot_list sl;
758         struct snapshot *victim;
759         struct timeval now;
760         char *why;
761         int low_disk_space;
762
763         ret = disk_space_low(NULL);
764         if (ret < 0)
765                 return ret;
766         low_disk_space = ret;
767         gettimeofday(&now, NULL);
768         if (tv_diff(&next_removal_check, &now, NULL) > 0)
769                 return 0;
770         if (!low_disk_space) {
771                 if (snapshot_creation_status != HS_READY)
772                         return 0;
773                 if (next_snapshot_is_due())
774                         return 0;
775         }
776         /* Idle or low disk space, look at existing snapshots. */
777         dss_get_snapshot_list(&sl);
778         victim = find_removable_snapshot(&sl, low_disk_space, &why);
779         if (victim) {
780                 pre_remove_hook(victim, why);
781                 free(why);
782         }
783         free_snapshot_list(&sl);
784         if (victim)
785                 return 1;
786         if (!low_disk_space)
787                 return 0;
788         DSS_CRIT_LOG(("uhuhu: disk space low and nothing to remove\n"));
789         return -ERRNO_TO_DSS_ERROR(ENOSPC);
790 }
791
792 static void post_create_hook(void)
793 {
794         char *cmd = make_message("%s %s/%s",
795                 OPT_STRING_VAL(DSS, POST_CREATE_HOOK),
796                 OPT_STRING_VAL(DSS, DEST_DIR), path_to_last_complete_snapshot);
797         DSS_NOTICE_LOG(("executing %s\n", cmd));
798         dss_exec_cmdline_pid(&create_pid, cmd);
799         free(cmd);
800         snapshot_creation_status = HS_POST_RUNNING;
801 }
802
803 static void post_remove_hook(void)
804 {
805         char *cmd;
806         struct snapshot *s = snapshot_currently_being_removed;
807
808         assert(s);
809
810         cmd = make_message("%s %s/%s", OPT_STRING_VAL(DSS, POST_REMOVE_HOOK),
811                 OPT_STRING_VAL(DSS, DEST_DIR), s->name);
812         DSS_NOTICE_LOG(("executing %s\n", cmd));
813         dss_exec_cmdline_pid(&remove_pid, cmd);
814         free(cmd);
815         snapshot_removal_status = HS_POST_RUNNING;
816 }
817
818 static void dss_kill(pid_t pid, int sig, const char *msg)
819 {
820         const char *signame, *process_name;
821
822         if (pid == 0)
823                 return;
824         switch (sig) {
825         case SIGTERM: signame = "TERM"; break;
826         case SIGSTOP: signame = "STOP"; break;
827         case SIGCONT: signame = "CONT"; break;
828         default: signame = "????";
829         }
830
831         if (pid == create_pid)
832                 process_name = "create";
833         else if (pid == remove_pid)
834                 process_name = "remove";
835         else process_name = "??????";
836
837         if (msg)
838                 DSS_INFO_LOG(("%s\n", msg));
839         DSS_DEBUG_LOG(("sending signal %d (%s) to pid %d (%s process)\n",
840                 sig, signame, (int)pid, process_name));
841         if (kill(pid, sig) >= 0)
842                 return;
843         DSS_INFO_LOG(("failed to send signal %d (%s) to pid %d (%s process)\n",
844                 sig, signame, (int)pid, process_name));
845 }
846
847 static void stop_create_process(void)
848 {
849         if (create_process_stopped)
850                 return;
851         dss_kill(create_pid, SIGSTOP, "suspending create process");
852         create_process_stopped = 1;
853 }
854
855 static void restart_create_process(void)
856 {
857         if (!create_process_stopped)
858                 return;
859         dss_kill(create_pid, SIGCONT, "resuming create process");
860         create_process_stopped = 0;
861 }
862
/**
 * Print a log message about the exit status of a child.
 *
 * \param pid The process ID of the child, used only for the message.
 * \param status The wait status as filled in by wait(2)/waitpid(2).
 *
 * A regular exit is logged at info level together with the exit code, death
 * by signal at notice level together with the signal number, and any other
 * status at warning level.
 */
static void log_termination_msg(pid_t pid, int status)
{
	if (WIFEXITED(status)) {
		DSS_INFO_LOG(("child %i exited. Exit status: %i\n", (int)pid,
			WEXITSTATUS(status)));
		return;
	}
	if (WIFSIGNALED(status)) {
		DSS_NOTICE_LOG(("child %i was killed by signal %i\n", (int)pid,
			WTERMSIG(status)));
		return;
	}
	DSS_WARNING_LOG(("child %i terminated abnormally\n", (int)pid));
}
877
878 static int wait_for_process(pid_t pid, int *status)
879 {
880         int ret;
881
882         DSS_DEBUG_LOG(("Waiting for process %d to terminate\n", (int)pid));
883         for (;;) {
884                 fd_set rfds;
885
886                 FD_ZERO(&rfds);
887                 FD_SET(signal_pipe, &rfds);
888                 ret = dss_select(signal_pipe + 1, &rfds, NULL, NULL);
889                 if (ret < 0)
890                         break;
891                 ret = next_signal();
892                 if (!ret)
893                         continue;
894                 if (ret == SIGCHLD) {
895                         ret = waitpid(pid, status, 0);
896                         if (ret >= 0)
897                                 break;
898                         if (errno != EINTR) { /* error */
899                                 ret = -ERRNO_TO_DSS_ERROR(errno);
900                                 break;
901                         }
902                 }
903                 /* SIGINT or SIGTERM */
904                 dss_kill(pid, SIGTERM, "killing child process");
905         }
906         if (ret < 0)
907                 DSS_ERROR_LOG(("failed to wait for process %d\n", (int)pid));
908         else
909                 log_termination_msg(pid, *status);
910         return ret;
911 }
912
913 static void handle_pre_remove_exit(int status)
914 {
915         if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
916                 snapshot_removal_status = HS_READY;
917                 gettimeofday(&next_removal_check, NULL);
918                 next_removal_check.tv_sec += 60;
919                 return;
920         }
921         snapshot_removal_status = HS_PRE_SUCCESS;
922 }
923
924 static int handle_rm_exit(int status)
925 {
926         if (!WIFEXITED(status)) {
927                 snapshot_removal_status = HS_READY;
928                 return -E_INVOLUNTARY_EXIT;
929         }
930         if (WEXITSTATUS(status)) {
931                 snapshot_removal_status = HS_READY;
932                 return -E_BAD_EXIT_CODE;
933         }
934         snapshot_removal_status = HS_SUCCESS;
935         return 1;
936 }
937
938 static void handle_post_remove_exit(void)
939 {
940         snapshot_removal_status = HS_READY;
941 }
942
943 static int handle_remove_exit(int status)
944 {
945         int ret;
946         struct snapshot *s = snapshot_currently_being_removed;
947
948         assert(s);
949         switch (snapshot_removal_status) {
950         case HS_PRE_RUNNING:
951                 handle_pre_remove_exit(status);
952                 ret = 1;
953                 break;
954         case HS_RUNNING:
955                 ret = handle_rm_exit(status);
956                 break;
957         case HS_POST_RUNNING:
958                 handle_post_remove_exit();
959                 ret = 1;
960                 break;
961         default:
962                 ret = -E_BUG;
963         }
964         if (snapshot_removal_status == HS_READY) {
965                 free(s->name);
966                 free(s);
967                 snapshot_currently_being_removed = NULL;
968         }
969         remove_pid = 0;
970         return ret;
971 }
972
973 static int wait_for_remove_process(void)
974 {
975         int status, ret;
976
977         assert(remove_pid);
978         assert(
979                 snapshot_removal_status == HS_PRE_RUNNING ||
980                 snapshot_removal_status == HS_RUNNING ||
981                 snapshot_removal_status == HS_POST_RUNNING
982         );
983         ret = wait_for_process(remove_pid, &status);
984         if (ret < 0)
985                 return ret;
986         return handle_remove_exit(status);
987 }
988
989 static int handle_rsync_exit(int status)
990 {
991         int es, ret;
992
993         if (!WIFEXITED(status)) {
994                 DSS_ERROR_LOG(("rsync process %d died involuntary\n", (int)create_pid));
995                 ret = -E_INVOLUNTARY_EXIT;
996                 snapshot_creation_status = HS_READY;
997                 goto out;
998         }
999         es = WEXITSTATUS(status);
1000         /*
1001          * Restart rsync on non-fatal errors:
1002          * 24: Partial transfer due to vanished source files
1003          */
1004         if (es != 0 && es != 24) {
1005                 DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
1006                         es, ++num_consecutive_rsync_errors));
1007                 if (!logfile) { /* called by com_run() */
1008                         ret = -E_BAD_EXIT_CODE;
1009                         goto out;
1010                 }
1011                 if (num_consecutive_rsync_errors >
1012                                 OPT_UINT32_VAL(RUN, MAX_RSYNC_ERRORS)) {
1013                         ret = -E_TOO_MANY_RSYNC_ERRORS;
1014                         snapshot_creation_status = HS_READY;
1015                         goto out;
1016                 }
1017                 DSS_WARNING_LOG(("restarting rsync process\n"));
1018                 snapshot_creation_status = HS_NEEDS_RESTART;
1019                 next_snapshot_time = get_current_time() + 60;
1020                 ret = 1;
1021                 goto out;
1022         }
1023         num_consecutive_rsync_errors = 0;
1024         ret = rename_incomplete_snapshot(current_snapshot_creation_time);
1025         if (ret < 0)
1026                 goto out;
1027         snapshot_creation_status = HS_SUCCESS;
1028         free(name_of_reference_snapshot);
1029         name_of_reference_snapshot = NULL;
1030 out:
1031         create_process_stopped = 0;
1032         return ret;
1033 }
1034
1035 static int handle_pre_create_hook_exit(int status)
1036 {
1037         int es, ret;
1038         static int warn_count;
1039
1040         if (!WIFEXITED(status)) {
1041                 snapshot_creation_status = HS_READY;
1042                 ret = -E_INVOLUNTARY_EXIT;
1043                 goto out;
1044         }
1045         es = WEXITSTATUS(status);
1046         if (es) {
1047                 if (!warn_count--) {
1048                         DSS_NOTICE_LOG(("pre_create_hook %s returned %d\n",
1049                                 OPT_STRING_VAL(DSS, PRE_CREATE_HOOK), es));
1050                         DSS_NOTICE_LOG(("deferring snapshot creation...\n"));
1051                         warn_count = 60; /* warn only once per hour */
1052                 }
1053                 next_snapshot_time = get_current_time() + 60;
1054                 snapshot_creation_status = HS_READY;
1055                 ret = 0;
1056                 goto out;
1057         }
1058         warn_count = 0;
1059         snapshot_creation_status = HS_PRE_SUCCESS;
1060         ret = 1;
1061 out:
1062         return ret;
1063 }
1064
1065 static int handle_sigchld(void)
1066 {
1067         pid_t pid;
1068         int status, ret = reap_child(&pid, &status);
1069
1070         if (ret <= 0)
1071                 return ret;
1072
1073         if (pid == create_pid) {
1074                 switch (snapshot_creation_status) {
1075                 case HS_PRE_RUNNING:
1076                         ret = handle_pre_create_hook_exit(status);
1077                         break;
1078                 case HS_RUNNING:
1079                         ret = handle_rsync_exit(status);
1080                         break;
1081                 case HS_POST_RUNNING:
1082                         snapshot_creation_status = HS_READY;
1083                         ret = 1;
1084                         break;
1085                 default:
1086                         DSS_EMERG_LOG(("BUG: create can't die in status %d\n",
1087                                 snapshot_creation_status));
1088                         return -E_BUG;
1089                 }
1090                 create_pid = 0;
1091                 return ret;
1092         }
1093         if (pid == remove_pid) {
1094                 ret = handle_remove_exit(status);
1095                 if (ret < 0)
1096                         return ret;
1097                 return ret;
1098         }
1099         DSS_EMERG_LOG(("BUG: unknown process %d died\n", (int)pid));
1100         return -E_BUG;
1101 }
1102
1103 /* also checks if . is a mountpoint, if --mountpoint was given */
1104 static int change_to_dest_dir(void)
1105 {
1106         int ret;
1107         const char *dd = OPT_STRING_VAL(DSS, DEST_DIR);
1108         struct stat dot, dotdot;
1109
1110         DSS_INFO_LOG(("changing cwd to %s\n", dd));
1111         if (chdir(dd) < 0) {
1112                 ret = -ERRNO_TO_DSS_ERROR(errno);
1113                 DSS_ERROR_LOG(("could not change cwd to %s\n", dd));
1114                 return ret;
1115         }
1116         if (!OPT_GIVEN(DSS, MOUNTPOINT))
1117                 return 0;
1118         if (stat(".", &dot) < 0) {
1119                 ret = -ERRNO_TO_DSS_ERROR(errno);
1120                 DSS_ERROR_LOG(("could not stat .\n"));
1121                 return ret;
1122         }
1123         if (stat("..", &dotdot) < 0) {
1124                 ret = -ERRNO_TO_DSS_ERROR(errno);
1125                 DSS_ERROR_LOG(("could not stat ..\n"));
1126                 return ret;
1127         }
1128         if (dot.st_dev == dotdot.st_dev && dot.st_ino != dotdot.st_ino) {
1129                 DSS_ERROR_LOG(("mountpoint check failed for %s\n", dd));
1130                 return -E_MOUNTPOINT;
1131         }
1132         return 1;
1133 }
1134
1135 static int check_config(void)
1136 {
1137         uint32_t unit_interval = OPT_UINT32_VAL(DSS, UNIT_INTERVAL);
1138         uint32_t num_intervals = OPT_UINT32_VAL(DSS, NUM_INTERVALS);
1139
1140         if (unit_interval == 0) {
1141                 DSS_ERROR_LOG(("bad unit interval: %i\n", unit_interval));
1142                 return -E_INVALID_NUMBER;
1143         }
1144         DSS_DEBUG_LOG(("unit interval: %i day(s)\n", unit_interval));
1145
1146         if (num_intervals == 0 || num_intervals > 30) {
1147                 DSS_ERROR_LOG(("bad number of intervals: %i\n", num_intervals));
1148                 return -E_INVALID_NUMBER;
1149         }
1150         if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE))
1151                 if (!OPT_GIVEN(DSS, SOURCE_DIR)) {
1152                         DSS_ERROR_LOG(("--source-dir required\n"));
1153                         return -E_SYNTAX;
1154                 }
1155         if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE)
1156                         || subcmd == CMD_PTR(LS) || subcmd == CMD_PTR(PRUNE)) {
1157                 if (!OPT_GIVEN(DSS, DEST_DIR)) {
1158                         DSS_ERROR_LOG(("--dest-dir required\n"));
1159                         return -E_SYNTAX;
1160                 }
1161         }
1162         DSS_DEBUG_LOG(("number of intervals: %i\n", num_intervals));
1163         return 1;
1164 }
1165
1166 static int lopsub_error(int lopsub_ret, char **errctx)
1167 {
1168         const char *msg = lls_strerror(-lopsub_ret);
1169         if (*errctx)
1170                 DSS_ERROR_LOG(("%s: %s\n", *errctx, msg));
1171         else
1172                 DSS_ERROR_LOG(("%s\n", msg));
1173         free(*errctx);
1174         *errctx = NULL;
1175         return -E_LOPSUB;
1176 }
1177
1178 static int parse_config_file(bool sighup, const struct lls_command *cmd)
1179 {
1180         int ret, fd = -1;
1181         struct stat statbuf;
1182         void *map;
1183         size_t sz;
1184         int cf_argc;
1185         char **cf_argv, *errctx = NULL;
1186         struct lls_parse_result *cf_lpr, *merged_lpr, *clpr;
1187         const char *subcmd_name;
1188
1189         ret = open(config_file, O_RDONLY);
1190         if (ret < 0) {
1191                 if (errno != ENOENT || OPT_GIVEN(DSS, CONFIG_FILE)) {
1192                         ret = -ERRNO_TO_DSS_ERROR(errno);
1193                         DSS_ERROR_LOG(("config file %s can not be opened\n",
1194                                 config_file));
1195                         goto out;
1196                 }
1197                 /* no config file -- nothing to do */
1198                 ret = 0;
1199                 goto success;
1200         }
1201         fd = ret;
1202         ret = fstat(fd, &statbuf);
1203         if (ret < 0) {
1204                 ret = -ERRNO_TO_DSS_ERROR(errno);
1205                 DSS_ERROR_LOG(("failed to stat config file %s\n", config_file));
1206                 goto close_fd;
1207         }
1208         sz = statbuf.st_size;
1209         if (sz == 0) { /* config file is empty -- nothing to do */
1210                 ret = 0;
1211                 goto success;
1212         }
1213         map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
1214         if (map == MAP_FAILED) {
1215                 ret = -ERRNO_TO_DSS_ERROR(errno);
1216                 DSS_ERROR_LOG(("failed to mmap config file %s\n",
1217                         config_file));
1218                 goto close_fd;
1219         }
1220         if (cmd == CMD_PTR(DSS))
1221                 subcmd_name = NULL;
1222         else
1223                 subcmd_name = lls_command_name(cmd);
1224         ret = lls_convert_config(map, sz, subcmd_name, &cf_argv, &errctx);
1225         munmap(map, sz);
1226         if (ret < 0) {
1227                 DSS_ERROR_LOG(("failed to convert config file %s\n",
1228                         config_file));
1229                 ret = lopsub_error(ret, &errctx);
1230                 goto close_fd;
1231         }
1232         cf_argc = ret;
1233         ret = lls_parse(cf_argc, cf_argv, cmd, &cf_lpr, &errctx);
1234         lls_free_argv(cf_argv);
1235         if (ret < 0) {
1236                 ret = lopsub_error(ret, &errctx);
1237                 goto close_fd;
1238         }
1239         clpr = cmd == CMD_PTR(DSS)? cmdline_lpr : cmdline_sublpr;
1240         if (sighup) /* config file overrides command line */
1241                 ret = lls_merge(cf_lpr, clpr, cmd, &merged_lpr, &errctx);
1242         else /* command line options overrride config file options */
1243                 ret = lls_merge(clpr, cf_lpr, cmd, &merged_lpr, &errctx);
1244         lls_free_parse_result(cf_lpr, cmd);
1245         if (ret < 0) {
1246                 ret = lopsub_error(ret, &errctx);
1247                 goto close_fd;
1248         }
1249         ret = 1;
1250 success:
1251         assert(ret >= 0);
1252         DSS_DEBUG_LOG(("loglevel: %d\n", OPT_UINT32_VAL(DSS, LOGLEVEL)));
1253         if (cmd != CMD_PTR(DSS)) {
1254                 if (ret > 0) {
1255                         if (sublpr != cmdline_sublpr)
1256                                 lls_free_parse_result(sublpr, cmd);
1257                         sublpr = merged_lpr;
1258                 } else
1259                         sublpr = cmdline_sublpr;
1260         } else {
1261                 if (ret > 0) {
1262                         if (lpr != cmdline_lpr)
1263                                 lls_free_parse_result(lpr, cmd);
1264                         lpr = merged_lpr;
1265                 } else
1266                         lpr = cmdline_lpr;
1267         }
1268 close_fd:
1269         if (fd >= 0)
1270                 close(fd);
1271 out:
1272         return ret;
1273 }
1274
1275 static int handle_sighup(void)
1276 {
1277         int ret;
1278
1279         DSS_NOTICE_LOG(("SIGHUP, re-reading config\n"));
1280         dump_dss_config("old");
1281         ret = parse_config_file(true /* SIGHUP */, CMD_PTR(DSS));
1282         if (ret < 0)
1283                 return ret;
1284         ret = parse_config_file(true /* SIGHUP */, CMD_PTR(RUN));
1285         if (ret < 0)
1286                 return ret;
1287         ret = check_config();
1288         if (ret < 0)
1289                 return ret;
1290         close_log(logfile);
1291         logfile = NULL;
1292         if (OPT_GIVEN(RUN, DAEMON) || daemonized) {
1293                 logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE));
1294                 log_welcome(OPT_UINT32_VAL(DSS, LOGLEVEL));
1295                 daemonized = true;
1296         }
1297         dump_dss_config("reloaded");
1298         invalidate_next_snapshot_time();
1299         return 1;
1300 }
1301
1302 static void kill_children(void)
1303 {
1304         restart_create_process();
1305         dss_kill(create_pid, SIGTERM, NULL);
1306         dss_kill(remove_pid, SIGTERM, NULL);
1307 }
1308
1309 static int handle_signal(void)
1310 {
1311         int sig, ret = next_signal();
1312
1313         if (ret <= 0)
1314                 goto out;
1315         sig = ret;
1316         switch (sig) {
1317         case SIGINT:
1318         case SIGTERM:
1319                 return -E_SIGNAL;
1320         case SIGHUP:
1321                 ret = handle_sighup();
1322                 break;
1323         case SIGCHLD:
1324                 ret = handle_sigchld();
1325                 break;
1326         }
1327 out:
1328         if (ret < 0)
1329                 DSS_ERROR_LOG(("%s\n", dss_strerror(-ret)));
1330         return ret;
1331 }
1332
1333 /*
1334  * We can not use rsync locally if the local user is different from the remote
1335  * user or if the src dir is not on the local host (or both).
1336  */
1337 static int use_rsync_locally(char *logname)
1338 {
1339         const char *h = OPT_STRING_VAL(DSS, REMOTE_HOST);
1340
1341         if (strcmp(h, "localhost") && strcmp(h, "127.0.0.1"))
1342                 return 0;
1343         if (OPT_GIVEN(DSS, REMOTE_USER) &&
1344                         strcmp(OPT_STRING_VAL(DSS, REMOTE_USER), logname))
1345                 return 0;
1346         return 1;
1347 }
1348
1349 static int rename_resume_snap(int64_t creation_time)
1350 {
1351         struct snapshot_list sl;
1352         struct snapshot *s = NULL;
1353         char *new_name = incomplete_name(creation_time);
1354         int ret;
1355         const char *why;
1356
1357         sl.num_snapshots = 0;
1358
1359         ret = 0;
1360         dss_get_snapshot_list(&sl);
1361         /*
1362          * Snapshot recycling: We first look at the newest snapshot. If this
1363          * snapshot happens to be incomplete, the last rsync process was
1364          * aborted and we reuse this one. Otherwise we look at snapshots which
1365          * could be removed (outdated and redundant snapshots) as candidates
1366          * for recycling. If no outdated/redundant snapshot exists, we check if
1367          * there is an orphaned snapshot, which likely is useless anyway.
1368          *
1369          * Only if no existing snapshot is suitable for recycling, we bite the
1370          * bullet and create a new one.
1371          */
1372         s = get_newest_snapshot(&sl);
1373         if (!s) /* no snapshots at all */
1374                 goto out;
1375         /* re-use last snapshot if it is incomplete */
1376         why = "aborted";
1377         if ((s->flags & SS_COMPLETE) == 0)
1378                 goto out;
1379         why = "outdated";
1380         s = find_outdated_snapshot(&sl);
1381         if (s)
1382                 goto out;
1383         why = "redundant";
1384         s = find_redundant_snapshot(&sl);
1385         if (s)
1386                 goto out;
1387         why = "orphaned";
1388         s = find_orphaned_snapshot(&sl);
1389 out:
1390         if (s) {
1391                 DSS_NOTICE_LOG(("recycling %s snapshot %s\n", why, s->name));
1392                 ret = dss_rename(s->name, new_name);
1393         }
1394         if (ret >= 0)
1395                 DSS_NOTICE_LOG(("creating %s\n", new_name));
1396         free(new_name);
1397         free_snapshot_list(&sl);
1398         return ret;
1399 }
1400
1401 static void create_rsync_argv(char ***argv, int64_t *num)
1402 {
1403         char *logname;
1404         int i = 0, j, N;
1405         struct snapshot_list sl;
1406         static bool seeded;
1407
1408         dss_get_snapshot_list(&sl);
1409         assert(!name_of_reference_snapshot);
1410         name_of_reference_snapshot = name_of_newest_complete_snapshot(&sl);
1411         free_snapshot_list(&sl);
1412
1413         /*
1414          * We specify up to 6 arguments, one argument per given rsync option
1415          * and one argument per given source dir. We also need space for the
1416          * terminating NULL pointer.
1417          */
1418         N = OPT_GIVEN(DSS, RSYNC_OPTION) + OPT_GIVEN(DSS, SOURCE_DIR);
1419         *argv = dss_malloc((7 + N) * sizeof(char *));
1420         (*argv)[i++] = dss_strdup("rsync");
1421         (*argv)[i++] = dss_strdup("-a");
1422         (*argv)[i++] = dss_strdup("--delete");
1423         if (!seeded) {
1424                 srandom((unsigned)time(NULL)); /* no need to be fancy here */
1425                 seeded = true;
1426         }
1427         if (1000 * (random() / (RAND_MAX + 1.0)) < OPT_UINT32_VAL(DSS, CHECKSUM)) {
1428                 DSS_NOTICE_LOG(("adding --checksum to rsync options\n"));
1429                 (*argv)[i++] = dss_strdup("--checksum");
1430         }
1431         for (j = 0; j < OPT_GIVEN(DSS, RSYNC_OPTION); j++)
1432                 (*argv)[i++] = dss_strdup(lls_string_val(j,
1433                         OPT_RESULT(DSS, RSYNC_OPTION)));
1434         if (name_of_reference_snapshot) {
1435                 DSS_INFO_LOG(("using %s as reference\n", name_of_reference_snapshot));
1436                 (*argv)[i++] = make_message("--link-dest=../%s",
1437                         name_of_reference_snapshot);
1438         } else
1439                 DSS_INFO_LOG(("no suitable reference snapshot found\n"));
1440         logname = dss_logname();
1441         if (use_rsync_locally(logname)) {
1442                 for (j = 0; j < OPT_GIVEN(DSS, SOURCE_DIR); j++)
1443                         (*argv)[i++] = dss_strdup(lls_string_val(j,
1444                                 OPT_RESULT(DSS, SOURCE_DIR)));
1445         } else {
1446                 /*
1447                  * dss-1.0 and earlier did not support multiple source
1448                  * directories.  These versions appended a slash to the end of
1449                  * the source directory to make sure that only the contents of
1450                  * the single source directory, but not the directory itself,
1451                  * are copied to the destination. For multiple source
1452                  * directories, however, this is not a good idea because the
1453                  * source directories may well contain identical file names,
1454                  * which would then be copied to the same location on the
1455                  * destination, overwriting each other. Moreover, we want the
1456                  * directory on the destination match the source. To preserve
1457                  * the old behaviour, we thus have to special-case N=1.
1458                  */
1459                 for (j = 0; j < OPT_GIVEN(DSS, SOURCE_DIR); j++) {
1460                         (*argv)[i++] = make_message("%s@%s:%s%s",
1461                                 OPT_GIVEN(DSS, REMOTE_USER)?
1462                                         OPT_STRING_VAL(DSS, REMOTE_USER) : logname,
1463                                 OPT_STRING_VAL(DSS, REMOTE_HOST),
1464                                 lls_string_val(j, OPT_RESULT(DSS, SOURCE_DIR)),
1465                                 OPT_GIVEN(DSS, SOURCE_DIR) == 1? "/" : ""
1466                         );
1467                 }
1468         }
1469         free(logname);
1470         *num = get_current_time();
1471         (*argv)[i++] = incomplete_name(*num);
1472         (*argv)[i++] = NULL;
1473         for (j = 0; j < i; j++)
1474                 DSS_DEBUG_LOG(("argv[%d] = %s\n", j, (*argv)[j]));
1475 }
1476
/*
 * Free a NULL-terminated argument vector together with all its strings.
 * It is OK to pass a NULL pointer, in which case nothing happens.
 */
static void free_rsync_argv(char **argv)
{
	char **arg;

	if (argv == NULL)
		return;
	for (arg = argv; *arg != NULL; arg++)
		free(*arg);
	free(argv);
}
1487
1488 static int create_snapshot(char **argv)
1489 {
1490         int ret;
1491
1492         assert(argv);
1493         ret = rename_resume_snap(current_snapshot_creation_time);
1494         if (ret < 0)
1495                 return ret;
1496         dss_exec(&create_pid, argv[0], argv);
1497         snapshot_creation_status = HS_RUNNING;
1498         return ret;
1499 }
1500
1501 static int select_loop(void)
1502 {
1503         int ret;
1504         /* check every 60 seconds for free disk space */
1505         struct timeval tv;
1506         char **rsync_argv = NULL;
1507
1508         for (;;) {
1509                 fd_set rfds;
1510                 struct timeval *tvp;
1511
1512                 if (remove_pid)
1513                         tvp = NULL; /* sleep until rm hook/process dies */
1514                 else { /* sleep one minute */
1515                         tv.tv_sec = 60;
1516                         tv.tv_usec = 0;
1517                         tvp = &tv;
1518                 }
1519                 FD_ZERO(&rfds);
1520                 FD_SET(signal_pipe, &rfds);
1521                 ret = dss_select(signal_pipe + 1, &rfds, NULL, tvp);
1522                 if (ret < 0)
1523                         goto out;
1524                 if (FD_ISSET(signal_pipe, &rfds)) {
1525                         ret = handle_signal();
1526                         if (ret < 0)
1527                                 goto out;
1528                 }
1529                 if (remove_pid)
1530                         continue;
1531                 if (snapshot_removal_status == HS_PRE_SUCCESS) {
1532                         ret = exec_rm();
1533                         if (ret < 0)
1534                                 goto out;
1535                         continue;
1536                 }
1537                 if (snapshot_removal_status == HS_SUCCESS) {
1538                         post_remove_hook();
1539                         continue;
1540                 }
1541                 ret = try_to_free_disk_space();
1542                 if (ret < 0)
1543                         goto out;
1544                 if (snapshot_removal_status != HS_READY) {
1545                         stop_create_process();
1546                         continue;
1547                 }
1548                 restart_create_process();
1549                 switch (snapshot_creation_status) {
1550                 case HS_READY:
1551                         if (!next_snapshot_is_due())
1552                                 continue;
1553                         pre_create_hook();
1554                         continue;
1555                 case HS_PRE_RUNNING:
1556                 case HS_RUNNING:
1557                 case HS_POST_RUNNING:
1558                         continue;
1559                 case HS_PRE_SUCCESS:
1560                         if (!name_of_reference_snapshot) {
1561                                 free_rsync_argv(rsync_argv);
1562                                 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1563                         }
1564                         ret = create_snapshot(rsync_argv);
1565                         if (ret < 0)
1566                                 goto out;
1567                         continue;
1568                 case HS_NEEDS_RESTART:
1569                         if (!next_snapshot_is_due())
1570                                 continue;
1571                         ret = create_snapshot(rsync_argv);
1572                         if (ret < 0)
1573                                 goto out;
1574                         continue;
1575                 case HS_SUCCESS:
1576                         post_create_hook();
1577                         continue;
1578                 }
1579         }
1580 out:
1581         return ret;
1582 }
1583
1584 static void exit_hook(int exit_code)
1585 {
1586         pid_t pid;
1587         char **argv, *tmp = dss_strdup(OPT_STRING_VAL(DSS, EXIT_HOOK));
1588         unsigned n = split_args(tmp, &argv);
1589
1590         n++;
1591         argv = dss_realloc(argv, (n + 1) * sizeof(char *));
1592         argv[n - 1] = dss_strdup(dss_strerror(-exit_code));
1593         argv[n] = NULL;
1594         dss_exec(&pid, argv[0], argv);
1595         free(argv[n - 1]);
1596         free(argv);
1597         free(tmp);
1598 }
1599
1600 static void lock_dss_or_die(void)
1601 {
1602         int ret = lock_dss(config_file);
1603
1604         if (ret < 0) {
1605                 DSS_EMERG_LOG(("failed to lock: %s\n", dss_strerror(-ret)));
1606                 exit(EXIT_FAILURE);
1607         }
1608 }
1609
1610 static int com_run(void)
1611 {
1612         int ret, fd = -1;
1613         pid_t pid;
1614
1615         if (OPT_GIVEN(DSS, DRY_RUN)) {
1616                 DSS_ERROR_LOG(("dry run not supported by this command\n"));
1617                 return -E_SYNTAX;
1618         }
1619         ret = get_dss_pid(config_file, &pid);
1620         if (ret >= 0) {
1621                 DSS_ERROR_LOG(("pid %d\n", (int)pid));
1622                 return -E_ALREADY_RUNNING;
1623         }
1624         /*
1625          * Order is important here: Since daemon_init() forks, it would drop
1626          * the lock if it had been acquired already. Changing the cwd before
1627          * grabbing the lock causes stat(2) to fail in case a relative config
1628          * file path was given, which results in a different key ID for
1629          * locking. Therefore we must first daemonize, then lock, then change
1630          * the cwd.
1631          */
1632         if (OPT_GIVEN(RUN, DAEMON)) {
1633                 fd = daemon_init();
1634                 daemonized = true;
1635                 logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE));
1636         }
1637         lock_dss_or_die();
1638         ret = change_to_dest_dir();
1639         if (ret < 0)
1640                 return ret;
1641         dump_dss_config("startup");
1642         ret = install_sighandler(SIGHUP);
1643         if (ret < 0)
1644                 return ret;
1645         if (fd >= 0) {
1646                 ret = write(fd, "\0", 1);
1647                 if (ret != 1) {
1648                         DSS_ERROR_LOG(("write to daemon pipe returned %d\n",
1649                                 ret));
1650                         if (ret < 0)
1651                                 return -ERRNO_TO_DSS_ERROR(errno);
1652                         return -E_BUG;
1653                 }
1654         }
1655         ret = select_loop();
1656         if (ret >= 0) /* impossible */
1657                 ret = -E_BUG;
1658         kill_children();
1659         exit_hook(ret);
1660         while (wait(NULL) >= 0 || errno != ECHILD)
1661                 ; /* still have children to wait for */
1662         return ret;
1663 }
1664 EXPORT_CMD_HANDLER(run);
1665
1666 static int com_prune(void)
1667 {
1668         int ret;
1669         struct snapshot_list sl;
1670         struct snapshot *victim;
1671         struct disk_space ds;
1672         char *why;
1673         bool try_hard;
1674
1675         lock_dss_or_die();
1676         ret = change_to_dest_dir();
1677         if (ret < 0)
1678                 return ret;
1679         switch (OPT_UINT32_VAL(PRUNE, DISK_SPACE)) {
1680         case FDS_LOW: try_hard = true; break;
1681         case FDS_HIGH: try_hard = false; break;
1682         default:
1683                 ret = get_disk_space(".", &ds);
1684                 if (ret < 0)
1685                         return ret;
1686                 log_disk_space(&ds);
1687                 try_hard = disk_space_low(&ds);
1688         }
1689         dss_get_snapshot_list(&sl);
1690         victim = find_removable_snapshot(&sl, try_hard, &why);
1691         if (!victim) {
1692                 dss_msg("nothing to prune\n");
1693                 ret = 0;
1694                 goto free_sl;
1695         }
1696         if (OPT_GIVEN(DSS, DRY_RUN)) {
1697                 dss_msg("picking %s snapshot %s (interval = %i)\n",
1698                         why, victim->name, victim->interval);
1699                 ret = 0;
1700                 goto free_why;
1701         }
1702         pre_remove_hook(victim, why);
1703         if (snapshot_removal_status == HS_PRE_RUNNING) {
1704                 ret = wait_for_remove_process();
1705                 if (ret < 0)
1706                         goto free_why;
1707                 ret = -E_HOOK_FAILED;
1708                 if (snapshot_removal_status != HS_PRE_SUCCESS)
1709                         goto free_why;
1710         }
1711         ret = exec_rm();
1712         if (ret < 0)
1713                 goto free_why;
1714         ret = wait_for_remove_process();
1715         if (ret < 0)
1716                 goto free_why;
1717         assert(snapshot_removal_status == HS_SUCCESS);
1718         post_remove_hook();
1719         assert(snapshot_removal_status == HS_POST_RUNNING);
1720         ret = wait_for_remove_process();
1721 free_why:
1722         free(why);
1723 free_sl:
1724         free_snapshot_list(&sl);
1725         return ret;
1726 }
1727 EXPORT_CMD_HANDLER(prune);
1728
1729 static int com_create(void)
1730 {
1731         int ret, status;
1732         char **rsync_argv;
1733
1734         lock_dss_or_die();
1735         ret = change_to_dest_dir();
1736         if (ret < 0)
1737                 return ret;
1738         if (OPT_GIVEN(DSS, DRY_RUN)) {
1739                 int i;
1740                 char *msg = NULL;
1741                 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1742                 for (i = 0; rsync_argv[i]; i++) {
1743                         char *tmp = msg;
1744                         msg = make_message("%s%s%s", tmp? tmp : "",
1745                                 tmp? " " : "", rsync_argv[i]);
1746                         free(tmp);
1747                 }
1748                 free_rsync_argv(rsync_argv);
1749                 dss_msg("%s\n", msg);
1750                 free(msg);
1751                 return 1;
1752         }
1753         pre_create_hook();
1754         if (create_pid) {
1755                 ret = wait_for_process(create_pid, &status);
1756                 if (ret < 0)
1757                         return ret;
1758                 ret = handle_pre_create_hook_exit(status);
1759                 if (ret <= 0) /* error, or pre-create failed */
1760                         return ret;
1761         }
1762         create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1763         ret = create_snapshot(rsync_argv);
1764         if (ret < 0)
1765                 goto out;
1766         ret = wait_for_process(create_pid, &status);
1767         if (ret < 0)
1768                 goto out;
1769         ret = handle_rsync_exit(status);
1770         if (ret < 0)
1771                 goto out;
1772         post_create_hook();
1773         if (create_pid)
1774                 ret = wait_for_process(create_pid, &status);
1775 out:
1776         free_rsync_argv(rsync_argv);
1777         return ret;
1778 }
1779 EXPORT_CMD_HANDLER(create);
1780
1781 static int com_ls(void)
1782 {
1783         int i, ret;
1784         struct snapshot_list sl;
1785         struct snapshot *s;
1786         int64_t now = get_current_time();
1787
1788         ret = change_to_dest_dir();
1789         if (ret < 0)
1790                 return ret;
1791         dss_get_snapshot_list(&sl);
1792         FOR_EACH_SNAPSHOT(s, i, &sl) {
1793                 int64_t d;
1794                 if (s->flags & SS_COMPLETE)
1795                         d = (s->completion_time - s->creation_time) / 60;
1796                 else
1797                         d = (now - s->creation_time) / 60;
1798                 dss_msg("%u\t%s\t%3" PRId64 ":%02" PRId64 "\n", s->interval,
1799                         s->name, d / 60, d % 60);
1800         }
1801         free_snapshot_list(&sl);
1802         return 1;
1803 }
1804 EXPORT_CMD_HANDLER(ls);
1805
/*
 * Handler of the configtest subcommand.
 *
 * Does no checking of its own; it only prints a confirmation to stdout and
 * returns zero. main() calls subcommand handlers only after the command line
 * and config file were parsed and check_config() succeeded.
 */
static int com_configtest(void)
{
        fputs("Syntax Ok\n", stdout);
        return 0;
}
1811 EXPORT_CMD_HANDLER(configtest);
1812
1813 static int setup_signal_handling(void)
1814 {
1815         int ret;
1816
1817         DSS_INFO_LOG(("setting up signal handlers\n"));
1818         signal_pipe = signal_init(); /* always successful */
1819         ret = install_sighandler(SIGINT);
1820         if (ret < 0)
1821                 return ret;
1822         ret = install_sighandler(SIGTERM);
1823         if (ret < 0)
1824                 return ret;
1825         return install_sighandler(SIGCHLD);
1826 }
1827
1828 const char *dss_version(void);
1829 static void handle_version_and_help(void)
1830 {
1831         char *txt;
1832
1833         if (OPT_GIVEN(DSS, DETAILED_HELP))
1834                 txt = lls_long_help(CMD_PTR(DSS));
1835         else if (OPT_GIVEN(DSS, HELP))
1836                 txt = lls_short_help(CMD_PTR(DSS));
1837         else if (OPT_GIVEN(DSS, VERSION))
1838                 txt = make_message("%s\n", dss_version());
1839         else
1840                 return;
1841         printf("%s", txt);
1842         free(txt);
1843         exit(EXIT_SUCCESS);
1844 }
1845
1846 static void show_subcommand_summary(bool verbose)
1847 {
1848         const struct lls_command *cmd;
1849         int i;
1850
1851         printf("Available subcommands: ");
1852         if (!verbose) {
1853                 for (i = 1; (cmd = lls_cmd(i, dss_suite)); i++) {
1854                         if (i > 1)
1855                                 printf(", ");
1856                         printf("%s", lls_command_name(cmd));
1857                 }
1858                 printf("\n");
1859                 return;
1860         }
1861         printf("\n");
1862         for (i = 1; (cmd = lls_cmd(i, dss_suite)); i++) {
1863                 const char *name = lls_command_name(cmd);
1864                 const char *purpose = lls_purpose(cmd);
1865                 printf("%-11s%s\n", name, purpose);
1866         }
1867 }
1868
1869 static int com_help(void)
1870 {
1871         int ret;
1872         char *errctx, *help;
1873         const char *arg;
1874         const struct lls_command *cmd;
1875
1876         ret = lls_check_arg_count(sublpr, 0, 1, &errctx);
1877         if (ret < 0)
1878                 return lopsub_error(ret, &errctx);
1879         if (lls_num_inputs(sublpr) == 0) {
1880                 show_subcommand_summary(OPT_GIVEN(HELP, LONG));
1881                 return 0;
1882         }
1883         arg = lls_input(0, sublpr);
1884         ret = lls_lookup_subcmd(arg, dss_suite, &errctx);
1885         if (ret < 0)
1886                 return lopsub_error(ret, &errctx);
1887         cmd = lls_cmd(ret, dss_suite);
1888         if (OPT_GIVEN(HELP, LONG))
1889                 help = lls_long_help(cmd);
1890         else
1891                 help = lls_short_help(cmd);
1892         printf("%s\n", help);
1893         free(help);
1894         return 0;
1895 }
1896 EXPORT_CMD_HANDLER(help);
1897
1898 int main(int argc, char **argv)
1899 {
1900         int ret;
1901         char *errctx = NULL;
1902         unsigned num_inputs;
1903         const struct dss_user_data *ud;
1904
1905         ret = lls_parse(argc, argv, CMD_PTR(DSS), &cmdline_lpr, &errctx);
1906         if (ret < 0) {
1907                 ret = lopsub_error(ret, &errctx);
1908                 goto out;
1909         }
1910         lpr = cmdline_lpr;
1911         set_config_file_name();
1912         ret = parse_config_file(false /* no SIGHUP */, CMD_PTR(DSS));
1913         if (ret < 0)
1914                 goto out;
1915         handle_version_and_help();
1916         num_inputs = lls_num_inputs(lpr);
1917         if (num_inputs == 0) { /* show verbose summary */
1918                 show_subcommand_summary(true);
1919                 ret = 0;
1920                 goto out;
1921         }
1922         ret = lls_lookup_subcmd(argv[argc - num_inputs], dss_suite, &errctx);
1923         if (ret < 0) {
1924                 ret = lopsub_error(ret, &errctx);
1925                 goto out;
1926         }
1927         subcmd = lls_cmd(ret, dss_suite);
1928         ret = lls_parse(num_inputs, argv + argc - num_inputs, subcmd,
1929                 &cmdline_sublpr, &errctx);
1930         if (ret < 0) {
1931                 ret = lopsub_error(ret, &errctx);
1932                 goto out;
1933         }
1934         sublpr = cmdline_sublpr;
1935         ret = parse_config_file(false /* no SIGHUP */, subcmd);
1936         if (ret < 0)
1937                 goto out;
1938         ret = check_config();
1939         if (ret < 0)
1940                 goto out;
1941         ret = setup_signal_handling();
1942         if (ret < 0)
1943                 goto out;
1944         ud = lls_user_data(subcmd);
1945         ret = ud->handler();
1946         signal_shutdown();
1947 out:
1948         if (ret < 0) {
1949                 if (errctx)
1950                         DSS_ERROR_LOG(("%s\n", errctx));
1951                 DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));
1952         }
1953         free(errctx);
1954         lls_free_parse_result(lpr, CMD_PTR(DSS));
1955         if (lpr != cmdline_lpr)
1956                 lls_free_parse_result(cmdline_lpr, CMD_PTR(DSS));
1957         lls_free_parse_result(sublpr, subcmd);
1958         if (sublpr != cmdline_sublpr)
1959                 lls_free_parse_result(cmdline_sublpr, subcmd);
1960         free(config_file);
1961         exit(ret >= 0? EXIT_SUCCESS : EXIT_FAILURE);
1962 }