Merge branch 'refs/heads/t/short-log'
[dss.git] / dss.c
1 /*
2 * Copyright (C) 2008-2011 Andre Noll <maan@tuebingen.mpg.de>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6 #include <string.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <assert.h>
11 #include <errno.h>
12 #include <sys/types.h>
13 #include <signal.h>
14 #include <ctype.h>
15 #include <stdbool.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 #include <inttypes.h>
19 #include <sys/time.h>
20 #include <time.h>
21 #include <sys/wait.h>
22 #include <fnmatch.h>
23 #include <limits.h>
24 #include <fcntl.h>
25 #include <lopsub.h>
26 #include <sys/mman.h>
27
28 #include "gcc-compat.h"
29 #include "log.h"
30 #include "str.h"
31 #include "err.h"
32 #include "file.h"
33 #include "exec.h"
34 #include "daemon.h"
35 #include "sig.h"
36 #include "df.h"
37 #include "tv.h"
38 #include "snap.h"
39 #include "ipc.h"
40 #include "dss.lsg.h"
41
/* The lopsub command structure of the named command of the dss suite. */
#define CMD_PTR(_cname) lls_cmd(LSG_DSS_CMD_ ## _cname, dss_suite)

/*
 * Parse result of one option. Options of the supercommand (DSS) are looked
 * up in lpr, options of any other command in sublpr.
 */
#define OPT_RESULT(_cname, _oname) \
	(lls_opt_result( \
		LSG_DSS_ ## _cname ## _OPT_ ## _oname, \
		(CMD_PTR(_cname) == CMD_PTR(DSS))? lpr : sublpr))

/* Convenience accessors built on top of OPT_RESULT(). */
#define OPT_GIVEN(_cname, _oname) \
	(lls_opt_given(OPT_RESULT(_cname, _oname)))
#define OPT_STRING_VAL(_cname, _oname) \
	(lls_string_val(0, OPT_RESULT(_cname, _oname)))
#define OPT_UINT32_VAL(_cname, _oname) \
	(lls_uint32_val(0, OPT_RESULT(_cname, _oname)))
50
/* Per-command user data: the function which executes the subcommand. */
struct dss_user_data {
	int (*handler)(void);
};

/*
 * Define the user data object of a subcommand. The handler is expected to
 * be named com_<cmd>.
 */
#define EXPORT_CMD_HANDLER(_cmd) \
	const struct dss_user_data lsg_dss_com_ ## _cmd ## _user_data = { \
		.handler = com_ ## _cmd \
	};
56
57 /*
58 * Command line and active options. We need to keep a copy of the parsed
59 * command line options for the SIGHUP case where we merge the command line
60 * options and the new config file options.
61 */
62 static struct lls_parse_result *cmdline_lpr, *lpr;
63
64 /** Parsed subcommand options. */
65 static struct lls_parse_result *cmdline_sublpr, *sublpr;
66 /* The executing subcommand (NULL at startup). */
67 static const struct lls_command *subcmd;
68 /** Wether daemon_init() was called. */
69 static bool daemonized;
70 /** Non-NULL if we log to a file. */
71 static FILE *logfile;
72 /** The read end of the signal pipe */
73 static int signal_pipe;
74 /** Process id of current pre-create-hook/rsync/post-create-hook process. */
75 static pid_t create_pid;
76 /** Whether the pre-create-hook/rsync/post-create-hook is currently stopped. */
77 static int create_process_stopped;
78 /** How many times in a row the rsync command failed. */
79 static int num_consecutive_rsync_errors;
80 /** Process id of current pre-remove/rm/post-remove process. */
81 static pid_t remove_pid;
82 /** When the next snapshot is due. */
83 static int64_t next_snapshot_time;
84 /** When to try to remove something. */
85 static struct timeval next_removal_check;
86 /** Creation time of the snapshot currently being created. */
87 static int64_t current_snapshot_creation_time;
88 /** The snapshot currently being removed. */
89 struct snapshot *snapshot_currently_being_removed;
90 /** Needed by the post-create hook. */
91 static char *path_to_last_complete_snapshot;
92 static char *name_of_reference_snapshot;
93 /** \sa \ref snap.h for details. */
94 enum hook_status snapshot_creation_status;
95 /** \sa \ref snap.h for details. */
96 enum hook_status snapshot_removal_status;
97
98
99 DEFINE_DSS_ERRLIST;
100 static const char *hook_status_description[] = {HOOK_STATUS_ARRAY};
101
102 /* may be called with ds == NULL. */
103 static int disk_space_low(struct disk_space *ds)
104 {
105 struct disk_space ds_struct;
106 uint32_t val;
107
108 if (!ds) {
109 int ret = get_disk_space(".", &ds_struct);
110 if (ret < 0)
111 return ret;
112 ds = &ds_struct;
113 }
114 val = OPT_UINT32_VAL(DSS, MIN_FREE_MB);
115 if (val != 0)
116 if (ds->free_mb < val)
117 return 1;
118 val = OPT_UINT32_VAL(DSS, MIN_FREE_PERCENT);
119 if (val != 0)
120 if (ds->percent_free < val)
121 return 1;
122 val = OPT_UINT32_VAL(DSS, MIN_FREE_PERCENT_INODES);
123 if (val != 0)
124 if (ds->percent_free_inodes < val)
125 return 1;
126 return 0;
127 }
128
129 static void dump_dss_config(const char *msg)
130 {
131 const char dash[] = "-----------------------------";
132 char *lopsub_dump;
133 int ret;
134 FILE *log = logfile? logfile : stderr;
135 struct disk_space ds;
136 int64_t now = get_current_time();
137
138 if (OPT_UINT32_VAL(DSS, LOGLEVEL) > INFO)
139 return;
140
141 fprintf(log, "%s <%s config> %s\n", dash, msg, dash);
142 fprintf(log, "\n*** disk space ***\n\n");
143 ret = get_disk_space(".", &ds);
144 if (ret >= 0) {
145 DSS_INFO_LOG(("disk space low: %s\n", disk_space_low(&ds)?
146 "yes" : "no"));
147 log_disk_space(&ds);
148 } else
149 DSS_ERROR_LOG(("can not get free disk space: %s\n",
150 dss_strerror(-ret)));
151
152 /* we continue on errors from get_disk_space */
153
154 fprintf(log, "\n*** non-default options ***\n\n");
155 lopsub_dump = lls_dump_parse_result(lpr, CMD_PTR(DSS), true);
156 fprintf(log, "%s", lopsub_dump);
157 free(lopsub_dump);
158 fprintf(log, "\n*** non-default options for \"run\" ***\n\n");
159 lopsub_dump = lls_dump_parse_result(lpr, CMD_PTR(RUN), true);
160 fprintf(log, "%s", lopsub_dump);
161 free(lopsub_dump);
162 fprintf(log, "\n*** internal state ***\n\n");
163 fprintf(log,
164 "pid: %d\n"
165 "logile: %s\n"
166 "snapshot_currently_being_removed: %s\n"
167 "path_to_last_complete_snapshot: %s\n"
168 "reference_snapshot: %s\n"
169 "snapshot_creation_status: %s\n"
170 "snapshot_removal_status: %s\n"
171 "num_consecutive_rsync_errors: %d\n"
172 ,
173 (int) getpid(),
174 logfile? OPT_STRING_VAL(RUN, LOGFILE) : "stderr",
175 snapshot_currently_being_removed?
176 snapshot_currently_being_removed->name : "(none)",
177 path_to_last_complete_snapshot?
178 path_to_last_complete_snapshot : "(none)",
179 name_of_reference_snapshot?
180 name_of_reference_snapshot : "(none)",
181 hook_status_description[snapshot_creation_status],
182 hook_status_description[snapshot_removal_status],
183 num_consecutive_rsync_errors
184 );
185 if (create_pid != 0)
186 fprintf(log,
187 "create_pid: %" PRId32 "\n"
188 "create process is %sstopped\n"
189 ,
190 create_pid,
191 create_process_stopped? "" : "not "
192 );
193 if (remove_pid != 0)
194 fprintf(log, "remove_pid: %" PRId32 "\n", remove_pid);
195 if (next_snapshot_time != 0)
196 fprintf(log, "next snapshot due in %" PRId64 " seconds\n",
197 next_snapshot_time - now);
198 if (current_snapshot_creation_time != 0)
199 fprintf(log, "current_snapshot_creation_time: %"
200 PRId64 " (%" PRId64 " seconds ago)\n",
201 current_snapshot_creation_time,
202 now - current_snapshot_creation_time
203 );
204 if (next_removal_check.tv_sec != 0) {
205 fprintf(log, "next removal check: %llu (%llu seconds ago)\n",
206 (long long unsigned)next_removal_check.tv_sec,
207 now - (long long unsigned)next_removal_check.tv_sec
208 );
209
210 }
211 fprintf(log, "%s </%s config> %s\n", dash, msg, dash);
212 }
213
/*
 * Severity and source location of the next log message. These are set by
 * dss_log_set_params() and read by dss_log().
 */
static int loglevel = -1;
static const char *location_file = NULL;
static int location_line = -1;
static const char *location_func = NULL;

/*
 * Remember the loglevel and the code location (file, line, function) for
 * the next call to dss_log().
 */
void dss_log_set_params(int ll, const char *file, int line, const char *func)
{
	location_func = func;
	location_line = line;
	location_file = file;
	loglevel = ll;
}
226
227 /**
228 * The log function of dss.
229 *
230 * \param ll Loglevel.
231 * \param fml Usual format string.
232 *
233 * All DSS_XXX_LOG() macros use this function.
234 */
235 __printf_1_2 void dss_log(const char* fmt,...)
236 {
237 va_list argp;
238 FILE *outfd;
239 struct tm *tm;
240 time_t t1;
241 char str[255] = "";
242 int lpr_ll = lpr? OPT_UINT32_VAL(DSS, LOGLEVEL) : WARNING;
243
244 if (loglevel < lpr_ll)
245 return;
246 outfd = logfile? logfile : stderr;
247 if (subcmd == CMD_PTR(RUN)) {
248 time(&t1);
249 tm = localtime(&t1);
250 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
251 fprintf(outfd, "%s ", str);
252 if (lpr_ll <= INFO)
253 fprintf(outfd, "%i: ", loglevel);
254 }
255 if (subcmd == CMD_PTR(RUN))
256 #ifdef DSS_NO_FUNC_NAMES
257 fprintf(outfd, "%s:%d: ", location_file, location_line);
258 #else
259 fprintf(outfd, "%s: ", location_func);
260 #endif
261 va_start(argp, fmt);
262 vfprintf(outfd, fmt, argp);
263 va_end(argp);
264 }
265
266 /**
267 * Print a message either to stdout or to the log file.
268 */
269 static __printf_1_2 void dss_msg(const char* fmt,...)
270 {
271 FILE *outfd = logfile? logfile : stdout;
272 va_list argp;
273 va_start(argp, fmt);
274 vfprintf(outfd, fmt, argp);
275 va_end(argp);
276 }
277
278 static char *get_config_file_name(void)
279 {
280 char *home, *config_file;
281
282 if (OPT_GIVEN(DSS, CONFIG_FILE))
283 return dss_strdup(OPT_STRING_VAL(DSS, CONFIG_FILE));
284 home = get_homedir();
285 config_file = make_message("%s/.dssrc", home);
286 free(home);
287 return config_file;
288 }
289
290 static int send_signal(int sig)
291 {
292 pid_t pid;
293 char *config_file = get_config_file_name();
294 int ret = get_dss_pid(config_file, &pid);
295
296 free(config_file);
297 if (ret < 0)
298 return ret;
299 if (OPT_GIVEN(DSS, DRY_RUN)) {
300 dss_msg("%d\n", (int)pid);
301 return 0;
302 }
303 DSS_NOTICE_LOG(("sending signal %d to pid %d\n", sig, (int)pid));
304 ret = kill(pid, sig);
305 if (ret < 0)
306 return -ERRNO_TO_DSS_ERROR(errno);
307 return 1;
308 }
309
/* Maps a signal name (without the "SIG" prefix) to its number. */
struct signal_info {
	const char * const name;
	int num;
};

/*
 * The table below was taken 2016 from proc/sig.c of procps-3.2.8. Copyright
 * 1998-2003 by Albert Cahalan, GPLv2.
 *
 * Signals which do not exist on all systems are guarded by #ifdef.
 */
static const struct signal_info signal_table[] = {
	{.name = "ABRT", .num = SIGABRT}, /* IOT */
	{.name = "ALRM", .num = SIGALRM},
	{.name = "BUS", .num = SIGBUS},
	{.name = "CHLD", .num = SIGCHLD}, /* CLD */
	{.name = "CONT", .num = SIGCONT},
	{.name = "FPE", .num = SIGFPE},
	{.name = "HUP", .num = SIGHUP},
	{.name = "ILL", .num = SIGILL},
	{.name = "INT", .num = SIGINT},
	{.name = "KILL", .num = SIGKILL},
	{.name = "PIPE", .num = SIGPIPE},
#ifdef SIGPOLL
	{.name = "POLL", .num = SIGPOLL}, /* IO */
#endif
	{.name = "PROF", .num = SIGPROF},
#ifdef SIGPWR
	{.name = "PWR", .num = SIGPWR},
#endif
	{.name = "QUIT", .num = SIGQUIT},
	{.name = "SEGV", .num = SIGSEGV},
#ifdef SIGSTKFLT
	{.name = "STKFLT", .num = SIGSTKFLT},
#endif
	{.name = "STOP", .num = SIGSTOP},
	{.name = "SYS", .num = SIGSYS}, /* UNUSED */
	{.name = "TERM", .num = SIGTERM},
	{.name = "TRAP", .num = SIGTRAP},
	{.name = "TSTP", .num = SIGTSTP},
	{.name = "TTIN", .num = SIGTTIN},
	{.name = "TTOU", .num = SIGTTOU},
	{.name = "URG", .num = SIGURG},
	{.name = "USR1", .num = SIGUSR1},
	{.name = "USR2", .num = SIGUSR2},
	{.name = "VTALRM", .num = SIGVTALRM},
	{.name = "WINCH", .num = SIGWINCH},
	{.name = "XCPU", .num = SIGXCPU},
	{.name = "XFSZ", .num = SIGXFSZ}
};

/* Number of entries of the signal table. */
#define SIGNAL_TABLE_SIZE (sizeof(signal_table) / sizeof(signal_table[0]))

/* Fallback for systems which do not define the largest signal number. */
#ifndef SIGRTMAX
#define SIGRTMAX 64
#endif
363
364 static int com_kill(void)
365 {
366 const char *arg = OPT_STRING_VAL(KILL, SIGNAL);
367 int ret, i;
368
369 if (*arg >= '0' && *arg <= '9') {
370 int64_t val;
371 ret = dss_atoi64(arg, &val);
372 if (ret < 0)
373 return ret;
374 if (val < 0 || val > SIGRTMAX)
375 return -ERRNO_TO_DSS_ERROR(EINVAL);
376 return send_signal(val);
377 }
378 if (strncasecmp(arg, "sig", 3) == 0)
379 arg += 3;
380 if (strcasecmp(arg, "CLD") == 0)
381 return send_signal(SIGCHLD);
382 if (strcasecmp(arg, "IOT") == 0)
383 return send_signal(SIGABRT);
384 for (i = 0; i < SIGNAL_TABLE_SIZE; i++)
385 if (strcasecmp(arg, signal_table[i].name) == 0)
386 return send_signal(signal_table[i].num);
387 DSS_ERROR_LOG(("invalid sigspec: %s\n", arg));
388 return -ERRNO_TO_DSS_ERROR(EINVAL);
389 }
390 EXPORT_CMD_HANDLER(kill);
391
392 static void dss_get_snapshot_list(struct snapshot_list *sl)
393 {
394 get_snapshot_list(sl, OPT_UINT32_VAL(DSS, UNIT_INTERVAL),
395 OPT_UINT32_VAL(DSS, NUM_INTERVALS));
396 }
397
398 static int64_t compute_next_snapshot_time(void)
399 {
400 int64_t x = 0, now = get_current_time(), unit_interval
401 = 24 * 3600 * OPT_UINT32_VAL(DSS, UNIT_INTERVAL), ret;
402 unsigned wanted = desired_number_of_snapshots(0,
403 OPT_UINT32_VAL(DSS, NUM_INTERVALS)),
404 num_complete = 0;
405 int i;
406 struct snapshot *s = NULL;
407 struct snapshot_list sl;
408
409 dss_get_snapshot_list(&sl);
410 FOR_EACH_SNAPSHOT(s, i, &sl) {
411 if (!(s->flags & SS_COMPLETE))
412 continue;
413 num_complete++;
414 x += s->completion_time - s->creation_time;
415 }
416 assert(x >= 0);
417
418 ret = now;
419 if (num_complete == 0)
420 goto out;
421 x /= num_complete; /* avg time to create one snapshot */
422 if (unit_interval < x * wanted) /* oops, no sleep at all */
423 goto out;
424 ret = s->completion_time + unit_interval / wanted - x;
425 out:
426 free_snapshot_list(&sl);
427 return ret;
428 }
429
430 static inline void invalidate_next_snapshot_time(void)
431 {
432 next_snapshot_time = 0;
433 }
434
435 static inline int next_snapshot_time_is_valid(void)
436 {
437 return next_snapshot_time != 0;
438 }
439
440 static int next_snapshot_is_due(void)
441 {
442 int64_t now = get_current_time();
443
444 if (!next_snapshot_time_is_valid())
445 next_snapshot_time = compute_next_snapshot_time();
446 if (next_snapshot_time <= now) {
447 DSS_DEBUG_LOG(("next snapshot: now\n"));
448 return 1;
449 }
450 DSS_DEBUG_LOG(("next snapshot due in %" PRId64 " seconds\n",
451 next_snapshot_time - now));
452 return 0;
453 }
454
455 static void pre_create_hook(void)
456 {
457 assert(snapshot_creation_status == HS_READY);
458 /* make sure that the next snapshot time will be recomputed */
459 invalidate_next_snapshot_time();
460 DSS_DEBUG_LOG(("executing %s\n", OPT_STRING_VAL(DSS, PRE_CREATE_HOOK)));
461 dss_exec_cmdline_pid(&create_pid, OPT_STRING_VAL(DSS, PRE_CREATE_HOOK));
462 snapshot_creation_status = HS_PRE_RUNNING;
463 }
464
465 static void pre_remove_hook(struct snapshot *s, const char *why)
466 {
467 char *cmd;
468
469 if (!s)
470 return;
471 DSS_DEBUG_LOG(("%s snapshot %s\n", why, s->name));
472 assert(snapshot_removal_status == HS_READY);
473 assert(remove_pid == 0);
474 assert(!snapshot_currently_being_removed);
475
476 snapshot_currently_being_removed = dss_malloc(sizeof(struct snapshot));
477 *snapshot_currently_being_removed = *s;
478 snapshot_currently_being_removed->name = dss_strdup(s->name);
479
480 cmd = make_message("%s %s/%s", OPT_STRING_VAL(DSS, PRE_REMOVE_HOOK),
481 OPT_STRING_VAL(DSS, DEST_DIR), s->name);
482 DSS_DEBUG_LOG(("executing %s\n", cmd));
483 dss_exec_cmdline_pid(&remove_pid, cmd);
484 free(cmd);
485 snapshot_removal_status = HS_PRE_RUNNING;
486 }
487
488 static int exec_rm(void)
489 {
490 struct snapshot *s = snapshot_currently_being_removed;
491 char *new_name = being_deleted_name(s);
492 char *argv[4];
493 int ret;
494
495 argv[0] = "rm";
496 argv[1] = "-rf";
497 argv[2] = new_name;
498 argv[3] = NULL;
499
500 assert(snapshot_removal_status == HS_PRE_SUCCESS);
501 assert(remove_pid == 0);
502
503 DSS_NOTICE_LOG(("removing %s (interval = %i)\n", s->name, s->interval));
504 ret = dss_rename(s->name, new_name);
505 if (ret < 0)
506 goto out;
507 dss_exec(&remove_pid, argv[0], argv);
508 snapshot_removal_status = HS_RUNNING;
509 out:
510 free(new_name);
511 return ret;
512 }
513
514 static int snapshot_is_being_created(struct snapshot *s)
515 {
516 return s->creation_time == current_snapshot_creation_time;
517 }
518
519 static struct snapshot *find_orphaned_snapshot(struct snapshot_list *sl)
520 {
521 struct snapshot *s;
522 int i;
523
524 DSS_DEBUG_LOG(("looking for old incomplete snapshots\n"));
525 FOR_EACH_SNAPSHOT(s, i, sl) {
526 if (snapshot_is_being_created(s))
527 continue;
528 /*
529 * We know that no rm is currently running, so if s is marked
530 * as being deleted, a previously started rm must have failed.
531 */
532 if (s->flags & SS_BEING_DELETED)
533 return s;
534
535 if (s->flags & SS_COMPLETE) /* good snapshot */
536 continue;
537 /*
538 * This snapshot is incomplete and it is not the snapshot
539 * currently being created. However, we must not remove it if
540 * rsync is about to be restarted. As only the newest snapshot
541 * can be restarted, this snapshot is orphaned if it is not the
542 * newest snapshot or if we are not about to restart rsync.
543 */
544 if (get_newest_snapshot(sl) != s)
545 return s;
546 if (snapshot_creation_status != HS_NEEDS_RESTART)
547 return s;
548 }
549 /* no orphaned snapshots */
550 return NULL;
551 }
552
553 static int is_reference_snapshot(struct snapshot *s)
554 {
555 if (!name_of_reference_snapshot)
556 return 0;
557 return strcmp(s->name, name_of_reference_snapshot)? 0 : 1;
558 }
559
560 /*
561 * return: 0: no redundant snapshots, 1: rm process started, negative: error
562 */
563 static struct snapshot *find_redundant_snapshot(struct snapshot_list *sl)
564 {
565 int i, interval;
566 struct snapshot *s;
567 unsigned missing = 0;
568 uint32_t N = OPT_UINT32_VAL(DSS, NUM_INTERVALS);
569
570 DSS_DEBUG_LOG(("looking for intervals containing too many snapshots\n"));
571 for (interval = N - 1; interval >= 0; interval--) {
572 unsigned keep = desired_number_of_snapshots(interval, N);
573 unsigned num = sl->interval_count[interval];
574 struct snapshot *victim = NULL, *prev = NULL;
575 int64_t score = LONG_MAX;
576
577 if (keep >= num)
578 missing += keep - num;
579 if (keep + missing >= num)
580 continue;
581 /* redundant snapshot in this interval, pick snapshot with lowest score */
582 FOR_EACH_SNAPSHOT(s, i, sl) {
583 int64_t this_score;
584
585 if (snapshot_is_being_created(s))
586 continue;
587 if (is_reference_snapshot(s))
588 continue;
589 if (s->interval > interval) {
590 prev = s;
591 continue;
592 }
593 if (s->interval < interval)
594 break;
595 if (!victim) {
596 victim = s;
597 prev = s;
598 continue;
599 }
600 assert(prev);
601 /* check if s is a better victim */
602 this_score = s->creation_time - prev->creation_time;
603 assert(this_score >= 0);
604 if (this_score < score) {
605 score = this_score;
606 victim = s;
607 }
608 prev = s;
609 }
610 assert(victim);
611 return victim;
612 }
613 return NULL;
614 }
615
616 static struct snapshot *find_outdated_snapshot(struct snapshot_list *sl)
617 {
618 int i;
619 struct snapshot *s;
620
621 DSS_DEBUG_LOG(("looking for snapshots belonging to intervals >= %d\n",
622 OPT_UINT32_VAL(DSS, NUM_INTERVALS)));
623 FOR_EACH_SNAPSHOT(s, i, sl) {
624 if (snapshot_is_being_created(s))
625 continue;
626 if (is_reference_snapshot(s))
627 continue;
628 if (s->interval < OPT_UINT32_VAL(DSS, NUM_INTERVALS))
629 continue;
630 return s;
631 }
632 return NULL;
633 }
634
635 static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
636 {
637 int i, num_complete;
638 struct snapshot *s, *ref = NULL;
639
640 num_complete = num_complete_snapshots(sl);
641 if (num_complete <= OPT_UINT32_VAL(DSS, MIN_COMPLETE))
642 return NULL;
643 FOR_EACH_SNAPSHOT(s, i, sl) {
644 if (snapshot_is_being_created(s))
645 continue;
646 if (is_reference_snapshot(s)) { /* avoid this one */
647 ref = s;
648 continue;
649 }
650 DSS_INFO_LOG(("oldest removable snapshot: %s\n", s->name));
651 return s;
652 }
653 assert(ref);
654 DSS_WARNING_LOG(("removing reference snapshot %s\n", ref->name));
655 return ref;
656 }
657
658 static int rename_incomplete_snapshot(int64_t start)
659 {
660 char *old_name;
661 int ret;
662 int64_t now;
663
664 /*
665 * We don't want the dss_rename() below to fail with EEXIST because the
666 * last complete snapshot was created (and completed) in the same
667 * second as this one.
668 */
669 while ((now = get_current_time()) == start)
670 sleep(1);
671 free(path_to_last_complete_snapshot);
672 ret = complete_name(start, now, &path_to_last_complete_snapshot);
673 if (ret < 0)
674 return ret;
675 old_name = incomplete_name(start);
676 ret = dss_rename(old_name, path_to_last_complete_snapshot);
677 if (ret >= 0)
678 DSS_NOTICE_LOG(("%s -> %s\n", old_name,
679 path_to_last_complete_snapshot));
680 free(old_name);
681 return ret;
682 }
683
684 static int try_to_free_disk_space(void)
685 {
686 int ret;
687 struct snapshot_list sl;
688 struct snapshot *victim;
689 struct timeval now;
690 const char *why;
691 int low_disk_space;
692
693 ret = disk_space_low(NULL);
694 if (ret < 0)
695 return ret;
696 low_disk_space = ret;
697 gettimeofday(&now, NULL);
698 if (tv_diff(&next_removal_check, &now, NULL) > 0)
699 return 0;
700 if (!low_disk_space) {
701 if (OPT_GIVEN(DSS, KEEP_REDUNDANT))
702 return 0;
703 if (snapshot_creation_status != HS_READY)
704 return 0;
705 if (next_snapshot_is_due())
706 return 0;
707 }
708 /*
709 * Idle and --keep_redundant not given, or low disk space. Look at
710 * existing snapshots.
711 */
712 dss_get_snapshot_list(&sl);
713 ret = 0;
714 /*
715 * Don't remove anything if there is free space and we have fewer
716 * snapshots than configured, plus one. This way there is always one
717 * snapshot that can be recycled.
718 */
719 if (!low_disk_space && sl.num_snapshots <=
720 1 << OPT_UINT32_VAL(DSS, NUM_INTERVALS))
721 goto out;
722 why = "outdated";
723 victim = find_outdated_snapshot(&sl);
724 if (victim)
725 goto remove;
726 why = "redundant";
727 victim = find_redundant_snapshot(&sl);
728 if (victim)
729 goto remove;
730 why = "orphaned";
731 victim = find_orphaned_snapshot(&sl);
732 if (victim)
733 goto remove;
734 /* try harder only if disk space is low */
735 if (!low_disk_space)
736 goto out;
737 DSS_WARNING_LOG(("disk space low and nothing obvious to remove\n"));
738 why = "oldest";
739 victim = find_oldest_removable_snapshot(&sl);
740 if (victim)
741 goto remove;
742 DSS_CRIT_LOG(("uhuhu: disk space low and nothing to remove\n"));
743 ret = -ERRNO_TO_DSS_ERROR(ENOSPC);
744 goto out;
745 remove:
746 pre_remove_hook(victim, why);
747 out:
748 free_snapshot_list(&sl);
749 return ret;
750 }
751
752 static void post_create_hook(void)
753 {
754 char *cmd = make_message("%s %s/%s",
755 OPT_STRING_VAL(DSS, POST_CREATE_HOOK),
756 OPT_STRING_VAL(DSS, DEST_DIR), path_to_last_complete_snapshot);
757 DSS_NOTICE_LOG(("executing %s\n", cmd));
758 dss_exec_cmdline_pid(&create_pid, cmd);
759 free(cmd);
760 snapshot_creation_status = HS_POST_RUNNING;
761 }
762
763 static void post_remove_hook(void)
764 {
765 char *cmd;
766 struct snapshot *s = snapshot_currently_being_removed;
767
768 assert(s);
769
770 cmd = make_message("%s %s/%s", OPT_STRING_VAL(DSS, POST_REMOVE_HOOK),
771 OPT_STRING_VAL(DSS, DEST_DIR), s->name);
772 DSS_NOTICE_LOG(("executing %s\n", cmd));
773 dss_exec_cmdline_pid(&remove_pid, cmd);
774 free(cmd);
775 snapshot_removal_status = HS_POST_RUNNING;
776 }
777
778 static void dss_kill(pid_t pid, int sig, const char *msg)
779 {
780 const char *signame, *process_name;
781
782 if (pid == 0)
783 return;
784 switch (sig) {
785 case SIGTERM: signame = "TERM"; break;
786 case SIGSTOP: signame = "STOP"; break;
787 case SIGCONT: signame = "CONT"; break;
788 default: signame = "????";
789 }
790
791 if (pid == create_pid)
792 process_name = "create";
793 else if (pid == remove_pid)
794 process_name = "remove";
795 else process_name = "??????";
796
797 if (msg)
798 DSS_INFO_LOG(("%s\n", msg));
799 DSS_DEBUG_LOG(("sending signal %d (%s) to pid %d (%s process)\n",
800 sig, signame, (int)pid, process_name));
801 if (kill(pid, sig) >= 0)
802 return;
803 DSS_INFO_LOG(("failed to send signal %d (%s) to pid %d (%s process)\n",
804 sig, signame, (int)pid, process_name));
805 }
806
807 static void stop_create_process(void)
808 {
809 if (create_process_stopped)
810 return;
811 dss_kill(create_pid, SIGSTOP, "suspending create process");
812 create_process_stopped = 1;
813 }
814
815 static void restart_create_process(void)
816 {
817 if (!create_process_stopped)
818 return;
819 dss_kill(create_pid, SIGCONT, "resuming create process");
820 create_process_stopped = 0;
821 }
822
/**
 * Print a log message about the exit status of a child.
 *
 * \param pid The process id of the terminated child.
 * \param status The wait status of the child.
 *
 * A regular exit is logged with loglevel info, death by signal with
 * loglevel notice and everything else with loglevel warning.
 */
static void log_termination_msg(pid_t pid, int status)
{
	if (WIFEXITED(status))
		DSS_INFO_LOG(("child %i exited. Exit status: %i\n", (int)pid,
			WEXITSTATUS(status)));
	else if (WIFSIGNALED(status))
		DSS_NOTICE_LOG(("child %i was killed by signal %i\n", (int)pid,
			WTERMSIG(status)));
	else
		DSS_WARNING_LOG(("child %i terminated abnormally\n", (int)pid));
}
837
838 static int wait_for_process(pid_t pid, int *status)
839 {
840 int ret;
841
842 DSS_DEBUG_LOG(("Waiting for process %d to terminate\n", (int)pid));
843 for (;;) {
844 fd_set rfds;
845
846 FD_ZERO(&rfds);
847 FD_SET(signal_pipe, &rfds);
848 ret = dss_select(signal_pipe + 1, &rfds, NULL, NULL);
849 if (ret < 0)
850 break;
851 ret = next_signal();
852 if (!ret)
853 continue;
854 if (ret == SIGCHLD) {
855 ret = waitpid(pid, status, 0);
856 if (ret >= 0)
857 break;
858 if (errno != EINTR) { /* error */
859 ret = -ERRNO_TO_DSS_ERROR(errno);
860 break;
861 }
862 }
863 /* SIGINT or SIGTERM */
864 dss_kill(pid, SIGTERM, "killing child process");
865 }
866 if (ret < 0)
867 DSS_ERROR_LOG(("failed to wait for process %d\n", (int)pid));
868 else
869 log_termination_msg(pid, *status);
870 return ret;
871 }
872
873 static void handle_pre_remove_exit(int status)
874 {
875 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
876 snapshot_removal_status = HS_READY;
877 gettimeofday(&next_removal_check, NULL);
878 next_removal_check.tv_sec += 60;
879 return;
880 }
881 snapshot_removal_status = HS_PRE_SUCCESS;
882 }
883
884 static int handle_rm_exit(int status)
885 {
886 if (!WIFEXITED(status)) {
887 snapshot_removal_status = HS_READY;
888 return -E_INVOLUNTARY_EXIT;
889 }
890 if (WEXITSTATUS(status)) {
891 snapshot_removal_status = HS_READY;
892 return -E_BAD_EXIT_CODE;
893 }
894 snapshot_removal_status = HS_SUCCESS;
895 return 1;
896 }
897
898 static void handle_post_remove_exit(void)
899 {
900 snapshot_removal_status = HS_READY;
901 }
902
903 static int handle_remove_exit(int status)
904 {
905 int ret;
906 struct snapshot *s = snapshot_currently_being_removed;
907
908 assert(s);
909 switch (snapshot_removal_status) {
910 case HS_PRE_RUNNING:
911 handle_pre_remove_exit(status);
912 ret = 1;
913 break;
914 case HS_RUNNING:
915 ret = handle_rm_exit(status);
916 break;
917 case HS_POST_RUNNING:
918 handle_post_remove_exit();
919 ret = 1;
920 break;
921 default:
922 ret = -E_BUG;
923 }
924 if (snapshot_removal_status == HS_READY) {
925 free(s->name);
926 free(s);
927 snapshot_currently_being_removed = NULL;
928 }
929 remove_pid = 0;
930 return ret;
931 }
932
933 static int wait_for_remove_process(void)
934 {
935 int status, ret;
936
937 assert(remove_pid);
938 assert(
939 snapshot_removal_status == HS_PRE_RUNNING ||
940 snapshot_removal_status == HS_RUNNING ||
941 snapshot_removal_status == HS_POST_RUNNING
942 );
943 ret = wait_for_process(remove_pid, &status);
944 if (ret < 0)
945 return ret;
946 return handle_remove_exit(status);
947 }
948
949 static int handle_rsync_exit(int status)
950 {
951 int es, ret;
952
953 if (!WIFEXITED(status)) {
954 DSS_ERROR_LOG(("rsync process %d died involuntary\n", (int)create_pid));
955 ret = -E_INVOLUNTARY_EXIT;
956 snapshot_creation_status = HS_READY;
957 goto out;
958 }
959 es = WEXITSTATUS(status);
960 /*
961 * Restart rsync on non-fatal errors:
962 * 24: Partial transfer due to vanished source files
963 */
964 if (es != 0 && es != 24) {
965 DSS_WARNING_LOG(("rsync exit code %d, error count %d\n",
966 es, ++num_consecutive_rsync_errors));
967 if (!logfile) { /* called by com_run() */
968 ret = -E_BAD_EXIT_CODE;
969 goto out;
970 }
971 if (num_consecutive_rsync_errors >
972 OPT_UINT32_VAL(RUN, MAX_RSYNC_ERRORS)) {
973 ret = -E_TOO_MANY_RSYNC_ERRORS;
974 snapshot_creation_status = HS_READY;
975 goto out;
976 }
977 DSS_WARNING_LOG(("restarting rsync process\n"));
978 snapshot_creation_status = HS_NEEDS_RESTART;
979 next_snapshot_time = get_current_time() + 60;
980 ret = 1;
981 goto out;
982 }
983 num_consecutive_rsync_errors = 0;
984 ret = rename_incomplete_snapshot(current_snapshot_creation_time);
985 if (ret < 0)
986 goto out;
987 snapshot_creation_status = HS_SUCCESS;
988 free(name_of_reference_snapshot);
989 name_of_reference_snapshot = NULL;
990 out:
991 create_process_stopped = 0;
992 return ret;
993 }
994
995 static int handle_pre_create_hook_exit(int status)
996 {
997 int es, ret;
998 static int warn_count;
999
1000 if (!WIFEXITED(status)) {
1001 snapshot_creation_status = HS_READY;
1002 ret = -E_INVOLUNTARY_EXIT;
1003 goto out;
1004 }
1005 es = WEXITSTATUS(status);
1006 if (es) {
1007 if (!warn_count--) {
1008 DSS_NOTICE_LOG(("pre_create_hook %s returned %d\n",
1009 OPT_STRING_VAL(DSS, PRE_CREATE_HOOK), es));
1010 DSS_NOTICE_LOG(("deferring snapshot creation...\n"));
1011 warn_count = 60; /* warn only once per hour */
1012 }
1013 next_snapshot_time = get_current_time() + 60;
1014 snapshot_creation_status = HS_READY;
1015 ret = 0;
1016 goto out;
1017 }
1018 warn_count = 0;
1019 snapshot_creation_status = HS_PRE_SUCCESS;
1020 ret = 1;
1021 out:
1022 return ret;
1023 }
1024
1025 static int handle_sigchld(void)
1026 {
1027 pid_t pid;
1028 int status, ret = reap_child(&pid, &status);
1029
1030 if (ret <= 0)
1031 return ret;
1032
1033 if (pid == create_pid) {
1034 switch (snapshot_creation_status) {
1035 case HS_PRE_RUNNING:
1036 ret = handle_pre_create_hook_exit(status);
1037 break;
1038 case HS_RUNNING:
1039 ret = handle_rsync_exit(status);
1040 break;
1041 case HS_POST_RUNNING:
1042 snapshot_creation_status = HS_READY;
1043 ret = 1;
1044 break;
1045 default:
1046 DSS_EMERG_LOG(("BUG: create can't die in status %d\n",
1047 snapshot_creation_status));
1048 return -E_BUG;
1049 }
1050 create_pid = 0;
1051 return ret;
1052 }
1053 if (pid == remove_pid) {
1054 ret = handle_remove_exit(status);
1055 if (ret < 0)
1056 return ret;
1057 return ret;
1058 }
1059 DSS_EMERG_LOG(("BUG: unknown process %d died\n", (int)pid));
1060 return -E_BUG;
1061 }
1062
1063 static int change_to_dest_dir(void)
1064 {
1065 int ret;
1066 const char *dd = OPT_STRING_VAL(DSS, DEST_DIR);
1067
1068 DSS_INFO_LOG(("changing cwd to %s\n", dd));
1069 if (chdir(dd) >= 0)
1070 return 1;
1071 ret = -ERRNO_TO_DSS_ERROR(errno);
1072 DSS_ERROR_LOG(("could not change cwd to %s\n", dd));
1073 return ret;
1074 }
1075
1076 static int check_config(void)
1077 {
1078 int ret;
1079 uint32_t unit_interval = OPT_UINT32_VAL(DSS, UNIT_INTERVAL);
1080 uint32_t num_intervals = OPT_UINT32_VAL(DSS, NUM_INTERVALS);
1081
1082 if (unit_interval == 0) {
1083 DSS_ERROR_LOG(("bad unit interval: %i\n", unit_interval));
1084 return -E_INVALID_NUMBER;
1085 }
1086 DSS_DEBUG_LOG(("unit interval: %i day(s)\n", unit_interval));
1087
1088 if (num_intervals == 0 || num_intervals > 30) {
1089 DSS_ERROR_LOG(("bad number of intervals: %i\n", num_intervals));
1090 return -E_INVALID_NUMBER;
1091 }
1092 if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE))
1093 if (!OPT_GIVEN(DSS, SOURCE_DIR)) {
1094 DSS_ERROR_LOG(("--source-dir required\n"));
1095 return -E_SYNTAX;
1096 }
1097 if (subcmd == CMD_PTR(RUN) || subcmd == CMD_PTR(CREATE)
1098 || subcmd == CMD_PTR(LS) || subcmd == CMD_PTR(PRUNE)) {
1099 if (!OPT_GIVEN(DSS, DEST_DIR)) {
1100 DSS_ERROR_LOG(("--dest-dir required\n"));
1101 return -E_SYNTAX;
1102 }
1103 ret = change_to_dest_dir();
1104 if (ret < 0)
1105 return ret;
1106 }
1107 DSS_DEBUG_LOG(("number of intervals: %i\n", num_intervals));
1108 return 1;
1109 }
1110
1111 static int lopsub_error(int lopsub_ret, char **errctx)
1112 {
1113 const char *msg = lls_strerror(-lopsub_ret);
1114 if (*errctx)
1115 DSS_ERROR_LOG(("%s: %s\n", *errctx, msg));
1116 else
1117 DSS_ERROR_LOG(("%s\n", msg));
1118 free(*errctx);
1119 *errctx = NULL;
1120 return -E_LOPSUB;
1121 }
1122
1123 static int parse_config_file(bool sighup, const struct lls_command *cmd)
1124 {
1125 int ret, fd = -1;
1126 char *config_file = get_config_file_name();
1127 struct stat statbuf;
1128 void *map;
1129 size_t sz;
1130 int cf_argc;
1131 char **cf_argv, *errctx = NULL;
1132 struct lls_parse_result *cf_lpr, *merged_lpr, *clpr;
1133 const char *subcmd_name;
1134
1135 ret = open(config_file, O_RDONLY);
1136 if (ret < 0) {
1137 if (errno != ENOENT || OPT_GIVEN(DSS, CONFIG_FILE)) {
1138 ret = -ERRNO_TO_DSS_ERROR(errno);
1139 DSS_ERROR_LOG(("config file %s can not be opened\n",
1140 config_file));
1141 goto out;
1142 }
1143 /* no config file -- nothing to do */
1144 ret = 0;
1145 goto success;
1146 }
1147 fd = ret;
1148 ret = fstat(fd, &statbuf);
1149 if (ret < 0) {
1150 ret = -ERRNO_TO_DSS_ERROR(errno);
1151 DSS_ERROR_LOG(("failed to stat config file %s\n", config_file));
1152 goto close_fd;
1153 }
1154 sz = statbuf.st_size;
1155 if (sz == 0) { /* config file is empty -- nothing to do */
1156 ret = 0;
1157 goto success;
1158 }
1159 map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
1160 if (map == MAP_FAILED) {
1161 ret = -ERRNO_TO_DSS_ERROR(errno);
1162 DSS_ERROR_LOG(("failed to mmap config file %s\n",
1163 config_file));
1164 goto close_fd;
1165 }
1166 if (cmd == CMD_PTR(DSS))
1167 subcmd_name = NULL;
1168 else
1169 subcmd_name = lls_command_name(cmd);
1170 ret = lls_convert_config(map, sz, subcmd_name, &cf_argv, &errctx);
1171 munmap(map, sz);
1172 if (ret < 0) {
1173 DSS_ERROR_LOG(("failed to convert config file %s\n",
1174 config_file));
1175 ret = lopsub_error(ret, &errctx);
1176 goto close_fd;
1177 }
1178 cf_argc = ret;
1179 ret = lls_parse(cf_argc, cf_argv, cmd, &cf_lpr, &errctx);
1180 lls_free_argv(cf_argv);
1181 if (ret < 0) {
1182 ret = lopsub_error(ret, &errctx);
1183 goto close_fd;
1184 }
1185 clpr = cmd == CMD_PTR(DSS)? cmdline_lpr : cmdline_sublpr;
1186 if (sighup) /* config file overrides command line */
1187 ret = lls_merge(cf_lpr, clpr, cmd, &merged_lpr, &errctx);
1188 else /* command line options overrride config file options */
1189 ret = lls_merge(clpr, cf_lpr, cmd, &merged_lpr, &errctx);
1190 lls_free_parse_result(cf_lpr, cmd);
1191 if (ret < 0) {
1192 ret = lopsub_error(ret, &errctx);
1193 goto close_fd;
1194 }
1195 ret = 1;
1196 success:
1197 assert(ret >= 0);
1198 DSS_DEBUG_LOG(("loglevel: %d\n", OPT_UINT32_VAL(DSS, LOGLEVEL)));
1199 if (cmd != CMD_PTR(DSS)) {
1200 if (ret > 0) {
1201 if (sublpr != cmdline_sublpr)
1202 lls_free_parse_result(sublpr, cmd);
1203 sublpr = merged_lpr;
1204 } else
1205 sublpr = cmdline_sublpr;
1206 } else {
1207 if (ret > 0) {
1208 if (lpr != cmdline_lpr)
1209 lls_free_parse_result(lpr, cmd);
1210 lpr = merged_lpr;
1211 } else
1212 lpr = cmdline_lpr;
1213 }
1214 close_fd:
1215 if (fd >= 0)
1216 close(fd);
1217 out:
1218 free(config_file);
1219 if (ret < 0)
1220 DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));
1221 return ret;
1222 }
1223
1224 static int handle_sighup(void)
1225 {
1226 int ret;
1227
1228 DSS_NOTICE_LOG(("SIGHUP, re-reading config\n"));
1229 dump_dss_config("old");
1230 ret = parse_config_file(true /* SIGHUP */, CMD_PTR(DSS));
1231 if (ret < 0)
1232 return ret;
1233 ret = parse_config_file(true /* SIGHUP */, CMD_PTR(RUN));
1234 if (ret < 0)
1235 return ret;
1236 ret = check_config();
1237 if (ret < 0)
1238 return ret;
1239 close_log(logfile);
1240 logfile = NULL;
1241 if (OPT_GIVEN(RUN, DAEMON) || daemonized) {
1242 logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE));
1243 log_welcome(OPT_UINT32_VAL(DSS, LOGLEVEL));
1244 daemonized = true;
1245 }
1246 dump_dss_config("reloaded");
1247 invalidate_next_snapshot_time();
1248 return 1;
1249 }
1250
1251 static void kill_children(void)
1252 {
1253 restart_create_process();
1254 dss_kill(create_pid, SIGTERM, NULL);
1255 dss_kill(remove_pid, SIGTERM, NULL);
1256 }
1257
1258 static int handle_signal(void)
1259 {
1260 int sig, ret = next_signal();
1261
1262 if (ret <= 0)
1263 goto out;
1264 sig = ret;
1265 switch (sig) {
1266 case SIGINT:
1267 case SIGTERM:
1268 kill_children();
1269 ret = -E_SIGNAL;
1270 break;
1271 case SIGHUP:
1272 ret = handle_sighup();
1273 break;
1274 case SIGCHLD:
1275 ret = handle_sigchld();
1276 break;
1277 }
1278 out:
1279 if (ret < 0)
1280 DSS_ERROR_LOG(("%s\n", dss_strerror(-ret)));
1281 return ret;
1282 }
1283
1284 /*
1285 * We can not use rsync locally if the local user is different from the remote
1286 * user or if the src dir is not on the local host (or both).
1287 */
1288 static int use_rsync_locally(char *logname)
1289 {
1290 const char *h = OPT_STRING_VAL(DSS, REMOTE_HOST);
1291
1292 if (strcmp(h, "localhost") && strcmp(h, "127.0.0.1"))
1293 return 0;
1294 if (OPT_GIVEN(DSS, REMOTE_USER) &&
1295 strcmp(OPT_STRING_VAL(DSS, REMOTE_USER), logname))
1296 return 0;
1297 return 1;
1298 }
1299
1300 static int rename_resume_snap(int64_t creation_time)
1301 {
1302 struct snapshot_list sl;
1303 struct snapshot *s = NULL;
1304 char *new_name = incomplete_name(creation_time);
1305 int ret;
1306 const char *why;
1307
1308 sl.num_snapshots = 0;
1309
1310 ret = 0;
1311 dss_get_snapshot_list(&sl);
1312 /*
1313 * Snapshot recycling: We first look at the newest snapshot. If this
1314 * snapshot happens to be incomplete, the last rsync process was
1315 * aborted and we reuse this one. Otherwise we look at snapshots which
1316 * could be removed (outdated and redundant snapshots) as candidates
1317 * for recycling. If no outdated/redundant snapshot exists, we check if
1318 * there is an orphaned snapshot, which likely is useless anyway.
1319 *
1320 * Only if no existing snapshot is suitable for recycling, we bite the
1321 * bullet and create a new one.
1322 */
1323 s = get_newest_snapshot(&sl);
1324 if (!s) /* no snapshots at all */
1325 goto out;
1326 /* re-use last snapshot if it is incomplete */
1327 why = "aborted";
1328 if ((s->flags & SS_COMPLETE) == 0)
1329 goto out;
1330 why = "outdated";
1331 s = find_outdated_snapshot(&sl);
1332 if (s)
1333 goto out;
1334 why = "redundant";
1335 s = find_redundant_snapshot(&sl);
1336 if (s)
1337 goto out;
1338 why = "orphaned";
1339 s = find_orphaned_snapshot(&sl);
1340 out:
1341 if (s) {
1342 DSS_NOTICE_LOG(("recycling %s snapshot %s\n", why, s->name));
1343 ret = dss_rename(s->name, new_name);
1344 }
1345 if (ret >= 0)
1346 DSS_NOTICE_LOG(("creating %s\n", new_name));
1347 free(new_name);
1348 free_snapshot_list(&sl);
1349 return ret;
1350 }
1351
1352 static void create_rsync_argv(char ***argv, int64_t *num)
1353 {
1354 char *logname;
1355 int i = 0, j, N = OPT_GIVEN(DSS, RSYNC_OPTION);
1356 struct snapshot_list sl;
1357 static bool seeded;
1358
1359 dss_get_snapshot_list(&sl);
1360 assert(!name_of_reference_snapshot);
1361 name_of_reference_snapshot = name_of_newest_complete_snapshot(&sl);
1362 free_snapshot_list(&sl);
1363
1364 *argv = dss_malloc((15 + N) * sizeof(char *));
1365 (*argv)[i++] = dss_strdup("rsync");
1366 (*argv)[i++] = dss_strdup("-a");
1367 (*argv)[i++] = dss_strdup("--delete");
1368 if (!seeded) {
1369 srandom((unsigned)time(NULL)); /* no need to be fancy here */
1370 seeded = true;
1371 }
1372 if (1000 * (random() / (RAND_MAX + 1.0)) < OPT_UINT32_VAL(DSS, CHECKSUM)) {
1373 DSS_NOTICE_LOG(("adding --checksum to rsync options\n"));
1374 (*argv)[i++] = dss_strdup("--checksum");
1375 }
1376 for (j = 0; j < N; j++)
1377 (*argv)[i++] = dss_strdup(lls_string_val(j,
1378 OPT_RESULT(DSS, RSYNC_OPTION)));
1379 if (name_of_reference_snapshot) {
1380 DSS_INFO_LOG(("using %s as reference\n", name_of_reference_snapshot));
1381 (*argv)[i++] = make_message("--link-dest=../%s",
1382 name_of_reference_snapshot);
1383 } else
1384 DSS_INFO_LOG(("no suitable reference snapshot found\n"));
1385 logname = dss_logname();
1386 if (use_rsync_locally(logname))
1387 (*argv)[i++] = dss_strdup(OPT_STRING_VAL(DSS, SOURCE_DIR));
1388 else
1389 (*argv)[i++] = make_message("%s@%s:%s/",
1390 OPT_GIVEN(DSS, REMOTE_USER)?
1391 OPT_STRING_VAL(DSS, REMOTE_USER) : logname,
1392 OPT_STRING_VAL(DSS, REMOTE_HOST),
1393 OPT_STRING_VAL(DSS, SOURCE_DIR));
1394 free(logname);
1395 *num = get_current_time();
1396 (*argv)[i++] = incomplete_name(*num);
1397 (*argv)[i++] = NULL;
1398 for (j = 0; j < i; j++)
1399 DSS_DEBUG_LOG(("argv[%d] = %s\n", j, (*argv)[j]));
1400 }
1401
/*
 * Release an argument vector and all strings it contains.
 *
 * argv must either be NULL, in which case the function does nothing, or point
 * to a NULL-terminated array of pointers that can be passed to free().
 */
static void free_rsync_argv(char **argv)
{
	char **p;

	if (argv == NULL)
		return;
	for (p = argv; *p != NULL; p++)
		free(*p);
	free(argv);
}
1412
1413 static int create_snapshot(char **argv)
1414 {
1415 int ret;
1416
1417 ret = rename_resume_snap(current_snapshot_creation_time);
1418 if (ret < 0)
1419 return ret;
1420 dss_exec(&create_pid, argv[0], argv);
1421 snapshot_creation_status = HS_RUNNING;
1422 return ret;
1423 }
1424
1425 static int select_loop(void)
1426 {
1427 int ret;
1428 /* check every 60 seconds for free disk space */
1429 struct timeval tv;
1430 char **rsync_argv = NULL;
1431
1432 for (;;) {
1433 fd_set rfds;
1434 struct timeval *tvp;
1435
1436 if (remove_pid)
1437 tvp = NULL; /* sleep until rm hook/process dies */
1438 else { /* sleep one minute */
1439 tv.tv_sec = 60;
1440 tv.tv_usec = 0;
1441 tvp = &tv;
1442 }
1443 FD_ZERO(&rfds);
1444 FD_SET(signal_pipe, &rfds);
1445 ret = dss_select(signal_pipe + 1, &rfds, NULL, tvp);
1446 if (ret < 0)
1447 goto out;
1448 if (FD_ISSET(signal_pipe, &rfds)) {
1449 ret = handle_signal();
1450 if (ret < 0)
1451 goto out;
1452 }
1453 if (remove_pid)
1454 continue;
1455 if (snapshot_removal_status == HS_PRE_SUCCESS) {
1456 ret = exec_rm();
1457 if (ret < 0)
1458 goto out;
1459 continue;
1460 }
1461 if (snapshot_removal_status == HS_SUCCESS) {
1462 post_remove_hook();
1463 continue;
1464 }
1465 ret = try_to_free_disk_space();
1466 if (ret < 0)
1467 goto out;
1468 if (snapshot_removal_status != HS_READY) {
1469 stop_create_process();
1470 continue;
1471 }
1472 restart_create_process();
1473 switch (snapshot_creation_status) {
1474 case HS_READY:
1475 if (!next_snapshot_is_due())
1476 continue;
1477 pre_create_hook();
1478 continue;
1479 case HS_PRE_RUNNING:
1480 case HS_RUNNING:
1481 case HS_POST_RUNNING:
1482 continue;
1483 case HS_PRE_SUCCESS:
1484 if (!name_of_reference_snapshot) {
1485 free_rsync_argv(rsync_argv);
1486 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1487 }
1488 ret = create_snapshot(rsync_argv);
1489 if (ret < 0)
1490 goto out;
1491 continue;
1492 case HS_NEEDS_RESTART:
1493 if (!next_snapshot_is_due())
1494 continue;
1495 ret = create_snapshot(rsync_argv);
1496 if (ret < 0)
1497 goto out;
1498 continue;
1499 case HS_SUCCESS:
1500 post_create_hook();
1501 continue;
1502 }
1503 }
1504 out:
1505 return ret;
1506 }
1507
1508 static void exit_hook(int exit_code)
1509 {
1510 const char *argv[3];
1511 pid_t pid;
1512
1513 argv[0] = OPT_STRING_VAL(DSS, EXIT_HOOK);
1514 argv[1] = dss_strerror(-exit_code);
1515 argv[2] = NULL;
1516
1517 DSS_NOTICE_LOG(("executing %s %s\n", argv[0], argv[1]));
1518 dss_exec(&pid, argv[0], (char **)argv);
1519 }
1520
/*
 * Acquire the dss lock that belongs to the config file.
 *
 * If the lock can not be obtained, the reason is logged and the process exits
 * with EXIT_FAILURE.
 */
static void lock_dss_or_die(void)
{
	int ret;
	char *config_file = get_config_file_name();

	ret = lock_dss(config_file);
	free(config_file);
	if (ret >= 0)
		return;
	DSS_EMERG_LOG(("failed to lock: %s\n", dss_strerror(-ret)));
	exit(EXIT_FAILURE);
}
1532
1533 static int com_run(void)
1534 {
1535 int ret, fd = -1;
1536 char *config_file;
1537 pid_t pid;
1538
1539 if (OPT_GIVEN(DSS, DRY_RUN)) {
1540 DSS_ERROR_LOG(("dry run not supported by this command\n"));
1541 return -E_SYNTAX;
1542 }
1543 config_file = get_config_file_name();
1544 ret = get_dss_pid(config_file, &pid);
1545 free(config_file);
1546 if (ret >= 0) {
1547 DSS_ERROR_LOG(("pid %d\n", (int)pid));
1548 return -E_ALREADY_RUNNING;
1549 }
1550 if (OPT_GIVEN(RUN, DAEMON)) {
1551 fd = daemon_init();
1552 daemonized = true;
1553 logfile = open_log(OPT_STRING_VAL(RUN, LOGFILE));
1554 }
1555 lock_dss_or_die();
1556 dump_dss_config("startup");
1557 ret = install_sighandler(SIGHUP);
1558 if (ret < 0)
1559 return ret;
1560 if (fd >= 0) {
1561 ret = write(fd, "\0", 1);
1562 if (ret != 1) {
1563 DSS_ERROR_LOG(("write to daemon pipe returned %d\n",
1564 ret));
1565 if (ret < 0)
1566 return -ERRNO_TO_DSS_ERROR(errno);
1567 return -E_BUG;
1568 }
1569 }
1570 ret = select_loop();
1571 if (ret >= 0) /* impossible */
1572 ret = -E_BUG;
1573 kill_children();
1574 exit_hook(ret);
1575 return ret;
1576 }
1577 EXPORT_CMD_HANDLER(run);
1578
1579 static int com_prune(void)
1580 {
1581 int ret;
1582 struct snapshot_list sl;
1583 struct snapshot *victim;
1584 struct disk_space ds;
1585 const char *why;
1586
1587 lock_dss_or_die();
1588 ret = get_disk_space(".", &ds);
1589 if (ret < 0)
1590 return ret;
1591 log_disk_space(&ds);
1592 dss_get_snapshot_list(&sl);
1593 why = "outdated";
1594 victim = find_outdated_snapshot(&sl);
1595 if (victim)
1596 goto rm;
1597 why = "redundant";
1598 victim = find_redundant_snapshot(&sl);
1599 if (victim)
1600 goto rm;
1601 ret = 0;
1602 goto out;
1603 rm:
1604 if (OPT_GIVEN(DSS, DRY_RUN)) {
1605 dss_msg("%s snapshot %s (interval = %i)\n",
1606 why, victim->name, victim->interval);
1607 ret = 0;
1608 goto out;
1609 }
1610 pre_remove_hook(victim, why);
1611 if (snapshot_removal_status == HS_PRE_RUNNING) {
1612 ret = wait_for_remove_process();
1613 if (ret < 0)
1614 goto out;
1615 if (snapshot_removal_status != HS_PRE_SUCCESS)
1616 goto out;
1617 }
1618 ret = exec_rm();
1619 if (ret < 0)
1620 goto out;
1621 ret = wait_for_remove_process();
1622 if (ret < 0)
1623 goto out;
1624 if (snapshot_removal_status != HS_SUCCESS)
1625 goto out;
1626 post_remove_hook();
1627 if (snapshot_removal_status != HS_POST_RUNNING)
1628 goto out;
1629 ret = wait_for_remove_process();
1630 if (ret < 0)
1631 goto out;
1632 ret = 1;
1633 out:
1634 free_snapshot_list(&sl);
1635 return ret;
1636 }
1637 EXPORT_CMD_HANDLER(prune);
1638
1639 static int com_create(void)
1640 {
1641 int ret, status;
1642 char **rsync_argv;
1643
1644 lock_dss_or_die();
1645 if (OPT_GIVEN(DSS, DRY_RUN)) {
1646 int i;
1647 char *msg = NULL;
1648 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1649 for (i = 0; rsync_argv[i]; i++) {
1650 char *tmp = msg;
1651 msg = make_message("%s%s%s", tmp? tmp : "",
1652 tmp? " " : "", rsync_argv[i]);
1653 free(tmp);
1654 }
1655 free_rsync_argv(rsync_argv);
1656 dss_msg("%s\n", msg);
1657 free(msg);
1658 return 1;
1659 }
1660 pre_create_hook();
1661 if (create_pid) {
1662 ret = wait_for_process(create_pid, &status);
1663 if (ret < 0)
1664 return ret;
1665 ret = handle_pre_create_hook_exit(status);
1666 if (ret <= 0) /* error, or pre-create failed */
1667 return ret;
1668 }
1669 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1670 ret = create_snapshot(rsync_argv);
1671 if (ret < 0)
1672 goto out;
1673 ret = wait_for_process(create_pid, &status);
1674 if (ret < 0)
1675 goto out;
1676 ret = handle_rsync_exit(status);
1677 if (ret < 0)
1678 goto out;
1679 post_create_hook();
1680 if (create_pid)
1681 ret = wait_for_process(create_pid, &status);
1682 out:
1683 free_rsync_argv(rsync_argv);
1684 return ret;
1685 }
1686 EXPORT_CMD_HANDLER(create);
1687
1688 static int com_ls(void)
1689 {
1690 int i;
1691 struct snapshot_list sl;
1692 struct snapshot *s;
1693
1694 dss_get_snapshot_list(&sl);
1695 FOR_EACH_SNAPSHOT(s, i, &sl) {
1696 int64_t d = 0;
1697 if (s->flags & SS_COMPLETE)
1698 d = (s->completion_time - s->creation_time) / 60;
1699 dss_msg("%u\t%s\t%3" PRId64 ":%02" PRId64 "\n", s->interval, s->name, d/60, d%60);
1700 }
1701 free_snapshot_list(&sl);
1702 return 1;
1703 }
1704 EXPORT_CMD_HANDLER(ls);
1705
/*
 * Handler of the configtest subcommand.
 *
 * Only prints a message to stdout and returns zero. No configuration check
 * is performed in the handler itself.
 */
static int com_configtest(void)
{
	fputs("Syntax Ok\n", stdout);
	return 0;
}
EXPORT_CMD_HANDLER(configtest);
1712
1713 static int setup_signal_handling(void)
1714 {
1715 int ret;
1716
1717 DSS_INFO_LOG(("setting up signal handlers\n"));
1718 signal_pipe = signal_init(); /* always successful */
1719 ret = install_sighandler(SIGINT);
1720 if (ret < 0)
1721 return ret;
1722 ret = install_sighandler(SIGTERM);
1723 if (ret < 0)
1724 return ret;
1725 return install_sighandler(SIGCHLD);
1726 }
1727
1728 static void handle_version_and_help(void)
1729 {
1730 char *txt;
1731
1732 if (OPT_GIVEN(DSS, DETAILED_HELP))
1733 txt = lls_long_help(CMD_PTR(DSS));
1734 else if (OPT_GIVEN(DSS, HELP))
1735 txt = lls_short_help(CMD_PTR(DSS));
1736 else if (OPT_GIVEN(DSS, VERSION))
1737 txt = dss_strdup(VERSION_STRING);
1738 else
1739 return;
1740 printf("%s", txt);
1741 free(txt);
1742 exit(EXIT_SUCCESS);
1743 }
1744
1745 static void show_subcommand_summary(void)
1746 {
1747 const struct lls_command *cmd;
1748 int i;
1749
1750 printf("Available subcommands:\n");
1751 for (i = 1; (cmd = lls_cmd(i, dss_suite)); i++) {
1752 const char *name = lls_command_name(cmd);
1753 const char *purpose = lls_purpose(cmd);
1754 printf("%-11s%s\n", name, purpose);
1755 }
1756 exit(EXIT_SUCCESS);
1757 }
1758
1759 int main(int argc, char **argv)
1760 {
1761 int ret;
1762 char *errctx = NULL;
1763 unsigned num_inputs;
1764 const struct dss_user_data *ud;
1765
1766 ret = lls_parse(argc, argv, CMD_PTR(DSS), &cmdline_lpr, &errctx);
1767 if (ret < 0) {
1768 ret = lopsub_error(ret, &errctx);
1769 goto out;
1770 }
1771 lpr = cmdline_lpr;
1772 ret = parse_config_file(false /* no SIGHUP */, CMD_PTR(DSS));
1773 if (ret < 0)
1774 goto out;
1775 handle_version_and_help();
1776 num_inputs = lls_num_inputs(lpr);
1777 if (num_inputs == 0)
1778 show_subcommand_summary();
1779 ret = lls_lookup_subcmd(argv[argc - num_inputs], dss_suite, &errctx);
1780 if (ret < 0) {
1781 ret = lopsub_error(ret, &errctx);
1782 goto out;
1783 }
1784 subcmd = lls_cmd(ret, dss_suite);
1785 ret = lls_parse(num_inputs, argv + argc - num_inputs, subcmd,
1786 &cmdline_sublpr, &errctx);
1787 if (ret < 0) {
1788 ret = lopsub_error(ret, &errctx);
1789 goto out;
1790 }
1791 sublpr = cmdline_sublpr;
1792 ret = parse_config_file(false /* no SIGHUP */, subcmd);
1793 if (ret < 0)
1794 goto out;
1795 ret = check_config();
1796 if (ret < 0)
1797 goto out;
1798 ret = setup_signal_handling();
1799 if (ret < 0)
1800 goto out;
1801 ud = lls_user_data(subcmd);
1802 ret = ud->handler();
1803 signal_shutdown();
1804 out:
1805 if (ret < 0) {
1806 if (errctx)
1807 DSS_ERROR_LOG(("%s\n", errctx));
1808 DSS_EMERG_LOG(("%s\n", dss_strerror(-ret)));
1809 }
1810 free(errctx);
1811 lls_free_parse_result(lpr, CMD_PTR(DSS));
1812 if (lpr != cmdline_lpr)
1813 lls_free_parse_result(cmdline_lpr, CMD_PTR(DSS));
1814 lls_free_parse_result(sublpr, subcmd);
1815 if (sublpr != cmdline_sublpr)
1816 lls_free_parse_result(cmdline_sublpr, subcmd);
1817 exit(ret >= 0? EXIT_SUCCESS : EXIT_FAILURE);
1818 }