]> git.tuebingen.mpg.de Git - dss.git/blob - dss.c
1bab9aef5f2dce9c92c5416e0d266a1bd1df35cc
[dss.git] / dss.c
1 #include <string.h>
2 #include <stdlib.h>
3 #include <stdarg.h>
4 #include <assert.h>
5 #include <errno.h>
6 #include <sys/types.h>
7 #include <signal.h>
8 #include <ctype.h>
9 #include <sys/stat.h>
10 #include <unistd.h>
11 #include <inttypes.h>
12 #include <sys/time.h>
13 #include <time.h>
14 #include <sys/wait.h>
15 #include <fnmatch.h>
16 #include <limits.h>
17
18
19 #include "gcc-compat.h"
20 #include "cmdline.h"
21 #include "log.h"
22 #include "string.h"
23 #include "error.h"
24 #include "fd.h"
25 #include "exec.h"
26 #include "daemon.h"
27 #include "signal.h"
28 #include "df.h"
29 #include "time.h"
30
31
32 struct gengetopt_args_info conf;
33 char *dss_error_txt = NULL;
34 static FILE *logfile;
35 static int signal_pipe;
36
37 /** Process id of current rsync process. */
38 static pid_t rsync_pid;
39 /** Whether the rsync process is currently stopped */
40 static int rsync_stopped;
41 /** Process id of current rm process. */
42 static pid_t rm_pid;
43 /** When the next snapshot is due. */
44 struct timeval next_snapshot_time;
45
46 pid_t pre_create_hook_pid;
47 pid_t post_create_hook_pid;
48
49 /* Creation time of the snapshot currently being created. */
50 int64_t current_snapshot_creation_time;
51
52 static char *path_to_last_complete_snapshot;
53
54 enum {
55         SCS_READY,
56         SCS_PRE_HOOK_RUNNING,
57         SCS_PRE_HOOK_SUCCESS,
58         SCS_RSYNC_RUNNING,
59         SCS_RSYNC_SUCCESS,
60         SCS_POST_HOOK_RUNNING,
61 };
62
63 static unsigned snapshot_creation_status;
64
65
66 DEFINE_DSS_ERRLIST;
67
68
69 /* a litte cpp magic helps to DRY */
70 #define COMMANDS \
71         COMMAND(ls) \
72         COMMAND(create) \
73         COMMAND(prune) \
74         COMMAND(run)
75 #define COMMAND(x) int com_ ##x(void);
76 COMMANDS
77 #undef COMMAND
78 #define COMMAND(x) if (conf.x ##_given) return com_ ##x();
79 int call_command_handler(void)
80 {
81         COMMANDS
82         DSS_EMERG_LOG("BUG: did not find command handler\n");
83         exit(EXIT_FAILURE);
84 }
85 #undef COMMAND
86 #undef COMMANDS
87
/*
 * The directory name of a snapshot encodes its state:
 *
 * complete, not being deleted: 1204565370-1204565371.Sun_Mar_02_2008_14_33-Sun_Mar_02_2008_14_43
 * complete, being deleted: 1204565370-1204565371.being_deleted
 * incomplete, not being deleted: 1204565370-incomplete
 * incomplete, being deleted: 1204565370-incomplete.being_deleted
 */
enum snapshot_status_flags {
	/** The rsync process terminated successfully. */
	SS_COMPLETE = 1 << 0,
	/** The rm process is running to remove this snapshot. */
	SS_BEING_DELETED = 1 << 1,
};
100
101 struct snapshot {
102         char *name;
103         int64_t creation_time;
104         int64_t completion_time;
105         enum snapshot_status_flags flags;
106         unsigned interval;
107 };
108
109 /*
110  * An edge snapshot is either the oldest one or the newest one.
111  *
112  * We need to find either of them occasionally: The create code
113  * needs to know the newest snapshot because that is the one
114  * used as the link destination dir. The pruning code needs to
115  * find the oldest one in case disk space becomes low.
116  */
117 struct edge_snapshot_data {
118         int64_t now;
119         struct snapshot snap;
120 };
121
122 __printf_2_3 void dss_log(int ll, const char* fmt,...)
123 {
124         va_list argp;
125         FILE *outfd;
126         struct tm *tm;
127         time_t t1;
128         char str[255] = "";
129
130         if (ll < conf.loglevel_arg)
131                 return;
132         outfd = logfile? logfile : stderr;
133         time(&t1);
134         tm = localtime(&t1);
135         strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
136         fprintf(outfd, "%s ", str);
137         if (conf.loglevel_arg <= INFO)
138                 fprintf(outfd, "%i: ", ll);
139         va_start(argp, fmt);
140         vfprintf(outfd, fmt, argp);
141         va_end(argp);
142 }
143
144 /**
145  * Print a message either to stdout or to the log file.
146  */
147 __printf_1_2 void dss_msg(const char* fmt,...)
148 {
149         FILE *outfd = conf.daemon_given? logfile : stdout;
150         va_list argp;
151         va_start(argp, fmt);
152         vfprintf(outfd, fmt, argp);
153         va_end(argp);
154 }
155
156 /**
157  * Return the desired number of snapshots of an interval.
158  */
159 unsigned num_snapshots(int interval)
160 {
161         unsigned n;
162
163         assert(interval >= 0);
164
165         if (interval >= conf.num_intervals_arg)
166                 return 0;
167         n = conf.num_intervals_arg - interval - 1;
168         return 1 << n;
169 }
170
171 /* return: Whether dirname is a snapshot directory (0: no, 1: yes) */
172 int is_snapshot(const char *dirname, int64_t now, struct snapshot *s)
173 {
174         int i, ret;
175         char *dash, *dot, *tmp;
176         int64_t num;
177
178         assert(dirname);
179         dash = strchr(dirname, '-');
180         if (!dash || !dash[1] || dash == dirname)
181                 return 0;
182         for (i = 0; dirname[i] != '-'; i++)
183                 if (!isdigit(dirname[i]))
184                         return 0;
185         tmp = dss_strdup(dirname);
186         tmp[i] = '\0';
187         ret = dss_atoi64(tmp, &num);
188         free(tmp);
189         if (ret < 0) {
190                 free(dss_error_txt);
191                 return 0;
192         }
193         assert(num >= 0);
194         if (num > now)
195                 return 0;
196         s->creation_time = num;
197         //DSS_DEBUG_LOG("%s start time: %lli\n", dirname, (long long)s->creation_time);
198         s->interval = (long long) ((now - s->creation_time)
199                 / conf.unit_interval_arg / 24 / 3600);
200         if (!strcmp(dash + 1, "incomplete")) {
201                 s->completion_time = -1;
202                 s->flags = 0; /* neither complete, nor being deleted */
203                 goto success;
204         }
205         if (!strcmp(dash + 1, "incomplete.being_deleted")) {
206                 s->completion_time = -1;
207                 s->flags = SS_BEING_DELETED; /* mot cpmplete, being deleted */
208                 goto success;
209         }
210         tmp = dash + 1;
211         dot = strchr(tmp, '.');
212         if (!dot || !dot[1] || dot == tmp)
213                 return 0;
214         for (i = 0; tmp[i] != '.'; i++)
215                 if (!isdigit(tmp[i]))
216                         return 0;
217         tmp = dss_strdup(dash + 1);
218         tmp[i] = '\0';
219         ret = dss_atoi64(tmp, &num);
220         free(tmp);
221         if (ret < 0) {
222                 free(dss_error_txt);
223                 return 0;
224         }
225         if (num > now)
226                 return 0;
227         s->completion_time = num;
228         s->flags = SS_COMPLETE;
229         if (!strcmp(dot + 1, "being_deleted"))
230                 s->flags |= SS_BEING_DELETED;
231 success:
232         s->name = dss_strdup(dirname);
233         return 1;
234 }
235
/** Return the current time as returned by time(), and log it at debug level. */
int64_t get_current_time(void)
{
	time_t now = time(NULL);

	DSS_DEBUG_LOG("now: %lli\n", (long long) now);
	return (int64_t)now;
}
243
/** Build the directory name "<start>-incomplete" via make_message(). */
char *incomplete_name(int64_t start)
{
	long long start_seconds = start;

	return make_message("%lli-incomplete", start_seconds);
}
248
249 char *being_deleted_name(struct snapshot *s)
250 {
251         if (s->flags & SS_COMPLETE)
252                 return make_message("%lli-%lli.being_deleted",
253                         (long long)s->creation_time,
254                         (long long)s->completion_time);
255         return make_message("%lli-incomplete.being_deleted",
256                 (long long)s->creation_time);
257 }
258
259 int complete_name(int64_t start, int64_t end, char **result)
260 {
261         struct tm start_tm, end_tm;
262         time_t *start_seconds = (time_t *) (uint64_t *)&start; /* STFU, gcc */
263         time_t *end_seconds = (time_t *) (uint64_t *)&end; /* STFU, gcc */
264         char start_str[200], end_str[200];
265
266         if (!localtime_r(start_seconds, &start_tm)) {
267                 make_err_msg("%lli", (long long)start);
268                 return -E_LOCALTIME;
269         }
270         if (!localtime_r(end_seconds, &end_tm)) {
271                 make_err_msg("%lli", (long long)end);
272                 return -E_LOCALTIME;
273         }
274         if (!strftime(start_str, sizeof(start_str), "%a_%b_%d_%Y_%H_%M_%S", &start_tm)) {
275                 make_err_msg("%lli", (long long)start);
276                 return -E_STRFTIME;
277         }
278         if (!strftime(end_str, sizeof(end_str), "%a_%b_%d_%Y_%H_%M_%S", &end_tm)) {
279                 make_err_msg("%lli", (long long)end);
280                 return -E_STRFTIME;
281         }
282         *result = make_message("%lli-%lli.%s-%s", (long long) start, (long long) end,
283                 start_str, end_str);
284         return 1;
285 }
286
/** The snapshots found in the destination directory. */
struct snapshot_list {
	/** The time the list was created. */
	int64_t now;
	/** Number of used entries of the snapshots array. */
	unsigned num_snapshots;
	/** Number of allocated entries of the snapshots array. */
	unsigned array_size;
	/** One pointer per snapshot. */
	struct snapshot **snapshots;
	/**
	 * Array of size num_intervals + 1
	 *
	 * It contains the number of snapshots in each interval. The last
	 * slot, interval_count[num_intervals], counts all snapshots whose
	 * interval is greater than or equal to num_intervals.
	 */
	unsigned *interval_count;
};
300
/*
 * Iterate over the snapshots of the list sl: i is the index, s is set to the
 * corresponding array entry. The loop also ends at the first NULL entry.
 */
#define FOR_EACH_SNAPSHOT(s, i, sl) \
	for ((i) = 0; (i) < (sl)->num_snapshots && ((s) = (sl)->snapshots[(i)]); (i)++)
303
304
305
306 #define NUM_COMPARE(x, y) ((int)((x) < (y)) - (int)((x) > (y)))
307
308 static int compare_snapshots(const void *a, const void *b)
309 {
310         struct snapshot *s1 = *(struct snapshot **)a;
311         struct snapshot *s2 = *(struct snapshot **)b;
312         return NUM_COMPARE(s2->creation_time, s1->creation_time);
313 }
314
315 /** Compute the minimum of \a a and \a b. */
316 #define DSS_MIN(a,b) ((a) < (b) ? (a) : (b))
317
318 int add_snapshot(const char *dirname, void *private)
319 {
320         struct snapshot_list *sl = private;
321         struct snapshot s;
322         int ret = is_snapshot(dirname, sl->now, &s);
323
324         if (!ret)
325                 return 1;
326         if (sl->num_snapshots >= sl->array_size) {
327                 sl->array_size = 2 * sl->array_size + 1;
328                 sl->snapshots = dss_realloc(sl->snapshots,
329                         sl->array_size * sizeof(struct snapshot *));
330         }
331         sl->snapshots[sl->num_snapshots] = dss_malloc(sizeof(struct snapshot));
332         *(sl->snapshots[sl->num_snapshots]) = s;
333         sl->interval_count[DSS_MIN(s.interval, conf.num_intervals_arg)]++;
334         sl->num_snapshots++;
335         return 1;
336 }
337
338 void get_snapshot_list(struct snapshot_list *sl)
339 {
340         sl->now = get_current_time();
341         sl->num_snapshots = 0;
342         sl->array_size = 0;
343         sl->snapshots = NULL;
344         sl->interval_count = dss_calloc((conf.num_intervals_arg + 1) * sizeof(unsigned));
345         for_each_subdir(add_snapshot, sl);
346         qsort(sl->snapshots, sl->num_snapshots, sizeof(struct snapshot *),
347                 compare_snapshots);
348 }
349
350 void free_snapshot_list(struct snapshot_list *sl)
351 {
352         int i;
353         struct snapshot *s;
354
355         FOR_EACH_SNAPSHOT(s, i, sl) {
356                 free(s->name);
357                 free(s);
358         }
359         free(sl->interval_count);
360         sl->interval_count = NULL;
361         free(sl->snapshots);
362         sl->snapshots = NULL;
363         sl->num_snapshots = 0;
364 }
365
366 void stop_rsync_process(void)
367 {
368         if (!rsync_pid || rsync_stopped)
369                 return;
370         kill(SIGSTOP, rsync_pid);
371         rsync_stopped = 1;
372 }
373
374 void restart_rsync_process(void)
375 {
376         if (!rsync_pid || !rsync_stopped)
377                 return;
378         kill (SIGCONT, rsync_pid);
379         rsync_stopped = 0;
380 }
381
/**
 * Log how a child terminated.
 *
 * A regular exit is logged at info level together with the exit status, a
 * death by signal at notice level, anything else as a warning.
 */
void log_termination_msg(pid_t pid, int status)
{
	if (WIFEXITED(status)) {
		DSS_INFO_LOG("child %i exited. Exit status: %i\n", (int)pid,
			WEXITSTATUS(status));
		return;
	}
	if (WIFSIGNALED(status)) {
		DSS_NOTICE_LOG("child %i was killed by signal %i\n", (int)pid,
			WTERMSIG(status));
		return;
	}
	DSS_WARNING_LOG("child %i terminated abormally\n", (int)pid);
}
396
397 int wait_for_process(pid_t pid, int *status)
398 {
399         int ret;
400
401         DSS_DEBUG_LOG("Waiting for process %d to terminate\n", (int)pid);
402         for (;;) {
403                 fd_set rfds;
404
405                 FD_ZERO(&rfds);
406                 FD_SET(signal_pipe, &rfds);
407                 ret = dss_select(signal_pipe + 1, &rfds, NULL, NULL);
408                 if (ret < 0)
409                         break;
410                 ret = next_signal();
411                 if (!ret)
412                         continue;
413                 if (ret == SIGCHLD) {
414                         ret = waitpid(pid, status, 0);
415                         if (ret >= 0)
416                                 break;
417                         if (errno != EINTR) { /* error */
418                                 ret = -ERRNO_TO_DSS_ERROR(errno);
419                                 break;
420                         }
421                 }
422                 /* SIGINT or SIGTERM */
423                 DSS_WARNING_LOG("sending SIGTERM to pid %d\n", (int)pid);
424                 kill(pid, SIGTERM);
425         }
426         if (ret < 0)
427                 make_err_msg("failed to wait for process %d", (int)pid);
428         else
429                 log_termination_msg(pid, *status);
430         return ret;
431 }
432
433 int remove_snapshot(struct snapshot *s)
434 {
435         int fds[3] = {0, 0, 0};
436         assert(!rm_pid);
437         char *new_name = being_deleted_name(s);
438         int ret = dss_rename(s->name, new_name);
439         char *argv[] = {"rm", "-rf", new_name, NULL};
440
441         if (ret < 0)
442                 goto out;
443         DSS_NOTICE_LOG("removing %s (interval = %i)\n", s->name, s->interval);
444         stop_rsync_process();
445         ret = dss_exec(&rm_pid, argv[0], argv, fds);
446 out:
447         free(new_name);
448         return ret;
449 }
450
451 /*
452  * return: 0: no redundant snapshots, 1: rm process started, negative: error
453  */
454 int remove_redundant_snapshot(struct snapshot_list *sl)
455 {
456         int ret, i, interval;
457         struct snapshot *s;
458         unsigned missing = 0;
459
460         DSS_INFO_LOG("looking for intervals containing too many snapshots\n");
461         for (interval = conf.num_intervals_arg - 1; interval >= 0; interval--) {
462                 unsigned keep = num_snapshots(interval);
463                 unsigned num = sl->interval_count[interval];
464                 struct snapshot *victim = NULL, *prev = NULL;
465                 int64_t score = LONG_MAX;
466
467                 if (keep >= num)
468                         missing += keep - num;
469 //              DSS_DEBUG_LOG("interval %i: keep: %u, have: %u, missing: %u\n",
470 //                      interval, keep, num, missing);
471                 if (keep + missing >= num)
472                         continue;
473                 /* redundant snapshot in this interval, pick snapshot with lowest score */
474                 FOR_EACH_SNAPSHOT(s, i, sl) {
475                         int64_t this_score;
476
477                         //DSS_DEBUG_LOG("checking %s\n", s->name);
478                         if (s->interval > interval) {
479                                 prev = s;
480                                 continue;
481                         }
482                         if (s->interval < interval)
483                                 break;
484                         if (!victim) {
485                                 victim = s;
486                                 prev = s;
487                                 continue;
488                         }
489                         assert(prev);
490                         /* check if s is a better victim */
491                         this_score = s->creation_time - prev->creation_time;
492                         assert(this_score >= 0);
493                         //DSS_DEBUG_LOG("%s: score %lli\n", s->name, (long long)score);
494                         if (this_score < score) {
495                                 score = this_score;
496                                 victim = s;
497                         }
498                         prev = s;
499                 }
500                 assert(victim);
501                 if (conf.dry_run_given) {
502                         dss_msg("%s would be removed (interval = %i)\n",
503                                 victim->name, victim->interval);
504                         continue;
505                 }
506                 ret = remove_snapshot(victim);
507                 return ret < 0? ret : 1;
508         }
509         return 0;
510 }
511
512 int remove_outdated_snapshot(struct snapshot_list *sl)
513 {
514         int i, ret;
515         struct snapshot *s;
516
517         DSS_INFO_LOG("looking for snapshots belonging to intervals greater than %d\n",
518                 conf.num_intervals_arg);
519         FOR_EACH_SNAPSHOT(s, i, sl) {
520                 if (s->interval <= conf.num_intervals_arg)
521                         continue;
522                 if (conf.dry_run_given) {
523                         dss_msg("%s would be removed (interval = %i)\n",
524                                 s->name, s->interval);
525                         continue;
526                 }
527                 ret = remove_snapshot(s);
528                 if (ret < 0)
529                         return ret;
530                 return 1;
531         }
532         return 0;
533 }
534
535 int handle_rm_exit(int status)
536 {
537         int es, ret;
538
539         if (!WIFEXITED(status)) {
540                 make_err_msg("rm process %d died involuntary", (int)rm_pid);
541                 ret = -E_INVOLUNTARY_EXIT;
542                 goto out;
543         }
544         es = WEXITSTATUS(status);
545         if (es) {
546                 make_err_msg("rm process %d returned %d", (int)rm_pid, es);
547                 ret = -E_BAD_EXIT_CODE;
548                 goto out;
549         }
550         ret = 1;
551         rm_pid = 0;
552 out:
553         return ret;
554 }
555
556 int wait_for_rm_process(void)
557 {
558         int status, ret = wait_for_process(rm_pid, &status);
559
560         if (ret < 0)
561                 return ret;
562         return handle_rm_exit(status);
563 }
564
/* Send SIGTERM to the given process and log a warning. No-op if pid is zero. */
void kill_process(pid_t pid)
{
	if (pid) {
		DSS_WARNING_LOG("sending SIGTERM to pid %d\n", (int)pid);
		kill(pid, SIGTERM);
	}
}
572
573 int check_config(void)
574 {
575         if (conf.unit_interval_arg <= 0) {
576                 make_err_msg("bad unit interval: %i", conf.unit_interval_arg);
577                 return -E_INVALID_NUMBER;
578         }
579         DSS_DEBUG_LOG("unit interval: %i day(s)\n", conf.unit_interval_arg);
580         if (conf.num_intervals_arg <= 0) {
581                 make_err_msg("bad number of intervals  %i", conf.num_intervals_arg);
582                 return -E_INVALID_NUMBER;
583         }
584         DSS_DEBUG_LOG("number of intervals: %i\n", conf.num_intervals_arg);
585         return 1;
586 }
587
588 /* exits on errors */
589 void parse_config_file(int override)
590 {
591         int ret;
592         char *config_file;
593         struct stat statbuf;
594         char *old_logfile_arg = NULL;
595         int old_daemon_given = 0;
596
597         if (conf.config_file_given)
598                 config_file = dss_strdup(conf.config_file_arg);
599         else {
600                 char *home = get_homedir();
601                 config_file = make_message("%s/.dssrc", home);
602                 free(home);
603         }
604         if (override) { /* SIGHUP */
605                 if (conf.logfile_given)
606                         old_logfile_arg = dss_strdup(conf.logfile_arg);
607                 old_daemon_given = conf.daemon_given;
608         }
609
610         ret = stat(config_file, &statbuf);
611         if (ret && conf.config_file_given) {
612                 ret = -ERRNO_TO_DSS_ERROR(errno);
613                 make_err_msg("failed to stat config file %s", config_file);
614                 goto out;
615         }
616         if (!ret) {
617                 struct cmdline_parser_params params = {
618                         .override = override,
619                         .initialize = 0,
620                         .check_required = 1,
621                         .check_ambiguity = 0
622                 };
623                 cmdline_parser_config_file(config_file, &conf, &params);
624         }
625         ret = check_config();
626         if (ret < 0)
627                 goto out;
628         if (override) {
629                 /* don't change daemon mode on SIGHUP */
630                 conf.daemon_given = old_daemon_given;
631                 close_log(logfile);
632                 logfile = NULL;
633                 if (conf.logfile_given)
634                         free(old_logfile_arg);
635                 else if (conf.daemon_given) { /* re-use old logfile */
636                         conf.logfile_arg = old_logfile_arg;
637                         conf.logfile_given = 1;
638                 }
639         }
640         if (conf.logfile_given) {
641                 logfile = open_log(conf.logfile_arg);
642                 log_welcome(conf.loglevel_arg);
643         }
644         DSS_EMERG_LOG("loglevel: %d\n", conf.loglevel_arg);
645 //      cmdline_parser_dump(logfile? logfile : stdout, &conf);
646         ret = dss_chdir(conf.dest_dir_arg);
647 out:
648         free(config_file);
649         if (ret >= 0)
650                 return;
651         log_err_msg(EMERG, -ret);
652         exit(EXIT_FAILURE);
653 }
654
/* SIGHUP: log the event and re-read the config file in override mode. */
void handle_sighup(void)
{
	const int override = 1;

	DSS_NOTICE_LOG("SIGHUP\n");
	parse_config_file(override);
}
660
661 int rename_incomplete_snapshot(int64_t start)
662 {
663         char *old_name;
664         int ret;
665
666         free(path_to_last_complete_snapshot);
667         ret = complete_name(start, get_current_time(),
668                 &path_to_last_complete_snapshot);
669         if (ret < 0)
670                 return ret;
671         old_name = incomplete_name(start);
672         ret = dss_rename(old_name, path_to_last_complete_snapshot);
673         if (ret >= 0)
674                 DSS_NOTICE_LOG("%s -> %s\n", old_name,
675                         path_to_last_complete_snapshot);
676         free(old_name);
677         return ret;
678 }
679
680 void compute_next_snapshot_time(void)
681 {
682         struct timeval now, unit_interval = {.tv_sec = 24 * 3600 * conf.unit_interval_arg},
683                 tmp, diff;
684         int64_t x = 0;
685         unsigned wanted = num_snapshots(0), num_complete_snapshots = 0;
686         int i, ret;
687         struct snapshot *s = NULL;
688         struct snapshot_list sl;
689
690         assert(snapshot_creation_status == SCS_READY);
691         current_snapshot_creation_time = 0;
692         get_snapshot_list(&sl);
693         FOR_EACH_SNAPSHOT(s, i, &sl) {
694                 if (!(s->flags & SS_COMPLETE))
695                         continue;
696                 num_complete_snapshots++;
697                 x += s->completion_time - s->creation_time;
698         }
699         assert(x >= 0);
700         if (num_complete_snapshots)
701                 x /= num_complete_snapshots; /* avg time to create one snapshot */
702         x *= wanted; /* time to create all snapshots in interval 0 */
703         tmp.tv_sec = x;
704         tmp.tv_usec = 0;
705         ret = tv_diff(&unit_interval, &tmp, &diff); /* total sleep time per unit interval */
706         gettimeofday(&now, NULL);
707         if (ret < 0 || !s)
708                 goto min_sleep;
709         tv_divide(wanted, &diff, &tmp); /* sleep time betweeen two snapshots */
710         diff.tv_sec = s->completion_time;
711         diff.tv_usec = 0;
712         tv_add(&diff, &tmp, &next_snapshot_time);
713         if (tv_diff(&now, &next_snapshot_time, NULL) < 0)
714                 goto out;
715 min_sleep:
716         next_snapshot_time = now;
717         next_snapshot_time.tv_sec += 60;
718 out:
719         free_snapshot_list(&sl);
720 }
721
722 int handle_rsync_exit(int status)
723 {
724         int es, ret;
725
726         if (!WIFEXITED(status)) {
727                 make_err_msg("rsync process %d died involuntary", (int)rsync_pid);
728                 ret = -E_INVOLUNTARY_EXIT;
729                 snapshot_creation_status = SCS_READY;
730                 compute_next_snapshot_time();
731                 goto out;
732         }
733         es = WEXITSTATUS(status);
734         if (es != 0 && es != 23 && es != 24) {
735                 make_err_msg("rsync process %d returned %d", (int)rsync_pid, es);
736                 ret = -E_BAD_EXIT_CODE;
737                 snapshot_creation_status = SCS_READY;
738                 compute_next_snapshot_time();
739                 goto out;
740         }
741         ret = rename_incomplete_snapshot(current_snapshot_creation_time);
742         if (ret < 0)
743                 goto out;
744         snapshot_creation_status = SCS_RSYNC_SUCCESS;
745 out:
746         rsync_pid = 0;
747         rsync_stopped = 0;
748         return ret;
749 }
750
751 int get_newest_complete(const char *dirname, void *private)
752 {
753         struct edge_snapshot_data *esd = private;
754         struct snapshot s;
755         int ret = is_snapshot(dirname, esd->now, &s);
756
757         if (ret <= 0)
758                 return 1;
759         if (s.flags != SS_COMPLETE) /* incomplete or being deleted */
760                 return 1;
761         if (s.creation_time < esd->snap.creation_time)
762                 return 1;
763         free(esd->snap.name);
764         esd->snap = s;
765         return 1;
766 }
767
768 __malloc char *name_of_newest_complete_snapshot(void)
769 {
770         struct edge_snapshot_data esd = {
771                 .now = get_current_time(),
772                 .snap = {.creation_time = -1}
773         };
774         for_each_subdir(get_newest_complete, &esd);
775         return esd.snap.name;
776 }
777
778 void create_rsync_argv(char ***argv, int64_t *num)
779 {
780         char *logname, *newest = name_of_newest_complete_snapshot();
781         int i = 0, j;
782
783         *argv = dss_malloc((15 + conf.rsync_option_given) * sizeof(char *));
784         (*argv)[i++] = dss_strdup("rsync");
785         (*argv)[i++] = dss_strdup("-aq");
786         (*argv)[i++] = dss_strdup("--delete");
787         for (j = 0; j < conf.rsync_option_given; j++)
788                 (*argv)[i++] = dss_strdup(conf.rsync_option_arg[j]);
789         if (newest) {
790                 DSS_INFO_LOG("using %s as reference snapshot\n", newest);
791                 (*argv)[i++] = make_message("--link-dest=../%s", newest);
792                 free(newest);
793         } else
794                 DSS_INFO_LOG("no previous snapshot found\n");
795         if (conf.exclude_patterns_given) {
796                 (*argv)[i++] = dss_strdup("--exclude-from");
797                 (*argv)[i++] = dss_strdup(conf.exclude_patterns_arg);
798
799         }
800         logname = dss_logname();
801         if (conf.remote_user_given && !strcmp(conf.remote_user_arg, logname))
802                 (*argv)[i++] = dss_strdup(conf.source_dir_arg);
803         else
804                 (*argv)[i++] = make_message("%s@%s:%s/", conf.remote_user_given?
805                         conf.remote_user_arg : logname,
806                         conf.remote_host_arg, conf.source_dir_arg);
807         free(logname);
808         *num = get_current_time();
809         (*argv)[i++] = incomplete_name(*num);
810         (*argv)[i++] = NULL;
811         for (j = 0; j < i; j++)
812                 DSS_DEBUG_LOG("argv[%d] = %s\n", j, (*argv)[j]);
813 }
814
/* Free all strings of a NULL-terminated argument vector and the vector itself. */
void free_rsync_argv(char **argv)
{
	char **arg;

	for (arg = argv; *arg; arg++)
		free(*arg);
	free(argv);
}
822
823 int pre_create_hook(void)
824 {
825         int ret, fds[3] = {0, 0, 0};
826
827         if (!conf.pre_create_hook_given) {
828                 snapshot_creation_status = SCS_PRE_HOOK_SUCCESS;
829                 return 0;
830         }
831         DSS_NOTICE_LOG("executing %s\n", conf.pre_create_hook_arg);
832         ret = dss_exec_cmdline_pid(&pre_create_hook_pid,
833                 conf.pre_create_hook_arg, fds);
834         if (ret < 0)
835                 return ret;
836         snapshot_creation_status = SCS_PRE_HOOK_RUNNING;
837         return ret;
838 }
839
840 int post_create_hook(void)
841 {
842         int ret, fds[3] = {0, 0, 0};
843         char *cmd;
844
845         if (!conf.post_create_hook_given) {
846                 snapshot_creation_status = SCS_READY;
847                 compute_next_snapshot_time();
848                 return 0;
849         }
850         cmd = make_message("%s %s", conf.post_create_hook_arg,
851                 path_to_last_complete_snapshot);
852         DSS_NOTICE_LOG("executing %s\n", cmd);
853         ret = dss_exec_cmdline_pid(&post_create_hook_pid, cmd, fds);
854         free(cmd);
855         if (ret < 0)
856                 return ret;
857         snapshot_creation_status = SCS_POST_HOOK_RUNNING;
858         return ret;
859 }
860
861 int create_snapshot(char **argv)
862 {
863         int ret, fds[3] = {0, 0, 0};
864         char *name;
865
866         name = incomplete_name(current_snapshot_creation_time);
867         DSS_NOTICE_LOG("creating new snapshot %s\n", name);
868         free(name);
869         ret = dss_exec(&rsync_pid, argv[0], argv, fds);
870         if (ret < 0)
871                 return ret;
872         snapshot_creation_status = SCS_RSYNC_RUNNING;
873         return ret;
874 }
875
876 int handle_pre_create_hook_exit(int status)
877 {
878         int es, ret;
879
880         if (!WIFEXITED(status)) {
881                 make_err_msg("pre-create-hook %d died involuntary",
882                         (int)pre_create_hook_pid);
883                 snapshot_creation_status = SCS_READY;
884                 compute_next_snapshot_time();
885                 ret = -E_INVOLUNTARY_EXIT;
886                 goto out;
887         }
888         es = WEXITSTATUS(status);
889         if (es) {
890                 make_err_msg("pre-create-hook %d returned %d",
891                         (int)pre_create_hook_pid, es);
892                 snapshot_creation_status = SCS_READY;
893                 compute_next_snapshot_time();
894                 ret = -E_BAD_EXIT_CODE;
895                 goto out;
896         }
897         snapshot_creation_status = SCS_PRE_HOOK_SUCCESS;
898         ret = 1;
899 out:
900         pre_create_hook_pid = 0;
901         return ret;
902 }
903
904 int handle_sigchld()
905 {
906         pid_t pid;
907         int status, ret = reap_child(&pid, &status);
908
909         if (ret <= 0)
910                 return ret;
911         if (pid == rsync_pid)
912                 return handle_rsync_exit(status);
913         if (pid == rm_pid)
914                 return handle_rm_exit(status);
915         if (pid == pre_create_hook_pid)
916                 return handle_pre_create_hook_exit(status);
917         if (pid == post_create_hook_pid) {
918                 snapshot_creation_status = SCS_READY;
919                 compute_next_snapshot_time();
920                 return 1;
921         }
922         DSS_EMERG_LOG("BUG: unknown process %d died\n", (int)pid);
923         exit(EXIT_FAILURE);
924 }
925
926 void handle_signal(void)
927 {
928         int sig, ret = next_signal();
929
930         if (ret <= 0)
931                 goto out;
932         sig = ret;
933         switch (sig) {
934         case SIGINT:
935         case SIGTERM:
936                 restart_rsync_process();
937                 kill_process(rsync_pid);
938                 kill_process(rm_pid);
939                 exit(EXIT_FAILURE);
940         case SIGHUP:
941                 handle_sighup();
942                 ret = 1;
943                 break;
944         case SIGCHLD:
945                 ret = handle_sigchld();
946                 break;
947         }
948 out:
949         if (ret < 0)
950                 log_err_msg(ERROR, -ret);
951 }
952
953 int get_oldest(const char *dirname, void *private)
954 {
955         struct edge_snapshot_data *esd = private;
956         struct snapshot s;
957         int ret = is_snapshot(dirname, esd->now, &s);
958
959         if (ret <= 0)
960                 return 1;
961         if (s.creation_time > esd->snap.creation_time)
962                 return 1;
963         free(esd->snap.name);
964         esd->snap = s;
965         return 1;
966 }
967
968 int remove_oldest_snapshot()
969 {
970         int ret;
971         struct edge_snapshot_data esd = {
972                 .now = get_current_time(),
973                 .snap = {.creation_time = LLONG_MAX}
974         };
975         for_each_subdir(get_oldest, &esd);
976         if (!esd.snap.name) /* no snapshot found */
977                 return 0;
978         DSS_INFO_LOG("oldest snapshot: %s\n", esd.snap.name);
979         ret = 0;
980         if (esd.snap.creation_time == current_snapshot_creation_time)
981                 goto out; /* do not remove the snapshot currently being created */
982         ret = remove_snapshot(&esd.snap);
983 out:
984         free(esd.snap.name);
985         return ret;
986 }
987
988 /* TODO: Also consider number of inodes. */
989 int disk_space_low(void)
990 {
991         struct disk_space ds;
992         int ret = get_disk_space(".", &ds);
993
994         if (ret < 0)
995                 return ret;
996         if (conf.min_free_mb_arg)
997                 if (ds.free_mb < conf.min_free_mb_arg)
998                         return 1;
999         if (conf.min_free_percent_arg)
1000                 if (ds.percent_free < conf.min_free_percent_arg)
1001                         return 1;
1002         return 0;
1003 }
1004
1005 int try_to_free_disk_space(int low_disk_space)
1006 {
1007         int ret;
1008         struct snapshot_list sl;
1009
1010         get_snapshot_list(&sl);
1011         ret = remove_outdated_snapshot(&sl);
1012         if (ret) /* error, or we are removing something */
1013                 goto out;
1014         /* no outdated snapshot */
1015         ret = remove_redundant_snapshot(&sl);
1016         if (ret)
1017                 goto out;
1018         ret = 0;
1019         if (!low_disk_space)
1020                 goto out;
1021         DSS_WARNING_LOG("disk space low and nothing obvious to remove\n");
1022         ret = remove_oldest_snapshot();
1023         if (ret)
1024                 goto out;
1025         make_err_msg("uhuhu: not enough disk space for a single snapshot");
1026         ret= -ENOSPC;
1027 out:
1028         free_snapshot_list(&sl);
1029         return ret;
1030 }
1031
1032 int select_loop(void)
1033 {
1034         int ret;
1035         struct timeval tv = {.tv_sec = 0, .tv_usec = 0};
1036
1037         for (;;) {
1038                 fd_set rfds;
1039                 int low_disk_space;
1040                 char **rsync_argv;
1041                 struct timeval now, *tvp = &tv;
1042
1043                 if (rsync_pid)
1044                         tv.tv_sec = 60; /* check every 60 seconds for free disk space */
1045                 else if (rm_pid)
1046                         tvp = NULL; /* sleep until rm process dies */
1047                 FD_ZERO(&rfds);
1048                 FD_SET(signal_pipe, &rfds);
1049                 DSS_DEBUG_LOG("tvp: %p, tv_sec: %lu\n", tvp, (long unsigned) tv.tv_sec);
1050                 ret = dss_select(signal_pipe + 1, &rfds, NULL, tvp);
1051                 if (ret < 0)
1052                         return ret;
1053                 if (FD_ISSET(signal_pipe, &rfds))
1054                         handle_signal();
1055                 if (rm_pid)
1056                         continue;
1057                 ret = disk_space_low();
1058                 if (ret < 0)
1059                         break;
1060                 low_disk_space = ret;
1061                 if (low_disk_space)
1062                         stop_rsync_process();
1063                 ret = try_to_free_disk_space(low_disk_space);
1064                 if (ret < 0)
1065                         break;
1066                 if (rm_pid)
1067                         continue;
1068                 restart_rsync_process();
1069                 gettimeofday(&now, NULL);
1070                 if (tv_diff(&next_snapshot_time, &now, &tv) > 0)
1071                         continue;
1072                 switch (snapshot_creation_status) {
1073                 case SCS_READY:
1074                         ret = pre_create_hook();
1075                         if (ret < 0)
1076                                 goto out;
1077                         continue;
1078                 case SCS_PRE_HOOK_RUNNING:
1079                         continue;
1080                 case SCS_PRE_HOOK_SUCCESS:
1081                         create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1082                         ret = create_snapshot(rsync_argv);
1083                         free_rsync_argv(rsync_argv);
1084                         if (ret < 0)
1085                                 goto out;
1086                         continue;
1087                 case SCS_RSYNC_RUNNING:
1088                         continue;
1089                 case SCS_RSYNC_SUCCESS:
1090                         ret = post_create_hook();
1091                         if (ret < 0)
1092                                 goto out;
1093                         continue;
1094                 case SCS_POST_HOOK_RUNNING:
1095                         continue;
1096                 }
1097         }
1098 out:
1099         return ret;
1100 }
1101
1102 int com_run(void)
1103 {
1104         int ret;
1105
1106         if (conf.dry_run_given) {
1107                 make_err_msg("dry_run not supported by this command");
1108                 return -E_SYNTAX;
1109         }
1110         ret = install_sighandler(SIGHUP);
1111         if (ret < 0)
1112                 return ret;
1113         compute_next_snapshot_time();
1114         return select_loop();
1115 }
1116
1117 void log_disk_space(struct disk_space *ds)
1118 {
1119         DSS_INFO_LOG("free: %uM/%uM (%u%%), %u%% inodes unused\n",
1120                 ds->free_mb, ds->total_mb, ds->percent_free,
1121                 ds->percent_free_inodes);
1122 }
1123
1124 int com_prune(void)
1125 {
1126         int ret;
1127         struct snapshot_list sl;
1128         struct disk_space ds;
1129
1130         ret = get_disk_space(".", &ds);
1131         if (ret < 0)
1132                 return ret;
1133         log_disk_space(&ds);
1134         for (;;) {
1135                 get_snapshot_list(&sl);
1136                 ret = remove_outdated_snapshot(&sl);
1137                 free_snapshot_list(&sl);
1138                 if (ret < 0)
1139                         return ret;
1140                 if (!ret)
1141                         break;
1142                 ret = wait_for_rm_process();
1143                 if (ret < 0)
1144                         goto out;
1145         }
1146         for (;;) {
1147                 get_snapshot_list(&sl);
1148                 ret = remove_redundant_snapshot(&sl);
1149                 free_snapshot_list(&sl);
1150                 if (ret < 0)
1151                         return ret;
1152                 if (!ret)
1153                         break;
1154                 ret = wait_for_rm_process();
1155                 if (ret < 0)
1156                         goto out;
1157         }
1158         return 1;
1159 out:
1160         return ret;
1161 }
1162
1163 int com_create(void)
1164 {
1165         int ret, status;
1166         char **rsync_argv;
1167
1168         if (conf.dry_run_given) {
1169                 int i;
1170                 char *msg = NULL;
1171                 create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1172                 for (i = 0; rsync_argv[i]; i++) {
1173                         char *tmp = msg;
1174                         msg = make_message("%s%s%s", tmp? tmp : "",
1175                                 tmp? " " : "", rsync_argv[i]);
1176                         free(tmp);
1177                 }
1178                 free_rsync_argv(rsync_argv);
1179                 dss_msg("%s\n", msg);
1180                 free(msg);
1181                 return 1;
1182         }
1183         ret = pre_create_hook();
1184         if (ret < 0)
1185                 return ret;
1186         if (pre_create_hook_pid) {
1187                 ret = wait_for_process(pre_create_hook_pid, &status);
1188                 if (ret < 0)
1189                         return ret;
1190                 ret = handle_pre_create_hook_exit(status);
1191                 if (ret < 0)
1192                         return ret;
1193         }
1194         create_rsync_argv(&rsync_argv, &current_snapshot_creation_time);
1195         ret = create_snapshot(rsync_argv);
1196         if (ret < 0)
1197                 goto out;
1198         ret = wait_for_process(rsync_pid, &status);
1199         if (ret < 0)
1200                 goto out;
1201         ret = handle_rsync_exit(status);
1202         if (ret < 0)
1203                 goto out;
1204         post_create_hook();
1205         if (post_create_hook_pid)
1206                 ret = wait_for_process(post_create_hook_pid, &status);
1207 out:
1208         free_rsync_argv(rsync_argv);
1209         return ret;
1210 }
1211
1212 int com_ls(void)
1213 {
1214         int i;
1215         struct snapshot_list sl;
1216         struct snapshot *s;
1217         get_snapshot_list(&sl);
1218         FOR_EACH_SNAPSHOT(s, i, &sl)
1219                 dss_msg("%u\t%s\n", s->interval, s->name);
1220         free_snapshot_list(&sl);
1221         return 1;
1222 }
1223
1224 __noreturn void clean_exit(int status)
1225 {
1226         free(dss_error_txt);
1227         exit(status);
1228 }
1229 static void setup_signal_handling(void)
1230 {
1231         int ret;
1232
1233         DSS_INFO_LOG("setting up signal handlers\n");
1234         signal_pipe = signal_init(); /* always successful */
1235         ret = install_sighandler(SIGINT);
1236         if (ret < 0)
1237                 goto err;
1238         ret = install_sighandler(SIGTERM);
1239         if (ret < 0)
1240                 goto err;
1241         ret = install_sighandler(SIGCHLD);
1242         if (ret < 0)
1243                 goto err;
1244         return;
1245 err:
1246         DSS_EMERG_LOG("could not install signal handlers\n");
1247         exit(EXIT_FAILURE);
1248 }
1249
1250 int main(int argc, char **argv)
1251 {
1252         int ret;
1253         struct cmdline_parser_params params = {
1254                 .override = 0,
1255                 .initialize = 1,
1256                 .check_required = 0,
1257                 .check_ambiguity = 0
1258         };
1259
1260         cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */
1261         parse_config_file(0);
1262
1263         if (conf.daemon_given)
1264                 daemon_init();
1265         setup_signal_handling();
1266         ret = call_command_handler();
1267         if (ret < 0)
1268                 log_err_msg(EMERG, -ret);
1269         clean_exit(ret >= 0? EXIT_SUCCESS : EXIT_FAILURE);
1270 }