git.tuebingen.mpg.de Git - dss.git/commitdiff
Merge branch 'refs/heads/t/prune'
author Andre Noll <maan@tuebingen.mpg.de>
Thu, 7 Nov 2019 11:31:02 +0000 (12:31 +0100)
committer Andre Noll <maan@tuebingen.mpg.de>
Thu, 7 Nov 2019 11:31:02 +0000 (12:31 +0100)
A couple of cleanups and unifications for the snapshot pruning code,
which is executed by the prune and run subcommands. With the patches
applied, both subcommands behave identically, i.e. they consider
the same set of snapshots as candidates for pruning. Also the prune
command gained the new --disk-space option to force it to act as if
disk space was high or low.

Cooking for six weeks.

* refs/heads/t/prune:
  Introduce prune --disk-space.
  Revamp com_prune().
  Factor out find_removable_snapshot().
  prune: Simplify rm exit code logic.
  find_oldest_removable_snapshot(): Improve log message.
  prune: Print a message if there is nothing to prune.
  get_snapshot_list(): Add comment about sorting.
  prune: Fail gracefully if pre-rm hook fails.

dss.c
dss.suite
err.h
snap.c

diff --git a/dss.c b/dss.c
index 5f35435727e956c13bb82f6801fdbe251204665f..f61f33597d7863e236a804d6c0541934e6ced8d5 100644 (file)
--- a/dss.c
+++ b/dss.c
@@ -651,6 +651,7 @@ static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
        int i, num_complete;
        struct snapshot *s, *ref = NULL;
 
+       DSS_DEBUG_LOG(("picking snapshot with earliest creation time\n"));
        num_complete = num_complete_snapshots(sl);
        if (num_complete <= OPT_UINT32_VAL(DSS, MIN_COMPLETE))
                return NULL;
@@ -661,7 +662,6 @@ static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
                        ref = s;
                        continue;
                }
-               DSS_INFO_LOG(("oldest removable snapshot: %s\n", s->name));
                return s;
        }
        assert(ref);
@@ -669,6 +669,50 @@ static struct snapshot *find_oldest_removable_snapshot(struct snapshot_list *sl)
        return ref;
 }
 
+/* returns NULL <==> *reason is set to NULL */
+static struct snapshot *find_removable_snapshot(struct snapshot_list *sl,
+               bool try_hard, char **reason)
+{
+       struct snapshot *victim;
+
+       /*
+        * Don't remove anything if there is free space and we have fewer
+        * snapshots than configured, plus one. This way there is always one
+        * snapshot that can be recycled.
+        */
+       if (!try_hard && sl->num_snapshots <=
+                       1 << OPT_UINT32_VAL(DSS, NUM_INTERVALS))
+               goto nope;
+       victim = find_orphaned_snapshot(sl);
+       if (victim) {
+               *reason = make_message("orphaned");
+               return victim;
+       }
+       victim = find_outdated_snapshot(sl);
+       if (victim) {
+               *reason = make_message("outdated");
+               return victim;
+       }
+       if (!OPT_GIVEN(DSS, KEEP_REDUNDANT)) {
+               victim = find_redundant_snapshot(sl);
+               if (victim) {
+                       *reason = make_message("redundant");
+                       return victim;
+               }
+       }
+       if (!try_hard)
+               goto nope;
+       DSS_WARNING_LOG(("nothing obvious to remove\n"));
+       victim = find_oldest_removable_snapshot(sl);
+       if (victim) {
+               *reason = make_message("oldest");
+               return victim;
+       }
+nope:
+       *reason = NULL;
+       return NULL;
+}
+
 static int rename_incomplete_snapshot(int64_t start)
 {
        char *old_name;
@@ -701,7 +745,7 @@ static int try_to_free_disk_space(void)
        struct snapshot_list sl;
        struct snapshot *victim;
        struct timeval now;
-       const char *why;
+       char *why;
        int low_disk_space;
 
        ret = disk_space_low(NULL);
@@ -712,55 +756,25 @@ static int try_to_free_disk_space(void)
        if (tv_diff(&next_removal_check, &now, NULL) > 0)
                return 0;
        if (!low_disk_space) {
-               if (OPT_GIVEN(DSS, KEEP_REDUNDANT))
-                       return 0;
                if (snapshot_creation_status != HS_READY)
                        return 0;
                if (next_snapshot_is_due())
                        return 0;
        }
-       /*
-        * Idle and --keep_redundant not given, or low disk space. Look at
-        * existing snapshots.
-        */
+       /* Idle or low disk space, look at existing snapshots. */
        dss_get_snapshot_list(&sl);
-       ret = 0;
-       /*
-        * Don't remove anything if there is free space and we have fewer
-        * snapshots than configured, plus one. This way there is always one
-        * snapshot that can be recycled.
-        */
-       if (!low_disk_space && sl.num_snapshots <=
-                       1 << OPT_UINT32_VAL(DSS, NUM_INTERVALS))
-               goto out;
-       why = "outdated";
-       victim = find_outdated_snapshot(&sl);
-       if (victim)
-               goto remove;
-       why = "redundant";
-       victim = find_redundant_snapshot(&sl);
-       if (victim)
-               goto remove;
-       why = "orphaned";
-       victim = find_orphaned_snapshot(&sl);
+       victim = find_removable_snapshot(&sl, low_disk_space, &why);
+       if (victim) {
+               pre_remove_hook(victim, why);
+               free(why);
+       }
+       free_snapshot_list(&sl);
        if (victim)
-               goto remove;
-       /* try harder only if disk space is low */
+               return 1;
        if (!low_disk_space)
-               goto out;
-       DSS_WARNING_LOG(("disk space low and nothing obvious to remove\n"));
-       why = "oldest";
-       victim = find_oldest_removable_snapshot(&sl);
-       if (victim)
-               goto remove;
+               return 0;
        DSS_CRIT_LOG(("uhuhu: disk space low and nothing to remove\n"));
-       ret = -ERRNO_TO_DSS_ERROR(ENOSPC);
-       goto out;
-remove:
-       pre_remove_hook(victim, why);
-out:
-       free_snapshot_list(&sl);
-       return ret;
+       return -ERRNO_TO_DSS_ERROR(ENOSPC);
 }
 
 static void post_create_hook(void)
@@ -1645,55 +1659,55 @@ static int com_prune(void)
        struct snapshot_list sl;
        struct snapshot *victim;
        struct disk_space ds;
-       const char *why;
+       char *why;
+       bool try_hard;
 
        lock_dss_or_die();
-       ret = get_disk_space(".", &ds);
-       if (ret < 0)
-               return ret;
-       log_disk_space(&ds);
+       switch (OPT_UINT32_VAL(PRUNE, DISK_SPACE)) {
+       case FDS_LOW: try_hard = true; break;
+       case FDS_HIGH: try_hard = false; break;
+       default:
+               ret = get_disk_space(".", &ds);
+               if (ret < 0)
+                       return ret;
+               log_disk_space(&ds);
+               try_hard = disk_space_low(&ds);
+       }
        dss_get_snapshot_list(&sl);
-       why = "outdated";
-       victim = find_outdated_snapshot(&sl);
-       if (victim)
-               goto rm;
-       why = "redundant";
-       victim = find_redundant_snapshot(&sl);
-       if (victim)
-               goto rm;
-       ret = 0;
-       goto out;
-rm:
+       victim = find_removable_snapshot(&sl, try_hard, &why);
+       if (!victim) {
+               dss_msg("nothing to prune\n");
+               ret = 0;
+               goto free_sl;
+       }
        if (OPT_GIVEN(DSS, DRY_RUN)) {
-               dss_msg("%s snapshot %s (interval = %i)\n",
+               dss_msg("picking %s snapshot %s (interval = %i)\n",
                        why, victim->name, victim->interval);
                ret = 0;
-               goto out;
+               goto free_why;
        }
        pre_remove_hook(victim, why);
        if (snapshot_removal_status == HS_PRE_RUNNING) {
                ret = wait_for_remove_process();
                if (ret < 0)
-                       goto out;
+                       goto free_why;
+               ret = -E_HOOK_FAILED;
                if (snapshot_removal_status != HS_PRE_SUCCESS)
-                       goto out;
+                       goto free_why;
        }
        ret = exec_rm();
        if (ret < 0)
-               goto out;
+               goto free_why;
        ret = wait_for_remove_process();
        if (ret < 0)
-               goto out;
-       if (snapshot_removal_status != HS_SUCCESS)
-               goto out;
+               goto free_why;
+       assert(snapshot_removal_status == HS_SUCCESS);
        post_remove_hook();
-       if (snapshot_removal_status != HS_POST_RUNNING)
-               goto out;
+       assert(snapshot_removal_status == HS_POST_RUNNING);
        ret = wait_for_remove_process();
-       if (ret < 0)
-               goto out;
-       ret = 1;
-out:
+free_why:
+       free(why);
+free_sl:
        free_snapshot_list(&sl);
        return ret;
 }
diff --git a/dss.suite b/dss.suite
index 16c7e58542eff8750c8437dcea137f347a435950..f9cf8af5993345452fddc7b3d9790d0d7e6d7a13 100644 (file)
--- a/dss.suite
+++ b/dss.suite
@@ -450,19 +450,38 @@ caption = Subcommands
                snapshots.
        [/description]
 [subcommand prune]
-       purpose = remove redundant and outdated snapshots
+       purpose = remove snapshots
        [description]
-               A snapshot is considered outdated if its interval number is greater or
-               equal than the specified number of unit intervals. See --unit-interval
-               and --num-intervals above.
-
-               A snapshot is said to be redundant if the interval it belongs to
-               contains more than the configured number of snapshots.
-
-               The prune command gets rid of both outdated and redundant snapshots. At
-               most one snapshot is removed per invocation. If --dry-run is given, the
-               subcommand only prints the snapshot that would be removed.
+               A snapshot is said to be (a) outdated if its interval number is greater
+               than or equal to the specified number of unit intervals, (b) redundant if
+               the interval it belongs to contains more than the configured number of
+               snapshots, and (c) orphaned if it is incomplete and not being created
+               or deleted. All other snapshots are called regular.
+
+               Unless --dry-run is given, which just prints the snapshot that would be
+               removed, this subcommand gets rid of non-regular snapshots.  At most
+               one snapshot is removed per invocation. If no such snapshot exists
+               and disk space is low, the subcommand also removes regular snapshots,
+               always picking the oldest one.
+
+               The subcommand fails if there is another dss "run" process.
        [/description]
+       [option disk-space]
+               summary = act as if free disk space was high/low
+               arg_info = required_arg
+               arg_type = string
+               typestr = mode
+               values = {
+                       FDS_CHECK = "check",
+                       FDS_HIGH = "high",
+                       FDS_LOW = "low"
+               }
+               default_val = check
+               [help]
+                       By default, free disk space is checked and even regular snapshots
+                       become candidates for removal if disk space is low. This option
+                       overrides the result of the check.
+               [/help]
 [subcommand ls]
        purpose = print the list of all snapshots
        [description]
diff --git a/err.h b/err.h
index 9505dfcc99f718ce1f8fe540401446e1dff82e8e..bd22554dd90d8ee2e282ff3c8c24b2fd56b7d5cd 100644 (file)
--- a/err.h
+++ b/err.h
@@ -44,6 +44,7 @@ static inline char *dss_strerror(int num)
        DSS_ERROR(INVALID_NUMBER, "invalid number"), \
        DSS_ERROR(STRFTIME, "strftime() failed"), \
        DSS_ERROR(LOCALTIME, "localtime() failed"), \
+       DSS_ERROR(HOOK_FAILED, "hook failure"), \
        DSS_ERROR(MOUNTPOINT, "destination directory is no mountpoint"), \
        DSS_ERROR(NULL_OPEN, "can not open /dev/null"), \
        DSS_ERROR(DUP_PIPE, "exec error: can not create pipe"), \
diff --git a/snap.c b/snap.c
index 8de76d459da1a8844a452fa576d706014b229be9..da95c1f342dc09bcc01d1eef58a075dd721472ef 100644 (file)
--- a/snap.c
+++ b/snap.c
@@ -146,7 +146,7 @@ static int compare_snapshots(const void *a, const void *b)
        return NUM_COMPARE(s2->creation_time, s1->creation_time);
 }
 
-
+/* The returned snapshot list is sorted by creation time. */
 void get_snapshot_list(struct snapshot_list *sl, int unit_interval,
                int num_intervals)
 {