Add para_fsck.
[paraslash.git] / fsck.c
diff --git a/fsck.c b/fsck.c
new file mode 100644 (file)
index 0000000..bc4083a
--- /dev/null
+++ b/fsck.c
@@ -0,0 +1,888 @@
+#include "para.h"
+#include "error.h"
+#include "osl_core.h"
+
+/* Directory handed to dump_table() by main() when DUMP is enabled. */
+#define OSL_DUMP_DIR "/tmp/osldump"
+
+/*
+ * Error codes private to this file, numbered from 501 on. The functions
+ * below return them negated, e.g. -E_RANGE_VIOLATION.
+ */
+enum errors {
+       E_FSCK_SYNTAX = 501, /* fewer than two command line arguments given */
+       E_RANGE_VIOLATION, /* a mapped object is not contained in its data file */
+       E_NO_DS_FILE, /* not referenced anywhere in this file */
+       E_NOT_A_REGULAR_FILE, /* a disk storage object is not a regular file */
+       E_BAD_HASH_PATH, /* not referenced anywhere in this file */
+};
+
+/* NOTE(review): presumably sets up logging to stderr at loglevel 1 -- confirm */
+INIT_STDERR_LOGGING(1);
+
+/*
+ * Lookup table, taken from git, which maps a byte to the value of the hex
+ * digit it represents: '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15.
+ * All other bytes map to -1.
+ */
+signed char hexval_table[256] = {
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 00-07 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 08-0f */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 10-17 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 18-1f */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 20-27 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 28-2f */
+         0,  1,  2,  3,  4,  5,  6,  7,                /* 30-37 */
+         8,  9, -1, -1, -1, -1, -1, -1,                /* 38-3f */
+        -1, 10, 11, 12, 13, 14, 15, -1,                /* 40-47 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 48-4f */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 50-57 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 58-5f */
+        -1, 10, 11, 12, 13, 14, 15, -1,                /* 60-67 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 68-6f */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 70-77 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 78-7f */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 80-87 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 88-8f */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 90-97 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* 98-9f */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* a0-a7 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* a8-af */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* b0-b7 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* b8-bf */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* c0-c7 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* c8-cf */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* d0-d7 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* d8-df */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* e0-e7 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* e8-ef */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* f0-f7 */
+        -1, -1, -1, -1, -1, -1, -1, -1,                /* f8-ff */
+};
+
+/*
+ * Convert a string of hex digits to its binary representation.
+ *
+ * asc_hash: The hex string. Two characters are consumed per output byte.
+ * len: The maximal number of bytes to store in hash.
+ * hash: Result buffer, must provide room for len bytes.
+ *
+ * The conversion stops early, without error, if the terminating NUL of
+ * asc_hash is reached before len bytes have been produced.
+ *
+ * Returns 1 on success, -1 if a character which is not a hex digit was
+ * encountered.
+ */
+int asc_to_hash(const char *asc_hash, int len, HASH_TYPE *hash)
+{
+       const unsigned char *asc = (const unsigned char *) asc_hash;
+       int i;
+
+       for (i = 0; *asc && i < len; i++, asc += 2) {
+               /*
+                * Validate each nibble on its own. Left-shifting the -1
+                * marker of hexval_table would be undefined behaviour.
+                */
+               int hi = hexval_table[asc[0]];
+               int lo = hexval_table[asc[1]];
+
+               if (hi < 0 || lo < 0)
+                       return -1;
+               *hash++ = (hi << 4) | lo;
+       }
+       return 1;
+}
+
+/*
+ * Check a single cell for an object boundary violation.
+ *
+ * The object the index refers to for the given cell must be fully contained
+ * in the mapped data file of the column. If it is not, the table is in real
+ * trouble.
+ *
+ * Returns 1 if the object is within bounds, -E_RANGE_VIOLATION if it is not,
+ * and the negative return value of get_cell_index() or get_mapped_object()
+ * if one of these fails.
+ */
+static int check_range(struct osl_table *t, uint32_t row_num, uint32_t col_num)
+{
+       struct osl_object obj;
+       const struct osl_column *col = &t->columns[col_num];
+       char *index_entry, *map_begin, *obj_begin;
+       int ret = get_cell_index(t, row_num, col_num, &index_entry);
+
+       if (ret < 0)
+               return ret;
+       ret = get_mapped_object(t, col_num, row_num, &obj);
+       if (ret < 0)
+               return ret;
+       map_begin = col->data_map.data;
+       obj_begin = obj.data;
+       if (obj_begin >= map_begin && obj_begin + obj.size
+                       <= map_begin + col->data_map.size) {
+               PARA_DEBUG_LOG("col %u: ok\n", col_num);
+               return 1;
+       }
+       PARA_CRIT_LOG("row %u, col %u: range violation, very bad\n", row_num, col_num);
+       return -E_RANGE_VIOLATION;
+}
+
+/*
+ * Check all cells of the given table for boundary violations.
+ *
+ * Rows which are already marked invalid are only counted. For every other
+ * row, each mapped column is passed to check_range(). A row with an invalid
+ * or out-of-range object is marked invalid and counted as well. The total is
+ * stored in t->num_invalid_rows.
+ *
+ * Returns 1 on success, negative if check_range() failed for another reason
+ * or if the row could not be marked invalid.
+ */
+static int check_index_ranges(struct osl_table *t)
+{
+       int row, col, ret;
+
+       PARA_NOTICE_LOG("checking for range violations in index\n");
+       t->num_invalid_rows = 0;
+       for (row = 0; row < t->num_rows; row++) {
+               if (row_is_invalid(t, row)) {
+                       t->num_invalid_rows++;
+                       continue;
+               }
+               for (col = 0; col < t->desc->num_columns; col++) {
+                       const struct osl_column_description *cd =
+                               get_column_description(t->desc, col);
+
+                       if (cd->storage_type != OSL_MAPPED_STORAGE)
+                               continue;
+                       ret = check_range(t, row, col);
+                       if (ret >= 0)
+                               continue;
+                       if (ret == -E_INVALID_OBJECT) {
+                               PARA_CRIT_LOG("row %d, col %d maps to an "
+                                       "invalid object\n", row, col);
+                       } else if (ret != -E_RANGE_VIOLATION) {
+                               return ret;
+                       }
+                       /* one bad cell suffices to invalidate the whole row */
+                       ret = mark_row_invalid(t, row);
+                       if (ret < 0)
+                               return ret;
+                       t->num_invalid_rows++;
+                       break;
+               }
+       }
+       if (!t->num_invalid_rows)
+               PARA_INFO_LOG("no invalid rows, no range violations, good\n");
+       else
+               PARA_NOTICE_LOG("ranges OK. %d invalid row(s) detected\n",
+                       t->num_invalid_rows);
+       return 1;
+}
+
+/*
+ * Overwrite the index entry of row dest with the index entry of row src.
+ *
+ * Returns 1 on success, or the negative return value of get_row_index() if
+ * either of the two entries could not be looked up.
+ */
+static int move_index_entry(struct osl_table *t, uint32_t dest, uint32_t src)
+{
+       char *from, *to;
+       int ret;
+
+       ret = get_row_index(t, dest, &to);
+       if (ret >= 0)
+               ret = get_row_index(t, src, &from);
+       if (ret < 0)
+               return ret;
+       PARA_INFO_LOG("moving entry #%u to position %u\n", src, dest);
+       memcpy(to, from, t->row_index_size);
+       return 1;
+}
+
+/*
+ * Map the index file of the table described by desc read-write.
+ *
+ * desc: Passed to index_filename() to obtain the path of the index file.
+ * map: Result pointer, filled in by mmap_full_file().
+ *
+ * Returns the return value of mmap_full_file().
+ */
+static int map_index(const struct osl_table_description *desc, struct osl_object *map)
+{
+       char *filename = index_filename(desc);
+       int ret = mmap_full_file(filename, O_RDWR, map);
+
+       /*
+        * Callers may pass an uninitialized object, so map->size must not be
+        * looked at unless the mapping succeeded.
+        */
+       PARA_INFO_LOG("mapping index %s: ret: %d, size: %zu\n", filename, ret,
+               ret < 0? (size_t)0 : map->size);
+       free(filename);
+       return ret;
+}
+
+/*
+ * Remove the index entries of all invalid rows.
+ *
+ * The index is compacted by copying valid entries from the bottom of the
+ * index over invalid entries near the top. Afterwards the index is unmapped,
+ * the index file is truncated and mapped again, and t->num_rows is updated.
+ *
+ * Relies on t->num_invalid_rows being up to date.
+ *
+ * Returns 1 on success, negative if truncating or re-mapping failed.
+ */
+static int prune_invalid_rows_from_index(struct osl_table *t)
+{
+       uint32_t top, bottom;
+       char *filename;
+       int ret;
+
+       if (!t->num_invalid_rows) {
+               PARA_INFO_LOG("all rows are valid, good\n");
+               return 1;
+       }
+       PARA_NOTICE_LOG("deleting %u invalid row(s) (%d bytes) from index\n",
+               t->num_invalid_rows, t->row_index_size * t->num_invalid_rows);
+       top = 0;
+       bottom = t->num_rows - 1;
+       while (top < bottom) {
+               if (!row_is_invalid(t, top)) {
+                       top++;
+                       continue;
+               }
+               /* top is invalid, find the last valid row below it */
+               while (bottom > top && row_is_invalid(t, bottom))
+                       bottom--;
+               if (bottom > top) {
+                       /* move bottom index entry to top */
+                       move_index_entry(t, top, bottom);
+                       bottom--;
+                       top++;
+               }
+       }
+       PARA_INFO_LOG("unmapping index\n");
+       para_munmap(t->index_map.data, t->index_map.size);
+       filename = index_filename(t->desc);
+       /*
+        * NOTE(review): presumably para_truncate() shortens the file by the
+        * given number of bytes rather than to that size -- confirm.
+        */
+       ret = para_truncate(filename, t->row_index_size
+               * t->num_invalid_rows);
+       free(filename);
+       if (ret >= 0)
+               ret = map_index(t->desc, &t->index_map);
+       if (ret < 0)
+               return ret;
+       t->num_rows = table_num_rows(t);
+       return 1;
+}
+
+/*
+ * Compute, for each mapped column, the number of bytes in the data file
+ * which are not referenced by any row of the index.
+ *
+ * t: The table to check.
+ * lost_bytes: On non-negative return this points to an allocated array with
+ * one entry per column of the table. Only the entries of mapped columns are
+ * set. The caller must free the array.
+ *
+ * Returns 1 if at least one mapped column contains lost bytes, 0 if there is
+ * none, negative if get_mapped_object() failed with an error other than
+ * -E_INVALID_OBJECT. In the error case nothing is returned via lost_bytes.
+ */
+static int check_for_invalid_objects(struct osl_table *t, uint32_t **lost_bytes)
+{
+       const struct osl_column_description *cd;
+       uint32_t *loss = para_malloc(sizeof(uint32_t) * t->desc->num_columns);
+       int col, row, ret, found_loss = 0;
+
+       PARA_NOTICE_LOG("looking for mapped objects not contained in index\n");
+       /* start with the full file size and subtract all referenced bytes */
+       FOR_EACH_MAPPED_COLUMN(col, t, cd) {
+               loss[col] = t->columns[col].data_map.size;
+               for (row = 0; row < t->num_rows; row++) {
+                       struct osl_object obj;
+
+                       ret = get_mapped_object(t, col, row, &obj);
+                       if (ret < 0) {
+                               if (ret != -E_INVALID_OBJECT) {
+                                       free(loss);
+                                       return ret;
+                               }
+                               PARA_CRIT_LOG("row %d, col %d points to an invalid "
+                                       "mapped object, bad\n", row, col);
+                               continue;
+                       }
+                       /* each object is preceded by one header byte */
+                       loss[col] -= obj.size + 1;
+               }
+       }
+       FOR_EACH_MAPPED_COLUMN(col, t, cd) {
+               if (!loss[col])
+                       continue;
+               PARA_NOTICE_LOG("column %u contains %u lost bytes\n",
+                       col, loss[col]);
+               found_loss = 1;
+       }
+       if (!found_loss)
+               PARA_INFO_LOG("all mapped objects are valid, good\n");
+       *lost_bytes = loss;
+       return found_loss;
+}
+
+/*
+ * Write all objects of a mapped column which are referenced by the index to
+ * the given file descriptor and update the index entries accordingly.
+ *
+ * Each object is written together with the byte that precedes it in the
+ * mapping. Any invalid object makes this function fail, so
+ * prune_invalid_rows_from_index() must be run on the table before calling
+ * this.
+ *
+ * Returns 1 on success, negative on errors.
+ */
+static int prune_mapped_column(struct osl_table *t, uint32_t col_num, int fd)
+{
+       struct osl_column *col = &t->columns[col_num];
+       uint32_t written = 0;
+       int row;
+
+       PARA_INFO_LOG("pruning col %u\n", col_num);
+       for (row = 0; row < t->num_rows; row++) {
+               struct osl_object obj;
+               char *index_entry;
+               int ret;
+
+               PARA_DEBUG_LOG("checking row %u/%u\n", row, t->num_rows);
+               ret = get_mapped_object(t, col_num, row, &obj);
+               if (ret < 0)
+                       return ret;
+               ret = para_write_all(fd, (char *)(obj.data) - 1, obj.size + 1);
+               if (ret < 0)
+                       return ret;
+               ret = get_row_index(t, row, &index_entry);
+               if (ret < 0)
+                       return ret;
+               /* total number of bytes in the new file, this object included */
+               written += obj.size + 1;
+               update_cell_index(index_entry, col, written, obj.size);
+       }
+       return 1;
+}
+
+/*
+ * Remove unreferenced objects from the data files of all mapped columns.
+ *
+ * t: The table, must be mapped.
+ * lost_bytes: One entry per column. Mapped columns whose entry is zero are
+ * left alone.
+ *
+ * A copy of the index is written to <index>.bak first. Then a pruned copy
+ * <datafile>.fsck is created for each affected column, the table is unmapped,
+ * the pruned copies are renamed over the originals and the table is mapped
+ * again. The index copy is removed only if all renames succeeded.
+ *
+ * Returns the return value of map_table() on success, negative on errors.
+ */
+static int prune_objects(struct osl_table *t, uint32_t *lost_bytes)
+{
+       int i, ret;
+       const struct osl_column_description *cd;
+       char **col_filenames = para_calloc(t->desc->num_columns * sizeof(char *));
+       char **new_col_filenames = para_calloc(t->desc->num_columns * sizeof(char *));
+       char *idx_filename = index_filename(t->desc);
+       char *old_idx_filename = make_message("%s.bak", idx_filename);
+       int fd;
+
+       PARA_NOTICE_LOG("removing unreferenced objects from data files\n");
+       /* first make a copy of the index */
+       ret = para_open(old_idx_filename, O_WRONLY | O_CREAT | O_EXCL, 0644);
+       if (ret < 0)
+               goto out_free;
+       fd = ret;
+       ret = para_write_all(fd, t->index_map.data, t->index_map.size);
+       close(fd);
+       if (ret < 0)
+               goto out_free;
+       FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+               if (!lost_bytes[i])
+                       continue;
+               col_filenames[i] = column_filename(t, i);
+               new_col_filenames[i] = make_message("%s.fsck", col_filenames[i]);
+               ret = para_open(new_col_filenames[i], O_WRONLY | O_CREAT | O_EXCL, 0644);
+               if (ret < 0)
+                       goto out_unlink_data;
+               fd = ret;
+               ret = prune_mapped_column(t, i, fd);
+               close(fd);
+               if (ret < 0)
+                       goto out_unlink_data;
+       }
+       ret = unmap_table(t, OSL_MARK_CLEAN);
+       if (ret < 0)
+               goto out_unlink_data;
+       FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+               if (!lost_bytes[i])
+                       continue;
+               ret = para_rename(new_col_filenames[i], col_filenames[i]);
+               if (ret < 0) { /* we're kinda screwed here */
+                       PARA_CRIT_LOG("rename of col %i failed: %s\n", i,
+                               strerror(errno));
+                       goto out_free;
+               }
+       }
+       unlink(old_idx_filename);
+       ret = map_table(t, 0);
+       goto out_free;
+out_unlink_data:
+       FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+               /* no temporary file was created for columns without a name */
+               if (new_col_filenames[i])
+                       unlink(new_col_filenames[i]);
+       }
+out_free:
+       free(old_idx_filename);
+       free(idx_filename);
+       FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+               free(col_filenames[i]);
+               free(new_col_filenames[i]);
+       }
+       free(col_filenames);
+       free(new_col_filenames);
+       return ret;
+}
+
+/*
+ * The single column of the hash tree table: fixed-size, unique entries of
+ * HASH_SIZE bytes which are kept in an rbtree.
+ *
+ * NOTE(review): OSL_NO_STORAGE presumably means that the column has no
+ * backing file -- confirm. Also check whether uint32_compare looks at all
+ * HASH_SIZE bytes of an entry.
+ */
+static struct osl_column_description hash_tree_table_cols[] = {
+       {
+               .storage_type = OSL_NO_STORAGE,
+               .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
+               .name = "hash",
+               .compare_function = uint32_compare,
+               .data_size = HASH_SIZE
+       },
+};
+
+/*
+ * Description of the hash tree table, passed to osl_open_table() by
+ * check_disk_storage_columns().
+ */
+static const struct osl_table_description hash_tree_table_desc = {
+       .dir = "/", /* irrelevant */
+       .name = "hash_tree",
+       .num_columns = 1,
+       .flags = 0,
+       .column_descriptions = hash_tree_table_cols
+};
+
+/**
+ * The hash_tree table contains the hash of the disk storage name column of
+ * each row. It is used for checking whether a disk storage file has a
+ * reference in the table.
+ */
+static struct osl_table *hash_tree_table;
+/*
+ * Buffer for the hashes referenced by the rows of hash_tree_table, HASH_SIZE
+ * bytes per table row. Allocated in check_disk_storage_presence(), freed in
+ * check_disk_storage_columns().
+ */
+static HASH_TYPE *hashes;
+
+/*
+ * Check that the disk storage object of one cell exists and is a regular file.
+ *
+ * t: The table.
+ * row_num: The number of the row the object belongs to.
+ * col_num: The disk storage column to check.
+ * ds_name: The disk storage name of the row.
+ * num_missing_objects: Incremented if the object does not exist.
+ *
+ * If the object is missing, the row is looked up by its disk storage name
+ * object and deleted. If the lookup fails, the row is marked invalid instead.
+ *
+ * Returns 1 if the object is a regular file or if the row of a missing
+ * object was deleted, -E_NOT_A_REGULAR_FILE if the object exists but is no
+ * regular file, -1 if stat() failed for a reason other than ENOENT, and
+ * another negative value if the row could not be looked up or deleted.
+ */
+static int check_disk_storage_column(struct osl_table *t, int row_num,
+               int col_num, char *ds_name, unsigned *num_missing_objects)
+{
+       int ret;
+       struct stat statbuf;
+       char *path = disk_storage_path(t, col_num, ds_name);
+       unsigned dsnc = t->disk_storage_name_column;
+       struct osl_object obj;
+       struct osl_row *row;
+
+       PARA_DEBUG_LOG("checking if %s is a regular file\n", path);
+       ret = stat(path, &statbuf);
+       if (ret >= 0) {
+               /*
+                * S_ISREG() compares the whole file type field. Testing the
+                * S_IFREG bit alone would also accept sockets.
+                */
+               ret = S_ISREG(statbuf.st_mode)? 1 : -E_NOT_A_REGULAR_FILE;
+               goto out;
+       }
+       if (errno != ENOENT)
+               goto out;
+       (*num_missing_objects)++;
+       PARA_ERROR_LOG("row %d: object %s is missing\n", row_num, path);
+       PARA_NOTICE_LOG("trying to delete row %d\n", row_num);
+       /* osl_get_row() needs the name object of the row as the search key */
+       ret = get_mapped_object(t, dsnc, row_num, &obj);
+       if (ret >= 0)
+               ret = osl_get_row(t, dsnc, &obj, &row);
+       if (ret < 0) {
+               PARA_CRIT_LOG("unable to get row %d\n", row_num);
+               mark_row_invalid(t, row_num);
+               PARA_CRIT_LOG("Please re-run fsck\n");
+               goto out;
+       }
+       ret = osl_del_row(t, row);
+       if (ret >= 0)
+               ret = 1; /* statbuf is not valid here, so do not look at it */
+out:
+       free(path);
+       return ret;
+}
+
+/*
+ * Check that all disk storage objects referenced by the table exist.
+ *
+ * For each valid row the disk storage name object is hashed, the hash is
+ * stored in the global hashes buffer and added to hash_tree_table. Then each
+ * disk storage column of the row is passed to check_disk_storage_column().
+ *
+ * Returns 1 if the table has no rows. Otherwise the number of missing
+ * objects is returned on success, and a negative value on errors. The hashes
+ * buffer is not freed here.
+ */
+static int check_disk_storage_presence(struct osl_table *t)
+{
+       struct osl_object obj, hash_obj = {.size = HASH_SIZE};
+       const struct osl_column_description *cd;
+       unsigned dsnc = t->disk_storage_name_column, missing_objects = 0;
+       char *ds_name;
+       int ret, row, col;
+
+       if (!t->num_rows)
+               return 1;
+       hashes = para_malloc(t->num_rows * HASH_SIZE);
+       PARA_NOTICE_LOG("looking for missing disk storage objects\n");
+       for (row = 0; row < t->num_rows; row++) {
+               HASH_TYPE *hash = hashes + row * HASH_SIZE;
+
+               if (row_is_invalid(t, row))
+                       continue;
+               ret = get_mapped_object(t, dsnc, row, &obj);
+               if (ret < 0)
+                       return ret;
+               hash_object(&obj, hash);
+               hash_obj.data = hash;
+               osl_add_row(hash_tree_table, &hash_obj);
+               ds_name = disk_storage_name_of_hash(t, hash);
+               FOR_EACH_DISK_STORAGE_COLUMN(col, t, cd) {
+                       ret = check_disk_storage_column(t, row, col, ds_name,
+                               &missing_objects);
+                       if (ret < 0) {
+                               free(ds_name);
+                               return ret;
+                       }
+               }
+               free(ds_name);
+       }
+       if (missing_objects)
+               PARA_NOTICE_LOG("%d missing object(s)\n", missing_objects);
+       else
+               PARA_INFO_LOG("all referenced disk storage objects exist, good\n");
+       return missing_objects;
+}
+
+/*
+ * Compare function which orders two objects by their addresses rather than
+ * by their contents. Returns -1, 0 or 1.
+ */
+static int dummy_compare(const struct osl_object *obj1, const struct osl_object *obj2)
+{
+       return (obj1 > obj2) - (obj1 < obj2);
+}
+
+/* The number of disk storage files removed by prune_disk_storage_file(). */
+static unsigned files_pruned;
+
+/*
+ * Remove a disk storage file unless its hash is contained in hash_tree_table.
+ *
+ * path: The path of the file. The hash is decoded from the hex digits at the
+ * end of the path.
+ * private_data: Pointer to the flags of the table. If OSL_LARGE_TABLE is set,
+ * the first byte of the hash is expected to be separated from the remaining
+ * bytes by one extra character.
+ *
+ * Files whose path does not end in a valid hash are removed as well. Only
+ * files which were actually removed are counted in files_pruned.
+ *
+ * Always returns 1.
+ */
+int prune_disk_storage_file(const char *path, const void *private_data)
+{
+       HASH_TYPE hash[HASH_SIZE];
+       unsigned flags = *(const unsigned *)private_data;
+       struct osl_object obj = {.data = hash, .size = HASH_SIZE};
+       struct osl_row *row;
+       size_t len = strlen(path);
+       int ret;
+
+       PARA_DEBUG_LOG("path: %s\n", path);
+       if (flags & OSL_LARGE_TABLE) {
+               if (len < HASH_SIZE * 2 + 2)
+                       goto invalid;
+               /* the first byte of the hash */
+               ret = asc_to_hash(path + len - 2 * HASH_SIZE - 1, 1, hash);
+               if (ret < 0)
+                       goto invalid;
+               /* skip the separating character, decode the remaining bytes */
+               ret = asc_to_hash(path + len - 2 * HASH_SIZE + 2, HASH_SIZE - 1,
+                       hash + 1);
+               if (ret < 0)
+                       goto invalid;
+       } else {
+               if (len < 2 * HASH_SIZE + 1)
+                       goto invalid;
+               /* asc_to_hash() takes the number of bytes, not of hex digits */
+               ret = asc_to_hash(path + len - 2 * HASH_SIZE, HASH_SIZE, hash);
+               if (ret < 0)
+                       goto invalid;
+       }
+       ret = osl_get_row(hash_tree_table, 0, &obj, &row);
+       if (ret >= 0)
+               return 1;
+       PARA_NOTICE_LOG("unreferenced file in hash dir: %s\n", path);
+       goto remove;
+invalid:
+       PARA_ERROR_LOG("could not read hash value of %s\n", path);
+remove:
+       PARA_NOTICE_LOG("removing %s\n", path);
+       if (unlink(path) < 0) {
+               PARA_ERROR_LOG("could not remove %s: %s\n", path,
+                       strerror(errno));
+               return 1;
+       }
+       files_pruned++;
+       return 1;
+}
+
+/*
+ * Remove all files in the directories of the disk storage columns which are
+ * not referenced by the table.
+ *
+ * prune_disk_storage_file() is called for each file in the directory of each
+ * disk storage column. All columns are processed even if one of them fails.
+ *
+ * Returns the first negative return value of for_each_file_in_dir(), if any.
+ * Otherwise the return value for the last column is returned, or 1 if the
+ * table has no disk storage columns.
+ */
+static int prune_disk_storage_files(struct osl_table *t)
+{
+       int i, ret, result = 1;
+       const struct osl_column_description *cd;
+
+       PARA_NOTICE_LOG("looking for unreferenced disk storage files\n");
+       FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) {
+               char *dirname = column_filename(t, i);
+               /*
+                * NOTE(review): the callback reads the flags through an
+                * unsigned pointer -- confirm this matches the type of
+                * t->desc->flags.
+                */
+               ret = for_each_file_in_dir(dirname, prune_disk_storage_file, &t->desc->flags);
+               free(dirname);
+               /* do not let a later column hide an earlier error */
+               if (result >= 0)
+                       result = ret;
+       }
+       if (files_pruned)
+               PARA_NOTICE_LOG("%u disk storage files deleted\n",
+                       files_pruned);
+       else
+               PARA_INFO_LOG("all files are are referenced, good\n");
+       return result;
+}
+
+/*
+ * Run all checks which concern the disk storage columns of a table.
+ *
+ * Nothing is done if the table has no disk storage columns. Otherwise the
+ * compare function of every column is replaced by dummy_compare(), the
+ * rbtrees of the table are set up and the hash tree table is opened. Then
+ * missing disk storage objects are looked for and unreferenced disk storage
+ * files are removed.
+ *
+ * Returns 1 if there are no disk storage columns, negative on errors, and
+ * the return value of prune_disk_storage_files() otherwise.
+ */
+static int check_disk_storage_columns(struct osl_table *t)
+{
+       const struct osl_column_description *cd;
+       int col, ret;
+
+       if (!t->num_disk_storage_columns) {
+               PARA_NOTICE_LOG("no disk storage columns in table '%s', "
+                       "skipping checks\n", t->desc->name);
+               return 1;
+       }
+       FOR_EACH_COLUMN(col, t->desc, cd)
+               t->desc->column_descriptions[col].compare_function = dummy_compare;
+       ret = init_rbtrees(t);
+       if (ret < 0)
+               return ret;
+       PARA_NOTICE_LOG("creating rbtree for disk storage hash values\n");
+       ret = osl_open_table(&hash_tree_table_desc, &hash_tree_table);
+       if (ret >= 0) {
+               ret = check_disk_storage_presence(t);
+               if (ret >= 0)
+                       ret = prune_disk_storage_files(t);
+               osl_close_table(hash_tree_table, 0);
+               free(hashes);
+       }
+       clear_rbtrees(t); /* TODO why are we doing that here? Seems odd */
+       return ret;
+}
+
+/* Compile-time switches, evaluated in main() and fsck_init(). */
+#define FSCK 1 /* if non-zero, main() calls fsck() */
+#define FORCE 1 /* if non-zero, fsck_init() maps with MAP_TBL_FL_IGNORE_DIRTY */
+#define DUMP 0 /* if non-zero, main() dumps the table to OSL_DUMP_DIR */
+
+/* Let every column of the table description use dummy_compare(). */
+static void set_dummy_contents(struct osl_table_description *desc)
+{
+       int col = 0;
+
+       while (col < desc->num_columns) {
+               struct osl_column_description *cd;
+
+               cd = get_column_description(desc, col);
+               cd->compare_function = dummy_compare;
+               col++;
+       }
+}
+
+/*
+ * Set up the table structure for the table described by desc and map it.
+ *
+ * desc: Only dir and name need to be set by the caller. The rest is filled
+ * in by read_table_desc() from the index, and all compare functions are
+ * replaced by dummy_compare().
+ * t: Result pointer, set by init_table_structure().
+ *
+ * The index is mapped only temporarily in order to read the table
+ * description. Afterwards the whole table is mapped, ignoring the dirty flag
+ * if FORCE is set, and the number of rows is stored in the table structure.
+ *
+ * Returns the return value of map_table() on success, negative on errors.
+ */
+static int fsck_init(struct osl_table_description *desc, struct osl_table **t)
+{
+       struct osl_object map;
+       int ret;
+
+       ret = map_index(desc, &map);
+       if (ret < 0)
+               return ret;
+       ret = read_table_desc(&map, desc);
+       if (ret < 0)
+               goto unmap_index;
+       set_dummy_contents(desc);
+       ret = init_table_structure(desc, t);
+       if (ret < 0)
+               goto unmap_index;
+       PARA_INFO_LOG("unmapping index\n");
+       para_munmap(map.data, map.size);
+       ret = map_table(*t, FORCE? (MAP_TBL_FL_IGNORE_DIRTY) : 0);
+       if (ret >= 0)
+               (*t)->num_rows = table_num_rows(*t);
+       return ret;
+unmap_index:
+       para_munmap(map.data, map.size);
+       return ret;
+}
+
+/*
+ * Free the table structure and the column descriptions of its description.
+ *
+ * t: The table to free. May be NULL, in which case nothing is done. This
+ * happens when fsck_init() fails before the table structure was allocated.
+ *
+ * The name of each column and the array of column descriptions are freed,
+ * followed by t->columns and t itself. The table is not unmapped here.
+ */
+static void fsck_cleanup(struct osl_table *t)
+{
+       int i;
+
+       if (!t)
+               return;
+       if (t->desc->column_descriptions) {
+               struct osl_column_description *cd;
+               for (i = 0; i < t->desc->num_columns; i++) {
+                       cd = get_column_description(t->desc, i);
+                       free((char*)cd->name);
+               }
+               free(t->desc->column_descriptions);
+       }
+       free(t->columns);
+       free(t);
+}
+
+/*
+ * Return the name of the given storage type as a string constant, or NULL if
+ * the value is none of the three known storage types.
+ */
+const char *get_asc_storage_type(enum osl_storage_type st)
+{
+       switch (st) {
+       case OSL_MAPPED_STORAGE:
+               return "OSL_MAPPED_STORAGE";
+       case OSL_DISK_STORAGE:
+               return "OSL_DISK_STORAGE";
+       case OSL_NO_STORAGE:
+               return "OSL_NO_STORAGE";
+       }
+       return NULL;
+}
+
+/*
+ * If flag is set in sf, append its name to str. The names are separated by
+ * " | ". A NULL str is replaced by a copy of the flag name.
+ */
+#define APPEND_ASC_SF(sf, flag, str) \
+       do { \
+               if (!((sf) & (flag))) \
+                       break; \
+               if (str) \
+                       str = para_strcat(str, " | " # flag); \
+               else \
+                       str = para_strdup(#flag); \
+       } while (0)
+
+/*
+ * Return a string that lists the names of those flags among OSL_RBTREE,
+ * OSL_FIXED_SIZE and OSL_UNIQUE which are set in sf, or NULL if none of them
+ * is set.
+ *
+ * NOTE(review): the result is presumably allocated and owned by the caller;
+ * dump_table_desc() passes it to free().
+ */
+char *get_asc_storage_flags(enum osl_storage_type sf)
+{
+       char *names = NULL;
+
+       APPEND_ASC_SF(sf, OSL_RBTREE, names);
+       APPEND_ASC_SF(sf, OSL_FIXED_SIZE, names);
+       APPEND_ASC_SF(sf, OSL_UNIQUE, names);
+       return names;
+}
+
+/*
+ * Write msg to fd and free it, no matter whether the write succeeded.
+ * Returns the return value of para_write_all().
+ */
+static int write_and_free_msg(int fd, char *msg)
+{
+       int ret = para_write_all(fd, msg, strlen(msg));
+
+       free(msg);
+       return ret;
+}
+
+/*
+ * Write the column descriptions of a table as C source code.
+ *
+ * t: The table whose description is dumped.
+ * fd: The file descriptor to write to.
+ *
+ * The output is an initializer for an array of struct osl_column_description
+ * named cols. The storage flags are written only if at least one flag is set,
+ * the data size only for fixed-size columns, and the compare function, under
+ * the fixed name compare_func, only for rbtree columns.
+ *
+ * Returns 1 on success, negative if a write failed.
+ */
+static int dump_table_desc(struct osl_table *t, int fd)
+{
+       const struct osl_table_description *desc = t->desc;
+       int ret, i;
+       struct osl_column_description *cd;
+
+       ret = write_and_free_msg(fd, make_message(
+               "static struct osl_column_description cols[] = {\n"));
+       if (ret < 0)
+               return ret;
+       FOR_EACH_COLUMN(i, desc, cd) {
+               const char *asc_st;
+
+               ret = write_and_free_msg(fd, make_message("\t[%d] = {\n", i));
+               if (ret < 0)
+                       return ret;
+               asc_st = get_asc_storage_type(cd->storage_type);
+               ret = write_and_free_msg(fd, make_message(
+                       "\t\t.storage_type = %s,\n", asc_st));
+               if (ret < 0)
+                       return ret;
+               if (cd->storage_flags) {
+                       char *asc_sf = get_asc_storage_flags(cd->storage_flags);
+                       /* a designated initializer starts with a dot */
+                       char *msg = make_message("\t\t.storage_flags = %s,\n", asc_sf);
+
+                       free(asc_sf);
+                       ret = write_and_free_msg(fd, msg);
+                       if (ret < 0)
+                               return ret;
+               }
+               if (cd->storage_flags & OSL_FIXED_SIZE) {
+                       ret = write_and_free_msg(fd, make_message(
+                               "\t\t.data_size = %u,\n", cd->data_size));
+                       if (ret < 0)
+                               return ret;
+               }
+               ret = write_and_free_msg(fd, make_message(
+                       "\t\t.name = \"%s\",\n", cd->name));
+               if (ret < 0)
+                       return ret;
+               if (cd->storage_flags & OSL_RBTREE) {
+                       ret = write_and_free_msg(fd, make_message(
+                               "\t\t.compare_function = compare_func,\n"));
+                       if (ret < 0)
+                               return ret;
+               }
+               ret = write_and_free_msg(fd, make_message("\t},\n"));
+               if (ret < 0)
+                       return ret;
+       }
+       ret = write_and_free_msg(fd, make_message("};\n"));
+       if (ret < 0)
+               return ret;
+       return 1;
+}
+
+/*
+ * Write the objects of one row to files in the given directory.
+ *
+ * t: The table.
+ * row_num: The number of the row to dump.
+ * row_dir: The directory the files are created in.
+ *
+ * One file named col_NNN is written for each mapped column and for each disk
+ * storage column, where NNN is the column number. The disk storage objects
+ * are located via the hash of the disk storage name object of the row.
+ *
+ * Returns 1 on success, negative on errors.
+ */
+static int dump_row(struct osl_table *t, unsigned row_num, const char *row_dir)
+{
+       int ret, i;
+       const struct osl_column_description *cd;
+       unsigned dsnc;
+       struct osl_object obj;
+       char *ds_name;
+       HASH_TYPE hash[HASH_SIZE];
+       char *filename;
+
+       FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+               ret = get_mapped_object(t, i, row_num, &obj);
+               if (ret < 0)
+                       return ret;
+               filename = make_message("%s/col_%03u", row_dir, i);
+               ret = para_write_file(filename, obj.data, obj.size);
+               free(filename);
+               if (ret < 0)
+                       return ret;
+       }
+       if (!t->num_disk_storage_columns)
+               return 1;
+       dsnc = t->disk_storage_name_column;
+       /* the name object of this row, not of the row numbered like a column */
+       ret = get_mapped_object(t, dsnc, row_num, &obj);
+       if (ret < 0)
+               return ret;
+       hash_object(&obj, hash);
+       ds_name = disk_storage_name_of_hash(t, hash);
+       FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) {
+               filename = disk_storage_path(t, i, ds_name);
+               ret = mmap_full_file(filename, O_RDONLY, &obj);
+               free(filename);
+               if (ret < 0)
+                       goto out;
+               filename = make_message("%s/col_%03u", row_dir, i);
+               ret = para_write_file(filename, obj.data, obj.size);
+               free(filename);
+               /* the mapping is no longer needed once it has been copied */
+               para_munmap(obj.data, obj.size);
+               if (ret < 0)
+                       goto out;
+       }
+       ret = 1;
+out:
+       free(ds_name);
+       return ret;
+}
+
+/*
+ * Dump all valid rows of a table.
+ *
+ * dump_dir: The directory to dump to, must exist.
+ * t: The table to dump.
+ *
+ * The rows are grouped in chunks of 1000. For each chunk that contains at
+ * least one valid row, a directory rows_N-M is created below dump_dir. Each
+ * valid row gets its own subdirectory row_NNN which is filled by dump_row().
+ *
+ * Returns 0 if no row was dumped, the return value of the last call to
+ * dump_row() on success, negative on errors.
+ */
+static int dump_rows(char *dump_dir, struct osl_table *t)
+{
+       unsigned i, chunk_start = 0;
+       char *current_dir = NULL;
+       int ret = 0;
+
+       for (i = 0; i < t->num_rows; i++) {
+               char *row_dir;
+               unsigned first = i - i % 1000;
+
+               if (row_is_invalid(t, i))
+                       continue;
+               /*
+                * Create the chunk directory when the first valid row of a
+                * chunk is seen. This need not be the first row of the chunk.
+                */
+               if (!current_dir || first != chunk_start) {
+                       chunk_start = first;
+                       free(current_dir);
+                       current_dir = make_message("%s/rows_%u-%u", dump_dir,
+                               first, first + 999);
+                       PARA_NOTICE_LOG("dumping rows %u - %u\n", first, first + 999);
+                       ret = para_mkdir(current_dir, 0777);
+                       if (ret < 0)
+                               goto out;
+               }
+               row_dir = make_message("%s/row_%03u", current_dir, i);
+               ret = para_mkdir(row_dir, 0777);
+               if (ret < 0) {
+                       free(row_dir);
+                       goto out;
+               }
+               ret = dump_row(t, i, row_dir);
+               free(row_dir);
+               if (ret < 0)
+                       goto out;
+       }
+out:
+       free(current_dir);
+       return ret;
+}
+
+/*
+ * Dump the description and all valid rows of a table.
+ *
+ * dump_dir: The directory to create and to dump to.
+ * desc: The table description, passed to fsck_init().
+ *
+ * The table description is written to table_description.c below dump_dir,
+ * the rows are written by dump_rows().
+ *
+ * Returns the return value of dump_rows() on success, negative on errors.
+ */
+static int dump_table(char *dump_dir, struct osl_table_description *desc)
+{
+       struct osl_table *t = NULL;
+       char *desc_file;
+       int fd, ret;
+
+       ret = fsck_init(desc, &t);
+       if (ret >= 0)
+               ret = para_mkdir(dump_dir, 0777);
+       if (ret < 0)
+               goto cleanup;
+       desc_file = make_message("%s/table_description.c", dump_dir);
+       fd = para_open(desc_file, O_WRONLY | O_CREAT | O_EXCL, 0644);
+       free(desc_file);
+       if (fd < 0) {
+               ret = fd;
+               goto cleanup;
+       }
+       ret = dump_table_desc(t, fd);
+       close(fd);
+       if (ret >= 0)
+               ret = dump_rows(dump_dir, t);
+cleanup:
+       fsck_cleanup(t);
+       return ret;
+}
+
+/*
+ * Check and repair the table described by desc.
+ *
+ * The following steps are performed in order: the index is checked for range
+ * violations, the disk storage columns are checked, invalid rows are removed
+ * from the index, and unreferenced objects are removed from the mapped data
+ * files. The first failing step aborts the check.
+ *
+ * Returns a non-negative value on success, negative on errors.
+ */
+static int fsck(struct osl_table_description *desc)
+{
+       int ret;
+       struct osl_table *t = NULL;
+       uint32_t *lost_bytes = NULL;
+
+       ret = fsck_init(desc, &t);
+       if (ret < 0)
+               goto out;
+       ret = check_index_ranges(t);
+       if (ret < 0)
+               goto out_unmap;
+       ret = check_disk_storage_columns(t);
+       if (ret < 0)
+               goto out_unmap;
+       ret = prune_invalid_rows_from_index(t);
+       if (ret < 0)
+               goto out_unmap;
+       ret = check_for_invalid_objects(t, &lost_bytes);
+       if (ret < 0)
+               goto out_unmap;
+       if (ret > 0) { /* at least one mapped data file needs pruning */
+               ret = prune_objects(t, lost_bytes);
+               if (ret < 0)
+                       goto out_unmap;
+       }
+       PARA_INFO_LOG("success\n");
+out_unmap:
+       /* still NULL if check_for_invalid_objects() was not reached or failed */
+       free(lost_bytes);
+       unmap_table(t, OSL_MARK_CLEAN);
+out:
+       fsck_cleanup(t);
+       return ret;
+}
+/*
+ * Usage: <program> <table directory> <table name>
+ *
+ * Runs fsck() and dump_table() on the given table, depending on the FSCK and
+ * DUMP switches. The table is not dumped if fsck() fails. Exits with
+ * EXIT_FAILURE if fewer than two arguments were given or if an error
+ * occurred, and with EXIT_SUCCESS otherwise.
+ */
+int main(__a_unused int argc, __a_unused char **argv)
+{
+       struct osl_table_description desc = {.column_descriptions = NULL};
+       int ret = -E_FSCK_SYNTAX;
+
+       if (argc >= 3) {
+               desc.dir = argv[1];
+               desc.name = argv[2];
+               if (FSCK)
+                       ret = fsck(&desc);
+               if (DUMP && (!FSCK || ret >= 0))
+                       ret = dump_table(OSL_DUMP_DIR, &desc);
+       }
+       if (ret < 0) {
+               PARA_ERROR_LOG("error %d\n", ret);
+               return EXIT_FAILURE;
+       }
+       return EXIT_SUCCESS;
+}