From: Andre Noll <maan@systemlinux.org>
Date: Thu, 5 Jun 2008 19:17:07 +0000 (+0200)
Subject: Add oslfsck.
X-Git-Tag: v0.1.0~79
X-Git-Url: https://git.tuebingen.mpg.de/?a=commitdiff_plain;h=dd58755b9a202ab317f96e5dd5d9220c4354774a;p=osl.git

Add oslfsck.

This needs more work, but at least it compiles with only one warning.
---

diff --git a/Makefile b/Makefile
index 872d4d1..3259a3a 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@ libdir := $(prefix)/lib
 includedir := $(prefix)/include
 
 objects := osl.o fd.o rbtree.o sha1.o
+fsck_objects := fsck.fsck.o osl.fsck.o fd.fsck.o rbtree.fsck.o sha1.fsck.o fsck.cmdline.o
 headers := osl.h
 
 INSTALL := install
@@ -17,6 +18,7 @@ LN := ln
 x := 0
 y := 1
 z := 0
+VERSION := $(x).$(y).$(z)
 
 # common flags
 CFLAGS += -Wno-sign-compare -g -Wunused -Wundef -W
@@ -30,8 +32,10 @@ CFLAGS += -Werror-implicit-function-declaration
 CFLAGS += -Wmissing-format-attribute
 CFLAGS += -Wunused-macros
 CFLAGS += -Wbad-function-cast
-CFLAGS += -fPIC
-CFLAGS += -fvisibility=hidden
+
+# cflags used only for building library objects
+LIBCFLAGS += -fPIC
+LIBCFLAGS += -fvisibility=hidden
 
 uname_s := $(shell uname -s 2>/dev/null || echo "UNKNOWN_OS")
 uname_rs := $(shell uname -rs)
@@ -87,14 +91,34 @@ ifeq ($(uname_s),SunOS)
 endif
 
 all: $(realname)
-
 Makefile.deps: $(wildcard *.c *.h)
 	$(CC) -MM -MG *.c > $@
+osl.c: errtab.h
+
 
 -include Makefile.deps
 
+fsck.cmdline.o: fsck.cmdline.c fsck.cmdline.h
+	$(CC) -c -DVERSION='"$(VERSION)"' $<
+
+%.fsck.o: %.c Makefile
+	$(CC) -c -DVERSION='"$(VERSION)"' $(CPPFLAGS) $(CFLAGS) $< -o $@
+
 %.o: %.c Makefile
-	$(CC) -c $(CPPFLAGS) $(CFLAGS) $<
+	$(CC) -c $(CPPFLAGS) $(CFLAGS) $(LIBCFLAGS) $<
+
+fsck.cmdline.h fsck.cmdline.c: fsck.ggo
+	gengetopt $$O \
+		--conf-parser \
+		--unamed-opts=table \
+		--file-name=fsck.cmdline \
+		--func-name=fsck_cmdline_parser \
+		--set-package="oslfsck" \
+		--arg-struct-name=fsck_args_info \
+		< $<
+
+oslfsck: $(fsck_objects)
+	$(CC) -o $@ $(fsck_objects) -lcrypto
 
 $(realname): $(objects)
 	$(CC) $(LDFLAGS) -o $@ $(objects) -lcrypto
@@ -111,7 +135,7 @@ errtab.h: errlist
 osl.h: osl.h.in osl_errors.h
 	cat $^ > $@
 clean:
-	rm -f *.o $(realname) osl.h osl_errors.h errtab.h
+	rm -f *.o $(realname) osl.h osl_errors.h errtab.h fsck.cmdline.h fsck.cmdline.c oslfsck
 
 install: all
 	$(MKDIR) $(libdir) $(includedir)
diff --git a/fsck.c b/fsck.c
new file mode 100644
index 0000000..d800017
--- /dev/null
+++ b/fsck.c
@@ -0,0 +1,1188 @@
+/*
+ * Copyright (C) 2007-2008 Andre Noll <maan@systemlinux.org>
+ *
+ * Licensed under the GPL v2. For licencing details see COPYING.
+ */
+
+/** \file fsck.c The program used to check an osl table. */
+
+/* FIXME: check error codes of make_message or write wrapper  */
+
+
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <assert.h>
+#include <pwd.h>
+
+#include "log.h"
+#include "osl.h"
+#include "error.h"
+#include "fd.h"
+#include "osl_core.h"
+#include "fsck.cmdline.h"
+
+/** Version text printed for the version flag; VERSION must be a string literal (the Makefile passes it with -D). */
+#define VERSION_TEXT(prefix) "osl_" prefix " " VERSION " " "\n" \
+	"Copyright (C) 2008 Andre Noll\n" \
+	"This is free software with ABSOLUTELY NO WARRANTY." \
+	" See COPYING for details.\n" \
+	"Written by Andre Noll.\n" \
+	"Report bugs to <maan@systemlinux.org>.\n"
+
+/** Print \p VERSION_TEXT and exit successfully if the version flag was given. */
+#define HANDLE_VERSION_FLAG(_prefix, _args_info_struct) \
+	do { if ((_args_info_struct).version_given) { \
+		printf("%s", VERSION_TEXT(_prefix)); \
+		exit(EXIT_SUCCESS); \
+	} } while (0) /* a single statement, hence safe in an unbraced if/else */
+
+static struct fsck_args_info conf; /* command line options, filled in by fsck_cmdline_parser() in main() */
+
+enum fsck_errors {
+	E_INVALID_OBJECT = 1, /* codes are returned negated, so none of them may be 0 */
+	E_NOT_A_REGULAR_FILE,
+	E_FSCK_SYNTAX,
+	E_RANGE_VIOLATION, /* last, so that the values of the other codes do not change */
+};
+
+__printf_2_3 void para_log(int ll, const char* fmt,...)
+{
+	va_list ap;
+
+	if (ll >= conf.loglevel_arg) { /* messages below the configured level are dropped */
+		va_start(ap, fmt);
+		vfprintf(stderr, fmt, ap);
+		va_end(ap);
+	}
+}
+
+/* taken from git: maps each byte to the value of the hex digit it denotes, -1 for all other bytes */
+signed char hexval_table[256] = {
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 00-07 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 08-0f */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 10-17 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 18-1f */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 20-27 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 28-2f */
+	  0,  1,  2,  3,  4,  5,  6,  7,		/* 30-37 */
+	  8,  9, -1, -1, -1, -1, -1, -1,		/* 38-3f */
+	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 40-47 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 48-4f */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 50-57 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 58-5f */
+	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 60-67 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 68-6f */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 70-77 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 78-7f */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 80-87 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 88-8f */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 90-97 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 98-9f */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a0-a7 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a8-af */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b0-b7 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b8-bf */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c0-c7 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c8-cf */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d0-d7 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d8-df */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e0-e7 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e8-ef */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f0-f7 */
+	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f8-ff */
+};
+
+/* Convert at most len pairs of hex digits to bytes. Returns -1 on a bad digit, 1 otherwise. */
+int asc_to_hash(const char *asc_hash, int len, HASH_TYPE *hash)
+{
+	int i = 0; /* number of pairs consumed so far */
+	const unsigned char *asc = (const unsigned char *) asc_hash;
+
+	while (*asc && i++ < len) { /* the end of the string also stops the conversion */
+		int hi = hexval_table[asc[0]], lo = hexval_table[asc[1]];
+		if (hi < 0 || lo < 0) /* check first: shifting a negative value is undefined */
+			return -1;
+		*hash++ = (hi << 4) | lo;
+		asc += 2;
+	}
+	return 1;
+}
+
+static int _write_all(int fd, const char *buf, size_t len)
+{
+	return write_all(fd, buf, &len); /* write_all() takes the length by reference, this wrapper by value */
+}
+
+/**
+ * Paraslash's version of malloc().
+ *
+ * \param size The number of bytes to allocate. Must not be zero.
+ *
+ * A wrapper for malloc(3) which terminates the process if the allocation fails.
+ *
+ * \return A pointer to the allocated memory, which is suitably aligned for any
+ * kind of variable. This function does not return on allocation failure.
+ *
+ * \sa malloc(3).
+ */
+__must_check __malloc static void *para_malloc(size_t size)
+{
+	void *mem;
+
+	assert(size);
+	mem = malloc(size);
+	if (mem)
+		return mem;
+	EMERG_LOG("malloc failed (size = %zu),  aborting\n",
+		size);
+	exit(EXIT_FAILURE);
+}
+
+/**
+ * Paraslash's version of calloc().
+ *
+ * \param size The number of bytes to allocate.
+ *
+ * Unlike calloc(3) this takes a single size argument. It exits on errors.
+ *
+ * \return A pointer to the allocated and zeroed-out memory, which is suitably
+ * aligned for any kind of variable.
+ *
+ * \sa calloc(3)
+ */
+__must_check __malloc static void *para_calloc(size_t size)
+{
+	void *zeroed = para_malloc(size);
+
+	/* memset(3) returns its first argument */
+	return memset(zeroed, 0, size);
+}
+
+/**
+ * Paraslash's version of strdup().
+ *
+ * \param s The string to duplicate, or NULL.
+ *
+ * A wrapper for strdup(3) which calls \p exit(EXIT_FAILURE) if the allocation
+ * fails, so callers do not need to check the return value.
+ *
+ * \return A pointer to a copy of \p s, or to an empty string if \p s is NULL.
+ *
+ * \sa strdup(3)
+ */
+__must_check __malloc static char *para_strdup(const char *s)
+{
+	char *copy = strdup(s? s: "");
+
+	if (!copy) {
+		EMERG_LOG("strdup failed, aborting\n");
+		exit(EXIT_FAILURE);
+	}
+	return copy;
+}
+
+/**
+ * Get the home directory of the current user.
+ * \return A dynamically allocated string that must be freed by the caller. If
+ * getpwuid(3) has no entry for the user, a copy of "/tmp" is returned.
+ */
+__must_check __malloc static char *para_homedir(void)
+{
+	const struct passwd *entry = getpwuid(getuid());
+	const char *dir = entry? entry->pw_dir : "/tmp";
+	return para_strdup(dir);
+}
+
+/**
+ * Compare two osl objects pointing to unsigned integers of 32 bit size.
+ *
+ * \param obj1 The object containing the first integer.
+ * \param obj2 The object containing the second integer.
+ *
+ * \return 1 if the first integer is smaller than the second, -1 if it is
+ * greater, and zero if both are equal.
+ * \sa osl_compare_func, osl_hash_compare().
+ */
+static int uint32_compare(const struct osl_object *obj1, const struct osl_object *obj2)
+{
+	const uint32_t lhs = read_u32((const char *)obj1->data);
+	const uint32_t rhs = read_u32((const char *)obj2->data);
+
+	if (lhs == rhs)
+		return 0;
+	if (lhs < rhs)
+		return 1;
+	return -1;
+}
+
+/**
+ * Traverse the given directory recursively.
+ *
+ * \param dirname The directory to traverse.
+ * \param func The function to call for each regular file.
+ * \param private_data Opaque pointer which is passed verbatim to \a func.
+ *
+ * For each regular file under \a dirname, \a func is called with the path of
+ * the file (\a dirname, a slash, and the entry name) and \a private_data.
+ * Entries that are neither regular files nor directories are skipped, and a
+ * directory for which para_opendir() fails with EACCES is silently ignored.
+ *
+ * \return 1 on success, the first negative value from \a func or a helper.
+ */
+static int for_each_file_in_dir(const char *dirname,
+		int (*func)(const char *, void *), void *private_data)
+{
+	DIR *dir;
+	struct dirent *entry;
+	int cwd_fd, ret2, ret = para_opendir(dirname, &dir, &cwd_fd);
+
+	if (ret < 0)
+		return ret == -ERRNO_TO_ERROR(EACCES)? 1 : ret;
+	/* scan cwd recursively */
+	while ((entry = readdir(dir))) {
+		mode_t m;
+		char *tmp;
+		struct stat s;
+
+		if (!strcmp(entry->d_name, "."))
+			continue;
+		if (!strcmp(entry->d_name, ".."))
+			continue;
+		if (lstat(entry->d_name, &s) == -1) /* relative name; NOTE(review): presumably para_opendir() made dirname the cwd - confirm */
+			continue;
+		m = s.st_mode;
+		if (!S_ISREG(m) && !S_ISDIR(m)) /* lstat() does not follow symlinks, so these are skipped, too */
+			continue;
+		tmp = make_message("%s/%s", dirname, entry->d_name);
+		if (!S_ISDIR(m)) {
+			ret = func(tmp, private_data);
+			free(tmp);
+			if (ret < 0)
+				goto out;
+			continue;
+		}
+		/* directory */
+		ret = for_each_file_in_dir(tmp, func, private_data);
+		free(tmp);
+		if (ret < 0)
+			goto out;
+	}
+	ret = 1;
+out:
+	closedir(dir);
+	ret2 = para_fchdir(cwd_fd); /* NOTE(review): presumably changes back to the directory saved in cwd_fd - confirm */
+	if (ret2 < 0 && ret >= 0) /* an earlier error takes precedence */
+		ret = ret2;
+	close(cwd_fd);
+	return ret;
+}
+
+/*
+ * check for object boundary violations
+ *
+ * Test whether the range pointed to by the index entry for a given cell is
+ * contained in the mapped data file. This should always be the case. Otherwise
+ * we are in real trouble.
+ */
+static int check_range(struct osl_table *t, uint32_t row_num, uint32_t col_num)
+{
+	char *index_entry; /* only looked up to check that the cell has an index entry */
+	struct osl_object obj;
+	struct osl_column *col;
+	int ret;
+	char *map_start, *obj_start;
+
+	ret = get_cell_index(t, row_num, col_num, &index_entry);
+	if (ret < 0)
+		return ret;
+	ret = get_mapped_object(t, col_num, row_num, &obj);
+	if (ret < 0)
+		return ret;
+	col = t->columns + col_num;
+	obj_start = obj.data;
+	map_start = col->data_map.data;
+	/* Compare sizes rather than end pointers so that a corrupt size can not wrap. */
+	if (obj_start < map_start || obj_start - map_start > col->data_map.size
+			|| obj.size > col->data_map.size - (obj_start - map_start)) {
+		CRIT_LOG("range violation in row %u, col %u\n", row_num,
+			col_num);
+		return -E_RANGE_VIOLATION;
+	}
+	DEBUG_LOG("col %u: ok\n", col_num);
+	return 1;
+}
+
+/*
+ * check all cells of the given table for boundary violations
+ */
+static int check_index_ranges(struct osl_table *t)
+{
+	int i, j, ret;
+
+	INFO_LOG("checking for range violations in index\n");
+	/* Rows which are already marked invalid are counted but not checked again. */
+	t->num_invalid_rows = 0;
+	for (i = 0; i < t->num_rows; i++) {
+		if (row_is_invalid(t, i)) {
+			t->num_invalid_rows++;
+			continue;
+		}
+		for (j = 0; j < t->desc->num_columns; j++) { /* FIXME */
+			const struct osl_column_description *cd =
+				get_column_description(t->desc, j);
+			if (cd->storage_type != OSL_MAPPED_STORAGE)
+				continue;
+			ret = check_range(t, i, j);
+			if (ret < 0) {
+				if (ret != -E_INVALID_OBJECT &&
+						ret != -E_RANGE_VIOLATION)
+					goto err; /* unexpected error: give up */
+				if (ret == -E_INVALID_OBJECT) {
+					CRIT_LOG("row %d, col %d maps to an "
+						"invalid object\n", i, j);
+				}
+				ret = mark_row_invalid(t, i);
+				if (ret < 0)
+					goto err;
+				t->num_invalid_rows++;
+				break; /* one bad cell suffices, continue with the next row */
+			}
+		}
+
+	}
+	if (t->num_invalid_rows)
+		NOTICE_LOG("ranges OK. %d invalid row(s) detected\n",
+			t->num_invalid_rows);
+	else
+		INFO_LOG("no invalid rows, no range violations, good\n");
+	return 1;
+err:
+	return ret;
+}
+
+static int move_index_entry(struct osl_table *t, uint32_t dest, uint32_t src)
+{
+	char *to, *from;
+	int ret;
+
+	ret = get_row_index(t, dest, &to);
+	if (ret >= 0)
+		ret = get_row_index(t, src, &from);
+	if (ret < 0) /* nothing is copied unless both entries were found */
+		return ret;
+	INFO_LOG("moving entry #%u to position %u\n", src, dest);
+	memcpy(to, from, t->row_index_size); /* overwrites the index entry of row dest */
+	return 1;
+}
+
+static int map_index(const struct osl_table_description *desc, struct osl_object *map)
+{
+	char *path = index_filename(desc);
+	/* On return, map describes the mapping; the result of mmap_full_file() is passed on. */
+	int ret = mmap_full_file(path, O_RDWR, &map->data, &map->size, NULL);
+
+	DEBUG_LOG("mapping index %s: ret: %d, size: %zu\n", path, ret, map->size);
+	free(path);
+	return ret;
+}
+
+static int prune_invalid_rows_from_index(struct osl_table *t)
+{
+	uint32_t top = 0, bottom; /* top scans forward for invalid rows, bottom backward for valid ones */
+	char *filename;
+	int ret;
+
+	if (!t->num_invalid_rows) {
+		INFO_LOG("all rows are valid, good\n");
+		return 1;
+	}
+	NOTICE_LOG("deleting %u invalid row(s) (%d bytes) from index\n",
+		t->num_invalid_rows, t->row_index_size * t->num_invalid_rows);
+	bottom = t->num_rows - 1;
+	while (top < bottom) {
+		if (!row_is_invalid(t, top)) {
+			top++;
+			continue;
+		}
+		while (bottom > top) { /* row top is invalid: find a valid row to overwrite it with */
+			if (row_is_invalid(t, bottom)) {
+				bottom--;
+				continue;
+			}
+			/* move bottom index entry to top */
+			move_index_entry(t, top, bottom); /* NOTE(review): the return value is ignored */
+			bottom--;
+			top++;
+			break;
+		}
+	}
+	DEBUG_LOG("unmapping index\n");
+	osl_munmap(t->index_map.data, t->index_map.size);
+	filename = index_filename(t->desc);
+	ret = para_truncate(filename, t->row_index_size
+		* t->num_invalid_rows); /* NOTE(review): presumably cuts this many bytes off the end of the file - confirm */
+	free(filename);
+	if (ret < 0)
+		return ret;
+	ret = map_index(t->desc, &t->index_map); /* map the shortened index file again */
+	if (ret < 0)
+		return ret;
+	t->num_rows = table_num_rows(t); /* refresh the row count after re-mapping */
+	return 1;
+}
+
+static int check_for_invalid_objects(struct osl_table *t, uint32_t **lost_bytes)
+{
+	int i, j, ret;
+	const struct osl_column_description *cd;
+	uint32_t *loss = para_malloc(sizeof(uint32_t) * t->desc->num_columns); /* only the entries of mapped columns get set */
+
+	INFO_LOG("looking for mapped objects not contained in index\n");
+	/* start with the size of each data file and subtract the bytes of all referenced objects */
+	FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+		loss[i] = t->columns[i].data_map.size;
+		for (j = 0; j < t->num_rows; j++) {
+			struct osl_object obj;
+			ret = get_mapped_object(t, i, j, &obj);
+			if (ret >= 0) {
+				loss[i] -= obj.size + 1; /* add one for header byte */
+				continue;
+			}
+			if (ret != -E_INVALID_OBJECT)
+				goto err;
+			CRIT_LOG("row %d, col %d points to an invalid "
+				"mapped object, bad\n", j, i);
+		}
+	}
+	ret = 0; /* becomes 1 if at least one column has unreferenced bytes */
+	FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+		if (loss[i]) {
+			NOTICE_LOG("column %u contains %u lost bytes\n",
+				i, loss[i]);
+			ret = 1;
+		}
+	}
+	if (!ret)
+		INFO_LOG("all mapped objects are valid, good\n");
+	*lost_bytes = loss; /* the caller has to free this */
+	return ret;
+err:
+	free(loss); /* *lost_bytes is left untouched on errors */
+	return ret;
+}
+
+/* prune_invalid_rows_from_index() must be run on the table before calling this */
+static int prune_mapped_column(struct osl_table *t, uint32_t col_num, int fd)
+{
+	int i, ret;
+	uint32_t written = 0; /* number of bytes written to fd so far */
+	struct osl_column *col = t->columns + col_num;
+
+	INFO_LOG("pruning col %u\n", col_num);
+	for (i = 0; i < t->num_rows; i++) {
+		struct osl_object obj;
+		char *index_entry;
+
+		DEBUG_LOG("checking row %u/%u\n", i, t->num_rows);
+		ret = get_mapped_object(t, col_num, i, &obj);
+		if (ret < 0)
+			return ret;
+		ret = _write_all(fd, (char *)(obj.data) - 1, obj.size + 1); /* includes the byte in front of the object data */
+		if (ret < 0)
+			return ret;
+		written += obj.size + 1;
+		ret = get_row_index(t, i, &index_entry);
+		if (ret < 0)
+			return ret;
+		update_cell_index(index_entry, col, written, obj.size); /* NOTE(review): written already includes this object - confirm this is what update_cell_index() expects */
+	}
+	return 1;
+}
+
+static int prune_objects(struct osl_table *t, uint32_t *lost_bytes)
+{
+	int i, ret;
+	const struct osl_column_description *cd;
+	char **col_filenames = para_calloc(t->desc->num_columns * sizeof(char *)); /* zeroed, so unused slots stay NULL */
+	char **new_col_filenames = para_calloc(t->desc->num_columns * sizeof(char *));
+	char *idx_filename = index_filename(t->desc);
+	char *old_idx_filename = make_message("%s.bak", idx_filename);
+	int fd;
+
+	NOTICE_LOG("removing unreferenced objects from data files\n");
+	/* first make a copy of the index */
+	ret = osl_open(old_idx_filename, O_WRONLY | O_CREAT | O_EXCL, 0644);
+	if (ret < 0)
+		goto out_free;
+	fd = ret;
+	ret = _write_all(fd, t->index_map.data, t->index_map.size);
+	close(fd);
+	if (ret < 0)
+		goto out_free;
+	FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+		if (!lost_bytes[i]) /* nothing to prune in this column */
+			continue;
+		col_filenames[i] = column_filename(t, i);
+		new_col_filenames[i] = make_message("%s.fsck", col_filenames[i]);
+		ret = osl_open(new_col_filenames[i], O_WRONLY | O_CREAT | O_EXCL, 0644);
+		if (ret < 0)
+			goto out_unlink_data;
+		fd = ret;
+		ret = prune_mapped_column(t, i, fd); /* writes the referenced objects to the new file */
+		close(fd);
+		if (ret < 0)
+			goto out_unlink_data;
+	}
+	ret = unmap_table(t, OSL_MARK_CLEAN);
+	if (ret < 0)
+		goto out_unlink_data;
+	FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+		if (!lost_bytes[i])
+			continue;
+		ret = osl_rename(new_col_filenames[i], col_filenames[i]); /* replace the data file by the pruned one */
+		if (ret < 0) { /* we're kinda screwed here */
+			CRIT_LOG("rename of col %i failed: %s\n", i,
+				strerror(errno));
+			goto out_free;
+		}
+	}
+	unlink(old_idx_filename); /* the index backup is only removed if everything went well */
+	ret = map_table(t, 0);
+	goto out_free;
+out_unlink_data:
+	FOR_EACH_MAPPED_COLUMN(i, t, cd)
+		unlink(new_col_filenames[i]); /* NOTE(review): the name is NULL for columns that were skipped or not reached */
+out_free:
+	free(old_idx_filename);
+	free(idx_filename);
+	FOR_EACH_MAPPED_COLUMN(i, t, cd) {
+		free(col_filenames[i]);
+		free(new_col_filenames[i]);
+	}
+	free(col_filenames);
+	free(new_col_filenames);
+	return ret;
+}
+
+static struct osl_column_description hash_tree_table_cols[] = {
+	{
+		.storage_type = OSL_NO_STORAGE,
+		.storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
+		.name = "hash",
+		.compare_function = uint32_compare, /* NOTE(review): looks like this compares only the first 32 bits of each hash - confirm */
+		.data_size = HASH_SIZE
+	},
+};
+
+static const struct osl_table_description hash_tree_table_desc = {
+	.dir = "/", /* irrelevant; presumably because the only column has OSL_NO_STORAGE */
+	.name = "hash_tree",
+	.num_columns = 1,
+	.flags = 0,
+	.column_descriptions = hash_tree_table_cols
+};
+
+/**
+ * The hash_tree table contains the hashes of the disk storage name column
+ * of each row. It is used for checking if a disk storage file has a reference
+ * in the table.
+ */
+static struct osl_table *hash_tree_table;
+static HASH_TYPE *hashes; /* the hashes themselves, HASH_SIZE bytes per table row */
+
+static int check_disk_storage_column(struct osl_table *t, int row_num,
+		int col_num, char *ds_name, unsigned *num_missing_objects)
+{
+	int ret; /* returned if negative, otherwise 1 is returned */
+	struct stat statbuf;
+	char *path = disk_storage_path(t, col_num, ds_name);
+	unsigned dsnc = t->disk_storage_name_column;
+	struct osl_object obj;
+	struct osl_row *row;
+
+	DEBUG_LOG("checking if %s is a regular file\n", path);
+	if (stat(path, &statbuf) == 0) { /* statbuf is only valid on this path */
+		ret = S_ISREG(statbuf.st_mode)? 1 : -E_NOT_A_REGULAR_FILE;
+		goto out;
+	}
+	ret = -ERRNO_TO_ERROR(errno);
+	if (errno != ENOENT) /* only a missing file is repaired by deleting the row */
+		goto out;
+	(*num_missing_objects)++;
+	ERROR_LOG("row %d: object %s is missing\n", row_num, path);
+	NOTICE_LOG("trying to delete row %d\n", row_num);
+	/* The row is looked up by the object in its disk storage name column. */
+	ret = get_mapped_object(t, dsnc, row_num, &obj);
+	if (ret >= 0)
+		ret = osl_get_row(t, dsnc, &obj, &row);
+	if (ret < 0) {
+		CRIT_LOG("unable to get row %d\n", row_num);
+		mark_row_invalid(t, row_num);
+		CRIT_LOG("Please re-run fsck\n");
+		goto out;
+	}
+	ret = osl_del_row(t, row);
+out:
+	free(path);
+	return ret < 0? ret : 1;
+}
+
+static int check_disk_storage_presence(struct osl_table *t)
+{
+	int ret, i, j;
+	struct osl_object obj, hash_obj = {.size = HASH_SIZE};
+	char *ds_name;
+	const struct osl_column_description *cd;
+	unsigned dsnc = t->disk_storage_name_column, missing_objects = 0;
+
+	if (!t->num_rows)
+		return 1;
+	hashes = para_malloc(t->num_rows * HASH_SIZE); /* freed by check_disk_storage_columns() */
+	INFO_LOG("looking for missing disk storage objects\n");
+	for (i = 0; i < t->num_rows; i++) {
+		if (row_is_invalid(t, i))
+			continue;
+		ret = get_mapped_object(t, dsnc, i, &obj);
+		if (ret < 0)
+			return ret;
+		hash_object(&obj, hashes + i * HASH_SIZE);
+		hash_obj.data = hashes + i * HASH_SIZE;
+		osl_add_row(hash_tree_table, &hash_obj); /* NOTE(review): the return value is ignored */
+		ds_name = disk_storage_name_of_hash(t, hashes + i * HASH_SIZE);
+		FOR_EACH_DISK_STORAGE_COLUMN(j, t, cd) {
+			ret = check_disk_storage_column(t, i, j, ds_name,
+				&missing_objects);
+			if (ret < 0)
+				goto err;
+		}
+		free(ds_name);
+	}
+	if (!missing_objects)
+		INFO_LOG("all referenced disk storage objects exist, good\n");
+	else
+		NOTICE_LOG("%d missing object(s)\n", missing_objects);
+	return missing_objects; /* non-negative: the number of missing objects */
+err:
+	free(ds_name);
+	return ret;
+}
+
+static int dummy_compare(const struct osl_object *obj1, const struct osl_object *obj2)
+{
+	if (obj1 == obj2) /* the pointers are compared, not the object contents */
+		return 0;
+	if (obj1 < obj2)
+		return -1;
+	return 1;
+}
+
+static unsigned files_pruned; /* number of files removed by prune_disk_storage_file() */
+
+/*
+ * Callback for for_each_file_in_dir(): remove the file unless the hash encoded
+ * in the trailing hex digits of its path is contained in the hash tree table.
+ * private_data must point to the unsigned flags of the table description. For
+ * OSL_LARGE_TABLE the first hash byte is separated from the rest by one char.
+ * Unparsable names are removed as well. Always returns 1 to keep traversing.
+ */
+int prune_disk_storage_file(const char *path, void *private_data)
+{
+	HASH_TYPE hash[HASH_SIZE];
+	unsigned flags = *(unsigned *)private_data;
+	struct osl_object obj = {.data = hash, .size = HASH_SIZE};
+	struct osl_row *row;
+	int ret = -1;
+	size_t len = strlen(path);
+
+	DEBUG_LOG("path: %s\n", path);
+	if (flags & OSL_LARGE_TABLE) {
+		if (len < HASH_SIZE * 2 + 2)
+			goto invalid;
+		ret = asc_to_hash(path + len - 2 * HASH_SIZE - 1, 1, hash); /* first byte */
+		if (ret < 0)
+			goto invalid;
+		ret = asc_to_hash(path + len - 2 * HASH_SIZE + 2, HASH_SIZE - 1,
+			hash + 1); /* the remaining bytes, after the separating char */
+		if (ret < 0)
+			goto invalid;
+	} else {
+		if (len < 2 * HASH_SIZE + 1)
+			goto invalid;
+		ret = asc_to_hash(path + len - 2 * HASH_SIZE, HASH_SIZE, hash);
+		if (ret < 0)
+			goto invalid;
+	}
+	ret = osl_get_row(hash_tree_table, 0, &obj, &row);
+	if (ret >= 0)
+		return 1;
+	NOTICE_LOG("unreferenced file in hash dir: %s\n", path);
+	goto remove;
+invalid:
+	ERROR_LOG("could not read hash value of %s\n", path);
+remove:
+	NOTICE_LOG("removing %s\n", path);
+	if (unlink(path) < 0) /* only count files that are really gone */
+		ERROR_LOG("failed to remove %s: %s\n", path, strerror(errno));
+	else
+		files_pruned++;
+	return 1;
+}
+
+/* Remove unreferenced files of all disk storage columns, stop on first error. */
+static int prune_disk_storage_files(struct osl_table *t)
+{
+	int i, ret = 1;
+	const struct osl_column_description *cd;
+
+	INFO_LOG("looking for unreferenced disk storage files\n");
+	FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) {
+		char *dirname = column_filename(t, i);
+		ret = for_each_file_in_dir(dirname, prune_disk_storage_file,
+			(unsigned *)&t->desc->flags);
+		free(dirname);
+		if (ret < 0) /* do not let a later column hide this error */
+			break;
+	}
+	if (files_pruned)
+		NOTICE_LOG("%u disk storage files deleted\n", files_pruned);
+	else
+		INFO_LOG("all files are are referenced, good\n");
+	return ret;
+}
+
+static int check_disk_storage_columns(struct osl_table *t)
+{
+	int ret, i;
+	const struct osl_column_description *cd;
+
+	if (!t->num_disk_storage_columns) {
+		INFO_LOG("no disk storage columns in table '%s', "
+			"skipping checks\n", t->desc->name);
+		return 1;
+	}
+	FOR_EACH_COLUMN(i, t->desc, cd)
+		t->desc->column_descriptions[i].compare_function = dummy_compare; /* the real compare functions are not known to fsck */
+	ret = init_rbtrees(t);
+	if (ret < 0)
+		return ret;
+	INFO_LOG("creating rbtree for disk storage hash values\n");
+	ret = osl_open_table(&hash_tree_table_desc, &hash_tree_table);
+	if (ret < 0)
+		goto out;
+	ret = check_disk_storage_presence(t); /* this also fills the hash tree table */
+	if (ret < 0)
+		goto out_close_hash_tree;
+	ret = prune_disk_storage_files(t); /* looks up the hash of each file in the hash tree table */
+out_close_hash_tree:
+	osl_close_table(hash_tree_table, 0);
+	free(hashes); /* allocated by check_disk_storage_presence() */
+	hashes = NULL;
+out:
+	clear_rbtrees(t); /* TODO why are we doing that here? Seems odd */
+	return ret;
+}
+
+static void set_dummy_contents(struct osl_table_description *desc)
+{
+	int col;
+
+	/*
+	 * Install dummy_compare() for every column of the description.
+	 */
+	for (col = 0; col < desc->num_columns; col++)
+		get_column_description(desc, col)->compare_function = dummy_compare;
+}
+
+/*
+ * Set up and map the table described by desc.
+ *
+ * The index file is mapped temporarily so that read_table_desc() can fill in
+ * desc. All compare functions are replaced by dummy_compare(). On success, *t
+ * is the mapped table, with num_rows set.
+ */
+static int fsck_init(struct osl_table_description *desc, struct osl_table **t)
+{
+	struct osl_object map;
+	int ret = map_index(desc, &map);
+
+	if (ret < 0)
+		return ret;
+	ret = read_table_desc(&map, desc);
+	if (ret >= 0) {
+		set_dummy_contents(desc);
+		ret = init_table_structure(desc, t);
+	}
+	if (ret < 0) {
+		osl_munmap(map.data, map.size);
+		return ret;
+	}
+	DEBUG_LOG("unmapping index\n");
+	osl_munmap(map.data, map.size);
+	ret = map_table(*t, conf.force_given? MAP_TBL_FL_IGNORE_DIRTY : 0);
+	if (ret >= 0)
+		(*t)->num_rows = table_num_rows(*t);
+	return ret;
+}
+
+/*
+ * Free the column names, the column descriptions, the columns and the table.
+ */
+static void fsck_cleanup(struct osl_table *t)
+{
+	int col;
+
+	if (!t) /* a NULL table is accepted */
+		return;
+	if (t->desc->column_descriptions) {
+		/* Free each column name, then the array of column descriptions. */
+		for (col = 0; col < t->desc->num_columns; col++)
+			free((char*)get_column_description(t->desc, col)->name);
+		free(t->desc->column_descriptions);
+	}
+	free(t->columns);
+	free(t);
+}
+
+#define ST_CASE(st) case st: return #st /* return the name of the enumerator as a string */
+
+const char *get_asc_storage_type(enum osl_storage_type st)
+{
+	switch (st) {
+		ST_CASE(OSL_MAPPED_STORAGE);
+		ST_CASE(OSL_DISK_STORAGE);
+		ST_CASE(OSL_NO_STORAGE);
+	}
+	return NULL; /* none of the three storage types above */
+}
+
+/* If flag is set in sf, append its name to str. The old string is freed. */
+#define APPEND_ASC_SF(sf, flag, str) do { if ((sf) & (flag)) { \
+	char *_new = str? make_message("%s | %s", str, #flag) : para_strdup(#flag); \
+	free(str); str = _new; }} while (0)
+
+char *get_asc_storage_flags(enum osl_storage_type sf)
+{
+	char *asc_sf = NULL; /* stays NULL if none of the flags below is set */
+
+	APPEND_ASC_SF(sf, OSL_RBTREE, asc_sf);
+	APPEND_ASC_SF(sf, OSL_FIXED_SIZE, asc_sf);
+	APPEND_ASC_SF(sf, OSL_UNIQUE, asc_sf);
+	return asc_sf; /* must be freed by the caller */
+}
+
+/* Write the message to fd and free it, no matter whether the write succeeded. */
+static int write_and_free(int fd, char *msg)
+{
+	int ret = _write_all(fd, msg, strlen(msg));
+
+	free(msg);
+	return ret;
+}
+
+/*
+ * Write the column descriptions of the table to fd, as a C initializer for an
+ * array of struct osl_column_description named "cols". For rbtree columns the
+ * placeholder "compare_func" is written as the compare function.
+ * Returns 1 on success, or the negative return value of the failing write.
+ */
+static int dump_table_desc(struct osl_table *t, int fd)
+{
+	const struct osl_table_description *desc = t->desc;
+	int ret, i;
+	struct osl_column_description *cd;
+
+	ret = write_and_free(fd, make_message(
+		"static struct osl_column_description cols[] = {\n"));
+	if (ret < 0)
+		return ret;
+	FOR_EACH_COLUMN(i, desc, cd) {
+		ret = write_and_free(fd, make_message("\t[%d] = {\n", i));
+		if (ret < 0)
+			return ret;
+		ret = write_and_free(fd, make_message("\t\t.storage_type = %s,\n",
+			get_asc_storage_type(cd->storage_type)));
+		if (ret < 0)
+			return ret;
+		if (cd->storage_flags) {
+			char *asc_sf = get_asc_storage_flags(cd->storage_flags);
+			/* note the leading dot: this has to be a designated initializer */
+			char *msg = make_message("\t\t.storage_flags = %s,\n", asc_sf);
+			free(asc_sf);
+			ret = write_and_free(fd, msg);
+			if (ret < 0)
+				return ret;
+		}
+		if (cd->storage_flags & OSL_FIXED_SIZE) {
+			ret = write_and_free(fd, make_message(
+				"\t\t.data_size = %u,\n", cd->data_size));
+			if (ret < 0)
+				return ret;
+		}
+		ret = write_and_free(fd, make_message("\t\t.name = \"%s\",\n", cd->name));
+		if (ret < 0)
+			return ret;
+		if (cd->storage_flags & OSL_RBTREE) {
+			ret = write_and_free(fd, make_message(
+				"\t\t.compare_function = compare_func,\n"));
+			if (ret < 0)
+				return ret;
+		}
+		ret = write_and_free(fd, make_message("\t},\n"));
+		if (ret < 0)
+			return ret;
+	}
+	ret = write_and_free(fd, make_message("};\n"));
+	return ret < 0? ret : 1;
+}
+
+static int dump_row(struct osl_table *t, unsigned row_num, const char *row_dir)
+{
+	int ret, i;
+	const struct osl_column_description *cd;
+	unsigned dsnc;
+	struct osl_object obj;
+	char *ds_name, *filename;
+	HASH_TYPE hash[HASH_SIZE];
+
+	FOR_EACH_MAPPED_COLUMN(i, t, cd) { /* one file col_NNN per mapped column */
+		ret = get_mapped_object(t, i, row_num, &obj);
+		if (ret < 0)
+			return ret;
+		filename = make_message("%s/col_%03u", row_dir, i);
+		ret = write_file(filename, obj.data, obj.size);
+		free(filename);
+		if (ret < 0)
+			return ret;
+	}
+	if (!t->num_disk_storage_columns)
+		return 1;
+	/* The disk storage file name is derived from the hash of the name column. */
+	dsnc = t->disk_storage_name_column;
+	ret = get_mapped_object(t, dsnc, row_num, &obj);
+	if (ret < 0)
+		return ret;
+	hash_object(&obj, hash);
+	ds_name = disk_storage_name_of_hash(t, hash);
+	FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) {
+		filename = disk_storage_path(t, i, ds_name);
+		ret = mmap_full_file(filename, O_RDONLY, &obj.data, &obj.size, NULL);
+		free(filename);
+		if (ret < 0)
+			goto out;
+		filename = make_message("%s/col_%03u", row_dir, i);
+		ret = write_file(filename, obj.data, obj.size);
+		free(filename);
+		osl_munmap(obj.data, obj.size); /* do not keep one mapping per dumped file */
+		if (ret < 0)
+			goto out;
+	}
+	ret = 1;
+out:
+	free(ds_name);
+	return ret;
+}
+
+static int dump_rows(char *dump_dir, struct osl_table *t)
+{
+	unsigned i;
+	char *row_dir, *current_dir = NULL; /* current_dir: directory for a chunk of 1000 rows */
+	int ret = 0;
+
+	for (i = 0; i < t->num_rows; i++) {
+		if (!(i % 1000)) { /* switch to the next chunk, even if row i is invalid */
+			free(current_dir);
+			current_dir = make_message("%s/rows_%u-%u", dump_dir, i, i + 999);
+			NOTICE_LOG("dumping rows %u - %u\n", i, i + 999);
+			ret = osl_mkdir(current_dir, 0777);
+			if (ret < 0 && !is_errno(-ret, EEXIST))
+				goto out;
+			ret = 0; /* an already existing directory is not an error */
+		}
+		if (row_is_invalid(t, i))
+			continue;
+		row_dir = make_message("%s/row_%03u", current_dir, i);
+		ret = osl_mkdir(row_dir, 0777);
+		if (ret < 0 && !is_errno(-ret, EEXIST)) {
+			free(row_dir);
+			goto out;
+		}
+		ret = dump_row(t, i, row_dir);
+		free(row_dir);
+		if (ret < 0)
+			goto out;
+	}
+out:
+	free(current_dir);
+	return ret;
+}
+
+static int dump_table(char *dump_dir, struct osl_table_description *desc)
+{
+	struct osl_table *t = NULL;
+	int fd, ret = fsck_init(desc, &t);
+	char *desc_file;
+	char *table_dump_dir = NULL;
+
+	if (ret < 0)
+		goto out;
+	ret = osl_mkdir(dump_dir, 0777);
+	if (ret < 0 && !is_errno(-ret, EEXIST)) /* an existing directory is fine */
+		goto out;
+	table_dump_dir = make_message("%s/%s", dump_dir, desc->name);
+	ret = osl_mkdir(table_dump_dir, 0777);
+	if (ret < 0 && !is_errno(-ret, EEXIST))
+		goto out;
+	desc_file = make_message("%s/table_description.c", table_dump_dir);
+	ret = osl_open(desc_file, O_WRONLY | O_CREAT | O_EXCL, 0644); /* O_EXCL: an existing description file is an error */
+	free(desc_file);
+	if (ret < 0)
+		goto out;
+	fd = ret;
+	ret = dump_table_desc(t, fd);
+	close(fd);
+	if (ret < 0)
+		goto out;
+	ret = dump_rows(table_dump_dir, t);
+out:
+	free(table_dump_dir);
+	fsck_cleanup(t); /* NOTE(review): unlike fsck(), this never calls unmap_table() - confirm that is intended */
+	return ret;
+}
+
+/*
+ * Check one table and repair what can be repaired.
+ *
+ * The order matters: invalid rows must be pruned from the index before the
+ * mapped data files are pruned. Returns negative on errors.
+ */
+static int fsck(struct osl_table_description *desc)
+{
+	int ret;
+	struct osl_table *t = NULL;
+	uint32_t *lost_bytes = NULL;
+
+	ret = fsck_init(desc, &t);
+	if (ret < 0)
+		goto out;
+	ret = check_index_ranges(t);
+	if (ret < 0)
+		goto out_unmap;
+	ret = check_disk_storage_columns(t);
+	if (ret < 0)
+		goto out_unmap;
+	ret = prune_invalid_rows_from_index(t);
+	if (ret < 0)
+		goto out_unmap;
+	ret = check_for_invalid_objects(t, &lost_bytes);
+	if (ret > 0) /* at least one mapped data file needs pruning */
+		ret = prune_objects(t, lost_bytes);
+out_unmap:
+	free(lost_bytes); /* also on errors; NULL unless check_for_invalid_objects() set it */
+	unmap_table(t, OSL_MARK_CLEAN);
+out:
+	fsck_cleanup(t);
+	return ret;
+}
+
+/*
+ * Check and/or dump one table, depending on the command line options.
+ */
+static int check_table(char *base_dir, char *table_name)
+{
+	struct osl_table_description desc = {
+		.column_descriptions = NULL,
+		.dir = base_dir,
+		.name = table_name
+	};
+	int ret = 1;
+
+	INFO_LOG("checking table %s\n", table_name);
+	if (!conf.no_fsck_given)
+		ret = fsck(&desc);
+	if (ret >= 0) {
+		ret = 1;
+		if (conf.dump_dir_given && *conf.dump_dir_arg)
+			ret = dump_table(conf.dump_dir_arg, &desc);
+	}
+	if (ret < 0)
+		ERROR_LOG("failed to check table %s\n", table_name);
+	else
+		NOTICE_LOG("successfully checked table %s\n", table_name);
+	return ret;
+}
+
+static int check_all_tables(char *base_dir)
+{
+	DIR *dir;
+	struct dirent *entry;
+	int cwd_fd, ret2, ret = para_opendir(base_dir, &dir, &cwd_fd);
+
+	if (ret < 0)
+		return ret;
+	while ((entry = readdir(dir))) { /* each subdirectory of base_dir is treated as a table */
+		mode_t m;
+		struct stat s;
+		if (!strcmp(entry->d_name, "."))
+			continue;
+		if (!strcmp(entry->d_name, ".."))
+			continue;
+		if (lstat(entry->d_name, &s) == -1) /* relative name; NOTE(review): presumably para_opendir() made base_dir the cwd - confirm */
+			continue;
+		m = s.st_mode;
+		if (!S_ISDIR(m))
+			continue;
+		ret = check_table(base_dir, entry->d_name);
+		if (ret < 0) /* stop at the first table that fails */
+			break;
+	}
+	closedir(dir);
+	ret2 = para_fchdir(cwd_fd);
+	if (ret2 < 0 && ret >= 0) /* an earlier error takes precedence */
+		ret = ret2;
+	close(cwd_fd);
+	return ret;
+}
+
+/*
+ * Check the tables given at the command line, or all tables of the base dir.
+ */
+int main(int argc, char **argv)
+{
+	int i, ret;
+	char *base_dir = NULL;
+
+	ret = fsck_cmdline_parser(argc, argv, &conf);
+	if (ret < 0) {
+		ret = -E_FSCK_SYNTAX;
+		goto out;
+	}
+	HANDLE_VERSION_FLAG("fsck", conf);
+	if (!conf.base_dir_given) {
+		/* default: the afs database below the home directory */
+		char *home = para_homedir();
+		base_dir = make_message("%s/.paraslash/afs_database", home);
+		free(home);
+	} else
+		base_dir = para_strdup(conf.base_dir_arg);
+	if (conf.inputs_num) {
+		/* check only the given tables, stop at the first failure */
+		for (i = 0; i < conf.inputs_num && ret >= 0; i++)
+			ret = check_table(base_dir, conf.inputs[i]);
+	} else
+		ret = check_all_tables(base_dir);
+out:
+	if (ret >= 0)
+		NOTICE_LOG("success\n");
+	else {
+		/* FIXME: osl_strerror() is BAD!!! */
+		ERROR_LOG("%s%s: %s\n",
+			base_dir? "base_dir: " : "",
+			base_dir? base_dir : "",
+			osl_strerror(-ret)
+		);
+		if (conf.loglevel_arg > 1)
+			EMERG_LOG("re-run with \"--loglevel %d\" to increase verbosity\n",
+				conf.loglevel_arg - 1);
+	}
+	free(base_dir);
+	return ret < 0? EXIT_FAILURE : EXIT_SUCCESS;
+}