Replace id table by hash table.
author Andre Noll <maan@systemlinux.org>
Sun, 25 May 2008 16:08:27 +0000 (18:08 +0200)
committer Andre Noll <maan@systemlinux.org>
Sun, 25 May 2008 16:08:27 +0000 (18:08 +0200)
This should make uid lookups much faster. We use a power of two for
the hash table size and open addressing with double hashing to handle
hash collisions.

adu.c
error.h

diff --git a/adu.c b/adu.c
index 03df3e1..a7ac6b4 100644 (file)
--- a/adu.c
+++ b/adu.c
@@ -138,58 +138,6 @@ static struct osl_table_description dir_table_desc = {
        .dir = "/tmp/adu"
 };
 
-/** The columns of the id table. */
-enum id_table_columns {
-       /** The user id. */
-       IDT_UID,
-       /** The number of bytes of all regular files owned by this id. */
-       IDT_BYTES,
-       /** The number of regular files owned by this id. */
-       IDT_FILES,
-       /** The user table for this uid. */
-       IDT_TABLE,
-       /** Number of columns in this table. */
-       NUM_IDT_COLUMNS
-};
-
-static struct osl_column_description id_table_cols[] = {
-       [IDT_UID] = {
-               .storage_type = OSL_MAPPED_STORAGE,
-               .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
-               .name = "uid",
-               .compare_function = uint32_compare,
-               .data_size = sizeof(uint32_t)
-       },
-       [IDT_BYTES] = {
-               .storage_type = OSL_MAPPED_STORAGE,
-               .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
-               .compare_function = size_compare,
-               .name = "num_bytes",
-               .data_size = sizeof(uint64_t)
-       },
-       [IDT_FILES] = {
-               .storage_type = OSL_MAPPED_STORAGE,
-               .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
-               .compare_function = size_compare,
-               .name = "num_filess",
-               .data_size = sizeof(uint64_t)
-       },
-       [IDT_TABLE] = {
-               .storage_type = OSL_NO_STORAGE,
-               .storage_flags = OSL_FIXED_SIZE | OSL_UNIQUE,
-               .name = "user_table",
-               .data_size = sizeof(void *)
-       }
-};
-
-static struct osl_table_description id_table_desc = {
-       .name = "id_table",
-       .num_columns = NUM_IDT_COLUMNS,
-       .flags = 0,
-       .column_descriptions = id_table_cols,
-       .dir = "/tmp/adu"
-};
-
 /** The columns of the id table. */
 enum user_table_columns {
        /** The numer of the directory. */
@@ -210,14 +158,14 @@ static struct osl_column_description user_table_cols[] = {
                .compare_function = uint32_compare,
                .data_size = sizeof(uint32_t)
        },
-       [IDT_BYTES] = {
+       [UT_BYTES] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
                .compare_function = size_compare,
                .name = "num_bytes",
                .data_size = sizeof(uint64_t)
        },
-       [IDT_FILES] = {
+       [UT_FILES] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
                .compare_function = size_compare,
@@ -226,25 +174,7 @@ static struct osl_column_description user_table_cols[] = {
        },
 };
 
-static struct osl_table_description user_table_desc = {
-       .num_columns = NUM_UT_COLUMNS,
-       .flags = 0,
-       .column_descriptions = user_table_cols,
-       .dir = "/tmp/adu"
-};
 static struct osl_table *dir_table;
-static struct osl_table *id_table;
-
-static int create_tables(void)
-{
-       int ret = osl_create_table(&dir_table_desc);
-       if (ret < 0)
-               return ret;
-       ret = osl_create_table(&id_table_desc);
-       if (ret < 0)
-               return ret;
-       return 1;
-}
 
 int add_directory(char *dirname, uint32_t dir_num, uint64_t *dir_size,
                uint64_t *dir_files)
@@ -282,57 +212,64 @@ int create_and_open_user_table(uint32_t uid, struct osl_table **t)
        return osl_open_table(desc, t);
 }
 
-static int insert_id_row(uint32_t uid, struct osl_table *t, struct osl_row **row)
+
+#define uid_hash_bits 8
+static uint32_t uid_hash_table_size = 1 << uid_hash_bits;
+#define PRIME1 0x811c9dc5
+#define PRIME2 0x01000193
+
+struct user_info {
+       uint32_t uid;
+       struct osl_table *table;
+       uint64_t files;
+       uint64_t bytes;
+};
+
+static struct user_info *uid_hash_table;
+
+static void create_hash_table(void)
 {
-       struct osl_object objects[NUM_IDT_COLUMNS];
-       uint64_t num = 0;
-
-       struct osl_table **table_ptr = para_malloc(sizeof(*table_ptr));
-       *table_ptr = t;
-
-       INFO_LOG("§§§§§§§§§§§§§§§§§§§§§ uid: %d, t: %p\n", uid, t);
-       objects[IDT_UID].data = &uid;
-       objects[IDT_UID].size = sizeof(uid);
-       objects[IDT_BYTES].data = &num;
-       objects[IDT_BYTES].size = sizeof(num);
-       objects[IDT_FILES].data = &num;
-       objects[IDT_FILES].size = sizeof(num);
-       objects[IDT_TABLE].data = table_ptr;
-       objects[IDT_TABLE].size = sizeof(*table_ptr);
-       return osl_add_and_get_row(id_table, objects, row);
+       uid_hash_table = para_calloc(uid_hash_table_size
+               * sizeof(struct user_info));
 }
 
-static int get_user_table(struct osl_row *row, struct osl_table **t)
+static int create_tables(void)
 {
-       struct osl_object obj;
-
-       int ret = osl_get_object(id_table, row, IDT_TABLE, &obj);
-       if (ret < 0)
-               return ret;
-       *t = *(struct osl_table **)obj.data;
-       INFO_LOG("^^^^^^^^^^^^^^^^^^ t: %p\n", *t);
-       return 1;
+       create_hash_table();
+       return osl_create_table(&dir_table_desc);
 }
 
-static int add_id_bytes(struct osl_row *row, uint64_t *add)
+
+static uint32_t double_hash(uint32_t uid, uint32_t probe_num)
 {
-       uint64_t num;
-       struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
+       return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num) % uid_hash_table_size;
+}
 
-       /* update number of bytes */
-       int ret = osl_get_object(id_table, row, IDT_BYTES, &obj1);
-       if (ret < 0)
-               return ret;
-       num = *(uint64_t *)obj1.data + *add;
-       ret = osl_update_object(id_table, row, IDT_BYTES, &obj2);
-       if (ret < 0)
-               return ret;
-       /* increment number of files */
-       ret = osl_get_object(id_table, row, IDT_FILES, &obj1);
-       if (ret < 0)
-               return ret;
-       num = *(uint64_t *)obj1.data + 1;
-       return osl_update_object(id_table, row, IDT_FILES, &obj2);
+#define FOR_EACH_USER(ui) for (ui = uid_hash_table; ui < uid_hash_table \
+               + uid_hash_table_size; ui++)
+
+static int search_uid(uint32_t uid, int insert, struct user_info **ui)
+{
+       uint32_t p;
+
+       for (p = 0; p < uid_hash_table_size; p++) {
+               struct user_info *i = uid_hash_table + double_hash(uid, p);
+               if (!i->table) {
+                       if (!insert)
+                               return -E_BAD_UID;
+                       int ret = create_and_open_user_table(uid, &i->table);
+                       if (ret < 0)
+                               return ret;
+                       i->uid = uid;
+                       *ui = i;
+                       return 1;
+               }
+               if (i->uid != uid)
+                       continue;
+               *ui = i;
+               return 0;
+       }
+       return insert? -E_HASH_TABLE_OVERFLOW : -E_BAD_UID;
 }
 
 static int update_user_row(struct osl_table *t, uint32_t dir_num,
@@ -378,7 +315,9 @@ static int update_user_row(struct osl_table *t, uint32_t dir_num,
        }
 }
 
-static uint32_t dir_num;
+static uint32_t num_dirs;
+static uint32_t num_files;
+static uint64_t num_bytes;
 
 int scan_dir(char *dirname)
 {
@@ -386,9 +325,9 @@ int scan_dir(char *dirname)
        struct dirent *entry;
        int ret, cwd_fd, ret2;
        uint64_t dir_size = 0, dir_files = 0;
-       struct osl_object obj;
+       uint32_t this_dir_num = num_dirs++;
 
-       INFO_LOG("----------------- %s\n", dirname);
+       DEBUG_LOG("----------------- %u: %s\n", num_dirs, dirname);
        ret = para_opendir(dirname, &dir, &cwd_fd);
        if (ret < 0) {
                if (ret != -ERRNO_TO_ERROR(EACCES))
@@ -402,15 +341,17 @@ int scan_dir(char *dirname)
                struct stat s;
                uint32_t uid;
                uint64_t size;
-               struct osl_row *id_row;
-               struct osl_table *user_table;
+               struct user_info *ui;
 
                if (!strcmp(entry->d_name, "."))
                        continue;
                if (!strcmp(entry->d_name, ".."))
                        continue;
-               if (lstat(entry->d_name, &s) == -1)
+               if (lstat(entry->d_name, &s) == -1) {
+                       WARNING_LOG("lstat error for %s/%s\n", dirname,
+                               entry->d_name);
                        continue;
+               }
                m = s.st_mode;
                if (!S_ISREG(m) && !S_ISDIR(m))
                        continue;
@@ -425,36 +366,20 @@ int scan_dir(char *dirname)
                /* regular file */
                size = s.st_size;
                dir_size += size;
+               num_bytes += size;
                dir_files++;
+               num_files++;
                uid = s.st_uid;
-               INFO_LOG("++++++++++++++++++++++++++ %s, uid: %u\n", entry->d_name, uid);
-               obj.data = &uid;
-               obj.size = sizeof(uid);
-               ret = osl_get_row(id_table, IDT_UID, &obj, &id_row);
-               if (ret < 0 && ret != -E_RB_KEY_NOT_FOUND)
-                       goto out;
-               if (ret < 0) {
-                       ret = create_and_open_user_table(uid, &user_table);
-                       if (ret < 0)
-                               goto out;
-                       ret = insert_id_row(uid, user_table, &id_row);
-                       if (ret < 0)
-                               goto out;
-               } else {
-                       ret = get_user_table(id_row, &user_table);
-                       if (ret < 0)
-                               goto out;
-               }
-               ret = add_id_bytes(id_row, &size);
+               ret = search_uid(uid, 1, &ui);
                if (ret < 0)
                        goto out;
-               INFO_LOG("user_table: %p\n", user_table);
-               ret = update_user_row(user_table, dir_num, &size);
-               INFO_LOG("update_user  ret: %d\n", ret);
+               ui->bytes += size;
+               ui->files++;
+               ret = update_user_row(ui->table, this_dir_num, &size);
                if (ret < 0)
                        goto out;
        }
-       ret = add_directory(dirname, dir_num++, &dir_size, &dir_files);
+       ret = add_directory(dirname, this_dir_num, &dir_size, &dir_files);
 out:
        closedir(dir);
        ret2 = para_fchdir(cwd_fd);
@@ -513,52 +438,39 @@ static int print_dirname_and_file_count(struct osl_row *row, void *data)
        return 1;
 }
 
-static int print_id_stats(struct osl_row *row, __a_unused void *data)
+static void print_id_stats(void)
 {
-       struct osl_object obj;
-       uint32_t uid;
-       uint64_t bytes, files;
-       int ret = osl_get_object(id_table, row, IDT_UID, &obj);
+       struct user_info *ui;
 
-       if (ret < 0)
-               return ret;
-       uid = *(uint32_t *)obj.data;
-       ret = osl_get_object(id_table, row, IDT_BYTES, &obj);
-       if (ret < 0)
-               return ret;
-       bytes = *(uint64_t *)obj.data;
-       ret = osl_get_object(id_table, row, IDT_FILES, &obj);
-       if (ret < 0)
-               return ret;
-       files = *(uint64_t *)obj.data;
-
-       printf("%u\t%llu\t%llu\n", (unsigned)uid, (long long unsigned)files,
-               (long long unsigned)bytes);
-       return 1;
+       FOR_EACH_USER(ui) {
+               if (!ui->table)
+                       continue;
+               printf("%u\t%llu\t%llu\n", (unsigned)ui->uid, (long long unsigned)ui->files,
+                       (long long unsigned)ui->bytes);
+       }
 }
 
-struct id_dir_stat_info {
+struct big_dir_info {
        unsigned count;
        struct osl_table *user_table;
 };
 
 static int print_big_dir(struct osl_row *row, void *data)
 {
-       struct id_dir_stat_info *info = data;
-       info->count++;
+       struct big_dir_info *bdi = data;
        int ret;
        struct osl_row *dir_row;
        char *dirname;
        uint64_t bytes;
        struct osl_object obj;
 
-       if (info->count > 10)
+       if (bdi->count++ > 10)
                return -E_LOOP_COMPLETE;
-       ret = osl_get_object(info->user_table, row, UT_BYTES, &obj);
+       ret = osl_get_object(bdi->user_table, row, UT_BYTES, &obj);
        if (ret < 0)
                return ret;
        bytes = *(uint64_t *)obj.data;
-       ret = osl_get_object(info->user_table, row, UT_DIR_NUM, &obj);
+       ret = osl_get_object(bdi->user_table, row, UT_DIR_NUM, &obj);
        if (ret < 0)
                return ret;
        ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row);
@@ -572,25 +484,18 @@ static int print_big_dir(struct osl_row *row, void *data)
        return 1;
 }
 
-static int print_id_dir_stats(struct osl_row *row, __a_unused void *data)
+static void print_id_dir_stats(void)
 {
-       struct osl_object obj;
-       uint32_t uid;
-       int ret = osl_get_object(id_table, row, IDT_UID, &obj);
-       struct id_dir_stat_info info = {.count = 0};
-
-       if (ret < 0)
-               return ret;
-       uid = *(uint32_t *)obj.data;
-
-       ret = osl_get_object(id_table, row, IDT_TABLE, &obj);
-       if (ret < 0)
-               return ret;
-       info.user_table = *(struct osl_table **)obj.data;
+       struct user_info *ui;
 
-       printf("************************* Big dirs owned by uid %u\n", (unsigned) uid);
-       osl_rbtree_loop_reverse(info.user_table, IDT_BYTES, &info, print_big_dir);
-       return 1;
+       FOR_EACH_USER(ui) {
+               struct big_dir_info bdi = {.count = 0};
+               if (!ui->table)
+                       continue;
+               bdi.user_table = ui->table;
+               printf("************************* Big dirs owned by uid %u\n", (unsigned) ui->uid);
+               osl_rbtree_loop_reverse(ui->table, UT_BYTES, &bdi, print_big_dir);
+       }
 }
 
 static int print_statistics(void)
@@ -598,6 +503,8 @@ static int print_statistics(void)
        unsigned count = 0;
        int ret;
 
+       printf("Summary: %u dirs, %u files, %llu bytes\n", (unsigned)num_dirs,
+               (unsigned)num_files, (long long unsigned)num_bytes);
        printf("************************* Biggest dirs\n");
        ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &count, print_dirname_and_size);
        if (ret < 0 && ret != -E_LOOP_COMPLETE)
@@ -609,11 +516,9 @@ static int print_statistics(void)
                return ret;
 
        printf("************************* dirs stats by owner\n");
-       ret = osl_rbtree_loop(id_table, IDT_BYTES, NULL, print_id_stats);
-       if (ret < 0)
-               return ret;
-
-       return osl_rbtree_loop(id_table, IDT_BYTES, NULL, print_id_dir_stats);
+       print_id_stats();
+       print_id_dir_stats();
+       return 1;
 }
 
 
@@ -623,9 +528,6 @@ int main(int argc, char **argv)
        if (ret < 0)
                goto out;
        ret = osl_open_table(&dir_table_desc, &dir_table);
-       if (ret < 0)
-               goto out;
-       ret = osl_open_table(&id_table_desc, &id_table);
        if (ret < 0)
                goto out;
        ret = -E_SYNTAX;
diff --git a/error.h b/error.h
index 8603291..1067e7f 100644 (file)
--- a/error.h
+++ b/error.h
@@ -91,7 +91,9 @@ static inline char *error_txt(int num)
        _ERROR(EMPTY, "file empty") \
        _ERROR(MMAP, "mmap error") \
        _ERROR(SYNTAX, "syntax error") \
-       _ERROR(LOOP_COMPLETE, "loop complete")
+       _ERROR(LOOP_COMPLETE, "loop complete") \
+       _ERROR(HASH_TABLE_OVERFLOW, "hash table too small") \
+       _ERROR(BAD_UID, "uid not found in hash table")
 
 
 /**