From: Andre Noll
Date: Fri, 30 May 2008 16:11:32 +0000 (+0200)
Subject: Only store the number of the parent dir, not the dir itself.
X-Git-Tag: v0.0.2~36^2
X-Git-Url: http://git.tuebingen.mpg.de/?p=adu.git;a=commitdiff_plain;h=3b0eb2b6211084fd0efd237e8db04ae7b0473943

Only store the number of the parent dir, not the dir itself.

This should reduce the size of the database considerably while
also improving performance.
---

diff --git a/adu.c b/adu.c
index 7c6ba6f..280d3c0 100644
--- a/adu.c
+++ b/adu.c
@@ -196,6 +196,8 @@ enum dir_table_columns {
 	DT_NAME,
 	/** The dir count number. */
 	DT_NUM,
+	/** The number of the parent directory. */
+	DT_PARENT_NUM,
 	/** The number of bytes of all regular files. */
 	DT_BYTES,
 	/** The number of all regular files. */
@@ -217,6 +219,13 @@ static struct osl_column_description dir_table_cols[] = {
 		.compare_function = uint64_compare,
 		.data_size = sizeof(uint64_t)
 	},
+	[DT_PARENT_NUM] = {
+		.storage_type = OSL_MAPPED_STORAGE,
+		.storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
+		.name = "parent_num",
+		.compare_function = size_compare,
+		.data_size = sizeof(uint64_t)
+	},
 	[DT_BYTES] = {
 		.storage_type = OSL_MAPPED_STORAGE,
 		.storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
@@ -278,16 +287,18 @@ static struct osl_column_description user_table_cols[] = {
 
 static struct osl_table *dir_table;
 
-static int add_directory(char *dirname, uint64_t dir_num, uint64_t *dir_size,
-		uint64_t *dir_files)
+static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num,
+		uint64_t *dir_size, uint64_t *dir_files)
 {
 	struct osl_object dir_objects[NUM_DT_COLUMNS];
 
-	INFO_LOG("adding #%llu: %s\n", (long long unsigned)dir_num, dirname);
+	INFO_LOG("adding #%llu: %s\n", (long long unsigned)*dir_num, dirname);
 	dir_objects[DT_NAME].data = dirname;
 	dir_objects[DT_NAME].size = strlen(dirname) + 1;
-	dir_objects[DT_NUM].data = &dir_num;
-	dir_objects[DT_NUM].size = sizeof(dir_num);
+	dir_objects[DT_NUM].data = dir_num;
+	dir_objects[DT_NUM].size = sizeof(*dir_num);
+	dir_objects[DT_PARENT_NUM].data = parent_dir_num;
+	dir_objects[DT_PARENT_NUM].size = sizeof(*parent_dir_num);
 	dir_objects[DT_BYTES].data = dir_size;
 	dir_objects[DT_BYTES].size = sizeof(*dir_size);
 	dir_objects[DT_FILES].data = dir_files;
@@ -488,13 +499,13 @@ static uint64_t num_dirs;
 static uint64_t num_files;
 static uint64_t num_bytes;
 
-int scan_dir(char *dirname)
+int scan_dir(char *dirname, uint64_t *parent_dir_num)
 {
 	DIR *dir;
 	struct dirent *entry;
 	int ret, cwd_fd, ret2;
 	uint64_t dir_size = 0, dir_files = 0;
-	uint64_t this_dir_num = num_dirs++;
+	uint64_t this_dir_num = ++num_dirs;
 
 	DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)num_dirs, dirname);
 	ret = para_opendir(dirname, &dir, &cwd_fd);
@@ -506,7 +517,6 @@ int scan_dir(char *dirname)
 	}
 	while ((entry = readdir(dir))) {
 		mode_t m;
-		char *tmp;
 		struct stat s;
 		uint32_t uid;
 		uint64_t size;
@@ -525,9 +535,7 @@ int scan_dir(char *dirname)
 		if (!S_ISREG(m) && !S_ISDIR(m))
 			continue;
 		if (S_ISDIR(m)) {
-			tmp = make_message("%s/%s", dirname, entry->d_name);
-			ret = scan_dir(tmp);
-			free(tmp);
+			ret = scan_dir(entry->d_name, &this_dir_num);
 			if (ret < 0)
 				goto out;
 			continue;
@@ -548,7 +556,8 @@ int scan_dir(char *dirname)
 		if (ret < 0)
 			goto out;
 	}
-	ret = add_directory(dirname, this_dir_num, &dir_size, &dir_files);
+	ret = add_directory(dirname, &this_dir_num, parent_dir_num,
+		&dir_size, &dir_files);
 out:
 	closedir(dir);
 	ret2 = para_fchdir(cwd_fd);
@@ -558,15 +567,86 @@ out:
 	return ret;
 }
 
-static int get_dir_name(struct osl_row *row, char **name)
+/**
+ * Compute the path of a directory from its number.
+ *
+ * Starting at the row whose DT_NUM equals *dirnum, follow the DT_PARENT_NUM
+ * links until a parent number of zero is reached, prepending the name of
+ * each directory visited.
+ *
+ * On success *name points to an allocated string the caller must free.
+ * On errors *name is NULL and the negative value of the failing osl call
+ * is returned.
+ */
+static int get_dir_name_by_number(uint64_t *dirnum, char **name)
+{
+	char *result = NULL, *tmp;
+	struct osl_row *row;
+	uint64_t val = *dirnum;
+	struct osl_object obj = {.data = &val, .size = sizeof(val)};
+	int ret;
+
+again:
+	ret = osl_get_row(dir_table, DT_NUM, &obj, &row);
+	if (ret < 0)
+		goto out;
+	ret = osl_get_object(dir_table, row, DT_NAME, &obj);
+	if (ret < 0)
+		goto out;
+	if (result) {
+		tmp = make_message("%s/%s", (char *)obj.data, result);
+		free(result);
+		result = tmp;
+	} else
+		result = para_strdup((char *)obj.data);
+	ret = osl_get_object(dir_table, row, DT_PARENT_NUM, &obj);
+	if (ret < 0)
+		goto out;
+	val = *(uint64_t *)obj.data;
+	if (val)
+		goto again;
+out:
+	if (ret < 0) {
+		free(result);
+		*name = NULL;
+	} else
+		*name = result;
+	return ret;
+}
+
+/**
+ * Compute the path of the directory stored in a row of the dir table.
+ *
+ * Returns 1 on success, in which case *name points to an allocated string
+ * the caller must free. On errors *name is NULL and a negative value is
+ * returned.
+ */
+static int get_dir_name_of_row(struct osl_row *dir_table_row, char **name)
 {
 	struct osl_object obj;
-	int ret = osl_get_object(dir_table, row, DT_NAME, &obj);
+	int ret;
+	char *this_dir, *prefix = NULL;
 
+	*name = NULL;
+	ret = osl_get_object(dir_table, dir_table_row, DT_NAME, &obj);
 	if (ret < 0)
 		return ret;
-	*name = obj.data;
-	return 1;
+	this_dir = para_strdup((char *)obj.data);
+	ret = osl_get_object(dir_table, dir_table_row, DT_PARENT_NUM, &obj);
+	if (ret < 0)
+		goto out;
+	if (!*(uint64_t *)obj.data) {
+		*name = this_dir;
+		return 1;
+	}
+	ret = get_dir_name_by_number((uint64_t *)obj.data, &prefix);
+	if (ret < 0)
+		goto out;
+	*name = make_message("%s/%s", prefix, this_dir);
+	free(prefix);
+	ret = 1;
+out:
+	free(this_dir);
+	return ret;
 }
 
 const uint64_t size_unit_divisors[] = {
@@ -627,7 +707,7 @@ static int global_stats_loop_function(struct osl_row *row, void *data)
 	if (!gsi->count && !summary)
 		return -E_LOOP_COMPLETE;
 	if (gsi->count && (gsi->flags & GSF_PRINT_DIRNAME)) {
-		ret = get_dir_name(row, &dirname);
+		ret = get_dir_name_of_row(row, &dirname);
 		if (ret < 0)
 			return ret;
 		printf("%s%s", dirname,
@@ -723,13 +803,9 @@ static int user_stats_loop_function(struct osl_row *row, void *data)
 	ret = osl_get_object(usi->ui->table, row, UT_DIR_NUM, &obj);
 	if (ret < 0)
 		return ret;
-	ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row);
-	if (ret < 0)
-		return ret;
-	ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj);
+	ret = get_dir_name_by_number((uint64_t *)obj.data, &dirname);
 	if (ret < 0)
 		return ret;
-	dirname = obj.data;
 	printf("%s%s", dirname,
 		(usi->flags & (USF_PRINT_FILES | USF_PRINT_BYTES))?
@@ -916,13 +992,15 @@ static void close_all_tables(void)
 
 static int com_create()
 {
+	uint64_t zero = 0ULL;
 	int ret = create_tables();
+
 	if (ret < 0)
 		return ret;
 	ret = open_dir_table();
 	if (ret < 0)
 		return ret;
-	ret = scan_dir(conf.base_dir_arg);
+	ret = scan_dir(conf.base_dir_arg, &zero);
 	if (ret < 0)
 		goto out;
 	ret = write_uid_list();