From: Andre Noll Date: Mon, 26 May 2008 09:24:34 +0000 (+0200) Subject: Merge commit 'meins/master' X-Git-Tag: v0.0.2~42 X-Git-Url: http://git.tuebingen.mpg.de/?p=adu.git;a=commitdiff_plain;h=2cfa8c8aaa4662f44bc7e7cd4d3591ed6e1326bd;hp=-c Merge commit 'meins/master' Conflicts: Makefile adu.c --- 2cfa8c8aaa4662f44bc7e7cd4d3591ed6e1326bd diff --combined Makefile index 3cd9cc4,e039865..b590b29 --- a/Makefile +++ b/Makefile @@@ -1,4 -1,4 +1,4 @@@ - objects := adu.o string.o -objects := osl.o fd.o rbtree.o string.o adu.o sha1.o cmdline.o ++objects := adu.o string.o cmdline.o all: adu DEBUG_CPPFLAGS += -Wno-sign-compare -g -Wunused -Wundef -W @@@ -19,8 -19,15 +19,15 @@@ Makefile.deps: $(wildcard *.c *.h -include Makefile.deps adu: $(objects) - $(CC) -o $@ $(objects) -lcrypto + $(CC) -o $@ $(objects) -lcrypto -losl + cmdline.o: cmdline.c cmdline.h + $(CC) -c $(CPPFLAGS) $< + + cmdline.c cmdline.h: adu.ggo + gengetopt --conf-parser < $< + + %.o: %.c Makefile $(CC) -c $(CPPFLAGS) $(DEBUG_CPPFLAGS) $< diff --combined adu.c index 598919a,8068d20..9edcc58 --- a/adu.c +++ b/adu.c @@@ -2,13 -2,31 +2,30 @@@ #include /* readdir() */ #include "gcc-compat.h" + #include "cmdline.h" -#include "osl.h" #include "fd.h" -#include "hash.h" #include "string.h" #include "error.h" ++#include "portable_io.h" DEFINE_ERRLIST; + #define DATABASE_DIR "/tmp/adu" + #define UID_LIST DATABASE_DIR "/" "uid_list" + /** Command line and config file options. 
*/ + static struct gengetopt_args_info conf; + + struct user_info { + uint32_t uid; + struct osl_table *table; + uint64_t files; + uint64_t bytes; + uint64_t dirs; + struct osl_table_description *desc; + }; + + static struct user_info *uid_hash_table; /** evaluates to 1 if x < y, to -1 if x > y and to 0 if x == y */ #define NUM_COMPARE(x, y) ((int)((x) < (y)) - (int)((x) > (y))) @@@ -30,7 -48,7 +47,7 @@@ __printf_2_3 void __log(int ll, const c time_t t1; char str[255] = ""; - if (ll < 4) + if (ll < conf.loglevel_arg) return; outfd = stderr; time(&t1); @@@ -74,18 -92,42 +91,42 @@@ static int size_compare(const struct os * are taken into account. * * \return It returns an integer less than, equal to, or greater than zero if - * \a obj1 is found, respectively, to be less than, to match, or be greater than - * obj2. + * \a obj1 is found, respectively, to be less than, to match, or be greater + * than obj2. * * \sa strcmp(3), strncmp(3), osl_compare_func. */ - int string_compare(const struct osl_object *obj1, const struct osl_object *obj2) + static int string_compare(const struct osl_object *obj1, + const struct osl_object *obj2) { const char *str1 = (const char *)obj1->data; const char *str2 = (const char *)obj2->data; return strncmp(str1, str2, MIN(obj1->size, obj2->size)); } + /** + * Compare two osl objects pointing to unsigned integers of 64 bit size. + * + * \param obj1 Pointer to the first integer. + * \param obj2 Pointer to the second integer. + * + * \return The values required for an osl compare function. + * + * \sa osl_compare_func, osl_hash_compare(). + */ + static int uint64_compare(const struct osl_object *obj1, + const struct osl_object *obj2) + { + uint64_t d1 = read_u64((const char *)obj1->data); + uint64_t d2 = read_u64((const char *)obj2->data); + + if (d1 < d2) + return 1; + if (d1 > d2) + return -1; + return 0; + } + /** The columns of the directory table. */ enum dir_table_columns { /** The name of the directory. 
*/ @@@ -111,8 -153,8 +152,8 @@@ static struct osl_column_description di .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, .name = "num", - .compare_function = uint32_compare, - .data_size = sizeof(uint32_t) + .compare_function = uint64_compare, + .data_size = sizeof(uint64_t) }, [DT_BYTES] = { .storage_type = OSL_MAPPED_STORAGE, @@@ -135,59 -177,7 +176,7 @@@ static struct osl_table_description dir .num_columns = NUM_DT_COLUMNS, .flags = 0, .column_descriptions = dir_table_cols, - .dir = "/tmp/adu" - }; - - /** The columns of the id table. */ - enum id_table_columns { - /** The user id. */ - IDT_UID, - /** The number of bytes of all regular files owned by this id. */ - IDT_BYTES, - /** The number of regular files owned by this id. */ - IDT_FILES, - /** The user table for this uid. */ - IDT_TABLE, - /** Number of columns in this table. */ - NUM_IDT_COLUMNS - }; - - static struct osl_column_description id_table_cols[] = { - [IDT_UID] = { - .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, - .name = "uid", - .compare_function = uint32_compare, - .data_size = sizeof(uint32_t) - }, - [IDT_BYTES] = { - .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, - .compare_function = size_compare, - .name = "num_bytes", - .data_size = sizeof(uint64_t) - }, - [IDT_FILES] = { - .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, - .compare_function = size_compare, - .name = "num_filess", - .data_size = sizeof(uint64_t) - }, - [IDT_TABLE] = { - .storage_type = OSL_NO_STORAGE, - .storage_flags = OSL_FIXED_SIZE | OSL_UNIQUE, - .name = "user_table", - .data_size = sizeof(void *) - } - }; - - static struct osl_table_description id_table_desc = { - .name = "id_table", - .num_columns = NUM_IDT_COLUMNS, - .flags = 0, - .column_descriptions = id_table_cols, - .dir = "/tmp/adu" + .dir = DATABASE_DIR }; /** The columns of the id table. 
*/ @@@ -207,17 -197,17 +196,17 @@@ static struct osl_column_description us .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, .name = "dir_num", - .compare_function = uint32_compare, - .data_size = sizeof(uint32_t) + .compare_function = uint64_compare, + .data_size = sizeof(uint64_t) }, - [IDT_BYTES] = { + [UT_BYTES] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, .compare_function = size_compare, .name = "num_bytes", .data_size = sizeof(uint64_t) }, - [IDT_FILES] = { + [UT_FILES] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, .compare_function = size_compare, @@@ -226,32 -216,14 +215,14 @@@ }, }; - static struct osl_table_description user_table_desc = { - .num_columns = NUM_UT_COLUMNS, - .flags = 0, - .column_descriptions = user_table_cols, - .dir = "/tmp/adu" - }; static struct osl_table *dir_table; - static struct osl_table *id_table; - - static int create_tables(void) - { - int ret = osl_create_table(&dir_table_desc); - if (ret < 0) - return ret; - ret = osl_create_table(&id_table_desc); - if (ret < 0) - return ret; - return 1; - } - int add_directory(char *dirname, uint32_t dir_num, uint64_t *dir_size, + static int add_directory(char *dirname, uint64_t dir_num, uint64_t *dir_size, uint64_t *dir_files) { struct osl_object dir_objects[NUM_DT_COLUMNS]; - INFO_LOG("adding #%u: %s\n", dir_num, dirname); + INFO_LOG("adding #%llu: %s\n", (long long unsigned)dir_num, dirname); dir_objects[DT_NAME].data = dirname; dir_objects[DT_NAME].size = strlen(dirname) + 1; dir_objects[DT_NUM].data = &dir_num; @@@ -264,78 -236,130 +235,130 @@@ return osl_add_row(dir_table, dir_objects); } - int create_and_open_user_table(uint32_t uid, struct osl_table **t) + static uint32_t num_uids; + + static int open_user_table(struct user_info *ui, int create) { int ret; - struct osl_table_description *desc = para_malloc(sizeof(*desc)); - - desc->num_columns = 
NUM_UT_COLUMNS; - desc->flags = 0; - desc->column_descriptions = user_table_cols; - desc->dir = para_strdup("/tmp/adu"); - desc->name = make_message("%u", uid); - INFO_LOG("................................. %u\n", uid); - // user_table_desc.name = make_message("%u", uid); - ret = osl_create_table(desc); + + ui->desc = para_malloc(sizeof(*ui->desc)); + ui->desc->num_columns = NUM_UT_COLUMNS; + ui->desc->flags = 0; + ui->desc->column_descriptions = user_table_cols; + ui->desc->dir = para_strdup(DATABASE_DIR); + ui->desc->name = make_message("%u", (unsigned)ui->uid); + num_uids++; + INFO_LOG(".............................uid #%u: %u\n", + (unsigned)num_uids, (unsigned)ui->uid); + if (create) { + ret = osl_create_table(ui->desc); + if (ret < 0) + goto err; + } + ret = osl_open_table(ui->desc, &ui->table); if (ret < 0) - return ret; - return osl_open_table(desc, t); + goto err; + return 1; + err: + free((char *)ui->desc->name); + free((char *)ui->desc->dir); + free(ui->desc); + ui->desc->name = NULL; + ui->desc->dir = NULL; + ui->desc = NULL; + ui->table = NULL; + return ret; } - static int insert_id_row(uint32_t uid, struct osl_table *t, struct osl_row **row) + #define uid_hash_bits 8 + static uint32_t uid_hash_table_size = 1 << uid_hash_bits; + #define PRIME1 0x811c9dc5 + #define PRIME2 0x01000193 + + static void create_hash_table(void) { - struct osl_object objects[NUM_IDT_COLUMNS]; - uint64_t num = 0; - - struct osl_table **table_ptr = para_malloc(sizeof(*table_ptr)); - *table_ptr = t; - - INFO_LOG("§§§§§§§§§§§§§§§§§§§§§ uid: %d, t: %p\n", uid, t); - objects[IDT_UID].data = &uid; - objects[IDT_UID].size = sizeof(uid); - objects[IDT_BYTES].data = # - objects[IDT_BYTES].size = sizeof(num); - objects[IDT_FILES].data = # - objects[IDT_FILES].size = sizeof(num); - objects[IDT_TABLE].data = table_ptr; - objects[IDT_TABLE].size = sizeof(*table_ptr); - return osl_add_and_get_row(id_table, objects, row); + uid_hash_table = para_calloc(uid_hash_table_size + * sizeof(struct 
user_info)); } - static int get_user_table(struct osl_row *row, struct osl_table **t) + static void free_hash_table(void) { - struct osl_object obj; + free(uid_hash_table); + uid_hash_table = NULL; + } + + static int create_tables(void) + { + int ret; - int ret = osl_get_object(id_table, row, IDT_TABLE, &obj); + ret = osl_create_table(&dir_table_desc); if (ret < 0) return ret; - *t = *(struct osl_table **)obj.data; - INFO_LOG("^^^^^^^^^^^^^^^^^^ t: %p\n", *t); + create_hash_table(); return 1; } - static int add_id_bytes(struct osl_row *row, uint64_t *add) + /* + * We use a hash table of size s=2^uid_hash_bits to map the uids into the + * interval [0..s-1]. Hash collisions are treated by open addressing, i.e. + * unused slots in the table are used to store different uids that hash to the + * same slot. + * + * If a hash collision occurs, different slots are successively probed in order + * to find an unused slot for the new uid. Probing is implemented via a second + * hash function that maps the uid to h=(uid * PRIME2) | 1, which is always an + * odd number. + * + * An odd number is sufficient to make sure each entry of the hash table gets + * probed for probe_num between 0 and s-1 because s is a power of two, hence + * the second hash value never has a common divisor with the hash table size. + * IOW: h is invertible in the ring [0..s-1]. 
+ */ + static uint32_t double_hash(uint32_t uid, uint32_t probe_num) { - uint64_t num; - struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)}; + return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num) + % uid_hash_table_size; + } - /* update number of bytes */ - int ret = osl_get_object(id_table, row, IDT_BYTES, &obj1); - if (ret < 0) - return ret; - num = *(uint64_t *)obj1.data + *add; - ret = osl_update_object(id_table, row, IDT_BYTES, &obj2); - if (ret < 0) - return ret; - /* increment number of files */ - ret = osl_get_object(id_table, row, IDT_FILES, &obj1); - if (ret < 0) - return ret; - num = *(uint64_t *)obj1.data + 1; - return osl_update_object(id_table, row, IDT_FILES, &obj2); + #define FOR_EACH_USER(ui) for (ui = uid_hash_table; ui && ui < uid_hash_table \ + + uid_hash_table_size; ui++) + + enum search_uid_flags { + OPEN_USER_TABLE = 1, + CREATE_USER_TABLE = 2, + }; + + static int search_uid(uint32_t uid, enum search_uid_flags flags, + struct user_info **ui_ptr) + { + uint32_t p; + + for (p = 0; p < uid_hash_table_size; p++) { + struct user_info *ui = uid_hash_table + double_hash(uid, p); + + if (!ui->table) { + int ret; + + if (!flags) + return -E_BAD_UID; + ui->uid = uid; + ret = open_user_table(ui, flags & CREATE_USER_TABLE); + if (ret < 0) + return ret; + if (ui_ptr) + *ui_ptr = ui; + return 1; + } + if (ui->uid != uid) + continue; + if (ui_ptr) + *ui_ptr = ui; + return 0; + } + return flags? 
-E_HASH_TABLE_OVERFLOW : -E_BAD_UID; } - static int update_user_row(struct osl_table *t, uint32_t dir_num, + static int update_user_row(struct osl_table *t, uint64_t dir_num, uint64_t *add) { struct osl_row *row; @@@ -378,7 -402,9 +401,9 @@@ } } - static uint32_t dir_num; + static uint64_t num_dirs; + static uint64_t num_files; + static uint64_t num_bytes; int scan_dir(char *dirname) { @@@ -386,9 -412,9 +411,9 @@@ struct dirent *entry; int ret, cwd_fd, ret2; uint64_t dir_size = 0, dir_files = 0; - struct osl_object obj; + uint64_t this_dir_num = num_dirs++; - INFO_LOG("----------------- %s\n", dirname); + DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)num_dirs, dirname); ret = para_opendir(dirname, &dir, &cwd_fd); if (ret < 0) { if (ret != -ERRNO_TO_ERROR(EACCES)) @@@ -402,15 -428,17 +427,17 @@@ struct stat s; uint32_t uid; uint64_t size; - struct osl_row *id_row; - struct osl_table *user_table; + struct user_info *ui; if (!strcmp(entry->d_name, ".")) continue; if (!strcmp(entry->d_name, "..")) continue; - if (lstat(entry->d_name, &s) == -1) + if (lstat(entry->d_name, &s) == -1) { + WARNING_LOG("lstat error for %s/%s\n", dirname, + entry->d_name); continue; + } m = s.st_mode; if (!S_ISREG(m) && !S_ISDIR(m)) continue; @@@ -425,36 -453,20 +452,20 @@@ /* regular file */ size = s.st_size; dir_size += size; + num_bytes += size; dir_files++; + num_files++; uid = s.st_uid; - INFO_LOG("++++++++++++++++++++++++++ %s, uid: %u\n", entry->d_name, uid); - obj.data = &uid; - obj.size = sizeof(uid); - ret = osl_get_row(id_table, IDT_UID, &obj, &id_row); - if (ret < 0 && ret != -E_RB_KEY_NOT_FOUND) - goto out; - if (ret < 0) { - ret = create_and_open_user_table(uid, &user_table); - if (ret < 0) - goto out; - ret = insert_id_row(uid, user_table, &id_row); - if (ret < 0) - goto out; - } else { - ret = get_user_table(id_row, &user_table); - if (ret < 0) - goto out; - } - ret = add_id_bytes(id_row, &size); + ret = search_uid(uid, CREATE_USER_TABLE | OPEN_USER_TABLE, 
&ui); if (ret < 0) goto out; - INFO_LOG("user_table: %p\n", user_table); - ret = update_user_row(user_table, dir_num, &size); - INFO_LOG("update_user ret: %d\n", ret); + ui->bytes += size; + ui->files++; + ret = update_user_row(ui->table, this_dir_num, &size); if (ret < 0) goto out; } - ret = add_directory(dirname, dir_num++, &dir_size, &dir_files); + ret = add_directory(dirname, this_dir_num, &dir_size, &dir_files); out: closedir(dir); ret2 = para_fchdir(cwd_fd); @@@ -475,166 -487,361 +486,361 @@@ static int get_dir_name(struct osl_row return 1; } - static int print_dirname_and_size(struct osl_row *row, void *data) + enum global_stats_flags { + GSF_PRINT_DIRNAME = 1, + GSF_PRINT_BYTES = 2, + GSF_PRINT_FILES = 4, + GSF_COMPUTE_SUMMARY = 8, + }; + + struct global_stats_info { + uint32_t count; + enum global_stats_flags flags; + }; + + static int global_stats_loop_function(struct osl_row *row, void *data) { - unsigned *count = data; + struct global_stats_info *gsi = data; struct osl_object obj; - char *name; - int ret; + char *dirname; + int ret, summary = gsi->flags & GSF_COMPUTE_SUMMARY; - if ((*count)++ > 100) + if (!gsi->count && !summary) return -E_LOOP_COMPLETE; - ret = get_dir_name(row, &name); - if (ret < 0) - return ret; - ret = osl_get_object(dir_table, row, DT_BYTES, &obj); - if (ret < 0) - return ret; - printf("%s\t%llu\n", name, *(long long unsigned *)obj.data); + if (gsi->count && (gsi->flags & GSF_PRINT_DIRNAME)) { + ret = get_dir_name(row, &dirname); + if (ret < 0) + return ret; + printf("%s%s", dirname, + (gsi->flags & (GSF_PRINT_FILES | GSF_PRINT_BYTES))? + "\t" : "\n" + ); + } + if (summary || (gsi->count && (gsi->flags & GSF_PRINT_FILES))) { + uint64_t files; + ret = osl_get_object(dir_table, row, DT_FILES, &obj); + if (ret < 0) + return ret; + files = *(uint64_t *)obj.data; + if (gsi->count && (gsi->flags & GSF_PRINT_FILES)) + printf("%llu%s", (long long unsigned)files, + (gsi->flags & GSF_PRINT_BYTES)? 
"\t" : "\n"); + if (summary) + num_files += files; + } + if (summary || (gsi->count && (gsi->flags & GSF_PRINT_BYTES))) { + uint64_t bytes; + ret = osl_get_object(dir_table, row, DT_BYTES, &obj); + if (ret < 0) + return ret; + bytes = *(uint64_t *)obj.data; + if (gsi->count && (gsi->flags & GSF_PRINT_BYTES)) + printf("%llu\n", (long long unsigned)bytes); + if (summary) { + num_bytes += bytes; + num_dirs++; + } + } + if (gsi->count) + gsi->count--; return 1; } - static int print_dirname_and_file_count(struct osl_row *row, void *data) + static void print_id_stats(void) + { + struct user_info *ui; + + printf("--------------------- user summary (uid/dirs/files/bytes):\n"); + FOR_EACH_USER(ui) { + if (!ui->table) + continue; + printf("%u\t%llu\t%llu\t%llu\n", (unsigned)ui->uid, + (long long unsigned)ui->dirs, + (long long unsigned)ui->files, + (long long unsigned)ui->bytes); + } + } + + enum user_stats_flags { + USF_PRINT_DIRNAME = 1, + USF_PRINT_BYTES = 2, + USF_PRINT_FILES = 4, + USF_COMPUTE_SUMMARY = 8, + }; + + struct user_stats_info { + uint32_t count; + enum user_stats_flags flags; + struct user_info *ui; + }; + + static int user_stats_loop_function(struct osl_row *row, void *data) { - unsigned *count = data; + struct user_stats_info *usi = data; + struct osl_row *dir_row; struct osl_object obj; - char *name; - int ret; + int ret, summary = usi->flags & GSF_COMPUTE_SUMMARY; - if ((*count)++ > 100) + if (!usi->count && !summary) return -E_LOOP_COMPLETE; - ret = get_dir_name(row, &name); - if (ret < 0) - return ret; - ret = osl_get_object(dir_table, row, DT_FILES, &obj); - if (ret < 0) - return ret; - printf("%s\t%llu\n", name, *(long long unsigned *)obj.data); + if (usi->count && (usi->flags & USF_PRINT_DIRNAME)) { + char *dirname; + ret = osl_get_object(usi->ui->table, row, UT_DIR_NUM, &obj); + if (ret < 0) + return ret; + ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row); + if (ret < 0) + return ret; + ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj); + 
if (ret < 0) + return ret; + dirname = obj.data; + printf("%s%s", + dirname, + (usi->flags & (USF_PRINT_FILES | USF_PRINT_BYTES))? + "\t" : "\n" + ); + } + if (summary || (usi->count && (usi->flags & USF_PRINT_FILES))) { + uint64_t files; + ret = osl_get_object(usi->ui->table, row, UT_FILES, &obj); + if (ret < 0) + return ret; + files = *(uint64_t *)obj.data; + if (usi->count && (usi->flags & USF_PRINT_FILES)) + printf("%llu%s", + (long long unsigned)files, + (usi->flags & USF_PRINT_BYTES)? "\t" : "\n" + ); + if (summary) + usi->ui->files += files; + } + if (summary || (usi->count && (usi->flags & USF_PRINT_BYTES))) { + uint64_t bytes; + ret = osl_get_object(usi->ui->table, row, UT_BYTES, &obj); + if (ret < 0) + return ret; + bytes = *(uint64_t *)obj.data; + if (usi->count && (usi->flags & USF_PRINT_BYTES)) + printf("%llu\n", (long long unsigned)bytes); + if (summary) { + usi->ui->bytes += bytes; + usi->ui->dirs++; + } + + } + if (usi->count) + usi->count--; return 1; } - static int print_id_stats(struct osl_row *row, __a_unused void *data) + static void print_user_stats(void) { - struct osl_object obj; - uint32_t uid; - uint64_t bytes, files; - int ret = osl_get_object(id_table, row, IDT_UID, &obj); + struct user_info *ui; + + FOR_EACH_USER(ui) { + struct user_stats_info usi = { + .count = 10, + .ui = ui + }; + if (!ui->table) + continue; + usi.flags = USF_PRINT_DIRNAME | USF_PRINT_BYTES | USF_COMPUTE_SUMMARY; + printf("************************************************ uid %u\n", + (unsigned) ui->uid); + if (!ui->table) + continue; + printf("----------------- Largest dirs -------------------\n"); + osl_rbtree_loop_reverse(ui->table, UT_BYTES, &usi, + user_stats_loop_function); + printf("---------- dirs containing most files ------------\n"); + usi.count = 10; + usi.flags = USF_PRINT_DIRNAME | USF_PRINT_FILES; + osl_rbtree_loop_reverse(ui->table, UT_FILES, &usi, + user_stats_loop_function); + } + } - if (ret < 0) - return ret; - uid = *(uint32_t *)obj.data; - ret = 
osl_get_object(id_table, row, IDT_BYTES, &obj); - if (ret < 0) + static int print_statistics(void) + { + int ret; + struct global_stats_info gsi = { + .count = 10, + .flags = GSF_PRINT_DIRNAME | GSF_PRINT_BYTES | GSF_COMPUTE_SUMMARY + }; + + printf("----------------- Largest dirs -------------------\n"); + ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &gsi, + global_stats_loop_function); + if (ret < 0 && ret != -E_LOOP_COMPLETE) return ret; - bytes = *(uint64_t *)obj.data; - ret = osl_get_object(id_table, row, IDT_FILES, &obj); - if (ret < 0) + gsi.count = 10; + + gsi.flags = GSF_PRINT_DIRNAME | GSF_PRINT_FILES; + printf("---------- dirs containing most files ------------\n"); + ret = osl_rbtree_loop_reverse(dir_table, DT_FILES, &gsi, + global_stats_loop_function); + if (ret < 0 && ret != -E_LOOP_COMPLETE) return ret; - files = *(uint64_t *)obj.data; - printf("%u\t%llu\t%llu\n", (unsigned)uid, (long long unsigned)files, - (long long unsigned)bytes); + printf("------------------ Global summary (dirs/files/bytes)\n" + "%llu\t%llu\t%llu\n", + (long long unsigned)num_dirs, (long long unsigned)num_files, + (long long unsigned)num_bytes); + print_user_stats(); + print_id_stats(); return 1; } - struct id_dir_stat_info { - unsigned count; - struct osl_table *user_table; - }; + static int write_uid_list(void) + { + char *buf; + uint32_t count = 0; + struct user_info *ui; + size_t size = num_uids * sizeof(uint32_t); + int ret; + + if (!num_uids) + return 0; + buf = para_malloc(size); + FOR_EACH_USER(ui) { + if (!ui->table) + continue; + write_u32(buf + count++ * sizeof(uint32_t), ui->uid); + } + ret = para_write_file(UID_LIST, buf, size); + free(buf); + return ret; + } - static int print_big_dir(struct osl_row *row, void *data) + static int open_dir_table(void) + { + return osl_open_table(&dir_table_desc, &dir_table); + } + + static void close_dir_table(void) { - struct id_dir_stat_info *info = data; - info->count++; int ret; - struct osl_row *dir_row; - char *dirname; 
- uint64_t bytes; - struct osl_object obj; - if (info->count > 10) - return -E_LOOP_COMPLETE; - ret = osl_get_object(info->user_table, row, UT_BYTES, &obj); + if (!dir_table) + return; + ret = osl_close_table(dir_table, OSL_MARK_CLEAN); if (ret < 0) - return ret; - bytes = *(uint64_t *)obj.data; - ret = osl_get_object(info->user_table, row, UT_DIR_NUM, &obj); + ERROR_LOG("failed to close dir table: %s\n", error_txt(-ret)); + dir_table = NULL; + } + + static void close_user_table(struct user_info *ui) + { + int ret; + + if (!ui || !ui->table) + return; + ret = osl_close_table(ui->table, OSL_MARK_CLEAN); if (ret < 0) - return ret; - ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row); + ERROR_LOG("failed to close user table %u: %s\n", + (unsigned) ui->uid, error_txt(-ret)); + free((char *)ui->desc->name); + ui->desc->name = NULL; + free((char *)ui->desc->dir); + ui->desc->dir = NULL; + free(ui->desc); + ui->desc = NULL; + ui->table = NULL; + } + + static void close_user_tables(void) + { + struct user_info *ui; + + FOR_EACH_USER(ui) + close_user_table(ui); + } + + static void close_all_tables(void) + { + close_dir_table(); + close_user_tables(); + free_hash_table(); + } + + static int com_create() + { + int ret = create_tables(); if (ret < 0) return ret; - ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj); + ret = open_dir_table(); if (ret < 0) return ret; - dirname = obj.data; - printf("%s: %llu\n", dirname, (long long unsigned)bytes); - return 1; + ret = scan_dir(conf.base_dir_arg); + if (ret < 0) + goto out; + ret = write_uid_list(); + out: + close_all_tables(); + return ret; } - static int print_id_dir_stats(struct osl_row *row, __a_unused void *data) + static int read_uid_file(void) { - struct osl_object obj; - uint32_t uid; - int ret = osl_get_object(id_table, row, IDT_UID, &obj); - struct id_dir_stat_info info = {.count = 0}; + char *map; + size_t size; + int ret = mmap_full_file(UID_LIST, O_RDONLY, (void **)&map, &size, NULL); + uint32_t n; if (ret < 0) 
return ret; - uid = *(uint32_t *)obj.data; - - ret = osl_get_object(id_table, row, IDT_TABLE, &obj); - if (ret < 0) - return ret; - info.user_table = *(struct osl_table **)obj.data; - - printf("************************* Big dirs owned by uid %u\n", (unsigned) uid); - osl_rbtree_loop_reverse(info.user_table, IDT_BYTES, &info, print_big_dir); - return 1; + num_uids = size / 4; + /* hash table size should be a power of two and larger than the number of uids */ + uid_hash_table_size = 4; + while (uid_hash_table_size < num_uids) + uid_hash_table_size *= 2; + create_hash_table(); + for (n = 0; n < num_uids; n++) { + uint32_t uid = read_u32(map + n * sizeof(uid)); + ret = search_uid(uid, OPEN_USER_TABLE, NULL); + if (ret < 0) + goto out; + } + out: + para_munmap(map, size); + return ret; } - static int print_statistics(void) + static int com_select(void) { - unsigned count = 0; int ret; - printf("************************* Biggest dirs\n"); - ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &count, print_dirname_and_size); - if (ret < 0 && ret != -E_LOOP_COMPLETE) - return ret; - count = 0; - printf("************************* dirs containing many files\n"); - ret = osl_rbtree_loop_reverse(dir_table, DT_FILES, &count, print_dirname_and_file_count); - if (ret < 0 && ret != -E_LOOP_COMPLETE) + ret = open_dir_table(); + if (ret < 0) return ret; - - printf("************************* dirs stats by owner\n"); - ret = osl_rbtree_loop(id_table, IDT_BYTES, NULL, print_id_stats); + ret = read_uid_file(); if (ret < 0) return ret; - - return osl_rbtree_loop(id_table, IDT_BYTES, NULL, print_id_dir_stats); + print_statistics(); + close_all_tables(); + return 1; } - int main(int argc, char **argv) { - int ret = create_tables(); - if (ret < 0) - goto out; - ret = osl_open_table(&dir_table_desc, &dir_table); - if (ret < 0) - goto out; - ret = osl_open_table(&id_table_desc, &id_table); - if (ret < 0) - goto out; + int ret; + struct cmdline_parser_params params = { + .override = 0, + 
.initialize = 1, + .check_required = 0, + .check_ambiguity = 0, + .print_errors = 1 + }; + + cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */ ret = -E_SYNTAX; - if (argc != 2) - goto out; - ret = scan_dir(argv[1]); + if (conf.select_given) + ret = com_select(); + else + ret = com_create(); if (ret < 0) goto out; - print_statistics(); out: if (ret < 0) { ERROR_LOG("%s\n", error_txt(-ret)); @@@ -642,4 -849,3 +848,3 @@@ } return EXIT_SUCCESS; } -