From: Andre Noll Date: Mon, 26 May 2008 09:24:34 +0000 (+0200) Subject: Merge commit 'meins/master' X-Git-Tag: v0.0.2~42 X-Git-Url: http://git.tuebingen.mpg.de/?p=adu.git;a=commitdiff_plain;h=2cfa8c8aaa4662f44bc7e7cd4d3591ed6e1326bd;hp=3ebfb15c8a53bd1e1fbd0d5f8d4f00c7eeb76fd0 Merge commit 'meins/master' Conflicts: Makefile adu.c --- diff --git a/Makefile b/Makefile index 3cd9cc4..b590b29 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -objects := adu.o string.o +objects := adu.o string.o cmdline.o all: adu DEBUG_CPPFLAGS += -Wno-sign-compare -g -Wunused -Wundef -W @@ -21,6 +21,13 @@ Makefile.deps: $(wildcard *.c *.h) adu: $(objects) $(CC) -o $@ $(objects) -lcrypto -losl +cmdline.o: cmdline.c cmdline.h + $(CC) -c $(CPPFLAGS) $< + +cmdline.c cmdline.h: adu.ggo + gengetopt --conf-parser < $< + + %.o: %.c Makefile $(CC) -c $(CPPFLAGS) $(DEBUG_CPPFLAGS) $< diff --git a/adu.c b/adu.c index 598919a..9edcc58 100644 --- a/adu.c +++ b/adu.c @@ -2,13 +2,30 @@ #include /* readdir() */ #include "gcc-compat.h" +#include "cmdline.h" #include "fd.h" #include "string.h" #include "error.h" +#include "portable_io.h" DEFINE_ERRLIST; +#define DATABASE_DIR "/tmp/adu" +#define UID_LIST DATABASE_DIR "/" "uid_list" +/** Command line and config file options. */ +static struct gengetopt_args_info conf; + +struct user_info { + uint32_t uid; + struct osl_table *table; + uint64_t files; + uint64_t bytes; + uint64_t dirs; + struct osl_table_description *desc; +}; + +static struct user_info *uid_hash_table; /** evaluates to 1 if x < y, to -1 if x > y and to 0 if x == y */ #define NUM_COMPARE(x, y) ((int)((x) < (y)) - (int)((x) > (y))) @@ -30,7 +47,7 @@ __printf_2_3 void __log(int ll, const char* fmt,...) time_t t1; char str[255] = ""; - if (ll < 4) + if (ll < conf.loglevel_arg) return; outfd = stderr; time(&t1); @@ -74,18 +91,42 @@ static int size_compare(const struct osl_object *obj1, const struct osl_object * * are taken into account. 
* * \return It returns an integer less than, equal to, or greater than zero if - * \a obj1 is found, respectively, to be less than, to match, or be greater than - * obj2. + * \a obj1 is found, respectively, to be less than, to match, or be greater + * than obj2. * * \sa strcmp(3), strncmp(3), osl_compare_func. */ -int string_compare(const struct osl_object *obj1, const struct osl_object *obj2) +static int string_compare(const struct osl_object *obj1, + const struct osl_object *obj2) { const char *str1 = (const char *)obj1->data; const char *str2 = (const char *)obj2->data; return strncmp(str1, str2, MIN(obj1->size, obj2->size)); } +/** + * Compare two osl objects pointing to unsigned integers of 64 bit size. + * + * \param obj1 Pointer to the first integer. + * \param obj2 Pointer to the second integer. + * + * \return The values required for an osl compare function. + * + * \sa osl_compare_func, osl_hash_compare(). + */ +static int uint64_compare(const struct osl_object *obj1, + const struct osl_object *obj2) +{ + uint64_t d1 = read_u64((const char *)obj1->data); + uint64_t d2 = read_u64((const char *)obj2->data); + + if (d1 < d2) + return 1; + if (d1 > d2) + return -1; + return 0; +} + /** The columns of the directory table. */ enum dir_table_columns { /** The name of the directory. */ @@ -111,8 +152,8 @@ static struct osl_column_description dir_table_cols[] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, .name = "num", - .compare_function = uint32_compare, - .data_size = sizeof(uint32_t) + .compare_function = uint64_compare, + .data_size = sizeof(uint64_t) }, [DT_BYTES] = { .storage_type = OSL_MAPPED_STORAGE, @@ -135,59 +176,7 @@ static struct osl_table_description dir_table_desc = { .num_columns = NUM_DT_COLUMNS, .flags = 0, .column_descriptions = dir_table_cols, - .dir = "/tmp/adu" -}; - -/** The columns of the id table. */ -enum id_table_columns { - /** The user id. 
*/ - IDT_UID, - /** The number of bytes of all regular files owned by this id. */ - IDT_BYTES, - /** The number of regular files owned by this id. */ - IDT_FILES, - /** The user table for this uid. */ - IDT_TABLE, - /** Number of columns in this table. */ - NUM_IDT_COLUMNS -}; - -static struct osl_column_description id_table_cols[] = { - [IDT_UID] = { - .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, - .name = "uid", - .compare_function = uint32_compare, - .data_size = sizeof(uint32_t) - }, - [IDT_BYTES] = { - .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, - .compare_function = size_compare, - .name = "num_bytes", - .data_size = sizeof(uint64_t) - }, - [IDT_FILES] = { - .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, - .compare_function = size_compare, - .name = "num_filess", - .data_size = sizeof(uint64_t) - }, - [IDT_TABLE] = { - .storage_type = OSL_NO_STORAGE, - .storage_flags = OSL_FIXED_SIZE | OSL_UNIQUE, - .name = "user_table", - .data_size = sizeof(void *) - } -}; - -static struct osl_table_description id_table_desc = { - .name = "id_table", - .num_columns = NUM_IDT_COLUMNS, - .flags = 0, - .column_descriptions = id_table_cols, - .dir = "/tmp/adu" + .dir = DATABASE_DIR }; /** The columns of the id table. 
*/ @@ -207,17 +196,17 @@ static struct osl_column_description user_table_cols[] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, .name = "dir_num", - .compare_function = uint32_compare, - .data_size = sizeof(uint32_t) + .compare_function = uint64_compare, + .data_size = sizeof(uint64_t) }, - [IDT_BYTES] = { + [UT_BYTES] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, .compare_function = size_compare, .name = "num_bytes", .data_size = sizeof(uint64_t) }, - [IDT_FILES] = { + [UT_FILES] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, .compare_function = size_compare, @@ -226,32 +215,14 @@ static struct osl_column_description user_table_cols[] = { }, }; -static struct osl_table_description user_table_desc = { - .num_columns = NUM_UT_COLUMNS, - .flags = 0, - .column_descriptions = user_table_cols, - .dir = "/tmp/adu" -}; static struct osl_table *dir_table; -static struct osl_table *id_table; - -static int create_tables(void) -{ - int ret = osl_create_table(&dir_table_desc); - if (ret < 0) - return ret; - ret = osl_create_table(&id_table_desc); - if (ret < 0) - return ret; - return 1; -} -int add_directory(char *dirname, uint32_t dir_num, uint64_t *dir_size, +static int add_directory(char *dirname, uint64_t dir_num, uint64_t *dir_size, uint64_t *dir_files) { struct osl_object dir_objects[NUM_DT_COLUMNS]; - INFO_LOG("adding #%u: %s\n", dir_num, dirname); + INFO_LOG("adding #%llu: %s\n", (long long unsigned)dir_num, dirname); dir_objects[DT_NAME].data = dirname; dir_objects[DT_NAME].size = strlen(dirname) + 1; dir_objects[DT_NUM].data = &dir_num; @@ -264,78 +235,130 @@ int add_directory(char *dirname, uint32_t dir_num, uint64_t *dir_size, return osl_add_row(dir_table, dir_objects); } -int create_and_open_user_table(uint32_t uid, struct osl_table **t) +static uint32_t num_uids; + +static int open_user_table(struct user_info *ui, int create) { int 
ret; - struct osl_table_description *desc = para_malloc(sizeof(*desc)); - - desc->num_columns = NUM_UT_COLUMNS; - desc->flags = 0; - desc->column_descriptions = user_table_cols; - desc->dir = para_strdup("/tmp/adu"); - desc->name = make_message("%u", uid); - INFO_LOG("................................. %u\n", uid); -// user_table_desc.name = make_message("%u", uid); - ret = osl_create_table(desc); + + ui->desc = para_malloc(sizeof(*ui->desc)); + ui->desc->num_columns = NUM_UT_COLUMNS; + ui->desc->flags = 0; + ui->desc->column_descriptions = user_table_cols; + ui->desc->dir = para_strdup(DATABASE_DIR); + ui->desc->name = make_message("%u", (unsigned)ui->uid); + num_uids++; + INFO_LOG(".............................uid #%u: %u\n", + (unsigned)num_uids, (unsigned)ui->uid); + if (create) { + ret = osl_create_table(ui->desc); + if (ret < 0) + goto err; + } + ret = osl_open_table(ui->desc, &ui->table); if (ret < 0) - return ret; - return osl_open_table(desc, t); + goto err; + return 1; +err: + free((char *)ui->desc->name); + free((char *)ui->desc->dir); + free(ui->desc); + ui->desc->name = NULL; + ui->desc->dir = NULL; + ui->desc = NULL; + ui->table = NULL; + return ret; } -static int insert_id_row(uint32_t uid, struct osl_table *t, struct osl_row **row) +#define uid_hash_bits 8 +static uint32_t uid_hash_table_size = 1 << uid_hash_bits; +#define PRIME1 0x811c9dc5 +#define PRIME2 0x01000193 + +static void create_hash_table(void) { - struct osl_object objects[NUM_IDT_COLUMNS]; - uint64_t num = 0; - - struct osl_table **table_ptr = para_malloc(sizeof(*table_ptr)); - *table_ptr = t; - - INFO_LOG("§§§§§§§§§§§§§§§§§§§§§ uid: %d, t: %p\n", uid, t); - objects[IDT_UID].data = &uid; - objects[IDT_UID].size = sizeof(uid); - objects[IDT_BYTES].data = # - objects[IDT_BYTES].size = sizeof(num); - objects[IDT_FILES].data = # - objects[IDT_FILES].size = sizeof(num); - objects[IDT_TABLE].data = table_ptr; - objects[IDT_TABLE].size = sizeof(*table_ptr); - return 
osl_add_and_get_row(id_table, objects, row); + uid_hash_table = para_calloc(uid_hash_table_size + * sizeof(struct user_info)); } -static int get_user_table(struct osl_row *row, struct osl_table **t) +static void free_hash_table(void) { - struct osl_object obj; + free(uid_hash_table); + uid_hash_table = NULL; +} + +static int create_tables(void) +{ + int ret; - int ret = osl_get_object(id_table, row, IDT_TABLE, &obj); + ret = osl_create_table(&dir_table_desc); if (ret < 0) return ret; - *t = *(struct osl_table **)obj.data; - INFO_LOG("^^^^^^^^^^^^^^^^^^ t: %p\n", *t); + create_hash_table(); return 1; } -static int add_id_bytes(struct osl_row *row, uint64_t *add) +/* + * We use a hash table of size s=2^uid_hash_bits to map the uids into the + * interval [0..s-1]. Hash collisions are treated by open addressing, i.e. + * unused slots in the table are used to store different uids that hash to the + * same slot. + * + * If a hash collision occurs, different slots are successively probed in order + * to find an unused slot for the new uid. Probing is implemented via a second + * hash function that maps the uid to h=(uid * PRIME2) | 1, which is always an + * odd number. + * + * An odd number is sufficient to make sure each entry of the hash table gets + * probed for probe_num between 0 and s-1 because s is a power of two, hence + * the second hash value never has a common divisor with the hash table size. + * IOW: h is invertible in the ring [0..s-1]. 
+ */ +static uint32_t double_hash(uint32_t uid, uint32_t probe_num) { - uint64_t num; - struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)}; + return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num) + % uid_hash_table_size; +} - /* update number of bytes */ - int ret = osl_get_object(id_table, row, IDT_BYTES, &obj1); - if (ret < 0) - return ret; - num = *(uint64_t *)obj1.data + *add; - ret = osl_update_object(id_table, row, IDT_BYTES, &obj2); - if (ret < 0) - return ret; - /* increment number of files */ - ret = osl_get_object(id_table, row, IDT_FILES, &obj1); - if (ret < 0) - return ret; - num = *(uint64_t *)obj1.data + 1; - return osl_update_object(id_table, row, IDT_FILES, &obj2); +#define FOR_EACH_USER(ui) for (ui = uid_hash_table; ui && ui < uid_hash_table \ + + uid_hash_table_size; ui++) + +enum search_uid_flags { + OPEN_USER_TABLE = 1, + CREATE_USER_TABLE = 2, +}; + +static int search_uid(uint32_t uid, enum search_uid_flags flags, + struct user_info **ui_ptr) +{ + uint32_t p; + + for (p = 0; p < uid_hash_table_size; p++) { + struct user_info *ui = uid_hash_table + double_hash(uid, p); + + if (!ui->table) { + int ret; + + if (!flags) + return -E_BAD_UID; + ui->uid = uid; + ret = open_user_table(ui, flags & CREATE_USER_TABLE); + if (ret < 0) + return ret; + if (ui_ptr) + *ui_ptr = ui; + return 1; + } + if (ui->uid != uid) + continue; + if (ui_ptr) + *ui_ptr = ui; + return 0; + } + return flags? 
-E_HASH_TABLE_OVERFLOW : -E_BAD_UID; } -static int update_user_row(struct osl_table *t, uint32_t dir_num, +static int update_user_row(struct osl_table *t, uint64_t dir_num, uint64_t *add) { struct osl_row *row; @@ -378,7 +401,9 @@ static int update_user_row(struct osl_table *t, uint32_t dir_num, } } -static uint32_t dir_num; +static uint64_t num_dirs; +static uint64_t num_files; +static uint64_t num_bytes; int scan_dir(char *dirname) { @@ -386,9 +411,9 @@ int scan_dir(char *dirname) struct dirent *entry; int ret, cwd_fd, ret2; uint64_t dir_size = 0, dir_files = 0; - struct osl_object obj; + uint64_t this_dir_num = num_dirs++; - INFO_LOG("----------------- %s\n", dirname); + DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)num_dirs, dirname); ret = para_opendir(dirname, &dir, &cwd_fd); if (ret < 0) { if (ret != -ERRNO_TO_ERROR(EACCES)) @@ -402,15 +427,17 @@ int scan_dir(char *dirname) struct stat s; uint32_t uid; uint64_t size; - struct osl_row *id_row; - struct osl_table *user_table; + struct user_info *ui; if (!strcmp(entry->d_name, ".")) continue; if (!strcmp(entry->d_name, "..")) continue; - if (lstat(entry->d_name, &s) == -1) + if (lstat(entry->d_name, &s) == -1) { + WARNING_LOG("lstat error for %s/%s\n", dirname, + entry->d_name); continue; + } m = s.st_mode; if (!S_ISREG(m) && !S_ISDIR(m)) continue; @@ -425,36 +452,20 @@ int scan_dir(char *dirname) /* regular file */ size = s.st_size; dir_size += size; + num_bytes += size; dir_files++; + num_files++; uid = s.st_uid; - INFO_LOG("++++++++++++++++++++++++++ %s, uid: %u\n", entry->d_name, uid); - obj.data = &uid; - obj.size = sizeof(uid); - ret = osl_get_row(id_table, IDT_UID, &obj, &id_row); - if (ret < 0 && ret != -E_RB_KEY_NOT_FOUND) - goto out; - if (ret < 0) { - ret = create_and_open_user_table(uid, &user_table); - if (ret < 0) - goto out; - ret = insert_id_row(uid, user_table, &id_row); - if (ret < 0) - goto out; - } else { - ret = get_user_table(id_row, &user_table); - if (ret < 0) - goto out; 
- } - ret = add_id_bytes(id_row, &size); + ret = search_uid(uid, CREATE_USER_TABLE | OPEN_USER_TABLE, &ui); if (ret < 0) goto out; - INFO_LOG("user_table: %p\n", user_table); - ret = update_user_row(user_table, dir_num, &size); - INFO_LOG("update_user ret: %d\n", ret); + ui->bytes += size; + ui->files++; + ret = update_user_row(ui->table, this_dir_num, &size); if (ret < 0) goto out; } - ret = add_directory(dirname, dir_num++, &dir_size, &dir_files); + ret = add_directory(dirname, this_dir_num, &dir_size, &dir_files); out: closedir(dir); ret2 = para_fchdir(cwd_fd); @@ -475,166 +486,361 @@ static int get_dir_name(struct osl_row *row, char **name) return 1; } -static int print_dirname_and_size(struct osl_row *row, void *data) +enum global_stats_flags { + GSF_PRINT_DIRNAME = 1, + GSF_PRINT_BYTES = 2, + GSF_PRINT_FILES = 4, + GSF_COMPUTE_SUMMARY = 8, +}; + +struct global_stats_info { + uint32_t count; + enum global_stats_flags flags; +}; + +static int global_stats_loop_function(struct osl_row *row, void *data) { - unsigned *count = data; + struct global_stats_info *gsi = data; struct osl_object obj; - char *name; - int ret; + char *dirname; + int ret, summary = gsi->flags & GSF_COMPUTE_SUMMARY; - if ((*count)++ > 100) + if (!gsi->count && !summary) return -E_LOOP_COMPLETE; - ret = get_dir_name(row, &name); - if (ret < 0) - return ret; - ret = osl_get_object(dir_table, row, DT_BYTES, &obj); - if (ret < 0) - return ret; - printf("%s\t%llu\n", name, *(long long unsigned *)obj.data); + if (gsi->count && (gsi->flags & GSF_PRINT_DIRNAME)) { + ret = get_dir_name(row, &dirname); + if (ret < 0) + return ret; + printf("%s%s", dirname, + (gsi->flags & (GSF_PRINT_FILES | GSF_PRINT_BYTES))? 
+ "\t" : "\n" + ); + } + if (summary || (gsi->count && (gsi->flags & GSF_PRINT_FILES))) { + uint64_t files; + ret = osl_get_object(dir_table, row, DT_FILES, &obj); + if (ret < 0) + return ret; + files = *(uint64_t *)obj.data; + if (gsi->count && (gsi->flags & GSF_PRINT_FILES)) + printf("%llu%s", (long long unsigned)files, + (gsi->flags & GSF_PRINT_BYTES)? "\t" : "\n"); + if (summary) + num_files += files; + } + if (summary || (gsi->count && (gsi->flags & GSF_PRINT_BYTES))) { + uint64_t bytes; + ret = osl_get_object(dir_table, row, DT_BYTES, &obj); + if (ret < 0) + return ret; + bytes = *(uint64_t *)obj.data; + if (gsi->count && (gsi->flags & GSF_PRINT_BYTES)) + printf("%llu\n", (long long unsigned)bytes); + if (summary) { + num_bytes += bytes; + num_dirs++; + } + } + if (gsi->count) + gsi->count--; return 1; } -static int print_dirname_and_file_count(struct osl_row *row, void *data) +static void print_id_stats(void) +{ + struct user_info *ui; + + printf("--------------------- user summary (uid/dirs/files/bytes):\n"); + FOR_EACH_USER(ui) { + if (!ui->table) + continue; + printf("%u\t%llu\t%llu\t%llu\n", (unsigned)ui->uid, + (long long unsigned)ui->dirs, + (long long unsigned)ui->files, + (long long unsigned)ui->bytes); + } +} + +enum user_stats_flags { + USF_PRINT_DIRNAME = 1, + USF_PRINT_BYTES = 2, + USF_PRINT_FILES = 4, + USF_COMPUTE_SUMMARY = 8, +}; + +struct user_stats_info { + uint32_t count; + enum user_stats_flags flags; + struct user_info *ui; +}; + +static int user_stats_loop_function(struct osl_row *row, void *data) { - unsigned *count = data; + struct user_stats_info *usi = data; + struct osl_row *dir_row; struct osl_object obj; - char *name; - int ret; + int ret, summary = usi->flags & GSF_COMPUTE_SUMMARY; - if ((*count)++ > 100) + if (!usi->count && !summary) return -E_LOOP_COMPLETE; - ret = get_dir_name(row, &name); - if (ret < 0) - return ret; - ret = osl_get_object(dir_table, row, DT_FILES, &obj); - if (ret < 0) - return ret; - printf("%s\t%llu\n", 
name, *(long long unsigned *)obj.data); + if (usi->count && (usi->flags & USF_PRINT_DIRNAME)) { + char *dirname; + ret = osl_get_object(usi->ui->table, row, UT_DIR_NUM, &obj); + if (ret < 0) + return ret; + ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row); + if (ret < 0) + return ret; + ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj); + if (ret < 0) + return ret; + dirname = obj.data; + printf("%s%s", + dirname, + (usi->flags & (USF_PRINT_FILES | USF_PRINT_BYTES))? + "\t" : "\n" + ); + } + if (summary || (usi->count && (usi->flags & USF_PRINT_FILES))) { + uint64_t files; + ret = osl_get_object(usi->ui->table, row, UT_FILES, &obj); + if (ret < 0) + return ret; + files = *(uint64_t *)obj.data; + if (usi->count && (usi->flags & USF_PRINT_FILES)) + printf("%llu%s", + (long long unsigned)files, + (usi->flags & USF_PRINT_BYTES)? "\t" : "\n" + ); + if (summary) + usi->ui->files += files; + } + if (summary || (usi->count && (usi->flags & USF_PRINT_BYTES))) { + uint64_t bytes; + ret = osl_get_object(usi->ui->table, row, UT_BYTES, &obj); + if (ret < 0) + return ret; + bytes = *(uint64_t *)obj.data; + if (usi->count && (usi->flags & USF_PRINT_BYTES)) + printf("%llu\n", (long long unsigned)bytes); + if (summary) { + usi->ui->bytes += bytes; + usi->ui->dirs++; + } + + } + if (usi->count) + usi->count--; return 1; } -static int print_id_stats(struct osl_row *row, __a_unused void *data) +static void print_user_stats(void) { - struct osl_object obj; - uint32_t uid; - uint64_t bytes, files; - int ret = osl_get_object(id_table, row, IDT_UID, &obj); + struct user_info *ui; + + FOR_EACH_USER(ui) { + struct user_stats_info usi = { + .count = 10, + .ui = ui + }; + if (!ui->table) + continue; + usi.flags = USF_PRINT_DIRNAME | USF_PRINT_BYTES | USF_COMPUTE_SUMMARY; + printf("************************************************ uid %u\n", + (unsigned) ui->uid); + if (!ui->table) + continue; + printf("----------------- Largest dirs -------------------\n"); + 
osl_rbtree_loop_reverse(ui->table, UT_BYTES, &usi, + user_stats_loop_function); + printf("---------- dirs containing most files ------------\n"); + usi.count = 10; + usi.flags = USF_PRINT_DIRNAME | USF_PRINT_FILES; + osl_rbtree_loop_reverse(ui->table, UT_FILES, &usi, + user_stats_loop_function); + } +} - if (ret < 0) - return ret; - uid = *(uint32_t *)obj.data; - ret = osl_get_object(id_table, row, IDT_BYTES, &obj); - if (ret < 0) +static int print_statistics(void) +{ + int ret; + struct global_stats_info gsi = { + .count = 10, + .flags = GSF_PRINT_DIRNAME | GSF_PRINT_BYTES | GSF_COMPUTE_SUMMARY + }; + + printf("----------------- Largest dirs -------------------\n"); + ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &gsi, + global_stats_loop_function); + if (ret < 0 && ret != -E_LOOP_COMPLETE) return ret; - bytes = *(uint64_t *)obj.data; - ret = osl_get_object(id_table, row, IDT_FILES, &obj); - if (ret < 0) + gsi.count = 10; + + gsi.flags = GSF_PRINT_DIRNAME | GSF_PRINT_FILES; + printf("---------- dirs containing most files ------------\n"); + ret = osl_rbtree_loop_reverse(dir_table, DT_FILES, &gsi, + global_stats_loop_function); + if (ret < 0 && ret != -E_LOOP_COMPLETE) return ret; - files = *(uint64_t *)obj.data; - printf("%u\t%llu\t%llu\n", (unsigned)uid, (long long unsigned)files, - (long long unsigned)bytes); + printf("------------------ Global summary (dirs/files/bytes)\n" + "%llu\t%llu\t%llu\n", + (long long unsigned)num_dirs, (long long unsigned)num_files, + (long long unsigned)num_bytes); + print_user_stats(); + print_id_stats(); return 1; } -struct id_dir_stat_info { - unsigned count; - struct osl_table *user_table; -}; +static int write_uid_list(void) +{ + char *buf; + uint32_t count = 0; + struct user_info *ui; + size_t size = num_uids * sizeof(uint32_t); + int ret; + + if (!num_uids) + return 0; + buf = para_malloc(size); + FOR_EACH_USER(ui) { + if (!ui->table) + continue; + write_u32(buf + count++ * sizeof(uint32_t), ui->uid); + } + ret = 
para_write_file(UID_LIST, buf, size); + free(buf); + return ret; +} -static int print_big_dir(struct osl_row *row, void *data) +static int open_dir_table(void) +{ + return osl_open_table(&dir_table_desc, &dir_table); +} + +static void close_dir_table(void) { - struct id_dir_stat_info *info = data; - info->count++; int ret; - struct osl_row *dir_row; - char *dirname; - uint64_t bytes; - struct osl_object obj; - if (info->count > 10) - return -E_LOOP_COMPLETE; - ret = osl_get_object(info->user_table, row, UT_BYTES, &obj); + if (!dir_table) + return; + ret = osl_close_table(dir_table, OSL_MARK_CLEAN); if (ret < 0) - return ret; - bytes = *(uint64_t *)obj.data; - ret = osl_get_object(info->user_table, row, UT_DIR_NUM, &obj); + ERROR_LOG("failed to close dir table: %s\n", error_txt(-ret)); + dir_table = NULL; +} + +static void close_user_table(struct user_info *ui) +{ + int ret; + + if (!ui || !ui->table) + return; + ret = osl_close_table(ui->table, OSL_MARK_CLEAN); if (ret < 0) - return ret; - ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row); + ERROR_LOG("failed to close user table %u: %s\n", + (unsigned) ui->uid, error_txt(-ret)); + free((char *)ui->desc->name); + ui->desc->name = NULL; + free((char *)ui->desc->dir); + ui->desc->dir = NULL; + free(ui->desc); + ui->desc = NULL; + ui->table = NULL; +} + +static void close_user_tables(void) +{ + struct user_info *ui; + + FOR_EACH_USER(ui) + close_user_table(ui); +} + +static void close_all_tables(void) +{ + close_dir_table(); + close_user_tables(); + free_hash_table(); +} + +static int com_create() +{ + int ret = create_tables(); if (ret < 0) return ret; - ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj); + ret = open_dir_table(); if (ret < 0) return ret; - dirname = obj.data; - printf("%s: %llu\n", dirname, (long long unsigned)bytes); - return 1; + ret = scan_dir(conf.base_dir_arg); + if (ret < 0) + goto out; + ret = write_uid_list(); +out: + close_all_tables(); + return ret; } -static int 
print_id_dir_stats(struct osl_row *row, __a_unused void *data) +static int read_uid_file(void) { - struct osl_object obj; - uint32_t uid; - int ret = osl_get_object(id_table, row, IDT_UID, &obj); - struct id_dir_stat_info info = {.count = 0}; + char *map; + size_t size; + int ret = mmap_full_file(UID_LIST, O_RDONLY, (void **)&map, &size, NULL); + uint32_t n; if (ret < 0) return ret; - uid = *(uint32_t *)obj.data; - - ret = osl_get_object(id_table, row, IDT_TABLE, &obj); - if (ret < 0) - return ret; - info.user_table = *(struct osl_table **)obj.data; - - printf("************************* Big dirs owned by uid %u\n", (unsigned) uid); - osl_rbtree_loop_reverse(info.user_table, IDT_BYTES, &info, print_big_dir); - return 1; + num_uids = size / 4; + /* hash table size should be a power of two and larger than the number of uids */ + uid_hash_table_size = 4; + while (uid_hash_table_size < num_uids) + uid_hash_table_size *= 2; + create_hash_table(); + for (n = 0; n < num_uids; n++) { + uint32_t uid = read_u32(map + n * sizeof(uid)); + ret = search_uid(uid, OPEN_USER_TABLE, NULL); + if (ret < 0) + goto out; + } +out: + para_munmap(map, size); + return ret; } -static int print_statistics(void) +static int com_select(void) { - unsigned count = 0; int ret; - printf("************************* Biggest dirs\n"); - ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &count, print_dirname_and_size); - if (ret < 0 && ret != -E_LOOP_COMPLETE) - return ret; - count = 0; - printf("************************* dirs containing many files\n"); - ret = osl_rbtree_loop_reverse(dir_table, DT_FILES, &count, print_dirname_and_file_count); - if (ret < 0 && ret != -E_LOOP_COMPLETE) + ret = open_dir_table(); + if (ret < 0) return ret; - - printf("************************* dirs stats by owner\n"); - ret = osl_rbtree_loop(id_table, IDT_BYTES, NULL, print_id_stats); + ret = read_uid_file(); if (ret < 0) return ret; - - return osl_rbtree_loop(id_table, IDT_BYTES, NULL, print_id_dir_stats); + 
print_statistics(); + close_all_tables(); + return 1; } - int main(int argc, char **argv) { - int ret = create_tables(); - if (ret < 0) - goto out; - ret = osl_open_table(&dir_table_desc, &dir_table); - if (ret < 0) - goto out; - ret = osl_open_table(&id_table_desc, &id_table); - if (ret < 0) - goto out; + int ret; + struct cmdline_parser_params params = { + .override = 0, + .initialize = 1, + .check_required = 0, + .check_ambiguity = 0, + .print_errors = 1 + }; + + cmdline_parser_ext(argc, argv, &conf, ¶ms); /* aborts on errors */ ret = -E_SYNTAX; - if (argc != 2) - goto out; - ret = scan_dir(argv[1]); + if (conf.select_given) + ret = com_select(); + else + ret = com_create(); if (ret < 0) goto out; - print_statistics(); out: if (ret < 0) { ERROR_LOG("%s\n", error_txt(-ret)); @@ -642,4 +848,3 @@ out: } return EXIT_SUCCESS; } - diff --git a/adu.ggo b/adu.ggo new file mode 100644 index 0000000..ab6e025 --- /dev/null +++ b/adu.ggo @@ -0,0 +1,163 @@ +# Copyright (C) 2008 Andre Noll +# +# Licensed under the GPL v2. For licencing details see COPYING. + +package "adu" +version "0.0.1" +purpose "advanced disk usage + +adu creates a database containing disk usage statistics of a given +directory. It allows to query that database to quickly retrieve +usage patterns of subdirectories and/or files owned by a given user id. +" + +######################### +section "General options" +######################### + +option "config-file" c +#~~~~~~~~~~~~~~~~~~~~~ +"(default='~/.adurc')" +string typestr="filename" +optional +details=" + Options may be given at the command line or in the + configuration file. As usual, if an option is given both at + the command line and in the configuration file, the command + line option takes precedence. + +" + +option "database-dir" d +#~~~~~~~~~~~~~~~~~~~~~~ +"directory containing the osl tables" +string typestr="path" +required +details=" + Full path to the directory containing the osl tables. This + directory must exist. 
It must be writable for the user running + adu in --create mode and readable in --select mode. + +" +option "loglevel" l +#~~~~~~~~~~~~~~~~~~ +"Set loglevel (0-6)" +int typestr="level" +default="3" +optional +details=" + Log messages are always written to stderr while normal output + goes to stdout. Lower values mean more verbose logging. +" + +option "uid" u +#~~~~~~~~~~~~~ +"user id(s) to take into account" +string typestr="uid_spec" +optional +multiple +details=" + An uid specifier may be a single number, or a range of uids. + Example: + + --uid 42 # only consider uid 42 + --uid 42- # only consider uids greater than or equal to 42 + --uid 23-42 # only consider uids between 23 and 42, inclusively. + + This option may be given multiple times. An uid is taken into + account if it satisfies at least one --uid option. +" + + +option "paths" p +#~~~~~~~~~~~~~~~ +"files to take into account" +string typestr="pattern" +optional +details=" + Shell wildcard pattern that must match a file in order to be + included in the database in --create mode or in the output + for --select mode. Only the part of the filename below the + base directory is matched against the pattern. The default + is to take all files into account. See fnmatch(3) for details. +" + +############### +section "Modes" +############### + +defgroup "mode" +#============== +groupdesc=" + adu may be started in one of two possible modes, each of which + corresponds to a different command line option. Exactly one + of these options must be given. + +" +required + +groupoption "create" C +#~~~~~~~~~~~~~~~~~~~~~ +"Create a new database" +group="mode" +details=" + Traverse the given directory and track disk usage on a per-user + basis. Results are stored in N + 1 osl tables where N is + the number of uids that own at least one regular file in + that directory. 
+" + +groupoption "select" S +#~~~~~~~~~~~~~~~~~~~~~ +"query a database previously created with --create" +group="mode" +details=" + This option prints statistics about matching subdirectories to + stdout. The output depends on the other options, see below. +" + +############################## +section "Options for --create" +############################## + +option "base-dir" b +#~~~~~~~~~~~~~~~~~~ +"directory to traverse" +string typestr="path" +dependon="create" +required +details=" + The base directory to be traversed recursively. A warning + message is printed for each subdirectory that could not be + read because of insufficient permission. These directories + will be ignored when computing statistics. +" + + +############################## +section "Options for --select" +############################## + +option "limit" L +#~~~~~~~~~~~~~~~ +"Limit output" +int typestr="num" +required +dependon="select" +details=" + Only print num lines of output. +" + +option "units" U +#~~~~~~~~~~~~~~~ +"select numerical output format" +string typestr="format" +optional +details=" + Print the number of files/directories and the sizes in + the given format. All sizes are output in these units: + (h)uman-readable, (b)ytes, (k)ilobytes, (m)egabytes, + (g)igabytes, (t)erabytes. Capitalise to use multiples + of 1000 (S.I.) instead of 1024. The default is \"h\", + i.e. human-readable. +" diff --git a/error.h b/error.h index 8603291..1067e7f 100644 --- a/error.h +++ b/error.h @@ -91,7 +91,9 @@ static inline char *error_txt(int num) _ERROR(EMPTY, "file empty") \ _ERROR(MMAP, "mmap error") \ _ERROR(SYNTAX, "syntax error") \ - _ERROR(LOOP_COMPLETE, "loop complete") + _ERROR(LOOP_COMPLETE, "loop complete") \ + _ERROR(HASH_TABLE_OVERFLOW, "hash table too small") \ + _ERROR(BAD_UID, "uid not found in hash table") /**