X-Git-Url: http://git.tuebingen.mpg.de/?p=adu.git;a=blobdiff_plain;f=adu.c;h=54af136ad789314b88392658077fa88c926d766f;hp=0e32f8564079b4d5eac0c9302f5e95a248d64062;hb=694171759afb90bfdc552676ff6d04697c93af00;hpb=a4906ffe6dce718374052de10b9def84433c84df diff --git a/adu.c b/adu.c index 0e32f85..54af136 100644 --- a/adu.c +++ b/adu.c @@ -2,18 +2,129 @@ #include /* readdir() */ #include "gcc-compat.h" -#include "osl.h" +#include "cmdline.h" #include "fd.h" -#include "hash.h" #include "string.h" #include "error.h" +#include "portable_io.h" DEFINE_ERRLIST; +int osl_errno; + +/** Command line and config file options. */ +static struct gengetopt_args_info conf; + +enum uid_info_flags { + /** whether this slot of the hash table is used. */ + UI_FL_SLOT_USED = 1, + /** whether this uid should be taken into account. */ + UI_FL_ADMISSIBLE = 2, +}; + +struct user_info { + uint32_t uid; + uint32_t flags; + struct osl_table *table; + uint64_t files; + uint64_t bytes; + uint64_t dirs; + struct osl_table_description *desc; +}; + +/** The decimal representation of an uint64_t never exceeds that size. */ +#define FORMATED_VALUE_SIZE 25 + +#define FOR_EACH_USER(ui) for (ui = uid_hash_table; ui && ui < uid_hash_table \ + + uid_hash_table_size; ui++) + + +/** + * Contains info for each user that owns at least one regular file. + * + * Even users that are not taken into account because of the --uid + * option occupy a slot in this hash table. This allows to find out + * quicky whether a uid is admissible. And yes, this has to be fast. + */ +static struct user_info *uid_hash_table; + +/* these get filled in by the select command. 
*/ +static char count_unit_buf[4] = "( )", size_unit_buf[4] = "( )"; + +static inline int ui_used(struct user_info *ui) +{ + return ui->flags & UI_FL_SLOT_USED; +} + +static inline int ui_admissible(struct user_info *ui) +{ + return ui->flags & UI_FL_ADMISSIBLE; +} + +struct uid_range { + uint32_t low; + uint32_t high; +}; + +static struct uid_range *admissible_uids; + +static inline int check_uid_arg(const char *arg, uint32_t *uid) +{ + const uint32_t max = ~0U; + /* + * we need an 64-bit int for string -> uid conversion because strtoll() + * returns a signed value. + */ + int64_t val; + int ret = para_atoi64(arg, &val); + + if (ret < 0) + return ret; + if (val < 0 || val > max) + return -ERRNO_TO_ERROR(EINVAL); + *uid = val; + return 1; +} + +static int parse_uid_range(const char *orig_arg, struct uid_range *ur) +{ + int ret; + char *arg = para_strdup(orig_arg), *p = strchr(arg, '-'); + + if (!p || p == arg) { /* -42 or 42 */ + ret = check_uid_arg(p? p + 1 : arg, &ur->high); + if (ret < 0) + goto out; + ur->low = p? 0 : ur->high; + ret = 1; + goto out; + } + /* 42- or 42-4711 */ + *p = '\0'; + p++; + ret = check_uid_arg(arg, &ur->low); + if (ret < 0) + goto out; + ur->high = ~0U; + if (*p) { /* 42-4711 */ + ret = check_uid_arg(p, &ur->high); + if (ret < 0) + goto out; + } + if (ur->low > ur->high) + ret = -ERRNO_TO_ERROR(EINVAL); +out: + if (ret < 0) + ERROR_LOG("bad uid option: %s\n", orig_arg); + else + INFO_LOG("admissible uid range: %u - %u\n", ur->low, + ur->high); + free(arg); + return ret; +} /** evaluates to 1 if x < y, to -1 if x > y and to 0 if x == y */ #define NUM_COMPARE(x, y) ((int)((x) < (y)) - (int)((x) > (y))) - /** * The log function. * @@ -30,7 +141,7 @@ __printf_2_3 void __log(int ll, const char* fmt,...) 
time_t t1; char str[255] = ""; - if (ll < 4) + if (ll < conf.loglevel_arg) return; outfd = stderr; time(&t1); @@ -65,25 +176,26 @@ static int size_compare(const struct osl_object *obj1, const struct osl_object * } /** - * Compare two osl objects of string type. + * Compare two osl objects pointing to unsigned integers of 64 bit size. * - * \param obj1 Pointer to the first object. - * \param obj2 Pointer to the second object. - * - * In any case, only \p MIN(obj1->size, obj2->size) characters of each string - * are taken into account. + * \param obj1 Pointer to the first integer. + * \param obj2 Pointer to the second integer. * - * \return It returns an integer less than, equal to, or greater than zero if - * \a obj1 is found, respectively, to be less than, to match, or be greater than - * obj2. + * \return The values required for an osl compare function. * - * \sa strcmp(3), strncmp(3), osl_compare_func. + * \sa osl_compare_func, osl_hash_compare(). */ -int string_compare(const struct osl_object *obj1, const struct osl_object *obj2) +static int uint64_compare(const struct osl_object *obj1, + const struct osl_object *obj2) { - const char *str1 = (const char *)obj1->data; - const char *str2 = (const char *)obj2->data; - return strncmp(str1, str2, MIN(obj1->size, obj2->size)); + uint64_t d1 = read_u64((const char *)obj1->data); + uint64_t d2 = read_u64((const char *)obj2->data); + + if (d1 < d2) + return 1; + if (d1 > d2) + return -1; + return 0; } /** The columns of the directory table. */ @@ -92,6 +204,8 @@ enum dir_table_columns { DT_NAME, /** The dir count number. */ DT_NUM, + /** The number of the parent directory. */ + DT_PARENT_NUM, /** The number of bytes of all regular files. */ DT_BYTES, /** The number of all regular files. 
*/ @@ -103,16 +217,22 @@ enum dir_table_columns { static struct osl_column_description dir_table_cols[] = { [DT_NAME] = { .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_UNIQUE, + .storage_flags = 0, .name = "dir", - .compare_function = string_compare, }, [DT_NUM] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, .name = "num", - .compare_function = uint32_compare, - .data_size = sizeof(uint32_t) + .compare_function = uint64_compare, + .data_size = sizeof(uint64_t) + }, + [DT_PARENT_NUM] = { + .storage_type = OSL_MAPPED_STORAGE, + .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, + .name = "parent_num", + .compare_function = size_compare, + .data_size = sizeof(uint64_t) }, [DT_BYTES] = { .storage_type = OSL_MAPPED_STORAGE, @@ -135,59 +255,6 @@ static struct osl_table_description dir_table_desc = { .num_columns = NUM_DT_COLUMNS, .flags = 0, .column_descriptions = dir_table_cols, - .dir = "/tmp/adu" -}; - -/** The columns of the id table. */ -enum id_table_columns { - /** The user id. */ - IDT_UID, - /** The number of bytes of all regular files owned by this id. */ - IDT_BYTES, - /** The number of regular files owned by this id. */ - IDT_FILES, - /** The user table for this uid. */ - IDT_TABLE, - /** Number of columns in this table. 
*/ - NUM_IDT_COLUMNS -}; - -static struct osl_column_description id_table_cols[] = { - [IDT_UID] = { - .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, - .name = "uid", - .compare_function = uint32_compare, - .data_size = sizeof(uint32_t) - }, - [IDT_BYTES] = { - .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, - .compare_function = size_compare, - .name = "num_bytes", - .data_size = sizeof(uint64_t) - }, - [IDT_FILES] = { - .storage_type = OSL_MAPPED_STORAGE, - .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, - .compare_function = size_compare, - .name = "num_filess", - .data_size = sizeof(uint64_t) - }, - [IDT_TABLE] = { - .storage_type = OSL_NO_STORAGE, - .storage_flags = OSL_FIXED_SIZE | OSL_UNIQUE, - .name = "user_table", - .data_size = sizeof(void *) - } -}; - -static struct osl_table_description id_table_desc = { - .name = "id_table", - .num_columns = NUM_IDT_COLUMNS, - .flags = 0, - .column_descriptions = id_table_cols, - .dir = "/tmp/adu" }; /** The columns of the id table. 
*/ @@ -207,17 +274,17 @@ static struct osl_column_description user_table_cols[] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE, .name = "dir_num", - .compare_function = uint32_compare, - .data_size = sizeof(uint32_t) + .compare_function = uint64_compare, + .data_size = sizeof(uint64_t) }, - [IDT_BYTES] = { + [UT_BYTES] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, .compare_function = size_compare, .name = "num_bytes", .data_size = sizeof(uint64_t) }, - [IDT_FILES] = { + [UT_FILES] = { .storage_type = OSL_MAPPED_STORAGE, .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE, .compare_function = size_compare, @@ -226,124 +293,251 @@ static struct osl_column_description user_table_cols[] = { }, }; -static struct osl_table_description user_table_desc = { - .num_columns = NUM_UT_COLUMNS, - .flags = 0, - .column_descriptions = user_table_cols, - .dir = "/tmp/adu" -}; static struct osl_table *dir_table; -static struct osl_table *id_table; -static int create_tables(void) -{ - int ret = osl_create_table(&dir_table_desc); - if (ret < 0) - return ret; - ret = osl_create_table(&id_table_desc); - if (ret < 0) - return ret; - return 1; -} - -int add_directory(char *dirname, uint32_t dir_num, uint64_t *dir_size, - uint64_t *dir_files) +static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num, + uint64_t *dir_size, uint64_t *dir_files) { struct osl_object dir_objects[NUM_DT_COLUMNS]; - INFO_LOG("adding #%u: %s\n", dir_num, dirname); + INFO_LOG("adding #%llu: %s\n", (long long unsigned)*dir_num, dirname); dir_objects[DT_NAME].data = dirname; dir_objects[DT_NAME].size = strlen(dirname) + 1; - dir_objects[DT_NUM].data = &dir_num; - dir_objects[DT_NUM].size = sizeof(dir_num); + dir_objects[DT_NUM].data = dir_num; + dir_objects[DT_NUM].size = sizeof(*dir_num); + dir_objects[DT_PARENT_NUM].data = parent_dir_num; + dir_objects[DT_PARENT_NUM].size = sizeof(*parent_dir_num); 
dir_objects[DT_BYTES].data = dir_size; dir_objects[DT_BYTES].size = sizeof(*dir_size); dir_objects[DT_FILES].data = dir_files; dir_objects[DT_FILES].size = sizeof(*dir_files); - - return osl_add_row(dir_table, dir_objects); + return osl(osl_add_row(dir_table, dir_objects)); } -int create_and_open_user_table(uint32_t uid, struct osl_table **t) +static uint32_t num_uids; + +static int open_user_table(struct user_info *ui, int create) { int ret; - struct osl_table_description *desc = para_malloc(sizeof(*desc)); - - desc->num_columns = NUM_UT_COLUMNS; - desc->flags = 0; - desc->column_descriptions = user_table_cols; - desc->dir = para_strdup("/tmp/adu"); - desc->name = make_message("%u", uid); - INFO_LOG("................................. %u\n", uid); -// user_table_desc.name = make_message("%u", uid); - ret = osl_create_table(desc); + + ui->desc = para_malloc(sizeof(*ui->desc)); + ui->desc->num_columns = NUM_UT_COLUMNS; + ui->desc->flags = 0; + ui->desc->column_descriptions = user_table_cols; + ui->desc->dir = para_strdup(conf.database_dir_arg); + ui->desc->name = make_message("%u", (unsigned)ui->uid); + INFO_LOG(".............................uid #%u: %u\n", + (unsigned)num_uids, (unsigned)ui->uid); + if (create) { + ret = osl(osl_create_table(ui->desc)); + if (ret < 0) + goto err; + num_uids++; + } + ret = osl(osl_open_table(ui->desc, &ui->table)); if (ret < 0) - return ret; - return osl_open_table(desc, t); + goto err; + return 1; +err: + free((char *)ui->desc->name); + free((char *)ui->desc->dir); + free(ui->desc); + ui->desc->name = NULL; + ui->desc->dir = NULL; + ui->desc = NULL; + ui->table = NULL; + ui->flags = 0; + return ret; } -static int insert_id_row(uint32_t uid, struct osl_table *t, struct osl_row **row) -{ - struct osl_object objects[NUM_IDT_COLUMNS]; - uint64_t num = 0; +#define uid_hash_bits 8 +static uint32_t uid_hash_table_size = 1 << uid_hash_bits; +#define PRIME1 0x811c9dc5 +#define PRIME2 0x01000193 - struct osl_table **table_ptr = 
para_malloc(sizeof(*table_ptr)); - *table_ptr = t; +static void create_hash_table(void) +{ + uid_hash_table = para_calloc(uid_hash_table_size + * sizeof(struct user_info)); +} - INFO_LOG("§§§§§§§§§§§§§§§§§§§§§ uid: %d, t: %p\n", uid, t); - objects[IDT_UID].data = &uid; - objects[IDT_UID].size = sizeof(uid); - objects[IDT_BYTES].data = # - objects[IDT_BYTES].size = sizeof(num); - objects[IDT_FILES].data = # - objects[IDT_FILES].size = sizeof(num); - objects[IDT_TABLE].data = table_ptr; - objects[IDT_TABLE].size = sizeof(*table_ptr); - return osl_add_and_get_row(id_table, objects, row); +static void free_hash_table(void) +{ + free(uid_hash_table); + uid_hash_table = NULL; } -static int get_user_table(struct osl_row *row, struct osl_table **t) +static int create_tables(void) { - struct osl_object obj; + int ret; - int ret = osl_get_object(id_table, row, IDT_TABLE, &obj); + dir_table_desc.dir = para_strdup(conf.database_dir_arg); + ret = osl(osl_create_table(&dir_table_desc)); if (ret < 0) return ret; - *t = *(struct osl_table **)obj.data; - INFO_LOG("^^^^^^^^^^^^^^^^^^ t: %p\n", *t); + create_hash_table(); return 1; } -static int add_id_bytes(struct osl_row *row, uint64_t *add) +static void close_dir_table(void) { - uint64_t num; - struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)}; + int ret; - /* update number of bytes */ - int ret = osl_get_object(id_table, row, IDT_BYTES, &obj1); - if (ret < 0) - return ret; - num = *(uint64_t *)obj1.data + *add; - ret = osl_update_object(id_table, row, IDT_BYTES, &obj2); + if (!dir_table) + return; + ret = osl(osl_close_table(dir_table, OSL_MARK_CLEAN)); if (ret < 0) - return ret; - /* increment number of files */ - ret = osl_get_object(id_table, row, IDT_FILES, &obj1); + ERROR_LOG("failed to close dir table: %s\n", adu_strerror(-ret)); + free((char *)dir_table_desc.dir); + dir_table = NULL; +} + +static void close_user_table(struct user_info *ui) +{ + int ret; + + if (!ui || !ui_used(ui) || !ui_admissible(ui)) + 
return; + ret = osl(osl_close_table(ui->table, OSL_MARK_CLEAN)); if (ret < 0) - return ret; - num = *(uint64_t *)obj1.data + 1; - return osl_update_object(id_table, row, IDT_FILES, &obj2); + ERROR_LOG("failed to close user table %u: %s\n", + (unsigned) ui->uid, adu_strerror(-ret)); + free((char *)ui->desc->name); + ui->desc->name = NULL; + free((char *)ui->desc->dir); + ui->desc->dir = NULL; + free(ui->desc); + ui->desc = NULL; + ui->table = NULL; + ui->flags = 0; +} + +static void close_user_tables(void) +{ + struct user_info *ui; + + FOR_EACH_USER(ui) + close_user_table(ui); } -static int update_user_row(struct osl_table *t, uint32_t dir_num, +static void close_all_tables(void) +{ + close_dir_table(); + close_user_tables(); + free_hash_table(); +} + +static int signum; + +static void signal_handler(int s) +{ + signum = s; +} + +static void check_signals(void) +{ + if (likely(!signum)) + return; + EMERG_LOG("caught signal %d\n", signum); + close_all_tables(); + exit(EXIT_FAILURE); +} + +static int init_signals(void) +{ + if (signal(SIGINT, &signal_handler) == SIG_ERR) + return -E_SIGNAL_SIG_ERR; + if (signal(SIGTERM, &signal_handler) == SIG_ERR) + return -E_SIGNAL_SIG_ERR; + return 1; +} + +/* + * We use a hash table of size s=2^uid_hash_bits to map the uids into the + * interval [0..s]. Hash collisions are treated by open addressing, i.e. + * unused slots in the table are used to store different uids that hash to the + * same slot. + * + * If a hash collision occurs, different slots are successively probed in order + * to find an unused slot for the new uid. Probing is implemented via a second + * hash function that maps the uid to h=(uid * PRIME2) | 1, which is always an + * odd number. + * + * An odd number is sufficient to make sure each entry of the hash table gets + * probed for probe_num between 0 and s-1 because s is a power of two, hence + * the second hash value has never a common divisor with the hash table size. 
+ * IOW: h is invertible in the ring [0..s]. + */ +static uint32_t double_hash(uint32_t uid, uint32_t probe_num) +{ + return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num) + % uid_hash_table_size; +} + +enum search_uid_flags { + OPEN_USER_TABLE = 1, + CREATE_USER_TABLE = 2, +}; + +static int uid_is_admissible(uint32_t uid) +{ + int i; + + for (i = 0; i < conf.uid_given; i++) { + struct uid_range *ur = admissible_uids + i; + + if (ur->low <= uid && ur->high >= uid) + break; + } + i = !conf.uid_given || i < conf.uid_given; + DEBUG_LOG("uid %u is %sadmissible\n", (unsigned)uid, + i? "" : "not "); + return i; +} + +static int search_uid(uint32_t uid, enum search_uid_flags flags, + struct user_info **ui_ptr) +{ + uint32_t p; + + for (p = 0; p < uid_hash_table_size; p++) { + struct user_info *ui = uid_hash_table + double_hash(uid, p); + + if (!ui_used(ui)) { + int ret; + if (!flags) + return -E_BAD_UID; + ui->uid = uid; + ui->flags |= UI_FL_SLOT_USED; + if (!uid_is_admissible(uid)) + return 0; + ui->flags |= UI_FL_ADMISSIBLE; + ret = open_user_table(ui, flags & CREATE_USER_TABLE); + if (ret < 0) + return ret; + + if (ui_ptr) + *ui_ptr = ui; + return 1; + } + if (ui->uid != uid) + continue; + if (ui_ptr) + *ui_ptr = ui; + return 0; + } + return flags? 
-E_HASH_TABLE_OVERFLOW : -E_BAD_UID; +} + +static int update_user_row(struct osl_table *t, uint64_t dir_num, uint64_t *add) { struct osl_row *row; struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)}; - int ret = osl_get_row(t, UT_DIR_NUM, &obj, &row); + int ret = osl(osl_get_row(t, UT_DIR_NUM, &obj, &row)); - if (ret < 0 && ret != -E_RB_KEY_NOT_FOUND) + if (ret == -E_OSL && osl_errno != E_OSL_RB_KEY_NOT_FOUND) return ret; if (ret < 0) { /* this is the first file we add */ struct osl_object objects[NUM_UT_COLUMNS]; @@ -356,39 +550,42 @@ static int update_user_row(struct osl_table *t, uint32_t dir_num, objects[UT_FILES].data = &num_files; objects[UT_FILES].size = sizeof(num_files); INFO_LOG("######################### ret: %d\n", ret); - ret = osl_add_row(t, objects); + ret = osl(osl_add_row(t, objects)); INFO_LOG("######################### ret: %d\n", ret); return ret; } else { /* add size and increment file count */ uint64_t num; struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)}; - ret = osl_get_object(t, row, UT_BYTES, &obj1); + ret = osl(osl_get_object(t, row, UT_BYTES, &obj1)); if (ret < 0) return ret; num = *(uint64_t *)obj1.data + *add; - ret = osl_update_object(t, row, UT_BYTES, &obj2); + ret = osl(osl_update_object(t, row, UT_BYTES, &obj2)); if (ret < 0) return ret; - ret = osl_get_object(t, row, UT_FILES, &obj1); + ret = osl(osl_get_object(t, row, UT_FILES, &obj1)); if (ret < 0) return ret; num = *(uint64_t *)obj1.data + 1; - return osl_update_object(t, row, UT_FILES, &obj2); + return osl(osl_update_object(t, row, UT_FILES, &obj2)); } } -static uint32_t dir_num; +static uint64_t num_dirs; +static uint64_t num_files; +static uint64_t num_bytes; -int scan_dir(char *dirname) +int scan_dir(char *dirname, uint64_t *parent_dir_num) { DIR *dir; struct dirent *entry; int ret, cwd_fd, ret2; uint64_t dir_size = 0, dir_files = 0; - struct osl_object obj; + uint64_t this_dir_num = ++num_dirs; - INFO_LOG("----------------- %s\n", 
dirname); + check_signals(); + DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)num_dirs, dirname); ret = para_opendir(dirname, &dir, &cwd_fd); if (ret < 0) { if (ret != -ERRNO_TO_ERROR(EACCES)) @@ -398,26 +595,25 @@ int scan_dir(char *dirname) } while ((entry = readdir(dir))) { mode_t m; - char *tmp; struct stat s; uint32_t uid; uint64_t size; - struct osl_row *id_row; - struct osl_table *user_table; + struct user_info *ui; if (!strcmp(entry->d_name, ".")) continue; if (!strcmp(entry->d_name, "..")) continue; - if (lstat(entry->d_name, &s) == -1) + if (lstat(entry->d_name, &s) == -1) { + WARNING_LOG("lstat error for %s/%s\n", dirname, + entry->d_name); continue; + } m = s.st_mode; if (!S_ISREG(m) && !S_ISDIR(m)) continue; - tmp = make_message("%s/%s", dirname, entry->d_name); if (S_ISDIR(m)) { - ret = scan_dir(tmp); - free(tmp); + ret = scan_dir(entry->d_name, &this_dir_num); if (ret < 0) goto out; continue; @@ -425,36 +621,21 @@ int scan_dir(char *dirname) /* regular file */ size = s.st_size; dir_size += size; + num_bytes += size; dir_files++; + num_files++; uid = s.st_uid; - INFO_LOG("++++++++++++++++++++++++++ %s, uid: %u\n", entry->d_name, uid); - obj.data = &uid; - obj.size = sizeof(uid); - ret = osl_get_row(id_table, IDT_UID, &obj, &id_row); - if (ret < 0 && ret != -E_RB_KEY_NOT_FOUND) - goto out; - if (ret < 0) { - ret = create_and_open_user_table(uid, &user_table); - if (ret < 0) - goto out; - ret = insert_id_row(uid, user_table, &id_row); - if (ret < 0) - goto out; - } else { - ret = get_user_table(id_row, &user_table); - if (ret < 0) - goto out; - } - ret = add_id_bytes(id_row, &size); + ret = search_uid(uid, CREATE_USER_TABLE | OPEN_USER_TABLE, &ui); if (ret < 0) goto out; - INFO_LOG("user_table: %p\n", user_table); - ret = update_user_row(user_table, dir_num, &size); - INFO_LOG("update_user ret: %d\n", ret); + ui->bytes += size; + ui->files++; + ret = update_user_row(ui->table, this_dir_num, &size); if (ret < 0) goto out; } - ret = 
add_directory(dirname, dir_num++, &dir_size, &dir_files); + ret = add_directory(dirname, &this_dir_num, parent_dir_num, + &dir_size, &dir_files); out: closedir(dir); ret2 = para_fchdir(cwd_fd); @@ -464,182 +645,611 @@ out: return ret; } -static int get_dir_name(struct osl_row *row, char **name) +static int get_dir_name_by_number(uint64_t *dirnum, char **name) { - struct osl_object obj; - int ret = osl_get_object(dir_table, row, DT_NAME, &obj); + char *result = NULL, *tmp; + struct osl_row *row; + uint64_t val = *dirnum; + struct osl_object obj = {.data = &val, .size = sizeof(val)}; + int ret; +again: + ret = osl(osl_get_row(dir_table, DT_NUM, &obj, &row)); if (ret < 0) - return ret; - *name = obj.data; - return 1; + goto out; + ret = osl(osl_get_object(dir_table, row, DT_NAME, &obj)); + if (ret < 0) + goto out; + if (result) { + tmp = make_message("%s/%s", (char *)obj.data, result); + free(result); + result = tmp; + } else + result = para_strdup((char *)obj.data); + ret = osl(osl_get_object(dir_table, row, DT_PARENT_NUM, &obj)); + if (ret < 0) + goto out; + val = *(uint64_t *)obj.data; + if (val) + goto again; +out: + if (ret < 0) { + free(result); + *name = NULL; + } else + *name = result; + return ret; } -static int print_dirname_and_size(struct osl_row *row, void *data) +static int get_dir_name_of_row(struct osl_row *dir_table_row, char **name) { - unsigned *count = data; struct osl_object obj; - char *name; int ret; + char *this_dir, *prefix = NULL; - if ((*count)++ > 100) - return -E_LOOP_COMPLETE; - ret = get_dir_name(row, &name); + *name = NULL; + ret = osl(osl_get_object(dir_table, dir_table_row, DT_NAME, &obj)); if (ret < 0) return ret; - ret = osl_get_object(dir_table, row, DT_BYTES, &obj); + this_dir = para_strdup((char *)obj.data); + ret = osl(osl_get_object(dir_table, dir_table_row, DT_PARENT_NUM, &obj)); if (ret < 0) - return ret; - printf("%s\t%llu\n", name, *(long long unsigned *)obj.data); - return 1; + goto out; + if (!*(uint64_t *)obj.data) { + 
*name = this_dir; + return 1; + } + ret = get_dir_name_by_number((uint64_t *)obj.data, &prefix); + if (ret < 0) + goto out; + *name = make_message("%s/%s", prefix, this_dir); + free(prefix); + ret = 1; +out: + free(this_dir); + return ret; } -static int print_dirname_and_file_count(struct osl_row *row, void *data) +const uint64_t size_unit_divisors[] = { + [size_unit_arg_b] = 1ULL, + [size_unit_arg_k] = 1024ULL, + [size_unit_arg_m] = 1024ULL * 1024ULL, + [size_unit_arg_g] = 1024ULL * 1024ULL * 1024ULL, + [size_unit_arg_t] = 1024ULL * 1024ULL * 1024ULL * 1024ULL, +}; + +const uint64_t count_unit_divisors[] = { + + [count_unit_arg_n] = 1ULL, + [count_unit_arg_k] = 1000ULL, + [count_unit_arg_m] = 1000ULL * 1000ULL, + [count_unit_arg_g] = 1000ULL * 1000ULL * 1000ULL, + [count_unit_arg_t] = 1000ULL * 1000ULL * 1000ULL * 1000ULL, +}; + +const char size_unit_abbrevs[] = " BKMGT"; +const char count_unit_abbrevs[] = " kmgt"; + +static enum enum_size_unit format_size_value(enum enum_size_unit unit, + uint64_t value, int print_unit, char *result) { - unsigned *count = data; - struct osl_object obj; - char *name; + enum enum_size_unit u = unit; + char unit_buf[2] = "\0\0"; + + if (unit == size_unit_arg_h) /* human readable */ + for (u = size_unit_arg_b; u < size_unit_arg_t && + value > size_unit_divisors[u + 1]; u++) + ; /* nothing */ + if (print_unit) + unit_buf[0] = size_unit_abbrevs[u]; + sprintf(result, "%llu%s", + (long long unsigned)value / size_unit_divisors[u], unit_buf); + return u; +} + +static enum enum_count_unit format_count_value(enum enum_count_unit unit, + uint64_t value, int print_unit, char *result) +{ + enum enum_count_unit u = unit; + char unit_buf[2] = "\0\0"; + + if (unit == count_unit_arg_h) /* human readable */ + for (u = count_unit_arg_n; u < count_unit_arg_t && + value > count_unit_divisors[u + 1]; u++) + ; /* nothing */ + if (print_unit) + unit_buf[0] = count_unit_abbrevs[u]; + sprintf(result, "%llu%s", + (long long unsigned)value / 
count_unit_divisors[u], unit_buf); + return u; +} + +enum global_stats_flags { + GSF_PRINT_DIRNAME = 1, + GSF_PRINT_BYTES = 2, + GSF_PRINT_FILES = 4, + GSF_COMPUTE_SUMMARY = 8, +}; + +struct global_stats_info { + uint32_t count; int ret; + int osl_errno; + enum global_stats_flags flags; +}; - if ((*count)++ > 100) - return -E_LOOP_COMPLETE; - ret = get_dir_name(row, &name); - if (ret < 0) - return ret; - ret = osl_get_object(dir_table, row, DT_FILES, &obj); - if (ret < 0) - return ret; - printf("%s\t%llu\n", name, *(long long unsigned *)obj.data); +static int global_stats_loop_function(struct osl_row *row, void *data) +{ + struct global_stats_info *gsi = data; + struct osl_object obj; + char *dirname, formated_value[FORMATED_VALUE_SIZE]; + int ret, summary = gsi->flags & GSF_COMPUTE_SUMMARY; + + check_signals(); + if (!gsi->count && !summary) { + ret = -E_LOOP_COMPLETE; + goto err; + } + if (summary || (gsi->count && (gsi->flags & GSF_PRINT_FILES))) { + uint64_t files; + ret = osl(osl_get_object(dir_table, row, DT_FILES, &obj)); + if (ret < 0) + goto err; + files = *(uint64_t *)obj.data; + if (gsi->count && (gsi->flags & GSF_PRINT_FILES)) { + format_count_value(conf.count_unit_arg, files, + conf.count_unit_arg == count_unit_arg_h, + formated_value); + printf("\t%s%s", formated_value, + (gsi->flags & (GSF_PRINT_BYTES | GSF_PRINT_DIRNAME))? + "\t" : "\n"); + } + if (summary) + num_files += files; + } + if (summary || (gsi->count && (gsi->flags & GSF_PRINT_BYTES))) { + uint64_t bytes; + ret = osl(osl_get_object(dir_table, row, DT_BYTES, &obj)); + if (ret < 0) + goto err; + bytes = *(uint64_t *)obj.data; + if (gsi->count && (gsi->flags & GSF_PRINT_BYTES)) { + format_size_value(conf.size_unit_arg, bytes, + conf.size_unit_arg == size_unit_arg_h, + formated_value); + printf("%s%s%s", + (gsi->flags & GSF_PRINT_FILES)? "" : "\t", + formated_value, + (gsi->flags & GSF_PRINT_DIRNAME)? 
"\t" : "\n" + ); + } + if (summary) { + num_bytes += bytes; + num_dirs++; + } + } + if (gsi->count && (gsi->flags & GSF_PRINT_DIRNAME)) { + ret = get_dir_name_of_row(row, &dirname); + if (ret < 0) + goto err; + printf("%s%s\n", + (gsi->flags & (GSF_PRINT_BYTES | GSF_PRINT_FILES))? "" : "\t", + dirname); + free(dirname); + } + if (gsi->count > 0) + gsi->count--; return 1; +err: + gsi->ret = ret; + gsi->osl_errno = (ret == -E_OSL)? osl_errno : 0; + return -1; } -static int print_id_stats(struct osl_row *row, __a_unused void *data) +static void print_id_stats(void) { + struct user_info *ui; + + printf("User summary " + "(uid/dirs%s/files%s/size%s):\n", + count_unit_buf, count_unit_buf, size_unit_buf); + FOR_EACH_USER(ui) { + char formated_dir_count[FORMATED_VALUE_SIZE], + formated_file_count[FORMATED_VALUE_SIZE], + formated_bytes[FORMATED_VALUE_SIZE ]; + if (!ui_used(ui) || !ui_admissible(ui)) + continue; + format_count_value(conf.count_unit_arg, ui->dirs, + conf.count_unit_arg == count_unit_arg_h, + formated_dir_count); + format_count_value(conf.count_unit_arg, ui->files, + conf.count_unit_arg == count_unit_arg_h, + formated_file_count); + format_size_value(conf.size_unit_arg, ui->bytes, + conf.size_unit_arg == size_unit_arg_h, + formated_bytes); + printf("\t%u\t%s\t%s\t%s\n", (unsigned)ui->uid, + formated_dir_count, + formated_file_count, + formated_bytes + ); + } +} + +enum user_stats_flags { + USF_PRINT_DIRNAME = 1, + USF_PRINT_BYTES = 2, + USF_PRINT_FILES = 4, + USF_COMPUTE_SUMMARY = 8, +}; + +struct user_stats_info { + uint32_t count; + enum user_stats_flags flags; + int ret; + int osl_errno; + struct user_info *ui; +}; + +static int user_stats_loop_function(struct osl_row *row, void *data) +{ + struct user_stats_info *usi = data; struct osl_object obj; - uint32_t uid; - uint64_t bytes, files; - int ret = osl_get_object(id_table, row, IDT_UID, &obj); + int ret, summary = usi->flags & GSF_COMPUTE_SUMMARY; + char formated_value[FORMATED_VALUE_SIZE]; - if (ret < 0) 
- return ret; - uid = *(uint32_t *)obj.data; - ret = osl_get_object(id_table, row, IDT_BYTES, &obj); - if (ret < 0) + check_signals(); + if (!usi->count && !summary) { + ret = -E_LOOP_COMPLETE; + goto err; + } + if (summary || (usi->count && (usi->flags & USF_PRINT_FILES))) { + uint64_t files; + ret = osl(osl_get_object(usi->ui->table, row, UT_FILES, &obj)); + if (ret < 0) + goto err; + files = *(uint64_t *)obj.data; + if (usi->count && (usi->flags & USF_PRINT_FILES)) { + format_count_value(conf.count_unit_arg, files, + conf.count_unit_arg == count_unit_arg_h, + formated_value); + printf("\t%s%s", formated_value, + (usi->flags & (USF_PRINT_BYTES | USF_PRINT_DIRNAME))? + "\t" : "\n" + ); + } + if (summary) + usi->ui->files += files; + } + if (summary || (usi->count && (usi->flags & USF_PRINT_BYTES))) { + uint64_t bytes; + ret = osl(osl_get_object(usi->ui->table, row, UT_BYTES, &obj)); + if (ret < 0) + goto err; + bytes = *(uint64_t *)obj.data; + if (usi->count && (usi->flags & USF_PRINT_BYTES)) { + format_size_value(conf.size_unit_arg, bytes, + conf.size_unit_arg == size_unit_arg_h, + formated_value); + printf("%s%s%s", + (usi->flags & USF_PRINT_FILES)? "" : "\t", + formated_value, + usi->flags & USF_PRINT_DIRNAME? "\t" : "\n" + ); + } + if (summary) { + usi->ui->bytes += bytes; + usi->ui->dirs++; + } + + } + if (usi->count && (usi->flags & USF_PRINT_DIRNAME)) { + char *dirname; + ret = osl(osl_get_object(usi->ui->table, row, UT_DIR_NUM, &obj)); + if (ret < 0) + goto err; + ret = get_dir_name_by_number((uint64_t *)obj.data, &dirname); + if (ret < 0) + goto err; + printf("%s%s\n", + (usi->flags & (USF_PRINT_BYTES | USF_PRINT_FILES))? "" : "\t", + dirname); + free(dirname); + } + if (usi->count > 0) + usi->count--; + return 1; +err: + usi->ret = ret; + usi->osl_errno = (ret == -E_OSL)? 
osl_errno : 0; + return -1; +} + +static int check_loop_return(int ret, int loop_ret, int loop_osl_errno) +{ + if (ret >= 0) return ret; - bytes = *(uint64_t *)obj.data; - ret = osl_get_object(id_table, row, IDT_FILES, &obj); - if (ret < 0) + assert(ret == -E_OSL); + if (osl_errno != E_OSL_LOOP) + /* error not caused by loop function returning negative. */ return ret; - files = *(uint64_t *)obj.data; + assert(loop_ret < 0); + if (loop_ret == -E_LOOP_COMPLETE) /* no error */ + return 1; + if (loop_ret == -E_OSL) { /* osl error in loop function */ + assert(loop_osl_errno); + osl_errno = loop_osl_errno; + } + return loop_ret; +} + +static int adu_loop_reverse(struct osl_table *t, unsigned col_num, void *private_data, + osl_rbtree_loop_func *func, int *loop_ret, int *loop_osl_errno) +{ + int ret = osl(osl_rbtree_loop_reverse(t, col_num, private_data, func)); + return check_loop_return(ret, *loop_ret, *loop_osl_errno); +} + +static int print_user_stats(void) +{ + struct user_info *ui; + int ret; - printf("%u\t%llu%llu\n", (unsigned)uid, (long long unsigned)files, - (long long unsigned)bytes); + FOR_EACH_USER(ui) { + struct user_stats_info usi = { + .count = conf.limit_arg, + .ui = ui + }; + if (!ui_used(ui) || !ui_admissible(ui)) + continue; + usi.flags = USF_PRINT_DIRNAME | USF_PRINT_BYTES | USF_COMPUTE_SUMMARY; + printf("uid %u, by size%s:\n", + (unsigned) ui->uid, size_unit_buf); + ret = adu_loop_reverse(ui->table, UT_BYTES, &usi, user_stats_loop_function, + &usi.ret, &usi.osl_errno); + if (ret < 0) + return ret; + printf("\nuid %u, by file count%s:\n", + (unsigned) ui->uid, count_unit_buf); + usi.count = conf.limit_arg, + usi.flags = USF_PRINT_DIRNAME | USF_PRINT_FILES; + ret = adu_loop_reverse(ui->table, UT_FILES, &usi, user_stats_loop_function, + &usi.ret, &usi.osl_errno); + if (ret < 0) + return ret; + printf("\n"); + } return 1; } -struct id_dir_stat_info { - unsigned count; - struct osl_table *user_table; -}; +static void print_global_summary(void) +{ + char 
d[FORMATED_VALUE_SIZE], f[FORMATED_VALUE_SIZE], + s[FORMATED_VALUE_SIZE]; + enum enum_count_unit ud, uf; + enum enum_size_unit us; + + ud = format_count_value(conf.count_unit_arg, num_dirs, 0, d); + uf = format_count_value(conf.count_unit_arg, num_files, 0, f); + us = format_size_value(conf.size_unit_arg, num_bytes, 0, s); + + printf("Global summary " + "(dirs(%c)/files(%c)/size(%c))\n" + "\t%s\t%s\t%s\n\n", + count_unit_abbrevs[ud], + count_unit_abbrevs[uf], + size_unit_abbrevs[us], + d, f, s + ); + +} -static int print_big_dir(struct osl_row *row, void *data) +static int print_statistics(void) { - struct id_dir_stat_info *info = data; - info->count++; int ret; - struct osl_row *dir_row; - char *dirname; - uint64_t bytes; - struct osl_object obj; + struct global_stats_info gsi = { + .count = conf.limit_arg, + .flags = GSF_PRINT_DIRNAME | GSF_PRINT_BYTES | GSF_COMPUTE_SUMMARY + }; - if (info->count > 10) - return -E_LOOP_COMPLETE; - ret = osl_get_object(info->user_table, row, UT_BYTES, &obj); - if (ret < 0) - return ret; - bytes = *(uint64_t *)obj.data; - ret = osl_get_object(info->user_table, row, UT_DIR_NUM, &obj); + printf("By size%s:\n", + size_unit_buf); + ret = adu_loop_reverse(dir_table, DT_BYTES, &gsi, + global_stats_loop_function, &gsi.ret, &gsi.osl_errno); if (ret < 0) return ret; - ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row); - if (ret < 0) - return ret; - ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj); + printf("\n"); + + gsi.count = conf.limit_arg; + gsi.flags = GSF_PRINT_DIRNAME | GSF_PRINT_FILES; + printf("By file count%s:\n", + count_unit_buf); + ret = adu_loop_reverse(dir_table, DT_FILES, &gsi, + global_stats_loop_function, &gsi.ret, &gsi.osl_errno); if (ret < 0) return ret; - dirname = obj.data; - printf("%s: %llu\n", dirname, (long long unsigned)bytes); + printf("\n"); + print_global_summary(); + print_user_stats(); + print_id_stats(); return 1; } -static int print_id_dir_stats(struct osl_row *row, __a_unused void *data) +static 
char *get_uid_list_name(void) { - struct osl_object obj; - uint32_t uid; - int ret = osl_get_object(id_table, row, IDT_UID, &obj); - struct id_dir_stat_info info = {.count = 0}; + return make_message("%s/uid_list", conf.database_dir_arg); +} + +static int write_uid_list(void) +{ + char *buf, *filename; + uint32_t count = 0; + struct user_info *ui; + size_t size = num_uids * sizeof(uint32_t); + int ret; + + if (!num_uids) + return 0; + buf = para_malloc(size); + FOR_EACH_USER(ui) { + if (!ui_used(ui) || !ui_admissible(ui)) + continue; + DEBUG_LOG("saving uid %u\n", (unsigned) ui->uid); + write_u32(buf + count++ * sizeof(uint32_t), ui->uid); + } + filename = get_uid_list_name(); + ret = para_write_file(filename, buf, size); + free(filename); + free(buf); + return ret; +} + +static int open_dir_table(void) +{ + if (!dir_table_desc.dir) /* we did not create the table */ + dir_table_desc.dir = para_strdup(conf.database_dir_arg); + return osl(osl_open_table(&dir_table_desc, &dir_table)); +} +static int com_create() +{ + uint64_t zero = 0ULL; + int ret = create_tables(); if (ret < 0) return ret; - uid = *(uint32_t *)obj.data; - - ret = osl_get_object(id_table, row, IDT_TABLE, &obj); + check_signals(); + ret = open_dir_table(); if (ret < 0) return ret; - info.user_table = *(struct osl_table **)obj.data; + check_signals(); + ret = scan_dir(conf.base_dir_arg, &zero); + if (ret < 0) + goto out; + ret = write_uid_list(); +out: + close_all_tables(); + return ret; +} - printf("************************* Big dirs owned by uid %u\n", (unsigned) uid); - osl_rbtree_loop_reverse(info.user_table, IDT_BYTES, &info, print_big_dir); - return 1; +static int read_uid_file(void) +{ + size_t size; + uint32_t n; + char *filename = get_uid_list_name(), *map; + int ret = mmap_full_file(filename, O_RDONLY, (void **)&map, &size, NULL); + + if (ret < 0) { + INFO_LOG("failed to map %s\n", filename); + free(filename); + return ret; + } + num_uids = size / 4; + INFO_LOG("found %u uids in %s\n", 
(unsigned)num_uids, filename); + free(filename); + /* hash table size should be a power of two and larger than the number of uids */ + uid_hash_table_size = 4; + while (uid_hash_table_size < num_uids) + uid_hash_table_size *= 2; + create_hash_table(); + for (n = 0; n < num_uids; n++) { + uint32_t uid = read_u32(map + n * sizeof(uid)); + ret = search_uid(uid, OPEN_USER_TABLE, NULL); + if (ret < 0) + goto out; + } +out: + para_munmap(map, size); + return ret; } -static int print_statistics(void) +static int com_select(void) { - unsigned count = 0; int ret; - printf("************************* Biggest dirs\n"); - ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &count, print_dirname_and_size); - if (ret < 0 && ret != -E_LOOP_COMPLETE) - return ret; - count = 0; - printf("************************* dirs containing many files\n"); - ret = osl_rbtree_loop_reverse(dir_table, DT_FILES, &count, print_dirname_and_file_count); - if (ret < 0 && ret != -E_LOOP_COMPLETE) - return ret; + if (conf.count_unit_arg != count_unit_arg_h) + count_unit_buf[1] = count_unit_abbrevs[conf.count_unit_arg]; + else + count_unit_buf[0] = '\0'; + if (conf.size_unit_arg != size_unit_arg_h) + size_unit_buf[1] = size_unit_abbrevs[conf.size_unit_arg]; + else + size_unit_buf[0] = '\0'; - printf("************************* dirs stats by owner\n"); - ret = osl_rbtree_loop(id_table, IDT_BYTES, NULL, print_id_stats); + ret = open_dir_table(); if (ret < 0) return ret; - - return osl_rbtree_loop(id_table, IDT_BYTES, NULL, print_id_dir_stats); + check_signals(); + ret = read_uid_file(); + if (ret < 0) + return ret; + check_signals(); + ret = print_statistics(); + close_all_tables(); + return ret; } +static int check_args(void) +{ + int i, ret; + + /* remove trailing slashes from base-dir arg */ + if (conf.base_dir_given) { + size_t len = strlen(conf.base_dir_arg); + for (;;) { + if (!len) /* empty string */ + return -ERRNO_TO_ERROR(EINVAL); + if (!--len) /* length 1 is always OK */ + break; + if 
(conf.base_dir_arg[len] != '/') + break; /* no trailing slash, also OK */ + conf.base_dir_arg[len] = '\0'; + } + } + if (!conf.uid_given) + return 0; + admissible_uids = para_malloc(conf.uid_given * sizeof(*admissible_uids)); + for (i = 0; i < conf.uid_given; i++) { + ret = parse_uid_range(conf.uid_arg[i], admissible_uids + i); + if (ret < 0) + goto err; + } + return 1; +err: + free(admissible_uids); + admissible_uids = NULL; + return ret; +} int main(int argc, char **argv) { - int ret = create_tables(); - if (ret < 0) - goto out; - ret = osl_open_table(&dir_table_desc, &dir_table); + int ret; + struct cmdline_parser_params params = { + .override = 0, + .initialize = 1, + .check_required = 0, + .check_ambiguity = 0, + .print_errors = 1 + }; + + cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */ + ret = init_signals(); if (ret < 0) goto out; - ret = osl_open_table(&id_table_desc, &id_table); + ret = check_args(); if (ret < 0) goto out; ret = -E_SYNTAX; - if (argc != 2) - goto out; - ret = scan_dir(argv[1]); + if (conf.select_given) + ret = com_select(); + else + ret = com_create(); if (ret < 0) goto out; - print_statistics(); out: + free(admissible_uids); if (ret < 0) { - ERROR_LOG("%s\n", error_txt(-ret)); + ERROR_LOG("%s\n", adu_strerror(-ret)); return -EXIT_FAILURE; } return EXIT_SUCCESS; } -