2 #include <dirent.h> /* readdir() */
4 #include "gcc-compat.h"
9 #include "portable_io.h"
13 /** Command line and config file options. */
14 static struct gengetopt_args_info conf;
17 /** whether this slot of the hash table is used. */
19 /** whether this uid should be taken into account. */
26 struct osl_table *table;
30 struct osl_table_description *desc;
34 * Contains info for each user that owns at least one regular file.
36 * Even users that are not taken into account because of the --uid
37 * option occupy a slot in this hash table. This allows us to find out
38 * quickly whether a uid is admissible. And yes, this has to be fast.
40 static struct user_info *uid_hash_table;
42 static inline int ui_used(struct user_info *ui)
44 return ui->flags & UI_FL_SLOT_USED;
47 static inline int ui_admissible(struct user_info *ui)
49 return ui->flags & UI_FL_ADMISSIBLE;
57 static struct uid_range *admissible_uids;
59 static inline int check_uid_arg(const char *arg, uint32_t *uid)
61 const uint32_t max = ~0U;
63 * We need a 64-bit int for string -> uid conversion because strtoll()
64 * returns a signed value.
67 int ret = para_atoi64(arg, &val);
71 if (val < 0 || val > max)
72 return -ERRNO_TO_ERROR(EINVAL);
77 static int parse_uid_range(const char *orig_arg, struct uid_range *ur)
80 char *arg = para_strdup(orig_arg), *p = strchr(arg, '-');
83 if (p == arg) /* -42 */
85 ret = check_uid_arg(p, &ur->high);
88 ur->low = p? 0 : ur->high;
95 ret = check_uid_arg(arg, &ur->low);
99 if (*p) { /* 42-4711 */
100 ret = check_uid_arg(p, &ur->high);
104 if (ur->low > ur->high)
105 ret = -ERRNO_TO_ERROR(EINVAL);
108 ERROR_LOG("bad uid option: %s\n", orig_arg);
110 INFO_LOG("admissible uid range: %u - %u\n", ur->low,
117 /** evaluates to 1 if x < y, to -1 if x > y and to 0 if x == y */
118 #define NUM_COMPARE(x, y) ((int)((x) < (y)) - (int)((x) > (y)))
123 * \param ll Loglevel.
124 * \param fmt Usual format string.
126 * All XXX_LOG() macros use this function.
128 __printf_2_3 void __log(int ll, const char* fmt,...)
136 if (ll < conf.loglevel_arg)
141 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
142 fprintf(outfd, "%s ", str);
144 vfprintf(outfd, fmt, argp);
149 * Compare the size of two directories
151 * \param obj1 Pointer to the first object.
152 * \param obj2 Pointer to the second object.
154 * This function first compares the size values as usual integers. If they compare as
155 * equal, the data addresses of \a obj1 and \a obj2 are compared. So this compare function
156 * returns zero if and only if the data of \a obj1 and \a obj2 is the same memory area.
158 static int size_compare(const struct osl_object *obj1, const struct osl_object *obj2)
160 uint64_t d1 = *(uint64_t *)obj1->data;
161 uint64_t d2 = *(uint64_t *)obj2->data;
162 int ret = NUM_COMPARE(d2, d1);
166 //INFO_LOG("addresses: %p, %p\n", obj1->data, obj2->data);
167 return NUM_COMPARE(obj2->data, obj1->data);
171 * Compare two osl objects pointing to unsigned integers of 64 bit size.
173 * \param obj1 Pointer to the first integer.
174 * \param obj2 Pointer to the second integer.
176 * \return The values required for an osl compare function.
178 * \sa osl_compare_func, osl_hash_compare().
180 static int uint64_compare(const struct osl_object *obj1,
181 const struct osl_object *obj2)
183 uint64_t d1 = read_u64((const char *)obj1->data);
184 uint64_t d2 = read_u64((const char *)obj2->data);
193 /** The columns of the directory table. */
194 enum dir_table_columns {
195 /** The name of the directory. */
197 /** The dir count number. */
199 /** The number of bytes of all regular files. */
201 /** The number of all regular files. */
203 /** Number of columns in this table. */
207 static struct osl_column_description dir_table_cols[] = {
209 .storage_type = OSL_MAPPED_STORAGE,
214 .storage_type = OSL_MAPPED_STORAGE,
215 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
217 .compare_function = uint64_compare,
218 .data_size = sizeof(uint64_t)
221 .storage_type = OSL_MAPPED_STORAGE,
222 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
223 .compare_function = size_compare,
225 .data_size = sizeof(uint64_t)
228 .storage_type = OSL_MAPPED_STORAGE,
229 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
230 .compare_function = size_compare,
232 .data_size = sizeof(uint64_t)
236 static struct osl_table_description dir_table_desc = {
238 .num_columns = NUM_DT_COLUMNS,
240 .column_descriptions = dir_table_cols,
243 /** The columns of the id table. */
244 enum user_table_columns {
245 /** The number of the directory. */
247 /** The number of bytes of all regular files in this dir owned by this id. */
249 /** The number of files in this dir owned by this id. */
251 /** Number of columns in this table. */
255 static struct osl_column_description user_table_cols[] = {
257 .storage_type = OSL_MAPPED_STORAGE,
258 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
260 .compare_function = uint64_compare,
261 .data_size = sizeof(uint64_t)
264 .storage_type = OSL_MAPPED_STORAGE,
265 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
266 .compare_function = size_compare,
268 .data_size = sizeof(uint64_t)
271 .storage_type = OSL_MAPPED_STORAGE,
272 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
273 .compare_function = size_compare,
275 .data_size = sizeof(uint64_t)
279 static struct osl_table *dir_table;
281 static int add_directory(char *dirname, uint64_t dir_num, uint64_t *dir_size,
284 struct osl_object dir_objects[NUM_DT_COLUMNS];
286 INFO_LOG("adding #%llu: %s\n", (long long unsigned)dir_num, dirname);
287 dir_objects[DT_NAME].data = dirname;
288 dir_objects[DT_NAME].size = strlen(dirname) + 1;
289 dir_objects[DT_NUM].data = &dir_num;
290 dir_objects[DT_NUM].size = sizeof(dir_num);
291 dir_objects[DT_BYTES].data = dir_size;
292 dir_objects[DT_BYTES].size = sizeof(*dir_size);
293 dir_objects[DT_FILES].data = dir_files;
294 dir_objects[DT_FILES].size = sizeof(*dir_files);
296 return osl_add_row(dir_table, dir_objects);
299 static uint32_t num_uids;
301 static int open_user_table(struct user_info *ui, int create)
305 ui->desc = para_malloc(sizeof(*ui->desc));
306 ui->desc->num_columns = NUM_UT_COLUMNS;
308 ui->desc->column_descriptions = user_table_cols;
309 ui->desc->dir = para_strdup(conf.database_dir_arg);
310 ui->desc->name = make_message("%u", (unsigned)ui->uid);
311 INFO_LOG(".............................uid #%u: %u\n",
312 (unsigned)num_uids, (unsigned)ui->uid);
314 ret = osl_create_table(ui->desc);
319 ret = osl_open_table(ui->desc, &ui->table);
324 free((char *)ui->desc->name);
325 free((char *)ui->desc->dir);
327 ui->desc->name = NULL;
328 ui->desc->dir = NULL;
335 #define uid_hash_bits 8
336 static uint32_t uid_hash_table_size = 1 << uid_hash_bits;
337 #define PRIME1 0x811c9dc5
338 #define PRIME2 0x01000193
340 static void create_hash_table(void)
342 uid_hash_table = para_calloc(uid_hash_table_size
343 * sizeof(struct user_info));
346 static void free_hash_table(void)
348 free(uid_hash_table);
349 uid_hash_table = NULL;
352 static int create_tables(void)
356 dir_table_desc.dir = para_strdup(conf.database_dir_arg);
357 ret = osl_create_table(&dir_table_desc);
365 * We use a hash table of size s=2^uid_hash_bits to map the uids into the
366 * interval [0..s-1]. Hash collisions are treated by open addressing, i.e.
367 * unused slots in the table are used to store different uids that hash to the
370 * If a hash collision occurs, different slots are successively probed in order
371 * to find an unused slot for the new uid. Probing is implemented via a second
372 * hash function that maps the uid to h=(uid * PRIME2) | 1, which is always an
375 * An odd number is sufficient to make sure each entry of the hash table gets
376 * probed for probe_num between 0 and s-1 because s is a power of two, hence
377 * the second hash value never has a common divisor (other than 1) with the hash table size.
378 * IOW: h is invertible in the ring [0..s-1] of integers modulo s.
380 static uint32_t double_hash(uint32_t uid, uint32_t probe_num)
382 return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num)
383 % uid_hash_table_size;
386 #define FOR_EACH_USER(ui) for (ui = uid_hash_table; ui && ui < uid_hash_table \
387 + uid_hash_table_size; ui++)
389 enum search_uid_flags {
391 CREATE_USER_TABLE = 2,
394 static int uid_is_admissible(uint32_t uid)
398 for (i = 0; i < conf.uid_given; i++) {
399 struct uid_range *ur = admissible_uids + i;
401 if (ur->low <= uid && ur->high >= uid)
404 i = !conf.uid_given || i < conf.uid_given;
405 DEBUG_LOG("uid %u is %sadmissible\n", (unsigned)uid,
410 static int search_uid(uint32_t uid, enum search_uid_flags flags,
411 struct user_info **ui_ptr)
415 for (p = 0; p < uid_hash_table_size; p++) {
416 struct user_info *ui = uid_hash_table + double_hash(uid, p);
423 ui->flags |= UI_FL_SLOT_USED;
424 if (!uid_is_admissible(uid))
426 ui->flags |= UI_FL_ADMISSIBLE;
427 ret = open_user_table(ui, flags & CREATE_USER_TABLE);
441 return flags? -E_HASH_TABLE_OVERFLOW : -E_BAD_UID;
444 static int update_user_row(struct osl_table *t, uint64_t dir_num,
448 struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
450 int ret = osl_get_row(t, UT_DIR_NUM, &obj, &row);
452 if (ret < 0 && ret != -E_RB_KEY_NOT_FOUND)
454 if (ret < 0) { /* this is the first file we add */
455 struct osl_object objects[NUM_UT_COLUMNS];
456 uint64_t num_files = 1;
458 objects[UT_DIR_NUM].data = &dir_num;
459 objects[UT_DIR_NUM].size = sizeof(dir_num);
460 objects[UT_BYTES].data = add;
461 objects[UT_BYTES].size = sizeof(*add);
462 objects[UT_FILES].data = &num_files;
463 objects[UT_FILES].size = sizeof(num_files);
464 INFO_LOG("######################### ret: %d\n", ret);
465 ret = osl_add_row(t, objects);
466 INFO_LOG("######################### ret: %d\n", ret);
468 } else { /* add size and increment file count */
470 struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
472 ret = osl_get_object(t, row, UT_BYTES, &obj1);
475 num = *(uint64_t *)obj1.data + *add;
476 ret = osl_update_object(t, row, UT_BYTES, &obj2);
479 ret = osl_get_object(t, row, UT_FILES, &obj1);
482 num = *(uint64_t *)obj1.data + 1;
483 return osl_update_object(t, row, UT_FILES, &obj2);
487 static uint64_t num_dirs;
488 static uint64_t num_files;
489 static uint64_t num_bytes;
491 int scan_dir(char *dirname)
494 struct dirent *entry;
495 int ret, cwd_fd, ret2;
496 uint64_t dir_size = 0, dir_files = 0;
497 uint64_t this_dir_num = num_dirs++;
499 DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)num_dirs, dirname);
500 ret = para_opendir(dirname, &dir, &cwd_fd);
502 if (ret != -ERRNO_TO_ERROR(EACCES))
504 WARNING_LOG("permission denied for %s\n", dirname);
507 while ((entry = readdir(dir))) {
513 struct user_info *ui;
515 if (!strcmp(entry->d_name, "."))
517 if (!strcmp(entry->d_name, ".."))
519 if (lstat(entry->d_name, &s) == -1) {
520 WARNING_LOG("lstat error for %s/%s\n", dirname,
525 if (!S_ISREG(m) && !S_ISDIR(m))
528 tmp = make_message("%s/%s", dirname, entry->d_name);
542 ret = search_uid(uid, CREATE_USER_TABLE | OPEN_USER_TABLE, &ui);
547 ret = update_user_row(ui->table, this_dir_num, &size);
551 ret = add_directory(dirname, this_dir_num, &dir_size, &dir_files);
554 ret2 = para_fchdir(cwd_fd);
555 if (ret2 < 0 && ret >= 0)
561 static int get_dir_name(struct osl_row *row, char **name)
563 struct osl_object obj;
564 int ret = osl_get_object(dir_table, row, DT_NAME, &obj);
572 const uint64_t size_unit_divisors[] = {
573 [size_unit_arg_b] = 1ULL,
574 [size_unit_arg_k] = 1024ULL,
575 [size_unit_arg_m] = 1024ULL * 1024ULL,
576 [size_unit_arg_g] = 1024ULL * 1024ULL * 1024ULL,
577 [size_unit_arg_t] = 1024ULL * 1024ULL * 1024ULL * 1024ULL,
580 const uint64_t count_unit_divisors[] = {
582 [count_unit_arg_n] = 1ULL,
583 [count_unit_arg_k] = 1000ULL,
584 [count_unit_arg_m] = 1000ULL * 1000ULL,
585 [count_unit_arg_g] = 1000ULL * 1000ULL * 1000ULL,
586 [count_unit_arg_t] = 1000ULL * 1000ULL * 1000ULL * 1000ULL,
589 const char size_unit_abbrevs[] = " BKMGT";
590 const char count_unit_abbrevs[] = " KMGT";
592 static void format_size_value(enum enum_size_unit unit, uint64_t value, char *result)
594 if (unit == size_unit_arg_h) /* human readable */
595 for (unit = size_unit_arg_b; unit < size_unit_arg_t && value > size_unit_divisors[unit + 1]; unit++)
597 sprintf(result, "%llu%c", (long long unsigned)value / size_unit_divisors[unit], size_unit_abbrevs[unit]);
600 static void format_count_value(enum enum_count_unit unit, uint64_t value, char *result)
602 if (unit == count_unit_arg_h) /* human readable */
603 for (unit = count_unit_arg_n; unit < count_unit_arg_t && value > count_unit_divisors[unit + 1]; unit++)
605 sprintf(result, "%llu%c", (long long unsigned)value / count_unit_divisors[unit], count_unit_abbrevs[unit]);
608 enum global_stats_flags {
609 GSF_PRINT_DIRNAME = 1,
612 GSF_COMPUTE_SUMMARY = 8,
615 struct global_stats_info {
617 enum global_stats_flags flags;
620 static int global_stats_loop_function(struct osl_row *row, void *data)
622 struct global_stats_info *gsi = data;
623 struct osl_object obj;
624 char *dirname, formated_value[25];
625 int ret, summary = gsi->flags & GSF_COMPUTE_SUMMARY;
627 if (!gsi->count && !summary)
628 return -E_LOOP_COMPLETE;
629 if (gsi->count && (gsi->flags & GSF_PRINT_DIRNAME)) {
630 ret = get_dir_name(row, &dirname);
633 printf("%s%s", dirname,
634 (gsi->flags & (GSF_PRINT_FILES | GSF_PRINT_BYTES))?
638 if (summary || (gsi->count && (gsi->flags & GSF_PRINT_FILES))) {
640 ret = osl_get_object(dir_table, row, DT_FILES, &obj);
643 files = *(uint64_t *)obj.data;
644 if (gsi->count && (gsi->flags & GSF_PRINT_FILES)) {
645 format_size_value(conf.size_unit_arg, files,
647 printf("%s%s", formated_value,
648 (gsi->flags & GSF_PRINT_BYTES)? "\t" : "\n");
653 if (summary || (gsi->count && (gsi->flags & GSF_PRINT_BYTES))) {
655 ret = osl_get_object(dir_table, row, DT_BYTES, &obj);
658 bytes = *(uint64_t *)obj.data;
659 if (gsi->count && (gsi->flags & GSF_PRINT_BYTES)) {
660 format_size_value(conf.size_unit_arg, bytes,
662 printf("%s\n", formated_value);
674 static void print_id_stats(void)
676 struct user_info *ui;
678 printf("--------------------- user summary (uid/dirs/files/bytes):\n");
680 char formated_dir_count[25], formated_file_count[25],
684 format_count_value(conf.count_unit_arg, ui->dirs,
686 format_count_value(conf.count_unit_arg, ui->files,
687 formated_file_count);
688 format_size_value(conf.size_unit_arg, ui->bytes,
690 printf("%u\t%s\t%s\t%s\n", (unsigned)ui->uid,
698 enum user_stats_flags {
699 USF_PRINT_DIRNAME = 1,
702 USF_COMPUTE_SUMMARY = 8,
705 struct user_stats_info {
707 enum user_stats_flags flags;
708 struct user_info *ui;
711 static int user_stats_loop_function(struct osl_row *row, void *data)
713 struct user_stats_info *usi = data;
714 struct osl_row *dir_row;
715 struct osl_object obj;
716 int ret, summary = usi->flags & GSF_COMPUTE_SUMMARY;
717 char formated_value[25];
719 if (!usi->count && !summary)
720 return -E_LOOP_COMPLETE;
721 if (usi->count && (usi->flags & USF_PRINT_DIRNAME)) {
723 ret = osl_get_object(usi->ui->table, row, UT_DIR_NUM, &obj);
726 ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row);
729 ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj);
735 (usi->flags & (USF_PRINT_FILES | USF_PRINT_BYTES))?
739 if (summary || (usi->count && (usi->flags & USF_PRINT_FILES))) {
741 ret = osl_get_object(usi->ui->table, row, UT_FILES, &obj);
744 files = *(uint64_t *)obj.data;
745 if (usi->count && (usi->flags & USF_PRINT_FILES)) {
746 format_size_value(conf.size_unit_arg, files,
748 printf("%s%s", formated_value,
749 (usi->flags & USF_PRINT_BYTES)? "\t" : "\n"
753 usi->ui->files += files;
755 if (summary || (usi->count && (usi->flags & USF_PRINT_BYTES))) {
757 ret = osl_get_object(usi->ui->table, row, UT_BYTES, &obj);
760 bytes = *(uint64_t *)obj.data;
761 if (usi->count && (usi->flags & USF_PRINT_BYTES)) {
762 format_size_value(conf.size_unit_arg, bytes,
764 printf("%s\n", formated_value);
767 usi->ui->bytes += bytes;
777 static void print_user_stats(void)
779 struct user_info *ui;
782 struct user_stats_info usi = {
783 .count = conf.limit_arg,
786 if (!ui_used(ui) || !ui_admissible(ui))
788 usi.flags = USF_PRINT_DIRNAME | USF_PRINT_BYTES | USF_COMPUTE_SUMMARY;
789 printf("************************************************ uid %u\n",
791 printf("----------------- Largest dirs -------------------\n");
792 osl_rbtree_loop_reverse(ui->table, UT_BYTES, &usi,
793 user_stats_loop_function);
794 printf("---------- dirs containing most files ------------\n");
795 usi.count = conf.limit_arg,
796 usi.flags = USF_PRINT_DIRNAME | USF_PRINT_FILES;
797 osl_rbtree_loop_reverse(ui->table, UT_FILES, &usi,
798 user_stats_loop_function);
802 static int print_statistics(void)
805 struct global_stats_info gsi = {
806 .count = conf.limit_arg,
807 .flags = GSF_PRINT_DIRNAME | GSF_PRINT_BYTES | GSF_COMPUTE_SUMMARY
810 printf("----------------- Largest dirs -------------------\n");
811 ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &gsi,
812 global_stats_loop_function);
813 if (ret < 0 && ret != -E_LOOP_COMPLETE)
815 gsi.count = conf.limit_arg;
817 gsi.flags = GSF_PRINT_DIRNAME | GSF_PRINT_FILES;
818 printf("---------- dirs containing most files ------------\n");
819 ret = osl_rbtree_loop_reverse(dir_table, DT_FILES, &gsi,
820 global_stats_loop_function);
821 if (ret < 0 && ret != -E_LOOP_COMPLETE)
824 printf("------------------ Global summary (dirs/files/bytes)\n"
825 "%llu\t%llu\t%llu\n",
826 (long long unsigned)num_dirs, (long long unsigned)num_files,
827 (long long unsigned)num_bytes);
833 static char *get_uid_list_name(void)
835 return make_message("%s/uid_list", conf.database_dir_arg);
838 static int write_uid_list(void)
840 char *buf, *filename;
842 struct user_info *ui;
843 size_t size = num_uids * sizeof(uint32_t);
848 buf = para_malloc(size);
850 if (!ui_used(ui) || !ui_admissible(ui))
852 DEBUG_LOG("saving uid %u\n", (unsigned) ui->uid);
853 write_u32(buf + count++ * sizeof(uint32_t), ui->uid);
855 filename = get_uid_list_name();
856 ret = para_write_file(filename, buf, size);
862 static int open_dir_table(void)
864 if (!dir_table_desc.dir) /* we did not create the table */
865 dir_table_desc.dir = para_strdup(conf.database_dir_arg);
866 return osl_open_table(&dir_table_desc, &dir_table);
869 static void close_dir_table(void)
875 ret = osl_close_table(dir_table, OSL_MARK_CLEAN);
877 ERROR_LOG("failed to close dir table: %s\n", error_txt(-ret));
878 free((char *)dir_table_desc.dir);
882 static void close_user_table(struct user_info *ui)
886 if (!ui || !ui_used(ui) || !ui_admissible(ui))
888 ret = osl_close_table(ui->table, OSL_MARK_CLEAN);
890 ERROR_LOG("failed to close user table %u: %s\n",
891 (unsigned) ui->uid, error_txt(-ret));
892 free((char *)ui->desc->name);
893 ui->desc->name = NULL;
894 free((char *)ui->desc->dir);
895 ui->desc->dir = NULL;
902 static void close_user_tables(void)
904 struct user_info *ui;
907 close_user_table(ui);
910 static void close_all_tables(void)
917 static int com_create()
919 int ret = create_tables();
922 ret = open_dir_table();
925 ret = scan_dir(conf.base_dir_arg);
928 ret = write_uid_list();
934 static int read_uid_file(void)
938 char *filename = get_uid_list_name(), *map;
939 int ret = mmap_full_file(filename, O_RDONLY, (void **)&map, &size, NULL);
942 INFO_LOG("failed to map %s\n", filename);
947 INFO_LOG("found %u uids in %s\n", (unsigned)num_uids, filename);
949 /* hash table size should be a power of two and larger than the number of uids */
950 uid_hash_table_size = 4;
951 while (uid_hash_table_size < num_uids)
952 uid_hash_table_size *= 2;
954 for (n = 0; n < num_uids; n++) {
955 uint32_t uid = read_u32(map + n * sizeof(uid));
956 ret = search_uid(uid, OPEN_USER_TABLE, NULL);
961 para_munmap(map, size);
965 static int com_select(void)
969 ret = open_dir_table();
972 ret = read_uid_file();
980 static int check_args(void)
987 admissible_uids = para_malloc(conf.uid_given * sizeof(*admissible_uids));
989 for (i = 0; i < conf.uid_given; i++) {
990 ret = parse_uid_range(conf.uid_arg[i], admissible_uids + i);
996 free(admissible_uids);
997 admissible_uids = NULL;
1001 int main(int argc, char **argv)
1004 struct cmdline_parser_params params = {
1007 .check_required = 0,
1008 .check_ambiguity = 0,
1012 cmdline_parser_ext(argc, argv, &conf, ¶ms); /* aborts on errors */
1017 if (conf.select_given)
1024 free(admissible_uids);
1026 ERROR_LOG("%s\n", error_txt(-ret));
1027 return -EXIT_FAILURE;
1029 return EXIT_SUCCESS;