]> git.tuebingen.mpg.de Git - adu.git/blob - adu.c
Implement --one-file-system (-x).
[adu.git] / adu.c
1 #include "adu.h"
2 #include <dirent.h> /* readdir() */
3
4 #include "gcc-compat.h"
5 #include "cmdline.h"
6 #include "fd.h"
7 #include "string.h"
8 #include "error.h"
9 #include "portable_io.h"
10
11 DEFINE_ERRLIST;
12 int osl_errno;
13
14 /** Command line and config file options. */
15 static struct gengetopt_args_info conf;
16
/** Bits stored in the flags field of struct user_info. */
enum uid_info_flags {
	/** Set if this slot of the hash table is occupied. */
	UI_FL_SLOT_USED = 1 << 0,
	/** Set if this uid is to be taken into account. */
	UI_FL_ADMISSIBLE = 1 << 1,
};
23
/** One slot of the uid hash table. */
struct user_info {
	/** The user id this slot belongs to. */
	uint32_t uid;
	/** Combination of the bits of enum uid_info_flags. */
	uint32_t flags;
	/** Handle of the per-user osl table, set by open_user_table(). */
	struct osl_table *table;
	/** Number of regular files owned by this user. */
	uint64_t files;
	/** Sum of the sizes of these files. */
	uint64_t bytes;
	/** Directory count, printed in the user summary. */
	uint64_t dirs;
	/** Heap-allocated description of the per-user table. */
	struct osl_table_description *desc;
};
33
/**
 * Buffer size for formatted numbers. A uint64_t has at most 20 decimal
 * digits, so this leaves room for a unit character and the terminating NUL.
 */
#define FORMATED_VALUE_SIZE 25

/**
 * Iterate over all slots of the uid hash table, used or not. The loop body
 * is never executed if the hash table has not been allocated.
 */
#define FOR_EACH_USER(ui) \
	for (ui = uid_hash_table; \
		ui && ui < uid_hash_table + uid_hash_table_size; \
		ui++)
39
40
/**
 * The hash table with one slot per user that owns at least one regular file.
 *
 * Users which are excluded by the --uid option occupy a slot as well. This
 * way it can be decided quickly whether a uid is admissible, and this lookup
 * has to be fast.
 */
static struct user_info *uid_hash_table = NULL;
49
/*
 * Unit suffixes for the table headers. The select command fills them in.
 */
static char count_unit_buf[4] = "( )";
static char size_unit_buf[4] = "( )";
52
53 static inline int ui_used(struct user_info *ui)
54 {
55         return ui->flags & UI_FL_SLOT_USED;
56 }
57
58 static inline int ui_admissible(struct user_info *ui)
59 {
60         return ui->flags & UI_FL_ADMISSIBLE;
61 }
62
/** A closed interval of user ids, both ends included. */
struct uid_range {
	/** Smallest uid of the range. */
	uint32_t low;
	/** Largest uid of the range. */
	uint32_t high;
};

/** The uid ranges which are taken into account, see uid_is_admissible(). */
static struct uid_range *admissible_uids = NULL;
69
/**
 * Convert a string to a uid.
 *
 * \param arg The string to convert.
 * \param uid Result pointer, only written on success.
 *
 * \return 1 on success, the negative return value of atoi64() if the
 * conversion failed, -ERRNO_TO_ERROR(EINVAL) if the number does not fit
 * into 32 bits or is negative.
 */
static inline int check_uid_arg(const char *arg, uint32_t *uid)
{
	/*
	 * The conversion yields a signed value, so a 64 bit integer is
	 * needed to cover the full range of an unsigned 32 bit uid.
	 */
	int64_t parsed;
	const int64_t upper = (uint32_t)~0U;
	const int ret = atoi64(arg, &parsed);

	if (ret < 0)
		return ret;
	if (parsed < 0 || parsed > upper)
		return -ERRNO_TO_ERROR(EINVAL);
	*uid = parsed;
	return 1;
}
87
88 static int parse_uid_range(const char *orig_arg, struct uid_range *ur)
89 {
90         int ret;
91         char *arg = adu_strdup(orig_arg), *p = strchr(arg, '-');
92
93         if (!p || p == arg) { /* -42 or 42 */
94                 ret = check_uid_arg(p? p + 1 : arg, &ur->high);
95                 if (ret < 0)
96                         goto out;
97                 ur->low = p? 0 : ur->high;
98                 ret = 1;
99                 goto out;
100         }
101         /* 42- or 42-4711 */
102         *p = '\0';
103         p++;
104         ret = check_uid_arg(arg, &ur->low);
105         if (ret < 0)
106                 goto out;
107         ur->high = ~0U;
108         if (*p) { /* 42-4711 */
109                 ret = check_uid_arg(p, &ur->high);
110                 if (ret < 0)
111                         goto out;
112         }
113         if (ur->low > ur->high)
114                 ret = -ERRNO_TO_ERROR(EINVAL);
115 out:
116         if (ret < 0)
117                 ERROR_LOG("bad uid option: %s\n", orig_arg);
118         else
119                 INFO_LOG("admissible uid range: %u - %u\n", ur->low,
120                         ur->high);
121         free(arg);
122         return ret;
123 }
124
/**
 * Three-way comparison: 1 if x < y, -1 if x > y, 0 if x == y.
 *
 * Both arguments are evaluated twice, so they must be free of side effects.
 */
#define NUM_COMPARE(x, y) (((x) < (y)) - ((x) > (y)))
127
128 /**
129  * The log function.
130  *
131  * \param ll Loglevel.
132  * \param fml Usual format string.
133  *
134  * All XXX_LOG() macros use this function.
135  */
136 __printf_2_3 void __log(int ll, const char* fmt,...)
137 {
138         va_list argp;
139         FILE *outfd;
140         struct tm *tm;
141         time_t t1;
142         char str[255] = "";
143
144         if (ll < conf.loglevel_arg)
145                 return;
146         outfd = stderr;
147         time(&t1);
148         tm = localtime(&t1);
149         strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
150         fprintf(outfd, "%s ", str);
151         va_start(argp, fmt);
152         vfprintf(outfd, fmt, argp);
153         va_end(argp);
154 }
155
156 /**
157  * Compare the size of two directories
158  *
159  * \param obj1 Pointer to the first object.
160  * \param obj2 Pointer to the second object.
161  *
162  * This function first compares the size values as usual integers. If they compare as
163  * equal, the address of \a obj1 and \a obj2 are compared. So this compare function
164  * returns zero if and only if \a obj1 and \a obj2 point to the same memory area.
165  */
166 static int size_compare(const struct osl_object *obj1, const struct osl_object *obj2)
167 {
168         uint64_t d1 = *(uint64_t *)obj1->data;
169         uint64_t d2 = *(uint64_t *)obj2->data;
170         int ret = NUM_COMPARE(d2, d1);
171
172         if (ret)
173                 return ret;
174         //INFO_LOG("addresses: %p, %p\n", obj1->data, obj2->data);
175         return NUM_COMPARE(obj2->data, obj1->data);
176 }
177
178 /**
179  * Compare two osl objects pointing to unsigned integers of 64 bit size.
180  *
181  * \param obj1 Pointer to the first integer.
182  * \param obj2 Pointer to the second integer.
183  *
184  * \return The values required for an osl compare function.
185  *
186  * \sa osl_compare_func, osl_hash_compare().
187  */
188 static int uint64_compare(const struct osl_object *obj1,
189                 const struct osl_object *obj2)
190 {
191         uint64_t d1 = read_u64((const char *)obj1->data);
192         uint64_t d2 = read_u64((const char *)obj2->data);
193
194         if (d1 < d2)
195                 return 1;
196         if (d1 > d2)
197                 return -1;
198         return 0;
199 }
200
/** Indices of the columns of the directory table. */
enum dir_table_columns {
	/** The name of the directory. */
	DT_NAME = 0,
	/** The number that was assigned to the directory. */
	DT_NUM,
	/** The number of the parent directory. */
	DT_PARENT_NUM,
	/** The sum of the sizes of all regular files. */
	DT_BYTES,
	/** The number of regular files. */
	DT_FILES,
	/** Not a column but the number of columns of this table. */
	NUM_DT_COLUMNS
};
216
217 static struct osl_column_description dir_table_cols[] = {
218         [DT_NAME] = {
219                 .storage_type = OSL_MAPPED_STORAGE,
220                 .storage_flags = 0,
221                 .name = "dir",
222         },
223         [DT_NUM] = {
224                 .storage_type = OSL_MAPPED_STORAGE,
225                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
226                 .name = "num",
227                 .compare_function = uint64_compare,
228                 .data_size = sizeof(uint64_t)
229         },
230         [DT_PARENT_NUM] = {
231                 .storage_type = OSL_MAPPED_STORAGE,
232                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
233                 .name = "parent_num",
234                 .compare_function = size_compare,
235                 .data_size = sizeof(uint64_t)
236         },
237         [DT_BYTES] = {
238                 .storage_type = OSL_MAPPED_STORAGE,
239                 .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
240                 .compare_function = size_compare,
241                 .name = "num_bytes",
242                 .data_size = sizeof(uint64_t)
243         },
244         [DT_FILES] = {
245                 .storage_type = OSL_MAPPED_STORAGE,
246                 .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
247                 .compare_function = size_compare,
248                 .name = "num_files",
249                 .data_size = sizeof(uint64_t)
250         }
251 };
252
253 static struct osl_table_description dir_table_desc = {
254         .name = "dir_table",
255         .num_columns = NUM_DT_COLUMNS,
256         .flags = 0,
257         .column_descriptions = dir_table_cols,
258 };
259
/** Indices of the columns of the per-user (id) tables. */
enum user_table_columns {
	/** The number of the directory. */
	UT_DIR_NUM = 0,
	/** The size of all regular files in this dir owned by this id. */
	UT_BYTES,
	/** The number of files in this dir owned by this id. */
	UT_FILES,
	/** Not a column but the number of columns of this table. */
	NUM_UT_COLUMNS
};
271
272 static struct osl_column_description user_table_cols[] = {
273         [UT_DIR_NUM] = {
274                 .storage_type = OSL_MAPPED_STORAGE,
275                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
276                 .name = "dir_num",
277                 .compare_function = uint64_compare,
278                 .data_size = sizeof(uint64_t)
279         },
280         [UT_BYTES] = {
281                 .storage_type = OSL_MAPPED_STORAGE,
282                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
283                 .compare_function = size_compare,
284                 .name = "num_bytes",
285                 .data_size = sizeof(uint64_t)
286         },
287         [UT_FILES] = {
288                 .storage_type = OSL_MAPPED_STORAGE,
289                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
290                 .compare_function = size_compare,
291                 .name = "num_files",
292                 .data_size = sizeof(uint64_t)
293         },
294 };
295
/** Handle of the directory table, NULL while the table is not open. */
static struct osl_table *dir_table = NULL;
297
298 static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num,
299                 uint64_t *dir_size, uint64_t *dir_files)
300 {
301         struct osl_object dir_objects[NUM_DT_COLUMNS];
302
303         INFO_LOG("adding #%llu: %s\n", (long long unsigned)*dir_num, dirname);
304         dir_objects[DT_NAME].data = dirname;
305         dir_objects[DT_NAME].size = strlen(dirname) + 1;
306         dir_objects[DT_NUM].data = dir_num;
307         dir_objects[DT_NUM].size = sizeof(*dir_num);
308         dir_objects[DT_PARENT_NUM].data = parent_dir_num;
309         dir_objects[DT_PARENT_NUM].size = sizeof(*parent_dir_num);
310         dir_objects[DT_BYTES].data = dir_size;
311         dir_objects[DT_BYTES].size = sizeof(*dir_size);
312         dir_objects[DT_FILES].data = dir_files;
313         dir_objects[DT_FILES].size = sizeof(*dir_files);
314         return osl(osl_add_row(dir_table, dir_objects));
315 }
316
/** Incremented whenever open_user_table() creates a new user table. */
static uint32_t num_uids = 0;
318
319 static int open_user_table(struct user_info *ui, int create)
320 {
321         int ret;
322
323         ui->desc = adu_malloc(sizeof(*ui->desc));
324         ui->desc->num_columns = NUM_UT_COLUMNS;
325         ui->desc->flags = 0;
326         ui->desc->column_descriptions = user_table_cols;
327         ui->desc->dir = adu_strdup(conf.database_dir_arg);
328         ui->desc->name = make_message("%u", (unsigned)ui->uid);
329         INFO_LOG(".............................uid #%u: %u\n",
330                 (unsigned)num_uids, (unsigned)ui->uid);
331         if (create) {
332                 ret = osl(osl_create_table(ui->desc));
333                 if (ret < 0)
334                         goto err;
335                 num_uids++;
336         }
337         ret = osl(osl_open_table(ui->desc, &ui->table));
338         if (ret < 0)
339                 goto err;
340         return 1;
341 err:
342         free((char *)ui->desc->name);
343         free((char *)ui->desc->dir);
344         free(ui->desc);
345         ui->desc->name = NULL;
346         ui->desc->dir = NULL;
347         ui->desc = NULL;
348         ui->table = NULL;
349         ui->flags = 0;
350         return ret;
351 }
352
/** The hash table has 2^uid_hash_bits slots. */
#define uid_hash_bits 8
/** Number of slots of the uid hash table, always a power of two. */
static uint32_t uid_hash_table_size = 1 << uid_hash_bits;
/** Multiplier of the first hash function of double_hash(). */
#define PRIME1 0x811c9dc5
/** Multiplier of the second hash function of double_hash(). */
#define PRIME2 0x01000193
357
358 static void create_hash_table(void)
359 {
360         uid_hash_table = adu_calloc(uid_hash_table_size
361                 * sizeof(struct user_info));
362 }
363
364 static void free_hash_table(void)
365 {
366         free(uid_hash_table);
367         uid_hash_table = NULL;
368 }
369
370 static int create_tables(void)
371 {
372         int ret;
373
374         dir_table_desc.dir = adu_strdup(conf.database_dir_arg);
375         ret = osl(osl_create_table(&dir_table_desc));
376         if (ret < 0)
377                 return ret;
378         create_hash_table();
379         return 1;
380 }
381
382 static void close_dir_table(void)
383 {
384         int ret;
385
386         if (!dir_table)
387                 return;
388         ret = osl(osl_close_table(dir_table, OSL_MARK_CLEAN));
389         if (ret < 0)
390                 ERROR_LOG("failed to close dir table: %s\n", adu_strerror(-ret));
391         free((char *)dir_table_desc.dir);
392         dir_table = NULL;
393 }
394
395 static void close_user_table(struct user_info *ui)
396 {
397         int ret;
398
399         if (!ui || !ui_used(ui) || !ui_admissible(ui))
400                 return;
401         ret = osl(osl_close_table(ui->table, OSL_MARK_CLEAN));
402         if (ret < 0)
403                 ERROR_LOG("failed to close user table %u: %s\n",
404                         (unsigned) ui->uid, adu_strerror(-ret));
405         free((char *)ui->desc->name);
406         ui->desc->name = NULL;
407         free((char *)ui->desc->dir);
408         ui->desc->dir = NULL;
409         free(ui->desc);
410         ui->desc = NULL;
411         ui->table = NULL;
412         ui->flags = 0;
413 }
414
415 static void close_user_tables(void)
416 {
417         struct user_info *ui;
418
419         FOR_EACH_USER(ui)
420                 close_user_table(ui);
421 }
422
/**
 * Close the directory table and all user tables.
 *
 * The hash table is freed last because the user tables are reached through
 * its slots.
 */
static void close_all_tables(void)
{
	close_dir_table();
	close_user_tables();
	free_hash_table();
}
429
/**
 * The number of the last signal caught, zero if there was none.
 *
 * This is written by the signal handler and read by the main program, hence
 * it has to be a volatile object of type sig_atomic_t.
 */
static volatile sig_atomic_t signum;

/**
 * Remember the signal number, nothing else.
 *
 * \param s The number of the signal that was delivered.
 *
 * The actual work is done by check_signals() outside of signal context.
 */
static void signal_handler(int s)
{
	signum = s;
}
436
437 static void check_signals(void)
438 {
439         if (likely(!signum))
440                 return;
441         EMERG_LOG("caught signal %d\n", signum);
442         close_all_tables();
443         exit(EXIT_FAILURE);
444 }
445
446 static int init_signals(void)
447 {
448         if (signal(SIGINT, &signal_handler) == SIG_ERR)
449                 return -E_SIGNAL_SIG_ERR;
450         if (signal(SIGTERM, &signal_handler) == SIG_ERR)
451                 return -E_SIGNAL_SIG_ERR;
452         return 1;
453 }
454
455 /*
456  * We use a hash table of size s=2^uid_hash_bits to map the uids into the
457  * interval [0..s]. Hash collisions are treated by open addressing, i.e.
458  * unused slots in the table are used to store different uids that hash to the
459  * same slot.
460  *
461  * If a hash collision occurs, different slots are successively probed in order
462  * to find an unused slot for the new uid. Probing is implemented via a second
463  * hash function that maps the uid to h=(uid * PRIME2) | 1, which is always an
464  * odd number.
465  *
466  * An odd number is sufficient to make sure each entry of the hash table gets
467  * probed for probe_num between 0 and s-1 because s is a power of two, hence
468  * the second hash value has never a common divisor with the hash table size.
469  * IOW: h is invertible in the ring [0..s].
470  */
471 static uint32_t double_hash(uint32_t uid, uint32_t probe_num)
472 {
473         return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num)
474                 % uid_hash_table_size;
475 }
476
/** What search_uid() does if the uid is not yet in the hash table. */
enum search_uid_flags {
	/** Open the table of the new user. */
	OPEN_USER_TABLE = 1 << 0,
	/** Create the table of the new user before opening it. */
	CREATE_USER_TABLE = 1 << 1,
};
481
482 static int uid_is_admissible(uint32_t uid)
483 {
484         int i;
485
486         for (i = 0; i < conf.uid_given; i++) {
487                 struct uid_range *ur = admissible_uids + i;
488
489                 if (ur->low <= uid && ur->high >= uid)
490                         break;
491         }
492         i = !conf.uid_given || i < conf.uid_given;
493         DEBUG_LOG("uid %u is %sadmissible\n", (unsigned)uid,
494                 i? "" : "not ");
495         return i;
496 }
497
498 static int search_uid(uint32_t uid, enum search_uid_flags flags,
499                 struct user_info **ui_ptr)
500 {
501         uint32_t p;
502
503         for (p = 0; p < uid_hash_table_size; p++) {
504                 struct user_info *ui = uid_hash_table + double_hash(uid, p);
505
506                 if (!ui_used(ui)) {
507                         int ret;
508                         if (!flags)
509                                 return -E_BAD_UID;
510                         ui->uid = uid;
511                         ui->flags |= UI_FL_SLOT_USED;
512                         if (!uid_is_admissible(uid))
513                                 return 0;
514                         ui->flags |= UI_FL_ADMISSIBLE;
515                         ret = open_user_table(ui, flags & CREATE_USER_TABLE);
516                         if (ret < 0)
517                                 return ret;
518
519                         if (ui_ptr)
520                                 *ui_ptr = ui;
521                         return 1;
522                 }
523                 if (ui->uid != uid)
524                         continue;
525                 if (ui_ptr)
526                         *ui_ptr = ui;
527                 return 0;
528         }
529         return flags? -E_HASH_TABLE_OVERFLOW : -E_BAD_UID;
530 }
531
532 static int update_user_row(struct osl_table *t, uint64_t dir_num,
533                 uint64_t *add)
534 {
535         struct osl_row *row;
536         struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
537
538         int ret = osl(osl_get_row(t, UT_DIR_NUM, &obj, &row));
539
540         if (ret == -E_OSL && osl_errno != E_OSL_RB_KEY_NOT_FOUND)
541                 return ret;
542         if (ret < 0) { /* this is the first file we add */
543                 struct osl_object objects[NUM_UT_COLUMNS];
544                 uint64_t num_files = 1;
545
546                 objects[UT_DIR_NUM].data = &dir_num;
547                 objects[UT_DIR_NUM].size = sizeof(dir_num);
548                 objects[UT_BYTES].data = add;
549                 objects[UT_BYTES].size = sizeof(*add);
550                 objects[UT_FILES].data = &num_files;
551                 objects[UT_FILES].size = sizeof(num_files);
552                 INFO_LOG("######################### ret: %d\n", ret);
553                 ret = osl(osl_add_row(t, objects));
554                 INFO_LOG("######################### ret: %d\n", ret);
555                 return ret;
556         } else { /* add size and increment file count */
557                 uint64_t num;
558                 struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
559
560                 ret = osl(osl_get_object(t, row, UT_BYTES, &obj1));
561                 if (ret < 0)
562                         return ret;
563                 num = *(uint64_t *)obj1.data + *add;
564                 ret = osl(osl_update_object(t, row, UT_BYTES, &obj2));
565                 if (ret < 0)
566                         return ret;
567                 ret = osl(osl_get_object(t, row, UT_FILES, &obj1));
568                 if (ret < 0)
569                         return ret;
570                 num = *(uint64_t *)obj1.data + 1;
571                 return osl(osl_update_object(t, row, UT_FILES, &obj2));
572         }
573 }
574
/** Number of directories seen so far, also used to number them. */
static uint64_t num_dirs = 0;
/** Total number of regular files. */
static uint64_t num_files = 0;
/** Total size of all regular files. */
static uint64_t num_bytes = 0;

/** The id of the device which contains the base dir. */
static dev_t device_id;
581
582 static int scan_dir(char *dirname, uint64_t *parent_dir_num)
583 {
584         DIR *dir;
585         struct dirent *entry;
586         int ret, cwd_fd, ret2;
587         uint64_t dir_size = 0, dir_files = 0;
588         uint64_t this_dir_num = ++num_dirs;
589
590         check_signals();
591         DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)num_dirs, dirname);
592         ret = adu_opendir(dirname, &dir, &cwd_fd);
593         if (ret < 0) {
594                 if (ret != -ERRNO_TO_ERROR(EACCES))
595                         return ret;
596                 WARNING_LOG("permission denied for %s\n", dirname);
597                 return 1;
598         }
599         while ((entry = readdir(dir))) {
600                 mode_t m;
601                 struct stat s;
602                 uint32_t uid;
603                 uint64_t size;
604                 struct user_info *ui;
605
606                 if (!strcmp(entry->d_name, "."))
607                         continue;
608                 if (!strcmp(entry->d_name, ".."))
609                         continue;
610                 if (lstat(entry->d_name, &s) == -1) {
611                         WARNING_LOG("lstat error for %s/%s\n", dirname,
612                                 entry->d_name);
613                         continue;
614                 }
615                 m = s.st_mode;
616                 if (!S_ISREG(m) && !S_ISDIR(m))
617                         continue;
618                 if (S_ISDIR(m)) {
619                         if (conf.one_file_system_given && s.st_dev != device_id)
620                                 continue;
621                         ret = scan_dir(entry->d_name, &this_dir_num);
622                         if (ret < 0)
623                                 goto out;
624                         continue;
625                 }
626                 /* regular file */
627                 size = s.st_size;
628                 dir_size += size;
629                 num_bytes += size;
630                 dir_files++;
631                 num_files++;
632                 uid = s.st_uid;
633                 ret = search_uid(uid, CREATE_USER_TABLE | OPEN_USER_TABLE, &ui);
634                 if (ret < 0)
635                         goto out;
636                 ui->bytes += size;
637                 ui->files++;
638                 ret = update_user_row(ui->table, this_dir_num, &size);
639                 if (ret < 0)
640                         goto out;
641         }
642         ret = add_directory(dirname, &this_dir_num, parent_dir_num,
643                         &dir_size, &dir_files);
644 out:
645         closedir(dir);
646         ret2 = adu_fchdir(cwd_fd);
647         if (ret2 < 0 && ret >= 0)
648                 ret = ret2;
649         close(cwd_fd);
650         return ret;
651 }
652
653 static int get_dir_name_by_number(uint64_t *dirnum, char **name)
654 {
655         char *result = NULL, *tmp;
656         struct osl_row *row;
657         uint64_t val = *dirnum;
658         struct osl_object obj = {.data = &val, .size = sizeof(val)};
659         int ret;
660
661 again:
662         ret = osl(osl_get_row(dir_table, DT_NUM, &obj, &row));
663         if (ret < 0)
664                 goto out;
665         ret = osl(osl_get_object(dir_table, row, DT_NAME, &obj));
666         if (ret < 0)
667                 goto out;
668         if (result) {
669                 tmp = make_message("%s/%s", (char *)obj.data, result);
670                 free(result);
671                 result = tmp;
672         } else
673                 result = adu_strdup((char *)obj.data);
674         ret = osl(osl_get_object(dir_table, row, DT_PARENT_NUM, &obj));
675         if (ret < 0)
676                 goto out;
677         val = *(uint64_t *)obj.data;
678         if (val)
679                 goto again;
680 out:
681         if (ret < 0) {
682                 free(result);
683                 *name = NULL;
684         } else
685                 *name = result;
686         return ret;
687 }
688
689 static int get_dir_name_of_row(struct osl_row *dir_table_row, char **name)
690 {
691         struct osl_object obj;
692         int ret;
693         char *this_dir, *prefix = NULL;
694
695         *name = NULL;
696         ret = osl(osl_get_object(dir_table, dir_table_row, DT_NAME, &obj));
697         if (ret < 0)
698                 return ret;
699         this_dir = adu_strdup((char *)obj.data);
700         ret = osl(osl_get_object(dir_table, dir_table_row, DT_PARENT_NUM, &obj));
701         if (ret < 0)
702                 goto out;
703         if (!*(uint64_t *)obj.data) {
704                 *name = this_dir;
705                 return 1;
706         }
707         ret = get_dir_name_by_number((uint64_t *)obj.data, &prefix);
708         if (ret < 0)
709                 goto out;
710         *name = make_message("%s/%s", prefix, this_dir);
711         free(prefix);
712         ret = 1;
713 out:
714         free(this_dir);
715         return ret;
716 }
717
718 const uint64_t size_unit_divisors[] = {
719         [size_unit_arg_b] = 1ULL,
720         [size_unit_arg_k] = 1024ULL,
721         [size_unit_arg_m] = 1024ULL * 1024ULL,
722         [size_unit_arg_g] = 1024ULL * 1024ULL * 1024ULL,
723         [size_unit_arg_t] = 1024ULL * 1024ULL * 1024ULL * 1024ULL,
724 };
725
726 const uint64_t count_unit_divisors[] = {
727
728         [count_unit_arg_n] = 1ULL,
729         [count_unit_arg_k] = 1000ULL,
730         [count_unit_arg_m] = 1000ULL * 1000ULL,
731         [count_unit_arg_g] = 1000ULL * 1000ULL * 1000ULL,
732         [count_unit_arg_t] = 1000ULL * 1000ULL * 1000ULL * 1000ULL,
733 };
734
/** One character per size unit, indexed by the unit. */
const char size_unit_abbrevs[] = " BKMGT";
/** One character per count unit, indexed by the unit. */
const char count_unit_abbrevs[] = "  kmgt";
737
738 static enum enum_size_unit format_size_value(enum enum_size_unit unit,
739                 uint64_t value, int print_unit, char *result)
740 {
741         enum enum_size_unit u = unit;
742         char unit_buf[2] = "\0\0";
743
744         if (unit == size_unit_arg_h) /* human readable */
745                 for (u = size_unit_arg_b; u < size_unit_arg_t &&
746                                 value > size_unit_divisors[u + 1]; u++)
747                         ; /* nothing */
748         if (print_unit)
749                 unit_buf[0] = size_unit_abbrevs[u];
750         sprintf(result, "%llu%s",
751                 (long long unsigned)value / size_unit_divisors[u], unit_buf);
752         return u;
753 }
754
755 static enum enum_count_unit format_count_value(enum enum_count_unit unit,
756                 uint64_t value, int print_unit, char *result)
757 {
758         enum enum_count_unit u = unit;
759         char unit_buf[2] = "\0\0";
760
761         if (unit == count_unit_arg_h) /* human readable */
762                 for (u = count_unit_arg_n; u < count_unit_arg_t &&
763                                 value > count_unit_divisors[u + 1]; u++)
764                         ; /* nothing */
765         if (print_unit)
766                 unit_buf[0] = count_unit_abbrevs[u];
767         sprintf(result, "%llu%s",
768                 (long long unsigned)value / count_unit_divisors[u], unit_buf);
769         return u;
770 }
771
/** Select what global_stats_loop_function() prints and computes. */
enum global_stats_flags {
	/** Print the path of each directory. */
	GSF_PRINT_DIRNAME = 1 << 0,
	/** Print the size of each directory. */
	GSF_PRINT_BYTES = 1 << 1,
	/** Print the number of files of each directory. */
	GSF_PRINT_FILES = 1 << 2,
	/** Add up the global counters. */
	GSF_COMPUTE_SUMMARY = 1 << 3,
};

/** The data passed to global_stats_loop_function(). */
struct global_stats_info {
	/** The number of rows which are still to be printed. */
	uint32_t count;
	/** The error code if the loop function failed. */
	int ret;
	/** The osl error number if \a ret is -E_OSL, zero otherwise. */
	int osl_errno;
	/** See enum global_stats_flags. */
	enum global_stats_flags flags;
};
785
786 static int global_stats_loop_function(struct osl_row *row, void *data)
787 {
788         struct global_stats_info *gsi = data;
789         struct osl_object obj;
790         char *dirname, formated_value[FORMATED_VALUE_SIZE];
791         int ret, summary = gsi->flags & GSF_COMPUTE_SUMMARY;
792
793         check_signals();
794         if (!gsi->count && !summary) {
795                 ret = -E_LOOP_COMPLETE;
796                 goto err;
797         }
798         if (summary || (gsi->count && (gsi->flags & GSF_PRINT_FILES))) {
799                 uint64_t files;
800                 ret = osl(osl_get_object(dir_table, row, DT_FILES, &obj));
801                 if (ret < 0)
802                         goto err;
803                 files = *(uint64_t *)obj.data;
804                 if (gsi->count && (gsi->flags & GSF_PRINT_FILES)) {
805                         format_count_value(conf.count_unit_arg, files,
806                                 conf.count_unit_arg == count_unit_arg_h,
807                                 formated_value);
808                         printf("\t%s%s", formated_value,
809                                 (gsi->flags & (GSF_PRINT_BYTES | GSF_PRINT_DIRNAME))?
810                                 "\t" : "\n");
811                 }
812                 if (summary)
813                         num_files += files;
814         }
815         if (summary || (gsi->count && (gsi->flags & GSF_PRINT_BYTES))) {
816                 uint64_t bytes;
817                 ret = osl(osl_get_object(dir_table, row, DT_BYTES, &obj));
818                 if (ret < 0)
819                         goto err;
820                 bytes = *(uint64_t *)obj.data;
821                 if (gsi->count && (gsi->flags & GSF_PRINT_BYTES)) {
822                         format_size_value(conf.size_unit_arg, bytes,
823                                 conf.size_unit_arg == size_unit_arg_h,
824                                 formated_value);
825                         printf("%s%s%s",
826                                 (gsi->flags & GSF_PRINT_FILES)? "" : "\t",
827                                 formated_value,
828                                 (gsi->flags & GSF_PRINT_DIRNAME)? "\t" : "\n"
829                         );
830                 }
831                 if (summary) {
832                         num_bytes += bytes;
833                         num_dirs++;
834                 }
835         }
836         if (gsi->count && (gsi->flags & GSF_PRINT_DIRNAME)) {
837                 ret = get_dir_name_of_row(row, &dirname);
838                 if (ret < 0)
839                         goto err;
840                 printf("%s%s\n",
841                         (gsi->flags & (GSF_PRINT_BYTES | GSF_PRINT_FILES))? "" : "\t",
842                         dirname);
843                 free(dirname);
844         }
845         if (gsi->count > 0)
846                 gsi->count--;
847         return 1;
848 err:
849         gsi->ret = ret;
850         gsi->osl_errno = (ret == -E_OSL)? osl_errno : 0;
851         return -1;
852 }
853
854 static void print_id_stats(void)
855 {
856         struct user_info *ui;
857
858         printf("User summary "
859                 "(uid/dirs%s/files%s/size%s):\n",
860                 count_unit_buf, count_unit_buf, size_unit_buf);
861         FOR_EACH_USER(ui) {
862                 char formated_dir_count[FORMATED_VALUE_SIZE],
863                         formated_file_count[FORMATED_VALUE_SIZE],
864                         formated_bytes[FORMATED_VALUE_SIZE ];
865                 if (!ui_used(ui) || !ui_admissible(ui))
866                         continue;
867                 format_count_value(conf.count_unit_arg, ui->dirs,
868                         conf.count_unit_arg == count_unit_arg_h,
869                         formated_dir_count);
870                 format_count_value(conf.count_unit_arg, ui->files,
871                         conf.count_unit_arg == count_unit_arg_h,
872                         formated_file_count);
873                 format_size_value(conf.size_unit_arg, ui->bytes,
874                         conf.size_unit_arg == size_unit_arg_h,
875                         formated_bytes);
876                 printf("\t%u\t%s\t%s\t%s\n", (unsigned)ui->uid,
877                         formated_dir_count,
878                         formated_file_count,
879                         formated_bytes
880                 );
881         }
882 }
883
/** Bit flags stored in the flags field of struct user_stats_info. */
enum user_stats_flags {
	/** Print the directory name of each row. */
	USF_PRINT_DIRNAME = 1 << 0,
	/** Print the formatted byte count of each row. */
	USF_PRINT_BYTES = 1 << 1,
	/** Print the formatted file count of each row. */
	USF_PRINT_FILES = 1 << 2,
	/**
	 * Set by print_user_stats() for its by-size pass; presumably
	 * requests that the per-user totals be accumulated during the loop.
	 */
	USF_COMPUTE_SUMMARY = 1 << 3,
};
890
891 struct user_stats_info {
892         uint32_t count;
893         enum user_stats_flags flags;
894         int ret;
895         int osl_errno;
896         struct user_info *ui;
897 };
898
899 static int user_stats_loop_function(struct osl_row *row, void *data)
900 {
901         struct user_stats_info *usi = data;
902         struct osl_object obj;
903         int ret, summary = usi->flags & GSF_COMPUTE_SUMMARY;
904         char formated_value[FORMATED_VALUE_SIZE];
905
906         check_signals();
907         if (!usi->count && !summary) {
908                 ret = -E_LOOP_COMPLETE;
909                 goto err;
910         }
911         if (summary || (usi->count && (usi->flags & USF_PRINT_FILES))) {
912                 uint64_t files;
913                 ret = osl(osl_get_object(usi->ui->table, row, UT_FILES, &obj));
914                 if (ret < 0)
915                         goto err;
916                 files = *(uint64_t *)obj.data;
917                 if (usi->count && (usi->flags & USF_PRINT_FILES)) {
918                         format_count_value(conf.count_unit_arg, files,
919                                 conf.count_unit_arg == count_unit_arg_h,
920                                 formated_value);
921                         printf("\t%s%s", formated_value,
922                                 (usi->flags & (USF_PRINT_BYTES | USF_PRINT_DIRNAME))?
923                                         "\t" : "\n"
924                         );
925                 }
926                 if (summary)
927                         usi->ui->files += files;
928         }
929         if (summary || (usi->count && (usi->flags & USF_PRINT_BYTES))) {
930                 uint64_t bytes;
931                 ret = osl(osl_get_object(usi->ui->table, row, UT_BYTES, &obj));
932                 if (ret < 0)
933                         goto err;
934                 bytes = *(uint64_t *)obj.data;
935                 if (usi->count && (usi->flags & USF_PRINT_BYTES)) {
936                         format_size_value(conf.size_unit_arg, bytes,
937                                 conf.size_unit_arg == size_unit_arg_h,
938                                 formated_value);
939                         printf("%s%s%s",
940                                 (usi->flags & USF_PRINT_FILES)? "" : "\t",
941                                 formated_value,
942                                 usi->flags & USF_PRINT_DIRNAME?  "\t" : "\n"
943                         );
944                 }
945                 if (summary) {
946                         usi->ui->bytes += bytes;
947                         usi->ui->dirs++;
948                 }
949
950         }
951         if (usi->count && (usi->flags & USF_PRINT_DIRNAME)) {
952                 char *dirname;
953                 ret = osl(osl_get_object(usi->ui->table, row, UT_DIR_NUM, &obj));
954                 if (ret < 0)
955                         goto err;
956                 ret = get_dir_name_by_number((uint64_t *)obj.data, &dirname);
957                 if (ret < 0)
958                         goto err;
959                 printf("%s%s\n",
960                         (usi->flags & (USF_PRINT_BYTES | USF_PRINT_FILES))? "" : "\t",
961                         dirname);
962                 free(dirname);
963         }
964         if (usi->count > 0)
965                 usi->count--;
966         return 1;
967 err:
968         usi->ret = ret;
969         usi->osl_errno = (ret == -E_OSL)? osl_errno : 0;
970         return -1;
971 }
972
973 static int check_loop_return(int ret, int loop_ret, int loop_osl_errno)
974 {
975         if (ret >= 0)
976                 return ret;
977         assert(ret == -E_OSL);
978         if (osl_errno != E_OSL_LOOP)
979                 /* error not caused by loop function returning negative. */
980                 return ret;
981         assert(loop_ret < 0);
982         if (loop_ret == -E_LOOP_COMPLETE) /* no error */
983                 return 1;
984         if (loop_ret == -E_OSL) { /* osl error in loop function */
985                 assert(loop_osl_errno);
986                 osl_errno = loop_osl_errno;
987         }
988         return loop_ret;
989 }
990
991 static int adu_loop_reverse(struct osl_table *t, unsigned col_num, void *private_data,
992                 osl_rbtree_loop_func *func, int *loop_ret, int *loop_osl_errno)
993 {
994         int ret = osl(osl_rbtree_loop_reverse(t, col_num, private_data, func));
995         return check_loop_return(ret, *loop_ret, *loop_osl_errno);
996 }
997
998 static int print_user_stats(void)
999 {
1000         struct user_info *ui;
1001         int ret;
1002
1003         FOR_EACH_USER(ui) {
1004                 struct user_stats_info usi = {
1005                         .count = conf.limit_arg,
1006                         .ui = ui
1007                 };
1008                 if (!ui_used(ui) || !ui_admissible(ui))
1009                         continue;
1010                 usi.flags = USF_PRINT_DIRNAME | USF_PRINT_BYTES | USF_COMPUTE_SUMMARY;
1011                 printf("uid %u, by size%s:\n",
1012                         (unsigned) ui->uid, size_unit_buf);
1013                 ret = adu_loop_reverse(ui->table, UT_BYTES, &usi, user_stats_loop_function,
1014                         &usi.ret, &usi.osl_errno);
1015                 if (ret < 0)
1016                         return ret;
1017                 printf("\nuid %u, by file count%s:\n",
1018                         (unsigned) ui->uid, count_unit_buf);
1019                 usi.count = conf.limit_arg,
1020                 usi.flags = USF_PRINT_DIRNAME | USF_PRINT_FILES;
1021                 ret = adu_loop_reverse(ui->table, UT_FILES, &usi, user_stats_loop_function,
1022                         &usi.ret, &usi.osl_errno);
1023                 if (ret < 0)
1024                         return ret;
1025                 printf("\n");
1026         }
1027         return 1;
1028 }
1029
1030 static void print_global_summary(void)
1031 {
1032         char d[FORMATED_VALUE_SIZE], f[FORMATED_VALUE_SIZE],
1033                 s[FORMATED_VALUE_SIZE];
1034         enum enum_count_unit ud, uf;
1035         enum enum_size_unit us;
1036
1037         ud = format_count_value(conf.count_unit_arg, num_dirs, 0, d);
1038         uf = format_count_value(conf.count_unit_arg, num_files, 0, f);
1039         us = format_size_value(conf.size_unit_arg, num_bytes, 0, s);
1040
1041         printf("Global summary "
1042                 "(dirs(%c)/files(%c)/size(%c))\n"
1043                 "\t%s\t%s\t%s\n\n",
1044                 count_unit_abbrevs[ud],
1045                 count_unit_abbrevs[uf],
1046                 size_unit_abbrevs[us],
1047                 d, f, s
1048         );
1049
1050 }
1051
1052 static int print_statistics(void)
1053 {
1054         int ret;
1055         struct global_stats_info gsi = {
1056                 .count = conf.limit_arg,
1057                 .flags = GSF_PRINT_DIRNAME | GSF_PRINT_BYTES | GSF_COMPUTE_SUMMARY
1058         };
1059
1060         printf("By size%s:\n",
1061                 size_unit_buf);
1062         ret = adu_loop_reverse(dir_table, DT_BYTES, &gsi,
1063                 global_stats_loop_function, &gsi.ret, &gsi.osl_errno);
1064         if (ret < 0)
1065                 return ret;
1066         printf("\n");
1067
1068         gsi.count = conf.limit_arg;
1069         gsi.flags = GSF_PRINT_DIRNAME | GSF_PRINT_FILES;
1070         printf("By file count%s:\n",
1071                 count_unit_buf);
1072         ret = adu_loop_reverse(dir_table, DT_FILES, &gsi,
1073                 global_stats_loop_function, &gsi.ret, &gsi.osl_errno);
1074         if (ret < 0)
1075                 return ret;
1076         printf("\n");
1077         print_global_summary();
1078         print_user_stats();
1079         print_id_stats();
1080         return 1;
1081 }
1082
1083 static char *get_uid_list_name(void)
1084 {
1085         return make_message("%s/uid_list", conf.database_dir_arg);
1086 }
1087
1088 static int write_uid_list(void)
1089 {
1090         char *buf, *filename;
1091         uint32_t count = 0;
1092         struct user_info *ui;
1093         size_t size = num_uids * sizeof(uint32_t);
1094         int ret;
1095
1096         if (!num_uids)
1097                 return 0;
1098         buf = adu_malloc(size);
1099         FOR_EACH_USER(ui) {
1100                 if (!ui_used(ui) || !ui_admissible(ui))
1101                         continue;
1102                 DEBUG_LOG("saving uid %u\n", (unsigned) ui->uid);
1103                 write_u32(buf + count++ * sizeof(uint32_t), ui->uid);
1104         }
1105         filename = get_uid_list_name();
1106         ret = adu_write_file(filename, buf, size);
1107         free(filename);
1108         free(buf);
1109         return ret;
1110 }
1111
1112 static int open_dir_table(void)
1113 {
1114         if (!dir_table_desc.dir) /* we did not create the table */
1115                 dir_table_desc.dir = adu_strdup(conf.database_dir_arg);
1116         return osl(osl_open_table(&dir_table_desc, &dir_table));
1117 }
1118 static int com_create()
1119 {
1120         uint64_t zero = 0ULL;
1121         int ret;
1122         struct stat statbuf;
1123
1124         if (lstat(conf.base_dir_arg, &statbuf) == -1)
1125                 return -ERRNO_TO_ERROR(errno);
1126         if (!S_ISDIR(statbuf.st_mode))
1127                 return -ERRNO_TO_ERROR(ENOTDIR);
1128         device_id = statbuf.st_dev;
1129         ret = create_tables();
1130         if (ret < 0)
1131                 return ret;
1132         check_signals();
1133         ret = open_dir_table();
1134         if (ret < 0)
1135                 return ret;
1136         check_signals();
1137         ret = scan_dir(conf.base_dir_arg, &zero);
1138         if (ret < 0)
1139                 goto out;
1140         ret = write_uid_list();
1141 out:
1142         close_all_tables();
1143         return ret;
1144 }
1145
1146 static int read_uid_file(void)
1147 {
1148         size_t size;
1149         uint32_t n;
1150         char *filename = get_uid_list_name(), *map;
1151         int ret = mmap_full_file(filename, O_RDONLY, (void **)&map, &size, NULL);
1152
1153         if (ret < 0) {
1154                 INFO_LOG("failed to map %s\n", filename);
1155                 free(filename);
1156                 return ret;
1157         }
1158         num_uids = size / 4;
1159         INFO_LOG("found %u uids in %s\n", (unsigned)num_uids, filename);
1160         free(filename);
1161         /* hash table size should be a power of two and larger than the number of uids */
1162         uid_hash_table_size = 4;
1163         while (uid_hash_table_size < num_uids)
1164                 uid_hash_table_size *= 2;
1165         create_hash_table();
1166         for (n = 0; n < num_uids; n++) {
1167                 uint32_t uid = read_u32(map + n * sizeof(uid));
1168                 ret = search_uid(uid, OPEN_USER_TABLE, NULL);
1169                 if (ret < 0)
1170                         goto out;
1171         }
1172 out:
1173         adu_munmap(map, size);
1174         return ret;
1175 }
1176
1177 static int com_select(void)
1178 {
1179         int ret;
1180
1181         if (conf.count_unit_arg != count_unit_arg_h)
1182                 count_unit_buf[1] = count_unit_abbrevs[conf.count_unit_arg];
1183         else
1184                 count_unit_buf[0] = '\0';
1185         if (conf.size_unit_arg != size_unit_arg_h)
1186                 size_unit_buf[1] = size_unit_abbrevs[conf.size_unit_arg];
1187         else
1188                 size_unit_buf[0] = '\0';
1189
1190         ret = open_dir_table();
1191         if (ret < 0)
1192                 return ret;
1193         check_signals();
1194         ret = read_uid_file();
1195         if (ret < 0)
1196                 return ret;
1197         check_signals();
1198         ret = print_statistics();
1199         close_all_tables();
1200         return ret;
1201 }
1202
1203 static int check_args(void)
1204 {
1205         int i, ret;
1206
1207         /* remove trailing slashes from base-dir arg */
1208         if (conf.base_dir_given) {
1209                 size_t len = strlen(conf.base_dir_arg);
1210                 for (;;) {
1211                         if (!len) /* empty string */
1212                                 return -ERRNO_TO_ERROR(EINVAL);
1213                         if (!--len) /* length 1 is always OK */
1214                                 break;
1215                         if (conf.base_dir_arg[len] != '/')
1216                                 break; /* no trailing slash, also OK */
1217                         conf.base_dir_arg[len] = '\0';
1218                 }
1219         }
1220         if (!conf.uid_given)
1221                 return 0;
1222         admissible_uids = adu_malloc(conf.uid_given * sizeof(*admissible_uids));
1223         for (i = 0; i < conf.uid_given; i++) {
1224                 ret = parse_uid_range(conf.uid_arg[i], admissible_uids + i);
1225                 if (ret < 0)
1226                         goto err;
1227         }
1228         return 1;
1229 err:
1230         free(admissible_uids);
1231         admissible_uids = NULL;
1232         return ret;
1233 }
1234
1235 int main(int argc, char **argv)
1236 {
1237         int ret;
1238         struct cmdline_parser_params params = {
1239                 .override = 0,
1240                 .initialize = 1,
1241                 .check_required = 0,
1242                 .check_ambiguity = 0,
1243                 .print_errors = 1
1244         };
1245
1246         cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */
1247         ret = init_signals();
1248         if (ret < 0)
1249                 goto out;
1250         ret = check_args();
1251         if (ret < 0)
1252                 goto out;
1253         ret = -E_SYNTAX;
1254         if (conf.select_given)
1255                 ret = com_select();
1256         else
1257                 ret = com_create();
1258         if (ret < 0)
1259                 goto out;
1260 out:
1261         free(admissible_uids);
1262         if (ret < 0) {
1263                 ERROR_LOG("%s\n", adu_strerror(-ret));
1264                 return -EXIT_FAILURE;
1265         }
1266         return EXIT_SUCCESS;
1267 }