9b800eaf1cb2e75b6b990ba7976049d7d0e82c15
[adu.git] / adu.c
1 #include "adu.h"
2 #include <dirent.h> /* readdir() */
3
4 #include "gcc-compat.h"
5 #include "cmdline.h"
6 #include "fd.h"
7 #include "string.h"
8 #include "error.h"
9 #include "portable_io.h"
10
11 DEFINE_ERRLIST;
12 int osl_errno;
13
14 /** Command line and config file options. */
15 static struct gengetopt_args_info conf;
16
/** Bits stored in the \a flags field of struct user_info. */
enum uid_info_flags {
        /** Set if this slot of the hash table is occupied. */
        UI_FL_SLOT_USED = 1 << 0,
        /** Set if the uid of this slot should be taken into account. */
        UI_FL_ADMISSIBLE = 1 << 1,
};
23
/** One slot of the uid hash table. */
struct user_info {
        /** The user id this slot belongs to. */
        uint32_t uid;
        /** Combination of the bits defined in enum uid_info_flags. */
        uint32_t flags;
        /** Handle of the opened per-user osl table, NULL if not open. */
        struct osl_table *table;
        /** Accumulated number of files owned by this uid. */
        uint64_t files;
        /** Accumulated number of bytes owned by this uid. */
        uint64_t bytes;
        /** Accumulated number of directories seen for this uid. */
        uint64_t dirs;
        /** Heap-allocated description of the per-user table. */
        struct osl_table_description *desc;
};
33
/**
 * Size of the buffers that receive formatted numbers.
 *
 * An uint64_t needs at most 20 decimal digits, so this leaves room for the
 * optional unit character and the terminating zero byte.
 */
#define FORMATED_VALUE_SIZE 25
36
37
/**
 * Holds one entry for each user that owns at least one regular file.
 *
 * Users which are excluded by the --uid option still occupy a slot in this
 * hash table. This way the question whether a uid is admissible can be
 * answered quickly, which matters because that lookup has to be fast.
 */
static struct user_info *uid_hash_table;
46
/*
 * Unit indicators shown in the summary headers. The blank in the middle is a
 * placeholder which, according to the original note, the select command fills in.
 */
static char count_unit_buf[4] = "( )";
static char size_unit_buf[4] = "( )";
49
50 static inline int ui_used(struct user_info *ui)
51 {
52         return ui->flags & UI_FL_SLOT_USED;
53 }
54
55 static inline int ui_admissible(struct user_info *ui)
56 {
57         return ui->flags & UI_FL_ADMISSIBLE;
58 }
59
/** A closed interval [low, high] of user ids. */
struct uid_range {
        /** Smallest uid contained in the range. */
        uint32_t low;
        /** Largest uid contained in the range. */
        uint32_t high;
};

/** Array of ranges, indexed from 0 to conf.uid_given - 1 by uid_is_admissible(). */
static struct uid_range *admissible_uids;
66
/**
 * Convert a string to a user id.
 *
 * \param arg The string to convert.
 * \param uid Receives the converted value on success.
 *
 * The conversion goes through a signed 64 bit integer because para_atoi64()
 * yields a signed value while valid uids cover the full unsigned 32 bit range.
 *
 * \return 1 on success, the negative return value of para_atoi64() if the
 * conversion failed, -ERRNO_TO_ERROR(EINVAL) if the number is out of range.
 */
static inline int check_uid_arg(const char *arg, uint32_t *uid)
{
        const uint32_t uid_max = ~0U;
        int64_t parsed;
        int ret = para_atoi64(arg, &parsed);

        if (ret < 0)
                return ret;
        if (parsed >= 0 && parsed <= uid_max) {
                *uid = parsed;
                return 1;
        }
        return -ERRNO_TO_ERROR(EINVAL);
}
84
85 static int parse_uid_range(const char *orig_arg, struct uid_range *ur)
86 {
87         int ret;
88         char *arg = para_strdup(orig_arg), *p = strchr(arg, '-');
89
90         if (!p || p == arg) { /* -42 or 42 */
91                 ret = check_uid_arg(p? p + 1 : arg, &ur->high);
92                 if (ret < 0)
93                         goto out;
94                 ur->low = p? 0 : ur->high;
95                 ret = 1;
96                 goto out;
97         }
98         /* 42- or 42-4711 */
99         *p = '\0';
100         p++;
101         ret = check_uid_arg(arg, &ur->low);
102         if (ret < 0)
103                 goto out;
104         ur->high = ~0U;
105         if (*p) { /* 42-4711 */
106                 ret = check_uid_arg(p, &ur->high);
107                 if (ret < 0)
108                         goto out;
109         }
110         if (ur->low > ur->high)
111                 ret = -ERRNO_TO_ERROR(EINVAL);
112 out:
113         if (ret < 0)
114                 ERROR_LOG("bad uid option: %s\n", orig_arg);
115         else
116                 INFO_LOG("admissible uid range: %u - %u\n", ur->low,
117                         ur->high);
118         free(arg);
119         return ret;
120 }
121
122
/**
 * Three-way comparison: 1 if x < y, -1 if x > y, 0 if both are equal.
 *
 * Both arguments are evaluated twice, so they must not have side effects.
 */
#define NUM_COMPARE(x, y) (((x) < (y)) - ((x) > (y)))
125
126 /**
127  * The log function.
128  *
129  * \param ll Loglevel.
130  * \param fml Usual format string.
131  *
132  * All XXX_LOG() macros use this function.
133  */
134 __printf_2_3 void __log(int ll, const char* fmt,...)
135 {
136         va_list argp;
137         FILE *outfd;
138         struct tm *tm;
139         time_t t1;
140         char str[255] = "";
141
142         if (ll < conf.loglevel_arg)
143                 return;
144         outfd = stderr;
145         time(&t1);
146         tm = localtime(&t1);
147         strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
148         fprintf(outfd, "%s ", str);
149         va_start(argp, fmt);
150         vfprintf(outfd, fmt, argp);
151         va_end(argp);
152 }
153
154 /**
155  * Compare the size of two directories
156  *
157  * \param obj1 Pointer to the first object.
158  * \param obj2 Pointer to the second object.
159  *
160  * This function first compares the size values as usual integers. If they compare as
161  * equal, the address of \a obj1 and \a obj2 are compared. So this compare function
162  * returns zero if and only if \a obj1 and \a obj2 point to the same memory area.
163  */
164 static int size_compare(const struct osl_object *obj1, const struct osl_object *obj2)
165 {
166         uint64_t d1 = *(uint64_t *)obj1->data;
167         uint64_t d2 = *(uint64_t *)obj2->data;
168         int ret = NUM_COMPARE(d2, d1);
169
170         if (ret)
171                 return ret;
172         //INFO_LOG("addresses: %p, %p\n", obj1->data, obj2->data);
173         return NUM_COMPARE(obj2->data, obj1->data);
174 }
175
176 /**
177  * Compare two osl objects pointing to unsigned integers of 64 bit size.
178  *
179  * \param obj1 Pointer to the first integer.
180  * \param obj2 Pointer to the second integer.
181  *
182  * \return The values required for an osl compare function.
183  *
184  * \sa osl_compare_func, osl_hash_compare().
185  */
186 static int uint64_compare(const struct osl_object *obj1,
187                 const struct osl_object *obj2)
188 {
189         uint64_t d1 = read_u64((const char *)obj1->data);
190         uint64_t d2 = read_u64((const char *)obj2->data);
191
192         if (d1 < d2)
193                 return 1;
194         if (d1 > d2)
195                 return -1;
196         return 0;
197 }
198
/** Column indices of the directory table. */
enum dir_table_columns {
        /** Name of the directory. */
        DT_NAME = 0,
        /** Number assigned to the directory. */
        DT_NUM,
        /** Number of the parent directory. */
        DT_PARENT_NUM,
        /** Sum of the sizes of all regular files. */
        DT_BYTES,
        /** How many regular files there are. */
        DT_FILES,
        /** Not a column, but the number of columns of this table. */
        NUM_DT_COLUMNS
};
214
215 static struct osl_column_description dir_table_cols[] = {
216         [DT_NAME] = {
217                 .storage_type = OSL_MAPPED_STORAGE,
218                 .storage_flags = 0,
219                 .name = "dir",
220         },
221         [DT_NUM] = {
222                 .storage_type = OSL_MAPPED_STORAGE,
223                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
224                 .name = "num",
225                 .compare_function = uint64_compare,
226                 .data_size = sizeof(uint64_t)
227         },
228         [DT_PARENT_NUM] = {
229                 .storage_type = OSL_MAPPED_STORAGE,
230                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
231                 .name = "parent_num",
232                 .compare_function = size_compare,
233                 .data_size = sizeof(uint64_t)
234         },
235         [DT_BYTES] = {
236                 .storage_type = OSL_MAPPED_STORAGE,
237                 .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
238                 .compare_function = size_compare,
239                 .name = "num_bytes",
240                 .data_size = sizeof(uint64_t)
241         },
242         [DT_FILES] = {
243                 .storage_type = OSL_MAPPED_STORAGE,
244                 .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
245                 .compare_function = size_compare,
246                 .name = "num_files",
247                 .data_size = sizeof(uint64_t)
248         }
249 };
250
251 static struct osl_table_description dir_table_desc = {
252         .name = "dir_table",
253         .num_columns = NUM_DT_COLUMNS,
254         .flags = 0,
255         .column_descriptions = dir_table_cols,
256 };
257
/** Column indices of the per-user (id) tables. */
enum user_table_columns {
        /** The number of the directory. */
        UT_DIR_NUM = 0,
        /** Sum of the sizes of all regular files in this dir owned by this id. */
        UT_BYTES,
        /** How many files in this dir are owned by this id. */
        UT_FILES,
        /** Not a column, but the number of columns of this table. */
        NUM_UT_COLUMNS
};
269
270 static struct osl_column_description user_table_cols[] = {
271         [UT_DIR_NUM] = {
272                 .storage_type = OSL_MAPPED_STORAGE,
273                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
274                 .name = "dir_num",
275                 .compare_function = uint64_compare,
276                 .data_size = sizeof(uint64_t)
277         },
278         [UT_BYTES] = {
279                 .storage_type = OSL_MAPPED_STORAGE,
280                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
281                 .compare_function = size_compare,
282                 .name = "num_bytes",
283                 .data_size = sizeof(uint64_t)
284         },
285         [UT_FILES] = {
286                 .storage_type = OSL_MAPPED_STORAGE,
287                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
288                 .compare_function = size_compare,
289                 .name = "num_files",
290                 .data_size = sizeof(uint64_t)
291         },
292 };
293
/** Handle of the directory table used by all directory lookups in this file. */
static struct osl_table *dir_table;
295
296 static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num,
297                 uint64_t *dir_size, uint64_t *dir_files)
298 {
299         struct osl_object dir_objects[NUM_DT_COLUMNS];
300
301         INFO_LOG("adding #%llu: %s\n", (long long unsigned)*dir_num, dirname);
302         dir_objects[DT_NAME].data = dirname;
303         dir_objects[DT_NAME].size = strlen(dirname) + 1;
304         dir_objects[DT_NUM].data = dir_num;
305         dir_objects[DT_NUM].size = sizeof(*dir_num);
306         dir_objects[DT_PARENT_NUM].data = parent_dir_num;
307         dir_objects[DT_PARENT_NUM].size = sizeof(*parent_dir_num);
308         dir_objects[DT_BYTES].data = dir_size;
309         dir_objects[DT_BYTES].size = sizeof(*dir_size);
310         dir_objects[DT_FILES].data = dir_files;
311         dir_objects[DT_FILES].size = sizeof(*dir_files);
312         return osl(osl_add_row(dir_table, dir_objects));
313 }
314
/** Counts the user tables created so far, see open_user_table(). */
static uint32_t num_uids;
316
317 static int open_user_table(struct user_info *ui, int create)
318 {
319         int ret;
320
321         ui->desc = para_malloc(sizeof(*ui->desc));
322         ui->desc->num_columns = NUM_UT_COLUMNS;
323         ui->desc->flags = 0;
324         ui->desc->column_descriptions = user_table_cols;
325         ui->desc->dir = para_strdup(conf.database_dir_arg);
326         ui->desc->name = make_message("%u", (unsigned)ui->uid);
327         INFO_LOG(".............................uid #%u: %u\n",
328                 (unsigned)num_uids, (unsigned)ui->uid);
329         if (create) {
330                 ret = osl(osl_create_table(ui->desc));
331                 if (ret < 0)
332                         goto err;
333                 num_uids++;
334         }
335         ret = osl(osl_open_table(ui->desc, &ui->table));
336         if (ret < 0)
337                 goto err;
338         return 1;
339 err:
340         free((char *)ui->desc->name);
341         free((char *)ui->desc->dir);
342         free(ui->desc);
343         ui->desc->name = NULL;
344         ui->desc->dir = NULL;
345         ui->desc = NULL;
346         ui->table = NULL;
347         ui->flags = 0;
348         return ret;
349 }
350
/** Base two logarithm of the number of slots of the uid hash table. */
#define uid_hash_bits 8
/** Number of slots of the uid hash table, always a power of two. */
static uint32_t uid_hash_table_size = 1 << uid_hash_bits;
/* Multipliers of double_hash(): PRIME1 for the start slot, PRIME2 for the stride. */
#define PRIME1 0x811c9dc5
#define PRIME2 0x01000193
355
356 static void create_hash_table(void)
357 {
358         uid_hash_table = para_calloc(uid_hash_table_size
359                 * sizeof(struct user_info));
360 }
361
362 static void free_hash_table(void)
363 {
364         free(uid_hash_table);
365         uid_hash_table = NULL;
366 }
367
368 static int create_tables(void)
369 {
370         int ret;
371
372         dir_table_desc.dir = para_strdup(conf.database_dir_arg);
373         ret = osl(osl_create_table(&dir_table_desc));
374         if (ret < 0)
375                 return ret;
376         create_hash_table();
377         return 1;
378 }
379
380 /*
381  * We use a hash table of size s=2^uid_hash_bits to map the uids into the
382  * interval [0..s]. Hash collisions are treated by open addressing, i.e.
383  * unused slots in the table are used to store different uids that hash to the
384  * same slot.
385  *
386  * If a hash collision occurs, different slots are successively probed in order
387  * to find an unused slot for the new uid. Probing is implemented via a second
388  * hash function that maps the uid to h=(uid * PRIME2) | 1, which is always an
389  * odd number.
390  *
391  * An odd number is sufficient to make sure each entry of the hash table gets
392  * probed for probe_num between 0 and s-1 because s is a power of two, hence
393  * the second hash value has never a common divisor with the hash table size.
394  * IOW: h is invertible in the ring [0..s].
395  */
396 static uint32_t double_hash(uint32_t uid, uint32_t probe_num)
397 {
398         return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num)
399                 % uid_hash_table_size;
400 }
401
/**
 * Iterate over all slots of the uid hash table, used or not.
 *
 * The loop body is not executed at all if the table is not allocated.
 */
#define FOR_EACH_USER(ui) \
        for ((ui) = uid_hash_table; \
                (ui) && (ui) < uid_hash_table + uid_hash_table_size; \
                (ui)++)
404
/** Flags for search_uid(). Zero means: look up only, never claim a slot. */
enum search_uid_flags {
        /** Open the table of a uid which is not yet in the hash table. */
        OPEN_USER_TABLE = 1 << 0,
        /** Additionally create that table before it is opened. */
        CREATE_USER_TABLE = 1 << 1,
};
409
410 static int uid_is_admissible(uint32_t uid)
411 {
412         int i;
413
414         for (i = 0; i < conf.uid_given; i++) {
415                 struct uid_range *ur = admissible_uids + i;
416
417                 if (ur->low <= uid && ur->high >= uid)
418                         break;
419         }
420         i = !conf.uid_given || i < conf.uid_given;
421         DEBUG_LOG("uid %u is %sadmissible\n", (unsigned)uid,
422                 i? "" : "not ");
423         return i;
424 }
425
426 static int search_uid(uint32_t uid, enum search_uid_flags flags,
427                 struct user_info **ui_ptr)
428 {
429         uint32_t p;
430
431         for (p = 0; p < uid_hash_table_size; p++) {
432                 struct user_info *ui = uid_hash_table + double_hash(uid, p);
433
434                 if (!ui_used(ui)) {
435                         int ret;
436                         if (!flags)
437                                 return -E_BAD_UID;
438                         ui->uid = uid;
439                         ui->flags |= UI_FL_SLOT_USED;
440                         if (!uid_is_admissible(uid))
441                                 return 0;
442                         ui->flags |= UI_FL_ADMISSIBLE;
443                         ret = open_user_table(ui, flags & CREATE_USER_TABLE);
444                         if (ret < 0)
445                                 return ret;
446
447                         if (ui_ptr)
448                                 *ui_ptr = ui;
449                         return 1;
450                 }
451                 if (ui->uid != uid)
452                         continue;
453                 if (ui_ptr)
454                         *ui_ptr = ui;
455                 return 0;
456         }
457         return flags? -E_HASH_TABLE_OVERFLOW : -E_BAD_UID;
458 }
459
460 static int update_user_row(struct osl_table *t, uint64_t dir_num,
461                 uint64_t *add)
462 {
463         struct osl_row *row;
464         struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
465
466         int ret = osl(osl_get_row(t, UT_DIR_NUM, &obj, &row));
467
468         if (ret == -E_OSL && osl_errno != E_OSL_RB_KEY_NOT_FOUND)
469                 return ret;
470         if (ret < 0) { /* this is the first file we add */
471                 struct osl_object objects[NUM_UT_COLUMNS];
472                 uint64_t num_files = 1;
473
474                 objects[UT_DIR_NUM].data = &dir_num;
475                 objects[UT_DIR_NUM].size = sizeof(dir_num);
476                 objects[UT_BYTES].data = add;
477                 objects[UT_BYTES].size = sizeof(*add);
478                 objects[UT_FILES].data = &num_files;
479                 objects[UT_FILES].size = sizeof(num_files);
480                 INFO_LOG("######################### ret: %d\n", ret);
481                 ret = osl(osl_add_row(t, objects));
482                 INFO_LOG("######################### ret: %d\n", ret);
483                 return ret;
484         } else { /* add size and increment file count */
485                 uint64_t num;
486                 struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
487
488                 ret = osl(osl_get_object(t, row, UT_BYTES, &obj1));
489                 if (ret < 0)
490                         return ret;
491                 num = *(uint64_t *)obj1.data + *add;
492                 ret = osl(osl_update_object(t, row, UT_BYTES, &obj2));
493                 if (ret < 0)
494                         return ret;
495                 ret = osl(osl_get_object(t, row, UT_FILES, &obj1));
496                 if (ret < 0)
497                         return ret;
498                 num = *(uint64_t *)obj1.data + 1;
499                 return osl(osl_update_object(t, row, UT_FILES, &obj2));
500         }
501 }
502
/* Global totals, updated by scan_dir() and by the summary computation. */
static uint64_t num_dirs; /* scan_dir() also derives directory numbers from it */
static uint64_t num_files;
static uint64_t num_bytes;
506
507 int scan_dir(char *dirname, uint64_t *parent_dir_num)
508 {
509         DIR *dir;
510         struct dirent *entry;
511         int ret, cwd_fd, ret2;
512         uint64_t dir_size = 0, dir_files = 0;
513         uint64_t this_dir_num = ++num_dirs;
514
515         DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)num_dirs, dirname);
516         ret = para_opendir(dirname, &dir, &cwd_fd);
517         if (ret < 0) {
518                 if (ret != -ERRNO_TO_ERROR(EACCES))
519                         return ret;
520                 WARNING_LOG("permission denied for %s\n", dirname);
521                 return 1;
522         }
523         while ((entry = readdir(dir))) {
524                 mode_t m;
525                 struct stat s;
526                 uint32_t uid;
527                 uint64_t size;
528                 struct user_info *ui;
529
530                 if (!strcmp(entry->d_name, "."))
531                         continue;
532                 if (!strcmp(entry->d_name, ".."))
533                         continue;
534                 if (lstat(entry->d_name, &s) == -1) {
535                         WARNING_LOG("lstat error for %s/%s\n", dirname,
536                                 entry->d_name);
537                         continue;
538                 }
539                 m = s.st_mode;
540                 if (!S_ISREG(m) && !S_ISDIR(m))
541                         continue;
542                 if (S_ISDIR(m)) {
543                         ret = scan_dir(entry->d_name, &this_dir_num);
544                         if (ret < 0)
545                                 goto out;
546                         continue;
547                 }
548                 /* regular file */
549                 size = s.st_size;
550                 dir_size += size;
551                 num_bytes += size;
552                 dir_files++;
553                 num_files++;
554                 uid = s.st_uid;
555                 ret = search_uid(uid, CREATE_USER_TABLE | OPEN_USER_TABLE, &ui);
556                 if (ret < 0)
557                         goto out;
558                 ui->bytes += size;
559                 ui->files++;
560                 ret = update_user_row(ui->table, this_dir_num, &size);
561                 if (ret < 0)
562                         goto out;
563         }
564         ret = add_directory(dirname, &this_dir_num, parent_dir_num,
565                         &dir_size, &dir_files);
566 out:
567         closedir(dir);
568         ret2 = para_fchdir(cwd_fd);
569         if (ret2 < 0 && ret >= 0)
570                 ret = ret2;
571         close(cwd_fd);
572         return ret;
573 }
574
575 static int get_dir_name_by_number(uint64_t *dirnum, char **name)
576 {
577         char *result = NULL, *tmp;
578         struct osl_row *row;
579         uint64_t val = *dirnum;
580         struct osl_object obj = {.data = &val, .size = sizeof(val)};
581         int ret;
582
583 again:
584         ret = osl(osl_get_row(dir_table, DT_NUM, &obj, &row));
585         if (ret < 0)
586                 goto out;
587         ret = osl(osl_get_object(dir_table, row, DT_NAME, &obj));
588         if (ret < 0)
589                 goto out;
590         if (result) {
591                 tmp = make_message("%s/%s", (char *)obj.data, result);
592                 free(result);
593                 result = tmp;
594         } else
595                 result = para_strdup((char *)obj.data);
596         ret = osl(osl_get_object(dir_table, row, DT_PARENT_NUM, &obj));
597         if (ret < 0)
598                 goto out;
599         val = *(uint64_t *)obj.data;
600         if (val)
601                 goto again;
602 out:
603         if (ret < 0) {
604                 free(result);
605                 *name = NULL;
606         } else
607                 *name = result;
608         return ret;
609 }
610
611 static int get_dir_name_of_row(struct osl_row *dir_table_row, char **name)
612 {
613         struct osl_object obj;
614         int ret;
615         char *this_dir, *prefix = NULL;
616
617         *name = NULL;
618         ret = osl(osl_get_object(dir_table, dir_table_row, DT_NAME, &obj));
619         if (ret < 0)
620                 return ret;
621         this_dir = para_strdup((char *)obj.data);
622         ret = osl(osl_get_object(dir_table, dir_table_row, DT_PARENT_NUM, &obj));
623         if (ret < 0)
624                 goto out;
625         if (!*(uint64_t *)obj.data) {
626                 *name = this_dir;
627                 return 1;
628         }
629         ret = get_dir_name_by_number((uint64_t *)obj.data, &prefix);
630         if (ret < 0)
631                 goto out;
632         *name = make_message("%s/%s", prefix, this_dir);
633         free(prefix);
634         ret = 1;
635 out:
636         free(this_dir);
637         return ret;
638 }
639
640 const uint64_t size_unit_divisors[] = {
641         [size_unit_arg_b] = 1ULL,
642         [size_unit_arg_k] = 1024ULL,
643         [size_unit_arg_m] = 1024ULL * 1024ULL,
644         [size_unit_arg_g] = 1024ULL * 1024ULL * 1024ULL,
645         [size_unit_arg_t] = 1024ULL * 1024ULL * 1024ULL * 1024ULL,
646 };
647
648 const uint64_t count_unit_divisors[] = {
649
650         [count_unit_arg_n] = 1ULL,
651         [count_unit_arg_k] = 1000ULL,
652         [count_unit_arg_m] = 1000ULL * 1000ULL,
653         [count_unit_arg_g] = 1000ULL * 1000ULL * 1000ULL,
654         [count_unit_arg_t] = 1000ULL * 1000ULL * 1000ULL * 1000ULL,
655 };
656
/*
 * One character per unit. format_size_value() and format_count_value() index
 * these strings with the unit that was chosen for the output.
 */
const char size_unit_abbrevs[] = " BKMGT";
const char count_unit_abbrevs[] = "  kmgt";
659
660 static enum enum_size_unit format_size_value(enum enum_size_unit unit,
661                 uint64_t value, int print_unit, char *result)
662 {
663         enum enum_size_unit u = unit;
664         char unit_buf[2] = "\0\0";
665
666         if (unit == size_unit_arg_h) /* human readable */
667                 for (u = size_unit_arg_b; u < size_unit_arg_t &&
668                                 value > size_unit_divisors[u + 1]; u++)
669                         ; /* nothing */
670         if (print_unit)
671                 unit_buf[0] = size_unit_abbrevs[u];
672         sprintf(result, "%llu%s",
673                 (long long unsigned)value / size_unit_divisors[u], unit_buf);
674         return u;
675 }
676
677 static enum enum_count_unit format_count_value(enum enum_count_unit unit,
678                 uint64_t value, int print_unit, char *result)
679 {
680         enum enum_count_unit u = unit;
681         char unit_buf[2] = "\0\0";
682
683         if (unit == count_unit_arg_h) /* human readable */
684                 for (u = count_unit_arg_n; u < count_unit_arg_t &&
685                                 value > count_unit_divisors[u + 1]; u++)
686                         ; /* nothing */
687         if (print_unit)
688                 unit_buf[0] = count_unit_abbrevs[u];
689         sprintf(result, "%llu%s",
690                 (long long unsigned)value / count_unit_divisors[u], unit_buf);
691         return u;
692 }
693
/** Selects what global_stats_loop_function() does for each row. */
enum global_stats_flags {
        /** Print the path of the directory. */
        GSF_PRINT_DIRNAME = 1 << 0,
        /** Print the size of the directory. */
        GSF_PRINT_BYTES = 1 << 1,
        /** Print the number of files in the directory. */
        GSF_PRINT_FILES = 1 << 2,
        /** Add the values of each row to the global totals. */
        GSF_COMPUTE_SUMMARY = 1 << 3,
};

/** Private data of global_stats_loop_function(). */
struct global_stats_info {
        /** How many rows are still to be printed. */
        uint32_t count;
        /** Set by the loop function if it aborts the loop. */
        int ret;
        /** The osl error number if \a ret is -E_OSL, zero otherwise. */
        int osl_errno;
        /** See enum global_stats_flags. */
        enum global_stats_flags flags;
};
707
708 static int global_stats_loop_function(struct osl_row *row, void *data)
709 {
710         struct global_stats_info *gsi = data;
711         struct osl_object obj;
712         char *dirname, formated_value[FORMATED_VALUE_SIZE];
713         int ret, summary = gsi->flags & GSF_COMPUTE_SUMMARY;
714
715         if (!gsi->count && !summary) {
716                 ret = -E_LOOP_COMPLETE;
717                 goto err;
718         }
719         if (summary || (gsi->count && (gsi->flags & GSF_PRINT_FILES))) {
720                 uint64_t files;
721                 ret = osl(osl_get_object(dir_table, row, DT_FILES, &obj));
722                 if (ret < 0)
723                         goto err;
724                 files = *(uint64_t *)obj.data;
725                 if (gsi->count && (gsi->flags & GSF_PRINT_FILES)) {
726                         format_count_value(conf.count_unit_arg, files,
727                                 conf.count_unit_arg == count_unit_arg_h,
728                                 formated_value);
729                         printf("\t%s%s", formated_value,
730                                 (gsi->flags & (GSF_PRINT_BYTES | GSF_PRINT_DIRNAME))?
731                                 "\t" : "\n");
732                 }
733                 if (summary)
734                         num_files += files;
735         }
736         if (summary || (gsi->count && (gsi->flags & GSF_PRINT_BYTES))) {
737                 uint64_t bytes;
738                 ret = osl(osl_get_object(dir_table, row, DT_BYTES, &obj));
739                 if (ret < 0)
740                         goto err;
741                 bytes = *(uint64_t *)obj.data;
742                 if (gsi->count && (gsi->flags & GSF_PRINT_BYTES)) {
743                         format_size_value(conf.size_unit_arg, bytes,
744                                 conf.size_unit_arg == size_unit_arg_h,
745                                 formated_value);
746                         printf("%s%s%s",
747                                 (gsi->flags & GSF_PRINT_FILES)? "" : "\t",
748                                 formated_value,
749                                 (gsi->flags & GSF_PRINT_DIRNAME)? "\t" : "\n"
750                         );
751                 }
752                 if (summary) {
753                         num_bytes += bytes;
754                         num_dirs++;
755                 }
756         }
757         if (gsi->count && (gsi->flags & GSF_PRINT_DIRNAME)) {
758                 ret = get_dir_name_of_row(row, &dirname);
759                 if (ret < 0)
760                         goto err;
761                 printf("%s%s\n",
762                         (gsi->flags & (GSF_PRINT_BYTES | GSF_PRINT_FILES))? "" : "\t",
763                         dirname);
764         }
765         if (gsi->count > 0)
766                 gsi->count--;
767         return 1;
768 err:
769         gsi->ret = ret;
770         gsi->osl_errno = (ret == -E_OSL)? osl_errno : 0;
771         return -1;
772 }
773
774 static void print_id_stats(void)
775 {
776         struct user_info *ui;
777
778         printf("User summary "
779                 "(uid/dirs%s/files%s/size%s):\n",
780                 count_unit_buf, count_unit_buf, size_unit_buf);
781         FOR_EACH_USER(ui) {
782                 char formated_dir_count[FORMATED_VALUE_SIZE],
783                         formated_file_count[FORMATED_VALUE_SIZE],
784                         formated_bytes[FORMATED_VALUE_SIZE ];
785                 if (!ui_used(ui) || !ui_admissible(ui))
786                         continue;
787                 format_count_value(conf.count_unit_arg, ui->dirs,
788                         conf.count_unit_arg == count_unit_arg_h,
789                         formated_dir_count);
790                 format_count_value(conf.count_unit_arg, ui->files,
791                         conf.count_unit_arg == count_unit_arg_h,
792                         formated_file_count);
793                 format_size_value(conf.size_unit_arg, ui->bytes,
794                         conf.size_unit_arg == size_unit_arg_h,
795                         formated_bytes);
796                 printf("\t%u\t%s\t%s\t%s\n", (unsigned)ui->uid,
797                         formated_dir_count,
798                         formated_file_count,
799                         formated_bytes
800                 );
801         }
802 }
803
/** Selects what user_stats_loop_function() does for each row. */
enum user_stats_flags {
        /** Print the path of the directory. */
        USF_PRINT_DIRNAME = 1 << 0,
        /** Print the number of bytes owned by the user. */
        USF_PRINT_BYTES = 1 << 1,
        /** Print the number of files owned by the user. */
        USF_PRINT_FILES = 1 << 2,
        /** Add the values of each row to the totals of the user. */
        USF_COMPUTE_SUMMARY = 1 << 3,
};

/** Private data of user_stats_loop_function(). */
struct user_stats_info {
        /** How many rows are still to be printed. */
        uint32_t count;
        /** See enum user_stats_flags. */
        enum user_stats_flags flags;
        /** Set by the loop function if it aborts the loop. */
        int ret;
        /** The osl error number if \a ret is -E_OSL, zero otherwise. */
        int osl_errno;
        /** The user whose table is being looped over. */
        struct user_info *ui;
};
818
819 static int user_stats_loop_function(struct osl_row *row, void *data)
820 {
821         struct user_stats_info *usi = data;
822         struct osl_object obj;
823         int ret, summary = usi->flags & GSF_COMPUTE_SUMMARY;
824         char formated_value[FORMATED_VALUE_SIZE];
825
826         if (!usi->count && !summary) {
827                 ret = -E_LOOP_COMPLETE;
828                 goto err;
829         }
830         if (summary || (usi->count && (usi->flags & USF_PRINT_FILES))) {
831                 uint64_t files;
832                 ret = osl(osl_get_object(usi->ui->table, row, UT_FILES, &obj));
833                 if (ret < 0)
834                         goto err;
835                 files = *(uint64_t *)obj.data;
836                 if (usi->count && (usi->flags & USF_PRINT_FILES)) {
837                         format_count_value(conf.count_unit_arg, files,
838                                 conf.count_unit_arg == count_unit_arg_h,
839                                 formated_value);
840                         printf("\t%s%s", formated_value,
841                                 (usi->flags & (USF_PRINT_BYTES | USF_PRINT_DIRNAME))?
842                                         "\t" : "\n"
843                         );
844                 }
845                 if (summary)
846                         usi->ui->files += files;
847         }
848         if (summary || (usi->count && (usi->flags & USF_PRINT_BYTES))) {
849                 uint64_t bytes;
850                 ret = osl(osl_get_object(usi->ui->table, row, UT_BYTES, &obj));
851                 if (ret < 0)
852                         goto err;
853                 bytes = *(uint64_t *)obj.data;
854                 if (usi->count && (usi->flags & USF_PRINT_BYTES)) {
855                         format_size_value(conf.size_unit_arg, bytes,
856                                 conf.size_unit_arg == size_unit_arg_h,
857                                 formated_value);
858                         printf("%s%s%s",
859                                 (usi->flags & USF_PRINT_FILES)? "" : "\t",
860                                 formated_value,
861                                 usi->flags & USF_PRINT_DIRNAME?  "\t" : "\n"
862                         );
863                 }
864                 if (summary) {
865                         usi->ui->bytes += bytes;
866                         usi->ui->dirs++;
867                 }
868
869         }
870         if (usi->count && (usi->flags & USF_PRINT_DIRNAME)) {
871                 char *dirname;
872                 ret = osl(osl_get_object(usi->ui->table, row, UT_DIR_NUM, &obj));
873                 if (ret < 0)
874                         goto err;
875                 ret = get_dir_name_by_number((uint64_t *)obj.data, &dirname);
876                 if (ret < 0)
877                         goto err;
878                 printf("%s%s\n",
879                         (usi->flags & (USF_PRINT_BYTES | USF_PRINT_FILES))? "" : "\t",
880                         dirname);
881         }
882         if (usi->count > 0)
883                 usi->count--;
884         return 1;
885 err:
886         usi->ret = ret;
887         usi->osl_errno = (ret == -E_OSL)? osl_errno : 0;
888         return -1;
889 }
890
891 static int check_loop_return(int ret, int loop_ret, int loop_osl_errno)
892 {
893         if (ret >= 0)
894                 return ret;
895         assert(ret == -E_OSL);
896         if (osl_errno != E_OSL_LOOP)
897                 /* error not caused by loop function returning negative. */
898                 return ret;
899         assert(loop_ret < 0);
900         if (loop_ret == -E_LOOP_COMPLETE) /* no error */
901                 return 1;
902         if (loop_ret == -E_OSL) { /* osl error in loop function */
903                 assert(loop_osl_errno);
904                 osl_errno = loop_osl_errno;
905         }
906         return loop_ret;
907 }
908
909 static int adu_loop_reverse(struct osl_table *t, unsigned col_num, void *private_data,
910                 osl_rbtree_loop_func *func, int *loop_ret, int *loop_osl_errno)
911 {
912         int ret = osl(osl_rbtree_loop_reverse(t, col_num, private_data, func));
913         return check_loop_return(ret, *loop_ret, *loop_osl_errno);
914 }
915
916 static int print_user_stats(void)
917 {
918         struct user_info *ui;
919         int ret;
920
921         FOR_EACH_USER(ui) {
922                 struct user_stats_info usi = {
923                         .count = conf.limit_arg,
924                         .ui = ui
925                 };
926                 if (!ui_used(ui) || !ui_admissible(ui))
927                         continue;
928                 usi.flags = USF_PRINT_DIRNAME | USF_PRINT_BYTES | USF_COMPUTE_SUMMARY;
929                 printf("uid %u, by size%s:\n",
930                         (unsigned) ui->uid, size_unit_buf);
931                 ret = adu_loop_reverse(ui->table, UT_BYTES, &usi, user_stats_loop_function,
932                         &usi.ret, &usi.osl_errno);
933                 if (ret < 0)
934                         return ret;
935                 printf("\nuid %u, by file count%s:\n",
936                         (unsigned) ui->uid, count_unit_buf);
937                 usi.count = conf.limit_arg,
938                 usi.flags = USF_PRINT_DIRNAME | USF_PRINT_FILES;
939                 ret = adu_loop_reverse(ui->table, UT_FILES, &usi, user_stats_loop_function,
940                         &usi.ret, &usi.osl_errno);
941                 if (ret < 0)
942                         return ret;
943                 printf("\n");
944         }
945         return 1;
946 }
947
948 static void print_global_summary(void)
949 {
950         char d[FORMATED_VALUE_SIZE], f[FORMATED_VALUE_SIZE],
951                 s[FORMATED_VALUE_SIZE];
952         enum enum_count_unit ud, uf;
953         enum enum_size_unit us;
954
955         ud = format_count_value(conf.count_unit_arg, num_dirs, 0, d);
956         uf = format_count_value(conf.count_unit_arg, num_files, 0, f);
957         us = format_size_value(conf.size_unit_arg, num_bytes, 0, s);
958
959         printf("Global summary "
960                 "(dirs(%c)/files(%c)/size(%c))\n"
961                 "\t%s\t%s\t%s\n\n",
962                 count_unit_abbrevs[ud],
963                 count_unit_abbrevs[uf],
964                 size_unit_abbrevs[us],
965                 d, f, s
966         );
967
968 }
969
970 static int print_statistics(void)
971 {
972         int ret;
973         struct global_stats_info gsi = {
974                 .count = conf.limit_arg,
975                 .flags = GSF_PRINT_DIRNAME | GSF_PRINT_BYTES | GSF_COMPUTE_SUMMARY
976         };
977
978         printf("By size%s:\n",
979                 size_unit_buf);
980         ret = adu_loop_reverse(dir_table, DT_BYTES, &gsi,
981                 global_stats_loop_function, &gsi.ret, &gsi.osl_errno);
982         if (ret < 0)
983                 return ret;
984         printf("\n");
985
986         gsi.count = conf.limit_arg;
987         gsi.flags = GSF_PRINT_DIRNAME | GSF_PRINT_FILES;
988         printf("By file count%s:\n",
989                 count_unit_buf);
990         ret = adu_loop_reverse(dir_table, DT_FILES, &gsi,
991                 global_stats_loop_function, &gsi.ret, &gsi.osl_errno);
992         if (ret < 0)
993                 return ret;
994         printf("\n");
995         print_global_summary();
996         print_user_stats();
997         print_id_stats();
998         return 1;
999 }
1000
1001 static char *get_uid_list_name(void)
1002 {
1003         return make_message("%s/uid_list", conf.database_dir_arg);
1004 }
1005
1006 static int write_uid_list(void)
1007 {
1008         char *buf, *filename;
1009         uint32_t count = 0;
1010         struct user_info *ui;
1011         size_t size = num_uids * sizeof(uint32_t);
1012         int ret;
1013
1014         if (!num_uids)
1015                 return 0;
1016         buf = para_malloc(size);
1017         FOR_EACH_USER(ui) {
1018                 if (!ui_used(ui) || !ui_admissible(ui))
1019                         continue;
1020                 DEBUG_LOG("saving uid %u\n", (unsigned) ui->uid);
1021                 write_u32(buf + count++ * sizeof(uint32_t), ui->uid);
1022         }
1023         filename = get_uid_list_name();
1024         ret = para_write_file(filename, buf, size);
1025         free(filename);
1026         free(buf);
1027         return ret;
1028 }
1029
1030 static int open_dir_table(void)
1031 {
1032         if (!dir_table_desc.dir) /* we did not create the table */
1033                 dir_table_desc.dir = para_strdup(conf.database_dir_arg);
1034         return osl(osl_open_table(&dir_table_desc, &dir_table));
1035 }
1036
1037 static void close_dir_table(void)
1038 {
1039         int ret;
1040
1041         if (!dir_table)
1042                 return;
1043         ret = osl(osl_close_table(dir_table, OSL_MARK_CLEAN));
1044         if (ret < 0)
1045                 ERROR_LOG("failed to close dir table: %s\n", adu_strerror(-ret));
1046         free((char *)dir_table_desc.dir);
1047         dir_table = NULL;
1048 }
1049
1050 static void close_user_table(struct user_info *ui)
1051 {
1052         int ret;
1053
1054         if (!ui || !ui_used(ui) || !ui_admissible(ui))
1055                 return;
1056         ret = osl(osl_close_table(ui->table, OSL_MARK_CLEAN));
1057         if (ret < 0)
1058                 ERROR_LOG("failed to close user table %u: %s\n",
1059                         (unsigned) ui->uid, adu_strerror(-ret));
1060         free((char *)ui->desc->name);
1061         ui->desc->name = NULL;
1062         free((char *)ui->desc->dir);
1063         ui->desc->dir = NULL;
1064         free(ui->desc);
1065         ui->desc = NULL;
1066         ui->table = NULL;
1067         ui->flags = 0;
1068 }
1069
1070 static void close_user_tables(void)
1071 {
1072         struct user_info *ui;
1073
1074         FOR_EACH_USER(ui)
1075                 close_user_table(ui);
1076 }
1077
/**
 * Close the directory table and all user tables, then free the uid hash
 * table.
 */
static void close_all_tables(void)
{
        /* The directory table goes first, then the per-user tables. */
        close_dir_table();
        close_user_tables();
        /* All slots have been processed, so the hash table can go. */
        free_hash_table();
}
1084
1085 static int com_create()
1086 {
1087         uint64_t zero = 0ULL;
1088         int ret = create_tables();
1089
1090         if (ret < 0)
1091                 return ret;
1092         ret = open_dir_table();
1093         if (ret < 0)
1094                 return ret;
1095         ret = scan_dir(conf.base_dir_arg, &zero);
1096         if (ret < 0)
1097                 goto out;
1098         ret = write_uid_list();
1099 out:
1100         close_all_tables();
1101         return ret;
1102 }
1103
1104 static int read_uid_file(void)
1105 {
1106         size_t size;
1107         uint32_t n;
1108         char *filename = get_uid_list_name(), *map;
1109         int ret = mmap_full_file(filename, O_RDONLY, (void **)&map, &size, NULL);
1110
1111         if (ret < 0) {
1112                 INFO_LOG("failed to map %s\n", filename);
1113                 free(filename);
1114                 return ret;
1115         }
1116         num_uids = size / 4;
1117         INFO_LOG("found %u uids in %s\n", (unsigned)num_uids, filename);
1118         free(filename);
1119         /* hash table size should be a power of two and larger than the number of uids */
1120         uid_hash_table_size = 4;
1121         while (uid_hash_table_size < num_uids)
1122                 uid_hash_table_size *= 2;
1123         create_hash_table();
1124         for (n = 0; n < num_uids; n++) {
1125                 uint32_t uid = read_u32(map + n * sizeof(uid));
1126                 ret = search_uid(uid, OPEN_USER_TABLE, NULL);
1127                 if (ret < 0)
1128                         goto out;
1129         }
1130 out:
1131         para_munmap(map, size);
1132         return ret;
1133 }
1134
1135 static int com_select(void)
1136 {
1137         int ret;
1138
1139         if (conf.count_unit_arg != count_unit_arg_h)
1140                 count_unit_buf[1] = count_unit_abbrevs[conf.count_unit_arg];
1141         else
1142                 count_unit_buf[0] = '\0';
1143         if (conf.size_unit_arg != size_unit_arg_h)
1144                 size_unit_buf[1] = size_unit_abbrevs[conf.size_unit_arg];
1145         else
1146                 size_unit_buf[0] = '\0';
1147
1148         ret = open_dir_table();
1149         if (ret < 0)
1150                 return ret;
1151         ret = read_uid_file();
1152         if (ret < 0)
1153                 return ret;
1154         ret = print_statistics();
1155         close_all_tables();
1156         return ret;
1157 }
1158
1159 static int check_args(void)
1160 {
1161         int i, ret;
1162
1163         /* remove trailing slashes from base-dir arg */
1164         if (conf.base_dir_given) {
1165                 size_t len = strlen(conf.base_dir_arg);
1166                 for (;;) {
1167                         if (!len) /* empty string */
1168                                 return -ERRNO_TO_ERROR(EINVAL);
1169                         if (!--len) /* length 1 is always OK */
1170                                 break;
1171                         if (conf.base_dir_arg[len] != '/')
1172                                 break; /* no trailing slash, also OK */
1173                         conf.base_dir_arg[len] = '\0';
1174                 }
1175         }
1176         if (!conf.uid_given)
1177                 return 0;
1178         admissible_uids = para_malloc(conf.uid_given * sizeof(*admissible_uids));
1179         for (i = 0; i < conf.uid_given; i++) {
1180                 ret = parse_uid_range(conf.uid_arg[i], admissible_uids + i);
1181                 if (ret < 0)
1182                         goto err;
1183         }
1184         return 1;
1185 err:
1186         free(admissible_uids);
1187         admissible_uids = NULL;
1188         return ret;
1189 }
1190
1191 int main(int argc, char **argv)
1192 {
1193         int ret;
1194         struct cmdline_parser_params params = {
1195                 .override = 0,
1196                 .initialize = 1,
1197                 .check_required = 0,
1198                 .check_ambiguity = 0,
1199                 .print_errors = 1
1200         };
1201
1202         cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */
1203         ret = check_args();
1204         if (ret < 0)
1205                 goto out;
1206         ret = -E_SYNTAX;
1207         if (conf.select_given)
1208                 ret = com_select();
1209         else
1210                 ret = com_create();
1211         if (ret < 0)
1212                 goto out;
1213 out:
1214         free(admissible_uids);
1215         if (ret < 0) {
1216                 ERROR_LOG("%s\n", adu_strerror(-ret));
1217                 return -EXIT_FAILURE;
1218         }
1219         return EXIT_SUCCESS;
1220 }