]> git.tuebingen.mpg.de Git - adu.git/blob - adu.c
c3a278e4da4b3f0a66339b7b8e919c4821217995
[adu.git] / adu.c
1 #include "adu.h"
2 #include <dirent.h> /* readdir() */
3 #include <pwd.h>
4
5 #include "gcc-compat.h"
6 #include "cmdline.h"
7 #include "fd.h"
8 #include "string.h"
9 #include "error.h"
10 #include "portable_io.h"
11
/* Macro defined outside this file; presumably expands to the error text table. */
DEFINE_ERRLIST;
/**
 * Error code of the most recent failed osl call. It is inspected whenever a
 * wrapped osl call yields -E_OSL.
 */
int osl_errno;

/** Command line and config file options. */
static struct gengetopt_args_info conf;
17
/** Per-slot state bits stored in the flags field of struct user_info. */
enum uid_info_flags {
        /** Set if this slot of the hash table is occupied. */
        UI_FL_SLOT_USED = 1 << 0,
        /** Set if this uid should be taken into account. */
        UI_FL_ADMISSIBLE = 1 << 1,
};
24
/** One slot of the uid hash table. */
struct user_info {
        /** The uid this slot belongs to, set by search_uid(). */
        uint32_t uid;
        /** Bitwise or of enum uid_info_flags values. */
        uint32_t flags;
        /** Login name obtained via getpwuid(); stays unset if the lookup fails. */
        char *pw_name;
        /** Handle of the opened per-user osl table. */
        struct osl_table *table;
        /** Number of regular files owned by this uid. */
        uint64_t files;
        /** Sum of the sizes of all regular files owned by this uid. */
        uint64_t bytes;
        /** Directory count printed in the user summary. */
        uint64_t dirs;
        /** Heap-allocated description of the per-user table. */
        struct osl_table_description *desc;
};
35
/** The decimal representation of an uint64_t never exceeds that size. */
#define FORMATED_VALUE_SIZE 25

/**
 * Iterate over all slots of the uid hash table, used or not.
 *
 * The loop body is not executed at all if the hash table has not been
 * allocated, i.e. if uid_hash_table is NULL.
 */
#define FOR_EACH_USER(ui) for (ui = uid_hash_table; ui && ui < uid_hash_table \
                + uid_hash_table_size; ui++)
41
42
/**
 * Contains info for each user that owns at least one regular file.
 *
 * Even users that are not taken into account because of the --uid
 * option occupy a slot in this hash table. This makes it possible to
 * find out quickly whether a uid is admissible. And yes, this has to
 * be fast.
 */
static struct user_info *uid_hash_table;
51
/*
 * Unit suffixes appended to the column titles of the user summary.
 * These get filled in by the select command.
 */
static char count_unit_buf[4] = "( )", size_unit_buf[4] = "( )";
54
55 static inline int ui_used(struct user_info *ui)
56 {
57         return ui->flags & UI_FL_SLOT_USED;
58 }
59
60 static inline int ui_admissible(struct user_info *ui)
61 {
62         return ui->flags & UI_FL_ADMISSIBLE;
63 }
64
/** A closed interval of uids. */
struct uid_range {
        /** Smallest uid of the range. */
        uint32_t low;
        /** Largest uid of the range. */
        uint32_t high;
};

/** Array of admissible ranges; uid_is_admissible() reads conf.uid_given entries. */
static struct uid_range *admissible_uids;
71
/**
 * Convert a string to a uid.
 *
 * \param arg The string to convert.
 * \param uid Result is stored here on success.
 *
 * \return 1 on success, the negative return value of atoi64() if the
 * conversion failed, and -ERRNO_TO_ERROR(EINVAL) if the converted value does
 * not fit into an unsigned 32 bit integer.
 */
static inline int check_uid_arg(const char *arg, uint32_t *uid)
{
        /*
         * The string conversion yields a signed value, so a signed 64 bit
         * temporary is needed to cover the full range of a 32 bit uid.
         */
        int64_t parsed;
        const uint32_t uid_max = ~0U;
        int ret = atoi64(arg, &parsed);

        if (ret < 0)
                return ret;
        if (parsed >= 0 && parsed <= uid_max) {
                *uid = parsed;
                return 1;
        }
        return -ERRNO_TO_ERROR(EINVAL);
}
89
90 static int parse_uid_range(const char *orig_arg, struct uid_range *ur)
91 {
92         int ret;
93         char *arg = adu_strdup(orig_arg), *p = strchr(arg, '-');
94
95         if (!p || p == arg) { /* -42 or 42 */
96                 ret = check_uid_arg(p? p + 1 : arg, &ur->high);
97                 if (ret < 0)
98                         goto out;
99                 ur->low = p? 0 : ur->high;
100                 ret = 1;
101                 goto out;
102         }
103         /* 42- or 42-4711 */
104         *p = '\0';
105         p++;
106         ret = check_uid_arg(arg, &ur->low);
107         if (ret < 0)
108                 goto out;
109         ur->high = ~0U;
110         if (*p) { /* 42-4711 */
111                 ret = check_uid_arg(p, &ur->high);
112                 if (ret < 0)
113                         goto out;
114         }
115         if (ur->low > ur->high)
116                 ret = -ERRNO_TO_ERROR(EINVAL);
117 out:
118         if (ret < 0)
119                 ERROR_LOG("bad uid option: %s\n", orig_arg);
120         else
121                 INFO_LOG("admissible uid range: %u - %u\n", ur->low,
122                         ur->high);
123         free(arg);
124         return ret;
125 }
126
/**
 * Three-way comparison: 1 if x < y, -1 if x > y, 0 if x == y.
 *
 * Both arguments are evaluated twice, so they must be free of side effects.
 */
#define NUM_COMPARE(x, y) (((x) < (y)) - ((x) > (y)))
129
130 /**
131  * The log function.
132  *
133  * \param ll Loglevel.
134  * \param fml Usual format string.
135  *
136  * All XXX_LOG() macros use this function.
137  */
138 __printf_2_3 void __log(int ll, const char* fmt,...)
139 {
140         va_list argp;
141         FILE *outfd;
142         struct tm *tm;
143         time_t t1;
144         char str[255] = "";
145
146         if (ll < conf.loglevel_arg)
147                 return;
148         outfd = stderr;
149         time(&t1);
150         tm = localtime(&t1);
151         strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
152         fprintf(outfd, "%s ", str);
153         va_start(argp, fmt);
154         vfprintf(outfd, fmt, argp);
155         va_end(argp);
156 }
157
158 /**
159  * Compare the size of two directories
160  *
161  * \param obj1 Pointer to the first object.
162  * \param obj2 Pointer to the second object.
163  *
164  * This function first compares the size values as usual integers. If they compare as
165  * equal, the address of \a obj1 and \a obj2 are compared. So this compare function
166  * returns zero if and only if \a obj1 and \a obj2 point to the same memory area.
167  */
168 static int size_compare(const struct osl_object *obj1, const struct osl_object *obj2)
169 {
170         uint64_t d1 = *(uint64_t *)obj1->data;
171         uint64_t d2 = *(uint64_t *)obj2->data;
172         int ret = NUM_COMPARE(d2, d1);
173
174         if (ret)
175                 return ret;
176         //INFO_LOG("addresses: %p, %p\n", obj1->data, obj2->data);
177         return NUM_COMPARE(obj2->data, obj1->data);
178 }
179
180 /**
181  * Compare two osl objects pointing to unsigned integers of 64 bit size.
182  *
183  * \param obj1 Pointer to the first integer.
184  * \param obj2 Pointer to the second integer.
185  *
186  * \return The values required for an osl compare function.
187  *
188  * \sa osl_compare_func, osl_hash_compare().
189  */
190 static int uint64_compare(const struct osl_object *obj1,
191                 const struct osl_object *obj2)
192 {
193         uint64_t d1 = read_u64((const char *)obj1->data);
194         uint64_t d2 = read_u64((const char *)obj2->data);
195
196         if (d1 < d2)
197                 return 1;
198         if (d1 > d2)
199                 return -1;
200         return 0;
201 }
202
/** Column indices of the directory table. */
enum dir_table_columns {
        /** Name of the directory. */
        DT_NAME = 0,
        /** The dir count number. */
        DT_NUM = 1,
        /** Number of the parent directory. */
        DT_PARENT_NUM = 2,
        /** Number of bytes of all regular files. */
        DT_BYTES = 3,
        /** Number of all regular files. */
        DT_FILES = 4,
        /** Number of columns in this table. */
        NUM_DT_COLUMNS = 5
};
218
/**
 * Column descriptions of the directory table, indexed by
 * enum dir_table_columns. All columns use mapped storage.
 */
static struct osl_column_description dir_table_cols[] = {
        /* Directory name: no rbtree and no fixed size. */
        [DT_NAME] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags = 0,
                .name = "dir",
        },
        /* Unique directory number, used for lookups by number. */
        [DT_NUM] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
                .name = "num",
                .compare_function = uint64_compare,
                .data_size = sizeof(uint64_t)
        },
        /*
         * NOTE(review): flagged OSL_UNIQUE although several directories may
         * share a parent. size_compare() only reports equality for identical
         * addresses, which presumably is what makes this work -- confirm.
         */
        [DT_PARENT_NUM] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
                .name = "parent_num",
                .compare_function = size_compare,
                .data_size = sizeof(uint64_t)
        },
        /* Sum of the sizes of the regular files in the directory. */
        [DT_BYTES] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
                .compare_function = size_compare,
                .name = "num_bytes",
                .data_size = sizeof(uint64_t)
        },
        /* Number of regular files in the directory. */
        [DT_FILES] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
                .compare_function = size_compare,
                .name = "num_files",
                .data_size = sizeof(uint64_t)
        }
};
254
/**
 * Description of the directory table. The dir field is not set here; it is
 * filled in at runtime from the database directory option.
 */
static struct osl_table_description dir_table_desc = {
        .name = "dir_table",
        .num_columns = NUM_DT_COLUMNS,
        .flags = 0,
        .column_descriptions = dir_table_cols,
};
261
/** Column indices of the per-user (id) tables. */
enum user_table_columns {
        /** The number of the directory. */
        UT_DIR_NUM = 0,
        /** Number of bytes of all regular files in this dir owned by this id. */
        UT_BYTES = 1,
        /** Number of files in this dir owned by this id. */
        UT_FILES = 2,
        /** Number of columns in this table. */
        NUM_UT_COLUMNS = 3
};
273
/**
 * Column descriptions shared by all per-user tables, indexed by
 * enum user_table_columns. All columns use mapped storage and hold one
 * 64 bit unsigned integer.
 */
static struct osl_column_description user_table_cols[] = {
        /* Unique key: at most one row per directory in each user table. */
        [UT_DIR_NUM] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
                .name = "dir_num",
                .compare_function = uint64_compare,
                .data_size = sizeof(uint64_t)
        },
        /* Bytes owned by the user in the directory. */
        [UT_BYTES] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
                .compare_function = size_compare,
                .name = "num_bytes",
                .data_size = sizeof(uint64_t)
        },
        /* Files owned by the user in the directory. */
        [UT_FILES] = {
                .storage_type = OSL_MAPPED_STORAGE,
                .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
                .compare_function = size_compare,
                .name = "num_files",
                .data_size = sizeof(uint64_t)
        },
};
297
/** Handle of the directory table; NULL is treated as "not open" by close_dir_table(). */
static struct osl_table *dir_table;
299
300 static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num,
301                 uint64_t *dir_size, uint64_t *dir_files)
302 {
303         struct osl_object dir_objects[NUM_DT_COLUMNS];
304
305         INFO_LOG("adding #%llu: %s\n", (long long unsigned)*dir_num, dirname);
306         dir_objects[DT_NAME].data = dirname;
307         dir_objects[DT_NAME].size = strlen(dirname) + 1;
308         dir_objects[DT_NUM].data = dir_num;
309         dir_objects[DT_NUM].size = sizeof(*dir_num);
310         dir_objects[DT_PARENT_NUM].data = parent_dir_num;
311         dir_objects[DT_PARENT_NUM].size = sizeof(*parent_dir_num);
312         dir_objects[DT_BYTES].data = dir_size;
313         dir_objects[DT_BYTES].size = sizeof(*dir_size);
314         dir_objects[DT_FILES].data = dir_files;
315         dir_objects[DT_FILES].size = sizeof(*dir_files);
316         return osl(osl_add_row(dir_table, dir_objects));
317 }
318
/** Number of user tables created so far; incremented by open_user_table(). */
static uint32_t num_uids;
320
321 static int open_user_table(struct user_info *ui, int create)
322 {
323         int ret;
324         struct passwd *pw;
325
326         ui->desc = adu_malloc(sizeof(*ui->desc));
327         ui->desc->num_columns = NUM_UT_COLUMNS;
328         ui->desc->flags = 0;
329         ui->desc->column_descriptions = user_table_cols;
330         ui->desc->dir = adu_strdup(conf.database_dir_arg);
331         ui->desc->name = make_message("%u", (unsigned)ui->uid);
332         pw = getpwuid(ui->uid);
333         if (pw && pw->pw_name)
334                 ui->pw_name = adu_strdup(pw->pw_name);
335
336         INFO_LOG(".............................uid #%u: %u\n",
337                 (unsigned)num_uids, (unsigned)ui->uid);
338         if (create) {
339                 ret = osl(osl_create_table(ui->desc));
340                 if (ret < 0)
341                         goto err;
342                 num_uids++;
343         }
344         ret = osl(osl_open_table(ui->desc, &ui->table));
345         if (ret < 0)
346                 goto err;
347         return 1;
348 err:
349         free((char *)ui->desc->name);
350         free((char *)ui->desc->dir);
351         free(ui->pw_name);
352         free(ui->desc);
353         ui->desc->name = NULL;
354         ui->desc->dir = NULL;
355         ui->desc = NULL;
356         ui->table = NULL;
357         ui->flags = 0;
358         return ret;
359 }
360
/** Binary logarithm of the number of slots of the uid hash table. */
#define uid_hash_bits 8
/** Number of slots of the uid hash table, always a power of two. */
static uint32_t uid_hash_table_size = 1 << uid_hash_bits;
/** Multiplier for the primary hash value in double_hash(). */
#define PRIME1 0x811c9dc5
/** Multiplier for the probe step in double_hash(). */
#define PRIME2 0x01000193
365
366 static void create_hash_table(void)
367 {
368         uid_hash_table = adu_calloc(uid_hash_table_size
369                 * sizeof(struct user_info));
370 }
371
/**
 * Release the uid hash table.
 *
 * The pointer is reset to NULL so that FOR_EACH_USER() becomes a no-op
 * afterwards. The per-slot resources are not touched here.
 */
static void free_hash_table(void)
{
        free(uid_hash_table);
        uid_hash_table = NULL;
}
377
378 static int create_tables(void)
379 {
380         int ret;
381
382         dir_table_desc.dir = adu_strdup(conf.database_dir_arg);
383         ret = osl(osl_create_table(&dir_table_desc));
384         if (ret < 0)
385                 return ret;
386         create_hash_table();
387         return 1;
388 }
389
390 static void close_dir_table(void)
391 {
392         int ret;
393
394         if (!dir_table)
395                 return;
396         ret = osl(osl_close_table(dir_table, OSL_MARK_CLEAN));
397         if (ret < 0)
398                 ERROR_LOG("failed to close dir table: %s\n", adu_strerror(-ret));
399         free((char *)dir_table_desc.dir);
400         dir_table = NULL;
401 }
402
403 static void close_user_table(struct user_info *ui)
404 {
405         int ret;
406
407         if (!ui || !ui_used(ui) || !ui_admissible(ui))
408                 return;
409         ret = osl(osl_close_table(ui->table, OSL_MARK_CLEAN));
410         if (ret < 0)
411                 ERROR_LOG("failed to close user table %u: %s\n",
412                         (unsigned) ui->uid, adu_strerror(-ret));
413         free((char *)ui->desc->name);
414         ui->desc->name = NULL;
415         free((char *)ui->desc->dir);
416         ui->desc->dir = NULL;
417         free(ui->pw_name);
418         ui->pw_name = NULL;
419         free(ui->desc);
420         ui->desc = NULL;
421         ui->table = NULL;
422         ui->flags = 0;
423 }
424
/**
 * Close the tables of all users.
 *
 * Every slot of the hash table is visited; close_user_table() skips the
 * slots which have no open table.
 */
static void close_user_tables(void)
{
        struct user_info *ui;

        FOR_EACH_USER(ui)
                close_user_table(ui);
}
432
/**
 * Close the directory table and all user tables, then free the hash table.
 *
 * The hash table must be freed last because closing the user tables
 * iterates over its slots.
 */
static void close_all_tables(void)
{
        close_dir_table();
        close_user_tables();
        free_hash_table();
}
439
/**
 * Number of the most recently caught signal, zero if none was caught.
 *
 * This is written from within the signal handler, hence it must be of type
 * volatile sig_atomic_t.
 */
static volatile sig_atomic_t signum;

/**
 * The signal handler: just record the signal number.
 *
 * \param s The number of the signal that was delivered.
 *
 * The actual work is deferred to check_signals().
 */
static void signal_handler(int s)
{
        signum = s;
}
446
/**
 * Terminate the program if a signal was caught.
 *
 * If signal_handler() recorded a signal number, the signal is logged, all
 * tables are closed and the process exits with EXIT_FAILURE. Otherwise the
 * function returns immediately.
 */
static void check_signals(void)
{
        if (likely(!signum)) /* common case: no signal pending */
                return;
        EMERG_LOG("caught signal %d\n", signum);
        close_all_tables();
        exit(EXIT_FAILURE);
}
455
456 static int init_signals(void)
457 {
458         if (signal(SIGINT, &signal_handler) == SIG_ERR)
459                 return -E_SIGNAL_SIG_ERR;
460         if (signal(SIGTERM, &signal_handler) == SIG_ERR)
461                 return -E_SIGNAL_SIG_ERR;
462         return 1;
463 }
464
465 /*
466  * We use a hash table of size s=2^uid_hash_bits to map the uids into the
467  * interval [0..s]. Hash collisions are treated by open addressing, i.e.
468  * unused slots in the table are used to store different uids that hash to the
469  * same slot.
470  *
471  * If a hash collision occurs, different slots are successively probed in order
472  * to find an unused slot for the new uid. Probing is implemented via a second
473  * hash function that maps the uid to h=(uid * PRIME2) | 1, which is always an
474  * odd number.
475  *
476  * An odd number is sufficient to make sure each entry of the hash table gets
477  * probed for probe_num between 0 and s-1 because s is a power of two, hence
478  * the second hash value has never a common divisor with the hash table size.
479  * IOW: h is invertible in the ring [0..s].
480  */
481 static uint32_t double_hash(uint32_t uid, uint32_t probe_num)
482 {
483         return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num)
484                 % uid_hash_table_size;
485 }
486
/** Flags for search_uid(). */
enum search_uid_flags {
        /** Part of the non-zero flag set that allows claiming a new slot. */
        OPEN_USER_TABLE = 1 << 0,
        /** Create the user table before opening it. */
        CREATE_USER_TABLE = 1 << 1,
};
491
492 static int uid_is_admissible(uint32_t uid)
493 {
494         int i;
495
496         for (i = 0; i < conf.uid_given; i++) {
497                 struct uid_range *ur = admissible_uids + i;
498
499                 if (ur->low <= uid && ur->high >= uid)
500                         break;
501         }
502         i = !conf.uid_given || i < conf.uid_given;
503         DEBUG_LOG("uid %u is %sadmissible\n", (unsigned)uid,
504                 i? "" : "not ");
505         return i;
506 }
507
508 static int search_uid(uint32_t uid, enum search_uid_flags flags,
509                 struct user_info **ui_ptr)
510 {
511         uint32_t p;
512
513         for (p = 0; p < uid_hash_table_size; p++) {
514                 struct user_info *ui = uid_hash_table + double_hash(uid, p);
515
516                 if (!ui_used(ui)) {
517                         int ret;
518                         if (!flags)
519                                 return -E_BAD_UID;
520                         ui->uid = uid;
521                         ui->flags |= UI_FL_SLOT_USED;
522                         if (!uid_is_admissible(uid))
523                                 return 0;
524                         ui->flags |= UI_FL_ADMISSIBLE;
525                         ret = open_user_table(ui, flags & CREATE_USER_TABLE);
526                         if (ret < 0)
527                                 return ret;
528
529                         if (ui_ptr)
530                                 *ui_ptr = ui;
531                         return 1;
532                 }
533                 if (ui->uid != uid)
534                         continue;
535                 if (ui_ptr)
536                         *ui_ptr = ui;
537                 return 0;
538         }
539         return flags? -E_HASH_TABLE_OVERFLOW : -E_BAD_UID;
540 }
541
542 static int update_user_row(struct osl_table *t, uint64_t dir_num,
543                 uint64_t *add)
544 {
545         struct osl_row *row;
546         struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
547
548         int ret = osl(osl_get_row(t, UT_DIR_NUM, &obj, &row));
549
550         if (ret == -E_OSL && osl_errno != E_OSL_RB_KEY_NOT_FOUND)
551                 return ret;
552         if (ret < 0) { /* this is the first file we add */
553                 struct osl_object objects[NUM_UT_COLUMNS];
554                 uint64_t num_files = 1;
555
556                 objects[UT_DIR_NUM].data = &dir_num;
557                 objects[UT_DIR_NUM].size = sizeof(dir_num);
558                 objects[UT_BYTES].data = add;
559                 objects[UT_BYTES].size = sizeof(*add);
560                 objects[UT_FILES].data = &num_files;
561                 objects[UT_FILES].size = sizeof(num_files);
562                 INFO_LOG("######################### ret: %d\n", ret);
563                 ret = osl(osl_add_row(t, objects));
564                 INFO_LOG("######################### ret: %d\n", ret);
565                 return ret;
566         } else { /* add size and increment file count */
567                 uint64_t num;
568                 struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
569
570                 ret = osl(osl_get_object(t, row, UT_BYTES, &obj1));
571                 if (ret < 0)
572                         return ret;
573                 num = *(uint64_t *)obj1.data + *add;
574                 ret = osl(osl_update_object(t, row, UT_BYTES, &obj2));
575                 if (ret < 0)
576                         return ret;
577                 ret = osl(osl_get_object(t, row, UT_FILES, &obj1));
578                 if (ret < 0)
579                         return ret;
580                 num = *(uint64_t *)obj1.data + 1;
581                 return osl(osl_update_object(t, row, UT_FILES, &obj2));
582         }
583 }
584
/** Running directory counter; scan_dir() takes its directory numbers from it. */
static uint64_t num_dirs;
/** Total number of regular files seen. */
static uint64_t num_files;
/** Sum of the sizes of all regular files seen. */
static uint64_t num_bytes;

/* id of the device containing the base dir. */
static dev_t device_id;
591
592 static int scan_dir(char *dirname, uint64_t *parent_dir_num)
593 {
594         DIR *dir;
595         struct dirent *entry;
596         int ret, cwd_fd, ret2;
597         uint64_t dir_size = 0, dir_files = 0;
598         uint64_t this_dir_num = ++num_dirs;
599
600         check_signals();
601         DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)num_dirs, dirname);
602         ret = adu_opendir(dirname, &dir, &cwd_fd);
603         if (ret < 0) {
604                 if (ret != -ERRNO_TO_ERROR(EACCES))
605                         return ret;
606                 WARNING_LOG("permission denied for %s\n", dirname);
607                 return 1;
608         }
609         while ((entry = readdir(dir))) {
610                 mode_t m;
611                 struct stat s;
612                 uint32_t uid;
613                 uint64_t size;
614                 struct user_info *ui;
615
616                 if (!strcmp(entry->d_name, "."))
617                         continue;
618                 if (!strcmp(entry->d_name, ".."))
619                         continue;
620                 if (lstat(entry->d_name, &s) == -1) {
621                         WARNING_LOG("lstat error for %s/%s\n", dirname,
622                                 entry->d_name);
623                         continue;
624                 }
625                 m = s.st_mode;
626                 if (!S_ISREG(m) && !S_ISDIR(m))
627                         continue;
628                 if (S_ISDIR(m)) {
629                         if (conf.one_file_system_given && s.st_dev != device_id)
630                                 continue;
631                         ret = scan_dir(entry->d_name, &this_dir_num);
632                         if (ret < 0)
633                                 goto out;
634                         continue;
635                 }
636                 /* regular file */
637                 size = s.st_size;
638                 dir_size += size;
639                 num_bytes += size;
640                 dir_files++;
641                 num_files++;
642                 uid = s.st_uid;
643                 ret = search_uid(uid, CREATE_USER_TABLE | OPEN_USER_TABLE, &ui);
644                 if (ret < 0)
645                         goto out;
646                 ui->bytes += size;
647                 ui->files++;
648                 ret = update_user_row(ui->table, this_dir_num, &size);
649                 if (ret < 0)
650                         goto out;
651         }
652         ret = add_directory(dirname, &this_dir_num, parent_dir_num,
653                         &dir_size, &dir_files);
654 out:
655         closedir(dir);
656         ret2 = adu_fchdir(cwd_fd);
657         if (ret2 < 0 && ret >= 0)
658                 ret = ret2;
659         close(cwd_fd);
660         return ret;
661 }
662
663 static int get_dir_name_by_number(uint64_t *dirnum, char **name)
664 {
665         char *result = NULL, *tmp;
666         struct osl_row *row;
667         uint64_t val = *dirnum;
668         struct osl_object obj = {.data = &val, .size = sizeof(val)};
669         int ret;
670
671 again:
672         ret = osl(osl_get_row(dir_table, DT_NUM, &obj, &row));
673         if (ret < 0)
674                 goto out;
675         ret = osl(osl_get_object(dir_table, row, DT_NAME, &obj));
676         if (ret < 0)
677                 goto out;
678         if (result) {
679                 tmp = make_message("%s/%s", (char *)obj.data, result);
680                 free(result);
681                 result = tmp;
682         } else
683                 result = adu_strdup((char *)obj.data);
684         ret = osl(osl_get_object(dir_table, row, DT_PARENT_NUM, &obj));
685         if (ret < 0)
686                 goto out;
687         val = *(uint64_t *)obj.data;
688         if (val)
689                 goto again;
690 out:
691         if (ret < 0) {
692                 free(result);
693                 *name = NULL;
694         } else
695                 *name = result;
696         return ret;
697 }
698
699 static int get_dir_name_of_row(struct osl_row *dir_table_row, char **name)
700 {
701         struct osl_object obj;
702         int ret;
703         char *this_dir, *prefix = NULL;
704
705         *name = NULL;
706         ret = osl(osl_get_object(dir_table, dir_table_row, DT_NAME, &obj));
707         if (ret < 0)
708                 return ret;
709         this_dir = adu_strdup((char *)obj.data);
710         ret = osl(osl_get_object(dir_table, dir_table_row, DT_PARENT_NUM, &obj));
711         if (ret < 0)
712                 goto out;
713         if (!*(uint64_t *)obj.data) {
714                 *name = this_dir;
715                 return 1;
716         }
717         ret = get_dir_name_by_number((uint64_t *)obj.data, &prefix);
718         if (ret < 0)
719                 goto out;
720         *name = make_message("%s/%s", prefix, this_dir);
721         free(prefix);
722         ret = 1;
723 out:
724         free(this_dir);
725         return ret;
726 }
727
/**
 * Divisors for the size units (powers of 1024), indexed by the
 * size_unit_arg_* enumerators, which are declared outside this file.
 */
const uint64_t size_unit_divisors[] = {
        [size_unit_arg_b] = 1ULL,
        [size_unit_arg_k] = 1024ULL,
        [size_unit_arg_m] = 1024ULL * 1024ULL,
        [size_unit_arg_g] = 1024ULL * 1024ULL * 1024ULL,
        [size_unit_arg_t] = 1024ULL * 1024ULL * 1024ULL * 1024ULL,
};
735
/**
 * Divisors for the count units (powers of 1000), indexed by the
 * count_unit_arg_* enumerators, which are declared outside this file.
 */
const uint64_t count_unit_divisors[] = {

        [count_unit_arg_n] = 1ULL,
        [count_unit_arg_k] = 1000ULL,
        [count_unit_arg_m] = 1000ULL * 1000ULL,
        [count_unit_arg_g] = 1000ULL * 1000ULL * 1000ULL,
        [count_unit_arg_t] = 1000ULL * 1000ULL * 1000ULL * 1000ULL,
};
744
/*
 * One-character unit abbreviations, indexed by the unit enumerators in
 * format_size_value() and format_count_value().
 * NOTE(review): the leading blanks presumably cover enumerators without a
 * unit character -- confirm against the enum definitions.
 */
const char size_unit_abbrevs[] = " BKMGT";
const char count_unit_abbrevs[] = "  kmgt";
747
748 static enum enum_size_unit format_size_value(enum enum_size_unit unit,
749                 uint64_t value, int print_unit, char *result)
750 {
751         enum enum_size_unit u = unit;
752         char unit_buf[2] = "\0\0";
753
754         if (unit == size_unit_arg_h) /* human readable */
755                 for (u = size_unit_arg_b; u < size_unit_arg_t &&
756                                 value > size_unit_divisors[u + 1]; u++)
757                         ; /* nothing */
758         if (print_unit)
759                 unit_buf[0] = size_unit_abbrevs[u];
760         sprintf(result, "%llu%s",
761                 (long long unsigned)value / size_unit_divisors[u], unit_buf);
762         return u;
763 }
764
765 static enum enum_count_unit format_count_value(enum enum_count_unit unit,
766                 uint64_t value, int print_unit, char *result)
767 {
768         enum enum_count_unit u = unit;
769         char unit_buf[2] = "\0\0";
770
771         if (unit == count_unit_arg_h) /* human readable */
772                 for (u = count_unit_arg_n; u < count_unit_arg_t &&
773                                 value > count_unit_divisors[u + 1]; u++)
774                         ; /* nothing */
775         if (print_unit)
776                 unit_buf[0] = count_unit_abbrevs[u];
777         sprintf(result, "%llu%s",
778                 (long long unsigned)value / count_unit_divisors[u], unit_buf);
779         return u;
780 }
781
/** Controls what global_stats_loop_function() prints and computes. */
enum global_stats_flags {
        /** Print the name of the directory. */
        GSF_PRINT_DIRNAME = 1 << 0,
        /** Print the byte count of the directory. */
        GSF_PRINT_BYTES = 1 << 1,
        /** Print the file count of the directory. */
        GSF_PRINT_FILES = 1 << 2,
        /** Add the values of each row to the global counters. */
        GSF_COMPUTE_SUMMARY = 1 << 3,
};
788
/** State passed to global_stats_loop_function() via its data pointer. */
struct global_stats_info {
        /** Number of rows still to be printed; decremented per row until zero. */
        uint32_t count;
        /** Error code of the loop function, set when it aborts the loop. */
        int ret;
        /** Copy of osl_errno if ret is -E_OSL, zero otherwise. */
        int osl_errno;
        /** What to print and whether to compute the summary. */
        enum global_stats_flags flags;
};
795
796 static int global_stats_loop_function(struct osl_row *row, void *data)
797 {
798         struct global_stats_info *gsi = data;
799         struct osl_object obj;
800         char *dirname, formated_value[FORMATED_VALUE_SIZE];
801         int ret, summary = gsi->flags & GSF_COMPUTE_SUMMARY;
802
803         check_signals();
804         if (!gsi->count && !summary) {
805                 ret = -E_LOOP_COMPLETE;
806                 goto err;
807         }
808         if (summary || (gsi->count && (gsi->flags & GSF_PRINT_FILES))) {
809                 uint64_t files;
810                 ret = osl(osl_get_object(dir_table, row, DT_FILES, &obj));
811                 if (ret < 0)
812                         goto err;
813                 files = *(uint64_t *)obj.data;
814                 if (gsi->count && (gsi->flags & GSF_PRINT_FILES)) {
815                         format_count_value(conf.count_unit_arg, files,
816                                 conf.count_unit_arg == count_unit_arg_h,
817                                 formated_value);
818                         printf("\t%s%s", formated_value,
819                                 (gsi->flags & (GSF_PRINT_BYTES | GSF_PRINT_DIRNAME))?
820                                 "\t" : "\n");
821                 }
822                 if (summary)
823                         num_files += files;
824         }
825         if (summary || (gsi->count && (gsi->flags & GSF_PRINT_BYTES))) {
826                 uint64_t bytes;
827                 ret = osl(osl_get_object(dir_table, row, DT_BYTES, &obj));
828                 if (ret < 0)
829                         goto err;
830                 bytes = *(uint64_t *)obj.data;
831                 if (gsi->count && (gsi->flags & GSF_PRINT_BYTES)) {
832                         format_size_value(conf.size_unit_arg, bytes,
833                                 conf.size_unit_arg == size_unit_arg_h,
834                                 formated_value);
835                         printf("%s%s%s",
836                                 (gsi->flags & GSF_PRINT_FILES)? "" : "\t",
837                                 formated_value,
838                                 (gsi->flags & GSF_PRINT_DIRNAME)? "\t" : "\n"
839                         );
840                 }
841                 if (summary) {
842                         num_bytes += bytes;
843                         num_dirs++;
844                 }
845         }
846         if (gsi->count && (gsi->flags & GSF_PRINT_DIRNAME)) {
847                 ret = get_dir_name_of_row(row, &dirname);
848                 if (ret < 0)
849                         goto err;
850                 printf("%s%s\n",
851                         (gsi->flags & (GSF_PRINT_BYTES | GSF_PRINT_FILES))? "" : "\t",
852                         dirname);
853                 free(dirname);
854         }
855         if (gsi->count > 0)
856                 gsi->count--;
857         return 1;
858 err:
859         gsi->ret = ret;
860         gsi->osl_errno = (ret == -E_OSL)? osl_errno : 0;
861         return -1;
862 }
863
864 static void print_id_stats(void)
865 {
866         struct user_info *ui;
867
868         printf("User summary "
869                 "(pw_name/uid/dirs%s/files%s/size%s):\n",
870                 count_unit_buf, count_unit_buf, size_unit_buf);
871         FOR_EACH_USER(ui) {
872                 char formated_dir_count[FORMATED_VALUE_SIZE],
873                         formated_file_count[FORMATED_VALUE_SIZE],
874                         formated_bytes[FORMATED_VALUE_SIZE ];
875                 if (!ui_used(ui) || !ui_admissible(ui))
876                         continue;
877                 format_count_value(conf.count_unit_arg, ui->dirs,
878                         conf.count_unit_arg == count_unit_arg_h,
879                         formated_dir_count);
880                 format_count_value(conf.count_unit_arg, ui->files,
881                         conf.count_unit_arg == count_unit_arg_h,
882                         formated_file_count);
883                 format_size_value(conf.size_unit_arg, ui->bytes,
884                         conf.size_unit_arg == size_unit_arg_h,
885                         formated_bytes);
886                 printf("\t%s\t%u\t%s\t%s\t%s\n",
887                         ui->pw_name? ui->pw_name : "?",
888                         (unsigned)ui->uid,
889                         formated_dir_count,
890                         formated_file_count,
891                         formated_bytes
892                 );
893         }
894 }
895
/** Bits that select what user_stats_loop_function() does for each row. */
enum user_stats_flags {
	/** Print the name of the directory the row refers to. */
	USF_PRINT_DIRNAME = 1 << 0,
	/** Print the formatted byte count of the row. */
	USF_PRINT_BYTES = 1 << 1,
	/** Print the formatted file count of the row. */
	USF_PRINT_FILES = 1 << 2,
	/** Accumulate the per-user totals while looping. */
	USF_COMPUTE_SUMMARY = 1 << 3,
};
902
903 struct user_stats_info {
904         uint32_t count;
905         enum user_stats_flags flags;
906         int ret;
907         int osl_errno;
908         struct user_info *ui;
909 };
910
911 static int user_stats_loop_function(struct osl_row *row, void *data)
912 {
913         struct user_stats_info *usi = data;
914         struct osl_object obj;
915         int ret, summary = usi->flags & GSF_COMPUTE_SUMMARY;
916         char formated_value[FORMATED_VALUE_SIZE];
917
918         check_signals();
919         if (!usi->count && !summary) {
920                 ret = -E_LOOP_COMPLETE;
921                 goto err;
922         }
923         if (summary || (usi->count && (usi->flags & USF_PRINT_FILES))) {
924                 uint64_t files;
925                 ret = osl(osl_get_object(usi->ui->table, row, UT_FILES, &obj));
926                 if (ret < 0)
927                         goto err;
928                 files = *(uint64_t *)obj.data;
929                 if (usi->count && (usi->flags & USF_PRINT_FILES)) {
930                         format_count_value(conf.count_unit_arg, files,
931                                 conf.count_unit_arg == count_unit_arg_h,
932                                 formated_value);
933                         printf("\t%s%s", formated_value,
934                                 (usi->flags & (USF_PRINT_BYTES | USF_PRINT_DIRNAME))?
935                                         "\t" : "\n"
936                         );
937                 }
938                 if (summary)
939                         usi->ui->files += files;
940         }
941         if (summary || (usi->count && (usi->flags & USF_PRINT_BYTES))) {
942                 uint64_t bytes;
943                 ret = osl(osl_get_object(usi->ui->table, row, UT_BYTES, &obj));
944                 if (ret < 0)
945                         goto err;
946                 bytes = *(uint64_t *)obj.data;
947                 if (usi->count && (usi->flags & USF_PRINT_BYTES)) {
948                         format_size_value(conf.size_unit_arg, bytes,
949                                 conf.size_unit_arg == size_unit_arg_h,
950                                 formated_value);
951                         printf("%s%s%s",
952                                 (usi->flags & USF_PRINT_FILES)? "" : "\t",
953                                 formated_value,
954                                 usi->flags & USF_PRINT_DIRNAME?  "\t" : "\n"
955                         );
956                 }
957                 if (summary) {
958                         usi->ui->bytes += bytes;
959                         usi->ui->dirs++;
960                 }
961
962         }
963         if (usi->count && (usi->flags & USF_PRINT_DIRNAME)) {
964                 char *dirname;
965                 ret = osl(osl_get_object(usi->ui->table, row, UT_DIR_NUM, &obj));
966                 if (ret < 0)
967                         goto err;
968                 ret = get_dir_name_by_number((uint64_t *)obj.data, &dirname);
969                 if (ret < 0)
970                         goto err;
971                 printf("%s%s\n",
972                         (usi->flags & (USF_PRINT_BYTES | USF_PRINT_FILES))? "" : "\t",
973                         dirname);
974                 free(dirname);
975         }
976         if (usi->count > 0)
977                 usi->count--;
978         return 1;
979 err:
980         usi->ret = ret;
981         usi->osl_errno = (ret == -E_OSL)? osl_errno : 0;
982         return -1;
983 }
984
985 static int check_loop_return(int ret, int loop_ret, int loop_osl_errno)
986 {
987         if (ret >= 0)
988                 return ret;
989         assert(ret == -E_OSL);
990         if (osl_errno != E_OSL_LOOP)
991                 /* error not caused by loop function returning negative. */
992                 return ret;
993         assert(loop_ret < 0);
994         if (loop_ret == -E_LOOP_COMPLETE) /* no error */
995                 return 1;
996         if (loop_ret == -E_OSL) { /* osl error in loop function */
997                 assert(loop_osl_errno);
998                 osl_errno = loop_osl_errno;
999         }
1000         return loop_ret;
1001 }
1002
1003 static int adu_loop_reverse(struct osl_table *t, unsigned col_num, void *private_data,
1004                 osl_rbtree_loop_func *func, int *loop_ret, int *loop_osl_errno)
1005 {
1006         int ret = osl(osl_rbtree_loop_reverse(t, col_num, private_data, func));
1007         return check_loop_return(ret, *loop_ret, *loop_osl_errno);
1008 }
1009
1010 static int print_user_stats(void)
1011 {
1012         struct user_info *ui;
1013         int ret;
1014
1015         FOR_EACH_USER(ui) {
1016                 struct user_stats_info usi = {
1017                         .count = conf.limit_arg,
1018                         .ui = ui
1019                 };
1020                 if (!ui_used(ui) || !ui_admissible(ui))
1021                         continue;
1022                 usi.flags = USF_PRINT_DIRNAME | USF_PRINT_BYTES | USF_COMPUTE_SUMMARY;
1023                 printf("%s (uid %u), by size%s:\n",
1024                         ui->pw_name? ui->pw_name : "?", (unsigned)ui->uid,
1025                         size_unit_buf);
1026                 ret = adu_loop_reverse(ui->table, UT_BYTES, &usi, user_stats_loop_function,
1027                         &usi.ret, &usi.osl_errno);
1028                 if (ret < 0)
1029                         return ret;
1030                 printf("\n%s (uid %u), by file count%s:\n",
1031                         ui->pw_name? ui->pw_name : "?", (unsigned)ui->uid,
1032                         count_unit_buf);
1033                 usi.count = conf.limit_arg,
1034                 usi.flags = USF_PRINT_DIRNAME | USF_PRINT_FILES;
1035                 ret = adu_loop_reverse(ui->table, UT_FILES, &usi, user_stats_loop_function,
1036                         &usi.ret, &usi.osl_errno);
1037                 if (ret < 0)
1038                         return ret;
1039                 printf("\n");
1040         }
1041         return 1;
1042 }
1043
1044 static void print_global_summary(void)
1045 {
1046         char d[FORMATED_VALUE_SIZE], f[FORMATED_VALUE_SIZE],
1047                 s[FORMATED_VALUE_SIZE];
1048         enum enum_count_unit ud, uf;
1049         enum enum_size_unit us;
1050
1051         ud = format_count_value(conf.count_unit_arg, num_dirs, 0, d);
1052         uf = format_count_value(conf.count_unit_arg, num_files, 0, f);
1053         us = format_size_value(conf.size_unit_arg, num_bytes, 0, s);
1054
1055         printf("Global summary "
1056                 "(dirs(%c)/files(%c)/size(%c))\n"
1057                 "\t%s\t%s\t%s\n\n",
1058                 count_unit_abbrevs[ud],
1059                 count_unit_abbrevs[uf],
1060                 size_unit_abbrevs[us],
1061                 d, f, s
1062         );
1063
1064 }
1065
1066 static int print_statistics(void)
1067 {
1068         int ret;
1069         struct global_stats_info gsi = {
1070                 .count = conf.limit_arg,
1071                 .flags = GSF_PRINT_DIRNAME | GSF_PRINT_BYTES | GSF_COMPUTE_SUMMARY
1072         };
1073
1074         printf("By size%s:\n",
1075                 size_unit_buf);
1076         ret = adu_loop_reverse(dir_table, DT_BYTES, &gsi,
1077                 global_stats_loop_function, &gsi.ret, &gsi.osl_errno);
1078         if (ret < 0)
1079                 return ret;
1080         printf("\n");
1081
1082         gsi.count = conf.limit_arg;
1083         gsi.flags = GSF_PRINT_DIRNAME | GSF_PRINT_FILES;
1084         printf("By file count%s:\n",
1085                 count_unit_buf);
1086         ret = adu_loop_reverse(dir_table, DT_FILES, &gsi,
1087                 global_stats_loop_function, &gsi.ret, &gsi.osl_errno);
1088         if (ret < 0)
1089                 return ret;
1090         printf("\n");
1091         print_global_summary();
1092         print_user_stats();
1093         print_id_stats();
1094         return 1;
1095 }
1096
1097 static char *get_uid_list_name(void)
1098 {
1099         return make_message("%s/uid_list", conf.database_dir_arg);
1100 }
1101
1102 static int write_uid_list(void)
1103 {
1104         char *buf, *filename;
1105         uint32_t count = 0;
1106         struct user_info *ui;
1107         size_t size = num_uids * sizeof(uint32_t);
1108         int ret;
1109
1110         if (!num_uids)
1111                 return 0;
1112         buf = adu_malloc(size);
1113         FOR_EACH_USER(ui) {
1114                 if (!ui_used(ui) || !ui_admissible(ui))
1115                         continue;
1116                 DEBUG_LOG("saving uid %u\n", (unsigned) ui->uid);
1117                 write_u32(buf + count++ * sizeof(uint32_t), ui->uid);
1118         }
1119         filename = get_uid_list_name();
1120         ret = adu_write_file(filename, buf, size);
1121         free(filename);
1122         free(buf);
1123         return ret;
1124 }
1125
1126 static int open_dir_table(void)
1127 {
1128         if (!dir_table_desc.dir) /* we did not create the table */
1129                 dir_table_desc.dir = adu_strdup(conf.database_dir_arg);
1130         return osl(osl_open_table(&dir_table_desc, &dir_table));
1131 }
1132 static int com_create()
1133 {
1134         uint64_t zero = 0ULL;
1135         int ret;
1136         struct stat statbuf;
1137
1138         if (lstat(conf.base_dir_arg, &statbuf) == -1)
1139                 return -ERRNO_TO_ERROR(errno);
1140         if (!S_ISDIR(statbuf.st_mode))
1141                 return -ERRNO_TO_ERROR(ENOTDIR);
1142         device_id = statbuf.st_dev;
1143         ret = create_tables();
1144         if (ret < 0)
1145                 return ret;
1146         check_signals();
1147         ret = open_dir_table();
1148         if (ret < 0)
1149                 return ret;
1150         check_signals();
1151         ret = scan_dir(conf.base_dir_arg, &zero);
1152         if (ret < 0)
1153                 goto out;
1154         ret = write_uid_list();
1155 out:
1156         close_all_tables();
1157         return ret;
1158 }
1159
1160 static int read_uid_file(void)
1161 {
1162         size_t size;
1163         uint32_t n;
1164         char *filename = get_uid_list_name(), *map;
1165         int ret = mmap_full_file(filename, O_RDONLY, (void **)&map, &size, NULL);
1166
1167         if (ret < 0) {
1168                 INFO_LOG("failed to map %s\n", filename);
1169                 free(filename);
1170                 return ret;
1171         }
1172         num_uids = size / 4;
1173         INFO_LOG("found %u uids in %s\n", (unsigned)num_uids, filename);
1174         free(filename);
1175         /* hash table size should be a power of two and larger than the number of uids */
1176         uid_hash_table_size = 4;
1177         while (uid_hash_table_size < num_uids)
1178                 uid_hash_table_size *= 2;
1179         create_hash_table();
1180         for (n = 0; n < num_uids; n++) {
1181                 uint32_t uid = read_u32(map + n * sizeof(uid));
1182                 ret = search_uid(uid, OPEN_USER_TABLE, NULL);
1183                 if (ret < 0)
1184                         goto out;
1185         }
1186 out:
1187         adu_munmap(map, size);
1188         return ret;
1189 }
1190
1191 static int com_select(void)
1192 {
1193         int ret;
1194
1195         if (conf.count_unit_arg != count_unit_arg_h)
1196                 count_unit_buf[1] = count_unit_abbrevs[conf.count_unit_arg];
1197         else
1198                 count_unit_buf[0] = '\0';
1199         if (conf.size_unit_arg != size_unit_arg_h)
1200                 size_unit_buf[1] = size_unit_abbrevs[conf.size_unit_arg];
1201         else
1202                 size_unit_buf[0] = '\0';
1203
1204         ret = open_dir_table();
1205         if (ret < 0)
1206                 return ret;
1207         check_signals();
1208         ret = read_uid_file();
1209         if (ret < 0)
1210                 return ret;
1211         check_signals();
1212         ret = print_statistics();
1213         close_all_tables();
1214         return ret;
1215 }
1216
1217 static int check_args(void)
1218 {
1219         int i, ret;
1220
1221         /* remove trailing slashes from base-dir arg */
1222         if (conf.base_dir_given) {
1223                 size_t len = strlen(conf.base_dir_arg);
1224                 for (;;) {
1225                         if (!len) /* empty string */
1226                                 return -ERRNO_TO_ERROR(EINVAL);
1227                         if (!--len) /* length 1 is always OK */
1228                                 break;
1229                         if (conf.base_dir_arg[len] != '/')
1230                                 break; /* no trailing slash, also OK */
1231                         conf.base_dir_arg[len] = '\0';
1232                 }
1233         }
1234         if (!conf.uid_given)
1235                 return 0;
1236         admissible_uids = adu_malloc(conf.uid_given * sizeof(*admissible_uids));
1237         for (i = 0; i < conf.uid_given; i++) {
1238                 ret = parse_uid_range(conf.uid_arg[i], admissible_uids + i);
1239                 if (ret < 0)
1240                         goto err;
1241         }
1242         return 1;
1243 err:
1244         free(admissible_uids);
1245         admissible_uids = NULL;
1246         return ret;
1247 }
1248
1249 int main(int argc, char **argv)
1250 {
1251         int ret;
1252         struct cmdline_parser_params params = {
1253                 .override = 0,
1254                 .initialize = 1,
1255                 .check_required = 0,
1256                 .check_ambiguity = 0,
1257                 .print_errors = 1
1258         };
1259
1260         cmdline_parser_ext(argc, argv, &conf, &params); /* aborts on errors */
1261         ret = init_signals();
1262         if (ret < 0)
1263                 goto out;
1264         ret = check_args();
1265         if (ret < 0)
1266                 goto out;
1267         ret = -E_SYNTAX;
1268         if (conf.select_given)
1269                 ret = com_select();
1270         else
1271                 ret = com_create();
1272         if (ret < 0)
1273                 goto out;
1274 out:
1275         free(admissible_uids);
1276         if (ret < 0) {
1277                 ERROR_LOG("%s\n", adu_strerror(-ret));
1278                 return -EXIT_FAILURE;
1279         }
1280         return EXIT_SUCCESS;
1281 }