]> git.tuebingen.mpg.de Git - adu.git/blob - adu.c
5f674dc78e06a3389b0e10fe158f2aea2affa285
[adu.git] / adu.c
1 #include "adu.h"
2 #include <dirent.h> /* readdir() */
3
4 #include "gcc-compat.h"
5 #include "osl.h"
6 #include "fd.h"
7 #include "hash.h"
8 #include "string.h"
9 #include "error.h"
10
11 DEFINE_ERRLIST;
12
/** Directory below which the osl tables and the uid list are stored. */
#define DATABASE_DIR "/tmp/adu"

/**
 * Three-way comparison of two numbers.
 *
 * Evaluates to 1 if x < y, to -1 if x > y and to 0 if x == y. Both
 * arguments are evaluated twice, so they must be free of side effects.
 */
#define NUM_COMPARE(x, y) (((x) < (y)) - ((x) > (y)))
17
18
19 /**
20  * The log function.
21  *
22  * \param ll Loglevel.
23  * \param fml Usual format string.
24  *
25  * All XXX_LOG() macros use this function.
26  */
27 __printf_2_3 void __log(int ll, const char* fmt,...)
28 {
29         va_list argp;
30         FILE *outfd;
31         struct tm *tm;
32         time_t t1;
33         char str[255] = "";
34
35         if (ll < 4)
36                 return;
37         outfd = stderr;
38         time(&t1);
39         tm = localtime(&t1);
40         strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
41         fprintf(outfd, "%s ", str);
42         va_start(argp, fmt);
43         vfprintf(outfd, fmt, argp);
44         va_end(argp);
45 }
46
47 /**
48  * Compare the size of two directories
49  *
50  * \param obj1 Pointer to the first object.
51  * \param obj2 Pointer to the second object.
52  *
53  * This function first compares the size values as usual integers. If they compare as
54  * equal, the address of \a obj1 and \a obj2 are compared. So this compare function
55  * returns zero if and only if \a obj1 and \a obj2 point to the same memory area.
56  */
57 static int size_compare(const struct osl_object *obj1, const struct osl_object *obj2)
58 {
59         uint64_t d1 = *(uint64_t *)obj1->data;
60         uint64_t d2 = *(uint64_t *)obj2->data;
61         int ret = NUM_COMPARE(d2, d1);
62
63         if (ret)
64                 return ret;
65         //INFO_LOG("addresses: %p, %p\n", obj1->data, obj2->data);
66         return NUM_COMPARE(obj2->data, obj1->data);
67 }
68
69 /**
70  * Compare two osl objects of string type.
71  *
72  * \param obj1 Pointer to the first object.
73  * \param obj2 Pointer to the second object.
74  *
75  * In any case, only \p MIN(obj1->size, obj2->size) characters of each string
76  * are taken into account.
77  *
78  * \return It returns an integer less than, equal to, or greater than zero if
79  * \a obj1 is found, respectively, to be less than, to match, or be greater than
80  * obj2.
81  *
82  * \sa strcmp(3), strncmp(3), osl_compare_func.
83  */
84 int string_compare(const struct osl_object *obj1, const struct osl_object *obj2)
85 {
86         const char *str1 = (const char *)obj1->data;
87         const char *str2 = (const char *)obj2->data;
88         return strncmp(str1, str2, MIN(obj1->size, obj2->size));
89 }
90
/** Column indices of the directory table. */
enum dir_table_columns {
	/** Path of the directory, as passed to add_directory(). */
	DT_NAME = 0,
	/** Number assigned to the directory while scanning. */
	DT_NUM,
	/** Accumulated size in bytes of the regular files in the directory. */
	DT_BYTES,
	/** How many regular files the directory contains. */
	DT_FILES,
	/** Not a column: the number of columns of this table. */
	NUM_DT_COLUMNS
};
104
105 static struct osl_column_description dir_table_cols[] = {
106         [DT_NAME] = {
107                 .storage_type = OSL_MAPPED_STORAGE,
108                 .storage_flags = OSL_RBTREE | OSL_UNIQUE,
109                 .name = "dir",
110                 .compare_function = string_compare,
111         },
112         [DT_NUM] = {
113                 .storage_type = OSL_MAPPED_STORAGE,
114                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
115                 .name = "num",
116                 .compare_function = uint32_compare,
117                 .data_size = sizeof(uint32_t)
118         },
119         [DT_BYTES] = {
120                 .storage_type = OSL_MAPPED_STORAGE,
121                 .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
122                 .compare_function = size_compare,
123                 .name = "num_bytes",
124                 .data_size = sizeof(uint64_t)
125         },
126         [DT_FILES] = {
127                 .storage_type = OSL_MAPPED_STORAGE,
128                 .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
129                 .compare_function = size_compare,
130                 .name = "num_files",
131                 .data_size = sizeof(uint64_t)
132         }
133 };
134
135 static struct osl_table_description dir_table_desc = {
136         .name = "dir_table",
137         .num_columns = NUM_DT_COLUMNS,
138         .flags = 0,
139         .column_descriptions = dir_table_cols,
140         .dir = DATABASE_DIR
141 };
142
/** Column indices of the per-user (id) tables. */
enum user_table_columns {
	/** The number of the directory. */
	UT_DIR_NUM = 0,
	/** Accumulated size in bytes of the files in this dir owned by this id. */
	UT_BYTES,
	/** How many files in this dir are owned by this id. */
	UT_FILES,
	/** Not a column: the number of columns of this table. */
	NUM_UT_COLUMNS
};
154
155 static struct osl_column_description user_table_cols[] = {
156         [UT_DIR_NUM] = {
157                 .storage_type = OSL_MAPPED_STORAGE,
158                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
159                 .name = "dir_num",
160                 .compare_function = uint32_compare,
161                 .data_size = sizeof(uint32_t)
162         },
163         [UT_BYTES] = {
164                 .storage_type = OSL_MAPPED_STORAGE,
165                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
166                 .compare_function = size_compare,
167                 .name = "num_bytes",
168                 .data_size = sizeof(uint64_t)
169         },
170         [UT_FILES] = {
171                 .storage_type = OSL_MAPPED_STORAGE,
172                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
173                 .compare_function = size_compare,
174                 .name = "num_files",
175                 .data_size = sizeof(uint64_t)
176         },
177 };
178
179 static struct osl_table *dir_table;
180
181 int add_directory(char *dirname, uint32_t dir_num, uint64_t *dir_size,
182                 uint64_t *dir_files)
183 {
184         struct osl_object dir_objects[NUM_DT_COLUMNS];
185
186         INFO_LOG("adding #%u: %s\n", dir_num, dirname);
187         dir_objects[DT_NAME].data = dirname;
188         dir_objects[DT_NAME].size = strlen(dirname) + 1;
189         dir_objects[DT_NUM].data = &dir_num;
190         dir_objects[DT_NUM].size = sizeof(dir_num);
191         dir_objects[DT_BYTES].data = dir_size;
192         dir_objects[DT_BYTES].size = sizeof(*dir_size);
193         dir_objects[DT_FILES].data = dir_files;
194         dir_objects[DT_FILES].size = sizeof(*dir_files);
195
196         return osl_add_row(dir_table, dir_objects);
197 }
198
199 static uint32_t num_uids;
200
201 int create_and_open_user_table(uint32_t uid, struct osl_table **t)
202 {
203         int ret;
204         struct osl_table_description *desc = para_malloc(sizeof(*desc));
205
206         desc->num_columns = NUM_UT_COLUMNS;
207         desc->flags = 0;
208         desc->column_descriptions = user_table_cols;
209         desc->dir = para_strdup(DATABASE_DIR);
210         desc->name = make_message("%u", uid);
211         num_uids++;
212         INFO_LOG(".............................uid #%u: %u\n",
213                 (unsigned)num_uids, (unsigned)uid);
214 //      user_table_desc.name = make_message("%u", uid);
215         ret = osl_create_table(desc);
216         if (ret < 0)
217                 return ret;
218         return osl_open_table(desc, t);
219 }
220
221
/** The uid hash table has 2^uid_hash_bits slots. */
#define uid_hash_bits 8
/** Number of slots of the uid hash table. */
static uint32_t uid_hash_table_size = 1U << uid_hash_bits;
/** Multiplier for the start position of a probe sequence, see double_hash(). */
#define PRIME1 0x811c9dc5
/** Multiplier for the step width of a probe sequence, see double_hash(). */
#define PRIME2 0x01000193
226
/** One slot of the uid hash table. */
struct user_info {
	/** The user ID this slot belongs to. */
	uint32_t uid;
	/** The table of this user. NULL means the slot is unused. */
	struct osl_table *table;
	/** Number of files found so far which are owned by \a uid. */
	uint64_t files;
	/** Sum of the sizes of these files. */
	uint64_t bytes;
};
233
234 static struct user_info *uid_hash_table;
235
236 static void create_hash_table(void)
237 {
238         uid_hash_table = para_calloc(uid_hash_table_size
239                 * sizeof(struct user_info));
240 }
241
242 static int create_tables(void)
243 {
244         create_hash_table();
245         return osl_create_table(&dir_table_desc);
246 }
247
248
249 static uint32_t double_hash(uint32_t uid, uint32_t probe_num)
250 {
251         return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num) % uid_hash_table_size;
252 }
253
/** Iterate over all slots of the uid hash table, used or not. */
#define FOR_EACH_USER(ui) \
	for ((ui) = uid_hash_table; \
		(ui) < uid_hash_table + uid_hash_table_size; \
		(ui)++)
256
257 static int search_uid(uint32_t uid, int insert, struct user_info **ui)
258 {
259         uint32_t p;
260
261         for (p = 0; p < uid_hash_table_size; p++) {
262                 struct user_info *i = uid_hash_table + double_hash(uid, p);
263                 if (!i->table) {
264                         if (!insert)
265                                 return -E_BAD_UID;
266                         int ret = create_and_open_user_table(uid, &i->table);
267                         if (ret < 0)
268                                 return ret;
269                         i->uid = uid;
270                         *ui = i;
271                         return 1;
272                 }
273                 if (i->uid != uid)
274                         continue;
275                 *ui = i;
276                 return 0;
277         }
278         return insert? -E_HASH_TABLE_OVERFLOW : -E_BAD_UID;
279 }
280
281 static int update_user_row(struct osl_table *t, uint32_t dir_num,
282                 uint64_t *add)
283 {
284         struct osl_row *row;
285         struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
286
287         int ret = osl_get_row(t, UT_DIR_NUM, &obj, &row);
288
289         if (ret < 0 && ret != -E_RB_KEY_NOT_FOUND)
290                 return ret;
291         if (ret < 0) { /* this is the first file we add */
292                 struct osl_object objects[NUM_UT_COLUMNS];
293                 uint64_t num_files = 1;
294
295                 objects[UT_DIR_NUM].data = &dir_num;
296                 objects[UT_DIR_NUM].size = sizeof(dir_num);
297                 objects[UT_BYTES].data = add;
298                 objects[UT_BYTES].size = sizeof(*add);
299                 objects[UT_FILES].data = &num_files;
300                 objects[UT_FILES].size = sizeof(num_files);
301                 INFO_LOG("######################### ret: %d\n", ret);
302                 ret = osl_add_row(t, objects);
303                 INFO_LOG("######################### ret: %d\n", ret);
304                 return ret;
305         } else { /* add size and increment file count */
306                 uint64_t num;
307                 struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
308
309                 ret = osl_get_object(t, row, UT_BYTES, &obj1);
310                 if (ret < 0)
311                         return ret;
312                 num = *(uint64_t *)obj1.data + *add;
313                 ret = osl_update_object(t, row, UT_BYTES, &obj2);
314                 if (ret < 0)
315                         return ret;
316                 ret = osl_get_object(t, row, UT_FILES, &obj1);
317                 if (ret < 0)
318                         return ret;
319                 num = *(uint64_t *)obj1.data + 1;
320                 return osl_update_object(t, row, UT_FILES, &obj2);
321         }
322 }
323
324 static uint32_t num_dirs;
325 static uint32_t num_files;
326 static uint64_t num_bytes;
327
328 int scan_dir(char *dirname)
329 {
330         DIR *dir;
331         struct dirent *entry;
332         int ret, cwd_fd, ret2;
333         uint64_t dir_size = 0, dir_files = 0;
334         uint32_t this_dir_num = num_dirs++;
335
336         DEBUG_LOG("----------------- %u: %s\n", num_dirs, dirname);
337         ret = para_opendir(dirname, &dir, &cwd_fd);
338         if (ret < 0) {
339                 if (ret != -ERRNO_TO_ERROR(EACCES))
340                         return ret;
341                 WARNING_LOG("permission denied for %s\n", dirname);
342                 return 1;
343         }
344         while ((entry = readdir(dir))) {
345                 mode_t m;
346                 char *tmp;
347                 struct stat s;
348                 uint32_t uid;
349                 uint64_t size;
350                 struct user_info *ui;
351
352                 if (!strcmp(entry->d_name, "."))
353                         continue;
354                 if (!strcmp(entry->d_name, ".."))
355                         continue;
356                 if (lstat(entry->d_name, &s) == -1) {
357                         WARNING_LOG("lstat error for %s/%s\n", dirname,
358                                 entry->d_name);
359                         continue;
360                 }
361                 m = s.st_mode;
362                 if (!S_ISREG(m) && !S_ISDIR(m))
363                         continue;
364                 if (S_ISDIR(m)) {
365                         tmp = make_message("%s/%s", dirname, entry->d_name);
366                         ret = scan_dir(tmp);
367                         free(tmp);
368                         if (ret < 0)
369                                 goto out;
370                         continue;
371                 }
372                 /* regular file */
373                 size = s.st_size;
374                 dir_size += size;
375                 num_bytes += size;
376                 dir_files++;
377                 num_files++;
378                 uid = s.st_uid;
379                 ret = search_uid(uid, 1, &ui);
380                 if (ret < 0)
381                         goto out;
382                 ui->bytes += size;
383                 ui->files++;
384                 ret = update_user_row(ui->table, this_dir_num, &size);
385                 if (ret < 0)
386                         goto out;
387         }
388         ret = add_directory(dirname, this_dir_num, &dir_size, &dir_files);
389 out:
390         closedir(dir);
391         ret2 = para_fchdir(cwd_fd);
392         if (ret2 < 0 && ret >= 0)
393                 ret = ret2;
394         close(cwd_fd);
395         return ret;
396 }
397
398 static int get_dir_name(struct osl_row *row, char **name)
399 {
400         struct osl_object obj;
401         int ret = osl_get_object(dir_table, row, DT_NAME, &obj);
402
403         if (ret < 0)
404                 return ret;
405         *name = obj.data;
406         return 1;
407 }
408
409 static int print_dirname_and_size(struct osl_row *row, void *data)
410 {
411         unsigned *count = data;
412         struct osl_object obj;
413         char *name;
414         int ret;
415
416         if ((*count)++ > 100)
417                 return -E_LOOP_COMPLETE;
418         ret = get_dir_name(row, &name);
419         if (ret < 0)
420                 return ret;
421         ret = osl_get_object(dir_table, row, DT_BYTES, &obj);
422         if (ret < 0)
423                 return ret;
424         printf("%s\t%llu\n", name, *(long long unsigned *)obj.data);
425         return 1;
426 }
427
428 static int print_dirname_and_file_count(struct osl_row *row, void *data)
429 {
430         unsigned *count = data;
431         struct osl_object obj;
432         char *name;
433         int ret;
434
435         if ((*count)++ > 100)
436                 return -E_LOOP_COMPLETE;
437         ret = get_dir_name(row, &name);
438         if (ret < 0)
439                 return ret;
440         ret = osl_get_object(dir_table, row, DT_FILES, &obj);
441         if (ret < 0)
442                 return ret;
443         printf("%s\t%llu\n", name, *(long long unsigned *)obj.data);
444         return 1;
445 }
446
447 static void print_id_stats(void)
448 {
449         struct user_info *ui;
450
451         FOR_EACH_USER(ui) {
452                 if (!ui->table)
453                         continue;
454                 printf("%u\t%llu\t%llu\n", (unsigned)ui->uid, (long long unsigned)ui->files,
455                         (long long unsigned)ui->bytes);
456         }
457 }
458
/** Data passed to print_big_dir(). */
struct big_dir_info {
	/** Number of rows visited so far. */
	unsigned count;
	/** The user table which is being looped over. */
	struct osl_table *user_table;
};
463
464 static int print_big_dir(struct osl_row *row, void *data)
465 {
466         struct big_dir_info *bdi = data;
467         int ret;
468         struct osl_row *dir_row;
469         char *dirname;
470         uint64_t bytes;
471         struct osl_object obj;
472
473         if (bdi->count++ > 10)
474                 return -E_LOOP_COMPLETE;
475         ret = osl_get_object(bdi->user_table, row, UT_BYTES, &obj);
476         if (ret < 0)
477                 return ret;
478         bytes = *(uint64_t *)obj.data;
479         ret = osl_get_object(bdi->user_table, row, UT_DIR_NUM, &obj);
480         if (ret < 0)
481                 return ret;
482         ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row);
483         if (ret < 0)
484                 return ret;
485         ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj);
486         if (ret < 0)
487                 return ret;
488         dirname = obj.data;
489         printf("%s: %llu\n", dirname, (long long unsigned)bytes);
490         return 1;
491 }
492
493 static void print_id_dir_stats(void)
494 {
495         struct user_info *ui;
496
497         FOR_EACH_USER(ui) {
498                 struct big_dir_info bdi = {.count = 0};
499                 if (!ui->table)
500                         continue;
501                 bdi.user_table = ui->table;
502                 printf("************************* Big dirs owned by uid %u\n", (unsigned) ui->uid);
503                 osl_rbtree_loop_reverse(ui->table, UT_BYTES, &bdi, print_big_dir);
504         }
505 }
506
507 static int print_statistics(void)
508 {
509         unsigned count = 0;
510         int ret;
511
512         printf("Summary: %u dirs, %u files, %llu bytes\n", (unsigned)num_dirs,
513                 (unsigned)num_files, (long long unsigned)num_bytes);
514         printf("************************* Biggest dirs\n");
515         ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &count, print_dirname_and_size);
516         if (ret < 0 && ret != -E_LOOP_COMPLETE)
517                 return ret;
518         count = 0;
519         printf("************************* dirs containing many files\n");
520         ret = osl_rbtree_loop_reverse(dir_table, DT_FILES, &count, print_dirname_and_file_count);
521         if (ret < 0 && ret != -E_LOOP_COMPLETE)
522                 return ret;
523
524         printf("************************* dirs stats by owner\n");
525         print_id_stats();
526         print_id_dir_stats();
527         return 1;
528 }
529
530 static int write_uid_list(void)
531 {
532         char *buf, *filename = DATABASE_DIR "/" "uid_list";
533         uint32_t count = 0;
534         struct user_info *ui;
535         size_t size = num_uids * sizeof(uint32_t);
536         int ret;
537
538         if (!num_uids)
539                 return 0;
540         buf = para_malloc(size);
541         FOR_EACH_USER(ui) {
542                 if (!ui->table)
543                         continue;
544                 write_u32(buf + count++ * sizeof(uint32_t), ui->uid);
545         }
546         ret = para_write_file(filename, buf, size);
547         free(buf);
548         return ret;
549 }
550
/**
 * Scan a directory tree and write the list of user IDs found there.
 *
 * \param dirname The root of the tree, passed to scan_dir().
 *
 * \return Negative if scan_dir() failed, otherwise the return value of
 * write_uid_list().
 */
static int com_create(char *dirname)
{
	int ret = scan_dir(dirname);

	return ret < 0? ret : write_uid_list();
}
559
560 int main(int argc, char **argv)
561 {
562         int ret = create_tables();
563         if (ret < 0)
564                 goto out;
565         ret = osl_open_table(&dir_table_desc, &dir_table);
566         if (ret < 0)
567                 goto out;
568         ret = -E_SYNTAX;
569         if (argc != 2)
570                 goto out;
571         ret = com_create(argv[1]);
572         if (ret < 0)
573                 goto out;
574         print_statistics();
575 out:
576         if (ret < 0) {
577                 ERROR_LOG("%s\n", error_txt(-ret));
578                 return -EXIT_FAILURE;
579         }
580         return EXIT_SUCCESS;
581 }
582