]> git.tuebingen.mpg.de Git - adu.git/blob - adu.c
Replace id table by hash table.
[adu.git] / adu.c
#include "adu.h"
#include <dirent.h> /* readdir() */
#include <stdint.h> /* uintptr_t */

#include "gcc-compat.h"
#include "osl.h"
#include "fd.h"
#include "hash.h"
#include "string.h"
#include "error.h"
10
/*
 * NOTE(review): presumably expands to the definition of the error message
 * table that error_txt() reads; the macro is not defined in this file
 * (likely error.h) -- TODO confirm.
 */
DEFINE_ERRLIST;
12
/**
 * Three-way comparison with inverted sign: the result is 1 if x < y, -1 if
 * x > y and 0 if both are equal. Each argument is evaluated twice.
 */
#define NUM_COMPARE(x, y) (((x) < (y)) - ((x) > (y)))
15
16
17 /**
18  * The log function.
19  *
20  * \param ll Loglevel.
21  * \param fml Usual format string.
22  *
23  * All XXX_LOG() macros use this function.
24  */
25 __printf_2_3 void __log(int ll, const char* fmt,...)
26 {
27         va_list argp;
28         FILE *outfd;
29         struct tm *tm;
30         time_t t1;
31         char str[255] = "";
32
33         if (ll < 4)
34                 return;
35         outfd = stderr;
36         time(&t1);
37         tm = localtime(&t1);
38         strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
39         fprintf(outfd, "%s ", str);
40         va_start(argp, fmt);
41         vfprintf(outfd, fmt, argp);
42         va_end(argp);
43 }
44
45 /**
46  * Compare the size of two directories
47  *
48  * \param obj1 Pointer to the first object.
49  * \param obj2 Pointer to the second object.
50  *
51  * This function first compares the size values as usual integers. If they compare as
52  * equal, the address of \a obj1 and \a obj2 are compared. So this compare function
53  * returns zero if and only if \a obj1 and \a obj2 point to the same memory area.
54  */
55 static int size_compare(const struct osl_object *obj1, const struct osl_object *obj2)
56 {
57         uint64_t d1 = *(uint64_t *)obj1->data;
58         uint64_t d2 = *(uint64_t *)obj2->data;
59         int ret = NUM_COMPARE(d2, d1);
60
61         if (ret)
62                 return ret;
63         //INFO_LOG("addresses: %p, %p\n", obj1->data, obj2->data);
64         return NUM_COMPARE(obj2->data, obj1->data);
65 }
66
67 /**
68  * Compare two osl objects of string type.
69  *
70  * \param obj1 Pointer to the first object.
71  * \param obj2 Pointer to the second object.
72  *
73  * In any case, only \p MIN(obj1->size, obj2->size) characters of each string
74  * are taken into account.
75  *
76  * \return It returns an integer less than, equal to, or greater than zero if
77  * \a obj1 is found, respectively, to be less than, to match, or be greater than
78  * obj2.
79  *
80  * \sa strcmp(3), strncmp(3), osl_compare_func.
81  */
82 int string_compare(const struct osl_object *obj1, const struct osl_object *obj2)
83 {
84         const char *str1 = (const char *)obj1->data;
85         const char *str2 = (const char *)obj2->data;
86         return strncmp(str1, str2, MIN(obj1->size, obj2->size));
87 }
88
/** Column indices of the directory table, one row per scanned directory. */
enum dir_table_columns {
        /** Path of the directory. */
        DT_NAME = 0,
        /** Running number assigned to the directory. */
        DT_NUM,
        /** Sum of the sizes of all regular files in the directory. */
        DT_BYTES,
        /** How many regular files the directory contains. */
        DT_FILES,
        /** Not a column: the number of columns of this table. */
        NUM_DT_COLUMNS
};
102
103 static struct osl_column_description dir_table_cols[] = {
104         [DT_NAME] = {
105                 .storage_type = OSL_MAPPED_STORAGE,
106                 .storage_flags = OSL_RBTREE | OSL_UNIQUE,
107                 .name = "dir",
108                 .compare_function = string_compare,
109         },
110         [DT_NUM] = {
111                 .storage_type = OSL_MAPPED_STORAGE,
112                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
113                 .name = "num",
114                 .compare_function = uint32_compare,
115                 .data_size = sizeof(uint32_t)
116         },
117         [DT_BYTES] = {
118                 .storage_type = OSL_MAPPED_STORAGE,
119                 .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
120                 .compare_function = size_compare,
121                 .name = "num_bytes",
122                 .data_size = sizeof(uint64_t)
123         },
124         [DT_FILES] = {
125                 .storage_type = OSL_MAPPED_STORAGE,
126                 .storage_flags =  OSL_RBTREE | OSL_FIXED_SIZE,
127                 .compare_function = size_compare,
128                 .name = "num_files",
129                 .data_size = sizeof(uint64_t)
130         }
131 };
132
133 static struct osl_table_description dir_table_desc = {
134         .name = "dir_table",
135         .num_columns = NUM_DT_COLUMNS,
136         .flags = 0,
137         .column_descriptions = dir_table_cols,
138         .dir = "/tmp/adu"
139 };
140
/** Column indices of a per-uid table, one row per directory. */
enum user_table_columns {
        /** The number of the directory. */
        UT_DIR_NUM = 0,
        /** Sum of the sizes of all regular files in this dir owned by this id. */
        UT_BYTES,
        /** How many files in this dir are owned by this id. */
        UT_FILES,
        /** Not a column: the number of columns of this table. */
        NUM_UT_COLUMNS
};
152
153 static struct osl_column_description user_table_cols[] = {
154         [UT_DIR_NUM] = {
155                 .storage_type = OSL_MAPPED_STORAGE,
156                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
157                 .name = "dir_num",
158                 .compare_function = uint32_compare,
159                 .data_size = sizeof(uint32_t)
160         },
161         [UT_BYTES] = {
162                 .storage_type = OSL_MAPPED_STORAGE,
163                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
164                 .compare_function = size_compare,
165                 .name = "num_bytes",
166                 .data_size = sizeof(uint64_t)
167         },
168         [UT_FILES] = {
169                 .storage_type = OSL_MAPPED_STORAGE,
170                 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
171                 .compare_function = size_compare,
172                 .name = "num_files",
173                 .data_size = sizeof(uint64_t)
174         },
175 };
176
177 static struct osl_table *dir_table;
178
179 int add_directory(char *dirname, uint32_t dir_num, uint64_t *dir_size,
180                 uint64_t *dir_files)
181 {
182         struct osl_object dir_objects[NUM_DT_COLUMNS];
183
184         INFO_LOG("adding #%u: %s\n", dir_num, dirname);
185         dir_objects[DT_NAME].data = dirname;
186         dir_objects[DT_NAME].size = strlen(dirname) + 1;
187         dir_objects[DT_NUM].data = &dir_num;
188         dir_objects[DT_NUM].size = sizeof(dir_num);
189         dir_objects[DT_BYTES].data = dir_size;
190         dir_objects[DT_BYTES].size = sizeof(*dir_size);
191         dir_objects[DT_FILES].data = dir_files;
192         dir_objects[DT_FILES].size = sizeof(*dir_files);
193
194         return osl_add_row(dir_table, dir_objects);
195 }
196
197 int create_and_open_user_table(uint32_t uid, struct osl_table **t)
198 {
199         int ret;
200         struct osl_table_description *desc = para_malloc(sizeof(*desc));
201
202         desc->num_columns = NUM_UT_COLUMNS;
203         desc->flags = 0;
204         desc->column_descriptions = user_table_cols;
205         desc->dir = para_strdup("/tmp/adu");
206         desc->name = make_message("%u", uid);
207         INFO_LOG("................................. %u\n", uid);
208 //      user_table_desc.name = make_message("%u", uid);
209         ret = osl_create_table(desc);
210         if (ret < 0)
211                 return ret;
212         return osl_open_table(desc, t);
213 }
214
215
/** The uid hash table has two to the power of this many slots. */
#define uid_hash_bits 8
/** Number of slots of the uid hash table. */
static uint32_t uid_hash_table_size = 1U << uid_hash_bits;
/** Multiplier for the start slot in double_hash(). */
#define PRIME1 0x811c9dc5
/** Multiplier for the probe step in double_hash(). */
#define PRIME2 0x01000193
220
/** One slot of the uid hash table. */
struct user_info {
        /** The user id this slot belongs to. */
        uint32_t uid;
        /** Table of this uid; a NULL pointer marks the slot as unused. */
        struct osl_table *table;
        /** Number of regular files owned by this uid. */
        uint64_t files;
        /** Sum of the sizes of the regular files owned by this uid. */
        uint64_t bytes;
};
227
228 static struct user_info *uid_hash_table;
229
230 static void create_hash_table(void)
231 {
232         uid_hash_table = para_calloc(uid_hash_table_size
233                 * sizeof(struct user_info));
234 }
235
236 static int create_tables(void)
237 {
238         create_hash_table();
239         return osl_create_table(&dir_table_desc);
240 }
241
242
243 static uint32_t double_hash(uint32_t uid, uint32_t probe_num)
244 {
245         return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num) % uid_hash_table_size;
246 }
247
/** Iterate over all slots of the uid hash table, whether used or not. */
#define FOR_EACH_USER(ui) \
        for ((ui) = uid_hash_table; \
                (ui) < uid_hash_table + uid_hash_table_size; \
                (ui)++)
250
251 static int search_uid(uint32_t uid, int insert, struct user_info **ui)
252 {
253         uint32_t p;
254
255         for (p = 0; p < uid_hash_table_size; p++) {
256                 struct user_info *i = uid_hash_table + double_hash(uid, p);
257                 if (!i->table) {
258                         if (!insert)
259                                 return -E_BAD_UID;
260                         int ret = create_and_open_user_table(uid, &i->table);
261                         if (ret < 0)
262                                 return ret;
263                         i->uid = uid;
264                         *ui = i;
265                         return 1;
266                 }
267                 if (i->uid != uid)
268                         continue;
269                 *ui = i;
270                 return 0;
271         }
272         return insert? -E_HASH_TABLE_OVERFLOW : -E_BAD_UID;
273 }
274
275 static int update_user_row(struct osl_table *t, uint32_t dir_num,
276                 uint64_t *add)
277 {
278         struct osl_row *row;
279         struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
280
281         int ret = osl_get_row(t, UT_DIR_NUM, &obj, &row);
282
283         if (ret < 0 && ret != -E_RB_KEY_NOT_FOUND)
284                 return ret;
285         if (ret < 0) { /* this is the first file we add */
286                 struct osl_object objects[NUM_UT_COLUMNS];
287                 uint64_t num_files = 1;
288
289                 objects[UT_DIR_NUM].data = &dir_num;
290                 objects[UT_DIR_NUM].size = sizeof(dir_num);
291                 objects[UT_BYTES].data = add;
292                 objects[UT_BYTES].size = sizeof(*add);
293                 objects[UT_FILES].data = &num_files;
294                 objects[UT_FILES].size = sizeof(num_files);
295                 INFO_LOG("######################### ret: %d\n", ret);
296                 ret = osl_add_row(t, objects);
297                 INFO_LOG("######################### ret: %d\n", ret);
298                 return ret;
299         } else { /* add size and increment file count */
300                 uint64_t num;
301                 struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
302
303                 ret = osl_get_object(t, row, UT_BYTES, &obj1);
304                 if (ret < 0)
305                         return ret;
306                 num = *(uint64_t *)obj1.data + *add;
307                 ret = osl_update_object(t, row, UT_BYTES, &obj2);
308                 if (ret < 0)
309                         return ret;
310                 ret = osl_get_object(t, row, UT_FILES, &obj1);
311                 if (ret < 0)
312                         return ret;
313                 num = *(uint64_t *)obj1.data + 1;
314                 return osl_update_object(t, row, UT_FILES, &obj2);
315         }
316 }
317
/** Incremented on each call of scan_dir(); also the source of dir numbers. */
static uint32_t num_dirs;
/** Total number of regular files seen by scan_dir(). */
static uint32_t num_files;
/** Sum of the sizes of all regular files seen by scan_dir(). */
static uint64_t num_bytes;
321
322 int scan_dir(char *dirname)
323 {
324         DIR *dir;
325         struct dirent *entry;
326         int ret, cwd_fd, ret2;
327         uint64_t dir_size = 0, dir_files = 0;
328         uint32_t this_dir_num = num_dirs++;
329
330         DEBUG_LOG("----------------- %u: %s\n", num_dirs, dirname);
331         ret = para_opendir(dirname, &dir, &cwd_fd);
332         if (ret < 0) {
333                 if (ret != -ERRNO_TO_ERROR(EACCES))
334                         return ret;
335                 WARNING_LOG("permission denied for %s\n", dirname);
336                 return 1;
337         }
338         while ((entry = readdir(dir))) {
339                 mode_t m;
340                 char *tmp;
341                 struct stat s;
342                 uint32_t uid;
343                 uint64_t size;
344                 struct user_info *ui;
345
346                 if (!strcmp(entry->d_name, "."))
347                         continue;
348                 if (!strcmp(entry->d_name, ".."))
349                         continue;
350                 if (lstat(entry->d_name, &s) == -1) {
351                         WARNING_LOG("lstat error for %s/%s\n", dirname,
352                                 entry->d_name);
353                         continue;
354                 }
355                 m = s.st_mode;
356                 if (!S_ISREG(m) && !S_ISDIR(m))
357                         continue;
358                 if (S_ISDIR(m)) {
359                         tmp = make_message("%s/%s", dirname, entry->d_name);
360                         ret = scan_dir(tmp);
361                         free(tmp);
362                         if (ret < 0)
363                                 goto out;
364                         continue;
365                 }
366                 /* regular file */
367                 size = s.st_size;
368                 dir_size += size;
369                 num_bytes += size;
370                 dir_files++;
371                 num_files++;
372                 uid = s.st_uid;
373                 ret = search_uid(uid, 1, &ui);
374                 if (ret < 0)
375                         goto out;
376                 ui->bytes += size;
377                 ui->files++;
378                 ret = update_user_row(ui->table, this_dir_num, &size);
379                 if (ret < 0)
380                         goto out;
381         }
382         ret = add_directory(dirname, this_dir_num, &dir_size, &dir_files);
383 out:
384         closedir(dir);
385         ret2 = para_fchdir(cwd_fd);
386         if (ret2 < 0 && ret >= 0)
387                 ret = ret2;
388         close(cwd_fd);
389         return ret;
390 }
391
392 static int get_dir_name(struct osl_row *row, char **name)
393 {
394         struct osl_object obj;
395         int ret = osl_get_object(dir_table, row, DT_NAME, &obj);
396
397         if (ret < 0)
398                 return ret;
399         *name = obj.data;
400         return 1;
401 }
402
403 static int print_dirname_and_size(struct osl_row *row, void *data)
404 {
405         unsigned *count = data;
406         struct osl_object obj;
407         char *name;
408         int ret;
409
410         if ((*count)++ > 100)
411                 return -E_LOOP_COMPLETE;
412         ret = get_dir_name(row, &name);
413         if (ret < 0)
414                 return ret;
415         ret = osl_get_object(dir_table, row, DT_BYTES, &obj);
416         if (ret < 0)
417                 return ret;
418         printf("%s\t%llu\n", name, *(long long unsigned *)obj.data);
419         return 1;
420 }
421
422 static int print_dirname_and_file_count(struct osl_row *row, void *data)
423 {
424         unsigned *count = data;
425         struct osl_object obj;
426         char *name;
427         int ret;
428
429         if ((*count)++ > 100)
430                 return -E_LOOP_COMPLETE;
431         ret = get_dir_name(row, &name);
432         if (ret < 0)
433                 return ret;
434         ret = osl_get_object(dir_table, row, DT_FILES, &obj);
435         if (ret < 0)
436                 return ret;
437         printf("%s\t%llu\n", name, *(long long unsigned *)obj.data);
438         return 1;
439 }
440
441 static void print_id_stats(void)
442 {
443         struct user_info *ui;
444
445         FOR_EACH_USER(ui) {
446                 if (!ui->table)
447                         continue;
448                 printf("%u\t%llu\t%llu\n", (unsigned)ui->uid, (long long unsigned)ui->files,
449                         (long long unsigned)ui->bytes);
450         }
451 }
452
/** State handed to print_big_dir() while looping over a per-uid table. */
struct big_dir_info {
        /** Number of rows visited so far. */
        unsigned count;
        /** The per-uid table that is being looped over. */
        struct osl_table *user_table;
};
457
458 static int print_big_dir(struct osl_row *row, void *data)
459 {
460         struct big_dir_info *bdi = data;
461         int ret;
462         struct osl_row *dir_row;
463         char *dirname;
464         uint64_t bytes;
465         struct osl_object obj;
466
467         if (bdi->count++ > 10)
468                 return -E_LOOP_COMPLETE;
469         ret = osl_get_object(bdi->user_table, row, UT_BYTES, &obj);
470         if (ret < 0)
471                 return ret;
472         bytes = *(uint64_t *)obj.data;
473         ret = osl_get_object(bdi->user_table, row, UT_DIR_NUM, &obj);
474         if (ret < 0)
475                 return ret;
476         ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row);
477         if (ret < 0)
478                 return ret;
479         ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj);
480         if (ret < 0)
481                 return ret;
482         dirname = obj.data;
483         printf("%s: %llu\n", dirname, (long long unsigned)bytes);
484         return 1;
485 }
486
487 static void print_id_dir_stats(void)
488 {
489         struct user_info *ui;
490
491         FOR_EACH_USER(ui) {
492                 struct big_dir_info bdi = {.count = 0};
493                 if (!ui->table)
494                         continue;
495                 bdi.user_table = ui->table;
496                 printf("************************* Big dirs owned by uid %u\n", (unsigned) ui->uid);
497                 osl_rbtree_loop_reverse(ui->table, UT_BYTES, &bdi, print_big_dir);
498         }
499 }
500
501 static int print_statistics(void)
502 {
503         unsigned count = 0;
504         int ret;
505
506         printf("Summary: %u dirs, %u files, %llu bytes\n", (unsigned)num_dirs,
507                 (unsigned)num_files, (long long unsigned)num_bytes);
508         printf("************************* Biggest dirs\n");
509         ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &count, print_dirname_and_size);
510         if (ret < 0 && ret != -E_LOOP_COMPLETE)
511                 return ret;
512         count = 0;
513         printf("************************* dirs containing many files\n");
514         ret = osl_rbtree_loop_reverse(dir_table, DT_FILES, &count, print_dirname_and_file_count);
515         if (ret < 0 && ret != -E_LOOP_COMPLETE)
516                 return ret;
517
518         printf("************************* dirs stats by owner\n");
519         print_id_stats();
520         print_id_dir_stats();
521         return 1;
522 }
523
524
525 int main(int argc, char **argv)
526 {
527         int ret = create_tables();
528         if (ret < 0)
529                 goto out;
530         ret = osl_open_table(&dir_table_desc, &dir_table);
531         if (ret < 0)
532                 goto out;
533         ret = -E_SYNTAX;
534         if (argc != 2)
535                 goto out;
536         ret = scan_dir(argv[1]);
537         if (ret < 0)
538                 goto out;
539         print_statistics();
540 out:
541         if (ret < 0) {
542                 ERROR_LOG("%s\n", error_txt(-ret));
543                 return -EXIT_FAILURE;
544         }
545         return EXIT_SUCCESS;
546 }
547