Add #define for database dir.
[adu.git] / adu.c
1 #include "adu.h"
2 #include <dirent.h> /* readdir() */
3
4 #include "gcc-compat.h"
5 #include "osl.h"
6 #include "fd.h"
7 #include "hash.h"
8 #include "string.h"
9 #include "error.h"
10
/*
 * NOTE(review): DEFINE_ERRLIST is not defined in this file; presumably it
 * comes from error.h and instantiates the error texts returned by
 * error_txt() -- confirm.
 */
DEFINE_ERRLIST;

/** Directory handed to osl as the location of every table this program creates. */
#define DATABASE_DIR "/tmp/adu"
14
/**
 * Three-way comparison of two numbers.
 *
 * Yields 1 if x < y, -1 if x > y and 0 if x == y. Both arguments are
 * evaluated twice, so they must not have side effects.
 */
#define NUM_COMPARE(x, y) (((x) < (y)) - ((x) > (y)))
17
18
19 /**
20 * The log function.
21 *
22 * \param ll Loglevel.
23 * \param fml Usual format string.
24 *
25 * All XXX_LOG() macros use this function.
26 */
27 __printf_2_3 void __log(int ll, const char* fmt,...)
28 {
29 va_list argp;
30 FILE *outfd;
31 struct tm *tm;
32 time_t t1;
33 char str[255] = "";
34
35 if (ll < 4)
36 return;
37 outfd = stderr;
38 time(&t1);
39 tm = localtime(&t1);
40 strftime(str, sizeof(str), "%b %d %H:%M:%S", tm);
41 fprintf(outfd, "%s ", str);
42 va_start(argp, fmt);
43 vfprintf(outfd, fmt, argp);
44 va_end(argp);
45 }
46
47 /**
48 * Compare the size of two directories
49 *
50 * \param obj1 Pointer to the first object.
51 * \param obj2 Pointer to the second object.
52 *
53 * This function first compares the size values as usual integers. If they compare as
54 * equal, the address of \a obj1 and \a obj2 are compared. So this compare function
55 * returns zero if and only if \a obj1 and \a obj2 point to the same memory area.
56 */
57 static int size_compare(const struct osl_object *obj1, const struct osl_object *obj2)
58 {
59 uint64_t d1 = *(uint64_t *)obj1->data;
60 uint64_t d2 = *(uint64_t *)obj2->data;
61 int ret = NUM_COMPARE(d2, d1);
62
63 if (ret)
64 return ret;
65 //INFO_LOG("addresses: %p, %p\n", obj1->data, obj2->data);
66 return NUM_COMPARE(obj2->data, obj1->data);
67 }
68
69 /**
70 * Compare two osl objects of string type.
71 *
72 * \param obj1 Pointer to the first object.
73 * \param obj2 Pointer to the second object.
74 *
75 * In any case, only \p MIN(obj1->size, obj2->size) characters of each string
76 * are taken into account.
77 *
78 * \return It returns an integer less than, equal to, or greater than zero if
79 * \a obj1 is found, respectively, to be less than, to match, or be greater than
80 * obj2.
81 *
82 * \sa strcmp(3), strncmp(3), osl_compare_func.
83 */
84 int string_compare(const struct osl_object *obj1, const struct osl_object *obj2)
85 {
86 const char *str1 = (const char *)obj1->data;
87 const char *str2 = (const char *)obj2->data;
88 return strncmp(str1, str2, MIN(obj1->size, obj2->size));
89 }
90
/** Column indices of the directory table. */
enum dir_table_columns {
	/** Path of the directory. */
	DT_NAME = 0,
	/** Number assigned to the directory during the scan. */
	DT_NUM,
	/** Sum of the sizes of all regular files in the directory. */
	DT_BYTES,
	/** Count of regular files in the directory. */
	DT_FILES,
	/** Not a column: the number of columns of this table. */
	NUM_DT_COLUMNS
};
104
105 static struct osl_column_description dir_table_cols[] = {
106 [DT_NAME] = {
107 .storage_type = OSL_MAPPED_STORAGE,
108 .storage_flags = OSL_RBTREE | OSL_UNIQUE,
109 .name = "dir",
110 .compare_function = string_compare,
111 },
112 [DT_NUM] = {
113 .storage_type = OSL_MAPPED_STORAGE,
114 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
115 .name = "num",
116 .compare_function = uint32_compare,
117 .data_size = sizeof(uint32_t)
118 },
119 [DT_BYTES] = {
120 .storage_type = OSL_MAPPED_STORAGE,
121 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
122 .compare_function = size_compare,
123 .name = "num_bytes",
124 .data_size = sizeof(uint64_t)
125 },
126 [DT_FILES] = {
127 .storage_type = OSL_MAPPED_STORAGE,
128 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
129 .compare_function = size_compare,
130 .name = "num_files",
131 .data_size = sizeof(uint64_t)
132 }
133 };
134
135 static struct osl_table_description dir_table_desc = {
136 .name = "dir_table",
137 .num_columns = NUM_DT_COLUMNS,
138 .flags = 0,
139 .column_descriptions = dir_table_cols,
140 .dir = DATABASE_DIR
141 };
142
/** Column indices of the per-user tables (one table per user id). */
enum user_table_columns {
	/** The number of the directory. */
	UT_DIR_NUM = 0,
	/** Sum of the sizes of all regular files in this dir owned by this id. */
	UT_BYTES,
	/** Count of files in this dir owned by this id. */
	UT_FILES,
	/** Not a column: the number of columns of this table. */
	NUM_UT_COLUMNS
};
154
155 static struct osl_column_description user_table_cols[] = {
156 [UT_DIR_NUM] = {
157 .storage_type = OSL_MAPPED_STORAGE,
158 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE | OSL_UNIQUE,
159 .name = "dir_num",
160 .compare_function = uint32_compare,
161 .data_size = sizeof(uint32_t)
162 },
163 [UT_BYTES] = {
164 .storage_type = OSL_MAPPED_STORAGE,
165 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
166 .compare_function = size_compare,
167 .name = "num_bytes",
168 .data_size = sizeof(uint64_t)
169 },
170 [UT_FILES] = {
171 .storage_type = OSL_MAPPED_STORAGE,
172 .storage_flags = OSL_RBTREE | OSL_FIXED_SIZE,
173 .compare_function = size_compare,
174 .name = "num_files",
175 .data_size = sizeof(uint64_t)
176 },
177 };
178
/** Handle of the directory table; set by osl_open_table() in main(). */
static struct osl_table *dir_table;
180
181 int add_directory(char *dirname, uint32_t dir_num, uint64_t *dir_size,
182 uint64_t *dir_files)
183 {
184 struct osl_object dir_objects[NUM_DT_COLUMNS];
185
186 INFO_LOG("adding #%u: %s\n", dir_num, dirname);
187 dir_objects[DT_NAME].data = dirname;
188 dir_objects[DT_NAME].size = strlen(dirname) + 1;
189 dir_objects[DT_NUM].data = &dir_num;
190 dir_objects[DT_NUM].size = sizeof(dir_num);
191 dir_objects[DT_BYTES].data = dir_size;
192 dir_objects[DT_BYTES].size = sizeof(*dir_size);
193 dir_objects[DT_FILES].data = dir_files;
194 dir_objects[DT_FILES].size = sizeof(*dir_files);
195
196 return osl_add_row(dir_table, dir_objects);
197 }
198
199 int create_and_open_user_table(uint32_t uid, struct osl_table **t)
200 {
201 int ret;
202 struct osl_table_description *desc = para_malloc(sizeof(*desc));
203
204 desc->num_columns = NUM_UT_COLUMNS;
205 desc->flags = 0;
206 desc->column_descriptions = user_table_cols;
207 desc->dir = para_strdup(DATABASE_DIR);
208 desc->name = make_message("%u", uid);
209 INFO_LOG("................................. %u\n", uid);
210 // user_table_desc.name = make_message("%u", uid);
211 ret = osl_create_table(desc);
212 if (ret < 0)
213 return ret;
214 return osl_open_table(desc, t);
215 }
216
217
/** Base two logarithm of the number of slots of the uid hash table. */
#define uid_hash_bits 8
/** Number of slots of the uid hash table. Always a power of two. */
static uint32_t uid_hash_table_size = 1 << uid_hash_bits;
/** Multiplier used by double_hash() to compute the first slot to probe. */
#define PRIME1 0x811c9dc5
/** Multiplier used by double_hash() to compute the distance between probes. */
#define PRIME2 0x01000193
222
/** One slot of the uid hash table. */
struct user_info {
	/** The user id this slot belongs to. Only meaningful if \a table is set. */
	uint32_t uid;
	/** Handle of the table of this user. NULL means that the slot is unused. */
	struct osl_table *table;
	/** Number of regular files owned by this user, counted by scan_dir(). */
	uint64_t files;
	/** Total size of the regular files owned by this user, summed up by scan_dir(). */
	uint64_t bytes;
};
229
230 static struct user_info *uid_hash_table;
231
232 static void create_hash_table(void)
233 {
234 uid_hash_table = para_calloc(uid_hash_table_size
235 * sizeof(struct user_info));
236 }
237
238 static int create_tables(void)
239 {
240 create_hash_table();
241 return osl_create_table(&dir_table_desc);
242 }
243
244
245 static uint32_t double_hash(uint32_t uid, uint32_t probe_num)
246 {
247 return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num) % uid_hash_table_size;
248 }
249
/**
 * Iterate over all slots of the uid hash table.
 *
 * \param ui Loop variable of type struct user_info *.
 *
 * Unused slots are visited as well, so the loop body must check the table
 * pointer of the slot before using it.
 */
#define FOR_EACH_USER(ui) for (ui = uid_hash_table; ui < uid_hash_table \
	+ uid_hash_table_size; ui++)
252
253 static int search_uid(uint32_t uid, int insert, struct user_info **ui)
254 {
255 uint32_t p;
256
257 for (p = 0; p < uid_hash_table_size; p++) {
258 struct user_info *i = uid_hash_table + double_hash(uid, p);
259 if (!i->table) {
260 if (!insert)
261 return -E_BAD_UID;
262 int ret = create_and_open_user_table(uid, &i->table);
263 if (ret < 0)
264 return ret;
265 i->uid = uid;
266 *ui = i;
267 return 1;
268 }
269 if (i->uid != uid)
270 continue;
271 *ui = i;
272 return 0;
273 }
274 return insert? -E_HASH_TABLE_OVERFLOW : -E_BAD_UID;
275 }
276
277 static int update_user_row(struct osl_table *t, uint32_t dir_num,
278 uint64_t *add)
279 {
280 struct osl_row *row;
281 struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
282
283 int ret = osl_get_row(t, UT_DIR_NUM, &obj, &row);
284
285 if (ret < 0 && ret != -E_RB_KEY_NOT_FOUND)
286 return ret;
287 if (ret < 0) { /* this is the first file we add */
288 struct osl_object objects[NUM_UT_COLUMNS];
289 uint64_t num_files = 1;
290
291 objects[UT_DIR_NUM].data = &dir_num;
292 objects[UT_DIR_NUM].size = sizeof(dir_num);
293 objects[UT_BYTES].data = add;
294 objects[UT_BYTES].size = sizeof(*add);
295 objects[UT_FILES].data = &num_files;
296 objects[UT_FILES].size = sizeof(num_files);
297 INFO_LOG("######################### ret: %d\n", ret);
298 ret = osl_add_row(t, objects);
299 INFO_LOG("######################### ret: %d\n", ret);
300 return ret;
301 } else { /* add size and increment file count */
302 uint64_t num;
303 struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
304
305 ret = osl_get_object(t, row, UT_BYTES, &obj1);
306 if (ret < 0)
307 return ret;
308 num = *(uint64_t *)obj1.data + *add;
309 ret = osl_update_object(t, row, UT_BYTES, &obj2);
310 if (ret < 0)
311 return ret;
312 ret = osl_get_object(t, row, UT_FILES, &obj1);
313 if (ret < 0)
314 return ret;
315 num = *(uint64_t *)obj1.data + 1;
316 return osl_update_object(t, row, UT_FILES, &obj2);
317 }
318 }
319
/** Number of scan_dir() invocations so far; also the source of the directory numbers. */
static uint32_t num_dirs;
/** Number of regular files seen by scan_dir(). */
static uint32_t num_files;
/** Sum of the sizes of all regular files seen by scan_dir(). */
static uint64_t num_bytes;
323
324 int scan_dir(char *dirname)
325 {
326 DIR *dir;
327 struct dirent *entry;
328 int ret, cwd_fd, ret2;
329 uint64_t dir_size = 0, dir_files = 0;
330 uint32_t this_dir_num = num_dirs++;
331
332 DEBUG_LOG("----------------- %u: %s\n", num_dirs, dirname);
333 ret = para_opendir(dirname, &dir, &cwd_fd);
334 if (ret < 0) {
335 if (ret != -ERRNO_TO_ERROR(EACCES))
336 return ret;
337 WARNING_LOG("permission denied for %s\n", dirname);
338 return 1;
339 }
340 while ((entry = readdir(dir))) {
341 mode_t m;
342 char *tmp;
343 struct stat s;
344 uint32_t uid;
345 uint64_t size;
346 struct user_info *ui;
347
348 if (!strcmp(entry->d_name, "."))
349 continue;
350 if (!strcmp(entry->d_name, ".."))
351 continue;
352 if (lstat(entry->d_name, &s) == -1) {
353 WARNING_LOG("lstat error for %s/%s\n", dirname,
354 entry->d_name);
355 continue;
356 }
357 m = s.st_mode;
358 if (!S_ISREG(m) && !S_ISDIR(m))
359 continue;
360 if (S_ISDIR(m)) {
361 tmp = make_message("%s/%s", dirname, entry->d_name);
362 ret = scan_dir(tmp);
363 free(tmp);
364 if (ret < 0)
365 goto out;
366 continue;
367 }
368 /* regular file */
369 size = s.st_size;
370 dir_size += size;
371 num_bytes += size;
372 dir_files++;
373 num_files++;
374 uid = s.st_uid;
375 ret = search_uid(uid, 1, &ui);
376 if (ret < 0)
377 goto out;
378 ui->bytes += size;
379 ui->files++;
380 ret = update_user_row(ui->table, this_dir_num, &size);
381 if (ret < 0)
382 goto out;
383 }
384 ret = add_directory(dirname, this_dir_num, &dir_size, &dir_files);
385 out:
386 closedir(dir);
387 ret2 = para_fchdir(cwd_fd);
388 if (ret2 < 0 && ret >= 0)
389 ret = ret2;
390 close(cwd_fd);
391 return ret;
392 }
393
394 static int get_dir_name(struct osl_row *row, char **name)
395 {
396 struct osl_object obj;
397 int ret = osl_get_object(dir_table, row, DT_NAME, &obj);
398
399 if (ret < 0)
400 return ret;
401 *name = obj.data;
402 return 1;
403 }
404
405 static int print_dirname_and_size(struct osl_row *row, void *data)
406 {
407 unsigned *count = data;
408 struct osl_object obj;
409 char *name;
410 int ret;
411
412 if ((*count)++ > 100)
413 return -E_LOOP_COMPLETE;
414 ret = get_dir_name(row, &name);
415 if (ret < 0)
416 return ret;
417 ret = osl_get_object(dir_table, row, DT_BYTES, &obj);
418 if (ret < 0)
419 return ret;
420 printf("%s\t%llu\n", name, *(long long unsigned *)obj.data);
421 return 1;
422 }
423
424 static int print_dirname_and_file_count(struct osl_row *row, void *data)
425 {
426 unsigned *count = data;
427 struct osl_object obj;
428 char *name;
429 int ret;
430
431 if ((*count)++ > 100)
432 return -E_LOOP_COMPLETE;
433 ret = get_dir_name(row, &name);
434 if (ret < 0)
435 return ret;
436 ret = osl_get_object(dir_table, row, DT_FILES, &obj);
437 if (ret < 0)
438 return ret;
439 printf("%s\t%llu\n", name, *(long long unsigned *)obj.data);
440 return 1;
441 }
442
443 static void print_id_stats(void)
444 {
445 struct user_info *ui;
446
447 FOR_EACH_USER(ui) {
448 if (!ui->table)
449 continue;
450 printf("%u\t%llu\t%llu\n", (unsigned)ui->uid, (long long unsigned)ui->files,
451 (long long unsigned)ui->bytes);
452 }
453 }
454
/** State passed to print_big_dir() while looping over one user table. */
struct big_dir_info {
	/** Number of rows visited so far; incremented by print_big_dir(). */
	unsigned count;
	/** The user table which is being looped over. */
	struct osl_table *user_table;
};
459
460 static int print_big_dir(struct osl_row *row, void *data)
461 {
462 struct big_dir_info *bdi = data;
463 int ret;
464 struct osl_row *dir_row;
465 char *dirname;
466 uint64_t bytes;
467 struct osl_object obj;
468
469 if (bdi->count++ > 10)
470 return -E_LOOP_COMPLETE;
471 ret = osl_get_object(bdi->user_table, row, UT_BYTES, &obj);
472 if (ret < 0)
473 return ret;
474 bytes = *(uint64_t *)obj.data;
475 ret = osl_get_object(bdi->user_table, row, UT_DIR_NUM, &obj);
476 if (ret < 0)
477 return ret;
478 ret = osl_get_row(dir_table, DT_NUM, &obj, &dir_row);
479 if (ret < 0)
480 return ret;
481 ret = osl_get_object(dir_table, dir_row, DT_NAME, &obj);
482 if (ret < 0)
483 return ret;
484 dirname = obj.data;
485 printf("%s: %llu\n", dirname, (long long unsigned)bytes);
486 return 1;
487 }
488
489 static void print_id_dir_stats(void)
490 {
491 struct user_info *ui;
492
493 FOR_EACH_USER(ui) {
494 struct big_dir_info bdi = {.count = 0};
495 if (!ui->table)
496 continue;
497 bdi.user_table = ui->table;
498 printf("************************* Big dirs owned by uid %u\n", (unsigned) ui->uid);
499 osl_rbtree_loop_reverse(ui->table, UT_BYTES, &bdi, print_big_dir);
500 }
501 }
502
503 static int print_statistics(void)
504 {
505 unsigned count = 0;
506 int ret;
507
508 printf("Summary: %u dirs, %u files, %llu bytes\n", (unsigned)num_dirs,
509 (unsigned)num_files, (long long unsigned)num_bytes);
510 printf("************************* Biggest dirs\n");
511 ret = osl_rbtree_loop_reverse(dir_table, DT_BYTES, &count, print_dirname_and_size);
512 if (ret < 0 && ret != -E_LOOP_COMPLETE)
513 return ret;
514 count = 0;
515 printf("************************* dirs containing many files\n");
516 ret = osl_rbtree_loop_reverse(dir_table, DT_FILES, &count, print_dirname_and_file_count);
517 if (ret < 0 && ret != -E_LOOP_COMPLETE)
518 return ret;
519
520 printf("************************* dirs stats by owner\n");
521 print_id_stats();
522 print_id_dir_stats();
523 return 1;
524 }
525
526
527 int main(int argc, char **argv)
528 {
529 int ret = create_tables();
530 if (ret < 0)
531 goto out;
532 ret = osl_open_table(&dir_table_desc, &dir_table);
533 if (ret < 0)
534 goto out;
535 ret = -E_SYNTAX;
536 if (argc != 2)
537 goto out;
538 ret = scan_dir(argv[1]);
539 if (ret < 0)
540 goto out;
541 print_statistics();
542 out:
543 if (ret < 0) {
544 ERROR_LOG("%s\n", error_txt(-ret));
545 return -EXIT_FAILURE;
546 }
547 return EXIT_SUCCESS;
548 }
549