git.tuebingen.mpg.de Git - adu.git/blob - create.c
Rename bloom_init() to bloom_new().
[adu.git] / create.c
1 /*
2  * Copyright (C) 2008 Andre Noll <maan@systemlinux.org>
3  *
4  * Licensed under the GPL v2. For licencing details see COPYING.
5  */
6
7 /** \file create.c \brief The create mode of adu. */
8
9 #include <dirent.h> /* readdir() */
10 #include "format.h"
11 #include "adu.h"
12 #include "gcc-compat.h"
13 #include "cmdline.h"
14 #include "fd.h"
15 #include "string.h"
16 #include "error.h"
17 #include "user.h"
18 #include "bloom.h"
19
20 /* Id of the device containing the base dir. */
21 static dev_t device_id;
22 static struct bloom *global_bloom_filter;
23 static struct bloom *user_bloom_filter;
24
25 static int consider_bloom(struct stat64 *s)
26 {
27         if (!global_bloom_filter)
28                 return 0;
29         if (s->st_nlink <= 1)
30                 return 0;
31         return 1;
32 }
33
34 /** Data size to hash for the global bloom filter. */
35 #define GLOBAL_BLOOM_BUF_SIZE (sizeof(ino_t) + sizeof(dev_t) + sizeof(off_t))
36 /** For the user bloom filter also the uid is being hashed. */
37 #define USER_BLOOM_BUF_SIZE (GLOBAL_BLOOM_BUF_SIZE + sizeof(uid_t))
38
39 static void make_bloom_buf(struct stat64 *s, uint8_t buf[USER_BLOOM_BUF_SIZE])
40 {
41         uint8_t *p = buf;
42
43         if (!consider_bloom(s))
44                 return;
45         memcpy(p, &s->st_ino, sizeof(ino_t));
46         p += sizeof(ino_t);
47         memcpy(p, &s->st_dev, sizeof(dev_t));
48         p += sizeof(dev_t);
49         memcpy(p, &s->st_size, sizeof(off_t));
50         p += sizeof(off_t);
51         memcpy(p, &s->st_uid, sizeof(uid_t));
52 }
53
54 static int insert_global_bloom(struct stat64 *s,
55                 uint8_t buf[USER_BLOOM_BUF_SIZE])
56 {
57         if (!consider_bloom(s))
58                 return 0;
59         return bloom_test_and_insert(buf, GLOBAL_BLOOM_BUF_SIZE,
60                 global_bloom_filter);
61 }
62
63 static int insert_user_bloom(struct stat64 *s,
64                 uint8_t buf[USER_BLOOM_BUF_SIZE])
65 {
66         if (!consider_bloom(s))
67                 return 0;
68         return bloom_test_and_insert(buf, USER_BLOOM_BUF_SIZE,
69                 user_bloom_filter);
70 }
71
72 static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num,
73                 uint64_t *dir_size, uint64_t *dir_files)
74 {
75         struct osl_object dir_objects[NUM_DT_COLUMNS];
76
77         INFO_LOG("adding #%llu: %s\n", (long long unsigned)*dir_num, dirname);
78         dir_objects[DT_NAME].data = dirname;
79         dir_objects[DT_NAME].size = strlen(dirname) + 1;
80         dir_objects[DT_NUM].data = dir_num;
81         dir_objects[DT_NUM].size = sizeof(*dir_num);
82         dir_objects[DT_PARENT_NUM].data = parent_dir_num;
83         dir_objects[DT_PARENT_NUM].size = sizeof(*parent_dir_num);
84         dir_objects[DT_BYTES].data = dir_size;
85         dir_objects[DT_BYTES].size = sizeof(*dir_size);
86         dir_objects[DT_FILES].data = dir_files;
87         dir_objects[DT_FILES].size = sizeof(*dir_files);
88         return osl(osl_add_row(dir_table, dir_objects));
89 }
90
91 static int update_user_row(struct osl_table *t, uint64_t dir_num,
92                 uint64_t add)
93 {
94         struct osl_row *row;
95         struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
96
97         int ret = osl(osl_get_row(t, UT_DIR_NUM, &obj, &row));
98
99         if (ret == -E_OSL && osl_errno != E_OSL_RB_KEY_NOT_FOUND)
100                 return ret;
101         if (ret < 0) { /* this is the first file we add */
102                 struct osl_object objects[NUM_UT_COLUMNS];
103                 uint64_t num_files = 1;
104
105                 objects[UT_DIR_NUM].data = &dir_num;
106                 objects[UT_DIR_NUM].size = sizeof(dir_num);
107                 objects[UT_BYTES].data = &add;
108                 objects[UT_BYTES].size = sizeof(add);
109                 objects[UT_FILES].data = &num_files;
110                 objects[UT_FILES].size = sizeof(num_files);
111                 ret = osl(osl_add_row(t, objects));
112                 return ret;
113         } else { /* add size and increment file count */
114                 uint64_t num;
115                 struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
116
117                 ret = osl(osl_get_object(t, row, UT_BYTES, &obj1));
118                 if (ret < 0)
119                         return ret;
120                 num = *(uint64_t *)obj1.data + add;
121                 ret = osl(osl_update_object(t, row, UT_BYTES, &obj2));
122                 if (ret < 0)
123                         return ret;
124                 ret = osl(osl_get_object(t, row, UT_FILES, &obj1));
125                 if (ret < 0)
126                         return ret;
127                 num = *(uint64_t *)obj1.data + 1;
128                 return osl(osl_update_object(t, row, UT_FILES, &obj2));
129         }
130 }
131
132 static int scan_dir(char *dirname, uint64_t *parent_dir_num)
133 {
134         DIR *dir;
135         struct dirent *entry;
136         int ret, cwd_fd, ret2;
137         uint64_t dir_size = 0, dir_files = 0;
138         /* dir count. */
139         static uint64_t current_dir_num;
140         uint64_t this_dir_num = ++current_dir_num;
141
142         check_signals();
143         DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)current_dir_num, dirname);
144         ret = adu_opendir(dirname, &dir, &cwd_fd);
145         if (ret < 0) {
146                 if (ret != -ERRNO_TO_ERROR(EACCES))
147                         return ret;
148                 WARNING_LOG("permission denied for %s\n", dirname);
149                 return 1;
150         }
151         while ((entry = readdir(dir))) {
152                 mode_t m;
153                 struct stat64 s;
154                 struct user_info *ui;
155                 uint8_t bloom_buf[USER_BLOOM_BUF_SIZE];
156
157                 if (!strcmp(entry->d_name, "."))
158                         continue;
159                 if (!strcmp(entry->d_name, ".."))
160                         continue;
161                 if (lstat64(entry->d_name, &s) == -1) {
162                         WARNING_LOG("lstat64 error for %s/%s (%s)\n",
163                                 dirname, entry->d_name, strerror(errno));
164                         continue;
165                 }
166                 m = s.st_mode;
167                 if (!S_ISREG(m) && !S_ISDIR(m))
168                         continue;
169                 if (S_ISDIR(m)) {
170                         if (conf.one_file_system_given && s.st_dev != device_id)
171                                 continue;
172                         dir_size += s.st_size;
173                         dir_files++;
174                         ret = create_user_table(s.st_uid, &ui);
175                         if (ret < 0)
176                                 goto out;
177                         ret = update_user_row(ui->table, this_dir_num,
178                                 s.st_size);
179                         if (ret < 0)
180                                 goto out;
181                         ret = scan_dir(entry->d_name, &this_dir_num);
182                         if (ret < 0)
183                                 goto out;
184                         continue;
185                 }
186
187                 /* regular file */
188                 make_bloom_buf(&s, bloom_buf);
189                 if (insert_global_bloom(&s, bloom_buf))
190                         DEBUG_LOG("global hard link: %s/%s\n", dirname,
191                                 entry->d_name);
192                 else
193                         dir_size += s.st_size;
194                 dir_files++;
195                 ret = create_user_table(s.st_uid, &ui);
196                 if (ret < 0)
197                         goto out;
198                 ret = insert_user_bloom(&s, bloom_buf);
199                 if (ret)
200                         DEBUG_LOG("hard link for uid %d: %s/%s\n",
201                                 (unsigned)s.st_uid, dirname, entry->d_name);
202                 ret = update_user_row(ui->table, this_dir_num,
203                         ret? 0 : s.st_size);
204                 if (ret < 0)
205                         goto out;
206         }
207         ret = add_directory(dirname, &this_dir_num, parent_dir_num,
208                         &dir_size, &dir_files);
209 out:
210         closedir(dir);
211         ret2 = adu_fchdir(cwd_fd);
212         if (ret2 < 0 && ret >= 0)
213                 ret = ret2;
214         close(cwd_fd);
215         return ret;
216 }
217
218 static void log_bloom_stat(struct bloom *b)
219 {
220         unsigned percent;
221
222         NOTICE_LOG("\tfilter contains %llu entries\n",
223                 (long long unsigned)b->num_entries);
224         percent = b->num_set_bits * 100ULL / (1ULL << b->order);
225         NOTICE_LOG("\t%u%% of bits are set\n", percent);
226         if (percent > 50) {
227                 WARNING_LOG("results may be unreliable!\n");
228                 WARNING_LOG("consider incrasing bllom filter size\n");
229         }
230 }
231
232 static void log_bloom_stats(void)
233 {
234         struct bloom *b = global_bloom_filter;
235         if (!b)
236                 return;
237         NOTICE_LOG("global bloom filter statistics:\n");
238         log_bloom_stat(b);
239         NOTICE_LOG("user bloom filter statistics:\n");
240         b = user_bloom_filter;
241         log_bloom_stat(b);
242 }
243
244 /**
245  * The main function of the create mode.
246  *
247  * \return Standard.
248  */
249 int com_create(void)
250 {
251         uint64_t zero = 0ULL;
252         int ret, order = conf.bloom_filter_order_arg,
253                 num = conf.num_bloom_filter_hash_functions_arg;
254         struct stat statbuf;
255
256         if (lstat(conf.base_dir_arg, &statbuf) == -1)
257                 return -ERRNO_TO_ERROR(errno);
258         if (!S_ISDIR(statbuf.st_mode))
259                 return -ERRNO_TO_ERROR(ENOTDIR);
260         if (order >= 10 && num > 0) {
261                 global_bloom_filter = bloom_new(order, num);
262                 user_bloom_filter = bloom_new(order, num);
263         } else
264                 WARNING_LOG("hard link detection deactivated\n");
265         device_id = statbuf.st_dev;
266         create_hash_table(conf.hash_table_bits_arg);
267         ret = open_dir_table(1);
268         if (ret < 0)
269                 goto out;
270         check_signals();
271         ret = scan_dir(conf.base_dir_arg, &zero);
272         if (ret < 0)
273                 goto out;
274         ret = write_uid_file();
275         log_bloom_stats();
276 out:
277         bloom_free(global_bloom_filter);
278         bloom_free(user_bloom_filter);
279         return ret;
280 }