Merge branch 'refs/heads/t/doc-improvements'
[adu.git] / create.c
1 /*
2 * Copyright (C) 2008 Andre Noll <maan@tuebingen.mpg.de>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6
7 /** \file create.c \brief The create mode of adu. */
8
9 #include <dirent.h> /* readdir() */
10 #include "format.h"
11 #include "adu.h"
12 #include "gcc-compat.h"
13 #include "cmdline.h"
14 #include "fd.h"
15 #include "string.h"
16 #include "error.h"
17 #include "user.h"
18 #include "bloom.h"
19
/*
 * Id of the device containing the base dir. Set by com_create() and compared
 * against the device of each subdirectory in scan_dir() when the
 * one-file-system option is given.
 */
static dev_t device_id;
/*
 * Bloom filter for detecting hard links regardless of the owner. It stays
 * NULL if com_create() deactivates hard link detection, which
 * consider_bloom() checks for.
 */
static struct bloom *global_bloom_filter;
/*
 * Bloom filter for detecting hard links per user: the uid is part of the
 * hashed data. Allocated together with the global filter in com_create().
 */
static struct bloom *user_bloom_filter;
24
25 static int consider_bloom(struct stat64 *s)
26 {
27 if (!global_bloom_filter)
28 return 0;
29 if (s->st_nlink <= 1)
30 return 0;
31 return 1;
32 }
33
34 /** Data size to hash for the global bloom filter. */
35 #define GLOBAL_BLOOM_BUF_SIZE (sizeof(ino_t) + sizeof(dev_t) + sizeof(off_t))
36 /** For the user bloom filter also the uid is being hashed. */
37 #define USER_BLOOM_BUF_SIZE (GLOBAL_BLOOM_BUF_SIZE + sizeof(uid_t))
38
39 static void make_bloom_buf(struct stat64 *s, uint8_t buf[USER_BLOOM_BUF_SIZE])
40 {
41 uint8_t *p = buf;
42
43 if (!consider_bloom(s))
44 return;
45 memcpy(p, &s->st_ino, sizeof(ino_t));
46 p += sizeof(ino_t);
47 memcpy(p, &s->st_dev, sizeof(dev_t));
48 p += sizeof(dev_t);
49 memcpy(p, &s->st_size, sizeof(off_t));
50 p += sizeof(off_t);
51 memcpy(p, &s->st_uid, sizeof(uid_t));
52 }
53
54 static int insert_global_bloom(struct stat64 *s,
55 uint8_t buf[USER_BLOOM_BUF_SIZE])
56 {
57 if (!consider_bloom(s))
58 return 0;
59 return bloom_insert(buf, GLOBAL_BLOOM_BUF_SIZE, global_bloom_filter);
60 }
61
62 static int insert_user_bloom(struct stat64 *s,
63 uint8_t buf[USER_BLOOM_BUF_SIZE])
64 {
65 if (!consider_bloom(s))
66 return 0;
67 return bloom_insert(buf, USER_BLOOM_BUF_SIZE, user_bloom_filter);
68 }
69
70 static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num,
71 uint64_t *dir_size, uint64_t *dir_files)
72 {
73 struct osl_object dir_objects[NUM_DT_COLUMNS];
74
75 INFO_LOG("adding #%llu: %s\n", (long long unsigned)*dir_num, dirname);
76 dir_objects[DT_NAME].data = dirname;
77 dir_objects[DT_NAME].size = strlen(dirname) + 1;
78 dir_objects[DT_NUM].data = dir_num;
79 dir_objects[DT_NUM].size = sizeof(*dir_num);
80 dir_objects[DT_PARENT_NUM].data = parent_dir_num;
81 dir_objects[DT_PARENT_NUM].size = sizeof(*parent_dir_num);
82 dir_objects[DT_BYTES].data = dir_size;
83 dir_objects[DT_BYTES].size = sizeof(*dir_size);
84 dir_objects[DT_FILES].data = dir_files;
85 dir_objects[DT_FILES].size = sizeof(*dir_files);
86 return osl(osl_add_row(dir_table, dir_objects));
87 }
88
89 static int update_user_row(struct osl_table *t, uint64_t dir_num,
90 uint64_t add)
91 {
92 struct osl_row *row;
93 struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
94
95 int ret = osl(osl_get_row(t, UT_DIR_NUM, &obj, &row));
96
97 if (ret == -E_OSL && osl_errno != E_OSL_RB_KEY_NOT_FOUND)
98 return ret;
99 if (ret < 0) { /* this is the first file we add */
100 struct osl_object objects[NUM_UT_COLUMNS];
101 uint64_t num_files = 1;
102
103 objects[UT_DIR_NUM].data = &dir_num;
104 objects[UT_DIR_NUM].size = sizeof(dir_num);
105 objects[UT_BYTES].data = &add;
106 objects[UT_BYTES].size = sizeof(add);
107 objects[UT_FILES].data = &num_files;
108 objects[UT_FILES].size = sizeof(num_files);
109 ret = osl(osl_add_row(t, objects));
110 return ret;
111 } else { /* add size and increment file count */
112 uint64_t num;
113 struct osl_object obj1, obj2 = {.data = &num, .size = sizeof(num)};
114
115 ret = osl(osl_get_object(t, row, UT_BYTES, &obj1));
116 if (ret < 0)
117 return ret;
118 num = *(uint64_t *)obj1.data + add;
119 ret = osl(osl_update_object(t, row, UT_BYTES, &obj2));
120 if (ret < 0)
121 return ret;
122 ret = osl(osl_get_object(t, row, UT_FILES, &obj1));
123 if (ret < 0)
124 return ret;
125 num = *(uint64_t *)obj1.data + 1;
126 return osl(osl_update_object(t, row, UT_FILES, &obj2));
127 }
128 }
129
130 static int scan_dir(char *dirname, uint64_t *parent_dir_num)
131 {
132 DIR *dir;
133 struct dirent *entry;
134 int ret, cwd_fd, ret2;
135 uint64_t dir_size = 0, dir_files = 0;
136 /* dir count. */
137 static uint64_t current_dir_num;
138 uint64_t this_dir_num = ++current_dir_num;
139
140 check_signals();
141 DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)current_dir_num, dirname);
142 ret = adu_opendir(dirname, &dir, &cwd_fd);
143 if (ret < 0) { /* Non-fatal, continue with next dir */
144 WARNING_LOG("skipping dir %s: %s\n", dirname,
145 adu_strerror(-ret));
146 return 1;
147 }
148 while ((entry = readdir(dir))) {
149 mode_t m;
150 struct stat64 s;
151 struct user_info *ui;
152 uint8_t bloom_buf[USER_BLOOM_BUF_SIZE];
153
154 if (!strcmp(entry->d_name, "."))
155 continue;
156 if (!strcmp(entry->d_name, ".."))
157 continue;
158 if (lstat64(entry->d_name, &s) == -1) {
159 WARNING_LOG("lstat64 error for %s/%s (%s)\n",
160 dirname, entry->d_name, strerror(errno));
161 continue;
162 }
163 m = s.st_mode;
164 if (!S_ISREG(m) && !S_ISDIR(m))
165 continue;
166 if (S_ISDIR(m)) {
167 if (conf.one_file_system_given && s.st_dev != device_id)
168 continue;
169 dir_size += s.st_size;
170 dir_files++;
171 ret = create_user_table(s.st_uid, &ui);
172 if (ret < 0)
173 goto out;
174 ret = update_user_row(ui->table, this_dir_num,
175 s.st_size);
176 if (ret < 0)
177 goto out;
178 ret = scan_dir(entry->d_name, &this_dir_num);
179 if (ret < 0)
180 goto out;
181 continue;
182 }
183
184 /* regular file */
185 make_bloom_buf(&s, bloom_buf);
186 if (insert_global_bloom(&s, bloom_buf))
187 DEBUG_LOG("global hard link: %s/%s\n", dirname,
188 entry->d_name);
189 else
190 dir_size += s.st_size;
191 dir_files++;
192 ret = create_user_table(s.st_uid, &ui);
193 if (ret < 0)
194 goto out;
195 ret = insert_user_bloom(&s, bloom_buf);
196 if (ret)
197 DEBUG_LOG("hard link for uid %d: %s/%s\n",
198 (unsigned)s.st_uid, dirname, entry->d_name);
199 ret = update_user_row(ui->table, this_dir_num,
200 ret? 0 : s.st_size);
201 if (ret < 0)
202 goto out;
203 }
204 ret = add_directory(dirname, &this_dir_num, parent_dir_num,
205 &dir_size, &dir_files);
206 out:
207 closedir(dir);
208 ret2 = adu_fchdir(cwd_fd);
209 if (ret2 < 0 && ret >= 0)
210 ret = ret2;
211 close(cwd_fd);
212 return ret;
213 }
214
215 static void log_bloom_stat(struct bloom *b)
216 {
217 unsigned percent;
218
219 NOTICE_LOG("\tfilter contains %llu entries\n",
220 (long long unsigned)b->num_entries);
221 percent = b->num_set_bits * 100ULL / (1ULL << b->order);
222 NOTICE_LOG("\t%u%% of bits are set\n", percent);
223 if (percent > 50) {
224 WARNING_LOG("results may be unreliable!\n");
225 WARNING_LOG("consider increasing bloom filter size\n");
226 }
227 }
228
229 static void log_bloom_stats(void)
230 {
231 struct bloom *b = global_bloom_filter;
232 if (!b)
233 return;
234 NOTICE_LOG("global bloom filter statistics:\n");
235 log_bloom_stat(b);
236 NOTICE_LOG("user bloom filter statistics:\n");
237 b = user_bloom_filter;
238 log_bloom_stat(b);
239 }
240
241 /**
242 * The main function of the create mode.
243 *
244 * \return Standard.
245 */
246 int com_create(void)
247 {
248 uint64_t zero = 0ULL;
249 int ret, order = conf.bloom_filter_order_arg,
250 num = conf.num_bloom_filter_hash_functions_arg;
251 struct stat statbuf;
252
253 if (lstat(conf.base_dir_arg, &statbuf) == -1)
254 return -ERRNO_TO_ERROR(errno);
255 if (!S_ISDIR(statbuf.st_mode))
256 return -ERRNO_TO_ERROR(ENOTDIR);
257 if (order >= 10 && num > 0) {
258 global_bloom_filter = bloom_new(order, num);
259 user_bloom_filter = bloom_new(order, num);
260 } else
261 WARNING_LOG("hard link detection deactivated\n");
262 device_id = statbuf.st_dev;
263 create_hash_table(conf.hash_table_bits_arg);
264 ret = open_dir_table(1);
265 if (ret < 0)
266 goto out;
267 check_signals();
268 ret = scan_dir(conf.base_dir_arg, &zero);
269 if (ret < 0)
270 goto out;
271 ret = write_uid_file();
272 log_bloom_stats();
273 out:
274 bloom_free(global_bloom_filter);
275 bloom_free(user_bloom_filter);
276 return ret;
277 }