X-Git-Url: http://git.tuebingen.mpg.de/?p=adu.git;a=blobdiff_plain;f=create.c;h=1b4ba880a1c3d220adc664b83bdae3ab1a46eef0;hp=65567e03ee592065cfd4126ac6398b8890dd71e6;hb=7231c544e2ee3f53f5b2c8bc393b7fd1e0b8d0a7;hpb=d2ce6e60cb915dff3a0920a0b48f786435bd4ec8 diff --git a/create.c b/create.c index 65567e0..1b4ba88 100644 --- a/create.c +++ b/create.c @@ -15,9 +15,57 @@ #include "string.h" #include "error.h" #include "user.h" +#include "bloom.h" /* Id of the device containing the base dir. */ static dev_t device_id; +static struct bloom *global_bloom_filter; +static struct bloom *user_bloom_filter; + +static int consider_bloom(struct stat64 *s) +{ + if (!global_bloom_filter) + return 0; + if (s->st_nlink <= 1) + return 0; + return 1; +} + +#define GLOBAL_BLOOM_BUF_SIZE (sizeof(ino_t) + sizeof(dev_t) + sizeof(off_t)) +#define USER_BLOOM_BUF_SIZE (GLOBAL_BLOOM_BUF_SIZE + sizeof(uid_t)) + +static void make_bloom_buf(struct stat64 *s, uint8_t buf[USER_BLOOM_BUF_SIZE]) +{ + uint8_t *p = buf; + + if (!consider_bloom(s)) + return; + memcpy(p, &s->st_ino, sizeof(ino_t)); + p += sizeof(ino_t); + memcpy(p, &s->st_dev, sizeof(dev_t)); + p += sizeof(dev_t); + memcpy(p, &s->st_size, sizeof(off_t)); + p += sizeof(off_t); + memcpy(p, &s->st_uid, sizeof(uid_t)); +} + +static int insert_global_bloom(struct stat64 *s, + uint8_t buf[USER_BLOOM_BUF_SIZE]) +{ + if (!consider_bloom(s)) + return 0; + return bloom_test_and_insert(buf, GLOBAL_BLOOM_BUF_SIZE, + global_bloom_filter); +} + +static int insert_user_bloom(struct stat64 *s, + uint8_t buf[USER_BLOOM_BUF_SIZE]) +{ + if (!consider_bloom(s)) + return 0; + return bloom_test_and_insert(buf, USER_BLOOM_BUF_SIZE, + user_bloom_filter); +} static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num, uint64_t *dir_size, uint64_t *dir_files) @@ -39,7 +87,7 @@ static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_ } static int update_user_row(struct osl_table *t, uint64_t dir_num, - uint64_t *add) + uint64_t add) { struct osl_row *row; struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)}; @@ -54,8 +102,8 @@ static int update_user_row(struct osl_table *t, uint64_t dir_num, objects[UT_DIR_NUM].data = &dir_num; objects[UT_DIR_NUM].size = sizeof(dir_num); - objects[UT_BYTES].data = add; - objects[UT_BYTES].size = sizeof(*add); + objects[UT_BYTES].data = &add; + objects[UT_BYTES].size = sizeof(add); objects[UT_FILES].data = &num_files; objects[UT_FILES].size = sizeof(num_files); ret = osl(osl_add_row(t, objects)); @@ -67,7 +115,7 @@ static int update_user_row(struct osl_table *t, uint64_t dir_num, ret = osl(osl_get_object(t, row, UT_BYTES, &obj1)); if (ret < 0) return ret; - num = *(uint64_t *)obj1.data + *add; + num = *(uint64_t *)obj1.data + add; ret = osl(osl_update_object(t, row, UT_BYTES, &obj2)); if (ret < 0) return ret; @@ -87,7 +135,6 @@ static int scan_dir(char *dirname, uint64_t *parent_dir_num) uint64_t dir_size = 0, dir_files = 0; /* dir count. */ static uint64_t current_dir_num; - uint64_t this_dir_num = ++current_dir_num; check_signals(); @@ -102,9 +149,8 @@ static int scan_dir(char *dirname, uint64_t *parent_dir_num) while ((entry = readdir(dir))) { mode_t m; struct stat64 s; - uint32_t uid; - uint64_t size; struct user_info *ui; + uint8_t bloom_buf[USER_BLOOM_BUF_SIZE]; if (!strcmp(entry->d_name, ".")) continue; @@ -121,22 +167,38 @@ static int scan_dir(char *dirname, uint64_t *parent_dir_num) if (S_ISDIR(m)) { if (conf.one_file_system_given && s.st_dev != device_id) continue; + dir_size += s.st_size; + dir_files++; + ret = create_user_table(s.st_uid, &ui); + if (ret < 0) + goto out; + ret = update_user_row(ui->table, this_dir_num, + s.st_size); + if (ret < 0) + goto out; ret = scan_dir(entry->d_name, &this_dir_num); if (ret < 0) goto out; continue; } + /* regular file */ - size = s.st_size; - dir_size += size; + make_bloom_buf(&s, bloom_buf); + if (insert_global_bloom(&s, bloom_buf)) + DEBUG_LOG("global hard link: %s/%s\n", dirname, + entry->d_name); + else + dir_size += s.st_size; dir_files++; - uid = s.st_uid; - ret = create_user_table(uid, &ui); + ret = create_user_table(s.st_uid, &ui); if (ret < 0) goto out; - ui->bytes += size; - ui->files++; - ret = update_user_row(ui->table, this_dir_num, &size); + ret = insert_user_bloom(&s, bloom_buf); + if (ret) + DEBUG_LOG("hard link for uid %d: %s/%s\n", + (unsigned)s.st_uid, dirname, entry->d_name); + ret = update_user_row(ui->table, this_dir_num, + ret? 0 : s.st_size); if (ret < 0) goto out; } @@ -151,6 +213,32 @@ out: return ret; } +static void log_bloom_stat(struct bloom *b) +{ + unsigned percent; + + NOTICE_LOG("\tfilter contains %llu entries\n", + (long long unsigned)b->num_entries); + percent = b->num_set_bits * 100ULL / (1ULL << b->order); + NOTICE_LOG("\t%u%% of bits are set\n", percent); + if (percent > 50) { + WARNING_LOG("results may be unreliable!\n"); + WARNING_LOG("consider incrasing bllom filter size\n"); + } +} + +static void log_bloom_stats(void) +{ + struct bloom *b = global_bloom_filter; + if (!b) + return; + NOTICE_LOG("global bloom filter statistics:\n"); + log_bloom_stat(b); + NOTICE_LOG("user bloom filter statistics:\n"); + b = user_bloom_filter; + log_bloom_stat(b); +} + /** * The main function of the create mode. * @@ -159,23 +247,32 @@ out: int com_create(void) { uint64_t zero = 0ULL; - int ret; + int ret, order = conf.bloom_filter_order_arg, + num = conf.num_bloom_filter_hash_functions_arg; struct stat statbuf; if (lstat(conf.base_dir_arg, &statbuf) == -1) return -ERRNO_TO_ERROR(errno); if (!S_ISDIR(statbuf.st_mode)) return -ERRNO_TO_ERROR(ENOTDIR); + if (order >= 10 && num > 0) { + bloom_init(order, num, &global_bloom_filter); + bloom_init(order, num, &user_bloom_filter); + } else + WARNING_LOG("hard link detection deactivated\n"); device_id = statbuf.st_dev; create_hash_table(conf.hash_table_bits_arg); ret = open_dir_table(1); if (ret < 0) - return ret; + goto out; check_signals(); ret = scan_dir(conf.base_dir_arg, &zero); if (ret < 0) goto out; ret = write_uid_file(); + log_bloom_stats(); out: + bloom_free(global_bloom_filter); + bloom_free(user_bloom_filter); return ret; }