/*
- * Copyright (C) 2008 Andre Noll <maan@systemlinux.org>
+ * Copyright (C) 2008 Andre Noll <maan@tuebingen.mpg.de>
*
* Licensed under the GPL v2. For licencing details see COPYING.
*/
-/** \file create.c The create mode of adu. */
+/** \file create.c \brief The create mode of adu. */
#include <dirent.h> /* readdir() */
+#include "format.h"
#include "adu.h"
#include "gcc-compat.h"
#include "cmdline.h"
#include "fd.h"
#include "string.h"
#include "error.h"
-#include "portable_io.h"
+#include "user.h"
+#include "bloom.h"
/* Id of the device containing the base dir. */
static dev_t device_id;
+static struct bloom *global_bloom_filter;
+static struct bloom *user_bloom_filter;
-static int write_uid(struct user_info *ui, void *data)
+/*
+ * Tell whether a file takes part in hard link detection.
+ *
+ * Returns 1 only if the global bloom filter exists and the link count of
+ * the file is greater than one. Returns 0 in all other cases.
+ */
+static int consider_bloom(struct stat64 *s)
{
- char **p = data;
-
- write_u32(*p, ui->uid);
- *p += sizeof(uint32_t);
+	if (!global_bloom_filter || s->st_nlink <= 1)
+		return 0;
return 1;
}
-static int write_uid_list(void)
+/** Size of one member of struct stat64 (no instance is needed). */
+#define STAT64_FIELD_SIZE(field) (sizeof(((struct stat64 *)0)->field))
+/** Data size to hash for the global bloom filter. */
+#define GLOBAL_BLOOM_BUF_SIZE (STAT64_FIELD_SIZE(st_ino) \
+	+ STAT64_FIELD_SIZE(st_dev) + STAT64_FIELD_SIZE(st_size))
+/** For the user bloom filter also the uid is being hashed. */
+#define USER_BLOOM_BUF_SIZE (GLOBAL_BLOOM_BUF_SIZE + STAT64_FIELD_SIZE(st_uid))
+
+/*
+ * Serialize the fields which identify a file into a flat byte buffer.
+ *
+ * The buffer receives st_ino, st_dev, st_size and st_uid, in this order.
+ * The first GLOBAL_BLOOM_BUF_SIZE bytes are thus everything but the uid.
+ *
+ * If consider_bloom() rejects the file, buf is left untouched.
+ */
+static void make_bloom_buf(struct stat64 *s, uint8_t buf[USER_BLOOM_BUF_SIZE])
{
- char *buf, *p, *filename;
- uint32_t count = 0;
- struct user_info *ui;
- size_t size = num_uids * sizeof(uint32_t);
- int ret;
+	uint8_t *p = buf;
- if (!num_uids)
+	if (!consider_bloom(s))
+		return;
+	/*
+	 * Copy each field with its own size. The members of struct stat64
+	 * may be wider than ino_t and off_t, so sizing the copies by those
+	 * types could hash only a part of the inode number or file size.
+	 */
+	memcpy(p, &s->st_ino, sizeof(s->st_ino));
+	p += sizeof(s->st_ino);
+	memcpy(p, &s->st_dev, sizeof(s->st_dev));
+	p += sizeof(s->st_dev);
+	memcpy(p, &s->st_size, sizeof(s->st_size));
+	p += sizeof(s->st_size);
+	memcpy(p, &s->st_uid, sizeof(s->st_uid));
+}
+
+/*
+ * Insert the first GLOBAL_BLOOM_BUF_SIZE bytes of buf into the global
+ * bloom filter.
+ *
+ * Returns zero without touching the filter if consider_bloom() rejects
+ * the file. Otherwise the return value of bloom_insert() is passed on.
+ */
+static int insert_global_bloom(struct stat64 *s,
+		uint8_t buf[USER_BLOOM_BUF_SIZE])
+{
+	if (consider_bloom(s))
+		return bloom_insert(buf, GLOBAL_BLOOM_BUF_SIZE,
+			global_bloom_filter);
return 0;
- buf = p = adu_malloc(size);
- ret = for_each_admissible_user(write_uid, &p);
- if (ret < 0)
- goto out;
- filename = get_uid_list_name();
- ret = adu_write_file(filename, buf, size);
- free(filename);
-out:
- free(buf);
- return ret;
+}
+
+/*
+ * Insert all USER_BLOOM_BUF_SIZE bytes of buf into the user bloom filter.
+ *
+ * Returns zero without touching the filter if consider_bloom() rejects
+ * the file. Otherwise the return value of bloom_insert() is passed on.
+ */
+static int insert_user_bloom(struct stat64 *s,
+		uint8_t buf[USER_BLOOM_BUF_SIZE])
+{
+	return consider_bloom(s)?
+		bloom_insert(buf, USER_BLOOM_BUF_SIZE, user_bloom_filter) : 0;
}
static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num,
}
static int update_user_row(struct osl_table *t, uint64_t dir_num,
- uint64_t *add)
+ uint64_t add)
{
struct osl_row *row;
struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
objects[UT_DIR_NUM].data = &dir_num;
objects[UT_DIR_NUM].size = sizeof(dir_num);
- objects[UT_BYTES].data = add;
- objects[UT_BYTES].size = sizeof(*add);
+ objects[UT_BYTES].data = &add;
+ objects[UT_BYTES].size = sizeof(add);
objects[UT_FILES].data = &num_files;
objects[UT_FILES].size = sizeof(num_files);
- INFO_LOG("######################### ret: %d\n", ret);
ret = osl(osl_add_row(t, objects));
- INFO_LOG("######################### ret: %d\n", ret);
return ret;
} else { /* add size and increment file count */
uint64_t num;
ret = osl(osl_get_object(t, row, UT_BYTES, &obj1));
if (ret < 0)
return ret;
- num = *(uint64_t *)obj1.data + *add;
+ num = *(uint64_t *)obj1.data + add;
ret = osl(osl_update_object(t, row, UT_BYTES, &obj2));
if (ret < 0)
return ret;
uint64_t dir_size = 0, dir_files = 0;
/* dir count. */
static uint64_t current_dir_num;
-
uint64_t this_dir_num = ++current_dir_num;
check_signals();
DEBUG_LOG("----------------- %llu: %s\n", (long long unsigned)current_dir_num, dirname);
ret = adu_opendir(dirname, &dir, &cwd_fd);
- if (ret < 0) {
- if (ret != -ERRNO_TO_ERROR(EACCES))
- return ret;
- WARNING_LOG("permission denied for %s\n", dirname);
+ if (ret < 0) { /* Non-fatal, continue with next dir */
+ WARNING_LOG("skipping dir %s: %s\n", dirname,
+ adu_strerror(-ret));
return 1;
}
while ((entry = readdir(dir))) {
mode_t m;
- struct stat s;
- uint32_t uid;
- uint64_t size;
+ struct stat64 s;
struct user_info *ui;
+ uint8_t bloom_buf[USER_BLOOM_BUF_SIZE];
if (!strcmp(entry->d_name, "."))
continue;
if (!strcmp(entry->d_name, ".."))
continue;
- if (lstat(entry->d_name, &s) == -1) {
- WARNING_LOG("lstat error for %s/%s (%s)\n",
+ if (lstat64(entry->d_name, &s) == -1) {
+ WARNING_LOG("lstat64 error for %s/%s (%s)\n",
dirname, entry->d_name, strerror(errno));
continue;
}
if (S_ISDIR(m)) {
if (conf.one_file_system_given && s.st_dev != device_id)
continue;
+ dir_size += s.st_size;
+ dir_files++;
+ ret = create_user_table(s.st_uid, &ui);
+ if (ret < 0)
+ goto out;
+ ret = update_user_row(ui->table, this_dir_num,
+ s.st_size);
+ if (ret < 0)
+ goto out;
ret = scan_dir(entry->d_name, &this_dir_num);
if (ret < 0)
goto out;
continue;
}
+
/* regular file */
- size = s.st_size;
- dir_size += size;
+ make_bloom_buf(&s, bloom_buf);
+ if (insert_global_bloom(&s, bloom_buf))
+ DEBUG_LOG("global hard link: %s/%s\n", dirname,
+ entry->d_name);
+ else
+ dir_size += s.st_size;
dir_files++;
- uid = s.st_uid;
- ret = search_uid(uid, CREATE_USER_TABLE | OPEN_USER_TABLE, &ui);
+ ret = create_user_table(s.st_uid, &ui);
if (ret < 0)
goto out;
- ui->bytes += size;
- ui->files++;
- ret = update_user_row(ui->table, this_dir_num, &size);
+ ret = insert_user_bloom(&s, bloom_buf);
+ if (ret)
+ DEBUG_LOG("hard link for uid %d: %s/%s\n",
+ (unsigned)s.st_uid, dirname, entry->d_name);
+ ret = update_user_row(ui->table, this_dir_num,
+ ret? 0 : s.st_size);
if (ret < 0)
goto out;
}
return ret;
}
-int com_create()
+/*
+ * Log the number of entries and the fill ratio of one bloom filter.
+ *
+ * The fill ratio is the number of set bits in percent of 2^order. A
+ * warning is printed if more than 50 percent of the bits are set.
+ */
+static void log_bloom_stat(struct bloom *b)
+{
+	const unsigned fill = b->num_set_bits * 100ULL / (1ULL << b->order);
+
+	NOTICE_LOG("\tfilter contains %llu entries\n",
+		(long long unsigned)b->num_entries);
+	NOTICE_LOG("\t%u%% of bits are set\n", fill);
+	if (fill <= 50)
+		return;
+	WARNING_LOG("results may be unreliable!\n");
+	WARNING_LOG("consider increasing bloom filter size\n");
+}
+
+/*
+ * Log the statistics of the global and the user bloom filter.
+ *
+ * Nothing is printed if the global bloom filter does not exist.
+ */
+static void log_bloom_stats(void)
+{
+	if (!global_bloom_filter)
+		return;
+	NOTICE_LOG("global bloom filter statistics:\n");
+	log_bloom_stat(global_bloom_filter);
+	NOTICE_LOG("user bloom filter statistics:\n");
+	log_bloom_stat(user_bloom_filter);
+}
+
+/**
+ * The main function of the create mode.
+ *
+ * Checks that the base directory can be examined and is a directory,
+ * creates the global and the user bloom filter unless the configured
+ * order is smaller than 10 or the number of hash functions is not
+ * positive, scans the base directory, writes the uid file and logs the
+ * bloom filter statistics. Both bloom filters are freed before returning.
+ *
+ * \return Standard. In particular, a negative error code is returned if
+ * the base directory can not be examined or is not a directory.
+ */
+int com_create(void)
{
uint64_t zero = 0ULL;
- int ret;
+ int ret, order = conf.bloom_filter_order_arg,
+ num = conf.num_bloom_filter_hash_functions_arg;
- struct stat statbuf;
- if (lstat(conf.base_dir_arg, &statbuf) == -1)
+	/* Use the same 64 bit stat interface as the directory scan. */
+	struct stat64 statbuf;
+	if (lstat64(conf.base_dir_arg, &statbuf) == -1)
return -ERRNO_TO_ERROR(errno);
if (!S_ISDIR(statbuf.st_mode))
return -ERRNO_TO_ERROR(ENOTDIR);
+ if (order >= 10 && num > 0) {
+ global_bloom_filter = bloom_new(order, num);
+ user_bloom_filter = bloom_new(order, num);
+ } else
+ WARNING_LOG("hard link detection deactivated\n");
device_id = statbuf.st_dev;
create_hash_table(conf.hash_table_bits_arg);
ret = open_dir_table(1);
if (ret < 0)
- return ret;
+ goto out;
check_signals();
ret = scan_dir(conf.base_dir_arg, &zero);
if (ret < 0)
goto out;
- ret = write_uid_list();
+ ret = write_uid_file();
+ log_bloom_stats();
out:
- close_all_tables();
+ bloom_free(global_bloom_filter);
+ bloom_free(user_bloom_filter);
+	/*
+	 * Do not leave dangling pointers behind: consider_bloom() checks
+	 * global_bloom_filter against NULL.
+	 */
+	global_bloom_filter = NULL;
+	user_bloom_filter = NULL;
return ret;
}