]> git.tuebingen.mpg.de Git - adu.git/commitdiff
Merge branch 'master' into bloom
author Andre Noll <maan@systemlinux.org>
Thu, 25 Dec 2008 11:23:10 +0000 (12:23 +0100)
committer Andre Noll <maan@systemlinux.org>
Thu, 25 Dec 2008 11:23:10 +0000 (12:23 +0100)
Conflicts:
create.c

1  2 
create.c

diff --combined create.c
index cd000c2bb2225f0e8be72962ac96454942618765,2925d570d862b150b5786531c6a5dce51f5cb0e5..c4d7195878b64c80eef176835743a631110eff9c
+++ b/create.c
  #include "string.h"
  #include "error.h"
  #include "user.h"
 +#include "bloom.h"
  
  /* Id of the device containing the base dir. */
  static dev_t device_id;
 +static struct bloom *global_bloom_filter;
 +static struct bloom *user_bloom_filter;
 +
 +static int consider_bloom(struct stat64 *s)
 +{
 +      if (!global_bloom_filter)
 +              return 0;
 +      if (s->st_nlink <= 1)
 +              return 0;
 +      return 1;
 +}
 +
 +/**
 + * Data size to hash for the global bloom filter: room for an inode number,
 + * a device id and a file size, in that order (see make_bloom_buf()).
 + *
 + * NOTE(review): the sizes are taken from ino_t and off_t while the values are
 + * copied out of a struct stat64; presumably these types have the same width
 + * in this build -- confirm.
 + */
 +#define GLOBAL_BLOOM_BUF_SIZE (sizeof(ino_t) + sizeof(dev_t) + sizeof(off_t))
 +/** For the user bloom filter also the uid is being hashed. */
 +#define USER_BLOOM_BUF_SIZE (GLOBAL_BLOOM_BUF_SIZE + sizeof(uid_t))
 +
 +static void make_bloom_buf(struct stat64 *s, uint8_t buf[USER_BLOOM_BUF_SIZE])
 +{
 +      uint8_t *p = buf;
 +
 +      if (!consider_bloom(s))
 +              return;
 +      memcpy(p, &s->st_ino, sizeof(ino_t));
 +      p += sizeof(ino_t);
 +      memcpy(p, &s->st_dev, sizeof(dev_t));
 +      p += sizeof(dev_t);
 +      memcpy(p, &s->st_size, sizeof(off_t));
 +      p += sizeof(off_t);
 +      memcpy(p, &s->st_uid, sizeof(uid_t));
 +}
 +
 +static int insert_global_bloom(struct stat64 *s,
 +              uint8_t buf[USER_BLOOM_BUF_SIZE])
 +{
 +      if (!consider_bloom(s))
 +              return 0;
 +      return bloom_insert(buf, GLOBAL_BLOOM_BUF_SIZE, global_bloom_filter);
 +}
 +
 +static int insert_user_bloom(struct stat64 *s,
 +              uint8_t buf[USER_BLOOM_BUF_SIZE])
 +{
 +      if (!consider_bloom(s))
 +              return 0;
 +      return bloom_insert(buf, USER_BLOOM_BUF_SIZE, user_bloom_filter);
 +}
  
  static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num,
                uint64_t *dir_size, uint64_t *dir_files)
@@@ -87,7 -39,7 +87,7 @@@
  }
  
  static int update_user_row(struct osl_table *t, uint64_t dir_num,
 -              uint64_t *add)
 +              uint64_t add)
  {
        struct osl_row *row;
        struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
  
                objects[UT_DIR_NUM].data = &dir_num;
                objects[UT_DIR_NUM].size = sizeof(dir_num);
 -              objects[UT_BYTES].data = add;
 -              objects[UT_BYTES].size = sizeof(*add);
 +              objects[UT_BYTES].data = &add;
 +              objects[UT_BYTES].size = sizeof(add);
                objects[UT_FILES].data = &num_files;
                objects[UT_FILES].size = sizeof(num_files);
                ret = osl(osl_add_row(t, objects));
                ret = osl(osl_get_object(t, row, UT_BYTES, &obj1));
                if (ret < 0)
                        return ret;
 -              num = *(uint64_t *)obj1.data + *add;
 +              num = *(uint64_t *)obj1.data + add;
                ret = osl(osl_update_object(t, row, UT_BYTES, &obj2));
                if (ret < 0)
                        return ret;
@@@ -135,6 -87,7 +135,6 @@@ static int scan_dir(char *dirname, uint
        uint64_t dir_size = 0, dir_files = 0;
        /* dir count. */
        static uint64_t current_dir_num;
 -
        uint64_t this_dir_num = ++current_dir_num;
  
        check_signals();
        while ((entry = readdir(dir))) {
                mode_t m;
                struct stat64 s;
 -              uint32_t uid;
 -              uint64_t size;
                struct user_info *ui;
 +              uint8_t bloom_buf[USER_BLOOM_BUF_SIZE];
  
                if (!strcmp(entry->d_name, "."))
                        continue;
                if (S_ISDIR(m)) {
                        if (conf.one_file_system_given && s.st_dev != device_id)
                                continue;
-                       ret = create_user_table(s.st_uid, &ui);
 +                      dir_size += s.st_size;
 +                      dir_files++;
++                      ret = create_user_table(conf.database_dir_arg, s.st_uid, &ui);
 +                      if (ret < 0)
 +                              goto out;
 +                      ret = update_user_row(ui->table, this_dir_num,
 +                              s.st_size);
 +                      if (ret < 0)
 +                              goto out;
                        ret = scan_dir(entry->d_name, &this_dir_num);
                        if (ret < 0)
                                goto out;
                        continue;
                }
 +
                /* regular file */
 -              size = s.st_size;
 -              dir_size += size;
 +              make_bloom_buf(&s, bloom_buf);
 +              if (insert_global_bloom(&s, bloom_buf))
 +                      DEBUG_LOG("global hard link: %s/%s\n", dirname,
 +                              entry->d_name);
 +              else
 +                      dir_size += s.st_size;
                dir_files++;
-               ret = create_user_table(s.st_uid, &ui);
 -              uid = s.st_uid;
 -              ret = create_user_table(conf.database_dir_arg, uid, &ui);
++              ret = create_user_table(conf.database_dir_arg, s.st_uid, &ui);
                if (ret < 0)
                        goto out;
 -              ret = update_user_row(ui->table, this_dir_num, &size);
 +              ret = insert_user_bloom(&s, bloom_buf);
 +              if (ret)
 +                      DEBUG_LOG("hard link for uid %d: %s/%s\n",
 +                              (unsigned)s.st_uid, dirname, entry->d_name);
 +              ret = update_user_row(ui->table, this_dir_num,
 +                      ret? 0 : s.st_size);
                if (ret < 0)
                        goto out;
        }
@@@ -213,32 -149,6 +213,32 @@@ out
        return ret;
  }
  
 +static void log_bloom_stat(struct bloom *b)
 +{
 +      unsigned percent;
 +
 +      NOTICE_LOG("\tfilter contains %llu entries\n",
 +              (long long unsigned)b->num_entries);
 +      percent = b->num_set_bits * 100ULL / (1ULL << b->order);
 +      NOTICE_LOG("\t%u%% of bits are set\n", percent);
 +      if (percent > 50) {
 +              WARNING_LOG("results may be unreliable!\n");
 +              WARNING_LOG("consider incrasing bllom filter size\n");
 +      }
 +}
 +
 +static void log_bloom_stats(void)
 +{
 +      struct bloom *b = global_bloom_filter;
 +      if (!b)
 +              return;
 +      NOTICE_LOG("global bloom filter statistics:\n");
 +      log_bloom_stat(b);
 +      NOTICE_LOG("user bloom filter statistics:\n");
 +      b = user_bloom_filter;
 +      log_bloom_stat(b);
 +}
 +
  /**
   * The main function of the create mode.
   *
  int com_create(void)
  {
        uint64_t zero = 0ULL;
 -      int ret;
 +      int ret, order = conf.bloom_filter_order_arg,
 +              num = conf.num_bloom_filter_hash_functions_arg;
        struct stat statbuf;
  
        if (lstat(conf.base_dir_arg, &statbuf) == -1)
                return -ERRNO_TO_ERROR(errno);
        if (!S_ISDIR(statbuf.st_mode))
                return -ERRNO_TO_ERROR(ENOTDIR);
 +      if (order >= 10 && num > 0) {
 +              global_bloom_filter = bloom_new(order, num);
 +              user_bloom_filter = bloom_new(order, num);
 +      } else
 +              WARNING_LOG("hard link detection deactivated\n");
        device_id = statbuf.st_dev;
        create_hash_table(conf.hash_table_bits_arg);
        ret = open_dir_table(1);
        if (ret < 0)
 -              return ret;
 +              goto out;
        check_signals();
        ret = scan_dir(conf.base_dir_arg, &zero);
        if (ret < 0)
                goto out;
-       ret = write_uid_file();
+       ret = write_uid_file(conf.database_dir_arg);
 +      log_bloom_stats();
  out:
 +      bloom_free(global_bloom_filter);
 +      bloom_free(user_bloom_filter);
        return ret;
  }