* - Modes of operation: \ref select.c, \ref create.c, \ref interactive.c
* - User handling: \ref user.c
* - Error handling: \ref error.h
- * - Library-type functions: \ref fd.c, \ref format.c, \ref string.c, \ref portable_io.h
+ * - Library-type functions: \ref fd.c, \ref format.c, \ref string.c, \ref portable_io.h, \ref bloom.c
*/
#include "adu.h"
/** Options passed to --select-options. */
struct select_args_info select_conf;
+/** Computed database dir */
+char *database_dir;
/**
* The table containing the directory names and statistics.
*/
int open_dir_table(int create)
{
+ int ret;
if (dir_table)
return 1;
- dir_table_desc.dir = adu_strdup(conf.database_dir_arg);
+ dir_table_desc.dir = adu_strdup(database_dir);
if (create) {
+ INFO_LOG("creating database directory structure\n");
+ ret = mkpath(dir_table_desc.dir, 0777);
+ if (ret < 0)
+ goto out;
NOTICE_LOG("creating dir table\n");
- int ret = osl(osl_create_table(&dir_table_desc));
- if (ret < 0) {
- free((char *)dir_table_desc.dir);
- return ret;
- }
+ ret = osl(osl_create_table(&dir_table_desc));
+ if (ret < 0)
+ goto out;
}
INFO_LOG("opening dir table\n");
return osl(osl_open_table(&dir_table_desc, &dir_table));
+out:
+ free((char *)dir_table_desc.dir);
+ return ret;
}
static int check_args(void)
if (ret < 0)
goto out;
ret = -E_SYNTAX;
+ if (conf.database_dir_given)
+ database_dir = adu_strdup(conf.database_dir_arg);
+ else
+ database_dir = make_message("%s%s",
+ conf.database_root_arg, conf.base_dir_arg);
if (conf.select_given)
ret = com_select();
else if (conf.create_given)
ERROR_LOG("%s\n", adu_strerror(-ret));
return -EXIT_FAILURE;
}
+ free(database_dir);
cmdline_parser_free(&conf);
select_cmdline_parser_free(&select_conf);
return EXIT_SUCCESS;
section "General options"
#########################
-option "database-dir" d
-#~~~~~~~~~~~~~~~~~~~~~~
-"directory containing the osl tables"
-string typestr="path"
-required
+option "config-file" c
+#~~~~~~~~~~~~~~~~~~~~~
+"(default='~/.adurc')"
+string typestr="filename"
+optional
details="
- Full path to the directory containing the osl tables. This
- directory must exist. It must be writable for the user running
- adu in --create mode and readable in --select mode.
-
+ Options may be given at the command line or in the
+ configuration file. As usual, if an option is given both at
+ the command line and in the configuration file, the command
+ line option takes precedence.
"
+
option "loglevel" l
#~~~~~~~~~~~~~~~~~~
"Set loglevel (0-6)"
goes to stdout. Lower values mean more verbose logging.
"
+defgroup "database"
+#==================
+groupdesc="
+ There are two ways to specify a database directory. You can either
+ specify a full path using the database-dir option or a root path
+ using the database-root option. In the latter case, a directory
+ structure matching that of the base-dir argument is created
+ below the given full path.
+
+ The advantage of using database-root is that the base-dir is
+ used to find the relevant database both in create and select mode
+ and you do not have to care for setting the database-dir explicitly.
+"
+
+groupoption "database-dir" d
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"directory containing the osl tables"
+group="database"
+string typestr="path"
+details="
+ Full path to the directory containing the osl tables. This
+ directory is created if it does not exist. It must be writable for the
+ user running adu in --create mode and readable in --select mode.
+"
+
+groupoption "database-root" r
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"directory containing directories containing the osl tables"
+group="database"
+string typestr="path"
+default="/var/lib/adu"
+dependon="base-dir"
+optional
+details="
+ Base path to the directory containing the osl tables. The real
+ database-dir is generated by appending base-dir. This
+ directory is created if it does not exist. When used in select
+ mode you have to specify the base-dir as well.
+"
+
###############
section "Modes"
###############
adu may be started in one of three possible modes, each of
which corresponds to a different command line option. Exactly
one of these options must be given.
-
"
required
#~~~~~~~~~~~~~~~~~~
"directory to traverse"
string typestr="path"
-dependon="create"
optional
details="
The base directory to be traversed recursively. A warning
users. Decreasing the value causes adu to use slightly less memory.
"
+ option "bloom-filter-order" B
+ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ "use bloom filters for hard link detection"
+ int typestr="order"
+ dependon="create"
+ default="23"
+ optional
+ details="
+ Allocate bloom filters of size 2^order bits. Each regular
+ file with hard link count greater than one is added to these
+ filters which allows to detect hard links on a per-user basis.
+ Greater values reduce the probability of false positives but
+ require more memory.
+
+ Values less than 10 deactivate this feature so that no hard
+ links are being detected.
+ "
+
+ option "num-bloom-filter-hash-functions" N
+ #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ "number of hash functions for the bloom filters"
+ int typestr="num"
+ dependon="create"
+ default="10"
+ optional
+ details="
+ Cause each entry which is added to the bloom filter to set
+ \"num\" bits of the bloom filter.
+ "
+
##############################
section "Options for --select"
##############################
#include "string.h"
#include "error.h"
#include "user.h"
+ #include "bloom.h"
/* Id of the device containing the base dir. */
static dev_t device_id;
+ static struct bloom *global_bloom_filter;
+ static struct bloom *user_bloom_filter;
+
+ static int consider_bloom(struct stat64 *s)
+ {
+ if (!global_bloom_filter)
+ return 0;
+ if (s->st_nlink <= 1)
+ return 0;
+ return 1;
+ }
+
+ /** Data size to hash for the global bloom filter. */
+ #define GLOBAL_BLOOM_BUF_SIZE (sizeof(ino_t) + sizeof(dev_t) + sizeof(off_t))
+ /** For the user bloom filter also the uid is being hashed. */
+ #define USER_BLOOM_BUF_SIZE (GLOBAL_BLOOM_BUF_SIZE + sizeof(uid_t))
+
+ static void make_bloom_buf(struct stat64 *s, uint8_t buf[USER_BLOOM_BUF_SIZE])
+ {
+ uint8_t *p = buf;
+
+ if (!consider_bloom(s))
+ return;
+ memcpy(p, &s->st_ino, sizeof(ino_t));
+ p += sizeof(ino_t);
+ memcpy(p, &s->st_dev, sizeof(dev_t));
+ p += sizeof(dev_t);
+ memcpy(p, &s->st_size, sizeof(off_t));
+ p += sizeof(off_t);
+ memcpy(p, &s->st_uid, sizeof(uid_t));
+ }
+
+ static int insert_global_bloom(struct stat64 *s,
+ uint8_t buf[USER_BLOOM_BUF_SIZE])
+ {
+ if (!consider_bloom(s))
+ return 0;
+ return bloom_insert(buf, GLOBAL_BLOOM_BUF_SIZE, global_bloom_filter);
+ }
+
+ static int insert_user_bloom(struct stat64 *s,
+ uint8_t buf[USER_BLOOM_BUF_SIZE])
+ {
+ if (!consider_bloom(s))
+ return 0;
+ return bloom_insert(buf, USER_BLOOM_BUF_SIZE, user_bloom_filter);
+ }
static int add_directory(char *dirname, uint64_t *dir_num, uint64_t *parent_dir_num,
uint64_t *dir_size, uint64_t *dir_files)
}
static int update_user_row(struct osl_table *t, uint64_t dir_num,
- uint64_t *add)
+ uint64_t add)
{
struct osl_row *row;
struct osl_object obj = {.data = &dir_num, .size = sizeof(dir_num)};
objects[UT_DIR_NUM].data = &dir_num;
objects[UT_DIR_NUM].size = sizeof(dir_num);
- objects[UT_BYTES].data = add;
- objects[UT_BYTES].size = sizeof(*add);
+ objects[UT_BYTES].data = &add;
+ objects[UT_BYTES].size = sizeof(add);
objects[UT_FILES].data = &num_files;
objects[UT_FILES].size = sizeof(num_files);
ret = osl(osl_add_row(t, objects));
ret = osl(osl_get_object(t, row, UT_BYTES, &obj1));
if (ret < 0)
return ret;
- num = *(uint64_t *)obj1.data + *add;
+ num = *(uint64_t *)obj1.data + add;
ret = osl(osl_update_object(t, row, UT_BYTES, &obj2));
if (ret < 0)
return ret;
uint64_t dir_size = 0, dir_files = 0;
/* dir count. */
static uint64_t current_dir_num;
-
uint64_t this_dir_num = ++current_dir_num;
check_signals();
while ((entry = readdir(dir))) {
mode_t m;
struct stat64 s;
- uint32_t uid;
- uint64_t size;
struct user_info *ui;
+ uint8_t bloom_buf[USER_BLOOM_BUF_SIZE];
if (!strcmp(entry->d_name, "."))
continue;
if (S_ISDIR(m)) {
if (conf.one_file_system_given && s.st_dev != device_id)
continue;
- ret = create_user_table(conf.database_dir_arg, s.st_uid, &ui);
+ dir_size += s.st_size;
+ dir_files++;
++ ret = create_user_table(s.st_uid, &ui);
+ if (ret < 0)
+ goto out;
+ ret = update_user_row(ui->table, this_dir_num,
+ s.st_size);
+ if (ret < 0)
+ goto out;
ret = scan_dir(entry->d_name, &this_dir_num);
if (ret < 0)
goto out;
continue;
}
+
/* regular file */
- size = s.st_size;
- dir_size += size;
+ make_bloom_buf(&s, bloom_buf);
+ if (insert_global_bloom(&s, bloom_buf))
+ DEBUG_LOG("global hard link: %s/%s\n", dirname,
+ entry->d_name);
+ else
+ dir_size += s.st_size;
dir_files++;
- uid = s.st_uid;
- ret = create_user_table(uid, &ui);
- ret = create_user_table(conf.database_dir_arg, s.st_uid, &ui);
++ ret = create_user_table(s.st_uid, &ui);
if (ret < 0)
goto out;
- ret = update_user_row(ui->table, this_dir_num, &size);
+ ret = insert_user_bloom(&s, bloom_buf);
+ if (ret)
+ DEBUG_LOG("hard link for uid %d: %s/%s\n",
+ (unsigned)s.st_uid, dirname, entry->d_name);
+ ret = update_user_row(ui->table, this_dir_num,
+ ret? 0 : s.st_size);
if (ret < 0)
goto out;
}
return ret;
}
+ static void log_bloom_stat(struct bloom *b)
+ {
+ unsigned percent;
+
+ NOTICE_LOG("\tfilter contains %llu entries\n",
+ (long long unsigned)b->num_entries);
+ percent = b->num_set_bits * 100ULL / (1ULL << b->order);
+ NOTICE_LOG("\t%u%% of bits are set\n", percent);
+ if (percent > 50) {
+ WARNING_LOG("results may be unreliable!\n");
+ WARNING_LOG("consider increasing bloom filter size\n");
+ }
+ }
+
+ static void log_bloom_stats(void)
+ {
+ struct bloom *b = global_bloom_filter;
+ if (!b)
+ return;
+ NOTICE_LOG("global bloom filter statistics:\n");
+ log_bloom_stat(b);
+ NOTICE_LOG("user bloom filter statistics:\n");
+ b = user_bloom_filter;
+ log_bloom_stat(b);
+ }
+
/**
* The main function of the create mode.
*
int com_create(void)
{
uint64_t zero = 0ULL;
- int ret;
+ int ret, order = conf.bloom_filter_order_arg,
+ num = conf.num_bloom_filter_hash_functions_arg;
struct stat statbuf;
if (lstat(conf.base_dir_arg, &statbuf) == -1)
return -ERRNO_TO_ERROR(errno);
if (!S_ISDIR(statbuf.st_mode))
return -ERRNO_TO_ERROR(ENOTDIR);
+ if (order >= 10 && num > 0) {
+ global_bloom_filter = bloom_new(order, num);
+ user_bloom_filter = bloom_new(order, num);
+ } else
+ WARNING_LOG("hard link detection deactivated\n");
device_id = statbuf.st_dev;
create_hash_table(conf.hash_table_bits_arg);
ret = open_dir_table(1);
if (ret < 0)
- return ret;
+ goto out;
check_signals();
ret = scan_dir(conf.base_dir_arg, &zero);
if (ret < 0)
goto out;
- ret = write_uid_file(conf.database_dir_arg);
+ ret = write_uid_file();
+ log_bloom_stats();
out:
+ bloom_free(global_bloom_filter);
+ bloom_free(user_bloom_filter);
return ret;
}