#include <dirent.h> /* readdir() */
#include "gcc-compat.h"
-#include "osl.h"
+#include "cmdline.h"
#include "fd.h"
-#include "hash.h"
#include "string.h"
#include "error.h"
+#include "portable_io.h"
DEFINE_ERRLIST;
-#define DATABASE_DIR "/tmp/adu"
-#define UID_LIST DATABASE_DIR "/" "uid_list"
+/** Command line and config file options. */
+static struct gengetopt_args_info conf;
+
+/** Status bits kept in the flags field of struct user_info. */
+enum uid_info_flags {
+	/** Set once a uid has been stored in this slot of the hash table. */
+	UI_FL_SLOT_USED = 1 << 0,
+	/** Set if the uid stored in this slot should be taken into account. */
+	UI_FL_ADMISSIBLE = 1 << 1,
+};
struct user_info {
uint32_t uid;
+ uint32_t flags; /* bitwise OR of enum uid_info_flags values */
struct osl_table *table;
uint64_t files;
uint64_t bytes;
struct osl_table_description *desc;
};
+/**
+ * Contains info for each user that owns at least one regular file.
+ *
+ * Even users that are not taken into account because of the --uid
+ * option occupy a slot in this hash table. This makes it possible to
+ * find out quickly whether a uid is admissible. And yes, this has to
+ * be fast.
+ */
static struct user_info *uid_hash_table;
+/** Return non-zero if the given hash table slot already holds a uid. */
+static inline int ui_used(struct user_info *ui)
+{
+	const uint32_t used_bit = ui->flags & UI_FL_SLOT_USED;
+
+	return used_bit;
+}
+
+/**
+ * Return non-zero (the raw flag bit, not necessarily 1) if the uid stored
+ * in the given slot was marked admissible.
+ */
+static inline int ui_admissible(struct user_info *ui)
+{
+	const uint32_t admissible_bit = ui->flags & UI_FL_ADMISSIBLE;
+
+	return admissible_bit;
+}
+
+struct uid_range {
+ uint32_t low; /* smallest uid contained in the range */
+ uint32_t high; /* largest uid contained in the range (inclusive) */
+};
+
+static struct uid_range *admissible_uids; /* conf.uid_given entries, allocated by check_args() */
+
+/*
+ * Convert the string arg to a uid.
+ *
+ * On success the uid is stored in *uid and 1 is returned. If the conversion
+ * fails, the negative return value of para_atoi64() is passed on. A value
+ * that does not fit into an unsigned 32 bit integer yields
+ * -ERRNO_TO_ERROR(EINVAL).
+ */
+static inline int check_uid_arg(const char *arg, uint32_t *uid)
+{
+	/*
+	 * The string -> uid conversion needs a 64-bit integer because
+	 * strtoll() returns a signed value.
+	 */
+	int64_t parsed;
+	const uint32_t uid_max = ~0U;
+	int ret = para_atoi64(arg, &parsed);
+
+	if (ret < 0)
+		return ret;
+	if (parsed >= 0 && parsed <= uid_max) {
+		*uid = parsed;
+		return 1;
+	}
+	return -ERRNO_TO_ERROR(EINVAL);
+}
+
+/*
+ * Parse one uid range specifier into *ur.
+ *
+ * Accepted forms:
+ *	"42"		the single uid 42 (low == high)
+ *	"-42"		all uids from 0 up to 42
+ *	"42-"		all uids from 42 up to the largest 32 bit value
+ *	"42-4711"	all uids from 42 up to 4711
+ *
+ * Returns 1 on success and a negative error code if a number could not be
+ * converted or if the lower bound exceeds the upper bound. The outcome is
+ * logged in either case.
+ */
+static int parse_uid_range(const char *orig_arg, struct uid_range *ur)
+{
+	int ret;
+	char *arg = para_strdup(orig_arg), *p = strchr(arg, '-');
+
+	if (!p || p == arg) { /* 42 or -42 */
+		if (p) /* -42: skip the leading dash */
+			p++;
+		else /* 42: no dash at all, the whole argument is the uid */
+			p = arg;
+		ret = check_uid_arg(p, &ur->high);
+		if (ret < 0)
+			goto out;
+		/* p == arg only holds in the dash-less, single uid case */
+		ur->low = p == arg? ur->high : 0;
+		ret = 1;
+		goto out;
+	}
+	/* 42- or 42-4711 */
+	*p = '\0';
+	p++;
+	ret = check_uid_arg(arg, &ur->low);
+	if (ret < 0)
+		goto out;
+	ur->high = ~0U;
+	if (*p) { /* 42-4711 */
+		ret = check_uid_arg(p, &ur->high);
+		if (ret < 0)
+			goto out;
+	}
+	if (ur->low > ur->high)
+		ret = -ERRNO_TO_ERROR(EINVAL);
+out:
+	if (ret < 0)
+		ERROR_LOG("bad uid option: %s\n", orig_arg);
+	else
+		INFO_LOG("admissible uid range: %u - %u\n", ur->low,
+			ur->high);
+	free(arg);
+	return ret;
+}
+
+
/** evaluates to 1 if x < y, to -1 if x > y and to 0 if x == y */
#define NUM_COMPARE(x, y) ((int)((x) < (y)) - (int)((x) > (y)))
-
/**
* The log function.
*
time_t t1;
char str[255] = "";
- if (ll < 4)
+ if (ll < conf.loglevel_arg)
return;
outfd = stderr;
time(&t1);
return NUM_COMPARE(obj2->data, obj1->data);
}
-/**
- * Compare two osl objects of string type.
- *
- * \param obj1 Pointer to the first object.
- * \param obj2 Pointer to the second object.
- *
- * In any case, only \p MIN(obj1->size, obj2->size) characters of each string
- * are taken into account.
- *
- * \return It returns an integer less than, equal to, or greater than zero if
- * \a obj1 is found, respectively, to be less than, to match, or be greater
- * than obj2.
- *
- * \sa strcmp(3), strncmp(3), osl_compare_func.
- */
-static int string_compare(const struct osl_object *obj1,
- const struct osl_object *obj2)
-{
- const char *str1 = (const char *)obj1->data;
- const char *str2 = (const char *)obj2->data;
- return strncmp(str1, str2, MIN(obj1->size, obj2->size));
-}
-
/**
* Compare two osl objects pointing to unsigned integers of 64 bit size.
*
static struct osl_column_description dir_table_cols[] = {
[DT_NAME] = {
.storage_type = OSL_MAPPED_STORAGE,
- .storage_flags = OSL_RBTREE | OSL_UNIQUE,
+ .storage_flags = 0,
.name = "dir",
- .compare_function = string_compare,
},
[DT_NUM] = {
.storage_type = OSL_MAPPED_STORAGE,
.num_columns = NUM_DT_COLUMNS,
.flags = 0,
.column_descriptions = dir_table_cols,
- .dir = DATABASE_DIR
};
/** The columns of the id table. */
ui->desc->num_columns = NUM_UT_COLUMNS;
ui->desc->flags = 0;
ui->desc->column_descriptions = user_table_cols;
- ui->desc->dir = para_strdup(DATABASE_DIR);
+ ui->desc->dir = para_strdup(conf.database_dir_arg);
ui->desc->name = make_message("%u", (unsigned)ui->uid);
- num_uids++;
INFO_LOG(".............................uid #%u: %u\n",
(unsigned)num_uids, (unsigned)ui->uid);
if (create) {
ret = osl_create_table(ui->desc);
if (ret < 0)
goto err;
+ num_uids++;
}
ret = osl_open_table(ui->desc, &ui->table);
if (ret < 0)
ui->desc->dir = NULL;
ui->desc = NULL;
ui->table = NULL;
+ ui->flags = 0;
return ret;
}
{
int ret;
+ dir_table_desc.dir = para_strdup(conf.database_dir_arg);
ret = osl_create_table(&dir_table_desc);
if (ret < 0)
return ret;
return 1;
}
-
+/*
+ * We use a hash table of size s=2^uid_hash_bits to map the uids into the
+ * interval [0..s-1]. Hash collisions are treated by open addressing, i.e.
+ * unused slots in the table are used to store different uids that hash to the
+ * same slot.
+ *
+ * If a hash collision occurs, different slots are successively probed in order
+ * to find an unused slot for the new uid. Probing is implemented via a second
+ * hash function that maps the uid to h=(uid * PRIME2) | 1, which is always an
+ * odd number.
+ *
+ * An odd number is sufficient to make sure each entry of the hash table gets
+ * probed for probe_num between 0 and s-1 because s is a power of two, hence
+ * the second hash value never shares a non-trivial common divisor with the
+ * hash table size. IOW: h is invertible in the ring of integers modulo s.
+ */
static uint32_t double_hash(uint32_t uid, uint32_t probe_num)
{
return (uid * PRIME1 + ((uid * PRIME2) | 1) * probe_num)
CREATE_USER_TABLE = 2,
};
+/*
+ * Check whether uid lies in at least one of the admissible uid ranges.
+ *
+ * If no uid range was given at all, every uid is admissible. The result
+ * (1 or 0) is also written to the debug log.
+ */
+static int uid_is_admissible(uint32_t uid)
+{
+	int i, admissible;
+
+	for (i = 0; i < conf.uid_given; i++) {
+		struct uid_range *range = &admissible_uids[i];
+
+		if (uid >= range->low && uid <= range->high)
+			break;
+	}
+	/* leaving the loop early means that some range matched */
+	admissible = !conf.uid_given || i < conf.uid_given;
+	DEBUG_LOG("uid %u is %sadmissible\n", (unsigned)uid,
+		admissible? "" : "not ");
+	return admissible;
+}
+
static int search_uid(uint32_t uid, enum search_uid_flags flags,
struct user_info **ui_ptr)
{
for (p = 0; p < uid_hash_table_size; p++) {
struct user_info *ui = uid_hash_table + double_hash(uid, p);
- if (!ui->table) {
+ if (!ui_used(ui)) {
int ret;
-
if (!flags)
return -E_BAD_UID;
ui->uid = uid;
+ ui->flags |= UI_FL_SLOT_USED;
+ if (!uid_is_admissible(uid))
+ return 0;
+ ui->flags |= UI_FL_ADMISSIBLE;
ret = open_user_table(ui, flags & CREATE_USER_TABLE);
if (ret < 0)
return ret;
+
if (ui_ptr)
*ui_ptr = ui;
return 1;
return 1;
}
+/*
+ * Divisors for the size units (powers of 1024), indexed by the values of
+ * enum enum_size_unit. No entry is provided for size_unit_arg_h.
+ */
+const uint64_t size_unit_divisors[] = {
+	[size_unit_arg_b] = 1ULL,
+	[size_unit_arg_k] = 1ULL << 10,
+	[size_unit_arg_m] = 1ULL << 20,
+	[size_unit_arg_g] = 1ULL << 30,
+	[size_unit_arg_t] = 1ULL << 40,
+};
+
+/*
+ * Divisors for the count units (powers of 1000), indexed by the values of
+ * enum enum_count_unit. No entry is provided for count_unit_arg_h.
+ */
+const uint64_t count_unit_divisors[] = {
+	[count_unit_arg_n] = 1ULL,
+	[count_unit_arg_k] = 1000ULL,
+	[count_unit_arg_m] = 1000000ULL,
+	[count_unit_arg_g] = 1000000000ULL,
+	[count_unit_arg_t] = 1000000000000ULL,
+};
+
+const char size_unit_abbrevs[] = " BKMGT";
+const char count_unit_abbrevs[] = " KMGT";
+
+/*
+ * Write value, divided by the divisor of the given size unit and followed
+ * by the one-character unit abbreviation, to result.
+ *
+ * For size_unit_arg_h the unit is chosen automatically: starting at
+ * size_unit_arg_b, the next larger unit is taken as long as value exceeds
+ * its divisor. The output is at most 20 digits plus the unit character, so
+ * result must provide room for at least 22 bytes including the terminator.
+ */
+static void format_size_value(enum enum_size_unit unit, uint64_t value, char *result)
+{
+	if (unit == size_unit_arg_h) { /* human readable */
+		unit = size_unit_arg_b;
+		while (unit < size_unit_arg_t && value > size_unit_divisors[unit + 1])
+			unit++;
+	}
+	sprintf(result, "%llu%c",
+		(long long unsigned)value / size_unit_divisors[unit],
+		size_unit_abbrevs[unit]);
+}
+
+/*
+ * Write value, divided by the divisor of the given count unit and followed
+ * by the one-character unit abbreviation, to result.
+ *
+ * For count_unit_arg_h the unit is chosen automatically: starting at
+ * count_unit_arg_n, the next larger unit is taken as long as value exceeds
+ * its divisor. The output is at most 20 digits plus the unit character, so
+ * result must provide room for at least 22 bytes including the terminator.
+ */
+static void format_count_value(enum enum_count_unit unit, uint64_t value, char *result)
+{
+	if (unit == count_unit_arg_h) { /* human readable */
+		unit = count_unit_arg_n;
+		while (unit < count_unit_arg_t && value > count_unit_divisors[unit + 1])
+			unit++;
+	}
+	sprintf(result, "%llu%c",
+		(long long unsigned)value / count_unit_divisors[unit],
+		count_unit_abbrevs[unit]);
+}
+
enum global_stats_flags {
GSF_PRINT_DIRNAME = 1,
GSF_PRINT_BYTES = 2,
{
struct global_stats_info *gsi = data;
struct osl_object obj;
- char *dirname;
+ char *dirname, formated_value[25];
int ret, summary = gsi->flags & GSF_COMPUTE_SUMMARY;
if (!gsi->count && !summary)
if (ret < 0)
return ret;
files = *(uint64_t *)obj.data;
- if (gsi->count && (gsi->flags & GSF_PRINT_FILES))
- printf("%llu%s", (long long unsigned)files,
+ if (gsi->count && (gsi->flags & GSF_PRINT_FILES)) {
+ format_size_value(conf.size_unit_arg, files,
+ formated_value);
+ printf("%s%s", formated_value,
(gsi->flags & GSF_PRINT_BYTES)? "\t" : "\n");
+ }
if (summary)
num_files += files;
}
if (ret < 0)
return ret;
bytes = *(uint64_t *)obj.data;
- if (gsi->count && (gsi->flags & GSF_PRINT_BYTES))
- printf("%llu\n", (long long unsigned)bytes);
+ if (gsi->count && (gsi->flags & GSF_PRINT_BYTES)) {
+ format_size_value(conf.size_unit_arg, bytes,
+ formated_value);
+ printf("%s\n", formated_value);
+ }
if (summary) {
num_bytes += bytes;
num_dirs++;
}
}
- if (gsi->count)
+ if (gsi->count > 0)
gsi->count--;
return 1;
}
printf("--------------------- user summary (uid/dirs/files/bytes):\n");
FOR_EACH_USER(ui) {
- if (!ui->table)
+ char formated_dir_count[25], formated_file_count[25],
+ formated_bytes[25];
+ if (!ui_used(ui))
continue;
- printf("%u\t%llu\t%llu\t%llu\n", (unsigned)ui->uid,
- (long long unsigned)ui->dirs,
- (long long unsigned)ui->files,
- (long long unsigned)ui->bytes);
+ format_count_value(conf.count_unit_arg, ui->dirs,
+ formated_dir_count);
+ format_count_value(conf.count_unit_arg, ui->files,
+ formated_file_count);
+ format_size_value(conf.size_unit_arg, ui->bytes,
+ formated_bytes);
+ printf("%u\t%s\t%s\t%s\n", (unsigned)ui->uid,
+ formated_dir_count,
+ formated_file_count,
+ formated_bytes
+ );
}
}
struct osl_row *dir_row;
struct osl_object obj;
int ret, summary = usi->flags & GSF_COMPUTE_SUMMARY;
+ char formated_value[25];
if (!usi->count && !summary)
return -E_LOOP_COMPLETE;
if (ret < 0)
return ret;
files = *(uint64_t *)obj.data;
- if (usi->count && (usi->flags & USF_PRINT_FILES))
- printf("%llu%s",
- (long long unsigned)files,
+ if (usi->count && (usi->flags & USF_PRINT_FILES)) {
+ format_size_value(conf.size_unit_arg, files,
+ formated_value);
+ printf("%s%s", formated_value,
(usi->flags & USF_PRINT_BYTES)? "\t" : "\n"
);
+ }
if (summary)
usi->ui->files += files;
}
if (ret < 0)
return ret;
bytes = *(uint64_t *)obj.data;
- if (usi->count && (usi->flags & USF_PRINT_BYTES))
- printf("%llu\n", (long long unsigned)bytes);
+ if (usi->count && (usi->flags & USF_PRINT_BYTES)) {
+ format_size_value(conf.size_unit_arg, bytes,
+ formated_value);
+ printf("%s\n", formated_value);
+ }
if (summary) {
usi->ui->bytes += bytes;
usi->ui->dirs++;
}
}
- if (usi->count)
+ if (usi->count > 0)
usi->count--;
return 1;
}
FOR_EACH_USER(ui) {
struct user_stats_info usi = {
- .count = 10,
+ .count = conf.limit_arg,
.ui = ui
};
- if (!ui->table)
+ if (!ui_used(ui) || !ui_admissible(ui))
continue;
usi.flags = USF_PRINT_DIRNAME | USF_PRINT_BYTES | USF_COMPUTE_SUMMARY;
printf("************************************************ uid %u\n",
(unsigned) ui->uid);
- if (!ui->table)
- continue;
printf("----------------- Largest dirs -------------------\n");
osl_rbtree_loop_reverse(ui->table, UT_BYTES, &usi,
user_stats_loop_function);
printf("---------- dirs containing most files ------------\n");
- usi.count = 10;
+ usi.count = conf.limit_arg,
usi.flags = USF_PRINT_DIRNAME | USF_PRINT_FILES;
osl_rbtree_loop_reverse(ui->table, UT_FILES, &usi,
user_stats_loop_function);
{
int ret;
struct global_stats_info gsi = {
- .count = 10,
+ .count = conf.limit_arg,
.flags = GSF_PRINT_DIRNAME | GSF_PRINT_BYTES | GSF_COMPUTE_SUMMARY
};
global_stats_loop_function);
if (ret < 0 && ret != -E_LOOP_COMPLETE)
return ret;
- gsi.count = 10;
+ gsi.count = conf.limit_arg;
gsi.flags = GSF_PRINT_DIRNAME | GSF_PRINT_FILES;
printf("---------- dirs containing most files ------------\n");
return 1;
}
+/*
+ * Build the path of the uid list file, which lives directly below the
+ * database directory. The string is obtained from make_message(); the
+ * callers in this file pass it to free() when they are done with it.
+ */
+static char *get_uid_list_name(void)
+{
+	char *path = make_message("%s/uid_list", conf.database_dir_arg);
+
+	return path;
+}
+
static int write_uid_list(void)
{
- char *buf;
+ char *buf, *filename;
uint32_t count = 0;
struct user_info *ui;
size_t size = num_uids * sizeof(uint32_t);
return 0;
buf = para_malloc(size);
FOR_EACH_USER(ui) {
- if (!ui->table)
+ if (!ui_used(ui) || !ui_admissible(ui))
continue;
+ DEBUG_LOG("saving uid %u\n", (unsigned) ui->uid);
write_u32(buf + count++ * sizeof(uint32_t), ui->uid);
}
- ret = para_write_file(UID_LIST, buf, size);
+ filename = get_uid_list_name();
+ ret = para_write_file(filename, buf, size);
+ free(filename);
free(buf);
return ret;
}
static int open_dir_table(void)
{
+ if (!dir_table_desc.dir) /* we did not create the table, so the database directory has not been set in the description yet */
+ dir_table_desc.dir = para_strdup(conf.database_dir_arg);
return osl_open_table(&dir_table_desc, &dir_table);
}
ret = osl_close_table(dir_table, OSL_MARK_CLEAN);
if (ret < 0)
ERROR_LOG("failed to close dir table: %s\n", error_txt(-ret));
+ free((char *)dir_table_desc.dir);
dir_table = NULL;
}
{
int ret;
- if (!ui || !ui->table)
+ if (!ui || !ui_used(ui) || !ui_admissible(ui))
return;
ret = osl_close_table(ui->table, OSL_MARK_CLEAN);
if (ret < 0)
free(ui->desc);
ui->desc = NULL;
ui->table = NULL;
+ ui->flags = 0;
}
static void close_user_tables(void)
free_hash_table();
}
-static int com_create(char *dirname)
+static int com_create()
{
int ret = create_tables();
if (ret < 0)
ret = open_dir_table();
if (ret < 0)
return ret;
- ret = scan_dir(dirname);
+ ret = scan_dir(conf.base_dir_arg);
if (ret < 0)
goto out;
ret = write_uid_list();
static int read_uid_file(void)
{
- char *map;
size_t size;
- int ret = mmap_full_file(UID_LIST, O_RDONLY, (void **)&map, &size, NULL);
uint32_t n;
+ char *filename = get_uid_list_name(), *map;
+ int ret = mmap_full_file(filename, O_RDONLY, (void **)&map, &size, NULL);
- if (ret < 0)
+ if (ret < 0) {
+ INFO_LOG("failed to map %s\n", filename);
+ free(filename);
return ret;
+ }
num_uids = size / 4;
+ INFO_LOG("found %u uids in %s\n", (unsigned)num_uids, filename);
+ free(filename);
/* hash table size should be a power of two and larger than the number of uids */
uid_hash_table_size = 4;
while (uid_hash_table_size < num_uids)
ret = read_uid_file();
if (ret < 0)
return ret;
- print_statistics();
+ ret = print_statistics();
close_all_tables();
+ return ret;
+}
+
+/*
+ * Parse all given uid arguments into the admissible_uids array.
+ *
+ * Returns 0 if no uid argument was given, 1 if every argument could be
+ * parsed, and the negative return value of parse_uid_range() otherwise.
+ */
+static int check_args(void)
+{
+ int i, ret;
+
+ if (!conf.uid_given)
+ return 0;
+
+ admissible_uids = para_malloc(conf.uid_given * sizeof(*admissible_uids));
+
+ for (i = 0; i < conf.uid_given; i++) {
+ ret = parse_uid_range(conf.uid_arg[i], admissible_uids + i);
+ if (ret < 0)
+ goto err;
+ }
return 1;
+err:
+ free(admissible_uids);
+ admissible_uids = NULL; /* main() passes this pointer to free() again on exit */
+ return ret;
}
int main(int argc, char **argv)
{
- int ret = -E_SYNTAX;
- if (argc > 2)
+ int ret;
+ struct cmdline_parser_params params = {
+ .override = 0,
+ .initialize = 1,
+ .check_required = 0,
+ .check_ambiguity = 0,
+ .print_errors = 1
+ };
+
+ cmdline_parser_ext(argc, argv, &conf, ¶ms); /* aborts on errors */
+ ret = check_args();
+ if (ret < 0)
goto out;
- if (argc == 1)
+ ret = -E_SYNTAX;
+ if (conf.select_given)
ret = com_select();
else
- ret = com_create(argv[1]);
+ ret = com_create();
if (ret < 0)
goto out;
out:
+ free(admissible_uids);
if (ret < 0) {
ERROR_LOG("%s\n", error_txt(-ret));
return -EXIT_FAILURE;