From: Andre Noll Date: Mon, 26 May 2008 19:37:38 +0000 (+0200) Subject: Initial git import. X-Git-Tag: v0.1.0~109 X-Git-Url: http://git.tuebingen.mpg.de/?p=osl.git;a=commitdiff_plain;h=5952112a37ecdaedf3b76e08f97d307f1056c512;ds=sidebyside Initial git import. This code is taken straight from the paraslash package. It needs a lot more work to transform it into a proper library package. --- 5952112a37ecdaedf3b76e08f97d307f1056c512 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e79390c --- /dev/null +++ b/Makefile @@ -0,0 +1,60 @@ +uname_s := $(shell uname -s 2>/dev/null || echo "UNKNOWN_OS") +uname_rs := $(shell uname -rs) + +objects := osl.o fd.o rbtree.o string.o sha1.o +major_version := 0 +minor_version := 1 +patchlevel_version := 0 +libname := osl + + +ifeq ($(uname_s),Linux) + dso_opts := --shared -Wl,-soname,libosl.so.$(major_version) + dso_filename :=lib$(libname).so.$(major_version).$(minor_version).$(patchlevel_version) +endif +ifeq ($(uname_s),Darwin) + mac_major_version := $(shell expr 41 + $(major_version)) + dso_opts := -dynamiclib -current_version $(major_version).$(minor_version) \ + -compatibility_version $(major_version).$(minor_version) -fvisibility=hidden + dso_filename := lib$(libname).$(shell printf "\\x"$(mac_major_version).dylib) +endif +ifeq ($(uname_s),SunOS) + dso_opts := --shared -z text -z defs + dso_filename :=lib$(libname).so.$(major_version).$(minor_version).$(patchlevel_version) + CPPFLAGS += -I/opt/csw/include +endif +ifeq ($(uname_s),FreeBSD) + dso_opts := --shared -Wl,-soname,libosl.so.$(major_version) + dso_filename :=lib$(libname).so.$(major_version).$(minor_version).$(patchlevel_version) +endif +ifeq ($(uname_s),NetBSD) + dso_opts := --shared -Wl,-soname,libosl.so.$(major_version) + dso_filename :=lib$(libname).so.$(major_version).$(minor_version).$(patchlevel_version) +endif +all: $(dso_filename) + +DEBUG_CPPFLAGS += -Wno-sign-compare -g -Wunused -Wundef -W +DEBUG_CPPFLAGS += -Wredundant-decls +#CPPFLAGS += -Os +CPPFLAGS += -Wall +#CPPFLAGS += -Wuninitialized +CPPFLAGS += -Wchar-subscripts +CPPFLAGS += -Wformat-security +CPPFLAGS += -Werror-implicit-function-declaration +CPPFLAGS += -Wmissing-format-attribute +CPPFLAGS += -Wunused-macros +CPPFLAGS += -Wbad-function-cast +CPPFLAGS += -fPIC + +Makefile.deps: $(wildcard *.c *.h) + gcc -MM -MG *.c > $@ + +-include Makefile.deps + +%.o: %.c Makefile + $(CC) -c $(CPPFLAGS) $(DEBUG_CPPFLAGS) $< +$(dso_filename): $(objects) + $(CC) $(dso_opts) -o $@ $(objects) -lcrypto + +clean: + rm -f *.o $(dso_filename) diff --git a/Makefile.deps b/Makefile.deps new file mode 100644 index 0000000..3f50651 --- /dev/null +++ b/Makefile.deps @@ -0,0 +1,6 @@ +fd.o: fd.c log.h gcc-compat.h error.h +osl.o: osl.c log.h gcc-compat.h error.h fd.h list.h osl_core.h rbtree.h \ + osl.h string.h hash.h portable_io.h sha1.h +rbtree.o: rbtree.c rbtree.h +sha1.o: sha1.c log.h gcc-compat.h +string.o: string.c log.h gcc-compat.h string.h error.h diff --git a/error.h b/error.h new file mode 100644 index 0000000..e311807 --- /dev/null +++ b/error.h @@ -0,0 +1,100 @@ +extern char *__errlist[]; +extern char *__error_txt; + +/** + * This bit indicates whether a number is considered a system error number + * If yes, the system errno is just the result of clearing this bit from + * the given number. + */ +#define SYSTEM_ERROR_BIT 30 + +/** Check whether the system error bit is set. */ +#define IS_SYSTEM_ERROR(num) (!!((num) & (1 << SYSTEM_ERROR_BIT))) + +/** Set the system error bit for the given number. */ +#define ERRNO_TO_ERROR(num) ((num) | (1 << SYSTEM_ERROR_BIT)) + +/** + * Check whether a given number is a system error number. + * + * \param num The value to be checked. + * \param _errno The system error number. + * + * \return True if \a num is the representation of the system + * error identified by \a _errno. + */ +static inline int is_errno(int num, int _errno) +{ + assert(num > 0 && _errno > 0); + return ERRNO_TO_ERROR(_errno) == num; +} + +/** + * version of strerror(3). + * + * \param num The error number. + * + * \return The error text of \a num. + */ +static inline char *error_txt(int num) +{ + assert(num > 0); + if (IS_SYSTEM_ERROR(num)) + return strerror((num) & ((1 << SYSTEM_ERROR_BIT) - 1)); + else + return __errlist[num]; +} + +#define ALL_ERRORS \ + _ERROR(SUCCESS, "success") \ + _ERROR(BAD_DB_DIR, "invalid database directory") \ + _ERROR(NO_COLUMN_DESC, "missing column description") \ + _ERROR(BAD_NAME, "invalid name for a column/table") \ + _ERROR(BAD_STORAGE_TYPE, "invalid storage type") \ + _ERROR(BAD_STORAGE_FLAGS, "invalid storage flags") \ + _ERROR(NO_COLUMN_NAME, "missing column name") \ + _ERROR(NO_COLUMNS, "at least one column required") \ + _ERROR(BAD_COLUMN_NAME, "invalid name for a table column") \ + _ERROR(NO_UNIQUE_RBTREE_COLUMN, "need at least one mapped column with OSL_UNIQE and OSL_RBTREE") \ + _ERROR(NO_RBTREE_COL, "at least one column needs an rbtree") \ + _ERROR(DUPLICATE_COL_NAME, "column name given twice") \ + _ERROR(BAD_STORAGE_SIZE, "invalid storage size") \ + _ERROR(NO_COMPARE_FUNC, "missing compare function") \ + _ERROR(BAD_DATA_SIZE, "wrong data size for fixed-size column") \ + _ERROR(NOT_MAPPED, "file not mapped") \ + _ERROR(ALREADY_MAPPED, "file already mapped") \ + _ERROR(BAD_SIZE, "invalid size specified") \ + _ERROR(TRUNC, "failed to truncate file") \ + _ERROR(BAD_TABLE, "table not open") \ + _ERROR(BAD_TABLE_DESC, "invalid table description") \ + _ERROR(RB_KEY_EXISTS, "key already exists in rbtree") \ + _ERROR(RB_KEY_NOT_FOUND, "key not found in rbtree") \ + _ERROR(BAD_ROW_NUM, "invalid row number") \ + _ERROR(INDEX_CORRUPTION, "index corruption detected") \ + _ERROR(INVALID_OBJECT, "reference to invalid object") \ + _ERROR(STAT, "can not stat file") \ + _ERROR(WRITE, "write error") \ + _ERROR(LSEEK, "lseek error") \ + _ERROR(BUSY, "table is busy") \ + _ERROR(SHORT_TABLE, "table too short") \ + _ERROR(NO_MAGIC, "missing table header magic") \ + _ERROR(VERSION_MISMATCH, "table version not supported") \ + _ERROR(BAD_COLUMN_NUM, "invalid column number") \ + _ERROR(BAD_TABLE_FLAGS, "invalid flags in table description") \ + _ERROR(BAD_ROW, "invalid row") \ + _ERROR(EMPTY, "file empty") \ + _ERROR(MMAP, "mmap error") \ + + +/** + * This is temporarily defined to expand to its first argument (prefixed by + * 'E_') and gets later redefined to expand to the error text only + */ +#define _ERROR(err, msg) E_ ## err, + +enum error_codes { + ALL_ERRORS +}; +#undef _ERROR +#define _ERROR(err, msg) msg, +#define DEFINE_ERRLIST char *__errlist[] = {ALL_ERRORS} diff --git a/fd.c b/fd.c new file mode 100644 index 0000000..c7c4f0f --- /dev/null +++ b/fd.c @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2006-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file fd.c Helper functions for file descriptor handling. */ + +#include +#include +#include +#include +#include + +#include "log.h" +#include "error.h" + +/** + * Write a buffer to a file descriptor, re-write on short writes. + * + * \param fd The file descriptor. + * \param buf The buffer to be sent. + * \param len The length of \a buf. + * + * \return Standard. In any case, the number of bytes that have been written is + * stored in \a len. + */ +int write_all(int fd, const char *buf, size_t *len) +{ + size_t total = *len; + + assert(total); + *len = 0; + while (*len < total) { + int ret = write(fd, buf + *len, total - *len); + if (ret == -1) + return -ERRNO_TO_ERROR(errno); + *len += ret; + } + return 1; +} + +/** + * Wrapper for the open(2) system call. + * + * \param path The filename. + * \param flags The usual open(2) flags. + * \param mode Specifies the permissions to use. + * + * The mode parameter must be specified when O_CREAT is in the flags, and is + * ignored otherwise. + * + * \return The file descriptor on success, negative on errors. + * + * \sa open(2). + */ +int para_open(const char *path, int flags, mode_t mode) +{ + int ret = open(path, flags, mode); + + if (ret >= 0) + return ret; + return -ERRNO_TO_ERROR(errno); +} + +/** + * Wrapper for chdir(2). + * + * \param path The specified directory. + * + * \return Standard. + */ +static int para_chdir(const char *path) +{ + int ret = chdir(path); + + if (ret >= 0) + return 1; + return -ERRNO_TO_ERROR(errno); +} + +/** + * Save the cwd and open a given directory. + * + * \param dirname Path to the directory to open. + * \param dir Result pointer. + * \param cwd File descriptor of the current working directory. + * + * \return Standard. + * + * Opening the current directory (".") and calling fchdir() to return is + * usually faster and more reliable than saving cwd in some buffer and calling + * chdir() afterwards. + * + * If \a cwd is not \p NULL "." is opened and the resulting file descriptor is + * stored in \a cwd. If the function returns success, and \a cwd is not \p + * NULL, the caller must close this file descriptor (probably after calling + * fchdir(*cwd)). + * + * On errors, the function undos everything, so the caller needs neither close + * any files, nor change back to the original working directory. + * + * \sa getcwd(3). + * + */ +int para_opendir(const char *dirname, DIR **dir, int *cwd) +{ + int ret; + + if (cwd) { + ret = para_open(".", O_RDONLY, 0); + if (ret < 0) + return ret; + *cwd = ret; + } + ret = para_chdir(dirname); + if (ret < 0) + goto close_cwd; + *dir = opendir("."); + if (*dir) + return 1; + ret = -ERRNO_TO_ERROR(errno); +/* Ignore return value of fchdir() and close(). We're busted anyway. */ + if (cwd) + fchdir(*cwd); +close_cwd: + if (cwd) + close(*cwd); + return ret; +} + +/** + * A wrapper for fchdir(). + * + * \param fd An open file descriptor. + * + * \return Standard. + */ +int para_fchdir(int fd) +{ + if (fchdir(fd) < 0) + return -ERRNO_TO_ERROR(errno); + return 1; +} + +/** + * A wrapper for mkdir(2). + * + * \param path Name of the directory to create. + * \param mode The permissions to use. + * + * \return Standard. + */ +int para_mkdir(const char *path, mode_t mode) +{ + if (!mkdir(path, mode)) + return 1; + return -ERRNO_TO_ERROR(errno); +} + +/** + * Open a file and map it into memory. + * + * \param path Name of the regular file to map. + * \param open_mode Either \p O_RDONLY or \p O_RDWR. + * \param map On success, the mapping is returned here. + * \param size size of the mapping. + * \param fd_ptr The file descriptor of the mapping. + * + * If \a fd_ptr is \p NULL, the file descriptor resulting from the underlying + * open call is closed after mmap(). Otherwise the file is kept open and the + * file descriptor is returned in \a fd_ptr. + * + * \return Standard. + * + * \sa para_open(), mmap(2). + */ +int mmap_full_file(const char *path, int open_mode, void **map, + size_t *size, int *fd_ptr) +{ + int fd, ret, mmap_prot, mmap_flags; + struct stat file_status; + + if (open_mode == O_RDONLY) { + mmap_prot = PROT_READ; + mmap_flags = MAP_PRIVATE; + } else { + mmap_prot = PROT_READ | PROT_WRITE; + mmap_flags = MAP_SHARED; + } + ret = para_open(path, open_mode, 0); + if (ret < 0) + return ret; + fd = ret; + if (fstat(fd, &file_status) < 0) { + ret = -ERRNO_TO_ERROR(errno); + goto out; + } + *size = file_status.st_size; + ret = -E_EMPTY; + DEBUG_LOG("%s: size %zu\n", path, *size); + if (!*size) + goto out; + *map = mmap(NULL, *size, mmap_prot, mmap_flags, fd, 0); + if (*map == MAP_FAILED) { + *map = NULL; + ret = -E_MMAP; + goto out; + } + ret = 1; +out: + if (ret < 0 || !fd_ptr) + close(fd); + else + *fd_ptr = fd; + return ret; +} + +/** + * A wrapper for munmap(2). + * + * \param start The start address of the memory mapping. + * \param length The size of the mapping. + * + * \return Positive on success, \p -E_MUNMAP on errors. + * + * \sa munmap(2), mmap_full_file(). + */ +int para_munmap(void *start, size_t length) +{ + int err; + if (munmap(start, length) >= 0) + return 1; + err = errno; + ERROR_LOG("munmap (%p/%zu) failed: %s\n", start, length, + strerror(err)); + return -ERRNO_TO_ERROR(err); +} diff --git a/fd.h b/fd.h new file mode 100644 index 0000000..80f484a --- /dev/null +++ b/fd.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2006-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file fd.h exported symbols from fd.c */ + +int write_all(int fd, const char *buf, size_t *len); +int para_open(const char *path, int flags, mode_t mode); +int para_opendir(const char *dirname, DIR **dir, int *cwd); +int para_mkdir(const char *path, mode_t mode); +int para_fchdir(int fd); +int mmap_full_file(const char *filename, int open_mode, void **map, + size_t *size, int *fd_ptr); +int para_munmap(void *start, size_t length); diff --git a/gcc-compat.h b/gcc-compat.h new file mode 100644 index 0000000..61c3c88 --- /dev/null +++ b/gcc-compat.h @@ -0,0 +1,25 @@ +# define inline inline __attribute__ ((always_inline)) +# define __noreturn __attribute__ ((noreturn)) +# define __malloc __attribute__ ((malloc)) +# define __a_unused __attribute__ ((unused)) +# define likely(x) __builtin_expect (!!(x), 1) +# define unlikely(x) __builtin_expect (!!(x), 0) +/* + * p is the number of the "format string" parameter, and q is + * the number of the first variadic parameter + */ +# define __printf(p,q) __attribute__ ((format (printf, p, q))) +/* + * as direct use of __printf(p,q) confuses doxygen, here are two extra macros + * for those values p,q that are actually used by paraslash. + */ +#define __printf_1_2 __printf(1,2) +#define __printf_2_3 __printf(2,3) + +# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3) +# define __must_check __attribute__ ((warn_unused_result)) +# else +# define __must_check /* no warn_unused_result */ +# endif + +#define _static_inline_ static inline diff --git a/hash.h b/hash.h new file mode 100644 index 0000000..03b45e0 --- /dev/null +++ b/hash.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2007-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file hash.h Inline functions for hash values. */ + +#include "portable_io.h" + +/** hash arrays are always unsigned char. */ +#define HASH_TYPE unsigned char + +#include "sha1.h" +/** We use openssl's sha1 implementation. */ +#define hash_function sha1_hash + +/** + * Compare two hashes. + * + * \param h1 Pointer to the first hash value. + * \param h2 Pointer to the second hash value. + * + * \return 1, -1, or zero, depending on whether \a h1 is greater than, + * less than or equal to h2, respectively. + */ +_static_inline_ int hash_compare(HASH_TYPE *h1, HASH_TYPE *h2) +{ + int i; + + for (i = 0; i < HASH_SIZE; i++) { + if (h1[i] < h2[i]) + return -1; + if (h1[i] > h2[i]) + return 1; + } + return 0; +} + +/** + * Convert a hash value to ascii format. + * + * \param hash the hash value. + * \param asc Result pointer. + * + * \a asc must point to an area of at least 2 * \p HASH_SIZE + 1 bytes which + * will be filled by the function with the ascii representation of the hash + * value given by \a hash, and a terminating \p NULL byte. + */ +_static_inline_ void hash_to_asc(HASH_TYPE *hash, char *asc) +{ + int i; + const char hexchar[] = "0123456789abcdef"; + + for (i = 0; i < HASH_SIZE; i++) { + asc[2 * i] = hexchar[hash[i] >> 4]; + asc[2 * i + 1] = hexchar[hash[i] & 0xf]; + } + asc[2 * HASH_SIZE] = '\0'; +} diff --git a/list.h b/list.h new file mode 100644 index 0000000..de04ab9 --- /dev/null +++ b/list.h @@ -0,0 +1,202 @@ +/* + * Copied from the Linux kernel source tree, version 2.6.13. + * + * Licensed under the GPL v2 as per the whole kernel source tree. + * + */ + +/** \file list.h doubly linked list implementation */ + +#include /* offsetof */ + +/** get the struct this entry is embedded in */ +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +/** + * Non-NULL pointers that will result in page faults under normal + * circumstances, used to verify that nobody uses non-initialized list entries. + * Used for poisoning the \a next pointer of struct list_head. + */ +#define LIST_POISON1 ((void *) 0x00100100) +/** Non-null pointer, used for poisoning the \a prev pointer of struct + * list_head + */ +#define LIST_POISON2 ((void *) 0x00200200) + +/** Simple doubly linked list implementation. */ +struct list_head { + /** pointer to the next list entry */ + struct list_head *next; + /** pointer to the previous list entry */ + struct list_head *prev; +}; + +/** must be called before using any other list functions */ +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + + +/* + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * add a new entry + * + * \param new new entry to be added + * \param head list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void para_list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * add a new entry + * + * \param new new entry to be added + * \param head list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * Delete entry from list. + * + * \param entry the element to delete from the list. + * + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} + +/** + * delete from one list and add as another's head + * + * \param list: the entry to move + * \param head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + para_list_add(list, head); +} + +/** + * test whether a list is empty + * + * \param head the list to test. + */ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +/** + * get the struct for this entry + * + * \param ptr the &struct list_head pointer. + * \param type the type of the struct this is embedded in. + * \param member the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * iterate over a list safe against removal of list entry + * + * \param pos the &struct list_head to use as a loop counter. + * \param n another &struct list_head to use as temporary storage + * \param head the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/** + * iterate over list of given type + * + * \param pos the type * to use as a loop counter. + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * iterate over list of given type safe against removal of list entry + * + * \param pos the type * to use as a loop counter. + * \param n another type * to use as temporary storage + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) +/** + * iterate backwards over list of given type safe against removal of list entry + * \param pos the type * to use as a loop counter. + * \param n another type * to use as temporary storage + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member), \ + n = list_entry(pos->member.prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) diff --git a/log.h b/log.h new file mode 100644 index 0000000..d78b48a --- /dev/null +++ b/log.h @@ -0,0 +1,123 @@ +/* + * Copyright (C) 1997-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +#include +#include +#include +#include +#include +#include /* time(), localtime() */ +#include +#include +#include +#include +#include +#include +#include +#include +#include /* needed by create_pf_socket */ +#include +#include +#include "gcc-compat.h" + +/** compute the maximum of \a a and \a b */ +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +/** debug loglevel, gets really noisy */ +#define DEBUG 1 +/** still noisy, but won't fill your disk */ +#define INFO 2 +/** normal, but significant event */ +#define NOTICE 3 +/** unexpected event that can be handled */ +#define WARNING 4 +/** unhandled error condition */ +#define ERROR 5 +/** system might be unreliable */ +#define CRIT 6 +/** last message before exit */ +#define EMERG 7 + +/** Log messages with lower priority than that will not be compiled in. */ +#define COMPILE_TIME_LOGLEVEL 0 + +/** \cond */ +#if DEBUG > COMPILE_TIME_LOGLEVEL +#define DEBUG_LOG(f,...) __log(DEBUG, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#else +#define DEBUG_LOG(...) do {;} while (0) +#endif + +#if INFO > COMPILE_TIME_LOGLEVEL +#define INFO_LOG(f,...) __log(INFO, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#else +#define INFO_LOG(...) do {;} while (0) +#endif + +#if NOTICE > COMPILE_TIME_LOGLEVEL +#define NOTICE_LOG(f,...) __log(NOTICE, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#else +#define NOTICE_LOG(...) do {;} while (0) +#endif + +#if WARNING > COMPILE_TIME_LOGLEVEL +#define WARNING_LOG(f,...) __log(WARNING, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#else +#define WARNING_LOG(...) do {;} while (0) +#endif + +#if ERROR > COMPILE_TIME_LOGLEVEL +#define ERROR_LOG(f,...) __log(ERROR, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#else +#define ERROR_LOG(...) do {;} while (0) +#endif + +#if CRIT > COMPILE_TIME_LOGLEVEL +#define CRIT_LOG(f,...) __log(CRIT, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#else +#define CRIT_LOG(...) do {;} while (0) +#endif + +#if EMERG > COMPILE_TIME_LOGLEVEL +#define EMERG_LOG(f,...) __log(EMERG, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#else +#define EMERG_LOG(...) +#endif +/** \endcond */ + +__printf_2_3 void __log(int, const char*, ...); + +/** + * Write a log message to a dynamically allocated string. + * + * \param fmt Usual format string. + * \param p Result pointer. + * + * \sa printf(3). */ +#define VSPRINTF(fmt, p) \ +{ \ + int n; \ + size_t size = 100; \ + p = para_malloc(size); \ + while (1) { \ + va_list ap; \ + /* Try to print in the allocated space. */ \ + va_start(ap, fmt); \ + n = vsnprintf(p, size, fmt, ap); \ + va_end(ap); \ + /* If that worked, return the string. */ \ + if (n > -1 && n < size) \ + break; \ + /* Else try again with more space. */ \ + if (n > -1) /* glibc 2.1 */ \ + size = n + 1; /* precisely what is needed */ \ + else /* glibc 2.0 */ \ + size *= 2; /* twice the old size */ \ + p = realloc(p, size); \ + if (!p) \ + break; \ + } \ +} diff --git a/osl.c b/osl.c new file mode 100644 index 0000000..b64a3c8 --- /dev/null +++ b/osl.c @@ -0,0 +1,2151 @@ +/* + * Copyright (C) 2007-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file osl.c Object storage layer functions. */ +#include /* readdir() */ +#include + + +#include "log.h" +#include "error.h" +#include "fd.h" +#include "list.h" +#include "osl_core.h" + +/** + * Allocate a sufficiently large string and print into it. + * + * \param fmt A usual format string. + * + * Produce output according to \p fmt. No artificial bound on the length of the + * resulting string is imposed. + * + * \return This function either returns a pointer to a string that must be + * freed by the caller or aborts without returning. + * + * \sa printf(3). + */ +static __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...) +{ + char *msg; + + VSPRINTF(fmt, msg); + return msg; +} + +/** + * The log function. + * + * \param ll Loglevel. + * \param fml Usual format string. + * + * All XXX_LOG() macros use this function. + */ +__printf_2_3 void __log(int ll, const char* fmt,...) +{ + va_list argp; + FILE *outfd; + struct tm *tm; + time_t t1; + char str[255] = ""; + + if (ll < 2) + return; + outfd = stderr; + time(&t1); + tm = localtime(&t1); + strftime(str, sizeof(str), "%b %d %H:%M:%S", tm); + fprintf(outfd, "%s ", str); + va_start(argp, fmt); + vfprintf(outfd, fmt, argp); + va_end(argp); +} + +/** + * A wrapper for lseek(2). + * + * \param fd The file descriptor whose offset is to be to repositioned. + * \param offset A value-result parameter. + * \param whence Usual repositioning directive. + * + * Reposition the offset of the file descriptor \a fd to the argument \a offset + * according to the directive \a whence. Upon successful return, \a offset + * contains the resulting offset location as measured in bytes from the + * beginning of the file. + * + * \return Positive on success. Otherwise, the function returns \p -E_LSEEK. + * + * \sa lseek(2). + */ +int para_lseek(int fd, off_t *offset, int whence) +{ + *offset = lseek(fd, *offset, whence); + int ret = -E_LSEEK; + if (*offset == -1) + return ret; + return 1; +} + +/** + * Wrapper for the write system call. + * + * \param fd The file descriptor to write to. + * \param buf The buffer to write. + * \param size The length of \a buf in bytes. + * + * This function writes out the given buffer and retries if an interrupt + * occurred during the write. + * + * \return On success, the number of bytes written is returned, otherwise, the + * function returns \p -E_WRITE. + * + * \sa write(2). + */ +ssize_t para_write(int fd, const void *buf, size_t size) +{ + ssize_t ret; + + for (;;) { + ret = write(fd, buf, size); + if ((ret < 0) && (errno == EAGAIN || errno == EINTR)) + continue; + return ret >= 0? ret : -E_WRITE; + } +} + +/** + * Write the whole buffer to a file descriptor. + * + * \param fd The file descriptor to write to. + * \param buf The buffer to write. + * \param size The length of \a buf in bytes. + * + * This function writes the given buffer and continues on short writes and + * when interrupted by a signal. + * + * \return Positive on success, negative on errors. Possible errors: any + * errors returned by para_write(). + * + * \sa para_write(). + */ +ssize_t para_write_all(int fd, const void *buf, size_t size) +{ +// DEBUG_LOG("writing %zu bytes\n", size); + const char *b = buf; + while (size) { + ssize_t ret = para_write(fd, b, size); +// DEBUG_LOG("ret: %zd\n", ret); + if (ret < 0) + return ret; + b += ret; + size -= ret; + } + return 1; +} +/** + * Open a file, write the given buffer and close the file. + * + * \param filename Full path to the file to open. + * \param buf The buffer to write to the file. + * \param size The size of \a buf. + * + * \return Positive on success, negative on errors. Possible errors include: + * any errors from para_open() or para_write(). + * + * \sa para_open(), para_write(). + */ +int para_write_file(const char *filename, const void *buf, size_t size) +{ + int ret, fd; + + ret = para_open(filename, O_WRONLY | O_CREAT | O_EXCL, 0644); + if (ret < 0) + return ret; + fd = ret; + ret = para_write_all(fd, buf, size); + if (ret < 0) + goto out; + ret = 1; +out: + close(fd); + return ret; +} + +static int append_file(const char *filename, char *header, size_t header_size, + char *data, size_t data_size, uint32_t *new_pos) +{ + int ret, fd; + +// DEBUG_LOG("appending %zu + %zu bytes\n", header_size, data_size); + ret = para_open(filename, O_WRONLY | O_CREAT | O_APPEND, 0644); + if (ret < 0) + return ret; + fd = ret; + if (header && header_size) { + ret = para_write_all(fd, header, header_size); + if (ret < 0) + goto out; + } + ret = para_write_all(fd, data, data_size); + if (ret < 0) + goto out; + if (new_pos) { + off_t offset = 0; + ret = para_lseek(fd, &offset, SEEK_END); + if (ret < 0) + goto out; +// DEBUG_LOG("new file size: " FMT_OFF_T "\n", offset); + *new_pos = offset; + } + ret = 1; +out: + close(fd); + return ret; +} + +/** + * Traverse the given directory recursively. + * + * \param dirname The directory to traverse. + * \param func The function to call for each entry. + * \param private_data Pointer to an arbitrary data structure. + * + * For each regular file under \a dirname, the supplied function \a func is + * called. The full path of the regular file and the \a private_data pointer + * are passed to \a func. Directories for which the calling process has no + * permissions to change to are silently ignored. + * + * \return Standard. + */ +int for_each_file_in_dir(const char *dirname, + int (*func)(const char *, void *), void *private_data) +{ + DIR *dir; + struct dirent *entry; + int cwd_fd, ret2, ret = para_opendir(dirname, &dir, &cwd_fd); + + if (ret < 0) + return ret == -ERRNO_TO_ERROR(EACCES)? 1 : ret; + /* scan cwd recursively */ + while ((entry = readdir(dir))) { + mode_t m; + char *tmp; + struct stat s; + + if (!strcmp(entry->d_name, ".")) + continue; + if (!strcmp(entry->d_name, "..")) + continue; + if (lstat(entry->d_name, &s) == -1) + continue; + m = s.st_mode; + if (!S_ISREG(m) && !S_ISDIR(m)) + continue; + tmp = make_message("%s/%s", dirname, entry->d_name); + if (!S_ISDIR(m)) { + ret = func(tmp, private_data); + free(tmp); + if (ret < 0) + goto out; + continue; + } + /* directory */ + ret = for_each_file_in_dir(tmp, func, private_data); + free(tmp); + if (ret < 0) + goto out; + } + ret = 1; +out: + closedir(dir); + ret2 = para_fchdir(cwd_fd); + if (ret2 < 0 && ret >= 0) + ret = ret2; + close(cwd_fd); + return ret; +} + +static int verify_name(const char *name) +{ + if (!name) + return -E_BAD_NAME; + if (!*name) + return -E_BAD_NAME; + if (strchr(name, '/')) + return -E_BAD_NAME; + if (!strcmp(name, "..")) + return -E_BAD_NAME; + if (!strcmp(name, ".")) + return -E_BAD_NAME; + return 1; +} + +/** + * Compare two osl objects pointing to unsigned integers of 32 bit size. + * + * \param obj1 Pointer to the first integer. + * \param obj2 Pointer to the second integer. + * + * \return The values required for an osl compare function. + * + * \sa osl_compare_func, osl_hash_compare(). + */ +int uint32_compare(const struct osl_object *obj1, const struct osl_object *obj2) +{ + uint32_t d1 = read_u32((const char *)obj1->data); + uint32_t d2 = read_u32((const char *)obj2->data); + + if (d1 < d2) + return 1; + if (d1 > d2) + return -1; + return 0; +} + +/** + * Compare two osl objects pointing to hash values. + * + * \param obj1 Pointer to the first hash object. + * \param obj2 Pointer to the second hash object. + * + * \return The values required for an osl compare function. + * + * \sa osl_compare_func, uint32_compare(). + */ +int osl_hash_compare(const struct osl_object *obj1, const struct osl_object *obj2) +{ + return hash_compare((HASH_TYPE *)obj1->data, (HASH_TYPE *)obj2->data); +} + +static char *disk_storage_dirname(const struct osl_table *t, unsigned col_num, + const char *ds_name) +{ + char *dirname, *column_name = column_filename(t, col_num); + + if (!(t->desc->flags & OSL_LARGE_TABLE)) + return column_name; + dirname = make_message("%s/%.2s", column_name, ds_name); + free(column_name); + return dirname; +} + +static char *disk_storage_name_of_object(const struct osl_table *t, + const struct osl_object *obj) +{ + HASH_TYPE hash[HASH_SIZE]; + hash_object(obj, hash); + return disk_storage_name_of_hash(t, hash); +} + +static int disk_storage_name_of_row(const struct osl_table *t, + const struct osl_row *row, char **name) +{ + struct osl_object obj; + int ret = osl_get_object(t, row, t->disk_storage_name_column, &obj); + + if (ret < 0) + return ret; + *name = disk_storage_name_of_object(t, &obj); + return 1; +} + +static void column_name_hash(const char *col_name, HASH_TYPE *hash) +{ + hash_function(col_name, strlen(col_name), hash); +} + +static int init_column_descriptions(struct osl_table *t) +{ + int i, j, ret; + const struct osl_column_description *cd; + + ret = -E_BAD_TABLE_DESC; + ret = verify_name(t->desc->name); + if (ret < 0) + goto err; + ret = -E_BAD_DB_DIR; + if (!t->desc->dir && (t->num_disk_storage_columns || t->num_mapped_columns)) + goto err; + /* the size of the index header without column descriptions */ + t->index_header_size = IDX_COLUMN_DESCRIPTIONS; + FOR_EACH_COLUMN(i, t->desc, cd) { + struct osl_column *col = t->columns + i; + if (cd->storage_flags & OSL_RBTREE) { + if (!cd->compare_function) + return -E_NO_COMPARE_FUNC; + } + if (cd->storage_type == OSL_NO_STORAGE) + continue; + ret = -E_NO_COLUMN_NAME; + if (!cd->name || !cd->name[0]) + goto err; + ret = verify_name(cd->name); + if (ret < 0) + goto err; + t->index_header_size += index_column_description_size(cd->name); + column_name_hash(cd->name, col->name_hash); + ret = -E_DUPLICATE_COL_NAME; + for (j = i + 1; j < t->desc->num_columns; j++) { + const char *name2 = get_column_description(t->desc, + j)->name; + if (cd->name && name2 && !strcmp(cd->name, name2)) + goto err; + } + } + return 1; +err: + return ret; +} + +/** + * Initialize a struct table from given table description. + * + * \param desc The description of the osl table. + * \param table_ptr Result is returned here. + * + * This function performs several sanity checks on \p desc and returns if any + * of these tests fail. On success, a struct \p osl_table is allocated and + * initialized with data derived from \p desc. + * + * \return Positive on success, negative on errors. Possible errors include: \p + * E_BAD_TABLE_DESC, \p E_NO_COLUMN_DESC, \p E_NO_COLUMNS, \p + * E_BAD_STORAGE_TYPE, \p E_BAD_STORAGE_FLAGS, \p E_BAD_STORAGE_SIZE, \p + * E_NO_UNIQUE_RBTREE_COLUMN, \p E_NO_RBTREE_COL. + * + * \sa struct osl_table. + */ +int init_table_structure(const struct osl_table_description *desc, + struct osl_table **table_ptr) +{ + const struct osl_column_description *cd; + struct osl_table *t = para_calloc(sizeof(*t)); + int i, ret = -E_BAD_TABLE_DESC, have_disk_storage_name_column = 0; + + if (!desc) + goto err; + DEBUG_LOG("creating table structure for '%s' from table " + "description\n", desc->name); + ret = -E_NO_COLUMN_DESC; + if (!desc->column_descriptions) + goto err; + ret = -E_NO_COLUMNS; + if (!desc->num_columns) + goto err; + t->columns = para_calloc(desc->num_columns * sizeof(struct osl_column)); + t->desc = desc; + FOR_EACH_COLUMN(i, t->desc, cd) { + enum osl_storage_type st = cd->storage_type; + enum osl_storage_flags sf = cd->storage_flags; + struct osl_column *col = &t->columns[i]; + + ret = -E_BAD_STORAGE_TYPE; + if (st != OSL_MAPPED_STORAGE && st != OSL_DISK_STORAGE + && st != OSL_NO_STORAGE) + goto err; + ret = -E_BAD_STORAGE_FLAGS; + if (st == OSL_DISK_STORAGE && sf & OSL_RBTREE) + goto err; + ret = -E_BAD_STORAGE_SIZE; + if (sf & OSL_FIXED_SIZE && !cd->data_size) + goto err; + switch (st) { + case OSL_DISK_STORAGE: + t->num_disk_storage_columns++; + break; + case OSL_MAPPED_STORAGE: + t->num_mapped_columns++; + col->index_offset = t->row_index_size; + t->row_index_size += 8; + break; + case OSL_NO_STORAGE: + col->volatile_num = t->num_volatile_columns; + t->num_volatile_columns++; + break; + } + if (sf & OSL_RBTREE) { + col->rbtree_num = t->num_rbtrees; + t->num_rbtrees++; + if ((sf & OSL_UNIQUE) && (st == OSL_MAPPED_STORAGE)) { + if (!have_disk_storage_name_column) + t->disk_storage_name_column = i; + have_disk_storage_name_column = 1; + } + } + } + ret = -E_NO_UNIQUE_RBTREE_COLUMN; + if (t->num_disk_storage_columns && !have_disk_storage_name_column) + goto err; + ret = -E_NO_RBTREE_COL; + if (!t->num_rbtrees) + goto err; + /* success */ + DEBUG_LOG("OK. Index entry size: %u\n", t->row_index_size); + ret = init_column_descriptions(t); + if (ret < 0) + goto err; + *table_ptr = t; + return 1; +err: + free(t->columns); + free(t); + return ret; +} + +/** + * Read the table description from index header. + * + * \param map The memory mapping of the index file. + * \param desc The values found in the index header are returned here. + * + * Read the index header, check for the paraslash magic string and the table version number. + * Read all information stored in the index header into \a desc. + * + * \return Positive on success, negative on errors. + * + * \sa struct osl_table_description, osl_create_table. + */ +int read_table_desc(struct osl_object *map, struct osl_table_description *desc) +{ + char *buf = map->data; + uint8_t version; + uint16_t header_size; + int ret, i; + unsigned offset; + struct osl_column_description *cd; + + if (map->size < MIN_INDEX_HEADER_SIZE(1)) + return -E_SHORT_TABLE; + if (strncmp(buf + IDX_PARA_MAGIC, PARA_MAGIC, strlen(PARA_MAGIC))) + return -E_NO_MAGIC; + version = read_u8(buf + IDX_VERSION); + if (version < MIN_TABLE_VERSION || version > MAX_TABLE_VERSION) + return -E_VERSION_MISMATCH; + desc->flags = read_u8(buf + IDX_TABLE_FLAGS); + desc->num_columns = read_u16(buf + IDX_NUM_COLUMNS); + DEBUG_LOG("%u columns\n", desc->num_columns); + if (!desc->num_columns) + return -E_NO_COLUMNS; + header_size = read_u16(buf + IDX_HEADER_SIZE); + if (map->size < header_size) + return -E_BAD_SIZE; + desc->column_descriptions = para_calloc(desc->num_columns + * sizeof(struct osl_column_description)); + offset = IDX_COLUMN_DESCRIPTIONS; + FOR_EACH_COLUMN(i, desc, cd) { + char *null_byte; + + ret = -E_SHORT_TABLE; + if (map->size < offset + MIN_IDX_COLUMN_DESCRIPTION_SIZE) { + ERROR_LOG("map size = %zu < %u = offset + min desc size\n", + map->size, offset + MIN_IDX_COLUMN_DESCRIPTION_SIZE); + goto err; + } + cd->storage_type = read_u16(buf + offset + IDX_CD_STORAGE_TYPE); + cd->storage_flags = read_u16(buf + offset + + IDX_CD_STORAGE_FLAGS); + cd->data_size = read_u32(buf + offset + IDX_CD_DATA_SIZE); + null_byte = memchr(buf + offset + IDX_CD_NAME, '\0', + map->size - offset - IDX_CD_NAME); + ret = -E_INDEX_CORRUPTION; + if (!null_byte) + goto err; + cd->name = para_strdup(buf + offset + IDX_CD_NAME); + offset += index_column_description_size(cd->name); + } + if (offset != header_size) { + ret = -E_INDEX_CORRUPTION; + ERROR_LOG("real header size = %u != %u = stored header size\n", + offset, header_size); + goto err; + } + return 1; +err: + FOR_EACH_COLUMN(i, desc, cd) + free(cd->name); + return ret; +} + +/* + * check whether the table description given by \p t->desc matches the on-disk + * table structure stored in the index of \a t. + */ +static int compare_table_descriptions(struct osl_table *t) +{ + int i, ret; + struct osl_table_description desc; + const struct osl_column_description *cd1, *cd2; + + /* read the on-disk structure into desc */ + ret = read_table_desc(&t->index_map, &desc); + if (ret < 0) + return ret; + ret = -E_BAD_TABLE_FLAGS; + if (desc.flags != t->desc->flags) + goto out; + ret = -E_BAD_COLUMN_NUM; + if (desc.num_columns > t->desc->num_columns) + goto out; + if (desc.num_columns < t->desc->num_columns) { + struct osl_column_description *cd; + unsigned diff = t->desc->num_columns - desc.num_columns; + INFO_LOG("extending table by %u volatile columns\n", diff); + ret = -ERRNO_TO_ERROR(ENOMEM); + desc.column_descriptions = realloc(desc.column_descriptions, + t->desc->num_columns * sizeof(struct osl_column_description)); + if (!desc.column_descriptions) + goto out; + for (i = desc.num_columns; i < t->desc->num_columns; i++) { + cd = get_column_description(&desc, i); + cd->storage_type = OSL_NO_STORAGE; + cd->name = NULL; + } + desc.num_columns += diff; + } + FOR_EACH_COLUMN(i, t->desc, cd1) { + cd2 = get_column_description(&desc, i); + ret = -E_BAD_STORAGE_TYPE; + if (cd1->storage_type != cd2->storage_type) + goto out; + if (cd1->storage_type == OSL_NO_STORAGE) + continue; + ret = -E_BAD_STORAGE_FLAGS; + if (cd1->storage_flags != cd2->storage_flags) { + ERROR_LOG("sf1 = %u != %u = sf2\n", + cd1->storage_flags, cd2->storage_flags); + goto out; + } + ret = -E_BAD_DATA_SIZE; + if (cd1->storage_flags & OSL_FIXED_SIZE) + if (cd1->data_size != cd2->data_size) + goto out; + ret = -E_BAD_COLUMN_NAME; + if (strcmp(cd1->name, cd2->name)) + goto out; + } + DEBUG_LOG("table description of '%s' matches on-disk data, good\n", + t->desc->name); + ret = 1; +out: + FOR_EACH_COLUMN(i, &desc, cd1) + free(cd1->name); + free(desc.column_descriptions); + return ret; +} + +static int create_table_index(struct osl_table *t) +{ + char *buf, *filename; + int i, ret; + size_t size = t->index_header_size; + const struct osl_column_description *cd; + unsigned offset; + + INFO_LOG("creating %zu byte index for table %s\n", size, + t->desc->name); + buf = para_calloc(size); + sprintf(buf + IDX_PARA_MAGIC, "%s", PARA_MAGIC); + write_u8(buf + IDX_TABLE_FLAGS, t->desc->flags); + write_u8(buf + IDX_DIRTY_FLAG, 0); + write_u8(buf + IDX_VERSION, CURRENT_TABLE_VERSION); + write_u16(buf + IDX_NUM_COLUMNS, t->num_mapped_columns + t->num_disk_storage_columns); + write_u16(buf + IDX_HEADER_SIZE, t->index_header_size); + offset = IDX_COLUMN_DESCRIPTIONS; + FOR_EACH_COLUMN(i, t->desc, cd) { + /* no need to store info about volatile storage */ + if (cd->storage_type == OSL_NO_STORAGE) + continue; + write_u16(buf + offset + IDX_CD_STORAGE_TYPE, + cd->storage_type); + write_u16(buf + offset + IDX_CD_STORAGE_FLAGS, + cd->storage_flags); + if (cd->storage_flags & OSL_FIXED_SIZE) + write_u32(buf + offset + IDX_CD_DATA_SIZE, + cd->data_size); + strcpy(buf + offset + IDX_CD_NAME, cd->name); + offset += index_column_description_size(cd->name); + } + assert(offset = size); + filename = index_filename(t->desc); + ret = para_write_file(filename, buf, size); + free(buf); + free(filename); + return ret; +} + +/** + * Create a new osl table. + * + * \param desc Pointer to the table description. + * + * \return Standard. + */ +int osl_create_table(const struct osl_table_description *desc) +{ + const struct osl_column_description *cd; + char *table_dir = NULL, *filename; + struct osl_table *t; + int i, ret = init_table_structure(desc, &t); + + if (ret < 0) + return ret; + INFO_LOG("creating %s\n", desc->name); + FOR_EACH_COLUMN(i, t->desc, cd) { + if (cd->storage_type == OSL_NO_STORAGE) + continue; + if (!table_dir) { + ret = para_mkdir(desc->dir, 0777); + if (ret < 0 && !is_errno(-ret, EEXIST)) + goto out; + table_dir = make_message("%s/%s", desc->dir, + desc->name); + ret = para_mkdir(table_dir, 0777); + if (ret < 0) + goto out; + } + filename = column_filename(t, i); + INFO_LOG("filename: %s\n", filename); + if (cd->storage_type == OSL_MAPPED_STORAGE) { + ret = para_open(filename, O_RDWR | O_CREAT | O_EXCL, + 0644); + free(filename); + if (ret < 0) + goto out; + close(ret); + continue; + } + /* DISK STORAGE */ + ret = para_mkdir(filename, 0777); + free(filename); + if (ret < 0) + goto out; + } + if (t->num_mapped_columns) { + ret = create_table_index(t); + if (ret < 0) + goto out; + } + ret = 1; +out: + free(table_dir); + free(t->columns); + free(t); + return ret; +} + +static int table_is_dirty(struct osl_table *t) +{ + char *buf = (char *)t->index_map.data + IDX_DIRTY_FLAG; + uint8_t dirty = read_u8(buf) & 0x1; + return !!dirty; +} + +static void mark_table_dirty(struct osl_table *t) +{ + char *buf = (char *)t->index_map.data + IDX_DIRTY_FLAG; + write_u8(buf, read_u8(buf) | 1); +} + +static void mark_table_clean(struct osl_table *t) +{ + char *buf = (char *)t->index_map.data + IDX_DIRTY_FLAG; + write_u8(buf, read_u8(buf) & 0xfe); +} + +static void unmap_column(struct osl_table *t, unsigned col_num) +{ + struct osl_object map = t->columns[col_num].data_map; + int ret; + if (!map.data) + return; + ret = para_munmap(map.data, map.size); + assert(ret > 0); + map.data = NULL; +} + +/** + * Unmap all mapped files of an osl table. + * + * \param t Pointer to a mapped table. + * \param flags Options for unmapping. + * + * \return Positive on success, negative on errors. + * + * \sa map_table(), enum osl_close_flags, para_munmap(). + */ +int unmap_table(struct osl_table *t, enum osl_close_flags flags) +{ + unsigned i; + const struct osl_column_description *cd; + int ret; + + if (!t->num_mapped_columns) /* can this ever happen? */ + return 1; + DEBUG_LOG("unmapping table '%s'\n", t->desc->name); + if (!t->index_map.data) + return -E_NOT_MAPPED; + if (flags & OSL_MARK_CLEAN) + mark_table_clean(t); + ret = para_munmap(t->index_map.data, t->index_map.size); + if (ret < 0) + return ret; + t->index_map.data = NULL; + if (!t->num_rows) + return 1; + FOR_EACH_MAPPED_COLUMN(i, t, cd) + unmap_column(t, i); + return 1; +} + +static int map_column(struct osl_table *t, unsigned col_num) +{ + struct stat statbuf; + char *filename = column_filename(t, col_num); + int ret = -E_STAT; + if (stat(filename, &statbuf) < 0) { + free(filename); + return ret; + } + if (!(S_IFREG & statbuf.st_mode)) { + free(filename); + return ret; + } + ret = mmap_full_file(filename, O_RDWR, + &t->columns[col_num].data_map.data, + &t->columns[col_num].data_map.size, + NULL); + free(filename); + return ret; +} + +/** + * Map the index file and all columns of type \p OSL_MAPPED_STORAGE into memory. + * + * \param t Pointer to an initialized table structure. + * \param flags Mapping options. + * + * \return Negative return value on errors; on success the number of rows + * (including invalid rows) is returned. + * + * \sa unmap_table(), enum map_table_flags, osl_open_table(), mmap(2). + */ +int map_table(struct osl_table *t, enum map_table_flags flags) +{ + char *filename; + const struct osl_column_description *cd; + int i = 0, ret, num_rows = 0; + + if (!t->num_mapped_columns) + return 0; + if (t->index_map.data) + return -E_ALREADY_MAPPED; + filename = index_filename(t->desc); + DEBUG_LOG("mapping table '%s' (index: %s)\n", t->desc->name, filename); + ret = mmap_full_file(filename, flags & MAP_TBL_FL_MAP_RDONLY? + O_RDONLY : O_RDWR, &t->index_map.data, &t->index_map.size, NULL); + free(filename); + if (ret < 0) + return ret; + if (flags & MAP_TBL_FL_VERIFY_INDEX) { + ret = compare_table_descriptions(t); + if (ret < 0) + goto err; + } + ret = -E_BUSY; + if (!(flags & MAP_TBL_FL_IGNORE_DIRTY)) { + if (table_is_dirty(t)) { + ERROR_LOG("%s is dirty\n", t->desc->name); + goto err; + } + } + mark_table_dirty(t); + num_rows = table_num_rows(t); + if (!num_rows) + return num_rows; + /* map data files */ + FOR_EACH_MAPPED_COLUMN(i, t, cd) { + ret = map_column(t, i); + if (ret < 0) + goto err; + } + return num_rows; +err: /* unmap what is already mapped */ + for (i--; i >= 0; i--) { + struct osl_object map = t->columns[i].data_map; + para_munmap(map.data, map.size); + map.data = NULL; + } + para_munmap(t->index_map.data, t->index_map.size); + t->index_map.data = NULL; + return ret; +} + +/** + * Retrieve a mapped object by row and column number. + * + * \param t Pointer to an open osl table. + * \param col_num Number of the mapped column containing the object to retrieve. + * \param row_num Number of the row containing the object to retrieve. + * \param obj The result is returned here. + * + * It is considered an error if \a col_num does not refer to a column + * of storage type \p OSL_MAPPED_STORAGE. + * + * \return Positive on success, negative on errors. Possible errors include: + * \p E_BAD_ROW_NUM, \p E_INVALID_OBJECT. + * + * \sa osl_storage_type. + */ +int get_mapped_object(const struct osl_table *t, unsigned col_num, + uint32_t row_num, struct osl_object *obj) +{ + struct osl_column *col = &t->columns[col_num]; + uint32_t offset; + char *header; + char *cell_index; + int ret; + + if (t->num_rows <= row_num) + return -E_BAD_ROW_NUM; + ret = get_cell_index(t, row_num, col_num, &cell_index); + if (ret < 0) + return ret; + offset = read_u32(cell_index); + obj->size = read_u32(cell_index + 4) - 1; + header = col->data_map.data + offset; + obj->data = header + 1; + if (read_u8(header) == 0xff) { + ERROR_LOG("col %u, size %zu, offset %u\n", col_num, + obj->size, offset); + return -E_INVALID_OBJECT; + } + return 1; +} + +static int search_rbtree(const struct osl_object *obj, + const struct osl_table *t, unsigned col_num, + struct rb_node **result, struct rb_node ***rb_link) +{ + struct osl_column *col = &t->columns[col_num]; + struct rb_node **new = &col->rbtree.rb_node, *parent = NULL; + const struct osl_column_description *cd = + get_column_description(t->desc, col_num); + enum osl_storage_type st = cd->storage_type; + while (*new) { + struct osl_row *this_row = get_row_pointer(*new, + col->rbtree_num); + int ret; + struct osl_object this_obj; + parent = *new; + if (st == OSL_MAPPED_STORAGE) { + ret = get_mapped_object(t, col_num, this_row->num, + &this_obj); + if (ret < 0) + return ret; + } else + this_obj = this_row->volatile_objects[col->volatile_num]; + ret = cd->compare_function(obj, &this_obj); + if (!ret) { + if (result) + *result = get_rb_node_pointer(this_row, + col->rbtree_num); + return 1; + } + if (ret < 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + if (result) + *result = parent; + if (rb_link) + *rb_link = new; + return -E_RB_KEY_NOT_FOUND; +} + +static int insert_rbtree(struct osl_table *t, unsigned col_num, + const struct osl_row *row, const struct osl_object *obj) +{ + struct rb_node *parent, **rb_link; + unsigned rbtree_num; + struct rb_node *n; + int ret = search_rbtree(obj, t, col_num, &parent, &rb_link); + + if (ret > 0) + return -E_RB_KEY_EXISTS; + rbtree_num = t->columns[col_num].rbtree_num; + n = get_rb_node_pointer(row, rbtree_num); + rb_link_node(n, parent, rb_link); + rb_insert_color(n, &t->columns[col_num].rbtree); + return 1; +} + +static void remove_rb_node(struct osl_table *t, unsigned col_num, + const struct osl_row *row) +{ + struct osl_column *col = &t->columns[col_num]; + const struct osl_column_description *cd = + get_column_description(t->desc, col_num); + enum osl_storage_flags sf = cd->storage_flags; + struct rb_node *victim, *splice_out_node, *tmp; + if (!(sf & OSL_RBTREE)) + return; + /* + * Which node is removed/spliced out actually depends on how many + * children the victim node has: If it has no children, it gets + * deleted. If it has one child, it gets spliced out. If it has two + * children, its successor (which has at most a right child) gets + * spliced out. + */ + victim = get_rb_node_pointer(row, col->rbtree_num); + if (victim->rb_left && victim->rb_right) + splice_out_node = rb_next(victim); + else + splice_out_node = victim; + /* Go up to the root and decrement the size of each node in the path. */ + for (tmp = splice_out_node; tmp; tmp = rb_parent(tmp)) + tmp->size--; + rb_erase(victim, &col->rbtree); +} + +static int add_row_to_rbtrees(struct osl_table *t, uint32_t row_num, + struct osl_object *volatile_objs, struct osl_row **row_ptr) +{ + unsigned i; + int ret; + struct osl_row *row = allocate_row(t->num_rbtrees); + const struct osl_column_description *cd; + + row->num = row_num; + row->volatile_objects = volatile_objs; + FOR_EACH_RBTREE_COLUMN(i, t, cd) { + if (cd->storage_type == OSL_MAPPED_STORAGE) { + struct osl_object obj; + ret = get_mapped_object(t, i, row_num, &obj); + if (ret < 0) + goto err; + ret = insert_rbtree(t, i, row, &obj); + } else { /* volatile */ + const struct osl_object *obj + = volatile_objs + t->columns[i].volatile_num; + ret = insert_rbtree(t, i, row, obj); + } + if (ret < 0) + goto err; + } + if (row_ptr) + *row_ptr = row; + return 1; +err: /* rollback changes, i.e. remove added entries from rbtrees */ + while (i) + remove_rb_node(t, i--, row); + free(row); + return ret; +} + +static void free_volatile_objects(const struct osl_table *t, + enum osl_close_flags flags) +{ + int i, j; + struct rb_node *n; + struct osl_column *rb_col; + const struct osl_column_description *cd; + + if (!t->num_volatile_columns) + return; + /* find the first rbtree column (any will do) */ + FOR_EACH_RBTREE_COLUMN(i, t, cd) + break; + rb_col = t->columns + i; + /* walk that rbtree and free all volatile objects */ + for (n = rb_first(&rb_col->rbtree); n; n = rb_next(n)) { + struct osl_row *r = get_row_pointer(n, rb_col->rbtree_num); + if (flags & OSL_FREE_VOLATILE) + FOR_EACH_VOLATILE_COLUMN(j, t, cd) { + if (cd->storage_flags & OSL_DONT_FREE) + continue; + free(r->volatile_objects[ + t->columns[j].volatile_num].data); + } +// for (j = 0; j < t->num_volatile_columns; j++) +// free(r->volatile_objects[j].data); + free(r->volatile_objects); + } +} + +/** + * Erase all rbtree nodes and free resources. + * + * \param t Pointer to an open osl table. + * + * This function is called by osl_close_table(). + */ +void clear_rbtrees(struct osl_table *t) +{ + const struct osl_column_description *cd; + unsigned i, rbtrees_cleared = 0; + + FOR_EACH_RBTREE_COLUMN(i, t, cd) { + struct osl_column *col = &t->columns[i]; + struct rb_node *n; + rbtrees_cleared++; + for (n = rb_first(&col->rbtree); n;) { + struct osl_row *r; + rb_erase(n, &col->rbtree); + if (rbtrees_cleared == t->num_rbtrees) { + r = get_row_pointer(n, col->rbtree_num); + n = rb_next(n); + free(r); + } else + n = rb_next(n); + } + } + +} + +/** + * Close an osl table. + * + * \param t Pointer to the table to be closed. + * \param flags Options for what should be cleaned up. + * + * If osl_open_table() succeeds, the resulting table pointer must later be + * passed to this function in order to flush all changes to the file system and + * to free the resources that were allocated by osl_open_table(). + * + * \return Positive on success, negative on errors. Possible errors: \p E_BAD_TABLE, + * errors returned by unmap_table(). + * + * \sa osl_open_table(), unmap_table(). + */ +int osl_close_table(struct osl_table *t, enum osl_close_flags flags) +{ + int ret; + + if (!t) + return -E_BAD_TABLE; + free_volatile_objects(t, flags); + clear_rbtrees(t); + ret = unmap_table(t, flags); + if (ret < 0) + ERROR_LOG("unmap_table failed: %d\n", ret); + free(t->columns); + free(t); + return ret; +} + +/** + * Find out whether the given row number corresponds to an invalid row. + * + * \param t Pointer to the osl table. + * \param row_num The number of the row in question. + * + * By definition, a row is considered invalid if all its index entries + * are invalid. + * + * \return Positive if \a row_num corresponds to an invalid row, + * zero if it corresponds to a valid row, negative on errors. + */ +int row_is_invalid(struct osl_table *t, uint32_t row_num) +{ + char *row_index; + int i, ret = get_row_index(t, row_num, &row_index); + + if (ret < 0) + return ret; + for (i = 0; i < t->row_index_size; i++) { + if ((unsigned char)row_index[i] != 0xff) + return 0; + } + INFO_LOG("row %d is invalid\n", row_num); + return 1; +} + +/** + * Invalidate a row of an osl table. + * + * \param t Pointer to an open osl table. + * \param row_num Number of the row to mark as invalid. + * + * This function marks each mapped object in the index entry of \a row as + * invalid. + * + * \return Positive on success, negative on errors. + */ +int mark_row_invalid(struct osl_table *t, uint32_t row_num) +{ + char *row_index; + int ret = get_row_index(t, row_num, &row_index); + + if (ret < 0) + return ret; + INFO_LOG("marking row %d as invalid\n", row_num); + memset(row_index, 0xff, t->row_index_size); + return 1; +} + +/** + * Initialize all rbtrees and compute number of invalid rows. + * + * \param t The table containing the rbtrees to be initialized. + * + * \return Positive on success, negative on errors. + */ +int init_rbtrees(struct osl_table *t) +{ + int i, ret; + const struct osl_column_description *cd; + + /* create rbtrees */ + FOR_EACH_RBTREE_COLUMN(i, t, cd) + t->columns[i].rbtree = RB_ROOT; + /* add valid rows to rbtrees */ + t->num_invalid_rows = 0; + for (i = 0; i < t->num_rows; i++) { + ret = row_is_invalid(t, i); + if (ret < 0) + return ret; + if (ret) { + t->num_invalid_rows++; + continue; + } + ret = add_row_to_rbtrees(t, i, NULL, NULL); + if (ret < 0) + return ret; + } + return 1; +} + +/** + * Open an osl table. + * + * Each osl table must be opened before its data can be accessed. + * + * \param table_desc Describes the table to be opened. + * \param result Contains a pointer to the open table on success. + * + * The table description given by \a desc should coincide with the + * description used at creation time. + * + * \return Standard. + */ +int osl_open_table(const struct osl_table_description *table_desc, + struct osl_table **result) +{ + int i, ret; + struct osl_table *t; + const struct osl_column_description *cd; + + INFO_LOG("opening table %s\n", table_desc->name); + ret = init_table_structure(table_desc, &t); + if (ret < 0) + return ret; + FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) { + /* check if directory exists */ + char *dirname = column_filename(t, i); + struct stat statbuf; + ret = stat(dirname, &statbuf); + free(dirname); + if (ret < 0) { + ret = -ERRNO_TO_ERROR(errno); + goto err; + } + ret = -ERRNO_TO_ERROR(ENOTDIR); + if (!S_ISDIR(statbuf.st_mode)) + goto err; + } + ret = map_table(t, MAP_TBL_FL_VERIFY_INDEX); + if (ret < 0) + goto err; + t->num_rows = ret; + DEBUG_LOG("num rows: %d\n", t->num_rows); + ret = init_rbtrees(t); + if (ret < 0) { + osl_close_table(t, OSL_MARK_CLEAN); /* ignore further errors */ + return ret; + } + *result = t; + return 1; +err: + free(t->columns); + free(t); + return ret; +} + +static int create_disk_storage_object_dir(const struct osl_table *t, + unsigned col_num, const char *ds_name) +{ + char *dirname; + int ret; + + if (!(t->desc->flags & OSL_LARGE_TABLE)) + return 1; + dirname = disk_storage_dirname(t, col_num, ds_name); + ret = para_mkdir(dirname, 0777); + free(dirname); + if (ret < 0 && !is_errno(-ret, EEXIST)) + return ret; + return 1; +} + +static int write_disk_storage_file(const struct osl_table *t, unsigned col_num, + const struct osl_object *obj, const char *ds_name) +{ + int ret; + char *filename; + + ret = create_disk_storage_object_dir(t, col_num, ds_name); + if (ret < 0) + return ret; + filename = disk_storage_path(t, col_num, ds_name); + ret = para_write_file(filename, obj->data, obj->size); + free(filename); + return ret; +} + +static int append_map_file(const struct osl_table *t, unsigned col_num, + const struct osl_object *obj, uint32_t *new_size) +{ + char *filename = column_filename(t, col_num); + int ret; + char header = 0; /* zero means valid object */ + +// DEBUG_LOG("appending %zu + 1 byte\n", obj->size); + ret = append_file(filename, &header, 1, obj->data, obj->size, + new_size); + free(filename); + return ret; +} + +static int append_row_index(const struct osl_table *t, char *row_index) +{ + char *filename; + int ret; + + if (!t->num_mapped_columns) + return 1; + filename = index_filename(t->desc); + ret = append_file(filename, NULL, 0, row_index, + t->row_index_size, NULL); + free(filename); + return ret; +} + +/** + * A wrapper for truncate(2) + * + * \param path Name of the regular file to truncate + * \param size Number of bytes to \b shave \b off + * + * Truncate the regular file named by \a path by \a size bytes. + * + * \return Positive on success, negative on errors. Possible errors include: \p + * E_STAT, \p E_BAD_SIZE, \p E_TRUNC. + * + * \sa truncate(2) + */ +int para_truncate(const char *path, off_t size) +{ + int ret; + struct stat statbuf; + + ret = -E_STAT; + if (stat(path, &statbuf) < 0) + goto out; + ret = -E_BAD_SIZE; + if (statbuf.st_size < size) + goto out; + ret = -E_TRUNC; + if (truncate(path, statbuf.st_size - size) < 0) + goto out; + ret = 1; +out: + return ret; +} + +static int truncate_mapped_file(const struct osl_table *t, unsigned col_num, + off_t size) +{ + char *filename = column_filename(t, col_num); + int ret = para_truncate(filename, size); + free(filename); + return ret; +} + +static int delete_disk_storage_file(const struct osl_table *t, unsigned col_num, + const char *ds_name) +{ + char *dirname, *filename = disk_storage_path(t, col_num, ds_name); + int ret = unlink(filename), err = errno; + + free(filename); + if (ret < 0) + return -ERRNO_TO_ERROR(err); + if (!(t->desc->flags & OSL_LARGE_TABLE)) + return 1; + dirname = disk_storage_dirname(t, col_num, ds_name); + rmdir(dirname); + free(dirname); + return 1; +} + +/** + * Add a new row to an osl table and retrieve this row. + * + * \param t Pointer to an open osl table. + * \param objects Array of objects to be added. + * \param row Result pointer. + * + * The \a objects parameter must point to an array containing one object per + * column. The order of the objects in the array is given by the table + * description of \a table. Several sanity checks are performed during object + * insertion and the function returns without modifying the table if any of + * these tests fail. In fact, it is atomic in the sense that it either + * succeeds or leaves the table unchanged (i.e. either all or none of the + * objects are added to the table). + * + * It is considered an error if an object is added to a column with associated + * rbtree if this object is equal to an object already contained in that column + * (i.e. the compare function for the column's rbtree returns zero). + * + * Possible errors include: \p E_RB_KEY_EXISTS, \p E_BAD_DATA_SIZE. + * + * \return Positive on success, negative on errors. + * + * \sa struct osl_table_description, osl_compare_func, osl_add_row(). + */ +int osl_add_and_get_row(struct osl_table *t, struct osl_object *objects, + struct osl_row **row) +{ + int i, ret; + char *ds_name = NULL; + struct rb_node **rb_parents = NULL, ***rb_links = NULL; + char *new_row_index = NULL; + struct osl_object *volatile_objs = NULL; + const struct osl_column_description *cd; + + if (!t) + return -E_BAD_TABLE; + rb_parents = para_malloc(t->num_rbtrees * sizeof(struct rn_node*)); + rb_links = para_malloc(t->num_rbtrees * sizeof(struct rn_node**)); + if (t->num_mapped_columns) + new_row_index = para_malloc(t->row_index_size); + /* pass 1: sanity checks */ +// DEBUG_LOG("sanity tests: %p:%p\n", objects[0].data, +// objects[1].data); + FOR_EACH_COLUMN(i, t->desc, cd) { + enum osl_storage_type st = cd->storage_type; + enum osl_storage_flags sf = cd->storage_flags; + +// ret = -E_NULL_OBJECT; +// if (!objects[i]) +// goto out; + if (st == OSL_DISK_STORAGE) + continue; + if (sf & OSL_RBTREE) { + unsigned rbtree_num = t->columns[i].rbtree_num; + ret = -E_RB_KEY_EXISTS; +// DEBUG_LOG("checking whether %p exists\n", +// objects[i].data); + if (search_rbtree(objects + i, t, i, + &rb_parents[rbtree_num], + &rb_links[rbtree_num]) > 0) + goto out; + } + if (sf & OSL_FIXED_SIZE) { +// DEBUG_LOG("fixed size. need: %zu, have: %d\n", +// objects[i].size, cd->data_size); + ret = -E_BAD_DATA_SIZE; + if (objects[i].size != cd->data_size) + goto out; + } + } + if (t->num_disk_storage_columns) + ds_name = disk_storage_name_of_object(t, + &objects[t->disk_storage_name_column]); + ret = unmap_table(t, OSL_MARK_CLEAN); + if (ret < 0) + goto out; +// DEBUG_LOG("sanity tests passed%s\n", ""); + /* pass 2: create data files, append map data */ + FOR_EACH_COLUMN(i, t->desc, cd) { + enum osl_storage_type st = cd->storage_type; + if (st == OSL_NO_STORAGE) + continue; + if (st == OSL_MAPPED_STORAGE) { + uint32_t new_size; + struct osl_column *col = &t->columns[i]; +// DEBUG_LOG("appending object of size %zu\n", +// objects[i].size); + ret = append_map_file(t, i, objects + i, &new_size); + if (ret < 0) + goto rollback; + update_cell_index(new_row_index, col, new_size, + objects[i].size); + continue; + } + /* DISK_STORAGE */ + ret = write_disk_storage_file(t, i, objects + i, ds_name); + if (ret < 0) + goto rollback; + } + ret = append_row_index(t, new_row_index); + if (ret < 0) + goto rollback; + ret = map_table(t, MAP_TBL_FL_VERIFY_INDEX); + if (ret < 0) { /* truncate index and rollback changes */ + char *filename = index_filename(t->desc); + para_truncate(filename, t->row_index_size); + free(filename); + goto rollback; + } + /* pass 3: add entry to rbtrees */ + if (t->num_volatile_columns) { + volatile_objs = para_calloc(t->num_volatile_columns + * sizeof(struct osl_object)); + FOR_EACH_VOLATILE_COLUMN(i, t, cd) + volatile_objs[t->columns[i].volatile_num] = objects[i]; + } + t->num_rows++; +// DEBUG_LOG("adding new entry as row #%d\n", t->num_rows - 1); + ret = add_row_to_rbtrees(t, t->num_rows - 1, volatile_objs, row); + if (ret < 0) + goto out; +// DEBUG_LOG("added new entry as row #%d\n", t->num_rows - 1); + ret = 1; + goto out; +rollback: /* rollback all changes made, ignore further errors */ + for (i--; i >= 0; i--) { + cd = get_column_description(t->desc, i); + enum osl_storage_type st = cd->storage_type; + if (st == OSL_NO_STORAGE) + continue; + + if (st == OSL_MAPPED_STORAGE) + truncate_mapped_file(t, i, objects[i].size); + else /* disk storage */ + delete_disk_storage_file(t, i, ds_name); + } + /* ignore error and return previous error value */ + map_table(t, MAP_TBL_FL_VERIFY_INDEX); +out: + free(new_row_index); + free(ds_name); + free(rb_parents); + free(rb_links); + return ret; +} + +/** + * Add a new row to an osl table. + * + * \param t Same meaning as osl_add_and_get_row(). + * \param objects Same meaning as osl_add_and_get_row(). + * + * \return The return value of the underlying call to osl_add_and_get_row(). + * + * This is equivalent to osl_add_and_get_row(t, objects, NULL). + */ +int osl_add_row(struct osl_table *t, struct osl_object *objects) +{ + return osl_add_and_get_row(t, objects, NULL); +} + +/** + * Retrieve an object identified by row and column + * + * \param t Pointer to an open osl table. + * \param r Pointer to the row. + * \param col_num The column number. + * \param object The result pointer. + * + * The column determined by \a col_num must be of type \p OSL_MAPPED_STORAGE + * or \p OSL_NO_STORAGE, i.e. no disk storage objects may be retrieved by this + * function. + * + * \return Positive if object was found, negative on errors. Possible errors + * include: \p E_BAD_TABLE, \p E_BAD_STORAGE_TYPE. + * + * \sa osl_storage_type, osl_open_disk_object(). + */ +int osl_get_object(const struct osl_table *t, const struct osl_row *r, + unsigned col_num, struct osl_object *object) +{ + const struct osl_column_description *cd; + + if (!t) + return -E_BAD_TABLE; + cd = get_column_description(t->desc, col_num); + /* col must not be disk storage */ + if (cd->storage_type == OSL_DISK_STORAGE) + return -E_BAD_STORAGE_TYPE; + if (cd->storage_type == OSL_MAPPED_STORAGE) + return get_mapped_object(t, col_num, r->num, object); + /* volatile */ + *object = r->volatile_objects[t->columns[col_num].volatile_num]; + return 1; +} + +static int mark_mapped_object_invalid(const struct osl_table *t, + uint32_t row_num, unsigned col_num) +{ + struct osl_object obj; + char *p; + int ret = get_mapped_object(t, col_num, row_num, &obj); + + if (ret < 0) + return ret; + p = obj.data; + p--; + *p = 0xff; + return 1; +} + +/** + * Delete a row from an osl table. + * + * \param t Pointer to an open osl table. + * \param row Pointer to the row to delete. + * + * This removes all disk storage objects, removes all rbtree nodes, and frees + * all volatile objects belonging to the given row. For mapped columns, the + * data is merely marked invalid and may be pruned from time to time by + * para_fsck. + * + * \return Positive on success, negative on errors. Possible errors include: + * \p E_BAD_TABLE, errors returned by osl_get_object(). + */ +int osl_del_row(struct osl_table *t, struct osl_row *row) +{ + struct osl_row *r = row; + int i, ret; + const struct osl_column_description *cd; + + if (!t) + return -E_BAD_TABLE; + INFO_LOG("deleting row %p\n", row); + + if (t->num_disk_storage_columns) { + char *ds_name; + ret = disk_storage_name_of_row(t, r, &ds_name); + if (ret < 0) + goto out; + FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) + delete_disk_storage_file(t, i, ds_name); + free(ds_name); + } + FOR_EACH_COLUMN(i, t->desc, cd) { + struct osl_column *col = t->columns + i; + enum osl_storage_type st = cd->storage_type; + remove_rb_node(t, i, r); + if (st == OSL_MAPPED_STORAGE) { + mark_mapped_object_invalid(t, r->num, i); + continue; + } + if (st == OSL_NO_STORAGE && !(cd->storage_flags & OSL_DONT_FREE)) + free(r->volatile_objects[col->volatile_num].data); + } + if (t->num_mapped_columns) { + ret = mark_row_invalid(t, r->num); + if (ret < 0) + goto out; + t->num_invalid_rows++; + } else + t->num_rows--; + ret = 1; +out: + free(r->volatile_objects); + free(r); + return ret; +} + +/* test if column has an rbtree */ +static int check_rbtree_col(const struct osl_table *t, unsigned col_num, + struct osl_column **col) +{ + if (!t) + return -E_BAD_TABLE; + if (!(get_column_description(t->desc, col_num)->storage_flags & OSL_RBTREE)) + return -E_BAD_STORAGE_FLAGS; + *col = t->columns + col_num; + return 1; +} + +/** + * Get the row that contains the given object. + * + * \param t Pointer to an open osl table. + * \param col_num The number of the column to be searched. + * \param obj The object to be looked up. + * \param result Points to the row containing \a obj. + * + * Lookup \a obj in \a t and return the row containing \a obj. The column + * specified by \a col_num must have an associated rbtree. + * + * \return Positive on success, negative on errors. If an error occurred, \a + * result is set to \p NULL. Possible errors include: \p E_BAD_TABLE, \p + * E_BAD_STORAGE_FLAGS, errors returned by get_mapped_object(), \p + * E_RB_KEY_NOT_FOUND. + * + * \sa osl_storage_flags + */ +int osl_get_row(const struct osl_table *t, unsigned col_num, + const struct osl_object *obj, struct osl_row **result) +{ + int ret; + struct rb_node *node; + struct osl_row *row; + struct osl_column *col; + + *result = NULL; + ret = check_rbtree_col(t, col_num, &col); + if (ret < 0) + return ret; + ret = search_rbtree(obj, t, col_num, &node, NULL); + if (ret < 0) + return ret; + row = get_row_pointer(node, t->columns[col_num].rbtree_num); + *result = row; + return 1; +} + +static int rbtree_loop(struct osl_column *col, void *private_data, + osl_rbtree_loop_func *func) +{ + struct rb_node *n, *tmp; + + /* this for-loop is safe against removal of an entry */ + for (n = rb_first(&col->rbtree), tmp = n? rb_next(n) : NULL; + n; + n = tmp, tmp = tmp? rb_next(tmp) : NULL) { + struct osl_row *r = get_row_pointer(n, col->rbtree_num); + int ret = func(r, private_data); + if (ret < 0) + return ret; + } + return 1; +} + +static int rbtree_loop_reverse(struct osl_column *col, void *private_data, + osl_rbtree_loop_func *func) +{ + struct rb_node *n, *tmp; + + /* safe against removal of an entry */ + for (n = rb_last(&col->rbtree), tmp = n? rb_prev(n) : NULL; + n; + n = tmp, tmp = tmp? rb_prev(tmp) : NULL) { + struct osl_row *r = get_row_pointer(n, col->rbtree_num); + int ret = func(r, private_data); + if (ret < 0) + return ret; + } + return 1; +} + +/** + * Loop over all nodes in an rbtree. + * + * \param t Pointer to an open osl table. + * \param col_num The column to use for iterating over the elements. + * \param private_data Pointer that gets passed to \a func. + * \param func The function to be called for each node in the rbtree. + * + * This function does an in-order walk of the rbtree associated with \a + * col_num. It is an error if the \p OSL_RBTREE flag is not set for this + * column. For each node in the rbtree, the given function \a func is called + * with two pointers as arguments: The first osl_row* argument points to the + * row that contains the object corresponding to the rbtree node currently + * traversed, and the \a private_data pointer is passed verbatim to \a func as the + * second argument. The loop terminates either if \a func returns a negative + * value, or if all nodes of the tree have been visited. + * + * + * \return Positive on success, negative on errors. If the termination of the + * loop was caused by \a func returning a negative value, this value is + * returned. + * + * \sa osl_storage_flags, osl_rbtree_loop_reverse(), osl_compare_func. + */ +int osl_rbtree_loop(const struct osl_table *t, unsigned col_num, + void *private_data, osl_rbtree_loop_func *func) +{ + struct osl_column *col; + + int ret = check_rbtree_col(t, col_num, &col); + if (ret < 0) + return ret; + return rbtree_loop(col, private_data, func); +} + +/** + * Loop over all nodes in an rbtree in reverse order. + * + * \param t Identical meaning as in \p osl_rbtree_loop(). + * \param col_num Identical meaning as in \p osl_rbtree_loop(). + * \param private_data Identical meaning as in \p osl_rbtree_loop(). + * \param func Identical meaning as in \p osl_rbtree_loop(). + * + * This function is identical to \p osl_rbtree_loop(), the only difference + * is that the tree is walked in reverse order. + * + * \return The same return value as \p osl_rbtree_loop(). + * + * \sa osl_rbtree_loop(). + */ +int osl_rbtree_loop_reverse(const struct osl_table *t, unsigned col_num, + void *private_data, osl_rbtree_loop_func *func) +{ + struct osl_column *col; + + int ret = check_rbtree_col(t, col_num, &col); + if (ret < 0) + return ret; + return rbtree_loop_reverse(col, private_data, func); +} + +/* TODO: Rollback changes on errors */ +static int rename_disk_storage_objects(struct osl_table *t, + struct osl_object *old_obj, struct osl_object *new_obj) +{ + int i, ret; + const struct osl_column_description *cd; + char *old_ds_name, *new_ds_name; + + if (!t->num_disk_storage_columns) + return 1; /* nothing to do */ + if (old_obj->size == new_obj->size && !memcmp(new_obj->data, + old_obj->data, new_obj->size)) + return 1; /* object did not change */ + old_ds_name = disk_storage_name_of_object(t, old_obj); + new_ds_name = disk_storage_name_of_object(t, new_obj); + FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) { + char *old_filename, *new_filename; + ret = create_disk_storage_object_dir(t, i, new_ds_name); + if (ret < 0) + goto out; + old_filename = disk_storage_path(t, i, old_ds_name); + new_filename = disk_storage_path(t, i, new_ds_name); + ret = para_rename(old_filename, new_filename); + free(old_filename); + free(new_filename); + if (ret < 0) + goto out; + } + ret = 1; +out: + free(old_ds_name); + free(new_ds_name); + return ret; + +} + +/** + * Change an object in an osl table. + * + * \param t Pointer to an open osl table. + * \param r Pointer to the row containing the object to be updated. + * \param col_num Number of the column containing the object to be updated. + * \param obj Pointer to the replacement object. + * + * This function gets rid of all references to the old object. This includes + * removal of the rbtree node in case there is an rbtree associated with \a + * col_num. It then inserts \a obj into the table and the rbtree if necessary. + * + * If the \p OSL_RBTREE flag is set for \a col_num, you \b MUST call this + * function in order to change the contents of an object, even for volatile or + * mapped columns of constant size (which may be updated directly if \p + * OSL_RBTREE is not set). Otherwise the rbtree might become corrupted. + * + * \return Standard + */ +int osl_update_object(struct osl_table *t, const struct osl_row *r, + unsigned col_num, struct osl_object *obj) +{ + struct osl_column *col; + const struct osl_column_description *cd; + int ret; + + if (!t) + return -E_BAD_TABLE; + col = &t->columns[col_num]; + cd = get_column_description(t->desc, col_num); + DEBUG_LOG("updating column %u of %s\n", col_num, t->desc->name); + if (cd->storage_flags & OSL_RBTREE) { + if (search_rbtree(obj, t, col_num, NULL, NULL) > 0) + return -E_RB_KEY_EXISTS; + } + if (cd->storage_flags & OSL_FIXED_SIZE) { + if (obj->size != cd->data_size) + return -E_BAD_DATA_SIZE; + } + remove_rb_node(t, col_num, r); + if (cd->storage_type == OSL_NO_STORAGE) { /* TODO: If fixed size, reuse object? */ + free(r->volatile_objects[col->volatile_num].data); + r->volatile_objects[col->volatile_num] = *obj; + } else if (cd->storage_type == OSL_DISK_STORAGE) { + char *ds_name; + ret = disk_storage_name_of_row(t, r, &ds_name); + if (ret < 0) + return ret; + ret = delete_disk_storage_file(t, col_num, ds_name); + if (ret < 0 && !is_errno(-ret, ENOENT)) { + free(ds_name); + return ret; + } + ret = write_disk_storage_file(t, col_num, obj, ds_name); + free(ds_name); + if (ret < 0) + return ret; + } else { /* mapped storage */ + struct osl_object old_obj; + ret = get_mapped_object(t, col_num, r->num, &old_obj); + if (ret < 0) + return ret; + /* + * If the updated column is the disk storage name column, the + * disk storage name changes, so we have to rename all disk + * storage objects accordingly. + */ + if (col_num == t->disk_storage_name_column) { + ret = rename_disk_storage_objects(t, &old_obj, obj); + if (ret < 0) + return ret; + } + if (cd->storage_flags & OSL_FIXED_SIZE) + memcpy(old_obj.data, obj->data, cd->data_size); + else { /* TODO: if the size doesn't change, use old space */ + uint32_t new_data_map_size; + char *row_index; + ret = get_row_index(t, r->num, &row_index); + if (ret < 0) + return ret; + ret = mark_mapped_object_invalid(t, r->num, col_num); + if (ret < 0) + return ret; + unmap_column(t, col_num); + ret = append_map_file(t, col_num, obj, + &new_data_map_size); + if (ret < 0) + return ret; + ret = map_column(t, col_num); + if (ret < 0) + return ret; + update_cell_index(row_index, col, new_data_map_size, + obj->size); + } + } + if (cd->storage_flags & OSL_RBTREE) { + ret = insert_rbtree(t, col_num, r, obj); + if (ret < 0) + return ret; + } + return 1; +} + +/** + * Retrieve an object of type \p OSL_DISK_STORAGE by row and column. + * + * \param t Pointer to an open osl table. + * \param r Pointer to the row containing the object. + * \param col_num The column number. + * \param obj Points to the result upon successful return. + * + * For columns of type \p OSL_DISK_STORAGE, this function must be used to + * retrieve one of its containing objects. Afterwards, osl_close_disk_object() + * must be called in order to deallocate the resources. + * + * \return Positive on success, negative on errors. Possible errors include: + * \p E_BAD_TABLE, \p E_BAD_STORAGE_TYPE, errors returned by osl_get_object(). + * + * \sa osl_get_object(), osl_storage_type, osl_close_disk_object(). + */ +int osl_open_disk_object(const struct osl_table *t, const struct osl_row *r, + unsigned col_num, struct osl_object *obj) +{ + const struct osl_column_description *cd; + char *ds_name, *filename; + int ret; + + if (!t) + return -E_BAD_TABLE; + cd = get_column_description(t->desc, col_num); + if (cd->storage_type != OSL_DISK_STORAGE) + return -E_BAD_STORAGE_TYPE; + + ret = disk_storage_name_of_row(t, r, &ds_name); + if (ret < 0) + return ret; + filename = disk_storage_path(t, col_num, ds_name); + free(ds_name); + DEBUG_LOG("filename: %s\n", filename); + ret = mmap_full_file(filename, O_RDONLY, &obj->data, &obj->size, NULL); + free(filename); + return ret; +} + +/** + * Free resources that were allocated during osl_open_disk_object(). + * + * \param obj Pointer to the object previously returned by open_disk_object(). + * + * \return The return value of the underlying call to para_munmap(). + * + * \sa para_munmap(). + */ +int osl_close_disk_object(struct osl_object *obj) +{ + return para_munmap(obj->data, obj->size); +} + +/** + * Get the number of rows of the given table. + * + * \param t Pointer to an open osl table. + * \param num_rows Result is returned here. + * + * The number of rows returned via \a num_rows excluding any invalid rows. + * + * \return Positive on success, \p -E_BAD_TABLE if \a t is \p NULL. + */ +int osl_get_num_rows(const struct osl_table *t, unsigned *num_rows) +{ + if (!t) + return -E_BAD_TABLE; + assert(t->num_rows >= t->num_invalid_rows); + *num_rows = t->num_rows - t->num_invalid_rows; + return 1; +} + +/** + * Get the rank of a row. + * + * \param t An open osl table. + * \param r The row to get the rank of. + * \param col_num The number of an rbtree column. + * \param rank Result pointer. + * + * The rank is, by definition, the position of the row in the linear order + * determined by an in-order tree walk of the rbtree associated with column + * number \a col_num of \a table. + * + * \return Positive on success, negative on errors. + * + * \sa osl_get_nth_row(). + */ +int osl_get_rank(const struct osl_table *t, struct osl_row *r, + unsigned col_num, unsigned *rank) +{ + struct osl_object obj; + struct osl_column *col; + struct rb_node *node; + int ret = check_rbtree_col(t, col_num, &col); + + if (ret < 0) + return ret; + ret = osl_get_object(t, r, col_num, &obj); + if (ret < 0) + return ret; + ret = search_rbtree(&obj, t, col_num, &node, NULL); + if (ret < 0) + return ret; + ret = rb_rank(node, rank); + if (ret < 0) + return -E_BAD_ROW; + return 1; +} + +/** + * Get the row with n-th greatest value. + * + * \param t Pointer to an open osl table. + * \param col_num The column number. + * \param n The rank of the desired row. + * \param result Row is returned here. + * + * Retrieve the n-th order statistic with respect to the compare function + * of the rbtree column \a col_num. In other words, get that row with + * \a n th greatest value in column \a col_num. It's an error if + * \a col_num is not a rbtree column, or if \a n is larger than the + * number of rows in the table. + * + * \return Positive on success, negative on errors. Possible errors: + * \p E_BAD_TABLE, \p E_BAD_STORAGE_FLAGS, \p E_RB_KEY_NOT_FOUND. + * + * \sa osl_storage_flags, osl_compare_func, osl_get_row(), + * osl_rbtree_last_row(), osl_rbtree_first_row(), osl_get_rank(). + */ +int osl_get_nth_row(const struct osl_table *t, unsigned col_num, + unsigned n, struct osl_row **result) +{ + struct osl_column *col; + struct rb_node *node; + unsigned num_rows; + int ret; + + if (n == 0) + return -E_RB_KEY_NOT_FOUND; + ret = osl_get_num_rows(t, &num_rows); + if (ret < 0) + return ret; + if (n > num_rows) + return -E_RB_KEY_NOT_FOUND; + ret = check_rbtree_col(t, col_num, &col); + if (ret < 0) + return ret; + node = rb_nth(col->rbtree.rb_node, n); + if (!node) + return -E_RB_KEY_NOT_FOUND; + *result = get_row_pointer(node, col->rbtree_num); + return 1; +} + +/** + * Get the row corresponding to the smallest rbtree node of a column. + * + * \param t An open rbtree table. + * \param col_num The number of the rbtree column. + * \param result A pointer to the first row is returned here. + * + * The rbtree node of the smallest object (with respect to the corresponding + * compare function) is selected and the row containing this object is + * returned. It is an error if \a col_num refers to a column without an + * associated rbtree. + * + * \return Positive on success, negative on errors. + * + * \sa osl_get_nth_row(), osl_rbtree_last_row(). + */ +int osl_rbtree_first_row(const struct osl_table *t, unsigned col_num, + struct osl_row **result) +{ + return osl_get_nth_row(t, col_num, 1, result); +} + +/** + * Get the row corresponding to the greatest rbtree node of a column. + * + * \param t The same meaning as in \p osl_rbtree_first_row(). + * \param col_num The same meaning as in \p osl_rbtree_first_row(). + * \param result The same meaning as in \p osl_rbtree_first_row(). + * + * This function works just like osl_rbtree_first_row(), the only difference + * is that the row containing the greatest rather than the smallest object is + * returned. + * + * \return Positive on success, negative on errors. + * + * \sa osl_get_nth_row(), osl_rbtree_first_row(). + */ +int osl_rbtree_last_row(const struct osl_table *t, unsigned col_num, + struct osl_row **result) +{ + unsigned num_rows; + int ret = osl_get_num_rows(t, &num_rows); + + if (ret < 0) + return ret; + return osl_get_nth_row(t, col_num, num_rows, result); +} diff --git a/osl.h b/osl.h new file mode 100644 index 0000000..bbfac46 --- /dev/null +++ b/osl.h @@ -0,0 +1,187 @@ +#include +/* + * Copyright (C) 2007-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file osl.h User interface for the object storage layer. */ + +/** describes an object of the object storage layer (osl) */ +struct osl_object { + /** Pointer to the data of the object. */ + void *data; + /** The object's size. */ + size_t size; +}; + +/** Flags that change the internal handling of osl tables. */ +enum osl_table_flags { + /** This table will have many rows. */ + OSL_LARGE_TABLE = 1 +}; + +/** The three different types of storage for an osl column */ +enum osl_storage_type { + /** + * All data for this column is stored in one file which gets mmapped by + * osl_open_table(). This is suitable for columns that do not hold much + * data. + */ + OSL_MAPPED_STORAGE, + /** + * Each entry is stored on disk and is loaded on demand by + * open_disk_object(). This is the preferable storage type for large + * objects that need not be in memory all the time. + */ + OSL_DISK_STORAGE, + /** + * Objects for columns of this type are volatile: They are only stored + * in memory and are discarded once the table gets closed. + */ + OSL_NO_STORAGE +}; + +/** + * Additional per-column flags + */ +enum osl_storage_flags { + /** + * Build an rbtree for this column. This is only possible if the + * storage type of the column is either \a OSL_MAPPED_STORAGE or \a + * OSL_NO_STORAGE. In order to lookup objects in the table by using \a + * osl_get_row(), the lookup column must have an associated rbtree. + * + * \sa osl_storage_type, osl_get_row() + */ + OSL_RBTREE = 1, + /** The data for this column will have constant size. */ + OSL_FIXED_SIZE = 2, + /** All values of this column will be different. */ + OSL_UNIQUE = 4, + /** Do not free the data for this column (\p OSL_NO_STORAGE). */ + OSL_DONT_FREE = 8 +}; + +struct osl_table; +struct osl_row; + +/** + * In order to build up an rbtree a compare function for the objects must be + * specified. Such a function always takes pointers to the two objects to be + * compared. It must return -1, zero, or 1, if the first argument is considered + * to be respectively less than, equal to, or greater than the second. If two + * members compare as equal, their order in the sorted array is undefined. + */ +typedef int osl_compare_func(const struct osl_object *obj1, + const struct osl_object *obj2); +typedef int osl_rbtree_loop_func(struct osl_row *row, void *data); + +osl_compare_func osl_hash_compare, uint32_compare; + +/** + * Describes one column of a osl table. + */ +struct osl_column_description { + /** One of zje tree possible types of storage */ + enum osl_storage_type storage_type; + /** Specifies further properties of the column */ + enum osl_storage_flags storage_flags; + /** + * The column name determines the name of the directory where all data + * for this column will be stored. Its hash is stored in the table + * header. This field is ignored if the storage type is \a NO_STORAGE + */ + char *name; + /** + * For columns with an associated rbtree, this must point to a function + * that compares the values of two objects, either a built-in function + * or a function defined by the application may be supplied. This + * field is ignored if the column does not have an associated rbtree. + * + * \sa osl_storage_flags, osl_compare_func + */ + osl_compare_func *compare_function; + /** + * If the \a OSL_FIXED_SIZE flag is set for this column, this value + * determines the fixed size of all objects of this column. It is + * ignored, if \a OSL_FIXED_SIZE is not set. + */ + uint32_t data_size; +}; + +/** + * Describes one osl table. + */ +struct osl_table_description { + /** The directory which contains all files of this table. */ + const char *dir; + /** + * The table name. A subdirectory of \a dir called \a name is created + * at table creation time. It must be a valid name for a subdirectory. + * In particular, no slashes are allowed for \a name. + */ + const char *name; + /** The number of columns of this table. */ + uint16_t num_columns; + /** Further table-wide information. */ + enum osl_table_flags flags; + /** The array describing the individual columns of the table. */ + struct osl_column_description *column_descriptions; +}; + +/** Flags to be passed to \a osl_close_table(). */ +enum osl_close_flags { + /** + * The table header contains a "dirty" flag which specifies whether + * the table is currently open by another process. This flag specifies + * that the dirty flag should be cleared before closing the table. + */ + OSL_MARK_CLEAN = 1, + /** + * If the table contains columns of type \a OSL_NO_STORAGE and this + * flag is passed to osl_close_table(), free(3) is called for each + * object of each column of type \a OSL_NO_STORAGE. + */ + OSL_FREE_VOLATILE = 2 +}; + + + +int osl_create_table(const struct osl_table_description *desc); +int osl_open_table(const struct osl_table_description *desc, + struct osl_table **result); +int osl_close_table(struct osl_table *t, enum osl_close_flags flags); +int osl_get_row(const struct osl_table *t, unsigned col_num, + const struct osl_object *obj, struct osl_row **result); +int osl_get_object(const struct osl_table *t, const struct osl_row *row, + unsigned col_num, struct osl_object *object); +int osl_open_disk_object(const struct osl_table *t, + const struct osl_row *r, unsigned col_num, struct osl_object *obj); +int osl_close_disk_object(struct osl_object *obj); +int osl_add_and_get_row(struct osl_table *t, struct osl_object *objects, + struct osl_row **row); +int osl_add_row(struct osl_table *t, struct osl_object *objects); +int osl_del_row(struct osl_table *t, struct osl_row *row); +int osl_rbtree_loop(const struct osl_table *t, unsigned col_num, + void *private_data, osl_rbtree_loop_func *func); +int osl_rbtree_loop_reverse(const struct osl_table *t, unsigned col_num, + void *private_data, osl_rbtree_loop_func *func); +int osl_update_object(struct osl_table *t, const struct osl_row *r, + unsigned col_num, struct osl_object *obj); +int osl_get_num_rows(const struct osl_table *t, unsigned *num_rows); +int osl_rbtree_first_row(const struct osl_table *t, unsigned col_num, + struct osl_row **result); +int osl_rbtree_last_row(const struct osl_table *t, unsigned col_num, + struct osl_row **result); +int osl_get_nth_row(const struct osl_table *t, unsigned col_num, + unsigned n, struct osl_row **result); +int osl_get_rank(const struct osl_table *t, struct osl_row *r, + unsigned col_num, unsigned *rank); + +int for_each_file_in_dir(const char *dirname, + int (*func)(const char *, void *), void *private_data); +//ssize_t para_write_all(int fd, const void *buf, size_t size); +int para_lseek(int fd, off_t *offset, int whence); +int para_write_file(const char *filename, const void *buf, size_t size); + diff --git a/osl_core.h b/osl_core.h new file mode 100644 index 0000000..87a1188 --- /dev/null +++ b/osl_core.h @@ -0,0 +1,592 @@ +/* + * Copyright (C) 2007-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file osl_core.h Object storage layer details, not visible to users. */ + +#include "rbtree.h" +#include "osl.h" +#include "string.h" +#include "hash.h" + +static __must_check __printf_1_2 __malloc char *make_message(const char *fmt, ...); + +/** Internal representation of a column of an osl table. */ +struct osl_column { + /** The memory mapping of this comumn (only used for mapped columns). */ + struct osl_object data_map; + /** The root of the rbtree (only used for columns with rbtrees). */ + struct rb_root rbtree; + /** The index in the array of rb nodes (only used for columns with rbtrees). */ + unsigned rbtree_num; + /** Index for volatile_objects of struct osl_row. */ + unsigned volatile_num; + /** + * Starting point of the data for this column within the index + * (only used for mapped columns). + */ + uint16_t index_offset; + /** + * The hash value of the name of this column. + * + * This is only used for mapped and disk storage columns). + */ + HASH_TYPE name_hash[HASH_SIZE]; +}; + +/** Internal representation of an osl table */ +struct osl_table { + /** Pointer to the table description */ + const struct osl_table_description *desc; + /** The size of the index header of this table. */ + uint16_t index_header_size; + /** Contains the mapping of the table's index file */ + struct osl_object index_map; + /** Total number of rows, including invalid rows. */ + unsigned num_rows; + /** Keeps track of the number of invalid rows in the table. */ + uint32_t num_invalid_rows; + /** Number of columns of type \p OSL_MAPPED_STORAGE. */ + unsigned num_mapped_columns; + /** Number of columns of type \p OSL_NO_STORAGE. */ + unsigned num_volatile_columns; + /** Number of columns of type \p OSL_DISK_STORAGE. */ + unsigned num_disk_storage_columns; + /** Number of columns with associated rbtree. */ + unsigned num_rbtrees; + /** + * The number of the column that determines the name of the disk + * storage objects. + */ + unsigned disk_storage_name_column; + /** The number of bytes of an index entry of a row. */ + unsigned row_index_size; + /** Pointer to the internal representation of the columns. */ + struct osl_column *columns; +}; + +/** Internal representation of a row of an osl table */ +struct osl_row { + /** + * The row number. + * + * It is only used if there is at least one mapped column. + */ + off_t num; + /** Array of size \a num_volatile_columns. */ + struct osl_object *volatile_objects; +}; + +int read_table_desc(struct osl_object *map, struct osl_table_description *desc); +int init_table_structure(const struct osl_table_description *desc, + struct osl_table **table_ptr); +int row_is_invalid(struct osl_table *t, uint32_t row_num); +int get_mapped_object(const struct osl_table *t, unsigned col_num, + uint32_t row_num, struct osl_object *obj); +int para_truncate(const char *filename, off_t size); +int unmap_table(struct osl_table *t, enum osl_close_flags flags); +int init_rbtrees(struct osl_table *t); + +/** + * Flags to be specified for map_table(). + * + * \sa map_table(). + */ +enum map_table_flags { + /** + * Check whether the entries in the table index match the entries in + * the table description. + */ + MAP_TBL_FL_VERIFY_INDEX = 1, + /** Do not complain even if the dirty flag is set. */ + MAP_TBL_FL_IGNORE_DIRTY = 2, + /** Use read-only mappings. */ + MAP_TBL_FL_MAP_RDONLY = 4 +}; + +int map_table(struct osl_table *t, enum map_table_flags flags); +void clear_rbtrees(struct osl_table *t); +int mark_row_invalid(struct osl_table *t, uint32_t row_num); + + +/** + * Get the description of a column by column number + * + * \param d Pointer to the table description. + * \param col_num The number of the column to get the description for. + * + * \return The table description. + * + * \sa struct osl_column_description. + */ +_static_inline_ struct osl_column_description *get_column_description( + const struct osl_table_description *d, unsigned col_num) +{ + return &d->column_descriptions[col_num]; +} + +/** + * Format of the header of the index file of an osl table. + * + * Bytes 16-31 are currently unused. + * + * \sa enum index_column_desc_offsets, HASH_SIZE, osl_table_flags. + */ +enum index_header_offsets { + /** Bytes 0-8: PARASLASH. */ + IDX_PARA_MAGIC = 0, + /** Byte 9: Dirty flag (nonzero if table is mapped). */ + IDX_DIRTY_FLAG = 9, + /** Byte 10: osl table version number. */ + IDX_VERSION = 10, + /** Byte 11: Table flags.*/ + IDX_TABLE_FLAGS = 11, + /** Byte 12,13: Number of columns. */ + IDX_NUM_COLUMNS, + /** Byte 14,15 Size of the index header. */ + IDX_HEADER_SIZE = 14, + /** Column descriptions start here. */ + IDX_COLUMN_DESCRIPTIONS = 32 +}; + +/** + * Format of the column description in the index header. + * + * \sa index_header_offsets. + */ +enum index_column_desc_offsets { + /** Bytes 0,1: Storage_type. */ + IDX_CD_STORAGE_TYPE = 0, + /** Bytes 2,3: Storage_flags. */ + IDX_CD_STORAGE_FLAGS = 2, + /** Bytes 4 - 7: Data_size (only used for fixed size columns). */ + IDX_CD_DATA_SIZE = 4, + /** Bytes 8 - ... Name of the column. */ + IDX_CD_NAME = 8, +}; + +/** Magic string contained in the header of the index file of each osl table. */ +#define PARA_MAGIC "PARASLASH" + +/** + * The minimal number of bytes for a column in the index header. + * + * The column name starts at byte IDX_CD_NAME and must at least contain one + * character, plus the terminating NULL byte. + */ +#define MIN_IDX_COLUMN_DESCRIPTION_SIZE (IDX_CD_NAME + 2) + +/** + * Get the number of bytes used for a column in the index header. + * + * \param name The name of the column. + * + * The amount of space used for a column in the index header of a table depends + * on the (length of the) name of the column. + * + * \return The total number of bytes needed to store one column of this name. + */ +_static_inline_ size_t index_column_description_size(const char *name) +{ + return MIN_IDX_COLUMN_DESCRIPTION_SIZE + strlen(name) - 1; +} + +#define CURRENT_TABLE_VERSION 1 +#define MIN_TABLE_VERSION 1 +#define MAX_TABLE_VERSION 1 +/** An index header must be at least that many bytes long. */ +#define MIN_INDEX_HEADER_SIZE(num_cols) (MIN_IDX_COLUMN_DESCRIPTION_SIZE \ + * num_cols + IDX_COLUMN_DESCRIPTIONS) + +/** + * Get the number of rows from the size of the memory mapping. + * + * \param t Pointer to an open table. + * + * \return The number of rows, including invalid rows. + */ +_static_inline_ unsigned table_num_rows(const struct osl_table *t) +{ + return (t->index_map.size - t->index_header_size) + / t->row_index_size; +} + +/** + * Get the path of the index file from a table description. + * + * \param d The table description. + * + * \return The full path of the index file. Result must be freed by + * the caller. + */ +_static_inline_ char *index_filename(const struct osl_table_description *d) +{ + return make_message("%s/%s/index", d->dir, d->name); +} + +/** + * Get the path of storage of a column. + * + * \param t Pointer to an initialized table. + * \param col_num The number of the column to get the path for. + * + * \return The full path of the mapped data file (mapped storage) or the + * data directory (disk storage). Result must be freed by the caller. + * + * \sa osl_storage_type. + */ +_static_inline_ char *column_filename(const struct osl_table *t, unsigned col_num) +{ + char asc[2 * HASH_SIZE + 1]; + hash_to_asc(t->columns[col_num].name_hash, asc); + return make_message("%s/%s/%s", t->desc->dir, t->desc->name, asc); +} + +/** + * Get the start of an index entry + * + * \param t Pointer to a table which has been mapped. + * \param row_num The number of the row whose index entry should be retrieved. + * \param row_index Result pointer. + * + * \return Positive on success, \p -E_INDEX_CORRUPTION otherwise. + * + * \sa get_cell_index(). + */ +_static_inline_ int get_row_index(const struct osl_table *t, uint32_t row_num, + char **row_index) +{ + uint32_t index_offset; + index_offset = t->index_header_size + t->row_index_size * row_num; + if (index_offset + 8 > t->index_map.size) { + *row_index = NULL; + return -E_INDEX_CORRUPTION; + } + *row_index = (char *)(t->index_map.data) + index_offset; + return 1; +} + +/** + * Get the index entry of a row/column pair. + * + * \param t Pointer to a table which has been mapped. + * \param row_num The number of the row whose index entry should be retrieved. + * \param col_num The number of the column whose index entry should be retrieved. + * \param cell_index Result pointer. + * + * \return Positive on success, \p -E_INDEX_CORRUPTION otherwise. + * + * \sa get_row_index(). + */ +_static_inline_ int get_cell_index(const struct osl_table *t, uint32_t row_num, + uint32_t col_num, char **cell_index) +{ + int ret = get_row_index(t, row_num, cell_index); + if (ret < 0) + return ret; + *cell_index += t->columns[col_num].index_offset; + return ret; +} + +/** + * Change an index entry of a column after object was added. + * + * \param row_index Pointer to the index of the row to update. + * \param col Pointer to the column. + * \param map_size The new size of the data file. + * \param object_size The size of the object just appended to the data file. + * + * This is called right after an object was appended to the data file for a + * mapped column. + * + * \sa get_row_index(). + */ +_static_inline_ void update_cell_index(char *row_index, struct osl_column *col, + uint32_t map_size, uint32_t object_size) +{ + write_u32(row_index + col->index_offset, map_size - object_size - 1); + write_u32(row_index + col->index_offset + 4, object_size + 1); +} + +/** + * Get the full path of a disk storage object + * + * \param t Pointer to an initialized table. + * \param col_num The number of the column the disk storage object belongs to. + * \param ds_name The disk storage name of the object. + * + * \return Pointer to the full path which must be freed by the caller. + * + * \sa column_filename(), disk_storage_name_of_row(). + */ +_static_inline_ char *disk_storage_path(const struct osl_table *t, + unsigned col_num, const char *ds_name) +{ + char *dirname = column_filename(t, col_num); + char *filename = make_message("%s/%s", dirname, ds_name); + free(dirname); + return filename; +} + +/** + * Get the column description of the next column of a given type. + * + * \param type the desired storage type. + * \param col_num the column to start the search. + * \param t the osl table. + * \param cd result is returned here. + * + * \return On success, \a cd contains the column description of the next column + * of type \a type, and the number of that column is returned. Otherwise, \a + * cd is set to \p NULL and the function returns t->num_columns + 1. + * + * \sa FOR_EACH_COLUMN_OF_TYPE, FOR_EACH_MAPPED_COLUMN, FOR_EACH_RBTREE_COLUMN, + * FOR_EACH_DISK_STORAGE_COLUMN, FOR_EACH_VOLATILE_COLUMN, osl_storage_type. + */ +_static_inline_ int next_column_of_type(enum osl_storage_type type, int col_num, + const struct osl_table *t, + const struct osl_column_description **cd) +{ + *cd = NULL; + while (col_num < t->desc->num_columns) { + *cd = get_column_description(t->desc, col_num); + if ((*cd)->storage_type == type) + break; + col_num++; + } + return col_num; +} + +/** + * Find the next column with an associated rbtree. + * + * \param col_num The column to start the search. + * \param t The osl table. + * \param cd Result is returned here. + + * \return On success, \a cd contains the column description of the next column + * with associated rbtree, and the number of that column is returned. + * Otherwise, \a cd is set to \p NULL and the function returns t->num_columns + + * 1. + * + * \sa FOR_EACH_COLUMN_OF_TYPE, FOR_EACH_MAPPED_COLUMN, FOR_EACH_RBTREE_COLUMN, + * FOR_EACH_DISK_STORAGE_COLUMN, FOR_EACH_VOLATILE_COLUMN, osl_storage_flags. + */ +_static_inline_ int next_rbtree_column(int col_num, const struct osl_table *t, + const struct osl_column_description **cd) +{ + *cd = NULL; + while (col_num < t->desc->num_columns) { + *cd = get_column_description(t->desc, col_num); + if ((*cd)->storage_flags & OSL_RBTREE) + break; + col_num++; + } + return col_num; +} + + +/* quite some dirty hacks */ + +/** Round up size of struct osl_row to multiple of 8 */ +#define RB_NODES_OFFSET ((sizeof(struct osl_row) + 7) / 8 * 8) + +/** + * Allocate a new osl row. + * + * \param num_rbtrees The number of rbtrees for this row. + * + * \return A pointer to a zeroed-out area suitable for holding an osl row + * with \a num_rbtrees rbtree columns. + */ +_static_inline_ struct osl_row *allocate_row(unsigned num_rbtrees) +{ + size_t s = RB_NODES_OFFSET + num_rbtrees * sizeof(struct rb_node); + return para_calloc(s); +} + +/** + * Compute the pointer to a rbtree node embedded in a osl row. + * + * \param row The row to extract the rbtree pointer from. + * \param rbtree_num The number of the rbtree node to extract. + * + * \return A pointer to the \a rbtree_num th node contained in \a row. + */ +_static_inline_ struct rb_node *get_rb_node_pointer(const struct osl_row *row, uint32_t rbtree_num) +{ + return ((struct rb_node *)(((char *)row) + RB_NODES_OFFSET)) + rbtree_num; +} + +/** + * Get a pointer to the struct containing the given rbtree node. + * + * \param node Pointer to the rbtree node. + * \param rbtree_num Number of \a node in the array of rbtree nodes. + * + * \return A pointer to the row containing \a node. + */ +_static_inline_ struct osl_row *get_row_pointer(const struct rb_node *node, + unsigned rbtree_num) +{ + node -= rbtree_num; + return (struct osl_row *)(((char *)node) - RB_NODES_OFFSET); +} + +/** + * Compute a cryptographic hash of an osl object. + * + * \param obj the Object to compute the hash value from. + * \param hash Result is returned here. + */ +_static_inline_ void hash_object(const struct osl_object *obj, HASH_TYPE *hash) +{ + hash_function(obj->data, obj->size, hash); +} + +/** + * Get the relative path of an object, given the hash value. + * + * \param t Pointer to an initialized osl table. + * \param hash An arbitrary hash value. + * + * This function is typically called with \a hash being the hash of the object + * stored in the disk storage name column of a row. If the OSL_LARGE_TABLE + * flag is set, the first two hex digits get separated with a slash from the + * remaining digits. + * + * \return The relative path for all disk storage objects corresponding to \a + * hash. + * + * \sa struct osl_table:disk_storage_name_column. + */ +_static_inline_ char *disk_storage_name_of_hash(const struct osl_table *t, HASH_TYPE *hash) +{ + char asc[2 * HASH_SIZE + 2]; + + hash_to_asc(hash, asc); + if (t->desc->flags & OSL_LARGE_TABLE) + return make_message("%.2s/%s", asc, asc + 2); + return para_strdup(asc); +} + +/** + * A wrapper for rename(2). + * + * \param old_path The source path. + * \param new_path The destination path. + * + * \return Standard. + * + * \sa rename(2). + */ +_static_inline_ int para_rename(const char *old_path, const char *new_path) +{ + if (rename(old_path, new_path) < 0) + return -ERRNO_TO_ERROR(errno); + return 1; +} + +/** + * Iterate over each column of an initialized table. + * + * \param col A pointer to a struct osl_column. + * \param desc Pointer to the table description. + * \param cd Pointer to struct osl_column_description. + * + * On each iteration, \a col will point to the next column of the table and \a + * cd will point to the column description of this column. + * + * \sa struct osl_column FOR_EACH_RBTREE_COLUMN, FOR_EACH_COLUMN_OF_TYPE, + * FOR_EACH_MAPPED_COLUMN, FOR_EACH_DISK_STORAGE_COLUMN, + * FOR_EACH_VOLATILE_COLUMN. + */ +#define FOR_EACH_COLUMN(col, desc, cd) \ + for (col = 0; col < (desc)->num_columns && \ + (cd = get_column_description(desc, col)); col++) + +/** + * Iterate over each column with associated rbtree. + * + * \param col Same meaning as in FOR_EACH_COLUMN(). + * \param table Same meaning as in FOR_EACH_COLUMN(). + * \param cd Same meaning as in FOR_EACH_COLUMN(). + * + * \sa osl_storage_flags::OSL_RBTREE, FOR_EACH_COLUMN, FOR_EACH_COLUMN_OF_TYPE, + * FOR_EACH_MAPPED_COLUMN, FOR_EACH_DISK_STORAGE_COLUMN, + * FOR_EACH_VOLATILE_COLUMN. + */ +#define FOR_EACH_RBTREE_COLUMN(col, table, cd) \ + for (col = next_rbtree_column(0, table, &cd); \ + col < table->desc->num_columns; \ + col = next_rbtree_column(++col, table, &cd)) + +/** + * Iterate over each column of given storage type. + * + * \param type The osl_storage_type to iterate over. + * \param col Same meaning as in FOR_EACH_COLUMN(). + * \param table Same meaning as in FOR_EACH_COLUMN(). + * \param cd Same meaning as in FOR_EACH_COLUMN(). + * + * \sa osl_storage_type, FOR_EACH_COLUMN, FOR_EACH_RBTREE_COLUMN, + * FOR_EACH_MAPPED_COLUMN, FOR_EACH_DISK_STORAGE_COLUMN, + * FOR_EACH_VOLATILE_COLUMN. + */ +#define FOR_EACH_COLUMN_OF_TYPE(type, col, table, cd) \ + for (col = next_column_of_type(type, 0, table, &cd); \ + col < table->desc->num_columns; \ + col = next_column_of_type(type, ++col, table, &cd)) + +/** + * Iterate over each mapped column. + * + * \param col Same meaning as in FOR_EACH_COLUMN(). + * \param table Same meaning as in FOR_EACH_COLUMN(). + * \param cd Same meaning as in FOR_EACH_COLUMN(). + * + * Just like FOR_EACH_COLUMN(), but skip columns which are + * not of type \p OSL_MAPPED_STORAGE. + * + * \sa osl_storage_flags::OSL_MAPPED_STORAGE, FOR_EACH_COLUMN, + * FOR_EACH_RBTREE_COLUMN, FOR_EACH_COLUMN_OF_TYPE, + * FOR_EACH_DISK_STORAGE_COLUMN, FOR_EACH_VOLATILE_COLUMN. + */ +#define FOR_EACH_MAPPED_COLUMN(col, table, cd) \ + FOR_EACH_COLUMN_OF_TYPE(OSL_MAPPED_STORAGE, col, table, cd) + +/** + * Iterate over each disk storage column. + * + * \param col Same meaning as in FOR_EACH_COLUMN(). + * \param table Same meaning as in FOR_EACH_COLUMN(). + * \param cd Same meaning as in FOR_EACH_COLUMN(). + * + * Just like FOR_EACH_COLUMN(), but skip columns which are + * not of type \p OSL_DISK_STORAGE. + * + * \sa osl_storage_flags::OSL_DISK_STORAGE, FOR_EACH_COLUMN, + * FOR_EACH_RBTREE_COLUMN, FOR_EACH_COLUMN_OF_TYPE, FOR_EACH_MAPPED_COLUMN, + * FOR_EACH_VOLATILE_COLUMN. + */ +#define FOR_EACH_DISK_STORAGE_COLUMN(col, table, cd) \ + FOR_EACH_COLUMN_OF_TYPE(OSL_DISK_STORAGE, col, table, cd) + +/** + * Iterate over each volatile column. + * + * \param col Same meaning as in FOR_EACH_COLUMN(). + * \param table Same meaning as in FOR_EACH_COLUMN(). + * \param cd Same meaning as in FOR_EACH_COLUMN(). + * + * Just like FOR_EACH_COLUMN(), but skip columns which are + * not of type \p OSL_NO_STORAGE. + * + * \sa osl_storage_flags::OSL_NO_STORAGE, FOR_EACH_COLUMN, + * FOR_EACH_RBTREE_COLUMN, FOR_EACH_COLUMN_OF_TYPE, FOR_EACH_MAPPED_COLUMN, + * FOR_EACH_DISK_STORAGE_COLUMN. + */ +#define FOR_EACH_VOLATILE_COLUMN(col, table, cd) \ + FOR_EACH_COLUMN_OF_TYPE(OSL_NO_STORAGE, col, table, cd) diff --git a/portable_io.h b/portable_io.h new file mode 100644 index 0000000..e296bcb --- /dev/null +++ b/portable_io.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2007-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file portable_io.h Inline functions for endian-independent binary IO. */ + +static inline uint64_t read_portable(unsigned bits, const char *buf) +{ + uint64_t ret = 0; + int i, num_bytes = bits / 8; + + for (i = 0; i < num_bytes; i++) { + unsigned char c = buf[i]; + ret += ((uint64_t)c << (8 * i)); + } + return ret; +} + +static inline uint64_t read_u64(const char *buf) +{ + return read_portable(64, buf); +} + +static inline uint32_t read_u32(const char *buf) +{ + return read_portable(32, buf); +} + +static inline uint16_t read_u16(const char *buf) +{ + return read_portable(16, buf); +} + +static inline uint8_t read_u8(const char *buf) +{ + return read_portable(8, buf); +} + +static inline void write_portable(unsigned bits, char *buf, uint64_t val) +{ + int i, num_bytes = bits / 8; +// fprintf(stderr, "val: %lu\n", val); + for (i = 0; i < num_bytes; i++) { + buf[i] = val & 0xff; +// fprintf(stderr, "buf[%d]=%x\n", i, buf[i]); + val = val >> 8; + } +} + +static inline void write_u64(char *buf, uint64_t val) +{ + write_portable(64, buf, val); +} + +static inline void write_u32(char *buf, uint32_t val) +{ + write_portable(32, buf, (uint64_t) val); +} + +static inline void write_u16(char *buf, uint16_t val) +{ + write_portable(16, buf, (uint64_t) val); +} + +static inline void write_u8(char *buf, uint8_t val) +{ + write_portable(8, buf, (uint64_t) val); +} diff --git a/rbtree.c b/rbtree.c new file mode 100644 index 0000000..5d24aeb --- /dev/null +++ b/rbtree.c @@ -0,0 +1,457 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2007 Andre Noll + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +/** \file rbtree.c Red-black tree implementation. */ + +#include "stddef.h" +#include "rbtree.h" + +static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *right = node->rb_right; + struct rb_node *parent = rb_parent(node); + + if ((node->rb_right = right->rb_left)) + rb_set_parent(right->rb_left, node); + right->rb_left = node; + + rb_set_parent(right, parent); + + if (parent) + { + if (node == parent->rb_left) + parent->rb_left = right; + else + parent->rb_right = right; + } + else + root->rb_node = right; + rb_set_parent(node, right); + right->size = node->size; + node->size = 1; + if (node->rb_right) + node->size += node->rb_right->size; + if (node->rb_left) + node->size += node->rb_left->size; +} + +static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *left = node->rb_left; + struct rb_node *parent = rb_parent(node); + + if ((node->rb_left = left->rb_right)) + rb_set_parent(left->rb_right, node); + left->rb_right = node; + + rb_set_parent(left, parent); + + if (parent) + { + if (node == parent->rb_right) + parent->rb_right = left; + else + parent->rb_left = left; + } + else + root->rb_node = left; + rb_set_parent(node, left); + left->size = node->size; + node->size = 1; + if (node->rb_right) + node->size += node->rb_right->size; + if (node->rb_left) + node->size += node->rb_left->size; +} + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *parent, *gparent; + + while ((parent = rb_parent(node)) && rb_is_red(parent)) + { + gparent = rb_parent(parent); + + if (parent == gparent->rb_left) + { + { + register struct rb_node *uncle = gparent->rb_right; + if (uncle && rb_is_red(uncle)) + { + rb_set_black(uncle); + rb_set_black(parent); + rb_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->rb_right == node) + { + register struct rb_node *tmp; + __rb_rotate_left(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + rb_set_black(parent); + rb_set_red(gparent); + __rb_rotate_right(gparent, root); + } else { + { + register struct rb_node *uncle = gparent->rb_left; + if (uncle && rb_is_red(uncle)) + { + rb_set_black(uncle); + rb_set_black(parent); + rb_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->rb_left == node) + { + register struct rb_node *tmp; + __rb_rotate_right(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + rb_set_black(parent); + rb_set_red(gparent); + __rb_rotate_left(gparent, root); + } + } + + rb_set_black(root->rb_node); +} + +static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, + struct rb_root *root) +{ + struct rb_node *other; + + while ((!node || rb_is_black(node)) && node != root->rb_node) + { + if (parent->rb_left == node) + { + other = parent->rb_right; + if (rb_is_red(other)) + { + rb_set_black(other); + rb_set_red(parent); + __rb_rotate_left(parent, root); + other = parent->rb_right; + } + if ((!other->rb_left || rb_is_black(other->rb_left)) && + (!other->rb_right || rb_is_black(other->rb_right))) + { + rb_set_red(other); + node = parent; + parent = rb_parent(node); + } + else + { + if (!other->rb_right || rb_is_black(other->rb_right)) + { + struct rb_node *o_left; + if ((o_left = other->rb_left)) + rb_set_black(o_left); + rb_set_red(other); + __rb_rotate_right(other, root); + other = parent->rb_right; + } + rb_set_color(other, rb_color(parent)); + rb_set_black(parent); + if (other->rb_right) + rb_set_black(other->rb_right); + __rb_rotate_left(parent, root); + node = root->rb_node; + break; + } + } + else + { + other = parent->rb_left; + if (rb_is_red(other)) + { + rb_set_black(other); + rb_set_red(parent); + __rb_rotate_right(parent, root); + other = parent->rb_left; + } + if ((!other->rb_left || rb_is_black(other->rb_left)) && + (!other->rb_right || rb_is_black(other->rb_right))) + { + rb_set_red(other); + node = parent; + parent = rb_parent(node); + } + else + { + if (!other->rb_left || rb_is_black(other->rb_left)) + { + register struct rb_node *o_right; + if ((o_right = other->rb_right)) + rb_set_black(o_right); + rb_set_red(other); + __rb_rotate_left(other, root); + other = parent->rb_left; + } + rb_set_color(other, rb_color(parent)); + rb_set_black(parent); + if (other->rb_left) + rb_set_black(other->rb_left); + __rb_rotate_right(parent, root); + node = root->rb_node; + break; + } + } + } + if (node) + rb_set_black(node); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *child, *parent; + int color; + + if (!node->rb_left) + child = node->rb_right; + else if (!node->rb_right) + child = node->rb_left; + else + { + struct rb_node *old = node, *left; + + node = node->rb_right; + while ((left = node->rb_left) != NULL) + node = left; + child = node->rb_right; + parent = rb_parent(node); + color = rb_color(node); + + if (child) + rb_set_parent(child, parent); + if (parent == old) { + parent->rb_right = child; + parent = node; + } else + parent->rb_left = child; + + node->rb_parent_color = old->rb_parent_color; + node->rb_right = old->rb_right; + node->rb_left = old->rb_left; + node->size = old->size; + + if (rb_parent(old)) + { + if (rb_parent(old)->rb_left == old) + rb_parent(old)->rb_left = node; + else + rb_parent(old)->rb_right = node; + } else + root->rb_node = node; + + rb_set_parent(old->rb_left, node); + if (old->rb_right) + rb_set_parent(old->rb_right, node); + goto color; + } + + parent = rb_parent(node); + color = rb_color(node); + + if (child) + rb_set_parent(child, parent); + if (parent) + { + if (parent->rb_left == node) + parent->rb_left = child; + else + parent->rb_right = child; + } + else + root->rb_node = child; + + color: + if (color == RB_BLACK) + __rb_erase_color(child, parent, root); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(struct rb_node *node) +{ + struct rb_node *parent; + + if (rb_parent(node) == node) + return NULL; + + /* If we have a right-hand child, go down and then left as far + as we can. */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return node; + } + + /* No right-hand children. Everything down and left is + smaller than us, so any 'next' node must be in the general + direction of our parent. Go up the tree; any time the + ancestor is a right-hand child of its parent, keep going + up. First time it's a left-hand child of its parent, said + parent is our 'next' node. */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(struct rb_node *node) +{ + struct rb_node *parent; + + if (rb_parent(node) == node) + return NULL; + + /* If we have a left-hand child, go down and then right as far + as we can. */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return node; + } + + /* No left-hand children. Go up till we find an ancestor which + is a right-hand child of its parent */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + if (parent) { + if (victim == parent->rb_left) + parent->rb_left = new; + else + parent->rb_right = new; + } else { + root->rb_node = new; + } + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +/** + * Get the n-th node (in sort order) of the tree. + * + * \param node The root of the subtree to consider. + * \param n The order statistic to compute. + * + * \return Pointer to the \a n th greatest node on success, \p NULL on errors. + */ +struct rb_node *rb_nth(struct rb_node *node, unsigned n) +{ + unsigned size = 1; + + if (!node) + return NULL; + if (node->rb_left) + size += node->rb_left->size; + if (n == size) + return node; + if (n < size) + return rb_nth(node->rb_left, n); + return rb_nth(node->rb_right, n - size); +} + +/** + * Get the rank of a node in O(log n) time. + * + * \param node The node to get the rank of. + * \param rank Result pointer. + * + * \return Positive on success, -1 on errors. + */ +int rb_rank(struct rb_node *node, unsigned *rank) +{ + *rank = 1; + struct rb_node *parent; + + if (!node) + return -1; + if (node->rb_left) + *rank += node->rb_left->size; + while ((parent = rb_parent(node))) { + if (node == parent->rb_right) { + (*rank)++; + if (parent->rb_left) + *rank += parent->rb_left->size; + } + node = parent; + } + return 1; +} diff --git a/rbtree.h b/rbtree.h new file mode 100644 index 0000000..38b7752 --- /dev/null +++ b/rbtree.h @@ -0,0 +1,174 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + Some example of insert and search follows here. The search is a plain + normal search over an ordered tree. The insert instead must be implemented + int two steps: as first thing the code must insert the element in + order as a red leaf in the tree, then the support library function + rb_insert_color() must be called. Such function will do the + not trivial work to rebalance the rbtree if necessary. + +----------------------------------------------------------------------- +static inline struct page * rb_search_page_cache(struct inode * inode, + unsigned long offset) +{ + struct rb_node * n = inode->i_rb_page_cache.rb_node; + struct page * page; + + while (n) + { + page = rb_entry(n, struct page, rb_page_cache); + + if (offset < page->offset) + n = n->rb_left; + else if (offset > page->offset) + n = n->rb_right; + else + return page; + } + return NULL; +} + +static inline struct page * __rb_insert_page_cache(struct inode * inode, + unsigned long offset, + struct rb_node * node) +{ + struct rb_node ** p = &inode->i_rb_page_cache.rb_node; + struct rb_node * parent = NULL; + struct page * page; + + while (*p) + { + parent = *p; + page = rb_entry(parent, struct page, rb_page_cache); + + if (offset < page->offset) + p = &(*p)->rb_left; + else if (offset > page->offset) + p = &(*p)->rb_right; + else + return page; + } + + rb_link_node(node, parent, p); + + return NULL; +} + +static inline struct page * rb_insert_page_cache(struct inode * inode, + unsigned long offset, + struct rb_node * node) +{ + struct page * ret; + if ((ret = __rb_insert_page_cache(inode, offset, node))) + goto out; + rb_insert_color(node, &inode->i_rb_page_cache); + out: + return ret; +} +----------------------------------------------------------------------- +*/ + +/** \file rbtree.h Exported symbols from rbtree.h */ + +#ifndef _LINUX_RBTREE_H +#define _LINUX_RBTREE_H + + +/** get the struct this entry is embedded in */ +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + + +struct rb_node +{ + unsigned long rb_parent_color; +#define RB_RED 0 +#define RB_BLACK 1 + struct rb_node *rb_right; + struct rb_node *rb_left; + unsigned size; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root +{ + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) +#define rb_color(r) ((r)->rb_parent_color & 1) +#define rb_is_red(r) (!rb_color(r)) +#define rb_is_black(r) rb_color(r) +#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) +#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; +} +static inline void rb_set_color(struct rb_node *rb, int color) +{ + rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; +} + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) +#define RB_EMPTY_NODE(node) (rb_parent(node) == node) +#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(struct rb_node *); +extern struct rb_node *rb_prev(struct rb_node *); +extern struct rb_node *rb_first(struct rb_root *); +extern struct rb_node *rb_last(struct rb_root *); +extern struct rb_node *rb_nth(struct rb_node *node, unsigned n); +extern int rb_rank(struct rb_node *node, unsigned *rank); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, + struct rb_node ** rb_link) +{ + node->size = 1; + node->rb_parent_color = (unsigned long )parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; + /* Fixup the size fields in the tree */ + while ((node = rb_parent(node))) + node->size++; +} + +#endif /* _LINUX_RBTREE_H */ diff --git a/sha1.c b/sha1.c new file mode 100644 index 0000000..3cb2ea4 --- /dev/null +++ b/sha1.c @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2007-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file sha1.c Secure Hash Algorithm, provided by openssl. */ + +#include "log.h" +#include + +/** + * Compute the sha1 hash. + * + * \param data Pointer to the data to compute the hash value from. + * \param len The length of \a data in bytes. + * \param sha1 Result pointer. + * + * \a sha1 must point to an area at least 20 bytes large. + * + * \sa sha(3), openssl(1). + * */ +void sha1_hash(const char *data, unsigned long len, unsigned char *sha1) +{ + SHA_CTX c; + SHA1_Init(&c); + SHA1_Update(&c, data, len); + SHA1_Final(sha1, &c); +} diff --git a/sha1.h b/sha1.h new file mode 100644 index 0000000..4d21733 --- /dev/null +++ b/sha1.h @@ -0,0 +1,5 @@ +/** \file sha1.h Secure Hash Algorithm prototype */ + +/** Size of the hash value in bytes. */ +#define HASH_SIZE 20 +void sha1_hash(const char *data, unsigned long len, unsigned char *sha1); diff --git a/string.c b/string.c new file mode 100644 index 0000000..7ac77d0 --- /dev/null +++ b/string.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2004-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file string.c Memory allocation and string handling functions. */ + +#include "log.h" +#include "string.h" + +#include /* gettimeofday */ +#include +#include /* uname() */ +#include + +#include "error.h" + +/** + * Paraslash's version of malloc(). + * + * \param size The desired new size. + * + * A wrapper for malloc(3) which exits on errors. + * + * \return A pointer to the allocated memory, which is suitably aligned for any + * kind of variable. + * + * \sa malloc(3). + */ +__must_check __malloc void *para_malloc(size_t size) +{ + assert(size); + void *p = malloc(size); + + if (!p) { + EMERG_LOG("malloc failed (size = %zu), aborting\n", + size); + exit(EXIT_FAILURE); + } + return p; +} + +/** + * Paraslash's version of calloc(). + * + * \param size The desired new size. + * + * A wrapper for calloc(3) which exits on errors. + * + * \return A pointer to the allocated and zeroed-out memory, which is suitably + * aligned for any kind of variable. + * + * \sa calloc(3) + */ +__must_check __malloc void *para_calloc(size_t size) +{ + void *ret = para_malloc(size); + + memset(ret, 0, size); + return ret; +} + +/** + * Paraslash's version of strdup(). + * + * \param s The string to be duplicated. + * + * A wrapper for strdup(3). It calls \p exit(EXIT_FAILURE) on errors, i.e. + * there is no need to check the return value in the caller. + * + * \return A pointer to the duplicated string. If \p s was the NULL pointer, + * an pointer to an empty string is returned. + * + * \sa strdup(3) + */ +__must_check __malloc char *para_strdup(const char *s) +{ + char *ret; + + if ((ret = strdup(s? s: ""))) + return ret; + EMERG_LOG("strdup failed, aborting\n"); + exit(EXIT_FAILURE); +} diff --git a/string.h b/string.h new file mode 100644 index 0000000..3bee128 --- /dev/null +++ b/string.h @@ -0,0 +1,11 @@ +/* + * Copyright (C) 2006-2008 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** \file string.h exported sybmols from string.c */ + +__must_check __malloc void *para_malloc(size_t size); +__must_check __malloc void *para_calloc(size_t size); +__must_check __malloc char *para_strdup(const char *s);