X-Git-Url: http://git.tuebingen.mpg.de/?a=blobdiff_plain;f=osl.c;fp=osl.c;h=0000000000000000000000000000000000000000;hb=b2e6a24448a9e49e0766ab4f32163580eeff469e;hp=e2c1ef46f40624adddf6dd7611e07ec2a9f97ef8;hpb=09b7971c7a862924d4ce669f8a33bda567ba570b;p=paraslash.git diff --git a/osl.c b/osl.c deleted file mode 100644 index e2c1ef46..00000000 --- a/osl.c +++ /dev/null @@ -1,2082 +0,0 @@ -/* - * Copyright (C) 2007-2009 Andre Noll - * - * Licensed under the GPL v2. For licencing details see COPYING. - */ - -/** \file osl.c Object storage layer functions. */ -#include /* readdir() */ -#include - - -#include "para.h" -#include "error.h" -#include "fd.h" -#include "osl_core.h" -/** - * A wrapper for lseek(2). - * - * \param fd The file descriptor whose offset is to be to repositioned. - * \param offset A value-result parameter. - * \param whence Usual repositioning directive. - * - * Reposition the offset of the file descriptor \a fd to the argument \a offset - * according to the directive \a whence. Upon successful return, \a offset - * contains the resulting offset location as measured in bytes from the - * beginning of the file. - * - * \return Positive on success. Otherwise, the function returns \p -E_LSEEK. - * - * \sa lseek(2). - */ -int para_lseek(int fd, off_t *offset, int whence) -{ - int ret = -E_LSEEK; - - *offset = lseek(fd, *offset, whence); - if (*offset == -1) - return ret; - return 1; -} - -/** - * Wrapper for the write system call. - * - * \param fd The file descriptor to write to. - * \param buf The buffer to write. - * \param size The length of \a buf in bytes. - * - * This function writes out the given buffer and retries if an interrupt - * occurred during the write. - * - * \return On success, the number of bytes written is returned, otherwise, the - * function returns \p -E_WRITE. - * - * \sa write(2). - */ -ssize_t para_write(int fd, const void *buf, size_t size) -{ - ssize_t ret; - - for (;;) { - ret = write(fd, buf, size); - if ((ret < 0) && (errno == EAGAIN || errno == EINTR)) - continue; - return ret >= 0? ret : -E_WRITE; - } -} - -/** - * Write the whole buffer to a file descriptor. - * - * \param fd The file descriptor to write to. - * \param buf The buffer to write. - * \param size The length of \a buf in bytes. - * - * This function writes the given buffer and continues on short writes and - * when interrupted by a signal. - * - * \return Positive on success, negative on errors. Possible errors: any - * errors returned by para_write(). - * - * \sa para_write(). - */ -ssize_t para_write_all(int fd, const void *buf, size_t size) -{ -// PARA_DEBUG_LOG("writing %zu bytes\n", size); - const char *b = buf; - while (size) { - ssize_t ret = para_write(fd, b, size); -// PARA_DEBUG_LOG("ret: %zd\n", ret); - if (ret < 0) - return ret; - b += ret; - size -= ret; - } - return 1; -} -/** - * Open a file, write the given buffer and close the file. - * - * \param filename Full path to the file to open. - * \param buf The buffer to write to the file. - * \param size The size of \a buf. - * - * \return Positive on success, negative on errors. Possible errors include: - * any errors from para_open() or para_write(). - * - * \sa para_open(), para_write(). - */ -int para_write_file(const char *filename, const void *buf, size_t size) -{ - int ret, fd; - - ret = para_open(filename, O_WRONLY | O_CREAT | O_EXCL, 0644); - if (ret < 0) - return ret; - fd = ret; - ret = para_write_all(fd, buf, size); - if (ret < 0) - goto out; - ret = 1; -out: - close(fd); - return ret; -} - -static int append_file(const char *filename, char *header, size_t header_size, - char *data, size_t data_size, uint32_t *new_pos) -{ - int ret, fd; - -// PARA_DEBUG_LOG("appending %zu + %zu bytes\n", header_size, data_size); - ret = para_open(filename, O_WRONLY | O_CREAT | O_APPEND, 0644); - if (ret < 0) - return ret; - fd = ret; - if (header && header_size) { - ret = para_write_all(fd, header, header_size); - if (ret < 0) - goto out; - } - ret = para_write_all(fd, data, data_size); - if (ret < 0) - goto out; - if (new_pos) { - off_t offset = 0; - ret = para_lseek(fd, &offset, SEEK_END); - if (ret < 0) - goto out; -// PARA_DEBUG_LOG("new file size: " FMT_OFF_T "\n", offset); - *new_pos = offset; - } - ret = 1; -out: - close(fd); - return ret; -} - -/** - * Traverse the given directory recursively. - * - * \param dirname The directory to traverse. - * \param func The function to call for each entry. - * \param private_data Pointer to an arbitrary data structure. - * - * For each regular file under \a dirname, the supplied function \a func is - * called. The full path of the regular file and the \a private_data pointer - * are passed to \a func. Directories for which the calling process has no - * permissions to change to are silently ignored. - * - * \return Standard. - */ -int for_each_file_in_dir(const char *dirname, - int (*func)(const char *, void *), void *private_data) -{ - DIR *dir; - struct dirent *entry; - int cwd_fd, ret2, ret = para_opendir(dirname, &dir, &cwd_fd); - - if (ret < 0) - return ret == -ERRNO_TO_PARA_ERROR(EACCES)? 1 : ret; - /* scan cwd recursively */ - while ((entry = readdir(dir))) { - mode_t m; - char *tmp; - struct stat s; - - if (!strcmp(entry->d_name, ".")) - continue; - if (!strcmp(entry->d_name, "..")) - continue; - if (lstat(entry->d_name, &s) == -1) - continue; - m = s.st_mode; - if (!S_ISREG(m) && !S_ISDIR(m)) - continue; - tmp = make_message("%s/%s", dirname, entry->d_name); - if (!S_ISDIR(m)) { - ret = func(tmp, private_data); - free(tmp); - if (ret < 0) - goto out; - continue; - } - /* directory */ - ret = for_each_file_in_dir(tmp, func, private_data); - free(tmp); - if (ret < 0) - goto out; - } - ret = 1; -out: - closedir(dir); - ret2 = para_fchdir(cwd_fd); - if (ret2 < 0 && ret >= 0) - ret = ret2; - close(cwd_fd); - return ret; -} - -static int verify_name(const char *name) -{ - if (!name) - return -E_BAD_NAME; - if (!*name) - return -E_BAD_NAME; - if (strchr(name, '/')) - return -E_BAD_NAME; - if (!strcmp(name, "..")) - return -E_BAD_NAME; - if (!strcmp(name, ".")) - return -E_BAD_NAME; - return 1; -} - -/** - * Compare two osl objects pointing to unsigned integers of 32 bit size. - * - * \param obj1 Pointer to the first integer. - * \param obj2 Pointer to the second integer. - * - * \return The values required for an osl compare function. - * - * \sa osl_compare_func, osl_hash_compare(). - */ -int uint32_compare(const struct osl_object *obj1, const struct osl_object *obj2) -{ - uint32_t d1 = read_u32((const char *)obj1->data); - uint32_t d2 = read_u32((const char *)obj2->data); - - if (d1 < d2) - return 1; - if (d1 > d2) - return -1; - return 0; -} - -/** - * Compare two osl objects pointing to hash values. - * - * \param obj1 Pointer to the first hash object. - * \param obj2 Pointer to the second hash object. - * - * \return The values required for an osl compare function. - * - * \sa osl_compare_func, uint32_compare(). - */ -int osl_hash_compare(const struct osl_object *obj1, const struct osl_object *obj2) -{ - return hash_compare((HASH_TYPE *)obj1->data, (HASH_TYPE *)obj2->data); -} - -static char *disk_storage_dirname(const struct osl_table *t, unsigned col_num, - const char *ds_name) -{ - char *dirname, *column_name = column_filename(t, col_num); - - if (!(t->desc->flags & OSL_LARGE_TABLE)) - return column_name; - dirname = make_message("%s/%.2s", column_name, ds_name); - free(column_name); - return dirname; -} - -static char *disk_storage_name_of_object(const struct osl_table *t, - const struct osl_object *obj) -{ - HASH_TYPE hash[HASH_SIZE]; - hash_object(obj, hash); - return disk_storage_name_of_hash(t, hash); -} - -static int disk_storage_name_of_row(const struct osl_table *t, - const struct osl_row *row, char **name) -{ - struct osl_object obj; - int ret = osl_get_object(t, row, t->disk_storage_name_column, &obj); - - if (ret < 0) - return ret; - *name = disk_storage_name_of_object(t, &obj); - return 1; -} - -static void column_name_hash(const char *col_name, HASH_TYPE *hash) -{ - hash_function(col_name, strlen(col_name), hash); -} - -static int init_column_descriptions(struct osl_table *t) -{ - int i, j, ret; - const struct osl_column_description *cd; - - ret = verify_name(t->desc->name); - if (ret < 0) - goto err; - ret = -E_BAD_DB_DIR; - if (!t->desc->dir && (t->num_disk_storage_columns || t->num_mapped_columns)) - goto err; - /* the size of the index header without column descriptions */ - t->index_header_size = IDX_COLUMN_DESCRIPTIONS; - FOR_EACH_COLUMN(i, t->desc, cd) { - struct osl_column *col = t->columns + i; - if (cd->storage_flags & OSL_RBTREE) { - if (!cd->compare_function) - return -E_NO_COMPARE_FUNC; - } - if (cd->storage_type == OSL_NO_STORAGE) - continue; - ret = -E_NO_COLUMN_NAME; - if (!cd->name || !cd->name[0]) - goto err; - ret = verify_name(cd->name); - if (ret < 0) - goto err; - t->index_header_size += index_column_description_size(cd->name); - column_name_hash(cd->name, col->name_hash); - ret = -E_DUPLICATE_COL_NAME; - for (j = i + 1; j < t->desc->num_columns; j++) { - const char *name2 = get_column_description(t->desc, - j)->name; - if (cd->name && name2 && !strcmp(cd->name, name2)) - goto err; - } - } - return 1; -err: - return ret; -} - -/** - * Initialize a struct table from given table description. - * - * \param desc The description of the osl table. - * \param table_ptr Result is returned here. - * - * This function performs several sanity checks on \p desc and returns if any - * of these tests fail. On success, a struct \p osl_table is allocated and - * initialized with data derived from \p desc. - * - * \return Positive on success, negative on errors. Possible errors include: \p - * E_BAD_TABLE_DESC, \p E_NO_COLUMN_DESC, \p E_NO_COLUMNS, \p - * E_BAD_STORAGE_TYPE, \p E_BAD_STORAGE_FLAGS, \p E_BAD_STORAGE_SIZE, \p - * E_NO_UNIQUE_RBTREE_COLUMN, \p E_NO_RBTREE_COL. - * - * \sa struct osl_table. - */ -int init_table_structure(const struct osl_table_description *desc, - struct osl_table **table_ptr) -{ - const struct osl_column_description *cd; - struct osl_table *t = para_calloc(sizeof(*t)); - int i, ret = -E_BAD_TABLE_DESC, have_disk_storage_name_column = 0; - - if (!desc) - goto err; - PARA_DEBUG_LOG("creating table structure for '%s' from table " - "description\n", desc->name); - ret = -E_NO_COLUMN_DESC; - if (!desc->column_descriptions) - goto err; - ret = -E_NO_COLUMNS; - if (!desc->num_columns) - goto err; - t->columns = para_calloc(desc->num_columns * sizeof(struct osl_column)); - t->desc = desc; - FOR_EACH_COLUMN(i, t->desc, cd) { - enum osl_storage_type st = cd->storage_type; - enum osl_storage_flags sf = cd->storage_flags; - struct osl_column *col = &t->columns[i]; - - ret = -E_BAD_STORAGE_TYPE; - if (st != OSL_MAPPED_STORAGE && st != OSL_DISK_STORAGE - && st != OSL_NO_STORAGE) - goto err; - ret = -E_BAD_STORAGE_FLAGS; - if (st == OSL_DISK_STORAGE && sf & OSL_RBTREE) - goto err; - ret = -E_BAD_STORAGE_SIZE; - if (sf & OSL_FIXED_SIZE && !cd->data_size) - goto err; - switch (st) { - case OSL_DISK_STORAGE: - t->num_disk_storage_columns++; - break; - case OSL_MAPPED_STORAGE: - t->num_mapped_columns++; - col->index_offset = t->row_index_size; - t->row_index_size += 8; - break; - case OSL_NO_STORAGE: - col->volatile_num = t->num_volatile_columns; - t->num_volatile_columns++; - break; - } - if (sf & OSL_RBTREE) { - col->rbtree_num = t->num_rbtrees; - t->num_rbtrees++; - if ((sf & OSL_UNIQUE) && (st == OSL_MAPPED_STORAGE)) { - if (!have_disk_storage_name_column) - t->disk_storage_name_column = i; - have_disk_storage_name_column = 1; - } - } - } - ret = -E_NO_UNIQUE_RBTREE_COLUMN; - if (t->num_disk_storage_columns && !have_disk_storage_name_column) - goto err; - ret = -E_NO_RBTREE_COL; - if (!t->num_rbtrees) - goto err; - /* success */ - PARA_DEBUG_LOG("OK. Index entry size: %u\n", t->row_index_size); - ret = init_column_descriptions(t); - if (ret < 0) - goto err; - *table_ptr = t; - return 1; -err: - free(t->columns); - free(t); - return ret; -} - -/** - * Read the table description from index header. - * - * \param map The memory mapping of the index file. - * \param desc The values found in the index header are returned here. - * - * Read the index header, check for the paraslash magic string and the table version number. - * Read all information stored in the index header into \a desc. - * - * \return Positive on success, negative on errors. - * - * \sa struct osl_table_description, osl_create_table. - */ -int read_table_desc(struct osl_object *map, struct osl_table_description *desc) -{ - char *buf = map->data; - uint8_t version; - uint16_t header_size; - int ret, i; - unsigned offset; - struct osl_column_description *cd; - - if (map->size < MIN_INDEX_HEADER_SIZE(1)) - return -E_SHORT_TABLE; - if (strncmp(buf + IDX_PARA_MAGIC, PARA_MAGIC, strlen(PARA_MAGIC))) - return -E_NO_MAGIC; - version = read_u8(buf + IDX_VERSION); - if (version < MIN_TABLE_VERSION || version > MAX_TABLE_VERSION) - return -E_VERSION_MISMATCH; - desc->num_columns = read_u8(buf + IDX_TABLE_FLAGS); - desc->flags = read_u8(buf + IDX_TABLE_FLAGS); - desc->num_columns = read_u16(buf + IDX_NUM_COLUMNS); - PARA_DEBUG_LOG("%u columns\n", desc->num_columns); - if (!desc->num_columns) - return -E_NO_COLUMNS; - header_size = read_u16(buf + IDX_HEADER_SIZE); - if (map->size < header_size) - return -E_BAD_SIZE; - desc->column_descriptions = para_calloc(desc->num_columns - * sizeof(struct osl_column_description)); - offset = IDX_COLUMN_DESCRIPTIONS; - FOR_EACH_COLUMN(i, desc, cd) { - char *null_byte; - - ret = -E_SHORT_TABLE; - if (map->size < offset + MIN_IDX_COLUMN_DESCRIPTION_SIZE) { - PARA_ERROR_LOG("map size = %zu < %u = offset + min desc size\n", - map->size, offset + MIN_IDX_COLUMN_DESCRIPTION_SIZE); - goto err; - } - cd->storage_type = read_u16(buf + offset + IDX_CD_STORAGE_TYPE); - cd->storage_flags = read_u16(buf + offset + - IDX_CD_STORAGE_FLAGS); - cd->data_size = read_u32(buf + offset + IDX_CD_DATA_SIZE); - null_byte = memchr(buf + offset + IDX_CD_NAME, '\0', - map->size - offset - IDX_CD_NAME); - ret = -E_INDEX_CORRUPTION; - if (!null_byte) - goto err; - cd->name = para_strdup(buf + offset + IDX_CD_NAME); - offset += index_column_description_size(cd->name); - } - if (offset != header_size) { - ret = -E_INDEX_CORRUPTION; - PARA_ERROR_LOG("real header size = %u != %u = stored header size\n", - offset, header_size); - goto err; - } - return 1; -err: - FOR_EACH_COLUMN(i, desc, cd) - free(cd->name); - return ret; -} - -/* - * check whether the table description given by \p t->desc matches the on-disk - * table structure stored in the index of \a t. - */ -static int compare_table_descriptions(struct osl_table *t) -{ - int i, ret; - struct osl_table_description desc; - const struct osl_column_description *cd1, *cd2; - - /* read the on-disk structure into desc */ - ret = read_table_desc(&t->index_map, &desc); - if (ret < 0) - return ret; - ret = -E_BAD_TABLE_FLAGS; - if (desc.flags != t->desc->flags) - goto out; - ret = -E_BAD_COLUMN_NUM; - if (desc.num_columns != t->desc->num_columns) - goto out; - FOR_EACH_COLUMN(i, t->desc, cd1) { - cd2 = get_column_description(&desc, i); - ret = -E_BAD_STORAGE_TYPE; - if (cd1->storage_type != cd2->storage_type) - goto out; - ret = -E_BAD_STORAGE_FLAGS; - if (cd1->storage_flags != cd2->storage_flags) { - PARA_ERROR_LOG("sf1 = %u != %u = sf2\n", - cd1->storage_flags, cd2->storage_flags); - goto out; - } - ret = -E_BAD_DATA_SIZE; - if (cd1->storage_flags & OSL_FIXED_SIZE) - if (cd1->data_size != cd2->data_size) - goto out; - ret = -E_BAD_COLUMN_NAME; - if (strcmp(cd1->name, cd2->name)) - goto out; - } - PARA_DEBUG_LOG("table description of '%s' matches on-disk data, good\n", - t->desc->name); - ret = 1; -out: - FOR_EACH_COLUMN(i, &desc, cd1) - free(cd1->name); - free(desc.column_descriptions); - return ret; -} - -static int create_table_index(struct osl_table *t) -{ - char *buf, *filename; - int i, ret; - size_t size = t->index_header_size; - const struct osl_column_description *cd; - unsigned offset; - - PARA_INFO_LOG("creating %zu byte index for table %s\n", size, - t->desc->name); - buf = para_calloc(size); - sprintf(buf + IDX_PARA_MAGIC, "%s", PARA_MAGIC); - write_u8(buf + IDX_TABLE_FLAGS, t->desc->flags); - write_u8(buf + IDX_DIRTY_FLAG, 0); - write_u8(buf + IDX_VERSION, CURRENT_TABLE_VERSION); - write_u16(buf + IDX_NUM_COLUMNS, t->desc->num_columns); - write_u16(buf + IDX_HEADER_SIZE, t->index_header_size); - offset = IDX_COLUMN_DESCRIPTIONS; - FOR_EACH_COLUMN(i, t->desc, cd) { - write_u16(buf + offset + IDX_CD_STORAGE_TYPE, - cd->storage_type); - write_u16(buf + offset + IDX_CD_STORAGE_FLAGS, - cd->storage_flags); - if (cd->storage_flags & OSL_FIXED_SIZE) - write_u32(buf + offset + IDX_CD_DATA_SIZE, - cd->data_size); - strcpy(buf + offset + IDX_CD_NAME, cd->name); - offset += index_column_description_size(cd->name); - } - assert(offset = size); - filename = index_filename(t->desc); - ret = para_write_file(filename, buf, size); - free(buf); - free(filename); - return ret; -} - -/** - * Create a new osl table. - * - * \param desc Pointer to the table description. - * - * \return Standard. - */ -int osl_create_table(const struct osl_table_description *desc) -{ - const struct osl_column_description *cd; - char *table_dir = NULL, *filename; - struct osl_table *t; - int i, ret = init_table_structure(desc, &t); - - if (ret < 0) - return ret; - PARA_INFO_LOG("creating %s\n", desc->name); - FOR_EACH_COLUMN(i, t->desc, cd) { - if (cd->storage_type == OSL_NO_STORAGE) - continue; - if (!table_dir) { - ret = para_mkdir(desc->dir, 0777); - if (ret < 0 && !is_errno(-ret, EEXIST)) - goto out; - table_dir = make_message("%s/%s", desc->dir, - desc->name); - ret = para_mkdir(table_dir, 0777); - if (ret < 0) - goto out; - } - filename = column_filename(t, i); - PARA_INFO_LOG("filename: %s\n", filename); - if (cd->storage_type == OSL_MAPPED_STORAGE) { - ret = para_open(filename, O_RDWR | O_CREAT | O_EXCL, - 0644); - free(filename); - if (ret < 0) - goto out; - close(ret); - continue; - } - /* DISK STORAGE */ - ret = para_mkdir(filename, 0777); - free(filename); - if (ret < 0) - goto out; - } - if (t->num_mapped_columns) { - ret = create_table_index(t); - if (ret < 0) - goto out; - } - ret = 1; -out: - free(table_dir); - free(t->columns); - free(t); - return ret; -} - -static int table_is_dirty(struct osl_table *t) -{ - char *buf = (char *)t->index_map.data + IDX_DIRTY_FLAG; - uint8_t dirty = read_u8(buf) & 0x1; - return !!dirty; -} - -static void mark_table_dirty(struct osl_table *t) -{ - char *buf = (char *)t->index_map.data + IDX_DIRTY_FLAG; - write_u8(buf, read_u8(buf) | 1); -} - -static void mark_table_clean(struct osl_table *t) -{ - char *buf = (char *)t->index_map.data + IDX_DIRTY_FLAG; - write_u8(buf, read_u8(buf) & 0xfe); -} - -static void unmap_column(struct osl_table *t, unsigned col_num) -{ - struct osl_object map = t->columns[col_num].data_map; - int ret; - if (!map.data) - return; - ret = para_munmap(map.data, map.size); - assert(ret > 0); - map.data = NULL; -} - -/** - * Unmap all mapped files of an osl table. - * - * \param t Pointer to a mapped table. - * \param flags Options for unmapping. - * - * \return Positive on success, negative on errors. - * - * \sa map_table(), enum osl_close_flags, para_munmap(). - */ -int unmap_table(struct osl_table *t, enum osl_close_flags flags) -{ - unsigned i; - const struct osl_column_description *cd; - int ret; - - if (!t->num_mapped_columns) /* can this ever happen? */ - return 1; - PARA_DEBUG_LOG("unmapping table '%s'\n", t->desc->name); - if (!t->index_map.data) - return -E_NOT_MAPPED; - if (flags & OSL_MARK_CLEAN) - mark_table_clean(t); - ret = para_munmap(t->index_map.data, t->index_map.size); - if (ret < 0) - return ret; - t->index_map.data = NULL; - if (!t->num_rows) - return 1; - FOR_EACH_MAPPED_COLUMN(i, t, cd) - unmap_column(t, i); - return 1; -} - -static int map_column(struct osl_table *t, unsigned col_num) -{ - struct stat statbuf; - char *filename = column_filename(t, col_num); - int ret = -E_STAT; - if (stat(filename, &statbuf) < 0) { - free(filename); - return ret; - } - if (!(S_IFREG & statbuf.st_mode)) { - free(filename); - return ret; - } - ret = mmap_full_file(filename, O_RDWR, - &t->columns[col_num].data_map.data, - &t->columns[col_num].data_map.size, - NULL); - free(filename); - return ret; -} - -/** - * Map the index file and all columns of type \p OSL_MAPPED_STORAGE into memory. - * - * \param t Pointer to an initialized table structure. - * \param flags Mapping options. - * - * \return Negative return value on errors; on success the number of rows - * (including invalid rows) is returned. - * - * \sa unmap_table(), enum map_table_flags, osl_open_table(), mmap(2). - */ -int map_table(struct osl_table *t, enum map_table_flags flags) -{ - char *filename; - const struct osl_column_description *cd; - int i = 0, ret, num_rows = 0; - - if (!t->num_mapped_columns) - return 0; - if (t->index_map.data) - return -E_ALREADY_MAPPED; - filename = index_filename(t->desc); - PARA_DEBUG_LOG("mapping table '%s' (index: %s)\n", t->desc->name, filename); - ret = mmap_full_file(filename, flags & MAP_TBL_FL_MAP_RDONLY? - O_RDONLY : O_RDWR, &t->index_map.data, &t->index_map.size, NULL); - free(filename); - if (ret < 0) - return ret; - if (flags & MAP_TBL_FL_VERIFY_INDEX) { - ret = compare_table_descriptions(t); - if (ret < 0) - goto err; - } - ret = -E_BUSY; - if (!(flags & MAP_TBL_FL_IGNORE_DIRTY)) { - if (table_is_dirty(t)) { - PARA_ERROR_LOG("%s is dirty\n", t->desc->name); - goto err; - } - } - mark_table_dirty(t); - num_rows = table_num_rows(t); - if (!num_rows) - return num_rows; - /* map data files */ - FOR_EACH_MAPPED_COLUMN(i, t, cd) { - ret = map_column(t, i); - if (ret < 0) - goto err; - } - return num_rows; -err: /* unmap what is already mapped */ - for (i--; i >= 0; i--) { - struct osl_object map = t->columns[i].data_map; - para_munmap(map.data, map.size); - map.data = NULL; - } - para_munmap(t->index_map.data, t->index_map.size); - t->index_map.data = NULL; - return ret; -} - -/** - * Retrieve a mapped object by row and column number. - * - * \param t Pointer to an open osl table. - * \param col_num Number of the mapped column containing the object to retrieve. - * \param row_num Number of the row containing the object to retrieve. - * \param obj The result is returned here. - * - * It is considered an error if \a col_num does not refer to a column - * of storage type \p OSL_MAPPED_STORAGE. - * - * \return Positive on success, negative on errors. Possible errors include: - * \p E_BAD_ROW_NUM, \p E_INVALID_OBJECT. - * - * \sa osl_storage_type. - */ -int get_mapped_object(const struct osl_table *t, unsigned col_num, - uint32_t row_num, struct osl_object *obj) -{ - struct osl_column *col = &t->columns[col_num]; - uint32_t offset; - char *header; - char *cell_index; - int ret; - - if (t->num_rows <= row_num) - return -E_BAD_ROW_NUM; - ret = get_cell_index(t, row_num, col_num, &cell_index); - if (ret < 0) - return ret; - offset = read_u32(cell_index); - obj->size = read_u32(cell_index + 4) - 1; - header = col->data_map.data + offset; - obj->data = header + 1; - if (read_u8(header) == 0xff) { - PARA_ERROR_LOG("col %u, size %zu, offset %u\n", col_num, - obj->size, offset); - return -E_INVALID_OBJECT; - } - return 1; -} - -static int search_rbtree(const struct osl_object *obj, - const struct osl_table *t, unsigned col_num, - struct rb_node **result, struct rb_node ***rb_link) -{ - struct osl_column *col = &t->columns[col_num]; - struct rb_node **new = &col->rbtree.rb_node, *parent = NULL; - const struct osl_column_description *cd = - get_column_description(t->desc, col_num); - enum osl_storage_type st = cd->storage_type; - while (*new) { - struct osl_row *this_row = get_row_pointer(*new, - col->rbtree_num); - int ret; - struct osl_object this_obj; - parent = *new; - if (st == OSL_MAPPED_STORAGE) { - ret = get_mapped_object(t, col_num, this_row->num, - &this_obj); - if (ret < 0) - return ret; - } else - this_obj = this_row->volatile_objects[col->volatile_num]; - ret = cd->compare_function(obj, &this_obj); - if (!ret) { - if (result) - *result = get_rb_node_pointer(this_row, - col->rbtree_num); - return 1; - } - if (ret < 0) - new = &((*new)->rb_left); - else - new = &((*new)->rb_right); - } - if (result) - *result = parent; - if (rb_link) - *rb_link = new; - return -E_RB_KEY_NOT_FOUND; -} - -static int insert_rbtree(struct osl_table *t, unsigned col_num, - const struct osl_row *row, const struct osl_object *obj) -{ - struct rb_node *parent, **rb_link; - unsigned rbtree_num; - struct rb_node *n; - int ret = search_rbtree(obj, t, col_num, &parent, &rb_link); - - if (ret > 0) - return -E_RB_KEY_EXISTS; - rbtree_num = t->columns[col_num].rbtree_num; - n = get_rb_node_pointer(row, rbtree_num); - rb_link_node(n, parent, rb_link); - rb_insert_color(n, &t->columns[col_num].rbtree); - return 1; -} - -static void remove_rb_node(struct osl_table *t, unsigned col_num, - const struct osl_row *row) -{ - struct osl_column *col = &t->columns[col_num]; - const struct osl_column_description *cd = - get_column_description(t->desc, col_num); - enum osl_storage_flags sf = cd->storage_flags; - struct rb_node *victim, *splice_out_node, *tmp; - if (!(sf & OSL_RBTREE)) - return; - /* - * Which node is removed/spliced out actually depends on how many - * children the victim node has: If it has no children, it gets - * deleted. If it has one child, it gets spliced out. If it has two - * children, its successor (which has at most a right child) gets - * spliced out. - */ - victim = get_rb_node_pointer(row, col->rbtree_num); - if (victim->rb_left && victim->rb_right) - splice_out_node = rb_next(victim); - else - splice_out_node = victim; - /* Go up to the root and decrement the size of each node in the path. */ - for (tmp = splice_out_node; tmp; tmp = rb_parent(tmp)) - tmp->size--; - rb_erase(victim, &col->rbtree); -} - -static int add_row_to_rbtrees(struct osl_table *t, uint32_t row_num, - struct osl_object *volatile_objs, struct osl_row **row_ptr) -{ - unsigned i; - int ret; - struct osl_row *row = allocate_row(t->num_rbtrees); - const struct osl_column_description *cd; - - row->num = row_num; - row->volatile_objects = volatile_objs; - FOR_EACH_RBTREE_COLUMN(i, t, cd) { - if (cd->storage_type == OSL_MAPPED_STORAGE) { - struct osl_object obj; - ret = get_mapped_object(t, i, row_num, &obj); - if (ret < 0) - goto err; - ret = insert_rbtree(t, i, row, &obj); - } else { /* volatile */ - const struct osl_object *obj - = volatile_objs + t->columns[i].volatile_num; - ret = insert_rbtree(t, i, row, obj); - } - if (ret < 0) - goto err; - } - if (row_ptr) - *row_ptr = row; - return 1; -err: /* rollback changes, i.e. remove added entries from rbtrees */ - while (i) - remove_rb_node(t, i--, row); - free(row); - return ret; -} - -static void free_volatile_objects(const struct osl_table *t, - enum osl_close_flags flags) -{ - int i, j; - struct rb_node *n; - struct osl_column *rb_col; - const struct osl_column_description *cd; - - if (!t->num_volatile_columns) - return; - /* find the first rbtree column (any will do) */ - FOR_EACH_RBTREE_COLUMN(i, t, cd) - break; - rb_col = t->columns + i; - /* walk that rbtree and free all volatile objects */ - for (n = rb_first(&rb_col->rbtree); n; n = rb_next(n)) { - struct osl_row *r = get_row_pointer(n, rb_col->rbtree_num); - if (flags & OSL_FREE_VOLATILE) - FOR_EACH_VOLATILE_COLUMN(j, t, cd) { - if (cd->storage_flags & OSL_DONT_FREE) - continue; - free(r->volatile_objects[ - t->columns[j].volatile_num].data); - } -// for (j = 0; j < t->num_volatile_columns; j++) -// free(r->volatile_objects[j].data); - free(r->volatile_objects); - } -} - -/** - * Erase all rbtree nodes and free resources. - * - * \param t Pointer to an open osl table. - * - * This function is called by osl_close_table(). - */ -void clear_rbtrees(struct osl_table *t) -{ - const struct osl_column_description *cd; - unsigned i, rbtrees_cleared = 0; - - FOR_EACH_RBTREE_COLUMN(i, t, cd) { - struct osl_column *col = &t->columns[i]; - struct rb_node *n; - rbtrees_cleared++; - for (n = rb_first(&col->rbtree); n;) { - struct osl_row *r; - rb_erase(n, &col->rbtree); - if (rbtrees_cleared == t->num_rbtrees) { - r = get_row_pointer(n, col->rbtree_num); - n = rb_next(n); - free(r); - } else - n = rb_next(n); - } - } - -} - -/** - * Close an osl table. - * - * \param t Pointer to the table to be closed. - * \param flags Options for what should be cleaned up. - * - * If osl_open_table() succeeds, the resulting table pointer must later be - * passed to this function in order to flush all changes to the file system and - * to free the resources that were allocated by osl_open_table(). - * - * \return Positive on success, negative on errors. Possible errors: \p E_BAD_TABLE, - * errors returned by unmap_table(). - * - * \sa osl_open_table(), unmap_table(). - */ -int osl_close_table(struct osl_table *t, enum osl_close_flags flags) -{ - int ret; - - if (!t) - return -E_BAD_TABLE; - free_volatile_objects(t, flags); - clear_rbtrees(t); - ret = unmap_table(t, flags); - if (ret < 0) - PARA_ERROR_LOG("unmap_table failed: %d\n", ret); - free(t->columns); - free(t); - return ret; -} - -/** - * Find out whether the given row number corresponds to an invalid row. - * - * \param t Pointer to the osl table. - * \param row_num The number of the row in question. - * - * By definition, a row is considered invalid if all its index entries - * are invalid. - * - * \return Positive if \a row_num corresponds to an invalid row, - * zero if it corresponds to a valid row, negative on errors. - */ -int row_is_invalid(struct osl_table *t, uint32_t row_num) -{ - char *row_index; - int i, ret = get_row_index(t, row_num, &row_index); - - if (ret < 0) - return ret; - for (i = 0; i < t->row_index_size; i++) { - if ((unsigned char)row_index[i] != 0xff) - return 0; - } - PARA_INFO_LOG("row %d is invalid\n", row_num); - return 1; -} - -/** - * Invalidate a row of an osl table. - * - * \param t Pointer to an open osl table. - * \param row_num Number of the row to mark as invalid. - * - * This function marks each mapped object in the index entry of \a row as - * invalid. - * - * \return Positive on success, negative on errors. - */ -int mark_row_invalid(struct osl_table *t, uint32_t row_num) -{ - char *row_index; - int ret = get_row_index(t, row_num, &row_index); - - if (ret < 0) - return ret; - PARA_INFO_LOG("marking row %d as invalid\n", row_num); - memset(row_index, 0xff, t->row_index_size); - return 1; -} - -/** - * Initialize all rbtrees and compute number of invalid rows. - * - * \param t The table containing the rbtrees to be initialized. - * - * \return Positive on success, negative on errors. - */ -int init_rbtrees(struct osl_table *t) -{ - int i, ret; - const struct osl_column_description *cd; - - /* create rbtrees */ - FOR_EACH_RBTREE_COLUMN(i, t, cd) - t->columns[i].rbtree = RB_ROOT; - /* add valid rows to rbtrees */ - t->num_invalid_rows = 0; - for (i = 0; i < t->num_rows; i++) { - ret = row_is_invalid(t, i); - if (ret < 0) - return ret; - if (ret) { - t->num_invalid_rows++; - continue; - } - ret = add_row_to_rbtrees(t, i, NULL, NULL); - if (ret < 0) - return ret; - } - return 1; -} - -/** - * Open an osl table. - * - * Each osl table must be opened before its data can be accessed. - * - * \param table_desc Describes the table to be opened. - * \param result Contains a pointer to the open table on success. - * - * The table description given by \a desc should coincide with the - * description used at creation time. - * - * \return Standard. - */ -int osl_open_table(const struct osl_table_description *table_desc, - struct osl_table **result) -{ - int i, ret; - struct osl_table *t; - const struct osl_column_description *cd; - - PARA_INFO_LOG("opening table %s\n", table_desc->name); - ret = init_table_structure(table_desc, &t); - if (ret < 0) - return ret; - FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) { - /* check if directory exists */ - char *dirname = column_filename(t, i); - struct stat statbuf; - ret = stat(dirname, &statbuf); - free(dirname); - if (ret < 0) { - ret = -ERRNO_TO_PARA_ERROR(errno); - goto err; - } - ret = -ERRNO_TO_PARA_ERROR(ENOTDIR); - if (!S_ISDIR(statbuf.st_mode)) - goto err; - } - ret = map_table(t, MAP_TBL_FL_VERIFY_INDEX); - if (ret < 0) - goto err; - t->num_rows = ret; - PARA_DEBUG_LOG("num rows: %d\n", t->num_rows); - ret = init_rbtrees(t); - if (ret < 0) { - osl_close_table(t, OSL_MARK_CLEAN); /* ignore further errors */ - return ret; - } - *result = t; - return 1; -err: - free(t->columns); - free(t); - return ret; -} - -static int create_disk_storage_object_dir(const struct osl_table *t, - unsigned col_num, const char *ds_name) -{ - char *dirname; - int ret; - - if (!(t->desc->flags & OSL_LARGE_TABLE)) - return 1; - dirname = disk_storage_dirname(t, col_num, ds_name); - ret = para_mkdir(dirname, 0777); - free(dirname); - if (ret < 0 && !is_errno(-ret, EEXIST)) - return ret; - return 1; -} - -static int write_disk_storage_file(const struct osl_table *t, unsigned col_num, - const struct osl_object *obj, const char *ds_name) -{ - int ret; - char *filename; - - ret = create_disk_storage_object_dir(t, col_num, ds_name); - if (ret < 0) - return ret; - filename = disk_storage_path(t, col_num, ds_name); - ret = para_write_file(filename, obj->data, obj->size); - free(filename); - return ret; -} - -static int append_map_file(const struct osl_table *t, unsigned col_num, - const struct osl_object *obj, uint32_t *new_size) -{ - char *filename = column_filename(t, col_num); - int ret; - char header = 0; /* zero means valid object */ - -// PARA_DEBUG_LOG("appending %zu + 1 byte\n", obj->size); - ret = append_file(filename, &header, 1, obj->data, obj->size, - new_size); - free(filename); - return ret; -} - -static int append_row_index(const struct osl_table *t, char *row_index) -{ - char *filename; - int ret; - - if (!t->num_mapped_columns) - return 1; - filename = index_filename(t->desc); - ret = append_file(filename, NULL, 0, row_index, - t->row_index_size, NULL); - free(filename); - return ret; -} - -/** - * A wrapper for truncate(2) - * - * \param path Name of the regular file to truncate - * \param size Number of bytes to \b shave \b off - * - * Truncate the regular file named by \a path by \a size bytes. - * - * \return Positive on success, negative on errors. Possible errors include: \p - * E_STAT, \p E_BAD_SIZE, \p E_TRUNC. - * - * \sa truncate(2) - */ -int para_truncate(const char *path, off_t size) -{ - int ret; - struct stat statbuf; - - ret = -E_STAT; - if (stat(path, &statbuf) < 0) - goto out; - ret = -E_BAD_SIZE; - if (statbuf.st_size < size) - goto out; - ret = -E_TRUNC; - if (truncate(path, statbuf.st_size - size) < 0) - goto out; - ret = 1; -out: - return ret; -} - -static int truncate_mapped_file(const struct osl_table *t, unsigned col_num, - off_t size) -{ - char *filename = column_filename(t, col_num); - int ret = para_truncate(filename, size); - free(filename); - return ret; -} - -static int delete_disk_storage_file(const struct osl_table *t, unsigned col_num, - const char *ds_name) -{ - char *dirname, *filename = disk_storage_path(t, col_num, ds_name); - int ret = unlink(filename), err = errno; - - free(filename); - if (ret < 0) - return -ERRNO_TO_PARA_ERROR(err); - if (!(t->desc->flags & OSL_LARGE_TABLE)) - return 1; - dirname = disk_storage_dirname(t, col_num, ds_name); - rmdir(dirname); - free(dirname); - return 1; -} - -/** - * Add a new row to an osl table and retrieve this row. - * - * \param t Pointer to an open osl table. - * \param objects Array of objects to be added. - * \param row Result pointer. - * - * The \a objects parameter must point to an array containing one object per - * column. The order of the objects in the array is given by the table - * description of \a table. Several sanity checks are performed during object - * insertion and the function returns without modifying the table if any of - * these tests fail. In fact, it is atomic in the sense that it either - * succeeds or leaves the table unchanged (i.e. either all or none of the - * objects are added to the table). - * - * It is considered an error if an object is added to a column with associated - * rbtree if this object is equal to an object already contained in that column - * (i.e. the compare function for the column's rbtree returns zero). - * - * Possible errors include: \p E_RB_KEY_EXISTS, \p E_BAD_DATA_SIZE. - * - * \return Positive on success, negative on errors. - * - * \sa struct osl_table_description, osl_compare_func, osl_add_row(). - */ -int osl_add_and_get_row(struct osl_table *t, struct osl_object *objects, - struct osl_row **row) -{ - int i, ret; - char *ds_name = NULL; - struct rb_node **rb_parents = NULL, ***rb_links = NULL; - char *new_row_index = NULL; - struct osl_object *volatile_objs = NULL; - const struct osl_column_description *cd; - - if (!t) - return -E_BAD_TABLE; - rb_parents = para_malloc(t->num_rbtrees * sizeof(struct rn_node*)); - rb_links = para_malloc(t->num_rbtrees * sizeof(struct rn_node**)); - if (t->num_mapped_columns) - new_row_index = para_malloc(t->row_index_size); - /* pass 1: sanity checks */ -// PARA_DEBUG_LOG("sanity tests: %p:%p\n", objects[0].data, -// objects[1].data); - FOR_EACH_COLUMN(i, t->desc, cd) { - enum osl_storage_type st = cd->storage_type; - enum osl_storage_flags sf = cd->storage_flags; - -// ret = -E_NULL_OBJECT; -// if (!objects[i]) -// goto out; - if (st == OSL_DISK_STORAGE) - continue; - if (sf & OSL_RBTREE) { - unsigned rbtree_num = t->columns[i].rbtree_num; - ret = -E_RB_KEY_EXISTS; -// PARA_DEBUG_LOG("checking whether %p exists\n", -// objects[i].data); - if (search_rbtree(objects + i, t, i, - &rb_parents[rbtree_num], - &rb_links[rbtree_num]) > 0) - goto out; - } - if (sf & OSL_FIXED_SIZE) { -// PARA_DEBUG_LOG("fixed size. need: %zu, have: %d\n", -// objects[i].size, cd->data_size); - ret = -E_BAD_DATA_SIZE; - if (objects[i].size != cd->data_size) - goto out; - } - } - if (t->num_disk_storage_columns) - ds_name = disk_storage_name_of_object(t, - &objects[t->disk_storage_name_column]); - ret = unmap_table(t, OSL_MARK_CLEAN); - if (ret < 0) - goto out; -// PARA_DEBUG_LOG("sanity tests passed%s\n", ""); - /* pass 2: create data files, append map data */ - FOR_EACH_COLUMN(i, t->desc, cd) { - enum osl_storage_type st = cd->storage_type; - if (st == OSL_NO_STORAGE) - continue; - if (st == OSL_MAPPED_STORAGE) { - uint32_t new_size; - struct osl_column *col = &t->columns[i]; -// PARA_DEBUG_LOG("appending object of size %zu\n", -// objects[i].size); - ret = append_map_file(t, i, objects + i, &new_size); - if (ret < 0) - goto rollback; - update_cell_index(new_row_index, col, new_size, - objects[i].size); - continue; - } - /* DISK_STORAGE */ - ret = write_disk_storage_file(t, i, objects + i, ds_name); - if (ret < 0) - goto rollback; - } - ret = append_row_index(t, new_row_index); - if (ret < 0) - goto rollback; - ret = map_table(t, MAP_TBL_FL_VERIFY_INDEX); - if (ret < 0) { /* truncate index and rollback changes */ - char *filename = index_filename(t->desc); - para_truncate(filename, t->row_index_size); - free(filename); - goto rollback; - } - /* pass 3: add entry to rbtrees */ - if (t->num_volatile_columns) { - volatile_objs = para_calloc(t->num_volatile_columns - * sizeof(struct osl_object)); - FOR_EACH_VOLATILE_COLUMN(i, t, cd) - volatile_objs[t->columns[i].volatile_num] = objects[i]; - } - t->num_rows++; -// PARA_DEBUG_LOG("adding new entry as row #%d\n", t->num_rows - 1); - ret = add_row_to_rbtrees(t, t->num_rows - 1, volatile_objs, row); - if (ret < 0) - goto out; -// PARA_DEBUG_LOG("added new entry as row #%d\n", t->num_rows - 1); - ret = 1; - goto out; -rollback: /* rollback all changes made, ignore further errors */ - for (i--; i >= 0; i--) { - enum osl_storage_type st; - - cd = get_column_description(t->desc, i); - st = cd->storage_type; - if (st == OSL_NO_STORAGE) - continue; - - if (st == OSL_MAPPED_STORAGE) - truncate_mapped_file(t, i, objects[i].size); - else /* disk storage */ - delete_disk_storage_file(t, i, ds_name); - } - /* ignore error and return previous error value */ - map_table(t, MAP_TBL_FL_VERIFY_INDEX); -out: - free(new_row_index); - free(ds_name); - free(rb_parents); - free(rb_links); - return ret; -} - -/** - * Add a new row to an osl table. - * - * \param t Same meaning as osl_add_and_get_row(). - * \param objects Same meaning as osl_add_and_get_row(). - * - * \return The return value of the underlying call to osl_add_and_get_row(). - * - * This is equivalent to osl_add_and_get_row(t, objects, NULL). - */ -int osl_add_row(struct osl_table *t, struct osl_object *objects) -{ - return osl_add_and_get_row(t, objects, NULL); -} - -/** - * Retrieve an object identified by row and column - * - * \param t Pointer to an open osl table. - * \param r Pointer to the row. - * \param col_num The column number. - * \param object The result pointer. - * - * The column determined by \a col_num must be of type \p OSL_MAPPED_STORAGE - * or \p OSL_NO_STORAGE, i.e. no disk storage objects may be retrieved by this - * function. - * - * \return Positive if object was found, negative on errors. Possible errors - * include: \p E_BAD_TABLE, \p E_BAD_STORAGE_TYPE. - * - * \sa osl_storage_type, osl_open_disk_object(). - */ -int osl_get_object(const struct osl_table *t, const struct osl_row *r, - unsigned col_num, struct osl_object *object) -{ - const struct osl_column_description *cd; - - if (!t) - return -E_BAD_TABLE; - cd = get_column_description(t->desc, col_num); - /* col must not be disk storage */ - if (cd->storage_type == OSL_DISK_STORAGE) - return -E_BAD_STORAGE_TYPE; - if (cd->storage_type == OSL_MAPPED_STORAGE) - return get_mapped_object(t, col_num, r->num, object); - /* volatile */ - *object = r->volatile_objects[t->columns[col_num].volatile_num]; - return 1; -} - -static int mark_mapped_object_invalid(const struct osl_table *t, - uint32_t row_num, unsigned col_num) -{ - struct osl_object obj; - char *p; - int ret = get_mapped_object(t, col_num, row_num, &obj); - - if (ret < 0) - return ret; - p = obj.data; - p--; - *p = 0xff; - return 1; -} - -/** - * Delete a row from an osl table. - * - * \param t Pointer to an open osl table. - * \param row Pointer to the row to delete. - * - * This removes all disk storage objects, removes all rbtree nodes, and frees - * all volatile objects belonging to the given row. For mapped columns, the - * data is merely marked invalid and may be pruned from time to time by - * para_fsck. - * - * \return Positive on success, negative on errors. Possible errors include: - * \p E_BAD_TABLE, errors returned by osl_get_object(). - */ -int osl_del_row(struct osl_table *t, struct osl_row *row) -{ - struct osl_row *r = row; - int i, ret; - const struct osl_column_description *cd; - - if (!t) - return -E_BAD_TABLE; - PARA_INFO_LOG("deleting row %p\n", row); - - if (t->num_disk_storage_columns) { - char *ds_name; - ret = disk_storage_name_of_row(t, r, &ds_name); - if (ret < 0) - goto out; - FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) - delete_disk_storage_file(t, i, ds_name); - free(ds_name); - } - FOR_EACH_COLUMN(i, t->desc, cd) { - struct osl_column *col = t->columns + i; - enum osl_storage_type st = cd->storage_type; - remove_rb_node(t, i, r); - if (st == OSL_MAPPED_STORAGE) { - mark_mapped_object_invalid(t, r->num, i); - continue; - } - if (st == OSL_NO_STORAGE && !(cd->storage_flags & OSL_DONT_FREE)) - free(r->volatile_objects[col->volatile_num].data); - } - if (t->num_mapped_columns) { - ret = mark_row_invalid(t, r->num); - if (ret < 0) - goto out; - t->num_invalid_rows++; - } else - t->num_rows--; - ret = 1; -out: - free(r->volatile_objects); - free(r); - return ret; -} - -/* test if column has an rbtree */ -static int check_rbtree_col(const struct osl_table *t, unsigned col_num, - struct osl_column **col) -{ - if (!t) - return -E_BAD_TABLE; - if (!(get_column_description(t->desc, col_num)->storage_flags & OSL_RBTREE)) - return -E_BAD_STORAGE_FLAGS; - *col = t->columns + col_num; - return 1; -} - -/** - * Get the row that contains the given object. - * - * \param t Pointer to an open osl table. - * \param col_num The number of the column to be searched. - * \param obj The object to be looked up. - * \param result Points to the row containing \a obj. - * - * Lookup \a obj in \a t and return the row containing \a obj. The column - * specified by \a col_num must have an associated rbtree. - * - * \return Positive on success, negative on errors. If an error occurred, \a - * result is set to \p NULL. Possible errors include: \p E_BAD_TABLE, \p - * E_BAD_STORAGE_FLAGS, errors returned by get_mapped_object(), \p - * E_RB_KEY_NOT_FOUND. - * - * \sa osl_storage_flags - */ -int osl_get_row(const struct osl_table *t, unsigned col_num, - const struct osl_object *obj, struct osl_row **result) -{ - int ret; - struct rb_node *node; - struct osl_row *row; - struct osl_column *col; - - *result = NULL; - ret = check_rbtree_col(t, col_num, &col); - if (ret < 0) - return ret; - ret = search_rbtree(obj, t, col_num, &node, NULL); - if (ret < 0) - return ret; - row = get_row_pointer(node, t->columns[col_num].rbtree_num); - *result = row; - return 1; -} - -static int rbtree_loop(struct osl_column *col, void *private_data, - osl_rbtree_loop_func *func) -{ - struct rb_node *n, *tmp; - - /* this for-loop is safe against removal of an entry */ - for (n = rb_first(&col->rbtree), tmp = n? rb_next(n) : NULL; - n; - n = tmp, tmp = tmp? rb_next(tmp) : NULL) { - struct osl_row *r = get_row_pointer(n, col->rbtree_num); - int ret = func(r, private_data); - if (ret < 0) - return ret; - } - return 1; -} - -static int rbtree_loop_reverse(struct osl_column *col, void *private_data, - osl_rbtree_loop_func *func) -{ - struct rb_node *n, *tmp; - - /* safe against removal of an entry */ - for (n = rb_last(&col->rbtree), tmp = n? rb_prev(n) : NULL; - n; - n = tmp, tmp = tmp? rb_prev(tmp) : NULL) { - struct osl_row *r = get_row_pointer(n, col->rbtree_num); - int ret = func(r, private_data); - if (ret < 0) - return ret; - } - return 1; -} - -/** - * Loop over all nodes in an rbtree. - * - * \param t Pointer to an open osl table. - * \param col_num The column to use for iterating over the elements. - * \param private_data Pointer that gets passed to \a func. - * \param func The function to be called for each node in the rbtree. - * - * This function does an in-order walk of the rbtree associated with \a - * col_num. It is an error if the \p OSL_RBTREE flag is not set for this - * column. For each node in the rbtree, the given function \a func is called - * with two pointers as arguments: The first osl_row* argument points to the - * row that contains the object corresponding to the rbtree node currently - * traversed, and the \a private_data pointer is passed verbatim to \a func as the - * second argument. The loop terminates either if \a func returns a negative - * value, or if all nodes of the tree have been visited. - * - * - * \return Positive on success, negative on errors. If the termination of the - * loop was caused by \a func returning a negative value, this value is - * returned. - * - * \sa osl_storage_flags, osl_rbtree_loop_reverse(), osl_compare_func. - */ -int osl_rbtree_loop(const struct osl_table *t, unsigned col_num, - void *private_data, osl_rbtree_loop_func *func) -{ - struct osl_column *col; - - int ret = check_rbtree_col(t, col_num, &col); - if (ret < 0) - return ret; - return rbtree_loop(col, private_data, func); -} - -/** - * Loop over all nodes in an rbtree in reverse order. - * - * \param t Identical meaning as in \p osl_rbtree_loop(). - * \param col_num Identical meaning as in \p osl_rbtree_loop(). - * \param private_data Identical meaning as in \p osl_rbtree_loop(). - * \param func Identical meaning as in \p osl_rbtree_loop(). - * - * This function is identical to \p osl_rbtree_loop(), the only difference - * is that the tree is walked in reverse order. - * - * \return The same return value as \p osl_rbtree_loop(). - * - * \sa osl_rbtree_loop(). - */ -int osl_rbtree_loop_reverse(const struct osl_table *t, unsigned col_num, - void *private_data, osl_rbtree_loop_func *func) -{ - struct osl_column *col; - - int ret = check_rbtree_col(t, col_num, &col); - if (ret < 0) - return ret; - return rbtree_loop_reverse(col, private_data, func); -} - -/* TODO: Rollback changes on errors */ -static int rename_disk_storage_objects(struct osl_table *t, - struct osl_object *old_obj, struct osl_object *new_obj) -{ - int i, ret; - const struct osl_column_description *cd; - char *old_ds_name, *new_ds_name; - - if (!t->num_disk_storage_columns) - return 1; /* nothing to do */ - if (old_obj->size == new_obj->size && !memcmp(new_obj->data, - old_obj->data, new_obj->size)) - return 1; /* object did not change */ - old_ds_name = disk_storage_name_of_object(t, old_obj); - new_ds_name = disk_storage_name_of_object(t, new_obj); - FOR_EACH_DISK_STORAGE_COLUMN(i, t, cd) { - char *old_filename, *new_filename; - ret = create_disk_storage_object_dir(t, i, new_ds_name); - if (ret < 0) - goto out; - old_filename = disk_storage_path(t, i, old_ds_name); - new_filename = disk_storage_path(t, i, new_ds_name); - ret = para_rename(old_filename, new_filename); - free(old_filename); - free(new_filename); - if (ret < 0) - goto out; - } - ret = 1; -out: - free(old_ds_name); - free(new_ds_name); - return ret; - -} - -/** - * Change an object in an osl table. - * - * \param t Pointer to an open osl table. - * \param r Pointer to the row containing the object to be updated. - * \param col_num Number of the column containing the object to be updated. - * \param obj Pointer to the replacement object. - * - * This function gets rid of all references to the old object. This includes - * removal of the rbtree node in case there is an rbtree associated with \a - * col_num. It then inserts \a obj into the table and the rbtree if necessary. - * - * If the \p OSL_RBTREE flag is set for \a col_num, you \b MUST call this - * function in order to change the contents of an object, even for volatile or - * mapped columns of constant size (which may be updated directly if \p - * OSL_RBTREE is not set). Otherwise the rbtree might become corrupted. - * - * \return Standard - */ -int osl_update_object(struct osl_table *t, const struct osl_row *r, - unsigned col_num, struct osl_object *obj) -{ - struct osl_column *col; - const struct osl_column_description *cd; - int ret; - - if (!t) - return -E_BAD_TABLE; - col = &t->columns[col_num]; - cd = get_column_description(t->desc, col_num); - PARA_DEBUG_LOG("updating column %u of %s\n", col_num, t->desc->name); - if (cd->storage_flags & OSL_RBTREE) { - if (search_rbtree(obj, t, col_num, NULL, NULL) > 0) - return -E_RB_KEY_EXISTS; - } - if (cd->storage_flags & OSL_FIXED_SIZE) { - if (obj->size != cd->data_size) - return -E_BAD_DATA_SIZE; - } - remove_rb_node(t, col_num, r); - if (cd->storage_type == OSL_NO_STORAGE) { /* TODO: If fixed size, reuse object? */ - free(r->volatile_objects[col->volatile_num].data); - r->volatile_objects[col->volatile_num] = *obj; - } else if (cd->storage_type == OSL_DISK_STORAGE) { - char *ds_name; - ret = disk_storage_name_of_row(t, r, &ds_name); - if (ret < 0) - return ret; - ret = delete_disk_storage_file(t, col_num, ds_name); - if (ret < 0 && !is_errno(-ret, ENOENT)) { - free(ds_name); - return ret; - } - ret = write_disk_storage_file(t, col_num, obj, ds_name); - free(ds_name); - if (ret < 0) - return ret; - } else { /* mapped storage */ - struct osl_object old_obj; - ret = get_mapped_object(t, col_num, r->num, &old_obj); - if (ret < 0) - return ret; - /* - * If the updated column is the disk storage name column, the - * disk storage name changes, so we have to rename all disk - * storage objects accordingly. - */ - if (col_num == t->disk_storage_name_column) { - ret = rename_disk_storage_objects(t, &old_obj, obj); - if (ret < 0) - return ret; - } - if (cd->storage_flags & OSL_FIXED_SIZE) - memcpy(old_obj.data, obj->data, cd->data_size); - else { /* TODO: if the size doesn't change, use old space */ - uint32_t new_data_map_size; - char *row_index; - ret = get_row_index(t, r->num, &row_index); - if (ret < 0) - return ret; - ret = mark_mapped_object_invalid(t, r->num, col_num); - if (ret < 0) - return ret; - unmap_column(t, col_num); - ret = append_map_file(t, col_num, obj, - &new_data_map_size); - if (ret < 0) - return ret; - ret = map_column(t, col_num); - if (ret < 0) - return ret; - update_cell_index(row_index, col, new_data_map_size, - obj->size); - } - } - if (cd->storage_flags & OSL_RBTREE) { - ret = insert_rbtree(t, col_num, r, obj); - if (ret < 0) - return ret; - } - return 1; -} - -/** - * Retrieve an object of type \p OSL_DISK_STORAGE by row and column. - * - * \param t Pointer to an open osl table. - * \param r Pointer to the row containing the object. - * \param col_num The column number. - * \param obj Points to the result upon successful return. - * - * For columns of type \p OSL_DISK_STORAGE, this function must be used to - * retrieve one of its containing objects. Afterwards, osl_close_disk_object() - * must be called in order to deallocate the resources. - * - * \return Positive on success, negative on errors. Possible errors include: - * \p E_BAD_TABLE, \p E_BAD_STORAGE_TYPE, errors returned by osl_get_object(). - * - * \sa osl_get_object(), osl_storage_type, osl_close_disk_object(). - */ -int osl_open_disk_object(const struct osl_table *t, const struct osl_row *r, - unsigned col_num, struct osl_object *obj) -{ - const struct osl_column_description *cd; - char *ds_name, *filename; - int ret; - - if (!t) - return -E_BAD_TABLE; - cd = get_column_description(t->desc, col_num); - if (cd->storage_type != OSL_DISK_STORAGE) - return -E_BAD_STORAGE_TYPE; - - ret = disk_storage_name_of_row(t, r, &ds_name); - if (ret < 0) - return ret; - filename = disk_storage_path(t, col_num, ds_name); - free(ds_name); - PARA_DEBUG_LOG("filename: %s\n", filename); - ret = mmap_full_file(filename, O_RDONLY, &obj->data, &obj->size, NULL); - free(filename); - return ret; -} - -/** - * Free resources that were allocated during osl_open_disk_object(). - * - * \param obj Pointer to the object previously returned by open_disk_object(). - * - * \return The return value of the underlying call to para_munmap(). - * - * \sa para_munmap(). - */ -int osl_close_disk_object(struct osl_object *obj) -{ - return para_munmap(obj->data, obj->size); -} - -/** - * Get the number of rows of the given table. - * - * \param t Pointer to an open osl table. - * \param num_rows Result is returned here. - * - * The number of rows returned via \a num_rows excluding any invalid rows. - * - * \return Positive on success, \p -E_BAD_TABLE if \a t is \p NULL. - */ -int osl_get_num_rows(const struct osl_table *t, unsigned *num_rows) -{ - if (!t) - return -E_BAD_TABLE; - assert(t->num_rows >= t->num_invalid_rows); - *num_rows = t->num_rows - t->num_invalid_rows; - return 1; -} - -/** - * Get the rank of a row. - * - * \param t An open osl table. - * \param r The row to get the rank of. - * \param col_num The number of an rbtree column. - * \param rank Result pointer. - * - * The rank is, by definition, the position of the row in the linear order - * determined by an in-order tree walk of the rbtree associated with column - * number \a col_num of \a table. - * - * \return Positive on success, negative on errors. - * - * \sa osl_get_nth_row(). - */ -int osl_get_rank(const struct osl_table *t, struct osl_row *r, - unsigned col_num, unsigned *rank) -{ - struct osl_object obj; - struct osl_column *col; - struct rb_node *node; - int ret = check_rbtree_col(t, col_num, &col); - - if (ret < 0) - return ret; - ret = osl_get_object(t, r, col_num, &obj); - if (ret < 0) - return ret; - ret = search_rbtree(&obj, t, col_num, &node, NULL); - if (ret < 0) - return ret; - ret = rb_rank(node, rank); - if (ret < 0) - return -E_BAD_ROW; - return 1; -} - -/** - * Get the row with n-th greatest value. - * - * \param t Pointer to an open osl table. - * \param col_num The column number. - * \param n The rank of the desired row. - * \param result Row is returned here. - * - * Retrieve the n-th order statistic with respect to the compare function - * of the rbtree column \a col_num. In other words, get that row with - * \a n th greatest value in column \a col_num. It's an error if - * \a col_num is not a rbtree column, or if \a n is larger than the - * number of rows in the table. - * - * \return Positive on success, negative on errors. Possible errors: - * \p E_BAD_TABLE, \p E_BAD_STORAGE_FLAGS, \p E_RB_KEY_NOT_FOUND. - * - * \sa osl_storage_flags, osl_compare_func, osl_get_row(), - * osl_rbtree_last_row(), osl_rbtree_first_row(), osl_get_rank(). - */ -int osl_get_nth_row(const struct osl_table *t, unsigned col_num, - unsigned n, struct osl_row **result) -{ - struct osl_column *col; - struct rb_node *node; - unsigned num_rows; - int ret; - - if (n == 0) - return -E_RB_KEY_NOT_FOUND; - ret = osl_get_num_rows(t, &num_rows); - if (ret < 0) - return ret; - if (n > num_rows) - return -E_RB_KEY_NOT_FOUND; - ret = check_rbtree_col(t, col_num, &col); - if (ret < 0) - return ret; - node = rb_nth(col->rbtree.rb_node, n); - if (!node) - return -E_RB_KEY_NOT_FOUND; - *result = get_row_pointer(node, col->rbtree_num); - return 1; -} - -/** - * Get the row corresponding to the smallest rbtree node of a column. - * - * \param t An open rbtree table. - * \param col_num The number of the rbtree column. - * \param result A pointer to the first row is returned here. - * - * The rbtree node of the smallest object (with respect to the corresponding - * compare function) is selected and the row containing this object is - * returned. It is an error if \a col_num refers to a column without an - * associated rbtree. - * - * \return Positive on success, negative on errors. - * - * \sa osl_get_nth_row(), osl_rbtree_last_row(). - */ -int osl_rbtree_first_row(const struct osl_table *t, unsigned col_num, - struct osl_row **result) -{ - return osl_get_nth_row(t, col_num, 1, result); -} - -/** - * Get the row corresponding to the greatest rbtree node of a column. - * - * \param t The same meaning as in \p osl_rbtree_first_row(). - * \param col_num The same meaning as in \p osl_rbtree_first_row(). - * \param result The same meaning as in \p osl_rbtree_first_row(). - * - * This function works just like osl_rbtree_first_row(), the only difference - * is that the row containing the greatest rather than the smallest object is - * returned. - * - * \return Positive on success, negative on errors. - * - * \sa osl_get_nth_row(), osl_rbtree_first_row(). - */ -int osl_rbtree_last_row(const struct osl_table *t, unsigned col_num, - struct osl_row **result) -{ - unsigned num_rows; - int ret = osl_get_num_rows(t, &num_rows); - - if (ret < 0) - return ret; - return osl_get_nth_row(t, col_num, num_rows, result); -}