/* * Copyright (C) 2005 Network Applied Communication Laboratory Co., Ltd. * * This file is part of Rast. * See the file COPYING for redistribution information. * */ #include #include #include #include #include #include #include "rast/config.h" #include "rast/local_db.h" #include "rast/pack.h" #include "rast/text_index.h" #include "rast/query.h" #include "rast/util.h" #define METADATA_VERSION 3 #define NOT_REQUIRED -1 #define NOT_FOUND -1 #define DATETIME_VALUE_SIZE 19 static rast_error_t * check_properties(rast_property_t *properties, int num_properties, apr_pool_t *pool) { int i; char *p; apr_hash_t *names = apr_hash_make(pool); for (i = 0; i < num_properties; i++) { char *name = properties[i].name; if (apr_hash_get(names, name, strlen(name)) != NULL) { return rast_error(RAST_ERROR_INVALID_ARGUMENT, "duplicate property name (%s)", name); } apr_hash_set(names, name, strlen(name), name); if (properties[i].type != RAST_TYPE_STRING && properties[i].flags & (RAST_PROPERTY_FLAG_TEXT_SEARCH | RAST_PROPERTY_FLAG_FULL_TEXT_SEARCH)) { return rast_error(RAST_ERROR_GENERAL, "RAST_PROPERTY_FLAG_TEXT_SEARCH and " "RAST_PROPERTY_FLAG_FULL_TEXT_SEARCH are " "available for string properties only"); } if (!(properties[i].flags & RAST_PROPERTY_FLAG_SEARCH) && properties[i].flags & RAST_PROPERTY_FLAG_UNIQUE) { return rast_error(RAST_ERROR_GENERAL, "turn on RAST_PROPERTY_FLAG_SEARCH " "to use RAST_PROPERTY_FLAG_UNIQUE"); } for (p = properties[i].name; *p != '\0'; p++) { if (strchr("abcdefghijklmnlopqrstuvwxyz" "ABCDEFGHIJKLMNLOPQRSTUVWXYZ" "0123456789_-", *p) == NULL) { return rast_error(RAST_ERROR_GENERAL, "invalid property name: %s", properties[i].name); } } } return RAST_OK; } static rast_error_t * write_number(apr_file_t *file, rast_uint_t number, int is_native) { rast_uint_t n; apr_size_t nbytes; n = rast_fix_byte_order(number, is_native); nbytes = sizeof(rast_uint_t); return apr_status_to_rast_error(apr_file_write(file, &n, &nbytes)); } static rast_error_t * 
write_string(apr_file_t *file, const char *s, int len, int is_native) { rast_error_t *error; apr_size_t nbytes; error = write_number(file, len, is_native); if (error != RAST_OK) { return error; } nbytes = len; return apr_status_to_rast_error(apr_file_write(file, s, &nbytes)); } static rast_error_t * write_property(apr_file_t *file, rast_property_t *property, int is_native) { rast_error_t *error; error = write_string(file, property->name, strlen(property->name), is_native); if (error != RAST_OK) { return error; } error = write_number(file, property->type, is_native); if (error != RAST_OK) { return error; } return write_number(file, property->flags, is_native); } static rast_error_t * write_metadata(apr_file_t *file, rast_db_create_option_t *options, rast_byte_order_e byte_order, int is_native) { apr_status_t status; rast_error_t *error; int i; status = apr_file_putc(byte_order, file); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } error = write_number(file, METADATA_VERSION, is_native); if (error != RAST_OK) { return error; } error = write_number(file, options->pos_block_size, is_native); if (error != RAST_OK) { return error; } error = write_string(file, options->encoding, strlen(options->encoding), is_native); if (error != RAST_OK) { return error; } error = write_number(file, options->preserve_text, is_native); if (error != RAST_OK) { return error; } error = write_number(file, options->num_properties, is_native); if (error != RAST_OK) { return error; } for (i = 0; i < options->num_properties; i++) { error = write_property(file, options->properties + i, is_native); if (error != RAST_OK) { return error; } } return RAST_OK; } static rast_error_t * create_empty_database(const char *filename, DB_ENV *db_env, u_int32_t db_type, u_int32_t flags, int lorder) { DB *db; int dberr; dberr = db_create(&db, db_env, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } db->set_flags(db, flags); db->set_lorder(db, lorder); dberr = db->open(db, 
NULL, filename, NULL, db_type, DB_CREATE | DB_EXCL, 0666); db->close(db, 0); return db_error_to_rast_error(dberr); } static rast_error_t * create_properties_db(apr_pool_t *pool, const char *db_name, DB_ENV *db_env, int lorder) { char *filename; filename = apr_pstrcat(pool, db_name, "/properties.db", NULL); return create_empty_database(filename, db_env, DB_BTREE, 0, lorder); } static rast_error_t * create_inv_index(apr_pool_t *pool, const char *properties_dir, rast_property_t *property, DB_ENV *db_env, int lorder) { char *filename; u_int32_t flags; filename = apr_pstrcat(pool, properties_dir, "/", property->name, ".inv", NULL); if (property->flags & RAST_PROPERTY_FLAG_UNIQUE) { flags = 0; } else { flags = DB_DUP; } return create_empty_database(filename, db_env, DB_BTREE, flags, lorder); } static rast_error_t * create_text_index(apr_pool_t *pool, const char *db_name, const char *basename, rast_encoding_module_t *encoding_module, DB_ENV *db_env, int lorder, rast_size_t block_size) { rast_text_index_t *index; rast_error_t *error; char *index_name; index_name = apr_pstrcat(pool, db_name, "/", basename, NULL); error = rast_text_index_open(&index, index_name, RAST_DB_RDWR, encoding_module, db_env, lorder, block_size, pool); if (error != RAST_OK) { return error; } rast_text_index_close(index); return RAST_OK; } static rast_error_t * create_property_indices(apr_pool_t *pool, const char *db_name, rast_property_t *properties, int num_properties, rast_encoding_module_t *encoding_module, DB_ENV *db_env, int lorder, rast_size_t block_size) { int i; rast_error_t *error; apr_status_t status; char *dir; dir = apr_pstrcat(pool, db_name, "/properties", NULL); status = apr_dir_make(dir, APR_OS_DEFAULT, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } for (i = 0; i < num_properties; i++) { rast_property_t *property = properties + i; if (property->flags & RAST_PROPERTY_FLAG_SEARCH) { error = create_inv_index(pool, dir, property, db_env, lorder); if (error 
!= RAST_OK) { return error; } } if (property->flags & RAST_PROPERTY_FLAG_TEXT_SEARCH) { error = create_text_index(pool, dir, property->name, encoding_module, db_env, lorder, block_size); if (error != RAST_OK) { return error; } } } return RAST_OK; } static rast_error_t * create_bdb_env(apr_pool_t *pool, const char *db_name, DB_ENV **db_env) { int dberr; u_int32_t flags; dberr = db_env_create(db_env, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } flags = DB_CREATE | DB_INIT_MPOOL | DB_THREAD; dberr = (*db_env)->open(*db_env, db_name, flags, 0666); return db_error_to_rast_error(dberr); } static rast_error_t * create_text_db(apr_pool_t *pool, const char *db_name, DB_ENV *db_env, int lorder) { char *filename; filename = apr_pstrcat(pool, db_name, "/text.db", NULL); return create_empty_database(filename, db_env, DB_RECNO, 0, lorder); } static rast_error_t * create_doc_info(apr_pool_t *pool, const char *db_name, const char *basename, rast_doc_id_t max_doc_id, int is_native) { rast_error_t *error; apr_status_t status; char *filename; apr_file_t *file; filename = apr_pstrcat(pool, db_name, "/", basename, NULL); status = apr_file_open(&file, filename, APR_CREATE | APR_WRITE, APR_OS_DEFAULT, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } error = write_number(file, max_doc_id, is_native); if (error != RAST_OK) { return error; } return write_number(file, max_doc_id, is_native); } static rast_error_t * create_lock_file(apr_pool_t *pool, const char *db_name) { char *lock_filename; apr_file_t *file; apr_status_t status; lock_filename = (char *) apr_pstrcat(pool, db_name, "/lock", NULL); status = apr_file_open(&file, lock_filename, APR_CREATE | APR_WRITE, APR_OS_DEFAULT, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } return apr_status_to_rast_error(apr_file_close(file)); } rast_error_t * rast_local_db_create(const char *name, rast_db_create_option_t *options, apr_pool_t *pool) { rast_encoding_module_t 
*encoding_module; char *path, *metadata_filename; apr_file_t *metadata_file; rast_byte_order_e byte_order; DB_ENV *bdb_env; apr_status_t status; rast_error_t *error; int is_native, lorder; error = rast_get_encoding_module(options->encoding, &encoding_module); if (error != RAST_OK) { return error; } error = check_properties(options->properties, options->num_properties, pool); if (error != RAST_OK) { return error; } status = apr_filepath_merge(&path, NULL, name, 0, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } status = apr_dir_make(path, APR_OS_DEFAULT, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } metadata_filename = (char *) apr_pstrcat(pool, path, "/METADATA", (void *) NULL); status = apr_file_open(&metadata_file, metadata_filename, APR_CREATE | APR_WRITE, APR_OS_DEFAULT, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } if (options->byte_order == RAST_NATIVE_ENDIAN) { byte_order = rast_check_byte_order(); } else { byte_order = options->byte_order; } is_native = rast_check_byte_order() == byte_order; lorder = byte_order == RAST_LITTLE_ENDIAN ? 
1234 : 4321; error = write_metadata(metadata_file, options, byte_order, is_native); apr_file_close(metadata_file); if (error != RAST_OK) { return error; } error = create_doc_info(pool, path, "doc_info", 0, is_native); if (error != RAST_OK) { return error; } error = create_bdb_env(pool, path, &bdb_env); if (error != RAST_OK) { return error; } if (options->preserve_text) { error = create_text_db(pool, path, bdb_env, lorder); if (error != RAST_OK) { return error; } } error = create_text_index(pool, path, "text", encoding_module, bdb_env, lorder, options->pos_block_size); if (error != RAST_OK) { return error; } error = create_properties_db(pool, path, bdb_env, lorder); if (error != RAST_OK) { return error; } error = create_property_indices(pool, path, options->properties, options->num_properties, encoding_module, bdb_env, lorder, options->pos_block_size); if (error != RAST_OK) { return error; } bdb_env->close(bdb_env, 0); return create_lock_file(pool, path); } static rast_error_t * read_number(apr_file_t *file, rast_uint_t *number, int is_native) { rast_uint_t n; apr_size_t nbytes = sizeof(rast_uint_t); apr_status_t status; if ((status = apr_file_read(file, &n, &nbytes)) != APR_SUCCESS) { return apr_status_to_rast_error(status); } *number = rast_fix_byte_order(n, is_native); return RAST_OK; } static rast_error_t * read_string(apr_pool_t *pool, apr_file_t *file, char **s, rast_size_t *len, int is_native) { rast_error_t *error; apr_size_t nbytes; rast_size_t n; apr_status_t status; if ((error = read_number(file, &n, is_native)) != RAST_OK) { return error; } if (len != NULL) { *len = n; } nbytes = n; *s = (char *) apr_palloc(pool, nbytes + 1); status = apr_file_read(file, *s, &nbytes); (*s)[nbytes] = '\0'; return apr_status_to_rast_error(status); } static rast_error_t * read_property(apr_pool_t *pool, apr_file_t *file, rast_property_t *property, int is_native) { rast_error_t *error; rast_uint_t n; error = read_string(pool, file, &property->name, NULL, is_native); if 
(error != RAST_OK) { return error; } error = read_number(file, &n, is_native); property->type = n; if (error != RAST_OK) { return error; } return read_number(file, &property->flags, is_native); } static rast_error_t * read_metadata(apr_pool_t *pool, rast_local_db_t *db, apr_file_t *file) { apr_status_t status; char byte_order; rast_uint_t n, i; rast_error_t *error; char *s; rast_property_t *properties; if ((status = apr_file_getc(&byte_order, file)) != APR_SUCCESS) { return apr_status_to_rast_error(status); } db->is_native = rast_check_byte_order() == byte_order; error = read_number(file, &n, db->is_native); if (error != RAST_OK) { return error; } if (n != METADATA_VERSION) { return rast_error(RAST_ERROR_GENERAL, "not supported version: %d", n); } error = read_number(file, &db->pos_block_size, db->is_native); if (error != RAST_OK) { return error; } error = read_string(pool, file, &s, NULL, db->is_native); if (error != RAST_OK) { return error; } db->encoding = s; error = rast_get_encoding_module(db->encoding, &db->encoding_module); if (error != RAST_OK) { return error; } error = read_number(file, &db->preserve_text, db->is_native); if (error != RAST_OK) { return error; } error = read_number(file, &db->num_properties, db->is_native); if (error != RAST_OK) { return error; } properties = (rast_property_t *) apr_palloc(pool, sizeof(rast_property_t) * db->num_properties); for (i = 0; i < db->num_properties; i++) { error = read_property(pool, file, properties + i, db->is_native); if (error != RAST_OK) { return error; } } db->properties = properties; return RAST_OK; } static u_int32_t get_local_db_flags(rast_local_db_t *db) { return (db->flags & RAST_DB_RDONLY) ? 
DB_RDONLY : 0; } static int compare_uint_keys(DB *db, const DBT *x, const DBT *y) { rast_uint_t xi, yi; int is_swapped; memcpy(&xi, x->data, sizeof(rast_uint_t)); memcpy(&yi, y->data, sizeof(rast_uint_t)); db->get_byteswapped(db, &is_swapped); return rast_fix_byte_order(xi, !is_swapped) - rast_fix_byte_order(yi, !is_swapped); } static int compare_string_keys(DB *db, const DBT *x, const DBT *y) { size_t n; int result; n = x->size < y->size ? x->size : y->size; result = memcmp(x->data, y->data, n); if (result == 0) { return x->size - y->size; } else { return result; } } int rast_compare_keys(rast_type_e type, DB *db, const DBT *x, const DBT *y) { switch (type) { case RAST_TYPE_STRING: case RAST_TYPE_DATE: case RAST_TYPE_DATETIME: return compare_string_keys(db, x, y); break; case RAST_TYPE_UINT: return compare_uint_keys(db, x, y); break; default: return 0; } } static rast_error_t * bdb_open(DB_ENV *bdb_env, DB_TXN *bdb_txn, const char *filename, int flags, DBTYPE type, int (*compare_func)(DB *, const DBT *, const DBT *), DB **bdb) { int dberr; dberr = db_create(bdb, bdb_env, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } if (compare_func != NULL) { (*bdb)->set_bt_compare(*bdb, compare_func); } dberr = (*bdb)->open(*bdb, bdb_txn, filename, NULL, type, flags, 0666); return db_error_to_rast_error(dberr); } static rast_error_t * text_db_open(rast_local_db_t *db, apr_pool_t *pool) { char *filename; filename = apr_pstrcat(pool, db->path, "/text.db", NULL); return bdb_open(db->bdb_env, db->bdb_txn, filename, get_local_db_flags(db), DB_RECNO, NULL, &db->text_db); } static rast_error_t * properties_db_open(rast_local_db_t *db, apr_pool_t *pool) { char *filename; filename = apr_pstrcat(pool, db->path, "/properties.db", NULL); return bdb_open(db->bdb_env, db->bdb_txn, filename, get_local_db_flags(db), DB_BTREE, compare_uint_keys, &db->properties_db); } static rast_error_t * property_indices_open(rast_local_db_t *db, apr_pool_t *pool) { char *inv_db_name, 
*text_db_name; int i, dberr; rast_error_t *error; db->property_indices = (rast_property_index_t *) apr_palloc(pool, sizeof(rast_property_index_t) * db->num_properties); for (i = 0; i < db->num_properties; i++) { rast_property_t *property = db->properties + i; rast_property_index_t *property_index = db->property_indices + i; if (property->flags & RAST_PROPERTY_FLAG_SEARCH) { dberr = db_create(&property_index->inv, db->bdb_env, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } inv_db_name = apr_pstrcat(pool, db->path, "/properties/", property->name, ".inv", NULL); switch (property->type) { case RAST_TYPE_STRING: case RAST_TYPE_DATE: case RAST_TYPE_DATETIME: property_index->inv->set_bt_compare(property_index->inv, compare_string_keys); break; case RAST_TYPE_UINT: property_index->inv->set_bt_compare(property_index->inv, compare_uint_keys); break; } dberr = property_index->inv->open(property_index->inv, db->bdb_txn, inv_db_name, NULL, DB_BTREE, get_local_db_flags(db), 0666); if (dberr != 0) { return db_error_to_rast_error(dberr); } } if (property->flags & RAST_PROPERTY_FLAG_TEXT_SEARCH) { text_db_name = apr_pstrcat(pool, db->path, "/properties/", property->name, NULL); error = rast_text_index_open(&property_index->text, text_db_name, db->flags, db->encoding_module, db->bdb_env, 0, db->pos_block_size, pool); if (error != RAST_OK) { return error; } } } return RAST_OK; } static rast_error_t * bdb_env_open(rast_local_db_t *db, apr_pool_t *pool) { int dberr; u_int32_t flags; dberr = db_env_create(&db->bdb_env, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } #if 0 flags = DB_CREATE | DB_INIT_LOCK DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD; #else flags = DB_CREATE | DB_INIT_MPOOL | DB_THREAD; #endif dberr = db->bdb_env->open(db->bdb_env, db->path, flags, 0666); if (dberr != 0) { return db_error_to_rast_error(dberr); } #ifdef RAST_DEBUG db->bdb_env->set_errpfx(db->bdb_env, "rast"); db->bdb_env->set_errfile(db->bdb_env, stderr); #endif return 
RAST_OK; } static rast_error_t * open_locked_file(const char *lock_filename, int lock_flag, int open_flag, apr_file_t **lock_file, apr_pool_t *pool) { apr_status_t status = apr_file_open(lock_file, lock_filename, open_flag, APR_OS_DEFAULT, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } status = apr_file_lock(*lock_file, lock_flag); if (status != APR_SUCCESS) { apr_file_close(*lock_file); return apr_status_to_rast_error(status); } return RAST_OK; } rast_error_t * rast_local_db_open(rast_db_t **base, const char *name, int flags, rast_db_open_option_t *options, apr_pool_t *pool) { const static rast_db_t default_base = { NULL, rast_local_db_sync, rast_local_db_close, rast_local_db_register, rast_local_db_create_document, rast_local_db_search, rast_local_db_delete, rast_local_db_update, rast_local_db_get_text, rast_local_db_byte_order, rast_local_db_encoding, rast_local_db_properties, rast_local_db_sync_threshold_chars, }; rast_local_db_t *db; char *path, *metadata_filename, *text_db_name; apr_file_t *metadata_file; apr_status_t status; rast_error_t *error; char *lock_filename; const char *local_file_scheme = "file://"; if (strncmp(name, local_file_scheme, strlen(local_file_scheme)) == 0) { name += strlen(local_file_scheme); } status = apr_filepath_merge(&path, NULL, name, 0, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } metadata_filename = (char *) apr_pstrcat(pool, path, "/METADATA", NULL); status = apr_file_open(&metadata_file, metadata_filename, APR_READ, APR_OS_DEFAULT, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } db = (rast_local_db_t *) apr_palloc(pool, sizeof(rast_local_db_t)); *base = (rast_db_t *) db; db->base = default_base; db->base.pool = pool; db->path = path; db->flags = flags; db->bdb_txn = NULL; db->registered_chars = 0; if (options == NULL) { rast_db_open_option_t *default_options; apr_pool_t *sub_pool; apr_pool_create(&sub_pool, pool); default_options = 
rast_db_open_option_create(sub_pool); db->sync_threshold_chars = default_options->sync_threshold_chars; apr_pool_destroy(sub_pool); } else { db->sync_threshold_chars = options->sync_threshold_chars; } error = read_metadata(pool, db, metadata_file); apr_file_close(metadata_file); if (error != RAST_OK) { return error; } if (flags & RAST_DB_RDONLY) { #if 0 db->lock_file = NULL; #else /* this lock will be unnecessary in the future. */ lock_filename = (char *) apr_pstrcat(pool, name, "/lock", NULL); error = open_locked_file(lock_filename, APR_FLOCK_SHARED, APR_READ, &db->lock_file, pool); if (error != RAST_OK) { return error; } #endif } else { lock_filename = (char *) apr_pstrcat(pool, name, "/lock", NULL); error = open_locked_file(lock_filename, APR_FLOCK_EXCLUSIVE, APR_WRITE, &db->lock_file, pool); if (error != RAST_OK) { return error; } } error = bdb_env_open(db, pool); if (error != RAST_OK) { return error; } if (db->preserve_text) { error = text_db_open(db, pool); if (error != RAST_OK) { return error; } } error = properties_db_open(db, pool); if (error != RAST_OK) { return error; } error = property_indices_open(db, pool); if (error != RAST_OK) { return error; } text_db_name = apr_pstrcat(pool, path, "/text", NULL); error = rast_text_index_open(&db->text_index, text_db_name, flags, db->encoding_module, db->bdb_env, 0, db->pos_block_size, pool); return error; } static rast_error_t * property_indices_sync(rast_local_db_t *db) { int i, dberr; rast_error_t *error; for (i = 0; i < db->num_properties; i++) { rast_property_t *property = db->properties + i; rast_property_index_t *property_index = db->property_indices + i; if (property->flags & RAST_PROPERTY_FLAG_SEARCH) { dberr = property_index->inv->sync(property_index->inv, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } } if (property->flags & RAST_PROPERTY_FLAG_TEXT_SEARCH) { error = rast_text_index_sync(property_index->text); if (error != RAST_OK) { return error; } } } return RAST_OK; } rast_error_t * 
rast_local_db_sync(rast_db_t *base) { rast_local_db_t *db = (rast_local_db_t *) base; rast_error_t *e, *error = RAST_OK; int dberr; if (db->flags & RAST_DB_RDONLY) { return rast_error(RAST_ERROR_BAD_DB, "can't sync read-only db"); } e = rast_text_index_sync(db->text_index); if (e != RAST_OK) { error = e; } e = property_indices_sync(db); if (e != RAST_OK) { error = e; } if ((dberr = db->properties_db->sync(db->properties_db, 0)) != 0) { error = db_error_to_rast_error(dberr); } #if 0 /* todo: for bdb transaction */ dberr = db->bdb_txn->commit(db->bdb_txn, DB_TXN_SYNC); if (dberr != 0) { error = db_error_to_rast_error(dberr); } db->bdb_txn = NULL; dberr = db->bdb_env->txn_checkpoint(db->bdb_env, 0, 0, 0); if (dberr != 0) { error = db_error_to_rast_error(dberr); } #endif db->registered_chars = 0; return error; } static rast_error_t * property_indices_close(rast_local_db_t *db) { int i, dberr; rast_error_t *error; for (i = 0; i < db->num_properties; i++) { rast_property_t *property = db->properties + i; rast_property_index_t *property_index = db->property_indices + i; if (property->flags & RAST_PROPERTY_FLAG_SEARCH) { dberr = property_index->inv->close(property_index->inv, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } } if (property->flags & RAST_PROPERTY_FLAG_TEXT_SEARCH) { error = rast_text_index_close(property_index->text); if (error != RAST_OK) { return error; } } } return RAST_OK; } rast_error_t * rast_local_db_close(rast_db_t *base) { rast_local_db_t *db = (rast_local_db_t *) base; rast_error_t *e, *error = RAST_OK; if (!(db->flags & RAST_DB_RDONLY)) { e = rast_local_db_sync(base); if (e != RAST_OK) { error = e; } } e = rast_text_index_close(db->text_index); if (e != RAST_OK) { error = e; } e = property_indices_close(db); if (e != RAST_OK) { error = e; } db->properties_db->close(db->properties_db, 0); if (db->preserve_text) { db->text_db->close(db->text_db, 0); } db->bdb_env->close(db->bdb_env, 0); apr_file_unlock(db->lock_file); 
apr_file_close(db->lock_file); return error; } rast_byte_order_e rast_local_db_byte_order(rast_db_t *base) { rast_local_db_t *db = (rast_local_db_t *) base; if (rast_check_byte_order() == RAST_LITTLE_ENDIAN) { return db->is_native ? RAST_LITTLE_ENDIAN : RAST_BIG_ENDIAN; } else { return db->is_native ? RAST_BIG_ENDIAN : RAST_LITTLE_ENDIAN; } } const char * rast_local_db_encoding(rast_db_t *base) { rast_local_db_t *db = (rast_local_db_t *) base; return db->encoding_module->encoding; } const rast_property_t * rast_local_db_properties(rast_db_t *base, int *num_properties) { rast_local_db_t *db = (rast_local_db_t *) base; *num_properties = db->num_properties; return db->properties; } int rast_local_db_sync_threshold_chars(rast_db_t *base) { rast_local_db_t *db = (rast_local_db_t *) base; return db->sync_threshold_chars; } static rast_error_t * get_max_doc_id(rast_local_db_t *db, rast_doc_id_t *doc_id, apr_pool_t *pool) { apr_file_t *file; const char *filename; rast_error_t *error; filename = apr_pstrcat(pool, db->path, "/doc_info", NULL); error = open_locked_file(filename, APR_FLOCK_SHARED, APR_READ, &file, pool); if (error != RAST_OK) { return error; } error = read_number(file, doc_id, db->is_native); apr_file_unlock(file); apr_file_close(file); return error; } static rast_error_t * get_num_docs(rast_local_db_t *db, rast_size_t *num_docs, apr_pool_t *pool) { apr_file_t *file; const char *filename; rast_error_t *error; rast_uint_t max_doc_id; filename = apr_pstrcat(pool, db->path, "/doc_info", NULL); error = open_locked_file(filename, APR_FLOCK_SHARED, APR_READ, &file, pool); if (error != RAST_OK) { return error; } error = read_number(file, &max_doc_id, db->is_native); if (error == RAST_OK) { error = read_number(file, num_docs, db->is_native); } apr_file_unlock(file); apr_file_close(file); return error; } static rast_error_t * register_summary_text(rast_local_db_t *db, rast_doc_id_t doc_id, const char *text, rast_size_t nbytes, apr_pool_t *pool) { DBT db_key = { 0 }, 
db_value = { 0 }; rast_uint_t fixed_doc_id; int dberr; fixed_doc_id = rast_fix_byte_order(doc_id, db->is_native); db_key.data = &fixed_doc_id; db_key.size = sizeof(rast_uint_t); db_value.data = (void *) text; db_value.size = nbytes; dberr = db->text_db->put(db->text_db, db->bdb_txn, &db_key, &db_value, 0); return db_error_to_rast_error(dberr); } static rast_error_t * text_index_register(rast_local_db_t *db, rast_text_index_t *index, rast_doc_id_t doc_id, const char *s, int nbytes, apr_pool_t *pool) { rast_text_indexer_t *indexer; apr_pool_t *sub_pool; rast_error_t *error; rast_size_t n; apr_pool_create(&sub_pool, pool); error = rast_text_index_register(index, doc_id, &indexer, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } error = rast_text_indexer_add(indexer, s, nbytes, &n); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } db->registered_chars += n; error = rast_text_indexer_commit(indexer); apr_pool_destroy(sub_pool); return error; } static rast_error_t * register_full_text_search_property(rast_local_db_t *db, rast_doc_id_t doc_id, const rast_value_t *property_values, rast_text_indexer_t *indexer, rast_encoding_module_t *encoding_module, apr_pool_t *pool) { int i; rast_size_t n; rast_error_t *error; for (i = 0; i < db->num_properties; i++) { if (db->properties[i].flags & RAST_PROPERTY_FLAG_FULL_TEXT_SEARCH) { char *s; rast_size_t s_nbytes; error = rast_text_indexer_add(indexer, "\0", 1, &n); if (error != RAST_OK) { return error; } s = rast_value_string(property_values + i); s = rast_normalize_text(encoding_module, s, strlen(s), &s_nbytes, pool); error = rast_text_indexer_add(indexer, s, s_nbytes, &n); if (error != RAST_OK) { return error; } db->registered_chars += n; } } return RAST_OK; } static rast_error_t * register_text(rast_local_db_t *db, rast_doc_id_t doc_id, const char *text, rast_size_t nbytes, const rast_value_t *property_values, rast_encoding_module_t *encoding_module, rast_size_t *num_chars, 
apr_pool_t *pool) { char *summary_text, *index_text; rast_size_t summary_nbytes, index_text_nbytes, n; rast_error_t *error; rast_text_indexer_t *indexer; apr_pool_t *sub_pool; encoding_module->normalize_text(pool, text, nbytes, &summary_text, &summary_nbytes); if (db->preserve_text) { error = register_summary_text(db, doc_id, summary_text, summary_nbytes, pool); if (error != RAST_OK) { return error; } } encoding_module->normalize_chars(pool, summary_text, summary_nbytes, &index_text, &index_text_nbytes); apr_pool_create(&sub_pool, pool); error = rast_text_index_register(db->text_index, doc_id, &indexer, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } error = rast_text_indexer_add(indexer, index_text, index_text_nbytes, &n); db->registered_chars += n; *num_chars = n; if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } error = register_full_text_search_property(db, doc_id, property_values, indexer, encoding_module, pool); if (error == RAST_OK) { error = rast_text_indexer_commit(indexer); } apr_pool_destroy(sub_pool); return error; } static int pack_properties_length(rast_local_db_t *db, rast_value_t *properties) { int len, i, property_nbytes; len = 0; for (i = 0; i < db->num_properties; i++) { if (db->properties[i].flags & RAST_PROPERTY_FLAG_OMIT) { continue; } switch (db->properties[i].type) { case RAST_TYPE_STRING: len += sizeof(rast_size_t); len += strlen(rast_value_string(properties + i)); break; case RAST_TYPE_DATE: len += sizeof(rast_size_t); len += strlen(rast_value_date(properties + i)); break; case RAST_TYPE_DATETIME: len += sizeof(rast_size_t); property_nbytes = strlen(rast_value_datetime(properties + i)); if (property_nbytes > DATETIME_VALUE_SIZE) { property_nbytes = DATETIME_VALUE_SIZE; } len += property_nbytes; break; case RAST_TYPE_UINT: len += sizeof(rast_uint_t); break; } } return len; } static void write_nbytes_string(const char *s, int nbytes, int is_native, char **p) { *(rast_size_t *) *p = 
rast_fix_byte_order(nbytes, is_native); *p += sizeof(rast_size_t); memcpy(*p, s, nbytes); *p += nbytes; } static void pack_properties(rast_local_db_t *db, rast_value_t *properties, char *s) { char *p = s, *string; int len, i, property_nbytes; len = 0; for (i = 0; i < db->num_properties; i++) { if (db->properties[i].flags & RAST_PROPERTY_FLAG_OMIT) { continue; } switch (db->properties[i].type) { case RAST_TYPE_STRING: string = rast_value_string(properties + i); write_nbytes_string(string, strlen(string), db->is_native, &p); break; case RAST_TYPE_DATE: string = rast_value_date(properties + i); write_nbytes_string(string, strlen(string), db->is_native, &p); break; case RAST_TYPE_DATETIME: string = rast_value_datetime(properties + i); property_nbytes = strlen(string); if (property_nbytes > DATETIME_VALUE_SIZE) { property_nbytes = DATETIME_VALUE_SIZE; } write_nbytes_string(string, property_nbytes, db->is_native, &p); break; case RAST_TYPE_UINT: *(rast_uint_t *) p = rast_fix_byte_order(rast_value_uint(properties + i), db->is_native); p += sizeof(rast_uint_t); break; } } } static rast_error_t * registerable_property_index(rast_local_db_t *db, rast_doc_id_t doc_id, rast_value_t *values, apr_pool_t *pool) { rast_property_t *property; rast_property_index_t *property_index; rast_value_t *value; rast_uint_t fixed_number; int i, dberr; for (i = 0; i < db->num_properties; i++) { property = db->properties + i; property_index = db->property_indices + i; value = values + i; if ((property->flags & RAST_PROPERTY_FLAG_SEARCH) && (property->flags & RAST_PROPERTY_FLAG_UNIQUE)) { DBT db_key = { 0 }, db_value = { 0 }; int property_nbytes; switch (property->type) { case RAST_TYPE_STRING: db_key.data = (char *) rast_value_string(value); db_key.size = strlen(rast_value_string(value)); break; case RAST_TYPE_DATE: db_key.data = (char *) rast_value_date(value); db_key.size = strlen(rast_value_date(value)); break; case RAST_TYPE_DATETIME: db_key.data = (char *) rast_value_string(value); 
property_nbytes = strlen(rast_value_datetime(value)); if (property_nbytes > DATETIME_VALUE_SIZE) { property_nbytes = DATETIME_VALUE_SIZE; } db_key.size = property_nbytes; break; case RAST_TYPE_UINT: fixed_number = rast_fix_byte_order(rast_value_uint(value), db->is_native); db_key.data = (char *) &fixed_number; db_key.size = sizeof(rast_uint_t); break; } db_value.flags |= DB_DBT_MALLOC; dberr = property_index->inv->get(property_index->inv, db->bdb_txn, &db_key, &db_value, 0); if (dberr == 0) { int delete_flag; db_key.data = db_value.data; db_key.size = db_value.size; db->properties_db->get(db->properties_db, db->bdb_txn, &db_key, &db_value, 0); free(db_key.data); delete_flag = *(char *) db_value.data; free(db_value.data); if (!delete_flag) { return rast_error(RAST_ERROR_INVALID_ARGUMENT, "property/doc_id pair already exists in " "inv database"); } } else if (dberr != DB_NOTFOUND) { free(db_value.data); return db_error_to_rast_error(dberr); } free(db_value.data); } } return RAST_OK; } static rast_error_t * register_property_index(rast_local_db_t *db, rast_doc_id_t doc_id, rast_property_t *property, rast_property_index_t *property_index, rast_value_t *value, apr_pool_t *pool) { rast_uint_t fixed_number; rast_doc_id_t fixed_doc_id; int dberr; rast_error_t *error; if (property->flags & RAST_PROPERTY_FLAG_SEARCH) { DBT db_key = { 0 }, db_value = { 0 }; u_int32_t flags; int property_nbytes; switch (property->type) { case RAST_TYPE_STRING: db_key.data = (char *) rast_value_string(value); db_key.size = strlen(rast_value_string(value)); break; case RAST_TYPE_DATE: db_key.data = (char *) rast_value_date(value); db_key.size = strlen(rast_value_date(value)); break; case RAST_TYPE_DATETIME: db_key.data = (char *) rast_value_datetime(value); property_nbytes = strlen(rast_value_datetime(value)); if (property_nbytes > DATETIME_VALUE_SIZE) { property_nbytes = DATETIME_VALUE_SIZE; } db_key.size = property_nbytes; break; case RAST_TYPE_UINT: fixed_number = 
rast_fix_byte_order(rast_value_uint(value), db->is_native); db_key.data = (char *) &fixed_number; db_key.size = sizeof(rast_uint_t); break; } fixed_doc_id = rast_fix_byte_order(doc_id, db->is_native); db_value.data = (char *) &fixed_doc_id; db_value.size = sizeof(rast_doc_id_t); if (property->flags & RAST_PROPERTY_FLAG_UNIQUE) { flags = DB_NOOVERWRITE; } else { flags = 0; } dberr = property_index->inv->put(property_index->inv, db->bdb_txn, &db_key, &db_value, flags); if (dberr != 0) { return db_error_to_rast_error(dberr); } } if (property->flags & RAST_PROPERTY_FLAG_TEXT_SEARCH) { const char *text = NULL; char *s; rast_size_t nbytes; switch (property->type) { case RAST_TYPE_STRING: text = rast_value_string(value); break; case RAST_TYPE_DATE: /* never reached */ break; case RAST_TYPE_DATETIME: /* never reached */ break; case RAST_TYPE_UINT: /* never reached */ break; } s = rast_normalize_text(db->encoding_module, text, strlen(text), &nbytes, pool); error = text_index_register(db, property_index->text, doc_id, s, nbytes, pool); if (error != RAST_OK) { return error; } } return RAST_OK; } static rast_error_t * register_property_indices(rast_local_db_t *db, rast_doc_id_t doc_id, rast_value_t *properties, apr_pool_t *pool) { int i; rast_error_t *error; error = registerable_property_index(db, doc_id, properties, pool); if (error != RAST_OK) { return error; } for (i = 0; i < db->num_properties; i++) { error = register_property_index(db, doc_id, db->properties + i, db->property_indices + i, properties + i, pool); if (error != RAST_OK) { return error; } } return RAST_OK; } static rast_error_t * register_properties(rast_local_db_t *db, rast_doc_id_t doc_id, rast_size_t num_chars, rast_value_t *properties, apr_pool_t *pool) { int dberr; char *data; char *delete_flag; DBT db_key = { 0 }, db_value = { 0 }; rast_size_t data_nbytes; rast_doc_id_t db_byte_order_doc_id; data_nbytes = 1 + sizeof(rast_size_t); data_nbytes += pack_properties_length(db, properties); data = (char *) 
apr_palloc(pool, data_nbytes); delete_flag = data; *delete_flag = 0; *(rast_size_t *) (data + 1) = rast_fix_byte_order(num_chars, db->is_native); pack_properties(db, properties, data + 1 + sizeof(rast_size_t)); db_byte_order_doc_id = rast_fix_byte_order(doc_id, db->is_native); db_key.data = (char *) &db_byte_order_doc_id; db_key.size = sizeof(rast_doc_id_t); db_value.data = (char *) data; db_value.size = data_nbytes; dberr = db->properties_db->put(db->properties_db, db->bdb_txn, &db_key, &db_value, 0); return db_error_to_rast_error(dberr); } static rast_error_t * change_doc_info(rast_local_db_t *db, rast_doc_id_t doc_id_difference, rast_size_t num_docs_difference, apr_pool_t *pool) { apr_file_t *file; const char *filename; apr_status_t status; rast_error_t *error; rast_doc_id_t doc_id; rast_size_t num_docs; apr_off_t offset; filename = apr_pstrcat(pool, db->path, "/doc_info", NULL); error = open_locked_file(filename, APR_FLOCK_EXCLUSIVE, APR_READ | APR_WRITE, &file, pool); if (error != RAST_OK) { return error; } if ((error = read_number(file, &doc_id, db->is_native)) != RAST_OK || (error = read_number(file, &num_docs, db->is_native)) != RAST_OK) { goto error; } offset = 0; status = apr_file_seek(file, APR_SET, &offset); if (status != APR_SUCCESS) { error = apr_status_to_rast_error(status); goto error; } if ((error = write_number(file, doc_id + doc_id_difference, db->is_native)) != RAST_OK) { goto error; } error = write_number(file, num_docs + num_docs_difference, db->is_native); error: apr_file_unlock(file); apr_file_close(file); return error; } rast_error_t * rast_local_db_register(rast_db_t *base, const char *text, rast_size_t nbytes, rast_value_t *properties, rast_doc_id_t *new_doc_id) { rast_local_db_t *db = (rast_local_db_t *) base; apr_pool_t *sub_pool; rast_size_t num_chars; rast_error_t *error; rast_doc_id_t doc_id; if (db->flags & RAST_DB_RDONLY) { return rast_error(RAST_ERROR_BAD_DB, "can't register to read-only db"); } #if 0 /* todo: for bdb transaction 
*/ if (db->bdb_txn == NULL) { int dberr; dberr = db->bdb_env->txn_begin(db->bdb_env, NULL, &db->bdb_txn, DB_TXN_SYNC); if (dberr != 0) { return db_error_to_rast_error(dberr); } } #endif apr_pool_create(&sub_pool, db->base.pool); error = get_max_doc_id(db, &doc_id, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } apr_pool_clear(sub_pool); doc_id++; error = register_property_indices(db, doc_id, properties, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } apr_pool_clear(sub_pool); error = register_text(db, doc_id, text, nbytes, properties, db->encoding_module, &num_chars, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } apr_pool_clear(sub_pool); error = register_properties(db, doc_id, num_chars, properties, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } apr_pool_clear(sub_pool); error = change_doc_info(db, +1, +1, sub_pool); apr_pool_destroy(sub_pool); if (new_doc_id != NULL) { *new_doc_id = doc_id; } if (db->registered_chars >= db->sync_threshold_chars) { return rast_local_db_sync(base); } return error; } static rast_error_t * register_deleted_doc_id(rast_local_db_t *db, rast_doc_id_t doc_id) { int dberr; DBT db_key = { 0 }, db_value = { 0 }; rast_doc_id_t db_byte_order_doc_id; char buf[1 + sizeof(rast_size_t)]; char *delete_flag = buf; rast_size_t *num_chars = (rast_size_t *) (buf + 1); *delete_flag = 1; *num_chars = 0; db_byte_order_doc_id = rast_fix_byte_order(doc_id, db->is_native); db_key.data = (char *) &db_byte_order_doc_id; db_key.size = sizeof(rast_doc_id_t); db_value.data = buf; db_value.size = sizeof(buf); dberr = db->properties_db->put(db->properties_db, db->bdb_txn, &db_key, &db_value, 0); return db_error_to_rast_error(dberr); } static rast_error_t *local_document_add_text(rast_document_t *doc, const char *text, int nbytes); static rast_error_t *local_document_set_property(rast_document_t *doc, const char *name, const rast_value_t 
*value); static rast_error_t *local_document_commit(rast_document_t *doc); static rast_error_t *local_document_abort(rast_document_t *doc); static rast_error_t *local_document_get_doc_id(rast_document_t *doc, rast_doc_id_t *doc_id); rast_error_t * rast_local_db_create_document(rast_db_t *base, rast_document_t **result) { const static rast_document_t default_base = { NULL, NULL, local_document_add_text, local_document_set_property, local_document_commit, local_document_abort, local_document_get_doc_id, }; rast_local_db_t *db = (rast_local_db_t *) base; apr_pool_t *pool, *sub_pool; rast_doc_id_t doc_id; rast_local_document_t *doc; rast_text_indexer_t *indexer; rast_error_t *error; if (db->flags & RAST_DB_RDONLY) { return rast_error(RAST_ERROR_BAD_DB, "can't register to read-only db"); } apr_pool_create(&pool, db->base.pool); apr_pool_create(&sub_pool, pool); error = get_max_doc_id(db, &doc_id, sub_pool); if (error != RAST_OK) { apr_pool_destroy(pool); return error; } apr_pool_clear(sub_pool); doc_id++; error = change_doc_info(db, +1, 0, sub_pool); if (error != RAST_OK) { apr_pool_destroy(pool); return error; } apr_pool_destroy(sub_pool); error = register_deleted_doc_id(db, doc_id); if (error != RAST_OK) { apr_pool_destroy(pool); return error; } error = rast_text_index_register(db->text_index, doc_id, &indexer, pool); if (error != RAST_OK) { apr_pool_destroy(pool); return error; } doc = (rast_local_document_t *) apr_palloc(pool, sizeof(rast_local_document_t)); doc->base = default_base; doc->base.pool = pool; doc->base.db = base; doc->doc_id = doc_id; doc->nchars = 0; doc->indexer = indexer; doc->property_values = apr_hash_make(doc->base.pool); *result = (rast_document_t *) doc; return RAST_OK; } static int get_property_number(rast_local_db_t *db, const char *name) { int i; for (i = 0; i < db->num_properties; i++) { if (strcmp(db->properties[i].name, name) == 0) { return i; } } return NOT_FOUND; } static rast_error_t * get_required_property_indices(rast_local_db_t *db, 
rast_search_option_t *options, int **result, apr_pool_t *pool) { int i; if (options->num_properties == 0) { return RAST_OK; } if (options->num_properties > 0) { *result = (int *) apr_palloc(pool, sizeof(int) * db->num_properties); for (i = 0; i < db->num_properties; i++) { (*result)[i] = NOT_REQUIRED; } for (i = 0; i < options->num_properties; i++) { int number = get_property_number(db, options->properties[i]); if (number == NOT_FOUND) { return rast_error(RAST_ERROR_INVALID_QUERY, "no such property: %s", options->properties[i]); } (*result)[number] = i; } } return RAST_OK; } static rast_error_t * get_sort_property_indices(rast_local_db_t *db, rast_search_option_t *options, int **result, apr_pool_t *pool) { int number; int i; if (options->sort_method != RAST_SORT_METHOD_PROPERTY) { return RAST_OK; } if (options->sort_property == NULL) { return rast_error(RAST_ERROR_INVALID_QUERY, "no set sorted property"); } *result = (int *) apr_palloc(pool, sizeof(int) * db->num_properties); for (i = 0; i < db->num_properties; i++) { (*result)[i] = NOT_REQUIRED; } number = get_property_number(db, options->sort_property); if (number == NOT_FOUND) { return rast_error(RAST_ERROR_INVALID_QUERY, "no such property: %s", options->sort_property); } (*result)[number] = 0; return RAST_OK; } typedef struct { rast_local_db_t *db; rast_doc_id_t doc_id; rast_term_frequency_ring_t *terms; int score; int deleted; rast_size_t num_chars; const char *properties_data; int properties_data_nbytes; rast_value_t *sort_property; } doc_data_t; static rast_error_t * get_document(rast_local_db_t *db, DBC *cursor, DBT *db_key, DBT *db_value, rast_doc_id_t doc_id, rast_doc_id_t prev_doc_id, doc_data_t **doc_data, apr_pool_t *pool) { rast_doc_id_t id; const char *p; int dberr; if (prev_doc_id == 0 || doc_id != prev_doc_id + 1) { id = rast_fix_byte_order(doc_id, db->is_native); db_key->data = &id; dberr = cursor->c_get(cursor, db_key, db_value, DB_SET); if (dberr != 0) { return db_error_to_rast_error(dberr); } } 
else { dberr = cursor->c_get(cursor, db_key, db_value, DB_NEXT); if (dberr != 0) { return db_error_to_rast_error(dberr); } id = rast_fix_byte_order(*(rast_doc_id_t *) db_key->data, db->is_native); if (id != doc_id) { free(db_value->data); return rast_error(RAST_ERROR_BAD_DB, "doc_id not found in properties.db: %d", doc_id); } } *doc_data = (doc_data_t *) apr_palloc(pool, sizeof(doc_data_t)); (*doc_data)->db = db; (*doc_data)->doc_id = doc_id; (*doc_data)->score = 0; p = (const char *) db_value->data; (*doc_data)->deleted = (int) *p; p++; (*doc_data)->num_chars = rast_fix_byte_order(*((rast_size_t *) p), db->is_native); p += sizeof(rast_size_t); (*doc_data)->properties_data_nbytes = db_value->size - (p - (char *) db_value->data); (*doc_data)->properties_data = apr_pmemdup(pool, p, (*doc_data)->properties_data_nbytes); free(db_value->data); return RAST_OK; } static rast_error_t * get_properties(doc_data_t *doc, int *required_indices, int num_properties, rast_value_t **result, apr_pool_t *pool) { const char *p, *p_end; int i, index; rast_size_t nbytes; p = doc->properties_data; p_end = p + doc->properties_data_nbytes; *result = (rast_value_t *) apr_palloc(pool, sizeof(rast_value_t) * num_properties); for (i = 0; i < doc->db->num_properties; i++) { index = required_indices[i]; if (index != -1) { if (doc->db->properties[i].flags & RAST_PROPERTY_FLAG_OMIT) { return rast_error(RAST_ERROR_INVALID_ARGUMENT, "%s is omit property", doc->db->properties[i].name); } (*result)[index].type = doc->db->properties[i].type; } if (doc->db->properties[i].flags & RAST_PROPERTY_FLAG_OMIT) { continue; } switch (doc->db->properties[i].type) { case RAST_TYPE_STRING: case RAST_TYPE_DATE: case RAST_TYPE_DATETIME: if (p + sizeof(rast_size_t) > p_end) { return rast_error(RAST_ERROR_BAD_DB, "broken properties.db"); } nbytes = rast_fix_byte_order(*(rast_size_t *) p, doc->db->is_native); p += sizeof(rast_size_t); if (p + nbytes > p_end) { return rast_error(RAST_ERROR_BAD_DB, "broken 
properties.db"); } if (index != -1) { rast_value_set_string((*result) + index, apr_pstrndup(pool, p, nbytes)); } p += nbytes; break; case RAST_TYPE_UINT: if (p + sizeof(rast_uint_t) > p_end) { return rast_error(RAST_ERROR_BAD_DB, "broken properties.db"); } if (index != -1) { rast_value_set_uint((*result) + index, *(rast_uint_t *) p); } p += sizeof(rast_uint_t); break; } } return RAST_OK; } typedef struct idf_t { double value; APR_RING_ENTRY(idf_t) link; } idf_t; APR_RING_HEAD(idf_ring_t, idf_t); typedef struct idf_ring_t idf_ring_t; static rast_error_t * create_idf_list(rast_local_db_t *db, rast_query_result_t *query_result, int all_num_docs, idf_ring_t **result, int *result_len, apr_pool_t *pool) { idf_ring_t *idf_list; idf_t *idf; rast_term_t *term; int n; idf_list = (idf_ring_t *) apr_palloc(pool, sizeof(idf_ring_t)); APR_RING_INIT(idf_list, idf_t, link); n = 0; for (term = APR_RING_FIRST(&query_result->terms); term != APR_RING_SENTINEL(&query_result->terms, rast_term_t, link); term = APR_RING_NEXT(term, link)) { idf = (idf_t *) apr_palloc(pool, sizeof(idf_t)); idf->value = log10((double) all_num_docs / term->doc_count) + 1; APR_RING_INSERT_TAIL(idf_list, idf, idf_t, link); n++; } *result = idf_list; *result_len = n; return RAST_OK; } static rast_error_t * create_idf_list_with_receive_terms(rast_local_db_t *db, int *terms, int num_terms, int all_num_docs, idf_ring_t **result, apr_pool_t *pool) { idf_ring_t *idf_list; int i; idf_list = (idf_ring_t *) apr_palloc(pool, sizeof(idf_ring_t)); APR_RING_INIT(idf_list, idf_t, link); for (i = 0; i < num_terms; i++) { idf_t *idf; idf = (idf_t *) apr_palloc(pool, sizeof(idf_t)); idf->value = log10((double) all_num_docs / terms[i]) + 1; APR_RING_INSERT_TAIL(idf_list, idf, idf_t, link); } *result = idf_list; return RAST_OK; } static rast_result_term_t * create_result_terms(apr_pool_t *pool, rast_query_result_t *query_result, int num_terms) { rast_result_term_t *result_terms; rast_term_t *term; int i; result_terms = 
(rast_result_term_t *) apr_palloc(pool, sizeof(rast_result_term_t) * num_terms); i = 0; for (term = APR_RING_FIRST(&query_result->terms); term != APR_RING_SENTINEL(&query_result->terms, rast_term_t, link); term = APR_RING_NEXT(term, link)) { result_terms[i].term = apr_pstrdup(pool, term->term); result_terms[i].doc_count = term->doc_count; i++; } return result_terms; } static void calc_score(doc_data_t *doc, rast_candidate_t *candidate, idf_ring_t *idf_list) { rast_term_frequency_t *tf; idf_t *idf; double score = 0.0; idf = APR_RING_FIRST(idf_list); for (tf = APR_RING_FIRST(&candidate->terms); tf != APR_RING_SENTINEL(&candidate->terms, rast_term_frequency_t, link); tf = APR_RING_NEXT(tf, link)) { score += ((double) tf->count / doc->num_chars) * idf->value; idf = APR_RING_NEXT(idf, link); } doc->score = (int) (score * 10000000); } static rast_error_t * create_documents(rast_local_db_t *db, rast_query_result_t *query_result, idf_ring_t *idf_list, int *sort_property_indices, doc_data_t ***docs, int *num_docs, apr_pool_t *pool) { rast_error_t *error; rast_candidate_t *candidate; doc_data_t *doc; DBC *cursor; int i, dberr; DBT db_key = { 0 }, db_value = { 0 }; rast_doc_id_t prev_doc_id; *num_docs = 0; for (candidate = APR_RING_FIRST(&query_result->candidates); candidate != APR_RING_SENTINEL(&query_result->candidates, rast_candidate_t, link); candidate = APR_RING_NEXT(candidate, link)) { (*num_docs)++; } *docs = (doc_data_t **) apr_palloc(pool, sizeof(doc_data_t *) * *num_docs); candidate = APR_RING_FIRST(&query_result->candidates); i = 0; dberr = db->properties_db->cursor(db->properties_db, db->bdb_txn, &cursor, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } db_key.data = NULL; db_key.size = sizeof(rast_doc_id_t); db_value.flags |= DB_DBT_MALLOC; prev_doc_id = 0; while (candidate != APR_RING_SENTINEL(&query_result->candidates, rast_candidate_t, link)) { error = get_document(db, cursor, &db_key, &db_value, candidate->doc_id, prev_doc_id, &doc, pool); if 
(error != RAST_OK) { cursor->c_close(cursor); return error; } prev_doc_id = candidate->doc_id; if (doc->deleted) { (*num_docs)--; } else { doc->terms = &candidate->terms; calc_score(doc, candidate, idf_list); if (sort_property_indices != NULL) { error = get_properties(doc, sort_property_indices, 1, &doc->sort_property, pool); if (error != RAST_OK) { cursor->c_close(cursor); return error; } } (*docs)[i] = doc; i++; } candidate = APR_RING_NEXT(candidate, link); } cursor->c_close(cursor); return RAST_OK; } static int get_start_pos(rast_size_t src_nchars, rast_pos_t pos, rast_size_t term_nchars, rast_size_t summary_nchars) { int end_pos; if (pos > src_nchars) { return 0; } end_pos = pos + term_nchars / 2 + summary_nchars / 2; if (end_pos > src_nchars) { return src_nchars - summary_nchars; } return pos + (term_nchars + term_nchars % 2) / 2 - (summary_nchars + summary_nchars % 2) / 2; } static rast_error_t * pass_chars(rast_tokenizer_t *tokenizer, int count) { rast_error_t *error; int i; for (i = 0; i < count && !rast_char_tokenizer_is_done(tokenizer); i++) { if ((error = rast_char_tokenizer_next(tokenizer)) != RAST_OK) { return error; } } return RAST_OK; } static rast_error_t * get_summary(apr_pool_t *pool, rast_encoding_module_t *encoding_module, const char *src_text, rast_size_t src_nbytes, rast_size_t src_nchars, rast_pos_t pos, rast_size_t term_nchars, rast_size_t summary_nchars, char **dst_text, rast_size_t *dst_nbytes) { rast_tokenizer_t *tokenizer; int start_pos; rast_error_t *error; rast_char_t start_ch, end_ch; start_pos = get_start_pos(src_nchars, pos, term_nchars, summary_nchars); tokenizer = rast_char_tokenizer_create(pool, encoding_module, src_text, src_nbytes); if ((error = pass_chars(tokenizer, start_pos)) != RAST_OK) { return error; } rast_char_tokenizer_get_current(tokenizer, &start_ch); if ((error = pass_chars(tokenizer, summary_nchars)) != RAST_OK) { return error; } rast_char_tokenizer_get_current(tokenizer, &end_ch); *dst_nbytes = end_ch.ptr - 
start_ch.ptr; *dst_text = (char *) apr_pmemdup(pool, start_ch.ptr, *dst_nbytes); return RAST_OK; } static rast_error_t * create_result_items(doc_data_t **docs, int num_docs, rast_search_option_t *options, int *required_indices, rast_result_term_t *terms, rast_result_item_t ***result, int *result_len, apr_pool_t *pool) { rast_error_t *error; rast_result_item_t **items; int i; apr_pool_t *sub_pool; rast_size_t term_nchars = 0; if (options->need_summary && terms != NULL && num_docs > 0) { term_nchars = rast_count_chars(docs[0]->db->encoding_module, terms[0].term, strlen(terms[0].term), pool); } apr_pool_create(&sub_pool, pool); items = (rast_result_item_t **) apr_palloc(pool, sizeof(rast_result_item_t *) * num_docs); for (i = 0; i < num_docs; i++) { items[i] = (rast_result_item_t *) apr_palloc(pool, sizeof(rast_result_item_t)); items[i]->db_index = 0; items[i]->doc_id = docs[i]->doc_id; items[i]->score = docs[i]->score; if (options->need_summary) { rast_size_t summary_nbytes; char *summary; rast_encoding_module_t *encoding_module; rast_term_frequency_t *tf; error = rast_local_db_get_text((rast_db_t *) docs[i]->db, docs[i]->doc_id, &summary, &summary_nbytes, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } encoding_module = docs[i]->db->encoding_module; tf = APR_RING_FIRST(docs[i]->terms); if (tf == APR_RING_SENTINEL(&docs[i]->terms, rast_term_frequency_t, link)) { error = get_summary(pool, encoding_module, summary, summary_nbytes, docs[i]->num_chars, 0, term_nchars, options->summary_nchars, &items[i]->summary, &items[i]->summary_nbytes); } else { error = get_summary(pool, encoding_module, summary, summary_nbytes, docs[i]->num_chars, tf->pos, term_nchars, options->summary_nchars, &items[i]->summary, &items[i]->summary_nbytes); } if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } apr_pool_clear(sub_pool); } else { items[i]->summary = NULL; items[i]->summary_nbytes = 0; } if (options->num_properties > 0) { error = 
get_properties(docs[i], required_indices, options->num_properties, &items[i]->properties, pool); if (error != RAST_OK) { return error; } } else { items[i]->properties = NULL; } } apr_pool_destroy(sub_pool); *result = items; *result_len = num_docs; return RAST_OK; } static int score_ascending_sort_compare_func(const void *v1, const void *v2) { doc_data_t *doc1 = *(doc_data_t **) v1; doc_data_t *doc2 = *(doc_data_t **) v2; return doc1->score - doc2->score; } static int score_descending_sort_compare_func(const void *v1, const void *v2) { return score_ascending_sort_compare_func(v2, v1); } static int property_string_ascending_sort_compare_func(const void *v1, const void *v2) { doc_data_t *doc1 = *(doc_data_t **) v1; doc_data_t *doc2 = *(doc_data_t **) v2; if (doc1->sort_property->type == RAST_TYPE_UINT) { rast_uint_t n1 = rast_value_uint(doc1->sort_property); rast_uint_t n2 = rast_value_uint(doc2->sort_property); if (n1 < n2) { return -1; } else if (n1 == n2) { return 0; } else { return 1; } } else { return strcmp(rast_value_string(doc1->sort_property), rast_value_string(doc2->sort_property)); } } static int property_string_descending_sort_compare_func(const void *v1, const void *v2) { return property_string_ascending_sort_compare_func(v2, v1); } static rast_error_t * create_result(rast_local_db_t *db, rast_query_result_t *query_result, rast_search_option_t *options, int *required_property_indices, int *sort_property_indices, rast_result_t **result, apr_pool_t *pool, apr_pool_t *sub_pool) { rast_error_t *error; int num_terms; idf_ring_t *idf_list; doc_data_t **docs; int num_docs, all_num_docs; int (*sort_func)(const void *, const void *); *result = (rast_result_t *) apr_palloc(pool, sizeof(rast_result_t)); (*result)->num_indices = 1; error = get_num_docs(db, &all_num_docs, pool); if (error != RAST_OK) { return error; } (*result)->num_docs = all_num_docs; if (options->all_num_docs == RAST_CALC_SCORE_STANDALONE) { error = create_idf_list(db, query_result, all_num_docs, 
&idf_list, &num_terms, sub_pool);
    }
    else {
        /* document counts and total were supplied by the caller */
        error = create_idf_list_with_receive_terms(db, options->terms,
                                                   options->num_terms,
                                                   options->all_num_docs,
                                                   &idf_list, sub_pool);
        num_terms = options->num_terms;
    }
    if (error != RAST_OK) {
        return error;
    }

    if (num_terms > 0) {
        (*result)->terms = create_result_terms(pool, query_result,
                                               num_terms);
    }
    else {
        (*result)->terms = NULL;
    }
    (*result)->num_terms = num_terms;

    /* no candidates: return an empty result */
    if (APR_RING_EMPTY(&query_result->candidates, rast_candidate_t, link)) {
        (*result)->hit_count = 0;
        (*result)->items = NULL;
        (*result)->num_items = 0;
        return RAST_OK;
    }

    error = create_documents(db, query_result, idf_list,
                             sort_property_indices,
                             &docs, &num_docs, sub_pool);
    if (error != RAST_OK) {
        return error;
    }
    /* hit_count is the number of live documents, before paging */
    (*result)->hit_count = num_docs;

    /* every sort method other than RAST_SORT_METHOD_SCORE uses the property
     * comparators; score sorting is descending unless ascending is
     * requested, property sorting is ascending unless descending is
     * requested */
    if (options->sort_method == RAST_SORT_METHOD_SCORE) {
        if (options->sort_order == RAST_SORT_ORDER_ASCENDING) {
            sort_func = score_ascending_sort_compare_func;
        }
        else {
            sort_func = score_descending_sort_compare_func;
        }
    }
    else {
        if (options->sort_order == RAST_SORT_ORDER_DESCENDING) {
            sort_func = property_string_descending_sort_compare_func;
        }
        else {
            sort_func = property_string_ascending_sort_compare_func;
        }
    }
    qsort(docs, num_docs, sizeof(doc_data_t *), sort_func);

    /* select the window [start_no, start_no + num_items) of the sorted
     * documents */
    if (num_docs < options->start_no) {
        num_docs = 0;
    }
    else {
        docs += options->start_no;
        num_docs -= options->start_no;
        if (options->num_items != RAST_RESULT_ALL_ITEMS &&
            num_docs > options->num_items) {
            num_docs = options->num_items;
        }
    }

    error = create_result_items(docs, num_docs, options,
                                required_property_indices,
                                (*result)->terms,
                                &(*result)->items, &(*result)->num_items,
                                pool);
    return error;
}

/*
 * Search the database with query_string and build the result in *result.
 *
 * The query is parsed, optimized and executed using a temporary sub-pool
 * of pool; the result is allocated from pool.
 */
rast_error_t *
rast_local_db_search(rast_db_t *base, const char *query_string,
                     rast_search_option_t *options,
                     rast_result_t **result, apr_pool_t *pool)
{
    rast_local_db_t *db = (rast_local_db_t *) base;
    rast_error_t *error;
    rast_query_t *query;
    rast_query_result_t *query_result;
    apr_pool_t *sub_pool;
    int *required_property_indices, *sort_property_indices;
    rast_query_option_t
query_options; apr_pool_create(&sub_pool, pool); error = get_required_property_indices(db, options, &required_property_indices, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } sort_property_indices = NULL; error = get_sort_property_indices(db, options, &sort_property_indices, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } error = rast_parse_query(sub_pool, db->encoding_module, query_string, &query); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } error = rast_query_optimize(query, &query, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } query_options.score_method = options->score_method; error = rast_query_exec(query, db, &query_options, &query_result, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } error = create_result(db, query_result, options, required_property_indices, sort_property_indices, result, pool, sub_pool); apr_pool_destroy(sub_pool); return error; } rast_error_t * rast_local_db_delete(rast_db_t *base, rast_doc_id_t doc_id) { rast_local_db_t *db = (rast_local_db_t *) base; DBT db_key = { 0 }, db_value = { 0 }; rast_doc_id_t db_byte_order_doc_id; char *delete_flag, *p, *p_end; int dberr, i; rast_size_t nbytes; apr_pool_t *pool; rast_error_t *error; db_byte_order_doc_id = rast_fix_byte_order(doc_id, db->is_native); db_key.data = (char *) &db_byte_order_doc_id; db_key.size = sizeof(rast_doc_id_t); db_value.flags |= DB_DBT_MALLOC; dberr = db->properties_db->get(db->properties_db, db->bdb_txn, &db_key, &db_value, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } p = (char *) db_value.data; p_end = p + db_value.size; p += 1 + sizeof(rast_size_t); for (i = 0; i < db->num_properties; i++) { DBT inv_key = { 0 }, inv_value = { 0 }; DB *inv = db->property_indices[i].inv; if (db->properties[i].flags & RAST_PROPERTY_FLAG_OMIT) { continue; } inv_key.flags |= DB_DBT_MALLOC; inv_value.flags |= DB_DBT_MALLOC; 
switch (db->properties[i].type) { case RAST_TYPE_STRING: case RAST_TYPE_DATE: case RAST_TYPE_DATETIME: if (p + sizeof(rast_size_t) > p_end) { return rast_error(RAST_ERROR_BAD_DB, "broken properties.db"); } nbytes = rast_fix_byte_order(*(rast_size_t *) p, db->is_native); p += sizeof(rast_size_t); if (p + nbytes > p_end) { return rast_error(RAST_ERROR_BAD_DB, "broken properties.db"); } inv_key.data = p; inv_key.size = nbytes; p += nbytes; break; case RAST_TYPE_UINT: if (p + sizeof(rast_uint_t) > p_end) { return rast_error(RAST_ERROR_BAD_DB, "broken properties.db"); } inv_key.data = p; inv_key.size = sizeof(rast_uint_t); p += sizeof(rast_uint_t); break; } if (db->properties[i].flags & RAST_PROPERTY_FLAG_UNIQUE) { dberr = inv->del(inv, db->bdb_txn, &inv_key, 0); } if (dberr != 0) { return db_error_to_rast_error(dberr); } } delete_flag = (char *) db_value.data; *delete_flag = 1; db_value.size = 1 + sizeof(rast_size_t); dberr = db->properties_db->put(db->properties_db, db->bdb_txn, &db_key, &db_value, 0); free(db_value.data); if (dberr != 0) { db_error_to_rast_error(dberr); } apr_pool_create(&pool, base->pool); error = change_doc_info(db, 0, -1, pool); apr_pool_destroy(pool); return error; } rast_error_t * rast_local_db_update(rast_db_t *db, rast_doc_id_t doc_id, const char *text, rast_size_t nbytes, rast_value_t *properties, rast_doc_id_t *new_doc_id) { rast_error_t *error; error = rast_local_db_delete(db, doc_id); if (error != RAST_OK) { return error; } return rast_local_db_register(db, text, nbytes, properties, new_doc_id); } rast_error_t * rast_local_db_get_text(rast_db_t *base, rast_doc_id_t doc_id, char **s, rast_size_t *nbytes, apr_pool_t *pool) { rast_local_db_t *db = (rast_local_db_t *) base; DBT db_key = { 0 }, db_value = { 0 }; rast_uint_t fixed_doc_id; int dberr; if (!db->preserve_text) { *s = NULL; *nbytes = 0; return RAST_OK; } fixed_doc_id = rast_fix_byte_order(doc_id, db->is_native); db_key.data = &fixed_doc_id; db_key.size = sizeof(rast_uint_t); 
db_value.flags |= DB_DBT_MALLOC; dberr = db->text_db->get(db->text_db, db->bdb_txn, &db_key, &db_value, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } *s = apr_pmemdup(pool, db_value.data, db_value.size); *nbytes = db_value.size; free(db_value.data); return RAST_OK; } static rast_error_t * create_non_inverted_indices(rast_local_db_t *db, rast_doc_id_t new_doc_id, DBT *db_key, DBT *db_value, DB *new_text_db, DB *new_properties_db, apr_hash_t *doc_id_table, apr_pool_t *pool) { DBT text_db_value = { 0 }; rast_doc_id_t old_doc_id; int dberr; if (db->preserve_text) { text_db_value.flags |= DB_DBT_MALLOC; dberr = db->text_db->get(db->text_db, db->bdb_txn, db_key, &text_db_value, 0); if (dberr != 0) { return db_error_to_rast_error(dberr); } } old_doc_id = rast_fix_byte_order(*(rast_doc_id_t *) db_key->data, db->is_native); apr_hash_set(doc_id_table, apr_pmemdup(pool, &old_doc_id, sizeof(rast_doc_id_t)), sizeof(rast_doc_id_t), apr_pmemdup(pool, &new_doc_id, sizeof(rast_doc_id_t))); *(rast_doc_id_t *) db_key->data = rast_fix_byte_order(new_doc_id, db->is_native); dberr = new_properties_db->put(new_properties_db, db->bdb_txn, db_key, db_value, 0); if (dberr != 0) { free(text_db_value.data); return db_error_to_rast_error(dberr); } if (db->preserve_text) { dberr = new_text_db->put(new_text_db, db->bdb_txn, db_key, &text_db_value, 0); free(text_db_value.data); if (dberr != 0) { return db_error_to_rast_error(dberr); } } return RAST_OK; } static rast_error_t * create_optimized_property_indices(rast_local_db_t *db, apr_hash_t *doc_id_table, int lorder, apr_pool_t *pool) { int i, dberr; char *filename; DB *inv, *new_inv; DBC *cursor; DBT db_key = { 0 }, db_value = { 0 }, new_db_value = { 0 }; int (*compare_func)(DB *, const DBT *, const DBT *); rast_error_t *error = RAST_OK; db_key.flags = DB_DBT_REALLOC; for (i = 0; i < db->num_properties; i++) { if (db->properties[i].flags & RAST_PROPERTY_FLAG_SEARCH) { filename = apr_pstrcat(pool, db->path, "/properties/new_", 
db->properties[i].name, ".inv", NULL); switch (db->properties[i].type) { case RAST_TYPE_STRING: case RAST_TYPE_DATE: case RAST_TYPE_DATETIME: compare_func = compare_string_keys; break; case RAST_TYPE_UINT: compare_func = compare_uint_keys; break; } error = bdb_open(db->bdb_env, db->bdb_txn, filename, DB_CREATE | DB_EXCL, DB_BTREE, compare_func, &new_inv); if (error != RAST_OK) { break; } inv = db->property_indices[i].inv; dberr = inv->cursor(inv, db->bdb_txn, &cursor, 0); if (dberr != 0) { error = db_error_to_rast_error(dberr); break; } dberr = cursor->c_get(cursor, &db_key, &db_value, DB_FIRST); if (dberr != 0 && dberr != DB_NOTFOUND) { cursor->c_close(cursor); error = db_error_to_rast_error(dberr); break; } while (dberr == 0) { rast_doc_id_t db_byte_order_doc_id, old_doc_id, *new_doc_id; db_byte_order_doc_id = *(rast_doc_id_t *) db_value.data; old_doc_id = rast_fix_byte_order(db_byte_order_doc_id, db->is_native); new_doc_id = (rast_doc_id_t *) apr_hash_get(doc_id_table, &old_doc_id, sizeof(rast_doc_id_t)); if (new_doc_id != NULL) { db_byte_order_doc_id = rast_fix_byte_order(*new_doc_id, db->is_native); new_db_value.data = &db_byte_order_doc_id; new_db_value.size = sizeof(rast_doc_id_t); dberr = new_inv->put(new_inv, db->bdb_txn, &db_key, &new_db_value, 0); if (dberr != 0) { break; } } dberr = cursor->c_get(cursor, &db_key, &db_value, DB_NEXT); } cursor->c_close(cursor); new_inv->close(new_inv, 0); if (dberr != DB_NOTFOUND) { error = db_error_to_rast_error(dberr); break; } } if (db->properties[i].flags & RAST_PROPERTY_FLAG_TEXT_SEARCH) { filename = apr_pstrcat(pool, db->path, "/properties/new_", db->properties[i].name, NULL); error = rast_text_index_optimize(db->bdb_env, db->bdb_txn, lorder, db->property_indices[i].text, filename, doc_id_table); if (error != RAST_OK) { break; } } } if (db_key.data != NULL) { free(db_key.data); } return error; } static rast_error_t * create_optimized_db(rast_local_db_t *db, const rast_db_optimize_option_t *options, int lorder, 
apr_pool_t *pool)
{
    apr_hash_t *doc_id_table;
    DBC *cursor;
    /* new_text_db is only opened when the database preserves text */
    DB *new_properties_db, *new_text_db = NULL;
    const char *filename;
    int dberr;
    DBT db_key = { 0 }, db_value = { 0 };
    rast_error_t *error;
    rast_doc_id_t new_doc_id;

    filename = apr_pstrcat(pool, db->path, "/new_properties.db", NULL);
    error = bdb_open(db->bdb_env, db->bdb_txn, filename,
                     DB_CREATE | DB_EXCL, DB_BTREE, compare_uint_keys,
                     &new_properties_db);
    if (error != RAST_OK) {
        return error;
    }

    if (db->preserve_text) {
        filename = apr_pstrcat(pool, db->path, "/new_text.db", NULL);
        error = bdb_open(db->bdb_env, db->bdb_txn, filename,
                         DB_CREATE | DB_EXCL, DB_RECNO, NULL, &new_text_db);
        if (error != RAST_OK) {
            new_properties_db->close(new_properties_db, 0);
            return error;
        }
    }

    dberr = db->properties_db->cursor(db->properties_db, db->bdb_txn,
                                      &cursor, 0);
    if (dberr != 0) {
        if (db->preserve_text) {
            new_text_db->close(new_text_db, 0);
        }
        new_properties_db->close(new_properties_db, 0);
        return db_error_to_rast_error(dberr);
    }

    db_key.flags |= DB_DBT_REALLOC;
    db_value.flags |= DB_DBT_REALLOC;
    dberr = cursor->c_get(cursor, &db_key, &db_value, DB_FIRST);
    if (dberr != 0 && dberr != DB_NOTFOUND) {
        cursor->c_close(cursor);
        if (db->preserve_text) {
            new_text_db->close(new_text_db, 0);
        }
        new_properties_db->close(new_properties_db, 0);
        return db_error_to_rast_error(dberr);
    }

    new_doc_id = 0;
    doc_id_table = apr_hash_make(pool);
    while (dberr == 0) {
        int delete_flag;

        /* the first byte of a properties_db record is the delete flag */
        delete_flag = *(char *) db_value.data;
        if (!delete_flag) {
            if (options->squeeze_doc_id) {
                /* renumber live documents consecutively from 1 */
                new_doc_id++;
            }
            else {
                rast_doc_id_t db_byte_order_doc_id;

                db_byte_order_doc_id = *(rast_doc_id_t *) db_key.data;
                new_doc_id = rast_fix_byte_order(db_byte_order_doc_id,
                                                 db->is_native);
            }

            error = create_non_inverted_indices(db, new_doc_id,
                                                &db_key, &db_value,
                                                new_text_db,
                                                new_properties_db,
                                                doc_id_table, pool);
            if (error != RAST_OK) {
                free(db_value.data);
                free(db_key.data);
                cursor->c_close(cursor);
                /* new_text_db exists only when text is preserved */
                if (db->preserve_text) {
                    new_text_db->close(new_text_db, 0);
                }
                new_properties_db->close(new_properties_db, 0);
                return error;
            }
        }
        dberr = cursor->c_get(cursor, &db_key, &db_value, DB_NEXT);
    }
    if (db_value.data != NULL) {
        free(db_value.data);
    }
    if (db_key.data != NULL) {
        free(db_key.data);
    }
    cursor->c_close(cursor);
    new_properties_db->close(new_properties_db, 0);
    if (db->preserve_text) {
        new_text_db->close(new_text_db, 0);
    }
    /* the loop ends normally only with DB_NOTFOUND */
    if (dberr != DB_NOTFOUND) {
        return db_error_to_rast_error(dberr);
    }

    error = create_optimized_property_indices(db, doc_id_table, lorder,
                                              pool);
    if (error != RAST_OK) {
        return error;
    }

    filename = apr_pstrcat(pool, db->path, "/new_text", NULL);
    error = rast_text_index_optimize(db->bdb_env, db->bdb_txn, lorder,
                                     db->text_index, filename,
                                     doc_id_table);
    if (error != RAST_OK) {
        return error;
    }

    if (options->squeeze_doc_id) {
        return create_doc_info(pool, db->path, "new_doc_info",
                               new_doc_id, db->is_native);
    }
    else {
        char *old_doc_info_path, *new_doc_info_path;
        apr_status_t status;

        old_doc_info_path = apr_pstrcat(pool, db->path, "/doc_info", NULL);
        new_doc_info_path = apr_pstrcat(pool, db->path, "/new_doc_info",
                                        NULL);
        status = apr_file_copy(old_doc_info_path, new_doc_info_path,
                               APR_FILE_SOURCE_PERMS, pool);
        return apr_status_to_rast_error(status);
    }
}

/*
 * Rename the file "<from_prefix><name>" in directory dirname to
 * "<to_prefix><name>".
 */
static rast_error_t *
rename_rast_file(const char *dirname, const char *name,
                 const char *from_prefix, const char *to_prefix,
                 apr_pool_t *pool)
{
    apr_status_t status;

    status = apr_file_rename(apr_pstrcat(pool, dirname, "/", from_prefix,
                                         name, NULL),
                             apr_pstrcat(pool, dirname, "/", to_prefix,
                                         name, NULL),
                             pool);
    return apr_status_to_rast_error(status);
}

/*
 * Rename the files of one text index (suffixes .ngm, .pos, .rng and .pfl)
 * from old_prefix to new_prefix.
 */
static rast_error_t *
rename_text_index_files(const char *db_name, const char *name,
                        const char *old_prefix, const char *new_prefix,
                        apr_pool_t *pool)
{
    const char *suffices[] = {
        ".ngm", ".pos", ".rng", ".pfl", NULL,
    };
    const char **suffix;
    const char *basename;
    rast_error_t *error;

    for (suffix = suffices; *suffix != NULL; suffix++) {
        basename = apr_pstrcat(pool, name, *suffix, NULL);
        error = rename_rast_file(db_name, basename, old_prefix, new_prefix,
                                 pool);
        if (error != RAST_OK)
{ return error; } } return RAST_OK; } static rast_error_t * rename_rast_files(const char *db_name, rast_property_t *properties, int num_properties, int preserve_text, const char *old_prefix, const char *new_prefix, apr_pool_t *pool) { const char *names[] = { "properties.db", "doc_info", NULL }; int i; const char *properties_path, **name; rast_error_t *error; if (preserve_text) { error = rename_rast_file(db_name, "text.db", old_prefix, new_prefix, pool); if (error != RAST_OK) { return error; } } for (name = names; *name != NULL; name++) { error = rename_rast_file(db_name, *name, old_prefix, new_prefix, pool); if (error != RAST_OK) { return error; } } error = rename_text_index_files(db_name, "text", old_prefix, new_prefix, pool); if (error != RAST_OK) { return error; } properties_path = apr_pstrcat(pool, db_name, "/properties", NULL); for (i = 0; i < num_properties; i++) { if (properties[i].flags & RAST_PROPERTY_FLAG_SEARCH) { error = rename_rast_file(properties_path, apr_pstrcat(pool, properties[i].name, ".inv", NULL), old_prefix, new_prefix, pool); if (error != RAST_OK) { return error; } } if (properties[i].flags & RAST_PROPERTY_FLAG_TEXT_SEARCH) { error = rename_text_index_files(properties_path, properties[i].name, old_prefix, new_prefix, pool); if (error != RAST_OK) { return error; } } } return RAST_OK; } static rast_error_t * remove_files(const char *dirname, const char *prefix, apr_pool_t *pool) { apr_dir_t *dir; apr_finfo_t finfo; apr_status_t status; int prefix_len; status = apr_dir_open(&dir, dirname, pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } prefix_len = strlen(prefix); while (1) { status = apr_dir_read(&finfo, APR_FINFO_TYPE | APR_FINFO_NAME, dir); if (status == APR_ENOENT) { break; } if (status != APR_SUCCESS || finfo.filetype != APR_REG) { continue; } if (strncmp(finfo.name, prefix, prefix_len) == 0) { status = apr_file_remove(apr_pstrcat(pool, dirname, "/", finfo.name, NULL), pool); if (status != APR_SUCCESS) { 
return apr_status_to_rast_error(status); } } } status = apr_dir_close(dir); return apr_status_to_rast_error(status); } static rast_error_t * replace_new_rast_files(const char *db_name, rast_property_t *properties, int num_properties, int preserve_text, apr_pool_t *pool) { rast_error_t *error; /* todo: must think critical case. */ error = rename_rast_files(db_name, properties, num_properties, preserve_text, "", "old_", pool); if (error != RAST_OK) { return error; } error = rename_rast_files(db_name, properties, num_properties, preserve_text, "new_", "", pool); if (error != RAST_OK) { return error; } error = remove_files(db_name, "old_", pool); if (error != RAST_OK) { return error; } error = remove_files(apr_pstrcat(pool, db_name, "/properties", NULL), "old_", pool); return error; } rast_error_t * rast_local_db_optimize(const char *name, const rast_db_optimize_option_t *options, apr_pool_t *pool) { rast_db_t *base; rast_local_db_t *db; int lorder, num_properties, i; rast_property_t *properties; rast_error_t *error; apr_pool_t *sub_pool; char *lock_filename; apr_file_t *lock_file; int preserve_text; int status; apr_pool_create(&sub_pool, pool); lock_filename = apr_pstrcat(sub_pool, name, "/lock", NULL); error = open_locked_file(lock_filename, APR_FLOCK_SHARED, APR_READ | APR_WRITE, &lock_file, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } error = rast_local_db_open(&base, name, RAST_DB_RDONLY, NULL, sub_pool); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } db = (rast_local_db_t *) base; if (rast_check_byte_order() == RAST_LITTLE_ENDIAN) { lorder = db->is_native ? 1234 : 4321; } else { lorder = db->is_native ? 
4321 : 1234; } error = create_optimized_db(db, options, lorder, sub_pool); if (error != RAST_OK) { remove_files(name, "new_", sub_pool); remove_files(apr_pstrcat(sub_pool, name, "/properties", NULL), "new_", sub_pool); rast_local_db_close(base); apr_pool_destroy(sub_pool); return error; } num_properties = db->num_properties; properties = (rast_property_t *) apr_palloc(sub_pool, sizeof(rast_property_t) * num_properties); for (i = 0; i < num_properties; i++) { properties[i].name = apr_pstrdup(sub_pool, db->properties[i].name); properties[i].flags = db->properties[i].flags; } preserve_text = db->preserve_text; error = rast_local_db_close(base); if (error != RAST_OK) { apr_pool_destroy(sub_pool); return error; } status = apr_file_lock(lock_file, APR_FLOCK_EXCLUSIVE); if (status != APR_SUCCESS) { apr_pool_destroy(sub_pool); return apr_status_to_rast_error(status); } error = replace_new_rast_files(name, properties, num_properties, preserve_text, sub_pool); if (error != RAST_OK) { remove_files(name, "new_", sub_pool); remove_files(apr_pstrcat(sub_pool, name, "/properties", NULL), "new_", sub_pool); rename_rast_files(name, properties, num_properties, preserve_text, "old_", "", sub_pool); } apr_file_unlock(lock_file); apr_file_close(lock_file); apr_pool_destroy(sub_pool); return error; } /* todo: use bdb append api */ rast_error_t * add_summary_text(rast_local_db_t *db, rast_doc_id_t doc_id, const char *summary_text, rast_size_t summary_nbytes, apr_pool_t *pool) { DBT db_key = { 0 }, db_value = { 0 }; rast_doc_id_t fixed_doc_id; char *buf; rast_size_t buf_nbytes; int dberr; fixed_doc_id = rast_fix_byte_order(doc_id, db->is_native); db_key.data = &fixed_doc_id; db_key.size = sizeof(rast_uint_t); db_value.flags |= DB_DBT_MALLOC; dberr = db->text_db->get(db->text_db, db->bdb_txn, &db_key, &db_value, 0); if (dberr != 0 && dberr != DB_NOTFOUND) { return db_error_to_rast_error(dberr); } buf = (char *) apr_palloc(pool, db_value.size + summary_nbytes); memcpy(buf, db_value.data, 
db_value.size); memcpy(buf + db_value.size, summary_text, summary_nbytes); buf_nbytes = db_value.size + summary_nbytes; free(db_value.data); db_value.data = buf; db_value.size = buf_nbytes; db_value.flags = 0; dberr = db->text_db->put(db->text_db, db->bdb_txn, &db_key, &db_value, 0); return db_error_to_rast_error(dberr); } static rast_error_t * local_document_add_text(rast_document_t *base, const char *text, int nbytes) { rast_local_document_t *doc = (rast_local_document_t *) base; rast_local_db_t *db; char *summary_text, *index_text; rast_size_t summary_nbytes, index_text_nbytes, n; apr_pool_t *pool; rast_error_t *error = RAST_OK; apr_pool_create(&pool, doc->base.pool); db = (rast_local_db_t *) doc->base.db; db->encoding_module->normalize_text(pool, text, nbytes, &summary_text, &summary_nbytes); if (db->preserve_text) { error = add_summary_text(db, doc->doc_id, summary_text, summary_nbytes, pool); } apr_pool_destroy(pool); if (error != RAST_OK) { return error; } db->encoding_module->normalize_chars(doc->base.pool, summary_text, summary_nbytes, &index_text, &index_text_nbytes); error = rast_text_indexer_add(doc->indexer, index_text, index_text_nbytes, &n); if (error != RAST_OK) { return error; } db->registered_chars += n; doc->nchars += n; return RAST_OK; } static rast_error_t * local_document_set_property(rast_document_t *base, const char *name, const rast_value_t *value) { rast_local_document_t *doc = (rast_local_document_t *) base; rast_value_t *property_value; property_value = (rast_value_t *) apr_palloc(doc->base.pool, sizeof(rast_value_t)); rast_value_set_type(property_value, value->type); switch (value->type) { case RAST_TYPE_STRING: rast_value_set_string(property_value, apr_pstrdup(doc->base.pool, rast_value_string(value))); break; case RAST_TYPE_DATE: rast_value_set_date(property_value, apr_pstrdup(doc->base.pool, rast_value_date(value))); break; case RAST_TYPE_DATETIME: rast_value_set_datetime(property_value, apr_pstrdup(doc->base.pool, 
rast_value_datetime(value))); break; case RAST_TYPE_UINT: rast_value_set_uint(property_value, rast_value_uint(value)); break; } rast_value_set_type(property_value, value->type); apr_hash_set(doc->property_values, apr_pstrdup(doc->base.pool, name), strlen(name), property_value); return RAST_OK; } static rast_error_t * local_document_commit(rast_document_t *base) { rast_local_document_t *doc = (rast_local_document_t *) base; rast_local_db_t *db = (rast_local_db_t *) doc->base.db; rast_error_t *error; rast_value_t *property_values; error = local_document_add_text(base, NULL, 0); if (error != RAST_OK) { return error; } error = rast_apr_hash_to_rast_value_array(db->properties, db->num_properties, doc->property_values, &property_values, doc->base.pool); if (error != RAST_OK) { return error; } error = register_full_text_search_property(db, doc->doc_id, property_values, doc->indexer, db->encoding_module, doc->base.pool); if (error != RAST_OK) { return error; } error = rast_text_indexer_commit(doc->indexer); if (error != RAST_OK) { return error; } error = register_property_indices(db, doc->doc_id, property_values, doc->base.pool); if (error != RAST_OK) { return error; } error = register_properties(db, doc->doc_id, doc->nchars, property_values, doc->base.pool); if (error != RAST_OK) { return error; } error = change_doc_info(db, 0, +1, doc->base.pool); if (error != RAST_OK) { return error; } return local_document_abort((rast_document_t *) doc); } static rast_error_t * local_document_abort(rast_document_t *base) { rast_local_document_t *doc = (rast_local_document_t *) base; if (doc != NULL) { apr_pool_destroy(doc->base.pool); } return RAST_OK; } static rast_error_t * local_document_get_doc_id(rast_document_t *base, rast_doc_id_t *doc_id) { rast_local_document_t *doc = (rast_local_document_t *) base; if (doc == NULL) { return rast_error(RAST_ERROR_INVALID_ARGUMENT, NULL); } *doc_id = doc->doc_id; return RAST_OK; } /* vim: set filetype=c sw=4 expandtab : */