/* * Copyright (C) 2005 Network Applied Communication Laboratory Co., Ltd. * * This file is part of Rast. * See the file COPYING for redistribution information. * */ #include #include #include #include #include #include #include #include "rast/config.h" #include "rast/encoding.h" static apr_pool_t *encoding_module_pool; static apr_hash_t *encoding_modules; typedef struct { apr_dso_handle_t *dso_handle; rast_encoding_module_t *module; } encoding_module_info_t; rast_error_t * rast_load_encoding_modules(const char *dirname) { apr_status_t status; apr_dir_t *dir; apr_finfo_t finfo; int name_len, shrext_len = strlen(SHREXT); if (encoding_modules != NULL) { return rast_error(RAST_ERROR_GENERAL, "encoding modules are already loaded"); } apr_pool_create(&encoding_module_pool, rast_get_global_pool()); encoding_modules = apr_hash_make(encoding_module_pool); status = apr_dir_open(&dir, dirname, encoding_module_pool); if (status != APR_SUCCESS) { return apr_status_to_rast_error(status); } while (1) { char *path, *var_name; const char *module_name; apr_dso_handle_t *handle; apr_dso_handle_sym_t sym; encoding_module_info_t *module_info; status = apr_dir_read(&finfo, APR_FINFO_TYPE | APR_FINFO_NAME, dir); if (status == APR_ENOENT) { break; } if (status != APR_SUCCESS || finfo.filetype != APR_REG) { continue; } name_len = strlen(finfo.name); if (name_len <= shrext_len || strcmp(finfo.name + name_len - shrext_len, SHREXT) != 0) { continue; } path = apr_pstrcat(encoding_module_pool, dirname, "/", finfo.name, NULL); status = apr_dso_load(&handle, path, encoding_module_pool); if (status != APR_SUCCESS) { continue; } module_name = apr_pstrndup(encoding_module_pool, finfo.name, name_len - shrext_len); var_name = apr_pstrcat(encoding_module_pool, "rast_encoding_", module_name, NULL); status = apr_dso_sym(&sym, handle, var_name); if (status != APR_SUCCESS) { apr_dso_unload(handle); apr_dir_close(dir); return apr_status_to_rast_error(status); } module_info = (encoding_module_info_t *) apr_palloc(encoding_module_pool, sizeof(encoding_module_info_t)); module_info->dso_handle = handle; module_info->module = (rast_encoding_module_t *) sym; apr_hash_set(encoding_modules, module_name, strlen(module_name), module_info); } status = apr_dir_close(dir); return apr_status_to_rast_error(status); } rast_error_t * rast_unload_encoding_modules() { apr_hash_index_t *hi; apr_status_t status; rast_error_t *error = RAST_OK; for (hi = apr_hash_first(encoding_module_pool, encoding_modules); hi; hi = apr_hash_next(hi)) { const void *key; apr_ssize_t key_nbytes; void *val; encoding_module_info_t *module_info; apr_hash_this(hi, &key, &key_nbytes, &val); module_info = (encoding_module_info_t *) val; status = apr_dso_unload(module_info->dso_handle); if (status != APR_SUCCESS) { error = apr_status_to_rast_error(status); } } apr_pool_destroy(encoding_module_pool); encoding_module_pool = NULL; return error; } rast_error_t * rast_get_encoding_module(const char *name, rast_encoding_module_t **encoding_module) { encoding_module_info_t *module_info; if (encoding_modules == NULL) { return rast_error(RAST_ERROR_GENERAL, "encoding modules are not loaded yet"); } module_info = (encoding_module_info_t *) apr_hash_get(encoding_modules, name, strlen(name)); if (module_info == NULL) { return rast_error(RAST_ERROR_GENERAL, "not supported encoding: %s", name); } *encoding_module = module_info->module; return RAST_OK; } char * rast_normalize_text(rast_encoding_module_t *encoding_module, const char *s, rast_size_t nbytes, rast_size_t *new_nbytes, apr_pool_t *pool) { char *tmp, *res; rast_size_t tmp_nbytes, res_nbytes; apr_pool_t *sub_pool; apr_pool_create(&sub_pool, pool); encoding_module->normalize_text(sub_pool, s, nbytes, &tmp, &tmp_nbytes); encoding_module->normalize_chars(pool, tmp, tmp_nbytes, &res, &res_nbytes); apr_pool_destroy(sub_pool); if (new_nbytes != NULL) { *new_nbytes = res_nbytes; } return res; } static inline rast_tokenizer_t * tokenizer_create(apr_pool_t *pool, rast_encoding_module_t *encoding_module, const char *s, rast_size_t nbytes) { rast_tokenizer_t *tokenizer = (rast_tokenizer_t *) apr_palloc(pool, sizeof(rast_tokenizer_t)); tokenizer->encoding_module = encoding_module; tokenizer->pool = pool; tokenizer->ptr = s; tokenizer->ptr_end = s + nbytes; tokenizer->pos = 0; tokenizer->context = NULL; return tokenizer; } static inline rast_error_t * tokenizer_get_token(rast_tokenizer_t *tokenizer, rast_token_t *token) { rast_error_t *error; token->ptr = tokenizer->ptr; token->pos = tokenizer->pos; error = tokenizer->encoding_module->get_token(tokenizer, token); if (error != RAST_OK) { return error; } return RAST_OK; } static inline int tokenizer_is_done(rast_tokenizer_t *tokenizer) { return tokenizer->ptr >= tokenizer->ptr_end; } rast_tokenizer_t * rast_char_tokenizer_create(apr_pool_t *pool, rast_encoding_module_t *encoding_module, const char *s, rast_size_t nbytes) { return tokenizer_create(pool, encoding_module, s, nbytes); } static rast_error_t * get_char_len(rast_tokenizer_t *tokenizer, rast_size_t *char_len) { if (tokenizer->ptr >= tokenizer->ptr_end) { *char_len = 0; return RAST_OK; } return tokenizer->encoding_module->get_char_len(tokenizer, char_len); } rast_error_t * rast_char_tokenizer_next(rast_tokenizer_t *tokenizer) { rast_error_t *error; rast_size_t char_len; error = get_char_len(tokenizer, &char_len); if (error != RAST_OK) { return error; } tokenizer->ptr += char_len; return RAST_OK; } rast_error_t * rast_char_tokenizer_get_current(rast_tokenizer_t *tokenizer, rast_char_t *ch) { rast_error_t *error; rast_size_t len; error = get_char_len(tokenizer, &len); if (error != RAST_OK) { return error; } ch->encoding_module = tokenizer->encoding_module; ch->ptr = tokenizer->ptr; ch->nbytes = len; return RAST_OK; } int rast_char_tokenizer_is_done(rast_tokenizer_t *tokenizer) { return tokenizer_is_done(tokenizer); } rast_tokenizer_t * rast_register_tokenizer_create(apr_pool_t *pool, rast_encoding_module_t *encoding_module, const char *s, rast_size_t nbytes) { return tokenizer_create(pool, encoding_module, s, nbytes); } rast_error_t * rast_register_tokenizer_next(rast_tokenizer_t *tokenizer) { rast_error_t *error; rast_size_t byte_offset, char_offset; error = tokenizer->encoding_module->get_next_offset(tokenizer, &byte_offset, &char_offset); if (error != RAST_OK) { return error; } tokenizer->ptr += byte_offset; tokenizer->pos += char_offset; return RAST_OK; } rast_error_t * rast_register_tokenizer_get_current(rast_tokenizer_t *tokenizer, rast_token_t *token) { return tokenizer_get_token(tokenizer, token); } int rast_register_tokenizer_is_done(rast_tokenizer_t *tokenizer) { return tokenizer_is_done(tokenizer); } rast_tokenizer_t * rast_search_tokenizer_create(apr_pool_t *pool, rast_encoding_module_t *encoding_module, const char *s, rast_size_t nbytes) { return tokenizer_create(pool, encoding_module, s, nbytes); } rast_error_t * rast_search_tokenizer_next(rast_tokenizer_t *tokenizer) { rast_error_t *error; rast_size_t byte_offset, char_offset; rast_token_t token; error = tokenizer->encoding_module->get_next_offset(tokenizer, &byte_offset, &char_offset); if (error != RAST_OK) { return error; } error = tokenizer->encoding_module->get_token(tokenizer, &token); if (error != RAST_OK) { return error; } if (tokenizer->ptr + token.nbytes >= tokenizer->ptr_end) { tokenizer->ptr = tokenizer->ptr_end; return RAST_OK; } tokenizer->ptr += byte_offset; tokenizer->pos += char_offset; return RAST_OK; } rast_error_t * rast_search_tokenizer_get_current(rast_tokenizer_t *tokenizer, rast_token_t *token) { return tokenizer_get_token(tokenizer, token); } int rast_search_tokenizer_is_done(rast_tokenizer_t *tokenizer) { return tokenizer_is_done(tokenizer); } int rast_count_chars(rast_encoding_module_t *encoding_module, const char *s, rast_size_t nbytes, apr_pool_t *pool) { int count = 0; rast_tokenizer_t *tokenizer; for (tokenizer = rast_char_tokenizer_create(pool, encoding_module, s, nbytes); !rast_char_tokenizer_is_done(tokenizer); rast_char_tokenizer_next(tokenizer)) { count++; } return count; } int rast_char_is_space(rast_char_t *ch) { if (ch->encoding_module->is_space == NULL) { return isspace((unsigned char) *ch->ptr); } return ch->encoding_module->is_space(ch); } /* vim: set filetype=c sw=4 expandtab : */