// // CRF++ -- Yet Another CRF toolkit // // $Id: feature_index.h 1588 2007-02-12 09:03:39Z taku $; // // Copyright(C) 2005-2007 Taku Kudo // #ifndef CRFPP_FEATURE_INDEX_H__ #define CRFPP_FEATURE_INDEX_H__ #include #include #include #include "common.h" #include "scoped_ptr.h" #include "feature_cache.h" #include "path.h" #include "node.h" #include "freelist.h" #include "mmap.h" #include "darts.h" namespace CRFPP { class TaggerImpl; class FeatureIndex { protected: unsigned int maxid_; double *alpha_; float *alpha_float_; double cost_factor_; unsigned int xsize_; unsigned int max_xsize_; size_t thread_num_; FeatureCache feature_cache_; std::vector unigram_templs_; std::vector bigram_templs_; std::vector y_; FreeList char_freelist_; scoped_array< FreeList > path_freelist_; scoped_array< FreeList > node_freelist_; whatlog what_; virtual int getID(const char *) = 0; const char *get_index(char *&, size_t, const TaggerImpl &); bool apply_rule(string_buffer *, char *, size_t, const TaggerImpl &); public: static const unsigned int version = MODEL_VERSION; size_t size() const { return maxid_; } size_t xsize() const { return xsize_; } size_t ysize() const { return y_.size(); } const char* y(size_t i) const { return y_[i]; } void set_alpha(double *alpha) { alpha_ = alpha; } const float *alpha_float() { return const_cast(alpha_float_); } const double *alpha() { return const_cast(alpha_); } void set_cost_factor(double cost_factor) { cost_factor_ = cost_factor; } double cost_factor() { return cost_factor_; } char *strdup(const char *); void calcCost(Node *); void calcCost(Path *); bool buildFeatures(TaggerImpl *); void rebuildFeatures(TaggerImpl *); const char* what() { return what_.str(); } virtual bool open(const char*, const char*) = 0; virtual void clear() = 0; void init() { path_freelist_.reset(new FreeList [thread_num_]); node_freelist_.reset(new FreeList [thread_num_]); for (size_t i = 0; i < thread_num_; ++i) { path_freelist_[i].set_size(8192 * 16); node_freelist_[i].set_size(8192); } } explicit FeatureIndex(): maxid_(0), alpha_(0), alpha_float_(0), cost_factor_(1.0), xsize_(0), max_xsize_(0), thread_num_(1), char_freelist_(8192) {} virtual ~FeatureIndex() {} }; class EncoderFeatureIndex: public FeatureIndex { private: std::map > dic_; int getID(const char *); bool openTemplate(const char *); bool openTagSet(const char *); public: explicit EncoderFeatureIndex(size_t n) { thread_num_ = n; init(); } bool open(const char*, const char*); bool save(const char *, bool); bool convert(const char *, const char*); void clear(); void shrink(size_t) ; }; class DecoderFeatureIndex: public FeatureIndex { private: Mmap mmap_; Darts::DoubleArray da_; int getID(const char *); public: explicit DecoderFeatureIndex() { init(); } bool open(const char *, const char *); void clear(); }; } #endif