//
// CRF++ -- Yet Another CRF toolkit
//
// $Id: feature_index.h 1588 2007-02-12 09:03:39Z taku $;
//
// Copyright(C) 2005-2007 Taku Kudo <taku@chasen.org>
//
#ifndef CRFPP_FEATURE_INDEX_H__
#define CRFPP_FEATURE_INDEX_H__
#include <vector>
#include <map>
#include <iostream>
#include "common.h"
#include "scoped_ptr.h"
#include "feature_cache.h"
#include "path.h"
#include "node.h"
#include "freelist.h"
#include "mmap.h"
#include "darts.h"
namespace CRFPP {
// Forward declaration: this header only names TaggerImpl through pointers and
// references in member-function signatures, so it does not need the full type.
class TaggerImpl;
// Abstract base class shared by EncoderFeatureIndex and DecoderFeatureIndex.
//
// It holds the state both subclasses use: the weight-array pointers (alpha_,
// alpha_float_), the cost factor, the unigram/bigram/label string tables, a
// FeatureCache, and one Path free list plus one Node free list per thread.
// Subclasses must implement getID(), open() and clear().
//
// The base constructor does not allocate the per-thread free lists; a
// subclass has to call init() once thread_num_ holds its final value.
class FeatureIndex {
 protected:
  unsigned int maxid_;                 // value reported by size()
  double *alpha_;                      // installed by set_alpha(); not freed in this header
  float *alpha_float_;                 // exposed read-only through alpha_float()
  double cost_factor_;                 // starts at 1.0; see set_cost_factor()
  unsigned int xsize_;                 // value reported by xsize()
  unsigned int max_xsize_;
  size_t thread_num_;                  // how many free lists init() creates; starts at 1
  FeatureCache feature_cache_;
  std::vector<char*> unigram_templs_;
  std::vector<char*> bigram_templs_;
  std::vector<char*> y_;               // y(i) returns y_[i]; ysize() is its length
  FreeList<char> char_freelist_;       // constructed with the argument 8192
  scoped_array< FreeList<Path> > path_freelist_;  // thread_num_ entries after init()
  scoped_array< FreeList<Node> > node_freelist_;  // thread_num_ entries after init()
  whatlog what_;                       // source of the text returned by what()

  // Implemented by each subclass.
  // NOTE(review): presumably maps a feature string to its numeric id --
  // confirm against the out-of-line implementations.
  virtual int getID(const char *) = 0;

  // Helpers defined out of line; their behaviour is not visible in this
  // header.  NOTE(review): they look like template-expansion helpers used
  // while building features -- confirm in the implementation file.
  const char *get_index(char *&, size_t, const TaggerImpl &);
  bool apply_rule(string_buffer *,
                  char *,
                  size_t, const TaggerImpl &);

 public:
  static const unsigned int version = MODEL_VERSION;

  // Simple read accessors.  y(i) indexes y_ with operator[], so no bounds
  // check is performed.
  size_t size() const { return maxid_; }
  size_t xsize() const { return xsize_; }
  size_t ysize() const { return y_.size(); }
  const char* y(size_t i) const { return y_[i]; }

  // Stores the pointer as-is; nothing in this header copies or frees it.
  void set_alpha(double *alpha) { alpha_ = alpha; }

  // Read-only views of the weight arrays.  These are const member functions
  // (like size()/xsize()) so they can be called through a const
  // FeatureIndex; the pointers convert to pointer-to-const implicitly, so no
  // cast is needed.
  const float *alpha_float() const { return alpha_float_; }
  const double *alpha() const { return alpha_; }

  void set_cost_factor(double cost_factor) { cost_factor_ = cost_factor; }
  double cost_factor() const { return cost_factor_; }

  // Defined out of line.  NOTE(review): this member is named like the C
  // library strdup; presumably the copy is taken from char_freelist_ --
  // confirm in the implementation file.
  char *strdup(const char *);

  // Defined out of line; one overload per graph element type.
  void calcCost(Node *);
  void calcCost(Path *);

  // Defined out of line.
  bool buildFeatures(TaggerImpl *);
  void rebuildFeatures(TaggerImpl *);

  // Returns what_.str().  Left non-const because whatlog::str() is declared
  // outside this header and may not be a const member.
  const char* what() { return what_.str(); }

  virtual bool open(const char*, const char*) = 0;
  virtual void clear() = 0;

  // (Re)allocates thread_num_ Path free lists and thread_num_ Node free
  // lists, then calls set_size() on each one: 8192 * 16 for Path lists and
  // 8192 for Node lists.  Must run after thread_num_ has been set; both
  // subclasses in this header call it from their constructors.
  // NOTE(review): set_size() presumably sets the allocation chunk size --
  // see freelist.h.
  void init() {
    path_freelist_.reset(new FreeList<Path> [thread_num_]);
    node_freelist_.reset(new FreeList<Node> [thread_num_]);
    for (size_t i = 0; i < thread_num_; ++i) {
      path_freelist_[i].set_size(8192 * 16);
      node_freelist_[i].set_size(8192);
    }
  }

  // Zeroes the counters and pointers, sets cost_factor_ to 1.0 and
  // thread_num_ to 1, and constructs char_freelist_ with 8192.  The
  // per-thread free lists stay empty until init() is called.
  explicit FeatureIndex(): maxid_(0), alpha_(0), alpha_float_(0),
                           cost_factor_(1.0), xsize_(0), max_xsize_(0),
                           thread_num_(1), char_freelist_(8192) {}

  virtual ~FeatureIndex() {}
};
class EncoderFeatureIndex: public FeatureIndex {
private:
std::map <std::string, std::pair<int, unsigned int> > dic_;
int getID(const char *);
bool openTemplate(const char *);
bool openTagSet(const char *);
public:
explicit EncoderFeatureIndex(size_t n) {
thread_num_ = n;
init();
}
bool open(const char*, const char*);
bool save(const char *, bool);
bool convert(const char *, const char*);
void clear();
void shrink(size_t) ;
};
class DecoderFeatureIndex: public FeatureIndex {
private:
Mmap <char> mmap_;
Darts::DoubleArray da_;
int getID(const char *);
public:
explicit DecoderFeatureIndex() { init(); }
bool open(const char *, const char *);
void clear();
};
}
#endif
// syntax highlighted by Code2HTML, v. 0.9.1