// SKKの辞書を管理するクラス群 // SKKFileDic関係をclean up しないといけない #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifndef __GNUC__ # ifdef HAVE_ALLOCA_H # include # endif #endif #include "skkconv.h" struct SKKOkuriEnt{ std::list head; jstring_t okuri; }; struct SKKDicEnt{ bool cached;// learn_map上にある。(このエントリの解放はセーブ時に行なわれる) int hash; // learn_map内でのhash値 jstring_t head; cchar okuri_head; std::list okurinasi; std::list okuri; void commit(SKKStat *s,jstring_t *c); private: }; class SKKNetDic : public SKKDic{ public: SKKNetDic(); bool init(); virtual Candidates *getCandidates(SKKStat *); virtual void commit(SKKStat *s,jstring_t *a); private: bool connect_serv(char *hostname); bool send_request(SKKStat *); void split_reply(Candidates *,char *r); char *recv_reply(); int sock_fd;// -1の時は接続されていない。 }; class SKKFileDic : public SKKDic{ public: SKKFileDic(); virtual ~SKKFileDic(); bool init(char *fn,bool isLearn); virtual Candidates *getCandidates(SKKStat *); virtual void commit(SKKStat *,jstring_t *); virtual void save(); private: bool do_map(char *fn); bool make_hash(); int next_line(int ); void add_hash_entry(int i,char *); int compare_jstring_with_str(jstring_t *sy,char *str); void add_to_learn_ent(SKKStat *,SKKDicEnt *); SKKDicEnt *get_dic_ent(SKKStat *); SKKDicEnt *get_dic_ent_from_off_map(SKKStat *st,jstring_t *s,int h); SKKDicEnt *get_dic_ent_from_learn(jstring_t *head,cchar okuri,int h); void save_1(FILE *fp,bool okuri); void save_1_orig(FILE *fp,bool okuri); void save_1_learn(FILE *fp,bool okuri); std::map off_map;// hash , offset std::map learn_map; int m_col; // number of hash collision int m_size; char *m_fn; char *m_ptr; bool m_isLearn; bool mIsDirty; }; class SKKMasterDic : public SKKDic{ public: SKKMasterDic(); virtual ~SKKMasterDic(); virtual Candidates *getCandidates(SKKStat *); virtual void commit(SKKStat *,jstring_t *); private: void open_file_dics(); void open_file_dic(char *); Candidates *merge_candidate(Candidates *x,Candidates *y); void merge_candidate_append(Candidates *dst,Candidates *src); SKKNetDic *m_netdic; std::listm_filedics; SKKFileDic *m_learndic; }; SKKDic *createSKKDic() { return new SKKMasterDic(); } int skk_serv_port = 1178; // global functions for skk dictionary static int linelen(char *s) { int i; for (i = 0; s[i] != '\n'; i++); return i; } static char *linedup(char *s) { int l = linelen(s); char *b = (char *)malloc(l+1); memcpy(b, s ,l); b[l] = 0; return b; } static int calc_hash(jstring_t *s) { jstring_t::iterator i; int h = 0; for (i = s->begin(); i!= s->end(); i++) { h = h * 33 + (*i); } return h; } static int calc_str_hash(char *s) { jstring_t sy; int i; for ( i = 0 ; s[i] && s[i]!=' ' ; i++); char *buf= (char *)alloca(i+2); strncpy(buf,s,i); buf[i] = 0; str_to_jstring(&sy,buf); return calc_hash(&sy); } static void make_dic_key_str(SKKStat *st,jstring_t *s) { append_jstring(s,&st->head); if ( st->okuri_head ){ s->push_back(st->okuri_head); } } void get_dic_ent_str(SKKDicEnt *e,jstring_t *s) { append_jstring(s,&e->head); if ( e->okuri_head ){ s->push_back(e->okuri_head); } s->push_back(' '); s->push_back('/'); std::list::iterator i; for ( i = e->okurinasi.begin(); i != e->okurinasi.end() ; i++){ append_jstring(s,&(*i)); s->push_back('/'); } std::list::iterator j; for ( j = e->okuri.begin() ; j != e->okuri.end() ; j ++){ s->push_back('['); append_jstring(s,&((*j).okuri)); for ( i = (*j).head.begin() ; i != (*j).head.end() ; i++){ s->push_back('/'); append_jstring(s,&(*i)); } s->push_back(']'); s->push_back('/'); } } jstring_t::iterator parse_okuri_ent(SKKDicEnt *e,jstring_t::iterator i) { SKKOkuriEnt o; //一文字目に送り仮名 while(*i != '/'){ o.okuri.push_back(*i); i++; } i++; while( *i != ']'){ jstring_t x; while ( *i != ']' && *i != '/'){ x.push_back(*i); i++; } o.head.push_back(x); if ( *i == '/'){ i++; } } i++; i++; e->okuri.push_back(o); return i; } jstring_t::iterator parse_a_ent(SKKDicEnt *e,jstring_t::iterator i) { if ( *i == '['){ i++; return parse_okuri_ent(e,i); } jstring_t x; while ( *i != '/'){ x.push_back(*i); i++; } e->okurinasi.push_back(x); i++; return i; } SKKDicEnt *make_dic_ent_from_line(char *ls) { SKKDicEnt *e; char *l = linedup(ls); jstring_t s; str_to_jstring(&s,l); int h = calc_str_hash(l); free(l); jstring_t::iterator i; s.push_back(0); e = new SKKDicEnt(); e->okuri_head = 0; e->cached = false; e->hash = h; for ( i = s.begin(); i != s.end() && *i != ' ' ; i++){ if ( *s.begin()> 255 && *i < 256 ){ // okuri e->okuri_head = *i; i++; break; }else{ e->head.push_back(*i); } } if ( i == s.end() || *i != ' '){ delete e; return 0; } // 最初の/の次の文字から i++; i++; while(i != s.end() && *i){ i = parse_a_ent(e,i); if ( *i == 0 ){ return e; } } printf("Syntax error in skk dictionary.\n"); delete e; return 0; } //リストの先頭にある要素を追加する、同じものがリスト中にあれば //削除する void jstring_unique_prepend(std::list *sl,jstring_t *s) { std::list::iterator i; for ( i = sl->begin() ; i != sl->end() ; i++){ if ( *i == *s ){ sl->erase(i); goto prepend; } } prepend: sl->push_front(*s); } void fprint_jstring(FILE *fp,jstring_t *s) { char *str; str = jstring_to_str(s); fprintf(fp,str); free((void *)str); } // SKKMasterDic::SKKMasterDic() { m_netdic = new SKKNetDic(); if (!m_netdic->init()) { delete m_netdic; m_netdic = 0; } m_learndic = 0; open_file_dics(); } SKKMasterDic::~SKKMasterDic() { if ( m_learndic){ m_learndic->save(); delete m_learndic; } if ( m_netdic ){ delete m_netdic; } std::list::iterator i; for ( i = m_filedics.begin() ; i != m_filedics.end() ; i++){ delete *i; } } Candidates *SKKMasterDic::getCandidates(SKKStat *s) { Candidates *n=0,*f=0; if ( m_learndic ){ f = m_learndic->getCandidates(s); } std::list::iterator i; for ( i = m_filedics.begin() ; i != m_filedics.end() ; i++ ){ SKKFileDic *fdic= *i; Candidates *tmp1,*tmp2; tmp1 = fdic->getCandidates(s); tmp2 = merge_candidate(f,tmp1); if ( tmp1 ){ delete tmp1; } if ( f ){ delete f; } f = tmp2; } if ( m_netdic ){ n = m_netdic->getCandidates(s); } Candidates *tmp; tmp = merge_candidate(f,n); if ( tmp ){ if (f){ delete f; } if (n ){ delete n; } return tmp; } return 0; } void SKKMasterDic::commit(SKKStat *s,jstring_t *c) { if ( c->size() == 0 ){ return ; } if ( s->cands && s->cands->nth == 0){ return ; } if ( m_learndic ){ m_learndic->commit(s,c); } } void SKKMasterDic::open_file_dics() { char *fnbuf; atom_t a=0; char *s; // ホームディレクトリにある辞書 do{ a = get_bound_atoms(A_skk_personal_dic,a); if (a) { s = get_atom_name(a); fnbuf = (char *)alloca(strlen(homedir)+ strlen(s)+2); sprintf(fnbuf,"%s/%s",homedir,s); open_file_dic(fnbuf); } }while(a); // 共有されている辞書 a = 0; do{ a = get_bound_atoms(A_skk_share_dic, a); if (a) { s = get_atom_name(a); open_file_dic(s); } } while(a); // 書き込みできる辞書 a = get_bound_atoms(A_skk_learn_dic,0); if ( nr_bound_atoms(A_skk_learn_dic) > 1 ){ printf("many files are specified as skk learn dic.\n"); } if ( a && ( s = get_atom_name(a))){ fnbuf = (char *)alloca(strlen(homedir)+ strlen(s)+2); sprintf(fnbuf,"%s/%s",homedir,s); SKKFileDic *fdic = new SKKFileDic(); fdic->init(fnbuf,true); m_learndic = fdic; } } void SKKMasterDic::open_file_dic(char *fn) { SKKFileDic *fdic= new SKKFileDic(); if ( fdic->init(fn,false)){ m_filedics.push_back(fdic); }else{ delete fdic; } } Candidates *SKKMasterDic::merge_candidate(Candidates *x,Candidates *y) { Candidates *z; if ( ! x && !y ){ return 0; } z = new Candidates(); merge_candidate_append(z,x); merge_candidate_append(z,y); return z; } void SKKMasterDic::merge_candidate_append(Candidates *dst, Candidates *src) { if ( ! src ){ return ; } std::vector::iterator it,jt; for ( it = src->cands.begin() ; it != src->cands.end() ; it++){ bool bFound = false; for ( jt = dst->cands.begin() ; jt != dst->cands.end() ; jt++){ if ( *jt == *it ){ bFound = true; } } if ( !bFound ){ dst->cands.push_back(*it); } } } SKKNetDic::SKKNetDic() { sock_fd = -1; } bool SKKNetDic::init() { char *h; h = my_getenv("SKKSERVER"); if ( h ){ if ( !connect_serv( h )){ printf("failed to connect SKKSERVER(%s)\n",h); return false; }else{ return true; } } return false; } void SKKNetDic::commit(SKKStat *,jstring_t *) { } bool SKKNetDic::connect_serv(char *hostname) { struct sockaddr_in peer; struct hostent *server_ip; sock_fd = -1; sock_fd = socket(AF_INET,SOCK_STREAM,0); server_ip = gethostbyname(hostname); if ( sock_fd == -1 || !server_ip){ return false; } memset(&peer,0,sizeof(peer)); peer.sin_family = AF_INET; peer.sin_port = htons(skk_serv_port); memcpy(&peer.sin_addr,server_ip->h_addr,server_ip->h_length); if ( connect(sock_fd,(struct sockaddr*)&peer,sizeof(peer)) == -1){ sock_fd = -1; return false; } return true; } Candidates *SKKNetDic::getCandidates(SKKStat *st) { Candidates *cand = new Candidates(); if ( sock_fd > -1 && send_request(st)){ char *r; r = recv_reply(); if ( r){ split_reply(cand,r); free(r); } } if ( cand->cands.size() == 0){ delete cand; return 0; } return cand; } bool SKKNetDic::send_request(SKKStat *st) { char *p,*de; int c; p = (char *)alloca( st->head.size()*2+10); p[0]='1'; p[1]=0; jstring_t sy; make_dic_key_str(st,&sy); de = jstring_to_str(&sy); strcat(p,de); strcat(p," \n"); free(de); c = strlen(p); if (send(sock_fd,p,c,0)!= c ){ printf("failed to send."); return false; } return true; } char *SKKNetDic::recv_reply() { int len=16; char *p; p = (char *)malloc(len); do{ int l,i; l = recv(sock_fd,p,len-1,MSG_PEEK); if ( l ==0 ){ free(p); return NULL; } for ( i = 0 ; i < l ; i++){ if ( p[i]=='\n' ){ recv(sock_fd,p,i+1,0); p[i]=0; if ( i>0 && p[i-1]=='\r'){ p[i-1]=0; } return p; } } if ( l == len-1 ){ len = len *2; p = (char *)realloc(p,len); } }while(1); return NULL; } void SKKNetDic::split_reply(Candidates *c,char *r) { char *buf; int l,i,o; buf = (char *)alloca(strlen(r)); l = strlen (r ); o =0; for ( i = 2 ; i < l ; i++){ if ( r[i] != '/'){ buf[o] = r[i]; o++; }else{ buf[o] = 0; o = 0; jstring_t s; str_to_jstring(&s,buf); c->cands.push_back(s); } } } SKKFileDic::SKKFileDic() { m_fn = 0; m_ptr = 0; mIsDirty = false; } SKKFileDic::~SKKFileDic() { if ( m_fn ){ free(m_fn); } if ( m_ptr ){ munmap(m_ptr,m_size); } } bool SKKFileDic::init(char *fn,bool isLearn) { m_isLearn = isLearn; //学習用の辞書なら失敗しても可 if (!do_map(fn)){ return isLearn; } return make_hash(); } Candidates *SKKFileDic::getCandidates(SKKStat *st) { SKKDicEnt *e; e = get_dic_ent(st); if ( !e ){ return 0; } Candidates *c = new Candidates(); //送り仮名があって、そのエントリで一致したらそれを返す if ( st->okuri.size()){ std::list::iterator i; for ( i = e->okuri.begin() ; i != e->okuri.end() ; i++){ if ( (*i).okuri == st->okuri ){ std::list::iterator j; for ( j = (*i).head.begin() ; j != (*i).head.end() ; j++){ c->cands.push_back(*j); } return c; } } } std::list::iterator i; for ( i = e->okurinasi.begin() ; i != e->okurinasi.end() ; i ++){ c->cands.push_back(*i); } if ( !e->cached ){ delete e; } return c; } SKKDicEnt *SKKFileDic::get_dic_ent(SKKStat *st) { SKKDicEnt *e; jstring_t sy; int h; make_dic_key_str(st, &sy); h = calc_hash(&sy); e = get_dic_ent_from_learn(&st->head,st->okuri_head,h); if ( e ){ return e; } e = get_dic_ent_from_off_map(st,&sy,h); //学習用の辞書ならキャッシュする if ( e && m_isLearn ){ add_to_learn_ent(st,e); } return e; } SKKDicEnt * SKKFileDic::get_dic_ent_from_off_map(SKKStat *st,jstring_t *sy,int h) { if ( m_ptr == 0 ){ return 0; } std::map::iterator i; int off; SKKDicEnt *e; do{ i = off_map.find(h); if ( i == off_map.end()){ return 0; } off = (*i).second; if ( !compare_jstring_with_str(sy,&m_ptr[off])){ e = make_dic_ent_from_line(&m_ptr[off]); return e; } h++; }while(1); return 0; } SKKDicEnt *SKKFileDic::get_dic_ent_from_learn(jstring_t *s,cchar okuri,int h) { std::map::iterator i; while(1){ i = learn_map.find(h); if ( i == learn_map.end() ){ return 0; } SKKDicEnt *e; e = (*i).second; if ( e->head == *s && e->okuri_head == okuri){ return e; } h++; } return 0; } void SKKFileDic::commit(SKKStat *st,jstring_t *c) { SKKDicEnt *e; e = get_dic_ent(st); if ( !e){ e = new SKKDicEnt(); add_to_learn_ent(st,e); } e->commit(st,c); mIsDirty = true; } void SKKFileDic::save() { if ( !mIsDirty ){ return ; } FILE *fp; char *fn; fn = (char *)alloca(strlen(m_fn)+5); strcpy(fn,m_fn); strcat(fn,".bak"); fp = fopen( fn, "w"); if ( ! fp ){ return ; } printf("saving dictionary.(%s)\n",fn); save_1(fp,true); save_1(fp,false); fclose(fp); rename(fn,m_fn); } void SKKFileDic::save_1(FILE *fp,bool okuri) { save_1_orig(fp,okuri); save_1_learn(fp,okuri); } void SKKFileDic::save_1_orig(FILE *fp,bool okuri) { std::map::iterator i; for ( i = off_map.begin() ; i != off_map.end() ; i++){ SKKDicEnt *e; e = make_dic_ent_from_line(&m_ptr[(*i).second]); if ( e && (( e->okuri_head && okuri ) || (!e->okuri_head && !okuri))){ if ( !get_dic_ent_from_learn(&e->head,e->okuri_head,e->hash)){ // need to save jstring_t s; get_dic_ent_str(e,&s); delete e; s.push_back('\n'); fprint_jstring(fp,&s); } } } } void SKKFileDic::save_1_learn(FILE *fp,bool okuri) { std::map::iterator i; for ( i = learn_map.begin() ; i!= learn_map.end() ;i++){ SKKDicEnt *e = (*i).second; if ( (e->okuri_head && okuri ) || (!e->okuri_head && !okuri)){ jstring_t s; get_dic_ent_str(e,&s); s.push_back('\n'); fprint_jstring(fp,&s); } } } bool SKKFileDic::do_map(char *fn) { int fd; m_fn = strdup(fn); fd = open ( m_fn , O_RDONLY ); if ( fd == -1 ){ // maybe file not exist return false; } struct stat st; if ( fstat( fd,&st) == -1){ perror("Failed to stat dictionary."); return false; } //printf("size = %d bytes.\n",st.st_size); m_size = st.st_size; void *ptr; ptr = mmap(0,st.st_size,PROT_READ,MAP_SHARED,fd,0); if ( ptr == MAP_FAILED ){ perror("Failed to mmap dictionary."); return false; } m_ptr = (char *)ptr; close(fd); return true; } bool SKKFileDic::make_hash() { int index = 0; int nr_ent =0; m_col = 0; while( index >= 0 && index < m_size){ if ( m_ptr[index] != ';'){ nr_ent ++; add_hash_entry(index,&m_ptr[index]); } index = next_line(index); } return true; } int SKKFileDic::next_line(int i) { while ( i < m_size && m_ptr[i] != '\n'){ i++; } if ( i >= m_size-1 ){ return -1; } return i+1; } void SKKFileDic::add_hash_entry(int of,char *s) { int hash = calc_str_hash(s); std::map::iterator i; while(1){ i = off_map.find(hash); if ( i == off_map.end()){ std::pair p(hash,of); off_map.insert(p); return ; }else{ hash ++; m_col ++; } } } int SKKFileDic::compare_jstring_with_str(jstring_t *sy,char *str) { int i; for ( i = 0 ; str[i] && str[i] != ' ' ; i++); char *buf=(char *)alloca(i+2); strncpy(buf,str,i); buf[i] = 0; jstring_t s; str_to_jstring(&s,buf); if ( s == *sy ){ return 0; } return 1; } void SKKFileDic::add_to_learn_ent(SKKStat *st,SKKDicEnt *e) { jstring_t sy; make_dic_key_str(st,&sy); int h = calc_hash(&sy); e->cached = true; e->head = st->head; e->okuri_head = st->okuri_head; while(1){ std::map::iterator i; i = learn_map.find(h); if ( i == learn_map.end()){ std::pair p(h,e); learn_map.insert(p); e->hash = h; return ; } h++; } } void SKKDicEnt::commit(SKKStat *st,jstring_t *s) { if ( st->okuri.size()){ // 送り仮名のエントリに対して学習 std::list::iterator i; for ( i = okuri.begin() ; i != okuri.end() ; i++){ if ( (*i).okuri == st->okuri ){ jstring_unique_prepend(&((*i).head),s); goto rest; } } // 新しい送り仮名のエントリを追加 SKKOkuriEnt e; e.okuri = st->okuri; e.head.push_back(*s); okuri.push_back(e); } rest: // 送り仮名で無い部分を追加 jstring_unique_prepend(&okurinasi,s); } /* * Local variables: * c-indent-level: 4 * c-basic-offset: 4 * End: */