/*
* 用例辞書を作る
*
* Copyright (C) 2003-2005 TABATA Yusuke
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <anthy/matrix.h>
#include "mkdic.h"
#define LINE_LEN 256
/* One use case: a pair of word ids that appeared in the same group of a
 * use-case file.  Entries are chained into a singly linked list. */
struct use_case {
/* commit_uc() stores its x argument in id[0] and its y argument in id[1];
 * read_uc_file() passes the id of a follow-up word as x and the id of the
 * first word of the group as y. */
int id[2];
/* Next entry in the list, or NULL at the end. */
struct use_case *next;
};
/* Use-case dictionary: the collection of all use cases read so far. */
struct uc_dict {
/* List of use cases.  uc_head is a dummy head node embedded in the
 * dictionary: only its next pointer is used, and it points at the most
 * recently added entry (commit_uc() inserts at the front). */
struct use_case uc_head;
/* Number of entries in the list; incremented by commit_uc(). */
int nr_ucs;
};
/*
 * Compute the id of the word described by one line of a use-case file.
 *
 * The line must contain at least four whitespace-separated fields
 * (presumably reading, okurigana, word type and kanji spelling -- the
 * first three are only required to be present, their contents are not
 * used).  The fourth field is converted to an xstr and hashed with
 * anthy_xstr_hash(); that hash is the id.
 *
 * buf: NUL-terminated line; it is not modified.
 *
 * Returns the hash of the fourth field, or -1 when the line has fewer
 * than four fields.
 */
static int
get_id_from_word_line(char *buf)
{
  /* The field width in the format string below must stay LINE_LEN - 1
   * so that sscanf() can never write past the end of this buffer. */
  char kanji[LINE_LEN];
  xstr *xs;
  int id;

  /* "%*s" matches a field without storing it, so only the one field we
   * actually need is counted in the return value. */
  if (sscanf(buf, "%*s %*s %*s %255s", kanji) != 1) {
    return -1;
  }

  xs = anthy_cstr_to_xstr(kanji, 0);
  id = anthy_xstr_hash(xs);
  anthy_free_xstr(xs);
  return id;
}
/*
 * Add the use case (x, y) to the front of the dictionary's list and bump
 * its entry count.
 *
 * dict: dictionary to add to; a NULL dictionary is ignored.
 * x, y: word ids.  A negative id marks a word that could not be parsed,
 *       so any pair containing one is silently dropped.
 *
 * If memory for the new entry cannot be allocated, a message is written
 * to stderr and the pair is dropped; the dictionary is left unchanged.
 */
static void
commit_uc(struct uc_dict *dict, int x, int y)
{
  struct use_case *uc;

  if (!dict || x < 0 || y < 0) {
    return;
  }

  uc = malloc(sizeof *uc);
  if (!uc) {
    fprintf(stderr, "commit_uc: out of memory, use case dropped\n");
    return;
  }
  uc->id[0] = x;
  uc->id[1] = y;

  /* Push onto the front of the list hanging off the dummy head. */
  uc->next = dict->uc_head.next;
  dict->uc_head.next = uc;
  dict->nr_ucs++;
}
/*
 * Create an empty use-case dictionary.
 *
 * Returns a newly malloc()ed dictionary with an empty list and a count
 * of zero, or NULL if the allocation fails.
 */
struct uc_dict *
create_uc_dict(void)
{
  struct uc_dict *dict = malloc(sizeof *dict);

  if (!dict) {
    return NULL;
  }
  /* The dummy head's ids are never read; give them a determinate value
   * anyway so the structure holds no uninitialized data. */
  dict->uc_head.id[0] = 0;
  dict->uc_head.id[1] = 0;
  dict->uc_head.next = NULL;
  dict->nr_ucs = 0;
  return dict;
}
/*
 * Read a use-case file and add its use cases to dict.
 *
 * File format, as handled here (one item per line):
 *   - a line starting with '#' is a comment and is skipped;
 *   - a line starting with '-' is a separator that starts a new group;
 *   - any other line is a word line, parsed by get_id_from_word_line().
 * Within a group, the first word line is the base word; every following
 * word line is committed as the pair (that word, base word).
 *
 * A word line that cannot be parsed is reported on stderr but still
 * takes its position in the group with id -1.  commit_uc() rejects
 * negative ids, so an unparsable follow-up word is dropped, and an
 * unparsable base word causes the rest of its group to be dropped.
 *
 * dict: dictionary to fill; nothing is done when it is NULL.
 * fn:   path of the file to read.  If it cannot be opened the function
 *       returns silently without touching dict.
 */
void
read_uc_file(struct uc_dict *dict, const char *fn)
{
  char buf[LINE_LEN];
  FILE *uc_file;
  int off = 0;          /* 0: next word is the base; 1..n: a follow-up */
  int base = 0;         /* id of the current group's base word */
  int cur;
  int line_number = 0;

  if (!dict) {
    return;
  }
  uc_file = fopen(fn, "r");
  if (!uc_file) {
    return;
  }

  while (fgets(buf, sizeof buf, uc_file)) {
    line_number++;

    if (buf[0] == '#') {
      /* comment */
      continue;
    }
    if (buf[0] == '-') {
      /* group separator */
      off = 0;
      continue;
    }

    cur = get_id_from_word_line(buf);
    if (cur == -1) {
      fprintf(stderr, "Invalid line(%d):%s\n", line_number, buf);
    }

    if (off == 0) {
      /* first item of the group */
      base = cur;
    } else {
      /* second and later items */
      commit_uc(dict, cur, base);
    }
    off++;
  }

  /* Release the stream; previously it was left open on every call. */
  fclose(uc_file);
}
/*
 * Write the use-case dictionary to a file.
 *
 * Every use case in dict is entered into a sparse matrix at position
 * (id[0], id[1]) with value 1.  The matrix is then turned into a matrix
 * image, and each element of that image is passed to write_nl() together
 * with uc_out.  Finally a one-line summary is printed to stdout.
 *
 * uc_out: output stream handed to write_nl().
 * dict:   dictionary to write; may be NULL, in which case an empty
 *         matrix is written and the summary says there are no examples.
 */
void
make_ucdict(FILE *uc_out, struct uc_dict *dict)
{
  struct sparse_matrix *matrix = anthy_sparse_matrix_new();
  struct matrix_image *img;
  struct use_case *node;
  int pos;

  /* Fill the sparse matrix from the list (empty when dict is NULL). */
  node = dict ? dict->uc_head.next : NULL;
  while (node) {
    anthy_sparse_matrix_set(matrix, node->id[0], node->id[1], 1, NULL);
    node = node->next;
  }
  anthy_sparse_matrix_make_matrix(matrix);

  /* Build the matrix image and emit it element by element. */
  img = anthy_matrix_image_new(matrix);
  pos = 0;
  while (pos < img->size) {
    write_nl(uc_out, img->image[pos]);
    pos++;
  }

  if (!dict) {
    printf("udic: no use examples.\n");
    return;
  }
  printf("udic: %d use examples.\n", dict->nr_ucs);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */