/*
* $Id: gethinsi.c,v 1.6 2002/03/24 01:25:13 hiroo Exp $
*/
/*
* FreeWnn is a network-extensible Kana-to-Kanji conversion system.
* This file is part of FreeWnn.
*
* Copyright Kyoto University Research Institute for Mathematical Sciences
* 1987, 1988, 1989, 1990, 1991, 1992
* Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
* Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
* Copyright FreeWnn Project 1999, 2000, 2002
*
* Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Definitions concerning the structure of the hinsi (part-of-speech) file */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdio.h>
#if STDC_HEADERS
# include <string.h>
#elif HAVE_STRINGS_H
# include <strings.h>
#endif /* STDC_HEADERS */
#include "commonhd.h"
#include "wnn_config.h"
#include "wnnerror.h"
#include "jslib.h"
#include "hinsi_file.h"
#include "wnn_os.h"
#include "wnn_string.h"
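/*
wnn_loadhinsi(NULL) Load the hinsi (part-of-speech) information.
wnn_find_hinsi_by_name(c) Given a name, return the hinsi number.
char *c;
char *wnn_get_hinsi_name(k) Given a hinsi number, return the name.
int k;
int
wnn_get_fukugou_component(k, str) Find the components of a compound (fukugou) hinsi.
int k; The number of components is the return value,
unsigned short **str; and the components are returned starting at str.
#ifdef nodef
int wnn_get_hinsi_list(area) Get the list of hinsi.
Hinsi are managed as an array of hinsi names.
The address of the first element is stored in area and the array size is returned.
w_char ***area;
int wnn_get_fukugou_list(area, start) Get the list of compound hinsi.
Compound hinsi are managed as an array of compound-hinsi structures.
The address of the first element is stored in area and the array size is returned.
The hinsi number of the n-th compound hinsi is FUKUGOU_START - n.
The value of FUKUGOU_START is stored in start.
struct wnn_fukugou **area;
int *start;
int wnn_hinsi_node_component(name, area)
w_char **area;
w_char *name;
Given a hinsi node name, get the sequence of names of its child nodes.
The number of children is returned.
If the name is not a node but a real hinsi name (a leaf), 0 is returned.
If it is neither a node nor a hinsi name, -1 is returned.
#endif
*/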
/* Error code written by the JSERVER-only wrappers near the end of this file
   (wnn_hinsi_name, wnn_hinsi_number, wnn_hinsi_list); defined elsewhere. */
extern int wnn_errorno;
#ifdef JSERVER
/* must be #include "de_header.h" ? */
extern void log_debug ();
/* In the server build, error1() calls in this file go to log_debug(). */
#define error1 log_debug
#endif
/*
* The real program starts here.
*/
/* Code of the most recent hinsi-file error; written by error_long(),
   error_no_heap() and wnn_loadhinsi(). */
int wnnerror_hinsi;
/* Set to 1 by wnn_loadhinsi() once the data file has been opened; the lookup
   functions load the default file lazily while this is still 0. */
static int hinsi_loaded = 0;
/* Incremented by get_char0() as newlines are read.  It is not read anywhere
   in this file. */
static int line_no = 0;
/* Storage for all hinsi, fukugou and node names; hp is the next free slot
   (advanced by stradd()). */
static w_char heap[HEAP_LEN];
static w_char *hp = heap;
/* Storage for the TERMINATE-ended component lists of compound hinsi; whp is
   the next free slot (advanced by w_stradd()). */
static unsigned short wheap[WHEAP_LEN];
static unsigned short *whp = wheap;
/* Size in bytes of the buffer wnn_loadhinsi() reads one phrase into. */
#define SIZE 1024
/* hinsi[k] is the name of hinsi number k, or NULL for a reserved slot. */
static w_char *hinsi[MAXHINSI];
/* Compound hinsi; entry n has hinsi number FUKUGOU_START - n. */
static struct wnn_fukugou fukugou[MAXFUKUGOU];
/* Hinsi nodes (a name plus the names of its children). */
static struct wnn_hinsi_node node[MAXNODE];
/* Number of entries currently used in hinsi[], fukugou[] and node[]. */
int mhinsi = 0;
int mfukugou = 0;
int mnode = 0;
/* Record WNN_TOO_LONG_HINSI_FILE_LINE in wnnerror_hinsi.  Called by
   get_phrase() when a phrase does not fit in the caller's buffer. */
static void
error_long ()
{
wnnerror_hinsi = WNN_TOO_LONG_HINSI_FILE_LINE;
}
/* Record WNN_TOO_BIG_HINSI_FILE in wnnerror_hinsi.  Called by stradd() and
   w_stradd() when their storage areas have no room left. */
static void
error_no_heap ()
{
wnnerror_hinsi = WNN_TOO_BIG_HINSI_FILE;
}
/*
 * get_char0 -- read the next significant character of the hinsi file.
 *
 * Skips IGNORE_CHAR1 / IGNORE_CHAR2, skips a comment (COMMENT_CHAR up to, but
 * not including, the newline that ends it) and joins continued lines
 * (CONTINUE_CHAR immediately followed by a newline).  A CONTINUE_CHAR that is
 * not followed by a newline is returned as an ordinary character.
 *
 * Returns the character read, or EOF.  line_no is advanced once for every
 * newline consumed.
 */
static int
get_char0 (fp)
     FILE *fp;
{
  int c, d;

  for (;;)
    {
      c = getc (fp);
      if (c == CONTINUE_CHAR)
        {
          d = getc (fp);
          if (d == EOF)
            break;              /* lone CONTINUE_CHAR at end of file */
          if (d == '\n')
            {
              /* Continued line: swallow the newline and keep reading. */
              line_no += 1;
              continue;
            }
          ungetc (d, fp);
          break;
        }
      else if (c == COMMENT_CHAR)
        {
          do
            {
              if ((c = getc (fp)) == EOF)
                return (EOF);
            }
          while (c != '\n');
          /* Push the newline back so that it is returned to the caller as
             the end of this line.  It is counted when it is read again just
             below; counting it here as well would count the line twice. */
          ungetc (c, fp);
        }
      else if (c != IGNORE_CHAR1 && c != IGNORE_CHAR2)
        {
          break;
        }
    }
  if (c == '\n')
    line_no += 1;
  return (c);
}
/*
 * get_char -- get_char0() with empty lines removed.
 *
 * All newlines at the very beginning of the input are dropped, and every run
 * of consecutive newlines is reduced to a single one.  To do the latter the
 * function reads one character ahead after a newline and hands that
 * character out on the following call.  The read-ahead slot and the
 * "start of input already handled" flag are function-static, so this state
 * persists across calls for the life of the process.
 */
static int
get_char (fp)
     FILE *fp;
{
  static int pending = -1;      /* read-ahead character, -1 when empty */
  static int started = 0;       /* non-zero once leading newlines are gone */
  int cur;

  if (pending != -1)
    {
      cur = pending;
      pending = -1;
      return (cur);
    }

  if (started == 0)
    {
      /* Drop every newline at the head of the file. */
      do
        {
          cur = get_char0 (fp);
        }
      while (cur == '\n');
      started = 1;
    }
  else
    {
      cur = get_char0 (fp);
    }

  if (cur == '\n')
    {
      /* Collapse the blank lines that follow into this one newline. */
      do
        {
          pending = get_char0 (fp);
        }
      while (pending == '\n');
    }
  return (cur);
}
/*
 * get_phrase -- read one phrase into s0 and return the separator that ended
 * it.
 *
 * A phrase ends at a newline, DEVIDE_CHAR, NODE_CHAR, HINSI_SEPARATE_CHAR or
 * EOF.  The phrase is stored NUL-terminated in s0 (capacity `size' bytes).
 * If it does not fit, error_long() is called and HINSI_ERR is returned.
 * Once EOF has been seen, every later call stores an empty string and
 * returns EOF without reading; this flag is function-static.
 */
static int
get_phrase (s0, size, fp)
     UCHAR *s0;
     int size;
     FILE *fp;
{
  static int hit_eof = 0;
  UCHAR *out;
  int ch;

  if (hit_eof)
    {
      *s0 = 0;
      return (EOF);
    }

  out = s0;
  for (;;)
    {
      ch = get_char (fp);
      if (ch == '\n' || ch == DEVIDE_CHAR || ch == NODE_CHAR
          || ch == HINSI_SEPARATE_CHAR || ch == EOF)
        break;
      if (out - s0 >= size)
        {
          error_long ();
          return (HINSI_ERR);
        }
      *out++ = ch;
    }

  if (ch == EOF)
    hit_eof = 1;

  /* Make sure the terminating NUL still fits. */
  if (out - s0 >= size - 1)
    {
      error_long ();
      return (HINSI_ERR);
    }
  *out = '\0';
  return (ch);
}
/*
 * stradd -- convert the byte string str to w_char with wnn_Sstrcpy(), store
 * it at the current end of heap[] and return its address through *cp.
 *
 * The byte length of str is used as the upper bound for the number of
 * w_chars the conversion will write.
 *
 * Returns 0 on success.  If there is not enough room left in heap[],
 * error_no_heap() is called, *cp is left untouched and -1 is returned.
 */
static int
stradd (cp, str)
     w_char **cp;
     char *str;
{
  int len = strlen (str);

  /* Compare sizes rather than pointers: computing hp + len + 1 could form a
     pointer beyond the end of heap[], which is undefined.  The set of
     requests accepted is the same as with the pointer comparison. */
  if (len + 1 >= (heap + HEAP_LEN) - hp)
    {
      error_no_heap ();
      return (-1);
    }
  *cp = hp;
  wnn_Sstrcpy (hp, str);
  hp += wnn_Strlen (hp) + 1;
  return (0);
}
/*
 * w_stradd -- append the TERMINATE-ended list str to wheap[] and return the
 * address of the copy through *cp.
 *
 * Returns 0 on success.  If the list, including its TERMINATE, does not fit
 * in what is left of wheap[], error_no_heap() is called and -1 is returned;
 * nothing is copied and whp is not moved in that case.
 */
static int
w_stradd (cp, str)
     unsigned short **cp;
     unsigned short *str;
{
  long room = (long) ((wheap + WHEAP_LEN) - whp);
  long len = 0;
  long i;

  *cp = whp;
  while (str[len] != TERMINATE)
    len++;

  /* len elements plus the terminator must fit.  Checking before copying
     also covers the case where the elements fill wheap[] exactly and the
     terminator would otherwise be written one past the end. */
  if (len + 1 > room)
    {
      error_no_heap ();
      return (-1);
    }
  for (i = 0; i < len; i++)
    *whp++ = str[i];
  *whp++ = TERMINATE;
  return (0);
}
int
wnn_loadhinsi (fname)
unsigned char *fname;
{
FILE *fp;
UCHAR buf[SIZE];
unsigned short fukugou_str[MAXHINSI];
int sep;
int h;
unsigned short *c;
char tmp[256];
extern int wnn_find_hinsi_by_name ();
if (fname == NULL)
{
#ifdef JSERVER
if (hinsi_loaded)
return (0);
#endif /* JSERVER */
strcpy (tmp, LIBDIR);
strcat (tmp, HINSIDATA_FILE);
fname = (unsigned char *) tmp;
}
#ifdef JSERVER
error1 ("Read HINSI DATA FILE %s\n", fname);
#endif /* JSERVER */
if ((fp = fopen ((char *) fname, "r")) == NULL)
{
wnnerror_hinsi = WNN_NO_HINSI_DATA_FILE;
goto err_1;
}
hinsi_loaded = 1;
while ((sep = get_phrase (buf, SIZE, fp)) != EOF)
{
if (sep == HINSI_ERR)
{
goto err; /* wnnerror_hinsi set in get_phrase */
}
if (buf[0] == YOYAKU_CHAR)
{ /* yoyaku */
if (sep != '\n')
{
wnnerror_hinsi = WNN_BAD_HINSI_FILE;
goto err;
}
hinsi[mhinsi++] = NULL;
}
else if (sep == '\n')
{ /* hinsi */
if (stradd (&hinsi[mhinsi++], buf))
goto err;
}
else if (sep == DEVIDE_CHAR)
{ /* fukugou */
if (stradd (&fukugou[mfukugou].name, buf))
goto err;
c = fukugou_str;
while ((sep = get_phrase (buf, SIZE, fp)) != EOF)
{
if (sep == -1)
{
goto err; /* wnnerror_hinsi set in get_phrase */
}
if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n')
{
wnnerror_hinsi = WNN_BAD_HINSI_FILE;
goto err;
}
if ((h = wnn_find_hinsi_by_name (buf)) == -1 || h >= mhinsi)
{
wnnerror_hinsi = WNN_BAD_HINSI_FILE;
goto err;
}
*c++ = h;
if (sep == '\n' || sep == EOF)
break;
}
*c = TERMINATE;
if (w_stradd (&fukugou[mfukugou++].component, fukugou_str))
goto err;
}
else if (sep == NODE_CHAR)
{
int first = 1;
w_char *dummy;
node[mnode].kosuu = 0;
if (stradd (&node[mnode].name, buf))
goto err;
while ((sep = get_phrase (buf, SIZE, fp)) != EOF)
{
if (sep == -1)
{
goto err; /* wnnerror_hinsi set in get_phrase */
}
if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n')
{
wnnerror_hinsi = WNN_BAD_HINSI_FILE;
goto err;
}
node[mnode].kosuu++;
if (first)
{
if (stradd (&node[mnode].son, buf))
goto err;
first = 0;
}
else
{
if (stradd (&dummy, buf))
goto err;
}
if (sep == '\n' || sep == EOF)
break;
}
mnode++;
}
}
fclose (fp);
return (0);
err:
fclose (fp);
err_1:
#ifdef JSERVER
error1 ("Error reading HINSI DATA FILE %s\n", fname);
#endif /* JSERVER */
return (HINSI_ERR);
}
/*
 * find_hinsi_by_name -- look up a hinsi number by its w_char name.
 *
 * Loads the default hinsi file first if nothing has been loaded yet.
 * Simple hinsi are searched before compound ones.  Returns the index into
 * hinsi[] for a simple hinsi, FUKUGOU_START - n for the n-th compound hinsi,
 * or -1 if the name is unknown or the file could not be loaded.
 */
static int
find_hinsi_by_name (c)
     register w_char *c;
{
  register int i;

  if (!hinsi_loaded && wnn_loadhinsi (NULL) != 0)
    return (-1);

  /* Reserved slots are NULL and can never match. */
  i = 0;
  while (i < mhinsi)
    {
      if (hinsi[i] != NULL && wnn_Strcmp (hinsi[i], c) == 0)
        return (i);
      i++;
    }

  i = 0;
  while (i < mfukugou)
    {
      if (fukugou[i].name != NULL && wnn_Strcmp (fukugou[i].name, c) == 0)
        return (FUKUGOU_START - i);
      i++;
    }
  return (-1);
}
int
wnn_find_hinsi_by_name (c)
register char *c;
{
w_char hin[WNN_HINSI_NAME_LEN];
wnn_Sstrcpy (hin, c);
return (find_hinsi_by_name (hin));
}
/*
 * get_hinsi_name -- return the w_char name of hinsi number k.
 *
 * Loads the default hinsi file first if nothing has been loaded yet.
 * Returns NULL if loading fails, if k is neither a simple nor a compound
 * hinsi number, or if k is a reserved simple hinsi slot.
 */
static w_char *
get_hinsi_name (k)
     int k;
{
  if (!hinsi_loaded)
    {
      if (wnn_loadhinsi (NULL) != 0)
        {
          return (NULL);
        }
    }
  if (k >= 0 && k < mhinsi)
    {
      return (hinsi[k]);
    }
  /* Compound hinsi are numbered downwards from FUKUGOU_START.  The upper
     bound keeps FUKUGOU_START - k from becoming a negative index; it is the
     same range test wnn_get_fukugou_component_body() uses. */
  if (k > FUKUGOU_START - mfukugou && k <= FUKUGOU_START)
    {
      return (fukugou[FUKUGOU_START - k].name);
    }
  return (NULL);
}
/*
 * wnn_get_hinsi_name -- return the name of hinsi number k as a byte string.
 *
 * The result lives in a static buffer that is overwritten by the next call.
 * Returns NULL if k has no name (see get_hinsi_name) or if the name is too
 * long for the buffer.
 */
char *
wnn_get_hinsi_name (k)
     int k;
{
  w_char *s;
  static char hin[WNN_HINSI_NAME_LEN * 2];

  if ((s = get_hinsi_name (k)) == NULL)
    return (NULL);
  /* Names in the table are not limited to WNN_HINSI_NAME_LEN characters, so
     check before converting into the fixed buffer.
     NOTE(review): this assumes wnn_sStrcpy() emits at most two bytes per
     w_char, as the buffer size implies -- confirm. */
  if (wnn_Strlen (s) >= WNN_HINSI_NAME_LEN)
    return (NULL);
  wnn_sStrcpy (hin, s);
  return (hin);
}
/*
 * wnn_get_fukugou_component_body -- return the components of hinsi k.
 *
 * For a simple hinsi the single component is k itself; *shp is pointed at a
 * static one-element buffer (overwritten by the next such call) and 1 is
 * returned.  For a compound hinsi *shp is pointed at its stored component
 * list and the number of components before TERMINATE is returned.  For any
 * other k, -1 is returned and *shp is not modified.
 *
 * Does not load the hinsi file; wnn_get_fukugou_component() does that.
 * The function is static unless built with JSERVER.
 */
#ifndef JSERVER
static
#endif /* JSERVER */
int
wnn_get_fukugou_component_body (k, shp)
     register int k;
     register unsigned short **shp;
{
  static unsigned short single;
  register unsigned short *p;
  int index;                    /* need for NEWS-OS 6.0 */

  if (k >= 0 && k < mhinsi)
    {
      single = k;
      *shp = &single;
      return (1);
    }
  if (k <= FUKUGOU_START - mfukugou || k > FUKUGOU_START)
    return (-1);

  /* The index is computed into a separate variable on purpose: with the
     expression written directly inside the subscript, jserver went down
     during kanji conversion on NEWS-OS 6.0. */
  index = FUKUGOU_START - k;
  p = fukugou[index].component;
  *shp = p;
  while (*p != TERMINATE)
    p++;
  return (p - *shp);
}
/*
 * wnn_get_fukugou_component -- return the components of hinsi k, loading the
 * default hinsi file first if nothing has been loaded yet.
 *
 * Returns -1 if loading fails; otherwise the result of
 * wnn_get_fukugou_component_body (k, shp).
 */
int
wnn_get_fukugou_component (k, shp)
     register int k;
     register unsigned short **shp;
{
  if (hinsi_loaded == 0 && wnn_loadhinsi (NULL) != 0)
    return (-1);
  return (wnn_get_fukugou_component_body (k, shp));
}
#ifdef JSERVER
/*
 * wnn_hinsi_name -- server-side wrapper around get_hinsi_name().
 *
 * Returns the w_char name of hinsi number `no'.  When there is none, NULL is
 * returned and wnn_errorno is set to WNN_BAD_HINSI_NO.
 */
w_char *
wnn_hinsi_name (no)
     int no;
{
  w_char *found = get_hinsi_name (no);

  if (found == NULL)
    wnn_errorno = WNN_BAD_HINSI_NO;
  return (found);
}
/*
 * wnn_hinsi_number -- server-side wrapper around find_hinsi_by_name().
 *
 * Returns the hinsi number for `name'.  When the name is unknown, -1 is
 * returned and wnn_errorno is set to WNN_BAD_HINSI_NAME.
 */
int
wnn_hinsi_number (name)
     w_char *name;
{
  int number = find_hinsi_by_name (name);

  if (number == -1)
    wnn_errorno = WNN_BAD_HINSI_NAME;
  return (number);
}
/*
 * wnn_hinsi_list -- get the children of the hinsi node called `name'.
 *
 * name:    node (or hinsi) name to look up.
 * c:       on a node match, *c receives the node's `son' pointer.
 * mynode:  node table to search, or NULL for this module's own table.
 * mmynode: number of entries in mynode (ignored when mynode is NULL).
 *
 * Returns the node's child count when `name' is a node, 0 when it is not a
 * node but is a known hinsi name, and -1 with wnn_errorno set to
 * WNN_BAD_HINSI_NAME when it is neither.
 */
int
wnn_hinsi_list (name, c, mynode, mmynode)
     w_char *name;
     w_char **c;
     struct wnn_hinsi_node *mynode;
     int mmynode;
{
  int k;

  /* Load before falling back to the module's table: mnode is only
     meaningful once the file has been read, so taking its value first would
     search zero nodes on the first call.  A load failure is not reported
     here; the hinsi-name lookup below then fails and yields -1. */
  if (!hinsi_loaded)
    wnn_loadhinsi (NULL);
  if (mynode == NULL)
    {
      mynode = node;
      mmynode = mnode;
    }
  for (k = 0; k < mmynode; k++)
    {
      if (wnn_Strcmp (name, mynode[k].name) == 0)
        {
          *c = mynode[k].son;
          return (mynode[k].kosuu);
        }
    }
  if (find_hinsi_by_name (name) == -1)
    {
      wnn_errorno = WNN_BAD_HINSI_NAME;
      return (-1);
    }
  return (0);
}
/*
 * wnn_has_hinsi -- tell whether `name' occurs in a node table, either as a
 * node name or as the name of one of a node's children.
 *
 * mynode:  node table to search, or NULL for this module's own table.
 * mmynode: number of entries in mynode (ignored when mynode is NULL).
 *
 * Returns 1 if found, 0 otherwise.  This function does not load the hinsi
 * file.
 */
int
wnn_has_hinsi (mynode, mmynode, name)
     struct wnn_hinsi_node *mynode;
     int mmynode;
     w_char *name;
{
  w_char *child;
  int i, left;

  if (mynode == NULL)
    {
      mynode = node;
      mmynode = mnode;
    }

  for (i = 0; i < mmynode; i++)
    {
      if (wnn_Strcmp (name, mynode[i].name) == 0)
        return (1);

      /* The kosuu child names are walked as consecutive NUL-terminated
         strings starting at son. */
      child = mynode[i].son;
      for (left = mynode[i].kosuu; left > 0; left--)
        {
          if (wnn_Strcmp (name, child) == 0)
            return (1);
          child += wnn_Strlen (child) + 1;
        }
    }
  return (0);
}
#endif
/* syntax highlighted by Code2HTML, v. 0.9.1 */