/*
* roma kana converter
*
* 理解するためには,構文解析について書かれたテキストで
* SLR(1)について調べるよろし.
*
* $Id: rkconv.c,v 1.16 2002/11/16 03:35:21 yusuke Exp $
*
* Copyright (C) 2001-2002 UGAWA Tomoharu
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "rkconv.h"
#define MAX_CONV_CHARS 1024
#define MAX_MAP_PALETTE 10
#define SPECIAL_CHAR '\xff'
#define TERMINATE_CHAR '\n'
/* break_into_roman */
struct break_roman {
int decided_length;
int pending_size;
char* pending;
};
struct rk_rule_set
{
struct rk_rule* rules;
int nr_rules;
};
struct next_array
{
struct rk_slr_closure* array[128];
};
struct rk_slr_closure
{
char* prefix;
struct rk_rule* r;
int is_reduction_only;
struct next_array *next;
};
struct rk_map
{
struct rk_rule_set* rs;
struct rk_slr_closure* root_cl;
int refcount;
};
struct rk_conv_context
{
struct rk_map* map;
int map_no;
int old_map_no;
struct rk_slr_closure* cur_state;
char cur_str[MAX_CONV_CHARS + 1];
int cur_str_len;
struct rk_map* map_palette[MAX_MAP_PALETTE];
struct break_roman *brk_roman;
};
static void
rk_rule_set_free(struct rk_rule_set* rs)
{
int i;
for (i = 0; i < rs->nr_rules; i++) {
free((void *) rs->rules[i].lhs);
free((void *) rs->rules[i].rhs);
free((void *) rs->rules[i].follow);
}
free(rs->rules);
free(rs);
}
static int
rk_rule_copy_to(const struct rk_rule* from, struct rk_rule* to)
{
char *lhs, *rhs;
if ((lhs = strdup(from->lhs))) {
if ((rhs = strdup(from->rhs))) {
if (!(to->follow = from->follow)
|| (to->follow = strdup(from->follow))) {
to->lhs = lhs;
to->rhs = rhs;
return 0;
}
free(rhs);
}
free(lhs);
}
to->lhs = NULL;
to->rhs = NULL;
return -1;
}
static struct rk_rule_set*
rk_rule_set_create(const struct rk_rule* rules)
{
int i;
struct rk_rule_set* rs;
rs = (struct rk_rule_set*) malloc(sizeof(struct rk_rule_set));
if (rs == NULL) {
return NULL;
}
for (i = 0; rules[i].lhs != NULL; i++);
rs->nr_rules = i;
rs->rules = (struct rk_rule*) malloc(sizeof(struct rk_rule) * i);
if (rs->rules == NULL) {
free(rs);
return NULL;
}
for (i = 0; i < rs->nr_rules; i++) {
if (rk_rule_copy_to(rules + i, rs->rules + i) != 0) {
rs->nr_rules = i;
rk_rule_set_free(rs);
return NULL;
}
}
return rs;
}
static void
rk_slr_closure_free(struct rk_slr_closure* cl)
{
int i;
free(cl->prefix);
if (cl->next) {
for (i = 0; i < 128; i++) {
if (cl->next->array[i]) {
rk_slr_closure_free(cl->next->array[i]);
}
}
free(cl->next);
}
free(cl);
}
static struct next_array *
alloc_next_array(void)
{
int i;
struct next_array *na = malloc(sizeof(struct next_array));
for (i = 0; i < 128; i++) {
na->array[i] = NULL;
}
return na;
}
static struct rk_slr_closure*
rk_slr_closure_create(struct rk_rule_set* rs,
const char* prefix, int pflen)
{
struct rk_slr_closure* cl;
int i;
cl = (struct rk_slr_closure*) malloc(sizeof(struct rk_slr_closure));
if (cl == NULL) {
return NULL;
}
if (prefix != NULL) {
cl->prefix = (char*) malloc(pflen + 1);
if (cl->prefix == NULL) {
free(cl);
return NULL;
}
memcpy(cl->prefix, prefix, pflen);
cl->prefix[pflen] = '\0';
} else {
cl->prefix = strdup("");
if (cl->prefix == NULL) {
free(cl);
return NULL;
}
}
cl->r = NULL;
cl->is_reduction_only = 1;
cl->next = NULL;
for (i = 0; i < rs->nr_rules; i++) {
struct rk_rule* r;
int c;
r = rs->rules + i;
if (pflen > 0 && strncmp(prefix, r->lhs, pflen) != 0)
continue;
c = r->lhs[pflen] & 0x7f;
if (c == '\0') { /* reduce */
cl->r = r;
if (r->follow != NULL)
cl->is_reduction_only = 0;
} else {
cl->is_reduction_only = 0;
if (cl->next == NULL) {
cl->next = alloc_next_array();
}
if (cl->next->array[c] == NULL) {
cl->next->array[c] = rk_slr_closure_create(rs, r->lhs,
pflen + 1);
if (cl->next->array[c] == NULL) {
rk_slr_closure_free(cl);
return NULL;
}
}
}
}
return cl;
}
struct rk_map*
rk_map_create(const struct rk_rule* rules)
{
struct rk_map* map;
map = (struct rk_map*) malloc(sizeof(struct rk_map));
if (map == NULL) {
return NULL;
}
map->rs = rk_rule_set_create(rules);
if (map->rs == NULL) {
free(map);
return NULL;
}
map->root_cl = rk_slr_closure_create(map->rs, NULL, 0);
if (map->root_cl == NULL) {
rk_rule_set_free(map->rs);
free(map);
return NULL;
}
map->refcount = 0;
return map;
}
int
rk_map_free(struct rk_map* map)
{
if (map->refcount > 0) {
return -1;
}
rk_rule_set_free(map->rs);
rk_slr_closure_free(map->root_cl);
free(map);
return 0;
}
static int
rk_reduce(struct rk_conv_context* cc,
struct rk_slr_closure* cur_state, char* buf, int size)
{
struct rk_rule* r;
const char* p;
char* q, * end;
r = cur_state->r;
if (r == NULL || size <= 0)
return 0;
if (r->rhs[0] == SPECIAL_CHAR) {
if (r->rhs[1] == 'o')
rk_select_registered_map(cc, cc->old_map_no);
else {
int mapn = r->rhs[1] - '0';
rk_select_registered_map(cc, mapn);
}
return 0;
}
p = r->rhs;
q = buf;
end = buf + size - 1;
while (*p && q < end)
*q ++ = *p++;
*q = '\0';
return q - buf;
}
static void
rk_convert_iterative(struct rk_conv_context* cc, int c,
char* buf, int size)
{
struct rk_slr_closure* cur_state = cc->cur_state;
if (cc->map == NULL)
return;
if (size > 0)
*buf = '\0';
AGAIN:
if (cur_state->next && cur_state->next->array[c]) {
struct rk_slr_closure* next_state = cur_state->next->array[c];
if (next_state->is_reduction_only) {
rk_reduce(cc, next_state, buf, size);
if (cc->map == NULL) {
cc->cur_state = NULL;
return;
}
cur_state = cc->map->root_cl;
} else
cur_state = next_state;
} else if (cur_state->r != NULL &&
(cur_state->r->follow == NULL ||
strchr(cur_state->r->follow, c))) {
int len;
len = rk_reduce(cc, cur_state, buf, size);
if (cc->map == NULL) {
cc->cur_state = NULL;
return;
}
cur_state = cc->map->root_cl;
buf += len;
size -= len;
goto AGAIN;
} else if (cur_state != cc->map->root_cl) {
cur_state = cc->map->root_cl;
goto AGAIN;
}
cc->cur_state = cur_state;
}
static void
brk_roman_init(struct rk_conv_context *rkctx)
{
rkctx->brk_roman= (struct break_roman *)malloc(sizeof(struct break_roman));
rkctx->brk_roman->pending=NULL;
rkctx->brk_roman->pending_size=0;
}
static void
brk_roman_free(struct rk_conv_context *rkctx)
{
struct break_roman *br=rkctx->brk_roman;
if(!br)
return;
if (br->pending) {
free(br->pending);
}
free(br);
}
static void
brk_roman_save_pending(struct rk_conv_context *rkctx)
{
struct break_roman *br=rkctx->brk_roman;
int len;
if(!br)
return;
len = rk_get_pending_str(rkctx,NULL,0);
if(br->pending_size < len){
br->pending_size=len;
if(br->pending)
free(br->pending);
br->pending=(char *)malloc(len);
}
rk_get_pending_str(rkctx,br->pending,len);
}
static void
brk_roman_set_decided_len(struct rk_conv_context *rkctx,int len)
{
struct break_roman *br=rkctx->brk_roman;
if(!br)
return;
br->decided_length=len;
}
static void
brk_roman_flush(struct rk_conv_context *rkctx)
{
struct break_roman *br=rkctx->brk_roman;
if(!br)
return;
if(br->pending)
br->pending[0]='\0';
br->decided_length=0;
}
struct rk_conv_context*
rk_context_create(int brk)
{
struct rk_conv_context* cc;
cc = (struct rk_conv_context*) malloc(sizeof(struct rk_conv_context));
if (cc == NULL) {
return NULL;
}
cc->map = NULL;
memset(&cc->map_palette, 0, sizeof(struct rk_map*) * MAX_MAP_PALETTE);
cc->map_no = -1;
cc->old_map_no = -1;
cc->brk_roman = NULL;
if (brk) {
brk_roman_init(cc);
}
rk_flush(cc);
return cc;
}
void
rk_context_free(struct rk_conv_context* cc)
{
int i;
brk_roman_free(cc);
rk_select_map(cc, NULL);
for (i = 0; i < MAX_MAP_PALETTE; i++) {
rk_register_map(cc, i, NULL);
}
free(cc);
}
int
rk_push_key(struct rk_conv_context* cc, int c)
{
int increased_length;
c &= 0x7f;
if (cc->cur_state == NULL)
return -1;
brk_roman_save_pending(cc);
rk_convert_iterative(cc, c,
cc->cur_str + cc->cur_str_len,
MAX_CONV_CHARS + 1 - cc->cur_str_len);
increased_length = strlen(cc->cur_str + cc->cur_str_len);
brk_roman_set_decided_len(cc,increased_length);
cc->cur_str_len += increased_length;
return 0;
}
void
rk_terminate(struct rk_conv_context* cc)
{
rk_push_key(cc, TERMINATE_CHAR);
}
void
rk_flush(struct rk_conv_context* cc)
{
brk_roman_flush(cc);
cc->cur_state = (cc->map == NULL) ? NULL : cc->map->root_cl;
cc->cur_str[0] = '\0';
cc->cur_str_len = 0;
}
int
rk_partial_result(struct rk_conv_context* cc, char* buf, int size)
{
int nr_rules = cc->map->rs->nr_rules;
int i, pending_len;
char *pending_buf;
struct rk_rule *rule = cc->map->rs->rules;
pending_len = rk_get_pending_str(cc, NULL, 0);
if (pending_len == 0) {
return 0;
}
pending_buf = alloca(pending_len);
rk_get_pending_str(cc, pending_buf, pending_len);
for (i = 0; i < nr_rules; i++) {
if (!strcmp(rule[i].lhs, pending_buf)) {
const char *res = rule[i].rhs;
if (size <= 0) {
return strlen(res) + 1;
}
return snprintf(buf, size, "%s", res);
}
}
return 0;
}
int
rk_result(struct rk_conv_context* cc, char* buf, int size)
{
int copy_len;
if (size <= 0)
return cc->cur_str_len;
copy_len = (size - 1 < cc->cur_str_len) ? size - 1 : cc->cur_str_len;
memcpy(buf, cc->cur_str, copy_len);
buf[copy_len] = '\0';
if (copy_len < cc->cur_str_len)
memmove(cc->cur_str, cc->cur_str + copy_len,
cc->cur_str_len - copy_len + 1);
cc->cur_str_len -= copy_len;
return cc->cur_str_len;
}
struct rk_map*
rk_select_map(struct rk_conv_context* cc, struct rk_map* map)
{
struct rk_map* old_map;
cc->old_map_no = cc->map_no;
old_map = cc->map;
if (old_map) {
old_map->refcount--;
}
cc->map = map;
if (cc->map == NULL) {
cc->cur_state = NULL;
} else {
map->refcount++;
cc->cur_state = map->root_cl;
rk_flush(cc);
}
cc->map_no = -1;
return old_map;
}
int
rk_get_pending_str(struct rk_conv_context* cc, char* buf, int size)
{
const char* p, *end;
char *q;
p = (cc->cur_state == NULL) ? "" : cc->cur_state->prefix;
if (size <= 0)
return strlen(p) + 1;
q = buf;
end = buf + size - 1;
while (*p && q < end)
*q++ = *p++;
*q = '\0';
return strlen(p);
}
struct rk_map*
rk_register_map(struct rk_conv_context* cc, int mapn, struct rk_map* map)
{
struct rk_map* old_map;
if (mapn < 0 || MAX_MAP_PALETTE <= mapn)
return NULL;
old_map = cc->map_palette[mapn];
if (old_map)
old_map->refcount--;
cc->map_palette[mapn] = map;
if (map)
map->refcount++;
return old_map;
}
void
rk_select_registered_map(struct rk_conv_context* cc, int mapn)
{
if (0 <= mapn && mapn < 0 + MAX_MAP_PALETTE) {
rk_select_map(cc, cc->map_palette[mapn]);
cc->map_no = mapn;
} else {
rk_select_map(cc, NULL);
cc->map_no = -1;
}
}
int
rk_selected_map(struct rk_conv_context* cc)
{
return cc->map_no;
}
/* some utitlity functions to merge rk_rule */
static int
rk_rule_length(const struct rk_rule* rules)
{
int i;
for (i = 0; rules[i].lhs != NULL; i++);
return i;
}
static int
rk_my_strcmp(const char *s1, const char *s2)
{
while (*s1 == *s2) {
if (!(*s1)) {
return 0;
}
s1++;
s2++;
}
return (*s1) - (*s2);
}
static int
rk_rule_compare_func(const void *p, const void *q)
{
const struct rk_rule *r1, *r2;
r1 = p;
r2 = q;
return rk_my_strcmp(r1->lhs, r2->lhs);
}
/*
* ソートされたrk_ruleを作って返す
*/
static struct rk_rule *
rk_sort_rule(const struct rk_rule *src)
{
struct rk_rule* rules;
int size = rk_rule_length(src);
int i, ret;
rules = (struct rk_rule*) malloc(sizeof(struct rk_rule) * (size + 1));
if (!rules) {
return NULL;
}
for (i = 0; i < size; i++) {
ret = rk_rule_copy_to (&src[i], &rules[i]);
if (ret == -1) {
goto ERROR;
}
}
qsort(rules, size, sizeof(struct rk_rule),
rk_rule_compare_func);
rules[i].lhs = NULL;
return rules;
ERROR:
rules[i].lhs = NULL;
rk_rules_free(rules);
free(rules);
return NULL;
}
/* 一つ目のルールが優先される */
static struct rk_rule*
rk_do_merge_rules(const struct rk_rule* r1,
const struct rk_rule* r2)
{
int size;
int ret;
struct rk_rule* rules;
struct rk_rule* p, *q;
struct rk_rule* r;
struct rk_rule* tmp;
int i;
size = rk_rule_length(r1) + rk_rule_length(r2);
rules = (struct rk_rule*) malloc(sizeof(struct rk_rule) * (size + 1));
if (rules == NULL) {
return NULL;
}
r = rules;
p = (struct rk_rule *)r1;
q = (struct rk_rule *)r2;
/* ソート済の列に対してマージソートをする */
for (i = 0; i < size; i++) {
if (p->lhs && q->lhs) {
/* p,qを比較してどちらから取り出すかを選ぶ */
ret = rk_my_strcmp(p->lhs, q->lhs);
if (ret > 0) {
tmp = q;
q++;
} else if (ret < 0) {
tmp = p;
p++;
} else {
/* キーが両方同じなのでqの方を優先する */
tmp = q;
p++;q++;
}
} else if (p->lhs) {
tmp = p;
p++;
} else if (q->lhs) {
tmp = q;
q++;
} else {
continue;
}
/* ここまでに選択したものをcopyする */
ret = rk_rule_copy_to(tmp, r);
if (ret == -1) {
r->lhs = NULL;
goto ERROR;
}
r++;
}
r->lhs = NULL;
return rules;
ERROR:
rk_rules_free (rules);
return NULL;
}
struct rk_rule*
rk_merge_rules(const struct rk_rule* r1,
const struct rk_rule* r2)
{
struct rk_rule *t1, *t2;
struct rk_rule* rules;
t1 = rk_sort_rule(r1);
if (!t1) {
return NULL;
}
t2 = rk_sort_rule(r2);
if (!t2) {
rk_rules_free(t1);
return NULL;
}
rules = rk_do_merge_rules(t1, t2);
rk_rules_free(t1);
rk_rules_free(t2);
return rules;
}
void
rk_rules_free(struct rk_rule* rules)
{
struct rk_rule* p;
for (p = rules; p->lhs != NULL; p++) {
free((void *) p->lhs);
free((void *) p->rhs);
free((void *) p->follow);
}
free(rules);
}
const char *
brk_roman_get_previous_pending(struct rk_conv_context *rkctx)
{
struct break_roman *br=rkctx->brk_roman;
if(!br)
return NULL;
return br->pending[0] ? br->pending : NULL;
}
int
brk_roman_get_decided_len(struct rk_conv_context *rkctx)
{
struct break_roman *br=rkctx->brk_roman;
if(!br)
return 0;
return br->decided_length;
}
syntax highlighted by Code2HTML, v. 0.9.1