/*
 * kanji.c - kanji code converter
 * Copyright (C) 1994, 1996 by candy
 */
/* RCS identification string ($Id$ keyword) recording the revision of this source file. */
const char rcsid_kanji[] = "$Id: kanji.c,v 3.7 1997/05/02 04:06:57 candy Exp candy $";
#include <stdio.h>
#include <stddef.h>
#include "kanji.h"

/*
 * Final bytes of the JIS escape sequences.
 * sjis_to_jis() writes ESC, KI_1, kanji_in to enter kanji mode and
 * ESC, KO_1, kanji_out to leave it; get_jis() also accepts these
 * values (besides a few fixed letters) when parsing its input.
 */
int kanji_in = '@';
int kanji_out = 'J';

/*
 * Forward declarations of the file-local helpers.
 * xtoy(): convert one character code between encodings
 *         (e = EUC, j = JIS, s = shift-JIS).
 * get_xxx(): byte-at-a-time readers that assemble one character code
 *            from an input stream in the named encoding.
 */
static int etoj(int wc);
static int etos(int wc);
static int jtoe(int wc);
static int jtos(int wc);
static int stoe(int wc);
static int stoj(int wc);
static int get_euc(struct MB *mb, int ch, int (*put)(int));
static int get_sjis(struct MB *mb, int ch, int (*put)(int));
static int get_jis(struct MB *mb, int ch, int (*put)(int));

/*
 * Convert an EUC code to JIS (0x2121..0x7e7e) by clearing the top bit
 * of each of the two bytes.
 * When the high byte is zero after masking, wc is returned unchanged.
 */
static int
etoj(int wc)
{
	int lead = (wc >> 8) & 0x7f;
	int trail = wc & 0x7f;

	if (lead == 0)
		return wc;
	return (lead << 8) | trail;
}/* etoj */

/*
 * Convert an EUC code to shift-JIS.
 * A code of the form 0x8eXX (the form strstoe() builds for half-width
 * kana) yields the single byte XX.  Any other code whose high byte is
 * non-zero after masking with 0x7f is stripped to 7 bits per byte and
 * handed to jtos().  Everything else is returned unchanged.
 */
static int
etos(int wc)
{
	int lead, trail;

	if ((wc & 0xff00) == 0x8e00)
		return wc & 0xff;

	lead = (wc >> 8) & 0x7f;
	trail = wc & 0x7f;
	if (lead == 0)
		return wc;
	return jtos((lead << 8) | trail);
}/* etos */

/*
 * Convert a JIS code (0x2121..0x7e7e) to EUC by setting the top bit
 * of both bytes.
 * A value with a zero high byte (0x00..0xff) is returned unchanged.
 */
static int
jtoe(int wc)
{
	int lead = (wc >> 8) & 0xff;
	int trail = wc & 0xff;

	return (lead == 0) ? wc : (((lead | 0x80) << 8) | (trail | 0x80));
}/* jtoe */

/*
 * Convert a JIS code (0x2121..0x7e7e) to shift-JIS.
 *
 * The high byte selects the shift-JIS lead byte: two JIS rows share
 * one lead byte, rows 0x21..0x5e map to leads starting at 0x81 and
 * rows 0x5f..0x7e to leads starting at 0xe0 (0xc1 + 0x1f).
 * The low byte is offset depending on whether the row is odd or even.
 *
 * Returns the converted code, or wc unchanged when either byte lies
 * outside 0x21..0x7e (this includes all single-byte values 0x00..0xff).
 */
static int
jtos(int wc)
{
	int hib = (wc >> 8) & 0xff, lob = wc & 0xff;
	int hi1, hi2, lo1;

	/* Base of the shift-JIS lead byte; row 0x7f is not a JIS row. */
	if (hib >= 0x21 && hib < 0x5f)
		hi1 = 0x81;
	else if (hib >= 0x5f && hib < 0x7f)
		hi1 = 0xc1;
	else
		return wc;

	if ((hib & 1) == 1) {
		/* Odd row: trail bytes 0x40..0x7e and 0x80..0x9e (0x7f is skipped). */
		hi2 = 0x21;
		if (lob >= 0x21 && lob < 0x60)
			lo1 = 0x1f;
		else if (lob >= 0x60 && lob < 0x7f)
			lo1 = 0x20;
		else
			return wc;
	}
	else {
		/* Even row: trail bytes 0x9f..0xfc. */
		hi2 = 0x22;
		if (lob >= 0x21 && lob < 0x7f)
			lo1 = 0x7e;
		else
			return wc;
	}
	return (((hib - hi2) / 2 + hi1) << 8) + lob + lo1;
}/* jtos */

/*
 * Convert a shift-JIS code to EUC.
 * A value with a zero high byte (0x00..0xff) is returned unchanged;
 * otherwise the result of stoj() is OR'ed with 0x8080.
 */
static int
stoe(int wc)
{
	if (((wc >> 8) & 0xff) == 0)
		return wc;
	return stoj(wc) | 0x8080;
}/* stoe */

/*
 * Convert a shift-JIS code to JIS (0x2121..0x7e7e).
 *
 * Accepted lead bytes are 0x81..0x9f and 0xe0..0xef; accepted trail
 * bytes are 0x40..0x7e, 0x80..0x9e (first JIS row of the pair) and
 * 0x9f..0xfc (second JIS row of the pair).
 * Anything else, including 0x00..0xff, is returned unchanged.
 */
static int
stoj(int wc)
{
	int lead = (wc >> 8) & 0xff;
	int trail = wc & 0xff;
	int lead_base, row_base, trail_off;

	if (lead >= 0x81 && lead < 0xa0)
		lead_base = 0x81;
	else if (lead >= 0xe0 && lead < 0xf0)
		lead_base = 0xc1;
	else
		return wc;

	if (trail >= 0x40 && trail < 0x7f) {
		row_base = 0x21;
		trail_off = 0x1f;
	}
	else if (trail >= 0x80 && trail < 0x9f) {
		row_base = 0x21;
		trail_off = 0x20;
	}
	else if (trail >= 0x9f && trail < 0xfd) {
		row_base = 0x22;
		trail_off = 0x7e;
	}
	else
		return wc;

	return (((lead - lead_base) * 2 + row_base) << 8) + trail - trail_off;
}/* stoj */

/*
 * Feed one byte of an EUC stream.
 * ch == 0   : reset *mb with MB_CLEAR().
 * ch == EOF : flush a pending lead byte through put().
 * Returns the assembled character code: a single byte below 0x80 as
 * is, or (lead << 8) + trail once the second byte of a pair arrives.
 * Returns 0 when no character is available yet.
 */
static int
get_euc(struct MB *mb, int ch, int (*put)(int))
{
	int code;

	if (ch == 0) {
		MB_CLEAR(mb);
		return 0;
	}
	if (ch == EOF) {
		/* A lead byte with no partner is written out as it was read. */
		if (mb->hi)
			put(mb->hi);
		mb->hi = 0;
		return 0;
	}
	if (mb->hi) {
		code = (mb->hi << 8) + ch;
		mb->hi = 0;
		return code;
	}
	if (ch >= 0x80) {
		/* First half of a two-byte code: remember it and wait. */
		mb->hi = ch;
		return 0;
	}
	return ch;
}/* get_euc */

/*
 * Feed one byte of a shift-JIS stream.
 * ch == 0   : reset *mb with MB_CLEAR().
 * ch == EOF : flush a pending lead byte through put().
 * Returns the assembled character code: a byte for which is1sjis() is
 * false as is, or (lead << 8) + trail once the second byte of a pair
 * arrives.  Returns 0 when no character is available yet.
 */
static int
get_sjis(struct MB *mb, int ch, int (*put)(int))
{
	int code;

	if (ch == 0) {
		MB_CLEAR(mb);
		return 0;
	}
	if (ch == EOF) {
		/* A lead byte with no partner is written out as it was read. */
		if (mb->hi)
			put(mb->hi);
		mb->hi = 0;
		return 0;
	}
	if (mb->hi) {
		code = (mb->hi << 8) + ch;
		mb->hi = 0;
		return code;
	}
	if (is1sjis(ch)) {
		/* First half of a two-byte code: remember it and wait. */
		mb->hi = ch;
		return 0;
	}
	return ch;
}/* get_sjis */

/*
 * Feed one byte of a JIS (escape-sequence switched) stream.
 * ch == 0   : reset *mb with MB_CLEAR().
 * ch == EOF : flush bytes that were held back (a swallowed ESC, a
 *             partial escape sequence, or a pending first kanji byte).
 * ch == '\n': returned immediately, without looking at or changing
 *             the state in *mb.
 * Returns the character code: a single byte outside kanji mode, or
 * (first << 8) + second for a byte pair inside kanji mode.
 * Returns 0 when no character is available (e.g. while an escape
 * sequence is being processed).
 *
 * States kept in mb->st:
 *   MB_INITIAL  single-byte mode
 *   MB_KI1      ESC seen in single-byte mode
 *   MB_KI2      ESC KI_1 seen
 *   MB_INKANJI  two-byte (kanji) mode
 *   MB_KO1      ESC seen in kanji mode
 *   MB_KO2      ESC KO_1 seen
 */
static int
get_jis(struct MB *mb, int ch, int (*put)(int))
{
	int ret = 0;
	if (ch == 0) {
		MB_CLEAR(mb);
	}
	else if (ch == '\n') {
		return ch;
	}
	else {
		/*
		 * Phase 1: resolve a pending escape sequence.
		 * When the sequence does not continue as expected, the bytes
		 * swallowed so far are re-emitted through put(), the previous
		 * mode is restored, and ch is then handled by phase 2.
		 */
		switch (mb->st) {
		case MB_KI1:
			if (ch == KI_1) {
				mb->st = MB_KI2;
			}
			else {
				mb->st = MB_INITIAL;
				put(033);
			}
			break;
		case MB_KI2:
			if (ch == kanji_in || ch == '@' || ch == 'B') {
				/* Sequence complete: enter kanji mode; zero ch so the final byte is not treated as text. */
				mb->st = MB_INKANJI;
				ch = 0;
			}
			else {
				mb->st = MB_INITIAL;
				put(033);
				put(KI_1);
			}
			break;
		case MB_KO1:
			if (ch == KO_1) {
				mb->st = MB_KO2;
			}
			else {
				mb->st = MB_INKANJI;
				put(033);
			}
			break;
		case MB_KO2:
			if (ch == kanji_out || ch == 'J' || ch == 'H' || ch == 'B') {
				/* Sequence complete: back to single-byte mode; zero ch so the final byte is not treated as text. */
				mb->st = MB_INITIAL;
				ch = 0;
			}
			else {
				mb->st = MB_INKANJI;
				put(033);
				put(KO_1);
			}
			break;
		default:
			break;
		}/* switch */
		/*
		 * Phase 2: handle ch in the (possibly just updated) mode.
		 * The intermediate states MB_KI2 and MB_KO2 fall to default
		 * and produce nothing.
		 */
		switch (mb->st) {
		case MB_INITIAL:
			if (ch == EOF)
				ret = 0;
			else if (ch == 033)
				mb->st = MB_KI1;	/* possible start of a kanji-in sequence */
			else
				ret = ch;
			break;
		case MB_INKANJI:
			if (ch == EOF) {
				/* Flush an unpaired first byte; with nothing pending the state is left alone. */
				if (mb->hi) {
					mb->st = MB_INITIAL;
					put(mb->hi);
					mb->hi = 0;
				}
			}
			else if (ch == 033)
				mb->st = MB_KO1;	/* possible start of a kanji-out sequence */
			else {
				if (mb->hi) {
					/* Second byte: combine with the stored first byte. */
					ret = (mb->hi << 8) + ch;
					mb->hi = 0;
				}
				else
					mb->hi = ch;	/* first byte: hold it until its partner arrives */
			}
			break;
		default:
			break;
		}/* switch */
	}
	return ret;
}/* get_jis */

/*
 * Convert an EUC byte stream to shift-JIS.
 * Call once with ch == 0, then once per input byte, and finally with
 * ch == EOF.  Output is written through put().
 * Returns the shift-JIS code written by this call, or 0 if nothing
 * was produced.
 */
int
euc_to_sjis(struct MB *mb, int ch, int (*put)(int))
{
	int code = get_euc(mb, ch, put);

	if (code == 0)
		return code;
	code = etos(code);
	if (code == 0)
		return code;
	/* Two-byte result: high byte first. */
	if ((code & 0xff00) != 0)
		put((code >> 8) & 0xff);
	put(code & 0xff);
	return code;
}/* euc_to_sjis */

/*
 * Convert a JIS byte stream to shift-JIS.
 * Call once with ch == 0, then once per input byte, and finally with
 * ch == EOF.  Output is written through put().
 * Returns the shift-JIS code written by this call, or 0 if nothing
 * was produced.
 */
int
jis_to_sjis(struct MB *mb, int ch, int (*put)(int))
{
	int code = get_jis(mb, ch, put);

	if (code == 0)
		return code;
	code = jtos(code);
	if (code == 0)
		return code;
	/* Two-byte result: high byte first. */
	if ((code & 0xff00) != 0)
		put((code >> 8) & 0xff);
	put(code & 0xff);
	return code;
}/* jis_to_sjis */

/*
 * Convert a shift-JIS byte stream to EUC.
 * Call once with ch == 0, then once per input byte, and finally with
 * ch == EOF.  Output is written through put().
 * Returns the EUC code written by this call, or 0 if nothing was
 * produced.
 * NOTE(review): single-byte values are written unchanged, so
 * half-width kana bytes appear to pass through without the 0x8e
 * prefix that strstoe() adds - confirm whether that is intended.
 */
int
sjis_to_euc(struct MB *mb, int ch, int (*put)(int))
{
	int code = get_sjis(mb, ch, put);

	if (code == 0)
		return code;
	code = stoe(code);
	if (code == 0)
		return code;
	/* Two-byte result: high byte first. */
	if ((code & 0xff00) != 0)
		put((code >> 8) & 0xff);
	put(code & 0xff);
	return code;
}/* sjis_to_euc */

/*
 * Convert a shift-JIS byte stream to JIS.
 * Call once with ch == 0, then once per input byte, and finally with
 * ch == EOF.  Output is written through put().
 *
 * Before the first two-byte code of a run the kanji-in sequence
 * (ESC, KI_1, kanji_in) is written; before the next single-byte code
 * the kanji-out sequence (ESC, KO_1, kanji_out) is written.
 * At EOF the kanji-out sequence is also written if the output is
 * still in kanji mode, so the stream never ends in two-byte mode and
 * the mode flag is clean for the next stream.
 *
 * The current output mode is kept in a function-local static, so
 * only one output stream can be converted at a time.
 *
 * Returns the JIS code written by this call, or 0 if nothing was
 * produced.
 */
int
sjis_to_jis(struct MB *mb, int ch, int (*put)(int))
{
	static int inkanji = 0;
	int wc;

	/*
	 * Leave kanji mode before get_sjis() flushes any unpaired lead
	 * byte, so that byte is not written inside a two-byte run.
	 */
	if (ch == EOF && inkanji) {
		inkanji = 0;
		put('\033');
		put(KO_1);
		put(kanji_out);
	}
	wc = get_sjis(mb, ch, put);
	if (wc != 0) {
		wc = stoj(wc);
		if (wc != 0) {
			if (wc & 0xff00) {
				if (!inkanji) {
					inkanji = 1;
					put('\033');
					put(KI_1);
					put(kanji_in);
				}
				put((wc >> 8) & 0xff);
			}
			else {
				if (inkanji) {
					inkanji = 0;
					put('\033');
					put(KO_1);
					put(kanji_out);
				}
			}
			put(wc & 0xff);
		}
	}
	return wc;
}/* sjis_to_jis */

/*
 * Combine the two bytes at s[0] and s[1] into one 16-bit code, s[0]
 * in the high byte.  Both bytes are read as unsigned char.
 * The argument is evaluated twice, so it must have no side effects.
 */
#define GETWCH(s) (((unsigned char)(s)[0] << 8) | (unsigned char)(s)[1])

/*
 * Convert the NUL-terminated EUC string src to shift-JIS in buf.
 * A byte accepted by is1euc() is combined with the following byte and
 * passed through etos(); every other byte is copied as is.
 * The result is NUL-terminated.  Returns buf.
 */
char *
stretos(char *buf, const char *src)
{
	const unsigned char *in;
	char *out = buf;

	for (in = (const unsigned char *)src; *in != '\0'; in++) {
		int code = *in;
		if (is1euc(*in)) {
			code = etos(GETWCH(in));
			/* Skip the trail byte, but never step over the terminator. */
			if (in[1] != '\0')
				in++;
		}
		if ((code & ~0xff) != 0)
			*out++ = code >> 8;
		*out++ = code;
	}/* for */
	*out = '\0';
	return buf;
}/* stretos */

/*
 * Convert the NUL-terminated shift-JIS string src to EUC in buf.
 * A byte accepted by is1sjis() is combined with the following byte
 * and passed through stoe(); a byte accepted by isskana() becomes the
 * two bytes 0x8e, byte; every other byte is copied as is.
 * buf must not be the same as src: a kana byte grows from one byte to
 * two.  The result is NUL-terminated.  Returns buf.
 */
char *
strstoe(char *buf, const char *src)
{
	const unsigned char *in;
	char *out = buf;

	for (in = (const unsigned char *)src; *in != '\0'; in++) {
		int code;
		if (is1sjis(*in)) {
			code = stoe(GETWCH(in));
			/* Skip the trail byte, but never step over the terminator. */
			if (in[1] != '\0')
				in++;
		}
		else if (isskana(*in))
			code = 0x8e00 | *in;
		else
			code = *in;
		if ((code & ~0xff) != 0)
			*out++ = code >> 8;
		*out++ = code;
	}/* for */
	*out = '\0';
	return buf;
}/* strstoe */

/*
 * Return the number of bytes strstoe() would write for src, not
 * counting the terminating NUL.
 */
size_t
strstoelen(const char *src)
{
	const unsigned char *in;
	size_t len = 0;

	for (in = (const unsigned char *)src; *in != '\0'; in++) {
		int code;
		if (is1sjis(*in)) {
			code = stoe(GETWCH(in));
			/* Skip the trail byte, but never step over the terminator. */
			if (in[1] != '\0')
				in++;
		}
		else if (isskana(*in))
			code = 0x8e00 | *in;
		else
			code = *in;
		/* A code with any bit above the low byte takes two output bytes. */
		len += ((code & ~0xff) != 0) ? 2 : 1;
	}/* for */
	return len;
}/* strstoelen */

/*
 * Replacement codes for single-byte kana, used by strstoe2().
 * The table is indexed with (byte - 0xa0) for bytes accepted by
 * isskana(), and each entry is written as two bytes, high byte first.
 * Presumably these are the EUC full-width equivalents of the
 * half-width kana 0xa0..0xdf - confirm against the code charts.
 */
static unsigned short kana_zen[64] = {
	0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 
	0xa5a3, 0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 
	0xa1bc, 0xa5a2, 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 
	0xa5af, 0xa5b1, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 
	0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6, 0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 
	0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5, 0xa5d8, 0xa5db, 0xa5de, 
	0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6, 0xa5e8, 0xa5e9, 
	0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab, 0xa1ac, 
};

/*
 * Like strstoe(), but a byte accepted by isskana() is replaced by its
 * entry in kana_zen[] instead of being prefixed with 0x8e.
 * The result is NUL-terminated.  Returns buf.
 */
char *
strstoe2(char *buf, const char *src)
{
	const unsigned char *in;
	char *out = buf;

	for (in = (const unsigned char *)src; *in != '\0'; in++) {
		int code;
		if (is1sjis(*in)) {
			code = stoe(GETWCH(in));
			/* Skip the trail byte, but never step over the terminator. */
			if (in[1] != '\0')
				in++;
		}
		else if (isskana(*in))
			code = kana_zen[*in - 0xa0];
		else
			code = *in;
		if ((code & ~0xff) != 0)
			*out++ = code >> 8;
		*out++ = code;
	}/* for */
	*out = '\0';
	return buf;
}/* strstoe2 */

#ifdef TEST

/* Program name (argv[0]); used as the prefix of diagnostics. */
char *myname;
/*
 * Usage text; it is passed to fprintf() as the format, with myname
 * substituted for %s.
 * NOTE(review): the synopsis shows only [-es] although main() also
 * accepts -j, -v and -V.
 */
char usage_msg[] =
	"euc/sjis converter V0.8086\n"
	"usage: %s [-es] [file ...]\n"
	"\t-e\tsjis->euc\n"
	"\t-j\tsjis->jis\n"
	"\t-s\teuc->sjis\n"
	;

/* Set to 1 by the -v option; not read anywhere in the visible part of this file. */
int verbose;

/* Converter applied to every input byte; chosen by -e/-j/-s, euc->sjis by default. */
int (*filter)(struct MB *mb, int ch, int (*put)(int)) = euc_to_sjis;

/*
 * Run the selected filter over every byte of fp, writing the result
 * to standard output with putchar().
 * The filter is called with 0 first and with EOF last.
 * Always returns 0.
 */
int
fnain(FILE *fp)
{
	struct MB mb;
	int ch;

	filter(&mb, 0, putchar);
	for (ch = fgetc(fp); ch != EOF; ch = fgetc(fp))
		filter(&mb, ch, putchar);
	filter(&mb, EOF, putchar);
	return 0;
}/* fnain */

/*
 * Open the file called name and convert it with fnain().
 * Returns the result of fnain(), or -1 (after printing a message on
 * stderr) when the file cannot be opened.
 */
int
nain(const char *name)
{
	int status;
	FILE *fp = fopen(name, "r");

	if (fp == NULL) {
		fprintf(stderr, "%s: %s: cannot open\n", myname, name);
		return -1;
	}
	status = fnain(fp);
	fclose(fp);
	return status;
}/* nain */

/* getopt() interface, declared by hand when building with Borland C. */
#ifdef __BORLANDC__
extern int optind;
extern char *optarg;
extern int getopt(int, char **, const char *);
#endif

/*
 * Test driver: pick a converter from the command-line options and
 * run it over the named files, or over stdin when none are given.
 *   -e  sjis->euc    -j  sjis->jis    -s  euc->sjis (default)
 *   -v  set verbose  -V or any unknown option: print usage
 * Returns 0 on success, 1 after printing usage or when a file could
 * not be processed.
 */
int
main(int argc, char *argv[])
{
	int opt, i;
	int want_usage = 0, failed = 0;

	myname = argv[0];
	while ((opt = getopt(argc, argv, "ejsvV")) != EOF) {
		if (opt == 'v')
			verbose = 1;
		else if (opt == 'e')
			filter = sjis_to_euc;
		else if (opt == 'j')
			filter = sjis_to_jis;
		else if (opt == 's')
			filter = euc_to_sjis;
		else
			want_usage++;	/* 'V' and anything unrecognised */
	}/* while */

	if (want_usage) {
		fprintf(stderr, usage_msg, myname);
		return 1;
	}

	if (argc - optind == 0) {
		if (fnain(stdin) < 0)
			failed = 1;
	}
	else {
		/* Keep going after a failure so every file is attempted. */
		for (i = optind; i < argc; i++) {
			if (nain(argv[i]) < 0)
				failed = 1;
		}/* for */
	}
	return failed;
}/* main */

#endif


/* syntax highlighted by Code2HTML, v. 0.9.1 */