/* * kanji.c - kanji code converter * Copyright (C) 1994, 1996 by candy */ const char rcsid_kanji[] = "$Id: kanji.c,v 3.7 1997/05/02 04:06:57 candy Exp candy $"; #include #include #include "kanji.h" int kanji_in = '@'; int kanji_out = 'J'; static int etoj(int wc); static int etos(int wc); static int jtoe(int wc); static int jtos(int wc); static int stoe(int wc); static int stoj(int wc); static int get_euc(struct MB *mb, int ch, int (*put)(int)); static int get_sjis(struct MB *mb, int ch, int (*put)(int)); static int get_jis(struct MB *mb, int ch, int (*put)(int)); /* * EUC を JIS 0x2121..0x7e7e * 0x00 .. 0xff はそのまま。 */ static int etoj(int wc) { int ret, hib = (wc >> 8) & 0x7f, lob = wc & 0x7f; if (hib != 0) ret = (hib << 8) | lob; else ret = wc; return ret; }/* etoj */ /* * EUC を shift-JIS * 0x00 .. 0xff はそのまま。 */ static int etos(int wc) { int ret, hib = (wc >> 8) & 0x7f, lob = wc & 0x7f; if ((wc & 0xff00) == 0x8e00) { ret = wc & 0xff; } else if (hib != 0) ret = jtos((hib << 8) | lob); else ret = wc; return ret; }/* etos */ /* * JIS 0x2121..0x7e7e を EUC * 0x00 .. 0xff はそのまま。 */ static int jtoe(int wc) { int ret, hib = (wc >> 8) & 0xff, lob = wc & 0xff; if (hib != 0) ret = ((hib | 0x80) << 8) | (lob | 0x80); else ret = wc; return ret; }/* jtoe */ /* * JIS 0x2121..0x7e7e を shift-JIS * 0x00 .. 0xff はそのまま。 */ static int jtos(int wc) { int ret = 0, err = 0; int hib = (wc >> 8) & 0xff, lob = wc & 0xff, hi1 = 0x81, hi2 = 0x21, lo1 = 0x1f; if (hib >= 0x21 && hib < 0x5f) hi1 = 0x81; else { if (hib >= 0x5f && hib < 0x80) hi1 = 0xc1; else err = -1; } if (err == 0) { if ((hib & 1) == 1) { hi2 = 0x21; if (lob >= 0x21 && lob < 0x60) lo1 = 0x1f; else { if (lob >= 0x60 && lob < 0x7f) lo1 = 0x20; else err = -1; } } else { hi2 = 0x22; if (lob >= 0x21 && lob < 0x7f) lo1 = 0x7e; else err = -1; } } if (err == 0) ret = (((hib - hi2) / 2 + hi1) << 8) + lob + lo1; else ret = wc; return ret; }/* jtos */ /* * shift-JIS を EUC * 0x00 .. 0xff はそのまま。 */ static int stoe(int wc) { int ret, hib = (wc >> 8) & 0xff; if (hib != 0) ret = stoj(wc) | 0x8080; else ret = wc; return ret; }/* stoe */ /* * shift-JIS を JIS 0x2121..0x7e7e * 0x00 .. 0xff はそのまま。 */ static int stoj(int wc) { int err = 0, ret = 0; int hib = (wc >> 8) & 0xff, lob = wc & 0xff, hi1 = 0x81, hi2 = 0x21, lo1 = 0x1f; if (hib >= 0x81 && hib < 0xa0) hi1 = 0x81; else { if (hib >= 0xe0 && hib < 0xf0) hi1 = 0xc1; else err = -1; } if (err == 0) { if (lob >= 0x40 && lob < 0x7f) { hi2 = 0x21; lo1 = 0x1f; } else { if (lob >= 0x80 && lob < 0x9f) { hi2 = 0x21; lo1 = 0x20; } else { if (lob >= 0x9f && lob < 0xfd) { hi2 = 0x22; lo1 = 0x7e; } else err = -1; } } } if (err == 0) ret = (((hib - hi1) * 2 + hi2) << 8) + lob - lo1; else ret = wc; return ret; }/* stoj */ /* * ch == 0 で呼び出すと、*mb を初期化する。 * ch == EOF で呼び出すと、プールしてある文字(033 など)を掃き出す。 * 戻り値は、文字コード(single-byte: 1..0xff, multi-byte: 0x8181..0xfefe) * 0 ならば、文字はない(エスケープシーケンス処理中など) */ static int get_euc(struct MB *mb, int ch, int (*put)(int)) { int ret = 0; if (ch == 0) { MB_CLEAR(mb); } else { if (ch == EOF) { if (mb->hi) put(mb->hi); mb->hi = 0; } else if (mb->hi) { ret = (mb->hi << 8) + ch; mb->hi = 0; } else { if (ch >= 0x80) mb->hi = ch; else ret = ch; } } return ret; }/* get_euc */ /* * ch == 0 で呼び出すと、*mb を初期化する。 * ch == EOF で呼び出すと、プールしてある文字(033 など)を掃き出す。 * 戻り値は、文字コード(single-byte: 1..0xff, multi-byte: 0x8181..0xfefe) * 0 ならば、文字はない(エスケープシーケンス処理中など) */ static int get_sjis(struct MB *mb, int ch, int (*put)(int)) { int ret = 0; if (ch == 0) { MB_CLEAR(mb); } else { if (ch == EOF) { if (mb->hi) put(mb->hi); mb->hi = 0; } else if (mb->hi) { ret = (mb->hi << 8) + ch; mb->hi = 0; } else { if (is1sjis(ch)) mb->hi = ch; else ret = ch; } } return ret; }/* get_sjis */ /* * ch == 0 で呼び出すと、*mb を初期化する。 * ch == EOF で呼び出すと、プールしてある文字(033 など)を掃き出す。 * 戻り値は、文字コード(single-byte: 1..0xff, multi-byte: 0x2121..0xfefe) * 0 ならば、文字はない(エスケープシーケンス処理中など) */ static int get_jis(struct MB *mb, int ch, int (*put)(int)) { int ret = 0; if (ch == 0) { MB_CLEAR(mb); } else if (ch == '\n') { return ch; } else { switch (mb->st) { case MB_KI1: if (ch == KI_1) { mb->st = MB_KI2; } else { mb->st = MB_INITIAL; put(033); } break; case MB_KI2: if (ch == kanji_in || ch == '@' || ch == 'B') { mb->st = MB_INKANJI; ch = 0; } else { mb->st = MB_INITIAL; put(033); put(KI_1); } break; case MB_KO1: if (ch == KO_1) { mb->st = MB_KO2; } else { mb->st = MB_INKANJI; put(033); } break; case MB_KO2: if (ch == kanji_out || ch == 'J' || ch == 'H' || ch == 'B') { mb->st = MB_INITIAL; ch = 0; } else { mb->st = MB_INKANJI; put(033); put(KO_1); } break; default: break; }/* switch */ switch (mb->st) { case MB_INITIAL: if (ch == EOF) ret = 0; else if (ch == 033) mb->st = MB_KI1; else ret = ch; break; case MB_INKANJI: if (ch == EOF) { if (mb->hi) { mb->st = MB_INITIAL; put(mb->hi); mb->hi = 0; } } else if (ch == 033) mb->st = MB_KO1; else { if (mb->hi) { ret = (mb->hi << 8) + ch; mb->hi = 0; } else mb->hi = ch; } break; default: break; }/* switch */ } return ret; }/* get_jis */ /* * EUC 文字列を shift-JIS に変換する。 * ch == 0 でまず呼び出し、文字列から1バイトずつ ch に入れて呼び出し、 * 最後は ch == EOF で呼び出す。 * 出力は put() を通じて行われる。 */ int euc_to_sjis(struct MB *mb, int ch, int (*put)(int)) { int wc = get_euc(mb, ch, put); if (wc != 0) { wc = etos(wc); if (wc != 0) { if (wc & 0xff00) { put((wc >> 8) & 0xff); } put(wc & 0xff); } } return wc; }/* euc_to_sjis */ /* * JIS 文字列を shift-JIS に変換する。 * ch == 0 でまず呼び出し、文字列から1バイトずつ ch に入れて呼び出し、 * 最後は ch == EOF で呼び出す。 * 出力は put() を通じて行われる。 */ int jis_to_sjis(struct MB *mb, int ch, int (*put)(int)) { int wc = get_jis(mb, ch, put); if (wc != 0) { wc = jtos(wc); if (wc != 0) { if (wc & 0xff00) { put((wc >> 8) & 0xff); } put(wc & 0xff); } } return wc; }/* jis_to_sjis */ /* */ int sjis_to_euc(struct MB *mb, int ch, int (*put)(int)) { int wc = get_sjis(mb, ch, put); if (wc != 0) { wc = stoe(wc); if (wc != 0) { if (wc & 0xff00) { put((wc >> 8) & 0xff); } put(wc & 0xff); } } return wc; }/* sjis_to_euc */ /* */ int sjis_to_jis(struct MB *mb, int ch, int (*put)(int)) { int wc = get_sjis(mb, ch, put); static int inkanji = 0; if (wc != 0) { wc = stoj(wc); if (wc != 0) { if (wc & 0xff00) { if (!inkanji) { inkanji = 1; put('\033'); put(KI_1); put(kanji_in); } put((wc >> 8) & 0xff); } else { if (inkanji) { inkanji = 0; put('\033'); put(KO_1); put(kanji_out); } } put(wc & 0xff); } } return wc; }/* sjis_to_jis */ #define GETWCH(s) (((unsigned char)(s)[0] << 8) | (unsigned char)(s)[1]) /* * */ char * stretos(char *buf, const char *src) { const unsigned char *s = (const unsigned char *)src; char *d = buf; while (*s != '\0') { int wc; if (is1euc(*s)) { wc = GETWCH(s); wc = etos(wc); if (s[1] != '\0') s++; } else { wc = *s; } if ((wc & ~0xff) != 0) { *d++ = wc >> 8; } *d++ = wc; s++; }/* while */ *d = '\0'; return buf; }/* stretos */ /* * buf == src だとだめ(カナの時)。 */ char * strstoe(char *buf, const char *src) { const unsigned char *s = (const unsigned char *)src; char *d = buf; while (*s != '\0') { int wc; if (is1sjis(*s)) { wc = GETWCH(s); wc = stoe(wc); if (s[1] != '\0') s++; } else if (isskana(*s)) { wc = 0x8e00 | *s; } else { wc = *s; } if ((wc & ~0xff) != 0) { *d++ = wc >> 8; } *d++ = wc; s++; }/* while */ *d = '\0'; return buf; }/* strstoe */ /* * strstoe() した時のバイト数を返す。 */ size_t strstoelen(const char *src) { size_t d = 0; const unsigned char *s = (const unsigned char *)src; while (*s != '\0') { int wc; if (is1sjis(*s)) { wc = GETWCH(s); wc = stoe(wc); if (s[1] != '\0') s++; } else if (isskana(*s)) { wc = 0x8e00 | *s; } else { wc = *s; } if ((wc & ~0xff) != 0) { d++; } d++; s++; }/* while */ return d; }/* strstoelen */ static unsigned short kana_zen[64] = { 0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3, 0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2, 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6, 0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5, 0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6, 0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab, 0xa1ac, }; char * strstoe2(char *buf, const char *src) { const unsigned char *s = (unsigned char *)src; char *d = buf; while (*s != '\0') { int wc; if (is1sjis(*s)) { wc = GETWCH(s); wc = stoe(wc); if (s[1] != '\0') s++; } else if (isskana(*s)) { wc = kana_zen[*s - 0xa0]; } else { wc = *s; } if ((wc & ~0xff) != 0) { *d++ = wc >> 8; } *d++ = wc; s++; }/* while */ *d = '\0'; return buf; }/* strstoe2 */ #ifdef TEST char *myname; char usage_msg[] = "euc/sjis converter V0.8086\n" "usage: %s [-es] [file ...]\n" "\t-e\tsjis->euc\n" "\t-j\tsjis->jis\n" "\t-s\teuc->sjis\n" ; int verbose; int (*filter)(struct MB *mb, int ch, int (*put)(int)) = euc_to_sjis; int fnain(FILE *fp) { struct MB mb; int err = 0, ch; filter(&mb, 0, putchar); while ((ch = fgetc(fp)) != EOF) { filter(&mb, ch, putchar); }/* while */ filter(&mb, EOF, putchar); return err; }/* fnain */ int nain(const char *name) { int err = -1; FILE *fp = fopen(name, "r"); if (fp == NULL) { fprintf(stderr, "%s: %s: cannot open\n", myname, name); } else { err = fnain(fp); fclose(fp); } return err; }/* nain */ #ifdef __BORLANDC__ extern int optind; extern char *optarg; extern int getopt(int, char **, const char *); #endif int main(int argc, char *argv[]) { int ex, ch, show_usage = 0; myname = argv[0]; while ((ch = getopt(argc, argv, "ejsvV")) != EOF) { switch (ch) { default: case 'V': show_usage++; break; case 'v': verbose = 1; break; case 'e': filter = sjis_to_euc; break; case 'j': filter = sjis_to_jis; break; case 's': filter = euc_to_sjis; break; }/* switch */ }/* while */ ex = 1; if (show_usage) { fprintf(stderr, usage_msg, myname); } else { ex = 0; if (argc - optind == 0) { if (fnain(stdin) < 0) ex = 1; } else { int i; for (i = optind; i < argc; i++) { if (nain(argv[i]) < 0) ex = 1; }/* for */ } } return ex; }/* main */ #endif