/* Convertors for iso-2022 family */ #include #include #include #include #include "xlocale.h" #include "xwchar.h" int _ISO2022_init __P((_LocaleInfo *loc, const char *encoding)); static void _ISO2022_stateinit __P((mbstate_t *)); static int _ISO2022_mbsinit __P((const mbstate_t *)); static size_t _ISO2022_mbrtowc __P((wchar_t*,const char*, size_t, mbstate_t*)); static size_t _ISO2022_wcrtomb __P((char *, wchar_t, mbstate_t *)); typedef struct { int default_invoke_l; int default_invoke_r; wchar_t default_head[4]; /* default G0-G3 */ int bit; /* bit length */ int locking_shift; /* use locking-shift */ int ascii_before_ctl; int ascii_after_lf; /* for japanese only */ int ascii_roman; /* ASCII / JIS0201-1976-Roman */ int jis_version; /* jisx0208-1983 / jisx0208-1976 */ } _Iso2022Info; static struct { char *encoding; _Iso2022Info info; } iso2022family[] = { { "ISO-2022-JP", { 0, 0, { HEAD_ASCII, HEAD_ASCII, HEAD_ASCII, HEAD_ASCII}, 0, 0, 1, 1, 1, 1 }}, { "EUC-JP", { 0, 1, { HEAD_JIS_ROMAN, HEAD_JISX0208, HEAD_JISX0201, HEAD_JISX0212}, 1, 1, 0, 0, 0, 0 }}, { "EUC-KR", { 0, 1, { HEAD_ASCII, HEAD_KSC5601, HEAD_ASCII, HEAD_ASCII}, 1, 1, 0, 0, 0, 0 }}, { "x-ctext", { 0, 1, { HEAD_ASCII, HEAD_ISO8859_1, HEAD_ISO8859_1, HEAD_ISO8859_1}, 1, 1, 0, 0, 0, 0 }}, }; int _ISO2022_init(loc, encoding) _LocaleInfo *loc; const char *encoding; { _Iso2022Info *info; int i; loc->mbcurmax = 6; loc->stateinit = _ISO2022_stateinit; loc->mbsinit = _ISO2022_mbsinit; loc->mbrtowc = _ISO2022_mbrtowc; loc->wcrtomb = _ISO2022_wcrtomb; if (!loc->info) { if ((info = malloc(sizeof(*info))) == NULL) return -1; else loc->info = info; } else { if ((info = realloc(loc->info, sizeof(*info))) == NULL) { free(loc->info); loc->info = NULL; return -1; } else loc->info = info; } for (i=0; i< sizeof iso2022family / sizeof iso2022family[0]; i++) { if (!strcasecmp(iso2022family[i].encoding, encoding)) { *info = iso2022family[i].info; return 0; } } return -1; } static void _ISO2022_stateinit(state) mbstate_t *state; { int i; _Iso2022Info *info = _CurrentLocaleInfo->info; state->invoke_l = info->default_invoke_l; state->invoke_r = info->default_invoke_r; for (i=0;i<4;i++) state->head[i] = info->default_head[i]; } static int _ISO2022_mbsinit(state) const mbstate_t *state; { _Iso2022Info *info = _CurrentLocaleInfo->info; int i; if (state->invoke_l != info->default_invoke_l || state->invoke_r != info->default_invoke_r) return 0; for (i=0;i<4;i++) if (state->head[i] != info->default_head[i]) return 0; return 1; } enum { S_NORMAL, S_ESC, S_MULTI, S_94, S_96, S_94x94, S_96x96 }; static size_t _ISO2022_mbrtowc(pwc, s, len, mbs) wchar_t *pwc; const char *s; size_t len; mbstate_t *mbs; { _Iso2022Info *info = _CurrentLocaleInfo->info; u_char *in; wchar_t wc; int state, digignate_no, invoke; if (s == NULL) { /* initial state */ _ISO2022_stateinit(mbs); return 1; /* iso2022 has states */ } state = S_NORMAL; in = (u_char*)s; for (;;) { switch (state) { case S_NORMAL: if (*in == '\033') { /* ESCAPE */ state = S_ESC; break; } if (info->locking_shift) { if (*in == SI) { /* invoke to G0 */ mbs->invoke_l = 0; break; } if (*in == SO) { /* invoke to G1 */ mbs->invoke_l = 1; break; } if (info->bit) { if (*in == SS2R) { /* single shift G2 -> GR */ invoke = 2; in++; len--; goto end; } if (*in == SS3R) { /* single shift G3 -> GR */ invoke = 3; in++; len--; goto end; } } } /* locking_shift */ /* normal characters */ if (info->bit) { invoke = (*in & 0x80)? mbs->invoke_r: mbs->invoke_l; }else invoke = mbs->invoke_l; goto end; case S_ESC: if (*in == '$') { /* multibyte */ state = S_MULTI; break; } if (strchr("()*+", *in)) { /* 94 */ state = S_94; digignate_no = *in - '('; break; } if (strchr(",-./", *in)) { /* 96 */ state = S_96; digignate_no = *in - ','; break; } if (info->locking_shift) { if (*in == LS2) { /* invoke to G2 */ mbs->invoke_l = 2; state = S_NORMAL; break; } if (*in == LS3) { /* invoke to G3 */ mbs->invoke_l = 3; state = S_NORMAL; break; } if ((*in & 0x7f) == SS2) { /* invoke to G2, only one char. */ in++; len--; invoke = 2; goto end; } if ((*in & 0x7f) == SS3) { /* invoke to G3, only one char. */ in++; len--; invoke = 3; goto end; } if (info->bit) { if (*in == LS1R) { mbs->invoke_r = 1; state = S_NORMAL; break; } if (*in == LS2R) { mbs->invoke_r = 2; state = S_NORMAL; break; } if (*in == LS3R) { mbs->invoke_r = 3; state = S_NORMAL; break; } } } /* invalid sequence */ goto err; case S_96: mbs->head[digignate_no] = (*in == 'A')? HEAD_ISO8859_1 : (HEAD_96|(*in<<24)); state = S_NORMAL; break; case S_94: mbs->head[digignate_no] = (*in == 'B')? HEAD_ASCII : (HEAD_94|(*in<<24)); /* XXX */ state = S_NORMAL; break; case S_MULTI: if (*in == 0x40 /* JIS C 6226-1978 */ || *in == 0x41 /* GB 2312-80 */ || *in == 0x42) { /* JIS X0208 */ mbs->head[0] = HEAD_94x94|(*in<<24); state = S_NORMAL; break; } if (strchr("()*+", *in)) { /* 94x94 */ state = S_94x94; digignate_no = *in - '('; break; } if (strchr("-./", *in)) { /* 96x96 */ state = S_96x96; digignate_no = *in - ','; break; } /* invalid sequence */ goto err; case S_94x94: mbs->head[digignate_no] = HEAD_94x94|(*in<<24); state = S_NORMAL; break; case S_96x96: mbs->head[digignate_no] = HEAD_96x96|(*in<<24); state = S_NORMAL; break; } in++; if (--len < 0) return -1; /* overrun */ } err: return -1; /* error */ end: if (IS_MB(mbs->head[invoke]) && len > 1) { /* multibyte */ wc = mbs->head[invoke] | (in[0] & 0x7f)<<8 | (in[1] & 0x7f); in += 2; } else if (len > 0) { wc = mbs->head[invoke] | (*in++ & 0x7f); } else return -1; if (pwc) *pwc = wc; return ((const char*)in - s); /* return length */ } /* XXX don't work correctry */ /* * ISO-2022 wchar -> multibyte */ static size_t _ISO2022_wcrtomb(s, wc, wcs) char *s; wchar_t wc; mbstate_t *wcs; { /*_Iso2022Info *info = _CurrentLocaleInfo->info;*/ u_char *out; wchar_t head; int mbflag, invoke; u_char mask; if (s == NULL) { /* initial state */ _ISO2022_stateinit(wcs); return 1; /* iso2022 has state */ } if (wc & 0x80000000) { /* UNICODE area */ return -1; } out = (u_char*)s; head = wc & HEAD_MASK; mask = 0; if (head == wcs->head[0]) { /* already invoked? */ invoke = 0; mbflag = IS_MB(wcs->head[0])?1:0; } else if (head == wcs->head[1]) { /* already invoked? */ invoke = 1; mbflag = IS_MB(wcs->head[1])?1:0; } else { int designator = (wc >> 24) & 0x7f; wchar_t h = wc & HEAD_MASK; if (h == HEAD_ASCII) { *out++ = '\033'; *out++ = '('; *out++ = 'A'; invoke = 0; mbflag = 0; } else if (h == HEAD_ISO8859_1) { *out++ = '\033'; *out++ = '-'; *out++ = 'B'; invoke = 0; mbflag = 0; } else if (h == HEAD_94) { *out++ = '\033'; *out++ = '('; *out++ = designator; invoke = 0; mbflag = 0; } else if (h == HEAD_96) { *out++ = '\033'; *out++ = '-'; *out++ = designator; invoke = 1; /* 96 shuld desig. to G1-G3 */ mbflag = 0; } else if (h == HEAD_94x94) { *out++ = '\033'; *out++ = '$'; if (designator != 0x40 /* JIS C 6226-1978 */ && designator != 0x41 /* GB 2312-80 */ && designator != 0x42) /* JIS X 0208 */ *out++ = '('; *out++ = designator; invoke = 0; mbflag = 1; } else if (h == HEAD_96x96) { *out++ = '\033'; *out++ = '$'; *out++ = '-'; *out++ = designator; invoke = 1; /* 96x96 must desig. to G1-G3 */ mbflag = 1; } else goto err; wcs->head[invoke] = h; } if ( wcs->invoke_l != invoke ){ *out++ = (invoke == 0)? SI:SO; /* invoke to G0 or G1 */ wcs->invoke_l = invoke; } if (mbflag) /* multibyte */ *out++ = ((wc >> 8) & 0x7f) | mask; *out++ = (wc & 0x7f) | mask; return ((const char*)out-s); err: return -1; }