/* Convertors for UTF-8 Name: UTF-8 [RFC2044] MIBenum: 106 Source: RFC 2044 Alias: */ #include #include #include #include #include "xlocale.h" #include "xwchar.h" int _UTF8_init __P((_LocaleInfo *loc, const char *encoding)); static void _UTF8_stateinit __P((mbstate_t *)); static int _UTF8_mbsinit __P((const mbstate_t *)); static size_t _UTF8_mbrtowc __P((wchar_t*, const char*, size_t, mbstate_t *)); static size_t _UTF8_wcrtomb __P((char *, wchar_t, mbstate_t *)); int _UTF8_init(loc, encoding) _LocaleInfo *loc; const char *encoding; { if (!strcasecmp(encoding, "UTF-8")) { loc->mbcurmax = 6; loc->stateinit = _UTF8_stateinit; loc->mbsinit = _UTF8_mbsinit; loc->mbrtowc = _UTF8_mbrtowc; loc->wcrtomb = _UTF8_wcrtomb; return 0; } else return -1; } static void _UTF8_stateinit(state) mbstate_t *state; { } static int _UTF8_mbsinit(state) const mbstate_t *state; { return 1; } static size_t _UTF8_mbrtowc(pwc, s, len, mbs) wchar_t *pwc; const char *s; size_t len; mbstate_t *mbs; { u_char *in = (u_char *)s; int c, c2, c3, c4, c5, c6; wchar_t wc; if (s == NULL) { /* initial state */ /* _UTF8_stateinit(mbs); */ return 0; /* Shift_Jis has no states */ } if (len < 1) return -1; else if (!((c = *in++) & 0x80)) /* US ASCII */ wc = c; else if (!(c & 0x40)) { goto err; } else if (!(c & 0x20)) { if (len < 2 || ((c2 = *in++) & 0xc0) != 0x80) goto err; wc = (c & 0x1f) << 6 | (c2 & 0x3f); } else if (!(c & 0x10)) { if (len < 3 || ((c2 = *in++) & 0xc0) != 0x80 || ((c3 = *in++) & 0xc0) != 0x80) goto err; wc = (c & 0x1f) << 12 | (c2 & 0x3f) << 6 | (c3 & 0x3f) << 6; } else if (!(c & 0x08)) { if (len < 4 || ((c2 = *in++) & 0xc0) != 0x80 || ((c3 = *in++) & 0xc0) != 0x80 || ((c4 = *in++) & 0xc0) != 0x80) goto err; wc = (c & 0x1f) << 18 | (c2 & 0x3f) << 12 | (c3 & 0x3f) << 6 | (c4 & 0x3f); } else if (!(c & 0x04)) { if (len < 5 || ((c2 = *in++) & 0xc0) != 0x80 || ((c3 = *in++) & 0xc0) != 0x80 || ((c4 = *in++) & 0xc0) != 0x80 || ((c5 = *in++) & 0xc0) != 0x80) goto err; wc = (c & 0x1f) << 24 | (c2 & 0x3f) << 18 | (c3 & 0x3f) << 12 | (c4 & 0x3f) << 6 | (c5 & 0x3f); } else if (!(c & 0x02)) { if (len < 6 || ((c2 = *in++) & 0xc0) != 0x80 || ((c3 = *in++) & 0xc0) != 0x80 || ((c4 = *in++) & 0xc0) != 0x80 || ((c5 = *in++) & 0xc0) != 0x80 || ((c6 = *in++) & 0xc0) != 0x80) goto err; wc = (c & 0x1f) << 30 | (c2 & 0x3f) << 24 | (c3 & 0x3f) << 18 | (c4 & 0x3f) << 12 | (c5 & 0x3f) << 6 | (c6 & 0x3f); } else goto err; if (pwc) *pwc = wc; return ((const char*)in - s); /* return length */ err: return -1; } static size_t _UTF8_wcrtomb(s, wc, wcs) char *s; wchar_t wc; mbstate_t *wcs; { return 1; }