/*======================================================================*\
|*		Editor mined						*|
|*		CJK character set <-> Unicode mapping tables		*|
\*======================================================================*/

#include "mined.h"
#include "charcode.h"
#include "termprop.h"


/*======================================================================*\
|*		Character properties					*|
\*======================================================================*/

/**
   Check if an encoded character value is one of the markers 
   CHAR_UNKNOWN or CHAR_INVALID.
 */
FLAG
no_char (c)
  unsigned long c;
{
  return ! (c != CHAR_UNKNOWN && c != CHAR_INVALID);
}

/**
   Check if a Unicode value is one of the markers 
   CHAR_UNKNOWN or CHAR_INVALID.
 */
FLAG
no_unichar (u)
  unsigned long u;
{
  return ! (u != CHAR_UNKNOWN && u != CHAR_INVALID);
}

/**
   Check if character is a control character in current encoding.
   (Should be more generic...)
   The encoding modes are checked in the order: mapped text, 
   UTF-8 text, CJK text, other (plain 8 bit) text.
 */
int
iscontrol (c)
  unsigned long c;
{
  if (mapped_text) {
	/* decide on the Unicode value that the character maps to */
	unsigned long unichar = lookup_encodedchar (c);
	if (unichar == '\177') {
		return 1;
	}
	return ! no_unichar (unichar) && unichar < ' ';
  }

  if (utf8_text) {
	/* handle weird mapping of non-Unicode ranges */
	if (unassigned_single_width && rxvt_version > 0 && c < 0x80000000) {
		c &= 0x1FFFFF;
	}
	return c == '\177' || c < ' ';
  }

  if (cjk_text) {
	return c == '\177' || c < ' ';
  }

  /* 8 bit text: the 7 bit part of the value decides */
  return c == '\177' || (c & '\177') < ' ';
}

/**
   Return display indication for a control character:
   '?' for DEL ('\177'), otherwise the character shifted by '@'.
 */
character
controlchar (c)
  character c;
{
  return c == '\177' ? '?' : c + '@';
}

/**
   Return the isolated form of an ALEF character.
   Any value that is not one of the four handled ALEF variants 
   yields the ALEF SYMBOL.
 */
unsigned long
isolated_alef (unichar)
  unsigned long unichar;
{
  switch (unichar) {
  case 0x0622:	/* ALEF WITH MADDA ABOVE */
	return 0xFE81;
  case 0x0623:	/* ALEF WITH HAMZA ABOVE */
	return 0xFE83;
  case 0x0625:	/* ALEF WITH HAMZA BELOW */
	return 0xFE87;
  case 0x0627:	/* ALEF */
	return 0xFE8D;
  default:	/* ? -> ALEF SYMBOL */
	return 0x2135;
  }
}

/**
   Return max value in current encoding.
*/ unsigned long max_char_value () { if (cjk_text) switch (text_encoding_tag) { case 'G': return 0xFFFFFFFF; case 'C': return 0x8EFFFFFF; case 'J': return 0x8FFFFF; default: return 0xFFFF; } else if (utf8_text) { return 0x7FFFFFFF; } else { return 0xFF; } } /** Convert CJK character in current text encoding to byte sequence. */ int cjkencode (cjkchar, buf) unsigned long cjkchar; character * buf; { return cjkencode_char (False, cjkchar, buf); } static int multi_char (term, c) FLAG term; character c; { if (term) { return (character) c >= 0x80 && (! cjk_term || term_encoding_tag != 'S' || (character) c < 0xA1 || (character) c > 0xDF); } else { return multichar (c); } } /** Convert CJK character in terminal or text encoding to byte sequence. */ int cjkencode_char (term, cjkchar, buf) FLAG term; unsigned long cjkchar; character * buf; { int len = 0; int i; char encoding_tag = term ? term_encoding_tag : text_encoding_tag; if (cjkchar >= 0x1000000) { i = (cjkchar >> 16) & 0xFF; if (encoding_tag == 'G' && cjkchar >= 0x80000000 && i >= '0' && i <= '9') { len = 4; } else if (encoding_tag == 'C' && (cjkchar >> 24) == 0x8E) { len = 4; } } else if (cjkchar >= 0x10000) { if (encoding_tag == 'J' && (cjkchar >> 16) == 0x8F) { len = 3; } } else if (cjkchar >= 0x8000 && (cjkchar & 0xFF) > 0 && multi_char (term, (character) (cjkchar >> 8))) { len = 2; } else if (cjkchar < 0x100 && ! 
multi_char (term, cjkchar)) { len = 1; } for (i = len - 1; i >= 0; i --) { buf [i] = cjkchar & 0xFF; cjkchar = cjkchar >> 8; if (buf [i] == '\0') { len = 0; } } buf [len] = '\0'; return len; } /** convert Unicode character to UTF-8 */ int utfencode (unichar, buf) unsigned long unichar; character * buf; { int len; if (unichar < 0x80) { len = 1; * buf ++ = unichar; } else if (unichar < 0x800) { len = 2; * buf ++ = 0xC0 | (unichar >> 6); * buf ++ = 0x80 | (unichar & 0x3F); } else if (unichar < 0x10000) { len = 3; * buf ++ = 0xE0 | (unichar >> 12); * buf ++ = 0x80 | ((unichar >> 6) & 0x3F); * buf ++ = 0x80 | (unichar & 0x3F); } else if (unichar < 0x200000) { len = 4; * buf ++ = 0xF0 | (unichar >> 18); * buf ++ = 0x80 | ((unichar >> 12) & 0x3F); * buf ++ = 0x80 | ((unichar >> 6) & 0x3F); * buf ++ = 0x80 | (unichar & 0x3F); } else if (unichar < 0x4000000) { len = 5; * buf ++ = 0xF8 | (unichar >> 24); * buf ++ = 0x80 | ((unichar >> 18) & 0x3F); * buf ++ = 0x80 | ((unichar >> 12) & 0x3F); * buf ++ = 0x80 | ((unichar >> 6) & 0x3F); * buf ++ = 0x80 | (unichar & 0x3F); } else if (unichar < 0x80000000) { len = 6; * buf ++ = 0xFC | (unichar >> 30); * buf ++ = 0x80 | ((unichar >> 24) & 0x3F); * buf ++ = 0x80 | ((unichar >> 18) & 0x3F); * buf ++ = 0x80 | ((unichar >> 12) & 0x3F); * buf ++ = 0x80 | ((unichar >> 6) & 0x3F); * buf ++ = 0x80 | (unichar & 0x3F); } else { len = 0; } * buf = '\0'; return len; } /** Convert character to byte sequence. */ char * encode_char (c) unsigned long c; { static char buf [7]; if (utf8_text) { (void) utfencode (c, buf); } else if (cjk_text) { (void) cjkencode (c, buf); } else { buf [0] = c; buf [1] = '\0'; } return buf; } /** Check if a CJK character is encoded in the defined code range of the currently active text encoding. 
*/ FLAG valid_cjk (cjkchar, cjkbytes) unsigned long cjkchar; character * cjkbytes; { return valid_cjkchar (False, cjkchar, cjkbytes); } /** Check if a CJK character is encoded in the defined code range of the terminal or text encoding. */ FLAG valid_cjkchar (term, cjkchar, cjkbytes) FLAG term; unsigned long cjkchar; character * cjkbytes; { character cjkbuf [5]; char encoding_tag = term ? term_encoding_tag : text_encoding_tag; if (cjkchar < 0x80) { return True; } if (! cjkbytes) { cjkbytes = cjkbuf; (void) cjkencode_char (term, cjkchar, cjkbytes); } /* GB 18030 >> GBK >> GB 2312-80 GB18030 *? 81-FE 40-7E, 80-FE 81-FE 30-39 81-FE 30-39 Big5-HKSCS >> Big5 Big5 ! A1-FE 40-7E, A1-FE (Big5+ ! 89-FE 40-7E, A1-FE) Big5+ ! 88-FE 40-7E, A1-FE HKSCS-2004! 87-FE 40-7E, A1-FE CNS EUC-TW -> CNS 11643-1992 EUC-TW - A1-FE A1-FE 8E A1-A7 A1-FE A1-FE */ switch (encoding_tag) { case 'G': if (cjkchar > 0xFFFF) { return cjkbytes [0] >= 0x81 && cjkbytes [0] <= 0xFE && cjkbytes [1] >= '0' && cjkbytes [1] <= '9' && cjkbytes [2] >= 0x81 && cjkbytes [2] <= 0xFE && cjkbytes [3] >= '0' && cjkbytes [3] <= '9'; } else { return cjkbytes [0] >= 0x81 && cjkbytes [0] <= 0xFE && cjkbytes [1] >= 0x40 && cjkbytes [1] <= 0xFE && cjkbytes [1] != 0x7F; } case 'B': return cjkbytes [0] >= 0x87 && cjkbytes [0] <= 0xFE && ((cjkbytes [1] >= 0x40 && cjkbytes [1] <= 0x7E) || (cjkbytes [1] >= 0xA1 && cjkbytes [1] <= 0xFE) ) && cjkbytes [2] == 0; case 'C': return (cjkbytes [0] >= 0xA1 && cjkbytes [0] <= 0xFE && cjkbytes [1] >= 0xA1 && cjkbytes [1] <= 0xFE && cjkbytes [2] == 0) || (cjkbytes [0] == 0x8E && cjkbytes [1] >= 0xA1 && cjkbytes [1] <= 0xAF && cjkbytes [2] >= 0xA1 && cjkbytes [2] <= 0xFE && cjkbytes [3] >= 0xA1 && cjkbytes [3] <= 0xFE); /* EUC-JP -> JIS X 0208 + JIX X 0212 EUC-JP !* A1-FE A1-FE 8F A1-FE A1-FE 8E A1-DF Shift-JIS 0213 >> Shift-JIS Shift-JIS A1-DF 81-9F, E0-EF 40-7E, 80-FC */ case 'J': return (cjkbytes [0] >= 0xA1 && cjkbytes [0] <= 0xFE && cjkbytes [1] >= 0xA1 && cjkbytes [1] <= 0xFE && 
cjkbytes [2] == 0 ) || (cjkbytes [0] == 0x8E && cjkbytes [1] >= 0xA1 && cjkbytes [1] <= 0xDF && cjkbytes [2] == 0) || (cjkbytes [0] == 0x8F && cjkbytes [1] >= 0xA1 && cjkbytes [1] <= 0xFE && cjkbytes [2] >= 0xA1 && cjkbytes [2] <= 0xFE && cjkbytes [3] == 0); case 'S': return (cjkchar >= 0xA1 && cjkchar <= 0xDF) || (((cjkbytes [0] >= 0x81 && cjkbytes [0] <= 0x9F) || (cjkbytes [0] >= 0xE0) ) && cjkbytes [1] >= 0x40 && cjkbytes [1] <= 0xFC && cjkbytes [1] != 0x7F && cjkbytes [2] == 0); /* UHC >> KSC EUC-KR ! A1-FE A1-FE UHC * 81-FE 41-5A, 61-7A, 81-FE Johab Johab 84-DE, E0-F9 31-7E, 81-FE */ case 'K': return cjkbytes [0] >= 0x81 && cjkbytes [0] <= 0xFE && ((cjkbytes [1] >= 0x41 && cjkbytes [1] <= 0x5A) || (cjkbytes [1] >= 0x61 && cjkbytes [1] <= 0x7A) || (cjkbytes [1] >= 0x81 && cjkbytes [1] <= 0xFE) ) && cjkbytes [2] == 0; case 'H': return ((cjkbytes [0] >= 0x84 && cjkbytes [0] <= 0xDE) || (cjkbytes [0] >= 0xE0 && cjkbytes [0] <= 0xF9) ) && ((cjkbytes [1] >= 0x31 && cjkbytes [1] <= 0x7E) || (cjkbytes [1] >= 0x81 && cjkbytes [1] <= 0xFE) ) && cjkbytes [2] == 0; default: return False; } } /*======================================================================*\ Conversion tables mapping various CJK encodings to Unicode \*======================================================================*/ #include "charmaps.h" struct charmap_table_entry { struct encoding_table_entry * table; unsigned int * table_len; char * charmap; char * tag2; char tag1; }; static struct charmap_table_entry charmaps_table [] = { #ifdef __TURBOC__ {pc437_table, & pc437_table_len, "CP437", "PC", 'p'}, {pc850_table, & pc850_table_len, "CP850", "PL", 'P'}, #else #include "charmaps.t" #endif }; /*======================================================================*\ |* Configuration string matching *| \*======================================================================*/ /** matchprefix determines whether its first parameter contains its second parameter matching approximately as an initial 
prefix. The match ignores separating '-', '_', and space characters, and does not match case. The algorithm assumes that letters are ASCII as this is used for configuration strings only. */ static int matchprefix (s, m) char * s; char * m; { char cs, cm; do { while (* m == '-' || * m == '_' || * m == ' ') { m ++; } while (* s == '-' || * s == '_' || * s == ' ') { s ++; } if (! * m) { return True; } if (! * s) { return False; } cs = * s; if (cs >= 'a' && cs <= 'z') { cs = cs - 'a' + 'A'; } cm = * m; if (cm >= 'a' && cm <= 'z') { cm = cm - 'a' + 'A'; } if (cm != cs) { return False; } s ++; m ++; } while (True); } /** matchpart determines whether its first parameter contains its second parameter matching approximately as an initial prefix or as a prefix of any part after a '/' or '>' separator. The match ignores separating '-', '_', and space characters, and does not match case. The algorithm assumes that letters are ASCII as this is used for configuration strings only. */ static int matchpart (s, m) char * s; char * m; { char * p; if (matchprefix (s, m)) { return True; } else { p = strpbrk (s, ">/"); if (p) { p ++; return matchpart (p, m); } else { return False; } } } /*======================================================================*\ |* Mapping tables and functions *| \*======================================================================*/ /** Terminal character mapping table and its length */ static struct encoding_table_entry * terminal_table = (struct encoding_table_entry *) 0; static unsigned int terminal_table_len = 0; /** Current CJK/Unicode mapping table and its length */ static struct encoding_table_entry * text_table = (struct encoding_table_entry *) 0; static unsigned int text_table_len = 0; #ifndef use_cjk_tables /* define dummy tables referred in code */ #ifdef __TURBOC__ #define EMPTY 1 #else #define EMPTY 0 #endif static struct encoding_table_entry gb_table [EMPTY]; static struct encoding_table_entry ejis_table [EMPTY]; static struct 
encoding_table_entry sjis_table [EMPTY]; #endif /** Are mapped text and terminal encodings different? */ FLAG remap_chars () { return text_table != terminal_table; } /** List of 2nd characters of 2 Unicode character mappings (mostly accents) for certain 2-character CJK mappings (JIS or HKSCS); must be consistent with range and order of according #defines in charcode.h */ static unsigned int uni2_accents [] = {0x309A, 0x0300, 0x0301, 0x02E5, 0x02E9, 0x0304, 0x030C}; /** Current encoding indications */ char text_encoding_tag = '-'; char * text_encoding_flag = "??"; /* for display in flags menu area */ char term_encoding_tag = '-'; static char * current_text_encoding = ""; static char * term_encoding = ""; /** Return charmap name of current text encoding. */ char * get_text_encoding () { if (utf8_text) { if (utf16_file) { if (utf16_little_endian) { return "UTF-16LE"; } else { return "UTF-16BE"; } } else { return "UTF-8"; } } else if (! cjk_text && ! mapped_text) { return "ISO 8859-1"; } else { return current_text_encoding; } } /** Return charmap name of terminal encoding. */ char * get_term_encoding () { if (* term_encoding == '\0' && utf8_screen) { return "UTF-8"; } else if (* term_encoding == '\0' && ! cjk_term && ! mapped_term) { return "ISO 8859-1"; } else { return term_encoding; } } static FLAG combined_text; /** Return True if active encoding has combining characters. */ FLAG encoding_has_combining () { return utf8_text || (cjk_text && (text_encoding_tag == 'G' || text_encoding_tag == 'J' || text_encoding_tag == 'S')) || (mapped_text && combined_text); } /** Determine if active encoding has combining characters. */ static FLAG mapping_has_combining (term) FLAG term; { unsigned long i; for (i = 0; i < 0x100; i ++) { unsigned long unichar; if (term) { unichar = lookup_mappedtermchar (i); } else { unichar = lookup_encodedchar (i); } if (term ? 
term_iscombining (unichar) : iscombining_unichar (unichar)) { return True; } } return False; } #ifdef split_map_entries /* Decode CJK character value from split table entry. */ static unsigned long decode_cjk (entrypoi, map_table) struct encoding_table_entry * entrypoi; struct encoding_table_entry * map_table; { if (map_table == gb_table) { if ((unsigned int) entrypoi->cjk_ext == 0xFF) { return entrypoi->cjk_base; } else { return ((entrypoi->cjk_base & 0x00FF) << 24) | (entrypoi->cjk_base & 0xFF00) | 0x00300030 | ((((unsigned int) entrypoi->cjk_ext) & 0xF0) << 12) | (((unsigned int) entrypoi->cjk_ext) & 0x0F); } } else { if ((unsigned int) entrypoi->cjk_ext >= 0x90) { return 0x8E000000 | (((unsigned int) entrypoi->cjk_ext) << 16) | entrypoi->cjk_base; } else { return (((unsigned int) entrypoi->cjk_ext) << 16) | entrypoi->cjk_base; } } } #endif static void setup_mapping (term, map_table, map_table_len, tag1, tag2) FLAG term; struct encoding_table_entry * map_table; unsigned int map_table_len; char tag1; char * tag2; { FLAG multi_byte = False; unsigned int j; if (term) { terminal_table = map_table; terminal_table_len = map_table_len; term_encoding_tag = tag1; } else { text_table = map_table; text_table_len = map_table_len; text_encoding_tag = tag1; text_encoding_flag = tag2; } /* check if it is a multi-byte mapping table */ for (j = 0; j < map_table_len; j ++) { unsigned long cjki; #ifdef split_map_entries cjki = decode_cjk (& map_table [j], map_table); #else cjki = map_table [j].cjk; #endif if (cjki > 0xFF) { multi_byte = True; break; } } if (term) { if (multi_byte) { cjk_term = True; mapped_term = False; /* combining_screen is auto-detected */ } else { mapped_term = True; cjk_term = False; /* combining_screen is auto-detected */ } } else { if (multi_byte) { cjk_text = True; mapped_text = False; /* combined_text is dynamically enquired with encoding_has_combining () */ } else { mapped_text = True; cjk_text = False; combined_text = mapping_has_combining (term); } } } 
/** Set either text or terminal character mapping table. Return True on success, False if tag unknown. */ static FLAG set_char_encoding (term, charmap, tag) FLAG term; char * charmap; char tag; { int i; if (charmap && ! term && (streq (":16", charmap) || matchpart ("UTF-16BE", charmap))) { utf8_text = True; utf16_file = True; utf16_little_endian = False; cjk_text = False; mapped_text = False; current_text_encoding = "UTF-16BE"; text_encoding_flag = "16"; return True; } else if (charmap && ! term && (streq (":61", charmap) || matchpart ("UTF-16LE", charmap))) { utf8_text = True; utf16_file = True; utf16_little_endian = True; cjk_text = False; mapped_text = False; current_text_encoding = "UTF-16LE"; text_encoding_flag = "61"; return True; } else if (charmap && ! term && streq (":??", charmap)) { text_table_len = 0; text_encoding_tag = ' '; text_encoding_flag = "??"; utf8_text = False; cjk_text = True; mapped_text = False; current_text_encoding = "[CJK]"; return True; } else if (charmap ? strisprefix ("UTF-8", charmap) : tag == 'U') { if (term) { utf8_screen = True; utf8_input = True; term_encoding = "UTF-8"; } else { utf8_text = True; utf16_file = False; cjk_text = False; mapped_text = False; current_text_encoding = "UTF-8"; text_encoding_flag = "U8"; } return True; } else if (charmap ? matchpart ("ISO 8859-1", charmap) : tag == 'L') { if (term) { utf8_screen = False; utf8_input = False; cjk_term = False; term_encoding = "ISO 8859-1"; } else { utf8_text = False; cjk_text = False; mapped_text = False; current_text_encoding = "ISO 8859-1"; text_encoding_flag = "L1"; } return True; } else { for (i = 0; i < arrlen (charmaps_table); i ++) { if (charmap ? (charmap [0] == ':' ? 
streq (& charmap [1], charmaps_table [i].tag2) : matchpart (charmaps_table [i].charmap, charmap) ) : charmaps_table [i].tag1 == tag) { if (term) { utf8_screen = False; utf8_input = False; term_encoding = charmaps_table [i].charmap; } else { utf8_text = False; current_text_encoding = charmaps_table [i].charmap; } setup_mapping (term, charmaps_table [i].table, * charmaps_table [i].table_len, charmaps_table [i].tag1, charmaps_table [i].tag2); return True; } } } return False; } /** Set terminal character code mapping table according to encoding tag. Return True on success, False if tag unknown. */ FLAG set_term_encoding (charmap, tag) char * charmap; char tag; { return set_char_encoding (True, charmap, tag); } #define dont_debug_set_text_encoding /** Set character mapping table and text encoding variables according to encoding tag. Return True on success, False if tag unknown. */ FLAG set_text_encoding (charmap, tag, debug_tag) char * charmap; char tag; char * debug_tag; { #ifdef debug_set_text_encoding printf ("set_text_encoding [%s] %s %c\n", debug_tag, charmap, tag); #endif return set_char_encoding (False, charmap, tag); } /* Look up a Unicode value in a character set mapping table. @return CJK value, or CHAR_INVALID if not found */ static unsigned long unmap_char (unichar, map_table, map_table_len) unsigned long unichar; struct encoding_table_entry * map_table; unsigned int map_table_len; { #ifdef split_map_entries unsigned char unichar_high = unichar >> 16; unsigned short unichar_low = unichar & 0xFFFF; #endif unsigned int i = 0; struct encoding_table_entry * map_table_poi = map_table; while (i ++ < map_table_len) { #ifdef split_map_entries if ( unichar_low == map_table_poi->unicode_low && unichar_high == map_table_poi->unicode_high ) { return decode_cjk (map_table_poi, map_table); } #else if ( unichar == map_table_poi->unicode ) { return map_table_poi->cjk; } #endif map_table_poi ++; } return CHAR_INVALID; } /* Map a character in a character set mapping table. 
@return Unicode value, or CHAR_INVALID if not found */ static unsigned long map_char (cjk, map_table, map_table_len) unsigned long cjk; struct encoding_table_entry * map_table; unsigned int map_table_len; { int low = 0; int high = map_table_len - 1; int i; unsigned long cjki; while (low <= high) { i = (low + high) / 2; #ifdef split_map_entries cjki = decode_cjk (& map_table [i], map_table); #else cjki = map_table [i].cjk; #endif if (cjki == cjk) { #ifdef split_map_entries if (map_table [i].unicode_high & 0x80) { return 0x80000000 | (uni2_accents [map_table [i].unicode_high & 0x7F] << 16) | (map_table [i].unicode_low); } else { return (((unsigned long) map_table [i].unicode_high) << 16) | (map_table [i].unicode_low); } #else if (map_table [i].unicode & 0x800000) { return 0x80000000 | (uni2_accents [(map_table [i].unicode >> 16) & 0x7F] << 16) | (map_table [i].unicode & 0xFFFF); } else { return map_table [i].unicode; } #endif } else if (cjki >= cjk) { high = i - 1; } else { low = i + 1; } } return CHAR_INVALID; } /*======================================================================*\ |* Conversion functions *| \*======================================================================*/ /** GB18030 algorithmic mapping part */ static unsigned long gb_to_unicode (gb) unsigned long gb; { unsigned int byte2 = (gb >> 16) & 0xFF; unsigned int byte3 = (gb >> 8) & 0xFF; unsigned int byte4 = gb & 0xFF; if (byte2 < '0' || byte2 > '9' || byte3 < 0x81 || byte4 < '0' || byte4 > '9') { return CHAR_INVALID; } return (((((gb >> 24) & 0xFF) - 0x90) * 10 + (byte2 - 0x30)) * 126L + (byte3 - 0x81)) * 10L + (byte4 - 0x30) + 0x10000; } static unsigned long unicode_to_gb (uc) unsigned long uc; { unsigned int a, b, c, d; uc -= 0x10000; d = 0x30 + uc % 10; uc /= 10; c = 0x81 + uc % 126; uc /= 126; b = 0x30 + uc % 10; uc /= 10; a = 0x90 + uc; return (a << 24) | (b << 16) | (c << 8) | d; } /* mapped_char () converts a Unicode value into an encoded character, using the table given as parameter. 
*/ static unsigned long mapped_char (unichar, map_table, map_table_len) unsigned long unichar; struct encoding_table_entry * map_table; unsigned int map_table_len; { unsigned long cjkchar; if (map_table == gb_table && unichar >= 0x10000) { return unicode_to_gb (unichar); } cjkchar = unmap_char (unichar, map_table, map_table_len); if (cjkchar != CHAR_INVALID) { return cjkchar; } if (unichar < 0x20) { /* transparently return control range (for commands) */ return unichar; } else if (unichar < 0x80) { /* transparently map ASCII range unless remapped */ cjkchar = unichar; unichar = map_char (cjkchar, map_table, map_table_len); if (! no_unichar (unichar) && unichar != cjkchar) { return CHAR_INVALID; } else { return cjkchar; } } else { /* notify "not found" */ return CHAR_INVALID; } } /* mappedtermchar () converts a Unicode value into an encoded character, using the terminal encoding (terminal_table). */ unsigned long mappedtermchar (unichar) unsigned long unichar; { return mapped_char (unichar, terminal_table, terminal_table_len); } /* encodedchar () converts a Unicode value into an encoded character, using the current text encoding (text_table). */ unsigned long encodedchar (unichar) unsigned long unichar; { if (cjk_text || mapped_text) { return mapped_char (unichar, text_table, text_table_len); } else if (utf8_text || unichar < 0x100) { return unichar; } else { return CHAR_INVALID; } } /* encodedchar2 () converts two Unicode values into one JIS character, using the current text encoding (text_table). */ unsigned long encodedchar2 (uc1, uc2) unsigned long uc1; unsigned long uc2; { int i; for (i = 0; i < arrlen (uni2_accents); i ++) { if (uni2_accents [i] == uc2) { unsigned long unichar = uc1 | ((0x80 + i) << uni2tag_shift); return mapped_char (unichar, text_table, text_table_len); } } return CHAR_INVALID; } /* lookup_mapped_char () converts an encoded character to Unicode, using the table given as parameter. 
*/ static unsigned long lookup_mapped_char (cjk, map_table, map_table_len) unsigned long cjk; struct encoding_table_entry * map_table; unsigned int map_table_len; { unsigned long unichar; if (map_table == gb_table && cjk >= 0x90000000) { return gb_to_unicode (cjk); } unichar = map_char (cjk, map_table, map_table_len); if (! no_unichar (unichar)) { return unichar; } else if (cjk < 0x80) { /* transparently map ASCII range */ return cjk; } else { /* notify "not found" */ return CHAR_INVALID; } } /* lookup_mappedtermchar () converts an encoded character to Unicode, using the terminal encoding (terminal_table). */ unsigned long lookup_mappedtermchar (cjk) unsigned long cjk; { return lookup_mapped_char (cjk, terminal_table, terminal_table_len); } /* lookup_encodedchar () converts an encoded character to Unicode, using the current text encoding (text_table). */ unsigned long lookup_encodedchar (cjk) unsigned long cjk; { return lookup_mapped_char (cjk, text_table, text_table_len); } /*======================================================================*\ |* End *| \*======================================================================*/