/*
* Copyright (c) 1994-2005 Kazushi (Jam) Marukawa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice in the documentation and/or other materials provided with
* the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Routines to manipulate a buffer to hold string of multi bytes character.
* Detect a character set from input string and convert them to internal
* codes. And convert it to other codes to display them.
*/
#include "defines.h"
#include "less.h"
#include <stdio.h>
#include <assert.h>
#if STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#endif
#define LESS 1
/*
 * Helpers that are not defined in this part of the file.  They are
 * declared old-style (no prototypes), so argument types are not checked.
 */
/* TODO: remove caller control_char(), change_control_char() and ecalloc() */
extern int control_char ();
extern void change_control_char ();
extern void* ecalloc ();
#if ISO
/* Forward declaration; used by the internalize routines below. */
static void multi_reparse();
#if JAPANESE
/*
 * Non-zero: wrongchar() replaces an unrecognised character with a visible
 * mark.  Zero: wrongchar() passes the raw bytes through tagged as WRONGCS.
 */
int markwrongchar = 1;
/*
 * Macros for character detection.
 *
 * Each macro is a pure range/bit test on byte values.  Arguments are
 * evaluated more than once, so only pass expressions without side effects.
 */
/* 7-bit graphic byte: 0x21..0x7e. */
#define ISJIS(c) (0x21 <= (c) && (c) <= 0x7e)
/* 8-bit graphic byte as used by UJIS: 0xa1..0xfe. */
#define ISUJIS(c) (0xa1 <= (c) && (c) <= 0xfe)
/* UJIS single-shift prefix byte: 0x8e or 0x8f. */
#define ISUJISSS(c) ((c) == 0x8e || (c) == 0x8f)
/* Two UJIS graphic bytes in a row (a UJIS kanji pair). */
#define ISUJISKANJI(c1,c2) (ISUJIS(c1) && ISUJIS(c2))
#define ISUJISKANJI1(c) (ISUJIS(c))
/* 0x8e followed by one UJIS graphic byte (UJIS kana). */
#define ISUJISKANA(c1,c2) ((c1) == 0x8e && ISUJIS(c2))
#define ISUJISKANA1(c) ((c) == 0x8e)
/* 0x8f followed by two UJIS graphic bytes (UJIS supplementary kanji). */
#define ISUJISKANJISUP(c1,c2,c3) ((c1) == 0x8f && ISUJIS(c2) && ISUJIS(c3))
#define ISUJISKANJISUP1(c) ((c) == 0x8f)
/*
 * SJIS two-byte character: lead byte 0x81..0x9f or 0xe0..0xfc, trail byte
 * 0x40..0xfc excluding 0x7f.
 */
#define ISSJISKANJI(c1,c2) (((0x81 <= (c1) && (c1) <= 0x9f) || \
(0xe0 <= (c1) && (c1) <= 0xfc)) && \
(0x40 <= (c2) && (c2) <= 0xfc && (c2) != 0x7f))
#define ISSJISKANJI1(c) ((0x81 <= (c) && (c) <= 0x9f) || \
(0xe0 <= (c) && (c) <= 0xfc))
/* SJIS single-byte kana: 0xa1..0xdf. */
#define ISSJISKANA(c) (0xa1 <= (c) && (c) <= 0xdf)
/*
 * UTF-8 byte patterns.  Only the bit layout of lead and continuation
 * bytes is tested; overlong forms and code point ranges are not checked.
 * Sequences of up to 6 bytes are recognised.
 */
/* Any lead byte of a multi-byte sequence: 0xc0..0xfd. */
#define ISUTF8_HEAD(c) (0xc0 <= (c) && (c) < 0xfe)
/* Continuation byte: bit pattern 10xxxxxx. */
#define ISUTF8_REST(c) (((c) & 0xc0) == 0x80)
#define ISUTF8_1(c) ((c) <= 0x7f)
#define ISUTF8_2(c1,c2) (((c1) & 0xe0) == 0xc0 && ISUTF8_REST(c2))
#define ISUTF8_3(c1,c2,c3) (((c1) & 0xf0) == 0xe0 && ISUTF8_REST(c2) && \
ISUTF8_REST(c3))
#define ISUTF8_4(c1,c2,c3,c4) (((c1) & 0xf8) == 0xf0 && ISUTF8_REST(c2) && \
ISUTF8_REST(c3) && ISUTF8_REST(c4))
#define ISUTF8_5(c1,c2,c3,c4,c5) \
(((c1) & 0xfc) == 0xf8 && ISUTF8_REST(c2) && ISUTF8_REST(c3) && \
ISUTF8_REST(c4) && ISUTF8_REST(c5))
#define ISUTF8_6(c1,c2,c3,c4,c5,c6) \
(((c1) & 0xfe) == 0xfc && ISUTF8_REST(c2) && ISUTF8_REST(c3) && \
ISUTF8_REST(c4) && ISUTF8_REST(c5) && ISUTF8_REST(c6))
#endif /* JAPANESE */
/*
 * Definitions for understanding escape sequences.
 * The following escape sequences are understood by less
 * (bytes are written in column/row notation, F is the final byte):
 * ESC 2/4 2/8,2/9,2/10,2/11,2/13,2/14,2/15 F
 * ESC 2/4 4/0,4/1,4/2
 * ESC 2/6 F
 * ESC 2/8,2/9,2/10,2/11,2/13,2/14,2/15 F
 * ESC 2/12 F This is used in MULE. Less supports this as input.
 * 0/14,0/15
 * ESC 4/14,4/15,6/14,6/15,7/12,7/13,7/14
 * 8/14,8/15
 *
 * Each enumerator names the prefix of an escape sequence that has been
 * read so far; NOESC means no escape sequence is in progress.
 */
enum escape_sequence {
NOESC, /* No */ ESC_, /* ^[ */
ESC_2_4, /* ^[$ */ ESC_2_4_8, /* ^[$( */
ESC_2_4_9, /* ^[$) */ ESC_2_4_10, /* ^[$* */
ESC_2_4_11, /* ^[$+ */ ESC_2_4_13, /* ^[$- */
ESC_2_4_14, /* ^[$. */ ESC_2_4_15, /* ^[$/ */
ESC_2_6, /* ^[& */ ESC_2_8, /* ^[( */
ESC_2_9, /* ^[) */ ESC_2_10, /* ^[* */
ESC_2_11, /* ^[+ */ ESC_2_12, /* ^[, */
ESC_2_13, /* ^[- */ ESC_2_14, /* ^[. */
ESC_2_15 /* ^[/ */
};
/*
 * File-wide defaults.
 * NOTE(review): presumably copied into each multibuf when it is
 * initialised; the initialisation code is not in this part of the file.
 */
/* Default set of accepted character sets: ASCII plus any other ISO set. */
static SETCHARSET def_scs = SCSASCII | SCSOTHERISO;
static ENCSET def_input = ESISO7; /* Default character set of left plane */
static ENCSET def_inputr = ESISO8; /* Default character set of right plane */
static int def_gs[4] = {
ASCII, /* Default g0 plane status */
WRONGCS, /* Default g1 plane status */
WRONGCS, /* Default g2 plane status */
WRONGCS /* Default g3 plane status */
};
/* Consulted by the internalize routines to reject unrepresentable sets. */
static ENCSET output = ESISO8; /* Character set for output */
#if JAPANESE
static J_PRIORITY def_priority = PUJIS; /* Which code was given priority. */
#endif
/* Positions in the input stream; used to index the circular inbuf. */
typedef POSITION m_position;
#define M_NULL_POS ((POSITION)(-1))
/*
 * Structure to represent character set information.
 *
 * This data set contains the current character sets and the other state
 * needed to track ISO-2022 escape sequences.
 */
struct m_status {
/* Graphic sets */
int gs[4]; /* Current g0..g3 plane sets. */
/* gl, gr, and sg each refer to one of the 4 planes (an index into gs). */
int gl; /* Current gl plane status */
int gr; /* Current gr plane status */
int sg; /* Current status of single-shifted plane */
/* Value of a plane index that selects no plane. */
#define WRONGPLANE (-1)
/* True when the named plane index (gl, gr or sg) of mp->ms is in use. */
#define ISVALIDPLANE(mp,plane) ((mp)->ms->plane != WRONGPLANE)
/*
 * Character set that applies to byte c: the single-shifted plane if one
 * is pending, otherwise gr when bit 7 of c is set, otherwise gl.
 */
#define FINDCS(mp,c) ((mp)->ms->gs[(ISVALIDPLANE((mp), sg) ? (mp)->ms->sg : \
((c) & 0x80) ? (mp)->ms->gr : (mp)->ms->gl)])
/* Character set currently designated to the named plane index. */
#define PLANE2CS(mp,plane) ((mp)->ms->gs[(mp)->ms->plane])
int irr; /* Identify revised registration number */
};
/*
 * Parser state for one input stream: accepted encodings, ISO-2022 status,
 * the raw input bytes being parsed and the converted output of the
 * current character.
 */
struct multibuf {
struct {
SETCHARSET scs; /* Character sets that are accepted (SCS* flags) */
ENCSET input; /* Encoding(s) accepted for bytes <= 0x7f */
ENCSET inputr; /* Encoding(s) accepted for bytes >= 0x80 */
} io;
ENCSET orig_io_right;
int rotation_io_right;
enum escape_sequence eseq; /* Prefix of the escape sequence read so far */
/*
 * Variables to control escape sequences as output.
 */
int cs; /* Current character set */
struct m_status* ms;
#if JAPANESE
J_PRIORITY priority; /* Which code was given priority. */
int sequence_counter; /* Special counter to detect UJIS KANJI. */
#endif
CHARSET icharset; /* Last non ASCII character set of input */
/*
 * Small buffers to hold all parsing bytes of multi-byte characters.
 *
 * The multi_parse() function receives a sequence of bytes and buffers
 * it.  Each time multi_parse() recognizes a full data sequence that
 * represents one character, it converts the data into internal data and
 * returns the converted data.
 *
 * The caller must buffer it somewhere and output it using outbuf() or
 * outchar().  Those output functions convert internal data into the
 * appropriate data stream for the chosen output device.
 *
 * As internal data, we use char[] and CHARSET[] to keep bytes and
 * additional information, respectively.  We chose an ISO-2022 style
 * data format as our internal data format because it is the easiest
 * to work with.  It has completely separated planes for each
 * character set.  This helps code conversion and other tasks a lot.
 * For example, we don't need to work to separate Chinese and
 * Japanese because they are separated from the beginning in ISO-2022,
 * although UTF-8 uses only a single plane with all CJK character sets.
 */
/*
 * Buffer for input/parsing.  inbuf is used as a circular buffer that is
 * indexed by stream position modulo its size (see the INBUF* macros).
 */
m_position lastpos; /* position of last byte */
m_position startpos; /* position of first byte buffered */
unsigned char inbuf[20];
m_position laststartpos; /* position of first byte buffered last time */
int lastsg; /* last single-shifted plane (ms->sg) */
/*
 * Buffer for internalized/converted data
 */
unsigned char multiint[10]; /* Byte data */
CHARSET multics[10]; /* Character set data (no UJIS/SJIS/UTF */
/* because all of them are converted into */
/* internal data format) */
int intindex; /* Index of multiint */
};
/*
 * Accessors for the circular input buffer.  Every access reduces a stream
 * position modulo sizeof(inbuf), so positions may grow without bound.
 *
 * INBUF(mp)    - byte at lastpos (the most recently buffered byte)
 * INBUF0..5    - byte at startpos + 0..5 (oldest buffered byte onwards)
 * INBUFI(mp,i) - byte at the absolute stream position i
 */
#define INBUF(mp) ((mp)->inbuf[(mp)->lastpos%sizeof((mp)->inbuf)])
#define INBUF0(mp) ((mp)->inbuf[(mp)->startpos%sizeof((mp)->inbuf)])
#define INBUF1(mp) ((mp)->inbuf[((mp)->startpos+1)%sizeof((mp)->inbuf)])
#define INBUF2(mp) ((mp)->inbuf[((mp)->startpos+2)%sizeof((mp)->inbuf)])
#define INBUF3(mp) ((mp)->inbuf[((mp)->startpos+3)%sizeof((mp)->inbuf)])
#define INBUF4(mp) ((mp)->inbuf[((mp)->startpos+4)%sizeof((mp)->inbuf)])
#define INBUF5(mp) ((mp)->inbuf[((mp)->startpos+5)%sizeof((mp)->inbuf)])
#define INBUFI(mp,i) ((mp)->inbuf[(i)%sizeof((mp)->inbuf)])
/*
 * Return how many bytes one character of character set cs occupies.
 *
 * A wrong (empty) character set counts as 1.  For the UJIS and SJIS
 * families the answer depends on the first buffered byte of mp; for
 * everything else it is derived from the set type and the final byte of
 * its designation.  Combinations that are not expected trip an assert
 * and yield 0.
 */
static int code_length(mp, cs)
MULBUF* mp;
CHARSET cs;
{
	int ft;
#if JAPANESE
	unsigned char lead;
#endif

	if (CSISWRONG(cs))
		return 1;

#if JAPANESE
	switch (CS2CHARSET(cs)) {
	case UJIS:
	case UJIS2000:
	case UJIS2004:
		lead = INBUF0(mp);
		/* kanji pair, or 0x8e prefix plus one kana byte */
		if (ISUJISKANJI1(lead) || ISUJISKANA1(lead))
			return 2;
		/* 0x8f prefix plus two bytes, otherwise a lone byte */
		return ISUJISKANJISUP1(lead) ? 3 : 1;
	case SJIS:
	case SJIS2000:
	case SJIS2004:
		lead = INBUF0(mp);
		/* single-byte kana and everything else take one byte */
		return ISSJISKANJI1(lead) ? 2 : 1;
	}
#endif

	switch (CS2TYPE(cs)) {
	case TYPE_94_CHARSET:
	case TYPE_96_CHARSET:
		return 1;
	case TYPE_94N_CHARSET:
	case TYPE_96N_CHARSET:
		/* The column of the final byte selects the byte count. */
		ft = CS2FT(cs) & 0x70;
		if (ft == 0x30)
			return 2;	/* for private use */
		if (ft == 0x40 || ft == 0x50)
			return 2;
		if (ft == 0x60)
			return 3;
		if (ft == 0x70)
			return 4;	/* or more bytes */
		break;
	}

	assert(0);
	return (0);
}
/*
 * Move the oldest buffered byte to the output buffer unchanged, tag it
 * as ASCII and drop it from the input buffer.
 */
static void noconv1(mp)
MULBUF *mp;
{
	int slot = mp->intindex++;

	mp->multiint[slot] = INBUF0(mp);
	mp->multics[slot] = ASCII;
	mp->startpos++;
}
/*
 * Move the oldest buffered byte to the output buffer unchanged, tag it
 * as WRONGCS and drop it from the input buffer.
 */
static void wrongcs1(mp)
MULBUF *mp;
{
	int slot = mp->intindex++;

	mp->multiint[slot] = INBUF0(mp);
	mp->multics[slot] = WRONGCS;
	mp->startpos++;
}
/*
 * Append the "wrong character" mark to the output buffer and discard
 * every byte that is currently buffered as input.
 *
 * The mark is the two-byte JIS X 0208 character whose bytes are '"' and
 * '.' (0x22 0x2e) -- presumably the geta mark.
 */
static void put_wrongmark(mp)
MULBUF *mp;
{
	int at = mp->intindex;

	mp->multiint[at] = '"';
	mp->multics[at] = JISX0208KANJI;
	mp->multiint[at + 1] = '.';
	mp->multics[at + 1] = REST_MASK | JISX0208KANJI;
	mp->intindex = at + 2;

	/* flush buffer */
	mp->startpos = mp->lastpos + 1;
}
/*
 * Dispose of buffered bytes that do not form a valid character.
 *
 * In marking mode (markwrongchar != 0) all buffered bytes are dropped and
 * replaced by the mark written by put_wrongmark().  Otherwise each
 * buffered byte is passed through individually by wrongcs1().
 *
 * The same two-byte mark is used whatever the character set of the
 * rejected data is, including one-byte sets (JIS X 0201, Latin-N, Greek,
 * ...).  Should a one-byte mark such as '?' or '_' be used for those?
 */
static void wrongchar(mp)
MULBUF *mp;
{
	if (!markwrongchar) {
		while (mp->startpos <= mp->lastpos)
			wrongcs1(mp);
		return;
	}

	put_wrongmark(mp);
}
/*
 * Internalize buffered input that is encoded with the ISO-2022 character
 * set currently selected in mp->cs.
 *
 * Bytes are rejected one at a time when the set is empty or when a DEL or
 * SPACE code turns up inside a 94 or 94^n set.  Otherwise the function
 * returns without consuming anything until a whole character is
 * buffered, then stores it with bit 7 stripped from every byte.
 */
static void internalize_iso(mp)
MULBUF *mp;
{
	m_position first;
	m_position last;
	m_position cur;
	int out;

	/* An empty character set cannot hold anything. */
	if (CSISWRONG(mp->cs)) {
		wrongcs1(mp);
		return;
	}

	/* 94 and 94^n sets have no code for SPACE (0x20) or DEL (0x7f). */
	if (CS2TYPE(mp->cs) == TYPE_94_CHARSET ||
	    CS2TYPE(mp->cs) == TYPE_94N_CHARSET) {
		unsigned char low7 = INBUF(mp) & 0x7f;

		if (low7 == 0x7f || low7 == 0x20) {
			if (mp->lastpos - mp->startpos + 1 != 1) {
				/* Broken multi-byte code: drop its first byte, retry. */
				wrongcs1(mp);
				multi_reparse(mp);
			} else if (low7 == 0x20) {
				/* A lone SPACE is wrong too, but is kept as a SPACE. */
				noconv1(mp);
			} else {
				wrongcs1(mp);
			}
			return;
		}
	}

	/* Go back for more input until the whole character is buffered. */
	first = mp->startpos;
	last = first + code_length(mp, mp->cs) - 1;
	if (mp->lastpos < last)
		return;

	/* Lead byte first, then the rest flagged with REST_MASK. */
	out = mp->intindex;
	mp->multiint[out] = INBUFI(mp, first) & 0x7f;
	mp->multics[out] = mp->cs;
	out++;
	cur = first + 1;
	while (cur <= last) {
		mp->multiint[out] = INBUFI(mp, cur) & 0x7f;
		mp->multics[out] = REST_MASK | mp->cs;
		out++;
		cur++;
	}

	/* Undefined codes are not committed; they are handed to wrongchar(). */
	if (!chisvalid_cs(&mp->multiint[mp->intindex],
			  &mp->multics[mp->intindex])) {
		wrongchar(mp);
		return;
	}
	mp->intindex = out;
	mp->startpos = cur;
}
#if JAPANESE
/*
 * Internalize input stream encoded by the UJIS encoding scheme.
 *
 * Works on the 1 to 3 bytes currently buffered in mp.  When a complete
 * character is recognised it is written to multiint/multics and the
 * input buffer is emptied; when the bytes are only a valid prefix nothing
 * is consumed.
 *
 * Return 1 if input is recognized well (complete, still incomplete, or
 * handed to wrongchar()).
 * Return 0 if input is rejected.
 */
static int internalize_ujis(mp)
MULBUF *mp;
{
if (mp->lastpos - mp->startpos + 1 == 1) {
/* do nothing. return 1 to get next byte */
return 1;
} else if (mp->lastpos - mp->startpos + 1 == 2) {
int c0 = INBUF0(mp);
int c1 = INBUF1(mp);
if (ISUJISKANA(c0, c1)) {
/* 0x8e prefix: one kana byte; the prefix itself is not stored */
mp->cs = JISX0201KANA;
mp->icharset = UJIS;
mp->multiint[mp->intindex] = c1 & 0x7f;
mp->multics[mp->intindex] = mp->cs;
mp->intindex += 1;
mp->startpos = mp->lastpos + 1;
return 1;
} else if (ISUJISKANJI(c0, c1)) {
/* Pick the kanji set according to the accepted character sets. */
if (mp->io.scs & SCSJISX0213_2004) {
mp->icharset = UJIS2004;
mp->cs = JISX02132004KANJI1;
} else if (mp->io.scs & SCSJISX0213_2000) {
mp->icharset = UJIS2000;
mp->cs = JISX0213KANJI1;
} else {
mp->icharset = UJIS;
mp->cs = JISX0208KANJI;
}
/* Store the raw bytes tagged with the input charset for validation. */
mp->multiint[mp->intindex] = c0;
mp->multics[mp->intindex] = mp->icharset;
mp->multiint[mp->intindex + 1] = c1;
mp->multics[mp->intindex + 1] = REST_MASK | mp->icharset;
/* Check character whether it has defined glyph or not */
if (chisvalid_cs(&mp->multiint[mp->intindex],
&mp->multics[mp->intindex])) {
/* defined: overwrite with the internal form (bit 7 stripped) */
mp->multiint[mp->intindex] = c0 & 0x7f;
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] = c1 & 0x7f;
mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
mp->intindex += 2;
mp->startpos = mp->lastpos + 1;
} else {
/* undefined. less ignore them */
wrongchar(mp);
}
/* data are recognized as kanji or wrong data, so return 1 */
return 1;
} else if (ISUJISKANJISUP(c0, c1, 0xa1)) {
/*
 * The constant 0xa1 stands in for the third byte, so this only
 * tests that c0 is 0x8f and c1 is a UJIS graphic byte.
 */
/* do nothing. return 1 to get next byte */
return 1;
}
} else if (mp->lastpos - mp->startpos + 1 == 3) {
int c0 = INBUF0(mp);
int c1 = INBUF1(mp);
int c2 = INBUF2(mp);
if (ISUJISKANJISUP(c0, c1, c2)) {
mp->cs = JISX0212KANJISUP;
mp->icharset = UJIS;
/* Store all three raw bytes tagged as UJIS for validation. */
mp->multiint[mp->intindex] = c0;
mp->multics[mp->intindex] = UJIS;
mp->multiint[mp->intindex + 1] = c1;
mp->multics[mp->intindex + 1] = REST_MASK | UJIS;
mp->multiint[mp->intindex + 2] = c2;
mp->multics[mp->intindex + 2] = REST_MASK | UJIS;
/* Check character whether it has defined glyph or not */
if (chisvalid_cs(&mp->multiint[mp->intindex],
&mp->multics[mp->intindex])) {
/* defined */
/*
 * Indexed by the first byte after the 0x8f prefix with bit 7
 * stripped.  A non-zero entry marks a row that is treated as
 * JIS X 0213 plane 2; zero means JIS X 0212.
 */
static unsigned char table_ujis[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0x21, 0, 0x23, 0x24, 0x25, 0, 0,
0x28, 0, 0, 0, 0x2C, 0x2D, 0x2E, 0x2F,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0
};
c1 &= 0x7f;
if (table_ujis[c1] != 0) {
/* JIS X 0213:2000 plane 2 */
if (output & ESJIS83) {
/* JIS cannot output JIS X 0213:2000 plane 2 */
wrongchar(mp);
} else {
/* Only two bytes are stored; the 0x8f prefix is dropped. */
mp->cs = JISX0213KANJI2;
mp->multiint[mp->intindex] = c1;
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] = c2 & 0x7f;
mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
mp->intindex += 2;
mp->startpos = mp->lastpos + 1;
}
} else {
/* JIS X 0212:1990 */
if (output & (ESSJIS | ESJIS83)) {
/* SJIS cannot output JIS X 0212:1990 */
wrongchar(mp);
} else {
/* Only two bytes are stored; the 0x8f prefix is dropped. */
mp->multiint[mp->intindex] = c1;
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] = c2 & 0x7f;
mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
mp->intindex += 2;
mp->startpos = mp->lastpos + 1;
}
}
} else {
/* undefined. less ignore them */
wrongchar(mp);
}
/* data are recognized as kanji or wrong data, so return 1 */
return 1;
}
}
/* return 0 because this data sequence is not matched to UJIS */
return 0;
}
/*
 * Internalize input stream encoded by the SJIS encoding scheme.
 *
 * Works on the 1 or 2 bytes currently buffered in mp.  A single-byte
 * kana is converted at once; a two-byte character is validated and then
 * converted into a two-byte internal code.
 *
 * Return 1 if input is recognized well (complete, still incomplete, or
 * handed to wrongchar()).
 * Return 0 if input is rejected.
 */
static int internalize_sjis(mp)
MULBUF *mp;
{
if (mp->lastpos - mp->startpos + 1 == 1) {
/* Only one byte is buffered, so the last byte is also the first. */
int c0 = INBUF(mp);
if (ISSJISKANA(c0)) {
mp->cs = JISX0201KANA;
mp->icharset = SJIS;
mp->multiint[mp->intindex] = c0 & 0x7f;
mp->multics[mp->intindex] = mp->cs;
mp->intindex += 1;
mp->startpos = mp->lastpos + 1;
return 1;
} else {
/* do nothing. return 1 to get next byte */
return 1;
}
} else if (mp->lastpos - mp->startpos + 1 == 2) {
int c0 = INBUF0(mp);
int c1 = INBUF1(mp);
if (ISSJISKANJI(c0, c1)) {
/* Pick the kanji set according to the accepted character sets. */
if (mp->io.scs & SCSJISX0213_2004) {
mp->icharset = SJIS2004;
mp->cs = JISX02132004KANJI1;
} else if (mp->io.scs & SCSJISX0213_2000) {
mp->icharset = SJIS2000;
mp->cs = JISX0213KANJI1;
} else {
mp->icharset = SJIS;
mp->cs = JISX0208KANJI;
}
/* Store the raw bytes tagged with the input charset for validation. */
mp->multiint[mp->intindex] = c0;
mp->multics[mp->intindex] = mp->icharset;
mp->multiint[mp->intindex + 1] = c1;
mp->multics[mp->intindex + 1] = REST_MASK | mp->icharset;
/*
 * Check the correctness of SJIS encoded characters and
 * convert them into internal representation.
 */
if (chisvalid_cs(&mp->multiint[mp->intindex],
&mp->multics[mp->intindex])) {
int c2, c3;
/*
 * Indexed by the lead byte with bit 7 stripped.  Entries below
 * 0x80 are used directly as the first internal byte (plane 1 /
 * JIS X 0208); entries of 0x80 and above select the plane 2
 * handling further down.
 */
static unsigned char table_sjis[] = {
0, 0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D,
0x2F, 0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D,
0x3F, 0x41, 0x43, 0x45, 0x47, 0x49, 0x4B, 0x4D,
0x4F, 0x51, 0x53, 0x55, 0x57, 0x59, 0x5B, 0x5D,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x5F, 0x61, 0x63, 0x65, 0x67, 0x69, 0x6B, 0x6D,
0x6F, 0x71, 0x73, 0x75, 0x77, 0x79, 0x7B, 0x7D,
0x80, 0xA3, 0x81, 0xAD, 0x82, 0xEF, 0xF1, 0xF3,
0xF5, 0xF7, 0xF9, 0xFB, 0xFD, 0, 0, 0
};
/*
 * From here on c0 and c1 both hold the table entry, c2 holds the
 * trail byte with the gap at 0x7f closed up, and c3 is non-zero
 * when the trail byte lies in the upper half (c2 >= 0x9e), which
 * selects the second of the two rows sharing this lead byte.
 */
c0 = table_sjis[c0 & 0x7f];
c2 = c1 - ((unsigned char)c1 >= 0x80 ? 1 : 0);
c1 = c0;
c3 = c2 >= 0x9e;
if (c1 < 0x80) {
/* JIS X 0213:2000 plane 1 or JIS X 0208:1997 */
mp->multiint[mp->intindex] =
(c1 + (c3 ? 1 : 0));
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] =
(c2 - (c3 ? 0x9e - 0x21 : 0x40 - 0x21));
mp->multics[mp->intindex + 1] =
REST_MASK | mp->cs;
mp->intindex += 2;
mp->startpos = mp->lastpos + 1;
} else {
/* JIS X 0213:2000 plane 2 */
if (output & ESJIS83) {
/* JIS cannot output JIS X 0213:2000 plane 2 */
wrongchar(mp);
} else {
mp->cs = JISX0213KANJI2;
if (c1 > 0xA0) {
/* row 3-4, 13-14, and 79-94 */
mp->multiint[mp->intindex] =
((c1 & 0x7f) + (c3 ? 1 : 0));
} else if (c1 == 0x80) {
/* row 1 or 8 */
mp->multiint[mp->intindex] =
c3 ? 0x28 : 0x21;
} else if (c1 == 0x81) {
/* row 5 or 12 */
mp->multiint[mp->intindex] =
c3 ? 0x2C : 0x25;
} else {
/* row 15 or 78 */
mp->multiint[mp->intindex] =
c3 ? 0x6E : 0x2F;
}
mp->multics[mp->intindex] = JISX0213KANJI2;
mp->multiint[mp->intindex + 1] =
(c2 - (c3 ? 0x9e - 0x21 : 0x40 - 0x21));
mp->multics[mp->intindex + 1] =
REST_MASK | JISX0213KANJI2;
mp->intindex += 2;
mp->startpos = mp->lastpos + 1;
}
}
} else {
/* undefined. less ignore them */
wrongchar(mp);
}
/* data are recognized as kanji or wrong data, so return 1 */
return 1;
}
}
/* return 0 because this data sequence is not matched to SJIS */
return 0;
}
/*
* Internalize input stream encoded by UTF8 encoding scheme.
*
* Return 1 if input is recognized well.
* Return 0 if input is rejected.
*/
static int internalize_utf8(mp)
MULBUF *mp;
{
if (mp->lastpos - mp->startpos + 1 == 1) {
/* do nothing. return 1 to get next byte */
return 1;
} else if (mp->lastpos - mp->startpos + 1 == 2) {
int c0 = INBUF0(mp);
int c1 = INBUF1(mp);
if (ISUTF8_2(c0, c1)) {
mp->cs = UTF8;
mp->icharset = UTF8;
mp->multiint[mp->intindex] = c0;
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] = c1;
mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
if (output & ESUTF8) {
mp->intindex += 2;
mp->startpos = mp->lastpos + 1;
return 1;
} else {
mp->intindex += 2;
mp->startpos = mp->lastpos + 1;
return 1;
}
} else if (ISUJISKANJI(c0, c1)) {
if (mp->io.scs & SCSJISX0213_2004) {
mp->icharset = UJIS2004;
mp->cs = JISX02132004KANJI1;
} else if (mp->io.scs & SCSJISX0213_2000) {
mp->icharset = UJIS2000;
mp->cs = JISX0213KANJI1;
} else {
mp->icharset = UJIS;
mp->cs = JISX0208KANJI;
}
mp->multiint[mp->intindex] = c0;
mp->multics[mp->intindex] = mp->icharset;
mp->multiint[mp->intindex + 1] = c1;
mp->multics[mp->intindex + 1] = REST_MASK | mp->icharset;
/* Check character whether it has defined glyph or not */
if (chisvalid_cs(&mp->multiint[mp->intindex],
&mp->multics[mp->intindex])) {
/* defined */
mp->multiint[mp->intindex] = c0 & 0x7f;
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] = c1 & 0x7f;
mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
mp->intindex += 2;
mp->startpos = mp->lastpos + 1;
} else {
/* undefined. less ignore them */
wrongchar(mp);
}
/* data are recognized as kanji or wrong data, so return 1 */
return 1;
} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1)) {
/* do nothing. return 1 to get next byte */
return 1;
}
} else if (mp->lastpos - mp->startpos + 1 == 3) {
int c0 = INBUF0(mp);
int c1 = INBUF1(mp);
int c2 = INBUF2(mp);
if (ISUTF8_3(c0, c1, c2)) {
mp->cs = UTF8;
mp->icharset = UTF8;
mp->multiint[mp->intindex] = c0;
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] = c1;
mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 2] = c2;
mp->multics[mp->intindex + 2] = REST_MASK | mp->cs;
mp->intindex += 3;
mp->startpos = mp->lastpos + 1;
/* data are recognized as kanji or wrong data, so return 1 */
return 1;
} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1) && ISUTF8_REST(c2)) {
/* do nothing. return 1 to get next byte */
return 1;
}
} else if (mp->lastpos - mp->startpos + 1 == 4) {
int c0 = INBUF0(mp);
int c1 = INBUF1(mp);
int c2 = INBUF2(mp);
int c3 = INBUF3(mp);
if (ISUTF8_4(c0, c1, c2, c3)) {
mp->cs = UTF8;
mp->icharset = UTF8;
mp->multiint[mp->intindex] = c0;
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] = c1;
mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 2] = c2;
mp->multics[mp->intindex + 2] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 3] = c3;
mp->multics[mp->intindex + 3] = REST_MASK | mp->cs;
mp->intindex += 4;
mp->startpos = mp->lastpos + 1;
/* data are recognized as kanji or wrong data, so return 1 */
return 1;
} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1) && ISUTF8_REST(c2) &&
ISUTF8_REST(c3)) {
/* do nothing. return 1 to get next byte */
return 1;
}
} else if (mp->lastpos - mp->startpos + 1 == 5) {
int c0 = INBUF0(mp);
int c1 = INBUF1(mp);
int c2 = INBUF2(mp);
int c3 = INBUF3(mp);
int c4 = INBUF4(mp);
if (ISUTF8_5(c0, c1, c2, c3, c4)) {
mp->cs = UTF8;
mp->icharset = UTF8;
mp->multiint[mp->intindex] = c0;
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] = c1;
mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 2] = c2;
mp->multics[mp->intindex + 2] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 3] = c3;
mp->multics[mp->intindex + 3] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 4] = c4;
mp->multics[mp->intindex + 4] = REST_MASK | mp->cs;
mp->intindex += 5;
mp->startpos = mp->lastpos + 1;
/* data are recognized as kanji or wrong data, so return 1 */
return 1;
} else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1) && ISUTF8_REST(c2) &&
ISUTF8_REST(c3) && ISUTF8_REST(c4)) {
/* do nothing. return 1 to get next byte */
return 1;
}
} else if (mp->lastpos - mp->startpos + 1 == 6) {
int c0 = INBUF0(mp);
int c1 = INBUF1(mp);
int c2 = INBUF2(mp);
int c3 = INBUF3(mp);
int c4 = INBUF4(mp);
int c5 = INBUF5(mp);
if (ISUTF8_6(c0, c1, c2, c3, c4, c5)) {
mp->cs = UTF8;
mp->icharset = UTF8;
mp->multiint[mp->intindex] = c0;
mp->multics[mp->intindex] = mp->cs;
mp->multiint[mp->intindex + 1] = c1;
mp->multics[mp->intindex + 1] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 2] = c2;
mp->multics[mp->intindex + 2] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 3] = c3;
mp->multics[mp->intindex + 3] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 4] = c4;
mp->multics[mp->intindex + 4] = REST_MASK | mp->cs;
mp->multiint[mp->intindex + 5] = c5;
mp->multics[mp->intindex + 5] = REST_MASK | mp->cs;
mp->intindex += 6;
mp->startpos = mp->lastpos + 1;
/* data are recognized as kanji or wrong data, so return 1 */
return 1;
}
}
/* return 0 because this data sequence is not matched to UTF8 */
return 0;
}
#endif
static void internalize(mp)
MULBUF *mp;
{
int c = INBUF(mp);
if (mp->lastpos - mp->startpos + 1 == 1) {
if ((c <= 0x7f && mp->io.input == ESNOCONV) ||
(c >= 0x80 && mp->io.inputr == ESNOCONV)) {
#if JAPANESE
mp->sequence_counter = 0;
#endif
if (control_char(c)) {
wrongcs1(mp);
} else {
noconv1(mp);
}
return;
} else if (c >= 0x80 && mp->io.inputr == ESNONE) {
#if JAPANESE
mp->sequence_counter = 0;
#endif
wrongcs1(mp);
return;
}
mp->cs = ASCII;
if (c < 0x20) {
#if JAPANESE
mp->sequence_counter = 0;
#endif
wrongcs1(mp);
return;
} else if (c <= 0x7f ||
((mp->io.inputr & ESISO8) && (0xa0 <= c && c <= 0xff))) {
#if JAPANESE
mp->sequence_counter = 0;
#endif
/*
* Decide current character set.
*/
mp->cs = FINDCS(mp, c);
/*
* Check cs that fit for output code set.
*/
/* JIS cannot output JISX0212, JISX0213_2, or ISO2022 */
if ((output & ESJIS83) && mp->cs != ASCII &&
mp->cs != JISX0201KANA &&
mp->cs != JISX0201ROMAN &&
mp->cs != JISX0208_78KANJI &&
mp->cs != JISX0208KANJI &&
mp->cs != JISX0208_90KANJI &&
mp->cs != JISX0213KANJI1 &&
mp->cs != JISX02132004KANJI1) {
wrongcs1(mp);
multi_reparse(mp);
return;
}
/* UJIS cannot output regular ISO2022 except JIS */
if ((output & ESUJIS) && mp->cs != ASCII &&
mp->cs != JISX0201KANA &&
mp->cs != JISX0201ROMAN &&
mp->cs != JISX0208_78KANJI &&
mp->cs != JISX0208KANJI &&
mp->cs != JISX0208_90KANJI &&
mp->cs != JISX0212KANJISUP &&
mp->cs != JISX0213KANJI1 &&
mp->cs != JISX0213KANJI2 &&
mp->cs != JISX02132004KANJI1) {
wrongcs1(mp);
multi_reparse(mp);
return;
}
/* SJIS cannot output JISX0212 or ISO2022 */
if ((output & ESSJIS) && mp->cs != ASCII &&
mp->cs != JISX0201KANA &&
mp->cs != JISX0201ROMAN &&
mp->cs != JISX0208_78KANJI &&
mp->cs != JISX0208KANJI &&
mp->cs != JISX0208_90KANJI &&
mp->cs != JISX0213KANJI1 &&
mp->cs != JISX0213KANJI2 &&
mp->cs != JISX02132004KANJI1) {
wrongcs1(mp);
multi_reparse(mp);
return;
}
if (mp->cs != ASCII)
mp->icharset = mp->cs;
internalize_iso(mp);
return;
} else if (control_char(c)) {
#if JAPANESE
mp->sequence_counter = 0;
#endif
wrongcs1(mp);
return;
}
#if JAPANESE
if (mp->priority == PSJIS && ISSJISKANA(c)) {
if (mp->io.inputr & ESUJIS) {
mp->sequence_counter++;
if (mp->sequence_counter % 2 == 1 &&
INBUF0(mp) != 0xa4) /* ???? */
{
mp->sequence_counter = 0;
}
if (mp->sequence_counter >= 6)
/*
* It looks like a sequence of UJIS
* hiragana. Thus we give priority
* to not PSJIS.
*/
mp->priority = PUJIS;
}
internalize_sjis(mp);
return;
} else if (mp->io.inputr & (ESUJIS | ESSJIS)) {
mp->sequence_counter = 0;
return;
}
mp->sequence_counter = 0;
#endif
wrongcs1(mp);
return;
}
#if JAPANESE
assert(mp->sequence_counter == 0);
#endif
if (c < 0x20) {
wrongcs1(mp);
multi_reparse(mp);
return;
} else if (mp->cs != ASCII &&
(c <= 0x7f ||
((mp->io.inputr & ESISO8) && 0xa0 <= c && c <= 0xff))) {
if (mp->cs != FINDCS(mp, c)) {
wrongcs1(mp);
multi_reparse(mp);
} else {
internalize_iso(mp);
}
return;
} else if (control_char(c)) {
wrongcs1(mp);
multi_reparse(mp);
return;
}
#if JAPANESE
if (mp->lastpos - mp->startpos + 1 == 2) {
if (mp->priority == PSJIS) {
if (internalize_sjis(mp)) {
return;
}
} else if (mp->priority == PUJIS) {
if (internalize_ujis(mp)) {
return;
}
} else if (mp->priority == PUTF8) {
if (internalize_utf8(mp)) {
return;
}
}
if (mp->io.inputr & ESUJIS) {
if (internalize_ujis(mp)) {
mp->priority = PUJIS;
return;
}
}
if (mp->io.inputr & ESUTF8) {
if (internalize_utf8(mp)) {
mp->priority = PUTF8;
return;
}
}
if (mp->io.inputr & ESSJIS) {
if (internalize_sjis(mp)) {
mp->priority = PSJIS;
return;
}
}
} else if (mp->lastpos - mp->startpos + 1 == 3) {
if (mp->io.inputr & ESUJIS) {
if (internalize_ujis(mp)) {
mp->priority = PUJIS;
return;
}
}
if (mp->io.inputr & ESUJIS) {
if (internalize_utf8(mp)) {
mp->priority = PUTF8;
return;
}
}
} else if (mp->lastpos - mp->startpos + 1 == 4) {
if (mp->io.inputr & ESUJIS) {
if (internalize_utf8(mp)) {
mp->priority = PUTF8;
return;
}
}
} else if (mp->lastpos - mp->startpos + 1 == 5) {
if (mp->io.inputr & ESUJIS) {
if (internalize_utf8(mp)) {
mp->priority = PUTF8;
return;
}
}
} else if (mp->lastpos - mp->startpos + 1 == 6) {
if (mp->io.inputr & ESUJIS) {
if (internalize_utf8(mp)) {
mp->priority = PUTF8;
return;
}
}
}
#endif
wrongcs1(mp);
multi_reparse(mp);
}
/*
 * Check routines
 */
/*
 * Handle the final byte c of a designation escape sequence for a
 * character set of the given type.
 *
 * The designation is accepted when c names a set that is enabled in
 * mp->io.scs, or when other ISO sets are accepted (SCSOTHERISO) and c is
 * within 0x30..0x7e.  On acceptance the resulting character set (with
 * the pending revision number, if any) is stored in *plane, the pending
 * revision number is cleared and the escape sequence state is reset.
 *
 * Return 0 if accepted, -1 if not (nothing is modified then).
 */
static int check_ft(mp, c, type, plane)
MULBUF *mp;
register int c;
int type;
int *plane;
{
	int known = 0;

	if (type == TYPE_94_CHARSET) {
		if (c == 'B') {
			/* ASCII */
			known = 1;
		} else if (c == 'I' || c == 'J') {
			/* JIS X 0201 right half (Katakana) / left half (Roman) */
			known = (mp->io.scs & SCSJISX0201_1976) != 0;
		}
	} else if (type == TYPE_94N_CHARSET) {
		switch (c) {
		case '@': /* JIS C 6226-1978 */
			known = (mp->io.scs & SCSJISC6226_1978) != 0;
			break;
		case 'B': /* JIS X 0208-1983, JIS X 0208:1990, or JIS X 0208:1997 */
			known = (mp->io.scs &
				 (SCSJISX0208_1983 | SCSJISX0208_1990)) != 0;
			break;
		case 'D': /* JIS X 0212:1990 */
			known = (mp->io.scs & SCSJISX0212_1990) != 0;
			break;
		case 'O': /* JIS X 0213:2000 plane 1 */
			known = (mp->io.scs & SCSJISX0213_2000) != 0;
			break;
		case 'P': /* JIS X 0213:2000 plane 2 or JIS X 0213:2004 plane 2 */
			known = (mp->io.scs &
				 (SCSJISX0213_2000 | SCSJISX0213_2004)) != 0;
			break;
		case 'Q': /* JIS X 0213:2004 plane 1 */
			known = (mp->io.scs & SCSJISX0213_2004) != 0;
			break;
		}
	}

	/* accepting all other ISO, so OK */
	if (!known && (mp->io.scs & SCSOTHERISO) && 0x30 <= c && c <= 0x7e)
		known = 1;

	if (!known)
		return (-1);

	*plane = (mp->ms->irr ? IRR2CS(mp->ms->irr) : 0)
		| TYPE2CS(type)
		| FT2CS(c);
	mp->ms->irr = 0;
	mp->eseq = NOESC;
	return (0);
}
/*
 * Handle the final byte c of an "identify revised registration" escape
 * sequence (ESC 2/6 F).
 *
 * A byte within 0x40..0x7e is remembered in mp->ms->irr and ends the
 * escape sequence.  Return 0 in that case, -1 for any other byte.
 */
static int check_irr(mp, c)
MULBUF *mp;
register int c;
{
	if (c < 0x40 || c > 0x7e)
		return (-1);

	mp->ms->irr = CODE2IRR(c);
	mp->eseq = NOESC;
	return (0);
}
/*
 * Once no escape sequence is pending, tell the control character table
 * how to treat 0177 and 0377.
 *
 * For each of the left and right halves, take the character set that is
 * in effect (the single-shifted plane if one is pending, otherwise gl or
 * gr respectively).  With a 96 or 96^n set, change_control_char() is
 * called with 0 for that code; with a 94 or 94^n set it is called with 1.
 */
static void fix_status_for_escape_sequence(mp)
MULBUF *mp;
{
	int type;

	if (mp->eseq != NOESC)
		return;

	/* Left half: decides the treatment of 0177. */
	type = CS2TYPE(ISVALIDPLANE(mp, sg) ? PLANE2CS(mp, sg) :
		       PLANE2CS(mp, gl));
	if (type == TYPE_96_CHARSET || type == TYPE_96N_CHARSET)
		change_control_char(0177, 0);
	else if (type == TYPE_94_CHARSET || type == TYPE_94N_CHARSET)
		change_control_char(0177, 1);

	/* Right half: decides the treatment of 0377. */
	type = CS2TYPE(ISVALIDPLANE(mp, sg) ? PLANE2CS(mp, sg) :
		       PLANE2CS(mp, gr));
	if (type == TYPE_96_CHARSET || type == TYPE_96N_CHARSET)
		change_control_char(0377, 0);
	else if (type == TYPE_94_CHARSET || type == TYPE_94N_CHARSET)
		change_control_char(0377, 1);
}
/*
 * Feed the byte read through INBUF(mp) to the escape-sequence state
 * machine kept in mp->eseq.
 *
 * Returns 0 when the byte was taken by the state machine: either it
 * continued or completed a sequence / shift function, or the sequence
 * turned out to be wrong and was handed to wrongcs1() and multi_reparse().
 * Returns -1 (state is NOESC) when the byte is not one of the control
 * functions recognized here; the caller then treats it as a character.
 *
 * Whenever the state is NOESC after a successful step,
 * fix_status_for_escape_sequence() is called and mp->startpos is moved to
 * mp->lastpos + 1.
 */
static int check_escape_sequence(mp)
MULBUF *mp;
{
int c = INBUF(mp);
switch (mp->eseq) {
/* After ESC: intermediate bytes select the next state, final bytes shift. */
case ESC_:
switch (c) {
case '$': mp->eseq = ESC_2_4; break;
case '&': mp->eseq = ESC_2_6; break;
case '(': mp->eseq = ESC_2_8; break;
case ')': mp->eseq = ESC_2_9; break;
case '*': mp->eseq = ESC_2_10; break;
case '+': mp->eseq = ESC_2_11; break;
case ',': mp->eseq = ESC_2_12; break;
case '-': mp->eseq = ESC_2_13; break;
case '.': mp->eseq = ESC_2_14; break;
case '/': mp->eseq = ESC_2_15; break;
case 'N': mp->ms->sg = 2; mp->eseq = NOESC; /*SS2*/break;
case 'O': mp->ms->sg = 3; mp->eseq = NOESC; /*SS3*/break;
case 'n': mp->ms->gl = 2; mp->eseq = NOESC; break;
case 'o': mp->ms->gl = 3; mp->eseq = NOESC; break;
/* The three shifts below change gr and are honored only with ESISO8 input. */
case '|': if (!(mp->io.inputr & ESISO8)) goto wrong;
mp->ms->gr = 3; mp->eseq = NOESC; break;
case '}': if (!(mp->io.inputr & ESISO8)) goto wrong;
mp->ms->gr = 2; mp->eseq = NOESC; break;
case '~': if (!(mp->io.inputr & ESISO8)) goto wrong;
mp->ms->gr = 1; mp->eseq = NOESC; break;
default: goto wrong;
}
break;
/* After ESC '$': either another intermediate byte or a short-form final. */
case ESC_2_4:
switch (c) {
case '(': mp->eseq = ESC_2_4_8; break;
case ')': mp->eseq = ESC_2_4_9; break;
case '*': mp->eseq = ESC_2_4_10; break;
case '+': mp->eseq = ESC_2_4_11; break;
case '-': mp->eseq = ESC_2_4_13; break;
case '.': mp->eseq = ESC_2_4_14; break;
case '/': mp->eseq = ESC_2_4_15; break;
case '@':
case 'A':
/*
 * ESC '$' followed directly by '@', 'A' or 'B' designates a 94N set
 * to gs[0].  When check_ft() rejects it, control deliberately falls
 * through to the default label.
 */
case 'B': if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[0])) == 0)
break;
default: goto wrong;
}
break;
/* After ESC '&': the final byte is validated by check_irr(). */
case ESC_2_6:
if (check_irr(mp, c) == 0) break;
goto wrong;
/* ESC '(' ')' '*' '+' : designate a 94 set to gs[0]..gs[3]. */
case ESC_2_8:
if (check_ft(mp, c, TYPE_94_CHARSET, &(mp->ms->gs[0])) == 0) break;
goto wrong;
case ESC_2_9:
if (check_ft(mp, c, TYPE_94_CHARSET, &(mp->ms->gs[1])) == 0) break;
goto wrong;
case ESC_2_10:
if (check_ft(mp, c, TYPE_94_CHARSET, &(mp->ms->gs[2])) == 0) break;
goto wrong;
case ESC_2_11:
if (check_ft(mp, c, TYPE_94_CHARSET, &(mp->ms->gs[3])) == 0) break;
goto wrong;
/* ESC ',' '-' '.' '/' : designate a 96 set to gs[0]..gs[3]. */
case ESC_2_12:
if (check_ft(mp, c, TYPE_96_CHARSET, &(mp->ms->gs[0])) == 0) break;
goto wrong;
case ESC_2_13:
if (check_ft(mp, c, TYPE_96_CHARSET, &(mp->ms->gs[1])) == 0) break;
goto wrong;
case ESC_2_14:
if (check_ft(mp, c, TYPE_96_CHARSET, &(mp->ms->gs[2])) == 0) break;
goto wrong;
case ESC_2_15:
if (check_ft(mp, c, TYPE_96_CHARSET, &(mp->ms->gs[3])) == 0) break;
goto wrong;
/* ESC '$' '(' ')' '*' '+' : designate a 94N set to gs[0]..gs[3]. */
case ESC_2_4_8:
if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[0])) == 0) break;
goto wrong;
case ESC_2_4_9:
if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[1])) == 0) break;
goto wrong;
case ESC_2_4_10:
if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[2])) == 0) break;
goto wrong;
case ESC_2_4_11:
if (check_ft(mp, c, TYPE_94N_CHARSET, &(mp->ms->gs[3])) == 0) break;
goto wrong;
/* ESC '$' '-' '.' '/' : designate a 96N set to gs[1]..gs[3]. */
case ESC_2_4_13:
if (check_ft(mp, c, TYPE_96N_CHARSET, &(mp->ms->gs[1])) == 0) break;
goto wrong;
case ESC_2_4_14:
if (check_ft(mp, c, TYPE_96N_CHARSET, &(mp->ms->gs[2])) == 0) break;
goto wrong;
case ESC_2_4_15:
if (check_ft(mp, c, TYPE_96N_CHARSET, &(mp->ms->gs[3])) == 0) break;
goto wrong;
case NOESC:
/*
 * This sequence is wrong if we buffered some data.
 */
if (mp->lastpos > mp->startpos) {
switch (c) {
case 0033:
case 0016:
case 0017:
case 0031: goto wrong;
case 0216:
/*
 * 0216 / 0217 count as control functions only with ESISO8 input;
 * otherwise control falls through to the default label on purpose.
 */
case 0217: if (mp->io.inputr & ESISO8) goto wrong;
default: goto wrongone;
}
}
/*
 * Nothing is buffered. So, check this sequence.
 */
switch (c) {
case 0033: mp->eseq = ESC_; break;
case 0016: mp->ms->gl = 1; mp->eseq = NOESC; break;
case 0017: mp->ms->gl = 0; mp->eseq = NOESC; break;
case 0031: mp->ms->sg = 2; mp->eseq = NOESC; /*SS2*/ break;
case 0216: if (!(mp->io.inputr & ESISO8)) goto wrongone;
mp->ms->sg = 2; mp->eseq = NOESC; /*SS2*/ break;
case 0217: if (!(mp->io.inputr & ESISO8)) goto wrongone;
mp->ms->sg = 3; mp->eseq = NOESC; /*SS3*/ break;
default: goto wrongone;
}
break;
default:
assert(0);
}
/* A completed sequence is consumed: nothing stays buffered before lastpos. */
if (mp->eseq == NOESC) {
fix_status_for_escape_sequence(mp);
mp->startpos = mp->lastpos + 1;
return (0);
}
/* Still inside a sequence: keep the bytes buffered and wait for more. */
return (0);
wrong:
/*
 * Broken sequence: leave escape mode, pass the buffered data to
 * wrongcs1() and re-parse what remains.
 */
if (mp->eseq != NOESC) {
mp->eseq = NOESC;
fix_status_for_escape_sequence(mp);
}
wrongcs1(mp);
multi_reparse(mp);
return (0);
wrongone:
/* Not a control function at all; the caller handles the byte. */
assert(mp->eseq == NOESC);
return (-1);
}
/*
 * Named plane-set presets understood by set_planeset().
 *
 * name     - preset name looked up with strcmp().
 * planeset - designation string the name stands for; the two characters
 *            backslash + 'e' are read as an ESC byte by set_planeset().
 *
 * The table ends with an entry whose name is NULL.
 */
struct planeset {
    char *name;
    char *planeset;
};

struct planeset planesets[] = {
    { "ascii",    "" },
    { "ctext",    "\\e-A" },
    { "latin1",   "\\e-A" },
    { "latin2",   "\\e-B" },
    { "latin3",   "\\e-C" },
    { "latin4",   "\\e-D" },
    { "greek",    "\\e-F" },
    { "arabic",   "\\e-G" },
    { "hebrew",   "\\e-H" },
    { "cyrillic", "\\e-L" },
    { "latin5",   "\\e-M" },
    { "japanese", "\\e$)B\\e*I\\e$+D" },
    { "ujis",     "\\e$)B\\e*I\\e$+D" },
    { "euc",      "\\e$)B\\e*I\\e$+D" },
    { NULL,       "" }
};
/*
 * Set the default G0..G3 designations (def_gs[]) from a plane-set
 * description.
 *
 * name is either one of the preset names in planesets[] or a literal
 * designation string; in the string the two characters backslash + 'e'
 * (or 'E') stand for an ESC byte.  The string is run through
 * check_escape_sequence() on a scratch MULBUF, and on success that
 * buffer's gs[0..3] are copied into def_gs[].
 *
 * Returns 0 on success.  Returns -1 when name is NULL, when a byte is not
 * accepted as part of a sequence, or when parsing produced output
 * characters (mp->intindex > 0); def_gs[] is then left untouched.
 *
 * The scratch buffer, including the m_status block that new_multibuf()
 * allocates for it, is released on every path.
 *
 * NOTE(review): a string that ends in the middle of an escape sequence is
 * accepted as-is, because the final mp->eseq is never inspected.
 */
int set_planeset(name)
register char *name;
{
    register struct planeset *p;
    MULBUF *mp;
    int status = 0;

    if (name == NULL) {
        return -1;
    }

    /* Replace a preset name by the designation string it stands for. */
    for (p = planesets; p->name != NULL; p++) {
        if (strcmp(name, p->name) == 0) {
            name = p->planeset;
            break;
        }
    }

    mp = new_multibuf();
    init_priority(mp);

    while (*name) {
        ++mp->lastpos;
        if (*name == '\\' &&
            (*(name + 1) == 'e' || *(name + 1) == 'E')) {
            INBUF(mp) = '\033';
            name += 2;
        } else {
            INBUF(mp) = *name++;
        }
        if (check_escape_sequence(mp) < 0 || mp->intindex > 0) {
            status = -1;
            break;
        }
    }

    if (status == 0) {
        def_gs[0] = mp->ms->gs[0];
        def_gs[1] = mp->ms->gs[1];
        def_gs[2] = mp->ms->gs[2];
        def_gs[3] = mp->ms->gs[3];
    }

    /* new_multibuf() allocates mp->ms separately, so free it as well. */
    free(mp->ms);
    free(mp);
    return status;
}
/*
 * Record the default character-set mask and encoding sets.
 *
 * scs, input and inputr are stored in def_scs, def_input and def_inputr,
 * which new_multibuf() copies into every buffer it creates; out is stored
 * in the file-wide output setting consulted by the output converters.
 */
void init_def_scs_es(scs, input, inputr, out)
SETCHARSET scs;
ENCSET input;
ENCSET inputr;
ENCSET out;
{
def_scs = scs;
def_input = input;
def_inputr = inputr;
output = out;
}
/*
 * Record the default priority in def_priority, which init_priority()
 * uses when both ESSJIS and ESUJIS are enabled.
 *
 * pri must be PUJIS, PSJIS or PUTF8 (asserted).  The function does
 * nothing when JAPANESE is not enabled.
 */
void init_def_priority(pri)
J_PRIORITY pri;
{
#if JAPANESE
assert(pri == PUJIS || pri == PSJIS || pri == PUTF8);
def_priority = pri;
#endif
}
/*
 * Derive mp->priority from the encodings enabled in mp->io.inputr and
 * reset mp->sequence_counter.
 *
 *   ESUJIS and ESSJIS  -> def_priority
 *   ESUJIS only        -> PUJIS
 *   ESUTF8             -> PUTF8
 *   ESSJIS             -> PSJIS
 *   none of them       -> PNONE
 *
 * The function does nothing when JAPANESE is not enabled.
 */
void init_priority(mp)
MULBUF *mp;
{
#if JAPANESE
    int has_sjis = (mp->io.inputr & ESSJIS) != 0;
    int has_ujis = (mp->io.inputr & ESUJIS) != 0;

    if (has_ujis) {
        if (has_sjis)
            mp->priority = def_priority;
        else
            mp->priority = PUJIS;
    } else if (mp->io.inputr & ESUTF8) {
        mp->priority = PUTF8;
    } else if (has_sjis) {
        mp->priority = PSJIS;
    } else {
        mp->priority = PNONE;
    }
    mp->sequence_counter = 0;
#endif
}
/*
 * Return the priority currently stored in mp, or PNONE when JAPANESE is
 * not enabled.
 */
J_PRIORITY get_priority(mp)
MULBUF *mp;
{
#if JAPANESE
return (mp->priority);
#else
return (PNONE);
#endif
}
/*
 * Store pri as the priority of mp.
 *
 * pri must be PSJIS, PUJIS, PUTF8 or PNONE (asserted).  The function
 * does nothing when JAPANESE is not enabled.
 */
void set_priority(mp, pri)
MULBUF *mp;
J_PRIORITY pri;
{
#if JAPANESE
assert(pri == PSJIS || pri == PUJIS || pri == PUTF8 || pri == PNONE);
mp->priority = pri;
#endif
}
/*
 * Allocate and initialize a MULBUF.
 *
 * Both the buffer and its m_status block come from ecalloc().  The
 * character-set mask and encoding sets are copied from the defaults
 * recorded by init_def_scs_es(), and the remaining state is set up by
 * init_multibuf().  The caller owns the returned buffer and its ms member.
 */
MULBUF *new_multibuf()
{
    MULBUF *mp;

    mp = ecalloc(1, sizeof(MULBUF));
    mp->ms = ecalloc(1, sizeof(struct m_status));

    /* Start from the process-wide defaults. */
    mp->io.scs = def_scs;
    mp->io.input = def_input;
    mp->io.inputr = def_inputr;

    /* Remember the right-half encoding for rotate_right_codeset(). */
    mp->orig_io_right = def_inputr;
    mp->rotation_io_right = 0;

    mp->eseq = NOESC;
    init_multibuf(mp);
    return (mp);
}
/*
 * Forget all buffered input and pending output of mp.
 *
 * The position fields are reset so that the buffer is empty
 * (lastpos = M_NULL_POS, startpos = laststartpos = 0), the saved
 * single-shift plane is invalidated and the output index returns to 0.
 * The designations and shift state in mp->ms are not touched.
 */
void clear_multibuf(mp)
MULBUF *mp;
{
    /* input positions */
    mp->startpos = 0;
    mp->laststartpos = 0;
    mp->lastpos = M_NULL_POS;

    /* saved single shift and converted-output index */
    mp->lastsg = WRONGPLANE;
    mp->intindex = 0;
}
/*
 * Reset an m_status block to its initial state: gs[0..3] take the default
 * designations from def_gs[], gl is 0, gr is 1, no single shift is
 * pending and irr is cleared.
 */
static void init_ms(ms)
struct m_status *ms;
{
    int g;

    for (g = 0; g < 4; g++)
        ms->gs[g] = def_gs[g];

    ms->gl = 0;
    ms->gr = 1;
    ms->sg = WRONGPLANE;
    ms->irr = 0;
}
/*
 * Put mp back into its initial parsing state.
 *
 * The current and input character sets become ASCII, mp->ms is reset by
 * init_ms(), any escape sequence in progress is abandoned, the 0177/0377
 * handling is refreshed and the buffer is emptied by clear_multibuf().
 */
void init_multibuf(mp)
MULBUF *mp;
{
    mp->cs = ASCII;
    init_ms(mp->ms);

    /* Drop any partially collected escape sequence. */
    mp->eseq = NOESC;
    fix_status_for_escape_sequence(mp);

#if JAPANESE
    mp->sequence_counter = 0;
#endif
    mp->icharset = ASCII;
    clear_multibuf(mp);
}
/*
 * Process the byte that was just appended to the buffer.
 *
 * Bytes are kept buffered until it is certain that they form a valid
 * sequence.  When the input encoding allows escape sequences
 * (ESJIS83, ESISO7 or ESISO8) the byte is first offered to
 * check_escape_sequence(); if that does not take it, it is handed to
 * internalize() as character data.
 */
static void check_new_buffered_byte(mp)
MULBUF *mp;
{
    m_position startpos_before = mp->startpos;

    if ((mp->io.input & (ESJIS83 | ESISO7 | ESISO8)) &&
        check_escape_sequence(mp) == 0) {
        return;     /* handled as part of an escape sequence */
    }

    /* Not an escape sequence: try to use the byte as a character. */
    internalize(mp);

    /* startpos only moves when internalize() detected a character. */
    if (mp->startpos == startpos_before)
        return;

    /*
     * A single shift affects one character only, so remember it in
     * lastsg and then clear it.
     */
    mp->lastsg = mp->ms->sg;
    if (mp->ms->sg == WRONGPLANE)
        return;
    mp->ms->sg = WRONGPLANE;
    fix_status_for_escape_sequence(mp);
}
/*
 * Re-parse all buffered data.
 *
 * This routine is called when a problem was found in the buffered data.
 * The caller first takes the first byte out of the buffer (every call
 * site in this file calls wrongcs1() immediately before); this routine
 * then parses the rest of the buffered data again, byte by byte, through
 * check_new_buffered_byte().
 *
 * NOTE(review): the JAPANESE block reads index mp->intindex - 1 of the
 * output arrays, so it relies on the preceding wrongcs1() call having
 * stored at least one entry -- confirm.
 */
static void multi_reparse(mp)
MULBUF *mp;
{
m_position to;
/*
 * We found something wrong and are going to move past the first byte.
 * So, we clear the single-shifted character set because it would
 * shift only this one byte, which is being marked wrong.
 */
if (mp->ms->sg != WRONGPLANE) {
mp->ms->sg = WRONGPLANE;
fix_status_for_escape_sequence(mp);
}
#if JAPANESE
/*
 * Quick Japanese code hack.
 * Check whether the character just marked wrong is SJIS KANA or not.
 * If it is SJIS KANA, our prediction has failed, so fall back to
 * SJIS KANA mode: the entry is rewritten as JISX0201KANA with its top
 * bit cleared, and priority / icharset switch to SJIS.
 */
if ((mp->priority == PSJIS || (mp->io.inputr & ESSJIS)) &&
CSISWRONG(mp->multics[mp->intindex - 1]) &&
ISSJISKANA(mp->multiint[mp->intindex - 1])) {
mp->cs = JISX0201KANA;
mp->priority = PSJIS;
mp->icharset = SJIS;
mp->multiint[mp->intindex - 1] &= 0x7f;
mp->multics[mp->intindex - 1] = mp->cs;
}
#endif
/*
 * Retry parsing the rest of the buffered data.  lastpos is used as the
 * cursor while replaying, so the real end is saved in `to' and restored
 * afterwards.
 */
to = mp->lastpos;
for (mp->lastpos = mp->startpos; mp->lastpos <= to; mp->lastpos++) {
check_new_buffered_byte(mp);
}
mp->lastpos = to;
}
#if LESS
/*
 * Rebuild the parsing state of mp for file position pos.
 *
 * The file is rewound to the beginning of the line containing pos and
 * every byte from there up to (not including) pos is fed through
 * multi_parse() with no output requested; afterwards the read position is
 * put back to pos.  Nothing happens when the initial ch_seek(pos) fails.
 */
void multi_find_cs(mp, pos)
MULBUF* mp;
m_position pos;
{
    int c;
    m_position cur;

    if (ch_seek(pos) != 0)
        return;

    /*
     * Back up to the beginning of the line.
     */
    do {
        c = ch_back_get();
    } while (c != '\n' && c != EOI);
    if (c == '\n') {
        /* step forward over the newline that ended the previous line */
        (void)ch_forw_get();
    }

    cur = ch_tell();
    if (cur == pos)
        return;

    for (; cur < pos; cur++) {
        c = ch_forw_get();
        assert(c != EOI && c != '\n');
        multi_parse(mp, c, NULL_POSITION, NULL, NULL);
    }
    ch_seek(pos);
}
#endif
#define DEBUG 0
#if DEBUG
int debug = 1;
#endif
/*
 * Manage the m_status data structure that tracks the ISO-2022 status of
 * the input stream.
 *
 * multi_start_buffering() prepares mp for input starting at position pos.
 * The buffer must be empty.  When pos directly continues the previous
 * data, the saved single shift is restored; otherwise the status is
 * rebuilt from scratch.
 */
void multi_start_buffering(mp, pos)
MULBUF *mp;
m_position pos;
{
    /* buffer must be empty */
    assert(mp->lastpos < mp->startpos);

    if (pos != mp->lastpos + 2 && pos != mp->laststartpos) {
        /*
         * pos is somewhere unrelated to the previous data; this happens
         * when the function is called after jump_loc().
         */
#if DEBUG
        if (debug) {
            fprintf(stderr, "%qd, %qd, %qd, %qd\n", pos, mp->lastpos,
                    mp->startpos, mp->laststartpos);
            fprintf(stderr, "oct %qo, %qo, %qo, %qo\n", pos, mp->lastpos,
                    mp->startpos, mp->laststartpos);
        }
#endif
        init_multibuf(mp);
#if LESS
        multi_find_cs(mp, pos);
        clear_multibuf(mp);
#endif
        mp->laststartpos = pos;
    } else if (mp->ms->sg != mp->lastsg) {
        /*
         * pos == mp->lastpos + 2 if this line starts right after a \n;
         * pos == mp->laststartpos if it starts with a non-fit character.
         * Either way the status is still valid; only the backed-up
         * single shift has to be restored.
         */
        mp->ms->sg = mp->lastsg;
        fix_status_for_escape_sequence(mp);
    }

    /* adjust pointers: an empty buffer that begins at pos */
    mp->startpos = pos;
    mp->lastpos = pos - 1;
}
/*
 * Add one input byte to the buffer and parse it, or flush the buffer.
 *
 * mp   - buffer to work on.
 * c    - input byte; a negative value forces every buffered byte out.
 * pos  - position of c, or NULL_POSITION to simply advance by one.
 * mbd  - when not NULL, receives the converted bytes (cbuf), their
 *        character sets (csbuf) and their count (byte).
 * mpos - when not NULL, receives mp->startpos as it was before parsing.
 *
 * Bytes stay buffered until it is certain that they form a valid
 * sequence.  The output index mp->intindex is reset to 0 before
 * returning, so the arrays reported through mbd are reused by the next
 * call.
 */
void multi_parse(mp, c, pos, mbd, mpos)
MULBUF* mp;
int c;
m_position pos;
M_BUFDATA* mbd;
POSITION* mpos;
{
    if (c >= 0) {
        if (pos == NULL_POSITION) {
            mp->lastpos++;
        } else {
            assert(pos == mp->lastpos + 1);
            mp->lastpos = pos;
        }
        INBUF(mp) = c;
        mp->laststartpos = mp->startpos;
        if (mpos != NULL) {
            *mpos = mp->startpos;
        }
        /*
         * The byte is in the buffer now; parse it.
         */
        check_new_buffered_byte(mp);
    } else {
        if (mpos != NULL) {
            *mpos = mp->startpos;
        }
        /*
         * Force all buffered characters out: abandon a pending escape
         * sequence, then mark and re-parse until nothing is left.
         */
        if (mp->eseq != NOESC) {
            mp->eseq = NOESC;
            fix_status_for_escape_sequence(mp);
        }
        while (mp->startpos <= mp->lastpos) {
            wrongcs1(mp);
            multi_reparse(mp);
        }
    }

    /* Report what was produced and reset the output index. */
    if (mbd != NULL) {
        mbd->cbuf = mp->multiint;
        mbd->csbuf = mp->multics;
        mbd->byte = mp->intindex;
    }
    mp->intindex = 0;
}
/*
 * Flush buffered data.
 *
 * Calls multi_parse() with a negative byte, which forces every buffered
 * byte out.  The converted data is reported through mbd and the starting
 * position through mpos, each only when not NULL.
 */
void multi_flush(mp, mbd, mpos)
MULBUF* mp;
M_BUFDATA* mbd;
POSITION* mpos;
{
multi_parse(mp, -1, NULL_POSITION, mbd, mpos);
}
/*
 * Discard buffered data.
 *
 * Same forced flush as multi_flush(), but with no output parameters, so
 * whatever the flush produces is dropped.
 */
void multi_discard(mp)
MULBUF* mp;
{
multi_parse(mp, -1, NULL_POSITION, NULL, NULL);
}
/*
 * Replace the input encoding sets of mp with input and inputr.
 *
 * Only the two fields are stored here; the priority derived from inputr
 * is not recomputed by this function.
 */
void set_codesets(mp, input, inputr)
MULBUF *mp;
ENCSET input;
ENCSET inputr;
{
mp->io.input = input;
mp->io.inputr = inputr;
}
/*
 * Return a printable name for the input character set recorded in
 * mp->icharset.
 *
 * Known code sets and character sets yield a string literal.  Any other
 * set is described as "94(F)", "96(F)", "94N(F)" or "96N(F)", where F is
 * the final byte of the set, followed by the IRR number when it is
 * greater than zero.  That description lives in a static buffer which the
 * next call overwrites.
 */
char *get_icharset_string(mp)
MULBUF *mp;
{
    static char buf[10];
    char *tmpl = NULL;      /* description template for unnamed sets */
    int ft_at = 0;          /* index of the final byte in the template */

    switch (mp->icharset)
    {
#if JAPANESE
    /*
     * Code set
     */
    case SJIS:              return ("SJIS");
    case SJIS2000:          return ("SJIS2000");
    case SJIS2004:          return ("SJIS2004");
    case UJIS:              return ("UJIS");
    case UJIS2000:          return ("UJIS2000");
    case UJIS2004:          return ("UJIS2004");
#endif
    /*
     * Character set
     */
    case ASCII:             return ("ASCII");
    case JISX0201KANA:      return ("JIS-KANA");
    case JISX0201ROMAN:     return ("JIS-ROMAN");
    case LATIN1:            return ("LATIN1");
    case LATIN2:            return ("LATIN2");
    case LATIN3:            return ("LATIN3");
    case LATIN4:            return ("LATIN4");
    case GREEK:             return ("GREEK");
    case ARABIC:            return ("ARABIC");
    case HEBREW:            return ("HEBREW");
    case CYRILLIC:          return ("CYRILLIC");
    case LATIN5:            return ("LATIN5");
    case JISX0208_78KANJI:  return ("JIS-78KANJI");
    case GB2312:            return ("GB2312");
    case JISX0208KANJI:     return ("JIS-83KANJI");
    case JISX0208_90KANJI:  return ("JIS-90KANJI");
    case KSC5601:           return ("KSC5601");
    case JISX0212KANJISUP:  return ("JIS-KANJISUP");
    case JISX0213KANJI1:    return ("JISX0213KANJI1");
    case JISX0213KANJI2:    return ("JISX0213KANJI2");
    case JISX02132004KANJI1:return ("JISX0213:2004KANJI1");
    }

    /* No fixed name: describe the set by its type and final byte. */
    switch (CS2TYPE(mp->icharset))
    {
    case TYPE_94_CHARSET:
        tmpl = "94( )";
        ft_at = 3;
        break;
    case TYPE_96_CHARSET:
        tmpl = "96( )";
        ft_at = 3;
        break;
    case TYPE_94N_CHARSET:
        tmpl = "94N( )";
        ft_at = 4;
        break;
    case TYPE_96N_CHARSET:
        tmpl = "96N( )";
        ft_at = 4;
        break;
    default:
        assert(0);
        break;
    }
    if (tmpl != NULL)
    {
        strcpy(buf, tmpl);
        buf[ft_at] = CS2FT(mp->icharset);
    }

    if (CS2IRR(mp->icharset) > 0)
    {
        char num[3];
        sprintf(num, "%d", CS2IRR(mp->icharset));
        strcat(buf, num);
    }
    return (buf);
}
static int old_output_charset = ASCII;  /* Last displayed character set */

/*
 * Build the escape sequence that switches the output to charset.
 *
 * The sequence is: ESC; for ISO output with an IRR number greater than
 * zero, '&' + IRR code + ESC; the designation bytes for the set's type;
 * and, unless output is ESISO8, a 017 or 016 byte when the new set and
 * old_output_charset are on different sides of the 94 / 96 split.
 * A wrong character set is treated as ASCII.
 *
 * Returns a NUL-terminated string in a static 9-byte buffer (the longest
 * sequence built here is 8 bytes); the next call overwrites it.
 * old_output_charset is only read, never updated, by this function.
 */
static unsigned char *make_escape_sequence(charset)
int charset;
{
    static unsigned char p[9];
    unsigned char *q = p;
    int type;
    int old_type;
    int ft;

    if (CSISWRONG(charset))
    {
        charset = ASCII;
    }
    type = CS2TYPE(charset);
    old_type = CS2TYPE(old_output_charset);
    ft = CS2FT(charset);

    *q++ = '\033';
    if ((output & (ESISO7 | ESISO8)) && CS2IRR(charset) > 0)
    {
        *q++ = '&';
        *q++ = IRR2CODE(CS2IRR(charset));
        *q++ = '\033';
    }

    /*
     * Call 94 or 94N character set to G0 plane.
     * Call 96 or 96N character set to G1 plane.
     */
    if (type == TYPE_94_CHARSET)
    {
        *q++ = '(';
        *q++ = CS2FT(charset);
    }
    else if (type == TYPE_94N_CHARSET)
    {
        *q++ = '$';
        /* '@', 'A' and 'B' use the short form without '('. */
        if (ft != '@' && ft != 'A' && ft != 'B')
            *q++ = '(';
        *q++ = CS2FT(charset);
    }
    else if (type == TYPE_96_CHARSET)
    {
        *q++ = '-';
        *q++ = CS2FT(charset);
    }
    else if (type == TYPE_96N_CHARSET)
    {
        *q++ = '$';
        *q++ = '-';
        *q++ = CS2FT(charset);
    }

    /*
     * If output is not ESISO8, use SO and SI to call G1 to GL.
     * Otherwise, we use GR directly, so no need to call G1
     * since G1 is called GR already.
     */
    if (!(output & ESISO8))
    {
        int new_is_94 = (type == TYPE_94_CHARSET ||
                         type == TYPE_94N_CHARSET);
        int new_is_96 = (type == TYPE_96_CHARSET ||
                         type == TYPE_96N_CHARSET);
        int old_is_94 = (old_type == TYPE_94_CHARSET ||
                         old_type == TYPE_94N_CHARSET);
        int old_is_96 = (old_type == TYPE_96_CHARSET ||
                         old_type == TYPE_96N_CHARSET);

        if (new_is_94 && old_is_96)
            *q++ = '\017';
        else if (new_is_96 && old_is_94)
            *q++ = '\016';
    }

    *q = '\0';
    return (p);
}
/* Output scratch buffer shared by the convert_to_*() routines. */
static char cvbuffer[32];
/* Number of bytes of the current character collected so far. */
static int cvindex = 0;
/* Returned while a multi-byte character is still incomplete. */
static char *nullcvbuffer = "";

/*
 * Convert one internal byte c of character set cs for ISO-2022 output.
 *
 * With ESISO8 output a non-zero byte of a 96 / 96N set gets its top bit
 * set.  When the set differs from old_output_charset, the escape sequence
 * from make_escape_sequence() is placed in front of the byte and
 * old_output_charset is updated; a byte whose cs satisfies CSISREST() is
 * passed through without looking at the set.
 *
 * Returns a pointer to static storage that later calls overwrite.
 */
static char *convert_to_iso(c, cs)
int c;
int cs;
{
    static char buffer2[2];
    int is_96;

    is_96 = (CS2TYPE(cs) == TYPE_96_CHARSET ||
             CS2TYPE(cs) == TYPE_96N_CHARSET);
    if ((output & ESISO8) && c != 0 && is_96)
        c |= 0x80;

    buffer2[0] = c;
    buffer2[1] = '\0';

    if (CSISREST(cs))
        return (buffer2);

    if (CSISWRONG(cs))
        cs = ASCII;
    cs = CS2CHARSET(cs);

    if (cs != old_output_charset)
    {
        /*
         * make_escape_sequence() reads old_output_charset, so it has to
         * run before the update below.
         */
        strcpy(cvbuffer, (char *)make_escape_sequence(cs));
        old_output_charset = cs;
        strcat(cvbuffer, buffer2);
        return (cvbuffer);
    }
    return (buffer2);
}
/*
 * Convert one internal byte c of character set cs for JIS output.
 *
 * Bytes of a two-byte character are collected in a private buffer using
 * cvindex; the empty string is returned until the character is complete.
 * c == 0 resets the collector.  JIS C 6226-1978 characters are passed
 * through jis78to90() and emitted as JISX0208_90KANJI; JIS X 0213 plane 1
 * characters are emitted as JISX0208KANJI.  Nothing is emitted for UTF8.
 * When the resulting set differs from old_output_charset, the escape
 * sequence from make_escape_sequence() is placed in front and
 * old_output_charset is updated.
 *
 * Returns a pointer to static storage that later calls overwrite.
 */
static char *convert_to_jis(c, cs)
int c;
int cs;
{
    static char buffer2[3];

    if (c == 0)
    {
        cvindex = 0;
        return (nullcvbuffer);
    }

    buffer2[cvindex++] = c;
    buffer2[cvindex] = '\0';

    if (CSISWRONG(cs))
        cs = ASCII;
    cs = CS2CHARSET(cs);

    switch (cs)
    {
    case ASCII:
    case JISX0201ROMAN:
    case JISX0201KANA:
        /* single-byte sets */
        assert(cvindex == 1);
        break;
    case JISX0208_78KANJI:
    case JISX0208KANJI:
    case JISX0208_90KANJI:
    case JISX0213KANJI1:
    case JISX02132004KANJI1:
        /* two-byte sets: wait for the second byte */
        if (cvindex == 1)
            return (nullcvbuffer);
        assert(cvindex == 2);
        if (cs == JISX0208_78KANJI)
        {
            jis78to90(buffer2);
            cs = JISX0208_90KANJI;
        }
        else if (cs == JISX0213KANJI1 || cs == JISX02132004KANJI1)
        {
            cs = JISX0208KANJI;
        }
        break;
    default:
        if (cs == UTF8)
        {
            /* ? */
            cvindex = 0;
            return (nullcvbuffer);
        }
        assert(0);
        break;
    }
    cvindex = 0;

    if (cs != old_output_charset)
    {
        /*
         * make_escape_sequence() reads old_output_charset, so it has to
         * run before the update below.
         */
        strcpy(cvbuffer, (char *)make_escape_sequence(cs));
        old_output_charset = cs;
        strcat(cvbuffer, buffer2);
        return (cvbuffer);
    }
    return (buffer2);
}
#if JAPANESE
/*
 * Convert one internal byte c of character set cs for UJIS output.
 *
 * Bytes are collected in cvbuffer using cvindex; the empty string is
 * returned until a two-byte character is complete, and c == 0 resets the
 * collector.  The result depends on the set:
 *   ASCII / JIS X 0201 Roman       - the byte itself
 *   JIS X 0201 Kana                - 0x8e, byte | 0x80
 *   JIS X 0208 / X 0213 plane 1    - both bytes | 0x80 (1978 data goes
 *                                    through jis78to90() first)
 *   JIS X 0212 / X 0213 plane 2    - 0x8f, both bytes | 0x80
 *   UTF8                           - nothing
 *
 * Returns a pointer to static storage that later calls overwrite.
 */
static char *convert_to_ujis(c, cs)
int c;
int cs;
{
    if (c == 0)
    {
        cvindex = 0;
        return (nullcvbuffer);
    }

    cvbuffer[cvindex++] = c;
    cvbuffer[cvindex] = '\0';

    if (CSISWRONG(cs))
        cs = ASCII;
    cs = CS2CHARSET(cs);

    switch (cs)
    {
    case ASCII:
    case JISX0201ROMAN:
        assert(cvindex == 1);
        break;
    case JISX0201KANA:
        assert(cvindex == 1);
        cvbuffer[2] = '\0';
        cvbuffer[1] = cvbuffer[0] | 0x80;
        cvbuffer[0] = 0x8e;
        break;
    case JISX0208_78KANJI:
    case JISX0208KANJI:
    case JISX0208_90KANJI:
    case JISX0213KANJI1:
    case JISX02132004KANJI1:
        if (cvindex == 1)
            return (nullcvbuffer);
        assert(cvindex == 2);
        if (cs == JISX0208_78KANJI)
            jis78to90(cvbuffer);
        cvbuffer[0] |= 0x80;
        cvbuffer[1] |= 0x80;
        break;
    case JISX0212KANJISUP:
    case JISX0213KANJI2:
        if (cvindex == 1)
            return (nullcvbuffer);
        assert(cvindex == 2);
        /* shift both bytes up by one to make room for the 0x8f prefix */
        cvbuffer[2] = cvbuffer[1] | 0x80;
        cvbuffer[1] = cvbuffer[0] | 0x80;
        cvbuffer[0] = 0x8f;
        cvbuffer[3] = '\0';
        break;
    default:
        if (cs == UTF8)
        {
            /* ? */
            cvindex = 0;
            return (nullcvbuffer);
        }
        assert(0);
        break;
    }

    cvindex = 0;
    return (cvbuffer);
}
/*
 * Convert one internal byte c of character set cs for SJIS output.
 *
 * Bytes are collected in the shared cvbuffer using cvindex.  c == 0
 * resets the collector and returns the empty string; the empty string is
 * also returned while only the first byte of a two-byte character has
 * been seen, and for UTF8.  Otherwise cvbuffer, holding the converted
 * character, is returned.  The result is static storage that later calls
 * overwrite.
 */
static char *convert_to_sjis(c, cs)
int c;
int cs;
{
if (c == 0)
{
cvindex = 0;
return (nullcvbuffer);
}
cvbuffer[cvindex++] = c;
cvbuffer[cvindex] = '\0';
if (CSISWRONG(cs))
{
cs = ASCII;
}
cs = CS2CHARSET(cs);
if (cs == ASCII || cs == JISX0201ROMAN)
{
assert(cvindex == 1);
cvindex = 0;
return (cvbuffer);
} else if (cs == JISX0201KANA)
{
/* Kana is a single byte with the top bit set. */
assert(cvindex == 1);
cvbuffer[0] |= 0x80;
cvindex = 0;
return (cvbuffer);
} else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
cs == JISX02132004KANJI1)
{
register int c1, c2, c3;
/*
 * First output byte, indexed by c3 / 2 + (c3 & 1) where c3 is the
 * 7-bit first input byte.  Two consecutive c3 values (odd, then
 * even) share one entry.  The table has 64 entries, so c3 must not
 * exceed 0x7e.
 */
static unsigned char table_sjis[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
};
if (cvindex == 1)
return (nullcvbuffer);
assert(cvindex == 2);
if (cs == JISX0208_78KANJI)
jis78to90(cvbuffer);
/*
 * c3: first byte without its top bit.  c1: its low bit; an odd c3
 * puts the second byte in the range starting at 0x40, an even c3 in
 * the range starting at 0x9e.
 */
c3 = cvbuffer[0] & 0x7f;
c1 = c3 & 1;
c2 = (cvbuffer[1] & 0x7f) + (c1 ? 0x40 - 0x21 : 0x9e - 0x21);
c1 = table_sjis[c3 / 2 + c1];
cvbuffer[0] = c1;
/* Values from 0x7f upward are moved up by one, so 0x7f is never produced. */
cvbuffer[1] = c2 + (c2 >= 0x7f ? 1 : 0);
cvindex = 0;
return (cvbuffer);
} else if (cs == JISX0213KANJI2)
{
register int c1, c2, c3;
if (cvindex == 1)
return (nullcvbuffer);
assert(cvindex == 2);
/* Second byte is computed exactly as for the plane 1 sets above. */
c3 = cvbuffer[0] & 0x7f;
c1 = c3 & 1;
c2 = (cvbuffer[1] & 0x7f) +
(c1 ? 0x40 - 0x21 : 0x9e - 0x21);
/* First byte: the rows present in plane 2 are packed into 0xf0..0xfc. */
if (c3 <= 0x25) {
/* Map 1, 3, 4, and 5-KU */
/* Note: 2-KU is rejected already. */
c1 = (c3 - 0x21) / 2 + 0xf0;
} else if (c3 == 0x28) {
/* Map 8-KU */
c1 = 0xf0;
} else if (c3 <= 0x2f) {
/* Map 12, 13, 14, and 15-KU */
c1 = (c3 - 0x2b) / 2 + 0xf2;
} else {
/* Map 78-94 KU. */
/* Note: 16-77 KU is rejected already. */
c1 = (c3 - 0x6d) / 2 + 0xf4;
}
cvbuffer[0] = c1;
cvbuffer[1] = c2 + (c2 >= 0x7f ? 1 : 0);
cvindex = 0;
return (cvbuffer);
} else if (cs == UTF8)
{
/* ? */
cvindex = 0;
return (nullcvbuffer);
}
/* Any other character set is unexpected here. */
assert(0);
cvindex = 0;
return (cvbuffer);
}
#endif
/*
 * Convert one internal byte c of character set cs for UTF-8 output.
 *
 * NOTE(review): this looks like an unfinished stub.  Every call with a
 * non-zero byte reaches an unconditional assert(0), so builds with
 * assertions enabled abort here.  The code after it does no real
 * conversion: the kanji branches return cvbuffer after a single collected
 * byte.  Confirm before relying on ESUTF8 output.
 *
 * c == 0 resets the collector and returns the empty string.
 */
static char *convert_to_utf8(c, cs)
int c;
int cs;
{
if (c == 0)
{
cvindex = 0;
return (nullcvbuffer);
}
cvbuffer[cvindex++] = c;
cvbuffer[cvindex] = '\0';
if (CSISWRONG(cs))
{
cs = ASCII;
}
cs = CS2CHARSET(cs);
/* Unconditional: reached by every non-zero byte. */
assert(0);
if (cs == ASCII || cs == JISX0201ROMAN)
{
assert(cvindex == 1);
cvindex = 0;
return (cvbuffer);
} else if (cs == JISX0201KANA)
{
assert(cvindex == 1);
cvbuffer[0] |= 0x80;
cvindex = 0;
return (cvbuffer);
} else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI ||
cs == JISX0208_90KANJI || cs == JISX0213KANJI1 ||
cs == JISX02132004KANJI1)
{
/* Returns the collected byte unchanged; no conversion is performed. */
cvindex = 0;
return (cvbuffer);
} else if (cs == JISX0213KANJI2)
{
/* Returns the collected byte unchanged; no conversion is performed. */
cvindex = 0;
return (cvbuffer);
} else if (cs == UTF8)
{
/* ? */
cvindex = 0;
return (nullcvbuffer);
}
assert(0);
cvindex = 0;
return (cvbuffer);
}
/*
 * Convert one internal byte c of character set cs to the output encoding
 * selected by the file-wide output setting.
 *
 * A negative c is treated as byte 0 of ASCII.  The converters are tried
 * in the order ISO, JIS, UJIS, SJIS (the last two only with JAPANESE),
 * UTF-8; when none is selected the byte is returned unchanged.
 *
 * Returns a pointer to static storage that later calls overwrite; it may
 * be the empty string while a multi-byte character is incomplete.
 */
char *outchar(c, cs)
int c;
CHARSET cs;
{
    char *converted;

    if (c < 0)
    {
        c = 0;
        cs = ASCII;
    }

    if (output & (ESISO7 | ESISO8))
    {
        converted = convert_to_iso(c, cs);
    }
    else if (output & ESJIS83)
    {
        converted = convert_to_jis(c, cs);
    }
#if JAPANESE
    else if (output & ESUJIS)
    {
        converted = convert_to_ujis(c, cs);
    }
    else if (output & ESSJIS)
    {
        converted = convert_to_sjis(c, cs);
    }
#endif
    else if (output & ESUTF8)
    {
        converted = convert_to_utf8(c, cs);
    }
    else
    {
        /* no conversion selected: pass the byte through */
        cvbuffer[0] = c;
        cvbuffer[1] = '\0';
        converted = cvbuffer;
    }
    return (converted);
}
/*
 * Convert a NUL-terminated string p of character set cs to the output
 * encoding, one byte at a time through outchar().
 *
 * Returns a pointer to a static 1024-byte buffer that the next call
 * overwrites.  The result is always NUL-terminated and never exceeds the
 * buffer: conversion stops before the first piece of output that would
 * not fit, so an over-long input is truncated at a converted-byte
 * boundary instead of being written past the end of the buffer.
 *
 * Note: when truncation happens, outchar() has already been called for
 * the byte whose output is dropped, so the converter state reflects that
 * byte.
 */
char *outbuf(p, cs)
unsigned char *p;
CHARSET cs;
{
    static char buffer[1024];
    char *s;
    int i = 0;

    while (*p != '\0')
    {
        s = outchar(*p++, cs);
        /* Keep one byte free for the terminating NUL. */
        if ((int)strlen(s) >= (int)sizeof(buffer) - i)
            break;
        while (*s != '\0')
            buffer[i++] = *s++;
        assert(i < (int)sizeof(buffer));
    }
    buffer[i] = '\0';
    return (buffer);
}
/*
 * Return the width value of a byte of character set cs: 0 when
 * CSISREST(cs) holds, 1 for a 94 / 96 set and 2 for a 94N / 96N set.
 * Any other type is unexpected (asserted) and yields 0.
 * The byte value c itself is not consulted.
 */
int mwidth(c, cs)
int c;
CHARSET cs;
{
    if (CSISREST(cs))
        return (0);

    if (CS2TYPE(cs) == TYPE_94_CHARSET || CS2TYPE(cs) == TYPE_96_CHARSET)
        return (1);
    if (CS2TYPE(cs) == TYPE_94N_CHARSET || CS2TYPE(cs) == TYPE_96N_CHARSET)
        return (2);

    assert(0);
    return (0);
}
/*
 * Step mp to the next choice in a fixed cycle of seven right-half input
 * encodings, then recompute the priority with init_priority().
 *
 * The cycle is: original (the value saved in mp->orig_io_right),
 * japanese (UJIS | SJIS), ujis, sjis, iso8, noconv, none.
 *
 * Returns the name of the encoding now selected, as a string literal.
 */
char *rotate_right_codeset(mp)
MULBUF *mp;
{
    char *label = NULL;
    int step;

    mp->rotation_io_right++;
    mp->rotation_io_right %= 7;
    step = mp->rotation_io_right;

    if (step == 0) {
        label = "original";
        mp->io.inputr = mp->orig_io_right;
    } else if (step == 1) {
        label = "japanese";
        mp->io.inputr = ESUJIS | ESSJIS;
    } else if (step == 2) {
        label = "ujis";
        mp->io.inputr = ESUJIS;
    } else if (step == 3) {
        label = "sjis";
        mp->io.inputr = ESSJIS;
    } else if (step == 4) {
        label = "iso8";
        mp->io.inputr = ESISO8;
    } else if (step == 5) {
        label = "noconv";
        mp->io.inputr = ESNOCONV;
    } else if (step == 6) {
        label = "none";
        mp->io.inputr = ESNONE;
    } else {
        assert(0);
    }

    init_priority(mp);
    return (label);
}
#endif
/*
 * Length of a string that carries a parallel array of character sets.
 *
 * Without a character-set array (cs == NULL) this is plain strlen(str).
 * Otherwise the string ends at the first index where the byte is NULCH
 * and the character set satisfies CSISNULLCS(), so a NULCH byte whose
 * character set is not null is counted as data.
 */
int strlen_cs(str, cs)
char* str;
CHARSET* cs;
{
    int n;

    if (cs == NULL)
        return strlen(str);

    for (n = 0; str[n] != NULCH || !CSISNULLCS(cs[n]); n++)
        ;
    return n;
}
/*
 * Number of bytes occupied by the first character of chstr.
 *
 * Without a character-set array (cs == NULL) the answer is 0 for a NULL
 * or empty string and 1 otherwise.  With one, the answer is 0 at the end
 * of the string (NULCH byte with a null character set); otherwise it is 1
 * plus the number of directly following entries whose character set
 * satisfies CSISREST().
 */
int chlen_cs(chstr, cs)
char* chstr;
CHARSET* cs;
{
    int n;

    if (cs == NULL)
        return (chstr != NULL && *chstr != NULCH) ? 1 : 0;

    if (*chstr == NULCH && CSISNULLCS(*cs))
        return 0;

    /* The first byte always counts; continuation entries follow it. */
    for (n = 1; CSISREST(cs[n]); n++)
        ;
    return n;
}
/*
 * Duplicate a string together with its character-set array.
 *
 * str   - bytes to copy; the length is taken from strlen_cs(str, cs).
 * cs    - parallel character sets, or NULL for a plain string.
 * csout - receives the newly allocated character-set array.
 *
 * Both copies are allocated with ecalloc() and hold length + 1 entries.
 * When cs is NULL the new array is filled with ASCII and terminated with
 * NULLCS.  Returns the new string.
 */
char* strdup_cs(str, cs, csout)
char* str;
CHARSET* cs;
CHARSET** csout;
{
    int len = strlen_cs(str, cs);
    char* new_str = (char *)ecalloc(len + 1, 1);
    CHARSET* new_cs = (CHARSET *)ecalloc(len + 1, sizeof(CHARSET));
    int k;

    memcpy(new_str, str, sizeof(char) * (len + 1));

    if (cs != NULL) {
        memcpy(new_cs, cs, sizeof(CHARSET) * (len + 1));
    } else {
        /* plain string: every byte is ASCII */
        for (k = 0; k < len; k++)
            new_cs[k] = ASCII;
        new_cs[len] = NULLCS;
    }

    *csout = new_cs;
    return new_str;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */