/* vi:ts=4:sw=4
 *
 * JFOLD - Japanized FOLD
 *
 * Code Contributions By:	Atsushi Nakamura		ann@mrit.mei.co.jp
 *
 */
#define EXTERN
#include "vim.h"
#ifdef JP
#include "jp.h"
#endif
#include "globals.h"
#include "proto.h"
#include <fcntl.h>
#undef free
#ifdef IOSIZE
# undef IOSIZE
#endif
/*
#define IOSIZE 20
#define LINESIZE 10
*/
#define IOSIZE 4096
#define LINESIZE 4096
#define KCHARR	"NJESX,."
#define KCHARW	"NJESX"
#ifdef JP
static void usage();
	void
emsg(msg)
	char *msg;
{
	/* Report 'msg' on standard error, terminated by a newline. */
	fputs(msg, stderr);
	fputc('\n', stderr);
}
#ifndef MSDOS
/*
 * T_FILE is the initial value of o_text: neither -T nor -B was given, so
 * j_readln() settles the newline style from the first line ending it reads.
 */
# define T_FILE  2
/* TRUE when j_writeln() has to end lines with CR-LF instead of LF */
static int text = FALSE;
#endif
/*
 * Command line options, set by main().
 */
static int o_width = 0;         /* -w: fold width; folding only happens when > 0 */
static int o_tab   = 8;         /* -t: tab width used when counting columns */
static int o_cline = FALSE;     /* -c: concatenate following lines */
static int o_intel = FALSE;     /* -i: "intelligent" (word/symbol aware) folding */
static int o_serr  = FALSE;     /* -Q: suppress error and warning messages */
#ifndef MSDOS
static int o_text  = T_FILE;    /* -T, -B: TRUE, FALSE, or T_FILE while undecided */
#endif
static int o_scode = FALSE;     /* -C: print the kanji code of each input */
static int o_usage = FALSE;     /* -h, or a bad option: show usage and exit */
	static void
usage()
{
	/*
	 * Print the version banner and the help text on standard error,
	 * then terminate with exit status 1.
	 */
	static char *help[] = {
		"Usage: jfold [ -wtsciup [ width ] [tab] | -width ] [ files ...]\n",
		"   -w num   width.\n",
		"   -t num   tab width.\n",
		"\n",
		"   -c       concatenate (non indented) next line.\n",
		"   -i       intellegent(word and symbol recognized folding.)\n",
		"\n",
#ifndef MSDOS
		"   -T       generate CR-LF on NEWLINE(MS-DOS text mode).\n",
		"   -B       generate LF on NEWLINE(UNIX text or MS-DOS binary mode).\n",
#endif
		"   -Q       suppress error messages.\n",
		"   -C       print kanji code.\n",
		"   -N -J -E -S -X  \n",
		"            kanji code for output.\n",
		"   -. -,    kanji code for input.\n",
		"   Use - as a filename to read from standard input.\n",
		"\n",
		"Environment JMASK specifies default Kanji code.\n",
		NULL
	};
	char **lp;

	fprintf(stderr, "Jfold/%s by ann@mrit.mei.co.jp\n", JpVersion);
	/* none of the help lines contains a conversion, so fputs() is enough */
	for (lp = help; *lp != NULL; lp++)
		fputs(*lp, stderr);
	exit(1);
}
/* kanji code used for reading: JMASK[0], overridden by the -x, -, and -. options */
char jread;
/* kanji code used for output: JMASK[1], overridden by -N, -J, -E, -S and -X */
char jdisp;
	void
main(argc, argv)
	int				argc;
	char		  **argv;
{
	/*
	 * jfold entry point.
	 *
	 * Takes the default kanji codes from the JMASK environment variable
	 * (first character: code for reading, second character: code for
	 * writing), parses the leading option arguments, validates the width
	 * and tab settings and then runs do_fold() on standard input (no file
	 * argument, or the file name "-") or on every named file in turn.
	 */
#ifdef JP
	static char jmask[4] = JP;
#endif
	int	 i;
	char *cp;
	++argv;
	/*
	 * Process the command line arguments
	 */
#ifdef JP
	if ((cp = (char *)getenv("JMASK")) != NULL)
		strncpy(jmask, cp, 4);	/* only jmask[0] and jmask[1] are used */
	if (jmask[0] && !strchr(KCHARR, jmask[0]))
	{
		fprintf(stderr, "Unknown Kanji code %c for reading.\n", jmask[0]);
		return;
	}
	if (jmask[1] && !strchr(KCHARW, jmask[1]))
	{
		fprintf(stderr, "Unknown Kanji code %c for writing.\n", jmask[1]);
		return;
	}
	jread = jmask[0];
	jdisp = jmask[1];
#else
	jread = jdisp = JP_NONE;
#endif
	/*
	 * argv[0] is the next unprocessed argument; it exists while argc > 1.
	 */
	while(argc > 1 && argv[0][0] == '-' && argv[0][1] !=NUL)
	{
		/* "-<number>" is a short form of "-w <number>" */
		if (isdigit((unsigned char)argv[0][1]))
			o_width = atoi(argv[0] + 1);
		else
		{
			for(cp = argv[0] + 1; *cp; cp++)
			{
				switch(*cp)
				{
				case 'w':
					if (argc <= 2)	/* nothing follows: no value to read */
					{
						fprintf(stderr, "jfold: option -w requires a number\n");
						o_usage = TRUE;
						break;
					}
					argv ++;
					argc --;
					o_width = atoi(argv[0]);
					break;
				case 't':
					if (argc <= 2)	/* nothing follows: no value to read */
					{
						fprintf(stderr, "jfold: option -t requires a number\n");
						o_usage = TRUE;
						break;
					}
					argv ++;
					argc --;
					o_tab = atoi(argv[0]);
					break;
				case 'h':
					o_usage = TRUE;
					break;
				case 'c':
					o_cline = TRUE;
					break;
				case 'i':
					o_intel = TRUE;
					break;
				case 'Q':
					o_serr = TRUE;
					break;
#ifndef MSDOS
				case 'T':
					o_text = TRUE;
					break;
				case 'B':
					o_text = FALSE;
					break;
#endif
				case 'C':
					o_scode = TRUE;
					break;
				case 'N':
				case 'J':
				case 'E':
				case 'S':
				case 'X':
					jdisp = *cp;
					break;
				case 'x':
					/* -x<code>: the next character names the input code */
					if (cp[1] == NUL)
					{
						fprintf(stderr, "Unknown Kanji code '%c'.\n", *cp);
						o_usage = TRUE;
						break;
					}
					cp ++;
					if (!strchr(KCHARR, *cp))
					{
						fprintf(stderr, "Unknown Kanji code '%c'.\n", *cp);
						o_usage = TRUE;
						break;
					}
					/* FALLTHROUGH */
				case ',':
				case '.':
					jread = *cp;
					break;
				default:
					fprintf(stderr, "Unknown option '%c'\n", *cp);
					o_usage = TRUE;
					break;
				}
			}
		}
		++ argv;
		-- argc;
	}
	if (o_width < 0 || o_width > LINESIZE - 2)
	{
		fprintf(stderr, "jfold: Illegal width(%d)\n", o_width);
		o_usage = TRUE;
	}
	/* a tab width of 0 would divide by zero when tab stops are computed */
	if (o_tab < 1 || o_tab > 128)
	{
		fprintf(stderr, "jfold: Illegal tab length(%d)\n", o_tab);
		o_usage = TRUE;
	}
#ifndef MSDOS
	if (o_text != T_FILE)
		text = o_text;
#endif
	if (o_usage)
		usage();
#if !defined(UNIX) && !defined(MSDOS)
	ExpandWildCards(argc - 1, argv, &numfiles, &files, TRUE, TRUE);
	if (numfiles != 0)
		files_exp = TRUE;
#else
	files = argv;
	numfiles = argc - 1;
#endif
/*
 * execute fold for each file
 */
 	{	void do_fold();
		if (numfiles == 0)
			do_fold(0, NULL);
		else
			for(i = 0; i < numfiles; i++)
			{
				int fd;
#ifdef AMIGA
				fname_case(files[i]);		/* set correct case for file name */
#endif
				if (!strcmp(files[i], "-"))
					fd = 0;
				else
					fd = open(files[i], O_RDONLY);
				if (fd >= 0)
				{
					/* a NULL name tells do_fold() it reads standard input */
					do_fold(fd, fd == 0 ? NULL: files[i]);
					close(fd);
				}
				else if (!o_serr)
					fprintf(stderr, "jfold: %s cannot open\n", files[i]);
			}
	}
}
/*
 * State of the buffered reader (j_init() / j_readln()).
 */
/*
 * round is handed to kanjiconvsfrom(); what it leaves there is put in front
 * of the next chunk read from the file.  rp is set to the end of that string.
 * NOTE(review): presumably the bytes of an incomplete trailing sequence --
 * confirm against kanjiconvsfrom().
 */
static char round[5], *rp;
/* converted text; Rp is the next byte to hand out, Rend the end of the data */
static char Rbuf[IOSIZE * 2], *Rp, *Rend;
/* conversion state passed by address to kanjiconvsfrom() */
static int kanji;
/* TRUE once read() has returned 0 */
static int reof;
	static void
j_init()
{
	/* Put the buffered reader back into its start-of-input state. */
	kanji = FALSE;
	reof = FALSE;
	rp = round;			/* nothing carried over from a previous chunk */
	reset_jcount();
	Rend = Rbuf;		/* no converted data available yet */
	Rp = Rend;
}
/*
 *	j_readln(fd, line, size) returns the number of char. transferred into 'line'
 *		note: line is not NUL terminated.
 *		return -1:	EOF reached
 *		return -2:	Error
 *
 *	A line ends after '\n' (which is stored in 'line') or as soon as
 *	size - 1 bytes have been stored, so a long input line is returned in
 *	several pieces.  A CR directly followed by LF is stored as a single LF.
 *	Unless -T or -B was given, the first line ending seen selects the
 *	newline style used for output (o_text / text).
 */
	static int
j_readln(fd, line, size)
	int fd;
	char *line;
	int size;
{
	int  len;
	char c;
	len = 0;
retry:
	/* hand out the converted bytes that are still in Rbuf */
	while (Rp < Rend)
	{
		c = *line ++ = *Rp ++;
		len ++;
		if (IsKanji(c))
		{
			if (Rp >= Rend)
			{
				/*
				 * Lead byte without its second byte: drop it instead of
				 * reading past the end of the valid data.
				 */
				line --;
				len --;
				break;
			}
			*line ++ = *Rp ++;
			len ++;
		}
		/* only look at the byte after CR when it is part of the data */
        else if (c == '\r' && Rp < Rend && *Rp == '\n')
		{
#ifndef MSDOS
			if (o_text == T_FILE)
				o_text = text = TRUE;
#endif
			/* suppress CR for MS-DOS text */
            c = *(line - 1) = *Rp ++;
		}
#ifndef MSDOS
		else if (c == '\n' && o_text == T_FILE)
			o_text = text = FALSE;
#endif
		if (c == '\n' || len >= size - 1)
			return len;
	}
	if (reof)
	{
		if (rp != round)
		{
			/*
			 * End of file with bytes still pending in round: hand them out
			 * as they are.  The count must be taken before rp is reset.
			 */
			int pending;
			pending = rp - round;
			memmove(Rbuf, round, pending);
			rp = round;
			Rp = Rbuf;
			Rend = Rbuf + pending;
			goto retry;
		}
		else if (len)
			return len;
		else
			return -1;
	}
	/* reload Rbuf */
	{
		static char cbuf[IOSIZE];
		int  clen;
		int  ofst;
		if (rp != round)	/* flush round buffer */
		{
			/* pending bytes go in front of the newly read data */
			memmove(cbuf, round, ofst = rp - round);
			rp = round;
		}
		else
			ofst = 0;
		clen = read(fd, cbuf + ofst, IOSIZE - ofst);
		if (clen < 0)
			return -2;
		reof = (clen == 0);
		clen += ofst;
		Rend = Rp = Rbuf;
		Rend += kanjiconvsfrom(cbuf, clen, Rbuf, IOSIZE * 2, round,
															jread, &kanji);
		/* NOTE(review): relies on kanjiconvsfrom() leaving round NUL terminated */
		rp = round + strlen(round);
		goto retry;
	}
}
	static void
j_writeall(buf, len)
	char *buf;
	int len;
{
	/*
	 * Write exactly 'len' bytes of 'buf' to standard output.  write() may
	 * accept fewer bytes than requested, so keep going until everything is
	 * out; any failure terminates the program with exit(-1).
	 */
	while (len > 0)
	{
		int n;
		n = write(1, buf, len);
		if (n <= 0)
			exit(-1);
		buf += n;
		len -= n;
	}
}
	static void
j_writeln(line, tailnl)
	char *line;
	int tailnl;
{
	/*
	 * Convert 'line' (NUL terminated) to the output kanji code and write it
	 * to standard output.
	 *
	 * When 'tailnl' is set and the converted text ends with '\n', that
	 * newline is written as CR-LF in text mode and as LF otherwise.  Every
	 * other '\n' is replaced by a NUL byte before writing: do_fold() stores
	 * NUL input bytes as '\n' so that they survive in a C string.
	 */
	char *prline;
	char *cp;
	int len;
	int crlf;
	prline  = kanjiconvsto(line, jdisp);
	crlf = FALSE;
	for(cp = prline, len = 0; *cp; cp++, len++)
		if (*cp == '\n')
		{
			if (tailnl && *(cp + 1) == NUL)
			{
#ifndef MSDOS
				if (!text)
					continue;	/* keep the LF as part of the data */
#endif
				crlf = TRUE;	/* leave the LF out, CR-LF is added below */
				break;
			}
			*cp = NUL;
		}
	j_writeall(prline, len);
	if (crlf)
		j_writeall("\r\n", 2);
	/* kanjiconvsto() may return its argument; only free a separate buffer */
	if (prline != line)
		free(prline);
}
	void
do_fold(fd, fname)
	int fd;
	char *fname;
{
	/*
	 * Fold the text read from descriptor 'fd' and write the result to
	 * standard output.  'fname' is only used in messages; NULL stands for
	 * standard input.
	 *
	 * Input arrives through j_readln() in pieces of at most one line.  The
	 * output line under construction is collected in Cbuf (Cp is its end,
	 * col its display width) and written with j_writeln() when
	 *   - the next input line starts (or, with -c, the paragraph ends),
	 *   - the width limit is reached (only when o_width > 0),
	 *   - Cbuf is about to run full, or
	 *   - the input is exhausted.
	 * Afterwards code conflicts and the Hankaku Kana count are reported
	 * unless -Q was given, and the detected code is printed for -C.
	 */
	static char Lbuf[LINESIZE];
	static char Cbuf[LINESIZE * 2 + 2];
	char *Lp, *Cp;
	int len;
	int linetop;
	int tailnl;
	int tailkanji;
	int olinetop;
	int lastempty;
	int col;
	char jcode;
	j_init();
	linetop = TRUE;
	olinetop = TRUE;
	tailnl = FALSE;
	tailkanji = FALSE;
	lastempty = FALSE;
	Cp = Cbuf;
	col = 0;
	while((len = j_readln(fd, Lbuf, LINESIZE)) != -1)
	{
		int isempty;
		if (len == -2)
		{
			fprintf(stderr, "jfold: read error(%s).\n",
										fname ? fname: "standard input");
			exit(-1);
		}
		isempty = linetop && Lbuf[0] == '\n';
		/* the previous piece ended a line: decide how to join or separate */
		if (tailnl)
		{
			if (o_cline)
			{
				/*
				 *	Separate paragraph
				 */
				if (isempty || lastempty ||
					(Lbuf[0] == '\240' && Lbuf[1] == '\240'))
				{
					*Cp++ = '\n';
					*Cp = NUL;
					j_writeln(Cbuf, tailnl);
					olinetop = TRUE;
					Cp = Cbuf;
					col = 0;
				}
				/*
				 *	Concatenate multiple lines
				 */
				else
				{
					/* remove preceeding spaces */
					/*
					 * NOTE(review): Lp is set back to Lbuf below, so the
					 * skipped blanks are still copied; the skip only
					 * affects the IsKanji() test.  Confirm the intent.
					 */
					for(Lp = Lbuf; *Lp == ' ' || *Lp == '\t'; Lp++);
					/* add white space for English text */
					if (!tailkanji && !IsKanji(*Lp))
					{
						*Cp++ = ' ';
						col ++;
					}
				}
			}
			/*
			 *	!Concatenate multiple lines
			 */
			else
			{
				*Cp++ = '\n';
				*Cp = NUL;
				j_writeln(Cbuf, tailnl);
				olinetop = TRUE;
				Cp = Cbuf;
				col = 0;
			}
			tailkanji = FALSE;
		}
		lastempty = isempty;
		/*
		 *	Generate print string.
		 */
		linetop = FALSE;
		olinetop = FALSE;
		tailnl = FALSE;
		Lp = Lbuf;
		/* "> 0": a two byte character must not drive len below zero for ever */
		while(len > 0)
		{
			char c;
			int  countwidth();
			/*
			 * Nothing else bounds Cbuf when folding is off (o_width == 0)
			 * or when intelligent mode finds no place to break.  Write the
			 * collected text without a newline before it can overflow; the
			 * margin leaves room for one character, a joining blank and
			 * the "\n" NUL pair appended elsewhere.
			 */
			if (Cp - Cbuf > (int)sizeof(Cbuf) - 8)
			{
				*Cp = NUL;
				j_writeln(Cbuf, FALSE);
				Cp = Cbuf;
			}
			c = *Lp ++;
			if (IsKanji(c) && !(c == '\377' || (c & 0xe0) == 0))
			{
				*Cp ++ = c;
				*Cp ++ = * Lp++;
				len -= 2;
				col += 2;
				tailkanji = TRUE;
			}
			else if (c == '\n')
			{
				/* the newline itself is added when the line is written */
				len --;
				linetop = TRUE;
				tailnl = TRUE;
			}
			else if (c == '\t')
			{
				*Cp ++ = c;
				col = ((col / o_tab) + 1) * o_tab;
				len --;
			}
			else
			{
				/* NUL is kept as '\n'; j_writeln() turns it back into NUL */
				*Cp ++ = (c == NUL)? '\n' : c;
				col ++;
				len --;
			}
			/*
			 *	Cut by column
			 */
			if (col >= o_width && o_width >0)
			{
				char *cend;
				char cbuf[2];
				cend = Cp;
				/*
				 *	non intellegent mode
				 */
				if (!o_intel)
				{
					if (col > o_width && IsKanji(c))
						cend -= 2;
				}
				/*
				 *	intellegent mode
				 */
				else
				{
					/* Note: lend may points to the last byte
					 * of a multi-byte char.
					 */
					cend --;
					/*
					 * find position to break at (for English words)
					 */
					while(cend > Cbuf)
					{
						if (IsKanji(*cend))
						{
							cend ++;	/* adjust to the top byte */
							if (col > o_width)
								cend -= 2;
							break;
						}
						else if (isspace(*cend))
						{
							while(cend > Cbuf && isspace(*cend))
								cend --;
							if (cend > Cbuf)
								cend ++;	/* points to the first space */
							else
							{	/* removing preceeding spaces */
								for(; cend != Cp; cend++)
									if (!isspace(*cend))
										break;
								if (cend == Cp)
									Cp = Cbuf;
								else if (cend != Cbuf)
								{
									memmove(Cbuf, cend, Cp - cend);
									Cp -= cend - Cbuf;
								}
								cend = Cbuf;
							}
							break;
						}
						cend --;
					}
					if (cend == Cbuf)	 /* if nospaces, cannot break line */
					{
						col = countwidth(Cbuf, Cp);
						continue;
					}
					/*
					 *	KINSOKU processing
					 */
					/* for opening symbols */
					c = *(cend - 1);
					if (IsKanji(c))
					{
						if (cend - 2 > Cbuf &&
							isjppunc(*(cend - 2), c, FALSE) )
							cend -= 2;
					}
					else
					{
						if (cend - 1 > Cbuf && isaspunc(c, FALSE) )
							cend --;
					}
					/* for closing symbols */
					c = *cend;
					if (cend == Cp)
					{
						col = countwidth(Cbuf, Cp);
						continue;
					}
					if (IsKanji(c))
					{
						if (isjppunc(c, *(cend + 1), TRUE) )
							cend += 2;
					}
					else
					{
						if (isaspunc(c, TRUE) )
							cend ++;
					}
				}
				/*
				 * Write Cbuf up to cend as one line: put "\n" NUL at the
				 * break, print, restore the two bytes and move the rest
				 * of the text to the start of Cbuf.
				 */
				cbuf[0] = *cend;
				cbuf[1] = *(cend + 1);
				*cend = '\n';
				*(cend + 1) = NUL;
				j_writeln(Cbuf, TRUE);
				olinetop = TRUE;
				*cend       =  cbuf[0];
				*(cend + 1) =  cbuf[1];
				if (cend != Cbuf)
				{
					memmove(Cbuf, cend, Cp - cend);
					Cp -= cend - Cbuf;
				}
				col = countwidth(Cbuf, Cp);
			}
		}
	}
	/*
	 * End of input: the newline that ended the last line has not been
	 * stored yet, add it so that the output ends the way the input did.
	 */
	if (tailnl)
		*Cp++ = '\n';
	if (Cp != Cbuf)
	{
		*Cp = NUL;
		j_writeln(Cbuf, tailnl);
		Cp = Cbuf;
	}
	jcode = judge_jcode(jread);
	if (!o_serr)
	{
		int  jisx0201r;
		extern int  num_jis0201r();
		if (jread == JP_ANY  && jcode == JP_SJIS)
		{
			if (fname)
				fprintf(stderr, "%s: ", fname);
			fprintf(stderr, "EUC SJIS conflict: ");
			fprintf(stderr, "Use ',' option to read SJIS file.\n");
		}
		if (jread == JP_SANY && jcode == JP_EUC)
		{
			if (fname)
				fprintf(stderr, "%s: ", fname);
			fprintf(stderr, "EUC SJIS conflict: ");
			fprintf(stderr, "Use '.' option to read EUC file.\n");
		}
		jisx0201r = num_jis0201r();
		if (jisx0201r)
		{
			if (fname)
				fprintf(stderr, "%s: ", fname);
			fprintf(stderr, "%d Hankaku Kana -> Zenkaku Katakana\n", jisx0201r);
		}
	}
	if (o_scode)
	{
		if (fname)
			printf("%s: ", fname);
		printf("%c\n", jcode);
	}
	return;
}
	int
countwidth(start, end)
	char *start, *end;
{
	/*
	 * Return the display width of the bytes in [start, end): a two byte
	 * character counts 2 columns, a tab advances to the next multiple of
	 * o_tab, '\n' counts nothing and every other byte counts 1 column.
	 */
	char *p;
	int width = 0;

	for (p = start; p < end; )
	{
		char ch = *p;

		if (IsKanji(ch) && !(ch == '\377' || (ch & 0xe0) == 0))
		{
			width += 2;
			p += 2;
			continue;
		}
		/* everything else occupies a single byte */
		p++;
		if (ch == '\t')
			width = ((width / o_tab) + 1) * o_tab;
		else if (ch != '\n')
			width++;
	}
	return width;
}
	void
getout(r)
	int	r;
{
	/* Emit a newline on standard error, then leave with exit status 'r'. */
	fputc('\n', stderr);
	exit(r);
}
#else
/* dummy: used when JP is not defined, there is nothing to do */
int main() { return 0; }
#endif
/* syntax highlighted by Code2HTML, v. 0.9.1 */