/* --------------------------------------------------------------------------
 * input.c:     Copyright (c) Mark P Jones 1991-1998.   All rights reserved.
 *              See NOTICE for details and conditions of use etc...
 *              Hugs version 1.3b, January 1998
 *
 * Input functions, lexical analysis parsing etc...
 * ------------------------------------------------------------------------*/

#include "prelude.h"
#include "storage.h"
#include "connect.h"
#include "command.h"
#include "errors.h"
#include <ctype.h>

/* --------------------------------------------------------------------------
 * Global data:
 * ------------------------------------------------------------------------*/

List tyconDefns	     = NIL;		/* type constructor definitions	   */
List typeInDefns     = NIL;		/* type synonym restrictions 	   */
List valDefns	     = NIL;		/* value definitions in script	   */
List opDefns	     = NIL;		/* operator defns in script	   */
List classDefns      = NIL;		/* class defns in script 	   */
List instDefns       = NIL;		/* instance defns in script	   */
List overDefns	     = NIL;		/* overloaded implementation names */
List primDefns	     = NIL;		/* primitive definitions	   */
List defaultDefns    = NIL;		/* default definitions (if any)	   */
Int  defaultLine     = 0;		/* line in which default defs occur*/
List evalDefaults    = NIL;		/* defaults for evaluator	   */

Cell inputExpr	     = NIL;		/* input expression		   */
Bool literateScripts = FALSE;		/* TRUE => default to lit scripts  */
Bool literateErrors  = TRUE;		/* TRUE => report errs in lit scrs */

String repeatStr     = 0;		/* Repeat last expr		   */

/* --------------------------------------------------------------------------
 * Local function prototypes:
 * ------------------------------------------------------------------------*/

static Void local initCharTab	  Args((Void));
static Void local fileInput	  Args((String,Long));
static Bool local literateMode	  Args((String));
static Void local skip		  Args((Void));
static Void local thisLineIs	  Args((Int));
static Void local newlineSkip	  Args((Void));
static Void local closeAnyInput   Args((Void));

       Int  yyparse	    Args((Void)); /* can't stop yacc making this   */
					  /* public, but don't advertise   */
					  /* it in a header file.	   */

static Void local endToken	  Args((Void));
static Text local readOperator	  Args((Void));
static Text local readIdent	  Args((Void));
static Cell local readRadixNumber Args((Int));
static Cell local readNumber	  Args((Void));
static Cell local readChar	  Args((Void));
static Cell local readString	  Args((Void));
static Void local saveStrChr	  Args((Char));
static Cell local readAChar	  Args((Bool));

static Bool local lazyReadMatches Args((String));
static Cell local readEscapeChar  Args((Bool));
static Void local skipGap	  Args((Void));
static Cell local readCtrlChar	  Args((Void));
static Cell local readOctChar	  Args((Void));
static Cell local readHexChar	  Args((Void));
static Int  local readHexDigit	  Args((Char));
static Cell local readDecChar	  Args((Void));

static Void local goOffside	  Args((Int));
static Void local unOffside	  Args((Void));
static Bool local canUnOffside	  Args((Void));

static Void local skipWhitespace  Args((Void));
static Void local useLayout       Args((Void));
static Int  local yylex 	  Args((Void));
static Int  local repeatLast	  Args((Void));

static Void local parseInput	  Args((Int));

/* --------------------------------------------------------------------------
 * Text values for reserved words and special symbols:
 * ------------------------------------------------------------------------*/

static Text textCase,	 textOfK,      textData,   textType,   textIf;
static Text textThen,	 textElse,     textWhere,  textLet,    textIn;
static Text textInfix,   textInfixl,   textInfixr, textPrim,   textNewtype;
static Text textDefault, textDeriving, textDo,     textClass,  textInstance;
static Text textStruct;

static Text textCoco,	 textEq,       textUpto,   textAs,     textLambda;
static Text textBar,	 textMinus,    textFrom,   textArrow,  textLazy;
static Text textImplies, textLT,       textGT;

static Text textModule,  textImport;
static Text textHiding,  textQualified, textAsMod;

#if    LAZY_ST
static Text textRunST;
#endif

#if OBJ
static Text textTemplate, textAction, textRequest, textAssign;
static Text textHandle,   textForall, textWhile,   textElsif,   textFix;
static Cell varAssign;			/* (:=)				   */
#endif

static Cell varMinus;			/* (-)				   */
static Cell varBang;			/* (!)				   */
static Cell varLT;			/* (<)				   */
static Cell varGT;			/* (>)				   */
static Cell varHiding;			/* hiding			   */
static Cell varQualified;		/* qualified			   */
static Cell varAsMod;			/* as				   */

#if    NPLUSK
Text   textPlus;			/* (+)				   */
#endif
Text   textBang;

static List imps;			/* List of imports to be chased	   */

/* --------------------------------------------------------------------------
 * Character set handling:
 *
 * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1
 * character set.  The following code provides methods for classifying
 * input characters according to the lexical structure specified by the
 * report.  Hugs should still accept older programs because ASCII is
 * essentially just a subset of the ISO character set.
 *
 * Notes: If you want to port Hugs to a machine that uses something
 * substantially different from the ISO character set, then you will need
 * to insert additional code to map between character sets.
 *
 * At some point, the following data structures may be exported in a .h
 * file to allow the information contained here to be picked up in the
 * implementation of LibChar is* primitives.
 *
 * Relies, implicitly but for this comment, on assumption that NUM_CHARS=256.
 * ------------------------------------------------------------------------*/

static  unsigned char   ctable[NUM_CHARS];
#define isIn(c,x)	(ctable[c]&(x))
#define isISO(c)	(0<=c && c<NUM_CHARS)

#define DIGIT		0x01
#define SMALL		0x02
#define LARGE		0x04
#define SYMBOL		0x08
#define IDAFTER		0x10
#define SPACE		0x20
#define PRINT		0x40

static Void local initCharTab() {	/* Initialize char decode table	   */
#define setRange(x,f,t)	{Int i=f;   while (i<=t) ctable[i++] |=x;}
#define setChars(x,s)	{char *p=s; while (*p)   ctable[*p++]|=x;}
#define setCopy(x,c)	{Int i;				\
			 for (i=0; i<NUM_CHARS; ++i)	\
			     if (isIn(i,c))		\
				 ctable[i]|=x;		\
			}

    setRange(DIGIT,	'0','9');	/* ASCII decimal digits		   */

    setRange(SMALL,	'a','z');	/* ASCII lower case letters	   */
    setRange(SMALL,	223,246);	/* ISO lower case letters	   */
    setRange(SMALL,	248,255);	/* (omits division symbol, 247)	   */

    setRange(LARGE,	'A','Z');	/* ASCII upper case letters	   */
    setRange(LARGE,	192,214);	/* ISO upper case letters	   */
    setRange(LARGE,	216,222);	/* (omits multiplication, 215)	   */

    setRange(SYMBOL,	161,191);	/* Symbol characters + ':'	   */
    setRange(SYMBOL,	215,215);
    setRange(SYMBOL,	247,247);
    setChars(SYMBOL,	":!#$%&*+./<=>?@\\^|-~");

    setChars(IDAFTER,	"'_");		/* Characters in identifier	   */
    setCopy (IDAFTER,	(DIGIT|SMALL|LARGE));

    setRange(SPACE,	' ',' ');	/* ASCII space character	   */
    setRange(SPACE,	160,160);	/* ISO non breaking space	   */
    setRange(SPACE,	9,13);		/* special whitespace: \t\n\v\f\r  */

    setChars(PRINT,	"(),;[]_`{}");	/* Special characters		   */
    setChars(PRINT,	" '\""); 	/* Space and quotes		   */
    setCopy (PRINT,	(DIGIT|SMALL|LARGE|SYMBOL));
#undef setRange
#undef setChars
#undef setCopy
}

/* --------------------------------------------------------------------------
 * Single character input routines:
 *
 * At the lowest level of input, characters are read one at a time, with the
 * current character held in c0 and the following (lookahead) character in
 * c1.	The corrdinates of c0 within the file are held in (column,row).
 * The input stream is advanced by one character using the skip() function.
 * ------------------------------------------------------------------------*/

#define TABSIZE    8		       /* spacing between tabstops	   */

#define NOTHING    0		       /* what kind of input is being read?*/
#define KEYBOARD   1		       /* - keyboard/console?		   */
#define SCRIPTFILE 2		       /* - script file 		   */
#define PROJFILE   3		       /* - project file		   */

static Int    reading	= NOTHING;

static Target readSoFar;
static Int    row, column, startColumn;
static int    c0, c1;
static FILE   *inputStream;
static Bool   thisLiterate;

#if     USE_READLINE			/* for command line editors	   */
static  String currentLine;		/* editline or GNU readline	   */
static  String nextChar;
#define nextConsoleChar()   (*nextChar=='\0' ? '\n' : *nextChar++)
extern  Void add_history    Args((String));
extern  String readline	    Args((String));
#else
#define nextConsoleChar()   getc(stdin)
#endif

static	Int litLines;		       /* count defn lines in lit script   */
#define DEFNCHAR  '>'		       /* definition lines begin with this */
static	Int lastLine;		       /* records type of last line read:  */
#define STARTLINE 0		       /* - at start of file, none read    */
#define BLANKLINE 1		       /* - blank (may preceed definition) */
#define TEXTLINE  2		       /* - text comment		   */
#define DEFNLINE  3		       /* - line containing definition	   */

Void consoleInput(prompt)		/* prepare to input characters from*/
String prompt; {			/* standard in (i.e. console/kbd)  */
    reading	= KEYBOARD;		/* keyboard input is Line oriented,*/
    c0		=			/* i.e. input terminated by '\n'   */
    c1		= ' ';
    column	= (-1);
    row 	= 0;

#if USE_READLINE
    if (currentLine)
	free(currentLine);
    currentLine = readline(prompt);
    nextChar    = currentLine;
    if (currentLine) {
	if (*currentLine)
	    add_history(currentLine);
    }
    else
	c0 = c1 = EOF;
#else
    printf("%s",prompt);
    fflush(stdout);
#endif
}

Void projInput(nm)		       /* prepare to input characters from */
String nm; {			       /* from named project file	   */
    if (inputStream = fopen(nm,"r")) {
	reading = PROJFILE;
	c0      = ' ';
	c1      = '\n';
        column  = 1;
        row     = 0;
    }
    else {
	ERRMSG(0) "Unable to open project file \"%s\"", nm
	EEND;
    }
}

static Void local fileInput(nm,len)	/* prepare to input characters from*/
String nm;				/* named file (specified length is */
Long   len; {				/* used to set target for reading) */
    if (inputStream = fopen(nm,"r")) {
	reading      = SCRIPTFILE;
	c0	     = ' ';
	c1	     = '\n';
	column	     = 1;
	row	     = 0;
	readSoFar    = 0;
	lastLine     = STARTLINE;
	litLines     = 0;
	thisLiterate = literateMode(nm);
	setGoal("Parsing", (Target)len);
    }
    else {
	ERRMSG(0) "Unable to open file \"%s\"", nm
	EEND;
    }
}

static Bool local literateMode(nm)	/* selecte literate mode for file  */
String nm; {
    String dot = 0;

#if !RISCOS
    for (; *nm; ++nm)			/* look for last dot in file name  */
	if (*nm == '.')
	    dot = nm+1;

    if (dot) {
	if (strcmp(dot,"hs")==0)	/* .hs files are never literate	   */
	    return FALSE;

	if (strcmp(dot,"lhs") ==0 ||	/* .lhs, .verb files are always	   */
	    strcmp(dot,"verb")==0)	/* literate scripts		   */
	    return TRUE;
    }
#else
    char *start = nm;
    for (; *nm; ++nm)                   /* look for last dot in file name  */
        if (*nm == '.')
            dot = nm+1;
    if (dot) {
	char *prev = dot-1;
	while (prev > start && *--prev != '.')
	    ;
	if (*prev == '.')
	    ++prev;
	if (namecmp(prev,"hs"))
	    return FALSE;
	if (namecmp(prev,"lhs") ||  namecmp(prev,"verb"))
	    return TRUE;
    }
#endif
    return literateScripts;		/* otherwise, use the default	   */
}

static Void local skip() {		/* move forward one char in input  */
    if (c0!=EOF) {			/* stream, updating c0, c1, ...	   */
	if (c0=='\n') {			/* Adjusting cursor coords as nec. */
	    row++;
	    column=1;
	    if (reading==SCRIPTFILE)
		soFar(readSoFar);
	}
	else if (c0=='\t')
	    column += TABSIZE - ((column-1)%TABSIZE);
	else
	    column++;

	c0 = c1;
	readSoFar++;

	if (c0==EOF) {
	    column = 0;
	    if (reading==SCRIPTFILE)
		done();
	    closeAnyInput();
	}
	else if (reading==KEYBOARD) {
	    allowBreak();
	    if (c0=='\n')
		c1 = EOF;
	    else
		c1 = nextConsoleChar();
	}
	else
	    c1 = getc(inputStream);
    }
}

static Void local thisLineIs(kind)	/* register kind of current line   */
Int kind; {				/* & check for literate script errs*/
    if (literateErrors) {
	if ((kind==DEFNLINE && lastLine==TEXTLINE) ||
	    (kind==TEXTLINE && lastLine==DEFNLINE)) {
	    ERRMSG(row) "Program line next to comment"
	    EEND;
	}
	lastLine = kind;
    }
}

static Void local newlineSkip() {      /* skip `\n' (supports lit scripts) */
    if (reading==SCRIPTFILE && thisLiterate) {
	do {
	    skip();
	    if (c0==DEFNCHAR) {        /* pass chars on definition lines   */
		thisLineIs(DEFNLINE);  /* to lexer (w/o leading DEFNCHAR)  */
		skip();
		litLines++;
		return;
	    }
	    while (c0==' ' || c0=='\t')/* maybe line is blank?		   */
		skip();
	    if (c0=='\n' || c0==EOF)
		thisLineIs(BLANKLINE);
	    else {
		thisLineIs(TEXTLINE);  /* otherwise it must be a comment   */
		while (c0!='\n' && c0!=EOF)
		    skip();
	    }			       /* by now, c0=='\n' or c0==EOF	   */
	} while (c0!=EOF);	       /* if new line, start again	   */

	if (litLines==0 && literateErrors) {
	    ERRMSG(row) "Empty script - perhaps you forgot the `%c's?",
		        DEFNCHAR
	    EEND;
	}
	return;
    }
    skip();
}

static Void local closeAnyInput() {	/* Close input stream, if open,	   */
    switch (reading) {			/* or skip to end of console line  */
	case PROJFILE   :
	case SCRIPTFILE : fclose(inputStream);
			  break;
	case KEYBOARD   : while (c0!=EOF)
			      skip();
			  break;
    }
    reading=NOTHING;
}

/* --------------------------------------------------------------------------
 * Parser: Uses table driven parser generated from parser.y using yacc (bison)
 * ------------------------------------------------------------------------*/

#if VC32
#include "parser.c"
#else
#include "parser.tab.c"
#endif

/* --------------------------------------------------------------------------
 * Single token input routines:
 *
 * The following routines read the values of particular kinds of token given
 * that the first character of the token has already been located in c0 on
 * entry to the routine.
 * ------------------------------------------------------------------------*/

#define MAX_TOKEN	    250
#define startToken()	    tokPos = 0
#define saveTokenChar(c)    if (tokPos<=MAX_TOKEN) saveChar(c); else ++tokPos
#define saveChar(c)	    tokenStr[tokPos++]=(c)
#define overflows(n,b,d,m)  (n > ((m)-(d))/(b))

static char tokenStr[MAX_TOKEN+1];	/* token buffer			   */
static Int  tokPos;			/* input position in buffer	   */
static Int  identType;			/* identifier type: CONID / VARID  */
static Int  opType;			/* operator type  : CONOP / VAROP  */

static Void local endToken() {		/* check for token overflow	   */
    if (tokPos>MAX_TOKEN) {
	ERRMSG(row) "Maximum token length (%d) exceeded", MAX_TOKEN
	EEND;
    }
    tokenStr[tokPos] = '\0';
}

static Text local readOperator() {	/* read operator symbol		   */
    startToken();
    do {
	saveTokenChar(c0);
	skip();
    } while (isISO(c0) && isIn(c0,SYMBOL));
    opType = (tokenStr[0]==':' ? CONOP : VAROP);
    endToken();
    return findText(tokenStr);
}

static Text local readIdent() {		/* read identifier		   */
    startToken();
    do {
	saveTokenChar(c0);
	skip();
    } while (isISO(c0) && isIn(c0,IDAFTER));
    endToken();
    identType = isIn(tokenStr[0],LARGE) ? CONID : VARID;
    return findText(tokenStr);
}

static Cell local readRadixNumber(r)	/* Read literal in specified radix */
Int r; {				/* from input of the form 0c{digs} */
    Int d;
    skip();				/* skip leading zero		   */
    if ((d=readHexDigit(c1))<0 || d>=r) /* Special case; no digits, lex as */
	return mkInt(0);		/* if it had been written "0 c..." */
    else {
	Int  n = 0;
#if BIGNUMS
	Cell big = NIL;
#endif
	skip();
	do {
#if BIGNUMS
	    if (nonNull(big))
		big = bigShift(big,d,r);
	    else if (overflows(n,r,d,MAXPOSINT))
		big = bigShift(bigInt(n),d,r);
	    else
#else
	    if (overflows(n,r,d,MAXPOSINT)) {
		ERRMSG(row) "Integer literal out of range"
		EEND;
	    }
	    else
#endif
		n = r*n + d;
	    skip();
	    d = readHexDigit(c0);
	} while (d>=0 && d<r);
#if BIGNUMS
	return nonNull(big) ? big : mkInt(n);
#else
	return mkInt(n);
#endif
    }
}

static Cell local readNumber() {	/* read numeric constant	   */
    Int   n           = 0;
    Bool  intTooLarge = FALSE;

    if (c0=='0') {
	if (c1=='x' || c1=='X')		/* Maybe a hexadecimal literal?	   */
	    return readRadixNumber(16);
	if (c1=='o' || c1=='O')		/* Maybe an octal literal?	   */
	    return readRadixNumber(8);
    }

    startToken();
    do {
	if (overflows(n,10,(c0-'0'),MAXPOSINT))
	    intTooLarge = TRUE;
	n  = 10*n  + (c0-'0');
	saveTokenChar(c0);
	skip();
    } while (isISO(c0) && isIn(c0,DIGIT));

    if (c0!='.' || !isISO(c1) || !isIn(c1,DIGIT)) {
	endToken();
	if (!intTooLarge)
	    return mkInt(n);
#if BIGNUMS
	return bigStr(tokenStr);
#else
	ERRMSG(row) "Integer literal out of range"
	EEND;
#endif
    }

    saveTokenChar(c0);		        /* save decimal point		   */
    skip();
    do {				/* process fractional part ...	   */
	saveTokenChar(c0);
	skip();
    } while (isISO(c0) && isIn(c0,DIGIT));

    if (c0=='e' || c0=='E') {		/* look for exponent part...	   */
	saveTokenChar('e');
	skip();
	if (c0=='-') {
	    saveTokenChar('-');
	    skip();
	}
	else if (c0=='+')
	    skip();

	if (!isISO(c0) || !isIn(c0,DIGIT)) {
	    ERRMSG(row) "Missing digits in exponent"
	    EEND;
	}
	else {
	    do {
		saveTokenChar(c0);
		skip();
	    } while (isISO(c0) && isIn(c0,DIGIT));
	}
    }

    endToken();
#if !HAS_FLOATS
    ERRMSG(row) "No floating point numbers in this implementation"
    EEND;
#endif

    return mkFloat(stringToFloat(tokenStr));
}

static Cell local readChar() {	       /* read character constant	   */
    Cell charRead;

    skip(/* '\'' */);
    if (c0=='\'' || c0=='\n' || c0==EOF) {
	ERRMSG(row) "Illegal character constant"
	EEND;
    }

    charRead = readAChar(FALSE);

    if (c0=='\'')
	skip(/* '\'' */);
    else {
	ERRMSG(row) "Improperly terminated character constant"
	EEND;
    }
    return charRead;
}

static Cell local readString() {       /* read string literal		   */
    Cell c;

    startToken();
    skip(/* '\"' */);
    while (c0!='\"' && c0!='\n' && c0!=EOF) {
	c = readAChar(TRUE);
	if (nonNull(c))
	    saveStrChr(charOf(c));
    }

    if (c0=='\"')
	skip(/* '\"' */);
    else {
	ERRMSG(row) "Improperly terminated string"
	EEND;
    }
    endToken();
    return mkStr(findText(tokenStr));
}

static Void local saveStrChr(c)        /* save character in string	   */
Char c; {
    if (c!='\0' && c!='\\') {	       /* save non null char as single char*/
	saveTokenChar(c);
    }
    else {			       /* save null char as TWO null chars */
	if (tokPos+1<MAX_TOKEN) {
	    saveChar('\\');
	    if (c=='\\')
		saveChar('\\');
	    else
		saveChar('0');
	}
    }
}

static Cell local readAChar(isStrLit)	/* read single char constant	   */
Bool isStrLit; {			/* TRUE => enable \& and gaps	   */
    Cell c = mkChar(c0);

    if (c0=='\\')		       /* escape character?		   */
	return readEscapeChar(isStrLit);
    if (!isISO(c0) || !isIn(c0,PRINT)) {
	ERRMSG(row) "Non printable character `\\%d' in constant", ((int)c0)
	EEND;
    }
    skip();			       /* normal character?		   */
    return c;
}

/* --------------------------------------------------------------------------
 * Character escape code sequences:
 * ------------------------------------------------------------------------*/

static struct { 		       /* table of special escape codes    */
    char *codename;
    int  codenumber;
} escapes[] = {
   {"a",    7}, {"b",	 8}, {"f",   12}, {"n",   10},	/* common escapes  */
   {"r",   13}, {"t",	 9}, {"\\",'\\'}, {"\"",'\"'},
   {"\'",'\''}, {"v",	11},
   {"NUL",  0}, {"SOH",  1}, {"STX",  2}, {"ETX",  3},	/* ascii codenames */
   {"EOT",  4}, {"ENQ",  5}, {"ACK",  6}, {"BEL",  7},
   {"BS",   8}, {"HT",	 9}, {"LF",  10}, {"VT",  11},
   {"FF",  12}, {"CR",	13}, {"SO",  14}, {"SI",  15},
   {"DLE", 16}, {"DC1", 17}, {"DC2", 18}, {"DC3", 19},
   {"DC4", 20}, {"NAK", 21}, {"SYN", 22}, {"ETB", 23},
   {"CAN", 24}, {"EM",	25}, {"SUB", 26}, {"ESC", 27},
   {"FS",  28}, {"GS",	29}, {"RS",  30}, {"US",  31},
   {"SP",  32}, {"DEL", 127},
   {0,0}
};

static Int  alreadyMatched;	       /* Record portion of input stream   */
static char alreadyRead[10];	       /* that has been read w/o a match   */

static Bool local lazyReadMatches(s)   /* compare input stream with string */
String s; {			       /* possibly using characters that   */
    int i;			       /* have already been read	   */

    for (i=0; i<alreadyMatched; ++i)
	if (alreadyRead[i]!=s[i])
	    return FALSE;

    while (s[i] && s[i]==c0) {
	alreadyRead[alreadyMatched++]=c0;
	skip();
	i++;
    }

    return s[i]=='\0';
}

static Cell local readEscapeChar(isStrLit)/* read escape character	   */
Bool isStrLit; {
    int i=0;

    skip(/* '\\' */);
    switch (c0) {
	case '&'  : if (isStrLit) {
			skip();
			return NIL;
		    }
		    ERRMSG(row) "Illegal use of `\\&' in character constant"
		    EEND;
		    break;/*NOTREACHED*/

	case '^'  : return readCtrlChar();

	case 'o'  : return readOctChar();
	case 'x'  : return readHexChar();

	default	  : if (!isISO(c0)) {
			ERRMSG(row) "Illegal escape sequence"
			EEND;
		    }
		    else if (isIn(c0,SPACE)) {
			if (isStrLit) {
			    skipGap();
			    return NIL;
			}
			ERRMSG(row) "Illegal use of gap in character constant"
			EEND;
			break;
		    }
		    else if (isIn(c0,DIGIT))
			return readDecChar();
    }

    for (alreadyMatched=0; escapes[i].codename; i++)
	if (lazyReadMatches(escapes[i].codename))
	    return mkChar(escapes[i].codenumber);

    alreadyRead[alreadyMatched++] = c0;
    alreadyRead[alreadyMatched++] = '\0';
    ERRMSG(row) "Illegal character escape sequence \"\\%s\"",
	        alreadyRead
    EEND;
    return NIL;/*NOTREACHED*/
}

static Void local skipGap() {		/* skip over gap in string literal */
    do					/* (simplified in Haskell 1.1)	   */
	if (c0=='\n')
	    newlineSkip();
	else
	    skip();
    while (isISO(c0) && isIn(c0,SPACE));
    if (c0!='\\') {
	ERRMSG(row) "Missing `\\' terminating string literal gap"
	EEND;
    }
    skip(/* '\\' */);
}

static Cell local readCtrlChar() {     /* read escape sequence \^x	   */
    static String controls = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
    String which;

    skip(/* '^' */);
    if ((which = strchr(controls,c0))==NULL) {
	ERRMSG(row) "Unrecognised escape sequence `\\^%c'", c0
	EEND;
    }
    skip();
    return mkChar(which-controls);
}

static Cell local readOctChar() {      /* read octal character constant    */
    Int n = 0;
    Int d;

    skip(/* 'o' */);
    if ((d = readHexDigit(c0))<0 || d>=8) {
	ERRMSG(row) "Empty octal character escape"
	EEND;
    }
    do {
	if (overflows(n,8,d,MAXCHARVAL)) {
	    ERRMSG(row) "Octal character escape out of range"
	    EEND;
	}
	n = 8*n + d;
	skip();
    } while ((d = readHexDigit(c0))>=0 && d<8);

    return mkChar(n);
}

static Cell local readHexChar() {      /* read hex character constant	   */
    Int n = 0;
    Int d;

    skip(/* 'x' */);
    if ((d = readHexDigit(c0))<0) {
	ERRMSG(row) "Empty hexadecimal character escape"
	EEND;
    }
    do {
	if (overflows(n,16,d,MAXCHARVAL)) {
	    ERRMSG(row) "Hexadecimal character escape out of range"
	    EEND;
	}
	n = 16*n + d;
	skip();
    } while ((d = readHexDigit(c0))>=0);

    return mkChar(n);
}

static Int local readHexDigit(c)	/* read single hex digit 	   */
Char c; {
    if ('0'<=c && c<='9')
	return c-'0';
    if ('A'<=c && c<='F')
	return 10 + (c-'A');
    if ('a'<=c && c<='f')
	return 10 + (c-'a');
    return -1;
}

static Cell local readDecChar() {      /* read decimal character constant  */
    Int n = 0;

    do {
	if (overflows(n,10,(c0-'0'),MAXCHARVAL)) {
	    ERRMSG(row) "Decimal character escape out of range"
	    EEND;
	}
	n = 10*n + (c0-'0');
	skip();
    } while (c0!=EOF && isIn(c0,DIGIT));

    return mkChar(n);
}

/* --------------------------------------------------------------------------
 * Produce printable representation of character:
 * ------------------------------------------------------------------------*/

String unlexChar(c,quote)		/* return string representation of */
Char c;					/* character...			   */
Char quote; {				/* protect quote character	   */
    static char buffer[12];

    if (c<0)				/* deal with sign extended chars.. */
	c += NUM_CHARS;

    if (isISO(c) && isIn(c,PRINT)) {	/* normal printable character	   */
	if (c==quote || c=='\\') {	/* look for quote of approp. kind  */
	    buffer[0] = '\\';		
	    buffer[1] = c;
	    buffer[2] = '\0';
	}
	else {
	    buffer[0] = c;
            buffer[1] = '\0';
	}
    }
    else {				/* look for escape code		   */
        Int escs;
        for (escs=0; escapes[escs].codename; escs++)
	    if (escapes[escs].codenumber==c) {
		sprintf(buffer,"\\%s",escapes[escs].codename);
		return buffer;
	    }
        sprintf(buffer,"\\%d",c);	/* otherwise use numeric escape	   */
    }
    return buffer;
}

Void printString(s)			/* print string s, using quotes and*/
String s; {				/* escapes if any parts need them  */
    if (s) {
	String t = s;
        Char   c;
        while ((c = *t)!=0 && isISO(c) && isIn(c,PRINT) && c!='"' && !isIn(c,SPACE))
	    t++;
	if (*t) {
	    putchar('"');
	    for (t=s; *t; t++)
		printf("%s",unlexChar(*t,'"'));
	    putchar('"');
	}
	else
	    printf("%s",s);
    }
}

/* --------------------------------------------------------------------------
 * Handle special types of input for use in interpreter:
 * ------------------------------------------------------------------------*/

Command readCommand(cmds,start,sys)	/* read command at start of input  */
struct cmd *cmds;			/* line in interpreter		   */
Char   start;				/* characters introducing a cmd    */
Char   sys; {				/* character for shell escape	   */
    while (c0==' ' || c0 =='\t')
	skip();

    if (c0=='\n')			/* look for blank command lines    */
	return NOCMD;
    if (c0==EOF)			/* look for end of input stream	   */
	return QUIT;
    if (c0==sys) {			/* single character system escape  */
	skip();
	return SYSTEM;
    }
    if (c0==start && c1==sys) {		/* two character system escape	   */
	skip();
	skip();
	return SYSTEM;
    }

    startToken();			/* All cmds start with start	   */
    if (c0==start)			/* except default (usually EVAL)   */
	do {				/* which is empty		   */
	    saveTokenChar(c0);
	    skip();
	} while (c0!=EOF && !isIn(c0,SPACE));
    endToken();

    for (; cmds->cmdString; ++cmds)
	if (strcmp((cmds->cmdString),tokenStr)==0 ||
            (tokenStr[0]==start &&
             tokenStr[1]==(cmds->cmdString)[1] &&
             tokenStr[2]=='\0'))
	    return (cmds->cmdCode);
    return BADCMD;
}

String readFilename() { 	       /* Read filename from input (if any)*/
    if (reading==PROJFILE)
	skipWhitespace();
    else
	while (c0==' ' || c0=='\t')
	    skip();

    if (c0=='\n' || c0==EOF)	       /* return null string at end of line*/
	return 0;

    startToken();
    while (c0!=EOF && !isIn(c0,SPACE)) {
	if (c0=='"') {
	    skip();
	    while (c0!=EOF && c0!='\"') {
		Cell c = readAChar(TRUE);
		if (nonNull(c))
		    saveTokenChar(charOf(c));
	    }
	    if (c0=='"')
		skip();
	    else {
		ERRMSG(row) "a closing quote, '\"', was expected"
		EEND;
	    }
	}
	else {
	    saveTokenChar(c0);
	    skip();
	}
    }
    endToken();
    return tokenStr;
}

String readLine() {			/* Read command line from input	   */
    while (c0==' ' || c0=='\t')		/* skip leading whitespace	   */
	skip();

    startToken();
    while (c0!='\n' && c0!=EOF) {
	saveTokenChar(c0);
	skip();
    }
    endToken();

    return tokenStr;
}

/* --------------------------------------------------------------------------
 * This lexer supports the Haskell layout rule:
 *
 * - Layout area bounded by { ... }, with `;'s in between.
 * - A `{' is a HARD indentation and can only be matched by a corresponding
 *   HARD '}'
 * - Otherwise, if no `{' follows the keywords WHERE/LET or OF, a SOFT `{'
 *   is inserted with the column number of the first token after the
 *   WHERE/LET/OF keyword.
 * - When a soft indentation is uppermost on the indetation stack with
 *   column col' we insert:
 *    `}'  in front of token with column<col' and pop indentation off stack,
 *    `;'  in front of token with column==col'.
 * ------------------------------------------------------------------------*/

#define MAXINDENT  100		       /* maximum nesting of layout rule   */
static	Int	   layout[MAXINDENT+1];/* indentation stack		   */
#define HARD	   (-1) 	       /* indicates hard indentation	   */
static	Int	   indentDepth = (-1); /* current indentation nesting	   */

static Void local goOffside(col)       /* insert offside marker 	   */
Int col; {			       /* for specified column		   */
    if (indentDepth>=MAXINDENT) {
	ERRMSG(row) "Too many levels of program nesting"
	EEND;
    }
    layout[++indentDepth] = col;
}

static Void local unOffside() {        /* leave layout rule area	   */
    indentDepth--;
}

static Bool local canUnOffside() {     /* Decide if unoffside permitted    */
    return indentDepth>=0 && layout[indentDepth]!=HARD;
}

/* --------------------------------------------------------------------------
 * Main tokeniser:
 * ------------------------------------------------------------------------*/

static Void local skipWhitespace() {	/* Skip over whitespace/comments   */
    for (;;)				/* Strictly speaking, this code is */
	if (c0==EOF)			/* a little more liberal than the  */
	    return;			/* report allows ...		   */
	else if (c0=='\n')
	    newlineSkip();
	else if (isIn(c0,SPACE))
	    skip();
	else if (c0=='{' && c1=='-') {	/* (potentially) nested comment	   */
	    Int nesting = 1;
	    Int origRow = row;		/* Save original row number	   */
	    skip();
	    skip();
	    while (nesting>0 && c0!=EOF)
		if (c0=='{' && c1=='-') {
		    skip();
		    skip();
		    nesting++;
		}
		else if (c0=='-' && c1=='}') {
		    skip();
		    skip();
		    nesting--;
		}
		else if (c0=='\n')
		    newlineSkip();
		else
		    skip();
	    if (nesting>0) {
		ERRMSG(origRow) "Unterminated nested comment {- ..."
		EEND;
	    }
	}
	else if (c0=='-' && c1=='-') {	/* One line comment		   */
	    do
		skip();
	    while (c0!='\n' && c0!=EOF);
	    if (c0=='\n')
		newlineSkip();
	}
	else
	    return;
}

static Bool firstToken; 	       /* Set to TRUE for first token	   */
static Int  firstTokenIs;	       /* ... with token value stored here */
static Bool insertOpen    = FALSE;

static Void local useLayout() {
    skipWhitespace();
    insertOpen = (c0 != '{');
}

static Int local yylex() {	       /* Read next input token ...	   */
    static Text textRepeat;
    static Bool insertedToken = FALSE;

#define lookAhead(t) {skipWhitespace(); insertOpen = (c0!='{'); return t;}

    if (firstToken) {		       /* Special case for first token	   */
	indentDepth   = (-1);
	firstToken    = FALSE;
	insertOpen    = FALSE;
	insertedToken = FALSE;
	if (reading==KEYBOARD)
	    textRepeat = findText(repeatStr);
	return firstTokenIs;
    }

    if (insertOpen) {		       /* insert `soft' opening brace	   */
	insertOpen    = FALSE;
	insertedToken = TRUE;
	goOffside(column);
	push(yylval = mkInt(row));
	return '{';
    }

    /* ----------------------------------------------------------------------
     * Skip white space, and insert tokens to support layout rules as reqd.
     * --------------------------------------------------------------------*/

    skipWhitespace();
    startColumn = column;
    push(yylval = mkInt(row));	       /* default token value is line no.  */
    /* subsequent changes to yylval must also set top() to the same value  */

    if (indentDepth>=0) 	       /* layout rule(s) active ?	   */
        if (insertedToken)	       /* avoid inserting multiple `;'s    */
	    insertedToken = FALSE;     /* or putting `;' after `{'	   */
        else if (layout[indentDepth]!=HARD)
	    if (column<layout[indentDepth]) {
		unOffside();
		return '}';
            }
            else if (column==layout[indentDepth] && c0!=EOF) {
                insertedToken = TRUE;
                return ';';
            }

    /* ----------------------------------------------------------------------
     * Now try to identify token type:
     * --------------------------------------------------------------------*/

    switch (c0) {
	case EOF  : return 0;			/* End of file/input	   */

	/* The next 10 characters make up the `special' category in 1.3	   */
	case '('  : skip(); return '(';
	case ')'  : skip(); return ')';
	case ','  : skip(); return ',';
	case ';'  : skip(); return ';'; 
	case '['  : skip(); return '['; 
	case ']'  : skip(); return ']';
	case '_'  : skip(); return '_';
	case '`'  : skip(); return '`';
	case '{'  : goOffside(HARD);
		    skip();
		    return '{';
	case '}'  : if (indentDepth<0) {
			ERRMSG(row) "Misplaced `}'"
			EEND;
		    }
		    if (layout[indentDepth]==HARD)	/* skip over hard }*/
			skip();
		    unOffside();	/* otherwise, we have to insert a }*/
		    return '}';		/* to (try to) avoid an error...   */

	/* Character and string literals				   */
	case '\'' : top() = yylval = readChar();
		    return CHARLIT;

	case '\"' : top() = yylval = readString();
		    return STRINGLIT;
    }

    if (isIn(c0,LARGE)) {		/* Look for, but ignore, qual name */
	Text it = readIdent();		/* No keyword begins with LARGE ...*/
	if (c0=='.' && isIn(c1,(SMALL|LARGE|SYMBOL)))
	    skip();			/* Skip qualifying dot		   */
	else {
	    top() = yylval = ap(CONIDCELL,it);
	    return identType;
	}				/* We could easily keep a record of*/
    }					/* the qualifying name here ...    */
    if (isIn(c0,(SMALL|LARGE))) {
	Text it = readIdent();

	if (it==textCase)              return CASEXP;
	if (it==textOfK)               return OF;
	if (it==textData)	       return DATA;
	if (it==textType)	       return TYPE;
	if (it==textIf) 	       return IF;
	if (it==textThen)	       return THEN;
	if (it==textElse)	       return ELSE;
	if (it==textWhere)             return WHERE;
        if (it==textLet)               return LET;
        if (it==textIn)                return IN;
	if (it==textInfix)	       return INFIX;
	if (it==textInfixl)	       return INFIXL;
	if (it==textInfixr)	       return INFIXR;
	if (it==textPrim)              return PRIMITIVE;
	if (it==textNewtype)	       return TNEWTYPE;
	if (it==textDefault)	       return DEFAULT;
	if (it==textDeriving)	       return DERIVING;
	if (it==textDo)		       return DO;
	if (it==textClass)	       return TCLASS;
	if (it==textInstance)	       return TINSTANCE;
	if (it==textModule)	       return MODULE;
	if (it==textImport)	       return IMPORT;
	if (it==textHiding)	       return HIDING;
	if (it==textQualified)	       return QUALIFIED;
	if (it==textAsMod)	       return ASMOD;
#if OBJ
	if (it==textTemplate)          return TEMPLATE;
	if (it==textAction)            return ACTION;
	if (it==textRequest)           return REQUEST;
	if (it==textHandle)            return HANDLE;
	if (it==textForall)	       return FORALL;
	if (it==textWhile)             return WHILE;
	if (it==textElsif)	       return ELSIF;
	if (it==textFix)               return FIX;
#endif
	if (it==textRepeat && reading==KEYBOARD)
	    return repeatLast();
#if LAZY_ST
	if (it==textRunST)	       return TRUNST;
#endif
	if (it==textStruct)	       return STRUCT;

	top() = yylval = ap((identType==CONID ? CONIDCELL : VARIDCELL),it);
	return identType;
    }

    if (c0=='.' && isIn(c1,SMALL)) {
        skip();
        return SELDOT;
    }

    if (isIn(c0,SYMBOL)) {
	Text it = readOperator();

	if (it==textCoco)    return COCO;
	if (it==textEq)      return '=';
	if (it==textUpto)    return UPTO;
	if (it==textAs)      return '@';
	if (it==textLambda)  return '\\';
	if (it==textBar)     return '|';
	if (it==textFrom)    return FROM;
	if (it==textMinus)   return '-';
	if (it==textBang)    return '!';
	if (it==textLT)      return '<';
	if (it==textGT)      return '>';
	if (it==textArrow)   return ARROW;
	if (it==textLazy)    return '~';
	if (it==textImplies) return IMPLIES;
#if OBJ
	if (it==textAssign)  return ASSIGN;
#endif
	if (it==textRepeat && reading==KEYBOARD)
	    return repeatLast();

	top() = yylval = ap((opType==CONOP ? CONOPCELL : VAROPCELL),it);
	return opType;
    }

    if (isIn(c0,DIGIT)) {
	top() = yylval = readNumber();
	return NUMLIT;
    }

    ERRMSG(row) "Unrecognised character `\\%d' in column %d", ((int)c0), column
    EEND;
    return 0; /*NOTREACHED*/
}

static Int local repeatLast() {		/* Obtain last expression entered  */
    if (isNull(yylval=getLastExpr())) {
	ERRMSG(row) "Cannot use %s without any previous input", repeatStr
	EEND;
    }
    return REPEAT;
}

Syntax defaultSyntax(t) 	       /* Find default syntax of var named */
Text t; {			       /* by t ...			   */
    String s = textToStr(t);
    return isIn(s[0],SYMBOL) ? DEF_OPSYNTAX : APPLIC;
}

Bool isStructSel(c)                     /* signal TRUE if t == ".abc"      */
Cell c; {
    String s = textToStr(name(c).text);
    return (s[0]=='.' && isIn(s[1],SMALL));
}

Text unStructSel(c)			/* given ".abs", return "abc"      */
Cell c; {
    Text t = name(c).text;
    String s = textToStr(t);
    return (s[0]=='.') ? findText(s+1) : t;
}

Text mkStructSel(t)			/* given "abc", return ".abc"      */
Text t; {
    char buf[MAX_TOKEN+1], *p = buf;
    String s = textToStr(t);
    if (s[0]=='.')
        return t;
    *p++ = '.';
    while (*s)
        *p++ = *s++;
    *p = 0;
    return findText(buf);
}

/* --------------------------------------------------------------------------
 * main entry points to parser/lexer:
 * ------------------------------------------------------------------------*/

static Void local parseInput(startWith)/* Parse input with given first tok,*/
Int startWith; {		       /* determining whether to read a    */
    firstToken	 = TRUE;	       /* script or an expression	   */
    firstTokenIs = startWith;

    clearStack();
    if (yyparse()) {		       /* This can only be parser overflow */
	ERRMSG(row) "Parser overflow"  /* as all syntax errors are caught  */
	EEND;			       /* in the parser...		   */
    }
    drop();
    if (!stackEmpty())		       /* stack should now be empty	   */
	internal("parseInput");
}

Void parseScript(nm,len)	       /* Read a script			   */
String nm;
Long   len; {			       /* Used to set a target for reading */
    input(RESET);
    fileInput(nm,len);
    parseInput(SCRIPT);
}

Void parseExp() {		       /* Read an expression to evaluate   */
    parseInput(EXPR);
    setLastExpr(inputExpr);
}

/* --------------------------------------------------------------------------
 * Input control:
 * ------------------------------------------------------------------------*/

Void input(what)
Int what; {
    switch (what) {
	case INSTALL : initCharTab();
		       textCase       = findText("case");
		       textOfK	      = findText("of");
		       textData       = findText("data");
		       textType       = findText("type");
		       textIf	      = findText("if");
		       textThen       = findText("then");
		       textElse       = findText("else");
		       textWhere      = findText("where");
                       textLet        = findText("let");
		       textIn         = findText("in");
		       textInfix      = findText("infix");
		       textInfixl     = findText("infixl");
		       textInfixr     = findText("infixr");
		       textPrim       = findText("primitive");
		       textNewtype    = findText("newtype");
		       textDefault    = findText("default");
		       textDeriving   = findText("deriving");
		       textDo	      = findText("do");
		       textClass      = findText("class");
		       textInstance   = findText("instance");
		       textCoco       = findText("::");
		       textEq	      = findText("=");
		       textUpto       = findText("..");
		       textAs	      = findText("@");
		       textLambda     = findText("\\");
		       textBar	      = findText("|");
		       textMinus      = findText("-");
		       textFrom       = findText("<-");
		       textArrow      = findText("->");
		       textLazy       = findText("~");
		       textBang	      = findText("!");
		       textLT         = findText("<");
		       textGT         = findText(">");
		       textImplies    = findText("=>");
#if NPLUSK
		       textPlus	      = findText("+");
#endif
		       textModule     = findText("module");
		       textImport     = findText("import");
		       textHiding     = findText("hiding");
		       textQualified  = findText("qualified");
		       textAsMod      = findText("as");
#if LAZY_ST
		       textRunST      = findText("runST");
#endif
		       textStruct     = findText("struct");
#if OBJ
		       textTemplate   = findText("template");
		       textAction     = findText("action");
		       textRequest    = findText("request");
		       textHandle     = findText("handle");
		       textForall     = findText("forall");
		       textWhile      = findText("while");
		       textElsif      = findText("elsif");
		       textFix        = findText("fix");
		       textAssign     = findText(":=");
		       varAssign      = mkVar(textAssign);
#endif
		       varMinus	      = mkVar(textMinus);
		       varBang	      = mkVar(textBang);
		       varLT          = mkVar(textLT);
		       varGT          = mkVar(textGT);
		       varHiding      = mkVar(textHiding);
		       varQualified   = mkVar(textQualified);
		       varAsMod       = mkVar(textAsMod);
		       evalDefaults   = NIL;

		       input(RESET);
		       break;

	case RESET   : tyconDefns   = NIL;
		       typeInDefns  = NIL;
		       valDefns     = NIL;
		       opDefns	    = NIL;
		       classDefns   = NIL;
                       instDefns    = NIL;
		       overDefns    = NIL;
		       primDefns    = NIL;
		       defaultDefns = NIL;
		       defaultLine  = 0;
		       inputExpr    = NIL;
		       imps         = NIL;
		       closeAnyInput();
		       break;

	case BREAK   : if (reading==KEYBOARD)
			   c0 = EOF;
		fflush(stdin);
		       break;

	case MARK    : mark(tyconDefns);
		       mark(typeInDefns);
		       mark(valDefns);
		       mark(opDefns);
		       mark(classDefns);
                       mark(instDefns);
		       mark(overDefns);
		       mark(primDefns);
		       mark(defaultDefns);
		       mark(evalDefaults);
		       mark(inputExpr);
		       mark(varMinus);
		       mark(varBang);
		       mark(varLT);
		       mark(varGT);
#if OBJ
		       mark(varAssign);
#endif
		       mark(varHiding);
		       mark(varQualified);
		       mark(varAsMod);
		       mark(imps);
		       break;
    }
}

/*-------------------------------------------------------------------------*/


syntax highlighted by Code2HTML, v. 0.9.1