/*-
 * Copyright (c) 2003-2004 Andrey Simonenko
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "config.h"

#ifndef lint
static const char rcsid[] ATTR_UNUSED =
  "@(#)$Id: parser.c,v 1.3.2.3 2007/02/17 09:21:55 simon Exp $";
#endif /* !lint */

/*
 * I considered using of yacc/lex for implementing the parser for
 * ipa.conf(5), but have found that there will not be much advantages
 * in using such tools.  At least: ipa.conf(5) accepts macro variables
 * and a module is allowed to add macro variables at any time, when it
 * parses a configuration stream, so we have to get one-by-one logical
 * line from the file and then parse it or give it to a module for parsing.
 * If a module considers that yacc/lex or similar tools are better, then
 * it can mark all its arguments values as IPA_CONF_TYPE_MISC and take
 * full control on parsing any argument.
 */

#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/queue.h>
#include <sys/types.h>

#include "ipa_mod.h"

#include "memfunc.h"

#include "parser.h"

/*
 * DEBUG_PARSER selects whether the tracing helpers (debuglog(), the
 * xx_msg[] name tables and print_curchar_and_stac()) are compiled in.
 */
#ifndef DEBUG_PARSER
# define DEBUG_PARSER 0			/* If non-zero, then output to stderr debug information. */
#endif

ipa_mem_type	*m_parser;		/* Memory type given to mem_xxx() for everything allocated by the parser. */
char		*parser_str_buf = NULL;	/* Buffer owned by parser_buf_to_string(), released on its next call. */

/* Log callback used by all xxx_logmsgx() wrappers; parser_init() fails while it is NULL. */
void	(*parser_vlogmsgx)(const char *, va_list) = NULL;

/* Description of the logical line produced by parser_read_string(). */
u_int		parser_token_id;	/* Token ID. */
char		*parser_token;		/* Token name or NULL (if there is end of section). */
char		*parser_args;		/* Arguments or NULL (if there are no arguments). */
size_t		parser_args_len;	/* Length of arguments in characters not including '\0'. */
int		parser_nargs;		/* Number of arguments. */

#define PBUF_SIZE	4096		/* How many bytes to read from the file. */
#define CONF_BUF_CHUNK	64		/* Number of bytes for increasing buf in struct conf_buf{}. */

struct parser_pbuf *parser_curpbuf;	/* Current pbuf (top of pbuf_stack) or NULL. */

static SLIST_HEAD(, parser_pbuf) pbuf_stack; /* Stack of pbufs. */

/*
 * Pointer to current char read by parser_get_curchar().  It is kept one
 * position before the next unread character of the current pbuf.
 */
static u_char	*curchar;

/*
 * One symbol (macro variable).  Symbols live in one of the two lists
 * below and are looked up by name with find_sym().
 */
struct psym {
	TAILQ_ENTRY(psym) link;	/* List of symbols. */
	char	*sym;		/* Symbol's name. */
	char	*val;		/* Symbol's value. */
	size_t	val_len;	/* Length of val (strlen, without '\0'). */
	int	copy_flag;	/* If non-zero, then sym and val are private copies, which are freed together with the symbol. */
};

TAILQ_HEAD(psym_list, psym);	/* Head of list of symbols. */

static struct psym_list	local_sym_list;	/* Local symbols (released when the outermost section is closed). */
static struct psym_list	global_sym_list;/* Global symbols (released in parser_deinit()). */

/* Counters and flags describing the logical line, which is being read. */
static int	section_cnt;	/* +1 when '{' and -1 when '}'. */
static u_int	quotes_cnt;	/* Number of unescaped '\"' in the arguments (counted by ac_aqu()). */

static int	in_string_flag;	/* Non-zero if we are in a string. */
static int	was_space_flag;	/* Non-zero if there was a white space character in arguments. */
static int	sym_def_flag;	/* Non-zero if we are in symbol definition. */

/* printf-like log wrappers, all of them end up in parser_vlogmsgx(). */
static void	logmsgx(const char *, ...) ATTR_FORMAT(printf, 1, 2);
static void	syntax_error_logmsgx(const char *, ...) ATTR_FORMAT(printf, 1, 2);
static void	error_logmsgx(const char *, ...) ATTR_FORMAT(printf, 1, 2);

#if DEBUG_PARSER
static void	debuglog(const char *, ...) ATTR_FORMAT(printf, 1, 2);
#endif

/*
 * Some constants for characters (character classes).  Every byte value
 * is mapped to one of these classes by whatis_char[]; the class is the
 * first index of stac_tbl[][], so the order here must match the order
 * of rows in that table.
 */
enum {
	CH_NEWLINE = 0,	/*  0 | '\n'		*/
	CH_SEMICOLON,	/*  1 | ';'		*/
	CH_SPACE,	/*  2 | ' ' or '\t'	*/
	CH_QUOTE,	/*  3 | '\"'		*/
	CH_OPEN_BRACE,	/*  4 | '{'		*/
	CH_CLOSE_BRACE,	/*  5 | '}'		*/
	CH_EQUAL,	/*  6 | '='		*/
	CH_POUND,	/*  7 | '#'		*/
	CH_DOLLAR,	/*  8 | '$'		*/
	CH_TOKPUNCT,	/*  9 | ':' or '_'	*/
	CH_BACKSLASH,	/* 10 | '\\'		*/
	CH_SLASH,	/* 11 | '/'		*/
	CH_ALPHA,	/* 12 | letter		*/
	CH_DIGIT,	/* 13 | digit		*/
	CH_PUNCT,	/* 14 | punctuation	*/
	CH_OTHER	/* 15 | other		*/
};

#define PARSER_AC_T	u_char	/* Type for AC_xxx values. */
#define PARSER_ST_T	u_char	/* Type for ST_xxx and ER_xxx values. */

#if DEBUG_PARSER
/* Names of states for debug output, indexed by ST_xxx. */
static const char *const st_msg[] = { "INI", "TOK", "ARG", "STR", "AEX" };
#endif

/*
 * Parser's states.  The state is the second index of stac_tbl[][].
 */
enum {
	ST_INI = 0,	/*  0 | Initial state. */
	ST_TOK,		/*  1 | Characters go to the token. */
	ST_ARG,		/*  2 | Characters go to the argument. */
	ST_STR,		/*  3 | Characters go to a string in the argument. */
	ST_AEX		/*  4 | An argument is expected, wait for any non space character. */
};

#define ST_NO 5		/* Number of states. */

static PARSER_ST_T curstate = ST_INI; /* Current state, kept between parser_read_string() calls. */

#if DEBUG_PARSER
/* Names of actions for debug output, indexed by AC_xxx. */
static const char *const ac_msg[] = { "SKP", "AAR", "ATK", "AQU", "TOK", "PAR", "BOS", "EOS", "PND", "FMT", "ASP", "SLH", "DLR", "ERR" };
#endif

/*
 * Parser's actions.  An action value is the index of its handler
 * in ac_func_tbl[], so both must be kept in the same order.
 */
enum {
	AC_SKP = 0,	/*  0 | Skip a character, continue parsing. */
	AC_AAR,		/*  1 | Add a character to the argument. */
	AC_ATK,		/*  2 | Add a character to the token. */
	AC_AQU,		/*  3 | Add '\"' to the argument. */
	AC_TOK,		/*  4 | Definition of the token is complete. */
	AC_PAR,		/*  5 | Definition of the parameter is complete. */
	AC_BOS,		/*  6 | Begin of the section. */
	AC_EOS,		/*  7 | End of the section. */
	AC_PND,		/*  8 | '#' appeared. */
	AC_FMT,		/*  9 | '\\' appeared in a string. */
	AC_ASP,		/* 10 | Space in the argument. */
	AC_SLH,		/* 11 | '/' appeared. */
	AC_DLR,		/* 12 | '$' appeared. */
	AC_ERR		/* 13 | A syntax error occurred. */
};

#define AC_NO 14	/* Number of actions. */

/*
 * Parser's errors.  An ER_xxx value is stored in the state field of
 * a stac_tbl[][] entry whose action is AC_ERR and is used as the index
 * in error_msg[] (and er_msg[] for debug output).
 */
enum {
	ER_UXP,		/*  0 | Unexpected character. */
	ER_NSN,		/*  1 | No section name. */
	ER_NLV,		/*  2 | No lvalue (section or parameter name is absent). */
	ER_FMT		/*  3 | Wrong format character after '\\' in a string. */
};

#if DEBUG_PARSER
/* Short names of errors for debug output, indexed by ER_xxx. */
static const char *const er_msg[] = { "UXP", "NSN", "NLV", "FMT" };
#endif

/* Human readable messages, indexed by ER_xxx (see syntax_error()). */
static const char *const error_msg[] = {
/*  0 */ "unexpected character",
/*  1 */ "no section name",
/*  2 */ "no lvalue (section or parameter name is absent)",
/*  3 */ "wrong format character after '\\' in a string"
};

/*
 * One cell of the transition table: what to do and which state to
 * enter for a given (character class, current state) pair.
 */
struct stac {
	PARSER_AC_T action;	/* AC_xxx */
	PARSER_ST_T state;	/* ST_xxx or ER_xxx if action == AC_ERR. */
} ATTR_PACKED;

static const struct stac *curstac;	/* Current entry in stac_tbl. */

/*
 * The transition table.  It is indexed as stac_tbl[CH_xxx][ST_xxx]:
 * rows follow the order of the CH_xxx constants, columns follow the
 * order of the ST_xxx constants.  The action handler itself assigns
 * the new state (usually curstac->state) to curstate.
 */
static const struct stac stac_tbl[][ST_NO] = {
/*	         ST_INI           ST_TOK           ST_ARG           ST_STR           ST_AEX     */
/*  \n */ { {AC_SKP,ST_INI}, {AC_TOK,ST_AEX}, {AC_ASP,ST_ARG}, {AC_AAR,ST_STR}, {AC_SKP,ST_AEX} },
/*  ;  */ { {AC_ERR,ER_UXP}, {AC_PAR,ST_INI}, {AC_PAR,ST_INI}, {AC_AAR,ST_STR}, {AC_PAR,ST_INI} },
/*  sp */ { {AC_SKP,ST_INI}, {AC_TOK,ST_AEX}, {AC_ASP,ST_ARG}, {AC_AAR,ST_STR}, {AC_SKP,ST_AEX} },
/*  \" */ { {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AQU,ST_STR}, {AC_AQU,ST_ARG}, {AC_AQU,ST_STR} },
/*  {  */ { {AC_ERR,ER_NSN}, {AC_BOS,ST_INI}, {AC_BOS,ST_INI}, {AC_AAR,ST_STR}, {AC_BOS,ST_INI} },
/*  }  */ { {AC_EOS,ST_INI}, {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AAR,ST_STR}, {AC_ERR,ER_UXP} },
/*  =  */ { {AC_ERR,ER_NLV}, {AC_TOK,ST_ARG}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_SKP,ST_ARG} },
/*  #  */ { {AC_PND,ST_INI}, {AC_PND,ST_ARG}, {AC_PND,ST_ARG}, {AC_AAR,ST_STR}, {AC_PND,ST_AEX} },
/*  $  */ { {AC_DLR,ST_AEX}, {AC_DLR,ST_TOK}, {AC_DLR,ST_ARG}, {AC_DLR,ST_STR}, {AC_DLR,ST_AEX} },
/* : _ */ { {AC_ATK,ST_TOK}, {AC_ATK,ST_TOK}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_AAR,ST_ARG} },
/* \\  */ { {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AAR,ST_ARG}, {AC_FMT,ST_STR}, {AC_AAR,ST_ARG} },
/*  /  */ { {AC_SLH,ST_INI}, {AC_SLH,ST_TOK}, {AC_SLH,ST_ARG}, {AC_AAR,ST_STR}, {AC_SLH,ST_AEX} },
/*alpha*/ { {AC_ATK,ST_TOK}, {AC_ATK,ST_TOK}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_AAR,ST_ARG} },
/*digit*/ { {AC_ERR,ER_UXP}, {AC_ATK,ST_TOK}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_AAR,ST_ARG} },
/*punct*/ { {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_AAR,ST_ARG} },
/*other*/ { {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AAR,ST_STR}, {AC_ERR,ER_UXP} }
};

/* Characters --> classes (CH_xxx) table, filled once by parser_init(). */
static u_char	whatis_char[256];

/*
 * This generic configuration buffer structure is used for adding
 * characters to token, argument and symbol name.  A buffer grows by
 * CONF_BUF_CHUNK bytes in parser_add_char() when off reaches size.
 */
struct conf_buf {
	char	*buf;		/* Buffer. */
	size_t	size;		/* Its size. */
	size_t	off;		/* Current offset (number of used bytes). */
};

static struct conf_buf token_buf;	/* Buffer for a token. */
static struct conf_buf args_buf;	/* Buffer for arguments. */
static struct conf_buf sym_buf;		/* Buffer for a symbol name. */
static struct conf_buf *cur_buf;	/* Pointer to the current buffer, the target of parser_add_char(). */

/*
 * Return codes from parser_get_curchar() and ac_xxx().
 * parser_read_string() treats any value other than AC_RET_CONT,
 * AC_RET_LINE and AC_RET_EOF as an error.
 */
enum {
	AC_RET_CONT = 0,	/*  0 | Continue parsing. */
	AC_RET_LINE,		/*  1 | Return logical line. */
	AC_RET_ERR,		/*  2 | An error occurred. */
	AC_RET_EOF		/*  3 | EOF. */
};

static int	ac_skp(void), ac_aar(void), ac_atk(void), ac_aqu(void);
static int	ac_tok(void), ac_par(void), ac_bos(void), ac_eos(void);
static int	ac_pnd(void), ac_fmt(void), ac_asp(void), ac_slh(void);
static int	ac_dlr(void), ac_err(void);

/*
 * Action handlers, indexed by AC_xxx; the order of entries must
 * follow the order of the AC_xxx constants.
 */
static int (* const ac_func_tbl[AC_NO])(void) = {
	ac_skp,			/* AC_SKP */
	ac_aar,			/* AC_AAR */
	ac_atk,			/* AC_ATK */
	ac_aqu,			/* AC_AQU */
	ac_tok,			/* AC_TOK */
	ac_par,			/* AC_PAR */
	ac_bos,			/* AC_BOS */
	ac_eos,			/* AC_EOS */
	ac_pnd,			/* AC_PND */
	ac_fmt,			/* AC_FMT */
	ac_asp,			/* AC_ASP */
	ac_slh,			/* AC_SLH */
	ac_dlr,			/* AC_DLR */
	ac_err			/* AC_ERR */
};

/*
 * The printf-like wrapper for the parser_vlogmsgx callback.  Its
 * declaration above (ATTR_FORMAT) lets the compiler check the format
 * string against the arguments.  parser_vlogmsgx must be set by the
 * user of the parser, it is called unconditionally here.
 */
static void
logmsgx(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	parser_vlogmsgx(format, ap);
	va_end(ap);
}

#if DEBUG_PARSER
/*
 * printf-like function, which outputs debug messages to stderr.
 * It exists only if the file is compiled with non-zero DEBUG_PARSER
 * and it does not add a newline by itself.
 */
static void
debuglog(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
}
#endif

/*
 * Output error message header.
 */
static void
error_header_log(const char *what)
{
	if (parser_curpbuf != NULL) {
		char curcharrep[5];

		if (isprint(*curchar)) {
			curcharrep[0] = *curchar;
			curcharrep[1] = '\0';
		} else
			switch (*curchar) {
			case '\n':
				strncpy(curcharrep, "\\n", sizeof curcharrep);
				break;
			case '\t':
				strncpy(curcharrep, "\\t", sizeof curcharrep);
				break;
			default:
				snprintf(curcharrep, sizeof curcharrep, "0x%02x", *curchar);
			}
		if (parser_curpbuf->fp != NULL)
			logmsgx("%serror occurred near line %u at character '%s' in %s",
			    what, parser_curpbuf->lineno, curcharrep, parser_curpbuf->fname);
		else
			logmsgx("%serror occurred near line %u at character '%s' in %s (in macro variable ${%s} value)",
			    what, parser_curpbuf->lineno, curcharrep, parser_curpbuf->fname,
			    parser_curpbuf->sname);
	} else
		logmsgx("%serror occurred near last line in main configuration file",
		    what);
}

/*
 * Report about syntax error: first output the error header with the
 * "syntax " prefix (see error_header_log()), then the printf-like
 * message given in the arguments through parser_vlogmsgx.
 */
static void
syntax_error_logmsgx(const char *format, ...)
{
	va_list ap;

	error_header_log("syntax ");
	va_start(ap, format);
	parser_vlogmsgx(format, ap);
	va_end(ap);
}

/*
 * Report about syntax error given by the code.  The argument error is
 * one of ER_xxx values, it is used as the index in error_msg[].
 */
static void
syntax_error(PARSER_ST_T error)
{
	/*
	 * Never give a table entry as the format string: a '%' added
	 * to some message later would be interpreted as a conversion.
	 */
	syntax_error_logmsgx("%s", error_msg[error]);
}

/*
 * Report about non-syntax error: first output the error header without
 * any prefix (see error_header_log()), then the printf-like message
 * given in the arguments through parser_vlogmsgx.
 */
static void
error_logmsgx(const char *format, ...)
{
	va_list ap;

	error_header_log("");
	va_start(ap, format);
	parser_vlogmsgx(format, ap);
	va_end(ap);
}

/*
 * Look up the symbol with the name sym in the given psym_list
 * (names are compared with strcmp()).  Return a pointer to the
 * first matching psym or NULL if there is no such symbol.
 */
static struct psym *
find_sym(const struct psym_list *list, const char *sym)
{
	struct psym *cur;

	for (cur = TAILQ_FIRST(list); cur != NULL; cur = TAILQ_NEXT(cur, link))
		if (strcmp(cur->sym, sym) == 0)
			break;
	/* cur is NULL here if the whole list was walked. */
	return cur;
}

/*
 * Release all symbols from the given psym_list and leave the list
 * empty.  Names and values are released only for symbols, which own
 * their copies (copy_flag is non-zero).
 */
static void
free_sym_list(struct psym_list *list)
{
	struct psym *head;

	while ( (head = TAILQ_FIRST(list)) != NULL) {
		TAILQ_REMOVE(list, head, link);
		if (head->copy_flag) {
			mem_free(head->sym, m_parser);
			mem_free(head->val, m_parser);
		}
		mem_free(head, m_parser);
	}
}

/*
 * Add a new symbol with name sym and with value val to list, if
 * the list already has this symbol, then replace it with a new value,
 * honoring the copy_flag value.
 *
 * If copy_flag is non-zero, then the list keeps private copies of sym
 * and val, else it keeps the given pointers and the caller must keep
 * them valid while the symbol exists.
 *
 * Return 0 on success and -1 if memory cannot be allocated.  All
 * allocations are made before the list is modified, so on failure the
 * list (including a symbol which had to be replaced) is left unchanged.
 */
static int
sym_add(struct psym_list *list, char *sym, char *val, int copy_flag)
{
	struct psym *psym;
	char	*new_sym, *new_val;
	int	is_new, reuse_sym;

	psym = find_sym(list, sym);
	is_new = psym == NULL;

	/*
	 * If the existing symbol already owns a copy of its name and
	 * a copy is wanted again, then that copy is reused (names are
	 * equal, since the symbol was found by this name).
	 */
	reuse_sym = !is_new && psym->copy_flag && copy_flag;

	if (is_new &&
	    (psym = mem_malloc(sizeof *psym, m_parser)) == NULL) {
		logmsgx("sym_add: mem_malloc failed");
		return -1;
	}

	new_sym = reuse_sym ? psym->sym : sym;
	new_val = val;
	if (copy_flag) {
		if (!reuse_sym &&
		    (new_sym = mem_strdup(sym, m_parser)) == NULL) {
			logmsgx("sym_add: mem_strdup failed");
			goto failed;
		}
		if ( (new_val = mem_strdup(val, m_parser)) == NULL) {
			logmsgx("sym_add: mem_strdup failed");
			if (!reuse_sym)
				mem_free(new_sym, m_parser);
			goto failed;
		}
	}

	/* Nothing can fail below, now it is safe to modify the list. */
	if (is_new)
		TAILQ_INSERT_TAIL(list, psym, link);
	else if (psym->copy_flag) {
		/* Release copies owned by the replaced symbol. */
		if (!reuse_sym)
			mem_free(psym->sym, m_parser);
		mem_free(psym->val, m_parser);
	}

	psym->sym = new_sym;
	psym->val = new_val;
	psym->val_len = strlen(val);
	psym->copy_flag = copy_flag;

	return 0;

failed:
	/* An existing symbol stays in the list as it was. */
	if (is_new)
		mem_free(psym, m_parser);
	return -1;
}

/*
 * Unlink the symbol with the name sym from the list and release it
 * (its name and value are released only if the symbol owns copies).
 * Return 0 on success and -1 if the list does not have a symbol with
 * the given name.
 */
static int
sym_del(struct psym_list *list, const char *sym)
{
	struct psym *victim;

	victim = find_sym(list, sym);
	if (victim == NULL)
		return -1;

	TAILQ_REMOVE(list, victim, link);
	if (victim->copy_flag) {
		mem_free(victim->sym, m_parser);
		mem_free(victim->val, m_parser);
	}
	mem_free(victim, m_parser);
	return 0;
}

/*
 * Delete a local symbol (a macro variable from local_sym_list).
 * Return 0 on success and -1 if there is no local symbol with
 * the given name.
 */
int
parser_local_sym_del(const char *sym)
{
	return sym_del(&local_sym_list, sym);
}

/*
 * Add a local symbol (or replace the value of an existing local
 * symbol with the same name).  If copy_flag is non-zero, then copies
 * of sym and val are kept, else the given pointers are kept.
 * Return the result of sym_add(): 0 on success and -1 on failure.
 */
int
parser_local_sym_add(char *sym, char *val, int copy_flag)
{
	return sym_add(&local_sym_list, sym, val, copy_flag);
}

/*
 * Delete a global symbol (a macro variable from global_sym_list).
 * Return 0 on success and -1 if there is no global symbol with
 * the given name.
 */
int
parser_global_sym_del(const char *sym)
{
	return sym_del(&global_sym_list, sym);
}

/*
 * Add a global symbol (or replace the value of an existing global
 * symbol with the same name).  If copy is non-zero, then copies of
 * sym and val are kept, else the given pointers are kept.
 * Return the result of sym_add(): 0 on success and -1 on failure.
 */
int
parser_global_sym_add(char *sym, char *val, int copy)
{
	return sym_add(&global_sym_list, sym, val, copy);
}

/*
 * Allocate and initialize a new pbuf.  A zero size asks for a pbuf
 * with its own buffer of PBUF_SIZE bytes; a non-zero size is only
 * recorded in buf_size and buf is left NULL, so the caller has to
 * provide the buffer.  Other fields get their initial values (no file,
 * no names, nothing read yet, line number 1).
 * Return NULL if memory cannot be allocated.
 */
struct parser_pbuf *
parser_new_pbuf(size_t size)
{
	struct parser_pbuf *pb;

	pb = mem_malloc(sizeof *pb, m_parser);
	if (pb == NULL) {
		logmsgx("parser_new_pbuf: mem_malloc failed");
		return NULL;
	}

	pb->buf = NULL;
	pb->buf_size = size;
	if (size == 0) {
		/* The pbuf gets its own buffer. */
		pb->buf_size = PBUF_SIZE;
		pb->buf = mem_malloc(PBUF_SIZE, m_parser);
		if (pb->buf == NULL) {
			logmsgx("parser_new_pbuf: mem_malloc failed");
			mem_free(pb, m_parser);
			return NULL;
		}
	}

	pb->buf_off = 0;
	pb->buf_nread = 0;
	pb->lineno = 1;
	pb->fp = NULL;
	pb->fname = NULL;
	pb->sname = NULL;
	return pb;
}

/*
 * Push pbuf to the top of pbuf_stack and set curchar pointer.  If the
 * given pbuf is a macro variable's value (its sname is not NULL), then
 * check for loops: a pbuf with the same sname must not be in the stack.
 *
 * Return 0 on success and -1 if a loop was detected; in the latter case
 * the pbuf is neither linked nor released, it still belongs to the caller.
 */
int
parser_push_pbuf(struct parser_pbuf *pbuf1)
{
	struct parser_pbuf *pbuf2;

#if DEBUG_PARSER
	if (parser_curpbuf != NULL) {
		debuglog("CURPBUF:   ");
		debuglog("file %s", parser_curpbuf->fname);
		if (parser_curpbuf->sname != NULL)
			debuglog(" (sym ${%s} mem <%s>)", parser_curpbuf->sname, parser_curpbuf->buf);
		debuglog(" size %lu, off %lu, nread %lu\n", (u_long)parser_curpbuf->buf_size,
		    (u_long)parser_curpbuf->buf_off, (u_long)parser_curpbuf->buf_nread);
	}
	debuglog("PBUF PUSH: ");
	if (pbuf1->fname != NULL)
		debuglog("file %s", pbuf1->fname);
	else
		debuglog("sym ${%s} mem <%s>", pbuf1->sname, pbuf1->buf);
	debuglog(" size %lu, off %lu, nread %lu\n", (u_long)pbuf1->buf_size,
	    (u_long)pbuf1->buf_off, (u_long)pbuf1->buf_nread);
#endif /* DEBUG_PARSER */

	/* Check for loopbacks in macro variable expanding. */
	if (pbuf1->sname != NULL)
		SLIST_FOREACH(pbuf2, &pbuf_stack, link)
			if (pbuf2->sname != NULL)
				if (strcmp(pbuf2->sname, pbuf1->sname) == 0) {
					error_logmsgx("parser_push_pbuf: loop back is detected while expanding ${%s} macro variable",
					    pbuf1->sname);
					return -1;
				}

	/* Link just created new pbuf. */
	SLIST_INSERT_HEAD(&pbuf_stack, pbuf1, link);

	if (pbuf1->fname == NULL) {
		/*
		 * This is needed only for log functions: inherit the
		 * location from the pbuf, which was current until now.
		 * NOTE(review): parser_curpbuf is dereferenced without
		 * a check, so a pbuf without fname presumably must not be
		 * pushed on an empty stack -- confirm against callers.
		 */
		pbuf1->fname = parser_curpbuf->fname;
		pbuf1->lineno = parser_curpbuf->lineno;
	}

	parser_curpbuf = pbuf1;

	/* ...-1 for parser_get_curchar(), which increments curchar before using it. */
	curchar = parser_curpbuf->buf - 1;

	return 0;
}

/*
 * Remove the top pbuf from pbuf_stack, release it and make the next
 * pbuf (if any) the current one, restoring the curchar pointer from
 * its saved offset.  The buffer of the removed pbuf is released only
 * if the pbuf has a file: for macro variables the buffer is shared
 * with the macro variable's value.
 */
void
parser_pop_pbuf(void)
{
	struct parser_pbuf *top = SLIST_FIRST(&pbuf_stack);

	SLIST_REMOVE_HEAD(&pbuf_stack, link);

	if (top->fp != NULL)
		mem_free(top->buf, m_parser);
	mem_free(top, m_parser);

	/* ...-1 because parser_get_curchar() increments curchar first. */
	parser_curpbuf = SLIST_FIRST(&pbuf_stack);
	if (parser_curpbuf != NULL)
		curchar = parser_curpbuf->buf + parser_curpbuf->buf_off - 1;

#if DEBUG_PARSER
	if (parser_curpbuf != NULL) {
		debuglog("PBUF POP:  ");
		debuglog("file %s", parser_curpbuf->fname);
		if (parser_curpbuf->sname != NULL)
			debuglog(" (sym ${%s} mem <%s>)", parser_curpbuf->sname, parser_curpbuf->buf);
		debuglog(" %lu bytes, off %lu, nread %lu\n", (u_long)parser_curpbuf->buf_size,
		    (u_long)parser_curpbuf->buf_off, (u_long)parser_curpbuf->buf_nread);
	} else
		debuglog("PBUF POP:  NULL\n");
#endif /* DEBUG_PARSER */
}

/*
 * Init whatis_char table (only once), {token,args,sym}_buf,
 * symbol tables and pbuf_stack.  This function can be called,
 * even if parser_read_string() returned an error.
 */
int
parser_init(void)
{
	static int whatis_char_inited = 0;

	if (parser_vlogmsgx == NULL)
		/* Cannot even log about problem. */
		return -1;

	/*
	 * Init whatis_char only once.  Can't use static table,
	 * because we should honor locale (isxxxx() functions).
	 */
	if (!whatis_char_inited) {
		u_char	*ptr;
		u_int	ch;

		whatis_char_inited = 1;
		for (ch = 0, ptr = whatis_char; ch < sizeof whatis_char; ++ptr, ++ch)
			switch (ch) {
			case '\n':
				*ptr = CH_NEWLINE;
				break;
			case ' ':
			case '\t':
				*ptr = CH_SPACE;
				break;
			case '\"':
				*ptr = CH_QUOTE;
				break;
			case '#':
				*ptr = CH_POUND;
				break;
			case '$':
				*ptr = CH_DOLLAR;
				break;
			case ';':
				*ptr = CH_SEMICOLON;
				break;
			case '=':
				*ptr = CH_EQUAL;
				break;
			case '/':
				*ptr = CH_SLASH;
				break;
			case ':':
			case '_':
				*ptr = CH_TOKPUNCT;
				break;
			case '\\':
				*ptr = CH_BACKSLASH;
				break;
			case '{':
				*ptr = CH_OPEN_BRACE;
				break;
			case '}':
				*ptr = CH_CLOSE_BRACE;
				break;
			default:
				if (isalpha(ch))
					*ptr = CH_ALPHA;
				else if (ispunct(ch))
					*ptr = CH_PUNCT;
				else if (isdigit(ch))
					*ptr = CH_DIGIT;
				else
					*ptr = CH_OTHER;
			}
	}

	/* Init token_buf, args_buf and sym_buf. */
	token_buf.buf = args_buf.buf = sym_buf.buf = NULL;
	token_buf.size = args_buf.size = sym_buf.size = 0;

	/* Init symbol tables. */
	TAILQ_INIT(&global_sym_list);
	TAILQ_INIT(&local_sym_list);

	/* Init pbuf_stack. */
	SLIST_INIT(&pbuf_stack);

	section_cnt = 0;

	in_string_flag = was_space_flag = sym_def_flag = 0;

	return 0;
}

/*
 * Do last check of syntax and release all memory except whatis_char
 * table allocated in parser_init().
 */
int
parser_deinit(void)
{
	/* Last check of syntax. */
	if (section_cnt) {
		syntax_error_logmsgx("section is not closed");
		return -1;
	}

	/* Release internal memory. */
	mem_free(token_buf.buf, m_parser);
	mem_free(args_buf.buf, m_parser);
	mem_free(sym_buf.buf, m_parser);

	/* Deinit global symbol table, local one automatically was deinited. */
	free_sym_list(&global_sym_list);

	return 0;
}

/*
 * Add the given character to the current buffer (cur_buf), if there
 * is not enough space in the buffer, then enlarge it by CONF_BUF_CHUNK
 * bytes.
 *
 * Return 0 on success and -1 if memory cannot be allocated; in the
 * latter case the buffer and its recorded size are left as they were,
 * so the old memory is not lost and can be released later.
 * (This assumes that mem_realloc() keeps the old block on failure,
 * as realloc() does.)
 */
static int
parser_add_char(u_char ch)
{
	if (cur_buf->off == cur_buf->size) {
		size_t	new_size = cur_buf->size + CONF_BUF_CHUNK;
		char	*new_buf;

		new_buf = mem_realloc(cur_buf->buf, new_size, m_parser);
		if (new_buf == NULL) {
			error_logmsgx("parser_add_char: mem_realloc failed");
			return -1;
		}
		cur_buf->buf = new_buf;
		cur_buf->size = new_size;
	}
	cur_buf->buf[cur_buf->off++] = ch;
	return 0;
}

/*
 * Advance curchar to the next character of the configuration stream.
 * When the current pbuf is exhausted, it is refilled from its file,
 * or, if it has no file (a macro variable's value), it is popped and
 * reading continues from the previous pbuf.  Line numbers are counted
 * only for characters read from files.
 * Return:
 *  AC_RET_CONT, if curchar was successfully read;
 *  AC_RET_ERR, if an error occurred;
 *  AC_RET_EOF, if EOF occurred.
 */
static int
parser_get_curchar(void)
{
	/* Make sure that the current pbuf has an unread character. */
	while (parser_curpbuf->buf_off >= parser_curpbuf->buf_nread) {
		if (parser_curpbuf->fp == NULL) {
			/* This pbuf ended, use previous one. */
			parser_pop_pbuf();
			continue;
		}

		/* Pbuf content from the file. */
		parser_curpbuf->buf_nread = fread(parser_curpbuf->buf,
		    sizeof(char), parser_curpbuf->buf_size, parser_curpbuf->fp);
		if (parser_curpbuf->buf_nread == 0) {
			if (feof(parser_curpbuf->fp) != 0)
				return AC_RET_EOF;
			error_logmsgx("parser_get_curchar: fread(%s): %s", parser_curpbuf->fname, strerror(errno));
			return AC_RET_ERR;
		}
		parser_curpbuf->buf_off = 0;
		curchar = parser_curpbuf->buf - 1;
	}

	/* Get curchar from the buffer. */
	++curchar;
	if (*curchar == '\n' && parser_curpbuf->fp != NULL)
		++parser_curpbuf->lineno;
	++parser_curpbuf->buf_off;
	return AC_RET_CONT;
}

/*
 * Put curchar back to the buffer, this function must be called
 * no more than one time after parser_get_curchar(), because it
 * cannot restore pbuf_stack.
 */
static int
parser_put_curchar(void)
{
	if (parser_curpbuf->buf_off == 0) {
		error_logmsgx("internal error: parser_put_curchar: buf_off == 0");
		return -1;
	}

	/* Put curchar to the buffer. */
	if (*curchar == '\n' && parser_curpbuf->fp != NULL)
		parser_curpbuf->lineno--;
	curchar--;
	parser_curpbuf->buf_off--;

	return 0;
}

#if DEBUG_PARSER
/*
 * Debug output of one step of the automaton: the current character
 * (two columns wide), the current state, the new state (or the error
 * name if the action is AC_ERR) and the action.
 */
static void
print_curchar_and_stac(void)
{
	const u_char ch = *curchar;
	const char *target;

	if (ch == '\n')
		debuglog("\\n");
	else if (ch == '\t')
		debuglog("\\t");
	else if (ch == ' ')
		debuglog("sp");
	else if (isprint(ch))
		debuglog(" %c", ch);
	else
		debuglog("%02x", ch);

	/* For AC_ERR the state field keeps ER_xxx instead of ST_xxx. */
	target = curstac->action != AC_ERR ?
	    st_msg[curstac->state] : er_msg[curstac->state];
	debuglog(" : %s -> %s : %s\n",
	    st_msg[curstate], target, ac_msg[curstac->action]);
}
#endif /* DEBUG_PARSER */

/*
 * Prepare everything for reading of a new logical line: empty the
 * token and arguments buffers, direct characters to the token buffer
 * and reset per-line results, counters and flags.
 */
static void
init_read_string(void)
{
	token_buf.off = 0;
	args_buf.off = 0;
	cur_buf = &token_buf;

	parser_nargs = 0;
	parser_args = NULL;

	quotes_cnt = 0;
	sym_def_flag = 0;
	was_space_flag = 0;
}

/*
 * Read one logical string from the configuration stream: get
 * characters one by one, find the entry of stac_tbl for the class
 * of the character and the current state and run its action.
 * Macro variables definitions are not returned, instead they
 * are saved in symbols tables.
 * Return:
 *  1, if one string was successfully read;
 * -1, if syntax or another error occurred;
 *  0, if EOF occurred.
 */
int
parser_read_string(void)
{
	int	rc;

	init_read_string();

	for (;;) {
		rc = parser_get_curchar();
		if (rc == AC_RET_EOF)
			break;
		if (rc != AC_RET_CONT)
			return -1;

		curstac = &stac_tbl[whatis_char[*curchar]][curstate];
#if DEBUG_PARSER
		print_curchar_and_stac();
#endif
		rc = ac_func_tbl[curstac->action]();
		if (rc == AC_RET_LINE)
			return 1;
		if (rc == AC_RET_EOF)
			break;
		if (rc != AC_RET_CONT)
			return -1;
	}

	/* EOF is correct only between logical strings. */
	if (curstate == ST_INI)
		return 0;

	if (in_string_flag)
		syntax_error_logmsgx("string is not closed with '\\\"' and end of file occurred");
	else
		syntax_error_logmsgx("unexpected end of file");
	return -1;
}

/*
 * Skip current character, continue: only the state is changed to
 * the one given in the current stac_tbl entry.
 * Always return AC_RET_CONT.
 */
static int
ac_skp(void)
{
	curstate = curstac->state;

	return AC_RET_CONT;
}

/*
 * Add curchar to the current buffer as a character of the arguments.
 * If white space was seen before this character, then a new argument
 * begins here and the number of arguments is incremented.
 * Return AC_RET_CONT or AC_RET_ERR if the character cannot be added.
 */
static int
ac_aar(void)
{
	if (parser_add_char(*curchar) < 0)
		return AC_RET_ERR;

	if (was_space_flag != 0) {
		++parser_nargs;
		was_space_flag = 0;
	}

	curstate = curstac->state;
	return AC_RET_CONT;
}

/*
 * Add curchar to the token buffer (more exactly to the current
 * buffer, which is the token buffer in states using this action)
 * and change the state.
 * Return AC_RET_CONT or AC_RET_ERR if the character cannot be added.
 */
static int
ac_atk(void)
{
	if (parser_add_char(*curchar) < 0)
		return AC_RET_ERR;

	curstate = curstac->state;

	return AC_RET_CONT;
}

/*
 * Add curchar '\"' to the arguments like any other character of
 * arguments (see ac_aar()), then count this double quote and toggle
 * in_string_flag.  Return the result of ac_aar().
 */
static int
ac_aqu(void)
{
	const int rc = ac_aar();

	if (rc == AC_RET_CONT) {
		in_string_flag = !in_string_flag;
		quotes_cnt++;
	}
	return rc;
}

/*
 * Definition of the token is complete: terminate the token with '\0'
 * and switch current conf_buf to the arguments buffer, so following
 * characters are collected as arguments.
 * Return AC_RET_CONT or AC_RET_ERR if '\0' cannot be added.
 */
static int
ac_tok(void)
{
	if (parser_add_char('\0') < 0)
		return AC_RET_ERR;

	cur_buf = &args_buf;

	curstate = curstac->state;

	return AC_RET_CONT;
}

/*
 * Finish configuration buffers of the logical line.  If the token
 * is still being collected, then terminate it with '\0'.  Else, if
 * there are arguments, terminate them with '\0' (a trailing white
 * space character is replaced by '\0'), set parser_args and
 * parser_args_len and count the last argument.  parser_token is set
 * in any case.
 * Return 0 on success and -1 if '\0' cannot be added.
 */
static int
complete_conf_bufs(void)
{
	if (cur_buf == &token_buf) {
		if (parser_add_char('\0') < 0)
			return -1;
	} else if (args_buf.off != 0) {
		char *last = &args_buf.buf[args_buf.off - 1];

		if (*last == ' ' || *last == '\t')
			*last = '\0';
		else if (parser_add_char('\0') < 0)
			return -1;

		/* args_buf.off counts the terminating '\0' in both cases. */
		parser_nargs++;
		parser_args_len = args_buf.off - 1;
		parser_args = args_buf.buf;
	}
	parser_token = token_buf.buf;
	return 0;
}

/*
 * Definition of the parameter or macro variable is complete.
 *
 * For a parameter the logical line is returned to the caller
 * (AC_RET_LINE).  For a definition of a macro variable its value,
 * which must be a single string, is registered as a global symbol
 * (outside of any section) or as a local one (inside a section) and
 * parsing continues from the initial state (AC_RET_CONT).
 * AC_RET_ERR is returned if any error occurred.
 */
static int
ac_par(void)
{
	if (complete_conf_bufs() < 0)
		return AC_RET_ERR;

	if (sym_def_flag) {
		/* Definition of a macro variable. */
		char	*val;
		int	rv;

		if (!parser_arg_is_string()) {
			syntax_error_logmsgx("macro variable's value should be a string");
			return AC_RET_ERR;
		}
		if ( (val = parser_string_strdup(args_buf.buf, m_parser)) == NULL) {
			error_logmsgx("cannot allocate memory for value of ${%s} macro variable", sym_buf.buf);
			return AC_RET_ERR;
		}
		if (section_cnt == 0)
			rv = parser_global_sym_add(sym_buf.buf, val, 1);
		else
			rv = parser_local_sym_add(sym_buf.buf, val, 1);

		/*
		 * The symbol table keeps its own copy of the value
		 * (copy_flag is 1), so the temporary value is released
		 * whether or not the registration succeeded.
		 */
		mem_free(val, m_parser);
		if (rv < 0) {
			error_logmsgx("cannot register a macro variable");
			return AC_RET_ERR;
		}
		init_read_string();
		curstate = ST_INI;
		return AC_RET_CONT;
	}

	parser_token = token_buf.buf;
	parser_token_id = TOKEN_ID_PARAMETER;

	curstate = curstac->state;

	return AC_RET_LINE;
}

/*
 * Begin of the section: '{' appeared after the section name and its
 * optional arguments.  The configuration buffers are completed, the
 * nesting counter is incremented and the logical line is returned
 * with the TOKEN_ID_SECTION_BEGIN token ID (AC_RET_LINE).
 * AC_RET_ERR is returned if '{' appeared in a definition of a macro
 * variable or if the buffers cannot be completed.
 */
static int
ac_bos(void)
{
	if (sym_def_flag) {
		/* Looks like incorrect definition of a macro variable. */
		syntax_error_logmsgx("macro variable's value should be a string or wrong usage or macro variable definition");
		return AC_RET_ERR;
	}

	if (complete_conf_bufs() < 0)
		return AC_RET_ERR;

	++section_cnt;

	parser_token_id = TOKEN_ID_SECTION_BEGIN;

	curstate = curstac->state;

	return AC_RET_LINE;
}

/*
 * End of the section: '}' appeared.  The nesting counter is
 * decremented and when the outermost section is closed all local
 * macro variables are released.  The logical line is returned with
 * the TOKEN_ID_SECTION_END token ID (AC_RET_LINE); AC_RET_ERR is
 * returned if there is no opened section.
 */
static int
ac_eos(void)
{
	if (section_cnt == 0) {
		syntax_error_logmsgx("unmatched closed curly brace");
		return AC_RET_ERR;
	}

	section_cnt--;
	if (section_cnt == 0)
		free_sym_list(&local_sym_list);

	curstate = curstac->state;
	parser_token_id = TOKEN_ID_SECTION_END;
	return AC_RET_LINE;
}

/*
 * '#' appeared, read current line up to the '\n' or EOF.
 */
static int
ac_pnd(void)
{
	int error;

	for (;;) {
		if ( (error = parser_get_curchar()) != AC_RET_CONT)
			return error;
		if (*curchar == '\n')
			break;
	}

	curstate = curstac->state;

	return AC_RET_CONT;
}

/*
 * '\\' appeared in a string, the next character decides what to do:
 *  '\"' or '\\' -- the whole two characters sequence is added to
 *                  the string as is;
 *  'n' or 't'   -- the real newline or tab character is added;
 *  '\n'         -- both '\\' and '\n' are ignored;
 *  other        -- this is a syntax error.
 * Return AC_RET_CONT on success, AC_RET_ERR on errors or the result
 * of parser_get_curchar() if the next character cannot be read.
 */
static int
ac_fmt(void)
{
	int	rc, failed;

	rc = parser_get_curchar();
	if (rc != AC_RET_CONT) {
		if (rc == AC_RET_EOF)
			syntax_error(ER_FMT);
		return rc;
	}

	failed = 0;
	if (*curchar == '\"' || *curchar == '\\')
		failed = parser_add_char('\\') < 0 ||
		    parser_add_char(*curchar) < 0;
	else if (*curchar == 'n')
		failed = parser_add_char('\n') < 0;
	else if (*curchar == 't')
		failed = parser_add_char('\t') < 0;
	else if (*curchar != '\n') {
		syntax_error(ER_FMT);
		return AC_RET_ERR;
	}
	if (failed)
		return AC_RET_ERR;

	curstate = curstac->state;
	return AC_RET_CONT;
}

/*
 * Space in the argument.  Any sequence of white space characters
 * between arguments is represented by one ' ' character: a space is
 * added only if the previous character was not white space and only
 * if there already is something in the arguments buffer.
 * Return AC_RET_CONT or AC_RET_ERR if the character cannot be added.
 */
static int
ac_asp(void)
{
	if (!was_space_flag && args_buf.off != 0) {
		if (parser_add_char(' ') < 0)
			return AC_RET_ERR;
		was_space_flag = 1;
	}

	curstate = curstac->state;

	return AC_RET_CONT;
}

/*
 * '/' appeared.  To not make stac_tbl too complex here, there are
 * some tricks in checking if '/' can appear in current state.
 */
static int
ac_slh(void)
{
	int error, was_star;

	if ( (error = parser_get_curchar()) != AC_RET_CONT) {
		if (error == AC_RET_EOF)
			syntax_error(ER_UXP);
		return error;
	}

	if (*curchar != '*')
		switch (curstate) {
		case ST_INI:
		case ST_TOK:
			syntax_error(ER_UXP);
			return AC_RET_ERR;
		default: /* ST_AEX or ST_ARG. */
			if (parser_add_char('/') < 0)
				return AC_RET_ERR;
			curstate = ST_ARG;
			if (parser_put_curchar() < 0)
				return AC_RET_ERR;
		}
	else {
		was_star = 0;
		for (;;) {
			if ( (error = parser_get_curchar()) != AC_RET_CONT) {
				if (error == AC_RET_EOF) {
					syntax_error_logmsgx("C-like comment is not closed");
					return AC_RET_ERR;
				}
				return error;
			}
			if (*curchar == '/') {
				if (was_star)
					break;
			} else
				was_star = *curchar == '*';
		}
	}

	curstate = curstac->state;

	return AC_RET_CONT;
}

/*
 * '$' appeared.  To not make stac_tbl too complex here, we will decide
 * if it is the begin of a macro variable definition or we need to
 * expand a macro variable.
 */
static int
ac_dlr(void)
{
	int	error;
	struct conf_buf *next_buf = cur_buf;

	if (sym_def_flag) {
		if (parser_add_char('$') < 0)
			return AC_RET_ERR;
		return AC_RET_CONT;
	}

	if ( (error = parser_get_curchar()) != AC_RET_CONT) {
		if (error == AC_RET_EOF)
			syntax_error(ER_UXP);
		return error;
	}

	if (*curchar != '{') {
		syntax_error_logmsgx("wrong usage of character '$': character '{' was missed");
		return AC_RET_ERR;
	}

	/* Get name of a macro variable. */
	cur_buf = &sym_buf;
	sym_buf.off = 0;
	for (;;) {
		if ( (error = parser_get_curchar()) != AC_RET_CONT)
			return error;
		switch (whatis_char[*curchar]) {
		case CH_ALPHA:
		case CH_DIGIT:
		case CH_DOLLAR:
			if (parser_add_char(*curchar) < 0)
				return AC_RET_ERR;
			break;
		case CH_CLOSE_BRACE:
			if (parser_add_char('\0') < 0)
				return AC_RET_ERR;
			goto got_sym_name;
		default:
			syntax_error_logmsgx("this character is not allowed in macro variable name");
			return AC_RET_ERR;
		}
	}

got_sym_name:
	switch (sym_buf.off) {
	case 1:
		syntax_error_logmsgx("empty macro variable's name is not allowed");
		return AC_RET_ERR;
	case 2:
		if (sym_buf.buf[0] == '$') {
			/* ${$} */
			if (curstate == ST_INI) {
				syntax_error_logmsgx("macro variable ${$} is reserved, cannot redefine it");
				return AC_RET_ERR;
			}
			/* Expand ${$} right now to allow to insert '$' character. */
			cur_buf = &args_buf;
			if (parser_add_char('$') < 0)
				return AC_RET_ERR;
			break;
		}
		/* FALLTHROUGH */
	default:
		if (curstate == ST_INI) {
			/* Symbol definition. */
			next_buf = &args_buf;
			sym_def_flag = 1;
		} else {
			/* Expand symbol. */
			struct psym *psym;
			struct parser_pbuf *pbuf;

			if ( (psym = find_sym(&local_sym_list, sym_buf.buf)) == NULL &&
			     (psym = find_sym(&global_sym_list, sym_buf.buf)) == NULL) {
				syntax_error_logmsgx("unknown macro variable ${%s}", sym_buf.buf);
				return AC_RET_ERR;
			}
			if ( (pbuf = parser_new_pbuf(psym->val_len)) == NULL) {
				error_logmsgx("cannot expand macro variable ${%s}", sym_buf.buf);
				return AC_RET_ERR;
			}
			pbuf->buf = (u_char *)psym->val;
			pbuf->buf_nread = psym->val_len;
			pbuf->sname = psym->sym;
			if (parser_push_pbuf(pbuf) < 0)
				return AC_RET_ERR;
		}
	}

	cur_buf = next_buf;

	curstate = curstac->state;

	return AC_RET_CONT;
}

/*
 * A syntax error occurred.  For the AC_ERR action the state field of
 * the current stac_tbl entry keeps the error code (ER_xxx), which is
 * reported here.  Always return AC_RET_ERR.
 */
static int
ac_err(void)
{
	syntax_error(curstac->state);
	return AC_RET_ERR;
}

/*
 * Return non-zero if the given buffer is a string, i.e. it begins
 * with '\"', ends with unescaped '\"' and has no other unescaped '\"'
 * inside.  A character after '\\' is considered escaped.
 * Return zero for NULL, for a buffer without the closing '\"' and for
 * a buffer, which ends right after '\\'.
 */
int
parser_buf_is_string(const char *buf)
{
	const char *ptr;

	if (buf == NULL || *buf != '\"')
		return 0;

	for (ptr = buf + 1; *ptr != '\0'; ++ptr) {
		if (*ptr == '\\') {
			/*
			 * "\\x": skip the escaped character, but never
			 * step over the terminating '\0'.
			 */
			if (*++ptr == '\0')
				return 0;
		} else if (*ptr == '\"') {
			/* It must be the last '\"'. */
			return ptr[1] == '\0';
		}
	}

	return 0;
}

/*
 * Return non-zero if whole parser_args is a string: there is exactly
 * one argument with exactly two unescaped double quotes, which are
 * its first and its last characters.  Counters collected while
 * the line was being read are used, so there is no linear search.
 */
int
parser_arg_is_string(void)
{
	if (parser_nargs != 1 || quotes_cnt != 2)
		return 0;
	if (parser_args[0] != '\"')
		return 0;
	return parser_args[parser_args_len - 1] == '\"';
}

/*
 * Allocate buffer and copy string from str converting '\\'-like
 * sequences to real characters (thanks to ac_fmt() there are only
 * two such sequences \" and \\).
 *
 * The argument str must be a string in the parser's format: it begins
 * and ends with '\"'; surrounding double quotes are not copied.  The
 * buffer is allocated with mem_malloc() using the given mem_type and
 * must be released by the caller.
 *
 * Return NULL if str is not such a string or if memory cannot be
 * allocated.
 */
char *
parser_string_strdup(char *str, void *mem_type)
{
	const char *src;
	char	*res, *dst, *shrunk;
	int	has_special;
	size_t	len;

	/*
	 * With the last character equal to '\"' the result always fits
	 * in len - 1 bytes, even if that '\"' turns out to be escaped.
	 */
	if (str == NULL || str[0] != '\"' ||
	    (len = strlen(str)) < 2 || str[len - 1] != '\"') {
		error_logmsgx("parser_string_strdup: argument is not a string");
		return NULL;
	}

	/* Two double quotes are dropped, one byte is needed for '\0'. */
	if ( (res = mem_malloc(len - 1, mem_type)) == NULL) {
		error_logmsgx("parser_string_strdup: mem_malloc failed");
		return NULL;
	}

	has_special = 0;
	dst = res;
	for (src = str + 1; *src != '\"' && *src != '\0'; ++src) {
		if (*src == '\\') {
			if (src[1] == '\0')
				break;
			has_special = 1;
			/* \" or \\. */
			++src;
		}
		*dst++ = *src;
	}
	*dst = '\0';

	if (has_special) {
		/*
		 * The result is shorter than the allocated buffer, try
		 * to give extra bytes back.  A failure is harmless: the
		 * bigger buffer is still valid (assuming mem_realloc()
		 * keeps the old block on failure, as realloc() does).
		 */
		shrunk = mem_realloc(res, (size_t)(dst - res) + 1, mem_type);
		if (shrunk != NULL)
			res = shrunk;
	}
	return res;
}

/*
 * Convert a sequence of characters pointed by ptr to a string in the
 * format accepted by the parser: the result is enclosed in double
 * quotes, '\\' and '\"' are escaped with '\\', tab and newline
 * characters are represented by "\t" and "\n" sequences (this is what
 * ac_fmt() converts back to real characters).
 *
 * The result is kept in parser_str_buf, which is owned by this
 * function: it is valid until the next call and must not be released
 * by the caller.  If memory cannot be allocated, then a pointer to
 * a constant message is returned instead.
 */
char *
parser_buf_to_string(const char *ptr)
{
	char *mod;

	/* Can free, because mem_malloc can return NULL. */
	mem_free(parser_str_buf, m_parser);

	/*
	 * Allocated buffer will be bigger than really needed: every
	 * character gives no more than two characters, plus two double
	 * quotes and '\0'.
	 */
	if ( (parser_str_buf = mem_malloc(2 + 1 + strlen(ptr) * 2, m_parser)) == NULL)
		return "(parser_buf_to_string: mem_malloc failed)";

	mod = parser_str_buf;
	*mod++ = '\"';

	for (; *ptr != '\0'; ++ptr)
		switch (*ptr) {
		case '\t':
			*mod++ = '\\';
			*mod++ = 't';
			break;
		case '\n':
			*mod++ = '\\';
			*mod++ = 'n';
			break;
		case '\\':
		case '\"':
			*mod++ = '\\';
			/* FALLTHROUGH */
		default:
			*mod++ = *ptr;
		}

	*mod++ = '\"';
	*mod = '\0';

	return parser_str_buf;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */