#define CLASS_Config_PRIVATE
#include "config.h"

#include <strings.h>
#include <ctype.h>

/* Number of field slots allocated by config_new(), and the number of
 * additional slots config_expand() adds each time it is called.
 */
#define DFLT_NFIELDS	10
/* Record S as the start of the next field.  While nfields is below
 * maxfields the pointer is stored directly in the fields array and
 * nfields is incremented; otherwise config_expand() is called to grow
 * the array and store the pointer.  The macro refers to a variable
 * named `self', which must be in scope wherever it is used.
 */
#define START_FIELD(S)	\
	(self->nfields < self->maxfields \
	  ? (char*)(self->fields[self->nfields++] = (char*)(S)) \
	  : (char*)config_expand(self, S))

/* Character classes stored in the per-character brk table by
 * config_setbreak() and dispatched on by config_scanbuf():
 *
 *	BRK_NORM	ordinary character (part of a field)
 *	BRK_EOS		the '\0' that ends the scan
 *	BRK_WS		separator
 *	BRK_PUNCT	delimiter
 *	BRK_QUOTE	quote character
 *
 * BRK_NORM is zero and every other class is a distinct non-zero bit,
 * so a mask such as ~BRK_NORM matches any non-ordinary character.
 */
#define BRK_NORM	0x00
#define BRK_EOS		0x02
#define BRK_WS		0x04
#define BRK_PUNCT	0x08
#define BRK_QUOTE	0x10

/* Separators lie between adjacent fields and are discarded.  This is
 * the set config_new() installs: blank, tab, CR, LF and form feed.
 */
static char default_separators[]	= " \t\r\n\f";

/* Delimiters end the current field and are themselves kept, each as
 * a one-character field.  None are installed by default.
 */
static char default_delimiters[]	= "";

/* Quote characters open a quoted string, inside which neither
 * separators nor delimiters split the field.  config_setbreak()
 * always installs this set, after the caller's separators and
 * delimiters.
 */
static char quote_delimiters[]		= "'\"";

/* Anything not declared as a delimiter or separator is just
 * a normal character.
 */

/* Forward declarations of the private helpers defined further down. */
static Config config_expand();
static unsigned char* config_copy();
static unsigned char* get_quoted_string();

/* Allocator and string routines, declared old-style to match the
 * rest of the file.  calloc() is listed with the others because
 * config_new() calls it; without a declaration its result would be
 * taken to be an int and the returned pointer could be truncated.
 */
extern char*	malloc();
extern char*	calloc();
extern char*	realloc();
extern char*	strdup();

/* Option flags given to every scanner created by config_new(). */
static int dflt_options	= CFG_OPT_UCASE;

/* Create a scanner with the default separators, no delimiters, room
 * for DFLT_NFIELDS fields and the default option flags.
 *
 * returns: the new Config, or 0 if memory could not be obtained (in
 * which case nothing is left allocated).
 */
Config
config_new()
{
	Config self;

	self = (Config)calloc (1, sizeof (*self));
	if (self == 0)
		return 0;

	self->fields = (char**)calloc (DFLT_NFIELDS, sizeof (char*));
	if (self->fields == 0) {
		/* don't hand back a half-built object */
		free (self);
		return 0;
	}
	self->maxfields = DFLT_NFIELDS;
	self->options = dflt_options;
	config_setbreak (self, default_separators, default_delimiters);

	return self;
}

/* Release a scanner: frees the fields array (when there is one) and
 * then the object itself.  A null <self> is accepted and ignored.
 * Only those two allocations are released here.
 *
 * returns: always 0.
 */
Config
config_dispose (self)
Config	self;
{
	if (self != 0) {
		if (self->fields != 0)
			free (self->fields);
		free (self);
	}

	return 0;
}

#ifndef __STDC__
# define const
#endif
/* Install the separator and delimiter sets and rebuild the
 * per-character class table.
 *
 * separators:	characters that split fields and are discarded.
 * delimiters:	characters that end a field and are kept.
 *
 * Both pointers are stored in <self> as given (the strings are not
 * copied).  A null pointer is treated as an empty set.
 *
 * The table is filled in a fixed order, so a character listed more
 * than once ends up with the last class assigned: separators first,
 * then delimiters, then the built-in quote characters, and finally
 * '\0', which is always the end-of-string marker.
 *
 * returns: always 0.
 */
int
config_setbreak (self, separators, delimiters)
Config	self;
const char*	separators;
const char*	delimiters;
#undef const
{
	register unsigned char* p;

	self->separators = (char*)separators;
	self->delimiters = (char*)delimiters;

	memset (self->brk, BRK_NORM, sizeof self->brk);

	if (separators != 0)
		for (p = (unsigned char*)self->separators; *p; p++)
			self->brk[*p] = BRK_WS;

	if (delimiters != 0)
		for (p = (unsigned char*)self->delimiters; *p; p++)
			self->brk[*p] = BRK_PUNCT;

	for (p = (unsigned char*)quote_delimiters; *p; p++)
		self->brk[*p] = BRK_QUOTE;

	self->brk['\0'] = BRK_EOS;

	/* the function is declared int, so give callers a defined value */
	return 0;
}

/* returns: the field count currently stored in <self>; config_scanbuf()
 * resets and refills it on every call.
 */
int
config_nfields (self)
Config	self;
{
	int count;

	count = self->nfields;
	return count;
}

/* returns: the array of field pointers held by <self>.  The array
 * belongs to the scanner; the first config_nfields() entries are the
 * ones filled in by the last scan.
 */
char**
config_fields (self)
Config	self;
{
	char** list;

	list = self->fields;
	return list;
}

/* Split the null-terminated string <buf> into fields.
 *
 * The text is read from a temporary copy and the fields are written
 * back into <buf> itself, each one null-terminated; the pointers
 * returned by config_fields() therefore point into <buf> and remain
 * meaningful only while the caller keeps <buf> intact.
 *
 * NOTE(review): every field gains its own terminator, and a delimiter
 * becomes a two-byte field, so input such as "a=b" (with '=' as a
 * delimiter) is rewritten as "a\0=\0b\0" -- more bytes than the
 * original string occupied.  <buf> must have room beyond its string;
 * up to about twice its length can be written.
 *
 * returns: the number of fields found.  0 is also returned when the
 * temporary copy cannot be allocated.
 */
int
config_scanbuf (self, buf)
Config	self;
char*	buf;
{
	register unsigned char* fr;
	unsigned char* to;
	int type;

	self->buf = buf;
	self->nfields = 0;
	self->tbuf = strdup(buf);
	if (self->tbuf == 0)
		return 0;

	fr = (unsigned char*)self->tbuf; to = (unsigned char*)self->buf;
	while ((type = self->brk[*fr]) != BRK_EOS) {
		switch (type) {
		 case BRK_QUOTE:
			START_FIELD(to);
			fr = get_quoted_string (*fr, fr+1, (char**)&to);
			/* Skip the closing quote, but stay put when the
			 * string ran out before one was found.
			 */
			if (*fr != '\0')
				fr++;
			break;

		 case BRK_WS:
			fr++;
			break;

		 case BRK_PUNCT:
			START_FIELD(to);
			*to++ = *fr++;
			*to++ = '\0';
			break;

		 case BRK_NORM:
			START_FIELD(to);
			fr = config_copy (self, fr, &to, ~BRK_NORM);
			break;

		 default:
			/* unknown class: step over it so the loop
			 * always makes progress
			 */
			fr++;
			break;
		}
	}

	free (self->tbuf);
	self->tbuf = 0;
	return self->nfields;

}

/* returns: the option flags currently set on <self>. */
int
config_getoptions (self)
Config	self;
{
	int current;

	current = self->options;
	return current;
}

/* Replace the option flags of <self> with <options>.
 *
 * returns: the flags that were in effect before the call.
 */
int
config_setoptions (self, options)
Config	self;
int	options;
{
	int previous;

	previous = self->options;
	self->options = options;

	return previous;
}

/* Private methods */

/* Grow the fields array by DFLT_NFIELDS slots, clear the unused
 * slots, and store <fieldstart> as the next field.
 *
 * returns: <self>, or 0 if the array could not be enlarged.  On
 * failure the scanner is left exactly as it was (the old array stays
 * valid) and <fieldstart> is not recorded.
 */
static Config
config_expand (self, fieldstart)
Config	self;
char*	fieldstart;
{
	int grown = self->maxfields + DFLT_NFIELDS;
	char** bigger;

	/* Keep the old pointer until the new one is known to be good;
	 * assigning realloc's result directly would lose the array on
	 * failure.
	 */
	bigger = (char**) realloc (self->fields, grown*sizeof(char*));
	if (bigger == 0)
		return 0;

	self->fields = bigger;
	self->maxfields = grown;

	/* zero out the storage not yet holding a field */
	memset ((char*)&self->fields[self->nfields], 0,
		(self->maxfields-self->nfields)*sizeof(char*));

	/* store the new field */
	self->fields[self->nfields++] = fieldstart;

	return self;
}

/* Quoting conventions:
 *
 * Following C, quoted strings preserve blanks, prevent
 * case-mapping, and may contain all the "backslash escapes",
 * to wit:
 *
 *	\a	(alert) the ASCII BEL character
 *	\b	ASCII backspace
 *	\e	ASCII escape
 *	\f	ASCII formfeed
 *	\n	ASCII linefeed (logical newline to UNIX folks)
 *	\r	ASCII carriage return
 *	\t	ASCII horizontal tab
 *	\v	ASCII vertical tab
 *	\DDD	(up to 3 octal digits) arbitrary ASCII character whose
 *		value is octal DDD.
 *	\xDD	(exactly 2 hex digits) arbitrary ASCII character whose
 *		value is hex DD.
 *
 * The quote character may be included by preceding it with a backslash.
 */

/* Value of a hex digit character; the argument must already be known
 * to be a hex digit.
 */
#define hex_ord(chr)	(islower(chr) ? ((chr)-'a'+10) \
			 : (isupper(chr) ? ((chr)-'A'+10) : ((chr)-'0')))
/* True for the octal digits '0' through '7' only. */
#ifndef isodigit
# define isodigit(x)	((x) >= '0' && (x) <= '7')
#endif

/* Copy a quoted string, decoding backslash escapes on the way.
 *
 * qc:	The quote character which ends the scan (either ' or " )
 * ss:	The source string to scan, starting just after the opening
 *	quote.
 * rsd:	Reference destination string to write the result.  This will be
 *	null-terminated on return, and the reference parameter updated
 *	to point just past that terminator.
 *
 * Escapes are decoded as follows:
 *	- \xDD needs two hex digits; with fewer, the 'x' is taken
 *	  literally like any other unknown escape.
 *	- \DDD takes one to three octal digits (0-7).
 *	- a backslash that is the last character of the string is
 *	  dropped.
 *
 * The scan never moves past the terminating '\0' of <ss>.
 *
 * returns: the location in <ss> where the scan terminated: the
 * terminating instance of <qc>, or the '\0' if the string ended first.
 */
static unsigned char*
get_quoted_string (qc, ss, rsd)
char qc;
register unsigned char* ss;
char**	rsd;
{
	int v;
	register char* sd = *rsd;

	for ( ; *ss != qc && *ss != '\0'; ss++) {
		/* The single-character escapes.
		 * Put more here if you want them.
		 * Format of string is pairs: value, character
		 * such that <character> following backslash
		 * is replaced by <value>.
		 */
		register char *esp = "\007a\bb\033e\ff\nn\rr\tt\vv";

		if (*ss != '\\') {
			*sd++ = *ss;
			continue;
		}

		/* Nothing follows the backslash: stop on the '\0' so
		 * the loop increment cannot step beyond it.
		 */
		if (ss[1] == '\0') {
			ss++;
			break;
		}
		ss++;

		/* \xDD: hex escape, both digits required */
		if (*ss == 'x' && isxdigit (ss[1]) && isxdigit (ss[2])) {
			v = (hex_ord(ss[1]) << 4) + hex_ord(ss[2]);
			ss += 2;	/* leave ss on the last digit */
			*sd++ = v;
			continue;
		}

		/* \DDD: octal escape. Up to 3 octal digits. */
		if (isodigit(*ss)) {
			int n;

			for (v = 0, n = 3; n > 0 && isodigit(*ss); ss++, n--)
				v = (v << 3) + (*ss - '0');
			ss--;		/* leave ss on the last digit */
			*sd++ = v;
			continue;
		}

		/* look for a single-character escape */
		for ( ; esp[0] != '\0'; esp += 2)
			if (esp[1] == *ss)
				break;

		/* backslash followed by anything else just
		 * yields the following character
		 */
		*sd++ = (esp[0] != '\0') ? esp[0] : *ss;
	}
	*sd++ = '\0';
	*rsd = sd;
	return ss;
}

/* Copy one unquoted field, folding case as the options direct.
 *
 * fr:	where to start reading.
 * pto:	reference to the write position; the field is written there,
 *	null-terminated, and the reference is advanced past the
 *	terminator.
 * brk:	mask of character classes that end the field.
 *
 * Characters are mapped to upper case when CFG_OPT_UCASE is set,
 * otherwise to lower case when CFG_OPT_LCASE is set, otherwise
 * copied unchanged.
 *
 * returns: the position of the character that stopped the copy.
 */
static unsigned char*
config_copy (self, fr, pto, brk)
Config self;
register unsigned char* fr;
unsigned char** pto;
int brk;
{
	register unsigned char* dst = *pto;
	register unsigned char ch;
	int fold;	/* >0: to upper, <0: to lower, 0: leave alone */

	if (self->options & CFG_OPT_UCASE)
		fold = 1;
	else if (self->options & CFG_OPT_LCASE)
		fold = -1;
	else
		fold = 0;

	for (;;) {
		ch = *fr;
		if ((self->brk[ch] & brk) != 0)
			break;
		fr++;

		if (fold > 0) {
			if (islower(ch))
				ch = toupper(ch);
		}
		else if (fold < 0) {
			if (isupper(ch))
				ch = tolower(ch);
		}
		*dst++ = ch;
	}

	*dst++ = '\0';
	*pto = dst;
	return fr;
}

#ifdef TEST
#include <stdio.h>
/* defined below; declared here so the call in main() sees it */
static int ctrltoascii();

/* Test driver: reads lines from the file named by the first argument
 * (or from standard input when there is none), scans each with '=',
 * '+' and ',' as delimiters, and prints the resulting fields with
 * control characters made visible.
 *
 * returns: 0 on success, 1 if the input cannot be opened or the
 * scanner cannot be created.
 */
int
main (argc, argv)
int	argc;
char**	argv;
{
	FILE* stream;
	char buf[256];
	/* ctrltoascii() can emit two characters per input character */
	char buf2[2 * sizeof buf];
	Config conf;
	int nfields;
	char** ret;
	int line = 0;
	int i;

	if (argc == 1)
		stream = stdin;
	else
		stream = fopen (argv[1], "r");

	if (stream == 0) {
		perror (argv[1]);
		return 1;
	}

	conf = config_new();
	if (conf == 0) {
		fputs ("config_new failed\n", stderr);
		if (stream != stdin)
			fclose (stream);
		return 1;
	}
	config_setbreak (conf, " \t\r\n\f", "=+,");

	while (fgets (buf, sizeof buf, stream)) {
		line += 1;
		nfields = config_scanbuf (conf, buf);
		printf ("line %2d: nfields = %d\n", line, nfields);
		if (nfields == 0)
			continue;

		ret = config_fields(conf);
		for (i = 0; i < nfields; i++) {
			ctrltoascii (ret[i], buf2);
			/* %d wants an int, whatever strlen() returns */
			printf ("\tfield[%02d] = %d:'%s'\n", i,
				(int)strlen(ret[i]), buf2);
		}
	}

	config_dispose (conf);
	if (stream != stdin)
		fclose (stream);
	return 0;
}

/* Copy <buf1> to <buf2>, writing each control character in caret
 * notation: '^' followed by the character with bit 0x40 flipped, so
 * that tab becomes "^I" and DEL becomes "^?".
 *
 * <buf2> must have room for twice the length of <buf1> plus the
 * terminating '\0'.
 *
 * returns: the length of the string written to <buf2>.
 */
static int
ctrltoascii (buf1, buf2)
char*	buf1, *buf2;
{
	char* start = buf2;
	unsigned char c;

	/* go through unsigned char: the ctype tests are not defined
	 * for negative values
	 */
	for ( ; (c = (unsigned char)*buf1) != '\0'; buf1++) {
		if (iscntrl(c)) {
			*buf2++ = '^';
			*buf2++ = c ^ 0x40;
		}
		else
			*buf2++ = c;
	}
	*buf2 = '\0';

	return (int)(buf2 - start);
}

#endif


/* syntax highlighted by Code2HTML, v. 0.9.1 */