/* ======================================================= *
 * Copyright 1998-2005 Stephen C. Grubb                    *
 * http://ploticus.sourceforge.net                         *
 * Covered by GPL; see the file ./Copyright for details.   *
 * ======================================================= */

/* Parse data into fields by assigning char * pointers to beginning
   of each field, and inserting nulls into the data buffer. 
*/
#include "pl.h"
/* #define Eerr(a,b,c)  TDH_err(a,b,c) */

#define FILLCHAR 3	/* placeholder byte written over a double-quote character */
#define SPACECHAR 4	/* placeholder byte written over a separator found inside quotes */
#define SPACE 's'
#define WHITESPACE 'w'
#define TAB 't'
#define COMMA 'c'


/* Notes:
   The "space" and "comma" methods honor double quotes: "John Adams" is one
   field even though it contains a separator.  With comma delimitation (.csv)
   an embedded (") is written as (""); this routine converts such a pair to ('').
   With space delimitation "" stands for a null field.
   NOTE(review): in space mode a "" field comes out as a one-byte string holding
   SPACECHAR; presumably callers recognize that as the null field - confirm.

   *nf may be passed as > 0 to dictate the number of fields per record.  Records
   with fewer fields are padded with "" entries; surplus fields are dropped.

   The data buffer is modified in place (terminators are written into it) and
   the pointers stored in field[] point into it, so it must outlive field[].
 */

/* ============================================ */
/* PL_parsedata - split a newline-delimited text buffer into fields.

   Empty lines, and lines beginning with comsym, are skipped (with the
   whitespace methods, leading white space before comsym is allowed and
   all-blank lines are skipped as well).  An empty comsym disables comment
   detection.  Lines that yield no fields are not counted as records.

   Returns 0 on success.  Returns 1 if field[] (maxd entries) filled up; in
   that case parsing stops, the record that did not fit is discarded, and
   *nr / *nf / *nd describe the complete records stored so far.
 */
int
PL_parsedata( data, delimmethod, comsym, field, maxd, nr, nf, nd )
unsigned char *data; 	/* for LOCALE scg 3/15/00 */
/* char *data;  	// for LOCALE scg 3/15/00 //   this avoids gcc4 warning */
char *delimmethod;	/* one of: space (whitespace w/ quotes), whitespace (no quotes), tab, comma */
char *comsym;		/* user symbol signifying beginning of comment */
char *field[]; 		/* array of pointers to fields */
int maxd; 		/* max # of elements in above array */
int *nr; 		/* number of newline-delimited records - returned */
int *nf; 		/* number of fields per record - returned - but if this is passed as > 0, 
						it also dictates number of fields per record */
int *nd; 		/* total number of fields */
{

int i, j, ip, rowip, state, start, quotes, qon, reqnf, nfields, nrows, cslen, nt, full;
char delim, sepchar, tok[255];
char *line;
int datalen; /* added scg 9/30/03 */


*nr = 0;
ip = 0;
full = 0;
reqnf = ( *nf > 0 ) ? *nf : 0;

/* Select the delimitation method from the first letter; anything that is
   not tab / comma / whitespace falls back to quoted-space.  The cast keeps
   tolower() defined for bytes with the high bit set. */
delim = tolower( (unsigned char) delimmethod[0] );
quotes = 0;
if( delim == TAB ) sepchar = '\t';
else if( delim == COMMA ) { sepchar = ','; quotes = 1; }
else if( delim == WHITESPACE ) { sepchar = ' '; delim = SPACE; }
else { delim = SPACE; sepchar = ' '; quotes = 1; }


cslen = strlen( comsym );

datalen = strlen( (char *) data ); /* scg 9/30/03 */

/* do quote conversion if necessary .. */
if( quotes ) for( i = 0, qon = 0; i < datalen; i++ ) {
	if( data[i] == '\n' ) qon = 0; /* BOL - clean slate */
	if( data[i] == '"' ) {
		qon = !qon;
		if( i > 0 && data[i-1] == FILLCHAR ) { 
			if( delim == SPACE ) data[i] = SPACECHAR;	  /* "" in whitespace is null field */
			else { data[i-1] = '\''; data[i] = '\''; }        /* convert "" -> '' */
			}
		else data[i] = FILLCHAR;
		}
	else if( qon ) {    /* mask separator characters found within a quoted string */
		if( delim == SPACE && isspace( data[i] ) ) data[i] = SPACECHAR;
		else if( delim == COMMA && data[i] == ',' ) data[i] = SPACECHAR;
		}
	}


/* now go thru data buffer one line at a time..  */
nrows = 0;

/* condition of datalen+1 needed because the 'data' null terminator must be processed - scg 9/30/03 */
for( i = 0, start = 0; i < (datalen+1); i++ ) {

	if( data[i] != '\n' && data[i] != '\0' ) continue;

	/* zero-length line (including newline followed immediately by the final null):
	   skip it, but keep going so that later records are not lost */
	if( i == start ) { start = i+1; continue; }

	/* process a line.. current line is from data[start] to null terminator.. */
	data[i] = '\0';
	line = (char *) &data[start];

	/* skip blank and commented lines.. (field width keeps tok[] from overflowing) */
	if( delim == SPACE ) {
		nt = sscanf( line, "%254s", tok );
		if( nt < 1 || ( cslen > 0 && strncmp( tok, comsym, cslen ) == 0 ) ) { start = i+1; continue; }
		}
	else if( cslen > 0 && strncmp( line, comsym, cslen ) == 0 ) { start = i+1; continue; }

	nrows++;
	state = 0;
	nfields = 0;
	rowip = ip;	/* remembered so an incomplete record can be rolled back */

	if( delim == SPACE ) for( j = start; data[j] != '\0'; j++ ) {
		if( state == 0 ) {
			if( isspace( data[j] ) || data[j] == FILLCHAR ) continue; /* eat leading space */
			if( ip >= maxd ) { full = 1; break; }	   /* no room left in field[] */
			field[ip++] = (char *) &data[j];	   /* set pointer to field */
			nfields++;
			state = 1; 			   /* 1 = get field */
			}
		else if( state == 1 && ( isspace( data[j] ) || data[j] == FILLCHAR ) ) { /* terminate item */
			data[j] = '\0';
			if( reqnf > 0 && nfields >= reqnf ) break;
			state = 0;
			}
		else if( data[j] == SPACECHAR ) data[j] = sepchar; /* restore masked separator */
		}

	else if( delim == TAB || delim == COMMA ) for( j = start; data[j] != '\0'; j++ ) {
		if( state == 0 ) {
			if( data[j] == FILLCHAR ) continue;	   /* skip opening quote */
			if( ip >= maxd ) { full = 1; break; }	   /* no room left in field[] */
			field[ip++] = (char *) &data[j];
			nfields++;
			state = 1;
			}
		if( data[j] == sepchar ) { /* terminate item */
			data[j] = '\0';
			if( reqnf > 0 && nfields >= reqnf ) break;
			state = 0;
			}
		if( data[j] == FILLCHAR ) data[j] = '\0';	   /* closing quote ends the text */
		if( state == 1 && data[j] == SPACECHAR ) data[j] = sepchar; /* restore masked separator */
		}
	

	if( !full ) {
		if( nfields == 0 ) nrows--;  /* empty line -- reject */
		else if( reqnf == 0 ) reqnf = nfields;  /* first real record sets the field count */
		else for( j = nfields; j < reqnf; j++ ) {  /* fill in "" fields.. */
			if( ip >= maxd ) { full = 1; break; }
			field[ip++] = "";
			}
		}

	if( full ) {	/* record did not fit: discard it and stop */
		ip = rowip;
		nrows--;
		break;
		}

	/* finish up for current line.. */
	start = i+1;
	}

*nf = reqnf;
*nr = nrows;
*nd = ip;

return( full );
}

/* ======================================================= *
 * Copyright 1998-2005 Stephen C. Grubb                    *
 * http://ploticus.sourceforge.net                         *
 * Covered by GPL; see the file ./Copyright for details.   *
 * ======================================================= */


/* syntax highlighted by Code2HTML, v. 0.9.1 */