// ParserJuly2004.cc
//
// Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Matthew Flood
// See file AUTHORS for contact information
//
// This file is part of RudeConfig.
//
// RudeConfig is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2, or (at your option)
// any later version.
// 
// RudeConfig is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with RudeConfig; (see COPYING) if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
// 02111-1307, USA.
//------------------------------------------------------------------------

#include "ParserJuly2004.h"

#ifndef INCLUDED_AbstractOrganiser_h
#include "AbstractOrganiser.h"
#endif

#ifndef INCLUDED_CCTYPE
#include <cctype>
#define INCLUDED_CCTYPE
#endif

#ifndef INCLUDED_IOSTREAM
#include <iostream>
#define INCLUDED_IOSTREAM
#endif

using namespace rude::config;
using namespace std;

namespace rude{
namespace config{

enum SectionState{ STARTSECTION, SECTIONID, ESCAPEID, ENDSECTIONID, SECTIONCOMMENT, FOUNDIDONLY, FOUNDIDCOMMENT, SECTIONERROR, ENDSECTION };
enum KeyValueState{ KEY, KEYESCAPE, STARTVALUE, COMMENT, FINDCOMMENT, KVERROR, ENDKV, VALUE, QUOTEVALUE, NONQUOTEVALUE, QUOTEESCAPE, NONQUOTEESCAPE, ENDKEYVALUE};

void ParserJuly2004::stripTrailing(std::string& buffer)
{
	int bufferLength = buffer.size();

	for (int x = bufferLength - 1; x >= 0; x--)
	{
		char c = buffer[x];

		if (isspace(c))
		{
			buffer.erase(x);
		}
		else
		{
			break;
		}
	}
}


bool ParserJuly2004::isEOL(char c)
{
	return (c == '\r' || c == '\f' || c == '\n');
}

bool ParserJuly2004::chompEOL(std::istream& inputstream)
{
		char c = inputstream.peek();
		if(isEOL(c))
		{
			inputstream.get();
			char next_c = inputstream.peek();
			if( (c != next_c) && isEOL(next_c) )
			{
				inputstream.get();
			}
		}
		return true;
}

bool ParserJuly2004::parse(std::istream& infile, AbstractOrganiser& organiser)
{
	if (d_delimiter == '\\' || isEOL(d_delimiter) || d_delimiter == d_commentchar || d_delimiter == '[')
	{
		setError("110", "Illegal delimiter.");
		return false;
	}

	if (d_commentchar == '\\' || d_commentchar == '"' || isspace(d_commentchar))
	{
		setError("111", "Illegal comment character.");
		return false;
	}
	
	register char c;
		
	// eof only gets set when error_flag is set on previous operation
	// as such, you need to peek() at the end ot the while loop
	// in order for eof to happen when you want it to!!
	// one peek() triggers infile.eof(), but it does not reveal it!!!!
	// you gotta call peek() twice.
	//
	while( (c = infile.peek()) != EOF)
	{
		// We're looking for:
		// '[' beggining of section
		// '#' (d_commentchar) comment character
		// any non-whitespace character

		if(isspace(c))
		{
			std::string whitespace = "";

			while(c != EOF && isspace(c))
			{
				whitespace += infile.get();
				c = infile.peek();
			}
			organiser.foundWhiteSpace(whitespace.c_str());
		}
		else if(c == '[')
		{
			// discard '[' character
			//
			infile.get();
				
			register SectionState sectionState = STARTSECTION;

			std::string sectionID = "";
			std::string comment = "";
				
			while (sectionState != ENDSECTION)
			{						
				switch (sectionState)
				{
					case STARTSECTION:
					{
							c = infile.peek();
							if(c == EOF)
							{
								setError("102", "End of stream found before section ID");
								sectionState = SECTIONERROR;
							}
							else if(isEOL(c))
							{
								setError("101", "End of line found before section ID");
								sectionState = SECTIONERROR;								
							}
							else if(c == ' ' || c == '\t')
							{
								// discard whitespace
								//
								infile.get();
									
								// LOOP
							}
							else if(c == ']')
							{
								// discard ']'
								//
								infile.get();
								
								sectionState = ENDSECTIONID;
							}
							else
							{
								sectionState = SECTIONID;
							}
							break;
					}
					case SECTIONID:
					{
							c = infile.peek();
							if(c == EOF)
							{
								setError("104", "End of stream found before end-of-section marker");
								sectionState = SECTIONERROR;
							}
							else if(isEOL(c))
							{
								setError("103", "End of line found before end-of-section marker");
								sectionState = SECTIONERROR;								
							}
							else if(c == '\\')
							{
								// discard backslash
								//
								infile.get();

								sectionState = ESCAPEID;
							}
							else if(c == ']')
							{
								// discard ']'
								//
								infile.get();

								// Strip Trailing Whitespace from ID
								//
								stripTrailing(sectionID);
									
								sectionState = ENDSECTIONID;
							}
							else
							{
								sectionID += infile.get();

								// LOOP
							}
							break;
					}
					case ESCAPEID:
					{
							c = infile.peek();
							if(c == EOF)
							{
								setError("107", "End of stream found after un-escaped backslash");
								sectionState = SECTIONERROR;
							}
							else if(isEOL(c))
							{
								setError("108", "Escaped new-line is not allowed in section ID or key");
								sectionState = SECTIONERROR;
							}
							else
							{
								sectionID += infile.get();
								sectionState = SECTIONID;
							}
							break;
					}
					case ENDSECTIONID:
					{
							c = infile.peek();
							if(c == EOF || isEOL(c))
							{
								sectionState = FOUNDIDONLY;				
							}
							else if(d_commentchar != 0 && c == d_commentchar)
							{
								// discard '#'
								//
								infile.get();
									
								sectionState = SECTIONCOMMENT;								
							}
							else if(c == ' ' || c == '\t')
							{
								// discard whitespace
								//
								infile.get();
									
								// LOOP
							}
							else
							{
								setError("105", "Illegal character found after end-of-section marker");
								sectionState = SECTIONERROR;
							}
							break;
					}
					case SECTIONCOMMENT:
					{
							c = infile.peek();
							if(c == EOF || isEOL(c))
							{
								sectionState = FOUNDIDCOMMENT;
								stripTrailing(comment);
							}
							else
							{
								// append to comment
								//
								comment += infile.get();
									
								// LOOP			
							}
							break;
					}
					case SECTIONERROR:
					{
						return false;
					}
					case FOUNDIDONLY:
					{
						organiser.foundSection(sectionID.c_str(), 0);
						chompEOL(infile);
						sectionState = ENDSECTION;
						break;
					}
					case FOUNDIDCOMMENT:
					{
						organiser.foundSection(sectionID.c_str(), comment.c_str());
						chompEOL(infile);
						sectionState = ENDSECTION;
						break;
					}
				}
			}
		}
		else if(c == d_commentchar)
		{
			// found a comment line
			// discard the comment character
			//
			infile.get();
				
			// put the rest of the line into a string
			//
			std::string line="";
			while(infile.good())
			{
				c=infile.get();
				if( isEOL(c))
				{
					break;
				}
				line += c;
			}

			chompEOL(infile);
			
			// PROCESS THE COMMENT LINE
			//
			stripTrailing(line);
			organiser.foundComment(line.c_str());
		}
		else
		{
			register KeyValueState kvState = KEY;
			std::string key = "";
			std::string value = "";
			std::string comment = "";

			while (kvState != ENDKEYVALUE)
			{						
				
				switch (kvState)
				{
					case KEY:
					{
							char c = infile.peek();
							
							if(c == EOF || isEOL(c))
							{
								kvState = ENDKV;
							}
							else if(d_delimiter && c == d_delimiter)
							{
								// discard '='
								//
								infile.get();

								kvState = STARTVALUE;
							}
							else if(!d_delimiter && (c == ' ' || c == '\t'))
							{
								// discard whitespace
								//
								infile.get();
								kvState = STARTVALUE;
							}
							else if(d_commentchar != 0 && c == d_commentchar)
							{
								// discard '#'
								//
								infile.get();

								kvState = COMMENT;
							}
							else if(c == '\\')
							{
								// discard '\\'
								//
								infile.get();

								kvState = KEYESCAPE;
							}
							else
							{
								// append to key
								//
								key += infile.get();
									
								// LOOP			
							}
							break;
					}
					case KEYESCAPE:
					{
							char c = infile.peek();
							if(c == EOF)
							{
								setError("107", "End of stream found after un-escaped backslash");
								kvState = KVERROR;
							}
							else if(isEOL(c))
							{
								setError("108", "Escaped new-line is not allowed in key");
								kvState = KVERROR;
							}
							else
							{
								// append to key
								//
								key += infile.get();

								kvState = KEY;
							}
							break;
					}
					case STARTVALUE:
					{
							char c = infile.peek();

							if(c == EOF || isEOL(c))
							{
								kvState = ENDKV;
							}
							else if(d_commentchar != 0 && c == d_commentchar)
							{
								// discard '#'
								//
								infile.get();

								kvState = COMMENT;
							}
							else if(c == ' ' || c == '\t')
							{
								// discard whitespace
								//
								infile.get();

								// LOOP
							}
							else
							{
								kvState = VALUE;	
							}
							break;
					}
					case KVERROR:
					{
							return false;
					}
					case FINDCOMMENT:
					{
							char c = infile.peek();

							if(c == EOF || isEOL(c))
							{
								kvState = ENDKV;
							}
							else if(d_commentchar != 0 && c == d_commentchar)
							{
								// discard '#'
								//
								infile.get();

								kvState = COMMENT;
							}
							else if(c == ' ' || c == '\t')
							{
								// discard whitespace
								//
								infile.get();

								// LOOP
							}
							else
							{
								setError("109", "Illegal Character Found after quoted value.");
								kvState = KVERROR;	
							}
							break;
					}
					case COMMENT:
					{
							char c = infile.peek();

							if(c == EOF || isEOL(c))
							{
								stripTrailing(comment);
								kvState = ENDKV;
							}
							else
							{
								// Append to comment
								//
								comment += infile.get();

								// LOOP	
							}
							break;
					}
					case VALUE:
					{
						char c = infile.peek();
						if(c == '"')
						{
							// discard '"'
							//
							infile.get();

							kvState = QUOTEVALUE;
						}
						else
						{
							kvState = NONQUOTEVALUE;
						}
						break;
					}
					case QUOTEVALUE:
					{
							char c = infile.peek();
							if(c == EOF)
							{
								setError("106", "End of stream found before final quote (\") in value");
								kvState = KVERROR;
							}
							else if(c == '"')
							{
								// discard '"'
								//
								infile.get();

								kvState = FINDCOMMENT;
							}
							else if(c == '\\')
							{
								// discard backslash
								//
								infile.get();

								kvState = QUOTEESCAPE;
							}
							else
							{
								// append to value
								//
								value += infile.get();

								// LOOP
							}
							break;
					}
					case QUOTEESCAPE:
					{
							char c = infile.peek();
							if(c == EOF)
							{
								setError("107", "End of stream found after un-escaped backslash");
								kvState = KVERROR;
							}
							else
							{
								// append to value
								//
								value += infile.get();

								kvState = QUOTEVALUE;
							}
							break;
					}
					case NONQUOTEVALUE:
					{
							char c = infile.peek();

							if(c == EOF || isEOL(c))
							{
								stripTrailing(value);

								kvState = ENDKV;
							}
							else if(d_commentchar != 0 && c == d_commentchar)
							{
								// discard '#'
								//
								infile.get();
								
								stripTrailing(value);
								
								kvState = COMMENT;
							}
							else if(c == '\\')
							{
								// discard backslash
								//
								infile.get();

								kvState = NONQUOTEESCAPE;
							}
							else
							{
								// append to value
								//
								value += infile.get();

								// LOOP
							}
							break;
					}
					case NONQUOTEESCAPE:
					{
							char c = infile.peek();
							if(c == EOF)
							{
								setError("107", "End of stream found after un-escaped backslash");
								kvState = KVERROR;
							}
							else
							{
								// append to value
								//
								c = infile.get();
								value += c;

								// SPECIAL CASE FOR ESCAPED CRLFs:
								//
								// if c is newline and next character is also newline, 
								// we keep both of them if they are different forms of newline
								// 
								char next_c = infile.peek();
								if(isEOL(c) && isEOL(next_c) && (c != next_c))
								{
									value += infile.get();
								} 
								kvState = NONQUOTEVALUE;
							}
							break;
					}				
					case ENDKV:
					{
							chompEOL(infile);

							stripTrailing(key);
							organiser.foundData(key.c_str(), value.c_str(), comment.c_str());

							kvState = ENDKEYVALUE;
							break;
					}
				} // end switch
			} // end while
		}
	}
	return true;
}

}} // end namespaces