// ParserJuly2004.cc
//
// Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Matthew Flood
// See file AUTHORS for contact information
//
// This file is part of RudeConfig.
//
// RudeConfig is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2, or (at your option)
// any later version.
//
// RudeConfig is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with RudeConfig; (see COPYING) if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
// 02111-1307, USA.
//------------------------------------------------------------------------
#include "ParserJuly2004.h"
#ifndef INCLUDED_AbstractOrganiser_h
#include "AbstractOrganiser.h"
#endif
#ifndef INCLUDED_CCTYPE
#include <cctype>
#define INCLUDED_CCTYPE
#endif
#ifndef INCLUDED_IOSTREAM
#include <iostream>
#define INCLUDED_IOSTREAM
#endif
using namespace rude::config;
using namespace std;
namespace rude{
namespace config{
// States of the section-header ("[id] # comment") state machine driven by
// ParserJuly2004::parse().  Enumerator order is significant only in that it
// fixes the underlying integer values.
enum SectionState
{
	STARTSECTION,    // '[' consumed; skipping blanks in front of the section ID
	SECTIONID,       // accumulating the characters of the section ID
	ESCAPEID,        // backslash consumed inside the ID; next character is taken literally
	ENDSECTIONID,    // ']' consumed; expecting blanks, a comment or end of line
	SECTIONCOMMENT,  // accumulating the comment that follows the header
	FOUNDIDONLY,     // header finished without a comment; report it
	FOUNDIDCOMMENT,  // header finished with a comment; report both
	SECTIONERROR,    // malformed header; parse() returns false
	ENDSECTION       // terminal state: the header has been reported
};
// States of the "key = value # comment" state machine driven by
// ParserJuly2004::parse().  Enumerator order is significant only in that it
// fixes the underlying integer values.
enum KeyValueState
{
	KEY,             // accumulating the key
	KEYESCAPE,       // backslash consumed inside the key; next character is taken literally
	STARTVALUE,      // delimiter consumed; skipping blanks in front of the value
	COMMENT,         // accumulating the trailing comment
	FINDCOMMENT,     // closing quote consumed; only blanks, a comment or end of line may follow
	KVERROR,         // malformed line; parse() returns false
	ENDKV,           // line finished; report key, value and comment
	VALUE,           // first value character seen; decide between quoted and unquoted
	QUOTEVALUE,      // accumulating a double-quoted value
	NONQUOTEVALUE,   // accumulating an unquoted value
	QUOTEESCAPE,     // backslash consumed inside a quoted value
	NONQUOTEESCAPE,  // backslash consumed inside an unquoted value
	ENDKEYVALUE      // terminal state: the line has been reported
};
// Removes every trailing whitespace character (as classified by isspace)
// from buffer, in place.  A buffer that is empty or consists solely of
// whitespace ends up empty.
//
// buffer - the string to trim; modified in place.
void ParserJuly2004::stripTrailing(std::string& buffer)
{
	// Walk back from the end to find how many leading characters to keep.
	std::string::size_type keep = buffer.size();

	// The <cctype> classifiers are only defined for values representable as
	// unsigned char (or EOF), so a plain char must be converted first;
	// otherwise bytes >= 0x80 are undefined behaviour where char is signed.
	while (keep > 0 && isspace(static_cast<unsigned char>(buffer[keep - 1])))
	{
		--keep;
	}

	// One truncation instead of one erase per trailing character.
	buffer.erase(keep);
}
// Reports whether c is one of the characters this parser treats as a line
// terminator: line feed, carriage return or form feed.
bool ParserJuly2004::isEOL(char c)
{
	switch (c)
	{
		case '\r':
		case '\f':
		case '\n':
			return true;

		default:
			return false;
	}
}
// Consumes at most one line break from inputstream.
//
// If the next character is a line terminator (see isEOL) it is discarded.
// If the character after it is also a line terminator but a *different*
// one (for example '\n' following '\r'), that one is discarded as well, so
// two-character line endings count as a single break.  Two identical
// terminators in a row are two line breaks; only the first is consumed.
//
// Always returns true.
bool ParserJuly2004::chompEOL(std::istream& inputstream)
{
	const char first = inputstream.peek();
	if (!isEOL(first))
	{
		// Nothing to consume.
		return true;
	}

	inputstream.get();

	const char second = inputstream.peek();
	const bool pairedTerminator = isEOL(second) && (second != first);
	if (pairedTerminator)
	{
		inputstream.get();
	}

	return true;
}
bool ParserJuly2004::parse(std::istream& infile, AbstractOrganiser& organiser)
{
if (d_delimiter == '\\' || isEOL(d_delimiter) || d_delimiter == d_commentchar || d_delimiter == '[')
{
setError("110", "Illegal delimiter.");
return false;
}
if (d_commentchar == '\\' || d_commentchar == '"' || isspace(d_commentchar))
{
setError("111", "Illegal comment character.");
return false;
}
register char c;
// eof only gets set when error_flag is set on previous operation
// as such, you need to peek() at the end ot the while loop
// in order for eof to happen when you want it to!!
// one peek() triggers infile.eof(), but it does not reveal it!!!!
// you gotta call peek() twice.
//
while( (c = infile.peek()) != EOF)
{
// We're looking for:
// '[' beggining of section
// '#' (d_commentchar) comment character
// any non-whitespace character
if(isspace(c))
{
std::string whitespace = "";
while(c != EOF && isspace(c))
{
whitespace += infile.get();
c = infile.peek();
}
organiser.foundWhiteSpace(whitespace.c_str());
}
else if(c == '[')
{
// discard '[' character
//
infile.get();
register SectionState sectionState = STARTSECTION;
std::string sectionID = "";
std::string comment = "";
while (sectionState != ENDSECTION)
{
switch (sectionState)
{
case STARTSECTION:
{
c = infile.peek();
if(c == EOF)
{
setError("102", "End of stream found before section ID");
sectionState = SECTIONERROR;
}
else if(isEOL(c))
{
setError("101", "End of line found before section ID");
sectionState = SECTIONERROR;
}
else if(c == ' ' || c == '\t')
{
// discard whitespace
//
infile.get();
// LOOP
}
else if(c == ']')
{
// discard ']'
//
infile.get();
sectionState = ENDSECTIONID;
}
else
{
sectionState = SECTIONID;
}
break;
}
case SECTIONID:
{
c = infile.peek();
if(c == EOF)
{
setError("104", "End of stream found before end-of-section marker");
sectionState = SECTIONERROR;
}
else if(isEOL(c))
{
setError("103", "End of line found before end-of-section marker");
sectionState = SECTIONERROR;
}
else if(c == '\\')
{
// discard backslash
//
infile.get();
sectionState = ESCAPEID;
}
else if(c == ']')
{
// discard ']'
//
infile.get();
// Strip Trailing Whitespace from ID
//
stripTrailing(sectionID);
sectionState = ENDSECTIONID;
}
else
{
sectionID += infile.get();
// LOOP
}
break;
}
case ESCAPEID:
{
c = infile.peek();
if(c == EOF)
{
setError("107", "End of stream found after un-escaped backslash");
sectionState = SECTIONERROR;
}
else if(isEOL(c))
{
setError("108", "Escaped new-line is not allowed in section ID or key");
sectionState = SECTIONERROR;
}
else
{
sectionID += infile.get();
sectionState = SECTIONID;
}
break;
}
case ENDSECTIONID:
{
c = infile.peek();
if(c == EOF || isEOL(c))
{
sectionState = FOUNDIDONLY;
}
else if(d_commentchar != 0 && c == d_commentchar)
{
// discard '#'
//
infile.get();
sectionState = SECTIONCOMMENT;
}
else if(c == ' ' || c == '\t')
{
// discard whitespace
//
infile.get();
// LOOP
}
else
{
setError("105", "Illegal character found after end-of-section marker");
sectionState = SECTIONERROR;
}
break;
}
case SECTIONCOMMENT:
{
c = infile.peek();
if(c == EOF || isEOL(c))
{
sectionState = FOUNDIDCOMMENT;
stripTrailing(comment);
}
else
{
// append to comment
//
comment += infile.get();
// LOOP
}
break;
}
case SECTIONERROR:
{
return false;
}
case FOUNDIDONLY:
{
organiser.foundSection(sectionID.c_str(), 0);
chompEOL(infile);
sectionState = ENDSECTION;
break;
}
case FOUNDIDCOMMENT:
{
organiser.foundSection(sectionID.c_str(), comment.c_str());
chompEOL(infile);
sectionState = ENDSECTION;
break;
}
}
}
}
else if(c == d_commentchar)
{
// found a comment line
// discard the comment character
//
infile.get();
// put the rest of the line into a string
//
std::string line="";
while(infile.good())
{
c=infile.get();
if( isEOL(c))
{
break;
}
line += c;
}
chompEOL(infile);
// PROCESS THE COMMENT LINE
//
stripTrailing(line);
organiser.foundComment(line.c_str());
}
else
{
register KeyValueState kvState = KEY;
std::string key = "";
std::string value = "";
std::string comment = "";
while (kvState != ENDKEYVALUE)
{
switch (kvState)
{
case KEY:
{
char c = infile.peek();
if(c == EOF || isEOL(c))
{
kvState = ENDKV;
}
else if(d_delimiter && c == d_delimiter)
{
// discard '='
//
infile.get();
kvState = STARTVALUE;
}
else if(!d_delimiter && (c == ' ' || c == '\t'))
{
// discard whitespace
//
infile.get();
kvState = STARTVALUE;
}
else if(d_commentchar != 0 && c == d_commentchar)
{
// discard '#'
//
infile.get();
kvState = COMMENT;
}
else if(c == '\\')
{
// discard '\\'
//
infile.get();
kvState = KEYESCAPE;
}
else
{
// append to key
//
key += infile.get();
// LOOP
}
break;
}
case KEYESCAPE:
{
char c = infile.peek();
if(c == EOF)
{
setError("107", "End of stream found after un-escaped backslash");
kvState = KVERROR;
}
else if(isEOL(c))
{
setError("108", "Escaped new-line is not allowed in key");
kvState = KVERROR;
}
else
{
// append to key
//
key += infile.get();
kvState = KEY;
}
break;
}
case STARTVALUE:
{
char c = infile.peek();
if(c == EOF || isEOL(c))
{
kvState = ENDKV;
}
else if(d_commentchar != 0 && c == d_commentchar)
{
// discard '#'
//
infile.get();
kvState = COMMENT;
}
else if(c == ' ' || c == '\t')
{
// discard whitespace
//
infile.get();
// LOOP
}
else
{
kvState = VALUE;
}
break;
}
case KVERROR:
{
return false;
}
case FINDCOMMENT:
{
char c = infile.peek();
if(c == EOF || isEOL(c))
{
kvState = ENDKV;
}
else if(d_commentchar != 0 && c == d_commentchar)
{
// discard '#'
//
infile.get();
kvState = COMMENT;
}
else if(c == ' ' || c == '\t')
{
// discard whitespace
//
infile.get();
// LOOP
}
else
{
setError("109", "Illegal Character Found after quoted value.");
kvState = KVERROR;
}
break;
}
case COMMENT:
{
char c = infile.peek();
if(c == EOF || isEOL(c))
{
stripTrailing(comment);
kvState = ENDKV;
}
else
{
// Append to comment
//
comment += infile.get();
// LOOP
}
break;
}
case VALUE:
{
char c = infile.peek();
if(c == '"')
{
// discard '"'
//
infile.get();
kvState = QUOTEVALUE;
}
else
{
kvState = NONQUOTEVALUE;
}
break;
}
case QUOTEVALUE:
{
char c = infile.peek();
if(c == EOF)
{
setError("106", "End of stream found before final quote (\") in value");
kvState = KVERROR;
}
else if(c == '"')
{
// discard '"'
//
infile.get();
kvState = FINDCOMMENT;
}
else if(c == '\\')
{
// discard backslash
//
infile.get();
kvState = QUOTEESCAPE;
}
else
{
// append to value
//
value += infile.get();
// LOOP
}
break;
}
case QUOTEESCAPE:
{
char c = infile.peek();
if(c == EOF)
{
setError("107", "End of stream found after un-escaped backslash");
kvState = KVERROR;
}
else
{
// append to value
//
value += infile.get();
kvState = QUOTEVALUE;
}
break;
}
case NONQUOTEVALUE:
{
char c = infile.peek();
if(c == EOF || isEOL(c))
{
stripTrailing(value);
kvState = ENDKV;
}
else if(d_commentchar != 0 && c == d_commentchar)
{
// discard '#'
//
infile.get();
stripTrailing(value);
kvState = COMMENT;
}
else if(c == '\\')
{
// discard backslash
//
infile.get();
kvState = NONQUOTEESCAPE;
}
else
{
// append to value
//
value += infile.get();
// LOOP
}
break;
}
case NONQUOTEESCAPE:
{
char c = infile.peek();
if(c == EOF)
{
setError("107", "End of stream found after un-escaped backslash");
kvState = KVERROR;
}
else
{
// append to value
//
c = infile.get();
value += c;
// SPECIAL CASE FOR ESCAPED CRLFs:
//
// if c is newline and next character is also newline,
// we keep both of them if they are different forms of newline
//
char next_c = infile.peek();
if(isEOL(c) && isEOL(next_c) && (c != next_c))
{
value += infile.get();
}
kvState = NONQUOTEVALUE;
}
break;
}
case ENDKV:
{
chompEOL(infile);
stripTrailing(key);
organiser.foundData(key.c_str(), value.c_str(), comment.c_str());
kvState = ENDKEYVALUE;
break;
}
} // end switch
} // end while
}
}
return true;
}
}} // end namespaces
// syntax highlighted by Code2HTML, v. 0.9.1