/*
 Copyright (C) 2000-2004

 Code contributed by Greg Collecutt, Joseph Hope and Paul Cochrane

 This file is part of xmds.
 
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

/*
  $Id: xmlbasics.cc,v 1.11 2004/07/13 05:29:38 paultcochrane Exp $
*/

/*! @file xmlbasics.cc
  @brief Basic xml parsing classes and methods

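  Implements the members of XMLException, the XMLChar character
  classification helpers and the XMLString class (all presumably declared
  in xmlbasics.h).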
*/

#include<xmlbasics.h>

//! Debug switch defined in another translation unit.  While it is true the
//! constructors and destructors in this file print trace messages and keep
//! their instance counters up to date.
extern bool xmlDebugFlag;

//#define DEBUG 0

// ******************************************************************************
// ******************************************************************************
//	XMLException
// ******************************************************************************
// ******************************************************************************

//! Number of live XMLException objects.  Only maintained while xmlDebugFlag
//! is set: the constructors increment it and the destructor decrements it.
long nXMLExceptions=0;

// ******************************************************************************
//! Default constructor: builds an exception whose code is UNKNOWN_ERR.
//! With xmlDebugFlag set, the live-exception counter is incremented and a
//! trace of the call is printed.
XMLException::XMLException() {
  code = UNKNOWN_ERR;

  if (!xmlDebugFlag) {
    return;
  }

  ++nXMLExceptions;
  printf("XMLException::XMLException()\n");
  printf("nXMLExceptions=%li\n", nXMLExceptions);
}

// ******************************************************************************
XMLException::XMLException(
			   const unsigned short& error) {
  if(xmlDebugFlag) {
    nXMLExceptions++;
    printf("XMLException::XMLException(unsigned short error)\n");
    printf("nXMLExceptions=%li\n",nXMLExceptions);
  }
  code=error;
};

// ******************************************************************************
//! Destructor.  Does nothing unless xmlDebugFlag is set, in which case the
//! live-exception counter is decremented and a trace is printed.
XMLException::~XMLException() {
  if (!xmlDebugFlag) {
    return;
  }

  --nXMLExceptions;
  printf("XMLException::~XMLException()\n");
  printf("nXMLExceptions=%li\n", nXMLExceptions);
}

// ******************************************************************************
//! Translates the stored error code into a static, newline-terminated
//! message.  Any code without a dedicated message is reported as
//! UNKNOWN_ERR.
//! @return pointer to a string literal describing the error
const char* XMLException::getError() const {
  // Start from the fallback text and override it for the known codes.
  const char* description = "XMLException::UNKNOWN_ERR\n";

  switch (code) {
  case UNKNOWN_ENCODING_ERR:
    description = "XMLException::UNKNOWN_ENCODING_ERR\n";
    break;
  case UNEXPECTED_EOF_ERR:
    description = "XMLException::UNEXPECTED_EOF_ERR\n";
    break;
  case RANGE_ERR:
    description = "XMLException::RANGE_ERR\n";
    break;
  case INVALID_CHAR_ERR:
    description = "XMLException::INVALID_CHAR_ERR\n";
    break;
  default:
    break;
  }

  return description;
}

// ******************************************************************************
// ******************************************************************************
//	XMLChar
// ******************************************************************************
// ******************************************************************************

// **********************************************************************
//! Tests whether a character is acceptable in a document: anything with a
//! value of at least 0x20, or one of the white space characters.
//! NOTE(review): where plain char is signed, bytes above 0x7F compare below
//! 0x20 and are therefore rejected -- confirm that this is intended.
//! @param ch  the character to classify
//! @return true if the character is accepted
bool XMLChar::isChar(const char& ch) {
  return (ch >= 0x0020) || isWhiteSpace(ch);
}

// **********************************************************************
//! Tests whether a character may appear in character data: it must not be
//! '<' or '&' and must otherwise satisfy isChar().
//! @param ch  the character to classify
//! @return true if the character is legal character data
bool XMLChar::isCharData(const char& ch) {
  const bool startsMarkup = (ch == '<') || (ch == '&');
  return !startsMarkup && isChar(ch);
}

// **********************************************************************
//! Tests for the four white space characters: space, tab, carriage return
//! and line feed.
//! @param ch  the character to classify
//! @return true if the character is white space
bool XMLChar::isWhiteSpace(const char& ch) {
  switch (ch) {
  case 0x20:  // space
  case 0x09:  // horizontal tab
  case 0x0D:  // carriage return
  case 0x0A:  // line feed
    return true;
  default:
    return false;
  }
}

// **********************************************************************
//! Tests for an unaccented Latin letter, 'A'-'Z' or 'a'-'z'.
//! @param ch  the character to classify
//! @return true if the character is a Latin letter
bool XMLChar::isLatinLetter(const char& ch) {
  const bool upperCase = (ch >= 'A') && (ch <= 'Z');
  const bool lowerCase = (ch >= 'a') && (ch <= 'z');
  return upperCase || lowerCase;
}

// **********************************************************************
//! Tests for a decimal digit, '0'-'9'.
//! @param ch  the character to classify
//! @return true if the character is a decimal digit
bool XMLChar::isLatinDigit(const char& ch) {
  if (ch < '0') {
    return false;
  }
  return ch <= '9';
}

// **********************************************************************
//! Tests for a hexadecimal digit: '0'-'9', 'A'-'F' or 'a'-'f'.
//! @param ch  the character to classify
//! @return true if the character is a hexadecimal digit
bool XMLChar::isLatinHexDigit(const char& ch) {
  const bool decimal  = (ch >= '0') && (ch <= '9');
  const bool upperHex = (ch >= 'A') && (ch <= 'F');
  const bool lowerHex = (ch >= 'a') && (ch <= 'f');
  return decimal || upperHex || lowerHex;
}

// **********************************************************************
bool XMLChar::isLetter(
		       const char& ch) {
  return isBaseChar(ch);
};

// **********************************************************************
bool XMLChar::isBaseChar(
			 const char& ch)	{
  return isLatinLetter(ch);
};

// **********************************************************************
bool XMLChar::isDigit(
		      const char& ch) {
  return isLatinDigit(ch);
};

// **********************************************************************
//! Tests whether a character may appear inside a name: anything accepted
//! by isNCNameChar(), plus the colon.
//! @param ch  the character to classify
//! @return true if the character is a name character
bool XMLChar::isNameChar(const char& ch) {
  return isNCNameChar(ch) || (ch == ':');
}

// **********************************************************************
//! Tests whether a character may appear inside a colon-free name: a
//! letter, a digit, or one of '.', '-' and '_'.
//! @param ch  the character to classify
//! @return true if the character is a colon-free name character
bool XMLChar::isNCNameChar(const char& ch) {
  if (isLetter(ch) || isDigit(ch)) {
    return true;
  }

  switch (ch) {
  case '.':
  case '-':
  case '_':
    return true;
  default:
    return false;
  }
}

// **********************************************************************
//! Tests whether a character may appear in a public identifier literal:
//! Latin letters, decimal digits, space, carriage return, line feed and the
//! punctuation characters listed in the switch below.
//! @param ch  the character to classify
//! @return true if the character is a public identifier character
bool XMLChar::isPubidChar(const char& ch) {
  if (isLatinLetter(ch) || isLatinDigit(ch)) {
    return true;
  }

  switch (ch) {
  case 0x20:  // space
  case 0x0D:  // carriage return
  case 0x0A:  // line feed
  case '-':
  case 0x27:  // apostrophe
  case '(':
  case ')':
  case '+':
  case ',':
  case '.':
  case '/':
  case ':':
  case '=':
  case '?':
  case ';':
  case '!':
  case '*':
  case '#':
  case '@':
  case '$':
  case '_':
  case '%':
    return true;
  default:
    return false;
  }
}

// ******************************************************************************
// ******************************************************************************
//	XMLString
// ******************************************************************************
// ******************************************************************************

//! Number of live XMLString objects.  Only maintained while xmlDebugFlag is
//! set: the constructors increment it and the destructor decrements it.
long nXMLStrings=0;

// ******************************************************************************
//! Default constructor: creates an empty string.
//! With xmlDebugFlag set, the live-string counter is incremented and a
//! trace of the call is printed.
XMLString::XMLString() {
  if(xmlDebugFlag) {
    nXMLStrings++;
    printf("XMLString::XMLString()\n");
    printf("nXMLStrings=%li\n",nXMLStrings);
  }
  _length=0;
  // No buffer is owned while the string is empty; give the pointers a
  // defined value instead of leaving them uninitialised.
  _data=0;
  _c_str=0;
  _c_str_valid=0;
}

// **********************************************************************
//! Constructs an XMLString from a null-terminated C string.  The
//! terminator is not stored.  A null pointer is treated as an empty string.
//! With xmlDebugFlag set, the live-string counter is incremented and a
//! trace of the call is printed.
//! @param s  the C string to copy (may be null)
XMLString::XMLString(const char* s) {
  if(xmlDebugFlag) {
    nXMLStrings++;
    // passing a null pointer to %s is undefined, so substitute a marker
    printf("XMLString::XMLString(const char* s) = %s\n", s ? s : "(null)");
    printf("nXMLStrings=%li\n",nXMLStrings);
  }

  _length=0;
  _data=0;
  _c_str=0;
  _c_str_valid=0;

  if(s==0) {
    return;
  }

  while(s[_length]) {
    _length++;
  }

  // Only allocate when there is something to store: the destructor frees
  // _data only for a non-zero length, so a zero-length array would leak.
  if(_length==0) {
    return;
  }

  _data = new char[_length];
  for(unsigned long i=0;i<_length;i++) {
    _data[i] = s[i];
  }
}

// **********************************************************************
//! Copy constructor: makes an independent copy of another XMLString's
//! character data.  The cached C string of the source is not copied.
//! With xmlDebugFlag set, the live-string counter is incremented and a
//! trace of the call is printed.
//! @param s  the string to copy
XMLString::XMLString(const XMLString& s) {
  if(xmlDebugFlag) {
    nXMLStrings++;
    printf("XMLString::XMLString(const XMLString& s) = %s\n",s.c_str());
    printf("nXMLStrings=%li\n",nXMLStrings);
  }

  _length=s._length;
  _data=0;
  _c_str=0;
  _c_str_valid=0;

  // Only allocate when there is something to store: the destructor frees
  // _data only for a non-zero length, so a zero-length array would leak.
  if(_length==0) {
    return;
  }

  _data = new char[_length];
  for(unsigned long i=0;i<_length;i++) {
    _data[i] = s._data[i];
  }
}

// **********************************************************************
//! Destructor: releases the character buffer and the cached C string.
//! With xmlDebugFlag set, the live-string counter is decremented and a
//! trace of the call is printed.
XMLString::~XMLString() {
  if(xmlDebugFlag) {
    nXMLStrings--;
    printf("XMLString::~XMLString()\n");
    printf("nXMLStrings=%li\n",nXMLStrings);
  }

  // Both buffers are allocated with new[], so they must be released with
  // delete[]; plain delete on an array is undefined behaviour.
  if(_length) {
    delete[] _data;
  }
  if(_c_str_valid) {
    delete[] _c_str;
  }
}

// **********************************************************************
//! Copy assignment from another XMLString.  Self-assignment is a no-op.
//! @param s  the string to copy
//! @return reference to this string
XMLString& XMLString::operator=(const XMLString& s) {
  if(this == &s) { // so as to never copy itself!
    return *this;
  }

  // Build the copy first so this string is left untouched if the
  // allocation throws.  An empty source needs no buffer at all.
  const unsigned long newLength = s.length();
  char* newData = 0;
  if(newLength) {
    newData = new char[newLength];
    for(unsigned long i=0;i<newLength;i++) {
      newData[i] = s._data[i];
    }
  }

  // Buffers come from new[], so release them with delete[].
  if(_length) {
    delete[] _data;
  }
  if(_c_str_valid) {
    delete[] _c_str;
  }

  _data = newData;
  _length = newLength;
  _c_str_valid=0;
  return *this;
}

// **********************************************************************
//! Assignment from a null-terminated C string.  The terminator is not
//! stored.  A null pointer is treated as an empty string.
//! @param s  the C string to copy (may be null, may point into this
//!           string's own c_str() buffer)
//! @return reference to this string
XMLString& XMLString::operator=(const char* s) {
  unsigned long newLength = 0;
  if(s != 0) {
    while(s[newLength] != 0) {
      newLength++;
    }
  }

  // Copy before releasing anything: s may alias the cached C string of
  // this very object, which is freed below.
  char* newData = 0;
  if(newLength) {
    newData = new char[newLength];
    for(unsigned long i=0;i<newLength;i++) {
      newData[i] = s[i];
    }
  }

  // Buffers come from new[], so release them with delete[].
  if(_length) {
    delete[] _data;
  }
  if(_c_str_valid) {
    delete[] _c_str;
  }

  _data = newData;
  _length = newLength;
  _c_str_valid=0;
  return *this;
}

// **********************************************************************
//! Reports the number of characters held by the string.
//! @return the stored length
unsigned long XMLString::length() const {
  const unsigned long characterCount = _length;
  return characterCount;
}

// **********************************************************************
//! Returns the contents as a null-terminated C string.  The copy is built
//! on the first call and cached; later calls return the cached buffer
//! until _c_str_valid is cleared again.
//! @return pointer to the cached, null-terminated copy
const char* XMLString::c_str() const {
  if (_c_str_valid) {
    return _c_str;
  }

  // Cache miss: build a terminated copy of the character data.
  _c_str = new char[_length+1];
  unsigned long pos = 0;
  while (pos < _length) {
    _c_str[pos] = _data[pos];
    ++pos;
  }
  _c_str[pos] = 0;
  _c_str_valid = 1;

  return _c_str;
}

// **********************************************************************
//! Returns the character at the given position.
//! @param index  zero-based position of the character
//! @return the character stored at that position
//! @throws XMLException with RANGE_ERR when index is not below the length
char XMLString::data(const unsigned long& index) const {
  const bool inRange = index < _length;
  if (!inRange) {
    throw XMLException(XMLException::RANGE_ERR);
  }
  return _data[index];
}

// **********************************************************************
//! Compares two strings for equality: same length and identical
//! characters at every position.
//! @param s  the string to compare against
//! @return true if both strings hold the same characters
bool XMLString::operator==(const XMLString& s) const {
  if (this == &s) {
    return true;
  }
  if (_length != s.length()) {
    return false;
  }

  // Same length: the strings differ as soon as one position differs.
  for (unsigned long pos = 0; pos < _length; ++pos) {
    if (_data[pos] != s._data[pos]) {
      return false;
    }
  }
  return true;
}

// **********************************************************************
//! Compares two strings for inequality; the exact negation of operator==.
//! @param s  the string to compare against
//! @return true if the strings differ
bool XMLString::operator!=(const XMLString& s) const {
  const bool same = operator==(s);
  return !same;
}

// **********************************************************************
//! Appends another string to the end of this one.  Appending a string to
//! itself is supported because the new buffer is completely built before
//! the old one is released.
//! @param s  the string to append
//! @return reference to this string
XMLString& XMLString::operator+=(const XMLString& s) {

  // Nothing to append: leave the contents (and the cached C string) alone.
  // This also avoids allocating a zero-length array that would never be
  // freed when both strings are empty.
  if(s._length == 0) {
    return *this;
  }

  const unsigned long newLength = _length + s._length;
  char* newData = new char[newLength];

  unsigned long i;
  for(i = 0; i < _length; i++) {
    newData[i] = _data[i];
  }
  for(i = _length; i < newLength; i++) {
    newData[i] = s._data[i-_length];
  }

  // Buffers come from new[], so release them with delete[].
  if(_length) {
    delete[] _data;
  }
  _data = newData;
  _length = newLength;

  if(_c_str_valid) {
    delete[] _c_str;
  }
  _c_str_valid = 0;
  return *this;
}

// **********************************************************************
//! Replaces the contents of the string with the whole contents of a file.
//! The file size is found by seeking to the end; the stream is then rewound
//! and read in one go.  If the stream is null, cannot be sized or rewound,
//! or yields no data, the string is left empty.
//! @param infile  an open, seekable stream to read from
//! @return the number of characters loaded (0 if nothing could be read)
long XMLString::loadFromFile(FILE* infile) {
  // Drop the previous contents first (buffers come from new[]).
  if(_length) {
    delete[] _data;
  }
  if(_c_str_valid) {
    delete[] _c_str;
  }
  _data = 0;
  _length = 0;
  _c_str_valid=0;

  if(infile == 0) {
    return 0;
  }

  if(fseek(infile,0,SEEK_END) != 0) {
    return 0;
  }
  // ftell reports failure as -1; never let that turn into a huge length.
  const long fileSize = ftell(infile);
  const bool rewound = (fseek(infile,0,SEEK_SET) == 0);
  if((fileSize <= 0) || !rewound) {
    return 0;
  }

  char* buffer = new char[fileSize];
  // A short read shrinks the string rather than padding it with EOF values.
  const unsigned long nRead = fread(buffer,1,fileSize,infile);
  if(nRead == 0) {
    delete[] buffer;
    return 0;
  }

  _data = buffer;
  _length = nRead;
  return static_cast<long>(_length);
}

// **********************************************************************
//! Inserts a string at the given position.  Characters at and after the
//! offset are shifted towards the end.  An offset beyond the end of the
//! string is ignored and the string is left unchanged.
//! @param offset  position at which the first inserted character will sit
//! @param s       the string to insert
void XMLString::insertString(const unsigned long& offset, const XMLString& s) {

  if(offset > _length) {
    return;
  }

  // Nothing to insert: leave the contents untouched.
  if(s._length == 0) {
    return;
  }

  const unsigned long at = offset;
  const unsigned long insertLength = s._length;
  const unsigned long newLength = _length + insertLength;
  char* newData = new char[newLength];

  unsigned long i;
  // head: everything before the insertion point
  for(i = 0; i < at; i++) {
    newData[i] = _data[i];
  }
  // the inserted text
  for(i = 0; i < insertLength; i++) {
    newData[at + i] = s._data[i];
  }
  // tail: original position i moves up by exactly the inserted length
  for(i = at; i < _length; i++) {
    newData[insertLength + i] = _data[i];
  }

  // Buffers come from new[], so release them with delete[].
  if(_length) {
    delete[] _data;
  }
  _data = newData;
  _length = newLength;

  if(_c_str_valid) {
    delete[] _c_str;
  }
  _c_str_valid = 0;
}

// **********************************************************************
//! Removes up to count characters starting at offset.  A range that runs
//! past the end of the string is clipped to the end; an offset at or beyond
//! the end removes nothing.
//! @param offset  position of the first character to remove
//! @param count   number of characters to remove
void XMLString::deleteData(const unsigned long& offset, const unsigned long& count) {

  if((count == 0) || (offset >= _length)) {
    return;
  }

  // Clip the range to the end of the string (written so that
  // offset + count cannot overflow).
  unsigned long nRemoved = count;
  if(nRemoved > _length - offset) {
    nRemoved = _length - offset;
  }

  // Close the gap by moving the tail down over the removed characters.
  for(unsigned long i = offset + nRemoved; i < _length; i++) {
    _data[i - nRemoved] = _data[i];
  }
  _length -= nRemoved;

  // The buffer is only owned while the length is non-zero, so release it
  // when the string becomes empty (it was allocated with new[]).
  if(_length == 0) {
    delete[] _data;
    _data = 0;
  }

  if(_c_str_valid) {
    delete[] _c_str;
  }
  _c_str_valid = 0;
}

// **********************************************************************
//! Replaces up to count characters starting at offset with another string.
//! A range running past the end is clipped to the end.  An offset beyond
//! the end of the string simply appends s.
//! @param offset  position of the first character to replace
//! @param count   number of characters to replace
//! @param s       the replacement text
void XMLString::replaceData(const unsigned long& offset, unsigned long count, const XMLString& s) {

  if(offset > _length) {
    *this += s;
    // Done: falling through would compute _length - offset on values that
    // no longer describe the requested range.
    return;
  }

  // Clip the range to the end (written so that offset + count cannot
  // overflow).
  if(count > _length - offset) {
    count = _length - offset;
  }

  const unsigned long at = offset;
  const unsigned long insertLength = s._length;
  const unsigned long newLength = _length - count + insertLength;

  // Build the result before releasing anything; s may be this string.
  char* newData = 0;
  if(newLength) {
    newData = new char[newLength];

    unsigned long i;
    // head: everything before the replaced range
    for(i = 0; i < at; i++) {
      newData[i] = _data[i];
    }
    // the replacement text
    for(i = 0; i < insertLength; i++) {
      newData[at + i] = s._data[i];
    }
    // tail: everything after the replaced range
    for(i = at + count; i < _length; i++) {
      newData[i - count + insertLength] = _data[i];
    }
  }

  // Buffers come from new[], so release them with delete[].
  if(_length) {
    delete[] _data;
  }
  _data = newData;
  _length = newLength;

  if(_c_str_valid) {
    delete[] _c_str;
  }
  _c_str_valid = 0;
}

// **********************************************************************
//! Copies the characters in [begin, end_plus_one) into subS, replacing its
//! previous contents.  An empty or reversed range yields an empty string.
//! subS may be this string itself.
//! @param subS          receives the extracted characters
//! @param begin         position of the first character to copy
//! @param end_plus_one  one past the position of the last character to copy
//! @throws XMLException with RANGE_ERR when end_plus_one exceeds the length
void XMLString::subString(XMLString& subS, const unsigned long& begin, const unsigned long& end_plus_one) const {

  if(end_plus_one>_length) {
    throw XMLException(XMLException::RANGE_ERR);
  }

  const unsigned long first = begin;
  const unsigned long last = end_plus_one;

  // Build the result before touching subS: if subS is this string, its
  // buffer is the very data being read.
  unsigned long newLength = 0;
  char* newData = 0;
  if(last > first) {
    newLength = last - first;
    newData = new char[newLength];
    for(unsigned long i = 0; i < newLength; i++) {
      newData[i] = _data[first + i];
    }
  }

  // Buffers come from new[], so release them with delete[].
  if(subS._length) {
    delete[] subS._data;
  }
  if(subS._c_str_valid) {
    delete[] subS._c_str;
  }

  subS._data = newData;
  subS._length = newLength;
  subS._c_str_valid=0;
}

// **********************************************************************
//! Scans the string for characters rejected by XMLChar::isChar().
//! @return true if at least one such character is present
bool XMLString::hasIllegalCharacters() const {
  for (unsigned long pos = 0; pos < _length; ++pos) {
    const bool legal = XMLChar::isChar(_data[pos]);
    if (!legal) {
      return true;
    }
  }
  return false;
}

// **********************************************************************
//! Tests whether the string starts with the lower-case characters "xml".
//! @return true if the first three characters are 'x', 'm', 'l'
bool XMLString::beginsWithxml() const {
  if (_length >= 3) {
    return (_data[0] == 'x') && (_data[1] == 'm') && (_data[2] == 'l');
  }
  return false;
}

// **********************************************************************
//! Tests whether the string starts with "xml" in any mixture of upper and
//! lower case.
//! @return true if the first three characters spell "xml" ignoring case
bool XMLString::beginsWithXxMmLl() const {
  if (_length < 3) {
    return false;
  }

  const bool firstIsX  = (_data[0] == 'X') || (_data[0] == 'x');
  const bool secondIsM = (_data[1] == 'M') || (_data[1] == 'm');
  const bool thirdIsL  = (_data[2] == 'L') || (_data[2] == 'l');
  return firstIsX && secondIsM && thirdIsL;
}

// **********************************************************************
//! Tests whether the string is exactly three characters long and spells
//! "xml" in any mixture of upper and lower case.
//! @return true if the whole string is "xml" ignoring case
bool XMLString::eqxml() const {
  if (_length != 3) {
    return false;
  }

  const bool firstIsX  = (_data[0] == 'X') || (_data[0] == 'x');
  const bool secondIsM = (_data[1] == 'M') || (_data[1] == 'm');
  const bool thirdIsL  = (_data[2] == 'L') || (_data[2] == 'l');
  return firstIsX && secondIsM && thirdIsL;
}

// **********************************************************************
//! Tests whether the string is exactly five characters long and spells
//! "xmlns" in any mixture of upper and lower case.
//! @return true if the whole string is "xmlns" ignoring case
bool XMLString::eqxmlns() const {
  if (_length != 5) {
    return false;
  }

  // Compare position by position against both spellings.
  const char lower[] = "xmlns";
  const char upper[] = "XMLNS";
  for (unsigned long pos = 0; pos < 5; ++pos) {
    if ((_data[pos] != lower[pos]) && (_data[pos] != upper[pos])) {
      return false;
    }
  }
  return true;
}

// **********************************************************************
//! Tests whether the string is a name: non-empty, starting with a letter,
//! '_' or ':', and continuing only with characters accepted by
//! XMLChar::isNameChar().
//! @return true if the string is a name
bool XMLString::isName() const {
  if (_length == 0) {
    return false;
  }

  const char first = _data[0];
  const bool validStart = XMLChar::isLetter(first) || (first == '_') || (first == ':');
  if (!validStart) {
    return false;
  }

  unsigned long pos = 1;
  while (pos < _length) {
    if (!XMLChar::isNameChar(_data[pos])) {
      return false;
    }
    ++pos;
  }
  return true;
}

// **********************************************************************
//! Tests whether the string is a colon-free name: non-empty, starting with
//! a letter or '_', and continuing only with characters accepted by
//! XMLChar::isNCNameChar().
//! @return true if the string is a colon-free name
bool XMLString::isNCName() const {
  if (_length == 0) {
    return false;
  }

  const char first = _data[0];
  const bool validStart = XMLChar::isLetter(first) || (first == '_');
  if (!validStart) {
    return false;
  }

  unsigned long pos = 1;
  while (pos < _length) {
    if (!XMLChar::isNCNameChar(_data[pos])) {
      return false;
    }
    ++pos;
  }
  return true;
}

// **********************************************************************
//! Tests whether the string contains at most one colon.
//! @return true if fewer than two colons are present
bool XMLString::isNSWellFormed() const {
  bool colonSeen = false;
  for (unsigned long pos = 0; pos < _length; ++pos) {
    if (_data[pos] != ':') {
      continue;
    }
    if (colonSeen) {
      // a second colon settles the answer
      return false;
    }
    colonSeen = true;
  }
  return true;
}

// **********************************************************************
//! Tests whether every character is a Latin letter, a decimal digit or one
//! of '_', '.', ':' and '-'.  An empty string passes.
//! @return true if no other character is present
bool XMLString::isVersionNum() const {
  for (unsigned long pos = 0; pos < _length; ++pos) {
    const char c = _data[pos];
    const bool alphaNumeric = XMLChar::isLatinLetter(c) || XMLChar::isLatinDigit(c);
    const bool punctuation = (c == '_') || (c == '.') || (c == ':') || (c == '-');
    if (!alphaNumeric && !punctuation) {
      return false;
    }
  }
  return true;
}

// **********************************************************************
//! Tests whether the string is an encoding name: non-empty, starting with
//! a Latin letter, and continuing only with Latin letters, decimal digits,
//! '_', '.' or '-'.
//! @return true if the string is an encoding name
bool XMLString::isEncName() const {
  if (_length == 0) {
    return false;
  }
  if (!XMLChar::isLatinLetter(_data[0])) {
    return false;
  }

  for (unsigned long pos = 1; pos < _length; ++pos) {
    const char c = _data[pos];
    const bool alphaNumeric = XMLChar::isLatinLetter(c) || XMLChar::isLatinDigit(c);
    const bool punctuation = (c == '_') || (c == '.') || (c == '-');
    if (!alphaNumeric && !punctuation) {
      return false;
    }
  }
  return true;
}

// **********************************************************************
//! Tests whether every character is accepted by XMLChar::isPubidChar().
//! An empty string passes.
//! @return true if no other character is present
bool XMLString::isPubidLiteral() const {
  unsigned long pos = 0;
  while (pos < _length) {
    if (!XMLChar::isPubidChar(_data[pos])) {
      return false;
    }
    ++pos;
  }
  return true;
}

// **********************************************************************
//! Splits the string at its first colon.  When a colon is found, prefix
//! receives the text before it and localPart the text after it.  When
//! there is no colon, prefix becomes empty and localPart receives the
//! whole string.
//! @param prefix     receives the part before the first colon
//! @param localPart  receives the part after the first colon
//! @return true if a colon was found
bool XMLString::splitNSName(XMLString& prefix, XMLString& localPart) const {

  // Locate the first colon; colonAt ends up at _length when there is none.
  unsigned long colonAt = 0;
  for (; colonAt < _length; ++colonAt) {
    if (_data[colonAt] == ':') {
      break;
    }
  }

  if (colonAt >= _length) {
    prefix = "";
    localPart = *this;
    return false;
  }

  subString(prefix, 0, colonAt);
  subString(localPart, colonAt + 1, _length);
  return true;
}

// **********************************************************************
//! Tests whether every character is accepted by XMLChar::isWhiteSpace().
//! An empty string passes.
//! @return true if the string holds nothing but white space
bool XMLString::isAllWhiteSpace() const {
  unsigned long pos = 0;
  while (pos < _length) {
    if (!XMLChar::isWhiteSpace(_data[pos])) {
      return false;
    }
    ++pos;
  }
  return true;
}

// ******************************************************************************
//! Parses the string as a single unsigned integer.  The accepted form is:
//! optional white space, decimal digits, optional white space, and nothing
//! else.
//! @param outULong  receives the parsed value on success
//! @return true if the string held exactly one parsable unsigned integer
bool XMLString::asULong(unsigned long& outULong) const {

  if(_length==0) {
    return 0;
  }

  // make sure the null-terminated copy exists; it is scanned below
  if(!_c_str_valid) {
    c_str();
  }

  long i=0;

  while(XMLChar::isWhiteSpace(_c_str[i])) {
    // skip white space
    i++;
  }

  while(XMLChar::isLatinDigit(_c_str[i])) {
    // skip numbers
    i++;
  }

  while(XMLChar::isWhiteSpace(_c_str[i])) {
    // skip any more white space
    i++;
  }

  if(_c_str[i] != 0) {
    // should now be at end
    return 0; // more characters!!!
  }

  // finally: "%lu" is the conversion for unsigned long (the previous
  // "%lui" carried a stray literal 'i' after the conversion)
  return sscanf(_c_str,"%lu",&outULong)==1;
}

// ******************************************************************************
//! Parses the string as a single floating point number.  The accepted form
//! is: optional white space, optional sign, digits, optional decimal point,
//! digits, optional exponent ('e' or 'E', optional sign, digits), optional
//! white space, and nothing else.
//! @param outDouble  receives the parsed value on success
//! @return true if the string held exactly one parsable double
bool XMLString::asDouble(double& outDouble) const {

  if(_length==0) {
    return 0;
  }

  // make sure the null-terminated copy exists; it is scanned below
  if(!_c_str_valid) {
    c_str();
  }

  long i=0;

  while(XMLChar::isWhiteSpace(_c_str[i])) {
    // skip white space
    i++;
  }

  if((_c_str[i]=='+')||(_c_str[i]=='-')) {
    // skip any +/- sign
    i++;
  }

  while(XMLChar::isLatinDigit(_c_str[i])) {
    // skip any numbers
    i++;
  }

  if(_c_str[i]=='.') {
    // skip a decimal point
    i++;
  }

  while(XMLChar::isLatinDigit(_c_str[i])) {
    // skip any more numbers
    i++;
  }

  // the exponent marker may be written in either case
  if((_c_str[i]=='e')||(_c_str[i]=='E')) {
    // skip exponential
    i++;
    if((_c_str[i]=='+')||(_c_str[i]=='-')) {
      // skip any +/- sign
      i++;
    }
    while(XMLChar::isLatinDigit(_c_str[i])) {
      // skip any more numbers
      i++;
    }
  }

  while(XMLChar::isWhiteSpace(_c_str[i])) {
    // skip any more white space
    i++;
  }

  if(_c_str[i] != 0) {
    // should now be at end
    return 0; // more characters!!!
  }

  // finally
  return sscanf(_c_str,"%lf",&outDouble)==1;
}

// **********************************************************************
//! Rewrites the string in place so that it consists only of Latin letters,
//! decimal digits and underscores, and does not start with a digit.  Every
//! offending character is replaced by '_'.  An empty string is left alone.
void XMLString::goLatinAlphaNumeric() {

  if(_length==0) {
    return;
  }

  // The contents are about to change, so drop the cached C string (it was
  // allocated with new[]).
  if(_c_str_valid) {
    delete[] _c_str;
  }
  _c_str_valid=0;

  // First character: only a letter or an underscore may stay.  A digit is
  // replaced, and so is any other character (which was previously passed
  // through unchanged).
  if(!(XMLChar::isLatinLetter(_data[0])||(_data[0]=='_'))) {
    _data[0]='_';
  }

  for(unsigned long i=1; i<_length; i++) {
    const char c = _data[i];
    if(!(XMLChar::isLatinLetter(c)||XMLChar::isLatinDigit(c)||(c=='_'))) {
      _data[i]='_';
    }
  }
}



// syntax highlighted by Code2HTML, v. 0.9.1