// ports//net/libmxp/work/libmxp-0.2.2/src/cmxpparser.cpp

/***************************************************************************
 *   Copyright (C) 2004 by Tomas Mecir                                     *
 *   kmuddy@kmuddy.org                                                     *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU Library General Public License as       *
 *   published by the Free Software Foundation; either version 2 of the    *
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU Library General Public License for more details.                  *
 ***************************************************************************/

#include "cmxpparser.h"

#include "celementmanager.h"
#include "cmxpstate.h"
#include "cresulthandler.h"

cMXPParser::cMXPParser (cMXPState *st, cElementManager *elm, cResultHandler *res)
{
  //the parser begins in plain-text mode, with no carriage return pending
  wasBackslashR = false;
  pstate = pText;

  //remember the collaborating objects; they are only stored here, nothing is
  //called on them during construction
  results = res;
  elements = elm;
  state = st;
}

//Destructor - intentionally empty: no member is released explicitly, and the
//state / element manager / result handler pointers are not deleted here.
cMXPParser::~cMXPParser ()
{
}

void cMXPParser::reset ()
{
  //throw away chunks that were produced but never fetched
  chunks.clear();

  //forget any partially parsed input and return to plain-text mode
  str.clear();
  wasBackslashR = false;
  pstate = pText;
}

void cMXPParser::parse (const string &text)
{
  //WARNING: examine this function only at your own risk!
  //it is advised to have a look at the simpleParse() function first - it's similar
  //to this one, but much simpler...
  if (text.empty())
    return;
  string::const_iterator it;
  for (it = text.begin(); it != text.end(); ++it)
  {
    char c = *it;
    
    //handle \r not followed by \n - treated as a newline
    if (wasBackslashR && (c != '\n'))
    {
      //"str" now certainly is empty, so we needn't care about that
      //report new-line
      elements->gotNewLine();
      state->gotNewLine();
    }    
    wasBackslashR = false;
    
    //we need current mode - parsing in LOCKED mode is limited
    //mode is retrieved in every iteration to ensure that it's always up-to-date
    mxpMode mode = state->getMXPMode();
    switch (pstate) {
      case pText: {
        //tags not recognized in LOCKED mode...
        if ((c == '\e') || ((mode != lockedMode) && (c == '<')) || (c == '\n') || (c == '\r'))
        {
          //end of text - got newline / ANSI seq / start of tag
          if (!str.empty())
          {
            state->gotText (str);
            str = "";
          }
          if (c == '\e')
            pstate = pAnsiSeq;
          if ((c == '<') && (mode != lockedMode))
            pstate = pTag;
          if (c == '\n')
          {
            //report new-line
            elements->gotNewLine();
            state->gotNewLine();
          }
          if (c == '\r')
            wasBackslashR = true;
        }
        else
          str += c;  //add new character to the text... 
        break;
      };
      case pAnsiSeq: {
        if ((c == '\e') || (c == '\n') || (c == '\r'))
        {
          //the same as in pTag section...
          results->addToList (results->createError ("Received unfinished ANSI sequence!"));
          str = "";
          if (c == '\e')
            pstate = pAnsiSeq;
          if (c == '\n')
          {
            //report new-line
            elements->gotNewLine();
            state->gotNewLine();
            pstate = pText;
          }
          if (c == '\r')
          {
            pstate = pText;
            wasBackslashR = true;
          }
        }
        else
        if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))
        {
          //ANSI sequence ends...
          if (c == 'z')  //line tag
          {
            if (str.empty())
            {
              //invalid sequence
              str = "\ez";
            }
            else
            {
              //process this sequence
              int len = str.length();
              int num = 0;
              for (int i = 1; i < len; i++)  //str[0] is '[', which is SKIPPED
              {
                char cc = str[i];
                if (cc == ';')  //this shouldn't happen, but some MUD might want to use it...
                {
                  if ((num >= 0) && (num <= 99))  //ensure that number lies in correct range
                  {
                    state->gotLineTag (num);
                    elements->gotLineTag (num);
                  }
                  else
                    results->addToList (results->createError ("Received invalid line tag!"));
                  num = 0;
                }
                else
                  num = num * 10 + (cc - 48);  //48 is the code of '0'
              }
              //report last line tag (and usually the only one)
              if ((num >= 0) && (num <= 99))  //ensure that number lies in correct range
              {
                state->gotLineTag (num);
                elements->gotLineTag (num);
              }
              else
                results->addToList (results->createError ("Received invalid line tag!"));
              str = "";
            }
          }
          else  //something else
          {
            //'\e' and c are not in the string - add them there
            str = '\e' + str + c;
          } 
          pstate = pText;
        }
        else
        if (c == '[')  //this one is valid, but only at the beginning
        {
          if (str.empty())
            str += c;
          else
          {
            //'[' in the middle of ANSI seq => not an ANSI seq...
            pstate = pText;
            str = '\e' + str + c;
          }
        }
        else
        if ((c == ';') || ((c >= '0') && (c <= '9')))  //correct char, unless str is empty
          if (!str.empty())
            str += c;  //here we go...
          else
          {
            //ANSI seq must start with [ - therefore this is not an ANSI sequence after all
            pstate = pText;
            str += '\e';
            str += c;
          }
        else
        //incorrect character...
        {
          str = '\e' + str + c;
          pstate = pText;
        }
        break;
      };
      case pTag: {
        if (c == '>')
        {
          elements->gotTag (str);
          str = "";
          pstate = pText;
        }
        else
        if ((c == '"') || (c == '\''))
        {
          pstate = pQuotedParam;
          quoteChar = c;
          str += c;
        }
        else if ((c == '\e') || (c == '\n') || (c == '\r'))
        {
          //handle incorrectly terminated tag and continue parsing...
          results->addToList (results->createError ("Received unfinished tag <" + str));
          str = "";
          if (c == '\e')
            pstate = pAnsiSeq;
          if (c == '\n')
          {
            //report new-line
            elements->gotNewLine();
            state->gotNewLine();
            pstate = pText;
          }
          if (c == '\r')
          {
            pstate = pText;
            wasBackslashR = true;
          }
        }
        else if (str == "!--")  //comment
        {
          str += c;
          pstate = pComment;
        }
        else
          str += c;
        break;
      };
      case pComment: {
        if (c == '>')
        {
          int l = str.length();
          if ((str[l-2] == '-') && (str[l-1] == '-')) //okay, comment ends
          {
            str = "";
            pstate = pText;
          }
          else
            str += c;
        }
        else if ((c == '\e') || (c == '\n') || (c == '\r'))
        {
          //handle incorrectly terminated comment and continue parsing...
          results->addToList (results->createError ("Received an unfinished comment!"));
          str = "";
          if (c == '\e')
            pstate = pAnsiSeq;
          if (c == '\n')
          {
            //report new-line
            elements->gotNewLine();
            state->gotNewLine();
            pstate = pText;
          }
          if (c == '\r')
          {
            pstate = pText;
            wasBackslashR = true;
          }
        }
        else
          str += c;
        break;
      };
      case pQuotedParam: {
        if (c == quoteChar)
        {
          //quoted parameter ends... this simple approach will work correctly for correct
          //tags, it may treat incorrect quotes as correct, but element manager will take care
          //of that
          pstate = pTag;
          str += c;
        }
        else
        if ((c == '\e') || (c == '\n') || (c == '\r'))
        {
          //the same as in pTag section...
          results->addToList (results->createError ("Received unfinished tag <" + str));
          str = "";
          if (c == '\e')
            pstate = pAnsiSeq;
          if (c == '\n')
          {
            //report new-line
            elements->gotNewLine();
            state->gotNewLine();
            pstate = pText;
          }
          if (c == '\r')
          {
            pstate = pText;
            wasBackslashR = true;
          }
        }
        else
          str += c;
        break;
      };
    };
  }
  //report remaining text, if any (needed to improve speed of text displaying and to handle
  //prompts correctly)
  if ((pstate == pText) && (!str.empty()))
  {
    state->gotText (str);
    str = "";
  }
}

void cMXPParser::simpleParse (const string &text)
//simple parsing - only text and tags - no newlines, no ANSI sequences, no line tags
//simpler version of parse() function above
//
//The input is split into chunks that are APPENDED to "chunks" (the list is not cleared
//here) and can be fetched with hasNext() / getNext():
//  - chunkText  - text outside of tags (empty pieces are not reported)
//  - chunkTag   - text between '<' and '>', quotes included
//  - chunkError - error message, produced if the input ends in the middle of a tag
//Unlike parse(), the whole input is treated as one complete unit: pstate and str are
//overwritten at the beginning and left as pText / empty at the end.
{
  if (text.empty())
    return;
  chunk ch;
  string::const_iterator it;
  pstate = pText;
  str = "";
  for (it = text.begin(); it != text.end(); ++it)
  {
    char c = *it;
    switch (pstate) {
      case pText: {
        if (c == '<')
        {
          //end of text - got start of tag
          if (!str.empty())
          {
            ch.chk = chunkText;
            ch.text = str;
            chunks.push_back (ch);
            str = "";
          }
          pstate = pTag;
        }
        else
          str += c;  //add new character to the text...
        break;
      }
      case pTag: {
        if (c == '>')
        {
          ch.chk = chunkTag;
          ch.text = str;
          chunks.push_back (ch);
          str = "";
          pstate = pText;
        }
        else if ((c == '"') || (c == '\''))
        {
          //remember which quote was opened, so that only the same one closes it
          pstate = pQuotedParam;
          quoteChar = c;
          str += c;
        }
        else
          str += c;
        break;
      }
      case pQuotedParam: {
        //quoted parameter ends on the matching quote... this simple approach will work
        //correctly for correct tags, it may treat incorrect quotes as correct, but element
        //manager will take care of that
        if (c == quoteChar)
          pstate = pTag;
        str += c;  //the character belongs to the tag text either way
        break;
      }
    }
  }
  //unfinished things...
  if (pstate == pText)
  {
    //trailing text - skipped when empty, exactly like text in front of a tag, so that
    //input ending with a tag does not produce a spurious empty text chunk
    if (!str.empty())
    {
      ch.chk = chunkText;
      ch.text = str;
      chunks.push_back (ch);
    }
  }
  else if ((pstate == pTag) || (pstate == pQuotedParam))
  {
    ch.chk = chunkError;
    ch.text = "Tag definition contains unfinished tag <" + str;
    chunks.push_back (ch);
    //do not leave the parser in the middle of a tag - a following parse() call would
    //otherwise treat the beginning of its input as the rest of this tag
    pstate = pText;
  }
  str = "";
}

//Tells whether at least one chunk is waiting to be fetched with getNext().
bool cMXPParser::hasNext()
{
  return !chunks.empty();
}

//Removes the oldest waiting chunk from the queue and returns it.
//If nothing is waiting, a chunk with chk set to chunkNone is returned instead.
chunk cMXPParser::getNext()
{
  if (hasNext())
  {
    chunk first = chunks.front();
    chunks.pop_front();
    return first;
  }

  chunk none;
  none.chk = chunkNone;
  return none;
}
// syntax highlighted by Code2HTML, v. 0.9.1