///###////////////////////////////////////////////////////////////////////////
//
// Burton Computer Corporation
// http://www.burton-computer.com
// http://www.cooldevtools.com
// $Id: Tokenizer.cc 272 2007-01-06 19:37:27Z brian $
//
// Copyright (C) 2007 Burton Computer Corporation
// ALL RIGHTS RESERVED
//
// This program is open source software; you can redistribute it
// and/or modify it under the terms of the Q Public License (QPL)
// version 1.0. Use of this software in whole or in part, including
// linking it (modified or unmodified) into other programs is
// subject to the terms of the QPL.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// Q Public License for more details.
//
// You should have received a copy of the Q Public License
// along with this program; see the file LICENSE.txt.  If not, visit
// the Burton Computer Corporation or CoolDevTools web site
// QPL pages at:
//
//    http://www.burton-computer.com/qpl.html
//    http://www.cooldevtools.com/qpl.html
//

#include "Tokenizer.h"

// Creates a tokenizer whose state machine begins in STATE_START, so the
// first character passed to addChar() starts the search for a new word.
Tokenizer::Tokenizer() : m_state(STATE_START) {}

// Nothing to release explicitly; the destructor body is intentionally empty.
Tokenizer::~Tokenizer() {}

// Reports whether ch may appear in the body of a word.
//
// A character qualifies when any of the following holds (checked in this
// order):
//   - its 0x80 bit is set (a byte outside the 7-bit ASCII range),
//   - is_alnum() accepts it,
//   - it is a percent sign.
inline bool is_letter_char(char ch)
{
  if (ch & 0x80) {
    return true;
  }

  if (is_alnum(ch)) {
    return true;
  }

  return ch == '%';
}

// Reports whether ch is one of the punctuation characters that may sit
// between two runs of letter characters inside a single word:
//   '.'  '-'  '+'  ','  '_'  '$'
// Every other character yields false.
inline bool is_special_char(char ch)
{
  const bool is_separator = (ch == '.') || (ch == '-') || (ch == '+') ||
                            (ch == ',') || (ch == '_') || (ch == '$');
  return is_separator;
}

// Feeds one character into the tokenizer's state machine.
//
// States:
//   STATE_START   - the previous word (if any) is discarded, then the
//                   character is handled exactly as in STATE_WAITING.
//   STATE_WAITING - no word in progress; a '$' or a letter character
//                   begins a new word, anything else is ignored.
//   STATE_IN_WORD - letter characters extend the word; a special
//                   character is parked in m_pending; anything else ends
//                   the word.
//   STATE_PENDING - one or more special characters followed the word so
//                   far.  A letter character commits them (plus itself) to
//                   the word; another special character is parked as well;
//                   anything else drops the parked characters and ends the
//                   word.
//
// Characters appended directly to the word pass through to_lower(); parked
// special characters are appended unchanged.
//
// Returns true only when this character terminated a word (the finished
// word is then held in m_word and the state is back to STATE_START);
// returns false in every other case.
bool Tokenizer::addChar(char ch)
{
  // Entering from STATE_START is the same as STATE_WAITING after the old
  // word has been thrown away.
  if (m_state == STATE_START) {
    m_word.erase();
    m_state = STATE_WAITING;
  }

  bool word_finished = false;

  switch (m_state) {
  case STATE_WAITING:
    assert(m_word.length() == 0);
    assert(m_pending.length() == 0);

    // '$' may open a word even though it is not a letter character.
    if (ch == '$' || is_letter_char(ch)) {
      m_word += to_lower(ch);
      m_state = STATE_IN_WORD;
    }
    break;

  case STATE_IN_WORD:
    assert(m_word.length() > 0);
    assert(m_pending.length() == 0);

    if (is_letter_char(ch)) {
      m_word += to_lower(ch);
    } else if (is_special_char(ch)) {
      // Hold the punctuation back until we know a letter follows it.
      m_state = STATE_PENDING;
      m_pending += ch;
    } else {
      m_state = STATE_START;
      word_finished = true;
    }
    break;

  case STATE_PENDING:
    assert(m_word.length() > 0);
    assert(m_pending.length() > 0);

    if (is_letter_char(ch)) {
      // The parked punctuation turned out to be interior to the word.
      m_state = STATE_IN_WORD;
      m_word += m_pending;
      m_word += to_lower(ch);
      m_pending.erase();
    } else if (is_special_char(ch)) {
      m_pending += ch;
    } else {
      // Trailing punctuation is not part of the word; discard it.
      m_state = STATE_START;
      m_pending.erase();
      word_finished = true;
    }
    break;

  default:
    assert(!"not a valid state");
    assert(!"not reached");
    break;
  }

  return word_finished;
}

// Resets the tokenizer for a new run of input: any parked special
// characters are dropped and the state machine returns to STATE_START.
// m_word itself is left alone here; addChar() erases it when it next runs
// in STATE_START.
void Tokenizer::start()
{
  m_pending.erase();
  m_state = STATE_START;
}

// Signals the end of input.
//
// Returns true when the state machine is not in STATE_START and m_word is
// non-empty, i.e. a word was still being accumulated when input ended.
// Returns false otherwise.  No member is modified; in particular any
// characters parked in m_pending are not appended to the word.
bool Tokenizer::stop()
{
  if (m_state == STATE_START) {
    return false;
  }

  return m_word.length() > 0;
}


// syntax highlighted by Code2HTML, v. 0.9.1