///###////////////////////////////////////////////////////////////////////////
//
// Burton Computer Corporation
// http://www.burton-computer.com
// http://www.cooldevtools.com
// $Id: Tokenizer.cc 272 2007-01-06 19:37:27Z brian $
//
// Copyright (C) 2007 Burton Computer Corporation
// ALL RIGHTS RESERVED
//
// This program is open source software; you can redistribute it
// and/or modify it under the terms of the Q Public License (QPL)
// version 1.0. Use of this software in whole or in part, including
// linking it (modified or unmodified) into other programs is
// subject to the terms of the QPL.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// Q Public License for more details.
//
// You should have received a copy of the Q Public License
// along with this program; see the file LICENSE.txt. If not, visit
// the Burton Computer Corporation or CoolDevTools web site
// QPL pages at:
//
// http://www.burton-computer.com/qpl.html
// http://www.cooldevtools.com/qpl.html
//
#include "Tokenizer.h"
// Construct a tokenizer whose state machine begins in STATE_START, so the
// first call to addChar() starts scanning for a new word.
Tokenizer::Tokenizer() : m_state(STATE_START) {}
// No explicit cleanup is performed here.
Tokenizer::~Tokenizer() {}
// Returns true when ch may be part of the body of a word: any character
// with the 0x80 bit set, any character accepted by is_alnum(), or '%'.
inline bool is_letter_char(char ch)
{
  // Characters with the high bit set are always treated as word characters.
  if (ch & 0x80) {
    return true;
  }
  if (is_alnum(ch)) {
    return true;
  }
  return ch == '%';
}
// Returns true for the punctuation characters that the tokenizer tolerates
// inside a word: '.', '-', '+', ',', '_' and '$'.  Every other character
// (including '\0') yields false.
inline bool is_special_char(char ch)
{
  return ch == '.'
      || ch == '-'
      || ch == '+'
      || ch == ','
      || ch == '_'
      || ch == '$';
}
// Feeds one character into the tokenizer's state machine.
//
// Returns true when this character terminated a word; the finished word is
// then held in m_word and the state is reset to STATE_START (the next call
// erases m_word before scanning again).  Returns false while no word has
// been completed.  The terminating character itself is not stored.
//
// States:
//   STATE_START   - erase m_word, then behave as STATE_WAITING.
//   STATE_WAITING - skip input until '$' or a letter character begins a word.
//   STATE_IN_WORD - letter characters extend the word; a special character
//                   moves to STATE_PENDING; anything else ends the word.
//   STATE_PENDING - special characters are buffered in m_pending.  They are
//                   appended to the word only if a letter character follows;
//                   otherwise they are discarded and the word ends.
bool Tokenizer::addChar(char ch)
{
  // A fresh scan discards the previous word and then waits for a word start.
  if (m_state == STATE_START) {
    m_word.erase();
    m_state = STATE_WAITING;
  }

  if (m_state == STATE_WAITING) {
    assert(m_word.length() == 0);
    assert(m_pending.length() == 0);
    // '$' may begin a word even though it is otherwise a special character.
    if (ch == '$' || is_letter_char(ch)) {
      m_word += to_lower(ch);
      m_state = STATE_IN_WORD;
    }
    return false;
  }

  if (m_state == STATE_IN_WORD) {
    assert(m_word.length() > 0);
    assert(m_pending.length() == 0);
    if (is_letter_char(ch)) {
      m_word += to_lower(ch);
      return false;
    }
    if (is_special_char(ch)) {
      // Hold the punctuation back until we know whether the word continues.
      m_state = STATE_PENDING;
      m_pending += ch;
      return false;
    }
    // Any other character ends the word.
    m_state = STATE_START;
    return true;
  }

  if (m_state == STATE_PENDING) {
    assert(m_word.length() > 0);
    assert(m_pending.length() > 0);
    if (is_letter_char(ch)) {
      // The word continues, so the buffered punctuation belongs to it.
      m_state = STATE_IN_WORD;
      m_word += m_pending;
      m_word += to_lower(ch);
      m_pending.erase();
      return false;
    }
    if (is_special_char(ch)) {
      m_pending += ch;
      return false;
    }
    // Trailing punctuation is dropped; the word ends without it.
    m_state = STATE_START;
    m_pending.erase();
    return true;
  }

  // Only reachable if m_state holds a value outside the known states.
  assert(!"not a valid state");
  assert(!"not reached");
  return false;
}
// Prepares the tokenizer for a new run of input: discards any buffered
// special characters and returns the state machine to STATE_START.
// m_word is not cleared here; addChar() erases it when it next runs in
// STATE_START.
void Tokenizer::start()
{
  m_pending.erase();
  m_state = STATE_START;
}
// Called when the input ends.  Returns true when a word was still being
// accumulated (state is not STATE_START and m_word is non-empty), meaning
// m_word holds a final word; returns false otherwise.  No state is modified.
bool Tokenizer::stop()
{
  if (m_state == STATE_START) {
    return false;
  }
  return m_word.length() > 0;
}
// syntax highlighted by Code2HTML, v. 0.9.1