/* ====================================================================
* Copyright (c) 2003-2006, Martin Hauner
* http://subcommander.tigris.org
*
* Subcommander is licensed as described in the file doc/COPYING, which
* you should have received as part of this distribution.
* ====================================================================
*/
#include "LineTokenizer.h"

// sys
#include <memory.h>
#include <cstring>
#include <sstream>
#include <string>
#include <strstream>
/**
 * Sets up a tokenizer for the \a size bytes starting at \a buf.
 *
 * Only the pointer and the size are stored; the bytes themselves are not
 * copied, nextToken() reads them through the stored pointer later on.
 */
LineTokenizer::LineTokenizer( const unsigned char* buf, size_t size )
  : _buf(buf)
  , _size(size)
{
}
/**
 * Copy constructor.
 *
 * Takes over the current position and the remaining size of \a src. The
 * bytes are not duplicated, both tokenizers point at the same memory.
 */
LineTokenizer::LineTokenizer( const LineTokenizer& src )
  : _buf(src._buf)
  , _size(src._size)
{
}
/**
 * Hands out the next line of the buffer.
 *
 * The returned token points directly into the buffer given to the
 * constructor, nothing is copied and no terminating NUL is written. The
 * token includes the line break found by findLineEnd(); if there is no line
 * break left, the token is the whole rest of the buffer.
 *
 * \param token receives the start of the line, or 0 if nothing is left.
 * \param size  receives the length of the line in bytes, or 0.
 * \return true if a line was returned, false if the buffer is used up.
 */
bool LineTokenizer::nextToken( char** token, size_t* size )
{
  // buffer used up?
  if( _size == 0 )
  {
    *token = 0;
    *size  = 0;
    return false;
  }

  const unsigned char* const lineStart = _buf;

  // without another line break the line runs up to the end of the buffer
  const unsigned char* next = findLineEnd( lineStart, _size );
  if( next == 0 )
  {
    next = lineStart + _size;
  }

  const size_t length = next - lineStart;

  // hand out the line..
  *token = const_cast<char*>( reinterpret_cast<const char*>(lineStart) );
  *size  = length;

  // ..and continue behind it on the next call
  _buf   = next;
  _size -= length;

  return true;
}
/**
 * Searches the first line break in the \a count bytes starting at \a buf,
 * whatever its style: "\r\n", a single '\r' or a single '\n'.
 *
 * A '\r' in the very last byte is treated as a single '\r' line break.
 *
 * \return pointer to the first byte behind the line break, or NULL if the
 *         range does not contain a line break.
 */
const unsigned char* LineTokenizer::findLineEnd( const unsigned char* buf, size_t count )
{
  const unsigned char* const end = buf + count;

  for( const unsigned char* cur = buf; cur != end; ++cur )
  {
    if( *cur == '\n' )
    {
      // single lf
      return cur + 1;
    }

    if( *cur == '\r' )
    {
      // crlf if a line feed follows inside the range, single cr otherwise
      const unsigned char* const after = cur + 1;
      const bool crlf = after != end && *after == '\n';

      return crlf ? after + 1 : after;
    }
  }

  return NULL;
}
///////////////////////////////////////////////////////////////////////////////
//#include <istream>
//#include <string>
//#include <strstream>
/**
 * Sets up a tokenizer that reads its lines from \a in.
 *
 * There is no current token until nextToken() has been called.
 */
LineTokenizer2::LineTokenizer2( std::istream& in )
  : _in(in)
  , _tokenbuf(0)
{
}
/**
 * Destructor, frees the buffer of the current token (if there is one).
 */
LineTokenizer2::~LineTokenizer2()
{
  clearToken();
}
/**
 * Frees the buffer of the current token and forgets about it. Does nothing
 * harmful if there is no current token.
 */
void LineTokenizer2::clearToken()
{
  // delete[] on a null pointer is a no-op, so no check is needed
  delete[] _tokenbuf;
  _tokenbuf = 0;
}
/**
 * Reads the next line from the input stream.
 *
 * A line is everything up to and including the next '\n'; the last line of
 * the stream may come without a '\n'. The line is copied into a buffer owned
 * by the tokenizer. The buffer is not NUL-terminated and is freed by
 * clearToken(), which runs at the start of the next nextToken() call and in
 * the destructor.
 *
 * \param token receives a pointer to the line, or 0 if there is none.
 * \param size  receives the length of the line in bytes, or 0.
 * \return true if a line was read, false if nothing more could be read.
 */
bool LineTokenizer2::nextToken( char** token, size_t* size )
{
  clearToken();

  // A std::stringbuf frees its storage when it goes out of scope. The
  // std::strstreambuf used here before got frozen by its str() call and was
  // never unfrozen, so its buffer leaked on every call.
  std::stringbuf line;

  // read the line up to, but not including, the '\n'
  _in.get( line, '\n' );
  const bool gotText = ! _in.bad() && _in.gcount() > 0;

  if( _in.eof() )
  {
    // eof, but a last line without '\n' still has to be delivered
    if( ! gotText )
    {
      *token = 0;
      *size  = 0;
      return false;
    }
  }
  else
  {
    // if we have a line with only an '\n', get() stored nothing and the
    // stream state is fail, we have to clear the state before we can read
    // the next char...
    if( ! gotText && _in.fail() )
    {
      _in.clear();
    }

    // get the '\n' if available
    char c;
    _in.get(c);
    if( _in.gcount() == 1 )
    {
      line.sputc(c);
    }
  }

  const std::string text = line.str();

  // Not at eof and still nothing read: the stream is broken. Returning an
  // empty token here would let a caller that loops until false spin forever,
  // so report the end of the input instead.
  if( text.empty() )
  {
    *token = 0;
    *size  = 0;
    return false;
  }

  _tokenbuf = new char[text.size()];
  std::memcpy( _tokenbuf, text.data(), text.size() );

  *token = _tokenbuf;
  *size  = text.size();
  return true;
}
// syntax highlighted by Code2HTML, v. 0.9.1