/* ====================================================================
 * Copyright (c) 2003-2006, Martin Hauner
 *                          http://subcommander.tigris.org
 *
 * Subcommander is licensed as described in the file doc/COPYING, which
 * you should have received as part of this distribution.
 * ====================================================================
 */

#include "LineTokenizer.h"

// sys
#include <memory.h>
//#include <sstream>
#include <strstream>


// Creates a tokenizer over the 'size' bytes starting at 'buf'.
// Only the pointer is stored, the bytes themselves are not copied.
LineTokenizer::LineTokenizer( const unsigned char* buf, size_t size )
  : _buf(buf)
  , _size(size)
{
}

// Copy constructor: the copy refers to the same buffer position and has
// the same number of remaining bytes as 'src'.
LineTokenizer::LineTokenizer( const LineTokenizer& src )
  : _buf(src._buf)
  , _size(src._size)
{
}

// Hands out the next line of the buffer, including its line ending.
//
// On success *token points at the start of the line inside the buffer
// (nothing is copied), *size is the length of the line in bytes and the
// result is true. When no bytes are left, *token and *size are set to 0
// and the result is false.
bool LineTokenizer::nextToken( char** token, size_t* size )
{
  // nothing left to hand out
  if( _size == 0 )
  {
    *token = 0;
    *size  = 0;
    return false;
  }

  const unsigned char* lineStart = _buf;

  // without a line break the rest of the buffer is the last token
  const unsigned char* lineEnd = findLineEnd( lineStart, _size );
  if( lineEnd == 0 )
  {
    lineEnd = lineStart + _size;
  }

  const size_t length = lineEnd - lineStart;

  // advance the state past the token we are about to return
  _buf   = lineEnd;
  _size -= length;

  // publish the result
  *token = const_cast<char*>( reinterpret_cast<const char*>(lineStart) );
  *size  = length;

  return true;
}

// Locates the end of the first line in buf[0..count), whatever the line
// ending style is ("\r\n", a lone "\r" or a lone "\n").
//
// Returns a pointer to the byte just past the line break, or NULL if the
// scanned bytes do not contain a line break.
const unsigned char* LineTokenizer::findLineEnd( const unsigned char* buf, size_t count )
{
  const unsigned char* const end = buf + count;

  for( const unsigned char* cur = buf; cur != end; ++cur )
  {
    if( *cur == '\n' )
    {
      // a lone lf
      return cur + 1;
    }

    if( *cur == '\r' )
    {
      // a cr directly followed by an lf is a single crlf line break,
      // anything else (including a cr as the last byte) is a lone cr
      const bool crlf = (cur + 1 != end) && (cur[1] == '\n');
      return cur + (crlf ? 2 : 1);
    }
  }

  return NULL;
}

///////////////////////////////////////////////////////////////////////////////


//#include <istream>
//#include <string>
//#include <strstream>




// Creates a tokenizer that reads its lines from 'in'. It starts out
// without a token buffer.
LineTokenizer2::LineTokenizer2( std::istream& in )
  : _in(in)
  , _tokenbuf(0)
{
}

// Frees the buffer of the most recently returned token, if there is one.
LineTokenizer2::~LineTokenizer2()
{
  this->clearToken();
}

// Releases the buffer that holds the last token and resets the pointer,
// so calling this again is harmless.
void LineTokenizer2::clearToken()
{
  // delete[] on a null pointer does nothing, so no check is required
  delete[] _tokenbuf;
  _tokenbuf = 0;
}


// Reads the next line from the input stream, including its terminating
// '\n' if there is one.
//
// On success *token points to a buffer owned by this tokenizer that holds
// the line (it is not zero terminated), *size is its length in bytes and
// the result is true. The buffer is released by the next call to
// nextToken() or clearToken().
//
// When the stream is at its end and nothing more could be read, *token
// and *size are set to 0 and the result is false.
bool LineTokenizer2::nextToken( char** token, size_t* size )
{
  clearToken();

  // \todo replace with std::stringbuf
  std::strstreambuf buf(1024);
  std::streamsize   got = 0;
  char c;

  _in.get( buf, '\n' );  // does not read the '\n'
  if( ! _in.bad() )
  {
    got = _in.gcount();
  }

  if( ! _in.eof() )
  {
    // if we have a line with only an '\n', the stream state is fail,
    // we have to clear the state before we can read the next char...
    if( got == 0 && _in.fail()  )
    {
      _in.clear();
    }

    // get the \n if available
    _in.get(c);
    got = _in.gcount();

    if( got == 1 )
    {
      buf.sputc(c);
    }
    // if got is 0 we will run into eof on the next call
  }
  else
  {
    // eof, but if we read something fall through..
    if( got == 0 )
    {
      *token = 0;
      *size  = 0;
      return false;
    }
  }

  // copy the line into a buffer we own. pcount() is read once so the
  // allocation, the copy and the reported size always agree.
  const int count = buf.pcount();
  _tokenbuf = new char[count];

  if( count > 0 )
  {
    // str() freezes the strstreambuf, which means its destructor will no
    // longer release the internal buffer. Unfreeze it once the bytes are
    // copied, otherwise every returned token leaks that buffer.
    std::memcpy( _tokenbuf, buf.str(), static_cast<size_t>(count) );
    buf.freeze(false);
  }

  *token = _tokenbuf;
  *size  = static_cast<size_t>(count);

  return true;
}


// syntax highlighted by Code2HTML, v. 0.9.1