ports//devel/obby/work/obby-0.4.4/src/serialise/token.cpp

/* libobby - Network text editing library
 * Copyright (C) 2005 0x539 dev group
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "common.hpp"
#include "format_string.hpp"
#include "serialise/error.hpp"
#include "serialise/token.hpp"

namespace
{
	using namespace obby::serialise;
	typedef std::string::const_iterator string_iterator;

	/** Local _() function that uses obby::_.
	 */
	const char* _(const char* msgid)
	{
		return obby::_(msgid);
	}

	void escape(
		std::string& src
	)
	{
		std::string::size_type pos = 0;
		while( (pos = src.find_first_of("\n\t\\\"", pos)) != std::string::npos)
		{
			std::string replace_with;

			switch(src[pos])
			{
			case '\n':
				replace_with = "\\n";
				break;
			case '\t':
				replace_with = "\\t";
				break;
			case '\\':
				replace_with = "\\\\";
				break;
			case '\"':
				replace_with = "\\\"";
				break;
			}

			src.replace(pos, 1, replace_with);
			pos += replace_with.length();
		}
	}

	void unescape(
		std::string& src,
		unsigned int src_line
	)
	{
		std::string::size_type pos = 0;
		while( (pos = src.find('\\', pos)) != std::string::npos)
		{
			char replace_with;

			// \\ cannot be at end of string - terminating " would
			// have been escaped
			switch(src[pos + 1])
			{
			case 'n':
				replace_with = '\n';
				break;
			case '\\':
				replace_with = '\\';
				break;
			case 't':
				replace_with = '\t';
				break;
			case '\"':
				replace_with = '\"';
				break;
			default:
				obby::format_string str(
					_("Unexpected escape sequence: \\%0%")
				);

				str << src[pos + 1];
				throw error(str.str(), src_line);
			}

			src.replace(pos, 2, 1, replace_with);
			++ pos;
		}
	}

	void tokenise_identifier(
		token_list& list,
		const std::string& src,
		string_iterator& iter,
		unsigned int& line
	)
	{
		// Read to next non-alphanumerical character
		string_iterator orig = iter ++;
		for(; iter != src.end(); ++ iter)
			if(!isalnum(*iter) && *iter != '_')
				break;

		list.add(token::TYPE_IDENTIFIER, std::string(orig, iter), line);
	}

	void tokenise_comment(
		token_list& list,
		const std::string& src,
		string_iterator& iter,
		unsigned int& line
	)
	{
		// Ignore rest of line
		for(++ iter; iter != src.end(); ++ iter)
			if(*iter == '\n')
				break;
	}

	void tokenise_string(
		token_list& list,
		const std::string& src,
		string_iterator& iter,
		unsigned int& line
	)
	{
		string_iterator orig = ++ iter;
		unsigned int orig_line = line;
		bool escaped = false;

		for(; iter != src.end(); ++ iter)
		{
			// Line counting
			if(*iter == '\n')
				++ line;

			// Is this character escaped?
			if(!escaped)
			{
				if(*iter == '\\')
					escaped = true;
				else if(*iter == '\"')
					break;
			}
			else
			{
				// This one was escaped, process normally with
				// next one
				escaped = false;
			}
		}

		// Unexpected end of input
		if(iter == src.end() )
			throw error(_("String not closed"), orig_line);

		// Add unescaped string literal
		std::string unescaped(orig, iter);
		unescape(unescaped, orig_line);
		list.add(token::TYPE_STRING, unescaped, orig_line);

		// Proceed after string termination ('\"')
		++ iter;
	}

	void tokenise_indentation(
		token_list& list,
		const std::string& src,
		string_iterator& iter,
		unsigned int& line
	)
	{
		// Proceed to next char that is not a space character
		string_iterator orig = iter;
		for(; iter != src.end(); ++ iter)
			if(!isspace(*iter) || *iter == '\n')
				break;

		// Add indentation if this is not an empty line
		if(*iter != '\n' && *iter != '\0' && iter != src.end() )
		{
			list.add(
				token::TYPE_INDENTATION,
				std::string(orig, iter),
				line
			);
		}
	}

	void tokenise(
		token_list& list,
		const std::string& src
	)
	{
		unsigned int line = 1;
		for(string_iterator iter = src.begin();
		    iter != src.end();)
		{
			// Nullbyte identifies end of string
			if(*iter == '\0')
				break;

			if(*iter == '\n')
			{
				// Line counting
				++ line;

				// Parse it
				++ iter;
				tokenise_indentation(
					list, src, iter, line
				);

				continue;
			}

			// String literal
			if(*iter == '\"')
			{
				tokenise_string(list, src, iter, line);
				continue;
			}

			// Comment
			if(*iter == '#')
			{
				tokenise_comment(list, src, iter, line);
				continue;
			}

			// Identifier
			if(isalnum(*iter) || *iter == '_')
			{
				tokenise_identifier(list, src, iter, line);
				continue;
			}

			// Ignore whitespace
			if(isspace(*iter) )
			{
				++ iter;
				continue;
			}

			// Special character?
			token::type type = token::TYPE_UNKNOWN;
			switch(*iter)
			{
			case '!':
				type = token::TYPE_EXCLAMATION;
				break;
			case '=':
				type = token::TYPE_ASSIGNMENT;
				break;
			}

			if(type == token::TYPE_UNKNOWN)
			{
				obby::format_string str(
					_("Unexpected token: '%0%'")
				);

				str << *iter;
				throw error(str.str(), line);
			}

			list.add(type, std::string(1, *iter), line);

			// Go on with next character
			++ iter;
		}
	}

	void detokenise(const token_list& list, std::string& target)
	{
		bool line_begin = true;
		std::string escaped_string;

		for(token_list::iterator iter = list.begin();
		    iter != list.end();
		    ++ iter)
		{
			switch(iter->get_type() )
			{
			case token::TYPE_INDENTATION:
				target.append("\n" + iter->get_text() );
				line_begin = true;
				break;
			case token::TYPE_STRING:
				escaped_string = iter->get_text();
				escape(escaped_string);

				target.append("\"");
				target.append(escaped_string);
				target.append("\"");

				line_begin = false;
				break;
			case token::TYPE_IDENTIFIER:
				if(!line_begin)
					target.append(" ");
				// Fallthrough
			default:
				target.append(iter->get_text() );
				if(iter->get_type() != token::TYPE_EXCLAMATION)
					line_begin = false;
				break;
			}
		}
	}
}

obby::serialise::token::token(
	type type,
	const std::string& text,
	unsigned int line
) :
	m_type(type), m_text(text), m_line(line)
{
}

obby::serialise::token::type obby::serialise::token::get_type() const
{
	return m_type;
}

const std::string& obby::serialise::token::get_text() const
{
	return m_text;
}

unsigned int obby::serialise::token::get_line() const
{
	return m_line;
}

obby::serialise::token_list::token_list()
{
}

void obby::serialise::token_list::serialise(
	std::string& string
) const
{
	detokenise(*this, string);
}

void obby::serialise::token_list::deserialise(
	const std::string& string
)
{
	tokenise(*this, string);
}

void obby::serialise::token_list::add(
	token::type type,
	const std::string& text,
	unsigned int line
)
{
	m_list.push_back(token(type, text, line) );
}

obby::serialise::token_list::iterator obby::serialise::token_list::begin() const
{
	return m_list.begin();
}

obby::serialise::token_list::iterator obby::serialise::token_list::end() const
{
	return m_list.end();
}

void obby::serialise::token_list::next_token(
	iterator& iter
) const
{
	unsigned int orig_line = iter->get_line();
	if(++ iter == m_list.end() )
		throw error(_("Unexpected end of input"), orig_line);
}
syntax highlighted by Code2HTML, v. 0.9.1