// NAnt - A .NET build tool
// Copyright (C) 2001-2003 Gerry Shaw
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Jaroslaw Kowalski (jkowalski@users.sourceforge.net)

using System;
using System.Globalization;
using System.IO;
using System.Text;

using NAnt.Core.Util;

namespace NAnt.Core {
    /// <summary>
    /// Splits an input string into a sequence of tokens used during parsing.
    /// </summary>
    /// <remarks>
    /// Call <see cref="InitTokenizer" /> with the text to tokenize; it reads
    /// the first token. Each subsequent call to <see cref="GetNextToken" />
    /// advances to the next token, which is exposed through
    /// <see cref="CurrentToken" />, <see cref="TokenText" /> and
    /// <see cref="CurrentPosition" />.
    /// </remarks>
    public class ExpressionTokenizer {
        /// <summary>
        /// Location of a token, expressed as a zero-based character index
        /// into the text being tokenized.
        /// </summary>
        public struct Position {
            private int _charIndex;

            /// <summary>
            /// Creates a position for the given character index.
            /// </summary>
            /// <param name="charIndex">Zero-based index into the tokenized text.</param>
            public Position(int charIndex) {
                _charIndex = charIndex;
            }

            /// <summary>
            /// Gets the zero-based character index of this position.
            /// </summary>
            public int CharIndex {
                get { return _charIndex; }
            }
        }

        /// <summary>
        /// Available tokens
        /// </summary>
        public enum TokenType {
            /// <summary>State before the first token has been read.</summary>
            BOF,
            /// <summary>End of the input text.</summary>
            EOF,
            /// <summary>A run of decimal digits.</summary>
            Number,
            /// <summary>A single-quoted string literal.</summary>
            String,
            /// <summary>An identifier made of letters, digits, '_' and '-'.</summary>
            Keyword,
            /// <summary>"=="</summary>
            EQ,
            /// <summary>"!="</summary>
            NE,
            /// <summary>'&lt;'</summary>
            LT,
            /// <summary>'&gt;'</summary>
            GT,
            /// <summary>"&lt;="</summary>
            LE,
            /// <summary>"&gt;="</summary>
            GE,
            /// <summary>'+'</summary>
            Plus,
            /// <summary>'-'</summary>
            Minus,
            /// <summary>'*'</summary>
            Mul,
            /// <summary>'/'</summary>
            Div,
            /// <summary>'%'</summary>
            Mod,
            /// <summary>'('</summary>
            LeftParen,
            /// <summary>')'</summary>
            RightParen,
            /// <summary>'{'</summary>
            LeftCurlyBrace,
            /// <summary>'}'</summary>
            RightCurlyBrace,
            /// <summary>'!'</summary>
            Not,
            /// <summary>Any single character that has no dedicated token type.</summary>
            Punctuation,
            /// <summary>A run of whitespace (only produced when whitespace is not ignored).</summary>
            Whitespace,
            /// <summary>'$'</summary>
            Dollar,
            /// <summary>','</summary>
            Comma,
            /// <summary>'.'</summary>
            Dot,
            /// <summary>"::"</summary>
            DoubleColon,
        }

        #region Public Instance Constructors

        /// <summary>
        /// Creates a tokenizer. <see cref="InitTokenizer" /> must be called
        /// before tokens can be read.
        /// </summary>
        public ExpressionTokenizer() {
        }

        #endregion Public Instance Constructors

        #region Static Constructor

        /// <summary>
        /// Builds the character-to-token lookup table: every slot defaults to
        /// <see cref="TokenType.Punctuation" /> and the characters listed in
        /// <c>charToTokenType</c> are then given their dedicated token type.
        /// </summary>
        static ExpressionTokenizer() {
            for (int i = 0; i < charIndexToTokenType.Length; ++i) {
                charIndexToTokenType[i] = TokenType.Punctuation;
            }

            foreach (CharToTokenType cht in charToTokenType) {
                charIndexToTokenType[(int) cht.ch] = cht.tokenType;
            }
        }

        #endregion Static Constructor

        #region Public Instance Properties

        /// <summary>
        /// Gets or sets a value indicating whether whitespace is skipped
        /// before each token. Defaults to <see langword="true" />. When
        /// <see langword="false" /> (and <see cref="SingleCharacterMode" /> is
        /// off), a run of whitespace is returned as a single
        /// <see cref="TokenType.Whitespace" /> token.
        /// </summary>
        public bool IgnoreWhitespace {
            get { return _ignoreWhiteSpace; }
            set { _ignoreWhiteSpace = value; }
        }

        /// <summary>
        /// Gets or sets a value indicating whether every character is
        /// returned as its own one-character token. In this mode numbers,
        /// strings, keywords, whitespace runs and two-character operators are
        /// not recognized. Defaults to <see langword="false" />.
        /// </summary>
        public bool SingleCharacterMode {
            get { return _singleCharacterMode; }
            set { _singleCharacterMode = value; }
        }

        /// <summary>
        /// Gets the type of the current token.
        /// </summary>
        public TokenType CurrentToken {
            get { return _tokenType; }
        }

        /// <summary>
        /// Gets the text of the current token. For string literals this is
        /// the unquoted, unescaped content. The value is not updated when
        /// <see cref="TokenType.EOF" /> is reached.
        /// </summary>
        public string TokenText {
            get { return _tokenText; }
        }

        /// <summary>
        /// Gets the position in the input at which the current token starts.
        /// </summary>
        public Position CurrentPosition {
            get { return _tokenStartPosition; }
        }

        #endregion Public Instance Properties

        #region Public Instance Methods

        /// <summary>
        /// Starts tokenizing <paramref name="s" /> from its first character
        /// and reads the first token.
        /// </summary>
        /// <param name="s">The text to tokenize.</param>
        /// <exception cref="ArgumentNullException"><paramref name="s" /> is <see langword="null" />.</exception>
        public void InitTokenizer(string s) {
            if (s == null) {
                throw new ArgumentNullException("s");
            }

            _text = s;
            _position = 0;
            _tokenType = TokenType.BOF;

            GetNextToken();
        }

        /// <summary>
        /// Advances to the next token in the input.
        /// </summary>
        /// <exception cref="ExpressionParseException">
        /// The current token is already <see cref="TokenType.EOF" />, or a
        /// keyword ends with a '-' character.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// <see cref="InitTokenizer" /> has not been called yet.
        /// </exception>
        public void GetNextToken() {
            if (_tokenType == TokenType.EOF) {
                throw new ExpressionParseException(ResourceUtils.GetString("String_CannotReadPastStream"), -1, -1);
            }

            if (_text == null) {
                throw new InvalidOperationException("InitTokenizer must be called before GetNextToken.");
            }

            if (IgnoreWhitespace) {
                SkipWhitespace();
            }

            _tokenStartPosition = new Position(_position);

            int i = PeekChar();
            if (i == -1) {
                _tokenType = TokenType.EOF;
                return;
            }

            char ch = (char) i;

            if (!SingleCharacterMode) {
                if (!IgnoreWhitespace && Char.IsWhiteSpace(ch)) {
                    ReadWhitespaceToken();
                    return;
                }

                if (Char.IsDigit(ch)) {
                    ReadNumberToken();
                    return;
                }

                if (ch == '\'') {
                    ReadStringToken();
                    return;
                }

                if (ch == '_' || Char.IsLetter(ch)) {
                    ReadKeywordToken();
                    return;
                }

                ReadChar();

                if (TryReadTwoCharOperator(ch)) {
                    return;
                }
            } else {
                ReadChar();
            }

            // Single-character token: look up its type, falling back to
            // Punctuation for anything outside the table's range.
            _tokenText = new String(ch, 1);
            _tokenType = TokenType.Punctuation;
            if (ch >= 32 && ch < 128) {
                _tokenType = charIndexToTokenType[ch];
            }
        }

        /// <summary>
        /// Determines whether the current token is a keyword with the given text.
        /// </summary>
        /// <param name="k">The keyword text to compare against (case-sensitive).</param>
        /// <returns>
        /// <see langword="true" /> if the current token is a
        /// <see cref="TokenType.Keyword" /> whose text equals <paramref name="k" />.
        /// </returns>
        public bool IsKeyword(string k) {
            return (_tokenType == TokenType.Keyword) && (_tokenText == k);
        }

        #endregion Public Instance Methods

        #region Private Instance Methods

        /// <summary>
        /// Consumes a run of whitespace and exposes it as a Whitespace token.
        /// </summary>
        private void ReadWhitespaceToken() {
            StringBuilder sb = new StringBuilder();
            int next;

            while ((next = PeekChar()) != -1 && Char.IsWhiteSpace((char) next)) {
                sb.Append((char) ReadChar());
            }

            _tokenType = TokenType.Whitespace;
            _tokenText = sb.ToString();
        }

        /// <summary>
        /// Consumes a run of digits and exposes it as a Number token.
        /// </summary>
        private void ReadNumberToken() {
            _tokenType = TokenType.Number;

            StringBuilder sb = new StringBuilder();
            int next;

            while ((next = PeekChar()) != -1 && Char.IsDigit((char) next)) {
                sb.Append((char) ReadChar());
            }

            _tokenText = sb.ToString();
        }

        /// <summary>
        /// Consumes a single-quoted string literal and exposes its content as
        /// a String token. Two consecutive quotes inside the literal stand
        /// for one quote character.
        /// </summary>
        private void ReadStringToken() {
            _tokenType = TokenType.String;

            StringBuilder sb = new StringBuilder();
            int i;

            // skip the opening quote
            ReadChar();

            // NOTE: a literal that is not closed before the end of the input
            // is accepted as-is; reading simply stops at end of input.
            while ((i = ReadChar()) != -1) {
                char ch = (char) i;
                if (ch == '\'') {
                    if (PeekChar() != (int) '\'') {
                        // closing quote
                        break;
                    }
                    // doubled quote: consume the second one, keep a single quote
                    ReadChar();
                }
                sb.Append(ch);
            }

            _tokenText = sb.ToString();
        }

        /// <summary>
        /// Consumes an identifier (a letter or '_' followed by letters,
        /// digits, '_' or '-') and exposes it as a Keyword token.
        /// </summary>
        /// <exception cref="ExpressionParseException">The identifier ends with '-'.</exception>
        private void ReadKeywordToken() {
            _tokenType = TokenType.Keyword;

            StringBuilder sb = new StringBuilder();
            int next;

            // the caller has already verified the first character
            sb.Append((char) ReadChar());

            while ((next = PeekChar()) != -1) {
                char c = (char) next;
                if (c != '_' && c != '-' && !Char.IsLetterOrDigit(c)) {
                    break;
                }
                sb.Append(c);
                ReadChar();
            }

            _tokenText = sb.ToString();

            // Ordinal check on the last character; the text always holds at
            // least the first character, so the index is valid.
            if (_tokenText[_tokenText.Length - 1] == '-') {
                throw new ExpressionParseException(String.Format(CultureInfo.InvariantCulture,
                    ResourceUtils.GetString("NA1182"), _tokenText),
                    CurrentPosition.CharIndex);
            }
        }

        /// <summary>
        /// Given an already-consumed character, checks whether it forms one
        /// of the two-character operators ("::", "!=", "==", "&lt;=", "&gt;=")
        /// together with the next character. If so, consumes that next
        /// character and sets the current token.
        /// </summary>
        /// <param name="first">The character that was just consumed.</param>
        /// <returns><see langword="true" /> if a two-character operator was read.</returns>
        private bool TryReadTwoCharOperator(char first) {
            int second = PeekChar();

            if (first == ':' && second == (int) ':') {
                _tokenType = TokenType.DoubleColon;
                _tokenText = "::";
            } else if (second != (int) '=') {
                return false;
            } else if (first == '!') {
                _tokenType = TokenType.NE;
                _tokenText = "!=";
            } else if (first == '=') {
                _tokenType = TokenType.EQ;
                _tokenText = "==";
            } else if (first == '<') {
                _tokenType = TokenType.LE;
                _tokenText = "<=";
            } else if (first == '>') {
                _tokenType = TokenType.GE;
                _tokenText = ">=";
            } else {
                return false;
            }

            // consume the second character of the operator
            ReadChar();
            return true;
        }

        /// <summary>
        /// Returns the character at the current position and advances past
        /// it, or -1 when the end of the text has been reached.
        /// </summary>
        private int ReadChar() {
            if (_position < _text.Length) {
                return _text[_position++];
            }
            return -1;
        }

        /// <summary>
        /// Returns the character at the current position without advancing,
        /// or -1 when the end of the text has been reached.
        /// </summary>
        private int PeekChar() {
            if (_position < _text.Length) {
                return _text[_position];
            }
            return -1;
        }

        /// <summary>
        /// Advances past any whitespace at the current position.
        /// </summary>
        private void SkipWhitespace() {
            int ch;
            while ((ch = PeekChar()) != -1 && Char.IsWhiteSpace((char) ch)) {
                ReadChar();
            }
        }

        #endregion Private Instance Methods

        #region Private Instance Fields

        private string _text = null;
        private int _position;
        private Position _tokenStartPosition;
        private TokenType _tokenType;
        private string _tokenText;
        private bool _ignoreWhiteSpace = true;
        private bool _singleCharacterMode = false;

        #endregion Private Instance Fields

        #region Private Static Fields

        // Characters that map to a dedicated single-character token type.
        private static readonly CharToTokenType[] charToTokenType = {
            new CharToTokenType('+', TokenType.Plus),
            new CharToTokenType('-', TokenType.Minus),
            new CharToTokenType('*', TokenType.Mul),
            new CharToTokenType('/', TokenType.Div),
            new CharToTokenType('%', TokenType.Mod),
            new CharToTokenType('<', TokenType.LT),
            new CharToTokenType('>', TokenType.GT),
            new CharToTokenType('(', TokenType.LeftParen),
            new CharToTokenType(')', TokenType.RightParen),
            new CharToTokenType('{', TokenType.LeftCurlyBrace),
            new CharToTokenType('}', TokenType.RightCurlyBrace),
            new CharToTokenType('!', TokenType.Not),
            new CharToTokenType('$', TokenType.Dollar),
            new CharToTokenType(',', TokenType.Comma),
            new CharToTokenType('.', TokenType.Dot),
        };

        // Lookup table indexed by character code (0-127), filled in by the
        // static constructor.
        private static readonly TokenType[] charIndexToTokenType = new TokenType[128];

        #endregion Private Static Fields

        /// <summary>
        /// Pairs a character with the token type it represents.
        /// </summary>
        private struct CharToTokenType {
            public readonly char ch;
            public readonly TokenType tokenType;

            public CharToTokenType(char ch, TokenType tokenType) {
                this.ch = ch;
                this.tokenType = tokenType;
            }
        }
    }
}