# the scanner/lexer

require 'strscan'
require 'puppet'


module Puppet
    class LexError < RuntimeError; end
    module Parser
        #---------------------------------------------------------------
        class Lexer
            attr_reader :line, :last, :file

            attr_accessor :indefine

                #%r{\w+} => :WORD,
            @@tokens = {
                %r{#.*} => :COMMENT,
                %r{\[} => :LBRACK,
                %r{\]} => :RBRACK,
                %r{\{} => :LBRACE,
                %r{\}} => :RBRACE,
                %r{\(} => :LPAREN,
                %r{\)} => :RPAREN,
                %r{\"} => :DQUOTE,
                %r{\n} => :RETURN,
                %r{\'} => :SQUOTE,
                %r{=} => :EQUALS,
                %r{==} => :ISEQUAL,
                %r{>=} => :GREATEREQUAL,
                %r{>} => :GREATERTHAN,
                %r{<} => :LESSTHAN,
                %r{<=} => :LESSEQUAL,
                %r{!=} => :NOTEQUAL,
                %r{!} => :NOT,
                %r{,} => :COMMA,
                %r{\.} => :DOT,
                %r{:} => :COLON,
                %r{@} => :AT,
                %r{<<\|} => :LLCOLLECT,
                %r{\|>>} => :RRCOLLECT,
                %r{<\|} => :LCOLLECT,
                %r{\|>} => :RCOLLECT,
                %r{;} => :SEMIC,
                %r{\?} => :QMARK,
                %r{\\} => :BACKSLASH,
                %r{=>} => :FARROW,
                %r{\+>} => :PARROW,
                %r{[a-z][-\w]*} => :NAME,
                %r{([a-z][-\w]*::)+[a-z][-\w]*} => :CLASSNAME,
                %r{([A-Z][-\w]*::)+[A-Z][-\w]*} => :CLASSREF,
                %r{[A-Z][-\w]*} => :TYPE,
                %r{[0-9]+} => :NUMBER,
                %r{\$(\w*::)*\w+} => :VARIABLE
            }

            @@pairs = {
                "{" => "}",
                "(" => ")",
                "[" => "]",
                "<|" => "|>",
                "<<|" => "|>>"
            }

            @@reverse_pairs = @@pairs.inject({}) { |hash, pair| hash[pair[1]] = pair[0]; hash }

            @@keywords = {
                "case" => :CASE,
                "class" => :CLASS,
                "default" => :DEFAULT,
                "define" => :DEFINE,
                "false" => :BOOLEAN,
                "import" => :IMPORT,
                "if" => :IF,
                "elsif" => :ELSIF,
                "else" => :ELSE,
                "inherits" => :INHERITS,
                "node" => :NODE,
                "true" => :BOOLEAN,
                "and"  => :AND,
                "or"   => :OR,
                "undef"   => :UNDEF
            }

            def clear
                initvars
            end

            def expected
                if @expected.empty?
                    nil
                else
                    token = @expected[-1]
                    @@tokens.each do |value, name|
                        if token == name
                            return value
                        end
                    end
                    return token
                end
            end

            # scan the whole file
            # basically just used for testing
            def fullscan
                array = []

                self.scan { |token,str|
                    # Ignore any definition nesting problems
                    @indefine = false
                    #Puppet.debug("got token '%s' => '%s'" % [token,str])
                    if token.nil?
                        return array
                    else
                        array.push([token,str])
                    end
                }
                return array
            end

            # this is probably pretty damned inefficient...
            # it'd be nice not to have to load the whole file first...
            def file=(file)
                @file = file
                @line = 1
                File.open(file) { |of|
                    str = ""
                    of.each { |line| str += line }
                    @scanner = StringScanner.new(str)
                }
            end

            def indefine?
                if defined? @indefine
                    @indefine
                else
                    false
                end
            end

            def initialize
                initvars()
            end

            def initvars
                @line = 1
                @last = ""
                @lasttoken = nil
                @scanner = nil
                @file = nil
                # AAARRGGGG! okay, regexes in ruby are bloody annoying
                # no one else has "\n" =~ /\s/
                @skip = %r{[ \t]+}

                @namestack = []
                @indefine = false

                @expected = []
            end

            # Go up one in the namespace.
            def namepop
                @namestack.pop
            end

            # Collect the current namespace.
            def namespace
                @namestack.join("::")
            end

            # This value might have :: in it, but we don't care -- it'll be
            # handled normally when joining, and when popping we want to pop
            # this full value, however long the namespace is.
            def namestack(value)
                @namestack << value
            end

            def rest
                @scanner.rest
            end

            # this is the heart of the lexer
            def scan
                #Puppet.debug("entering scan")
                if @scanner.nil?
                    raise TypeError.new("Invalid or empty string")
                end

                @scanner.skip(@skip)
                until @scanner.eos? do
                    yielded = false
                    sendbreak = false # gah, this is a nasty hack
                    stoken = nil
                    sregex = nil
                    value = ""

                    # first find out which type of token we've got
                    @@tokens.each { |regex,token|
                        # we're just checking, which doesn't advance the scan
                        # pointer
                        tmp = @scanner.check(regex)
                        if tmp.nil?
                            #puppet.debug("did not match %s to '%s'" %
                            #    [regex,@scanner.rest])
                            next
                        end

                        # find the longest match
                        if tmp.length > value.length
                            value = tmp 
                            stoken = token
                            sregex = regex
                        else
                            # we've already got a longer match
                            next
                        end
                    }

                    # error out if we didn't match anything at all
                    if stoken.nil?
                        nword = nil
                        if @scanner.rest =~ /^(\S+)/
                            nword = $1
                        elsif@scanner.rest =~ /^(\s+)/
                            nword = $1
                        else
                            nword = @scanner.rest
                        end
                        raise "Could not match '%s'" % nword
                    end

                    value = @scanner.scan(sregex)

                    if value == ""
                        raise "Didn't match regex on token %s" % stoken
                    end

                    # token-specific operations
                    # if this gets much more complicated, it should
                    # be moved up to where the tokens themselves are defined
                    # which will get me about 75% of the way to a lexer generator
                    ptoken = stoken
                    case stoken
                    when :NAME then
                        wtoken = stoken
                        # we're looking for keywords here
                        if @@keywords.include?(value)
                            wtoken = @@keywords[value]
                            #Puppet.debug("token '%s'" % wtoken)
                            if wtoken == :BOOLEAN
                                value = eval(value)
                            end
                        end
                        ptoken = wtoken
                    when :NUMBER then
                        ptoken = :NAME
                    when :COMMENT then
                        # just throw comments away
                        next
                    when :RETURN then
                        @line += 1
                        @scanner.skip(@skip)
                        next
                    when :SQUOTE then
                        #Puppet.debug("searching '%s' after '%s'" % [self.rest,value])
                        value = self.slurpstring(value)
                        ptoken = :SQTEXT
                        #Puppet.debug("got string '%s' => '%s'" % [:DQTEXT,value])
                    when :DQUOTE then
                        value = self.slurpstring(value)
                        ptoken = :DQTEXT
                    when :VARIABLE then
                        value = value.sub(/^\$/, '')
                    end

                    if match = @@pairs[value] and ptoken != :DQUOTE and ptoken != :SQUOTE
                        @expected << match
                    elsif exp = @expected[-1] and exp == value and ptoken != :DQUOTE and ptoken != :SQUOTE
                        @expected.pop
                    end

                    yield [ptoken, value]

                    if @lasttoken == :CLASS
                        namestack(value)
                    end

                    if @lasttoken == :DEFINE
                        if indefine?
                            msg = "Cannot nest definition %s inside %s" % [value, @indefine]
                            self.indefine = false
                            raise Puppet::ParseError, msg
                        end

                        @indefine = value
                    end

                    @last = value
                    @lasttoken = ptoken

                    @scanner.skip(@skip)
                end
                @scanner = nil
                yield [false,false]
            end

            # we've encountered an opening quote...
            # slurp in the rest of the string and return it
            def slurpstring(quote)
                # we search for the next quote that isn't preceded by a
                # backslash; the caret is there to match empty strings
                str = @scanner.scan_until(/([^\\]|^)#{quote}/)
                if str.nil?
                    raise Puppet::LexError.new("Unclosed quote after '%s' in '%s'" %
                        [self.last,self.rest])
                else
                    str.sub!(/#{quote}\Z/,"")
                    str.gsub!(/\\#{quote}/,quote)
                end

                return str
            end

            # just parse a string, not a whole file
            def string=(string)
                @scanner = StringScanner.new(string)
            end
        end
        #---------------------------------------------------------------
    end
end

# $Id: lexer.rb 2670 2007-07-10 15:31:19Z luke $


syntax highlighted by Code2HTML, v. 0.9.1