#! perl

# Copyright (C) 2003-2007, The Perl Foundation.
# $Id: COMP_toker.pm 21232 2007-09-12 19:30:46Z paultcochrane $

my ( $cur, $next, $afternext, $curtok );
my $stmts = 0;

sub fetch {
    $cur       = $next;
    $next      = $afternext;
    $afternext = getc(D);
    $afternext = "" if ( !defined $afternext );
    $stmts++ if $afternext =~ /\n/;
}

sub append {
    $curtok .= $cur;
}

sub emit {
    unless ( $_[0] eq "STRING" or $_[0] eq "COMM" or $_[0] eq "DIRECTIVE" ) {
        $curtok =~ tr/A-Z/a-z/;
    }
    push( @tokens, $curtok );
    push( @tokdsc, $_[0] );
    $curtok = "";
}

sub tokenize {

    fetch;
    fetch;
    fetch;
MAIN:
    goto END if $cur eq -1;
    if ( $cur =~ /[\t ]/ ) {    # Whitespace
        fetch;
        goto MAIN;
    }

    if ( $cur =~ /[\d\.]/ ) {    # Numeric constants
        goto NOTNUM if ( $cur eq "." and $next !~ /\d/ );
        my $numtype = "INT";
        if ( $cur eq "." ) {
            $numtype = "FLO";
            $cur     = "0.";
        }
        append;
        fetch;
    NUMCONST: if ( $cur =~ /\d/ ) {
            append;
            fetch;
            goto NUMCONST;
        }
        if ( $cur eq '.' ) {
            append;
            fetch;
            $numtype = "FLO";
            goto NUMCONST;
        }
        if ( $cur =~ /[eEdD]/ and $next =~ /[-+\d]/ ) {
            append;
            fetch;
            append;    # Pick up next char too
            fetch;
            $numtype = "FLO";
            goto NUMCONST;
        }

        # Falls through!
        if ( $cur =~ /[#!&%]/ ) {
            append;
            fetch;
            if ( $cur =~ /[!#]/ ) {
                $numtype = "FLO";
            }
        }
        emit($numtype);
        goto MAIN;
    }
NOTNUM:

    #if ($cur eq "-" and $next =~ /[\d\.]/) {
    #       append;
    #       fetch;
    #       goto NUMCONST;
    #}
    if ( $cur eq '"' ) {    # String constants
        append;
        fetch;
        while ( $cur ne '"' ) {
            if ( $cur eq "\n" ) {
                $cur = q{"};    # EOL is also an end-quote
                last;
            }
            die "Unexpected EOF (missing quote?)" if ( $cur eq -1 or $cur eq "\n" );
            append;
            fetch;
        }
        append;
        $curtok =~ s/^"|"$//g;    # Remove quote marks!
        emit("STRING");
        fetch;
        goto MAIN;
    }
    if (   ( $cur eq "'" )
        or ( $cur =~ /[Rr]/ and $next =~ /[Ee]/ and $afternext =~ /[Mm]/ ) )
    {                             # Comments
        append;
        fetch;
        while ( $cur ne -1 and $cur ne "\n" ) {
            append;
            fetch;
        }
        emit("COMM");
        fetch;
        goto MAIN;
    }
    if ( $cur =~ /[A-Za-z]/ ) {    # Bareword
        append;
        fetch;
        while ( $cur =~ /[A-Za-z0-9]/ or $cur =~ /[#!&\$%]/ ) {    # SIGIL
            append;
            if ( $cur =~ /[#!&\$%]/ ) {
                fetch;
                last;
            }
            fetch;
        }
        emit("BARE");
        goto MAIN;
    }
    if ( $cur eq '&' ) {                                           # Hex or Octal constant
        append;
        fetch;
        die "Bad constant" unless ( $cur =~ /[oOhH]/ );
        if ( $cur =~ /[oO]/ ) {
            append;
            fetch;
            while ( $cur =~ /[0-7]/ ) {
                append;
                fetch;
            }
        }
        elsif ( $cur =~ /[hH]/ ) {
            append;
            fetch;
            while ( $cur =~ /[0-7a-fA-F]/ ) {
                append;
                fetch;
            }
        }
        if ( $cur eq '&' or $cur eq '%' ) {
            append;
            fetch;
        }
        emit("BASE");
        goto MAIN;
    }
    if ( $cur eq ">" or $cur eq "<" or $cur eq "=" ) {
        append;
        if ( $next eq "=" or $next eq ">" ) {
            fetch;
            append;
        }
        emit("Compare");
        fetch;
        goto MAIN;
    }
    if ( $cur eq "_" and $next =~ /\w/ ) {    # Directive
        append;
        fetch;
        while ( $cur =~ /\w/ ) {
            append;
            fetch;
        }
        emit("BARE");
        until ( $cur eq "\n" and $next eq "_" and $afternext =~ /\w/ ) {
            append;
            fetch;
        }
        append;
        fetch;
        emit("DIRECTIVE");
        while ( $cur =~ /\w/ ) {
            append;
            fetch;
        }
        emit("BARE");
        fetch;
        goto MAIN;
    }

    if ( $cur =~ /[-^+\/*()#,\\_.;]/ ) {    # Punct
        append;
        emit("PUN");
        fetch;
        goto MAIN;
    }

    if ( $cur eq "\n" ) {                   # ;?
        append;
        emit("STMT");
        fetch;
        goto MAIN;
    }
    if ( $cur eq ":" ) {
        append;
        emit("COMP");
        fetch;
        goto MAIN;
    }
    if ( $cur eq "" ) { goto END }
    die "unknown: $cur at source line $stmts";
    goto MAIN;

END: emit("STMT");
    return;

}

1;

# Local Variables:
#   mode: cperl
#   cperl-indent-level: 4
#   fill-column: 100
# End:
# vim: expandtab shiftwidth=4:


syntax highlighted by Code2HTML, v. 0.9.1