#! perl # Copyright (C) 2003-2007, The Perl Foundation. # $Id: COMP_toker.pm 21232 2007-09-12 19:30:46Z paultcochrane $ my ( $cur, $next, $afternext, $curtok ); my $stmts = 0; sub fetch { $cur = $next; $next = $afternext; $afternext = getc(D); $afternext = "" if ( !defined $afternext ); $stmts++ if $afternext =~ /\n/; } sub append { $curtok .= $cur; } sub emit { unless ( $_[0] eq "STRING" or $_[0] eq "COMM" or $_[0] eq "DIRECTIVE" ) { $curtok =~ tr/A-Z/a-z/; } push( @tokens, $curtok ); push( @tokdsc, $_[0] ); $curtok = ""; } sub tokenize { fetch; fetch; fetch; MAIN: goto END if $cur eq -1; if ( $cur =~ /[\t ]/ ) { # Whitespace fetch; goto MAIN; } if ( $cur =~ /[\d\.]/ ) { # Numeric constants goto NOTNUM if ( $cur eq "." and $next !~ /\d/ ); my $numtype = "INT"; if ( $cur eq "." ) { $numtype = "FLO"; $cur = "0."; } append; fetch; NUMCONST: if ( $cur =~ /\d/ ) { append; fetch; goto NUMCONST; } if ( $cur eq '.' ) { append; fetch; $numtype = "FLO"; goto NUMCONST; } if ( $cur =~ /[eEdD]/ and $next =~ /[-+\d]/ ) { append; fetch; append; # Pick up next char too fetch; $numtype = "FLO"; goto NUMCONST; } # Falls through! if ( $cur =~ /[#!&%]/ ) { append; fetch; if ( $cur =~ /[!#]/ ) { $numtype = "FLO"; } } emit($numtype); goto MAIN; } NOTNUM: #if ($cur eq "-" and $next =~ /[\d\.]/) { # append; # fetch; # goto NUMCONST; #} if ( $cur eq '"' ) { # String constants append; fetch; while ( $cur ne '"' ) { if ( $cur eq "\n" ) { $cur = q{"}; # EOL is also an end-quote last; } die "Unexpected EOF (missing quote?)" if ( $cur eq -1 or $cur eq "\n" ); append; fetch; } append; $curtok =~ s/^"|"$//g; # Remove quote marks! emit("STRING"); fetch; goto MAIN; } if ( ( $cur eq "'" ) or ( $cur =~ /[Rr]/ and $next =~ /[Ee]/ and $afternext =~ /[Mm]/ ) ) { # Comments append; fetch; while ( $cur ne -1 and $cur ne "\n" ) { append; fetch; } emit("COMM"); fetch; goto MAIN; } if ( $cur =~ /[A-Za-z]/ ) { # Bareword append; fetch; while ( $cur =~ /[A-Za-z0-9]/ or $cur =~ /[#!&\$%]/ ) { # SIGIL append; if ( $cur =~ /[#!&\$%]/ ) { fetch; last; } fetch; } emit("BARE"); goto MAIN; } if ( $cur eq '&' ) { # Hex or Octal constant append; fetch; die "Bad constant" unless ( $cur =~ /[oOhH]/ ); if ( $cur =~ /[oO]/ ) { append; fetch; while ( $cur =~ /[0-7]/ ) { append; fetch; } } elsif ( $cur =~ /[hH]/ ) { append; fetch; while ( $cur =~ /[0-7a-fA-F]/ ) { append; fetch; } } if ( $cur eq '&' or $cur eq '%' ) { append; fetch; } emit("BASE"); goto MAIN; } if ( $cur eq ">" or $cur eq "<" or $cur eq "=" ) { append; if ( $next eq "=" or $next eq ">" ) { fetch; append; } emit("Compare"); fetch; goto MAIN; } if ( $cur eq "_" and $next =~ /\w/ ) { # Directive append; fetch; while ( $cur =~ /\w/ ) { append; fetch; } emit("BARE"); until ( $cur eq "\n" and $next eq "_" and $afternext =~ /\w/ ) { append; fetch; } append; fetch; emit("DIRECTIVE"); while ( $cur =~ /\w/ ) { append; fetch; } emit("BARE"); fetch; goto MAIN; } if ( $cur =~ /[-^+\/*()#,\\_.;]/ ) { # Punct append; emit("PUN"); fetch; goto MAIN; } if ( $cur eq "\n" ) { # ;? append; emit("STMT"); fetch; goto MAIN; } if ( $cur eq ":" ) { append; emit("COMP"); fetch; goto MAIN; } if ( $cur eq "" ) { goto END } die "unknown: $cur at source line $stmts"; goto MAIN; END: emit("STMT"); return; } 1; # Local Variables: # mode: cperl # cperl-indent-level: 4 # fill-column: 100 # End: # vim: expandtab shiftwidth=4: