package Regex::Rewrite;
use Regex::Ops::Tree;
use Regex::Ops::List;
# Copyright (C) 2002-2006, The Perl Foundation.
# $Id: Rewrite.pm 21249 2007-09-13 06:33:24Z paultcochrane $
require 'Regex.pm';
use strict;
use warnings;
*aop = *Regex::Rewrite::aop;
*rop = *Regex::Rewrite::rop;
# The basic method signature used here:
#
# rewrite_X : (optree, lastback) -> (back, ops...)
#
# where $lastback is the previous backtracking point that we should
# jump to if matching $op fails; $back is the backtracking point that
# can be returned to within the processing of the current $op to fix
# up the partial state and possibly attempt another match, or just
# abort and return to $lastback.
#
sub new {
my ( $proto, %options ) = @_;
my $self = bless \%options, ( ref($proto) || $proto );
$self->init();
return $self;
}
sub init {
my ( $self, @args ) = @_;
$self->{_markers} = {};
$self->{_temp_int_count} = 3;
$self->{state} ||= Regex->global_state();
my $FAIL = $self->genlabel("FAIL");
$self->{_labels}{'fail'} = $FAIL;
}
sub aop {
Regex::Ops::List->op(@_);
}
sub genlabel {
my ( $self, $desc ) = @_;
return $self->{state}->genlabel($desc);
}
sub alloc_temp_int {
my ( $self, $name ) = @_;
$name ||= "_temp_int";
if ( exists $self->{_temps}{$name} ) {
$name .= ++$self->{_temp_int_count};
}
$self->{_temps}{$name} = 'int';
return "<$name>";
}
# This implementation should be overridden by the host language,
# because it's not a very good one.
sub new_local {
my ( $self, $name, $type ) = @_;
$type ||= 'int';
die "cannot handle type '$type'" if $type ne 'int';
# Bad implementation -- does not handle recursion. Actual
# instances should probably be subclasses that use something like
# IMCC's facilities for creating local vars.
return $self->alloc_temp_int($name);
}
sub new_rxlocal {
my ( $self, $op, $name, $type ) = @_;
my $var = $self->new_local( $name, $type );
push @{ $op->{rxlocals} }, $var;
return $var;
}
sub op_save_rxlocals {
my ( $self, $op ) = @_;
if ( $op->{non_reentrant} ) {
# Mark this variable as needing to be preserved across rule calls
push @{ $self->{rxlocals} }, @{ $op->{rxlocals} };
return ();
}
else {
my $rxlocals = ( $op->{rxlocals} ||= [] );
return ( aop( comment => ["save rxlocals for op"] ),
map { aop( 'pushint' => [ $_, "op rxlocal $_" ] ) } @$rxlocals );
}
}
sub op_restore_rxlocals {
my ( $self, $op ) = @_;
if ( $op->{non_reentrant} ) {
return ();
}
else {
my $rxlocals = ( $op->{rxlocals} ||= [] );
return ( aop( comment => ["restore rxlocals for op"] ),
map { aop( 'popint' => [ $_, "op rxlocal $_" ] ) }
reverse @$rxlocals );
}
}
sub rule_save_rxlocals {
my ($self) = @_;
my $rxlocals = ( $self->{rxlocals} ||= [] );
return ( aop( comment => ["save rxlocals"] ),
map { aop( 'pushint' => [ $_, "rxlocal $_" ] ) } @$rxlocals );
}
sub rule_restore_rxlocals {
my ($self) = @_;
my $rxlocals = ( $self->{rxlocals} ||= [] );
return ( aop( comment => ["restore rxlocals"] ),
map { aop( 'popint' => [ $_, "rxlocal $_" ] ) }
reverse @$rxlocals );
}
sub get_temp {
my ( $self, $name ) = @_;
return $self->{_temps}->{$name} || die "Requested unallocated temporary!";
}
######################## Default rewrite rules #######################
sub rewrite_terminate {
my ( $self, $op, $R ) = @_;
return $R, aop( 'goto', [ $self->{_return} ] );
}
sub rewrite_goto {
my ( $self, $op, $R, $lastback ) = @_;
return aop( 'goto', [$R] ) if $R->{name} eq 'LABEL';
return $self->rewrite( $R, $lastback );
}
sub rewrite_test {
my ( $self, $op, $op1, $test, $op2, $dest, $lastback ) = @_;
my $continue = $self->genlabel('after_test');
my $rev_test = {
"==" => "!=",
"!=" => "==",
"<" => ">=",
">" => "<=",
"<=" => ">",
">=" => "<",
}->{$test};
if ( $dest->{name} eq 'goto' ) {
return aop( 'if', [ $op1, $test, $op2, $dest->[1] ] );
}
elsif ( $dest->{name} eq 'LABEL' ) {
return aop( 'if', [ $op1, $test, $op2, $dest ] );
}
else {
my $testop = rop( 'test', [ $op1, $rev_test, $op2, rop( 'goto', [$continue] ) ] );
return ( $self->rewrite( $testop, $lastback ), $self->rewrite( $dest, $lastback ),
$continue );
}
}
# TODO: can_match_empty (so s/a*/x/g doesn't go into infinite loop)
# This is sometimes a runtime property.
sub rewrite_multi_match {
my ( $self, $op, $min, $max, $greedy, $R, @rest ) = @_;
if ( ( $min == 0 ) && defined($max) && ( $max == 1 ) ) {
return $self->rewrite_optional( $op, $R, $greedy, @rest );
}
elsif ( ( $min == 0 ) && ( !defined($max) || ( $max == -1 ) ) ) {
return $self->rewrite_star( $op, $R, $greedy, @rest );
}
elsif ( ( $min == 1 ) && ( !defined($max) || $max == -1 ) && $self->can('rewrite_plus') ) {
if ($greedy) {
return $self->rewrite_plus( $op, $R, @rest );
}
else {
return $self->rewrite_nongreedy_plus( $op, $R, @rest );
}
}
else {
if ($greedy) {
return $self->rewrite_greedy_range( $op, $R, $min, $max, @rest );
}
else {
return $self->rewrite_nongreedy_range( $op, $R, $min, $max, @rest );
}
}
}
####### Old Stackless start #######
# Does nothing if DEBUG is not set (if you're using this through
# perl6, then you can turn this on with the $RX_DEBUG environment
# variable)
#
# Otherwise, emits a print of the given string, but with %-escapes.
sub dbprint {
my ( $self, $what ) = @_;
return () unless $self->{DEBUG};
my @ops;
$what = "%<rx_pos>: $what";
foreach my $part ( $what =~ /((?:\%\<\w+\>)|[^\%]+)/g ) {
if ( $part =~ /^%/ ) {
push @ops, aop( 'print', [ substr( $part, 1 ) ] );
}
else {
$part =~ s/(["'\\])/\\$1/g;
$part =~ s/\n/\\n/g;
push @ops, aop( 'print', ["\"$part\""] );
}
}
return @ops;
}
sub need_group_setup {
my ( $self, $group ) = @_;
$self->{_setup_starts}{$group} = 1;
$self->{_setup_ends}{$group} = 1;
}
sub rewrite_try {
my ( $self, $op, $R, $lastback ) = @_;
return $self->rewrite( $R, $lastback );
}
# Most general case:
#
# ( R ) ->
# push start[n]
# push end[n]
# start[n] <- pos
# R or rfail
# end[n] <- pos
# goto next
#
# rfail: pop -> end[n]
# pop -> start[n]
# goto lastback
#
# back: end[n] <- -2
# goto R.back
#
# next:
#
# The 'back' part is used only so that $n within R does not return a
# now-abandoned match. I suppose it should restore $n to its previous
# value, but I'm not going to bother with that for now. (It'll rarely
# have a valid previous value anyway; I think that'll only come in for
# situations like (R)*, which are silly anyway. Then again, I'm
# probably wrong.)
#
sub rewrite_group {
my ( $self, $op, $R, $group, $lastback ) = @_;
my $rfail = $self->genlabel("group_iback");
my $back = $self->genlabel("group_back");
my $next = $self->genlabel("group_next");
$self->need_group_setup($group);
my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
my @ops = (
aop( 'getstart', [ '<tmp>', $group ] ),
$self->dbprint("pushing start[$group]: "),
aop( 'pushint', ['<tmp>'] ),
aop( 'getend', [ '<tmp>', $group ] ),
$self->dbprint("pushing end[$group]: "),
aop( 'pushint', ['<tmp>'] ),
aop( 'setstart', [ $group, '<pos>' ] ),
@R_ops,
$self->dbprint("setting end[$group] := %<rx_pos>-1\n"),
aop( 'setend', [ $group, '<pos>', -1 ] ),
aop( 'goto', [$next] ),
$rfail => $self->dbprint("R in group failed\n"),
aop( 'popint', [ '<tmp>', 'group end' ] ),
aop( 'setend', [ $group, '<tmp>' ] ),
aop( 'popint', [ '<tmp>', 'group start' ] ),
aop( 'setstart', [ $group, '<tmp>' ] ),
aop( 'goto', [$lastback] ),
$back => aop( 'setend', [ $group, -2 ] ),
aop( 'goto', [$R_back] ),
$next =>
);
return ( $back, @ops );
}
# Cost: 4 + 2ff (insanely high!) if we need to check the length
# 3 + 2ff otherwise
# 1 of that is a goto that could be eliminated pretty easily.
sub rewrite_match {
my ( $self, $op, $char, $lastback ) = @_;
my $back = $self->genlabel('undo_match');
my $next = $self->genlabel('after_match');
my @ops;
push @ops, aop( 'check', [ 1, $lastback ] )
unless ( $op->{nocheck} );
my @debugging;
if ( $self->{DEBUG} ) {
my $old_lastback = $lastback;
$lastback = $self->genlabel('debug_matchback');
@debugging = (
$lastback => $self->dbprint("failed to match $char at %<rx_pos>\n"),
aop( 'goto', [$old_lastback] )
);
}
push @ops,
(
aop( 'match', [ $char, $lastback ] ),
aop( 'increment', [ 1, $lastback ] ),
aop( 'goto', [$next] ),
$back => $self->dbprint("Unmatching $char\n"),
aop( 'increment', [ -1, $lastback ] ),
aop( 'goto', [$lastback] ),
@debugging,
$next =>
);
return ( $back, @ops );
}
sub rewrite_check {
my ( $self, $op, $amount, $R, $lastback ) = @_;
my ( $R_back, @R_body ) = $self->rewrite( $R, $lastback );
my @ops = ( aop( 'check', [ $amount, $lastback ] ), @R_body );
return ( $R_back, @ops );
}
sub _translate_classpieces {
my ($pieces) = @_;
# Empty list
return [] if @$pieces == 0;
# Negated list
if ( $pieces->[0] eq 'neg' ) {
return _negate_incexc( _translate_classpieces( $pieces->[1] ) );
}
# Convert "a-b" to [97,98] and "a" to [97,97]
my @ranges;
foreach (@$pieces) {
my ( $first, $last );
if ( ref $_ ) {
( $first, $last ) = @$_;
}
else {
( $first, $last ) = ( $_, $_ );
}
push @ranges, [ ord($first), ord($last) ];
}
return _ranges_to_incexc( \@ranges );
}
sub _ranges_to_incexc {
my @ranges = @{ shift() };
# Sort those pairs by their first element
@ranges = sort { $a->[0] <=> $b->[0] } @ranges;
# Build up an inclusion/exclusion list
my @incexc;
foreach (@ranges) {
my ( $first, $last ) = @$_;
if ( @incexc && $first <= $incexc[-1] ) {
# Merge (1,10)=1..9 with [4,x]=4..x and [10,x]=10..x
if ( $incexc[-1] <= $last + 1 ) {
$incexc[-1] = $last + 1;
}
}
else {
# Append
push @incexc, $first, ( $last + 1 );
}
}
return \@incexc;
}
sub _negate_incexc ($) {
my ($incexc) = @_;
return [0] if ( @$incexc == 0 );
return [ 0, @$incexc ] if $incexc->[0] > 0;
return [] if @$incexc == 1;
shift @$incexc;
$incexc->[0]++;
return $incexc;
}
sub rewrite_classpieces {
my ( $self, $op, $classpieces, $lastback ) = @_;
my $incexc = _translate_classpieces($classpieces);
return $self->rewrite_charclass( $op, $incexc, $lastback );
}
sub rewrite_charclass {
my ( $self, $op, $incexc, $lastback ) = @_;
my @ops;
push @ops, aop( 'check', [ 1, $lastback ] )
unless ( $op->{nocheck} );
my $back = $self->genlabel('undo_charclass');
my $next = $self->genlabel('after_charclass');
push @ops,
(
$self->dbprint("trying classmatch\n"),
aop( 'classmatch', [ $incexc, $lastback ] ),
aop( 'increment', [ 1, $lastback ] ),
aop( 'goto', [$next] ),
$back => aop( 'increment', [ -1, $lastback ] ),
aop( 'goto', [$lastback] ),
$next =>
);
return ( $back, @ops );
}
sub rewrite_advance {
my ( $self, $op, $howfar, $lastback ) = @_;
my $back = $self->genlabel('undo_charclass');
my $next = $self->genlabel('after_charclass');
my @ops = (
aop( 'advance', [ $howfar, $lastback ] ),
aop( 'goto', [$next] ),
$back => aop( 'increment', [ -$howfar ] ),
aop( 'goto', [$lastback] ),
$next =>
);
return ( $back, @ops );
}
sub rewrite_other {
my ( $self, $op, $lastback ) = @_;
return aop( $op->{name}, [ @{ $op->{args} }, $lastback ] );
}
# scan(R) -> scan: R or advance
# goto next
# advance: pos++ or lastback
# goto scan
# next:
#
# back is R.back
#
sub rewrite_scan {
my ( $self, $op, $R, $lastback ) = @_;
my $scan = $self->genlabel('scan_start');
my $advance = $self->genlabel('scan_advance');
my $next = $self->genlabel('after_scan');
my ( $R_back, @R_body ) = $self->rewrite( $R, $advance );
my @ops = (
$scan => @R_body,
aop( 'goto', [$next] ),
$advance => $self->dbprint("scan advancing\n"),
aop( 'advance', [ 1, $lastback ] ),
aop( 'goto', [$scan] ),
$next =>
);
return ( $R_back, @ops );
}
sub rewrite_simple_or_simple {
my ( $self, $op, $R, $S, $lastback ) = @_;
my $nextalt = $self->genlabel('nextalt');
my $back = $self->genlabel('alt_back');
my $try_S = $self->genlabel('alt_S');
my $next = $self->genlabel('next');
my ( $R_back, @R_ops ) = $self->rewrite( $R, $nextalt );
my ( $S_back, @S_ops ) = $self->rewrite( $S, $back );
return $S_back,
(
aop('pushmark'),
aop('pushindex'),
@R_ops,
aop( 'goto', [$next] ),
$nextalt => aop( 'popindex', [ $self->{_labels}{'fail'} ] ),
$try_S => @S_ops,
aop( 'goto', [$next] ),
$back => aop( 'popindex', [$lastback] ),
aop( 'goto', [$try_S] ),
$next =>
);
}
# R|S|T -> R or tryS
# push 0
# goto next
# tryS: S or tryT
# push 1
# goto next
# tryT: T or lastback
# push 2
# goto next
# back: popint -> I0
# eq I0, 0, R.back
# eq I0, 1, S.back
# goto T.back
# next:
#
sub rewrite_alternate {
my ( $self, $op, @args ) = @_;
my $lastback = pop(@args);
my $back = $self->genlabel('alt_back');
my $next = $self->genlabel('alt_next');
my @tries = map { $self->genlabel('alt_try') } 2 .. @args;
my @fails = ( @tries, $lastback );
my ( @ibacks, @iops );
foreach (@args) {
my ( $iback, @ops ) = $self->rewrite( $_, shift(@fails) );
push @ibacks, $iback;
push @iops, \@ops;
}
my @ops;
for my $i ( 0 .. $#args ) {
push @ops, $tries[ $i - 1 ] unless $i == 0; # Label for the try
push @ops, $self->dbprint("Trying alternative $i of 0..$#args\n");
push @ops, @{ $iops[$i] };
push @ops, aop( 'pushint', [$i] );
push @ops, aop( 'goto', [$next] );
}
push @ops, $back => aop( 'popint', [ '<tmp>', 'branch marker' ] );
for my $i ( 0 .. $#args - 1 ) {
push @ops, aop( 'eq', [ '<tmp>', $i, $ibacks[$i] ] );
}
push @ops, aop( 'goto', [ $ibacks[-1] ] );
push @ops, $next;
return ( $back, @ops );
}
# Dynamic alternation: a set of alternatives that should be tried in
# turn, but the exact alternatives are unknown (eg because they're
# coming from an array.)
#
# @R -> .local $counter
# $counter = 0
# try: R[$counter] or goto fail
# push $counter
# goto next
# fail: $counter++
# if $counter >= @R goto lastback
# goto try
# back: pop $counter
# goto R[].back
# next:
#
# The code below does not assume the alternatives are coming from an
# array; instead, a callback is given that should rewrite the
# alternative that corresponds to the (dynamic) $counter passed in.
#
sub rewrite_dynamic_alternate {
my ( $self, $op, $sizer, $chooser, $lastback ) = @_;
my $try = $self->genlabel('dalt_try');
my $fail = $self->genlabel('dalt_fail');
my $back = $self->genlabel('dalt_back');
my $next = $self->genlabel('dalt_next');
my $counter = $self->new_local("counter");
my ( $N, @N_ops ) = $sizer->( $self, $op );
my ( $R_back, @R_ops ) = $chooser->( $self, $op, $counter, $fail );
my @ops = (
aop( 'assign', [ $counter, 0 ] ),
@N_ops,
$try => $self->dbprint("matching dynalt[%<$counter>]\n"),
@R_ops,
aop( 'pushint', [ $counter, "dynamic alt counter" ] ),
aop( 'goto', [$next] ),
$fail => $self->dbprint("failed dynalt, advancing from dynalt[%<$counter>]/%<$N>\n"),
aop( 'add', [ $counter, 1 ] ),
aop( 'ge', [ $counter, $N, $lastback ] ),
aop( 'goto', [$try] ),
$back => $self->op_restore_rxlocals($op),
aop( 'popint', [ $counter, 'dynamic alt counter' ] ),
$self->dbprint("backtracking into dynalt's index %<$counter> match\n"),
aop( 'goto', [$R_back] ),
$next => $self->op_save_rxlocals($op),
);
return ( $back, @ops );
}
# R<min,max> -> $matchcount = 0
# loop: if $matchcount >= max goto next
# R or check
# $matchcount++
# goto loop
# back: if $matchcount == 0 goto lastback
# $matchcount--
# goto R.back
# check: if $matchcount < min goto back
# next:
#
# to make $matchcount local, we will choose a different name for every
# instance of this rewrite rule. Recursive calls to a given instance
# will be handled in the same way any other local variable would
# (somehow, they'll get saved on the stack.)
sub rewrite_greedy_range {
my ( $self, $op, $R, $min, $max, $lastback ) = @_;
my ( $loop, $back, $local_back, $check, $next ) =
map { $self->genlabel("gr_$_") } qw(loop back local_back check next);
my $matchcount = $self->new_rxlocal( $op, "matchcount" );
my ( $R_back, @R_ops ) = $self->rewrite( $R, $check );
my @ops = (
aop( 'set', [ $matchcount, 0 ] ),
$loop => aop( 'ge', [ $matchcount, $max, $next ] ),
@R_ops,
aop( 'add', [ $matchcount, 1 ] ),
aop( 'goto', [$loop] ),
$back => $self->op_restore_rxlocals($op),
$local_back => aop( 'unless', [ $matchcount, $lastback ] ),
aop( 'add', [ $matchcount, -1 ] ),
aop( 'goto', [$R_back] ),
$check => aop( 'lt', [ $matchcount, $min, $local_back ] ),
$next => $self->op_save_rxlocals($op),
);
return ( $back, @ops );
}
# R<0,max>? -> $matchcount = 0
# goto next
# rfail: if $matchcount == 0 goto lastback
# $matchcount--
# goto R.back
# back: if $matchcount >= max goto rfail
# R or rfail
# $matchcount++
# next:
#
# R<min,max>? -> $matchcount = 0
# goto check
# rfail: if $matchcount == 0 goto lastback
# $matchcount--
# goto R.back
# back: if $matchcount >= max goto rfail
# R or rfail
# $matchcount++
# check: if $matchcount < min goto back
# next:
# [ab]<1,2>?: a aa ab b ba bb
sub rewrite_nongreedy_range {
my ( $self, $op, $R, $min, $max, $lastback ) = @_;
my ( $rfail, $local_back, $back, $check, $next ) =
map { $self->genlabel("ngr_$_") } qw(rfail local_back back check next);
my $matchcount = $self->new_rxlocal( $op, "matchcount" );
my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
my @ops = (
aop( 'set', [ $matchcount, 0 ] ),
aop( 'goto', [$check] ),
$rfail => aop( 'unless', [ $matchcount, $lastback ] ),
aop( 'add', [ $matchcount, -1 ] ),
aop( 'goto', [$R_back] ),
$back => $self->op_restore_rxlocals($op),
$local_back => aop( 'ge', [ $matchcount, $max, $rfail ] ),
@R_ops,
aop( 'add', [ $matchcount, 1 ] ),
$check => aop( 'lt', [ $matchcount, $min, $local_back ] ),
$next => $self->op_save_rxlocals($op),
);
return ( $back, @ops );
}
# ( [ [aaaaa|aaaaaaa]<2,3> ]<1,2> )
# 30,42,20,28,15,21,10,14
# R* -> push 0
# loop: R or next
# push 1
# goto loop
# back: popint -> haveTries_flag
# if (haveTries_flag) goto R.back
# goto lastback
# next:
#
# R*? -> pushindex
# goto next
# rfail: popint -> tmp or R.back
# goto lastback
# back: R or rfail
# pushmark
# next:
#
# conversion to using a temporary (not implemented this way; only for
# comparison with <n,m>? above.)
#
# R*? -> $matchcount = 0
# goto next
# rfail: if $matchcount == 0 goto lastback
# $matchcount--
# goto R.back
# back: R or rfail
# $matchcount++
# next:
#
sub rewrite_star {
my ( $self, $op, $R, $greedy, $lastback ) = @_;
my $next = $self->genlabel('star_next');
my $back = $self->genlabel('star_back');
if ($greedy) {
my ( $R_back, @R_ops ) = $self->rewrite( $R, $next );
my $loop = $self->genlabel('loop');
my @ops = (
aop( 'pushint', [0] ),
$loop => @R_ops,
aop( 'pushint', [1] ),
aop( 'goto', [$loop] ),
$back => $self->dbprint("backtracking into *\n"),
aop( 'popint', [ '<tmp>', $lastback ] ),
aop( 'if', [ '<tmp>', $R_back ] ),
aop( 'goto', [$lastback] ),
$next =>
);
return ( $back, @ops );
}
else {
my $rfail = $self->genlabel('nongreedy_star_fail');
my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
my @ops = (
aop( 'pushint', [0] ),
aop( 'goto', [$next] ),
$rfail => aop( 'popindex', [ '<tmp>', $R_back ] ),
aop( 'goto', [$lastback] ),
$back => @R_ops,
aop('pushmark'),
$next =>
);
return ( $back, @ops );
}
}
# R+ -> pushmark
# loop: R or rback
# push 0
# goto loop
# rback: popindex or lastback
# next:
#
# (back is R.back)
#
#
sub rewrite_plus {
my ( $self, $op, $R, $lastback ) = @_;
my $loop = $self->genlabel('plus_loop');
my $rfail = $self->genlabel('plus_rfail');
my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
my @ops = (
aop( 'pushmark', ["+"] ),
$loop => @R_ops,
aop( 'pushint', [ 0, 'plus matched marker' ] ),
aop( 'goto', [$loop] ),
$rfail => aop( 'popindex', [ '<tmp>', $lastback ] ),
);
return ( $R_back, @ops );
}
#
# R+? -> pushmark
# back: R or rfail
# push 0
# goto next
# rfail: popindex -> junk or lastback
# goto R.back
#
sub rewrite_nongreedy_plus {
my ( $self, $op, $R, $lastback ) = @_;
my $back = $self->genlabel('plus_backloop');
my $rfail = $self->genlabel('plus_rfail');
my $next = $self->genlabel('plus_next');
my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
my @ops = (
aop( 'pushmark', ["+"] ),
$back => @R_ops,
aop( 'pushint', [0] ),
aop( 'goto', [$next] ),
$rfail => aop( 'popindex', [ '<tmp>', $lastback ] ),
aop( 'goto', [$R_back] ),
$next =>
);
return ( $back, @ops );
}
sub rewrite_optional {
my ( $self, $op, $R, $greedy, $lastback ) = @_;
return $greedy
? $self->rewrite_greedy_optional( $op, $R, $lastback )
: $self->rewrite_nongreedy_optional( $op, $R, $lastback );
}
# R? -> R or rfail
# push 1
# goto next
# back: popint -> tmp
# if tmp, R.back
# goto lastback
# rfail: push 0
# next:
#
sub rewrite_greedy_optional {
my ( $self, $op, $R, $lastback ) = @_;
my $back = $self->genlabel('greedy_optional_back');
my $next = $self->genlabel('after_greedy_optional');
my $rfail = $self->genlabel('greedy_optional_fail');
my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
my @ops = (
@R_ops,
aop( 'pushint', [1] ),
aop( 'goto', [$next] ),
$back => aop( 'popint', [ '<tmp>', 'optional marker' ] ),
aop( 'if', [ '<tmp>', $R_back ] ),
aop( 'goto', [$lastback] ),
$rfail => aop( 'pushint', [0] ),
$next =>
);
return ( $back, @ops );
}
# R?? -> pushindex
# goto next
# back: popindex or R.back
# R or lastback
# pushmark
# next:
#
sub rewrite_nongreedy_optional {
my ( $self, $op, $R, $lastback ) = @_;
my $back = $self->genlabel('nongreedy_opt_back');
my $next = $self->genlabel('after_greedy_opt');
my ( $R_back, @R_ops ) = $self->rewrite( $R, $lastback );
my @ops = (
aop('pushindex'),
aop( 'goto', [$next] ),
$back => aop( 'popindex', [$R_back] ),
@R_ops,
aop( 'pushmark', ['?? - with R'] ),
$next =>
);
return ( $back, @ops );
}
###################### New stuff ###################
# Most rewrite rules will declare a fallback point, and also jump back
# to the previous fallback point on failure. So this threads all the
# fallback points together.
sub rewrite_seq {
my $self = shift;
my $op = shift;
my $fallback = pop;
my @ops;
foreach (@_) {
my ( $back, @rewritten ) = $self->rewrite( $_, $fallback );
push @ops, @rewritten;
$fallback = $back;
}
return ( $fallback, @ops );
}
sub rewrite_external {
my ( $self, $op, $extname, $extargs, $lastback ) = @_;
my $handler = "external_$extname";
if ( $self->{$handler} ) {
return $self->{$handler}->( $op, $extargs, $lastback );
}
else {
return ( $lastback, aop( 'external' => [ $extname, $extargs, $lastback ] ) );
}
}
sub describe_seq { undef }
sub describe_check { undef }
sub describe_group { "group $_[3]" }
sub describe_rule { undef }
sub wrap {
my ( $self, $op, $back, @ops ) = @_;
return ( $back, @ops ) unless $self->{DEBUG};
my $method = "describe_" . $op->{name};
my $desc = $op->{name};
if ( $self->can($method) ) {
$desc = $self->$method( $op, @{ $op->{args} } );
return ( $back, @ops ) if !defined $desc;
}
my $db_back = $self->genlabel( $op->{name} . "_back" );
my $db_start = $self->genlabel( $op->{name} . "_enter" );
return (
$db_back,
aop( 'goto', [$db_start] ),
$db_back => $self->dbprint("<- $desc BACK\n"),
aop( 'goto', [$back] ),
$db_start => $self->dbprint("-> $desc ENTER\n"),
@ops,
$self->dbprint(".. $desc NEXT\n"),
);
}
################################ Main loop #######################
sub rewrite {
my ( $self, $op, $lastback ) = @_;
if ( UNIVERSAL::isa( $op, 'Regex::Ops::Tree' ) ) {
my $method = "rewrite_" . $op->{name};
if ( $self->can($method) ) {
return $self->$method( $op, @{ $op->{args} }, $lastback )
unless $self->{DEBUG};
return $self->wrap( $op, $self->$method( $op, @{ $op->{args} }, $lastback ) );
}
else {
return ( $lastback, $self->rewrite_other( $op, $lastback ) );
}
}
elsif ( UNIVERSAL::isa( $op, "Regex::Ops::List" ) ) {
return ( $lastback, $op );
}
else {
$DB::single = 1;
die "malformed op $op";
}
}
sub run {
my ( $self, $tree, $ctx, $pass_label, $fail_label ) = @_;
die "Wrong #args" if @_ != 5;
die "Expected rule!" unless ref($tree) eq 'Regex::Ops::Tree::rule';
my ( undef, @ops ) = $self->rewrite( $tree, undef );
# return { lastback => $back, code => \@ops };
return { code => \@ops };
}
our $call_uid;
sub rewrite_call {
my ( $self, $op, $name, $capture, $lastback ) = @_;
# if ($capture) {
# local $op->{args}->[1] = 0;
# return $self->rewrite_group(rop('group', [ $op, $name ]), $op, $name, $lastback);
# }
my $uid = "_rule_${name}_" . ++$call_uid;
my $handle = $self->genlabel('handle_call_result');
my $back = $self->genlabel('backtrack_into_call');
my $next = $self->genlabel('after_call');
my @ops = (
aop( 'call_setup' => [ $name, $uid ] ),
aop( 'call' => [ $name, 1, $uid ] ),
$handle => aop( 'call_result' => [ $uid, $capture ? $name : undef, $lastback ] ),
aop( 'goto' => [$next] ),
$back => aop( 'call' => [ $name, 0, $uid ] ),
aop( 'goto' => [$handle] ),
$next =>
);
return ( $back, @ops );
}
# Handle a rule definition. $lastback is ignored; perhaps I should
# warn if it's defined at all.
sub rewrite_rule {
my ( $self, $op, $name, $R, $num_groups, $lastback ) = @_;
# Generate code for saving/restoring the "rxlocals" gathered while
# rewriting the regex
my @save_rxlocals;
my @restore_rxlocals;
# if ($ctx->{preserve_state}) {
@save_rxlocals = $self->rule_save_rxlocals();
@restore_rxlocals = $self->rule_restore_rxlocals();
# }
my $trymatch = $self->genlabel('rule_try_match');
my $backup = $self->genlabel('rule_backtrack');
# Generate the code for the body of the rule
my $back = $self->genlabel('rule_fail');
my ( $R_back, @R_ops ) = $self->rewrite( $R, $back );
# Set up the full preamble, including stuff gathered from
# rewriting the expression. Then write out the rest of the
# expression.
my $def = aop( 'rule_def', [ $name, $trymatch, $backup, $num_groups ] );
my @declarations;
while ( my ( $var, $type ) = each %{ $self->{_temps} } ) {
push @declarations, aop( 'declare' => [ $var, $type ] );
}
my @ops = (
$backup => aop( 'popint', [ '<tmp>', "restore rule $name start" ] ),
aop( 'setstart', [ "0", '<tmp>' ] ),
@restore_rxlocals,
aop( 'goto' => [$R_back] ),
$trymatch => $self->startup($num_groups),
@R_ops,
@save_rxlocals,
aop( 'getstart', [ '<tmp>', "0" ] ),
aop( 'pushint', [ '<tmp>', "save rule $name start" ] ),
aop( 'rule_pass', [$name] ),
$back => aop( 'rule_fail', [$name] ),
aop( 'rule_end', [$name] ),
);
push @{ $def->{args} }, \@declarations;
return ( undef, $def, @ops );
}
sub startup {
my ( $self, $num_groups ) = @_;
my $group;
my @ops;
foreach $group ( 0 .. $num_groups ) {
push @ops, aop( 'initgroup' => [$group] );
}
push @ops, aop( 'setstart' => [ "0", '<rx_pos>' ] );
foreach $group ( sort keys %{ $self->{_setup_starts} || {} } ) {
push @ops, aop( 'setstart' => [ $group, -2 ] );
}
foreach $group ( sort keys %{ $self->{_setup_ends} || {} } ) {
push @ops, aop( 'setend' => [ $group, -2 ] );
}
return @ops;
}
1;
# Local Variables:
# mode: cperl
# cperl-indent-level: 4
# fill-column: 100
# End:
# vim: expandtab shiftwidth=4:
syntax highlighted by Code2HTML, v. 0.9.1