package Regex::Rewrite;
use Regex::Ops::Tree;
use Regex::Ops::List;

# Copyright (C) 2002-2006, The Perl Foundation.
# $Id: Rewrite.pm 21249 2007-09-13 06:33:24Z paultcochrane $

require 'Regex.pm';
use strict;
use warnings;

*aop = *Regex::Rewrite::aop;
*rop = *Regex::Rewrite::rop;

# The basic method signature used here:
#
# rewrite_X : (optree, lastback) -> (back, ops...)
#
# where $lastback is the previous backtracking point that we should
# jump to if matching $op fails; $back is the backtracking point that
# can be returned to within the processing of the current $op to fix
# up the partial state and possibly attempt another match, or just
# abort and return to $lastback.
#

sub new {
    my ( $proto, %options ) = @_;
    my $self = bless \%options, ( ref($proto) || $proto );
    $self->init();
    return $self;
}

sub init {
    my ( $self, @args ) = @_;
    $self->{_markers}        = {};
    $self->{_temp_int_count} = 3;
    $self->{state} ||= Regex->global_state();
    my $FAIL = $self->genlabel("FAIL");
    $self->{_labels}{'fail'} = $FAIL;
}

sub aop {
    Regex::Ops::List->op(@_);
}

sub genlabel {
    my ( $self, $desc ) = @_;
    return $self->{state}->genlabel($desc);
}

sub alloc_temp_int {
    my ( $self, $name ) = @_;
    $name ||= "_temp_int";
    if ( exists $self->{_temps}{$name} ) {
        $name .= ++$self->{_temp_int_count};
    }
    $self->{_temps}{$name} = 'int';
    return "<$name>";
}

# This implementation should be overridden by the host language,
# because it's not a very good one.
sub new_local {
    my ( $self, $name, $type ) = @_;
    $type ||= 'int';
    die "cannot handle type '$type'" if $type ne 'int';

    # Bad implementation -- does not handle recursion. Actual
    # instances should probably be subclasses that use something like
    # IMCC's facilities for creating local vars.
    return $self->alloc_temp_int($name);
}

sub new_rxlocal {
    my ( $self, $op, $name, $type ) = @_;
    my $var = $self->new_local( $name, $type );
    push @{ $op->{rxlocals} }, $var;
    return $var;
}

sub op_save_rxlocals {
    my ( $self, $op ) = @_;
    if ( $op->{non_reentrant} ) {

        # Mark this variable as needing to be preserved across rule calls
        push @{ $self->{rxlocals} }, @{ $op->{rxlocals} };
        return ();
    }
    else {
        my $rxlocals = ( $op->{rxlocals} ||= [] );
        return ( aop( comment => ["save rxlocals for op"] ),
            map { aop( 'pushint' => [ $_, "op rxlocal $_" ] ) } @$rxlocals );
    }
}

sub op_restore_rxlocals {
    my ( $self, $op ) = @_;
    if ( $op->{non_reentrant} ) {
        return ();
    }
    else {
        my $rxlocals = ( $op->{rxlocals} ||= [] );
        return ( aop( comment => ["restore rxlocals for op"] ),
            map { aop( 'popint' => [ $_, "op rxlocal $_" ] ) }
                reverse @$rxlocals );
    }
}

sub rule_save_rxlocals {
    my ($self) = @_;

    my $rxlocals = ( $self->{rxlocals} ||= [] );
    return ( aop( comment => ["save rxlocals"] ),
        map { aop( 'pushint' => [ $_, "rxlocal $_" ] ) } @$rxlocals );
}

sub rule_restore_rxlocals {
    my ($self) = @_;
    my $rxlocals = ( $self->{rxlocals} ||= [] );
    return ( aop( comment => ["restore rxlocals"] ),
        map { aop( 'popint' => [ $_, "rxlocal $_" ] ) }
            reverse @$rxlocals );
}

sub get_temp {
    my ( $self, $name ) = @_;
    return $self->{_temps}->{$name} || die "Requested unallocated temporary!";
}

######################## Default rewrite rules #######################

sub rewrite_terminate {
    my ( $self, $op, $R ) = @_;
    return $R, aop( 'goto', [ $self->{_return} ] );
}

sub rewrite_goto {
    my ( $self, $op, $R, $lastback ) = @_;
    return aop( 'goto', [$R] ) if $R->{name} eq 'LABEL';
    return $self->rewrite( $R, $lastback );
}

sub rewrite_test {
    my ( $self, $op, $op1, $test, $op2, $dest, $lastback ) = @_;
    my $continue = $self->genlabel('after_test');
    my $rev_test = {
        "==" => "!=",
        "!=" => "==",
        "<"  => ">=",
        ">"  => "<=",
        "<=" => ">",
        ">=" => "<",
    }->{$test};

    if ( $dest->{name} eq 'goto' ) {
        return aop( 'if', [ $op1, $test, $op2, $dest->[1] ] );
    }
    elsif ( $dest->{name} eq 'LABEL' ) {
        return aop( 'if', [ $op1, $test, $op2, $dest ] );
    }
    else {
        my $testop = rop( 'test', [ $op1, $rev_test, $op2, rop( 'goto', [$continue] ) ] );
        return ( $self->rewrite( $testop, $lastback ), $self->rewrite( $dest, $lastback ),
            $continue );
    }
}

# TODO: can_match_empty (so s/a*/x/g doesn't go into infinite loop)
# This is sometimes a runtime property.
sub rewrite_multi_match {
    my ( $self, $op, $min, $max, $greedy, $R, @rest ) = @_;

    if ( ( $min == 0 ) && defined($max) && ( $max == 1 ) ) {
        return $self->rewrite_optional( $op, $R, $greedy, @rest );
    }
    elsif ( ( $min == 0 ) && ( !defined($max) || ( $max == -1 ) ) ) {
        return $self->rewrite_star( $op, $R, $greedy, @rest );
    }
    elsif ( ( $min == 1 ) && ( !defined($max) || $max == -1 ) && $self->can('rewrite_plus') ) {
        if ($greedy) {
            return $self->rewrite_plus( $op, $R, @rest );
        }
        else {
            return $self->rewrite_nongreedy_plus( $op, $R, @rest );
        }
    }
    else {
        if ($greedy) {
            return $self->rewrite_greedy_range( $op, $R, $min, $max, @rest );
        }
        else {
            return $self->rewrite_nongreedy_range( $op, $R, $min, $max, @rest );
        }
    }
}

####### Old Stackless start #######

# Does nothing if DEBUG is not set (if you're using this through
# perl6, then you can turn this on with the $RX_DEBUG environment
# variable)
#
# Otherwise, emits a print of the given string, but with %-escapes.
sub dbprint {
    my ( $self, $what ) = @_;
    return () unless $self->{DEBUG};
    my @ops;
    $what = "%<rx_pos>: $what";
    foreach my $part ( $what =~ /((?:\%\<\w+\>)|[^\%]+)/g ) {
        if ( $part =~ /^%/ ) {
            push @ops, aop( 'print', [ substr( $part, 1 ) ] );
        }
        else {
            $part =~ s/(["'\\])/\\$1/g;
            $part =~ s/\n/\\n/g;
            push @ops, aop( 'print', ["\"$part\""] );
        }
    }
    return @ops;
}

sub need_group_setup {
    my ( $self, $group ) = @_;
    $self->{_setup_starts}{$group} = 1;
    $self->{_setup_ends}{$group}   = 1;
}

sub rewrite_try {
    my ( $self, $op, $R, $lastback ) = @_;
    return $self->rewrite( $R, $lastback );
}

# Most general case:
#
# ( R ) ->
#                  push start[n]
#                  push end[n]
#                  start[n] <- pos
#                  R or rfail
#                  end[n] <- pos
#                  goto next
#
#           rfail: pop -> end[n]
#                  pop -> start[n]
#                  goto lastback
#
#            back: end[n] <- -2
#                  goto R.back
#
#            next:
#
# The 'back' part is used only so that $n within R does not return a
# now-abandoned match. I suppose it should restore $n to its previous
# value, but I'm not going to bother with that for now. (It'll rarely
# have a valid previous value anyway; I think that'll only come in for
# situations like (R)*, which are silly anyway. Then again, I'm
# probably wrong.)
#
sub rewrite_group {
    my ( $self, $op, $R, $group, $lastback ) = @_;
    my $rfail = $self->genlabel("group_iback");
    my $back  = $self->genlabel("group_back");
    my $next  = $self->genlabel("group_next");

    $self->need_group_setup($group);

    my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );

    my @ops = (
        aop( 'getstart', [ '<tmp>', $group ] ),
        $self->dbprint("pushing start[$group]: "),
        aop( 'pushint', ['<tmp>'] ),
        aop( 'getend', [ '<tmp>', $group ] ),
        $self->dbprint("pushing end[$group]: "),
        aop( 'pushint', ['<tmp>'] ),
        aop( 'setstart', [ $group, '<pos>' ] ),
        @R_ops,
        $self->dbprint("setting end[$group] := %<rx_pos>-1\n"),
        aop( 'setend', [ $group, '<pos>', -1 ] ),
        aop( 'goto', [$next] ),
        $rfail => $self->dbprint("R in group failed\n"),
        aop( 'popint',   [ '<tmp>', 'group end' ] ),
        aop( 'setend',   [ $group,  '<tmp>' ] ),
        aop( 'popint',   [ '<tmp>', 'group start' ] ),
        aop( 'setstart', [ $group,  '<tmp>' ] ),
        aop( 'goto', [$lastback] ),
        $back => aop( 'setend', [ $group, -2 ] ),
        aop( 'goto', [$R_back] ),
        $next =>

    );

    return ( $back, @ops );
}

# Cost: 4 + 2ff (insanely high!) if we need to check the length
#       3 + 2ff otherwise
# 1 of that is a goto that could be eliminated pretty easily.
sub rewrite_match {
    my ( $self, $op, $char, $lastback ) = @_;

    my $back = $self->genlabel('undo_match');
    my $next = $self->genlabel('after_match');

    my @ops;
    push @ops, aop( 'check', [ 1, $lastback ] )
        unless ( $op->{nocheck} );

    my @debugging;
    if ( $self->{DEBUG} ) {
        my $old_lastback = $lastback;
        $lastback  = $self->genlabel('debug_matchback');
        @debugging = (
            $lastback => $self->dbprint("failed to match $char at %<rx_pos>\n"),
            aop( 'goto', [$old_lastback] )
        );
    }
    push @ops,
        (
        aop( 'match', [ $char, $lastback ] ),
        aop( 'increment', [ 1, $lastback ] ),
        aop( 'goto', [$next] ),
        $back => $self->dbprint("Unmatching $char\n"),
        aop( 'increment', [ -1, $lastback ] ),
        aop( 'goto', [$lastback] ),
        @debugging,
        $next =>
        );

    return ( $back, @ops );
}

sub rewrite_check {
    my ( $self, $op, $amount, $R, $lastback ) = @_;
    my ( $R_back, @R_body ) = $self->rewrite( $R, $lastback );
    my @ops = ( aop( 'check', [ $amount, $lastback ] ), @R_body );
    return ( $R_back, @ops );
}

sub _translate_classpieces {
    my ($pieces) = @_;

    # Empty list
    return [] if @$pieces == 0;

    # Negated list
    if ( $pieces->[0] eq 'neg' ) {
        return _negate_incexc( _translate_classpieces( $pieces->[1] ) );
    }

    # Convert "a-b" to [97,98] and "a" to [97,97]
    my @ranges;
    foreach (@$pieces) {
        my ( $first, $last );
        if ( ref $_ ) {
            ( $first, $last ) = @$_;
        }
        else {
            ( $first, $last ) = ( $_, $_ );
        }
        push @ranges, [ ord($first), ord($last) ];
    }

    return _ranges_to_incexc( \@ranges );
}

sub _ranges_to_incexc {
    my @ranges = @{ shift() };

    # Sort those pairs by their first element
    @ranges = sort { $a->[0] <=> $b->[0] } @ranges;

    # Build up an inclusion/exclusion list
    my @incexc;
    foreach (@ranges) {
        my ( $first, $last ) = @$_;
        if ( @incexc && $first <= $incexc[-1] ) {

            # Merge (1,10)=1..9 with [4,x]=4..x and [10,x]=10..x
            if ( $incexc[-1] <= $last + 1 ) {
                $incexc[-1] = $last + 1;
            }
        }
        else {

            # Append
            push @incexc, $first, ( $last + 1 );
        }
    }

    return \@incexc;
}

sub _negate_incexc ($) {
    my ($incexc) = @_;
    return [0] if ( @$incexc == 0 );
    return [ 0, @$incexc ] if $incexc->[0] > 0;
    return [] if @$incexc == 1;
    shift @$incexc;
    $incexc->[0]++;
    return $incexc;
}

sub rewrite_classpieces {
    my ( $self, $op, $classpieces, $lastback ) = @_;
    my $incexc = _translate_classpieces($classpieces);
    return $self->rewrite_charclass( $op, $incexc, $lastback );
}

sub rewrite_charclass {
    my ( $self, $op, $incexc, $lastback ) = @_;

    my @ops;
    push @ops, aop( 'check', [ 1, $lastback ] )
        unless ( $op->{nocheck} );

    my $back = $self->genlabel('undo_charclass');
    my $next = $self->genlabel('after_charclass');

    push @ops,
        (
        $self->dbprint("trying classmatch\n"),
        aop( 'classmatch', [ $incexc, $lastback ] ),
        aop( 'increment',  [ 1,       $lastback ] ),
        aop( 'goto',       [$next] ),
        $back => aop( 'increment', [ -1, $lastback ] ),
        aop( 'goto', [$lastback] ),
        $next =>
        );

    return ( $back, @ops );
}

sub rewrite_advance {
    my ( $self, $op, $howfar, $lastback ) = @_;
    my $back = $self->genlabel('undo_charclass');
    my $next = $self->genlabel('after_charclass');

    my @ops = (
        aop( 'advance', [ $howfar, $lastback ] ),
        aop( 'goto', [$next] ),
        $back => aop( 'increment', [ -$howfar ] ),
        aop( 'goto', [$lastback] ),
        $next =>
    );
    return ( $back, @ops );
}

sub rewrite_other {
    my ( $self, $op, $lastback ) = @_;
    return aop( $op->{name}, [ @{ $op->{args} }, $lastback ] );
}

# scan(R) ->   scan: R or advance
#                    goto next
#           advance: pos++ or lastback
#                    goto scan
#              next:
#
# back is R.back
#
sub rewrite_scan {
    my ( $self, $op, $R, $lastback ) = @_;

    my $scan    = $self->genlabel('scan_start');
    my $advance = $self->genlabel('scan_advance');
    my $next    = $self->genlabel('after_scan');

    my ( $R_back, @R_body ) = $self->rewrite( $R, $advance );

    my @ops = (
        $scan => @R_body,
        aop( 'goto', [$next] ),
        $advance => $self->dbprint("scan advancing\n"),
        aop( 'advance', [ 1, $lastback ] ),
        aop( 'goto', [$scan] ),
        $next =>
    );

    return ( $R_back, @ops );
}

sub rewrite_simple_or_simple {
    my ( $self, $op, $R, $S, $lastback ) = @_;

    my $nextalt = $self->genlabel('nextalt');
    my $back    = $self->genlabel('alt_back');
    my $try_S   = $self->genlabel('alt_S');
    my $next    = $self->genlabel('next');

    my ( $R_back, @R_ops ) = $self->rewrite( $R, $nextalt );
    my ( $S_back, @S_ops ) = $self->rewrite( $S, $back );

    return $S_back,
        (
        aop('pushmark'),
        aop('pushindex'),
        @R_ops,
        aop( 'goto', [$next] ),
        $nextalt => aop( 'popindex', [ $self->{_labels}{'fail'} ] ),
        $try_S => @S_ops,
        aop( 'goto', [$next] ),
        $back => aop( 'popindex', [$lastback] ),
        aop( 'goto', [$try_S] ),
        $next =>
        );
}

# R|S|T ->       R or tryS
#                push 0
#                goto next
#          tryS: S or tryT
#                push 1
#                goto next
#          tryT: T or lastback
#                push 2
#                goto next
#          back: popint -> I0
#                eq I0, 0, R.back
#                eq I0, 1, S.back
#                goto T.back
#          next:
#
sub rewrite_alternate {
    my ( $self, $op, @args ) = @_;
    my $lastback = pop(@args);

    my $back  = $self->genlabel('alt_back');
    my $next  = $self->genlabel('alt_next');
    my @tries = map { $self->genlabel('alt_try') } 2 .. @args;
    my @fails = ( @tries, $lastback );

    my ( @ibacks, @iops );
    foreach (@args) {
        my ( $iback, @ops ) = $self->rewrite( $_, shift(@fails) );
        push @ibacks, $iback;
        push @iops,   \@ops;
    }

    my @ops;
    for my $i ( 0 .. $#args ) {
        push @ops, $tries[ $i - 1 ] unless $i == 0;    # Label for the try
        push @ops, $self->dbprint("Trying alternative $i of 0..$#args\n");
        push @ops, @{ $iops[$i] };
        push @ops, aop( 'pushint', [$i] );
        push @ops, aop( 'goto', [$next] );
    }

    push @ops, $back => aop( 'popint', [ '<tmp>', 'branch marker' ] );

    for my $i ( 0 .. $#args - 1 ) {
        push @ops, aop( 'eq', [ '<tmp>', $i, $ibacks[$i] ] );
    }

    push @ops, aop( 'goto', [ $ibacks[-1] ] );
    push @ops, $next;

    return ( $back, @ops );
}

# Dynamic alternation: a set of alternatives that should be tried in
# turn, but the exact alternatives are unknown (eg because they're
# coming from an array.)
#
# @R ->          .local $counter
#                $counter = 0
#           try: R[$counter] or goto fail
#                push $counter
#                goto next
#          fail: $counter++
#                if $counter >= @R goto lastback
#                goto try
#          back: pop $counter
#                goto R[].back
#          next:
#
# The code below does not assume the alternatives are coming from an
# array; instead, a callback is given that should rewrite the
# alternative that corresponds to the (dynamic) $counter passed in.
#
sub rewrite_dynamic_alternate {
    my ( $self, $op, $sizer, $chooser, $lastback ) = @_;

    my $try  = $self->genlabel('dalt_try');
    my $fail = $self->genlabel('dalt_fail');
    my $back = $self->genlabel('dalt_back');
    my $next = $self->genlabel('dalt_next');

    my $counter = $self->new_local("counter");
    my ( $N, @N_ops ) = $sizer->( $self, $op );
    my ( $R_back, @R_ops ) = $chooser->( $self, $op, $counter, $fail );

    my @ops = (
        aop( 'assign', [ $counter, 0 ] ),
        @N_ops,
        $try => $self->dbprint("matching dynalt[%<$counter>]\n"),
        @R_ops,
        aop( 'pushint', [ $counter, "dynamic alt counter" ] ),
        aop( 'goto', [$next] ),
        $fail => $self->dbprint("failed dynalt, advancing from dynalt[%<$counter>]/%<$N>\n"),
        aop( 'add', [ $counter, 1 ] ),
        aop( 'ge', [ $counter, $N, $lastback ] ),
        aop( 'goto', [$try] ),
        $back => $self->op_restore_rxlocals($op),
        aop( 'popint', [ $counter, 'dynamic alt counter' ] ),
        $self->dbprint("backtracking into dynalt's index %<$counter> match\n"),
        aop( 'goto', [$R_back] ),
        $next => $self->op_save_rxlocals($op),
    );

    return ( $back, @ops );
}

# R<min,max> -> $matchcount = 0
#         loop: if $matchcount >= max goto next
#               R or check
#               $matchcount++
#               goto loop
#         back: if $matchcount == 0 goto lastback
#               $matchcount--
#               goto R.back
#        check: if $matchcount < min goto back
#         next:
#
# to make $matchcount local, we will choose a different name for every
# instance of this rewrite rule. Recursive calls to a given instance
# will be handled in the same way any other local variable would
# (somehow, they'll get saved on the stack.)

sub rewrite_greedy_range {
    my ( $self, $op, $R, $min, $max, $lastback ) = @_;

    my ( $loop, $back, $local_back, $check, $next ) =
        map { $self->genlabel("gr_$_") } qw(loop back local_back check next);

    my $matchcount = $self->new_rxlocal( $op, "matchcount" );

    my ( $R_back, @R_ops ) = $self->rewrite( $R, $check );
    my @ops = (
        aop( 'set', [ $matchcount, 0 ] ),
        $loop => aop( 'ge', [ $matchcount, $max, $next ] ),
        @R_ops,
        aop( 'add', [ $matchcount, 1 ] ),
        aop( 'goto', [$loop] ),
        $back       => $self->op_restore_rxlocals($op),
        $local_back => aop( 'unless', [ $matchcount, $lastback ] ),
        aop( 'add', [ $matchcount, -1 ] ),
        aop( 'goto', [$R_back] ),
        $check => aop( 'lt', [ $matchcount, $min, $local_back ] ),
        $next => $self->op_save_rxlocals($op),
    );

    return ( $back, @ops );
}

# R<0,max>? ->  $matchcount = 0
#               goto next
#        rfail: if $matchcount == 0 goto lastback
#               $matchcount--
#               goto R.back
#         back: if $matchcount >= max goto rfail
#               R or rfail
#               $matchcount++
#         next:
#

# R<min,max>? ->  $matchcount = 0
#                 goto check
#          rfail: if $matchcount == 0 goto lastback
#                 $matchcount--
#                 goto R.back
#           back: if $matchcount >= max goto rfail
#                 R or rfail
#                 $matchcount++
#          check: if $matchcount < min goto back
#           next:

# [ab]<1,2>?: a aa ab b ba bb

sub rewrite_nongreedy_range {
    my ( $self, $op, $R, $min, $max, $lastback ) = @_;

    my ( $rfail, $local_back, $back, $check, $next ) =
        map { $self->genlabel("ngr_$_") } qw(rfail local_back back check next);

    my $matchcount = $self->new_rxlocal( $op, "matchcount" );

    my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
    my @ops = (
        aop( 'set', [ $matchcount, 0 ] ),
        aop( 'goto', [$check] ),
        $rfail => aop( 'unless', [ $matchcount, $lastback ] ),
        aop( 'add', [ $matchcount, -1 ] ),
        aop( 'goto', [$R_back] ),
        $back       => $self->op_restore_rxlocals($op),
        $local_back => aop( 'ge', [ $matchcount, $max, $rfail ] ),
        @R_ops,
        aop( 'add', [ $matchcount, 1 ] ),
        $check => aop( 'lt', [ $matchcount, $min, $local_back ] ),
        $next => $self->op_save_rxlocals($op),
    );

    return ( $back, @ops );
}

# ( [ [aaaaa|aaaaaaa]<2,3> ]<1,2> )
# 30,42,20,28,15,21,10,14

# R* ->         push 0
#         loop: R or next
#               push 1
#               goto loop
#         back: popint -> haveTries_flag
#               if (haveTries_flag) goto R.back
#               goto lastback
#         next:
#
# R*? ->        pushindex
#               goto next
#        rfail: popint -> tmp or R.back
#               goto lastback
#         back: R or rfail
#               pushmark
#         next:
#
# conversion to using a temporary (not implemented this way; only for
# comparison with <n,m>? above.)
#
# R*? ->        $matchcount = 0
#               goto next
#        rfail: if $matchcount == 0 goto lastback
#               $matchcount--
#               goto R.back
#         back: R or rfail
#               $matchcount++
#         next:
#
sub rewrite_star {
    my ( $self, $op, $R, $greedy, $lastback ) = @_;

    my $next = $self->genlabel('star_next');
    my $back = $self->genlabel('star_back');

    if ($greedy) {
        my ( $R_back, @R_ops ) = $self->rewrite( $R, $next );
        my $loop = $self->genlabel('loop');
        my @ops  = (
            aop( 'pushint', [0] ),
            $loop => @R_ops,
            aop( 'pushint', [1] ),
            aop( 'goto',    [$loop] ),
            $back => $self->dbprint("backtracking into *\n"),
            aop( 'popint', [ '<tmp>', $lastback ] ),
            aop( 'if',     [ '<tmp>', $R_back ] ),
            aop( 'goto', [$lastback] ),
            $next =>
        );

        return ( $back, @ops );
    }
    else {
        my $rfail = $self->genlabel('nongreedy_star_fail');
        my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
        my @ops = (
            aop( 'pushint', [0] ),
            aop( 'goto',    [$next] ),
            $rfail => aop( 'popindex', [ '<tmp>', $R_back ] ),
            aop( 'goto', [$lastback] ),
            $back => @R_ops,
            aop('pushmark'),
            $next =>
        );
        return ( $back, @ops );
    }
}

# R+ ->       pushmark
#       loop: R or rback
#             push 0
#             goto loop
#      rback: popindex or lastback
#       next:
#
# (back is R.back)
#
#
sub rewrite_plus {
    my ( $self, $op, $R, $lastback ) = @_;

    my $loop  = $self->genlabel('plus_loop');
    my $rfail = $self->genlabel('plus_rfail');
    my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
    my @ops = (
        aop( 'pushmark', ["+"] ),
        $loop => @R_ops,
        aop( 'pushint', [ 0, 'plus matched marker' ] ),
        aop( 'goto', [$loop] ),
        $rfail => aop( 'popindex', [ '<tmp>', $lastback ] ),
    );

    return ( $R_back, @ops );
}

#
# R+? ->        pushmark
#         back: R or rfail
#               push 0
#               goto next
#        rfail: popindex -> junk or lastback
#               goto R.back
#
sub rewrite_nongreedy_plus {
    my ( $self, $op, $R, $lastback ) = @_;

    my $back  = $self->genlabel('plus_backloop');
    my $rfail = $self->genlabel('plus_rfail');
    my $next  = $self->genlabel('plus_next');
    my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );
    my @ops = (
        aop( 'pushmark', ["+"] ),
        $back => @R_ops,
        aop( 'pushint', [0] ),
        aop( 'goto',    [$next] ),
        $rfail => aop( 'popindex', [ '<tmp>', $lastback ] ),
        aop( 'goto', [$R_back] ),
        $next =>
    );

    return ( $back, @ops );
}

sub rewrite_optional {
    my ( $self, $op, $R, $greedy, $lastback ) = @_;
    return $greedy
        ? $self->rewrite_greedy_optional( $op, $R, $lastback )
        : $self->rewrite_nongreedy_optional( $op, $R, $lastback );
}

# R? ->       R or rfail
#             push 1
#             goto next
#       back: popint -> tmp
#             if tmp, R.back
#             goto lastback
#      rfail: push 0
#       next:
#
sub rewrite_greedy_optional {
    my ( $self, $op, $R, $lastback ) = @_;

    my $back  = $self->genlabel('greedy_optional_back');
    my $next  = $self->genlabel('after_greedy_optional');
    my $rfail = $self->genlabel('greedy_optional_fail');
    my ( $R_back, @R_ops ) = $self->rewrite( $R, $rfail );

    my @ops = (
        @R_ops,
        aop( 'pushint', [1] ),
        aop( 'goto',    [$next] ),
        $back => aop( 'popint', [ '<tmp>', 'optional marker' ] ),
        aop( 'if', [ '<tmp>', $R_back ] ),
        aop( 'goto', [$lastback] ),
        $rfail => aop( 'pushint', [0] ),
        $next =>
    );

    return ( $back, @ops );
}

# R?? ->       pushindex
#              goto next
#        back: popindex or R.back
#              R or lastback
#              pushmark
#        next:
#
sub rewrite_nongreedy_optional {
    my ( $self, $op, $R, $lastback ) = @_;

    my $back = $self->genlabel('nongreedy_opt_back');
    my $next = $self->genlabel('after_greedy_opt');
    my ( $R_back, @R_ops ) = $self->rewrite( $R, $lastback );

    my @ops = (
        aop('pushindex'),
        aop( 'goto', [$next] ),
        $back => aop( 'popindex', [$R_back] ),
        @R_ops,
        aop( 'pushmark', ['?? - with R'] ),
        $next =>
    );

    return ( $back, @ops );
}

###################### New stuff ###################

# Most rewrite rules will declare a fallback point, and also jump back
# to the previous fallback point on failure. So this threads all the
# fallback points together.
sub rewrite_seq {
    my $self     = shift;
    my $op       = shift;
    my $fallback = pop;

    my @ops;
    foreach (@_) {
        my ( $back, @rewritten ) = $self->rewrite( $_, $fallback );
        push @ops, @rewritten;
        $fallback = $back;
    }

    return ( $fallback, @ops );
}

sub rewrite_external {
    my ( $self, $op, $extname, $extargs, $lastback ) = @_;
    my $handler = "external_$extname";
    if ( $self->{$handler} ) {
        return $self->{$handler}->( $op, $extargs, $lastback );
    }
    else {
        return ( $lastback, aop( 'external' => [ $extname, $extargs, $lastback ] ) );
    }
}

sub describe_seq   { undef }
sub describe_check { undef }
sub describe_group { "group $_[3]" }
sub describe_rule  { undef }

sub wrap {
    my ( $self, $op, $back, @ops ) = @_;
    return ( $back, @ops ) unless $self->{DEBUG};

    my $method = "describe_" . $op->{name};
    my $desc   = $op->{name};
    if ( $self->can($method) ) {
        $desc = $self->$method( $op, @{ $op->{args} } );
        return ( $back, @ops ) if !defined $desc;
    }

    my $db_back  = $self->genlabel( $op->{name} . "_back" );
    my $db_start = $self->genlabel( $op->{name} . "_enter" );
    return (
        $db_back,
        aop( 'goto', [$db_start] ),
        $db_back => $self->dbprint("<- $desc BACK\n"),
        aop( 'goto', [$back] ),
        $db_start => $self->dbprint("-> $desc ENTER\n"),
        @ops,
        $self->dbprint(".. $desc NEXT\n"),
    );
}

################################ Main loop #######################

sub rewrite {
    my ( $self, $op, $lastback ) = @_;

    if ( UNIVERSAL::isa( $op, 'Regex::Ops::Tree' ) ) {
        my $method = "rewrite_" . $op->{name};
        if ( $self->can($method) ) {
            return $self->$method( $op, @{ $op->{args} }, $lastback )
                unless $self->{DEBUG};
            return $self->wrap( $op, $self->$method( $op, @{ $op->{args} }, $lastback ) );
        }
        else {
            return ( $lastback, $self->rewrite_other( $op, $lastback ) );
        }
    }
    elsif ( UNIVERSAL::isa( $op, "Regex::Ops::List" ) ) {
        return ( $lastback, $op );
    }
    else {
        $DB::single = 1;
        die "malformed op $op";
    }
}

sub run {
    my ( $self, $tree, $ctx, $pass_label, $fail_label ) = @_;
    die "Wrong #args" if @_ != 5;

    die "Expected rule!" unless ref($tree) eq 'Regex::Ops::Tree::rule';

    my ( undef, @ops ) = $self->rewrite( $tree, undef );

    #    return { lastback => $back, code => \@ops };
    return { code => \@ops };
}

our $call_uid;

sub rewrite_call {
    my ( $self, $op, $name, $capture, $lastback ) = @_;

    #    if ($capture) {
    #        local $op->{args}->[1] = 0;
    #        return $self->rewrite_group(rop('group', [ $op, $name ]), $op, $name, $lastback);
    #    }

    my $uid = "_rule_${name}_" . ++$call_uid;

    my $handle = $self->genlabel('handle_call_result');
    my $back   = $self->genlabel('backtrack_into_call');
    my $next   = $self->genlabel('after_call');

    my @ops = (
        aop( 'call_setup' => [ $name, $uid ] ),
        aop( 'call'       => [ $name, 1, $uid ] ),
        $handle => aop( 'call_result' => [ $uid, $capture ? $name : undef, $lastback ] ),
        aop( 'goto' => [$next] ),
        $back => aop( 'call' => [ $name, 0, $uid ] ),
        aop( 'goto' => [$handle] ),
        $next =>
    );
    return ( $back, @ops );
}

# Handle a rule definition. $lastback is ignored; perhaps I should
# warn if it's defined at all.
sub rewrite_rule {
    my ( $self, $op, $name, $R, $num_groups, $lastback ) = @_;

    # Generate code for saving/restoring the "rxlocals" gathered while
    # rewriting the regex
    my @save_rxlocals;
    my @restore_rxlocals;

    #    if ($ctx->{preserve_state}) {
    @save_rxlocals    = $self->rule_save_rxlocals();
    @restore_rxlocals = $self->rule_restore_rxlocals();

    #    }

    my $trymatch = $self->genlabel('rule_try_match');
    my $backup   = $self->genlabel('rule_backtrack');

    # Generate the code for the body of the rule
    my $back = $self->genlabel('rule_fail');
    my ( $R_back, @R_ops ) = $self->rewrite( $R, $back );

    # Set up the full preamble, including stuff gathered from
    # rewriting the expression. Then write out the rest of the
    # expression.
    my $def = aop( 'rule_def', [ $name, $trymatch, $backup, $num_groups ] );

    my @declarations;
    while ( my ( $var, $type ) = each %{ $self->{_temps} } ) {
        push @declarations, aop( 'declare' => [ $var, $type ] );
    }

    my @ops = (
        $backup => aop( 'popint', [ '<tmp>', "restore rule $name start" ] ),
        aop( 'setstart', [ "0", '<tmp>' ] ),
        @restore_rxlocals,
        aop( 'goto' => [$R_back] ),
        $trymatch => $self->startup($num_groups),
        @R_ops,
        @save_rxlocals,
        aop( 'getstart', [ '<tmp>', "0" ] ),
        aop( 'pushint',  [ '<tmp>', "save rule $name start" ] ),
        aop( 'rule_pass', [$name] ),
        $back => aop( 'rule_fail', [$name] ),
        aop( 'rule_end', [$name] ),
    );

    push @{ $def->{args} }, \@declarations;

    return ( undef, $def, @ops );
}

sub startup {
    my ( $self, $num_groups ) = @_;

    my $group;

    my @ops;
    foreach $group ( 0 .. $num_groups ) {
        push @ops, aop( 'initgroup' => [$group] );
    }

    push @ops, aop( 'setstart' => [ "0", '<rx_pos>' ] );

    foreach $group ( sort keys %{ $self->{_setup_starts} || {} } ) {
        push @ops, aop( 'setstart' => [ $group, -2 ] );
    }

    foreach $group ( sort keys %{ $self->{_setup_ends} || {} } ) {
        push @ops, aop( 'setend' => [ $group, -2 ] );
    }

    return @ops;
}

1;

# Local Variables:
#   mode: cperl
#   cperl-indent-level: 4
#   fill-column: 100
# End:
# vim: expandtab shiftwidth=4:


syntax highlighted by Code2HTML, v. 0.9.1