# Copyright (C) 2002-2006, The Perl Foundation.
# $Id: Optimize.pm 21249 2007-09-13 06:33:24Z paultcochrane $

=head1 PACKAGE

Regex::Optimize

=head1 ABSTRACT

Optimize a sequence of list ops.

=head1 INTERNAL ROUTINES

=over 4

=cut

package Regex::Optimize;

use Regex::Ops::List;
use Regex::State;
use strict;
use warnings;
require 'Regex.pm';

sub new {
    my ( $proto, %options ) = @_;
    my $self = bless \%options, ( ref($proto) || $proto );
    $self->init();
    return $self;
}

sub init {
    my $self = shift;
    $self->{state} ||= Regex->global_state();
}

sub mklabel {
    my ($self) = @_;
    return $self->{state}->genlabel("L");
}

sub is_label {
    return UNIVERSAL::isa( shift(), 'Regex::Ops::Label' );
}

=item method label_indices(op)

Figure out which arguments of an op are labels, and return an array of
their indices.

=cut

sub label_indices {
    my ( $self, $op ) = @_;
    my @indices;
    for my $i ( 0 .. $#{ $op->{args} } ) {
        my $arg = $op->{args}->[$i];
        push( @indices, $i ) if is_label($arg);
    }
    return @indices;
}

=item method combineLabels(label1, label2, ...)

Creates a new label to represent a group of label objects. Also
remembers what the original names are so a comment giving them can be
generated later.

=cut

sub combineLabels {
    my $self = shift;
    my @names = map { $_->{label} =~ /(\w+)/; $1 } @_;
    my %names;
    @names{@names} = ();
    my $label = $self->mklabel();
    $self->{_label_comments}{ $label->{label} } = join( ", ", keys %names );
    return $label->{label};
}

=item method optimize(ops...)

 1. Merge equivalent labels
 2. Jump threading: Replace goto X; ...; X: goto Y; with goto Y.
 3. Eliminate unreachable code.
 4. Eliminate jumps to the following address.
 5. Eliminate unused labels.

TODO:

 1. I would like to optimize

     B1: sub x, 1
         goto S0
     B2: sub x, 1
         goto B1
     B3: sub x, 1
         goto B2

 to

     B1: sub x, 1
         goto S0
     B2: sub x, 2
         goto S0
     B3: sub x, 3
         goto S0

since this commonly occurs in regex code, due to sequence of
single-character matches (eg /a[bB]c/).

But perhaps this should be handled in the Tree -> List rewrite??

=cut

sub optimize {
    my ( $self, $ops, $ctx ) = @_;
    die "Wrong #args" if @_ != 3;

    my @equivs;    # (labels)
    my @output;    # (ops)

    # Merge adjacent (equivalent) labels, renaming them
    for my $stmt (@$ops) {
        if ( ref $stmt && $stmt->{name} eq 'LABEL' ) {
            push @equivs, $stmt;
        }
        else {
            if (@equivs) {
                my $megalabel = $self->combineLabels(@equivs);
                $_->{label} = $megalabel foreach (@equivs);
                push @output, $equivs[0];
                @equivs = ();
            }
            push @output, $stmt;
        }
    }
    die "The final 'terminate' is supposed to make this impossible!"
        if @equivs;

    # Jump threading: replace
    #
    #  goto @1
    #  ...
    #  @1: goto @2
    #
    # with
    #
    #  goto @2
    #  ...
    #  @1: goto @2
    #

    # First, convert all statements to the form
    #  { label => optional_label, code => original_op }
    # and construct a mapping from label names to destination tagged_op
    my $curlabel;
    my @output2;    # ( { label => ?label, code => op } : tagged_op )
    my %labels;     # { label string => tagged_op }
    foreach my $stmt (@output) {
        if ( $stmt->{name} eq 'LABEL' ) {
            $curlabel = $stmt;
        }
        else {
            push @output2, { label => $curlabel, code => $stmt };
            $labels{ $curlabel->{label} } = $output2[-1] if $curlabel;
            undef $curlabel;
        }
    }

    # Second, scan for label references and follow goto's until the
    # final destination of each is reached, then replace the original
    # reference.

    foreach my $stmt (@output2) {

        # $stmt : { label => ?label, code => op }
        my ( $label, $actual ) = @$stmt{ 'label', 'code' };

        # Find statements that can branch to a label
        my @labels;
        @labels = $self->label_indices($actual) if ref $actual;

        foreach my $pos (@labels) {
            my $dest = $actual->{args}->[$pos];
            while (1) {
                my $dest_stmt = $labels{ $dest->{label} };    # tagged_op
                if ( !$dest_stmt ) {
                    if ( $ctx->{external_labels}{ $dest->{label} } ) {

                        # Mark external label as reachable
                        $dest->{reachable} = 1;
                        last;                                 # Stop tracing through jumps
                    }
                    else {
                        die "untargeted label $dest->{label}";
                    }
                }
                last if $dest_stmt->{code}->{name} ne 'goto';
                $dest = $dest_stmt->{code}->{args}->[0];
            }
            $actual->{args}->[$pos] = $dest;
        }
    }

    # At this point, every basic block but the first begins with a
    # labelled statement. Next, do a reachability analysis to find
    # unreachable basic blocks. We'll store a 'reachable' flag in the
    # 3rd element of the labels.

    # But first, make *all* basic blocks begin with a label.
    $output2[0]->{label} ||= $self->{state}->genlabel("beginning");

    # Stick in a next_stmt ref in every statement to make it easier to
    # move around.
    my $next;
    for my $stmt ( reverse @output2 ) {
        $stmt->{'next'} = $next;
        $next = $stmt;
    }

    # Push first statement on the queue
    my @Q = ( $output2[0] );

BBLOCK:
    while ( my $stmt = shift(@Q) ) {
        next if $stmt->{label}->{reachable};    # Already reached here
        $stmt->{label}->{reachable} = 1;

        # Loop over the basic block starting at $stmt
        my $prev;
        do {
            if ( ref $stmt->{code} ) {
                my @labels = $self->label_indices( $stmt->{code} );
                foreach my $pos (@labels) {
                    push @Q, $labels{ $stmt->{code}->{args}->[$pos]->{label} };
                    pop @Q if !defined $Q[-1];    # External label
                }
                if ( $stmt->{code}->{name} =~ /^(?:goto|fail)$/ ) {
                    next BBLOCK;
                }
            }
            $prev = $stmt;
            $stmt = $stmt->{next};
        } while ( $stmt && !$stmt->{label} );

        # Fallthrough reachable
        push @Q, $stmt if $stmt;
    }

    # Eliminate unreachable code
    my @output3;    # Really should do @output = (), but I hate doing a
                    # compiler's work for it.
    my $keeping = 1;
    foreach my $stmt (@output2) {
        if ( $stmt->{label} ) {
            $keeping = $stmt->{label}->{reachable};    # Keep if reachable
        }
        push @output3, $stmt if $keeping;
    }

    # Reset the 'next' pointers
    undef $next;
    for my $stmt ( reverse @output3 ) {
        $stmt->{next} = $next;
        $next = $stmt;
    }

    # Eliminate gotos to the following address
    my @output4;
    foreach my $stmt (@output3) {
        if ( ref $stmt->{code} && $stmt->{code}->{name} eq 'goto' ) {
            if (   $stmt->{next}->{label}
                && $stmt->{code}->{args}->[0] == $stmt->{next}->{label} )
            {

                # If the label of the goto is the label of the following
                # block of code:
                next;
            }
        }
        push @output4, $stmt;
    }

    # Delete labels that are not the destination of any jump (these
    # are the ones that were marked reachable because the previous
    # basic block fell through to them.)
    my %AMDEST;    # { label name => boolean }
    foreach (@output4) {
        my $code = $_->{code};
        foreach ( map { $code->{args}->[$_] } $self->label_indices($code) ) {
            $AMDEST{ $_->{label} } = 1;
        }
    }
    foreach (@output4) {
        delete $_->{label} if ( $_->{label} && !$AMDEST{ $_->{label}->{label} } );
    }

    return ( bless( $self->{_label_comments}, 'LABEL_COMMENTS' ),
        map { ( $_->{label} ? ( $_->{label} ) : () ), $_->{code} } @output4 );
}

sub dbg_render {
    if ( UNIVERSAL::isa( $_[0], 'Regex::Ops::List' ) ) {
        map {
            if ( $_->{name} eq 'LABEL' )
            {
                "$_->{label}: ";
            }
            else {
                $_->{name} . " "
                    . join( ", ", map { ref($_) ? $_->{label} : $_ } @{ $_->{args} || [] } );
            }
        } @_;
    }
    else {
        map {
            my $str;
            if ( $_->{label} ) {
                $str .= "**" if $_->{label}{reachable};
                $str .= "$_->{label}->{label}: ";
            }
            $str .=
                $_->{code}{name} . " "
                . join( ", ", map { ref($_) ? $_->{label} : $_ } @{ $_->{code}{args} || [] } );
            $str;
        } @_;
    }
}

1;

=back

# Local Variables:
#   mode: cperl
#   cperl-indent-level: 4
#   fill-column: 100
# End:
# vim: expandtab shiftwidth=4:


syntax highlighted by Code2HTML, v. 0.9.1