# Copyright (C) 2004-2007, The Perl Foundation.
# $Id: Parser.pm 23322 2007-12-02 02:13:52Z petdance $
package Parrot::Pmc2c::Parser;
use strict;
use warnings;
use base qw( Exporter );
our @EXPORT_OK = qw( parse_pmc extract_balanced );
use Parrot::Pmc2c::PMC;
use Parrot::Pmc2c::Method;
use Parrot::Pmc2c::Emitter;
use Parrot::Pmc2c::UtilFunctions qw(count_newlines filename slurp);
use Text::Balanced 'extract_bracketed';

=head1 NAME

Parrot::Pmc2c::Parser - PMC Parser

=head1 SYNOPSIS

    use Parrot::Pmc2c::Parser;

=head1 DESCRIPTION

Parrot::Pmc2c::Paser parses a sudo C syntax into a perl hash that is then dumped.


=head2 C<parse_pmc()>

    $parsed_pmc_hash = parse_pmc($pmc2cMain, $filename);

B<Purpose:>  Parse PMC code and return a hash ref of pmc attributes.

B<Arguments:>  List of two arguments:

=over 4

=item *

The pmc2cMain object

=item *

Filename of the pmc to parse.

=back

B<Return Values:>  Reference to a Parrot::Pmc2c::PMC object

B<Comments:>  Called by C<dump_pmc()>.

=cut

sub parse_pmc {
    my ( $pmc2cMain, $filename ) = @_;

    #slurp file contents
    $filename = $pmc2cMain->find_file( filename( $filename, '.pmc' ), 1 );
    my $code = slurp($filename);

    my ( $preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines ) =
        parse_top_level($code);

    my $pmc = Parrot::Pmc2c::PMC->create($pmcname);
    $pmc->preamble( Parrot::Pmc2c::Emitter->text( $preamble, $filename, 1 ) );
    $pmc->name($pmcname);
    $pmc->set_filename($filename);
    $pmc->set_flags($flags);
    $pmc->set_parents($parents);

    my $lineno = count_newlines($preamble) + $chewed_lines + 1;  #the +1 puts us on the current line
    my $class_init;

    # backreferences here are all +1 because below the qr is wrapped in quotes
    my $signature_re = qr{
        ^
        (?:
          [;\n\s]*            #blank spaces and spurious semicolons
          (?:/\*.*?\*/)?      #C comments
        )*

        ((?:PARROT_\w+\s+)+)? #decorators

        # attribute|vtable|method marker
        (ATTR|VTABLE|(PCC)?METHOD\s+)?
        # return type (no return type for PCCMETHOD)
        (?(4) | (\w+\s*?\**))
        \s*
        (\w+)                 #method name
        \s*
        \( ([^\(]*) \)        #parameters
        \s*
        ((?::(\w+)\s*)*)      #method attrs
        \s*
    }sx;

    while ( $pmcbody =~ s/($signature_re)// ) {
        $lineno += count_newlines($1);
        my ( $decorators, $marker, $pcc, $return_type, $methodname, $parameters, $attrs ) =
            ( $2, $3, $4, $5, $6, $7, parse_method_attrs($8) );

        ( my $methodblock, $pmcbody ) = extract_balanced($pmcbody);
        $methodblock = strip_outer_brackets($methodblock);
        $methodblock =~ s/^[ ]{4}//mg;    #remove pmclass 4 space indent
        $methodblock =~ s/\n\s+$/\n/g;    #trim trailing whitespace from lastline

        $decorators ||= '';
        $decorators =~ s/^\s*(.*?)\s*$/$1/s;
        $decorators = [ split /\s+/ => $decorators ];

        $return_type = 'void'
            if defined $pcc;

        my $method = Parrot::Pmc2c::Method->new(
            {
                name        => $methodname,
                parent_name => $pmc->name,
                body        => Parrot::Pmc2c::Emitter->text( $methodblock, $filename, $lineno ),
                return_type => $return_type,
                parameters  => $parameters,
                type        => Parrot::Pmc2c::Method::VTABLE,
                attrs       => $attrs,
                decorators  => $decorators,
            }
        );

        #PCCMETHOD needs FixedIntegerArray header
        if ( $marker and $marker =~ /PCCMETHOD/ ) {
            Parrot::Pmc2c::PCCMETHOD::rewrite_pccmethod( $method, $pmc );
            $pmc->set_flag('need_fia_header');
        }

        #PCCINVOKE needs FixedIntegerArray header
        $pmc->set_flag('need_fia_header') if ( $methodblock =~ /PCCINVOKE/ );

        # the class_init method is added last after all other methods
        if ( $methodname eq 'class_init' ) {
            $class_init = $method;
        }
        else {

            # Name-mangle NCI methods to avoid conflict with vtable methods.
            if ( $marker and $marker !~ /ATTR|VTABLE/ ) {
                $method->type(Parrot::Pmc2c::Method::NON_VTABLE);
                $method->name("nci_$methodname");
                $method->symbol($methodname);
            }

            parse_mmds( $method, $filename, $lineno ) if $methodblock =~ /\bMMD_(\w+):/;
            $pmc->add_method($method);
        }
        $lineno += count_newlines($methodblock);
    }

    $pmc->postamble( Parrot::Pmc2c::Emitter->text( $post, $filename, $lineno ) );

    #ensure class_init is the last method in the method list
    $pmc->add_method($class_init) if $class_init;
    $pmc->vtable( $pmc2cMain->read_dump("vtable.pmc") );
    $pmc->pre_method_gen();

    return $pmc;
}

sub parse_mmds {
    my ( $method, $filename, $lineno ) = @_;
    my $mmd_methods         = [];
    my $body_text           = $method->body;
    my $default_body        = $body_text;
    my $default_body_lineno = $lineno;

    # now split into MMD if necessary:
    while ( $body_text =~ s/(\bMMD_(\w+):\s*)// ) {
        $lineno += count_newlines($1);
        my $right_type = $2;
        $method->add_mmd_rights($right_type);
        ( my $mmd_part, $body_text ) = extract_bracketed_body_text( $body_text, '{' );
        die "Empty MMD body near '$body_text'" if ( !$mmd_part );
        my $mmd_part_lines = count_newlines($mmd_part);
        $mmd_part =~ s/\n\s*$/\n/s;    #remove whitespace at end of last line
        if ( $right_type eq 'DEFAULT' ) {
            $default_body        = $mmd_part;
            $default_body_lineno = $lineno;
        }
        else {
            my $mmd_method = Parrot::Pmc2c::Method->new(
                {
                    name        => $method->name . "_$right_type",
                    parent_name => $method->parent_name,
                    body        => Parrot::Pmc2c::Emitter->text( $mmd_part, $filename, $lineno ),
                    return_type => $method->return_type,
                    parameters  => $method->parameters,
                    type        => Parrot::Pmc2c::Method::VTABLE,
                    attrs       => $method->attrs,
                    right       => $right_type,
                }
            );
            push @{$mmd_methods}, $mmd_method;
        }

        $lineno += $mmd_part_lines;
    }
    $method->mmds($mmd_methods);
    $method->body( Parrot::Pmc2c::Emitter->text( $default_body, $filename, $default_body_lineno ) );
}

sub strip_outer_brackets {
    my ($method_body) = @_;
    die "First character in $method_body is not a {" unless substr( $method_body, 0,  1 ) eq '{';
    die "Last character in $method_body is not a }"  unless substr( $method_body, -1, 1 ) eq '}';
    return substr $method_body, 1, -1;
}

sub extract_bracketed_body_text {
    my ( $body_text, $bracketed ) = @_;
    my ( $extracted, $remaining ) = extract_bracketed( $body_text, $bracketed );
    return ( strip_outer_brackets($extracted), $remaining );

}

=head2 C<parse_top_level()>

    my ($preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines)
        = parse_top_level(\$code);

B<Purpose:>  Extract a pmc signature from the code ref.

B<Argument:>  PMC file contents slurped by C<parse_pmc()>.

B<Return Values:>  List of seven elements:

=over 4

=item *

the code found before the pmc signature;

=item *

the name of the pmc

=item *

a hash ref containing the flags associated with the pmc (such as
C<extends> and C<does>).

=item *

the list of parents this pmc extends

=item *

the body of the pmc

=item *

the code found after the pmc body

=item *

number of newlines in the pmc signature that need to be added to the
running total of lines in the file

=back

B<Comments:>  Called internally by C<parse_pmc()>.

=cut

sub parse_top_level {
    my $code = shift;

    my $top_level_re = qr{
        ^                 #beginning of line
        (.*?)             #preamble
        ^
        (
            \s*
            pmclass       #pmclass keyword
            \s+           #whitespace
            ([\w]*)       #pmc name
            ([\s+\w+]*)   #pmc attributes
            \s*           #whitespace
        )
        \{                #pmc body beginning marker
    }smx;
    $code =~ s[$top_level_re][{]smx or die "No pmclass found\n";
    my ( $preamble, $pmc_signature, $pmcname, $attributes ) = ( $1, $2, $3, $4 );

    my $chewed_lines = count_newlines($pmc_signature);
    my ( $flags, $parents ) = parse_flags( $attributes, $pmcname );
    my ( $body, $postamble ) = extract_balanced($code);
    $body = strip_outer_brackets($body);    # trim out the { }
    return ( $preamble, $pmcname, $flags, $parents, $body, $postamble, $chewed_lines );
}

our %has_value  = map { $_ => 1 } qw(group hll);
our %has_values = map { $_ => 1 } qw(does extends maps lib);

=head2 C<parse_flags()>

    my ($flags, $parents) = parse_flags($attributes, $pmcname);

B<Purpose:>  Extract a pmc signature from the code ref.

B<Argument:>  PMC file contents slurped by C<parse_pmc()>.

B<Return Values:>  List of two elements:

=over 4

=item *

a hash ref containing the flags associated with the pmc (such as
C<extends> and C<does>).

=item *

the list of parents this pmc extends

=back

B<Comments:>  Called internally by C<parse_top_level()>.

=cut

sub parse_flags {
    my ( $data, $pmcname ) = @_;
    my ( $flags, @parents );
    my @words = ( $data =~ /(\w+)/g );
    while ( scalar @words ) {
        my $name = shift @words;
        if ( $has_value{$name} || $has_values{$name} ) {
            my $value = shift @words or die "Parser error: no value for '$name'";
            if ( $name eq 'extends' ) {
                push @parents, $value;
            }
            elsif ( $has_values{$name} ) {
                $flags->{$name}{$value} = 1;
            }
            else {
                $flags->{$name} = $value;
            }
        }
        else {
            $flags->{$name} = 1;
        }
    }

    # setup some defaults
    if ( $pmcname ne 'default' ) {
        push @parents, 'default' unless scalar @parents;
        $flags->{does}{scalar} = 1 unless $flags->{does};
    }
    return ( $flags, \@parents );
}

=head2 C<extract_balanced()>

    ($pmcbody, $post) = extract_balanced($code);

B<Purpose:>  Remove a balanced C<{}> construct from the beginning of C<$code>.
Return it and the remaining code.

B<Argument:>  The code ref which was the first argument provided to
C<parse_pmc()>.

B<Return Values:>  List of two elements:

=over 4

=item *

String beginning with C<{> and ending with C<}>.  In between is found C code
where the comments hold strings of Perl comments written in POD.

=item *

String holding the balance of the code.  Same style as first element, but
without the braces.

=back

B<Comments:>  Called twice within C<parse_pmc()>.  Will die with error message
C<Badly balanced> if not balanced.

=cut

sub extract_balanced {
    my $code       = shift;
    my $unbalanced = 0;

    die "Unexpected whitespace, expecting" if $code =~ /^\s+/;
    die "bad block open: ", substr( $code, 0, 40 ), "..." unless $code =~ /^\{/;

    # create a copy and remove strings and comments so that
    # unbalanced {} can be used in them in PMCs, being careful to
    # preserve string length.
    local $_ = $code;
    s[
        ( ' (?: \\. | [^'] )* '     # remove ' strings
        | " (?: \\. | [^"] )* "     # remove " strings
        | /\* .*? \*/ )             # remove C comments
    ]
    [ "-" x length $1 ]sexg;

    while (/ (\{) | (\}) /gx) {
        if ($1) {
            $unbalanced++;
        }
        else {    # $2
            $unbalanced--;
            return ( substr( $code, 0, pos, "" ), $code ) if not $unbalanced;
        }
    }
    die "Badly balanced PMC source\n" if $unbalanced;
    return;
}

=head2 C<parse_method_attrs()>

    $attrs = parse_method_attrs($method_attributes);

B<Purpose:>  Parse a list of method attributes and return a hash ref of them.

B<Arguments:>  String captured from regular expression.

B<Return Values:>  Reference to hash of attribute values.

B<Comments:>  Called within C<parse_pmc()>.

=cut

sub parse_method_attrs {
    my $flags = shift;
    my %result;
    ++$result{$1} while $flags =~ /:(\w+)/g;
    return \%result;
}

1;

# Local Variables:
#   mode: cperl
#   cperl-indent-level: 4
#   fill-column: 100
# End:
# vim: expandtab shiftwidth=4:


syntax highlighted by Code2HTML, v. 0.9.1