# Copyright (C) 2004-2007, The Perl Foundation. # $Id: Parser.pm 23322 2007-12-02 02:13:52Z petdance $ package Parrot::Pmc2c::Parser; use strict; use warnings; use base qw( Exporter ); our @EXPORT_OK = qw( parse_pmc extract_balanced ); use Parrot::Pmc2c::PMC; use Parrot::Pmc2c::Method; use Parrot::Pmc2c::Emitter; use Parrot::Pmc2c::UtilFunctions qw(count_newlines filename slurp); use Text::Balanced 'extract_bracketed'; =head1 NAME Parrot::Pmc2c::Parser - PMC Parser =head1 SYNOPSIS use Parrot::Pmc2c::Parser; =head1 DESCRIPTION Parrot::Pmc2c::Paser parses a sudo C syntax into a perl hash that is then dumped. =head2 C $parsed_pmc_hash = parse_pmc($pmc2cMain, $filename); B Parse PMC code and return a hash ref of pmc attributes. B List of two arguments: =over 4 =item * The pmc2cMain object =item * Filename of the pmc to parse. =back B Reference to a Parrot::Pmc2c::PMC object B Called by C. =cut sub parse_pmc { my ( $pmc2cMain, $filename ) = @_; #slurp file contents $filename = $pmc2cMain->find_file( filename( $filename, '.pmc' ), 1 ); my $code = slurp($filename); my ( $preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines ) = parse_top_level($code); my $pmc = Parrot::Pmc2c::PMC->create($pmcname); $pmc->preamble( Parrot::Pmc2c::Emitter->text( $preamble, $filename, 1 ) ); $pmc->name($pmcname); $pmc->set_filename($filename); $pmc->set_flags($flags); $pmc->set_parents($parents); my $lineno = count_newlines($preamble) + $chewed_lines + 1; #the +1 puts us on the current line my $class_init; # backreferences here are all +1 because below the qr is wrapped in quotes my $signature_re = qr{ ^ (?: [;\n\s]* #blank spaces and spurious semicolons (?:/\*.*?\*/)? #C comments )* ((?:PARROT_\w+\s+)+)? #decorators # attribute|vtable|method marker (ATTR|VTABLE|(PCC)?METHOD\s+)? # return type (no return type for PCCMETHOD) (?(4) | (\w+\s*?\**)) \s* (\w+) #method name \s* \( ([^\(]*) \) #parameters \s* ((?::(\w+)\s*)*) #method attrs \s* }sx; while ( $pmcbody =~ s/($signature_re)// ) { $lineno += count_newlines($1); my ( $decorators, $marker, $pcc, $return_type, $methodname, $parameters, $attrs ) = ( $2, $3, $4, $5, $6, $7, parse_method_attrs($8) ); ( my $methodblock, $pmcbody ) = extract_balanced($pmcbody); $methodblock = strip_outer_brackets($methodblock); $methodblock =~ s/^[ ]{4}//mg; #remove pmclass 4 space indent $methodblock =~ s/\n\s+$/\n/g; #trim trailing whitespace from lastline $decorators ||= ''; $decorators =~ s/^\s*(.*?)\s*$/$1/s; $decorators = [ split /\s+/ => $decorators ]; $return_type = 'void' if defined $pcc; my $method = Parrot::Pmc2c::Method->new( { name => $methodname, parent_name => $pmc->name, body => Parrot::Pmc2c::Emitter->text( $methodblock, $filename, $lineno ), return_type => $return_type, parameters => $parameters, type => Parrot::Pmc2c::Method::VTABLE, attrs => $attrs, decorators => $decorators, } ); #PCCMETHOD needs FixedIntegerArray header if ( $marker and $marker =~ /PCCMETHOD/ ) { Parrot::Pmc2c::PCCMETHOD::rewrite_pccmethod( $method, $pmc ); $pmc->set_flag('need_fia_header'); } #PCCINVOKE needs FixedIntegerArray header $pmc->set_flag('need_fia_header') if ( $methodblock =~ /PCCINVOKE/ ); # the class_init method is added last after all other methods if ( $methodname eq 'class_init' ) { $class_init = $method; } else { # Name-mangle NCI methods to avoid conflict with vtable methods. if ( $marker and $marker !~ /ATTR|VTABLE/ ) { $method->type(Parrot::Pmc2c::Method::NON_VTABLE); $method->name("nci_$methodname"); $method->symbol($methodname); } parse_mmds( $method, $filename, $lineno ) if $methodblock =~ /\bMMD_(\w+):/; $pmc->add_method($method); } $lineno += count_newlines($methodblock); } $pmc->postamble( Parrot::Pmc2c::Emitter->text( $post, $filename, $lineno ) ); #ensure class_init is the last method in the method list $pmc->add_method($class_init) if $class_init; $pmc->vtable( $pmc2cMain->read_dump("vtable.pmc") ); $pmc->pre_method_gen(); return $pmc; } sub parse_mmds { my ( $method, $filename, $lineno ) = @_; my $mmd_methods = []; my $body_text = $method->body; my $default_body = $body_text; my $default_body_lineno = $lineno; # now split into MMD if necessary: while ( $body_text =~ s/(\bMMD_(\w+):\s*)// ) { $lineno += count_newlines($1); my $right_type = $2; $method->add_mmd_rights($right_type); ( my $mmd_part, $body_text ) = extract_bracketed_body_text( $body_text, '{' ); die "Empty MMD body near '$body_text'" if ( !$mmd_part ); my $mmd_part_lines = count_newlines($mmd_part); $mmd_part =~ s/\n\s*$/\n/s; #remove whitespace at end of last line if ( $right_type eq 'DEFAULT' ) { $default_body = $mmd_part; $default_body_lineno = $lineno; } else { my $mmd_method = Parrot::Pmc2c::Method->new( { name => $method->name . "_$right_type", parent_name => $method->parent_name, body => Parrot::Pmc2c::Emitter->text( $mmd_part, $filename, $lineno ), return_type => $method->return_type, parameters => $method->parameters, type => Parrot::Pmc2c::Method::VTABLE, attrs => $method->attrs, right => $right_type, } ); push @{$mmd_methods}, $mmd_method; } $lineno += $mmd_part_lines; } $method->mmds($mmd_methods); $method->body( Parrot::Pmc2c::Emitter->text( $default_body, $filename, $default_body_lineno ) ); } sub strip_outer_brackets { my ($method_body) = @_; die "First character in $method_body is not a {" unless substr( $method_body, 0, 1 ) eq '{'; die "Last character in $method_body is not a }" unless substr( $method_body, -1, 1 ) eq '}'; return substr $method_body, 1, -1; } sub extract_bracketed_body_text { my ( $body_text, $bracketed ) = @_; my ( $extracted, $remaining ) = extract_bracketed( $body_text, $bracketed ); return ( strip_outer_brackets($extracted), $remaining ); } =head2 C my ($preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines) = parse_top_level(\$code); B Extract a pmc signature from the code ref. B PMC file contents slurped by C. B List of seven elements: =over 4 =item * the code found before the pmc signature; =item * the name of the pmc =item * a hash ref containing the flags associated with the pmc (such as C and C). =item * the list of parents this pmc extends =item * the body of the pmc =item * the code found after the pmc body =item * number of newlines in the pmc signature that need to be added to the running total of lines in the file =back B Called internally by C. =cut sub parse_top_level { my $code = shift; my $top_level_re = qr{ ^ #beginning of line (.*?) #preamble ^ ( \s* pmclass #pmclass keyword \s+ #whitespace ([\w]*) #pmc name ([\s+\w+]*) #pmc attributes \s* #whitespace ) \{ #pmc body beginning marker }smx; $code =~ s[$top_level_re][{]smx or die "No pmclass found\n"; my ( $preamble, $pmc_signature, $pmcname, $attributes ) = ( $1, $2, $3, $4 ); my $chewed_lines = count_newlines($pmc_signature); my ( $flags, $parents ) = parse_flags( $attributes, $pmcname ); my ( $body, $postamble ) = extract_balanced($code); $body = strip_outer_brackets($body); # trim out the { } return ( $preamble, $pmcname, $flags, $parents, $body, $postamble, $chewed_lines ); } our %has_value = map { $_ => 1 } qw(group hll); our %has_values = map { $_ => 1 } qw(does extends maps lib); =head2 C my ($flags, $parents) = parse_flags($attributes, $pmcname); B Extract a pmc signature from the code ref. B PMC file contents slurped by C. B List of two elements: =over 4 =item * a hash ref containing the flags associated with the pmc (such as C and C). =item * the list of parents this pmc extends =back B Called internally by C. =cut sub parse_flags { my ( $data, $pmcname ) = @_; my ( $flags, @parents ); my @words = ( $data =~ /(\w+)/g ); while ( scalar @words ) { my $name = shift @words; if ( $has_value{$name} || $has_values{$name} ) { my $value = shift @words or die "Parser error: no value for '$name'"; if ( $name eq 'extends' ) { push @parents, $value; } elsif ( $has_values{$name} ) { $flags->{$name}{$value} = 1; } else { $flags->{$name} = $value; } } else { $flags->{$name} = 1; } } # setup some defaults if ( $pmcname ne 'default' ) { push @parents, 'default' unless scalar @parents; $flags->{does}{scalar} = 1 unless $flags->{does}; } return ( $flags, \@parents ); } =head2 C ($pmcbody, $post) = extract_balanced($code); B Remove a balanced C<{}> construct from the beginning of C<$code>. Return it and the remaining code. B The code ref which was the first argument provided to C. B List of two elements: =over 4 =item * String beginning with C<{> and ending with C<}>. In between is found C code where the comments hold strings of Perl comments written in POD. =item * String holding the balance of the code. Same style as first element, but without the braces. =back B Called twice within C. Will die with error message C if not balanced. =cut sub extract_balanced { my $code = shift; my $unbalanced = 0; die "Unexpected whitespace, expecting" if $code =~ /^\s+/; die "bad block open: ", substr( $code, 0, 40 ), "..." unless $code =~ /^\{/; # create a copy and remove strings and comments so that # unbalanced {} can be used in them in PMCs, being careful to # preserve string length. local $_ = $code; s[ ( ' (?: \\. | [^'] )* ' # remove ' strings | " (?: \\. | [^"] )* " # remove " strings | /\* .*? \*/ ) # remove C comments ] [ "-" x length $1 ]sexg; while (/ (\{) | (\}) /gx) { if ($1) { $unbalanced++; } else { # $2 $unbalanced--; return ( substr( $code, 0, pos, "" ), $code ) if not $unbalanced; } } die "Badly balanced PMC source\n" if $unbalanced; return; } =head2 C $attrs = parse_method_attrs($method_attributes); B Parse a list of method attributes and return a hash ref of them. B String captured from regular expression. B Reference to hash of attribute values. B Called within C. =cut sub parse_method_attrs { my $flags = shift; my %result; ++$result{$1} while $flags =~ /:(\w+)/g; return \%result; } 1; # Local Variables: # mode: cperl # cperl-indent-level: 4 # fill-column: 100 # End: # vim: expandtab shiftwidth=4: