#! perl # Copyright (C) 2001-2006, The Perl Foundation. # $Id: OpsFile.pm 21450 2007-09-21 09:33:12Z paultcochrane $ =head1 NAME Parrot::OpsFile - Ops To C Code Generation =head1 SYNOPSIS use Parrot::OpsFile; =head1 DESCRIPTION C takes one or more files of op functions and creates real C code for them. This class is used by F, F and F. =head2 Op Functions For ops that have trivial bodies (such as just a call to some other function and a C statement), opcode functions are in the format: inline op opname (args) :class,flags { ... body of function ... } Note that currently the C op type is ignored. Alternately, for opcode functions that have more internal complexity the format is: op opname (args) :class,flags { ... body of function ... } There may be more than one C. In both cases the closing brace B be on its own line. =head2 Op Arguments Op arguments are a comma-separated list of direction and type pairs. Argument direction is one of: in the argument passes a value into the op out the argument passes a value out of the op inout the argument passes a value into and out of the op inconst the argument passes a constant value into the op invar the argument passes a variable value into the op label an in argument containing a branch offset or address labelconst an invar argument containing a branch offset or address labelvar an inconst argument containing a branch offset or address Argument direction is used to determine the life times of symbols and their related register allocations. When an argument is passed into an op a register is read from, when it's passed out of an op a register is written to. Argument type is one of: INT the argument is an integer NUM the argument is an numeric STR the argument is an string PMC the argument is an PMC KEY the argument is an aggregate PMC key INTKEY the argument is an aggregate PMC integer key The size of the return offset is determined from the op function's signature. =head2 Op Classification and Flags The op classification and flags are optional hints which provide information about the op. The classification of ops is intended to facilitate the selection of suitable ops for a Parrot safe mode, or for inclusion in miniparrot. =head2 Op Body (Macro Substitutions) In the following macro descriptions, C and C are the current and next position within the Parrot code. =over 4 =item C Transforms to C. This is used for branches. =item C Transforms to C, where C is the size of an op. =item C Transforms to C. This is used for absolute jumps. =item C Transforms to C<< PC' = >>. Pops the address off control stack. =item C Transforms to C. This is used to give a relative address. =item C Transforms to C, the position of the next op. =item C Transforms to C, an absolute address. =item C Transforms to C, the size of an op. =item C Transforms to C. Halts run loop, and resets the current position to the start of the Parrot code, without resuming. =item C Transforms to C and restarts at C. =item C Transforms to C and restarts at C. =item C<$n> Transforms to the op function's nth argument. C<$0> is the opcode itself. =back Note that, for ease of parsing, if the argument to one of the above notations in a ops file contains parentheses, then double the enclosing parentheses and add a space around the argument, like so: goto OFFSET(( (void*)interp->happy_place )) =head2 Class Methods =over 4 =cut package Parrot::OpsFile; use strict; use warnings; use base qw( Exporter ); use Parrot::Op; use Parrot::Config; our %op_body; our @EXPORT = qw( %op_body ); # private sub _trim() # # Trim leading and trailing spaces. sub _trim { my $value = shift; $value =~ s/^\s+//; $value =~ s/\s+$//; return $value; } =item C Returns a new instance initialized by calling C on each of the specified op files. =cut sub new { my ( $class, $files, $nolines ) = @_; my $self = bless { PREAMBLE => '' }, $class; $self->read_ops( $_, $nolines ) for @{$files}; # FILE holds a space separated list of opsfile name if ( $self->{FILE} ) { $self->{FILE} =~ s/, $//; $self->{FILE} =~ s/, $//; } return $self; } =back =head2 Instance Methods =over 4 =item C Reads in the specified .ops file, gathering information about the ops. =cut sub read_ops { my ( $self, $file, $nolines ) = @_; my $ops_file = "src/" . $file; die "Parrot::OpFunc::init(): No file specified!\n" unless defined $file; $self->{FILE} .= $file . ', '; my $orig = $file; open my $OPS, '<', $file or die "Can't open $file, $!/$^E"; if ( !( $file =~ s/\.ops$/.c/ ) ) { $file .= ".c"; } # # Read through the file, creating and storing Parrot::Op objects: # my $count = 0; my ( $name, $footer ); my $type; my $body; my $short_name; my $args; my @args; my @argdirs; my $seen_pod; my $seen_op; my $line; my $flags; my @labels; while (<$OPS>) { $seen_pod = 1 if m|^=|; unless ( $seen_op or m|^(inline\s+)?op\s+| ) { if (m/^\s*VERSION\s*=\s*"(\d+\.\d+\.\d+)"\s*;\s*$/) { if ( exists $self->{VERSION} ) { #die "VERSION MULTIPLY DEFINED!"; } $self->version($1); $_ = ''; } elsif (m/^\s*VERSION\s*=\s*PARROT_VERSION\s*;\s*$/) { if ( exists $self->{VERSION} ) { #die "VERSION MULTIPLY DEFINED!"; } $self->version( $PConfig{VERSION} ); $_ = ''; } $self->{PREAMBLE} .= $_ unless $seen_pod or $count; # Lines up to first op def. next; } die "No 'VERSION = ...;' line found before beginning of ops in file '$orig'!\n" unless defined $self->version; # # Handle start-of-op: # # We create a new Parrot::Op instance based on the type, name and args. # We query the Parrot::Op for the op size, etc., which we use later. # # Either of these two forms work: # # inline op name (args) { # op name (args) { # # The args are a comma-separated list of items from this table of argument # types (even if no formal args are specified, there will be a single 'o' # entry): # # op The opcode # # i Integer register index # n Number register index # p PMC register index # s String register index # # ic Integer constant (in-line) # nc Number constant index # pc PMC constant index # sc String constant index # kc Key constant index # kic Integer Key constant index (in-line) # if (/^(inline\s+)?op\s+([a-zA-Z]\w*)\s*\((.*)\)\s*(\S*)?\s*{/) { if ($seen_op) { die "$ops_file [$.]: Cannot define an op within an op definition!\n"; } $type = defined($1) ? 'inline' : 'function'; $short_name = $2; $args = _trim( lc $3 ); $flags = $4 ? _trim( lc $4 ) : ""; @args = split( /\s*,\s*/, $args ); @argdirs = (); @labels = (); $body = ''; $seen_op = 1; $line = $. + 1; my @temp = (); foreach my $arg (@args) { my ( $use, $type ) = $arg =~ m/^(in|out|inout|inconst|invar|label|labelconst|labelvar) \s+ (INT|NUM|STR|PMC|KEY|INTKEY)$/ix; die "Unrecognized arg format '$arg' in '$_'!" unless defined($use) and defined($type); if ( $type =~ /^INTKEY$/i ) { $type = "ki"; } else { $type = lc substr( $type, 0, 1 ); } # convert e.g. "labelvar" to "invar" and remember labels if ( $use =~ /label(\w*)/ ) { push @labels, 1; $use = "in$1"; } else { push @labels, 0; } if ( $use eq 'in' ) { push @temp, "$type|${type}c"; push @argdirs, 'i'; } elsif ( $use eq 'invar' ) { push @temp, $type; push @argdirs, 'i'; } elsif ( $use eq 'inconst' ) { push @temp, "${type}c"; push @argdirs, 'i'; } elsif ( $use eq 'inout' ) { push @temp, $type; push @argdirs, 'io'; } else { push @temp, $type; push @argdirs, 'o'; } } @args = @temp; next; } # # Handle end-of-op: # # We stash the accumulated body of source code in the Parrot::Op, push the # Parrot::Op onto our op array, and forget the op so we can start the next # one. # if (/^}\s*$/) { $count += $self->make_op( $count, $type, $short_name, $body, \@args, \@argdirs, $line, $orig, \@labels, $flags, $nolines ); $seen_op = 0; next; } # # Accumulate the code into the op's body: # if ($seen_op) { $body .= $_; } else { die "Parrot::OpsFile: Unrecognized line: '$_'!\n"; } } if ($seen_op) { die "Parrot::OpsFile: File ended with incomplete op definition!\n"; } close $OPS or die "Could not close ops file '$file' ($!)!"; return; } # Extends a string containing an or expression "0" .. "A" .. "A|B" etc. sub or_flag { my ( $flag, $value ) = @_; if ( $$flag eq '0' ) { $$flag = $value; } else { $$flag .= "|$value"; } } =item C Returns a new C instance for the specified arguments. =cut sub make_op { my ( $self, $code, $type, $short_name, $body, $args, $argdirs, $line, $file, $labels, $flags, $nolines ) = @_; my $counter = 0; my $absolute = 0; my $branch = 0; my $pop = 0; my $next = 0; my $restart = 0; foreach my $variant ( expand_args(@$args) ) { my (@fixedargs) = split( /,/, $variant ); my $op = Parrot::Op->new( $code++, $type, $short_name, [@fixedargs], [@$argdirs], [@$labels], $flags ); my $op_size = $op->size; my $jumps = "0"; # # Macro substitutions: # # We convert the following notations: # # .ops file Op body Meaning Comment # ----------------- ------- ------------ ---------------------------------- # goto OFFSET(X) {{+=X}} PC' = PC + X Used for branches # goto NEXT() {{+=S}} PC' = PC + S Where S is op size # goto ADDRESS(X) {{=X}} PC' = X Used for absolute jumps # goto POP() {{=*}} PC' = Pop address off control stack # expr OFFSET(X) {{^+X}} PC + X Relative address # expr NEXT() {{^+S}} PC + S Where S is op size # expr ADDRESS(X) {{^X}} X Absolute address # OP_SIZE {{^S}} S op size # # HALT() {{=0}} PC' = 0 Halts run_ops loop, no resume # # restart OFFSET(X) {{=0,+=X}} PC' = 0 Restarts at PC + X # restart NEXT() {{=0,+=S}} PC' = 0 Restarts at PC + S # # $X {{@X}} Argument X $0 is opcode, $1 is first arg # # For ease of parsing, if the argument to one of the above # notations in a .ops file contains parentheses, then double the # enclosing parentheses and add a space around the argument, # like so: # # goto OFFSET(( (void*)interp->happy_place )) # # Later transformations turn the Op body notations into C code, based # on the mode of operation (function calls, switch statements, gotos # with labels, etc.). # # RT#43719: Complain about using, e.g. $3 in an op with only 2 args. # $branch ||= $body =~ s/\bgoto\s+OFFSET\(\( (.*?) \)\)/{{+=$1}}/mg; $absolute ||= $body =~ s/\bgoto\s+ADDRESS\(\( (.*?) \)\)/{{=$1}}/mg; $body =~ s/\bexpr\s+OFFSET\(\( (.*?) \)\)/{{^+$1}}/mg; $body =~ s/\bexpr\s+ADDRESS\(\( (.*?) \)\)/{{^$1}}/mg; $body =~ s/\bOP_SIZE\b/{{^$op_size}}/mg; $branch ||= $body =~ s/\bgoto\s+OFFSET\((.*?)\)/{{+=$1}}/mg; $body =~ s/\bgoto\s+NEXT\(\)/{{+=$op_size}}/mg; $absolute ||= $body =~ s/\bgoto\s+ADDRESS\((.*?)\)/{{=$1}}/mg; $pop ||= $body =~ s/\bgoto\s+POP\(\)/{{=*}}/mg; $body =~ s/\bexpr\s+OFFSET\((.*?)\)/{{^+$1}}/mg; $next ||= $body =~ s/\bexpr\s+NEXT\(\)/{{^+$op_size}}/mg; $body =~ s/\bexpr\s+ADDRESS\((.*?)\)/{{^$1}}/mg; $body =~ s/\bexpr\s+POP\(\)/{{^*}}/mg; $body =~ s/\bHALT\(\)/{{=0}}/mg; $branch ||= $short_name =~ /runinterp/; $next ||= $short_name =~ /runinterp/; if ( $body =~ s/\brestart\s+OFFSET\((.*?)\)/{{=0,+=$1}}/mg ) { $branch = 1; $restart = 1; } elsif ( $body =~ s/\brestart\s+NEXT\(\)/{{=0,+=$op_size}}/mg ) { $restart = 1; $next = 1; } elsif ( $short_name eq 'branch_cs' || $short_name eq 'returncc' ) { $restart = 1; # dest may be NULL to leave run-loop } elsif ( $body =~ s/\brestart\s+ADDRESS\((.*?)\)/{{=$1}}/mg ) { $next = 0; $restart = 1; } $body =~ s/\$(\d+)/{{\@$1}}/mg; my $file_escaped = $file; $file_escaped =~ s|(\\)|$1$1|g; # escape backslashes $op->body( $nolines ? $body : qq{#line $line "$file_escaped"\n$body} ); # Constants here are defined in include/parrot/op.h or_flag( \$jumps, "PARROT_JUMP_RELATIVE" ) if ($branch); or_flag( \$jumps, "PARROT_JUMP_ADDRESS" ) if ($absolute); or_flag( \$jumps, "PARROT_JUMP_POP" ) if ($pop); or_flag( \$jumps, "PARROT_JUMP_ENEXT" ) if ($next); # I'm assuming the op branches to the value in the last argument. or_flag( \$jumps, "PARROT_JUMP_GNEXT" ) if ( ($jumps) && ( $fixedargs[ @fixedargs - 1 ] ) && ( $fixedargs[ @fixedargs - 1 ] eq 'i' ) ); or_flag( \$jumps, "PARROT_JUMP_RESTART" ) if ($restart); $op->jump($jumps); $self->push_op($op); $counter++; } return $counter; } =item C Given an argument list, returns a list of all the possible argument combinations. =cut sub expand_args { my (@args) = @_; return "" if ( !scalar(@args) ); my $arg = shift(@args); my @var = split( /\|/, $arg ); if ( !scalar(@args) ) { return @var; } else { my @list = expand_args(@args); my @results; foreach my $l (@list) { foreach my $v (@var) { push( @results, "$v,$l" ); } } return @results; } } =item C Returns the C instances found in the file(s). =cut sub ops { my ($self) = @_; return @{ $self->{OPS} }; } =item C Returns the op at C<$index>. =cut sub op { my ( $self, $index ) = @_; return $self->{OPS}[$index]; } =item C =item C Returns any lines found prior to first op definition. If C<$trans> (an C subclass) is supplied then substitutions are made. =cut sub preamble { my ( $self, $trans ) = @_; local $_ = $self->{PREAMBLE}; if ($trans) { s/goto\s+OFFSET\((.*)\)/{{+=$1}}/mg; #s/goto\s+NEXT\(\)/{{+=$op_size}}/mg; #not supported--dependent on op size s/goto\s+ADDRESS\((.*)\)/{{=$1}}/mg; s/goto\s+POP\(\)/{{=*}}/mg; s/HALT\(\)/{{=0}}/mg; # RT#43721: This ought to throw errors when attempting to rewrite $n # argument accesses and other things that make no sense in the # preamble. $_ = Parrot::Op->rewrite_body( $_, $trans ); } return $_; } =item C =item C =item C Sets/gets the version number. =cut sub version { my $self = shift; if ( @_ == 1 ) { $self->{VERSION} = shift; } elsif ( @_ == 3 ) { $self->{VERSION} = join( '.', @_ ); } elsif ( @_ == 0 ) { if (wantarray) { return split( /\./, $self->{VERSION} ); } else { return $self->{VERSION}; } } else { die "Parrot::OpsFile::version(): Illegal argument count" . scalar(@_) . "!"; } } =item C Returns the major version number. =cut sub major_version { my $self = shift; $self->{VERSION} =~ m/^(\d+)\./; return $1; } =item C Returns the minor version number. =cut sub minor_version { my $self = shift; $self->{VERSION} =~ m/^\d+\.(\d+)\./; return $1; } =item C Returns the patch version number. =cut sub patch_version { my $self = shift; $self->{VERSION} =~ m/^\d+\.\d+\.(\d+)/; return $1; } =item C Adds C<$op> to the end of the op list. =cut sub push_op { my ( $self, $op ) = @_; push @{ $self->{OPS} }, $op; } =back =head1 SEE ALSO =over 4 =item C =item C =item F =item F =item F =back =cut 1; # Local Variables: # mode: cperl # cperl-indent-level: 4 # fill-column: 100 # End: # vim: expandtab shiftwidth=4: