#! perl
# Copyright (C) 2001-2006, The Perl Foundation.
# $Id: OpsFile.pm 21450 2007-09-21 09:33:12Z paultcochrane $
=head1 NAME
Parrot::OpsFile - Ops To C Code Generation
=head1 SYNOPSIS
use Parrot::OpsFile;
=head1 DESCRIPTION
C<Parrot::OpsFile> takes one or more files of op functions and
creates real C code for them.
This class is used by F<tools/build/ops2c.pl>,
F<tools/build/ops2pm.pl> and F<tools/build/pbc2c.pl>.
=head2 Op Functions
For ops that have trivial bodies (such as just a call to some other
function and a C<return> statement), opcode functions are in the format:
inline op opname (args) :class,flags {
... body of function ...
}
Note that currently the C<inline> op type is ignored.
Alternately, for opcode functions that have more internal complexity the
format is:
op opname (args) :class,flags {
... body of function ...
}
There may be more than one C<return>.
In both cases the closing brace B<must> be on its own line.
=head2 Op Arguments
Op arguments are a comma-separated list of direction and type pairs.
Argument direction is one of:
in the argument passes a value into the op
out the argument passes a value out of the op
inout the argument passes a value into and out of the op
inconst the argument passes a constant value into the op
invar the argument passes a variable value into the op
label an in argument containing a branch offset or address
labelconst an invar argument containing a branch offset or address
labelvar an inconst argument containing a branch offset or address
Argument direction is used to determine the life times of symbols and
their related register allocations. When an argument is passed into an
op a register is read from, when it's passed out of an op a register is
written to.
Argument type is one of:
INT the argument is an integer
NUM the argument is an numeric
STR the argument is an string
PMC the argument is an PMC
KEY the argument is an aggregate PMC key
INTKEY the argument is an aggregate PMC integer key
The size of the return offset is determined from the op function's
signature.
=head2 Op Classification and Flags
The op classification and flags are optional hints which provide
information about the op.
The classification of ops is intended to facilitate the selection of
suitable ops for a Parrot safe mode, or for inclusion in miniparrot.
=head2 Op Body (Macro Substitutions)
In the following macro descriptions, C<PC> and C<PC'> are the current
and next position within the Parrot code.
=over 4
=item C<goto OFFSET(X)>
Transforms to C<PC' = PC + X>. This is used for branches.
=item C<goto NEXT()>
Transforms to C<PC' = PC + S>, where C<S> is the size of an op.
=item C<goto ADDRESS(X)>
Transforms to C<PC' = X>. This is used for absolute jumps.
=item C<goto POP()>
Transforms to C<< PC' = <pop> >>. Pops the address off control stack.
=item C<expr OFFSET(X)>
Transforms to C<PC + X>. This is used to give a relative address.
=item C<expr NEXT()>
Transforms to C<PC + S>, the position of the next op.
=item C<expr ADDRESS(X)>
Transforms to C<X>, an absolute address.
=item C<OP_SIZE>
Transforms to C<S>, the size of an op.
=item C<HALT()>
Transforms to C<PC' = 0>. Halts run loop, and resets the current
position to the start of the Parrot code, without resuming.
=item C<restart OFFSET(X)>
Transforms to C<PC' = 0> and restarts at C<PC + X>.
=item C<restart NEXT()>
Transforms to C<PC' = 0> and restarts at C<PC + S>.
=item C<$n>
Transforms to the op function's nth argument. C<$0> is the opcode itself.
=back
Note that, for ease of parsing, if the argument to one of the above
notations in a ops file contains parentheses, then double the enclosing
parentheses and add a space around the argument, like so:
goto OFFSET(( (void*)interp->happy_place ))
=head2 Class Methods
=over 4
=cut
package Parrot::OpsFile;
use strict;
use warnings;
use base qw( Exporter );
use Parrot::Op;
use Parrot::Config;
our %op_body;
our @EXPORT = qw( %op_body );
# private sub _trim()
#
# Trim leading and trailing spaces.
sub _trim {
my $value = shift;
$value =~ s/^\s+//;
$value =~ s/\s+$//;
return $value;
}
=item C<new(@files)>
Returns a new instance initialized by calling C<read_ops()> on each of
the specified op files.
=cut
sub new {
my ( $class, $files, $nolines ) = @_;
my $self = bless { PREAMBLE => '' }, $class;
$self->read_ops( $_, $nolines ) for @{$files};
# FILE holds a space separated list of opsfile name
if ( $self->{FILE} ) {
$self->{FILE} =~ s/, $//;
$self->{FILE} =~ s/, $//;
}
return $self;
}
=back
=head2 Instance Methods
=over 4
=item C<read_ops($file,$nolines)>
Reads in the specified .ops file, gathering information about the ops.
=cut
sub read_ops {
my ( $self, $file, $nolines ) = @_;
my $ops_file = "src/" . $file;
die "Parrot::OpFunc::init(): No file specified!\n" unless defined $file;
$self->{FILE} .= $file . ', ';
my $orig = $file;
open my $OPS, '<', $file or die "Can't open $file, $!/$^E";
if ( !( $file =~ s/\.ops$/.c/ ) ) {
$file .= ".c";
}
#
# Read through the file, creating and storing Parrot::Op objects:
#
my $count = 0;
my ( $name, $footer );
my $type;
my $body;
my $short_name;
my $args;
my @args;
my @argdirs;
my $seen_pod;
my $seen_op;
my $line;
my $flags;
my @labels;
while (<$OPS>) {
$seen_pod = 1 if m|^=|;
unless ( $seen_op or m|^(inline\s+)?op\s+| ) {
if (m/^\s*VERSION\s*=\s*"(\d+\.\d+\.\d+)"\s*;\s*$/)
{
if ( exists $self->{VERSION} ) {
#die "VERSION MULTIPLY DEFINED!";
}
$self->version($1);
$_ = '';
}
elsif (m/^\s*VERSION\s*=\s*PARROT_VERSION\s*;\s*$/) {
if ( exists $self->{VERSION} ) {
#die "VERSION MULTIPLY DEFINED!";
}
$self->version( $PConfig{VERSION} );
$_ = '';
}
$self->{PREAMBLE} .= $_
unless $seen_pod or $count; # Lines up to first op def.
next;
}
die "No 'VERSION = ...;' line found before beginning of ops in file '$orig'!\n"
unless defined $self->version;
#
# Handle start-of-op:
#
# We create a new Parrot::Op instance based on the type, name and args.
# We query the Parrot::Op for the op size, etc., which we use later.
#
# Either of these two forms work:
#
# inline op name (args) {
# op name (args) {
#
# The args are a comma-separated list of items from this table of argument
# types (even if no formal args are specified, there will be a single 'o'
# entry):
#
# op The opcode
#
# i Integer register index
# n Number register index
# p PMC register index
# s String register index
#
# ic Integer constant (in-line)
# nc Number constant index
# pc PMC constant index
# sc String constant index
# kc Key constant index
# kic Integer Key constant index (in-line)
#
if (/^(inline\s+)?op\s+([a-zA-Z]\w*)\s*\((.*)\)\s*(\S*)?\s*{/) {
if ($seen_op) {
die "$ops_file [$.]: Cannot define an op within an op definition!\n";
}
$type = defined($1) ? 'inline' : 'function';
$short_name = $2;
$args = _trim( lc $3 );
$flags = $4 ? _trim( lc $4 ) : "";
@args = split( /\s*,\s*/, $args );
@argdirs = ();
@labels = ();
$body = '';
$seen_op = 1;
$line = $. + 1;
my @temp = ();
foreach my $arg (@args) {
my ( $use, $type ) =
$arg =~ m/^(in|out|inout|inconst|invar|label|labelconst|labelvar)
\s+
(INT|NUM|STR|PMC|KEY|INTKEY)$/ix;
die "Unrecognized arg format '$arg' in '$_'!"
unless defined($use)
and defined($type);
if ( $type =~ /^INTKEY$/i ) {
$type = "ki";
}
else {
$type = lc substr( $type, 0, 1 );
}
# convert e.g. "labelvar" to "invar" and remember labels
if ( $use =~ /label(\w*)/ ) {
push @labels, 1;
$use = "in$1";
}
else {
push @labels, 0;
}
if ( $use eq 'in' ) {
push @temp, "$type|${type}c";
push @argdirs, 'i';
}
elsif ( $use eq 'invar' ) {
push @temp, $type;
push @argdirs, 'i';
}
elsif ( $use eq 'inconst' ) {
push @temp, "${type}c";
push @argdirs, 'i';
}
elsif ( $use eq 'inout' ) {
push @temp, $type;
push @argdirs, 'io';
}
else {
push @temp, $type;
push @argdirs, 'o';
}
}
@args = @temp;
next;
}
#
# Handle end-of-op:
#
# We stash the accumulated body of source code in the Parrot::Op, push the
# Parrot::Op onto our op array, and forget the op so we can start the next
# one.
#
if (/^}\s*$/) {
$count += $self->make_op(
$count, $type, $short_name, $body, \@args, \@argdirs,
$line, $orig, \@labels, $flags, $nolines
);
$seen_op = 0;
next;
}
#
# Accumulate the code into the op's body:
#
if ($seen_op) {
$body .= $_;
}
else {
die "Parrot::OpsFile: Unrecognized line: '$_'!\n";
}
}
if ($seen_op) {
die "Parrot::OpsFile: File ended with incomplete op definition!\n";
}
close $OPS or die "Could not close ops file '$file' ($!)!";
return;
}
# Extends a string containing an or expression "0" .. "A" .. "A|B" etc.
sub or_flag {
my ( $flag, $value ) = @_;
if ( $$flag eq '0' ) {
$$flag = $value;
}
else {
$$flag .= "|$value";
}
}
=item C<make_op($code,
$type, $short_name, $body, $args, $argdirs, $line, $file, $labels, $flags, $nolines)>
Returns a new C<Parrot::Op> instance for the specified arguments.
=cut
sub make_op {
my (
$self, $code, $type, $short_name, $body, $args,
$argdirs, $line, $file, $labels, $flags, $nolines
) = @_;
my $counter = 0;
my $absolute = 0;
my $branch = 0;
my $pop = 0;
my $next = 0;
my $restart = 0;
foreach my $variant ( expand_args(@$args) ) {
my (@fixedargs) = split( /,/, $variant );
my $op =
Parrot::Op->new( $code++, $type, $short_name, [@fixedargs], [@$argdirs], [@$labels],
$flags );
my $op_size = $op->size;
my $jumps = "0";
#
# Macro substitutions:
#
# We convert the following notations:
#
# .ops file Op body Meaning Comment
# ----------------- ------- ------------ ----------------------------------
# goto OFFSET(X) {{+=X}} PC' = PC + X Used for branches
# goto NEXT() {{+=S}} PC' = PC + S Where S is op size
# goto ADDRESS(X) {{=X}} PC' = X Used for absolute jumps
# goto POP() {{=*}} PC' = <pop> Pop address off control stack
# expr OFFSET(X) {{^+X}} PC + X Relative address
# expr NEXT() {{^+S}} PC + S Where S is op size
# expr ADDRESS(X) {{^X}} X Absolute address
# OP_SIZE {{^S}} S op size
#
# HALT() {{=0}} PC' = 0 Halts run_ops loop, no resume
#
# restart OFFSET(X) {{=0,+=X}} PC' = 0 Restarts at PC + X
# restart NEXT() {{=0,+=S}} PC' = 0 Restarts at PC + S
#
# $X {{@X}} Argument X $0 is opcode, $1 is first arg
#
# For ease of parsing, if the argument to one of the above
# notations in a .ops file contains parentheses, then double the
# enclosing parentheses and add a space around the argument,
# like so:
#
# goto OFFSET(( (void*)interp->happy_place ))
#
# Later transformations turn the Op body notations into C code, based
# on the mode of operation (function calls, switch statements, gotos
# with labels, etc.).
#
# RT#43719: Complain about using, e.g. $3 in an op with only 2 args.
#
$branch ||= $body =~ s/\bgoto\s+OFFSET\(\( (.*?) \)\)/{{+=$1}}/mg;
$absolute ||= $body =~ s/\bgoto\s+ADDRESS\(\( (.*?) \)\)/{{=$1}}/mg;
$body =~ s/\bexpr\s+OFFSET\(\( (.*?) \)\)/{{^+$1}}/mg;
$body =~ s/\bexpr\s+ADDRESS\(\( (.*?) \)\)/{{^$1}}/mg;
$body =~ s/\bOP_SIZE\b/{{^$op_size}}/mg;
$branch ||= $body =~ s/\bgoto\s+OFFSET\((.*?)\)/{{+=$1}}/mg;
$body =~ s/\bgoto\s+NEXT\(\)/{{+=$op_size}}/mg;
$absolute ||= $body =~ s/\bgoto\s+ADDRESS\((.*?)\)/{{=$1}}/mg;
$pop ||= $body =~ s/\bgoto\s+POP\(\)/{{=*}}/mg;
$body =~ s/\bexpr\s+OFFSET\((.*?)\)/{{^+$1}}/mg;
$next ||= $body =~ s/\bexpr\s+NEXT\(\)/{{^+$op_size}}/mg;
$body =~ s/\bexpr\s+ADDRESS\((.*?)\)/{{^$1}}/mg;
$body =~ s/\bexpr\s+POP\(\)/{{^*}}/mg;
$body =~ s/\bHALT\(\)/{{=0}}/mg;
$branch ||= $short_name =~ /runinterp/;
$next ||= $short_name =~ /runinterp/;
if ( $body =~ s/\brestart\s+OFFSET\((.*?)\)/{{=0,+=$1}}/mg ) {
$branch = 1;
$restart = 1;
}
elsif ( $body =~ s/\brestart\s+NEXT\(\)/{{=0,+=$op_size}}/mg ) {
$restart = 1;
$next = 1;
}
elsif ( $short_name eq 'branch_cs' || $short_name eq 'returncc' ) {
$restart = 1; # dest may be NULL to leave run-loop
}
elsif ( $body =~ s/\brestart\s+ADDRESS\((.*?)\)/{{=$1}}/mg ) {
$next = 0;
$restart = 1;
}
$body =~ s/\$(\d+)/{{\@$1}}/mg;
my $file_escaped = $file;
$file_escaped =~ s|(\\)|$1$1|g; # escape backslashes
$op->body( $nolines ? $body : qq{#line $line "$file_escaped"\n$body} );
# Constants here are defined in include/parrot/op.h
or_flag( \$jumps, "PARROT_JUMP_RELATIVE" ) if ($branch);
or_flag( \$jumps, "PARROT_JUMP_ADDRESS" ) if ($absolute);
or_flag( \$jumps, "PARROT_JUMP_POP" ) if ($pop);
or_flag( \$jumps, "PARROT_JUMP_ENEXT" ) if ($next);
# I'm assuming the op branches to the value in the last argument.
or_flag( \$jumps, "PARROT_JUMP_GNEXT" )
if ( ($jumps)
&& ( $fixedargs[ @fixedargs - 1 ] )
&& ( $fixedargs[ @fixedargs - 1 ] eq 'i' ) );
or_flag( \$jumps, "PARROT_JUMP_RESTART" ) if ($restart);
$op->jump($jumps);
$self->push_op($op);
$counter++;
}
return $counter;
}
=item C<expand_args(@args)>
Given an argument list, returns a list of all the possible argument
combinations.
=cut
sub expand_args {
my (@args) = @_;
return "" if ( !scalar(@args) );
my $arg = shift(@args);
my @var = split( /\|/, $arg );
if ( !scalar(@args) ) {
return @var;
}
else {
my @list = expand_args(@args);
my @results;
foreach my $l (@list) {
foreach my $v (@var) {
push( @results, "$v,$l" );
}
}
return @results;
}
}
=item C<ops()>
Returns the C<Parrot::Op> instances found in the file(s).
=cut
sub ops {
my ($self) = @_;
return @{ $self->{OPS} };
}
=item C<op($index)>
Returns the op at C<$index>.
=cut
sub op {
my ( $self, $index ) = @_;
return $self->{OPS}[$index];
}
=item C<preamble()>
=item C<preamble($trans)>
Returns any lines found prior to first op definition.
If C<$trans> (an C<Parrot::OpTrans> subclass) is supplied then
substitutions are made.
=cut
sub preamble {
my ( $self, $trans ) = @_;
local $_ = $self->{PREAMBLE};
if ($trans) {
s/goto\s+OFFSET\((.*)\)/{{+=$1}}/mg;
#s/goto\s+NEXT\(\)/{{+=$op_size}}/mg; #not supported--dependent on op size
s/goto\s+ADDRESS\((.*)\)/{{=$1}}/mg;
s/goto\s+POP\(\)/{{=*}}/mg;
s/HALT\(\)/{{=0}}/mg;
# RT#43721: This ought to throw errors when attempting to rewrite $n
# argument accesses and other things that make no sense in the
# preamble.
$_ = Parrot::Op->rewrite_body( $_, $trans );
}
return $_;
}
=item C<version($major, $minor, $patch)>
=item C<version($version)>
=item C<version()>
Sets/gets the version number.
=cut
sub version {
my $self = shift;
if ( @_ == 1 ) {
$self->{VERSION} = shift;
}
elsif ( @_ == 3 ) {
$self->{VERSION} = join( '.', @_ );
}
elsif ( @_ == 0 ) {
if (wantarray) {
return split( /\./, $self->{VERSION} );
}
else {
return $self->{VERSION};
}
}
else {
die "Parrot::OpsFile::version(): Illegal argument count" . scalar(@_) . "!";
}
}
=item C<major_version()>
Returns the major version number.
=cut
sub major_version {
my $self = shift;
$self->{VERSION} =~ m/^(\d+)\./;
return $1;
}
=item C<minor_version()>
Returns the minor version number.
=cut
sub minor_version {
my $self = shift;
$self->{VERSION} =~ m/^\d+\.(\d+)\./;
return $1;
}
=item C<patch_version()>
Returns the patch version number.
=cut
sub patch_version {
my $self = shift;
$self->{VERSION} =~ m/^\d+\.\d+\.(\d+)/;
return $1;
}
=item C<push_op($op)>
Adds C<$op> to the end of the op list.
=cut
sub push_op {
my ( $self, $op ) = @_;
push @{ $self->{OPS} }, $op;
}
=back
=head1 SEE ALSO
=over 4
=item C<Parrot::Op>
=item C<Parrot::OpTrans>
=item F<tools/build/ops2c.pl>
=item F<tools/build/ops2pm.pl>
=item F<tools/build/pbc2c.pl>
=back
=cut
1;
# Local Variables:
# mode: cperl
# cperl-indent-level: 4
# fill-column: 100
# End:
# vim: expandtab shiftwidth=4:
syntax highlighted by Code2HTML, v. 0.9.1