package B::Utils; use 5.006; use strict; use warnings; use vars '$DEBUG'; our @EXPORT_OK = qw(all_starts all_roots anon_subs walkoptree_simple walkoptree_filtered walkallops_simple walkallops_filtered carp croak opgrep ); sub import { my $pack = shift; my @exports = @_; my $caller = caller; my %EOK = map {$_ => 1} @EXPORT_OK; for (@exports) { unless ($EOK{$_}) { require Carp; Carp::croak(qq{"$_" is not exported by the $pack module}); } no strict 'refs'; *{"$caller\::$_"} = \&{"$pack\::$_"}; } } our $VERSION = '0.05'; use B qw(main_start main_root walksymtable class OPf_KIDS); my (%starts, %roots, @anon_subs); our @bad_stashes = qw(B Carp Exporter warnings Cwd Config CORE blib strict DynaLoader vars XSLoader AutoLoader base); sub null { my $op = shift; class( $op ) eq 'NULL'; } { my $_subsdone=0; sub _init { # To ensure runtimeness. return if $_subsdone; %starts = ( '__MAIN__' => main_start() ); %roots = ( '__MAIN__' => main_root() ); walksymtable(\%main::, '_push_starts', sub { return if scalar grep {$_[0] eq $_."::"} @bad_stashes; 1; }, # Do not eat our own children! ''); push @anon_subs, { root => $_->ROOT, start => $_->START} for grep { class($_) eq "CV" } B::main_cv->PADLIST->ARRAY->ARRAY; $_subsdone=1; } } =head1 NAME B::Utils - Helper functions for op tree manipulation =head1 SYNOPSIS use B::Utils; =head1 DESCRIPTION These functions make it easier to manipulate the op tree. =head1 FUNCTIONS =over 3 =item C =item C Returns a hash of all of the starting ops or root ops of optrees, keyed to subroutine name; the optree for main program is simply keyed to C<__MAIN__>. B: Certain "dangerous" stashes are not scanned for subroutines: the list of such stashes can be found in C<@B::Utils::bad_stashes>. Feel free to examine and/or modify this to suit your needs. The intention is that a simple program which uses no modules other than C and C would show no addition symbols. This does B return the details of ops in anonymous subroutines compiled at compile time. For instance, given $a = sub { ... }; the subroutine will not appear in the hash. This is just as well, since they're anonymous... If you want to get at them, use... =item C This returns an array of hash references. Each element has the keys "start" and "root". These are the starting and root ops of all of the anonymous subroutines in the program. =cut sub all_starts { _init(); return %starts; } sub all_roots { _init(); return %roots; } sub anon_subs { _init(); return @anon_subs } sub B::GV::_push_starts { my $name = $_[0]->STASH->NAME."::".$_[0]->SAFENAME; return unless ${$_[0]->CV}; my $cv = $_[0]->CV; if ($cv->PADLIST->can("ARRAY") and $cv->PADLIST->ARRAY and $cv->PADLIST->ARRAY->can("ARRAY")) { push @anon_subs, { root => $_->ROOT, start => $_->START} for grep { class($_) eq "CV" } $cv->PADLIST->ARRAY->ARRAY; } return unless ${$cv->START} and ${$cv->ROOT}; $starts{$name} = $cv->START; $roots{$name} = $cv->ROOT; }; sub B::SPECIAL::_push_starts{} =item C<< $op->oldname >> Returns the name of the op, even if it is currently optimized to null. This helps you understand the stucture of the op tree. =cut sub B::OP::oldname { return substr(B::ppname($_[0]->targ),3) if $_[0]->name eq "null" and $_[0]->targ; return $_[0]->name; } =item C<< $op->kids >> Returns an array of all this op's non-null children, in order. =cut sub B::OP::kids { my $op = shift; my @rv; if (class($op) eq "LISTOP") { $op = $op->first; push @rv, $op while $op->can("sibling") and $op = $op->sibling and $$op; return @rv; } push @rv, $op->first if $op->can("first") and $op->first and ${$op->first}; push @rv, $op->last if $op->can("last") and $op->last and ${$op->last}; push @rv, $op->other if $op->can("other") and $op->other and ${$op->other}; return @rv; } =item C<< $op->parent >> Returns the parent node in the op tree, if possible. Currently "possible" means "if the tree has already been optimized"; that is, if we're during a C block. (and hence, if we have valid C pointers.) In the future, it may be possible to search for the parent before we have the C pointers in place, but it'll take me a while to figure out how to do that. =cut sub B::OP::parent { my $target = shift; printf( "parent %s %s=(0x%07x)\n", B::class( $target), $target->oldname, $$target ) if $DEBUG; die "I'm not sure how to do this yet. I'm sure there is a way. If you know, please email me." if (!$target->seq); my (%deadend, $search_kids); $search_kids = sub { my $node = shift || return undef; printf( "Searching from %s %s=(0x%07x)\n", class($node)||'?', $node->oldname, $$node ) if $DEBUG; # Go up a level if we've got stuck, and search (for the same # $target) from a higher vantage point. if ( exists $deadend{ $node } ) { printf( " search parent %s %s=(0x%07x)\n", B::class( $node ), $node->oldname, $$node ) if $DEBUG; return $search_kids->( $node->parent ); } # Test the immediate children, but only children we haven't visited # already. my @new_kids = ( grep !$deadend{ $_ }, $node->kids ); if ( scalar grep $$_ == $$target, @new_kids ) { return $node; } # Recurse and examine each child, in turn. print( " search kids\n" . join( "", map sprintf( " %s %s=(0x%07x)\n", B::class( $_ ), $_->oldname, $$_ ), @new_kids ) ) if $DEBUG and @new_kids; for ( @new_kids ) { my $x = $search_kids->( $_ ); return $x if $x; } # Not in this subtree. $deadend{$node}++; return undef; }; my $start = $target; # Skip to the farthest sibling and make a list of each with the most # recent at the beginning of the list. # I am planning ahead for the day when it turns out that the parent # cannot be found in the last sibling somewhere. Maybe it is just a # null? I would like to be able to back track up the tree to find a # ->next node that will bring us to northeast of (or even better, # directly to) the parent. my @siblings = $start; while ( $start and ${$start->sibling} ) { $start = $start->sibling; unshift @siblings, $start; printf( "->sibling %s %s=(0x%07x)\n", class($start)||'null', $start->oldname, $$start ) if $DEBUG; } # Now search each sibling as noted from above. for $start ( @siblings ) { my $next = $start; while ( $$next ) { printf( "->next %s %s=(0x%07x)\n", B::class( $next ), $next->oldname, $$next ) if $DEBUG; my $result = $search_kids->( $next ); return $result if $result; } continue { $next = $next->next; } } # Having reached here... I give up? undef; } =item C<< $op->previous >> Like C<< $op->next >>, but not quite. =cut sub B::OP::previous { my $target = shift; my $start = $target; my (%deadend, $search); $search = sub { my $node = shift || die; return $search->(find_parent($node)) if exists $deadend{$node}; return $node if $node->{next}==$target; # Recurse my $x; ($_->next == $target and return $_) for $node->kids; defined($x = $search->($_)) and return $x for $node->{kids}; # Not in this subtree. $deadend{$node}++; return undef; }; my $result; $result = $search->($start) and return $result while $start = $start->next; } =item walkoptree_simple($op, \&callback, [$data]) The C module provides various functions to walk the op tree, but they're all rather difficult to use, requiring you to inject methods into the C class. This is a very simple op tree walker with more expected semantics. All the C functions set C and C to the appropriate values of file and line number in the program being examined. =cut our ($file, $line) = ("__none__",0); sub walkoptree_simple { my ($op, $callback, $data) = @_; ($file, $line) = ($op->file, $op->line) if $op->isa("B::COP"); $callback->($op,$data); if ($$op && ($op->flags & OPf_KIDS)) { my $kid; for ($kid = $op->first; $$kid; $kid = $kid->sibling) { walkoptree_simple($kid, $callback, $data); } } } =item walkoptree_filtered($op, \&filter, \&callback, [$data]) This is much the same as C, but will only call the callback if the C returns true. The C is passed the op in question as a parameter; the C function is fantastic for building your own filters. =cut sub walkoptree_filtered { my ($op, $filter, $callback, $data) = @_; ($file, $line) = ($op->file, $op->line) if $op->isa("B::COP"); $callback->($op,$data) if $filter->($op); if ($$op && ($op->flags & OPf_KIDS)) { my $kid; for ($kid = $op->first; $$kid; $kid = $kid->sibling) { walkoptree_filtered($kid, $filter, $callback, $data); } } } =item walkallops_simple(\&callback, [$data]) This combines C with C and C to examine every op in the program. C<$B::Utils::sub> is set to the subroutine name if you're in a subroutine, C<__MAIN__> if you're in the main program and C<__ANON__> if you're in an anonymous subroutine. =cut our $sub; sub walkallops_simple { my ($callback, $data) = @_; _init(); for $sub (keys %roots) { walkoptree_simple($roots{$sub}, $callback, $data); } $sub = "__ANON__"; for (@anon_subs) { walkoptree_simple($_->{root}, $callback, $data); } } =item walkallops_filtered(\&filter, \&callback, [$data]) Same as above, but filtered. =cut sub walkallops_filtered { my ($filter, $callback, $data) = @_; _init(); for $sub (keys %roots) { walkoptree_filtered($roots{$sub}, $filter, $callback, $data); } $sub = "__ANON__"; for (@anon_subs) { walkoptree_filtered($_->{root}, $filter, $callback, $data); } } =item carp(@args) =item croak(@args) Warn and die, respectively, from the perspective of the position of the op in the program. Sounds complicated, but it's exactly the kind of error reporting you expect when you're grovelling through an op tree. =cut sub _preparewarn { my $args = join '', @_; $args = "Something's wrong " unless $args; $args .= " at $file line $line.\n" unless substr($args, length($args) -1) eq "\n"; } sub carp (@) { CORE::die(preparewarn(@_)) } sub croak (@) { CORE::warn(preparewarn(@_)) } =item opgrep(\%conditions, @ops) Returns the ops which meet the given conditions. The conditions should be specified like this: @barewords = opgrep( { name => "const", private => OPpCONST_BARE }, @ops ); You can specify alternation by giving an arrayref of values: @svs = opgrep ( { name => ["padsv", "gvsv"] }, @ops) And you can specify inversion by making the first element of the arrayref a "!". (Hint: if you want to say "anything", say "not nothing": C<["!"]>) You may also specify the conditions to be matched in nearby ops. walkallops_filtered( sub { opgrep( {name => "exec", next => { name => "nextstate", sibling => { name => [qw(! exit warn die)] } } }, @_)}, sub { carp("Statement unlikely to be reached"); carp("\t(Maybe you meant system() when you said exec()?)\n"); } ) Get that? Here are the things that can be tested: name targ type seq flags private pmflags pmpermflags first other last sibling next pmreplroot pmreplstart pmnext =cut sub opgrep { my ($cref, @ops) = @_; my %conds = %$cref; my @rv = (); my $o; OPLOOP: for $o (@ops) { # First, let's skim off ops of the wrong type. for (qw(first other last pmreplroot pmreplstart pmnext pmflags pmpermflags)) { next OPLOOP if exists $conds{$_} and !$o->can($_); } for my $test (qw(name targ type seq flags private pmflags pmpermflags)) { next unless exists $conds{$test}; next OPLOOP unless ref $o and $o->can($test); if (!ref $conds{$test}) { next OPLOOP if $o->$test ne $conds{$test}; } else { if ($conds{$test}[0] eq "!") { my @conds = @{$conds{$test}}; shift @conds; next OPLOOP if grep {$o->$test eq $_} @conds; } else { next OPLOOP unless grep {$o->$test eq $_} @{$conds{$test}}; } } } for my $neighbour (qw(first other last sibling next pmreplroot pmreplstart pmnext)) { next unless exists $conds{$neighbour}; # We know it can, because we tested that above # Recurse, recurse! next OPLOOP unless opgrep($conds{$neighbour}, $o->$neighbour); } push @rv, $_; } return @rv; } 1; =back =head2 EXPORT None by default. =head1 AUTHOR Simon Cozens, C =head1 TODO I need to add more Fun Things, and possibly clean up some parts where the (previous/parent) algorithm has catastrophic cases, but it's more important to get this out right now than get it right. =head1 SEE ALSO L, L. =cut