# -*- perl -*- # Eps.pm $Id: Eps.pm,v 1.15.2.2 2000/01/30 13:56:23 jens Exp $ # (C) Copyright Jens G Jensen # This file is part of epsmerge and is distributed under GNU GPL package Eps; use strict; use Options; # input: filename, IO ref, function to store procedures, function to # store DSCs (see Driver for details; this function is called # from check_input which does the conversion name -> cell). # output: nothing (false), or error string sub new { my ($class, $fname, $io, $store_proc, $store_dsc ) = @_; # An eps has boundingbox, orientation (default portrait) and a filename # - also knows name of save variable my $self = { fn => $fname, save => 'eps_Inc_Save_the_State' }; bless $self, $class; unless( open(HELLO, "<$fname") ) { return "Creating $class: can't open $fname for reading\n"; } $_ = ; # File is expected to be either encapsulated postscript, or non-e postscript # in which case we pick *one* page (later) to display; see the ?? method below. # Note the distinction between `Pages:' (a DSC comment) and `pages' (a list of # page [numbers, labels, offset] refs in the order they should be displayed) if( /^%!PS-Adobe.*EPSF/i ) { $self->{'pages'} = undef; } elsif( /^%!/ ) { $self->{'pages'} = [ ]; } else { return "It seems that $fname is not postscript\n"; } # Simple parser: parse DSCs in the header looking in particular for the boundingbox. # Uh -- the parser is getting a bit long (but less ugly (than before)) # atend says: 1 = must read trailer, 2 = reading trailer, # 4 = reading procs, 8 = have met %%BeginProlog (never reset) my $atend = 0; # emblevel counts depth of embedded documents my $emblevel = 0; # Reading_List specifies which DSCs we are looking for and their parsed # values (keeping the first in the Header or the last in the Trailer). # Each entry has a name as the key, a regexp saying how to parse each entry of the # value, and sometimes a default value, and an expected length-of-list. my %Reading_List = ( 'BoundingBox:' => { re => '\d+', ll => 4 }, 'Orientation:' => { re => 'Portrait|Landscape', df => ['Portrait'], ll =>1 }, 'LanguageLevel:' => { re => '\d+', df => ['3.0'], ll => 1 }, 'Extensions:' => { re => '.+', }, 'Title:' => { re => '.*', df => ['(no title)'] }, 'CreationDate:' => { re => '.+', }, 'Pages:' => { re => '\d+', df => [1], ll => 1, }, 'PageOrder:' => { re => 'Ascend|Descend|Special', df => ['Ascend'], ll =>1 }, ); # All but the last of these store locations in the file; # $name remembers the name of the DSC that we are currently reading. my ($end_of_header, $trailer, $begin_setup, $end_setup, $begin_prolog, $end_prolog, $name); my $ppos = 0; # previous position, i.e., (before) beginning of current line PARSE: while( ) { # pick out the resources, first the "normal" resources... if( /^%%Begin(Resource|File|ProcSet|Font):\s*(.*)$/ ) { my ($what, $arg) = ($1, $2); my $line = $.; $begin_prolog = $ppos unless defined $begin_prolog; if( $atend & 8 && ! ($atend & 4) ) { $io->message("$fname:$line: Warning: resource found outside prolog", "W"); } else { $atend |= 12; } if( $what ne 'Resource' ) { # convert to resources $_ = "%%BeginResource: \L$what\E $2\n"; } # Get the resource my $proc = $_; while() { s/(^|\W)showpage(\W|$)/$1$2/; if( /^%%End$what/ ) { $proc .= "%%EndResource\n"; &$store_proc($proc); if( $what eq 'Resource' ) { if( $arg =~ /^(\w+) (\S+)/ ) { &$store_dsc( 'supplied', $1, $2 ); } } else { if( $arg =~ /^(\S+)/ ) { &$store_dsc( 'supplied', lc $what, $1 ); } } next PARSE; } $proc .= $_; } continue { $ppos = tell HELLO; } return "$fname: runaway arg from line $line?"; } # ... and now the procedures not delineated by DSCs, but found # inside the prolog (these are terminated by the beginning of the # next DSC line). Could have been combined with the above, but # it's cleaner to keep it separate. if( $atend & 4 ) { my $line = $.; my $proc = $_; while() { s/(^|\W)showpage(\W|$)/$1$2/; if( /^%%/ ) { &$store_proc($proc); goto DONEWITHTHISBITNOWGETONWITHTHENEXT; } $proc .= $_; } continue { $ppos = tell HELLO; } return "$fname: runaway arg from line $line?"; DONEWITHTHISBITNOWGETONWITHTHENEXT: } # ... and now the Include things, we just mark that we'll need them. if( /^%%Include(\w+):\s*(\S+)/ ) { &$store_dsc( 'needed', $1, $2 ); next; } if ( /^%%LanguageLevel:\s*(\d+)/ ) { &$store_dsc( 'language', $1 ); next; } --$emblevel if /^%%EndDocument/ || /^%%EndData/; ++$emblevel if /^%%BeginDocument/ || /^%%BeginData/; next if $emblevel; # is this a %%+ continuation of the last DSC comment? if( /^%%\+\s*(.+)$/ ) { next if $name eq 'ignore'; # continuation of a comment we ignore if( !defined $name || !defined $self->{$name} ) { $io->message("$fname:$.: Unknown continuation", "W"); } else { push @{$self->{ $name }}, split(/\s+/, $1); } next; } if( !defined $end_of_header && &_endofheader($_) ) { $end_of_header = /^%/ ? tell HELLO : $ppos; } elsif(/^%%Page:\s*(\S+)\s+([0-9]+)$/) { push @{$self->{'pages'}}, [ $1, $2, tell HELLO ]; } elsif(/^%%Trailer/) { $trailer = $ppos; $atend |= 2; $atend &= ~1; next; } elsif(/^%%BeginProlog/) { if( $atend & 8 ) { $io->message("$fname:$.: Additional prolog??", "W"); # silly! next; } $begin_prolog = $ppos; $atend |= 12; next; } elsif(/^%%EndProlog/) { $end_prolog = tell HELLO; $atend &= ~4; next; } elsif(/^%%BeginSetup/) { $begin_setup = $ppos; next; } elsif(/^%%EndSetup/) { $end_setup = tell HELLO; next; } elsif(/^%%BeginFeature/) { my $line = $.; # these are probably printer specific things; # right now they are Ignored(tm). while() { next PARSE if /^%%EndFeature/; } return "$fname: runaway arg from line $line?"; } last if /^%%EOF/ && !Options->new()->getopts('ignore-eof'); # identify comment my ($arg, $item); next unless ($name, $arg) = /^%%([a-zA-Z\.\?\!]+:?)\s*(.*)$/; # strip leading and trailing spaces $arg =~ s/^\s*//; $arg =~ s/\s*$//; $item = $Reading_List{ $name }; unless( defined $item ) { $name = 'ignore'; next; } if( $arg eq '(atend)' ) { if( $atend & 2 ) { $io->message("$fname:$.: Huh? Can't have (atend) ref in the trailer!\n", 'W'); next; } $atend |= 1; $item->{ value } = '(atend)'; next; } if( $arg =~ /$item->{re}/ ) { # The DSC that counts is the _first_ in _headers_, _last_ in _trailers_ # Here the trailer one (only if available and someone prompts us to read trailer) # takes precedence over the header one if( $self->{ $name } && !($atend & 2) ) { $name = 'ignore'; next; } $self->{ $name } = [ split /\s+/, $arg ]; } else { $io->message("$fname:$.: Cannot read DSC $name", "W"); next; } } continue { $ppos = tell HELLO; } # PARSE close HELLO; # check that we read the correct number of pages if( defined $self->{'Pages:'} ) { my $p = defined $self->{'pages'} ? @{$self->{'pages'}} : 0; $io->message(sprintf("Warning: $fname: expected %d page%s; found %d\n", \ $self->{'Pages:'}->[0], $self->{'Pages:'}->[0] != 1 ? "s" : "", $p), 'W') unless $p == $self->{'Pages:'}->[0]; $self->{'Pages:'} = [ $p ]; } $io->message("$fname requires a trailer but does not have one", "W") if $atend & 1; if( !defined $begin_prolog && defined $end_prolog ) { $begin_prolog = $end_of_header; } $self->{'filepos'} = [ $end_of_header, $trailer, $begin_prolog, $end_prolog, $begin_setup, $end_setup, ]; # Test data for (keys %Reading_List) { $io->message("Warning: $fname: option $_ should have been specified in trailer", "W") if defined $self->{$_} && $self->{$_} eq '(atend)'; } # check that everything from the reading-list was defined; # at least those that might be used as labels *must* be def'd foreach (keys %Reading_List) { my $ref = $Reading_List{$_}; if( defined $self->{ $_ } ) { # check that we got the expected number of entries if( defined $Reading_List{ll} ) { if($Reading_List{ll} != @{$self->{ $_}}) { $io->message(sprintf("$fname: expected %d elements for %s found %d", \ $Reading_List{ll}, $_, scalar @{$self->{ $_}} ), 'W'); if(defined $Reading_List{df}) { $io->message("...since there is a default, I'd rather pick that", "W"); goto DEFAULT; } } } next; } DEFAULT: # The thing was not defined, find a default if( defined $ref->{df} ) { $self->{ $_ } = $ref->{df}; } elsif( $_ eq 'CreationDate:' ) { # get file's mtime (last modified) my $date = (stat( $self->{fn} ))[9]; # if no mtime, shouldn't ever happen, use now() $date = scalar localtime( $date ? $date : time() ) unless $date; $self->{$_} = [ split /\s+/, $date ]; } } if( defined $self->{'pages'} && $self->{'PageOrder:'}->[0] ne 'Special' ) { # sort pages on second entry (first is label, second is physical page number) my @list = sort { $a->[1] <=> $b->[1] } @{$self->{'pages'}}; if( $self->{'PageOrder:'}->[0] eq 'Descend' ) { $self->{'pages'} = [ reverse @list ]; } else { $self->{'pages'} = [ @list ]; } } return $self; } # sub new # This method returns undef if its file describes EPS; if it describes # postscript the method returns a reference to the page-descriptor array # which looks like this: # [ [ 'i', 1, o1 ], [ 'ii', 2, o2 ], [ 'iii', 3, o3 ], [ '1', 4, o4 ] ] # i.e., references to lists, and each entry has the label first and then # the physical page number, and finally the offset in the file where # that page may be found. The list is sorted according to the # %%PageOrder DSC. # Oh, and please don't change any of the data; it is not protected # (this is not C++, after all, is it?) sub pages { my $self = shift; return $self->{'pages'}; } # getlabel returns a label string as follows: # f: filename # d: the date of creation (er, of the eps file) # T: the title of the eps file, as described by the DSC sub getlabel { my $self = shift; $_ = shift; if( /f/ ) { return $self->{fn}; } elsif( /d/ ) { return join( ' ', @{$self->{'CreationDate:'}} ); } elsif( /T/ ) { return join( ' ', @{$self->{'Title:'}} ); } else { # shouldn't ever ever happen, so OK to complain on STDERR print STDERR "$self->{fn}: Warning: unknown label specifier $_ ignored\n"; } return; } sub box { my $self = shift; my @bbox = @{$self->{ 'BoundingBox:' }}; my ($w, $h) = ($bbox[2]-$bbox[0], $bbox[3]-$bbox[1]); if( $self->{'Orientation:'}->[0] eq 'Landscape' ) { my $tmp = $w; $w = $h; $h = $tmp; } return ($bbox[0], $bbox[1], $bbox[0]+$w, $bbox[1]+$h); } sub write_prolog { my $self = shift; return unless defined $self->{'filepos'}->[2] && defined $self->{'filepos'}->[3]; open(DATA, "<$self->{fn}") or die "Couldn't open $self->{fn}"; # write prolog _write_section(\*DATA, $self->{'filepos'}->[2], $self->{'filepos'}->[3]); close DATA; } sub write_setup { my $self = shift; return unless defined $self->{'filepos'}->[4] && defined $self->{'filepos'}->[5]; open(DATA, "<$self->{fn}") or die "Couldn't open $self->{fn}"; _write_section(\*DATA, $self->{'filepos'}->[4], $self->{'filepos'}->[5]); close DATA; } # Write the Trailer, hopefully cleaning up after the Setup code sub write_trailer { my $self = shift; return unless defined $self->{'filepos'}->[1]; open(DATA, "<$self->{fn}") or die "Couldn't open $self->{fn} for reading"; _write_section(\*DATA, $self->{'filepos'}->[1], -1); close DATA; } # write must be passed (1) a boundingbox (integers) and # (2) scaling parameters (possibly floats) and (3, optionally) # a page number of the page it is supposed to print (for non-e postscript). # It writes clipping things and the EPS file itself sub write { my $self = shift; my ($llx, $lly, $urx, $ury, $xsc, $ysc) = @_; my $page = @_ > 6 ? $_[6] : undef; open(DATA, "<$$self{fn}") or return "Couldn't reopen $$self{fn}: $!"; print <{save} save def newpath $llx $lly moveto $llx $ury lineto $urx $ury lineto $urx $lly lineto closepath clip newpath HERE # no support for 'PageOrientation' yet, but why would you have such a thing anyway? if( $self->{ 'Orientation:' }->[0] eq 'Landscape' ) { print "$llx $ury translate -90 rotate\n"; my $tmp = $xsc; $xsc = $ysc; $ysc = $tmp; } else { print "$llx $lly translate\n"; } print <{'BoundingBox:'}}[0] neg ${$self->{'BoundingBox:'}}[1] neg translate %Epsmerge included file: $$self{fn} HERE ; # Probably put Setup stuff here _write_section(\*DATA, $self->{'filepos'}->[4], $self->{'filepos'}->[5]); if(defined $page) { # Locate data for the correct page; index is currently physical page no. # Support for physical page numbers that aren't the sequence 1..max my $npages = @{$self->{'pages'}}; my $ref; if( $page >= $npages || ($ref = $self->{'pages'}->[ $page-1 ])->[1] != $page ) { # ack! not in physical order or nonexistent page! # but they are sorted (haha!), so we do a binary search my ($lo, $hi) = (0, $npages); while( $lo <= $hi ) { my $i = ($lo + $hi) >> 1; if( $self->{'pages'}->[$i]->[1] == $page ) { # found the page anyway! $ref = $self->{'pages'}->[$i]; goto FOUNDTHEPAGEANYWAY; } if( $self->{'pages'}->[$i]->[1] < $page ) { $lo = $i+1; } else { $hi = $i-1; } } return "epsmerge::Eps request for non-existent page $page in file $self->{fn}\n"; FOUNDTHEPAGEANYWAY: } print "%Including page $ref->[0] (physical $page)\n"; seek(DATA, $ref->[2], 0); } else { # Including a std EPS file; skip the header seek DATA, $self->{'filepos'}->[0], 0; } my $atend = 0; my $emblevel = 0; while() { --$emblevel if /^%%EndDocument/; ++$emblevel if /^%%BeginDocument/; next if $emblevel && /^%%/; # ignore embedded documents' DSC if( /^%%Page/ ) { if(defined $page) { last; # done when we find next page } else { next; # ignore embedded page thingies } } # if we see `showpage' surrounded by whitespace, kill it and leave just the spaces # (doesn't kill `/showpage', nor does it do evil things to `myshowpageidentifier') s/(^|\W)showpage(\W|$)/$1$2/; if( /^%%Trailer/ ) { $atend = 1; if( defined $page ) { last; } else { next; } } next if $atend && /^%%/; last if /^%%EOF/ && !Options->new()->getopts('ignore-eof'); print; } # Write the Trailer, hopefully cleaning up after the Setup code _write_section(\*DATA, $self->{'filepos'}->[1], -1); print "$self->{save} restore\n"; close DATA; return; } # Return true if the input could be the first non-header line (*not* a method!) sub _endofheader { my $data = shift; return ($data =~ /^%%EndComments/) || ($data !~ /^%\S/); } # Write a section of the file, given FILE *, start, end. # end can be nonsense (like -1), in which case we read till EOF. # However, if start or end are undefined, then nothing happens. # We will ignore DSC comments because there has been -- trouble. sub _write_section { my ($fh, $start, $end) = @_; return unless defined $start && defined $end; my $pos = tell $fh; seek $fh, $start, 0; until( eof($fh) || ($end != -1 && tell $fh >= $end ) ) { $_ = <$fh>; next if /^%%/; s/(^|\W)showpage(\W|$)/$1$2/; print; } seek $fh, $pos, 0; } package EpsWrap; # package EpsWrap is a simple Eps-wrapper; it's interface is exactly the # same as that of Eps. It's raison d'etre is to have several EpsWrap'es # sharing one Eps Cell such that if the Eps cell represents a postscript # file then all EpsWrap's represent individual pages and they all share # pointers to the same Eps. This saves potentially lots of memory and # time for the first-stage parsing of the file (only done once). # input: eps cell reference and physical page number sub new { my $class = shift; my $data = { 'eps' => $_[0], 'page' => $_[1] }; return bless $data, $class; } sub pages { my $self = shift; return $self->{'eps'}->pages(); } sub getlabel { my $self = shift; return $self->{'eps'}->getlabel(@_); } sub box { my $self = shift; return $self->{'eps'}->box(@_); } sub write { my $self = shift; return $self->{'eps'}->write(@_, $self->{'page'}); } 1;