#! /usr/local/bin/perl5.8.8 #============================================================================* # * # htmlpp HTML pre-processor * # * # Written: 96/03/27 Pieter Hintjens * # Revised: 98/10/22 Enrique Bengoechea * # * # Copyright (c) 1996-98 iMatix * # * # This program is free software; you can redistribute it and/or modify * # it under the terms of the GNU General Public License as published by * # the Free Software Foundation; either version 2 of the License, or * # (at your option) any later version. * # * # This program is distributed in the hope that it will be useful, * # but WITHOUT ANY WARRANTY; without even the implied warranty of * # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * # GNU General Public License for more details. * # * # You should have received a copy of the GNU General Public License * # along with this program; if not, write to the Free Software * # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * #============================================================================* require 'sflcvdp.pl'; # SFL date picture formatting require 'shellwd.pl'; # Perl word-splitting module require 'findfile.pl'; # Find file on path require 'textdb.pl'; # Query flat-text databases require 'htmlpp.d'; # Include dialog interpreter ########################## INITIALISE THE PROGRAM ######################### sub initialise_the_program { $version = "4.2a"; $me = "htmlpp"; # For error messages $ext = ".htp"; # Default argument file extension print "\nHtmlpp - a HTML pre-processor V$version\n"; print "This is free software and may be freely modified and distributed.\n"; print "Copyright (c) 1996-98 iMatix Corporation - http://www.imatix.com\n\n"; if (@ARGV > 0) { # 1 or more arguments in @ARGV? $the_next_event = $ok_event; $next_arg = 0; # Arguments start at 0 } else { print<<"."; syntax: $me [-debug] [-guru] [-env] [-nofunc] [-page list] [-set name=value] [-charset value] ... -debug Leave work files: useful for debugging macros and loops -guru Work in Guru Mode -env Load all environment variables into document symbol table -nofunc Ignore unknown intrinsic functions (&xxx) -page Produce only specified pages; list can take any of these forms: 'nn', 'nn-nn', 'nn,nn,nn'. E.g. -page 1 -page 3,7 You can also refer to output filenames: -page index3.htm -set Set symbol value. This override any default settings or settings made from within the htmlpp input. -charset Define the character set for the source input. Valid values are 'iso-8859-1' and 'ms-dos'. . $the_next_event = $error_event; } } ######################### INITIALISE PROGRAM DATA ######################### sub initialise_program_data { # These are the preprocessor keywords that we recognise $keyword {"block"} = $block_event; $keyword {"endblock"} = $end_block_event; $keyword {"end"} = $end_block_event; $keyword {"build"} = $build_event; $keyword {"define"} = $define_event; $keyword {"echo"} = $echo_event; $keyword {"ignore"} = $ignore_event; $keyword {"include"} = $include_event; $keyword {"page"} = $page_event; $keyword {"pipe"} = $pipe_event; $keyword {"endpipe"} = $end_pipe_event; $keyword {"if"} = $if_event; $keyword {"else"} = $else_event; $keyword {"endif"} = $end_if_event; $keyword {"for"} = $for_event; $keyword {"endfor"} = $end_for_event; $keyword {"macro"} = $macro_event; # These are the standard block types that we handle $standard_block {"header"} = 1; $standard_block {"footer"} = 1; $standard_block {"pipe_header"} = 1; $standard_block {"pipe_footer"} = 1; $standard_block {"toc_open"} = 1; $standard_block {"toc_entry"} = 1; $standard_block {"toc_close"} = 1; $standard_block {"dir_open"} = 1; $standard_block {"dir_entry"} = 1; $standard_block {"dir_close"} = 1; $standard_block {"index"} = 1; $standard_block {"index_open"} = 1; $standard_block {"index_close"} = 1; $standard_block {"index_entry"} = 1; $standard_block {"anchor"} = 1; # We log all errors to $errors.lst unless (open (ERRORS, ">errors.lst")) { print "$me E: can't create errors.lst: $!"; &raise_exception ($exception_event); } $debug_mode = 0; $guru_mode = 0; $getenv_mode = 0; $nofunc_mode = 0; $page_mode = 0; # Prepare date and time variables ($sec, $min, $hour, $day, $month, $year) = localtime; $date = sprintf ("%02d/%02d/%02d", $year, $month + 1, $day); $time = sprintf ("%2d:%02d:%02d", $hour, $min, $sec); # Initialise the accented-character table to use by default, # using a horrible youzine (fr. 'Usine a gaz') to figure-out # whether we're under MS-DOS, or UNIX, or something else. # This will be overridden if the -charset command-line option is used. # If you add to these tables, provide me with updates! 8-/ # if (-f "/etc/passwd") { # We're on a UNIX, assume ISO-8859-1 $charset = "iso-8859-1"; } elsif ($ENV {"COMSPEC"}) { # Looks like MS-DOS... $charset = "ms-dos"; } else { # Else assume ISO-8859-1 $charset = "iso-8859-1"; } } ######################### LOAD ANCHOR DEFINITIONS ######################### sub load_anchor_definitions { undef %anchors; # Clear assoc. arrays in any case undef %atitles; if (open (ANCHOR, "anchor.def")) { while () { next if /^#/; # Skip comments chop; /(\S+)\s+(\S+)(\s+(.*))?/; # Break $_ into name and value $anchors {$1} = $2; # and load into assoc. array $atitles {$1} = $4 if $4; } close (ANCHOR); } } ######################### SAVE ANCHOR DEFINITIONS ######################### sub save_anchor_definitions { local ($key, $value); if (open (ANCHOR, ">anchor.def")) { print ANCHOR "# Anchor definitions - created by $me\n"; print ANCHOR "# Delete this file to reset all anchors\n"; while (($key, $value) = each %anchors) { printf (ANCHOR "%-20s %-12s %s\n", $key, $value, $atitles {$key}); } close (ANCHOR); } else { print "$me E: can't create anchor.def: $!"; &raise_exception ($exception_event); } } ######################### GET NEXT ARGUMENT VALUE ######################### sub get_next_argument_value { if ($next_arg < @ARGV) { $_ = $ARGV [$next_arg++]; if (/^-/) { $the_next_event = $switch_event; } else { $_ .= $ext if !/\./; # Add extension if missing $main_document = $_; $the_next_event = $ok_event; print "$me I: processing $main_document...\n"; } } else { $the_next_event = $finished_event; } } ############################ PROCESS THE SWITCH ########################### sub process_the_switch { if (/^-d/) { # -debug $debug_mode = 1; } elsif (/^-g/) { # -guru $guru_mode = 1; } elsif (/^-e/) { # -env $getenv_mode = 1; } elsif (/^-nof/) { # -nofunc $nofunc_mode = 1; } elsif (/^-p/) { # -page $page_mode = 1; undef %requested_pages; if ($next_arg < @ARGV) { $_ = $ARGV [$next_arg++]; # Parse page list specification # n n-nn n,n,n print "$me I: selected pages"; for (split (/,/)) { if (/-/) { for ($`..$') { $requested_pages {$_} = 1; print " $_"; } } else { $requested_pages {$_} = 1; print " $_"; } } print "\n"; } else { print "$me E: page numbers missing after -page option\n"; &raise_exception ($exception_event); } } elsif (/^-s/) { # -set if ($next_arg < @ARGV) { $_ = $ARGV [$next_arg++]; # Parse variable setting specification # name=value or name="value" if (/=/) { local ($name) = $`; local ($value) = $'; if ($value =~ /^"([^"]*)"/) { $value = $1; } $preset_symbols {$name} = $value; } else { print "$me E: invalid -set value: $_\n"; &raise_exception ($exception_event); } } else { print "$me E: name=value expected after -set option\n"; &raise_exception ($exception_event); } } elsif (/^-c/) { # -charset undef $charset; if ($next_arg < @ARGV) { $_ = $ARGV [$next_arg++]; # Parse charset specification if (/^iso\-?8859\-?1|^8859\-?1|^iso\-?latin\-?1|^latin\-?1/i || /^win.*|unix/i) { $charset = 'iso-8859-1'; } elsif (/^ms\-?dos|^dos.*|^cp\-?850/i) { $charset = 'ms-dos'; } else { print "$me E: charset $_ unknown\n"; &raise_exception ($exception_event); } } else { print "$me E: charset specification missing after -charset option\n"; &raise_exception ($exception_event); } } } ################ ACCENTED-CHARACTERS TRANSLATION TABLES ################## sub define_translation_table { local ($char) = @_; if ($char eq 'iso-8859-1') { $translate {"\221"} = "lsquo"; $translate {"\222"} = "rsquo"; $translate {"\241"} = "iexcl"; $translate {"\242"} = "cent"; $translate {"\243"} = "pound"; $translate {"\244"} = "curren"; $translate {"\245"} = "yen"; $translate {"\246"} = "brvbar"; $translate {"\247"} = "sect"; $translate {"\250"} = "uml"; $translate {"\251"} = "copy"; $translate {"\252"} = "ordf"; $translate {"\253"} = "laquo"; $translate {"\254"} = "not"; $translate {"\255"} = "shy"; $translate {"\256"} = "reg"; $translate {"\257"} = "macr"; $translate {"\260"} = "deg"; $translate {"\261"} = "plusmn"; $translate {"\262"} = "sup2"; $translate {"\263"} = "sup3"; $translate {"\264"} = "acute"; $translate {"\265"} = "micro"; $translate {"\266"} = "para"; $translate {"\267"} = "middot"; $translate {"\270"} = "cedil"; $translate {"\271"} = "sup1"; $translate {"\272"} = "ordm"; $translate {"\273"} = "raquo"; $translate {"\274"} = "frac14"; $translate {"\275"} = "frac12"; $translate {"\276"} = "frac34"; $translate {"\277"} = "iquest"; $translate {"\300"} = "Agrave"; $translate {"\301"} = "Aacute"; $translate {"\302"} = "Acirc"; $translate {"\303"} = "Atilde"; $translate {"\304"} = "Auml"; $translate {"\305"} = "Aring"; $translate {"\306"} = "AElig"; $translate {"\307"} = "Ccedil"; $translate {"\310"} = "Egrave"; $translate {"\311"} = "Eacute"; $translate {"\312"} = "Ecirc"; $translate {"\313"} = "Euml"; $translate {"\314"} = "Igrave"; $translate {"\315"} = "Iacute"; $translate {"\316"} = "Icirc"; $translate {"\317"} = "Iuml"; $translate {"\320"} = "ETH"; $translate {"\321"} = "Ntilde"; $translate {"\322"} = "Ograve"; $translate {"\323"} = "Oacute"; $translate {"\324"} = "Ocirc"; $translate {"\325"} = "Otilde"; $translate {"\326"} = "Ouml"; $translate {"\327"} = "times"; $translate {"\330"} = "Oslash"; $translate {"\331"} = "Ugrave"; $translate {"\332"} = "Uacute"; $translate {"\333"} = "Ucirc"; $translate {"\334"} = "Uuml"; $translate {"\335"} = "Yacute"; $translate {"\336"} = "THORN"; $translate {"\337"} = "szlig"; $translate {"\340"} = "agrave"; $translate {"\341"} = "aacute"; $translate {"\342"} = "acirc"; $translate {"\343"} = "atilde"; $translate {"\344"} = "auml"; $translate {"\345"} = "aring"; $translate {"\346"} = "aelig"; $translate {"\347"} = "ccedil"; $translate {"\350"} = "egrave"; $translate {"\351"} = "eacute"; $translate {"\352"} = "ecirc"; $translate {"\353"} = "euml"; $translate {"\354"} = "igrave"; $translate {"\355"} = "iacute"; $translate {"\356"} = "icirc"; $translate {"\357"} = "iuml"; $translate {"\360"} = "eth"; $translate {"\361"} = "ntilde"; $translate {"\362"} = "ograve"; $translate {"\363"} = "oacute"; $translate {"\364"} = "ocirc"; $translate {"\365"} = "otilde"; $translate {"\366"} = "ouml"; $translate {"\367"} = "divide"; $translate {"\370"} = "oslash"; $translate {"\371"} = "ugrave"; $translate {"\372"} = "uacute"; $translate {"\373"} = "ucirc"; $translate {"\374"} = "uuml"; $translate {"\375"} = "yacute"; $translate {"\376"} = "thorn"; $translate {"\377"} = "yuml"; } elsif ($char eq 'ms-dos') { $translate {"\255"} = "iexcl"; $translate {"\233"} = "cent"; $translate {"\234"} = "pound"; $translate {"\317"} = "curren"; $translate {"\235"} = "yen"; $translate {"\335"} = "brvbar"; $translate {"\365"} = "sect"; $translate {"\270"} = "copy"; $translate {"\246"} = "ordf"; $translate {"\256"} = "laquo"; $translate {"\252"} = "not"; $translate {"\260"} = "shy"; $translate {"\251"} = "reg"; $translate {"\356"} = "macr"; $translate {"\370"} = "deg"; $translate {"\361"} = "plusmn"; $translate {"\375"} = "sup2"; $translate {"\374"} = "sup3"; $translate {"\357"} = "acute"; $translate {"\346"} = "micro"; $translate {"\364"} = "para"; $translate {"\372"} = "middot"; $translate {"\373"} = "sup1"; $translate {"\247"} = "ordm"; $translate {"\257"} = "raquo"; $translate {"\253"} = "frac14"; $translate {"\254"} = "frac12"; $translate {"\363"} = "frac34"; $translate {"\250"} = "iquest"; $translate {"\267"} = "Agrave"; $translate {"\265"} = "Aacute"; $translate {"\266"} = "Acirc"; $translate {"\307"} = "Atilde"; $translate {"\216"} = "Auml"; $translate {"\217"} = "Aring"; $translate {"\222"} = "AElig"; $translate {"\200"} = "Ccedil"; $translate {"\324"} = "Egrave"; $translate {"\220"} = "Eacute"; $translate {"\322"} = "Ecirc"; $translate {"\323"} = "Euml"; $translate {"\336"} = "Igrave"; $translate {"\326"} = "Iacute"; $translate {"\327"} = "Icirc"; $translate {"\330"} = "Iuml"; $translate {"\321"} = "ETH"; $translate {"\245"} = "Ntilde"; $translate {"\343"} = "Ograve"; $translate {"\340"} = "Oacute"; $translate {"\342"} = "Ocirc"; $translate {"\345"} = "Otilde"; $translate {"\231"} = "Ouml"; $translate {"\236"} = "times"; $translate {"\235"} = "Oslash"; $translate {"\353"} = "Ugrave"; $translate {"\351"} = "Uacute"; $translate {"\352"} = "Ucirc"; $translate {"\232"} = "Uuml"; $translate {"\355"} = "Yacute"; $translate {"\346"} = "THORN"; $translate {"\341"} = "szlig"; $translate {"\205"} = "agrave"; $translate {"\240"} = "aacute"; $translate {"\203"} = "acirc"; $translate {"\306"} = "atilde"; $translate {"\204"} = "auml"; $translate {"\206"} = "aring"; $translate {"\221"} = "aelig"; $translate {"\207"} = "ccedil"; $translate {"\312"} = "egrave"; $translate {"\202"} = "eacute"; $translate {"\210"} = "ecirc"; $translate {"\211"} = "euml"; $translate {"\215"} = "igrave"; $translate {"\241"} = "iacute"; $translate {"\214"} = "icirc"; $translate {"\213"} = "iuml"; # $translate {"\360"} = "eth"; # seems not to exist in DOS cp850 $translate {"\244"} = "ntilde"; $translate {"\225"} = "ograve"; $translate {"\242"} = "oacute"; $translate {"\223"} = "ocirc"; $translate {"\242"} = "otilde"; $translate {"\224"} = "ouml"; $translate {"\366"} = "divide"; $translate {"\233"} = "oslash"; $translate {"\227"} = "ugrave"; $translate {"\243"} = "uacute"; $translate {"\226"} = "ucirc"; $translate {"\201"} = "uuml"; $translate {"\354"} = "yacute"; # $translate {"\376"} = "thorn"; # seems not to exist in DOS cp850 $translate {"\230"} = "yuml"; } } ########################## TEXT TO HTMLPP IF GURU ######################### sub text_to_htmlpp_if_guru { local ($output_file); # Output file local ($header_file); # Guru.def file location local ($had_blank); local ($h1_count); # Insert TOC before second .H1 local ($figure_count); # Figure numbering local ($table_row); # First row in table? local ($width, $height); # Image width, height local ($alttext); # Image alt text return unless $guru_mode; # Defaults for guru mode formatting - if you want to customise these, # copy guru_opt.fmt to guru.fmt, and change that file. # $guru_toc = "Table of Contents"; # If empty, no TOC $guru_ul = "
    "; $guru_ol = "
      "; $guru_li = "
    1. "; $guru_hr = "
      "; $guru_beg_fig = "

      Figure "; $guru_end_fig = "
      "; $guru_beg_fig_block = "

      "; $guru_end_fig_block = "
      "; $guru_beg_table = "

      "; $guru_end_table = "
      "; $guru_beg_dt = "
      "; $guru_end_dt = ""; $guru_dl = "
      "; $guru_dd = "
      "; $guru_p = "

      "; $guru_pre = "

      ";
          $guru_tr            = "";
          $guru_th1           = "";
          $guru_th2           = "";
          $guru_td1           = "";
          $guru_td2           = "";
      
          do 'guru.fmt';
      
          $output_file = &basename ($main_document).".hpp";
          if ($output_file eq $main_document) {
              &error ("$me E: document may not have '.hpp' extension");
              &raise_exception ($exception_event);
          }
          elsif (!open (PLAIN, $main_document)) {
              &error ("$me E: can't open $main_document: $!");
              &raise_exception ($exception_event);
          }
          elsif (!open (OUTPUT, ">$output_file")) {
              &error ("$me E: can't create $output_file: $!");
              &raise_exception ($exception_event);
          }
          return if $exception_raised;
      
          #   Include text from guru.def file
          $header_file = &findfile ("guru.def", "LIBPATH");
          $header_file = &findfile ("guru.def", "PATH")
              unless $header_file;
      
          if (!$header_file || !open (HEADER, $header_file)) {
              &error ("$me E: can't open 'guru.def': $!");
              &raise_exception ($exception_event);
          }
          while (
      ) { # Copy text except comments print OUTPUT "$_" unless /^#/; } close (HEADER); $guru_block = ""; # Not in any block $had_blank = 1; # Last line was blank, initially $h1_count = 0; $figure_count = 0; while (&get_plain_line) { # Numbered list consists of paragraphs starting with 'n.', # if (/^[0-9]+\.\s*/) { &guru_want_block ("OL", $guru_ol); $_ = "$guru_li$'"; # Replace 'n.' by
    2. } # Bulleted list consists of paragraphs starting with '- ' # elsif (/^-\s+/) { &guru_want_block ("UL", $guru_ul); $_ = "$guru_li$'"; # Replace '- ' by
    3. } # Horizontal rule is '....'; 4 or more dots # elsif (/^\\\.\.\.\./) { $_ = $guru_hr; # Replace '....' by
      } # Figure is defined by [Figure filename: caption] where the # 'figure' keyword and caption are optional, and the filename # may be enclosed in quotes. # Figures are numbered only if 'Figure' keyword is used # elsif (/^\[(Figure\s+)?"([^"\s]+)"\s*(:\s*([^]]*))?]/i || /^\[(Figure\s+)?([^:\s]+)\s*(:\s*([^]]*))?]/i) { # Output the figure label if ($1) { print OUTPUT "$guru_beg_fig_block"; $figure_count++; print OUTPUT "$guru_beg_fig$figure_count$3$guru_end_fig\n"; print OUTPUT "$guru_end_fig_block"; } # Get image width and height if possible $width = &image_width ($2); $height = &image_height ($2); $alttext = $4? "ALT=\"$4\"": $1? "ALT=\"Figure $figure_count\"": ""; print OUTPUT "\n"; next; } # Handle start of block of text after blank line elsif (/^\S/ && $had_blank) { local ($first) = $_; &get_plain_line; # Header 1: line followed by '*****' line # Header 2: line followed by '=====' line # Header 3: line followed by '-----' line # if (/^\*\*\*+$/) { &guru_want_block (""); # Close any previous block print OUTPUT "\n"; if (++$h1_count == 1) { print OUTPUT ".ignore header\n"; } elsif ($h1_count == 2) { print OUTPUT ".ignore header\n"; if ($guru_toc) { print OUTPUT ".H2 $guru_toc\n"; print OUTPUT ".include contents.def\n"; } } print OUTPUT ".page $first\n"; $_ = ".H1 $first"; } elsif (/^\=\=\=+$/) { &guru_want_block (""); # Close any previous block $_ = "\n.H2 $first"; } elsif (/^\-\-\-+$/) { &guru_want_block (""); # Close any previous block $_ = "\n.H3 $first"; } # Tables are triggered by either a header line in the form # 'This field: Has this meaning:', or by a table line in # the form 'One_word: Explanation...', where both the word # and the text start in a capital letter or a digit. Table # elsif ($first =~ /[A-Z0-9].*:\s*[A-Z].*:/ || $first =~ /[A-Z0-9]\w*:\s*\S+/) { &guru_want_block (""); # Close any previous block print OUTPUT "$guru_beg_table\n"; $table_row = 1; while ($first =~ /:\s/) { # $_ holds next line if (/^\s/) { # Continuation is indented $first .= "\n $'"; } else { &guru_table_row ($first, $table_row++); $first = $_; # Look at next line } &get_plain_line; } print OUTPUT "$guru_end_table\n"; redo; # Next line is in $_ } # Definition lists are triggered by a line ending in ':' # followed by indented text. Each definition item ends in # a blank line or a non-indented line. # elsif (/^\s/ && $first =~ /^(.*):$/) { &guru_want_block ("DL", $guru_dl); print OUTPUT "$guru_beg_dt$1$guru_end_dt\n"; /^\s+/; $_ = " $guru_dd$'"; # Prefix first line by
      while (/^\s+/) { print OUTPUT " $'\n"; &get_plain_line; } redo; # Next line is in $_ } else { # Start new paragraph &guru_want_block (""); # Close any previous block print OUTPUT "\n$guru_p$first\n"; $had_blank = 0; redo; # Next line is in $_ } } # Preformatted text consists of text indented by 4+ spaces # or a single tab character. # elsif (/^( |\t)/) { $_ = " $'"; # Indent by 2 spaces &guru_want_block ("PRE", $guru_pre); } if (/^$/) { $had_blank = 1; } else { print OUTPUT "$_\n"; $had_blank = 0; } } &guru_want_block (""); # Close any current block close (PLAIN); close (OUTPUT); $main_document = $output_file; } # Return filename, without extension # sub basename { local ($name) = @_; # Get argument $name =~ s/\..*//; # Remove extension, if any return ($name); } # Subroutine returns normalised line of text from # sub get_plain_line { if ($_ = ) { chop while /\s$/; # Remove trailing whitespace s/&\(/&\\\(/g; # Replace &( by &\( s/\$\(/\$\\\(/g; # Replace $( by $\( s/%\(/%\\\(/g; # Replace %( by %\( s/&/&/g; # Replace & by & s//>/g; # Replace > by > s/^\./\\\./; # Replace . at start of line by \. # Replace all hyperlinks in line for (;;) { # Format if (/<([^@]+@[^&]+)>/) { $_ = $`."$1".$'; } # Format elsif (/<(\w+:\/\/[^&]+):([^&]+)>/) { $_ = $`."$2".$'; } # Format elsif (/<(\w+:\/\/[^&]+)>/) { $_ = $`."$1".$'; } # Format elsif (/<\/([^&]+):([^&]+)>/) { $_ = $`."$2".$'; } # Format elsif (/<\/([^&]+)>/) { $_ = $`."$2".$'; } else { last; } } return (1); } else { return (0); } } sub guru_want_block { local ($new_block, $tag) = @_; # Get subroutine arguments local ($close, $open); if ($guru_block ne $new_block) { print OUTPUT "\n" if $had_blank && !$guru_block; print OUTPUT "\n" if $guru_block; if ($new_block) { print OUTPUT "$tag"; print OUTPUT "\n" unless $new_block eq "PRE"; } $guru_block = $new_block; } } sub guru_table_row { local ($_, $row) = @_; # Get arguments print OUTPUT "$guru_tr\n"; s/_/ /g; # Underlines -> spaces # Table header? if ($row == 1 && /(.*):\s+(.*):$/) { print OUTPUT<<"."; $guru_th1$1: $guru_th2$2: . } elsif (/([^:]*):\s+((.|\n)*)/) { print OUTPUT<<"."; $guru_td1$1 $guru_td2$2 . } print OUTPUT "\n"; } # ------------------------------------------------------------------------- # The code to extract image sizes was mostly provided by Craig Smith # in December 1997 (thanks, Craig!). # ------------------------------------------------------------------------- # Subroutine returns width of GIF or JPG image, if found, else 0 # sub image_width { local ($_) = @_; # Get arguments local ($hi, $lo); if (-e && (/\.gif$/i || /(\.jpg|\.jpeg|\.jfif)$/i)) { open (IMAGE, $_) || die "Can't read $_: $!"; if (/\.gif$/i) { # width is at bytes 6 and 7 (lohi) seek (IMAGE, 6, 0); read (IMAGE, $lo, 1); read (IMAGE, $hi, 1); } elsif (/(\.jpg|\.jpeg|\.jfif)$/i) { # width is at bytes 7 and 8 of JFIF frame seek (IMAGE, &findJfifFrame ($_) + 7, 0); read (IMAGE, $hi, 1); read (IMAGE, $lo, 1); } close (IMAGE); return (ord ($hi) * 256 + ord ($lo)); } else { return (0); } } # Subroutine returns height of GIF or JPG image, if found, else 0 # sub image_height { local ($_) = @_; # Get arguments local ($hi, $lo); if (-e && (/\.gif$/i || /(\.jpg|\.jpeg|\.jfif)$/i)) { open (IMAGE, $_) || die "Can't read $_: $!"; if (/\.gif$/i) { # height is at 8 and 9 (lohi) seek (IMAGE, 8, 0); read (IMAGE, $lo, 1); read (IMAGE, $hi, 1); } elsif (/(\.jpg|\.jpeg|\.jfif)$/i) { # width is at bytes 6 and 7 of JFIF frame seek (IMAGE, &findJfifFrame ($_)+5, 0); read (IMAGE, $hi, 1); read (IMAGE, $lo, 1); } close (IMAGE); return (ord ($hi) * 256 + ord ($lo)); } else { return (0); } } # First we identify whether the file is indeed a JFIF file, then we # need to skip through the segments in the file until we find a JPEG # frame, identified by the marker bytes 0xffc0. Each segment contains # a pair of marker bytes, followed by 2 byte length (hilo). The length # includes itself, but not the marker bytes, so the total number of bytes # in each segment is length+2. sub findJfifFrame { local ($image) = @_; local ($buffer, $offset, $len, $id); local ($hi, $lo); open (IFILE, $image) || die "Can't read $image: $!"; # Verify JFIF file # first 4 bytes are 0xffd8ffe0, followed by 2 bytes of length, # followed by string "JFIF\x00". read (IFILE, $buffer, 4); read (IFILE, $hi, 1); read (IFILE, $lo, 1); $len = ord ($hi) * 256 + ord ($lo); read (IFILE, $id, 5); if ($buffer ne "\xff\xd8\xff\xe0" || $id ne "JFIF\x00") { die "$image doesn't appear to be a JFIF file"; } $offset = 2; $buffer = "\xff\xff"; while ($buffer ne "\xff\xc0" && $buffer ne "\xff\xc2") { $offset += $len + 2; seek (IFILE, $offset, 0); read (IFILE, $buffer, 2) || die "read: possible corrupt file"; read (IFILE, $hi, 1) || die "read: possible corrupt file"; read (IFILE, $lo, 1) || die "read: possible corrupt file"; $len = ord ($hi) * 256 + ord ($lo); } close (IFILE); return $offset; } ############################# START FIRST PASS ############################ sub start_first_pass { # Clear document structure tables undef @toc_title; # Table of contents titles undef @toc_file; # Table of contents filenames undef @toc_level; # Table of contents levels undef @page_list; # Clear page name table undef @page_title; # Clear page title table undef @user_blocks; # Clear user text blocks table undef @work_files; # Clear list of work files undef %index_ignore; # Clear index ignore table undef %already_seen; # Reset .include handling undef %macros; # Clear table of macros $have_errors = 0; # No errors detected so far $pass = 0; # Pass 0 = scan, pass 1..n = output $final_pass = 0; # Not in final pass $work_file_number = 0; # Work files are numbered $default_warning = 0; # No 'defaults' error message yet $collect_pages = 1; # Collect .page names now &reset_symbols; # Reset symbol table &reset_counters; # Reset document counters print "$me I: pass 1 through $main_document...\n" unless $symbols {"SILENT"} == 1; } sub reset_symbols { undef %symbols; # Clear symbol table undef %preproc; # Clear preprocessing table undef %postproc; # Clear postprocessing table # Now set symbols comming from command-line -set option %symbols = %preset_symbols if (defined (%preset_symbols)); # Define the traslation table to use for accented characters &define_translation_table ($charset); # Prepare built-in symbols &define ("DATE", $date); &define ("TIME", $time); &define ("SILENT", 0); # If 1, will shut-up &define ("DIR", "."); # Output directory &define ("BASE", "doc"); # For default page filenames &define ("EXT", "htm"); # For default page filenames &define ("LINEMAX", 0); # Warns for longer lines &define ("DEBUG_MODE", $debug_mode); &define ("DOCBASE", &basename ($main_document)); &define ("USE_LANG", 0); # Enable symbol.$(LANG) searches &define ("LANG", "en"); # Current language being processed &define ("USE_RELPATH", 0); # Enable relative path links &define ("LCASE_DIR", 0); # Use lower-case in .build dir if ($getenv_mode) { foreach $key (keys %ENV) { &define ($key, $ENV {$key}); } } } # Define symbol, taking into account any -- or ++ attached to the # symbol name. Does not allow redefinition of a preset symbol. sub define { local ($symbol, $value) = @_; # Get subroutine arguments return if defined ($preset_symbols {$symbol}); if ($symbol =~ /^(--|\+\+)/) { $symbol = $'; $preproc {$symbol} = $1; $postproc {$symbol} = ""; } elsif ($symbol =~ /(--|\+\+)$/) { $symbol = $`; $preproc {$symbol} = ""; $postproc {$symbol} = $1; } else { $preproc {$symbol} = ""; $postproc {$symbol} = ""; } $symbols {$symbol} = $value; } sub reset_counters { # Reset document counters $lines_read = 0; # Nothing processed so far $header_nbr = 0; # Header numbering for TOC $page_nbr = 0; # Index into @page_list $ignore_header = 0; # Ignore next header for TOC? $ignore_level = 99; # Ignore what header levels? $ignore_pages = 0; # Ignore all .page commands? $ignore_page = 0; # Ignore next .page command? $output_open = 0; # No output file open yet $inside_page = 0; # Inside a page $output_size = 0; # Size of output file $output_lines = 0; # Size of output, in lines &define ("PASS", $pass++); # Start new pass &define ("INC++", ""); # Standard counter } ########################## OPEN OUTPUT WORK FILE ########################## sub open_output_work_file { $work_file = sprintf ("html%04d.wrk", ++$work_file_number); if (!open (OUTPUT, ">$work_file")) { &error ("$me E: ($document $.) can't create $work_file: $!"); &raise_exception ($exception_event); } $output_open = 1; push (@work_files, $work_file); } ######################### REUSE WORK FILE AS INPUT ######################## sub reuse_work_file_as_input { close (OUTPUT); $document = $work_file; &open_the_document; # Read from old work file } ############################# START MAIN PASS ############################# sub start_main_pass { &reset_counters; # Reset document counters $more_commands = 0; # Do we need to reprocess output? $collect_pages = 0 if @page_list > 0; print "$me I: pass $pass through $main_document...\n" unless $symbols {"SILENT"} == 1; } ############################# START INTER PASS ############################ sub start_inter_pass { &start_main_pass; } ############################# START FINAL PASS ############################ sub start_final_pass { &reset_counters; # Reset document counters $final_pass = 1; # Yes, in final pass $collect_pages = 0 if @page_list > 0; print "$me I: final pass through $main_document...\n" unless $symbols {"SILENT"} == 1; } ############################ OPEN MAIN DOCUMENT ########################### sub open_main_document { $document = $main_document; &open_the_document; # Go open main document } ############################ OPEN THE DOCUMENT ############################ sub open_the_document { local ($filepath); $filepath = &findfile ($document, "LIBPATH"); $filepath = &findfile ($document, "PATH") unless $filepath; if ($filepath && open ($document, $filepath)) { $file_is_open {$document} = 1; # Keep track of open documents } else { &error ("$me E: ($document $.) can't open $document: $!"); &raise_exception ($exception_event); } } # Subroutine prints an error message to the console and the ERROR file # sub error { ($_) = @_; # Get argument print STDERR "$_\n"; print ERRORS "$_\n"; $have_errors = 1; # We have 1 or more errors } ########################## GET NEXT DOCUMENT LINE ######################### sub get_next_document_line { local ($command); # Action line keyword local ($delimiter); # For handling continuation lines local ($line); # For handling continuation lines if ($_ = <$document>) { # Get next line of input chop while /\s$/; # Remove trailing whitespace $lines_read++; # Count the line # Warn if line > LINEMAX chars long if ($symbols {"PASS"} == 0 && $symbols {"LINEMAX"} < length && $symbols {"LINEMAX"} > 0) { print STDERR "$me W: ($document $.) line > ".$symbols {"LINEMAX"}. " chars\n"; } if (/^$/) { # Blank lines $the_next_event = $blank_line_event; } elsif (/^\.\s*\-/) { # Comments $the_next_event = $comment_event; } elsif (/^\.\s*(\w+)/) { # Action line # Look at action line, figure-out what it is supposed to be ($command = $1) =~ tr/A-Z/a-z/; if (defined ($keyword {$command})) { $the_next_event = $keyword {$command}; if ($command eq "macro") { $delimiter = "\n"; # Keep as multiple lines } else { $delimiter = " "; # Concatenate into one line } } elsif (defined ($macros {$command})) { $the_next_event = $macro_text_event; $delimiter = " "; # Concatenate into one line } else { $action_line = $_; &syntax_error ("undefined keyword '$1'"); return; # Nothing more we can do here } # Now pick-up any continuation lines while (/\-$/) { s/\-$/$delimiter/; # Replace hyphen by multiline delim ($line = <$document>) || last; chop ($line); # Get next line and remove newline if ($line =~ /^\.-/) { # Ignore comments $_ .= "-"; } else { $line =~ s/^\s*|\s*$//; # Remove leading/trailing spaces $_ .= $line; # and attach to current line } } $action_line = $_; # Save original line /^\.\s*\w+\s*((.|\n)*)/; # Get full command arguments if ($the_next_event == $macro_text_event) { $_ = ¯o_value ($command, $1); } else { $_ = $1; # Get remainder of line } } else { $the_next_event = $body_text_event; } } else { $the_next_event = $finished_event; } } sub syntax_error { &error ("$action_line"); &error ("$me E: ($document $.) syntax error"); &error ("$me E: ($document $.) @_"); &raise_exception ($exception_event); } ######################### STORE SYMBOL DEFINITION ######################### sub store_symbol_definition { # .define symbol "" -- define symbol as empty string # .define symbol = expr -- evaluate as Perl expression # .define symbol value -- define or redefine symbol # .define symbol -- undefine symbol # # --name, ++name -- decrement/increment before using # name--, name++ -- decrement/increment after using # # Symbol names can consist of letters, digits, embedded -._+ # The order of the following tests is important, as we need # to treat the special cases first: # if (/^([A-Za-z0-9-\+\._]+)\s+""/) { &define ($1, ""); # .define symbol "" } elsif (/^([A-Za-z0-9-\+\._]+)\s+=\s+(.+)/) { &expand_symbols_in_line; # Need an evaluated expression /^([A-Za-z0-9-\+\._]+)\s*=\s*(.+)/; &define ($1, eval ($2)); # .define symbol = expr } elsif (/^([A-Za-z0-9-\+\._]+)\s+(.+)/) { &define ($1, $2); # .define symbol value } elsif (/^([A-Za-z0-9-\+\._]+)/) { undef $symbols {$1}; # .define symbol } else { &syntax_error ("this is not a valid '.define' command"); } } ########################## STORE MACRO DEFINITION ######################### sub store_macro_definition { # .macro [-nosplit|-noquote] name body|"" # local ($name, $value); if (/^(\-(\w+)\s+)?([A-Za-z0-9-\+\._]+)\s+((.|\n)+)/) { if (defined ($keyword {$3})) { &syntax_error ("you cannot use $3 as a macro name"); } else { ($name = $3) =~ tr/A-Z/a-z/; $value = $4 eq '""'? "": $4; $macopt {$name} = $2 if $2; $macros {$name} = $value; } } else { &syntax_error ("this is not a valid '.macro' command"); } } ########################## EXPAND SYMBOLS IN LINE ######################### sub expand_symbols_in_line { # Expands symbols in $_ variable, then resolves escape sequences # if in the final pass: # \( - ( # \{ - { # \. - . at start of line # # Note that the entire symbol must be on one line; if the symbol or # its label is broken over two lines it won't be expanded. After we # expand symbols, we drop trailing whitespace on the line. The link # symbols $(*...) omit the and tags if the symbol value is # empty. In a symbol label, underlines are converted to spaces. # $_ = &expand_symbols ($_); if ($final_pass) { s/\\\(/\(/g; # Replace \( by ( in whole line s/\\\{/\{/g; # Replace \{ by { in whole line s/\\\./\./g; # Replace \. by . in whole line } chop while /\s$/; # Remove trailing whitespace } # Recursively expand symbols like this (and in this order): # # $(Hn) - value of header-numbering symbol # $(xxx) - value of variable # $(xxx?zzz) - value of variable, or zzz if undefined # $(*xxx) - create link: xxx # $(*xxx*attrib*) - create link: xxx # $(*xxx="label") - create link: label # $(*xxx*attrib*="label") - create link: label # $(*xxx=label) - create link: label # $(*xxx*attrib*=label) - create link: label # $(*xxx=) - create link: value # $(*xxx=*attrib*) - create link: value # &("text") - output of Perl program fragment # &(text) - output of Perl program fragment # %(text?zzz) - value of environment variable, or zzz if undef # &abc(text) - intrinsic htmlpp function with arguments # - value of expanded macro # <.name args> - value of expanded macro # sub expand_symbols { local ($_) = @_; local ($before, $match, $after, $expr); return unless ($_); # Quit if input string is empty for (;;) { # Force expansion from end of string first, so things like # $(xxx?$(yyy)) work properly. if (/[\$%]\(/ || /\&([a-z_]+)\s*\(/i) { $before = $`; $match = $&; $after = &expand_symbols ($'); $_ = $before.$match.$after; } # $(xxx) if (/\$\(H([1-9])\)/) { $_ = $`.&header_number ($1).$'; } elsif (/\$\(([A-Za-z0-9-_\.]+)\)/) { if (!defined ($symbols {$1}) && defined ($atitles {$1})) { $_ = $`.$atitles {$1}.$'; } else { $_ = $`.&valueof ($1).$'; } } # $(xxx?zzz) elsif (/\$\(([A-Za-z0-9-_\.]+)\?([^)\$]*)\)/) { # Handle $(anchor_name?xxx) if (!defined ($symbols {$1}) && defined ($atitles {$1})) { $_ = $`.$atitles {$1}.$'; } else { $_ = $`.&valueof ($1, $2).$'; } } # $(*xxx) elsif (/\$\(\*([A-Za-z0-9-_\.]+)\)/) { last if !defined ($symbols {$1}) && !defined ($symbols {"$1.$symbols{LANG}"}) && !$final_pass; # If we're referring to an anchor, it may have a label if (!defined ($symbols {$1}) && defined ($atitles {$1})) { $label = $atitles {$1}; } else { $label = $1; } $_ = $`.&make_link ($1, $label).$'; } # $(*xxx*attributes*) elsif (/\$\(\*([A-Za-z0-9-_\.]+)\*(.+)\*\)/) { last if !defined ($symbols {$1}) && !defined ($symbols {"$1.$symbols{LANG}"}) && !$final_pass; # If we're referring to an anchor, it may have a label if (!defined ($symbols {$1}) && defined ($atitles {$1})) { $label = $atitles {$1}; } else { $label = $1; } $_ = $`.&make_link ($1, $label, $2).$'; } # $(*xxx="label") elsif (/\$\(\*([A-Za-z0-9-_\.]+)="([^"\$]+)"\)/) { last if !defined ($symbols {$1}) && !defined ($symbols {"$1.$symbols{LANG}"}) && !$final_pass; $_ = $`.&make_link ($1, $2).$'; } # $(*xxx*attributes*="label") elsif (/\$\(\*([A-Za-z0-9-_\.]+)\*(.+)\*="([^"\$]+)"\)/) { last if !defined ($symbols {$1}) && !defined ($symbols {"$1.$symbols{LANG}"}) && !$final_pass; $_ = $`.&make_link ($1, $3, $2).$'; } # $(*xxx=label) elsif (/\$\(\*([A-Za-z0-9-_\.]+)=([^\*][^)\$]+)\)/) { last if !defined ($symbols {$1}) && !defined ($symbols {"$1.$symbols{LANG}"}) && !$final_pass; $_ = $`.&make_link ($1, $2).$'; } # $(*xxx*attributes*=label) elsif (/\$\(\*([A-Za-z0-9-_\.]+)\*(.+)\*=([^)\$]+)\)/) { last if !defined ($symbols {$1}) && !defined ($symbols {"$1.$symbols{LANG}"}) && !$final_pass; $_ = $`.&make_link ($1, $3, $2).$'; } # $(*xxx=) elsif (/\$\(\*([A-Za-z0-9-_\.]+)=\)/) { last if !defined ($symbols {$1}) && !defined ($symbols {"$1.$symbols{LANG}"}) && !$final_pass; $_ = $`.&make_link ($1, &valueof ($1)).$'; } # $(*xxx=*attributes*) elsif (/\$\(\*([A-Za-z0-9-_\.]+)=\*(.+)\*\)/) { last if !defined ($symbols {$1}) && !defined ($symbols {"$1.$symbols{LANG}"}) && !$final_pass; $_ = $`.&make_link ($1, &valueof ($1), $2).$'; } # &("text") # &(text) elsif (/\&\("([^"]+)"\)/ || /\&\(([^\)]+)\)/) { # Problem: $`,$' can be redefined in eval local ($pre, $expr, $post) = ($`, $1, $'); $expr =~ s/\\(.)/$1/g; # Turn \) into ), etc. */ local ($valu) = eval $expr; $_ = $pre.$valu.$post; if ($@) { # Syntax error in Perl statement? $action_line = $pre."&($1)".$post; &syntax_error ("this is not valid Perl: $1"); last; } } # %(text) elsif (/\%\(([^\)]+)\)/) { $_ = $`.$ENV {$1}.$'; } # %(text?zzz) elsif (/\%\(([^\)]+)\?([^)\$]*)\)/) { $_ = $`.($ENV {$1}? $ENV {$1}: $2).$'; } # &abc(text) elsif (/\&([a-z_]+)\s*\(([^\)]*)\)/i) { $function = $1; $args = $2; $before = $`; $after = $'; $args =~ s/\\/\\\\/g; $_ = eval ("&intrinsic_$function ($args)"); $_ = $before.$_.$after; if ($@) { # Syntax error in Perl statement? &syntax_error ("$function is not a valid intrinsic function") unless $nofunc_mode; last; } } # elsif (/\/) { $_ = $`.¯o_value ($1, $2).$'; } # <.name args> elsif (/\<\.([A-Za-z0-9-_\.]+)\s*([^>]*)>/) { $_ = $`.¯o_value ($1, $2).$'; } elsif (/[\200-\377]/ && defined (%translate)) { &translate_accents_in_line; } else { last; } } return $_; } # Subroutine returns the value of the specified symbol; it issues a # warning message and returns 'UNDEF' if the symbol is not defined # and the default value is empty. # sub valueof { local ($symbol, $default) = @_; # Argument is symbol name local ($return); # Returned value local ($langed_symbol); # Language-dependent symbol if (defined ($symbols {$symbol})) { $preproc {$symbol} eq "--" && $symbols {$symbol}--; $preproc {$symbol} eq "++" && $symbols {$symbol}++; $return = $symbols {$symbol}; $postproc {$symbol} eq "--" && $symbols {$symbol}--; $postproc {$symbol} eq "++" && $symbols {$symbol}++; return $return; } # If the symbol does not exist and $(USE_LANG)=1, look whether # symbol.$(LANG) is defined elsif ($symbols{USE_LANG} && defined ($symbols {"$symbol.$symbols{LANG}"})) { return $symbols{"$symbol.$symbols{LANG}"}; } elsif (defined ($anchors {$symbol})) { return ($anchors {$symbol} eq $cur_file? $symbol: $anchors {$symbol}."#".$symbol); } elsif (defined ($default)) { return ($default); } &error ("$_"); &error ("$me E: ($document $.) undefined symbol \"$symbol\""); $default_warning == 1 || do { &error ("$me I: Use \$($symbol?default) for default values."); $default_warning = 1; }; &define ($symbol, "UNDEF"); return $symbols {$symbol}; } # Subroutine formats and returns a header-number. Uses symbols with # the name Hn where n is 1 to 9. # sub header_number { local ($level) = @_; # Argument is level number local ($symbol) = "H$level"; local ($value); if (defined ($symbols {$symbol})) { # Get previous value and increment it $value = $symbols {$symbol}; $value++; } else { # Initialise new symbol value $value = "1"; } # Store next level symbol if ($level < 9) { $symbols {"H".($level + 1)} = "0"; } # Now build full value from all parent levels $symbols {$symbol} = $value; while (--$level > 0) { $value = $symbols {"H".$level}.".".$value; } return ($value); } # Subroutine returns a formatted link between and : the first # argument is the symbol to translate; the second is the label for the # link. If the symbol has an empty value, the and tags are # omitted. # # We handle attributes and automatically add the attribute "hreflang=xx" # if "USE_LANG" is activated and the symbol name ends with ".xx" where # "xx" is not the value of the current language. We also make # references relative is the symbol flag "USE_RELPATH" is set to 1. sub make_link { local ($symbol, $label, $attributes) = @_; $label =~ tr/_/ /; # Add hreflang attribute if necessary, and use spaces in the right # places for nicely formatted HTML # if ($symbols{USE_LANG} && $symbol =~ m/([A-Za-z0-9-_\.]+)\.([A-Za-z][A-Za-z])/ && $2 ne $symbols {LANG}) { if ($attributes eq "") { $attributes = "hreflang=$2"; } else { $attributes = "hreflang=$2 "."$attributes"; } } $symbol = &valueof ($symbol); # If "USE_RELPATH" flag is 1, and the reference URI # does not start with any of "http://", "mailto:", # "ftp:", "./" or "../" it is considered a within-site link # and the reference is made relative. # $symbol = &intrinsic_relpath($symbol) if ($symbols {USE_RELPATH} && $symbol !~ /^http:|^mailto:|^ftp:|^\.{1,2}\//); # Add space in the right place for nicely formatted HTML $attributes = " ".$attributes if ($attributes ne ""); return $symbol? "$label": $label; } # Macros are expanded like this: # # $0 Name of macro # $1 .. $n Arguments 1 to n # $# Number of arguments # $* Full arguments string # $+ Full unused arguments string # { ..$n.. } Text within '{' and '}' repeated for each argument # # Arguments can be supplied in single/double quotes; they can also be # typed with underlines in place of spaces. Macro names are always # normalised to lowercase. # sub macro_value { local ($name, $args) = @_; local ($arg); # Macro argument local ($last_arg) = 0; # Highest argument used local ($_); local (@args); if ($macopt {$name} eq "nosplit") { @args = ($args); } elsif ($macopt {$name} eq "noquote") { $args =~ s/'/\\'/g; $args =~ s/"/\\"/g; @args = &shellwords ($args); } else { @args = &shellwords ($args); } $name =~ tr/A-Z/a-z/; if (defined ($macros {$name})) { $_ = $macros {$name}; } else { &error ("$_"); &error ("$me E: ($document $.) undefined macro \"$name\""); return ""; } # Replace $1 to $9, and $*, within macro body # Repeat text within {...} for macro argument s/\\\{/\001/g; # Replace escaped \{ by \001 for (;;) { if (/\$0/) { # $0 = name of macro $_ = $`.$name.$'; } elsif (/\$([0-9]+)/) { # $1 to $n $_ = $`.$args [$1 - 1].$'; $last_arg = $1 if $last_arg < $1; } elsif (/\$#/) { # $# = number of arguments $_ = $`.($#args+1).$'; } elsif (/\$\*/) { # $* = all arguments $_ = $`.$args.$'; } elsif (/\$\+/) { # $+ = remaining unused arguments $_ = $`; if ($last_arg < @args) { for ($arg = $last_arg; $arg < $#args; $arg++) { $_ .= $args [$arg]." "; } $_ .= $args [$#args]; } $_ .= $'; } elsif (/\{([^}]*)\}/) { # {...$n...} $_ = $`; local ($repeat) = $1; local ($after) = $'; foreach $arg (@args) { local ($repeat_this) = $repeat; $repeat_this =~ s/\$n/$arg/; $_ .= $repeat_this; } $_ .= $after; } else { last; } } s/\001/\\\{/g; # Replace escaped \{ by \001 return $_; } # Function translates accented characters into HTML representations # sub translate_accents_in_line { while (/([\200-\377])/) { if (defined ($translate {$1})) { $_ = "$`&".$translate {$1}.";$'"; } else { $_ = "$`?$'"; # Replace by '?' if not known } } } # INTRINSIC FUNCTIONS # # time() - Format current time as hh:mm:ss # date() - Return current date value # date("picture") - Format current date using picture # date("picture", date, lc) - Format specified date using picture & language # week_day([date]) - Get day of week, 0=Sunday to 6=Saturday # year_week([date]) - Get week of year, 1 is first full week # julian_date([date]) - Get Julian date for date # lillian_date([date]) - Get Lillian date for date # date_to_days(date) - Convert yyyymmdd to Lillian date # days_to_date(days) - Convert Lillian date to yyyymmdd # future_date(days[,date]) - Calculate a future date # past_date(days[,date]) - Calculate a past date # date_diff(date1[,date2]) - Calculate date1 - date2 # image_height("image.ext") - Get image height (GIF, JPEG) # image_width("image.ext") - Get image width (GIF, JPEG) # file_size("filename",arg) - Get size of file: optional arg K or M # file_date("filename") - Get date of file # file_time("filename") - Get time of file as hh:mm:ss # normalise("filename") - Normalise filename to UNIX format # system("command") - Call a system utility # lower("string") - Convert string to lower case # upper("string") - Convert string to upper case # pageref("page","title") - Return title if current page, else link # relpath("from","to") - Get relative path from one file to another # sub intrinsic_date { local ($picture, $value, $language) = @_; $value = &date_now unless $value; $language = $symbols{LANG} unless $language; if ($picture) { return (&conv_date_pict ($value, $picture, $language)); } else { return ($value); } } sub intrinsic_time { local ($sec, $min, $hour, $day, $month, $year) = localtime; return (sprintf ("%2d:%02d:%02d", $hour, $min, $sec)); } sub intrinsic_week_day { return (&day_of_week ($_ [0]? $_ [0]: &date_now)); } sub intrinsic_year_week { return (&week_of_year ($_ [0]? $_ [0]: &date_now)); } sub intrinsic_julian_date { return (&julian_date ($_ [0]? $_ [0]: &date_now)); } sub intrinsic_lillian_date { return (&date_to_days ($_ [0]? $_ [0]: &date_now)); } sub intrinsic_date_to_days { return (&date_to_days ($_ [0])); } sub intrinsic_days_to_date { return (&days_to_date ($_ [0])); } sub intrinsic_future_date { local ($date) = &future_date ($_ [1], 0, $_ [0], 0); return ($date); } sub intrinsic_past_date { local ($date) = &past_date ($_ [1], 0, $_ [0], 0); return ($date); } sub intrinsic_date_diff { local ($date1, $date2) = @_; $date1 = &date_now unless $date1; $date2 = &date_now unless $date2; local ($days) = &date_diff ($date1, 0, $date2, 0); return ($days); } sub intrinsic_image_height { local ($filename) = @_; if (! -e $filename) { &error ("$me E: ($document $.) file not found: \"$filename\""); } else { return (&image_height ($filename)); } } sub intrinsic_image_width { local ($filename) = @_; if (! -e $filename) { &error ("$me E: ($document $.) file not found: \"$filename\""); } else { return (&image_width ($filename)); } } sub intrinsic_file_size { local ($filename, $arg) = @_; local ($size) = (stat ($filename)) [7]; if (! -e $filename) { &error ("$me E: ($document $.) file not found: \"$filename\""); } elsif ($arg eq "K") { $size /= 1024; } elsif ($arg eq "M") { $size /= 1048576; } return (int ($size)); } sub intrinsic_file_date { local ($filename) = @_; if (! -e $filename) { &error ("$me E: ($document $.) file not found: \"$filename\""); } else { local ($mtime) = (stat ($filename)) [9]; local ($sec,$min,$hour,$mday,$mon,$year) = localtime ($mtime); return (($year + 1900) * 10000 + ($mon + 1) * 100 + $mday); } } sub intrinsic_file_time { local ($filename) = @_; if (! -e $filename) { &error ("$me E: ($document $.) file not found: \"$filename\""); } else { local ($mtime) = (stat ($filename)) [9]; local ($sec,$min,$hour,$mday,$mon,$year) = localtime ($mtime); return (sprintf ("%2d:%02d:%02d", $hour, $min, $sec)); } } sub intrinsic_normalise { local ($_) = @_; # Get filename argument s/\\/\//g; # Replace DOS-style \ by / s/\s/_/g; # Replace white space by _ return ($_); } sub intrinsic_system { local ($_) = `@_`; # Return all but the last character, which should be a newline chop; return ($_); } sub intrinsic_lower { local ($_) = @_; # Get filename argument tr/A-Z/a-z/; return ($_); } sub intrinsic_upper { local ($_) = @_; # Get filename argument tr/a-z/A-Z/; return ($_); } sub intrinsic_pageref { local ($page, $text) = @_; # Get filename argument if ($page eq $symbols {"PAGE"}) { return ($text); } else { while ($text =~ /\<[^>]*\>/) { $text = $`.$'; } return ("$text"); } } sub intrinsic_relpath { local ($from, $to) = @_; # Get path arguments if ($to eq "") { # If only one argument, use current $to = $from; # HTML page as 'from' $from = $symbols {PAGE}; } @from = split ('/',"$from"); $from_cnt = @from - 1; @to = split ('/',"$to"); $to_cnt = @to - 1; local $href = ""; while ($from_cnt && $to_cnt) { if ($from [0] eq $to [0]) { shift (@from); $from_cnt--; shift (@to); $to_cnt--; } else { pop (@from); foreach (@from) { $href .= '../'; $from_cnt --; } } } if ($to_cnt == 0) { pop (@from); foreach (@from) { $href .= '../'; } $href .= qq#$to[0]#; } if ($from_cnt == 0 && $to_cnt > 0) { $path = join ('/', @to); $href .= qq[$path]; } return ($href); } ######################### EXPAND SYMBOLS IN MACRO ######################### sub expand_symbols_in_macro { # Expands symbols in $_ variable, allowing $\( and &\( escapes # $_ = &expand_symbols ($_); s/\\\(/\(/g; # Replace \( by ( in whole line } ########################### COPY LINE TO OUTPUT ########################### sub copy_line_to_output { if ($output_open) { print OUTPUT "$_\n"; $last_blank = 0; # Any action except a .page or .define means we need to # reprocess the file in an intermediate pass. if (/^\.\s*(\w+)/) { # Get word after dot if ($1 ne "page" && $1 ne "define") { $more_commands = 1; } } } if ($pipe_open) { print PIPE "$_\n"; } } ######################## COPY ACTION LINE TO OUTPUT ####################### sub copy_action_line_to_output { if ($output_open) { print OUTPUT "$action_line\n"; $had_blank = 0; } if ($pipe_open) { print PIPE "$_\n"; } } ######################## COPY BLANK LINE TO OUTPUT ######################## sub copy_blank_line_to_output { ©_line_to_output unless $had_blank; $had_blank = 1; } ##################### COPY BLANK LINE TO FINAL OUTPUT ##################### sub copy_blank_line_to_final_output { ©_line_to_final_output unless $had_blank; $had_blank = 1; } ######################## COPY LINE TO FINAL OUTPUT ######################## sub copy_line_to_final_output { if ($output_open) { # Add a newline unless the line ends in '\'; handle '\\' as '\' # if (/^(.*)\\\\$/) { $_ = "$1\\\n"; } elsif (/^(.*)\\$/) { $_ = "$1"; } else { $_ = "$_\n"; } print OUTPUT "$_"; $output_size += length ($_); $output_lines++; $last_blank = 0; # Any action except a .page means we need to reprocess the # file in an intermediate pass. if (/^\.\s*(\w+)/) { # Get word after dot if ($1 ne "page" && $1 ne "define") { $more_commands = 1; } } } if ($pipe_open) { # Add a newline unless the line ends in '\' if (/^(.*)\\$/) { print PIPE "$1"; } else { print PIPE "$_\n"; } } } ########################## TAKE INCLUDE FILE NAME ######################### sub take_include_file_name { # .include `command` -- include output from command # .include filename -- include file first time only # .include filename! -- include file in any case # if (/^`([^`]+)`/) { # .include `command` if (open (CMD, "$1|")) { while () { &expand_symbols_in_line; ©_line_to_output; } # We tell the dialog to treat this line as a comment, # since no further action is now needed. &raise_exception ($comment_event); close (CMD); } else { &error ("$me E: ($document $.) can't run command: $!"); &raise_exception ($exception_event); } } elsif (/^([^\s!]+)(!)?/) { # .include filename[!] if ($file_is_open {$1}) { &error ("$_"); &error ("$me E: ($document $.) $1 is already open"); &raise_exception ($exception_event); }; # If include file already seen and not forced by !, skip it if ($already_seen {$1} && $2 ne "!") { &raise_exception ($comment_event); } else { # Save current document name and switch to new document push (@document_stack, $document); $document = $1; $already_seen {$1} = 1; } } else { &syntax_error ("this is not a valid '.include' command"); } } ########################## HANDLE IGNORE COMMAND ########################## sub handle_ignore_command { # .ignore header - ignore next header # .ignore header n - ignore headers Hn and greater # .ignore pages - ignore all future .page commands # .ignore page - ignore next .page in index table # if (/^header$/) { $ignore_header = 1; } elsif (/^header\s+([0-9]+)$/) { $ignore_level = $1; } elsif (/^pages$/) { $ignore_pages = 1; } elsif (/^page$/) { $ignore_page = 1; } else { &syntax_error ("this is not a valid '.ignore' command"); } } ########################## CHECK IF IGNORE PAGES ########################## sub check_if_ignore_pages { $ignore_pages && &raise_exception ($ignore_pages_event); } ######################### COLLECT TITLE IF PRESENT ######################## sub collect_title_if_present { # If the line contains a value between header tags, get that value # and add it to the @toc table. We don't check that the tags are # correct, and we don't handle multiple titles on the same line. # We add an anchor tag so that the table of contents can refer to # the header. # if (/]*)>(.*)<\/H[1-9]>/i) { unless ($ignore_header || $1 >= $ignore_level) { $_ = $`."$3".$'; push (@toc_level, $1); # Store header level 1..9 push (@toc_title, $3); # Store header title text push (@toc_file, $cur_page); } $ignore_header = 0; } } ######################### PARSE PAGE TITLE FOR TOC ######################## sub parse_page_title_for_toc { if (&parse_page_command) { push (@page_list, $cur_page); push (@page_title, $cur_title); &set_symbols_for_new_page; } # Set $index_ignore if this page is not wanted in index table $index_ignore {$cur_page} = 1 if $ignore_page == 1; $ignore_page = 0; } # Subroutine parses the .page command and sets the symbols PAGE and # TITLE appropriately. The .page command can take various forms: # # .page = "" Filename fully specified # .page <filename> = <title> Filename fully specified # .page "<title>" Filename built from $(BASE)$(INC).$(EXT) # .page <title> Filename built from $(BASE)$(INC).$(EXT) # # Returns 1 if the .page command was parsed okay, else 0. Sets $cur_page # to the current page filename and $cur_title to the current page title. sub parse_page_command { if (/^(\S+)\s*=\s*"(.*)"/ # .page <filename> = "title" || /^(\S+)\s*=\s*(.*)/) { # .page <filename> = title $cur_page = $1; # Keep current output filename $cur_title = $2; # Keep current output filename $cur_page .= ".".&valueof ("EXT") unless $cur_page =~ /\./; &define ("PAGE", $cur_page); &define ("TITLE", $cur_title); $inside_page = 1; # Command parsed okay } elsif (/^"(.*)"/ # .page "title" || /^(.*)/ ) { # .page title $_ = &valueof ("BASE").&valueof ("INC").".".&valueof ("EXT"); &expand_symbols_in_line; $cur_page = $_; $cur_title = $1; &define ("PAGE", $cur_page); &define ("TITLE", $cur_title); $inside_page = 1; # Command parsed okay } else { &syntax_error ("this is not a valid '.page' command"); $inside_page = 0; # Command failed } return ($inside_page); } sub set_symbols_for_new_page { # Get symbols for first/last/previous/next pages # $(...PAGE) is name of file, for HREF # $(...TITLE) is name of file, for description &define ("FIRST_PAGE", $page_list [0]); &define ("FIRST_TITLE", $page_title [0]); &define ("LAST_PAGE", $page_list [@page_list - 1]); &define ("LAST_TITLE", $page_title [@page_title - 1]); if ($page_nbr < @page_list - 1) { &define ("NEXT_PAGE", $page_list [$page_nbr + 1]); &define ("NEXT_TITLE", $page_title [$page_nbr + 1]); } else { &define ("NEXT_PAGE", ""); &define ("NEXT_TITLE", ""); } if ($page_nbr > 0) { &define ("PREV_PAGE", $page_list [$page_nbr - 1]); &define ("PREV_TITLE", $page_title [$page_nbr - 1]); } else { &define ("PREV_PAGE", ""); &define ("PREV_TITLE", ""); } $page_nbr++; } ######################## PARSE PAGE TITLE AND NAME ######################## sub parse_page_title_and_name { if (&parse_page_command) { if ($collect_pages) { push (@page_list, $cur_page); push (@page_title, $cur_title); } } &set_symbols_for_new_page; } ########################## PARSE PAGE TITLE ONLY ########################## sub parse_page_title_only { # .page [<filename> =] ["]<title>["] # local ($old_page) = $cur_page; # Save current page name &parse_page_command; # Parse .page command # and restore old page name $cur_page = $old_page; &define ("PAGE", $cur_page); } ########################### OPEN NEW OUTPUT PAGE ########################## sub open_new_output_page { # .page [<filename> =] ["]<title>["] # local ($dir) = $symbols {"DIR"}; if ($page_mode == 0 || defined ($requested_pages {$page_nbr}) || defined ($requested_pages {$cur_page})) { # Report size of previously-opened page, if any if ($output_open) { print " $output_lines lines, $output_size bytes\n"; } if (open (OUTPUT, ">$dir/$cur_page")) { print "$me I: creating $dir/$cur_page..." unless $symbols {"SILENT"} == 1; $output_open = 1; $output_lines = 0; $output_size = 0; } else { &error ("$me E: ($document $.) can't create $cur_page: $!"); &raise_exception ($exception_event); } } else { $output_open = 0; close (OUTPUT); } } ########################## OUTPUT HEADER FOR PAGE ######################### sub output_header_for_page { &output_block (*header); @header = @saved_header if defined (@saved_header); } sub output_block { local (*the_block) = @_; # Get reference to argument local ($saved_line) = $_; # We manipulate $_ local ($line); # Each line in array local ($forlevel) = 0; foreach $line (@the_block) { # We cannot use $_ directly $_ = $line; # or the array is modified &expand_symbols_in_line unless $for_level; ©_line_to_output; $for_level++ if /^\.\s*for/; $for_level-- if /^\.\s*endfor/; } $_ = $saved_line; } ########################## OUTPUT FOOTER FOR PAGE ######################### sub output_footer_for_page { if ($inside_page) { &output_block (*footer); @footer = @saved_footer if defined (@saved_footer); } } ############################ OPEN PIPED OUTPUT ############################ sub open_piped_output { # .pipe <filename> = "<title>" Filename fully specified # .pipe <filename> = <title> Filename fully specified if (/^(\S+)\s*=\s*"(.*)"/ # .page <filename> = "title" || /^(\S+)\s*=\s*(.*)/) { # .page <filename> = title &define ("PIPE_TITLE", $2); if (!open (PIPE, ">$1")) { &error ("$me E: ($document $.) can't create $1: $!"); &raise_exception ($exception_event); } else { local ($old_output_open) = $output_open; $pipe_open = 1; $output_open = 0; &output_block (*pipe_header); $output_open = $old_output_open; } } else { &syntax_error ("this is not a valid '.pipe' command"); $inside_page = 0; # Command failed } } ############################ CLOSE PIPED OUTPUT ########################### sub close_piped_output { # .endpipe if ($pipe_open) { local ($old_output_open) = $output_open; $output_open = 0; &output_block (*pipe_footer); $output_open = $old_output_open; $pipe_open = 0; close (PIPE); } else { &error ("$me E: ($document $.) .endpipe used with no matching .pipe"); &raise_exception ($exception_event); } } ########################## CLEAR SPECIFIED BLOCK ########################## sub clear_specified_block { # .block <name> [local] # /^(\S+)\s*(local)?/; # Get name after .block if ($standard_block {$1}) { eval ("*cur_block = *$1"); eval ("\@saved_$1 = \@$1") if ($2); # If 'local', save previous undef $user_block; undef @cur_block; } else { # User-defined block $user_block = $1; $user_line = 0; # Line into user block } } ############################ ADD LINE TO BLOCK ############################ sub add_line_to_block { if (defined ($user_block)) { $user_blocks {$user_block, ++$user_line} .= $_; } else { push (@cur_block, $_); } } ######################### ADD ACTION LINE TO BLOCK ######################## sub add_action_line_to_block { # When we parse an action line we removes the action and leaves # the action arguments in $_. So, when we need the original action # line -- as here -- we use the $action_line that we saved earlier. $_ = $action_line; &add_line_to_block; } ########################## BUILD SPECIFIED TABLE ########################## sub build_specified_table { # .build <block_name> <arguments> # /^(\S+)/; # Get name after .build { # These standard blocks are output only in the final pass if ($1 eq "toc") { ©_action_line_to_output; } elsif ($1 eq "dir") { &build_dir_block; } elsif ($1 eq "index") { &build_index_block; } elsif ($1 eq "anchor") { &build_anchor_block; } elsif (defined ($user_blocks {$1, 1})) { &build_user_block ($1); } else { &error ("$_"); &error ("$me E: ($document $.) undefined block '$1'"); &raise_exception ($exception_event); } } } # .build dir <directory> [<filename>...] # # Build one or more lines of directory listing, using @dir_open, @dir_entry, # and @dir_close blocks. The filename(s) can be complete names (no path), # or regular expressions. If no filenames are supplied, the entire # directory is read. Assumes $(LOCAL) in front of the directory name; # places $(SERVER) in front of the HREF name. sub build_dir_block { local ($dir); # Directory name local ($local_dir); # LOCAL directory name local ($files); # List of files, or empty local (@filelist); # List of file specifications local (@matches); # List of files that match # Check that $(LOCAL) and $(SERVER) are defined unless (defined ($symbols {"LOCAL"})) { &error ("$me E: ($document $.) .define LOCAL is required"); &raise_exception ($exception_event); } unless (defined ($symbols {"SERVER"})) { &error ("$me E: ($document $.) .define SERVER is required"); &raise_exception ($exception_event); } # .build dir <directory> [<filename>...] # Get directory name, and optional list of files from .build command ($dir, $files) = /^dir\s+(\S+)\s*(.*)/; $files = "*" if $files eq ""; # If nothing specified, assume * $dir =~ s/\\/\//g; # Replace any \ by / chop $dir if $dir =~ /\/$/; # Remove trailing / if any foreach (split (/\s/, $files)) { # Flag each file specified if (/^"(.*)"$/) { # Quoted filename $_ = $1; # may be regular expression } else { # Convert normal wildcards s/\./\\./g; # . becomes \. s/\+/\\+/g; # + becomes \+ s/\?/./g; # ? becomes . s/\*/.*/g; # * becomes .* } push (@filelist, $_); }; # Stick $(LOCAL) in front of directory name and expand symbols $_ = $symbols {"LOCAL"}.$dir; &expand_symbols_in_line; $local_dir = $_; # Process the directory if (opendir (DIR, $local_dir)) { # Process each file in the directory except "." and ".." foreach (grep (!/^\.\.?$/, readdir (DIR))) { # Look for file or pattern in @filelist (may get slow!) foreach $files (@filelist) { if (/$files/i) { # If we have a match, process $_ push (@matches, $_); last; } } } # Now build the directory listing @matches = sort @matches; &output_block (*dir_open); foreach (@matches) { &build_dir_entry ("$local_dir/$_", $symbols {"SERVER"}."$dir/$_"); } &output_block (*dir_close); closedir (DIR); } else { &error ("$me E: ($document $.) can't read directory $local_dir"); &raise_exception ($exception_event); } } sub build_dir_entry { local ($lname, $sname) = @_; # Get local and server filenames local ($ext) = $lname =~ /(\..*$)/; # Find extension in filename $ext =~ tr/A-Z/a-z/; # and convert to lowercase $ext = ".NONE" if $ext eq ""; # If no extension, use .NONE local (@stats) = stat ("$lname"); # [7] = file size, [9] = time local ($size) = @stats [7]; local ($sec, $min, $hour, $day, $mon, $year) = localtime (@stats [9]); # Populate symbols and generate @dir_entry block &define ("DIR_HREF", $sname); $sname =~ tr/A-Z/a-z/; &define ("DIR_HREFL", $sname); &define ("DIR_NAME", sprintf ("%-13s", $_)); &define ("DIR_EXT", $ext); &define ("DIR_SIZE", sprintf ("%8d", $size)); &define ("DIR_SIZEK", sprintf ("%8d", $size / 1024)); &define ("DIR_SIZEM", sprintf ("%8d", $size / 1048576)); &define ("DIR_DATE", sprintf ("%2d/%02d/%02d", $year, $mon + 1, $day)); &define ("DIR_TIME", sprintf ("%2d:%02d:%02d", $hour, $min, $sec)); &output_block (*dir_entry); } # .build index # # Build index for document using @index_entry block. The index lists # all pages in the document; a kind of summarised table of contents. sub build_index_block { local ($line); # Index into tables $ignore_pages && return; # No index if we're ignoring pages &output_block (*index_open); for ($line = 0; $line < @page_list; $line++) { # Update the symbols used to build the index &define ("INDEX_PAGE", $page_list [$line]); &define ("INDEX_TITLE", $page_title [$line]); &output_block (*index_entry) unless $index_ignore { $page_list [$line] } == 1; &output_block (*index) unless $index_ignore { $page_list [$line] } == 1; } &output_block (*index_close); } # .build anchor <anchor_name>[=title] # # Create an anchor definition at the specified point in the document, # and (re)define the anchor variable appropriately. sub build_anchor_block { # Get name and title (if any) after ".build anchor" if (/^anchor\s+([^=\s]+)(\s*=\s*(.*))?/) { &define ("ANCHOR", $1); $anchors {$1} = $cur_page; $atitles {$1} = $3 if $3; &output_block (*anchor); } else { &syntax_error ("this is not a valid '.build anchor' command"); } @anchor = @saved_anchor if defined (@saved_anchor); } # .build <user_block_name> # # Output the user-defined block of text at the current point in the # document. sub build_user_block { local ($name) = @_; # Get reference to argument local ($line); # Index into block array local ($forlevel) = 0; for ($line = 1; ; $line++) { !defined ($user_blocks {$name, $line}) && last; $_ = $user_blocks {$name, $line}; &expand_symbols_in_line unless $for_level; ©_line_to_output; $for_level++ if /^\.\s*for/; $for_level-- if /^\.\s*endfor/; } } ####################### BUILD SPECIFIED TABLE FINAL ####################### sub build_specified_table_final { # .build <block_name> <arguments> # /^(\S+)/; # Get name after .build { if ($1 eq "toc") { &build_toc_block; } } } # .build toc # # Build table of contents for document, using @toc_open, @toc_entry and # @toc_close blocks. sub build_toc_block { local ($line); # Index into @toc tables local ($level); # Current indentation level local ($level_base); # Minimum indentation level local ($header_nbr); # Generate header anchors local ($reference); # HREF for toc entry $level = $level_base = $toc_level [0] - 1; return if $ignore_pages; # No TOC if we're ignoring pages for ($line = 0; $line < @toc_title; $line++) { # Close old level in TOC (with @toc_close) if necessary while ($toc_level [$line] < $level) { &output_block (*toc_close); $level--; } # Open new level in TOC (with @toc_open) if necessary while ($toc_level [$line] > $level) { &output_block (*toc_open); $level++; } $reference = $toc_file [$line] eq $cur_page? "": $toc_file [$line]; $reference .= "#TOC".++$header_nbr; # Update the symbols used to build the table of contents &define ("TOC_LEVEL", $level); &define ("TOC_HREF", $reference); &define ("TOC_TITLE", $toc_title [$line]); &output_block (*toc_entry); } while ($level > $level_base) { &output_block (*toc_close); $level--; } } ########################## SKIP IF BLOCK IF FALSE ######################### sub skip_if_block_if_false { # .if <expression> # if (/^(.+)/) { # Get expression into $1 $if_level++; # We started a new .if block &skip_conditional_block unless eval ($1); } else { &syntax_error ("this is not a valid '.if' command"); } } # We skip input from the document until we close the current block. If # the current block started at an .if, it ends with an .else or an # .endif. If the current block started at an .else, it ends with an # .endif only. We count down $if_level if we find an .endif. Note that # the whole .if block must be in the same file. sub skip_conditional_block { local ($level) = 1; # Current nesting level local ($line) = $.; # Current input line number while (<$document>) { # Get next line of input $lines_read++; # Count the line if (/^\.\s*if/i) { $level++; # Open indentation level at if $if_level++; } elsif (/^\.\s*endif/i) { # Close indentation level at endif $level--; $if_level--; } # .else at top level ends block if ($level == 1 && /\.\s*else/) { $level = 0; } last if $level == 0; # End of local block } # If we ran-out of input, bitch a little if ($level > 0) { &error ("$me E: ($document $line) .endif missing"); &raise_exception ($exception_event); } } ########################## SKIP ELSE BLOCK ALWAYS ######################### sub skip_else_block_always { &skip_conditional_block; } ########################## CLOSE IF BLOCK IF OPEN ######################### sub close_if_block_if_open { if (--$if_level < 0) { &error ("$me E: ($document $.) .endif not expected"); &raise_exception ($exception_event); } } ######################### REPEAT FOR LOOP CONTENTS ######################## sub repeat_for_loop_contents { # .for <variable> in `<command>` # .for <variable> in @<filename> # .for <variable> in %<filename> <separator> <comment_flag> # [<orderby[>]>] [<exact_match>] [<case_sensitive>] [<criterium> ...] # .for <variable> in <item> <item>... # .for <variable> from <start> to <end> # local ($for_counter); local ($for_step); local (@args); local ($line, @fields, $not_found, $row_count, $db_delimiter, $join_delimiter, $db_comment_flag, $db_sortby, $exact_match, $case_sensitive, $criteria); undef @for_list; if (/^([A-Za-z0-9-\+\._]+)\s+in\s+`([^`]+)`$/) { if (open (CMD, "$2|")) { while (<CMD>) { chop; &expand_symbols_in_line; push (@for_list, $_); } close (CMD); &do_for_loop ($1, @for_list); } else { &error ("$me E: ($document $.) can't run command: $!"); &raise_exception ($exception_event); } } elsif (/^([A-Za-z0-9-\+\._]+)\s+in\s+\@([\S]+)$/) { if (open (LIST, "$2")) { while (<LIST>) { chop; &expand_symbols_in_line; push (@for_list, $_); } close (LIST); &do_for_loop ($1, @for_list); } else { &error ("$me E: ($document $.) can't open $2: $!"); &raise_exception ($exception_event); } } # Handles flat-text databases elsif (/^([A-Za-z0-9-\+\._]+)\s+in\s+\%([\S]+)\s+(.*)$/) { @args = &shellwords ($3); # Capture the five first arguments (two first ones compulsory) and # leave the remaining query criteria into the @args array. $db_delimiter = shift @args; $db_comment_flag = shift @args; $db_sortby = (@args > 0) ? shift @args:""; $exact_match = (@args > 0) ? shift @args:"off"; $case_sensitive = (@args > 0) ? shift @args:"off"; $row_count = 0; if (open (LIST, "$2")) { while (($line = <LIST>)) { # As we work our way through the datafile we will process # the rows to see if they match our search parameters. # First the script will skip over any "comment" line in # the datafile. Comment lines are denoted as beginning # (^) with the flag given by the second argument variable. # The newline character will also be stripped off every row # before it is processed. unless ($line =~ /^$db_comment_flag/) { chop($line); # Chop off extraneous newline # Then each row is split into its database field based on # the the value of the first argument. @fields = split("$db_delimiter", $line); # Once we have gathered all of the database fields for the # current database row as separate elements in the @fields # array, we can begin to process them, checking to # see if they match the client-submitted criteria. # # First, we set not_found to zero which indicates that we # are assuming the criteria was satisfied for the row. # # Then, for each criteria specified in @args, # we call the db_query subroutine (in the textdb.pl library) # to apply the criteria. # If the criteria is not satisfied, it keeps returning 1 # which would increment $not_found. # # Thus, $not_found will end up being the number of # criteria that were not found, thus a "no match". a # zero, on the other hand, means success. $not_found = 0; foreach $criteria (@args) { $not_found += &db_query( $exact_match, $case_sensitive, *fields, $criteria); } # If not found is still 0, the row is pushed into the # @for_list array. We will use this array to hold # all of the rows which met the user-submitted search # criteria until we are ready to display them all to the # user. # We will increment row_count whenever a row matches the query. # That way, we will be able to report how many hits were scored # and issue a warning if there were no hits at all. # Recall that when not_found = 0, that means that the # criteria was satisfied for the row. if ($not_found == 0) { # The following takes care of a delimiter like "\\|" (the one you use # when the delimiter character is "|"), which works # properly when splitting, but adds an undesired "\" when joining. $join_delimiter = $db_delimiter; $join_delimiter =~ s/\\// if ($db_delimiter =~ m/\\/); push(@for_list, join("$join_delimiter", @fields)); $row_count++; } } # End of unless ($line =~ /^$db_delimiter/) } # End of while datafile has data close (LIST); if ($row_count == 0) { print "$me E: ($document $.) The query returned no hits\n"; } elsif ($db_sortby ne ""){ # Now sort the results of the query according to the field number # $db_orderby, the second argument in the .for loop # If no argument was provided the rows will be kept as they were read. &db_sort(*for_list, $db_sortby); } &do_for_loop ($1, @for_list); } else { &error ("$me E: ($document $.) can't open $2: $!"); &raise_exception ($exception_event); } } # The first "in" argument may not start with "%" and so conflict # with the text-database syntax # elsif (/^([A-Za-z0-9-\+\._]+)\s+in\s*([^%]*)/) { @for_list = split (/\s/, $2); &do_for_loop ($1, @for_list); } elsif (/^([A-Za-z0-9-\+\._]+)\s+from\s+([0-9]+)\s+to\s+([0-9]+)$/) { $for_step = $3 > $2? 1: -1; $for_counter = $2; while ($for_counter != $3) { push (@for_list, $for_counter); $for_counter += $for_step; } push (@for_list, $for_counter); &do_for_loop ($1, @for_list); } else { &syntax_error ("this is not a valid '.for' command"); } } # Subroutine generates a .for block as specified. Any embedded commands # are copied without modification (except variable expansion). This may # result in extra passes to handle those commands. # sub do_for_loop { local ($for_var, @for_list) = @_; local ($level) = 1; # Current nesting level local ($line) = $.; # Current input line number local (@for_block); local ($for_item); local ($indent); # Indented .for? local ($index); # Words in line while (<$document>) { # Get next line of input $lines_read++; # Count the line $level++ if /^\.\s*for/i; # We found the end of the .for $level-- if /^\.\s*endfor/i; # block when $level is zero last if $level == 0; # End of local block chop; # Kill trailing newline push (@for_block, $_); # Else store the line } # If we ran-out of input, bitch a little if ($level > 0) { &error ("$me E: ($document $line) .endfor missing"); &raise_exception ($exception_event); } # Output .for block for each instance in the for loop # We only expand symbols in the outermost .for loop; if it # contains further .for loops, these are handled in the next # pass. This allows access to $(1)..$(n) inside each loop. foreach $for_item (@for_list) { if (defined ($db_delimiter)){ @fields = split (/$db_delimiter/,$for_item); } else { @fields = &shellwords ($for_item); } # Define for future passes $_ = ".define $for_var $for_item"; ©_line_to_output; foreach $line (@for_block) { # We cannot use $_ directly $_ = $line; # or the array is modified $indent++ if /^\.for/; $indent-- if /^\.endfor/; # Expand $(1)..$(n) and for_var in line if (!$indent) { for (;;) { if (/\$\($for_var\)/) { $_ = $`.$for_item.$'; } elsif (/\$\(([0-9]+)\)/) { $_ = $`.$fields [$1 - 1].$'; } else { last; } } } ©_line_to_output; } } } ########################### ECHO TEXT TO CONSOLE ########################## sub echo_text_to_console { # .echo [-] <text> # .echo [-] "<text>" # .echo [-] '<text>' # /^(-\s+)?/; # Parse .echo command local ($newline) = $1 eq ""? "\n": ""; $_ = $'; # Get text after .echo [-] $_ = $2 if /^(["'])(.*)\1$/; # Remove " or ' if any print "$_$newline"; # Print text + $newline } ######################## CHECK IF INTER PASS NEEDED ####################### sub check_if_inter_pass_needed { &raise_exception ($need_inter_pass_event) if $more_commands; } ######################## SIGNAL DOCUMENT PROCESSED ######################## sub signal_document_processed { print "$me I: $lines_read lines processed\n" unless $symbols {"SILENT"} == 1; } ####################### SIGNAL UNEXPECTED END BLOCK ####################### sub signal_unexpected_end_block { &syntax_error (".endblock not expected"); } ######################## SIGNAL UNEXPECTED END FOR ######################## sub signal_unexpected_end_for { &syntax_error (".endfor not expected"); } ######################### SIGNAL INTERNAL FAILURE ######################### sub signal_internal_failure { &syntax_error ("unexpected command"); } ############################ CLOSE THE DOCUMENT ########################### sub close_the_document { # Close current document, and see if we can unstack a level # close ($document); undef $file_is_open {$document}; if (@document_stack > 0) { $document = pop (@document_stack); &raise_exception ($finished_include_event); } } ########################### CLEAN UP WORK FILES ########################### sub clean_up_work_files { foreach (@work_files) { unlink unless $debug_mode; } } ############################ GET EXTERNAL EVENT ########################### sub get_external_event { } ########################## TERMINATE THE PROGRAM ######################### sub terminate_the_program { $the_next_event = $terminate_event; close (ERRORS); unlink ("errors.lst") unless ($have_errors == 1); }