#!/usr/bin/perl -w # Copyright (c) 2000, 2001 Udo Erdelhoff. All rights reserved. # Written for the FreeBSD German Documentation Project # # Redistribution and use in source and compiled forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above # copyright notice, this list of conditions and the following # disclaimer as the first lines of this file unmodified. # # 2. Redistributions in compiled form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # THIS SOFTWARE IS PROVIDED BY UDO ERDELHOFF "AS IS" AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL UDO ERDELHOFF BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. # # $Id: suppe,v 1.16 2003/05/25 09:32:12 ue Exp $ use strict; use Getopt::Long; # Things that can be configured my ($fli, $indent, $linecount, $maxlinelength, $sli, $debug, $tabsize); # Activate debug code if != 0 $debug = 0; # Minimum indent. Should be zero, increase it only if you need to format a # small section of a document. For a FAQ entry, use $indent = 6; $indent = 0; # First level indent. Number of spaces to add when we descend a tag level. # Taken from FDP $fli = 2; # Second level indent. Number of spaces to add when we wrap contents. # Taken from FDP $sli = 2; # Tabsize - how many spaces are replaced by one tab $tabsize = 8; # Maximum line length when wrapping $maxlinelength = 70; # Number of first line, adjust when doing work inside a document $linecount = 0; sub make_indent ($); sub usage () { print STDERR "usage: suppe [-i initial indent] [-l linestart] "; print STDERR "[-m max linelength] [-t tabsize] [files ...]"; die; } GetOptions ('debug' => \$debug, 'firstlevelindent=i' => \$fli, 'indent=i' => \$indent, 'linecount=i' => \$linecount, 'maxlinelength=i' => \$maxlinelength, 'tabsize=i' => \$tabsize, 'secondlevelindent=i' => \$sli) || usage(); if ($indent < 0 || $indent >= $maxlinelength || $maxlinelength < 40) { usage(); } ########################################################################## # Variables for protblock handling my (@protected, $protcount, @special, $protect, $pattern); # List of elements where whitespace or linebreaks are significant. # Expand if neccessary @special = ("programlisting", "screen", "literallayout", "address", "pre"); # All other global variables my ($in, $foo, $closer, $bar, $type, $lindent, $line, $counter, $flush); # Sometimes, it's good to be bad... $/ = "\0\0"; $in = <>; exit unless (defined $in); chomp $in; exit if ($in eq ""); if ($debug > 0) { $counter = length ($in); print "$counter bytes of input read\n"; } # Skip the FPI and any comments, brutal version ($foo) = ($in =~ /^ 0); ($foo) = ($in =~ /^[\000-\377]*? 0); $foo =~ tr/A-Z/a-z/; ($bar) = ($in =~ /^([\000-\377]*?)<$foo.*?>/io); unless (defined $bar) { print STDERR "Could't find the opening <$foo>\n"; die "Cannot handle this document\n"; } $counter = length ($bar); print "Cutting $counter bytes to skip FPI\n" if ($debug > 0); substr ($in, 0, $counter) = ""; print "$bar"; $linecount += ($bar =~ tr/\n/\n/); } # look for areas where whitespace and/or linebreaks are significant # Store them in an array and replace them with a "protblock" to insure # that we don't mess with them. $protcount = 0; foreach $protect (@special) { $pattern = "<$protect>[\000-\377]*?"; print "Protecting $protect, pattern is $pattern\n" if ($debug > 0); while (1) { undef $foo; ($foo) = ($in =~ /($pattern)/i); last unless (defined $foo); print "Instance found\n" if ($debug > 0); $protcount++; $protected[$protcount] = $foo; $bar = "###PROT$protcount###"; $in =~ s/$pattern/$bar/i; } } print "End of main protection loop\n" if ($debug > 0); # Save multi-line SGML comments print "Looking for multi-line SGML comments\n" if ($debug > 0); $pattern = ""; while (1) { ($foo) = ($in =~ /\G($pattern)/gic); last unless (defined $foo); # $counter = pos($in); print "Instance found\n" if ($debug > 0); next unless ($foo =~ /\n/); print "Protecting instance\n" if ($debug > 0); $protcount++; $protected[$protcount] = $foo; $bar = "###PROT$protcount###"; $in =~ s/$pattern/$bar/i; } print "Nuking whitespace\n" if ($debug > 0); # Nuke everything even remotely resembling whitespace # Protect ". " sequences $in =~ s/\. /__FULLSTOP_DOUBLE_SPACE__/g; # Treat ".\n" as possible ". " $in =~ s/\.\n/__FULLSTOP_DOUBLE_SPACE__/g; # Same with ? $in =~ s/\? /__QUESTION_DOUBLE_SPACE__/g; $in =~ s/\?\n/__QUESTION_DOUBLE_SPACE__/g; # and ! $in =~ s/\! /__EXCLAIM_DOUBLE_SPACE__/g; $in =~ s/\!\n/__EXCLAIM_DOUBLE_SPACE__/g; # and : $in =~ s/\: /__COLON_DOUBLE_SPACE__/g; $in =~ s/\:\n/__COLON_DOUBLE_SPACE__/g; # and ; $in =~ s/\; /__SEMICOLON_DOUBLE_SPACE__/g; $in =~ s/\;\n/__SEMICOLON_DOUBLE_SPACE__/g; # Convert all tabs to spaces $in =~ tr/\t/ /; # Convert all newlines to spaces $in =~ tr/\n/ /; # Convert all sequences of spaces into a single space $in =~ s/\s+/ /g; # Nuke space at SOL and EOL (Can't be more than one after the last line) $in =~ s/^ //; $in =~ s/ $//; # Nuke space between tags $in =~ s/> 0); unless ($in =~ /^)/); $in =~ s/^//; if ($line eq "") { make_indent($indent); print "$foo\n"; $linecount++; } else { $line = "$line$foo"; } next; } # Check for character data unless ($in =~ /^<.*?>/) { undef $foo; ($foo) = ($in =~ /^(.*?)>>Hey, incomplete entity, bailing out\n"; $foo = $in; $in = ""; } else { $in =~ s/^.*?/); $in =~ s/^<.*?>//; $closer = ($foo =~ s!^/!!); $type = classify_tag ($foo); if ($type == 1 || $type == 2) { #inline/single if ($counter == 0 && $line ne "") { unless ($line =~ /^<.*>/) { print STDERR ">>>Whoa, character data outside inline tag in $linecount!\n"; } make_indent ($indent); print "$line\n"; $linecount++; $line = ""; } if ($closer == 0) { # opening tag $line = "$line<$foo>"; if ($type == 1) { $counter++; $indent += 2; } next; } if ($type == 2) { print "\n\nline: $line\n"; # Output trash for debug print "foo: $foo\n"; print "closer: $closer\n"; print "in: $in\n"; die "closing single tag found"; } $counter--; $indent -= 2; if ($counter < 0) { print "\n\nline: $line\n"; # Output trash for debug print "foo: $foo\n"; print "closer: $closer\n"; print "in: $in\n"; die "counter < 0"; } $line = "$line"; next unless ($counter == 0 || $counter == $flush); if ($counter == 0) { $flush = 0; } print_inline(1); $line = ""; next; } # Ok, this is a normal tag. Stay calm, we can handle it # Sanity checks first if ($counter > 0) { if ($line ne "") { print_inline(2); $line = ""; } print STDERR ">>>Hey, normal tag $foo inside inline element in $linecount!\n"; } if ($counter == 0 && $line ne "") { print STDERR ">>>Whoa, character data before normal tag in $linecount!\n"; print_inline(3); $line = ""; } if ($closer == 1) { # closing tag $indent -= 2; make_indent($indent); print "\n"; $linecount++; $lindent = $indent; next; } # This memory is too small for me and this opening tag if ($indent == $lindent) { print "\n"; $linecount++; } make_indent($indent); print "<$foo>\n"; $linecount++; $lindent = $indent; $indent += 2; } sub print_inline { my ($mode, $rci); $mode = $_[0]; if ($mode == 2) { $rci = $indent - 2 * $counter; } else { $rci = $indent; } if ($lindent == $rci && $mode != 3) { print "\n"; $linecount++; } real_print ($rci, $line, $mode); $line = ""; if ($mode != 3) { $lindent = $rci; } if ($mode == 2) { $flush = $counter; } } # Printing routine for inline elements, first part # Distinguish between protblock markers and normal inputs (tags, char data) # protblock markers are replaced by the contents of the protected block # length of last line of the contents is remembered for further wrapping # normal input is fed to wrapup to line wrapping sub real_print () { my ($base, $content, $toprint, $protid, $protlen, $offset); $base = $_[0]; $content = $_[1]; $offset = -1; while ($content ne "") { unless ($content =~ /###PROT[0-9]+###/) { $linecount += wrapup ($base, $content, $offset); print "\n"; $linecount++; last; } ($protid) = ($content =~ /^###PROT([0-9]+)###/); if (defined $protid) { $linecount += ($protected[$protid] =~ tr/\n/\n/); print "$protected[$protid]"; $content =~ s/^###PROT[0-9]+### *//; ($toprint) = ($protected[$protid] =~ /\n(.*?)$/); if (defined $toprint) { $offset = length ($toprint); } else { $offset = 0; } next; } ($toprint) = ($content =~ /^(.*?)###PROT[0-9]+###/); unless (defined $toprint) { print "Internal logic error\n"; print "Protblock left, not in front, but nothing in front of it?\n"; print "Original input: $_[1]\n"; print "Remaining content: >>>$content<<<\n"; print "!!!$toprint!!!\n"; die "Internal Logic Error"; } $linecount += wrapup ($base, $toprint, $offset); $content =~ s/^(.*?)###PROT([0-9]+)###/###PROT$2###/; } } # printing routine for inline elements, second part # sub wrapup { my ($b, $base, $pos, $content, $mark, $out, $lpos, $count, $offset, $o); $base = $_[0]; $content = $_[1]; $offset = $_[2]; if ($offset == -1) { $mark = 0; } else { $mark = 1; $base += $sli; }; $count = 0; while (1) { if ($offset == -1) { $o = $base; } else { $o = $offset; }; if (length ($content) + $o < $maxlinelength) { if ($offset == -1) { make_indent($base); print "$content"; } else { print "$content"; } last; } $pos = -1; $lpos = -1; while (($pos = index ($content, " ", $pos)) > -1) { last if ($pos + $o > $maxlinelength); $lpos = $pos; $pos++; } if ($lpos == -1) { # No early space if ($pos != -1) { # is there any space $lpos = $pos; # We use that } else { # if not make_indent($base); print "$content"; # we're done last; } } $out = substr ($content, 0, $lpos); $out =~ s/ +$//; if ($offset == -1) { make_indent($base); print "$out\n"; } else { print "$out\n"; $offset = -1; } $count++; if ($mark == 0) { $base += $sli; $mark = 1; } substr ($content, 0, $lpos+1) = ""; if ($content =~ /^ /) { substr ($content, 0, 1) = ""; } } return ($count); } my ($lastresult, $lastpattern); sub classify_tag { my ($inlinetags, $singletags, $normaltags, $pattern, $result); $inlinetags = "<ulink><link><filename><command><surname><pubdate><emphasis><replaceable><literal><hostid><email><acronym><entry><keycap><userinput><application><prompt><seg><devicename><option><symbol><quote><username><userinput><varname><function><para><city><street><postcode><country><phone><fax><otheraddr><state><holder><releaseinfo><firstname><year><attribution><envar><wordasword><makevar><citetitle><constant><errorname><firstterm><trademark><type><systemitem><parameter><token><action><database><guimenuitem><guimenu><keysym><keycombo><hardware><term><h1><h2><h3><h4><h5><h6><h7><h8><h9><p><i><a><b><sgmltag><collabname><corpauthor><maketarget><foreignphrase><publishername><edition><isbn><abbrev><pagenums><guimenuitem><li><em>"; $singletags = "<anchor><xref><area><co>"; $normaltags = "<sect1><sect2><sect3><sect4><sect5><sect6><sect7><sect8><sect9><qandaset><qandaentry><question><answer><itemizedlist><listitem><book><bookinfo><authorgroup><author><abstract><preface><informaltable><tgroup><thead><row><tbody><tip><note><warning><important><footnote><orderedlist><chapter><table><segmentedlist><seglistitem><variablelist><varlistentry><procedure><step><example><informalexample><programlisting><literallayout><screen><article><artheader><affiliation><abstract><programlistingco><areaspec><areaset><calloutlist><callout><copyright><blockquote><qandadiv><html><head><body><ol><li><collab><caution><bibliography><biblioentry><biblioset><publisher><ul><screenco>"; $pattern = $_[0]; $pattern =~ s/ .*$//; $pattern = "<$pattern>"; if ($inlinetags =~ $pattern) { $result = 1; } elsif ($singletags =~ $pattern) { $result = 2; } elsif ($normaltags =~ $pattern) { $result = 3; } elsif ($pattern eq "<>" and defined $lastresult) { $result = $lastresult; print STDERR ">>>Hey, $lastpattern closed with </>\n"; # Call by reference is powerfull. # All power corrupts, absolute power is even more fun. # I'm tempted. I'm really tempted. # $_[0] = $lastpattern; } else { die "Unknown tag $_[0]"; } $lastresult = $result; $lastpattern = $pattern; return ($result); } sub make_indent ($) { my $amount = $_[0]; my ($tabs, $spaces); die "internal error: no amount for indent" unless (defined $amount && $amount >= 0); return if ($amount == 0); $spaces = $amount % $tabsize; $tabs = ($amount - $spaces) / $tabsize; print "\t" x $tabs; print " " x $spaces; }