#!/usr/bin/perl -w # Copyright (c) 2000, 2001 Udo Erdelhoff. All rights reserved. # Written for the FreeBSD German Documentation Project # # Redistribution and use in source and compiled forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above # copyright notice, this list of conditions and the following # disclaimer as the first lines of this file unmodified. # # 2. Redistributions in compiled form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # THIS SOFTWARE IS PROVIDED BY UDO ERDELHOFF "AS IS" AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL UDO ERDELHOFF BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. # # $Id: slashexpand,v 1.5 2002/11/24 09:37:59 ue Exp $ use strict; use Getopt::Long; # Whitespace and/or newlines are significant inside these elements my $specials = "
"; # Hack to parse HTML-disguised-as-SGML $specials = "$specials
";

# These tags are never closed
my $singles = "";

# All other global variables
my ($in, $filetype, $chunk, $tag, $length, $realtag, $closer, @tagstack, $debug);

$debug = 0;

sub usage () {
        print STDERR "usage: slashexpand [--debug]\n";
        print STDERR "Second dash is optional, abbreviation is possible\n";
        die;
}

GetOptions ('debug'	=> \$debug);

# Sometimes, it's good to be bad...
$/ = "\0\0";		
$in = <>;
exit unless (defined $in);
chomp $in;
exit unless ($in ne "");

# Skip the FPI and any comments, brutal version
($filetype) = ($in =~ /^\n";
	die "Cannot handle this document\n";
}
$filetype =~ tr/A-Z/a-z/;
($chunk) = ($in =~ /^([\000-\377]*?)<$filetype>/io);
unless (defined $chunk) {
	print STDERR "Could't find the opening <$filetype>\n";
	die "Cannot handle this document\n";
}
$length = length ($chunk);
substr ($in, 0, $length) = "";
print "$chunk";

# Let's deal out some damage ;->
while (defined $in && $in ne "") {
# Detect and handle SGML comments. We assume valid SGML...
	if ($in =~ /^)/);
		unless (defined $chunk) {
			print STDERR "unclosed SGML comment found here: $in\n";
			die "Giving up\n";
		}
		$length = length ($chunk);
		substr ($in, 0, $length) = "";
		print "$chunk";
		next;
	}

# Detect and handle character data
	unless ($in =~ /^)/);
	unless (defined $tag) {
		print STDERR "impossible situation #2 found\n";
		print STDERR "Current input $in\n";
		die "Giving up!\n";
	}
	$realtag = $tag;
	if ($tag =~ /\s/) {
		($tag) = split (/\s/, $tag, -1);
		$tag = "$tag>";
	}
	$tag =~ tr/A-Z/a-z/;

	if ($specials =~ $tag) {				# Special tag?
		$closer = $tag;
		substr ($closer, 1, 0) = "/";
		($chunk) = ($in =~ /^($realtag[\000-\377]+?$closer)/);
		if (defined $chunk) {				# Good special
			$length = length ($chunk);
			substr ($in, 0, $length) = "";
			print "$chunk";
			next;
		}
		print "Yikes, special hack activated\n" if ($debug);
		($chunk) = ($in =~ /^($realtag[\000-\377]+?<\/>)/);
		unless (defined $chunk) {			# ***TILT***
			print STDERR "Unclosed special found: $in\n";
			die "Giving up!\n";
		}
		$length = length ($chunk);
		substr ($chunk, -3, 3) = $closer;
		substr ($in, 0, $length) = "";
		print "$chunk";
		next;
	}

	if ($singles =~ $tag) {					# Single tag?
		$length = length ($realtag);
		substr ($in, 0, $length) = "";
		print "$realtag";
		print "Special $realtag ignored, matched by $tag\n" if ($debug > 0);
		next;
	}

	$length = length ($realtag);				# Remember it
	if ($tag =~ m!^ 0);
		if ($tag ne "") {
			if ($tag ne $closer) {			# Sanity check
				print STDERR "Stack is $closer\n";
				print STDERR "tag expects $tag\n";
				print STDERR "realtag expects $realtag\n";
				print STDERR "Input position is $in\n";
				die "Program lost in space -- Bad SGML?\n";
			}
		} else {					# Bad closer!
			print "Replacing  with $closer\n" if ($debug > 0);
			$realtag = $closer;
		}
	} else {						# Opening tag!
		push @tagstack, $tag;
		print "Pushed $tag\n" if ($debug > 0);
	}

	substr ($in, 0, $length) = "";
	print "$realtag";
	next;
}