#!/usr/bin/perl
#
# $Id: $
#
=head1 NAME
bsd-man-to-exception.pl - Converts BSD man pages into ECMA XML <exception/> elements
=head1 SYNOPSIS
zcat /path/to/man/page.gz | B<bsd-man-to-exception.pl> > page.xml
=head1 DESCRIPTION
Parses a BSD nroff man page looking for the C<ERRORS> section and converts the
error numbers found into ECMA XML <exception/> elements. The errno values
found are mapped to the best-fit .NET Exception that will be thrown by
C<Mono.Unix.UnixMarshal.ThrowExceptionForError(Mono.Unix.Native.Errno)>.
This program is B<NOT> currently suitable for converting Linux man pages,
since the Linux man pages use different nroff macros. The Linux man pages
are also less semantic and more output-oriented -- for example, BSD will
use C<.Fn function> to name a function, while Linux will use
C<.B function> (.B bolds the named item, with no semantic implications for
what it's bolding).
=head1 NOTES
You should review the generated text, to ensure conformance with argument
names and other matters.
The exception mapping in get_exception_type() should be kept in sync with
C<Mono.Unix.UnixMarshal.CreateExceptionForError(Mono.Unix.Native.Errno)>.
=head1 COPYRIGHT
Copyright (C) 2006 Jonathan Pryor <jonpryor@vt.edu>
=cut
#
# Copyright (C) 2006 Jonathan Pryor <jonpryor@vt.edu>
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
use strict;
my $in_errors = undef;
# $errors->{ExceptionType}->[ message1, message2 ...]
my $errors = {};
my $last_errno = undef;
my $message = "";
my $commands = {
".It" => \&command_begin_error,
".Dv" => \&command_constant,
".Fa" => \&command_parameter,
".Ql" => \&command_quote,
".El" => \&command_end_error,
".Sh" => sub { $in_errors = undef; },
};
while (<>) {
unless (defined $in_errors) {
$in_errors = 1 if (/^\.Sh.*ERRORS$/);
next;
}
chomp;
my $arg;
if (($arg) = /^(\.[\w\\"]+)/) {
if (exists $commands->{$arg}) {
$commands->{$arg}->($_);
}
else {
append_message ($_);
}
}
else {
append_message ($_);
}
}
command_end_error ();
foreach my $etype (sort etype_sort keys %$errors) {
create_exception ($etype , $errors->{$etype});
}
sub append_message {
my $append = shift;
if ($message eq "") {
$message = $append;
}
else {
$message .= "\n $append";
}
}
sub add_error {
my $errors = shift;
my $etype = shift;
my $message= shift;
unless (exists $errors->{$etype}) {
$errors->{$etype} = [];
}
my $elist = $errors->{$etype};
push @$elist, $message;
}
sub get_exception_type {
my $errno = shift;
return "System.ArgumentException" if $errno eq "EINVAL" or
$errno eq "EBADF";
return "System.ArgumentOutOfRangeException" if $errno eq "ERANGE";
return "System.IO.DirectoryNotFoundException" if $errno eq "ENOTDIR";
return "System.IO.FileNotFoundException" if $errno eq "ENOENT";
return "System.InvalidOperationException" if $errno eq "EOPNOTSUPP" or
$errno eq "EPERM";
return "System.InvalidProgramException" if $errno eq "ENOEXEC";
return "System.IO.IOException" if $errno eq "EIO" or
$errno eq "ENOSPC" or $errno eq "ENOTEMPTY" or $errno eq "ENXIO" or
$errno eq "EROFS" or $errno eq "ESPIPE";
return "System.NullReferenceException" if $errno eq "EFAULT";
return "System.OverflowException" if $errno eq "EOVERFLOW";
return "System.IO.PathTooLongException" if $errno eq "ENAMETOOLONG";
return "System.UnauthorizedAccessException" if $errno eq "EACCESS" or
$errno eq "EISDIR";
return "Mono.Unix.UnixIOException";
}
sub command_begin_error {
shift;
my $arg;
if (($arg) = /^\.It Bq Er (.+)$/) {
command_end_error ();
$last_errno = $arg;
}
else {
append_message ($_);
}
}
sub command_end_error {
if (defined $last_errno) {
append_message ("[<see cref=\"F:Mono.Unix.Native.Errno.$last_errno\" />]");
add_error ($errors, get_exception_type ($last_errno), $message);
}
$last_errno = undef;
$message = "";
}
sub command_constant {
shift;
my ($arg, $rest) = /^\.Dv (\w+)(.*)$/;
append_message ("<see cref=\"F:Mono.Unix.Native.TODO.$arg\" /> $rest");
}
sub command_parameter {
shift;
my ($arg, $rest) = /^\.Fa (\w+)(.*)$/;
append_message ("<paramref name=\"$arg\" /> $rest");
}
sub command_quote {
shift;
my ($arg, $rest) = /^\.Ql (\S+)( [.,])?$/;
$arg =~ s/\\&//g;
append_message ("\"<c>$arg</c>\"$rest");
}
sub create_exception {
my $etype = shift;
my $elist = shift;
print <<EOF;
<exception cref="$etype">
EOF
my $first_message = shift @$elist;
print_message ($first_message);
foreach my $message (@$elist) {
print <<EOF;
<para>-or-</para>
EOF
print_message ($message);
}
unshift @$elist, $first_message;
print <<EOF;
</exception>
EOF
}
sub print_message {
my $message = shift;
print <<EOF;
<para>
$message
</para>
EOF
}
# Sorts exception types so that System.* exceptions come before Mono.*
# exceptions, and fewer namespaced types are before greater namespaced types
# (e.g. System.UnauthorizedAccessException before
# System.IO.FileNotFoundException).
sub etype_sort {
my @aparts = split /\./, $a;
my @bparts = split /\./, $b;
my $acnt = scalar @aparts;
my $bcnt = scalar @bparts;
# return shortest path first, except for Mono.* (which is always last)
if ($acnt < $bcnt) {
return -1 if $aparts [0] eq "System";
return 1 if $bparts [0] eq "System";
return $aparts [$acnt] cmp $bparts [$acnt];
}
elsif ($acnt == $bcnt) {
return -1 if ($aparts [0] eq "System" and $bparts [0] eq "Mono");
return 1 if ($aparts [0] eq "Mono" and $bparts [0] eq "System");
return $a cmp $b;
}
else { # $bcnt < $acnt;
return 1 if $aparts [0] eq "System";
return -1 if $bparts [0] eq "System";
return $aparts [$acnt] cmp $bparts [$acnt];
}
}
sub min {
my ($a, $b) = @_;
return ($a < $b) ? $a : $b;
}
syntax highlighted by Code2HTML, v. 0.9.1