package Lire::ReportParser::AsciiDocBookFormatter; use strict; use base qw/ Exporter Lire::DocBookParser /; use Text::Wrap qw/ wrap /; use Lire::Utils qw/deep_copy/; use Lire::I18N qw/ensure_utf8 ensure_local_codeset/; our @EXPORT_OK = qw/dbk2txt/; =pod =head1 NAME Lire::ReportParser::AsciiDocBookFormatter - Lire::ReportParser subclass which formats description. =head1 SYNOPSIS To format DocBook: use Lire::ReportParser::AsciiDocBookFormatter qw/dbk2txt/; my $txt = dbk2txt( "Test" ); In XML Report processors : package MyParser; use base qw/ Lire::ReportParser::AsciiDocBookFormatter Lire::ReportParser /; sub new { my $self = shift->SUPER::new( @_ ); $self->init_docbook_formatter( @_ ); return $self; } sub handle_description { my ( $self, $desc ) = @_; print $desc; return; } =head1 DESCRIPTION This package implements methods that can handle the content of C elements and it can be used by a subclass of Lire::ReportParser. Client only have to inherit from this module so that a handle_description() method is available to process the text formatted DocBook description. This module also provide a convenient dbk2txt() function which can be used to format a string containing DocBook elements into an ASCII equivalent. =head1 USING Lire::ReportParser::AsciiDocBookFormatter Lire::ReportParser processors that would like to work with text version of the description should inherit from Lire::ReportParser::AsciiDocBookFormatter in addition to Lire::ReportParser. If they override the description_start(), description_end()methods, they B link to their parents' version using C. Additionnally, they should merge the value elements_spec() in their elements_spec() implementation. The Lire::ReportParser::AsciiDocBookFormatter should be listed before Lire::ReportParser in the @ISA. The AsciiDocBookFormatter doesn't inherit directly from Lire::ReportParser so that it can be used in multiple inheritance scenario. The subclass should call the init_docbook_formatter() method from their constructor method to initialize the DocBookFormatter module. =head2 init_docbook_formatter( %params ) The initializer recognizes some parameters that can be used to control the behavior of the DocBook handling: =over 4 =item columns The number of columns in which the DocBook text should be formatted. Defaults to 72. =back =cut sub new { my $self = shift->SUPER::new( @_ ); $self->init_docbook_formatter( @_ ); return $self; } sub init_docbook_formatter { my ($self, %args) = @_; $self->{'dbk_columns'} = $args{'columns'} || 72; return $self; } =pod =head2 dbk_start_processing() Initializes the parser's structure for formatting DocBook XML as ASCII. This is used from the description_start implementation. =cut sub dbk_start_processing { my $self = $_[0]; $self->{'dbk_process'} = 1; $self->{'dbk_text'} = ""; $self->{'dbk_text_blocks'} = []; $self->{'dbk_lists'} = []; $self->{'dbk_left_margin'} = 4; $self->{'dbk_right_margin'} = 8; return; } =pod =head2 dbk_end_processing() Cleans the parser structure. This is used from the description_end() implementation. =cut sub dbk_end_processing { my $self = $_[0]; delete $self->{'dbk_process'}; delete $self->{'dbk_text'}; delete $self->{'dbk_text_blocks'}; delete $self->{'dbk_lists'}; return; } sub namespaces { my $self = $_[0]; return { "http://www.logreport.org/LRML/" => 'lire' }; } sub elements_spec { my $self = $_[0]; my $spec = deep_copy( $self->Lire::DocBookParser::elements_spec() ); foreach my $admon ( qw/note tip caution important warning/ ) { $spec->{$admon}{'start'} = 'admonition_start'; $spec->{$admon}{'end'} = 'admonition_end'; } foreach my $element ( keys %$spec ) { next if $element eq 'ulink'; $spec->{$element}{'char'} = 'inline_char' if exists $spec->{$element}{'char'}; } $spec->{'lire:description'} = [ @Lire::DocBookParser::top_levels ]; return $spec; } sub description_start { $_[0]->dbk_start_processing(); return; } sub description_end { my $self = $_[0]; return unless $self->{'dbk_process'}; $self->handle_description( $self->{'dbk_text'} ); $self->dbk_end_processing(); return; } =pod =head2 handle_description( $description ) This method is invoked after the closing tag of the C element is encountered. The $description contains the description formatted in plain text. =cut sub handle_description { $_[0]{'saved_dbk'} = $_[1]; return; } sub parse_end { return $_[0]{'saved_dbk'}; } sub parent_block { return undef unless @{$_[0]{'dbk_text_blocks'}} > 1; return $_[0]{'dbk_text_blocks'}[$#{$_[0]{'dbk_text_blocks'}} - 1]; } sub current_block { return undef unless @{$_[0]{'dbk_text_blocks'}}; return $_[0]{'dbk_text_blocks'}[$#{$_[0]{'dbk_text_blocks'}}]; } sub print_block { my $self = $_[0]; my $block = $self->current_block(); return unless $block; return unless length $block->{'dbk_text'}; my $margin = ' ' x $self->{'dbk_left_margin'}; my $initial = $margin . $block->{'dbk_initial_indent'}; my $next = $margin . $block->{'dbk_indent'}; # Squash space and trim the string. $block->{'dbk_text'} =~ tr/\n\t / /s; $block->{'dbk_text'} =~ s/^\s*//; $block->{'dbk_text'} =~ s/\s*$//; return if $block->{'dbk_text'} =~ /^\s*$/; local $Text::Wrap::columns = $self->{'dbk_columns'} - $self->{'dbk_right_margin'}; $self->{'dbk_text'} .= wrap( $initial, $next, $block->{'dbk_text'} ); if ( $block->{'dbk_skip_line'} ) { $self->{'dbk_text'} .= "\n\n"; } else { $self->{'dbk_text'} .= "\n"; } # Flush text buffer $block->{'dbk_text'} = ""; return; } sub inline_char { my ( $self, $str ) = @_; my $block = $self->current_block(); $block->{'dbk_text'} .= $str if $block; return; } sub start_block { my ( $self, $left_margin_indent, $right_margin_indent )= @_; $left_margin_indent ||= 0; $right_margin_indent ||= 0; # Flush the current block, if there is one $self->print_block(); $self->{'dbk_left_margin'} += $left_margin_indent; $self->{'dbk_right_margin'} += $right_margin_indent; push @{$self->{'dbk_text_blocks'}}, { 'dbk_text' => "", 'dbk_initial_indent' => '', 'dbk_indent' => '', 'dbk_left_margin_indent' => $left_margin_indent, 'dbk_right_margin_indent' => $right_margin_indent, 'dbk_skip_line' => 1, 'dbk_children' => 0, }; my $parent = $self->parent_block(); $parent->{'dbk_children'}++ if $parent; return $self->current_block(); } sub end_block { my $self = $_[0]; # Flush the current block, if there is one $self->print_block(); my $block = $self->current_block(); $self->{'dbk_left_margin'} -= $block->{'dbk_left_margin_indent'}; $self->{'dbk_right_margin'} -= $block->{'dbk_right_margin_indent'}; pop @{$self->{'dbk_text_blocks'}}; return; } sub current_list { return undef unless @{$_[0]{'dbk_lists'}}; return $_[0]{'dbk_lists'}[$#{$_[0]{'dbk_lists'}}]; } sub start_list { my ( $self, $type, %attr )= @_; my $block = $self->start_block( 2 ); push @{$self->{'dbk_lists'}}, { 'dbk_type' => $type, %attr, }; return $self->current_list(); } sub end_list { my $self = $_[0]; $self->end_block(); my $list = pop @{$self->{'dbk_lists'}}; # We need an extra newline when the spacing was set to compact. # Otherwise the next block will start on the line immediately following # the last listitem. $self->{'dbk_text'} .= "\n" if ( $list->{'spacing'} eq 'compact' ); return; } sub dbk_para_start { my ( $self, $name, $attr ) = @_; my $block = $self->start_block(); if ( $self->in_element( "listitem" ) ) { my $parent = $self->parent_block(); my $list = $self->current_list(); $block->{'dbk_skip_line'} = 0 if $list->{'spacing'} eq 'compact'; # Copy listitem indent and initial_indent attribute if ( $parent->{'dbk_children'} == 1 ) { $block->{'dbk_initial_indent'} = $parent->{'dbk_initial_indent'}; } else { # Add extra space before the paragraph if it wasn't the first # and the list is compact $self->{'dbk_text'} .= "\n" if $parent->{'dbk_children'} > 1 && $list->{'spacing'} eq 'compact'; # Put mark only on first para $block->{'dbk_initial_indent'} = $parent->{'dbk_indent'}; } $block->{'dbk_indent'} = $parent->{'dbk_indent'}; } return; } sub dbk_para_end { $_[0]->end_block(); return; } sub dbk_itemizedlist_start { my ( $self, $name, $attr )= @_; $self->start_list( 'itemized', 'mark' => '-', 'spacing' => 'normal', %$attr ); return; } sub dbk_itemizedlist_end { $_[0]->end_list(); return; } sub dbk_orderedlist_start { my ( $self, $name, $attr )= @_; $self->start_list( 'ordered', 'spacing' => 'normal', %$attr, 'item_count' => 0 ); return; } sub dbk_orderedlist_end { $_[0]->end_list(); return; } sub dbk_variablelist_start { my ( $self, $name, $attr )= @_; $self->start_list( 'variable', 'spacing' => 'normal', %$attr ); return; } sub dbk_variablelist_end { $_[0]->end_list(); return; } sub dbk_term_start { my ( $self, $name, $attr )= @_; my $block = $self->start_block(); $block->{'dbk_skip_line'} = 0; return; } sub dbk_term_end { $_[0]->end_block(); return; } sub dbk_listitem_start { my ( $self, $name, $attr )= @_; my $list = $self->current_list(); my $block = $self->start_block(); if ( $list->{'dbk_type'} eq 'itemized' ) { my $mark = $attr->{'override'} || $list->{'mark'}; $block->{'dbk_initial_indent'} = $mark . ' '; $block->{'dbk_indent'} = ' ' x length $block->{'dbk_initial_indent'}; } elsif ( $list->{'dbk_type'} eq 'ordered' ) { $list->{'dbk_item_count'}++; $block->{'dbk_initial_indent'} = $list->{'dbk_item_count'} . '. '; $block->{'dbk_initial_indent'} .= ' ' if length $block->{'dbk_initial_indent'} < 4 ; $block->{'dbk_indent'} = ' ' x length $block->{'dbk_initial_indent'}; } elsif ( $list->{'dbk_type'} eq 'variable' ) { $block->{'dbk_initial_indent'} = ' ' x 4; $block->{'dbk_indent'} = ' ' x 4; } else { warn( "unknown list type: $list->{'dbk_type'}" ); } $block->{'dbk_skip_line'} = 0 if $list->{'spacing'} eq 'compact'; return; } sub dbk_listitem_end { $_[0]->end_block(); return; } sub dbk_title_start { my ( $self, $name, $attr )= @_; $self->start_block( 0, 4 ); return; } sub dbk_title_end { $_[0]->end_block(); return; } sub dbk_ulink_start { my ( $self, $name, $attr )= @_; $self->{'dbk_curr_url_attr'} = $attr->{'url'} || ""; $self->{'dbk_curr_url'} = ""; return; } sub dbk_ulink_end { my $self = $_[0]; $self->inline_char( ' (' . $self->{'dbk_curr_url_attr'} . ')' ) if ( $self->{'dbk_curr_url_attr'} ne $self->{'dbk_curr_url'} ); delete $self->{'dbk_curr_url_attr'}; delete $self->{'dbk_curr_url'}; return; } sub dbk_ulink_char { my ( $self, $str )= @_; $self->inline_char( $str ); $self->{'dbk_curr_url'} .= $str; return; } sub dbk_quote_start { my ( $self, $name, $attr )= @_; $self->inline_char( '"' ); return; } sub dbk_quote_end { my $self = $_[0]; $self->inline_char( '"' ); return; } sub admonition_start { my ( $self, $name, $attr ) = @_; my $block = $self->start_block(); $block->{'dbk_skip_line'} = 0; $self->inline_char( ucfirst $name . ":" ); $self->end_block(); $self->start_block( 2 ); return; } sub admonition_end { $_[0]->end_block(); return; } =pod =head1 FORMATTING DocBook STRINGS If you have DocBook content in a string, like you can obtain from some of the Report Specifications object, you can format it in plain text using the dbx2txt() function. =head2 dbk2txt( $docbook_str, [$columns] ) Returns a plain text version of the DocBook XML fragment $docbook_str. The C parameter sets the number of columns in which the DocBook text should be formatted. This method will die() in case of error. =cut sub dbk2txt { my ( $docbook_str, $columns ) = @_; my $parser = new Lire::ReportParser::AsciiDocBookFormatter( 'columns' => $columns ); return ensure_local_codeset( $parser->parse( '' . '' . ensure_utf8( $docbook_str ) . '' ) ); } # keep perl happy 1; __END__ =head1 SEE ALSO Lire::ReportParser(3pm) =head1 VERSION $Id: AsciiDocBookFormatter.pm,v 1.13 2006/07/23 13:16:31 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org This file is part of Lire. Lire is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =head1 AUTHOR Francis J. Lacoste =cut