# Copyright (c) 2004 Timothy Appnel # http://www.timaoutloud.org/ # This code is released under the Artistic License. # # XML::Parser::Style::Elemental - A slightly more advanced object # tree style for XML::Parser. # package XML::Parser::Style::Elemental; use strict; use vars qw($VERSION); $VERSION = '0.50'; sub Init { my $xp = shift; $xp->{Elemental} ||= {}; my $e = $xp->{Elemental}; $e->{Document} ? eval "use $e->{Document};" : compile_class($xp,'Document'); $e->{Element} ? eval "use $e->{Element};" : compile_class($xp,'Element'); $e->{Characters} ? eval "use $e->{Characters};" : compile_class($xp,'Characters'); $xp->{__doc} = $e->{Document}->new(); push( @{ $xp->{__stack} }, $xp->{__doc} ); # $xp->{__NSMAP} = {} if ($xp->{NSMap}); } sub Start { my $xp = shift; my $tag = shift; my $node = $xp->{Elemental}->{Element}->new(); $node->name( ns_qualify($xp,$tag) ); $node->parent( $xp->{__stack}->[-1] ); if (@_) { $node->attributes({}); while (@_) { my($key,$value) = (shift @_,shift @_); $node->attributes->{ns_qualify($xp,$key,$tag)} = $value } } $node->parent->contents([]) unless $node->parent->contents; push( @{ $node->parent->contents }, $node); push( @{ $xp->{__stack} }, $node); #if ($xp->{NSMap} && $xp->new_ns_prefixes) { # my %newns; # map { $newns{$_} = $xp->expand_ns_prefix($_) } # $xp->new_ns_prefixes; # $xp->{__NSMAP}->{$node} = \%newns; #} } sub Char { my ($xp,$data)=@_; my $parent = $xp->{__stack}->[-1]; $parent->contents([]) unless $parent->contents; my $contents = $parent->contents(); my $class = $xp->{Elemental}->{Characters}; unless ($contents && ref($contents->[-1]) eq $class) { return if ($xp->{Elemental}->{No_Whitespace} && $data!~/\S/); my $node = $class->new(); $node->parent($parent); $node->data($data); push ( @{ $contents }, $node ); } else { my $d = $contents->[-1]->data() || ''; return if ( $xp->{Elemental}->{No_Whitespace} && $d!~/\S/ ); $contents->[-1]->data("$d$data"); } } sub End { pop( @{ $_[0]->{__stack} } ) } sub Final { delete $_[0]->{__stack}; $_[0]->{__doc}; # , $_[0]->{__NSMAP}; } sub ns_qualify { return $_[1] unless $_[0]->{Namespaces}; my $ns=$_[0]->namespace($_[1]) || ( $_[2] ? $_[0]->namespace($_[2]) : return $_[1] ); $ns=~m!(/|#)$! ? "$ns$_[1]" : "$ns/$_[1]"; } #--- Dynamic Class Factory { my $methods = { # All get root methods through special handling # Element gets a text_content method also through # special handling Document => [ qw(contents) ], Element => [ qw(name parent contents attributes) ], Characters => [ qw(parent data) ] }; sub compile_class { my $xp = shift; my $type = shift; my $class = "${$xp}{Pkg}::$type"; no strict 'refs'; *{"${class}::new"} = sub { bless { }, $class }; foreach my $field ( @{$methods->{$type}} ) { *{"${class}::${field}"} = sub { $_[0]->{$field} = $_[1] if defined $_[1]; $_[0]->{$field}; }; } *{"${class}::root"} = sub { my $o=shift; while($o->can('parent') && $o->parent) { $o = $o->parent } $o; }; if ($type eq 'Element') { *{"${class}::text_content"} = sub { return '' unless ref($_[0]->contents); join('', map { ref($_) eq $class ? $_->text_content : $_->data } @{ $_[0]->contents } ); }; } $xp->{Elemental}->{$type} = $class; } } 1; __END__ =begin =head1 NAME XML::Parser::Style::Elemental - a slightly more advanced and flexible object tree style for XML::Parser =head1 SYNOPSIS #!/usr/bin/perl -w use XML::Parser; use Data::Dumper; my $p = XML::Parser->new( Style => 'Elemental', Pkg => 'E' ); my $doc = < The world is foo enough. DOC my ($e) = $p->parse($doc); print Data::Dumper->Dump( [$e] ); my $test_node = $e->contents->[0]; print "root: ".$test_node->root." is ".$e."\n"; print "text content of ".$test_node->name."\n"; print $test_node->text_content; =head1 DESCRIPTION This module is similar to the L Objects style, but slightly more advanced and flexible. Like the Objects style, an object is created for each element. Elemental uses a dynamic class factory to create objects with accessor methods or can use any supplied classes that support the same method signatures. This module also provides full namespace support when the C option is in use in addition to a C option for stripping out extraneous non-markup characters that are commonly introduced when formatting XML to be human readable. =head1 CLASS TYPES Elemental style creates its parse tree with three class types -- Document, Element and Character. Developers have the option of using the built-in dynamic classes or registering their own. The following explains the purpose and method prototypes of each class type. =item Document - The root of the tree. =over 4 =item contents - An array reference of direct decendents. =item root - Return reference of itself. =back =item Element - The tags in the document. =over 4 =item name - The tag name. If the Namespace options is set to true, the extend name is stored. =item parent - A reference to the parent object. =item contents - An ordered array reference of direct descendents/children objects. =item attributes - A hash reference of key-value pairs representing the tags attributes. =item text_content - The text content of all siblings, whitespace included. =item root - A reference to the Document object. =back =item Characters - Non-markup text. =over 4 =item data - A string of non-markup characters. =item parent - A reference to the parent object. =item root - A reference to the Document object. =back =head1 OPTIONS Elemental specific options are set in the L constructor through a hash element with a key of 'Elemental', The value of Elemental is expected to be a hash reference with one of more of the option keys detailed in the following sections. =head2 USING DYNAMIC CLASS OBJECTS When parsing a document, Elemental uses a dynamic class factory to create minimal lightweight objects with accessor methods. These classes implement the pattern detailed in L in addition to a parameterless constructor method of C. Similar to the Objects style these classes are blessed into the package set with the C option. Here we create a parser that uses Elemental to create Document, Element and Characters objects in the E package. my $p = XML::Parser->new( Style => 'Elemental', Pkg => 'E' ); =head2 REGISTERING CLASSES If you require something more functional then the generated dynamic classes you can register your own with Elemental. Like the Elemental class types, the option keys are C, C and C. Here we register three classes and turn on the C option. my $p = XML::Parser->new( Style => 'Elemental', Namespace => 1, Elemental=>{ Document=>'Foo::Doc', Element=>'Foo::El', Characters=>'Foo::Chars', No_Whitespace=>1 } ); Note that, the same class can be registered for more then one class type as long as it supports all of the necessary method prototypes it is being registered to handle. See L for more detail. =head2 NO_WHITESPACE When set to true, C causes Elemental to pass over character strings of all whitespace instead of creating a new Character object. This options is helpful in stripping out extraneous non-markup characters that are commonly introduced when formatting XML to be human readable. =head1 SEE ALSO L =head1 TO DO =item * Implement xml::base support instead of No_Whitespace. =head1 LICENSE The software is released under the Artistic License. The terms of the Artistic License are described at L. =head1 AUTHOR & COPYRIGHT Except where otherwise noted, XML::Parser::Style::Elemental is Copyright 2004, Timothy Appnel, cpan@timaoutloud.org. All rights reserved. =cut =end