package Lire::Aggregator;

use strict;

use base qw/ Lire::ReportOperator /;

use Lire::ReportOperator;

# States of the merging API (see init_merge() and end_merge()).
use constant MERGE_INITED   => 1;
use constant MERGE_FINISHED => 2;

use Carp;

=pod

=head1 NAME

Lire::Aggregator - Base class for all aggregator operators

=head1 SYNOPSIS

    use base qw/ Lire::Aggregator /;

=head1 DESCRIPTION

The Lire::Aggregator is the base class for all the aggregator operators
available in Lire. It implements behavior common to all the aggregators
as well as defining some methods that need to be implemented by
subclasses.

In Lire, an aggregator is an operator which will group DLF records.
Other operators like avg or sum will then compute values in these
groups of DLF records. Aggregators can be nested to compute values on
hierarchical groups.

=head1 METHODS

=head2 ops( [$new_ops] )

Returns the operators contained in this aggregator. The returned value
is a reference to an array of Lire::ReportOperator objects.

If $new_ops is given, it replaces the content of this aggregator.
$new_ops must be a reference to a non-empty array containing only
Lire::ReportOperator objects; an exception is thrown otherwise and the
current content is left untouched.

=cut

sub ops {
    my ( $self, $ops ) = @_;

    # Setter mode is selected by the argument count, so an explicit
    # undef is validated (and rejected) rather than silently ignored.
    if ( @_ == 2 ) {
        croak "$ops isn't an array reference"
          unless UNIVERSAL::isa( $ops, "ARRAY" );

        croak "ops array is empty"
          if @$ops == 0;

        foreach my $op ( @$ops ) {
            croak "$op isn't of type Lire::ReportOperator"
              unless UNIVERSAL::isa( $op, "Lire::ReportOperator" );
        }

        $self->{'ops'} = $ops;
    }

    return $self->{'ops'};
}

=pod

=head2 op_by_name( $name )

Returns the operator named $name in this aggregator. Only the
operators directly contained in this aggregator are searched. An
exception is thrown if there is no such operator.

=cut

sub op_by_name {
    my ( $self, $name ) = @_;

    # Check in ops
    foreach my $op ( @{$self->{'ops'}} ) {
        return $op if $op->name eq $name;
    }

    croak "No operation named $name\n";
}

=pod

=head2 is_name_defined( $name )

Returns true if this aggregator contains an operator named $name.

=pod

=cut

sub is_name_defined {
    my ( $self, $name ) = @_;

    return 1 if $name eq $self->name();

    # Check in ops and aggregator's children
    foreach my $p ( @{$self->{'ops'}} ) {
        return 1 if $p->name() eq $name;
        if ( $p->isa( 'Lire::Aggregator' ) ) {
            return 1 if $p->is_name_defined( $name );
        }
    }

    return 0;
}

=pod

=head1 METHODS FOR SUBCLASSES

=cut

#------------------------------------------------------------------------
# Method init( %params )
#
# Initialize the ops attribute to an empty array reference, then chain
# up to the parent class' init() with the same parameters.
sub init {
    my ($self, %params) = @_;

    $self->{'ops'} = [];

    $self->SUPER::init( %params );

    return;
}

=pod

=head2 print( $fh, $pfx )

This method implements the print() method required by
Lire::ReportOperator. It prints the XML element named after op() and
takes care of writing the XML representation of all the children
operations. It also takes care of writing the label attribute when one
is set.

$fh is the file handle to print to (defaults to STDOUT) and $pfx is the
number of spaces used to indent the element (defaults to 0). The
content is printed with an indentation of $pfx + 1.

Other attributes can be added to the XML element by overriding the
xml_attrs() method.

Other children elements could be added to the output stream by
overriding the print_content() method.

=cut

sub print {
    my ( $self, $fh, $pfx ) = @_;
    $fh  ||= \*STDOUT;
    $pfx ||= 0;

    my $prefix = " " x $pfx;
    my $attrs  = $self->xml_attrs;

    # Opening tag: element name, optional extra attributes, optional label.
    # NOTE(review): the 'lire:' element prefix was reconstructed from a
    # mangled copy of this file -- confirm against the report
    # specification format.
    print $fh $prefix, '<lire:', $self->op;
    print $fh ' ', $attrs
      if length $attrs;
    print $fh qq{ label="$self->{'label'}"}
      if $self->{'label'};
    print $fh ">\n";

    $self->print_content( $fh, $pfx + 1 );

    print $fh $prefix, "</lire:", $self->op, ">\n";
}

=pod

=head2 xml_attrs()

This method can be used to write additional XML attributes. The
returned string will be output in the XML element. The default
implementation returns the empty string.

=cut

sub xml_attrs {
    return "";
}

=pod

=head2 print_content( $fh, $pfx )

This method prints the operators contained in this aggregator, by
calling print( $fh, $pfx ) on each of them. It can be overridden to add
some other elements.

=cut

sub print_content {
    my ( $self, $fh, $pfx ) = @_;

    foreach my $o ( @{$self->{'ops'}} ) {
        $o->print( $fh, $pfx );
    }
}

=pod

=head2 create_group_info( $info )

FIXME

Subclasses have to override the create_categorical_info() method for
this implementation.

=pod

=cut

sub create_group_info {
    my $self = shift;

    # From here on, work on the GroupInfo selected for this aggregator.
    my $group_info = $self->maybe_nest_group_info( shift );

    $self->create_categorical_info( $group_info );

    # Nested aggregators describe themselves recursively; every other
    # operator contributes its numerical column information.
    for my $child ( @{ $self->ops } ) {
        my $method = $child->isa( 'Lire::Aggregator' )
          ? 'create_group_info'
          : 'create_numerical_info';
        $child->$method( $group_info );
    }
}

=pod

=head2 create_categorical_info( $info )

This method is called by the implementation of create_group_info() so
that the aggregator can add the categorical ColumnInfo it provides.
The base implementation throws an exception: subclasses must
override it.

=cut

sub create_categorical_info {
    my $class = ref $_[0];

    croak "unimplemented create_categorical_info() in ", $class;
}

#------------------------------------------------------------------------
# Method maybe_nest_group_info( $info )
#
# Selects the GroupInfo to which this aggregator should add its
# ColumnInfo objects.
#
# When the aggregator has a parent, a new GroupInfo named after the
# aggregator is created in $info and returned. Otherwise, $info itself
# is returned.
#
sub maybe_nest_group_info {
    my ( $self, $info ) = @_;

    return $info
      unless $self->parent;

    return $info->create_group_info( $self->name );
}

=pod

=head2 create_entries( $subreport )

This method is used by Lire::ReportSpec to fill the
Lire::Report::Subreport with the entries when creating the subreport.

The $subreport parameter contains the Subreport object to which the
subreport's entries should get added.

This method will only be called on the top-level aggregator in the
report.

=pod

When the aggregator was given a store, the entries are computed by
building and running a Lire::DlfQuery. Otherwise, when a merge was
started, the merged data is used and end_merge() must have been called
beforehand. When neither applies, nothing is added to $subreport.

=cut

sub create_entries {
    my ( $self, $subreport ) = @_;

    if ( $self->{'store'} ) {
        my $query =
          Lire::DlfQuery->new( $self->report_spec()->schema()->id() );
        foreach my $schema ( @{ $self->report_spec()->joined_schemas() } ) {
            $query->join_stream( $schema );
        }

        $self->build_query( $query );

        # Apply the report specification's filter, when there is one.
        my $expr = $self->report_spec()->filter_spec();
        if ( $expr ) {
            $query->set_filter_clause( $expr->sql_expr(),
                                       @{$expr->sql_params()} );
        }

        $self->build_table( $self->{'store'}, $query, $subreport );
    } elsif ( defined $self->{'_state'} ) {
        # Merging
        croak "end_merge() wasn't called"
          unless $self->{'_state'} == MERGE_FINISHED;

        # See init_group_data() for the layout of the 'data' array.
        $subreport->nrecords( $self->{'data'}[1] );
        $subreport->missing_cases( $self->{'data'}[3] );
        $self->set_summary_values( $subreport, $self->{'data'}[0] );
        $self->create_group_entries( $subreport, $self->{'data'}[2] );
    }

    return;
}

=pod

=head2 build_query( $query )

Adds to $query what is needed by this aggregator's operators. A
C<_lr_nrecords> aggregate field computing C<count(*)> is added unless
one of the contained operators is itself an aggregator. The
build_query() method of every contained operator is then called:
nested aggregators receive a nested query created with
create_nested_query(), the other operators receive $query itself.

=cut

sub build_query {
    my ( $self, $query ) = @_;

    $query->add_aggr_field( '_lr_nrecords', 'count(*)' )
      unless grep { $_->isa( 'Lire::Aggregator' ) } @{$self->ops()};

    # Nested queries are created in the order of the aggregator
    # operators; build_table() pairs them back using the same order.
    foreach my $op ( @{ $self->ops() } ) {
        $op->build_query( $op->isa( 'Lire::Aggregator' )
                          ? $query->create_nested_query()
                          : $query );
    }
}

# Runs the summary version of $query on $store and uses its first row
# to set the record count and the summary values of $table.
sub build_table_summary {
    my ( $self, $store, $query, $table ) = @_;

    my $result = $query->execute_summary( $store );
    my $summary = $result->next_row();
    $table->nrecords( $summary->{'_lr_nrecords'} );
    $self->set_group_summary( $table, $summary );
}

=pod

=head2 set_group_summary( $group, $row )

Sets the record count of $group from the C<_lr_nrecords> column of
$row, then sets on $group the summary value of every non-aggregator
operator found in this aggregator and, recursively, in its nested
aggregators.

=cut

sub set_group_summary {
    my ( $self, $group, $row ) = @_;

    $group->nrecords( $row->{'_lr_nrecords'} );
    $self->_set_aggregate_summary_values( $group, $row );
}

# Sets on $group the summary value created from $row by every
# non-aggregator operator, descending into nested aggregators.
sub _set_aggregate_summary_values {
    my ( $self, $group, $row ) = @_;

    foreach my $op ( @{$self->ops()} ) {
        if ( $op->isa( 'Lire::Aggregator' ) ) {
            $op->_set_aggregate_summary_values( $group, $row );
        } else {
            # The value is created relative to the group holding the
            # parent entry, when $group has a parent entry.
            my $parent = $group->parent_entry()
              ? $group->parent_entry()->group()
              : $group;
            my $value = $op->create_value( $parent, $row );
            $group->set_summary_value( $op->name(), %$value );
        }
    }
}

# Records the store that create_entries() will query.
sub _set_store {
    my ( $self, $store ) = @_;

    $self->{'store'} = $store;

    return;
}

# Executes $query on $store and adds one entry per returned row to
# $table (or to the matching nested group when this aggregator has a
# parent), then builds the tables of the nested aggregators with the
# nested queries of $query.
sub build_table {
    my ( $self, $store, $query, $table ) = @_;

    # Only the top-level aggregator computes the table summary.
    $self->build_table_summary( $store, $query, $table )
      unless $self->parent();

    my $result = $query->execute( $store );
  ROW:
    while ( defined ( my $row = $result->next_row() ) ) {
        my $group;
        if ( $self->parent() ) {
            # Locate the parent's entry for this row; the top-level
            # aggregator's entries are looked up under the name 'table'.
            my $p_name = defined $self->parent()->parent()
              ? $self->parent()->name()
              : 'table';
            my $p_entry = $table->find_entry( $p_name, $row );
            next ROW unless defined $p_entry;
            $group = $p_entry->data_by_name( $self->name() );
        } else {
            $group = $table;
        }

        my $entry = $self->create_entry( $group, $row );
        next ROW unless defined $entry;

        foreach my $op ( @{$self->ops()} ) {
            if ( $op->isa( 'Lire::Aggregator' ) ) {
                # The nested group's entries are filled in later by the
                # nested aggregator's own build_table() call.
                my $nested_group = $entry->create_group();
                $op->set_group_summary( $nested_group, $row );
            } else {
                my $value = $op->create_value( $entry->group(), $row );
                $entry->add_value( $value );
            }
        }
    }

    # The i-th aggregator operator is paired with the i-th nested query.
    my $i = 0;
    my $nqueries = $query->nested_queries();
    foreach my $op ( grep { $_->isa( 'Lire::Aggregator' ) } @{$self->ops()} ) {
        $op->build_table( $store, $nqueries->[$i++], $table );
    }
}

=pod

=head2 create_entry( $group, $row )

Called by build_table() for every row returned by the query. $group is
the group to which the entry belongs and $row is the current row. The
row is skipped when this method returns an undefined value.

The base implementation throws an exception: subclasses must
override it.

=cut

sub create_entry {
    my ( $self, $group, $row ) = @_;

    croak( "Unimplemented create_entry() in ", ref $self );
}

=pod

=head1 MERGING AGGREGATOR API

The Aggregator class defines additional methods required by aggregator
implementations to be able to merge data. The base Aggregator
implementation takes care of merging the summary information included
in the Lire XML reports. The merging specific to the aggregator should
be implemented in the init_aggregator_data(), merge_aggregator_data()
and end_aggregator_data() methods.

=pod

This class also takes care of the case when the aggregator is the
top-level aggregator (that is, the immediate child of the
report-calc-spec element in the report specification).

=head1 IMPLEMENTATION OF Lire::ReportOperator MERGING METHODS

=head2 init_merge( $period_start, $period_end )

The default implementation makes sure that all contained operators are
inited, then creates the aggregator's data structure with
init_group_data(). This implementation does not use its arguments and
calls init_merge() on the contained operators without any.

Implementations of specific aggregators must chain up to this method,
if they override it. Returns the aggregator.

=cut

sub init_merge {
    my $self = $_[0];

    foreach my $op ( @{$self->ops()} ) {
        $op->init_merge();
    }

    $self->{'data'}   = $self->init_group_data();
    $self->{'_state'} = MERGE_INITED;

    return $self;
}

#------------------------------------------------------------------------
# Method merge_subreport( $subreport )
#
# This method will be called once for every subreport to be merged.
#
# $subreport is the Lire::Report::Subreport to be merged.
#
# Throws an exception unless init_merge() was called before (and
# end_merge() wasn't called since). Returns the aggregator.
#
# Method called by lr_xml_merge(1)
sub merge_subreport {
    my ( $self, $subreport ) = @_;

    # The state is undefined until init_merge() is called.
    croak "init_merge() wasn't called"
      unless defined $self->{'_state'}
          && $self->{'_state'} == MERGE_INITED;

    $self->merge_group_data( $subreport, $self->{'data'} );

    return $self;
}

=pod

=head2 end_merge()

The default implementation finalizes the merged data by calling
end_group_data() on it and marks the merge as finished, which is
required before create_entries() can use the merged data. An exception
is thrown unless the aggregator is in the state set by init_merge().

Subclasses should chain up to this method, if they override it.
Returns the aggregator.

=cut

sub end_merge {
    my ( $self ) = @_;

    # The state is undefined until init_merge() is called.
    croak "init_merge() wasn't called"
      unless defined $self->{'_state'}
          && $self->{'_state'} == MERGE_INITED;

    $self->end_group_data( $self->{'data'} );

    $self->{'_state'} = MERGE_FINISHED;

    return $self;
}

=pod

=head2 init_group_data()

The Aggregator implements init_group_data(). It takes care of
computing the summary information. Subclasses do the equivalent in
init_aggregator_data().

=pod

=cut

sub init_group_data {
    my ( $self ) = @_;

    # Layout of the returned array:
    #   0 = summary data
    #   1 = nrecords
    #   2 = subclass' data
    #   3 = missing-cases
    my @slots = (
        $self->init_summary_data(),
        0,
        $self->init_aggregator_data(),
        0,
    );

    return \@slots;
}

=pod

=head2 merge_group_data( $value, $data )

The Aggregator implements merge_group_data(). $value must be a
Lire::Report::Group (an exception is thrown otherwise) and $data is a
structure created by init_group_data(). The record and missing-cases
counts of $value are added to $data, then the summary information is
merged. Subclasses do the equivalent in merge_aggregator_data().

=cut

sub merge_group_data {
    my ( $self, $value, $data ) = @_;

    if ( !UNIVERSAL::isa( $value, "Lire::Report::Group" ) ) {
        croak "value should be of type Lire::Report::Group, not $value\n";
    }

    # Accumulate the counters: slot 1 is nrecords, slot 3 missing-cases.
    foreach my $counter ( [ 1, 'nrecords' ], [ 3, 'missing_cases' ] ) {
        my ( $slot, $accessor ) = @$counter;
        $data->[$slot] += $value->$accessor();
    }

    $self->merge_summary_data( $value, $data->[0] );
    $self->merge_aggregator_data( $value, $data->[2] );

    return;
}

=pod

=head2 end_group_data( $data )

The Aggregator implements end_group_data(). It takes care of
finishing the computation of the summary information. Subclasses do
the equivalent in end_aggregator_data().

=cut

sub end_group_data {
    my $self = shift;
    my $data = shift;

    $self->end_summary_data( $data->[0] );
    $self->end_aggregator_data( $data->[2] );

    return;
}

=pod

=head2 add_entry_value( $entry, $data )

This method makes sure that the entries of a nested aggregator are
wrapped up in a Lire::Report::Group element: a group is created in
$entry, its counts and summary values are set from $data and the
entries are added by create_group_entries().

There is no reason to override that method since the entries of the
aggregator are added in the create_group_entries() method.

=cut

sub add_entry_value {
    my ( $self, $entry, $data ) = @_;

    my $nested = $entry->create_group();

    $nested->nrecords( $data->[1] );
    $nested->missing_cases( $data->[3] );

    $self->set_summary_values( $nested, $data->[0] );
    $self->create_group_entries( $nested, $data->[2] );

    return;
}

# ------------------------------------------------------------------------
# METHODS FOR SUMMARY STATISTICS COMPUTATION
#
# This class defines several methods that are to be used in subclasses
# to compute the summary statistics that are included in Lire XML
# report.
# Those summary statistics are only computed by Aggregate
# operator (those that compute an aggregated value like avg, sum,
# etc.). The summary value is computed over all the records seen by the
# aggregator instead of only the grouped records.

#------------------------------------------------------------------------
# Method init_summary_data( [$data] )
#
# Returns the summary data structure: a hash reference holding, under
# the operator's name, the group data of every Aggregate operator
# contained in this aggregator or in its nested aggregators. When
# $data is given, it is filled and returned instead of a new hash.
sub init_summary_data {
    my $self    = shift;
    my $summary = shift || {};

    for my $child ( @{ $self->ops() } ) {
        if ( $child->isa( 'Lire::Aggregate' ) ) {
            my $group_data = $child->init_group_data();
            $summary->{ $child->name } = $group_data;
            next;
        }

        # Nested aggregators add their aggregates to the same hash.
        $child->init_summary_data( $summary )
          if $child->isa( 'Lire::Aggregator' );
    }

    return $summary;
}

#------------------------------------------------------------------------
# Method merge_summary_data( $group, $summary_data )
#
# Merges the summary values found in $group into the Aggregates'
# summary data structures. A warning is emitted and the operator is
# skipped when $group has no summary value for it.
sub merge_summary_data {
    my ( $self, $group, $data ) = @_;

  OP:
    for my $child ( @{ $self->ops() } ) {
        if ( !$child->isa( 'Lire::Aggregate' ) ) {
            $child->merge_summary_data( $group, $data )
              if $child->isa( 'Lire::Aggregator' );
            next OP;
        }

        my $summary_value = $group->get_summary_value( $child->name() );
        if ( !$summary_value ) {
            warn( "missing summary value for ", $child->name(), " operator");
            next OP;
        }

        $child->merge_group_data( $summary_value, $data->{$child->name()} );
    }
}

#------------------------------------------------------------------------
# end_summary_data( $summary_data )
#
# This method calls end_group_data() on all aggregates' data structures.
sub end_summary_data {
    my $self = shift;
    my ( $data ) = @_;

    for my $child ( @{ $self->ops() } ) {
        if ( $child->isa( 'Lire::Aggregate' ) ) {
            $child->end_group_data( $data->{$child->name} );
            next;
        }

        # Nested aggregators share the same summary data hash.
        $child->end_summary_data( $data )
          if $child->isa( 'Lire::Aggregator' );
    }
}

#------------------------------------------------------------------------
# set_summary_values( $group, $summary_data )
#
# Sets on $group the summary value of every Aggregate operator
# contained in this aggregator or in its nested aggregators. The value
# is created by the operator from its summary data converted with
# data2dlf().
sub set_summary_values {
    my $self = shift;
    my ( $group, $data ) = @_;

    for my $child ( @{ $self->ops() } ) {
        if ( $child->isa( 'Lire::Aggregate' ) ) {
            my $summary =
              $child->create_value( $group,
                                    $child->data2dlf( $data->{$child->name()} ) );
            $group->set_summary_value( $child->name(), %$summary );
            next;
        }

        $child->set_summary_values( $group, $data )
          if $child->isa( 'Lire::Aggregator' );
    }
}

#------------------------------------------------------------------------
# Method get_summary_value_string( $name )
#
# Returns a code string which can be used to access the summary
# data item of operator $name, i.e. a hash subscript on $name.
sub get_summary_value_string {
    my ( $self, $name ) = @_;

    return qq(->{'${name}'});
}

=pod

=head1 METHODS THAT SHOULD BE IMPLEMENTED BY SUBCLASSES FOR MERGING

=head2 init_aggregator_data()

This is the equivalent of init_group_data() and is called from
Aggregator's implementation of init_group_data(). The base
implementation throws an exception.

=cut

sub init_aggregator_data {
    my $class = ref $_[0];

    croak "Unimplemented init_aggregator_data() method in ", $class, "\n";
}

=pod

=head2 merge_aggregator_data( $value, $data )

This method is the equivalent of merge_group_data() and is called from
Aggregator's implementation of merge_group_data(). The base
implementation throws an exception.

=cut

sub merge_aggregator_data {
    my $class = ref $_[0];

    croak "Unimplemented merge_aggregator_data() method in ", $class, "\n";
}

=pod

=head2 end_aggregator_data( $data )

This method is the equivalent of end_group_data() and is called from
Aggregator's implementation of end_group_data().

=pod

The base implementation throws an exception.

=cut

sub end_aggregator_data {
    my ( $self ) = @_;
    my $class = ref $self;

    croak "Unimplemented end_aggregator_data() method in ", $class, "\n";
}

=head2 create_group_entries( $group, $data )

In this method, the aggregator should add one Lire::Report::Entry
object for every group merged by the aggregator.

$group is an instance of Lire::Report::Group to which the entries
should be added. $data is the data structure returned by
init_group_data() for the group in which the aggregator is nested (or
the only structure that was created when the aggregator is the
top-level aggregator).

In this method, the aggregator must make sure to call
add_entry_value() on its contained operators for each entry created.

The base implementation throws an exception.

=cut

sub create_group_entries {
    my ( $self ) = @_;
    my $class = ref $self;

    croak "Unimplemented create_group_entries method in ", $class, "\n";
}

# keep perl happy
1;

__END__

=pod

=head1 SEE ALSO

Lire::ReportSpec(3pm), Lire::Aggregate(3pm), Lire::ReportOperator(3pm),
Lire::Group(3pm), Lire::Timegroup(3pm), Lire::Timeslot(3pm),
Lire::Rangegroup(3pm)

=head1 AUTHORS

  Francis J. Lacoste
  Wolfgang Sourdeau

=head1 VERSION

$Id: Aggregator.pm,v 1.29 2006/07/23 13:16:27 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.

=cut