package Lire::Aggregator;

use strict;

use base qw/ Lire::ReportOperator /;

use Lire::ReportOperator;

use constant MERGE_INITED  => 1;
use constant MERGE_FINISHED => 2;

use Carp;

=pod

=head1 NAME

Lire::Aggregator - Base class for all aggregator operators

=head1 SYNOPSIS

    use base qw/ Lire::Aggregator /;

=head1 DESCRIPTION

The Lire::Aggregator is the base class for all the aggregator
operators available in Lire. It implements behavior common to all the
aggregator as well as defining some methods that need to be
implemented by subclasses.

In Lire, an aggregator is an operator which will group DLF records.
Other operators like avg or sum will then compute values in these
groups of DLF records. Aggregators can be nested to compute values on
hierarchical groups.

=head1 METHODS

=head2 ops( [$new_ops] )

Returns the operators contained in this aggregator. The returned value
is a reference to an array of Lire::ReportOperator objects.

If $new_ops is given, it changes the content of this aggregator to
these new values. $new_ops should be a reference to an array
containing Lire::ReportOperator objects.

=cut

# Accessor/mutator for the operators contained in this aggregator.
#
# Called with no argument, returns whatever is stored under the 'ops'
# key. Called with one argument, validates it before storing it:
#   - it must be an array reference,
#   - it must contain at least one element,
#   - every element must be a Lire::ReportOperator.
# Croaks on the first validation failure. Returns the stored reference.
sub ops {
    my ( $self, $ops ) = @_;

    # @_ == 2 distinguishes "setter called" from "getter called", so an
    # explicit undef argument is rejected rather than silently ignored.
    if ( @_ == 2 ) {
        croak "$ops isn't an array reference"
          unless UNIVERSAL::isa( $ops, "ARRAY" );

        # This used to say "fields array is empty", a copy-paste leftover
        # that pointed the caller at the wrong attribute.
        croak "ops array is empty"
          unless @$ops;

        foreach my $op ( @$ops ) {
            croak "$op isn't of type Lire::ReportOperator"
              unless UNIVERSAL::isa( $op, "Lire::ReportOperator" );
        }

        $self->{'ops'} = $ops;
    }

    return $self->{'ops'};
}

=pod

=head2 op_by_name( $name )

Returns the operator named $name in this aggregator. An exception is
thrown if there is no such operator.

=cut

# Looks up a directly contained operator by its name.
#
# Only the operators stored under the 'ops' key are searched; nested
# aggregators are not descended into. Croaks when nothing matches.
sub op_by_name {
    my ( $self, $name ) = @_;

    for my $candidate ( @{ $self->{'ops'} } ) {
        next unless $candidate->name eq $name;
        return $candidate;
    }

    croak "No operation named $name\n";
}

=pod

=head2 is_name_defined( $name )

Returns true if this aggregator contains an operator named $name.

=cut

# Tells whether $name is used by this aggregator, by one of its
# operators, or (recursively) by anything inside a nested aggregator.
# Returns 1 when the name is found, 0 otherwise.
sub is_name_defined {
    my ( $self, $name ) = @_;

    return 1 if $self->name() eq $name;

    for my $child ( @{ $self->{'ops'} } ) {
        return 1 if $child->name() eq $name;

        # Only aggregators can contain further operators.
        next unless $child->isa( 'Lire::Aggregator' );
        return 1 if $child->is_name_defined( $name );
    }

    return 0;
}

=pod

=head1 METHODS FOR SUBCLASSES

=cut

#------------------------------------------------------------------------
# Method init( %params )
#
# Initialize the ops attribute.
# Sets the 'ops' attribute to a fresh empty array, then chains up to the
# parent class' init() with the same named parameters.
sub init {
    my $self   = shift;
    my %params = @_;

    $self->{'ops'} = [];
    $self->SUPER::init( %params );

    return;
}


=pod

=head2 print( $fh, $pfx )

This method implements the print() method required by
Lire::ReportOperator. It prints the XML element named after op() and
takes care of writing the XML representation of all the children
operation. It also takes care of writing the name and label attribute.
Other attributes can be added to the XML element by overriding the
xml_attrs() method. Other children elements could be added to the
output stream by overriding the print_content() method.

=cut

# Writes this aggregator as an XML element on $fh.
#
#   $fh   filehandle to write to; STDOUT when false
#   $pfx  number of spaces to indent with; 0 when false
#
# The element is named "lire:" followed by op(). The string returned by
# xml_attrs() is written when non-empty, followed by a label attribute
# when the 'label' attribute is true. The children are written by
# print_content() one indentation level deeper.
sub print {
    my ( $self, $fh, $pfx ) = @_;
    $fh  = \*STDOUT unless $fh;
    $pfx = 0        unless $pfx;

    my $indent = " " x $pfx;
    my $attrs  = $self->xml_attrs;

    # Opening tag
    print $fh $indent, '<lire:', $self->op;
    if ( length $attrs ) {
        print $fh ' ', $attrs;
    }
    if ( $self->{'label'} ) {
        print $fh qq{ label="$self->{'label'}"};
    }
    print $fh ">\n";

    # Children
    $self->print_content( $fh, $pfx + 1);

    # Closing tag
    print $fh $indent, "</lire:", $self->op, ">\n";
}

=pod

=head2 xml_attrs()

This method can be used to write additional XML attributes. The
returned string will be output in the XML element.

=cut

# Default implementation: contributes no extra XML attributes.
sub xml_attrs {
    return q{};
}

=pod

=head2 print_content( $fh, $pfx )

This method prints the operators contained in this aggregator. It can
be overridden to add some other elements.

=cut

# Asks every contained operator, in order, to print itself on $fh using
# the indentation level $pfx.
sub print_content {
    my ( $self, $fh, $pfx ) = @_;

    $_->print( $fh, $pfx ) for @{ $self->{'ops'} };
}

=pod

=head2 create_group_info( $info )

FIXME

Subclasses have to override the create_categorical_info() method for
this implementation.

=cut

# Populates $info with the column information of this aggregator and of
# everything it contains.
#
# The info object actually filled is the one selected by
# maybe_nest_group_info(). The aggregator's own categorical info is added
# first; then nested aggregators recurse through create_group_info()
# while every other operator adds its info via create_numerical_info().
sub create_group_info {
    my ( $self, $info ) = @_;

    my $target = $self->maybe_nest_group_info( $info );
    $self->create_categorical_info( $target );

    for my $child ( @{ $self->ops } ) {
        my $method = $child->isa( 'Lire::Aggregator' )
          ? 'create_group_info'
          : 'create_numerical_info';
        $child->$method( $target );
    }
}

=pod

=head2 create_categorical_info( $info )

This method is used by the implementation of create_group_info() to add
the categorical ColumnInfo provided by the aggregator.

=cut

# Abstract method: always croaks, naming the class of the invocant.
sub create_categorical_info {
    my $class = ref $_[0];
    croak "unimplemented create_categorical_info() in ", $class;
}

#------------------------------------------------------------------------
# Method maybe_nest_group_info( $info )
#
# This method should be used by subclasses in their create_group_info() 
# implementation to select the GroupInfo to which they should add their
# ColumnInfo.
#
# This method takes care of creating a GroupInfo if the aggregator has a
# parent. It returns the GroupInfo object to which the aggregator should
# add its ColumnInfo objects.
#
# Picks the info object this aggregator should add its columns to:
# $info itself when the aggregator has no parent, otherwise a new group
# info created in $info and named after this aggregator.
sub maybe_nest_group_info {
    my ( $self, $info ) = @_;

    return $info unless $self->parent;
    return $info->create_group_info( $self->name );
}

=pod

=head2 create_entries( $subreport )

This method is used by Lire::ReportSpec to fill the
Lire::Report::Subreport with the entries when creating the subreport.

The $subreport parameter contains the Subreport object to which the
subreport's entries should get added.

This method will only be called on the top-level aggregator in the
report.

=cut

# Fills $subreport with this aggregator's entries.
#
# Two modes are supported, selected by the object's state:
#
#   - A store was set (the 'store' attribute is true): a Lire::DlfQuery
#     is built on the report specification's schema, the joined schemas
#     are joined in, build_query() adds the aggregator's fields, the
#     specification's filter (if any) becomes the filter clause, and
#     build_table() creates the entries from the query results.
#
#   - A merge took place (the '_state' attribute is defined): the merged
#     data is copied into the subreport. Croaks unless end_merge() was
#     called first.
#
# When neither applies nothing is done. Always returns nothing.
sub create_entries {
    my ( $self, $subreport ) = @_;

    if ( $self->{'store'} ) {
        # Direct method call instead of the ambiguous indirect object
        # syntax ("new Lire::DlfQuery( ... )").
        my $query =
          Lire::DlfQuery->new( $self->report_spec()->schema()->id() );
        foreach my $schema ( @{ $self->report_spec()->joined_schemas() } ) {
            $query->join_stream( $schema );
        }
        $self->build_query( $query );
        my $expr = $self->report_spec()->filter_spec();
        if ( $expr ) {
            $query->set_filter_clause( $expr->sql_expr(),
                                       @{$expr->sql_params()} );
        }
        $self->build_table( $self->{'store'}, $query, $subreport );
    } elsif ( defined $self->{'_state'} ) {
        # Merging
        croak "end_merge() wasn't called"
          unless $self->{'_state'} == MERGE_FINISHED;

        # Layout of 'data' (see init_group_data()):
        # 0 = summary data, 1 = nrecords, 2 = subclass' data,
        # 3 = missing-cases
        $subreport->nrecords( $self->{'data'}[1] );
        $subreport->missing_cases( $self->{'data'}[3] );
        $self->set_summary_values( $subreport, $self->{'data'}[0] );
        $self->create_group_entries( $subreport, $self->{'data'}[2] );
    }

    return;
}


=pod

=head2 build_query( $query )

FIXME

=cut

# Adds to $query what this aggregator's operators need.
#
# The '_lr_nrecords' count(*) field is only added when none of the
# contained operators is itself an aggregator. Each nested aggregator
# then builds its part in a freshly created nested query, while every
# other operator adds to $query directly.
sub build_query {
    my ( $self, $query ) = @_;

    my $nested_count =
      grep { $_->isa( 'Lire::Aggregator' ) } @{ $self->ops() };
    $query->add_aggr_field( '_lr_nrecords', 'count(*)' )
      if !$nested_count;

    for my $child ( @{ $self->ops() } ) {
        my $target = $child->isa( 'Lire::Aggregator' )
          ? $query->create_nested_query()
          : $query;
        $child->build_query( $target );
    }
}

# Runs the summary form of $query against $store and records the first
# returned row on $table: its '_lr_nrecords' value becomes the table's
# record count and set_group_summary() stores the summary values.
sub build_table_summary {
    my ( $self, $store, $query, $table ) = @_;

    my $summary_result = $query->execute_summary( $store );
    my $summary_row    = $summary_result->next_row();

    my $record_count = $summary_row->{'_lr_nrecords'};
    $table->nrecords( $record_count );

    $self->set_group_summary( $table, $summary_row );
}

=pod

=head2 set_group_summary( $group, $row )

FIXME

=cut

# Records on $group the summary found in $row: the '_lr_nrecords' value
# as record count, then the summary value of every contained operator.
sub set_group_summary {
    my $self = shift;
    my ( $group, $row ) = @_;

    my $record_count = $row->{'_lr_nrecords'};
    $group->nrecords( $record_count );

    $self->_set_aggregate_summary_values( $group, $row );
}

# Stores on $group one summary value per non-aggregator operator found
# in this aggregator or, recursively, in its nested aggregators.
#
# Values are created relative to the group of $group's parent entry when
# there is one, and relative to $group itself otherwise.
sub _set_aggregate_summary_values {
    my ( $self, $group, $row ) = @_;

    for my $op ( @{ $self->ops() } ) {
        if ( $op->isa( 'Lire::Aggregator' ) ) {
            $op->_set_aggregate_summary_values( $group, $row );
            next;
        }

        my $context;
        if ( $group->parent_entry() ) {
            $context = $group->parent_entry()->group();
        } else {
            $context = $group;
        }

        my $value = $op->create_value( $context, $row );
        $group->set_summary_value( $op->name(), %$value );
    }
}

# Remembers $store in the 'store' attribute. Returns nothing.
sub _set_store {
    my $self = shift;
    my ( $store ) = @_;

    $self->{'store'} = $store;

    return;
}

# Creates in $table the entries for this aggregator from the rows that
# $query returns when executed against $store, then recurses into the
# nested aggregators.
#
#   $store  passed through to the query's execute() method
#   $query  the query built for this aggregator
#   $table  the top-level table; the same object is passed down to the
#           nested aggregators
sub build_table {
    my ( $self, $store, $query, $table ) = @_;

    # Only the aggregator without a parent fills the table-level summary.
    $self->build_table_summary( $store, $query, $table )
      unless $self->parent();

    my $result = $query->execute( $store );
  ROW:
    while (defined (my $row = $result->next_row() ) ) {
        # Select the group receiving the entry for this row: the table
        # itself for the top-level aggregator, otherwise the group found
        # in the parent's entry for this row.
        my $group;
        if ( $self->parent() ) {
            # The parent entry is looked up under the name 'table' when
            # the parent is itself the top-level aggregator.
            my $p_name = defined $self->parent()->parent()
              ? $self->parent()->name() : 'table';
            my $p_entry = $table->find_entry( $p_name, $row );
            # Rows without a matching parent entry are skipped.
            next ROW unless defined $p_entry;
            $group = $p_entry->data_by_name( $self->name() );
        } else {
            $group = $table;
        }


        # create_entry() may return undef, in which case the row is
        # skipped.
        my $entry = $self->create_entry( $group, $row );
        next ROW unless defined $entry;

        foreach my $op ( @{$self->ops()} ) {
            if ( $op->isa( 'Lire::Aggregator' ) ) {
                # Note: this $group shadows the outer one. It is the
                # new group created in the entry for the nested
                # aggregator; only its summary is set here, its entries
                # are added by the recursive calls below.
                my $group = $entry->create_group();
                $op->set_group_summary( $group, $row );
            } else {
                my $value = $op->create_value( $entry->group(), $row );
                $entry->add_value( $value );
            }
        }
    }

    # Nested queries are paired with nested aggregators by position:
    # the i-th aggregator in ops() gets the i-th nested query.
    my $i=0;
    my $nqueries = $query->nested_queries();
    foreach my $op ( grep { $_->isa( 'Lire::Aggregator' ) } @{$self->ops()}) {
        $op->build_table( $store, $nqueries->[$i++], $table );
    }
}

=pod

=head2 create_entry( $group, $row )

FIXME

=cut
# Abstract method: always croaks, naming the class of the invocant.
sub create_entry {
    my $self = shift;

    croak( "Unimplemented create_entry() in ", ref $self );
}

=pod

=head1 MERGING AGGREGATOR API

This API defines the additional methods required by Aggregator
implementations to be able to merge data.

The base Aggregator implementation takes care of merging the summary
information included in the Lire XML reports.

The merging logic specific to an aggregator should be implemented in
the init_aggregator_data(), merge_aggregator_data() and
end_aggregator_data() methods.

This class also takes care of the case when the aggregator is the
top-level aggregator (that is, the immediate child of the
report-calc-spec element in the report specification).

=head1 IMPLEMENTATION OF Lire::ReportOperator MERGING METHODS

=pod

=head2 init_merge( $period_start, $period_end )

The default implementation makes sure that all contained operators are
inited.

Implementation of specific aggregator must chain up to this method, if
they override it.

=cut

# Prepares this aggregator for merging: init_merge() is called on every
# contained operator, the merge data structure is created with
# init_group_data() and the state becomes MERGE_INITED.
# Returns the aggregator itself.
sub init_merge {
    my ( $self ) = @_;

    $_->init_merge() for @{ $self->ops() };

    $self->{'data'}   = $self->init_group_data();
    $self->{'_state'} = MERGE_INITED;

    return $self;
}

#------------------------------------------------------------------------
# Method merge_subreport( $subreport )
#
# This method will be called once for every subreport to be merged.
# 
# $subreport is the Lire::Report::Subreport to be merged.
#
# Method called by lr_xml_merge(1)
# Merges one subreport into the data structure created by init_merge().
# Croaks unless the aggregator is in the MERGE_INITED state.
# Returns the aggregator itself.
sub merge_subreport {
    my ( $self, $subreport ) = @_;

    if ( $self->{'_state'} != MERGE_INITED ) {
        croak "init_merge() wasn't called";
    }

    my $merged = $self->{'data'};
    $self->merge_group_data( $subreport, $merged );

    return $self;
}

=pod

=head2 end_merge()

The default implementation finalizes the merged data by calling
end_group_data() on it and marks the merge as finished.

Subclasses should chain up to this method, if they override it.

=cut

# Finishes a merge: end_group_data() is called on the merged data and
# the state becomes MERGE_FINISHED. Croaks unless the aggregator is in
# the MERGE_INITED state. Returns the aggregator itself.
sub end_merge {
    my $self = shift;

    if ( $self->{'_state'} != MERGE_INITED ) {
        croak "init_merge() wasn't called";
    }

    my $merged = $self->{'data'};
    $self->end_group_data( $merged );
    $self->{'_state'} = MERGE_FINISHED;

    return $self;
}

=pod

=head2 init_group_data()

The Aggregator implements init_group_data(). It takes care of
computing the summary information. Subclass does the equivalent in
init_aggregator_data().

=cut

# Creates the merge data structure of one group, as an array reference:
#
#   0 = summary data, from init_summary_data()
#   1 = nrecords, starting at 0
#   2 = subclass' data, from init_aggregator_data()
#   3 = missing-cases, starting at 0
sub init_group_data {
    my ( $self ) = @_;

    # Both initializers are called in list context, summary data first.
    my @data = ( $self->init_summary_data(), 0 );
    push @data, $self->init_aggregator_data(), 0;

    return \@data;
}


=pod

=head2 merge_group_data( $value, $data )

The Aggregator implements merge_group_data(). It takes care of merging
the summary information. Subclass does the equivalent in
merge_aggregator_data().

=cut

# Merges the group $value into the data structure $data created by
# init_group_data(): record and missing-case counts are accumulated,
# then the summary data and the subclass' data are merged.
# Croaks when $value isn't a Lire::Report::Group. Returns nothing.
sub merge_group_data {
    my ( $self, $value, $data ) = @_;

    UNIVERSAL::isa( $value, "Lire::Report::Group" )
      or croak "value should be of type Lire::Report::Group, not $value\n";

    # Slots: 0 = summary data, 1 = nrecords, 2 = subclass' data,
    # 3 = missing-cases
    $data->[1] += $value->nrecords();
    $data->[3] += $value->missing_cases();

    $self->merge_summary_data( $value, $data->[0] );
    $self->merge_aggregator_data( $value, $data->[2] );

    return;
}

=pod

=head2 end_group_data($data)

The Aggregator implements end_group_data(). It takes care of
computing the summary information. Subclass does the equivalent in
end_aggregator_data().

=cut

# Finalizes the data structure $data created by init_group_data(): first
# its summary data (slot 0), then the subclass' data (slot 2).
# Returns nothing.
sub end_group_data {
    my $self = shift;
    my ( $data ) = @_;

    $self->end_summary_data( $data->[0] );
    $self->end_aggregator_data( $data->[2] );

    return;
}

=pod

=head2 add_entry_value( $entry, $data )

This method will make sure that the entries of nested aggregator are
wrapped up in a Lire::Report::Group element.

There is no reason to override that method since the entries of the
aggregator are added in the create_group_entries() method.

=cut

# Adds to $entry a new group holding the merged content of $data, a data
# structure created by init_group_data(): the counts are copied, the
# summary values are set and the group's entries are created.
# Returns nothing.
sub add_entry_value {
    my ( $self, $entry, $data ) = @_;

    my $nested = $entry->create_group();

    # Slots: 0 = summary data, 1 = nrecords, 2 = subclass' data,
    # 3 = missing-cases
    $nested->nrecords( $data->[1] );
    $nested->missing_cases( $data->[3] );

    $self->set_summary_values( $nested, $data->[0] );
    $self->create_group_entries( $nested, $data->[2] );

    return;
}

# ------------------------------------------------------------------------
# METHODS FOR SUMMARY STATISTICS COMPUTATION
#
# This class defines several methods that are to be used in subclasses
# to compute the summary statistics that are included in Lire XML
# report. Those summary statistics are only computed by Aggregate
# operator (those that compute an aggregated value like avg, sum,
# etc.). The summary value is computed over all the records seen by the
# aggregator instead of only the grouped records.

#------------------------------------------------------------------------
# Method init_summary_data ()
#
# Returns a summary data structure. 
# Builds the summary data structure: a hash with one slot per
# Lire::Aggregate operator, keyed by operator name and initialized with
# the operator's init_group_data(). Nested aggregators add their slots
# to the same hash.
#
# $data is the hash to fill; a new one is created when it is false.
# Returns the hash.
sub init_summary_data {
    my ( $self, $data ) = @_;

    $data = {} unless $data;

    for my $op ( @{ $self->ops() } ) {
        if ( $op->isa( 'Lire::Aggregate' ) ) {
            $data->{$op->name} = $op->init_group_data();
            next;
        }

        $op->init_summary_data( $data )
          if $op->isa( 'Lire::Aggregator' );
    }

    return $data;
}

#------------------------------------------------------------------------
# Method merge_summary_data ( $group, $summary_data )
#
# This method updates the Aggregate's summary data structures.
# Merges the summary values found in $group into the summary data
# structure $data.
#
# Each Lire::Aggregate operator merges the value stored under its name
# in $group into its own slot of $data; a warning is emitted and the
# operator skipped when $group has no such value. Nested aggregators
# merge into the same $data.
sub merge_summary_data {
    my ( $self, $group, $data ) = @_;

    for my $op ( @{ $self->ops() } ) {
        unless ( $op->isa( 'Lire::Aggregate' ) ) {
            $op->merge_summary_data( $group, $data )
              if $op->isa( 'Lire::Aggregator' );
            next;
        }

        my $value = $group->get_summary_value( $op->name() );
        if ( !$value ) {
            warn( "missing summary value for ", $op->name(), " operator");
            next;
        }

        $op->merge_group_data( $value, $data->{$op->name()} );
    }
}

#------------------------------------------------------------------------
# end_summary_data( $summary_data )
#
# This method calls end_group_data() on all aggregates' data structures.
# Finalizes the summary data structure $data: each Lire::Aggregate
# operator gets end_group_data() called on its own slot, and nested
# aggregators finalize their operators' slots in the same $data.
sub end_summary_data {
    my ( $self, $data ) = @_;

    for my $op ( @{ $self->ops() } ) {
        if ( $op->isa( 'Lire::Aggregate' ) ) {
            $op->end_group_data( $data->{$op->name} );
            next;
        }

        $op->end_summary_data( $data )
          if $op->isa( 'Lire::Aggregator' );
    }
}

#------------------------------------------------------------------------
# set_summary_values( $group, $summary_data )
#
# Make sure all operators sets their summary value
# Stores on $group the summary value of every Lire::Aggregate operator,
# computed from the operator's slot in the summary data structure $data.
# Nested aggregators store the values of their operators on the same
# $group.
sub set_summary_values {
    my ( $self, $group, $data ) = @_;

    for my $op ( @{ $self->ops() } ) {
        if ( $op->isa( 'Lire::Aggregate' ) ) {
            # The slot is converted with data2dlf() before being turned
            # into a report value.
            my $dlf   = $op->data2dlf( $data->{$op->name()} );
            my $value = $op->create_value( $group, $dlf );
            $group->set_summary_value( $op->name(), %$value );
            next;
        }

        $op->set_summary_values( $group, $data )
          if $op->isa( 'Lire::Aggregator' );
    }
}

#------------------------------------------------------------------------
# Method get_summary_value_string( $name )
#
# Returns a code string which can be used to access the summary
# data item of operator $name.
# Returns the code fragment that dereferences the hash slot named $name,
# i.e. the text ->{'NAME'}.
sub get_summary_value_string {
    my ( $self, $name ) = @_;

    return join '', "->{'", $name, "'}";
}

=pod

=head1 METHODS THAT SHOULD BE IMPLEMENTED BY SUBCLASSES FOR MERGING

=head2 init_aggregator_data()

This is the equivalent of init_group_data() and is called from
Aggregator's implementation of init_group_data().

=cut

# Abstract method: always croaks, naming the class of the invocant.
sub init_aggregator_data {
    my $class = ref $_[0];
    croak "Unimplemented init_aggregator_data() method in ", $class, "\n";
}

=pod

=head2 merge_aggregator_data( $value, $data )

This method is the equivalent of merge_group_data() and is called
from Aggregator's implementation of merge_group_data().

=cut

# Abstract method: always croaks, naming the class of the invocant.
sub merge_aggregator_data {
    my $class = ref $_[0];
    croak "Unimplemented merge_aggregator_data() method in ", $class, "\n";
}

=pod

=head2 end_aggregator_data( $data )

This method is the equivalent of the end_group_data() and is called
from Aggregator's implementation of end_group_data().

=cut

# Abstract method: always croaks, naming the class of the invocant.
sub end_aggregator_data {
    my $class = ref $_[0];
    croak "Unimplemented end_aggregator_data() method in ", $class, "\n";
}

=head2 create_group_entries( $group, $data )

In this method, the aggregator should add one Lire::Report::Entry
object for every group merged by the aggregator.

$group is an instance of Lire::Report::Group to which the entries
should be added. $data is the data structure returned by
init_group_data() for the group in which the aggregator is nested (or
the only structure that was created when the aggregator is the
top-level aggregator).

In this method, the aggregator must make sure to call
add_entry_value() on its contained operators for each entry created.

=cut

# Abstract method: always croaks, naming the class of the invocant.
sub create_group_entries {
    my $class = ref $_[0];
    croak "Unimplemented create_group_entries method in ", $class, "\n";
}

# keep perl happy
1;

__END__

=pod

=head1 SEE ALSO

 Lire::ReportSpec(3pm), Lire::Aggregate(3pm), Lire::ReportOperator(3pm),
 Lire::Group(3pm), Lire::Timegroup(3pm), Lire::Timeslot(3pm),
 Lire::Rangegroup(3pm)

=head1 AUTHORS

 Francis J. Lacoste <flacoste@logreport.org>
 Wolfgang Sourdeau <wsourdeau@logreport.org>

=head1 VERSION

$Id: Aggregator.pm,v 1.29 2006/07/23 13:16:27 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html. 

=cut