package Lire::Average;

use strict;

use base qw/ Lire::SimpleStat /;

use Carp;

use Lire::DataTypes qw/ is_numeric_type format_numeric_type /;
use Lire::Utils qw/ sql_quote_name check_object_param/;
use Lire::ReportOperator qw/group_data_value/;
use Lire::Count;
use Lire::Sum;

=pod

=head1 NAME

Lire::Average

=head1 SYNOPSIS

FIXME

=head1 DESCRIPTION

Class that implements the avg operator. This operator will compute the
average value of a DLF field in a group of DLF records.

If the field used by average isn't a numeric fields, the number of
different values in that field will be used as the numerator.

It's possible to compute a weighted average by specifying a weighting field.

By default, the average is a by record average. It's possible to
compute a by different values average. The list of fields that's used
to determine the different values if speficied in the by-fields
attribute.

=head1 METHODS

=head2 new( %params )

Creates a new Lire::Average object. In addition to the values
supported by its parents, the by-fields and weight attributes will be
initialized to the values specified in the %params argument.

=cut

sub new {
    my $self = bless {}, shift;

    $self->init( @_, 'op' => "avg" );

    my %args = @_;
    $self->by_fields(  $args{'by-fields'} )
      if exists $args{'by-fields'};

    $self->weight(  $args{'weight'} )
      if exists $args{'weight'};

    return $self;
}

#------------------------------------------------------------------------
# Method field( [$field]
#
# Overrides Lire::SimpleStat one since the field doesn't have
# to be numeric.
sub field {
    my ($self, $field ) = @_;

    if ( @_ == 2 ) {
	if ( defined $field ) {
	    croak "'$field' isn't a defined field in the specification's schemas"
		unless $self->report_spec()->has_field( $field );
	}
	$self->{'field'} = $field;
    }

    $self->{'field'};
}

=pod

=head2 by_fields( [$new_by_fields] )

Returns the fields that are going to be used to count the different
values which will made up the by part of the average.

This a reference to an array of DLF field names.

If the $new_by_fields parameter is set, it will be used as the new
$by_fields value. It must be an array reference and should only
contains valid field names for the current report specification's
schema.

=cut

sub by_fields {
    my ($self, $by_fields) = @_;

    if ( @_ == 2 ) {
	if ( defined $by_fields ) {
            check_object_param( $by_fields, 'by_fields', 'ARRAY' );

	    foreach my $f ( @$by_fields ) {
		croak "$f isn't a defined field in the specification's schemas"
		    unless $self->report_spec()->has_field( $f );
	    }
	}
	$self->{'by_fields'} = $by_fields;
    }
    $self->{'by_fields'};
}

=pod

=head2 weigth( [$new_weight] )

Returns the DLF field's name by which the values will be multiplied
before being summed.

You can change the weight field by specifying a new name as the
$new_weight parameter. Use undef to remove the use of a weighting
field.

=cut

sub weight {
    my ($self, $weight) = @_;

    if ( @_ == 2 ) {
	if ( defined $weight ) {
	    croak "'$weight' isn't a defined field in the specification's schemas"
		unless $self->report_spec()->has_field( $weight );

	    croak "$weight isn't a numeric type",
	      unless is_numeric_type( $self->report_spec()->field( $weight )->type() );

	}

	$self->{'weight'} = $weight;
    }

    $self->{'weight'};
}

#------------------------------------------------------------------------
# Method xml_attrs()
#
# Implementation needed by Lire::Aggregate
sub xml_attrs {
    my ($self) = @_;

    my $attr = $self->SUPER::xml_attrs;
    $attr .= qq{ weight="$self->{'weight'}"}
      if defined $self->{'weight'};

    if ( exists $self->{'by_fields'} ) {
	my $by_fields = join " ", @{$self->{'by_fields'}};
	$attr .= qq{ by-fields="$by_fields"};
    }
    $attr;
}

# Overrides Lire::Aggregate::build_query
sub build_query {
    my ( $self, $query ) = @_;

    $self->_build_avg_query( $query );
    $self->set_missing_cases_aggr_expr( $query );
}

sub _build_avg_query {
    my ( $self, $query ) = @_;

    my $sum_expr;
    if ( defined $self->{'weight' }) {
        $sum_expr = sprintf( 'sum(%s*%s)',
                             sql_quote_name( $self->{'field'} ),
                             sql_quote_name( $self->{'weight'} ) );
    } else {
        $sum_expr = sprintf( 'sum(%s)', sql_quote_name( $self->{'field'} ) );
    }
    $query->add_aggr_field( $self->{'name'} . "_total", $sum_expr );

    my $n_expr;
    if ( !defined $self->{'by_fields'} ) {
        $n_expr = 'count(*)';
    } elsif ( @{$self->{'by_fields'}} == 1 &&
            is_numeric_type( $self->report_spec()->field( $self->{'by_fields'}[0] )->type() ) )
    {
        $n_expr = sprintf( 'sum(%s)', sql_quote_name($self->{'by_fields'}[0]));
    } else {
        my @fields = map { sql_quote_name( $_ ) } @{ $self->{'by_fields'} };
        $n_expr = sprintf( 'lr_count_uniq(%s)', join (',', @fields) );
    }
    $query->add_aggr_field( $self->{'name'} . "_n", $n_expr );
    $query->add_aggr_field( $self->{'name'},
                            sprintf( '%s/%s', $sum_expr, $n_expr ) );
    return;
}

# Overrides Lire::SimpleStat::sql_required_fields
sub sql_required_fields {
    my $self = $_[0];

    my @fields = ( $self->{'field'} );
    push @fields, $self->{'weight'}
      if defined $self->{'weight'};
    push @fields, @{ $self->{'by_fields'} }
      if defined $self->{'by_fields'};

    return \@fields;
}

# Overrides Lire::SimpleStat::create_value
sub create_value {
    my ( $self, $group, $row ) = @_;

    my %value;
    my $name = $self->name();

    my $avg;
    if ( ! $row->{$name . "_n"} ) {
        $avg = 'NaN';
    } elsif ( ! $row->{ $name . '_total'}  ) {
        $avg = '0.00';
    } else {
        $avg = sprintf( "%.2f", $row->{$name . '_total' } 
                        /  $row->{$name . "_n"});
    }
    $value{'content'} =
      format_numeric_type( $avg, $self->dlf_field()->type() );
    $value{'value'} = $avg;
    $value{'total'} = $row->{ $name . '_total' };
    $value{'n'} = $row->{ $name . '_n' };

    $self->set_missing_cases_value( $row, \%value );

    return \%value;
}

# Implements Lire::ReportOperator::init_merge
sub init_merge {
    my $self = $_[0];

    $self->create_avg_ops();
    $self->{'sum_op'}->init_merge();
    $self->{'n_op'}->init_merge();

    return;
}

# Initialize n_op and sum_op
sub create_avg_ops {
    my ($self) = @_;
    my %common = (
		  'parent' => $self->parent(),
		  'report_spec' => $self->report_spec(),
		 );
    if ( $self->field() ) {
	my $f = $self->report_spec()->schema()->field( $self->field );
	if ( is_numeric_type( $f->type() ) ) {
	    # Use a sum
	    $self->{'sum_op'} =
	      new Lire::Sum( %common,
                             'name' => $self->name() . ".sum",
                             'field' => $self->field(),
                             'weight' => $self->weight(),
                           );
	} else {
	    # Use a counter
	    $self->{'sum_op'} =
              new Lire::Count( %common, 'fields' => [ $self->field() ],
                               'name' => $self->name() . ".sum", );
	}
    } else {
	# Use a simple count operation
	$self->{'sum_op'} = 
	  new Lire::Count( %common, 'name' => $self->name() . ".sum" );
    }

    # Special case for a numerical field. This enables
    # to compute arbitrary ratio.
    # FIXME: A new operator ratio operator should be defined
    if ( $self->by_fields() && @{$self->by_fields()} == 1 &&
         is_numeric_type( $self->report_spec()->schema()->field( $self->by_fields()->[0])->type()))
     {
         $self->{'n_op'} =
           new Lire::Sum( %common,
                          'field' => $self->by_fields->[0],
                          'name' => $self->name() . ".n" );
     } else {
         $self->{'n_op'} =
           new Lire::Count( %common, 'fields' => $self->by_fields(),
                            'name' => $self->name() . ".n" );
     }
}

# Implements Lire::ReportOperator::init_group_data
sub init_group_data {
    my ( $self ) = @_;

    # Result is held in the first element
    return [ "NaN", $self->{'sum_op'}->init_group_data(),
             $self->{'n_op'}->init_group_data() ];
}

# Implements Lire::ReportOperator::merge_group_data
sub merge_group_data {
    my ( $self, $value, $data ) = @_;

    $self->{'sum_op'}->merge_group_data( {value => $value->{'total'}},
				       $data->[1] );
    $self->{'n_op'}->merge_group_data( {value => $value->{'n'}},
				     $data->[2] );

    return;
}

# Implements Lire::ReportOperator::end_group_data
sub end_group_data {
    my ( $self, $data ) = @_;

    $self->{'sum_op'}->end_group_data( $data->[1] );
    $self->{'n_op'}->end_group_data( $data->[2] );

    my $sum = group_data_value( $data->[1] );
    my $n = group_data_value( $data->[2] );
    $data->[0] = sprintf "%.2f", $sum / $n
      if $n;

    return;
}

# Implements Lire::ReportOperator::end_merge()
sub end_merge {
    my $self = $_[0];

    $self->{'sum_op'}->end_merge();
    $self->{'n_op'}->end_merge();

    return;
}

# Implements Lire::Aggregate::data2dlf()
sub data2dlf {
    my ($self, $data) = @_;

    my $name = $self->name();
    return { "$name" => $data->[0],
             "${name}_total" => group_data_value( $data->[1] ),
             "${name}_n" => group_data_value( $data->[2] ),
             "_lr_${name}_mc" => $self->missing_cases( $data ),
           };
}

# keep perl happy
1;

__END__

=head1 SEE ALSO

 Lire::ReportSpec(3pm), Lire::ReportOperator(3pm),
 Lire::Aggregator(3pm), Lire::Aggregate(3pm).

=head1 AUTHORS

 Francis J. Lacoste <flacoste@logreport.org>
 Wolfgang Sourdeau <wsourdea@logreport.org>

=head1 VERSION

$Id: Average.pm,v 1.16 2006/07/23 13:16:27 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html. 

=cut


syntax highlighted by Code2HTML, v. 0.9.1