package Lire::Average;
use strict;
use base qw/ Lire::SimpleStat /;
use Carp;
use Lire::DataTypes qw/ is_numeric_type format_numeric_type /;
use Lire::Utils qw/ sql_quote_name check_object_param/;
use Lire::ReportOperator qw/group_data_value/;
use Lire::Count;
use Lire::Sum;
=pod
=head1 NAME
Lire::Average
=head1 SYNOPSIS
FIXME
=head1 DESCRIPTION
Class that implements the avg operator. This operator will compute the
average value of a DLF field in a group of DLF records.
If the field used by average isn't a numeric fields, the number of
different values in that field will be used as the numerator.
It's possible to compute a weighted average by specifying a weighting field.
By default, the average is a by record average. It's possible to
compute a by different values average. The list of fields that's used
to determine the different values if speficied in the by-fields
attribute.
=head1 METHODS
=head2 new( %params )
Creates a new Lire::Average object. In addition to the values
supported by its parents, the by-fields and weight attributes will be
initialized to the values specified in the %params argument.
=cut
sub new {
my $self = bless {}, shift;
$self->init( @_, 'op' => "avg" );
my %args = @_;
$self->by_fields( $args{'by-fields'} )
if exists $args{'by-fields'};
$self->weight( $args{'weight'} )
if exists $args{'weight'};
return $self;
}
#------------------------------------------------------------------------
# Method field( [$field]
#
# Overrides Lire::SimpleStat one since the field doesn't have
# to be numeric.
sub field {
my ($self, $field ) = @_;
if ( @_ == 2 ) {
if ( defined $field ) {
croak "'$field' isn't a defined field in the specification's schemas"
unless $self->report_spec()->has_field( $field );
}
$self->{'field'} = $field;
}
$self->{'field'};
}
=pod
=head2 by_fields( [$new_by_fields] )
Returns the fields that are going to be used to count the different
values which will made up the by part of the average.
This a reference to an array of DLF field names.
If the $new_by_fields parameter is set, it will be used as the new
$by_fields value. It must be an array reference and should only
contains valid field names for the current report specification's
schema.
=cut
sub by_fields {
my ($self, $by_fields) = @_;
if ( @_ == 2 ) {
if ( defined $by_fields ) {
check_object_param( $by_fields, 'by_fields', 'ARRAY' );
foreach my $f ( @$by_fields ) {
croak "$f isn't a defined field in the specification's schemas"
unless $self->report_spec()->has_field( $f );
}
}
$self->{'by_fields'} = $by_fields;
}
$self->{'by_fields'};
}
=pod
=head2 weigth( [$new_weight] )
Returns the DLF field's name by which the values will be multiplied
before being summed.
You can change the weight field by specifying a new name as the
$new_weight parameter. Use undef to remove the use of a weighting
field.
=cut
sub weight {
my ($self, $weight) = @_;
if ( @_ == 2 ) {
if ( defined $weight ) {
croak "'$weight' isn't a defined field in the specification's schemas"
unless $self->report_spec()->has_field( $weight );
croak "$weight isn't a numeric type",
unless is_numeric_type( $self->report_spec()->field( $weight )->type() );
}
$self->{'weight'} = $weight;
}
$self->{'weight'};
}
#------------------------------------------------------------------------
# Method xml_attrs()
#
# Implementation needed by Lire::Aggregate
sub xml_attrs {
my ($self) = @_;
my $attr = $self->SUPER::xml_attrs;
$attr .= qq{ weight="$self->{'weight'}"}
if defined $self->{'weight'};
if ( exists $self->{'by_fields'} ) {
my $by_fields = join " ", @{$self->{'by_fields'}};
$attr .= qq{ by-fields="$by_fields"};
}
$attr;
}
# Overrides Lire::Aggregate::build_query
sub build_query {
my ( $self, $query ) = @_;
$self->_build_avg_query( $query );
$self->set_missing_cases_aggr_expr( $query );
}
sub _build_avg_query {
my ( $self, $query ) = @_;
my $sum_expr;
if ( defined $self->{'weight' }) {
$sum_expr = sprintf( 'sum(%s*%s)',
sql_quote_name( $self->{'field'} ),
sql_quote_name( $self->{'weight'} ) );
} else {
$sum_expr = sprintf( 'sum(%s)', sql_quote_name( $self->{'field'} ) );
}
$query->add_aggr_field( $self->{'name'} . "_total", $sum_expr );
my $n_expr;
if ( !defined $self->{'by_fields'} ) {
$n_expr = 'count(*)';
} elsif ( @{$self->{'by_fields'}} == 1 &&
is_numeric_type( $self->report_spec()->field( $self->{'by_fields'}[0] )->type() ) )
{
$n_expr = sprintf( 'sum(%s)', sql_quote_name($self->{'by_fields'}[0]));
} else {
my @fields = map { sql_quote_name( $_ ) } @{ $self->{'by_fields'} };
$n_expr = sprintf( 'lr_count_uniq(%s)', join (',', @fields) );
}
$query->add_aggr_field( $self->{'name'} . "_n", $n_expr );
$query->add_aggr_field( $self->{'name'},
sprintf( '%s/%s', $sum_expr, $n_expr ) );
return;
}
# Overrides Lire::SimpleStat::sql_required_fields
sub sql_required_fields {
my $self = $_[0];
my @fields = ( $self->{'field'} );
push @fields, $self->{'weight'}
if defined $self->{'weight'};
push @fields, @{ $self->{'by_fields'} }
if defined $self->{'by_fields'};
return \@fields;
}
# Overrides Lire::SimpleStat::create_value
sub create_value {
my ( $self, $group, $row ) = @_;
my %value;
my $name = $self->name();
my $avg;
if ( ! $row->{$name . "_n"} ) {
$avg = 'NaN';
} elsif ( ! $row->{ $name . '_total'} ) {
$avg = '0.00';
} else {
$avg = sprintf( "%.2f", $row->{$name . '_total' }
/ $row->{$name . "_n"});
}
$value{'content'} =
format_numeric_type( $avg, $self->dlf_field()->type() );
$value{'value'} = $avg;
$value{'total'} = $row->{ $name . '_total' };
$value{'n'} = $row->{ $name . '_n' };
$self->set_missing_cases_value( $row, \%value );
return \%value;
}
# Implements Lire::ReportOperator::init_merge
sub init_merge {
my $self = $_[0];
$self->create_avg_ops();
$self->{'sum_op'}->init_merge();
$self->{'n_op'}->init_merge();
return;
}
# Initialize n_op and sum_op
sub create_avg_ops {
my ($self) = @_;
my %common = (
'parent' => $self->parent(),
'report_spec' => $self->report_spec(),
);
if ( $self->field() ) {
my $f = $self->report_spec()->schema()->field( $self->field );
if ( is_numeric_type( $f->type() ) ) {
# Use a sum
$self->{'sum_op'} =
new Lire::Sum( %common,
'name' => $self->name() . ".sum",
'field' => $self->field(),
'weight' => $self->weight(),
);
} else {
# Use a counter
$self->{'sum_op'} =
new Lire::Count( %common, 'fields' => [ $self->field() ],
'name' => $self->name() . ".sum", );
}
} else {
# Use a simple count operation
$self->{'sum_op'} =
new Lire::Count( %common, 'name' => $self->name() . ".sum" );
}
# Special case for a numerical field. This enables
# to compute arbitrary ratio.
# FIXME: A new operator ratio operator should be defined
if ( $self->by_fields() && @{$self->by_fields()} == 1 &&
is_numeric_type( $self->report_spec()->schema()->field( $self->by_fields()->[0])->type()))
{
$self->{'n_op'} =
new Lire::Sum( %common,
'field' => $self->by_fields->[0],
'name' => $self->name() . ".n" );
} else {
$self->{'n_op'} =
new Lire::Count( %common, 'fields' => $self->by_fields(),
'name' => $self->name() . ".n" );
}
}
# Implements Lire::ReportOperator::init_group_data
sub init_group_data {
my ( $self ) = @_;
# Result is held in the first element
return [ "NaN", $self->{'sum_op'}->init_group_data(),
$self->{'n_op'}->init_group_data() ];
}
# Implements Lire::ReportOperator::merge_group_data
sub merge_group_data {
my ( $self, $value, $data ) = @_;
$self->{'sum_op'}->merge_group_data( {value => $value->{'total'}},
$data->[1] );
$self->{'n_op'}->merge_group_data( {value => $value->{'n'}},
$data->[2] );
return;
}
# Implements Lire::ReportOperator::end_group_data
sub end_group_data {
my ( $self, $data ) = @_;
$self->{'sum_op'}->end_group_data( $data->[1] );
$self->{'n_op'}->end_group_data( $data->[2] );
my $sum = group_data_value( $data->[1] );
my $n = group_data_value( $data->[2] );
$data->[0] = sprintf "%.2f", $sum / $n
if $n;
return;
}
# Implements Lire::ReportOperator::end_merge()
sub end_merge {
my $self = $_[0];
$self->{'sum_op'}->end_merge();
$self->{'n_op'}->end_merge();
return;
}
# Implements Lire::Aggregate::data2dlf()
sub data2dlf {
my ($self, $data) = @_;
my $name = $self->name();
return { "$name" => $data->[0],
"${name}_total" => group_data_value( $data->[1] ),
"${name}_n" => group_data_value( $data->[2] ),
"_lr_${name}_mc" => $self->missing_cases( $data ),
};
}
# keep perl happy
1;
__END__
=head1 SEE ALSO
Lire::ReportSpec(3pm), Lire::ReportOperator(3pm),
Lire::Aggregator(3pm), Lire::Aggregate(3pm).
=head1 AUTHORS
Francis J. Lacoste <flacoste@logreport.org>
Wolfgang Sourdeau <wsourdea@logreport.org>
=head1 VERSION
$Id: Average.pm,v 1.16 2006/07/23 13:16:27 vanbaal Exp $
=head1 COPYRIGHT
Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org
This file is part of Lire.
Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.
=cut
syntax highlighted by Code2HTML, v. 0.9.1