package Lire::Average; use strict; use base qw/ Lire::SimpleStat /; use Carp; use Lire::DataTypes qw/ is_numeric_type format_numeric_type /; use Lire::Utils qw/ sql_quote_name check_object_param/; use Lire::ReportOperator qw/group_data_value/; use Lire::Count; use Lire::Sum; =pod =head1 NAME Lire::Average =head1 SYNOPSIS FIXME =head1 DESCRIPTION Class that implements the avg operator. This operator will compute the average value of a DLF field in a group of DLF records. If the field used by average isn't a numeric fields, the number of different values in that field will be used as the numerator. It's possible to compute a weighted average by specifying a weighting field. By default, the average is a by record average. It's possible to compute a by different values average. The list of fields that's used to determine the different values if speficied in the by-fields attribute. =head1 METHODS =head2 new( %params ) Creates a new Lire::Average object. In addition to the values supported by its parents, the by-fields and weight attributes will be initialized to the values specified in the %params argument. =cut sub new { my $self = bless {}, shift; $self->init( @_, 'op' => "avg" ); my %args = @_; $self->by_fields( $args{'by-fields'} ) if exists $args{'by-fields'}; $self->weight( $args{'weight'} ) if exists $args{'weight'}; return $self; } #------------------------------------------------------------------------ # Method field( [$field] # # Overrides Lire::SimpleStat one since the field doesn't have # to be numeric. sub field { my ($self, $field ) = @_; if ( @_ == 2 ) { if ( defined $field ) { croak "'$field' isn't a defined field in the specification's schemas" unless $self->report_spec()->has_field( $field ); } $self->{'field'} = $field; } $self->{'field'}; } =pod =head2 by_fields( [$new_by_fields] ) Returns the fields that are going to be used to count the different values which will made up the by part of the average. This a reference to an array of DLF field names. If the $new_by_fields parameter is set, it will be used as the new $by_fields value. It must be an array reference and should only contains valid field names for the current report specification's schema. =cut sub by_fields { my ($self, $by_fields) = @_; if ( @_ == 2 ) { if ( defined $by_fields ) { check_object_param( $by_fields, 'by_fields', 'ARRAY' ); foreach my $f ( @$by_fields ) { croak "$f isn't a defined field in the specification's schemas" unless $self->report_spec()->has_field( $f ); } } $self->{'by_fields'} = $by_fields; } $self->{'by_fields'}; } =pod =head2 weigth( [$new_weight] ) Returns the DLF field's name by which the values will be multiplied before being summed. You can change the weight field by specifying a new name as the $new_weight parameter. Use undef to remove the use of a weighting field. =cut sub weight { my ($self, $weight) = @_; if ( @_ == 2 ) { if ( defined $weight ) { croak "'$weight' isn't a defined field in the specification's schemas" unless $self->report_spec()->has_field( $weight ); croak "$weight isn't a numeric type", unless is_numeric_type( $self->report_spec()->field( $weight )->type() ); } $self->{'weight'} = $weight; } $self->{'weight'}; } #------------------------------------------------------------------------ # Method xml_attrs() # # Implementation needed by Lire::Aggregate sub xml_attrs { my ($self) = @_; my $attr = $self->SUPER::xml_attrs; $attr .= qq{ weight="$self->{'weight'}"} if defined $self->{'weight'}; if ( exists $self->{'by_fields'} ) { my $by_fields = join " ", @{$self->{'by_fields'}}; $attr .= qq{ by-fields="$by_fields"}; } $attr; } # Overrides Lire::Aggregate::build_query sub build_query { my ( $self, $query ) = @_; $self->_build_avg_query( $query ); $self->set_missing_cases_aggr_expr( $query ); } sub _build_avg_query { my ( $self, $query ) = @_; my $sum_expr; if ( defined $self->{'weight' }) { $sum_expr = sprintf( 'sum(%s*%s)', sql_quote_name( $self->{'field'} ), sql_quote_name( $self->{'weight'} ) ); } else { $sum_expr = sprintf( 'sum(%s)', sql_quote_name( $self->{'field'} ) ); } $query->add_aggr_field( $self->{'name'} . "_total", $sum_expr ); my $n_expr; if ( !defined $self->{'by_fields'} ) { $n_expr = 'count(*)'; } elsif ( @{$self->{'by_fields'}} == 1 && is_numeric_type( $self->report_spec()->field( $self->{'by_fields'}[0] )->type() ) ) { $n_expr = sprintf( 'sum(%s)', sql_quote_name($self->{'by_fields'}[0])); } else { my @fields = map { sql_quote_name( $_ ) } @{ $self->{'by_fields'} }; $n_expr = sprintf( 'lr_count_uniq(%s)', join (',', @fields) ); } $query->add_aggr_field( $self->{'name'} . "_n", $n_expr ); $query->add_aggr_field( $self->{'name'}, sprintf( '%s/%s', $sum_expr, $n_expr ) ); return; } # Overrides Lire::SimpleStat::sql_required_fields sub sql_required_fields { my $self = $_[0]; my @fields = ( $self->{'field'} ); push @fields, $self->{'weight'} if defined $self->{'weight'}; push @fields, @{ $self->{'by_fields'} } if defined $self->{'by_fields'}; return \@fields; } # Overrides Lire::SimpleStat::create_value sub create_value { my ( $self, $group, $row ) = @_; my %value; my $name = $self->name(); my $avg; if ( ! $row->{$name . "_n"} ) { $avg = 'NaN'; } elsif ( ! $row->{ $name . '_total'} ) { $avg = '0.00'; } else { $avg = sprintf( "%.2f", $row->{$name . '_total' } / $row->{$name . "_n"}); } $value{'content'} = format_numeric_type( $avg, $self->dlf_field()->type() ); $value{'value'} = $avg; $value{'total'} = $row->{ $name . '_total' }; $value{'n'} = $row->{ $name . '_n' }; $self->set_missing_cases_value( $row, \%value ); return \%value; } # Implements Lire::ReportOperator::init_merge sub init_merge { my $self = $_[0]; $self->create_avg_ops(); $self->{'sum_op'}->init_merge(); $self->{'n_op'}->init_merge(); return; } # Initialize n_op and sum_op sub create_avg_ops { my ($self) = @_; my %common = ( 'parent' => $self->parent(), 'report_spec' => $self->report_spec(), ); if ( $self->field() ) { my $f = $self->report_spec()->schema()->field( $self->field ); if ( is_numeric_type( $f->type() ) ) { # Use a sum $self->{'sum_op'} = new Lire::Sum( %common, 'name' => $self->name() . ".sum", 'field' => $self->field(), 'weight' => $self->weight(), ); } else { # Use a counter $self->{'sum_op'} = new Lire::Count( %common, 'fields' => [ $self->field() ], 'name' => $self->name() . ".sum", ); } } else { # Use a simple count operation $self->{'sum_op'} = new Lire::Count( %common, 'name' => $self->name() . ".sum" ); } # Special case for a numerical field. This enables # to compute arbitrary ratio. # FIXME: A new operator ratio operator should be defined if ( $self->by_fields() && @{$self->by_fields()} == 1 && is_numeric_type( $self->report_spec()->schema()->field( $self->by_fields()->[0])->type())) { $self->{'n_op'} = new Lire::Sum( %common, 'field' => $self->by_fields->[0], 'name' => $self->name() . ".n" ); } else { $self->{'n_op'} = new Lire::Count( %common, 'fields' => $self->by_fields(), 'name' => $self->name() . ".n" ); } } # Implements Lire::ReportOperator::init_group_data sub init_group_data { my ( $self ) = @_; # Result is held in the first element return [ "NaN", $self->{'sum_op'}->init_group_data(), $self->{'n_op'}->init_group_data() ]; } # Implements Lire::ReportOperator::merge_group_data sub merge_group_data { my ( $self, $value, $data ) = @_; $self->{'sum_op'}->merge_group_data( {value => $value->{'total'}}, $data->[1] ); $self->{'n_op'}->merge_group_data( {value => $value->{'n'}}, $data->[2] ); return; } # Implements Lire::ReportOperator::end_group_data sub end_group_data { my ( $self, $data ) = @_; $self->{'sum_op'}->end_group_data( $data->[1] ); $self->{'n_op'}->end_group_data( $data->[2] ); my $sum = group_data_value( $data->[1] ); my $n = group_data_value( $data->[2] ); $data->[0] = sprintf "%.2f", $sum / $n if $n; return; } # Implements Lire::ReportOperator::end_merge() sub end_merge { my $self = $_[0]; $self->{'sum_op'}->end_merge(); $self->{'n_op'}->end_merge(); return; } # Implements Lire::Aggregate::data2dlf() sub data2dlf { my ($self, $data) = @_; my $name = $self->name(); return { "$name" => $data->[0], "${name}_total" => group_data_value( $data->[1] ), "${name}_n" => group_data_value( $data->[2] ), "_lr_${name}_mc" => $self->missing_cases( $data ), }; } # keep perl happy 1; __END__ =head1 SEE ALSO Lire::ReportSpec(3pm), Lire::ReportOperator(3pm), Lire::Aggregator(3pm), Lire::Aggregate(3pm). =head1 AUTHORS Francis J. Lacoste Wolfgang Sourdeau =head1 VERSION $Id: Average.pm,v 1.16 2006/07/23 13:16:27 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org This file is part of Lire. Lire is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =cut