package Lire::Rangegroup; use strict; use base qw/ Lire::Aggregator /; use Carp; use POSIX; use constant NEGLIGIBLE_QTY => 0.000001; use Lire::DataTypes qw/ check_bytes check_duration check_number duration2sec size2bytes format_numeric_type is_quantity_type /; use Lire::Utils qw/ sql_quote_name /; =pod =head1 NAME Lire::Group - Base class for implementation of the rangegroup aggregator =head1 SYNOPSIS use Lire::Rangegroup; =head1 DESCRIPTION This module is the base class for implementation of the rangegroup aggregator. This aggregator will split the DLF records based on a numerical field. The so-called range groups creates numerical classes (e.g. 1-10, 11-20, etc.) and the DLF record will be grouped with other records which are in the same class. =head1 CONSTRUCTOR =head2 new( %params ) Creates a new instance of a group aggregator. In addition to the normal report operator parameters, the rangegroup aggregator can take several parameters: =over =item field This parameter is mandatory. It should be the name of the DLF field which will be used as key for this aggregator. =item range-size This parameter is mandatory. This will be used as the size of each range group. =item range-start The number at which the first range group starts. Defauls to 0. =item min-value Value lower than this number will be sorted in the first range group. =item max-value Value higher than this number will be sorted in the last range group. =item size-scale This parameter can be used to create a logarithmic scale. In this case, each new range group will be size-scale bigger than the one that comes before it. For example, setting range-size=5 and size-scale=2 will create ranges like [0-5>, [5-15>, [15-35>, ... =back =cut sub new { my $proto = shift; my $class = ref( $proto) || $proto; my %params = @_; my $self = bless {}, $class; $self->SUPER::init( %params, 'op' => "rangegroup" ); croak "missing 'field' attribute" unless exists $params{'field'}; $self->field( $params{'field'} ); croak "missing 'range-size' attribute" unless exists $params{'range-size'}; $self->range_size( $params{'range-size'} ); $self->range_start( $params{'range-start'} || 0 ); $self->min_value( $params{'min-value'} ) if exists $params{'min-value'}; $self->max_value( $params{'max-value'} ) if exists $params{'max-value'}; $self->size_scale( $params{'size-scale'} || 1 ); return $self; } =pod =head1 METHODS =head2 field( [$new_field] ) Returns the name of the DLF field which is used as grouping key. If the $new_field parameter is set, it changes the grouping field. This must be the name of a quantity type field in the report specifications DLF schema. =cut sub field { my ( $self, $field ) = @_; if (defined $field) { croak "'$field' isn't a field in the specification's schemas" unless $self->report_spec()->has_field( $field ); croak "'$field' isn't a bytes, duration, int or number field" unless is_quantity_type( $self->report_spec()->field( $field )->type() ); $self->{'field'} = $field; } return $self->{'field'}; } #------------------------------------------------------------------------ # Method set_attr_value( $attr_name, $value ) # # Method used to share the logic of validating the attributes' value. sub set_attr_value { my ( $self, $attr_name, $value ) = @_; my $field = $self->field(); my $type = $self->report_spec()->field( $field )->type(); if ( $value =~ /^\$/ ) { my $name = substr $value, 1; croak "$value isn't a defined parameter" unless $self->report_spec()->has_param( $name ); croak "'$attr_name' attribute: '$value' should be of type $type" if $self->report_spec()->param( $name )->type() ne $type; } elsif ( $type eq 'duration' ) { croak "invalid duration value for '$attr_name' attribute: $value" unless check_duration( $value ); } elsif ($type eq 'bytes' ) { croak "invalid bytes value for '$attr_name' attribute: $value" unless check_bytes( $value ); } else { croak "invalid numeric value for '$attr_name' attribute: $value" unless check_number( $value ); } $attr_name =~ s/-/_/g; $self->{$attr_name} = $value; return; } =pod =head2 range_start( [$new_start] ) Returns the number which is the starting bound of the first range group. If the $new_start parameter is set, it changes the starting bound of the first range group. This should either be a positive integer or the name of one of the report specification's parameter. =cut sub range_start { my ( $self, $start ) = @_; $self->set_attr_value( 'range-start', $start || 0 ) if @_ == 2; return $self->{'range_start'}; } =pod =head2 range_size( [$new_size] ) Returns the width of each range group. If the $new_size parameter is set, it changes the width of each range group. This should either be a positive integer or the name of one of the report specification's parameter. =cut sub range_size { my ( $self, $size ) = @_; $self->set_attr_value( 'range-size', $size ) if (defined $size ); return $self->{'range_size'}; } =pod =head2 min_value( [$new_min] ) Returns the number below which no new range groups will be created. If the $new_min is set, it changes the lower bound below which no new groups will be created. This should either be a number or the name of one of the report specification's parameter. =cut sub min_value { my ( $self, $min ) = @_; if ( @_ == 2) { if ( defined $min ) { $self->set_attr_value( 'min-value', $min ); } else { delete $self->{'min_value'}; } } return $self->{'min_value'}; } =pod =head2 max_value( [$new_max] ) Returns the number above which no new range groups will be created. If the $new_max is set, it changes the upper bound above which no new groups will be created. This should either be a number or the name of one of the report specification's parameter. =cut sub max_value { my ( $self, $max ) = @_; if ( @_ == 2) { if ( defined $max ) { $self->set_attr_value( 'max-value', $max ); } else { delete $self->{'max_value'}; } } return $self->{'max_value'}; } =pod =head2 size_scale( [$new_size_scale] ) Returns the multiplier that will be apply to each of the range group's width after the first one. When this attribute is greater than 1, it creates a logarithmic scale where each new ranges is $size_scale wider than the precedent one. If the $new_size_scale parameter is used, it changes the attribute to this new value. It should be a positive interger or can also be a parameter reference. =cut sub size_scale { my ( $self, $size_scale ) = @_; if ( @_ == 2 ) { if ( defined $size_scale ) { if ( $size_scale =~ /^\$/ ) { my $name = substr $size_scale, 1; croak "$size_scale isn't a defined parameter" unless $self->report_spec->has_param( $name ); croak "'size-scale' attribute must be a number type" unless $self->report_spec->param( $name )->type() ne 'number'; } else { croak "invalid 'size-scale' value. It should be a number" unless check_number( $size_scale ); } $self->{'size_scale'} = $size_scale; } else { delete $self->{'size_scale'}; } } return $self->{'size_scale'}; } # ------------------------------------------------------------------------ # Method xml_attrs() # # Implementation required by Lire::Aggregator sub xml_attrs { my ( $self ) = @_; return qq{field="$self->{'field'}" range-start="$self->{'range_start'}" range-size="$self->{'range_size'}"}; } # Implements Lire::ReportOperator::name() sub name { return 'rangegroup:' . $_[0]->{'field'}; } # ------------------------------------------------------------------------ # Method create_categorical_info( $info ) # # Implementation of the method required by Lire::Aggregator sub create_categorical_info { my ( $self, $info ) = @_; my $dlf_field = $self->report_spec()->field( $self->field() ); $info->create_column_info( $self->name(), 'categorical', $dlf_field->type(), $self->label() ); return; } sub build_query { my ( $self, $query ) = @_; $self->SUPER::build_query( $query ); my $scale = $self->_param_value( 'size_scale' ); my $func = ( $scale == 1) ? "lr_rangegroup" : "lr_rangegroup_geo"; my @params; foreach my $p ( qw/range_start range_size size_scale min_value max_value/ ) { my $value = $self->_param_value( $p ); next if ( $p eq 'size_scale' && $value == 1 ); push @params, defined $value ? $value : 'NULL'; } $query->add_group_field( $self->name(), sprintf( '%s(%s,%s)', $func, sql_quote_name( $self->{'field'} ), join( ",", @params) ) ); $query->set_sort_spec( $self->name() ); return; } sub _param_value { my ( $self, $param ) = @_; my $value = $self->{$param}; return undef unless defined $value; $value = $self->report_spec()->resolve_param_ref( $value ); my $type = $self->report_spec()->field( $self->{'field'} )->type(); if ( $type eq 'bytes' ) { return size2bytes( $value ); } elsif ( $type eq 'duration' ) { return duration2sec( $value ); } else { return $value; } return 0; } sub create_entry { my ( $self, $group, $row ) = @_; my $entry = $group->create_entry(); my $value = $row->{ $self->name() }; unless ( defined $value ) { $group->missing_cases( $row->{'_lr_nrecords'} ); return undef; } my $length = $self->_param_value( 'range_size' ); my $start = $self->_param_value( 'range_start'); my $scale = $self->_param_value( 'size_scale' ); while ( $value < $start || $value >= ( $start + $length ) ) { $start += $length; $length *= $scale; } my $type = $self->report_spec()->field( $self->{'field'} )->type(); my $content = sprintf( '[%s-%s>', format_numeric_type( $start, $type ), format_numeric_type( ( $start + $length ), $type )); $entry->add_name( $content, $value, $length ); return $entry; } # Implements Lire::ReportOperator::init_merge() sub init_merge { my $self = $_[0]; $self->SUPER::init_merge(); $self->{'_m_min_value'} = $self->_param_value( 'min_value' ); $self->{'_m_max_value'} = $self->_param_value( 'max_value' ); $self->{'_m_size_scale'} = $self->_param_value( 'size_scale' ); die "'size-scale' must be a positive number: $self->{'_m_size_scale'}\n" unless $self->{'_m_size_scale'} >= 0; $self->{'_m_range_start'} = $self->_param_value( 'range_start' ); $self->{'_m_range_size'} = $self->_param_value( 'range_size' ); die "'range-size' attribute's value should be positive: $self->{'_m_range_size'}\n" unless $self->{'_m_range_size'} >= 0; return; } # Implements Lire::Aggregator::init_aggregator_data() sub init_aggregator_data { my $self = $_[0]; my $ranges = []; # Transform the min_value in the range_start parameter $self->{'_m_range_start'} = $self->{'_m_min_value'} - $self->{'_m_range_start'} if ( defined $self->{'_m_min_value'}); if ( defined $self->{'_m_max_value'}) { my $end_idx; if ( $self->{'_m_size_scale'} == 1 ) { $end_idx = int( ($self->{'_m_max_value'} - $self->{'_m_range_start'}) / $self->{'_m_range_size'}); } else { $end_idx = 0; while (1) { my $end = $self->{'_m_range_start'} + (($self->{'_m_size_scale'} ** $end_idx) * $self->{'_m_range_size'}); last if $self->{'_m_max_value'} < $end; $end_idx++; } } # Make sure that we have entry until max_value $ranges->[$end_idx] = undef; } return $ranges; } # Implements Lire::Aggregator::merge_aggregator_data() sub merge_aggregator_data { my ( $self, $group, $ranges ) = @_; foreach my $e ( $group->entries() ) { my @names = $e->names(); die "invalid number of names for a rangegroup subreport: ", scalar @names, "\n" if @names != 1; # FIXME: We won't interpolate data. So for merging to succeed # the whole class must be contained in the new class. # # Another possible way to merge is to use the middle of the class # to determine where the class should be merged. This would be less # accurate than the current method, but merging would never fail. my $start = $names[0]{'value'}; my $length = $names[0]{'range'}; # Clamp if necessary if ( defined $self->{'_m_min_value'} && $start < $self->{'_m_min_value'} ) { croak "incompatible merging parameters: ", "range is splitted across min value: [$start,", $start + $length, "> <> $self->{'_m_min_value'}\n" if $start + $length > $self->{'_m_min_value'}; $start = $self->{'_m_min_value'}; $length = NEGLIGIBLE_QTY; } if ( defined $self->{'_m_max_value'} && $start > $self->{'_m_max_value'} ) { $start = $self->{'_m_max_value'}; $length = NEGLIGIBLE_QTY; } # Since the $start + $length isn't included in the range, we subtract # a negligible quantity just to make sure that it falls onto the # same idx my ($idx); if ( $self->{'_m_size_scale'} == 1 ) { $idx = int( ($start - $self->{'_m_range_start'}) / $self->{'_m_range_size'}); my $eidx = int( (($start + $length) - $self->{'_m_range_start'} - NEGLIGIBLE_QTY) / $self->{'_m_range_size'}); croak "incompatible merge: source range is splitted across ranges:", "[$start,", $start + $length, "> : start=$self->{'_m_range_start'}; ", "size=$self->{'_m_range_size'}\n" if $idx != $eidx; } else { $idx = 0; while (1) { my $end = $self->{'_m_range_start'} + (($self->{'_m_size_scale'} ** $idx) * $self->{'_m_range_size'}); last if ( $end > ($start + $length) - NEGLIGIBLE_QTY ); die "incompatible merge: source range is splitted across", " target ranges: [$start,", $start + $length, "> : ", "end=$end\n" if $end > $start; $idx++; } } if ( $idx < 0 ) { croak "can't reorder ranges when using size-scale != 1. Please set min-value $!" if $self->{'_m_size_scale'} != 1; # We have ranges under the first index. Push the ranges to the right. $self->{'_m_range_start'} = $idx * $self->{'_m_range_size'} + $self->{'_m_range_start'}; @$ranges = ( (undef) x abs($idx), @$ranges ); $idx = 0; } my $data = $ranges->[$idx]; unless ( defined $data ) { $data = []; my $i = 0; foreach my $op ( @{$self->ops()} ) { $data->[$i++] = $op->init_group_data(); } $ranges->[$idx] = $data; } my $i = 0; foreach my $op ( @{ $self->ops() } ) { my $value = $e->data_by_name( $op->name ); my $op_data = $data->[$i++]; $op->merge_group_data( $value, $op_data ) if ( $value ); } } return; } # Implements Lire::Aggregator::end_aggregator_data() sub end_aggregator_data { my ( $self, $ranges ) = @_; # Finalize each ranges # Either create empty one or call end_group_data on them my $last_idx = $#$ranges; my $i = 0; while ( $i <= $last_idx) { if ( $ranges->[$i]) { my $data = $ranges->[$i]; my $j = 0; foreach my $op ( @{$self->ops} ) { $op->end_group_data( $data->[$j++] ); } } else { my $data = []; my $j = 0; foreach my $op ( @{$self->ops} ) { $data->[$j] = $op->init_group_data(); $op->end_group_data( $data->[$j++] ); } $ranges->[$i] = $data; } $i++; } return $self; } # Implements Lire::Aggregator::create_group_entries() sub create_group_entries { my ( $self, $group, $ranges ) = @_; for ( my $i=0; $i < @$ranges; $i++ ) { my $range = $ranges->[$i]; my $start; if ( $self->{'_m_size_scale'} == 1 ) { $start = $self->{'_m_range_size'} * $i + $self->{'_m_range_start'}; } else { if ( $i == 0 ) { $start = $self->{'_m_range_start'}; } else { $start = $self->{'_m_range_start'} + ($self->{'_m_size_scale'} ** ($i-1)) * $self->{'_m_range_size'}; } } my $row = { $self->name() => $start}; my $entry = $self->create_entry( $group, $row );; my $j = 0; foreach my $op ( @{ $self->ops() } ) { $op->add_entry_value( $entry, $range->[$j++] ); } } return; } # keep perl happy 1; __END__ =head1 SEE ALSO Lire::ReportSpec(3pm), Lire::Group(3pm), Lire::ReportOperator(3pm), Lire::Aggregator(3pm) =head1 AUTHORS Francis J. Lacoste Wolfgang Sourdeau =head1 VERSION $Id: Rangegroup.pm,v 1.23 2006/07/23 13:16:29 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org This file is part of Lire. Lire is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =cut