package Lire::Rangegroup;
use strict;
use base qw/ Lire::Aggregator /;
use Carp;
use POSIX;
use constant NEGLIGIBLE_QTY => 0.000001;
use Lire::DataTypes qw/ check_bytes check_duration check_number duration2sec size2bytes format_numeric_type is_quantity_type /;
use Lire::Utils qw/ sql_quote_name /;
=pod
=head1 NAME
Lire::Group - Base class for implementation of the rangegroup aggregator
=head1 SYNOPSIS
use Lire::Rangegroup;
=head1 DESCRIPTION
This module is the base class for implementation of the rangegroup
aggregator. This aggregator will split the DLF records based on a
numerical field. The so-called range groups creates numerical classes
(e.g. 1-10, 11-20, etc.) and the DLF record will be grouped with other
records which are in the same class.
=head1 CONSTRUCTOR
=head2 new( %params )
Creates a new instance of a group aggregator. In addition to the
normal report operator parameters, the rangegroup aggregator can take
several parameters:
=over
=item field
This parameter is mandatory. It should be the name of the DLF field
which will be used as key for this aggregator.
=item range-size
This parameter is mandatory. This will be used as the size of each
range group.
=item range-start
The number at which the first range group starts. Defauls to 0.
=item min-value
Value lower than this number will be sorted in the first range group.
=item max-value
Value higher than this number will be sorted in the last range group.
=item size-scale
This parameter can be used to create a logarithmic scale. In this
case, each new range group will be size-scale bigger than the one that
comes before it. For example, setting range-size=5 and size-scale=2
will create ranges like [0-5>, [5-15>, [15-35>, ...
=back
=cut
sub new {
my $proto = shift;
my $class = ref( $proto) || $proto;
my %params = @_;
my $self = bless {}, $class;
$self->SUPER::init( %params, 'op' => "rangegroup" );
croak "missing 'field' attribute"
unless exists $params{'field'};
$self->field( $params{'field'} );
croak "missing 'range-size' attribute"
unless exists $params{'range-size'};
$self->range_size( $params{'range-size'} );
$self->range_start( $params{'range-start'} || 0 );
$self->min_value( $params{'min-value'} )
if exists $params{'min-value'};
$self->max_value( $params{'max-value'} )
if exists $params{'max-value'};
$self->size_scale( $params{'size-scale'} || 1 );
return $self;
}
=pod
=head1 METHODS
=head2 field( [$new_field] )
Returns the name of the DLF field which is used as grouping key.
If the $new_field parameter is set, it changes the grouping field.
This must be the name of a quantity type field in the report
specifications DLF schema.
=cut
sub field {
my ( $self, $field ) = @_;
if (defined $field) {
croak "'$field' isn't a field in the specification's schemas"
unless $self->report_spec()->has_field( $field );
croak "'$field' isn't a bytes, duration, int or number field"
unless is_quantity_type( $self->report_spec()->field( $field )->type() );
$self->{'field'} = $field;
}
return $self->{'field'};
}
#------------------------------------------------------------------------
# Method set_attr_value( $attr_name, $value )
#
# Method used to share the logic of validating the attributes' value.
sub set_attr_value {
my ( $self, $attr_name, $value ) = @_;
my $field = $self->field();
my $type = $self->report_spec()->field( $field )->type();
if ( $value =~ /^\$/ ) {
my $name = substr $value, 1;
croak "$value isn't a defined parameter"
unless $self->report_spec()->has_param( $name );
croak "'$attr_name' attribute: '$value' should be of type $type"
if $self->report_spec()->param( $name )->type() ne $type;
} elsif ( $type eq 'duration' ) {
croak "invalid duration value for '$attr_name' attribute: $value"
unless check_duration( $value );
} elsif ($type eq 'bytes' ) {
croak "invalid bytes value for '$attr_name' attribute: $value"
unless check_bytes( $value );
} else {
croak "invalid numeric value for '$attr_name' attribute: $value"
unless check_number( $value );
}
$attr_name =~ s/-/_/g;
$self->{$attr_name} = $value;
return;
}
=pod
=head2 range_start( [$new_start] )
Returns the number which is the starting bound of the first range group.
If the $new_start parameter is set, it changes the starting bound of the
first range group. This should either be a positive integer or the
name of one of the report specification's parameter.
=cut
sub range_start {
my ( $self, $start ) = @_;
$self->set_attr_value( 'range-start', $start || 0 )
if @_ == 2;
return $self->{'range_start'};
}
=pod
=head2 range_size( [$new_size] )
Returns the width of each range group.
If the $new_size parameter is set, it changes the width of each range
group. This should either be a positive integer or the name of one of
the report specification's parameter.
=cut
sub range_size {
my ( $self, $size ) = @_;
$self->set_attr_value( 'range-size', $size )
if (defined $size );
return $self->{'range_size'};
}
=pod
=head2 min_value( [$new_min] )
Returns the number below which no new range groups will be created.
If the $new_min is set, it changes the lower bound below which no new
groups will be created. This should either be a number or the name of
one of the report specification's parameter.
=cut
sub min_value {
my ( $self, $min ) = @_;
if ( @_ == 2) {
if ( defined $min ) {
$self->set_attr_value( 'min-value', $min );
} else {
delete $self->{'min_value'};
}
}
return $self->{'min_value'};
}
=pod
=head2 max_value( [$new_max] )
Returns the number above which no new range groups will be created.
If the $new_max is set, it changes the upper bound above which no new
groups will be created. This should either be a number or the name of
one of the report specification's parameter.
=cut
sub max_value {
my ( $self, $max ) = @_;
if ( @_ == 2) {
if ( defined $max ) {
$self->set_attr_value( 'max-value', $max );
} else {
delete $self->{'max_value'};
}
}
return $self->{'max_value'};
}
=pod
=head2 size_scale( [$new_size_scale] )
Returns the multiplier that will be apply to each of the range group's
width after the first one.
When this attribute is greater than 1, it creates a logarithmic scale
where each new ranges is $size_scale wider than the precedent one.
If the $new_size_scale parameter is used, it changes the attribute to
this new value. It should be a positive interger or can also be a
parameter reference.
=cut
sub size_scale {
my ( $self, $size_scale ) = @_;
if ( @_ == 2 ) {
if ( defined $size_scale ) {
if ( $size_scale =~ /^\$/ ) {
my $name = substr $size_scale, 1;
croak "$size_scale isn't a defined parameter"
unless $self->report_spec->has_param( $name );
croak "'size-scale' attribute must be a number type"
unless $self->report_spec->param( $name )->type() ne 'number';
} else {
croak "invalid 'size-scale' value. It should be a number"
unless check_number( $size_scale );
}
$self->{'size_scale'} = $size_scale;
} else {
delete $self->{'size_scale'};
}
}
return $self->{'size_scale'};
}
# ------------------------------------------------------------------------
# Method xml_attrs()
#
# Implementation required by Lire::Aggregator
sub xml_attrs {
my ( $self ) = @_;
return qq{field="$self->{'field'}" range-start="$self->{'range_start'}" range-size="$self->{'range_size'}"};
}
# Implements Lire::ReportOperator::name()
sub name {
return 'rangegroup:' . $_[0]->{'field'};
}
# ------------------------------------------------------------------------
# Method create_categorical_info( $info )
#
# Implementation of the method required by Lire::Aggregator
sub create_categorical_info {
my ( $self, $info ) = @_;
my $dlf_field = $self->report_spec()->field( $self->field() );
$info->create_column_info( $self->name(), 'categorical',
$dlf_field->type(), $self->label() );
return;
}
sub build_query {
my ( $self, $query ) = @_;
$self->SUPER::build_query( $query );
my $scale = $self->_param_value( 'size_scale' );
my $func = ( $scale == 1) ? "lr_rangegroup" : "lr_rangegroup_geo";
my @params;
foreach my $p ( qw/range_start range_size size_scale min_value max_value/ )
{
my $value = $self->_param_value( $p );
next if ( $p eq 'size_scale' && $value == 1 );
push @params, defined $value ? $value : 'NULL';
}
$query->add_group_field( $self->name(),
sprintf( '%s(%s,%s)', $func,
sql_quote_name( $self->{'field'} ),
join( ",", @params) ) );
$query->set_sort_spec( $self->name() );
return;
}
sub _param_value {
my ( $self, $param ) = @_;
my $value = $self->{$param};
return undef unless defined $value;
$value = $self->report_spec()->resolve_param_ref( $value );
my $type = $self->report_spec()->field( $self->{'field'} )->type();
if ( $type eq 'bytes' ) {
return size2bytes( $value );
} elsif ( $type eq 'duration' ) {
return duration2sec( $value );
} else {
return $value;
}
return 0;
}
sub create_entry {
my ( $self, $group, $row ) = @_;
my $entry = $group->create_entry();
my $value = $row->{ $self->name() };
unless ( defined $value ) {
$group->missing_cases( $row->{'_lr_nrecords'} );
return undef;
}
my $length = $self->_param_value( 'range_size' );
my $start = $self->_param_value( 'range_start');
my $scale = $self->_param_value( 'size_scale' );
while ( $value < $start || $value >= ( $start + $length ) ) {
$start += $length;
$length *= $scale;
}
my $type = $self->report_spec()->field( $self->{'field'} )->type();
my $content = sprintf( '[%s-%s>', format_numeric_type( $start, $type ),
format_numeric_type( ( $start + $length ), $type ));
$entry->add_name( $content, $value, $length );
return $entry;
}
# Implements Lire::ReportOperator::init_merge()
sub init_merge {
my $self = $_[0];
$self->SUPER::init_merge();
$self->{'_m_min_value'} = $self->_param_value( 'min_value' );
$self->{'_m_max_value'} = $self->_param_value( 'max_value' );
$self->{'_m_size_scale'} = $self->_param_value( 'size_scale' );
die "'size-scale' must be a positive number: $self->{'_m_size_scale'}\n"
unless $self->{'_m_size_scale'} >= 0;
$self->{'_m_range_start'} = $self->_param_value( 'range_start' );
$self->{'_m_range_size'} = $self->_param_value( 'range_size' );
die "'range-size' attribute's value should be positive: $self->{'_m_range_size'}\n"
unless $self->{'_m_range_size'} >= 0;
return;
}
# Implements Lire::Aggregator::init_aggregator_data()
sub init_aggregator_data {
my $self = $_[0];
my $ranges = [];
# Transform the min_value in the range_start parameter
$self->{'_m_range_start'} =
$self->{'_m_min_value'} - $self->{'_m_range_start'}
if ( defined $self->{'_m_min_value'});
if ( defined $self->{'_m_max_value'}) {
my $end_idx;
if ( $self->{'_m_size_scale'} == 1 ) {
$end_idx = int( ($self->{'_m_max_value'} - $self->{'_m_range_start'}) / $self->{'_m_range_size'});
} else {
$end_idx = 0;
while (1) {
my $end = $self->{'_m_range_start'} +
(($self->{'_m_size_scale'} ** $end_idx) * $self->{'_m_range_size'});
last if $self->{'_m_max_value'} < $end;
$end_idx++;
}
}
# Make sure that we have entry until max_value
$ranges->[$end_idx] = undef;
}
return $ranges;
}
# Implements Lire::Aggregator::merge_aggregator_data()
sub merge_aggregator_data {
my ( $self, $group, $ranges ) = @_;
foreach my $e ( $group->entries() ) {
my @names = $e->names();
die "invalid number of names for a rangegroup subreport: ",
scalar @names, "\n"
if @names != 1;
# FIXME: We won't interpolate data. So for merging to succeed
# the whole class must be contained in the new class.
#
# Another possible way to merge is to use the middle of the class
# to determine where the class should be merged. This would be less
# accurate than the current method, but merging would never fail.
my $start = $names[0]{'value'};
my $length = $names[0]{'range'};
# Clamp if necessary
if ( defined $self->{'_m_min_value'}
&& $start < $self->{'_m_min_value'} )
{
croak "incompatible merging parameters: ",
"range is splitted across min value: [$start,",
$start + $length, "> <> $self->{'_m_min_value'}\n"
if $start + $length > $self->{'_m_min_value'};
$start = $self->{'_m_min_value'};
$length = NEGLIGIBLE_QTY;
}
if ( defined $self->{'_m_max_value'}
&& $start > $self->{'_m_max_value'} )
{
$start = $self->{'_m_max_value'};
$length = NEGLIGIBLE_QTY;
}
# Since the $start + $length isn't included in the range, we subtract
# a negligible quantity just to make sure that it falls onto the
# same idx
my ($idx);
if ( $self->{'_m_size_scale'} == 1 ) {
$idx = int( ($start - $self->{'_m_range_start'}) / $self->{'_m_range_size'});
my $eidx = int( (($start + $length) - $self->{'_m_range_start'}
- NEGLIGIBLE_QTY)
/ $self->{'_m_range_size'});
croak "incompatible merge: source range is splitted across ranges:",
"[$start,", $start + $length, "> : start=$self->{'_m_range_start'}; ",
"size=$self->{'_m_range_size'}\n"
if $idx != $eidx;
} else {
$idx = 0;
while (1) {
my $end = $self->{'_m_range_start'} +
(($self->{'_m_size_scale'} ** $idx) * $self->{'_m_range_size'});
last if ( $end > ($start + $length) - NEGLIGIBLE_QTY );
die "incompatible merge: source range is splitted across",
" target ranges: [$start,", $start + $length, "> : ",
"end=$end\n" if $end > $start;
$idx++;
}
}
if ( $idx < 0 ) {
croak "can't reorder ranges when using size-scale != 1. Please set min-value $!"
if $self->{'_m_size_scale'} != 1;
# We have ranges under the first index. Push the ranges to the right.
$self->{'_m_range_start'} = $idx * $self->{'_m_range_size'} + $self->{'_m_range_start'};
@$ranges = ( (undef) x abs($idx), @$ranges );
$idx = 0;
}
my $data = $ranges->[$idx];
unless ( defined $data ) {
$data = [];
my $i = 0;
foreach my $op ( @{$self->ops()} ) {
$data->[$i++] = $op->init_group_data();
}
$ranges->[$idx] = $data;
}
my $i = 0;
foreach my $op ( @{ $self->ops() } ) {
my $value = $e->data_by_name( $op->name );
my $op_data = $data->[$i++];
$op->merge_group_data( $value, $op_data )
if ( $value );
}
}
return;
}
# Implements Lire::Aggregator::end_aggregator_data()
sub end_aggregator_data {
my ( $self, $ranges ) = @_;
# Finalize each ranges
# Either create empty one or call end_group_data on them
my $last_idx = $#$ranges;
my $i = 0;
while ( $i <= $last_idx) {
if ( $ranges->[$i]) {
my $data = $ranges->[$i];
my $j = 0;
foreach my $op ( @{$self->ops} ) {
$op->end_group_data( $data->[$j++] );
}
} else {
my $data = [];
my $j = 0;
foreach my $op ( @{$self->ops} ) {
$data->[$j] = $op->init_group_data();
$op->end_group_data( $data->[$j++] );
}
$ranges->[$i] = $data;
}
$i++;
}
return $self;
}
# Implements Lire::Aggregator::create_group_entries()
sub create_group_entries {
my ( $self, $group, $ranges ) = @_;
for ( my $i=0; $i < @$ranges; $i++ ) {
my $range = $ranges->[$i];
my $start;
if ( $self->{'_m_size_scale'} == 1 ) {
$start = $self->{'_m_range_size'} * $i + $self->{'_m_range_start'};
} else {
if ( $i == 0 ) {
$start = $self->{'_m_range_start'};
} else {
$start = $self->{'_m_range_start'} +
($self->{'_m_size_scale'} ** ($i-1)) * $self->{'_m_range_size'};
}
}
my $row = { $self->name() => $start};
my $entry = $self->create_entry( $group, $row );;
my $j = 0;
foreach my $op ( @{ $self->ops() } ) {
$op->add_entry_value( $entry, $range->[$j++] );
}
}
return;
}
# keep perl happy
1;
__END__
=head1 SEE ALSO
Lire::ReportSpec(3pm), Lire::Group(3pm), Lire::ReportOperator(3pm),
Lire::Aggregator(3pm)
=head1 AUTHORS
Francis J. Lacoste <flacoste@logreport.org>
Wolfgang Sourdeau <wsourdeau@logreport.org>
=head1 VERSION
$Id: Rangegroup.pm,v 1.23 2006/07/23 13:16:29 vanbaal Exp $
=head1 COPYRIGHT
Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org
This file is part of Lire.
Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.
=cut
syntax highlighted by Code2HTML, v. 0.9.1