package Lire::Rangegroup;

use strict;

use base qw/ Lire::Aggregator /;

use Carp;
use POSIX;

use constant NEGLIGIBLE_QTY => 0.000001;

use Lire::DataTypes qw/ check_bytes check_duration check_number duration2sec size2bytes format_numeric_type is_quantity_type /;
use Lire::Utils qw/ sql_quote_name /;

=pod

=head1 NAME

Lire::Group - Base class for implementation of the rangegroup aggregator

=head1 SYNOPSIS

    use Lire::Rangegroup;

=head1 DESCRIPTION

This module is the base class for implementation of the rangegroup
aggregator. This aggregator will split the DLF records based on a
numerical field. The so-called range groups creates numerical classes
(e.g. 1-10, 11-20, etc.) and the DLF record will be grouped with other
records which are in the same class.

=head1 CONSTRUCTOR

=head2 new( %params )

Creates a new instance of a group aggregator. In addition to the
normal report operator parameters, the rangegroup aggregator can take
several parameters:

=over

=item field

This parameter is mandatory. It should be the name of the DLF field
which will be used as key for this aggregator.

=item range-size

This parameter is mandatory. This will be used as the size of each
range group.

=item range-start

The number at which the first range group starts. Defauls to 0.

=item min-value

Value lower than this number will be sorted in the first range group.

=item max-value

Value higher than this number will be sorted in the last range group.

=item size-scale

This parameter can be used to create a logarithmic scale. In this
case, each new range group will be size-scale bigger than the one that
comes before it. For example, setting range-size=5 and size-scale=2
will create ranges like [0-5>, [5-15>, [15-35>, ...

=back

=cut

sub new {
    my $proto = shift;
    my $class = ref( $proto) || $proto;

    my %params = @_;
    my $self = bless {}, $class;
    $self->SUPER::init( %params, 'op' => "rangegroup" );

    croak "missing 'field' attribute"
      unless exists $params{'field'};
    $self->field( $params{'field'} );

    croak "missing 'range-size' attribute"
      unless exists $params{'range-size'};
    $self->range_size( $params{'range-size'} );

    $self->range_start( $params{'range-start'} || 0 );

    $self->min_value( $params{'min-value'} )
      if exists $params{'min-value'};
    $self->max_value( $params{'max-value'} )
      if exists $params{'max-value'};

    $self->size_scale( $params{'size-scale'} || 1 );

    return $self;
}

=pod

=head1 METHODS

=head2 field( [$new_field] )

Returns the name of the DLF field which is used as grouping key.

If the $new_field parameter is set, it changes the grouping field.
This must be the name of a quantity type field in the report
specifications DLF schema.

=cut

sub field {
    my ( $self, $field ) = @_;

    if (defined $field) {
	croak "'$field' isn't a field in the specification's schemas"
	  unless $self->report_spec()->has_field( $field );

	croak "'$field' isn't a bytes, duration, int or number field"
	  unless is_quantity_type( $self->report_spec()->field( $field )->type() );

	$self->{'field'} = $field;
    }

    return $self->{'field'};
}

#------------------------------------------------------------------------
# Method set_attr_value( $attr_name, $value )
#
# Method used to share the logic of validating the attributes' value.
sub set_attr_value {
    my ( $self, $attr_name, $value ) = @_;

    my $field = $self->field();
    my $type  = $self->report_spec()->field( $field )->type();
    if ( $value =~ /^\$/ ) {
	my $name = substr $value, 1;
	    croak "$value isn't a defined parameter"
	      unless $self->report_spec()->has_param( $name );

	croak "'$attr_name' attribute: '$value' should be of type $type"
	  if $self->report_spec()->param( $name )->type() ne $type;

    } elsif ( $type eq 'duration' ) {
	croak "invalid duration value for '$attr_name' attribute: $value"
	  unless check_duration( $value );
    } elsif ($type eq 'bytes' ) {
	croak "invalid bytes value for '$attr_name' attribute: $value"
	  unless check_bytes( $value );
    } else {
	croak "invalid numeric value for '$attr_name' attribute: $value"
	  unless check_number( $value );
    }
    $attr_name =~ s/-/_/g;

    $self->{$attr_name} = $value;

    return;
}

=pod

=head2 range_start( [$new_start] )

Returns the number which is the starting bound of the first range group.

If the $new_start parameter is set, it changes the starting bound of the
first range group. This should either be a positive integer or the
name of one of the report specification's parameter.

=cut

sub range_start {
    my ( $self, $start ) = @_;

    $self->set_attr_value( 'range-start', $start || 0 )
      if @_ == 2;

    return $self->{'range_start'};
}

=pod

=head2 range_size( [$new_size] )

Returns the width of each range group.

If the $new_size parameter is set, it changes the width of each range
group. This should either be a positive integer or the name of one of
the report specification's parameter.

=cut

sub range_size {
    my ( $self, $size ) = @_;

    $self->set_attr_value( 'range-size', $size )
      if (defined $size );

    return $self->{'range_size'};
}

=pod

=head2 min_value( [$new_min] )

Returns the number below which no new range groups will be created.

If the $new_min is set, it changes the lower bound below which no new
groups will be created. This should either be a number or the name of
one of the report specification's parameter.

=cut

sub min_value {
    my ( $self, $min ) = @_;

    if ( @_ == 2) {
	if ( defined $min ) {
	    $self->set_attr_value( 'min-value', $min );
	} else {
	    delete $self->{'min_value'};
	}
    }

    return $self->{'min_value'};
}

=pod

=head2 max_value( [$new_max] )

Returns the number above which no new range groups will be created.

If the $new_max is set, it changes the upper bound above which no new
groups will be created. This should either be a number or the name of
one of the report specification's parameter.

=cut

sub max_value {
    my ( $self, $max ) = @_;

    if ( @_ == 2) {
	if ( defined $max ) {
	    $self->set_attr_value( 'max-value', $max );
	} else {
	    delete $self->{'max_value'};
	}
    }

    return $self->{'max_value'};
}

=pod

=head2 size_scale( [$new_size_scale] )

Returns the multiplier that will be apply to each of the range group's
width after the first one.

When this attribute is greater than 1, it creates a logarithmic scale
where each new ranges is $size_scale wider than the precedent one.

If the $new_size_scale parameter is used, it changes the attribute to
this new value. It should be a positive interger or can also be a
parameter reference.

=cut

sub size_scale {
    my ( $self, $size_scale ) = @_;

    if ( @_ == 2 ) {
	if ( defined $size_scale ) {
	    if ( $size_scale =~ /^\$/ ) {
		my $name = substr $size_scale, 1;
		croak "$size_scale isn't a defined parameter"
		  unless $self->report_spec->has_param( $name );

		croak "'size-scale' attribute must be a number type"
		  unless $self->report_spec->param( $name )->type() ne 'number';

	    } else {
		croak "invalid 'size-scale' value. It should be a number"
		  unless check_number( $size_scale );
	    }
	    $self->{'size_scale'} = $size_scale;
	} else {
	    delete $self->{'size_scale'};
	}
    }

    return $self->{'size_scale'};
}



# ------------------------------------------------------------------------
# Method xml_attrs()
#
# Implementation required by Lire::Aggregator
sub xml_attrs {
    my ( $self ) = @_;

    return qq{field="$self->{'field'}" range-start="$self->{'range_start'}" range-size="$self->{'range_size'}"};
}

# Implements Lire::ReportOperator::name()
sub name {
    return 'rangegroup:' . $_[0]->{'field'};
}

# ------------------------------------------------------------------------
# Method create_categorical_info( $info )
#
# Implementation of the method required by Lire::Aggregator
sub create_categorical_info {
    my ( $self, $info ) = @_;

    my $dlf_field = $self->report_spec()->field( $self->field() );
    $info->create_column_info( $self->name(), 'categorical',
                               $dlf_field->type(), $self->label() );

    return;
}

sub build_query {
    my ( $self, $query ) = @_;

    $self->SUPER::build_query( $query );

    my $scale = $self->_param_value( 'size_scale' );
    my $func = ( $scale  == 1) ? "lr_rangegroup" : "lr_rangegroup_geo";

    my @params;
    foreach my $p ( qw/range_start range_size size_scale min_value max_value/ )
    {
        my $value = $self->_param_value( $p );
        next if ( $p eq 'size_scale' && $value == 1 );
        push @params, defined $value ? $value : 'NULL';
    }

    $query->add_group_field( $self->name(),
                             sprintf( '%s(%s,%s)', $func,
                                      sql_quote_name( $self->{'field'} ),
                                      join( ",", @params) ) );
    $query->set_sort_spec( $self->name() );

    return;
}

sub _param_value {
    my ( $self, $param ) = @_;

    my $value = $self->{$param};
    return undef unless defined $value;

    $value = $self->report_spec()->resolve_param_ref( $value );

    my $type = $self->report_spec()->field( $self->{'field'} )->type();

    if ( $type eq 'bytes' ) {
        return size2bytes( $value );
    } elsif ( $type eq 'duration' ) {
        return duration2sec( $value );
    } else {
        return $value;
    }

    return 0;
}

sub create_entry {
    my ( $self, $group, $row ) = @_;

    my $entry = $group->create_entry();
    my $value = $row->{ $self->name() };

    unless ( defined $value ) {
        $group->missing_cases( $row->{'_lr_nrecords'} );
        return undef;
    }

    my $length = $self->_param_value( 'range_size' );
    my $start = $self->_param_value( 'range_start');
    my $scale = $self->_param_value( 'size_scale' );


    while ( $value < $start || $value >= ( $start + $length ) ) {
        $start += $length;
        $length *= $scale;
    }

    my $type = $self->report_spec()->field( $self->{'field'} )->type();
    my $content = sprintf( '[%s-%s>', format_numeric_type( $start, $type ),
                           format_numeric_type( ( $start + $length ), $type ));
    $entry->add_name( $content, $value, $length );

    return $entry;
}

# Implements Lire::ReportOperator::init_merge()
sub init_merge {
    my $self = $_[0];

    $self->SUPER::init_merge();

    $self->{'_m_min_value'} = $self->_param_value( 'min_value' );
    $self->{'_m_max_value'} = $self->_param_value( 'max_value' );
    $self->{'_m_size_scale'} = $self->_param_value( 'size_scale' );
    die "'size-scale' must be a positive number: $self->{'_m_size_scale'}\n"
      unless $self->{'_m_size_scale'} >= 0;
    $self->{'_m_range_start'} = $self->_param_value( 'range_start' );
    $self->{'_m_range_size'} = $self->_param_value( 'range_size' );
    die "'range-size' attribute's value should be positive: $self->{'_m_range_size'}\n"
      unless $self->{'_m_range_size'} >= 0;

    return;
}

# Implements Lire::Aggregator::init_aggregator_data()
sub init_aggregator_data {
    my $self = $_[0];

    my $ranges = [];

    # Transform the min_value in the range_start parameter
    $self->{'_m_range_start'} =
      $self->{'_m_min_value'} - $self->{'_m_range_start'}
      if ( defined $self->{'_m_min_value'});

    if ( defined $self->{'_m_max_value'}) {
	my $end_idx;
	if ( $self->{'_m_size_scale'} == 1 ) {
	    $end_idx   = int( ($self->{'_m_max_value'} - $self->{'_m_range_start'}) / $self->{'_m_range_size'});
	} else {
	    $end_idx = 0;
	    while (1) {
		my $end = $self->{'_m_range_start'} +
		  (($self->{'_m_size_scale'} ** $end_idx) * $self->{'_m_range_size'});

		last if $self->{'_m_max_value'} < $end;
		$end_idx++;
	    }
	}

	# Make sure that we have entry until max_value
	$ranges->[$end_idx] = undef;
    }

    return $ranges;
}

# Implements Lire::Aggregator::merge_aggregator_data()
sub merge_aggregator_data {
    my ( $self, $group, $ranges ) = @_;

    foreach my $e ( $group->entries() ) {
	my @names = $e->names();
	die "invalid number of names for a rangegroup subreport: ",
	  scalar @names, "\n"
	    if @names != 1;

	# FIXME: We won't interpolate data. So for merging to succeed
	# the whole class must be contained in the new class.
	#
	# Another possible way to merge is to use the middle of the class
	# to determine where the class should be merged. This would be less
	# accurate than the current method, but merging would never fail.
	my $start  = $names[0]{'value'};
	my $length = $names[0]{'range'};

	# Clamp if necessary
	if ( defined $self->{'_m_min_value'}
             && $start < $self->{'_m_min_value'} ) 
        {
	    croak "incompatible merging parameters: ",
	      "range is splitted across min value: [$start,",
		$start + $length, "> <> $self->{'_m_min_value'}\n"
		  if $start + $length > $self->{'_m_min_value'};
	    $start  = $self->{'_m_min_value'};
	    $length = NEGLIGIBLE_QTY;
	}
	if ( defined $self->{'_m_max_value'}
             && $start > $self->{'_m_max_value'} )
        {
	    $start  = $self->{'_m_max_value'};
	    $length = NEGLIGIBLE_QTY;
	}

	# Since the $start + $length isn't included in the range, we subtract
	# a negligible quantity just to make sure that it falls onto the
	# same idx
	my ($idx);
	if ( $self->{'_m_size_scale'} == 1 ) {
	    $idx = int( ($start - $self->{'_m_range_start'}) / $self->{'_m_range_size'});
	    my $eidx = int( (($start + $length) - $self->{'_m_range_start'}
			     - NEGLIGIBLE_QTY)
			    / $self->{'_m_range_size'});

	    croak "incompatible merge: source range is splitted across ranges:",
	      "[$start,", $start + $length, "> : start=$self->{'_m_range_start'}; ",
		"size=$self->{'_m_range_size'}\n"
		  if $idx != $eidx;
	} else {
	    $idx = 0;
	    while (1) {
		my $end = $self->{'_m_range_start'} +
		  (($self->{'_m_size_scale'} ** $idx) * $self->{'_m_range_size'});

		last if ( $end > ($start + $length) - NEGLIGIBLE_QTY );

		die "incompatible merge: source range is splitted across",
		  " target ranges: [$start,", $start + $length, "> : ",
		    "end=$end\n" if $end > $start;

		$idx++;
	    }
	}

	if ( $idx < 0 ) {
	    croak "can't reorder ranges when using size-scale != 1. Please set min-value $!"
	      if $self->{'_m_size_scale'} != 1;

	    # We have ranges under the first index. Push the ranges to the right.
	    $self->{'_m_range_start'} = $idx * $self->{'_m_range_size'} + $self->{'_m_range_start'};
	    @$ranges = ( (undef) x abs($idx), @$ranges );

	    $idx = 0;
	}

	my $data = $ranges->[$idx];
	unless ( defined $data ) {
	    $data = [];

	    my $i = 0;
	    foreach my $op ( @{$self->ops()} ) {
		$data->[$i++] = $op->init_group_data();
	    }

	    $ranges->[$idx] = $data;
	}

	my $i = 0;
	foreach my $op ( @{ $self->ops() } ) {
	    my $value = $e->data_by_name( $op->name );
	    my $op_data = $data->[$i++];
	    $op->merge_group_data( $value, $op_data )
	      if ( $value );
	}
    }
    return;
}

# Implements Lire::Aggregator::end_aggregator_data()
sub end_aggregator_data {
    my ( $self, $ranges ) = @_;

    # Finalize each ranges
    # Either create empty one or call end_group_data on them
    my $last_idx = $#$ranges;
    my $i = 0;
    while ( $i <= $last_idx) {
	if ( $ranges->[$i]) {
	    my $data = $ranges->[$i];
	    my $j = 0;
	    foreach my $op ( @{$self->ops} ) {
		$op->end_group_data( $data->[$j++] );
	    }
	} else {
	    my $data = [];

	    my $j = 0;
	    foreach my $op ( @{$self->ops} ) {
		$data->[$j] = $op->init_group_data();
		$op->end_group_data( $data->[$j++] );
	    }

	    $ranges->[$i] = $data;
	}
	$i++;
    }

    return $self;
}

# Implements Lire::Aggregator::create_group_entries()
sub create_group_entries {
    my ( $self, $group, $ranges ) = @_;

    for ( my $i=0; $i < @$ranges; $i++ ) {
	my $range = $ranges->[$i];

        my $start;
	if ( $self->{'_m_size_scale'} == 1 ) {
	    $start = $self->{'_m_range_size'} * $i + $self->{'_m_range_start'};
	} else {
	    if ( $i == 0 ) {
		$start = $self->{'_m_range_start'};
	    } else {
		$start = $self->{'_m_range_start'} +
		  ($self->{'_m_size_scale'} ** ($i-1)) * $self->{'_m_range_size'};
	    }
	}

        my $row = { $self->name() => $start};
	my $entry = $self->create_entry( $group, $row );;

	my $j = 0;
	foreach my $op ( @{ $self->ops() } ) {
	    $op->add_entry_value( $entry, $range->[$j++] );
	}
    }

    return;
}

# keep perl happy
1;

__END__

=head1 SEE ALSO

 Lire::ReportSpec(3pm), Lire::Group(3pm), Lire::ReportOperator(3pm),
 Lire::Aggregator(3pm)

=head1 AUTHORS

  Francis J. Lacoste <flacoste@logreport.org>
  Wolfgang Sourdeau <wsourdeau@logreport.org>

=head1 VERSION

$Id: Rangegroup.pm,v 1.23 2006/07/23 13:16:29 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html. 

=cut



syntax highlighted by Code2HTML, v. 0.9.1