package Lire::Timegroup; use strict; use base qw/ Lire::Aggregator /; use Carp; use POSIX qw/ setlocale strftime LC_TIME /; use Lire::DataTypes qw/ check_duration daily_duration duration2sec monthly_duration weekly_duration yearly_duration/; use Lire::Utils qw/ sql_quote_name check_param /; =pod =head1 NAME Lire::Timegroup - Base class for implementation of the timegroup aggregator =head1 SYNOPSIS use Lire::Timegroup; =head1 DESCRIPTION This module is the base class for implementation of the timegroup aggregator. This aggregator will split the DLF records based on a time period controlled throught the period attribute. For example, using 1d as the period value, this aggregator will group all records in the same day period together. =head1 CONSTRUCTOR =head2 new( %params ) Creates a new instance of a timegroup aggregator. In addition to the normal report operator parameters, the timegroup aggregator can take several parameters: =over =item field This optional parameter contains the DLF field which contains the time value used to group the DLF records together. See the field() method for more information. =item period This mandatory parameter should contains the period's length that will be used to group the records. See the period() method for more information. =back =cut sub new { my ( $class, %params ) = @_; check_param( $params{'period'}, 'period' ); my $self = bless {}, $class; $self->SUPER::init( %params, 'op' => "timegroup" ); $self->field( $params{'field'} ); $self->period( $params{'period'} ); return $self; } =pod =head1 METHODS =head2 field( [$new_field] ) Returns the DLF field's name that is used to group the DLF records. This should be a valid timestamp DLF field in the current schema. By default, the default timestamp field of the DLF schema is used. You can change the field by passing a $new_field parameter. =cut sub field { my ( $self, $field ) = @_; if (@_ == 2 ) { if ( defined $field ) { croak "'$field' isn't a valid field for the specification's schemas" unless $self->report_spec()->has_field( $field ); croak "'$field' isn't a timestamp field" unless $self->report_spec()->field( $field )->type() eq "timestamp"; } else { $field = $self->report_spec()->schema()->timestamp_field()->name(); } $self->{'field'} = $field; } return $self->{'field'}; } =pod =head2 period( [$new_period]) Returns the period's length in which the records are grouped. This can either be a duration value or the name of a report specification's parameter containing a duration value. The period's length can be changed by using the $new_period parameter. =cut sub period { my ( $self, $period ) = @_; if (@_ == 2 ) { check_param( $period, 'period' ); if ( $period =~ /^\$/ ) { my $name = substr $period, 1; croak "parameter '$name' isn't defined" unless $self->report_spec()->has_param( $name ); my $type = $self->report_spec()->param( $name )->type(); croak "parameter '$name' isn't a 'duration' parameter: '$type'" unless $type eq "duration"; } else { croak "'period' parameter isn't a valid duration: '$period'" unless check_duration( $period ); } my $value = $self->report_spec()->resolve_param_ref( $period ); croak "can't use multiple with period of type 'd'" unless ( !daily_duration( $value ) || $value =~ m/^\s*1\s*d/ ); #1 $self->{'period'} = $period; } return $self->{'period'}; } # Implements Lire::ReportOperator::name sub name { return "timegroup:" . $_[0]->{'field'}; } # ------------------------------------------------------------------------ # Method create_categorical_info( $info ) # # Implementation of the method required by Lire::Aggregator sub create_categorical_info { my ( $self, $info ) = @_; my $dlf_field = $self->report_spec()->field( $self->field() ); $info->create_column_info( $self->name(), 'categorical', $dlf_field->type(), $self->label() ); } # ------------------------------------------------------------------------ # Method xml_attrs( $info ) # # Implementation of the method required by Lire::Aggregator sub xml_attrs { my ( $self ) = @_; return "field=\"$self->{'field'}\" period=\"$self->{'period'}\""; } sub build_query { my ( $self, $query ) = @_; $self->SUPER::build_query( $query ); my $period = $self->report_spec()->resolve_param_ref( $self->{'period'} ); my ( $mult, $unit ) = $period =~ /(\d+)\s*(\w)/; my ($func, $param); if ( $unit eq 'y' ) { ( $func, $param ) = ('lr_timegroup_year', ",$mult" ); } elsif ( $unit eq 'M' ) { ( $func, $param ) = ('lr_timegroup_month', ",$mult" ); } elsif ( $unit eq 'w' ) { ( $func, $param ) = ('lr_timegroup_week', ",$mult" ); } elsif ( $unit eq 'd' ) { ( $func, $param ) = ( 'lr_timegroup_day', '' ); } else { $mult = duration2sec( $period ); ( $func, $param ) = ('lr_timegroup_sec', ",$mult" ); } $query->add_group_field( $self->name(), sprintf( '%s(%s%s)', $func, sql_quote_name( $self->{'field'} ), $param) ); $query->set_sort_spec( $self->name() ); return; } sub create_entry { my ( $self, $group, $row ) = @_; my $entry = $group->create_entry(); my $value = $row->{ $self->name() }; unless ( defined $value ) { $group->missing_cases( $row->{'_lr_nrecords'} ); return undef; } my $period = $self->report_spec()->resolve_param_ref( $self->{'period'} ); my $period_sec = duration2sec( $period ); my ( $mult, $unit ) = $period =~ /(\d+)\s*(\w)/; my $fmt; if ( $unit eq 'y' ) { $fmt = '%Y'; } elsif ( $unit eq 'M' ) { $fmt = '%B %Y'; } elsif ( $unit eq 'w' ) { $fmt = new Lire::WeekCalculator()->strformat(); } elsif ( $unit eq 'd' ) { $fmt = '%Y-%m-%d'; } elsif ($self->_is_day_change( $group, $value ) ){ $fmt = '%Y-%m-%d %H:%M';} else { $fmt = ' %H:%M'; } my $old_locale = setlocale( LC_TIME, 'C' ); $entry->add_name( strftime( $fmt, localtime $value), $value, $period_sec ); setlocale( LC_TIME, $old_locale ); return $entry; } sub _is_day_change { my ( $self, $group, $value ) = @_; my @entries = $group->entries(); return 1 unless @entries > 1; # -1 is the entry currently being created my $name = $entries[-2]->data_by_name( $self->name() ); my $new_day = (localtime $value)[3]; my $old_day = (localtime $name->{'value'})[3]; return $new_day != $old_day; } # Implements Lire::ReportOperator::init_merge() sub init_merge { my $self = $_[0]; $self->SUPER::init_merge(); my $period = $self->report_spec()->resolve_param_ref( $self->period() ); $self->{'period_sec'} = duration2sec( $period ); my ( $multiple ) = $period =~ /^(\d+)/; if ( monthly_duration( $period ) ) { $self->{'helper'} = Lire::Timegroup::MonthHelper->new( $self->{'period_sec'}, $multiple); } elsif (weekly_duration( $period ) ) { $self->{'helper'} = Lire::Timegroup::WeekHelper->new( $self->{'period_sec'}, $multiple); } elsif ( yearly_duration( $period ) ) { $self->{'helper'} = Lire::Timegroup::YearHelper->new( $self->{'period_sec'}, $multiple ); } else { $self->{'helper'} = Lire::Timegroup::SecHelper->new( $self->{'period_sec'}, $multiple ); } $self->{'_merge_started'} = 0; return; } sub init_slice_data { my ( $self, $idx ) = @_; my $data = [ $self->{'helper'}->slice_start( $idx ) ]; my $i = 1; foreach my $op ( @{$self->ops()} ) { $data->[$i++] = $op->init_group_data(); } return $data; } # Implements Lire::Aggregator::init_aggregator_data() sub init_aggregator_data { return []; } # Implements Lire::Aggregator::merge_aggregator_data() sub merge_aggregator_data { my ( $self, $group, $timeslices ) = @_; # INVARIANT: entries are assumed to be sorted on timestamp and the # length attribute will be identical across all the entries. This # is respected by our create_group_entries() method my $first = 1; foreach my $e ( $group->entries() ) { my @names = $e->names(); die "invalid number of names for a timegroup aggregator: ", scalar @names, "\n" if @names != 1; my $time = $names[0]{'value'}; # Check period compatibility $self->{'start'} if ( $first ) { my $length = $names[0]{'range'}; croak "incompatible merge: source period isn't compatible ", "with new period: source=$length; target=$self->{'period_sec'}\n" if $self->{'period_sec'} < $length || $self->{'period_sec'} % $length; $first = 0; unless ( $self->{'_merge_started'} ) { $self->{'helper'}->init( $time ); $self->{'_merge_started'} = 1; } } my $idx = $self->{'helper'}->find_idx( $time ); if ( $idx < 0 ) { unshift @$timeslices, (undef) x abs $idx; $self->{'helper'}->init( $time ); $idx = 0; } my $data = $timeslices->[$idx]; $timeslices->[$idx] = $data = $self->init_slice_data( $idx ) unless defined $data; my $i = 1; foreach my $op ( @{$self->ops()} ) { my $value = $e->data_by_name( $op->name() ); my $op_data = $data->[$i++]; $op->merge_group_data( $value, $op_data ) if ( $value ); } } return; } # Implements Lire::Aggregator::end_aggregator_data() sub end_aggregator_data { my ( $self, $timeslices ) = @_; for ( my $i=0; $i < @$timeslices; $i++ ) { # Create empty set $timeslices->[$i] = $self->init_slice_data( $i ) unless $timeslices->[$i]; my $data = $timeslices->[$i]; my $j = 1; foreach my $op ( @{$self->ops()} ) { $op->end_group_data( $data->[$j++] ); } } return; } # Implements Lire::Aggregator::create_group_entries() sub create_group_entries { my ( $self, $group, $timeslices ) = @_; foreach my $tslice ( @$timeslices ) { my $row = { $self->name() => $tslice->[0] }; my $entry = $self->create_entry( $group, $row ); my $i = 1; foreach my $op ( @{$self->ops()} ) { $op->add_entry_value( $entry, $tslice->[$i++] ); } } return; } package Lire::Timegroup::Helper; sub new { my ( $class, $period, $multiple ) = @_; return bless { 'period' => $period, 'multiple' => $multiple, }, $class; } package Lire::Timegroup::SecHelper; use base qw/Lire::Timegroup::Helper/; use Time::Timezone; use POSIX qw/floor/; sub init { my ( $self, $start ) = @_; $self->{'start'} = int( $start / $self->{'period'} ) * $self->{'period'}; # The start of the period is offset by the timezone offset. We # need only do this once, because UTC doesn't have a standard vs. # daylight saving issue. my $offset = tz_local_offset( $self->{'start'} ); $self->{'start'} -= $offset if abs($offset) < $self->{'period'}; return; } sub find_idx { my ( $self, $time ) = @_; return floor( ($time - $self->{'start'}) / $self->{'period'} ); } sub slice_start { my ( $self, $idx ) = @_; return $idx * $self->{'period'} + $self->{'start'}; } package Lire::Timegroup::WeekHelper; use base qw/Lire::Timegroup::Helper/; use Lire::WeekCalculator; use POSIX qw/floor/; sub week_year { my ( $week_idx, $time ) = @_; my ( $month, $year ) = (localtime( $time ))[4,5]; # ISO: Last week of December as week 01 of next year $year++ if $month == 11 && $week_idx == 0; # All: First week of january as week 52 or 53 $year-- if $month == 0 && $week_idx > 50; return $year + 1900; } sub init { my ( $self, $start ) = @_; $self->{'week_calc'} = new Lire::WeekCalculator(); $self->{'week_start'} = $self->{'week_calc'}->week_idx( $start ); $self->{'year_start'} = week_year( $self->{'week_start'}, $start ); return; } sub find_idx { my ( $self, $time ) = @_; my $week_idx = $self->{'week_calc'}->week_idx( $time ); my $year = week_year( $week_idx, $time ); my $year_diff = $year - $self->{'year_start'}; my $idx; if ( $year_diff > 0 ) { $idx = -$self->{'week_start'}; for ( my $i=$self->{'year_start'}; $i < $self->{'year_start'}; $i++ ) { $idx += $self->{'week_calc'}->last_week_of_year( $i ); } } elsif ( $year_diff < 0 ) { $idx = -$self->{'week_start'}; for ( my $i=$self->{'year_start'}-1; $i > $year; $i-- ) { $idx -= $self->{'week_calc'}->last_week_of_year( $i ); } $idx -= $self->{'week_calc'}->last_week_of_year( $year ) - $week_idx; } elsif ( $year_diff == 0 && $week_idx < $self->{'week_start'} ) { $idx = - ( $self->{'week_start'} - $week_idx ); } else { $idx = $week_idx - $self->{'week_start'}; } return floor( $idx / $self->{'multiple'} ); } sub slice_start { my ( $self, $idx ) = @_; my $week_idx = $idx * $self->{'multiple'}; my $year = $self->{'year_start'}; my $year_week = $self->{'week_calc'}->last_week_of_year( $year ); if ( $week_idx > ( $year_week - $self->{'week_start'} ) ) { $week_idx -= $year_week - $self->{'week_start'}; $year++; while ( $week_idx > $self->{'week_calc'}->last_week_of_year( $year ) ) { $week_idx -= $self->{'week_calc'}->last_week_of_year( $year ); $year++; } } else { $week_idx += $self->{'week_start'}; } return $self->{'week_calc'}->week_start( $year, $week_idx + 1 ); } package Lire::Timegroup::MonthHelper; use base qw/Lire::Timegroup::Helper/; use Time::Local; use POSIX qw/floor/; sub init { my ( $self, $start ) = @_; my ($month,$year) = (localtime( $start ))[4,5]; $self->{'month_start'} = $month; $self->{'year_start'} = $year; return; } sub find_idx { my ( $self, $time ) = @_; my ($month,$year) = (localtime( $time ))[4,5]; my $year_diff = $year - $self->{'year_start'}; my $idx = 12 * $year_diff - $self->{'month_start'} + $month; return floor( $idx / $self->{'multiple'} ); } sub slice_start { my ( $self, $idx ) = @_; my $month = $idx * $self->{'multiple'} + $self->{'month_start'}; my $year_offset = int( $month / 12 ); $month -= $year_offset * 12; return timelocal( 0, 0, 0, 1, $month, $self->{'year_start'} + $year_offset ); } package Lire::Timegroup::YearHelper; use base qw/Lire::Timegroup::Helper/; use Time::Local; use POSIX qw/floor/; sub init { my ( $self, $start ) = @_; $self->{'start'} = (localtime( $start ))[5]; return; } sub slice_start { my ( $self, $idx ) = @_; my $year_off = $idx * $self->{'multiple'}; return timelocal( 0, 0, 0, 1, 0, $self->{'start'} + $year_off ); } sub find_idx { my ( $self, $time ) = @_; my $year = (localtime( $time ))[5]; return floor( ($year - $self->{'start'}) / $self->{'multiple'} ); } # keep perl happy 1; __END__ =head1 SEE ALSO Lire::ReportSpec(3pm), Lire::Group(3pm), Lire::ReportOperator(3pm), Lire::Timeslot(3pm) =head1 AUTHORS Francis J. Lacoste Wolfgang Sourdeau =head1 VERSION $Id: Timegroup.pm,v 1.33 2006/07/23 13:16:30 vanbaal Exp $ =head1 COPYRIGHT Copyright (C) 2001-2004 Stichting LogReport Foundation LogReport@LogReport.org This file is part of Lire. Lire is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =cut