package Lire::Utils; use strict; use base qw/ Exporter /; use File::Spec; use Fcntl; use File::Basename; use Carp; use Socket; # for gethostbyaddr use Time::Local; use vars qw( @EXPORT_OK ); BEGIN { @EXPORT_OK = qw/ xml_encode latex_encode diff_lists tilde_expand tempfile tempdir tmpdir min max ratio ratio100 shell_quote indent file_content create_file sql_quote_name tree_apply item_index check_param check_object_param deep_copy host_by_addr is_url parse_url period_range unique text_for_width /; } =pod =head1 NAME Lire::Utils - Various general-purpose function. =head1 SYNOPSIS use Lire:Utils qw/ xml_encode /; =head1 DESCRIPTION This module defines several general purpose functions. No functions are exported by default, you have to specify the one you want to import in your namespace when you use the module. =head2 xml_encode( $str ) Converts standard the characters <,>,&," and ' to their XML entities. Example print XML_STREAM xml_encode( $value ); =cut sub xml_encode { my $value = $_[0]; $value =~ s/&/&/g; $value =~ s/)/\$$1\$/g; $str =~ s/--LIRE--BACKSLASH--/\$\\backslash\$/g; return $str; } =pod =head2 tilde_expand( $path ) Does tilde-expansion on a path, if possible. This means that paths of the form ~/foo are transformed to something like /home/user/foo, where "/home/user" is the content of the $HOME variable. Paths of the form ~otheruser/foo are translated similary by a passwd lookup. 
=pod

=cut

# Expands a leading "~" or "~user" path component to a home directory.
# Paths that do not start with a tilde are returned unchanged, and so is
# a "~user" component for which getpwnam() finds no entry.
# Croaks when a bare "~" has to be expanded and $HOME is not set.
sub tilde_expand {
    my $path = $_[0];

    return $path
      unless defined $path && substr( $path, 0, 1 ) eq '~';

    my ( $tilde, $rest ) = split( '/', $path, 2 );
    my $user = substr( $tilde, 1 );    # skip the initial ~

    if ( $user eq '' ) {
        croak('$HOME not set') unless exists $ENV{'HOME'};
        $tilde = $ENV{'HOME'};
    }
    else {
        # Element 7 of the getpwnam() list is the home directory.
        my $dir = ( getpwnam( $user ) )[7];
        $tilde = $dir if defined $dir;
    }

    $tilde .= "/$rest" if defined $rest;
    return $tilde;
}

=pod

=head2 diff_lists( $list1, $list2 )

Compares two lists. If the two lists contain the same items (even if in
a different order) it returns undef. Otherwise a hash reference is
returned which contains the list of new items in the 'new' key and the
list of items to remove (from $list1 to make $list2) in the 'remove'
key.

=cut

sub diff_lists {
    my ( $list1, $list2 ) = @_;

    croak "list1 param must be an array ref: $list1"
      unless ref $list1 eq 'ARRAY';
    croak "list2 param must be an array ref: $list2"
      unless ref $list2 eq 'ARRAY';

    # Work on sorted copies so both lists can be walked in step.
    my @list1 = sort @$list1;
    my @list2 = sort @$list2;

    my @new    = ();
    my @remove = ();
    while ( @list1 && @list2 ) {
        my $d = $list1[0] cmp $list2[0];
        if ( $d < 0 ) {
            # Only in list1.
            push @remove, shift @list1;
        }
        elsif ( $d == 0 ) {
            # In both lists.
            shift @list1;
            shift @list2;
        }
        else {
            # Only in list2.
            push @new, shift @list2;
        }
    }

    # Whatever is left on either side has no counterpart.
    push @remove, @list1;
    push @new,    @list2;

    if ( @remove || @new ) {
        return { 'remove' => \@remove, new => \@new };
    }
    else {
        # Documented contract: an explicit undef means "no difference".
        return undef;
    }
}

=pod

=head1 PORTABILITY FUNCTIONS

For portability across Perl versions, this module defines some
functions that are usually found in the latest version of Perl but that
may not be present in some old ones (the oldest version of Perl we
support is 5.00503).

=head2 tmpdir()

This method (provided by recent versions of File::Spec) returns where
temporary files should go.

=cut

sub tmpdir {
    return File::Spec->can( "tmpdir" ) ? File::Spec->tmpdir : _tmpdir();
}

# Fallback for File::Spec versions without tmpdir(): returns the first
# readable and writable directory among $TMPDIR, /var/tmp and /tmp.
# Croaks when none qualifies.
sub _tmpdir {
    foreach my $dir ( $ENV{'TMPDIR'}, "/var/tmp", "/tmp" ) {
        # $TMPDIR is frequently unset; skip it rather than test undef.
        next unless defined $dir && length $dir;
        return $dir if -d $dir && -r _ && -w _;
    }
    croak "no writeable temporary directory available\n";
}

=pod

=head2 tempfile()

    my $fh = tempfile();
    my $fh = tempfile( $template, 'SUFFIX' => ".txt" )
    my ( $fh, $name ) = tempfile( $template, 'SUFFIX' => ".dlf" );

This is a wrapper around the File::Temp::tempfile Perl function when
available, and it offers a home grown version which should be safe when
it isn't available. The only difference is that the file will always be
created in the directory specified in $ENV{'TMPDIR'} or in the system
temporary directory (typically /tmp) when unset.

The first argument to the function should be a template name containing
at least 6 X (i.e. tempXXXXXX) which will get replaced to generate a
random name. When no arguments are passed, a default template of
tempfileXXXXXX will be used.

Other options can be passed to the function by using 'key' => value
pairs. The only option understood by the home grown version is SUFFIX
which will be appended to the filename. (The Perl version understands
more options, but you shouldn't use them for compatibility.)

The function takes precautions against symlink attacks (and creates the
file with readwrite permission for the owner only). It will die(), if
it fails to create a temporary file after 10 attempts. (This shouldn't
happen unless someone is seriously trying to race with us.)

The function will return in scalar context an anonymous file handle
opened on the temporary file. The temporary file was unlinked after
creation and will thus be deleted automatically when you close the file
handle.

When used in an array context, the function will return a file handle
and the path to the temporary file (this can be useful for debugging
purpose or when you can't pass the file by file handle to another
process). In this case, the file should be deleted manually.

=cut

# Alphabet used to replace the X's of a template. File-level because the
# home grown tempdir implementation uses it as well.
my @chars = ( 'a' .. 'z', 'A' .. 'Z', 0 .. 9, );

sub tempfile {
    # require() instead of a string-eval'ed "use": nothing is recompiled
    # on each call and File::Temp's default exports are not imported into
    # this package.
    local $@;
    my $have_file_temp = eval { require File::Temp; 1 };

    return _tempfile( @_ ) unless $have_file_temp;

    # The trailing DIR pair forces the directory to tmpdir().
    return File::Temp::tempfile( @_, 'DIR' => tmpdir() );
}

# Home grown tempfile() used when File::Temp cannot be loaded.
# Takes an optional template followed by 'key' => value options (only
# SUFFIX is used). Returns a file handle in scalar context and
# ( $fh, $path ) in list context. Dies after 10 failed attempts.
sub _tempfile {
    # Poor's man tempfile, File::Temp is only part of Perl 5.6.1
    my ( $tmpl, %args );
    if ( @_ ) {
        ( $tmpl, %args ) = @_;
        # Only the base name of the template is kept: the file always
        # goes to tmpdir().
        $tmpl = tmpdir() . "/" . basename( $tmpl );
    }
    else {
        $tmpl = tmpdir() . "/tempfileXXXXXX";
    }
    my $suffix = defined $args{'SUFFIX'} ? $args{'SUFFIX'} : '';

    my $want_name = wantarray;

    # Try 10 times to open a file. Every attempt starts again from the
    # pristine template so that a fresh random name is generated and the
    # suffix is appended exactly once.
    for my $attempt ( 1 .. 10 ) {
        ( my $name = $tmpl ) =~ s/X/$chars[rand @chars]/ge;
        $name .= $suffix;

        # O_EXCL makes the creation fail instead of following or reusing
        # something already present under that name.
        if ( sysopen( my $fh, $name, O_RDWR|O_CREAT|O_EXCL, 0600 ) ) {
            binmode $fh;
            unlink $name unless $want_name;
            return $want_name ? ( $fh, $name ) : $fh;
        }
    }
    die "tempfile(): somebody is trying to race with us!\n";
}

=pod

=head2 tempdir()

    my $dir = tempdir();
    my $dir = tempdir( $template )

This is a wrapper around the File::Temp::tempdir Perl function when
available, and it offers a home grown version which should be safe when
it isn't available. The only difference is that the directory will
always be created in the directory specified in $ENV{'TMPDIR'} or in
the system temporary directory (typically /tmp) when unset.

The first argument to the function should be a template name containing
at least 6 X (i.e. tempXXXXXX) which will get replaced to generate a
random name. When no arguments are passed, a default template of
tempdirXXXXXX will be used.

Other options can be passed to the function by using 'key' => value
pairs. The home grown version accepts them but does not use any of
them. (The Perl version understands more options, but you shouldn't use
them for compatibility.)

The function takes precautions against symlink attacks (and creates the
directory with permissions for the owner only). It will die(), if it
fails to create a temporary directory after 10 attempts. (This
shouldn't happen unless someone is seriously trying to race with us.)

=cut
=pod

The function will return the name of the directory that was created.

=cut

sub tempdir {
    # require() instead of a string-eval'ed "use": nothing is recompiled
    # on each call and File::Temp's default exports are not imported into
    # this package.
    local $@;
    my $have_file_temp = eval { require File::Temp; 1 };

    return _tempdir( @_ ) unless $have_file_temp;

    # The trailing DIR pair forces the parent directory to tmpdir().
    return File::Temp::tempdir( @_, 'DIR' => tmpdir() );
}

# Home grown tempdir() used when File::Temp cannot be loaded.
# Takes an optional template; any 'key' => value options after it are
# accepted but not used. Returns the path of the created directory and
# croaks after 10 failed attempts.
sub _tempdir {
    # Poor's man tempdir, File::Temp is only part of Perl 5.6.1
    my ( $tmpl, %args );
    if ( @_ ) {
        ( $tmpl, %args ) = @_;
        # Only the base name of the template is kept: the directory is
        # always created under tmpdir().
        $tmpl = tmpdir() . "/" . basename( $tmpl );
    }
    else {
        $tmpl = tmpdir() . "/tempdirXXXXXX";
    }

    # Try 10 times to create a directory. Every attempt starts again from
    # the pristine template so that a fresh random name is generated.
    for my $attempt ( 1 .. 10 ) {
        ( my $name = $tmpl ) =~ s/X/$chars[rand @chars]/ge;

        # mkdir fails when the name already exists, whatever it is.
        return $name if mkdir $name, 0700;
    }
    croak "tempdir() somebody is trying to race with us!\n";
}

=pod

=head2 min()

    my $least = min(@values);
    my $least = min($a,$b,$c);

=head2 max()

    my $greatest = max(@values);
    my $greatest = max($a,$b,$c);

These find the smallest or largest value in a list of numbers. An empty
list will return undef. Undef values are ignored and will only be
returned if the list is empty or contains only undefined values.

=cut

sub min {
    my $min = undef;
    foreach my $x ( @_ ) {
        next unless defined $x;
        # The first defined value seeds the result.
        $min = $x if !defined $min || $x < $min;
    }
    return $min;
}

sub max {
    my $max = undef;
    foreach my $x ( @_ ) {
        next unless defined $x;
        # The first defined value seeds the result.
        $max = $x if !defined $max || $x > $max;
    }
    return $max;
}

=pod

=head2 ratio( $dividend, $divisor )

Returns $dividend / $divisor and returns "NaN" when $divisor is equal
to 0. It rounds the result to the second decimal.

=cut

sub ratio {
    my ( $dividend, $divisor ) = @_;

    # A false divisor (0, '' or undef) cannot be divided by.
    return "NaN" unless $divisor;
    return sprintf "%.2f", $dividend / $divisor;
}

=pod

=head2 ratio100( $part, $total )

Returns as a percentage $part on $total. This function is safe to use
when $total is equal to 0 (it will return NaN). The percentage is
rounded to the first decimal.

=cut
=pod

=cut

sub ratio100 {
    my ( $dividend, $divisor ) = @_;

    # A false divisor (0, '' or undef) cannot be divided by.
    return "NaN" unless $divisor;
    return sprintf "%.1f", ( $dividend / $divisor ) * 100;
}

=pod

=head2 shell_quote($string)

Return $string in a format that makes it safe to hand out to the shell
so that metacharacters are not interpreted by the shell. This is done by
returning $string wrapped in single quotes and escaping the single
quotes contained in the $string.

=cut

sub shell_quote {
    my $string = $_[0];

    # An undefined value is quoted as the empty string.
    return "''" unless defined $string;

    # Make sure that there is no way to escape the shell single quotes:
    # close the quote, emit an escaped quote, reopen the quote.
    $string =~ s/'/'\\''/g;

    return "'" . $string . "'";
}

=pod

=head2 indent( $string, [ $count ] )

Return $string, indented by $count spaces. If $count is not specified,
a default of 2 will be assumed.

=cut

sub indent {
    my ( $text, $step ) = @_;

    # Test for "no text" explicitly: a plain truth test would also throw
    # away the perfectly valid text "0".
    return '' unless defined $text && length $text;

    $step = 2 unless defined $step;
    my $pad = ' ' x $step;

    my $indented = join( "\n", map { $pad . $_ } split( /\n/, $text ) );

    # split() dropped the final newline; put it back when there was one.
    $indented .= "\n" if $text =~ m/\n$/;

    return $indented;
}

=pod

=head2 check_param( $param, $name, [ $regex | $coderef, $msg ] )

Check that param "$name" is not undefined, and that it optionally
matches the given regexp. Validation can also be achieved through a code
reference passed as the third parameter. The convention in this case is
that the subroutine will return a boolean indicating whether $param is
valid or not.

=cut
=pod

Examples:

    check_param( $req_param, 'req_param' );
    check_param( $param_string, 'param_string', qr/^[a-z]+$/ );
    check_param( $integer, 'integer', qr/^[0-9]+$/,
                 "not a valid integer" );
    check_param( $bool, 'bool', sub { return $_[0] },
                 "boolean was false" );

=cut

sub check_param {
    my ( $param, $name, $regex, $msg ) = @_;

    # Misuse of check_param() itself is reported at the place where
    # check_param() was called.
    my ( $package, $filename, $line ) = caller();
    die "check_param needs at least 2 arguments: a parameter, the parameter\'s name at $filename:$line\n"
      unless ( @_ >= 2 );
    die "parameter 'name' is not a valid name: '$name' at $filename:$line\n"
      unless ( $name =~ m/^[a-zA-Z0-9_]+$/ );

    # A bad parameter value is reported one frame further up, i.e. where
    # the function being validated was called. When there is no such
    # frame, keep the location found above instead of reporting an empty
    # one.
    my @outer = caller( 1 );
    ( $package, $filename, $line ) = @outer if @outer;

    die "missing '$name' parameter at $filename:$line\n"
      unless defined $param;

    # No validator given: being defined is enough.
    return unless defined $regex;

    if ( ref $regex eq 'CODE' ) {
        return if $regex->( $param );
        die( defined $msg
             ? "$msg: '$param' at $filename:$line\n"
             : "'$name' parameter value ('$param') doesn't validate with subroutine at $filename:$line\n" );
    }

    return if $param =~ $regex;
    die( defined $msg
         ? "$msg: '$param' at $filename:$line\n"
         : "'$name' parameter doesn't match '$regex': '$param' at $filename:$line\n" );
}

=pod

=head2 check_object_param( $instance, $name, $class )

Check that param "$instance" is a valid instance of one or more classes
specified in the 'class' parameter. The latter being either a string or
a reference to an array containing one or more such strings.

=cut
=pod

Examples:

    check_object_param( $object, 'object', 'Wawa::Class' );
    check_object_param( $object, 'object',
                        [ 'Wawa::Class', 'Other::Class' ] );

=cut

sub check_object_param {
    my ( $instance, $name, $class ) = @_;

    # Misuse of check_object_param() itself is reported at the place
    # where it was called.
    my ( $package, $filename, $line ) = caller();
    die "check_object_param needs 3 arguments: an object, the object parameter's name, the object's class name at $filename:$line\n"
      unless ( @_ == 3 );
    die "parameter 'name' is not a valid name: '$name' at $filename:$line\n"
      unless ( $name =~ m/^[a-zA-Z0-9_]+$/ );

    # A single class name is handled as a one-element list.
    $class = [ $class ] unless ( ref $class eq 'ARRAY' );
    die "parameter 'class' should contain at least one class name at $filename:$line\n"
      unless ( @{$class} > 0 );
    foreach my $cl ( @{$class} ) {
        die "parameter 'class' contains an invalid class name: '$cl' at $filename:$line\n"
          unless ( $cl =~ m/^[a-zA-Z0-9_]+(::[a-zA-Z0-9_]+)*$/ );
    }

    # A bad instance is reported one frame further up, i.e. where the
    # function being validated was called. When there is no such frame,
    # keep the location found above instead of reporting an empty one.
    my @outer = caller( 1 );
    ( $package, $filename, $line ) = @outer if @outer;

    die "missing '$name' parameter at $filename:$line\n"
      unless defined $instance;

    # Matching any one of the listed classes is enough.
    foreach my $cl ( @{$class} ) {
        return if UNIVERSAL::isa( $instance, $cl );
    }

    if ( @{$class} == 1 ) {
        die "'$name' parameter should be a '$class->[0]' instance, not '$instance' at $filename:$line\n";
    }
    else {
        # Builds "A', 'B' or 'C"; the enclosing quotes come from the
        # message below.
        my $string = join "', '", @{$class}[ 0 .. @{$class} - 2 ];
        $string .= "' or '$class->[-1]";
        die "'$name' parameter should be a '$string' instance, not '$instance' at $filename:$line\n";
    }
}

=pod

=head2 sql_quote_name( $name )

Returns $name wrapped in double quotes when it contains a '-', ':' or
'.' character. Other names are returned unchanged.

=cut

sub sql_quote_name {
    my $name = $_[0];

    return ( $name =~ /[-:.]/ ) ? '"' . $name . '"' : $name;
}

=pod

=head2 tree_apply( $root, $children_func, $apply_func )

Preorder processing: calls the $apply_func method on $root and then on
every node reachable through the $children_func method, which must
return an array reference. Returns an array reference holding the
results in visiting order.

=cut

sub tree_apply {
    my ( $root, $children_func, $apply_func ) = @_;

    check_param( $root,          'root' );
    check_param( $children_func, 'children_func' );
    check_param( $apply_func,    'apply_func' );

    my $results = [ $root->$apply_func() ];

    # Work on a private copy of the children list: the array returned by
    # the node may be the node's own storage and must not be emptied by
    # the traversal.
    my @pending = @{ $root->$children_func() };
    while ( @pending ) {
        my $node = shift @pending;
        push @$results, $node->$apply_func();

        # Children go to the front of the queue, which gives the
        # preorder.
        unshift @pending, @{ $node->$children_func() };
    }

    return $results;
}

=pod

=head2 item_index( $array_ref, $item )

Returns the index of $item in $array. It returns undef if the item isn't
found. $array_ref should be an ARRAY reference and $item should be a
non-null scalar.

=cut

sub item_index {
    my ( $array, $item ) = @_;

    check_param( $array, 'array' );
    croak "'array' should be an array ref, not '$array'"
      unless ( ref $array eq 'ARRAY' );
    check_param( $item, 'item' );

    # Items are compared as strings; the first match wins.
    for my $i ( 0 .. $#{$array} ) {
        return $i if ( $array->[$i] eq $item );
    }

    # Documented contract: an explicit undef means "not found".
    return undef;
}

=pod

=head2 deep_copy( $object, [$exclusion] )

Makes a recursive copy of $object. Cyclic references are maintained in
the copy. Optionally, an array ref of packages for which the objects
shouldn't be deeply copied can be provided.

=cut
=pod

=cut

# Reference identity and storage type are taken from Scalar::Util rather
# than from the stringified reference, which objects can overload.
require Scalar::Util;

sub deep_copy {
    my ( $object, $exclusions ) = @_;

    check_param( $object, 'object' );
    croak "'exclusions' should be an array reference, not '$exclusions'"
      if defined $exclusions && ref $exclusions ne 'ARRAY';
    $exclusions ||= [];

    return _deep_copy( $object, {}, $exclusions );
}

# Recursive worker of deep_copy().
# $seen maps the address of every reference already copied to its copy,
# which is what keeps cycles and shared substructures intact.
# Croaks on reference types it cannot copy.
sub _deep_copy {
    my ( $object, $seen, $exclusions ) = @_;

    # Plain scalars are copied by value.
    return $object unless ref $object;

    my $addr = Scalar::Util::refaddr( $object );
    return $seen->{$addr} if exists $seen->{$addr};

    my $type = ref $object;
    if ( $type eq 'SCALAR' || $type eq 'REF' ) {
        return _clone_scalar_ref( $object, $seen, $exclusions );
    }
    elsif ( $type eq 'ARRAY' ) {
        return _clone_array_ref( $object, $seen, $exclusions );
    }
    elsif ( $type eq 'HASH' ) {
        return _clone_hash_ref( $object, $seen, $exclusions );
    }
    elsif ( $type eq 'Regexp' ) {
        # This is a read-only object
        return $object;
    }
    elsif ( defined Scalar::Util::blessed( $object ) ) {
        return _clone_object( $object, $seen, $exclusions );
    }
    else {
        croak "ref type unsupported by deep_copy(): $object";
    }
}

# Each _clone_* helper registers its (still empty) copy in the seen-map
# before descending, so a reference back to the original met during the
# descent resolves to the copy.

sub _clone_scalar_ref {
    my ( $scalar_ref, $seen_refs, $exclusions ) = @_;

    my $scalar;
    $seen_refs->{ Scalar::Util::refaddr( $scalar_ref ) } = \$scalar;
    $scalar = _deep_copy( $$scalar_ref, $seen_refs, $exclusions );

    return \$scalar;
}

sub _clone_array_ref {
    my ( $array_ref, $seen_refs, $exclusions ) = @_;

    my @array_copy = ();
    $seen_refs->{ Scalar::Util::refaddr( $array_ref ) } = \@array_copy;
    for my $elmnt ( @$array_ref ) {
        push @array_copy, _deep_copy( $elmnt, $seen_refs, $exclusions );
    }

    return \@array_copy;
}

sub _clone_hash_ref {
    my ( $hash_ref, $seen_refs, $exclusions ) = @_;

    my %hash = ();
    $seen_refs->{ Scalar::Util::refaddr( $hash_ref ) } = \%hash;
    foreach my $key ( keys %$hash_ref ) {
        $hash{$key} = _deep_copy( $hash_ref->{$key}, $seen_refs, $exclusions );
    }

    return \%hash;
}

# Copies a blessed reference and blesses the copy into the same class.
# Instances of a class listed in $exclusions are returned as is.
# Croaks when the object is not stored in a scalar, hash or array.
sub _clone_object {
    my ( $object, $seen, $exclusions ) = @_;

    foreach my $excl ( @$exclusions ) {
        return $object if $object->isa( $excl );
    }

    my $class   = ref $object;
    my $storage = Scalar::Util::reftype( $object );
    if ( $storage eq 'SCALAR' || $storage eq 'REF' ) {
        return bless( _clone_scalar_ref( $object, $seen, $exclusions ), $class );
    }
    elsif ( $storage eq 'HASH' ) {
        return bless( _clone_hash_ref( $object, $seen, $exclusions ), $class );
    }
    elsif ( $storage eq 'ARRAY' ) {
        return bless( _clone_array_ref( $object, $seen, $exclusions ), $class );
    }
    else {
        croak "unsupported object storage: '$object'";
    }
}

=pod

=head2 host_by_addr( $ip )

Returns the host name that gethostbyaddr() gives for the IPv4 address
$ip. When the address cannot be resolved, a warning is emitted and undef
is returned. Dies when $ip is missing or isn't accepted by inet_aton().

=cut

# could better use a Perl interface to getnameinfo(3)
sub host_by_addr {
    ## perldoc -f gethostbyaddr
    ## # gethostbyaddr(3)
    ## # See also: logresolve, from e.g. the Debian apache-utils package
    ##
    ## # $? holds:
    ## #
    ## # HOST_NOT_FOUND
    ## #   The specified host is unknown.
    ## #
    ## # NO_ADDRESS or NO_DATA
    ## #   The requested name is valid but does not have an IP address.
    ## #
    ## # NO_RECOVERY
    ## #   A non-recoverable name server error occurred.
    ## #
    ## # TRY_AGAIN
    ## #   A temporary error occurred on an authoritative name server. Try
    ## #   again later.

    my $ip    = $_[0]            or die "usage: gethostbyaddr IPv4address\n";
    my $iaddr = inet_aton( $ip ) or die "$ip isn't an IPv4 address\n";

    my $name = gethostbyaddr( $iaddr, AF_INET );
    unless ( $name ) {
        warn "can't resolve $ip: $? ($!)\n";
        $name = undef;
    }

    return $name;
}

=pod

=head2 unique( $list )

Returns an array reference with the duplicate elements of $list
removed. The first occurrence of each element is kept, in the original
order.

=cut

sub unique {
    my $list = $_[0];

    check_object_param( $list, 'list', 'ARRAY' );

    my $unique = [];
    my %cache  = ();
    foreach my $element ( @$list ) {
        push @$unique, $element unless exists $cache{$element};
        $cache{$element} = 1;
    }

    return $unique;
}

=pod

=head2 text_for_width( $text, $width )

Returns a stripped-down representation of 'text', ensuring its length is
shorter or equal to 'width'. If the original text already fits the given
width, it is returned unchanged, otherwise, it is shortened and '...' is
put in the middle to indicate the cut.

=cut
=pod

=cut

sub text_for_width {
    my ( $text, $width ) = @_;

    my $is_valid_width = sub {
        return ( $_[0] =~ m/^[0-9]+$/ && $_[0] >= 5 );
    };

    check_param( $text, 'text' );
    check_param( $width, 'width', $is_valid_width,
                 "'width' should be a positive, greater than or equal to 5, integer" );

    my $text_len = length( $text );
    if ( $text_len > $width ) {
        # Room left on each side of the '...' marker. The value may be
        # fractional; substr() truncates its numeric arguments.
        my $half = ( $width - 3 ) / 2;

        ( my $head = substr( $text, 0, $half ) ) =~ s/\s*$//g;
        ( my $tail = substr( $text, $text_len - $half ) ) =~ s/^\s*//g;

        $text = join( '...', $head, $tail );
    }

    return $text;
}

=pod

=head2 is_url( $string )

Tells whether $string looks like a URL, i.e. whether it starts with an
alphanumeric scheme followed by '://'. Returns 1 when it does and 0
otherwise.

=cut

sub is_url {
    my $string = $_[0];

    check_param( $string, 'string' );

    return ( $string =~ m@^[a-zA-Z0-9]+://@ ) ? 1 : 0;
}

=pod

=head2 parse_url( $string )

Returns a hash reference containing keys for the following URL parts:
'scheme', 'host', 'port', 'path', 'query', 'fragment'. A part that
wasn't present in the URL has an undefined value.

This function is somewhat 'http' biased and one should use the URI
module for full blown URI parsing.

The function dies if the URL cannot be parsed.

=cut

sub parse_url {
    check_param( $_[0], 'url' );

    # e.g.
    #   http://ds.internic.net/instructions/overview.html#WARNING
    my @parts = ( $_[0] =~ m/^
                             (?:
                               ([^:\/?\#]+):      # scheme
                             )?
                             (?:
                               \/\/([^\/?\#]*)    # authority
                             )?
                             ([^?\#]*)            # path
                             (?:
                               \?([^\#]*)         # query
                             )?
                             (?:
                               \#(.*)             # fragment
                             )?$/x );
    die "invalid url: '$_[0]'" unless @parts;

    my ( $scheme, $authority, $path, $query, $fragment ) = @parts;

    # fullblown
    #
    #   authority     = server | reg_name
    #   server        = [ [ userinfo "@" ] hostport ]
    #   hostport      = host [ ":" port ]
    #   host          = hostname | IPv4address
    #
    # is not yet supported
    my ( $host, $port );
    if ( defined $authority ) {
        ( $host, $port ) = $authority =~ m/^([-\.a-zA-Z0-9]+)(?::(\d+))?$/;
    }

    my %url = ( 'scheme'   => $scheme,
                'host'     => $host,
                'port'     => $port,
                'path'     => $path,
                'query'    => $query,
                'fragment' => $fragment );

    return \%url;
}

=pod

=head2 file_content( $filename )

Returns the content of $filename. Dies if an error occurs.

=cut
=pod

=cut

require Lire::Error;

sub file_content {
    my $file = $_[0];

    check_param( $file, 'filename' );

    # Three-argument open: the name is always taken as a path to read,
    # whatever mode or pipe characters it may contain.
    open( my $fh, '<', $file )
      or croak( Lire::Error::file_not_readable( $file ) );

    # Slurp mode: a single read returns the whole file.
    local $/ = undef;
    my $content = <$fh>;
    close $fh;

    return $content;
}

=pod

=head2 create_file( $filename, [$content], [$utf8_encoding] )

Creates file $filename with $content. The $utf8_encoding flag specifies
whether you want to keep strings in their UTF-8 encoding with versions
of Perl that support it (5.8.0 and above). Croaks when the file cannot
be created or written.

=cut

require Lire::I18N;

sub create_file {
    my ( $filename, $content, $utf8_encoding ) = @_;

    check_param( $filename, 'filename' );

    # A missing content creates an empty file.
    $content = "" unless defined $content;

    open( my $fh, '>', $filename )
      or croak "error creating '$filename': $!";
    Lire::I18N::set_fh_encoding( $fh, 'utf-8' )
      if $utf8_encoding;

    # Output is buffered, so a write error may only show up on close:
    # check both instead of silently leaving a truncated file.
    print {$fh} $content
      or croak "error writing '$filename': $!";
    close $fh
      or croak "error writing '$filename': $!";

    return;
}

=pod

=head2 period_range( $period, $time )

Returns an array reference containing the starting and ending
boundaries for the $period that includes $time. $period should be one of
'hourly', 'daily', 'weekly', 'monthly' or 'yearly'. $time should be in
seconds since epoch.

=cut
=pod

=cut

require Lire::WeekCalculator;

sub period_range {
    my ( $period, $time ) = @_;

    check_param( $period, 'period', qr/^(hourly|daily|weekly|monthly|yearly)$/,
                 "'period' parameter should be one of 'hourly', 'daily', 'weekly', 'monthly' or 'yearly'" );
    check_param( $time, 'time', qr/^\d+$/,
                 "'time' paremeter should be seconds since epoch" );

    # $time was validated above, so it is used as is: epoch 0 is a valid
    # time and must not be replaced by the current time.
    my ( $hour, $day, $month, $year ) = ( localtime( $time ) )[ 2 .. 5 ];

    # localtime() gives the year as an offset from 1900. timelocal() is
    # given the full year because it reinterprets two-digit values.
    my $full_year = $year + 1900;

    if ( $period eq 'hourly' ) {
        my $start = timelocal( 0, 0, $hour, $day, $month, $full_year );
        return [ $start, $start + 3600 ];
    }
    elsif ( $period eq 'daily' ) {
        my $start = timelocal( 0, 0, 0, $day, $month, $full_year );

        # A local day is not always 24 hours long (daylight saving time).
        # 36 hours after midnight always falls inside the following
        # calendar day, whose midnight is the end of the range.
        my ( $next_day, $next_month, $next_year ) =
          ( localtime( $start + 36 * 3600 ) )[ 3 .. 5 ];
        return [ $start,
                 timelocal( 0, 0, 0, $next_day, $next_month, $next_year + 1900 ) ];
    }
    elsif ( $period eq 'weekly' ) {
        my $calc    = Lire::WeekCalculator->new();
        my $week_no = $calc->week_number( $time );

        # week_start() receives the year exactly as localtime() gave it.
        # NOTE(review): the end assumes seven 24-hour days; confirm what
        # week_start() returns before making it daylight-saving aware.
        my $start = $calc->week_start( $year, $week_no );
        return [ $start, $start + 86400 * 7 ];
    }
    elsif ( $period eq 'monthly' ) {
        my $start = timelocal( 0, 0, 0, 1, $month, $full_year );

        # December rolls over to January of the following year.
        my $end = ( $month == 11 )
          ? timelocal( 0, 0, 0, 1, 0, $full_year + 1 )
          : timelocal( 0, 0, 0, 1, $month + 1, $full_year );
        return [ $start, $end ];
    }
    elsif ( $period eq 'yearly' ) {
        return [ timelocal( 0, 0, 0, 1, 0, $full_year ),
                 timelocal( 0, 0, 0, 1, 0, $full_year + 1 ) ];
    }
}

# keep perl happy
1;

__END__

=pod

=head1 AUTHORS

Francis J. Lacoste, Joost van Baal, Wessel Dankers, Wolfgang Sourdeau

=head1 VERSION

$Id: Utils.pm,v 1.67 2006/07/23 13:16:30 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001, 2002, 2004 Stichting LogReport Foundation
LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see COPYING); if not, check with http://www.gnu.org/copyleft/gpl.html. =cut