#!/usr/bin/perl5
#-
#-read_mail_log.pl: Prints a summary of mail routed through system to STDOUT
#-
#- Performance note: summarizing 3 megabytes of sendmail logs
#- takes about 5 minutes and uses about 7 megabytes of virtual memory
#- (2 megabytes to run perl5 and the program, plus 5 mb of internal
#- data structures.)
#-
#-Usage:
#- $0 [-ugm] [-o cache_outfile]
#- [-U user_filter] [-T date_filter]
#- [-y year]
#- [-hvqD] [-i cache_in | syslog ...]
#-
#-Where:
#- -u : print report grouped by user (default)
#- -g : print report grouped by internet domain name
#- -m : print report of mail deliveries
#- -o cache_out : store mail deliveries to cache-file "cache_out"
#-
#- -U user_filter : only summarize mail involving certain users
#- -T date_filter : only summarize mail delivered in a certain time-range
#-
#-
#- -y year : supply year syslogs were written
#- -q : quiet mode (suppress parsing errors and commentary)
#- -i cache_in : read in deliveries from cache-file "cache_in"
#- syslog ...: name of logs to scan (default is to use
#- log which mail.info messages currently go to)
#-
#- -h : print this help message and exit
#- -v : print the version number and exit
#- -D : print debugging information
#-
#
# Version: 0.23
# Author: Rolf Nelson
#
require 5.002; #need perl5.002 or later
use SyslogScan::WhereIs;
use SyslogScan::DeliveryIterator;
use SyslogScan::Summary;
use SyslogScan::ByGroup;
use Getopt::Std;
use strict;
sub inform;
# ----------------------------------------------------------------
# Main program.
#
# 1. Parse the command line (procOpts leaves its results in $::g*).
# 2. Obtain deliveries, either by scanning syslog files or by
#    restoring them from a cache file written by an earlier run.
# 3. For each delivery: feed the summary, append to the output
#    cache, and/or print it, depending on the switches given.
# 4. Print the per-user and/or per-domain reports.
#
# Commentary goes to STDERR through inform(); reports go to STDOUT.
# ----------------------------------------------------------------
procOpts();

# set at getopts() time by T:U: flags
my ($startDate, $endDate) = ($::gStartDate, $::gEndDate);
my ($selfPattern, $otherPattern) = ($::gSelfPattern, $::gOtherPattern);
my $deliveryYear = $::gYear;

# set at getopts() time by gumo:i: flags
my ($reportByGroup, $reportByUser, $reportByDelivery, $cacheOut, $cacheIn) =
    ($::gReportByGroup, $::gReportByUser, $::gReportByDelivery,
     $::gCacheOut, $::gCacheIn);

# With neither syslog names nor an input cache, fall back to the log
# that SyslogScan::WhereIs derives from the syslog configuration.
my @syslogList = @ARGV;
if (! @syslogList and ! defined $cacheIn)
{
    my $file = SyslogScan::WhereIs::guess('/etc/syslog.conf');
    # NOTE(review): assumes guess() reports failure by returning undef;
    # confirm against SyslogScan::WhereIs.
    defined $file
        or die "could not determine which syslog to scan; please name one\n";
    @syslogList = ($file);
}

if (defined $cacheIn)
{
    inform("reading in deliveries from cached file $cacheIn\n");
    # NOTE(review): two-argument open is kept on purpose so that forms
    # such as "-i -" keep working; it also means mode characters and
    # pipes in $cacheIn are honoured.
    open(CACHE_IN,$cacheIn) or die "could not open $cacheIn: $!";
}
else
{
    inform("Scanning ", join(' ',@syslogList), " for deliveries");
    if (defined $startDate)
    {
        # the '.' operator puts localtime() in scalar context, which
        # yields the human-readable date string
        inform("\n which were delivered on or after " .
               localtime($startDate));
        inform("\n AND which were delivered strictly before " .
               localtime($endDate));
    }
    inform("...\n");
}

if (defined $cacheOut)
{
    inform("writing deliveries out to cache file $cacheOut\n");
    # refuse to clobber an existing cache
    die "cache-file $cacheOut already exists, bye" if -e $cacheOut;
    open(CACHE_OUT,">$cacheOut")
        or die "could not write to $cacheOut: $!";
}

my $iter = SyslogScan::DeliveryIterator->new('unknownSender' => 'antiquity',
                                             'unknownSize' => 0,
                                             'startDate' => $startDate,
                                             'endDate' => $endDate,
                                             'defaultYear' => $deliveryYear);
my $syslog;
foreach $syslog (@syslogList)
{
    $iter->appendSyslog($syslog);
}

inform("Each . is a registered delivery:\n") unless $reportByDelivery;

my $summary = SyslogScan::Summary->new();
while (1)
{
    # next delivery comes from the cache if one was given, otherwise
    # from the syslog iterator
    my $delivery;
    if (defined $cacheIn)
    {
        $delivery = SyslogScan::Delivery->restore(\*CACHE_IN);
    }
    else
    {
        $delivery = $iter->next;
    }
    last unless $delivery;

    # the summary is only needed for the -u and -g reports
    if ($reportByGroup or $reportByUser)
    {
        $summary->registerDelivery($delivery,$selfPattern,$otherPattern);
    }
    if (defined $cacheOut)
    {
        $delivery->persist(\*CACHE_OUT);
    }
    if ($reportByDelivery)
    {
        print $delivery->summary();
    }
    else
    {
        inform(".");    # progress indicator
    }
}

# Close the cache files explicitly: buffered write errors only show up
# when the output handle is closed.
if (defined $cacheIn)
{
    close(CACHE_IN);
}
if (defined $cacheOut)
{
    close(CACHE_OUT) or die "could not finish writing $cacheOut: $!";
}

if ($reportByDelivery)
{
    inform("\nReport by delivery finished successfully.\n\n");
}
if ($reportByUser)
{
    inform("\nGenerating report by user-name...\n");
    printSummaryReport($summary);
}
if ($reportByGroup)
{
    inform("Grouping by domain name (could take a while)...\n");
    my $byGroup = SyslogScan::ByGroup->new($summary);
    inform("...finished grouping by domain name.\n");
    inform("\nSorting domain names...\n");
    my $domainName;
    foreach $domainName (sort keys %$byGroup)
    {
        # one total line per domain, then one line per address in it
        my $group = $$byGroup{$domainName};
        printUsageReport($$group{groupUsage},"$domainName TOTAL");
        printSummaryReport($$group{byAddress});
        print "\n";
    }
}
inform("\n$0 finished executing successfully.\n");
exit 0;
# printSummaryReport: print one usage line per address.
#   $summary - reference to a hash keyed by address, whose values are
#              the usage objects handed on to printUsageReport.
# Addresses are listed in default (string) sort order.
sub printSummaryReport
{
    my ($summary) = @_;
    my @addresses = sort keys %$summary;
    my $address;
    foreach $address (@addresses)
    {
        printUsageReport($summary->{$address}, $address);
    }
}
# printUsageReport: print a single report line of the form
#   "<name>: <broadcast volume> bcast, <receive volume> rcvd"
#   $usage - object answering getBroadcastVolume() and
#            getReceiveVolume()
#   $name  - label printed at the start of the line
sub printUsageReport
{
    my ($usage, $name) = @_;

    # fetch both figures up front, each in scalar context
    my $sent     = $usage->getBroadcastVolume();
    my $received = $usage->getReceiveVolume();

    print "$name: ";
    printVolumeReport($sent, "bcast");
    print ", ";
    printVolumeReport($received, "rcvd");
    print "\n";
}
# printVolumeReport: print "<count> msgs/<size> bytes <tag>" with no
# trailing newline.
#   $volume - array reference; element 0 is printed as the message
#             count and element 1 as the byte count
#   $tag    - word appended after the figures (e.g. "bcast")
sub printVolumeReport
{
    my ($volume, $tag) = @_;
    print sprintf("%s msgs/%s bytes %s", $volume->[0], $volume->[1], $tag);
}
# inform: write commentary to STDERR, unless the -q (quiet) switch
# was given.  All arguments are passed straight to print.
sub inform
{
    return if $::opt_q;
    print STDERR @_;
}
#----------------------------------------------------------------
# procOpts: process command-line options
#----------------------------------------------------------------
sub procOpts
{
    # Parses @ARGV with getopts(), rejects switch combinations that do
    # not make sense together, and publishes the results in the $::g*
    # globals that the main program reads.  Every error path goes
    # through showUsage(), which exits.

    ($::opt_v, $::opt_h, $::opt_D) = ();   #avoid warning message
    getopts('hvDgi:mo:quy:T:U:') || showUsage("bad command switches");
    d();

    showUsage()   if $::opt_h;
    showVersion() if $::opt_v;
    $::gbQuiet = 1 if $::opt_q;

    # check for incompatibilities

    # The user filter only acts while a summary is being built, so it
    # is rejected together with the delivery list (-m) or with writing
    # a cache (-o).  The cache switch is -o; getopts knows no -c.
    if (defined $::opt_U and ($::opt_m or defined $::opt_o))
    {
        showUsage("-m|-o incompatible with -U, sorry");
    }

    # defined() rather than truth, so a cache file named "0" counts
    if (defined $::opt_i)
    {
        showUsage("-i incompatible with <syslog ...>") if @ARGV;
        showUsage("-i incompatible with -T; please time-filter while cacheing")
            if defined $::opt_T;
    }

    # No report requested at all: fall back to the per-user report.
    # NOTE(review): -o on its own still gets this default report, as
    # it always has; confirm whether a cache-only run was intended.
    if (! $::opt_g and ! $::opt_m and ! $::opt_u)
    {
        $::opt_u = 1;
        inform("Using default -u option\n\n");
    }

    ($::gReportByGroup, $::gReportByUser, $::gReportByDelivery,
     $::gCacheOut, $::gCacheIn) =
        ($::opt_g, $::opt_u, $::opt_m, $::opt_o, $::opt_i);
    $::gYear = $::opt_y;

    populateGlobalTimeFilter($::opt_T) if defined($::opt_T);
    populateGlobalUserFilter($::opt_U) if defined($::opt_U);
}
# populateGlobalUserFilter: turn the -U argument into the two pattern
# strings stored in $::gSelfPattern and $::gOtherPattern.
#
#   $userFilter - either "domain" or "domain:NOT:otherdomain"
#                 (the ":NOT:" separator is matched case-insensitively);
#                 falls back to $::opt_U when no argument is passed.
#
# Resulting globals:
#   $::gSelfPattern  - the first domain with every '.' escaped and a
#                      trailing '$' anchor
#   $::gOtherPattern - undef for the plain form; for the :NOT: form a
#                      negative look-ahead built from the second domain
sub populateGlobalUserFilter
{
    my $userFilter = shift;
    $userFilter = $::opt_U unless defined $userFilter;

    my ($selfSwitch, $notOtherSwitch);
    if ($userFilter =~ /(.+):NOT:(.+)/i)
    {
        ($selfSwitch, $notOtherSwitch) = ($1, $2);
    }
    else
    {
        $selfSwitch = $userFilter;
    }

    $selfSwitch =~ s/\./\\\./g;     # escape for pattern
    $selfSwitch .= '$';

    my $otherSwitch;
    if (defined $notOtherSwitch)
    {
        $notOtherSwitch =~ s/\./\\\./g;     # escape for pattern
        $notOtherSwitch .= '$';
        # reverse pattern; the end anchor ends up doubled, which is
        # redundant but kept so the generated text does not change
        $otherSwitch = '^(?!.*' . $notOtherSwitch . '$)';
    }

    $::gSelfPattern  = $selfSwitch;
    $::gOtherPattern = $otherSwitch;
}
# populateGlobalTimeFilter: turn the -T argument into the time window
# stored in $::gStartDate (inclusive) and $::gEndDate (exclusive).
#
#   $timeFilter - either "month.day.year" (one calendar day in local
#                 time) or "start..end" (two raw epoch values)
#
# Anything else, including a calendar date that Time::Local rejects,
# ends in showUsage(), which exits.
sub populateGlobalTimeFilter
{
    my $timeFilter = shift;     # $::opt_T

    if ($timeFilter =~ /^(\d+)\.(\d+)\.(\d+)$/)
    {
        my ($mon, $day, $year) = ($1, $2, $3);

        # core module; the old perl4 library timelocal.pl is no longer
        # shipped with perl
        require Time::Local;

        my ($start, $end);
        eval
        {
            # The year is handed over as typed: Time::Local takes a
            # four-digit year literally, so it is not guessed from a
            # two-digit value.
            $start = Time::Local::timelocal(0, 0, 0, $day, $mon - 1, $year);

            # The window ends at the next local midnight.  Stepping 36
            # hours ahead lands near noon of the following day even
            # when a daylight-saving change makes the day 23 or 25
            # hours long.
            my @nextDay = localtime($start + 36 * 60 * 60);
            $end = Time::Local::timelocal(0, 0, 0, $nextDay[3], $nextDay[4],
                                          $nextDay[5] + 1900);
        };
        # timelocal croaks on impossible dates such as 13.45.1996
        showUsage("bad -T date format: $timeFilter") if $@;

        $::gStartDate = $start;
        $::gEndDate   = $end;
    }
    elsif ($timeFilter =~ /^(\d+)\.\.(\d+)$/)
    {
        $::gStartDate = $1;
        $::gEndDate   = $2;
    }
    else
    {
        showUsage("bad -T date format: $timeFilter");
    }
}
#----------------------------------------------------------------
# showUsage : display a usage string, then exit.
#----------------------------------------------------------------
sub showUsage
{
    # $errMsg is optional.  When it is non-empty it is reported first
    # and the exit status is 1; otherwise the status is 0.
    my ($errMsg) = @_;
    my $failed = ($errMsg ne "");

    print STDERR "Usage error: $errMsg\n\n" if $failed;

    # The help text lives in the specially marked comment lines at the
    # top of this script, so rewind DATA and re-read the script itself.
    seek(DATA,0,0);
    my $line;
    while (defined($line = <DATA>))
    {
        next unless $line =~ s/^\#\-//;    # only marked lines, marker removed
        $line =~ s/\$0/$0/;                # fill in the program name
        next if $line =~ /^\-/;            # skip separator rules
        print STDERR $line;
    }
    exit($failed);
}
#----------------------------------------------------------------
# showVersion : print Version and exit.
#----------------------------------------------------------------
sub showVersion
{
    # Rewind DATA so the script's own text is read from the top, echo
    # every line matching the pattern below to STDERR, then exit with
    # status 0.
    seek(DATA,0,0);
    my $line;
    while (defined($line = <DATA>))
    {
        next unless $line =~ /\s+Version:/;
        print STDERR $line;
    }
    exit(0);
}
#----------------------------------------------------------------
# d : print debugging message if -D verbose flag is on.
#----------------------------------------------------------------
sub d
{
    # Silent unless -D was given.  Called without a message (or with
    # an empty one) it announces debug mode; otherwise it prints the
    # message followed by a newline.  Output goes to STDERR.
    return unless $::opt_D;

    my ($msg) = @_;
    if ($msg ne "")
    {
        print STDERR $msg, "\n";
        return;
    }
    print STDERR "found -D flag; running $0 in verbose DEBUG mode.\n";
}
__END__
=head1 NAME
read_mail_log.pl -- Summarizes amount of mail routed through host,
sorted by e-mail address
=head1 SYNOPSIS
# summarize mail from syslog by user-name
% read_mail_log.pl
# which, if your mail logging goes to /var/log/syslog, is equivalent to:
% read_mail_log.pl -u /var/log/syslog
# summarize mail by internet domain name from /var/log/syslog,
# suppressing parse errors
% read_mail_log.pl -q -g /var/log/syslog
# summarize mail by mail deliveries, filtering out mail which
# was not delivered on September 18 1996
% read_mail_log.pl -m -T 9.18.1996 /var/log/syslog
# cache mail deliveries to file ./syslog.cache
% read_mail_log.pl -o syslog.cache /var/log/syslog
# now read deliveries in from cache, and summarize the usage
# of all users at your domain
% read_mail_log.pl -i syslog.cache -U `hostname -d`
# now summarize the usage of all users at foo.com, not counting
# mail sent to/from bar.com
% read_mail_log.pl -i syslog.cache -U foo.com:NOT:bar.com
=head1 DESCRIPTION
Usage:
read_mail_log.pl [-ugm] [-o cache_outfile]
[-U user_filter] [-T date_filter]
[-y year]
[-hvqD] [-i cache_in | syslog ...]
Where:
-u : print report grouped by user (default)
-g : print report grouped by internet domain name
-m : print report of mail deliveries
-o cache_out : store mail deliveries to cache-file "cache_out"
-U user_filter : only summarize mail involving certain users
-T date_filter : only summarize mail delivered in a certain time-range
-y year : supply year syslogs were written
-q : quiet mode (suppress parsing errors and commentary)
-i cache_in : read in deliveries from cache-file "cache_in"
syslog ...: name of logs to scan (default is to use
log which mail.info messages currently go to)
-h : print this help message and exit
-v : print the version number and exit
-D : print debugging information
=head2 CACHES
To save time for multiple reports, you can cache the deliveries
generated from an execution of read_mail_log.pl with the C<-o> flag.
The cachefile specified may not already exist.
Subsequent executions can read in the information from the cachefile
and increase the execution speed by a factor of about 10.
=head2 FILTERS
There are two legal formats for user filters:
-U foo.com (summarizes mail foo.com users sent or delivered)
-U foo.com:NOT:bar.com (summarizes mail foo.com users sent or delivered
to users who are _not_ at bar.com)
There are two legal formats for date filters:
-T 9.14.1996
-T 842659200..842745600
Both of these filters will process only mail successfully delivered on
Sept. 14, 1996 (the second one as shown assumes the local time zone is
UTC). The second format allows you to specify any two
bounding time_t values such as those produced by timelocal.pl.
=head2 HOW CACHES AND FILTERS INTERACT
The C<-T> date/time filter should only act upon the data as it is
generated from a syslog file. Using the C<-T> filter when reading
from a cachefile is not allowed.
The C<-U> address/user filter should only act upon the data as it is
being generated into a user or domain summary. Using the C<-U> filter
when writing to a cachefile or when generating only a list of
deliveries is not allowed.
So, these two lines are legal and will generate a summary of mail sent
and received by users at mydomain.org on 9.18.1996:
read_mail_log.pl -T 9.18.1996 -o /tmp/syslog.cache /var/log/syslog
read_mail_log.pl -u -U mydomain.org -i /tmp/syslog.cache
But neither of these lines is currently legal:
read_mail_log.pl -U mydomain.org -o /tmp/syslog.cache /var/log/syslog
read_mail_log.pl -u -T 9.18.1996 -i /tmp/syslog.cache
=head1 PERFORMANCE
Expect processing mail deliveries to take about 90 sec/megabyte of
mail log-lines. If you expect to run multiple reports, consider
cacheing your syslog with the C<-o> switch.
> ls -lL syslog.960801
-rw-r--r-- 1 rolf 30 2364752 Aug 5 18:58 syslog.960801
> time read_mail_log.pl -m -o /tmp/syslog.cache syslog.960801 > /dev/null 2>&1
184.226s real 178.220s user 1.560s system 97%
> cat big_file > /dev/null # clear out cache for performance test
> time read_mail_log.pl -m -i /tmp/syslog.cache > /dev/null 2>&1
17.801s real 14.540s user 0.530s system 84%
Summarizing mail by delivery takes up constant memory. Summarizing by
user-name takes up O(n) memory; expect roughly 1 extra megabyte of
virtual memory usage per megabyte of syslog file.
=head1 AUTHOR and COPYRIGHT
The author (Rolf Nelson) can currently be e-mailed as
rolf@usa.healthnet.org.
This code is Copyright (C) SatelLife, Inc. 1996. All rights reserved.
This code is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
In no event shall SatelLife be liable to any party for direct,
indirect, special, incidental, or consequential damages arising out of
the use of this software and its documentation (including, but not
limited to, lost profits) even if the authors have been advised of the
possibility of such damage.
=head1 SEE ALSO
L<SyslogScan::DeliveryIterator>, L<SyslogScan::Summary>,
L<SyslogScan::WhereIs>
syntax highlighted by Code2HTML, v. 0.9.1