#! /usr/bin/perl -w # Copyright (c) 1997 University of Cambridge. # Written by Philip Hazel for the exim mailer and placed under the GPL # (ph10@cus.cam.ac.uk, P.Hazel@ucs.cam.ac.uk). # Revised by Christoph Lameter for use with zmailer (clameter@waterf.org). # Perl script to generate statistics from one or more Zmailer log files. # Usage: zmailstats [] ... # 1997-05-18 Reworked for the Zmailer (Christoph Lameter) # # 1996-05-21: Ignore lines not starting with valid date/time, just in case # these get into a log file. # 1996-11-19: Add the -h option to control the size of the histogram, # and optionally turn it off. # Use some Perl 5 things; it should be everywhere by now. # Add the Perl -w option and rewrite so no warnings are given. # Add the -t option to control the length of the "top" listing. # Add the -ne, -nt options to turn off errors and transport # information. # Add information about length of time on queue, and -q to # control the intervals and turn it off. # Add count and percentage of delayed messages to the Received # line. # Show total number of errors. # Add count and percentage of messages with errors to Received # line. # Add information about relaying and -nr to suppress it. # 1997-02-03 Merged in some of the things Nigel Metheringham had done: # Re-worded headings # Added received histogram as well as delivered # Added local senders' league table # Added local recipients' league table # 1997-03-10 Fixed typo "destinationss" # Allow for intermediate address between final and original # when testing for relaying # Give better message when no input use integer; ################################################## # Static data # ################################################## @tab62 = (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, # 0-9 0,10,11,12,13,14,15,16,17,18,19,20, # A-K 21,22,23,24,25,26,27,28,29,30,31,32, # L-W 33,34,35, 0, 0, 0, 0, 0, # X-Z 0,36,37,38,39,40,41,42,43,44,45,46, # a-k 47,48,49,50,51,52,53,54,55,56,57,58, # l-w 59,60,61); # x-z @days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334); @queue_times = (60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60, 12*60*60, 24*60*60); %month_to_num = ( Jan => "01", Feb => "02", Mar => "03", Apr => "04", May => "05", Jun => "06", Jul => "07", Aug => "08", Sep => "09", Oct => "10", Nov => "11", Dec => "12", ); ################################################## # Subroutines # ################################################## sub print_volume_rounded { my($x) = pop @_; if ($x < 10000) { printf("%6d", $x); } elsif ($x < 10000000) { printf("%4dKB", ($x + 512)/1024); } else { printf("%4dMB", ($x + 512*1024)/(1024*1024)); } } sub format_time { my($t) = pop @_; my($s) = $t % 60; $t /= 60; my($m) = $t % 60; $t /= 60; my($h) = $t % 24; $t /= 24; my($d) = $t % 7; my($w) = $t/7; my($p) = ""; $p .= "$w"."w" if $w > 0; $p .= "$d"."d" if $d > 0; $p .= "$h"."h" if $h > 0; $p .= "$m"."m" if $m > 0; $p .= "$s"."s" if $s > 0 || $p eq ""; $p; } # Given a log date/time, compute seconds since 1970 sub seconds { my($y,$mo,$d,$h,$mi,$s) = (pop @_) =~ /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/; my($leap) = ($y % 4)? 1 : 0; $d += $days_per_month[$mo-1]; $d++ if ($leap && $mo > 2); $y -= 1970; $d += $y * 365 + ($y+1)/4; $s += 60 * ($mi + 60 * ($h + $d * 24)); $s; } # Given a message id, compute seconds sub id_seconds { my($id) = substr((pop @_), 0, 6); my($s) = 0; my(@c) = split(//, $id); while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] } $s; } # Print time on queue information sub print_queue_times { no integer; my($string,$array) = @_; $printed_one = 0; $cumulative_percent = 0; $queue_unknown += keys %arrival_time; $queue_total = $queue_more_than; for ($i = 0; $i <= $#queue_times; $i++) { $queue_total += $$array[$i] } my($temp) = "Time spent on the queue: $string"; printf ("%s\n%s\n\n", $temp, "-" x length($temp)); for ($i = 0; $i <= $#queue_times; $i++) { if ($$array[$i] > 0) { $percent = ($$array[$i] * 100)/$queue_total; $cumulative_percent += $percent; printf("%s %4s %6d %5.1f%% %5.1f%%\n", $printed_one? " " : "Under", &format_time($queue_times[$i]), $$array[$i], $percent, $cumulative_percent); $printed_one = 1; } } if ($queue_more_than > 0) { $percent = ($queue_more_than * 100)/$queue_total; $cumulative_percent += $percent; printf("Over %4s %6d %5.1f%% %5.1f%%\n", &format_time($queue_times[$#queue_times]), $queue_more_than, $percent, $cumulative_percent); } #printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0; print "\n"; } # Print histogram sub print_histogram { my($text) = shift; my(@interval_count) = @_; my($maxd) = 0; for ($i = 0; $i < $hist_number; $i++) { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; } $scale = int(($maxd + 25)/50); $scale = 1 if $scale == 0; if ($text eq "Deliveries") { $type = ($scale == 1)? "delivery" : "deliveries"; } else { $type = ($scale == 1)? "message" : "messages"; } my($temp) = sprintf("$text per %s (each dot is $scale $type)", ($hist_interval == 60)? "hour" : ($hist_interval == 1)? "minute" : "$hist_interval minutes"); printf("%s\n%s\n\n", $temp, "-" x length($temp)); $hour = 0; $minutes = 0; for ($i = 0; $i < $hist_number; $i++) { $c = $interval_count[$i]; # If the interval is an hour (the maximum) print the starting and # ending hours as a label. Otherwise print the starting hour and # minutes, which take up the same space. if ($hist_opt == 1) { printf("%02d-%02d", $hour, $hour + 1); $hour++; } else { if ($minutes == 0) { printf("%02d:%02d", $hour, $minutes) } else { printf(" :%02d", $minutes) } $minutes += $hist_interval; if ($minutes >= 60) { $minutes = 0; $hour++; } } printf(" %6d %s\n", $c, "." x ($c/$scale)); } print "\n"; } # Print league table sub print_league_table { my($text) = $_[0]; my($m_count) = $_[1]; my($m_data) = $_[2]; my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s"; my($temp) = "Top $name by message count"; printf ("%s\n%s\n\n", $temp, "-" x length($temp)); $count = 1; foreach $key (sort { $$m_count{$b} <=> $$m_count{$a} || $$m_data{$b} <=> $$m_data{$a} || $a cmp $b } keys %{$m_count}) { printf("%5d %8d %s\n", $$m_count{$key}, $$m_data{$key}, $key); last if $count++ >= $topcount; } $temp = "Top $name by volume"; printf ("\n%s\n%s\n\n", $temp, "-" x length($temp)); $count = 1; foreach $key (sort { $$m_data{$b} <=> $$m_data{$a} || $$m_count{$b} <=> $$m_count{$a} || $a cmp $b } keys %{$m_count}) { printf("%5d %8d %s\n", $$m_count{$key}, $$m_data{$key}, $key); last if $count++ >= $topcount; } print "\n"; } ################################################## # Main Program # ################################################## $delayed_count = 0; $relayed_unshown = 0; $show_errors = 1; $show_relay = 1; $show_transport = 1; $queue_more_than = 0; $queue_unknown = 0; $topcount = 50; $local_league_table = 1; $hist_opt = 1; $begin = "9999-99-99 99:99:99"; $end = "0000-00-00 00:00:00"; # Decode options if ($#ARGV > 0) { while (substr($ARGV[0], 0, 1) eq '-') { if ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 } elsif ($ARGV[0] =~ /^\-ne$/) { $show_errors = 0 } elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/) { if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 } } elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/) { @queue_times = split(/,/, $1); foreach $q (@queue_times) { $q = eval($q) + 0 } @queue_times = sort { $a <=> $b } @queue_times; @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0); } elsif ($ARGV[0] =~ /^\-nt$/) { $show_transport = 0 } elsif ($ARGV[0] =~ /^\-t(\d+)$/) { $topcount = $1 } elsif ($ARGV[0] =~ /^\-tnl$/) { $local_league_table = 0 } else { print "zmailstats: Unknown or malformed option $ARGV[0]\n"; print " Valid options are:\n"; print " -h histogram divisions per hour\n"; print " default is 1, 0 suppresses histogram\n"; print " -ne don't display error information\n"; print " -nr don't display relaying information\n"; print " -nr/pattern/ don't display relaying information that matches\n"; print " -nt don't display transport information\n"; print " -q list of times for queuing information\n"; print " single 0 item suppresses\n"; print " -t display top sources/destinations\n"; print " default is 50, 0 suppresses top listing\n"; print " -tnl omit local sources/destinations in top listing\n"; exit 1; } shift; } } # Initialize slots for queue times for ($i = 0; $i <= $#queue_times; $i++) { $queue_bin[$i] = 0; $remote_queue_bin[$i] = 0; } # Compute the number of slots for the histogram if ($hist_opt > 0) { if ($hist_opt > 60 || 60 % $hist_opt != 0) { print "zmailstats: -h must specify a factor of 60\n"; exit 1; } $hist_interval = 60/$hist_opt; $hist_number = (24*60)/$hist_interval; @received_interval_count = (0) x $hist_number; @delivered_interval_count = (0) x $hist_number; } # Scan the input files and collect the data $received_data_total = 0; $received_count_total = 0; $delivered_data_total = 0; $delivered_count_total = 0; while (<>) { ($x_month,$x_day,$x_time,$transport,$id,$flag,$x_address) = /^(\w\w\w)\s{1,2}(\d{1,2})\s(\d\d:\d\d:\d\d)\s\w*\s(\w*)\[\d*\]:\s([\d\w\.])*:\s(\w*)=(\<\S+\>)/; # Continue if invalid line if (!defined $flag || $transport eq "hold") { next }; # Assume 1997. This should be fixed somehow $tod = "1997-$month_to_num{$x_month}-$x_day $x_time"; $begin = $tod if $tod lt $begin; $end = $tod if $tod gt $end; if ($flag eq "from") { ($thissize) = /\ssize=([0-9]+)/; $thissize = 0 if !defined($thissize); $size{$id} = $thissize; ($user,$host) = /\srrelay=(\S+)\@(\S+)/; if (defined $host && $host ne "localhost") { if ($show_relay) # Save incoming information { # in case it becomes interesting ($ip) = /\srrelay=\S+(\s\[[^]]*\])/; # read. $ip = "" if !defined $ip; $from_host{$id} = "$host$ip"; # $from_address{$id} = $x_address; } } else { $host = "local"; if (defined $user) { $received_count_user{$user}++; $received_data_user{$user} = 0 if !defined $received_data_user{$user}; $received_data_user{$user} += $thissize; } } $received_count{$host}++; $received_data{$host} = 0 if !defined $received_data{$host}; $received_data{$host} += $thissize; $received_count_total++; $received_data_total += $thissize; $arrival_time{$id} = $tod if $#queue_times >= 0; if ($hist_opt > 0) { ($hour,$minute) = $tod =~ /^\S+ ([0-9]{2}):([0-9]{2}):/; $received_interval_count[($minute+$hour*60)/$hist_interval]++; } } elsif ($flag eq "to") { # Was this a deferring action? if ( /stat=defer/ || /stat=retry/) { if (!defined ($delayed{$id})) { $delayed_count++; $delayed{$id} = 1; } else { $delayed{$id}++; } next; } elsif (! /stat=ok/) { # Unsuccessful delivery. Note this and continue $had_error{$id} = 1 if defined ($size{$id}); if ($show_errors) { ($error) = /,\sstat=(.*)/; if (defined $error) { if ($error =~ /^error2\s/) { $error = substr($error,7); } $errors_count{$error}++; } } next; } $departure_time{$id}= $tod; $arrival_time{$id}= $begin if !defined $arrival_time{$id}; $size = $size{$id}; $size = 0 if !defined $size; ($host) = /\srelay=(\S+)/; if (defined $host) { $remote_delivered{$id} = 0 if !defined($remote_delivered{$id}); $remote_delivered{$id}++; # Determine relaying address if either only one address listed, # or two the same. If they are different, it implies a forwarding # or aliasing, which is not relaying. Note that for multi-aliased # addresses, there may be a further address between the first # and last. if ($show_relay && defined $from_host{$id}) { ($ip) = /\srelay=\S+(\s\[[^]]*\])/; $ip = "" if !defined $ip; { # $old = $x_address; # $key = "H=\L$from_host{$id}\E A=\L$from_address{$id}\E => A=\L$old\E " . # "H=\L$host\E$ip"; $key = "H=\L$from_host{$id}\E => H=\L$host\E$ip"; if (!defined $relay_pattern || $key !~ /$relay_pattern/o) { $relayed{$key} = 0 if !defined $relayed{$key}; $relayed{$key}++; } else { $relayed_unshown++ } } } } else { $host = "local"; if (($user) = $x_address =~ /^\<(\S+)@\S+\>/) { # if ($user =~ /^[\/|]/) # { # my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/; # $user = "$user $parent" if defined $parent; # } $delivered_count_user{$user}++; $delivered_data_user{$user} = 0 if !defined $delivered_data_user{$user}; $delivered_data_user{$user} += $size; } } $delivered_count{$host}++; $delivered_data{$host} = 0 if !defined $delivered_data{$host}; $delivered_data{$host} += $size; $delivered_count_total++; $delivered_data_total += $size; if ($show_transport) { $transported_data{$transport} = 0 if !defined $transported_data{$transport}; $transported_data{$transport} += $size; $transported_count{$transport}++; } if ($hist_opt > 0) { ($hour,$minute) = $tod =~ /^\S+ ([0-9]{2}):([0-9]{2}):/; $delivered_interval_count[($minute+$hour*60)/$hist_interval]++; } } } foreach (keys %arrival_time) { $id = $_; $deptime=$departure_time{$id}; if (!defined $deptime) { $deptime=$end; } if ($#queue_times >=0) { $queued = &seconds($deptime) - &seconds($arrival_time{$id}); for ($i = 0; $i <= $#queue_times; $i++) { if ($queued < $queue_times[$i]) { $queue_bin[$i]++; $remote_queue_bin[$i]++ if $remote_delivered{$id}; last; } } $queue_more_than++ if $i > $#queue_times; } # if ($show_relay) # { # delete($from_host{$id}); # delete($from_address{$id}); # } } if ($begin eq "9999-99-99 99:99:99") { print "**** No valid log lines read\n"; exit 1; } print "\nZmailer statistics from $begin to $end\n"; # Print grand totals print "\nGrand total summary"; print "\n-------------------"; print "\n At least one address"; print "\n TOTAL Volume Messages Hosts Delayed Failed"; print "\n Received "; &print_volume_rounded($received_data_total); { no integer; $failed_count = keys %had_error; printf(" %6d %4d %6d %4.1f%% %6d %4.1f%%", $received_count_total, scalar(keys %received_data), $delayed_count, ($received_count_total == 0)? 0 : ($delayed_count*100)/$received_count_total, $failed_count, ($received_count_total == 0)? 0 : ($failed_count*100)/$received_count_total); } print "\n Delivered "; &print_volume_rounded($delivered_data_total); printf(" %6d %4d\n", $delivered_count_total, scalar(keys %delivered_data)); print "\n"; # Print totals by transport if required if ($show_transport) { print "Deliveries by transport\n"; print "-----------------------"; print "\n Volume Messages"; foreach $key (sort keys %transported_data) { printf( "\n %-16s ", $key); &print_volume_rounded($transported_data{$key}); printf( " %6d", $transported_count{$key}); } print "\n\n"; } # Print the deliveries per interval as a histogram, unless configured not to. # First find the maximum in one interval and scale accordingly. if ($hist_opt > 0) { &print_histogram("Messages received", @received_interval_count); &print_histogram("Deliveries", @delivered_interval_count); } # Print times on queue if required if ($#queue_times >= 0) { &print_queue_times("all messages", \@queue_bin); &print_queue_times("messages with at least one remote delivery", \@remote_queue_bin); } # Print relay information if required if ($show_relay) { if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) { $shown = 0; $spacing = ""; print "Relayed messages\n"; print "----------------\n\n"; foreach $key (sort { $relayed{$b} <=> $relayed{$a} } keys %relayed) { $count = $relayed{$key}; $shown += $count; $key =~ s/[HA]=//g; ($one,$two) = split(/=> /, $key); # printf("%5d %s\n => %s\n", $count, $one, $two); printf("%5d %s => %s\n", $count, $one, $two); $spacing = "\n"; } print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n"; } else { print "No relayed messages\n"; print "-------------------\n"; } print "\n"; } # If the topcount is zero, print no league tables if ($topcount > 0) { &print_league_table("sending host", \%received_count, \%received_data); &print_league_table("local sender", \%received_count_user, \%received_data_user) if $local_league_table; &print_league_table("destination", \%delivered_count, \%delivered_data); &print_league_table("local destination", \%delivered_count_user, \%delivered_data_user) if $local_league_table; } # Omit error statistics if configured out if ($show_errors) { $total_errors = 0; if (scalar(keys %errors_count) != 0) { print "List of errors\n"; print "--------------\n\n"; foreach $key (sort keys %errors_count) { chop($text = $key); printf("%5d ", $errors_count{$key}); $total_errors += $errors_count{$key}; while (length($text) > 65) { ($first,$rest) = $text =~ /(.{55}\S*)\s+(.+)/; if (!$first) { printf("%s\n", $text); last; } printf("%s\n ", $first); $text = $rest; } printf("%s\n\n", $text); } } $temp = "Errors encountered: $total_errors"; printf("%s\n%s\n\n", $temp, "-" x length($temp)); } # End of zmailstats