#!/usr/bin/perl -w
# sqlgrey: a postfix greylisting policy server using an SQL backend
# based on postgrey
# Copyright 2004 (c) ETH Zurich
# Copyright 2004 (c) Lionel Bouton
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
package sqlgrey_logstats;
use strict;
use Pod::Usage;
use Getopt::Long qw(:config posix_default no_ignore_case);
use Time::Local;
use Date::Calc;
my $VERSION = "1.7.5";
# supports IPv4 and IPv6
my $ipregexp = '[\dabcdef\.:]+';
######################
# Time-related methods
my %months = ( "Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, "Jun" => 5,
"Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, "Nov" => 10, "Dec" => 11 );
sub validate_tstamp {
my $self = shift;
my $value = shift;
my ($monthname, $mday, $hour, $min, $sec);
if ($value =~ /^(\w{3}) ([\d ]\d) (\d\d):(\d\d):(\d\d)$/) {
($monthname, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5);
} else {
$self->debug("invalid date format: $value\n");
return undef;
}
my $month = $months{$monthname};
my $year = $self->{year};
if ($month > $self->{month}) {
# yes we can compute stats across years...
$year--;
}
my $epoch_seconds = Time::Local::timelocal($sec, $min, $hour, $mday, $month, $year);
if (! $epoch_seconds) {
$self->debug("can't compute timestamp from: $value\n");
return undef;
}
if ($epoch_seconds < $self->{begin} or $epoch_seconds > $self->{end}) {
$self->debug("date out of range: $value\n");
return undef;
}
return $epoch_seconds;
}
# What was the tstamp yesterday at 00:00 ?
sub yesterday_tstamp {
# Get today 00:00:00 and deduce one day
my ($day, $month, $year) = reverse Date::Calc::Add_Delta_Days(Date::Calc::Today(), -1 );
# Adjust Date::Calc 1-12 month to 0-11
$month--;
return Time::Local::timelocal(0,0,0,$day,$month,$year);
}
# What was the tstamp today at 00:00 ?
sub today_tstamp {
# Get today 00:00:00
return Time::Local::timelocal(0, 0, 0, ((localtime())[3,4,5]));
}
# set time period
sub yesterday {
my $self = shift;
$self->{begin} = $self->yesterday_tstamp();
$self->{end} = $self->{begin} + (60 * 60 * 24);
}
sub today {
my $self = shift;
$self->{begin} = $self->today_tstamp();
$self->{end} = time();
}
sub lasthour {
my $self = shift;
my $now = time();
$self->{begin} = $now - (60 * 60);
$self->{end} = $now;
}
sub last24h {
my $self = shift;
my $now = time();
$self->{begin} = $now - (60 * 60 * 24);
$self->{end} = $now;
}
sub lastweek {
my $self = shift;
$self->{end} = $self->today_tstamp();
$self->{begin} = $self->{end} - (60 * 60 * 24 * 7);
}
##################
# Argument parsing
sub parse_args {
my $self = shift;
my %opt = ();
GetOptions(\%opt, 'help|h', 'man', 'version', 'yesterday|y', 'today|t',
'lasthour', 'last24h|d', 'lastweek|w', 'programname', 'debug',
'top-domain=i', 'top-from=i', 'top-spam=i', 'top-throttled=i',
'print-delayed')
or pod2usage(1);
if ($opt{debug}) {
$self->{debug} = 1;
}
if ($opt{help}) { pod2usage(1) }
if ($opt{man}) { pod2usage(-exitstatus => 0, -verbose => 2) }
if ($opt{version}) { print "sqlgrey-logstats.pl $VERSION\n"; exit(0) }
my $setperiod_count = 0;
if ($opt{yesterday}) {
$self->yesterday();
$setperiod_count++;
}
if ($opt{today}) {
$self->today();
$setperiod_count++;
}
if ($opt{lasthour}) {
$self->lasthour();
$setperiod_count++;
}
if ($opt{last24h}) {
$self->last24h();
$setperiod_count++;
}
if ($opt{lastweek}) {
$self->lastweek();
$setperiod_count++;
}
if ($setperiod_count > 1) {
pod2usage(1);
}
if ($opt{'top-domain'}) {
$self->{top_domain} = $opt{'top-domain'};
}
if ($opt{'top-from'}) {
$self->{top_from} = $opt{'top-from'};
}
if ($opt{'top-spam'}) {
$self->{top_spam} = $opt{'top-spam'};
}
if ($opt{'top-throttled'}) {
$self->{top_throttled} = $opt{'top-throttled'};
}
if ($opt{'print-delayed'}) {
$self->{print_delayed} = 1;
}
# compute current year and month
($self->{month}, $self->{year}) = (localtime)[4,5];
if ($opt{programname}) {
$self->{programname} = $opt{programname};
}
}
################
# percent string
sub percent {
my $portion = shift;
my $total = shift;
if ($total == 0) {
return "N/A%";
}
return sprintf ("%.2f%%", ($portion / $total) * 100);
}
# quick debug function
sub debug {
my $self = shift;
if (defined $self->{debug}) {
print shift;
}
}
sub split_date_event {
my ($self, $line) = @_;
if ($line =~
m/^(\w{3} [\d ]\d \d\d:\d\d:\d\d)\s\S+\s$self->{programname}: (\w+): (.*)$/o
) {
my $time = $self->validate_tstamp($1);
if (! defined $time) {
return (undef,undef,undef);
} else {
#$self->debug("match: $time, $2, $3\n");
return ($time, $2, $3);
}
} else {
$self->debug("not matched: $line\n");
return (undef,undef,undef);
}
}
sub parse_grey {
my ($self, $time, $event) = @_;
## old format
if ($event =~ /^domain awl match: updating ($ipregexp), (.*)$/i) {
$self->{events}++;
$self->{passed}++;
$self->{domain_awl_match}{$1}{$2}++;
$self->{domain_awl_match_count}++;
} elsif ($event =~ /^from awl match: updating ($ipregexp), (.*)$/i) {
$self->{events}++;
$self->{passed}++;
$self->{from_awl_match}{$1}{$2}++;
$self->{from_awl_match_count}++;
} elsif ($event =~ /^new: ($ipregexp), (.*) -> (.*)$/i) {
$self->{events}++;
$self->{new}{$1}++;
$self->{new_count}++;
} elsif ($event =~ /^throttling: ($ipregexp), (.*) -> (.*)$/i) {
$self->{events}++;
$self->{throttled}{$1}{$2}++;
$self->{throttled_count}++;
} elsif ($event =~ /^early reconnect: ($ipregexp), (.*) -> (.*)$/i) {
$self->{events}++;
$self->{early}{$1}++;
$self->{early_count}++;
} elsif ($event =~ /^reconnect ok: ($ipregexp), (.*) -> (.*) \((.*)\)/i) {
$self->{events}++;
$self->{passed}++;
$self->{reconnect}{$1}{$2}++;
$self->{reconnect_count}++;
## new format
} elsif ($event =~ /^domain awl match: updating ($ipregexp)\($ipregexp\), (.*)$/i) {
$self->{events}++;
$self->{passed}++;
$self->{domain_awl_match}{$1}{$2}++;
$self->{domain_awl_match_count}++;
## new format for from_awl match (deverp log)
} elsif ($event =~ /^from awl match: updating ($ipregexp)\($ipregexp\), (.*)\(.*\)$/i) {
$self->{events}++;
$self->{passed}++;
$self->{from_awl_match}{$1}{$2}++;
$self->{from_awl_match_count}++;
} elsif ($event =~ /^from awl match: updating ($ipregexp)\($ipregexp\), (.*)$/i) {
$self->{events}++;
$self->{passed}++;
$self->{from_awl_match}{$1}{$2}++;
$self->{from_awl_match_count}++;
} elsif ($event =~ /^new: ($ipregexp)\($ipregexp\), (.*) -> (.*)$/i) {
$self->{events}++;
$self->{new}{$1}++;
$self->{new_count}++;
} elsif ($event =~ /^throttling: ($ipregexp)\($ipregexp\), (.*) -> (.*)$/i) {
$self->{events}++;
$self->{throttled}{$1}{$2}++;
$self->{throttled_count}++;
} elsif ($event =~ /^early reconnect: ($ipregexp)\($ipregexp\), (.*) -> (.*)$/i) {
$self->{events}++;
$self->{early}{$1}++;
$self->{early_count}++;
} elsif ($event =~ /^reconnect ok: ($ipregexp)\($ipregexp\), (.*) -> (.*) \((.*)\)/i) {
$self->{events}++;
$self->{passed}++;
$self->{reconnect}{$1}{$2}++;
$self->{reconnect_count}++;
} elsif ($event =~ /^domain awl: $ipregexp, .* added$/i) {
## what?
} elsif ($event =~ /^from awl: $ipregexp, .* added$/i) {
## what?
} elsif ($event =~ /^from awl: $ipregexp, .* added/i) {
## what?
} elsif ($event =~ /^domain awl: $ipregexp, .* added/i) {
## what?
} else {
$self->debug("unknown grey event at $time: $event\n");
}
}
sub parse_whitelist {
my ($self, $time, $event) = @_;
if ($event =~ /^.*, $ipregexp\(.*\) -> .*$/i) {
$self->{events}++;
$self->{passed}++;
$self->{whitelisted}++;
} else {
$self->debug("unknown whitelist event at $time: $event\n");
}
}
sub parse_spam {
my ($self, $time, $event) = @_;
if ($event =~ /^([\d\.]+): (.*) -> (.*) at (.*)$/) {
$self->{rejected_count}++;
$self->{rejected}{$1}{$2}++;
} else {
$self->debug("unknown spam event at $time: $event\n");
}
}
# TODO
sub parse_perf {
}
# distribute processing to appropriate parser
sub parse_line {
my ($self, $line) = @_;
my ($time, $type, $event) = $self->split_date_event($line);
if (! defined $time) {
return;
}
# else parse event
if ($type eq 'grey') {
$self->parse_grey($time, $event);
} elsif ($type eq 'whitelist') {
$self->parse_whitelist($time, $event);
} elsif ($type eq 'spam') {
$self->parse_spam($time, $event);
} elsif ($type eq 'perf') {
$self->parse_perf($time, $event);
} # don't care for other types
}
# format a title
sub print_title {
my $self = shift;
my $title = shift;
my $ln = length($title);
my $line = ' ' . '-' x ($ln + 2) . ' ';
print $line . "\n";
print "| $title |\n";
print $line . "\n\n";
}
# breaks down and print an hash
sub print_distribution {
my $self = shift;
my $hash_to_print = shift;
my $max_to_print = shift;
my $title = shift;
my @top;
my $idx;
my $count = 0;
foreach my $id (keys(%{$hash_to_print})) {
$count++;
my $hash;
$hash->{count} = 0;
$hash->{id} = $id;
foreach my $subval (keys(%{$hash_to_print->{$id}})) {
$hash->{count} += $hash_to_print->{$id}{$subval};
}
$top[$#top+1] = $hash;
@top = reverse sort { $a->{count} <=> $b->{count} } @top;
pop @top if (($max_to_print != -1) && ($#top >= $max_to_print));
}
if ($max_to_print != -1) {
$self->print_title("$title (top " . ($#top + 1) . ", " . ($#top + 1 - $count) . " hidden)");
} else {
$self->print_title($title);
}
for ($idx = 0; $idx <= $#top; $idx++) {
my @dtop;
foreach my $subval (keys(%{$hash_to_print->{$top[$idx]->{id}}})) {
my $hash;
$hash->{count} = $hash_to_print->{$top[$idx]->{id}}{$subval};
$hash->{domain} = $subval;
$dtop[$#dtop+1] = $hash;
@dtop = sort { $a->{count} <=> $b->{count} } @dtop;
}
@dtop = reverse @dtop;
print "$top[$idx]->{id}: $top[$idx]->{count}\n";
for (my $didx = 0; $didx <= $#dtop; $didx++) {
print " $dtop[$didx]->{domain}: $dtop[$didx]->{count}\n";
}
}
print "\n";
}
sub print_domain_awl {
my $self = shift;
$self->print_distribution($self->{domain_awl_match}, $self->{top_domain},
"Domain AWL");
}
sub print_from_awl {
my $self = shift;
$self->print_distribution($self->{from_awl_match}, $self->{top_from},
"From AWL");
}
sub print_spam {
my $self = shift;
$self->print_distribution($self->{rejected}, $self->{top_spam},
"Spam");
}
sub print_delayed {
my $self = shift;
if (! defined $self->{print_delayed}) {
return;
}
$self->print_distribution($self->{reconnect}, -1,
"Delayed");
}
sub print_throttled {
my $self = shift;
$self->print_distribution($self->{throttled}, $self->{top_throttled},
"Throttled");
}
sub print_stats {
my $self = shift;
print "##################\n" .
"## Global stats ##\n" .
"##################\n\n";
print "Events : " . $self->{events} . "\n";
print "Passed : " . $self->{passed} . "\n";
print "Early : " . $self->{early_count} . "\n";
print "Delayed : " . $self->{new_count} . "\n\n";
print "Probable SPAM : " . $self->{rejected_count} . "\n";
print "Throttled : " . $self->{throttled_count} . "\n\n";
print "###############################\n" .
"## Whitelist/AWL performance ##\n" .
"###############################\n\n";
print "Breakdown for $self->{passed} accepted messages:\n\n";
print "Whitelists : " .
percent($self->{whitelisted}, $self->{passed}) .
"\t($self->{whitelisted})\n";
print "Domain AWL : " .
percent($self->{domain_awl_match_count}, $self->{passed}) .
"\t($self->{domain_awl_match_count})\n";
print "From AWL : " .
percent($self->{from_awl_match_count}, $self->{passed}) .
"\t($self->{from_awl_match_count})\n";
print "Delayed : " .
percent($self->{reconnect_count},$self->{passed}) .
"\t($self->{reconnect_count})\n\n";
$self->print_domain_awl();
$self->print_from_awl();
$self->print_spam();
$self->print_throttled();
$self->print_delayed();
}
# create parser with no period limits
# and counters set to 0
my $parser = bless {
begin => 0,
end => (1 << 31) - 1,
programname => 'sqlgrey',
events => 0,
passed => 0,
whitelisted => 0,
rejected_count => 0,
new_count => 0,
throttled_count => 0,
early_count => 0,
domain_awl_match_count => 0,
from_awl_match_count => 0,
domain_awl_match => {},
from_awl_match => {},
rejected => {},
reconnect => {},
reconnect_count => 0,
top_domain => -1,
top_from => -1,
top_spam => -1,
top_throttled => -1,
}, 'sqlgrey_logstats';
$parser->parse_args();
while (<STDIN>) {
chomp;
$parser->parse_line($_);
}
$parser->print_stats();
__END__
=head1 NAME
sqlgrey-logstats.pl - SQLgrey log parser
=head1 SYNOPSIS
B<sqlgrey-logstats.pl> [I<options>...] < syslogfile
-h, --help display this help and exit
--man display man page
--version output version information and exit
--debug output detailed log parsing steps
-y, --yesterday compute stats for yesterday
-t, --today compute stats for today
--lasthour compute stats for last hour
-d, --lastday compute stats for last 24 hours
-w, --lastweek compute stats for last 7 days
--programname program name looked into log file
--top-from how many from AWL entries to print (default: all)
--top-domain how many domain AWL entries to print (default: all)
--top-spam how many SPAM sources to print (default: all)
--top-throttled how many throttled sources to print (default: all)
--print-delayed print delayed sources (default: don't)
=head1 DESCRIPTION
sqlgrey-logstats.pl ...
=head1 SEE ALSO
See L<http://www.greylisting.org/> for a description of what greylisting
is and L<http://www.postfix.org/SMTPD_POLICY_README.html> for a
description of how Postfix policy servers work.
=head1 COPYRIGHT
Copyright (c) 2004 by Lionel Bouton.
=head1 LICENSE
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
=head1 AUTHOR
S<Lionel Bouton E<lt>lionel-dev@bouton.nameE<gt>>
=cut
syntax highlighted by Code2HTML, v. 0.9.1