#!/usr/local/bin/perl
# Shell/Perl polyglot: when this file is interpreted by a shell instead of
# perl, the shell executes the 'eval' line and re-runs the script under the
# perl found in PATH.  Perl itself never runs it because of the 'if 0'.
eval 'exec perl -S $0 "$@"'
    if 0;

###
# Project:     pflogstats
# Program:     pflogstats.pl
# Description: Main program
#
# Based on:    pflogsumm.pl - Produce summaries of Postfix/VMailer MTA in logfile -
#               Copyright (C) 1998-2002 by James S. Seymour (jseymour@LinxNet.com)
#               Release 1.0.4.
#               http://jimsun.LinxNet.com/postfix_contrib.html
#
# All not overtaken code:
#              Copyright (C) 2002-2005 by Dr. Peter Bieringer <pbieringer@aerasec.de>
#               ftp://ftp.aerasec.de/pub/linux/postfix/pflogsumm/
#
# License:     GNU GPL v2
# CVS:         $Id: pflogstats.pl,v 1.44 2005/05/02 13:39:32 peter Exp $
#
# See also following files: CREDITS ChangeLog TODO LICENSE README INSTALL
###


# Debug values
#                                    | 0x1000 : add verp_mung match token to address
#                                    | 0x2000 : show check of net for skip
#                                    | 0x0002 : show qid
#                                    | 0x0004 : show log line

# ***** Sorry for some coding confusion, this is a developer version *****

# Minimum required Perl version
require 5.00503;

use strict;

# Global used Perl modules
use Time::Local;
use locale;
use Getopt::Long;


## Program identity: release number, program name and copyright banner
use vars qw{$release $progName};
our ($release, $progName, $copyright) = (
    "1.1.1",
    "pflogstats.pl",
    "(P) & (C) by Dr. Peter Bieringer, AERAsec - based on pflogsumm.pl by James S. Seymour",
);


## Defines before module loader
# Extend module search path !!!! REVIEW before using !!!!
# The directories are appended to @INC in this lookup order:
#  1st: current directory
#  2nd: /usr/local/lib/perl5/site_perl/5.8.8/Pflogstats
#  3rd: /usr/lib/pflogstats
push @INC, (
    ".",
    "/usr/local/lib/perl5/site_perl/5.8.8/Pflogstats",
    "/usr/lib/pflogstats",
);

## Define global variables

# Package globals:
#   %options     Getopt::Long specification => target reference
#   %opts        parsed option values
#   %types       known statistic types (0 = not selected)
#   %hooks       module hooks
#   %moduleinfo  module info
#   @opt_format  format options as given on the command line
#   %format      format options actually used
use vars qw{%options %opts %types %hooks %moduleinfo @opt_format %format};

# Known types so far: the default one and a temporary developer type
@types{ 'default', 'test_verp_mung' } = (0, 0);

# Targets that are filled directly by option parsing
my (@opt_types, $isoDateTime);


# Preset options ("m" and "uucp_mung" deliberately share one target)
%options = (
    "e"                  => \$opts{'e'},
    "q"                  => \$opts{'q'},
    "m"                  => \$opts{'m'},
    "uucp_mung"          => \$opts{'m'},
    "u=i"                => \$opts{'u'},
    "iso_date_time"      => \$isoDateTime,
  ## extra
    "show_users"         => \$opts{'show_users'},
);


## Module loader
# The files are required one after the other in exactly this order.
# NOTE(review): the modules presumably register their options, types and
# hooks while being loaded - confirm in the module sources.
for my $module_file (
	# General
	"pflogstats-common-support.pm",

	# Optional common modules
	"pflogstats-common-profiling.pm",

	# Optional intermediate XML storage
	"pflogstats-common-intermediatexml.pm",

	# Features/Enhancements
	"pflogstats-statistics-accounting.pm",
	"pflogstats-statistics-antivirus.pm",
	"pflogstats-statistics-accpopimap.pm",
	"pflogstats-statistics-uce.pm",
	"pflogstats-statistics-reject.pm",
	"pflogstats-extensions-addressmapping.pm",
	"pflogstats-extensions-verpmung.pm",
	"pflogstats-extensions-networking.pm",
	"pflogstats-statistics-spamassassin.pm",
) {
	require $module_file;
}


## Hook 'early_begin'
# Run every callback registered for this hook (hash order, i.e. unspecified)
foreach my $hook_name (keys %{$hooks{'early_begin'}}) {
	my $callback = $hooks{'early_begin'}->{$hook_name};
	&$callback;
}

## Print options (debug)
#for my $key (keys %options) {
#	print $key . "\n";
#};
#exit 0;

## Print types (debug)
#for my $type (keys %types) {
#	print $type . "\n";
#};
#exit 0;

# Package globals: number formats and the time range (min/max) of the logdata
use vars qw{%numberformat $timemin $timemax};

# Unix time of the log line currently being parsed
my $time;

###
#
#
###
# ATTENTION: not updated:
# Usage:
#    pflogstats.pl -[eq] [-d <today|yesterday>] [-h <cnt>] [-u <cnt>]
#        [--verp_mung[=<n>]] [--verbose_msg_detail] [--iso_date_time]
#        [-m|--uucp_mung] [-i|--ignore_case] [--smtpd_stats] [--mailq]
#        [--problems_first] [--rej_add_from] [--no_bounce_detail]
#        [--no_deferral_detail] [--no_reject_detail] [file1 [filen]]
#
#    pflogstats.pl -[help|version]
#
# Options:
#
#    -e             extended (extreme? excessive?) detail - emit detailed
#                   reports.  At present, this includes only a per-message
#                   report, sorted by sender domain, then user-in-domain,
#                   then by queue i.d.
#
#                   WARNING: the data built to generate this report can
#                   quickly consume very large amounts of memory if a lot
#                   of log entries are processed!
#
#    --iso_date_time
#
#                   For summaries that contain date or time information, use
#                   ISO 8601 standard formats (CCYY-MM-DD and HH:MM), rather
#                   than "Mon DD CCYY" and "HHMM".
#
#    -m             modify (mung?) UUCP-style bang-paths
#    --uucp_mung
#
#                   This is for use when you have a mix of Internet-style
#                   domain addresses and UUCP-style bang-paths in the log.
#                   Upstream UUCP feeds sometimes mung Internet domain
#                   style address into bang-paths.  This option can
#                   sometimes undo the "damage".  For example:
#                   "somehost.dom!username@foo" (where "foo" is the next
#                   host upstream and "somehost.dom" was whence the email
#                   originated) will get converted to
#                   "foo!username@somehost.dom".  This also affects the
#                   extended detail report (-e), to help ensure that by-
#                    domain-by-name sorting is more accurate.
#
#
#    -q             quiet - don't print headings for empty reports (note:
#                   headings for warning, fatal, and "master" messages will
#                   always be printed.)
#
#
#    --verbose_msg_detail
#
#                   For the message deferral, bounce and reject summaries:
#                   display the full "reason", rather than a truncated one.
#                   Note: this can result in quite long lines in the report.
#
#    If no file(s) specified, reads from stdin.  Output is to stdout.
#
# Typical usage:
#    Produce a report of previous day's activities:
#        pflogsumm.pl -d yesterday /var/log/syslog
#    A report of prior week's activities (after logs rotated):
#        pflogsumm.pl /var/log/syslog.1
#    What's happened so far today:
#        pflogsumm.pl -d today /var/log/syslog
#
# Debug/development options:
#   -t|--type test_verp_mung
#     Test addresses given on stdin for verp_mung rewriting
#
# Notes:
#
#    -------------------------------------------------------------
#    IMPORTANT: Pflogsumm makes no attempt to catch/parse non-
#               postfix/vmailer daemon log entries.  (I.e.: Unless
#               it has "postfix/" or "vmailer/" in the log entry,
#               it will be ignored.)
#    -------------------------------------------------------------
#
#    The "-c <cnt>" option is gone.  Use "--top <cnt>" and/or "-u <cnt>"
#    instead.
#
#    For display purposes: integer values are munged into "kilo" and
#    "mega" notation as they exceed certain values.  I chose the
#    admittedly arbitrary boundaries of 512k and 512m as the points
#    at which to do this--my thinking being 512x was the largest
#    number (of digits) that most folks can comfortably grok
#    at-a-glance.  These are "computer" "k" and "m", not 1000 and
#    1,000,000.  You can easily change all of this with some
#    constants near the beginning of the program.
#
#    "Items-per-day" reports are not generated for single-day
#    reports.  For multiple-day reports: "Items-per-hour" numbers
#    are daily averages (reflected in the report headings).
#
#    It's important that the logs are presented to pflogsumm in
#    chronological order so that message sizes are available when
#    needed.


#### Reviewed main options ####
# Help texts are collected in display order; the matching Getopt::Long
# specifications are registered in %options further down.
my @mainhelptext;

# The help text for --type lists every currently known type after "More examples:"
my $temp = "    [--type|-t <type>]        Do type of statistics (more than one can be specified)\n                               Default: 'default' (pflogsumm-like-only)\n                               More examples:";
$temp .= " '" . $_ . "'" for (keys %types);

push @mainhelptext, (
	# Help
	"    [--help|-h|-?]            Display help/usage message",
	# Type
	$temp,
	# Time range
	"    [--range|-r <timerange>]  Timerange of collecting data\n                               Default: all data\n                               More examples: 'yesterday' 'today'  'Mar 1'",
	# Table sort
	"    [--sort <sorttype>]       Sort output\n                               Default: 'alpha'\n                               More examples: 'maxmin' 'minmax'",
	# Ignore case
	"    [--ignore_case|-i]        Handle complete email address in a case-insensitive manner\n                               Default: lower-cases only the host and domain parts\n                               If used, entire email address will be lower-cased",
	# Version
	"    [--version]               Displays version of main program and modules",
);

# Option specification => where the parsed value is stored
@options{ 'help|h|?', "type|t=s", "range|r=s", "sort=s", "ignore_case|i", "version" } = (
	\$opts{'help'},
	\@opt_types,
	\$opts{'range'},
	\$opts{'sort_type'},
	\$opts{'i'},
	\$opts{'version'},
);

# Variables and constants used throughout pflogsumm
use vars qw(
    $progName $usageMsg
    $divByOneKAt $divByOneMegAt $oneK $oneMeg
    @monthNames %monthNums $thisYr $thisMon
    $msgCntI $msgSizeI $msgDfrsI $msgDlyAvgI $msgDlyMaxI
    %numberformat @opt_format %format
);

# Units used by display routines ("computer" k and m)
$oneK   = 1024;			# 1k
$oneMeg = $oneK * $oneK;	# 1m

# Values are shown in kilobytes/megabytes once they reach these (arbitrarily
# chosen) boundaries.  Season to taste.
$divByOneKAt   = 512 * $oneK;	# 512k
$divByOneMegAt = 512 * $oneMeg;	# 512m

# Month names as they appear in log lines, and the reverse lookup name => 0..11
@monthNames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
%monthNums  = map { ($monthNames[$_] => $_) } (0 .. $#monthNames);

# Current month (0..11) and four-digit year
($thisMon, $thisYr) = (localtime)[4, 5];
$thisYr += 1900;


#
# Variables used only in main loop
#
# NOTE(review): apart from $dateStr and the $msg* date/time fields, these
# lexicals appear to be unused in this file.

# Per-user data
my (%recipUser, $recipUserCnt);
my (%sendgUser, $sendgUserCnt);

# Per-domain data
my (%recipDom, $recipDomCnt);	# recipient domain data
my (%sendgDom, $sendgDomCnt);	# sending domain data

# Indexes for arrays in above:
#   0 message count, 1 total messages size, 2 number of defers,
#   3 total of delays (used for averaging), 4 max delay
($msgCntI, $msgSizeI, $msgDfrsI, $msgDlyAvgI, $msgDlyMaxI) = (0 .. 4);

my ($cmd, $qid, $addr, $size, $relay, $status, $delay);
my $dateStr;
my (%panics, %fatals, %warnings, %masterMsgs);
my (%msgSizes, %deferred, %bounced, %noMsgSize, %msgDetail);
my ($msgsRcvd, $msgsDlvrd, $sizeRcvd, $sizeDlvrd);
my ($msgMonStr, $msgMon, $msgDay, $msgTimeStr, $msgHr, $msgMin, $msgSec, $msgYr);
my ($revMsgDateStr, %msgsPerDay);
my (%rejects, $msgsRjctd);
my (%rcvdMsg, $msgsFwdd, $msgsBncd);
my ($msgsDfrdCnt, $msgsDfrd, %msgDfrdFlgs);
my (%connTime, %smtpdPerDay, %smtpdPerDom, %smtpMsgs);

# Counters that start at zero
my ($dayCnt, $smtpdConnCnt, $smtpdTotTime) = (0, 0, 0);

# time range of log data
my %timerange;

# Per-hour counters, one slot for each hour 0..23, all starting at "0"
my @rcvPerHr = ("0") x 24;	# messages received per hour
my @dlvPerHr = ("0") x 24;	# messages delivered per hour
my @dfrPerHr = ("0") x 24;	# defers per hour
my @bncPerHr = ("0") x 24;	# bounces per hour
my @rejPerHr = ("0") x 24;	# rejects per hour
my $lastMsgDay = 0;

# "Doubly-subscripted" array: one independent [cnt, total, max time] triple
# per hour
my @smtpdPerHr = map { [ 0, 0, 0 ] } (0 .. 23);


## Main start
# NOTE(review): $mailqCmd is not referenced anywhere else in this file
my $mailqCmd = "mailq";
# Usage text for the options that are not reviewed yet; print_help() prints
# it after the reviewed options and the module help texts.  $progName is
# interpolated at this point.
$usageMsg =
    "usage: $progName -[eq]
       [--iso_date_time]
       [-m|--uucp_mung] [-i|--ignore_case]
       [file1 [filen]]

     usage for normal statistics: $progName

     general
      [--show_users]               Display statistics also per user

";

# Some pre-inits for convenience
# (set before option parsing, so --iso_date_time can still override it)
$isoDateTime = 0;	# Don't use ISO date/time formats

# Print the complete help text.
#
# Output, in this order:
#   1. program name with release, and the copyright banner
#   2. the reviewed main options collected in @mainhelptext
#   3. the help string returned by every callback registered for the 'help'
#      hook, sorted by hook name
#   4. the usage message for the not yet reviewed options ($usageMsg)
#
# Everything goes to STDOUT.  Previously only the banner went to STDOUT and
# the rest to STDERR, so "--help | less" showed nothing but the banner.
#
# Takes no arguments.
sub print_help() {
	print "$progName $release\n";
	print $copyright . "\n\n";

	print "  Options (reviewed):\n\n";
	foreach my $line (@mainhelptext) {
		print $line . "\n\n";
	};
	print "\n";

	print "  Options from included modules (new style):\n\n";

	## Hook 'help'
	for my $p_hook (sort keys %{$hooks{'help'}}) {
		# Each callback returns the help text of its module
		my $helpstring = &{$hooks{'help'}->{$p_hook}};
		print "    Options from module '" . $p_hook . "':";
		print $helpstring . "\n";
	};

	print "  Options (still not reviewed):\n\n";
	print $usageMsg . "\n\n";
};

# Keep a copy of the raw command line: GetOptions() removes every option it
# processes from @ARGV, and option values are only available after parsing.
my @callingOptions = @ARGV;

my $ret = GetOptions(%options);

# Echo the original command line in verbose mode.  This check used to run
# before GetOptions(), where no command line option could have been parsed
# yet.
# NOTE(review): the 'verbose' option is not registered in this file;
# presumably one of the loaded modules adds it to %options - confirm.
if (defined $main::opts{'verbose'}) {
	print "Calling options: " . "@callingOptions" . "\n";
};

# Unknown or malformed options: GetOptions() has already reported them
if (! $ret ) {
	print "Use help to see more\n";
	exit 1;
};

# Print help or version
if(defined($opts{'help'})) {
	print_help();
	exit 0;
};
if(defined($opts{'version'})) {
	print "$progName $release\n";
	print $copyright . "\n\n";
	exit 0;
};


#### Check values of given options

##  Check sort option: default is "alpha", only three sort types are known
$opts{'sort_type'} = "alpha" unless (defined $opts{'sort_type'});
if (! grep { $opts{'sort_type'} eq $_ } ("alpha", "maxmin", "minmax")) {
	die "ERROR: Unsupported sort type: " . $opts{'sort_type'} . "\n";
}


# internally: 0 == none, undefined == -1 == all
for my $limit_opt ('h', 'u') {
	$opts{$limit_opt} = -1 unless (defined($opts{$limit_opt}));
}



## Hook 'checkoptions'
foreach my $hook_name (keys %{$hooks{'checkoptions'}}) {
	my $callback = $hooks{'checkoptions'}->{$hook_name};
	&$callback;
}


## Parse and check type
if (! @opt_types) {
	# No type requested: use the default statistics
	$types{'default'} = 1;
}
else {
	# Switch on every requested type, stop at the first unknown one
	for my $requested (@opt_types) {
		print STDERR "DEBUG: Got type: " . $requested . "\n" if ($opts{'debug'});
		if (! defined $types{$requested}) {
			die "ERROR: unsupported type: " . $requested . "\n";
		}
		print STDERR "DEBUG: valid type: " . $requested . "\n" if ($opts{'debug'});
		$types{$requested} = 1;
	}
}
	

# Translate a requested time range into the date prefix used to select log lines
if (defined($opts{'range'})) {
	$dateStr = get_datestr($opts{'range'});
}


## Banner
print "\nStatistics generated by: $progName $release";

if (! defined $main::opts{'verbose'}) {
	# Short form: a single number summed up from all module versions
	my $modulesum = 0;
	$modulesum += $moduleinfo{$_}->{'version'} for (keys %moduleinfo);
	printf " (module version sum: %0.2f)\n", $modulesum;
	print $copyright . "\n";
}
else {
	# Verbose form: copyright plus one "type/name:version" line per module
	print "\n" . $copyright . "\n";
	print " Included modules:\n";

	# Two sort passes, exactly as before: first by type, then by name
	my @ordered = sort { $moduleinfo{$a}->{'type'} cmp $moduleinfo{$b}->{'type'} } keys %moduleinfo;
	@ordered = sort { $moduleinfo{$a}->{'name'} cmp $moduleinfo{$b}->{'name'} } @ordered;
	foreach my $module (@ordered) {
		print "  " . $moduleinfo{$module}->{'type'} . "/" . $moduleinfo{$module}->{'name'} . ":" .  $moduleinfo{$module}->{'version'} . "\n";
	}
	print "\n";
}


## Any data to read from stdin?
#if (-t) {
#	# Skip log file parsing
#	print "\nINFO: no data given, proceed without parsing\n";
#	goto("LABEL_end_logfileparsing");
#};

## Show timerange on log file parsing
print "Statistics generated for timerange: $dateStr\n" if ( defined($opts{'range'}) );
print "\n\n";

## Hook 'beforemainloopstarts'
foreach my $hook_name (keys %{$hooks{'beforemainloopstarts'}}) {
	my $callback = $hooks{'beforemainloopstarts'}->{$hook_name};
	&$callback;
}


## Start parsing logfile #################################################
print "DEBUG: start parsing logfile\n" if ($opts{'debug'});

## Hook 'loop_beforestart'
for my $p_hook (keys %{$hooks{'loop_beforestart'}}) {
	&{$hooks{'loop_beforestart'}->{$p_hook}};
};

# Main loop: read the log from the files named on the command line (or
# stdin), determine the Unix time of every line, track the earliest and
# latest time seen, and pass each line to all 'loglineparser' hooks.
# Lines without a usable date/time are reported on STDERR and skipped.
while(<>) {
	chomp;
	$_ =~ s/\r$//; # Remove trailing CR
	# Empty lines which consist of spaces and ctrl chars only
	# (this substitution used to be applied to $~ instead of $_)
	$_ =~ s/^[[:space:][:cntrl:]]+$//;

	next if (length($_) == 0); # skip empty lines

	if ( $types{'test_verp_mung'} != 0 ) {
		die "Currently not supported at the moment";

		# Everything below in this branch is unreachable as long as the
		# die above is in place; kept as reference for re-enabling.
		# Debug/developing: test verp_mung code
		my $addr = lc($_);

		print "\n" . $addr . "\n";

		$opts{'verpMung'} = 1;
		#my $verpmung1 = do_verp_mung($addr);
		# print " -> " . $verpmung1 . "\n";

		$opts{'verpMung'} = 2;
		#my $verpmung2 = do_verp_mung($addr);
		# print " -> " . $verpmung2 . "\n";

		next; # No others here for testing
	};

	## Now starting here, only a maillog is valid!

	# Skip not selected date
	next if (defined($dateStr) && ! /^$dateStr/);

	# Extract date & time
	($msgMonStr, $msgDay, $msgTimeStr) = /^(...)\s+([0-9]+)\s(..:..:..)\s.*/;

	if (! defined $msgMonStr || ! defined $msgDay || ! defined $msgTimeStr) {
		warn "WARNING(skipped-line): Date&Time in logline is not valid: $_";
		next;
	};

	# snatch out log entry date & time
	($msgHr, $msgMin, $msgSec) = split(/:/, $msgTimeStr);
	$msgMon = $monthNums{$msgMonStr};

	# An unknown month name leaves $msgMon undefined; skip such lines as well
	if (! defined $msgMon || ! defined $msgHr || ! defined $msgMin || ! defined $msgSec ) {
		warn "WARNING(skipped-line): Date&Time in logline is not valid: $_";
		next;
	};

	# Log lines carry no year: a month later than the current one is taken
	# to be from last year
	$msgYr = $thisYr; --$msgYr if($msgMon > $thisMon);

	# Calculate Unixtime; timelocal() dies on impossible values (e.g. day 99
	# or hour 25), which must not abort the whole run
	$time = eval { timelocal( $msgSec, $msgMin, $msgHr, $msgDay, $msgMon, $msgYr ) };
	if (! defined $time) {
		warn "WARNING(skipped-line): Date&Time in logline is not valid: $_";
		next;
	};

	# Catch min/max times for late timerange display
	$timemin = $time if (! defined $timemin || $time < $timemin);
	$timemax = $time if (! defined $timemax || $time > $timemax);

	# Hook "loglineparser": every callback gets references to the line's
	# Unix time and to the line itself
	for my $p_hook (keys %{$hooks{'loglineparser'}}) {
		&{$hooks{'loglineparser'}->{$p_hook}} (\$time, \$_);
	};
}

## Hook 'loop_afterfinish'
for my $p_hook (keys %{$hooks{'loop_afterfinish'}}) {
	&{$hooks{'loop_afterfinish'}->{$p_hook}} (\$_);
};

print "DEBUG: end parsing logfile\n" if ($opts{'debug'});


##### end of log file parsing

# In 'test_verp_mung' mode nothing is stored or printed: continue directly
# with the 'final_end' hooks
if ( $types{'test_verp_mung'} != 0 ) {
	goto "LABEL_end";
};

## Hook 'register_intermediate_data' (register global data)
# The time range seen in the log is handed to every callback under the name
# "timerange", as a reference to %timerange
$timerange{'timemin'} = $timemin;
$timerange{'timemax'} = $timemax;
for my $p_hook (keys %{$main::hooks{'register_intermediate_data'}}) {
	&{$main::hooks{'register_intermediate_data'}->{$p_hook}} ("timerange", \%timerange);
};

# The only jump to this label is in the commented-out "no data on stdin"
# check before the main loop
LABEL_end_logfileparsing:

## Hook 'intermediate_storage'
for my $p_hook (keys %{$hooks{'intermediate_storage'}}) {
	&{$hooks{'intermediate_storage'}->{$p_hook}} ();
};


## explicit disable of printing statistics
if (defined $format{'none'}) {
	goto "LABEL_end";
};

## Hook 'before_print_result'
for my $p_hook (keys %{$hooks{'before_print_result'}}) {
	&{$hooks{'before_print_result'}->{$p_hook}} ();
};

## Hook 'retrieve_intermediate_data' (retrieve global data)
# Callbacks get the same reference to %timerange; its values are copied back
# into $timemin/$timemax afterwards
for my $p_hook (keys %{$main::hooks{'retrieve_intermediate_data'}}) {
	&{$main::hooks{'retrieve_intermediate_data'}->{$p_hook}} ("timerange", \%timerange);
};

$timemin = $timerange{'timemin'};
$timemax = $timerange{'timemax'};

## Hook 'print_result'
for my $p_hook (keys %{$hooks{'print_result'}}) {
	&{$hooks{'print_result'}->{$p_hook}};
};

## Hook 'print_additional_statistics'
# Only run when the option 'printstatistics' is defined
if (defined $main::opts{'printstatistics'}) {
	for my $p_hook (keys %{$hooks{'print_additional_statistics'}}) {
		&{$hooks{'print_additional_statistics'}->{$p_hook}};
	};
};



LABEL_end:

## Hook 'final_end'
for my $p_hook (keys %{$hooks{'final_end'}}) {
	&{$hooks{'final_end'}->{$p_hook}};
};

##################### End of main program


# Return a date string to match at the start of log lines.
#
# Parameter:
#   $_[0]  "yesterday", "today", or "<Mon> <day>" (e.g. "Mar 1") with a
#          month name from %monthNums and a day from 1 to 31
# Returns:
#   "<Mon> <dd>", the day being space-padded to two columns (e.g. "Mar  1")
# Dies:
#   with "Illegal month string in: ..." for an unknown month name and with
#   "Illegal day in: ..." when the day is not an integer from 1 to 31
sub get_datestr {
    my $dateOpt = defined $_[0] ? $_[0] : "";
    my ($t_mday, $t_mon);

    my $aDay = 60 * 60 * 24;

    my @now = localtime(time());
    if ($dateOpt eq "yesterday") {
        # Step back 24 hours from today's local noon instead of from "now":
        # days with a daylight saving time change have 23 or 25 hours, so
        # "now - 24h" can yield a wrong date close to midnight.
        my $noon = timelocal(0, 0, 12, $now[3], $now[4], $now[5] + 1900);
        ($t_mday, $t_mon) = (localtime($noon - $aDay))[3,4];
    } elsif ($dateOpt eq "today") {
        ($t_mday, $t_mon) = @now[3,4];
    } else {
        # split ' ' ignores leading whitespace and accepts any whitespace
        # between month and day
        my ($m, $d) = split ' ', $dateOpt;
        if (! defined $m || ! defined $monthNums{$m}) {
            die "Illegal month string in: " . $dateOpt . "\n";
        };
        # Digits only: rejects values such as "1x" or "1.5", which used to
        # slip through the purely numeric range check
        if (! defined $d || $d !~ /^[0-9]+$/ || $d < 1 || $d > 31) {
            die "Illegal day in: " . $dateOpt . "\n";
        };
        $t_mday = $d;
        $t_mon = $monthNums{$m};
    }

    return sprintf("%s %2d", $monthNames[$t_mon], $t_mday);
}


# syntax highlighted by Code2HTML, v. 0.9.1