#!/usr/bin/perl -w
#
###
# Project: pflogstats
# Program: apachelogiostats.pl
# Description: Main program for extracting accounting information from Apache2 logio log files
#
# Copyright (C) 2003-2003 by Dr. Peter Bieringer <pbieringer at aerasec dot de>
# ftp://ftp.aerasec.de/pub/linux/postfix/pflogsumm/
#
# License: GNU GPL v2
# CVS: $Id: apachelogiostats.pl,v 1.12 2005/04/26 16:02:13 peter Exp $
#
# See also following files: LICENSE, ChangeLog
###
###
# ChangeLog
# 0.01
# - copy from sqwmstats.pl and adapt it
# 0.02
# - fix bug for matching in common log lines
# - add optional (default) TCP overhead accounting (IPv6-enabled)
# 0.03
# - implement caching of IP address version (speed-up: x3)
# 0.04
# - fix layout problem on accounting > 1 GB
# 0.05
# - fix typo introduced in 0.04
# 0.06
# - fix parser to accept user with whitespaces
# 0.07
# - replace number format function call
###
# ToDo
# - timerange (set)
# - implement "format"
###
use strict;
use Getopt::Long;
use Net::IP;
## Program identity, reported by --version and at the top of the help output
use vars qw{$release $progName};
($release, $progName) = ("0.07", "apachelogiostats.pl");

## Package globals shared with the modules loaded below
# %options : Getopt::Long specification => reference to the slot in %opts
# %opts    : option values after parsing
use vars qw{%options %opts};
@options{ 'help|h|?', 'version', 'acc_notcpoverhead' } = (
    \$opts{'help'},
    \$opts{'version'},
    \$opts{'acc_notcpoverhead'},
);

# Hook table (hook name => { module name => code reference })
use vars qw{%hooks};

# Number formats
use vars qw{%numberformat};

## Module loader
# Appended search path, in order of preference:
#  1st: current directory
#  2nd: /usr/local/lib/pflogstats
#  3rd: /usr/lib/pflogstats
push @INC, ".", "/usr/local/lib/pflogstats", "/usr/lib/pflogstats";

# General support modules
require "pflogstats-common-support.pm";
require "pflogstats-extensions-networking.pm";

## Print options (debug)
#for my $key (keys %options) {
#	print $key . "\n";
#};
#exit 0;

# File-scoped state
# Byte counters, filled while the log is parsed
my %accounting = (
    'sent' => 0,
    'rcvd' => 0,
);

my @mainhelptext;
my $p_hook;

# Time range of logdata
use vars qw{$timemin $timemax};
my ($time);

# Month abbreviation => zero-based month number
my %monthNums;
@monthNums{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} = (0 .. 11);

# Forward declaration of local functions
sub print_apachelogio_stats();

# Help
# Return the help text for the options handled by this program itself.
# Takes no arguments; the text starts with an empty line and ends with a
# newline so it can be printed as-is.
sub help() {
    my @lines = (
        "",
        "Type: accpopimap",
        "[--acc_notcpoverhead] Don't account estimated TCP overhead",
        "[--debug <debug>] Debug value",
        "| 0x0020 : display extracted data from log line",
        "",
    );
    return join("\n", @lines);
}
## Help function
# Print the complete usage information.
#
# Writes the program name and release to STDOUT, then this program's own
# option help (from help()) followed by the help text of every module that
# registered a 'help' hook, sorted by module name, to STDERR.
# Takes no arguments; the return value is not meaningful.
sub print_help() {
    print "$progName $release\n\n";

    # help() only returns the text; it has to be printed explicitly,
    # otherwise the program's own options never show up in the output.
    print STDERR help();

    print STDERR " Options from included modules:\n\n";

    ## Hook 'help'
    for my $p_hook (sort keys %{$hooks{'help'}}) {
        my $helpstring = $hooks{'help'}->{$p_hook}->();
        print STDERR " Options from module '" . $p_hook . "':";
        print STDERR $helpstring . "\n";
    }
}
## Hook 'early_begin': runs for every registered module before the
## command line is parsed
foreach my $module (keys %{ $hooks{'early_begin'} }) {
    my $callback = $hooks{'early_begin'}->{$module};
    &$callback;
}

## Get options
my $ret = GetOptions(%options);
unless ($ret) {
    # Unknown or malformed option: show usage and fail
    print_help();
    exit 1;
}

# Print help or version, then stop
if (defined $opts{'help'}) {
    print_help();
    exit 0;
}
if (defined $opts{'version'}) {
    print "$progName $release\n";
    exit 0;
}

## Hooks 'checkoptions' and 'beforemainloopstarts', in that order
foreach my $stage ('checkoptions', 'beforemainloopstarts') {
    foreach my $module (keys %{ $hooks{$stage} }) {
        my $callback = $hooks{$stage}->{$module};
        &$callback;
    }
}
print "DEBUG: start parsing logfile\n" if ($opts{'debug'});

## Start parsing logfile #################################################
#
# Reads Apache access log lines from the files named on the command line
# (or STDIN), sums the received/sent bytes of every accepted line into
# %accounting and, unless --acc_notcpoverhead was given, adds an estimate
# of the TCP/IP overhead per request.
#
# Per line:
#  1. split the line into ip, user, date, request, return code and size
#  2. convert the date to Unix time and track the overall time range
#  3. determine received/sent bytes (logio IN=/OUT= tokens, trailing
#     number pair, or size/request length as last resort)
#  4. ask the 'testipaddress' hooks whether the address is excluded
#  5. account bytes and estimated TCP overhead

# Debug bitmask; an unset option counts as "no debugging"
my $debug = defined $opts{'debug'} ? $opts{'debug'} : 0;

# Sizes in bytes used for the TCP overhead estimate
my $mtu_bytes        = 1500;
my $tcp_header_bytes = 20;
my %ip_header_bytes  = (4 => 20, 6 => 40);

# IP version per address string, so Net::IP is consulted only once each
my %cache_ip_version;

LINE: while (<>) {
    chomp;
    s/\r$//;                        # Remove trailing CR
    s/^[[:space:][:cntrl:]]+$//;    # Blank out lines of spaces and ctrl chars only
    next LINE if (length($_) == 0); # skip empty lines

    # Parsing web log
    # Todo: Datematching!!!!

    # Logline: 1.2.3.4 - - [01/Sep/2003:00:38:27 +0200] "GET /path/to/logo.gif HTTP/1.0" 401 401 "https://smtp2.aerasec.de/webmail/cgi-bin/sqwebmail?noframes=1" "Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.3) Gecko/20030312" IN=536 OUT=698

    # Log data may contain '%', so it is printed with print, never used as
    # a printf format string
    print STDERR "DEBUG/apachelogio: line: " . $_ . "\n" if ($debug & 0x0020);

    # Get content; on a failed match every variable stays undefined
    my ($ip_string, $user, $date, $request, $returncode, $size) =
        /^([^\s]+)\s+[^\s]+\s+([^\[]+)\s+\[(.*)\]\s+\"(.*)\"\s+(\d+)\s+(\d+|\-)/i;

    if (! defined $ip_string) {
        # not an access log line at all
        print STDERR "DEBUG/apachelogio: not a proper line\n" if ($debug & 0x0010);
        next LINE;
    }

    print STDERR "DEBUG/apachelogio: ip='" . $ip_string . "' user='" . $user . "' date='" . $date . "' request='" . $request . "' returncode='" . $returncode . "' size='" . $size . "'\n" if ($debug & 0x0020);

    # Calculate Unixtime
    my ($mday, $mon, $year, $hour, $min, $sec) =
        $date =~ /^(\d+)\/(.*)\/(\d+):(\d+):(\d+):(\d+) /;

    undef $time;
    if (defined $mday && exists $monthNums{$mon}) {
        # timelocal dies on out-of-range values; treat that as an invalid date
        $time = eval { timelocal($sec, $min, $hour, $mday, $monthNums{$mon}, $year) };
    }
    if (! defined $time) {
        # Without a usable timestamp the line cannot be accounted
        print STDERR "ERROR/apachelogio: line contains no valid date: $date\n";
        next LINE;
    }

    # Catch min/max times for late timerange display
    $timemin = $time if (! defined $timemin || $time < $timemin);
    $timemax = $time if (! defined $timemax || $time > $timemax);

    my ($rcvd, $sent);

    # Log line containing prefix for IN and OUT bytes from logio
    $rcvd = $1 if (/\s+IN=([0-9]+).*$/);
    $sent = $1 if (/\s+OUT=([0-9]+).*$/);

    if ((! defined $sent) && (! defined $rcvd)) {
        # logio values without any prefix tokens at the end of the log line
        # NOTE(review): a plain common-log line ends in "<status> <size>",
        # which this pattern also matches - confirm that is intended
        if (/([0-9]+)\s+([0-9]+)$/) {
            ($rcvd, $sent) = ($1, $2);
        }
    }

    if ((! defined $sent) && (! defined $rcvd)) {
        # use size, we still have nothing else
        $sent = $size if ($size =~ /^[0-9]+$/);

        # use length of request, we still have nothing else
        # Rest of HTTP header cannot be estimated
        # Request + CRLF + CRLF, or at least 2x CRLF if there is no request
        $rcvd = ($request ne "-") ? length($request) + 4 : 4;
    }

    if (! (defined $user && defined $ip_string && defined $rcvd && defined $sent)) {
        # not a proper accounting line
        print STDERR "DEBUG/apachelogio: not a proper line\n" if ($debug & 0x0010);
        next LINE;
    }

    # Hook "testipaddress": any hook returning non-zero excludes the address
    my $skip = 0;
    for my $p_hook (keys %{$main::hooks{'testipaddress'}}) {
        if ($main::hooks{'testipaddress'}->{$p_hook}->($ip_string, 'returnonerror') != 0) {
            # excluded
            print STDERR "DEBUG/apachelogio: excluded from accounting\n" if ($debug & 0x0010);
            $skip = 1;
            last;
        }
    }
    next LINE if ($skip);

    $accounting{'sent'} += $sent;
    $accounting{'rcvd'} += $rcvd;

    next LINE if (defined $opts{'acc_notcpoverhead'});

    # Determine IP version, using the cache where possible
    my $ip_version = $cache_ip_version{$ip_string};
    if (! defined $ip_version) {
        my $ip = Net::IP->new($ip_string);
        if (defined $ip) {
            my $version = $ip->version();
            $ip_version = (defined $version && ($version == 4 || $version == 6))
                ? $version
                : 0; # dummy, die below
        } else {
            # Hostname instead of IP address, assume IPv4
            $ip_version = 4;
        }
        # Fill cache
        $cache_ip_version{$ip_string} = $ip_version;
    }

    if (! exists $ip_header_bytes{$ip_version}) {
        die "Unsupported IP version: $ip_version (address: $ip_string)";
    }

    # Header per packet: IPv4[20] + TCP[20] = 40, IPv6[40] + TCP[20] = 60
    my $header = $ip_header_bytes{$ip_version} + $tcp_header_bytes;
    # Payload per packet: MTU 1500 - header = 1460 (IPv4) / 1440 (IPv6)
    my $payload = $mtu_bytes - $header;

    # Packets received / sent (rounded up)
    my $pr = int(($rcvd + $payload - 1) / $payload);
    my $ps = int(($sent + $payload - 1) / $payload);

    # Assume 10% ACKs for the packets of the opposite direction
    my $ack = $header / 10;

    # Connection setup/teardown: 2xSYN + 2xFIN received, 1xSYN + 2xFIN sent
    $accounting{'rcvd'} += 4 * $header + $pr * $header + $ps * $ack;
    $accounting{'sent'} += 3 * $header + $ps * $header + $pr * $ack;
}

print "DEBUG/apachelogio: end parsing logfile\n" if ($opts{'debug'});

if (defined $main::opts{'printstatistics'}) {
    for my $p_hook (keys %{$main::hooks{'printstatistics'}}) {
        &{$main::hooks{'printstatistics'}->{$p_hook}};
    }
}

print_apachelogio_stats();

exit 0;
#### END
# statistics
# Print the accounting summary collected in %accounting to STDOUT:
# a headline, a note about the included TCP overhead (unless
# --acc_notcpoverhead was given), the time range, and one row each for
# received bytes, sent bytes and their total.
# Takes no arguments and always returns 0.
sub print_apachelogio_stats() {
    my $double_rule = '=' x 75 . "\n";
    my $single_rule = '-' x 75 . "\n";
    my $row_format  = "%-50s: %11u %9s\n";

    print_headline("Apache logio accounting statistics", "default");

    unless (defined $main::opts{'acc_notcpoverhead'}) {
        print "\n# Accounting data also contains following overheads:\n";
        print "# + TCP overhead (partially estimated)\n";
    }

    print $double_rule;
    printf "%-50s: %6s\n", "", "BytesTraffic";
    print_timerange_normal();
    print $single_rule;

    # One row per direction: label, raw byte count, formatted byte count
    my @rows = (
        [ "received (requests)", 'rcvd' ],
        [ "sent (data)",         'sent' ],
    );
    for my $row (@rows) {
        my ($label, $key) = @$row;
        printf $row_format, $label, $accounting{$key}, format_number($accounting{$key});
    }

    print $single_rule;
    printf $row_format, "Total",
        $accounting{'rcvd'} + $accounting{'sent'},
        format_number($accounting{'rcvd'} + $accounting{'sent'});
    print $double_rule;
    print "\n";

    return 0;
}
# syntax highlighted by Code2HTML, v. 0.9.1