#!/usr/local/bin/perl
#
###
# Project:     pflogstats
# Program:     apachelogiostats.pl
# Description: Main program to extract accounting information from
#              Apache2 mod_logio log files
#
# Copyright (C) 2003-2003 by Dr. Peter Bieringer
#                 ftp://ftp.aerasec.de/pub/linux/postfix/pflogsumm/
#
# License:     GNU GPL v2
# CVS:         $Id: apachelogiostats.pl,v 1.12 2005/04/26 16:02:13 peter Exp $
#
# See also following files: LICENSE, ChangeLog
###

###
# ChangeLog
#  0.01
#   - copy from sqwmstats.pl and adapt it
#  0.02
#   - fix bug for matching in common log lines
#   - add optional (default) TCP overhead accounting (IPv6-enabled)
#  0.03
#   - implement caching of IP address version (speed-up: x3)
#  0.04
#   - fix layout problem on accounting > 1 GB
#  0.05
#   - fix typo introduced in 0.04
#  0.06
#   - fix parser to accept user with whitespaces
#  0.07
#   - replace number format function call
###
# ToDo
#  - timerange (set)
#  - implement "format"
###

use strict;
use warnings;

use Getopt::Long;
use Time::Local;    # timelocal() is used below to convert log timestamps
use Net::IP;

## Name and version
# NOTE: the globals below are intentionally declared with "use vars" (not
# "our"): the modules pulled in via "require" may refer to them, and
# "use vars" declarations are visible package-wide across files.
use vars qw{$release $progName};
$release  = "0.07";
$progName = "apachelogiostats.pl";

## Define global variables
# option handling
use vars qw{%options %opts};
$options{'help|h|?'}          = \$opts{'help'};
$options{"version"}           = \$opts{'version'};
$options{'acc_notcpoverhead'} = \$opts{'acc_notcpoverhead'};

# module hooks
use vars qw{%hooks};

# Number formats
use vars qw{%numberformat};

## Module loader
# 1st: look into current directory
push @INC, ".";
# 2nd: look into /usr/local/lib/perl5/site_perl/5.8.8/Pflogstats
push @INC, "/usr/local/lib/perl5/site_perl/5.8.8/Pflogstats";
# 3rd: look into /usr/lib/pflogstats
push @INC, "/usr/lib/pflogstats";

# General
require "pflogstats-common-support.pm";
require "pflogstats-extensions-networking.pm";

## Print options (debug)
#for my $key (keys %options) {
#    print $key . "\n";
#};
#exit 0;

# Local variables
my %accounting = (
    'sent' => 0,
    'rcvd' => 0,
);

# Time range of logdata
use vars qw{$timemin $timemax};

# Month abbreviation => zero-based month number as expected by timelocal()
my %monthNums = qw(
    Jan 0 Feb 1 Mar 2 Apr 3 May 4  Jun 5
    Jul 6 Aug 7 Sep 8 Oct 9 Nov 10 Dec 11);

# Per IP version: combined IP+TCP header size and the payload that fits into
# one packet on a 1500 byte MTU link.
#   IPv4: IPv4[20] + TCP[20] = 40, payload 1500 - 40 = 1460
#   IPv6: IPv6[40] + TCP[20] = 60, payload 1500 - 60 = 1440
my %tcp_profile = (
    4 => { 'header' => 40, 'payload' => 1460 },
    6 => { 'header' => 60, 'payload' => 1440 },
);

# Cache: client address string => IP version (avoids re-parsing every line)
my %cache_ip_version;


## Help text of this program (returned, not printed)
sub help {
    # NOTE(review): "Type: accpopimap" looks like a leftover from the program
    # this one was copied from - confirm before changing the label.
    my $helpstring = "
    Type: accpopimap
    [--acc_notcpoverhead]      Don't account estimated TCP overhead
    [--debug <value>]          Debug value
       | 0x0020 : display extracted data from log line
";
    return $helpstring;
}

## Print program banner, own help text and the help text of every module
## that registered a 'help' hook
sub print_help {
    print "$progName $release\n\n";

    # The original called help() and discarded its return value, so the
    # program's own options were never displayed.
    print STDERR help();

    print STDERR " Options from included modules:\n\n";

    ## Hook 'help'
    for my $module (sort keys %{ $hooks{'help'} }) {
        my $helpstring = $hooks{'help'}->{$module}->();
        print STDERR " Options from module '" . $module . "':";
        print STDERR $helpstring . "\n";
    }
    return;
}

## Run every callback registered for hook $name (without arguments)
sub run_hooks {
    my ($name) = @_;
    for my $module (keys %{ $hooks{$name} }) {
        $hooks{$name}->{$module}->();
    }
    return;
}

## Return the IP version (4 or 6) of a client address string, cached.
## Anything Net::IP cannot parse is taken to be a hostname and counted as IPv4.
sub ip_version_of {
    my ($address) = @_;

    if (!defined $cache_ip_version{$address}) {
        my $ip = Net::IP->new($address);
        $cache_ip_version{$address} = defined $ip ? $ip->version() : 4;
    }
    return $cache_ip_version{$address};
}

## Estimate the TCP/IP overhead of one request.
##   $ip_version : 4 or 6
##   $rcvd/$sent : payload bytes received/sent for this request
## Returns the list (overhead_received, overhead_sent) in bytes; dies on an
## IP version without a profile.
sub tcp_overhead {
    my ($ip_version, $rcvd, $sent) = @_;

    my $profile = $tcp_profile{$ip_version}
        or die "Unsupported IP version: $ip_version";
    my $header  = $profile->{'header'};
    my $payload = $profile->{'payload'};

    # Number of data packets in each direction (rounded up)
    my $packets_rcvd = int(($rcvd + $payload - 1) / $payload);
    my $packets_sent = int(($sent + $payload - 1) / $payload);

    # Assume 10% ACKs in the opposite direction of each data packet
    my $ack = $header / 10;

    # received: 2xSYN + 2xFIN = 4 headers; sent: 1xSYN + 2xFIN = 3 headers
    my $overhead_rcvd = 4 * $header + $packets_rcvd * $header + $packets_sent * $ack;
    my $overhead_sent = 3 * $header + $packets_sent * $header + $packets_rcvd * $ack;

    return ($overhead_rcvd, $overhead_sent);
}

## Print the accounting summary collected in %accounting
sub print_apachelogio_stats {
    print_headline("Apache logio accounting statistics", "default");

    if (!defined $opts{'acc_notcpoverhead'}) {
        print "\n# Accounting data also contains following overheads:\n";
        print "#  + TCP overhead (partially estimated)\n";
    }

    my $total = $accounting{'rcvd'} + $accounting{'sent'};

    print '=' x 75 . "\n";
    printf "%-50s: %6s\n", "", "BytesTraffic";
    print_timerange_normal();
    print '-' x 75 . "\n";
    printf "%-50s: %11u %9s\n", "received (requests)", $accounting{'rcvd'}, format_number($accounting{'rcvd'});
    printf "%-50s: %11u %9s\n", "sent (data)", $accounting{'sent'}, format_number($accounting{'sent'});
    print '-' x 75 . "\n";
    printf "%-50s: %11u %9s\n", "Total", $total, format_number($total);
    print '=' x 75 . "\n";
    print "\n";

    return 0;
}


## Hook 'early_begin'
run_hooks('early_begin');

## Get options
if (!GetOptions(%options)) {
    print_help();
    exit 1;
}

# Print help or version
if (defined $opts{'help'}) {
    print_help();
    exit 0;
}

if (defined $opts{'version'}) {
    print "$progName $release\n";
    exit 0;
}

## Hook 'checkoptions'
run_hooks('checkoptions');

## Hook 'beforemainloopstarts'
run_hooks('beforemainloopstarts');

# Debug bit mask; read after the hooks so they had a chance to adjust it
my $debug = $opts{'debug'} || 0;

print "DEBUG: start parsing logfile\n" if ($opts{'debug'});

## Start parsing logfile
#################################################

LINE:
while (my $line = <>) {
    chomp $line;
    $line =~ s/\r$//;    # Remove trailing CR

    # Skip empty lines and lines consisting of spaces and ctrl chars only
    next LINE if $line =~ /^[[:space:][:cntrl:]]*$/;

    # Parsing web log
    # Todo: Datematching!!!!

    # Logline: 1.2.3.4 - - [01/Sep/2003:00:38:27 +0200] "GET /path/to/logo.gif HTTP/1.0" 401 401 "https://smtp2.aerasec.de/webmail/cgi-bin/sqwebmail?noframes=1" "Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.3) Gecko/20030312" IN=536 OUT=698

    # Get content
    # (print, not printf: log lines regularly contain '%' characters)
    print STDERR "DEBUG/apachelogio: line: " . $line . "\n" if ($debug & 0x0020);

    my ($ip_string, $user, $date, $request, $returncode, $size) =
        $line =~ /^([^\s]+)\s+[^\s]+\s+([^\[]+)\s+\[(.*)\]\s+\"(.*)\"\s+(\d+)\s+(\d+|\-)/i;

    if (!defined $ip_string) {
        # Not in common/combined log format, nothing to account
        print STDERR "DEBUG/apachelogio: not a proper line\n" if ($debug & 0x0010);
        next LINE;
    }

    print STDERR "DEBUG/apachelogio: ip='" . $ip_string . "' user='" . $user . "' date='" . $date . "' request='" . $request . "' returncode='" . $returncode . "' size='" . $size . "'\n" if ($debug & 0x0020);

    # Calculate Unixtime
    my ($mday, $month, $year, $hour, $min, $sec) =
        $date =~ /^(\d+)\/(.*)\/(\d+):(\d+):(\d+):(\d+) /;
    my $time;
    if (defined $mday && exists $monthNums{$month}) {
        # timelocal() croaks on out-of-range values (e.g. day 32)
        $time = eval { timelocal($sec, $min, $hour, $mday, $monthNums{$month}, $year) };
    }
    if (!defined $time) {
        # Without a usable timestamp the line cannot be accounted; the
        # original carried on with stale regex captures here.
        print STDERR "ERROR/apachelogio: line contains no valid date: $date\n";
        next LINE;
    }

    # Catch min/max times for late timerange display
    $timemin = $time if (!defined $timemin || $time < $timemin);
    $timemax = $time if (!defined $timemax || $time > $timemax);

    my ($rcvd, $sent);

    # Log line containing prefix for IN and OUT bytes from logio
    if ($line =~ /\s+IN=([0-9]+).*$/) {
        $rcvd = $1;
    }
    if ($line =~ /\s+OUT=([0-9]+).*$/) {
        $sent = $1;
    }

    if (!defined $sent && !defined $rcvd) {
        # logio values without any prefix tokens at the end of the log line
        if ($line =~ /([0-9]+)\s+([0-9]+)$/) {
            ($rcvd, $sent) = ($1, $2);
        }
    }

    if (!defined $sent && !defined $rcvd) {
        # use size, we still have nothing else
        if ($size =~ /^[0-9]+$/) {
            $sent = $size;
        }

        # use length of request, we still have nothing else
        # Rest of HTTP header cannot be estimated
        if ($request ne "-") {
            # Request + CRLF + CRLF
            $rcvd = length($request) + 4;
        }
        else {
            # No request, at least 2x CRLF
            $rcvd = 4;
        }
    }

    if (!(defined $rcvd && defined $sent)) {
        # not a proper accounting line
        print STDERR "DEBUG/apachelogio: not a proper line\n" if ($debug & 0x0010);
        next LINE;
    }

    # Hook "testipaddress": any hook returning non-zero excludes the address
    my $skip = 0;
    for my $module (keys %{ $hooks{'testipaddress'} }) {
        if ($hooks{'testipaddress'}->{$module}->($ip_string, 'returnonerror') != 0) {
            # excluded
            print STDERR "DEBUG/apachelogio: excluded from accounting\n" if ($debug & 0x0010);
            $skip = 1;
            last;
        }
    }
    next LINE if $skip;

    $accounting{'sent'} += $sent;
    $accounting{'rcvd'} += $rcvd;

    if (!defined $opts{'acc_notcpoverhead'}) {
        my ($overhead_rcvd, $overhead_sent) =
            tcp_overhead(ip_version_of($ip_string), $rcvd, $sent);

        $accounting{'rcvd'} += $overhead_rcvd;
        # The original added the per-packet part to the misspelled key 'send',
        # which is never reported.
        $accounting{'sent'} += $overhead_sent;
    }
}

print "DEBUG/apachelogio: end parsing logfile\n" if ($opts{'debug'});

if (defined $opts{'printstatistics'}) {
    run_hooks('printstatistics');
}

print_apachelogio_stats();

exit 0;

#### END