#!/usr/bin/perl
#
# Verify the numbers from http-analyze reports
#
# Usage: verify [logfile ...]
#
# Reads access log entries from the named files (or from standard input
# when no file is given) and prints independent totals: hits, files
# (status 200), not-modified responses (status 304), other responses,
# the summed size of all 200 responses, and the number of unique URLs
# and sites.  Entries that cannot be parsed, or that carry an unknown
# protocol or request method, are reported and counted as corrupted;
# they are excluded from every other total.
#
use strict;
use warnings;

# Request methods accepted as valid; anything else marks the entry corrupt.
my %known_method = map { $_ => 1 } qw(GET HEAD POST PUT TRACE OPTIONS DELETE);

# All counters start at 0 so that an empty log still prints numbers.
my $line    = 0;    # current input line number (across all input files)
my $hits    = 0;    # valid entries
my $files   = 0;    # entries answered with 200
my $nomod   = 0;    # entries answered with 304
my $other   = 0;    # entries with any other response code
my $kbsent  = 0;    # sum of the size field of all 200 responses
my $corrupt = 0;    # rejected entries
my %urls;           # URL  => number of 200/304 hits
my %sites;          # site => number of hits

while (<>) {
    # chomp, not chop: a final line without a newline must not lose the
    # last digit of its size field.
    chomp;
    $line++;

    # site ident user [date zone] "request" status size
    unless (/^(\S+) \S+ \S+ \[[^ ]+ [^\]]+\] "(.+)" ([^ ]+) ([^ ]+)/) {
        print "line $line: corrupted logfile entry:\n--> $_\n\n";
        $corrupt++;
        next;
    }
    my ($site, $req, $status, $bytes) = ($1, $2, $3, $4);

    # Reset per entry: a "-" field counts as 0 instead of silently
    # inheriting the value parsed from the previous line.
    my $resp = 0;
    my $size = 0;
    {
        # Non-numeric garbage is coerced the way Perl always did it,
        # just without a warning on STDERR.
        no warnings 'numeric';
        $resp = $status + 0 if $status ne "-";
        $size = $bytes + 0  if $bytes ne "-";
    }

    # A request may lack the protocol (HTTP/0.9 style "GET /") or even
    # the URL; treat missing parts as empty strings.
    my ($method, $url, $prot) = split(/ /, $req);
    $method = "" unless defined $method;
    $url    = "" unless defined $url;
    $prot   = "" unless defined $prot;

    if ($prot ne "" && $prot !~ /^HTTP\/1\.[01]/) {
        print "line $line: unknown protocol: $prot\n--> $_\n\n";
        $corrupt++;
        next;
    }
    if (!$known_method{$method}) {
        print "line $line: unknown request method: $method\n--> $_\n\n";
        $corrupt++;
        next;
    }

    # don't account for size if HEAD request
    $size = 0 if $method eq "HEAD";

    $hits++;                        # total hits
    if ($resp == 200) {
        $files++;
        # NOTE: the raw size field is summed as-is; no division by 1024
        # is applied despite the "KB" label printed below.
        $kbsent += $size;
    }
    elsif ($resp == 304) {
        $nomod++;
    }
    else {
        $other++;
    }

    # count URLs (successful or not-modified responses only)
    $urls{$url}++ if $resp == 200 || $resp == 304;

    $sites{$site}++;                # count sites
}

# Blank line separates the per-line error reports from the summary.
print "\n" if ($corrupt);
print "Total hits:\t$hits\n";
print "Total files:\t$files\n";
print "Total nomod:\t$nomod\n";
print "Total other:\t$other\n";
print "Total KB sent:\t$kbsent\n";
print "Total corrupted entries:\t$corrupt\n" if ($corrupt);

# keys() in scalar context yields the number of distinct entries.
my $cnt = scalar(keys(%urls));
print "Total # of unique URLs:\t$cnt\n";

$cnt = scalar(keys(%sites));
print "Total # of unique sites:\t$cnt\n";

exit(0);