#!/usr/local/bin/perl # flag_truncs.pl - martin hamilton # # Check the CERN/Harvest/Netscape cache for truncated objects # - i.e. those for which there is a "Content-length:" HTTP header, # and this does not match the size of the cached object # $Id: flag_truncs.pl,v 1.2 2003/01/23 00:37:06 robertc Exp $ require "getopts.pl"; require "stat.pl"; &Getopts("cd"); # -c -> just count the number of objects with a Content-length header # -d -> turn on debugging output # pass filenames on command line or via STDIN @things = $#ARGV >= 0 ? @ARGV : ; $total_objects = 0, $content_length = 0; # iterate through them foreach $thing (@things) { chop $thing; $opt_d && (print STDERR ">> inspecting: $thing\n"); next if -d "$thing"; # don't want directories $size = (stat($thing))[$ST_SIZE]||next; $opt_d && (print STDERR ">> stat: $size\n"); print "$thing\n", next if ($size == 0); $total_objects++; $count = 0, $expected = 0; open(IN, "$thing") || die "Can't open cached object $thing: $!"; while() { $count += length($_); chop; print STDERR ">> inspecting $_\n" if $opt_d; last if /^(\s+|)$/; # drop out after the end of the HTTP headers # skip if cached file appeared since script started running if (-M $_ < 0) { print STDERR ">> skipping $_\n" if $opt_d; next; } if (/^Content-length:\s+(\d+)/i) { $expected = $1; $content_length++; } } close(IN); next if $opt_c; next if $expected == 0; # no Content-length header # looked at the headers now $difference = $size - $count; $opt_d && print STDERR ">> real: ", $difference, ", expected: $expected\n"; if ($difference != $expected) { print "$thing (expected: $expected, got: $difference)\n"; } } print "$content_length out of $total_objects had Content-length: header\n" if $opt_c;