#!/usr/local/bin/perl
#
# $Id: scan-log,v 1.2 2001/01/09 12:03:54 cmdjb Exp $
#
# Metadata::HTTP class example
#
# Copyright (C) 1997-2001 Dave Beckett - http://purl.org/net/dajobe/
#
# USAGE:
#   scan-log
#
# Reads entries from standard input via Metadata::HTTP and outputs the
# ones that look like they came from robots.
#
# An entry is flagged as robot traffic when its 'request' element
# mentions robots.txt, or when its 'agent' element contains one of a
# handful of crawler-style names.  Entries whose agent matches
# link / valid / lint are always skipped, even if they were flagged.
#

require 5.004;

use strict;

use Metadata::HTTP;

my $md = Metadata::HTTP->new;

# Keep going for as long as read() reports success on STDIN.
ENTRY:
while ($md->read(\*STDIN)) {
    my $is_a_robot = 0;

    # Anything asking for robots.txt is presumed to be a robot.
    # Guard against get() handing back undef so the match stays quiet
    # when the script is run with -w.
    my $request = $md->get('request');
    $is_a_robot = 1
        if defined $request && $request =~ /robots\.txt/;

    # The agent tests only apply when the entry has a true agent value.
    if (my $agent = $md->get('agent')) {
        $is_a_robot = 1
            if $agent =~ /(?:scooter|slurp|robot|crawl|spider|ultraseek|ferret)/i;

        # Agents matching these words are deliberately excluded; this
        # overrides any positive match made above.
        next ENTRY if $agent =~ /(?:link|valid|lint)/i;
    }

    next ENTRY unless $is_a_robot;

    # Debugging aid: uncomment to dump every element of the entry.
    # print "elements: ",join(' ',$md->elements),"\n";
    # for my $el ($md->elements) {
    #   print $el, ":'", $md->get($el), "'\n";
    # }

    print $md->format;
}