#!/usr/bin/perl

# Set $WINDOWS to 1 if running on Windows
$WINDOWS = 0;
# $WINDOWS = 1;

# Fields to include in the log file
# XXX should be configurable somewhere
@log_format = qw(action mid reason groups bytes binary);

$MODE = 'highwind';

# load the filter code and run get_config() and some other initialization code
require './cleanfeed'; # XXX

# set up our signal handlers.  Each handler only raises a flag; the flags
# are acted upon in mainloop() after the current article has been handled.
$got_usr1 = 0;
$got_usr2 = 0;
$got_hup  = 0;
$got_term = 0;
$SIG{USR1} = sub { $got_usr1 = 1 }; # use SIGUSR1 to write statfile
$SIG{USR2} = sub { $got_usr2 = 1 }; # use SIGUSR2 to dump EMP history
$SIG{HUP}  = sub { $got_hup  = 1 }; # use SIGHUP to reload configuration
$SIG{TERM} = sub { $got_term = 1 }; # catch SIGTERM so we can clean up

# Check for the PID file existence and create it
pid_file();

mainloop();
exit 0;

# main loop for standalone mode.
#
# Reads articles from STDIN (each terminated by "\r\n.\r\n"), splits each one
# into headers (stored in the global %hdr) and body ($hdr{__BODY__}), calls
# filter_art() and prints "435" (rejected) or "335" (accepted) to STDOUT.
# After every article the signal flags set by the handlers above are checked.
# On end of input or SIGTERM the logfile is closed, the EMP history is
# optionally dumped and the pid file is removed.
sub mainloop {
    my ($head);

    $| = 1; # Flush STDOUT
    binmode STDIN if $WINDOWS;

    open_logfile();

    # Canonical capitalisation for the headers the filter looks at, keyed by
    # the lower-cased header name.  Headers not listed keep their spelling.
    my %known_headers = (
        'approved'            => 'Approved',
        'content-base'        => 'Content-Base',
        'content-disposition' => 'Content-Disposition',
        'content-type'        => 'Content-Type',
        'control'             => 'Control',
        'date'                => 'Date',
        'distribution'        => 'Distribution',
        'followup-to'         => 'Followup-To',
        'from'                => 'From',
        'lines'               => 'Lines',
        'message-id'          => 'Message-ID',
        'newsgroups'          => 'Newsgroups',
        'nntp-posting-host'   => 'NNTP-Posting-Host',
        'organization'        => 'Organization',
        'path'                => 'Path',
        'references'          => 'References',
        'reply-to'            => 'Reply-To',
        'sender'              => 'Sender',
        'subject'             => 'Subject',
        'supersedes'          => 'Supersedes',
        'user-agent'          => 'User-Agent',
        'x-trace'             => 'X-Trace',
        'x-newsreader'        => 'X-Newsreader',
        'x-newsposter'        => 'X-Newsposter',
        'x-mailer'            => 'X-Mailer',
        'x-poster'            => 'X-Poster',
        'x-cancelled-by'      => 'X-Cancelled-By',
        'x-canceled-by'       => 'X-Canceled-By',
    );

    $/ = "\r\n.\r\n"; # we're slurping in the whole article at once
    %hdr = ();
    while (defined ($head = <STDIN>)) {
        $head =~ s/\r\n/\n/g;
        $bytes = length $head; # size of the article

        # Everything up to the first blank line is the header block.
        ($head, $hdr{__BODY__}) = split(/\n\n/, $head, 2);
        $head =~ s/\n\s+/ /g; # handle continuation headers

        # read in a line of the header and store it in a hash
        for (split (/\n/, $head)) {
            next if not /^([^ ]+): (.*)$/;
            my ($header, $value) = ($1, $2);
            my $lcheader = lc $header;
            $header = $known_headers{$lcheader}
                if exists $known_headers{$lcheader};
            $hdr{$header} = $value;
        }

        my $ret = filter_art(); # the real work
        if ($ret) {
            print "435\r\n"; # rejected
            log_entry($ret);
        } else {
            print "335\r\n"; # accepted
            log_entry() if $config{log_accepts};
        }
        %hdr = ();

        # reload our config if we caught SIGHUP
        re_configure() if $got_hup;

        # write stats file if we caught SIGUSR1
        if ($got_usr1) {
            writestats(1);
            $got_usr1 = 0;
        }

        # terminate cleanly if we caught SIGTERM
        last if $got_term;

        # dump EMP histories if we caught SIGUSR2
        if ($got_usr2) {
            dump_emp();
            $got_usr2 = 0;
        }
    } # stdin loop

    # Cleanup
    close_logfile();
    dump_emp() if $config{do_emp_dump};
    unlink $config{pid_file} if $config{pid_file};
}

# when running standalone, HUP brings us here.
# reload config, close logfiles and reopen if we still want them.
sub re_configure {
    $got_hup = 0;
    get_config();
    setup_stuff();

    # Close the logfile.
    close_logfile();

    # If we still want logfiles, open again.
    open_logfile();
}

# Create a pid file. If it already exists, complain and die.
# Does nothing (returns undef) when $config{pid_file} is not set.
# Failure to create the file is only a warning, not fatal.
sub pid_file {
    return undef unless $config{pid_file};
    die "Cleanfeed already running (pid file)\n" if -e $config{pid_file};

    # Three-argument open so that a path containing mode characters is
    # never interpreted as anything but a file name.
    if (open(my $pidfh, '>', $config{pid_file})) {
        print $pidfh "$$\n";
        close $pidfh;
    } else {
        warn "cleanfeed can't create pid file: $!\n";
    }
}

##############################################################################
# logging
##############################################################################

# Rotate the logfile
#
# Returns 0 if logging is not configured, 1 otherwise (including the case
# where the log directory cannot be read, in which case nothing is rotated).
sub rotate_log {
    # Make sure logging is actually set up.
    return 0 unless $config{log_directory} and $config{log_name};

    my $dir  = $config{log_directory};
    my $name = $config{log_name};

    # Delete files older than we want to keep
    opendir(my $dh, $dir) or return 1;
    my @newlist;
    # Assign readdir's result explicitly: the implicit assignment to $_ in
    # "while (readdir ...)" only exists on perl 5.12 and later.
    while (defined(my $file = readdir $dh)) {
        next unless $file =~ /^\Q$name\E\.(\d+)$/;
        my $number = $1;
        if ($config{keep_old_logs} and $number >= $config{keep_old_logs} - 1) {
            unlink "$dir/$name.$number";
        } else {
            push @newlist, $file;
        }
    }
    closedir $dh;

    # Increment all the numbers by one, then move the main logfile to .0
    # Highest number first, so no rename overwrites a file still to be moved.
    foreach my $file (reverse sort extension_sort @newlist) {
        next unless $file =~ /\.(\d+)$/;
        my $newnum = $1 + 1;
        rename("$dir/$file", "$dir/$name.$newnum");
    }
    rename("$dir/$name", "$dir/$name.0");

    # Close the logfile.
    close_logfile();

    # Open the new logfile.
    open_logfile();

    return 1;
}

# sort() comparator: orders file names by their numeric ".N" extension.
sub extension_sort {
    my ($anum, $bnum);
    $anum = $1 if $a =~ /\.(\d+)$/;
    $bnum = $1 if $b =~ /\.(\d+)$/;
    return $anum <=> $bnum;
}

# Write a log entry.
# The format for log entries is defined in @log_format. Each
# element in that array is an element for the log. Elements
# are tab-separated. Each element consists of the name of
# the element, a colon, and the contents.
#
# Pass a true argument if the article is being accepted but logged anyway.
#
# Possible elements for log entries:
#
# action      - 'filter' or 'accept'
# localtime   - current time in local timezone
# gmtime      - current time in GMT
# time        - current time in unix format
# reason      - reason for rejection
# mid         - article's message-id
# groups      - newsgroups the article was posted to
# groupcount  - how many newsgroups the article was posted to
# lines       - article's line count (not from the Lines header)
# bytes       - article size
# pathtail    - Path tail, second to last entry in the Path header
# peer        - first entry in the Path header (who sent us the article)
# nntpph      - NNTP-Posting-Host header
# from        - From header
# subject     - Subject header
# reader      - either User-Agent, X-Newsreader, X-Mailer, X-Poster
# date        - Date header
# control     - Control header
# controltype - Type of control message
# supersedes  - Supersedes header
# org         - Organization header
# refcount    - how many ID's in References header
# urls        - list of urls in the body, first three only
# binary      - 'yes' if the article is a binary
#
# Returns 0 without writing anything when logging is disabled ($Do_Log
# false).  Tabs inside header-derived values are replaced by spaces so they
# cannot break the tab-separated record.
sub log_entry {
    return 0 unless $Do_Log;

    my ($reason) = @_;
    my $entry = '';

    foreach my $item (@log_format) {
        my $junk;
        if ($item eq 'action') {
            $entry .= 'action:'. ($reason ? 'filter' : 'accept') ."\t";
        } elsif ($item eq 'localtime') {
            $entry .= 'localtime:'. scalar localtime() ."\t";
        } elsif ($item eq 'gmtime') {
            $entry .= 'gmtime:'. scalar gmtime() ."\t";
        } elsif ($item eq 'time') {
            $entry .= "time:$now\t";
        } elsif ($item eq 'reason' and $reason) {
            ($junk = $reason) =~ s/\t/ /g;
            $entry .= "reason:$junk\t";
        } elsif ($item eq 'mid') {
            # Header text comes from the article; keep tabs out of the record.
            ($junk = $hdr{'Message-ID'}) =~ s/\t/ /g;
            $entry .= "mid:$junk\t";
        } elsif ($item eq 'groups') {
            $entry .= 'groups:'. join(',', @groups) ."\t";
        } elsif ($item eq 'groupcount') {
            $entry .= 'groupcount:'. (scalar @groups) ."\t";
        } elsif ($item eq 'lines') {
            $entry .= "lines:$lines\t";
        } elsif ($item eq 'bytes') {
            $entry .= "bytes:$bytes\t";
        } elsif ($item eq 'pathtail') {
            $junk = path_tail();
            $entry .= "pathtail:$junk\t" if defined $junk;
        } elsif ($item eq 'peer') {
            $junk = what_peer();
            $entry .= "peer:$junk\t" if defined $junk;
        } elsif ($item eq 'nntpph' and $hdr{'NNTP-Posting-Host'}) {
            ($junk = $hdr{'NNTP-Posting-Host'}) =~ s/\t/ /g;
            $entry .= "nntpph:$junk\t";
        } elsif ($item eq 'from') {
            ($junk = $hdr{From}) =~ s/\t/ /g;
            $entry .= "from:$junk\t";
        } elsif ($item eq 'subject' and $hdr{Subject}) {
            ($junk = $hdr{Subject}) =~ s/\t/ /g;
            $entry .= "subject:$junk\t";
        } elsif ($item eq 'reader') {
            ($junk = ($XReader || x_reader())) =~ s/\t/ /g;
            $entry .= "reader:$junk\t" if $junk;
        } elsif ($item eq 'date') {
            ($junk = $hdr{Date}) =~ s/\t/ /g;
            $entry .= "date:$junk\t";
        } elsif ($item eq 'control' and $hdr{Control}) {
            ($junk = $hdr{Control}) =~ s/\t/ /g;
            $entry .= "control:$junk\t";
        } elsif ($item eq 'controltype' and $hdr{Control}) {
            # The type is the first word of the Control header; capture the
            # whole word, not just a single character.
            $entry .= "controltype:$1\t" if $hdr{Control} =~ /^\s*(\w+)/;
        } elsif ($item eq 'supersedes' and $hdr{Supersedes}) {
            ($junk = $hdr{Supersedes}) =~ s/\t/ /g;
            $entry .= "supersedes:$junk\t";
        } elsif (($item eq 'org' or $item eq 'organization')
                 and $hdr{Organization}) {
            ($junk = $hdr{Organization}) =~ s/\t/ /g;
            $entry .= "org:$junk\t";
        } elsif ($item eq 'refcount') {
            $entry .= 'refcount:' . reference_count() . "\t";
        } elsif ($item eq 'urls') {
            $junk = body_urls();
            # Keep the first three URLs only (indices 0..2).
            $junk = [ @$junk[0..2] ] if scalar @$junk > 3;
            $entry .= 'urls:' . join(' ', @$junk) . "\t" if scalar @$junk > 0;
        } elsif ($item eq 'binary') {
            $entry .= "binary:yes\t" if is_binary();
        } # what kind of entry?
    } # foreach format entry

    $entry =~ s/\t+$//;
    $logfh->print("$entry\n");
}

# Open the logfile named by $Log_File for appending.
# Sets $Do_Log to 1 on success and 0 on failure, and returns it.
# Returns 0 when no logfile is configured and 1 when it is already open.
sub open_logfile {
    return 0 unless $Log_File;

    $logfh = IO::File->new unless ref($logfh) eq 'IO::File';
    return 1 if $logfh->opened; # logfile is already open (?)

    # Pass the mode separately so the file name is never parsed for
    # mode characters.
    if ($logfh->open($Log_File, '>>')) {
        $Do_Log = 1;
    } else {
        $Do_Log = 0;
        slog('E', "failed to open logfile $Log_File");
    }
    return $Do_Log;
}

# Close the logfile and switch logging off.
# Returns 0 if logging was not active, 1 otherwise.
sub close_logfile {
    return 0 unless $Do_Log and $logfh->opened;
    $logfh->close;
    $Do_Log = 0;
    return 1;
}

# Second-to-last entry in the Path header.
# Returns undef if the Path header has fewer than three entries.
sub path_tail {
    return $1 if $hdr{Path} =~ /!([^!]+)![^!]+$/;
    return undef;
}

# First entry in the Path header (the peer who sent us this article).
# Returns undef if the Path header contains no '!'.
sub what_peer {
    return $1 if $hdr{Path} =~ /^([^!]+)!/;
    return undef;
}

# Extract URLs from the article body and return a reference to a
# list of them.  Uses the global $body if it is already defined; otherwise
# fills it with the lower-cased first 50000 characters of the body.
sub body_urls {
    $body = lc substr($hdr{__BODY__}, 0, 50000) unless defined $body;
    my @body_urls = ($body =~ /$fullURL/og);
    return \@body_urls;
}

# Number of entries in the References header
sub reference_count {
    return 0 if not $hdr{References};

    # m//g must run in list context to yield every match; in scalar context
    # it only reports whether one match was found.  The "= () =" idiom
    # counts the matches.
    my $count = () = $hdr{References} =~ /<[^>]*>\s*/g;
    return $count;
}

print $MODE.$fullURL.$XReader.$now.$lines; # lint food

1;