#!/usr/bin/ruby
#
# A script to read an mbox of spams and perform a test on each to
# see how well the filter can score spams as they arrive. After
# each message is scored it is classified as spam and the next
# is scored. This is intended to simulate normal use of the
# filter with incoming email.
#
# The script needs exactly three arguments: the mbox file name, the
# mail type and the spamprobe command. Anything else prints the usage
# line on standard error and exits with status 1.
unless ARGV.length == 3
  STDERR.print("usage: incrtest filename mail_type spamprobe\n")
  exit 1
end
# Keeps running totals of how messages were scored and how many of them
# were scored on the wrong side of the spam threshold.
class ScoreKeeper
  # Scores at or above this value are counted as spam.
  SPAM_THRESHOLD = 0.9

  def initialize
    # Counts are kept as floats, as in the original implementation.
    @goods = 0.0
    @spams = 0.0
    # Number of messages whose score disagreed with the expected mail type.
    @classifies = 0
  end

  # Returns true when +score+ falls on the opposite side of the threshold
  # from +mailtype+ ("good" or "spam"). Any other mailtype is never wrong.
  def isWrongType(score, mailtype)
    if score >= SPAM_THRESHOLD
      mailtype == "good"
    else
      mailtype == "spam"
    end
  end

  # Records one scored message as spam or good depending on the threshold.
  def addScore(score)
    if score >= SPAM_THRESHOLD
      addSpam
    else
      addGood
    end
  end

  # Counts one message as good.
  def addGood
    @goods += 1
  end

  # Counts one message as spam.
  def addSpam
    @spams += 1
  end

  # Number of messages recorded so far.
  def total
    @goods + @spams
  end

  # Percentage of recorded messages that were counted as spam.
  # Returns 0.0 (rather than NaN) when nothing has been recorded yet.
  def percentage
    return 0.0 if total.zero?

    100.0 * @spams / total
  end

  # Prints a one-line summary of the counters to standard output.
  def printSummary
    printf("GOOD %10d SPAM %10d TOTAL %10d PCT %10.2f (%d errors)\n", @goods, @spams, total, percentage, @classifies)
  end

  # Scores the message stored in +tmpname+ by running
  # "<baseargs> score <tmpname>" and reading the first line of its output
  # as a float (no output, or non-numeric output, is read as 0.0).
  #
  # The score is recorded, and when it disagrees with +mailtype+ the error
  # counter is incremented and "<baseargs> <mailtype> <tmpname>" is run
  # (presumably to train the filter on the message - confirm against the
  # spamprobe command in use).
  #
  # baseargs is passed to the shell as-is, so it may contain a command
  # plus its options.
  def processMail(baseargs, mailtype, tmpname)
    score = IO.popen("#{baseargs} score #{tmpname}", "r") do |pipe|
      # Read everything so the child is never cut off mid-write.
      pipe.readlines.first.to_f
    end
    addScore(score)

    # The parsed score (not a placeholder) must drive the error check.
    return unless isWrongType(score, mailtype)

    @classifies += 1
    system("#{baseargs} #{mailtype} #{tmpname}")
  end
end
# Driver: split the mbox into individual messages, write each one to a
# scratch file and hand it to the ScoreKeeper, then print the summary.

# Scratch file holding the single message currently being scored.
tmpname = '_temp'
filename = ARGV.shift
mailtype = ARGV.shift
baseargs = ARGV.shift
score_keeper = ScoreKeeper.new

# True while the previous line was blank (or at the start of the file);
# a "From " separator only counts when it follows such a line.
on_blank = true
# True until the first separator is seen, so no message is processed
# before anything has been collected.
first = true

tmp = File.open(tmpname, "w")
begin
  # Block form closes the mbox handle when reading is finished.
  File.open(filename) do |mbox|
    mbox.each do |line|
      # mbox separator: "From <sender> <weekday> <month> <day> hh:mm:ss yyyy"
      if on_blank && line =~ /^From\s+\S+\s+\w\w\w\s+\w\w\w\s+\d\d?\s+\d\d\s*:\s*\d\d\s*:\s*\d\d\s+\d\d\d\d/
        on_blank = false
        if first
          first = false
        else
          # A new message starts: score the one collected so far and
          # begin a fresh scratch file.
          tmp.close
          score_keeper.processMail(baseargs, mailtype, tmpname)
          tmp = File.open(tmpname, "w")
        end
      elsif line.length == 1
        # A line holding only its terminator is treated as blank.
        on_blank = true
      end
      tmp.print(line)
    end
  end
ensure
  # Do not leave the scratch file open if reading or scoring fails.
  tmp.close unless tmp.closed?
end

# Score the final message collected from the mbox.
score_keeper.processMail(baseargs, mailtype, tmpname)
score_keeper.printSummary
# syntax highlighted by Code2HTML, v. 0.9.1