#!/bin/bash
# Script to run spamprobe on input files generated by splitmail.rb and
# summarize the results.
#DBPREFIX=hash:44:
#DBDIR=$HOME/.spamprobe
#DBDIR=$HOME/sptest2
# Core-file size limit for this shell and every command it starts.
ulimit -c 100000

# Database location. DBPREFIX is prepended verbatim to the directory to form
# the value handed to spamprobe's -d option; it is empty here (no prefix).
DBPREFIX=""
DBDIR="${HOME}/s2test"
DBARG="${DBPREFIX}${DBDIR}"

# Extra spamprobe options: VERBOSE is inserted into the scoring commands,
# CACHE_ARG is part of every invocation via SPEXEC.
VERBOSE=""
CACHE_ARG="-s 500"

# Base spamprobe command line shared by the invocations below.
#SPEXEC="./spamprobe.old.2"
SPEXEC="./spamprobe -cd ${DBARG} ${CACHE_ARG}"
# Main test run: for each spamprobe test configuration, empty the database
# directory, load the *.1 mail files, purge, report database state, then score
# both the *.1 files (already loaded) and the *.2 files (not loaded).
# Everything the subshell prints, stderr included, is shown live and also
# written to the file _results.
(
  # Print a progress message prefixed with the current date.
  log() {
    echo "$(date) $*"
  }

  # SPEXEC and VERBOSE are whitespace-separated command strings. Split them
  # once into arrays so every later expansion can be quoted (word splitting
  # only, no accidental glob expansion). An empty VERBOSE yields an empty
  # array, which expands to no arguments at all.
  read -r -a sp_cmd <<<"$SPEXEC"
  read -r -a verbose_args <<<"$VERBOSE"

  #for i in normal normal-ext all-sigs graham ; do
  #for i in normal all-sigs normal-5 all-sigs-5 ; do
  #for i in normal normal-3 normal-5 ; do
  #for i in normal normal-3 graham ; do
  #for i in phrases-2 phrases-3 phrases-4 phrases-5 ; do
  #for i in normal avg alt1 ; do
  #for i in normal all-phrases-5 all-phrases-3-5 ; do
  #for i in normal proximity-3 proximity-5 ; do
  for i in normal; do
    log RUNNING TEST "$i"

    # Start from an empty database directory. The test is quoted so that an
    # empty/unset DBDIR is false (unquoted, `[ -d ]` is true and the delete
    # would expand to `rm /*`); ${DBDIR:?} is a second guard against that.
    # `command` bypasses any rm alias/function. Errors are discarded because
    # an already-empty directory leaves the glob unmatched.
    if [[ -d "$DBDIR" ]]; then
      command rm -- "${DBDIR:?}"/* 2>/dev/null
    fi

    #echo `date` Training
    #cat _spam.1 | formail -n 5 -s $SPEXEC train-spam &
    #cat _good.1 | formail -n 5 -s $SPEXEC train-good &
    #wait

    log Adding good emails
    "${sp_cmd[@]}" -t "$i" good _good.1

    log Adding spam emails
    "${sp_cmd[@]}" -t "$i" spam _spam.1

    echo Purging unique terms
    # NOTE(review): -cd "$DBARG" repeats what SPEXEC already carries and -t is
    # not passed here; kept exactly as the original invocation.
    "${sp_cmd[@]}" -cd "$DBARG" purge 1

    log Database directory
    ls -l "$DBDIR"/sp*

    log Number of messages in database
    "${sp_cmd[@]}" -t "$i" counts

    #echo `date` Number of terms in database
    #$SPEXEC -t $i dump | wc -l

    # Score the files that were loaded above; countscores.rb summarizes the
    # score output.
    log 'Verifying good emails (looking for false positives)'
    "${sp_cmd[@]}" "${verbose_args[@]}" -t "$i" score _good.1 | ruby countscores.rb

    #echo `date` Finding false positives
    #$SPEXEC $VERBOSE -T -t $i find-spam _good.1

    log Verifying spam emails
    "${sp_cmd[@]}" "${verbose_args[@]}" -t "$i" score _spam.1 | ruby countscores.rb

    # Score the files that were not loaded into the database.
    log Scoring good mails
    "${sp_cmd[@]}" "${verbose_args[@]}" -t "$i" score _good.2 | ruby countscores.rb

    #echo `date` Finding false positives
    #$SPEXEC $VERBOSE -T -t $i find-spam _good.2

    log Scoring spam mails
    "${sp_cmd[@]}" "${verbose_args[@]}" -t "$i" score _spam.2 | ruby countscores.rb

    #echo `date` Finding false negatives
    #$SPEXEC $VERBOSE -T -t $i find-good _spam.2

    echo
  done
) 2>&1 | tee _results
# syntax highlighted by Code2HTML, v. 0.9.1