# URL-PATTERNS.RC
#
# Various spammy URL patterns.
# Reset the match flag for this file: the recipes below that test
# "LT2 ?? no" are skipped once an earlier recipe has set LT2=yes.
LT2=no
# Obfuscated URLs
#
# This catches those http://203948023 URLs, URLs of the form
# http://anydomain@realdomain-or-IP/, and URLs with embedded
# spaces and newline characters.
#
# How the recipe works (weighted scoring, evaluated against the body):
#   - It only runs when LEANTAG matches "no" and the body does not contain
#     two "Received:" header lines (presumably an embedded/forwarded
#     message -- confirm).
#   - The score starts at -1000.
#   - Each http://X.msn.com/ URL subtracts 1100; each line beginning with
#     ":" or ">" subtracts 300.
#   - Each occurrence of one of the obfuscation patterns adds 1100:
#     %-encoded host, userinfo ("@", "=40" or "*") in the authority,
#     "digits;" sequences in the host, "@" followed by a dotted quad or
#     hostname, a ten-digit numeric host, a long run of 0/1 digits followed
#     by "/?", and a host label of 25 or more characters.
#   - The action block runs when the final score is positive.
#
# NOTE(review): this recipe gates on LEANTAG while every other recipe in
# this file gates on LT2; LEANTAG is not assigned anywhere in the visible
# part of the file -- confirm this is intentional.
# NOTE(review): the last scoring condition has an empty group "()" as its
# pattern. That looks like a placeholder for a pattern that was lost; as
# written it appears able to add 1100 on any body, which would make the
# score positive for ordinary mail -- TODO restore the intended pattern.
:0 B
* LEANTAG ?? no
* ! ^[^0-9a-z]*Received:(.*$)+Received:
* -1000^0
* -1100^1 (^|[^0-9a-z])http://[a-z]\.msn\.com/
* -300^1 ^[:>]
* 1100^1 (^|[^0-9a-z])(=3D)?https?://(%[0-9][0-9]?[a-z]?)(%[0-9][0-9]?[a-z]?)*
* 1100^1 (^|[^0-9a-z])(=3D)?https?://[0-9a-z][^ /]*(@|=40|\*)
* 1100^1 (^|[^0-9a-z])(=3D)?https?://([0-9a-z][-_0-9a-z]+\.)*([0-9]+;)+([0-9a-z][-_0-9a-z]+\.)*( |/|$)
* 1100^1 (@|=40)[0-9][0-9]?[0-9]?(ÿ|\.|=2E)[0-9][0-9]?[0-9]?(ÿ|\.|=2E)[0-9][0-9]?[0-9]?(ÿ|\.|=2E)[0-9][0-9]?[0-9]?/
* 1100^1 (@|=40)([0-9a-z][-_0-9a-z]*\.)+[a-z][a-z][a-z]?[a-z]?/
* 1100^1 (^|[^0-9a-z])(=3D)?https?://[0-9a-z][-_0-9a-z\.]+(@|=40)[0-9][0-9]?[0-9]?(ÿ|\.|=2E)[0-9][0-9]?[0-9]?(ÿ|\.|=2E)[0-9][0-9]?[0-9]?(ÿ|\.|=2E)[0-9][0-9]?[0-9]?([^0-9a-z\.]|$)
* 1100^1 (^|[^0-9a-z])(=3D)?https?://[0-9a-z][-_0-9a-z\.]+(@|=40)([0-9a-z][-_0-9a-z]*\.)+[a-z][a-z][a-z]?[a-z]?([^0-9a-z\.]|$)
* 1100^1 (^|[^0-9a-z])(=3D)?https?://[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]([^0-9a-z]|$)
* 1100^1 (^|[^0-9a-z])(=3D)?https?://[01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01]*/\?
* 1100^1 (^|[^0-9a-z])(=3D)?https?://[0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z]+\.[a-z][a-z]+([^0-9a-z\.]|$)
* 1100^1 ()
{
# Record the hit: set both flags, build the log line (TESTNAME is defined
# outside this file) and hand off to the shared logging include.
LT3=yes
LT2=yes
SBLOG="C3R-Pattern Match (${TESTNAME}) (Obfuscated URL)"
INCLUDERC=${SBDIR}/functions/loglevel.rc
}
# Overlong Body Hostname
#
# A bunch of spam has ridiculously overlong body hosts. This catches that spam.
#
# Flag the message when FIRSTBODYHOST (set outside this file) consists of six
# or more dot-terminated labels, each at least two characters long, followed
# by a 2-4 letter top-level domain. Every label group consumes its own
# trailing dot, so the TLD follows the last group directly; an extra "\."
# there would require two consecutive dots and never match a real hostname.
# Skipped when an earlier recipe has already set LT2=yes.
:0 B
* LT2 ?? no
* FIRSTBODYHOST ?? ^([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)+[a-z][a-z][a-z]?[a-z]?$
{
LT3=yes
LT2=yes
SBLOG="A1R-Pattern Match (Ridiculously large message body hostname)"
INCLUDERC=${SBDIR}/functions/loglevel.rc
}
# Flag the message when SECONDBODYHOST (set outside this file) consists of
# six or more dot-terminated labels, each at least two characters long,
# followed by a 2-4 letter top-level domain. Every label group consumes its
# own trailing dot, so the TLD follows the last group directly; an extra "\."
# there would require two consecutive dots and never match a real hostname.
# Skipped when an earlier recipe has already set LT2=yes.
:0 B
* LT2 ?? no
* SECONDBODYHOST ?? ^([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)+[a-z][a-z][a-z]?[a-z]?$
{
LT3=yes
LT2=yes
SBLOG="A1R-Pattern Match (Ridiculously large message body hostname)"
INCLUDERC=${SBDIR}/functions/loglevel.rc
}
# Flag the message when THIRDBODYHOST (set outside this file) consists of six
# or more dot-terminated labels, each at least two characters long, followed
# by a 2-4 letter top-level domain. Every label group consumes its own
# trailing dot, so the TLD follows the last group directly; an extra "\."
# there would require two consecutive dots and never match a real hostname.
# Skipped when an earlier recipe has already set LT2=yes.
:0 B
* LT2 ?? no
* THIRDBODYHOST ?? ^([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)+[a-z][a-z][a-z]?[a-z]?$
{
LT3=yes
LT2=yes
SBLOG="A1R-Pattern Match (Ridiculously large message body hostname)"
INCLUDERC=${SBDIR}/functions/loglevel.rc
}
# Flag the message when FOURTHBODYHOST (set outside this file) consists of
# six or more dot-terminated labels, each at least two characters long,
# followed by a 2-4 letter top-level domain. Every label group consumes its
# own trailing dot, so the TLD follows the last group directly; an extra "\."
# there would require two consecutive dots and never match a real hostname.
# Skipped when an earlier recipe has already set LT2=yes.
:0 B
* LT2 ?? no
* FOURTHBODYHOST ?? ^([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)+[a-z][a-z][a-z]?[a-z]?$
{
LT3=yes
LT2=yes
SBLOG="A1R-Pattern Match (Ridiculously large message body hostname)"
INCLUDERC=${SBDIR}/functions/loglevel.rc
}
# Flag the message when FIFTHBODYHOST (set outside this file) consists of six
# or more dot-terminated labels, each at least two characters long, followed
# by a 2-4 letter top-level domain. Every label group consumes its own
# trailing dot, so the TLD follows the last group directly; an extra "\."
# there would require two consecutive dots and never match a real hostname.
# Skipped when an earlier recipe has already set LT2=yes.
:0 B
* LT2 ?? no
* FIFTHBODYHOST ?? ^([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)+[a-z][a-z][a-z]?[a-z]?$
{
LT3=yes
LT2=yes
SBLOG="A1R-Pattern Match (Ridiculously large message body hostname)"
INCLUDERC=${SBDIR}/functions/loglevel.rc
}
# Flag the message when SIXTHBODYHOST (set outside this file) consists of six
# or more dot-terminated labels, each at least two characters long, followed
# by a 2-4 letter top-level domain. Every label group consumes its own
# trailing dot, so the TLD follows the last group directly; an extra "\."
# there would require two consecutive dots and never match a real hostname.
# Skipped when an earlier recipe has already set LT2=yes.
:0 B
* LT2 ?? no
* SIXTHBODYHOST ?? ^([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)([0-9a-z][-_0-9a-z]+\.)+[a-z][a-z][a-z]?[a-z]?$
{
LT3=yes
LT2=yes
SBLOG="A1R-Pattern Match (Ridiculously large message body hostname)"
INCLUDERC=${SBDIR}/functions/loglevel.rc
}
# Encoded URL in A HREF Tag
#
# Body recipe that runs only while LT2 is still "no"; on a match it sets
# LT3 and LT2 to yes, builds the SBLOG line and includes loglevel.rc.
#
# NOTE(review): the only pattern condition is an empty group "()". That
# looks like a placeholder for a pattern that was lost (possibly one
# containing HTML markup). As written it appears to match any body, so
# every message reaching this recipe with LT2 == no would be tagged --
# TODO restore the intended pattern before relying on this recipe.
:0 B
* LT2 ?? no
* ()
{
LT3=yes
LT2=yes
SBLOG="A1R-Pattern Match (${TESTNAME}) (Encoded URL in A HREF Tag)"
INCLUDERC=${SBDIR}/functions/loglevel.rc
}
# Encoded ASCII entities in URL
#
# Lots of spammers use this to evade SURBL and other message
# body URI patterns.
#
:0
* LT2 ?? no
* !--.*forwarded message --
* !^forwarded message:
* ! FROMEMAIL ^([0-9a-z][-_0-9a-z]+@([0-9a-z][-_0-9a-z]+\.)+amazon\.com)$
* H ?? ! ^Content-Type: text/(plain|html); charset=.?(big5|chinesebig5|euc.(jp|kr)|gb-?(1988|2312|18030)|iso-10646|iso-2022-(cn|jp|kr)|jis.C622[06]|jis.x02(01|08|12)|shift.jis|ks.c.5601-198[79]|utf.?[78]|windows.31j).?$
* -1000^0
* B ?? -1100^0 (^|[^0-9a-z]|=3D)https?://([0-9a-z][-_0-9a-z]+(ÿ|\.|[=%]2E))*\
[0-9a-z][-_0-9a-z]+[=%](20|2E|3D)
* B ?? 1100^0 (^|[^0-9a-z]|=3D)https?://([0-9a-z][-_0-9a-z]+(ÿ|\.|[=%]2E))*\
([0-9a-z]|%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)\
[-_0-9a-z]*(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)+\
([-_0-9a-z]|(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?))*([^0-9a-z]|$)
* B ?? 1100^0 (^|[^0-9a-z]|=3D)mailto:([0-9a-z][-_0-9a-z]+(ÿ|\.|[=%]2E))*\
([0-9a-z.@]|%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)\
[-_0-9a-z.@]*(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)+\
([-_0-9a-z.@]|(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?))*([^0-9a-z]|$)
* B ?? 1100^0 ()