You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/10/25 18:15:35 UTC
svn commit: r467701 [2/2] - in /spamassassin/branches/jm_re2c_hacks: ./
build/ build/automc/ lib/ lib/Mail/SpamAssassin/
lib/Mail/SpamAssassin/Bayes/ lib/Mail/SpamAssassin/BayesStore/
lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Message/ lib/Mail/...
Modified: spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf Wed Oct 25 09:15:31 2006
@@ -27,8 +27,551 @@
###########################################################################
+# partial messages; currently-theoretical attack
+# unsurprisingly this hits 0/0 right now.
+header FRAGMENTED_MESSAGE Content-Type =~ /\bmessage\/partial/i
+describe FRAGMENTED_MESSAGE Partial message
+tflags FRAGMENTED_MESSAGE userconf
+
+# this is also mostly-theoretical, so allow 0 hits
+header HEAD_LONG eval:check_msg_parse_flags('truncated_header')
+describe HEAD_LONG Message headers are very long
+tflags HEAD_LONG userconf
+
+###########################################################################
+
+header FROM_BLANK_NAME From =~ /(?:\s|^)"" <\S+>/i
+describe FROM_BLANK_NAME From: contains empty name
+
+###########################################################################
+# numeric address rules, these are written to avoid overlap with each other
+
+header __FROM_ENDS_IN_NUMS From:addr =~ /\D\d{8,}\@/i
+
+header FROM_STARTS_WITH_NUMS From:addr =~ /^\d{6,}\S+\@/i
+describe FROM_STARTS_WITH_NUMS From: starts with many numbers
+
+# don't match US/Canada phone numbers: 10 digits optionally preceded by a "1"
+header __FROM_ALL_NUMS From:addr =~ /^(?:\d{1,9}|[02-9]\d{10}|\d{12,})@/
+
+###########################################################################
+
+header FROM_OFFERS From:addr =~ /\@\S*offers(?![eo]n\b)/i
+describe FROM_OFFERS From address is "at something-offers"
+
+header FROM_NO_USER From =~ /(?:^\@|<\@| \@[^\)<]*$|<>)/ [if-unset: unset@unset.unset]
+describe FROM_NO_USER From: has no local-part before @ sign
+
+# also 100% valid
+header FAKED_UNDISC_RECIPS To =~ /undisclosed[_ ]*recipient(?:s[^:]|[^s])/i
+describe FAKED_UNDISC_RECIPS Faked To "Undisclosed-Recipients"
+
+header PLING_QUERY Subject =~ /\?.*!|!.*\?/
+describe PLING_QUERY Subject has exclamation mark and question mark
+
+
+header MSGID_SPAM_99X9XX99 MESSAGEID =~ /^<\d\d\d\d\d\d[a-z]\d[a-z][a-z]\d\d\$[a-z][a-z][a-z]\d\d\d\d\d\$\d\d\d\d\d\d\d\d\@/
+describe MSGID_SPAM_99X9XX99 Spam tool Message-Id: (99x9xx99 variant)
+
+header MSGID_SPAM_ALPHA_NUM MESSAGEID =~ /<[A-Z]{7}-000[0-9]{10}\@[a-z]*>/
+describe MSGID_SPAM_ALPHA_NUM Spam tool Message-Id: (alpha-numeric variant)
+
+header MSGID_SPAM_CAPS Message-ID =~ /^\s*<?[A-Z]+\@(?!(?:mailcity|whowhere)\.com)/
+describe MSGID_SPAM_CAPS Spam tool Message-Id: (caps variant)
+
+header MSGID_SPAM_LETTERS Message-Id =~ /<[a-z]{5,}\@(\S+\.)+\S+>/
+describe MSGID_SPAM_LETTERS Spam tool Message-Id: (letters variant)
+
+
+header MSGID_NO_HOST MESSAGEID =~ /\@>(?:$|\s)/m
+describe MSGID_NO_HOST Message-Id has no hostname
+
+# the negative lookahead in __MSGID_DOLLARS_MAYBE exempts this MUA, circa 1997-2000:
+# X-Mailer: Microsoft Outlook Express 4.71.1712.3
+# Message-ID: <01...@andrew>
+header __MSGID_DOLLARS_OK MESSAGEID =~ /<[0-9a-f]{4,}\$[0-9a-f]{4,}\$[0-9a-f]{4,}\@\S+>/m
+header __MSGID_DOLLARS_MAYBE MESSAGEID =~ /<\w{4,}\$\w{4,}\$(?!localhost)\w{4,}\@\S+>/mi
+meta MSGID_DOLLARS_RANDOM __MSGID_DOLLARS_MAYBE && !__MSGID_DOLLARS_OK
+
+# bit of a ratware rule, but catches a bit more than just the one ratware
+header __MSGID_RANDY Message-ID =~ /<[a-z\d][a-z\d\$-]{10,29}[a-z\d]\@[a-z\d][a-z\d.]{3,12}[a-z\d]>/
+# heuristic to eliminate most good Message-ID formats
+header __MSGID_OK_HEX Message-ID =~ /\b[a-f\d]{8}\b/
+header __MSGID_OK_DIGITS Message-ID =~ /\d{10}/
+header __MSGID_OK_HOST Message-ID =~ /\@(?:\D{2,}|(?:\d{1,3}\.){3}\d{1,3})>/
+meta MSGID_RANDY (__MSGID_RANDY && !(__MSGID_OK_HEX || __MSGID_OK_DIGITS || __MSGID_OK_HOST))
+describe MSGID_RANDY Message-Id has pattern used in spam
+
+# bug 3395
+header MSGID_YAHOO_CAPS Message-ID =~ /<[A-Z]+\@yahoo.com>/
+describe MSGID_YAHOO_CAPS Message-ID has ALLCAPS@yahoo.com
+
+###########################################################################
+
+header __AT_AOL_MSGID MESSAGEID =~ /\@aol\.com\b/i
+header __FROM_AOL_COM From =~ /\@aol\.com\b/i
+meta FORGED_MSGID_AOL (__AT_AOL_MSGID && !__FROM_AOL_COM)
+describe FORGED_MSGID_AOL Message-ID is forged, (aol.com)
+
+header __AT_EXCITE_MSGID MESSAGEID =~ /\@excite\.com\b/i
+header __MY_RCVD_EXCITE Received =~ /\.excite\.com\b/i
+meta FORGED_MSGID_EXCITE (__AT_EXCITE_MSGID && !__MY_RCVD_EXCITE)
+describe FORGED_MSGID_EXCITE Message-ID is forged, (excite.com)
+
+header __AT_HOTMAIL_MSGID MESSAGEID =~ /\@hotmail\.com\b/i
+header __FROM_HOTMAIL_COM From =~ /\@hotmail\.com\b/i
+meta FORGED_MSGID_HOTMAIL (__AT_HOTMAIL_MSGID && (!__FROM_HOTMAIL_COM && !__FROM_MSN_COM && !__FROM_YAHOO_COM))
+describe FORGED_MSGID_HOTMAIL Message-ID is forged, (hotmail.com)
+
+header __AT_MSN_MSGID MESSAGEID =~ /\@msn\.com\b/i
+header __FROM_MSN_COM From =~ /\@msn\.com\b/i
+meta FORGED_MSGID_MSN (__AT_MSN_MSGID && (!__FROM_MSN_COM && !__FROM_HOTMAIL_COM && !__FROM_YAHOO_COM))
+describe FORGED_MSGID_MSN Message-ID is forged, (msn.com)
+
+header __AT_YAHOO_MSGID MESSAGEID =~ /\@yahoo\.com\b/i
+header __FROM_YAHOO_COM From =~ /\@yahoo\.com\b/i
+meta FORGED_MSGID_YAHOO (__AT_YAHOO_MSGID && !__FROM_YAHOO_COM)
+describe FORGED_MSGID_YAHOO Message-ID is forged, (yahoo.com)
+
+###########################################################################
+
+header __MSGID_BEFORE_RECEIVED ALL =~ /\nMessage-Id:.*\nReceived:/si
+header __MSGID_BEFORE_OKAY Message-Id =~ /\@[a-z0-9.-]+\.(?:yahoo|wanadoo)(?:\.[a-z]{2,3}){1,2}>/
+meta MSGID_FROM_MTA_HEADER (__MSGID_BEFORE_RECEIVED && !__MSGID_BEFORE_OKAY)
+describe MSGID_FROM_MTA_HEADER Message-Id was added by a relay
+
+header MSGID_FROM_MTA_HOTMAIL Message-Id =~ /<MC\d{1,2}-F{1,2}\w{21,22}\@\S*hotmail\.com>/
+describe MSGID_FROM_MTA_HOTMAIL Message-Id was added by a hotmail.com relay
+
+header MSGID_LONG MESSAGEID =~ /<.{160,}>|<.{140,}\@|\@.{55,}>/m
+describe MSGID_LONG Message-ID is unusually long
+
+header MSGID_SHORT MESSAGEID =~ /^.{1,15}$|<.{0,4}\@/m
+describe MSGID_SHORT Message-ID is unusually short
+
+header MSGID_MULTIPLE_AT MESSAGEID =~ /<[^>]*\@[^>]*\@/
+describe MSGID_MULTIPLE_AT Message-ID contains multiple '@' characters
+
+###########################################################################
+
+header DATE_SPAMWARE_Y2K Date =~ /^[A-Z][a-z]{2}, \d\d [A-Z][a-z]{2} [0-6]\d \d\d:\d\d:\d\d [A-Z]{3}$/
+describe DATE_SPAMWARE_Y2K Date header uses unusual Y2K formatting
+
+# As noted on the dev@ list, ":60" is a valid seconds value when there is a
+# leap second (2005-12-31, for instance), so accept it as valid. ISO 8601
+# apparently allows for it.
+# In the tests below, "ok" and "fail" are reversed because the rule uses !~
+# and so hits on invalid dates: valid dates are "fail", invalid dates "ok".
+header INVALID_DATE Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?\s+(?:[AP]M\s+)?(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+describe INVALID_DATE Invalid Date: header (not RFC 2822)
+test INVALID_DATE fail Sat, 31 Dec 2005 23:59:60 -0500
+test INVALID_DATE fail Wed, 31 Jul 2002 16:41:57 +0200
+test INVALID_DATE ok Sat, 31 Dec 2005 24:00:00 -0500
+test INVALID_DATE ok Sat, 31 Dec 2005 23:00:00
+test INVALID_DATE ok Thurs, 31 Jul 2002 16:41:57 +0200
+
+# allow +1300, NZ timezone
+header INVALID_DATE_TZ_ABSURD Date =~ /[-+](?:1[4-9]\d\d|[2-9]\d\d\d)$/
+describe INVALID_DATE_TZ_ABSURD Invalid Date: header (timezone does not exist)
+
+header INVALID_TZ_CST ALL =~ /[+-]\d\d[30]0(?<!-0600|-0500|\+0800|\+0930|\+1030)\s+(?:\bCST\b|\(CST\))/
+describe INVALID_TZ_CST Invalid date in header (wrong CST timezone)
+
+header INVALID_TZ_EST ALL =~ /[+-]\d\d[30]0(?<!-0500|-0300|\+1000|\+1100)\s+(?:\bEST\b|\(EST\))/
+describe INVALID_TZ_EST Invalid date in header (wrong EST timezone)
+
+
+###########################################################################
+# MIME encoding with spam characteristics
+
+header __SUBJECT_NEEDS_MIME Subject =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/
+header __SUBJECT_ENCODED_QP Subject:raw =~ /=\?\S+\?Q\?/i
+header __SUBJECT_ENCODED_B64 Subject:raw =~ /=\?\S+\?B\?/i
+
+
+
+header __FROM_NEEDS_MIME From =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/
+header __FROM_ENCODED_QP From:raw =~ /=\?\S+\?Q\?/i
+header __FROM_ENCODED_B64 From:raw =~ /=\?\S+\?B\?/i
+
+
+meta FROM_EXCESS_BASE64 __FROM_ENCODED_B64 && !__FROM_NEEDS_MIME
+describe FROM_EXCESS_BASE64 From: base64 encoded unnecessarily
+
+
+###########################################################################
+# ADV tags in various languages
+
+header ENGLISH_UCE_SUBJECT Subject =~ /^[^0-9a-z]*adv(?:ert)?\b/i
+describe ENGLISH_UCE_SUBJECT Subject contains an English UCE tag
+
+# alan premselaar <al...@12inch.com>, see SpamAssassin-talk list 2003-03
+# quinlan: 2003-03-23 here are more generic Japanese iso-2022-jp codes
+# ("not yet acceptance" or "email") + "announcement"
+# FWIW, according to Peter Evans, this should be sufficient to catch the
+# UCE tag and a common attempt at evasion (using the "sue" instead of
+# "mi" Chinese character). 2006-10-12: updated by bug 4021.
+header JAPANESE_UCE_SUBJECT Subject =~ /\e\$B.*(?:L\$>5Bz|EE;R%a!<%k)(?:8x|9-)9p/
+describe JAPANESE_UCE_SUBJECT Subject contains a Japanese UCE tag
+
+# check body for "shou nin daku kou koku" UCE tag (bug 4021)
+body __JAPANESE_UCE_BODY /(?:L\$>5Bz|EE;R%a!<%k)(?:8x|9-)9p/
+
+meta JAPANESE_UCE_BODY (__ISO_2022_JP_DELIM && __JAPANESE_UCE_BODY)
+describe JAPANESE_UCE_BODY Body contains Japanese UCE tag
+
+# quinlan: "advertisement" in Russian KOI8-R
+# (no longer common, but worth noting in future)
+#header RUSSIAN_UCE_SUBJECT Subject =~ /\xf0\xe5\xea\xeb\xe0\xec\xf3/
+#describe RUSSIAN_UCE_SUBJECT Subject contains a Russian UCE tag
+
+# Korean UCE Subject: lines are usually 8-bit, but are occasionally encoded
+# with quoted-printable or base64.
+#
+# \xbc\xba\xc0\xce means "adult"
+# \xb1\xa4\xb0\xed means "advertisement"
+# \xc1\xa4\xba\xb8 means "information"
+# \xc8\xab\xba\xb8 means "publicity"
+#
+# Each two-byte sequence is one Korean letter; spaces and periods are
+# sometimes inserted to obscure the words. \xb1\xa4\xb0\xed is the most common
+# tag and is sometimes heavily obscured, so we look harder for it.
+#
+header KOREAN_UCE_SUBJECT Subject =~ /[({[<][. ]*(?-i:\xbc\xba[. ]*\xc0\xce[. ]*)?(?-i:\xb1\xa4(?:[. ]*|[\x00-\x7f]{0,3})\xb0\xed|\xc1\xa4[. ]*\xba\xb8|\xc8\xab[. ]*\xba\xb8)[. ]*[)}\]>]/
+describe KOREAN_UCE_SUBJECT Subject: contains Korean unsolicited email tag
+
+###########################################################################
+
+# two reliable signatures
+header __DOUBLE_IP_SPAM_1 Received =~ /from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} with/
+header __DOUBLE_IP_SPAM_2 Received =~ /from\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+by\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3};/
+# loose match
+header __DOUBLE_IP_LOOSE Received =~ /(?:\b(?:from|by)\b.{1,4}\b\d{1,3}[._-]\d{1,3}[._-]\d{1,3}[._-]\d{1,3}(?<!127\.0\.0\.1)\b.{0,4}){2}/i
+# spam signature
+meta RCVD_DOUBLE_IP_SPAM (__DOUBLE_IP_SPAM_1 || __DOUBLE_IP_SPAM_2)
+describe RCVD_DOUBLE_IP_SPAM Bulk email fingerprint (double IP) found
+# other matches
+meta RCVD_DOUBLE_IP_LOOSE (__DOUBLE_IP_LOOSE && !RCVD_DOUBLE_IP_SPAM)
+describe RCVD_DOUBLE_IP_LOOSE Received: by and from look like IP addresses
+
+header FORGED_TELESP_RCVD Received =~ /\.(?!br).. \(\d+-\d+-\d+-\d+\.dsl\.telesp\.net\.br /
+describe FORGED_TELESP_RCVD Contains forged hostname for a DSL IP in Brazil
+
+# forgery meta-rules: more reliable than their inputs
+meta CONFIRMED_FORGED (__FORGED_RCVD_TRAIL && (__FORGED_AOL_RCVD || __FORGED_HOTMAIL_RCVD || __FORGED_EUDORAMAIL_RCVD || FORGED_YAHOO_RCVD || __FORGED_JUNO_RCVD || FORGED_GW05_RCVD))
+describe CONFIRMED_FORGED Received headers are forged
+
+meta MULTI_FORGED ((__FORGED_AOL_RCVD + __FORGED_HOTMAIL_RCVD + __FORGED_EUDORAMAIL_RCVD + FORGED_YAHOO_RCVD + __FORGED_JUNO_RCVD + FORGED_GW05_RCVD) > 1)
+describe MULTI_FORGED Received headers indicate multiple forgeries
+
+header NONEXISTENT_CHARSET Content-Type =~ /charset=.?DEFAULT/
+describe NONEXISTENT_CHARSET Character set doesn't exist
+
+
+
+header MISSING_DATE Date =~ /^UNSET$/ [if-unset: UNSET]
+describe MISSING_DATE Missing Date: header
+
+header __HAS_SUBJECT exists:Subject
+meta MISSING_SUBJECT !__HAS_SUBJECT
+describe MISSING_SUBJECT Missing Subject: header
+
+header GAPPY_SUBJECT Subject =~ /\b(?:[a-z]([-_. =~\/:,*!\@\#\$\%\^&+;\"\'<>\\])\1{0,2}){4}/i
+describe GAPPY_SUBJECT Subject: contains G.a.p.p.y-T.e.x.t
+
+### header existence tests (description is added automatically)
+
+# X-Fix example: NTMail fixed non RFC822 compliant EMail message
+#
+# X-PMFLAGS is all caps
+#
+# Headers that seem to only be used by a single spamming software and
+# are found together in the same message:
+# 1. X-MailingID and X-ServerHost
+# 2. X-Stormpost-To and X-List-Unsubscribe
+#
+# not spammish: X-EM-Registration, X-EM-Version, X-Antiabuse, X-List-Host,
+# X-Message-Id
+# bad FP rate: Comment, Date-warning
+
+header PREVENT_NONDELIVERY exists:Prevent-NonDelivery-Report
+describe PREVENT_NONDELIVERY Message has Prevent-NonDelivery-Report header
+
+header X_IP exists:X-IP
+describe X_IP Message has X-IP header
+
+header __HAS_MIMEOLE exists:X-MimeOLE
+header __HAS_MSMAIL_PRI exists:X-MSMail-Priority
+header __HAS_SQUIRRELMAIL_IN_MAILER X-Mailer =~ /SquirrelMail\b/
+meta MISSING_MIMEOLE (__HAS_MSMAIL_PRI && !__HAS_MIMEOLE && !__HAS_SQUIRRELMAIL_IN_MAILER)
+describe MISSING_MIMEOLE Message has X-MSMail-Priority, but no X-MimeOLE
+
+header __HAS_X_MAILER exists:X-Mailer
+
+header __IS_EXCH X-MimeOLE =~ /Produced By Microsoft Exchange V/
+
+header SUBJ_AS_SEEN Subject =~ /\bAs Seen/i
+describe SUBJ_AS_SEEN Subject contains "As Seen"
+
+header SUBJ_DOLLARS Subject =~ /^\$[0-9.,]+\b/
+describe SUBJ_DOLLARS Subject starts with dollar amount
+
+
+
+
+
+
+
+header SUBJ_YOUR_DEBT Subject =~ /Your (?:Bills|Debt|Credit)/i
+describe SUBJ_YOUR_DEBT Subject contains "Your Bills" or similar
+
+header SUBJ_YOUR_FAMILY Subject =~ /Your Family/i
+describe SUBJ_YOUR_FAMILY Subject contains "Your Family"
+
+
+# The real services never HELO as a bare 'foo.com'; they use 'mail.foo.com' or
+# something similar. Note: be careful when expanding this list... legit dotcom
+# HELOers include: hotmail.com, drizzle.com, lockergnome.com.
+header RCVD_FAKE_HELO_DOTCOM Received =~ /^from (?:msn|yahoo|yourwebsite|lycos|excite|cs|aol|localhost|koreanmail|allexecs|mydomain|juno|eudoramail|compuserve|desertmail|excite|caramail)\.com \(/m
+describe RCVD_FAKE_HELO_DOTCOM Received contains a faked HELO hostname
+
+header SUBJECT_DIET Subject =~ /\bLose .*(?:pounds|lbs|weight)/i
+describe SUBJECT_DIET Subject talks about losing pounds
+
+header EXTRA_MPART_TYPE Content-Type =~ /(?:\s*multipart\/)?.* type=/i
+describe EXTRA_MPART_TYPE Header has extraneous Content-type:...type= entry
+
+header TO_RECIP_MARKER To =~ /\#recipient\#/
+describe TO_RECIP_MARKER To header contains 'recipient' marker
+
+# MIME boundary tests; spam tools use distinctive patterns.
+header MIME_BOUND_DD_DIGITS Content-Type =~ /boundary=\"--\d+\"/
+describe MIME_BOUND_DD_DIGITS Spam tool pattern in MIME boundary
+header MIME_BOUND_DIGITS_7 Content-Type =~ /boundary=\d{9}\.\d{13}/
+describe MIME_BOUND_DIGITS_7 Spam tool pattern in MIME boundary
+header MIME_BOUND_DIGITS_15 Content-Type =~ /boundary=\"\d{15,}\"/
+describe MIME_BOUND_DIGITS_15 Spam tool pattern in MIME boundary
+header MIME_BOUND_MANY_HEX Content-Type =~ /boundary="[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12}"/
+describe MIME_BOUND_MANY_HEX Spam tool pattern in MIME boundary
+header __NEXTPART_ALL Content-Type =~ /NextPart/
+header __NEXTPART_NORMAL Content-Type =~ /="(?:----_?=_)?NextPart_[\dA-F]{3}(_[\dA-F]{3,8})?_[\dA-F]{8}\.[\dA-F]{8}"/
+meta MIME_BOUND_NEXTPART (__NEXTPART_ALL && !__NEXTPART_NORMAL)
+describe MIME_BOUND_NEXTPART Spam tool pattern in MIME boundary
+
+# note: the first alternation is anchored for speed
+header TO_MALFORMED To !~ /(?:^|[^\S"])(?:(?:\"[^\"]+\"|\S+)\@\S+\.\S+|^\s*.+:\s*;|^\s*\"[^\"]+\":\s*;|^\s*\([^\)]*\)\s*$|<\S+(?:\!\S+){1,}>|^\s*$)/ [if-unset: unset@unset.unset]
+describe TO_MALFORMED To: has a malformed address
+
+header __CD exists:Content-Disposition
+header __CT exists:Content-Type
+header __CTE exists:Content-Transfer-Encoding
+header __MIME_VERSION exists:MIME-Version
+header __CT_TEXT_PLAIN Content-Type =~ /^text\/plain\b/i
+meta MIME_HEADER_CTYPE_ONLY (!__CD && !__CTE && __CT && !__MIME_VERSION && !__CT_TEXT_PLAIN)
+describe MIME_HEADER_CTYPE_ONLY 'Content-Type' found without required MIME headers
+
+header WITH_LC_SMTP Received =~ /\swith\ssmtp;\s/
+describe WITH_LC_SMTP Received line contains spam-sign (lowercase smtp)
+
+
+header SUBJ_BUY Subject =~ /^buy/i
+describe SUBJ_BUY Subject line starts with Buy or Buying
+
+# seems to be ratware
+header RCVD_AM_PM Received =~ /; [A-Z][a-z][a-z], \d{1,2} \d{4} \d{1,2}:\d\d:\d\d [AP]M [+-]\d{4}/
+describe RCVD_AM_PM Received headers forged (AM/PM)
+
+header __USER_AGENT_MSN X-Mailer =~ /^MSN Explorer /
+
+# host no longer exists according to administrator
+header FAKE_OUTBLAZE_RCVD Received =~ /\.mr\.outblaze\.com/
+describe FAKE_OUTBLAZE_RCVD Received header contains faked 'mr.outblaze.com'
+
+header SUBJ_2_NUM_PARENS Subject =~ /^\(\d+\).*\(\d+\)\s*$/
+describe SUBJ_2_NUM_PARENS Subject contains common spam sign (2 numbers)
+
+# thanks to David Ritz for passing this on; ready for post-3.0.0
+header UNCLOSED_BRACKET ALL =~ /\[\d+\r?\n/s
+describe UNCLOSED_BRACKET Headers contain an unclosed bracket
+
+header FROM_DOMAIN_NOVOWEL From =~ /\@\S*[bcdfghjklmnpqrstvwxz]{7}/i
+describe FROM_DOMAIN_NOVOWEL From: domain has series of non-vowel letters
+
+header FROM_LOCAL_NOVOWEL From =~ /[bcdfghjklmnpqrstvwxz]{7}\S*\@/i
+describe FROM_LOCAL_NOVOWEL From: localpart has series of non-vowel letters
+
+header FROM_LOCAL_HEX From =~ /[0-9a-f]{11}\S*\@/i
+describe FROM_LOCAL_HEX From: localpart has long hexadecimal sequence
+
+header FROM_LOCAL_DIGITS From =~ /\d{11}\S*\@/i
+describe FROM_LOCAL_DIGITS From: localpart has long digit sequence
+
+header __TOCC_EXISTS exists:ToCc
+
+header X_PRIORITY_CC ALL =~ /\nX-Priority:[^\n]{0,80}\nCc:/si
+describe X_PRIORITY_CC Cc: after X-Priority: (bulk email fingerprint)
+
+# catch non-RFC2047 compliant messages
+# Apple Mail has a bug where headers will have whitespace around the encoded
+# text, so try to ignore that
+header BAD_ENC_HEADER ALL =~ /=\?[^?\s]+\?[^?\s]\?\s*[^?]+\s(?!\?=)/
+describe BAD_ENC_HEADER Message has bad MIME encoding in the header
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::HeaderEval
+
+header __FORGED_AOL_RCVD eval:check_for_fake_aol_relay_in_rcvd()
+
+header CHARSET_FARAWAY_HEADER eval:check_for_faraway_charset_in_headers()
+describe CHARSET_FARAWAY_HEADER A foreign language charset used in headers
+tflags CHARSET_FARAWAY_HEADER userconf
+
+ ###################################################################
+
+# illegal characters that should be MIME encoded
+# might want to exempt users using languages that don't use Latin
+# alphabets, but do it in the eval
+
+header SUBJ_ILLEGAL_CHARS eval:check_illegal_chars('Subject','0.00','2')
+describe SUBJ_ILLEGAL_CHARS Subject: has too many raw illegal characters
+
+header FROM_ILLEGAL_CHARS eval:check_illegal_chars('From','0.20','2')
+describe FROM_ILLEGAL_CHARS From: has too many raw illegal characters
+
+header HEAD_ILLEGAL_CHARS eval:check_illegal_chars('ALL','0.010','2')
+describe HEAD_ILLEGAL_CHARS Headers have too many raw illegal characters
+
+ ###################################################################
+
+# a forged Hotmail message; host HELO'd as hotmail.com, but it wasn't
+header __FORGED_HOTMAIL_RCVD eval:check_for_forged_hotmail_received_headers()
+
+# this, by comparison, is more common: From: address is @hotmail.com, but there is no matching Received: header
+header FORGED_HOTMAIL_RCVD2 eval:check_for_no_hotmail_received_headers()
+describe FORGED_HOTMAIL_RCVD2 hotmail.com 'From' address, but no 'Received:'
+
+header __FORGED_EUDORAMAIL_RCVD eval:check_for_forged_eudoramail_received_headers()
+
+header FORGED_YAHOO_RCVD eval:check_for_forged_yahoo_received_headers()
+describe FORGED_YAHOO_RCVD 'From' yahoo.com does not match 'Received' headers
+
+header __FORGED_JUNO_RCVD eval:check_for_forged_juno_received_headers()
+
+header FORGED_GW05_RCVD eval:check_for_forged_gw05_received_headers()
+describe FORGED_GW05_RCVD Forged 'by gw05' 'Received:' header found
+
+
+header SORTED_RECIPS eval:sorted_recipients()
+describe SORTED_RECIPS Recipient list is sorted by address
+
+header SUSPICIOUS_RECIPS eval:similar_recipients('0.65','undef')
+describe SUSPICIOUS_RECIPS Similar addresses in recipient list
+
+# This is quite a common false positive, as it's legal to omit To: but keep
+# a Cc:, so don't score it highly.
+header MISSING_HEADERS eval:check_for_missing_to_header()
+describe MISSING_HEADERS Missing To: header
+
+# this variant is local, using the Received hdr itself...
+header ROUND_THE_WORLD_LOCAL eval:check_for_round_the_world_received_helo()
+describe ROUND_THE_WORLD_LOCAL Received: says mail sent around the world (HELO)
+
+header DATE_IN_PAST_03_06 eval:check_for_shifted_date('-6', '-3')
+describe DATE_IN_PAST_03_06 Date: is 3 to 6 hours before Received: date
+
+header DATE_IN_PAST_06_12 eval:check_for_shifted_date('-12', '-6')
+describe DATE_IN_PAST_06_12 Date: is 6 to 12 hours before Received: date
+
+header DATE_IN_PAST_12_24 eval:check_for_shifted_date('-24', '-12')
+describe DATE_IN_PAST_12_24 Date: is 12 to 24 hours before Received: date
+
+header DATE_IN_PAST_24_48 eval:check_for_shifted_date('-48', '-24')
+describe DATE_IN_PAST_24_48 Date: is 24 to 48 hours before Received: date
+
+
+header DATE_IN_PAST_96_XX eval:check_for_shifted_date('undef', '-96')
+describe DATE_IN_PAST_96_XX Date: is 96 hours or more before Received: date
+
+header DATE_IN_FUTURE_03_06 eval:check_for_shifted_date('3', '6')
+describe DATE_IN_FUTURE_03_06 Date: is 3 to 6 hours after Received: date
+
+header DATE_IN_FUTURE_06_12 eval:check_for_shifted_date('6', '12')
+describe DATE_IN_FUTURE_06_12 Date: is 6 to 12 hours after Received: date
+
+header DATE_IN_FUTURE_12_24 eval:check_for_shifted_date('12', '24')
+describe DATE_IN_FUTURE_12_24 Date: is 12 to 24 hours after Received: date
+
+header DATE_IN_FUTURE_24_48 eval:check_for_shifted_date('24', '48')
+describe DATE_IN_FUTURE_24_48 Date: is 24 to 48 hours after Received: date
+
+header DATE_IN_FUTURE_48_96 eval:check_for_shifted_date('48', '96')
+describe DATE_IN_FUTURE_48_96 Date: is 48 to 96 hours after Received: date
+
+header DATE_IN_FUTURE_96_XX eval:check_for_shifted_date('96', 'undef')
+describe DATE_IN_FUTURE_96_XX Date: is 96 hours or more after Received: date
+
+header UNRESOLVED_TEMPLATE eval:check_unresolved_template()
+describe UNRESOLVED_TEMPLATE Headers contain an unresolved template
+
+header SUBJ_ALL_CAPS eval:subject_is_all_caps()
+describe SUBJ_ALL_CAPS Subject is all capitals
+
+
+header LOCALPART_IN_SUBJECT eval:check_for_to_in_subject('user')
+describe LOCALPART_IN_SUBJECT Local part of To: address appears in Subject
+
+header MSGID_OUTLOOK_INVALID eval:check_outlook_message_id()
+describe MSGID_OUTLOOK_INVALID Message-Id is fake (in Outlook Express format)
+
+header HEADER_COUNT_CTYPE eval:check_header_count_range('Content-Type','2','999')
+describe HEADER_COUNT_CTYPE Multiple Content-Type headers found
+
+endif
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::MIMEEval
+
+header MISSING_HB_SEP eval:check_msg_parse_flags('missing_head_body_separator')
+describe MISSING_HB_SEP Missing blank line between message header and body
+tflags MISSING_HB_SEP userconf
+
+endif
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::RelayEval
+
+header UNPARSEABLE_RELAY eval:check_relays_unparseable()
+tflags UNPARSEABLE_RELAY userconf
+describe UNPARSEABLE_RELAY Informational: message has unparseable relay lines
+
+
+
+header RCVD_HELO_IP_MISMATCH eval:helo_ip_mismatch()
+describe RCVD_HELO_IP_MISMATCH Received: HELO and IP do not match, but should
+
+header RCVD_NUMERIC_HELO eval:check_for_numeric_helo()
+describe RCVD_NUMERIC_HELO Received: contains an IP address used for HELO
+
+header RCVD_ILLEGAL_IP eval:check_for_illegal_ip()
+describe RCVD_ILLEGAL_IP Received: contains illegal IP address
+
+# not used directly right now due to FPs; but CONFIRMED_FORGED turns it
+# into a 1.0 S/O rule anyway, so that's not a problem ;)
+# 2.626 3.6340 1.5251 0.704 0.34 1.44 FORGED_RCVD_TRAIL
+# 0.956 3.3890 0.0000 1.000 0.98 4.30 CONFIRMED_FORGED
+header __FORGED_RCVD_TRAIL eval:check_for_forged_received_trail()
+
+header NO_RDNS_DOTCOM_HELO eval:check_for_no_rdns_dotcom_helo()
+describe NO_RDNS_DOTCOM_HELO Host HELO'd as a big ISP, but had no rDNS
+
+endif
+
ifplugin Mail::SpamAssassin::Plugin::HeaderEval
header __ENV_AND_HDR_FROM_MATCH eval:check_for_matching_env_and_hdr_from()
endif
+
Modified: spamassassin/branches/jm_re2c_hacks/rules/20_html_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/20_html_tests.cf?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/20_html_tests.cf (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/20_html_tests.cf Wed Oct 25 09:15:31 2006
@@ -36,14 +36,10 @@
describe HTML_SHORT_LINK_IMG_2 HTML is very short with a linked image
describe HTML_SHORT_LINK_IMG_3 HTML is very short with a linked image
-meta HTML_SHORT_COMMENT (__HTML_LENGTH_512 && __COMMENT_EXISTS)
-describe HTML_SHORT_COMMENT HTML is very short with HTML comments
meta HTML_SHORT_CENTER (__HTML_LENGTH_384 && __TAG_EXISTS_CENTER)
describe HTML_SHORT_CENTER HTML is very short with CENTER tag
-meta HTML_TITLE_LONG __HTML_TITLE_120 && !__MIME_ATTACHMENT
-describe HTML_TITLE_LONG HTML title is very long
meta HTML_TITLE_SUBJ_DIFF __HTML_TITLE_SUBJ_DIFF && !__MIME_ATTACHMENT
@@ -75,23 +71,11 @@
meta JS_FROMCHARCODE (__JS_FROMCHARCODE && __JS_DOCWRITE)
describe JS_FROMCHARCODE Document is built from a Javascript charcode array
-# A-Z, a-z, 0-9
-rawbody ENTITY_DEC_ALPHANUM /\&\#0*(?:4[89]|5[0-7]|6[5-9][78]\d|9[0789]|1[01]\d|12[012])\;/
-describe ENTITY_DEC_ALPHANUM HTML contains needlessly encoded characters
-
-# ! $ % ' ( ) , - . / : ; = ? @ _
# a good possible rule that may resurface
+# ! $ % ' ( ) , - . / : ; = ? @ _
#rawbody ENTITY_DEC_OTHER /\&\#0*(?:3[3679]|4[014567]|5[89]|6[134]|95)\;/
#describe ENTITY_DEC_OTHER HTML contains needlessly encoded punctuation
-# thanks to Bob Menschel for this one; bug 4116
-rawbody HTML_EHTML2 m'</html></html>'i
-describe HTML_EHTML2 HTML has doubled end HTML tag
-
-# bug 3070
-rawbody HTML_TINY_FONT /\<.*font\-size\:[ \"]*[01][^0-9]+.*\>/i
-describe HTML_TINY_FONT body contains 1 or 0-point font
-
body __HIGHBITS /(?:[\x80-\xff].?){4}/
# note: __HIGHBITS is used by HTML_CHARSET_FARAWAY
@@ -103,48 +87,6 @@
body HTML_MESSAGE eval:html_test('html')
describe HTML_MESSAGE HTML included in message
-# the HTML percentage range
-# should really be converted into a numeric function test
-body HTML_00_10 eval:html_range('ratio','0.00','0.10')
-body HTML_10_20 eval:html_range('ratio','0.10','0.20')
-body HTML_20_30 eval:html_range('ratio','0.20','0.30')
-body HTML_30_40 eval:html_range('ratio','0.30','0.40')
-body HTML_40_50 eval:html_range('ratio','0.40','0.50')
-body HTML_50_60 eval:html_range('ratio','0.50','0.60')
-body HTML_60_70 eval:html_range('ratio','0.60','0.70')
-body HTML_70_80 eval:html_range('ratio','0.70','0.80')
-body HTML_80_90 eval:html_range('ratio','0.80','0.90')
-body HTML_90_100 eval:html_range('ratio','0.90','1.00')
-describe HTML_00_10 Message is 0% to 10% HTML
-describe HTML_10_20 Message is 10% to 20% HTML
-describe HTML_20_30 Message is 20% to 30% HTML
-describe HTML_30_40 Message is 30% to 40% HTML
-describe HTML_40_50 Message is 40% to 50% HTML
-describe HTML_50_60 Message is 50% to 60% HTML
-describe HTML_60_70 Message is 60% to 70% HTML
-describe HTML_70_80 Message is 70% to 80% HTML
-describe HTML_80_90 Message is 80% to 90% HTML
-describe HTML_90_100 Message is 90% to 100% HTML
-
-# HTML shouting range
-# should really be converted into a numeric function test
-body HTML_SHOUTING3 eval:html_range('max_shouting','2','3')
-body HTML_SHOUTING4 eval:html_range('max_shouting','3','4')
-body HTML_SHOUTING5 eval:html_range('max_shouting','4','5')
-body HTML_SHOUTING6 eval:html_range('max_shouting','5','6')
-body HTML_SHOUTING7 eval:html_range('max_shouting','6','7')
-describe HTML_SHOUTING3 HTML has very strong "shouting" markup
-describe HTML_SHOUTING4 HTML has very strong "shouting" markup
-describe HTML_SHOUTING5 HTML has very strong "shouting" markup
-describe HTML_SHOUTING6 HTML has very strong "shouting" markup
-describe HTML_SHOUTING7 HTML has very strong "shouting" markup
-
-body HTML_TEXT_AFTER_HTML eval:html_test('text_after_html')
-describe HTML_TEXT_AFTER_HTML HTML contains text after HTML close tag
-
-body HTML_TEXT_AFTER_BODY eval:html_test('text_after_body')
-describe HTML_TEXT_AFTER_BODY HTML contains text after BODY close tag
-
# HTML comment tests
body HTML_COMMENT_SHORT eval:html_text_match('comment', '<!(?!-).{0,6}>')
describe HTML_COMMENT_SHORT HTML comment is very short
@@ -155,17 +97,11 @@
body HTML_EMBEDS eval:html_test('embeds')
describe HTML_EMBEDS HTML with embedded plugin object
-body HTML_EVENT_UNSAFE eval:html_test('html_event_unsafe')
-describe HTML_EVENT_UNSAFE HTML contains unsafe auto-executing code
body HTML_EXTRA_CLOSE eval:html_range('closed_extra_ratio', '0.09', 'inf')
describe HTML_EXTRA_CLOSE HTML contains far too many close tags
-body HTML_FONT_SIZE_TINY eval:html_eval('min_size', '< 1')
-describe HTML_FONT_SIZE_TINY HTML font size is tiny
-body HTML_FONT_SIZE_NONE eval:html_eval('min_size', '< 0')
-describe HTML_FONT_SIZE_NONE HTML font size is negative
body HTML_FONT_SIZE_LARGE eval:html_range('max_size', '5', '6')
describe HTML_FONT_SIZE_LARGE HTML font size is large
@@ -173,14 +109,8 @@
body HTML_FONT_SIZE_HUGE eval:html_range('max_size', '6', 'inf')
describe HTML_FONT_SIZE_HUGE HTML font size is huge
-body HTML_FONT_BIG eval:html_test('big_font')
-describe HTML_FONT_BIG HTML tag for a big font size
-body HTML_FONT_TINY eval:html_test('tiny_font')
-describe HTML_FONT_TINY HTML tag for a tiny font size
-body HTML_FONT_INVISIBLE eval:html_test('font_invisible')
-describe HTML_FONT_INVISIBLE HTML font color is same as background
body HTML_FONT_LOW_CONTRAST eval:html_test('font_low_contrast')
describe HTML_FONT_LOW_CONTRAST HTML font color similar to background
@@ -188,8 +118,6 @@
body HTML_FONT_FACE_BAD eval:html_test('font_face_bad')
describe HTML_FONT_FACE_BAD HTML font face is not a word
-body HTML_FONT_FACE_CAPS eval:html_test('font_face_caps')
-describe HTML_FONT_FACE_CAPS HTML font face has excess capital characters
body HTML_FORMACTION_MAILTO eval:html_test('form_action_mailto')
describe HTML_FORMACTION_MAILTO HTML includes a form which sends mail
@@ -214,56 +142,24 @@
# HTML_IMAGE_RATIO - more image area than text (ratio)
body HTML_IMAGE_RATIO_02 eval:html_image_ratio('0.000','0.002')
-body HTML_IMAGE_RATIO_04 eval:html_image_ratio('0.002','0.004')
-body HTML_IMAGE_RATIO_06 eval:html_image_ratio('0.004','0.006')
-body HTML_IMAGE_RATIO_08 eval:html_image_ratio('0.006','0.008')
describe HTML_IMAGE_RATIO_02 HTML has a low ratio of text to image area
-describe HTML_IMAGE_RATIO_04 HTML has a low ratio of text to image area
-describe HTML_IMAGE_RATIO_06 HTML has a low ratio of text to image area
-describe HTML_IMAGE_RATIO_08 HTML has a low ratio of text to image area
-
-body HTML_LINK_PUSH_HERE eval:html_text_match('anchor', '(?i)(?:push|go|cl[1l]ck)\s*(?:here|this)')
-describe HTML_LINK_PUSH_HERE HTML link text says "push here" or similar
-
-body HTML_LINK_OPT_OUT eval:html_text_match('anchor', '(?i)opt.?out')
-describe HTML_LINK_OPT_OUT HTML link text says "opt out" or similar
# HTML obfuscation
body HTML_OBFUSCATE_05_10 eval:html_range('obfuscation_ratio','.05','.1')
body HTML_OBFUSCATE_10_20 eval:html_range('obfuscation_ratio','.1','.2')
body HTML_OBFUSCATE_20_30 eval:html_range('obfuscation_ratio','.2','.3')
body HTML_OBFUSCATE_30_40 eval:html_range('obfuscation_ratio','.3','.4')
-body HTML_OBFUSCATE_40_50 eval:html_range('obfuscation_ratio','.4','.5')
body HTML_OBFUSCATE_50_60 eval:html_range('obfuscation_ratio','.5','.6')
-body HTML_OBFUSCATE_60_70 eval:html_range('obfuscation_ratio','.6','.7')
body HTML_OBFUSCATE_70_80 eval:html_range('obfuscation_ratio','.7','.8')
-body HTML_OBFUSCATE_80_90 eval:html_range('obfuscation_ratio','.8','.9')
body HTML_OBFUSCATE_90_100 eval:html_range('obfuscation_ratio','.9','1.0')
describe HTML_OBFUSCATE_05_10 Message is 5% to 10% HTML obfuscation
describe HTML_OBFUSCATE_10_20 Message is 10% to 20% HTML obfuscation
describe HTML_OBFUSCATE_20_30 Message is 20% to 30% HTML obfuscation
describe HTML_OBFUSCATE_30_40 Message is 30% to 40% HTML obfuscation
-describe HTML_OBFUSCATE_40_50 Message is 40% to 50% HTML obfuscation
describe HTML_OBFUSCATE_50_60 Message is 50% to 60% HTML obfuscation
-describe HTML_OBFUSCATE_60_70 Message is 60% to 70% HTML obfuscation
describe HTML_OBFUSCATE_70_80 Message is 70% to 80% HTML obfuscation
-describe HTML_OBFUSCATE_80_90 Message is 80% to 90% HTML obfuscation
describe HTML_OBFUSCATE_90_100 Message is 90% to 100% HTML obfuscation
-# backhair - idea from backhair set by Jennifer Wheeler and Adam Lopresto.
-body HTML_BACKHAIR_2 eval:html_range('backhair_count', '1', '4')
-body HTML_BACKHAIR_4 eval:html_range('backhair_count', '4', '8')
-body HTML_BACKHAIR_8 eval:html_range('backhair_count', '8', 'inf')
-describe HTML_BACKHAIR_2 HTML tags used to obfuscate words
-describe HTML_BACKHAIR_4 HTML tags used to obfuscate words
-describe HTML_BACKHAIR_8 HTML tags used to obfuscate words
-
-# HTML attribute testing
-body HTML_ATTR_BAD eval:html_range('attr_bad','0.75','1.0')
-describe HTML_ATTR_BAD HTML has many bad attributes in tags
-body HTML_ATTR_UNIQUE eval:html_range('attr_unique_bad','0.5','1.0')
-describe HTML_ATTR_UNIQUE HTML appears to have random attributes in tags
-
body HTML_TAG_BALANCE_BODY eval:html_tag_balance('body', '!= 0')
describe HTML_TAG_BALANCE_BODY HTML has unbalanced "body" tags
@@ -273,55 +169,25 @@
body HTML_TAG_EXIST_BGSOUND eval:html_tag_exists('bgsound')
describe HTML_TAG_EXIST_BGSOUND HTML has "bgsound" tag
-body HTML_TAG_EXIST_MARQUEE eval:html_tag_exists('marquee')
-describe HTML_TAG_EXIST_MARQUEE HTML has "marquee" tag
-
-body HTML_TAG_EXIST_TBODY eval:html_tag_exists('tbody')
-describe HTML_TAG_EXIST_TBODY HTML has "tbody" tag
-
# percentage of tags that are not legal elements in HTML
-body HTML_BADTAG_00_10 eval:html_range('bad_tag_ratio','0.00','0.10')
-body HTML_BADTAG_10_20 eval:html_range('bad_tag_ratio','0.10','0.20')
-body HTML_BADTAG_20_30 eval:html_range('bad_tag_ratio','0.20','0.30')
-body HTML_BADTAG_30_40 eval:html_range('bad_tag_ratio','0.30','0.40')
body HTML_BADTAG_40_50 eval:html_range('bad_tag_ratio','0.40','0.50')
body HTML_BADTAG_50_60 eval:html_range('bad_tag_ratio','0.50','0.60')
body HTML_BADTAG_60_70 eval:html_range('bad_tag_ratio','0.60','0.70')
-body HTML_BADTAG_70_80 eval:html_range('bad_tag_ratio','0.70','0.80')
-body HTML_BADTAG_80_90 eval:html_range('bad_tag_ratio','0.80','0.90')
body HTML_BADTAG_90_100 eval:html_range('bad_tag_ratio','0.90','1.00')
-describe HTML_BADTAG_00_10 HTML message is 0% to 10% bad tags
-describe HTML_BADTAG_10_20 HTML message is 10% to 20% bad tags
-describe HTML_BADTAG_20_30 HTML message is 20% to 30% bad tags
-describe HTML_BADTAG_30_40 HTML message is 30% to 40% bad tags
describe HTML_BADTAG_40_50 HTML message is 40% to 50% bad tags
describe HTML_BADTAG_50_60 HTML message is 50% to 60% bad tags
describe HTML_BADTAG_60_70 HTML message is 60% to 70% bad tags
-describe HTML_BADTAG_70_80 HTML message is 70% to 80% bad tags
-describe HTML_BADTAG_80_90 HTML message is 80% to 90% bad tags
describe HTML_BADTAG_90_100 HTML message is 90% to 100% bad tags
# percentage of unique non-elements in HTML
-body HTML_NONELEMENT_00_10 eval:html_range('non_element_ratio','0.00','0.10')
-body HTML_NONELEMENT_10_20 eval:html_range('non_element_ratio','0.10','0.20')
-body HTML_NONELEMENT_20_30 eval:html_range('non_element_ratio','0.20','0.30')
body HTML_NONELEMENT_30_40 eval:html_range('non_element_ratio','0.30','0.40')
body HTML_NONELEMENT_40_50 eval:html_range('non_element_ratio','0.40','0.50')
-body HTML_NONELEMENT_50_60 eval:html_range('non_element_ratio','0.50','0.60')
body HTML_NONELEMENT_60_70 eval:html_range('non_element_ratio','0.60','0.70')
-body HTML_NONELEMENT_70_80 eval:html_range('non_element_ratio','0.70','0.80')
body HTML_NONELEMENT_80_90 eval:html_range('non_element_ratio','0.80','0.90')
-body HTML_NONELEMENT_90_100 eval:html_range('non_element_ratio','0.90','1.00')
-describe HTML_NONELEMENT_00_10 0% to 10% of HTML elements are non-standard
-describe HTML_NONELEMENT_10_20 10% to 20% of HTML elements are non-standard
-describe HTML_NONELEMENT_20_30 20% to 30% of HTML elements are non-standard
describe HTML_NONELEMENT_30_40 30% to 40% of HTML elements are non-standard
describe HTML_NONELEMENT_40_50 40% to 50% of HTML elements are non-standard
-describe HTML_NONELEMENT_50_60 50% to 60% of HTML elements are non-standard
describe HTML_NONELEMENT_60_70 60% to 70% of HTML elements are non-standard
-describe HTML_NONELEMENT_70_80 70% to 80% of HTML elements are non-standard
describe HTML_NONELEMENT_80_90 80% to 90% of HTML elements are non-standard
-describe HTML_NONELEMENT_90_100 90% to 100% of HTML elements are non-standard
# short HTML messages with certain attributes
body __HTML_LINK_IMAGE eval:html_text_match('anchor', '<img>')
@@ -329,32 +195,21 @@
body __HTML_LENGTH_1024_1536 eval:html_range('length', '1024', '1536')
body __HTML_LENGTH_1536_2048 eval:html_range('length', '1536', '2048')
-body HTML_SHORT_LENGTH eval:html_eval('length', '< 170')
-describe HTML_SHORT_LENGTH HTML is extremely short
-
body __HTML_LENGTH_512 eval:html_eval('length', '< 512')
body __COMMENT_EXISTS eval:html_text_match('comment', '<!.*?>')
body __HTML_LENGTH_384 eval:html_eval('length', '< 384')
body __TAG_EXISTS_CENTER eval:html_tag_exists('center')
-body HTML_TITLE_EMPTY eval:html_text_not_match('title', '(?s)\S')
-describe HTML_TITLE_EMPTY HTML title contains no text
-
body __HTML_TITLE_120 eval:html_text_match('title', '.{120}')
body __HTML_TITLE_SUBJ_DIFF eval:html_title_subject_ratio('3.5')
-body HTML_TITLE_UNTITLED eval:html_text_match('title', '(?i)(?:untitled|new page \d+)')
-describe HTML_TITLE_UNTITLED HTML title contains "Untitled"
body __HTML_CHARSET_FARAWAY eval:html_charset_faraway()
body HTML_IFRAME_SRC eval:check_iframe_src()
describe HTML_IFRAME_SRC Message has HTML IFRAME tag with SRC URI
-
-body URI_HTML_ONLY eval:check_html_uri_only()
-describe URI_HTML_ONLY URIs only found in HTML part of multipart/alternative message
endif
Modified: spamassassin/branches/jm_re2c_hacks/rules/25_replace.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/25_replace.cf?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/25_replace.cf (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/25_replace.cf Wed Oct 25 09:15:31 2006
@@ -104,9 +104,6 @@
describe FUZZY_BILLION Attempt to obfuscate words in spam
replace_rules FUZZY_BILLION
-body FUZZY_CELEBREX /<inter W1><post P2>(?!celebrex)<C><E><L><E><B><R><E><X>/i
-describe FUZZY_CELEBREX Attempt to obfuscate words in spam
-replace_rules FUZZY_CELEBREX
body FUZZY_CPILL /(?!ciali[sz])<C><I><A><L><I><S>/i
describe FUZZY_CPILL Attempt to obfuscate words in spam
@@ -120,9 +117,6 @@
describe FUZZY_ERECT Attempt to obfuscate words in spam
replace_rules FUZZY_ERECT
-body FUZZY_FOLLOW /(?!follow)<F><O><L><L><O><W>/i
-describe FUZZY_FOLLOW Attempt to obfuscate words in spam
-replace_rules FUZZY_FOLLOW
body FUZZY_GUARANTEE /<inter W1><post P2>(?!guarantee)<G><U><A><R><A><N><T><E><E>/i
describe FUZZY_GUARANTEE Attempt to obfuscate words in spam
@@ -132,9 +126,6 @@
describe FUZZY_MEDICATION Attempt to obfuscate words in spam
replace_rules FUZZY_MEDICATION
-body FUZZY_MILF /<inter SP>(?!milf)\b<M><I><L><F>/i
-describe FUZZY_MILF Attempt to obfuscate words in spam
-replace_rules FUZZY_MILF
body FUZZY_MILLION /(?!million)<M><I><L><L><I><O><N>/i
describe FUZZY_MILLION Attempt to obfuscate words in spam
@@ -164,9 +155,6 @@
describe FUZZY_PHENT Attempt to obfuscate words in spam
replace_rules FUZZY_PHENT
-body FUZZY_PLEASE /(?!please)<P><L><E><A><S><E>/i
-describe FUZZY_PLEASE Attempt to obfuscate words in spam
-replace_rules FUZZY_PLEASE
body FUZZY_PRESCRIPT /<inter W2><post P2>(?!prescription)<P><R><E><S><C><R><I><P><T><I><O><N>/i
describe FUZZY_PRESCRIPT Attempt to obfuscate words in spam
@@ -197,17 +185,11 @@
describe FUZZY_THOUSANDS Attempt to obfuscate words in spam
replace_rules FUZZY_THOUSANDS
-body FUZZY_TRAMADOL /<inter W1><post P2>(?!tramadol)<T><R><A><M><A><D><O><L>/i
-describe FUZZY_TRAMADOL Attempt to obfuscate words in spam
-replace_rules FUZZY_TRAMADOL
body FUZZY_VLIUM /<inter W1><post P2>(?!valium)<V><A><L><I><U><M>/i
describe FUZZY_VLIUM Attempt to obfuscate words in spam
replace_rules FUZZY_VLIUM
-body FUZZY_VICODIN /<inter W1><post P2>(?!vicodin)<V><I><C><O><D><I><N>/i
-describe FUZZY_VICODIN Attempt to obfuscate words in spam
-replace_rules FUZZY_VICODIN
body FUZZY_VIOXX /<inter W1><post P2>(?!vioxx)<V><I><O><X><X>/i
describe FUZZY_VIOXX Attempt to obfuscate words in spam
Modified: spamassassin/branches/jm_re2c_hacks/rules/active.list
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/active.list?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/active.list (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/active.list Wed Oct 25 09:15:31 2006
@@ -1,11 +1,5 @@
# active ruleset list, automatically generated from http://ruleqa.spamassassin.org/
-# with results from: bb-doc bb-jm bb-zmi cthielen daf dos parkerm theo zmi
-
-# good enough
-ADVANCE_FEE_3
-
-# good enough
-ADVANCE_FEE_4
+# with results from: cthielen daf parkerm zmi
# tflags userconf
ALL_TRUSTED
@@ -17,7 +11,7 @@
AXB_FAKETZ
# good enough
-BANG_OPRAH
+AXB_XR_STULDAP
# good enough
BASE64_LENGTH_78
@@ -62,37 +56,7 @@
BROKEN_RATWARE_BOM
# good enough
-CONFIRMED_FORGED
-
-# good enough
-CUM_SHOT
-
-# good enough
-DATE_IN_FUTURE_03_06
-
-# good enough
-DATE_IN_FUTURE_06_12
-
-# good enough
-DATE_IN_FUTURE_12_24
-
-# good enough
-DATE_IN_FUTURE_24_48
-
-# good enough
-DATE_IN_FUTURE_48_96
-
-# good enough
-DATE_IN_FUTURE_96_XX
-
-# good enough
-DATE_IN_PAST_06_12
-
-# good enough
-DATE_IN_PAST_96_XX
-
-# good enough
-DATE_SPAMWARE_Y2K
+CTYPE_1SPACE_GIF
# good enough
DC_GIF_MULTI_LARGO
@@ -121,9 +85,6 @@
# tflags net
DIGEST_MULTIPLE
-# good enough
-DISGUISE_PORN_MUNDANE
-
# tflags net
DNS_FROM_AHBL_RHSBL
@@ -146,9 +107,6 @@
DNS_FROM_SECURITYSAGE
# good enough
-DOS_DOUBLE_SOTCK
-
-# good enough
DOS_LET_GO_JOB
# good enough
@@ -164,81 +122,12 @@
DOS_YOUR_PLACE
# good enough
-DRUGS_ANXIETY
-
-# good enough
-DRUGS_ANXIETY_EREC
-
-# good enough
-DRUGS_ANXIETY_OBFU
-
-# good enough
-DRUGS_DIET
-
-# good enough
-DRUGS_DIET_OBFU
-
-# good enough
-DRUGS_ERECTILE
-
-# good enough
-DRUGS_ERECTILE_OBFU
-
-# good enough
DRUGS_HDIA
-# good enough
-DRUGS_MANYKINDS
-
-# good enough
-DRUGS_SLEEP_EREC
-
-# good enough
-DRUG_DOSAGE
-
-# good enough
-DRUG_ED_GENERIC
-
-# good enough
-DRUG_ED_ONLINE
-
-# good enough
-DRUG_ED_SILD
-
-# good enough
-EMPTY_MESSAGE
-
-# good enough
-EM_ROLEX
-
# tflags userconf
ENV_AND_HDR_SPF_MATCH
# good enough
-EXCUSE_24
-
-# good enough
-EXCUSE_4
-
-# good enough
-EXTRA_MPART_TYPE
-
-# good enough
-FAKE_HELO_LYCOS
-
-# good enough
-FAKE_HELO_MAIL_COM
-
-# good enough
-FAKE_HELO_MAIL_COM_DOM
-
-# good enough
-FAKE_HELO_MSN
-
-# good enough
-FAKE_OUTBLAZE_RCVD
-
-# good enough
FB_CIALIS_LEO3
# good enough
@@ -254,13 +143,13 @@
FB_VALIUM_LEO2
# good enough
-FB_VIAGRA_LEO3
+FB_VIAGRA_LEO2
# good enough
-FB_YOURSELF_MASTER
+FB_VIAGRA_LEO3
# good enough
-FH_DATE_IS_19XX
+FB_YOURSELF_MASTER
# good enough
FH_DATE_PAST_20XX
@@ -278,115 +167,22 @@
FM_CUSTOMLOGODSGNc
# good enough
-FORGED_AOL_TAGS
-
-# good enough
-FORGED_HOTMAIL_RCVD
-
-# good enough
-FORGED_IMS_HTML
-
-# good enough
-FORGED_IMS_TAGS
-
-# good enough
-FORGED_MSGID_AOL
-
-# good enough
-FORGED_MSGID_EXCITE
-
-# good enough
-FORGED_MSGID_HOTMAIL
-
-# good enough
-FORGED_MSGID_MSN
-
-# good enough
-FORGED_MSGID_YAHOO
+FS_START_DOYOU
# good enough
-FORGED_MUA_AOL_FROM
-
-# good enough
-FORGED_MUA_EUDORA
-
-# good enough
-FORGED_MUA_IMS
-
-# good enough
-FORGED_MUA_MOZILLA
-
-# good enough
-FORGED_MUA_OIMO
-
-# good enough
-FORGED_MUA_OUTLOOK
-
-# good enough
-FORGED_MUA_THEBAT_BOUN
-
-# good enough
-FORGED_OUTLOOK_HTML
-
-# good enough
-FORGED_OUTLOOK_TAGS
-
-# good enough
-FORGED_QUALCOMM_TAGS
-
-# good enough
-FORGED_TELESP_RCVD
-
-# good enough
-FORGED_THEBAT_HTML
-
-# good enough
-FORGED_YAHOO_RCVD
-
-# good enough
-FREE_PORN
-
-# good enough
-FREE_QUOTE_INSTANT
-
-# good enough
-FROM_ALL_NUMS
-
-# good enough
-FROM_BLANK_NAME
-
-# good enough
-FROM_DOMAIN_NOVOWEL
-
-# good enough
-FROM_ENDS_IN_NUMS
-
-# good enough
-FROM_HAS_MIXED_NUMS
-
-# good enough
-FROM_ILLEGAL_CHARS
-
-# good enough
-FROM_LOCAL_DIGITS
-
-# good enough
-FROM_LOCAL_HEX
-
-# good enough
-FROM_LOCAL_NOVOWEL
+FS_START_DOYOU2
# good enough
-FROM_NO_USER
+FUZZY_MERIDIA
# good enough
-FS_START_DOYOU2
+FUZZY_SPRM
# good enough
-FUZZY_MERIDIA
+FUZZY_STOCK
# good enough
-GAPPY_SUBJECT
+FU_HOODIA
# good enough
GEO_QUERY_STRING
@@ -427,88 +223,40 @@
# tflags userconf
HASHCASH_HIGH
-# good enough
-HEADER_SPAM
-
-# good enough
-HEAD_ILLEGAL_CHARS
-
-# good enough
-HELO_DYNAMIC_CHELLO_NL
-
-# good enough
-HELO_DYNAMIC_DIALIN
-
-# good enough
-HELO_DYNAMIC_HCC
-
-# good enough
-HELO_DYNAMIC_HEXIP
-
-# good enough
-HELO_DYNAMIC_HOME_NL
+# tflags userconf
+HEAD_LONG
# good enough
-HELO_DYNAMIC_IPADDR
+HS_EXTRA
# good enough
-HELO_DYNAMIC_IPADDR2
+HS_GETMEOFF
# good enough
-HELO_DYNAMIC_SPLIT_IP
+HS_INDEX_PARAM
# good enough
-HG_HORMONE
+HS_MEETUP_FOR_SEX
# good enough
-HS_FORGED_OE_FW
+HS_NO_FLOWERS
# good enough
-HS_GETMEOFF
+HS_PHARMA_1
# good enough
-HS_MEETUP_FOR_SEX
+HS_SUBJ_ONLINE_PHARMACEUTICAL
# good enough
-HS_SUBJ_ONLINE_PHARMACEUTICAL
+HS_SYNDICATE_P2
# tflags userconf
HTML_CHARSET_FARAWAY
# good enough
-HTTPS_HTTP_MISMATCH
-
-# good enough
-HTTPS_IP_MISMATCH
-
-# good enough
-HTTP_77
-
-# good enough
-HTTP_EXCESSIVE_ESCAPES
-
-# good enough
-INFO_TLD
-
-# good enough
-INVALID_DATE
-
-# good enough
-INVALID_TZ_CST
-
-# good enough
-INVESTMENT_ADVICE
-
-# good enough
-JM_LC_MID
-
-# good enough
JM_RCVD_QMAILV1
# good enough
-JM_RCVD_SENDMAILID
-
-# good enough
KAM_STOCKOTC
# good enough
@@ -518,25 +266,19 @@
KAM_STOCKTIP15
# good enough
-KAM_STOCKTIP21
-
-# good enough
-KAM_STOCKTIP6
-
-# good enough
-KAM_STOCKTIP8
+KAM_STOCKTIP2
# good enough
-KOREAN_UCE_SUBJECT
+KAM_STOCKTIP21
# good enough
-LOCALPART_IN_SUBJECT
+KAM_STOCKTIP3
# good enough
-LONGWORDS
+KAM_STOCKTIP6
# good enough
-MALE_ENHANCE
+KAM_STOCKTIP8
# good enough
MID_DEGREES
@@ -547,78 +289,12 @@
# good enough
MID_OUTLOOK_ZZZNN
-# good enough
-MILLION_USD
-
-# good enough
-MIME_BAD_ISO_CHARSET
-
-# good enough
-MIME_BASE64_BLANKS
-
-# good enough
-MIME_BASE64_TEXT
-
-# good enough
-MIME_BOUND_ALLHEX_17
-
-# good enough
-MIME_BOUND_DD_DIGITS
-
-# good enough
-MIME_BOUND_DIGITS_15
-
-# good enough
-MIME_BOUND_MANY_HEX
-
-# good enough
-MISSING_MIMEOLE
-
-# good enough
-MISSING_MIME_HB_SEP
-
-# good enough
-MORE_SEX
-
-# good enough
-MSGID_DOLLARS_RANDOM
-
-# good enough
-MSGID_OUTLOOK_INVALID
-
-# good enough
-MSGID_RANDY
-
-# good enough
-MSGID_SHORT
-
-# good enough
-MSGID_SPAM_CAPS
-
-# good enough
-MSGID_SPAM_LETTERS
-
-# good enough
-MSGID_YAHOO_CAPS
-
-# good enough
-MULTIPART_ALT_NON_TEXT
-
-# good enough
-NOT_ADVISOR
+# tflags userconf
+MISSING_HB_SEP
# tflags net
NO_DNS_FOR_FROM
-# good enough
-NO_PRESCRIPTION
-
-# good enough
-NO_RDNS_DOTCOM_HELO
-
-# tflags userconf
-NO_RECEIVED
-
# tflags userconf
NO_RELAYS
@@ -626,68 +302,14 @@
NULL_IN_BODY
# good enough
-NUMERIC_HTTP_ADDR
-
-# good enough
-PERCENT_RANDOM
-
-# good enough
-PLING_QUERY
-
-# good enough
-PORN_15
+PART_CID_STOCK
# good enough
-PREST_NON_ACCREDITED
+PART_CID_STOCK_LESS
# tflags net
PYZOR_CHECK
-# good enough
-RATWARE_EFROM
-
-# good enough
-RATWARE_EGROUPS
-
-# good enough
-RATWARE_GECKO_BUILD
-
-# good enough
-RATWARE_HASH_2
-
-# good enough
-RATWARE_HASH_2_BUG2108
-
-# good enough
-RATWARE_HASH_2_V2
-
-# good enough
-RATWARE_HASH_2_V2_BUG2108
-
-# good enough
-RATWARE_MOZ_MALFORMED
-
-# good enough
-RATWARE_MS_HASH
-
-# good enough
-RATWARE_NAME_ID
-
-# good enough
-RATWARE_OE_MALFORMED
-
-# good enough
-RATWARE_OUTLOOK_NONAME
-
-# good enough
-RATWARE_RCVD_AT
-
-# good enough
-RATWARE_RCVD_PF
-
-# good enough
-RATWARE_ZERO_TZ
-
# tflags net
RAZOR2_CF_RANGE_51_100
@@ -701,22 +323,7 @@
RAZOR2_CHECK
# good enough
-RCVD_AM_PM
-
-# good enough
-RCVD_DOUBLE_IP_LOOSE
-
-# good enough
-RCVD_DOUBLE_IP_SPAM
-
-# good enough
-RCVD_FAKE_HELO_DOTCOM
-
-# good enough
-RCVD_HELO_IP_MISMATCH
-
-# good enough
-RCVD_ILLEGAL_IP
+RCVD_FORGED_WROTE
# tflags net
RCVD_IN_BL_SPAMCOP_NET
@@ -803,41 +410,11 @@
RCVD_IN_XBL
# good enough
-RCVD_LSO_SND
-
-# good enough
-RCVD_NUMERIC_HELO
-
-# good enough
-REPLICA_WATCH
-
-# good enough
-REPTO_OVERQUOTE_THEBAT
-
-# good enough
-REPTO_QUOTE_AOL
-
-# good enough
-REPTO_QUOTE_IMS
-
-# good enough
-REPTO_QUOTE_MSN
-
-# good enough
-REPTO_QUOTE_QUALCOMM
-
-# good enough
-REPTO_QUOTE_YAHOO
+RCVD_MAIL_COM
# tflags net
ROUND_THE_WORLD
-# good enough
-ROUND_THE_WORLD_LOCAL
-
-# good enough
-SORTED_RECIPS
-
# tflags net
SPF_FAIL
@@ -862,30 +439,6 @@
# tflags net
SPF_SOFTFAIL
-# good enough
-SPOOF_COM2OTH
-
-# good enough
-SPOOF_NET2COM
-
-# good enough
-STOCK_ALERT
-
-# good enough
-SUBJECT_DRUG_GAP_C
-
-# good enough
-SUBJECT_DRUG_GAP_L
-
-# good enough
-SUBJECT_DRUG_GAP_S
-
-# good enough
-SUBJECT_DRUG_GAP_VA
-
-# good enough
-SUBJECT_DRUG_GAP_X
-
# tflags userconf
SUBJECT_IN_BLACKLIST
@@ -896,25 +449,13 @@
SUBJECT_NEEDS_ENCODING
# good enough
-SUBJECT_SEXUAL
-
-# good enough
-SUBJ_ILLEGAL_CHARS
-
-# good enough
SUBJ_RE_NUM
# good enough
-SUSPICIOUS_RECIPS
-
-# good enough
-TO_MALFORMED
-
-# good enough
-TO_NO_USER
+TT_MSGID_TRUNC
# good enough
-TT_MSGID_TRUNC
+TT_OBSCURED_VIAGRA
# good enough
TVD_ACT_193
@@ -923,21 +464,27 @@
TVD_APP_LOAN
# good enough
-TVD_DEAR_HOMEOWNER
+TVD_BODY_END_STAR
# good enough
-TVD_DOLLARS_US
+TVD_DEAR_HOMEOWNER
# good enough
TVD_EB_PHISH
# good enough
-TVD_FINGER_02
+TVD_ENHANCE
+
+# good enough
+TVD_FINGER_01
# good enough
TVD_FLOAT_GENERAL
# good enough
+TVD_FROM_1
+
+# good enough
TVD_FUZZY_DEGREE
# good enough
@@ -953,18 +500,24 @@
TVD_FUZZY_PHARMACEUTICAL
# good enough
+TVD_FUZZY_SECURITIES
+
+# good enough
TVD_FUZZY_SYMBOL
# good enough
-TVD_FW_GRAPHIC_ID3
+TVD_FW_GRAPHIC_ID1
# good enough
-TVD_FW_GRAPHIC_ID3_2
+TVD_FW_GRAPHIC_ID2
# good enough
TVD_FW_MESG1
# good enough
+TVD_FW_MESG2
+
+# good enough
TVD_GET_STOCK
# good enough
@@ -974,13 +527,10 @@
TVD_INCREASE_SIZE
# good enough
-TVD_LINK_SAVE
-
-# good enough
TVD_NOT_SATISFIED
# good enough
-TVD_PH_7
+TVD_PH_BODY_META
# good enough
TVD_PH_FR5
@@ -989,33 +539,21 @@
TVD_PH_REC
# good enough
-TVD_PH_SUBJ_ACCOUNTS_POST
-
-# good enough
-TVD_PH_SUBJ_META
-
-# good enough
-TVD_PH_SUBJ_META_ALL
-
-# good enough
-TVD_PH_SUBJ_SEC_MEASURES
-
-# good enough
TVD_PH_SUBJ_UPDATE
# good enough
TVD_PH_SUBJ_URGENT
# good enough
-TVD_PP_PHISH
-
-# good enough
TVD_QUAL_MEDS
# good enough
TVD_RATWARE_CB
# good enough
+TVD_RATWARE_CB_2
+
+# good enough
TVD_RATWARE_MSGID_01
# good enough
@@ -1037,9 +575,6 @@
TVD_SPACED_WORDS
# good enough
-TVD_SPACE_RATIO
-
-# good enough
TVD_STOCK1
# good enough
@@ -1055,9 +590,6 @@
TVD_SUBJ_FINGER_03
# good enough
-TVD_SUBJ_FINGER_04
-
-# good enough
TVD_SUBJ_OWE
# good enough
@@ -1067,50 +599,41 @@
TVD_UA_FOSTERING
# good enough
-TVD_VIS_HIDDEN
+TVD_UNDER_VALUED
# good enough
-FORGED_IMS_HTML
+TVD_VIS_HIDDEN
# good enough
-FORGED_IMS_TAGS
+DOS_TO_READ_STOCK
# good enough
-FORGED_OUTLOOK_HTML
+DRUGS_STOCK_MIMEOLE
# good enough
-FORGED_OUTLOOK_TAGS
+DRUGS_STOCK_MIMEOLE2
# good enough
-RATWARE_MS_HASH
+FB_CIALIS_LEO2
# good enough
-RATWARE_OUTLOOK_NONAME
+FH_DATE_IS_19XX
# good enough
-TVD_FW_GRAPHIC_ID1
+FR_WWW_DOMAIN_23SUBDIR
# good enough
-TVD_FW_GRAPHIC_ID2
+KAM_STOCKTIP20
# good enough
-UNCLAIMED_MONEY
+TVD_LINK_SAVE
# good enough
-UNCLOSED_BRACKET
+TVD_PH_SUBJ_SEC_MEASURES
# tflags userconf
UNPARSEABLE_RELAY
-# good enough
-UNRESOLVED_TEMPLATE
-
-# good enough
-UPPERCASE_50_75
-
-# good enough
-UPPERCASE_75_100
-
# tflags net
URIBL_AB_SURBL
@@ -1144,15 +667,6 @@
# good enough
URI_L_PHP
-# good enough
-URI_NOVOWEL
-
-# good enough
-URI_NO_WWW_BIZ_CGI
-
-# good enough
-URI_NO_WWW_INFO_CGI
-
# tflags userconf
USER_IN_ALL_SPAM_TO
@@ -1184,34 +698,31 @@
VERTICAL_DRUGS_1
# good enough
-VIA_GAP_GRA
+VERTICAL_WORDS_1
# good enough
-WEIRD_QUOTING
-
-# good enough
-X_IP
+ZMIde_EBAYJOBSURI
# good enough
-X_LIBRARY
+ZMIde_GIRLSRCH1
# good enough
-X_MESSAGE_INFO
+ZMIde_GIRLSRCH2
# good enough
-X_MSMAIL_PRIORITY_HIGH
+ZMIde_LOVEGALX1
# good enough
-X_ORIG_IP_NOT_IPV4
+ZMIde_LOVEGALX2
# good enough
-X_PRIORITY_CC
+ZMIde_LOVEGALXURI
# good enough
-ZMIde_EBAYJOBSURI
+ZMIde_SEXUALEXPL1
# good enough
-ZMIde_SEXUALEXPL1
+ZMIde_URIPORNWEB
# tflags net
__RCVD_IN_IADB
Modified: spamassassin/branches/jm_re2c_hacks/rules/regression_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/regression_tests.cf?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/regression_tests.cf (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/regression_tests.cf Wed Oct 25 09:15:31 2006
@@ -27,8 +27,6 @@
test DEAR_FRIEND ok Dear friend,
test DEAR_FRIEND fail Dear Mr. Ithacus,
-test FROM_ENDS_IN_NUMS ok matt12345678@sergeant.org
-test FROM_ENDS_IN_NUMS fail matt@sergeant.org
test FROM_STARTS_WITH_NUMS ok 12345678matt@sergeant.org
test FROM_STARTS_WITH_NUMS fail matt@sergeant.org
test FORGED_YAHOO_RCVD fail by mf1.lng.yahoo.com (8.11.1/8.11.1) id g3SDfPH19426
Modified: spamassassin/branches/jm_re2c_hacks/rules/rule2xs.pre
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/rule2xs.pre?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/rule2xs.pre (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/rule2xs.pre Wed Oct 25 09:15:31 2006
@@ -1,4 +1,4 @@
-# loadplugin Mail::SpamAssassin::Plugin::BodyRuleBaseExtractor
+loadplugin Mail::SpamAssassin::Plugin::BodyRuleBaseExtractor
loadplugin Mail::SpamAssassin::Plugin::Rule2XSBody
Modified: spamassassin/branches/jm_re2c_hacks/sa-learn.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/sa-learn.raw?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/sa-learn.raw (original)
+++ spamassassin/branches/jm_re2c_hacks/sa-learn.raw Wed Oct 25 09:15:31 2006
@@ -85,7 +85,8 @@
%opt = (
'force-expire' => 0,
'use-ignores' => 0,
- 'nosync' => 0,
+ 'nosync' => 0,
+ 'cf' => []
);
Getopt::Long::Configure(
@@ -104,6 +105,7 @@
'configpath|config-file|config-dir|c|C=s' => \$opt{'configpath'},
'prefspath|prefs-file|p=s' => \$opt{'prefspath'},
'siteconfigpath=s' => \$opt{'siteconfigpath'},
+ 'cf=s' => \@{$opt{'cf'}},
'folders|f=s' => \$opt{'folders'},
'force-expire|expire' => \$opt{'force-expire'},
@@ -216,6 +218,8 @@
$post_config .= "use_bayes 1\n";
}
+$post_config .= join("\n", @{$opt{'cf'}})."\n";
+
# create the tester factory
$spamtest = new Mail::SpamAssassin(
{
@@ -585,6 +589,7 @@
-C path, --configpath=path, --config-file=path Path to standard configuration dir
-p prefs, --prefspath=file, --prefs-file=file Set user preferences file
--siteconfigpath=path Path for site configs (def: /etc/mail/spamassassin)
+ --cf='config line' Additional line of configuration
-D, --debug-level Print debugging messages
-V, --version Print version
-h, --help Print usage message
@@ -738,6 +743,12 @@
Use the specified path for locating site-specific configuration files. Ignore
the default directories (usually C</etc/mail/spamassassin> or similar).
+
+=item B<--cf='config line'>
+
+Add additional lines of configuration directly from the command-line, parsed
+after the configuration files are read. Multiple B<--cf> arguments can be
+used, and each will be considered a separate line of configuration.
=item B<-p> I<prefs>, B<--prefspath>=I<prefs>, B<--prefs-file>=I<prefs>
Modified: spamassassin/branches/jm_re2c_hacks/sa-update.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/sa-update.raw?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/sa-update.raw (original)
+++ spamassassin/branches/jm_re2c_hacks/sa-update.raw Wed Oct 25 09:15:31 2006
@@ -362,19 +362,6 @@
}
}
-# --lint check the current site config before we download any updates so that
-# a site with a broken config, with sa-update in a cron job, doesn't hammer
-# the update servers continuously downloading and then aborting the update when
-# the lint check of the update (with the site config included) fails.
-# Wait until now to do it since nothing above depends on a successful --lint.
-
-if (!lint_check_dir(File::Spec->catfile($opt{'updatedir'}, "doesnotexist"))) {
- warn "error: lint check of current site config failed, cannot continue\n";
- dbg("diag: local site config must successfully lint before doing updates, ".
- "exiting with code 2");
- exit 2;
-}
-
my $res = Net::DNS::Resolver->new();
my $ua = LWP::UserAgent->new();
@@ -1252,6 +1239,7 @@
# "config" or otherwise be more terse. :(
my $spamtest = new Mail::SpamAssassin( {
rules_filename => $dir,
+ site_rules_filename => File::Spec->catfile($dir, "doesnotexist"),
userprefs_filename => File::Spec->catfile($dir, "doesnotexist"),
local_tests_only => 1,
@@ -1438,8 +1426,6 @@
installed successfully.
An exit code of C<1> means no fresh updates were available.
-
-An exit code of C<2> means that a lint check of the current site config failed.
An exit code of C<4> or higher indicates that errors occurred while
attempting to download and extract updates.
Modified: spamassassin/branches/jm_re2c_hacks/spamassassin.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/spamassassin.raw?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/spamassassin.raw (original)
+++ spamassassin/branches/jm_re2c_hacks/spamassassin.raw Wed Oct 25 09:15:31 2006
@@ -152,7 +152,7 @@
# - create user preference files
# - have ArchiveIterator detect the input message format (file vs dir)
#
-my %opt = ( 'create-prefs' => 1, 'format' => 'detect' );
+my %opt = ( 'create-prefs' => 1, 'format' => 'detect', cf => [] );
my $doing_whitelist_operation = 0;
my $count = 0;
@@ -174,6 +174,7 @@
'add-to-whitelist|W' => \$opt{'add-to-whitelist'},
'configpath|config-file|config-dir|c|C=s' => \$opt{'configpath'},
'create-prefs!' => \$opt{'create-prefs'},
+ 'cf=s' => \@{$opt{'cf'}},
'debug|D:s' => \$opt{'debug'},
'error-code|exit-code|e:i' => \$opt{'error-code'},
'help|h|?' => \$opt{'help'},
@@ -250,6 +251,7 @@
local_tests_only => $opt{'local'},
debug => $opt{'debug'},
dont_copy_prefs => ( $opt{'create-prefs'} ? 0 : 1 ),
+ post_config_text => join("\n", @{$opt{'cf'}})."\n",
PREFIX => $PREFIX,
DEF_RULES_DIR => $DEF_RULES_DIR,
LOCAL_RULES_DIR => $LOCAL_RULES_DIR,
Modified: spamassassin/branches/jm_re2c_hacks/spamc/configure
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/spamc/configure?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/spamc/configure (original)
+++ spamassassin/branches/jm_re2c_hacks/spamc/configure Wed Oct 25 09:15:31 2006
@@ -2231,9 +2231,10 @@
ac_compiler_gnu=$ac_cv_c_compiler_gnu
-if test "x$GCC" = "xyes" ; then
- CFLAGS="-Wall -Wextra -Wdeclaration-after-statement $CFLAGS"
-fi
+# disabled: these gcc warning flags break the build with gcc 3.3.x
+# if test "x$GCC" = "xyes" ; then
+# CFLAGS="-Wall -Wextra -Wdeclaration-after-statement $CFLAGS"
+# fi
Modified: spamassassin/branches/jm_re2c_hacks/spamc/configure.in
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/spamc/configure.in?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/spamc/configure.in (original)
+++ spamassassin/branches/jm_re2c_hacks/spamc/configure.in Wed Oct 25 09:15:31 2006
@@ -17,9 +17,10 @@
AC_PROG_CC
-if test "x$GCC" = "xyes" ; then
- CFLAGS="-Wall -Wextra -Wdeclaration-after-statement $CFLAGS"
-fi
+# disabled: these gcc warning flags break the build with gcc 3.3.x
+# if test "x$GCC" = "xyes" ; then
+# CFLAGS="-Wall -Wextra -Wdeclaration-after-statement $CFLAGS"
+# fi
AC_EXEEXT
Modified: spamassassin/branches/jm_re2c_hacks/spamd/spamd.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/spamd/spamd.raw?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/spamd/spamd.raw (original)
+++ spamassassin/branches/jm_re2c_hacks/spamd/spamd.raw Wed Oct 25 09:15:31 2006
@@ -170,6 +170,7 @@
'min-children' => 1, # min kids to have running
'min-spare' => 1, # min kids that must be spare
'max-spare' => 2, # max kids that should be spare
+ 'cf' => [], # extra config lines
);
@@ -236,6 +237,7 @@
'setuid-with-ldap' => \$opt{'setuid-with-ldap'},
'setuid-with-sql' => \$opt{'setuid-with-sql'},
'siteconfigpath=s' => \$opt{'siteconfigpath'},
+ 'cf=s' => \@{$opt{'cf'}},
'socketgroup=s' => \$opt{'socketgroup'},
'socketmode=s' => \$opt{'socketmode'},
'socketowner=s' => \$opt{'socketowner'},
@@ -731,6 +733,7 @@
dont_copy_prefs => $dontcopy,
rules_filename => ( $opt{'configpath'} || 0 ),
site_rules_filename => ( $opt{'siteconfigpath'} || 0 ),
+ post_config_text => join("\n", @{$opt{'cf'}})."\n",
force_ipv4 => ( $opt{'force_ipv4'} || 0 ),
local_tests_only => ( $opt{'local'} || 0 ),
debug => ( $opt{'debug'} || 0 ),
@@ -2216,6 +2219,7 @@
-c, --create-prefs Create user preferences files
-C path, --configpath=path Path for default config files
--siteconfigpath=path Path for site configs
+ --cf='config line' Additional line of configuration
-d, --daemonize Daemonize
-h, --help Print usage message.
-i [ipaddr], --listen-ip=ipaddr Listen on the IP ipaddr
@@ -2319,6 +2323,12 @@
Use the specified path for locating site-specific configuration files. Ignore
the default directories (usually C</etc/mail/spamassassin> or similar).
+
+=item B<--cf='config line'>
+
+Add extra lines of configuration directly from the command line; they are parsed
+after the configuration files are read. Multiple B<--cf> arguments can be
+used, and each will be considered a separate line of configuration.
=item B<-d>, B<--daemonize>
Modified: spamassassin/branches/jm_re2c_hacks/t/bayesdbm.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/t/bayesdbm.t?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/t/bayesdbm.t (original)
+++ spamassassin/branches/jm_re2c_hacks/t/bayesdbm.t Wed Oct 25 09:15:31 2006
@@ -63,7 +63,8 @@
# $msgid is the generated hash messageid
# $msgid_hdr is the Message-Id header
-ok($msgid eq 'ce33e4a8bc5798c65428d6018380bae346c7c126@sa_generated');
+ok($msgid eq 'ce33e4a8bc5798c65428d6018380bae346c7c126@sa_generated')
+ or warn "got: [$msgid]";
ok($msgid_hdr eq '9PS291LhupY');
ok($sa->{bayes_scanner}->{store}->tie_db_writable());
Modified: spamassassin/branches/jm_re2c_hacks/t/mimeheader.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/t/mimeheader.t?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/t/mimeheader.t (original)
+++ spamassassin/branches/jm_re2c_hacks/t/mimeheader.t Wed Oct 25 09:15:31 2006
@@ -2,7 +2,7 @@
use lib '.'; use lib 't';
use SATest; sa_t_init("mimeheader");
-use Test; BEGIN { plan tests => 2 };
+use Test; BEGIN { plan tests => 4 };
$ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C'; # a cheat, but we need the patterns to work
@@ -12,6 +12,8 @@
q{ MIMEHEADER_TEST1 }, q{ test1 },
q{ MIMEHEADER_TEST2 }, q{ test2 },
+ q{ MATCH_NL_NONRAW }, q{ match_nl_nonraw },
+ q{ MATCH_NL_RAW }, q{ match_nl_raw },
);
@@ -20,6 +22,9 @@
# loadplugin Mail::SpamAssassin::Plugin::MIMEHeader
mimeheader MIMEHEADER_TEST1 content-type =~ /application\/msword/
mimeheader MIMEHEADER_TEST2 content-type =~ m!APPLICATION/MSWORD!i
+
+ mimeheader MATCH_NL_NONRAW Content-Type =~ /msword; name/
+ mimeheader MATCH_NL_RAW Content-Type:raw =~ /msword;\n\tname/
});
Modified: spamassassin/branches/jm_re2c_hacks/t/missing_hb_separator.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/t/missing_hb_separator.t?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/t/missing_hb_separator.t (original)
+++ spamassassin/branches/jm_re2c_hacks/t/missing_hb_separator.t Wed Oct 25 09:15:31 2006
@@ -42,6 +42,7 @@
$result = 0;
foreach (@{$status->{test_names_hit}}) {
+ print "test hit: $_\n";
$result++ if ($_ eq 'MISSING_HB_SEP' || $_ eq 'X_MESSAGE_INFO');
}
Modified: spamassassin/branches/jm_re2c_hacks/t/mkrules.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/t/mkrules.t?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/t/mkrules.t (original)
+++ spamassassin/branches/jm_re2c_hacks/t/mkrules.t Wed Oct 25 09:15:31 2006
@@ -2,7 +2,7 @@
use lib '.'; use lib 't';
use SATest; sa_t_init("mkrules");
-use Test; BEGIN { plan tests => 96 };
+use Test; BEGIN { plan tests => 101 };
use File::Path;
# ---------------------------------------------------------------------------
@@ -417,6 +417,36 @@
# checkfile("$tdir/rules/72_active.cf", \&patterns_run_cb);
checkfile("$tdir/rules/70_sandbox.cf", \&patterns_run_cb);
ok (-f "$tdir/rules/plugin.pm");
+ok ok_all_patterns();
+save_tdir();
+
+# ---------------------------------------------------------------------------
+print "meta rule depends on unpromoted subrule in lexically-earlier file\n\n";
+# (see mail from Sidney of Oct 16 2006, rules HS_INDEX_PARAM and HS_PHARMA_1)
+
+%patterns = (
+ "header T_GOOD_SUB" => rule_line_1,
+ "header T_BAD_SUB" => rule_line_2,
+ "meta GOOD (T_GOOD_SUB && !T_BAD_SUB)" => meta_found
+);
+%anti_patterns = (
+);
+
+rmtree([ $tdir ]); mkpath ([ "$tdir/rulesrc/sandbox/foo", "$tdir/rules" ]);
+
+write_file("$tdir/MANIFEST", [ "rules/72_active.cf\n" ]);
+write_file("$tdir/MANIFEST.SKIP", [ ]);
+write_file("$tdir/rules/active.list", [ "GOOD\n" ]);
+write_file("$tdir/rulesrc/sandbox/foo/20_aaa.cf", [
+ "meta GOOD (GOOD_SUB && !BAD_SUB)\n",
+]);
+write_file("$tdir/rulesrc/sandbox/foo/20_bbb.cf", [
+ "header GOOD_SUB Foo =~ /good/\n",
+ "header BAD_SUB Foo =~ /bad/\n",
+]);
+
+ok (mkrun ("--src $tdir/rulesrc --out $tdir/rules --manifest $tdir/MANIFEST --manifestskip $tdir/MANIFEST.SKIP --active $tdir/rules/active.list 2>&1", \&patterns_run_cb));
+checkfile("$tdir/rules/72_active.cf", \&patterns_run_cb);
ok ok_all_patterns();
save_tdir();