You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2006/12/30 11:00:10 UTC
svn commit: r491203 - /spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
Author: felicity
Date: Sat Dec 30 02:00:08 2006
New Revision: 491203
URL: http://svn.apache.org/viewvc?view=rev&rev=491203
Log:
Optimize the Bayes stop-list regex via Regexp::Trie (same word list, but the match is now case-insensitive: the /x flag was replaced by /i), add a pointer to the wiki page which has the word list, etc.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm?view=diff&rev=491203&r1=491202&r2=491203
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm Sat Dec 30 02:00:08 2006
@@ -381,19 +381,10 @@
# but extend the stop-list. These are squarely in the gray
# area, and it just slows us down to record them.
+ # See http://wiki.apache.org/spamassassin/BayesStopList for more info.
+ #
next if $len < 3 ||
- ($token =~ /^(?:a(?:nd|ny|ble|ll|re)|
- m(?:uch|ost|ade|ore|ail|ake|ailing|any|ailto)|
- t(?:his|he|ime|hrough|hat)|
- w(?:hy|here|ork|orld|ith|ithout|eb)|
- f(?:rom|or|ew)| e(?:ach|ven|mail)|
- o(?:ne|ff|nly|wn|ut)| n(?:ow|ot|eed)|
- s(?:uch|ame)| l(?:ook|ike|ong)|
- y(?:ou|our|ou're)|
- The|has|have|into|using|http|see|It's|it's|
- number|just|both|come|years|right|know|already|
- people|place|first|because|
- And|give|year|information|can)$/x);
+ ($token =~ /^(?:a(?:ble|l(?:ready|l)|n[dy]|re)|b(?:ecause|oth)|c(?:an|ome)|e(?:ach|mail|ven)|f(?:ew|irst|or|rom)|give|h(?:a(?:ve|s)|ttp)|i(?:n(?:formation|to)|t\'s)|just|know|l(?:ike|o(?:ng|ok))|m(?:a(?:de|il(?:(?:ing|to))?|ke|ny)|o(?:re|st)|uch)|n(?:eed|o[tw]|umber)|o(?:ff|n(?:ly|e)|ut|wn)|p(?:eople|lace)|right|s(?:ame|ee|uch)|t(?:h(?:at|is|rough|e)|ime)|using|w(?:eb|h(?:ere|y)|ith(?:out)?|or(?:ld|k))|y(?:ears?|ou(?:(?:\'re|r))?))$/i);
# are we in the body? If so, apply some body-specific breakouts
if ($region == 1 || $region == 2) {