You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2006/12/30 11:00:10 UTC

svn commit: r491203 - /spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm

Author: felicity
Date: Sat Dec 30 02:00:08 2006
New Revision: 491203

URL: http://svn.apache.org/viewvc?view=rev&rev=491203
Log:
Optimize the Bayes stop-list via Regexp::Trie; add a pointer to the wiki page that has the word list, etc.

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm?view=diff&rev=491203&r1=491202&r2=491203
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm Sat Dec 30 02:00:08 2006
@@ -381,19 +381,10 @@
 
     # but extend the stop-list. These are squarely in the gray
     # area, and it just slows us down to record them.
+    # See http://wiki.apache.org/spamassassin/BayesStopList for more info.
+    #
     next if $len < 3 ||
-	($token =~ /^(?:a(?:nd|ny|ble|ll|re)|
-		m(?:uch|ost|ade|ore|ail|ake|ailing|any|ailto)|
-		t(?:his|he|ime|hrough|hat)|
-		w(?:hy|here|ork|orld|ith|ithout|eb)|
-		f(?:rom|or|ew)| e(?:ach|ven|mail)|
-		o(?:ne|ff|nly|wn|ut)| n(?:ow|ot|eed)|
-		s(?:uch|ame)| l(?:ook|ike|ong)|
-		y(?:ou|our|ou're)|
-		The|has|have|into|using|http|see|It's|it's|
-		number|just|both|come|years|right|know|already|
-		people|place|first|because|
-		And|give|year|information|can)$/x);
+	($token =~ /^(?:a(?:ble|l(?:ready|l)|n[dy]|re)|b(?:ecause|oth)|c(?:an|ome)|e(?:ach|mail|ven)|f(?:ew|irst|or|rom)|give|h(?:a(?:ve|s)|ttp)|i(?:n(?:formation|to)|t\'s)|just|know|l(?:ike|o(?:ng|ok))|m(?:a(?:de|il(?:(?:ing|to))?|ke|ny)|o(?:re|st)|uch)|n(?:eed|o[tw]|umber)|o(?:ff|n(?:ly|e)|ut|wn)|p(?:eople|lace)|right|s(?:ame|ee|uch)|t(?:h(?:at|is|rough|e)|ime)|using|w(?:eb|h(?:ere|y)|ith(?:out)?|or(?:ld|k))|y(?:ears?|ou(?:(?:\'re|r))?))$/i);
 
     # are we in the body?  If so, apply some body-specific breakouts
     if ($region == 1 || $region == 2) {