You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/02/08 02:45:16 UTC

svn commit: rev 6574 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin

Author: jm
Date: Sat Feb  7 17:45:16 2004
New Revision: 6574

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
Log:
bayes: track IP addresses from Received for trusted_networks mining

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	Sat Feb  7 17:45:16 2004
@@ -437,13 +437,14 @@
 
       # stop-list for numeric tokens.  These are squarely in the gray
       # area, and it just slows us down to record them.
-      if ($token !~ /(?:
-		  \QN:H*r:NN.NN.NNN\E |
-		  \QN:H*r:N.N.N\E |
-		  \QN:H*r:NNN.NNN.NNN\E |
-		  \QN:H*r:NNNN\E |
-		  \QN:H*r:NNN.NN.NN\E |
-		  \QN:NNNN\E
+      if ($token !~ /^(?:
+		  H\*r:ip\* |
+		  \QH*r:NN.NN.NNN\E |
+		  \QH*r:N.N.N\E |
+		  \QH*r:NNN.NNN.NNN\E |
+		  \QH*r:NNNN\E |
+		  \QH*r:NNN.NN.NN\E |
+		  \QNNNN\E
 		)/x)
       {
 	push (@{$self->{tokens}}, 'N:'.$tokprefix.$token);
@@ -600,11 +601,15 @@
   # IPs: break down to nearest /24, to reduce hapaxes -- EXCEPT for
   # IPs in the 10 and 192.168 ranges, they get lots of significant tokens
   # (on both sides)
+  # also make a dup with the full IP, as fodder for
+  # bayes_dump_to_trusted_networks: "H*r:ip*aaa.bbb.ccc.ddd"
   $val =~ s{(\b|[^\d])(\d{1,3}\.)(\d{1,3}\.)(\d{1,3})(\.\d{1,3})(\b|[^\d])}{
            if ($2 eq '10' || ($2 eq '192' && $3 eq '168')) {
-             $1.$2.$3.$4.$5.$6;
+             $1.$2.$3.$4.$5.$6.
+		" ip*".$2.$3.$4.$5." ";
            } else {
-             $1.$2.$3.$4.$6;
+             $1.$2.$3.$4.$6.
+		" ip*".$2.$3.$4.$5." ";
            }
          }gex;