You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/09/15 05:29:40 UTC

svn commit: rev 46074 - in spamassassin/trunk: . lib/Mail/SpamAssassin rules

Author: felicity
Date: Tue Sep 14 20:29:40 2004
New Revision: 46074

Modified:
   spamassassin/trunk/MANIFEST
   spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
   spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm
   spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
   spamassassin/trunk/rules/20_body_tests.cf
   spamassassin/trunk/rules/30_text_de.cf
   spamassassin/trunk/rules/30_text_fr.cf
   spamassassin/trunk/rules/30_text_nl.cf
   spamassassin/trunk/rules/30_text_pl.cf
   spamassassin/trunk/rules/50_scores.cf
   spamassassin/trunk/rules/init.pre
Log:
bug 3643: converted the Razor2 check into a Plugin.  The debug and report pieces have to stay as-is until the plugin architecture grows to support them from within a plugin.  See bugs 3777 and 3778.

Modified: spamassassin/trunk/MANIFEST
==============================================================================
--- spamassassin/trunk/MANIFEST	(original)
+++ spamassassin/trunk/MANIFEST	Tue Sep 14 20:29:40 2004
@@ -55,6 +55,7 @@
 lib/Mail/SpamAssassin/Plugin/Hashcash.pm
 lib/Mail/SpamAssassin/Plugin/MSExec.pm
 lib/Mail/SpamAssassin/Plugin.pm
+lib/Mail/SpamAssassin/Plugin/Razor2.pm
 lib/Mail/SpamAssassin/Plugin/RelayCountry.pm
 lib/Mail/SpamAssassin/Plugin/SPF.pm
 lib/Mail/SpamAssassin/Plugin/Test.pm
@@ -152,6 +153,7 @@
 rules/25_body_tests_es.cf
 rules/25_hashcash.cf
 rules/25_msexec.cf
+rules/25_razor2.cf
 rules/25_spf.cf
 rules/25_uribl.cf
 rules/30_text_de.cf

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm	Tue Sep 14 20:29:40 2004
@@ -1257,31 +1257,6 @@
     }
   });
 
-=item use_razor2 ( 0 | 1 )		(default: 1)
-
-Whether to use Razor version 2, if it is available.
-
-=cut
-
-  push (@cmds, {
-    setting => 'use_razor2',
-    default => 1,
-    type => $CONF_TYPE_BOOL
-  });
-
-=item razor_timeout n		(default: 10)
-
-How many seconds you wait for razor to complete before you go on without
-the results
-
-=cut
-
-  push (@cmds, {
-    setting => 'razor_timeout',
-    default => 10,
-    type => $CONF_TYPE_NUMERIC
-  });
-
 =item skip_rbl_checks { 0 | 1 }   (default: 0)
 
 By default, SpamAssassin will run RBL checks.  If your ISP already does this
@@ -2451,19 +2426,6 @@
       if ($value !~ /^(\S+)\s+(ok|fail)\s+(.*)$/) { return $INVALID_VALUE; }
       $self->{parser}->add_regression_test($1, $2, $3);
     }
-  });
-
-=item razor_config filename
-
-Define the filename used to store Razor's configuration settings.
-Currently this is left to Razor to decide.
-
-=cut
-
-  push (@cmds, {
-    setting => 'razor_config',
-    is_admin => 1,
-    type => $CONF_TYPE_STRING
   });
 
 =item pyzor_path STRING

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm	Tue Sep 14 20:29:40 2004
@@ -84,9 +84,6 @@
     require Net::DNS::Resolver;
   };
   eval {
-    require Razor2::Client::Agent;
-  };
-  eval {
     require MIME::Base64;
   };
   eval {
@@ -370,203 +367,6 @@
   delete $self->{dnspost};
   delete $self->{dnsresult};
   delete $self->{dnsuri};
-}
-
-###########################################################################
-
-sub is_razor2_available {
-  my ($self) = @_;
-
-  if ($self->{main}->{local_tests_only}) {
-    dbg ("local tests only, ignoring Razor2", "razor", -1);
-    return 0;
-  }
-  if (!$self->{conf}->{use_razor2}) { return 0; }
-
-  # Use Razor2 if it's available
-  if (eval { require Razor2::Client::Agent; }) {
-    dbg("Razor2 is available", "razor", -1);
-    return 1;
-  }
-  else {
-    dbg("Razor2 is not available", "razor", -1);
-    return 0;
-  }
-}
-
-sub razor2_lookup {
-  my ($self, $fulltext) = @_;
-  my $timeout=$self->{conf}->{razor_timeout};
-
-  # Set the score for the ranged checks
-  $self->{razor2_cf_score} = 0;
-  return $self->{razor2_result} if ( defined $self->{razor2_result} );
-  $self->{razor2_result} = 0;
-
-  # this test covers all aspects of availability
-  if (!$self->is_razor2_available()) { return 0; }
-  
-  # razor also debugs to stdout. argh. fix it to stderr...
-  if ($Mail::SpamAssassin::DEBUG->{enabled}) {
-    open (OLDOUT, ">&STDOUT");
-    open (STDOUT, ">&STDERR");
-  }
-
-  $self->enter_helper_run_mode();
-
-    eval {
-      local ($^W) = 0;    # argh, warnings in Razor
-
-      require Razor2::Client::Agent;
-
-      local $SIG{ALRM} = sub { die "alarm\n" };
-      alarm $timeout;
-
-      # everything's in the module!
-      my $rc = Razor2::Client::Agent->new('razor-check');
-
-      if ($rc) {
-        my %opt = (
-            debug      => ($Mail::SpamAssassin::DEBUG->{enabled} and
-                 $Mail::SpamAssassin::DEBUG->{razor} < -2), 
-	    foreground => 1,
-            config     => $self->{conf}->{razor_config}
-        );
-        $rc->{opt} = \%opt;
-        $rc->do_conf() or die $rc->errstr;
-
-	my $tmptext = $$fulltext;
-	my @msg = (\$tmptext);
-
-        my $objects = $rc->prepare_objects( \@msg )
-          or die "error in prepare_objects";
-        $rc->get_server_info() or die $rc->errprefix("checkit");
-
-	# let's reset the alarm since get_server_info() calls
-	# nextserver() which calls discover() which very likely will
-	# reset the alarm for us ... how polite.  :(  
-	alarm $timeout;
-
-        my $sigs = $rc->compute_sigs($objects)
-          or die "error in compute_sigs";
-
-        # 
-        # if mail isn't whitelisted, check it out
-        #   
-        if ( ! $rc->local_check( $objects->[0] ) ) {
-          if (!$rc->connect()) {
-            # provide a better error message when servers are unavailable,
-            # than "Bad file descriptor Died".
-            die "could not connect to any servers\n";
-          }
-          $rc->check($objects) or die $rc->errprefix("checkit");
-          $rc->disconnect() or die $rc->errprefix("checkit");
-
-	  # if we got here, we're done doing remote stuff, abort the alert
-	  alarm 0;
-
-          # figure out if we have a log file we need to close...
-          if (ref($rc->{logref}) && exists $rc->{logref}->{fd}) {
-            # the fd can be stdout or stderr, so we need to find out if it is
-	    # so we don't close them by accident.  Note: we can't just
-	    # undef the fd here (like the IO::Handle manpage says we can)
-	    # because it won't actually close, unfortunately. :(
-            my $untie = 1;
-            foreach my $log ( *STDOUT{IO}, *STDERR{IO} ) {
-              if ($log == $rc->{logref}->{fd}) {
-                $untie = 0;
-                last;
-              }
-            }
-            close $rc->{logref}->{fd} if ($untie);
-          }
-
-
-	  dbg("Using results from Razor v".$Razor2::Client::Version::VERSION);
-
-	  # so $objects->[0] is the first (only) message, and ->{spam} is a general yes/no
-          $self->{razor2_result} = $objects->[0]->{spam} || 0;
-
-	  # great for debugging, but leave this off!
-	  #use Data::Dumper;
-	  #print Dumper($objects),"\n";
-
-	  # ->{p} is for each part of the message
-	  # so go through each part, taking the highest cf we find
-	  # of any part that isn't contested (ct).  This helps avoid false
-	  # positives.  equals logic_method 4.
-	  #
-	  # razor-agents < 2.14 have a different object format, so we now support both.
-	  # $objects->[0]->{resp} vs $objects->[0]->{p}->[part #]->{resp}
-	  my $part = 0;
-	  my $arrayref = $objects->[0]->{p} || $objects;
-	  if ( defined $arrayref ) {
-	    foreach my $cf ( @{$arrayref} ) {
-	      if ( exists $cf->{resp} ) {
-	        for (my $response=0;$response<@{$cf->{resp}};$response++) {
-	          my $tmp = $cf->{resp}->[$response];
-	      	  my $tmpcf = $tmp->{cf} || 0; # Part confidence
-	      	  my $tmpct = $tmp->{ct} || 0; # Part contested?
-		  my $engine = $cf->{sent}->[$response]->{e};
-	          dbg("Found Razor2 part: part=$part engine=$engine ct=$tmpct cf=$tmpcf");
-	          $self->{razor2_cf_score} = $tmpcf if ( !$tmpct && $tmpcf > $self->{razor2_cf_score} );
-	        }
-	      }
-	      else {
-		my $text = "part=$part noresponse";
-		$text .= " skipme=1" if ( $cf->{skipme} );
-	        dbg("Found Razor2 part: $text");
-	      }
-	      $part++;
-	    }
-	  }
-	  else {
-	    # If we have some new $objects format that isn't close to
-	    # the current razor-agents 2.x version, we won't FP but we
-	    # should alert in debug.
-	    dbg("It looks like the internal Razor object has changed format!  Tell spamassassin-devel!");
-	  }
-        }
-      }
-      else {
-        warn "undefined Razor2::Client::Agent\n";
-      }
-  
-      alarm 0;
-    };
-
-    alarm 0;    # just in case
-  
-    if ($@) {
-      if ( $@ =~ /alarm/ ) {
-          dbg("razor2 check timed out after $timeout secs.");
-        } elsif ($@ =~ /(?:could not connect|network is unreachable)/) {
-          # make this a dbg(); SpamAssassin will still continue,
-          # but without Razor checking.  otherwise there may be
-          # DSNs and errors in syslog etc., yuck
-          dbg("razor2 check could not connect to any servers");
-        } else {
-          warn("razor2 check skipped: $! $@");
-        }
-      }
-
-  # work around serious brain damage in Razor2 (constant seed)
-  srand;
-
-  $self->leave_helper_run_mode();
-
-  # razor also debugs to stdout. argh. fix it to stderr...
-  if ($Mail::SpamAssassin::DEBUG->{enabled}) {
-    open (STDOUT, ">&OLDOUT");
-    close OLDOUT;
-  }
-
-  dbg("Razor2 results: spam? ".$self->{razor2_result}."  highest cf score: ".$self->{razor2_cf_score});
-
-  if ($self->{razor2_result} > 0) {
-      return 1;
-  }
-  return 0;
 }
 
 ###########################################################################

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm	Tue Sep 14 20:29:40 2004
@@ -2579,19 +2579,6 @@
 # FULL-MESSAGE TESTS:
 ###########################################################################
 
-sub check_razor2 {
-  my ($self) = @_;
-
-  return 0 unless ($self->is_razor2_available());
-  return $self->{razor2_result} if (defined $self->{razor2_result});
-
-  # note: we don't use $fulltext. instead we get the raw message,
-  # unfiltered, for razor2 to check.  ($fulltext removes MIME
-  # parts etc.)
-  my $full = $self->{msg}->get_pristine();
-  return $self->razor2_lookup (\$full);
-}
-
 sub check_pyzor {
   my ($self, $full) = @_;
 
@@ -2736,30 +2723,6 @@
   $diff = $timetoken - $expected;
 
   return (abs($diff) >= $fudge);
-}
-
-# Check the cf value of a given message and return if it's within the
-# given range
-sub check_razor2_range {
-  my ($self,$fulltext,$min,$max) = @_;
-
-  # If the Razor2 general test is disabled, don't continue.
-  return 0 unless $self->{conf}{scores}{'RAZOR2_CHECK'};
-
-  # If Razor2 hasn't been checked yet, go ahead and run it.
-  if (!defined $self->{razor2_result}) {
-    # note: we don't use $fulltext. instead we get the raw message,
-    # unfiltered, for razor2 to check.  ($fulltext removes MIME
-    # parts etc.)
-    my $full = $self->{msg}->get_pristine();
-    $self->razor2_lookup (\$full);
-  }
-
-  if ($self->{razor2_cf_score} >= $min && $self->{razor2_cf_score} <= $max) {
-    $self->test_log(sprintf ("cf: %3d", $self->{razor2_cf_score}));
-    return 1;
-  }
-  return 0;
 }
 
 sub check_messageid_not_usable {

Modified: spamassassin/trunk/rules/20_body_tests.cf
==============================================================================
--- spamassassin/trunk/rules/20_body_tests.cf	(original)
+++ spamassassin/trunk/rules/20_body_tests.cf	Tue Sep 14 20:29:40 2004
@@ -40,29 +40,6 @@
 ###########################################################################
 # Message digest tests
 
-full RAZOR2_CHECK	eval:check_razor2()
-describe RAZOR2_CHECK	Listed in Razor2 (http://razor.sf.net/)
-tflags RAZOR2_CHECK	net
-
-# cf (confidence level) is how likely the message is spam.  RAZOR2_CHECK
-# returns true if cf>=min_cf (as defined by user/config).  These return
-# true depending on what cf value the message has.  The algorithm goes:
-# check the message via razor, then go through each mime part and check
-# how razor scored it.  If the part is contested (ie: it's been reported
-# as both ham and spam) it's ignored.  SA takes the highest non-contested
-# part cf score and returns it for the range rules.  ie: This is essentially
-# Razor 2's logic_method 4.
-#
-# Note: Disabling RAZOR2_CHECK (score RAZOR2_CHECK 0) will also disable
-# these checks.
-#
-# Note: The scores are set to 0 on these tests right now until they get
-# better integrated with SA overall.
-#
-body	RAZOR2_CF_RANGE_51_100	eval:check_razor2_range('51','100')
-tflags  RAZOR2_CF_RANGE_51_100	net
-describe RAZOR2_CF_RANGE_51_100	Razor2 gives confidence level above 50%
-
 full DCC_CHECK		eval:check_dcc()
 describe DCC_CHECK	Listed in DCC (http://rhyolite.com/anti-spam/dcc/)
 tflags DCC_CHECK	net
@@ -72,9 +49,11 @@
 tflags PYZOR_CHECK	net
 
 # bug 2220. nice results
+ifplugin Mail::SpamAssassin::Plugin::Razor2
 meta DIGEST_MULTIPLE       RAZOR2_CHECK + DCC_CHECK + PYZOR_CHECK > 1
 describe DIGEST_MULTIPLE   Message hits more than one network digest check
 tflags DIGEST_MULTIPLE     net
+endif	# Mail::SpamAssassin::Plugin::Razor2
 
 # this seems to be the new fashion (as of Jul 5 2002).  base64-encoded parts need to
 # be stripped before this match

Modified: spamassassin/trunk/rules/30_text_de.cf
==============================================================================
--- spamassassin/trunk/rules/30_text_de.cf	(original)
+++ spamassassin/trunk/rules/30_text_de.cf	Tue Sep 14 20:29:40 2004
@@ -58,8 +58,6 @@
 #                     ........................................................................
 
 lang de describe GTUBE Test zur Prüfung von Anti-Spam-Software
-lang de describe RAZOR2_CHECK Gelistet im "Razor2"-System (http://razor.sf.net/)
-lang de describe RAZOR2_CF_RANGE_51_100 Razor2 Spam-Bewertung liegt zwischen 51 und 100
 lang de describe DCC_CHECK Gelistet im DCC-System (http://rhyolite.com/anti-spam/dcc/)
 lang de describe PYZOR_CHECK Gelistet im Pyzor-System (http://pyzor.sf.net/)
 lang de describe DIGEST_MULTIPLE Mehrere Internettests (Razor, DCC, Pyzor, etc.) treffen zu

Modified: spamassassin/trunk/rules/30_text_fr.cf
==============================================================================
--- spamassassin/trunk/rules/30_text_fr.cf	(original)
+++ spamassassin/trunk/rules/30_text_fr.cf	Tue Sep 14 20:29:40 2004
@@ -392,8 +392,6 @@
 lang fr describe RATWARE_OE_MALFORMED	En-tête X-Mailer indique No de version Outlook Express malformé
 lang fr describe RATWARE_RCVD_LC_ESMTP	Trace de logiciel de spam ('esmtp' Received)
 lang fr describe RATWARE_STORM_URI	Trace de logiciel de spam (StormPost)
-lang fr describe RAZOR2_CHECK		Message listé par Razor2, voir http://razor.sourceforge.net
-lang fr describe RAZOR2_CF_RANGE_51_100	Razor2 donne un indice de confiance entre 51 et 100
 lang fr describe RCVD_AM_PM		En-tête Received: falsifié (AM/PM)
 lang fr describe RCVD_FAKE_HELO_DOTCOM  En-tête Received contient nom d'hôte falsifié dans le HELO
 lang fr describe RCVD_IN_BL_SPAMCOP_NET	Relais listé dans http://spamcop.net/bl.shtml

Modified: spamassassin/trunk/rules/30_text_nl.cf
==============================================================================
--- spamassassin/trunk/rules/30_text_nl.cf	(original)
+++ spamassassin/trunk/rules/30_text_nl.cf	Tue Sep 14 20:29:40 2004
@@ -25,8 +25,6 @@
 #                    ........................................................................
 
 lang nl describe GTUBE                           Standaard test voor ongewenste bulk mail
-lang nl describe RAZOR2_CHECK                    Gevonden in Razor2 (http://razor.sf.net/)
-lang nl describe RAZOR2_CF_RANGE_51_100          Razor2 geeft een zekerheid tussen 51 en 100
 lang nl describe DCC_CHECK                       Gevonden in DCC (http://rhyolite.com/anti-spam/dcc/)
 lang nl describe PYZOR_CHECK                     Gevonden in Pyzor (http://pyzor.sf.net/)
 lang nl describe TRACKER_ID                      Maakt gebruik van een nummer om het bericht te kunnen volgen

Modified: spamassassin/trunk/rules/30_text_pl.cf
==============================================================================
--- spamassassin/trunk/rules/30_text_pl.cf	(original)
+++ spamassassin/trunk/rules/30_text_pl.cf	Tue Sep 14 20:29:40 2004
@@ -368,8 +368,6 @@
 lang pl describe RATWARE_OE_MALFORMED	Nagłówek X-Mailer podaje złą wersję Outlook Express
 lang pl describe RATWARE_RCVD_LC_ESMTP	Znaleziono oznaczenie  przesyłki masowej ('esmtp' Received)
 lang pl describe RATWARE_STORM_URI	Znaleziono oznaczenie  przesyłki masowej (StormPost)
-lang pl describe RAZOR2_CF_RANGE_51_100	Razor2 stwierdził pewność pomiędzy 51 i 100
-lang pl describe RAZOR2_CHECK		Na liście Razor2 (http://razor.sf.net/)
 lang pl describe RCVD_AM_PM		Sfałszowane nagłówki Received (AM/PM)
 lang pl describe RCVD_FAKE_HELO_DOTCOM	Nagłówek Received zawiera sfałszowaną nazwę hosta HELO
 lang pl describe RCVD_IN_BL_SPAMCOP_NET	Odebrane od systemu klasy RELAY w/g: bl.spamcop.net

Modified: spamassassin/trunk/rules/50_scores.cf
==============================================================================
--- spamassassin/trunk/rules/50_scores.cf	(original)
+++ spamassassin/trunk/rules/50_scores.cf	Tue Sep 14 20:29:40 2004
@@ -478,8 +478,6 @@
 score RATWARE_RCVD_PF 2.880 3.384 3.608 3.867
 score RATWARE_STORM_URI 1.920 1.518 2.405 2.295
 score RATWARE_ZERO_TZ 3.200 2.372 3.875 4.100
-score RAZOR2_CF_RANGE_51_100 0 1.485 0 0.056
-score RAZOR2_CHECK 0 0.150 0 1.511
 score RCVD_AM_PM 1.558 0.091 1.802 1.927
 score RCVD_BONUS_SPC_DATE 1.371 0.904 1.575 1.872
 score RCVD_BY_IP 0 0.024 0.051 0.067
@@ -699,6 +697,12 @@
 score URIBL_SC_SURBL 0 3.897 0 4.263
 score URIBL_WS_SURBL 0 0.539 0 1.462
 endif # Mail::SpamAssassin::Plugin::URIDNSBL
+
+# Razor2
+ifplugin Mail::SpamAssassin::Plugin::Razor2
+score RAZOR2_CF_RANGE_51_100 0 1.485 0 0.056
+score RAZOR2_CHECK 0 0.150 0 1.511
+endif # Mail::SpamAssassin::Plugin::Razor2
 
 # MAPS
 # MAPS is a commercial service.  If you pay for them, assign a score

Modified: spamassassin/trunk/rules/init.pre
==============================================================================
--- spamassassin/trunk/rules/init.pre	(original)
+++ spamassassin/trunk/rules/init.pre	Tue Sep 14 20:29:40 2004
@@ -28,6 +28,10 @@
 #
 loadplugin Mail::SpamAssassin::Plugin::SPF
 
+# Razor2 - perform Razor2 message checks
+#
+loadplugin Mail::SpamAssassin::Plugin::Razor2
+
 # MSExec - do simple checks to see if the message includes a Microsoft
 # executable file
 #