You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/11/06 00:01:45 UTC

svn commit: rev 56704 - in spamassassin/trunk: lib/Mail/SpamAssassin lib/Mail/SpamAssassin/Message/Metadata spamd t

Author: jm
Date: Fri Nov  5 15:01:45 2004
New Revision: 56704

Modified:
   spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
   spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
   spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
   spamassassin/trunk/spamd/spamd.raw
   spamassassin/trunk/t/rcvd_parser.t
Log:
bug: use of '~' in ArchiveIterator specs stopped working.  fixed.  TODO: does this need a b3_0 backport?

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	Fri Nov  5 15:01:45 2004
@@ -1002,7 +1002,7 @@
   my ($self, $path) = @_;
 
   # replace leading tilde with home dir: ~/abc => /home/jm/abc
-  $path =~ s!^~/!$ENV{'HOME'}!;
+  $path =~ s!^~/!$ENV{'HOME'}/!;
 
   # protect/escape spaces: ./Mail/My Letters => ./Mail/My\ Letters
   $path =~ s/([^\\])(\s)/$1\\$2/g;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata/Received.pm	Fri Nov  5 15:01:45 2004
@@ -447,9 +447,8 @@
 
     if (/Exim/) {
       # one of the HUGE number of Exim formats :(
-      # This must be scriptable.
+      # This must be scriptable.  (update: it is. cf bug 3950, 3582)
       # mss 2004-09-27: See <http://www.exim.org/exim-html-4.40/doc/html/spec_14.html#IX1315>
-      #                 and <http://bugzilla.spamassassin.org/show_bug.cgi?id=3582>
 
       # Received: from [61.174.163.26] (helo=host) by sc8-sf-list1.sourceforge.net with smtp (Exim 3.31-VA-mm2 #1 (Debian)) id 18t2z0-0001NX-00 for <ra...@lists.sourceforge.net>; Wed, 12 Mar 2003 01:57:10 -0800
       # Received: from [218.19.142.229] (helo=hotmail.com ident=yiuhyotp) by yzordderrex with smtp (Exim 3.35 #1 (Debian)) id 194BE5-0005Zh-00; Sat, 12 Apr 2003 03:58:53 +0100
@@ -487,6 +486,27 @@
       # 2002 18:57:06 +1300
       if (/^from (\S+) \[(${IP_ADDRESS})\](:\d+)? by (\S+) /) {
 	$rdns= $1; $ip = $2; $helo = $1; $by = $4; goto enough;
+      }
+
+      # attempt to deal with other odd Exim formats; just match little bits
+      # of the header.
+      # Received: from helene8.i.pinwand.net (helene.cats.ms) [10.0.8.6.13219]
+      # (mail) by lisbeth.i.pinwand.net with esmtp (Exim 3.35 #1 (Debian)) id
+      # 1CO5y7-0001vC-00; Sun, 31 Oct 2004 04:01:23 +0100
+      if (/^from (\S+) /) {
+        $rdns= $1;      # assume this is the rDNS, not HELO.  is this appropriate?
+      }
+      if (/ \((\S+)\) /) {
+        $helo = $1;
+      }
+      if (/ \[(${IP_ADDRESS})(?:\.\d+)?\] /) {
+        $ip = $1;
+      }
+      if (/by (\S+) /) {
+        $by = $1;
+        # now, if we have a "by" and an IP, that's enough for most uses;
+        # we have to make do with that.
+        if ($ip) { goto enough; }
       }
 
       # else it's probably forged. fall through

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm	Fri Nov  5 15:01:45 2004
@@ -202,6 +202,9 @@
       $self->do_full_tests($priority, \$fulltext);
       $self->do_full_eval_tests($priority, \$fulltext);
 
+      # rundump($self->{main});
+
+
       # we may need to call this more often than once through the loop, but
       # it needs to be done at least once, either at the beginning or the end.
       $self->{main}->call_plugins ("check_tick", { permsgstatus => $self });
@@ -1759,6 +1762,7 @@
   local ($_);
 
   my $text;
+  my $uri_count = 0;
 
   for (@$textary) {
     # NOTE: do not modify $_ in this loop
@@ -1786,6 +1790,7 @@
 
       # warn("uri: got URI: $uri\n");
       push @uris, $uri;
+      last if (scalar @uris > 200);
     }
     while (/($Addr_spec_re)/go) {
       my $uri = $1;
@@ -1794,6 +1799,7 @@
 
       #warn("uri: got URI: $uri\n");
       push @uris, $uri;
+      last if (scalar @uris > 200);
     }
   }
 
@@ -1803,6 +1809,10 @@
     push @uris, @{ $self->{msg}->{metadata}->{html}->{uri} };
   }
 
+  # trim to the first 200 urls (splice returns what it removes: discard it)
+  if (scalar @uris > 200) {
+    splice (@uris, 200);
+  }
   @uris = Mail::SpamAssassin::Util::uri_list_canonify(@uris);
 
   # get domain list
@@ -2600,6 +2610,24 @@
     $self->{fulltext_tmpfile} = undef;
   }
 }
+
+
+our $dumpcount = 0;
+# sub rundump {
+# my $spamtest = shift;
+# $dumpcount++;
+# warn "dumping to dumps/dump.$$.$dumpcount\n";
+# system ("mkdir -p dumps");
+# local (*OLDERR);
+# open (OLDERR, ">&STDERR");
+# open (STDERR, ">dumps/dump.$$.$dumpcount");
+# use Devel::Peek;
+# Dump ($spamtest, 9999);
+# close STDERR;
+# open (STDERR, ">&OLDERR");
+# # use Devel::Size qw(size total_size); warn "JMD ".total_size($self);
+# }
+
 
 ###########################################################################
 

Modified: spamassassin/trunk/spamd/spamd.raw
==============================================================================
--- spamassassin/trunk/spamd/spamd.raw	(original)
+++ spamassassin/trunk/spamd/spamd.raw	Fri Nov  5 15:01:45 2004
@@ -15,6 +15,8 @@
 # limitations under the License.
 # </...@LICENSE>
 
+my $RUN_AS_SINGLE_PROCESS = 0;
+
 my $PREFIX          = '@@PREFIX@@';             # substituted at 'make' time
 my $DEF_RULES_DIR   = '@@DEF_RULES_DIR@@';      # substituted at 'make' time
 my $LOCAL_RULES_DIR = '@@LOCAL_RULES_DIR@@';    # substituted at 'make' time
@@ -853,7 +855,11 @@
   sigprocmask( POSIX::SIG_BLOCK(), $sigset )
     or die "Can't block SIGINT for fork: $!\n";
 
-  die "fork: $!" unless defined( $pid = fork );
+  if ($RUN_AS_SINGLE_PROCESS) {
+    $pid = 0;
+  } else {
+    die "fork: $!" unless defined( $pid = fork );
+  }
 
   if ($pid) {
     ## PARENT
@@ -874,9 +880,11 @@
     sigprocmask( POSIX::SIG_UNBLOCK(), $sigset )
       or die "Can't unblock SIGINT for fork: $!\n";
 
-    # set process name where supported
-    # this will help make it clear via process listing which is child/parent
-    $0 = 'spamd child';
+    if (!$RUN_AS_SINGLE_PROCESS) {
+      # set process name where supported
+      # this will help make it clear via process listing which is child/parent
+      $0 = 'spamd child';
+    }
 
     # handle $clients_per_child connections, then die in "old" age...
     for ( my $i = 0 ; $i < $clients_per_child ; $i++ ) {
@@ -921,7 +929,7 @@
     }
 
     # If the child lives to get here, it will die ...  Muhaha.
-    exit;
+    exit unless $RUN_AS_SINGLE_PROCESS;
   }
 }
 
@@ -1644,6 +1652,7 @@
 
 # sig handlers: child processes
 sub setup_child_sig_handlers {
+  return if ($RUN_AS_SINGLE_PROCESS);
   # note: all the signals changed in setup_parent_sig_handlers() must
   # be reset to appropriate values here!
   $SIG{HUP} = $SIG{CHLD} = $SIG{INT} = $SIG{TERM} = 'DEFAULT';

Modified: spamassassin/trunk/t/rcvd_parser.t
==============================================================================
--- spamassassin/trunk/t/rcvd_parser.t	(original)
+++ spamassassin/trunk/t/rcvd_parser.t	Fri Nov  5 15:01:45 2004
@@ -18,7 +18,7 @@
 
 use lib '.'; use lib 't';
 use SATest; sa_t_init("rcvd_parser");
-use Test; BEGIN { plan tests => 36 };
+use Test; BEGIN { plan tests => 37 };
 
 
 use strict;
@@ -56,6 +56,17 @@
 },
 q{
 
+Received: from helene8.i.pinwand.net (helene.cats.ms) [10.0.8.6.13219] (mail)
+     by lisbeth.i.pinwand.net with esmtp (Exim 3.35 #1 (Debian))
+     id 1CO5y7-0001vC-00; Sun, 31 Oct 2004 04:01:23 +0100
+
+} => q{
+
+[ ip=10.0.8.6 rdns=helene8.i.pinwand.net helo=helene.cats.ms by=lisbeth.i.pinwand.net ident= envfrom= intl=0 id=1CO5y7-0001vC-00 ]
+
+},
+q{
+
       Received: from inet-vrs-05.redmond.corp.microsoft.com ([157.54.6.157])
         by INET-IMC-05.redmond.corp.microsoft.com with Microsoft
         SMTPSVC(5.0.2195.6624); Thu, 6 Mar 2003 12:02:35 -0800
@@ -414,7 +425,7 @@
 
 } => q{
 
-[ ip=159.134.118.16 rdns=mail00.svc.cra.dublin.eircom.net helo=mail00.svc.cra.dublin.eircom.net by=amgod.boxhost.net ident= envfrom= id=0ACFC31014D ] [ ip=83.70.48.2 rdns=83-70-48-2.bas2.dbn.dublin.eircom.net helo=?192.168.23.32? by=mail00.svc.cra.dublin.eircom.net ident= envfrom= id= ]
+[ ip=159.134.118.16 rdns=mail00.svc.cra.dublin.eircom.net helo=mail00.svc.cra.dublin.eircom.net by=amgod.boxhost.net ident= envfrom= id=0ACFC31014D ] [ ip=83.70.48.2 rdns=83-70-48-2.bas2.dbn.dublin.eircom.net helo=?192.168.23.32? by=mail00.svc.cra.dublin.eircom.net ident= envfrom= id= ] [ ip=127.0.0.1 rdns=localhost helo=localhost by=radish.zzzz.org ident= envfrom= id=1398F5900D9 ]
 
 },
 q{
@@ -437,7 +448,7 @@
 
 } => q{
   
-[ ip=128.200.80.6 rdns=smtp3.es.uci.edu helo=smtp3.es.uci.edu by=amgod.boxhost.net ident= envfrom= id=87D0A310091 ] [ ip=128.200.80.22 rdns=rigel.oac.uci.edu helo=rigel.oac.uci.edu by=smtp3.es.uci.edu ident= envfrom= id=i2907ZaF008726 ]
+[ ip=128.200.80.6 rdns=smtp3.es.uci.edu helo=smtp3.es.uci.edu by=amgod.boxhost.net ident= envfrom= id=87D0A310091 ] [ ip=128.200.80.22 rdns=rigel.oac.uci.edu helo=rigel.oac.uci.edu by=smtp3.es.uci.edu ident= envfrom= id=i2907ZaF008726 ] [ ip=127.0.0.1 rdns=localhost helo=localhost by=radish.jmason.org ident= envfrom= id=27B275900D9 ]
   
 },
 q{

Re: svn commit: rev 56704 - in spamassassin/trunk: lib/Mail/SpamAssassin lib/Mail/SpamAssassin/Message/Metadata spamd t

Posted by Theo Van Dinter <fe...@kluge.net>.
On Fri, Nov 05, 2004 at 11:01:45PM -0000, jm@apache.org wrote:
> bug: use of '~' in ArchiveIterator specs stopped working.  fixed.  TODO: does this need a b3_0 backport?

No, that was something I put in while reworking the AI code.  There was
confusion between ~ and ~user, so I made it deal specifically with ~/ and
added docs.  Apparently I missed the extra / ... :(

-- 
Randomly Generated Tagline:
Time to fertilize the lawn.  A couple of 500-pound bags should do it!
 
 		-- Homer Simpson
 		   Homer vs. Patty and Selma