You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2006/09/03 03:07:11 UTC

svn commit: r439695 - in /spamassassin/branches/tvd-multi-mass-check: ./ build/automc/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Message/ masses/ masses/rule-qa/ rules/ t/

Author: felicity
Date: Sat Sep  2 18:07:10 2006
New Revision: 439695

URL: http://svn.apache.org/viewvc?rev=439695&view=rev
Log:
merge up to r439690

Modified:
    spamassassin/branches/tvd-multi-mass-check/MANIFEST
    spamassassin/branches/tvd-multi-mass-check/Makefile.PL
    spamassassin/branches/tvd-multi-mass-check/build/automc/run_nightly
    spamassassin/branches/tvd-multi-mass-check/build/automc/run_preflight
    spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/ArchiveIterator.pm
    spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/branches/tvd-multi-mass-check/masses/mass-check
    spamassassin/branches/tvd-multi-mass-check/masses/rule-qa/corpus-nightly
    spamassassin/branches/tvd-multi-mass-check/rules/active.list
    spamassassin/branches/tvd-multi-mass-check/t/mkrules.t

Modified: spamassassin/branches/tvd-multi-mass-check/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/MANIFEST?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/MANIFEST (original)
+++ spamassassin/branches/tvd-multi-mass-check/MANIFEST Sat Sep  2 18:07:10 2006
@@ -146,6 +146,7 @@
 masses/graphs/gnuplot-score-graph
 masses/hit-frequencies
 masses/lint-rules-from-freqs
+masses/logdiff
 masses/logs-to-c
 masses/mass-check
 masses/mass-check.cf

Modified: spamassassin/branches/tvd-multi-mass-check/Makefile.PL
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/Makefile.PL?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/Makefile.PL (original)
+++ spamassassin/branches/tvd-multi-mass-check/Makefile.PL Sat Sep  2 18:07:10 2006
@@ -284,6 +284,10 @@
     # asking all questions twice after a 'make dist*'.
     'NORECURS' => 1,
 
+    # bug 5074: perl 5.6.1 (with ExtUtils::MakeMaker 5.45) attempts to
+    # recurse anyway unless this is explicitly specified
+    'DIR' => [ ],
+
     # Don't add META.yml to the MANIFEST for god's sake!
     'NO_META' => 1,
 );

Modified: spamassassin/branches/tvd-multi-mass-check/build/automc/run_nightly
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/build/automc/run_nightly?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/build/automc/run_nightly (original)
+++ spamassassin/branches/tvd-multi-mass-check/build/automc/run_nightly Sat Sep  2 18:07:10 2006
@@ -7,9 +7,10 @@
 
 # add usernames who you want to do nightlies for here:
 nightly_users="
-  zmi
   doc
   jm
+  fredt
+  zmi
 "
 
 # and where the $HOMEs are:

Modified: spamassassin/branches/tvd-multi-mass-check/build/automc/run_preflight
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/build/automc/run_preflight?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/build/automc/run_preflight (original)
+++ spamassassin/branches/tvd-multi-mass-check/build/automc/run_preflight Sat Sep  2 18:07:10 2006
@@ -40,35 +40,13 @@
 
 unlink ("ham.log", "spam.log");
 
-# just the sandbox rules, sandbox plugins, the timing plugin,
-# and the default system-wide plugins (so DNS evals can work)
+# change of plan: mass-check the entire ruleset
 #
 system ("rm -rf tstrules");
 run "mkdir tstrules";
-run "cp ../rules/*.pre tstrules";
-run "cp ../rules/*.pm tstrules";
-run "cp plugins/*.* tstrules";
-
-# don't just copy; instead, transcribe while dropping score and describe lines
-# (to avoid '[26260] warn: config: warning: score set for non-existent rule
-# HTML_SHORT_LENGTH').
-## run "cp ../rules/70_sandbox.cf tstrules";
-open IN, "<../rules/70_sandbox.cf" or die "cannot read ../rules/sandbox.cf";
-open OUT, ">tstrules/70_sandbox.cf" or die "cannot write tstrules/70_sandbox.cf";
-
-while (<IN>) {
-  s/^\s+//;
-  next if /^(?:
-    score|
-    describe|
-    lang
-    )/x;
 
-  print OUT;
-}
-
-close IN or die "close failed!";
-close OUT or die "close failed!";
+run "cp ../rules/*.* tstrules";
+run "cp plugins/*.* tstrules";
 
 # well, ok just those, and anything that's been mailed-in
 # 

Modified: spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/ArchiveIterator.pm?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/ArchiveIterator.pm Sat Sep  2 18:07:10 2006
@@ -34,9 +34,10 @@
 use vars qw {
   $MESSAGES
   $AICache
+  @ISA
 };
 
-my @ISA = qw($MESSAGES);
+@ISA = qw();
 
 =head1 NAME
 

Modified: spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/Message/Node.pm?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/branches/tvd-multi-mass-check/lib/Mail/SpamAssassin/Message/Node.pm Sat Sep  2 18:07:10 2006
@@ -261,7 +261,7 @@
 =item decode()
 
 If necessary, decode the part text as base64 or quoted-printable.
-The decoded text will be returned as a scalar.  An optional length
+The decoded text will be returned as a scalar string.  An optional length
 parameter can be passed in which limits how much decoded data is returned.
 If the scalar isn't needed, call with "0" as a parameter.
 

Modified: spamassassin/branches/tvd-multi-mass-check/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/masses/mass-check?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/masses/mass-check (original)
+++ spamassassin/branches/tvd-multi-mass-check/masses/mass-check Sat Sep  2 18:07:10 2006
@@ -19,6 +19,8 @@
 # limitations under the License.
 # </...@LICENSE>
 
+sub aidbg;
+
 sub usage {
   my $status = shift;
 
@@ -922,7 +924,7 @@
       select($old);
 
       $socket->add($child->[$i]);
-      #warn "debug: starting new child $i (pid ",$pid->[$i],")\n";
+      aidbg "mass-check: starting new child $i (pid ".$pid->[$i].")\n";
       next;
     }
     elsif (defined $pid->[$i]) {
@@ -974,7 +976,7 @@
   local $SIG{'PIPE'} = 'IGNORE';
 
   for (my $i = 0; $i < $count; $i++) {
-    #warn "debug: killing child $i (pid ",$pid->[$i],")\n";
+    aidbg "mass-check: killing child $i (pid ",$pid->[$i],")\n";
     send_line($socket->[$i],"exit"); # tell the child to die.
     close $socket->[$i];
     waitpid($pid->[$i], 0); # wait for the signal ...
@@ -1251,10 +1253,10 @@
           warn "mass-check: readline failed, attempting to recover\n";
           $select->remove($socket);
         }
-        elsif ($line =~ /^([^\0]+)\0RESULT (.+)$/s) {
+        elsif ($line =~ /^([^\0]*)\0RESULT (.+)$/s) {
 	  my $result = $1;
 	  my ($date,$class,$type) = Mail::SpamAssassin::ArchiveIterator::index_unpack($2);
-	  #warn ">> RESULT: $class, $type, $date\n";
+	  aidbg "mass-check: $class, $type, $date\n";
 
 	  if (defined $opt_restart && ($total_count % $opt_restart) == 0) {
 	    $needs_restart = 1;
@@ -1264,11 +1266,11 @@
 	  if (($total_messages > $total_count) && !$needs_restart) {
 	    send_line($socket, read_line($tmpfd));
 	    $total_count++;
-	    #warn ">> recv: $total_messages $total_count\n";
+	    aidbg "mass-check: $total_messages $total_count\n";
 	  }
 	  else {
 	    # stop listening on this child since we're done with it
-	    #warn ">> removeresult: $needs_restart $total_messages $total_count\n";
+	    aidbg "mass-check: $needs_restart $total_messages $total_count\n";
 	    $select->remove($socket);
 	  }
 
@@ -1282,24 +1284,29 @@
 	    # we still have messages, send one to child
 	    send_line($socket, read_line($tmpfd));
 	    $total_count++;
-	    #warn ">> new: $total_messages $total_count\n";
+	    aidbg "mass-check: $total_messages $total_count\n";
 	  }
 	  else {
 	    # no more messages, so stop listening on this child
-	    #warn ">> removestart: $needs_restart $total_messages $total_count\n";
+	    aidbg "mass-check: $needs_restart $total_messages $total_count\n";
 	    $select->remove($socket);
 	  }
         }
+        else {
+          $needs_restart = 1;
+          warn "mass-check: bad line from readline: $line\n";
+          $select->remove($socket);
+        }
       }
 
-      #warn ">> out of loop, $total_messages $total_count $needs_restart ".$select->count()."\n";
+      aidbg "mass-check: out of loop, $total_messages $total_count $needs_restart ".$select->count()."\n";
 
       # If there are still messages to process, and we need to restart
       # the children, and all of the children are idle, let's go ahead.
       if ($needs_restart && $select->count == 0 && $total_messages > $total_count) {
         $needs_restart = 0;
 
-        #warn "debug: needs restart, $total_messages total, $total_count done\n";
+        aidbg "mass-check: needs restart, $total_messages total, $total_count done\n";
         reap_children($opt_j, \@child, \@pid);
         @child=();
         @pid=();
@@ -1802,5 +1809,11 @@
   }
   else {
     warn ">> WTH!?  result is not in the correct format: $result\n";
+  }
+}
+
+sub aidbg {
+  if (would_log("dbg", "mass-check") == 2) {
+    dbg (@_);
   }
 }

Modified: spamassassin/branches/tvd-multi-mass-check/masses/rule-qa/corpus-nightly
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/masses/rule-qa/corpus-nightly?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/masses/rule-qa/corpus-nightly (original)
+++ spamassassin/branches/tvd-multi-mass-check/masses/rule-qa/corpus-nightly Sat Sep  2 18:07:10 2006
@@ -62,37 +62,31 @@
         rm -rf rules    # causing trouble for some reason
         rm -rf rulesrc/sandbox
 
-        # argh svn, messiness
-        svn cleanup < /dev/null
-
 	if svn update -r $revision < /dev/null && \
             svn update -r $revision rulesrc < /dev/null
         then
 		break;
 	fi
 
-        # TODO: if that failed, we should be just using
-        # a brand new "svn checkout" and forget about the incremental
-        # update niceties
-
-        # OFF: this is extremely antisocial when multiple
-        # corpus-nightly scripts run simultaneously on one CPU!
-	# pkill -TERM svn
-	# sleep 10
-	# pkill -KILL svn
+        # if that failed, get aggressive about getting a full
+        # checkout, regardless of what may have been here already;
+        # delete most of the checkout to do so, and just use "svn co"
+        # instead of "svn up".
 
-	if svn update -r $revision < /dev/null && \
-            svn update -r $revision rulesrc < /dev/null
-        then
-		break;
-	fi
+        rm -rf rules lib build rulesrc spamd spamc
+        svn cleanup < /dev/null
+        svn co --non-interactive `svnpath` .
 
-	if [ $retry -eq 120 ]; then
-		echo "svn update failed" 1>&2
+        # just 20 times; our SVN repo shouldn't be *that* unreliable!
+	if [ $retry -eq 20 ]; then
+		echo "svn update/co failed" 1>&2
 		exit 1
 	fi
+
 	retry=$(( $retry + 1 ))
-	sleep 20
+
+        # longer and longer between retries
+	sleep `expr 20 '*' $retry`
 done
 set -e
 

Modified: spamassassin/branches/tvd-multi-mass-check/rules/active.list
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/rules/active.list?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/rules/active.list (original)
+++ spamassassin/branches/tvd-multi-mass-check/rules/active.list Sat Sep  2 18:07:10 2006
@@ -1,5 +1,5 @@
 # active ruleset list, automatically generated from http://ruleqa.spamassassin.org/
-# with results from: daf parkerm theo wtogami zmi
+# with results from: daf parkerm theo wtogami
 
 # good enough
 ADDR_NUMS_AT_BIGSITE
@@ -11,6 +11,9 @@
 ADVANCE_FEE_4
 
 # tflags userconf
+ALL_TRUSTED
+
+# tflags userconf
 AWL
 
 # good enough
@@ -146,6 +149,12 @@
 DNS_FROM_SECURITYSAGE
 
 # good enough
+DOS_STOCK_O_PRICE
+
+# good enough
+DOS_YOUR_PLACE
+
+# good enough
 DRUGS_ANXIETY
 
 # good enough
@@ -155,9 +164,6 @@
 DRUGS_ANXIETY_OBFU
 
 # good enough
-DRUGS_DIET
-
-# good enough
 DRUGS_ERECTILE_OBFU
 
 # good enough
@@ -181,9 +187,6 @@
 # good enough
 EM_ROLEX
 
-# good enough
-ENGLISH_UCE_SUBJECT
-
 # tflags userconf
 ENV_AND_HDR_SPF_MATCH
 
@@ -245,6 +248,9 @@
 FH_RCVD_WITHSMTPFOR
 
 # good enough
+FM_CUSTOMLOGODSGNc
+
+# good enough
 FORGED_AOL_TAGS
 
 # good enough
@@ -323,6 +329,9 @@
 FROM_BLANK_NAME
 
 # good enough
+FROM_DOMAIN_NOVOWEL
+
+# good enough
 FROM_ENDS_IN_NUMS
 
 # good enough
@@ -338,9 +347,6 @@
 FROM_NO_USER
 
 # good enough
-FROM_OFFERS
-
-# good enough
 FS_START_DOYOU2
 
 # good enough
@@ -350,9 +356,6 @@
 GAPPY_SUBJECT
 
 # good enough
-GEO_QUERY_STRING
-
-# good enough
 GMD_FAKETZ
 
 # tflags net
@@ -389,9 +392,6 @@
 HASHCASH_HIGH
 
 # good enough
-HEADER_COUNT_CTYPE
-
-# good enough
 HEADER_SPAM
 
 # good enough
@@ -425,12 +425,6 @@
 HS_FORGED_OE_FW
 
 # good enough
-HS_GETMEOFF
-
-# good enough
-HS_PHARMA_1
-
-# good enough
 HS_SUBJ_ONLINE_PHARMACEUTICAL
 
 # tflags userconf
@@ -458,10 +452,10 @@
 INVALID_DATE
 
 # good enough
-INVALID_MSGID
+INVALID_TZ_CST
 
 # good enough
-INVALID_TZ_CST
+INVALID_TZ_GMT
 
 # good enough
 INVESTMENT_ADVICE
@@ -488,9 +482,6 @@
 KAM_STOCKTIP15
 
 # good enough
-KAM_STOCKTIP2
-
-# good enough
 KAM_STOCKTIP6
 
 # good enough
@@ -527,6 +518,9 @@
 MIME_BASE64_BLANKS
 
 # good enough
+MIME_BASE64_NO_NAME
+
+# good enough
 MIME_BASE64_TEXT
 
 # good enough
@@ -563,9 +557,6 @@
 MSGID_DOLLARS_RANDOM
 
 # good enough
-MSGID_LONG
-
-# good enough
 MSGID_OUTLOOK_INVALID
 
 # good enough
@@ -608,15 +599,15 @@
 OBSCURED_EMAIL
 
 # good enough
-ONLINE_PHARMACY
-
-# good enough
 ORG_MIME_TOOLS
 
 # good enough
 PERCENT_RANDOM
 
 # good enough
+PLING_PLING
+
+# good enough
 PORN_15
 
 # good enough
@@ -670,9 +661,6 @@
 # good enough
 RATWARE_RCVD_PF
 
-# good enough
-RATWARE_ZERO_TZ
-
 # tflags net
 RAZOR2_CF_RANGE_51_100
 
@@ -794,6 +782,9 @@
 REPLICA_WATCH
 
 # good enough
+REPLY_TO_EMPTY
+
+# good enough
 REPTO_OVERQUOTE_THEBAT
 
 # good enough
@@ -878,6 +869,9 @@
 SUBJECT_NEEDS_ENCODING
 
 # good enough
+SUBJECT_NOVOWEL
+
+# good enough
 SUBJECT_SEXUAL
 
 # good enough
@@ -887,10 +881,13 @@
 SUBJ_RE_NUM
 
 # good enough
-SUB_HELLO
+SUSPICIOUS_RECIPS
 
 # good enough
-SUSPICIOUS_RECIPS
+TO_CC_NONE
+
+# good enough
+TO_EMPTY
 
 # good enough
 TO_MALFORMED
@@ -905,9 +902,6 @@
 TT_MSGID_TRUNC
 
 # good enough
-TVD_ACT_193
-
-# good enough
 TVD_APP_LOAN
 
 # good enough
@@ -980,9 +974,6 @@
 TVD_PH_SUBJ_ACCOUNTS_POST
 
 # good enough
-TVD_PH_SUBJ_META
-
-# good enough
 TVD_PH_SUBJ_META_ALL
 
 # good enough
@@ -1046,24 +1037,9 @@
 TVD_UA_FOSTERING
 
 # good enough
-TVD_UNDER_VALUED
-
-# good enough
-TVD_VISIT_PHARMA
-
-# good enough
 TVD_VIS_HIDDEN
 
 # good enough
-DOS_STOCK_O_PRICE
-
-# good enough
-DOS_YOUR_PLACE
-
-# good enough
-FM_CUSTOMLOGODSGNc
-
-# good enough
 FORGED_IMS_HTML
 
 # good enough
@@ -1076,19 +1052,19 @@
 FORGED_OUTLOOK_TAGS
 
 # good enough
-HS_MEETUP_FOR_SEX
-
-# good enough
 RATWARE_MS_HASH
 
 # good enough
 RATWARE_OUTLOOK_NONAME
 
 # good enough
-TVD_FUZZY_MICROCAP
+TVD_FW_GRAPHIC_NAME_LONG
+
+# good enough
+TVD_FW_GRAPHIC_NAME_MID
 
 # good enough
-TVD_FW_GRAPHIC_ID1
+TVD_LONG_WORD5
 
 # good enough
 UNCLAIMED_MONEY
@@ -1142,6 +1118,9 @@
 URIBL_WS_SURBL
 
 # good enough
+URI_4YOU
+
+# good enough
 URI_NOVOWEL
 
 # good enough
@@ -1206,9 +1185,6 @@
 
 # good enough
 ZMIde_EBAYJOBSURI
-
-# good enough
-ZMIde_SEXUALEXPL1
 
 # tflags net
 __RCVD_IN_IADB

Modified: spamassassin/branches/tvd-multi-mass-check/t/mkrules.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/t/mkrules.t?rev=439695&r1=439694&r2=439695&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/t/mkrules.t (original)
+++ spamassassin/branches/tvd-multi-mass-check/t/mkrules.t Sat Sep  2 18:07:10 2006
@@ -2,7 +2,7 @@
 
 use lib '.'; use lib 't';
 use SATest; sa_t_init("mkrules");
-use Test; BEGIN { plan tests => 85 };
+use Test; BEGIN { plan tests => 86 };
 use File::Path;
 
 # ---------------------------------------------------------------------------
@@ -101,7 +101,8 @@
 
 ok (mkrun ("--src $tdir/rulesrc --out $tdir/rules --manifest $tdir/MANIFEST --manifestskip $tdir/MANIFEST.SKIP --active $tdir/rules/active.list 2>&1", \&patterns_run_cb));
 checkfile("$tdir/rules/70_sandbox.cf", \&patterns_run_cb);
-ok (! -f "$tdir/rules/72_active.cf");
+ok (-f "$tdir/rules/72_active.cf");
+ok (-s "$tdir/rules/72_active.cf" == 0);
 ok ok_all_patterns();
 save_tdir();