You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/12/13 01:22:27 UTC

svn commit: r356425 - in /spamassassin/trunk: build/listpromotable lib/Mail/SpamAssassin.pm lib/Mail/SpamAssassin/Conf/Parser.pm

Author: jm
Date: Mon Dec 12 16:22:23 2005
New Revision: 356425

URL: http://svn.apache.org/viewcvs?rev=356425&view=rev
Log:
Add per-rule linting. Allow multiple files to be specified for rules_filename and site_rules_filename, separated by \000 characters, so that build/listpromotable can specify multiple paths. Implement much of listpromotable.

Modified:
    spamassassin/trunk/build/listpromotable
    spamassassin/trunk/lib/Mail/SpamAssassin.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm

Modified: spamassassin/trunk/build/listpromotable
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/listpromotable?rev=356425&r1=356424&r2=356425&view=diff
==============================================================================
--- spamassassin/trunk/build/listpromotable (original)
+++ spamassassin/trunk/build/listpromotable Mon Dec 12 16:22:23 2005
@@ -8,21 +8,36 @@
 use URI::Escape;
 use Data::Dumper;
 
+my $FROM_CACHE = 1;
+my $MAKE_CACHE = 1;
+
+###########################################################################
+
 my $cgi_url = "http://buildbot.spamassassin.org/";
+my $doc;
 
-my $url = $cgi_url."ruleqa?daterev=last-night";
-my $doc = get ($url);
-if (!$doc) {
-  die "HTTP get failed: $doc\n";
+if ($FROM_CACHE == 0) {
+  my $url = $cgi_url."ruleqa?daterev=last-night";
+  $doc = get ($url);
+  if (!$doc) {
+    die "HTTP get failed: $doc\n";
+  }
+
+  if ($MAKE_CACHE) {
+    open(O, ">ruleqa.cache"); print O $doc; close O;
+  }
+}
+else {
+  open(I, "<ruleqa.cache") or die; $doc = join('',<I>); close I;
 }
 
-# print $doc;
+###########################################################################
 
 # <rule><test>__HIGHBITS</test><promo>0</promo>
 # <spc>8.7654</spc><hpc>0.2056</hpc><so>0.977</so>
 # <detailhref>ruleqa%3Fdaterev%3Dlast-night%26rule%3D__HIGHBITS%26s_detail%3D1</detailhref></rule>
 
-# my $todump = { };
+my $plist = { };
 while ($doc =~ m!<rule>(.*?)</rule>!xg) {
   my $xml = $1;
   my $obj = { };
@@ -37,19 +52,56 @@
   my $name = $obj->{test};
   $obj->{detailhref} = $cgi_url.$obj->{detailhref};
 
-  ## $todump->{$name} = $obj;
+  $plist->{$name} = $obj;
+}
+
+if (!scalar keys %$plist) {
+  die "no rules found?\n$doc\n";
+}
+
+###########################################################################
+
+## my $dump = Data::Dumper->Dump([$plist], ['promolist']); print $dump;
+
+# use SpamAssassin classes directly, so we can lint rules
+# as we go
+use lib 'lib';
+use Mail::SpamAssassin;
+
+my $mailsa = Mail::SpamAssassin->new({
+    rules_filename => join("\000", qw( rulesrc/core rulesrc/sandbox )),
+    site_rules_filename => "rules",
+    local_tests_only => 1,
+    dont_copy_prefs => 1,
+    config_tree_recurse => 1,
+    # debug => 1,
+});
+
+my %rules_with_errors = ();
+
+$mailsa->{lint_callback} = sub {
+  my %opts = @_;
+  warn "lint failure: $opts{rule}: $opts{msg}";
+  if ($opts{iserror}) {
+    $rules_with_errors{$opts{rule}}++;
+  }
+};
+
+$mailsa->lint_rules();
 
+foreach my $name (sort keys %$plist) {
+  my $obj = $plist->{$name};
   next unless ($obj->{promo});
-  print "$name\n";
+
+  my $tfs = $mailsa->{conf}->{tflags}->{$name};
+  if ($tfs) {
+    next if ($tfs =~ /\bnopublish\b/);
+  }
+
+  next if $rules_with_errors{$name};
 }
 
-# if (!scalar keys %$todump) {
-# die "no rules found?\n$doc\n";
-# }
-
-## my $dump = Data::Dumper->Dump([$todump], ['promolist']);
-## # print $dump;
-## 
+
 ## # now write that to a tmp file so 'mkrules' can use it
 ## my $tmp = new File::Temp( UNLINK => 1, SUFFIX => '.pl' );
 ## print $tmp $dump;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin.pm?rev=356425&r1=356424&r2=356425&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin.pm Mon Dec 12 16:22:23 2005
@@ -1411,46 +1411,39 @@
 }
 
 sub read_cf {
-  my ($self, $path, $desc) = @_;
-
-  return '' unless defined ($path);
-
-  dbg("config: using \"$path\" for $desc");
-  my $txt = '';
-
-  if (-d $path) {
-    foreach my $file ($self->get_cf_files_in_dir ($path)) {
-      $txt .= read_cf_file($file);
-    }
-
-  } elsif (-f $path && -s _ && -r _) {
-    $txt .= read_cf_file($path);
-  }
-
-  return $txt;
+  my ($self, $allpaths, $desc) = @_;
+  return $self->_read_cf_pre($allpaths,$desc,\&get_cf_files_in_dir);
 }
 
-
 sub read_pre {
-  my ($self, $path, $desc) = @_;
+  my ($self, $allpaths, $desc) = @_;
+  return $self->_read_cf_pre($allpaths,$desc,\&get_pre_files_in_dir);
+}
+
+sub _read_cf_pre {
+  my ($self, $allpaths, $desc, $filelistmethod) = @_;
 
-  return '' unless defined ($path);
+  return '' unless defined ($allpaths);
 
-  dbg("config: using \"$path\" for $desc");
   my $txt = '';
+  foreach my $path (split("\000", $allpaths)) 
+  {
+    dbg("config: using \"$path\" for $desc");
 
-  if (-d $path) {
-    foreach my $file ($self->get_pre_files_in_dir($path)) {
-      $txt .= read_cf_file($file); # ok to use read_cf_file at this point
-    }
+    if (-d $path) {
+      foreach my $file ($self->$filelistmethod($path)) {
+        $txt .= read_cf_file($file);
+      }
 
-  } elsif (-f $path && -s _ && -r _) {
-    $txt .= read_cf_file($path);
+    } elsif (-f $path && -s _ && -r _) {
+      $txt .= read_cf_file($path);
+    }
   }
 
   return $txt;
 }
 
+
 sub read_cf_file {
   my($path) = @_;
   my $txt = '';
@@ -1629,18 +1622,18 @@
   my ($self, $dir, $type) = @_;
 
   if ($self->{config_tree_recurse}) {
+    my @cfs = ();
+
     # use "eval" to avoid loading File::Find unless this is specified
     eval {
       use File::Find qw();
-
-      my @cfs = ();
       File::Find::find(
         sub {
-          return unless /\.${type}$/i && -f $_;
+          return unless (/\.${type}$/i && -f $_);
           push @cfs, $File::Find::name;
         }, $dir);
-      return map { "$dir/$_" } sort { $a cmp $b } @cfs;
     };
+    return sort { $a cmp $b } @cfs;
 
     die "oops! $@";     # should never get here
   }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=356425&r1=356424&r2=356425&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Dec 12 16:22:23 2005
@@ -508,7 +508,7 @@
 
     while ( my($sk) = each %{$conf->{scores}} ) {
       if (!exists $conf->{tests}->{$sk}) {
-        $self->lint_warn("config: warning: score set for non-existent rule $k\n", $k);
+        $self->lint_warn("config: warning: score set for non-existent rule $sk\n", $sk);
       }
     }
   }
@@ -786,7 +786,7 @@
   if ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS)
   {
     my ($pat) = ($text =~ /^\s*\S+\s*(?:\=|\!)\~\s*(\S.*?\S)\s*$/);
-    $pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//;
+    if ($pat) { $pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//; }
     return unless $self->is_delimited_regexp_valid($name, $pat);
   }
   elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS)
@@ -858,7 +858,7 @@
 sub is_delimited_regexp_valid {
   my ($self, $name, $re) = @_;
 
-  unless ($re =~ /^\s*m?(\W).*(?:\1|>|}|\)|\])[a-z]*\s*$/) {
+  if (!$re || $re !~ /^\s*m?(\W).*(?:\1|>|}|\)|\])[a-z]*\s*$/) {
     $self->lint_warn("config: invalid regexp for rule $name: $re: missing or invalid delimiters\n", $name);
     return 0;
   }
@@ -986,8 +986,8 @@
 
   if (!defined $iserror) { $iserror = 1; }
 
-  if ($self->{main}->{lint_callback}) {
-    $self->{main}->{lint_callback}->(
+  if ($self->{conf}->{main}->{lint_callback}) {
+    $self->{conf}->{main}->{lint_callback}->(
           msg => $msg,
           rule => $rule,
           iserror => $iserror