You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/12/09 05:37:13 UTC
svn commit: r355363 - in /spamassassin/trunk: build/listpromotable build/mkrules masses/rule-qa/automc/ruleqa.cgi

Author: jm
Date: Thu Dec  8 20:37:10 2005
New Revision: 355363

URL: http://svn.apache.org/viewcvs?rev=355363&view=rev
Log:
add build/listpromotable, a script which lists the current set of promotable sandbox rules, based on last night's mass-checks (looked up via HTTP)

Added:
    spamassassin/trunk/build/listpromotable   (with props)
Modified:
    spamassassin/trunk/build/mkrules
    spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi

Added: spamassassin/trunk/build/listpromotable
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/listpromotable?rev=355363&view=auto
==============================================================================
--- spamassassin/trunk/build/listpromotable (added)
+++ spamassassin/trunk/build/listpromotable Thu Dec  8 20:37:10 2005
@@ -0,0 +1,62 @@
+#!/usr/bin/perl
+#
+# listpromotable - list the sandbox rules that last night's mass-checks marked
+# as promotable.  The per-rule results are fetched over HTTP, dumped to a
+# temporary Perl file, and passed to build/mkrules via --listpromotable.
+
+use strict;
+use warnings;
+
+use File::Temp ();
+use LWP::Simple;
+use URI::Escape;
+use Data::Dumper;
+
+my $cgi_url = "http://buildbot.spamassassin.org/";
+my $url = $cgi_url."ruleqa?daterev=last-night";
+
+# if the fetch fails there is no document to show, so report the URL instead
+my $doc = get ($url);
+die "HTTP get failed: $url\n" unless (defined $doc && $doc ne '');
+
+# the CGI output embeds one record per rule, of this form:
+# <rule><test>__HIGHBITS</test><promo>0</promo>
+# <spc>8.7654</spc><hpc>0.2056</hpc><so>0.977</so>
+# <detailhref>ruleqa%3Fdaterev%3Dlast-night%26rule%3D__HIGHBITS%26s_detail%3D1</detailhref></rule>
+
+my $todump = { };
+while ($doc =~ m!<rule>(.*?)</rule>!xg) {
+  my $xml = $1;
+  my $obj = { };
+
+  # plain fields first, then fields flagged esc=1, whose values are URI-escaped
+  while ($xml =~ s!<([A-Za-z0-9_]+)>(.*?)</\1>!!) {
+    $obj->{$1} = $2;
+  }
+  while ($xml =~ s!<([A-Za-z0-9_]+)\s+esc=["']1["']>(.*?)</\1>!!) {
+    $obj->{$1} = uri_unescape($2);
+  }
+
+  # a record with no rule name cannot be keyed, so skip it
+  my $name = $obj->{test};
+  next unless (defined $name && $name ne '');
+
+  $obj->{detailhref} = '' unless defined $obj->{detailhref};
+  $obj->{detailhref} = $cgi_url.$obj->{detailhref};
+  $todump->{$name} = $obj;
+}
+die "no rules found?\n$doc\n" unless (scalar keys %$todump);
+
+# now write that to a tmp file so 'mkrules' can use it, closing the handle
+# first so that all of the data is on disk before mkrules reads it
+my $dump = Data::Dumper->Dump([$todump], ['promolist']);
+my $tmp = File::Temp->new( UNLINK => 1, SUFFIX => '.pl' );
+print {$tmp} $dump or die "cannot write ".$tmp->filename.": $!\n";
+close($tmp) or die "cannot close ".$tmp->filename.": $!\n";
+
+my $perl = $^X;
+die "no perl path found in \$^X!\n" unless $perl;
+
+# and exec that script; exec returns only if it could not be started
+exec $perl, "build/mkrules", "--listpromotable=".$tmp->filename
+  or die "exec failed: $!\n";

Propchange: spamassassin/trunk/build/listpromotable
------------------------------------------------------------------------------
    svn:executable = *

Modified: spamassassin/trunk/build/mkrules
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/mkrules?rev=355363&r1=355362&r2=355363&view=diff
==============================================================================
--- spamassassin/trunk/build/mkrules (original)
+++ spamassassin/trunk/build/mkrules Thu Dec  8 20:37:10 2005
@@ -37,19 +37,20 @@
 
 use vars qw(
     @opt_srcs $opt_out $opt_sandboxout $opt_manifest
-    $opt_manifestskip
+    $opt_manifestskip $opt_listpromotable
 );
 GetOptions("src=s" => \@opt_srcs,
     "out=s",
     "sandboxout=s",
     "manifest=s",
-    "manifestskip=s"
+    "manifestskip=s",
+    "listpromotable=s",
   );
 
 if (!@opt_srcs) {
   foreach ( 'rulescode', 'rulesrc' ) {
     if (-d $_) {
-      print "using default src $_\n";
+      # print "using default src $_\n";
       push(@opt_srcs, $_);
     }
   }
@@ -64,9 +65,17 @@
 }
 
 die "no src" unless (@opt_srcs >= 1);
-die "no out" unless ($opt_out);
-die "unreadable out" unless (-d $opt_out);
 
+my $promolist;
+if ($opt_listpromotable) {
+  my $list = do $opt_listpromotable;
+  die "no listpromotable list" unless $list;
+  $promolist = $list;
+}
+else {
+  die "no out" unless ($opt_out);
+  die "unreadable out" unless (-d $opt_out);
+}
 $opt_sandboxout ||= "70_sandbox.cf";
 
 # source files that need compilation, and their targets
@@ -104,7 +113,19 @@
 
 compile_sorted($needs_compile);
 lint_output_files();
-write_output_files();
+
+if ($opt_listpromotable) {
+  list_promotable();
+}
+else {
+  write_output_files();
+}
+exit;
+
+sub list_promotable {   # empty stub; NOTE(review): the listing appears to be printed by promo_rule() during compile_sorted()
+}
+
+# ---------------------------------------------------------------------------
 
 sub lint_output_files {
   foreach my $file (keys %{$files_to_lint}) {
@@ -129,8 +150,6 @@
   return $res;       # 0 means good
 }
 
-exit;
-
 sub wanted {
   my $path = $File::Find::name;
   # only files
@@ -149,7 +168,12 @@
 
 
   my $f = "$current_src/$dir$filename";
-  my $t = "$opt_out/$filename";
+  my $t;
+
+  if (!$opt_listpromotable) {
+    $t = "$opt_out/$filename";
+  }
+
   $needs_compile->{$f} = {
           f => $f,
           t => $t,
@@ -160,7 +184,6 @@
 
 # compile all the source files found by the wanted() sub, in sorted
 # order so that the order of precedence makes sense.
-
 sub compile_sorted {
   my ($sources) = @_;
 
@@ -185,17 +208,7 @@
     my $needs_rebuild = 1;
 
     if ($entry->{filename} =~ /\.pm$/) {
-      # just copy the raw perl module over to the new area
-      # we can't really rename to avoid conflicts since the loadplugin lines
-      # are going to be all screwed up in that case.
-      # jm: we always want to update the output file in case the input
-      # has been changed!
-      if (0 && -e $entry->{t}) {
-	warn "The perl module ".$entry->{t}." already exists, can't copy from ".$entry->{f}."\n";
-      }
-      else {
-        copy($entry->{f}, $entry->{t}) || warn "Couldn't copy ".$entry->{f}.": $!";
-      }
+      plugin_file_compile($entry);
     }
     elsif ($entry->{dir} =~ /sandbox/) {
       rule_file_compile_sandbox($f, $t, $entry->{filename});
@@ -233,9 +246,9 @@
 # one that's already been output by the compiler in another source file. The
 # autorenaming is very simple -- portions of the current source path are
 # appended to the rule name, sanitised.
-#
-# TODO: linting during compilation, and ignore lint-failures? may have to
-# reimplement a small subset of lint behaviour to do this.
+
+my $COMMENTS;
+my $ALWAYS_PUBLISH;
 
 sub rule_file_compile_sandbox {
   my ($f, $t, $filename) = @_;
@@ -249,24 +262,25 @@
   my $rules = { };
   my $rule_order = [ ];
 
-  # $COMMENTS is a "catch-all" "name", for lines that appear before the first
+  # $COMMENTS is a "catch-all" "name", for lines that appear after the last
   # line that refers to a rule by name.  Those lines are not published by
   # themselves; they'll be published to all pubfiles found in the file.
   #
-  # It's assumed they are comments, because they generally are, but could
-  # be all sorts of unparseable lines.
+  # It's assumed they are comments, because they generally are, but could be
+  # all sorts of unparseable lines.
+  $COMMENTS = '!comments!';
 
-  my $COMMENTS = '!comments!';
   my $lastrule = $COMMENTS;
 
   # another "fake name" for lines that should always be published, to an
   # output file with the same name as the input file.
-  my $ALWAYS_PUBLISH = '!always_publish!';
+  $ALWAYS_PUBLISH = '!always_publish!';
   $rules->{$ALWAYS_PUBLISH} = rule_entry_create();
-  my %ALWAYS = ( $ALWAYS_PUBLISH => 1 );
+  my $ALWAYS = { $ALWAYS_PUBLISH => 1 };
 
   # an "ifplugin" or "if" scope
   my $current_conditional;
+  my $current_comments = '';
 
   while (<IN>) {
     my $orig = $_;
@@ -276,9 +290,9 @@
 
     s/#.*$//g; s/^\s+//; s/\s+$//;
 
-    # send comments/blank lines to the current default output
+    # send comments/blank lines to the holding area for next rule
     if (/^$/) {
-      $rules->{$lastrule}->{text} .= $orig;
+      $current_comments .= $orig;
       next;
     }
 
@@ -302,12 +316,13 @@
       my $origname = $name;
       $name = sandbox_rule_name_avoid_collisions($name, $f);
 
-      # TODO: sandbox rules -- enforce "T_" prefix
-
       if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
       $rules->{$name}->{origname} = $origname;
       $rules->{$name}->{cond} = $current_conditional;
-      $rules->{$name}->{text} .= $orig;
+      $rules->{$name}->{text} .= $current_comments . $orig;
+      $rules->{$name}->{srcfile} = $f;
+
+      $current_comments = '';
 
       $lastrule = $name;
       push (@$rule_order, $name);
@@ -366,7 +381,7 @@
         $NAME .= $current_conditional . '!';
         unless ($rules->{$NAME}) {
 	  $rules->{$NAME} = rule_entry_create();
-	  $ALWAYS{$NAME} = 1;
+	  $ALWAYS->{$NAME} = 1;
 	}
 	$rules->{$NAME}->{cond} = $current_conditional;
       }
@@ -377,64 +392,112 @@
   }
   close IN;
 
+  if ($current_comments) {
+    $rules->{$COMMENTS}->{text} .= $current_comments;
+  }
+
   # now append all the found text to the output file buffers
+  copy_to_output_buffers($rule_order, $rules, $ALWAYS, $f, $filename);
+
+  # ok; file complete.  now mark all those rules as "seen"; future
+  # refs to those rule names will trigger an autorename.
+  foreach my $name (@$rule_order) {
+    $seen_rules->{$name} = 1;
+  }
+}
+
+sub copy_to_output_buffers {
+  my ($rule_order, $rules, $ALWAYS, $f, $filename) = @_;
+
   my %already_done = ();
   my $copied = 0;
-  foreach my $name (@$rule_order, keys(%ALWAYS))
+  foreach my $name (@$rule_order, keys(%$ALWAYS))
   {
     # only do each rule once, please ;)
     next if exists $already_done{$name};
     $already_done{$name} = undef;
 
-    my $pubfile;
-    if ($rules->{$name}->{publish}) {
-      $pubfile = ($rules->{$name}->{pubfile} || $filename);
-      $pubfile = $opt_out.'/'.$pubfile;
-    } else {
-      $pubfile = $opt_out.'/'.$opt_sandboxout;
-    }
-    $output_files->{$pubfile} = 1;
-
     my $text = $rules->{$name}->{text};
     if (!$text) {
       next;     # nothing to write!
     }
 
-    if (!$output_file_text->{$pubfile}) {
-      $output_file_text->{$pubfile} = output_file_header($f);
-
-      # do we have any top-of-file comments?  if so, add it
-      my $cmts = $rules->{$COMMENTS}->{text}; $cmts ||= '';
-      $output_file_text->{$pubfile} .= $cmts;
-    }
-
-    # fix up any rule renamings we were supposed to do
-    sed_renamed_rule_names(\$text);
-
-    my $cond = $rules->{$name}->{cond};
-    if ($cond) {
-      $output_file_text->{$pubfile} .= $cond.$text."endif\n";
+    if ($opt_listpromotable) {
+      promo_rule ($rules, $name, $text);
     }
     else {
-      $output_file_text->{$pubfile} .= $text;
+      copy_rule ($rules, $name, $text, $filename);
+      $copied++;
     }
+  }
+
+  if (!$opt_listpromotable) {
+    print "$f: $copied sandbox rules copied\n";
+  }
+}
+
+sub copy_rule {
+  my ($rules, $name, $text, $filename) = @_;
 
-    $files_to_lint->{$pubfile} = 1;
+  my $f = $rules->{$name}->{srcfile};
 
-    $copied++;
+  my $pubfile;
+  if ($rules->{$name}->{publish}) {
+    $pubfile = ($rules->{$name}->{pubfile} || $filename);
+    $pubfile = $opt_out.'/'.$pubfile;
+  } else {
+    $pubfile = $opt_out.'/'.$opt_sandboxout;
   }
-  print "$f: $copied sandbox rules copied\n";
+  $output_files->{$pubfile} = 1;
 
-  # ok; file complete.  now mark all those rules as "seen"; future
-  # refs to those rule names will trigger an autorename.
-  foreach my $name (@$rule_order) {
-    $seen_rules->{$name} = 1;
+  if (!$output_file_text->{$pubfile}) {
+    $output_file_text->{$pubfile} = output_file_header($f);
+  }
+
+  # fix up any rule renamings we were supposed to do
+  sed_renamed_rule_names(\$text);
+
+  my $cond = $rules->{$name}->{cond};
+  if ($cond) {
+    $output_file_text->{$pubfile} .= $cond.$text."endif\n";
+  }
+  else {
+    $output_file_text->{$pubfile} .= $text;
+  }
+
+  # do we have any end-of-file comments?  if so, add it
+  my $cmts = $rules->{$COMMENTS}->{text};
+  if ($cmts) {
+    $output_file_text->{$pubfile} .= $cmts;
   }
+
+  $files_to_lint->{$pubfile} = 1;
+}
+
+# In --listpromotable mode: report $name, with its stats and source text,
+# if the loaded $promolist has an entry for it with a true 'promo' field.
+sub promo_rule {
+  my ($rules, $name, $text) = @_;
+  my $pent = $promolist->{$name};
+  return unless ($pent && $pent->{promo});
+
+  # trim leading and trailing whitespace from the rule's text
+  for ($text) { s/^\s+//s; s/\s+$//s; }
+
+  my $divider = "-" x 70;
+  print "\n## $divider\n";
+  print "## Promotable rule: $name\n";
+  printf "## so=%5.3f   spc=%5.3f   hpc=%5.3f\n", @{$pent}{qw(so spc hpc)};
+  print "## $rules->{$name}->{srcfile}\n";
+  print "## $pent->{detailhref}\n\n";
+  print $text,"\n";
+}
 
 sub rule_file_compile_core {
   my ($f, $t, $filename) = @_;
 
+  return if $opt_listpromotable;
+
   my $pubfile = $opt_out.'/'.$filename;
   $output_files->{$pubfile} = 1;
 
@@ -490,6 +553,23 @@
   print "$f: all lines copied\n";
 }
 
+# Copy a plugin's raw .pm source ($entry->{f}) to its output path ($entry->{t}).
+# Does nothing in --listpromotable mode.
+sub plugin_file_compile {
+  my ($entry) = @_;
+  return if $opt_listpromotable;
+
+  # just copy the raw perl module over to the new area; we can't really
+  # rename to avoid conflicts, since the loadplugin lines would then be
+  # all screwed up.
+  #
+  # jm: we always want to update the output file in case the input has been
+  # changed, so an existing target is overwritten.  (The old "already
+  # exists" warning was guarded by "if (0 && ...)" and could never run.)
+  my ($from, $to) = @{$entry}{qw(f t)};
+  copy($from, $to) || warn "Couldn't copy $from: $!";
+}
+
 sub write_output_files {
   foreach my $pubfile (sort keys %$output_files) {
     if (-f $pubfile) {
@@ -524,6 +604,8 @@
   my ($rule, $path) = @_;
   my $new;
   my $newreason;
+
+  return $rule if $opt_listpromotable;
 
   if ($rule !~ /^(?:T_|__)/) {
     $new = "T_".$rule;

Modified: spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi?rev=355363&r1=355362&r2=355363&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi (original)
+++ spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi Thu Dec  8 20:37:10 2005
@@ -8,6 +8,7 @@
 use Template;
 use Date::Manip;
 use XML::Simple;
+use URI::Escape;
 
 use strict;
 use bytes;
@@ -88,6 +89,7 @@
 if (defined $daterev) {
   if ($daterev eq 'last-night') {
     $daterev = get_last_night_daterev();
+    $q->url_param('daterev', $daterev);                # make it absolute
   }
   else {
     $daterev =~ /(\d+)[\/-](r\d+)-(\S+)/; undef $daterev;
@@ -784,10 +786,10 @@
     <td style='text-align: left'><a href="[% NAMEREF %]">[% NAME %]</a></td>
     <td>[% USERNAME %]</td>
     <td>[% AGE %]</td>
+    <!--
+      <rule><test>[% NAME %]</test><promo>[% PROMO %]</promo> <spc>[% SPAMPC %]</spc><hpc>[% HAMPC %]</hpc><so>[% SO %]</so> <detailhref esc='1'>[% NAMEREFENCD %]</detailhref></rule>
+    -->
   </tr>
-  <!--
-    <rule><n>[% NAME %]</n><p>[% PROMO %]</p><sp>[% SPAMPC %]</sp><hp>[% HAMPC %]</hp><so>[% SO %]</so><href>[% NAMEREF %]</href></rule>
-  -->
 
   };
 
@@ -836,6 +838,7 @@
         SCORE => $score,
         NAME => $line->{name},
         NAMEREF => create_detail_url($line->{name}),
+        NAMEREFENCD => uri_encode(create_detail_url($line->{name})),
         USERNAME => $line->{username} || '',
         AGE => $line->{age} || '',
         PROMO => $line->{promotable},
@@ -879,8 +882,9 @@
   my @parms = (
         get_params_except(qw(
           rule s_age s_overlap s_all s_detail
+          rule s_age s_overlap s_all s_detail daterev
         )), 
-        "rule=".uri_escape($rulename), "s_detail=1",
+        "daterev=".$daterev, "rule=".uri_escape($rulename), "s_detail=1",
       );
   return assemble_url(@parms);
 }
@@ -1067,6 +1071,11 @@
     <h3> Preflight Mass-Checks </h3>
     <br/> <a href='#preflight' name=preflight>#</a>
   }.  gen_daterev_table(@drs_preflight);
+}
+
+sub uri_encode {
+  my ($str) = @_;
+  return uri_escape($str);
 }
 
 sub gen_daterev_table {