You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/12/09 05:37:13 UTC
svn commit: r355363 - in /spamassassin/trunk: build/listpromotable build/mkrules masses/rule-qa/automc/ruleqa.cgi
Author: jm
Date: Thu Dec 8 20:37:10 2005
New Revision: 355363
URL: http://svn.apache.org/viewcvs?rev=355363&view=rev
Log:
add build/listpromotable, a script which lists the current set of promotable sandbox rules, based on last night's mass-checks (looked up via HTTP)
Added:
spamassassin/trunk/build/listpromotable (with props)
Modified:
spamassassin/trunk/build/mkrules
spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi
Added: spamassassin/trunk/build/listpromotable
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/listpromotable?rev=355363&view=auto
==============================================================================
--- spamassassin/trunk/build/listpromotable (added)
+++ spamassassin/trunk/build/listpromotable Thu Dec 8 20:37:10 2005
@@ -0,0 +1,62 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use File::Temp ();
+use LWP::Simple;
+use URI::Escape;
+use Data::Dumper;
+
+my $cgi_url = "http://buildbot.spamassassin.org/";
+
+my $url = $cgi_url."ruleqa?daterev=last-night";
+my $doc = get ($url);
+if (!$doc) {
+ die "HTTP get failed: $doc\n";
+}
+
+# print $doc;
+
+# <rule><test>__HIGHBITS</test><promo>0</promo>
+# <spc>8.7654</spc><hpc>0.2056</hpc><so>0.977</so>
+# <detailhref>ruleqa%3Fdaterev%3Dlast-night%26rule%3D__HIGHBITS%26s_detail%3D1</detailhref></rule>
+
+my $todump = { };
+while ($doc =~ m!<rule>(.*?)</rule>!xg) {
+ my $xml = $1;
+ my $obj = { };
+
+ while ($xml =~ s!<([A-Za-z0-9_]+)>(.*?)</\1>!!) {
+ $obj->{$1} = $2;
+ }
+ while ($xml =~ s!<([A-Za-z0-9_]+)\s+esc=["']1["']>(.*?)</\1>!!) {
+ $obj->{$1} = uri_unescape($2);
+ }
+
+ my $name = $obj->{test};
+ $obj->{detailhref} = $cgi_url.$obj->{detailhref};
+
+ $todump->{$name} = $obj;
+}
+
+if (!scalar keys %$todump) {
+ die "no rules found?\n$doc\n";
+}
+
+my $dump = Data::Dumper->Dump([$todump], ['promolist']);
+# print $dump;
+
+# now write that to a tmp file so 'mkrules' can use it
+my $tmp = new File::Temp( UNLINK => 1, SUFFIX => '.pl' );
+print $tmp $dump;
+
+my $perl = $^X;
+if (!$perl) {
+ die "no perl path found in ARGV!";
+}
+
+# and exec that script
+exec $perl, "build/mkrules", "--listpromotable=$tmp";
+die "exec failed";
+
Propchange: spamassassin/trunk/build/listpromotable
------------------------------------------------------------------------------
svn:executable = *
Modified: spamassassin/trunk/build/mkrules
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/mkrules?rev=355363&r1=355362&r2=355363&view=diff
==============================================================================
--- spamassassin/trunk/build/mkrules (original)
+++ spamassassin/trunk/build/mkrules Thu Dec 8 20:37:10 2005
@@ -37,19 +37,20 @@
use vars qw(
@opt_srcs $opt_out $opt_sandboxout $opt_manifest
- $opt_manifestskip
+ $opt_manifestskip $opt_listpromotable
);
GetOptions("src=s" => \@opt_srcs,
"out=s",
"sandboxout=s",
"manifest=s",
- "manifestskip=s"
+ "manifestskip=s",
+ "listpromotable=s",
);
if (!@opt_srcs) {
foreach ( 'rulescode', 'rulesrc' ) {
if (-d $_) {
- print "using default src $_\n";
+ # print "using default src $_\n";
push(@opt_srcs, $_);
}
}
@@ -64,9 +65,17 @@
}
die "no src" unless (@opt_srcs >= 1);
-die "no out" unless ($opt_out);
-die "unreadable out" unless (-d $opt_out);
+my $promolist;
+if ($opt_listpromotable) {
+ my $list = do $opt_listpromotable;
+ die "no listpromotable list" unless $list;
+ $promolist = $list;
+}
+else {
+ die "no out" unless ($opt_out);
+ die "unreadable out" unless (-d $opt_out);
+}
$opt_sandboxout ||= "70_sandbox.cf";
# source files that need compilation, and their targets
@@ -104,7 +113,19 @@
compile_sorted($needs_compile);
lint_output_files();
-write_output_files();
+
+if ($opt_listpromotable) {
+ list_promotable();
+}
+else {
+ write_output_files();
+}
+exit;
+
+sub list_promotable {
+}
+
+# ---------------------------------------------------------------------------
sub lint_output_files {
foreach my $file (keys %{$files_to_lint}) {
@@ -129,8 +150,6 @@
return $res; # 0 means good
}
-exit;
-
sub wanted {
my $path = $File::Find::name;
# only files
@@ -149,7 +168,12 @@
my $f = "$current_src/$dir$filename";
- my $t = "$opt_out/$filename";
+ my $t;
+
+ if (!$opt_listpromotable) {
+ $t = "$opt_out/$filename";
+ }
+
$needs_compile->{$f} = {
f => $f,
t => $t,
@@ -160,7 +184,6 @@
# compile all the source files found by the wanted() sub, in sorted
# order so that the order of precedence makes sense.
-
sub compile_sorted {
my ($sources) = @_;
@@ -185,17 +208,7 @@
my $needs_rebuild = 1;
if ($entry->{filename} =~ /\.pm$/) {
- # just copy the raw perl module over to the new area
- # we can't really rename to avoid conflicts since the loadplugin lines
- # are going to be all screwed up in that case.
- # jm: we always want to update the output file in case the input
- # has been changed!
- if (0 && -e $entry->{t}) {
- warn "The perl module ".$entry->{t}." already exists, can't copy from ".$entry->{f}."\n";
- }
- else {
- copy($entry->{f}, $entry->{t}) || warn "Couldn't copy ".$entry->{f}.": $!";
- }
+ plugin_file_compile($entry);
}
elsif ($entry->{dir} =~ /sandbox/) {
rule_file_compile_sandbox($f, $t, $entry->{filename});
@@ -233,9 +246,9 @@
# one that's already been output by the compiler in another source file. The
# autorenaming is very simple -- portions of the current source path are
# appended to the rule name, sanitised.
-#
-# TODO: linting during compilation, and ignore lint-failures? may have to
-# reimplement a small subset of lint behaviour to do this.
+
+my $COMMENTS;
+my $ALWAYS_PUBLISH;
sub rule_file_compile_sandbox {
my ($f, $t, $filename) = @_;
@@ -249,24 +262,25 @@
my $rules = { };
my $rule_order = [ ];
- # $COMMENTS is a "catch-all" "name", for lines that appear before the first
+ # $COMMENTS is a "catch-all" "name", for lines that appear after the last
# line that refers to a rule by name. Those lines are not published by
# themselves; they'll be published to all pubfiles found in the file.
#
- # It's assumed they are comments, because they generally are, but could
- # be all sorts of unparseable lines.
+ # It's assumed they are comments, because they generally are, but could be
+ # all sorts of unparseable lines.
+ $COMMENTS = '!comments!';
- my $COMMENTS = '!comments!';
my $lastrule = $COMMENTS;
# another "fake name" for lines that should always be published, to an
# output file with the same name as the input file.
- my $ALWAYS_PUBLISH = '!always_publish!';
+ $ALWAYS_PUBLISH = '!always_publish!';
$rules->{$ALWAYS_PUBLISH} = rule_entry_create();
- my %ALWAYS = ( $ALWAYS_PUBLISH => 1 );
+ my $ALWAYS = { $ALWAYS_PUBLISH => 1 };
# an "ifplugin" or "if" scope
my $current_conditional;
+ my $current_comments = '';
while (<IN>) {
my $orig = $_;
@@ -276,9 +290,9 @@
s/#.*$//g; s/^\s+//; s/\s+$//;
- # send comments/blank lines to the current default output
+ # send comments/blank lines to the holding area for next rule
if (/^$/) {
- $rules->{$lastrule}->{text} .= $orig;
+ $current_comments .= $orig;
next;
}
@@ -302,12 +316,13 @@
my $origname = $name;
$name = sandbox_rule_name_avoid_collisions($name, $f);
- # TODO: sandbox rules -- enforce "T_" prefix
-
if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{origname} = $origname;
$rules->{$name}->{cond} = $current_conditional;
- $rules->{$name}->{text} .= $orig;
+ $rules->{$name}->{text} .= $current_comments . $orig;
+ $rules->{$name}->{srcfile} = $f;
+
+ $current_comments = '';
$lastrule = $name;
push (@$rule_order, $name);
@@ -366,7 +381,7 @@
$NAME .= $current_conditional . '!';
unless ($rules->{$NAME}) {
$rules->{$NAME} = rule_entry_create();
- $ALWAYS{$NAME} = 1;
+ $ALWAYS->{$NAME} = 1;
}
$rules->{$NAME}->{cond} = $current_conditional;
}
@@ -377,64 +392,112 @@
}
close IN;
+ if ($current_comments) {
+ $rules->{$COMMENTS}->{text} .= $current_comments;
+ }
+
# now append all the found text to the output file buffers
+ copy_to_output_buffers($rule_order, $rules, $ALWAYS, $f, $filename);
+
+ # ok; file complete. now mark all those rules as "seen"; future
+ # refs to those rule names will trigger an autorename.
+ foreach my $name (@$rule_order) {
+ $seen_rules->{$name} = 1;
+ }
+}
+
+sub copy_to_output_buffers {
+ my ($rule_order, $rules, $ALWAYS, $f, $filename) = @_;
+
my %already_done = ();
my $copied = 0;
- foreach my $name (@$rule_order, keys(%ALWAYS))
+ foreach my $name (@$rule_order, keys(%$ALWAYS))
{
# only do each rule once, please ;)
next if exists $already_done{$name};
$already_done{$name} = undef;
- my $pubfile;
- if ($rules->{$name}->{publish}) {
- $pubfile = ($rules->{$name}->{pubfile} || $filename);
- $pubfile = $opt_out.'/'.$pubfile;
- } else {
- $pubfile = $opt_out.'/'.$opt_sandboxout;
- }
- $output_files->{$pubfile} = 1;
-
my $text = $rules->{$name}->{text};
if (!$text) {
next; # nothing to write!
}
- if (!$output_file_text->{$pubfile}) {
- $output_file_text->{$pubfile} = output_file_header($f);
-
- # do we have any top-of-file comments? if so, add it
- my $cmts = $rules->{$COMMENTS}->{text}; $cmts ||= '';
- $output_file_text->{$pubfile} .= $cmts;
- }
-
- # fix up any rule renamings we were supposed to do
- sed_renamed_rule_names(\$text);
-
- my $cond = $rules->{$name}->{cond};
- if ($cond) {
- $output_file_text->{$pubfile} .= $cond.$text."endif\n";
+ if ($opt_listpromotable) {
+ promo_rule ($rules, $name, $text);
}
else {
- $output_file_text->{$pubfile} .= $text;
+ copy_rule ($rules, $name, $text, $filename);
+ $copied++;
}
+ }
+
+ if (!$opt_listpromotable) {
+ print "$f: $copied sandbox rules copied\n";
+ }
+}
+
+sub copy_rule {
+ my ($rules, $name, $text, $filename) = @_;
- $files_to_lint->{$pubfile} = 1;
+ my $f = $rules->{$name}->{srcfile};
- $copied++;
+ my $pubfile;
+ if ($rules->{$name}->{publish}) {
+ $pubfile = ($rules->{$name}->{pubfile} || $filename);
+ $pubfile = $opt_out.'/'.$pubfile;
+ } else {
+ $pubfile = $opt_out.'/'.$opt_sandboxout;
}
- print "$f: $copied sandbox rules copied\n";
+ $output_files->{$pubfile} = 1;
- # ok; file complete. now mark all those rules as "seen"; future
- # refs to those rule names will trigger an autorename.
- foreach my $name (@$rule_order) {
- $seen_rules->{$name} = 1;
+ if (!$output_file_text->{$pubfile}) {
+ $output_file_text->{$pubfile} = output_file_header($f);
+ }
+
+ # fix up any rule renamings we were supposed to do
+ sed_renamed_rule_names(\$text);
+
+ my $cond = $rules->{$name}->{cond};
+ if ($cond) {
+ $output_file_text->{$pubfile} .= $cond.$text."endif\n";
+ }
+ else {
+ $output_file_text->{$pubfile} .= $text;
+ }
+
+ # do we have any end-of-file comments? if so, add it
+ my $cmts = $rules->{$COMMENTS}->{text};
+ if ($cmts) {
+ $output_file_text->{$pubfile} .= $cmts;
}
+
+ $files_to_lint->{$pubfile} = 1;
+}
+
+sub promo_rule {
+ my ($rules, $name, $text) = @_;
+
+ return unless $promolist->{$name};
+ my $pent = $promolist->{$name};
+ return unless $pent->{promo};
+
+ $text =~ s/^\s+//s;
+ $text =~ s/\s+$//s;
+
+ print "\n## ".("-" x 70)."\n";
+ print "## Promotable rule: $name\n";
+ printf "## so=%5.3f spc=%5.3f hpc=%5.3f\n",
+ $pent->{so}, $pent->{spc}, $pent->{hpc};
+ print "## $rules->{$name}->{srcfile}\n";
+ print "## $pent->{detailhref}\n\n";
+ print $text,"\n";
}
sub rule_file_compile_core {
my ($f, $t, $filename) = @_;
+ return if $opt_listpromotable;
+
my $pubfile = $opt_out.'/'.$filename;
$output_files->{$pubfile} = 1;
@@ -490,6 +553,23 @@
print "$f: all lines copied\n";
}
+sub plugin_file_compile {
+ my ($entry) = @_;
+
+ return if $opt_listpromotable;
+ # just copy the raw perl module over to the new area
+ # we can't really rename to avoid conflicts since the loadplugin lines
+ # are going to be all screwed up in that case.
+ # jm: we always want to update the output file in case the input
+ # has been changed!
+ if (0 && -e $entry->{t}) {
+ warn "The perl module ".$entry->{t}." already exists, can't copy from ".$entry->{f}."\n";
+ }
+ else {
+ copy($entry->{f}, $entry->{t}) || warn "Couldn't copy ".$entry->{f}.": $!";
+ }
+}
+
sub write_output_files {
foreach my $pubfile (sort keys %$output_files) {
if (-f $pubfile) {
@@ -524,6 +604,8 @@
my ($rule, $path) = @_;
my $new;
my $newreason;
+
+ return $rule if $opt_listpromotable;
if ($rule !~ /^(?:T_|__)/) {
$new = "T_".$rule;
Modified: spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi?rev=355363&r1=355362&r2=355363&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi (original)
+++ spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi Thu Dec 8 20:37:10 2005
@@ -8,6 +8,7 @@
use Template;
use Date::Manip;
use XML::Simple;
+use URI::Escape;
use strict;
use bytes;
@@ -88,6 +89,7 @@
if (defined $daterev) {
if ($daterev eq 'last-night') {
$daterev = get_last_night_daterev();
+ $q->url_param('daterev', $daterev); # make it absolute
}
else {
$daterev =~ /(\d+)[\/-](r\d+)-(\S+)/; undef $daterev;
@@ -784,10 +786,10 @@
<td style='text-align: left'><a href="[% NAMEREF %]">[% NAME %]</a></td>
<td>[% USERNAME %]</td>
<td>[% AGE %]</td>
+ <!--
+ <rule><test>[% NAME %]</test><promo>[% PROMO %]</promo> <spc>[% SPAMPC %]</spc><hpc>[% HAMPC %]</hpc><so>[% SO %]</so> <detailhref esc='1'>[% NAMEREFENCD %]</detailhref></rule>
+ -->
</tr>
- <!--
- <rule><n>[% NAME %]</n><p>[% PROMO %]</p><sp>[% SPAMPC %]</sp><hp>[% HAMPC %]</hp><so>[% SO %]</so><href>[% NAMEREF %]</href></rule>
- -->
};
@@ -836,6 +838,7 @@
SCORE => $score,
NAME => $line->{name},
NAMEREF => create_detail_url($line->{name}),
+ NAMEREFENCD => uri_encode(create_detail_url($line->{name})),
USERNAME => $line->{username} || '',
AGE => $line->{age} || '',
PROMO => $line->{promotable},
@@ -879,8 +882,9 @@
my @parms = (
get_params_except(qw(
rule s_age s_overlap s_all s_detail
+ rule s_age s_overlap s_all s_detail daterev
)),
- "rule=".uri_escape($rulename), "s_detail=1",
+ "daterev=".$daterev, "rule=".uri_escape($rulename), "s_detail=1",
);
return assemble_url(@parms);
}
@@ -1067,6 +1071,11 @@
<h3> Preflight Mass-Checks </h3>
<br/> <a href='#preflight' name=preflight>#</a>
}. gen_daterev_table(@drs_preflight);
+}
+
+sub uri_encode {
+ my ($str) = @_;
+ return uri_escape($str);
}
sub gen_daterev_table {