You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/01/21 05:04:37 UTC
svn commit: r125884 - /spamassassin/trunk/masses/rule-qa/automc/scrape-bugzilla
Author: jm
Date: Thu Jan 20 20:04:37 2005
New Revision: 125884
URL: http://svn.apache.org/viewcvs?view=rev&rev=125884
Log:
fix scrape-bugzilla to reassemble lines inside bracketed blocks, ignore already-done mcs correctly, not include the build date (to reduce checkins) and use a more readable rule name format
Modified:
spamassassin/trunk/masses/rule-qa/automc/scrape-bugzilla
Modified: spamassassin/trunk/masses/rule-qa/automc/scrape-bugzilla
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/automc/scrape-bugzilla?view=diff&rev=125884&p1=spamassassin/trunk/masses/rule-qa/automc/scrape-bugzilla&r1=125883&p2=spamassassin/trunk/masses/rule-qa/automc/scrape-bugzilla&r2=125884
==============================================================================
--- spamassassin/trunk/masses/rule-qa/automc/scrape-bugzilla (original)
+++ spamassassin/trunk/masses/rule-qa/automc/scrape-bugzilla Thu Jan 20 20:04:37 2005
@@ -35,8 +35,7 @@
sub mywarn;
open (RULES, ">70_scraped.cf") or die "cannot write to output file";
-print RULES "# SpamAssassin rules file: bugzilla-scraped needs-mc rules\n",
- "# generated on ",(scalar localtime time),"\n\n";
+print RULES "# SpamAssassin rules file: bugzilla-scraped needs-mc rules\n\n";
open (COMMIT, ">".$conf{MCTMP}."/commit.msg") or die "cannot write to output file";
print COMMIT "auto-mass-checks:\n\n";
@@ -117,9 +116,14 @@
$cmt->{cmtnum} = $count;
$ctx->{cmts_by_num}->{$count} = $cmt;
push @{$ctx->{cmts}}, $cmt;
- process_comment($ctx, $cmt);
$count++;
}
+ foreach my $cmt (@{$xml->{bug}->{long_desc}}) {
+ process_comment_for_needsmc($ctx, $cmt);
+ }
+ # foreach my $cmt (@{$xml->{bug}->{long_desc}}) {
+ # process_comment_for_done($ctx, $cmt);
+ # }
# now mark all the ones that need mass-checking
my @trigger_cmts = ();
@@ -185,6 +189,7 @@
$outputs{$bug} = { };
$outputs{$bug}{rulenames} = $ctx->{rulenames};
$outputs{$bug}{trigger_cmts} = \@trigger_cmts;
+ print "\n\n";
}
sub validate_rule_code {
@@ -213,12 +218,19 @@
next if ($n eq 'MC'); # a glitch, from the comments
my $newname = $n;
+ my $rnd;
- # use part of base64(bug.cmtnum) instead of "random" values,
- # so it doesn't keep changing every night
- my $rnd = sha1_base64("$bug.$cmtnum");
- $rnd =~ /(...)$/; # last 3 base64-its
- $rnd = $1;
+ if (0) # use randomness?
+ {
+ # use part of base64(bug.cmtnum) instead of "random" values,
+ # so it doesn't keep changing every night
+ $rnd = sha1_base64("$bug.$cmtnum");
+ $rnd =~ /(...)$/; # last 3 base64-its
+ $rnd = $1;
+ }
+ else {
+ $rnd = "b${bug}_c${cmtnum}"; # the verbose version
+ }
# ensure it's unique; we only need to add randomness if we have already
# seen a rule by that name
@@ -252,15 +264,10 @@
print RULES $cf;
}
-sub process_comment {
+sub process_comment_for_needsmc {
my ($ctx, $cmt) = @_;
- my $text = $cmt->{thetext};
- $text =~ s/&lt;/</gs;
- $text =~ s/&gt;/>/gs;
- $text =~ s/&quot;/"/gs;
- $text =~ s/&amp;/\&/gs;
-
+ my $text = decode_xml_text ($cmt->{thetext});
if ($text =~ /NEEDSMC/) {
if ($cmt->{who} !~ $ALLOWED_NEEDSMCERS) {
needsmc_not_permitted($ctx, $cmt);
@@ -276,22 +283,28 @@
$cmt->{needsmc_end} = $cmt->{cmtnum};
}
else {
- $cmt->{needsmc_start} = 0;
+ $cmt->{needsmc_start} = $ctx->{default_needsmc_start};
$cmt->{needsmc_end} = $cmt->{cmtnum};
}
print "bug $ctx->{bugnum} cmt $cmt->{cmtnum}: needs-mc by $cmt->{who} from $cmt->{needsmc_start} to $cmt->{needsmc_end}\n";
}
}
- elsif ($text =~ /\# DONEMC (\d+)/) {
+ elsif ($text =~ /\# DONEMC (\d+)/)
+ {
my $done = $1;
$cmt->{needsmc_done} = $done;
my $mccmt = $ctx->{cmts_by_num}->{$done};
# note that future "NEEDMC"s start from after that comment's
# NEEDMC end number
- $ctx->{default_needsmc_start} = $mccmt->{needsmc_end} + 1;
+ $ctx->{default_needsmc_start} =
+ ($mccmt->{needsmc_end}||$mccmt->{prior_needsmc_end}) + 1;
- # delete the "needsmc" flag from that comment object
+ # delete the "needsmc" flag from that comment object. save
+ # a copy of the start/end values in case we have multiple DONEMC
+ # comments later
+ $mccmt->{prior_needsmc_start} = $mccmt->{needsmc_start};
+ $mccmt->{prior_needsmc_end} = $mccmt->{needsmc_end};
delete $mccmt->{needsmc_start};
delete $mccmt->{needsmc_end};
$mccmt->{has_needsmc} = 0;
@@ -305,46 +318,24 @@
$text =~ s/}}}.*?$//s; #{{{
$text =~ s/}}}.*?{{{//gs; #}}}
$text .= "\n";
- $cmt->{mcrules} = $text;
print "bug $ctx->{bugnum} cmt $cmt->{cmtnum}: rules in marked block\n";
+ read_cmt_rules_from_text($ctx, $cmt, $text);
}
else {
- $cmt->{mcrules} ||= '';
- my $seenrules = 0;
- my $lastwasrule = 0;
- foreach my $line (split(/^/m, $text)) {
- if ($line =~
- /^\s*(header|rawbody|body|full|meta|uri|score|describe|tflags)\s+(\S+)\s+(.*)$/
- )
- {
- my $type = $1;
- my $name = $2;
- my $code = $3;
- $cmt->{mcrules} .= "$type $name $code\n";
- $lastwasrule = 1;
- if (!$seenrules) {
- print "bug $ctx->{bugnum} cmt $cmt->{cmtnum}: rules inline\n";
- $seenrules++;
- }
- }
- else {
- if ($line =~ /\S/) {
- if ($lastwasrule) {
- # assume it's a continuation of the last line
- chop($cmt->{mcrules});
- $cmt->{mcrules} .= "$line\n";
- }
- }
- else {
- $lastwasrule = 0;
- }
- }
- }
-
- print "bug $ctx->{bugnum} cmt $cmt->{cmtnum}: code: \n".$cmt->{mcrules};
+ # just infer it...
+ read_cmt_rules_from_text($ctx, $cmt, $text);
}
}
+sub decode_xml_text {
+ my $text = shift;
+ $text =~ s/&lt;/</gs;
+ $text =~ s/&gt;/>/gs;
+ $text =~ s/&quot;/"/gs;
+ $text =~ s/&amp;/\&/gs;
+ $text;
+}
+
sub needsmc_not_permitted {
my ($ctx, $cmt) = @_;
@@ -357,5 +348,47 @@
if ($log =~ /^bug (\d+)/) {
$outputs{messages}{$1} ||= '';
$outputs{messages}{$1} .= $log;
+ }
+}
+
+sub read_cmt_rules_from_text {
+ my ($ctx, $cmt, $text) = @_;
+
+ $cmt->{mcrules} ||= '';
+ my $seenrules = 0;
+ my $lastwasrule = 0;
+ foreach my $line (split(/^/m, $text)) {
+ if ($line =~
+/^\s*(header|rawbody|body|full|meta|uri|score|describe|tflags)\s+(\S+)\s+(.*)$/
+ )
+ {
+ my $type = $1;
+ my $name = $2;
+ my $code = $3;
+ $cmt->{mcrules} .= "$type $name $code\n";
+ $lastwasrule = 1;
+ if (!$seenrules) {
+ print "bug $ctx->{bugnum} cmt $cmt->{cmtnum}: rules inline\n";
+ $seenrules++;
+ }
+ }
+ else {
+ if ($line =~ /\S/) {
+ if ($lastwasrule) {
+ # assume it's a continuation of the last line
+ chop($cmt->{mcrules});
+ $cmt->{mcrules} .= "$line\n";
+ }
+ }
+ else {
+ $lastwasrule = 0;
+ }
+ }
+ }
+
+ if ($cmt->{mcrules} =~ /\S/) {
+ my $ruletext = $cmt->{mcrules};
+ $ruletext =~ s/^/>> /gm;
+ print "bug $ctx->{bugnum} cmt $cmt->{cmtnum}: code: \n".$ruletext;
}
}