You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/11/24 18:29:43 UTC

svn commit: r478926 [2/2] - in /spamassassin/branches/jm_re2c_hacks: ./ build/ build/automc/ build/buildbot/ build/mkupdates/ lib/Mail/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Plugin/ masses/ masses/rule-qa/ masses/rule-qa/automc/ rules/ t/

Modified: spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi Fri Nov 24 09:29:40 2006
@@ -136,12 +136,6 @@
   $self->{s} = { };
 
 # selection of what will be displayed.
-  $self->{s}{defcorpus} = $self->get_url_switch('s_defcorpus', 1);
-  $self->{s}{html} = $self->get_url_switch('s_html', 0);
-  $self->{s}{net} = $self->get_url_switch('s_net', 0);
-  $self->{s}{zero} = $self->get_url_switch('s_zero', 1);
-
-  $self->{s}{new} = $self->get_url_switch('s_new', 1);
   $self->{s}{detail} = $self->get_url_switch('s_detail', 0);
   $self->{s}{g_over_time} = $self->get_url_switch('s_g_over_time', 0);
 
@@ -155,11 +149,9 @@
     $self->{s}{new} = 1;
     $self->{s}{overlap} = 1;
     $self->{s}{scoremap} = 1;
-    $self->{s}{zero} = 1;
   }
 
   if (!grep { $_ } values %{$self->{s}}) {
-    $self->{s}{defcorpus} = 1;      # set the defaults
     $self->{s}{new} = 1;
   }
 }
@@ -184,10 +176,22 @@
 
   # sanitise daterev string
   if (defined $self->{daterev}) {
+
+    # all of these ignore "b" preflight mass-checks, btw
     if ($self->{daterev} eq 'last-night') {
       $self->{daterev} = $self->get_last_night_daterev();
       $self->{q}->param('daterev', $self->{daterev});  # make it absolute
     }
+    elsif ($self->{daterev} eq 'today') {
+      $self->{daterev} = $self->get_daterev_by_date(
+            POSIX::strftime "%Y%m%d", gmtime ((time + DATEREV_ADJ)));
+      $self->{q}->param('daterev', $self->{daterev});  # make it absolute
+    }
+    elsif ($self->{daterev} =~ /^(20\d\d[01]\d\d\d)$/) {
+      # a date
+      $self->{daterev} = $self->get_daterev_by_date($1);
+      $self->{q}->param('daterev', $self->{daterev});  # make it absolute
+    }
     else {
       $self->{daterev} =~ /(\d+)[\/-](r\d+)-(\S+)/; undef $self->{daterev};
       if ($2) {
@@ -268,163 +272,17 @@
   my ($self, $title) = @_;
 
   my $hdr = q{<html><head>
-
   <title>}.$title.q{</title>
 
-  <style type="text/css" media="all">
-
-    body {
-      padding: 1em 1em 1em 1em;
-    }
-    pre.freqs {
-      font-family: monospace;
-      font-size: 14px;
-      border: 1px dashed #ddb;
-      margin: 0em -0.5em 0em -0.5em;
-      padding: 10px 20px 10px 20px;
-    }
-    div.updateform {
-      border: 3px solid #aaa;
-      background: #eec;
-      margin: 0em 0em 1em 0em;
-      padding: 0em 1em 0em 2em;
-    }
-
-    p.showfreqslink {
-      color: #999;
-      font-size: 50%;
-      text-align: right;
-      margin: 0px 0px 0px 0px;
-      border: 0px 0px 0px 0px;
-    }
-    p.showfreqslink a { color: #999; }
-
-    div.headdiv {
-      border: 1px solid;
-      background: #f0f8c0;
-      margin: 0px 0px 0px 20px;
-    }
-    p.headclosep {
-      margin: 0px 0px 0px 0px;
-      border: 0px 0px 0px 0px;
-    }
-    pre.head {
-      margin-left: 10px;
-    }
-    
-    table.freqs {
-      border: 1px dashed #ddb;
-      background: #fff;
-      padding: 10px 5px 10px 5px;
-    }
-
-    tr.freqsline_promo1 td {
-      text-align: right;
-      padding: 0.1em 0.2em 0.1em 0.2em;
-    }
-    tr.freqsline_promo0 td {
-      text-align: right;
-      padding: 0.1em 0.2em 0.1em 0.2em;
-      color: #999;
-    }
-    tr.freqsline_promo0 td a { color: #999; }
-
-    a.mcloghref {
-      color: #999;
-      font-size: 50%;
-    }
-
-    h3 {
-      border: 1px solid;
-      padding: 10px 20px 10px 20px;
-      margin: 20px -20px -10px -20px;
-      background: #fe8;
-    }
-
-    td.daterevtd {
-      font-size: 75%;
-      padding: 1px 3px 1px 5px;
-    }
-
-    td.daterevcommittd {
-      font-size: 75%;
-      padding: 1px 3px 1px 5px;
-      background: #ffc;
-    }
-
-    td.mcviewing {
-      background: #7f9;
-    }
-
-    div.commitmsgdiv {
-      font-size: 50%;
-      overflow: auto;
-    }
-
-    td.daterevtdempty {
-      background: #eec;
-    }
-
-    tr.daterevtr {
-      background: #fff;
-    }
-
-    tr.daterevdesc {
-      background: #fea;
-    }
-
-
-    /* Sortable tables, see http://www.kryogenix.org/code/browser/sorttable/ */
-    table.sortable a.sortheader {
-       background: #ddd;
-       color:#666;
-       font-weight: bold;
-       text-decoration: none;
-       display: block;
-    }
-    tr.freqsheader {
-       background: #ddd;
-    }
-    table.sortable span.sortarrow {
-       color: black;
-       text-decoration: none;
-    }
-
-
-    /* mouseover data for the freqs spam% and ham% figures using CSS2.
-     * see: http://www.meyerweb.com/eric/css/edge/popups/demo.html
-     */
-    table tr td a.ftd {
-      position: relative;
-      /* relative positioning so that the span will be
-       * "absolute" positioned relative to this block */
-    }
-    table tr td a.ftd span {
-      display: none;
-    }
-    table tr td a.ftd:hover span {
-      display: block;
-      position: absolute; top: 1em; left: 0.5em;
-      padding: 5px 20px 5px 20px; margin: 10px; z-index: 100;
-      border: 1px dashed;
-      background: #ffc;
-    }
-
-
-  </style>
-
+  <link href="/ruleqa.css" rel="stylesheet" type="text/css">
+  <script src="http://ruleqa.spamassassin.org/sorttable.js"></script>
   <script type="text/javascript"><!--
 
-    function hide_header(id) {
-      document.getElementById(id).style.display = "none";
-    }
-    function show_header(id) {
-      document.getElementById(id).style.display = "block";
-    }
+    function hide_header(id) {document.getElementById(id).style.display="none";}
+    function show_header(id) {document.getElementById(id).style.display="block";}
 
     //-->
   </script>
-  <script src="http://ruleqa.spamassassin.org/sorttable.js"></script>
 
   </head><body>
 
@@ -445,9 +303,9 @@
 
   my $tmpl = q{
 
-  <div class=updateform>
+  <div class='updateform'>
   <form action="!THISURL!" method="GET">
-    <table style="padding-left: 0px" class=datetable>
+    <table style="padding-left: 0px" class='datetable'>
 
         <tr>
         <th> Commit </th>
@@ -460,50 +318,54 @@
 
     </table>
 
-  <table width=100%>
+  <table width='100%'>
   <tr>
-  <td width=90%>
+  <td width='90%'>
+  <div class='ui_label'>
     <a href="http://wiki.apache.org/spamassassin/DateRev">DateRev</a>
-    to display (UTC timezone):
-    <input type=textfield name=daterev value="!daterev!">
+    to display (UTC timezone):</div><input
+            type='textfield' name='daterev' value="!daterev!">
+    <br/>
+  <div class='ui_label'>
+    (Select a recent nightly mass-check by date:
+    <a href='!daterev=last-night!'>last-night</a>,
+    <a href='!daterev=today!'>today</a>, or
+    enter 'YYYYMMDD' in the DateRev text field for a specific date.)
+  </div>
   </td>
-  <td width=10%><div align=right>
-    <a href="!shortdatelist!">(List&nbsp;Nearby)</a><br/>
-    <a href="!longdatelist!">(List&nbsp;All)</a><br/>
+  <td width='10%'><div align='right' class='ui_label'>
+    <a href="!shortdatelist!">(Nearby&nbsp;List)</a><br/>
+    <a href="!longdatelist!">(Full&nbsp;List)</a><br/>
   </div></td>
   </tr>
   </table>
 
     <br/>
 
-<!-- 
-
-    (This has been pretty much superceded by the --net mass-checks)
-
-    <h4> Which Corpus? </h4>
-    <input type=checkbox name=s_defcorpus !s_defcorpus!> Show default non-net ruleset and corpus, set 0<br/>
-    <input type=checkbox name=s_net !s_net!> Show frequencies from network tests, set 1<br/>
-    <input type=checkbox name=s_html !s_html!> Show frequencies for mails containing HTML only, set 0<br/>
-    <br/>
--->
-
     <h4> Which Rules?</h4>
+  <div class='ui_label'>
     Show only these rules (space separated, or regexp with '/' prefix):<br/>
-    <input type=textfield size=60 name=rule value="!rule!"><br/>
+  </div>
+    <input type='textfield' size='60' name='rule' value="!rule!"><br/>
     <br/>
+  <div class='ui_label'>
     Show only rules from files whose paths contain this string:<br/>
-    <input type=textfield size=60 name=srcpath value="!srcpath!"><br/>
+  </div>
+    <input type='textfield' size='60' name='srcpath' value="!srcpath!"><br/>
     <br/>
-    <input type=checkbox name=s_zero !s_zero!> Show rules with zero hits<br/>
-    <input type=checkbox name=s_detail !s_detail!> Display full details: message age in weeks, by contributor, as score-map, overlaps with other rules, freshness graphs<br/>
+    <input type='checkbox' name='s_detail' id='s_detail' !s_detail!><label
+        for='s_detail' class='ui_label'>Display full details: message age in weeks, by contributor, as score-map, overlaps with other rules, freshness graphs
+        </label><br/>
     <br/>
 
-    <input type=submit name=g value="Change"><br/>
-
 <p>
+  <div class='ui_label'>
     Show only rules from files modified in the
     <a href='!mtime=1!'>last day</a>, <a href='!mtime=7!'>last week</a>
+  </div>
 </p>
+
+    <div align='right'><input type='submit' name='g' value="Change"></div>
   </form>
   </div>
 
@@ -550,6 +412,9 @@
   $tmpl =~ s/!mtime=(.*?)!/
                $self->gen_switch_url("mtime", $1);
        /eg;
+  $tmpl =~ s/!daterev=(.*?)!/
+               $self->gen_switch_url("daterev", $1);
+       /eg;
   $tmpl =~ s/!rule!/$self->{rule}/gs;
   $tmpl =~ s/!srcpath!/$self->{srcpath}/gs;
   foreach my $opt (keys %{$self->{s}}) {
@@ -566,7 +431,7 @@
 
     print qq{
 
-      <p class=intro> <strong>Instructions</strong>: click
+      <p class='intro'> <strong>Instructions</strong>: click
       the rule name to view details of a particular rule. </p>
 
     };
@@ -580,25 +445,25 @@
     {
       my $graph_on = qq{
 
-        <p><a id="over_time_anchor" 
-          href="}.$self->gen_switch_url("s_g_over_time", "0").qq{#over_time_anchor"
+        <p><a id="over_time_anchor"></a><a id="overtime" 
+          href="}.$self->gen_switch_url("s_g_over_time", "0").qq{#overtime"
           >Hide Graph</a></p>
         <img src="}.$self->gen_switch_url("graph", "over_time").qq{" 
-          width=800 height=815 />
+          width='800' height='815' />
 
       };
 
       my $graph_off = qq{
 
-        <p><a id="over_time_anchor" 
-          href="}.$self->gen_switch_url("s_g_over_time", "1").qq{#over_time_anchor"
+        <p><a id="over_time_anchor"></a><a id="overtime" 
+          href="}.$self->gen_switch_url("s_g_over_time", "1").qq{#overtime"
           >Show Graph</a></p>
 
       };
 
       print qq{
 
-        <h3 class=graph_title>Graph, hit-rate over time</h3>
+        <h3 class='graph_title'>Graph, hit-rate over time</h3>
         }.($self->{s}{g_over_time} ? $graph_on : $graph_off).qq{
 
         </ul>
@@ -613,16 +478,20 @@
 
     print qq{
 
+      <div class='ui_label'>
       <p><a href="$url_back">&lt; Back</a> to overview.</p>
+      </div>
 
     };
   }
 
   print qq{
 
-  <p>Note: the freqs tables are sortable.  Click on the headers to resort them
-  by that column.  <a
-  href="http://www.kryogenix.org/code/browser/sorttable/">(thanks!)</a></p>
+      <div class='ui_label'>
+      <p>Note: the freqs tables are sortable.  Click on the headers to resort them
+      by that column.  <a
+      href="http://www.kryogenix.org/code/browser/sorttable/">(thanks!)</a></p>
+      </div>
 
   </body></html>
 
@@ -703,6 +572,11 @@
 
   my $notafter = POSIX::strftime "%Y%m%d",
         gmtime ((time + DATEREV_ADJ) - (12*60*60));
+  return $self->get_daterev_by_date($notafter);
+}
+
+sub get_daterev_by_date {
+  my ($self, $notafter) = @_;
 
   foreach my $dr (reverse @{$self->{daterevs}}) {
     my $t = $self->get_daterev_metadata($dr);
@@ -721,9 +595,7 @@
 
   $self->{datadir} = $self->get_datadir_for_daterev($path);
 
-  $self->{s}{defcorpus} and $self->showfreqset('DETAILS', $strdate);
-  $self->{s}{html} and $self->showfreqset('HTML', $strdate);
-  $self->{s}{net} and $self->showfreqset('NET', $strdate);
+  $self->showfreqset('DETAILS', $strdate);
 
   # special case: we only build this for one set, as it's quite slow
   # to generate
@@ -929,11 +801,19 @@
   my $desc = $FREQS_FILENAMES{$key};
   my $file = $self->{datadir}.$key;
 
-  my $titleplink = "$key.$strdate"; $titleplink =~ s/[^A-Za-z0-9]+/_/gs;
+  my $titleplinkold = "$key.$strdate";
+  $titleplinkold =~ s/[^A-Za-z0-9]+/_/gs;
+
+  my $titleplinknew = "t".$key;
+  $titleplinknew =~ s/[^A-Za-z0-9]+/_/gs;
+  $titleplinknew =~ s/^tDETAILS_//;
+
+  my $titleplinkhref = $self->gen_this_url()."#".$titleplinknew;
+
   my $comment = qq{
   
     <!-- freqs start $key -->
-    <h3 class=freqs_title>$desc</h3>
+    <h3 class='freqs_title'>$desc</h3>
     <!-- <h4>$strdate</h4> -->
 
   };
@@ -945,27 +825,28 @@
 
   $comment .= qq{ 
     
-    <div id="$headers_id" class=headdiv style='display: none'>
-    <p class=headclosep align=right><a
+    <div id="$headers_id" class='headdiv' style='display: none'>
+    <p class='headclosep' align='right'><a
           href="javascript:hide_header('$headers_id')">[close]</a></p>
-    <pre class=head>$heads</pre>
+    <pre class='head'>$heads</pre>
     </div>
 
-    <div id="txt_$headers_id" class=headdiv style='display: none'>
-    <p class=headclosep align=right><a
+    <div id="txt_$headers_id" class='headdiv' style='display: none'>
+    <p class='headclosep' align='right'><a
           href="javascript:hide_header('txt_$headers_id')">[close]</a></p>
-    <pre class=head><<<TEXTS>>></pre>
+    <pre class='head'><<<TEXTS>>></pre>
     </div>
 
     <br clear="all"/>
-    <p class=showfreqslink><a
-      href="javascript:show_header('txt_$headers_id')">(pasteable)</a><a
+    <p class='showfreqslink'><a
+      href="javascript:show_header('txt_$headers_id')">(pasteable)</a> <a
       href="javascript:show_header('$headers_id')">(source details)</a>
-      <a name='$titleplink' href='#$titleplink' class=title_permalink>(#)</a>
+      <a name='$titleplinknew' href='$titleplinkhref' class='title_permalink'>(#)</a>
+      <a name='$titleplinkold'><!-- backwards compat --></a>
     </p>
 
-    <table class=sortable id='freqs_${headers_id}' class=freqs>
-      <tr class=freqshead>
+    <table class='sortable' id='freqs_${headers_id}' class='freqs'>
+      <tr class='freqshead'>
       <th>MSECS</th>
       <th>SPAM%</th>
       <th>HAM%</th>
@@ -984,7 +865,7 @@
 
   if (ref $self->{freqs_ordr}{$key} ne 'ARRAY') {
     print qq(
-      <h3 class=freqs_title>$desc</h3>
+      <h3 class='freqs_title'>$desc</h3>
       <table><p><i>('$key' not yet available)</i></p></table>
     );
     return;
@@ -1087,10 +968,10 @@
 
   $FREQS_LINE_TEMPLATE = qq{
 
-  <tr class=freqsline_promo[% PROMO %]>
+  <tr class='freqsline_promo[% PROMO %]'>
     <td>[% MSECS %]</td>
-    <td><a class=ftd>[% SPAMPC %]<span>[% SPAMPCDETAIL %]</span></a>[% SPAMLOGLINK %]
-    <td><a class=ftd>[% HAMPC %]<span>[% HAMPCDETAIL %]</span></a>[% HAMLOGLINK %]
+    <td><a class='ftd'>[% SPAMPC %]<span>[% SPAMPCDETAIL %]</span></a>[% SPAMLOGLINK %]
+    <td><a class='ftd'>[% HAMPC %]<span>[% HAMPCDETAIL %]</span></a>[% HAMLOGLINK %]
     <td>[% SO %]</td>
     <td>[% RANK %]</td>
     <td>[% SCORE %]</td>
@@ -1113,8 +994,8 @@
 
   $FREQS_EXTRA_TEMPLATE = qq{
 
-  <tr class=freqsextra>
-    <td colspan=7><pre class=perruleextra>[% EXTRA %]</pre></td>
+  <tr class='freqsextra'>
+    <td colspan=7><pre class='perruleextra'>[% EXTRA %]</pre></td>
   </tr>
 
   };
@@ -1203,12 +1084,6 @@
   # normal freqs lines, with optional subselector after rule name
   my $out = '';
   foreach my $line (@{$obj->{lines}}) {
-    if (!$self->{s}{zero}) {
-      my $ov = $line->{spampc} + $line->{hampc};
-      if (!$ov || $ov !~ /^\s*\d/ || $ov+0 == 0) {
-        next;       # skip this line, it's a 0-hitter
-      }
-    }
 
     my $detailurl = '';
     if (!$self->{s}{detail}) {	# not already in "detail" mode
@@ -1304,6 +1179,12 @@
   return $self->assemble_url(@parms);
 }
 
+sub gen_this_url {
+  my ($self) = @_;
+  my @parms =  $self->get_params_except("__nonexistent__");
+  return $self->assemble_url(@parms);
+}
+
 sub get_rev_for_daterev {
   my ($self, $daterev) = @_;
   # '20060120-r370897-b'
@@ -1327,10 +1208,13 @@
     if (!$p) { next; }
     elsif ($p =~ /^keywords=$/) { next; }
     elsif ($p =~ /^g=Change$/) { next; }
+    # default values that can be omitted
+    elsif ($p =~ /^srcpath=$/) { next; }
+    elsif ($p =~ /^mtime=$/) { next; }
     # the ones we can put in the path
     elsif ($p =~ /^rule=(.*)$/) { $path{rule} = $1; }
     elsif ($p =~ /^daterev=(.*)$/) { $path{daterev} = $1; }
-    elsif ($p =~ /^s_detail=1$/) { $path{s_detail} = 1; }
+    elsif ($p =~ /^s_detail=(?:1|on)$/) { $path{s_detail} = 1; }
     # and all the rest
     else { push (@parms, $p); }
   }
@@ -1534,7 +1418,7 @@
 
     <a href="javascript:show_header('$id')">[+]</a>
     <div id='$id' class='mclogmds' style='display: none'>
-      <p class=headclosep align=right><a
+      <p class='headclosep' align='right'><a
           href="javascript:hide_header('$id')">[-]</a></p>
 
       $all
@@ -1642,7 +1526,7 @@
 
     push @html, qq{
 
-            <tr class=daterevtr>
+            <tr class='daterevtr'>
 
       }, $self->gen_daterev_html_commit_td($meta, $ignore_logmds);
 
@@ -1713,10 +1597,10 @@
   print qq{
 
     <h3> All Mass-Checks </h3>
-    <br/> <a href='#net' name=net>#</a>
+    <br/> <a href='#net' name='net'>#</a>
 
-    <div class=updateform>
-      <table style="padding-left: 0px" class=datetable>
+    <div class='updateform'>
+      <table style="padding-left: 0px" class='datetable'>
       <tr>
       <th> Commit </th>
       <th> Preflight Mass-Checks </th>

Modified: spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-nightly
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-nightly?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-nightly (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-nightly Fri Nov 24 09:29:40 2006
@@ -37,7 +37,14 @@
 fi
 
 # enter tree
-cd $tree
+if [ -d $tree ] ; then 
+  cd $tree
+else
+  echo "$tree does not exist yet. you need to check it out first" 1>&2
+  exit 2
+fi
+
+[ -d $tmp ] || mkdir -p $tmp
 
 # find current revision
 rm -f weekly-versions.txt nightly-versions.txt
@@ -56,6 +63,7 @@
 fi
 
 # update
+# TODO: replace all this with rsync, it's much safer ;)
 set +e
 retry=0
 while true; do
@@ -104,6 +112,8 @@
 rm -f spamassassin/auto*
 rm -f spamassassin/bayes*
 rm -f razor-agent.log
+[ -d spamassassin ] || mkdir spamassassin
+
 if [ -n "$net" ]; then
 	prefs=$prefs_weekly
 	opts=$opts_weekly

Modified: spamassassin/branches/jm_re2c_hacks/masses/score-ranges-from-freqs
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/score-ranges-from-freqs?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/score-ranges-from-freqs (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/score-ranges-from-freqs Fri Nov 24 09:29:40 2006
@@ -67,7 +67,7 @@
 my %is_nice = ();
 
 if (!defined $argcffile) { $argcffile = "../rules"; }
-system ("./parse-rules-for-masses -d \"$argcffile\" -s $scoreset") and die;
+system ("../build/parse-rules-for-masses -d \"$argcffile\" -s $scoreset") and die;
 if (-e "tmp/rules.pl") {
   # Note, the spaces need to stay in front of the require to work around a RPM 4.1 problem
   require "./tmp/rules.pl";

Modified: spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf Fri Nov 24 09:29:40 2006
@@ -33,11 +33,6 @@
 describe FRAGMENTED_MESSAGE	Partial message
 tflags FRAGMENTED_MESSAGE       userconf
 
-# this is also mostly-theoretical, so allow 0 hits
-header HEAD_LONG                eval:check_msg_parse_flags('truncated_header')
-describe HEAD_LONG              Message headers are very long
-tflags HEAD_LONG                userconf
-
 ###########################################################################
 
 header FROM_BLANK_NAME		From =~ /(?:\s|^)"" <\S+>/i
@@ -536,6 +531,11 @@
 ###########################################################################
 
 ifplugin Mail::SpamAssassin::Plugin::MIMEEval
+
+# this is also mostly-theoretical, so allow 0 hits
+header HEAD_LONG                eval:check_msg_parse_flags('truncated_header')
+describe HEAD_LONG              Message headers are very long
+tflags HEAD_LONG                userconf
 
 header MISSING_HB_SEP		eval:check_msg_parse_flags('missing_head_body_separator')
 describe MISSING_HB_SEP		Missing blank line between message header and body

Modified: spamassassin/branches/jm_re2c_hacks/rules/20_ratware.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/20_ratware.cf?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/20_ratware.cf (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/20_ratware.cf Fri Nov 24 09:29:40 2006
@@ -273,13 +273,15 @@
 header RATWARE_RCVD_AT		Received =~ / by \S+\@\S+ with Microsoft SMTPSVC/
 describe RATWARE_RCVD_AT	Bulk email fingerprint (Received @) found
 
-meta RATWARE_OUTLOOK_NONAME	__MSGID_DOLLARS_OK && !__HAS_X_MAILER
+header __RCVD_WITH_EXCHANGE	Received =~ /with Microsoft Exchange Server/
+
+meta RATWARE_OUTLOOK_NONAME	__MSGID_DOLLARS_OK && !__HAS_X_MAILER && !__RCVD_WITH_EXCHANGE
 describe RATWARE_OUTLOOK_NONAME	Bulk email fingerprint (Outlook no name) found
 
 
 
 header __MIMEOLE_MS		X-MIMEOLE =~ /^Produced By Microsoft MimeOLE/
-meta RATWARE_MS_HASH 		__MSGID_DOLLARS_OK && !__MIMEOLE_MS
+meta RATWARE_MS_HASH 		__MSGID_DOLLARS_OK && !__MIMEOLE_MS && !__RCVD_WITH_EXCHANGE
 describe RATWARE_MS_HASH	Bulk email fingerprint (msgid ms hash) found
 
 ###########################################################################

Modified: spamassassin/branches/jm_re2c_hacks/rules/50_scores.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/50_scores.cf?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/50_scores.cf (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/50_scores.cf Fri Nov 24 09:29:40 2006
@@ -37,29 +37,16 @@
 score ADVANCE_FEE_2 1.607 0.647 1.189 1.392
 score ADVANCE_FEE_3 2.872 1.760 3.330 3.336
 score ADVANCE_FEE_4 3.024 3.040 3.515 3.727
-score ALL_TRUSTED -1.360 -1.440 -1.665 -1.800
 score BAD_CREDIT 1.501 0.129 1.539 0.001
 score BAD_ENC_HEADER 2.480 2.255 2.960 3.100
 score BANG_GUAR 0.001 0.139 0.504 0.001
 score BANG_OPRAH 1.233 0.366 1.386 0.001
 score BILLION_DOLLARS 0.001
-score BLANK_LINES_80_90 0.272 0.107 0.810 0.001
 score BODY_ENHANCEMENT 1.090 0.001 1.163 0.001
 score BODY_ENHANCEMENT2 1.821 0.618 2.045 0.736
 score CLICK_BELOW_CAPS 0.001
 score CONFIRMED_FORGED 0.690 0.001 1.059 0.001
 score CUM_SHOT 2.320 2.095 2.683 2.708
-score DATE_IN_FUTURE_03_06 2.061 2.007 2.275 1.961
-score DATE_IN_FUTURE_06_12 1.680 1.498 1.883 1.668
-score DATE_IN_FUTURE_12_24 2.320 2.316 2.775 2.767
-score DATE_IN_FUTURE_24_48 2.080 2.080 2.498 2.688
-score DATE_IN_FUTURE_48_96 1.680 1.680 1.942 2.100
-score DATE_IN_FUTURE_96_XX 1.920 1.888 2.276 2.403
-score DATE_IN_PAST_03_06 0.736 0.001 1.122 0.478
-score DATE_IN_PAST_06_12 0.846 0.746 0.926 0.827
-score DATE_IN_PAST_12_24 0.960 0.881 1.036 1.247
-score DATE_IN_PAST_24_48 0.801 0.805 0.976 0.880
-score DATE_IN_PAST_96_XX 1.752 1.572 2.101 2.020
 score DATE_SPAMWARE_Y2K 1.859 1.822 1.944 0.745
 score DEAR_FRIEND 0.811 0.858 0.976 1.632
 score DEAR_SOMETHING 1.605 1.612 1.901 2.100
@@ -93,18 +80,10 @@
 score EXCUSE_REMOVE 1.345 0.001 1.573 0.110
 score EXTRA_MPART_TYPE 0.847 0.815 0.733 1.091
 score FAKED_UNDISC_RECIPS 0.001 # n=0 n=1 n=2 n=3
-score FAKE_HELO_EMAIL_COM 1.440 1.440 1.665 1.335
-score FAKE_HELO_EXCITE 0.001
-score FAKE_HELO_LYCOS 0.001
-score FAKE_HELO_MAIL_COM 1.920 1.920 2.220 2.369
 score FAKE_HELO_MAIL_COM_DOM 2.160 2.160 2.498 2.700
-score FAKE_HELO_MSN 2.080 2.060 2.358 2.509
-score FAKE_HELO_YAHOO_CA 1.186 1.353 1.466 1.599
 score FAKE_OUTBLAZE_RCVD 2.480 2.480 2.867 3.100
 score FIN_FREE 1.019 0.611 1.313 0.469
 score FORGED_AOL_TAGS 1.696 0.281 1.844 1.582
-score FORGED_GW05_RCVD 0.001 # n=0 n=1 n=2 n=3
-score FORGED_HOTMAIL_RCVD2 1.653 0.549 2.127 1.162
 score FORGED_IMS_HTML 2.184 2.265 2.518 1.937
 score FORGED_IMS_TAGS 2.054 2.117 2.321 1.768
 score FORGED_MSGID_AOL 1.440 1.436 1.665 1.619
@@ -125,14 +104,12 @@
 score FORGED_QUALCOMM_TAGS 2.080 1.809 2.312 1.783
 score FORGED_TELESP_RCVD 1.280 0.001 1.470 0.001
 score FORGED_THEBAT_HTML 2.560 2.387 2.960 2.500
-score FORGED_YAHOO_RCVD 1.506 0.928 1.794 1.849
 score FORWARD_LOOKING 1.207 1.434 1.692 1.048
 score FREE_PORN 0.001 0.001 0.143 0.001
 score FREE_QUOTE_INSTANT 1.178 0.001 1.422 0.001
 score FROM_BLANK_NAME 1.659 1.467 0.936 1.534
 score FROM_DOMAIN_NOVOWEL 1.582 1.592 1.903 2.100
 score FROM_EXCESS_BASE64 0.647 1.052 1.190 1.309
-score FROM_ILLEGAL_CHARS 3.280 3.280 3.792 4.100
 score FROM_LOCAL_DIGITS 0.001
 score FROM_LOCAL_HEX 2.000 1.343 2.240 1.305
 score FROM_LOCAL_NOVOWEL 2.480 2.331 2.867 2.861
@@ -143,9 +120,7 @@
 score GUARANTEED_100_PERCENT 0.810 0.001 1.392 0.001
 score HDR_ORDER_MTSRIX 0.001 # n=0 n=1 n=2 n=3
 score HDR_ORDER_TRIMRS 1.440 1.440 1.665 1.800
-score HEADER_COUNT_CTYPE 1.336 1.440 1.665 1.800
 score HEADER_SPAM 3.200 3.115 3.700 3.789
-score HEAD_ILLEGAL_CHARS 1.652 1.519 1.796 1.606
 score HELO_DYNAMIC_ATTBI 2.400 2.400 2.775 2.692
 score HELO_DYNAMIC_CHELLO_NL 1.624 0.001 2.035 0.170
 score HELO_DYNAMIC_CHELLO_NO 0.001 # n=0 n=1 n=2 n=3
@@ -170,7 +145,6 @@
 score HTML_SHORT_LINK_IMG_2 2.296 0.951 2.739 1.582
 score HTML_SHORT_LINK_IMG_3 0.913 0.518 1.675 0.881
 score HTML_TITLE_SUBJ_DIFF 1.547 0.551 1.224 0.266
-score HTTPS_IP_MISMATCH 1.920 1.920 2.220 2.400
 score HTTP_77 2.242 2.658 2.644 2.346
 score HTTP_ESCAPED_HOST 0.001 0.001 0.124 0.001
 score HTTP_EXCESSIVE_ESCAPES 1.329 1.146 1.145 1.572
@@ -188,45 +162,33 @@
 score JS_FROMCHARCODE 1.200 0.001 1.480 1.600 # n=1
 score KOREAN_UCE_SUBJECT 2.480 2.480 2.867 3.100
 score LIVE_PORN 0.530 0.332 0.782 0.001
-score LOCALPART_IN_SUBJECT 1.559 1.561 1.757 1.900
 score LONGWORDS 3.120 2.957 3.530 3.789
 score LOW_PRICE 0.001
 score MALE_ENHANCE 2.480 2.480 2.867 3.100
 score MARKETING_PARTNERS 1.482 1.435 1.757 1.765
 score MICRO_CAP_WARNING 1.200 1.280 1.480 1.462
 score MILLION_USD 2.359 1.606 2.824 1.816
-score MIME_BAD_ISO_CHARSET 3.360 3.360 3.885 4.185
-score MIME_BASE64_BLANKS 0.001 0.001 0.184 0.224
-score MIME_BASE64_TEXT 2.048 1.522 2.749 1.885
 score MIME_BOUND_DD_DIGITS 3.600 3.600 4.162 4.500
 score MIME_BOUND_DIGITS_15 2.400 2.400 2.775 2.949
 score MIME_BOUND_DIGITS_7 0.001 # n=0 n=1 n=2 n=3
 score MIME_BOUND_MANY_HEX 2.160 2.144 2.498 2.700
 score MIME_BOUND_NEXTPART 0.224 0.241 0.375 0.278
 score MIME_HEADER_CTYPE_ONLY 0.001 0.001 0.182 0.001
-score MIME_HTML_MOSTLY 1.703 0.699 2.309 1.102
-score MIME_HTML_ONLY 0.414 0.001 0.389 0.001
 score MIME_HTML_ONLY_MULTI 0.001
-score MIME_QP_LONG_LINE 0.159 0.001 0.234 0.001
 score MISSING_DATE 0.001
-score MISSING_HEADERS 0.001 0.189 0.001 0.001
 score MISSING_MIMEOLE 1.664 1.394 1.970 1.612
-score MISSING_MIME_HB_SEP 0.001
 score MISSING_SUBJECT 1.729 1.345 2.035 1.816
 score MONEY_BACK 0.843 0.001 0.645 0.001
 score MORE_SEX 2.240 2.035 2.590 1.950
 score MORTGAGE_BEST 0.001
 score MORTGAGE_PITCH 0.151 0.001 0.001 0.001
 score MORTGAGE_RATES 0.001
-score MPART_ALT_DIFF 0.425 0.137 1.142 0.001
-score MPART_ALT_DIFF_COUNT 1.649 0.001 1.607 0.708
 score MSGID_DOLLARS_RANDOM 3.200 3.200 3.700 3.780
 score MSGID_FROM_MTA_HEADER 0.001 0.001 0.274 0.001
 score MSGID_FROM_MTA_HOTMAIL 0.001 # n=0 n=1 n=2 n=3
 score MSGID_LONG 0.899 0.267 1.188 1.204
 score MSGID_MULTIPLE_AT 2.880 1.375 3.187 1.914
 score MSGID_NO_HOST 0.533 0.129 0.787 0.285
-score MSGID_OUTLOOK_INVALID 2.080 2.027 2.405 2.600
 score MSGID_RANDY 2.794 2.165 3.412 2.880
 score MSGID_SHORT 2.480 2.465 2.821 3.100
 score MSGID_SPAM_99X9XX99 0.001 # n=0 n=1 n=2 n=3
@@ -242,7 +204,6 @@
 score NO_MEDICAL 1.200 1.259 1.480 1.363
 score NO_OBLIGATION 0.488 0.303 0.628 0.966
 score NO_PRESCRIPTION 3.200 2.888 3.700 3.887
-score NO_RDNS_DOTCOM_HELO 0.356 0.001 0.001 0.001
 score NUMERIC_HTTP_ADDR 1.253 0.585 1.249 0.472
 score OBFUSCATING_COMMENT 0.806 1.117 1.379 1.690
 score OBSCURED_EMAIL 1.680 1.680 1.834 2.100
@@ -253,14 +214,12 @@
 score PREST_NON_ACCREDITED 1.280 1.280 1.480 1.600
 score PREVENT_NONDELIVERY 1.515 1.640 1.737 1.600
 score PRICES_ARE_AFFORDABLE 1.964 0.522 2.312 0.995
-score RATWARE_EFROM 2.880 2.880 3.330 3.600
 score RATWARE_EGROUPS 2.640 2.487 3.052 2.563
 score RATWARE_GECKO_BUILD 1.751 1.426 1.966 1.691
 score RATWARE_HASH_DASH 0.001 # n=0 n=1 n=2 n=3
 score RATWARE_MOZ_MALFORMED 1.840 1.820 2.035 1.847
 score RATWARE_MPOP_WEBMAIL 0.001 0.118 0.417 0.111
 score RATWARE_MS_HASH 2.425 1.379 2.691 1.910
-score RATWARE_NAME_ID 3.280 3.280 3.792 4.100
 score RATWARE_NETIP 1.272 0.548 1.497 1.398
 score RATWARE_OE_MALFORMED 2.400 2.400 2.775 3.000
 score RATWARE_OUTLOOK_NONAME 3.120 1.861 3.471 2.777
@@ -274,9 +233,6 @@
 score RCVD_DOUBLE_IP_LOOSE 0.163 0.001 0.293 0.001
 score RCVD_DOUBLE_IP_SPAM 3.506 3.455 4.070 3.690
 score RCVD_FAKE_HELO_DOTCOM 2.160 1.652 2.590 2.281
-score RCVD_HELO_IP_MISMATCH 3.200 3.200 3.700 4.000
-score RCVD_ILLEGAL_IP 1.585 0.234 1.813 0.288
-score RCVD_NUMERIC_HELO 1.440 1.253 1.665 1.500
 score REFINANCE_NOW 1.520 0.872 1.576 1.050
 score REFINANCE_YOUR_HOME 1.760 0.980 2.035 0.302
 score REMOVE_BEFORE_LINK 3.120 2.152 3.700 2.692
@@ -289,9 +245,7 @@
 score REPTO_QUOTE_YAHOO 2.126 2.181 3.428 2.124
 score RESISTANCE_IS_FUTILE 0.001 # n=0 n=1 n=2 n=3
 score RISK_FREE 0.001
-score ROUND_THE_WORLD_LOCAL 1.840 1.429 2.127 1.659
 score RUDE_HTML 2.344 1.362 2.902 2.041
-score SORTED_RECIPS 2.800 1.530 3.237 1.960
 score SPOOF_COM2COM 2.320 1.938 2.683 2.450
 score SPOOF_COM2OTH 0.537 0.001 0.001 0.001
 score SPOOF_NET2COM 1.806 1.106 1.870 1.541
@@ -305,20 +259,16 @@
 score SUBJECT_DRUG_GAP_X 2.262 2.334 2.447 2.401
 score SUBJECT_SEXUAL 2.160 2.160 2.498 2.684
 score SUBJ_2_NUM_PARENS 0.952 1.074 1.026 1.206
-score SUBJ_ALL_CAPS 1.049 1.166 0.459 0.997
 score SUBJ_AS_SEEN 1.511 0.001 1.757 0.001
 score SUBJ_BUY 1.311 0.116 0.701 0.255
 score SUBJ_DOLLARS 0.650 0.381 0.636 0.301
-score SUBJ_ILLEGAL_CHARS 3.360 3.360 3.978 4.279
 score SUBJ_YOUR_DEBT 1.405 0.577 1.757 1.106
 score SUBJ_YOUR_FAMILY 1.600 0.338 1.850 1.157
-score SUSPICIOUS_RECIPS 2.240 0.849 2.267 1.757
 score TO_MALFORMED 0.001
 score TO_RECIP_MARKER 1.044 1.033 1.168 1.038
 score TRACKER_ID 2.000 1.295 2.292 1.032
 score UNCLAIMED_MONEY 1.920 1.920 2.220 2.400
 score UNCLOSED_BRACKET 2.480 2.480 2.867 2.900
-score UNRESOLVED_TEMPLATE 1.520 0.687 1.923 1.324
 score UPPERCASE_50_75 0.206 0.591 0.001 0.368
 score UPPERCASE_75_100 1.394 1.040 0.809 1.371
 score URG_BIZ 0.395 0.269 0.699 0.351
@@ -345,6 +295,16 @@
 
 ###########################################################################
 
+ifplugin Mail::SpamAssassin::Plugin::BodyEval
+# <gen:mutable>
+score BLANK_LINES_80_90 0.272 0.107 0.810 0.001
+score MPART_ALT_DIFF 0.425 0.137 1.142 0.001
+score MPART_ALT_DIFF_COUNT 1.649 0.001 1.607 0.708
+# </gen:mutable>
+endif
+
+###########################################################################
+
 ifplugin Mail::SpamAssassin::Plugin::HTMLEval
 # <gen:mutable>
 score HTML_COMMENT_SAVED_URL 0.001 0.647 1.197 0.273
@@ -450,8 +410,92 @@
 
 ifplugin Mail::SpamAssassin::Plugin::HeaderEval
 # <gen:mutable>
+score DATE_IN_FUTURE_03_06 2.061 2.007 2.275 1.961
+score DATE_IN_FUTURE_06_12 1.680 1.498 1.883 1.668
+score DATE_IN_FUTURE_12_24 2.320 2.316 2.775 2.767
+score DATE_IN_FUTURE_24_48 2.080 2.080 2.498 2.688
+score DATE_IN_FUTURE_48_96 1.680 1.680 1.942 2.100
+score DATE_IN_FUTURE_96_XX 1.920 1.888 2.276 2.403
+score DATE_IN_PAST_03_06 0.736 0.001 1.122 0.478
+score DATE_IN_PAST_06_12 0.846 0.746 0.926 0.827
+score DATE_IN_PAST_12_24 0.960 0.881 1.036 1.247
+score DATE_IN_PAST_24_48 0.801 0.805 0.976 0.880
+score DATE_IN_PAST_96_XX 1.752 1.572 2.101 2.020
+score FORGED_GW05_RCVD 0.001 # n=0 n=1 n=2 n=3
+score FORGED_HOTMAIL_RCVD2 1.653 0.549 2.127 1.162
+score FORGED_YAHOO_RCVD 1.506 0.928 1.794 1.849
+score FROM_ILLEGAL_CHARS 3.280 3.280 3.792 4.100
+score HEADER_COUNT_CTYPE 1.336 1.440 1.665 1.800
+score HEAD_ILLEGAL_CHARS 1.652 1.519 1.796 1.606
+score LOCALPART_IN_SUBJECT 1.559 1.561 1.757 1.900
+score MISSING_HEADERS 0.001 0.189 0.001 0.001
+score MSGID_OUTLOOK_INVALID 2.080 2.027 2.405 2.600
+score RATWARE_EFROM 2.880 2.880 3.330 3.600
+score RATWARE_NAME_ID 3.280 3.280 3.792 4.100
 score ROUND_THE_WORLD 0.001 1.267 0.001 1.495
+score ROUND_THE_WORLD_LOCAL 1.840 1.429 2.127 1.659
+score SORTED_RECIPS 2.800 1.530 3.237 1.960
+score SUBJ_ALL_CAPS 1.049 1.166 0.459 0.997
+score SUBJ_ILLEGAL_CHARS 3.360 3.360 3.978 4.279
+score SUSPICIOUS_RECIPS 2.240 0.849 2.267 1.757
+score UNRESOLVED_TEMPLATE 1.520 0.687 1.923 1.324
 # </gen:mutable>
+
+# ok_locales
+score CHARSET_FARAWAY_HEADER 3.200
+
+endif
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::MIMEEval
+# <gen:mutable>
+score MIME_BAD_ISO_CHARSET 3.360 3.360 3.885 4.185
+score MIME_BASE64_BLANKS 0.001 0.001 0.184 0.224
+score MIME_BASE64_TEXT 2.048 1.522 2.749 1.885
+score MIME_HTML_MOSTLY 1.703 0.699 2.309 1.102
+score MIME_HTML_ONLY 0.414 0.001 0.389 0.001
+score MIME_QP_LONG_LINE 0.159 0.001 0.234 0.001
+score MISSING_MIME_HB_SEP 0.001
+# </gen:mutable>
+
+# ok_locales
+score CHARSET_FARAWAY 3.200
+
+# we dare you
+score HEAD_LONG 2.5
+score MISSING_HB_SEP 2.5
+
+endif
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::RelayEval
+# <gen:mutable>
+score ALL_TRUSTED -1.360 -1.440 -1.665 -1.800
+score FAKE_HELO_EMAIL_COM 1.440 1.440 1.665 1.335
+score FAKE_HELO_EXCITE 0.001
+score FAKE_HELO_LYCOS 0.001
+score FAKE_HELO_MAIL_COM 1.920 1.920 2.220 2.369
+score FAKE_HELO_MSN 2.080 2.060 2.358 2.509
+score FAKE_HELO_YAHOO_CA 1.186 1.353 1.466 1.599
+score NO_RDNS_DOTCOM_HELO 0.356 0.001 0.001 0.001
+score RCVD_HELO_IP_MISMATCH 3.200 3.200 3.700 4.000
+score RCVD_ILLEGAL_IP 1.585 0.234 1.813 0.288
+score RCVD_NUMERIC_HELO 1.440 1.253 1.665 1.500
+# </gen:mutable>
+
+# Informational rules about Received header parsing
+score NO_RELAYS -0.001
+score UNPARSEABLE_RELAY 0.001
+
+endif
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::URIEval
+# <gen:mutable>
+score HTTPS_IP_MISMATCH 1.920 1.920 2.220 2.400
 endif
 
 ###########################################################################
@@ -466,8 +510,6 @@
 # we dare you
 score FRAGMENTED_MESSAGE 2.5
 score HIGH_CODEPAGE_URI 2.5
-score MISSING_HB_SEP 2.5
-score HEAD_LONG 2.5
 
 # make the Bayes scores unmutable (as discussed in bug 4505)
 ifplugin Mail::SpamAssassin::Plugin::Bayes
@@ -483,13 +525,9 @@
 endif
 
 # Informational rules about Received header parsing
-score NO_RELAYS -0.001
 score NO_RECEIVED -0.001
-score UNPARSEABLE_RELAY 0.001
 
 # ok_locales
-score CHARSET_FARAWAY 3.200
-score CHARSET_FARAWAY_HEADER 3.200
 score HTML_CHARSET_FARAWAY 0.500
 score MIME_CHARSET_FARAWAY 2.450
 

Modified: spamassassin/branches/jm_re2c_hacks/rules/active.list
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/active.list?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/active.list (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/active.list Fri Nov 24 09:29:40 2006
@@ -1,5 +1,5 @@
 # active ruleset list, automatically generated from http://ruleqa.spamassassin.org/
-# with results from: bb-doc bb-jm bb-zmi cthielen dos parkerm theo zmi
+# with results from: bb-doc bb-fredt bb-jm bb-zmi daf jm parkerm
 
 # tflags userconf
 ALL_TRUSTED
@@ -68,7 +68,7 @@
 CORRUPT_FROM_LINE_IN_HDRS
 
 # good enough
-CTYPE_8SPACE_GIF
+CTYPE_001C_A
 
 # good enough
 DC_GIF_MULTI_LARGO
@@ -77,6 +77,9 @@
 DC_GIF_UNO_LARGO
 
 # good enough
+DC_IMAGE001_GIF
+
+# good enough
 DC_IMAGE_SPAM_HTML
 
 # good enough
@@ -116,12 +119,24 @@
 DNS_FROM_SECURITYSAGE
 
 # good enough
+DOS_DOUBLE_SOTCK
+
+# good enough
+DOS_LET_GO_JOB
+
+# good enough
 DOS_MORTGAGE
 
 # good enough
+DOS_STOCK_O_PRICE
+
+# good enough
 DOS_TO_READ_STOCK
 
 # good enough
+DOS_TWO_MIS_STOCK
+
+# good enough
 DOS_YOUR_PLACE
 
 # good enough
@@ -136,6 +151,9 @@
 # good enough
 DYN_RDNS_SHORT_HELO_HTML
 
+# good enough
+DYN_RDNS_SHORT_HELO_IMAGE
+
 # tflags userconf
 ENV_AND_HDR_SPF_MATCH
 
@@ -146,12 +164,21 @@
 FB_OBFU_URI_COM
 
 # good enough
+FB_THOUS_PERSONAL
+
+# good enough
 FB_VALIUM_LEO2
 
 # good enough
 FB_VIAGRA_LEO3
 
 # good enough
+FB_YOURSELF_MASTER
+
+# good enough
+FH_DATE_IS_19XX
+
+# good enough
 FH_DATE_PAST_20XX
 
 # good enough
@@ -161,6 +188,9 @@
 FH_FROM_TO_PERC
 
 # good enough
+FH_HELO_GMAILSMTP
+
+# good enough
 FM_CUSTOMLOGODSGNc
 
 # tflags userconf
@@ -215,7 +245,10 @@
 HASHCASH_HIGH
 
 # good enough
-HDR_ORDER_FTSDMCXX
+HDR_ORDER_FTSDMCXX_001C
+
+# good enough
+HDR_ORDER_FTSDMCXX_BAT
 
 # tflags userconf
 HEAD_LONG
@@ -224,6 +257,9 @@
 HIGH_CODEPAGE_URI
 
 # good enough
+HS_BODY_UPLOADED_SOFTWARE
+
+# good enough
 HS_DRUG_DOLLAR_1
 
 # good enough
@@ -248,8 +284,14 @@
 HS_PHARMA_1
 
 # good enough
+HS_SUBJ_NEW_SOFTWARE
+
+# good enough
 HS_SUBJ_ONLINE_PHARMACEUTICAL
 
+# good enough
+HS_UPLOADED_SOFTWARE
+
 # tflags userconf
 HTML_CHARSET_FARAWAY
 
@@ -260,6 +302,9 @@
 JM_RCVD_QMAILV1
 
 # good enough
+JM_TORA_XM
+
+# good enough
 KAM_STOCKOTC
 
 # good enough
@@ -272,12 +317,18 @@
 KAM_STOCKTIP21
 
 # good enough
+KAM_STOCKTIP4
+
+# good enough
 KAM_STOCKTIP6
 
 # good enough
 KAM_STOCKTIP8
 
 # good enough
+L_SPAM_TOOL_13
+
+# good enough
 MID_14DIGITS_HEX
 
 # good enough
@@ -286,12 +337,18 @@
 # good enough
 MID_MJW_STOX
 
+# good enough
+MID_START_001C_A8C0_2
+
 # tflags userconf
 MIME_CHARSET_FARAWAY
 
 # tflags userconf
 MISSING_HB_SEP
 
+# good enough
+MOLE_28001106
+
 # tflags net
 NO_DNS_FOR_FROM
 
@@ -500,14 +557,26 @@
 RCVD_IN_XBL
 
 # good enough
+RCVD_LSO_SND
+
+# good enough
 RCVD_MAIL_COM
 
+# good enough
+RDNS_DYNAMIC
+
 # tflags net
 ROUND_THE_WORLD
 
 # good enough
+SB_GIF_AND_NO_URIS
+
+# good enough
 SHORT_HELO_AND_INLINE_IMAGE
 
+# good enough
+SPAMMY_XMAILER
+
 # tflags net
 SPF_FAIL
 
@@ -532,6 +601,18 @@
 # tflags net
 SPF_SOFTFAIL
 
+# good enough
+STOCK_IMG_CTYPE
+
+# good enough
+STOCK_IMG_HDR_FROM
+
+# good enough
+STOCK_IMG_HTML
+
+# good enough
+STOCK_IMG_OUTLOOK
+
 # tflags userconf
 SUBJECT_IN_BLACKLIST
 
@@ -560,9 +641,15 @@
 TVD_DEAR_HOMEOWNER
 
 # good enough
+TVD_DOLLARS_US
+
+# good enough
 TVD_EB_PHISH
 
 # good enough
+TVD_FINGER_02
+
+# good enough
 TVD_FLOAT_GENERAL
 
 # good enough
@@ -572,7 +659,7 @@
 TVD_FUZZY_FINANCE
 
 # good enough
-TVD_FUZZY_FIXED_RATE
+TVD_FUZZY_MICROCAP
 
 # good enough
 TVD_FUZZY_PHARMACEUTICAL
@@ -587,18 +674,15 @@
 TVD_FW_GRAPHIC_ID3_2
 
 # good enough
-TVD_FW_GRAPHIC_NAME_LONG
-
-# good enough
-TVD_FW_GRAPHIC_NAME_MID
-
-# good enough
 TVD_HEAD_KERNEL
 
 # good enough
 TVD_INCREASE_SIZE
 
 # good enough
+TVD_LINK_SAVE
+
+# good enough
 TVD_NOT_SATISFIED
 
 # good enough
@@ -608,10 +692,13 @@
 TVD_PH_REC
 
 # good enough
+TVD_PH_SUBJ_ACCOUNTS_POST
+
+# good enough
 TVD_PH_SUBJ_META
 
 # good enough
-TVD_PH_SUBJ_META_ALL
+TVD_PH_SUBJ_SEC_MEASURES
 
 # good enough
 TVD_PH_SUBJ_UPDATE
@@ -635,67 +722,67 @@
 TVD_RATWARE_MSGID_02
 
 # good enough
-TVD_SECTION
+TVD_RCVD_IP
 
 # good enough
-TVD_SINGLE_SPAN_DIV
+TVD_RCVD_IP4
 
 # good enough
-TVD_SPACED_SUBJECT_WORD3
+TVD_RCVD_SINGLE
 
 # good enough
-TVD_STOCK1
+TVD_SECTION
 
 # good enough
-TVD_SUBJ_ACC_NUM
+TVD_SINGLE_SPAN_DIV
 
 # good enough
-TVD_SUBJ_APPR_LOAN
+TVD_SPACED_SUBJECT_WORD3
 
 # good enough
-TVD_SUBJ_END_STAR
+TVD_SPACED_SUBJECT_WORD5
 
 # good enough
-TVD_SUBJ_FINGER_03
+TVD_STOCK1
 
 # good enough
-TVD_UA_FOSTERING
+TVD_SUBJ_ACC_NUM
 
 # good enough
-TVD_VISIT_PHARMA
+TVD_SUBJ_APPR_LOAN
 
 # good enough
-TVD_VIS_HIDDEN
+TVD_SUBJ_END_STAR
 
 # good enough
-DC_IMAGE001_GIF
+TVD_SUBJ_FINGER_03
 
 # good enough
-DOS_DOUBLE_SOTCK
+TVD_SUBJ_OWE
 
 # good enough
-DOS_LET_GO_JOB
+TVD_SUBJ_WIPE_DEBT
 
 # good enough
-DOS_STOCK_O_PRICE
+TVD_UA_FOSTERING
 
 # good enough
-DOS_TWO_MIS_STOCK
+TVD_VISIT_PHARMA
 
 # good enough
-FH_DATE_IS_19XX
+TVD_VIS_HIDDEN
 
 # good enough
-HS_BODY_UPLOADED_SOFTWARE
+CTYPE_1SPACE_GIF
 
 # good enough
-HS_SUBJ_NEW_SOFTWARE
+JM_ATTACHEMENT
 
 # good enough
-HS_UPLOADED_SOFTWARE
+KAM_STOCKTIP20
 
 # good enough
-KAM_STOCKTIP4
+RCVD_CORRUPT_ESMTP
 
 # tflags net
 RCVD_IN_DSBL
@@ -716,103 +803,10 @@
 RCVD_IN_XBL
 
 # good enough
-RCVD_LSO_SND
-
-# good enough
-RDNS_DYNAMIC
-
-# good enough
-SB_GIF_AND_NO_URIS
-
-# good enough
-TVD_DOLLARS_US
-
-# good enough
-TVD_FINGER_02
-
-# good enough
-TVD_LINK_SAVE
-
-# good enough
-TVD_PH_SUBJ_SEC_MEASURES
-
-# good enough
-TVD_SPACED_SUBJECT_WORD5
-
-# good enough
-TVD_SUBJ_FINGER_04
-
-# good enough
-TVD_SUBJ_OWE
-
-# good enough
-TVD_SUBJ_WIPE_DEBT
-
-# good enough
-XMAILER_MIMEOLE_OL_09BB4
-
-# good enough
-XMAILER_MIMEOLE_OL_20C99
-
-# good enough
-XMAILER_MIMEOLE_OL_3D61D
-
-# good enough
-XMAILER_MIMEOLE_OL_4B815
-
-# good enough
-XMAILER_MIMEOLE_OL_56DF6
-
-# good enough
-XMAILER_MIMEOLE_OL_58CB5
+TVD_FW_GRAPHIC_ID1
 
 # good enough
-XMAILER_MIMEOLE_OL_61ABB
-
-# good enough
-XMAILER_MIMEOLE_OL_62E6A
-
-# good enough
-XMAILER_MIMEOLE_OL_7533E
-
-# good enough
-XMAILER_MIMEOLE_OL_83BF7
-
-# good enough
-XMAILER_MIMEOLE_OL_91287
-
-# good enough
-XMAILER_MIMEOLE_OL_95A76
-
-# good enough
-XMAILER_MIMEOLE_OL_9B607
-
-# good enough
-XMAILER_MIMEOLE_OL_AA207
-
-# good enough
-XMAILER_MIMEOLE_OL_B4A6F
-
-# good enough
-XMAILER_MIMEOLE_OL_BC7E6
-
-# good enough
-XMAILER_MIMEOLE_OL_CAC8F
-
-# good enough
-XMAILER_MIMEOLE_OL_CE8A4
-
-# good enough
-XMAILER_MIMEOLE_OL_F3B05
-
-# good enough
-XMAILER_MIMEOLE_OL_F8BA4
-
-# good enough
-XM_OL_48072300
-
-# good enough
-XM_OL_49631700
+TVD_FW_GRAPHIC_ID2
 
 # tflags userconf
 UNPARSEABLE_RELAY
@@ -1007,12 +1001,15 @@
 XMAILER_MIMEOLE_OL_07794
 
 # good enough
-XMAILER_MIMEOLE_OL_12589
+XMAILER_MIMEOLE_OL_09BB4
 
 # good enough
 XMAILER_MIMEOLE_OL_1ECD5
 
 # good enough
+XMAILER_MIMEOLE_OL_20C99
+
+# good enough
 XMAILER_MIMEOLE_OL_20CC2
 
 # good enough
@@ -1034,9 +1031,15 @@
 XMAILER_MIMEOLE_OL_3AC1D
 
 # good enough
+XMAILER_MIMEOLE_OL_3D61D
+
+# good enough
 XMAILER_MIMEOLE_OL_465CD
 
 # good enough
+XMAILER_MIMEOLE_OL_4B815
+
+# good enough
 XMAILER_MIMEOLE_OL_4BF4C
 
 # good enough
@@ -1046,6 +1049,9 @@
 XMAILER_MIMEOLE_OL_4F240
 
 # good enough
+XMAILER_MIMEOLE_OL_58CB5
+
+# good enough
 XMAILER_MIMEOLE_OL_5B79A
 
 # good enough
@@ -1058,24 +1064,39 @@
 XMAILER_MIMEOLE_OL_60256
 
 # good enough
-XMAILER_MIMEOLE_OL_6554A
+XMAILER_MIMEOLE_OL_61ABB
+
+# good enough
+XMAILER_MIMEOLE_OL_62E6A
 
 # good enough
-XMAILER_MIMEOLE_OL_681AD
+XMAILER_MIMEOLE_OL_6554A
 
 # good enough
 XMAILER_MIMEOLE_OL_72641
 
 # good enough
+XMAILER_MIMEOLE_OL_7533E
+
+# good enough
 XMAILER_MIMEOLE_OL_812FF
 
 # good enough
+XMAILER_MIMEOLE_OL_83BF7
+
+# good enough
 XMAILER_MIMEOLE_OL_8627E
 
 # good enough
 XMAILER_MIMEOLE_OL_8E893
 
 # good enough
+XMAILER_MIMEOLE_OL_91287
+
+# good enough
+XMAILER_MIMEOLE_OL_9B607
+
+# good enough
 XMAILER_MIMEOLE_OL_9B90B
 
 # good enough
@@ -1097,6 +1118,9 @@
 XMAILER_MIMEOLE_OL_B9B11
 
 # good enough
+XMAILER_MIMEOLE_OL_BC7E6
+
+# good enough
 XMAILER_MIMEOLE_OL_C65FA
 
 # good enough
@@ -1106,43 +1130,37 @@
 XMAILER_MIMEOLE_OL_C9068
 
 # good enough
-XMAILER_MIMEOLE_OL_CF0C0
-
-# good enough
-XMAILER_MIMEOLE_OL_D03AB
-
-# good enough
-XMAILER_MIMEOLE_OL_EF20B
+XMAILER_MIMEOLE_OL_CAC8F
 
 # good enough
-XMAILER_MIMEOLE_OL_EF222
+XMAILER_MIMEOLE_OL_CE8A4
 
 # good enough
-XMAILER_MIMEOLE_OL_F475E
+XMAILER_MIMEOLE_OL_CF0C0
 
 # good enough
-XMAILER_MIMEOLE_OL_F6D01
+XMAILER_MIMEOLE_OL_D03AB
 
 # good enough
-XMAILER_MIMEOLE_OL_FF5C8
+XMAILER_MIMEOLE_OL_EF20B
 
 # good enough
-XM_OL_28001441
+XMAILER_MIMEOLE_OL_EF222
 
 # good enough
-XM_OL_29196600
+XMAILER_MIMEOLE_OL_F3B05
 
 # good enough
-XM_OL_29196700
+XMAILER_MIMEOLE_OL_F475E
 
 # good enough
-XM_OL_41332400
+XMAILER_MIMEOLE_OL_F6D01
 
 # good enough
-XM_OL_48071700
+XMAILER_MIMEOLE_OL_F8BA4
 
 # good enough
-ZMIde_EBAYJOBSURI
+XMAILER_MIMEOLE_OL_FF5C8
 
 # good enough
 ZMIde_SEXUALEXPL1

Modified: spamassassin/branches/jm_re2c_hacks/rules/v320.pre
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/v320.pre?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/v320.pre (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/v320.pre Fri Nov 24 09:29:40 2006
@@ -45,4 +45,4 @@
 # loadplugin Mail::SpamAssassin::Plugin::RabinKarpBody
 
 # P595Body - EXPERIMENTAL speedup plugin, using bleadperl RE optimizations
-# loadplugin Mail::SpamAssassin::Plugin::P595Body
+loadplugin Mail::SpamAssassin::Plugin::P595Body

Modified: spamassassin/branches/jm_re2c_hacks/sa-compile.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/sa-compile.raw?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/sa-compile.raw (original)
+++ spamassassin/branches/jm_re2c_hacks/sa-compile.raw Fri Nov 24 09:29:40 2006
@@ -137,7 +137,7 @@
 $spamtest->{bases_can_use_alternations} = 0;
 $spamtest->{bases_can_use_quantifiers} = 0;
 $spamtest->{bases_can_use_char_classes} = 0;
-$spamtest->{bases_split_out_alternations} = 0;
+$spamtest->{bases_split_out_alternations} = 1;
 
 $spamtest->init(1);
 

Modified: spamassassin/branches/jm_re2c_hacks/sa-learn.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/sa-learn.raw?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/sa-learn.raw (original)
+++ spamassassin/branches/jm_re2c_hacks/sa-learn.raw Fri Nov 24 09:29:40 2006
@@ -617,6 +617,9 @@
 mistake will be corrected.  SpamAssassin will automatically 'forget' the
 previous indications.
 
+Users of C<spamd> who wish to perform training remotely, over a network,
+should investigate the C<spamc -L> switch.
+
 =head1 OPTIONS
 
 =over 4

Modified: spamassassin/branches/jm_re2c_hacks/sa-update.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/sa-update.raw?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/sa-update.raw (original)
+++ spamassassin/branches/jm_re2c_hacks/sa-update.raw Fri Nov 24 09:29:40 2006
@@ -383,6 +383,9 @@
 # Use a temporary directory for all update channels
 my $UPDTmp;
 
+# we only need to lint the site pre files once
+my $site_pre_linted = 0;
+
 # Go ahead and loop through all of the channels
 foreach my $channel (@channels) {
   dbg("channel: attempting channel $channel");
@@ -450,6 +453,30 @@
     next;
   }
 
+  # we need a directory we control that we can use to avoid loading any rules
+  # when we lint the site pre files; we might as well use the channel temp dir
+  dbg("channel: preparing temp directory for new channel");
+  if (!$UPDTmp) {
+    $UPDTmp = Mail::SpamAssassin::Util::secure_tmpdir();
+    dbg("generic: update tmp directory $UPDTmp");
+  }
+  elsif (!clean_update_dir($UPDTmp)) {
+    die "channel: attempt to clean update dir failed, aborting";
+  }
+
+  # lint the site pre files (that will be used when lint checking the channel)
+  # before downloading the channel update
+  unless ($site_pre_linted) {
+    dbg("generic: lint checking site pre files once before attempting channel updates");
+    unless (lint_check_dir(File::Spec->catfile($UPDTmp, "doesnotexist"))) {
+      dbg("generic: lint of site pre files failed, cannot continue"); 
+      $exit = 2;
+      last;
+    }
+    dbg("generic: lint check of site pre files succeeded, continuing with channel updates");
+    $site_pre_linted = 1;
+  }
+
   # Read in the MIRRORED.BY file if it exists
   if (open(MIRBY, $mirby_path)) {
     local $/ = undef;
@@ -692,15 +719,6 @@
   # OK, we're all validated at this point, install the new version
   dbg("channel: file verification passed, testing update");
 
-  dbg("channel: preparing temp directory for new channel");
-  if (!$UPDTmp) {
-    $UPDTmp = Mail::SpamAssassin::Util::secure_tmpdir();
-    dbg("generic: update tmp directory $UPDTmp");
-  }
-  elsif (!clean_update_dir($UPDTmp)) {
-    die "channel: attempt to clean update dir failed, aborting";
-  }
-
   dbg("channel: extracting archive");
   if (!taint_safe_archive_extract($UPDTmp, $content_file)) {
     channel_failed("channel: archive extraction failed");
@@ -1246,17 +1264,18 @@
   # Logger setup), we can't change the debug level here to only include
   # "config" or otherwise be more terse. :(
   my $spamtest = new Mail::SpamAssassin( {
-    rules_filename      => $dir,
-    site_rules_filename => File::Spec->catfile($dir, "doesnotexist"),
-    userprefs_filename  => File::Spec->catfile($dir, "doesnotexist"),
-
-    local_tests_only    => 1,
-    dont_copy_prefs     => 1,
-
-    PREFIX              => $PREFIX,
-    DEF_RULES_DIR       => $DEF_RULES_DIR,
-    LOCAL_RULES_DIR     => $LOCAL_RULES_DIR,
-    LOCAL_STATE_DIR     => $LOCAL_STATE_DIR,
+    rules_filename       => $dir,
+    site_rules_filename  => $LOCAL_RULES_DIR,
+    ignore_site_cf_files => 1,
+    userprefs_filename   => File::Spec->catfile($dir, "doesnotexist"),
+
+    local_tests_only     => 1,
+    dont_copy_prefs      => 1,
+
+    PREFIX               => $PREFIX,
+    DEF_RULES_DIR        => $DEF_RULES_DIR,
+    LOCAL_RULES_DIR      => $LOCAL_RULES_DIR,
+    LOCAL_STATE_DIR      => $LOCAL_STATE_DIR,
   });
 
   # need to kluge disabling bayes since it may try to expire the DB, and
@@ -1436,6 +1455,10 @@
 installed successfully if --checkonly was not specified.
 
 An exit code of C<1> means no fresh updates were available.
+
+An exit code of C<2> means that at least one update is available but that a
+lint check of the site pre files failed.  The site pre files must pass a lint
+check before any updates are attempted.
 
 An exit code of C<4> or higher, indicates that errors occurred while
 attempting to download and extract updates.

Modified: spamassassin/branches/jm_re2c_hacks/t/meta.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/t/meta.t?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/t/meta.t (original)
+++ spamassassin/branches/jm_re2c_hacks/t/meta.t Fri Nov 24 09:29:40 2006
@@ -39,7 +39,7 @@
   unlink $output || die;
   %rules = ();
   %scores = ();
-  if (system("$prefix/masses/parse-rules-for-masses -o $output -d \"$prefix/rules\" -s $scoreset -x")) {
+  if (system("$prefix/build/parse-rules-for-masses -o $output -d \"$prefix/rules\" -s $scoreset -x")) {
     warn "parse-rules-for-masses failed!";
   }
   eval {