Posted to commits@spamassassin.apache.org by pd...@apache.org on 2019/06/09 14:49:02 UTC

svn commit: r1860907 - /spamassassin/trunk/build/mkupdates/listpromotable

Author: pds
Date: Sun Jun  9 14:49:02 2019
New Revision: 1860907

URL: http://svn.apache.org/viewvc?rev=1860907&view=rev
Log:
Use last-net to ensure net rules are eligible for promotion

Modified:
    spamassassin/trunk/build/mkupdates/listpromotable
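
In outline, the patch teaches listpromotable to also fetch the ruleqa "last-net"
report (the results of the weekly --net mass-check), parse the rules it lists,
and publish the ones carrying "tflags net", which otherwise never accumulate
enough nightly results to be promoted. The following is a minimal sketch of the
fetch-and-cache step only; it assumes get() comes from LWP::Simple (the diff
calls a bare get()) and that the ruleqa base URL looks roughly as shown, so
treat it as an illustration rather than the committed code.

  use strict;
  use warnings;
  use LWP::Simple qw(get);

  my $cgi_url = "https://ruleqa.spamassassin.org/";   # assumed base URL
  my $cache   = 'ruleqa.cache.';
  my $last_net;

  # Refresh the cached copy if it is missing or more than half a day old.
  if (!-f "${cache}net" || (-M "${cache}net") > 0.5) {
    my $neturl = $cgi_url . "last-net?xml=1";
    $last_net = get($neturl) or die "HTTP get failed: $neturl\n";
    open(my $out, '>', "${cache}net") or die "cannot write cache: $!";
    print $out $last_net;
    close $out;
  } else {
    open(my $in, '<', "${cache}net") or die "cannot read cache: $!";
    local $/;                      # slurp mode
    $last_net = <$in>;
    close $in;
  }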

Modified: spamassassin/trunk/build/mkupdates/listpromotable
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/mkupdates/listpromotable?rev=1860907&r1=1860906&r2=1860907&view=diff
==============================================================================
--- spamassassin/trunk/build/mkupdates/listpromotable (original)
+++ spamassassin/trunk/build/mkupdates/listpromotable Sun Jun  9 14:49:02 2019
@@ -32,6 +32,57 @@ my $cgi_url = "https://ruleqa.spamassass
 my @doc = ();
 my $cache = 'ruleqa.cache.';
 my $submitters = '';
+my $last_net;
+
+if (!$FROM_CACHE || !-f "${cache}net" || (-M "${cache}net") > 0.5) {
+  my $neturl = $cgi_url."last-net?xml=1";
+  warn "HTTP get: $neturl\n";
+  $last_net = get ($neturl);
+  if (!$last_net) {
+    die "HTTP get failed: last-net\n";
+  }
+  if ($MAKE_CACHE) {
+    open(O, ">${cache}net"); print O $last_net; close O;
+  }
+} else {
+  open(I, "<${cache}net") or die; $last_net = join('',<I>); close I;
+}
+
+if ($last_net =~ m{
+          <span\s+class="daterev_masscheck_description\smcviewing"
+          .{0,400}
+          <span\s+class="mcsubmitters">\s*(.*?)\s*</span>
+        }sx)
+  {
+
+  my $netsubs = $1;
+  ($submitters ne '') and $submitters .= "; ";
+  $submitters .= "last-net: $netsubs";
+} else {
+  loghtml_die("no 'mcviewing', 'mcsubmitters' microformats for last-net");
+}
+
+my $netlist;
+while ($last_net =~ m!<rule>(.*?)</rule>!xg) {
+  my $xml = $1;
+  my $obj = { };
+
+  while ($xml =~ s!<([A-Za-z0-9_]+)>(.*?)</\1>!!) {
+    $obj->{$1} = $2;
+  }
+  while ($xml =~ s!<([A-Za-z0-9_]+)\s+esc=["']1["']>(.*?)</\1>!!) {
+    $obj->{$1} = uri_unescape($2);
+  }
+
+  my $name = $obj->{test};
+  $obj->{detailhref} = $cgi_url.$obj->{detailhref};
+
+  $netlist->{$name} = $obj;
+}
+
+if (!scalar keys %{$netlist}) {
+  loghtml_die("no rules found? on last-net");
+}
 
 my $url;        # tracks the last day used
 my $dayoffset = 0;
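
The new parsing code above relies on two small regex idioms: a backreference
(\1) so that each element's closing tag must match its opening tag, and a
second pass for elements marked esc="1", whose text is URI-escaped. A toy
illustration of the same idiom follows; the sample <rule> fragment is invented,
though the element names test and detailhref mirror the code above.

  use strict;
  use warnings;
  use URI::Escape qw(uri_unescape);

  my $sample = '<rule><test>T_EXAMPLE_NET</test>'
             . '<detailhref esc="1">%3Frule%3DT_EXAMPLE_NET</detailhref></rule>';

  my %netlist;
  while ($sample =~ m!<rule>(.*?)</rule>!sg) {
    my $xml = $1;
    my $obj = {};
    while ($xml =~ s!<([A-Za-z0-9_]+)>(.*?)</\1>!!) {
      $obj->{$1} = $2;                    # plain elements
    }
    while ($xml =~ s!<([A-Za-z0-9_]+)\s+esc=["']1["']>(.*?)</\1>!!) {
      $obj->{$1} = uri_unescape($2);      # URI-escaped elements
    }
    $netlist{ $obj->{test} } = $obj;
  }
  print "$_ -> $netlist{$_}{detailhref}\n" for sort keys %netlist;

This prints "T_EXAMPLE_NET -> ?rule=T_EXAMPLE_NET"; the committed code goes on
to prefix detailhref with $cgi_url and keys each entry into $netlist for the
promotion pass added further down.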
@@ -80,7 +131,7 @@ with_new_offset:
     # it's the weekly --net run.  That generally contains a much
     # smaller set of logs (since it takes longer to run mass-check --net)
     # so the results are untrustworthy.
-    if (($daysubs =~ /(?:^|\s)net-/) && ((localtime(time))[6])) {
+    if ($daysubs =~ /(?:^|\s)net-/) {
       warn "day $day contains a --net mass-check! offsetting by an extra day\n";
       $dayoffset++; goto with_new_offset;
     }
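
For context on the removed clause: (localtime(time))[6] is Perl's numeric
day-of-week slot, 0 for Sunday, so the old condition only let a day containing
a net- mass-check through on Sundays. After this change such days are always
skipped, presumably because net rules are now promoted via the separate
last-net path added above. A quick check of that slot (a trivial standalone
snippet, not part of the patch):

  use strict;
  use warnings;

  # (localtime(time))[6] is the weekday index: 0 = Sunday .. 6 = Saturday.
  my $wday = (localtime(time))[6];
  print "today's weekday index: $wday\n";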
@@ -179,6 +230,47 @@ if (scalar @spcs < 2) {
   die "not generating results; less than 3 submitter results available!\n";
 }
 
+foreach my $netrule (sort keys %{$netlist}) {
+  my $name = $netrule;
+  my $notes = '';
+
+  my $no_t = $name;
+  if ($no_t =~ s/^T_//) {
+    if (defined $mailsa->{conf}->{scores}->{$no_t}) {
+      $name = $no_t;
+    }
+  }
+
+  # now that it's ok to have sandbox rules without a T_ prefix,
+  # "T_" prefix implies "tflags nopublish"
+  next if ($name =~ /^T_/);
+
+  # ignore rules that don't exist (if they have a desc or score,
+  # they exist according to the Conf parser)
+  next unless ($mailsa->{conf}->{descriptions}->{$name}
+        || $mailsa->{conf}->{scores}->{$name});
+
+  my $tfs = $mailsa->{conf}->{tflags}->{$name};
+
+  # "nopublish" tflags
+  if ($tfs) {
+    next if ($tfs =~ /\bnopublish\b/);
+  }
+
+  next if ($mailsa->{conf}->{testrules}->{$name});
+
+  if ($tfs && $tfs =~ /\b(net)\b/) {
+    $notes = "tflags ".$1;
+    goto publish;
+  }
+
+  next;
+publish:
+
+  print "\n# $notes\n$name\n";
+
+}
+
 # base most of our decisions off day 1 (last night's mass-checks)
 foreach my $plistkey (sort keys %{$plist->[1]}) {
   my $name = $plistkey;