You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/04/30 17:07:00 UTC

svn commit: r398355 - /spamassassin/trunk/masses/rule-qa/corpus-hourly

Author: jm
Date: Sun Apr 30 08:06:58 2006
New Revision: 398355

URL: http://svn.apache.org/viewcvs?rev=398355&view=rev
Log:
better way to expose metadata for log reporting; via XML in hit-freqs reports

Modified:
    spamassassin/trunk/masses/rule-qa/corpus-hourly

Modified: spamassassin/trunk/masses/rule-qa/corpus-hourly
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/corpus-hourly?rev=398355&r1=398354&r2=398355&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/corpus-hourly (original)
+++ spamassassin/trunk/masses/rule-qa/corpus-hourly Sun Apr 30 08:06:58 2006
@@ -19,8 +19,6 @@
 $opt_override ||= '';
 $opt_tag ||= 'n';       # nightly is the default
 
-my $LOGS_REPORT_FILE = "/var/www/ruleqa.spamassassin.org/reports/logs_report.txt";
-
 use File::Path;
 use File::Copy;
 use Time::ParseDate;
@@ -38,6 +36,9 @@
 my $configuration = "$ENV{HOME}/.corpus";
 my %opt;
 my %revision = ();
+my %filesize = ();
+my %dateline = ();
+my %mtime = ();
 my %logs_by_daterev = ();
 my %is_net_daterev = ();
 my %time = ();
@@ -137,8 +138,6 @@
   @files = sort readdir(CORPUS);
   closedir(CORPUS);
 
-  open_logs_report();
-
   @files = grep {
     /^(?:spam|ham)-(?:net-)?[-\w]+\.log$/ && -f "$corpusdir/$_" && -M _ < 10 
   } @files;
@@ -161,14 +160,18 @@
       }
       elsif ($line =~ m/^# Date:\s*(\S+)/) {
         # a better way to do the above.  TODO: parse it instead
+        $dateline{$file} = $1;
       }
       elsif ($line =~ m/^# SVN revision:\s*(\S+)/) {
-        my $rev = $1;
-        $revision{$file} = $rev;
+        $revision{$file} = $1;
       }
     }
     close(FILE);
 
+    my @s = stat("$corpusdir/$file");
+    $filesize{$file} = $s[7];
+    $mtime{$file} = $s[9];
+
     if (!defined $time{$file}) {
       warn "$corpusdir/$file: no time found, ignored\n"; next;
     }
@@ -191,59 +194,7 @@
     else {
       print "$corpusdir/$file: rev=$daterev time=$time{$file} (set 0)\n";
     }
-
-    add_to_logs_report({
-              file => "$corpusdir/$file",
-              daterev => $daterev,
-              timet => $time{$file},
-              isnet => $is_net_daterev{$daterev},
-              rev => $revision{$file},
-              headers => $headers
-            });
   }
-
-  complete_logs_report();
-}
-
-sub open_logs_report {
-  open (LOGREP, ">>$LOGS_REPORT_FILE")
-        or die "cannot write to $LOGS_REPORT_FILE";
-
-  my $now = localtime time;
-
-  print LOGREP qq{
-
----------------------------------------------------------------------------
-Reading logs at $now:
-
-  };
-}
-
-sub add_to_logs_report {
-  my ($o) = @_;
-
-  $o->{file} =~ /(?:ham|spam)(?:-net)?-(\S+)\.log/
-            or return;
-
-  my $who = $1;
-  my $ls = `ls -lL $o->{file}`;
-
-  my $hdrs = $o->{headers};
-  $hdrs =~ s/^\# /    /gm;
-  $hdrs =~ s/^(.{0,70}).*?$/$1 [...]/gm;
-
-  print LOGREP qq{
-Contributor: $who:
-    $ls
-    DateRev: $o->{daterev}
-$hdrs
-
-  };
-
-}
-
-sub complete_logs_report {
-  close LOGREP;
 }
 
 sub sort_all {
@@ -390,11 +341,8 @@
     open(OUT, "> $tmpfname") or warn "cannot write to $tmpfname";
     print OUT "# ham results used for $daterev $class $age: " . join(" ", @ham) . "\n";
     print OUT "# spam results used for $daterev $class $age: " . join(" ", @spam) . "\n";
-    for (@ham) {
-      print OUT "# $_ was at r$revision{$_}\n";
-    }
-    for (@spam) {
-      print OUT "# $_ was at r$revision{$_}\n";
+    for (@ham, @spam) {
+      print OUT "# ".log_metadata_xml($daterev, $_);
     }
 
     push (@tmps, $tmpfname);
@@ -522,5 +470,25 @@
   $output_revpath =~ s/\//-/;       # looks nicer
 
   return $dir;
+}
+
+# Return a one-line, newline-terminated <mclogmd> XML record for log file $f
+# in $daterev; gen_info_xml later extracts these records into info.xml.
+sub log_metadata_xml {
+  my ($daterev, $f) = @_;
+
+  my $str = qq{
+    <mclogmd>
+      <daterev>$daterev</daterev>
+      <rev>$revision{$f}</rev>
+      <fsize>$filesize{$f}</fsize>
+      <mcstartdate>$dateline{$f}</mcstartdate>
+      <mtime>$mtime{$f}</mtime>
+    </mclogmd>
+  };
+
+  # collapse to a single line, then terminate it: the caller prints one
+  # "# "-prefixed record per file and must not run them together
+  $str =~ s/\s+/ /gs;
+  return "$str\n";
 }