You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/06/28 22:08:16 UTC

svn commit: r202274 - in /spamassassin/trunk/masses: mass-check-results-to-mbox mboxget

Author: jm
Date: Tue Jun 28 13:08:15 2005
New Revision: 202274

URL: http://svn.apache.org/viewcvs?rev=202274&view=rev
Log:
masses consolidation: refactor mass-check-results-to-mbox functionality into mboxget.  Note: this means that mboxget now annotates its output with the original message's mass-check ID in a new 'X-Mass-Check-Id:' header; use the '-noannotate' switch to avoid that.  Otherwise it is perfectly compatible with the previous behaviour of both scripts.

Modified:
    spamassassin/trunk/masses/mass-check-results-to-mbox
    spamassassin/trunk/masses/mboxget

Modified: spamassassin/trunk/masses/mass-check-results-to-mbox
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/mass-check-results-to-mbox?rev=202274&r1=202273&r2=202274&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check-results-to-mbox (original)
+++ spamassassin/trunk/masses/mass-check-results-to-mbox Tue Jun 28 13:08:15 2005
@@ -1,148 +1,2 @@
 #!/usr/bin/perl
-#
-# very handy for e.g.:
-#
-#   grep SUBJECT_FREQ spam.log | ./mass-check-results-to-mbox | grep Subject:
-#
-# <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# </@LICENSE>
-
-my $grep = undef;
-my $annotate = 1;
-while ($#ARGV >= 0) {
-  $_ = $ARGV[0]; shift;
-  if ($_ eq '-grep') { $grep = $ARGV[0]; shift; }
-  if ($_ eq '-noannotate') { $annotate = 0; }
-}
-
-while (<>) {
-  s/^[^\s:]+://;  # filenames, from "grep foo *"
-
-  next if /^#/;
-  /^.\s+-?\d+\s+(\S+) / or next;
-  my $mail = $1;
-
-  if ($mail =~ /^(\S+):</) {
-    my $msgp = find_in_mailbox ($mail);
-    if (defined $msgp) {
-      $annotate and unshift (@$msgp, "X-Mass-Check-Id: $mail\n");
-      handle ($msgp);
-    } else {
-      mywarn ("failed to find message for $mail\n");
-    }
-
-  } else {
-    if ($mail =~ /\.gz$/) {
-      open (IN, "gunzip -cd $mail |") or mywarn ("gunzip $mail failed: $@");
-    } elsif ($mail =~ /\.bz2$/) {
-      open (IN, "bzip2 -cd $mail |") or mywarn ("bunzip2 $mail failed: $@");
-    } else {
-      open (IN, "<$mail") or mywarn ("open $mail failed: $@");
-    }
-    my @msg = (<IN>); close IN;
-
-    while (scalar @msg > 0 &&
-	$msg[0] =~ /^(?:From|X-Mass-Check-Id:) /)
-    {
-      shift @msg;
-    }
-    $annotate and unshift (@msg, "X-Mass-Check-Id: $mail\n");
-
-    handle (\@msg);
-  }
-}
-
-###########################################################################
-
-sub find_in_mailbox {
-  my ($mail) = @_;
-  $mail =~ /^(\S+):</;
-  $folder = $1; my $wantid = $_;
-
-  if (defined $CURRENT_MBOX_OPEN && $folder eq $CURRENT_MBOX_OPEN) {
-    # try from current position first
-    my $msgp = mbox_search($mail, $folder);
-    if (defined ($msgp->[0])) { return $msgp; }
-  }
-
-  # failed. have to (re-|)open.
-  if ($folder =~ /\.gz$/) {
-    open (MBOX, "gunzip -cd $folder |") or mywarn ("gunzip $folder failed: $@");
-  } elsif ($folder =~ /\.bz2$/) {
-    open (MBOX, "bzip2 -cd $folder |") or mywarn ("bunzip2 $folder failed: $@");
-  } else {
-    open (MBOX, "<$folder") or mywarn ("open $folder failed: $@");
-  }
-
-  $CURRENT_MBOX_OPEN = $folder;
-  while (<MBOX>) { /^From \S+ +... ... / and last; }
-  my $msgp = mbox_search($mail, $folder);
-  return $msgp;
-}
-
-sub mbox_search {
-  my ($mail, $folder) = @_;
-  my $wantid = $mail;
-  
-  my $count = 0;
-  my $host  = $ENV{'HOSTNAME'} || $ENV{'HOST'} || `hostname` || 'localhost';
-
-  while (!eof MBOX) {
-    my @msg = ();
-    my $msgid = undef;
-    my $in_header = 1;
-    $count++;
-
-    while (<MBOX>) {
-      if (/^$/ && $in_header) {
-        $in_header = 0 ;
-
-        if (!defined ($msgid)) {
-          $msgid = sprintf('<no-msgid-in-msg-%06d@%s.masses.spamassasin.org>', $count, $host);
-          push (@msg, "Message-Id: $msgid\n");
-        }
-      }
-      if ($in_header) {
-        /^Message-Id: (.*)\s*$/i and $msgid = $1;
-      }
-
-      /^From \S+ +... ... / and last;
-      push (@msg, $_);
-    }
-
-    $msgid = "$folder:$msgid";	# so we can find it again
-    $msgid =~ s/\s/_/gs;	# make safe
-
-    # print "JMD $wantid $msgid\n";
-
-    if ($wantid ne $msgid) { next; }
-    return \@msg;
-  }
-
-  close MBOX; $CURRENT_MBOX_OPEN = undef;
-}
-
-###########################################################################
-
-sub handle {
-  my $msgp = shift;
-  print STDOUT "From nobody\@nowhere  Wed Aug 21 12:41:07 2002\n", @$msgp, "\n";
-}
-
-sub mywarn {
-  warn @_;
-  if ($annotate) { print "X-Mass-Check-Warning: ".join ('',@_)."\n"; }
-}
+exec("./mboxget", @ARGV);

Modified: spamassassin/trunk/masses/mboxget
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/mboxget?rev=202274&r1=202273&r2=202274&view=diff
==============================================================================
--- spamassassin/trunk/masses/mboxget (original)
+++ spamassassin/trunk/masses/mboxget Tue Jun 28 13:08:15 2005
@@ -1,8 +1,12 @@
 #!/usr/bin/perl -w
 
-# mboxget - get a message from a mailbox
+# mboxget - get a message from a mailbox or maildir, from mass-check output
 #
-# usage: mboxget [mass-check-mbox-id ...]
+# usage: mboxget [-noannotate] [mass-check-mbox-or-file-id ...]
+#
+# example:
+#
+#   grep SUBJECT_FREQ spam.log | ./mboxget | grep Subject:
 #
 # <@LICENSE>
 # Copyright 2004 Apache Software Foundation
@@ -25,38 +29,89 @@
 my $prog = $0;
 $prog =~ s@.*/@@;
 
+sub mywarn;
+
+my $annotate = 1;
+while ($#ARGV >= 0) {
+  $_ = $ARGV[0]; shift;
+  if ($_ eq '-noannotate') { $annotate = 0; }
+  else { unshift @ARGV, $_; last; }
+}
+
 my @inputs;
 push @inputs, @ARGV;
 
 if (!@inputs) {
   while (<STDIN>) {
-    if (/^[Y.]\s+-?\d+\s+(\S+)\s+\S+/) {
+    s/^[^\s:]+://;  # filenames, from "grep foo *"
+
+    if (/^[Y\.]\s+-?\d+\s+(\S+)\s+\S+/) {
       # mass-check format
-      push @inputs, $1;
+      handle_input($1);
     }
     else {
       next if /^#/;
       chomp;
-      push @inputs, $_;
+      handle_input($_);
     }
   }
 }
+exit;
 
-foreach my $where (@inputs) {
+sub handle_input {
+  my $where = shift;
   my ($file, $offset) = ($where =~ m/(.*?)(?:\.(\d+))?$/);
-  open(INPUT, $file) || die("$prog: open $file failed: $!\n");
+
+  if ($file =~ /\.gz$/) {
+    open (INPUT, "gunzip -cd $file |") or mywarn "gunzip $file failed: $!";
+  } elsif ($file =~ /\.bz2$/) {
+    open (INPUT, "bzip2 -cd $file |") or mywarn "bunzip2 $file failed: $!";
+  } else {
+    open (INPUT, "<$file") or mywarn "open $file failed: $!";
+  }
+
   if ($offset) {
-    seek(INPUT, $offset, 0) || die("$prog: seek $offset failed: $!\n");
+    # TODO: steal open-file caching code from old revisions of
+    # mass-check-results-to-mbox
+    if (!seek(INPUT, $offset, 0)) {
+      mywarn "$prog: seek $offset failed: $!\n";
+      close INPUT;
+      return;
+    }
   }
+
+  # read the message into @msg
   my $past = 0;
+  my @msg = ();
   while (<INPUT>) {
-    if ($past) {
+    if ($past && $offset) {
+      # only do this for mboxes
       last if substr($_,0,5) eq "From ";
     }
     else {
       $past = 1;
     }
-    print $_;
+    push (@msg, $_);
   }
   close INPUT;
+
+  # now chop off the leading headers that may have come from a previous
+  # run, or will interfere with insertion of the X-Mass-Check-Id hdr
+  my $fromline = "From nobody\@nowhere  Wed Jan  1 00:00:00 2000\n";
+  while (scalar @msg > 0 &&
+      $msg[0] =~ /^(?:From|X-Mass-Check-Id:) /)
+  {
+    if ($msg[0] =~ /^From /) { $fromline = $msg[0]; }
+    shift @msg;
+  }
+
+  # and output
+  $annotate and unshift (@msg, "X-Mass-Check-Id: $where\n");
+  print $fromline, @msg, "\n";
+}
+
+sub mywarn {
+  warn @_;
+  if ($annotate) { print "X-Mass-Check-Warning: ".join ('',@_)."\n"; }
 }
+