You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2006/12/03 21:21:19 UTC

svn commit: r481873 - in /spamassassin/trunk: lib/Mail/SpamAssassin/ArchiveIterator.pm sa-learn.raw spamassassin.raw

Author: felicity
Date: Sun Dec  3 12:21:15 2006
New Revision: 481873

URL: http://svn.apache.org/viewvc?view=rev&rev=481873
Log:
bug 5145: better deal with STDIN in spamassassin and sa-learn, since ArchiveIterator doesn't deal with it so well

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
    spamassassin/trunk/sa-learn.raw
    spamassassin/trunk/spamassassin.raw

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm?view=diff&rev=481873&r1=481872&r2=481873
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm Sun Dec  3 12:21:15 2006
@@ -238,18 +238,21 @@
 C<mbox> an mbox formatted file, or C<mbx> for an mbx formatted directory.
 
 C<detect> can also be used.  This assumes C<mbox> for any file whose path
-contains the pattern C</\.mbox/i>, C<file> for STDIN and anything that is
-not a directory, or C<directory> otherwise.
+contains the pattern C</\.mbox/i>, C<file> for anything that is not a
+directory, or C<directory> otherwise.
 
 =item raw_location
 
-Path to file or directory.  Can be "-" for STDIN.  File globbing is allowed
-using the standard csh-style globbing (see C<perldoc -f glob>).  C<~> at the
-front of the value will be replaced by the C<HOME> environment variable.
-Escaped whitespace is protected as well.
+Path to file or directory.  File globbing is allowed using the
+standard csh-style globbing (see C<perldoc -f glob>).  C<~> at the
+front of the value will be replaced by the C<HOME> environment
+variable.  Escaped whitespace is protected as well.
 
 B<NOTE:> C<~user> is not allowed.
 
+B<NOTE 2:> C<-> is not allowed as a raw location.  To have
+ArchiveIterator deal with STDIN, generate a temp file.
+
 =back
 
 =cut
@@ -459,6 +462,11 @@
       next;
     }
 
+    if ($rawloc eq '-') {
+      warn 'archive-iterator: raw location "-" is not supported';
+      next;
+    }
+
     # use ham by default, things like "spamassassin" can't specify the type
     $class = substr($class, 0, 1) || 'h';
 
@@ -482,8 +490,7 @@
           # filename indicates mbox
           $format = 'mbox';
         } 
-	elsif ($location eq '-' || !(-d $location)) {
-	  # stdin is considered a file if not passed as mbox
+	elsif (!(-d $location)) {
           $format = 'file';
 	}
 	else {
@@ -687,7 +694,7 @@
   my ($self, $class, $folder, $bkfunc) = @_;
   my @files;
 
-  if ($folder ne '-' && -d $folder) {
+  if (-d $folder) {
     # passed a directory of mboxes
     $folder =~ s/\/\s*$//; #Remove trailing slash, if there
     if (!opendir(DIR, $folder)) {
@@ -792,7 +799,7 @@
   my ($self, $class, $folder, $bkfunc) = @_;
   my (@files, $fp);
 
-  if ($folder ne '-' && -d $folder) {
+  if (-d $folder) {
     # got passed a directory full of mbx folders.
     $folder =~ s/\/\s*$//; # remove trailing slash, if there is one
     if (!opendir(DIR, $folder)) {

Modified: spamassassin/trunk/sa-learn.raw
URL: http://svn.apache.org/viewvc/spamassassin/trunk/sa-learn.raw?view=diff&rev=481873&r1=481872&r2=481873
==============================================================================
--- spamassassin/trunk/sa-learn.raw (original)
+++ spamassassin/trunk/sa-learn.raw Sun Dec  3 12:21:15 2006
@@ -380,29 +380,44 @@
     close(F);
   }
 
-  # add leftover args as targets
-  foreach (@ARGV) { target($_); }
+  ###########################################################################
+  # Deal with the target listing, and STDIN -> tempfile
 
-  #No arguments means they want stdin:
-  if ( $#targets < 0 ) {
-    target('-');
-  }
+  my $tempfile; # will be defined if stdin -> tempfile
+  push(@targets, @ARGV);
+  @targets = ('-') unless @targets;
+
+  for(my $elem = 0; $elem <= $#targets; $elem++) {
+    # ArchiveIterator doesn't really like STDIN, so if "-" is specified
+    # as a target, make it a temp file instead.
+    if ( $targets[$elem] =~ /(?:^|:)-$/ ) {
+      if (defined $tempfile) {
+        # uh-oh, stdin specified multiple times?
+        warn "skipping extra stdin target (".$targets[$elem].")\n";
+        splice @targets, $elem, 1;
+        $elem--; # go back to this element again
+        next;
+      }
+      else {
+        my $handle;
 
-  # mbox and mbx doesn't deal with STDIN, so make a temp file if they want STDIN.
-  # do it here since they may specify "-" on the commandline
-  #
-  my $tempfile;
-  if ( $targets[0] =~ /:mbo?x:-$/ ) {
-    my $handle;
-
-    local $/ = undef;    # go into slurp mode
-    ( $tempfile, $handle ) = Mail::SpamAssassin::Util::secure_tmpfile();
-    print {$handle} <STDIN>;
-    close $handle;
+        local $/ = undef;    # go into slurp mode
+        ( $tempfile, $handle ) = Mail::SpamAssassin::Util::secure_tmpfile();
+        print {$handle} <STDIN>;
+        close $handle;
 
-    # re-aim the targets at the tempfile instead of STDIN
-    $targets[0] =~ s/:-$/:$tempfile/;
+        # re-aim the targets at the tempfile instead of STDIN
+        $targets[$elem] =~ s/-$/$tempfile/;
+      }
+    }
+
+    # make sure the target list is in the normal AI format
+    if ($targets[$elem] !~ /^[^:]*:[a-z]+:/) {
+      $targets[$elem] = target($targets[$elem]);
+    }
   }
+
+  ###########################################################################
 
   my $iter = new Mail::SpamAssassin::ArchiveIterator(
     {

Modified: spamassassin/trunk/spamassassin.raw
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamassassin.raw?view=diff&rev=481873&r1=481872&r2=481873
==============================================================================
--- spamassassin/trunk/spamassassin.raw (original)
+++ spamassassin/trunk/spamassassin.raw Sun Dec  3 12:21:15 2006
@@ -308,6 +308,44 @@
 }
 
 ###########################################################################
+# Deal with the target listing, and STDIN -> tempfile
+
+my $tempfile; # will be defined if stdin -> tempfile
+push(@targets, @ARGV);
+@targets = ('-') unless @targets;
+
+for(my $elem = 0; $elem <= $#targets; $elem++) {
+  # ArchiveIterator doesn't really like STDIN, so if "-" is specified
+  # as a target, make it a temp file instead.
+  if ( $targets[$elem] =~ /(?:^|:)-$/ ) {
+    if (defined $tempfile) {
+      # uh-oh, stdin specified multiple times?
+      warn "skipping extra stdin target (".$targets[$elem].")\n";
+      splice @targets, $elem, 1;
+      $elem--; # go back to this element again
+      next;
+    }
+    else {
+      my $handle;
+
+      local $/ = undef;    # go into slurp mode
+      ( $tempfile, $handle ) = Mail::SpamAssassin::Util::secure_tmpfile();
+      print {$handle} <STDIN>;
+      close $handle;
+
+      # re-aim the targets at the tempfile instead of STDIN
+      $targets[$elem] =~ s/-$/$tempfile/;
+    }
+  }
+
+  # make sure the target list is in the normal AI format
+  if ($targets[$elem] !~ /^[^:]*:[a-z]+:/) {
+    my $format = $opt{'format'} || 'detect';
+    $targets[$elem] = join ( ":", '', $format, $targets[$elem] );
+  }
+}
+
+###########################################################################
 
 # Everything below here needs ArchiveIterator ...
 my $iter = new Mail::SpamAssassin::ArchiveIterator(
@@ -318,28 +356,6 @@
 );
 
 $iter->set_functions( \&wanted, \&result );
-
-# add leftover args as targets
-# no arguments means they want stdin:
-push ( @ARGV, '-' ) if ( !@ARGV );
-@targets = map { join ( ":", '', $opt{'format'}, $_ ) } @ARGV;
-
-# mbox and mbx handling doesn't deal with STDIN, so make a temp file if they
-# want STDIN.  do it here since they may specify "-" on the commandline
-# instead of leaving it as the default.
-#
-my $tempfile;
-if ( $targets[0] =~ /:mbo?x:-$/ ) {
-  my $handle;
-
-  local $/ = undef;    # go into slurp mode
-  ( $tempfile, $handle ) = Mail::SpamAssassin::Util::secure_tmpfile();
-  print {$handle} <STDIN>;
-  close $handle;
-
-  # re-aim the targets at the tempfile instead of STDIN
-  $targets[0] =~ s/:-$/:$tempfile/;
-}
 
 # Go run the messages!
 # bug 4930: use a temp variable since "||=" decides whether or not to set the