You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/02/06 21:26:00 UTC

svn commit: rev 6553 - incubator/spamassassin/trunk/masses/rule-dev

Author: jm
Date: Fri Feb  6 12:25:59 2004
New Revision: 6553

Added:
   incubator/spamassassin/trunk/masses/rule-dev/
   incubator/spamassassin/trunk/masses/rule-dev/maildir-scan-headers   (contents, props changed)
Log:
rule development helper script: scan a corpus of mail for one or more named headers, producing easily-greppable output, to spot correlations between header patterns

Added: incubator/spamassassin/trunk/masses/rule-dev/maildir-scan-headers
==============================================================================
--- (empty file)
+++ incubator/spamassassin/trunk/masses/rule-dev/maildir-scan-headers	Fri Feb  6 12:25:59 2004
@@ -0,0 +1,79 @@
+#!/usr/bin/perl
+#
+# maildir-scan-headers - grep a set of mail files for patterns, and list
+# all headers specified.
+
+# usage - die() with the command-line synopsis.  The text ends in a newline,
+# so Perl does not append a file name and line number to it.
+sub usage {
+  my $synopsis = "usage: maildir-scan-headers [-h headers] [--headers headers]
+	[-i] [-raw] [-sort] fileordir1 fileordir2 ...  \n";
+
+  die $synopsis;
+}
+
+use strict;
+use warnings;
+
+# Command-line state, filled in by GetOptions below.
+my $wanted = '';	# -h/--headers: header names to report
+my $sort = 0;		# --sort: sort the matched headers of each message
+my $casei = 0;		# -i/--casei: match header names case-insensitively
+my $raw = 0;		# --raw: do not skip ahead in "X-Spam-Flag: YES" mails
+my $help = 0;		# --help: show the synopsis and stop
+my @files = ();		# remaining arguments: files and directories to scan
+
+use Getopt::Long qw(:config no_ignore_case);
+
+# Getopt::Long traps a die() raised inside an option handler and only warns
+# about it, so calling usage() directly from the 'help' handler would print
+# the synopsis and then carry on scanning.  Record the request in a flag and
+# act on it once parsing is over; also stop when GetOptions reports a bad
+# option instead of silently ignoring its return value.
+GetOptions(
+	"headers|h=s" => \$wanted,
+	"casei|i" => \$casei,
+	"raw" => \$raw,
+	"sort" => \$sort,
+	'help' => \$help,
+	'<>' => sub { push (@files, $_[0]); }
+) or usage();
+usage() if $help;
+
+# Without any header names the compiled pattern could only match lines that
+# begin with a bare ":", so there would never be anything to report.
+usage() unless $wanted =~ /\S/;
+
+# Header names may be separated by any run of spaces, commas, pipes,
+# semicolons or colons; turn the list into a regex alternation and compile
+# it once, case-insensitively if -i was given.
+$wanted =~ s/[ ,\|;:]+/\|/gs;
+if ($casei) {
+  $wanted = qr{(?:$wanted)}i;
+} else {
+  $wanted = qr{(?:$wanted)};
+}
+
+# Scan everything named on the command line: directories are walked
+# recursively and each plain file found is handed to do_file(); any other
+# argument is handed to do_file() directly.
+use File::Find;
+foreach my $file (@files) {
+  if (-d $file) {
+    # no_chdir keeps the working directory fixed during the walk.  By
+    # default File::Find chdir()s into every directory it visits, so the
+    # $File::Find::name of a file below a *relative* starting directory
+    # could no longer be opened by do_file().  With no_chdir set, $_ holds
+    # the same path as $File::Find::name, so the -f test still works.
+    File::Find::find ({
+		wanted => sub {
+		  -f $_ and do_file ($File::Find::name);
+		},
+		no_chdir => 1,
+	      }, $file);
+  } else {
+    do_file ($file);
+  }
+}
+
+# do_file - print the wanted headers of one mail message on a single line.
+#
+# $file is the path of the message.  Lines are read up to the first empty
+# line; continuation lines (newline followed by spaces or tabs) are joined
+# to the previous line with a literal "\n " and every "|" is replaced by
+# "[pipe]".  Each resulting line of the form "<name>:" or "<name>: <value>"
+# whose name matches the file-level $wanted regex is kept, and the kept
+# lines are printed as
+#     <file> <header>|<header>|...
+# in message order, or sorted when the file-level $sort flag is set.
+# Nothing is printed if no header matches.
+#
+# Unless the file-level $raw flag is set, a line starting with
+# "X-Spam-Flag: YES" makes the reader skip ahead past the
+# "Content-Description: original message before SpamAssassin" line, discard
+# what was collected so far and start collecting afresh from there.
+#
+# Paths containing "/," followed by a digit are ignored.  A file that cannot
+# be opened produces a warning and is skipped.
+sub do_file {
+  my $file = shift;
+  return if ($file =~ /\/\,\d/);		# MH deleted mail
+
+  # Three-argument open with a lexical handle, so the file name is never
+  # interpreted as part of the mode.  Give up on this file if it cannot be
+  # opened rather than reading from an unopened handle.
+  my $in;
+  unless (open ($in, '<', $file)) {
+    warn "cannot read $file: $!\n";
+    return;
+  }
+
+  # A named lexical is used for the current line instead of $_, because
+  # this sub is also called from inside a File::Find callback.
+  my $hdrs = '';
+  while (defined (my $line = <$in>))
+  {
+    if ($line =~ /^$/) { last; }
+
+    # Anchored, so the text is only recognised as a header of its own and
+    # not when it merely appears inside another header's value.
+    if (!$raw && $line =~ /^X-Spam-Flag: YES/) {
+      while (defined ($line = <$in>)) {
+	last if $line =~ /^Content-Description: original message before SpamAssassin/;
+      }
+      # Drop the next three lines as well -- presumably the remainder of
+      # the wrapper that precedes the original headers (TODO confirm).
+      for (1 .. 3) { $line = <$in>; }
+      $hdrs = '';
+      next;
+    }
+
+    $hdrs .= $line;
+  }
+  close $in;
+
+  $hdrs =~ s/(\n[ \t]+)/\\n /gs;	# folding
+  $hdrs =~ s/\|/[pipe]/gs;		# pipe chars, "|" is the output separator
+
+  my @ok = ($hdrs =~ /^(${wanted}:(?: .*|))$/gom);
+  if ($sort) { @ok = sort @ok; }
+  $hdrs = join ("|", @ok);
+
+  return unless $hdrs =~ /\S/;
+  print "$file $hdrs\n";
+}