You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/12/10 00:49:36 UTC
svn commit: r355654 - /spamassassin/trunk/masses/corpora/mk-corpus-link-farm
Author: jm
Date: Fri Dec 9 15:49:34 2005
New Revision: 355654
URL: http://svn.apache.org/viewcvs?rev=355654&view=rev
Log:
cope with mboxes that start with blank lines
Modified:
spamassassin/trunk/masses/corpora/mk-corpus-link-farm
Modified: spamassassin/trunk/masses/corpora/mk-corpus-link-farm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/corpora/mk-corpus-link-farm?rev=355654&r1=355653&r2=355654&view=diff
==============================================================================
--- spamassassin/trunk/masses/corpora/mk-corpus-link-farm (original)
+++ spamassassin/trunk/masses/corpora/mk-corpus-link-farm Fri Dec 9 15:49:34 2005
@@ -473,6 +473,7 @@
my $where = 0; # current byte offset
my $in_header = 0; # are in we a header?
my $fromline;
+
while (!eof INPUT) {
my $offset = $start; # byte offset of this message
@@ -487,58 +488,61 @@
$fromline = $_;
last;
}
+ }
+ last unless defined($_);
+
+ # dbg "mbox From: $counter $start $where $fromline";
- if (mbox_new_enough($fromline))
- {
- $counter++;
-
- if (!$justcount) {
- $newname = get_mbox_name ($mboxpath, $offset);
-
- if (-f $newname && (-M _ >= -M INPUT)) {
- # no need to recreate it, it's fresh
-
- my $past = 0;
- while (<INPUT>) {
- if ($past) {
- last if (!defined($_) || substr($_,0,5) eq "From ");
- } else {
- $past = 1;
- }
+ if ($fromline && mbox_new_enough($fromline))
+ {
+ $counter++;
+
+ if (!$justcount) {
+ $newname = get_mbox_name ($mboxpath, $offset);
+
+ if (-f $newname && (-M _ >= -M INPUT)) {
+ # no need to recreate it, it's fresh
+
+ my $past = 0;
+ while (<INPUT>) {
+ if ($past) {
+ last if (!defined($_) || substr($_,0,5) eq "From ");
+ } else {
+ $past = 1;
}
}
- else {
- seek (INPUT, $offset, 0);
- open (OUTPUT, ">$newname") or die "cannot write to $newname";
- binmode OUTPUT;
-
- my $past = 0;
- while (<INPUT>) {
- if ($past) {
- last if (!defined($_) || substr($_,0,5) eq "From ");
- } else {
- $past = 1;
- }
- print OUTPUT;
+ }
+ else {
+ seek (INPUT, $where, 0);
+ open (OUTPUT, ">$newname") or die "cannot write to $newname";
+ binmode OUTPUT;
+
+ my $past = 0;
+ while (<INPUT>) {
+ if ($past) {
+ last if (!defined($_) || substr($_,0,5) eq "From ");
+ } else {
+ $past = 1;
}
-
- close OUTPUT or die "failed to write to $newname";
-
- chmod 0644, $newname or warn "cannot chmod $newname";
-
- utime $atime, $mtime, $newname
- or warn "failed to touch $newname";
+ print OUTPUT;
}
-
- push @created_files, $newname;
- remove_from_poss_delete($newname);
- $where = tell INPUT;
- $offset = $where;
+ close OUTPUT or die "failed to write to $newname";
- # we've already read the next "From " line, parse it now
- goto nextfrom;
+ chmod 0644, $newname or warn "cannot chmod $newname";
+
+ utime $atime, $mtime, $newname
+ or warn "failed to touch $newname";
}
+
+ push @created_files, $newname;
+ remove_from_poss_delete($newname);
+
+ $where = tell INPUT;
+ $offset = $where;
+
+ # we've already read the next "From " line, parse it now
+ goto nextfrom;
}
}
}
@@ -571,7 +575,7 @@
my ($fromline) = @_;
# From xscludshmkjgc@yahoo.com Thu Apr 29 20:02:18 2004
- return unless ($fromline =~ /^From \S+ (.*)$/);
+ return unless ($fromline && $fromline =~ /^From \S+ +(.*)$/);
$fromline = $1;
$fromline .= " ".local_tz() unless $fromline =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;