You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/10/06 14:47:11 UTC

svn commit: r453586 [5/7] - in /spamassassin/branches/jm_re2c_hacks: ./ build/ build/automc/ build/buildbot/ contrib/ lib/Mail/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Bayes/ lib/Mail/SpamAssassin/BayesStore/ lib/Mail/SpamAssassin/Conf/ lib/Mail/S...

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm Fri Oct  6 05:46:56 2006
@@ -1,9 +1,10 @@
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm Fri Oct  6 05:46:56 2006
@@ -1,9 +1,10 @@
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Reporter.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Reporter.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Reporter.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Reporter.pm Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 # Mail::SpamAssassin::Reporter - report a message as spam
 
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SQLBasedAddrList.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SQLBasedAddrList.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SQLBasedAddrList.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SQLBasedAddrList.pm Fri Oct  6 05:46:56 2006
@@ -1,9 +1,10 @@
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 # spamd prefork scaling, using an Apache-based algorithm
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 
@@ -121,11 +122,20 @@
   $self->compute_lowest_child_pid();
 }
 
+# this is called by SIGTERM and SIGHUP handlers, to ensure that new
+# kids aren't added while the main code is killing the old ones
+# and planning to exit.
+#
+sub set_exiting_flag {
+  my ($self) = @_;
+  $self->{am_exiting} = 1;
+}
+
 sub child_error_kill {
   my ($self, $pid, $sock) = @_;
 
-  warn "prefork: killing failed child $pid ".
-            ($sock ? "fd=".$sock->fileno : "");
+  warn "prefork: killing failed child $pid fd=".
+    ((defined $sock && defined $sock->fileno) ? $sock->fileno : "undefined");
 
   # close the socket and remove the child from our list
   $self->set_child_state ($pid, PFSTATE_KILLED);
@@ -138,7 +148,7 @@
     $sock->close;
   }
 
-  warn "prefork: killed child $pid";
+  warn "prefork: killed child $pid\n";
 }
 
 sub set_child_state {
@@ -650,6 +660,9 @@
 
 sub adapt_num_children {
   my ($self) = @_;
+
+  # don't start up new kids while main is working at killing the old ones
+  return if $self->{am_exiting};
 
   my $kids = $self->{kids};
   my $statestr = '';

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SubProcBackChannel.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SubProcBackChannel.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SubProcBackChannel.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SubProcBackChannel.pm Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 # back-channel for communication between a master and multiple slave processes.
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Timeout.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Timeout.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Timeout.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Timeout.pm Fri Oct  6 05:46:56 2006
@@ -1,9 +1,10 @@
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util.pm Fri Oct  6 05:46:56 2006
@@ -1,9 +1,10 @@
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 
@@ -793,16 +794,23 @@
   # Get the actual MIME type out ...
   # Note: the header content may not be whitespace unfolded, so make sure the
   # REs do /s when appropriate.
+  # correct:
+  # Content-type: text/plain; charset=us-ascii
+  # missing a semi-colon, CT shouldn't have whitespace anyway:
+  # Content-type: text/plain charset=us-ascii
   #
-  $ct =~ s/^\s+//;			# strip leading whitespace
-  $ct =~ s/;.*$//s;			# strip everything after first ';'
-  $ct =~ s@^([^/]+(?:/[^/]*)?).*$@$1@s;	# only something/something ...
+  $ct =~ s/^\s+//;				# strip leading whitespace
+  $ct =~ s/;.*$//s;				# strip everything after first ';'
+  $ct =~ s@^([^/]+(?:/[^/\s]*)?).*$@$1@s;	# only something/something ...
   # strip inappropriate chars
   $ct =~ tr/\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135//d;
   $ct = lc $ct;
 
   # bug 4298: If at this point we don't have a content-type, assume text/plain
-  $ct ||= "text/plain";
+  # also, if the content-type is simply "text" or "text/", assume text/plain
+  if (!$ct || $ct =~ /^text\/?$/) {
+    $ct = "text/plain";
+  }
 
   # Now that the header has been parsed, return the requested information.
   # In scalar context, just the MIME type, in array context the
@@ -876,7 +884,7 @@
 
 ###########################################################################
 
-=item my ($filehandle, $filepath) = secure_tmpfile();
+=item my ($filepath, $filehandle) = secure_tmpfile();
 
 Generates a filename for a temporary file, opens it exclusively and
 securely, and returns a filehandle to the open file (opened O_RDWR).

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/DependencyInfo.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/DependencyInfo.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/DependencyInfo.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/DependencyInfo.pm Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 # Helper code to debug dependencies and their versions.
 
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/Progress.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/Progress.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/Progress.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/Progress.pm Fri Oct  6 05:46:56 2006
@@ -1,9 +1,10 @@
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 # The (extremely complex) rules for domain delegation.
 
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/lm/build.pl
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lm/build.pl?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lm/build.pl (original)
+++ spamassassin/branches/jm_re2c_hacks/lm/build.pl Fri Oct  6 05:46:56 2006
@@ -4,11 +4,12 @@
 # *.ln = new format, uses NULL as separator
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/bayes-testing/graph-accuracy-curve
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/bayes-testing/graph-accuracy-curve?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/bayes-testing/graph-accuracy-curve (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/bayes-testing/graph-accuracy-curve Fri Oct  6 05:46:56 2006
@@ -6,14 +6,15 @@
 # usage: graph-accuracy-curve [--buckets=100] ...dir/results .../dir2/results ...
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
+# 
 #     http://www.apache.org/licenses/LICENSE-2.0
-#
+# 
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

Modified: spamassassin/branches/jm_re2c_hacks/masses/bayes-testing/graph-bayes-histogram
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/bayes-testing/graph-bayes-histogram?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/bayes-testing/graph-bayes-histogram (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/bayes-testing/graph-bayes-histogram Fri Oct  6 05:46:56 2006
@@ -6,14 +6,15 @@
 # usage: graph-bayes-histogram [--buckets=100] ...dir/results .../dir2/results ...
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
+# 
 #     http://www.apache.org/licenses/LICENSE-2.0
-#
+# 
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

Modified: spamassassin/branches/jm_re2c_hacks/masses/corpora/mk-corpus-link-farm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/corpora/mk-corpus-link-farm?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/corpora/mk-corpus-link-farm (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/corpora/mk-corpus-link-farm Fri Oct  6 05:46:56 2006
@@ -7,14 +7,15 @@
 # cause breakage.
 # 
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
+# 
 #     http://www.apache.org/licenses/LICENSE-2.0
-#
+# 
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

Modified: spamassassin/branches/jm_re2c_hacks/masses/enable-all-evolved-rules
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/enable-all-evolved-rules?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/enable-all-evolved-rules (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/enable-all-evolved-rules Fri Oct  6 05:46:56 2006
@@ -12,11 +12,12 @@
 # required.
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/evolve_metarule/evolve_metarule.c
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/evolve_metarule/evolve_metarule.c?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/evolve_metarule/evolve_metarule.c (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/evolve_metarule/evolve_metarule.c Fri Oct  6 05:46:56 2006
@@ -2,14 +2,15 @@
  * the NIGERIAN or ADVANCE_FEE rule.
  *
  * <@LICENSE>
- * Copyright 2005 Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at:
+ * 
  *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

Modified: spamassassin/branches/jm_re2c_hacks/masses/evolve_metarule/preproc.pl
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/evolve_metarule/preproc.pl?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/evolve_metarule/preproc.pl (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/evolve_metarule/preproc.pl Fri Oct  6 05:46:56 2006
@@ -1,13 +1,14 @@
 #!/usr/bin/perl -w
 # <@LICENSE>
-# Copyright 2005 Apache Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
+# 
 #     http://www.apache.org/licenses/LICENSE-2.0
-#
+# 
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

Modified: spamassassin/branches/jm_re2c_hacks/masses/extract-message-from-mbox
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/extract-message-from-mbox?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/extract-message-from-mbox (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/extract-message-from-mbox Fri Oct  6 05:46:56 2006
@@ -1,10 +1,11 @@
 #!/usr/bin/perl
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/find-extremes
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/find-extremes?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/find-extremes (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/find-extremes Fri Oct  6 05:46:56 2006
@@ -2,11 +2,12 @@
 
 # hacked version of hit-frequencies - Allen
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/fp-fn-to-tcr
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/fp-fn-to-tcr?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/fp-fn-to-tcr (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/fp-fn-to-tcr Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 #!/usr/bin/perl
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/freqdiff
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/freqdiff?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/freqdiff (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/freqdiff Fri Oct  6 05:46:56 2006
@@ -3,11 +3,12 @@
 # freqdiff - print frequency difference between two inputs
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/generate-translation
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/generate-translation?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/generate-translation (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/generate-translation Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 #!/usr/bin/perl -w
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 
@@ -169,11 +170,12 @@
 # See 'perldoc Mail::SpamAssassin::Conf' for details.
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/hit-frequencies
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/hit-frequencies?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/hit-frequencies (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/hit-frequencies Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 #!/usr/bin/perl -w
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/lint-rules-from-freqs
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/lint-rules-from-freqs?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/lint-rules-from-freqs (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/lint-rules-from-freqs Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 #!/usr/bin/perl
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/logdiff
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/logdiff?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/logdiff (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/logdiff Fri Oct  6 05:46:56 2006
@@ -1,8 +1,10 @@
 #!/usr/bin/perl
 
 my ($l1, $l2, @diffargs);
+@diffargs = ();
+
 foreach my $arg (@ARGV) {
-  if (/^-/) {
+  if ($arg =~ /^-/) {
     push @diffargs, $arg;
   } elsif (defined $l1) {
     $l2 = $arg;
@@ -24,12 +26,21 @@
   open (IN, "<$inf") or die "cannot open $inf";
   open (OUT, ">$outf") or die "cannot open $outf";
   while (<IN>) {
-    s/\bscantime=\d+/scantime=N/gs;     # frequently different
-    s/\bAWL\b//gs;      # kill AWL hits
-    s/ ,/ /gs;
-    s/, / /gs;
-    s/,,/,/gs;
-    print OUT;
+    if (!/^([\.Y]\s+\S+\s+\S+)\s+(\S+)\s+(.+)/) {
+      print OUT;
+      next;
+    }
+
+    my ($scorepath, $rules, $meta) = ($1,$2,$3);
+    my @rules = split(/,/, $rules);
+    @rules = sort grep {
+        $_ !~ /^AWL$/
+    } @rules;
+    $rules = join(',', @rules);
+
+    $meta =~ s/\bscantime=\d+/scantime=N/gs;     # frequently different
+
+    print OUT "$scorepath $rules $meta\n";
   }
   close IN;
   close OUT;

Modified: spamassassin/branches/jm_re2c_hacks/masses/logs-to-c
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/logs-to-c?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/logs-to-c (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/logs-to-c Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 #!/usr/bin/perl -w
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/mass-check?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/mass-check (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/mass-check Fri Oct  6 05:46:56 2006
@@ -1,11 +1,14 @@
 #!/usr/bin/perl -w
+use strict;
+
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 
@@ -16,13 +19,15 @@
 # limitations under the License.
 # </...@LICENSE>
 
+sub aidbg;
+
 sub usage {
   my $status = shift;
 
-  my $out = $status ? STDERR : STDOUT;
+  my $out = $status ? \*STDERR : \*STDOUT;
   print $out <<EOF;
 usage: mass-check [options] target ...
- 
+
   -c=file       set configuration/rules directory
   -p=dir        set user-prefs directory
   -f=file       read list of targets from <file>
@@ -41,7 +46,26 @@
                 were encapsulated by servers matching the regexp RE
                 (default = extract all SpamAssassin-encapsulated mails)
   --lint        check rules for syntax before running
- 
+
+  client/server mode options
+  --server host:port
+                use server mode, running on the given hostname and port
+  --client host:port
+  		use client mode, connecting to the given hostname and port
+  --cs_max N
+  		at most, only ever request (client)/give out (server) a
+		maximum of N messages (defaults to 1000)
+  --cs_timeout N
+  		in client mode, try to connect to the server every N seconds
+		defaults to 300
+		in server mode, timeout messages after N seconds
+		defaults to 60
+  --cs_paths_only
+		only used in client mode.  when making requests of the
+		server, only ask for paths to the messages and not the
+		messages themselves.  useful when the client and server
+		have the same paths to the corpus data.
+
   log options
   -o            write all logs to stdout
   --loghits     log the text hit for patterns (useful for debugging)
@@ -49,7 +73,7 @@
   --logmem	log the memory delta (only on Linux)
   --hamlog=log  use <log> as ham log ('ham.log' is default)
   --spamlog=log use <log> as spam log ('spam.log' is default)
- 
+
   message selection options
   -n            no date sorting or spam/ham interleaving
   --cache	use cache information when selecting messages
@@ -61,7 +85,7 @@
   --all         don't skip big messages
   --head=N      only check first N ham and N spam (N messages if -n used)
   --tail=N      only check last N ham and N spam (N messages if -n used)
- 
+
   simple target options (implies -o and no ham/spam classification)
   --dir         subsequent targets are directories
   --file        subsequent targets are files in RFC 822 format
@@ -95,12 +119,15 @@
 	    $opt_logmem $opt_after $opt_before $opt_rewrite $opt_deencap
 	    $opt_learn $opt_reuse $opt_lint $opt_cache $opt_noisy
 	    $total_messages $statusevery $opt_cachedir
-	    %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
+	    $opt_client $opt_cs_max $opt_cs_timeout $opt_cs_paths_only
+	    $opt_server %postdata %real $svn_revision
+	    $tmpfd %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
 
 use FindBin;
 use lib "$FindBin::Bin/../lib";
-use lib "$FindBin::Bin/tmp";
 eval "use bytes";
+use IO::Select;
+use IO::Socket;
 use Mail::SpamAssassin::ArchiveIterator;
 use Mail::SpamAssassin;
 use Mail::SpamAssassin::Logger;
@@ -109,6 +136,7 @@
 use Getopt::Long;
 use POSIX qw(strftime);
 use constant HAS_TIME_PARSEDATE => eval { require Time::ParseDate; };
+use constant HAS_IO_ZLIB => eval { require IO::Zlib; };
 use Config; 
 
 # default settings
@@ -128,6 +156,8 @@
 	   "rules=s", "restart=i", "after=s", "before=s", "loguris",
 	   "deencap=s", "logmem", "learn=i", "reuse", "lint", "cache",
            "cachedir=s", "noisy",
+	   "server=s", "cs_max=i", "cs_timeout=i", "cs_paths_only",
+	   "client=s",
 	   "dir" => sub { $opt_format = "dir"; },
 	   "file" => sub { $opt_format = "file"; },
 	   "mbox" => sub { $opt_format = "mbox"; },
@@ -135,29 +165,28 @@
 	   "help" => sub { usage(0); },
 	   '<>' => \&target) or usage(1);
 
-# rules.pl is for the --reuse option, score set doesn't matter
-if ($opt_reuse && ! -f "$FindBin::Bin/tmp/rules.pl") {
-  # some people specify paths relatively, whereas this needs an absolute path,
-  # so "do the right thing"(tm).
-  my $abs_opt_c = File::Spec->rel2abs($opt_c);
-  system("cd $FindBin::Bin; perl parse-rules-for-masses -d $abs_opt_c");
+# We need IO::Zlib for client-server mode!
+if ( ($opt_client || $opt_server) && ! HAS_IO_ZLIB ) {
+  die "IO::Zlib required for client/server mode!\n";
 }
 
-require "rules.pl" if $opt_reuse;
+# rules.pl is for the --reuse option, score set doesn't matter
+if ($opt_reuse) {
+  my $rules_path = "$FindBin::Bin/tmp/rules.pl";
+  if (! -f $rules_path) {
+    # some people specify paths relatively, whereas this needs an absolute path,
+    # so "do the right thing"(tm).
+    my $abs_opt_c = File::Spec->rel2abs($opt_c);
+    system("cd $FindBin::Bin; perl parse-rules-for-masses -d $abs_opt_c");
+  }
+
+  require $rules_path;
+}
 
 if ($opt_noisy) {
   $opt_progress = 1;        # implies --progress
 }
 
-# test messages for the mass-check
-my @targets;
-if ($opt_f) {
-  open(F, $opt_f) || die "cannot read target $opt_f: $!";
-  push(@targets, map { chomp; $_ } <F>);
-  close(F);
-}
-usage(1) if !@targets;
-
 $opt_debug ||= 'all' if defined $opt_debug;
 
 my $user_prefs = "$opt_p/user_prefs";
@@ -167,7 +196,6 @@
 # but since it's probably not expecting that, and we don't want
 # strange things happening, create a local object.
 if ($opt_lint) {
-
   my $spamlint = new Mail::SpamAssassin ({
     'debug'              			=> $opt_debug,
     'rules_filename'     			=> $opt_c,
@@ -178,13 +206,12 @@
     'dont_copy_prefs'   			=> 1,
     'local_tests_only'   			=> $opt_net ? 0 : 1,
     'only_these_rules'   			=> $opt_rules,
-    'ignore_safety_expire_timeout'	=> 1,
-    PREFIX				=> '',
+    'ignore_safety_expire_timeout'		=> 1,
+    PREFIX					=> '',
     DEF_RULES_DIR        			=> $opt_c,
     LOCAL_RULES_DIR      			=> '',
   });
 
-
   $spamlint->debug_diagnostics();
   my $res = $spamlint->lint_rules();
   $spamlint->finish();
@@ -192,7 +219,18 @@
   exit 1 if $res;
 }
 
-$spamtest = new Mail::SpamAssassin ({
+# test messages for the mass-check
+my @targets;
+if (!$opt_server && !$opt_client) {
+  if ($opt_f) {
+    open(F, $opt_f) || die "cannot read target $opt_f: $!";
+    push(@targets, map { chomp; $_ } <F>);
+    close(F);
+  }
+  usage(1) if !@targets;
+}
+
+my $spamtest = new Mail::SpamAssassin ({
   'debug'              			=> $opt_debug,
   'rules_filename'     			=> $opt_c,
   'userprefs_filename' 			=> $user_prefs,
@@ -214,7 +252,6 @@
 # generated user_prefs
 if ($opt_reuse) {
   # copy current prefs if it exists
-
   $spamtest->copy_config(undef, \%orig_conf);
 
   # zeroed scores to mass_prefs
@@ -239,13 +276,13 @@
 chomp $where;
 chomp $when;
 chomp $host;
-my $revision = get_current_svn_revision();
+$svn_revision = get_current_svn_revision();
 my $cmdline = join(' ',@ORIG_ARGV); $cmdline =~ s/\s+/ /gs;
 my $isowhen = strftime("%Y%m%dT%H%M%SZ", gmtime(time)); # better
 
 my $log_header = "# mass-check results from $who\@$where, on $when\n" .
 		 "# M:SA version ".$spamtest->Version()."\n" .
-		 "# SVN revision: $revision\n" .
+		 "# SVN revision: $svn_revision\n" .
                  "# Date: $isowhen\n" .
 		 "# Perl version: $] on $Config{archname}\n" .
                  "# Switches: '$cmdline'\n";
@@ -260,60 +297,141 @@
 my $showdots_counter = 0;
 my $showdots_every = ($opt_showdots ? 1 : 20);
 
-# Deal with --rewrite
-if (defined $opt_rewrite) {
-  my $rewrite = ($opt_rewrite ? $opt_rewrite : "/tmp/out");
-  open(REWRITE, "> $rewrite") || die "open of $rewrite failed: $!";
-}
+my $AIopts = {
+	'opt_all' => $opt_all,
+	};
 
-# Deal with --before and --after
-foreach my $time ($opt_before, $opt_after) {
-  if ($time && $time =~ /^-\d+$/) {
-    $time = time + $time;
+if (!$opt_client) {
+  # Deal with --rewrite
+  if (defined $opt_rewrite) {
+    my $rewrite = ($opt_rewrite ? $opt_rewrite : "/tmp/out");
+    open(REWRITE, "> $rewrite") || die "open of $rewrite failed: $!";
   }
-  elsif ($time && $time !~ /^-?\d+$/) {
-    if (HAS_TIME_PARSEDATE) {
-      $time = Time::ParseDate::parsedate($time, GMT => 1, PREFER_PAST => 1);
-    }
-    else { 
-      die "You need Time::ParseDate if you use either the --before or --after option.";
+
+  # Deal with --before and --after
+  foreach my $time ($opt_before, $opt_after) {
+    if ($time && $time =~ /^-\d+$/) {
+      $time = time + $time;
+    }
+    elsif ($time && $time !~ /^-?\d+$/) {
+      if (HAS_TIME_PARSEDATE) {
+        $time = Time::ParseDate::parsedate($time, GMT => 1, PREFER_PAST => 1);
+      }
+      else { 
+        die "You need Time::ParseDate if you use either the --before or --after option.";
+      }
     }
   }
+
+  if ($opt_before && $opt_after && $opt_after >= $opt_before) {
+    die "--before ($opt_before) <= --after ($opt_after) -- conflict!";
+  }
+
+  # ArchiveIterator options for non-client mode
+  $AIopts->{'opt_n'} = $opt_n;
+  $AIopts->{'opt_head'} = $opt_head;
+  $AIopts->{'opt_tail'} = $opt_tail;
+  $AIopts->{'opt_cache'} = $opt_cache;
+  $AIopts->{'opt_cachedir'} = $opt_cachedir;
+  $AIopts->{'opt_after'} = $opt_after;
+  $AIopts->{'opt_before'} = $opt_before;
+  $AIopts->{'scan_progress_sub'} = \&showdots_blip;
+}
+else {
+  # ArchiveIterator options for client mode -- tends to be simple
+  $AIopts->{'opt_n'} = 1;
 }
 
-if ($opt_before && $opt_after && $opt_after >= $opt_before) {
-  die "--before ($opt_before) <= --after ($opt_after) -- conflict!";
+###########################################################################
+## SCAN MODE
+
+my $iter = new Mail::SpamAssassin::ArchiveIterator($AIopts);
+
+# setup the AI functions
+if ($opt_client) {
+  $iter->set_functions(\&wanted, \&result_client);
+}
+elsif ($opt_server) {
+  $iter->set_functions(\&wanted_server, \&result);
+}
+else {
+  $iter->set_functions(\&wanted, \&result);
 }
 
-my $iter = new Mail::SpamAssassin::ArchiveIterator({
-	'opt_j' => $opt_j,
-	'opt_n' => $opt_n,
-	'opt_all' => $opt_all,
-	'opt_head' => $opt_head,
-	'opt_tail' => $opt_tail,
-	'opt_cache' => $opt_cache,
-	'opt_cachedir' => $opt_cachedir,
-	'opt_after' => $opt_after,
-	'opt_before' => $opt_before,
-	'opt_restart' => $opt_restart,
-        'scan_progress_sub' => \&scan_progress_cb
-});
+my $messages;
+
+# normal mode as well as a server do scan mode and get a temp file
+if (!$opt_client) {
+  status('starting scan stage') if ($opt_progress);
+
+  # Make a temp file and delete it
+  my $tmpf;
+  ($tmpf, $tmpfd) = Mail::SpamAssassin::Util::secure_tmpfile();
+  die 'mass-check: failed to create temp file' unless $tmpf;
+  unlink $tmpf or die "mass-check: unlink '$tmpf': $!";
+
+  # having opt_j or server mode means do scan in a separate process
+  if ($opt_server || $opt_j) {
+    if ($tmpf = fork()) {
+      # parent
+      waitpid($tmpf, 0);
+    }
+    elsif (defined $tmpf) {
+      # child -- process using message_array
+      my($num, $messages) = $iter->message_array(\@targets);
+
+      # Dump out the number of messages and the message index info to
+      # the temp file
+      send_line($tmpfd, $num, @{$messages});
+
+      exit;
+    }
+    else {
+      die "mass-check: cannot fork: $!";
+    }
+  }
+  else {
+    # we get here if opt_j == 0, so scan in this process
+    my($num, $messages) = $iter->message_array(\@targets);
+
+    # Dump out the number of messages and the message index info to
+    # the temp file
+    send_line($tmpfd, $num, @{$messages});
+  }
+
+  # we now have a temporary file with the messages to process
+  seek($tmpfd, 0, 0);
+  # the first line is the number of messages
+  $total_messages = read_line($tmpfd);
 
-if ($opt_progress) {
-  status('starting scan stage');
+  if (!$total_messages) {
+    die "mass-check: no messages to process\n";
+  }
+
+  status("completed scan stage, $total_messages messages") if ($opt_progress);
 }
 
-sub scan_progress_cb {
-  showdots_blip();
+###########################################################################
+## RUN MODE
+
+if ($opt_client) {
+  client_mode();
 }
+else {
+  status('starting run stage') if ($opt_progress);
 
-$iter->set_functions(\&wanted, \&result);
-$iter->run(@targets);
+  if ($opt_server) {
+    server_mode();
+  }
+  else {
+    run_through_messages();
+  }
 
-if ($opt_progress) {
-  status('completed run stage');
+  status('completed run stage') if ($opt_progress);
 }
 
+# Even though we're about to exit, let's clean up after ourselves
+close($tmpfd) if ($tmpfd);
 showdots_finish();
 
 if (defined $opt_rewrite) {
@@ -341,25 +459,23 @@
 ###########################################################################
 
 sub init_results {
+  $init_results = 1;
+
   showdots_finish();
 
   # now, showdots only happens if --showdots was used
   $showdots_active = $opt_showdots;
 
   if ($opt_progress) {
-    # make it a local variable for now
-    $total_messages = $Mail::SpamAssassin::ArchiveIterator::MESSAGES;
-
     # round up since 100% will be caught at end already
     $statusevery = int($total_messages / $updates + 1);
 
     # if $messages < $updates, just give a status line per msg.
     $statusevery ||= 1;
-
-    status("completed scan stage, $total_messages messages");
-    status('starting run stage');
   }
 
+  return if $opt_client;
+
   if ($opt_o) {
     autoflush STDOUT 1;
     print STDOUT $log_header;
@@ -372,14 +488,13 @@
     print HAM $log_header;
     print SPAM $log_header;
   }
-  $init_results = 1;
 }
 
 sub result {
   my ($class, $result, $time) = @_;
 
   # don't open results files until we get here to avoid overwriting files
-  &init_results if !$init_results;
+  init_results() if !$init_results;
 
   if ($class eq "s") {
     if ($opt_o) { print STDOUT $result; } else { print SPAM $result; }
@@ -391,7 +506,6 @@
   }
 
   $total_count++;
-#warn ">> result: $total_count $class $time\n";
 
   if ($opt_progress) {
     progress($time);
@@ -400,10 +514,32 @@
 
 sub wanted {
   my ($class, $id, $time, $dataref, $format) = @_;
-  my $out;
+  my $out = '';
+
+  # if origid is defined, it'll be the message number from server mode
+  my $origid;
+
+  # client mode is a little crazy because we need to kluge around the fact
+  # that the information needed to do the run is different than the
+  # information that goes into the results.
+  if ($opt_client) {
+    if ($opt_cs_paths_only) {
+      # the server message number
+      $origid = $real{$id};
+    }
+    else {
+      # if we're a non-paths_only client, change the format and id to the real
+      # version, make sure to remember the server's message number
+      $origid=$id;
+      $origid =~ s/^.+?(\d+)$/$1/;
+      $format = $real{$id}->[2];
+      $id = $real{$id}->[3];
+    }
+  }
 
   memory_track_start() if ($opt_logmem);
 
+  # parse the message, and force it to complete
   my $ma = $spamtest->parse($dataref, 1);
 
   # remove SpamAssassin markup, if present and the mail was spam
@@ -488,7 +624,7 @@
     if (defined $spam) {
       my $result = ($spam ? "spam" : "ham");
       my $status = $spamtest->learn($ma, undef, $spam, 0);
-      $learned = $status->did_learn();
+      my $learned = $status->did_learn();
       $result = "undef" if !defined $learned;
       push(@extra, "learn=".$result);
     }
@@ -532,6 +668,10 @@
     push(@extra, "reuse=no");
   }
 
+  if ($opt_client) {
+    push(@extra, "host=$where");
+  }
+
   my $yorn;
   my $score;
   my $tests;
@@ -569,6 +709,12 @@
 
   $id =~ s/\s/_/g;
 
+  # if we have an origid set, it'll be the server mode's message number, so
+  # attach it to our result appropriately.
+  if (defined $origid) {
+    $out = "$origid ";
+  }
+
   $out .= sprintf("%s %2d %s %s %s\n", $yorn, $score, $id, $tests, $extra);
 
   if ($tests =~ /MICROSOFT_EXECUTABLE|MIME_SUSPECT_NAME/) {
@@ -595,7 +741,15 @@
   undef $ma;		# clean 'em up
   undef $status;
 
+  # uncomment these lines to get a Data::Dumper dump of the Mail::SpamAssassin
+  # module, written to a file after each message is scanned.  This is a
+  # great way to find memory leaks...
+  ## use Data::Dumper;
+  ## open (D, ">dump.$$.$total_count"); print D Dumper($spamtest); close D;
+  ## warn "wrote memory dump: dump.$$.$total_count";
+
   showdots_blip();
+#  print ">>>> out = $out\n";
   return $out;
 }
 
@@ -717,7 +871,7 @@
       }
     }
   }
-  $str;
+  return $str;
 }
 
 sub get_current_svn_revision {
@@ -756,4 +910,930 @@
   }
 
   return $revision || "unknown";
+}
+
+############################################################################
+
+## children processors, start and process, used when opt_j > 1
+
+# Fork $count worker processes, each connected to this process by its own
+# socketpair.
+#
+# Input:
+#  - number of children to start (scalar)
+#  - array ref, filled in with this process's end of each socketpair
+#  - array ref, filled in with the pid of each child
+#  - object the child sockets are registered with via add()
+#    (presumably an IO::Select -- confirm against the caller)
+#
+# Each child announces itself with a "START" line and then loops: it reads a
+# line, hands it to $iter->run_message(), and writes back
+# "<result>\0RESULT <line>".  A line of "exit" makes the child close its
+# socket and exit.  Children never return from this function.
+sub start_children {
+  my ($count, $child, $pid, $socket) = @_;
+
+  my $io = IO::Socket->new();
+  my $parent;
+
+  # create children
+  for (my $i = 0; $i < $count; $i++) {
+    # $child->[$i] is the end this process keeps, $parent is the end the
+    # forked child uses to talk back to us
+    ($child->[$i],$parent) = $io->socketpair(AF_UNIX,SOCK_STREAM,PF_UNSPEC)
+	or die "mass-check: socketpair failed: $!";
+    if ($pid->[$i] = fork) {
+      # in the parent: the child's end of the pair isn't needed here
+      close $parent;
+
+      # disable caching for parent<->child relations
+      my ($old) = select($child->[$i]);
+      $|++;
+      select($old);
+
+      $socket->add($child->[$i]);
+      aidbg "mass-check: starting new child $i (pid ".$pid->[$i].")\n";
+      next;
+    }
+    elsif (defined $pid->[$i]) {
+      # in the child (fork returned 0)
+      # NOTE: this $result is never used; the "my" list inside the loop
+      # below declares a new $result that shadows it
+      my $result;
+      my $line;
+
+      # the child doesn't use the temp file handle, so drop its copy
+      close $tmpfd if defined $tmpfd;
+
+      # the other end of the pair belongs to the parent process
+      close $child->[$i];
+      select($parent);
+      $| = 1;	# print to parent by default, turn off buffering
+      send_line($parent,"START");
+      # keep going until read_line() hands back a false value
+      while ($line = read_line($parent)) {
+	if ($line eq "exit") {
+	  close $parent;
+	  exit;
+	}
+
+	my($class, $format, $date, $where, $result) = $iter->run_message($line);
+	$result ||= '';
+
+	# If determine_receive_date is not set, the original input date
+	# wasn't calculated, but run_message would have done so, so reset
+	# the packed version if possible ...  use defined for date since
+	# it could == 0.
+        if (!$iter->{determine_receive_date} && $class && $format && defined $date && $where) {
+	  $line = Mail::SpamAssassin::ArchiveIterator::index_pack($date, $class, $format, $where);
+        }
+
+	# result first, then a NUL and the (possibly repacked) index line
+	send_line($parent,"$result\0RESULT $line");
+      }
+      exit;
+    }
+    else {
+      # fork returned undef
+      die "mass-check: cannot fork: $!";
+    }
+  }
+}
+
+## handling killing off the children
+
+sub reap_children {
+  my ($count, $socket, $pid) = @_;
+
+  # A child that has already died will cause a SIGPIPE when we write the
+  # exit request to it.  That's harmless: the write simply fails, and the
+  # waitpid below still collects the process, so ignore the signal for the
+  # duration of this sub.
+  local $SIG{'PIPE'} = 'IGNORE';
+
+  # walk the children in index order: ask each to exit, hang up on it, and
+  # then wait for it to go away before moving on to the next
+  foreach my $idx (0 .. $count - 1) {
+    my $kid_pid = $pid->[$idx];
+    my $kid_sock = $socket->[$idx];
+
+    aidbg "mass-check: killing child $idx (pid ",$kid_pid,")\n";
+
+    send_line($kid_sock,"exit");
+    close $kid_sock;
+
+    waitpid($kid_pid, 0);
+  }
+}
+
+# in server mode, this gets called to read in the HTTP request from a given
+# socket, then return the information the client sent to us.
+#
+# Input:
+#  - object to read the request from; must support getline() and read()
+#
+# Returns a four element list:
+#  - request method, upper-cased ('' if the request line can't be parsed)
+#  - request URI ('' if the request line can't be parsed)
+#  - hash ref of headers, keyed by lower-cased header name
+#  - hash ref of the URL-decoded key/value pairs from a POST body
+#
+# If the request line isn't of the form "METHOD URI VERSION", nothing more
+# is read and both hash refs come back empty.
+sub handle_http_request {
+  my $socket = shift;
+
+  my $headers = {};
+  my $postdata = {};
+
+  # read in the request (POST / HTTP/1.0)
+  my $line = $socket->getline();
+  $line = '' unless defined $line;
+  $line =~ s/\r?\n$//;		# accept bare LF as well as CRLF
+
+  # the three parts have to be separated by whitespace; allowing zero
+  # spaces before the version would let "GET /foo" parse as "/fo" + "o".
+  my ($type, $URI, $VERS) = $line =~ /^([a-zA-Z]+)\s+(\S+)\s+(\S+)/;
+  unless ($type && $URI && $VERS) {
+    $type ||= '';
+    $URI  ||= '';
+
+    return ($type, $URI, $headers, $postdata);
+  }
+
+  $type = uc $type;
+
+  # read in headers, "key: value" up to a blank line or EOF.  this needs to
+  # be a real loop: "last" can't be used to leave a do {} while block.
+  while (defined($line = $socket->getline())) {
+    $line =~ s/\r?\n$//;
+    last if $line eq '';
+
+    my ($k,$v) = split(/:\s*/, $line, 2);
+    $headers->{lc $k} = $v;
+  }
+
+  # if this is a POST request w/ content-length, there'll be a payload, deal
+  # with it.  only a positive, purely numeric length is believed.
+  my $length = $headers->{'content-length'};
+  if ($type eq 'POST' && defined $length && $length =~ /^\s*(\d+)\s*$/ && $1 > 0) {
+    my $pd = '';
+    $socket->read($pd, $1);
+    $pd = '' unless defined $pd;	# nothing could be read
+    $pd =~ s/[\r\n]+$//; # a hack for manual requests/telnet/etc
+
+    # key1=value1&key2=value2...
+    foreach my $pair (split(/\&/, $pd)) {
+      my($k,$v) = split(/=/, $pair, 2);
+      next unless defined $k;		# empty pair, eg. "a=1&&b=2"
+      $v = '' unless defined $v;	# key with no "=value" part
+
+      # we need to decode the key and value
+      $k =~ s/\%([0-9a-fA-F]{2})/chr(hex($1))/eg;
+      $v =~ s/\%([0-9a-fA-F]{2})/chr(hex($1))/eg;
+
+      $postdata->{$k} = $v;
+    }
+  }
+
+  return($type, $URI, $headers, $postdata);
+}
+
+# in server mode, generate a gzip compressed data stream with the messages and
+# return the path to the compressed file which the server will read and pass
+# to the client.
+#
+# Input:
+#  - Number of messages to generate (scalar)
+#  - Hash of Arrays of outstanding requests (reference to hash of array refs)
+#     timestamp# -> [ num1, num2, ... ]
+#     Used to quickly find outstanding/timed out messages to send to client.
+#  - Hash of outstanding messages and associated data (ref to hash of hash refs)
+#     num1 -> { data => 'binary data from scan mode', timestamp => timestamp# }
+#     Used later on to specify the timestamp entry to remove the entry from.
+#     The special key 'curnum' holds the next message number to hand out; it
+#     is removed once the temp file of scanned messages has been used up.
+#  - Paths only?  If true, just include the server's index data for each
+#    message in the gzip file.  Otherwise, include the message content as
+#    well.  Useful if the client has the corpus available via the same paths
+#    as originally specified.
+#
+# Also uses the globals $opt_cs_timeout, $opt_noisy, $tmpfd and $iter.
+#
+# Returns: scalar path to gzip file, or '' if there is nothing to send
+#
+sub generate_messages {
+  my($msgs, $timestamps, $msgsout, $paths_only) = @_;
+
+  # Hold the message numbers we'll be sending out
+  my @tosend = ();
+
+  # Find out if any of the messages we sent out before need to be sent out
+  # again because we haven't seen a response within the timeout.
+  my $tooold = time - $opt_cs_timeout;
+  foreach (sort { $a <=> $b } keys %{$timestamps}) {
+    # since we're going in numeric order, if the current entry is newer than
+    # the timeout value, the rest will be too, so stop looking.
+    last if ($_ > $tooold);
+
+    # how many messages do we still need to fulfill the request?
+    my $wanted = $msgs - @tosend;
+
+    if (@{$timestamps->{$_}} > $wanted) {
+      # there are more entries in the timestamp list than we want, so just
+      # grab that many off the list.
+      push(@tosend, splice @{$timestamps->{$_}}, 0, $wanted);
+    }
+    else {
+      # there are just enough, or not enough entries on the timestamp list to
+      # satisfy our request, so take them all and we'll loop around.
+      push(@tosend, @{$timestamps->{$_}});
+      delete $timestamps->{$_};
+    }
+
+    # Ok, we have enough messages so we can stop now.
+    last if (@tosend == $msgs);
+  }
+
+  # if we still have the temp file with the input messages open, we'll fill
+  # up our message output queue with messages from there.
+  if ($tmpfd) {
+    while (@tosend < $msgs) {
+      my $msg = read_line($tmpfd);
+
+      # no more messages from the temp file, close it out
+      unless ($msg) {
+        delete $msgsout->{'curnum'};
+        close $tmpfd;
+        undef $tmpfd;
+        last;
+      }
+
+      # we got a result, so assign it a number (curnum) and store the data
+      # appropriately, then add the new number to the queue.
+      my $num = $msgsout->{'curnum'}++;
+      $msgsout->{$num}->{'data'} = $msg;
+      push(@tosend, $num);
+    }
+  }
+
+  # ok, at this point, @tosend ought to have a list of numbers, pointers into
+  # %{$msgsout}.  turn that into a gzip compressed archive file.
+  return '' unless @tosend;
+
+  # secure_tmpfile() hands back an open handle, but IO::Zlib wants to open
+  # the file by path itself, so close the handle and reopen through IO::Zlib.
+  my($gzpath, $gzfd) = Mail::SpamAssassin::Util::secure_tmpfile();
+  die "Can't make tempfile, exiting" unless $gzpath;
+  close($gzfd);
+
+  $gzfd = IO::Zlib->new($gzpath, 'wb') || die "Can't create temp gzip file: $!";
+
+  # first line is the number of messages included in the file
+  send_line($gzfd, scalar @tosend) || die "mass-check: error when writing to gz temp file\n";
+
+  # Generate an archive in the temp file
+  foreach my $num (@tosend) {
+    # Archive format, gzip compressed file w/ 3 parts per message:
+    # 1- server message number in text format
+    # 2- server index string, binary packed format
+    # 3- message content -- unless paths_only
+    send_line($gzfd, $num) || die "mass-check: error when writing to gz temp file\n";
+
+    my $data = $msgsout->{$num}->{'data'};
+    send_line($gzfd, $data) || die "mass-check: error when writing to gz temp file\n";
+
+    if (!$paths_only) {
+      # the fifth element returned by run_message is used as an array ref
+      # holding the message content
+      my $msg = ($iter->run_message($data))[4];
+      send_line($gzfd, join('', @{$msg})) ||
+        die "mass-check: error when writing to gz temp file\n";
+    }
+  }
+
+  $gzfd->close;
+
+  # update timestamp entries
+  my $ts = time;
+  foreach (@tosend) {
+    $msgsout->{$_}->{'timestamp'} = $ts;
+  }
+
+  # record what went out under this timestamp.  another batch may already
+  # have been sent out during the same second, so add to any existing list
+  # instead of replacing it -- otherwise the earlier batch would drop out of
+  # the timestamp index and could never be picked up for a resend.
+  push(@{$timestamps->{$ts}}, @tosend);
+
+  if ($opt_noisy) {
+    print "generated ".scalar(@tosend)." messages\n";
+  }
+
+  return $gzpath;
+}
+
+# Results have been POSTed back to us: pass each one we were still waiting
+# for to the result handler, then prune the msgsout and timestamp hashes.
+sub handle_post_results {
+  my($postdata, $timestamps, $msgsout) = @_;
+
+  # Data layout:
+  #   $msgsout->{num}    = { data => index string, timestamp => time sent }
+  #   $timestamps->{ts}  = [ msgsout nums sent out at that time ... ]
+  #   $postdata->{num}   = result string
+
+  # timestamp => { num => 1, ... } for everything retired in this call, so
+  # that the per-timestamp lists can be trimmed in one batch afterwards
+  my %retired = ();
+
+  foreach my $num (keys %{$postdata}) {
+    # message run results are keyed by \d+; any other key is not a result
+    next unless ($num =~ /^\d+$/);
+
+    # Only results we are still waiting for count.  Several clients may
+    # have been handed the same message; the first answer wins and any
+    # later one is dropped here.
+    next unless (exists $msgsout->{$num});
+
+    # the result_sub wants the class and date stored in the index string
+    my $entry = $msgsout->{$num};
+    my ($date, $class) = Mail::SpamAssassin::ArchiveIterator::index_unpack($entry->{'data'});
+    $iter->{result_sub}->($class, $postdata->{$num}, $date);
+
+    # remember which timestamp list this number has to be removed from
+    $retired{$entry->{'timestamp'}}->{$num} = 1;
+    delete $msgsout->{$num};
+  }
+
+  # drop the retired numbers from each affected timestamp list
+  foreach my $ts (keys %retired) {
+    my $gone = $retired{$ts};
+    my @pending = grep { !exists $gone->{$_} } @{$timestamps->{$ts}};
+
+    # keep the shortened list, or forget the timestamp once it is empty
+    if (@pending) {
+      $timestamps->{$ts} = \@pending;
+    }
+    else {
+      delete $timestamps->{$ts};
+    }
+  }
+}
+
+# Read queued messages from $tmpfd and process up to $total_messages of them:
+# in this process when $opt_j <= 1 and $opt_restart is unset, otherwise by
+# handing them one at a time to child processes over their sockets.
+sub run_through_messages {
+  # do everything in one process
+  if ($opt_j <= 1 && !defined $opt_restart) {
+    my $message;
+    my $messages;
+    my $total_count = 0;
+
+    while (($total_messages > $total_count) && ($message = read_line($tmpfd))) {
+      my($class, undef, $date, undef, $result) = $iter->run_message($message);
+      if ($result) {
+        &{$iter->{result_sub}}($class, $result, $date);
+      }
+      $total_count++;
+    }
+  }
+  # more than one process or one process with restarts
+  else {
+    my $select = IO::Select->new();
+
+    my $total_count = 0;
+    my $needs_restart = 0;
+    my @child = ();
+    my @pid = ();
+    my $messages;
+
+    # start children processes
+    start_children($opt_j, \@child, \@pid, $select);
+
+    # feed children, make them work for it, repeat
+    while ($select->count()) {
+      foreach my $socket ($select->can_read()) {
+        my $line = read_line($socket);
+
+        # read_line gave us nothing: treat it as a failed read from the child
+        if (!defined $line) {
+          $needs_restart = 1;
+          warn "mass-check: readline failed, attempting to recover\n";
+          $select->remove($socket);
+        }
+        elsif ($line =~ /^([^\0]*)\0RESULT (.+)$/s) {
+	  my $result = $1;
+	  my ($date,$class,$type) = Mail::SpamAssassin::ArchiveIterator::index_unpack($2);
+	  aidbg "mass-check: $class, $type, $date\n";
+
+	  if (defined $opt_restart && ($total_count % $opt_restart) == 0) {
+	    $needs_restart = 1;
+	  }
+
+	  # if messages remain, and we don't need to restart, send message
+	  if (($total_messages > $total_count) && !$needs_restart) {
+	    send_line($socket, read_line($tmpfd));
+	    $total_count++;
+	    aidbg "mass-check: $total_messages $total_count\n";
+	  }
+	  else {
+	    # stop listening on this child since we're done with it
+	    aidbg "mass-check: $needs_restart $total_messages $total_count\n";
+	    $select->remove($socket);
+	  }
+
+	  # deal with the result we received
+	  if ($result) {
+	    &{$iter->{result_sub}}($class, $result, $date);
+	  }
+        }
+        elsif ($line eq "START") {
+	  if ($total_messages > $total_count) {
+	    # we still have messages, send one to child
+	    send_line($socket, read_line($tmpfd));
+	    $total_count++;
+	    aidbg "mass-check: $total_messages $total_count\n";
+	  }
+	  else {
+	    # no more messages, so stop listening on this child
+	    aidbg "mass-check: $needs_restart $total_messages $total_count\n";
+	    $select->remove($socket);
+	  }
+        }
+        else {
+          $needs_restart = 1;
+          warn "mass-check: bad line from readline: $line\n";
+          $select->remove($socket);
+        }
+      }
+
+      aidbg "mass-check: out of loop, $total_messages $total_count $needs_restart ".$select->count()."\n";
+
+      # If there are still messages to process, and we need to restart
+      # the children, and all of the children are idle, let's go ahead.
+      if ($needs_restart && $select->count == 0 && $total_messages > $total_count) {
+        $needs_restart = 0;
+
+        aidbg "mass-check: needs restart, $total_messages total, $total_count done\n";
+        reap_children($opt_j, \@child, \@pid);
+        @child=();
+        @pid=();
+        start_children($opt_j, \@child, \@pid, $select);
+      }
+    }
+
+    # reap children
+    reap_children($opt_j, \@child, \@pid);
+  }
+}
+
+# Write a complete HTTP/1.0 response to $socket: the status line ($result),
+# our fixed headers, the caller's %{$headers}, a blank line, then $data.
+sub http_response {
+  my($socket, $result, $headers, $data) = @_;
+  my @head = ("HTTP/1.0 $result\r\n", "Pragma: no-cache\r\n",
+    "Server: mass-check/$svn_revision\r\n");
+  foreach my $name (keys %{$headers}) {
+    push(@head, "$name: ".$headers->{$name}."\r\n");
+  }
+  print $socket @head;
+  print $socket "\r\n";
+  print $socket $data;
+}
+
+# Client side: send one HTTP/1.0 request ($type $uri, the extra %{$headers},
+# then the body $data) over $socket and read the reply.  Returns undef unless
+# the status is 200, 'finished' if the server sent a true "Finished" header,
+# else the path of a temp file holding the body ('' without Content-Length).
+sub http_make_request {
+  my($socket, $type, $uri, $headers, $data) = @_;
+
+  print $socket
+    "$type $uri HTTP/1.0\r\n",
+    "User-Agent: mass-check/$svn_revision\r\n",
+    map { "$_: ".$headers->{$_}."\r\n" } keys %{$headers};
+  print $socket "\r\n";
+  print $socket $data;
+
+  # parse the status line; a missing or malformed one counts as a failure
+  my $line = $socket->getline() || '';
+  my(undef, $code) = split(/\s+/, $line, 3);
+  return unless defined $code && $code =~ /^\d+$/ && $code == 200;
+
+  # Read headers up to the blank line or EOF.  This must be a real loop:
+  # "last" does not leave a do {} while block, it would instead exit
+  # whatever loop the caller happens to be running.
+  my %headers = ();
+  while (defined($line = $socket->getline())) {
+    $line =~ s/\r?\n$//;
+    last if $line eq '';
+    my ($k,$v) = split(/:\s*/, $line, 2);
+    $headers{lc $k} = $v;
+  }
+
+  # the server has told us that it's going to exit, so let's follow suit.
+  return 'finished' if ($headers{'finished'});
+
+  my $gzpath = '';
+  if ($headers{'content-length'}) {
+    my $gzfd;
+    ($gzpath, $gzfd) = Mail::SpamAssassin::Util::secure_tmpfile();
+    die "Can't make tempfile, exiting" unless $gzpath;
+    my $rd;
+    $socket->read($rd, $headers{'content-length'}) || die "mass-check: error reading in data from server\n";
+    print $gzfd $rd;
+    close $gzfd;
+  }
+
+  $socket->close();
+  return $gzpath;
+}
+
+# Be conservative -- percent-encode every character other than ASCII letters,
+# digits and "_ , . / \ -" (so a space is sent as %20, not as a plus).
+sub post_encode {
+  my ($encoded) = @_;
+  $encoded =~ s{([^a-zA-Z0-9_,./\\-])}{sprintf("%%%02x", unpack("C", $1))}eg;
+  return $encoded;
+}
+
+# Delete every plain file directly inside $dir (subdirectories are skipped).
+# Returns 1 on success; warns and returns nothing if the directory cannot be
+# opened, or as soon as one file cannot be removed.
+sub clean_dir {
+  my ($dir) = @_;
+
+  if (!opendir(DIR, $dir)) {
+    warn "error: can't opendir $dir: $!\n";
+    return;
+  }
+  while (my $entry = readdir(DIR)) {
+    $entry =~ /^(.+)$/;       # untaint
+    my $name = $1;
+    my $target = File::Spec->catfile($dir, $name);
+    next unless (-f $target);
+    next if unlink $target;
+
+    warn "error: can't remove file $target: $!\n";
+    closedir(DIR);
+    return;
+  }
+  closedir(DIR);
+  return 1;
+}
+
+############################################################################
+
+# Wire format: a four-byte little-endian ("V", VAX order) length, followed by
+# that many bytes of message.  Returns the message, or nothing at EOF, on a
+# read error, for a zero-length record, or if the record is cut short.
+sub read_line {
+  my $fd = shift;
+  my($length,$msg);
+
+  # read in the 4 byte length and unpack; a partial header is unusable
+  my $got = $fd->read($length, 4);
+  return unless $got && $got == 4;
+  $length = unpack("V", $length);
+  return unless $length;
+
+  # read in the rest of the message; a truncated body is not a message
+  $got = $fd->read($msg, $length);
+  return unless $got && $got == $length;
+  return $msg;
+}
+
+# Write each argument after $fd as a 4-byte "V" length plus the bytes
+# themselves.  Returns 1 if every print succeeded, 0 at the first failure.
+sub send_line {
+  my ($fd, @records) = @_;
+  foreach my $record (@records) {
+    $fd->print(pack("V", length $record) . $record) || return 0;
+  }
+  return 1;
+}
+
+############################################################################
+
+# this is the function that implements server mode.  basically, sit and wait
+# for connections to come in.  when a client sends in a request, deal with any
+# results that the client sent, then generate a response and send it back,
+# and then go back to waiting.  lather, rinse, repeat.
+sub server_mode {
+  $opt_cs_max ||= 1000;
+  $opt_cs_timeout ||= 60 * 5;
+
+  my $serv_socket = IO::Socket::INET->new(
+    LocalAddr => $opt_server,
+    Proto => 'tcp',
+    Listen => 5,
+    ReuseAddr => 1,
+  );
+
+  die "Could not create socket: $!\n" unless $serv_socket;
+
+  if ($opt_progress) {
+    status('server ready for connections');
+  }
+
+  # Set up our "what messages have been sent out" hashes
+  my $timestamps = {};
+  my $msgsout = { 'curnum' => 0 };
+
+  # Generate an IO::Select object and put the server socket on the queue
+  my $select = IO::Select->new( $serv_socket );
+
+  # We'll keep looping while there's something to pay attention to
+  while ($select->count()) {
+    # Sit and block until there's something for us to read from
+    foreach my $socket ($select->can_read()) {
+      if ($socket == $serv_socket) {
+        # it's the server socket, go ahead and accept the connection and add
+	# it to the queue.
+        $select->add($serv_socket->accept);
+      }
+      else {
+	# it's some client, so deal with the request
+	my($type, $URI, $headers, $postdata) = handle_http_request($socket);
+
+	# we don't do GET, so just send something back
+	if ($type eq 'GET') {
+	  if ($opt_noisy) {
+	    print "GET request from ".$socket->peerhost."\n";
+	  }
+
+	  http_response($socket, "200 OK", {
+	      'Content-type' => 'text/plain',
+	    },
+	    "Your GET request came from IP Address: ".$socket->peerhost."\n");
+	}
+        elsif ($type eq 'POST') {
+	  # ooh, POST.  deal with any results that the client sent
+	  handle_post_results($postdata, $timestamps, $msgsout);
+
+	  if ($opt_noisy) {
+	    print "POST request from ".$socket->peerhost."\n";
+	  }
+
+          # based on the number of messages that the client requested,
+	  # generate a gzip file with the appropriate data in it
+	  my $messages = '';
+	  if ($postdata->{'max_messages'}) {
+	    my $msgnum = $postdata->{'max_messages'};
+	    if ($msgnum > $opt_cs_max || $msgnum < 1) {
+	      $msgnum = $opt_cs_max;
+	    }
+
+	    if ($opt_noisy) {
+	      print "client requested ".$postdata->{'max_messages'}." messages\n";
+	    }
+
+	    $messages = generate_messages($msgnum, $timestamps, $msgsout, $postdata->{'paths_only'});
+	  }
+
+          # $messages will contain the path to the gzip file if there are
+	  # messages to send out.
+          if ($messages && open(MSG, $messages)) {
+	    binmode(MSG);
+	    local $/ = undef;  # go go slurp mode
+
+	    # send the response
+	    http_response($socket, "200 OK", {
+	      'Content-Type' => 'application/x-gzip',
+	      'Content-Encoding' => 'x-gzip',
+	      "Content-Length" => (-s $messages),
+	      },
+	      scalar <MSG>);
+
+	    close(MSG);
+
+	    # we don't need the file anymore, so get rid of it
+	    unlink $messages;
+          }
+	  elsif (!keys %{$msgsout} && !defined $tmpfd) {
+	    # we have no more outstanding messages and our original queue of
+	    # messages to process is empty, so tell the client to exit.
+	    http_response($socket, "200 OK", {
+              "Content-type" => "text/plain",
+	      "Finished" => 1,
+	      },
+	      'We are all done');
+	  }
+	  else {
+	    # when in doubt, treat this like a GET
+	    http_response($socket, "200 OK", {
+              "Content-type" => "text/plain",
+	      },
+              "Your POST request (sans max_messages) came from IP Address: ".$socket->peerhost."\n");
+	  }
+	}
+	else {
+          # for error, "501 Not Implemented"
+	  http_response($socket, '501 Not Implemented', {}, '');
+	}
+      
+	# ok, we don't do keepalive, so get rid of the socket
+        $select->remove($socket);
+	$socket->close;
+      }
+    }
+
+    if ($opt_noisy) {
+      print scalar(keys %{$msgsout})." messages outstanding\n";
+    }
+
+
+#print "msgs waiting: ".join(" ", keys %{$msgsout})."\n";
+#print "tmpfd defined? ".(defined $tmpfd ? "yes" : "no")."\n";
+
+    # we're not awaiting responses and we've exhausted the input file, so
+    # drop the server socket. :)
+    $select->remove($serv_socket) if (!keys %{$msgsout} && !defined $tmpfd);
+  }
+}
+
+# this is the function that implements client mode.  generally, in a loop:
+#  make a request of the server for some max number of messages, and send our
+#  results back at the same time.  based on the results of that request, put
+#  messages into a temp dir and process them.  prep the results and loop.
+#  the loop ends when the connection or request fails or the server is done.
+sub client_mode {
+  $opt_cs_max ||= 1000;
+  $opt_cs_timeout ||= 60 * 2;
+
+  my($host, $uri);
+
+  if ($opt_client =~ /^http:\/\/([^\/]+)(\/.*)?/) {
+    ($host, $uri) = ($1,$2);
+  }
+  else {
+    $host = $opt_client;
+    if ($host =~ /^:/) {
+      $host = 'localhost'.$host;
+    }
+  }
+  my($http_host) = split(/:/, $host);
+
+  die "No host found in opt_client" unless $host;
+  $uri ||= "/";
+
+  # use this to track how many messages we ought to be requesting
+  my $msgnum = $opt_cs_max;
+
+  my $tmpdir;
+
+  # if we're not doing paths_only, create a temp dir where we'll put the
+  # incoming messages to process.
+  if (!$opt_cs_paths_only) {
+    $tmpdir = Mail::SpamAssassin::Util::secure_tmpdir();
+    die "Can't create tempdir" unless $tmpdir;
+  }
+
+  # keep going until something stops us.
+  while (1) {
+    # if the number of messages to request is too much, bring it down
+    $msgnum = $opt_cs_max if ($msgnum > $opt_cs_max);
+
+    # prep the POST request
+    $postdata{'max_messages'} = $msgnum;
+    $postdata{'paths_only'} = 1 if ($opt_cs_paths_only);
+
+    # the actual POST data string
+    my $POSTDATA = join('&', map { post_encode($_) . '=' . post_encode($postdata{$_}) } keys %postdata);
+
+    # connect to server
+    my $socket = IO::Socket::INET->new($host);
+
+    # last if connection fails
+    last unless ($socket);
+
+    print "Requesting $msgnum messages from server\n" if ($opt_noisy);
+
+    # make request, include and then drop results if there are any
+    my $result = http_make_request($socket, 'POST', $uri, {
+      'Host'		=> $http_host,
+      'Content-Type'	=> 'application/x-www-form-urlencoded',
+      'Content-Length'	=> length($POSTDATA),
+      },
+      $POSTDATA
+    );
+    %postdata = ();
+    undef $POSTDATA;
+
+    # If we received messages to run through, go ahead and do it.
+    # otherwise, just sleep for the timeout length and try again
+    if (!defined $result) {
+      # we got an error?!?  abort!
+      last;
+    }
+    elsif ($result eq 'finished') {
+      # the server said that we're done
+      print "Server states that there is no more work, exiting.\n" if ($opt_noisy);
+      last;
+    }
+    elsif ($result eq '') {
+      # no messages means the server may give us more work down the road.
+      # sleep for $opt_cs_timeout seconds and try the request again
+      print "Received no messages from server, waiting $opt_cs_timeout seconds\n" if ($opt_noisy);
+      sleep $opt_cs_timeout;
+    }
+    else {
+      # we got messages, so deal with them.
+      my $time_start = time;
+
+      # postdata will hold our results, real will hold the original message
+      # data from the server's scan mode.
+      %postdata = ();
+      %real = ();
+      $init_results = $total_count = $spam_count = $ham_count = 0;
+
+      # $result is the path of the gzip file we were sent, so open it
+      my $gzfd = IO::Zlib->new($result, "rb");
+      die "Can't open temp result file: $!" unless $gzfd;
+
+      # used for the temp queue file
+      my $tmppath;
+      ($tmppath, $tmpfd) = Mail::SpamAssassin::Util::secure_tmpfile();
+      die "Can't make tempfile, exiting" unless $tmppath;
+      unlink $tmppath;
+
+      # if we have a temp directory, clean it out for this run
+      clean_dir($tmpdir) if ($tmpdir);
+
+      # Archive format, gzip compressed file w/ 3 parts per message:
+      # 1- server message number in text format
+      # 2- server index string, binary packed format
+      # 3- message content, if not doing paths_only
+
+      # number of messages
+      $msgnum = $total_messages = read_line($gzfd) || die "mass-check: error reading from gzip message file\n";
+
+      status("server gave us $total_messages messages") if ($opt_progress);
+
+      # loop through and prep all of the messages the server sent
+      for(my $i = 0 ; $i < $total_messages; $i++ ) {
+        my $num = read_line($gzfd);
+	last unless defined $num;
+
+        my $index = read_line($gzfd);
+	last unless defined $index;
+
+	# if we're doing paths_only, there'll be no message content
+	if (!$opt_cs_paths_only) {
+	  my $msg = read_line($gzfd);
+	  last unless defined $msg;
+
+	  # it's going to be a dir of file formatted messages
+	  if (open(OUT, ">$tmpdir/$num")) {
+	    print OUT $msg;
+	    close(OUT);
+
+	    # this is a little tricky -- we need to process the files in the
+	    # path and format we've created, but the original data is needed
+	    # to create a proper result later, so deal with that here.
+	    my @d = Mail::SpamAssassin::ArchiveIterator::index_unpack($index);
+	    $real{"$tmpdir/$num"} = \@d;
+            send_line($tmpfd,
+	      Mail::SpamAssassin::ArchiveIterator::index_pack($d[0], $d[1], 'f', "$tmpdir/$num")) ||
+	        die "mass-check: error writing out temp file in client mode\n";
+	  }
+	  else {
+	    warn "Can't create/write $tmpdir/$num: $!";
+	  }
+	}
+	else {
+	  # in paths_only mode, there's no kluging between formats since we're
+	  # reading the same corpus, however we do still need to track server
+	  # message number to message data so our results will be usable.
+	  my @d = Mail::SpamAssassin::ArchiveIterator::index_unpack($index);
+	  $real{$d[3]} = $num;
+	  send_line($tmpfd, $index) ||
+	        die "mass-check: error writing out temp file in client mode\n";
+	}
+      }
+
+      $gzfd->close;
+      unlink $result;
+
+      if ($opt_progress) {
+        status('starting run stage');
+      }
+
+      # we're about to start running, so go back to the start of the file
+      seek $tmpfd, 0, 0;
+
+      run_through_messages();
+
+      # we're done with the temp file -- bye bye
+      close($tmpfd);
+
+      # figure out new max messages, try keeping ~cs_timeout between runs
+      my $time_end = time;
+
+      # if we only requested a small number of messages, it may take <1s to
+      # run through them, so fake it and say it took 1s.
+      if ($time_end == $time_start) {
+        $time_end++;
+      }
+
+      if ($opt_progress) {
+        status('completed run stage');
+      }
+
+      print "Completed run in ".($time_end-$time_start)." seconds\n" if ($opt_noisy);
+      $msgnum = int($msgnum * $opt_cs_timeout / ($time_end-$time_start)) || 1;
+    }
+  }
+
+  # if we were using a temp dir, clean it out and then remove it
+  if ($tmpdir) {
+    clean_dir($tmpdir);
+    rmdir $tmpdir;
+  }
+}
+
+############################################################################
+
+# "wanted" callback for server mode: whatever else it is passed, it hands
+# back only its fourth argument, the reference to the message data.
+sub wanted_server {
+  return $_[3];
+}
+
+# Client-mode counterpart of result(): the result string arrives with the
+# server's message number in front.  Strip the number off and file the rest
+# under it in %postdata, which client_mode sends with its next request.
+sub result_client {
+  my ($class, $result, $time) = @_;
+
+  # don't open results files until we get here to avoid overwriting files
+  init_results() unless $init_results;
+
+  # per-class and overall counters
+  $spam_count++ if ($class eq "s");
+  $ham_count++ if ($class eq "h");
+  $total_count++;
+
+  # show progress when asked to
+  progress($time) if ($opt_progress);
+
+  # peel the leading "<number><whitespace>" off and use the number as the
+  # key for what remains.  note that because of /m the number is also
+  # accepted at the start of a later line of a multi-line result; anything
+  # without such a number is reported and dropped.
+  unless ($result =~ s/^(\d+)\s+//m) {
+    warn ">> WTH!?  result is not in the correct format: $result\n";
+  }
+  else {
+    $postdata{$1} = $result;
+  }
+}
+
+sub aidbg {
+  # pass the arguments on to dbg() only when would_log() returns 2 for
+  # ("dbg", "mass-check")
+  dbg (@_) if (would_log("dbg", "mass-check") == 2);
 }

Modified: spamassassin/branches/jm_re2c_hacks/masses/mboxget
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/mboxget?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/mboxget (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/mboxget Fri Oct  6 05:46:56 2006
@@ -9,11 +9,12 @@
 #   grep SUBJECT_FREQ spam.log | ./mboxget | grep Subject:
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/mk-roc-graphs
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/mk-roc-graphs?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/mk-roc-graphs (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/mk-roc-graphs Fri Oct  6 05:46:56 2006
@@ -7,11 +7,12 @@
 # for details on ROC curves.
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/overlap
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/overlap?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/overlap (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/overlap Fri Oct  6 05:46:56 2006
@@ -3,11 +3,12 @@
 # overlap - print overlap between test pairs
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 # 

Modified: spamassassin/branches/jm_re2c_hacks/masses/parse-rules-for-masses
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/parse-rules-for-masses?view=diff&rev=453586&r1=453585&r2=453586
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/parse-rules-for-masses (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/parse-rules-for-masses Fri Oct  6 05:46:56 2006
@@ -1,11 +1,12 @@
 #!/usr/bin/perl -w
 #
 # <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
 # 
 #     http://www.apache.org/licenses/LICENSE-2.0
 #