You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/11/13 13:05:25 UTC

svn commit: r474270 - in /spamassassin/trunk: MANIFEST lib/Mail/SpamAssassin/Conf.pm lib/Mail/SpamAssassin/Conf/Parser.pm lib/Mail/SpamAssassin/Util/TieOneStringHash.pm

Author: jm
Date: Mon Nov 13 04:05:24 2006
New Revision: 474270

URL: http://svn.apache.org/viewvc?view=rev&rev=474270
Log:
remove the descriptions_str hack; instead, use a tie() class, Mail/SpamAssassin/Util/TieOneStringHash, for descriptions.  This class facades a single string with a hash interface, providing a slow but very memory-efficient hash-like structure, perfect for descriptions

Added:
    spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm
Modified:
    spamassassin/trunk/MANIFEST
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm

Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?view=diff&rev=474270&r1=474269&r2=474270
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Mon Nov 13 04:05:24 2006
@@ -110,6 +110,7 @@
 lib/Mail/SpamAssassin/Util/DependencyInfo.pm
 lib/Mail/SpamAssassin/Util/Progress.pm
 lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
+lib/Mail/SpamAssassin/Util/TieOneStringHash.pm
 lib/spamassassin-run.pod
 masses/CORPUS_POLICY
 masses/CORPUS_SUBMIT

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?view=diff&rev=474270&r1=474269&r2=474270
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Nov 13 04:05:24 2006
@@ -74,6 +74,7 @@
 use Mail::SpamAssassin::Constants qw(:sa);
 use Mail::SpamAssassin::Conf::Parser;
 use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Util::TieOneStringHash;
 use File::Spec;
 
 use strict;
@@ -2594,7 +2595,6 @@
   $self->{plugins_loaded} = { };
 
   $self->{tests} = { };
-  $self->{descriptions} = { };
   $self->{test_types} = { };
   $self->{scoreset} = [ {}, {}, {}, {} ];
   $self->{scoreset_current} = 0;
@@ -2602,6 +2602,11 @@
   $self->{tflags} = { };
   $self->{source_file} = { };
 
+  # keep descriptions in a slow but space-efficient single-string
+  # data structure
+  tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
+    or warn "tie failed";
+
   # after parsing, tests are refiled into these hashes for each test type.
   # this allows e.g. a full-text test to be rewritten as a body test in
   # the user's user_prefs file.
@@ -2895,11 +2900,7 @@
 
 sub get_description_for_rule {
   my ($self, $rule) = @_;
-  if ($self->{descriptions_str} =~ /^\Q${rule}\E:(.*?)$/m) {
-    return $1;
-  } else {
-    return;
-  }
+  return $self->{descriptions}->{$rule};
 }
 
 ###########################################################################
@@ -3060,6 +3061,7 @@
 
 sub finish {
   my ($self) = @_;
+  untie %{$self->{descriptions}};
   %{$self} = ();
 }
 

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?view=diff&rev=474270&r1=474269&r2=474270
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Nov 13 04:05:24 2006
@@ -669,7 +669,6 @@
 
   $self->trace_meta_dependencies();
   $self->fix_priorities();
-  $self->compact_descriptions();
 
   dbg("conf: finish parsing");
 
@@ -835,24 +834,6 @@
       }
     }
   }
-}
-
-# compact the {descriptions} hash into a single string; by using
-# a string, quite a lot of RAM is freed up in exchange for slightly
-# slower lookup time
-sub compact_descriptions {
-  my ($self) = @_;
-  my $conf = $self->{conf};
-
-  my $descs = '';
-  my ($k, $v);
-  while (($k, $v) = each %{$conf->{descriptions}})
-  {
-    $descs .= "$k:$v\n";
-  }
-
-  delete $conf->{descriptions};
-  $conf->{descriptions_str} = $descs;
 }
 
 ###########################################################################

Added: spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm?view=auto&rev=474270
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm (added)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm Mon Nov 13 04:05:24 2006
@@ -0,0 +1,126 @@
+# A memory-efficient, but slow, single-string structure with a hash interface.
+
+# <@LICENSE>
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </@LICENSE>
+
+package Mail::SpamAssassin::Util::TieOneStringHash;
+
+use strict;
+use warnings;
+use Carp qw(croak);
+
+our @ISA = qw();
+
+# the structure is pretty simple: it's a single string, containing
+# items like so:
+#
+#    \n KEY 0x00 VALUE 0x00 \n
+#    \n KEY2 0x00 VALUE2 0x00 \n
+#    ...
+#
+# undef values are represented using $UNDEF_VALUE, a hacky magic string.
+# Only simple scalars can be stored; refs of any kind produce a croak().
+#
+# writes are slowest, reads are slow, but memory usage is very low
+# compared to a "real" hash table -- in other words, this is perfect
+# for infrequently-read data that has to be kept around but should
+# affect memory usage as little as possible.
+
+my $UNDEF_VALUE = "_UNDEF_\001";
+
+###########################################################################
+
+# TIEHASH: the tied object is a blessed ref to one, initially empty, string.
+sub TIEHASH {
+  my ($class) = @_;
+  return bless \(my $packed = ''), $class;
+}
+
+# STORE: set $k to $v.  An existing record for $k is rewritten in place;
+# otherwise a new "\n KEY \0 VALUE \0 \n" record is appended to the string.
+sub STORE {
+  my ($store, $k, $v) = @_;
+
+  # references cannot be flattened into the string, so refuse them
+  croak "oops! only simple scalars can be stored in a TieOneStringHash"
+    if ref $v;
+  $v = $UNDEF_VALUE unless defined($v);   # undef is kept as a magic marker
+
+  my $record = "\n$k\000$v\000\n";
+  my $replaced = ($$store =~ s{\n\Q$k\E\000.*?\000\n}{$record}xgs);
+  $$store .= $record unless $replaced;
+  1;
+}
+
+# FETCH: return the value stored under $k, or undef when $k is absent.
+# STORE writes undef as the $UNDEF_VALUE marker, so that marker is
+# translated back to a real undef here instead of leaking to the caller.
+sub FETCH {
+  my ($store, $k) = @_;
+  return unless $$store =~ m{\n\Q$k\E\000(.*?)\000\n}xs;
+  return $1 eq $UNDEF_VALUE ? undef : $1;
+}
+
+# EXISTS: return 1 if a record for $k is present, nothing otherwise.
+# Only the "\n KEY \0" record prefix is looked for; the value is irrelevant.
+sub EXISTS {
+  my ($store, $k) = @_;
+  my $needle = "\n$k\000";
+  return 1 if index($$store, $needle) >= 0;
+  return;
+}
+
+# DELETE: remove the record for $k and return the value it held, or undef
+# when $k was not present.  A stored undef (the $UNDEF_VALUE marker) is
+# returned as a real undef, matching what a plain hash delete would give.
+sub DELETE {
+  my ($store, $k) = @_;
+  return unless $$store =~ s{\n\Q$k\E\000(.*?)\000\n}{}xgs;
+  # $1 still holds the value captured from the record just removed
+  return $1 eq $UNDEF_VALUE ? undef : $1;
+}
+
+# FIRSTKEY: begin an iteration by returning the key of the first record in
+# the string, or nothing when the hash is empty.  Without /m, '^' anchors
+# at the very start of the string, i.e. at the first record's leading "\n".
+sub FIRSTKEY {
+  my ($store) = @_;
+  my ($first) = $$store =~ m{^\n(.*?)\000}s;
+  return defined $first ? $first : ();
+}
+
+# NEXTKEY: continue an iteration.  Finds the record for $lastk (the key
+# returned by the previous FIRSTKEY/NEXTKEY call) and returns the key of
+# the record immediately after it, or nothing once $lastk was the last one.
+sub NEXTKEY {
+  my ($store, $lastk) = @_;
+  my $after_last = qr{\n\Q$lastk\E\000.*?\000\n}s;
+  return $1 if $$store =~ m{$after_last\n(.*?)\000}s;
+  return;
+}
+
+# CLEAR: drop every record by resetting the underlying string to empty.
+sub CLEAR {
+  ${ $_[0] } = '';    # $_[0] is the tied object, a ref to the string
+}
+
+# SCALAR: hand back the raw packed string itself (empty when no records).
+sub SCALAR {
+  return ${ $_[0] };    # the whole backing string, not a count
+}
+
+1;