You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/11/13 13:05:25 UTC
svn commit: r474270 - in /spamassassin/trunk: MANIFEST
lib/Mail/SpamAssassin/Conf.pm lib/Mail/SpamAssassin/Conf/Parser.pm
lib/Mail/SpamAssassin/Util/TieOneStringHash.pm
Author: jm
Date: Mon Nov 13 04:05:24 2006
New Revision: 474270
URL: http://svn.apache.org/viewvc?view=rev&rev=474270
Log:
remove the descriptions_str hack; instead, use a tie() class, Mail/SpamAssassin/Util/TieOneStringHash, for descriptions. This class facades a single string with a hash interface, providing a slow but very memory-efficient hash-like structure, perfect for descriptions
Added:
spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm
Modified:
spamassassin/trunk/MANIFEST
spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?view=diff&rev=474270&r1=474269&r2=474270
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Mon Nov 13 04:05:24 2006
@@ -110,6 +110,7 @@
lib/Mail/SpamAssassin/Util/DependencyInfo.pm
lib/Mail/SpamAssassin/Util/Progress.pm
lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
+lib/Mail/SpamAssassin/Util/TieOneStringHash.pm
lib/spamassassin-run.pod
masses/CORPUS_POLICY
masses/CORPUS_SUBMIT
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?view=diff&rev=474270&r1=474269&r2=474270
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Nov 13 04:05:24 2006
@@ -74,6 +74,7 @@
use Mail::SpamAssassin::Constants qw(:sa);
use Mail::SpamAssassin::Conf::Parser;
use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Util::TieOneStringHash;
use File::Spec;
use strict;
@@ -2594,7 +2595,6 @@
$self->{plugins_loaded} = { };
$self->{tests} = { };
- $self->{descriptions} = { };
$self->{test_types} = { };
$self->{scoreset} = [ {}, {}, {}, {} ];
$self->{scoreset_current} = 0;
@@ -2602,6 +2602,11 @@
$self->{tflags} = { };
$self->{source_file} = { };
+ # keep descriptions in a slow but space-efficient single-string
+ # data structure
+ tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
+ or warn "tie failed";
+
# after parsing, tests are refiled into these hashes for each test type.
# this allows e.g. a full-text test to be rewritten as a body test in
# the user's user_prefs file.
@@ -2895,11 +2900,7 @@
sub get_description_for_rule {
my ($self, $rule) = @_;
- if ($self->{descriptions_str} =~ /^\Q${rule}\E:(.*?)$/m) {
- return $1;
- } else {
- return;
- }
+ return $self->{descriptions}->{$rule};
}
###########################################################################
@@ -3060,6 +3061,7 @@
sub finish {
my ($self) = @_;
+ untie %{$self->{descriptions}};
%{$self} = ();
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?view=diff&rev=474270&r1=474269&r2=474270
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Nov 13 04:05:24 2006
@@ -669,7 +669,6 @@
$self->trace_meta_dependencies();
$self->fix_priorities();
- $self->compact_descriptions();
dbg("conf: finish parsing");
@@ -835,24 +834,6 @@
}
}
}
-}
-
-# compact the {descriptions} hash into a single string; by using
-# a string, quite a lot of RAM is freed up in exchange for slightly
-# slower lookup time
-sub compact_descriptions {
- my ($self) = @_;
- my $conf = $self->{conf};
-
- my $descs = '';
- my ($k, $v);
- while (($k, $v) = each %{$conf->{descriptions}})
- {
- $descs .= "$k:$v\n";
- }
-
- delete $conf->{descriptions};
- $conf->{descriptions_str} = $descs;
}
###########################################################################
Added: spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm?view=auto&rev=474270
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm (added)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util/TieOneStringHash.pm Mon Nov 13 04:05:24 2006
@@ -0,0 +1,126 @@
+# A memory-efficient, but slow, single-string structure with a hash interface.
+
+# <@LICENSE>
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </@LICENSE>
+
+package Mail::SpamAssassin::Util::TieOneStringHash;
+
+use strict;
+use warnings;
+use Carp qw(croak);
+
+our @ISA = qw();
+
+# the structure is pretty simple: it's a single string, containing
+# items like so:
+#
+# \n KEY 0x00 VALUE 0x00 \n
+# \n KEY2 0x00 VALUE2 0x00 \n
+# ...
+#
+# undef values are represented using $UNDEF_VALUE, a hacky magic string.
+# Only simple scalars can be stored; refs of any kind produce a croak().
+#
+# writes are slowest, reads are slow, but memory usage is very low
+# compared to a "real" hash table -- in other words, this is perfect
+# for infrequently-read data that has to be kept around but should
+# affect memory usage as little as possible.
+
+my $UNDEF_VALUE = "_UNDEF_\001";  # marker STORE writes into the string in place of an undef value
+
+###########################################################################
+
+# Constructor used by tie().  The tied object is a blessed reference to a
+# single scalar string; that string starts out empty and is where every
+# key/value record ends up.  Returns the new object.
+sub TIEHASH {
+  my ($class) = @_;
+  my $buffer = '';
+  my $self = \$buffer;
+  return bless $self, $class;
+}
+
+# Hash-assignment hook: records $v under key $k in the backing string.
+#
+# An undef value is swapped for the $UNDEF_VALUE marker, and a reference of
+# any kind is rejected with croak().  If a record for $k is already present
+# it is rewritten in place; otherwise a new record is appended to the end of
+# the string.  Always returns 1.
+sub STORE {
+  my ($store, $k, $v) = @_;
+
+  if (!defined $v) {
+    $v = $UNDEF_VALUE;
+  }
+  croak "oops! only simple scalars can be stored in a TieOneStringHash"
+      if ref $v;
+
+  # the substitution yields a false value when no existing record matched
+  my $replaced = ($$store =~ s{\n\Q$k\E\000.*?\000\n}{\n$k\000$v\000\n}xgs);
+  if (!$replaced) {
+    $$store .= "\n$k\000$v\000\n";
+  }
+  return 1;
+}
+
+# Hash-read hook: looks up the record for key $k in the backing string and
+# returns its value.
+#
+# Returns undef when the key is not present.  It also returns undef when the
+# stored value is the $UNDEF_VALUE marker, which is what STORE writes in
+# place of an undef value; the marker is an internal encoding and must not
+# leak out to callers as if it were real data.
+sub FETCH {
+  my ($store, $k) = @_;
+  if ($$store =~ m{\n\Q$k\E\000(.*?)\000\n}xs)
+  {
+    my $v = $1;
+    # translate the undef marker back into a real undef
+    return if $v eq $UNDEF_VALUE;
+    return $v;
+  }
+  return;
+}
+
+# Key-presence hook: returns 1 if the backing string contains a record
+# whose key is exactly $k (newline before the key, 0x00 right after it),
+# otherwise returns nothing.
+sub EXISTS {
+  my ($store, $k) = @_;
+  return 1 if $$store =~ m{\n\Q$k\E\000}xs;
+  return;
+}
+
+# Key-removal hook: cuts the record for key $k out of the backing string
+# and returns the value it held.
+#
+# Returns undef when the key was not present.  It also returns undef when
+# the removed value was the $UNDEF_VALUE marker, which is what STORE writes
+# in place of an undef value, so the internal marker is never handed back
+# to the caller.
+sub DELETE {
+  my ($store, $k) = @_;
+  if ($$store =~ s{\n\Q$k\E\000(.*?)\000\n}{}xgs)
+  {
+    my $v = $1;
+    # translate the undef marker back into a real undef
+    return if $v eq $UNDEF_VALUE;
+    return $v;
+  }
+  return;
+}
+
+# Iteration-start hook: returns the key of the record at the very beginning
+# of the backing string, or nothing when the string does not start with a
+# record (e.g. it is empty).
+sub FIRSTKEY {
+  my ($store) = @_;
+  return unless $$store =~ m{^\n(.*?)\000}s;
+  return $1;
+}
+
+# Iteration-step hook: given the previously returned key $lastk, finds that
+# key's record in the backing string and returns the key of the record that
+# directly follows it.  Returns nothing when $lastk's record is the last one
+# (or cannot be found), which ends the iteration.
+sub NEXTKEY {
+  my ($store, $lastk) = @_;
+  return unless $$store =~ m{\n\Q$lastk\E\000.*?\000\n
+                        \n(.*?)\000}xs;
+  return $1;
+}
+
+# Hash-emptying hook: discards every record by resetting the backing string
+# to the empty string.
+sub CLEAR {
+  my ($self) = @_;
+  ${$self} = '';
+}
+
+# Scalar-context hook: returns the raw backing string itself, so an empty
+# store yields '' and a populated one yields its serialized records.
+sub SCALAR {
+  my ($self) = @_;
+  my $contents = ${$self};
+  return $contents;  # as a string!
+}
+
+1;