You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by rj...@apache.org on 2014/03/31 21:05:53 UTC
svn commit: r1583403 - in /tomcat/jk/trunk: native/common/jk_map.c
xdocs/miscellaneous/changelog.xml
Author: rjung
Date: Mon Mar 31 19:05:52 2014
New Revision: 1583403
URL: http://svn.apache.org/r1583403
Log:
BZ 56297: Improve key hash function.
Copied from APR.
Modified:
tomcat/jk/trunk/native/common/jk_map.c
tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml
Modified: tomcat/jk/trunk/native/common/jk_map.c
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_map.c?rev=1583403&r1=1583402&r2=1583403&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_map.c (original)
+++ tomcat/jk/trunk/native/common/jk_map.c Mon Mar 31 19:05:52 2014
@@ -36,32 +36,51 @@
#define JK_MAP_REFERENCE (".reference")
#define JK_MAP_REFERENCE_SZ (strlen(JK_MAP_REFERENCE))
-/* Compute the "checksum" for a key, consisting of the first
- * 4 bytes, packed into an int.
- * This checksum allows us to do a single integer
- * comparison as a fast check to determine whether we can
- * skip a strcmp
+/* Taken from APR tables/apr_hash.c */
+/*
+ * This is the popular `times 33' hash algorithm which is used by
+ * perl and also appears in Berkeley DB. This is one of the best
+ * known hash functions for strings because it is both computed
+ * very fast and distributes very well.
+ *
+ * The originator may be Dan Bernstein but the code in Berkeley DB
+ * cites Chris Torek as the source. The best citation I have found
+ * is "Chris Torek, Hash function for text in C, Usenet message
+ * <27...@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
+ * Salz's USENIX 1992 paper about INN which can be found at
+ * <http://citeseer.nj.nec.com/salz92internetnews.html>.
+ *
+ * The magic of number 33, i.e. why it works better than many other
+ * constants, prime or not, has never been adequately explained by
+ * anyone. So I try an explanation: if one experimentally tests all
+ * multipliers between 1 and 256 (as I did while writing a low-level
+ * data structure library some time ago) one detects that even
+ * numbers are not useable at all. The remaining 128 odd numbers
+ * (except for the number 1) work more or less all equally well.
+ * They all distribute in an acceptable way and this way fill a hash
+ * table with an average percent of approx. 86%.
+ *
+ * If one compares the chi^2 values of the variants (see
+ * Bob Jenkins ``Hashing Frequently Asked Questions'' at
+ * http://burtleburtle.net/bob/hash/hashfaq.html for a description
+ * of chi^2), the number 33 not even has the best value. But the
+ * number 33 and a few other equally good numbers like 17, 31, 63,
+ * 127 and 129 have nevertheless a great advantage to the remaining
+ * numbers in the large set of possible multipliers: their multiply
+ * operation can be replaced by a faster operation based on just one
+ * shift plus either a single addition or subtraction operation. And
+ * because a hash function has to both distribute good _and_ has to
+ * be very fast to compute, those few numbers should be preferred.
+ *
+ * -- Ralf S. Engelschall <rs...@engelschall.com>
*/
-#define COMPUTE_KEY_CHECKSUM(key, checksum) \
-{ \
- const char *k = (key); \
- unsigned int c = (unsigned int)*k; \
- (checksum) = c; \
- (checksum) <<= 8; \
- if (c) { \
- c = (unsigned int)*++k; \
- checksum |= c; \
- } \
- (checksum) <<= 8; \
- if (c) { \
- c = (unsigned int)*++k; \
- checksum |= c; \
- } \
- (checksum) <<= 8; \
- if (c) { \
- c = (unsigned int)*++k; \
- checksum |= c; \
- } \
+#define COMPUTE_KEY_HASH(key, hash) \
+{ \
+ const unsigned char *p; \
+ hash = 0; \
+ for (p = (const unsigned char *)key; *p; p++) { \
+ hash = hash * 33 + *p; \
+ } \
}
static volatile int global_map_id = 0;
@@ -130,10 +149,10 @@ void *jk_map_get(jk_map_t *m, const char
if (m && name) {
unsigned int i;
- unsigned int key;
- COMPUTE_KEY_CHECKSUM(name, key)
+ unsigned int hash;
+ COMPUTE_KEY_HASH(name, hash)
for (i = 0; i < m->size; i++) {
- if (m->keys[i] == key && strcmp(m->names[i], name) == 0) {
+ if (m->keys[i] == hash && strcmp(m->names[i], name) == 0) {
rc = m->values[i];
break;
}
@@ -148,10 +167,10 @@ int jk_map_get_id(jk_map_t *m, const cha
int rc = -1;
if (m && name) {
unsigned int i;
- unsigned int key;
- COMPUTE_KEY_CHECKSUM(name, key)
+ unsigned int hash;
+ COMPUTE_KEY_HASH(name, hash)
for (i = 0; i < m->size; i++) {
- if (m->keys[i] == key && strcmp(m->names[i], name) == 0) {
+ if (m->keys[i] == hash && strcmp(m->names[i], name) == 0) {
rc = i;
break;
}
@@ -167,10 +186,10 @@ const char *jk_map_get_string(jk_map_t *
if (m && name) {
unsigned int i;
- unsigned int key;
- COMPUTE_KEY_CHECKSUM(name, key)
+ unsigned int hash;
+ COMPUTE_KEY_HASH(name, hash)
for (i = 0; i < m->size; i++) {
- if (m->keys[i] == key && strcmp(m->names[i], name) == 0) {
+ if (m->keys[i] == hash && strcmp(m->names[i], name) == 0) {
rc = m->values[i];
break;
}
@@ -343,14 +362,14 @@ int jk_map_add(jk_map_t *m, const char *
int rc = JK_FALSE;
if (m && name) {
- unsigned int key;
- COMPUTE_KEY_CHECKSUM(name, key)
+ unsigned int hash;
+ COMPUTE_KEY_HASH(name, hash)
map_realloc(m);
if (m->size < m->capacity) {
m->values[m->size] = value;
m->names[m->size] = jk_pool_strdup(&m->p, name);
- m->keys[m->size] = key;
+ m->keys[m->size] = hash;
m->size++;
rc = JK_TRUE;
}
@@ -365,10 +384,10 @@ int jk_map_put(jk_map_t *m, const char *
if (m && name) {
unsigned int i;
- unsigned int key;
- COMPUTE_KEY_CHECKSUM(name, key)
+ unsigned int hash;
+ COMPUTE_KEY_HASH(name, hash)
for (i = 0; i < m->size; i++) {
- if (m->keys[i] == key && strcmp(m->names[i], name) == 0) {
+ if (m->keys[i] == hash && strcmp(m->names[i], name) == 0) {
break;
}
}
Modified: tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml?rev=1583403&r1=1583402&r2=1583403&view=diff
==============================================================================
--- tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml (original)
+++ tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml Mon Mar 31 19:05:52 2014
@@ -48,8 +48,12 @@
Fix status worker display of worker IP address after name or port
was changed. (rjung)
</fix>
+ <update>
+ <bug>56297</bug>: Improve key hash function. Copied from APR. (rjung)
+ </update>
</changelog>
</subsection>
+</section>
<section name="Changes between 1.2.37 and 1.2.39">
<br />
<subsection name="Native">
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org