You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by rj...@apache.org on 2014/03/31 21:05:53 UTC

svn commit: r1583403 - in /tomcat/jk/trunk: native/common/jk_map.c xdocs/miscellaneous/changelog.xml

Author: rjung
Date: Mon Mar 31 19:05:52 2014
New Revision: 1583403

URL: http://svn.apache.org/r1583403
Log:
BZ 56297: Improve key hash function.
Copied from APR.

Modified:
    tomcat/jk/trunk/native/common/jk_map.c
    tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml

Modified: tomcat/jk/trunk/native/common/jk_map.c
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_map.c?rev=1583403&r1=1583402&r2=1583403&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_map.c (original)
+++ tomcat/jk/trunk/native/common/jk_map.c Mon Mar 31 19:05:52 2014
@@ -36,32 +36,51 @@
 #define JK_MAP_REFERENCE    (".reference")
 #define JK_MAP_REFERENCE_SZ (strlen(JK_MAP_REFERENCE))
 
-/* Compute the "checksum" for a key, consisting of the first
- * 4 bytes, packed into an int.
- * This checksum allows us to do a single integer
- * comparison as a fast check to determine whether we can
- * skip a strcmp
+/* Taken from APR tables/apr_hash.c */
+/*
+ * This is the popular `times 33' hash algorithm which is used by
+ * perl and also appears in Berkeley DB. This is one of the best
+ * known hash functions for strings because it is both computed
+ * very fast and distributes very well.
+ *
+ * The originator may be Dan Bernstein but the code in Berkeley DB
+ * cites Chris Torek as the source. The best citation I have found
+ * is "Chris Torek, Hash function for text in C, Usenet message
+ * <27...@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
+ * Salz's USENIX 1992 paper about INN which can be found at
+ * <http://citeseer.nj.nec.com/salz92internetnews.html>.
+ *
+ * The magic of number 33, i.e. why it works better than many other
+ * constants, prime or not, has never been adequately explained by
+ * anyone. So I try an explanation: if one experimentally tests all
+ * multipliers between 1 and 256 (as I did while writing a low-level
+ * data structure library some time ago) one detects that even
+ * numbers are not useable at all. The remaining 128 odd numbers
+ * (except for the number 1) work more or less all equally well.
+ * They all distribute in an acceptable way and this way fill a hash
+ * table with an average percent of approx. 86%.
+ *
+ * If one compares the chi^2 values of the variants (see
+ * Bob Jenkins ``Hashing Frequently Asked Questions'' at
+ * http://burtleburtle.net/bob/hash/hashfaq.html for a description
+ * of chi^2), the number 33 not even has the best value. But the
+ * number 33 and a few other equally good numbers like 17, 31, 63,
+ * 127 and 129 have nevertheless a great advantage to the remaining
+ * numbers in the large set of possible multipliers: their multiply
+ * operation can be replaced by a faster operation based on just one
+ * shift plus either a single addition or subtraction operation. And
+ * because a hash function has to both distribute good _and_ has to
+ * be very fast to compute, those few numbers should be preferred.
+ *
+ *                  -- Ralf S. Engelschall <rs...@engelschall.com>
  */
-#define COMPUTE_KEY_CHECKSUM(key, checksum)    \
-{                                              \
-    const char *k = (key);                     \
-    unsigned int c = (unsigned int)*k;         \
-    (checksum) = c;                            \
-    (checksum) <<= 8;                          \
-    if (c) {                                   \
-        c = (unsigned int)*++k;                \
-        checksum |= c;                         \
-    }                                          \
-    (checksum) <<= 8;                          \
-    if (c) {                                   \
-        c = (unsigned int)*++k;                \
-        checksum |= c;                         \
-    }                                          \
-    (checksum) <<= 8;                          \
-    if (c) {                                   \
-        c = (unsigned int)*++k;                \
-        checksum |= c;                         \
-    }                                          \
+#define COMPUTE_KEY_HASH(key, hash)    \
+{                                                    \
+    const unsigned char *p;                          \
+    hash = 0;                                        \
+    for (p = (const unsigned char *)key; *p; p++) {  \
+        hash = hash * 33 + *p;                       \
+    }                                                \
 }
 
 static volatile int global_map_id = 0;
@@ -130,10 +149,10 @@ void *jk_map_get(jk_map_t *m, const char
 
     if (m && name) {
         unsigned int i;
-        unsigned int key;
-        COMPUTE_KEY_CHECKSUM(name, key)
+        unsigned int hash;
+        COMPUTE_KEY_HASH(name, hash)
         for (i = 0; i < m->size; i++) {
-            if (m->keys[i] == key && strcmp(m->names[i], name) == 0) {
+            if (m->keys[i] == hash && strcmp(m->names[i], name) == 0) {
                 rc = m->values[i];
                 break;
             }
@@ -148,10 +167,10 @@ int jk_map_get_id(jk_map_t *m, const cha
     int rc = -1;
     if (m && name) {
         unsigned int i;
-        unsigned int key;
-        COMPUTE_KEY_CHECKSUM(name, key)
+        unsigned int hash;
+        COMPUTE_KEY_HASH(name, hash)
         for (i = 0; i < m->size; i++) {
-            if (m->keys[i] == key && strcmp(m->names[i], name) == 0) {
+            if (m->keys[i] == hash && strcmp(m->names[i], name) == 0) {
                 rc = i;
                 break;
             }
@@ -167,10 +186,10 @@ const char *jk_map_get_string(jk_map_t *
 
     if (m && name) {
         unsigned int i;
-        unsigned int key;
-        COMPUTE_KEY_CHECKSUM(name, key)
+        unsigned int hash;
+        COMPUTE_KEY_HASH(name, hash)
         for (i = 0; i < m->size; i++) {
-            if (m->keys[i] == key && strcmp(m->names[i], name) == 0) {
+            if (m->keys[i] == hash && strcmp(m->names[i], name) == 0) {
                 rc = m->values[i];
                 break;
             }
@@ -343,14 +362,14 @@ int jk_map_add(jk_map_t *m, const char *
     int rc = JK_FALSE;
 
     if (m && name) {
-        unsigned int key;
-        COMPUTE_KEY_CHECKSUM(name, key)
+        unsigned int hash;
+        COMPUTE_KEY_HASH(name, hash)
         map_realloc(m);
 
         if (m->size < m->capacity) {
             m->values[m->size] = value;
             m->names[m->size] = jk_pool_strdup(&m->p, name);
-            m->keys[m->size] = key;
+            m->keys[m->size] = hash;
             m->size++;
             rc = JK_TRUE;
         }
@@ -365,10 +384,10 @@ int jk_map_put(jk_map_t *m, const char *
 
     if (m && name) {
         unsigned int i;
-        unsigned int key;
-        COMPUTE_KEY_CHECKSUM(name, key)
+        unsigned int hash;
+        COMPUTE_KEY_HASH(name, hash)
         for (i = 0; i < m->size; i++) {
-            if (m->keys[i] == key && strcmp(m->names[i], name) == 0) {
+            if (m->keys[i] == hash && strcmp(m->names[i], name) == 0) {
                 break;
             }
         }

Modified: tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml?rev=1583403&r1=1583402&r2=1583403&view=diff
==============================================================================
--- tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml (original)
+++ tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml Mon Mar 31 19:05:52 2014
@@ -48,8 +48,12 @@
         Fix status worker display of worker IP address after name or port
         was changed. (rjung)
       </fix>
+      <update>
+        <bug>56297</bug>: Improve key hash function. Copied from APR. (rjung)
+      </update>
     </changelog>
   </subsection>
+</section>
 <section name="Changes between 1.2.37 and 1.2.39">
   <br />
   <subsection name="Native">



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org