You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2010/08/29 13:39:11 UTC

svn commit: r990568 - /subversion/branches/performance/subversion/libsvn_subr/cache-membuffer.c

Author: stefan2
Date: Sun Aug 29 11:39:11 2010
New Revision: 990568

URL: http://svn.apache.org/viewvc?rev=990568&view=rev
Log:
Harden the caching strategy. Flooding the cache with large entries (e.g. full texts)
will soon sweep the smaller, more cache-efficient entries. It also prevents earlier
large entries from survival and getting actual hits.

This is fixed using two additional rules:
(1) entries much smaller than the *current* average are kept unconditionally
(2) new entries may only push out small amounts of data and thus may get
    rejected if not enough room could be made that way

* subversion/libsvn_subr/cache-membuffer.c
  (let_entry_age): extract the entry "aging" code and give it its own function
  (find_entry, move_entry): simplify using let_entry_age
  (ensure_data_insertable): refine entry eviction code; allow for rejecting entries
  (membuffer_cache_set): insert data only if room is available / could be made

Modified:
    subversion/branches/performance/subversion/libsvn_subr/cache-membuffer.c

Modified: subversion/branches/performance/subversion/libsvn_subr/cache-membuffer.c
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_subr/cache-membuffer.c?rev=990568&r1=990567&r2=990568&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_subr/cache-membuffer.c (original)
+++ subversion/branches/performance/subversion/libsvn_subr/cache-membuffer.c Sun Aug 29 11:39:11 2010
@@ -440,6 +440,18 @@ get_group_index(svn_membuffer_t *cache,
   return hash % cache->group_count;
 }
 
+/* Reduce the hit count of ENTRY and update the accumunated hit info
+ * in CACHE accordingly.
+ */
+static APR_INLINE void
+let_entry_age(svn_membuffer_t *cache, entry_t *entry)
+{
+  apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;
+
+  cache->hit_count -= hits_removed;
+  entry->hit_count -= hits_removed;
+}
+
 /* Given the GROUP_INDEX that shall contain an entry with the hash key
  * TO_FIND, find that entry in the specified group. 
  *
@@ -498,21 +510,18 @@ find_entry(svn_membuffer_t *cache,
        */
       if (entry == NULL)
         {
-          for (i = 0; i < GROUP_SIZE; ++i)
-            if (entry == NULL || entry->hit_count > group[i].hit_count)
+          entry = &group[0];
+          for (i = 1; i < GROUP_SIZE; ++i)
+            if (entry->hit_count > group[i].hit_count)
               entry = &group[i];
 
+          /* for the entries that don't have been removed,
+           * reduce their hitcounts to put them at a relative
+           * disadvantage the next time.
+           */
           for (i = 0; i < GROUP_SIZE; ++i)
             if (entry != &group[i])
-              {
-                /* for the entries that don't have been removed,
-                 * reduce their hitcounts to put them at a relative
-                 * disadvantage the next time.
-                 */
-                apr_uint32_t hits_left = entry->hit_count >>= 1;
-                cache->hit_count -= entry->hit_count - hits_left;
-                entry->hit_count = hits_left;
-              }
+              let_entry_age(cache, entry);
 
           drop_entry(cache, entry);
         }
@@ -534,9 +543,7 @@ move_entry(svn_membuffer_t *cache, entry
    * hit count so that its removal gets more likely in the next
    * run unless someone read / hit this entry in the meantime.
    */
-  apr_uint32_t hits_left = entry->hit_count >>= 1;
-  cache->hit_count -= entry->hit_count - hits_left;
-  entry->hit_count = hits_left;
+  let_entry_age(cache, entry);
 
   /* Move the entry to the start of the empty / insertion section 
    * (if it isn't there already) 
@@ -556,19 +563,32 @@ move_entry(svn_membuffer_t *cache, entry
 }
 
 /* If necessary, enlarge the insertion window until it is at least
- * SIZE bytes long. SIZE must not exceed the data buffer size;
+ * SIZE bytes long. SIZE must not exceed the data buffer size.
+ * Return TRUE if enough room could be found or made. A FALSE result
+ * indicates that the respective item shall not be added.
  */
-static void
+static svn_boolean_t
 ensure_data_insertable(svn_membuffer_t *cache, apr_size_t size)
 {
-  int average_hits;
-  int threashold;
   entry_t *entry;
 
-  /* Make sure that this function actually terminates.
+  /* accumulated size of the entries that have been removed to make
+   * room for the new one.
    */
-  assert(cache->data_size >= size);
+  apr_size_t drop_size = 0;
 
+  /* This loop will eventually terminate because every cache entry
+   * will get dropped eventually:
+   * - hit counts become 0 after the got kept for 32 full scans
+   * - larger elements get dropped as soon as their hit count is 0
+   * - smaller and smaller elements get removed as the average
+   *   entry size drops (average drops by a factor of 8 per scan)
+   * - after no more than 43 full scans, all elements would be removed
+   *
+   * Since size is < 16th of the cache size and about 50% of all
+   * entries get removed by a scan, it is very unlikely that more
+   * than a fractional scan will be necessary.
+   */
   while (1)
     {
       /* first offset behind the insertion window
@@ -580,7 +600,18 @@ ensure_data_insertable(svn_membuffer_t *
       /* leave function as soon as the insertion window is large enough
        */
       if (end - cache->current_data >= size)
-        return;
+        return TRUE;
+
+      /* Don't be too eager to cache data. Smaller items will fit into
+       * the cache after dropping a single item. Of the larger ones, we
+       * will only accept about 50%. They are also likely to get evicted
+       * soon due to their notorious low hit counts.
+       *
+       * As long as enough similarly or even larger sized entries already
+       * exist in the cache, a lot less of insert requests will be rejected.
+       */
+      if (2 * drop_size > size)
+        return FALSE;
 
       /* try to enlarge the insertion window
        */
@@ -596,26 +627,46 @@ ensure_data_insertable(svn_membuffer_t *
         }
       else
         {
-          /* Roll the dice and determine a threashold somewhere from 0 up
-           * to 2 times the average hit count. 
-           */
-          average_hits = cache->hit_count / cache->used_entries;
-          if (average_hits < 1)
-            average_hits = 1;
-
-          threashold = rand() % (2 * average_hits);
-
-          /* Drop the entry from the end of the insertion window, if it has
-           * been hit less than the threashold. Otherwise, keep it and move
-           * the insertion window one entry further.
-           */
           entry = get_entry(cache, cache->next);
-          if (entry->hit_count >= threashold)
-            move_entry(cache, entry);
+
+          /* Keep that are very small. Those are likely to be data headers
+           * or similar management structures. So, they are probably
+           * important while not occupying much space.
+           * But keep them only as long as they are a minority.
+           */
+          if (   (apr_uint64_t)entry->size * cache->used_entries
+               < cache->data_used / 8)
+            {
+              move_entry(cache, entry);
+            }
           else
-            drop_entry(cache, entry);
+            {
+              /* Roll the dice and determine a threshold somewhere from 0 up
+               * to 2 times the average hit count.
+               */
+              int average_hit_value = cache->hit_count / cache->used_entries;
+              int threshold = (average_hit_value+1) * (rand() % 4096) / 2048;
+
+              /* Drop the entry from the end of the insertion window, if it
+               * has been hit less than the threshold. Otherwise, keep it and
+               * move the insertion window one entry further.
+               */
+              if (entry->hit_count >= threshold)
+                {
+                  move_entry(cache, entry);
+                }
+              else
+                {
+                  drop_size += entry->size;
+                  drop_entry(cache, entry);
+                }
+            }
         }
     }
+
+  /* This will never be reached. But if it was, "can't insert" was the
+   * right answer. */
+  return FALSE;
 }
 
 /* Create a new membuffer cache instance. If the TOTAL_SIZE of the
@@ -775,12 +826,10 @@ membuffer_cache_set(svn_membuffer_t *cac
    */
   SVN_ERR(lock_cache(cache));
 
-  if (cache->data_size / 4 > size)
+  /* if necessary, enlarge the insertion window.
+   */
+  if (cache->data_size / 4 > size && ensure_data_insertable(cache, size))
     {
-      /* if necessary, enlarge the insertion window.
-       */
-      ensure_data_insertable(cache, size);
-
       /* Remove old data for this key, if it that exists.
        * Get an unused entry for the key and and initialize it with
        * the serialized item's (future) posion within data buffer.