You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2014/04/29 17:45:58 UTC
svn commit: r1591005 - /subversion/trunk/subversion/libsvn_fs_fs/cached_data.c

Author: stefan2
Date: Tue Apr 29 15:45:58 2014
New Revision: 1591005

URL: http://svn.apache.org/r1591005
Log:
Improve memory usage pattern when reading from and writing to the FSFS
fulltext cache.  Avoid allocating large buffers while reading from the
cache and attempt to reduce memory / address space fragmentation when
reconstructing fulltexts for later caching.

Both are handled by the svn_fs_fs__get_contents function; the cache
update is an optional side effect of that reader.

To keep memory consumption low for as long as we can read from cache,
don't fetch the whole fulltext at once.  Instead use a partial getter to
read the respective next block as part of the normal stream processing.
The repeated cache addressing overhead is relatively low as the block
size is typically 100kB.  We delay the construction of the representation
list and optional fulltext buffer until we get a cache miss (ideally
never happens once we had a hit).  In case of a miss, we re-read the
whole data from the window stream and pick up behind the last delivered
location.

If we want to cache large fulltexts, we need buffers of adequate size.
As they are per reading client, there can be many of them.  The
best we can do here (except for using code from the thunder branch) is
to allocate memory segments that are a power of two long.  This should
help the OS to keep fragmentation in check. YMMV.

* subversion/libsvn_fs_fs/cached_data.c
  (rep_read_baton): Store REP as we will delay its evaluation.  Also,
                    add tracking info for delivery from fulltext cache.
  (rep_read_get_baton): Initialize the new baton members but defer
                        setting up the bits that we will only need once
                        the fulltext cache lookup fails.
  (rep_read_contents_close): 
  (get_contents): Rename to ...
  (get_contents_from_windows): ... this for clarity.
  (fulltext_baton_t,
   get_fulltext_partial,
   get_contents_from_fulltext): New equivalent to the above but getting
                                its data from the fulltext cache.
  (optimimal_allocation_size,
   skip_contents): Handle the switch-over from (failed) cache lookup
                   to reading from window stream.  This includes the
                   delayed BATON initialization and heuristics of the
                   optional fulltext buffer allocation.
  (rep_read_contents): Try reading from fulltext cache first (if available)
                       and fall back to normal window reading as needed.
  (svn_fs_fs__get_contents): Always use the rep_read_contents-based stream.
                             Only decide whether or not to use fulltext
                             caching with it for this particular REP.

Modified:
    subversion/trunk/subversion/libsvn_fs_fs/cached_data.c

Modified: subversion/trunk/subversion/libsvn_fs_fs/cached_data.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/cached_data.c?rev=1591005&r1=1591004&r2=1591005&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/cached_data.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/cached_data.c Tue Apr 29 15:45:58 2014
@@ -1025,6 +1025,9 @@ struct rep_read_baton
   /* The FS from which we're reading. */
   svn_fs_t *fs;
 
+  /* Representation to read. */
+  representation_t rep;
+
   /* If not NULL, this is the base for the first delta window in rs_list */
   svn_stringbuf_t *base_window;
 
@@ -1064,6 +1067,15 @@ struct rep_read_baton
   /* The text we've been reading, if we're going to cache it. */
   svn_stringbuf_t *current_fulltext;
 
+  /* If not NULL, attempt to read the data from this cache. 
+     Once that lookup fails, reset it to NULL. */
+  svn_cache__t *fulltext_cache;
+
+  /* Bytes delivered from the FULLTEXT_CACHE so far.  If the next
+     lookup fails, we need to skip that much data from the reconstructed
+     window stream before we continue normal operation. */
+  svn_filesize_t fulltext_delivered;
+
   /* Used for temporary allocations during the read. */
   apr_pool_t *pool;
 
@@ -1359,9 +1371,7 @@ build_rep_list(apr_array_header_t **list
 
 
 /* Create a rep_read_baton structure for node revision NODEREV in
-   filesystem FS and store it in *RB_P.  If FULLTEXT_CACHE_KEY is not
-   NULL, it is the rep's key in the fulltext cache, and a stringbuf
-   must be allocated to store the text.  Perform all allocations in
+   filesystem FS and store it in *RB_P.  Perform all allocations in
    POOL.  If rep is mutable, it must be for file contents. */
 static svn_error_t *
 rep_read_get_baton(struct rep_read_baton **rb_p,
@@ -1374,6 +1384,7 @@ rep_read_get_baton(struct rep_read_baton
 
   b = apr_pcalloc(pool, sizeof(*b));
   b->fs = fs;
+  b->rep = *rep;
   b->base_window = NULL;
   b->chunk_index = 0;
   b->buf = NULL;
@@ -1385,17 +1396,9 @@ rep_read_get_baton(struct rep_read_baton
   b->fulltext_cache_key = fulltext_cache_key;
   b->pool = svn_pool_create(pool);
   b->filehandle_pool = svn_pool_create(pool);
-
-  SVN_ERR(build_rep_list(&b->rs_list, &b->base_window,
-                         &b->src_state, &b->len, fs, rep,
-                         b->filehandle_pool));
-
-  if (SVN_IS_VALID_REVNUM(fulltext_cache_key.revision))
-    b->current_fulltext = svn_stringbuf_create_ensure
-                            ((apr_size_t)b->len,
-                             b->filehandle_pool);
-  else
-    b->current_fulltext = NULL;
+  b->fulltext_cache = NULL;
+  b->fulltext_delivered = 0;
+  b->current_fulltext = NULL;
 
   /* Save our output baton. */
   *rb_p = b;
@@ -1621,11 +1624,12 @@ rep_read_contents_close(void *baton)
   return SVN_NO_ERROR;
 }
 
-/* Return the next *LEN bytes of the rep and store them in *BUF. */
+/* Return the next *LEN bytes of the rep from our plain / delta windows
+   and store them in *BUF. */
 static svn_error_t *
-get_contents(struct rep_read_baton *rb,
-             char *buf,
-             apr_size_t *len)
+get_contents_from_windows(struct rep_read_baton *rb,
+                          char *buf,
+                          apr_size_t *len)
 {
   apr_size_t copy_len, remaining = *len;
   char *cur = buf;
@@ -1718,6 +1722,191 @@ get_contents(struct rep_read_baton *rb,
   return SVN_NO_ERROR;
 }
 
+/* Baton type for get_fulltext_partial. */
+typedef struct fulltext_baton_t
+{
+  /* Target buffer to write to; of at least LEN bytes. */
+  char *buffer;
+
+  /* Offset within the respective fulltext at which we shall start to
+     copy data into BUFFER. */
+  apr_size_t start;
+
+  /* Number of bytes to copy.  The actual amount may be less in case
+     the fulltext is short(er). */
+  apr_size_t len;
+
+  /* Number of bytes actually copied into BUFFER. */
+  apr_size_t read;
+} fulltext_baton_t;
+
+/* Implement svn_cache__partial_getter_func_t for fulltext caches.
+ * From the fulltext in DATA, we copy the range specified by the
+ * fulltext_baton_t* BATON into the buffer provided by that baton.
+ * OUT and RESULT_POOL are not used.
+ */
+static svn_error_t *
+get_fulltext_partial(void **out,
+                     const void *data,
+                     apr_size_t data_len,
+                     void *baton,
+                     apr_pool_t *result_pool)
+{
+  fulltext_baton_t *fulltext_baton = baton;
+
+  /* We cached the fulltext with a NUL appended to it. */
+  apr_size_t fulltext_len = data_len - 1;
+
+  /* Clip the copy range to what the fulltext size allows. */
+  apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
+  fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
+
+  /* Copy the data to the output buffer and be done. */
+  memcpy(fulltext_baton->buffer, data + start, fulltext_baton->read);
+
+  return SVN_NO_ERROR;
+}
+
+/* Find the fulltext specified in BATON in the fulltext cache given
+ * as well by BATON.  If that succeeds, set *CACHED to TRUE and copy
+ * up to the next *LEN bytes into BUFFER.  Set *LEN to the actual
+ * number of bytes copied.
+ */
+static svn_error_t *
+get_contents_from_fulltext(svn_boolean_t *cached,
+                           struct rep_read_baton *baton,
+                           char *buffer,
+                           apr_size_t *len)
+{
+  void *dummy;
+  fulltext_baton_t fulltext_baton;
+
+  SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
+                 == baton->fulltext_delivered);
+  fulltext_baton.buffer = buffer;
+  fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
+  fulltext_baton.len = *len;
+  fulltext_baton.read = 0;
+
+  SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
+                                 &baton->fulltext_cache_key,
+                                 get_fulltext_partial, &fulltext_baton,
+                                 baton->pool));
+
+  if (*cached)
+    {
+      baton->fulltext_delivered += fulltext_baton.read;
+      *len = fulltext_baton.read;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Determine the optimal size of a string buf that shall receive a
+ * (full-) text of NEEDED bytes.
+ *
+ * The critical point is that those buffers may be very large and
+ * can cause memory fragmentation.  We apply simple heuristics to
+ * make fragmentation less likely.
+ */
+static apr_size_t
+optimimal_allocation_size(apr_size_t needed)
+{
+  /* For all allocations, assume some overhead that is shared between
+   * OS memory management, APR memory management and svn_stringbuf_t. */
+  const apr_size_t overhead = 0x400;
+  apr_size_t optimal;
+
+  /* If an allocation size is safe for other ephemeral buffers, it should
+   * be safe for ours. */
+  if (needed <= SVN__STREAM_CHUNK_SIZE)
+    return needed;
+
+  /* Paranoia edge case:
+   * Skip our heuristics if they created arithmetical overflow.
+   * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */
+  if (needed >= APR_SIZE_MAX / 2 - overhead)
+    return needed;
+
+  /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
+   * Since we know NEEDED to be larger than that, use it as the
+   * starting point.
+   *
+   * Heuristics: Allocate a power-of-two number of bytes that fit
+   *             NEEDED plus some OVERHEAD.  The APR allocator
+   *             will round it up to the next full page size.
+   */
+  optimal = SVN__STREAM_CHUNK_SIZE;
+  while (optimal - overhead < needed)
+    optimal *= 2;
+
+  /* This is above or equal to NEEDED. */
+  return optimal - overhead;
+}
+
+/* After a fulltext cache lookup failure, we will continue to read from
+ * combined delta or plain windows.  However, we must first make that data
+ * stream in BATON catch up to the position LEN already delivered from the
+ * fulltext cache.  Also, we need to store the reconstructed fulltext if we
+ * want to cache it at the end.
+ */
+static svn_error_t *
+skip_contents(struct rep_read_baton *baton,
+              svn_filesize_t len)
+{
+  svn_error_t *err = SVN_NO_ERROR;
+
+  /* Do we want to cache the reconstructed fulltext? */
+  if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
+    {
+      char *buffer;
+      svn_filesize_t to_alloc = MAX(len, baton->len);
+
+      /* This should only be happening if BATON->LEN and LEN are
+       * cacheable, implying they fit into memory. */
+      SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
+
+      /* Allocate the fulltext buffer. */
+      baton->current_fulltext = svn_stringbuf_create_ensure(
+                        optimimal_allocation_size((apr_size_t)to_alloc),
+                        baton->filehandle_pool);
+
+      /* Read LEN bytes from the window stream and store the data
+       * in the fulltext buffer (will be filled by further reads later). */
+      baton->current_fulltext->len = (apr_size_t)len;
+      baton->current_fulltext->data[(apr_size_t)len] = 0;
+
+      buffer = baton->current_fulltext->data;
+      while (len > 0 && !err)
+        {
+          apr_size_t to_read = (apr_size_t)len;
+          err = get_contents_from_windows(baton, buffer, &to_read);
+          len -= to_read;
+          buffer += to_read;
+        }
+    }
+  else if (len > 0)
+    {
+      /* Simply drain LEN bytes from the window stream. */
+      apr_pool_t *subpool = subpool = svn_pool_create(baton->pool);
+      char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
+
+      while (len > 0 && !err)
+        {
+          apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
+                            ? SVN__STREAM_CHUNK_SIZE
+                            : (apr_size_t)len;
+
+          err = get_contents_from_windows(baton, buffer, &to_read);
+          len -= to_read;
+        }
+
+      svn_pool_destroy(subpool);
+    }
+
+  return svn_error_trace(err);
+}
+
 /* BATON is of type `rep_read_baton'; read the next *LEN bytes of the
    representation and store them in *BUF.  Sum as we read and verify
    the MD5 sum at the end. */
@@ -1728,8 +1917,35 @@ rep_read_contents(void *baton,
 {
   struct rep_read_baton *rb = baton;
 
+  /* Get data from the fulltext cache for as long as we can. */
+  if (rb->fulltext_cache)
+    {
+      svn_boolean_t cached;
+      SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
+      if (cached)
+        return SVN_NO_ERROR;
+
+      /* Cache miss.  From now on, we will never read from the fulltext
+       * cache for this representation anymore. */
+      rb->fulltext_cache = NULL;
+    }
+
+  /* No fulltext cache to help us.  We must read from the window stream. */
+  if (!rb->rs_list)
+    {
+      /* Window stream not initialized, yet.  Do it now. */
+      SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
+                             &rb->src_state, &rb->len, rb->fs, &rb->rep,
+                             rb->filehandle_pool));
+
+      /* In case we did read from the fulltext cache before, make the 
+       * window stream catch up.  Also, initialize the fulltext buffer
+       * if we want to cache the fulltext at the end. */
+      SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
+    }
+
   /* Get the next block of data. */
-  SVN_ERR(get_contents(rb, buf, len));
+  SVN_ERR(get_contents_from_windows(rb, buf, len));
 
   if (rb->current_fulltext)
     svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
@@ -1786,27 +2002,25 @@ svn_fs_fs__get_contents(svn_stream_t **c
   else
     {
       fs_fs_data_t *ffd = fs->fsap_data;
-      pair_cache_key_t fulltext_cache_key = { 0 };
       svn_filesize_t len = rep->expanded_size ? rep->expanded_size : rep->size;
       struct rep_read_baton *rb;
 
-      /* Cache lookup, if the fulltext may be cached. */
+      pair_cache_key_t fulltext_cache_key = { 0 };
       fulltext_cache_key.revision = rep->revision;
       fulltext_cache_key.second = rep->item_index;
+
+      /* Initialize the reader baton.  Some members may be added lazily
+       * while reading from the stream. */
+      SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
+
+      /* Make the stream attempt fulltext cache lookups if the fulltext
+       * is cacheable.  If it is not, then also don't try to buffer and
+       * cache it. */
       if (ffd->fulltext_cache && cache_fulltext
           && SVN_IS_VALID_REVNUM(rep->revision)
           && fulltext_size_is_cachable(ffd, len))
         {
-          svn_stringbuf_t *fulltext;
-          svn_boolean_t is_cached;
-          SVN_ERR(svn_cache__get((void **) &fulltext, &is_cached,
-                                 ffd->fulltext_cache, &fulltext_cache_key,
-                                 pool));
-          if (is_cached)
-            {
-              *contents_p = svn_stream_from_stringbuf(fulltext, pool);
-              return SVN_NO_ERROR;
-            }
+          rb->fulltext_cache = ffd->fulltext_cache;
         }
       else
         {
@@ -1815,10 +2029,6 @@ svn_fs_fs__get_contents(svn_stream_t **c
           fulltext_cache_key.revision = SVN_INVALID_REVNUM;
         }
 
-      /* Create the object chain for reconstruction from deltas or for
-         reading plain text, depending on on-disk representation. */
-      SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
-
       *contents_p = svn_stream_create(rb, pool);
       svn_stream_set_read2(*contents_p, NULL /* only full read support */,
                            rep_read_contents);