You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2015/01/19 23:05:03 UTC

svn commit: r1653132 - in /subversion/trunk/subversion/libsvn_fs_x: cached_data.c cached_data.h dag.c temp_serializer.c temp_serializer.h

Author: stefan2
Date: Mon Jan 19 22:05:03 2015
New Revision: 1653132

URL: http://svn.apache.org/r1653132
Log:
Speed up linear directory traversal in FSX.

The L1 DAG node cache already reduces sibling node lookups into a single
directory entry lookup.  This patch reduces the complexity of that entry
lookup from O(log N) to O(1) in the frequent case ordered tree walks.

This is accomplished by simply remembering the index of the last lookup
and then checking the following index upon the next lookup.  Even with
the very small directories in our test suite, this suceeds in 85% of all
calls.  For larger directories, the hit rates can be expected to be even
higher while the break-even should be around 20% or less.

* subversion/libsvn_fs_x/temp_serializer.h
  (svn_fs_x__ede_baton_t): New baton type.
  (svn_fs_x__extract_dir_entry): Update docstring.

* subversion/libsvn_fs_x/temp_serializer.c
  (found_entry): New dir entry access utility.
  (svn_fs_x__extract_dir_entry): For directories with 2+ entries, try the
                                 entry following HINT - if given - first.

* subversion/libsvn_fs_x/cached_data.h
  (svn_fs_x__rep_contents_dir_entry): Add HINT parameter.

* subversion/libsvn_fs_x/cached_data.c
  (svn_fs_x__rep_contents_dir_entry): Construct baton and handle HINT.

* subversion/libsvn_fs_x/dag.c
  (dag_node_t): Enable nodes to store the lookup hint.
  (svn_fs_x__dag_get_node): Initialize the new member.
  (dir_entry_id_from_node): Use the HINT during directory lookup.
  (svn_fs_x__dag_revision_root): Initialize the new member.
  (svn_fs_x__dag_delete): Use the HINT during directory lookup.

Modified:
    subversion/trunk/subversion/libsvn_fs_x/cached_data.c
    subversion/trunk/subversion/libsvn_fs_x/cached_data.h
    subversion/trunk/subversion/libsvn_fs_x/dag.c
    subversion/trunk/subversion/libsvn_fs_x/temp_serializer.c
    subversion/trunk/subversion/libsvn_fs_x/temp_serializer.h

Modified: subversion/trunk/subversion/libsvn_fs_x/cached_data.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/cached_data.c?rev=1653132&r1=1653131&r2=1653132&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_x/cached_data.c (original)
+++ subversion/trunk/subversion/libsvn_fs_x/cached_data.c Mon Jan 19 22:05:03 2015
@@ -2690,6 +2690,7 @@ svn_fs_x__rep_contents_dir_entry(svn_fs_
                                  svn_fs_t *fs,
                                  svn_fs_x__noderev_t *noderev,
                                  const char *name,
+                                 apr_size_t *hint,
                                  apr_pool_t *result_pool,
                                  apr_pool_t *scratch_pool)
 {
@@ -2700,14 +2701,22 @@ svn_fs_x__rep_contents_dir_entry(svn_fs_
   svn_cache__t *cache = locate_dir_cache(fs, &key, noderev);
   if (cache)
     {
+      svn_fs_x__ede_baton_t baton;
+      baton.hint = *hint;
+      baton.name = name;
+
       /* Cache lookup. */
       SVN_ERR(svn_cache__get_partial((void **)dirent,
                                      &found,
                                      cache,
                                      &key,
                                      svn_fs_x__extract_dir_entry,
-                                     (void*)name,
+                                     &baton,
                                      result_pool));
+
+      /* Remember the new clue only if we found something at that spot. */
+      if (found)
+        *hint = baton.hint;
     }
 
   /* fetch data from disk if we did not find it in the cache */

Modified: subversion/trunk/subversion/libsvn_fs_x/cached_data.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/cached_data.h?rev=1653132&r1=1653131&r2=1653132&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_x/cached_data.h (original)
+++ subversion/trunk/subversion/libsvn_fs_x/cached_data.h Mon Jan 19 22:05:03 2015
@@ -142,13 +142,19 @@ svn_fs_x__find_dir_entry(apr_array_heade
 
 /* Set *DIRENT to the entry identified by NAME in the directory given
    by NODEREV in filesystem FS.  If no such entry exits, *DIRENT will
-   be NULL. The returned object is allocated in RESULT_POOL; SCRATCH_POOL
+   be NULL.  The value referenced by HINT can be used to speed up
+   consecutive calls when travering the directory in name order.
+   Any value is allowed, however APR_SIZE_MAX gives best performance
+   when there has been no previous lookup for the same directory.
+
+   The returned object is allocated in RESULT_POOL; SCRATCH_POOL
    used for temporary allocations. */
 svn_error_t *
 svn_fs_x__rep_contents_dir_entry(svn_fs_x__dirent_t **dirent,
                                  svn_fs_t *fs,
                                  svn_fs_x__noderev_t *noderev,
                                  const char *name,
+                                 apr_size_t *hint,
                                  apr_pool_t *result_pool,
                                  apr_pool_t *scratch_pool);
 

Modified: subversion/trunk/subversion/libsvn_fs_x/dag.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/dag.c?rev=1653132&r1=1653131&r2=1653132&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_x/dag.c (original)
+++ subversion/trunk/subversion/libsvn_fs_x/dag.c Mon Jan 19 22:05:03 2015
@@ -77,6 +77,11 @@ struct dag_node_t
 
   /* the path at which this node was created. */
   const char *created_path;
+
+  /* Directory entry lookup hint to speed up consecutive calls to
+     svn_fs_x__rep_contents_dir_entry(). Only used for directory nodes.
+     Any value is legal but should default to APR_SIZE_MAX. */
+  apr_size_t hint;
 };
 
 
@@ -249,6 +254,7 @@ svn_fs_x__dag_get_node(dag_node_t **node
   new_node = apr_pcalloc(result_pool, sizeof(*new_node));
   new_node->fs = fs;
   new_node->id = *id;
+  new_node->hint = APR_SIZE_MAX;
 
   /* Grab the contents so we can inspect the node's kind and created path. */
   new_node->node_pool = result_pool;
@@ -369,7 +375,8 @@ dir_entry_id_from_node(svn_fs_x__id_t *i
 
   /* Get a dirent hash for this directory. */
   SVN_ERR(svn_fs_x__rep_contents_dir_entry(&dirent, parent->fs, noderev,
-                                           name, scratch_pool, scratch_pool));
+                                           name, &parent->hint,
+                                           scratch_pool, scratch_pool));
   if (dirent)
     *id_p = dirent->id;
   else
@@ -663,6 +670,7 @@ svn_fs_x__dag_revision_root(dag_node_t *
   /* Initialize the KIND and CREATED_PATH attributes */
   new_node->kind = svn_node_dir;
   new_node->created_path = "/";
+  new_node->hint = APR_SIZE_MAX;
 
   /* Return a fresh new node */
   *node_p = new_node;
@@ -883,7 +891,8 @@ svn_fs_x__dag_delete(dag_node_t *parent,
 
   /* Search this directory for a dirent with that NAME. */
   SVN_ERR(svn_fs_x__rep_contents_dir_entry(&dirent, fs, parent_noderev,
-                                           name, subpool, subpool));
+                                           name, &parent->hint,
+                                           subpool, subpool));
 
   /* If we never found ID in ENTRIES (perhaps because there are no
      ENTRIES, perhaps because ID just isn't in the existing ENTRIES

Modified: subversion/trunk/subversion/libsvn_fs_x/temp_serializer.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/temp_serializer.c?rev=1653132&r1=1653131&r2=1653132&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_x/temp_serializer.c (original)
+++ subversion/trunk/subversion/libsvn_fs_x/temp_serializer.c Mon Jan 19 22:05:03 2015
@@ -795,6 +795,23 @@ find_entry(svn_fs_x__dirent_t **entries,
   return lower;
 }
 
+/* Utility function that returns TRUE if entry number IDX in ENTRIES has the
+ * name NAME.
+ */
+static svn_boolean_t
+found_entry(const svn_fs_x__dirent_t * const *entries,
+            const char *name,
+            apr_size_t idx)
+{
+  /* check whether we actually found a match */
+  const svn_fs_x__dirent_t *entry =
+    svn_temp_deserializer__ptr(entries, (const void *const *)&entries[idx]);
+  const char* entry_name =
+    svn_temp_deserializer__ptr(entry, (const void *const *)&entry->name);
+
+  return strcmp(entry_name, name) == 0;
+}
+
 svn_error_t *
 svn_fs_x__extract_dir_entry(void **out,
                             const void *data,
@@ -803,8 +820,9 @@ svn_fs_x__extract_dir_entry(void **out,
                             apr_pool_t *pool)
 {
   const dir_data_t *dir_data = data;
-  const char* name = baton;
+  svn_fs_x__ede_baton_t *b = baton;
   svn_boolean_t found;
+  apr_size_t pos;
 
   /* resolve the reference to the entries array */
   const svn_fs_x__dirent_t * const *entries =
@@ -814,14 +832,33 @@ svn_fs_x__extract_dir_entry(void **out,
   const apr_uint32_t *lengths =
     svn_temp_deserializer__ptr(data, (const void *const *)&dir_data->lengths);
 
-  /* binary search for the desired entry by name */
-  apr_size_t pos = find_entry((svn_fs_x__dirent_t **)entries,
-                              name,
-                              dir_data->count,
-                              &found);
+  /* Special case: Early out for empty directories.
+     That simplifies tests further down the road. */
+  *out = NULL;
+  if (dir_data->count == 0)
+    return SVN_NO_ERROR;
+
+  /* HINT _might_ be the position we hit last time.
+     If within valid range, check whether HINT+1 is a hit. */
+  if (   b->hint < dir_data->count - 1
+      && found_entry(entries, b->name, b->hint + 1))
+    {
+      /* Got lucky. */
+      pos = b->hint + 1;
+      found = TRUE;
+    }
+  else
+    {
+      /* Binary search for the desired entry by name. */
+      pos = find_entry((svn_fs_x__dirent_t **)entries, b->name,
+                       dir_data->count, &found);
+    }
+
+  /* Remember the hit index - if we FOUND the entry. */
+  if (found)
+    b->hint = pos;
 
   /* de-serialize that entry or return NULL, if no match has been found */
-  *out = NULL;
   if (found)
     {
       const svn_fs_x__dirent_t *source =

Modified: subversion/trunk/subversion/libsvn_fs_x/temp_serializer.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/temp_serializer.h?rev=1653132&r1=1653131&r2=1653132&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_x/temp_serializer.h (original)
+++ subversion/trunk/subversion/libsvn_fs_x/temp_serializer.h Mon Jan 19 22:05:03 2015
@@ -194,9 +194,20 @@ svn_fs_x__get_sharded_offset(void **out,
                              apr_pool_t *pool);
 
 /**
+ * Baton type to be used with svn_fs_x__extract_dir_entry. */
+typedef struct svn_fs_x__ede_baton_t
+{
+  /* Name of the directory entry to find. */
+  const char *name;
+
+  /* Lookup hint [in / out] */
+  apr_size_t hint;
+} svn_fs_x__ede_baton_t;
+
+/**
  * Implements #svn_cache__partial_getter_func_t for a single
  * #svn_fs_x__dirent_t within a serialized directory contents hash,
- * identified by its name (const char @a *baton).
+ * identified by its name (given in @a svn_fs_x__ede_baton_t @a *baton).
  */
 svn_error_t *
 svn_fs_x__extract_dir_entry(void **out,