You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2014/01/21 02:35:08 UTC

svn commit: r1559886 - /subversion/trunk/subversion/libsvn_fs_fs/pack.c

Author: stefan2
Date: Tue Jan 21 01:35:07 2014
New Revision: 1559886

URL: http://svn.apache.org/r1559886
Log:
Fine-tune our FSFS format7 reordering strategy:
Cluster all directory nodes putting them in front of all file nodes.
Within these sections, apply the same heuristics as we did before.
This reduces the I/O during "loggy" operations and random DAG walks.
Checkouts are widely unaffected.

* subversion/libsvn_fs_fs/pack.c
  (path_order_t): Mark nodes as either being directory or file.
  (compare_path_order): Make node kind the first-level ordering criterion.
  (compare_is_dir): Yet another binary search predicate.
  (sort_reps_range): New sub-function factored out from ...
  (sort_reps): ... this one; call the range sort for both node types.

Modified:
    subversion/trunk/subversion/libsvn_fs_fs/pack.c

Modified: subversion/trunk/subversion/libsvn_fs_fs/pack.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/pack.c?rev=1559886&r1=1559885&r2=1559886&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/pack.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/pack.c Tue Jan 21 01:35:07 2014
@@ -107,6 +107,9 @@ typedef struct path_order_t
   /* noderev predecessor count */
   int predecessor_count;
 
+  /* this is a directory node */
+  svn_boolean_t is_dir;
+
   /* length of the expanded representation content */
   apr_int64_t expanded_size;
 
@@ -753,13 +756,15 @@ copy_node_to_temp(pack_context_t *contex
   path_order->node_id = *svn_fs_fs__id_node_id(noderev->id);
   path_order->revision = svn_fs_fs__id_rev(noderev->id);
   path_order->predecessor_count = noderev->predecessor_count;
+  path_order->is_dir = noderev->kind == svn_node_dir;
   path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id);
   APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
 
   return SVN_NO_ERROR;
 }
 
-/* implements compare_fn_t.  Sort descending by PATH, NODE_ID and REVISION.
+/* implements compare_fn_t.  Bring all directories in front of the files
+   and sort descendingly by PATH, NODE_ID and REVISION.
  */
 static int
 compare_path_order(const path_order_t * const * lhs_p,
@@ -768,8 +773,13 @@ compare_path_order(const path_order_t * 
   const path_order_t * lhs = *lhs_p;
   const path_order_t * rhs = *rhs_p;
 
+  /* cluster all directories */
+  int diff = rhs->is_dir - lhs->is_dir;
+  if (diff)
+    return diff;
+
   /* lexicographic order on path and node (i.e. latest first) */
-  int diff = svn_prefix_string__compare(lhs->path, rhs->path);
+  diff = svn_prefix_string__compare(lhs->path, rhs->path);
   if (diff)
     return diff;
 
@@ -807,6 +817,15 @@ compare_ref_to_item(const reference_t * 
   return svn_fs_fs__id_part_compare(&(*lhs_p)->from, rhs_p);
 }
 
+/* implements compare_fn_t.  Finds the DIR / FILE boundary.
+ */
+static int
+compare_is_dir(const path_order_t * const * lhs_p,
+               const void *unused)
+{
+  return (*lhs_p)->is_dir ? -1 : 0;
+}
+
 /* Look for the least significant bit set in VALUE and return the smallest
  * number with the same property, i.e. the largest power of 2 that is a
  * factor in VALUE. */
@@ -816,35 +835,25 @@ roundness(int value)
   return value ? value - (value & (value - 1)) : INT_MAX;
 }
 
-/* Order the data collected in CONTEXT such that we can place them in the
- * desired order.
+/* Order a range of data collected in CONTEXT such that we can place them
+ * in the desired order.  The input is taken from *PATH_ORDER, offsets FIRST
+ * to LAST and then written in the final order to the same range in *TEMP.
  */
 static void
-sort_reps(pack_context_t *context)
+sort_reps_range(pack_context_t *context,
+                const path_order_t **path_order,
+                const path_order_t **temp,
+                int first,
+                int last)
 {
-  apr_pool_t *temp_pool;
-  const path_order_t **temp, **path_order;
   const svn_prefix_string__t *path;
-  int i, dest, count, best;
+  int i, dest, best;
   svn_fs_fs__id_part_t rep_id;
   fs_fs_data_t *ffd = context->fs->fsap_data;
 
-  /* We will later assume that there is at least one node / path.
-   */
-  if (context->path_order->nelts == 0)
-    {
-      assert(context->references->nelts == 0);
-      return;
-    }
-
-  /* Sort containers by path and IDs, respectively.
-   */
-  qsort(context->path_order->elts, context->path_order->nelts,
-        context->path_order->elt_size,
-        (int (*)(const void *, const void *))compare_path_order);
-  qsort(context->references->elts, context->references->nelts,
-        context->references->elt_size,
-        (int (*)(const void *, const void *))compare_references);
+  /* The logic below would fail for empty ranges. */
+  if (first == last)
+    return;
 
   /* Re-order noderevs like this:
    *
@@ -854,15 +863,9 @@ sort_reps(pack_context_t *context)
    *
    * We simply pick & chose from the existing path, rev order.
    */
-
-  temp_pool = svn_pool_create(context->info_pool);
-  count = context->path_order->nelts;
-  temp = apr_pcalloc(temp_pool, count * sizeof(*temp));
-  path_order = (void *)context->path_order->elts;
-
-  dest = 0;
-  path = path_order[0]->path;
-  best = 0;
+  dest = first;
+  path = path_order[first]->path;
+  best = first;
 
   /* (1) For each path, pick the "roundest" representation and put it in
    * front of all other nodes in the pack file.  The "roundest" rep is
@@ -874,7 +877,7 @@ sort_reps(pack_context_t *context)
    * sections because references *into* linear deltification ranges are
    * much less likely.
    */
-  for (i = 0; i < count; ++i)
+  for (i = first; i < last; ++i)
     {
       /* Investigated all nodes for the current path? */
       if (svn_prefix_string__compare(path, path_order[i]->path))
@@ -915,7 +918,7 @@ sort_reps(pack_context_t *context)
    * access HEAD of the respective path.  Keeping all its dependency chain
    * in one place turns reconstruction into a linear scan of minimal length.
    */
-  for (i = 0; i < count; ++i)
+  for (i = first; i < last; ++i)
     if (path_order[i])
       {
         /* New path? */
@@ -931,7 +934,7 @@ sort_reps(pack_context_t *context)
             reference_t **reference;
 
             temp[dest++] = path_order[i];
-            path_order[i] = 0;
+            path_order[i] = NULL;
 
             reference = svn_sort__array_lookup(context->references,
                                                &rep_id, NULL,
@@ -946,12 +949,65 @@ sort_reps(pack_context_t *context)
    * in a given pack file.  So, this is just a few remnants that we put
    * at the end of the pack file.
    */
-  for (i = 0; i < count; ++i)
+  for (i = first; i < last; ++i)
     if (path_order[i])
       temp[dest++] = path_order[i];
 
   /* We now know the final ordering. */
-  assert(dest == count);
+  assert(dest == last);
+}
+
+/* Order the data collected in CONTEXT such that we can place them in the
+ * desired order.
+ */
+static void
+sort_reps(pack_context_t *context)
+{
+  apr_pool_t *temp_pool;
+  const path_order_t **temp, **path_order;
+  int i, count, dir_count;
+
+  /* We will later assume that there is at least one node / path.
+   */
+  if (context->path_order->nelts == 0)
+    {
+      assert(context->references->nelts == 0);
+      return;
+    }
+
+  /* Sort containers by path and IDs, respectively.
+   */
+  qsort(context->path_order->elts, context->path_order->nelts,
+        context->path_order->elt_size,
+        (int (*)(const void *, const void *))compare_path_order);
+  qsort(context->references->elts, context->references->nelts,
+        context->references->elt_size,
+        (int (*)(const void *, const void *))compare_references);
+
+  /* Re-order noderevs like this:
+   * (1) Directories are already in front; sort directories section and
+   *     files section separately according to (2)-(4).
+   * (2) Most likely to be referenced by future pack files, in path order.
+   * (3) highest revision rep per path + dependency chain
+   * (4) Remaining reps in path, rev order
+   *
+   * We simply pick & chose from the existing path, rev order.
+   */
+
+  temp_pool = svn_pool_create(context->info_pool);
+  count = context->path_order->nelts;
+  temp = apr_pcalloc(temp_pool, count * sizeof(*temp));
+  path_order = (void *)context->path_order->elts;
+
+  /* Find the boundary between DIR and FILE section. */
+  dir_count = svn_sort__bsearch_lower_bound(context->path_order, NULL,
+                     (int (*)(const void *, const void *))compare_is_dir);
+
+  /* Sort those sub-sections separately. */
+  sort_reps_range(context, path_order, temp, 0, dir_count);
+  sort_reps_range(context, path_order, temp, dir_count, count);
+
+  /* We now know the final ordering. */
   for (i = 0; i < count; ++i)
     path_order[i] = temp[i];