You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2015/09/28 20:13:44 UTC

svn commit: r1705739 - in /subversion/trunk/subversion: include/private/svn_fs_fs_private.h libsvn_fs_fs/stats.c svnfsfs/stats-cmd.c tests/cmdline/svnfsfs_tests.py tests/libsvn_fs_fs/fs-fs-private-test.c

Author: stefan2
Date: Mon Sep 28 18:13:44 2015
New Revision: 1705739

URL: http://svn.apache.org/viewvc?rev=1705739&view=rev
Log:
Teach 'svnfsfs stats' to show the average lengths of the representation
delta chains.

We build that info as we go: the chain length is the chain length of the
base representation we deltify against + 1.  For logically addressed
repositories, we want to keep the single-pass process in place.  Therefore,
we collect the delta chain references and set the counters only after the
whole rev / pack file has been read.

While at it, also determine and store the correct rep header size for
physically addressed reps.

* subversion/include/private/svn_fs_fs_private.h
  (svn_fs_fs__representation_stats_t): Add the field to hold the sum of the
                                       lengths of the delta chains.

* subversion/libsvn_fs_fs/stats.c
  (rep_stats_t): Add field to hold the delta chain length.
  (rep_ref_t): New temporary data structure.
  (parse_representation): Set the chain length for phys. addressed reps.
  (compare_representation_refs,
   resolve_representation_refs): New functions to do the same for log.
                                 addressed reps as a post-scan step.
  (read_log_rev_or_packfile): Scan the file for reps as well and collect
                              the delta chain links.  Call the above to
                              update the rep info afterwards.
  (add_rep_stats): One more field to process in the aggregator.

* subversion/svnfsfs/stats-cmd.c
  (print_rep_stats,
   print_stats): Print the average delta chain lengths for these sections.

* subversion/tests/libsvn_fs_fs/fs-fs-private-test.c
  (verify_representation_stats): Have some check on the new data as well.

* subversion/tests/cmdline/svnfsfs_tests.py
  (test_stats): Update expected output pattern.

Modified:
    subversion/trunk/subversion/include/private/svn_fs_fs_private.h
    subversion/trunk/subversion/libsvn_fs_fs/stats.c
    subversion/trunk/subversion/svnfsfs/stats-cmd.c
    subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py
    subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c

Modified: subversion/trunk/subversion/include/private/svn_fs_fs_private.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_fs_fs_private.h?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_fs_fs_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_fs_fs_private.h Mon Sep 28 18:13:44 2015
@@ -152,6 +152,9 @@ typedef struct svn_fs_fs__representation
   /* sum of ref_count * expanded_size,
    * i.e. total plaintext content if there was no rep sharing */
   apr_uint64_t expanded_size;
+
+  /* sum of all representation delta chain lengths */
+  apr_uint64_t chain_len;
 } svn_fs_fs__representation_stats_t;
 
 /* Basic statistics we collect over a given set of noderevs.

Modified: subversion/trunk/subversion/libsvn_fs_fs/stats.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/stats.c?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/stats.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/stats.c Mon Sep 28 18:13:44 2015
@@ -93,8 +93,36 @@ typedef struct rep_stats_t
   /* classification of the representation. values of rep_kind_t */
   char kind;
 
+  /* length of the delta chain, including this representation,
+   * saturated to 255 - if need be */
+  apr_byte_t chain_length;
 } rep_stats_t;
 
+/* Represents a link in the rep delta chain.  REVISION + ITEM_INDEX points
+ * to BASE_REVISION + BASE_ITEM_INDEX.  We collect this info while scanning
+ * an f7 repo in a single pass and resolve it afterwards. */
+typedef struct rep_ref_t
+{
+  /* Revision that contains this representation. */
+  svn_revnum_t revision;
+
+  /* Item index of this rep within REVISION. */
+  apr_uint64_t item_index;
+
+  /* Revision of the representation we deltified against.
+   * -1 if this representation is either PLAIN or a self-delta. */
+  svn_revnum_t base_revision;
+
+  /* Item index of that rep within BASE_REVISION. */
+  apr_uint64_t base_item_index;
+
+  /* Length of the PLAIN / DELTA line in the source file in bytes.
+   * We use this to update the info in the rep stats after scanning the
+   * whole file. */
+  apr_uint16_t header_size;
+
+} rep_ref_t;
+
 /* Represents a single revision.
  * There will be only one instance per revision. */
 typedef struct revision_info_t
@@ -441,6 +469,23 @@ parse_representation(rep_stats_t **repre
                                              scratch_pool, scratch_pool));
 
           result->header_size = header->header_size;
+
+          /* Determine length of the delta chain. */
+          if (header->type == svn_fs_fs__rep_delta)
+            {
+              int base_idx;
+              rep_stats_t *base_rep
+                = find_representation(&base_idx, query, NULL,
+                                      header->base_revision,
+                                      header->base_item_index);
+
+              result->chain_length = 1 + MIN(base_rep->chain_length,
+                                             (apr_byte_t)0xfe);
+            }
+          else
+            {
+              result->chain_length = 1;
+            }
         }
 
       svn_sort__array_insert(revision_info->representations, &result, idx);
@@ -869,6 +914,70 @@ read_item(svn_stringbuf_t **contents,
   return SVN_NO_ERROR;
 }
 
+/* Predicate comparing the two rep_ref_t** LHS and RHS by the respective
+ * representation's revision.
+ */
+static int
+compare_representation_refs(const void *lhs, const void *rhs)
+{
+  svn_revnum_t lhs_rev = (*(const rep_ref_t *const *)lhs)->revision;
+  svn_revnum_t rhs_rev = (*(const rep_ref_t *const *)rhs)->revision;
+
+  if (lhs_rev < rhs_rev)
+    return -1;
+  return (lhs_rev > rhs_rev ? 1 : 0);
+}
+
+/* Given all the representations found in a single rev / pack file as
+ * rep_ref_t * in REP_REFS, update the delta chain lengths in QUERY.
+ * REP_REFS and its contents can then be discarded.
+ */
+static svn_error_t *
+resolve_representation_refs(query_t *query,
+                            apr_array_header_t *rep_refs)
+{
+  int i;
+
+  /* Delta chains can only point to previous revs.  So, once REP_REFS is
+   * sorted by revision, all base reps get updated before their users. */
+  svn_sort__array(rep_refs, compare_representation_refs);
+
+  /* Build up the CHAIN_LENGTH values. */
+  for (i = 0; i < rep_refs->nelts; ++i)
+    {
+      int idx;
+      rep_ref_t *ref = APR_ARRAY_IDX(rep_refs, i, rep_ref_t *);
+      rep_stats_t *rep = find_representation(&idx, query, NULL,
+                                             ref->revision, ref->item_index);
+
+      /* No dangling pointers and all base reps have been processed. */
+      SVN_ERR_ASSERT(rep);
+      SVN_ERR_ASSERT(!rep->chain_length);
+
+      /* Set the HEADER_SIZE as we found it during the scan. */
+      rep->header_size = ref->header_size;
+
+      /* The delta chain got 1 element longer. */
+      if (ref->base_revision == SVN_INVALID_REVNUM)
+        {
+          rep->chain_length = 1;
+        }
+      else
+        {
+          rep_stats_t *base;
+
+          base = find_representation(&idx, query, NULL, ref->base_revision,
+                                     ref->base_item_index);
+          SVN_ERR_ASSERT(base);
+          SVN_ERR_ASSERT(base->chain_length);
+
+          rep->chain_length = 1 + MIN(base->chain_length, (apr_byte_t)0xfe);
+        }
+    }
+
+  return SVN_NO_ERROR;
+}
+
 /* Process the logically addressed revision contents of revisions BASE to
  * BASE + COUNT - 1 in QUERY.
  *
@@ -889,6 +998,12 @@ read_log_rev_or_packfile(query_t *query,
   int i;
   svn_fs_fs__revision_file_t *rev_file;
 
+  /* We collect the delta chain links as we scan the file.  Afterwards,
+   * we determine the lengths of those delta chains and throw this
+   * temporary container away. */
+  apr_array_header_t *rep_refs = apr_array_make(scratch_pool, 64,
+                                                sizeof(rep_ref_t *));
+
   /* we will process every revision in the rev / pack file */
   for (i = 0; i < count; ++i)
     {
@@ -960,12 +1075,49 @@ read_log_rev_or_packfile(query_t *query,
                 = get_log_change_count(item->data + 0, item->len);
               info->changes_len += entry->size;
             }
+          else if (   (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP)
+                   || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
+                   || (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
+                   || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS))
+            {
+              /* Collect the delta chain link. */
+              svn_fs_fs__rep_header_t *header;
+              rep_ref_t *ref = apr_pcalloc(scratch_pool, sizeof(*ref));
+
+              SVN_ERR(svn_io_file_aligned_seek(rev_file->file,
+                                               rev_file->block_size,
+                                               NULL, entry->offset,
+                                               iterpool));
+              SVN_ERR(svn_fs_fs__read_rep_header(&header,
+                                                 rev_file->stream,
+                                                 iterpool, iterpool));
+
+              ref->header_size = header->header_size;
+              ref->revision = entry->item.revision;
+              ref->item_index = entry->item.number;
+
+              if (header->type == svn_fs_fs__rep_delta)
+                {
+                  ref->base_item_index = header->base_item_index;
+                  ref->base_revision = header->base_revision;
+                }
+              else
+                {
+                  ref->base_item_index = SVN_FS_FS__ITEM_INDEX_UNUSED;
+                  ref->base_revision = SVN_INVALID_REVNUM;
+                }
+
+              APR_ARRAY_PUSH(rep_refs, rep_ref_t *) = ref;
+            }
 
           /* advance offset */
           offset += entry->size;
         }
     }
 
+  /* Resolve the delta chain links. */
+  SVN_ERR(resolve_representation_refs(query, rep_refs));
+
   /* clean up and close file handles */
   svn_pool_destroy(iterpool);
 
@@ -1092,6 +1244,7 @@ add_rep_stats(svn_fs_fs__representation_
 
   stats->references += rep->ref_count;
   stats->expanded_size += rep->ref_count * rep->expanded_size;
+  stats->chain_len += rep->chain_length;
 }
 
 /* Aggregate the info the in revision_info_t * array REVISIONS into the

Modified: subversion/trunk/subversion/svnfsfs/stats-cmd.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/svnfsfs/stats-cmd.c?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/svnfsfs/stats-cmd.c (original)
+++ subversion/trunk/subversion/svnfsfs/stats-cmd.c Mon Sep 28 18:13:44 2015
@@ -74,7 +74,8 @@ print_rep_stats(svn_fs_fs__representatio
            "%20s bytes expanded size\n"
            "%20s bytes expanded shared size\n"
            "%20s bytes with rep-sharing off\n"
-           "%20s shared references\n"),
+           "%20s shared references\n"
+           "%20.3f average delta chain length\n"),
          svn__ui64toa_sep(stats->total.packed_size, ',', pool),
          svn__ui64toa_sep(stats->total.count, ',', pool),
          svn__ui64toa_sep(stats->shared.packed_size, ',', pool),
@@ -82,7 +83,8 @@ print_rep_stats(svn_fs_fs__representatio
          svn__ui64toa_sep(stats->total.expanded_size, ',', pool),
          svn__ui64toa_sep(stats->shared.expanded_size, ',', pool),
          svn__ui64toa_sep(stats->expanded_size, ',', pool),
-         svn__ui64toa_sep(stats->references - stats->total.count, ',', pool));
+         svn__ui64toa_sep(stats->references - stats->total.count, ',', pool),
+         stats->chain_len / MAX(1.0, (double)stats->total.count));
 }
 
 /* Print the (used) contents of CHANGES.  Use POOL for allocations.
@@ -413,6 +415,7 @@ print_stats(svn_fs_fs__stats_t *stats,
            "%20s bytes in %12s representations of added file nodes\n"
            "%20s bytes in %12s directory property representations\n"
            "%20s bytes in %12s file property representations\n"
+           "                         with %12.3f average delta chain length\n"
            "%20s bytes in header & footer overhead\n"),
          svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',',
                          pool),
@@ -433,8 +436,10 @@ print_stats(svn_fs_fs__stats_t *stats,
          svn__ui64toa_sep(stats->file_prop_rep_stats.total.packed_size, ',',
                          pool),
          svn__ui64toa_sep(stats->file_prop_rep_stats.total.count, ',', pool),
+         stats->total_rep_stats.chain_len
+            / (double)stats->total_rep_stats.total.count,
          svn__ui64toa_sep(stats->total_rep_stats.total.overhead_size, ',',
-                        pool));
+                         pool));
 
   printf("\nDirectory representation statistics:\n");
   print_rep_stats(&stats->dir_rep_stats, pool);

Modified: subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py (original)
+++ subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py Mon Sep 28 18:13:44 2015
@@ -182,6 +182,7 @@ def test_stats(sbox):
                            '.*\d+ bytes in .*\d+ representations of added file nodes',
                            '.*\d+ bytes in .*\d+ directory property representations',
                            '.*\d+ bytes in .*\d+ file property representations',
+                           '.*\d+ average delta chain length',
                            '.*\d+ bytes in header & footer overhead' ],
     '.* representation statistics:' : 
                           ['.*\d+ bytes in .*\d+ reps',
@@ -189,7 +190,8 @@ def test_stats(sbox):
                            '.*\d+ bytes expanded size',
                            '.*\d+ bytes expanded shared size',
                            '.*\d+ bytes with rep-sharing off',
-                           '.*\d+ shared references' ],
+                           '.*\d+ shared references',
+                           '.*\d+ average delta chain length'],
     'Largest.*:'        : ['.*\d+ r\d+ */\S*'],
     'Extensions by number .*:' :
                           ['.*\d+ \( ?\d+%\) representations'],

Modified: subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c Mon Sep 28 18:13:44 2015
@@ -105,6 +105,10 @@ verify_representation_stats(const svn_fs
   SVN_TEST_ASSERT(stats->references == stats->total.count);
   SVN_TEST_ASSERT(stats->expanded_size == stats->total.expanded_size);
 
+  /* Reasonable delta chain lengths */
+  SVN_TEST_ASSERT(   stats->chain_len >= stats->total.count
+                  && stats->chain_len <= 5 * stats->total.count);
+
   return SVN_NO_ERROR;
 }