You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2015/09/28 20:13:44 UTC
svn commit: r1705739 - in /subversion/trunk/subversion:
include/private/svn_fs_fs_private.h libsvn_fs_fs/stats.c
svnfsfs/stats-cmd.c tests/cmdline/svnfsfs_tests.py
tests/libsvn_fs_fs/fs-fs-private-test.c
Author: stefan2
Date: Mon Sep 28 18:13:44 2015
New Revision: 1705739
URL: http://svn.apache.org/viewvc?rev=1705739&view=rev
Log:
Teach 'svnfsfs stats' to show the average lengths of the representation
delta chains.
We build that info as we go: the chain length of a representation is the
chain length of the base representation it is deltified against, plus 1.
For logically addressed repositories, we want to keep the single-pass
process in place. Therefore, we collect the delta chain references and set
the counters only after the whole rev / pack file has been read.
While at it, also determine and store the correct rep header size for
physically addressed reps.
* subversion/include/private/svn_fs_fs_private.h
(svn_fs_fs__representation_stats_t): Add the field to hold the sum of the
lengths of the delta chains.
* subversion/libsvn_fs_fs/stats.c
(rep_stats_t): Add field to hold the delta chain length.
(rep_ref_t): New temporary data structure.
(parse_representation): Set the chain length for phys. addressed reps.
(compare_representation_refs,
resolve_representation_refs): New functions to do the same for log.
addressed reps as a post-scan step.
(read_log_rev_or_packfile): Scan the file for reps as well and collect
the delta chain links. Call the above to
update the rep info afterwards.
(add_rep_stats): One more field to process in the aggregator.
* subversion/svnfsfs/stats-cmd.c
(print_rep_stats,
print_stats): Print the average delta chain lengths for these sections.
* subversion/tests/libsvn_fs_fs/fs-fs-private-test.c
(verify_representation_stats): Have some check on the new data as well.
* subversion/tests/cmdline/svnfsfs_tests.py
(test_stats): Update expected output pattern.
Modified:
subversion/trunk/subversion/include/private/svn_fs_fs_private.h
subversion/trunk/subversion/libsvn_fs_fs/stats.c
subversion/trunk/subversion/svnfsfs/stats-cmd.c
subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py
subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c
Modified: subversion/trunk/subversion/include/private/svn_fs_fs_private.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_fs_fs_private.h?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_fs_fs_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_fs_fs_private.h Mon Sep 28 18:13:44 2015
@@ -152,6 +152,9 @@ typedef struct svn_fs_fs__representation
/* sum of ref_count * expanded_size,
* i.e. total plaintext content if there was no rep sharing */
apr_uint64_t expanded_size;
+
+ /* sum of all representation delta chain lengths */
+ apr_uint64_t chain_len;
} svn_fs_fs__representation_stats_t;
/* Basic statistics we collect over a given set of noderevs.
Modified: subversion/trunk/subversion/libsvn_fs_fs/stats.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/stats.c?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/stats.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/stats.c Mon Sep 28 18:13:44 2015
@@ -93,8 +93,36 @@ typedef struct rep_stats_t
/* classification of the representation. values of rep_kind_t */
char kind;
+ /* length of the delta chain, including this representation,
+ * saturated to 255 - if need be */
+ apr_byte_t chain_length;
} rep_stats_t;
+/* One link in a rep delta chain: the rep at REVISION + ITEM_INDEX is a
+ * delta against BASE_REVISION + BASE_ITEM_INDEX. We collect these links
+ * while scanning an f7 repo in a single pass and resolve them afterwards. */
+typedef struct rep_ref_t
+{
+ /* Revision that contains this representation. */
+ svn_revnum_t revision;
+
+ /* Item index of this rep within REVISION. */
+ apr_uint64_t item_index;
+
+ /* Revision of the representation we deltified against.
+ * SVN_INVALID_REVNUM if this representation is PLAIN or a self-delta. */
+ svn_revnum_t base_revision;
+
+ /* Item index of that rep within BASE_REVISION; unused without a base. */
+ apr_uint64_t base_item_index;
+
+ /* Length of the PLAIN / DELTA line in the source file in bytes.
+ * resolve_representation_refs() copies this into the rep stats after
+ * the whole file has been scanned. */
+ apr_uint16_t header_size;
+
+} rep_ref_t;
+
/* Represents a single revision.
* There will be only one instance per revision. */
typedef struct revision_info_t
@@ -441,6 +469,23 @@ parse_representation(rep_stats_t **repre
scratch_pool, scratch_pool));
result->header_size = header->header_size;
+
+ /* Determine length of the delta chain. */
+ if (header->type == svn_fs_fs__rep_delta)
+ {
+ int base_idx;
+ rep_stats_t *base_rep
+ = find_representation(&base_idx, query, NULL,
+ header->base_revision,
+ header->base_item_index);
+
+ result->chain_length = 1 + MIN(base_rep->chain_length,
+ (apr_byte_t)0xfe);
+ }
+ else
+ {
+ result->chain_length = 1;
+ }
}
svn_sort__array_insert(revision_info->representations, &result, idx);
@@ -869,6 +914,70 @@ read_item(svn_stringbuf_t **contents,
return SVN_NO_ERROR;
}
+/* Comparison function for svn_sort__array() over two rep_ref_t** LHS and
+ * RHS. Orders by the respective representation's REVISION only; refs of
+ * the same revision compare equal. Returns -1, 0 or 1. */
+static int
+compare_representation_refs(const void *lhs, const void *rhs)
+{
+ svn_revnum_t lhs_rev = (*(const rep_ref_t *const *)lhs)->revision;
+ svn_revnum_t rhs_rev = (*(const rep_ref_t *const *)rhs)->revision;
+
+ if (lhs_rev < rhs_rev)
+ return -1;
+ return (lhs_rev > rhs_rev ? 1 : 0);
+}
+
+/* Given all the representations found in a single rev / pack file as
+ * rep_ref_t * in REP_REFS, update their delta chain lengths (saturated at
+ * 255) and header sizes in QUERY. REP_REFS gets re-ordered by revision;
+ * it and its contents can then be discarded. */
+static svn_error_t *
+resolve_representation_refs(query_t *query,
+ apr_array_header_t *rep_refs)
+{
+ int i;
+
+ /* Because delta chains can only point to previous revs, sorting REP_REFS
+ * by revision ensures every base is resolved before reps built on it. */
+ svn_sort__array(rep_refs, compare_representation_refs);
+
+ /* Build up the CHAIN_LENGTH values, walking the refs in sorted order. */
+ for (i = 0; i < rep_refs->nelts; ++i)
+ {
+ int idx;
+ rep_ref_t *ref = APR_ARRAY_IDX(rep_refs, i, rep_ref_t *);
+ rep_stats_t *rep = find_representation(&idx, query, NULL,
+ ref->revision, ref->item_index);
+
+ /* The rep must be known to QUERY and must not have been resolved yet. */
+ SVN_ERR_ASSERT(rep);
+ SVN_ERR_ASSERT(!rep->chain_length);
+
+ /* Set the HEADER_SIZE as we found it during the scan. */
+ rep->header_size = ref->header_size;
+
+ /* Length is 1 without a base, else 1 + the resolved base's length. */
+ if (ref->base_revision == SVN_INVALID_REVNUM)
+ {
+ rep->chain_length = 1;
+ }
+ else
+ {
+ rep_stats_t *base;
+
+ base = find_representation(&idx, query, NULL, ref->base_revision,
+ ref->base_item_index);
+ SVN_ERR_ASSERT(base);
+ SVN_ERR_ASSERT(base->chain_length);
+
+ rep->chain_length = 1 + MIN(base->chain_length, (apr_byte_t)0xfe);
+ }
+ }
+
+ return SVN_NO_ERROR;
+}
+
/* Process the logically addressed revision contents of revisions BASE to
* BASE + COUNT - 1 in QUERY.
*
@@ -889,6 +998,12 @@ read_log_rev_or_packfile(query_t *query,
int i;
svn_fs_fs__revision_file_t *rev_file;
+ /* We collect the delta chain links as we scan the file. Afterwards,
+ * we determine the lengths of those delta chains and throw this
+ * temporary container away. */
+ apr_array_header_t *rep_refs = apr_array_make(scratch_pool, 64,
+ sizeof(rep_ref_t *));
+
/* we will process every revision in the rev / pack file */
for (i = 0; i < count; ++i)
{
@@ -960,12 +1075,49 @@ read_log_rev_or_packfile(query_t *query,
= get_log_change_count(item->data + 0, item->len);
info->changes_len += entry->size;
}
+ else if ( (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP)
+ || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
+ || (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
+ || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS))
+ {
+ /* Collect the delta chain link. */
+ svn_fs_fs__rep_header_t *header;
+ rep_ref_t *ref = apr_pcalloc(scratch_pool, sizeof(*ref));
+
+ SVN_ERR(svn_io_file_aligned_seek(rev_file->file,
+ rev_file->block_size,
+ NULL, entry->offset,
+ iterpool));
+ SVN_ERR(svn_fs_fs__read_rep_header(&header,
+ rev_file->stream,
+ iterpool, iterpool));
+
+ ref->header_size = header->header_size;
+ ref->revision = entry->item.revision;
+ ref->item_index = entry->item.number;
+
+ if (header->type == svn_fs_fs__rep_delta)
+ {
+ ref->base_item_index = header->base_item_index;
+ ref->base_revision = header->base_revision;
+ }
+ else
+ {
+ ref->base_item_index = SVN_FS_FS__ITEM_INDEX_UNUSED;
+ ref->base_revision = SVN_INVALID_REVNUM;
+ }
+
+ APR_ARRAY_PUSH(rep_refs, rep_ref_t *) = ref;
+ }
/* advance offset */
offset += entry->size;
}
}
+ /* Resolve the delta chain links. */
+ SVN_ERR(resolve_representation_refs(query, rep_refs));
+
/* clean up and close file handles */
svn_pool_destroy(iterpool);
@@ -1092,6 +1244,7 @@ add_rep_stats(svn_fs_fs__representation_
stats->references += rep->ref_count;
stats->expanded_size += rep->ref_count * rep->expanded_size;
+ stats->chain_len += rep->chain_length;
}
/* Aggregate the info in the revision_info_t * array REVISIONS into the
Modified: subversion/trunk/subversion/svnfsfs/stats-cmd.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/svnfsfs/stats-cmd.c?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/svnfsfs/stats-cmd.c (original)
+++ subversion/trunk/subversion/svnfsfs/stats-cmd.c Mon Sep 28 18:13:44 2015
@@ -74,7 +74,8 @@ print_rep_stats(svn_fs_fs__representatio
"%20s bytes expanded size\n"
"%20s bytes expanded shared size\n"
"%20s bytes with rep-sharing off\n"
- "%20s shared references\n"),
+ "%20s shared references\n"
+ "%20.3f average delta chain length\n"),
svn__ui64toa_sep(stats->total.packed_size, ',', pool),
svn__ui64toa_sep(stats->total.count, ',', pool),
svn__ui64toa_sep(stats->shared.packed_size, ',', pool),
@@ -82,7 +83,8 @@ print_rep_stats(svn_fs_fs__representatio
svn__ui64toa_sep(stats->total.expanded_size, ',', pool),
svn__ui64toa_sep(stats->shared.expanded_size, ',', pool),
svn__ui64toa_sep(stats->expanded_size, ',', pool),
- svn__ui64toa_sep(stats->references - stats->total.count, ',', pool));
+ svn__ui64toa_sep(stats->references - stats->total.count, ',', pool),
+ stats->chain_len / MAX(1.0, (double)stats->total.count));
}
/* Print the (used) contents of CHANGES. Use POOL for allocations.
@@ -413,6 +415,7 @@ print_stats(svn_fs_fs__stats_t *stats,
"%20s bytes in %12s representations of added file nodes\n"
"%20s bytes in %12s directory property representations\n"
"%20s bytes in %12s file property representations\n"
+ " with %12.3f average delta chain length\n"
"%20s bytes in header & footer overhead\n"),
svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',',
pool),
@@ -433,8 +436,10 @@ print_stats(svn_fs_fs__stats_t *stats,
svn__ui64toa_sep(stats->file_prop_rep_stats.total.packed_size, ',',
pool),
svn__ui64toa_sep(stats->file_prop_rep_stats.total.count, ',', pool),
+ stats->total_rep_stats.chain_len
+ / (double)stats->total_rep_stats.total.count,
svn__ui64toa_sep(stats->total_rep_stats.total.overhead_size, ',',
- pool));
+ pool));
printf("\nDirectory representation statistics:\n");
print_rep_stats(&stats->dir_rep_stats, pool);
Modified: subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py (original)
+++ subversion/trunk/subversion/tests/cmdline/svnfsfs_tests.py Mon Sep 28 18:13:44 2015
@@ -182,6 +182,7 @@ def test_stats(sbox):
'.*\d+ bytes in .*\d+ representations of added file nodes',
'.*\d+ bytes in .*\d+ directory property representations',
'.*\d+ bytes in .*\d+ file property representations',
+ '.*\d+ average delta chain length',
'.*\d+ bytes in header & footer overhead' ],
'.* representation statistics:' :
['.*\d+ bytes in .*\d+ reps',
@@ -189,7 +190,8 @@ def test_stats(sbox):
'.*\d+ bytes expanded size',
'.*\d+ bytes expanded shared size',
'.*\d+ bytes with rep-sharing off',
- '.*\d+ shared references' ],
+ '.*\d+ shared references',
+ '.*\d+ average delta chain length'],
'Largest.*:' : ['.*\d+ r\d+ */\S*'],
'Extensions by number .*:' :
['.*\d+ \( ?\d+%\) representations'],
Modified: subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c?rev=1705739&r1=1705738&r2=1705739&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_fs_fs/fs-fs-private-test.c Mon Sep 28 18:13:44 2015
@@ -105,6 +105,10 @@ verify_representation_stats(const svn_fs
SVN_TEST_ASSERT(stats->references == stats->total.count);
SVN_TEST_ASSERT(stats->expanded_size == stats->total.expanded_size);
+ /* Reasonable delta chain lengths */
+ SVN_TEST_ASSERT( stats->chain_len >= stats->total.count
+ && stats->chain_len <= 5 * stats->total.count);
+
return SVN_NO_ERROR;
}