You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2013/01/20 10:16:48 UTC
svn commit: r1435805 - /subversion/trunk/tools/server-side/fsfs-stats.c
Author: stefan2
Date: Sun Jan 20 09:16:48 2013
New Revision: 1435805
URL: http://svn.apache.org/viewvc?rev=1435805&view=rev
Log:
Teach fsfs-stats tool to identify and print out the largest changes
in a repository.
* tools/server-side/fsfs-stats.c
(large_change_info_t,
largest_changes_t): new data structures
(fs_fs_t): add aggregator for largest changes in repo
(initialize_largest_changes,
add_change,
print_largest_reps): new utility functions
(read_noderev): read path as well and forward data to new aggregator
(read_revisions): also initialize new aggregator
(print_stats): also print new size stats
Modified:
subversion/trunk/tools/server-side/fsfs-stats.c
Modified: subversion/trunk/tools/server-side/fsfs-stats.c
URL: http://svn.apache.org/viewvc/subversion/trunk/tools/server-side/fsfs-stats.c?rev=1435805&r1=1435804&r2=1435805&view=diff
==============================================================================
--- subversion/trunk/tools/server-side/fsfs-stats.c (original)
+++ subversion/trunk/tools/server-side/fsfs-stats.c Sun Jan 20 09:16:48 2013
@@ -160,6 +160,37 @@ typedef struct window_cache_key_t
apr_size_t offset;
} window_cache_key_t;
+/* Description of one large representation. Its content will be reused /
+ * overwritten when it gets replaced by an even larger representation.
+ * Instances are allocated once by initialize_largest_changes() and are
+ * afterwards only refilled and re-ordered by add_change().
+ */
+typedef struct large_change_info_t
+{
+ /* size of the (deltified) representation. Initialized to 0; the printing
+  * code stops at the first entry whose size is 0. */
+ apr_size_t size;
+
+ /* revision of the representation. Initialized to SVN_INVALID_REVNUM
+  * until the entry gets filled in. */
+ svn_revnum_t revision;
+
+ /* node path. "" for unused instances. The buffer is allocated up front
+  * and overwritten in place via svn_stringbuf_set(). */
+ svn_stringbuf_t *path;
+} large_change_info_t;
+
+/* Container for the largest representations found so far. The capacity
+ * is fixed and entries will be inserted by reusing the last one and
+ * reshuffling the entry pointers. The insertion code keeps CHANGES
+ * ordered by descending size, so the last element is always the smallest.
+ */
+typedef struct largest_changes_t
+{
+ /* number of entries allocated in CHANGES */
+ apr_size_t count;
+
+ /* size of the smallest change, i.e. of the last entry in CHANGES.
+  * Starts out as 1; representations smaller than this value are not
+  * recorded at all. */
+ apr_size_t min_size;
+
+ /* changes kept in this struct: COUNT pointers to individually allocated
+  * entries, largest first */
+ large_change_info_t **changes;
+} largest_changes_t;
+
/* Root data structure containing all information about a given repository.
*/
typedef struct fs_fs_t
@@ -191,6 +222,9 @@ typedef struct fs_fs_t
/* undeltified txdelta window cache */
svn_cache__t *window_cache;
+
+ /* track the biggest contributors to repo size */
+ largest_changes_t *largest_changes;
} fs_fs_t;
/* Return the rev pack folder for revision REV in FS.
@@ -327,6 +361,70 @@ set_cached_window(fs_fs_t *fs,
pool));
}
+/* Set up the LARGEST_CHANGES member of FS as an empty container with room
+ * for COUNT entries. Every entry starts out unused: size 0, revision
+ * SVN_INVALID_REVNUM and a pre-allocated path buffer. All memory is taken
+ * from POOL.
+ */
+static void
+initialize_largest_changes(fs_fs_t *fs,
+                           apr_size_t count,
+                           apr_pool_t *pool)
+{
+  apr_size_t idx;
+  largest_changes_t *container = apr_pcalloc(pool, sizeof(*container));
+
+  container->count = count;
+  container->min_size = 1;
+  container->changes = apr_palloc(pool, count * sizeof(*container->changes));
+
+  /* First pass: allocate *all* entry structs before any of the path
+   * stringbufs. This is deliberate; it is meant to improve cache locality
+   * and, according to the original author, enhances performance
+   * significantly. */
+  for (idx = 0; idx < count; ++idx)
+    container->changes[idx] = apr_palloc(pool, sizeof(**container->changes));
+
+  /* Second pass: mark every entry as unused and give it its path buffer. */
+  for (idx = 0; idx < count; ++idx)
+    {
+      large_change_info_t *entry = container->changes[idx];
+
+      entry->size = 0;
+      entry->revision = SVN_INVALID_REVNUM;
+      entry->path = svn_stringbuf_create_ensure(1024, pool);
+    }
+
+  fs->largest_changes = container;
+}
+
+/* Update data aggregators in FS with this representation of on-disk SIZE
+ * for PATH in REVISION.
+ *
+ * The change is recorded only if SIZE is at least as large as the smallest
+ * entry currently kept in FS->LARGEST_CHANGES. In that case the last
+ * (smallest) entry gets overwritten with the new data and is then moved
+ * towards the front until the list is ordered by descending size again.
+ * PATH is copied, so the caller's buffer need not outlive this call.
+ * Nothing happens if the container has a capacity of 0.
+ */
+static void
+add_change(fs_fs_t *fs,
+           apr_size_t size,
+           svn_revnum_t revision,
+           const char *path)
+{
+  largest_changes_t *largest = fs->largest_changes;
+  large_change_info_t *info;
+  apr_size_t i;
+
+  /* A container without capacity has no last entry to recycle. Without
+   * this check, COUNT - 1 would wrap around and index out of bounds. */
+  if (largest->count == 0)
+    return;
+
+  /* too small to make it into the list */
+  if (size < largest->min_size)
+    return;
+
+  /* recycle the entry that drops out of the list */
+  info = largest->changes[largest->count - 1];
+  info->size = size;
+  info->revision = revision;
+  svn_stringbuf_set(info->path, path);
+
+  /* linear insertion but not too bad since count is low and insertions
+   * near the end are more likely than close to front */
+  for (i = largest->count - 1; i > 0; --i)
+    {
+      if (largest->changes[i-1]->size >= size)
+        break;
+
+      largest->changes[i] = largest->changes[i-1];
+    }
+
+  largest->changes[i] = info;
+
+  /* the last entry is the smallest one again; remember its size as the
+   * new admission threshold */
+  largest->min_size = largest->changes[largest->count - 1]->size;
+}
+
/* Given rev pack PATH in FS, read the manifest file and return the offsets
* in *MANIFEST. Use POOL for allocations.
*/
@@ -1036,6 +1134,7 @@ read_noderev(fs_fs_t *fs,
representation_t *props = NULL;
apr_size_t start_offset = offset;
svn_boolean_t is_dir = FALSE;
+ const char *path = "???";
scratch_pool = svn_pool_create(scratch_pool);
@@ -1093,8 +1192,14 @@ read_noderev(fs_fs_t *fs,
if (++props->ref_count == 1)
props->kind = is_dir ? dir_property_rep : file_property_rep;
}
+ else if (key_matches(&key, "cpath"))
+ path = value.data;
}
+ /* record largest changes */
+ if (text && text->ref_count == 1)
+ add_change(fs, text->size, text->revision, path);
+
/* if this is a directory and has not been processed, yet, read and
* process it recursively */
if (is_dir && text && text->ref_count == 1)
@@ -1303,6 +1408,7 @@ read_revisions(fs_fs_t **fs,
(*fs)->max_revision + 1 - (*fs)->start_revision,
sizeof(revision_info_t *));
(*fs)->null_base = apr_pcalloc(pool, sizeof(*(*fs)->null_base));
+ initialize_largest_changes(*fs, 64, pool);
SVN_ERR(svn_cache__create_membuffer_cache(&(*fs)->window_cache,
svn_cache__get_global_membuffer_cache(),
@@ -1425,6 +1531,20 @@ print_rep_stats(representation_stats_t *
svn__i64toa_sep(stats->references - stats->total.count, ',', pool));
}
+/* Write one line per used entry of CHANGES to stdout: the size with
+ * thousands separators, the revision and the node path. Output stops at
+ * the first entry with size 0 or after CHANGES->COUNT entries, whichever
+ * comes first. Use POOL for allocations.
+ */
+static void
+print_largest_reps(largest_changes_t *changes,
+                   apr_pool_t *pool)
+{
+  apr_size_t idx = 0;
+
+  while (idx < changes->count)
+    {
+      const large_change_info_t *entry = changes->changes[idx];
+
+      /* a zero size marks the first unused slot; nothing follows it */
+      if (!entry->size)
+        break;
+
+      printf(_("%12s r%-8ld %s\n"),
+             svn__i64toa_sep(entry->size, ',', pool),
+             entry->revision,
+             entry->path->data);
+
+      ++idx;
+    }
+}
+
/* Post-process stats for FS and print them to the console.
* Use POOL for allocations.
*/
@@ -1555,6 +1675,8 @@ print_stats(fs_fs_t *fs,
print_rep_stats(&dir_prop_rep_stats, pool);
printf("\nFile property representation statistics:\n");
print_rep_stats(&file_prop_rep_stats, pool);
+ printf("\nLargest representations:\n");
+ print_largest_reps(fs->largest_changes, pool);
}
/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and