You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by br...@apache.org on 2012/12/21 01:46:49 UTC

svn commit: r1424778 [3/3] - in /subversion/branches/in-repo-authz: ./ build/ subversion/include/ subversion/include/private/ subversion/libsvn_client/ subversion/libsvn_ra/ subversion/libsvn_subr/ subversion/libsvn_wc/ subversion/svn/ subversion/svnrd...

Modified: subversion/branches/in-repo-authz/tools/server-side/fsfs-stats.c
URL: http://svn.apache.org/viewvc/subversion/branches/in-repo-authz/tools/server-side/fsfs-stats.c?rev=1424778&r1=1424777&r2=1424778&view=diff
==============================================================================
--- subversion/branches/in-repo-authz/tools/server-side/fsfs-stats.c (original)
+++ subversion/branches/in-repo-authz/tools/server-side/fsfs-stats.c Fri Dec 21 00:46:48 2012
@@ -1,4 +1,4 @@
-/* diff.c -- test driver for text diffs
+/* fsfs-stats.c -- gather size statistics on FSFS repositories
  *
  * ====================================================================
  *    Licensed to the Apache Software Foundation (ASF) under one
@@ -22,7 +22,6 @@
 
 
 #include <assert.h>
-#include <sys/stat.h>
 
 #include <apr.h>
 #include <apr_general.h>
@@ -46,20 +45,29 @@
 #define _(x) x
 #endif
 
-#define ERROR_TAG "diff: "
+#define ERROR_TAG "fsfs-stats: "
 
+/* We group representations into 2x2 different kinds plus one default:
+ * [dir / file] x [text / prop]. The assignment is done by the first node
+ * that references the respective representation.
+ */
 typedef enum rep_kind_t
 {
+  /* The representation is _directly_ unused, i.e. not referenced by any
+   * noderev. However, some other representation may use it as delta base.
+   * null value. Should not occur in real-world repositories. */
   unused_rep,
 
+  /* a property rep of a directory  */
   dir_property_rep,
 
+  /* a property rep of a file  */
   file_property_rep,
 
-  /* a directory rep (including PLAIN / DELTA header) */
+  /* a directory rep  */
   dir_rep,
 
-  /* a file rep (including PLAIN / DELTA header) */
+  /* a file rep  */
   file_rep
 } rep_kind_t;
 
@@ -73,6 +81,7 @@ typedef struct representation_t
   /* item length in bytes */
   apr_size_t size;
 
+  /* item length after de-deltification */
   apr_size_t expanded_size;
 
   /* deltification base, or NULL if there is none */
@@ -80,13 +89,15 @@ typedef struct representation_t
 
   /* revision that contains this representation
    * (may be referenced by other revisions, though) */
-  
   svn_revnum_t revision;
+
+  /* number of nodes that reference this representation */
   apr_uint32_t ref_count;
 
   /* length of the PLAIN / DELTA line in the source file in bytes */
   apr_uint16_t header_size;
 
+  /* classification of the representation. values of rep_kind_t */
   char kind;
   
   /* the source content has a PLAIN header, so we may simply copy the
@@ -118,9 +129,16 @@ typedef struct revision_info_t
    * for non-packed revs) */
   apr_size_t end;
 
+  /* number of directory noderevs in this revision */
   apr_size_t dir_noderev_count;
+
+  /* number of file noderevs in this revision */
   apr_size_t file_noderev_count;
+
+  /* total size of directory noderevs (i.e. the structs - not the rep) */
   apr_size_t dir_noderev_size;
+
+  /* total size of file noderevs (i.e. the structs - not the rep) */
   apr_size_t file_noderev_size;
   
   /* all representation_t of this revision (in no particular order),
@@ -315,7 +333,7 @@ get_window_cache_index(fs_fs_t *fs,
   return (revision + offset * 0xd1f3da69) % fs->window_cache->entry_count;
 }
 
-/* Return the cached txdelta window stored in REPRESENTAION within FS.
+/* Return the cached txdelta window stored in REPRESENTATION within FS.
  * If that has not been found in cache, return NULL.
  */
 static svn_stringbuf_t *
@@ -334,7 +352,7 @@ get_cached_window(fs_fs_t *fs,
     : NULL;
 }
 
-/* Cache the undeltified txdelta WINDOW for REPRESENTAION within FS.
+/* Cache the undeltified txdelta WINDOW for REPRESENTATION within FS.
  */
 static void
 set_cached_window(fs_fs_t *fs,
@@ -365,8 +383,9 @@ set_cached_window(fs_fs_t *fs,
   entry->revision = revision;
 }
 
-/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
-   Use POOL for temporary allocations. */
+/* Given rev pack PATH in FS, read the manifest file and return the offsets
+ * in *MANIFEST. Use POOL for allocations.
+ */
 static svn_error_t *
 read_manifest(apr_array_header_t **manifest,
               fs_fs_t *fs,
@@ -409,6 +428,10 @@ read_manifest(apr_array_header_t **manif
   return svn_stream_close(manifest_stream);
 }
 
+/* Read header information for the revision stored in FILE_CONTENT (one
+ * whole revision).  Return the offsets within FILE_CONTENT for the
+ * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN.
+ * Use POOL for temporary allocations. */
 static svn_error_t *
 read_revision_header(apr_size_t *changes,
                      apr_size_t *changes_len,
@@ -447,8 +470,10 @@ read_revision_header(apr_size_t *changes
     return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                             _("Final line in revision file missing space"));
 
+  /* terminate the header line */
   *space = 0;
-  
+
+  /* extract information */
   SVN_ERR(svn_cstring_strtoui64(&val, line+1, 0, APR_SIZE_MAX, 10));
   *root_noderev = (apr_size_t)val;
   SVN_ERR(svn_cstring_strtoui64(&val, space+1, 0, APR_SIZE_MAX, 10));
@@ -458,6 +483,10 @@ read_revision_header(apr_size_t *changes
   return SVN_NO_ERROR;
 }
 
+/* Read the FSFS format number and sharding size from the format file at
+ * PATH and return it in *PFORMAT and *MAX_FILES_PER_DIR respectively.
+ * Use POOL for temporary allocations.
+ */
 static svn_error_t *
 read_format(int *pformat, int *max_files_per_dir,
             const char *path, apr_pool_t *pool)
@@ -467,6 +496,7 @@ read_format(int *pformat, int *max_files
   char buf[80];
   apr_size_t len;
 
+  /* open format file and read the first line */
   err = svn_io_file_open(&file, path, APR_READ | APR_BUFFERED,
                          APR_OS_DEFAULT, pool);
   if (err && APR_STATUS_IS_ENOENT(err->apr_err))
@@ -541,21 +571,27 @@ read_format(int *pformat, int *max_files
   return svn_io_file_close(file, pool);
 }
 
+/* Parse the content of the file at PATH as a decimal number and return
+ * it in *RESULT.  Use POOL for temporary allocations.
+ */
 static svn_error_t *
 read_number(svn_revnum_t *result, const char *path, apr_pool_t *pool)
 {
   svn_stringbuf_t *content;
-  apr_int64_t number;
+  apr_uint64_t number;
   
   SVN_ERR(svn_stringbuf_from_file2(&content, path, pool));
 
   content->data[content->len-1] = 0;
-  SVN_ERR(svn_cstring_atoi64(&number, content->data));
+  SVN_ERR(svn_cstring_strtoui64(&number, content->data, 0, LONG_MAX, 10));
   *result = (svn_revnum_t)number;
 
   return SVN_NO_ERROR;
 }
 
+/* Create *FS for the repository at PATH and read the format and size info.
+ * Use POOL for temporary allocations.
+ */
 static svn_error_t *
 fs_open(fs_fs_t **fs, const char *path, apr_pool_t *pool)
 {
@@ -570,7 +606,8 @@ fs_open(fs_fs_t **fs, const char *path, 
                       pool));
   if (((*fs)->format != 4) && ((*fs)->format != 6))
     return svn_error_create(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL, NULL);
-    
+
+  /* read size (HEAD) info */
   SVN_ERR(read_number(&(*fs)->min_unpacked_rev,
                       svn_dirent_join(path, "db/min-unpacked-rev", pool),
                       pool));
@@ -579,12 +616,18 @@ fs_open(fs_fs_t **fs, const char *path, 
                      pool);
 }
 
+/* Utility function that returns true if STRING->DATA matches KEY.
+ */
 static svn_boolean_t
 key_matches(svn_string_t *string, const char *key)
 {
   return strcmp(string->data, key) == 0;
 }
 
+/* Comparator used for binary search comparing the absolute file offset
+ * of a representation to some other offset. DATA is a *representation_t,
+ * KEY is a pointer to an apr_size_t.
+ */
 static int
 compare_representation_offsets(const void *data, const void *key)
 {
@@ -597,6 +640,15 @@ compare_representation_offsets(const voi
   return diff > 0 ? 1 : 0;
 }
 
+/* Find the revision_info_t object for the given REVISION in FS and return
+ * it in *REVISION_INFO. For performance reasons, we skip the lookup if
+ * the info is already provided.
+ *
+ * In that revision, look for the representation_t object for offset OFFSET.
+ * If it already exists, set *IDX to its index in *REVISION_INFO's
+ * representations list and return the representation object. Otherwise,
+ * set the index to where it must be inserted and return NULL.
+ */
 static representation_t *
 find_representation(int *idx,
                     fs_fs_t *fs,
@@ -606,7 +658,8 @@ find_representation(int *idx,
 {
   revision_info_t *info;
   *idx = -1;
-  
+
+  /* first let's find the revision */
   info = revision_info ? *revision_info : NULL;
   if (info == NULL || info->revision != revision)
     {
@@ -617,23 +670,36 @@ find_representation(int *idx,
         *revision_info = info;
     }
 
+  /* not found -> no result */
   if (info == NULL)
     return NULL;
+  
+  assert(revision == info->revision);
 
+  /* look for the representation */
   *idx = svn_sort__bsearch_lower_bound(&offset,
                                        info->representations,
                                        compare_representation_offsets);
   if (*idx < info->representations->nelts)
     {
+      /* return the representation, if this is the one we were looking for */
       representation_t *result
         = APR_ARRAY_IDX(info->representations, *idx, representation_t *);
       if (result->offset == offset)
         return result;
     }
 
+  /* not parsed, yet */
   return NULL;
 }
 
+/* Read the representation header in FILE_CONTENT at OFFSET.  Return its
+ * size in *HEADER_SIZE, set *IS_PLAIN if no deltification was used and
+ * return the deltification base representation in *REPRESENTATION.  If
+ * there is none, set it to NULL.  Use FS to look it up.
+ *
+ * Use POOL for allocations and SCRATCH_POOL for temporaries.
+ */
 static svn_error_t *
 read_rep_base(representation_t **representation,
               apr_size_t *header_size,
@@ -649,10 +715,12 @@ read_rep_base(representation_t **represe
   svn_revnum_t revision;
   apr_uint64_t temp;
 
+  /* identify representation header (1 line) */
   const char *buffer = file_content->data + offset;
   const char *line_end = strchr(buffer, '\n');
   *header_size = line_end - buffer + 1;
 
+  /* check for PLAIN rep */
   if (strncmp(buffer, "PLAIN\n", *header_size) == 0)
     {
       *is_plain = TRUE;
@@ -660,6 +728,7 @@ read_rep_base(representation_t **represe
       return SVN_NO_ERROR;
     }
 
+  /* check for DELTA against empty rep */
   *is_plain = FALSE;
   if (strncmp(buffer, "DELTA\n", *header_size) == 0)
     {
@@ -671,7 +740,7 @@ read_rep_base(representation_t **represe
   str = apr_pstrndup(scratch_pool, buffer, line_end - buffer);
   last_str = str;
 
-  /* We hopefully have a DELTA vs. a non-empty base revision. */
+  /* parse it. */
   str = svn_cstring_tokenize(" ", &last_str);
   str = svn_cstring_tokenize(" ", &last_str);
   SVN_ERR(svn_revnum_parse(&revision, str, NULL));
@@ -679,10 +748,18 @@ read_rep_base(representation_t **represe
   str = svn_cstring_tokenize(" ", &last_str);
   SVN_ERR(svn_cstring_strtoui64(&temp, str, 0, APR_SIZE_MAX, 10));
 
+  /* it should refer to a rep in an earlier revision.  Look it up */
   *representation = find_representation(&idx, fs, NULL, revision, (apr_size_t)temp);
   return SVN_NO_ERROR;
 }
 
+/* Parse the representation reference (text: or props:) in VALUE, look
+ * it up in FS and return it in *REPRESENTATION.  To be able to parse the
+ * base rep, we pass the FILE_CONTENT as well.
+ * 
+ * If necessary, allocate the result in POOL; use SCRATCH_POOL for temp.
+ * allocations.
+ */
 static svn_error_t *
 parse_representation(representation_t **representation,
                      fs_fs_t *fs,
@@ -700,15 +777,20 @@ parse_representation(representation_t **
   apr_uint64_t expanded_size;
   int idx;
 
+  /* read location (revision, offset) and size */
   char *c = (char *)value->data;
   SVN_ERR(svn_revnum_parse(&revision, svn_cstring_tokenize(" ", &c), NULL));
   SVN_ERR(svn_cstring_strtoui64(&offset, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
   SVN_ERR(svn_cstring_strtoui64(&size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
   SVN_ERR(svn_cstring_strtoui64(&expanded_size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
 
+  /* look it up */
   result = find_representation(&idx, fs, &revision_info, revision, (apr_size_t)offset);
   if (!result)
     {
+      /* not parsed, yet (probably a rep in the same revision).
+       * Create a new rep object and determine its base rep as well.
+       */
       apr_size_t header_size;
       svn_boolean_t is_plain;
       
@@ -732,8 +814,10 @@ parse_representation(representation_t **
   return SVN_NO_ERROR;
 }
 
-/* Get the file content of revision REVISION in FS and return it in *DATA.
- * Use SCRATCH_POOL for temporary allocations.
+/* Get the unprocessed (i.e. still deltified) content of REPRESENTATION in
+ * FS and return it in *CONTENT.  If not NULL, FILE_CONTENT must contain
+ * the contents of the revision that also contains the representation.
+ * Use POOL for allocations.
  */
 static svn_error_t *
 get_rep_content(svn_stringbuf_t **content,
@@ -773,8 +857,12 @@ get_rep_content(svn_stringbuf_t **conten
 }
 
 
-/* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
-   window into *NWIN. */
+/* Read the delta window contents of all windows in REPRESENTATION in FS.
+ * If not NULL, FILE_CONTENT must contain the contents of the revision that
+ * also contains the representation.
+ * Return the data as svn_txdelta_window_t* instances in *WINDOWS.
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 read_windows(apr_array_header_t **windows,
              fs_fs_t *fs,
@@ -789,13 +877,16 @@ read_windows(apr_array_header_t **window
 
   *windows = apr_array_make(pool, 0, sizeof(svn_txdelta_window_t *));
 
+  /* get the whole revision content */
   SVN_ERR(get_rep_content(&content, fs, representation, file_content, pool));
 
+  /* create a read stream and position it directly after the rep header */
   content->data += 3;
   content->len -= 3;
   stream = svn_stream_from_stringbuf(content, pool);
   SVN_ERR(svn_stream_read(stream, &version, &len));
 
+  /* read the windows from that stream */
   while (TRUE)
     {
       svn_txdelta_window_t *window;
@@ -816,9 +907,12 @@ read_windows(apr_array_header_t **window
   return SVN_NO_ERROR;
 }
 
-/* Get the undeltified window that is a result of combining all deltas
-   from the current desired representation identified in *RB with its
-   base representation.  Store the window in *RESULT. */
+/* Get the undeltified representation that is a result of combining all
+ * deltas from the current desired REPRESENTATION in FS with its base
+ * representation.  If not NULL, FILE_CONTENT must contain the contents of
+ * the revision that also contains the representation.  Store the result
+ * in *CONTENT.  Use POOL for allocations.
+ */
 static svn_error_t *
 get_combined_window(svn_stringbuf_t **content,
                     fs_fs_t *fs,
@@ -833,20 +927,28 @@ get_combined_window(svn_stringbuf_t **co
   apr_pool_t *sub_pool = svn_pool_create(pool);
   apr_pool_t *iter_pool = svn_pool_create(pool);
 
+  /* special case: no un-deltification necessary */
   if (representation->is_plain)
     return get_rep_content(content, fs, representation, file_content, pool);
 
+  /* special case: data already in cache */
   *content = get_cached_window(fs, representation, pool);
   if (*content)
     return SVN_NO_ERROR;
   
+  /* read the delta windows for this representation */
+  sub_pool = svn_pool_create(pool);
+  iter_pool = svn_pool_create(pool);
   SVN_ERR(read_windows(&windows, fs, representation, file_content, sub_pool));
+
+  /* fetch / create the base content */
   if (representation->delta_base && representation->delta_base->revision)
     SVN_ERR(get_combined_window(&base_content, fs,
                                 representation->delta_base, NULL, sub_pool));
   else
     base_content = svn_stringbuf_create_empty(sub_pool);
 
+  /* apply deltas */
   result = svn_stringbuf_create_empty(pool);
   source = base_content->data;
   
@@ -869,12 +971,15 @@ get_combined_window(svn_stringbuf_t **co
 
   svn_pool_destroy(iter_pool);
   svn_pool_destroy(sub_pool);
-  
+
+  /* cache result and return it */
   set_cached_window(fs, representation, result);
   *content = result;
+  
   return SVN_NO_ERROR;
 }
 
+/* forward declaration */
 static svn_error_t *
 read_noderev(fs_fs_t *fs,
              svn_stringbuf_t *file_content,
@@ -883,6 +988,12 @@ read_noderev(fs_fs_t *fs,
              apr_pool_t *pool,
              apr_pool_t *scratch_pool);
 
+/* Starting at the directory in REPRESENTATION in FILE_CONTENT, read all
+ * DAG nodes, directories and representations linked in that tree structure.
+ * Store them in FS and REVISION_INFO.  Also, read them only once.
+ *
+ * Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
+ */
 static svn_error_t *
 parse_dir(fs_fs_t *fs,
           svn_stringbuf_t *file_content,
@@ -898,9 +1009,11 @@ parse_dir(fs_fs_t *fs,
   const char *revision_key;
   apr_size_t key_len;
 
+  /* special case: empty dir rep */
   if (representation == NULL)
     return SVN_NO_ERROR;
 
+  /* get the directory as unparsed string */
   iter_pool = svn_pool_create(scratch_pool);
   text_pool = svn_pool_create(scratch_pool);
 
@@ -908,14 +1021,16 @@ parse_dir(fs_fs_t *fs,
                               text_pool));
   current = text->data;
 
+  /* calculate some invariants */
   revision_key = apr_psprintf(text_pool, "r%ld/", representation->revision);
   key_len = strlen(revision_key);
   
-  /* Translate the string dir entries into real entries. */
+  /* Parse and process all directory entries. */
   while (*current != 'E')
     {
       char *next;
 
+      /* skip "K ???\n<name>\nV ???\n" lines*/
       current = strchr(current, '\n');
       if (current)
         current = strchr(current+1, '\n');
@@ -927,11 +1042,14 @@ parse_dir(fs_fs_t *fs,
            _("Corrupt directory representation in rev %ld at offset %ld"),
                                  representation->revision,
                                  (long)representation->offset);
-      
+
+      /* iff this entry refers to a node in the same revision as this dir,
+       * recurse into that node */
       *next = 0;
       current = strstr(current, revision_key);
       if (current)
         {
+          /* recurse */
           apr_uint64_t offset;
 
           SVN_ERR(svn_cstring_strtoui64(&offset, current + key_len, 0,
@@ -949,6 +1067,13 @@ parse_dir(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Starting at the noderev at OFFSET in FILE_CONTENT, read all DAG nodes,
+ * directories and representations linked in that tree structure.  Store
+ * them in FS and REVISION_INFO.  Also, read them only once.  Return the
+ * result in *NODEREV.
+ *
+ * Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
+ */
 static svn_error_t *
 read_noderev(fs_fs_t *fs,
              svn_stringbuf_t *file_content,
@@ -964,9 +1089,11 @@ read_noderev(fs_fs_t *fs,
   svn_boolean_t is_dir = FALSE;
 
   scratch_pool = svn_pool_create(scratch_pool);
-  
+
+  /* parse the noderev line-by-line until we find an empty line */
   while (1)
     {
+      /* for this line, extract key and value. Ignore invalid values */
       svn_string_t key;
       svn_string_t value;
       char *sep;
@@ -975,6 +1102,8 @@ read_noderev(fs_fs_t *fs,
 
       line = svn_string_ncreate(start, end - start, scratch_pool);
       offset += end - start + 1;
+
+      /* empty line -> end of noderev data */
       if (line->len == 0)
         break;
       
@@ -992,6 +1121,7 @@ read_noderev(fs_fs_t *fs,
       value.data = sep + 2;
       value.len = line->len - (key.len + 2);
 
+      /* translate (key, value) into noderev elements */
       if (key_matches(&key, "type"))
         is_dir = strcmp(value.data, "dir") == 0;
       else if (key_matches(&key, "text"))
@@ -999,6 +1129,8 @@ read_noderev(fs_fs_t *fs,
           SVN_ERR(parse_representation(&text, fs, file_content,
                                        &value, revision_info,
                                        pool, scratch_pool));
+          
+          /* if we are the first to use this rep, mark it as "text rep" */
           if (++text->ref_count == 1)
             text->kind = is_dir ? dir_rep : file_rep;
         }
@@ -1007,15 +1139,20 @@ read_noderev(fs_fs_t *fs,
           SVN_ERR(parse_representation(&props, fs, file_content,
                                        &value, revision_info,
                                        pool, scratch_pool));
+
+          /* if we are the first to use this rep, mark it as "prop rep" */
           if (++props->ref_count == 1)
             props->kind = is_dir ? dir_property_rep : file_property_rep;
         }
     }
 
+  /* if this is a directory and has not been processed, yet, read and
+   * process it recursively */
   if (is_dir && text && text->ref_count == 1)
     SVN_ERR(parse_dir(fs, file_content, text, revision_info,
                       pool, scratch_pool));
 
+  /* update stats */
   if (is_dir)
     {
       revision_info->dir_noderev_size += offset - start_offset;
@@ -1031,6 +1168,9 @@ read_noderev(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Given the unparsed changes list in CHANGES with LEN chars, return the
+ * number of changed paths encoded in it.
+ */
 static apr_size_t
 get_change_count(const char *changes,
                  apr_size_t len)
@@ -1038,19 +1178,27 @@ get_change_count(const char *changes,
   apr_size_t lines = 0;
   const char *end = changes + len;
 
+  /* line count */
   for (; changes < end; ++changes)
     if (*changes == '\n')
       ++lines;
 
+  /* two lines per change */
   return lines / 2;
 }
 
-static void print_progress(svn_revnum_t revision)
+/* Simple utility to print a REVISION number and make it appear immediately.
+ */
+static void
+print_progress(svn_revnum_t revision)
 {
   printf("%8ld", revision);
   fflush(stdout);
 }
 
+/* Read the content of the pack file starting at revision BASE and store it
+ * in FS.  Use POOL for allocations.
+ */
 static svn_error_t *
 read_pack_file(fs_fs_t *fs,
                svn_revnum_t base,
@@ -1063,17 +1211,20 @@ read_pack_file(fs_fs_t *fs,
   apr_off_t file_size = 0;
   const char *pack_folder = get_pack_folder(fs, base, local_pool);
 
+  /* parse the manifest file */
   SVN_ERR(read_manifest(&manifest, fs, pack_folder, local_pool));
   if (manifest->nelts != fs->max_files_per_dir)
     return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, NULL);
 
   SVN_ERR(rev_or_pack_file_size(&file_size, fs, base, pool));
 
+  /* process each revision in the pack file */
   for (i = 0; i < manifest->nelts; ++i)
     {
       apr_size_t root_node_offset;
       svn_stringbuf_t *rev_content;
   
+      /* create the revision info for the current rev */
       revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
       info->representations = apr_array_make(iter_pool, 4, sizeof(representation_t*));
 
@@ -1103,15 +1254,20 @@ read_pack_file(fs_fs_t *fs,
       info->representations = apr_array_copy(pool, info->representations);
       APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
       
+      /* destroy temps */
       svn_pool_clear(iter_pool);
     }
 
+  /* one more pack file processed */
   print_progress(base);
   apr_pool_destroy(local_pool);
 
   return SVN_NO_ERROR;
 }
 
+/* Read the content of the file for REVISION and store its contents in FS.
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 read_revision_file(fs_fs_t *fs,
                    svn_revnum_t revision,
@@ -1123,8 +1279,10 @@ read_revision_file(fs_fs_t *fs,
   revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
   apr_off_t file_size = 0;
 
+  /* determine the size of the revision file */
   SVN_ERR(rev_or_pack_file_size(&file_size, fs, revision, pool));
 
+  /* create the revision info for the current rev */
   info->representations = apr_array_make(pool, 4, sizeof(representation_t*));
 
   info->revision = revision;
@@ -1139,16 +1297,19 @@ read_revision_file(fs_fs_t *fs,
                                rev_content,
                                local_pool));
 
+  /* put it into our containers */
   APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
 
   info->change_count
     = get_change_count(rev_content->data + info->changes,
                        info->changes_len);
 
+  /* parse the revision content recursively. */
   SVN_ERR(read_noderev(fs, rev_content,
                        root_node_offset, info,
                        pool, local_pool));
 
+  /* show progress every 1000 revs or so */
   if (revision % fs->max_files_per_dir == 0)
     print_progress(revision);
 
@@ -1157,6 +1318,10 @@ read_revision_file(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Read the repository at PATH beginning with revision START_REVISION and
+ * return the result in *FS.  Allocate caches with MEMSIZE bytes total
+ * capacity.  Use POOL for non-cache allocations.
+ */
 static svn_error_t *
 read_revisions(fs_fs_t **fs,
                const char *path,
@@ -1176,6 +1341,7 @@ read_revisions(fs_fs_t **fs,
   
   SVN_ERR(fs_open(fs, path, pool));
 
+  /* create data containers and caches */
   (*fs)->start_revision = start_revision
                         - (start_revision % (*fs)->max_files_per_dir);
   (*fs)->revisions = apr_array_make(pool,
@@ -1187,41 +1353,71 @@ read_revisions(fs_fs_t **fs,
                          (svn_pool_create_allocator(FALSE)),
                           10000, window_cache_size);
 
+  /* read all packed revs */
   for ( revision = start_revision
       ; revision < (*fs)->min_unpacked_rev
       ; revision += (*fs)->max_files_per_dir)
     SVN_ERR(read_pack_file(*fs, revision, pool));
-    
+
+  /* read non-packed revs */
   for ( ; revision <= (*fs)->max_revision; ++revision)
     SVN_ERR(read_revision_file(*fs, revision, pool));
 
   return SVN_NO_ERROR;
 }
 
+/* Compression statistics we collect over a given set of representations.
+ */
 typedef struct rep_pack_stats_t
 {
+  /* number of representations */
   apr_int64_t count;
+
+  /* total size after deltification (i.e. on disk size) */
   apr_int64_t packed_size;
+  
+  /* total size after de-deltification (i.e. plain text size) */
   apr_int64_t expanded_size;
+
+  /* total on-disk header size */
   apr_int64_t overhead_size;
 } rep_pack_stats_t;
 
+/* Statistics we collect over a given set of representations.
+ * We group them into shared and non-shared ("unique") reps.
+ */
 typedef struct representation_stats_t
 {
+  /* stats over all representations */
   rep_pack_stats_t total;
+  
+  /* stats over those representations with ref_count == 1 */
   rep_pack_stats_t uniques;
+
+  /* stats over those representations with ref_count > 1 */
   rep_pack_stats_t shared;
   
+  /* sum of all ref_counts */
   apr_int64_t references;
+
+  /* sum of ref_count * expanded_size,
+   * i.e. total plaintext content if there was no rep sharing */
   apr_int64_t expanded_size;
 } representation_stats_t;
 
+/* Basic statistics we collect over a given set of noderevs.
+ */
 typedef struct node_stats_t
 {
+  /* number of noderev structs */
   apr_int64_t count;
+  
+  /* their total size on disk (structs only) */
   apr_int64_t size;
 } node_stats_t;
 
+/* Accumulate stats of REP in STATS.
+ */
 static void
 add_rep_pack_stats(rep_pack_stats_t *stats,
                    representation_t *rep)
@@ -1230,9 +1426,11 @@ add_rep_pack_stats(rep_pack_stats_t *sta
   
   stats->packed_size += rep->size;
   stats->expanded_size += rep->expanded_size;
-  stats->overhead_size += rep->header_size + 7;
+  stats->overhead_size += rep->header_size + 7 /* ENDREP\n */;
 }
 
+/* Accumulate stats of REP in STATS.
+ */
 static void
 add_rep_stats(representation_stats_t *stats,
               representation_t *rep)
@@ -1247,6 +1445,9 @@ add_rep_stats(representation_stats_t *st
   stats->expanded_size += rep->ref_count * rep->expanded_size;
 }
 
+/* Print statistics for the given group of representations to console.
+ * Use POOL for allocations.
+ */
 static void
 print_rep_stats(representation_stats_t *stats,
                 apr_pool_t *pool)
@@ -1267,12 +1468,16 @@ print_rep_stats(representation_stats_t *
          svn__i64toa_sep(stats->references - stats->total.count, ',', pool));
 }
 
+/* Post-process stats for FS and print them to the console.
+ * Use POOL for allocations.
+ */
 static void
 print_stats(fs_fs_t *fs,
             apr_pool_t *pool)
 {
   int i, k;
-  
+
+  /* initialize stats to collect */
   representation_stats_t file_rep_stats = { { 0 } };
   representation_stats_t dir_rep_stats = { { 0 } };
   representation_stats_t file_prop_rep_stats = { { 0 } };
@@ -1286,11 +1491,14 @@ print_stats(fs_fs_t *fs,
   apr_int64_t total_size = 0;
   apr_int64_t change_count = 0;
   apr_int64_t change_len = 0;
-  
+
+  /* aggregate info from all revisions */
   for (i = 0; i < fs->revisions->nelts; ++i)
     {
       revision_info_t *revision = APR_ARRAY_IDX(fs->revisions, i,
                                                 revision_info_t *);
+
+      /* data gathered on a revision level */
       change_count += revision->change_count;
       change_len += revision->changes_len;
       total_size += revision->end - revision->offset;
@@ -1303,11 +1511,14 @@ print_stats(fs_fs_t *fs,
                               + revision->file_noderev_count;
       total_node_stats.size += revision->dir_noderev_size
                              + revision->file_noderev_size;
-      
+
+      /* process representations */
       for (k = 0; k < revision->representations->nelts; ++k)
         {
           representation_t *rep = APR_ARRAY_IDX(revision->representations,
                                                 k, representation_t *);
+
+          /* accumulate in the right bucket */
           switch(rep->kind)
             {
               case file_rep:
@@ -1330,6 +1541,7 @@ print_stats(fs_fs_t *fs,
         }
     }
 
+  /* print results */
   printf("\nGlobal statistics:\n");
   printf(_("%20s bytes in %12s revisions\n"
            "%20s bytes in %12s changes\n"
@@ -1388,6 +1600,9 @@ print_stats(fs_fs_t *fs,
   print_rep_stats(&file_prop_rep_stats, pool);
 }
 
+/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and
+ * POOL for allocations.
+ */
 static void
 print_usage(svn_stream_t *ostream, const char *progname,
             apr_pool_t *pool)
@@ -1404,6 +1619,7 @@ print_usage(svn_stream_t *ostream, const
      progname));
 }
 
+/* linear control flow */
 int main(int argc, const char *argv[])
 {
   apr_pool_t *pool;

Modified: subversion/branches/in-repo-authz/tools/server-side/svnpubsub/commit-hook.py
URL: http://svn.apache.org/viewvc/subversion/branches/in-repo-authz/tools/server-side/svnpubsub/commit-hook.py?rev=1424778&r1=1424777&r2=1424778&view=diff
==============================================================================
--- subversion/branches/in-repo-authz/tools/server-side/svnpubsub/commit-hook.py (original)
+++ subversion/branches/in-repo-authz/tools/server-side/svnpubsub/commit-hook.py Fri Dec 21 00:46:48 2012
@@ -42,11 +42,11 @@ def svncmd_uuid(repo):
 def svncmd_info(repo, revision):
     cmd = "%s info -r %s %s" % (SVNLOOK, revision, repo)
     p = svncmd(cmd)
-    data = p.stdout.read().strip().split("\n")
+    data = p.stdout.read().split("\n")
     #print data
-    return {'author': data[0],
-            'date': data[1],
-            'log': "\n".join(data[3:])}
+    return {'author': data[0].strip(),
+            'date': data[1].strip(),
+            'log': "\n".join(data[3:]).strip()}
 
 def svncmd_dirs(repo, revision):
     cmd = "%s dirs-changed  -r %s %s" % (SVNLOOK, revision, repo)
@@ -59,6 +59,19 @@ def svncmd_dirs(repo, revision):
         dirs.append(line.strip())
     return dirs
 
+def svncmd_changed(repo, revision):
+    cmd = "%s changed -r %s %s" % (SVNLOOK, revision, repo)
+    p = svncmd(cmd)
+    changed = {} 
+    while True:
+        line = p.stdout.readline()
+        if not line:
+            break
+        line = line.strip()
+        (flags, filename) = (line[0:3], line[4:])
+        changed[filename] = {'flags': flags} 
+    return changed
+
 def do_put(body):
     opener = urllib2.build_opener(urllib2.HTTPHandler)
     request = urllib2.Request("http://%s:%d/dirs-changed" %(HOST, PORT), data=body)
@@ -72,12 +85,14 @@ def main(repo, revision):
     i = svncmd_info(repo, revision)
     data = {'revision': int(revision),
             'dirs_changed': [],
+            'changed': {},
             'repos': svncmd_uuid(repo),
             'author': i['author'],
             'log': i['log'],
             'date': i['date'],
             }
     data['dirs_changed'].extend(svncmd_dirs(repo, revision))
+    data['changed'].update(svncmd_changed(repo, revision))
     body = json.dumps(data)
     #print body
     do_put(body)

Modified: subversion/branches/in-repo-authz/tools/server-side/svnpubsub/irkerbridge.py
URL: http://svn.apache.org/viewvc/subversion/branches/in-repo-authz/tools/server-side/svnpubsub/irkerbridge.py?rev=1424778&r1=1424777&r2=1424778&view=diff
==============================================================================
--- subversion/branches/in-repo-authz/tools/server-side/svnpubsub/irkerbridge.py (original)
+++ subversion/branches/in-repo-authz/tools/server-side/svnpubsub/irkerbridge.py Fri Dec 21 00:46:48 2012
@@ -145,6 +145,12 @@ class BigDoEverythingClass(object):
     return result
 
   def fill_in_extra_args(self, rev):
+    # Set any empty members to the string "<null>"
+    v = vars(rev)
+    for k in v.keys():
+      if not v[k]:
+        v[k] = '<null>'
+       
     # Add entries to the rev object that are useful for
     # formatting.
     rev.log_firstline = rev.log.split("\n",1)[0]

Modified: subversion/branches/in-repo-authz/tools/server-side/svnpubsub/svnpubsub/client.py
URL: http://svn.apache.org/viewvc/subversion/branches/in-repo-authz/tools/server-side/svnpubsub/svnpubsub/client.py?rev=1424778&r1=1424777&r2=1424778&view=diff
==============================================================================
--- subversion/branches/in-repo-authz/tools/server-side/svnpubsub/svnpubsub/client.py (original)
+++ subversion/branches/in-repo-authz/tools/server-side/svnpubsub/svnpubsub/client.py Fri Dec 21 00:46:48 2012
@@ -119,10 +119,15 @@ class XMLStreamHandler(xml.sax.handler.C
 
     self.rev = None
     self.chars = ''
+    self.parent = None
+    self.attrs = [ ] 
 
   def startElement(self, name, attrs):
+    self.attrs = attrs
     if name == 'commit':
       self.rev = Revision(attrs['repository'], int(attrs['revision']))
+    elif name == "dirs_changed" or name == "changed":
+      self.parent = name
     # No other elements to worry about.
 
   def characters(self, data):
@@ -134,10 +139,15 @@ class XMLStreamHandler(xml.sax.handler.C
       self.rev = None
     elif name == 'stillalive':
       self.event_callback('ping')
+    elif name == self.parent:
+      self.parent = None
     elif self.chars and self.rev:
       value = self.chars.strip()
-      if name == 'path':
+      if self.parent == 'dirs_changed' and name == 'path':
         self.rev.dirs_changed.append(value.decode('unicode_escape'))
+      elif self.parent == 'changed' and name == 'path':
+        path = value.decode('unicode_escape')
+        self.rev.changed[path] = dict(p for p in self.attrs.items())
       elif name == 'author':
         self.rev.author = value.decode('unicode_escape')
       elif name == 'date':
@@ -147,6 +157,8 @@ class XMLStreamHandler(xml.sax.handler.C
 
     # Toss out any accumulated characters for this element.
     self.chars = ''
+    # Toss out the saved attributes for this element.
+    self.attrs = [ ]
 
 
 class Revision(object):
@@ -154,6 +166,7 @@ class Revision(object):
     self.uuid = uuid
     self.rev = rev
     self.dirs_changed = [ ]
+    self.changed = { } 
     self.author = None
     self.date = None
     self.log = None

Modified: subversion/branches/in-repo-authz/tools/server-side/svnpubsub/svnpubsub/server.py
URL: http://svn.apache.org/viewvc/subversion/branches/in-repo-authz/tools/server-side/svnpubsub/svnpubsub/server.py?rev=1424778&r1=1424777&r2=1424778&view=diff
==============================================================================
--- subversion/branches/in-repo-authz/tools/server-side/svnpubsub/svnpubsub/server.py (original)
+++ subversion/branches/in-repo-authz/tools/server-side/svnpubsub/svnpubsub/server.py Fri Dec 21 00:46:48 2012
@@ -78,7 +78,8 @@ class Revision:
         # thus creating invalid XML, so the XML code paths do escaping.
         self.rev = r.get('revision')
         self.repos = r.get('repos')
-        self.dirs_changed = [x for x in r.get('dirs_changed')]
+        self.dirs_changed = r.get('dirs_changed')
+        self.changed = r.get('changed')
         self.author = r.get('author')
         self.log = r.get('log')
         self.date = r.get('date')
@@ -88,6 +89,7 @@ class Revision:
             return json.dumps({'commit': {'repository': self.repos,
                                           'revision': self.rev,
                                           'dirs_changed': self.dirs_changed,
+                                          'changed': self.changed,
                                           'author': self.author,
                                           'log': self.log,
                                           'date': self.date}}) +","
@@ -100,6 +102,11 @@ class Revision:
             for p in self.dirs_changed:
                 x = ET.SubElement(d, 'path')
                 x.text = p.encode('unicode_escape')
+            ch = ET.SubElement(c, 'changed')
+            for chp in self.changed.keys():
+                x = ET.SubElement(ch, 'path', self.changed[chp])
+                x.text = chp.encode('unicode_escape')
+
             str = ET.tostring(c, 'UTF-8') + "\n"
             return str[39:]
         else:

Propchange: subversion/branches/in-repo-authz/tools/server-side/svnpubsub/svntweet.py
------------------------------------------------------------------------------
    svn:executable = *