You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by ju...@apache.org on 2013/01/06 03:33:39 UTC

svn commit: r1429457 [20/21] - in /subversion/branches/tree-read-api: ./ build/ build/ac-macros/ build/generator/templates/ build/win32/ contrib/server-side/svncutter/ doc/ subversion/bindings/cxxhl/include/ subversion/bindings/cxxhl/include/svncxxhl/ ...

Modified: subversion/branches/tree-read-api/tools/dist/release.py
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/dist/release.py?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/dist/release.py (original)
+++ subversion/branches/tree-read-api/tools/dist/release.py Sun Jan  6 02:33:34 2013
@@ -34,6 +34,9 @@
 # It'd be kind of nice to use the Subversion python bindings in this script,
 # but people.apache.org doesn't currently have them installed
 
+# Futures (Python 2.5 compatibility)
+from __future__ import with_statement
+
 # Stuff we need
 import os
 import re
@@ -88,6 +91,7 @@ secure_repos = 'https://svn.apache.org/r
 dist_repos = 'https://dist.apache.org/repos/dist'
 dist_dev_url = dist_repos + '/dev/subversion'
 dist_release_url = dist_repos + '/release/subversion'
+KEYS = 'https://people.apache.org/keys/group/subversion.asc'
 extns = ['zip', 'tar.gz', 'tar.bz2']
 
 
@@ -374,7 +378,10 @@ def compare_changes(repos, branch, revis
     if stderr:
       raise RuntimeError('svn mergeinfo failed: %s' % stderr)
     if stdout:
-      raise RuntimeError('CHANGES has unmerged revisions: %s' % stdout)
+      # Treat this as a warning since we are now putting entries for future
+      # minor releases in CHANGES on trunk.
+      logging.warning('CHANGES has unmerged revisions: %s' %
+                      stdout.replace("\n", " "))
 
 def roll_tarballs(args):
     'Create the release artifacts.'
@@ -620,7 +627,13 @@ def write_news(args):
 
 def get_sha1info(args, replace=False):
     'Return a list of sha1 info for the release'
-    sha1s = glob.glob(os.path.join(get_deploydir(args.base_dir), '*.sha1'))
+
+    if args.target:
+        target = args.target
+    else:
+        target = get_deploydir(args.base_dir)
+
+    sha1s = glob.glob(os.path.join(target, '*.sha1'))
 
     class info(object):
         pass
@@ -644,10 +657,11 @@ def get_sha1info(args, replace=False):
 def write_announcement(args):
     'Write the release announcement.'
     sha1info = get_sha1info(args)
+    siginfo = "\n".join(get_siginfo(args, True)) + "\n"
 
     data = { 'version'              : str(args.version),
              'sha1info'             : sha1info,
-             'siginfo'              : open('getsigs-output', 'r').read(),
+             'siginfo'              : siginfo,
              'major-minor'          : '%d.%d' % (args.version.major,
                                                  args.version.minor),
              'major-minor-patch'    : args.version.base,
@@ -682,8 +696,8 @@ def write_downloads(args):
 key_start = '-----BEGIN PGP SIGNATURE-----'
 fp_pattern = re.compile(r'^pub\s+(\w+\/\w+)[^\n]*\n\s+Key\sfingerprint\s=((\s+[0-9A-F]{4}){10})\nuid\s+([^<\(]+)\s')
 
-def check_sigs(args):
-    'Check the signatures for the release.'
+def get_siginfo(args, quiet=False):
+    'Returns a list of signatures for the release.'
 
     try:
         import gnupg
@@ -697,13 +711,16 @@ def check_sigs(args):
         target = get_deploydir(args.base_dir)
 
     good_sigs = {}
+    fingerprints = {}
+    output = []
 
     glob_pattern = os.path.join(target, 'subversion*-%s*.asc' % args.version)
     for filename in glob.glob(glob_pattern):
         text = open(filename).read()
         keys = text.split(key_start)
 
-        logging.info("Checking %d sig(s) in %s" % (len(keys[1:]), filename))
+        if not quiet:
+            logging.info("Checking %d sig(s) in %s" % (len(keys[1:]), filename))
         for key in keys[1:]:
             fd, fn = tempfile.mkstemp()
             os.write(fd, key_start + key)
@@ -733,9 +750,30 @@ def check_sigs(args):
                                                      if l[0:7] != 'Warning' ])
 
         fp = fp_pattern.match(gpg_output).groups()
-        print("   %s [%s] with fingerprint:" % (fp[3], fp[0]))
-        print("   %s" % fp[1])
+        fingerprints["%s [%s] %s" % (fp[3], fp[0], fp[1])] = fp
 
+    for entry in sorted(fingerprints.keys()):
+        fp = fingerprints[entry]
+        output.append("   %s [%s] with fingerprint:" % (fp[3], fp[0]))
+        output.append("   %s" % fp[1])
+
+    return output
+
+def check_sigs(args):
+    'Check the signatures for the release.'
+
+    output = get_siginfo(args)
+    for line in output:
+        print(line)
+
+def get_keys(args):
+    'Import the LDAP-based KEYS file to gpg'
+    # We use a tempfile because urlopen() objects don't have a .fileno()
+    with tempfile.SpooledTemporaryFile() as fd:
+        fd.write(urllib2.urlopen(KEYS).read())
+        fd.flush()
+        fd.seek(0)
+        subprocess.check_call(['gpg', '--import'], stdin=fd)
 
 #----------------------------------------------------------------------
 # Main entry point for argument parsing and handling
@@ -844,13 +882,18 @@ def main():
     subparser.add_argument('version', type=Version,
                     help='''The release label, such as '1.7.0-alpha1'.''')
 
+    # write-announcement
     subparser = subparsers.add_parser('write-announcement',
                     help='''Output to stdout template text for the emailed
                             release announcement.''')
     subparser.set_defaults(func=write_announcement)
+    subparser.add_argument('--target',
+                    help='''The full path to the directory containing
+                            release artifacts.''')
     subparser.add_argument('version', type=Version,
                     help='''The release label, such as '1.7.0-alpha1'.''')
 
+    # write-downloads
     subparser = subparsers.add_parser('write-downloads',
                     help='''Output to stdout template text for the download
                             table for subversion.apache.org''')
@@ -858,7 +901,7 @@ def main():
     subparser.add_argument('version', type=Version,
                     help='''The release label, such as '1.7.0-alpha1'.''')
 
-    # The check sigs subcommand
+    # check-sigs
     subparser = subparsers.add_parser('check-sigs',
                     help='''Output to stdout the signatures collected for this
                             release''')
@@ -869,6 +912,11 @@ def main():
                     help='''The full path to the directory containing
                             release artifacts.''')
 
+    # get-keys
+    subparser = subparsers.add_parser('get-keys',
+                    help='''Import committers' public keys to ~/.gpg/''')
+    subparser.set_defaults(func=get_keys)
+
     # A meta-target
     subparser = subparsers.add_parser('clean',
                     help='''The same as the '--clean' switch, but as a

Copied: subversion/branches/tree-read-api/tools/hook-scripts/validate-files.conf.example (from r1429420, subversion/trunk/tools/hook-scripts/validate-files.conf.example)
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/hook-scripts/validate-files.conf.example?p2=subversion/branches/tree-read-api/tools/hook-scripts/validate-files.conf.example&p1=subversion/trunk/tools/hook-scripts/validate-files.conf.example&r1=1429420&r2=1429457&rev=1429457&view=diff
==============================================================================
--- subversion/trunk/tools/hook-scripts/validate-files.conf.example (original)
+++ subversion/branches/tree-read-api/tools/hook-scripts/validate-files.conf.example Sun Jan  6 02:33:34 2013
@@ -30,38 +30,40 @@ svnlook = /usr/local/bin/svnlook
 # platform.
 #
 # The command option is the command to run, this command will be run via
-# the shell of your platform.  Your command will have variable replacement
-# made on it prior to execution as follows:
-#  $REPO or ${REPO} expands to the path of the repository for the commit.
-#  $TXN or ${TXN} expands to the transaction id of the commit.
-#  $FILE or ${FILE} expands to the name of the file that matched the pattern.
-#
-# $ characters that are not followed by one of the above variable names will
-# be untouched.
+# the shell of your platform.  The following environment variables will
+# be defined for you:
+#  REPO  = the path of the repository for the commit.
+#  TXN   = the transaction id of the commit.
+#  FILE  = the name of the file that matched the pattern.
 #
 # IMPORTANT: AS A CONSEQUENCE OF THE USE OF THE SHELL IT IS IMPORTANT TO
-# QUOTE THE ARGUMENTS OF YOUR COMMANDS.  THE $FILE VARIABLE DOES CONTAIN
+# QUOTE THE ARGUMENTS OF YOUR COMMANDS.  THE FILE VARIABLE DOES CONTAIN
 # USER GENERATED DATA AND SHELL METACHARACTERS ARE NOT ESCAPED FOR YOU!
-
+#
+# The following examples assume a POSIX shell; if your platform has a
+# different shell you may need to adjust them.  For example on Windows
+# cmd.exe uses %VARIABLENAME% instead of $VARIABLENAME to expand environment
+# variables.
+#
 # The following rule runs the svnauthz command's validate subcommand
 # for file named authz in the conf subdir if it is present in the commit.
 # This is a simple way to ensure that invalid authz files are not allowed
 # to be committed.
 #[rule:svnauthz-validate]
 #pattern = conf/authz
-#command = '%(svnauthz)s' validate -t '$TXN' '$REPO' '$FILE'
+#command = '%(svnauthz)s' validate -t "$TXN" "$REPO" "$FILE"
 
 # The following rule runs the svnauthz command's accessof subcommand
-# for any file ending in .authz for config subdir and checks that the admin
+# for any file ending in .authz for the conf subdir and checks that the admin
 # user has rw rights to the same file.  This can be used to prevent an
 # authz file being committed that would remove access for the admin user.
 # Note that accessof also validates the validity of the file as well as
 # checking the permissions, so it's unnecessary to run validate and accessof.
 #[rule:admin-rw-authz]
 #pattern = /conf/*.authz
-#command = '%(svnauthz)s' accessof --username admin --path '${FILE}' --is rw -t '${TXN}' '${REPO}' '${FILE}'
+#command = '%(svnauthz)s' accessof --username admin --path "$FILE" --is rw -t "$TXN" "$REPO" "$FILE"
 
 # Use the xmllint command to validate all files ending in .xml
 #[rule:xmllint]
 #pattern = *.xml
-#command = '%(svnlook)s' cat -t '${TXN}' '${REPO}' '${FILE}' | '%(xmllint)s' --noout -
+#command = '%(svnlook)s' cat -t "$TXN" "$REPO" "$FILE" | '%(xmllint)s' --noout -

Copied: subversion/branches/tree-read-api/tools/hook-scripts/validate-files.py (from r1429420, subversion/trunk/tools/hook-scripts/validate-files.py)
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/hook-scripts/validate-files.py?p2=subversion/branches/tree-read-api/tools/hook-scripts/validate-files.py&p1=subversion/trunk/tools/hook-scripts/validate-files.py&r1=1429420&r2=1429457&rev=1429457&view=diff
==============================================================================
--- subversion/trunk/tools/hook-scripts/validate-files.py (original)
+++ subversion/branches/tree-read-api/tools/hook-scripts/validate-files.py Sun Jan  6 02:33:34 2013
@@ -25,7 +25,6 @@ import sys
 import os
 import subprocess
 import fnmatch
-from string import Template
 
 # Deal with the rename of ConfigParser to configparser in Python3
 try:
@@ -81,7 +80,7 @@ class Commands:
             line = p.stdout.readline()
             if not line:
                 break
-            line = line.strip()
+            line = line.decode().strip()
             text_mod = line[0:1]
             # Only if the contents of the file changed (by addition or update)
             # directories always end in / in the svnlook changed output
@@ -92,7 +91,7 @@ class Commands:
         # returncode/stderr output
         data = p.communicate()
         if p.returncode != 0:
-            sys.stderr.write(data[1])
+            sys.stderr.write(data[1].decode())
             sys.exit(2)
 
         return changed
@@ -103,12 +102,14 @@ class Commands:
         in the defined command.
 
         Returns a tuple of the exit code and the stderr output of the command"""
-        cmd_template = self.config.get(section, 'command')
-        cmd = Template(cmd_template).safe_substitute(REPO=repo,
-                                                     TXN=txn, FILE=fn)
-        p = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE)
+        cmd = self.config.get(section, 'command')
+        cmd_env = os.environ.copy()
+        cmd_env['REPO'] = repo
+        cmd_env['TXN'] = txn
+        cmd_env['FILE'] = fn
+        p = subprocess.Popen(cmd, shell=True, env=cmd_env, stderr=subprocess.PIPE)
         data = p.communicate()
-        return (p.returncode, data[1])
+        return (p.returncode, data[1].decode())
 
 def main(repo, txn):
     exitcode = 0
@@ -154,5 +155,5 @@ if __name__ == "__main__":
     try:
         sys.exit(main(sys.argv[1], sys.argv[2]))
     except configparser.Error as e:
-	sys.stderr.write("Error with the validate-files.conf: %s\n" % e)
-	sys.exit(2)
+        sys.stderr.write("Error with the validate-files.conf: %s\n" % e)
+        sys.exit(2)

Modified: subversion/branches/tree-read-api/tools/server-side/fsfs-reorg.c
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/fsfs-reorg.c?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/fsfs-reorg.c (original)
+++ subversion/branches/tree-read-api/tools/server-side/fsfs-reorg.c Sun Jan  6 02:33:34 2013
@@ -1,4 +1,5 @@
-/* diff.c -- test driver for text diffs
+/* fsfs-reorg.c -- prototypic tool to reorganize packed FSFS repositories
+ *                 to reduce seeks
  *
  * ====================================================================
  *    Licensed to the Apache Software Foundation (ASF) under one
@@ -45,7 +46,7 @@
 #define _(x) x
 #endif
 
-#define ERROR_TAG "diff: "
+#define ERROR_TAG "fsfs-reporg: "
 
 /* forward declarations */
 typedef struct noderev_t noderev_t;
@@ -1800,7 +1801,7 @@ read_pack_file(fs_fs_t *fs,
 
   /* one more pack file processed */
   print_progress(base);
-  apr_pool_destroy(local_pool);
+  svn_pool_destroy(local_pool);
 
   return SVN_NO_ERROR;
 }
@@ -1866,7 +1867,7 @@ read_revision_file(fs_fs_t *fs,
   if (revision % fs->max_files_per_dir == 0)
     print_progress(revision);
 
-  apr_pool_destroy(local_pool);
+  svn_pool_destroy(local_pool);
 
   return SVN_NO_ERROR;
 }
@@ -1956,6 +1957,9 @@ get_max_offset_len(const revision_pack_t
   return result;
 }
 
+/* Create the fragments container in PACK and add revision header fragments
+ * to it.  Use POOL for allocations.
+ */
 static svn_error_t *
 add_revisions_pack_heads(revision_pack_t *pack,
                          apr_pool_t *pool)
@@ -2019,6 +2023,9 @@ add_revisions_pack_heads(revision_pack_t
   return SVN_NO_ERROR;
 }
 
+/* For the revision given by INFO in FS, return the fragment container in
+ * *FRAGMENTS and the current placement offset in *CURRENT_POS.
+ */
 static svn_error_t *
 get_target_offset(apr_size_t **current_pos,
                   apr_array_header_t **fragments,
@@ -2029,6 +2036,7 @@ get_target_offset(apr_size_t **current_p
   revision_pack_t *pack;
   svn_revnum_t revision = info->revision;
 
+  /* identify the pack object */
   if (fs->min_unpacked_rev > revision)
     {
       i = (revision - fs->start_revision) / fs->max_files_per_dir;
@@ -2039,6 +2047,7 @@ get_target_offset(apr_size_t **current_p
       i += revision - fs->min_unpacked_rev;
     }
 
+  /* extract the desired info from it */
   pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*);
   *current_pos = &pack->target_offset;
   *fragments = pack->fragments;
@@ -2046,11 +2055,19 @@ get_target_offset(apr_size_t **current_p
   return SVN_NO_ERROR;
 }
 
+/* forward declaration */
 static svn_error_t *
 add_noderev_recursively(fs_fs_t *fs,
                         noderev_t *node,
                         apr_pool_t *pool);
 
+/* Place fragments for the given REPRESENTATION of the given KIND, iff it
+ * has not been covered, yet.  Place the base reps along the deltification
+ * chain as far as those reps have not been covered, yet.  If REPRESENTATION
+ * is a directory, recursively place its elements.
+ * 
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 add_representation_recursively(fs_fs_t *fs,
                                representation_t *representation,
@@ -2060,13 +2077,16 @@ add_representation_recursively(fs_fs_t *
   apr_size_t *current_pos;
   apr_array_header_t *fragments;
   fragment_t fragment;
-  
+
+  /* place REPRESENTATION only once and only if it exists and will not
+   * be covered later as a directory. */
   if (   representation == NULL
       || representation->covered
       || (representation->dir && kind != dir_fragment)
       || representation == fs->null_base)
     return SVN_NO_ERROR;
 
+  /* add and place a fragment for REPRESENTATION */
   SVN_ERR(get_target_offset(&current_pos, &fragments,
                             fs, representation->revision));
   representation->target.offset = *current_pos;
@@ -2077,9 +2097,12 @@ add_representation_recursively(fs_fs_t *
   fragment.position = *current_pos;
   APR_ARRAY_PUSH(fragments, fragment_t) = fragment;
 
+  /* determine the size of data to be added to the target file */
   if (   kind != dir_fragment
       && representation->delta_base && representation->delta_base->dir)
     {
+      /* base rep is a dir -> would change -> need to store it as fulltext
+       * in our target file */
       apr_pool_t *text_pool = svn_pool_create(pool);
       svn_stringbuf_t *content;
 
@@ -2093,6 +2116,7 @@ add_representation_recursively(fs_fs_t *
     if (   kind == dir_fragment
         || (representation->delta_base && representation->delta_base->dir))
       {
+        /* deltified directories may grow considerably */
         if (representation->original.size < 50)
           *current_pos += 300;
         else
@@ -2100,6 +2124,8 @@ add_representation_recursively(fs_fs_t *
       }
     else
       {
+        /* plain / deltified content will not change but the header may
+         * grow slightly due to larger offsets. */
         representation->target.size = representation->original.size;
 
         if (representation->delta_base &&
@@ -2109,12 +2135,14 @@ add_representation_recursively(fs_fs_t *
           *current_pos += representation->original.size + 13;
       }
 
+  /* follow the delta chain and place base revs immediately after this */
   if (representation->delta_base)
     SVN_ERR(add_representation_recursively(fs,
                                            representation->delta_base,
                                            kind,
                                            pool));
 
+  /* finally, recurse into directories */
   if (representation->dir)
     {
       int i;
@@ -2131,6 +2159,11 @@ add_representation_recursively(fs_fs_t *
   return SVN_NO_ERROR;
 }
 
+/* Place fragments for the given NODE in FS, iff it has not been covered,
+ * yet.  Place the reps (text, props) immediately after the node.
+ *
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 add_noderev_recursively(fs_fs_t *fs,
                         noderev_t *node,
@@ -2140,9 +2173,11 @@ add_noderev_recursively(fs_fs_t *fs,
   apr_array_header_t *fragments;
   fragment_t fragment;
 
+  /* don't add it twice */
   if (node->covered)
     return SVN_NO_ERROR;
 
+  /* add and place a fragment for NODE */
   SVN_ERR(get_target_offset(&current_pos, &fragments, fs, node->revision));
   node->covered = TRUE;
   node->target.offset = *current_pos;
@@ -2152,8 +2187,10 @@ add_noderev_recursively(fs_fs_t *fs,
   fragment.position = *current_pos;
   APR_ARRAY_PUSH(fragments, fragment_t) = fragment;
 
+  /* size may slightly increase */
   *current_pos += node->original.size + 40;
-  
+
+  /* recurse into representations */
   if (node->text && node->text->dir)
     SVN_ERR(add_representation_recursively(fs, node->text, dir_fragment, pool));
   else
@@ -2164,6 +2201,8 @@ add_noderev_recursively(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Place a fragment for the last revision in PACK. Use POOL for allocations.
+ */
 static svn_error_t *
 add_revisions_pack_tail(revision_pack_t *pack,
                         apr_pool_t *pool)
@@ -2184,6 +2223,7 @@ add_revisions_pack_tail(revision_pack_t 
 
   pack->target_offset += 2 * offset_len + 3;
 
+  /* end of target file reached.  Store that info in all revs. */
   for (i = 0; i < pack->info->nelts; ++i)
     {
       info = APR_ARRAY_IDX(pack->info, i, revision_info_t*);
@@ -2193,6 +2233,9 @@ add_revisions_pack_tail(revision_pack_t 
   return SVN_NO_ERROR;
 }
 
+/* Place all fragments for all revisions / packs in FS.
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 reorder_revisions(fs_fs_t *fs,
                   apr_pool_t *pool)
@@ -2233,12 +2276,20 @@ reorder_revisions(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* forward declaration */
 static svn_error_t *
 get_fragment_content(svn_string_t **content,
                      fs_fs_t *fs,
                      fragment_t *fragment,
                      apr_pool_t *pool);
 
+/* Directory content may change and with it, the deltified representations
+ * may change significantly.  This function causes all directory target reps in
+ * PACK of FS to be built and their new MD5 as well as rep sizes be updated.
+ * We must do that before attempting to write noderevs.
+ * 
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 update_noderevs(fs_fs_t *fs,
                 revision_pack_t *pack,
@@ -2254,6 +2305,8 @@ update_noderevs(fs_fs_t *fs,
         {
           svn_string_t *content;
 
+          /* request updated rep content but ignore the result.
+           * We are only interested in the MD5, content and rep size updates. */
           SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
           svn_pool_clear(itempool);
         }
@@ -2264,6 +2317,11 @@ update_noderevs(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Determine the target size of the FRAGMENT in FS and return the value
+ * in *LENGTH.  If ADD_PADDING has been set, slightly fudge the numbers
+ * to account for changes in offset lengths etc.  Use POOL for temporary
+ * allocations.
+ */
 static svn_error_t *
 get_content_length(apr_size_t *length,
                    fs_fs_t *fs,
@@ -2293,6 +2351,9 @@ get_content_length(apr_size_t *length,
   return SVN_NO_ERROR;
 }
 
+/* Move the FRAGMENT to global file offset NEW_POSITION.  Update the target
+ * location info of the underlying object as well.
+ */
 static void
 move_fragment(fragment_t *fragment,
               apr_size_t new_position)
@@ -2300,9 +2361,11 @@ move_fragment(fragment_t *fragment,
   revision_info_t *info;
   representation_t *representation;
   noderev_t *node;
-  
+
+  /* move the fragment */
   fragment->position = new_position; 
 
+  /* move the underlying object */
   switch (fragment->kind)
     {
       case header_fragment:
@@ -2329,6 +2392,10 @@ move_fragment(fragment_t *fragment,
     }
 }
 
+/* Move the fragments in PACK's target fragment list to their final offsets.
+ * This may require several iterations if the fudge factors turned out to
+ * be insufficient.  Use POOL for allocations.
+ */
 static svn_error_t *
 pack_revisions(fs_fs_t *fs,
                revision_pack_t *pack,
@@ -2342,8 +2409,13 @@ pack_revisions(fs_fs_t *fs,
 
   apr_pool_t *itempool = svn_pool_create(pool);
 
+  /* update all directory reps. Chances are that most of the target rep
+   * sizes are now close to accurate. */
   SVN_ERR(update_noderevs(fs, pack, pool));
 
+  /* compression phase: pack all fragments tightly with only a very small
+   * fudge factor.  This should cause offsets to shrink, thus all the
+   * actual fragment rate should tend to be even smaller afterwards. */
   current_pos = pack->info->nelts > 1 ? 64 : 0;
   for (i = 0; i + 1 < pack->fragments->nelts; ++i)
     {
@@ -2355,9 +2427,15 @@ pack_revisions(fs_fs_t *fs,
       svn_pool_clear(itempool);
     }
 
+  /* don't forget the final fragment (last revision's revision header) */
   fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, fragment_t);
   fragment->position = current_pos;
 
+  /* expansion phase: check whether all fragments fit into their allotted
+   * slots.  Grow them geometrically if they don't fit.  Retry until they
+   * all do fit.
+   * Note: there is an upper limit to which fragments can grow.  So, this
+   * loop will terminate.  Often, no expansion will be necessary at all. */
   do
     {
       needed_to_expand = FALSE;
@@ -2394,6 +2472,8 @@ pack_revisions(fs_fs_t *fs,
       fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, fragment_t);
       fragment->position = current_pos;
 
+      /* update the revision
+       * sizes (they all end at the end of the pack file now) */
       SVN_ERR(get_content_length(&len, fs, fragment, FALSE, itempool));
       current_pos += len;
 
@@ -2410,6 +2490,8 @@ pack_revisions(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Write reorg'ed target content for PACK in FS.  Use POOL for allocations.
+ */
 static svn_error_t *
 write_revisions(fs_fs_t *fs,
                 revision_pack_t *pack,
@@ -2426,6 +2508,7 @@ write_revisions(fs_fs_t *fs,
   apr_size_t current_pos = 0;
   svn_stringbuf_t *null_buffer = svn_stringbuf_create_empty(iterpool);
 
+  /* create the target file */
   const char *dir = apr_psprintf(iterpool, "%s/new/%ld%s",
                                   fs->path, pack->base / fs->max_files_per_dir,
                                   pack->info->nelts > 1 ? ".pack" : "");
@@ -2438,38 +2521,46 @@ write_revisions(fs_fs_t *fs,
                             APR_OS_DEFAULT,
                             iterpool));
 
+  /* write all fragments */
   for (i = 0; i < pack->fragments->nelts; ++i)
     {
       apr_size_t padding;
+
+      /* get fragment content to write */
       fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t);
       SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
-
       SVN_ERR_ASSERT(fragment->position >= current_pos);
+
+      /* number of bytes between this and the previous fragment */
       if (   fragment->kind == header_fragment
           && i+1 < pack->fragments->nelts)
+        /* special case: header fragments are aligned to the slot end */
         padding = APR_ARRAY_IDX(pack->fragments, i+1, fragment_t).position -
                   content->len - current_pos;
       else
+        /* standard case: fragments are aligned to the slot start */
         padding = fragment->position - current_pos;
 
+      /* write padding between fragments */
       if (padding)
         {
           while (null_buffer->len < padding)
             svn_stringbuf_appendbyte(null_buffer, 0);
 
           SVN_ERR(svn_io_file_write_full(file,
-                                          null_buffer->data,
-                                          padding,
-                                          NULL,
-                                          itempool));
+                                         null_buffer->data,
+                                         padding,
+                                         NULL,
+                                         itempool));
           current_pos += padding;
         }
 
+      /* write fragment content */
       SVN_ERR(svn_io_file_write_full(file,
-                                      content->data,
-                                      content->len,
-                                      NULL,
-                                      itempool));
+                                     content->data,
+                                     content->len,
+                                     NULL,
+                                     itempool));
       current_pos += content->len;
 
       svn_pool_clear(itempool);
@@ -2477,6 +2568,7 @@ write_revisions(fs_fs_t *fs,
 
   apr_file_close(file);
 
+  /* write new manifest file */
   if (pack->info->nelts > 1)
     {
       svn_stream_t *stream;
@@ -2498,12 +2590,17 @@ write_revisions(fs_fs_t *fs,
         }
     }
 
+  /* cleanup */
   svn_pool_destroy(itempool);
   svn_pool_destroy(iterpool);
 
   return SVN_NO_ERROR;
 }
 
+/* Write reorg'ed target content for all revisions in FS.  To maximize
+ * data locality, pack and write in one go per pack file.
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 pack_and_write_revisions(fs_fs_t *fs,
                          apr_pool_t *pool)
@@ -2527,6 +2624,10 @@ pack_and_write_revisions(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* For the directory REPRESENTATION in FS, construct the new (target)
+ * serialized plaintext representation and return it in *CONTENT.
+ * Allocate the result in POOL and temporaries in SCRATCH_POOL.
+ */
 static svn_error_t *
 get_updated_dir(svn_string_t **content,
                 fs_fs_t *fs,
@@ -2540,14 +2641,19 @@ get_updated_dir(svn_string_t **content,
   int i;
   svn_stream_t *stream;
   svn_stringbuf_t *result;
-  
+
+  /* get the original content */
   SVN_ERR(read_dir(&hash, fs, representation, scratch_pool));
   hash = apr_hash_copy(hash_pool, hash);
+
+  /* update all entries */
   for (i = 0; i < dir->nelts; ++i)
     {
       char buffer[256];
       svn_string_t *new_val;
       apr_size_t pos;
+
+      /* find the original entry for the current name */
       direntry_t *entry = APR_ARRAY_IDX(dir, i, direntry_t *);
       svn_string_t *str_val = apr_hash_get(hash, entry->name, entry->name_len);
       if (str_val == NULL)
@@ -2555,54 +2661,40 @@ get_updated_dir(svn_string_t **content,
                                  _("Dir entry '%s' not found"), entry->name);
 
       SVN_ERR_ASSERT(str_val->len < sizeof(buffer));
-      
+
+      /* create an updated node ID */
       memcpy(buffer, str_val->data, str_val->len+1);
       pos = strchr(buffer, '/') - buffer + 1;
       pos += svn__ui64toa(buffer + pos, entry->node->target.offset - entry->node->revision->target.offset);
       new_val = svn_string_ncreate(buffer, pos, hash_pool);
 
+      /* store it in the hash */
       apr_hash_set(hash, entry->name, entry->name_len, new_val);
     }
 
+  /* serialize the updated hash */
   result = svn_stringbuf_create_ensure(representation->target.size, pool);
   stream = svn_stream_from_stringbuf(result, hash_pool);
   SVN_ERR(svn_hash_write2(hash, stream, SVN_HASH_TERMINATOR, hash_pool));
   svn_pool_destroy(hash_pool);
 
+  /* done */
   *content = svn_stringbuf__morph_into_string(result);
   
   return SVN_NO_ERROR;
 }
 
-struct diff_write_baton_t
-{
-  svn_stream_t *stream;
-  apr_size_t size;
-};
-
-static svn_error_t *
-diff_write_handler(void *baton,
-                   const char *data,
-                   apr_size_t *len)
-{
-  struct diff_write_baton_t *whb = baton;
-
-  SVN_ERR(svn_stream_write(whb->stream, data, len));
-  whb->size += *len;
-
-  return SVN_NO_ERROR;
-}
-
+/* Calculate the delta representation for the given CONTENT and BASE.
+ * Return the rep in *DIFF.  Use POOL for allocations.
+ */
 static svn_error_t *
 diff_stringbufs(svn_stringbuf_t *diff,
-                apr_size_t *inflated_size,
                 svn_string_t *base,
                 svn_string_t *content,
                 apr_pool_t *pool)
 {
   svn_txdelta_window_handler_t diff_wh;
   void *diff_whb;
-  struct diff_write_baton_t whb;
 
   svn_stream_t *stream;
   svn_stream_t *source = svn_stream_from_string(base, pool);
@@ -2616,20 +2708,20 @@ diff_stringbufs(svn_stringbuf_t *diff,
                           SVN_DELTA_COMPRESSION_LEVEL_DEFAULT,
                           pool);
 
-  whb.stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
-  whb.size = 0;
-
-  stream = svn_stream_create(&whb, pool);
-  svn_stream_set_write(stream, diff_write_handler);
+  /* create delta stream */
+  stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
 
+  /* run delta */
   SVN_ERR(svn_stream_write(stream, content->data, &content->len));
-  SVN_ERR(svn_stream_close(whb.stream));
   SVN_ERR(svn_stream_close(stream));
 
-  *inflated_size = whb.size;
   return SVN_NO_ERROR;
 }
 
+/* Update the noderev id value for KEY in the textual noderev representation
+ * in NODE_REV.  Take the new id from NODE.  This is a no-op if the KEY
+ * cannot be found.
+ */
 static void
 update_id(svn_stringbuf_t *node_rev,
           const char *key,
@@ -2638,6 +2730,7 @@ update_id(svn_stringbuf_t *node_rev,
   char *newline_pos = 0;
   char *pos;
 
+  /* we need to update the offset only -> find its position */
   pos = strstr(node_rev->data, key);
   if (pos)
     pos = strchr(pos, '/');
@@ -2646,6 +2739,7 @@ update_id(svn_stringbuf_t *node_rev,
 
   if (pos && newline_pos)
     {
+      /* offset data has been found -> replace it */
       char temp[SVN_INT64_BUFFER_SIZE];
       apr_size_t len = svn__i64toa(temp, node->target.offset - node->revision->target.offset);
       svn_stringbuf_replace(node_rev,
@@ -2654,6 +2748,11 @@ update_id(svn_stringbuf_t *node_rev,
     }
 }
 
+/* Update the representation id value for KEY in the textual noderev
+ * representation in NODE_REV.  Take the offset, sizes and new MD5 from
+ * REPRESENTATION.  Use SCRATCH_POOL for allocations.
+ * This is a no-op if the KEY cannot be found.
+ */
 static void
 update_text(svn_stringbuf_t *node_rev,
             const char *key,
@@ -2670,6 +2769,7 @@ update_text(svn_stringbuf_t *node_rev,
   val_pos = pos + key_len;
   if (representation->dir)
     {
+      /* for directories, we need to write all rep info anew */
       char *newline_pos = strchr(val_pos, '\n');
       svn_checksum_t checksum;
       const char* temp = apr_psprintf(scratch_pool, "%ld %" APR_SIZE_T_FMT " %" 
@@ -2689,6 +2789,8 @@ update_text(svn_stringbuf_t *node_rev,
     }
   else
     {
+      /* ordinary representation: replace offset and rep size only.
+       * Content size and checksums are unchanged. */
       const char* temp;
       char *end_pos = strchr(val_pos, ' ');
       
@@ -2704,6 +2806,13 @@ update_text(svn_stringbuf_t *node_rev,
     }
 }
 
+/* Get the target content (data block as to be written to the file) for
+ * the given FRAGMENT in FS.  Return the content in *CONTENT.  Use POOL
+ * for allocations.
+ *
+ * Note that, as a side-effect, this will update the target rep. info for
+ * directories.
+ */
 static svn_error_t *
 get_fragment_content(svn_string_t **content,
                      fs_fs_t *fs,
@@ -2720,6 +2829,7 @@ get_fragment_content(svn_string_t **cont
 
   switch (fragment->kind)
     {
+      /* revision headers can be constructed from target position info */
       case header_fragment:
         info = fragment->data;
         *content = svn_string_createf(pool,
@@ -2728,6 +2838,7 @@ get_fragment_content(svn_string_t **cont
                                       info->target.changes);
         return SVN_NO_ERROR;
 
+      /* The changes list remains untouched */
       case changes_fragment:
         info = fragment->data;
         SVN_ERR(get_content(&revision_content, fs, info->revision, pool));
@@ -2737,6 +2848,9 @@ get_fragment_content(svn_string_t **cont
         (*content)->len = info->target.changes_len;
         return SVN_NO_ERROR;
 
+      /* property and file reps get new headers and need to be rewritten,
+       * iff the base rep is a directory.  The actual (deltified) content
+       * remains unchanged, though.  MD5 etc. do not change. */
       case property_fragment:
       case file_fragment:
         representation = fragment->data;
@@ -2746,6 +2860,8 @@ get_fragment_content(svn_string_t **cont
         if (representation->delta_base)
           if (representation->delta_base->dir)
             {
+              /* if the base happens to be a directory, reconstruct the
+               * full text and represent it as PLAIN rep. */
               SVN_ERR(get_combined_window(&text, fs, representation, pool));
               representation->target.size = text->len;
 
@@ -2756,6 +2872,7 @@ get_fragment_content(svn_string_t **cont
               return SVN_NO_ERROR;
             }
           else
+            /* construct a new rep header */
             if (representation->delta_base == fs->null_base)
               header = svn_stringbuf_create("DELTA\n", pool);
             else
@@ -2768,6 +2885,8 @@ get_fragment_content(svn_string_t **cont
         else
           header = svn_stringbuf_create("PLAIN\n", pool);
 
+        /* if it exists, the actual delta base is unchanged. Hence, this
+         * rep is unchanged even if it has been deltified. */
         header_size = strchr(revision_content->data +
                              representation->original.offset, '\n') -
                       revision_content->data -
@@ -2781,7 +2900,10 @@ get_fragment_content(svn_string_t **cont
         *content = svn_stringbuf__morph_into_string(header);
         return SVN_NO_ERROR;
 
+      /* directory reps need to be rewritten (and deltified) completely.
+       * As a side-effect, update the MD5 and target content size. */
       case dir_fragment:
+        /* construct new content and update MD5 */
         representation = fragment->data;
         SVN_ERR(get_updated_dir(&revision_content, fs, representation,
                                 pool, pool));
@@ -2792,15 +2914,18 @@ get_fragment_content(svn_string_t **cont
                checksum->digest,
                sizeof(representation->dir->target_md5));
 
+        /* deltify against the base rep if necessary */
         if (representation->delta_base)
           {
             if (representation->delta_base->dir == NULL)
               {
+                /* dummy or non-dir base rep -> self-compress only */
                 header = svn_stringbuf_create("DELTA\n", pool);
                 base_content = svn_string_create_empty(pool);
               }
             else
               {
+                /* deltify against base rep (which is a directory, too)*/
                 representation_t *base_rep = representation->delta_base;
                 header = svn_stringbuf_createf(pool,
                                                "DELTA %ld %" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT "\n",
@@ -2811,16 +2936,18 @@ get_fragment_content(svn_string_t **cont
                                         pool, pool));
               }
 
+            /* run deltification and update target content size */
             header_size = header->len;
-            SVN_ERR(diff_stringbufs(header, &representation->dir->size,
-                                    base_content,
+            SVN_ERR(diff_stringbufs(header, base_content,
                                     revision_content, pool));
+            representation->dir->size = revision_content->len;
             representation->target.size = header->len - header_size;
             svn_stringbuf_appendcstr(header, "ENDREP\n");
             *content = svn_stringbuf__morph_into_string(header);
           }
         else
           {
+            /* no delta base (not even a dummy) -> PLAIN rep */
             representation->target.size = revision_content->len;
             representation->dir->size = revision_content->len;
             *content = svn_string_createf(pool, "PLAIN\n%sENDREP\n",
@@ -2829,7 +2956,9 @@ get_fragment_content(svn_string_t **cont
 
         return SVN_NO_ERROR;
 
+      /* construct the new noderev content.  No side-effects.*/
       case noderev_fragment:
+        /* get the original noderev as string */
         node = fragment->data;
         SVN_ERR(get_content(&revision_content, fs,
                             node->revision->revision, pool));
@@ -2838,6 +2967,7 @@ get_fragment_content(svn_string_t **cont
                                          node->original.size,
                                          pool);
 
+        /* update the values that may have changed for target */
         update_id(node_rev, "id: ", node);
         update_id(node_rev, "pred: ", node->predecessor);
         update_text(node_rev, "text: ", node->text, pool);
@@ -2852,6 +2982,9 @@ get_fragment_content(svn_string_t **cont
   return SVN_NO_ERROR;
 }
 
+/* In the repository at PATH, restore the original content in case we ran
+ * this reorg tool before.  Use POOL for allocations.
+ */
 static svn_error_t *
 prepare_repo(const char *path, apr_pool_t *pool)
 {
@@ -2862,16 +2995,19 @@ prepare_repo(const char *path, apr_pool_
   const char *revs_path = svn_dirent_join(path, "db/revs", pool);
   const char *old_rep_cache_path = svn_dirent_join(path, "db/rep-cache.db.old", pool);
   const char *rep_cache_path = svn_dirent_join(path, "db/rep-cache.db", pool);
-  
+
+  /* is there a backup? */
   SVN_ERR(svn_io_check_path(old_path, &kind, pool));
   if (kind == svn_node_dir)
     {
+      /* yes, restore the org content from it */
       SVN_ERR(svn_io_remove_dir2(new_path, TRUE, NULL, NULL, pool));
       SVN_ERR(svn_io_file_move(revs_path, new_path, pool));
       SVN_ERR(svn_io_file_move(old_path, revs_path, pool));
       SVN_ERR(svn_io_remove_dir2(new_path, TRUE, NULL, NULL, pool));
     }
 
+  /* same for the rep cache db */
   SVN_ERR(svn_io_check_path(old_rep_cache_path, &kind, pool));
   if (kind == svn_node_file)
     SVN_ERR(svn_io_file_move(old_rep_cache_path, rep_cache_path, pool));
@@ -2879,6 +3015,9 @@ prepare_repo(const char *path, apr_pool_
   return SVN_NO_ERROR;
 }
 
+/* In the repository at PATH, create a backup of the orig content and
+ * replace it with the reorg'ed. Use POOL for allocations.
+ */
 static svn_error_t *
 activate_new_revs(const char *path, apr_pool_t *pool)
 {
@@ -2890,6 +3029,8 @@ activate_new_revs(const char *path, apr_
   const char *old_rep_cache_path = svn_dirent_join(path, "db/rep-cache.db.old", pool);
   const char *rep_cache_path = svn_dirent_join(path, "db/rep-cache.db", pool);
 
+  /* if there is no backup, yet, move the current repo content to the backup
+   * and replace it with the new (reorg'ed) data. */
   SVN_ERR(svn_io_check_path(old_path, &kind, pool));
   if (kind == svn_node_none)
     {
@@ -2897,6 +3038,7 @@ activate_new_revs(const char *path, apr_
       SVN_ERR(svn_io_file_move(new_path, revs_path, pool));
     }
 
+  /* same for the rep cache db */
   SVN_ERR(svn_io_check_path(old_rep_cache_path, &kind, pool));
   if (kind == svn_node_none)
     SVN_ERR(svn_io_file_move(rep_cache_path, old_rep_cache_path, pool));
@@ -2904,6 +3046,9 @@ activate_new_revs(const char *path, apr_
   return SVN_NO_ERROR;
 }
 
+/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and
+ * POOL for allocations.
+ */
 static void
 print_usage(svn_stream_t *ostream, const char *progname,
             apr_pool_t *pool)
@@ -2923,6 +3068,7 @@ print_usage(svn_stream_t *ostream, const
      progname));
 }
 
+/* linear control flow */
 int main(int argc, const char *argv[])
 {
   apr_pool_t *pool;

Modified: subversion/branches/tree-read-api/tools/server-side/fsfs-stats.c
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/fsfs-stats.c?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/fsfs-stats.c (original)
+++ subversion/branches/tree-read-api/tools/server-side/fsfs-stats.c Sun Jan  6 02:33:34 2013
@@ -1,4 +1,4 @@
-/* diff.c -- test driver for text diffs
+/* fsfs-stats.c -- gather size statistics on FSFS repositories
  *
  * ====================================================================
  *    Licensed to the Apache Software Foundation (ASF) under one
@@ -22,7 +22,6 @@
 
 
 #include <assert.h>
-#include <sys/stat.h>
 
 #include <apr.h>
 #include <apr_general.h>
@@ -37,29 +36,40 @@
 #include "svn_sorts.h"
 #include "svn_delta.h"
 #include "svn_hash.h"
+#include "svn_cache_config.h"
 
 #include "private/svn_string_private.h"
 #include "private/svn_subr_private.h"
 #include "private/svn_dep_compat.h"
+#include "private/svn_cache.h"
 
 #ifndef _
 #define _(x) x
 #endif
 
-#define ERROR_TAG "diff: "
+#define ERROR_TAG "fsfs-stats: "
 
+/* We group representations into 2x2 different kinds plus one default:
+ * [dir / file] x [text / prop]. The assignment is done by the first node
+ * that references the respective representation.
+ */
 typedef enum rep_kind_t
 {
+  /* The representation is _directly_ unused, i.e. not referenced by any
+   * noderev. However, some other representation may use it as delta base.
+   * null value. Should not occur in real-world repositories. */
   unused_rep,
 
+  /* a properties on directory rep  */
   dir_property_rep,
 
+  /* a properties on file rep  */
   file_property_rep,
 
-  /* a directory rep (including PLAIN / DELTA header) */
+  /* a directory rep  */
   dir_rep,
 
-  /* a file rep (including PLAIN / DELTA header) */
+  /* a file rep  */
   file_rep
 } rep_kind_t;
 
@@ -73,6 +83,7 @@ typedef struct representation_t
   /* item length in bytes */
   apr_size_t size;
 
+  /* item length after de-deltification */
   apr_size_t expanded_size;
 
   /* deltification base, or NULL if there is none */
@@ -80,13 +91,15 @@ typedef struct representation_t
 
   /* revision that contains this representation
    * (may be referenced by other revisions, though) */
-  
   svn_revnum_t revision;
+
+  /* number of nodes that reference this representation */
   apr_uint32_t ref_count;
 
   /* length of the PLAIN / DELTA line in the source file in bytes */
   apr_uint16_t header_size;
 
+  /* classification of the representation. values of rep_kind_t */
   char kind;
   
   /* the source content has a PLAIN header, so we may simply copy the
@@ -118,9 +131,16 @@ typedef struct revision_info_t
    * for non-packed revs) */
   apr_size_t end;
 
+  /* number of directory noderevs in this revision */
   apr_size_t dir_noderev_count;
+
+  /* number of file noderevs in this revision */
   apr_size_t file_noderev_count;
+
+  /* total size of directory noderevs (i.e. the structs - not the rep) */
   apr_size_t dir_noderev_size;
+
+  /* total size of file noderevs (i.e. the structs - not the rep) */
   apr_size_t file_noderev_size;
   
   /* all representation_t of this revision (in no particular order),
@@ -128,43 +148,17 @@ typedef struct revision_info_t
   apr_array_header_t *representations;
 } revision_info_t;
 
-/* A cached, undeltified txdelta window.
+/* Data type to identify a representation. It will be used to address
+ * cached combined (un-deltified) windows.
  */
-typedef struct window_cache_entry_t
+typedef struct window_cache_key_t
 {
-  /* revision containing the window */
+  /* revision of the representation */
   svn_revnum_t revision;
 
-  /* offset of the deltified window within that revision */
+  /* its offset */
   apr_size_t offset;
-
-  /* window content */
-  svn_stringbuf_t *window;
-} window_cache_entry_t;
-
-/* Cache for undeltified txdelta windows. (revision, offset) will be mapped
- * directly into the ENTRIES array of INSERT_COUNT buckets (most entries
- * will be NULL).
- *
- * The cache will be cleared when USED exceeds CAPACITY.
- */
-typedef struct window_cache_t
-{
-  /* fixed-size array of ENTRY_COUNT elements */
-  window_cache_entry_t *entries;
-
-  /* used to allocate windows */
-  apr_pool_t *pool;
-
-  /* size of ENTRIES in elements */
-  apr_size_t entry_count;
-
-  /* maximum combined size of all cached windows */
-  apr_size_t capacity;
-
-  /* current combined size of all cached windows */
-  apr_size_t used;
-} window_cache_t;
+} window_cache_key_t;
 
 /* Root data structure containing all information about a given repository.
  */
@@ -196,7 +190,7 @@ typedef struct fs_fs_t
   representation_t *null_base;
 
   /* undeltified txdelta window cache */
-  window_cache_t *window_cache;
+  svn_cache__t *window_cache;
 } fs_fs_t;
 
 /* Return the rev pack folder for revision REV in FS.
@@ -238,45 +232,55 @@ open_rev_or_pack_file(apr_file_t **file,
                           pool);
 }
 
-/* Read the whole content of the file containing REV in FS and return that
- * in *CONTENT.
- */
+/* Return the length of FILE in *FILE_SIZE.  Use POOL for allocations.
+*/
 static svn_error_t *
-rev_or_pack_file_size(apr_off_t *file_size,
-                      fs_fs_t *fs,
-                      svn_revnum_t rev,
-                      apr_pool_t *pool)
+get_file_size(apr_off_t *file_size,
+              apr_file_t *file,
+              apr_pool_t *pool)
 {
-  apr_file_t *file;
   apr_finfo_t finfo;
 
-  SVN_ERR(open_rev_or_pack_file(&file, fs, rev, pool));
   SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, file, pool));
-  SVN_ERR(svn_io_file_close(file, pool));
 
   *file_size = finfo.size;
   return SVN_NO_ERROR;
 }
 
-/* Get the file content of revision REVISION in FS and return it in *DATA.
- * Use SCRATCH_POOL for temporary allocations.
+/* Get the file content of revision REVISION in FS and return it in *CONTENT.
+ * Read the LEN bytes starting at file OFFSET.  When provided, use FILE as
+ * packed or plain rev file.
+ * Use POOL for temporary allocations.
  */
 static svn_error_t *
 get_content(svn_stringbuf_t **content,
+            apr_file_t *file,
             fs_fs_t *fs,
             svn_revnum_t revision,
             apr_off_t offset,
             apr_size_t len,
             apr_pool_t *pool)
 {
-  apr_file_t *file;
   apr_pool_t * file_pool = svn_pool_create(pool);
+  apr_size_t large_buffer_size = 0x10000;
 
-  SVN_ERR(open_rev_or_pack_file(&file, fs, revision, file_pool));
-  SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
+  if (file == NULL)
+    SVN_ERR(open_rev_or_pack_file(&file, fs, revision, file_pool));
 
   *content = svn_stringbuf_create_ensure(len, pool);
   (*content)->len = len;
+
+#if APR_VERSION_AT_LEAST(1,3,0)
+  /* for better efficiency use larger buffers on large reads */
+  if (   (len >= large_buffer_size)
+      && (apr_file_buffer_size_get(file) < large_buffer_size))
+    apr_file_buffer_set(file,
+                        apr_palloc(apr_file_pool_get(file),
+                                   large_buffer_size),
+                        large_buffer_size);
+#endif
+    
+  SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
   SVN_ERR(svn_io_file_read_full2(file, (*content)->data, len,
                                  NULL, NULL, pool));
   svn_pool_destroy(file_pool);
@@ -284,89 +288,48 @@ get_content(svn_stringbuf_t **content,
   return SVN_NO_ERROR;
 }
 
-/* Return a new txdelta window cache with ENTRY_COUNT buckets in its index
- * and a the total CAPACITY given in bytes.
- * Use POOL for all cache-related allocations.
+/* In *RESULT, return the cached txdelta window stored in REPRESENTATION
+ * within FS.  If that has not been found in cache, return NULL.
+ * Allocate the result in POOL.
  */
-static window_cache_t *
-create_window_cache(apr_pool_t *pool,
-                    apr_size_t entry_count,
-                    apr_size_t capacity)
-{
-  window_cache_t *result = apr_pcalloc(pool, sizeof(*result));
-
-  result->pool = svn_pool_create(pool);
-  result->entry_count = entry_count;
-  result->capacity = capacity;
-  result->used = 0;
-  result->entries = apr_pcalloc(pool, sizeof(*result->entries) * entry_count);
-
-  return result;
-}
-
-/* Return the position within FS' window cache ENTRIES index for the given
- * (REVISION, OFFSET) pair. This is a cache-internal function.
- */
-static apr_size_t
-get_window_cache_index(fs_fs_t *fs,
-                       svn_revnum_t revision,
-                       apr_size_t offset)
-{
-  return (revision + offset * 0xd1f3da69) % fs->window_cache->entry_count;
-}
-
-/* Return the cached txdelta window stored in REPRESENTAION within FS.
- * If that has not been found in cache, return NULL.
- */
-static svn_stringbuf_t *
-get_cached_window(fs_fs_t *fs,
+static svn_error_t *
+get_cached_window(svn_stringbuf_t **result,
+                  fs_fs_t *fs,
                   representation_t *representation,
                   apr_pool_t *pool)
 {
-  svn_revnum_t revision = representation->revision;
-  apr_size_t offset = representation->offset;
-
-  apr_size_t i = get_window_cache_index(fs, revision, offset);
-  window_cache_entry_t *entry = &fs->window_cache->entries[i];
-
-  return entry->offset == offset && entry->revision == revision
-    ? svn_stringbuf_dup(entry->window, pool)
-    : NULL;
+  svn_boolean_t found = FALSE;
+  window_cache_key_t key;
+  key.revision = representation->revision;
+  key.offset = representation->offset;
+
+  *result = NULL;
+  return svn_error_trace(svn_cache__get((void**)result, &found,
+                                        fs->window_cache,
+                                        &key, pool));
 }
 
-/* Cache the undeltified txdelta WINDOW for REPRESENTAION within FS.
+/* Cache the undeltified txdelta WINDOW for REPRESENTATION within FS.
+ * Use POOL for temporaries.
  */
-static void
+static svn_error_t *
 set_cached_window(fs_fs_t *fs,
                   representation_t *representation,
-                  svn_stringbuf_t *window)
+                  svn_stringbuf_t *window,
+                  apr_pool_t *pool)
 {
   /* select entry */
-  svn_revnum_t revision = representation->revision;
-  apr_size_t offset = representation->offset;
+  window_cache_key_t key;
+  key.revision = representation->revision;
+  key.offset = representation->offset;
 
-  apr_size_t i = get_window_cache_index(fs, revision, offset);
-  window_cache_entry_t *entry = &fs->window_cache->entries[i];
-
-  /* if the capacity is exceeded, clear the cache */
-  fs->window_cache->used += window->len;
-  if (fs->window_cache->used >= fs->window_cache->capacity)
-    {
-      svn_pool_clear(fs->window_cache->pool);
-      memset(fs->window_cache->entries,
-             0,
-             sizeof(*fs->window_cache->entries) * fs->window_cache->entry_count);
-      fs->window_cache->used = window->len;
-    }
-
-  /* set the entry to a copy of the window data */
-  entry->window = svn_stringbuf_dup(window, fs->window_cache->pool);
-  entry->offset = offset;
-  entry->revision = revision;
+  return svn_error_trace(svn_cache__set(fs->window_cache, &key, window,
+                                        pool));
 }
 
-/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
-   Use POOL for temporary allocations. */
+/* Given rev pack PATH in FS, read the manifest file and return the offsets
+ * in *MANIFEST. Use POOL for allocations.
+ */
 static svn_error_t *
 read_manifest(apr_array_header_t **manifest,
               fs_fs_t *fs,
@@ -409,6 +372,10 @@ read_manifest(apr_array_header_t **manif
   return svn_stream_close(manifest_stream);
 }
 
+/* Read header information for the revision stored in FILE_CONTENT (one
+ * whole revision).  Return the offsets within FILE_CONTENT for the
+ * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN.
+ * Use POOL for temporary allocations. */
 static svn_error_t *
 read_revision_header(apr_size_t *changes,
                      apr_size_t *changes_len,
@@ -447,8 +414,10 @@ read_revision_header(apr_size_t *changes
     return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                             _("Final line in revision file missing space"));
 
+  /* terminate the header line */
   *space = 0;
-  
+
+  /* extract information */
   SVN_ERR(svn_cstring_strtoui64(&val, line+1, 0, APR_SIZE_MAX, 10));
   *root_noderev = (apr_size_t)val;
   SVN_ERR(svn_cstring_strtoui64(&val, space+1, 0, APR_SIZE_MAX, 10));
@@ -458,6 +427,10 @@ read_revision_header(apr_size_t *changes
   return SVN_NO_ERROR;
 }
 
+/* Read the FSFS format number and sharding size from the format file at
+ * PATH and return it in *PFORMAT and *MAX_FILES_PER_DIR respectively.
+ * Use POOL for temporary allocations.
+ */
 static svn_error_t *
 read_format(int *pformat, int *max_files_per_dir,
             const char *path, apr_pool_t *pool)
@@ -467,6 +440,7 @@ read_format(int *pformat, int *max_files
   char buf[80];
   apr_size_t len;
 
+  /* open format file and read the first line */
   err = svn_io_file_open(&file, path, APR_READ | APR_BUFFERED,
                          APR_OS_DEFAULT, pool);
   if (err && APR_STATUS_IS_ENOENT(err->apr_err))
@@ -541,21 +515,27 @@ read_format(int *pformat, int *max_files
   return svn_io_file_close(file, pool);
 }
 
+/* Read the content of the file at PATH and return it in *RESULT.
+ * Use POOL for temporary allocations.
+ */
 static svn_error_t *
 read_number(svn_revnum_t *result, const char *path, apr_pool_t *pool)
 {
   svn_stringbuf_t *content;
-  apr_int64_t number;
+  apr_uint64_t number;
   
   SVN_ERR(svn_stringbuf_from_file2(&content, path, pool));
 
   content->data[content->len-1] = 0;
-  SVN_ERR(svn_cstring_atoi64(&number, content->data));
+  SVN_ERR(svn_cstring_strtoui64(&number, content->data, 0, LONG_MAX, 10));
   *result = (svn_revnum_t)number;
 
   return SVN_NO_ERROR;
 }
 
+/* Create *FS for the repository at PATH and read the format and size info.
+ * Use POOL for temporary allocations.
+ */
 static svn_error_t *
 fs_open(fs_fs_t **fs, const char *path, apr_pool_t *pool)
 {
@@ -570,7 +550,8 @@ fs_open(fs_fs_t **fs, const char *path, 
                       pool));
   if (((*fs)->format != 4) && ((*fs)->format != 6))
     return svn_error_create(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL, NULL);
-    
+
+  /* read size (HEAD) info */
   SVN_ERR(read_number(&(*fs)->min_unpacked_rev,
                       svn_dirent_join(path, "db/min-unpacked-rev", pool),
                       pool));
@@ -579,12 +560,18 @@ fs_open(fs_fs_t **fs, const char *path, 
                      pool);
 }
 
+/* Utility function that returns true if STRING->DATA matches KEY.
+ */
 static svn_boolean_t
 key_matches(svn_string_t *string, const char *key)
 {
   return strcmp(string->data, key) == 0;
 }
 
+/* Comparator used for binary search comparing the absolute file offset
+ * of a representation to some other offset. DATA is a *representation_t,
+ * KEY is a pointer to an apr_size_t.
+ */
 static int
 compare_representation_offsets(const void *data, const void *key)
 {
@@ -597,6 +584,15 @@ compare_representation_offsets(const voi
   return diff > 0 ? 1 : 0;
 }
 
+/* Find the revision_info_t object to the given REVISION in FS and return
+ * it in *REVISION_INFO. For performance reasons, we skip the lookup if
+ * the info is already provided.
+ *
+ * In that revision, look for the representation_t object for offset OFFSET.
+ * If it already exists, set *IDX to its index in *REVISION_INFO's
+ * representations list and return the representation object. Otherwise,
+ * set the index to where it must be inserted and return NULL.
+ */
 static representation_t *
 find_representation(int *idx,
                     fs_fs_t *fs,
@@ -606,7 +602,8 @@ find_representation(int *idx,
 {
   revision_info_t *info;
   *idx = -1;
-  
+
+  /* first let's find the revision */
   info = revision_info ? *revision_info : NULL;
   if (info == NULL || info->revision != revision)
     {
@@ -617,23 +614,36 @@ find_representation(int *idx,
         *revision_info = info;
     }
 
+  /* not found -> no result */
   if (info == NULL)
     return NULL;
+  
+  assert(revision == info->revision);
 
+  /* look for the representation */
   *idx = svn_sort__bsearch_lower_bound(&offset,
                                        info->representations,
                                        compare_representation_offsets);
   if (*idx < info->representations->nelts)
     {
+      /* return the representation, if this is the one we were looking for */
       representation_t *result
         = APR_ARRAY_IDX(info->representations, *idx, representation_t *);
       if (result->offset == offset)
         return result;
     }
 
+  /* not parsed, yet */
   return NULL;
 }
 
+/* Read the representation header in FILE_CONTENT at OFFSET.  Return its
+ * size in *HEADER_SIZE, set *IS_PLAIN if no deltification was used and
+ * return the deltification base representation in *REPRESENTATION.  If
+ * there is none, set it to NULL.  Use FS to look it up.
+ *
+ * Use POOL for allocations and SCRATCH_POOL for temporaries.
+ */
 static svn_error_t *
 read_rep_base(representation_t **representation,
               apr_size_t *header_size,
@@ -649,10 +659,12 @@ read_rep_base(representation_t **represe
   svn_revnum_t revision;
   apr_uint64_t temp;
 
+  /* identify representation header (1 line) */
   const char *buffer = file_content->data + offset;
   const char *line_end = strchr(buffer, '\n');
   *header_size = line_end - buffer + 1;
 
+  /* check for PLAIN rep */
   if (strncmp(buffer, "PLAIN\n", *header_size) == 0)
     {
       *is_plain = TRUE;
@@ -660,6 +672,7 @@ read_rep_base(representation_t **represe
       return SVN_NO_ERROR;
     }
 
+  /* check for DELTA against empty rep */
   *is_plain = FALSE;
   if (strncmp(buffer, "DELTA\n", *header_size) == 0)
     {
@@ -671,7 +684,7 @@ read_rep_base(representation_t **represe
   str = apr_pstrndup(scratch_pool, buffer, line_end - buffer);
   last_str = str;
 
-  /* We hopefully have a DELTA vs. a non-empty base revision. */
+  /* parse it. */
   str = svn_cstring_tokenize(" ", &last_str);
   str = svn_cstring_tokenize(" ", &last_str);
   SVN_ERR(svn_revnum_parse(&revision, str, NULL));
@@ -679,10 +692,18 @@ read_rep_base(representation_t **represe
   str = svn_cstring_tokenize(" ", &last_str);
   SVN_ERR(svn_cstring_strtoui64(&temp, str, 0, APR_SIZE_MAX, 10));
 
+  /* it should refer to a rep in an earlier revision.  Look it up */
   *representation = find_representation(&idx, fs, NULL, revision, (apr_size_t)temp);
   return SVN_NO_ERROR;
 }
 
+/* Parse the representation reference (text: or props:) in VALUE, look
+ * it up in FS and return it in *REPRESENTATION.  To be able to parse the
+ * base rep, we pass the FILE_CONTENT as well.
+ * 
+ * If necessary, allocate the result in POOL; use SCRATCH_POOL for temp.
+ * allocations.
+ */
 static svn_error_t *
 parse_representation(representation_t **representation,
                      fs_fs_t *fs,
@@ -700,15 +721,20 @@ parse_representation(representation_t **
   apr_uint64_t expanded_size;
   int idx;
 
+  /* read location (revision, offset) and size */
   char *c = (char *)value->data;
   SVN_ERR(svn_revnum_parse(&revision, svn_cstring_tokenize(" ", &c), NULL));
   SVN_ERR(svn_cstring_strtoui64(&offset, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
   SVN_ERR(svn_cstring_strtoui64(&size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
   SVN_ERR(svn_cstring_strtoui64(&expanded_size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
 
+  /* look it up */
   result = find_representation(&idx, fs, &revision_info, revision, (apr_size_t)offset);
   if (!result)
     {
+      /* not parsed, yet (probably a rep in the same revision).
+       * Create a new rep object and determine its base rep as well.
+       */
       apr_size_t header_size;
       svn_boolean_t is_plain;
       
@@ -732,8 +758,10 @@ parse_representation(representation_t **
   return SVN_NO_ERROR;
 }
 
-/* Get the file content of revision REVISION in FS and return it in *DATA.
- * Use SCRATCH_POOL for temporary allocations.
+/* Get the unprocessed (i.e. still deltified) content of REPRESENTATION in
+ * FS and return it in *CONTENT.  If not NULL, FILE_CONTENT must contain
+ * the contents of the revision that also contains the representation.
+ * Use POOL for allocations.
  */
 static svn_error_t *
 get_rep_content(svn_stringbuf_t **content,
@@ -765,7 +793,7 @@ get_rep_content(svn_stringbuf_t **conten
       offset = revision_info->offset
              + representation->offset
              + representation->header_size;
-      SVN_ERR(get_content(content, fs, revision, offset,
+      SVN_ERR(get_content(content, NULL, fs, revision, offset,
                           representation->size, pool));
     }
 
@@ -773,8 +801,12 @@ get_rep_content(svn_stringbuf_t **conten
 }
 
 
-/* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
-   window into *NWIN. */
+/* Read the delta window contents of all windows in REPRESENTATION in FS.
+ * If not NULL, FILE_CONTENT must contain the contents of the revision that
+ * also contains the representation.
+ * Return the data as svn_txdelta_window_t* instances in *WINDOWS.
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 read_windows(apr_array_header_t **windows,
              fs_fs_t *fs,
@@ -789,13 +821,16 @@ read_windows(apr_array_header_t **window
 
   *windows = apr_array_make(pool, 0, sizeof(svn_txdelta_window_t *));
 
+  /* get the whole revision content */
   SVN_ERR(get_rep_content(&content, fs, representation, file_content, pool));
 
+  /* create a read stream and position it directly after the rep header */
   content->data += 3;
   content->len -= 3;
   stream = svn_stream_from_stringbuf(content, pool);
   SVN_ERR(svn_stream_read(stream, &version, &len));
 
+  /* read the windows from that stream */
   while (TRUE)
     {
       svn_txdelta_window_t *window;
@@ -816,9 +851,12 @@ read_windows(apr_array_header_t **window
   return SVN_NO_ERROR;
 }
 
-/* Get the undeltified window that is a result of combining all deltas
-   from the current desired representation identified in *RB with its
-   base representation.  Store the window in *RESULT. */
+/* Get the undeltified representation that is a result of combining all
+ * deltas from the current desired REPRESENTATION in FS with its base
+ * representation.  If not NULL, FILE_CONTENT must contain the contents of
+ * the revision that also contains the representation.  Store the result
+ * in *CONTENT.  Use POOL for allocations.
+ */
 static svn_error_t *
 get_combined_window(svn_stringbuf_t **content,
                     fs_fs_t *fs,
@@ -830,23 +868,36 @@ get_combined_window(svn_stringbuf_t **co
   apr_array_header_t *windows;
   svn_stringbuf_t *base_content, *result;
   const char *source;
-  apr_pool_t *sub_pool = svn_pool_create(pool);
-  apr_pool_t *iter_pool = svn_pool_create(pool);
+  apr_pool_t *sub_pool;
+  apr_pool_t *iter_pool;
 
+  /* special case: no un-deltification necessary */
   if (representation->is_plain)
-    return get_rep_content(content, fs, representation, file_content, pool);
+    {
+      SVN_ERR(get_rep_content(content, fs, representation, file_content,
+                              pool));
+      SVN_ERR(set_cached_window(fs, representation, *content, pool));
+      return SVN_NO_ERROR;
+    }
 
-  *content = get_cached_window(fs, representation, pool);
+  /* special case: data already in cache */
+  SVN_ERR(get_cached_window(content, fs, representation, pool));
   if (*content)
     return SVN_NO_ERROR;
   
+  /* read the delta windows for this representation */
+  sub_pool = svn_pool_create(pool);
+  iter_pool = svn_pool_create(pool);
   SVN_ERR(read_windows(&windows, fs, representation, file_content, sub_pool));
+
+  /* fetch or create the base content */
   if (representation->delta_base && representation->delta_base->revision)
     SVN_ERR(get_combined_window(&base_content, fs,
                                 representation->delta_base, NULL, sub_pool));
   else
     base_content = svn_stringbuf_create_empty(sub_pool);
 
+  /* apply deltas */
   result = svn_stringbuf_create_empty(pool);
   source = base_content->data;
   
@@ -867,14 +918,17 @@ get_combined_window(svn_stringbuf_t **co
       svn_pool_clear(iter_pool);
     }
 
+  /* cache result and return it */
+  SVN_ERR(set_cached_window(fs, representation, result, sub_pool));
+  *content = result;
+  
   svn_pool_destroy(iter_pool);
   svn_pool_destroy(sub_pool);
-  
-  set_cached_window(fs, representation, result);
-  *content = result;
+
   return SVN_NO_ERROR;
 }
 
+/* forward declaration */
 static svn_error_t *
 read_noderev(fs_fs_t *fs,
              svn_stringbuf_t *file_content,
@@ -883,6 +937,12 @@ read_noderev(fs_fs_t *fs,
              apr_pool_t *pool,
              apr_pool_t *scratch_pool);
 
+/* Starting at the directory in REPRESENTATION in FILE_CONTENT, read all
+ * DAG nodes, directories and representations linked in that tree structure.
+ * Store them in FS and REVISION_INFO.  Also, read them only once.
+ *
+ * Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
+ */
 static svn_error_t *
 parse_dir(fs_fs_t *fs,
           svn_stringbuf_t *file_content,
@@ -898,9 +958,11 @@ parse_dir(fs_fs_t *fs,
   const char *revision_key;
   apr_size_t key_len;
 
+  /* special case: empty dir rep */
   if (representation == NULL)
     return SVN_NO_ERROR;
 
+  /* get the directory as unparsed string */
   iter_pool = svn_pool_create(scratch_pool);
   text_pool = svn_pool_create(scratch_pool);
 
@@ -908,14 +970,16 @@ parse_dir(fs_fs_t *fs,
                               text_pool));
   current = text->data;
 
+  /* calculate some invariants */
   revision_key = apr_psprintf(text_pool, "r%ld/", representation->revision);
   key_len = strlen(revision_key);
   
-  /* Translate the string dir entries into real entries. */
+  /* Parse and process all directory entries. */
   while (*current != 'E')
     {
       char *next;
 
+      /* skip "K ???\n<name>\nV ???\n" lines */
       current = strchr(current, '\n');
       if (current)
         current = strchr(current+1, '\n');
@@ -927,11 +991,14 @@ parse_dir(fs_fs_t *fs,
            _("Corrupt directory representation in rev %ld at offset %ld"),
                                  representation->revision,
                                  (long)representation->offset);
-      
+
+      /* iff this entry refers to a node in the same revision as this dir,
+       * recurse into that node */
       *next = 0;
       current = strstr(current, revision_key);
       if (current)
         {
+          /* recurse */
           apr_uint64_t offset;
 
           SVN_ERR(svn_cstring_strtoui64(&offset, current + key_len, 0,
@@ -949,6 +1016,13 @@ parse_dir(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Starting at the noderev at OFFSET in FILE_CONTENT, read all DAG nodes,
+ * directories and representations linked in that tree structure.  Store
+ * them in FS and REVISION_INFO.  Also, read them only once.  Return the
+ * result in *NODEREV.
+ *
+ * Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
+ */
 static svn_error_t *
 read_noderev(fs_fs_t *fs,
              svn_stringbuf_t *file_content,
@@ -964,9 +1038,11 @@ read_noderev(fs_fs_t *fs,
   svn_boolean_t is_dir = FALSE;
 
   scratch_pool = svn_pool_create(scratch_pool);
-  
+
+  /* parse the noderev line-by-line until we find an empty line */
   while (1)
     {
+      /* for this line, extract key and value. Ignore invalid values */
       svn_string_t key;
       svn_string_t value;
       char *sep;
@@ -975,6 +1051,8 @@ read_noderev(fs_fs_t *fs,
 
       line = svn_string_ncreate(start, end - start, scratch_pool);
       offset += end - start + 1;
+
+      /* empty line -> end of noderev data */
       if (line->len == 0)
         break;
       
@@ -992,6 +1070,7 @@ read_noderev(fs_fs_t *fs,
       value.data = sep + 2;
       value.len = line->len - (key.len + 2);
 
+      /* translate (key, value) into noderev elements */
       if (key_matches(&key, "type"))
         is_dir = strcmp(value.data, "dir") == 0;
       else if (key_matches(&key, "text"))
@@ -999,6 +1078,8 @@ read_noderev(fs_fs_t *fs,
           SVN_ERR(parse_representation(&text, fs, file_content,
                                        &value, revision_info,
                                        pool, scratch_pool));
+          
+          /* if we are the first to use this rep, mark it as "text rep" */
           if (++text->ref_count == 1)
             text->kind = is_dir ? dir_rep : file_rep;
         }
@@ -1007,15 +1088,20 @@ read_noderev(fs_fs_t *fs,
           SVN_ERR(parse_representation(&props, fs, file_content,
                                        &value, revision_info,
                                        pool, scratch_pool));
+
+          /* if we are the first to use this rep, mark it as "prop rep" */
           if (++props->ref_count == 1)
             props->kind = is_dir ? dir_property_rep : file_property_rep;
         }
     }
 
+  /* if this is a directory and has not been processed, yet, read and
+   * process it recursively */
   if (is_dir && text && text->ref_count == 1)
     SVN_ERR(parse_dir(fs, file_content, text, revision_info,
                       pool, scratch_pool));
 
+  /* update stats */
   if (is_dir)
     {
       revision_info->dir_noderev_size += offset - start_offset;
@@ -1031,6 +1117,9 @@ read_noderev(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Given the unparsed changes list in CHANGES with LEN chars, return the
+ * number of changed paths encoded in it.
+ */
 static apr_size_t
 get_change_count(const char *changes,
                  apr_size_t len)
@@ -1038,19 +1127,27 @@ get_change_count(const char *changes,
   apr_size_t lines = 0;
   const char *end = changes + len;
 
+  /* line count */
   for (; changes < end; ++changes)
     if (*changes == '\n')
       ++lines;
 
+  /* two lines per change */
   return lines / 2;
 }
 
-static void print_progress(svn_revnum_t revision)
+/* Simple utility to print a REVISION number and make it appear immediately.
+ */
+static void
+print_progress(svn_revnum_t revision)
 {
   printf("%8ld", revision);
   fflush(stdout);
 }
 
+/* Read the content of the pack file starting at revision BASE and store it
+ * in FS.  Use POOL for allocations.
+ */
 static svn_error_t *
 read_pack_file(fs_fs_t *fs,
                svn_revnum_t base,
@@ -1061,19 +1158,24 @@ read_pack_file(fs_fs_t *fs,
   apr_pool_t *iter_pool = svn_pool_create(local_pool);
   int i;
   apr_off_t file_size = 0;
+  apr_file_t *file;
   const char *pack_folder = get_pack_folder(fs, base, local_pool);
 
+  /* parse the manifest file */
   SVN_ERR(read_manifest(&manifest, fs, pack_folder, local_pool));
   if (manifest->nelts != fs->max_files_per_dir)
     return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, NULL);
 
-  SVN_ERR(rev_or_pack_file_size(&file_size, fs, base, pool));
+  SVN_ERR(open_rev_or_pack_file(&file, fs, base, local_pool));
+  SVN_ERR(get_file_size(&file_size, file, local_pool));
 
+  /* process each revision in the pack file */
   for (i = 0; i < manifest->nelts; ++i)
     {
       apr_size_t root_node_offset;
       svn_stringbuf_t *rev_content;
   
+      /* create the revision info for the current rev */
       revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
       info->representations = apr_array_make(iter_pool, 4, sizeof(representation_t*));
 
@@ -1083,7 +1185,7 @@ read_pack_file(fs_fs_t *fs,
                          ? APR_ARRAY_IDX(manifest, i+1 , apr_size_t)
                          : file_size;
 
-      SVN_ERR(get_content(&rev_content, fs, info->revision,
+      SVN_ERR(get_content(&rev_content, file, fs, info->revision,
                           info->offset,
                           info->end - info->offset,
                           iter_pool));
@@ -1103,15 +1205,20 @@ read_pack_file(fs_fs_t *fs,
       info->representations = apr_array_copy(pool, info->representations);
       APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
       
+      /* destroy temps */
       svn_pool_clear(iter_pool);
     }
 
+  /* one more pack file processed */
   print_progress(base);
-  apr_pool_destroy(local_pool);
+  svn_pool_destroy(local_pool);
 
   return SVN_NO_ERROR;
 }
 
+/* Read the content of the file for REVISION and store its contents in FS.
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 read_revision_file(fs_fs_t *fs,
                    svn_revnum_t revision,
@@ -1122,16 +1229,21 @@ read_revision_file(fs_fs_t *fs,
   svn_stringbuf_t *rev_content;
   revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
   apr_off_t file_size = 0;
+  apr_file_t *file;
 
-  SVN_ERR(rev_or_pack_file_size(&file_size, fs, revision, pool));
+  /* open the revision file and determine its size */
+  SVN_ERR(open_rev_or_pack_file(&file, fs, revision, local_pool));
+  SVN_ERR(get_file_size(&file_size, file, local_pool));
 
+  /* create the revision info for the current rev */
   info->representations = apr_array_make(pool, 4, sizeof(representation_t*));
 
   info->revision = revision;
   info->offset = 0;
   info->end = file_size;
 
-  SVN_ERR(get_content(&rev_content, fs, revision, 0, file_size, local_pool));
+  SVN_ERR(get_content(&rev_content, file, fs, revision, 0, file_size,
+                      local_pool));
 
   SVN_ERR(read_revision_header(&info->changes,
                                &info->changes_len,
@@ -1139,24 +1251,31 @@ read_revision_file(fs_fs_t *fs,
                                rev_content,
                                local_pool));
 
+  /* put it into our containers */
   APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
 
   info->change_count
     = get_change_count(rev_content->data + info->changes,
                        info->changes_len);
 
+  /* parse the revision content recursively. */
   SVN_ERR(read_noderev(fs, rev_content,
                        root_node_offset, info,
                        pool, local_pool));
 
+  /* show progress every 1000 revs or so */
   if (revision % fs->max_files_per_dir == 0)
     print_progress(revision);
 
-  apr_pool_destroy(local_pool);
+  svn_pool_destroy(local_pool);
 
   return SVN_NO_ERROR;
 }
 
+/* Read the repository at PATH beginning with revision START_REVISION and
+ * return the result in *FS.  Allocate caches with MEMSIZE bytes total
+ * capacity.  Use POOL for non-cache allocations.
+ */
 static svn_error_t *
 read_revisions(fs_fs_t **fs,
                const char *path,
@@ -1165,63 +1284,97 @@ read_revisions(fs_fs_t **fs,
                apr_pool_t *pool)
 {
   svn_revnum_t revision;
-  apr_size_t window_cache_size;
+  svn_cache_config_t cache_config = *svn_cache_config_get();
 
   /* determine cache sizes */
 
   if (memsize < 100)
     memsize = 100;
-  
-  window_cache_size = memsize * 1024 * 1024;
+
+  cache_config.cache_size = memsize * 1024 * 1024;
+  svn_cache_config_set(&cache_config);
   
   SVN_ERR(fs_open(fs, path, pool));
 
+  /* create data containers and caches */
   (*fs)->start_revision = start_revision
                         - (start_revision % (*fs)->max_files_per_dir);
   (*fs)->revisions = apr_array_make(pool,
                                     (*fs)->max_revision + 1 - (*fs)->start_revision,
                                     sizeof(revision_info_t *));
   (*fs)->null_base = apr_pcalloc(pool, sizeof(*(*fs)->null_base));
-  (*fs)->window_cache = create_window_cache
-                    (apr_allocator_owner_get
-                         (svn_pool_create_allocator(FALSE)),
-                          10000, window_cache_size);
 
+  SVN_ERR(svn_cache__create_membuffer_cache(&(*fs)->window_cache,
+                                            svn_cache__get_global_membuffer_cache(),
+                                            NULL, NULL,
+                                            sizeof(window_cache_key_t),
+                                            "", FALSE, pool));
+
+  /* read all packed revs */
   for ( revision = start_revision
       ; revision < (*fs)->min_unpacked_rev
       ; revision += (*fs)->max_files_per_dir)
     SVN_ERR(read_pack_file(*fs, revision, pool));
-    
+
+  /* read non-packed revs */
   for ( ; revision <= (*fs)->max_revision; ++revision)
     SVN_ERR(read_revision_file(*fs, revision, pool));
 
   return SVN_NO_ERROR;
 }
 
+/* Compression statistics we collect over a given set of representations.
+ */
 typedef struct rep_pack_stats_t
 {
+  /* number of representations */
   apr_int64_t count;
+
+  /* total size after deltification (i.e. on disk size) */
   apr_int64_t packed_size;
+  
+  /* total size after de-deltification (i.e. plain text size) */
   apr_int64_t expanded_size;
+
+  /* total on-disk header size */
   apr_int64_t overhead_size;
 } rep_pack_stats_t;
 
+/* Statistics we collect over a given set of representations.
+ * We group them into shared and non-shared ("unique") reps.
+ */
 typedef struct representation_stats_t
 {
+  /* stats over all representations */
   rep_pack_stats_t total;
+  
+  /* stats over those representations with ref_count == 1 */
   rep_pack_stats_t uniques;
+
+  /* stats over those representations with ref_count > 1 */
   rep_pack_stats_t shared;
   
+  /* sum of all ref_counts */
   apr_int64_t references;
+
+  /* sum of ref_count * expanded_size,
+   * i.e. total plaintext content if there was no rep sharing */
   apr_int64_t expanded_size;
 } representation_stats_t;
 
+/* Basic statistics we collect over a given set of noderevs.
+ */
 typedef struct node_stats_t
 {
+  /* number of noderev structs */
   apr_int64_t count;
+  
+  /* their total size on disk (structs only) */
   apr_int64_t size;
 } node_stats_t;
 
+/* Accumulate stats of REP in STATS.
+ */
 static void
 add_rep_pack_stats(rep_pack_stats_t *stats,
                    representation_t *rep)
@@ -1230,9 +1383,11 @@ add_rep_pack_stats(rep_pack_stats_t *sta
   
   stats->packed_size += rep->size;
   stats->expanded_size += rep->expanded_size;
-  stats->overhead_size += rep->header_size + 7;
+  stats->overhead_size += rep->header_size + 7 /* ENDREP\n */;
 }
 
+/* Accumulate stats of REP in STATS.
+ */
 static void
 add_rep_stats(representation_stats_t *stats,
               representation_t *rep)
@@ -1247,6 +1402,9 @@ add_rep_stats(representation_stats_t *st
   stats->expanded_size += rep->ref_count * rep->expanded_size;
 }
 
+/* Print statistics for the given group of representations to console.
+ * Use POOL for allocations.
+ */
 static void
 print_rep_stats(representation_stats_t *stats,
                 apr_pool_t *pool)
@@ -1267,12 +1425,16 @@ print_rep_stats(representation_stats_t *
          svn__i64toa_sep(stats->references - stats->total.count, ',', pool));
 }
 
+/* Post-process stats for FS and print them to the console.
+ * Use POOL for allocations.
+ */
 static void
 print_stats(fs_fs_t *fs,
             apr_pool_t *pool)
 {
   int i, k;
-  
+
+  /* initialize stats to collect */
   representation_stats_t file_rep_stats = { { 0 } };
   representation_stats_t dir_rep_stats = { { 0 } };
   representation_stats_t file_prop_rep_stats = { { 0 } };
@@ -1286,11 +1448,14 @@ print_stats(fs_fs_t *fs,
   apr_int64_t total_size = 0;
   apr_int64_t change_count = 0;
   apr_int64_t change_len = 0;
-  
+
+  /* aggregate info from all revisions */
   for (i = 0; i < fs->revisions->nelts; ++i)
     {
       revision_info_t *revision = APR_ARRAY_IDX(fs->revisions, i,
                                                 revision_info_t *);
+
+      /* data gathered on a revision level */
       change_count += revision->change_count;
       change_len += revision->changes_len;
       total_size += revision->end - revision->offset;
@@ -1303,11 +1468,14 @@ print_stats(fs_fs_t *fs,
                               + revision->file_noderev_count;
       total_node_stats.size += revision->dir_noderev_size
                              + revision->file_noderev_size;
-      
+
+      /* process representations */
       for (k = 0; k < revision->representations->nelts; ++k)
         {
           representation_t *rep = APR_ARRAY_IDX(revision->representations,
                                                 k, representation_t *);
+
+          /* accumulate in the right bucket */
           switch(rep->kind)
             {
               case file_rep:
@@ -1330,6 +1498,7 @@ print_stats(fs_fs_t *fs,
         }
     }
 
+  /* print results */
   printf("\nGlobal statistics:\n");
   printf(_("%20s bytes in %12s revisions\n"
            "%20s bytes in %12s changes\n"
@@ -1388,6 +1557,9 @@ print_stats(fs_fs_t *fs,
   print_rep_stats(&file_prop_rep_stats, pool);
 }
 
+/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and
+ * POOL for allocations.
+ */
 static void
 print_usage(svn_stream_t *ostream, const char *progname,
             apr_pool_t *pool)
@@ -1404,6 +1576,7 @@ print_usage(svn_stream_t *ostream, const
      progname));
 }
 
+/* linear control flow */
 int main(int argc, const char *argv[])
 {
   apr_pool_t *pool;

Modified: subversion/branches/tree-read-api/tools/server-side/svnpubsub/commit-hook.py
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/svnpubsub/commit-hook.py?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/svnpubsub/commit-hook.py (original)
+++ subversion/branches/tree-read-api/tools/server-side/svnpubsub/commit-hook.py Sun Jan  6 02:33:34 2013
@@ -42,11 +42,11 @@ def svncmd_uuid(repo):
 def svncmd_info(repo, revision):
     cmd = "%s info -r %s %s" % (SVNLOOK, revision, repo)
     p = svncmd(cmd)
-    data = p.stdout.read().strip().split("\n")
+    data = p.stdout.read().split("\n")
     #print data
-    return {'author': data[0],
-            'date': data[1],
-            'log': "\n".join(data[3:])}
+    return {'author': data[0].strip(),
+            'date': data[1].strip(),
+            'log': "\n".join(data[3:]).strip()}
 
 def svncmd_dirs(repo, revision):
     cmd = "%s dirs-changed  -r %s %s" % (SVNLOOK, revision, repo)
@@ -59,6 +59,19 @@ def svncmd_dirs(repo, revision):
         dirs.append(line.strip())
     return dirs
 
+def svncmd_changed(repo, revision):
+    cmd = "%s changed -r %s %s" % (SVNLOOK, revision, repo)
+    p = svncmd(cmd)
+    changed = {} 
+    while True:
+        line = p.stdout.readline()
+        if not line:
+            break
+        line = line.strip()
+        (flags, filename) = (line[0:3], line[4:])
+        changed[filename] = {'flags': flags} 
+    return changed
+
 def do_put(body):
     opener = urllib2.build_opener(urllib2.HTTPHandler)
     request = urllib2.Request("http://%s:%d/dirs-changed" %(HOST, PORT), data=body)
@@ -72,12 +85,14 @@ def main(repo, revision):
     i = svncmd_info(repo, revision)
     data = {'revision': int(revision),
             'dirs_changed': [],
+            'changed': {},
             'repos': svncmd_uuid(repo),
             'author': i['author'],
             'log': i['log'],
             'date': i['date'],
             }
     data['dirs_changed'].extend(svncmd_dirs(repo, revision))
+    data['changed'].update(svncmd_changed(repo, revision))
     body = json.dumps(data)
     #print body
     do_put(body)

Modified: subversion/branches/tree-read-api/tools/server-side/svnpubsub/irkerbridge.py
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/svnpubsub/irkerbridge.py?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/svnpubsub/irkerbridge.py (original)
+++ subversion/branches/tree-read-api/tools/server-side/svnpubsub/irkerbridge.py Sun Jan  6 02:33:34 2013
@@ -145,6 +145,12 @@ class BigDoEverythingClass(object):
     return result
 
   def fill_in_extra_args(self, rev):
+    # Set any empty members to the string "<null>"
+    v = vars(rev)
+    for k in v.keys():
+      if not v[k]:
+        v[k] = '<null>'
+       
     # Add entries to the rev object that are useful for
     # formatting.
     rev.log_firstline = rev.log.split("\n",1)[0]

Modified: subversion/branches/tree-read-api/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd (original)
+++ subversion/branches/tree-read-api/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd Sun Jan  6 02:33:34 2013
@@ -20,11 +20,13 @@ svnpubsub_user=${svnpubsub_user-"svn"}
 svnpubsub_group=${svnpubsub_group-"svn"}
 svnpubsub_reactor=${svnpubsub_reactor-"poll"}
 svnpubsub_pidfile=${svnpubsub_pidfile-"/var/run/svnpubsub/svnpubsub.pid"}
+svnpubsub_cmd_int=${svnpubsub_cmd_int-"python"}
 pidfile="${svnpubsub_pidfile}"
 
 export PYTHON_EGG_CACHE="/home/svn/.python-eggs"
 
 command="/usr/local/bin/twistd"
+command_interpreter="/usr/local/bin/${svnpubsub_cmd_int}"
 command_args="-y /usr/local/svnpubsub/svnpubsub.tac \
             --logfile=/var/log/vc/svnpubsub.log \
             --pidfile=${pidfile} \