You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by ju...@apache.org on 2013/01/06 03:33:39 UTC
svn commit: r1429457 [20/21] - in /subversion/branches/tree-read-api: ./
build/ build/ac-macros/ build/generator/templates/ build/win32/
contrib/server-side/svncutter/ doc/ subversion/bindings/cxxhl/include/
subversion/bindings/cxxhl/include/svncxxhl/ ...
Modified: subversion/branches/tree-read-api/tools/dist/release.py
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/dist/release.py?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/dist/release.py (original)
+++ subversion/branches/tree-read-api/tools/dist/release.py Sun Jan 6 02:33:34 2013
@@ -34,6 +34,9 @@
# It'd be kind of nice to use the Subversion python bindings in this script,
# but people.apache.org doesn't currently have them installed
+# Futures (Python 2.5 compatibility)
+from __future__ import with_statement
+
# Stuff we need
import os
import re
@@ -88,6 +91,7 @@ secure_repos = 'https://svn.apache.org/r
dist_repos = 'https://dist.apache.org/repos/dist'
dist_dev_url = dist_repos + '/dev/subversion'
dist_release_url = dist_repos + '/release/subversion'
+KEYS = 'https://people.apache.org/keys/group/subversion.asc'
extns = ['zip', 'tar.gz', 'tar.bz2']
@@ -374,7 +378,10 @@ def compare_changes(repos, branch, revis
if stderr:
raise RuntimeError('svn mergeinfo failed: %s' % stderr)
if stdout:
- raise RuntimeError('CHANGES has unmerged revisions: %s' % stdout)
+ # Treat this as a warning since we are now putting entries for future
+ # minor releases in CHANGES on trunk.
+ logging.warning('CHANGES has unmerged revisions: %s' %
+ stdout.replace("\n", " "))
def roll_tarballs(args):
'Create the release artifacts.'
@@ -620,7 +627,13 @@ def write_news(args):
def get_sha1info(args, replace=False):
'Return a list of sha1 info for the release'
- sha1s = glob.glob(os.path.join(get_deploydir(args.base_dir), '*.sha1'))
+
+ if args.target:
+ target = args.target
+ else:
+ target = get_deploydir(args.base_dir)
+
+ sha1s = glob.glob(os.path.join(target, '*.sha1'))
class info(object):
pass
@@ -644,10 +657,11 @@ def get_sha1info(args, replace=False):
def write_announcement(args):
'Write the release announcement.'
sha1info = get_sha1info(args)
+ siginfo = "\n".join(get_siginfo(args, True)) + "\n"
data = { 'version' : str(args.version),
'sha1info' : sha1info,
- 'siginfo' : open('getsigs-output', 'r').read(),
+ 'siginfo' : siginfo,
'major-minor' : '%d.%d' % (args.version.major,
args.version.minor),
'major-minor-patch' : args.version.base,
@@ -682,8 +696,8 @@ def write_downloads(args):
key_start = '-----BEGIN PGP SIGNATURE-----'
fp_pattern = re.compile(r'^pub\s+(\w+\/\w+)[^\n]*\n\s+Key\sfingerprint\s=((\s+[0-9A-F]{4}){10})\nuid\s+([^<\(]+)\s')
-def check_sigs(args):
- 'Check the signatures for the release.'
+def get_siginfo(args, quiet=False):
+ 'Returns a list of signatures for the release.'
try:
import gnupg
@@ -697,13 +711,16 @@ def check_sigs(args):
target = get_deploydir(args.base_dir)
good_sigs = {}
+ fingerprints = {}
+ output = []
glob_pattern = os.path.join(target, 'subversion*-%s*.asc' % args.version)
for filename in glob.glob(glob_pattern):
text = open(filename).read()
keys = text.split(key_start)
- logging.info("Checking %d sig(s) in %s" % (len(keys[1:]), filename))
+ if not quiet:
+ logging.info("Checking %d sig(s) in %s" % (len(keys[1:]), filename))
for key in keys[1:]:
fd, fn = tempfile.mkstemp()
os.write(fd, key_start + key)
@@ -733,9 +750,30 @@ def check_sigs(args):
if l[0:7] != 'Warning' ])
fp = fp_pattern.match(gpg_output).groups()
- print(" %s [%s] with fingerprint:" % (fp[3], fp[0]))
- print(" %s" % fp[1])
+ fingerprints["%s [%s] %s" % (fp[3], fp[0], fp[1])] = fp
+ for entry in sorted(fingerprints.keys()):
+ fp = fingerprints[entry]
+ output.append(" %s [%s] with fingerprint:" % (fp[3], fp[0]))
+ output.append(" %s" % fp[1])
+
+ return output
+
+def check_sigs(args):
+ 'Check the signatures for the release.'
+
+ output = get_siginfo(args)
+ for line in output:
+ print(line)
+
+def get_keys(args):
+ 'Import the LDAP-based KEYS file to gpg'
+ # We use a tempfile because urlopen() objects don't have a .fileno()
+ with tempfile.SpooledTemporaryFile() as fd:
+ fd.write(urllib2.urlopen(KEYS).read())
+ fd.flush()
+ fd.seek(0)
+ subprocess.check_call(['gpg', '--import'], stdin=fd)
#----------------------------------------------------------------------
# Main entry point for argument parsing and handling
@@ -844,13 +882,18 @@ def main():
subparser.add_argument('version', type=Version,
help='''The release label, such as '1.7.0-alpha1'.''')
+ # write-announcement
subparser = subparsers.add_parser('write-announcement',
help='''Output to stdout template text for the emailed
release announcement.''')
subparser.set_defaults(func=write_announcement)
+ subparser.add_argument('--target',
+ help='''The full path to the directory containing
+ release artifacts.''')
subparser.add_argument('version', type=Version,
help='''The release label, such as '1.7.0-alpha1'.''')
+ # write-downloads
subparser = subparsers.add_parser('write-downloads',
help='''Output to stdout template text for the download
table for subversion.apache.org''')
@@ -858,7 +901,7 @@ def main():
subparser.add_argument('version', type=Version,
help='''The release label, such as '1.7.0-alpha1'.''')
- # The check sigs subcommand
+ # check-sigs
subparser = subparsers.add_parser('check-sigs',
help='''Output to stdout the signatures collected for this
release''')
@@ -869,6 +912,11 @@ def main():
help='''The full path to the directory containing
release artifacts.''')
+ # get-keys
+ subparser = subparsers.add_parser('get-keys',
+ help='''Import committers' public keys to ~/.gpg/''')
+ subparser.set_defaults(func=get_keys)
+
# A meta-target
subparser = subparsers.add_parser('clean',
help='''The same as the '--clean' switch, but as a
Copied: subversion/branches/tree-read-api/tools/hook-scripts/validate-files.conf.example (from r1429420, subversion/trunk/tools/hook-scripts/validate-files.conf.example)
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/hook-scripts/validate-files.conf.example?p2=subversion/branches/tree-read-api/tools/hook-scripts/validate-files.conf.example&p1=subversion/trunk/tools/hook-scripts/validate-files.conf.example&r1=1429420&r2=1429457&rev=1429457&view=diff
==============================================================================
--- subversion/trunk/tools/hook-scripts/validate-files.conf.example (original)
+++ subversion/branches/tree-read-api/tools/hook-scripts/validate-files.conf.example Sun Jan 6 02:33:34 2013
@@ -30,38 +30,40 @@ svnlook = /usr/local/bin/svnlook
# platform.
#
# The command option is the command to run, this command will be run via
-# the shell of your platform. Your command will have variable replacement
-# made on it prior to execution as follows:
-# $REPO or ${REPO} expands to the path of the repository for the commit.
-# $TXN or ${TXN} expands to the transaction id of the commit.
-# $FILE or ${FILE} expands to the name of the file that matched the pattern.
-#
-# $ characters that are not followed by one of the above variable names will
-# be untouched.
+# the shell of your platform. The following environment variables will
+# be defined for you:
+# REPO = the path of the repository for the commit.
+# TXN = the transaction id of the commit.
+# FILE = the name of the file that matched the pattern.
#
# IMPORTANT: AS A CONSEQUENCE OF THE USE OF THE SHELL IT IS IMPORTANT TO
-# QUOTE THE ARGUMENTS OF YOUR COMMANDS. THE $FILE VARIABLE DOES CONTAIN
+# QUOTE THE ARGUMENTS OF YOUR COMMANDS. THE FILE VARIABLE DOES CONTAIN
# USER GENERATED DATA AND SHELL METACHARACTERS ARE NOT ESCAPED FOR YOU!
-
+#
+# The following examples assume a POSIX shell, if your platform has a
+# different shell you may need to adjust them. For example on Windows
+# cmd.exe uses %VARIABLENAME% instead of $VARIABLENAME to expand environment
+# variables.
+#
# The following rule runs the svnauthz command's validate subcommand
# for file named authz in the conf subdir if it is present in the commit.
# This is a simple way to ensure that invalid authz files are not allowed
# to be committed.
#[rule:svnauthz-validate]
#pattern = conf/authz
-#command = '%(svnauthz)s' validate -t '$TXN' '$REPO' '$FILE'
+#command = '%(svnauthz)s' validate -t "$TXN" "$REPO" "$FILE"
# The following rule runs the svnauthz command's accessof subcommand
-# for any file ending in .authz for config subdir and checks that the admin
+# for any file ending in .authz for the conf subdir and checks that the admin
# user has rw rights to the same file. This can be used to prevent an
# authz file being committed that would remove access for the admin user.
# Note that accessof also validates the validity of the file as well as
# checking the permissions, so it's unnecessary to run validate and accessof.
#[rule:admin-rw-authz]
#pattern = /conf/*.authz
-#command = '%(svnauthz)s' accessof --username admin --path '${FILE}' --is rw -t '${TXN}' '${REPO}' '${FILE}'
+#command = '%(svnauthz)s' accessof --username admin --path "$FILE" --is rw -t "$TXN" "$REPO" "$FILE"
# Use the xmllint command to validate all files ending in .xml
#[rule:xmllint]
#pattern = *.xml
-#command = '%(svnlook)s' cat -t '${TXN}' '${REPO}' '${FILE}' | '%(xmllint)s' --noout -
+#command = '%(svnlook)s' cat -t "$TXN" "$REPO" "$FILE" | '%(xmllint)s' --noout -
Copied: subversion/branches/tree-read-api/tools/hook-scripts/validate-files.py (from r1429420, subversion/trunk/tools/hook-scripts/validate-files.py)
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/hook-scripts/validate-files.py?p2=subversion/branches/tree-read-api/tools/hook-scripts/validate-files.py&p1=subversion/trunk/tools/hook-scripts/validate-files.py&r1=1429420&r2=1429457&rev=1429457&view=diff
==============================================================================
--- subversion/trunk/tools/hook-scripts/validate-files.py (original)
+++ subversion/branches/tree-read-api/tools/hook-scripts/validate-files.py Sun Jan 6 02:33:34 2013
@@ -25,7 +25,6 @@ import sys
import os
import subprocess
import fnmatch
-from string import Template
# Deal with the rename of ConfigParser to configparser in Python3
try:
@@ -81,7 +80,7 @@ class Commands:
line = p.stdout.readline()
if not line:
break
- line = line.strip()
+ line = line.decode().strip()
text_mod = line[0:1]
# Only if the contents of the file changed (by addition or update)
# directories always end in / in the svnlook changed output
@@ -92,7 +91,7 @@ class Commands:
# returncode/stderr output
data = p.communicate()
if p.returncode != 0:
- sys.stderr.write(data[1])
+ sys.stderr.write(data[1].decode())
sys.exit(2)
return changed
@@ -103,12 +102,14 @@ class Commands:
in the defined command.
Returns a tuple of the exit code and the stderr output of the command"""
- cmd_template = self.config.get(section, 'command')
- cmd = Template(cmd_template).safe_substitute(REPO=repo,
- TXN=txn, FILE=fn)
- p = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE)
+ cmd = self.config.get(section, 'command')
+ cmd_env = os.environ.copy()
+ cmd_env['REPO'] = repo
+ cmd_env['TXN'] = txn
+ cmd_env['FILE'] = fn
+ p = subprocess.Popen(cmd, shell=True, env=cmd_env, stderr=subprocess.PIPE)
data = p.communicate()
- return (p.returncode, data[1])
+ return (p.returncode, data[1].decode())
def main(repo, txn):
exitcode = 0
@@ -154,5 +155,5 @@ if __name__ == "__main__":
try:
sys.exit(main(sys.argv[1], sys.argv[2]))
except configparser.Error as e:
- sys.stderr.write("Error with the validate-files.conf: %s\n" % e)
- sys.exit(2)
+ sys.stderr.write("Error with the validate-files.conf: %s\n" % e)
+ sys.exit(2)
Modified: subversion/branches/tree-read-api/tools/server-side/fsfs-reorg.c
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/fsfs-reorg.c?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/fsfs-reorg.c (original)
+++ subversion/branches/tree-read-api/tools/server-side/fsfs-reorg.c Sun Jan 6 02:33:34 2013
@@ -1,4 +1,5 @@
-/* diff.c -- test driver for text diffs
+/* fsfs-reorg.c -- prototypic tool to reorganize packed FSFS repositories
+ * to reduce seeks
*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
@@ -45,7 +46,7 @@
#define _(x) x
#endif
-#define ERROR_TAG "diff: "
+#define ERROR_TAG "fsfs-reporg: "
/* forward declarations */
typedef struct noderev_t noderev_t;
@@ -1800,7 +1801,7 @@ read_pack_file(fs_fs_t *fs,
/* one more pack file processed */
print_progress(base);
- apr_pool_destroy(local_pool);
+ svn_pool_destroy(local_pool);
return SVN_NO_ERROR;
}
@@ -1866,7 +1867,7 @@ read_revision_file(fs_fs_t *fs,
if (revision % fs->max_files_per_dir == 0)
print_progress(revision);
- apr_pool_destroy(local_pool);
+ svn_pool_destroy(local_pool);
return SVN_NO_ERROR;
}
@@ -1956,6 +1957,9 @@ get_max_offset_len(const revision_pack_t
return result;
}
+/* Create the fragments container in PACK and add revision header fragments
+ * to it. Use POOL for allocations.
+ */
static svn_error_t *
add_revisions_pack_heads(revision_pack_t *pack,
apr_pool_t *pool)
@@ -2019,6 +2023,9 @@ add_revisions_pack_heads(revision_pack_t
return SVN_NO_ERROR;
}
+/* For the revision given by INFO in FS, return the fragment container in
+ * *FRAGMENTS and the current placement offset in *CURRENT_POS.
+ */
static svn_error_t *
get_target_offset(apr_size_t **current_pos,
apr_array_header_t **fragments,
@@ -2029,6 +2036,7 @@ get_target_offset(apr_size_t **current_p
revision_pack_t *pack;
svn_revnum_t revision = info->revision;
+ /* identify the pack object */
if (fs->min_unpacked_rev > revision)
{
i = (revision - fs->start_revision) / fs->max_files_per_dir;
@@ -2039,6 +2047,7 @@ get_target_offset(apr_size_t **current_p
i += revision - fs->min_unpacked_rev;
}
+ /* extract the desired info from it */
pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*);
*current_pos = &pack->target_offset;
*fragments = pack->fragments;
@@ -2046,11 +2055,19 @@ get_target_offset(apr_size_t **current_p
return SVN_NO_ERROR;
}
+/* forward declaration */
static svn_error_t *
add_noderev_recursively(fs_fs_t *fs,
noderev_t *node,
apr_pool_t *pool);
+/* Place fragments for the given REPRESENTATION of the given KIND, iff it
+ * has not been covered, yet. Place the base reps along the deltification
+ * chain as far as those reps have not been covered, yet. If REPRESENTATION
+ * is a directory, recursively place its elements.
+ *
+ * Use POOL for allocations.
+ */
static svn_error_t *
add_representation_recursively(fs_fs_t *fs,
representation_t *representation,
@@ -2060,13 +2077,16 @@ add_representation_recursively(fs_fs_t *
apr_size_t *current_pos;
apr_array_header_t *fragments;
fragment_t fragment;
-
+
+ /* place REPRESENTATION only once and only if it exists and will not
+ * be covered later as a directory. */
if ( representation == NULL
|| representation->covered
|| (representation->dir && kind != dir_fragment)
|| representation == fs->null_base)
return SVN_NO_ERROR;
+ /* add and place a fragment for REPRESENTATION */
SVN_ERR(get_target_offset(&current_pos, &fragments,
fs, representation->revision));
representation->target.offset = *current_pos;
@@ -2077,9 +2097,12 @@ add_representation_recursively(fs_fs_t *
fragment.position = *current_pos;
APR_ARRAY_PUSH(fragments, fragment_t) = fragment;
+ /* determine the size of data to be added to the target file */
if ( kind != dir_fragment
&& representation->delta_base && representation->delta_base->dir)
{
+ /* base rep is a dir -> would change -> need to store it as fulltext
+ * in our target file */
apr_pool_t *text_pool = svn_pool_create(pool);
svn_stringbuf_t *content;
@@ -2093,6 +2116,7 @@ add_representation_recursively(fs_fs_t *
if ( kind == dir_fragment
|| (representation->delta_base && representation->delta_base->dir))
{
+ /* deltified directories may grow considerably */
if (representation->original.size < 50)
*current_pos += 300;
else
@@ -2100,6 +2124,8 @@ add_representation_recursively(fs_fs_t *
}
else
{
+ /* plain / deltified content will not change but the header may
+ * grow slightly due to larger offsets. */
representation->target.size = representation->original.size;
if (representation->delta_base &&
@@ -2109,12 +2135,14 @@ add_representation_recursively(fs_fs_t *
*current_pos += representation->original.size + 13;
}
+ /* follow the delta chain and place base revs immediately after this */
if (representation->delta_base)
SVN_ERR(add_representation_recursively(fs,
representation->delta_base,
kind,
pool));
+ /* finally, recurse into directories */
if (representation->dir)
{
int i;
@@ -2131,6 +2159,11 @@ add_representation_recursively(fs_fs_t *
return SVN_NO_ERROR;
}
+/* Place fragments for the given NODE in FS, iff it has not been covered,
+ * yet. Place the reps (text, props) immediately after the node.
+ *
+ * Use POOL for allocations.
+ */
static svn_error_t *
add_noderev_recursively(fs_fs_t *fs,
noderev_t *node,
@@ -2140,9 +2173,11 @@ add_noderev_recursively(fs_fs_t *fs,
apr_array_header_t *fragments;
fragment_t fragment;
+ /* don't add it twice */
if (node->covered)
return SVN_NO_ERROR;
+ /* add and place a fragment for NODE */
SVN_ERR(get_target_offset(&current_pos, &fragments, fs, node->revision));
node->covered = TRUE;
node->target.offset = *current_pos;
@@ -2152,8 +2187,10 @@ add_noderev_recursively(fs_fs_t *fs,
fragment.position = *current_pos;
APR_ARRAY_PUSH(fragments, fragment_t) = fragment;
+ /* size may slightly increase */
*current_pos += node->original.size + 40;
-
+
+ /* recurse into representations */
if (node->text && node->text->dir)
SVN_ERR(add_representation_recursively(fs, node->text, dir_fragment, pool));
else
@@ -2164,6 +2201,8 @@ add_noderev_recursively(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* Place a fragment for the last revision in PACK. Use POOL for allocations.
+ */
static svn_error_t *
add_revisions_pack_tail(revision_pack_t *pack,
apr_pool_t *pool)
@@ -2184,6 +2223,7 @@ add_revisions_pack_tail(revision_pack_t
pack->target_offset += 2 * offset_len + 3;
+ /* end of target file reached. Store that info in all revs. */
for (i = 0; i < pack->info->nelts; ++i)
{
info = APR_ARRAY_IDX(pack->info, i, revision_info_t*);
@@ -2193,6 +2233,9 @@ add_revisions_pack_tail(revision_pack_t
return SVN_NO_ERROR;
}
+/* Place all fragments for all revisions / packs in FS.
+ * Use POOL for allocations.
+ */
static svn_error_t *
reorder_revisions(fs_fs_t *fs,
apr_pool_t *pool)
@@ -2233,12 +2276,20 @@ reorder_revisions(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* forward declaration */
static svn_error_t *
get_fragment_content(svn_string_t **content,
fs_fs_t *fs,
fragment_t *fragment,
apr_pool_t *pool);
+/* Directory content may change and with it, the deltified representations
+ * may change significantly. This function causes all directory target reps in
+ * PACK of FS to be built and their new MD5 as well as rep sizes be updated.
+ * We must do that before attempting to write noderevs.
+ *
+ * Use POOL for allocations.
+ */
static svn_error_t *
update_noderevs(fs_fs_t *fs,
revision_pack_t *pack,
@@ -2254,6 +2305,8 @@ update_noderevs(fs_fs_t *fs,
{
svn_string_t *content;
+ /* request updated rep content but ignore the result.
+ * We are only interested in the MD5, content and rep size updates. */
SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
svn_pool_clear(itempool);
}
@@ -2264,6 +2317,11 @@ update_noderevs(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* Determine the target size of the FRAGMENT in FS and return the value
+ * in *LENGTH. If ADD_PADDING has been set, slightly fudge the numbers
+ * to account for changes in offset lengths etc. Use POOL for temporary
+ * allocations.
+ */
static svn_error_t *
get_content_length(apr_size_t *length,
fs_fs_t *fs,
@@ -2293,6 +2351,9 @@ get_content_length(apr_size_t *length,
return SVN_NO_ERROR;
}
+/* Move the FRAGMENT to global file offset NEW_POSITION. Update the target
+ * location info of the underlying object as well.
+ */
static void
move_fragment(fragment_t *fragment,
apr_size_t new_position)
@@ -2300,9 +2361,11 @@ move_fragment(fragment_t *fragment,
revision_info_t *info;
representation_t *representation;
noderev_t *node;
-
+
+ /* move the fragment */
fragment->position = new_position;
+ /* move the underlying object */
switch (fragment->kind)
{
case header_fragment:
@@ -2329,6 +2392,10 @@ move_fragment(fragment_t *fragment,
}
}
+/* Move the fragments in PACK's target fragment list to their final offsets.
+ * This may require several iterations if the fudge factors turned out to
+ * be insufficient. Use POOL for allocations.
+ */
static svn_error_t *
pack_revisions(fs_fs_t *fs,
revision_pack_t *pack,
@@ -2342,8 +2409,13 @@ pack_revisions(fs_fs_t *fs,
apr_pool_t *itempool = svn_pool_create(pool);
+ /* update all directory reps. Chances are that most of the target rep
+ * sizes are now close to accurate. */
SVN_ERR(update_noderevs(fs, pack, pool));
+ /* compression phase: pack all fragments tightly with only a very small
+ * fudge factor. This should cause offsets to shrink, thus all the
+ * actual fragment rate should tend to be even smaller afterwards. */
current_pos = pack->info->nelts > 1 ? 64 : 0;
for (i = 0; i + 1 < pack->fragments->nelts; ++i)
{
@@ -2355,9 +2427,15 @@ pack_revisions(fs_fs_t *fs,
svn_pool_clear(itempool);
}
+ /* don't forget the final fragment (last revision's revision header) */
fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, fragment_t);
fragment->position = current_pos;
+ /* expansion phase: check whether all fragments fit into their allotted
+ * slots. Grow them geometrically if they don't fit. Retry until they
+ * all do fit.
+ * Note: there is an upper limit to which fragments can grow. So, this
+ * loop will terminate. Often, no expansion will be necessary at all. */
do
{
needed_to_expand = FALSE;
@@ -2394,6 +2472,8 @@ pack_revisions(fs_fs_t *fs,
fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, fragment_t);
fragment->position = current_pos;
+ /* update the revision
+ * sizes (they all end at the end of the pack file now) */
SVN_ERR(get_content_length(&len, fs, fragment, FALSE, itempool));
current_pos += len;
@@ -2410,6 +2490,8 @@ pack_revisions(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* Write reorg'ed target content for PACK in FS. Use POOL for allocations.
+ */
static svn_error_t *
write_revisions(fs_fs_t *fs,
revision_pack_t *pack,
@@ -2426,6 +2508,7 @@ write_revisions(fs_fs_t *fs,
apr_size_t current_pos = 0;
svn_stringbuf_t *null_buffer = svn_stringbuf_create_empty(iterpool);
+ /* create the target file */
const char *dir = apr_psprintf(iterpool, "%s/new/%ld%s",
fs->path, pack->base / fs->max_files_per_dir,
pack->info->nelts > 1 ? ".pack" : "");
@@ -2438,38 +2521,46 @@ write_revisions(fs_fs_t *fs,
APR_OS_DEFAULT,
iterpool));
+ /* write all fragments */
for (i = 0; i < pack->fragments->nelts; ++i)
{
apr_size_t padding;
+
+ /* get fragment content to write */
fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t);
SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
-
SVN_ERR_ASSERT(fragment->position >= current_pos);
+
+ /* number of bytes between this and the previous fragment */
if ( fragment->kind == header_fragment
&& i+1 < pack->fragments->nelts)
+ /* special case: header fragments are aligned to the slot end */
padding = APR_ARRAY_IDX(pack->fragments, i+1, fragment_t).position -
content->len - current_pos;
else
+ /* standard case: fragments are aligned to the slot start */
padding = fragment->position - current_pos;
+ /* write padding between fragments */
if (padding)
{
while (null_buffer->len < padding)
svn_stringbuf_appendbyte(null_buffer, 0);
SVN_ERR(svn_io_file_write_full(file,
- null_buffer->data,
- padding,
- NULL,
- itempool));
+ null_buffer->data,
+ padding,
+ NULL,
+ itempool));
current_pos += padding;
}
+ /* write fragment content */
SVN_ERR(svn_io_file_write_full(file,
- content->data,
- content->len,
- NULL,
- itempool));
+ content->data,
+ content->len,
+ NULL,
+ itempool));
current_pos += content->len;
svn_pool_clear(itempool);
@@ -2477,6 +2568,7 @@ write_revisions(fs_fs_t *fs,
apr_file_close(file);
+ /* write new manifest file */
if (pack->info->nelts > 1)
{
svn_stream_t *stream;
@@ -2498,12 +2590,17 @@ write_revisions(fs_fs_t *fs,
}
}
+ /* cleanup */
svn_pool_destroy(itempool);
svn_pool_destroy(iterpool);
return SVN_NO_ERROR;
}
+/* Write reorg'ed target content for all revisions in FS. To maximize
+ * data locality, pack and write in one go per pack file.
+ * Use POOL for allocations.
+ */
static svn_error_t *
pack_and_write_revisions(fs_fs_t *fs,
apr_pool_t *pool)
@@ -2527,6 +2624,10 @@ pack_and_write_revisions(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* For the directory REPRESENTATION in FS, construct the new (target)
+ * serialized plaintext representation and return it in *CONTENT.
+ * Allocate the result in POOL and temporaries in SCRATCH_POOL.
+ */
static svn_error_t *
get_updated_dir(svn_string_t **content,
fs_fs_t *fs,
@@ -2540,14 +2641,19 @@ get_updated_dir(svn_string_t **content,
int i;
svn_stream_t *stream;
svn_stringbuf_t *result;
-
+
+ /* get the original content */
SVN_ERR(read_dir(&hash, fs, representation, scratch_pool));
hash = apr_hash_copy(hash_pool, hash);
+
+ /* update all entries */
for (i = 0; i < dir->nelts; ++i)
{
char buffer[256];
svn_string_t *new_val;
apr_size_t pos;
+
+ /* find the original entry for the current name */
direntry_t *entry = APR_ARRAY_IDX(dir, i, direntry_t *);
svn_string_t *str_val = apr_hash_get(hash, entry->name, entry->name_len);
if (str_val == NULL)
@@ -2555,54 +2661,40 @@ get_updated_dir(svn_string_t **content,
_("Dir entry '%s' not found"), entry->name);
SVN_ERR_ASSERT(str_val->len < sizeof(buffer));
-
+
+ /* create an updated node ID */
memcpy(buffer, str_val->data, str_val->len+1);
pos = strchr(buffer, '/') - buffer + 1;
pos += svn__ui64toa(buffer + pos, entry->node->target.offset - entry->node->revision->target.offset);
new_val = svn_string_ncreate(buffer, pos, hash_pool);
+ /* store it in the hash */
apr_hash_set(hash, entry->name, entry->name_len, new_val);
}
+ /* serialize the updated hash */
result = svn_stringbuf_create_ensure(representation->target.size, pool);
stream = svn_stream_from_stringbuf(result, hash_pool);
SVN_ERR(svn_hash_write2(hash, stream, SVN_HASH_TERMINATOR, hash_pool));
svn_pool_destroy(hash_pool);
+ /* done */
*content = svn_stringbuf__morph_into_string(result);
return SVN_NO_ERROR;
}
-struct diff_write_baton_t
-{
- svn_stream_t *stream;
- apr_size_t size;
-};
-
-static svn_error_t *
-diff_write_handler(void *baton,
- const char *data,
- apr_size_t *len)
-{
- struct diff_write_baton_t *whb = baton;
-
- SVN_ERR(svn_stream_write(whb->stream, data, len));
- whb->size += *len;
-
- return SVN_NO_ERROR;
-}
-
+/* Calculate the delta representation for the given CONTENT and BASE.
+ * Return the rep in *DIFF. Use POOL for allocations.
+ */
static svn_error_t *
diff_stringbufs(svn_stringbuf_t *diff,
- apr_size_t *inflated_size,
svn_string_t *base,
svn_string_t *content,
apr_pool_t *pool)
{
svn_txdelta_window_handler_t diff_wh;
void *diff_whb;
- struct diff_write_baton_t whb;
svn_stream_t *stream;
svn_stream_t *source = svn_stream_from_string(base, pool);
@@ -2616,20 +2708,20 @@ diff_stringbufs(svn_stringbuf_t *diff,
SVN_DELTA_COMPRESSION_LEVEL_DEFAULT,
pool);
- whb.stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
- whb.size = 0;
-
- stream = svn_stream_create(&whb, pool);
- svn_stream_set_write(stream, diff_write_handler);
+ /* create delta stream */
+ stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
+ /* run delta */
SVN_ERR(svn_stream_write(stream, content->data, &content->len));
- SVN_ERR(svn_stream_close(whb.stream));
SVN_ERR(svn_stream_close(stream));
- *inflated_size = whb.size;
return SVN_NO_ERROR;
}
+/* Update the noderev id value for KEY in the textual noderev representation
+ * in NODE_REV. Take the new id from NODE. This is a no-op if the KEY
+ * cannot be found.
+ */
static void
update_id(svn_stringbuf_t *node_rev,
const char *key,
@@ -2638,6 +2730,7 @@ update_id(svn_stringbuf_t *node_rev,
char *newline_pos = 0;
char *pos;
+ /* we need to update the offset only -> find its position */
pos = strstr(node_rev->data, key);
if (pos)
pos = strchr(pos, '/');
@@ -2646,6 +2739,7 @@ update_id(svn_stringbuf_t *node_rev,
if (pos && newline_pos)
{
+ /* offset data has been found -> replace it */
char temp[SVN_INT64_BUFFER_SIZE];
apr_size_t len = svn__i64toa(temp, node->target.offset - node->revision->target.offset);
svn_stringbuf_replace(node_rev,
@@ -2654,6 +2748,11 @@ update_id(svn_stringbuf_t *node_rev,
}
}
+/* Update the representation id value for KEY in the textual noderev
+ * representation in NODE_REV. Take the offset, sizes and new MD5 from
+ * REPRESENTATION. Use SCRATCH_POOL for allocations.
+ * This is a no-op if the KEY cannot be found.
+ */
static void
update_text(svn_stringbuf_t *node_rev,
const char *key,
@@ -2670,6 +2769,7 @@ update_text(svn_stringbuf_t *node_rev,
val_pos = pos + key_len;
if (representation->dir)
{
+ /* for directories, we need to write all rep info anew */
char *newline_pos = strchr(val_pos, '\n');
svn_checksum_t checksum;
const char* temp = apr_psprintf(scratch_pool, "%ld %" APR_SIZE_T_FMT " %"
@@ -2689,6 +2789,8 @@ update_text(svn_stringbuf_t *node_rev,
}
else
{
+ /* ordinary representation: replace offset and rep size only.
+ * Content size and checksums are unchanged. */
const char* temp;
char *end_pos = strchr(val_pos, ' ');
@@ -2704,6 +2806,13 @@ update_text(svn_stringbuf_t *node_rev,
}
}
+/* Get the target content (data block as to be written to the file) for
+ * the given FRAGMENT in FS. Return the content in *CONTENT. Use POOL
+ * for allocations.
+ *
+ * Note that, as a side-effect, this will update the target rep. info for
+ * directories.
+ */
static svn_error_t *
get_fragment_content(svn_string_t **content,
fs_fs_t *fs,
@@ -2720,6 +2829,7 @@ get_fragment_content(svn_string_t **cont
switch (fragment->kind)
{
+ /* revision headers can be constructed from target position info */
case header_fragment:
info = fragment->data;
*content = svn_string_createf(pool,
@@ -2728,6 +2838,7 @@ get_fragment_content(svn_string_t **cont
info->target.changes);
return SVN_NO_ERROR;
+ /* The changes list remains untouched */
case changes_fragment:
info = fragment->data;
SVN_ERR(get_content(&revision_content, fs, info->revision, pool));
@@ -2737,6 +2848,9 @@ get_fragment_content(svn_string_t **cont
(*content)->len = info->target.changes_len;
return SVN_NO_ERROR;
+ /* property and file reps get new headers and need to be rewritten,
+ * iff the base rep is a directory. The actual (deltified) content
+ * remains unchanged, though. MD5 etc. do not change. */
case property_fragment:
case file_fragment:
representation = fragment->data;
@@ -2746,6 +2860,8 @@ get_fragment_content(svn_string_t **cont
if (representation->delta_base)
if (representation->delta_base->dir)
{
+ /* if the base happens to be a directory, reconstruct the
+ * full text and represent it as PLAIN rep. */
SVN_ERR(get_combined_window(&text, fs, representation, pool));
representation->target.size = text->len;
@@ -2756,6 +2872,7 @@ get_fragment_content(svn_string_t **cont
return SVN_NO_ERROR;
}
else
+ /* construct a new rep header */
if (representation->delta_base == fs->null_base)
header = svn_stringbuf_create("DELTA\n", pool);
else
@@ -2768,6 +2885,8 @@ get_fragment_content(svn_string_t **cont
else
header = svn_stringbuf_create("PLAIN\n", pool);
+ /* if it exists, the actual delta base is unchanged. Hence, this
+ * rep is unchanged even if it has been deltified. */
header_size = strchr(revision_content->data +
representation->original.offset, '\n') -
revision_content->data -
@@ -2781,7 +2900,10 @@ get_fragment_content(svn_string_t **cont
*content = svn_stringbuf__morph_into_string(header);
return SVN_NO_ERROR;
+ /* directory reps need to be rewritten (and deltified) completely.
+ * As a side-effect, update the MD5 and target content size. */
case dir_fragment:
+ /* construct new content and update MD5 */
representation = fragment->data;
SVN_ERR(get_updated_dir(&revision_content, fs, representation,
pool, pool));
@@ -2792,15 +2914,18 @@ get_fragment_content(svn_string_t **cont
checksum->digest,
sizeof(representation->dir->target_md5));
+ /* deltify against the base rep if necessary */
if (representation->delta_base)
{
if (representation->delta_base->dir == NULL)
{
+ /* dummy or non-dir base rep -> self-compress only */
header = svn_stringbuf_create("DELTA\n", pool);
base_content = svn_string_create_empty(pool);
}
else
{
+ /* deltify against base rep (which is a directory, too)*/
representation_t *base_rep = representation->delta_base;
header = svn_stringbuf_createf(pool,
"DELTA %ld %" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT "\n",
@@ -2811,16 +2936,18 @@ get_fragment_content(svn_string_t **cont
pool, pool));
}
+ /* run deltification and update target content size */
header_size = header->len;
- SVN_ERR(diff_stringbufs(header, &representation->dir->size,
- base_content,
+ SVN_ERR(diff_stringbufs(header, base_content,
revision_content, pool));
+ representation->dir->size = revision_content->len;
representation->target.size = header->len - header_size;
svn_stringbuf_appendcstr(header, "ENDREP\n");
*content = svn_stringbuf__morph_into_string(header);
}
else
{
+ /* no delta base (not even a dummy) -> PLAIN rep */
representation->target.size = revision_content->len;
representation->dir->size = revision_content->len;
*content = svn_string_createf(pool, "PLAIN\n%sENDREP\n",
@@ -2829,7 +2956,9 @@ get_fragment_content(svn_string_t **cont
return SVN_NO_ERROR;
+ /* construct the new noderev content. No side-effects.*/
case noderev_fragment:
+ /* get the original noderev as string */
node = fragment->data;
SVN_ERR(get_content(&revision_content, fs,
node->revision->revision, pool));
@@ -2838,6 +2967,7 @@ get_fragment_content(svn_string_t **cont
node->original.size,
pool);
+ /* update the values that may have changed for target */
update_id(node_rev, "id: ", node);
update_id(node_rev, "pred: ", node->predecessor);
update_text(node_rev, "text: ", node->text, pool);
@@ -2852,6 +2982,9 @@ get_fragment_content(svn_string_t **cont
return SVN_NO_ERROR;
}
+/* In the repository at PATH, restore the original content in case we ran
+ * this reorg tool before. Use POOL for allocations.
+ */
static svn_error_t *
prepare_repo(const char *path, apr_pool_t *pool)
{
@@ -2862,16 +2995,19 @@ prepare_repo(const char *path, apr_pool_
const char *revs_path = svn_dirent_join(path, "db/revs", pool);
const char *old_rep_cache_path = svn_dirent_join(path, "db/rep-cache.db.old", pool);
const char *rep_cache_path = svn_dirent_join(path, "db/rep-cache.db", pool);
-
+
+ /* is there a backup? */
SVN_ERR(svn_io_check_path(old_path, &kind, pool));
if (kind == svn_node_dir)
{
+ /* yes, restore the original content from it */
SVN_ERR(svn_io_remove_dir2(new_path, TRUE, NULL, NULL, pool));
SVN_ERR(svn_io_file_move(revs_path, new_path, pool));
SVN_ERR(svn_io_file_move(old_path, revs_path, pool));
SVN_ERR(svn_io_remove_dir2(new_path, TRUE, NULL, NULL, pool));
}
+ /* same for the rep cache db */
SVN_ERR(svn_io_check_path(old_rep_cache_path, &kind, pool));
if (kind == svn_node_file)
SVN_ERR(svn_io_file_move(old_rep_cache_path, rep_cache_path, pool));
@@ -2879,6 +3015,9 @@ prepare_repo(const char *path, apr_pool_
return SVN_NO_ERROR;
}
+/* In the repository at PATH, create a backup of the orig content and
+ * replace it with the reorg'ed. Use POOL for allocations.
+ */
static svn_error_t *
activate_new_revs(const char *path, apr_pool_t *pool)
{
@@ -2890,6 +3029,8 @@ activate_new_revs(const char *path, apr_
const char *old_rep_cache_path = svn_dirent_join(path, "db/rep-cache.db.old", pool);
const char *rep_cache_path = svn_dirent_join(path, "db/rep-cache.db", pool);
+ /* if there is no backup, yet, move the current repo content to the backup
+ * and replace it with the new (reorg'ed) data. */
SVN_ERR(svn_io_check_path(old_path, &kind, pool));
if (kind == svn_node_none)
{
@@ -2897,6 +3038,7 @@ activate_new_revs(const char *path, apr_
SVN_ERR(svn_io_file_move(new_path, revs_path, pool));
}
+ /* same for the rep cache db */
SVN_ERR(svn_io_check_path(old_rep_cache_path, &kind, pool));
if (kind == svn_node_none)
SVN_ERR(svn_io_file_move(rep_cache_path, old_rep_cache_path, pool));
@@ -2904,6 +3046,9 @@ activate_new_revs(const char *path, apr_
return SVN_NO_ERROR;
}
+/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and
+ * POOL for allocations.
+ */
static void
print_usage(svn_stream_t *ostream, const char *progname,
apr_pool_t *pool)
@@ -2923,6 +3068,7 @@ print_usage(svn_stream_t *ostream, const
progname));
}
+/* linear control flow */
int main(int argc, const char *argv[])
{
apr_pool_t *pool;
Modified: subversion/branches/tree-read-api/tools/server-side/fsfs-stats.c
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/fsfs-stats.c?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/fsfs-stats.c (original)
+++ subversion/branches/tree-read-api/tools/server-side/fsfs-stats.c Sun Jan 6 02:33:34 2013
@@ -1,4 +1,4 @@
-/* diff.c -- test driver for text diffs
+/* fsfs-stats.c -- gather size statistics on FSFS repositories
*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
@@ -22,7 +22,6 @@
#include <assert.h>
-#include <sys/stat.h>
#include <apr.h>
#include <apr_general.h>
@@ -37,29 +36,40 @@
#include "svn_sorts.h"
#include "svn_delta.h"
#include "svn_hash.h"
+#include "svn_cache_config.h"
#include "private/svn_string_private.h"
#include "private/svn_subr_private.h"
#include "private/svn_dep_compat.h"
+#include "private/svn_cache.h"
#ifndef _
#define _(x) x
#endif
-#define ERROR_TAG "diff: "
+#define ERROR_TAG "fsfs-stats: "
+/* We group representations into 2x2 different kinds plus one default:
+ * [dir / file] x [text / prop]. The assignment is done by the first node
+ * that references the respective representation.
+ */
typedef enum rep_kind_t
{
+ /* The representation is _directly_ unused, i.e. not referenced by any
+ * noderev. However, some other representation may use it as delta base.
+ * null value. Should not occur in real-world repositories. */
unused_rep,
+ /* a properties on directory rep */
dir_property_rep,
+ /* a properties on file rep */
file_property_rep,
- /* a directory rep (including PLAIN / DELTA header) */
+ /* a directory rep */
dir_rep,
- /* a file rep (including PLAIN / DELTA header) */
+ /* a file rep */
file_rep
} rep_kind_t;
@@ -73,6 +83,7 @@ typedef struct representation_t
/* item length in bytes */
apr_size_t size;
+ /* item length after de-deltification */
apr_size_t expanded_size;
/* deltification base, or NULL if there is none */
@@ -80,13 +91,15 @@ typedef struct representation_t
/* revision that contains this representation
* (may be referenced by other revisions, though) */
-
svn_revnum_t revision;
+
+ /* number of nodes that reference this representation */
apr_uint32_t ref_count;
/* length of the PLAIN / DELTA line in the source file in bytes */
apr_uint16_t header_size;
+ /* classification of the representation. values of rep_kind_t */
char kind;
/* the source content has a PLAIN header, so we may simply copy the
@@ -118,9 +131,16 @@ typedef struct revision_info_t
* for non-packed revs) */
apr_size_t end;
+ /* number of directory noderevs in this revision */
apr_size_t dir_noderev_count;
+
+ /* number of file noderevs in this revision */
apr_size_t file_noderev_count;
+
+ /* total size of directory noderevs (i.e. the structs - not the rep) */
apr_size_t dir_noderev_size;
+
+ /* total size of file noderevs (i.e. the structs - not the rep) */
apr_size_t file_noderev_size;
/* all representation_t of this revision (in no particular order),
@@ -128,43 +148,17 @@ typedef struct revision_info_t
apr_array_header_t *representations;
} revision_info_t;
-/* A cached, undeltified txdelta window.
+/* Data type to identify a representation. It will be used to address
+ * cached combined (un-deltified) windows.
*/
-typedef struct window_cache_entry_t
+typedef struct window_cache_key_t
{
- /* revision containing the window */
+ /* revision of the representation */
svn_revnum_t revision;
- /* offset of the deltified window within that revision */
+ /* its offset */
apr_size_t offset;
-
- /* window content */
- svn_stringbuf_t *window;
-} window_cache_entry_t;
-
-/* Cache for undeltified txdelta windows. (revision, offset) will be mapped
- * directly into the ENTRIES array of INSERT_COUNT buckets (most entries
- * will be NULL).
- *
- * The cache will be cleared when USED exceeds CAPACITY.
- */
-typedef struct window_cache_t
-{
- /* fixed-size array of ENTRY_COUNT elements */
- window_cache_entry_t *entries;
-
- /* used to allocate windows */
- apr_pool_t *pool;
-
- /* size of ENTRIES in elements */
- apr_size_t entry_count;
-
- /* maximum combined size of all cached windows */
- apr_size_t capacity;
-
- /* current combined size of all cached windows */
- apr_size_t used;
-} window_cache_t;
+} window_cache_key_t;
/* Root data structure containing all information about a given repository.
*/
@@ -196,7 +190,7 @@ typedef struct fs_fs_t
representation_t *null_base;
/* undeltified txdelta window cache */
- window_cache_t *window_cache;
+ svn_cache__t *window_cache;
} fs_fs_t;
/* Return the rev pack folder for revision REV in FS.
@@ -238,45 +232,55 @@ open_rev_or_pack_file(apr_file_t **file,
pool);
}
-/* Read the whole content of the file containing REV in FS and return that
- * in *CONTENT.
- */
+/* Return the length of FILE in *FILE_SIZE. Use POOL for allocations.
+*/
static svn_error_t *
-rev_or_pack_file_size(apr_off_t *file_size,
- fs_fs_t *fs,
- svn_revnum_t rev,
- apr_pool_t *pool)
+get_file_size(apr_off_t *file_size,
+ apr_file_t *file,
+ apr_pool_t *pool)
{
- apr_file_t *file;
apr_finfo_t finfo;
- SVN_ERR(open_rev_or_pack_file(&file, fs, rev, pool));
SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, file, pool));
- SVN_ERR(svn_io_file_close(file, pool));
*file_size = finfo.size;
return SVN_NO_ERROR;
}
-/* Get the file content of revision REVISION in FS and return it in *DATA.
- * Use SCRATCH_POOL for temporary allocations.
+/* Get the file content of revision REVISION in FS and return it in *CONTENT.
+ * Read the LEN bytes starting at file OFFSET. When provided, use FILE as
+ * packed or plain rev file.
+ * Use POOL for temporary allocations.
*/
static svn_error_t *
get_content(svn_stringbuf_t **content,
+ apr_file_t *file,
fs_fs_t *fs,
svn_revnum_t revision,
apr_off_t offset,
apr_size_t len,
apr_pool_t *pool)
{
- apr_file_t *file;
apr_pool_t * file_pool = svn_pool_create(pool);
+ apr_size_t large_buffer_size = 0x10000;
- SVN_ERR(open_rev_or_pack_file(&file, fs, revision, file_pool));
- SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
+ if (file == NULL)
+ SVN_ERR(open_rev_or_pack_file(&file, fs, revision, file_pool));
*content = svn_stringbuf_create_ensure(len, pool);
(*content)->len = len;
+
+#if APR_VERSION_AT_LEAST(1,3,0)
+ /* for better efficiency use larger buffers on large reads */
+ if ( (len >= large_buffer_size)
+ && (apr_file_buffer_size_get(file) < large_buffer_size))
+ apr_file_buffer_set(file,
+ apr_palloc(apr_file_pool_get(file),
+ large_buffer_size),
+ large_buffer_size);
+#endif
+
+ SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
SVN_ERR(svn_io_file_read_full2(file, (*content)->data, len,
NULL, NULL, pool));
svn_pool_destroy(file_pool);
@@ -284,89 +288,48 @@ get_content(svn_stringbuf_t **content,
return SVN_NO_ERROR;
}
-/* Return a new txdelta window cache with ENTRY_COUNT buckets in its index
- * and a the total CAPACITY given in bytes.
- * Use POOL for all cache-related allocations.
+/* In *RESULT, return the cached txdelta window stored in REPRESENTATION
+ * within FS. If that has not been found in cache, return NULL.
+ * Allocate the result in POOL.
*/
-static window_cache_t *
-create_window_cache(apr_pool_t *pool,
- apr_size_t entry_count,
- apr_size_t capacity)
-{
- window_cache_t *result = apr_pcalloc(pool, sizeof(*result));
-
- result->pool = svn_pool_create(pool);
- result->entry_count = entry_count;
- result->capacity = capacity;
- result->used = 0;
- result->entries = apr_pcalloc(pool, sizeof(*result->entries) * entry_count);
-
- return result;
-}
-
-/* Return the position within FS' window cache ENTRIES index for the given
- * (REVISION, OFFSET) pair. This is a cache-internal function.
- */
-static apr_size_t
-get_window_cache_index(fs_fs_t *fs,
- svn_revnum_t revision,
- apr_size_t offset)
-{
- return (revision + offset * 0xd1f3da69) % fs->window_cache->entry_count;
-}
-
-/* Return the cached txdelta window stored in REPRESENTAION within FS.
- * If that has not been found in cache, return NULL.
- */
-static svn_stringbuf_t *
-get_cached_window(fs_fs_t *fs,
+static svn_error_t *
+get_cached_window(svn_stringbuf_t **result,
+ fs_fs_t *fs,
representation_t *representation,
apr_pool_t *pool)
{
- svn_revnum_t revision = representation->revision;
- apr_size_t offset = representation->offset;
-
- apr_size_t i = get_window_cache_index(fs, revision, offset);
- window_cache_entry_t *entry = &fs->window_cache->entries[i];
-
- return entry->offset == offset && entry->revision == revision
- ? svn_stringbuf_dup(entry->window, pool)
- : NULL;
+ svn_boolean_t found = FALSE;
+ window_cache_key_t key;
+ key.revision = representation->revision;
+ key.offset = representation->offset;
+
+ *result = NULL;
+ return svn_error_trace(svn_cache__get((void**)result, &found,
+ fs->window_cache,
+ &key, pool));
}
-/* Cache the undeltified txdelta WINDOW for REPRESENTAION within FS.
+/* Cache the undeltified txdelta WINDOW for REPRESENTATION within FS.
+ * Use POOL for temporaries.
*/
-static void
+static svn_error_t *
set_cached_window(fs_fs_t *fs,
representation_t *representation,
- svn_stringbuf_t *window)
+ svn_stringbuf_t *window,
+ apr_pool_t *pool)
{
/* select entry */
- svn_revnum_t revision = representation->revision;
- apr_size_t offset = representation->offset;
+ window_cache_key_t key;
+ key.revision = representation->revision;
+ key.offset = representation->offset;
- apr_size_t i = get_window_cache_index(fs, revision, offset);
- window_cache_entry_t *entry = &fs->window_cache->entries[i];
-
- /* if the capacity is exceeded, clear the cache */
- fs->window_cache->used += window->len;
- if (fs->window_cache->used >= fs->window_cache->capacity)
- {
- svn_pool_clear(fs->window_cache->pool);
- memset(fs->window_cache->entries,
- 0,
- sizeof(*fs->window_cache->entries) * fs->window_cache->entry_count);
- fs->window_cache->used = window->len;
- }
-
- /* set the entry to a copy of the window data */
- entry->window = svn_stringbuf_dup(window, fs->window_cache->pool);
- entry->offset = offset;
- entry->revision = revision;
+ return svn_error_trace(svn_cache__set(fs->window_cache, &key, window,
+ pool));
}
-/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
- Use POOL for temporary allocations. */
+/* Given rev pack PATH in FS, read the manifest file and return the offsets
+ * in *MANIFEST. Use POOL for allocations.
+ */
static svn_error_t *
read_manifest(apr_array_header_t **manifest,
fs_fs_t *fs,
@@ -409,6 +372,10 @@ read_manifest(apr_array_header_t **manif
return svn_stream_close(manifest_stream);
}
+/* Read header information for the revision stored in FILE_CONTENT (one
+ * whole revision). Return the offsets within FILE_CONTENT for the
+ * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN.
+ * Use POOL for temporary allocations. */
static svn_error_t *
read_revision_header(apr_size_t *changes,
apr_size_t *changes_len,
@@ -447,8 +414,10 @@ read_revision_header(apr_size_t *changes
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
_("Final line in revision file missing space"));
+ /* terminate the header line */
*space = 0;
-
+
+ /* extract information */
SVN_ERR(svn_cstring_strtoui64(&val, line+1, 0, APR_SIZE_MAX, 10));
*root_noderev = (apr_size_t)val;
SVN_ERR(svn_cstring_strtoui64(&val, space+1, 0, APR_SIZE_MAX, 10));
@@ -458,6 +427,10 @@ read_revision_header(apr_size_t *changes
return SVN_NO_ERROR;
}
+/* Read the FSFS format number and sharding size from the format file at
+ * PATH and return it in *PFORMAT and *MAX_FILES_PER_DIR respectively.
+ * Use POOL for temporary allocations.
+ */
static svn_error_t *
read_format(int *pformat, int *max_files_per_dir,
const char *path, apr_pool_t *pool)
@@ -467,6 +440,7 @@ read_format(int *pformat, int *max_files
char buf[80];
apr_size_t len;
+ /* open format file and read the first line */
err = svn_io_file_open(&file, path, APR_READ | APR_BUFFERED,
APR_OS_DEFAULT, pool);
if (err && APR_STATUS_IS_ENOENT(err->apr_err))
@@ -541,21 +515,27 @@ read_format(int *pformat, int *max_files
return svn_io_file_close(file, pool);
}
+/* Read the number stored in the file at PATH and return it in *RESULT.
+ * Use POOL for temporary allocations.
+ */
static svn_error_t *
read_number(svn_revnum_t *result, const char *path, apr_pool_t *pool)
{
svn_stringbuf_t *content;
- apr_int64_t number;
+ apr_uint64_t number;
SVN_ERR(svn_stringbuf_from_file2(&content, path, pool));
content->data[content->len-1] = 0;
- SVN_ERR(svn_cstring_atoi64(&number, content->data));
+ SVN_ERR(svn_cstring_strtoui64(&number, content->data, 0, LONG_MAX, 10));
*result = (svn_revnum_t)number;
return SVN_NO_ERROR;
}
+/* Create *FS for the repository at PATH and read the format and size info.
+ * Use POOL for temporary allocations.
+ */
static svn_error_t *
fs_open(fs_fs_t **fs, const char *path, apr_pool_t *pool)
{
@@ -570,7 +550,8 @@ fs_open(fs_fs_t **fs, const char *path,
pool));
if (((*fs)->format != 4) && ((*fs)->format != 6))
return svn_error_create(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL, NULL);
-
+
+ /* read size (HEAD) info */
SVN_ERR(read_number(&(*fs)->min_unpacked_rev,
svn_dirent_join(path, "db/min-unpacked-rev", pool),
pool));
@@ -579,12 +560,18 @@ fs_open(fs_fs_t **fs, const char *path,
pool);
}
+/* Utility function that returns true if STRING->DATA matches KEY.
+ */
static svn_boolean_t
key_matches(svn_string_t *string, const char *key)
{
return strcmp(string->data, key) == 0;
}
+/* Comparator used for binary search comparing the absolute file offset
+ * of a representation to some other offset. DATA is a *representation_t,
+ * KEY is a pointer to an apr_size_t.
+ */
static int
compare_representation_offsets(const void *data, const void *key)
{
@@ -597,6 +584,15 @@ compare_representation_offsets(const voi
return diff > 0 ? 1 : 0;
}
+/* Find the revision_info_t object for the given REVISION in FS and return
+ * it in *REVISION_INFO. For performance reasons, we skip the lookup if
+ * the info is already provided.
+ *
+ * In that revision, look for the representation_t object for offset OFFSET.
+ * If it already exists, set *IDX to its index in *REVISION_INFO's
+ * representations list and return the representation object. Otherwise,
+ * set the index to where it must be inserted and return NULL.
+ */
static representation_t *
find_representation(int *idx,
fs_fs_t *fs,
@@ -606,7 +602,8 @@ find_representation(int *idx,
{
revision_info_t *info;
*idx = -1;
-
+
+ /* first let's find the revision */
info = revision_info ? *revision_info : NULL;
if (info == NULL || info->revision != revision)
{
@@ -617,23 +614,36 @@ find_representation(int *idx,
*revision_info = info;
}
+ /* not found -> no result */
if (info == NULL)
return NULL;
+
+ assert(revision == info->revision);
+ /* look for the representation */
*idx = svn_sort__bsearch_lower_bound(&offset,
info->representations,
compare_representation_offsets);
if (*idx < info->representations->nelts)
{
+ /* return the representation, if this is the one we were looking for */
representation_t *result
= APR_ARRAY_IDX(info->representations, *idx, representation_t *);
if (result->offset == offset)
return result;
}
+ /* not parsed, yet */
return NULL;
}
+/* Read the representation header in FILE_CONTENT at OFFSET. Return its
+ * size in *HEADER_SIZE, set *IS_PLAIN if no deltification was used and
+ * return the deltification base representation in *REPRESENTATION. If
+ * there is none, set it to NULL. Use FS to look it up.
+ *
+ * Use POOL for allocations and SCRATCH_POOL for temporaries.
+ */
static svn_error_t *
read_rep_base(representation_t **representation,
apr_size_t *header_size,
@@ -649,10 +659,12 @@ read_rep_base(representation_t **represe
svn_revnum_t revision;
apr_uint64_t temp;
+ /* identify representation header (1 line) */
const char *buffer = file_content->data + offset;
const char *line_end = strchr(buffer, '\n');
*header_size = line_end - buffer + 1;
+ /* check for PLAIN rep */
if (strncmp(buffer, "PLAIN\n", *header_size) == 0)
{
*is_plain = TRUE;
@@ -660,6 +672,7 @@ read_rep_base(representation_t **represe
return SVN_NO_ERROR;
}
+ /* check for DELTA against empty rep */
*is_plain = FALSE;
if (strncmp(buffer, "DELTA\n", *header_size) == 0)
{
@@ -671,7 +684,7 @@ read_rep_base(representation_t **represe
str = apr_pstrndup(scratch_pool, buffer, line_end - buffer);
last_str = str;
- /* We hopefully have a DELTA vs. a non-empty base revision. */
+ /* parse it. */
str = svn_cstring_tokenize(" ", &last_str);
str = svn_cstring_tokenize(" ", &last_str);
SVN_ERR(svn_revnum_parse(&revision, str, NULL));
@@ -679,10 +692,18 @@ read_rep_base(representation_t **represe
str = svn_cstring_tokenize(" ", &last_str);
SVN_ERR(svn_cstring_strtoui64(&temp, str, 0, APR_SIZE_MAX, 10));
+ /* it should refer to a rep in an earlier revision. Look it up */
*representation = find_representation(&idx, fs, NULL, revision, (apr_size_t)temp);
return SVN_NO_ERROR;
}
+/* Parse the representation reference (text: or props:) in VALUE, look
+ * it up in FS and return it in *REPRESENTATION. To be able to parse the
+ * base rep, we pass the FILE_CONTENT as well.
+ *
+ * If necessary, allocate the result in POOL; use SCRATCH_POOL for temp.
+ * allocations.
+ */
static svn_error_t *
parse_representation(representation_t **representation,
fs_fs_t *fs,
@@ -700,15 +721,20 @@ parse_representation(representation_t **
apr_uint64_t expanded_size;
int idx;
+ /* read location (revision, offset) and size */
char *c = (char *)value->data;
SVN_ERR(svn_revnum_parse(&revision, svn_cstring_tokenize(" ", &c), NULL));
SVN_ERR(svn_cstring_strtoui64(&offset, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
SVN_ERR(svn_cstring_strtoui64(&size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
SVN_ERR(svn_cstring_strtoui64(&expanded_size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
+ /* look it up */
result = find_representation(&idx, fs, &revision_info, revision, (apr_size_t)offset);
if (!result)
{
+ /* not parsed, yet (probably a rep in the same revision).
+ * Create a new rep object and determine its base rep as well.
+ */
apr_size_t header_size;
svn_boolean_t is_plain;
@@ -732,8 +758,10 @@ parse_representation(representation_t **
return SVN_NO_ERROR;
}
-/* Get the file content of revision REVISION in FS and return it in *DATA.
- * Use SCRATCH_POOL for temporary allocations.
+/* Get the unprocessed (i.e. still deltified) content of REPRESENTATION in
+ * FS and return it in *CONTENT. If not NULL, FILE_CONTENT must contain
+ * the contents of the revision that also contains the representation.
+ * Use POOL for allocations.
*/
static svn_error_t *
get_rep_content(svn_stringbuf_t **content,
@@ -765,7 +793,7 @@ get_rep_content(svn_stringbuf_t **conten
offset = revision_info->offset
+ representation->offset
+ representation->header_size;
- SVN_ERR(get_content(content, fs, revision, offset,
+ SVN_ERR(get_content(content, NULL, fs, revision, offset,
representation->size, pool));
}
@@ -773,8 +801,12 @@ get_rep_content(svn_stringbuf_t **conten
}
-/* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
- window into *NWIN. */
+/* Read the delta window contents of all windows in REPRESENTATION in FS.
+ * If not NULL, FILE_CONTENT must contain the contents of the revision that
+ * also contains the representation.
+ * Return the data as svn_txdelta_window_t* instances in *WINDOWS.
+ * Use POOL for allocations.
+ */
static svn_error_t *
read_windows(apr_array_header_t **windows,
fs_fs_t *fs,
@@ -789,13 +821,16 @@ read_windows(apr_array_header_t **window
*windows = apr_array_make(pool, 0, sizeof(svn_txdelta_window_t *));
+ /* get the (still deltified) representation content */
SVN_ERR(get_rep_content(&content, fs, representation, file_content, pool));
+ /* create a read stream and position it directly after the rep header */
content->data += 3;
content->len -= 3;
stream = svn_stream_from_stringbuf(content, pool);
SVN_ERR(svn_stream_read(stream, &version, &len));
+ /* read the windows from that stream */
while (TRUE)
{
svn_txdelta_window_t *window;
@@ -816,9 +851,12 @@ read_windows(apr_array_header_t **window
return SVN_NO_ERROR;
}
-/* Get the undeltified window that is a result of combining all deltas
- from the current desired representation identified in *RB with its
- base representation. Store the window in *RESULT. */
+/* Get the undeltified representation that is a result of combining all
+ * deltas from the current desired REPRESENTATION in FS with its base
+ * representation. If not NULL, FILE_CONTENT must contain the contents of
+ * the revision that also contains the representation. Store the result
+ * in *CONTENT. Use POOL for allocations.
+ */
static svn_error_t *
get_combined_window(svn_stringbuf_t **content,
fs_fs_t *fs,
@@ -830,23 +868,36 @@ get_combined_window(svn_stringbuf_t **co
apr_array_header_t *windows;
svn_stringbuf_t *base_content, *result;
const char *source;
- apr_pool_t *sub_pool = svn_pool_create(pool);
- apr_pool_t *iter_pool = svn_pool_create(pool);
+ apr_pool_t *sub_pool;
+ apr_pool_t *iter_pool;
+ /* special case: no un-deltification necessary */
if (representation->is_plain)
- return get_rep_content(content, fs, representation, file_content, pool);
+ {
+ SVN_ERR(get_rep_content(content, fs, representation, file_content,
+ pool));
+ SVN_ERR(set_cached_window(fs, representation, *content, pool));
+ return SVN_NO_ERROR;
+ }
- *content = get_cached_window(fs, representation, pool);
+ /* special case: data already in cache */
+ SVN_ERR(get_cached_window(content, fs, representation, pool));
if (*content)
return SVN_NO_ERROR;
+ /* read the delta windows for this representation */
+ sub_pool = svn_pool_create(pool);
+ iter_pool = svn_pool_create(pool);
SVN_ERR(read_windows(&windows, fs, representation, file_content, sub_pool));
+
+ /* fetch / create the base content */
if (representation->delta_base && representation->delta_base->revision)
SVN_ERR(get_combined_window(&base_content, fs,
representation->delta_base, NULL, sub_pool));
else
base_content = svn_stringbuf_create_empty(sub_pool);
+ /* apply deltas */
result = svn_stringbuf_create_empty(pool);
source = base_content->data;
@@ -867,14 +918,17 @@ get_combined_window(svn_stringbuf_t **co
svn_pool_clear(iter_pool);
}
+ /* cache result and return it */
+ SVN_ERR(set_cached_window(fs, representation, result, sub_pool));
+ *content = result;
+
svn_pool_destroy(iter_pool);
svn_pool_destroy(sub_pool);
-
- set_cached_window(fs, representation, result);
- *content = result;
+
return SVN_NO_ERROR;
}
+/* forward declaration */
static svn_error_t *
read_noderev(fs_fs_t *fs,
svn_stringbuf_t *file_content,
@@ -883,6 +937,12 @@ read_noderev(fs_fs_t *fs,
apr_pool_t *pool,
apr_pool_t *scratch_pool);
+/* Starting at the directory in REPRESENTATION in FILE_CONTENT, read all
+ * DAG nodes, directories and representations linked in that tree structure.
+ * Store them in FS and REVISION_INFO. Also, read them only once.
+ *
+ * Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
+ */
static svn_error_t *
parse_dir(fs_fs_t *fs,
svn_stringbuf_t *file_content,
@@ -898,9 +958,11 @@ parse_dir(fs_fs_t *fs,
const char *revision_key;
apr_size_t key_len;
+ /* special case: empty dir rep */
if (representation == NULL)
return SVN_NO_ERROR;
+ /* get the directory as unparsed string */
iter_pool = svn_pool_create(scratch_pool);
text_pool = svn_pool_create(scratch_pool);
@@ -908,14 +970,16 @@ parse_dir(fs_fs_t *fs,
text_pool));
current = text->data;
+ /* calculate some invariants */
revision_key = apr_psprintf(text_pool, "r%ld/", representation->revision);
key_len = strlen(revision_key);
- /* Translate the string dir entries into real entries. */
+ /* Parse and process all directory entries. */
while (*current != 'E')
{
char *next;
+ /* skip "K ???\n<name>\nV ???\n" lines*/
current = strchr(current, '\n');
if (current)
current = strchr(current+1, '\n');
@@ -927,11 +991,14 @@ parse_dir(fs_fs_t *fs,
_("Corrupt directory representation in rev %ld at offset %ld"),
representation->revision,
(long)representation->offset);
-
+
+ /* iff this entry refers to a node in the same revision as this dir,
+ * recurse into that node */
*next = 0;
current = strstr(current, revision_key);
if (current)
{
+ /* recurse */
apr_uint64_t offset;
SVN_ERR(svn_cstring_strtoui64(&offset, current + key_len, 0,
@@ -949,6 +1016,13 @@ parse_dir(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* Starting at the noderev at OFFSET in FILE_CONTENT, read all DAG nodes,
+ * directories and representations linked in that tree structure. Store
+ * them in FS and REVISION_INFO. Also, read them only once. Return the
+ * result in *NODEREV.
+ *
+ * Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
+ */
static svn_error_t *
read_noderev(fs_fs_t *fs,
svn_stringbuf_t *file_content,
@@ -964,9 +1038,11 @@ read_noderev(fs_fs_t *fs,
svn_boolean_t is_dir = FALSE;
scratch_pool = svn_pool_create(scratch_pool);
-
+
+ /* parse the noderev line-by-line until we find an empty line */
while (1)
{
+ /* for this line, extract key and value. Ignore invalid values */
svn_string_t key;
svn_string_t value;
char *sep;
@@ -975,6 +1051,8 @@ read_noderev(fs_fs_t *fs,
line = svn_string_ncreate(start, end - start, scratch_pool);
offset += end - start + 1;
+
+ /* empty line -> end of noderev data */
if (line->len == 0)
break;
@@ -992,6 +1070,7 @@ read_noderev(fs_fs_t *fs,
value.data = sep + 2;
value.len = line->len - (key.len + 2);
+ /* translate (key, value) into noderev elements */
if (key_matches(&key, "type"))
is_dir = strcmp(value.data, "dir") == 0;
else if (key_matches(&key, "text"))
@@ -999,6 +1078,8 @@ read_noderev(fs_fs_t *fs,
SVN_ERR(parse_representation(&text, fs, file_content,
&value, revision_info,
pool, scratch_pool));
+
+ /* if we are the first to use this rep, mark it as "text rep" */
if (++text->ref_count == 1)
text->kind = is_dir ? dir_rep : file_rep;
}
@@ -1007,15 +1088,20 @@ read_noderev(fs_fs_t *fs,
SVN_ERR(parse_representation(&props, fs, file_content,
&value, revision_info,
pool, scratch_pool));
+
+ /* if we are the first to use this rep, mark it as "prop rep" */
if (++props->ref_count == 1)
props->kind = is_dir ? dir_property_rep : file_property_rep;
}
}
+ /* if this is a directory and has not been processed, yet, read and
+ * process it recursively */
if (is_dir && text && text->ref_count == 1)
SVN_ERR(parse_dir(fs, file_content, text, revision_info,
pool, scratch_pool));
+ /* update stats */
if (is_dir)
{
revision_info->dir_noderev_size += offset - start_offset;
@@ -1031,6 +1117,9 @@ read_noderev(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* Given the unparsed changes list in CHANGES with LEN chars, return the
+ * number of changed paths encoded in it.
+ */
static apr_size_t
get_change_count(const char *changes,
apr_size_t len)
@@ -1038,19 +1127,27 @@ get_change_count(const char *changes,
apr_size_t lines = 0;
const char *end = changes + len;
+ /* line count */
for (; changes < end; ++changes)
if (*changes == '\n')
++lines;
+ /* two lines per change */
return lines / 2;
}
-static void print_progress(svn_revnum_t revision)
+/* Simple utility to print a REVISION number and make it appear immediately.
+ */
+static void
+print_progress(svn_revnum_t revision)
{
printf("%8ld", revision);
fflush(stdout);
}
+/* Read the content of the pack file starting at revision BASE and store it
+ * in FS. Use POOL for allocations.
+ */
static svn_error_t *
read_pack_file(fs_fs_t *fs,
svn_revnum_t base,
@@ -1061,19 +1158,24 @@ read_pack_file(fs_fs_t *fs,
apr_pool_t *iter_pool = svn_pool_create(local_pool);
int i;
apr_off_t file_size = 0;
+ apr_file_t *file;
const char *pack_folder = get_pack_folder(fs, base, local_pool);
+ /* parse the manifest file */
SVN_ERR(read_manifest(&manifest, fs, pack_folder, local_pool));
if (manifest->nelts != fs->max_files_per_dir)
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, NULL);
- SVN_ERR(rev_or_pack_file_size(&file_size, fs, base, pool));
+ SVN_ERR(open_rev_or_pack_file(&file, fs, base, local_pool));
+ SVN_ERR(get_file_size(&file_size, file, local_pool));
+ /* process each revision in the pack file */
for (i = 0; i < manifest->nelts; ++i)
{
apr_size_t root_node_offset;
svn_stringbuf_t *rev_content;
+ /* create the revision info for the current rev */
revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
info->representations = apr_array_make(iter_pool, 4, sizeof(representation_t*));
@@ -1083,7 +1185,7 @@ read_pack_file(fs_fs_t *fs,
? APR_ARRAY_IDX(manifest, i+1 , apr_size_t)
: file_size;
- SVN_ERR(get_content(&rev_content, fs, info->revision,
+ SVN_ERR(get_content(&rev_content, file, fs, info->revision,
info->offset,
info->end - info->offset,
iter_pool));
@@ -1103,15 +1205,20 @@ read_pack_file(fs_fs_t *fs,
info->representations = apr_array_copy(pool, info->representations);
APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
+ /* destroy temps */
svn_pool_clear(iter_pool);
}
+ /* one more pack file processed */
print_progress(base);
- apr_pool_destroy(local_pool);
+ svn_pool_destroy(local_pool);
return SVN_NO_ERROR;
}
+/* Read the content of the file for REVISION and store its contents in FS.
+ * Use POOL for allocations.
+ */
static svn_error_t *
read_revision_file(fs_fs_t *fs,
svn_revnum_t revision,
@@ -1122,16 +1229,21 @@ read_revision_file(fs_fs_t *fs,
svn_stringbuf_t *rev_content;
revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
apr_off_t file_size = 0;
+ apr_file_t *file;
- SVN_ERR(rev_or_pack_file_size(&file_size, fs, revision, pool));
+ /* read the whole pack file into memory */
+ SVN_ERR(open_rev_or_pack_file(&file, fs, revision, local_pool));
+ SVN_ERR(get_file_size(&file_size, file, local_pool));
+ /* create the revision info for the current rev */
info->representations = apr_array_make(pool, 4, sizeof(representation_t*));
info->revision = revision;
info->offset = 0;
info->end = file_size;
- SVN_ERR(get_content(&rev_content, fs, revision, 0, file_size, local_pool));
+ SVN_ERR(get_content(&rev_content, file, fs, revision, 0, file_size,
+ local_pool));
SVN_ERR(read_revision_header(&info->changes,
&info->changes_len,
@@ -1139,24 +1251,31 @@ read_revision_file(fs_fs_t *fs,
rev_content,
local_pool));
+ /* put it into our containers */
APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
info->change_count
= get_change_count(rev_content->data + info->changes,
info->changes_len);
+ /* parse the revision content recursively. */
SVN_ERR(read_noderev(fs, rev_content,
root_node_offset, info,
pool, local_pool));
+ /* show progress every 1000 revs or so */
if (revision % fs->max_files_per_dir == 0)
print_progress(revision);
- apr_pool_destroy(local_pool);
+ svn_pool_destroy(local_pool);
return SVN_NO_ERROR;
}
+/* Read the repository at PATH beginning with revision START_REVISION and
+ * return the result in *FS. Allocate caches with MEMSIZE bytes total
+ * capacity. Use POOL for non-cache allocations.
+ */
static svn_error_t *
read_revisions(fs_fs_t **fs,
const char *path,
@@ -1165,63 +1284,97 @@ read_revisions(fs_fs_t **fs,
apr_pool_t *pool)
{
svn_revnum_t revision;
- apr_size_t window_cache_size;
+ svn_cache_config_t cache_config = *svn_cache_config_get();
/* determine cache sizes */
if (memsize < 100)
memsize = 100;
-
- window_cache_size = memsize * 1024 * 1024;
+
+ cache_config.cache_size = memsize * 1024 * 1024;
+ svn_cache_config_set(&cache_config);
SVN_ERR(fs_open(fs, path, pool));
+ /* create data containers and caches */
(*fs)->start_revision = start_revision
- (start_revision % (*fs)->max_files_per_dir);
(*fs)->revisions = apr_array_make(pool,
(*fs)->max_revision + 1 - (*fs)->start_revision,
sizeof(revision_info_t *));
(*fs)->null_base = apr_pcalloc(pool, sizeof(*(*fs)->null_base));
- (*fs)->window_cache = create_window_cache
- (apr_allocator_owner_get
- (svn_pool_create_allocator(FALSE)),
- 10000, window_cache_size);
+ SVN_ERR(svn_cache__create_membuffer_cache(&(*fs)->window_cache,
+ svn_cache__get_global_membuffer_cache(),
+ NULL, NULL,
+ sizeof(window_cache_key_t),
+ "", FALSE, pool));
+
+ /* read all packed revs */
for ( revision = start_revision
; revision < (*fs)->min_unpacked_rev
; revision += (*fs)->max_files_per_dir)
SVN_ERR(read_pack_file(*fs, revision, pool));
-
+
+ /* read non-packed revs */
for ( ; revision <= (*fs)->max_revision; ++revision)
SVN_ERR(read_revision_file(*fs, revision, pool));
return SVN_NO_ERROR;
}
+/* Compression statistics we collect over a given set of representations.
+ */
typedef struct rep_pack_stats_t
{
+ /* number of representations */
apr_int64_t count;
+
+ /* total size after deltification (i.e. on disk size) */
apr_int64_t packed_size;
+
+ /* total size after de-deltification (i.e. plain text size) */
apr_int64_t expanded_size;
+
+ /* total on-disk header size */
apr_int64_t overhead_size;
} rep_pack_stats_t;
+/* Statistics we collect over a given set of representations.
+ * We group them into shared and non-shared ("unique") reps.
+ */
typedef struct representation_stats_t
{
+ /* stats over all representations */
rep_pack_stats_t total;
+
+ /* stats over those representations with ref_count == 1 */
rep_pack_stats_t uniques;
+
+ /* stats over those representations with ref_count > 1 */
rep_pack_stats_t shared;
+ /* sum of all ref_counts */
apr_int64_t references;
+
+ /* sum of ref_count * expanded_size,
+ * i.e. total plaintext content if there was no rep sharing */
apr_int64_t expanded_size;
} representation_stats_t;
+/* Basic statistics we collect over a given set of noderevs.
+ */
typedef struct node_stats_t
{
+ /* number of noderev structs */
apr_int64_t count;
+
+ /* their total size on disk (structs only) */
apr_int64_t size;
} node_stats_t;
+/* Accumulate stats of REP in STATS.
+ */
static void
add_rep_pack_stats(rep_pack_stats_t *stats,
representation_t *rep)
@@ -1230,9 +1383,11 @@ add_rep_pack_stats(rep_pack_stats_t *sta
stats->packed_size += rep->size;
stats->expanded_size += rep->expanded_size;
- stats->overhead_size += rep->header_size + 7;
+ stats->overhead_size += rep->header_size + 7 /* ENDREP\n */;
}
+/* Accumulate stats of REP in STATS.
+ */
static void
add_rep_stats(representation_stats_t *stats,
representation_t *rep)
@@ -1247,6 +1402,9 @@ add_rep_stats(representation_stats_t *st
stats->expanded_size += rep->ref_count * rep->expanded_size;
}
+/* Print statistics for the given group of representations to console.
+ * Use POOL for allocations.
+ */
static void
print_rep_stats(representation_stats_t *stats,
apr_pool_t *pool)
@@ -1267,12 +1425,16 @@ print_rep_stats(representation_stats_t *
svn__i64toa_sep(stats->references - stats->total.count, ',', pool));
}
+/* Post-process stats for FS and print them to the console.
+ * Use POOL for allocations.
+ */
static void
print_stats(fs_fs_t *fs,
apr_pool_t *pool)
{
int i, k;
-
+
+ /* initialize stats to collect */
representation_stats_t file_rep_stats = { { 0 } };
representation_stats_t dir_rep_stats = { { 0 } };
representation_stats_t file_prop_rep_stats = { { 0 } };
@@ -1286,11 +1448,14 @@ print_stats(fs_fs_t *fs,
apr_int64_t total_size = 0;
apr_int64_t change_count = 0;
apr_int64_t change_len = 0;
-
+
+ /* aggregate info from all revisions */
for (i = 0; i < fs->revisions->nelts; ++i)
{
revision_info_t *revision = APR_ARRAY_IDX(fs->revisions, i,
revision_info_t *);
+
+ /* data gathered on a revision level */
change_count += revision->change_count;
change_len += revision->changes_len;
total_size += revision->end - revision->offset;
@@ -1303,11 +1468,14 @@ print_stats(fs_fs_t *fs,
+ revision->file_noderev_count;
total_node_stats.size += revision->dir_noderev_size
+ revision->file_noderev_size;
-
+
+ /* process representations */
for (k = 0; k < revision->representations->nelts; ++k)
{
representation_t *rep = APR_ARRAY_IDX(revision->representations,
k, representation_t *);
+
+ /* accumulate in the right bucket */
switch(rep->kind)
{
case file_rep:
@@ -1330,6 +1498,7 @@ print_stats(fs_fs_t *fs,
}
}
+ /* print results */
printf("\nGlobal statistics:\n");
printf(_("%20s bytes in %12s revisions\n"
"%20s bytes in %12s changes\n"
@@ -1388,6 +1557,9 @@ print_stats(fs_fs_t *fs,
print_rep_stats(&file_prop_rep_stats, pool);
}
+/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and
+ * POOL for allocations.
+ */
static void
print_usage(svn_stream_t *ostream, const char *progname,
apr_pool_t *pool)
@@ -1404,6 +1576,7 @@ print_usage(svn_stream_t *ostream, const
progname));
}
+/* linear control flow */
int main(int argc, const char *argv[])
{
apr_pool_t *pool;
Modified: subversion/branches/tree-read-api/tools/server-side/svnpubsub/commit-hook.py
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/svnpubsub/commit-hook.py?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/svnpubsub/commit-hook.py (original)
+++ subversion/branches/tree-read-api/tools/server-side/svnpubsub/commit-hook.py Sun Jan 6 02:33:34 2013
@@ -42,11 +42,11 @@ def svncmd_uuid(repo):
def svncmd_info(repo, revision):
cmd = "%s info -r %s %s" % (SVNLOOK, revision, repo)
p = svncmd(cmd)
- data = p.stdout.read().strip().split("\n")
+ data = p.stdout.read().split("\n")
#print data
- return {'author': data[0],
- 'date': data[1],
- 'log': "\n".join(data[3:])}
+ return {'author': data[0].strip(),
+ 'date': data[1].strip(),
+ 'log': "\n".join(data[3:]).strip()}
def svncmd_dirs(repo, revision):
cmd = "%s dirs-changed -r %s %s" % (SVNLOOK, revision, repo)
@@ -59,6 +59,19 @@ def svncmd_dirs(repo, revision):
dirs.append(line.strip())
return dirs
+def svncmd_changed(repo, revision):
+ cmd = "%s changed -r %s %s" % (SVNLOOK, revision, repo)
+ p = svncmd(cmd)
+ changed = {}
+ while True:
+ line = p.stdout.readline()
+ if not line:
+ break
+ line = line.strip()
+ (flags, filename) = (line[0:3], line[4:])
+ changed[filename] = {'flags': flags}
+ return changed
+
def do_put(body):
opener = urllib2.build_opener(urllib2.HTTPHandler)
request = urllib2.Request("http://%s:%d/dirs-changed" %(HOST, PORT), data=body)
@@ -72,12 +85,14 @@ def main(repo, revision):
i = svncmd_info(repo, revision)
data = {'revision': int(revision),
'dirs_changed': [],
+ 'changed': {},
'repos': svncmd_uuid(repo),
'author': i['author'],
'log': i['log'],
'date': i['date'],
}
data['dirs_changed'].extend(svncmd_dirs(repo, revision))
+ data['changed'].update(svncmd_changed(repo, revision))
body = json.dumps(data)
#print body
do_put(body)
Modified: subversion/branches/tree-read-api/tools/server-side/svnpubsub/irkerbridge.py
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/svnpubsub/irkerbridge.py?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/svnpubsub/irkerbridge.py (original)
+++ subversion/branches/tree-read-api/tools/server-side/svnpubsub/irkerbridge.py Sun Jan 6 02:33:34 2013
@@ -145,6 +145,12 @@ class BigDoEverythingClass(object):
return result
def fill_in_extra_args(self, rev):
+ # Set any empty members to the string "<null>"
+ v = vars(rev)
+ for k in v.keys():
+ if not v[k]:
+ v[k] = '<null>'
+
# Add entries to the rev object that are useful for
# formatting.
rev.log_firstline = rev.log.split("\n",1)[0]
Modified: subversion/branches/tree-read-api/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd
URL: http://svn.apache.org/viewvc/subversion/branches/tree-read-api/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd?rev=1429457&r1=1429456&r2=1429457&view=diff
==============================================================================
--- subversion/branches/tree-read-api/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd (original)
+++ subversion/branches/tree-read-api/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd Sun Jan 6 02:33:34 2013
@@ -20,11 +20,13 @@ svnpubsub_user=${svnpubsub_user-"svn"}
svnpubsub_group=${svnpubsub_group-"svn"}
svnpubsub_reactor=${svnpubsub_reactor-"poll"}
svnpubsub_pidfile=${svnpubsub_pidfile-"/var/run/svnpubsub/svnpubsub.pid"}
+svnpubsub_cmd_int=${svnpubsub_cmd_int-"python"}
pidfile="${svnpubsub_pidfile}"
export PYTHON_EGG_CACHE="/home/svn/.python-eggs"
command="/usr/local/bin/twistd"
+command_interpreter="/usr/local/bin/${svnwcsub_cmd_int}"
command_args="-y /usr/local/svnpubsub/svnpubsub.tac \
--logfile=/var/log/vc/svnpubsub.log \
--pidfile=${pidfile} \