You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by br...@apache.org on 2013/12/03 02:43:06 UTC

svn commit: r1547260 [3/5] - in /subversion/branches/fsfs-ucsnorm: ./ subversion/bindings/javahl/native/ subversion/include/ subversion/libsvn_fs_fs/ subversion/tests/cmdline/ subversion/tests/cmdline/svntest/ subversion/tests/libsvn_fs_fs/ tools/serve...

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/pack.c
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/pack.c?rev=1547260&r1=1547259&r2=1547260&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/pack.c (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/pack.c Tue Dec  3 01:43:05 2013
@@ -27,11 +27,13 @@
 #include "private/svn_temp_serializer.h"
 #include "private/svn_subr_private.h"
 #include "private/svn_string_private.h"
+#include "private/svn_io_private.h"
 
 #include "fs_fs.h"
 #include "pack.h"
 #include "util.h"
 #include "id.h"
+#include "index.h"
 #include "low_level.h"
 #include "revprops.h"
 #include "transaction.h"
@@ -41,6 +43,571 @@
 #include "svn_private_config.h"
 #include "temp_serializer.h"
 
+/* Logical addressing packing logic:
+ *
+ * We pack files on a pack file basis (e.g. 1000 revs) without changing
+ * existing pack files nor the revision files outside the range to pack.
+ *
+ * First, we will scan the revision file indexes to determine the number
+ * of items to "place" (i.e. determine their optimal position within the
+ * future pack file).  For each item, we will need a constant amount of
+ * memory to track it.  A MAX_MEM parameter sets a limit to the number of
+ * items we may place in one go.  That means, we may not be able to add
+ * all revisions at once.  Instead, we will run the placement for a subset
+ * of revisions at a time.  The very unlikely worst case will simply append
+ * all revision data with just a little reshuffling inside each revision.
+ *
+ * In a second step, we read all revisions in the selected range, build
+ * the item tracking information and copy the items themselves from the
+ * revision files to temporary files.  The latter serve as buckets for a
+ * very coarse bucket presort:  Separate change lists, file properties,
+ * directory properties and noderevs + representations from one another.
+ *
+ * The third step will determine an optimized placement for the items in
+ * each of the 4 buckets separately.  The first three will simply order
+ * their items by revision, starting with the newest ones.  Placing rep
+ * and noderev items is a more elaborate process documented in the code.
+ * 
+ * In short, we store items in the following order:
+ * - changed paths lists
+ * - file properties
+ * - directory properties
+ * - noderevs and representations, reverse lexical path order
+ *
+ * Step 4 copies the items from the temporary buckets into the final
+ * pack file and writes the temporary index files.
+ *
+ * Finally, after the last range of revisions, create the final indexes.
+ */
+
+/* Maximum amount of memory we allocate for placement information during
+ * the pack process (64 Mi; presumably counted in bytes - TODO confirm).
+ */
+#define DEFAULT_MAX_MEM (64 * 1024 * 1024)
+
+/* Data structure describing a node change at PATH, REVISION.
+ * We will sort these instances by PATH, NODE_ID and REVISION (all in
+ * descending order, see compare_path_order) such that we can combine
+ * similar nodes in the same reps container and store containers in path
+ * major order.
+ */
+typedef struct path_order_t
+{
+  /* changed path */
+  svn_prefix_string__t *path;
+
+  /* node ID for this PATH in REVISION */
+  svn_fs_fs__id_part_t node_id;
+
+  /* when this change happened */
+  svn_revnum_t revision;
+
+  /* length of the expanded representation content.  Falls back to the
+   * on-disk size of the representation if its expanded size is 0. */
+  apr_int64_t expanded_size;
+
+  /* item ID of the noderev linked to the change. May be (0, 0). */
+  svn_fs_fs__id_part_t noderev_id;
+
+  /* item ID of the representation containing the new data. May be (0, 0),
+   * e.g. if the noderev has no data rep within the revision range being
+   * processed. */
+  svn_fs_fs__id_part_t rep_id;
+} path_order_t;
+
+/* Represents a reference from item FROM to item TO.  FROM may be a noderev
+ * or rep_id while TO is (currently) always a representation.  We will sort
+ * them by TO which allows us to collect all dependent items.
+ */
+typedef struct reference_t
+{
+  /* the item being referred to: either the delta base of representation
+   * FROM or the data representation of noderev FROM */
+  svn_fs_fs__id_part_t to;
+
+  /* the item that holds the reference */
+  svn_fs_fs__id_part_t from;
+} reference_t;
+
+/* This structure keeps track of all the temporary data and status that
+ * needs to be kept around during the creation of one pack file.  After
+ * each revision range (in case we can't process all revs at once due to
+ * memory restrictions), parts of the data will get re-initialized.
+ */
+typedef struct pack_context_t
+{
+  /* file system that we operate on */
+  svn_fs_t *fs;
+
+  /* cancel function to invoke at regular intervals. May be NULL */
+  svn_cancel_func_t cancel_func;
+
+  /* baton to pass to CANCEL_FUNC */
+  void *cancel_baton;
+
+  /* first revision in the shard (and future pack file) */
+  svn_revnum_t shard_rev;
+
+  /* first revision in the range to process (>= SHARD_REV) */
+  svn_revnum_t start_rev;
+
+  /* first revision after the range to process (<= SHARD_END_REV) */
+  svn_revnum_t end_rev;
+
+  /* first revision after the current shard */
+  svn_revnum_t shard_end_rev;
+
+  /* log-to-phys proto index for the whole pack file */
+  apr_file_t *proto_l2p_index;
+
+  /* phys-to-log proto index for the whole pack file */
+  apr_file_t *proto_p2l_index;
+
+  /* full shard directory path (containing the unpacked revisions) */
+  const char *shard_dir;
+
+  /* full packed shard directory path (containing the pack file + indexes) */
+  const char *pack_file_dir;
+
+  /* full pack file path (including PACK_FILE_DIR) */
+  const char *pack_file_path;
+
+  /* current write position (i.e. file length) in the pack file */
+  apr_off_t pack_offset;
+
+  /* the pack file to ultimately write all data to */
+  apr_file_t *pack_file;
+
+  /* array of svn_fs_fs__p2l_entry_t *, all referring to change lists.
+   * Will be filled in phase 2 and be cleared after each revision range. */
+  apr_array_header_t *changes;
+
+  /* temp file receiving all change list items (referenced by CHANGES).
+   * Will be filled in phase 2 and be cleared after each revision range. */
+  apr_file_t *changes_file;
+
+  /* array of svn_fs_fs__p2l_entry_t *, all referring to file properties.
+   * Will be filled in phase 2 and be cleared after each revision range. */
+  apr_array_header_t *file_props;
+
+  /* temp file receiving all file prop items (referenced by FILE_PROPS).
+   * Will be filled in phase 2 and be cleared after each revision range.*/
+  apr_file_t *file_props_file;
+
+  /* array of svn_fs_fs__p2l_entry_t *, all referring to directory properties.
+   * Will be filled in phase 2 and be cleared after each revision range. */
+  apr_array_header_t *dir_props;
+
+  /* temp file receiving all directory prop items (referenced by DIR_PROPS).
+   * Will be filled in phase 2 and be cleared after each revision range.*/
+  apr_file_t *dir_props_file;
+
+  /* container for all PATH members in PATH_ORDER.  Allocated in
+   * INFO_POOL. */
+  svn_prefix_tree__t *paths;
+
+  /* array of path_order_t *.  Will be filled in phase 2 and be cleared
+   * after each revision range.  Sorted by PATH, NODE_ID. */
+  apr_array_header_t *path_order;
+
+  /* array of reference_t *.  Will be filled in phase 2 and be cleared
+   * after each revision range.  It will be sorted by the TO members. */
+  apr_array_header_t *references;
+
+  /* array of svn_fs_fs__p2l_entry_t*.  Will be filled in phase 2 and be
+   * cleared after each revision range.  During phase 3, we will set items
+   * to NULL that we already processed. */
+  apr_array_header_t *reps;
+
+  /* array of int, marking for each revision at which offset its items
+   * begin in REPS.  Will be filled in phase 2 and be cleared after
+   * each revision range. */
+  apr_array_header_t *rev_offsets;
+
+  /* temp file receiving all items referenced by REPS.
+   * Will be filled in phase 2 and be cleared after each revision range.*/
+  apr_file_t *reps_file;
+
+  /* pool used for temporary data structures that will be cleaned up when
+   * the next range of revisions is being processed */
+  apr_pool_t *info_pool;
+} pack_context_t;
+
+/* Initialize the caller-provided pack context *CONTEXT for packing shard
+ * SHARD_REV in SHARD_DIR into PACK_FILE_DIR within filesystem FS.
+ *
+ * This creates the pack file inside PACK_FILE_DIR (it must not exist yet
+ * because we open it with APR_EXCL), opens the two proto index files next
+ * to it and creates one temp file per item bucket.  All arrays, files and
+ * the INFO_POOL sub-pool are allocated in POOL.
+ *
+ * MAX_ITEMS is the number of items the caller intends to process per
+ * iteration; here it is only used to pre-size the item arrays.
+ * Set CANCEL_FUNC and CANCEL_BATON as well.
+ *
+ * FS must use a format with logical addressing and SHARD_REV must be the
+ * first revision of a shard; otherwise an assertion error is returned.
+ */
+static svn_error_t *
+initialize_pack_context(pack_context_t *context,
+                        svn_fs_t *fs,
+                        const char *pack_file_dir,
+                        const char *shard_dir,
+                        svn_revnum_t shard_rev,
+                        apr_size_t max_items,
+                        svn_cancel_func_t cancel_func,
+                        void *cancel_baton,
+                        apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  const char *temp_dir;
+
+  /* there can never be more revisions per range than fit into a shard */
+  apr_size_t max_revs = MIN(ffd->max_files_per_dir, (int)max_items);
+
+  SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT);
+  SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
+
+  /* where we will place our various temp files */
+  SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
+
+  /* store parameters */
+  context->fs = fs;
+  context->cancel_func = cancel_func;
+  context->cancel_baton = cancel_baton;
+
+  /* the range to process is empty initially: START_REV == END_REV */
+  context->shard_rev = shard_rev;
+  context->start_rev = shard_rev;
+  context->end_rev = shard_rev;
+  context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
+
+  /* Create the new directory and pack file. */
+  context->shard_dir = shard_dir;
+  context->pack_file_dir = pack_file_dir;
+  context->pack_file_path
+    = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
+  SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path,
+                           APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL
+                             | APR_CREATE, APR_OS_DEFAULT, pool));
+
+  /* Proto index files */
+  SVN_ERR(svn_fs_fs__l2p_proto_index_open(
+             &context->proto_l2p_index,
+             svn_dirent_join(pack_file_dir,
+                             PATH_INDEX PATH_EXT_L2P_INDEX,
+                             pool),
+             pool));
+  SVN_ERR(svn_fs_fs__p2l_proto_index_open(
+             &context->proto_p2l_index,
+             svn_dirent_join(pack_file_dir,
+                             PATH_INDEX PATH_EXT_P2L_INDEX,
+                             pool),
+             pool));
+
+  /* item buckets: one item info array and one temp file per bucket */
+  context->changes = apr_array_make(pool, max_items,
+                                    sizeof(svn_fs_fs__p2l_entry_t *));
+  SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
+                                   svn_io_file_del_on_close, pool, pool));
+  context->file_props = apr_array_make(pool, max_items,
+                                       sizeof(svn_fs_fs__p2l_entry_t *));
+  SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
+                                   svn_io_file_del_on_close, pool, pool));
+  context->dir_props = apr_array_make(pool, max_items,
+                                      sizeof(svn_fs_fs__p2l_entry_t *));
+  SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
+                                   svn_io_file_del_on_close, pool, pool));
+
+  /* noderev and representation item bucket */
+  context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
+  context->path_order = apr_array_make(pool, max_items, sizeof(path_order_t *));
+  context->references = apr_array_make(pool, max_items, sizeof(reference_t *));
+  context->reps = apr_array_make(pool, max_items,
+                                 sizeof(svn_fs_fs__p2l_entry_t *));
+  SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
+                                   svn_io_file_del_on_close, pool, pool));
+
+  /* the pool used for temp structures; the path container lives in it */
+  context->info_pool = svn_pool_create(pool);
+  context->paths = svn_prefix_tree__create(context->info_pool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Clean up / free all revision range specific data and files in CONTEXT
+ * such that the next revision range can be processed:  All item arrays
+ * get emptied, all bucket temp files get truncated to length 0 and
+ * CONTEXT->INFO_POOL gets cleared.  Use POOL for temporary allocations.
+ */
+static svn_error_t *
+reset_pack_context(pack_context_t *context,
+                   apr_pool_t *pool)
+{
+  apr_array_clear(context->changes);
+  SVN_ERR(svn_io_file_trunc(context->changes_file, 0, pool));
+  apr_array_clear(context->file_props);
+  SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, pool));
+  apr_array_clear(context->dir_props);
+  SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, pool));
+
+  apr_array_clear(context->rev_offsets);
+  apr_array_clear(context->path_order);
+  apr_array_clear(context->references);
+  apr_array_clear(context->reps);
+  SVN_ERR(svn_io_file_trunc(context->reps_file, 0, pool));
+
+  svn_pool_clear(context->info_pool);
+
+  /* The prefix tree was allocated in INFO_POOL and is gone now.  Create
+   * a fresh one so that CONTEXT->PATHS never refers to released memory
+   * when the next revision range adds its paths. */
+  context->paths = svn_prefix_tree__create(context->info_pool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Call this after the last revision range.  It will close the proto index
+ * files of CONTEXT, build the final L2P and P2L index files from them
+ * (named after the pack file plus the respective index extension), remove
+ * the proto index files and finally close the pack file.
+ * Use POOL for temporary allocations.
+ */
+static svn_error_t *
+close_pack_context(pack_context_t *context,
+                   apr_pool_t *pool)
+{
+  const char *l2p_index_path
+    = apr_pstrcat(pool, context->pack_file_path, PATH_EXT_L2P_INDEX, NULL);
+  const char *p2l_index_path
+    = apr_pstrcat(pool, context->pack_file_path, PATH_EXT_P2L_INDEX, NULL);
+  const char *proto_l2p_index_path;
+  const char *proto_p2l_index_path;
+
+  /* need the file names for the actual index creation call further down */
+  SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
+                               context->proto_l2p_index, pool));
+  SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
+                               context->proto_p2l_index, pool));
+
+  /* finalize proto index files */
+  SVN_ERR(svn_io_file_close(context->proto_l2p_index, pool));
+  SVN_ERR(svn_io_file_close(context->proto_p2l_index, pool));
+
+  /* Create the actual index files */
+  SVN_ERR(svn_fs_fs__l2p_index_create(context->fs, l2p_index_path,
+                                      proto_l2p_index_path,
+                                      context->shard_rev, pool));
+  SVN_ERR(svn_fs_fs__p2l_index_create(context->fs, p2l_index_path,
+                                      proto_p2l_index_path,
+                                      context->shard_rev, pool));
+
+  /* remove proto index files */
+  SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, pool));
+  SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, pool));
+
+  SVN_ERR(svn_io_file_close(context->pack_file, pool));
+
+  return SVN_NO_ERROR;
+}
+
+/* Transfer SIZE bytes from the current position in SOURCE to the current
+ * position in DEST.  Items smaller than STACK_BUFFER_SIZE go through a
+ * buffer on the stack; everything else is streamed in chunks of at most
+ * one block (as per the FS's BLOCK_SIZE setting), with the CANCEL_FUNC
+ * from CONTEXT - if there is one - being invoked before each chunk.
+ * Use POOL for allocations.
+ */
+static svn_error_t *
+copy_file_data(pack_context_t *context,
+               apr_file_t *dest,
+               apr_file_t *source,
+               apr_off_t size,
+               apr_pool_t *pool)
+{
+  /* most non-representation items will be small.  Minimize the buffer
+   * and infrastructure overhead in that case. */
+  enum { STACK_BUFFER_SIZE = 1024 };
+
+  fs_fs_data_t *ffd;
+  apr_pool_t *copypool;
+  char *block;
+
+  if (size < STACK_BUFFER_SIZE)
+    {
+      /* small item: a single read / write pair via the stack will do */
+      char stack_buffer[STACK_BUFFER_SIZE];
+      const apr_size_t length = (apr_size_t)size;
+
+      SVN_ERR(svn_io_file_read_full2(source, stack_buffer, length,
+                                     NULL, NULL, pool));
+      SVN_ERR(svn_io_file_write_full(dest, stack_buffer, length,
+                                     NULL, pool));
+
+      return SVN_NO_ERROR;
+    }
+
+  /* large item: stream it block by block.  The block buffer lives in its
+   * own sub-pool such that we can release it as soon as we are done. */
+  ffd = context->fs->fsap_data;
+  copypool = svn_pool_create(pool);
+  block = apr_palloc(copypool, ffd->block_size);
+
+  while (size)
+    {
+      apr_size_t chunk = (apr_size_t)(MIN(size, ffd->block_size));
+
+      if (context->cancel_func)
+        SVN_ERR(context->cancel_func(context->cancel_baton));
+
+      SVN_ERR(svn_io_file_read_full2(source, block, chunk,
+                                     NULL, NULL, pool));
+      SVN_ERR(svn_io_file_write_full(dest, block, chunk,
+                                     NULL, pool));
+
+      size -= chunk;
+    }
+
+  svn_pool_destroy(copypool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Append SIZE NUL bytes to DEST, at most BUFFER_SIZE of them per write
+ * call.  Uses POOL for allocations.
+ */
+static svn_error_t *
+write_null_bytes(apr_file_t *dest,
+                 apr_off_t size,
+                 apr_pool_t *pool)
+{
+  /* a static, zero-initialized block to take the NUL bytes from */
+  enum { BUFFER_SIZE = 1024 };
+  static const char zeros[BUFFER_SIZE] = { 0 };
+
+  apr_size_t chunk;
+
+  /* write block after block until nothing is left to be written */
+  for (; size; size -= chunk)
+    {
+      chunk = MIN(size, BUFFER_SIZE);
+      SVN_ERR(svn_io_file_write_full(dest, zeros, chunk, NULL, pool));
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Append the "simple" item (changed paths list or property representation)
+ * described by ENTRY to TEMP_FILE, reading its ENTRY->SIZE bytes from the
+ * current position in REV_FILE.  A duplicate of ENTRY, allocated in
+ * CONTEXT->INFO_POOL and with its offset changed to the item's position
+ * in TEMP_FILE, gets appended to ENTRIES.  Use POOL for allocations.
+ */
+static svn_error_t *
+copy_item_to_temp(pack_context_t *context,
+                  apr_array_header_t *entries,
+                  apr_file_t *temp_file,
+                  apr_file_t *rev_file,
+                  svn_fs_fs__p2l_entry_t *entry,
+                  apr_pool_t *pool)
+{
+  svn_fs_fs__p2l_entry_t *copy;
+
+  /* the duplicate must survive until the end of the revision range */
+  copy = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
+
+  /* seeking by 0 bytes relative to the current position tells us where
+   * in TEMP_FILE the item is going to start */
+  copy->offset = 0;
+  SVN_ERR(svn_io_file_seek(temp_file, SEEK_CUR, &copy->offset, pool));
+
+  /* register the item with its bucket, then copy the actual data */
+  APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t *) = copy;
+  SVN_ERR(copy_file_data(context, temp_file, rev_file, entry->size, pool));
+
+  return SVN_NO_ERROR;
+}
+
+/* Return the index within CONTEXT->REPS that corresponds to item
+ * ITEM_INDEX in REVISION.  REVISION must not be older than
+ * CONTEXT->START_REV.
+ */
+static int
+get_item_array_index(pack_context_t *context,
+                     svn_revnum_t revision,
+                     apr_int64_t item_index)
+{
+  int first_item;
+
+  assert(revision >= context->start_rev);
+
+  /* REV_OFFSETS is indexed relative to the first revision in the range */
+  first_item = APR_ARRAY_IDX(context->rev_offsets,
+                             revision - context->start_rev,
+                             int);
+
+  return (int)item_index + first_item;
+}
+
+/* Store ENTRY in CONTEXT->REPS at the position determined by the revision
+ * and item number of ENTRY.  The array gets extended with NULL elements
+ * as necessary.  Overwriting an array element is not allowed.
+ */
+static void
+add_item_rep_mapping(pack_context_t *context,
+                     svn_fs_fs__p2l_entry_t *entry)
+{
+  apr_array_header_t *reps = context->reps;
+
+  /* where ENTRY belongs within REPS */
+  const int idx = get_item_array_index(context,
+                                       entry->item.revision,
+                                       entry->item.number);
+
+  /* pad the array with NULLs until the slot exists */
+  while (reps->nelts <= idx)
+    APR_ARRAY_PUSH(reps, void *) = NULL;
+
+  /* An occupied slot would mean that two items claim the same identity
+   * -> bail out */
+  assert(!APR_ARRAY_IDX(reps, idx, void *));
+  APR_ARRAY_IDX(reps, idx, void *) = entry;
+}
+
+/* Look up the P2L entry for ID in CONTEXT->REPS and return it.  Return
+ * NULL if ID has item number 0, belongs to a revision before
+ * CONTEXT->START_REV, lies beyond the end of the array or if its array
+ * element is NULL (e.g. not anymore available).  If RESET has been
+ * specified, set the array element to NULL before returning the entry.
+ */
+static svn_fs_fs__p2l_entry_t *
+get_item(pack_context_t *context,
+         const svn_fs_fs__id_part_t *id,
+         svn_boolean_t reset)
+{
+  svn_fs_fs__p2l_entry_t *result;
+  int idx;
+
+  /* IDs that we don't track in this revision range */
+  if (!id->number || id->revision < context->start_rev)
+    return NULL;
+
+  /* IDs that nobody added to the array */
+  idx = get_item_array_index(context, id->revision, id->number);
+  if (idx >= context->reps->nelts)
+    return NULL;
+
+  /* fetch the element and hand it out at most once if RESET is set */
+  result = APR_ARRAY_IDX(context->reps, idx, void *);
+  if (result && reset)
+    APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
+
+  return result;
+}
+
+/* Copy representation item identified by ENTRY from the current position
+ * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking info needed by
+ * our placement algorithm to CONTEXT:  A copy of ENTRY (allocated in
+ * CONTEXT->INFO_POOL, pointing to the item's position in REPS_FILE) is
+ * added to CONTEXT->REPS and, if the representation is a delta against a
+ * base within the current revision range, the link to that base is added
+ * to CONTEXT->REFERENCES.  Use POOL for temporary allocations.
+ */
+static svn_error_t *
+copy_rep_to_temp(pack_context_t *context,
+                 apr_file_t *rev_file,
+                 svn_fs_fs__p2l_entry_t *entry,
+                 apr_pool_t *pool)
+{
+  svn_fs_fs__rep_header_t *rep_header;
+  svn_stream_t *stream;
+
+  /* remember where the item starts in REV_FILE; parsing the header below
+   * will move the file pointer */
+  apr_off_t source_offset = entry->offset;
+
+  /* create a copy of ENTRY, make it point to the copy destination and
+   * store it in CONTEXT */
+  entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
+  entry->offset = 0;
+  SVN_ERR(svn_io_file_seek(context->reps_file, SEEK_CUR, &entry->offset,
+                           pool));
+  add_item_rep_mapping(context, entry);
+
+  /* read & parse the representation header.  Don't drop the result of
+   * the close call: an unhandled svn_error_t would be leaked. */
+  stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
+  SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, stream, pool));
+  SVN_ERR(svn_stream_close(stream));
+
+  /* if the representation is a delta against some other rep, link the two */
+  if (   rep_header->type == svn_fs_fs__rep_delta
+      && rep_header->base_revision >= context->start_rev)
+    {
+      reference_t *reference = apr_pcalloc(context->info_pool,
+                                           sizeof(*reference));
+      reference->from = entry->item;
+      reference->to.revision = rep_header->base_revision;
+      reference->to.number = rep_header->base_item_index;
+      APR_ARRAY_PUSH(context->references, reference_t *) = reference;
+    }
+
+  /* copy the whole rep (including header!) to our temp file */
+  SVN_ERR(svn_io_file_seek(rev_file, SEEK_SET, &source_offset, pool));
+  SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
+                         pool));
+
+  return SVN_NO_ERROR;
+}
+
+/* Comparison function for directory entries in formats with logical
+ * addressing:  Directories go first; entries of the same kind are sorted
+ * by name in reverse lexical order.
+ *
+ * This maximizes the chance of two items being located close to one another
+ * in *all* pack files independent of their change order.  It also groups
+ * multi-project repos nicely according to their sub-projects.  The reverse
+ * order aspect gives "trunk" preference over "tags" and "branches", so
+ * trunk-related items are more likely to be contiguous.
+ */
+static int
+compare_dir_entries_format7(const svn_sort__item_t *a,
+                            const svn_sort__item_t *b)
+{
+  const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
+  const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
+
+  /* same kind of node -> the (inverted) name comparison decides */
+  if (lhs->kind == rhs->kind)
+    return -strcmp(lhs->name, rhs->name);
+
+  /* different kinds -> directories win */
+  return lhs->kind == svn_node_dir ? -1 : 1;
+}
+
 /* Directories entries sorted by revision (decreasing - to max cache hits)
  * and offset (increasing - to max benefit from APR file buffering).
  */
@@ -52,9 +619,9 @@ compare_dir_entries_format6(const svn_so
   const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
 
   const svn_fs_fs__id_part_t *lhs_rev_item
-    = svn_fs_fs__id_rev_offset(lhs->id);
+    = svn_fs_fs__id_rev_item(lhs->id);
   const svn_fs_fs__id_part_t *rhs_rev_item
-    = svn_fs_fs__id_rev_offset(rhs->id);
+    = svn_fs_fs__id_rev_item(rhs->id);
 
   /* decreasing ("reverse") order on revs */
   if (lhs_rev_item->revision != rhs_rev_item->revision)
@@ -70,10 +637,15 @@ compare_dir_entries_format6(const svn_so
 apr_array_header_t *
 svn_fs_fs__order_dir_entries(svn_fs_t *fs,
                              apr_hash_t *directory,
+                             svn_revnum_t revision,
                              apr_pool_t *pool)
 {
   apr_array_header_t *ordered
-    = svn_sort__hash(directory, compare_dir_entries_format6, pool);
+    = svn_sort__hash(directory,
+                     svn_fs_fs__use_log_addressing(fs, revision)
+                       ? compare_dir_entries_format7
+                       : compare_dir_entries_format6,
+                     pool);
 
   apr_array_header_t *result
     = apr_array_make(pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
@@ -86,6 +658,777 @@ svn_fs_fs__order_dir_entries(svn_fs_t *f
   return result;
 }
 
+/* Copy node revision item identified by ENTRY from the current position
+ * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking info needed by
+ * our placement algorithm to CONTEXT:  A copy of ENTRY (allocated in
+ * CONTEXT->INFO_POOL, pointing to the item's position in REPS_FILE) is
+ * added to CONTEXT->REPS, a path_order_t describing the noderev is added
+ * to CONTEXT->PATH_ORDER and, if the noderev has a data representation
+ * within the current revision range, a link to it is added to
+ * CONTEXT->REFERENCES.  Use POOL for temporary allocations.
+ */
+static svn_error_t *
+copy_node_to_temp(pack_context_t *context,
+                  apr_file_t *rev_file,
+                  svn_fs_fs__p2l_entry_t *entry,
+                  apr_pool_t *pool)
+{
+  /* zero-initialized, i.e. REP_ID and EXPANDED_SIZE remain 0 unless we
+   * find a suitable data representation below */
+  path_order_t *path_order = apr_pcalloc(context->info_pool,
+                                         sizeof(*path_order));
+  node_revision_t *noderev;
+  svn_stream_t *stream;
+
+  /* remember where the item starts in REV_FILE; parsing the noderev
+   * will move the file pointer */
+  apr_off_t source_offset = entry->offset;
+
+  /* read & parse noderev.  Don't drop the result of the close call:
+   * an unhandled svn_error_t would be leaked. */
+  stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
+  SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool));
+  SVN_ERR(svn_stream_close(stream));
+
+  /* create a copy of ENTRY, make it point to the copy destination and
+   * store it in CONTEXT */
+  entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
+  entry->offset = 0;
+  SVN_ERR(svn_io_file_seek(context->reps_file, SEEK_CUR,
+                           &entry->offset, pool));
+  add_item_rep_mapping(context, entry);
+
+  /* copy the noderev to our temp file */
+  SVN_ERR(svn_io_file_seek(rev_file, SEEK_SET, &source_offset, pool));
+  SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
+                         pool));
+
+  /* if the node has a data representation, make that the node's "base".
+   * This will (often) cause the noderev to be placed right in front of
+   * its data representation. */
+
+  if (noderev->data_rep && noderev->data_rep->revision >= context->start_rev)
+    {
+      reference_t *reference = apr_pcalloc(context->info_pool,
+                                           sizeof(*reference));
+      reference->from = entry->item;
+      reference->to.revision = noderev->data_rep->revision;
+      reference->to.number = noderev->data_rep->item_index;
+      APR_ARRAY_PUSH(context->references, reference_t *) = reference;
+
+      /* an expanded size of 0 makes us fall back to the on-disk size */
+      path_order->rep_id = reference->to;
+      path_order->expanded_size = noderev->data_rep->expanded_size
+                                ? noderev->data_rep->expanded_size
+                                : noderev->data_rep->size;
+    }
+
+  /* describe the noderev for the path-based placement */
+  path_order->path = svn_prefix_string__create(context->paths,
+                                               noderev->created_path);
+  path_order->node_id = *svn_fs_fs__id_node_id(noderev->id);
+  path_order->revision = svn_fs_fs__id_rev(noderev->id);
+  path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id);
+  APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
+
+  return SVN_NO_ERROR;
+}
+
+/* implements compare_fn_t for arrays of path_order_t *.  Sort descending
+ * by PATH, then descending by NODE_ID and finally descending by REVISION.
+ */
+static int
+compare_path_order(const path_order_t * const * lhs_p,
+                   const path_order_t * const * rhs_p)
+{
+  const path_order_t * lhs = *lhs_p;
+  const path_order_t * rhs = *rhs_p;
+  int diff;
+
+  /* reverse lexicographic order on path (note the swapped arguments) */
+  diff = svn_prefix_string__compare(rhs->path, lhs->path);
+  if (diff)
+    return diff;
+
+  /* reverse order on node (i.e. latest first) */
+  diff = svn_fs_fs__id_part_compare(&rhs->node_id, &lhs->node_id);
+  if (diff)
+    return diff;
+
+  /* reverse order on revision (i.e. latest first) */
+  if (lhs->revision < rhs->revision)
+    return 1;
+  if (lhs->revision > rhs->revision)
+    return -1;
+
+  return 0;
+}
+
+/* implements compare_fn_t for arrays of reference_t *.  Sort ascending
+ * by TO; references to the same item are sorted ascending by FROM.
+ */
+static int
+compare_references(const reference_t * const * lhs_p,
+                   const reference_t * const * rhs_p)
+{
+  const reference_t * lhs = *lhs_p;
+  const reference_t * rhs = *rhs_p;
+
+  /* primary key: the item being referred to */
+  int diff = svn_fs_fs__id_part_compare(&lhs->to, &rhs->to);
+  if (diff)
+    return diff;
+
+  /* secondary key: the item holding the reference */
+  return svn_fs_fs__id_part_compare(&lhs->from, &rhs->from);
+}
+
+/* Bring the tracking data collected in CONTEXT into the order needed for
+ * placing the items:  Sort PATH_ORDER using compare_path_order and
+ * REFERENCES using compare_references.
+ */
+static void
+sort_reps(pack_context_t *context)
+{
+  apr_array_header_t *paths = context->path_order;
+  apr_array_header_t *refs = context->references;
+
+  qsort(paths->elts, paths->nelts, paths->elt_size,
+        (int (*)(const void *, const void *))compare_path_order);
+  qsort(refs->elts, refs->nelts, refs->elt_size,
+        (int (*)(const void *, const void *))compare_references);
+}
+
+/* implements compare_fn_t for arrays of svn_fs_fs__p2l_entry_t *.  Sort
+ * descending by revision; items from the same revision are sorted
+ * descending by item number.  Must not be called with *LHS == *RHS and
+ * never returns 0.
+ */
+static int
+compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs,
+                 const svn_fs_fs__p2l_entry_t * const * rhs)
+{
+  const svn_fs_fs__p2l_entry_t *left = *lhs;
+  const svn_fs_fs__p2l_entry_t *right = *rhs;
+
+  assert(left != right);
+
+  /* the younger revision goes first */
+  if (left->item.revision != right->item.revision)
+    return left->item.revision > right->item.revision ? -1 : 1;
+
+  /* same revision: the higher item number goes first */
+  return left->item.number > right->item.number ? -1 : 1;
+}
+
+/* Sort the svn_fs_fs__p2l_entry_t * elements of ENTRIES by age using
+ * compare_p2l_info, i.e. the latest items will come first.
+ */
+static void
+sort_items(apr_array_header_t *entries)
+{
+  /* qsort wants an untyped comparison function */
+  int (*compare)(const void *, const void *)
+    = (int (*)(const void *, const void *))compare_p2l_info;
+
+  qsort(entries->elts, entries->nelts, entries->elt_size, compare);
+}
+
+/* Return the remaining unused bytes in the current block in CONTEXT's
+ * pack file, i.e. the distance from CONTEXT->PACK_OFFSET to the end of
+ * the block containing it.  The block size is taken from the FS's
+ * BLOCK_SIZE setting.  Note that an offset exactly on a block boundary
+ * yields a full BLOCK_SIZE instead of 0.
+ */
+static apr_ssize_t
+get_block_left(pack_context_t *context)
+{
+  fs_fs_data_t *ffd = context->fs->fsap_data;
+  return ffd->block_size - (context->pack_offset % ffd->block_size);
+}
+
+/* To prevent items from overlapping a block boundary, we will usually
+ * put them into the next block and top up the old one with NUL bytes.
+ * Pad CONTEXT's pack file to the end of the current block, if TO_ADD does
+ * not fit into the current block and the padding is short enough.
+ * If padding gets written, an "unused" entry covering it is added to the
+ * P2L proto index and CONTEXT->PACK_OFFSET is advanced accordingly.
+ * Use POOL for allocations.
+ */
+static svn_error_t *
+auto_pad_block(pack_context_t *context,
+               apr_off_t to_add,
+               apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = context->fs->fsap_data;
+
+  /* This is the maximum number of bytes "wasted" that way per block.
+   * Larger items will cross the block boundaries. */
+  const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
+
+  /* Is wasted space small enough to align the current item to the next
+   * block? */
+  apr_off_t padding = get_block_left(context);
+
+  if (padding < to_add && padding < max_padding)
+    {
+      /* Yes. Top up with NUL bytes and don't forget to create
+       * a P2L index entry marking this section as unused. */
+      svn_fs_fs__p2l_entry_t null_entry;
+
+      /* Set every member that we know of:  The padding belongs to no
+       * revision and to no item. */
+      null_entry.offset = context->pack_offset;
+      null_entry.size = padding;
+      null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
+      null_entry.item.revision = SVN_INVALID_REVNUM;
+      null_entry.item.number = SVN_FS_FS__ITEM_INDEX_UNUSED;
+      null_entry.fnv1_checksum = 0;
+
+      SVN_ERR(write_null_bytes(context->pack_file, padding, pool));
+      SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
+                   context->proto_p2l_index, &null_entry, pool));
+      context->pack_offset += padding;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Append all items in ITEMS that are not of type "unused" to
+ * CONTEXT->PACK_FILE, reading their contents from TEMP_FILE and
+ * retaining the order given by ITEMS.  Each item copied gets its offset
+ * updated to its position in the pack file, gets added to the P2L proto
+ * index and gets appended to CONTEXT->REPS.  Use POOL for allocations.
+ */
+static svn_error_t *
+store_items(pack_context_t *context,
+            apr_file_t *temp_file,
+            apr_array_header_t *items,
+            apr_pool_t *pool)
+{
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  int idx;
+
+  /* copy all items in strict order */
+  for (idx = 0; idx < items->nelts; ++idx)
+    {
+      svn_fs_fs__p2l_entry_t *entry
+        = APR_ARRAY_IDX(items, idx, svn_fs_fs__p2l_entry_t *);
+      apr_off_t safety_margin = 0;
+
+      /* skip empty entries */
+      if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
+        continue;
+
+      svn_pool_clear(iterpool);
+
+      /* Textual noderevs need special care since their parsers may
+         prefetch up to 80 bytes and we don't want them to cross block
+         boundaries. */
+      if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
+        safety_margin = SVN__LINE_CHUNK_SIZE;
+
+      /* If the next item does not fit into the current block, auto-pad
+         it. */
+      SVN_ERR(auto_pad_block(context, entry->size + safety_margin, iterpool));
+
+      /* select the item in the source file and copy it into the target
+       * pack file */
+      SVN_ERR(svn_io_file_seek(temp_file, SEEK_SET, &entry->offset,
+                               iterpool));
+      SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
+                             entry->size, iterpool));
+
+      /* the item now lives at the former end of the pack file */
+      entry->offset = context->pack_offset;
+      context->pack_offset += entry->size;
+
+      /* write index entry and keep track of the item */
+      SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
+                   context->proto_p2l_index, entry, iterpool));
+
+      APR_ARRAY_PUSH(context->reps, svn_fs_fs__p2l_entry_t *) = entry;
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* implements compare_fn_t.  Compare the TO member of the reference at
+ * *LHS_P with the item ID RHS_P; used as the bsearch key comparison. */
+static int
+compare_ref_to_item(const reference_t * const * lhs_p,
+                    const svn_fs_fs__id_part_t * rhs_p)
+{
+  return svn_fs_fs__id_part_compare(&(*lhs_p)->to, rhs_p);
+}
+
+/* Return the index of the first entry in CONTEXT->REFERENCES that
+ * references ITEM if such entries exist.  All matching items will be
+ * consecutive.  Otherwise, return whatever the lower-bound search yields;
+ * check the result with is_reference_match() before using it. */
+static int
+find_first_reference(pack_context_t *context,
+                     svn_fs_fs__p2l_entry_t *item)
+{
+  return svn_sort__bsearch_lower_bound(&item->item, context->references,
+                (int (*)(const void *, const void *))compare_ref_to_item);
+}
+
+/* Check whether entry number IDX in CONTEXT->REFERENCES references ITEM.
+ * Return FALSE if IDX lies beyond the end of that array. */
+static svn_boolean_t
+is_reference_match(pack_context_t *context,
+                   int idx,
+                   svn_fs_fs__p2l_entry_t *item)
+{
+  reference_t *reference;
+  if (context->references->nelts <= idx)
+    return FALSE;
+
+  reference = APR_ARRAY_IDX(context->references, idx, reference_t *);
+  return svn_fs_fs__id_part_eq(&reference->to, &item->item);
+}
+
+/* Starting at IDX in CONTEXT->PATH_ORDER, select all representations and
+ * noderevs that should be placed into the same container, respectively.
+ * Append the svn_fs_fs__p2l_entry_t * of the representations to
+ * REP_PARTS and append the svn_fs_fs__p2l_entry_t * of the noderevs
+ * referencing those reps to NODE_PARTS.
+ *
+ * Remove all returned items from the CONTEXT->REPS container and prevent
+ * them from being placed a second time later on.  That also means that the
+ * caller has to place all items returned.
+ */
+static svn_error_t *
+select_reps(pack_context_t *context,
+            int idx,
+            apr_array_header_t *node_parts,
+            apr_array_header_t *rep_parts)
+{
+  apr_array_header_t *path_order = context->path_order;
+  path_order_t *start_path = APR_ARRAY_IDX(path_order, idx, path_order_t *);
+
+  svn_fs_fs__p2l_entry_t *node_part;
+  svn_fs_fs__p2l_entry_t *rep_part;
+  svn_fs_fs__p2l_entry_t *depending;
+  int i, k;
+
+  /* collect all path_order records as well as rep and noderev items
+   * that occupy the same path with the same node. */
+  for (; idx < path_order->nelts; ++idx)
+    {
+      path_order_t *current_path
+        = APR_ARRAY_IDX(path_order, idx, path_order_t *);
+
+      if (!svn_fs_fs__id_part_eq(&start_path->node_id,
+                                 &current_path->node_id))
+        break;
+
+      APR_ARRAY_IDX(path_order, idx, path_order_t *) = NULL;
+      node_part = get_item(context, &current_path->noderev_id, TRUE);
+      rep_part = get_item(context, &current_path->rep_id, TRUE);
+
+      if (node_part)
+        APR_ARRAY_PUSH(node_parts, svn_fs_fs__p2l_entry_t *) = node_part;
+      if (rep_part)
+        APR_ARRAY_PUSH(rep_parts, svn_fs_fs__p2l_entry_t *) = rep_part;
+    }
+
+  /* collect depending reps and noderevs that reference any of the collected
+   * reps */
+  for (i = 0; i < rep_parts->nelts; ++i)
+    {
+      rep_part = APR_ARRAY_IDX(rep_parts, i, svn_fs_fs__p2l_entry_t*);
+      for (k = find_first_reference(context, rep_part);
+           is_reference_match(context, k, rep_part);
+           ++k)
+        {
+          reference_t *reference
+            = APR_ARRAY_IDX(context->references, k, reference_t *);
+
+          depending = get_item(context, &reference->from, TRUE);
+          if (!depending)
+            continue;
+
+          if (depending->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
+            APR_ARRAY_PUSH(node_parts, svn_fs_fs__p2l_entry_t *) = depending;
+          else
+            APR_ARRAY_PUSH(rep_parts, svn_fs_fs__p2l_entry_t *) = depending;
+        }
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Copy (append) the noderev and representation items selected via
+ * CONTEXT->PATH_ORDER from TEMP_FILE into CONTEXT->PACK_FILE, placing the
+ * noderevs of each group in front of its reps.  Use POOL for temporary
+ * allocations. */
+static svn_error_t *
+copy_reps_from_temp(pack_context_t *context,
+                    apr_file_t *temp_file,
+                    apr_pool_t *pool)
+{
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  apr_array_header_t *path_order = context->path_order;
+  apr_array_header_t *node_parts = apr_array_make(pool, 16, sizeof(void*));
+  apr_array_header_t *rep_parts = apr_array_make(pool, 16, sizeof(void*));
+  int i;
+
+  /* copy items in path order. Create block-sized containers. */
+  for (i = 0; i < path_order->nelts; ++i)
+    {
+      if (APR_ARRAY_IDX(path_order, i, path_order_t *) == NULL)
+        continue;
+
+      svn_pool_clear(iterpool);
+
+      /* Collect reps to combine and all noderevs referencing them */
+      SVN_ERR(select_reps(context, i, node_parts, rep_parts));
+
+      /* store the noderevs container in front of the reps */
+      SVN_ERR(store_items(context, temp_file, node_parts, iterpool));
+      SVN_ERR(store_items(context, temp_file, rep_parts, iterpool));
+      
+      /* processed all items */
+      apr_array_clear(node_parts);
+      apr_array_clear(rep_parts);
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* implements compare_fn_t.  Order by ascending revision: place LHS before
+ * RHS if the latter belongs to a newer revision; same revisions tie (0).
+ */
+static int
+compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p,
+                     const svn_fs_fs__p2l_entry_t * const * rhs_p)
+{
+  const svn_fs_fs__p2l_entry_t * lhs = *lhs_p;
+  const svn_fs_fs__p2l_entry_t * rhs = *rhs_p;
+
+  if (lhs->item.revision == rhs->item.revision)
+    return 0;
+
+  return lhs->item.revision < rhs->item.revision ? -1 : 1;
+}
+
+/* Write the log-to-phys proto index file for CONTEXT and use POOL for
+ * temporary allocations.  All items in all buckets must have been placed
+ * by now.  CONTEXT->REPS gets compacted and sorted by revision in place.
+ */
+static svn_error_t *
+write_l2p_index(pack_context_t *context,
+                apr_pool_t *pool)
+{
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  svn_revnum_t prev_rev = SVN_INVALID_REVNUM;
+  int i, dest;
+
+  /* eliminate empty entries from CONTEXT->REPS */
+  for (i = 0, dest = 0; i < context->reps->nelts; ++i)
+    {
+      svn_fs_fs__p2l_entry_t *entry
+        = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
+      if (entry)
+        APR_ARRAY_IDX(context->reps, dest++, svn_fs_fs__p2l_entry_t *)
+          = entry;
+    }
+  context->reps->nelts = dest;
+
+  /* we need to write the l2p index revision by revision */
+  qsort(context->reps->elts, context->reps->nelts, sizeof(void*),
+        (int (*)(const void *, const void *))compare_p2l_info_rev);
+
+  /* write index entries */
+  for (i = 0; i < context->reps->nelts; ++i)
+    {
+      svn_fs_fs__p2l_entry_t *p2l_entry
+        = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
+      if (p2l_entry == NULL)
+        continue;
+
+      /* next revision? */
+      if (prev_rev != p2l_entry->item.revision)
+        {
+          prev_rev = p2l_entry->item.revision;
+          SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(
+                       context->proto_l2p_index, iterpool));
+        }
+
+      /* add entry */
+      SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(context->proto_l2p_index,
+                                                   p2l_entry->offset,
+                                                   p2l_entry->item.number,
+                                                   iterpool));
+
+      /* keep memory usage in check */
+      if (i % 256 == 0)
+        svn_pool_clear(iterpool);
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Pack the revisions START_REV up to but excluding END_REV of CONTEXT,
+ * i.e. this covers phases 2 to 4.  Use POOL for allocations.
+ */
+static svn_error_t *
+pack_range(pack_context_t *context,
+           apr_pool_t *pool)
+{
+  apr_pool_t *revpool = svn_pool_create(pool);
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  apr_pool_t *iterpool2 = svn_pool_create(pool);
+
+  /* Phase 2: Copy items into various buckets and build tracking info */
+  svn_revnum_t revision;
+  for (revision = context->start_rev; revision < context->end_rev; ++revision)
+    {
+      apr_off_t offset = 0;
+      apr_finfo_t finfo;
+      svn_fs_fs__revision_file_t *rev_file;
+      const char *path;
+
+      svn_pool_clear(revpool);
+
+      /* Get the size of the file. */
+      path = svn_dirent_join(context->shard_dir,
+                             apr_psprintf(revpool, "%ld", revision),
+                             revpool);
+      SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, revpool));
+      SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
+                                               revision, revpool));
+
+      /* store the indirect array index */
+      APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
+  
+      /* read the phys-to-log index file until we covered the whole rev file.
+       * That index contains enough info to build both target indexes from it. */
+      while (offset < finfo.size)
+        {
+          /* read one cluster */
+          int i;
+          apr_array_header_t *entries;
+
+          svn_pool_clear(iterpool);
+
+          SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs,
+                                              rev_file, revision, offset,
+                                              iterpool));
+
+          for (i = 0; i < entries->nelts; ++i)
+            {
+              svn_fs_fs__p2l_entry_t *entry
+                = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
+
+              /* skip first entry if that was duplicated due to crossing a
+                 cluster boundary */
+              if (offset > entry->offset)
+                continue;
+
+              svn_pool_clear(iterpool2);
+
+              /* process entry while inside the rev file */
+              offset = entry->offset;
+              if (offset < finfo.size)
+                {
+                  SVN_ERR(svn_io_file_seek(rev_file->file, SEEK_SET, &offset,
+                                           iterpool2));
+
+                  if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
+                    SVN_ERR(copy_item_to_temp(context,
+                                              context->changes,
+                                              context->changes_file,
+                                              rev_file->file, entry,
+                                              iterpool2));
+                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
+                    SVN_ERR(copy_item_to_temp(context,
+                                              context->file_props,
+                                              context->file_props_file,
+                                              rev_file->file, entry,
+                                              iterpool2));
+                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
+                    SVN_ERR(copy_item_to_temp(context,
+                                              context->dir_props,
+                                              context->dir_props_file,
+                                              rev_file->file, entry,
+                                              iterpool2));
+                  else if (   entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP
+                           || entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
+                    SVN_ERR(copy_rep_to_temp(context, rev_file->file, entry,
+                                             iterpool2));
+                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
+                    SVN_ERR(copy_node_to_temp(context, rev_file->file, entry,
+                                              iterpool2));
+                  else
+                    SVN_ERR_ASSERT(entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED);
+
+                  offset += entry->size;
+                }
+            }
+
+          if (context->cancel_func)
+            SVN_ERR(context->cancel_func(context->cancel_baton));
+        }
+    }
+
+  svn_pool_destroy(iterpool2);
+  svn_pool_destroy(iterpool);
+
+  /* phase 3: placement.
+   * Use "newest first" placement for simple items. */
+  sort_items(context->changes);
+  sort_items(context->file_props);
+  sort_items(context->dir_props);
+
+  /* follow dependencies recursively for noderevs and data representations */
+  sort_reps(context);
+
+  /* phase 4: copy bucket data to pack file.  Write P2L index. */
+  SVN_ERR(store_items(context, context->changes_file, context->changes,
+                      revpool));
+  svn_pool_clear(revpool);
+  SVN_ERR(store_items(context, context->file_props_file, context->file_props,
+                      revpool));
+  svn_pool_clear(revpool);
+  SVN_ERR(store_items(context, context->dir_props_file, context->dir_props,
+                      revpool));
+  svn_pool_clear(revpool);
+  SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
+  svn_pool_clear(revpool);
+
+  /* write L2P index as well (now that we know all target offsets) */
+  SVN_ERR(write_l2p_index(context, revpool));
+
+  svn_pool_destroy(revpool);
+  
+  return SVN_NO_ERROR;
+}
+
+/* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
+ * This function will only be used for very large revisions (>>100k changes).
+ * Use POOL for temporary allocations.
+ */
+static svn_error_t *
+append_revision(pack_context_t *context,
+                apr_pool_t *pool)
+{
+  apr_off_t offset = 0;
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  svn_fs_fs__revision_file_t *rev_file;
+  apr_finfo_t finfo;
+
+  /* Get the size of the file. */
+  const char *path = svn_dirent_join(context->shard_dir,
+                                     apr_psprintf(iterpool, "%ld",
+                                                  context->start_rev),
+                                     pool);
+  SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, pool));
+
+  /* Copy all the bits from the rev file to the end of the pack file. */
+  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
+                                           context->start_rev, pool));
+  SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file,
+                         finfo.size, iterpool));
+
+  /* mark the start of a new revision */
+  SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(context->proto_l2p_index,
+                                                  pool));
+
+  /* read the phys-to-log index file until we covered the whole rev file.
+   * That index contains enough info to build both target indexes from it. */
+  while (offset < finfo.size)
+    {
+      /* read one cluster */
+      int i;
+      apr_array_header_t *entries;
+
+      svn_pool_clear(iterpool);
+      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, rev_file,
+                                          context->start_rev, offset,
+                                          iterpool));
+
+      for (i = 0; i < entries->nelts; ++i)
+        {
+          svn_fs_fs__p2l_entry_t *entry
+            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
+
+          /* skip first entry if that was duplicated due to crossing a
+             cluster boundary */
+          if (offset > entry->offset)
+            continue;
+
+          /* process entry while inside the rev file */
+          offset = entry->offset;
+          if (offset < finfo.size)
+            {
+              entry->offset += context->pack_offset;
+              offset += entry->size;
+              SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(
+                         context->proto_l2p_index, entry->offset,
+                         entry->item.number, iterpool));
+              SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
+                         context->proto_p2l_index, entry, iterpool));
+            }
+        }
+    }
+
+  svn_pool_destroy(iterpool);
+  context->pack_offset += finfo.size;
+
+  return SVN_NO_ERROR;
+}
+
+/* Logical addressing mode packing logic.
+ *
+ * Pack the revision shard starting at SHARD_REV in filesystem FS from
+ * SHARD_DIR into the PACK_FILE_DIR, using POOL for allocations.  Limit
+ * the extra memory consumption to MAX_MEM bytes.  CANCEL_FUNC and
+ * CANCEL_BATON are what you think they are.
+ */
+static svn_error_t *
+pack_log_addressed(svn_fs_t *fs,
+                   const char *pack_file_dir,
+                   const char *shard_dir,
+                   svn_revnum_t shard_rev,
+                   apr_size_t max_mem,
+                   svn_cancel_func_t cancel_func,
+                   void *cancel_baton,
+                   apr_pool_t *pool)
+{
+  enum
+    {
+      /* estimated amount of memory used to represent one item in memory
+       * during rev file packing */
+      PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
+                   + APR_ALIGN_DEFAULT(2 *sizeof(void*))
+                   + APR_ALIGN_DEFAULT(sizeof(reference_t))
+                   + APR_ALIGN_DEFAULT(sizeof(svn_fs_fs__p2l_entry_t))
+                   + 6 * sizeof(void*)
+    };
+
+  apr_size_t max_items = max_mem / PER_ITEM_MEM;
+  apr_array_header_t *max_ids;
+  pack_context_t context = { 0 };
+  int i;
+  apr_size_t item_count = 0;
+  apr_pool_t *iterpool = svn_pool_create(pool);
+
+  /* set up a pack context */
+  SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
+                                  shard_rev, max_items, cancel_func,
+                                  cancel_baton, pool));
+
+  /* phase 1: determine the size of the revisions to pack */
+  SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, shard_rev,
+                                     context.shard_end_rev - shard_rev,
+                                     pool));
+
+  /* pack revisions in ranges whose total item count fits into MAX_MEM */
+  for (i = 0; i < max_ids->nelts; ++i)
+    if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items)
+      {
+        item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
+        context.end_rev++;
+      }
+    else
+      {
+        svn_pool_clear(iterpool);
+        /* some unpacked revisions before this one? */
+        if (context.start_rev < context.end_rev)
+          {
+            /* pack them intelligently (might be just 1 rev but still ...) */
+            SVN_ERR(pack_range(&context, iterpool));
+            SVN_ERR(reset_pack_context(&context, iterpool));
+            item_count = 0;
+          }
+
+        /* next revision range is to start with the current revision */
+        context.start_rev = i + context.shard_rev;
+        context.end_rev = context.start_rev + 1;
+
+        /* if this is a very large revision, we must place it as is */
+        if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
+          {
+            SVN_ERR(append_revision(&context, iterpool));
+            context.start_rev++;
+          }
+        else
+          item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
+      }
+
+  /* non-empty revision range at the end? */
+  if (context.start_rev < context.end_rev)
+    SVN_ERR(pack_range(&context, iterpool));
+
+  /* last phase: finalize indexes and clean up */
+  SVN_ERR(reset_pack_context(&context, iterpool));
+  SVN_ERR(close_pack_context(&context, iterpool));
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
 /* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
    Use POOL for temporary allocations. */
 svn_error_t *
@@ -151,7 +1494,8 @@ svn_fs_fs__get_packed_offset(apr_off_t *
   return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, pool);
 }
 
-/* Packing logic:  Simply concatenate all revision contents.
+/* Packing logic for physical addressing mode:
+ * Simply concatenate all revision contents.
  * 
  * Pack the revision shard starting at SHARD_REV containing exactly
  * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
@@ -262,9 +1606,13 @@ pack_rev_shard(svn_fs_t *fs,
   SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool));
 
   /* Index information files */
-  SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev,
-                              max_files_per_dir, cancel_func,
-                              cancel_baton, pool));
+  if (svn_fs_fs__use_log_addressing(fs, shard_rev))
+    SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
+                               max_mem, cancel_func, cancel_baton, pool));
+  else
+    SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev,
+                                max_files_per_dir, cancel_func,
+                                cancel_baton, pool));
   
   SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, pool));
   SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, pool));
@@ -319,7 +1667,7 @@ pack_shard(const char *revs_dir,
 
   /* pack the revision content */
   SVN_ERR(pack_rev_shard(fs, rev_pack_file_dir, rev_shard_path,
-                         shard, max_files_per_dir, 64 * 1024 * 1024,
+                         shard, max_files_per_dir, DEFAULT_MAX_MEM,
                          cancel_func, cancel_baton, pool));
 
   /* if enabled, pack the revprops in an equivalent way */

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/pack.h
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/pack.h?rev=1547260&r1=1547259&r2=1547260&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/pack.h (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/pack.h Tue Dec  3 01:43:05 2013
@@ -52,11 +52,13 @@ svn_fs_fs__get_packed_offset(apr_off_t *
 
 /* Return the svn_dir_entry_t* objects of DIRECTORY in an APR array
  * allocated in POOL with entries added in storage (on-disk) order.
- * FS format will be used to pick the optimal ordering strategy.
+ * FS format and the directory's REVISION number will be used to pick
+ * the optimal ordering strategy.
  */
 apr_array_header_t *
 svn_fs_fs__order_dir_entries(svn_fs_t *fs,
                              apr_hash_t *directory,
+                             svn_revnum_t revision,
                              apr_pool_t *pool);
 
 

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/recovery.c
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/recovery.c?rev=1547260&r1=1547259&r2=1547260&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/recovery.c (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/recovery.c Tue Dec  3 01:43:05 2013
@@ -26,6 +26,7 @@
 #include "svn_pools.h"
 #include "private/svn_string_private.h"
 
+#include "index.h"
 #include "low_level.h"
 #include "rep-cache.h"
 #include "revprops.h"
@@ -53,7 +54,8 @@ recover_get_largest_revision(svn_fs_t *f
   while (1)
     {
       svn_error_t *err;
-      apr_file_t *file;
+      svn_fs_fs__revision_file_t *file;
+
       svn_pool_clear(iterpool);
 
       err = svn_fs_fs__open_pack_or_rev_file(&file, fs, right, iterpool);
@@ -76,7 +78,8 @@ recover_get_largest_revision(svn_fs_t *f
     {
       svn_revnum_t probe = left + ((right - left) / 2);
       svn_error_t *err;
-      apr_file_t *file;
+      svn_fs_fs__revision_file_t *file;
+
       svn_pool_clear(iterpool);
 
       err = svn_fs_fs__open_pack_or_rev_file(&file, fs, probe, iterpool);
@@ -144,7 +147,7 @@ read_handler_recover(void *baton, char *
 static svn_error_t *
 recover_find_max_ids(svn_fs_t *fs,
                      svn_revnum_t rev,
-                     apr_file_t *rev_file,
+                     svn_fs_fs__revision_file_t *rev_file,
                      apr_off_t offset,
                      apr_uint64_t *max_node_id,
                      apr_uint64_t *max_copy_id,
@@ -158,8 +161,8 @@ recover_find_max_ids(svn_fs_t *fs,
   apr_pool_t *iterpool;
   node_revision_t *noderev;
 
-  baton.stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
-  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool));
+  baton.stream = rev_file->stream;
+  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset, pool));
   SVN_ERR(svn_fs_fs__read_noderev(&noderev, baton.stream, pool));
 
   /* Check that this is a directory.  It should be. */
@@ -180,8 +183,9 @@ recover_find_max_ids(svn_fs_t *fs,
 
   /* We could use get_dir_contents(), but this is much cheaper.  It does
      rely on directory entries being stored as PLAIN reps, though. */
-  offset = noderev->data_rep->offset;
-  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool));
+  SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL,
+                                 noderev->data_rep->item_index, pool));
+  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset, pool));
   SVN_ERR(svn_fs_fs__read_rep_header(&header, baton.stream, pool));
   if (header->type != svn_fs_fs__rep_plain)
     return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
@@ -209,7 +213,7 @@ recover_find_max_ids(svn_fs_t *fs,
       char *str;
       svn_node_kind_t kind;
       svn_fs_id_t *id;
-      const svn_fs_fs__id_part_t *rev_offset;
+      const svn_fs_fs__id_part_t *rev_item;
       apr_uint64_t node_id, copy_id;
       apr_off_t child_dir_offset;
       const svn_string_t *path = svn__apr_hash_index_val(hi);
@@ -240,8 +244,8 @@ recover_find_max_ids(svn_fs_t *fs,
 
       id = svn_fs_fs__id_parse(str, strlen(str), iterpool);
 
-      rev_offset = svn_fs_fs__id_rev_offset(id);
-      if (rev_offset->revision != rev)
+      rev_item = svn_fs_fs__id_rev_item(id);
+      if (rev_item->revision != rev)
         {
           /* If the node wasn't modified in this revision, we've already
              checked the node and copy id. */
@@ -259,7 +263,9 @@ recover_find_max_ids(svn_fs_t *fs,
       if (kind == svn_node_file)
         continue;
 
-      child_dir_offset = rev_offset->number;
+      SVN_ERR(svn_fs_fs__item_offset(&child_dir_offset, fs,
+                                     rev_file, rev, NULL, rev_item->number,
+                                     iterpool));
       SVN_ERR(recover_find_max_ids(fs, rev, rev_file, child_dir_offset,
                                    max_node_id, max_copy_id, iterpool));
     }
@@ -277,19 +283,20 @@ svn_fs_fs__find_max_ids(svn_fs_t *fs,
 {
   fs_fs_data_t *ffd = fs->fsap_data;
   apr_off_t root_offset;
-  apr_file_t *rev_file;
+  svn_fs_fs__revision_file_t *rev_file;
   svn_fs_id_t *root_id;
 
   /* call this function for old repo formats only */
   SVN_ERR_ASSERT(ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT);
 
   SVN_ERR(svn_fs_fs__rev_get_root(&root_id, fs, youngest, pool));
-  root_offset = svn_fs_fs__id_offset(root_id);
 
   SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, youngest, pool));
+  SVN_ERR(svn_fs_fs__item_offset(&root_offset, fs, rev_file, youngest, NULL,
+                                 svn_fs_fs__id_item(root_id), pool));
   SVN_ERR(recover_find_max_ids(fs, youngest, rev_file, root_offset,
                                max_node_id, max_copy_id, pool));
-  SVN_ERR(svn_io_file_close(rev_file, pool));
+  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
 
   return SVN_NO_ERROR;
 }

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/rep-cache.c
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/rep-cache.c?rev=1547260&r1=1547259&r2=1547260&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/rep-cache.c (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/rep-cache.c Tue Dec  3 01:43:05 2013
@@ -204,7 +204,7 @@ svn_fs_fs__walk_rep_reference(svn_fs_t *
       rep->has_sha1 = TRUE;
       memcpy(rep->sha1_digest, checksum->digest, sizeof(rep->sha1_digest));
       rep->revision = svn_sqlite__column_revnum(stmt, 1);
-      rep->offset = svn_sqlite__column_int64(stmt, 2);
+      rep->item_index = svn_sqlite__column_int64(stmt, 2);
       rep->size = svn_sqlite__column_int64(stmt, 3);
       rep->expanded_size = svn_sqlite__column_int64(stmt, 4);
 
@@ -259,7 +259,7 @@ svn_fs_fs__get_rep_reference(representat
              sizeof((*rep)->sha1_digest));
       (*rep)->has_sha1 = TRUE;
       (*rep)->revision = svn_sqlite__column_revnum(stmt, 0);
-      (*rep)->offset = svn_sqlite__column_int64(stmt, 1);
+      (*rep)->item_index = svn_sqlite__column_int64(stmt, 1);
       (*rep)->size = svn_sqlite__column_int64(stmt, 2);
       (*rep)->expanded_size = svn_sqlite__column_int64(stmt, 3);
     }
@@ -301,7 +301,7 @@ svn_fs_fs__set_rep_reference(svn_fs_t *f
   SVN_ERR(svn_sqlite__bindf(stmt, "siiii",
                             svn_checksum_to_cstring(&checksum, pool),
                             (apr_int64_t) rep->revision,
-                            (apr_int64_t) rep->offset,
+                            (apr_int64_t) rep->item_index,
                             (apr_int64_t) rep->size,
                             (apr_int64_t) rep->expanded_size));
 
@@ -324,7 +324,7 @@ svn_fs_fs__set_rep_reference(svn_fs_t *f
       if (old_rep)
         {
           if (reject_dup && ((old_rep->revision != rep->revision)
-                             || (old_rep->offset != rep->offset)
+                             || (old_rep->item_index != rep->item_index)
                              || (old_rep->size != rep->size)
                              || (old_rep->expanded_size != rep->expanded_size)))
             return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
@@ -337,9 +337,9 @@ svn_fs_fs__set_rep_reference(svn_fs_t *f
                               SVN_FILESIZE_T_FMT, APR_OFF_T_FMT,
                               SVN_FILESIZE_T_FMT, SVN_FILESIZE_T_FMT),
                  svn_checksum_to_cstring_display(&checksum, pool),
-                 fs->path, old_rep->revision, old_rep->offset,
+                 fs->path, old_rep->revision, old_rep->item_index,
                  old_rep->size, old_rep->expanded_size, rep->revision,
-                 rep->offset, rep->size, rep->expanded_size);
+                 rep->item_index, rep->size, rep->expanded_size);
           else
             return SVN_NO_ERROR;
         }

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/structure
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/structure?rev=1547260&r1=1547259&r2=1547260&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/structure (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/structure Tue Dec  3 01:43:05 2013
@@ -37,6 +37,8 @@ repository) is:
     <shard>.pack/     Pack directory, if the repo has been packed (see below)
       pack            Pack file, if the repository has been packed (see below)
       manifest        Pack manifest file, if a pack file exists (see below)
+      pack.l2p        Log-to-phys index file (format 7+, see below)
+      pack.p2l        Phys-to-log index file (format 7+, see below)
   revprops/           Subdirectory containing rev-props
     <shard>/          Shard directory, if sharding is in use (see below)
       <revnum>        File containing rev-props for <revnum>
@@ -138,6 +140,7 @@ The formats are:
   Format 4, understood by Subversion 1.6+
   Format 5, understood by Subversion 1.7-dev, never released
   Format 6, understood by Subversion 1.8
+  Format 7, understood by Subversion 1.9
 
 The differences between the formats are:
 
@@ -148,6 +151,7 @@ Delta representation in revision files
 Format options
   Formats 1-2: none permitted
   Format 3+:   "layout" option
+  Format 7+:   "addressing" option
 
 Transaction name reuse
   Formats 1-2: transaction names may be reused
@@ -183,15 +187,21 @@ Shard packing:
   Format 6+:  Applied equally to revision data and revprop data
     (i.e. same min packed revision)
 
+Addressing:
+  Formats 1-6: Physical addressing; uses fixed positions within a rev file
+  Format 7+:   Logical addressing; uses item index that will be translated
+    on-the-fly to the actual rev / pack file location
+
 # Incomplete list.  See SVN_FS_FS__MIN_*_FORMAT
 
 
 Filesystem format options
 -------------------------
 
-Currently, the only recognised format option is "layout", which
-specifies the paths that will be used to store the revision files and
-revision property files.
+Currently, the only recognised format options are "layout" and "addressing".
+The first specifies the paths that will be used to store the revision
+files and revision property files.  The second specifies for which
+revisions address translation is required.
 
 The "layout" option is followed by the name of the filesystem layout
 and any required parameters.  The default layout, if no "layout"
@@ -219,19 +229,91 @@ The known layouts, and the parameters th
   revs/0/ directory will contain revisions 0-999, revs/1/ will contain
   1000-1999, and so on.
 
+The "addressing" option is followed by the name of the addressing mode
+and any required parameters.  The default addressing, if no "addressing"
+keyword is specified, is the 'physical' addressing.
+
+The supported modes, and the parameters they require, are as follows:
+
+"physical"
+  All existing and future revision files will use the traditional
+  physical addressing scheme.  All references are given as rev/offset
+  pairs with "offset" being the byte offset relative to the beginning of
+  the revision in the respective rev or pack file.
+
+"logical <first-revision-to-use-it>"
+  'first-revision-to-use-it' specifies the first revision to use logical
+  addressing.  It must coincide with the beginning of a shard and may be a
+  future revision.  All earlier revisions use physical addressing.  It is
+  illegal to use logical addressing on non-sharded repositories.
+
+
+Addressing modes
+----------------
+
+Two addressing modes are supported in format 7: physical and logical
+addressing.  Both use the same address format but apply a different
+interpretation to it.  Older formats only support physical addressing.
+
+All items are addressed using <rev> <item_index> pairs.  In physical
+addressing mode, item_index is the (ASCII decimal) number of bytes from
+the start of the revision file to the start of the respective item.  For
+non-packed files that is also the absolute file offset.  Revision pack
+files simply concatenate multiple rev files, i.e. the absolute file offset
+is determined as
+
+  absolute offset = rev offset taken from manifest + item_index
+  
+This simple addressing scheme makes it hard to change the location of
+any item since that may break references from later revisions.
+  
+Logical addressing uses an index file to translate the rev / item_index
+pairs into absolute file offsets.  There is one such index for every rev /
+pack file using logical addressing and both are created in sync.  That
+makes it possible to reorder items during pack file creation, particularly
+to mix items from different revisions.
+
+Some item_index values are pre-defined and apply to every revision:
+
+  0 ... not used / invalid
+  1 ... changed path list
+  2 ... root node revision
+
+A reverse index (phys-to-log) is created as well.  It allows for
+translating arbitrary file locations into item descriptions (type, rev,
+item_index, on-disk length).  The known item types are:
+
+  0 ... unused / empty section
+  1 ... file representation
+  2 ... directory representation
+  3 ... file property representation
+  4 ... directory property representation
+  5 ... node revision
+  6 ... changed paths list
+
+The various representation types all share the same morphology.  The
+distinction is only made to allow for more effective reordering heuristics.
+Zero-length items are allowed.
+
+
 Packing revisions
 -----------------
 
 A filesystem can optionally be "packed" to conserve space on disk.  The
 packing process concatenates all the revision files in each full shard to
-create pack files.  A manifest file is also created for each shard which
+create a pack file.  The original shard is removed, and reads are
+redirected to the pack file.
+
+With physical addressing, a manifest file is created for each shard which
 records the indexes of the corresponding revision files in the pack file.
-In addition, the original shard is removed, and reads are redirected to the
-pack file.
+The manifest file consists of a list of offsets, one for each revision in
+the pack file.  The offsets are stored as ASCII decimal, and separated by
+a newline character.
+
+Revision pack files using logical addressing don't use manifest files but
+index files instead.  The revisions inside a pack file will also get
+interleaved to reduce I/O for typical access patterns.
 
-The manifest file consists of a list of offsets, one for each revision in the
-pack file.  The offsets are stored as ASCII decimal, and separated by a newline
-character.
 
 Packing revision properties (format 5: SQLite)
 ---------------------------
@@ -341,13 +423,12 @@ Within a new transaction:
 Within a revision:
 
   Within a revision file, node-revs have a txn-id field of the form
-  "r<rev>/<offset>", to support easy lookup. The <offset> is the (ASCII
-  decimal) number of bytes from the start of the revision file to the
-  start of the node-rev.
+  "r<rev>/<item_index>", to support easy lookup.  See addressing modes
+  for details.
 
   During the final phase of a commit, node-revision IDs are rewritten
   to have repository-wide unique node-ID and copy-ID fields, and to have
-  "r<rev>/<offset>" txn-id fields.
+  "r<rev>/<item_index>" txn-id fields.
 
   In Format 3 and above, this uniqueness is done by changing a temporary
   id of "_<base36>" to "<base36>-<rev>".  Note that this means that the
@@ -429,13 +510,13 @@ A revision file contains a concatenation
   * Text and property representations
   * Node-revisions
   * The changed-path data
-  * Two offsets at the very end
+  * Two offsets at the very end (physical addressing mode only)
 
 A representation begins with a line containing either "PLAIN\n" or
-"DELTA\n" or "DELTA <rev> <offset> <length>\n", where <rev>, <offset>,
-and <length> give the location of the delta base of the representation
-and the amount of data it contains (not counting the header or
-trailer).  If no base location is given for a delta, the base is the
+"DELTA\n" or "DELTA <rev> <item_index> <length>\n", where <rev>,
+<item_index>, and <length> give the location of the delta base of the
+representation and the amount of data it contains (not counting the header
+or trailer).  If no base location is given for a delta, the base is the
 empty stream.  After the initial line comes raw svndiff data, followed
 by a cosmetic trailer "ENDREP\n".
 
@@ -459,9 +540,9 @@ defined:
   type      "file" or "dir"
   pred      The ID of the predecessor node-rev
   count     Count of node-revs since the base of the node
-  text      "<rev> <offset> <length> <size> <digest>" for text rep
-  props     "<rev> <offset> <length> <size> <digest>" for props rep
-            <rev> and <offset> give location of rep
+  text      "<rev> <item_index> <length> <size> <digest>" for text rep
+  props     "<rev> <item_index> <length> <size> <digest>" for props rep
+            <rev> and <item_index> give location of rep
             <length> gives length of rep, sans header and trailer
             <size> gives size of expanded rep; may be 0 if equal
              to the length
@@ -489,7 +570,7 @@ of the copy; it may be omitted if the no
 of revision 0).  Copy roots are identified by revision and
 created-path, not by node-rev ID, because a copy root may be a
 node-rev which exists later on within the same revision file, meaning
-its offset is not yet known.
+its location is not yet known.
 
 The changed-path data is represented as a series of changed-path
 items, each consisting of two lines.  The first line has the format
@@ -507,10 +588,10 @@ Starting with FS format 4, <action> may 
 "dir") of the node, after a hyphen; for example, an added directory
 may be represented as "add-dir".
 
-At the very end of a rev file is a pair of lines containing
-"\n<root-offset> <cp-offset>\n", where <root-offset> is the offset of
-the root directory node revision and <cp-offset> is the offset of the
-changed-path data.
+In physical addressing mode, at the very end of a rev file is a pair of
+lines containing "\n<root-offset> <cp-offset>\n", where <root-offset> is
+the offset of the root directory node revision and <cp-offset> is the
+offset of the changed-path data.
 
 All numbers in the rev file format are unsigned and are represented as
 ASCII decimal.
@@ -533,7 +614,15 @@ In FS formats 1 and 2, it also contains:
   rev                        Prototype rev file with new text reps
   rev-lock                   Lockfile for writing to the above
 
-In newer formats, these files are in the txn-protorevs/ directory.
+(In newer formats, these files are in the txn-protorevs/ directory.)
+
+In format 7+ logical addressing mode, it contains two additional index
+files (see structure-indexes for a detailed description) and one more
+counter file:
+
+  itemidx                    Next item_index value as decimal integer
+  index.l2p                  Log-to-phys proto-index
+  index.p2l                  Phys-to-log proto-index
 
 The prototype rev file is used to store the text representations as
 they are received from the client.  To ensure that only one client is
@@ -545,7 +634,7 @@ file will always be present.  The "node.
 only be present if the node-rev properties have been changed.
 
 The <sha1> files have been introduced in FS format 6. Their content
-is that of text rep references: "<rev> <offset> <length> <size> <digest>"
+is that of text rep references: "<rev> <item_index> <length> <size> <digest>"
 They will be written for text reps in the current transaction and be
 used to eliminate duplicate reps within that transaction.
 
@@ -619,3 +708,14 @@ reference the same path as above, but lo
 that file (instead of lock information).  Children are listed as MD5
 digests, too, so you would simply iterate over those digests and
 consult the files they reference for lock information.
+
+
+Index files
+-----------
+
+Format 7 introduces logical addressing that requires item indexes
+to be translated / mapped to physical rev / pack file offsets.
+
+Details of the binary format used by these index files can be
+found in structure-indexes.
+