You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2015/11/06 15:12:47 UTC

svn commit: r1712967 - in /subversion/trunk/subversion: include/private/svn_subr_private.h libsvn_fs_x/cached_data.c libsvn_fs_x/transaction.c libsvn_subr/compress.c

Author: stefan2
Date: Fri Nov  6 14:12:46 2015
New Revision: 1712967

URL: http://svn.apache.org/viewvc?rev=1712967&view=rev
Log:
Replace the hash-based in-repository representation of directories with one
that is tighter and easier to write as well as parse.  Moreover, it allows
for entry names to be any utf8 string.

The new format is:

  directory  := u(entry-count) entry*
  entry      := name-as-utf8 NUL u(node-type) noderev-id
  noderev-id := i(change-set) u(item-index)

  u(x) ... unsigned int x in 7b/8b encoding
  i(x) ... signed int x in 7b/8b encoding

Incremental directory representations work by appending new records and
applying them to a temporary hash when reading them.  No special separator
is needed - only the entry-count information will be ignored in that case.
Deleted entries are signified by an SVN_FS_X__INVALID_CHANGE_SET.

* subversion/include/private/svn_subr_private.h
  (svn__encode_int,
   svn__decode_int): Declare signed int wrappers for svn__encode_uint
                     and svn__decode_uint, respectively.

* subversion/libsvn_subr/compress.c
  (svn__encode_int,
   svn__decode_int): Implement them.

* subversion/libsvn_fs_x/cached_data.c
  (read_dir_entries): Remove old parser.
  (parse_dir_entries): The new parser with a slightly modified signature.
                       We now expect and consume a DATA buffer instead of
                       a STREAM and we create the result instead of just
                       populating it.
  (get_dir_contents): Update caller to provide the serialized contents,
                      allocated in the RESULT_POOL.

* subversion/libsvn_fs_x/transaction.c
  (unparse_dir_entry,
   unparse_dir_entries): Write the new, simpler format.
  (svn_fs_x__set_entry): Update the representation of deleted entries.

Modified:
    subversion/trunk/subversion/include/private/svn_subr_private.h
    subversion/trunk/subversion/libsvn_fs_x/cached_data.c
    subversion/trunk/subversion/libsvn_fs_x/transaction.c
    subversion/trunk/subversion/libsvn_subr/compress.c

Modified: subversion/trunk/subversion/include/private/svn_subr_private.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_subr_private.h?rev=1712967&r1=1712966&r2=1712967&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_subr_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_subr_private.h Fri Nov  6 14:12:46 2015
@@ -525,6 +525,16 @@ svn_version__at_least(svn_version_t *ver
 unsigned char *
 svn__encode_uint(unsigned char *p, apr_uint64_t val);
 
+/* Wrapper around svn__encode_uint using the LSB to store the sign:
+ *
+ * If VAL >= 0
+ *   UINT_VAL = 2 * VAL
+ * else
+ *   UINT_VAL = (- 2 * VAL) - 1
+ */
+unsigned char *
+svn__encode_int(unsigned char *p, apr_int64_t val);
+
 /* Decode an unsigned 7b/8b-encoded integer into *VAL and return a pointer
    to the byte after the integer.  The bytes to be decoded live in the
    range [P..END-1].  If these bytes do not contain a whole encoded
@@ -537,6 +547,14 @@ svn__decode_uint(apr_uint64_t *val,
                  const unsigned char *p,
                  const unsigned char *end);
 
+/* Wrapper around svn__decode_uint, reversing the transformation performed
+ * by svn__encode_int.
+ */
+const unsigned char *
+svn__decode_int(apr_int64_t *val,
+                const unsigned char *p,
+                const unsigned char *end);
+
 /* Compress the data from DATA with length LEN, it according to the
  * specified COMPRESSION_METHOD and write the result to OUT.
  * SVN__COMPRESSION_NONE is valid for COMPRESSION_METHOD.

Modified: subversion/trunk/subversion/libsvn_fs_x/cached_data.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/cached_data.c?rev=1712967&r1=1712966&r2=1712967&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_x/cached_data.c (original)
+++ subversion/trunk/subversion/libsvn_fs_x/cached_data.c Fri Nov  6 14:12:46 2015
@@ -2366,108 +2366,107 @@ compare_dirent_name(const void *a,
   return strcmp(lhs->name, rhs);
 }
 
-/* Into ENTRIES, read all directories entries from the key-value text in
- * STREAM.  If INCREMENTAL is TRUE, read until the end of the STREAM and
+/* Into ENTRIES, parse all directories entries from the serialized form in
+ * DATA.  If INCREMENTAL is TRUE, read until the end of the STREAM and
  * update the data.  ID is provided for nicer error messages.
+ *
+ * The contents of DATA will be shared with the items in ENTRIES, i.e. it
+ * must not be modified afterwards and must remain valid as long as ENTRIES
+ * is valid.  Use SCRATCH_POOL for temporary allocations.
  */
 static svn_error_t *
-read_dir_entries(apr_array_header_t *entries,
-                 svn_stream_t *stream,
-                 svn_boolean_t incremental,
-                 const svn_fs_x__id_t *id,
-                 apr_pool_t *result_pool,
-                 apr_pool_t *scratch_pool)
+parse_dir_entries(apr_array_header_t **entries_p,
+                  const svn_stringbuf_t *data,
+                  svn_boolean_t incremental,
+                  const svn_fs_x__id_t *id,
+                  apr_pool_t *result_pool,
+                  apr_pool_t *scratch_pool)
 {
-  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+  const apr_byte_t *p = (const apr_byte_t *)data->data;
+  const apr_byte_t *end = p + data->len;
+  apr_uint64_t count;
   apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL;
-  const char *terminator = SVN_HASH_TERMINATOR;
-
-  /* Read until the terminator (non-incremental) or the end of STREAM
-     (incremental mode).  In the latter mode, we use a temporary HASH
-     to make updating and removing entries cheaper. */
-  while (1)
-    {
-      svn_hash__entry_t entry;
-      svn_fs_x__dirent_t *dirent;
-      char *str;
+  apr_array_header_t *entries;
 
-      svn_pool_clear(iterpool);
-      SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
-                                   incremental, iterpool));
-
-      /* End of directory? */
-      if (entry.key == NULL)
-        {
-          /* In incremental mode, we skip the terminator and read the
-             increments following it until the end of the stream. */
-          if (incremental && terminator)
-            terminator = NULL;
-          else
-            break;
-        }
+  /* Construct the resulting container. */
+  p = svn__decode_uint(&count, p, end);
+  if (count > INT_MAX)
+    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                             _("Directory for '%s' is too large"),
+                             svn_fs_x__id_unparse(id, scratch_pool)->data);
 
-      /* Deleted entry? */
-      if (entry.val == NULL)
-        {
-          /* We must be in incremental mode */
-          assert(hash);
-          apr_hash_set(hash, entry.key, entry.keylen, NULL);
-          continue;
-        }
+  entries = apr_array_make(result_pool, (int)count,
+                           sizeof(svn_fs_x__dirent_t *));
 
-      /* Add a new directory entry. */
+  while (p != end)
+    {
+      apr_size_t len;
+      svn_fs_x__dirent_t *dirent;
       dirent = apr_pcalloc(result_pool, sizeof(*dirent));
-      dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
 
-      str = svn_cstring_tokenize(" ", &entry.val);
-      if (str == NULL)
+      /* The part of the serialized entry that is not the name will be
+       * about 6 bytes or less.  Since APR allocates with an 8 byte
+       * alignment (4 bytes loss on average per string), simply using
+       * the name string in DATA already gives us near-optimal memory
+       * usage. */
+      dirent->name = (const char *)p;
+      len = strlen(dirent->name);
+      p += len + 1;
+      if (p == end)
         return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
-                      _("Directory entry corrupt in '%s'"),
-                      svn_fs_x__id_unparse(id, scratch_pool)->data);
+                            _("Directory entry missing kind in '%s'"),
+                            svn_fs_x__id_unparse(id, scratch_pool)->data);
 
-      if (strcmp(str, SVN_FS_X__KIND_FILE) == 0)
-        {
-          dirent->kind = svn_node_file;
-        }
-      else if (strcmp(str, SVN_FS_X__KIND_DIR) == 0)
-        {
-          dirent->kind = svn_node_dir;
-        }
-      else
-        {
-          return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
-                      _("Directory entry corrupt in '%s'"),
-                      svn_fs_x__id_unparse(id, scratch_pool)->data);
-        }
+      dirent->kind = (svn_node_kind_t)*(p++);
+      if (p == end)
+        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                            _("Directory entry missing change set in '%s'"),
+                            svn_fs_x__id_unparse(id, scratch_pool)->data);
 
-      str = svn_cstring_tokenize(" ", &entry.val);
-      if (str == NULL)
+      p = svn__decode_int(&dirent->id.change_set, p, end);
+      if (p == end)
         return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
-                      _("Directory entry corrupt in '%s'"),
-                      svn_fs_x__id_unparse(id, scratch_pool)->data);
+                            _("Directory entry missing item number in '%s'"),
+                            svn_fs_x__id_unparse(id, scratch_pool)->data);
 
-      SVN_ERR(svn_fs_x__id_parse(&dirent->id, str));
+      p = svn__decode_uint(&dirent->id.number, p, end);
 
       /* In incremental mode, update the hash; otherwise, write to the
        * final array. */
       if (incremental)
-        apr_hash_set(hash, dirent->name, entry.keylen, dirent);
+        {
+          /* Insertion / update or a deletion? */
+          if (svn_fs_x__id_used(&dirent->id))
+            apr_hash_set(hash, dirent->name, len, dirent);
+          else
+            apr_hash_set(hash, dirent->name, len, NULL);
+        }
       else
-        APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = dirent;
+        {
+          APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = dirent;
+        }
     }
 
-  /* Convert container to a sorted array. */
   if (incremental)
     {
+      /* Convert container into a sorted array. */
       apr_hash_index_t *hi;
-      for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
+      for (hi = apr_hash_first(scratch_pool, hash); hi; hi = apr_hash_next(hi))
         APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = apr_hash_this_val(hi);
-    }
 
-  if (!sorted(entries))
-    svn_sort__array(entries, compare_dirents);
+      if (!sorted(entries))
+        svn_sort__array(entries, compare_dirents);
+    }
+  else
+    {
+      /* Check that we read the expected amount of entries. */
+      if ((apr_uint64_t)entries->nelts != count)
+        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                            _("Directory length mismatch in '%s'"),
+                            svn_fs_x__id_unparse(id, scratch_pool)->data);
+    }
 
-  svn_pool_destroy(iterpool);
+ *entries_p = entries;
 
   return SVN_NO_ERROR;
 }
@@ -2515,9 +2514,11 @@ get_dir_contents(svn_fs_x__dir_data_t *d
 {
   svn_stream_t *contents;
   const svn_fs_x__id_t *id = &noderev->noderev_id;
+  apr_size_t len;
+  svn_stringbuf_t *text;
+  svn_boolean_t incremental;
 
   /* Initialize the result. */
-  dir->entries = apr_array_make(result_pool, 16, sizeof(svn_fs_x__dirent_t *));
   dir->txn_filesize = SVN_INVALID_FILESIZE;
 
   /* Read dir contents - unless there is none in which case we are done. */
@@ -2539,31 +2540,40 @@ get_dir_contents(svn_fs_x__dir_data_t *d
 
       /* Obtain txn children file size. */
       SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool));
+      len = (apr_size_t)dir->txn_filesize;
 
+      /* Finally, provide stream access to FILE. */
       contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
-      SVN_ERR(read_dir_entries(dir->entries, contents, TRUE, id,
-                               result_pool, scratch_pool));
-      SVN_ERR(svn_stream_close(contents));
+      incremental = TRUE;
     }
   else if (noderev->data_rep)
     {
-      /* Undeltify content before parsing it. Otherwise, we could only
-       * parse it byte-by-byte.
-       */
-      apr_size_t len = noderev->data_rep->expanded_size;
-      svn_stringbuf_t *text;
-
       /* The representation is immutable.  Read it normally. */
+      len = noderev->data_rep->expanded_size;
       SVN_ERR(svn_fs_x__get_contents(&contents, fs, noderev->data_rep,
                                      FALSE, scratch_pool));
-      SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool));
-      SVN_ERR(svn_stream_close(contents));
-
-      /* de-serialize hash */
-      contents = svn_stream_from_stringbuf(text, scratch_pool);
-      SVN_ERR(read_dir_entries(dir->entries, contents, FALSE, id,
-                               result_pool, scratch_pool));
+      incremental = FALSE;
     }
+  else
+    {
+      /* Empty representation == empty directory. */
+      dir->entries = apr_array_make(result_pool, 0,
+                                    sizeof(svn_fs_x__dirent_t *));
+      return SVN_NO_ERROR;
+    }
+
+  /* Read the whole stream contents into a single buffer.
+   * Due to our LEN hint, no allocation overhead occurs.
+   *
+   * Also, a large portion of TEXT will be file / dir names which we
+   * directly reference from DIR->ENTRIES instead of copying them.
+   * Hence, we need to use the RESULT_POOL here. */
+  SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, result_pool));
+  SVN_ERR(svn_stream_close(contents));
+
+  /* de-serialize hash */
+  SVN_ERR(parse_dir_entries(&dir->entries, text, incremental, id,
+                            result_pool, scratch_pool));
 
   return SVN_NO_ERROR;
 }

Modified: subversion/trunk/subversion/libsvn_fs_x/transaction.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/transaction.c?rev=1712967&r1=1712966&r2=1712967&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_x/transaction.c (original)
+++ subversion/trunk/subversion/libsvn_fs_x/transaction.c Fri Nov  6 14:12:46 2015
@@ -877,61 +877,33 @@ unparse_dir_entry(svn_fs_x__dirent_t *di
                   apr_pool_t *scratch_pool)
 {
   apr_size_t to_write;
-  svn_string_t *id_str = svn_fs_x__id_unparse(&dirent->id, scratch_pool);
   apr_size_t name_len = strlen(dirent->name);
 
-  /* Note that sizeof == len + 1, i.e. accounts for the space between
-   * type and ID. */
-  apr_size_t type_len = (dirent->kind == svn_node_file)
-                      ? sizeof(SVN_FS_X__KIND_FILE)
-                      : sizeof(SVN_FS_X__KIND_DIR);
-  apr_size_t value_len = type_len + id_str->len;
-
   /* A buffer with sufficient space for 
-   * - both string lines
-   * - 4 newlines
-   * - 2 lines K/V lines containing a number each
+   * - entry name + 1 terminating NUL
+   * - 1 byte for the node kind
+   * - 2 numbers in 7b/8b encoding for the noderev-id
    */
-  char *buffer = apr_palloc(scratch_pool,   name_len + value_len
-                                          + 4
-                                          + 2 * (2 + SVN_INT64_BUFFER_SIZE));
+  apr_byte_t *buffer = apr_palloc(scratch_pool,
+                                  name_len + 2 + 2 * SVN__MAX_ENCODED_UINT_LEN);
 
   /* Now construct the value. */
-  char *p = buffer;
-
-  /* The "K length(name)\n" line. */
-  p[0] = 'K';
-  p[1] = ' ';
-  p += 2;
-  p += svn__i64toa(p, name_len);
-  *(p++) = '\n';
-
-  /* The line with the key, i.e. dir entry name. */
-  memcpy(p, dirent->name, name_len);
-  p += name_len;
-  *(p++) = '\n';
-
-  /* The "V length(type+id)\n" line. */
-  p[0] = 'V';
-  p[1] = ' ';
-  p += 2;
-  p += svn__i64toa(p, value_len);
-  *(p++) = '\n';
+  apr_byte_t *p = buffer;
 
-  /* The line with the type and ID. */
-  memcpy(p,
-         (dirent->kind == svn_node_file) ? SVN_FS_X__KIND_FILE
-                                         : SVN_FS_X__KIND_DIR,
-         type_len - 1);
-  p += type_len - 1;
-  *(p++) = ' ';
-  memcpy(p, id_str->data, id_str->len);
-  p+=id_str->len;
-  *(p++) = '\n';
+  /* The entry name, terminated by NUL. */
+  memcpy(p, dirent->name, name_len + 1);
+  p += name_len + 1;
+
+  /* The entry type. */
+  p = svn__encode_uint(p, dirent->kind);
+
+  /* The ID. */
+  p = svn__encode_int(p, dirent->id.change_set);
+  p = svn__encode_uint(p, dirent->id.number);
 
   /* Add the entry to the output stream. */
   to_write = p - buffer;
-  SVN_ERR(svn_stream_write(stream, buffer, &to_write));
+  SVN_ERR(svn_stream_write(stream, (const char *)buffer, &to_write));
 
   return SVN_NO_ERROR;
 }
@@ -943,8 +915,15 @@ unparse_dir_entries(apr_array_header_t *
                     svn_stream_t *stream,
                     apr_pool_t *scratch_pool)
 {
+  apr_byte_t buffer[SVN__MAX_ENCODED_UINT_LEN];
   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
   int i;
+
+  /* Write the number of entries. */
+  apr_size_t to_write = svn__encode_uint(buffer, entries->nelts) - buffer;
+  SVN_ERR(svn_stream_write(stream, (const char *)buffer, &to_write));
+
+  /* Write all entries */
   for (i = 0; i < entries->nelts; ++i)
     {
       svn_fs_x__dirent_t *dirent;
@@ -954,9 +933,6 @@ unparse_dir_entries(apr_array_header_t *
       SVN_ERR(unparse_dir_entry(dirent, stream, iterpool));
     }
 
-  SVN_ERR(svn_stream_printf(stream, scratch_pool, "%s\n",
-                            SVN_HASH_TERMINATOR));
-
   svn_pool_destroy(iterpool);
   return SVN_NO_ERROR;
 }
@@ -1832,6 +1808,7 @@ svn_fs_x__set_entry(svn_fs_t *fs,
   svn_fs_x__data_t *ffd = fs->fsap_data;
   apr_pool_t *subpool = svn_pool_create(scratch_pool);
   const svn_fs_x__id_t *key = &(parent_noderev->noderev_id);
+  svn_fs_x__dirent_t entry;
 
   if (!rep || !svn_fs_x__is_txn(rep->id.change_set))
     {
@@ -1912,21 +1889,17 @@ svn_fs_x__set_entry(svn_fs_t *fs,
         }
     }
 
-  /* Append an incremental hash entry for the entry change. */
+  /* Append an incremental hash entry for the entry change.
+     A deletion is represented by an "unused" noderev-id. */
   if (id)
-    {
-      svn_fs_x__dirent_t entry;
-      entry.name = name;
-      entry.id = *id;
-      entry.kind = kind;
-
-      SVN_ERR(unparse_dir_entry(&entry, out, subpool));
-    }
+    entry.id = *id;
   else
-    {
-      SVN_ERR(svn_stream_printf(out, subpool, "D %" APR_SIZE_T_FMT "\n%s\n",
-                                strlen(name), name));
-    }
+    svn_fs_x__id_reset(&entry.id);
+
+  entry.name = name;
+  entry.kind = kind;
+
+  SVN_ERR(unparse_dir_entry(&entry, out, subpool));
 
   /* Flush APR buffers. */
   SVN_ERR(svn_io_file_flush(file, subpool));

Modified: subversion/trunk/subversion/libsvn_subr/compress.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/compress.c?rev=1712967&r1=1712966&r2=1712967&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/compress.c (original)
+++ subversion/trunk/subversion/libsvn_subr/compress.c Fri Nov  6 14:12:46 2015
@@ -82,6 +82,17 @@ svn__encode_uint(unsigned char *p, apr_u
   return p;
 }
 
+unsigned char *
+svn__encode_int(unsigned char *p, apr_int64_t val)
+{
+  apr_uint64_t value = val;
+  value = value & APR_UINT64_C(0x8000000000000000)
+        ? APR_UINT64_MAX - (2 * value)
+        : 2 * value;
+
+  return svn__encode_uint(p, value);
+}
+
 const unsigned char *
 svn__decode_uint(apr_uint64_t *val,
                  const unsigned char *p,
@@ -111,6 +122,22 @@ svn__decode_uint(apr_uint64_t *val,
   return NULL;
 }
 
+const unsigned char *
+svn__decode_int(apr_int64_t *val,
+                const unsigned char *p,
+                const unsigned char *end)
+{
+  apr_uint64_t value;
+  const unsigned char *result = svn__decode_uint(&value, p, end);
+
+  value = value & 1
+        ? (APR_UINT64_MAX - value / 2)
+        : value / 2;
+  *val = (apr_int64_t)value;
+
+  return result;
+}
+
 /* If IN is a string that is >= MIN_COMPRESS_SIZE and the COMPRESSION_LEVEL
    is not SVN_DELTA_COMPRESSION_LEVEL_NONE, zlib compress it and places the
    result in OUT, with an integer prepended specifying the original size.