You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by br...@apache.org on 2014/01/28 20:36:18 UTC

svn commit: r1562172 - in /subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs: cached_data.c cached_data.h dirent.h fs_fs.h temp_serializer.c temp_serializer.h transaction.c util.c

Author: brane
Date: Tue Jan 28 19:36:17 2014
New Revision: 1562172

URL: http://svn.apache.org/r1562172
Log:
On the fsfs-ucsnorm branch: Make serialization, deserialization, caching
and searching of directories aware of normalization-independent lookup.

We use a new in-memory (and in-cache) representation of a directory entry
which extends svn_fs_dirent_t, adding a (possibly normalized) key that may
be different from the dirent name. We populate the key when the directory is
parsed from disk, and drop it when it's unparsed again. The cache contains
a serialized key (iff it's different from the name), and we sort and search
arrays of directory entries by key, no longer by name.

The goal is to perform as few normalizations as possible, and only minimally
affect the size of the cache; in the usual case where entry names in the
repository are already normalized, the cached data uses only an additional
pointer per cached directory entry.

* subversion/libsvn_fs_fs/dirent.h: New file.
  (svn_fs_fs__dirent_t): New; a "subclass" of svn_fs_dirent_t.
  (svn_fs_fs__set_dirent_key):
   New; populates the key of a svn_fs_fs__dirent_t from an svn_fs_dirent_t.
* subversion/libsvn_fs_fs/fs_fs.h
  (svn_fs_fs__normalize): New; utility for normalizing a string.
* subversion/libsvn_fs_fs/util.c: Include svn_utf_private.h and dirent.h.
  (svn_fs_fs__dirent_t, svn_fs_fs__set_dirent_key): Implement.

* subversion/libsvn_fs_fs/cached_data.h: Include dirent.h.
  (svn_fs_fs__find_dir_entry,
   svn_fs_fs__rep_contents_dir_entry): Note normalization requirement.
    Change the return type to the subclassed entry type.
* subversion/libsvn_fs_fs/cached_data.c
  (sorted, compare_dirents, compare_dirent_name):
   Use keys, not names, for comparison.
  (read_dir_entries): Populate the (normalized) key when reading dirents.
  (get_dir_contents): Update calls of read_dir_entries.
  (svn_fs_fs__find_dir_entry):
   Change the return type to the subclassed entry type.
  (svn_fs_fs__rep_contents_dir_entry):
   Change the return type to the subclassed entry type.
   Propagate the entry key to the copied returned object.

* subversion/libsvn_fs_fs/temp_serializer.h: Include dirent.h.
  (svn_fs_fs__extract_dir_entry): Note that the returned object is
   a wrapped entry type.
  (replace_baton_t): Note that the 'name' member must be normalized.
   Change the type of the 'new_entry' member to a wrapped entry.
  (svn_fs_fs__replace_dir_entry): Note that the implementation
   expects a wrapped entry type.
* subversion/libsvn_fs_fs/temp_serializer.c
  (dir_data_t): Change the type of the 'entries' field.
  (serialize_dir_entry): Serialize a whole wrapped entry, including key.
  (serialize_dir): Operate on wrapped entries.
  (deserialize_dir): Operate on wrapped entries.
   Deserialize the wrapped entry's key, taking care to maintain the
   key == name invariant when the name and key are identical.
  (find_entry): Find entries by key, not by name.
  (svn_fs_fs__extract_dir_entry): Operate on wrapped entries,
   and deserialize the wrapped entry's key, taking care to maintain the
   key == name invariant when the name and key are identical.
  (slowly_replace_dir_entry): Operate on wrapped entries.
  (svn_fs_fs__replace_dir_entry): Operate on wrapped entries.

* subversion/libsvn_fs_fs/transaction.c
  (unparse_dir_entries): Operate on wrapped entries,
   but do not unparse the key.
  (svn_fs_fs__set_entry): Make sure that replace_baton_t::name contains
   a normalized name when normalized lookups are enabled, and that
   replace_baton_t::new_entry's key is defined.
  (write_final_rev): Operate on wrapped entries.

Added:
    subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/dirent.h   (with props)
Modified:
    subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.c
    subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.h
    subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/fs_fs.h
    subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.c
    subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.h
    subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/transaction.c
    subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/util.c

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.c
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.c?rev=1562172&r1=1562171&r2=1562172&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.c (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.c Tue Jan 28 19:36:17 2014
@@ -1919,39 +1919,39 @@ svn_fs_fs__get_file_delta_stream(svn_txd
   return SVN_NO_ERROR;
 }
 
-/* Return TRUE when all svn_fs_dirent_t* in ENTRIES are already sorted
+/* Return TRUE when all svn_fs_fs__dirent_t* in ENTRIES are already sorted
    by their respective name. */
 static svn_boolean_t
 sorted(apr_array_header_t *entries)
 {
   int i;
 
-  const svn_fs_dirent_t * const *dirents = (const void *)entries->elts;
+  const svn_fs_fs__dirent_t *const *dirents = (const void *)entries->elts;
   for (i = 0; i < entries->nelts-1; ++i)
-    if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
+    if (strcmp(dirents[i]->key, dirents[i+1]->key) > 0)
       return FALSE;
 
   return TRUE;
 }
 
-/* Compare the names of the two dirents given in **A and **B. */
+/* Compare the keys of the two dirents given in **A and **B. */
 static int
 compare_dirents(const void *a, const void *b)
 {
-  const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
-  const svn_fs_dirent_t *rhs = *((const svn_fs_dirent_t * const *) b);
+  const svn_fs_fs__dirent_t *lhs = *((const svn_fs_fs__dirent_t * const *) a);
+  const svn_fs_fs__dirent_t *rhs = *((const svn_fs_fs__dirent_t * const *) b);
 
-  return strcmp(lhs->name, rhs->name);
+  return strcmp(lhs->key, rhs->key);
 }
 
-/* Compare the name of the dirents given in **A with the C string in *B. */
+/* Compare the key of the dirents given in **A with the C string in *B. */
 static int
 compare_dirent_name(const void *a, const void *b)
 {
-  const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
+  const svn_fs_fs__dirent_t *lhs = *((const svn_fs_fs__dirent_t * const *) a);
   const char *rhs = b;
 
-  return strcmp(lhs->name, rhs);
+  return strcmp(lhs->key, rhs);
 }
 
 /* Into ENTRIES, read all directories entries from the key-value text in
@@ -1963,6 +1963,7 @@ read_dir_entries(apr_array_header_t *ent
                  svn_stream_t *stream,
                  svn_boolean_t incremental,
                  const svn_fs_id_t *id,
+                 svn_boolean_t normalized_lookup,
                  apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
 {
@@ -1976,7 +1977,7 @@ read_dir_entries(apr_array_header_t *ent
   while (1)
     {
       svn_hash__entry_t entry;
-      svn_fs_dirent_t *dirent;
+      svn_fs_fs__dirent_t *dirent;
       char *str;
 
       svn_pool_clear(iterpool);
@@ -2005,7 +2006,9 @@ read_dir_entries(apr_array_header_t *ent
 
       /* Add a new directory entry. */
       dirent = apr_pcalloc(result_pool, sizeof(*dirent));
-      dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
+      dirent->dirent.name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
+      SVN_ERR(svn_fs_fs__set_dirent_key(dirent, normalized_lookup,
+                                        result_pool, scratch_pool));
 
       str = svn_cstring_tokenize(" ", &entry.val);
       if (str == NULL)
@@ -2015,11 +2018,11 @@ read_dir_entries(apr_array_header_t *ent
 
       if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
         {
-          dirent->kind = svn_node_file;
+          dirent->dirent.kind = svn_node_file;
         }
       else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
         {
-          dirent->kind = svn_node_dir;
+          dirent->dirent.kind = svn_node_dir;
         }
       else
         {
@@ -2034,14 +2037,14 @@ read_dir_entries(apr_array_header_t *ent
                            _("Directory entry corrupt in '%s'"),
                            svn_fs_fs__id_unparse(id, scratch_pool)->data);
 
-      dirent->id = svn_fs_fs__id_parse(str, strlen(str), result_pool);
+      dirent->dirent.id = svn_fs_fs__id_parse(str, strlen(str), result_pool);
 
       /* In incremental mode, update the hash; otherwise, write to the
        * final array. */
       if (incremental)
         apr_hash_set(hash, entry.key, entry.keylen, dirent);
       else
-        APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent;
+        APR_ARRAY_PUSH(entries, svn_fs_fs__dirent_t *) = dirent;
     }
 
   /* Convert container to a sorted array. */
@@ -2049,7 +2052,7 @@ read_dir_entries(apr_array_header_t *ent
     {
       apr_hash_index_t *hi;
       for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
-        APR_ARRAY_PUSH(entries, svn_fs_dirent_t *)
+        APR_ARRAY_PUSH(entries, svn_fs_fs__dirent_t *)
           = svn__apr_hash_index_val(hi);
     }
 
@@ -2063,7 +2066,7 @@ read_dir_entries(apr_array_header_t *ent
 
 /* Fetch the contents of a directory into ENTRIES.  Values are stored
    as filename to string mappings; further conversion is necessary to
-   convert them into svn_fs_dirent_t values. */
+   convert them into svn_fs_fs__dirent_t values. */
 static svn_error_t *
 get_dir_contents(apr_array_header_t **entries,
                  svn_fs_t *fs,
@@ -2071,9 +2074,11 @@ get_dir_contents(apr_array_header_t **en
                  apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
 {
+  const svn_boolean_t normalized_lookup =
+    ((fs_fs_data_t*)fs->fsap_data)->normalized_lookup;
   svn_stream_t *contents;
 
-  *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
+  *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_fs__dirent_t *));
   if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
     {
       const char *filename
@@ -2085,7 +2090,7 @@ get_dir_contents(apr_array_header_t **en
       SVN_ERR(svn_stream_open_readonly(&contents, filename, scratch_pool,
                                        scratch_pool));
       SVN_ERR(read_dir_entries(*entries, contents, TRUE,  noderev->id,
-                               result_pool, scratch_pool));
+                               normalized_lookup, result_pool, scratch_pool));
       SVN_ERR(svn_stream_close(contents));
     }
   else if (noderev->data_rep)
@@ -2109,7 +2114,7 @@ get_dir_contents(apr_array_header_t **en
       /* de-serialize hash */
       contents = svn_stream_from_stringbuf(text, text_pool);
       SVN_ERR(read_dir_entries(*entries, contents, FALSE,  noderev->id,
-                               result_pool, scratch_pool));
+                               normalized_lookup, result_pool, scratch_pool));
 
       svn_pool_destroy(text_pool);
     }
@@ -2191,18 +2196,18 @@ svn_fs_fs__rep_contents_dir(apr_array_he
   return SVN_NO_ERROR;
 }
 
-svn_fs_dirent_t *
+svn_fs_fs__dirent_t *
 svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
                           const char *name,
                           int *hint)
 {
-  svn_fs_dirent_t **result
+  svn_fs_fs__dirent_t **result
     = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
   return result ? *result : NULL;
 }
 
 svn_error_t *
-svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
+svn_fs_fs__rep_contents_dir_entry(svn_fs_fs__dirent_t **dirent,
                                   svn_fs_t *fs,
                                   node_revision_t *noderev,
                                   const char *name,
@@ -2232,8 +2237,8 @@ svn_fs_fs__rep_contents_dir_entry(svn_fs
   if (! found)
     {
       apr_array_header_t *entries;
-      svn_fs_dirent_t *entry;
-      svn_fs_dirent_t *entry_copy = NULL;
+      svn_fs_fs__dirent_t *entry;
+      svn_fs_fs__dirent_t *entry_copy = NULL;
 
       /* read the dir from the file system. It will probably be put it
          into the cache for faster lookup in future calls. */
@@ -2244,10 +2249,18 @@ svn_fs_fs__rep_contents_dir_entry(svn_fs
       entry = svn_fs_fs__find_dir_entry(entries, name, NULL);
       if (entry)
         {
+          svn_fs_dirent_t *dirent_copy;
+
           entry_copy = apr_palloc(result_pool, sizeof(*entry_copy));
-          entry_copy->name = apr_pstrdup(result_pool, entry->name);
-          entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool);
-          entry_copy->kind = entry->kind;
+          dirent_copy = &entry_copy->dirent;
+          dirent_copy->name = apr_pstrdup(result_pool, entry->dirent.name);
+          dirent_copy->id = svn_fs_fs__id_copy(entry->dirent.id, result_pool);
+          dirent_copy->kind = entry->dirent.kind;
+
+          if (entry->key != entry->dirent.name)
+            entry_copy->key = apr_pstrdup(result_pool, entry->key);
+          else
+            entry_copy->key = dirent_copy->name;
         }
 
       *dirent = entry_copy;

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.h
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.h?rev=1562172&r1=1562171&r2=1562172&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.h (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/cached_data.h Tue Jan 28 19:36:17 2014
@@ -27,6 +27,7 @@
 #include "svn_fs.h"
 
 #include "fs.h"
+#include "dirent.h"
 
 
 
@@ -115,7 +116,8 @@ svn_fs_fs__rep_contents_dir(apr_array_he
    entry exists, return NULL.  If HINT is not NULL, set *HINT to the array
    index of the entry returned.  Successive calls in a linear scan scenario
    will be faster called with the same HINT variable. */
-svn_fs_dirent_t *
+/* UCSNORM TODO: NAME must always be the (normalized) entry key. */
+svn_fs_fs__dirent_t *
 svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
                           const char *name,
                           int *hint);
@@ -124,8 +126,9 @@ svn_fs_fs__find_dir_entry(apr_array_head
    by NODEREV in filesystem FS.  If no such entry exits, *DIRENT will
    be NULL. The returned object is allocated in RESULT_POOL; SCRATCH_POOL
    used for temporary allocations. */
+/* UCSNORM TODO: NAME must always be the (normalized) entry key. */
 svn_error_t *
-svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
+svn_fs_fs__rep_contents_dir_entry(svn_fs_fs__dirent_t **dirent,
                                   svn_fs_t *fs,
                                   node_revision_t *noderev,
                                   const char *name,

Added: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/dirent.h
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/dirent.h?rev=1562172&view=auto
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/dirent.h (added)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/dirent.h Tue Jan 28 19:36:17 2014
@@ -0,0 +1,60 @@
+/* dirent.h : utilities for normalization-independent path lookup
+ *
+ * ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one
+ *    or more contributor license agreements.  See the NOTICE file
+ *    distributed with this work for additional information
+ *    regarding copyright ownership.  The ASF licenses this file
+ *    to you under the Apache License, Version 2.0 (the
+ *    "License"); you may not use this file except in compliance
+ *    with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing,
+ *    software distributed under the License is distributed on an
+ *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *    KIND, either express or implied.  See the License for the
+ *    specific language governing permissions and limitations
+ *    under the License.
+ * ====================================================================
+ */
+
+#ifndef SVN_LIBSVN_FS__DIRENT_H
+#define SVN_LIBSVN_FS__DIRENT_H
+
+#include "fs.h"
+
+/* Array item used for directory contents. */
+typedef struct svn_fs_fs__dirent_t
+{
+  /* The wrapped public directory entry.
+   *
+   * It must always be the first member of this structure, so that a
+   * pointer to an svn_fs_fs__dirent_t can be reinterpreted as a
+   * pointer to an svn_fs_dirent_t.
+   */
+  svn_fs_dirent_t dirent;
+
+  /* The directory entry key.
+   *
+   * When normalized lookup is disabled, or dirent.name is already
+   * normalized, this is the same pointer value as dirent.name.
+   * Otherwise, it is its normalized form.
+   */
+  const char *key;
+} svn_fs_fs__dirent_t;
+
+
+/* Given a directory entry with a valid DIRENT->dirent.name, set
+   DIRENT->key according to specification. DIRENT *must* be allocated
+   in RESULT_POOL, and DIRENT->key will be, too.
+
+   Use SCRATCH_POOL for temporary allocations. */
+svn_error_t *
+svn_fs_fs__set_dirent_key(svn_fs_fs__dirent_t *dirent,
+                          svn_boolean_t normalized,
+                          apr_pool_t *result_pool,
+                          apr_pool_t *scratch_pool);
+
+#endif /* SVN_LIBSVN_FS__DIRENT_H */

Propchange: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/dirent.h
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/fs_fs.h
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/fs_fs.h?rev=1562172&r1=1562171&r2=1562172&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/fs_fs.h (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/fs_fs.h Tue Jan 28 19:36:17 2014
@@ -212,4 +212,8 @@ svn_fs_fs__initialize_txn_caches(svn_fs_
 void
 svn_fs_fs__reset_txn_caches(svn_fs_t *fs);
 
-#endif
+/* Set *NORMSTR to a normalized form of STR, allocated from POOL. */
+svn_error_t *
+svn_fs_fs__normalize(const char **normstr, const char *str, apr_pool_t *pool);
+
+#endif /* SVN_LIBSVN_FS__FS_FS_H */

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.c
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.c?rev=1562172&r1=1562171&r2=1562172&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.c (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.c Tue Jan 28 19:36:17 2014
@@ -166,7 +166,7 @@ typedef struct dir_data_t
   apr_size_t len;
 
   /* reference to the entries */
-  svn_fs_dirent_t **entries;
+  svn_fs_fs__dirent_t **entries;
 
   /* size of the serialized entries and don't be too wasteful
    * (needed since the entries are no longer in sequence) */
@@ -179,18 +179,26 @@ typedef struct dir_data_t
  */
 static void
 serialize_dir_entry(svn_temp_serializer__context_t *context,
-                    svn_fs_dirent_t **entry_p,
+                    svn_fs_fs__dirent_t **entry_p,
                     apr_uint32_t *length)
 {
-  svn_fs_dirent_t *entry = *entry_p;
+  static const char *const empty = "";
+  svn_fs_fs__dirent_t *entry = *entry_p;
   apr_size_t initial_length = svn_temp_serializer__get_length(context);
 
   svn_temp_serializer__push(context,
                             (const void * const *)entry_p,
-                            sizeof(svn_fs_dirent_t));
-
-  svn_fs_fs__id_serialize(context, &entry->id);
-  svn_temp_serializer__add_string(context, &entry->name);
+                            sizeof(*entry));
+  svn_fs_fs__id_serialize(context, &entry->dirent.id);
+  svn_temp_serializer__add_string(context, &entry->dirent.name);
+
+  /* Serialize the key. If it's the same as the dirent name, we'll
+     store an empty string instead, as a signal to the
+     deserializer. */
+  if (entry->key != entry->dirent.name)
+    svn_temp_serializer__add_string(context, &entry->key);
+  else
+    svn_temp_serializer__add_string(context, &empty);
 
   *length = (apr_uint32_t)(  svn_temp_serializer__get_length(context)
                            - APR_ALIGN_DEFAULT(initial_length));
@@ -211,7 +219,8 @@ serialize_dir(apr_array_header_t *entrie
   /* calculate sizes */
   apr_size_t count = entries->nelts;
   apr_size_t over_provision = 2 + count / 4;
-  apr_size_t entries_len = (count + over_provision) * sizeof(svn_fs_dirent_t*);
+  apr_size_t entries_len =
+    (count + over_provision) * sizeof(svn_fs_fs__dirent_t);
   apr_size_t lengths_len = (count + over_provision) * sizeof(apr_uint32_t);
 
   /* copy the hash entries to an auxilliary struct of known layout */
@@ -222,7 +231,7 @@ serialize_dir(apr_array_header_t *entrie
   dir_data.lengths = apr_palloc(pool, lengths_len);
 
   for (i = 0; i < count; ++i)
-    dir_data.entries[i] = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *);
+    dir_data.entries[i] = APR_ARRAY_IDX(entries, i, svn_fs_fs__dirent_t *);
 
   /* Serialize that aux. structure into a new one. Also, provide a good
    * estimate for the size of the buffer that we will need. */
@@ -259,11 +268,11 @@ static apr_array_header_t *
 deserialize_dir(void *buffer, dir_data_t *dir_data, apr_pool_t *pool)
 {
   apr_array_header_t *result
-    = apr_array_make(pool, dir_data->count, sizeof(svn_fs_dirent_t *));
+    = apr_array_make(pool, dir_data->count, sizeof(svn_fs_fs__dirent_t *));
   apr_size_t i;
   apr_size_t count;
-  svn_fs_dirent_t *entry;
-  svn_fs_dirent_t **entries;
+  svn_fs_fs__dirent_t *entry;
+  svn_fs_fs__dirent_t **entries;
 
   /* resolve the reference to the entries array */
   svn_temp_deserializer__resolve(buffer, (void **)&dir_data->entries);
@@ -276,11 +285,16 @@ deserialize_dir(void *buffer, dir_data_t
       entry = dir_data->entries[i];
 
       /* pointer fixup */
-      svn_temp_deserializer__resolve(entry, (void **)&entry->name);
-      svn_fs_fs__id_deserialize(entry, (svn_fs_id_t **)&entry->id);
+      svn_temp_deserializer__resolve(entry, (void **)&entry->key);
+      svn_temp_deserializer__resolve(entry, (void **)&entry->dirent.name);
+      svn_fs_fs__id_deserialize(entry, (svn_fs_id_t **)&entry->dirent.id);
+
+      /* fix up the entry key */
+      if (!(entry->key && *entry->key))
+        entry->key = entry->dirent.name;
 
       /* add the entry to the hash */
-      APR_ARRAY_PUSH(result, svn_fs_dirent_t *) = entry;
+      APR_ARRAY_PUSH(result, svn_fs_fs__dirent_t *) = entry;
     }
 
   /* return the now complete hash */
@@ -755,29 +769,29 @@ svn_fs_fs__get_sharded_offset(void **out
 }
 
 /* Utility function that returns the lowest index of the first entry in
- * *ENTRIES that points to a dir entry with a name equal or larger than NAME.
+ * *ENTRIES that points to a dir entry with a key equal or larger than KEY.
  * If an exact match has been found, *FOUND will be set to TRUE. COUNT is
  * the number of valid entries in ENTRIES.
  */
 static apr_size_t
-find_entry(svn_fs_dirent_t **entries,
-           const char *name,
+find_entry(svn_fs_fs__dirent_t **entries,
+           const char *key,
            apr_size_t count,
            svn_boolean_t *found)
 {
-  /* binary search for the desired entry by name */
+  /* binary search for the desired entry by key */
   apr_size_t lower = 0;
   apr_size_t upper = count;
   apr_size_t middle;
 
   for (middle = upper / 2; lower < upper; middle = (upper + lower) / 2)
     {
-      const svn_fs_dirent_t *entry =
-          svn_temp_deserializer__ptr(entries, (const void *const *)&entries[middle]);
-      const char* entry_name =
-          svn_temp_deserializer__ptr(entry, (const void *const *)&entry->name);
+      const svn_fs_fs__dirent_t *entry =
+        svn_temp_deserializer__ptr(entries, (const void *const *)&entries[middle]);
+      const char* entry_key =
+        svn_temp_deserializer__ptr(entry, (const void *const *)&entry->key);
 
-      int diff = strcmp(entry_name, name);
+      int diff = strcmp(entry_key, key);
       if (diff < 0)
         lower = middle + 1;
       else
@@ -785,16 +799,16 @@ find_entry(svn_fs_dirent_t **entries,
     }
 
   /* check whether we actually found a match */
-  *found = FALSE;
-  if (lower < count)
+  if (lower >= count)
+    *found = FALSE;
+  else
     {
-      const svn_fs_dirent_t *entry =
-          svn_temp_deserializer__ptr(entries, (const void *const *)&entries[lower]);
-      const char* entry_name =
-          svn_temp_deserializer__ptr(entry, (const void *const *)&entry->name);
+      const svn_fs_fs__dirent_t *entry =
+        svn_temp_deserializer__ptr(entries, (const void *const *)&entries[lower]);
+      const char* entry_key =
+        svn_temp_deserializer__ptr(entry, (const void *const *)&entry->key);
 
-      if (strcmp(entry_name, name) == 0)
-        *found = TRUE;
+      *found = (strcmp(entry_key, key) == 0);
     }
 
   return lower;
@@ -812,7 +826,7 @@ svn_fs_fs__extract_dir_entry(void **out,
   svn_boolean_t found;
 
   /* resolve the reference to the entries array */
-  const svn_fs_dirent_t * const *entries =
+  const svn_fs_fs__dirent_t * const *entries =
     svn_temp_deserializer__ptr(data, (const void *const *)&dir_data->entries);
 
   /* resolve the reference to the lengths array */
@@ -820,7 +834,7 @@ svn_fs_fs__extract_dir_entry(void **out,
     svn_temp_deserializer__ptr(data, (const void *const *)&dir_data->lengths);
 
   /* binary search for the desired entry by name */
-  apr_size_t pos = find_entry((svn_fs_dirent_t **)entries,
+  apr_size_t pos = find_entry((svn_fs_fs__dirent_t **)entries,
                               name,
                               dir_data->count,
                               &found);
@@ -829,8 +843,8 @@ svn_fs_fs__extract_dir_entry(void **out,
   *out = NULL;
   if (found)
     {
-      const svn_fs_dirent_t *source =
-          svn_temp_deserializer__ptr(entries, (const void *const *)&entries[pos]);
+      const svn_fs_fs__dirent_t *source =
+        svn_temp_deserializer__ptr(entries, (const void *const *)&entries[pos]);
 
       /* Entries have been serialized one-by-one, each time including all
        * nested structures and strings. Therefore, they occupy a single
@@ -840,12 +854,20 @@ svn_fs_fs__extract_dir_entry(void **out,
       apr_size_t size = lengths[pos];
 
       /* copy & deserialize the entry */
-      svn_fs_dirent_t *new_entry = apr_palloc(pool, size);
+      svn_fs_fs__dirent_t *new_entry = apr_palloc(pool, size);
       memcpy(new_entry, source, size);
 
-      svn_temp_deserializer__resolve(new_entry, (void **)&new_entry->name);
-      svn_fs_fs__id_deserialize(new_entry, (svn_fs_id_t **)&new_entry->id);
-      *(svn_fs_dirent_t **)out = new_entry;
+      /* FIXME: Extract common code from here and deserialize_dir(). */
+      /* pointer fixup */
+      svn_temp_deserializer__resolve(new_entry, (void **)&new_entry->key);
+      svn_temp_deserializer__resolve(new_entry, (void **)&new_entry->dirent.name);
+      svn_fs_fs__id_deserialize(new_entry, (svn_fs_id_t **)&new_entry->dirent.id);
+
+      /* fix up the entry key */
+      if (!(new_entry->key && *new_entry->key))
+        new_entry->key = new_entry->dirent.name;
+
+      *(svn_fs_fs__dirent_t **)out = new_entry;
     }
 
   return SVN_NO_ERROR;
@@ -864,7 +886,7 @@ slowly_replace_dir_entry(void **data,
   dir_data_t *dir_data = (dir_data_t *)*data;
   apr_array_header_t *dir;
   int idx = -1;
-  svn_fs_dirent_t *entry;
+  svn_fs_fs__dirent_t *entry;
 
   SVN_ERR(svn_fs_fs__deserialize_dir_entries((void **)&dir,
                                              *data,
@@ -878,7 +900,7 @@ slowly_replace_dir_entry(void **data,
     {
       /* Replace ENTRY with / insert the NEW_ENTRY */
       if (entry)
-        APR_ARRAY_IDX(dir, idx, svn_fs_dirent_t *) = replace_baton->new_entry;
+        APR_ARRAY_IDX(dir, idx, svn_fs_fs__dirent_t *) = replace_baton->new_entry;
       else
         svn_sort__array_insert(dir, &replace_baton->new_entry, idx);
     }
@@ -901,7 +923,7 @@ svn_fs_fs__replace_dir_entry(void **data
   replace_baton_t *replace_baton = (replace_baton_t *)baton;
   dir_data_t *dir_data = (dir_data_t *)*data;
   svn_boolean_t found;
-  svn_fs_dirent_t **entries;
+  svn_fs_fs__dirent_t **entries;
   apr_uint32_t *lengths;
   apr_uint32_t length;
   apr_size_t pos;
@@ -915,7 +937,7 @@ svn_fs_fs__replace_dir_entry(void **data
     return slowly_replace_dir_entry(data, data_len, baton, pool);
 
   /* resolve the reference to the entries array */
-  entries = (svn_fs_dirent_t **)
+  entries = (svn_fs_fs__dirent_t **)
     svn_temp_deserializer__ptr((const char *)dir_data,
                                (const void *const *)&dir_data->entries);
 

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.h
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.h?rev=1562172&r1=1562171&r2=1562172&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.h (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/temp_serializer.h Tue Jan 28 19:36:17 2014
@@ -24,6 +24,7 @@
 #define SVN_LIBSVN_FS__TEMP_SERIALIZER_H
 
 #include "fs.h"
+#include "dirent.h"
 
 /**
  * Prepend the @a number to the @a string in a space efficient way such that
@@ -189,9 +190,10 @@ svn_fs_fs__get_sharded_offset(void **out
 
 /**
  * Implements #svn_cache__partial_getter_func_t for a single
- * #svn_fs_dirent_t within a serialized directory contents hash,
- * identified by its name (const char @a *baton).
+ * #svn_fs_fs__dirent_t within a serialized directory contents hash,
+ * identified by its key (const char @a *baton).
  */
+/* UCSNORM TODO: NAME must always be the (normalized) entry key. */
 svn_error_t *
 svn_fs_fs__extract_dir_entry(void **out,
                              const void *data,
@@ -207,16 +209,17 @@ svn_fs_fs__extract_dir_entry(void **out,
  */
 typedef struct replace_baton_t
 {
-  /** name of the directory entry to modify */
+  /** Name of the directory entry to modify.
+      When normalized lookup is enabled, this name must be normalized. */
   const char *name;
 
   /** directory entry to insert instead */
-  svn_fs_dirent_t *new_entry;
+  svn_fs_fs__dirent_t *new_entry;
 } replace_baton_t;
 
 /**
  * Implements #svn_cache__partial_setter_func_t for a single
- * #svn_fs_dirent_t within a serialized directory contents hash,
+ * #svn_fs_fs__dirent_t within a serialized directory contents hash,
  * identified by its name in the #replace_baton_t in @a baton.
  */
 svn_error_t *

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/transaction.c
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/transaction.c?rev=1562172&r1=1562171&r2=1562172&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/transaction.c (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/transaction.c Tue Jan 28 19:36:17 2014
@@ -593,11 +593,11 @@ unparse_dir_entries(apr_array_header_t *
   int i;
   for (i = 0; i < entries->nelts; ++i)
     {
-      svn_fs_dirent_t *dirent;
+      svn_fs_fs__dirent_t *entry;
 
       svn_pool_clear(iterpool);
-      dirent = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *);
-      SVN_ERR(unparse_dir_entry(dirent, stream, iterpool));
+      entry = APR_ARRAY_IDX(entries, i, svn_fs_fs__dirent_t *);
+      SVN_ERR(unparse_dir_entry(&entry->dirent, stream, iterpool));
     }
 
   SVN_ERR(svn_stream_printf(stream, pool, "%s\n", SVN_HASH_TERMINATOR));
@@ -1527,19 +1527,32 @@ svn_fs_fs__set_entry(svn_fs_t *fs,
   if (ffd->txn_dir_cache)
     {
       /* build parameters: (name, new entry) pair */
+      const svn_boolean_t normalized_lookup =
+        ((fs_fs_data_t*)fs->fsap_data)->normalized_lookup;
       const char *key =
-          svn_fs_fs__id_unparse(parent_noderev->id, subpool)->data;
+        svn_fs_fs__id_unparse(parent_noderev->id, subpool)->data;
       replace_baton_t baton;
 
-      baton.name = name;
-      baton.new_entry = NULL;
+
 
       if (id)
         {
           baton.new_entry = apr_pcalloc(subpool, sizeof(*baton.new_entry));
-          baton.new_entry->name = name;
-          baton.new_entry->kind = kind;
-          baton.new_entry->id = id;
+          baton.new_entry->dirent.name = name;
+          baton.new_entry->dirent.kind = kind;
+          baton.new_entry->dirent.id = id;
+          SVN_ERR(svn_fs_fs__set_dirent_key(baton.new_entry,
+                                            normalized_lookup,
+                                            subpool, subpool));
+          baton.name = baton.new_entry->key;
+        }
+      else
+        {
+          if (normalized_lookup)
+            SVN_ERR(svn_fs_fs__normalize(&baton.name, name, subpool));
+          else
+            baton.name = name;
+          baton.new_entry = NULL;
         }
 
       /* actually update the cached directory (if cached) */
@@ -2515,7 +2528,7 @@ write_hash_to_stream(svn_stream_t *strea
   return SVN_NO_ERROR;
 }
 
-/* Implement collection_writer_t writing the svn_fs_dirent_t* array given
+/* Implement collection_writer_t writing the svn_fs_fs__dirent_t* array given
    as BATON. */
 static svn_error_t *
 write_directory_to_stream(svn_stream_t *stream,
@@ -2915,16 +2928,16 @@ write_final_rev(const svn_fs_id_t **new_
                                           subpool));
       for (i = 0; i < entries->nelts; ++i)
         {
-          svn_fs_dirent_t *dirent
-            = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *);
+          svn_fs_fs__dirent_t *entry
+            = APR_ARRAY_IDX(entries, i, svn_fs_fs__dirent_t *);
 
           svn_pool_clear(subpool);
-          SVN_ERR(write_final_rev(&new_id, file, rev, fs, dirent->id,
+          SVN_ERR(write_final_rev(&new_id, file, rev, fs, entry->dirent.id,
                                   start_node_id, start_copy_id, initial_offset,
                                   reps_to_cache, reps_hash, reps_pool, FALSE,
                                   subpool));
           if (new_id && (svn_fs_fs__id_rev(new_id) == rev))
-            dirent->id = svn_fs_fs__id_copy(new_id, pool);
+            entry->dirent.id = svn_fs_fs__id_copy(new_id, pool);
         }
       svn_pool_destroy(subpool);
 

Modified: subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/util.c
URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/util.c?rev=1562172&r1=1562171&r2=1562172&view=diff
==============================================================================
--- subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/util.c (original)
+++ subversion/branches/fsfs-ucsnorm/subversion/libsvn_fs_fs/util.c Tue Jan 28 19:36:17 2014
@@ -25,7 +25,9 @@
 #include "svn_ctype.h"
 #include "svn_dirent_uri.h"
 #include "private/svn_string_private.h"
+#include "private/svn_utf_private.h"
 
+#include "dirent.h"
 #include "fs_fs.h"
 #include "pack.h"
 #include "util.h"
@@ -645,3 +647,33 @@ svn_fs_fs__supports_move(svn_fs_t *fs)
 
   return ffd->format >= SVN_FS_FS__MIN_MOVE_SUPPORT_FORMAT;
 }
+
+svn_error_t *
+svn_fs_fs__normalize(const char **normstr, const char *str, apr_pool_t *pool)
+{
+  svn_membuf_t buffer;  /* work buffer handed to svn_utf__normalize() */
+  svn_membuf__create(&buffer, 0, pool);  /* buffer is set up from POOL */
+  return svn_error_trace(  /* NOTE(review): *NORMSTR presumably in POOL */
+      svn_utf__normalize(normstr, str,
+                         SVN_UTF__UNKNOWN_LENGTH, &buffer));
+}
+
+svn_error_t *
+svn_fs_fs__set_dirent_key(svn_fs_fs__dirent_t *dirent,
+                          svn_boolean_t normalized,
+                          apr_pool_t *result_pool,
+                          apr_pool_t *scratch_pool)
+{
+  if (!normalized)
+    dirent->key = dirent->dirent.name;  /* key is the name itself */
+  else
+    {
+      SVN_ERR(svn_fs_fs__normalize(&dirent->key, dirent->dirent.name,
+                                   scratch_pool));  /* normalize w/ scratch */
+      if (0 == strcmp(dirent->key, dirent->dirent.name))
+        dirent->key = dirent->dirent.name;  /* same text: alias the name */
+      else if (result_pool != scratch_pool)  /* copy key into RESULT_POOL */
+        dirent->key = apr_pstrdup(result_pool, dirent->key);
+    }
+  return SVN_NO_ERROR;
+}