You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@subversion.apache.org by Stefan Sperling <st...@elego.de> on 2014/06/23 23:41:35 UTC

returning hash from FSFS changes cache?

The FSFS changes cache returns an array representation of changed
path data which needs to be converted back to a hash for API reasons
by svn_fs_fs__paths_changed(). No other function accesses the
changes cache so returning a temporary array seems odd.
Wouldn't it make sense for the cache to return hashes directly?

Isn't it better to convert from hash to array during serialization,
which ideally happens only once, rather than constructing an array
and then a hash from array elements every time the cache is accessed?

I'm not sure how to measure the effects of this.

Index: subversion/libsvn_fs_fs/cached_data.c
===================================================================
--- subversion/libsvn_fs_fs/cached_data.c	(revision 1604933)
+++ subversion/libsvn_fs_fs/cached_data.c	(working copy)
@@ -2686,7 +2686,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
 }
 
 svn_error_t *
-svn_fs_fs__get_changes(apr_array_header_t **changes,
+svn_fs_fs__get_changes(apr_hash_t **changes,
                        svn_fs_t *fs,
                        svn_revnum_t rev,
                        apr_pool_t *result_pool)
@@ -3093,7 +3093,7 @@ auto_select_stream(svn_stream_t **stream,
  * FILE and wrapping FILE_STREAM.  Use POOL for allocations.
  */
 static svn_error_t *
-block_read_changes(apr_array_header_t **changes,
+block_read_changes(apr_hash_t **changes,
                    svn_fs_t *fs,
                    svn_fs_fs__revision_file_t *rev_file,
                    svn_fs_fs__p2l_entry_t *entry,
@@ -3303,7 +3303,7 @@ block_read(void **result,
                     break;
 
                   case SVN_FS_FS__ITEM_TYPE_CHANGES:
-                    SVN_ERR(block_read_changes((apr_array_header_t **)&item,
+                    SVN_ERR(block_read_changes((apr_hash_t **)&item,
                                                fs, revision_file,
                                                entry, is_result,
                                                pool, iterpool));
Index: subversion/libsvn_fs_fs/cached_data.h
===================================================================
--- subversion/libsvn_fs_fs/cached_data.h	(revision 1604933)
+++ subversion/libsvn_fs_fs/cached_data.h	(working copy)
@@ -150,7 +150,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist,
  * Allocate the result in POOL.
  */
 svn_error_t *
-svn_fs_fs__get_changes(apr_array_header_t **changes,
+svn_fs_fs__get_changes(apr_hash_t **changes,
                        svn_fs_t *fs,
                        svn_revnum_t rev,
                        apr_pool_t *pool);
Index: subversion/libsvn_fs_fs/low_level.c
===================================================================
--- subversion/libsvn_fs_fs/low_level.c	(revision 1604933)
+++ subversion/libsvn_fs_fs/low_level.c	(working copy)
@@ -381,7 +381,7 @@ read_change(change_t **change_p,
 }
 
 svn_error_t *
-svn_fs_fs__read_changes(apr_array_header_t **changes,
+svn_fs_fs__read_changes(apr_hash_t **changes,
                         svn_stream_t *stream,
                         apr_pool_t *result_pool,
                         apr_pool_t *scratch_pool)
@@ -389,15 +389,14 @@ svn_error_t *
   change_t *change;
   apr_pool_t *iterpool;
 
-  /* pre-allocate enough room for most change lists
-     (will be auto-expanded as necessary) */
-  *changes = apr_array_make(result_pool, 30, sizeof(change_t *));
+  *changes = apr_hash_make(result_pool);
 
   SVN_ERR(read_change(&change, stream, result_pool, scratch_pool));
   iterpool = svn_pool_create(scratch_pool);
   while (change)
     {
-      APR_ARRAY_PUSH(*changes, change_t*) = change;
+      apr_hash_set(*changes, change->path.data, change->path.len,
+                   &change->info);
       SVN_ERR(read_change(&change, stream, result_pool, iterpool));
       svn_pool_clear(iterpool);
     }
Index: subversion/libsvn_fs_fs/low_level.h
===================================================================
--- subversion/libsvn_fs_fs/low_level.h	(revision 1604933)
+++ subversion/libsvn_fs_fs/low_level.h	(working copy)
@@ -86,7 +86,7 @@ svn_fs_fs__unparse_footer(apr_off_t l2p_offset,
 /* Read all the changes from STREAM and store them in *CHANGES,
    allocated in RESULT_POOL. Do temporary allocations in SCRATCH_POOL. */
 svn_error_t *
-svn_fs_fs__read_changes(apr_array_header_t **changes,
+svn_fs_fs__read_changes(apr_hash_t **changes,
                         svn_stream_t *stream,
                         apr_pool_t *result_pool,
                         apr_pool_t *scratch_pool);
Index: subversion/libsvn_fs_fs/temp_serializer.c
===================================================================
--- subversion/libsvn_fs_fs/temp_serializer.c	(revision 1604933)
+++ subversion/libsvn_fs_fs/temp_serializer.c	(working copy)
@@ -1125,7 +1125,7 @@ deserialize_change(void *buffer, change_t **change
 }
 
 /* Auxiliary structure representing the content of a change_t array.
-   This structure is much easier to (de-)serialize than an APR array.
+   This structure is much easier to (de-)serialize than an APR hash.
  */
 typedef struct changes_data_t
 {
@@ -1142,20 +1142,30 @@ svn_fs_fs__serialize_changes(void **data,
                              void *in,
                              apr_pool_t *pool)
 {
-  apr_array_header_t *array = in;
+  apr_hash_t *hash = in;
   changes_data_t changes;
   svn_temp_serializer__context_t *context;
   svn_stringbuf_t *serialized;
   int i;
+  apr_hash_index_t *hi;
 
   /* initialize our auxiliary data structure */
-  changes.count = array->nelts;
+  changes.count = apr_hash_count(hash);
   changes.changes = apr_palloc(pool, sizeof(change_t*) * changes.count);
 
-  /* populate it with the array elements */
-  for (i = 0; i < changes.count; ++i)
-    changes.changes[i] = APR_ARRAY_IDX(array, i, change_t*);
+  /* populate it with the hash elements */
+  i = 0;
+  for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi), ++i)
+    {
+      const char *path = svn__apr_hash_index_key(hi);
+      svn_fs_path_change2_t *info = svn__apr_hash_index_val(hi);
 
+      changes.changes[i] = apr_palloc(pool, sizeof (change_t));
+      changes.changes[i]->path.data = path;
+      changes.changes[i]->path.len = strlen(path);
+      changes.changes[i]->info = *info;
+    }
+
   /* serialize it and all its elements */
   context = svn_temp_serializer__init(&changes,
                                       sizeof(changes),
@@ -1188,8 +1198,7 @@ svn_fs_fs__deserialize_changes(void **out,
 {
   int i;
   changes_data_t *changes = (changes_data_t *)data;
-  apr_array_header_t *array = apr_array_make(pool, changes->count,
-                                             sizeof(change_t *));
+  apr_hash_t *hash = apr_hash_make(pool);
 
   /* de-serialize our auxiliary data structure */
   svn_temp_deserializer__resolve(changes, (void**)&changes->changes);
@@ -1197,13 +1206,17 @@ svn_fs_fs__deserialize_changes(void **out,
   /* de-serialize each entry and add it to the array */
   for (i = 0; i < changes->count; ++i)
     {
+      change_t *change;
+
       deserialize_change(changes->changes,
                          (change_t **)&changes->changes[i]);
-      APR_ARRAY_PUSH(array, change_t *) = changes->changes[i];
+      change = changes->changes[i];
+      apr_hash_set(hash, change->path.data, change->path.len,
+                   &change->info);
     }
 
   /* done */
-  *out = array;
+  *out = hash;
 
   return SVN_NO_ERROR;
 }
Index: subversion/libsvn_fs_fs/transaction.c
===================================================================
--- subversion/libsvn_fs_fs/transaction.c	(revision 1604933)
+++ subversion/libsvn_fs_fs/transaction.c	(working copy)
@@ -881,23 +881,8 @@ svn_fs_fs__paths_changed(apr_hash_t **changed_path
                          svn_revnum_t rev,
                          apr_pool_t *pool)
 {
-  apr_hash_t *changed_paths;
-  apr_array_header_t *changes;
-  int i;
-
-  SVN_ERR(svn_fs_fs__get_changes(&changes, fs, rev, pool));
-
-  changed_paths = svn_hash__make(pool);
-  for (i = 0; i < changes->nelts; ++i)
-    {
-      change_t *change = APR_ARRAY_IDX(changes, i, change_t *);
-      apr_hash_set(changed_paths, change->path.data, change->path.len,
-                   &change->info);
-    }
-
-  *changed_paths_p = changed_paths;
-
-  return SVN_NO_ERROR;
+  return svn_error_trace(svn_fs_fs__get_changes(changed_paths_p, fs, rev,
+                                                pool));
 }
 
 /* Copy a revision node-rev SRC into the current transaction TXN_ID in

Re: returning hash from FSFS changes cache?

Posted by Stefan Fuhrmann <st...@wandisco.com>.
On Mon, Jun 23, 2014 at 11:41 PM, Stefan Sperling <st...@elego.de> wrote:

> The FSFS changes cache returns an array representation of changed
> path data which needs to be converted back to a hash for API reasons
> by svn_fs_fs__paths_changed(). No other function accesses the
> changes cache so returning a temporary array seems odd.
> Wouldn't it make sense for the cache to return hashes directly?
>
> Isn't it better to convert from hash to array during serialization,
> which ideally happens only once, rather than constructing an array
> and then a hash from array elements every time the cache is accessed?
>

The rationale is as follows - which may or may not be correct:

* constructing an array is extremely cheap
  (even in the ruby example, it uses only ~2MB, half of it due to resize;
  adding entries is trivial) and having it as an intermediate step almost
  no extra costs.
* not every item that gets put into cache will ever get retrieved.
  Adding data to the cache is therefore the hot path (with notable
exceptions).

Looking at the (de-)serializer code, there is still room for improvement.
The array could be used directly instead of creating a temporary copy.

-- Stefan^2.


> I'm not sure how to measure the effects of this.
>
> Index: subversion/libsvn_fs_fs/cached_data.c
> ===================================================================
> --- subversion/libsvn_fs_fs/cached_data.c       (revision 1604933)
> +++ subversion/libsvn_fs_fs/cached_data.c       (working copy)
> @@ -2686,7 +2686,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
>  }
>
>  svn_error_t *
> -svn_fs_fs__get_changes(apr_array_header_t **changes,
> +svn_fs_fs__get_changes(apr_hash_t **changes,
>                         svn_fs_t *fs,
>                         svn_revnum_t rev,
>                         apr_pool_t *result_pool)
> @@ -3093,7 +3093,7 @@ auto_select_stream(svn_stream_t **stream,
>   * FILE and wrapping FILE_STREAM.  Use POOL for allocations.
>   */
>  static svn_error_t *
> -block_read_changes(apr_array_header_t **changes,
> +block_read_changes(apr_hash_t **changes,
>                     svn_fs_t *fs,
>                     svn_fs_fs__revision_file_t *rev_file,
>                     svn_fs_fs__p2l_entry_t *entry,
> @@ -3303,7 +3303,7 @@ block_read(void **result,
>                      break;
>
>                    case SVN_FS_FS__ITEM_TYPE_CHANGES:
> -                    SVN_ERR(block_read_changes((apr_array_header_t
> **)&item,
> +                    SVN_ERR(block_read_changes((apr_hash_t **)&item,
>                                                 fs, revision_file,
>                                                 entry, is_result,
>                                                 pool, iterpool));
> Index: subversion/libsvn_fs_fs/cached_data.h
> ===================================================================
> --- subversion/libsvn_fs_fs/cached_data.h       (revision 1604933)
> +++ subversion/libsvn_fs_fs/cached_data.h       (working copy)
> @@ -150,7 +150,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist,
>   * Allocate the result in POOL.
>   */
>  svn_error_t *
> -svn_fs_fs__get_changes(apr_array_header_t **changes,
> +svn_fs_fs__get_changes(apr_hash_t **changes,
>                         svn_fs_t *fs,
>                         svn_revnum_t rev,
>                         apr_pool_t *pool);
> Index: subversion/libsvn_fs_fs/low_level.c
> ===================================================================
> --- subversion/libsvn_fs_fs/low_level.c (revision 1604933)
> +++ subversion/libsvn_fs_fs/low_level.c (working copy)
> @@ -381,7 +381,7 @@ read_change(change_t **change_p,
>  }
>
>  svn_error_t *
> -svn_fs_fs__read_changes(apr_array_header_t **changes,
> +svn_fs_fs__read_changes(apr_hash_t **changes,
>                          svn_stream_t *stream,
>                          apr_pool_t *result_pool,
>                          apr_pool_t *scratch_pool)
> @@ -389,15 +389,14 @@ svn_error_t *
>    change_t *change;
>    apr_pool_t *iterpool;
>
> -  /* pre-allocate enough room for most change lists
> -     (will be auto-expanded as necessary) */
> -  *changes = apr_array_make(result_pool, 30, sizeof(change_t *));
> +  *changes = apr_hash_make(result_pool);
>
>    SVN_ERR(read_change(&change, stream, result_pool, scratch_pool));
>    iterpool = svn_pool_create(scratch_pool);
>    while (change)
>      {
> -      APR_ARRAY_PUSH(*changes, change_t*) = change;
> +      apr_hash_set(*changes, change->path.data, change->path.len,
> +                   &change->info);
>        SVN_ERR(read_change(&change, stream, result_pool, iterpool));
>        svn_pool_clear(iterpool);
>      }
> Index: subversion/libsvn_fs_fs/low_level.h
> ===================================================================
> --- subversion/libsvn_fs_fs/low_level.h (revision 1604933)
> +++ subversion/libsvn_fs_fs/low_level.h (working copy)
> @@ -86,7 +86,7 @@ svn_fs_fs__unparse_footer(apr_off_t l2p_offset,
>  /* Read all the changes from STREAM and store them in *CHANGES,
>     allocated in RESULT_POOL. Do temporary allocations in SCRATCH_POOL. */
>  svn_error_t *
> -svn_fs_fs__read_changes(apr_array_header_t **changes,
> +svn_fs_fs__read_changes(apr_hash_t **changes,
>                          svn_stream_t *stream,
>                          apr_pool_t *result_pool,
>                          apr_pool_t *scratch_pool);
> Index: subversion/libsvn_fs_fs/temp_serializer.c
> ===================================================================
> --- subversion/libsvn_fs_fs/temp_serializer.c   (revision 1604933)
> +++ subversion/libsvn_fs_fs/temp_serializer.c   (working copy)
> @@ -1125,7 +1125,7 @@ deserialize_change(void *buffer, change_t **change
>  }
>
>  /* Auxiliary structure representing the content of a change_t array.
> -   This structure is much easier to (de-)serialize than an APR array.
> +   This structure is much easier to (de-)serialize than an APR hash.
>   */
>  typedef struct changes_data_t
>  {
> @@ -1142,20 +1142,30 @@ svn_fs_fs__serialize_changes(void **data,
>                               void *in,
>                               apr_pool_t *pool)
>  {
> -  apr_array_header_t *array = in;
> +  apr_hash_t *hash = in;
>    changes_data_t changes;
>    svn_temp_serializer__context_t *context;
>    svn_stringbuf_t *serialized;
>    int i;
> +  apr_hash_index_t *hi;
>
>    /* initialize our auxiliary data structure */
> -  changes.count = array->nelts;
> +  changes.count = apr_hash_count(hash);
>    changes.changes = apr_palloc(pool, sizeof(change_t*) * changes.count);
>
> -  /* populate it with the array elements */
> -  for (i = 0; i < changes.count; ++i)
> -    changes.changes[i] = APR_ARRAY_IDX(array, i, change_t*);
> +  /* populate it with the hash elements */
> +  i = 0;
> +  for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi), ++i)
> +    {
> +      const char *path = svn__apr_hash_index_key(hi);
> +      svn_fs_path_change2_t *info = svn__apr_hash_index_val(hi);
>
> +      changes.changes[i] = apr_palloc(pool, sizeof (change_t));
> +      changes.changes[i]->path.data = path;
> +      changes.changes[i]->path.len = strlen(path);
> +      changes.changes[i]->info = *info;
> +    }
> +
>    /* serialize it and all its elements */
>    context = svn_temp_serializer__init(&changes,
>                                        sizeof(changes),
> @@ -1188,8 +1198,7 @@ svn_fs_fs__deserialize_changes(void **out,
>  {
>    int i;
>    changes_data_t *changes = (changes_data_t *)data;
> -  apr_array_header_t *array = apr_array_make(pool, changes->count,
> -                                             sizeof(change_t *));
> +  apr_hash_t *hash = apr_hash_make(pool);
>
>    /* de-serialize our auxiliary data structure */
>    svn_temp_deserializer__resolve(changes, (void**)&changes->changes);
> @@ -1197,13 +1206,17 @@ svn_fs_fs__deserialize_changes(void **out,
>    /* de-serialize each entry and add it to the array */
>    for (i = 0; i < changes->count; ++i)
>      {
> +      change_t *change;
> +
>        deserialize_change(changes->changes,
>                           (change_t **)&changes->changes[i]);
> -      APR_ARRAY_PUSH(array, change_t *) = changes->changes[i];
> +      change = changes->changes[i];
> +      apr_hash_set(hash, change->path.data, change->path.len,
> +                   &change->info);
>      }
>
>    /* done */
> -  *out = array;
> +  *out = hash;
>
>    return SVN_NO_ERROR;
>  }
> Index: subversion/libsvn_fs_fs/transaction.c
> ===================================================================
> --- subversion/libsvn_fs_fs/transaction.c       (revision 1604933)
> +++ subversion/libsvn_fs_fs/transaction.c       (working copy)
> @@ -881,23 +881,8 @@ svn_fs_fs__paths_changed(apr_hash_t **changed_path
>                           svn_revnum_t rev,
>                           apr_pool_t *pool)
>  {
> -  apr_hash_t *changed_paths;
> -  apr_array_header_t *changes;
> -  int i;
> -
> -  SVN_ERR(svn_fs_fs__get_changes(&changes, fs, rev, pool));
> -
> -  changed_paths = svn_hash__make(pool);
> -  for (i = 0; i < changes->nelts; ++i)
> -    {
> -      change_t *change = APR_ARRAY_IDX(changes, i, change_t *);
> -      apr_hash_set(changed_paths, change->path.data, change->path.len,
> -                   &change->info);
> -    }
> -
> -  *changed_paths_p = changed_paths;
> -
> -  return SVN_NO_ERROR;
> +  return svn_error_trace(svn_fs_fs__get_changes(changed_paths_p, fs, rev,
> +                                                pool));
>  }
>
>  /* Copy a revision node-rev SRC into the current transaction TXN_ID in
>