You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2011/01/01 21:33:23 UTC

svn commit: r1054286 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Author: stefan2
Date: Sat Jan  1 20:33:22 2011
New Revision: 1054286

URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
Log:
XDelta calculation is major part of svn_txdelta_send_txstream.
Therefore, speed up string matching code and the relatively
expensive hash-code (adler32) calculations.

* subversion/libsvn_delta/xdelta.c
  (init_adler32): init adler checksum with 0 instead of 1;
   use svn_adler32 for performance
  (adler32_out): "-1" can / must be ommitted now that we
   start at 0 instead of 1 for s1.
  (adler32_replace): special, faster implementation replacing
   a adler32_out(), adler32_in() sequence

  (match_length): new utility function
  (find_match): speed up the main loop by calling match_length()
  (compute_delta): optimize adler32 calculations

Modified:
    subversion/trunk/subversion/libsvn_delta/xdelta.c

Modified: subversion/trunk/subversion/libsvn_delta/xdelta.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_delta/xdelta.c?rev=1054286&r1=1054285&r2=1054286&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_delta/xdelta.c (original)
+++ subversion/trunk/subversion/libsvn_delta/xdelta.c Sat Jan  1 20:33:22 2011
@@ -29,6 +29,8 @@
 
 #include "svn_delta.h"
 #include "delta.h"
+
+#include "private/svn_adler32.h"
 
 /* This is pseudo-adler32. It is adler32 without the prime modulus.
    The idea is borrowed from monotone, and is a translation of the C++
@@ -39,6 +41,10 @@
 #define ADLER32_MASK      0x0000ffff
 #define ADLER32_CHAR_MASK 0x000000ff
 
+/* Size of the blocks we compute checksums for. This was chosen out of
+   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.  */
+#define MATCH_BLOCKSIZE 64
+
 /* Structure to store the state of our adler32 checksum.  */
 struct adler32
 {
@@ -66,11 +72,32 @@ adler32_out(struct adler32 *ad, const ch
 {
   ad->s1 -= ((apr_uint32_t) (c)) & ADLER32_CHAR_MASK;
   ad->s1 &= ADLER32_MASK;
-  ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK)) + 1;
+  ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK));
   ad->s2 &= ADLER32_MASK;
   --ad->len;
 }
 
+/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
+ * This function may (and will) only be called for
+ * ad->len == MATCH_BLOCKSIZE.
+ */
+static APR_INLINE void
+adler32_replace(struct adler32 *ad, const char c_out, const char c_in)
+{
+  apr_uint32_t s1 = ad->s1;
+  apr_uint32_t s2 = ad->s2;
+
+  s2 -= (MATCH_BLOCKSIZE * (((apr_uint32_t) c_out) & ADLER32_CHAR_MASK));
+
+  s1 -= ((apr_uint32_t) (c_out)) & ADLER32_CHAR_MASK;
+  s1 += ((apr_uint32_t) (c_in)) & ADLER32_CHAR_MASK;
+
+  s2 += s1;
+
+  ad->s1 = s1 & ADLER32_MASK;
+  ad->s2 = s2 & ADLER32_MASK;
+}
+
 /* Return the current adler32 checksum in the adler32 structure.  */
 
 static APR_INLINE apr_uint32_t
@@ -85,18 +112,15 @@ adler32_sum(const struct adler32 *ad)
 static APR_INLINE struct adler32 *
 init_adler32(struct adler32 *ad, const char *data, apr_uint32_t datalen)
 {
-  ad->s1 = 1;
-  ad->s2 = 0;
-  ad->len = 0;
-  while (datalen--)
-    adler32_in(ad, *(data++));
+  apr_uint32_t adler32 = svn__adler32(0, data, datalen);
+
+  ad->s1 = adler32 & ADLER32_MASK;
+  ad->s2 = (adler32 >> 16) & ADLER32_MASK;
+  ad->len = datalen;
+
   return ad;
 }
 
-/* Size of the blocks we compute checksums for. This was chosen out of
-   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.  */
-#define MATCH_BLOCKSIZE 64
-
 /* Information for a block of the delta source.  The length of the
    block is the smaller of MATCH_BLOCKSIZE and the difference between
    the size of the source data and the position of this block. */
@@ -201,6 +225,35 @@ init_blocks_table(const char *data,
     }
 }
 
+/* Return the lowest position at which A and B differ. If no difference
+ * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
+ */
+static apr_size_t
+match_length(const char *a, const char *b, apr_size_t max_len)
+{
+  apr_size_t pos = 0;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+  /* Chunky operation is so much faster ...
+   *
+   * We can't make this work on architectures that require aligned access
+   * because A and B will probably have different alignment. So, skipping
+   * the first few chars until alignment is reached is not an option.
+   */
+  for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
+    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
+      break;
+
+#endif
+
+  for (; pos < max_len; ++pos)
+    if (a[pos] != b[pos])
+      break;
+
+  return pos;
+}
+
 /* Try to find a match for the target data B in BLOCKS, and then
    extend the match as long as data in A and B at the match position
    continues to match.  We set the position in a we ended up in (in
@@ -229,6 +282,8 @@ find_match(const struct blocks *blocks,
   apr_uint32_t sum = adler32_sum(rolling);
   apr_size_t alen, badvance, apos;
   apr_size_t tpos, tlen;
+  apr_size_t delta, max_delta;
+  const char *aptr, *bptr;
 
   tpos = find_block(blocks, sum);
 
@@ -246,14 +301,15 @@ find_match(const struct blocks *blocks,
   apos = tpos;
   alen = tlen;
   badvance = tlen;
+
   /* Extend the match forward as far as possible */
-  while ((apos + alen < asize)
-         && (bpos + badvance < bsize)
-         && (a[apos + alen] == b[bpos + badvance]))
-    {
-      ++alen;
-      ++badvance;
-    }
+  max_delta = asize - apos - alen < bsize - bpos - badvance
+            ? asize - apos - alen
+            : bsize - bpos - badvance;
+  delta = match_length(a + apos + alen, b + bpos + badvance, max_delta);
+
+  alen += delta;
+  badvance += delta;
 
   /* See if we can extend backwards into a previous insert hunk.  */
   while (apos > 0
@@ -329,7 +385,6 @@ compute_delta(svn_txdelta__ops_baton_t *
       apr_size_t apos = 0;
       apr_size_t alen = 1;
       apr_size_t badvance = 1;
-      apr_size_t next;
       svn_boolean_t match;
 
       match = find_match(&blocks, &rolling, a, asize, b, bsize, lo, &apos,
@@ -354,14 +409,49 @@ compute_delta(svn_txdelta__ops_baton_t *
           svn_txdelta__insert_op(build_baton, svn_txdelta_source,
                                  apos, alen, NULL, pool);
         }
-      next = lo;
-      for (; next < lo + badvance; ++next)
+
+      if (badvance == 1)
+        {
+          /* This seems to be the _vast_ majority case -- even if
+           * you sum BADVANCE up, this case still accounts for 2/3
+           * of all bytes being processed.
+           */
+          if (lo + MATCH_BLOCKSIZE < bsize)
+            adler32_replace(&rolling, b[lo], b[lo + MATCH_BLOCKSIZE]);
+          else
+            adler32_out(&rolling, b[lo]);
+
+          lo++;
+        }
+      else if (badvance >= MATCH_BLOCKSIZE)
         {
-          adler32_out(&rolling, b[next]);
-          if (next + MATCH_BLOCKSIZE < bsize)
-            adler32_in(&rolling, b[next + MATCH_BLOCKSIZE]);
+          /* BADVANCE is often large enough that we can calculate the
+           * Adler32 sum directly instead of expensively updating the
+           * existing values.
+           */
+          apr_size_t remaining_block = lo + MATCH_BLOCKSIZE < bsize
+                                     ? MATCH_BLOCKSIZE
+                                     : bsize - (lo + MATCH_BLOCKSIZE);
+          init_adler32(&rolling,
+                       b + lo + badvance - remaining_block,
+                       remaining_block);
+          lo += badvance;
+        }
+      else
+        {
+          /* The very rare 3rd case
+           * (can only possibly happen close to end of the file).
+           */
+          apr_size_t next = lo;
+
+          for (; next < lo + badvance; ++next)
+            if (next + MATCH_BLOCKSIZE < bsize)
+              adler32_replace(&rolling, b[next], b[next + MATCH_BLOCKSIZE]);
+            else
+              adler32_out(&rolling, b[next]);
+
+          lo = next;
         }
-      lo = next;
     }
 
   /* If we still have an insert pending at the end, throw it in.  */



Re: svn commit: r1054286 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Stefan Fuhrmann <st...@alice-dsl.de>.
On 02.01.2011 16:57, Johan Corveleyn wrote:
> On Sat, Jan 1, 2011 at 9:33 PM,<st...@apache.org>  wrote:
>> Author: stefan2
>> Date: Sat Jan  1 20:33:22 2011
>> New Revision: 1054286
>>
>> URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
>> Log:
>> XDelta calculation is major part of svn_txdelta_send_txstream.
>> Therefore, speed up string matching code and the relatively
>> expensive hash-code (adler32) calculations.
>>
>> * subversion/libsvn_delta/xdelta.c
>>   (init_adler32): init adler checksum with 0 instead of 1;
>>    use svn_adler32 for performance
>>   (adler32_out): "-1" can / must be ommitted now that we
>>    start at 0 instead of 1 for s1.
>>   (adler32_replace): special, faster implementation replacing
>>    a adler32_out(), adler32_in() sequence
>>
>>   (match_length): new utility function
>>   (find_match): speed up the main loop by calling match_length()
>>   (compute_delta): optimize adler32 calculations
>>
>> Modified:
>>     subversion/trunk/subversion/libsvn_delta/xdelta.c
>>
>> Modified: subversion/trunk/subversion/libsvn_delta/xdelta.c
>> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_delta/xdelta.c?rev=1054286&r1=1054285&r2=1054286&view=diff
>> ==============================================================================
>> --- subversion/trunk/subversion/libsvn_delta/xdelta.c (original)
>> +++ subversion/trunk/subversion/libsvn_delta/xdelta.c Sat Jan  1 20:33:22 2011
>> @@ -29,6 +29,8 @@
>>
>>   #include "svn_delta.h"
>>   #include "delta.h"
>> +
>> +#include "private/svn_adler32.h"
>>
>>   /* This is pseudo-adler32. It is adler32 without the prime modulus.
>>     The idea is borrowed from monotone, and is a translation of the C++
>> @@ -39,6 +41,10 @@
>>   #define ADLER32_MASK      0x0000ffff
>>   #define ADLER32_CHAR_MASK 0x000000ff
>>
>> +/* Size of the blocks we compute checksums for. This was chosen out of
>> +   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.  */
>> +#define MATCH_BLOCKSIZE 64
>> +
>>   /* Structure to store the state of our adler32 checksum.  */
>>   struct adler32
>>   {
>> @@ -66,11 +72,32 @@ adler32_out(struct adler32 *ad, const ch
>>   {
>>    ad->s1 -= ((apr_uint32_t) (c))&  ADLER32_CHAR_MASK;
>>    ad->s1&= ADLER32_MASK;
>> -  ad->s2 -= (ad->len * (((apr_uint32_t) c)&  ADLER32_CHAR_MASK)) + 1;
>> +  ad->s2 -= (ad->len * (((apr_uint32_t) c)&  ADLER32_CHAR_MASK));
>>    ad->s2&= ADLER32_MASK;
>>    --ad->len;
>>   }
>>
>> +/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
>> + * This function may (and will) only be called for
>> + * ad->len == MATCH_BLOCKSIZE.
>> + */
>> +static APR_INLINE void
>> +adler32_replace(struct adler32 *ad, const char c_out, const char c_in)
>> +{
>> +  apr_uint32_t s1 = ad->s1;
>> +  apr_uint32_t s2 = ad->s2;
>> +
>> +  s2 -= (MATCH_BLOCKSIZE * (((apr_uint32_t) c_out)&  ADLER32_CHAR_MASK));
>> +
>> +  s1 -= ((apr_uint32_t) (c_out))&  ADLER32_CHAR_MASK;
>> +  s1 += ((apr_uint32_t) (c_in))&  ADLER32_CHAR_MASK;
>> +
>> +  s2 += s1;
>> +
>> +  ad->s1 = s1&  ADLER32_MASK;
>> +  ad->s2 = s2&  ADLER32_MASK;
>> +}
>> +
>>   /* Return the current adler32 checksum in the adler32 structure.  */
>>
>>   static APR_INLINE apr_uint32_t
>> @@ -85,18 +112,15 @@ adler32_sum(const struct adler32 *ad)
>>   static APR_INLINE struct adler32 *
>>   init_adler32(struct adler32 *ad, const char *data, apr_uint32_t datalen)
>>   {
>> -  ad->s1 = 1;
>> -  ad->s2 = 0;
>> -  ad->len = 0;
>> -  while (datalen--)
>> -    adler32_in(ad, *(data++));
>> +  apr_uint32_t adler32 = svn__adler32(0, data, datalen);
>> +
>> +  ad->s1 = adler32&  ADLER32_MASK;
>> +  ad->s2 = (adler32>>  16)&  ADLER32_MASK;
>> +  ad->len = datalen;
>> +
>>    return ad;
>>   }
>>
>> -/* Size of the blocks we compute checksums for. This was chosen out of
>> -   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.  */
>> -#define MATCH_BLOCKSIZE 64
>> -
>>   /* Information for a block of the delta source.  The length of the
>>     block is the smaller of MATCH_BLOCKSIZE and the difference between
>>     the size of the source data and the position of this block. */
>> @@ -201,6 +225,35 @@ init_blocks_table(const char *data,
>>      }
>>   }
>>
>> +/* Return the lowest position at which A and B differ. If no difference
>> + * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
>> + */
>> +static apr_size_t
>> +match_length(const char *a, const char *b, apr_size_t max_len)
>> +{
>> +  apr_size_t pos = 0;
>> +
>> +#if SVN_UNALIGNED_ACCESS_IS_OK
>> +
>> +  /* Chunky operation is so much faster ...
>> +   *
>> +   * We can't make this work on architectures that require aligned access
>> +   * because A and B will probably have different alignment. So, skipping
>> +   * the first few chars until alignment is reached is not an option.
>> +   */
>> +  for (; pos + sizeof(apr_size_t)<= max_len; pos += sizeof(apr_size_t))
>> +    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
>> +      break;
>> +
>> +#endif
>> +
>> +  for (; pos<  max_len; ++pos)
>> +    if (a[pos] != b[pos])
>> +      break;
>> +
>> +  return pos;
>> +}
>> +
>>   /* Try to find a match for the target data B in BLOCKS, and then
>>     extend the match as long as data in A and B at the match position
>>     continues to match.  We set the position in a we ended up in (in
>> @@ -229,6 +282,8 @@ find_match(const struct blocks *blocks,
>>    apr_uint32_t sum = adler32_sum(rolling);
>>    apr_size_t alen, badvance, apos;
>>    apr_size_t tpos, tlen;
>> +  apr_size_t delta, max_delta;
>> +  const char *aptr, *bptr;
> ..\..\..\subversion\libsvn_delta\xdelta.c(286): warning C4101: 'aptr'
> : unreferenced local variable
> ..\..\..\subversion\libsvn_delta\xdelta.c(286): warning C4101: 'bptr'
> : unreferenced local variable
>
Fixed as part of a larger change in r1054577.

-- Stefan^2.


Re: svn commit: r1054286 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Johan Corveleyn <jc...@gmail.com>.
On Sat, Jan 1, 2011 at 9:33 PM,  <st...@apache.org> wrote:
> Author: stefan2
> Date: Sat Jan  1 20:33:22 2011
> New Revision: 1054286
>
> URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
> Log:
> XDelta calculation is major part of svn_txdelta_send_txstream.
> Therefore, speed up string matching code and the relatively
> expensive hash-code (adler32) calculations.
>
> * subversion/libsvn_delta/xdelta.c
>  (init_adler32): init adler checksum with 0 instead of 1;
>   use svn_adler32 for performance
>  (adler32_out): "-1" can / must be ommitted now that we
>   start at 0 instead of 1 for s1.
>  (adler32_replace): special, faster implementation replacing
>   a adler32_out(), adler32_in() sequence
>
>  (match_length): new utility function
>  (find_match): speed up the main loop by calling match_length()
>  (compute_delta): optimize adler32 calculations
>
> Modified:
>    subversion/trunk/subversion/libsvn_delta/xdelta.c
>
> Modified: subversion/trunk/subversion/libsvn_delta/xdelta.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_delta/xdelta.c?rev=1054286&r1=1054285&r2=1054286&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_delta/xdelta.c (original)
> +++ subversion/trunk/subversion/libsvn_delta/xdelta.c Sat Jan  1 20:33:22 2011
> @@ -29,6 +29,8 @@
>
>  #include "svn_delta.h"
>  #include "delta.h"
> +
> +#include "private/svn_adler32.h"
>
>  /* This is pseudo-adler32. It is adler32 without the prime modulus.
>    The idea is borrowed from monotone, and is a translation of the C++
> @@ -39,6 +41,10 @@
>  #define ADLER32_MASK      0x0000ffff
>  #define ADLER32_CHAR_MASK 0x000000ff
>
> +/* Size of the blocks we compute checksums for. This was chosen out of
> +   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.  */
> +#define MATCH_BLOCKSIZE 64
> +
>  /* Structure to store the state of our adler32 checksum.  */
>  struct adler32
>  {
> @@ -66,11 +72,32 @@ adler32_out(struct adler32 *ad, const ch
>  {
>   ad->s1 -= ((apr_uint32_t) (c)) & ADLER32_CHAR_MASK;
>   ad->s1 &= ADLER32_MASK;
> -  ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK)) + 1;
> +  ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK));
>   ad->s2 &= ADLER32_MASK;
>   --ad->len;
>  }
>
> +/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
> + * This function may (and will) only be called for
> + * ad->len == MATCH_BLOCKSIZE.
> + */
> +static APR_INLINE void
> +adler32_replace(struct adler32 *ad, const char c_out, const char c_in)
> +{
> +  apr_uint32_t s1 = ad->s1;
> +  apr_uint32_t s2 = ad->s2;
> +
> +  s2 -= (MATCH_BLOCKSIZE * (((apr_uint32_t) c_out) & ADLER32_CHAR_MASK));
> +
> +  s1 -= ((apr_uint32_t) (c_out)) & ADLER32_CHAR_MASK;
> +  s1 += ((apr_uint32_t) (c_in)) & ADLER32_CHAR_MASK;
> +
> +  s2 += s1;
> +
> +  ad->s1 = s1 & ADLER32_MASK;
> +  ad->s2 = s2 & ADLER32_MASK;
> +}
> +
>  /* Return the current adler32 checksum in the adler32 structure.  */
>
>  static APR_INLINE apr_uint32_t
> @@ -85,18 +112,15 @@ adler32_sum(const struct adler32 *ad)
>  static APR_INLINE struct adler32 *
>  init_adler32(struct adler32 *ad, const char *data, apr_uint32_t datalen)
>  {
> -  ad->s1 = 1;
> -  ad->s2 = 0;
> -  ad->len = 0;
> -  while (datalen--)
> -    adler32_in(ad, *(data++));
> +  apr_uint32_t adler32 = svn__adler32(0, data, datalen);
> +
> +  ad->s1 = adler32 & ADLER32_MASK;
> +  ad->s2 = (adler32 >> 16) & ADLER32_MASK;
> +  ad->len = datalen;
> +
>   return ad;
>  }
>
> -/* Size of the blocks we compute checksums for. This was chosen out of
> -   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.  */
> -#define MATCH_BLOCKSIZE 64
> -
>  /* Information for a block of the delta source.  The length of the
>    block is the smaller of MATCH_BLOCKSIZE and the difference between
>    the size of the source data and the position of this block. */
> @@ -201,6 +225,35 @@ init_blocks_table(const char *data,
>     }
>  }
>
> +/* Return the lowest position at which A and B differ. If no difference
> + * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
> + */
> +static apr_size_t
> +match_length(const char *a, const char *b, apr_size_t max_len)
> +{
> +  apr_size_t pos = 0;
> +
> +#if SVN_UNALIGNED_ACCESS_IS_OK
> +
> +  /* Chunky operation is so much faster ...
> +   *
> +   * We can't make this work on architectures that require aligned access
> +   * because A and B will probably have different alignment. So, skipping
> +   * the first few chars until alignment is reached is not an option.
> +   */
> +  for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
> +    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
> +      break;
> +
> +#endif
> +
> +  for (; pos < max_len; ++pos)
> +    if (a[pos] != b[pos])
> +      break;
> +
> +  return pos;
> +}
> +
>  /* Try to find a match for the target data B in BLOCKS, and then
>    extend the match as long as data in A and B at the match position
>    continues to match.  We set the position in a we ended up in (in
> @@ -229,6 +282,8 @@ find_match(const struct blocks *blocks,
>   apr_uint32_t sum = adler32_sum(rolling);
>   apr_size_t alen, badvance, apos;
>   apr_size_t tpos, tlen;
> +  apr_size_t delta, max_delta;
> +  const char *aptr, *bptr;

..\..\..\subversion\libsvn_delta\xdelta.c(286): warning C4101: 'aptr'
: unreferenced local variable
..\..\..\subversion\libsvn_delta\xdelta.c(286): warning C4101: 'bptr'
: unreferenced local variable

-- 
Johan

Re: svn commit: r1054286 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Stefan Fuhrmann <st...@alice-dsl.de>.
On 02.01.2011 17:03, Johan Corveleyn wrote:
> On Sun, Jan 2, 2011 at 4:45 PM, Stefan Fuhrmann
> <st...@alice-dsl.de>  wrote:
>> On 02.01.2011 16:38, Johan Corveleyn wrote:
>>> On Sun, Jan 2, 2011 at 9:20 AM, Johan Corveleyn<jc...@gmail.com>    wrote:
>>>> On Sat, Jan 1, 2011 at 9:33 PM,<st...@apache.org>    wrote:
>>>>> Author: stefan2
>>>>> Date: Sat Jan  1 20:33:22 2011
>>>>> New Revision: 1054286
>>>>>
>>>>> URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
>>>>> Log:
>>>>> XDelta calculation is major part of svn_txdelta_send_txstream.
>>>>> Therefore, speed up string matching code and the relatively
>>>>> expensive hash-code (adler32) calculations.
>>>>>
>>>>> * subversion/libsvn_delta/xdelta.c
>>>>>   (init_adler32): init adler checksum with 0 instead of 1;
>>>>>    use svn_adler32 for performance
>>>>>   (adler32_out): "-1" can / must be ommitted now that we
>>>>>    start at 0 instead of 1 for s1.
>>>>>   (adler32_replace): special, faster implementation replacing
>>>>>    a adler32_out(), adler32_in() sequence
>>>>>
>>>>>   (match_length): new utility function
>>>>>   (find_match): speed up the main loop by calling match_length()
>>>>>   (compute_delta): optimize adler32 calculations
>>>>>
>>>>> Modified:
>>>>>     subversion/trunk/subversion/libsvn_delta/xdelta.c
>>>> Hi Stefan,
>>>>
>>>> This makes my (Windows 32-bit VCE 2008) build fail:
>>>> svn_delta-1.lib(xdelta.obj) : error LNK2019: unresolved external
>>>> symbol _svn__adler32 referenced in function _init_adler32
>>>> ..\..\..\Release\subversion\libsvn_delta\libsvn_delta-1.dll : fatal
>>>> error LNK1120: 1 unresolved externals
>>> It was actually r1054277 that broke it. The following patch fixes it:
>>> [[[
>>> Index: build.conf
>>> ===================================================================
>>> --- build.conf  (revision 1054417)
>>> +++ build.conf  (working copy)
>>> @@ -329,7 +329,7 @@ msvc-export =
>>>           private\svn_log.h private\svn_mergeinfo_private.h
>>>           private\svn_opt_private.h private\svn_skel.h private\svn_sqlite.h
>>>           private\svn_utf_private.h private\svn_eol_private.h
>>> -        private\svn_token.h
>>> +        private\svn_token.h private\svn_adler32.h
>>>
>>>   # Working copy management lib
>>>   [libsvn_wc]
>>> ]]]
>>>
>> Thanks Johan! Committed in r1054421.
>>
>> -- Stefan^2.
> No problem. I also saw two other warnings (than the ones I just sent),
> but don't know exactly which revision introduced them (and don't know
> how important they are). You'll probably know quicker than I do :-) :
>
> ..\..\..\subversion\libsvn_subr\adler32.c(64): warning C4057:
> 'function' : 'const Bytef *' differs in indirection to slightly
> different base types from 'const char *'
> ..\..\..\subversion\libsvn_subr\adler32.c(64): warning C4244:
> 'function' : conversion from 'apr_off_t' to 'uInt', possible loss of
> data
>
Thanks for notifying! Fixed in r1054579.

-- Stefan^2.


Re: svn commit: r1054286 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Johan Corveleyn <jc...@gmail.com>.
On Sun, Jan 2, 2011 at 4:45 PM, Stefan Fuhrmann
<st...@alice-dsl.de> wrote:
> On 02.01.2011 16:38, Johan Corveleyn wrote:
>>
>> On Sun, Jan 2, 2011 at 9:20 AM, Johan Corveleyn<jc...@gmail.com>  wrote:
>>>
>>> On Sat, Jan 1, 2011 at 9:33 PM,<st...@apache.org>  wrote:
>>>>
>>>> Author: stefan2
>>>> Date: Sat Jan  1 20:33:22 2011
>>>> New Revision: 1054286
>>>>
>>>> URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
>>>> Log:
>>>> XDelta calculation is major part of svn_txdelta_send_txstream.
>>>> Therefore, speed up string matching code and the relatively
>>>> expensive hash-code (adler32) calculations.
>>>>
>>>> * subversion/libsvn_delta/xdelta.c
>>>>  (init_adler32): init adler checksum with 0 instead of 1;
>>>>   use svn_adler32 for performance
>>>>  (adler32_out): "-1" can / must be ommitted now that we
>>>>   start at 0 instead of 1 for s1.
>>>>  (adler32_replace): special, faster implementation replacing
>>>>   a adler32_out(), adler32_in() sequence
>>>>
>>>>  (match_length): new utility function
>>>>  (find_match): speed up the main loop by calling match_length()
>>>>  (compute_delta): optimize adler32 calculations
>>>>
>>>> Modified:
>>>>    subversion/trunk/subversion/libsvn_delta/xdelta.c
>>>
>>> Hi Stefan,
>>>
>>> This makes my (Windows 32-bit VCE 2008) build fail:
>>> svn_delta-1.lib(xdelta.obj) : error LNK2019: unresolved external
>>> symbol _svn__adler32 referenced in function _init_adler32
>>> ..\..\..\Release\subversion\libsvn_delta\libsvn_delta-1.dll : fatal
>>> error LNK1120: 1 unresolved externals
>>
>> It was actually r1054277 that broke it. The following patch fixes it:
>> [[[
>> Index: build.conf
>> ===================================================================
>> --- build.conf  (revision 1054417)
>> +++ build.conf  (working copy)
>> @@ -329,7 +329,7 @@ msvc-export =
>>          private\svn_log.h private\svn_mergeinfo_private.h
>>          private\svn_opt_private.h private\svn_skel.h private\svn_sqlite.h
>>          private\svn_utf_private.h private\svn_eol_private.h
>> -        private\svn_token.h
>> +        private\svn_token.h private\svn_adler32.h
>>
>>  # Working copy management lib
>>  [libsvn_wc]
>> ]]]
>>
> Thanks Johan! Committed in r1054421.
>
> -- Stefan^2.

No problem. I also saw two other warnings (than the ones I just sent),
but don't know exactly which revision introduced them (and don't know
how important they are). You'll probably know quicker than I do :-) :

..\..\..\subversion\libsvn_subr\adler32.c(64): warning C4057:
'function' : 'const Bytef *' differs in indirection to slightly
different base types from 'const char *'
..\..\..\subversion\libsvn_subr\adler32.c(64): warning C4244:
'function' : conversion from 'apr_off_t' to 'uInt', possible loss of
data


-- 
Johan

Re: svn commit: r1054286 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Stefan Fuhrmann <st...@alice-dsl.de>.
On 02.01.2011 16:38, Johan Corveleyn wrote:
> On Sun, Jan 2, 2011 at 9:20 AM, Johan Corveleyn<jc...@gmail.com>  wrote:
>> On Sat, Jan 1, 2011 at 9:33 PM,<st...@apache.org>  wrote:
>>> Author: stefan2
>>> Date: Sat Jan  1 20:33:22 2011
>>> New Revision: 1054286
>>>
>>> URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
>>> Log:
>>> XDelta calculation is major part of svn_txdelta_send_txstream.
>>> Therefore, speed up string matching code and the relatively
>>> expensive hash-code (adler32) calculations.
>>>
>>> * subversion/libsvn_delta/xdelta.c
>>>   (init_adler32): init adler checksum with 0 instead of 1;
>>>    use svn_adler32 for performance
>>>   (adler32_out): "-1" can / must be ommitted now that we
>>>    start at 0 instead of 1 for s1.
>>>   (adler32_replace): special, faster implementation replacing
>>>    a adler32_out(), adler32_in() sequence
>>>
>>>   (match_length): new utility function
>>>   (find_match): speed up the main loop by calling match_length()
>>>   (compute_delta): optimize adler32 calculations
>>>
>>> Modified:
>>>     subversion/trunk/subversion/libsvn_delta/xdelta.c
>> Hi Stefan,
>>
>> This makes my (Windows 32-bit VCE 2008) build fail:
>> svn_delta-1.lib(xdelta.obj) : error LNK2019: unresolved external
>> symbol _svn__adler32 referenced in function _init_adler32
>> ..\..\..\Release\subversion\libsvn_delta\libsvn_delta-1.dll : fatal
>> error LNK1120: 1 unresolved externals
> It was actually r1054277 that broke it. The following patch fixes it:
> [[[
> Index: build.conf
> ===================================================================
> --- build.conf  (revision 1054417)
> +++ build.conf  (working copy)
> @@ -329,7 +329,7 @@ msvc-export =
>           private\svn_log.h private\svn_mergeinfo_private.h
>           private\svn_opt_private.h private\svn_skel.h private\svn_sqlite.h
>           private\svn_utf_private.h private\svn_eol_private.h
> -        private\svn_token.h
> +        private\svn_token.h private\svn_adler32.h
>
>   # Working copy management lib
>   [libsvn_wc]
> ]]]
>
Thanks Johan! Committed in r1054421.

-- Stefan^2.

Re: svn commit: r1054286 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Johan Corveleyn <jc...@gmail.com>.
On Sun, Jan 2, 2011 at 9:20 AM, Johan Corveleyn <jc...@gmail.com> wrote:
> On Sat, Jan 1, 2011 at 9:33 PM,  <st...@apache.org> wrote:
>> Author: stefan2
>> Date: Sat Jan  1 20:33:22 2011
>> New Revision: 1054286
>>
>> URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
>> Log:
>> XDelta calculation is major part of svn_txdelta_send_txstream.
>> Therefore, speed up string matching code and the relatively
>> expensive hash-code (adler32) calculations.
>>
>> * subversion/libsvn_delta/xdelta.c
>>  (init_adler32): init adler checksum with 0 instead of 1;
>>   use svn_adler32 for performance
>>  (adler32_out): "-1" can / must be ommitted now that we
>>   start at 0 instead of 1 for s1.
>>  (adler32_replace): special, faster implementation replacing
>>   a adler32_out(), adler32_in() sequence
>>
>>  (match_length): new utility function
>>  (find_match): speed up the main loop by calling match_length()
>>  (compute_delta): optimize adler32 calculations
>>
>> Modified:
>>    subversion/trunk/subversion/libsvn_delta/xdelta.c
>
> Hi Stefan,
>
> This makes my (Windows 32-bit VCE 2008) build fail:
> svn_delta-1.lib(xdelta.obj) : error LNK2019: unresolved external
> symbol _svn__adler32 referenced in function _init_adler32
> ..\..\..\Release\subversion\libsvn_delta\libsvn_delta-1.dll : fatal
> error LNK1120: 1 unresolved externals

It was actually r1054277 that broke it. The following patch fixes it:
[[[
Index: build.conf
===================================================================
--- build.conf  (revision 1054417)
+++ build.conf  (working copy)
@@ -329,7 +329,7 @@ msvc-export =
         private\svn_log.h private\svn_mergeinfo_private.h
         private\svn_opt_private.h private\svn_skel.h private\svn_sqlite.h
         private\svn_utf_private.h private\svn_eol_private.h
-        private\svn_token.h
+        private\svn_token.h private\svn_adler32.h

 # Working copy management lib
 [libsvn_wc]
]]]

Cheers,
-- 
Johan

Re: svn commit: r1054286 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Johan Corveleyn <jc...@gmail.com>.
On Sat, Jan 1, 2011 at 9:33 PM,  <st...@apache.org> wrote:
> Author: stefan2
> Date: Sat Jan  1 20:33:22 2011
> New Revision: 1054286
>
> URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
> Log:
> XDelta calculation is major part of svn_txdelta_send_txstream.
> Therefore, speed up string matching code and the relatively
> expensive hash-code (adler32) calculations.
>
> * subversion/libsvn_delta/xdelta.c
>  (init_adler32): init adler checksum with 0 instead of 1;
>   use svn_adler32 for performance
>  (adler32_out): "-1" can / must be ommitted now that we
>   start at 0 instead of 1 for s1.
>  (adler32_replace): special, faster implementation replacing
>   a adler32_out(), adler32_in() sequence
>
>  (match_length): new utility function
>  (find_match): speed up the main loop by calling match_length()
>  (compute_delta): optimize adler32 calculations
>
> Modified:
>    subversion/trunk/subversion/libsvn_delta/xdelta.c

Hi Stefan,

This makes my (Windows 32-bit VCE 2008) build fail:
svn_delta-1.lib(xdelta.obj) : error LNK2019: unresolved external
symbol _svn__adler32 referenced in function _init_adler32
..\..\..\Release\subversion\libsvn_delta\libsvn_delta-1.dll : fatal
error LNK1120: 1 unresolved externals

Johan

> Modified: subversion/trunk/subversion/libsvn_delta/xdelta.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_delta/xdelta.c?rev=1054286&r1=1054285&r2=1054286&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_delta/xdelta.c (original)
> +++ subversion/trunk/subversion/libsvn_delta/xdelta.c Sat Jan  1 20:33:22 2011
> @@ -29,6 +29,8 @@
>
>  #include "svn_delta.h"
>  #include "delta.h"
> +
> +#include "private/svn_adler32.h"
>
>  /* This is pseudo-adler32. It is adler32 without the prime modulus.
>    The idea is borrowed from monotone, and is a translation of the C++
> @@ -39,6 +41,10 @@
>  #define ADLER32_MASK      0x0000ffff
>  #define ADLER32_CHAR_MASK 0x000000ff
>
> +/* Size of the blocks we compute checksums for. This was chosen out of
> +   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.  */
> +#define MATCH_BLOCKSIZE 64
> +
>  /* Structure to store the state of our adler32 checksum.  */
>  struct adler32
>  {
> @@ -66,11 +72,32 @@ adler32_out(struct adler32 *ad, const ch
>  {
>   ad->s1 -= ((apr_uint32_t) (c)) & ADLER32_CHAR_MASK;
>   ad->s1 &= ADLER32_MASK;
> -  ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK)) + 1;
> +  ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK));
>   ad->s2 &= ADLER32_MASK;
>   --ad->len;
>  }
>
> +/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
> + * This function may (and will) only be called for
> + * ad->len == MATCH_BLOCKSIZE.
> + */
> +static APR_INLINE void
> +adler32_replace(struct adler32 *ad, const char c_out, const char c_in)
> +{
> +  apr_uint32_t s1 = ad->s1;
> +  apr_uint32_t s2 = ad->s2;
> +
> +  s2 -= (MATCH_BLOCKSIZE * (((apr_uint32_t) c_out) & ADLER32_CHAR_MASK));
> +
> +  s1 -= ((apr_uint32_t) (c_out)) & ADLER32_CHAR_MASK;
> +  s1 += ((apr_uint32_t) (c_in)) & ADLER32_CHAR_MASK;
> +
> +  s2 += s1;
> +
> +  ad->s1 = s1 & ADLER32_MASK;
> +  ad->s2 = s2 & ADLER32_MASK;
> +}
> +
>  /* Return the current adler32 checksum in the adler32 structure.  */
>
>  static APR_INLINE apr_uint32_t
> @@ -85,18 +112,15 @@ adler32_sum(const struct adler32 *ad)
>  static APR_INLINE struct adler32 *
>  init_adler32(struct adler32 *ad, const char *data, apr_uint32_t datalen)
>  {
> -  ad->s1 = 1;
> -  ad->s2 = 0;
> -  ad->len = 0;
> -  while (datalen--)
> -    adler32_in(ad, *(data++));
> +  apr_uint32_t adler32 = svn__adler32(0, data, datalen);
> +
> +  ad->s1 = adler32 & ADLER32_MASK;
> +  ad->s2 = (adler32 >> 16) & ADLER32_MASK;
> +  ad->len = datalen;
> +
>   return ad;
>  }
>
> -/* Size of the blocks we compute checksums for. This was chosen out of
> -   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.  */
> -#define MATCH_BLOCKSIZE 64
> -
>  /* Information for a block of the delta source.  The length of the
>    block is the smaller of MATCH_BLOCKSIZE and the difference between
>    the size of the source data and the position of this block. */
> @@ -201,6 +225,35 @@ init_blocks_table(const char *data,
>     }
>  }
>
> +/* Return the lowest position at which A and B differ. If no difference
> + * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
> + */
> +static apr_size_t
> +match_length(const char *a, const char *b, apr_size_t max_len)
> +{
> +  apr_size_t pos = 0;
> +
> +#if SVN_UNALIGNED_ACCESS_IS_OK
> +
> +  /* Chunky operation is so much faster ...
> +   *
> +   * We can't make this work on architectures that require aligned access
> +   * because A and B will probably have different alignment. So, skipping
> +   * the first few chars until alignment is reached is not an option.
> +   */
> +  for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
> +    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
> +      break;
> +
> +#endif
> +
> +  for (; pos < max_len; ++pos)
> +    if (a[pos] != b[pos])
> +      break;
> +
> +  return pos;
> +}
> +
>  /* Try to find a match for the target data B in BLOCKS, and then
>    extend the match as long as data in A and B at the match position
>    continues to match.  We set the position in a we ended up in (in
> @@ -229,6 +282,8 @@ find_match(const struct blocks *blocks,
>   apr_uint32_t sum = adler32_sum(rolling);
>   apr_size_t alen, badvance, apos;
>   apr_size_t tpos, tlen;
> +  apr_size_t delta, max_delta;
> +  const char *aptr, *bptr;
>
>   tpos = find_block(blocks, sum);
>
> @@ -246,14 +301,15 @@ find_match(const struct blocks *blocks,
>   apos = tpos;
>   alen = tlen;
>   badvance = tlen;
> +
>   /* Extend the match forward as far as possible */
> -  while ((apos + alen < asize)
> -         && (bpos + badvance < bsize)
> -         && (a[apos + alen] == b[bpos + badvance]))
> -    {
> -      ++alen;
> -      ++badvance;
> -    }
> +  max_delta = asize - apos - alen < bsize - bpos - badvance
> +            ? asize - apos - alen
> +            : bsize - bpos - badvance;
> +  delta = match_length(a + apos + alen, b + bpos + badvance, max_delta);
> +
> +  alen += delta;
> +  badvance += delta;
>
>   /* See if we can extend backwards into a previous insert hunk.  */
>   while (apos > 0
> @@ -329,7 +385,6 @@ compute_delta(svn_txdelta__ops_baton_t *
>       apr_size_t apos = 0;
>       apr_size_t alen = 1;
>       apr_size_t badvance = 1;
> -      apr_size_t next;
>       svn_boolean_t match;
>
>       match = find_match(&blocks, &rolling, a, asize, b, bsize, lo, &apos,
> @@ -354,14 +409,49 @@ compute_delta(svn_txdelta__ops_baton_t *
>           svn_txdelta__insert_op(build_baton, svn_txdelta_source,
>                                  apos, alen, NULL, pool);
>         }
> -      next = lo;
> -      for (; next < lo + badvance; ++next)
> +
> +      if (badvance == 1)
> +        {
> +          /* This seems to be the _vast_ majority case -- even if
> +           * you sum BADVANCE up, this case still accounts for 2/3
> +           * of all bytes being processed.
> +           */
> +          if (lo + MATCH_BLOCKSIZE < bsize)
> +            adler32_replace(&rolling, b[lo], b[lo + MATCH_BLOCKSIZE]);
> +          else
> +            adler32_out(&rolling, b[lo]);
> +
> +          lo++;
> +        }
> +      else if (badvance >= MATCH_BLOCKSIZE)
>         {
> -          adler32_out(&rolling, b[next]);
> -          if (next + MATCH_BLOCKSIZE < bsize)
> -            adler32_in(&rolling, b[next + MATCH_BLOCKSIZE]);
> +          /* BADVANCE is often large enough that we can calculate the
> +           * Adler32 sum directly instead of expensively updating the
> +           * existing values.
> +           */
> +          apr_size_t remaining_block = lo + MATCH_BLOCKSIZE < bsize
> +                                     ? MATCH_BLOCKSIZE
> +                                     : bsize - (lo + MATCH_BLOCKSIZE);
> +          init_adler32(&rolling,
> +                       b + lo + badvance - remaining_block,
> +                       remaining_block);
> +          lo += badvance;
> +        }
> +      else
> +        {
> +          /* The very rare 3rd case
> +           * (can only possibly happen close to end of the file).
> +           */
> +          apr_size_t next = lo;
> +
> +          for (; next < lo + badvance; ++next)
> +            if (next + MATCH_BLOCKSIZE < bsize)
> +              adler32_replace(&rolling, b[next], b[next + MATCH_BLOCKSIZE]);
> +            else
> +              adler32_out(&rolling, b[next]);
> +
> +          lo = next;
>         }
> -      lo = next;
>     }
>
>   /* If we still have an insert pending at the end, throw it in.  */
>
>
>



-- 
Johan