You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2014/01/20 17:23:15 UTC

svn commit: r1559767 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Author: stefan2
Date: Mon Jan 20 16:23:15 2014
New Revision: 1559767

URL: http://svn.apache.org/r1559767
Log:
Speed up txdelta for non-deltifyable sections.  This alone speeds up
the commit of a 1GB random data file form 28s to 14s.

The idea is to use a fixed-length bit array that tell us whether we
_might_ have a match for a given checksum.  In contrast to the iterative,
multi-level check in find_match, this pre-check is very fast and highly
predictable.

* subversion/libsvn_delta/xdelta.c
  (FLAGS_COUNT): Define a new array size constant.
  (blocks): Add the FLAGS array.
  (hash_flags): New, separate hash function for FLAGS.
  (add_block): Populate / update FLAGS as well.
  (init_blocks_table): Initialize FLAGS.
  (compute_delta): Add a tight loop skipping non-matching sections;
                   continue with normal lookup when there might be match.

Modified:
    subversion/trunk/subversion/libsvn_delta/xdelta.c

Modified: subversion/trunk/subversion/libsvn_delta/xdelta.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_delta/xdelta.c?rev=1559767&r1=1559766&r2=1559767&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_delta/xdelta.c (original)
+++ subversion/trunk/subversion/libsvn_delta/xdelta.c Mon Jan 20 16:23:15 2014
@@ -45,6 +45,15 @@
  */
 #define MATCH_BLOCKSIZE 64
 
+/* Size of the checksum presense FLAGS array in BLOCKS_T.  With standard
+   MATCH_BLOCKSIZE and SVN_DELTA_WINDOW_SIZE, 32k entries is about 20x
+   the number of checksums that actually occur, i.e. we expect a >95%
+   probability that non-matching checksums get already detected by checking
+   against the FLAGS array.
+   Must be a power of 2.
+ */
+#define FLAGS_COUNT (32 * 1024)
+
 /* "no" / "invalid" / "unused" value for positions within the delta windows
  */
 #define NO_POSITION ((apr_uint32_t)-1)
@@ -119,8 +128,19 @@ struct blocks
      hte same width as the block position index, (struct
      block).pos. */
   apr_uint32_t max;
+
   /* Source buffer that the positions in SLOTS refer to. */
   const char* data;
+
+  /* Bit array indicating whether there may be a matching slot for a given
+     adler32 checksum.  Since FLAGS has much more entries than SLOTS, this
+     will indicate most cases of non-matching checksums with a "0" bit, i.e.
+     as "known not to have a match".
+     The mapping of adler32 checksum bits is [0..2][16..27] (LSB -> MSB),
+     i.e. address the byte by the multiplicative part of adler32 and address
+     the bits in that byte by the additive part of adler32. */
+  char flags[FLAGS_COUNT / 8];
+
   /* The vector of blocks.  A pos value of NO_POSITION represents an unused
      slot. */
   struct block *slots;
@@ -137,6 +157,15 @@ static apr_uint32_t hash_func(apr_uint32
   return sum ^ (sum >> 12);
 }
 
+/* Return the offset in BLOCKS.FLAGS for the adler32 SUM. */
+static apr_uint32_t hash_flags(apr_uint32_t sum)
+{
+  /* The upper half of SUM has a wider value range than the lower 16 bit.
+     Also, we want to a different folding than HASH_FUNC to minimize
+     correlation between different hash levels. */
+  return (sum >> 16) & ((FLAGS_COUNT / 8) - 1);
+}
+
 /* Insert a block with the checksum ADLERSUM at position POS in the source
    data into the table BLOCKS.  Ignore true duplicates, i.e. blocks with
    actually the same content. */
@@ -154,6 +183,7 @@ add_block(struct blocks *blocks, apr_uin
 
   blocks->slots[h].adlersum = adlersum;
   blocks->slots[h].pos = pos;
+  blocks->flags[hash_flags(adlersum)] |= 1 << (adlersum & 7);
 }
 
 /* Find a block in BLOCKS with the checksum ADLERSUM and matching the content
@@ -218,6 +248,9 @@ init_blocks_table(const char *data,
       blocks->slots[i].pos = NO_POSITION;
     }
 
+  /* No checksum entries in SLOTS, yet => reset all checksum flags. */
+  memset(blocks->flags, 0, sizeof(blocks->flags));
+
   /* If there is an odd block at the end of the buffer, we will
      not use that shorter block for deltification (only indirectly
      as an extension of some previous block). */
@@ -345,7 +378,7 @@ compute_delta(svn_txdelta__ops_baton_t *
 {
   struct blocks blocks;
   apr_uint32_t rolling;
-  apr_size_t lo = 0, pending_insert_start = 0;
+  apr_size_t lo = 0, pending_insert_start = 0, upper;
 
   /* Optimization: directly compare window starts. If more than 4
    * bytes match, we can immediately create a matching windows.
@@ -368,6 +401,8 @@ compute_delta(svn_txdelta__ops_baton_t *
       return;
     }
 
+  upper = bsize - MATCH_BLOCKSIZE; /* this is now known to be >= LO */
+
   /* Initialize the matches table.  */
   init_blocks_table(a, asize, &blocks, pool);
 
@@ -375,12 +410,23 @@ compute_delta(svn_txdelta__ops_baton_t *
   rolling = init_adler32(b + lo);
   while (lo < bsize)
     {
-      apr_size_t matchlen = 0;
+      apr_size_t matchlen;
       apr_size_t apos;
 
-      if (lo + MATCH_BLOCKSIZE <= bsize)
-        matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
-                              &lo, &apos, pending_insert_start);
+      /* Quickly skip positions whose respective ROLLING checksums
+         definitely do not match any SLOT in BLOCKS. */
+      while (!(blocks.flags[hash_flags(rolling)] & (1 << (rolling & 7)))
+             && lo < upper)
+        {
+          rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);
+          lo++;
+        }
+
+      /* LO is still <= UPPER, i.e. the following lookup is legal:
+         Closely check whether we've got a match for the current location.
+         Due to the above pre-filter, chances are that we find one. */
+      matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
+                            &lo, &apos, pending_insert_start);
 
       /* If we didn't find a real match, insert the byte at the target
          position into the pending insert.  */



Re: svn commit: r1559767 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Stefan Fuhrmann <st...@wandisco.com>.
On Wed, Feb 19, 2014 at 2:34 AM, Philip Martin
<ph...@wandisco.com>wrote:

> Philip Martin <ph...@wandisco.com> writes:
>
> > $ valgrind -q .libs/lt-random-test 2
> > ==19698== Conditional jump or move depends on uninitialised value(s)
> > ==19698==    at 0x402EAA3: bcmp (mc_replace_strmem.c:935)
> > ==19698==    by 0x4052114: find_block (xdelta.c:201)
> > ==19698==    by 0x4052314: find_match (xdelta.c:284)
> > ==19698==    by 0x40527A3: compute_delta (xdelta.c:428)
> > ==19698==    by 0x4052A4D: svn_txdelta__xdelta (xdelta.c:494)
> > ==19698==    by 0x404FF49: compute_window (text_delta.c:160)
> > ==19698==    by 0x4050718: txdelta_next_window (text_delta.c:392)
> > ==19698==    by 0x40504D7: svn_txdelta_next_window (text_delta.c:346)
> > ==19698==    by 0x402794: do_random_combine_test (random-test.c:446)
> > ==19698==    by 0x402A4B: random_combine_test (random-test.c:499)
> > ==19698==    by 0x403BDBB: do_test_num (svn_test_main.c:396)
> > ==19698==    by 0x403CECD: main (svn_test_main.c:866)
>
> The following patch causes the test to abort showing that the
> pre-condition mentioned in the comment is not met:
>
> Index: sw/subversion/src2/subversion/libsvn_delta/xdelta.c
> ===================================================================
> --- sw/subversion/src2/subversion/libsvn_delta/xdelta.c (revision 1569558)
> +++ sw/subversion/src2/subversion/libsvn_delta/xdelta.c (working copy)
> @@ -412,6 +412,8 @@ compute_delta(svn_txdelta__ops_baton_t *build_bato
>      {
>        apr_size_t matchlen;
>        apr_size_t apos;
> +      apr_size_t lo_orig = lo;
> +      apr_uint32_t rolling_orig = rolling;
>
>        /* Quickly skip positions whose respective ROLLING checksums
>           definitely do not match any SLOT in BLOCKS. */
> @@ -425,6 +427,7 @@ compute_delta(svn_txdelta__ops_baton_t *build_bato
>        /* LO is still <= UPPER, i.e. the following lookup is legal:
>           Closely check whether we've got a match for the current location.
>           Due to the above pre-filter, chances are that we find one. */
> +      SVN_ERR_ASSERT_NO_RETURN(lo <= upper);
>        matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
>                              &lo, &apos, pending_insert_start);
>
> With seed=4237012947 and catching the abort in gdb I see
>
> (gdb) p lo
> $1 = 7666
> (gdb) p upper
> $2 = 7610
> (gdb) p lo_orig
> $3 = 7666
>
> The previous iteration, when lo_orig=7591, calls find_match with lo=7592
> and that sets lo=7587 and matchlen=79.  Then lo+=matchlen causes lo>upper.
>

Thanks to your analysis, Philip, this has been easy
to fix: r1570141.

-- Stefan^2.

Re: svn commit: r1559767 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Philip Martin <ph...@wandisco.com>.
Philip Martin <ph...@wandisco.com> writes:

> $ valgrind -q .libs/lt-random-test 2
> ==19698== Conditional jump or move depends on uninitialised value(s)
> ==19698==    at 0x402EAA3: bcmp (mc_replace_strmem.c:935)
> ==19698==    by 0x4052114: find_block (xdelta.c:201)
> ==19698==    by 0x4052314: find_match (xdelta.c:284)
> ==19698==    by 0x40527A3: compute_delta (xdelta.c:428)
> ==19698==    by 0x4052A4D: svn_txdelta__xdelta (xdelta.c:494)
> ==19698==    by 0x404FF49: compute_window (text_delta.c:160)
> ==19698==    by 0x4050718: txdelta_next_window (text_delta.c:392)
> ==19698==    by 0x40504D7: svn_txdelta_next_window (text_delta.c:346)
> ==19698==    by 0x402794: do_random_combine_test (random-test.c:446)
> ==19698==    by 0x402A4B: random_combine_test (random-test.c:499)
> ==19698==    by 0x403BDBB: do_test_num (svn_test_main.c:396)
> ==19698==    by 0x403CECD: main (svn_test_main.c:866)

The following patch causes the test to abort showing that the
pre-condition mentioned in the comment is not met:

Index: sw/subversion/src2/subversion/libsvn_delta/xdelta.c
===================================================================
--- sw/subversion/src2/subversion/libsvn_delta/xdelta.c	(revision 1569558)
+++ sw/subversion/src2/subversion/libsvn_delta/xdelta.c	(working copy)
@@ -412,6 +412,8 @@ compute_delta(svn_txdelta__ops_baton_t *build_bato
     {
       apr_size_t matchlen;
       apr_size_t apos;
+      apr_size_t lo_orig = lo;
+      apr_uint32_t rolling_orig = rolling;
 
       /* Quickly skip positions whose respective ROLLING checksums
          definitely do not match any SLOT in BLOCKS. */
@@ -425,6 +427,7 @@ compute_delta(svn_txdelta__ops_baton_t *build_bato
       /* LO is still <= UPPER, i.e. the following lookup is legal:
          Closely check whether we've got a match for the current location.
          Due to the above pre-filter, chances are that we find one. */
+      SVN_ERR_ASSERT_NO_RETURN(lo <= upper);
       matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
                             &lo, &apos, pending_insert_start);
 
With seed=4237012947 and catching the abort in gdb I see

(gdb) p lo
$1 = 7666
(gdb) p upper
$2 = 7610
(gdb) p lo_orig
$3 = 7666

The previous iteration, when lo_orig=7591, calls find_match with lo=7592
and that sets lo=7587 and matchlen=79.  Then lo+=matchlen causes lo>upper.

-- 
Philip Martin | Subversion Committer
WANdisco // *Non-Stop Data*

Re: svn commit: r1559767 - /subversion/trunk/subversion/libsvn_delta/xdelta.c

Posted by Philip Martin <ph...@wandisco.com>.
stefan2@apache.org writes:

> Author: stefan2
> Date: Mon Jan 20 16:23:15 2014
> New Revision: 1559767
>
> URL: http://svn.apache.org/r1559767
> Log:
> Speed up txdelta for non-deltifyable sections.  This alone speeds up
> the commit of a 1GB random data file form 28s to 14s.
>
> The idea is to use a fixed-length bit array that tell us whether we
> _might_ have a match for a given checksum.  In contrast to the iterative,
> multi-level check in find_match, this pre-check is very fast and highly
> predictable.
>
> * subversion/libsvn_delta/xdelta.c
>   (FLAGS_COUNT): Define a new array size constant.
>   (blocks): Add the FLAGS array.
>   (hash_flags): New, separate hash function for FLAGS.
>   (add_block): Populate / update FLAGS as well.
>   (init_blocks_table): Initialize FLAGS.
>   (compute_delta): Add a tight loop skipping non-matching sections;
>                    continue with normal lookup when there might be match.

This is triggering valgrind warnings in random-test:

$ valgrind -q .libs/lt-random-test 2
==19698== Conditional jump or move depends on uninitialised value(s)
==19698==    at 0x402EAA3: bcmp (mc_replace_strmem.c:935)
==19698==    by 0x4052114: find_block (xdelta.c:201)
==19698==    by 0x4052314: find_match (xdelta.c:284)
==19698==    by 0x40527A3: compute_delta (xdelta.c:428)
==19698==    by 0x4052A4D: svn_txdelta__xdelta (xdelta.c:494)
==19698==    by 0x404FF49: compute_window (text_delta.c:160)
==19698==    by 0x4050718: txdelta_next_window (text_delta.c:392)
==19698==    by 0x40504D7: svn_txdelta_next_window (text_delta.c:346)
==19698==    by 0x402794: do_random_combine_test (random-test.c:446)
==19698==    by 0x402A4B: random_combine_test (random-test.c:499)
==19698==    by 0x403BDBB: do_test_num (svn_test_main.c:396)
==19698==    by 0x403CECD: main (svn_test_main.c:866)
==19698== 
==19698== Conditional jump or move depends on uninitialised value(s)
==19698==    at 0x4052117: find_block (xdelta.c:201)
==19698==    by 0x4052314: find_match (xdelta.c:284)
==19698==    by 0x40527A3: compute_delta (xdelta.c:428)
==19698==    by 0x4052A4D: svn_txdelta__xdelta (xdelta.c:494)
==19698==    by 0x404FF49: compute_window (text_delta.c:160)
==19698==    by 0x4050718: txdelta_next_window (text_delta.c:392)
==19698==    by 0x40504D7: svn_txdelta_next_window (text_delta.c:346)
==19698==    by 0x402794: do_random_combine_test (random-test.c:446)
==19698==    by 0x402A4B: random_combine_test (random-test.c:499)
==19698==    by 0x403BDBB: do_test_num (svn_test_main.c:396)
==19698==    by 0x403CECD: main (svn_test_main.c:866)
==19698== 
PASS:  lt-random-test 2: random combine delta test

-- 
Philip Martin | Subversion Committer
WANdisco // *Non-Stop Data*