You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by jc...@apache.org on 2011/02/07 00:04:05 UTC

svn commit: r1067800 - in /subversion/trunk: ./ subversion/include/ subversion/include/private/ subversion/libsvn_diff/ subversion/libsvn_subr/

Author: jcorvel
Date: Sun Feb  6 23:04:04 2011
New Revision: 1067800

URL: http://svn.apache.org/viewvc?rev=1067800&view=rev
Log:
Reintegrate diff-optimizations-bytes branch with trunk.

Modified:
    subversion/trunk/   (props changed)
    subversion/trunk/subversion/include/private/svn_adler32.h   (props changed)
    subversion/trunk/subversion/include/svn_diff.h
    subversion/trunk/subversion/libsvn_diff/deprecated.c
    subversion/trunk/subversion/libsvn_diff/diff.c
    subversion/trunk/subversion/libsvn_diff/diff.h
    subversion/trunk/subversion/libsvn_diff/diff3.c
    subversion/trunk/subversion/libsvn_diff/diff4.c
    subversion/trunk/subversion/libsvn_diff/diff_file.c
    subversion/trunk/subversion/libsvn_diff/diff_memory.c
    subversion/trunk/subversion/libsvn_diff/lcs.c
    subversion/trunk/subversion/libsvn_diff/token.c
    subversion/trunk/subversion/libsvn_subr/adler32.c   (props changed)

Propchange: subversion/trunk/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb  6 23:04:04 2011
@@ -2,6 +2,8 @@
 /subversion/branches/atomic-revprop:965046-1000689
 /subversion/branches/bdb-reverse-deltas:872050-872529
 /subversion/branches/diff-callbacks3:870059-870761
+/subversion/branches/diff-optimizations:1031270-1037352
+/subversion/branches/diff-optimizations-bytes:1037353-1067789
 /subversion/branches/dont-save-plaintext-passwords-by-default:870728-871118
 /subversion/branches/double-delete:870511-872970
 /subversion/branches/explore-wc:875486,875493,875497,875507,875511,875514,875559,875580-875581,875584,875587,875611,875627,875647,875667-875668,875711-875712,875733-875734,875736,875744-875748,875751,875758,875782,875795-875796,875830,875836,875838,875842,875852,875855,875864,875870,875873,875880,875885-875888,875890,875897-875898,875905,875907-875909,875935,875943-875944,875946,875979,875982-875983,875985-875986,875990,875997

Propchange: subversion/trunk/subversion/include/private/svn_adler32.h
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb  6 23:04:04 2011
@@ -2,6 +2,8 @@
 /subversion/branches/atomic-revprop/subversion/libsvn_diff/diff.h:965046-1000689
 /subversion/branches/bdb-reverse-deltas/subversion/libsvn_diff/diff.h:872050-872529
 /subversion/branches/diff-callbacks3/subversion/libsvn_diff/diff.h:870059-870761
+/subversion/branches/diff-optimizations/subversion/include/private/svn_adler32.h:1031270-1037352
+/subversion/branches/diff-optimizations-bytes/subversion/include/private/svn_adler32.h:1037353-1067789
 /subversion/branches/dont-save-plaintext-passwords-by-default/subversion/libsvn_diff/diff.h:870728-871118
 /subversion/branches/double-delete/subversion/libsvn_diff/diff.h:870511-872970
 /subversion/branches/explore-wc/subversion/libsvn_diff/diff.h:875486,875493,875497,875507,875511,875514,875559,875580-875581,875584,875587,875611,875627,875647,875667-875668,875711-875712,875733-875734,875736,875744-875748,875751,875758,875782,875795-875796,875830,875836,875838,875842,875852,875855,875864,875870,875873,875880,875885-875888,875890,875897-875898,875905,875907-875909,875935,875943-875944,875946,875979,875982-875983,875985-875986,875990,875997

Modified: subversion/trunk/subversion/include/svn_diff.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_diff.h?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_diff.h (original)
+++ subversion/trunk/subversion/include/svn_diff.h Sun Feb  6 23:04:04 2011
@@ -106,6 +106,49 @@ typedef enum svn_diff_datasource_e
 
 
 /** A vtable for reading data from the three datasources. */
+typedef struct svn_diff_fns2_t
+{
+  /** Open the datasources of type @a datasources. */
+  svn_error_t *(*datasources_open)(void *diff_baton, apr_off_t *prefix_lines,
+                                   svn_diff_datasource_e datasource[],
+                                   apr_size_t datasource_len);
+
+  /** Close the datasource of type @a datasource. */
+  svn_error_t *(*datasource_close)(void *diff_baton,
+                                   svn_diff_datasource_e datasource);
+
+  /** Get the next "token" from the datasource of type @a datasource.
+   *  Return a "token" in @a *token.   Return a hash of "token" in @a *hash.
+   *  Leave @a token and @a hash untouched when the datasource is exhausted.
+   */
+  svn_error_t *(*datasource_get_next_token)(apr_uint32_t *hash, void **token,
+                                            void *diff_baton,
+                                            svn_diff_datasource_e datasource);
+
+  /** A function for ordering the tokens, resembling 'strcmp' in functionality.
+   * @a compare should contain the return value of the comparison:
+   * If @a ltoken and @a rtoken are "equal", return 0.  If @a ltoken is
+   * "less than" @a rtoken, return a number < 0.  If @a ltoken  is
+   * "greater than" @a rtoken, return a number > 0.
+   */
+  svn_error_t *(*token_compare)(void *diff_baton,
+                                void *ltoken,
+                                void *rtoken,
+                                int *compare);
+
+  /** Free @a token from memory, the diff algorithm is done with it. */
+  void (*token_discard)(void *diff_baton,
+                        void *token);
+
+  /** Free *all* tokens from memory, they're no longer needed. */
+  void (*token_discard_all)(void *diff_baton);
+} svn_diff_fns2_t;
+
+
+/** A vtable for reading data from the three datasources.
+ *
+ * @deprecated Provided for backward compatibility with the 1.6 API.
+ */
 typedef struct svn_diff_fns_t
 {
   /** Open the datasource of type @a datasource. */
@@ -149,8 +192,23 @@ typedef struct svn_diff_fns_t
 /** Given a vtable of @a diff_fns/@a diff_baton for reading datasources,
  * return a diff object in @a *diff that represents a difference between
  * an "original" and "modified" datasource.  Do all allocation in @a pool.
+ *
+ * @since New in 1.7.
  */
 svn_error_t *
+svn_diff_diff_2(svn_diff_t **diff,
+                void *diff_baton,
+                const svn_diff_fns2_t *diff_fns,
+                apr_pool_t *pool);
+
+/** Given a vtable of @a diff_fns/@a diff_baton for reading datasources,
+ * return a diff object in @a *diff that represents a difference between
+ * an "original" and "modified" datasource.  Do all allocation in @a pool.
+ *
+ * @deprecated Provided for backward compatibility with the 1.6 API.
+ */
+SVN_DEPRECATED
+svn_error_t *
 svn_diff_diff(svn_diff_t **diff,
               void *diff_baton,
               const svn_diff_fns_t *diff_fns,
@@ -160,7 +218,23 @@ svn_diff_diff(svn_diff_t **diff,
  * return a diff object in @a *diff that represents a difference between
  * three datasources: "original", "modified", and "latest".  Do all
  * allocation in @a pool.
+ *
+ * @since New in 1.7.
+ */
+svn_error_t *
+svn_diff_diff3_2(svn_diff_t **diff,
+                 void *diff_baton,
+                 const svn_diff_fns2_t *diff_fns,
+                 apr_pool_t *pool);
+
+/** Given a vtable of @a diff_fns/@a diff_baton for reading datasources,
+ * return a diff object in @a *diff that represents a difference between
+ * three datasources: "original", "modified", and "latest".  Do all
+ * allocation in @a pool.
+ *
+ * @deprecated Provided for backward compatibility with the 1.6 API.
  */
+SVN_DEPRECATED
 svn_error_t *
 svn_diff_diff3(svn_diff_t **diff,
                void *diff_baton,
@@ -172,8 +246,25 @@ svn_diff_diff3(svn_diff_t **diff,
  * two datasources: "original" and "latest", adjusted to become a full
  * difference between "original", "modified" and "latest" using "ancestor".
  * Do all allocation in @a pool.
+ *
+ * @since New in 1.7.
  */
 svn_error_t *
+svn_diff_diff4_2(svn_diff_t **diff,
+                 void *diff_baton,
+                 const svn_diff_fns2_t *diff_fns,
+                 apr_pool_t *pool);
+
+/** Given a vtable of @a diff_fns/@a diff_baton for reading datasources,
+ * return a diff object in @a *diff that represents a difference between
+ * two datasources: "original" and "latest", adjusted to become a full
+ * difference between "original", "modified" and "latest" using "ancestor".
+ * Do all allocation in @a pool.
+ *
+ * @deprecated Provided for backward compatibility with the 1.6 API.
+ */
+SVN_DEPRECATED
+svn_error_t *
 svn_diff_diff4(svn_diff_t **diff,
                void *diff_baton,
                const svn_diff_fns_t *diff_fns,

Modified: subversion/trunk/subversion/libsvn_diff/deprecated.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/deprecated.c?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/deprecated.c (original)
+++ subversion/trunk/subversion/libsvn_diff/deprecated.c Sun Feb  6 23:04:04 2011
@@ -41,6 +41,103 @@
 
 
 /*** Code. ***/
+struct fns_wrapper_baton
+{
+  /* We put the old baton in front of this one, so that we can still use
+     this baton in place of the old.  This prevents us from having to 
+     implement simple wrappers around each member of diff_fns_t. */
+  void *old_baton;
+  const svn_diff_fns_t *vtable;
+};
+
+static svn_error_t *
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasources[],
+                 apr_size_t datasource_len)
+{
+  struct fns_wrapper_baton *fwb = baton;
+  int i;
+
+  /* Just iterate over the datasources, using the old singular version. */
+  for (i = 0; i < datasource_len; i++)
+    {
+      SVN_ERR(fwb->vtable->datasource_open(fwb->old_baton, datasources[i]));
+    }
+
+  /* Don't claim any prefix matches. */
+  *prefix_lines = 0;
+
+  return SVN_NO_ERROR;
+}
+
+static svn_error_t *
+datasource_close(void *baton,
+                 svn_diff_datasource_e datasource)
+{
+  struct fns_wrapper_baton *fwb = baton;
+  return fwb->vtable->datasource_close(fwb->old_baton, datasource);
+}
+
+static svn_error_t *
+datasource_get_next_token(apr_uint32_t *hash,
+                          void **token,
+                          void *baton,
+                          svn_diff_datasource_e datasource)
+{
+  struct fns_wrapper_baton *fwb = baton;
+  return fwb->vtable->datasource_get_next_token(hash, token, fwb->old_baton,
+                                                datasource);
+}
+
+static svn_error_t *
+token_compare(void *baton,
+              void *ltoken,
+              void *rtoken,
+              int *compare)
+{
+  struct fns_wrapper_baton *fwb = baton;
+  return fwb->vtable->token_compare(fwb->old_baton, ltoken, rtoken, compare);
+}
+
+static void
+token_discard(void *baton,
+              void *token)
+{
+  struct fns_wrapper_baton *fwb = baton;
+  return fwb->vtable->token_discard(fwb->old_baton, token);
+}
+
+static void
+token_discard_all(void *baton)
+{
+  struct fns_wrapper_baton *fwb = baton;
+  return fwb->vtable->token_discard_all(fwb->old_baton);
+}
+
+
+static void
+wrap_diff_fns(svn_diff_fns2_t **diff_fns2,
+              struct fns_wrapper_baton **baton2,
+              const svn_diff_fns_t *diff_fns,
+              void *baton,
+              apr_pool_t *result_pool)
+{
+  /* Initialize the return vtable. */
+  *diff_fns2 = apr_palloc(result_pool, sizeof(**diff_fns2));
+
+  (*diff_fns2)->datasources_open = datasources_open;
+  (*diff_fns2)->datasource_close = datasource_close;
+  (*diff_fns2)->datasource_get_next_token = datasource_get_next_token;
+  (*diff_fns2)->token_compare = token_compare;
+  (*diff_fns2)->token_discard = token_discard;
+  (*diff_fns2)->token_discard_all = token_discard_all;
+
+  /* Initialize the wrapper baton. */
+  *baton2 = apr_palloc(result_pool, sizeof (**baton2));
+  (*baton2)->old_baton = baton;
+  (*baton2)->vtable = diff_fns;
+}
+
 
 /*** From diff_file.c ***/
 svn_error_t *
@@ -142,3 +239,48 @@ svn_diff_file_output_merge(svn_stream_t 
                                      style,
                                      pool);
 }
+
+
+/*** From diff.c ***/
+svn_error_t *
+svn_diff_diff(svn_diff_t **diff,
+              void *diff_baton,
+              const svn_diff_fns_t *vtable,
+              apr_pool_t *pool)
+{
+  svn_diff_fns2_t *diff_fns2;
+  struct fns_wrapper_baton *fwb;
+
+  wrap_diff_fns(&diff_fns2, &fwb, vtable, diff_baton, pool);
+  return svn_diff_diff_2(diff, fwb, diff_fns2, pool);
+}
+
+
+/*** From diff3.c ***/
+svn_error_t *
+svn_diff_diff3(svn_diff_t **diff,
+               void *diff_baton,
+               const svn_diff_fns_t *vtable,
+               apr_pool_t *pool)
+{
+  svn_diff_fns2_t *diff_fns2;
+  struct fns_wrapper_baton *fwb;
+
+  wrap_diff_fns(&diff_fns2, &fwb, vtable, diff_baton, pool);
+  return svn_diff_diff3_2(diff, fwb, diff_fns2, pool);
+}
+
+
+/*** From diff4.c ***/
+svn_error_t *
+svn_diff_diff4(svn_diff_t **diff,
+               void *diff_baton,
+               const svn_diff_fns_t *vtable,
+               apr_pool_t *pool)
+{
+  svn_diff_fns2_t *diff_fns2;
+  struct fns_wrapper_baton *fwb;
+
+  wrap_diff_fns(&diff_fns2, &fwb, vtable, diff_baton, pool);
+  return svn_diff_diff4_2(diff, fwb, diff_fns2, pool);
+}

Modified: subversion/trunk/subversion/libsvn_diff/diff.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/diff.c?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/diff.c (original)
+++ subversion/trunk/subversion/libsvn_diff/diff.c Sun Feb  6 23:04:04 2011
@@ -98,16 +98,19 @@ svn_diff__diff(svn_diff__lcs_t *lcs,
 
 
 svn_error_t *
-svn_diff_diff(svn_diff_t **diff,
-              void *diff_baton,
-              const svn_diff_fns_t *vtable,
-              apr_pool_t *pool)
+svn_diff_diff_2(svn_diff_t **diff,
+                void *diff_baton,
+                const svn_diff_fns2_t *vtable,
+                apr_pool_t *pool)
 {
   svn_diff__tree_t *tree;
   svn_diff__position_t *position_list[2];
+  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
+                                        svn_diff_datasource_modified};
   svn_diff__lcs_t *lcs;
   apr_pool_t *subpool;
   apr_pool_t *treepool;
+  apr_off_t prefix_lines = 0;
 
   *diff = NULL;
 
@@ -116,17 +119,21 @@ svn_diff_diff(svn_diff_t **diff,
 
   svn_diff__tree_create(&tree, treepool);
 
+  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, datasource, 2));
+
   /* Insert the data into the tree */
   SVN_ERR(svn_diff__get_tokens(&position_list[0],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               prefix_lines,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               prefix_lines,
                                subpool));
 
   /* The cool part is that we don't need the tokens anymore.
@@ -139,7 +146,7 @@ svn_diff_diff(svn_diff_t **diff,
   svn_pool_destroy(treepool);
 
   /* Get the lcs */
-  lcs = svn_diff__lcs(position_list[0], position_list[1], subpool);
+  lcs = svn_diff__lcs(position_list[0], position_list[1], prefix_lines, subpool);
 
   /* Produce the diff */
   *diff = svn_diff__diff(lcs, 1, 1, TRUE, pool);

Modified: subversion/trunk/subversion/libsvn_diff/diff.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/diff.h?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/diff.h (original)
+++ subversion/trunk/subversion/libsvn_diff/diff.h Sun Feb  6 23:04:04 2011
@@ -91,6 +91,7 @@ typedef enum svn_diff__normalize_state_t
 svn_diff__lcs_t *
 svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
               svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+              apr_off_t prefix_lines,
               apr_pool_t *pool);
 
 
@@ -109,8 +110,9 @@ svn_error_t *
 svn_diff__get_tokens(svn_diff__position_t **position_list,
                      svn_diff__tree_t *tree,
                      void *diff_baton,
-                     const svn_diff_fns_t *vtable,
+                     const svn_diff_fns2_t *vtable,
                      svn_diff_datasource_e datasource,
+                     apr_off_t prefix_lines,
                      apr_pool_t *pool);
 
 

Modified: subversion/trunk/subversion/libsvn_diff/diff3.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/diff3.c?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/diff3.c (original)
+++ subversion/trunk/subversion/libsvn_diff/diff3.c Sun Feb  6 23:04:04 2011
@@ -173,7 +173,7 @@ svn_diff__resolve_conflict(svn_diff_t *h
         position[1]->next = start_position[1];
       }
 
-    *lcs_ref = svn_diff__lcs(position[0], position[1],
+    *lcs_ref = svn_diff__lcs(position[0], position[1], 0,
                              subpool);
 
     /* Fix up the EOF lcs element in case one of
@@ -244,17 +244,21 @@ svn_diff__resolve_conflict(svn_diff_t *h
 
 
 svn_error_t *
-svn_diff_diff3(svn_diff_t **diff,
-               void *diff_baton,
-               const svn_diff_fns_t *vtable,
-               apr_pool_t *pool)
+svn_diff_diff3_2(svn_diff_t **diff,
+                 void *diff_baton,
+                 const svn_diff_fns2_t *vtable,
+                 apr_pool_t *pool)
 {
   svn_diff__tree_t *tree;
   svn_diff__position_t *position_list[3];
+  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
+                                        svn_diff_datasource_modified,
+                                        svn_diff_datasource_latest};
   svn_diff__lcs_t *lcs_om;
   svn_diff__lcs_t *lcs_ol;
   apr_pool_t *subpool;
   apr_pool_t *treepool;
+  apr_off_t prefix_lines = 0;
 
   *diff = NULL;
 
@@ -263,22 +267,27 @@ svn_diff_diff3(svn_diff_t **diff,
 
   svn_diff__tree_create(&tree, treepool);
 
+  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, datasource, 3));
+
   SVN_ERR(svn_diff__get_tokens(&position_list[0],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               prefix_lines,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               prefix_lines,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[2],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_latest,
+                               prefix_lines,
                                subpool));
 
   /* Get rid of the tokens, we don't need them to calc the diff */
@@ -289,9 +298,9 @@ svn_diff_diff3(svn_diff_t **diff,
   svn_pool_destroy(treepool);
 
   /* Get the lcs for original-modified and original-latest */
-  lcs_om = svn_diff__lcs(position_list[0], position_list[1],
+  lcs_om = svn_diff__lcs(position_list[0], position_list[1], prefix_lines,
                          subpool);
-  lcs_ol = svn_diff__lcs(position_list[0], position_list[2],
+  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], prefix_lines,
                          subpool);
 
   /* Produce a merged diff */
@@ -324,7 +333,7 @@ svn_diff_diff3(svn_diff_t **diff,
       }
     else
       {
-        sentinel_position[0].offset = 1;
+        sentinel_position[0].offset = prefix_lines + 1;
         sentinel_position[0].next = NULL;
         position_list[1] = &sentinel_position[0];
       }
@@ -338,7 +347,7 @@ svn_diff_diff3(svn_diff_t **diff,
       }
     else
       {
-        sentinel_position[1].offset = 1;
+        sentinel_position[1].offset = prefix_lines + 1;
         sentinel_position[1].next = NULL;
         position_list[2] = &sentinel_position[1];
       }

Modified: subversion/trunk/subversion/libsvn_diff/diff4.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/diff4.c?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/diff4.c (original)
+++ subversion/trunk/subversion/libsvn_diff/diff4.c Sun Feb  6 23:04:04 2011
@@ -166,13 +166,17 @@ adjust_diff(svn_diff_t *diff, svn_diff_t
 }
 
 svn_error_t *
-svn_diff_diff4(svn_diff_t **diff,
-               void *diff_baton,
-               const svn_diff_fns_t *vtable,
-               apr_pool_t *pool)
+svn_diff_diff4_2(svn_diff_t **diff,
+                 void *diff_baton,
+                 const svn_diff_fns2_t *vtable,
+                 apr_pool_t *pool)
 {
   svn_diff__tree_t *tree;
   svn_diff__position_t *position_list[4];
+  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
+                                        svn_diff_datasource_modified,
+                                        svn_diff_datasource_latest,
+                                        svn_diff_datasource_ancestor};
   svn_diff__lcs_t *lcs_ol;
   svn_diff__lcs_t *lcs_adjust;
   svn_diff_t *diff_ol;
@@ -181,6 +185,7 @@ svn_diff_diff4(svn_diff_t **diff,
   apr_pool_t *subpool;
   apr_pool_t *subpool2;
   apr_pool_t *subpool3;
+  apr_off_t prefix_lines = 0;
 
   *diff = NULL;
 
@@ -190,28 +195,34 @@ svn_diff_diff4(svn_diff_t **diff,
 
   svn_diff__tree_create(&tree, subpool3);
 
+  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, datasource, 4));
+
   SVN_ERR(svn_diff__get_tokens(&position_list[0],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               prefix_lines,
                                subpool2));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               prefix_lines,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[2],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_latest,
+                               prefix_lines,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[3],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_ancestor,
+                               prefix_lines,
                                subpool2));
 
   /* Get rid of the tokens, we don't need them to calc the diff */
@@ -222,7 +233,8 @@ svn_diff_diff4(svn_diff_t **diff,
   svn_pool_clear(subpool3);
 
   /* Get the lcs for original - latest */
-  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], subpool3);
+  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], prefix_lines,
+                         subpool3);
   diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool);
 
   svn_pool_clear(subpool3);
@@ -243,7 +255,8 @@ svn_diff_diff4(svn_diff_t **diff,
   /* Get the lcs for common ancestor - original
    * Do reverse adjustements
    */
-  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], subpool3);
+  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], prefix_lines,
+                             subpool3);
   diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
   adjust_diff(diff_ol, diff_adjust);
 
@@ -252,7 +265,8 @@ svn_diff_diff4(svn_diff_t **diff,
   /* Get the lcs for modified - common ancestor
    * Do forward adjustments
    */
-  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], subpool3);
+  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], prefix_lines,
+                             subpool3);
   diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
   adjust_diff(diff_ol, diff_adjust);
 

Modified: subversion/trunk/subversion/libsvn_diff/diff_file.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/diff_file.c?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/diff_file.c (original)
+++ subversion/trunk/subversion/libsvn_diff/diff_file.c Sun Feb  6 23:04:04 2011
@@ -83,6 +83,10 @@ typedef struct svn_diff__file_baton_t
     char *endp;    /* next memory address after the current chunk */
 
     svn_diff__normalize_state_t normalize_state;
+
+    /* Where the identical suffix starts in this datasource */
+    int suffix_start_chunk;
+    apr_off_t suffix_offset_in_chunk;
   } files[4];
 
   /* List of free tokens that may be reused. */
@@ -204,47 +208,541 @@ map_or_read_file(apr_file_t **file,
 }
 
 
-/* Let FILE stand for the file_info struct element of BATON->files that is
- * indexed by DATASOURCE.  BATON's type is (svn_diff__file_baton_t *).
+/* For all files in the FILE array, increment the curp pointer.  If a file
+ * points before the beginning of file, let it point at the first byte again.
+ * If the end of the current chunk is reached, read the next chunk in the
+ * buffer and point curp to the start of the chunk.  If EOF is reached, set
+ * curp equal to endp to indicate EOF. */
+#define INCREMENT_POINTERS(all_files, files_len, pool)                       \
+  do {                                                                       \
+    apr_size_t svn_macro__i;                                                 \
+                                                                             \
+    for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
+    {                                                                        \
+      if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
+        (all_files)[svn_macro__i].curp++;                                    \
+      else                                                                   \
+        SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool)));        \
+    }                                                                        \
+  } while (0)
+
+
+/* For all files in the FILE array, decrement the curp pointer.  If the
+ * start of a chunk is reached, read the previous chunk in the buffer and
+ * point curp to the last byte of the chunk.  If the beginning of a FILE is
+ * reached, set chunk to -1 to indicate BOF. */
+#define DECREMENT_POINTERS(all_files, files_len, pool)                       \
+  do {                                                                       \
+    apr_size_t svn_macro__i;                                                 \
+                                                                             \
+    for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
+    {                                                                        \
+      if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
+        (all_files)[svn_macro__i].curp--;                                    \
+      else                                                                   \
+        SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool)));        \
+    }                                                                        \
+  } while (0)
+
+
+static svn_error_t *
+increment_chunk(struct file_info *file, apr_pool_t *pool)
+{
+  apr_off_t length;
+  apr_off_t last_chunk = offset_to_chunk(file->size);
+
+  if (file->chunk == -1)
+    {
+      /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
+      file->chunk = 0;
+      file->curp = file->buffer;
+    }
+  else if (file->chunk == last_chunk)
+    {
+      /* We are at the last chunk. Indicate EOF by setting curp == endp. */
+      file->curp = file->endp;
+    }
+  else
+    {
+      /* There are still chunks left. Read next chunk and reset pointers. */
+      file->chunk++;
+      length = file->chunk == last_chunk ? 
+        offset_in_chunk(file->size) : CHUNK_SIZE;
+      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
+                         length, chunk_to_offset(file->chunk),
+                         pool));
+      file->endp = file->buffer + length;
+      file->curp = file->buffer;
+    }
+  
+  return SVN_NO_ERROR;
+}
+
+
+static svn_error_t *
+decrement_chunk(struct file_info *file, apr_pool_t *pool)
+{
+  if (file->chunk == 0)
+    {
+      /* We are already at the first chunk. Indicate BOF (Beginning Of File)
+         by setting chunk = -1 and curp = endp - 1. Both conditions are
+         important. They help the increment step to catch the BOF situation
+         in an efficient way. */
+      file->chunk--; 
+      file->curp = file->endp - 1;
+    }
+  else
+    {
+      /* Read previous chunk and reset pointers. */
+      file->chunk--;
+      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
+                         CHUNK_SIZE, chunk_to_offset(file->chunk),
+                         pool));
+      file->endp = file->buffer + CHUNK_SIZE;
+      file->curp = file->endp - 1;
+    }
+  
+  return SVN_NO_ERROR;
+}
+
+
+/* Check whether one of the FILEs has its pointers 'before' the beginning of
+ * the file (this can happen while scanning backwards). This is the case if
+ * one of them has chunk == -1. */
+static svn_boolean_t
+is_one_at_bof(struct file_info file[], apr_size_t file_len)
+{
+  apr_size_t i;
+
+  for (i = 0; i < file_len; i++)
+    if (file[i].chunk == -1)
+      return TRUE;
+
+  return FALSE;
+}
+
+/* Check whether one of the FILEs has its pointers at EOF (this is the case if
+ * one of them has curp == endp (this can only happen at the last chunk)) */
+static svn_boolean_t
+is_one_at_eof(struct file_info file[], apr_size_t file_len)
+{
+  apr_size_t i;
+
+  for (i = 0; i < file_len; i++)
+    if (file[i].curp == file[i].endp)
+      return TRUE;
+
+  return FALSE;
+}
+
+/* Quickly determine whether there is a eol char in CHUNK.
+ * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
+ */
+#if SVN_UNALIGNED_ACCESS_IS_OK
+#if APR_SIZEOF_VOIDP == 8
+#  define LOWER_7BITS_SET 0x7f7f7f7f7f7f7f7f
+#  define BIT_7_SET       0x8080808080808080
+#  define R_MASK          0x0a0a0a0a0a0a0a0a
+#  define N_MASK          0x0d0d0d0d0d0d0d0d
+#else
+#  define LOWER_7BITS_SET 0x7f7f7f7f
+#  define BIT_7_SET       0x80808080
+#  define R_MASK          0x0a0a0a0a
+#  define N_MASK          0x0d0d0d0d
+#endif
+#endif
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+static svn_boolean_t contains_eol(apr_size_t chunk)
+{
+  apr_size_t r_test = chunk ^ R_MASK;
+  apr_size_t n_test = chunk ^ N_MASK;
+
+  r_test |= (r_test & LOWER_7BITS_SET) + LOWER_7BITS_SET;
+  n_test |= (n_test & LOWER_7BITS_SET) + LOWER_7BITS_SET;
+
+  return (r_test & n_test & BIT_7_SET) != BIT_7_SET;
+}
+#endif
+
+/* Find the prefix which is identical between all elements of the FILE array.
+ * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
+ * set to TRUE if one of the FILEs reached its end while scanning prefix,
+ * i.e. at least one file consisted entirely of prefix.  Otherwise, 
+ * REACHED_ONE_EOF is set to FALSE.
  *
- * Open the file at FILE.path; initialize FILE.file, FILE.size, FILE.buffer,
- * FILE.curp and FILE.endp; allocate a buffer and read the first chunk.
+ * After this function is finished, the buffers, chunks, curp's and endp's 
+ * of the FILEs are set to point at the first byte after the prefix. */
+static svn_error_t *
+find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
+                      struct file_info file[], apr_size_t file_len,
+                      apr_pool_t *pool)
+{
+  svn_boolean_t had_cr = FALSE;
+  svn_boolean_t is_match;
+  apr_off_t lines = 0;
+  apr_size_t i;
+
+  for (i = 1, is_match = TRUE; i < file_len; i++)
+    is_match = is_match && *file[0].curp == *file[i].curp;
+  while (is_match)
+    {
+      apr_off_t max_delta, delta;
+
+      /* ### TODO: see if we can take advantage of 
+         diff options like ignore_eol_style or ignore_space. */
+      /* check for eol, and count */
+      if (*file[0].curp == '\r')
+        {
+          lines++;
+          had_cr = TRUE;
+        }
+      else if (*file[0].curp == '\n' && !had_cr)
+        {
+          lines++;
+        }
+      else 
+        {
+          had_cr = FALSE;
+        }
+
+      INCREMENT_POINTERS(file, file_len, pool);
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+      /* Try to advance as far as possible with machine-word granularity.
+       * Determine how far we may advance with chunky ops without reaching
+       * endp for any of the files.
+       * Signedness is important here if curp gets close to endp.
+       */
+      max_delta = file[0].endp - file[0].curp - sizeof(apr_size_t);
+      for (i = 1; i < file_len; i++)
+        {
+          delta = file[i].endp - file[i].curp - sizeof(apr_size_t);
+          if (delta < max_delta)
+            max_delta = delta;
+        }
+
+      is_match = TRUE;
+      for (delta = 0; delta < max_delta && is_match; delta += sizeof(apr_size_t))
+        {
+          apr_size_t chunk = *(const apr_size_t *)(file[0].curp + delta);
+          if (contains_eol(chunk))
+            break;
+
+          for (i = 1; i < file_len; i++)
+            if (chunk != *(const apr_size_t *)(file[i].curp + delta))
+              {
+                is_match = FALSE;
+                delta -= sizeof(apr_size_t);
+                break;
+              }
+        }
+
+      /* We either found a mismatch or an EOL at or shortly behind curp+delta
+       * or we cannot proceed with chunky ops without exceeding endp.
+       * In any way, everything up to curp + delta is equal and not an EOL.
+       */
+      for (i = 0; i < file_len; i++)
+        file[i].curp += delta;
+
+#endif
+
+      *reached_one_eof = is_one_at_eof(file, file_len);
+      if (*reached_one_eof)
+        break;
+      else
+        for (i = 1, is_match = TRUE; i < file_len; i++)
+          is_match = is_match && *file[0].curp == *file[i].curp;
+    }
+
+  if (had_cr)
+    {
+      /* Check if we ended in the middle of a \r\n for one file, but \r for 
+         another. If so, back up one byte, so the next loop will back up
+         the entire line. Also decrement lines, since we counted one
+         too many for the \r. */
+      svn_boolean_t ended_at_nonmatching_newline = FALSE;
+      for (i = 0; i < file_len; i++)
+        ended_at_nonmatching_newline = ended_at_nonmatching_newline 
+                                       || *file[i].curp == '\n';
+      if (ended_at_nonmatching_newline)
+        {
+          lines--;
+          DECREMENT_POINTERS(file, file_len, pool);
+        }
+    }
+
+  /* Back up one byte, so we point at the last identical byte */
+  DECREMENT_POINTERS(file, file_len, pool);
+
+  /* Back up to the last eol sequence (\n, \r\n or \r) */
+  while (!is_one_at_bof(file, file_len) && 
+         *file[0].curp != '\n' && *file[0].curp != '\r')
+    DECREMENT_POINTERS(file, file_len, pool);
+
+  /* Slide one byte forward, to point past the eol sequence */
+  INCREMENT_POINTERS(file, file_len, pool);
+
+  *prefix_lines = lines;
+
+  return SVN_NO_ERROR;
+}
+
+
+#define SUFFIX_LINES_TO_KEEP 50
+
+/* Find the suffix which is identical between all elements of the FILE array.
  *
- * Implements svn_diff_fns_t::datasource_open. */
+ * Before this function is called the FILEs' pointers and chunks should be 
+ * positioned right after the identical prefix (which is the case after 
+ * find_identical_prefix), so we can determine where suffix scanning should 
+ * ultimately stop. */
 static svn_error_t *
-datasource_open(void *baton, svn_diff_datasource_e datasource)
+find_identical_suffix(struct file_info file[], apr_size_t file_len,
+                      apr_pool_t *pool)
 {
-  svn_diff__file_baton_t *file_baton = baton;
-  struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
-  apr_finfo_t finfo;
-  apr_off_t length;
-  char *curp;
-  char *endp;
+  struct file_info file_for_suffix[4];
+  apr_off_t length[4];
+  apr_off_t suffix_min_chunk0;
+  apr_off_t suffix_min_offset0;
+  apr_off_t min_file_size;
+  int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
+  svn_boolean_t is_match, can_read, reached_prefix;
+  apr_size_t i;
+
+  memset(file_for_suffix, 0, sizeof(file_for_suffix));
+
+  /* Initialize file_for_suffix[].
+     Read last chunk, position curp at last byte. */
+  for (i = 0; i < file_len; i++)
+    {
+      file_for_suffix[i].path = file[i].path;
+      file_for_suffix[i].file = file[i].file;
+      file_for_suffix[i].size = file[i].size;
+      file_for_suffix[i].chunk =
+        (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
+      length[i] = offset_in_chunk(file_for_suffix[i].size);
+      if (file_for_suffix[i].chunk == file[i].chunk)
+        {
+          /* Prefix ended in last chunk, so we can reuse the prefix buffer */
+          file_for_suffix[i].buffer = file[i].buffer;
+        }
+      else
+        {
+          /* There is at least more than 1 chunk,
+             so allocate full chunk size buffer */
+          file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
+          SVN_ERR(read_chunk(file_for_suffix[i].file, file_for_suffix[i].path,
+                             file_for_suffix[i].buffer, length[i],
+                             chunk_to_offset(file_for_suffix[i].chunk),
+                             pool));
+        }
+      file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
+      file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
+    }
+
+  /* Get the chunk and pointer offset (for file[0]) at which we should stop
+     scanning backward for the identical suffix, i.e. when we reach prefix. */
+  suffix_min_chunk0 = file[0].chunk;
+  suffix_min_offset0 = file[0].curp - file[0].buffer;
+
+  /* Compensate if other files are smaller than file[0] */
+  for (i = 1, min_file_size = file[0].size; i < file_len; i++)
+    if (file[i].size < min_file_size)
+      min_file_size = file[i].size;
+  if (file[0].size > min_file_size)
+    {
+      suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
+      suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
+    }
+
+  /* Scan backwards until mismatch or until we reach the prefix. */
+  while (1)
+    {
+      /* Initialize the minimum pointer positions. */
+      const char *min_curp[4];
+      svn_boolean_t can_read_word;
+
+      min_curp[0] = file_for_suffix[0].chunk == suffix_min_chunk0
+                  ? file_for_suffix[0].buffer + suffix_min_offset0 + 1
+                  : file_for_suffix[0].buffer + 1;
+      for (i = 1; i < file_len; i++)
+        min_curp[i] = file_for_suffix[i].buffer + 1;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+      /* Scan quickly by reading with machine-word granularity. */
+      for (i = 0, can_read_word = TRUE; i < file_len; i++)
+        can_read_word = can_read_word 
+                        && (   file_for_suffix[i].curp - sizeof(apr_size_t)
+                            >= min_curp[i]);
+      if (can_read_word)
+        {
+          do
+            {
+              for (i = 0; i < file_len; i++)
+                file_for_suffix[i].curp -= sizeof(apr_size_t);
 
-  SVN_ERR(svn_io_file_open(&file->file, file->path,
-                           APR_READ, APR_OS_DEFAULT, file_baton->pool));
+              for (i = 0, can_read_word = TRUE; i < file_len; i++)
+                can_read_word = can_read_word 
+                                && (   file_for_suffix[i].curp - sizeof(apr_size_t)
+                                    >= min_curp[i]);
+              for (i = 1, is_match = TRUE; i < file_len; i++)
+                is_match = is_match
+                           && (   *(const apr_size_t *)(file_for_suffix[0].curp + 1)
+                               == *(const apr_size_t *)(file_for_suffix[i].curp + 1));
+            } while (can_read_word && is_match);
 
-  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE,
-                               file->file, file_baton->pool));
+            for (i = 0; i < file_len; i++)
+              file_for_suffix[i].curp += sizeof(apr_size_t);
+        }
 
-  file->size = finfo.size;
-  length = finfo.size > CHUNK_SIZE ? CHUNK_SIZE : finfo.size;
+#endif
 
-  if (length == 0)
-    return SVN_NO_ERROR;
+      for (i = 1, is_match = TRUE; i < file_len; i++)
+        is_match =
+          is_match && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
+      for (i = 0, can_read = TRUE; i < file_len; i++)
+        can_read = can_read && (file_for_suffix[i].curp > min_curp[i]);
+      while (is_match && can_read)
+        {
+          for (i = 0; i < file_len; i++)
+            file_for_suffix[i].curp--;
+          for (i = 1, is_match = TRUE; i < file_len; i++)
+            is_match =
+              is_match && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
+          for (i = 0, can_read = TRUE; i < file_len; i++)
+            can_read = can_read && (file_for_suffix[i].curp > min_curp[i]);
+        }
+
+      if (!is_match)
+        break;
+
+      DECREMENT_POINTERS(file_for_suffix, file_len, pool);
+
+      reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0 
+                       && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
+                          == suffix_min_offset0;
+
+      if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
+        break;
+    }
+
+  /* Slide one byte forward, to point at the first byte of identical suffix */
+  INCREMENT_POINTERS(file_for_suffix, file_len, pool);
+
+  /* Slide forward until we find an eol sequence to add the rest of the line
+     we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least 
+     one file reaches its end. */
+  do
+    {
+      while (!is_one_at_eof(file_for_suffix, file_len)
+             && *file_for_suffix[0].curp != '\n'
+             && *file_for_suffix[0].curp != '\r')
+        INCREMENT_POINTERS(file_for_suffix, file_len, pool);
+
+      /* Slide one or two more bytes, to point past the eol. */
+      if (!is_one_at_eof(file_for_suffix, file_len)
+          && *file_for_suffix[0].curp == '\r')
+        INCREMENT_POINTERS(file_for_suffix, file_len, pool);
+      if (!is_one_at_eof(file_for_suffix, file_len)
+          && *file_for_suffix[0].curp == '\n')
+        INCREMENT_POINTERS(file_for_suffix, file_len, pool);
+    }
+  while (!is_one_at_eof(file_for_suffix, file_len) 
+         && suffix_lines_to_keep--);
 
-  endp = curp = apr_palloc(file_baton->pool, (apr_size_t) length);
-  endp += length;
+  /* Save the final suffix information in the original file_info */
+  for (i = 0; i < file_len; i++)
+    {
+      file[i].suffix_start_chunk = file_for_suffix[i].chunk;
+      file[i].suffix_offset_in_chunk = 
+        file_for_suffix[i].curp - file_for_suffix[i].buffer;
+    }
+
+  return SVN_NO_ERROR;
+}
 
-  file->buffer = file->curp = curp;
-  file->endp = endp;
 
-  return read_chunk(file->file, file->path,
-                    curp, length, 0, file_baton->pool);
+/* Let FILE stand for the array of file_info struct elements of BATON->files
+ * that are indexed by the elements of the DATASOURCE array.
+ * BATON's type is (svn_diff__file_baton_t *).
+ *
+ * For each file in the FILE array, open the file at FILE.path; initialize 
+ * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a 
+ * buffer and read the first chunk.  Then find the prefix and suffix lines
+ * which are identical between all the files.  Return the number of identical
+ * prefix lines in PREFIX_LINES.
+ *
+ * Finding the identical prefix and suffix allows us to exclude those from the
+ * rest of the diff algorithm, which increases performance by reducing the 
+ * problem space.
+ *
+ * Implements svn_diff_fns2_t::datasources_open. */
+static svn_error_t *
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasource[],
+                 apr_size_t datasource_len)
+{
+  svn_diff__file_baton_t *file_baton = baton;
+  struct file_info files[4];
+  apr_finfo_t finfo[4];
+  apr_off_t length[4];
+  svn_boolean_t reached_one_eof;
+  apr_size_t i;
+
+  /* Make sure prefix_lines is set correctly, even if we exit early because
+   * one of the files is empty. */
+  *prefix_lines = 0;
+
+  /* Open datasources and read first chunk */
+  for (i = 0; i < datasource_len; i++)
+    {
+      struct file_info *file
+          = &file_baton->files[datasource_to_index(datasource[i])];
+      SVN_ERR(svn_io_file_open(&file->file, file->path,
+                               APR_READ, APR_OS_DEFAULT, file_baton->pool));
+      SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
+                                   file->file, file_baton->pool));
+      file->size = finfo[i].size;
+      length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
+      file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
+      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
+                         length[i], 0, file_baton->pool));
+      file->endp = file->buffer + length[i];
+      file->curp = file->buffer;
+      /* Set suffix_start_chunk to a guard value, so if suffix scanning is
+       * skipped because one of the files is empty, or because of
+       * reached_one_eof, we can still easily check for the suffix during
+       * token reading (datasource_get_next_token). */
+      file->suffix_start_chunk = -1;
+
+      files[i] = *file;
+    }
+
+  for (i = 0; i < datasource_len; i++)
+    if (length[i] == 0)
+      /* There will not be any identical prefix/suffix, so we're done. */
+      return SVN_NO_ERROR;
+
+  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
+                                files, datasource_len, file_baton->pool));
+
+  if (!reached_one_eof)
+    /* No file consisted totally of identical prefix,
+     * so there may be some identical suffix.  */
+    SVN_ERR(find_identical_suffix(files, datasource_len, file_baton->pool));
+
+  /* Copy local results back to baton. */
+  for (i = 0; i < datasource_len; i++)
+    file_baton->files[datasource_to_index(datasource[i])] = files[i];
+
+  return SVN_NO_ERROR;
 }
 
 
-/* Implements svn_diff_fns_t::datasource_close */
+/* Implements svn_diff_fns2_t::datasource_close */
 static svn_error_t *
 datasource_close(void *baton, svn_diff_datasource_e datasource)
 {
@@ -255,7 +753,7 @@ datasource_close(void *baton, svn_diff_d
   return SVN_NO_ERROR;
 }
 
-/* Implements svn_diff_fns_t::datasource_get_next_token */
+/* Implements svn_diff_fns2_t::datasource_get_next_token */
 static svn_error_t *
 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
                           svn_diff_datasource_e datasource)
@@ -285,7 +783,14 @@ datasource_get_next_token(apr_uint32_t *
       return SVN_NO_ERROR;
     }
 
-  /* Get a new token */
+  /* Stop when we encounter the identical suffix. If suffix scanning was not
+   * performed, suffix_start_chunk will be -1, so this condition will never
+   * be true. */
+  if (file->chunk == file->suffix_start_chunk
+      && curp - file->buffer == file->suffix_offset_in_chunk)
+    return SVN_NO_ERROR;
+
+  /* Allocate a new token, or fetch one from the "reusable tokens" list. */
   file_token = file_baton->tokens;
   if (file_token)
     {
@@ -387,7 +892,7 @@ datasource_get_next_token(apr_uint32_t *
 
 #define COMPARE_CHUNK_SIZE 4096
 
-/* Implements svn_diff_fns_t::token_compare */
+/* Implements svn_diff_fns2_t::token_compare */
 static svn_error_t *
 token_compare(void *baton, void *token1, void *token2, int *compare)
 {
@@ -508,19 +1013,20 @@ token_compare(void *baton, void *token1,
 }
 
 
-/* Implements svn_diff_fns_t::token_discard */
+/* Implements svn_diff_fns2_t::token_discard */
 static void
 token_discard(void *baton, void *token)
 {
   svn_diff__file_baton_t *file_baton = baton;
   svn_diff__file_token_t *file_token = token;
 
+  /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
   file_token->next = file_baton->tokens;
   file_baton->tokens = file_token;
 }
 
 
-/* Implements svn_diff_fns_t::token_discard_all */
+/* Implements svn_diff_fns2_t::token_discard_all */
 static void
 token_discard_all(void *baton)
 {
@@ -531,9 +1037,9 @@ token_discard_all(void *baton)
 }
 
 
-static const svn_diff_fns_t svn_diff__file_vtable =
+static const svn_diff_fns2_t svn_diff__file_vtable =
 {
-  datasource_open,
+  datasources_open,
   datasource_close,
   datasource_get_next_token,
   token_compare,
@@ -675,7 +1181,7 @@ svn_diff_file_diff_2(svn_diff_t **diff,
   baton.files[1].path = modified;
   baton.pool = svn_pool_create(pool);
 
-  SVN_ERR(svn_diff_diff(diff, &baton, &svn_diff__file_vtable, pool));
+  SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
 
   svn_pool_destroy(baton.pool);
   return SVN_NO_ERROR;
@@ -698,7 +1204,7 @@ svn_diff_file_diff3_2(svn_diff_t **diff,
   baton.files[2].path = latest;
   baton.pool = svn_pool_create(pool);
 
-  SVN_ERR(svn_diff_diff3(diff, &baton, &svn_diff__file_vtable, pool));
+  SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
 
   svn_pool_destroy(baton.pool);
   return SVN_NO_ERROR;
@@ -723,7 +1229,7 @@ svn_diff_file_diff4_2(svn_diff_t **diff,
   baton.files[3].path = ancestor;
   baton.pool = svn_pool_create(pool);
 
-  SVN_ERR(svn_diff_diff4(diff, &baton, &svn_diff__file_vtable, pool));
+  SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
 
   svn_pool_destroy(baton.pool);
   return SVN_NO_ERROR;

Modified: subversion/trunk/subversion/libsvn_diff/diff_memory.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/diff_memory.c?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/diff_memory.c (original)
+++ subversion/trunk/subversion/libsvn_diff/diff_memory.c Sun Feb  6 23:04:04 2011
@@ -88,16 +88,19 @@ datasource_to_index(svn_diff_datasource_
 }
 
 
-/* Implements svn_diff_fns_t::datasource_open */
+/* Implements svn_diff_fns2_t::datasources_open */
 static svn_error_t *
-datasource_open(void *baton, svn_diff_datasource_e datasource)
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasource[], 
+                 apr_size_t datasource_len)
 {
   /* Do nothing: everything is already there and initialized to 0 */
+  *prefix_lines = 0;
   return SVN_NO_ERROR;
 }
 
 
-/* Implements svn_diff_fns_t::datasource_close */
+/* Implements svn_diff_fns2_t::datasource_close */
 static svn_error_t *
 datasource_close(void *baton, svn_diff_datasource_e datasource)
 {
@@ -109,7 +112,7 @@ datasource_close(void *baton, svn_diff_d
 }
 
 
-/* Implements svn_diff_fns_t::datasource_get_next_token */
+/* Implements svn_diff_fns2_t::datasource_get_next_token */
 static svn_error_t *
 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
                           svn_diff_datasource_e datasource)
@@ -138,7 +141,7 @@ datasource_get_next_token(apr_uint32_t *
   return SVN_NO_ERROR;
 }
 
-/* Implements svn_diff_fns_t::token_compare */
+/* Implements svn_diff_fns2_t::token_compare */
 static svn_error_t *
 token_compare(void *baton, void *token1, void *token2, int *result)
 {
@@ -167,7 +170,7 @@ token_compare(void *baton, void *token1,
   return SVN_NO_ERROR;
 }
 
-/* Implements svn_diff_fns_t::token_discard */
+/* Implements svn_diff_fns2_t::token_discard */
 static void
 token_discard(void *baton, void *token)
 {
@@ -175,7 +178,7 @@ token_discard(void *baton, void *token)
 }
 
 
-/* Implements svn_diff_fns_t::token_discard_all */
+/* Implements svn_diff_fns2_t::token_discard_all */
 static void
 token_discard_all(void *baton)
 {
@@ -187,9 +190,9 @@ token_discard_all(void *baton)
 }
 
 
-static const svn_diff_fns_t svn_diff__mem_vtable =
+static const svn_diff_fns2_t svn_diff__mem_vtable =
 {
-  datasource_open,
+  datasources_open,
   datasource_close,
   datasource_get_next_token,
   token_compare,
@@ -284,7 +287,7 @@ svn_diff_mem_string_diff(svn_diff_t **di
 
   baton.normalization_options = options;
 
-  return svn_diff_diff(diff, &baton, &svn_diff__mem_vtable, pool);
+  return svn_diff_diff_2(diff, &baton, &svn_diff__mem_vtable, pool);
 }
 
 svn_error_t *
@@ -304,7 +307,7 @@ svn_diff_mem_string_diff3(svn_diff_t **d
 
   baton.normalization_options = options;
 
-  return svn_diff_diff3(diff, &baton, &svn_diff__mem_vtable, pool);
+  return svn_diff_diff3_2(diff, &baton, &svn_diff__mem_vtable, pool);
 }
 
 
@@ -327,7 +330,7 @@ svn_diff_mem_string_diff4(svn_diff_t **d
 
   baton.normalization_options = options;
 
-  return svn_diff_diff4(diff, &baton, &svn_diff__mem_vtable, pool);
+  return svn_diff_diff4_2(diff, &baton, &svn_diff__mem_vtable, pool);
 }
 
 

Modified: subversion/trunk/subversion/libsvn_diff/lcs.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/lcs.c?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/lcs.c (original)
+++ subversion/trunk/subversion/libsvn_diff/lcs.c Sun Feb  6 23:04:04 2011
@@ -160,9 +160,36 @@ svn_diff__lcs_reverse(svn_diff__lcs_t *l
 }
 
 
+/* Prepends a new lcs chunk for the amount of PREFIX_LINES to the given LCS
+ * chain, and returns it. This function assumes PREFIX_LINES > 0. */
+static svn_diff__lcs_t *
+prepend_prefix_lcs(svn_diff__lcs_t *lcs,
+                   apr_off_t prefix_lines,
+                   apr_pool_t *pool)
+{
+  svn_diff__lcs_t *prefix_lcs;
+
+  SVN_ERR_ASSERT_NO_RETURN(prefix_lines > 0);
+
+  prefix_lcs = apr_palloc(pool, sizeof(*prefix_lcs));
+  prefix_lcs->position[0] = apr_pcalloc(pool, 
+                                        sizeof(*prefix_lcs->position[0]));
+  prefix_lcs->position[0]->offset = 1;
+  prefix_lcs->position[1] = apr_pcalloc(pool,
+                                        sizeof(*prefix_lcs->position[1]));
+  prefix_lcs->position[1]->offset = 1;
+  prefix_lcs->length = prefix_lines;
+  prefix_lcs->refcount = 1;
+  prefix_lcs->next = lcs;
+
+  return prefix_lcs;
+}
+
+
 svn_diff__lcs_t *
 svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
               svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+              apr_off_t prefix_lines,
               apr_pool_t *pool)
 {
   int idx;
@@ -180,15 +207,22 @@ svn_diff__lcs(svn_diff__position_t *posi
    */
   lcs = apr_palloc(pool, sizeof(*lcs));
   lcs->position[0] = apr_pcalloc(pool, sizeof(*lcs->position[0]));
-  lcs->position[0]->offset = position_list1 ? position_list1->offset + 1 : 1;
+  lcs->position[0]->offset = position_list1 ? 
+    position_list1->offset + 1 : prefix_lines + 1;
   lcs->position[1] = apr_pcalloc(pool, sizeof(*lcs->position[1]));
-  lcs->position[1]->offset = position_list2 ? position_list2->offset + 1 : 1;
+  lcs->position[1]->offset = position_list2 ?
+    position_list2->offset + 1 : prefix_lines + 1;
   lcs->length = 0;
   lcs->refcount = 1;
   lcs->next = NULL;
 
   if (position_list1 == NULL || position_list2 == NULL)
-    return lcs;
+    {
+      if (prefix_lines)
+        return prepend_prefix_lcs(lcs, prefix_lines, pool);
+      else
+        return lcs;
+    }
 
   /* Calculate length of both sequences to be compared */
   length[0] = position_list1->offset - position_list1->next->offset + 1;
@@ -249,5 +283,8 @@ svn_diff__lcs(svn_diff__position_t *posi
   position_list1->next = sentinel_position[idx].next;
   position_list2->next = sentinel_position[abs(1 - idx)].next;
 
-  return lcs;
+  if (prefix_lines)
+    return prepend_prefix_lcs(lcs, prefix_lines, pool);
+  else
+    return lcs;
 }

Modified: subversion/trunk/subversion/libsvn_diff/token.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/token.c?rev=1067800&r1=1067799&r2=1067800&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/token.c (original)
+++ subversion/trunk/subversion/libsvn_diff/token.c Sun Feb  6 23:04:04 2011
@@ -71,7 +71,7 @@ svn_diff__tree_create(svn_diff__tree_t *
 static svn_error_t *
 tree_insert_token(svn_diff__node_t **node, svn_diff__tree_t *tree,
                   void *diff_baton,
-                  const svn_diff_fns_t *vtable,
+                  const svn_diff_fns2_t *vtable,
                   apr_uint32_t hash, void *token)
 {
   svn_diff__node_t *new_node;
@@ -137,8 +137,9 @@ svn_error_t *
 svn_diff__get_tokens(svn_diff__position_t **position_list,
                      svn_diff__tree_t *tree,
                      void *diff_baton,
-                     const svn_diff_fns_t *vtable,
+                     const svn_diff_fns2_t *vtable,
                      svn_diff_datasource_e datasource,
+                     apr_off_t prefix_lines,
                      apr_pool_t *pool)
 {
   svn_diff__position_t *start_position;
@@ -151,11 +152,8 @@ svn_diff__get_tokens(svn_diff__position_
 
   *position_list = NULL;
 
-
-  SVN_ERR(vtable->datasource_open(diff_baton, datasource));
-
   position_ref = &start_position;
-  offset = 0;
+  offset = prefix_lines;
   hash = 0; /* The callback fn doesn't need to touch it per se */
   while (1)
     {

Propchange: subversion/trunk/subversion/libsvn_subr/adler32.c
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb  6 23:04:04 2011
@@ -0,0 +1,2 @@
+/subversion/branches/diff-optimizations/subversion/libsvn_subr/adler32.c:1031270-1037352
+/subversion/branches/diff-optimizations-bytes/subversion/libsvn_subr/adler32.c:1037353-1067789



Re: svn commit: r1067800 - in /subversion/trunk: ./ subversion/include/ subversion/include/private/ subversion/libsvn_diff/ subversion/libsvn_subr/

Posted by Johan Corveleyn <jc...@gmail.com>.
On Mon, Feb 14, 2011 at 9:49 AM, Ivan Zhakov <iv...@visualsvn.com> wrote:
> On Mon, Feb 7, 2011 at 01:04,  <jc...@apache.org> wrote:
>> Author: jcorvel
>> Date: Sun Feb  6 23:04:04 2011
>> New Revision: 1067800
>>
>> URL: http://svn.apache.org/viewvc?rev=1067800&view=rev
>> Log:
>> Reintegrate diff-optimizations-bytes branch with trunk.
>>
> Hi Johan,
>
> I think you can update status of "Diff/Blame optimizations" task on
> roadmap page [1] since diff-optimizations-bytes branch merged to
> trunk.
>
> [1] http://subversion.apache.org/roadmap.html

Okeedokee: r1070666.

Thanks,
-- 
Johan

Re: svn commit: r1067800 - in /subversion/trunk: ./ subversion/include/ subversion/include/private/ subversion/libsvn_diff/ subversion/libsvn_subr/

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Mon, Feb 7, 2011 at 01:04,  <jc...@apache.org> wrote:
> Author: jcorvel
> Date: Sun Feb  6 23:04:04 2011
> New Revision: 1067800
>
> URL: http://svn.apache.org/viewvc?rev=1067800&view=rev
> Log:
> Reintegrate diff-optimizations-bytes branch with trunk.
>
Hi Johan,

I think you can update status of "Diff/Blame optimizations" task on
roadmap page [1] since diff-optimizations-bytes branch merged to
trunk.

[1] http://subversion.apache.org/roadmap.html

-- 
Ivan Zhakov