You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by rh...@apache.org on 2012/12/29 13:25:49 UTC

svn commit: r1426752 - in /subversion/trunk/subversion: libsvn_diff/diff_file.c tests/libsvn_diff/diff-diff3-test.c

Author: rhuijben
Date: Sat Dec 29 12:25:49 2012
New Revision: 1426752

URL: http://svn.apache.org/viewvc?rev=1426752&view=rev
Log:
Fix a whitespace normalization issue in the diff code, identified by Hideki
Iwamoto. This applies patch applies the same fix as applied by steveking in
r876890 on the other whitespace normalization function invocation.

If the normalized offset is set incorrectly, reading back tokens from older
chunks for diff-matching when the hash value of two tokens is identical,
is broken.

This is a slightly reworked version of the
Patch by: Hideki IWAMOTO <h-iwamoto{_AT-}kit.hi-ho.ne.jp>

* subversion/libsvn_diff/diff_file.c
  (datasource_get_next_token): Set the normalized offset earlier to allow
    updating the offset later by each of the possibly two normalization passes.

* subversion/tests/libsvn_diff/diff-diff3-test.c
  (test_wrap): Rename existing test to ...
  (test_norm_offset): ... and test the specific issue identified here.
  (test_funcs): Update reference and expect the test to pass now.

Modified:
    subversion/trunk/subversion/libsvn_diff/diff_file.c
    subversion/trunk/subversion/tests/libsvn_diff/diff-diff3-test.c

Modified: subversion/trunk/subversion/libsvn_diff/diff_file.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/diff_file.c?rev=1426752&r1=1426751&r2=1426752&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/diff_file.c (original)
+++ subversion/trunk/subversion/libsvn_diff/diff_file.c Sat Dec 29 12:25:49 2012
@@ -856,6 +856,7 @@ datasource_get_next_token(apr_uint32_t *
   file_token->datasource = datasource;
   file_token->offset = chunk_to_offset(file->chunk)
                        + (curp - file->buffer);
+  file_token->norm_offset = file_token->offset;
   file_token->raw_length = 0;
   file_token->length = 0;
 
@@ -884,11 +885,21 @@ datasource_get_next_token(apr_uint32_t *
 
       length = endp - curp;
       file_token->raw_length += length;
-      svn_diff__normalize_buffer(&curp, &length,
-                                 &file->normalize_state,
-                                 curp, file_baton->options);
-      file_token->length += length;
-      h = svn__adler32(h, curp, length);
+      {
+        char *c = curp;
+
+        svn_diff__normalize_buffer(&c, &length,
+                                   &file->normalize_state,
+                                   curp, file_baton->options);
+        if (file_token->length == 0)
+          {
+            /* When we are reading the first part of the token, move the
+               normalized offset past leading ignored characters, if any. */
+            file_token->norm_offset += (c - curp);
+          }
+        file_token->length += length;
+        h = svn__adler32(h, c, length);
+      }
 
       curp = endp = file->buffer;
       file->chunk++;
@@ -927,11 +938,12 @@ datasource_get_next_token(apr_uint32_t *
       svn_diff__normalize_buffer(&c, &length,
                                  &file->normalize_state,
                                  curp, file_baton->options);
-
-      file_token->norm_offset = file_token->offset;
       if (file_token->length == 0)
-        /* move past leading ignored characters */
-        file_token->norm_offset += (c - curp);
+        {
+          /* When we are reading the first part of the token, move the
+             normalized offset past leading ignored characters, if any. */
+          file_token->norm_offset += (c - curp);
+        }
 
       file_token->length += length;
 

Modified: subversion/trunk/subversion/tests/libsvn_diff/diff-diff3-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_diff/diff-diff3-test.c?rev=1426752&r1=1426751&r2=1426752&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_diff/diff-diff3-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_diff/diff-diff3-test.c Sat Dec 29 12:25:49 2012
@@ -2394,37 +2394,80 @@ merge_adjacent_changes(apr_pool_t *pool)
   return SVN_NO_ERROR;
 }
 
-/* Issue #4133, '"diff -x -w" showing wrong change'.
+/* Issue #4133, 'When sequences of whitespace characters at head of line
+   strides chunk boundary, "diff -x -w" showing wrong change'.
    The magic number used in this test, 1<<17, is
    CHUNK_SIZE from ../../libsvn_diff/diff_file.c
  */
 static svn_error_t *
-test_wrap(apr_pool_t *pool)
+test_norm_offset(apr_pool_t *pool)
 {
-  char ldata[(1<<17) + 4+4+3+1];
-  char rdata[(1<<17) + 4+3+3+1];
-  svn_string_t left, right;
+  apr_size_t chunk_size = 1 << 17;
+  const char *pattern1 = "       \n";
+  const char *pattern2 = "\n\n\n\n\n\n\n\n";
+  const char *pattern3 = "                        @@@@@@@\n";
+  const char *pattern4 = "               \n";
+  svn_stringbuf_t *original, *modified;
   svn_diff_file_options_t *diff_opts = svn_diff_file_options_create(pool);
-  diff_opts->ignore_space = svn_diff_file_ignore_space_change;
 
-  /* Two long lines. */
-  memset(ldata, '@', 1<<17);
-  memset(rdata, '@', 1<<17);
-  strcpy(&ldata[1<<17], "foo\n" "ba \n" "x \n");
-  strcpy(&rdata[1<<17], "foo\n" "ba\n"  "x\t\n");
-
-  /* Cast them to svn_string_t. */
-  left.data = ldata;
-  right.data = rdata;
-  left.len = sizeof(ldata)-1;
-  right.len = sizeof(rdata)-1;
+  /* The original contents become like this
+
+     $ hexdump -C norm-offset-original
+     00000000  20 20 20 20 20 20 20 0a  0a 0a 0a 0a 0a 0a 0a 0a  |       .........|
+     00000010  0a 0a 0a 0a 0a 0a 0a 0a  0a 0a 0a 0a 0a 0a 0a 0a  |................|
+     *
+     0001fff0  0a 0a 0a 0a 0a 0a 0a 0a  20 20 20 20 20 20 20 20  |........        |
+     00020000  20 20 20 20 20 20 20 20  20 20 20 20 20 20 20 20  |                |
+     00020010  40 40 40 40 40 40 40 0a  0a 0a 0a 0a 0a 0a 0a 0a  |@@@@@@@.........|
+     00020020  0a 0a 0a 0a 0a 0a 0a 0a  0a 0a 0a 0a 0a 0a 0a 0a  |................|
+     *
+     000203f0  20 20 20 20 20 20 20 20  20 20 20 20 20 20 20 0a  |               .|
+     00020400
+  */
+  original = svn_stringbuf_create_ensure(chunk_size + 1024, pool);
+  svn_stringbuf_appendcstr(original, pattern1);
+  while (original->len < chunk_size - 8)
+    {
+      svn_stringbuf_appendcstr(original, pattern2);
+    }
+  svn_stringbuf_appendcstr(original, pattern3);
+  while (original->len < chunk_size +1024 - 16)
+    {
+      svn_stringbuf_appendcstr(original, pattern2);
+    }
+  svn_stringbuf_appendcstr(original, pattern4);
+
+  /* The modified contents become like this.
+
+     $ hexdump -C norm-offset-modified
+     00000000  20 20 20 20 20 20 20 20  20 20 20 20 20 20 20 0a  |               .|
+     00000010  0a 0a 0a 0a 0a 0a 0a 0a  0a 0a 0a 0a 0a 0a 0a 0a  |................|
+     *
+     00020000  20 20 20 20 20 20 20 20  20 20 20 20 20 20 20 20  |                |
+     00020010  20 20 20 20 20 20 20 20  40 40 40 40 40 40 40 0a  |        @@@@@@@.|
+     00020020  0a 0a 0a 0a 0a 0a 0a 0a  0a 0a 0a 0a 0a 0a 0a 0a  |................|
+     *
+     000203f0  0a 0a 0a 0a 0a 0a 0a 0a  20 20 20 20 20 20 20 0a  |........       .|
+     00020400
+  */
+  modified = svn_stringbuf_create_ensure(chunk_size + 1024, pool);
+  svn_stringbuf_appendcstr(modified, pattern4);
+  while (modified->len < chunk_size)
+    {
+      svn_stringbuf_appendcstr(modified, pattern2);
+    }
+  svn_stringbuf_appendcstr(modified, pattern3);
+  while (modified->len < chunk_size +1024 - 8)
+    {
+      svn_stringbuf_appendcstr(modified, pattern2);
+    }
+  svn_stringbuf_appendcstr(modified, pattern1);
 
   /* Diff them.  Modulo whitespace, they are identical. */
-  {
-    svn_diff_t *diff;
-    SVN_ERR(svn_diff_mem_string_diff(&diff, &left, &right, diff_opts, pool));
-    SVN_TEST_ASSERT(FALSE == svn_diff_contains_diffs(diff));
-  }
+  diff_opts->ignore_space = svn_diff_file_ignore_space_all;
+  SVN_ERR(two_way_diff("norm-offset-original", "norm-offset-modified",
+                       original->data, modified->data, "",
+                       diff_opts, pool));
 
   return SVN_NO_ERROR;
 }
@@ -2458,7 +2501,7 @@ struct svn_test_descriptor_t test_funcs[
                    "3-way merge with conflict styles"),
     SVN_TEST_PASS2(test_diff4,
                    "4-way merge; see variance-adjusted-patching.html"),
-    SVN_TEST_XFAIL2(test_wrap,
-                   "difference at the start of a 128KB window"),
+    SVN_TEST_PASS2(test_norm_offset,
+                   "offset of the normalized token"),
     SVN_TEST_NULL
   };