You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by jc...@apache.org on 2010/11/21 03:36:07 UTC

svn commit: r1037371 - in /subversion/branches/diff-optimizations-bytes/subversion: include/ libsvn_diff/

Author: jcorvel
Date: Sun Nov 21 02:36:07 2010
New Revision: 1037371

URL: http://svn.apache.org/viewvc?rev=1037371&view=rev
Log:
On the diff-optimizations-bytes branch:

Make svn_diff skip identical prefix to make diff and blame faster.

* subversion/include/svn_diff.h
  (svn_diff_fns_t): Added new function type datasources_open to the vtable.

* subversion/libsvn_diff/diff_memory.c
  (datasources_open): New function (does nothing).
  (svn_diff__mem_vtable): Added new function datasources_open.

* subversion/libsvn_diff/diff_file.c
  (increment_pointers, decrement_pointers): New functions.
  (is_one_at_bof, is_one_at_eof): New functions.
  (find_identical_prefix): New function.
  (datasources_open): New function, to open multiple datasources and find
   their identical prefix, so this can be excluded from the rest
   of the diff algorithm, as a performance optimization.
  (svn_diff__file_vtable): Added new function datasources_open.

* subversion/libsvn_diff/diff.h
  (svn_diff__get_tokens): Added argument "datasource_opened", to indicate that
   the datasource was already opened, and argument "prefix_lines", the number
   of identical prefix lines. Use prefix_lines as the starting offset for the
   token we're getting.

* subversion/libsvn_diff/token.c
  (svn_diff__get_tokens): Added arguments "datasource_opened" and
   "prefix_lines". Only open the datasource if datasource_opened is FALSE.
   Set the starting offset of the position list to the number of prefix_lines.

* subversion/libsvn_diff/lcs.c
  (svn_diff__lcs): Added argument "prefix_lines". Use this to correctly set
   the offset of the sentinel position for EOF, even if one of the files
   became empty after eliminating the identical prefix.

* subversion/libsvn_diff/diff.c
  (svn_diff__diff): Add a chunk of "common" diff for identical prefix.
  (svn_diff_diff): Use new function datasources_open to open original and
   modified at once and find their identical prefix. Pass
   prefix_lines to svn_diff__get_tokens, svn_diff__lcs and svn_diff__diff.

* subversion/libsvn_diff/diff3.c
  (svn_diff_diff3): Pass datasource_opened = FALSE and prefix_lines = 0 to 
   svn_diff__get_tokens. Pass prefix_lines = 0 to svn_diff__lcs.

* subversion/libsvn_diff/diff4.c
  (svn_diff_diff4): Pass datasource_opened = FALSE and prefix_lines = 0 to
   svn_diff__get_tokens. Pass prefix_lines = 0 to svn_diff__lcs.

Modified:
    subversion/branches/diff-optimizations-bytes/subversion/include/svn_diff.h
    subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.c
    subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.h
    subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff3.c
    subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff4.c
    subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c
    subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_memory.c
    subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/lcs.c
    subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/token.c

Modified: subversion/branches/diff-optimizations-bytes/subversion/include/svn_diff.h
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/include/svn_diff.h?rev=1037371&r1=1037370&r2=1037371&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/include/svn_diff.h (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/include/svn_diff.h Sun Nov 21 02:36:07 2010
@@ -112,6 +112,11 @@ typedef struct svn_diff_fns_t
   svn_error_t *(*datasource_open)(void *diff_baton,
                                   svn_diff_datasource_e datasource);
 
+  /** Open the @a datasource_len datasources listed in @a datasource. */
+  svn_error_t *(*datasources_open)(void *diff_baton, apr_off_t *prefix_lines,
+                                   svn_diff_datasource_e datasource[],
+                                   int datasource_len);
+
   /** Close the datasource of type @a datasource. */
   svn_error_t *(*datasource_close)(void *diff_baton,
                                    svn_diff_datasource_e datasource);

Modified: subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.c?rev=1037371&r1=1037370&r2=1037371&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.c (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.c Sun Nov 21 02:36:07 2010
@@ -43,6 +43,22 @@ svn_diff__diff(svn_diff__lcs_t *lcs,
   svn_diff_t *diff;
   svn_diff_t **diff_ref = &diff;
 
+  if (want_common && (original_start > 1))
+    {
+      /* we have a prefix to skip */
+      (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+      (*diff_ref)->type = svn_diff__type_common;
+      (*diff_ref)->original_start = 0;
+      (*diff_ref)->original_length = original_start - 1;
+      (*diff_ref)->modified_start = 0;
+      (*diff_ref)->modified_length = modified_start - 1;
+      (*diff_ref)->latest_start = 0;
+      (*diff_ref)->latest_length = 0;
+
+      diff_ref = &(*diff_ref)->next;
+    }
+
   while (1)
     {
       if (original_start < lcs->position[0]->offset
@@ -105,9 +121,12 @@ svn_diff_diff(svn_diff_t **diff,
 {
   svn_diff__tree_t *tree;
   svn_diff__position_t *position_list[2];
+  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
+                                        svn_diff_datasource_modified};
   svn_diff__lcs_t *lcs;
   apr_pool_t *subpool;
   apr_pool_t *treepool;
+  apr_off_t prefix_lines = 0;
 
   *diff = NULL;
 
@@ -116,17 +135,23 @@ svn_diff_diff(svn_diff_t **diff,
 
   svn_diff__tree_create(&tree, treepool);
 
+  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, datasource, 2));
+
   /* Insert the data into the tree */
   SVN_ERR(svn_diff__get_tokens(&position_list[0],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               TRUE,
+                               prefix_lines,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               TRUE,
+                               prefix_lines,
                                subpool));
 
   /* The cool part is that we don't need the tokens anymore.
@@ -139,10 +164,10 @@ svn_diff_diff(svn_diff_t **diff,
   svn_pool_destroy(treepool);
 
   /* Get the lcs */
-  lcs = svn_diff__lcs(position_list[0], position_list[1], subpool);
+  lcs = svn_diff__lcs(position_list[0], position_list[1], prefix_lines, subpool);
 
   /* Produce the diff */
-  *diff = svn_diff__diff(lcs, 1, 1, TRUE, pool);
+  *diff = svn_diff__diff(lcs, prefix_lines + 1, prefix_lines + 1, TRUE, pool);
 
   /* Get rid of all the data we don't have a use for anymore */
   svn_pool_destroy(subpool);

Modified: subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.h
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.h?rev=1037371&r1=1037370&r2=1037371&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.h (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff.h Sun Nov 21 02:36:07 2010
@@ -91,6 +91,7 @@ typedef enum svn_diff__normalize_state_t
 svn_diff__lcs_t *
 svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
               svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+              apr_off_t prefix_lines,
               apr_pool_t *pool);
 
 
@@ -111,6 +112,8 @@ svn_diff__get_tokens(svn_diff__position_
                      void *diff_baton,
                      const svn_diff_fns_t *vtable,
                      svn_diff_datasource_e datasource,
+                     svn_boolean_t datasource_opened,
+                     apr_off_t prefix_lines,
                      apr_pool_t *pool);
 
 

Modified: subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff3.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff3.c?rev=1037371&r1=1037370&r2=1037371&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff3.c (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff3.c Sun Nov 21 02:36:07 2010
@@ -173,7 +173,7 @@ svn_diff__resolve_conflict(svn_diff_t *h
         position[1]->next = start_position[1];
       }
 
-    *lcs_ref = svn_diff__lcs(position[0], position[1],
+    *lcs_ref = svn_diff__lcs(position[0], position[1], 0,
                              subpool);
 
     /* Fix up the EOF lcs element in case one of
@@ -267,18 +267,24 @@ svn_diff_diff3(svn_diff_t **diff,
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               FALSE,
+                               0,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               FALSE,
+                               0,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[2],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_latest,
+                               FALSE,
+                               0,
                                subpool));
 
   /* Get rid of the tokens, we don't need them to calc the diff */
@@ -289,9 +295,9 @@ svn_diff_diff3(svn_diff_t **diff,
   svn_pool_destroy(treepool);
 
   /* Get the lcs for original-modified and original-latest */
-  lcs_om = svn_diff__lcs(position_list[0], position_list[1],
+  lcs_om = svn_diff__lcs(position_list[0], position_list[1], 0,
                          subpool);
-  lcs_ol = svn_diff__lcs(position_list[0], position_list[2],
+  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], 0,
                          subpool);
 
   /* Produce a merged diff */

Modified: subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff4.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff4.c?rev=1037371&r1=1037370&r2=1037371&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff4.c (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff4.c Sun Nov 21 02:36:07 2010
@@ -194,24 +194,32 @@ svn_diff_diff4(svn_diff_t **diff,
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               FALSE,
+                               0,
                                subpool2));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               FALSE,
+                               0,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[2],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_latest,
+                               FALSE,
+                               0,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[3],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_ancestor,
+                               FALSE,
+                               0,
                                subpool2));
 
   /* Get rid of the tokens, we don't need them to calc the diff */
@@ -222,7 +230,7 @@ svn_diff_diff4(svn_diff_t **diff,
   svn_pool_clear(subpool3);
 
   /* Get the lcs for original - latest */
-  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], subpool3);
+  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], 0, subpool3);
   diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool);
 
   svn_pool_clear(subpool3);
@@ -243,7 +251,7 @@ svn_diff_diff4(svn_diff_t **diff,
   /* Get the lcs for common ancestor - original
    * Do reverse adjustements
    */
-  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], subpool3);
+  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], 0, subpool3);
   diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
   adjust_diff(diff_ol, diff_adjust);
 
@@ -252,7 +260,7 @@ svn_diff_diff4(svn_diff_t **diff,
   /* Get the lcs for modified - common ancestor
    * Do forward adjustments
    */
-  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], subpool3);
+  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], 0, subpool3);
   diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
   adjust_diff(diff_ol, diff_adjust);
 

Modified: subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c?rev=1037371&r1=1037370&r2=1037371&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c Sun Nov 21 02:36:07 2010
@@ -243,6 +243,255 @@ datasource_open(void *baton, svn_diff_da
 }
 
 
+/* Advance the curp pointer of each of the FILE_LEN files in FILE by one
+ * byte.  A file that was parked before its beginning (chunk == -1) is put
+ * back on chunk 0 without touching curp.  When curp sits on the last byte
+ * of a chunk, the following chunk is read into the buffer and curp is
+ * reset to its start; on the last chunk, curp becomes endp to signal EOF. */
+static svn_error_t *
+increment_pointers(struct file_info *file[], int file_len, apr_pool_t *pool)
+{
+  int idx;
+
+  for (idx = 0; idx < file_len; idx++)
+    {
+      struct file_info *cur = file[idx];
+      apr_off_t last_chunk, length;
+
+      if (cur->chunk == -1)
+        {
+          cur->chunk = 0; /* parked before BOF: return to the first chunk */
+          continue;
+        }
+      if (cur->curp != cur->endp - 1)
+        {
+          cur->curp++; /* not on the chunk's last byte: a plain step */
+          continue;
+        }
+      last_chunk = offset_to_chunk(cur->size);
+      if (cur->chunk == last_chunk)
+        {
+          cur->curp++; /* curp == endp signals end of file */
+          continue;
+        }
+      cur->chunk++;
+      length = (cur->chunk == last_chunk)
+               ? offset_in_chunk(cur->size) : CHUNK_SIZE;
+      SVN_ERR(read_chunk(cur->file, cur->path, cur->buffer, length,
+                         chunk_to_offset(cur->chunk), pool));
+      cur->endp = cur->buffer + length;
+      cur->curp = cur->buffer;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Move the curp pointer of each of the FILE_LEN files in FILE back by one
+ * byte.  When curp is at the start of a chunk, the previous chunk is read
+ * into the buffer and curp is set to its last byte.  Stepping back from the
+ * start of chunk 0 leaves chunk at -1, which indicates BOF. */
+static svn_error_t *
+decrement_pointers(struct file_info *file[], int file_len, apr_pool_t *pool)
+{
+  int idx;
+
+  for (idx = 0; idx < file_len; idx++)
+    {
+      struct file_info *cur = file[idx];
+      if (cur->curp != cur->buffer)
+        {
+          cur->curp--;
+          continue;
+        }
+
+      /* At the chunk's first byte; chunk -1 (from chunk 0) signals BOF. */
+      cur->chunk--;
+      if (cur->chunk == -1)
+        continue;
+
+      SVN_ERR(read_chunk(cur->file, cur->path, cur->buffer, CHUNK_SIZE,
+                         chunk_to_offset(cur->chunk), pool));
+      cur->endp = cur->buffer + CHUNK_SIZE;
+      cur->curp = cur->endp - 1;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Return TRUE if at least one of the FILE_LEN files in FILE is positioned
+ * 'before' its beginning, i.e. has chunk == -1 (this can happen while
+ * scanning backwards); return FALSE otherwise. */
+static svn_boolean_t
+is_one_at_bof(struct file_info *file[], int file_len)
+{
+  svn_boolean_t found = FALSE;
+  int idx;
+
+  for (idx = 0; idx < file_len && !found; idx++)
+    found = (file[idx]->chunk == -1);
+
+  return found;
+}
+
+/* Return TRUE if at least one of the FILE_LEN files in FILE is at EOF, i.e.
+ * has curp == endp (this can only happen at the last chunk); else FALSE. */
+static svn_boolean_t
+is_one_at_eof(struct file_info *file[], int file_len)
+{
+  svn_boolean_t found = FALSE;
+  int idx;
+
+  for (idx = 0; idx < file_len && !found; idx++)
+    found = (file[idx]->curp == file[idx]->endp);
+
+  return found;
+}
+
+/* Find the prefix which is identical between all elements of the FILE array.
+ * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
+ * set to TRUE if one of the FILEs reached its end while scanning prefix,
+ * i.e. at least one file consisted entirely of prefix.  Otherwise,
+ * REACHED_ONE_EOF is set to FALSE.  Errors from reading chunks are returned.
+ *
+ * After this function is finished, the buffers, chunks, curp's and endp's
+ * of the FILEs are set to point at the first byte after the prefix. */
+static svn_error_t *
+find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
+                      struct file_info *file[], int file_len,
+                      apr_pool_t *pool)
+{
+  svn_boolean_t had_cr = FALSE;
+  svn_boolean_t is_match, reached_all_eof;
+  int i;
+
+  *prefix_lines = 0;
+  *reached_one_eof = FALSE; /* stays FALSE if the first bytes already differ */
+  for (i = 1, is_match = TRUE; i < file_len; i++)
+    is_match = is_match && *file[0]->curp == *file[i]->curp;
+  while (is_match)
+    {
+      /* ### TODO: see if we can take advantage of
+         diff options like ignore_eol_style or ignore_space. */
+      /* check for eol, and count */
+      if (*file[0]->curp == '\r')
+        {
+          (*prefix_lines)++;
+          had_cr = TRUE;
+        }
+      else if (*file[0]->curp == '\n' && !had_cr)
+        {
+          (*prefix_lines)++; /* lone \n; the \n of a \r\n was counted at \r */
+        }
+      else
+        {
+          had_cr = FALSE;
+        }
+
+      SVN_ERR(increment_pointers(file, file_len, pool));
+
+      /* curp == endp indicates EOF (this can only happen with last chunk) */
+      *reached_one_eof = is_one_at_eof(file, file_len);
+      if (*reached_one_eof)
+        break;
+      else
+        for (i = 1, is_match = TRUE; i < file_len; i++)
+          is_match = is_match && *file[0]->curp == *file[i]->curp;
+    }
+
+  /* If all files reached their end (i.e. are fully identical), we're done */
+  for (i = 0, reached_all_eof = TRUE; i < file_len; i++)
+    reached_all_eof = reached_all_eof && file[i]->curp == file[i]->endp;
+  if (reached_all_eof)
+    return SVN_NO_ERROR;
+
+  if (had_cr)
+    {
+      /* Check if we ended in the middle of a \r\n for one file, but \r for
+         another. If so, back up one byte, so the next loop will back up
+         the entire line. Also decrement *prefix_lines, since we counted one
+         too many for the \r. */
+      svn_boolean_t ended_at_nonmatching_newline = FALSE;
+      for (i = 0; i < file_len; i++)
+        if (file[i]->curp != file[i]->endp && *file[i]->curp == '\n')
+          ended_at_nonmatching_newline = TRUE; /* never read a file at EOF */
+      if (ended_at_nonmatching_newline)
+        {
+          (*prefix_lines)--;
+          SVN_ERR(decrement_pointers(file, file_len, pool));
+        }
+    }
+
+  /* Back up one byte, so we point at the last identical byte */
+  SVN_ERR(decrement_pointers(file, file_len, pool));
+
+  /* Back up to the last eol sequence (\n, \r\n or \r) */
+  while (!is_one_at_bof(file, file_len) &&
+         *file[0]->curp != '\n' && *file[0]->curp != '\r')
+    SVN_ERR(decrement_pointers(file, file_len, pool));
+
+  /* Slide one byte forward, to point past the eol sequence */
+  SVN_ERR(increment_pointers(file, file_len, pool));
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Let FILE stand for the array of file_info struct elements of BATON->files
+ * that are indexed by the elements of the DATASOURCE array.
+ * BATON's type is (svn_diff__file_baton_t *).  DATASOURCE_LEN must not
+ * exceed 4, the size of the local bookkeeping arrays.
+ *
+ * For each file in the FILE array, open the file at FILE.path; initialize
+ * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
+ * buffer and read the first chunk.  Then find the prefix lines which are
+ * identical between all the files and return their number in PREFIX_LINES
+ * (0 if any of the files is empty).  Excluding those lines from the rest
+ * of the diff algorithm increases performance by reducing the problem
+ * space.
+ *
+ * Implements svn_diff_fns_t::datasources_open. */
+static svn_error_t *
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasource[],
+                 int datasource_len)
+{
+  svn_diff__file_baton_t *file_baton = baton;
+  struct file_info *file[4];
+  apr_finfo_t finfo[4];
+  apr_off_t length[4];
+  svn_boolean_t reached_one_eof;
+  int i;
+
+  /* Open datasources and read first chunk */
+  for (i = 0; i < datasource_len; i++)
+    {
+      file[i] = &file_baton->files[datasource_to_index(datasource[i])];
+      SVN_ERR(svn_io_file_open(&file[i]->file, file[i]->path,
+                               APR_READ, APR_OS_DEFAULT, file_baton->pool));
+      SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
+                                   file[i]->file, file_baton->pool));
+      file[i]->size = finfo[i].size;
+      length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
+      file[i]->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
+      SVN_ERR(read_chunk(file[i]->file, file[i]->path, file[i]->buffer,
+                         length[i], 0, file_baton->pool));
+      file[i]->endp = file[i]->buffer + length[i];
+      file[i]->curp = file[i]->buffer;
+    }
+
+  /* An empty file cannot share a prefix: report 0 lines and stop here. */
+  *prefix_lines = 0;
+  for (i = 0; i < datasource_len; i++)
+    if (length[i] == 0)
+      return SVN_NO_ERROR;
+
+  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
+                                file, datasource_len, file_baton->pool));
+
+  return SVN_NO_ERROR;
+}
+
+
 /* Implements svn_diff_fns_t::datasource_close */
 static svn_error_t *
 datasource_close(void *baton, svn_diff_datasource_e datasource)
@@ -533,6 +782,7 @@ token_discard_all(void *baton)
 static const svn_diff_fns_t svn_diff__file_vtable =
 {
   datasource_open,
+  datasources_open,
   datasource_close,
   datasource_get_next_token,
   token_compare,

Modified: subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_memory.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_memory.c?rev=1037371&r1=1037370&r2=1037371&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_memory.c (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_memory.c Sun Nov 21 02:36:07 2010
@@ -96,6 +96,17 @@ datasource_open(void *baton, svn_diff_da
 }
 
 
+/* Implements svn_diff_fns_t::datasources_open.  In-memory sources need no
+ * opening and no prefix is scanned, so 0 prefix lines are reported. */
+static svn_error_t *
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasource[], int datasource_len)
+{
+  *prefix_lines = 0;
+  return SVN_NO_ERROR;
+}
+
+
 /* Implements svn_diff_fns_t::datasource_close */
 static svn_error_t *
 datasource_close(void *baton, svn_diff_datasource_e datasource)
@@ -189,6 +200,7 @@ token_discard_all(void *baton)
 static const svn_diff_fns_t svn_diff__mem_vtable =
 {
   datasource_open,
+  datasources_open,
   datasource_close,
   datasource_get_next_token,
   token_compare,

Modified: subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/lcs.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/lcs.c?rev=1037371&r1=1037370&r2=1037371&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/lcs.c (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/lcs.c Sun Nov 21 02:36:07 2010
@@ -163,6 +163,7 @@ svn_diff__lcs_reverse(svn_diff__lcs_t *l
 svn_diff__lcs_t *
 svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
               svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+              apr_off_t prefix_lines,
               apr_pool_t *pool)
 {
   int idx;
@@ -180,9 +181,11 @@ svn_diff__lcs(svn_diff__position_t *posi
    */
   lcs = apr_palloc(pool, sizeof(*lcs));
   lcs->position[0] = apr_pcalloc(pool, sizeof(*lcs->position[0]));
-  lcs->position[0]->offset = position_list1 ? position_list1->offset + 1 : 1;
+  lcs->position[0]->offset = position_list1 ? 
+    position_list1->offset + 1 : prefix_lines + 1;
   lcs->position[1] = apr_pcalloc(pool, sizeof(*lcs->position[1]));
-  lcs->position[1]->offset = position_list2 ? position_list2->offset + 1 : 1;
+  lcs->position[1]->offset = position_list2 ?
+    position_list2->offset + 1 : prefix_lines + 1;
   lcs->length = 0;
   lcs->refcount = 1;
   lcs->next = NULL;

Modified: subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/token.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/token.c?rev=1037371&r1=1037370&r2=1037371&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/token.c (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/token.c Sun Nov 21 02:36:07 2010
@@ -139,6 +139,8 @@ svn_diff__get_tokens(svn_diff__position_
                      void *diff_baton,
                      const svn_diff_fns_t *vtable,
                      svn_diff_datasource_e datasource,
+                     svn_boolean_t datasource_opened,
+                     apr_off_t prefix_lines,
                      apr_pool_t *pool)
 {
   svn_diff__position_t *start_position;
@@ -152,10 +154,11 @@ svn_diff__get_tokens(svn_diff__position_
   *position_list = NULL;
 
 
-  SVN_ERR(vtable->datasource_open(diff_baton, datasource));
+  if (!datasource_opened)
+    SVN_ERR(vtable->datasource_open(diff_baton, datasource));
 
   position_ref = &start_position;
-  offset = 0;
+  offset = prefix_lines;
   hash = 0; /* The callback fn doesn't need to touch it per se */
   while (1)
     {