You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by jc...@apache.org on 2010/11/29 01:06:09 UTC

svn commit: r1039986 - in /subversion/branches/diff-optimizations-tokens/subversion: include/svn_diff.h libsvn_diff/diff_file.c libsvn_diff/diff_memory.c libsvn_diff/token.c

Author: jcorvel
Date: Mon Nov 29 00:06:08 2010
New Revision: 1039986

URL: http://svn.apache.org/viewvc?rev=1039986&view=rev
Log:
On the diff-optimizations-tokens branch:

Add skeleton implementation of suffix scanning.

Getting tokens backwards (datasource_get_previous_token) and pushing back
suffix tokens (token_pushback_suffix) are still stub implementations; the
real implementations will be added in a follow-up commit.

* subversion/include/svn_diff.h
  (svn_diff_fns_t): Add new function types datasource_get_previous_token and
   token_pushback_suffix. Add parameter open_at_end to datasource_open.

* subversion/libsvn_diff/diff_file.c
  (datasource_get_previous_token): New function, stub implementation.
  (token_pushback_suffix): New function, stub implementation.
  (datasource_open): Add parameter open_at_end. Add implementation to open the
   datasource at the end (read last chunk, point curp to the last byte). Make
   sure the actual file is only opened if it hasn't been opened yet, so this
   function can be reused to jump to the end or the beginning of the file
   at will. While we are at it, remove local variables curp and endp, since
   they aren't needed anymore.

* subversion/libsvn_diff/diff_memory.c
  (datasource_get_previous_token): New function, stub implementation.
  (token_pushback_suffix): New function, stub implementation.
  (datasource_open): Add parameter open_at_end.

* subversion/libsvn_diff/token.c
  (svn_diff__get_tokens): Pass FALSE for parameter open_at_end to
   datasource_open, for backwards compatibility.
  (find_identical_suffix): New function.
  (find_identical_prefix): Remove parameter reached_one_eof, and convert it to
   a local variable, since it doesn't need to be returned anymore.
  (svn_diff__get_all_tokens): Add call to find_identical_suffix, before
   calling find_identical_prefix, and call datasource_open in between to
   reposition the datasource at the end and the start respectively.

Modified:
    subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h
    subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c
    subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c
    subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c

Modified: subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h?rev=1039986&r1=1039985&r2=1039986&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h Mon Nov 29 00:06:08 2010
@@ -110,7 +110,8 @@ typedef struct svn_diff_fns_t
 {
   /** Open the datasource of type @a datasource. */
   svn_error_t *(*datasource_open)(void *diff_baton,
-                                  svn_diff_datasource_e datasource);
+                                  svn_diff_datasource_e datasource,
+                                  svn_boolean_t open_at_end);
 
   /** Close the datasource of type @a datasource. */
   svn_error_t *(*datasource_close)(void *diff_baton,
@@ -124,6 +125,14 @@ typedef struct svn_diff_fns_t
                                             void *diff_baton,
                                             svn_diff_datasource_e datasource);
 
+  /** Get the previous "token" from the datasource of type @a datasource
+   * (reading backwards). Return a "token" in @a *token.  Leave @a token
+   * untouched when the datasource is exhausted.
+   */
+  svn_error_t *(*datasource_get_previous_token)(void **token,
+                                                void *diff_baton,
+                                                svn_diff_datasource_e datasource);
+
   /** A function for ordering the tokens, resembling 'strcmp' in functionality.
    * @a compare should contain the return value of the comparison:
    * If @a ltoken and @a rtoken are "equal", return 0.  If @a ltoken is
@@ -139,6 +148,10 @@ typedef struct svn_diff_fns_t
                                         void *token,
                                         svn_diff_datasource_e datasource);
 
+  svn_error_t *(*token_pushback_suffix)(void *diff_baton,
+                                        void *token,
+                                        svn_diff_datasource_e datasource);
+
   /** Free @a token from memory, the diff algorithm is done with it. */
   void (*token_discard)(void *diff_baton,
                         void *token);

Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c?rev=1039986&r1=1039985&r2=1039986&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c Mon Nov 29 00:06:08 2010
@@ -214,35 +214,49 @@ map_or_read_file(apr_file_t **file,
  *
  * Implements svn_diff_fns_t::datasource_open. */
 static svn_error_t *
-datasource_open(void *baton, svn_diff_datasource_e datasource)
+datasource_open(void *baton,
+                svn_diff_datasource_e datasource,
+                svn_boolean_t open_at_end)
 {
   svn_diff__file_baton_t *file_baton = baton;
   struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
   apr_finfo_t finfo;
   apr_off_t length;
-  char *curp;
-  char *endp;
-
-  SVN_ERR(svn_io_file_open(&file->file, file->path,
-                           APR_READ, APR_OS_DEFAULT, file_baton->pool));
-
-  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE,
-                               file->file, file_baton->pool));
 
-  file->size = finfo.size;
-  length = finfo.size > CHUNK_SIZE ? CHUNK_SIZE : finfo.size;
+  if (file->file == NULL)
+    {
+      SVN_ERR(svn_io_file_open(&file->file, file->path,
+                               APR_READ, APR_OS_DEFAULT, file_baton->pool));
+      SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE,
+                                   file->file, file_baton->pool));
+      file->size = finfo.size;
+    }
 
-  if (length == 0)
+  if (file->size == 0)
     return SVN_NO_ERROR;
 
-  endp = curp = apr_palloc(file_baton->pool, (apr_size_t) length);
-  endp += length;
-
-  file->buffer = file->curp = curp;
-  file->endp = endp;
+  if (open_at_end)
+    {
+      file->chunk = (int) offset_to_chunk(file->size); /* last chunk */
+      length = offset_in_chunk(file->size);
+      if (file->chunk == 0)    /* if last chunk is the only chunk */
+        file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length);
+      else
+        file->buffer = apr_palloc(file_baton->pool, CHUNK_SIZE);
+      file->endp = file->buffer + length;
+      file->curp = file->endp - 1;
+    }
+  else
+    {
+      file->chunk = 0;
+      length = finfo.size > CHUNK_SIZE ? CHUNK_SIZE : finfo.size;
+      file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length);
+      file->endp = file->buffer + length;
+      file->curp = file->buffer;
+    }
 
-  return read_chunk(file->file, file->path,
-                    curp, length, 0, file_baton->pool);
+  return read_chunk(file->file, file->path, file->buffer, length,
+                    chunk_to_offset(file->chunk), file_baton->pool);
 }
 
 
@@ -426,6 +440,19 @@ datasource_get_next_token(apr_uint32_t *
   return SVN_NO_ERROR;
 }
 
+
+/* Implements svn_diff_fns_t::datasource_get_previous_token */
+static svn_error_t *
+datasource_get_previous_token(void **token, void *baton,
+                              svn_diff_datasource_e datasource)
+{
+  /* ### TODO */
+  *token = NULL;
+
+  return SVN_NO_ERROR;
+}
+
+
 #define COMPARE_CHUNK_SIZE 4096
 
 /* Implements svn_diff_fns_t::token_compare */
@@ -580,6 +607,17 @@ token_pushback_prefix(void *baton,
   return SVN_NO_ERROR;
 }
 
+
+static svn_error_t *
+token_pushback_suffix(void *baton,
+                      void *token,
+                      svn_diff_datasource_e datasource)
+{
+  /* ### TODO */
+  return SVN_NO_ERROR;
+}
+
+
 /* Implements svn_diff_fns_t::token_discard */
 static void
 token_discard(void *baton, void *token)
@@ -608,8 +646,10 @@ static const svn_diff_fns_t svn_diff__fi
   datasource_open,
   datasource_close,
   datasource_get_next_token,
+  datasource_get_previous_token,
   token_compare,
   token_pushback_prefix,
+  token_pushback_suffix,
   token_discard,
   token_discard_all
 };

Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c?rev=1039986&r1=1039985&r2=1039986&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c Mon Nov 29 00:06:08 2010
@@ -89,7 +89,9 @@ datasource_to_index(svn_diff_datasource_
 
 /* Implements svn_diff_fns_t::datasource_open */
 static svn_error_t *
-datasource_open(void *baton, svn_diff_datasource_e datasource)
+datasource_open(void *baton,
+                svn_diff_datasource_e datasource,
+                svn_boolean_t open_at_end)
 {
   /* Do nothing: everything is already there and initialized to 0 */
   return SVN_NO_ERROR;
@@ -138,6 +140,18 @@ datasource_get_next_token(apr_uint32_t *
   return SVN_NO_ERROR;
 }
 
+/* Implements svn_diff_fns_t::datasource_get_previous_token */
+static svn_error_t *
+datasource_get_previous_token(void **token, void *baton,
+                              svn_diff_datasource_e datasource)
+{
+  /* ### TODO */
+  *token = NULL;
+
+  return SVN_NO_ERROR;
+}
+
+
 /* Implements svn_diff_fns_t::token_compare */
 static svn_error_t *
 token_compare(void *baton, void *token1, void *token2, int *result)
@@ -180,6 +194,17 @@ token_pushback_prefix(void *baton,
   return SVN_NO_ERROR;
 }
 
+
+static svn_error_t *
+token_pushback_suffix(void *baton,
+                      void *token,
+                      svn_diff_datasource_e datasource)
+{
+  /* ### TODO */
+  return SVN_NO_ERROR;
+}
+
+
 /* Implements svn_diff_fns_t::token_discard */
 static void
 token_discard(void *baton, void *token)
@@ -205,8 +230,10 @@ static const svn_diff_fns_t svn_diff__me
   datasource_open,
   datasource_close,
   datasource_get_next_token,
+  datasource_get_previous_token,
   token_compare,
   token_pushback_prefix,
+  token_pushback_suffix,
   token_discard,
   token_discard_all
 };

Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c?rev=1039986&r1=1039985&r2=1039986&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c Mon Nov 29 00:06:08 2010
@@ -152,7 +152,7 @@ svn_diff__get_tokens(svn_diff__position_
   *position_list = NULL;
 
 
-  SVN_ERR(vtable->datasource_open(diff_baton, datasource));
+  SVN_ERR(vtable->datasource_open(diff_baton, datasource, FALSE));
 
   position_ref = &start_position;
   offset = 0;
@@ -188,22 +188,77 @@ svn_diff__get_tokens(svn_diff__position_
   return SVN_NO_ERROR;
 }
 
+/* Find identical suffix between all datasources
+ */
+static svn_error_t *
+find_identical_suffix(svn_boolean_t *reached_one_bof,
+                      void *diff_baton,
+                      const svn_diff_fns_t *vtable,
+                      svn_diff_datasource_e datasource[],
+                      int datasource_len)
+{
+  void *token[4];
+  svn_boolean_t is_match, reached_all_bof;
+  int i, rv;
+
+  *reached_one_bof = FALSE;
+  while (1)
+    {
+      /* Keep getting tokens and matching them, until there are no tokens
+         left, or we encounter a non-matching token. */
+      for (i = 0; i < datasource_len; i++)
+        {
+          SVN_ERR(vtable->datasource_get_previous_token(&token[i], diff_baton,
+                                                        datasource[i]));
+          *reached_one_bof = *reached_one_bof || token[i] == NULL;
+        }
+      if (*reached_one_bof)
+        {
+          break;
+        }
+      else
+        {
+          for (i = 1, is_match = TRUE; is_match && i < datasource_len; i++)
+            {
+              SVN_ERR(vtable->token_compare(diff_baton, token[0], token[i], &rv));
+              is_match = is_match && rv == 0;
+            }
+          if (!is_match)
+            break;
+        }
+    }
+
+  /* If all files reached their beginning (i.e. are fully identical),
+     we're done. */
+  for (i = 0, reached_all_bof = TRUE; i < datasource_len; i++)
+    reached_all_bof = reached_all_bof && token[i] == NULL;
+  if (reached_all_bof)
+    return SVN_NO_ERROR;
+
+  /* Push back the non-matching token we read. */
+  for (i = 0; i < datasource_len; i++)
+    if (token[i] != NULL)
+      SVN_ERR(vtable->token_pushback_suffix(diff_baton, token[i], datasource[i]));
+
+  return SVN_NO_ERROR;
+}
+
+
 /* Find identical prefix between all datasources
  */
 static svn_error_t *
-find_identical_prefix(svn_boolean_t *reached_one_eof,
-                      apr_off_t *prefix_lines,
+find_identical_prefix(apr_off_t *prefix_lines,
                       void *diff_baton,
                       const svn_diff_fns_t *vtable,
                       svn_diff_datasource_e datasource[],
                       int datasource_len)
 {
   void *token[4];
-  svn_boolean_t is_match, reached_all_eof;
+  svn_boolean_t is_match, reached_one_eof, reached_all_eof;
   int i, rv;
 
   *prefix_lines = 0;
-  *reached_one_eof = FALSE;
+  reached_one_eof = FALSE;
   while (1)
     {
       /* Keep getting tokens and matching them, until there are no tokens
@@ -212,9 +267,9 @@ find_identical_prefix(svn_boolean_t *rea
         {
           SVN_ERR(vtable->datasource_get_next_token(NULL, &token[i],
                                                     diff_baton, datasource[i]));
-          *reached_one_eof = *reached_one_eof || token[i] == NULL;
+          reached_one_eof = reached_one_eof || token[i] == NULL;
         }
-      if (*reached_one_eof)
+      if (reached_one_eof)
         {
           break;
         }
@@ -267,20 +322,28 @@ svn_diff__get_all_tokens(svn_diff__posit
   void *token;
   apr_off_t offset;
   apr_uint32_t hash;
-  svn_boolean_t reached_one_eof;
+  svn_boolean_t reached_one_bof;
   int i;
 
   for (i = 0; i < datasource_len; i++)
     {
       *position_list[i] = NULL;
-      SVN_ERR(vtable->datasource_open(diff_baton, datasource[i]));
+      SVN_ERR(vtable->datasource_open(diff_baton, datasource[i], TRUE));
     }
 
-  /* find identical prefix */
-  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines, 
-                                diff_baton, vtable, datasource, datasource_len));
+  /* find identical suffix */
+  SVN_ERR(find_identical_suffix(&reached_one_bof, diff_baton, vtable,
+                                datasource, datasource_len));
+
+  for (i = 0; i < datasource_len; i++)
+    {
+      SVN_ERR(vtable->datasource_open(diff_baton, datasource[i], FALSE));
+    }
 
-  /* ### TODO: find identical suffix (if not eof) */
+  /* find identical prefix (but don't bother if one file was all suffix) */
+  /*if (!reached_one_bof)*/
+    SVN_ERR(find_identical_prefix(prefix_lines, diff_baton, vtable,
+                                  datasource, datasource_len));
 
   for (i = 0; i < datasource_len; i++)
     {