You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by jc...@apache.org on 2010/11/24 01:03:07 UTC
svn commit: r1038390 - in
/subversion/branches/diff-optimizations-tokens/subversion: include/
libsvn_diff/
Author: jcorvel
Date: Wed Nov 24 00:03:07 2010
New Revision: 1038390
URL: http://svn.apache.org/viewvc?rev=1038390&view=rev
Log:
On the diff-optimizations-tokens branch:
Make svn_diff skip identical prefix to make diff and blame faster.
* subversion/include/svn_diff.h
(svn_diff_fns_t): Add new function type token_pushback_prefix to the
vtable. This function is needed to push back the last token that has been
read during prefix scanning (the first non-matching line), so it can be
read again (and checksummed) during the get_next_token phase.
* subversion/libsvn_diff/diff_memory.c
(datasource_get_next_token): Make the checksum calculation optional (only
calculate checksum if HASH is not NULL).
(token_pushback_prefix): New function.
(svn_diff__mem_vtable): Add new function token_pushback_prefix.
* subversion/libsvn_diff/diff_file.c
(svn_diff__file_baton_t): Add member PREFIX_TOKENS, a linked list of pushed
back prefix tokens.
(datasource_get_next_token): Make the checksum calculation optional (only
calculate checksum if HASH is not NULL). If applicable, use a pushed back
prefix token to adjust the raw_length of a token that is being reread.
(token_pushback_prefix): New function.
(svn_diff__mem_vtable): Add new function token_pushback_prefix.
* subversion/libsvn_diff/diff.h
(svn_diff__lcs): Add argument "prefix_lines".
(svn_diff__get_all_tokens): New function declaration.
* subversion/libsvn_diff/token.c
(find_identical_prefix): New function.
(svn_diff__get_all_tokens): New function. The purpose of this function is to
read the tokens from all datasources at once, so it can first scan for
identical prefix and suffix.
* subversion/libsvn_diff/lcs.c
(svn_diff__lcs): Added argument "prefix_lines". Use this to correctly set
the offset of the sentinel position for EOF, even if one of the files
became empty after eliminating the identical prefix.
* subversion/libsvn_diff/diff.c
(svn_diff__diff): Add a chunk of "common" diff for identical prefix.
(svn_diff_diff): Use new function svn_diff__get_all_tokens to get the tokens
from original and modified at once (while scanning for identical prefix).
Pass prefix_lines to svn_diff__lcs and svn_diff__diff.
* subversion/libsvn_diff/diff3.c
(svn_diff_diff3): For now, don't use svn_diff__get_all_tokens yet.
Pass prefix_lines = 0 to svn_diff__lcs.
* subversion/libsvn_diff/diff4.c
(svn_diff_diff4): For now, don't use svn_diff__get_all_tokens yet.
Pass prefix_lines = 0 to svn_diff__lcs.
Modified:
subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h
subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.c
subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.h
subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff3.c
subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff4.c
subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c
subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c
subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/lcs.c
subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c
Modified: subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h?rev=1038390&r1=1038389&r2=1038390&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/include/svn_diff.h Wed Nov 24 00:03:07 2010
@@ -135,6 +135,10 @@ typedef struct svn_diff_fns_t
void *rtoken,
int *compare);
+ svn_error_t *(*token_pushback_prefix)(void *diff_baton,
+ void *token,
+ svn_diff_datasource_e datasource);
+
/** Free @a token from memory, the diff algorithm is done with it. */
void (*token_discard)(void *diff_baton,
void *token);
Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.c?rev=1038390&r1=1038389&r2=1038390&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.c Wed Nov 24 00:03:07 2010
@@ -43,6 +43,22 @@ svn_diff__diff(svn_diff__lcs_t *lcs,
svn_diff_t *diff;
svn_diff_t **diff_ref = &diff;
+ if (want_common && (original_start > 1))
+ {
+ /* we have a prefix to skip */
+ (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+ (*diff_ref)->type = svn_diff__type_common;
+ (*diff_ref)->original_start = 0;
+ (*diff_ref)->original_length = original_start - 1;
+ (*diff_ref)->modified_start = 0;
+ (*diff_ref)->modified_length = modified_start - 1;
+ (*diff_ref)->latest_start = 0;
+ (*diff_ref)->latest_length = 0;
+
+ diff_ref = &(*diff_ref)->next;
+ }
+
while (1)
{
if (original_start < lcs->position[0]->offset
@@ -105,11 +121,17 @@ svn_diff_diff(svn_diff_t **diff,
{
svn_diff__tree_t *tree;
svn_diff__position_t *position_list[2];
+ svn_diff__position_t **position_list_ref[2];
+ svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
+ svn_diff_datasource_modified};
svn_diff__lcs_t *lcs;
apr_pool_t *subpool;
apr_pool_t *treepool;
+ apr_off_t prefix_lines = 0;
*diff = NULL;
+ position_list_ref[0] = &position_list[0];
+ position_list_ref[1] = &position_list[1];
subpool = svn_pool_create(pool);
treepool = svn_pool_create(pool);
@@ -117,17 +139,12 @@ svn_diff_diff(svn_diff_t **diff,
svn_diff__tree_create(&tree, treepool);
/* Insert the data into the tree */
- SVN_ERR(svn_diff__get_tokens(&position_list[0],
- tree,
- diff_baton, vtable,
- svn_diff_datasource_original,
- subpool));
-
- SVN_ERR(svn_diff__get_tokens(&position_list[1],
- tree,
- diff_baton, vtable,
- svn_diff_datasource_modified,
- subpool));
+ SVN_ERR(svn_diff__get_all_tokens(position_list_ref,
+ &prefix_lines,
+ tree,
+ diff_baton, vtable,
+ datasource, 2,
+ subpool));
/* The cool part is that we don't need the tokens anymore.
* Allow the app to clean them up if it wants to.
@@ -139,10 +156,10 @@ svn_diff_diff(svn_diff_t **diff,
svn_pool_destroy(treepool);
/* Get the lcs */
- lcs = svn_diff__lcs(position_list[0], position_list[1], subpool);
+ lcs = svn_diff__lcs(position_list[0], position_list[1], prefix_lines, subpool);
/* Produce the diff */
- *diff = svn_diff__diff(lcs, 1, 1, TRUE, pool);
+ *diff = svn_diff__diff(lcs, prefix_lines + 1, prefix_lines + 1, TRUE, pool);
/* Get rid of all the data we don't have a use for anymore */
svn_pool_destroy(subpool);
Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.h
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.h?rev=1038390&r1=1038389&r2=1038390&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.h (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff.h Wed Nov 24 00:03:07 2010
@@ -91,6 +91,7 @@ typedef enum svn_diff__normalize_state_t
svn_diff__lcs_t *
svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+ apr_off_t prefix_lines,
apr_pool_t *pool);
@@ -113,6 +114,20 @@ svn_diff__get_tokens(svn_diff__position_
svn_diff_datasource_e datasource,
apr_pool_t *pool);
+/*
+ * Get all tokens from all datasources (skipping identical prefix and suffix).
+ * Return the last item in the (circular) list.
+ */
+svn_error_t *
+svn_diff__get_all_tokens(svn_diff__position_t **position_list[],
+ apr_off_t *prefix_lines,
+ svn_diff__tree_t *tree,
+ void *diff_baton,
+ const svn_diff_fns_t *vtable,
+ svn_diff_datasource_e datasource[],
+ int datasource_len,
+ apr_pool_t *pool);
+
/* Morph a svn_lcs_t into a svn_diff_t. */
svn_diff_t *
Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff3.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff3.c?rev=1038390&r1=1038389&r2=1038390&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff3.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff3.c Wed Nov 24 00:03:07 2010
@@ -173,7 +173,7 @@ svn_diff__resolve_conflict(svn_diff_t *h
position[1]->next = start_position[1];
}
- *lcs_ref = svn_diff__lcs(position[0], position[1],
+ *lcs_ref = svn_diff__lcs(position[0], position[1], 0,
subpool);
/* Fix up the EOF lcs element in case one of
@@ -289,9 +289,9 @@ svn_diff_diff3(svn_diff_t **diff,
svn_pool_destroy(treepool);
/* Get the lcs for original-modified and original-latest */
- lcs_om = svn_diff__lcs(position_list[0], position_list[1],
+ lcs_om = svn_diff__lcs(position_list[0], position_list[1], 0,
subpool);
- lcs_ol = svn_diff__lcs(position_list[0], position_list[2],
+ lcs_ol = svn_diff__lcs(position_list[0], position_list[2], 0,
subpool);
/* Produce a merged diff */
Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff4.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff4.c?rev=1038390&r1=1038389&r2=1038390&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff4.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff4.c Wed Nov 24 00:03:07 2010
@@ -222,7 +222,7 @@ svn_diff_diff4(svn_diff_t **diff,
svn_pool_clear(subpool3);
/* Get the lcs for original - latest */
- lcs_ol = svn_diff__lcs(position_list[0], position_list[2], subpool3);
+ lcs_ol = svn_diff__lcs(position_list[0], position_list[2], 0, subpool3);
diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool);
svn_pool_clear(subpool3);
@@ -243,7 +243,7 @@ svn_diff_diff4(svn_diff_t **diff,
/* Get the lcs for common ancestor - original
* Do reverse adjustements
*/
- lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], subpool3);
+ lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], 0, subpool3);
diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
adjust_diff(diff_ol, diff_adjust);
@@ -252,7 +252,7 @@ svn_diff_diff4(svn_diff_t **diff,
/* Get the lcs for modified - common ancestor
* Do forward adjustments
*/
- lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], subpool3);
+ lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], 0, subpool3);
diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
adjust_diff(diff_ol, diff_adjust);
Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c?rev=1038390&r1=1038389&r2=1038390&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_file.c Wed Nov 24 00:03:07 2010
@@ -82,6 +82,9 @@ typedef struct svn_diff__file_baton_t
char *endp; /* next memory address after the current chunk */
svn_diff__normalize_state_t normalize_state;
+
+ /* Linked list of pushed back prefix tokens */
+ svn_diff__file_token_t *prefix_tokens;
} files[4];
/* List of free tokens that may be reused. */
@@ -330,7 +333,8 @@ datasource_get_next_token(apr_uint32_t *
&file->normalize_state,
curp, file_baton->options);
file_token->length += length;
- h = svn_diff__adler32(h, curp, length);
+ if (hash)
+ h = svn_diff__adler32(h, curp, length);
curp = endp = file->buffer;
file->chunk++;
@@ -377,10 +381,48 @@ datasource_get_next_token(apr_uint32_t *
file_token->length += length;
- *hash = svn_diff__adler32(h, c, length);
+ if (hash)
+ *hash = svn_diff__adler32(h, c, length);
*token = file_token;
}
+ /* If there is a pushed back prefix_token available, that means we have
+ read (and normalized in-place) this token before. Adjust our new token
+ with the original raw_length of the prefix_token. */
+ if (file->prefix_tokens)
+ {
+ svn_diff__file_token_t *prefix_token = file->prefix_tokens;
+ file->prefix_tokens = file->prefix_tokens->next;
+
+ if (file_token->raw_length < prefix_token->raw_length)
+ {
+ int length_difference = prefix_token->raw_length
+ - file_token->raw_length;
+ if (length_difference < file->endp - file->curp)
+ {
+ file->curp += length_difference;
+ }
+ else
+ {
+ /* ### TODO: rework this so it can handle crossing multiple
+ chunks, instead of only one. */
+ length_difference -= file->endp - file->curp;
+ file->curp = file->buffer;
+ file->chunk++;
+ length = file->chunk == last_chunk ?
+ offset_in_chunk(file->size) : CHUNK_SIZE;
+ file->endp = file->curp + length;
+
+ SVN_ERR(read_chunk(file->file, file->path,
+ file->curp, length,
+ chunk_to_offset(file->chunk),
+ file_baton->pool));
+ file->curp += length_difference;
+ }
+ file_token->raw_length = prefix_token->raw_length;
+ }
+ }
+
return SVN_NO_ERROR;
}
@@ -506,6 +548,37 @@ token_compare(void *baton, void *token1,
return SVN_NO_ERROR;
}
+static svn_error_t *
+token_pushback_prefix(void *baton,
+ void *token,
+ svn_diff_datasource_e datasource)
+{
+ svn_diff__file_baton_t *file_baton = baton;
+ svn_diff__file_token_t *file_token = token;
+ struct file_info *file =
+ &file_baton->files[datasource_to_index(datasource)];
+
+ file_token->next = file->prefix_tokens;
+ file->prefix_tokens = file_token;
+
+ if (offset_to_chunk(file_token->norm_offset) == file->chunk)
+ {
+ file->curp = file->buffer + offset_in_chunk(file_token->norm_offset);
+ }
+ else
+ {
+ /* ### TODO: rework this so it can handle crossing multiple
+ chunks, instead of only one. */
+ file->chunk--;
+ SVN_ERR(read_chunk(file->file, file->path, file->buffer,
+ CHUNK_SIZE, chunk_to_offset(file->chunk),
+ file_baton->pool));
+ file->endp = file->buffer + CHUNK_SIZE;
+ file->curp = file->buffer + offset_in_chunk(file_token->norm_offset);
+ }
+
+ return SVN_NO_ERROR;
+}
/* Implements svn_diff_fns_t::token_discard */
static void
@@ -536,6 +609,7 @@ static const svn_diff_fns_t svn_diff__fi
datasource_close,
datasource_get_next_token,
token_compare,
+ token_pushback_prefix,
token_discard,
token_discard_all
};
Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c?rev=1038390&r1=1038389&r2=1038390&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/diff_memory.c Wed Nov 24 00:03:07 2010
@@ -128,7 +128,8 @@ datasource_get_next_token(apr_uint32_t *
svn_diff__normalize_buffer(&buf, &len, &state, tok->data,
mem_baton->normalization_options);
- *hash = svn_diff__adler32(0, buf, len);
+ if (hash)
+ *hash = svn_diff__adler32(0, buf, len);
src->next_token++;
}
else
@@ -166,6 +167,19 @@ token_compare(void *baton, void *token1,
return SVN_NO_ERROR;
}
+static svn_error_t *
+token_pushback_prefix(void *baton,
+ void *token,
+ svn_diff_datasource_e datasource)
+{
+ diff_mem_baton_t *mem_baton = baton;
+ source_tokens_t *src = &(mem_baton->sources[datasource_to_index(datasource)]);
+
+ src->next_token--;
+
+ return SVN_NO_ERROR;
+}
+
/* Implements svn_diff_fns_t::token_discard */
static void
token_discard(void *baton, void *token)
@@ -192,6 +206,7 @@ static const svn_diff_fns_t svn_diff__me
datasource_close,
datasource_get_next_token,
token_compare,
+ token_pushback_prefix,
token_discard,
token_discard_all
};
Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/lcs.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/lcs.c?rev=1038390&r1=1038389&r2=1038390&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/lcs.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/lcs.c Wed Nov 24 00:03:07 2010
@@ -163,6 +163,7 @@ svn_diff__lcs_reverse(svn_diff__lcs_t *l
svn_diff__lcs_t *
svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+ apr_off_t prefix_lines,
apr_pool_t *pool)
{
int idx;
@@ -180,9 +181,11 @@ svn_diff__lcs(svn_diff__position_t *posi
*/
lcs = apr_palloc(pool, sizeof(*lcs));
lcs->position[0] = apr_pcalloc(pool, sizeof(*lcs->position[0]));
- lcs->position[0]->offset = position_list1 ? position_list1->offset + 1 : 1;
+ lcs->position[0]->offset = position_list1 ?
+ position_list1->offset + 1 : prefix_lines + 1;
lcs->position[1] = apr_pcalloc(pool, sizeof(*lcs->position[1]));
- lcs->position[1]->offset = position_list2 ? position_list2->offset + 1 : 1;
+ lcs->position[1]->offset = position_list2 ?
+ position_list2->offset + 1 : prefix_lines + 1;
lcs->length = 0;
lcs->refcount = 1;
lcs->next = NULL;
Modified: subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c?rev=1038390&r1=1038389&r2=1038390&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c (original)
+++ subversion/branches/diff-optimizations-tokens/subversion/libsvn_diff/token.c Wed Nov 24 00:03:07 2010
@@ -187,3 +187,148 @@ svn_diff__get_tokens(svn_diff__position_
return SVN_NO_ERROR;
}
+
+/* Find identical prefix between all datasources
+ */
+static svn_error_t *
+find_identical_prefix(svn_boolean_t *reached_one_eof,
+ apr_off_t *prefix_lines,
+ void *diff_baton,
+ const svn_diff_fns_t *vtable,
+ svn_diff_datasource_e datasource[],
+ int datasource_len)
+{
+ void *token[4];
+ svn_boolean_t is_match, reached_all_eof;
+ int i, rv;
+
+ *prefix_lines = 0;
+ *reached_one_eof = FALSE;
+
+ /* Read first token from every datasource, and check if it matches. */
+ for (i = 0; i < datasource_len; i++)
+ {
+ SVN_ERR(vtable->datasource_get_next_token(NULL, &token[i],
+ diff_baton, datasource[i]));
+ *reached_one_eof = *reached_one_eof || token[i] == NULL;
+ }
+ if (*reached_one_eof)
+ {
+ /* We're done. Push back tokens that we may have read too much. */
+ for (i = 0; i < datasource_len; i++)
+ if (token[i] != NULL)
+ SVN_ERR(vtable->token_pushback_prefix(diff_baton, token[i], datasource[i]));
+ return SVN_NO_ERROR;
+ }
+ for (i = 1, is_match = TRUE; is_match && i < datasource_len; i++)
+ {
+ SVN_ERR(vtable->token_compare(diff_baton, token[0], token[i], &rv));
+ is_match = is_match && rv == 0;
+ }
+
+ /* While tokens match up, continue getting tokens and matching. */
+ while (is_match)
+ {
+ (*prefix_lines)++;
+ for (i = 0; i < datasource_len; i++)
+ {
+ SVN_ERR(vtable->datasource_get_next_token(NULL, &token[i],
+ diff_baton, datasource[i]));
+ *reached_one_eof = *reached_one_eof || token[i] == NULL;
+ }
+ if (*reached_one_eof)
+ break;
+ else
+ for (i = 1, is_match = TRUE; is_match && i < datasource_len; i++)
+ {
+ SVN_ERR(vtable->token_compare(diff_baton, token[0], token[i], &rv));
+ is_match = is_match && rv == 0;
+ }
+ }
+
+ /* If all files reached their end (i.e. are fully identical), we're done. */
+ for (i = 0, reached_all_eof = TRUE; i < datasource_len; i++)
+ reached_all_eof = reached_all_eof && token[i] == NULL;
+ if (reached_all_eof)
+ return SVN_NO_ERROR;
+
+ /* Push back the non-matching token we read. */
+ for (i = 0; i < datasource_len; i++)
+ if (token[i] != NULL)
+ SVN_ERR(vtable->token_pushback_prefix(diff_baton, token[i], datasource[i]));
+
+ return SVN_NO_ERROR;
+}
+
+/*
+ * Get all tokens from the datasources. For each datasource, return the
+ * last item in the (circular) list.
+ */
+svn_error_t *
+svn_diff__get_all_tokens(svn_diff__position_t **position_list[],
+ apr_off_t *prefix_lines,
+ svn_diff__tree_t *tree,
+ void *diff_baton,
+ const svn_diff_fns_t *vtable,
+ svn_diff_datasource_e datasource[],
+ int datasource_len,
+ apr_pool_t *pool)
+{
+ svn_diff__position_t *start_position;
+ svn_diff__position_t *position = NULL;
+ svn_diff__position_t **position_ref;
+ svn_diff__node_t *node;
+ void *token;
+ apr_off_t offset;
+ apr_uint32_t hash;
+ svn_boolean_t reached_one_eof;
+ int i;
+
+ for (i = 0; i < datasource_len; i++)
+ {
+ *position_list[i] = NULL;
+ SVN_ERR(vtable->datasource_open(diff_baton, datasource[i]));
+ }
+
+ /* find identical prefix */
+ SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
+ diff_baton, vtable, datasource, datasource_len));
+
+ /* ### TODO: find identical suffix (if not eof) */
+
+ for (i = 0; i < datasource_len; i++)
+ {
+ position_ref = &start_position;
+ offset = *prefix_lines;
+ hash = 0; /* The callback fn doesn't need to touch it per se */
+ while (1)
+ {
+ SVN_ERR(vtable->datasource_get_next_token(&hash, &token,
+ diff_baton, datasource[i]));
+ if (token == NULL)
+ break;
+
+ offset++;
+ SVN_ERR(svn_diff__tree_insert_token(&node, tree,
+ diff_baton, vtable,
+ hash, token));
+
+ /* Create a new position */
+ position = apr_palloc(pool, sizeof(*position));
+ position->next = NULL;
+ position->node = node;
+ position->offset = offset;
+
+ *position_ref = position;
+ position_ref = &position->next;
+ }
+
+ *position_ref = start_position;
+
+ SVN_ERR(vtable->datasource_close(diff_baton, datasource[i]));
+
+ *position_list[i] = position;
+ }
+
+ return SVN_NO_ERROR;
+}