You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by rh...@apache.org on 2015/09/21 19:27:18 UTC

svn commit: r1704374 - in /subversion/trunk/subversion: include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py

Author: rhuijben
Date: Mon Sep 21 17:27:11 2015
New Revision: 1704374

URL: http://svn.apache.org/viewvc?rev=1704374&view=rev
Log:
Complete the parsing routines for parsing git-like binary blobs. This patch
completes the parsing, but leaves out the patch application code as that
needs a bit more work before committing.

* subversion/include/svn_diff.h
  (svn_diff_get_binary_diff_original_stream,
   svn_diff_get_binary_diff_result_stream): New functions.

* subversion/include/svn_error_codes.h
  (SVN_ERR_DIFF_UNEXPECTED_DATA): New error.

* subversion/libsvn_diff/binary_diff.c
  (includes): Add svn_private_config.h and diff.h.
  (base85_value,
   svn_diff__base85_decode_line): New functions.

* subversion/libsvn_diff/diff.h
  (svn_diff__base85_decode_line): New function.

* subversion/libsvn_diff/parse-diff.c
  (includes): Add svn_private_config.h and diff.h.

  (base85_baton_t): New struct.
  (read_handler_base85,
   close_handler_base85,
   get_base85_data_stream): New functions.

  (length_verify_baton_t): New struct.
  (read_handler_length_verify,
   close_handler_length_verify,
   get_verify_length_stream): New functions.

  (svn_diff_get_binary_diff_original_stream,
   svn_diff_get_binary_diff_result_stream): New functions.

  (parse_binary_patch): Properly set apr_file in binary patch. Switch src and
    dest to the same order as used by git.

* subversion/tests/cmdline/patch_tests.py
  (patch_binary_file): Update expected ordering.

Modified:
    subversion/trunk/subversion/include/svn_diff.h
    subversion/trunk/subversion/include/svn_error_codes.h
    subversion/trunk/subversion/libsvn_diff/binary_diff.c
    subversion/trunk/subversion/libsvn_diff/diff.h
    subversion/trunk/subversion/libsvn_diff/parse-diff.c
    subversion/trunk/subversion/tests/cmdline/patch_tests.py

Modified: subversion/trunk/subversion/include/svn_diff.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_diff.h?rev=1704374&r1=1704373&r2=1704374&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_diff.h (original)
+++ subversion/trunk/subversion/include/svn_diff.h Mon Sep 21 17:27:11 2015
@@ -1215,10 +1215,36 @@ typedef struct svn_prop_patch_t {
  * A binary patch representation. This basically describes replacing one
  * exact binary representation with another one.
  *
- * @since new in 1.10. */
+ * @since New in 1.10. */
 typedef struct svn_diff_binary_patch_t svn_diff_binary_patch_t;
 
 /**
+ * Creates a stream allocated in @a result_pool from which the original
+ * (pre-patch-application) version of the binary patched file can be read.
+ *
+ * @note Like many svn_diff_get functions over patches, this is implemented
+ * as reading from the backing patch file. Therefore it is recommended to
+ * read the whole stream before using other functions on the same patch file.
+ *
+ * @since New in 1.10 */
+svn_stream_t *
+svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch,
+                                         apr_pool_t *result_pool);
+
+/**
+ * Creates a stream allocated in @a result_pool from which the resulting
+ * (post-patch-application) version of the binary patched file can be read.
+ *
+ * @note Like many svn_diff_get functions over patches, this is implemented
+ * as reading from the backing patch file. Therefore it is recommended to
+ * read the whole stream before using other functions on the same patch file.
+ *
+ * @since New in 1.10 */
+svn_stream_t *
+svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch,
+                                       apr_pool_t *result_pool);
+
+/**
  * Data type to manage parsing of patches.
  * API users should not allocate structures of this type directly.
  *

Modified: subversion/trunk/subversion/include/svn_error_codes.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_error_codes.h?rev=1704374&r1=1704373&r2=1704374&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_error_codes.h (original)
+++ subversion/trunk/subversion/include/svn_error_codes.h Mon Sep 21 17:27:11 2015
@@ -1594,6 +1594,11 @@ SVN_ERROR_START
              SVN_ERR_DIFF_CATEGORY_START + 0,
              "Diff data source modified unexpectedly")
 
+  /** @since New in 1.10 */
+  SVN_ERRDEF(SVN_ERR_DIFF_UNEXPECTED_DATA,
+             SVN_ERR_DIFF_CATEGORY_START + 1,
+             "Diff data unexpected")
+
   /* libsvn_ra_serf errors */
   /** @since New in 1.5.
       @deprecated SSPI now handled by serf rather than libsvn_ra_serf. */

Modified: subversion/trunk/subversion/libsvn_diff/binary_diff.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/binary_diff.c?rev=1704374&r1=1704373&r2=1704374&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/binary_diff.c (original)
+++ subversion/trunk/subversion/libsvn_diff/binary_diff.c Mon Sep 21 17:27:11 2015
@@ -28,6 +28,10 @@
 #include "svn_diff.h"
 #include "svn_types.h"
 
+#include "diff.h"
+
+#include "svn_private_config.h"
+
 /* Copies the data from ORIGINAL_STREAM to a temporary file, returning both
    the original and compressed size. */
 static svn_error_t *
@@ -90,6 +94,64 @@ static const char b85str[] =
     "abcdefghijklmnopqrstuvwxyz"
     "!#$%&()*+-;<=>?@^_`{|}~";
 
+/* Helper function for svn_diff__base85_decode_line */
+static svn_error_t *
+base85_value(int *value, char c)
+{
+  const char *p = strchr(b85str, c);
+  if (!p)
+    return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
+                            _("Invalid base85 value"));
+
+  *value = (p - b85str);
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_diff__base85_decode_line(char *output_data,
+                             apr_ssize_t output_len,
+                             const char *base85_data,
+                             apr_ssize_t base85_len,
+                             apr_pool_t *scratch_pool)
+{
+  {
+    apr_size_t expected_data = (output_len + 3) / 4 * 5;
+
+    if (base85_len != expected_data)
+      return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
+                              _("Unexpected base85 line length"));
+  }
+
+  while (base85_len)
+    {
+      unsigned info = 0;
+      apr_ssize_t i, n;
+
+      for (i = 0; i < 5; i++)
+        {
+          int value;
+
+          SVN_ERR(base85_value(&value, base85_data[i]));
+          info *= 85;
+          info += value;
+        }
+
+      for (i = 0, n=24; i < 4; i++, n-=8)
+        {
+          if (i < output_len)
+            output_data[i] = (info >> n) & 0xFF;
+        }
+
+      base85_data += 5;
+      base85_len -= 5;
+      output_data += 4;
+      output_len -= 4;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+
 /* Git length encoding table for write_literal */
 static const char b85lenstr[] =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

Modified: subversion/trunk/subversion/libsvn_diff/diff.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/diff.h?rev=1704374&r1=1704373&r2=1704374&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/diff.h (original)
+++ subversion/trunk/subversion/libsvn_diff/diff.h Mon Sep 21 17:27:11 2015
@@ -214,4 +214,14 @@ svn_diff__unified_write_hunk_header(svn_
                                     apr_pool_t *scratch_pool);
 
 
+/* Decodes a single line of base85 data in BASE85_DATA of length BASE85_LEN,
+   to OUTPUT_DATA of length OUTPUT_LEN.
+ */
+svn_error_t *
+svn_diff__base85_decode_line(char *output_data,
+                             apr_ssize_t output_len,
+                             const char *base85_data,
+                             apr_ssize_t base85_len,
+                             apr_pool_t *scratch_pool);
+
 #endif /* DIFF_H */

Modified: subversion/trunk/subversion/libsvn_diff/parse-diff.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/parse-diff.c?rev=1704374&r1=1704373&r2=1704374&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_diff/parse-diff.c (original)
+++ subversion/trunk/subversion/libsvn_diff/parse-diff.c Mon Sep 21 17:27:11 2015
@@ -42,6 +42,10 @@
 #include "private/svn_dep_compat.h"
 #include "private/svn_sorts_private.h"
 
+#include "diff.h"
+
+#include "svn_private_config.h"
+
 /* Helper macro for readability */
 #define starts_with(str, start)  \
   (strncmp((str), (start), strlen(start)) == 0)
@@ -160,6 +164,217 @@ svn_diff_hunk_get_trailing_context(const
   return hunk->trailing_context;
 }
 
+/* Baton for the base85 stream implementation */
+struct base85_baton_t
+{
+  apr_file_t *file;
+  apr_pool_t *iterpool;
+  char buffer[52];        /* Bytes on current line */
+  apr_off_t next_pos;     /* Start position of next line */
+  apr_off_t end_pos;      /* Position after last line */
+  apr_size_t buf_size;    /* Bytes available (52 unless at eof) */
+  apr_size_t buf_pos;     /* Bytes in linebuffer */
+  svn_boolean_t done;     /* At eof? */
+};
+
+/* Implements svn_read_fn_t for the base85 read stream */
+static svn_error_t *
+read_handler_base85(void *baton, char *buffer, apr_size_t *len)
+{
+  struct base85_baton_t *b85b = baton;
+  apr_pool_t *iterpool = b85b->iterpool;
+  apr_size_t remaining = *len;
+  char *dest = buffer;
+
+  svn_pool_clear(iterpool);
+
+  if (b85b->done)
+    {
+      *len = 0;
+      return SVN_NO_ERROR;
+    }
+
+  while (remaining && (b85b->buf_size > b85b->buf_pos
+                       || b85b->next_pos < b85b->end_pos))
+    {
+      svn_stringbuf_t *line;
+      svn_boolean_t at_eof;
+
+      apr_size_t available = b85b->buf_size - b85b->buf_pos;
+      if (available)
+        {
+          apr_size_t n = (remaining < available) ? remaining : available;
+
+          memcpy(dest, b85b->buffer + b85b->buf_pos, n);
+          dest += n;
+          remaining -= n;
+          b85b->buf_pos += n;
+
+          if (!remaining)
+            return SVN_NO_ERROR; /* *len = OK */
+        }
+
+      if (b85b->next_pos >= b85b->end_pos)
+        break; /* At EOF */
+      SVN_ERR(svn_io_file_seek(b85b->file, APR_SET, &b85b->next_pos,
+                               iterpool));
+      SVN_ERR(svn_io_file_readline(b85b->file, &line, NULL, &at_eof,
+                                   APR_SIZE_MAX, iterpool, iterpool));
+      if (at_eof)
+        b85b->next_pos = b85b->end_pos;
+      else
+        {
+          b85b->next_pos = 0;
+          SVN_ERR(svn_io_file_seek(b85b->file, APR_CUR, &b85b->next_pos,
+                                   iterpool));
+        }
+
+      if (line->len && line->data[0] >= 'A' && line->data[0] <= 'Z')
+        b85b->buf_size = line->data[0] - 'A' + 1;
+      else if (line->len && line->data[0] >= 'a' && line->data[0] <= 'z')
+        b85b->buf_size = line->data[0] - 'a' + 26 + 1;
+      else
+        return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
+                                _("Unexpected data in base85 section"));
+
+      if (b85b->buf_size < 52)
+        b85b->next_pos = b85b->end_pos; /* Handle as EOF */
+
+      SVN_ERR(svn_diff__base85_decode_line(b85b->buffer, b85b->buf_size,
+                                           line->data + 1, line->len - 1,
+                                           iterpool));
+      b85b->buf_pos = 0;
+    }
+
+  *len -= remaining;
+  b85b->done = TRUE;
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_close_fn_t for the base85 read stream */
+static svn_error_t *
+close_handler_base85(void *baton)
+{
+  struct base85_baton_t *b85b = baton;
+
+  svn_pool_destroy(b85b->iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Gets a stream that reads decoded base85 data from a segment of a file.
+   The current implementation might assume that both start_pos and end_pos
+   are located at line boundaries. */
+static svn_stream_t *
+get_base85_data_stream(apr_file_t *file,
+                       apr_off_t start_pos,
+                       apr_off_t end_pos,
+                       apr_pool_t *result_pool)
+{
+  struct base85_baton_t *b85b = apr_pcalloc(result_pool, sizeof(*b85b));
+  svn_stream_t *base85s = svn_stream_create(b85b, result_pool);
+
+  b85b->file = file;
+  b85b->iterpool = svn_pool_create(result_pool);
+  b85b->next_pos = start_pos;
+  b85b->end_pos = end_pos;
+
+  svn_stream_set_read2(base85s, NULL /* only full read support */,
+                       read_handler_base85);
+  svn_stream_set_close(base85s, close_handler_base85);
+  return base85s;
+}
+
+/* Baton for the length verification stream functions */
+struct length_verify_baton_t
+{
+  svn_stream_t *inner;
+  svn_filesize_t remaining;
+};
+
+/* Implements svn_read_fn_t for the length verification stream */
+static svn_error_t *
+read_handler_length_verify(void *baton, char *buffer, apr_size_t *len)
+{
+  struct length_verify_baton_t *lvb = baton;
+  apr_size_t requested_len = *len;
+
+  SVN_ERR(svn_stream_read_full(lvb->inner, buffer, len));
+
+  if (*len > lvb->remaining)
+    return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
+                            _("Base85 data expands to longer than declared "
+                              "filesize"));
+  else if (requested_len > *len && *len != lvb->remaining)
+    return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
+                            _("Base85 data expands to smaller than declared "
+                              "filesize"));
+
+  lvb->remaining -= *len;
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_close_fn_t for the length verification stream */
+static svn_error_t *
+close_handler_length_verify(void *baton)
+{
+  struct length_verify_baton_t *lvb = baton;
+
+  return svn_error_trace(svn_stream_close(lvb->inner));
+}
+
+/* Gets a stream that verifies on reads that the inner stream is exactly
+   of the specified length */
+static svn_stream_t *
+get_verify_length_stream(svn_stream_t *inner,
+                         svn_filesize_t expected_size,
+                         apr_pool_t *result_pool)
+{
+  struct length_verify_baton_t *lvb = apr_palloc(result_pool, sizeof(*lvb));
+  svn_stream_t *len_stream = svn_stream_create(lvb, result_pool);
+
+  lvb->inner = inner;
+  lvb->remaining = expected_size;
+
+  svn_stream_set_read2(len_stream, NULL /* only full read support */,
+                       read_handler_length_verify);
+  svn_stream_set_close(len_stream, close_handler_length_verify);
+
+  return len_stream;
+}
+
+svn_stream_t *
+svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch,
+                                         apr_pool_t *result_pool)
+{
+  svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->src_start,
+                                           bpatch->src_end, result_pool);
+
+  s = svn_stream_compressed(s, result_pool);
+
+  /* ### If we (ever) want to support the DELTA format, then we should hook the
+         undelta handling here */
+
+  return get_verify_length_stream(s, bpatch->src_filesize, result_pool);
+}
+
+svn_stream_t *
+svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch,
+                                       apr_pool_t *result_pool)
+{
+  svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->dst_start,
+                                           bpatch->dst_end, result_pool);
+
+  s = svn_stream_compressed(s, result_pool);
+
+  /* ### If we (ever) want to support the DELTA format, then we should hook the
+  undelta handling here */
+
+  return get_verify_length_stream(s, bpatch->dst_filesize, result_pool);
+}
+
 /* Try to parse a positive number from a decimal number encoded
  * in the string NUMBER. Return parsed number in OFFSET, and return
  * TRUE if parsing was successful. */
@@ -1387,7 +1602,9 @@ parse_binary_patch(svn_patch_t *patch, a
   svn_boolean_t eof = FALSE;
   svn_diff_binary_patch_t *bpatch = apr_pcalloc(result_pool, sizeof(*bpatch));
   svn_boolean_t in_blob = FALSE;
-  svn_boolean_t in_dst = FALSE;
+  svn_boolean_t in_src = FALSE;
+
+  bpatch->apr_file = apr_file;
 
   patch->operation = svn_diff_op_modified;
   patch->prop_patches = apr_hash_make(result_pool);
@@ -1416,17 +1633,17 @@ parse_binary_patch(svn_patch_t *patch, a
               && !strchr(line->data, ' '))
             {
               /* One more blop line */
-              if (in_dst)
-                bpatch->dst_end = pos;
-              else
+              if (in_src)
                 bpatch->src_end = pos;
+              else
+                bpatch->dst_end = pos;
             }
           else if (svn_stringbuf_first_non_whitespace(line) < line->len
-                   && !(in_dst && bpatch->dst_start < last_line))
+                   && !(in_src && bpatch->src_start < last_line))
             {
               break; /* Bad patch */
             }
-          else if (in_dst)
+          else if (in_src)
             {
               patch->binary_patch = bpatch; /* SUCCESS! */
               break; 
@@ -1434,7 +1651,7 @@ parse_binary_patch(svn_patch_t *patch, a
           else
             {
               in_blob = FALSE;
-              in_dst = TRUE;
+              in_src = TRUE;
             }
         }
       else if (starts_with(line->data, "literal "))
@@ -1450,15 +1667,15 @@ parse_binary_patch(svn_patch_t *patch, a
               break;
             }
 
-          if (in_dst)
+          if (in_src)
             {
-              bpatch->dst_start = pos;
-              bpatch->dst_filesize = expanded_size;
+              bpatch->src_start = pos;
+              bpatch->src_filesize = expanded_size;
             }
           else
             {
-              bpatch->src_start = pos;
-              bpatch->src_filesize = expanded_size;
+              bpatch->dst_start = pos;
+              bpatch->dst_filesize = expanded_size;
             }
           in_blob = TRUE;
         }
@@ -1471,8 +1688,8 @@ parse_binary_patch(svn_patch_t *patch, a
     /* Rewind to the start of the line just read, so subsequent calls
      * don't end up skipping the line. It may contain a patch or hunk header.*/
     SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
-  else if (in_dst
-           && ((bpatch->dst_end > bpatch->dst_start) || !bpatch->dst_filesize))
+  else if (in_src
+           && ((bpatch->src_end > bpatch->src_start) || !bpatch->src_filesize))
     {
       patch->binary_patch = bpatch; /* SUCCESS */
     }

Modified: subversion/trunk/subversion/tests/cmdline/patch_tests.py
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/cmdline/patch_tests.py?rev=1704374&r1=1704373&r2=1704374&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/cmdline/patch_tests.py (original)
+++ subversion/trunk/subversion/tests/cmdline/patch_tests.py Mon Sep 21 17:27:11 2015
@@ -5637,13 +5637,13 @@ def patch_binary_file(sbox):
     '===================================================================\n',
     'diff --git a/iota b/iota\n',
     'GIT binary patch\n',
-    'literal 25\n',
-    'ec$^E#$ShU>qLPeMg|y6^R0Z|S{E|d<JuU!m{s;*G\n',
-    '\n',
     'literal 48\n',
     'zc$^E#$ShU>qLPeMg|y6^R0Z|S{E|d<JuZf(=9bpB_PpZ!+|-hc%)E52)STkf{{Wp*\n',
     'B5)uFa\n',
     '\n',
+    'literal 25\n',
+    'ec$^E#$ShU>qLPeMg|y6^R0Z|S{E|d<JuU!m{s;*G\n',
+    '\n',
     'Property changes on: iota\n',
     '___________________________________________________________________\n',
     'Added: svn:mime-type\n',



Re: svn commit: r1704374 - in /subversion/trunk/subversion: include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py

Posted by Julian Foad <ju...@btopenworld.com>.
Johan Corveleyn wrote:
> Bert Huijben wrote:
>> Johan Corveleyn  wrote:
>>> Without completely understanding how the binary diff / patch is now
>>> implemented, I'd say: better to error out in this case. If it helps
>>> detecting corruption of the binary diff (say it was randomly changed
>>> by some wire transmission) ...
>>
>> This is completely against the rest of the patch implementation, and how other diff implementations work.
>>
>> For many reasons (including: legacy, compatibility, etc.) all of them ignore everything they don't understand. The current (1.8/1.9) patch code just ignores these hunks and doesn't error on them.
>>
>> The patch code only errors out on fatal errors.

I don't follow. Johan implicity argues that this sequence of base-85
input characters considered bad data. You (Bert) are saying 'svn
patch' should not error out when it gets bad data it should (and does)
ignore the patch. But it doesn't in this case. Instead, it converts
the input into some (more or less arbitray) output data and applies
it.

We're talking about the behaviour of one case inside the base-85
decoder. The decoder errors out in other cases:

  const char *p = strchr(b85str, c);
  if (!p)
    return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
                            _("Invalid base85 value"));
[...]
    if (base85_len != expected_data)
      return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
                              _("Unexpected base85 line length"));

We're talking about two different levels. Inside this base-85 decoder
we're talking about whether to return an error or some (more or less
arbitrary) output value when we read a 'bad' sequence of input
characters. If you want 'svn patch' to ignore the patch in this case,
then the base-85 decoder MUST detect this case and return an error.
Currently it doesn't.

Or did I misunderstand you?

- Julian

Re: svn commit: r1704374 - in /subversion/trunk/subversion: include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py

Posted by Johan Corveleyn <jc...@gmail.com>.
On Wed, Sep 23, 2015 at 3:34 PM, Bert Huijben <be...@qqmail.nl> wrote:
>
>
>> -----Original Message-----
>> From: Johan Corveleyn [mailto:jcorvel@gmail.com]
>> Sent: woensdag 23 september 2015 14:51
>> To: Julian Foad <ju...@btopenworld.com>
>> Cc: Bert Huijben <be...@qqmail.nl>; Stefan Sperling <st...@elego.de>; dev
>> <de...@subversion.apache.org>
>> Subject: Re: svn commit: r1704374 - in /subversion/trunk/subversion:
>> include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c
>> libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py
>
>
>> Without completely understanding how the binary diff / patch is now
>> implemented, I'd say: better to error out in this case. If it helps
>> detecting corruption of the binary diff (say it was randomly changed
>> by some wire transmission) ...
>
> This is completely against the rest of the patch implementation, and how other diff implementations work.
>
> For many reasons (including: legacy, compatibility, etc.) all of them ignore everything they don't understand. The current (1.8/1.9) patch code just ignores these hunks and doesn't error on them.
>
> The patch code only errors out on fatal errors.
>
> While it would be nice to error out and say the user should get a good file, there is no way he can.
>
> Hand editing binary patches (that are typically gzipped binary blobs internally) is virtually impossible and you can't trust the content came from some trusted source anyway. You should always review the resulting binary when you get it from an untrusted source. (The same applies to unified diffs, but reviewing those is much easier :-))
>

Okay, understood. Thanks for explaining.

-- 
Johan

RE: svn commit: r1704374 - in /subversion/trunk/subversion: include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Johan Corveleyn [mailto:jcorvel@gmail.com]
> Sent: woensdag 23 september 2015 14:51
> To: Julian Foad <ju...@btopenworld.com>
> Cc: Bert Huijben <be...@qqmail.nl>; Stefan Sperling <st...@elego.de>; dev
> <de...@subversion.apache.org>
> Subject: Re: svn commit: r1704374 - in /subversion/trunk/subversion:
> include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c
> libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py


> Without completely understanding how the binary diff / patch is now
> implemented, I'd say: better to error out in this case. If it helps
> detecting corruption of the binary diff (say it was randomly changed
> by some wire transmission) ...

This is completely against the rest of the patch implementation, and how other diff implementations work.

For many reasons (including: legacy, compatibility, etc.) all of them ignore everything they don't understand. The current (1.8/1.9) patch code just ignores these hunks and doesn't error on them.

The patch code only errors out on fatal errors.

While it would be nice to error out and say the user should get a good file, there is no way he can.

Hand editing binary patches (that are typically gzipped binary blobs internally) is virtually impossible and you can't trust the content came from some trusted source anyway. You should always review the resulting binary when you get it from an untrusted source. (The same applies to unified diffs, but reviewing those is much easier :-))

	Bert


Re: svn commit: r1704374 - in /subversion/trunk/subversion: include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py

Posted by Johan Corveleyn <jc...@gmail.com>.
On Tue, Sep 22, 2015 at 12:30 PM, Julian Foad
<ju...@btopenworld.com> wrote:
> Bert Huijben wrote:
>> Stefan Sperling wrote:
>>> > +      unsigned info = 0;
> [...]
>>> > +      for (i = 0; i < 5; i++)
>>> > +        {
>>> > +          int value;
>>> > +
>>> > +          SVN_ERR(base85_value(&value, base85_data[i]));
>>> > +          info *= 85;
>>>
>>> What happens if 'info' overflows here?
>>
>> Probably something similar to the git implementation... different final
>> output; most likely 100% identical on all systems. Something that would
>> happen on every unexpected bad value in a diff file.
>>
>>> > +          info += value;
>>> > +        }
>
> Just to add a few more words of explanation, for anybody else
> wondering about this...
>
> 85^5 is 4437053125, a little greater than 256^4 = 2^32 = 4294967296,
> so 5 characters of base-85 encoded text is just a little more than
> enough to encode 4 bytes of raw data.
>
> 'info' needs to be at least 32 bits wide. If it is exactly 32 bits
> then it could overflow here if the current group of five base-85
> characters is an invalid combination -- or, we could just as well say,
> if it is a non-canonical encoding of a smallish 32-bit value. In the
> latter interpretation, it will produce an output that we could say is
> perfectly valid, a correct decoding of the (non-canonical) input.
>
> Do we care about raising an error in that case? I don't know. It
> doesn't seem particularly important to me, one way or the other.

Without completely understanding how the binary diff / patch is now
implemented, I'd say: better to error out in this case. If it helps
detecting corruption of the binary diff (say it was randomly changed
by some wire transmission) ...

-- 
Johan

Re: svn commit: r1704374 - in /subversion/trunk/subversion: include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py

Posted by Julian Foad <ju...@btopenworld.com>.
Bert Huijben wrote:
> Stefan Sperling wrote:
>> > +      unsigned info = 0;
[...]
>> > +      for (i = 0; i < 5; i++)
>> > +        {
>> > +          int value;
>> > +
>> > +          SVN_ERR(base85_value(&value, base85_data[i]));
>> > +          info *= 85;
>>
>> What happens if 'info' overflows here?
>
> Probably something similar to the git implementation... different final
> output; most likely 100% identical on all systems. Something that would
> happen on every unexpected bad value in a diff file.
>
>> > +          info += value;
>> > +        }

Just to add a few more words of explanation, for anybody else
wondering about this...

85^5 is 4437053125, a little greater than 256^4 = 2^32 = 4294967296,
so 5 characters of base-85 encoded text is just a little more than
enough to encode 4 bytes of raw data.

'info' needs to be at least 32 bits wide. If it is exactly 32 bits
then it could overflow here if the current group of five base-85
characters is an invalid combination -- or, we could just as well say,
if it is a non-canonical encoding of a smallish 32-bit value. In the
latter interpretation, it will produce an output that we could say is
perfectly valid, a correct decoding of the (non-canonical) input.

Do we care about raising an error in that case? I don't know. It
doesn't seem particularly important to me, one way or the other.

- Julian

RE: svn commit: r1704374 - in /subversion/trunk/subversion: include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Stefan Sperling [mailto:stsp@elego.de]
> Sent: dinsdag 22 september 2015 10:40
> To: dev@subversion.apache.org
> Subject: Re: svn commit: r1704374 - in /subversion/trunk/subversion:
> include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c
> libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py
> 
> On Mon, Sep 21, 2015 at 05:27:18PM -0000, rhuijben@apache.org wrote:
> > Author: rhuijben
> > Date: Mon Sep 21 17:27:11 2015
> > New Revision: 1704374
> >
> > URL: http://svn.apache.org/viewvc?rev=1704374&view=rev
> > Log:
> > Complete the parsing routines for parsing git-like binary blobs. This
patch
> > completes the parsing, but leaves out the patch application code as that
> > needs a bit more work before committing.
> 
> > +  while (base85_len)
> > +    {
> > +      unsigned info = 0;
> 
> Please use an explicit integer type.
> 'unsigned int', or 'unsigned long', etc.

How is that more explicit?

Explicit would be strict 32 bit or strict 64 bit, which changing to those
types isn't.

Adding 'int' is just syntactic sugar but doesn't make it explicit.

> 
> > +      apr_ssize_t i, n;
> > +
> > +      for (i = 0; i < 5; i++)
> > +        {
> > +          int value;
> > +
> > +          SVN_ERR(base85_value(&value, base85_data[i]));
> > +          info *= 85;
> 
> What happens if 'info' overflows here?

Probably something similar to the git implementation... different final
output; most likely 100% identical on all systems. Something that would
happen on every unexpected bad value in a diff file.

> 
> > +          info += value;
> > +        }
> > +
> > +      for (i = 0, n=24; i < 4; i++, n-=8)
> > +        {
> > +          if (i < output_len)
> > +            output_data[i] = (info >> n) & 0xFF;
> > +        }
> > +
> > +      base85_data += 5;
> > +      base85_len -= 5;
> > +      output_data += 4;
> > +      output_len -= 4;
> > +    }
> > +
> > +  return SVN_NO_ERROR;
> 
> > +  while (remaining && (b85b->buf_size > b85b->buf_pos
> > +                       || b85b->next_pos < b85b->end_pos))
> > +    {
> > +      svn_stringbuf_t *line;
> > +      svn_boolean_t at_eof;
> > +
> > +      apr_size_t available = b85b->buf_size - b85b->buf_pos;
> > +      if (available)
> > +        {
> > +          apr_size_t n = (remaining < available) ? remaining :
available;
> > +
> > +          memcpy(dest, b85b->buffer + b85b->buf_pos, n);
> > +          dest += n;
> > +          remaining -= n;
> 
> What if 'remaining' becomes negative here?

n = MIN(remaining, available), where all values are unsigned.
So that is not possible.

> 
> > +          b85b->buf_pos += n;
> > +
> > +          if (!remaining)
> > +            return SVN_NO_ERROR; /* *len = OK */
> > +        }
> > +


Re: svn commit: r1704374 - in /subversion/trunk/subversion: include/svn_diff.h include/svn_error_codes.h libsvn_diff/binary_diff.c libsvn_diff/diff.h libsvn_diff/parse-diff.c tests/cmdline/patch_tests.py

Posted by Stefan Sperling <st...@elego.de>.
On Mon, Sep 21, 2015 at 05:27:18PM -0000, rhuijben@apache.org wrote:
> Author: rhuijben
> Date: Mon Sep 21 17:27:11 2015
> New Revision: 1704374
> 
> URL: http://svn.apache.org/viewvc?rev=1704374&view=rev
> Log:
> Complete the parsing routines for parsing git-like binary blobs. This patch
> completes the parsing, but leaves out the patch application code as that
> needs a bit more work before committing.

> +  while (base85_len)
> +    {
> +      unsigned info = 0;

Please use an explicit integer type.
'unsigned int', or 'unsigned long', etc.

> +      apr_ssize_t i, n;
> +
> +      for (i = 0; i < 5; i++)
> +        {
> +          int value;
> +
> +          SVN_ERR(base85_value(&value, base85_data[i]));
> +          info *= 85;

What happens if 'info' overflows here?

> +          info += value;
> +        }
> +
> +      for (i = 0, n=24; i < 4; i++, n-=8)
> +        {
> +          if (i < output_len)
> +            output_data[i] = (info >> n) & 0xFF;
> +        }
> +
> +      base85_data += 5;
> +      base85_len -= 5;
> +      output_data += 4;
> +      output_len -= 4;
> +    }
> +
> +  return SVN_NO_ERROR;

> +  while (remaining && (b85b->buf_size > b85b->buf_pos
> +                       || b85b->next_pos < b85b->end_pos))
> +    {
> +      svn_stringbuf_t *line;
> +      svn_boolean_t at_eof;
> +
> +      apr_size_t available = b85b->buf_size - b85b->buf_pos;
> +      if (available)
> +        {
> +          apr_size_t n = (remaining < available) ? remaining : available;
> +
> +          memcpy(dest, b85b->buffer + b85b->buf_pos, n);
> +          dest += n;
> +          remaining -= n;

What if 'remaining' becomes negative here?

> +          b85b->buf_pos += n;
> +
> +          if (!remaining)
> +            return SVN_NO_ERROR; /* *len = OK */
> +        }
> +