You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by rs...@apache.org on 2021/02/24 15:13:46 UTC

[avro] branch branch-1.10 updated: AVRO-3049: Add checks to BinaryDecoder for bytes length (#1098)

This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch branch-1.10
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.10 by this push:
     new 4f9f970  AVRO-3049: Add checks to BinaryDecoder for bytes length (#1098)
4f9f970 is described below

commit 4f9f97002e4d3e78766536563baceb36d02aeb0d
Author: John Karp <jo...@users.noreply.github.com>
AuthorDate: Wed Feb 24 09:10:13 2021 -0600

    AVRO-3049: Add checks to BinaryDecoder for bytes length (#1098)
    
    * AVRO-3049: Handle negative length for bytes
    
    * AVRO-3049: Don't try to allocate beyond VM limits for bytes
    
    * AVRO-3049: Use clearer unit test names
    
    * AVRO-3049: Use Assert.assertThrows for cleaner UTs
    
    * AVRO-3049: Add org.apache.avro.limits.bytes.maxLength property
    
    * AVRO-3049: mvn spotless:apply
    
    * AVRO-3049: Call super() from BinaryDecoder ctor
    
    * AVRO-3049: Add JavaDoc re BinaryDecoder limits
---
 .../java/org/apache/avro/io/BinaryDecoder.java     | 38 ++++++++++++++--
 .../java/org/apache/avro/io/TestBinaryDecoder.java | 51 +++++++++++++++++++---
 2 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
index 7091a5c..44d2b76 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
@@ -27,6 +27,7 @@ import java.util.Arrays;
 import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.InvalidNumberEncodingException;
 import org.apache.avro.util.Utf8;
+import org.slf4j.LoggerFactory;
 
 /**
  * An {@link Decoder} for binary-format data.
@@ -37,6 +38,13 @@ import org.apache.avro.util.Utf8;
  * required to serve its read methods. The number of unused bytes in the buffer
  * can be accessed by inputStream().remaining(), if the BinaryDecoder is not
  * 'direct'.
+ * <p/>
+ * To prevent this class from making large allocations when handling potentially
+ * pathological input data, set Java properties
+ * <tt>org.apache.avro.limits.string.maxLength</tt> and
+ * <tt>org.apache.avro.limits.bytes.maxLength</tt> before instantiating this
+ * class to limit the maximum sizes of <tt>string</tt> and <tt>bytes</tt> types
+ * handled. The default is to permit sizes up to Java's maximum array length.
  *
  * @see Encoder
  */
@@ -48,7 +56,10 @@ public class BinaryDecoder extends Decoder {
    * an array. Attempts to allocate larger arrays may result in OutOfMemoryError:
    * Requested array size exceeds VM limit
    */
-  private static final long MAX_ARRAY_SIZE = (long) Integer.MAX_VALUE - 8L;
+  static final long MAX_ARRAY_SIZE = (long) Integer.MAX_VALUE - 8L;
+
+  private static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength";
+  private final int maxBytesLength;
 
   private ByteSource source = null;
   // we keep the buffer and its state variables in this class and not in a
@@ -87,15 +98,27 @@ public class BinaryDecoder extends Decoder {
 
   /** protected constructor for child classes */
   protected BinaryDecoder() {
+    super();
+    String o = System.getProperty(MAX_BYTES_LENGTH_PROPERTY);
+    int i = Integer.MAX_VALUE;
+    if (o != null) {
+      try {
+        i = Integer.parseUnsignedInt(o);
+      } catch (NumberFormatException nfe) {
+        LoggerFactory.getLogger(BinaryDecoder.class)
+            .warn("Could not parse property " + MAX_BYTES_LENGTH_PROPERTY + ": " + o, nfe);
+      }
+    }
+    maxBytesLength = i;
   }
 
   BinaryDecoder(InputStream in, int bufferSize) {
-    super();
+    this();
     configure(in, bufferSize);
   }
 
   BinaryDecoder(byte[] data, int offset, int length) {
-    super();
+    this();
     configure(data, offset, length);
   }
 
@@ -307,6 +330,15 @@ public class BinaryDecoder extends Decoder {
   @Override
   public ByteBuffer readBytes(ByteBuffer old) throws IOException {
     int length = readInt();
+    if (length > MAX_ARRAY_SIZE) {
+      throw new UnsupportedOperationException("Cannot read arrays longer than " + MAX_ARRAY_SIZE + " bytes");
+    }
+    if (length > maxBytesLength) {
+      throw new AvroRuntimeException("Bytes length " + length + " exceeds maximum allowed");
+    }
+    if (length < 0L) {
+      throw new AvroRuntimeException("Malformed data. Length is negative: " + length);
+    }
     final ByteBuffer result;
     if (old != null && length <= old.capacity()) {
       result = old;
diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java
index 00fd0fa..7104929 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java
@@ -339,16 +339,55 @@ public class TestBinaryDecoder {
   }
 
   @Test
-  public void testNegativeLengthEncoding() throws IOException {
+  public void testNegativeStringLength() throws IOException {
     byte[] bad = new byte[] { (byte) 1 };
     Decoder bd = factory.binaryDecoder(bad, null);
-    String message = "";
+
+    Assert.assertThrows("Malformed data. Length is negative: -1", AvroRuntimeException.class, bd::readString);
+  }
+
+  @Test
+  public void testStringMaxArraySize() throws IOException {
+    byte[] bad = new byte[10];
+    BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0);
+    Decoder bd = factory.binaryDecoder(bad, null);
+
+    Assert.assertThrows("Cannot read strings longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes",
+        UnsupportedOperationException.class, bd::readString);
+  }
+
+  @Test
+  public void testNegativeBytesLength() throws IOException {
+    byte[] bad = new byte[] { (byte) 1 };
+    Decoder bd = factory.binaryDecoder(bad, null);
+
+    Assert.assertThrows("Malformed data. Length is negative: -1", AvroRuntimeException.class, () -> bd.readBytes(null));
+  }
+
+  @Test
+  public void testBytesMaxArraySize() throws IOException {
+    byte[] bad = new byte[10];
+    BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0);
+    Decoder bd = factory.binaryDecoder(bad, null);
+
+    Assert.assertThrows("Cannot read arrays longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes",
+        UnsupportedOperationException.class, () -> bd.readBytes(null));
+  }
+
+  @Test
+  public void testBytesMaxLengthProperty() throws IOException {
+    int maxLength = 128;
+    byte[] bad = new byte[10];
+    BinaryData.encodeLong(maxLength + 1, bad, 0);
     try {
-      bd.readString();
-    } catch (AvroRuntimeException e) {
-      message = e.getMessage();
+      System.setProperty("org.apache.avro.limits.bytes.maxLength", Long.toString(maxLength));
+      Decoder bd = factory.binaryDecoder(bad, null);
+
+      Assert.assertThrows("Bytes length " + (maxLength + 1) + " exceeds maximum allowed", AvroRuntimeException.class,
+          () -> bd.readBytes(null));
+    } finally {
+      System.clearProperty("org.apache.avro.limits.bytes.maxLength");
     }
-    Assert.assertEquals("Malformed data. Length is negative: -1", message);
   }
 
   @Test(expected = UnsupportedOperationException.class)