You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by dk...@apache.org on 2018/11/08 19:08:45 UTC

[avro] branch master updated (c6f772b -> 4fad66d)

This is an automated email from the ASF dual-hosted git repository.

dkulp pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git.


    from c6f772b  AVRO-2127: throw more specific exceptions from DataFileStream#initialize (#323)
     new 207d4ff  Since we will no longer support java6 and 7, remove the "optimizations" for those
     new 4fad66d  Remove some java6 code, add a configurable limit for maximum string lengths to read

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../src/main/java/org/apache/avro/util/Utf8.java   | 58 +++++++++-------------
 1 file changed, 23 insertions(+), 35 deletions(-)


[avro] 01/02: Since we will no longer support java6 and 7, remove the "optimizations" for those

Posted by dk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

dkulp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git

commit 207d4ff95d4dd48a4267e3d2577f86b304d0b11b
Author: Daniel Kulp <dk...@apache.org>
AuthorDate: Thu Nov 8 13:10:44 2018 -0500

    Since we will no longer support java6 and 7, remove the "optimizations" for those
---
 .../src/main/java/org/apache/avro/util/Utf8.java   | 37 ++--------------------
 1 file changed, 2 insertions(+), 35 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index 6f4ec58..dd359dd 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -18,7 +18,6 @@
 package org.apache.avro.util;
 
 import java.nio.charset.Charset;
-import java.io.UnsupportedEncodingException;
 
 import org.apache.avro.io.BinaryData;
 
@@ -92,43 +91,11 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
     return this;
   }
 
-  private abstract static class Utf8Converter {
-    public abstract String fromUtf8(byte[] bytes, int length);
-    public abstract byte[] toUtf8(String str);
-  }
-
-  private static final Utf8Converter UTF8_CONVERTER =
-    System.getProperty("java.version").startsWith("1.6.")
-    ? new Utf8Converter() {                       // optimized for Java 6
-        public String fromUtf8(byte[] bytes, int length) {
-          try {
-            return new String(bytes, 0, length, "UTF-8");
-          } catch (UnsupportedEncodingException e) {
-            throw new RuntimeException(e);
-          }
-        }
-        public byte[] toUtf8(String str) {
-          try {
-            return str.getBytes("UTF-8");
-          } catch (UnsupportedEncodingException e) {
-            throw new RuntimeException(e);
-          }
-        }
-      }
-    : new Utf8Converter() {                       // faster in Java 7 & 8
-        public String fromUtf8(byte[] bytes, int length) {
-          return new String(bytes, 0, length, UTF8);
-        }
-        public byte[] toUtf8(String str) {
-          return str.getBytes(UTF8);
-        }
-      };
-
   @Override
   public String toString() {
     if (this.length == 0) return "";
     if (this.string == null) {
-      this.string = UTF8_CONVERTER.fromUtf8(bytes, length);
+      this.string = new String(bytes, 0, length, UTF8);
     }
     return this.string;
   }
@@ -169,7 +136,7 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
 
   /** Gets the UTF-8 bytes for a String */
   public static final byte[] getBytesFor(String str) {
-    return UTF8_CONVERTER.toUtf8(str);
+    return str.getBytes(UTF8);
   }
 
 }


[avro] 02/02: Remove some java6 code, add a configurable limit for maximum string lengths to read

Posted by dk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

dkulp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git

commit 4fad66df0132c815a2a74c01704168d87f47009f
Author: Daniel Kulp <dk...@apache.org>
AuthorDate: Thu Nov 8 13:28:50 2018 -0500

    Remove some java6 code, add a configurable limit for maximum string lengths to read
---
 .../src/main/java/org/apache/avro/util/Utf8.java    | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index dd359dd..9a62664 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -19,15 +19,33 @@ package org.apache.avro.util;
 
 import java.nio.charset.Charset;
 
+import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.io.BinaryData;
+import org.slf4j.LoggerFactory;
 
 /** A Utf8 string.  Unlike {@link String}, instances are mutable.  This is more
  * efficient than {@link String} when reading or writing a sequence of values,
  * as a single instance may be reused. */
 public class Utf8 implements Comparable<Utf8>, CharSequence {
+  private static final String MAX_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength";
+  private static final int MAX_LENGTH;
   private static final byte[] EMPTY = new byte[0];
   private static final Charset UTF8 = Charset.forName("UTF-8");
 
+  static {
+    String o = System.getProperty(MAX_LENGTH_PROPERTY);
+    int i = Integer.MAX_VALUE;
+    if (o != null) {
+      try {
+        i = Integer.parseUnsignedInt(o);
+      } catch (NumberFormatException nfe) {
+        LoggerFactory.getLogger(Utf8.class)
+          .warn("Could not parse property " + MAX_LENGTH_PROPERTY + ": " + o, nfe);
+      }
+    }
+    MAX_LENGTH = i;
+  }
+
   private byte[] bytes = EMPTY;
   private int length;
   private String string;
@@ -73,6 +91,9 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
   /** Set length in bytes.  Should called whenever byte content changes, even
    * if the length does not change, as this also clears the cached String. */
   public Utf8 setByteLength(int newLength) {
+    if (newLength > MAX_LENGTH) {
+      throw new AvroRuntimeException("String length " + newLength + " exceeds maximum allowed");
+    }
     if (this.bytes.length < newLength) {
       byte[] newBytes = new byte[newLength];
       System.arraycopy(bytes, 0, newBytes, 0, this.length);