Posted to commits@avro.apache.org by ie...@apache.org on 2019/01/28 15:06:05 UTC

[avro] branch master updated: AVRO-2308: Use Java StandardCharsets

This is an automated email from the ASF dual-hosted git repository.

iemejia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 3c16299  AVRO-2308: Use Java StandardCharsets
3c16299 is described below

commit 3c16299956d64dc4a1b76161228ebb545d3d400b
Author: Beluga Behr <da...@gmail.com>
AuthorDate: Sun Jan 27 19:32:25 2019 -0500

    AVRO-2308: Use Java StandardCharsets
---
 .../src/main/java/org/apache/avro/SchemaNormalization.java | 12 ++++--------
 .../main/java/org/apache/avro/file/DataFileReader12.java   |  8 ++------
 .../src/main/java/org/apache/avro/file/DataFileStream.java |  8 ++------
 .../src/main/java/org/apache/avro/file/DataFileWriter.java | 14 +++-----------
 .../src/main/java/org/apache/avro/reflect/ReflectData.java | 13 ++++---------
 .../java/avro/src/main/java/org/apache/avro/util/Utf8.java |  7 +++----
 .../java/org/apache/avro/util/internal/JacksonUtils.java   | 11 +++--------
 .../test/java/org/apache/avro/TestSchemaNormalization.java | 11 +++++------
 .../avro/src/test/java/org/apache/avro/util/TestUtf8.java  |  8 ++++----
 .../java/org/apache/avro/ipc/SaslSocketTransceiver.java    | 10 +++-------
 .../src/main/java/org/apache/avro/mapred/AvroJob.java      |  6 +++---
 .../main/java/org/apache/avro/mapred/AvroOutputFormat.java |  5 +++--
 .../org/apache/avro/mapred/TestAvroTextOutputFormat.java   | 10 ++++------
 .../core/src/main/java/org/apache/trevni/MetaData.java     |  7 ++-----
 14 files changed, 45 insertions(+), 85 deletions(-)
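
The whole patch applies one idiom: string-literal charset names such as "UTF-8" and "ISO-8859-1" are replaced by the constants in java.nio.charset.StandardCharsets. The Charset overloads of String.getBytes and the String(byte[], Charset) constructor declare no checked UnsupportedEncodingException, which is why the surrounding try/catch boilerplate disappears in the hunks below. A minimal before/after sketch of the idiom (illustrative only, not taken from the commit):

    import java.io.UnsupportedEncodingException;
    import java.nio.charset.StandardCharsets;

    public class CharsetIdiom {
      // Before: the charset is passed by name, so the checked exception must
      // be handled even though "UTF-8" is guaranteed to exist on every JVM.
      static byte[] encodeOld(String s) {
        try {
          return s.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
          throw new RuntimeException(e);
        }
      }

      // After: the Charset overload declares no checked exception and skips
      // the name-based charset lookup.
      static byte[] encodeNew(String s) {
        return s.getBytes(StandardCharsets.UTF_8);
      }

      public static void main(String[] args) {
        System.out.println(encodeOld("Foo").length + " == " + encodeNew("Foo").length);
      }
    }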

diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaNormalization.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaNormalization.java
index ed9659b..07d8c4d 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/SchemaNormalization.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaNormalization.java
@@ -20,7 +20,7 @@ package org.apache.avro;
 import java.util.Map;
 import java.util.HashMap;
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 
@@ -89,18 +89,14 @@ public class SchemaNormalization {
   public static byte[] parsingFingerprint(String fpName, Schema s)
     throws NoSuchAlgorithmException
   {
-    try {
-      return fingerprint(fpName, toParsingForm(s).getBytes("UTF-8"));
-    } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); }
+    return fingerprint(fpName,
+        toParsingForm(s).getBytes(StandardCharsets.UTF_8));
   }
 
   /** Returns {@link #fingerprint64} applied to the parsing canonical form
     * of the supplied schema. */
   public static long parsingFingerprint64(Schema s) {
-    try {
-      return fingerprint64(toParsingForm(s).getBytes("UTF-8"));
-    } catch (java.io.UnsupportedEncodingException e)
-      { throw new RuntimeException(e); }
+    return fingerprint64(toParsingForm(s).getBytes(StandardCharsets.UTF_8));
   }
 
   private static Appendable build(Map<String,String> env, Schema s,
diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
index 0c937bf..1e5193e 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
@@ -18,9 +18,9 @@
 package org.apache.avro.file;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.io.Closeable;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -113,11 +113,7 @@ public class DataFileReader12<D> implements FileReader<D>, Closeable {
     if (value == null) {
       return null;
     }
-    try {
-      return new String(value, "UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException(e);
-    }
+    return new String(value, StandardCharsets.UTF_8);
   }
   /** Return the value of a metadata property. */
   public synchronized long getMetaLong(String key) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
index 562158b..377d2a6 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
@@ -21,8 +21,8 @@ import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Closeable;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -165,11 +165,7 @@ public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
     if (value == null) {
       return null;
     }
-    try {
-      return new String(value, "UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException(e);
-    }
+    return new String(value, StandardCharsets.UTF_8);
   }
   /** Return the value of a metadata property. */
   public long getMetaLong(String key) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
index a185172..19273d1 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
@@ -25,8 +25,8 @@ import java.io.FilterOutputStream;
 import java.io.Flushable;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.HashMap;
@@ -268,11 +268,7 @@ public class DataFileWriter<D> implements Closeable, Flushable {
   }
 
   private DataFileWriter<D> setMetaInternal(String key, String value) {
-    try {
-      return setMetaInternal(key, value.getBytes("UTF-8"));
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException(e);
-    }
+    return setMetaInternal(key, value.getBytes(StandardCharsets.UTF_8));
   }
 
   /** Set a metadata property. */
@@ -289,11 +285,7 @@ public class DataFileWriter<D> implements Closeable, Flushable {
 
   /** Set a metadata property. */
   public DataFileWriter<D> setMeta(String key, String value) {
-    try {
-      return setMeta(key, value.getBytes("UTF-8"));
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException(e);
-    }
+    return setMeta(key, value.getBytes(StandardCharsets.UTF_8));
   }
   /** Set a metadata property. */
   public DataFileWriter<D> setMeta(String key, long value) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
index 10dab93..3d2c30f 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
@@ -18,7 +18,6 @@
 package org.apache.avro.reflect;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.lang.annotation.Annotation;
 import java.lang.reflect.Field;
 import java.lang.reflect.GenericArrayType;
@@ -28,6 +27,7 @@ import java.lang.reflect.Parameter;
 import java.lang.reflect.ParameterizedType;
 import java.lang.reflect.Type;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -472,14 +472,9 @@ public class ReflectData extends SpecificData {
     }
 
     String name = keySchema.getFullName() + valueSchema.getFullName();
-    long fingerprint = 0;
-    try {
-      fingerprint = SchemaNormalization.fingerprint64(name.getBytes("UTF-8"));
-    } catch (UnsupportedEncodingException e) {
-      String msg = "Unable to create fingerprint for ("
-                   + keyType + ", "  + valueType + ") pair";
-      throw new AvroRuntimeException(msg, e);
-    }
+    long fingerprint = SchemaNormalization
+        .fingerprint64(name.getBytes(StandardCharsets.UTF_8));
+
     if (fingerprint < 0) fingerprint = -fingerprint;  // ignore sign
     String fpString = Long.toString(fingerprint, 16); // hex
     return NS_MAP_ARRAY_RECORD + fpString;
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index 9a62664..8b1f9cb 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -17,7 +17,7 @@
  */
 package org.apache.avro.util;
 
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.io.BinaryData;
@@ -30,7 +30,6 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
   private static final String MAX_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength";
   private static final int MAX_LENGTH;
   private static final byte[] EMPTY = new byte[0];
-  private static final Charset UTF8 = Charset.forName("UTF-8");
 
   static {
     String o = System.getProperty(MAX_LENGTH_PROPERTY);
@@ -116,7 +115,7 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
   public String toString() {
     if (this.length == 0) return "";
     if (this.string == null) {
-      this.string = new String(bytes, 0, length, UTF8);
+      this.string = new String(bytes, 0, length, StandardCharsets.UTF_8);
     }
     return this.string;
   }
@@ -157,7 +156,7 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
 
   /** Gets the UTF-8 bytes for a String */
   public static final byte[] getBytesFor(String str) {
-    return str.getBytes(UTF8);
+    return str.getBytes(StandardCharsets.UTF_8);
   }
 
 }
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java
index 5f5e2fe..0585689 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java
@@ -18,7 +18,7 @@
 package org.apache.avro.util.internal;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
@@ -35,7 +35,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.util.TokenBuffer;
 
 public class JacksonUtils {
-  static final String BYTES_CHARSET = "ISO-8859-1";
 
   private JacksonUtils() {
   }
@@ -71,7 +70,7 @@ public class JacksonUtils {
       }
       generator.writeEndArray();
     } else if (datum instanceof byte[]) { // bytes, fixed
-      generator.writeString(new String((byte[]) datum, BYTES_CHARSET));
+      generator.writeString(new String((byte[]) datum, StandardCharsets.ISO_8859_1));
     } else if (datum instanceof CharSequence || datum instanceof Enum<?>) { // string, enum
       generator.writeString(datum.toString());
     } else if (datum instanceof Double) { // double
@@ -123,11 +122,7 @@ public class JacksonUtils {
         return jsonNode.asText();
       } else if (schema.getType().equals(Schema.Type.BYTES)
               || schema.getType().equals(Schema.Type.FIXED)) {
-        try {
-          return jsonNode.textValue().getBytes(BYTES_CHARSET);
-        } catch (UnsupportedEncodingException e) {
-          throw new AvroRuntimeException(e);
-        }
+        return jsonNode.textValue().getBytes(StandardCharsets.ISO_8859_1);
       }
     } else if (jsonNode.isArray()) {
       List l = new ArrayList();
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java
index 063e0b6..8c7f6c0 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
 import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Formatter;
 import java.util.List;
@@ -106,12 +107,10 @@ public class TestSchemaNormalization {
     // by 64 zero bits).  This both deals with the fact that
     // CRCs ignore leading zeros, and also ensures some degree of
     // randomness for small inputs
-    try {
-      long tmp = altExtend(SchemaNormalization.EMPTY64, 64, ONE,
-                           s.getBytes("UTF-8"));
-      return altExtend(SchemaNormalization.EMPTY64, 64, tmp, POSTFIX);
-    } catch (java.io.UnsupportedEncodingException e)
-      { throw new RuntimeException(e); }
+
+    long tmp = altExtend(SchemaNormalization.EMPTY64, 64, ONE,
+        s.getBytes(StandardCharsets.UTF_8));
+    return altExtend(SchemaNormalization.EMPTY64, 64, tmp, POSTFIX);
   }
 
   private static long altExtend(long poly, int degree, long fp, byte[] b) {
diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
index 04709f5..677b7bd 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
@@ -20,13 +20,13 @@ package org.apache.avro.util;
 import static junit.framework.Assert.assertSame;
 import static org.junit.Assert.assertEquals;
 
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
 
 import org.junit.Test;
 
 public class TestUtf8 {
   @Test public void testByteConstructor() throws Exception {
-    byte[] bs = "Foo".getBytes("UTF-8");
+    byte[] bs = "Foo".getBytes(StandardCharsets.UTF_8);
     Utf8 u = new Utf8(bs);
     assertEquals(bs.length, u.getLength());
     for (int i=0; i<bs.length; i++) {
@@ -34,8 +34,8 @@ public class TestUtf8 {
     }
   }
 
-  @Test public void testArrayReusedWhenLargerThanRequestedSize() throws UnsupportedEncodingException {
-    byte[] bs = "55555".getBytes("UTF-8");
+  @Test public void testArrayReusedWhenLargerThanRequestedSize() {
+    byte[] bs = "55555".getBytes(StandardCharsets.UTF_8);
     Utf8 u = new Utf8(bs);
     assertEquals(5, u.getByteLength());
     byte[] content = u.getBytes();
diff --git a/lang/java/ipc/src/main/java/org/apache/avro/ipc/SaslSocketTransceiver.java b/lang/java/ipc/src/main/java/org/apache/avro/ipc/SaslSocketTransceiver.java
index b934fd1..249b78a 100644
--- a/lang/java/ipc/src/main/java/org/apache/avro/ipc/SaslSocketTransceiver.java
+++ b/lang/java/ipc/src/main/java/org/apache/avro/ipc/SaslSocketTransceiver.java
@@ -21,9 +21,9 @@ package org.apache.avro.ipc;
 
 import java.io.IOException;
 import java.io.EOFException;
-import java.io.UnsupportedEncodingException;
 import java.net.SocketAddress;
 import java.nio.channels.SocketChannel;
+import java.nio.charset.StandardCharsets;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
@@ -169,12 +169,8 @@ public class SaslSocketTransceiver extends Transceiver {
     dataIsWrapped = (qop != null && !qop.equalsIgnoreCase("auth"));
   }
 
-  private String toString(ByteBuffer buffer) throws IOException {
-    try {
-      return new String(buffer.array(), "UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new IOException(e.toString(), e);
-    }
+  private String toString(ByteBuffer buffer) {
+    return new String(buffer.array(), StandardCharsets.UTF_8);
   }
 
   @Override public synchronized List<ByteBuffer> readBuffers()
diff --git a/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroJob.java b/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroJob.java
index 729a855..9d23fd3 100644
--- a/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroJob.java
+++ b/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroJob.java
@@ -21,6 +21,7 @@ package org.apache.avro.mapred;
 import java.util.Collection;
 import java.lang.reflect.Constructor;
 import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
 import java.io.UnsupportedEncodingException;
 
 import org.apache.hadoop.conf.Configuration;
@@ -110,10 +111,9 @@ public class AvroJob {
   public static void setOutputMeta(JobConf job, String key, byte[] value) {
     try {
       job.set(BINARY_PREFIX+key,
-              URLEncoder.encode(new String(value, "ISO-8859-1"),
-                                "ISO-8859-1"));
+              URLEncoder.encode(new String(value, StandardCharsets.ISO_8859_1),
+                                StandardCharsets.ISO_8859_1.name()));
     } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException(e);
     }
   }
 
diff --git a/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java b/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java
index 40595d2..3714988 100644
--- a/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java
+++ b/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.util.Map;
 import java.net.URLDecoder;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.fs.FileSystem;
@@ -95,8 +96,8 @@ public class AvroOutputFormat <T>
                        e.getValue());
       if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
         writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
-                       URLDecoder.decode(e.getValue(), "ISO-8859-1")
-                       .getBytes("ISO-8859-1"));
+            URLDecoder.decode(e.getValue(), StandardCharsets.ISO_8859_1.name())
+                .getBytes(StandardCharsets.ISO_8859_1));
     }
   }
 
diff --git a/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java b/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java
index 13846d9..9d566db 100644
--- a/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java
+++ b/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java
@@ -22,8 +22,8 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 
 import java.io.File;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.avro.Schema;
 import org.apache.avro.file.DataFileReader;
@@ -43,8 +43,6 @@ public class TestAvroTextOutputFormat {
   @Rule
   public TemporaryFolder tmpFolder = new TemporaryFolder();
 
-  private static final String UTF8 = "UTF-8";
-
   @Test
   public void testAvroTextRecordWriter() throws Exception {
     File file = new File(tmpFolder.getRoot().getPath(), "writer");
@@ -55,7 +53,7 @@ public class TestAvroTextOutputFormat {
       new DataFileWriter<>(datumWriter);
     fileWriter.create(schema, file);
     RecordWriter<Object, Object> rw = new AvroTextOutputFormat<>()
-      .new AvroTextRecordWriter(fileWriter, "\t".getBytes(UTF8));
+      .new AvroTextRecordWriter(fileWriter, "\t".getBytes(StandardCharsets.UTF_8));
 
     rw.write(null, null);
     rw.write(null, NullWritable.get());
@@ -85,10 +83,10 @@ public class TestAvroTextOutputFormat {
     assertFalse("End", fileReader.hasNext());
   }
 
-  private String asString(ByteBuffer buf) throws UnsupportedEncodingException {
+  private String asString(ByteBuffer buf) {
     byte[] b = new byte[buf.remaining()];
     buf.get(b);
-    return new String(b, UTF8);
+    return new String(b, StandardCharsets.UTF_8);
   }
 
 }
diff --git a/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java b/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
index 03fe3a7..700e4b9 100644
--- a/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
+++ b/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
@@ -19,6 +19,7 @@ package org.apache.trevni;
 
 import java.io.IOException;
 import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.util.Map;
 import java.util.LinkedHashMap;
 
@@ -131,11 +132,7 @@ public class MetaData<T extends MetaData> extends LinkedHashMap<String,byte[]> {
     for (Map.Entry<String,byte[]> e : entrySet()) {
       builder.append(e.getKey());
       builder.append('=');
-      try {
-        builder.append(new String(e.getValue(), "ISO-8859-1"));
-      } catch (java.io.UnsupportedEncodingException error) {
-        throw new TrevniRuntimeException(error);
-      }
+      builder.append(new String(e.getValue(), StandardCharsets.ISO_8859_1));
       builder.append(' ');
     }
     builder.append('}');
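
Several hunks above (JacksonUtils, AvroJob, AvroOutputFormat, MetaData) switch to StandardCharsets.ISO_8859_1 rather than UTF_8. ISO-8859-1 maps each of the 256 byte values to the char of the same value, so arbitrary byte[] data survives a round trip through String, which UTF-8 does not guarantee. A small self-contained check of that property (illustrative, not part of the commit):

    import java.nio.charset.StandardCharsets;
    import java.util.Arrays;

    public class Latin1RoundTrip {
      public static void main(String[] args) {
        byte[] original = new byte[256];
        for (int i = 0; i < 256; i++) {
          original[i] = (byte) i;
        }
        // ISO-8859-1: every byte maps to the char with the same value,
        // so the round trip is lossless.
        byte[] latin1 = new String(original, StandardCharsets.ISO_8859_1)
            .getBytes(StandardCharsets.ISO_8859_1);
        // UTF-8: bytes 0x80-0xFF do not form valid sequences in this input
        // and are replaced with U+FFFD, so the round trip is lossy.
        byte[] utf8 = new String(original, StandardCharsets.UTF_8)
            .getBytes(StandardCharsets.UTF_8);
        System.out.println("ISO-8859-1 lossless: " + Arrays.equals(original, latin1));
        System.out.println("UTF-8 lossless:      " + Arrays.equals(original, utf8));
      }
    }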