You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ie...@apache.org on 2019/01/28 15:06:05 UTC
[avro] branch master updated: AVRO-2308: Use Java StandardCharsets
This is an automated email from the ASF dual-hosted git repository.
iemejia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 3c16299 AVRO-2308: Use Java StandardCharsets
3c16299 is described below
commit 3c16299956d64dc4a1b76161228ebb545d3d400b
Author: Beluga Behr <da...@gmail.com>
AuthorDate: Sun Jan 27 19:32:25 2019 -0500
AVRO-2308: Use Java StandardCharsets
---
.../src/main/java/org/apache/avro/SchemaNormalization.java | 12 ++++--------
.../main/java/org/apache/avro/file/DataFileReader12.java | 8 ++------
.../src/main/java/org/apache/avro/file/DataFileStream.java | 8 ++------
.../src/main/java/org/apache/avro/file/DataFileWriter.java | 14 +++-----------
.../src/main/java/org/apache/avro/reflect/ReflectData.java | 13 ++++---------
.../java/avro/src/main/java/org/apache/avro/util/Utf8.java | 7 +++----
.../java/org/apache/avro/util/internal/JacksonUtils.java | 11 +++--------
.../test/java/org/apache/avro/TestSchemaNormalization.java | 11 +++++------
.../avro/src/test/java/org/apache/avro/util/TestUtf8.java | 8 ++++----
.../java/org/apache/avro/ipc/SaslSocketTransceiver.java | 10 +++-------
.../src/main/java/org/apache/avro/mapred/AvroJob.java | 6 +++---
.../main/java/org/apache/avro/mapred/AvroOutputFormat.java | 5 +++--
.../org/apache/avro/mapred/TestAvroTextOutputFormat.java | 10 ++++------
.../core/src/main/java/org/apache/trevni/MetaData.java | 7 ++-----
14 files changed, 45 insertions(+), 85 deletions(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaNormalization.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaNormalization.java
index ed9659b..07d8c4d 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/SchemaNormalization.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaNormalization.java
@@ -20,7 +20,7 @@ package org.apache.avro;
import java.util.Map;
import java.util.HashMap;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@@ -89,18 +89,14 @@ public class SchemaNormalization {
public static byte[] parsingFingerprint(String fpName, Schema s)
throws NoSuchAlgorithmException
{
- try {
- return fingerprint(fpName, toParsingForm(s).getBytes("UTF-8"));
- } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); }
+ return fingerprint(fpName,
+ toParsingForm(s).getBytes(StandardCharsets.UTF_8));
}
/** Returns {@link #fingerprint64} applied to the parsing canonical form
* of the supplied schema. */
public static long parsingFingerprint64(Schema s) {
- try {
- return fingerprint64(toParsingForm(s).getBytes("UTF-8"));
- } catch (java.io.UnsupportedEncodingException e)
- { throw new RuntimeException(e); }
+ return fingerprint64(toParsingForm(s).getBytes(StandardCharsets.UTF_8));
}
private static Appendable build(Map<String,String> env, Schema s,
diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
index 0c937bf..1e5193e 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
@@ -18,9 +18,9 @@
package org.apache.avro.file;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import java.io.Closeable;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
@@ -113,11 +113,7 @@ public class DataFileReader12<D> implements FileReader<D>, Closeable {
if (value == null) {
return null;
}
- try {
- return new String(value, "UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
+ return new String(value, StandardCharsets.UTF_8);
}
/** Return the value of a metadata property. */
public synchronized long getMetaLong(String key) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
index 562158b..377d2a6 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
@@ -21,8 +21,8 @@ import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Closeable;
-import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
@@ -165,11 +165,7 @@ public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {
if (value == null) {
return null;
}
- try {
- return new String(value, "UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
+ return new String(value, StandardCharsets.UTF_8);
}
/** Return the value of a metadata property. */
public long getMetaLong(String key) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
index a185172..19273d1 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
@@ -25,8 +25,8 @@ import java.io.FilterOutputStream;
import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
@@ -268,11 +268,7 @@ public class DataFileWriter<D> implements Closeable, Flushable {
}
private DataFileWriter<D> setMetaInternal(String key, String value) {
- try {
- return setMetaInternal(key, value.getBytes("UTF-8"));
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
+ return setMetaInternal(key, value.getBytes(StandardCharsets.UTF_8));
}
/** Set a metadata property. */
@@ -289,11 +285,7 @@ public class DataFileWriter<D> implements Closeable, Flushable {
/** Set a metadata property. */
public DataFileWriter<D> setMeta(String key, String value) {
- try {
- return setMeta(key, value.getBytes("UTF-8"));
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
+ return setMeta(key, value.getBytes(StandardCharsets.UTF_8));
}
/** Set a metadata property. */
public DataFileWriter<D> setMeta(String key, long value) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
index 10dab93..3d2c30f 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
@@ -18,7 +18,6 @@
package org.apache.avro.reflect;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
import java.lang.reflect.GenericArrayType;
@@ -28,6 +27,7 @@ import java.lang.reflect.Parameter;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -472,14 +472,9 @@ public class ReflectData extends SpecificData {
}
String name = keySchema.getFullName() + valueSchema.getFullName();
- long fingerprint = 0;
- try {
- fingerprint = SchemaNormalization.fingerprint64(name.getBytes("UTF-8"));
- } catch (UnsupportedEncodingException e) {
- String msg = "Unable to create fingerprint for ("
- + keyType + ", " + valueType + ") pair";
- throw new AvroRuntimeException(msg, e);
- }
+ long fingerprint = SchemaNormalization
+ .fingerprint64(name.getBytes(StandardCharsets.UTF_8));
+
if (fingerprint < 0) fingerprint = -fingerprint; // ignore sign
String fpString = Long.toString(fingerprint, 16); // hex
return NS_MAP_ARRAY_RECORD + fpString;
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index 9a62664..8b1f9cb 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -17,7 +17,7 @@
*/
package org.apache.avro.util;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.io.BinaryData;
@@ -30,7 +30,6 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
private static final String MAX_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength";
private static final int MAX_LENGTH;
private static final byte[] EMPTY = new byte[0];
- private static final Charset UTF8 = Charset.forName("UTF-8");
static {
String o = System.getProperty(MAX_LENGTH_PROPERTY);
@@ -116,7 +115,7 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
public String toString() {
if (this.length == 0) return "";
if (this.string == null) {
- this.string = new String(bytes, 0, length, UTF8);
+ this.string = new String(bytes, 0, length, StandardCharsets.UTF_8);
}
return this.string;
}
@@ -157,7 +156,7 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
/** Gets the UTF-8 bytes for a String */
public static final byte[] getBytesFor(String str) {
- return str.getBytes(UTF8);
+ return str.getBytes(StandardCharsets.UTF_8);
}
}
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java
index 5f5e2fe..0585689 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java
@@ -18,7 +18,7 @@
package org.apache.avro.util.internal;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
@@ -35,7 +35,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.util.TokenBuffer;
public class JacksonUtils {
- static final String BYTES_CHARSET = "ISO-8859-1";
private JacksonUtils() {
}
@@ -71,7 +70,7 @@ public class JacksonUtils {
}
generator.writeEndArray();
} else if (datum instanceof byte[]) { // bytes, fixed
- generator.writeString(new String((byte[]) datum, BYTES_CHARSET));
+ generator.writeString(new String((byte[]) datum, StandardCharsets.ISO_8859_1));
} else if (datum instanceof CharSequence || datum instanceof Enum<?>) { // string, enum
generator.writeString(datum.toString());
} else if (datum instanceof Double) { // double
@@ -123,11 +122,7 @@ public class JacksonUtils {
return jsonNode.asText();
} else if (schema.getType().equals(Schema.Type.BYTES)
|| schema.getType().equals(Schema.Type.FIXED)) {
- try {
- return jsonNode.textValue().getBytes(BYTES_CHARSET);
- } catch (UnsupportedEncodingException e) {
- throw new AvroRuntimeException(e);
- }
+ return jsonNode.textValue().getBytes(StandardCharsets.ISO_8859_1);
}
} else if (jsonNode.isArray()) {
List l = new ArrayList();
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java
index 063e0b6..8c7f6c0 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;
@@ -106,12 +107,10 @@ public class TestSchemaNormalization {
// by 64 zero bits). This both deals with the fact that
// CRCs ignore leading zeros, and also ensures some degree of
// randomness for small inputs
- try {
- long tmp = altExtend(SchemaNormalization.EMPTY64, 64, ONE,
- s.getBytes("UTF-8"));
- return altExtend(SchemaNormalization.EMPTY64, 64, tmp, POSTFIX);
- } catch (java.io.UnsupportedEncodingException e)
- { throw new RuntimeException(e); }
+
+ long tmp = altExtend(SchemaNormalization.EMPTY64, 64, ONE,
+ s.getBytes(StandardCharsets.UTF_8));
+ return altExtend(SchemaNormalization.EMPTY64, 64, tmp, POSTFIX);
}
private static long altExtend(long poly, int degree, long fp, byte[] b) {
diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
index 04709f5..677b7bd 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
@@ -20,13 +20,13 @@ package org.apache.avro.util;
import static junit.framework.Assert.assertSame;
import static org.junit.Assert.assertEquals;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
import org.junit.Test;
public class TestUtf8 {
@Test public void testByteConstructor() throws Exception {
- byte[] bs = "Foo".getBytes("UTF-8");
+ byte[] bs = "Foo".getBytes(StandardCharsets.UTF_8);
Utf8 u = new Utf8(bs);
assertEquals(bs.length, u.getLength());
for (int i=0; i<bs.length; i++) {
@@ -34,8 +34,8 @@ public class TestUtf8 {
}
}
- @Test public void testArrayReusedWhenLargerThanRequestedSize() throws UnsupportedEncodingException {
- byte[] bs = "55555".getBytes("UTF-8");
+ @Test public void testArrayReusedWhenLargerThanRequestedSize() {
+ byte[] bs = "55555".getBytes(StandardCharsets.UTF_8);
Utf8 u = new Utf8(bs);
assertEquals(5, u.getByteLength());
byte[] content = u.getBytes();
diff --git a/lang/java/ipc/src/main/java/org/apache/avro/ipc/SaslSocketTransceiver.java b/lang/java/ipc/src/main/java/org/apache/avro/ipc/SaslSocketTransceiver.java
index b934fd1..249b78a 100644
--- a/lang/java/ipc/src/main/java/org/apache/avro/ipc/SaslSocketTransceiver.java
+++ b/lang/java/ipc/src/main/java/org/apache/avro/ipc/SaslSocketTransceiver.java
@@ -21,9 +21,9 @@ package org.apache.avro.ipc;
import java.io.IOException;
import java.io.EOFException;
-import java.io.UnsupportedEncodingException;
import java.net.SocketAddress;
import java.nio.channels.SocketChannel;
+import java.nio.charset.StandardCharsets;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
@@ -169,12 +169,8 @@ public class SaslSocketTransceiver extends Transceiver {
dataIsWrapped = (qop != null && !qop.equalsIgnoreCase("auth"));
}
- private String toString(ByteBuffer buffer) throws IOException {
- try {
- return new String(buffer.array(), "UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new IOException(e.toString(), e);
- }
+ private String toString(ByteBuffer buffer) {
+ return new String(buffer.array(), StandardCharsets.UTF_8);
}
@Override public synchronized List<ByteBuffer> readBuffers()
diff --git a/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroJob.java b/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroJob.java
index 729a855..9d23fd3 100644
--- a/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroJob.java
+++ b/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroJob.java
@@ -21,6 +21,7 @@ package org.apache.avro.mapred;
import java.util.Collection;
import java.lang.reflect.Constructor;
import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
import java.io.UnsupportedEncodingException;
import org.apache.hadoop.conf.Configuration;
@@ -110,10 +111,10 @@ public class AvroJob {
public static void setOutputMeta(JobConf job, String key, byte[] value) {
try {
job.set(BINARY_PREFIX+key,
- URLEncoder.encode(new String(value, "ISO-8859-1"),
- "ISO-8859-1"));
+ URLEncoder.encode(new String(value, StandardCharsets.ISO_8859_1),
+ StandardCharsets.ISO_8859_1.name()));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
}
diff --git a/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java b/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java
index 40595d2..3714988 100644
--- a/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java
+++ b/lang/java/mapred/src/main/java/org/apache/avro/mapred/AvroOutputFormat.java
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.net.URLDecoder;
+import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.fs.FileSystem;
@@ -95,8 +96,8 @@ public class AvroOutputFormat <T>
e.getValue());
if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
- URLDecoder.decode(e.getValue(), "ISO-8859-1")
- .getBytes("ISO-8859-1"));
+ URLDecoder.decode(e.getValue(), StandardCharsets.ISO_8859_1.name())
+ .getBytes(StandardCharsets.ISO_8859_1));
}
}
diff --git a/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java b/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java
index 13846d9..9d566db 100644
--- a/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java
+++ b/lang/java/mapred/src/test/java/org/apache/avro/mapred/TestAvroTextOutputFormat.java
@@ -22,8 +22,8 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import java.io.File;
-import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
@@ -43,8 +43,6 @@ public class TestAvroTextOutputFormat {
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
- private static final String UTF8 = "UTF-8";
-
@Test
public void testAvroTextRecordWriter() throws Exception {
File file = new File(tmpFolder.getRoot().getPath(), "writer");
@@ -55,7 +53,7 @@ public class TestAvroTextOutputFormat {
new DataFileWriter<>(datumWriter);
fileWriter.create(schema, file);
RecordWriter<Object, Object> rw = new AvroTextOutputFormat<>()
- .new AvroTextRecordWriter(fileWriter, "\t".getBytes(UTF8));
+ .new AvroTextRecordWriter(fileWriter, "\t".getBytes(StandardCharsets.UTF_8));
rw.write(null, null);
rw.write(null, NullWritable.get());
@@ -85,10 +83,10 @@ public class TestAvroTextOutputFormat {
assertFalse("End", fileReader.hasNext());
}
- private String asString(ByteBuffer buf) throws UnsupportedEncodingException {
+ private String asString(ByteBuffer buf) {
byte[] b = new byte[buf.remaining()];
buf.get(b);
- return new String(b, UTF8);
+ return new String(b, StandardCharsets.UTF_8);
}
}
diff --git a/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java b/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
index 03fe3a7..700e4b9 100644
--- a/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
+++ b/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
@@ -19,6 +19,7 @@ package org.apache.trevni;
import java.io.IOException;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.LinkedHashMap;
@@ -131,11 +132,7 @@ public class MetaData<T extends MetaData> extends LinkedHashMap<String,byte[]> {
for (Map.Entry<String,byte[]> e : entrySet()) {
builder.append(e.getKey());
builder.append('=');
- try {
- builder.append(new String(e.getValue(), "ISO-8859-1"));
- } catch (java.io.UnsupportedEncodingException error) {
- throw new TrevniRuntimeException(error);
- }
+ builder.append(new String(e.getValue(), StandardCharsets.ISO_8859_1));
builder.append(' ');
}
builder.append('}');