You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dm...@apache.org on 2020/07/22 13:43:04 UTC
[hive] branch master updated: HIVE-22674: Replace Base64 in serde Package (David Mollitor, reviewed by Naveen Gangam)
This is an automated email from the ASF dual-hosted git repository.
dmollitor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new eacb4f3 HIVE-22674: Replace Base64 in serde Package (David Mollitor, reviewed by Naveen Gangam)
eacb4f3 is described below
commit eacb4f344352b66009e8f84797d9db4c3ae0ade7
Author: belugabehr <12...@users.noreply.github.com>
AuthorDate: Wed Jul 22 09:42:55 2020 -0400
HIVE-22674: Replace Base64 in serde Package (David Mollitor, reviewed by Naveen Gangam)
---
.../clientpositive/llap/compute_stats_binary.q.out | 2 +-
.../org/apache/hadoop/hive/serde2/lazy/LazyBinary.java | 16 +++++++++-------
.../org/apache/hadoop/hive/serde2/lazy/LazyUtils.java | 4 ++--
.../hive/serde2/lazy/fast/LazySimpleSerializeWrite.java | 6 +++---
.../hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java | 6 ++++--
5 files changed, 19 insertions(+), 15 deletions(-)
diff --git a/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out b/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out
index 133c01e..fc90c89 100644
--- a/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out
+++ b/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out
@@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 16) from tab_binary
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_binary
#### A masked pattern was here ####
-{"columntype":"Binary","maxlength":36,"avglength":20.0,"countnulls":0}
+{"columntype":"Binary","maxlength":58,"avglength":32.5,"countnulls":0}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java
index 8c594a8..6ce4906 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java
@@ -18,9 +18,11 @@
package org.apache.hadoop.hive.serde2.lazy;
-import org.apache.commons.codec.binary.Base64;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+
+import java.util.Base64;
+
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector;
import org.apache.hadoop.io.BytesWritable;
@@ -47,17 +49,17 @@ public class LazyBinary extends LazyPrimitive<LazyBinaryObjectInspector, BytesWr
byte[] recv = new byte[length];
System.arraycopy(bytes.getData(), start, recv, 0, length);
byte[] decoded = decodeIfNeeded(recv);
- // use the original bytes in case decoding should fail
- decoded = decoded.length > 0 ? decoded : recv;
data.set(decoded, 0, decoded.length);
}
// todo this should be configured in serde
public static byte[] decodeIfNeeded(byte[] recv) {
- boolean arrayByteBase64 = Base64.isArrayByteBase64(recv);
- if (LOG.isDebugEnabled() && arrayByteBase64) {
- LOG.debug("Data only contains Base64 alphabets only so try to decode the data.");
+ try {
+ return Base64.getDecoder().decode(recv);
+ } catch (IllegalArgumentException e) {
+ // use the original bytes in case decoding should fail
+ LOG.debug("Data does not contain only Base64 characters so return original byte array", e);
+ return recv;
}
- return arrayByteBase64 ? Base64.decodeBase64(recv) : recv;
}
}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
index 544a668..65a76ac 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
@@ -24,9 +24,9 @@ import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.Arrays;
+import java.util.Base64;
import java.util.Map;
-import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -294,7 +294,7 @@ public final class LazyUtils {
BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
byte[] toEncode = new byte[bw.getLength()];
System.arraycopy(bw.getBytes(), 0,toEncode, 0, bw.getLength());
- byte[] toWrite = Base64.encodeBase64(toEncode);
+ byte[] toWrite = Base64.getEncoder().withoutPadding().encode(toEncode);
out.write(toWrite, 0, toWrite.length);
break;
}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
index a42d6f4..4be9c40 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
@@ -21,11 +21,11 @@ package org.apache.hadoop.hive.serde2.lazy.fast;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayDeque;
+import java.util.Base64;
import java.util.Deque;
import java.util.List;
import java.util.Map;
-import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import org.slf4j.Logger;
@@ -277,7 +277,7 @@ public final class LazySimpleSerializeWrite implements SerializeWrite {
beginPrimitive();
byte[] toEncode = new byte[v.length];
System.arraycopy(v, 0, toEncode, 0, v.length);
- byte[] toWrite = Base64.encodeBase64(toEncode);
+ byte[] toWrite = Base64.getEncoder().withoutPadding().encode(toEncode);
output.write(toWrite, 0, toWrite.length);
finishPrimitive();
}
@@ -287,7 +287,7 @@ public final class LazySimpleSerializeWrite implements SerializeWrite {
beginPrimitive();
byte[] toEncode = new byte[length];
System.arraycopy(v, start, toEncode, 0, length);
- byte[] toWrite = Base64.encodeBase64(toEncode);
+ byte[] toWrite = Base64.getEncoder().withoutPadding().encode(toEncode);
output.write(toWrite, 0, toWrite.length);
finishPrimitive();
}
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
index c697dcf..91857d2 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
@@ -18,13 +18,13 @@
package org.apache.hadoop.hive.serde2.lazy;
import java.io.IOException;
+import java.util.Base64;
import java.util.List;
import java.util.Properties;
import java.util.Random;
-import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.ByteStream;
@@ -76,7 +76,9 @@ public class TestLazySimpleSerDe {
Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\tNULL\t");
t.append(new byte[]{(byte)Integer.parseInt("10111111", 2)}, 0, 1);
StringBuilder sb = new StringBuilder("123\t456\t789\t1000\t5.3\thive and hadoop\t1\tNULL\t");
- String s = sb.append(new String(Base64.encodeBase64(new byte[]{(byte)Integer.parseInt("10111111", 2)}))).toString();
+ String s = sb.append(
+ Base64.getEncoder().withoutPadding().encodeToString(new byte[] { (byte) Integer.parseInt("10111111", 2) }))
+ .toString();
Object[] expectedFieldsData = {new ByteWritable((byte) 123),
new ShortWritable((short) 456), new IntWritable(789),
new LongWritable(1000), new DoubleWritable(5.3),