You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ph...@apache.org on 2019/02/01 19:15:26 UTC

[impala] 02/05: IMPALA-7934: Switch to java.util.Base64 implementation

This is an automated email from the ASF dual-hosted git repository.

philz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b0942296ab5f24660473abc218d45978fc402d81
Author: Fredy Wijaya <fw...@cloudera.com>
AuthorDate: Tue Jan 22 13:29:34 2019 -0800

    IMPALA-7934: Switch to java.util.Base64 implementation
    
    It is shown that java.util.Base64 implementation seems to have better
    performance compared to Apache Commons Codec's Base64 implementation,
    which can benefit operations, such as incremental stats. This patch
    switches the implementation of Base64 from Apache Commons
    Codec to java.util.Base64 implementation.
    
    This is the JMH benchmark result comparing java.util.Base64 vs Commons
    Codec's Base64:
    
    Result "base64.Base64Benchmark.javaBase64":
      31.149 ±(99.9%) 1.567 ms/op [Average]
      (min, avg, max) = (27.564, 31.149, 34.675), stdev = 2.091
      CI (99.9%): [29.583, 32.716] (assumes normal distribution)
    
    Result "base64.Base64Benchmark.codecBase64":
      65.921 ±(99.9%) 4.762 ms/op [Average]
      (min, avg, max) = (58.072, 65.921, 80.470), stdev = 6.357
      CI (99.9%): [61.159, 70.683] (assumes normal distribution)
    
    Benchmark                    Mode  Cnt   Score   Error Units
    Base64Benchmark.javaBase64   avgt   25  31.149 ± 1.567 ms/op
    Base64Benchmark.codecBase64  avgt   25  65.921 ± 4.762 ms/op
    
    Testing:
    - Ran all FE tests
    - Created a table with incremental stats without a patch and read it
      with the patch
    
    Change-Id: I2d43d4a4f073a800d963ce4c77f21c9efa8471ac
    Reviewed-on: http://gerrit.cloudera.org:8080/12250
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 fe/src/main/java/org/apache/impala/catalog/Db.java                 | 4 ++--
 fe/src/main/java/org/apache/impala/catalog/PartitionStatsUtil.java | 6 +++---
 fe/src/main/java/org/apache/impala/util/FunctionUtils.java         | 4 ++--
 fe/src/test/java/org/apache/impala/catalog/CatalogTest.java        | 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/Db.java b/fe/src/main/java/org/apache/impala/catalog/Db.java
index 2e26301..5878b95 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Db.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Db.java
@@ -18,12 +18,12 @@
 package org.apache.impala.catalog;
 
 import java.util.ArrayList;
+import java.util.Base64;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.codec.binary.Base64;
 import org.apache.impala.analysis.ColumnDef;
 import org.apache.impala.analysis.KuduPartitionParam;
 import org.apache.impala.common.ImpalaException;
@@ -246,7 +246,7 @@ public class Db extends CatalogObjectImpl implements FeDb {
       TSerializer serializer =
           new TSerializer(new TCompactProtocol.Factory());
       byte[] serializedFn = serializer.serialize(fn.toThrift());
-      String base64Fn = Base64.encodeBase64String(serializedFn);
+      String base64Fn = Base64.getEncoder().encodeToString(serializedFn);
       String fnKey = FUNCTION_INDEX_PREFIX + fn.signatureString();
       if (base64Fn.length() > HIVE_METASTORE_DB_PARAM_LIMIT_BYTES) {
         throw new ImpalaRuntimeException(
diff --git a/fe/src/main/java/org/apache/impala/catalog/PartitionStatsUtil.java b/fe/src/main/java/org/apache/impala/catalog/PartitionStatsUtil.java
index 38d304c..bc36439 100644
--- a/fe/src/main/java/org/apache/impala/catalog/PartitionStatsUtil.java
+++ b/fe/src/main/java/org/apache/impala/catalog/PartitionStatsUtil.java
@@ -18,10 +18,10 @@
 package org.apache.impala.catalog;
 
 import java.util.ArrayList;
+import java.util.Base64;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.codec.binary.Base64;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.ImpalaRuntimeException;
 import org.apache.impala.common.JniUtil;
@@ -109,7 +109,7 @@ public class PartitionStatsUtil {
       }
       encodedStats.append(chunk);
     }
-    byte[] decodedBytes = Base64.decodeBase64(encodedStats.toString());
+    byte[] decodedBytes = Base64.getDecoder().decode(encodedStats.toString());
     TPartitionStats stats = new TPartitionStats();
     JniUtil.deserializeThrift(new TCompactProtocol.Factory(), stats, decodedBytes);
     hasIncrStats.setRef(stats.isSetIntermediate_col_stats());
@@ -177,7 +177,7 @@ public class PartitionStatsUtil {
           "Error decompressing partition stats for " + partition.getPartitionName());
       return;
     }
-    String base64 = new String(Base64.encodeBase64(decompressed));
+    String base64 = new String(Base64.getEncoder().encode(decompressed));
     List<String> chunks =
       chunkStringForHms(base64, MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH);
     params.put(INCREMENTAL_STATS_NUM_CHUNKS, Integer.toString(chunks.size()));
diff --git a/fe/src/main/java/org/apache/impala/util/FunctionUtils.java b/fe/src/main/java/org/apache/impala/util/FunctionUtils.java
index 8b7a5f8..1c02653 100644
--- a/fe/src/main/java/org/apache/impala/util/FunctionUtils.java
+++ b/fe/src/main/java/org/apache/impala/util/FunctionUtils.java
@@ -21,12 +21,12 @@ import java.io.IOException;
 import java.lang.reflect.Method;
 import java.net.URL;
 import java.net.URLClassLoader;
+import java.util.Base64;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
 
-import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.api.FunctionType;
 import org.apache.hadoop.hive.metastore.api.ResourceType;
@@ -154,7 +154,7 @@ public abstract class FunctionUtils {
       try {
         TFunction fn = new TFunction();
         JniUtil.deserializeThrift(protocolFactory, fn,
-            Base64.decodeBase64(entry.getValue()));
+            Base64.getDecoder().decode(entry.getValue()));
         results.add(Function.fromThrift(fn));
       } catch (ImpalaException e) {
         LOG.error("Encountered an error during function load: key=" +
diff --git a/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java b/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java
index bac5864..d6ca5b8 100644
--- a/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/CatalogTest.java
@@ -28,6 +28,7 @@ import static org.junit.Assert.fail;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Base64;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
@@ -38,7 +39,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.GlobalStorageStatistics;
 import org.apache.hadoop.fs.StorageStatistics;
@@ -842,7 +842,7 @@ public class CatalogTest {
     // Test to check if catalog can handle loading corrupt udfs
     Map<String, String> dbParams = new HashMap<>();
     String badFnKey = "impala_registered_function_badFn";
-    String badFnVal = Base64.encodeBase64String("badFn".getBytes());
+    String badFnVal = Base64.getEncoder().encodeToString("badFn".getBytes());
     String dbName = "corrupt_udf_test";
     dbParams.put(badFnKey, badFnVal);
     Db db = catalog_.getDb(dbName);