You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by nk...@apache.org on 2019/09/05 06:56:37 UTC

[parquet-mr] branch master updated: PARQUET-1530: Remove Dependency on commons-codec (#618)

This is an automated email from the ASF dual-hosted git repository.

nkollar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 340d157  PARQUET-1530: Remove Dependency on commons-codec (#618)
340d157 is described below

commit 340d157bda4c33b7d126cad82e222a2cfb1b2953
Author: belugabehr <12...@users.noreply.github.com>
AuthorDate: Thu Sep 5 01:56:31 2019 -0500

    PARQUET-1530: Remove Dependency on commons-codec (#618)
---
 parquet-cli/pom.xml                                     |  6 ------
 .../src/main/java/org/apache/parquet/cli/Util.java      | 17 ++++++-----------
 parquet-cli/src/main/resources/META-INF/LICENSE         |  5 ++---
 parquet-column/pom.xml                                  |  6 ------
 parquet-encoding/pom.xml                                |  6 ------
 .../apache/parquet/hadoop/util/SerializationUtil.java   | 15 +++++++--------
 parquet-tools/src/main/resources/META-INF/LICENSE       |  9 ---------
 pom.xml                                                 |  1 -
 8 files changed, 15 insertions(+), 50 deletions(-)

diff --git a/parquet-cli/pom.xml b/parquet-cli/pom.xml
index 5988cab..84bf966 100644
--- a/parquet-cli/pom.xml
+++ b/parquet-cli/pom.xml
@@ -81,12 +81,6 @@
       <scope>provided</scope>
     </dependency>
     <dependency>
-      <groupId>commons-codec</groupId>
-      <artifactId>commons-codec</artifactId>
-      <version>${commons-codec.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-client</artifactId>
       <version>${hadoop.version}</version>
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
index 961c7f0..cf4b97e 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
@@ -19,11 +19,13 @@
 
 package org.apache.parquet.cli;
 
+import com.google.common.base.Ascii;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Iterables;
-import org.apache.commons.codec.binary.Hex;
+import com.google.common.hash.HashCode;
+
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.EncodingStats;
@@ -126,20 +128,13 @@ public class Util {
   }
 
   public static String humanReadable(byte[] bytes, int len) {
+    Preconditions.checkArgument(len >= 5, "Display length must be minimum 5");
     if (bytes == null || bytes.length == 0) {
       return "null";
     }
 
-    StringBuilder sb = new StringBuilder();
-    String asString = Hex.encodeHexString(bytes);
-    sb.append("0x");
-    if (asString.length() > len - 2) {
-      sb.append(asString.substring(0, (len - 5) / 2)).append("...");
-    } else {
-      sb.append(asString);
-    }
-
-    return sb.toString();
+    final String asString = HashCode.fromBytes(bytes).toString();
+    return "0x" + Ascii.truncate(asString, len - 2, "...");
   }
 
   public static String shortCodec(CompressionCodecName codec) {
diff --git a/parquet-cli/src/main/resources/META-INF/LICENSE b/parquet-cli/src/main/resources/META-INF/LICENSE
index 2b581f8..3479acc 100644
--- a/parquet-cli/src/main/resources/META-INF/LICENSE
+++ b/parquet-cli/src/main/resources/META-INF/LICENSE
@@ -267,11 +267,10 @@ License: http://www.apache.org/licenses/LICENSE-2.0.txt
 
 --------------------------------------------------------------------------------
 
-This product depends on Apache Commons and includes commons-codec,
-commons-pool, and commons-compress in this binary artifact.
+This product depends on Apache Commons and includes commons-pool, and
+commons-compress in this binary artifact.
 
 Copyright: 2002-2015 The Apache Software Foundation.
-Home page: https://commons.apache.org/proper/commons-codec/
 Home page: https://commons.apache.org/proper/commons-pool/
 License: http://www.apache.org/licenses/LICENSE-2.0
 
diff --git a/parquet-column/pom.xml b/parquet-column/pom.xml
index 8bfba02..b2369b7 100644
--- a/parquet-column/pom.xml
+++ b/parquet-column/pom.xml
@@ -47,12 +47,6 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>commons-codec</groupId>
-      <artifactId>commons-codec</artifactId>
-      <version>1.10</version>
-      <scope>compile</scope>
-    </dependency>
-    <dependency>
       <groupId>org.apache.parquet</groupId>
       <artifactId>parquet-encoding</artifactId>
       <version>${project.version}</version>
diff --git a/parquet-encoding/pom.xml b/parquet-encoding/pom.xml
index e3db2ca..6078d7c 100644
--- a/parquet-encoding/pom.xml
+++ b/parquet-encoding/pom.xml
@@ -41,12 +41,6 @@
       <artifactId>parquet-common</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>commons-codec</groupId>
-      <artifactId>commons-codec</artifactId>
-      <version>1.10</version>
-      <scope>compile</scope>
-    </dependency>
 
     <dependency>
       <groupId>org.slf4j</groupId>
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/util/SerializationUtil.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/util/SerializationUtil.java
index 06d5fea..199b774 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/util/SerializationUtil.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/util/SerializationUtil.java
@@ -24,15 +24,12 @@ import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.nio.charset.StandardCharsets;
+import java.util.Base64;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
 
-import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.conf.Configuration;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 /**
  * Serialization utils copied from:
  * https://github.com/kevinweil/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/util/HadoopUtils.java
@@ -40,7 +37,6 @@ import org.slf4j.LoggerFactory;
  * TODO: Refactor elephant-bird so that we can depend on utils like this without extra baggage.
  */
 public final class SerializationUtil {
-  private static final Logger LOG = LoggerFactory.getLogger(SerializationUtil.class);
 
   private SerializationUtil() { }
 
@@ -58,7 +54,9 @@ public final class SerializationUtil {
             ObjectOutputStream oos = new ObjectOutputStream(gos)) {
         oos.writeObject(obj);
       }
-      conf.set(key, new String(Base64.encodeBase64(baos.toByteArray()), StandardCharsets.UTF_8));
+      conf.set(key,
+          new String(Base64.getMimeEncoder().encode(baos.toByteArray()),
+              StandardCharsets.UTF_8));
     }
   }
 
@@ -79,7 +77,8 @@ public final class SerializationUtil {
       return null;
     }
 
-    byte[] bytes = Base64.decodeBase64(b64.getBytes(StandardCharsets.UTF_8));
+    byte[] bytes =
+        Base64.getMimeDecoder().decode(b64.getBytes(StandardCharsets.UTF_8));
 
     try (ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
            GZIPInputStream gis = new GZIPInputStream(bais);
@@ -88,7 +87,7 @@ public final class SerializationUtil {
     } catch (ClassNotFoundException e) {
       throw new IOException("Could not read object from config with key " + key, e);
     } catch (ClassCastException e) {
-      throw new IOException("Couldn't cast object read from config with key " + key, e);
+      throw new IOException("Could not cast object read from config with key " + key, e);
     }
   }
 }
diff --git a/parquet-tools/src/main/resources/META-INF/LICENSE b/parquet-tools/src/main/resources/META-INF/LICENSE
index d796f40..126400c 100644
--- a/parquet-tools/src/main/resources/META-INF/LICENSE
+++ b/parquet-tools/src/main/resources/META-INF/LICENSE
@@ -267,15 +267,6 @@ License: http://www.apache.org/licenses/LICENSE-2.0
 
 --------------------------------------------------------------------------------
 
-This product depends on Apache commons-codec and includes it in this binary
-artifact.
-
-Copyright: 2002-2015 The Apache Software Foundation.
-Home page: https://commons.apache.org/proper/commons-codec/
-License: http://www.apache.org/licenses/LICENSE-2.0
-
---------------------------------------------------------------------------------
-
 This product depends on Google guava and includes it in this binary artifact.
 
 Copyright: 2010-2015 The Guava Authors
diff --git a/pom.xml b/pom.xml
index 1808dba..e1d131e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -105,7 +105,6 @@
     <!-- parquet-cli dependencies -->
     <opencsv.version>2.3</opencsv.version>
     <jcommander.version>1.35</jcommander.version>
-    <commons-codec.version>1.10</commons-codec.version>
   </properties>
 
   <modules>