You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2020/04/22 07:31:19 UTC

[parquet-mr] branch master updated: PARQUET-1844: Eliminate using commons-lang (#787)

This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 70d7f52  PARQUET-1844: Eliminate using commons-lang (#787)
70d7f52 is described below

commit 70d7f5249042bcc02bd6d9a4c8748f38823bd8a9
Author: Gabor Szadovszky <ga...@apache.org>
AuthorDate: Wed Apr 22 09:31:07 2020 +0200

    PARQUET-1844: Eliminate using commons-lang (#787)
---
 .../org/apache/parquet/cascading/TupleReadSupport.java  |  9 +++++----
 parquet-cli/pom.xml                                     |  5 +++++
 .../parquet/cli/commands/ParquetMetadataCommand.java    |  4 ++--
 .../apache/parquet/cli/commands/ShowPagesCommand.java   |  4 ++--
 .../java/org/apache/parquet/cli/csv/CSVProperties.java  | 17 +++++++++++++----
 .../filter2/dictionarylevel/DictionaryFilterTest.java   |  5 +++--
 pom.xml                                                 |  1 +
 7 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/TupleReadSupport.java b/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/TupleReadSupport.java
index 42a5926..7b3fa0e 100644
--- a/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/TupleReadSupport.java
+++ b/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/TupleReadSupport.java
@@ -19,11 +19,11 @@
 package org.apache.parquet.cascading;
 
 import java.util.Map;
+import java.util.StringJoiner;
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.commons.lang.StringUtils;
 
 import cascading.tuple.Tuple;
 import cascading.tuple.Fields;
@@ -44,7 +44,7 @@ public class TupleReadSupport extends ReadSupport<Tuple> {
     if(fieldsString == null)
       return Fields.ALL;
 
-    String[] parts = StringUtils.split(fieldsString, ":");
+    String[] parts = fieldsString.split(":");
     if(parts.length == 0)
       return Fields.ALL;
     else
@@ -52,8 +52,9 @@ public class TupleReadSupport extends ReadSupport<Tuple> {
   }
 
   static protected void setRequestedFields(JobConf configuration, Fields fields) {
-    String fieldsString = StringUtils.join(fields.iterator(), ":");
-    configuration.set(PARQUET_CASCADING_REQUESTED_FIELDS, fieldsString);
+    StringJoiner joiner = new StringJoiner(":");
+    fields.forEach(f -> joiner.add(f.toString()));
+    configuration.set(PARQUET_CASCADING_REQUESTED_FIELDS, joiner.toString());
   }
 
   @Override
diff --git a/parquet-cli/pom.xml b/parquet-cli/pom.xml
index 809b1f7..6f1c6d9 100644
--- a/parquet-cli/pom.xml
+++ b/parquet-cli/pom.xml
@@ -59,6 +59,11 @@
       <version>${opencsv.version}</version>
     </dependency>
     <dependency>
+       <groupId>org.apache.commons</groupId>
+       <artifactId>commons-text</artifactId>
+       <version>${commons-text.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.apache.parquet</groupId>
       <artifactId>parquet-jackson</artifactId>
       <version>${project.version}</version>
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java
index a452369..c72f862 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java
@@ -26,7 +26,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import org.apache.parquet.cli.BaseCommand;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.text.TextStringBuilder;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.EncodingStats;
@@ -128,7 +128,7 @@ public class ParquetMetadataCommand extends BaseCommand {
         humanReadable(((float) compressedSize) / rowCount),
         start, humanReadable(compressedSize),
         filePath != null ? " path: " + filePath : "",
-        StringUtils.leftPad("", 80, '-')));
+        new TextStringBuilder(80).appendPadding(80, '-')));
 
     int size = maxSize(Iterables.transform(rowGroup.getColumns(),
         new Function<ColumnChunkMetaData, String>() {
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
index 1ac03aa..5832106 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
@@ -25,7 +25,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import org.apache.parquet.cli.BaseCommand;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.text.TextStringBuilder;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.page.DataPage;
 import org.apache.parquet.column.page.DataPageV1;
@@ -120,7 +120,7 @@ public class ShowPagesCommand extends BaseCommand {
 
     // TODO: Show total column size and overall size per value in the column summary line
     for (String columnName : formatted.keySet()) {
-      console.info(String.format("\nColumn: %s\n%s", columnName, StringUtils.leftPad("", 80, '-')));
+      console.info(String.format("\nColumn: %s\n%s", columnName, new TextStringBuilder(80).appendPadding(80, '-')));
       console.info(formatter.getHeader());
       for (String line : formatted.get(columnName)) {
         console.info(line);
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/csv/CSVProperties.java b/parquet-cli/src/main/java/org/apache/parquet/cli/csv/CSVProperties.java
index bd4ba06..7d60947 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/csv/CSVProperties.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/csv/CSVProperties.java
@@ -20,7 +20,8 @@
 package org.apache.parquet.cli.csv;
 
 import javax.annotation.concurrent.Immutable;
-import org.apache.commons.lang.StringEscapeUtils;
+
+import org.apache.commons.text.StringEscapeUtils;
 
 @Immutable
 public class CSVProperties {
@@ -62,23 +63,31 @@ public class CSVProperties {
     private int linesToSkip = DEFAULT_LINES_TO_SKIP;
     private String header = null;
 
+    private static String unescapeJava(String str) {
+      // Keep a lone backslash as-is (StringEscapeUtils would remove it); compare by value, not by reference
+      if ("\\".equals(str)) {
+        return str;
+      }
+      return StringEscapeUtils.unescapeJava(str);
+    }
+
     public Builder charset(String charset) {
       this.charset = charset;
       return this;
     }
 
     public Builder delimiter(String delimiter) {
-      this.delimiter = StringEscapeUtils.unescapeJava(delimiter);
+      this.delimiter = unescapeJava(delimiter);
       return this;
     }
 
     public Builder quote(String quote) {
-      this.quote = StringEscapeUtils.unescapeJava(quote);
+      this.quote = unescapeJava(quote);
       return this;
     }
 
     public Builder escape(String escape) {
-      this.escape = StringEscapeUtils.unescapeJava(escape);
+      this.escape = unescapeJava(escape);
       return this;
     }
 
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
index 3625ed0..1e243f8 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
@@ -21,7 +21,8 @@ package org.apache.parquet.filter2.dictionarylevel;
 
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Sets;
-import org.apache.commons.lang.ArrayUtils;
+import com.google.common.primitives.Ints;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -524,7 +525,7 @@ public class DictionaryFilterTest {
   public void testInverseUdp() throws Exception {
     InInt32UDP droppable = new InInt32UDP(ImmutableSet.of(42));
     InInt32UDP undroppable = new InInt32UDP(ImmutableSet.of(205));
-    Set<Integer> allValues = ImmutableSet.copyOf(Arrays.asList(ArrayUtils.toObject(intValues)));
+    Set<Integer> allValues = ImmutableSet.copyOf(Ints.asList(intValues));
     InInt32UDP completeMatch = new InInt32UDP(allValues);
 
     FilterPredicate inverse =
diff --git a/pom.xml b/pom.xml
index 91e38c6..9ef8081 100644
--- a/pom.xml
+++ b/pom.xml
@@ -110,6 +110,7 @@
     <opencsv.version>2.3</opencsv.version>
     <jcommander.version>1.72</jcommander.version>
     <zstd-jni.version>1.4.0-1</zstd-jni.version>
+    <commons-text.version>1.8</commons-text.version>
 
     <!-- properties for the profiles -->
     <surefire.argLine>-Xmx512m</surefire.argLine>