You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2020/04/22 07:31:19 UTC
[parquet-mr] branch master updated: PARQUET-1844: Eliminate using
commons-lang (#787)
This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 70d7f52 PARQUET-1844: Eliminate using commons-lang (#787)
70d7f52 is described below
commit 70d7f5249042bcc02bd6d9a4c8748f38823bd8a9
Author: Gabor Szadovszky <ga...@apache.org>
AuthorDate: Wed Apr 22 09:31:07 2020 +0200
PARQUET-1844: Eliminate using commons-lang (#787)
---
.../org/apache/parquet/cascading/TupleReadSupport.java | 9 +++++----
parquet-cli/pom.xml | 5 +++++
.../parquet/cli/commands/ParquetMetadataCommand.java | 4 ++--
.../apache/parquet/cli/commands/ShowPagesCommand.java | 4 ++--
.../java/org/apache/parquet/cli/csv/CSVProperties.java | 17 +++++++++++++----
.../filter2/dictionarylevel/DictionaryFilterTest.java | 5 +++--
pom.xml | 1 +
7 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/TupleReadSupport.java b/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/TupleReadSupport.java
index 42a5926..7b3fa0e 100644
--- a/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/TupleReadSupport.java
+++ b/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/TupleReadSupport.java
@@ -19,11 +19,11 @@
package org.apache.parquet.cascading;
import java.util.Map;
+import java.util.StringJoiner;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.commons.lang.StringUtils;
import cascading.tuple.Tuple;
import cascading.tuple.Fields;
@@ -44,7 +44,7 @@ public class TupleReadSupport extends ReadSupport<Tuple> {
if(fieldsString == null)
return Fields.ALL;
- String[] parts = StringUtils.split(fieldsString, ":");
+ String[] parts = fieldsString.split(":");
if(parts.length == 0)
return Fields.ALL;
else
@@ -52,8 +52,9 @@ public class TupleReadSupport extends ReadSupport<Tuple> {
}
static protected void setRequestedFields(JobConf configuration, Fields fields) {
- String fieldsString = StringUtils.join(fields.iterator(), ":");
- configuration.set(PARQUET_CASCADING_REQUESTED_FIELDS, fieldsString);
+ StringJoiner joiner = new StringJoiner(":");
+ fields.forEach(f -> joiner.add(f.toString()));
+ configuration.set(PARQUET_CASCADING_REQUESTED_FIELDS, joiner.toString());
}
@Override
diff --git a/parquet-cli/pom.xml b/parquet-cli/pom.xml
index 809b1f7..6f1c6d9 100644
--- a/parquet-cli/pom.xml
+++ b/parquet-cli/pom.xml
@@ -59,6 +59,11 @@
<version>${opencsv.version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-text</artifactId>
+ <version>${commons-text.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-jackson</artifactId>
<version>${project.version}</version>
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java
index a452369..c72f862 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java
@@ -26,7 +26,7 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.parquet.cli.BaseCommand;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.text.TextStringBuilder;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.EncodingStats;
@@ -128,7 +128,7 @@ public class ParquetMetadataCommand extends BaseCommand {
humanReadable(((float) compressedSize) / rowCount),
start, humanReadable(compressedSize),
filePath != null ? " path: " + filePath : "",
- StringUtils.leftPad("", 80, '-')));
+ new TextStringBuilder(80).appendPadding(80, '-')));
int size = maxSize(Iterables.transform(rowGroup.getColumns(),
new Function<ColumnChunkMetaData, String>() {
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
index 1ac03aa..5832106 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
@@ -25,7 +25,7 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.parquet.cli.BaseCommand;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.text.TextStringBuilder;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.page.DataPage;
import org.apache.parquet.column.page.DataPageV1;
@@ -120,7 +120,7 @@ public class ShowPagesCommand extends BaseCommand {
// TODO: Show total column size and overall size per value in the column summary line
for (String columnName : formatted.keySet()) {
- console.info(String.format("\nColumn: %s\n%s", columnName, StringUtils.leftPad("", 80, '-')));
+ console.info(String.format("\nColumn: %s\n%s", columnName, new TextStringBuilder(80).appendPadding(80, '-')));
console.info(formatter.getHeader());
for (String line : formatted.get(columnName)) {
console.info(line);
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/csv/CSVProperties.java b/parquet-cli/src/main/java/org/apache/parquet/cli/csv/CSVProperties.java
index bd4ba06..7d60947 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/csv/CSVProperties.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/csv/CSVProperties.java
@@ -20,7 +20,8 @@
package org.apache.parquet.cli.csv;
import javax.annotation.concurrent.Immutable;
-import org.apache.commons.lang.StringEscapeUtils;
+
+import org.apache.commons.text.StringEscapeUtils;
@Immutable
public class CSVProperties {
@@ -62,23 +63,31 @@ public class CSVProperties {
private int linesToSkip = DEFAULT_LINES_TO_SKIP;
private String header = null;
+ private static String unescapeJava(String str) {
+ // StringEscapeUtils removes the single escape character
+ if ("\\".equals(str)) {
+ return str;
+ }
+ return StringEscapeUtils.unescapeJava(str);
+ }
+
public Builder charset(String charset) {
this.charset = charset;
return this;
}
public Builder delimiter(String delimiter) {
- this.delimiter = StringEscapeUtils.unescapeJava(delimiter);
+ this.delimiter = unescapeJava(delimiter);
return this;
}
public Builder quote(String quote) {
- this.quote = StringEscapeUtils.unescapeJava(quote);
+ this.quote = unescapeJava(quote);
return this;
}
public Builder escape(String escape) {
- this.escape = StringEscapeUtils.unescapeJava(escape);
+ this.escape = unescapeJava(escape);
return this;
}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
index 3625ed0..1e243f8 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
@@ -21,7 +21,8 @@ package org.apache.parquet.filter2.dictionarylevel;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
-import org.apache.commons.lang.ArrayUtils;
+import com.google.common.primitives.Ints;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -524,7 +525,7 @@ public class DictionaryFilterTest {
public void testInverseUdp() throws Exception {
InInt32UDP droppable = new InInt32UDP(ImmutableSet.of(42));
InInt32UDP undroppable = new InInt32UDP(ImmutableSet.of(205));
- Set<Integer> allValues = ImmutableSet.copyOf(Arrays.asList(ArrayUtils.toObject(intValues)));
+ Set<Integer> allValues = ImmutableSet.copyOf(Ints.asList(intValues));
InInt32UDP completeMatch = new InInt32UDP(allValues);
FilterPredicate inverse =
diff --git a/pom.xml b/pom.xml
index 91e38c6..9ef8081 100644
--- a/pom.xml
+++ b/pom.xml
@@ -110,6 +110,7 @@
<opencsv.version>2.3</opencsv.version>
<jcommander.version>1.72</jcommander.version>
<zstd-jni.version>1.4.0-1</zstd-jni.version>
+ <commons-text.version>1.8</commons-text.version>
<!-- properties for the profiles -->
<surefire.argLine>-Xmx512m</surefire.argLine>