You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by fo...@apache.org on 2019/09/22 06:59:43 UTC
[parquet-mr] branch master updated: PARQUET-1601: Add zstd support to parquet-cli to-avro (#653)
This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new e9d8716 PARQUET-1601: Add zstd support to parquet-cli to-avro (#653)
e9d8716 is described below
commit e9d87163c50bf91e7ae8c4382095fa641877fa07
Author: Kengo Seki <se...@apache.org>
AuthorDate: Sun Sep 22 15:59:37 2019 +0900
PARQUET-1601: Add zstd support to parquet-cli to-avro (#653)
---
parquet-cli/pom.xml | 5 +++++
.../java/org/apache/parquet/cli/util/Codecs.java | 2 ++
.../apache/parquet/cli/commands/AvroFileTest.java | 5 +++++
.../parquet/cli/commands/ToAvroCommandTest.java | 23 ++++++++++++++++++++++
pom.xml | 1 +
5 files changed, 36 insertions(+)
diff --git a/parquet-cli/pom.xml b/parquet-cli/pom.xml
index 4c0f13b..3d2202b 100644
--- a/parquet-cli/pom.xml
+++ b/parquet-cli/pom.xml
@@ -44,6 +44,11 @@
<version>${avro.version}</version>
</dependency>
<dependency>
+ <groupId>com.github.luben</groupId>
+ <artifactId>zstd-jni</artifactId>
+ <version>${zstd-jni.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java
index 06f12fd..ee79ee6 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java
@@ -42,6 +42,8 @@ public class Codecs {
return CodecFactory.snappyCodec();
case GZIP:
return CodecFactory.deflateCodec(9);
+ case ZSTD:
+ return CodecFactory.zstandardCodec(CodecFactory.DEFAULT_ZSTANDARD_LEVEL);
default:
throw new IllegalArgumentException(
"Codec incompatible with Avro: " + codec);
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/AvroFileTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/AvroFileTest.java
index dd57cd8..40c05c7 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/AvroFileTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/AvroFileTest.java
@@ -27,10 +27,15 @@ import java.util.Arrays;
public class AvroFileTest extends ParquetFileTest {
protected File toAvro(File parquetFile) throws IOException {
+ return toAvro(parquetFile, "GZIP");
+ }
+
+ protected File toAvro(File parquetFile, String compressionCodecName) throws IOException {
ToAvroCommand command = new ToAvroCommand(createLogger());
command.targets = Arrays.asList(parquetFile.getAbsolutePath());
File output = new File(getTempFolder(), getClass().getSimpleName() + ".avro");
command.outputPath = output.getAbsolutePath();
+ command.compressionCodecName = compressionCodecName;
command.setConf(new Configuration());
int exitCode = command.run();
assert(exitCode == 0);
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java
index ca7fda2..62d164d 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java
@@ -30,4 +30,27 @@ public class ToAvroCommandTest extends AvroFileTest {
File avroFile = toAvro(parquetFile());
Assert.assertTrue(avroFile.exists());
}
+
+ @Test
+ public void testToAvroCommandWithGzipCompression() throws IOException {
+ File avroFile = toAvro(parquetFile(), "GZIP");
+ Assert.assertTrue(avroFile.exists());
+ }
+
+ @Test
+ public void testToAvroCommandWithSnappyCompression() throws IOException {
+ File avroFile = toAvro(parquetFile(), "SNAPPY");
+ Assert.assertTrue(avroFile.exists());
+ }
+
+ @Test
+ public void testToAvroCommandWithZstdCompression() throws IOException {
+ File avroFile = toAvro(parquetFile(), "ZSTD");
+ Assert.assertTrue(avroFile.exists());
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testToAvroCommandWithInvalidCompression() throws IOException {
+ toAvro(parquetFile(), "FOO");
+ }
}
diff --git a/pom.xml b/pom.xml
index aaff467..ae5cedb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -106,6 +106,7 @@
<!-- parquet-cli dependencies -->
<opencsv.version>2.3</opencsv.version>
<jcommander.version>1.35</jcommander.version>
+ <zstd-jni.version>1.3.8-6</zstd-jni.version>
</properties>
<modules>