You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by fo...@apache.org on 2019/09/22 06:59:43 UTC

[parquet-mr] branch master updated: PARQUET-1601: Add zstd support to parquet-cli to-avro (#653)

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new e9d8716  PARQUET-1601: Add zstd support to parquet-cli to-avro (#653)
e9d8716 is described below

commit e9d87163c50bf91e7ae8c4382095fa641877fa07
Author: Kengo Seki <se...@apache.org>
AuthorDate: Sun Sep 22 15:59:37 2019 +0900

    PARQUET-1601: Add zstd support to parquet-cli to-avro (#653)
---
 parquet-cli/pom.xml                                |  5 +++++
 .../java/org/apache/parquet/cli/util/Codecs.java   |  2 ++
 .../apache/parquet/cli/commands/AvroFileTest.java  |  5 +++++
 .../parquet/cli/commands/ToAvroCommandTest.java    | 23 ++++++++++++++++++++++
 pom.xml                                            |  1 +
 5 files changed, 36 insertions(+)

diff --git a/parquet-cli/pom.xml b/parquet-cli/pom.xml
index 4c0f13b..3d2202b 100644
--- a/parquet-cli/pom.xml
+++ b/parquet-cli/pom.xml
@@ -44,6 +44,11 @@
       <version>${avro.version}</version>
     </dependency>
     <dependency>
+      <groupId>com.github.luben</groupId>
+      <artifactId>zstd-jni</artifactId>
+      <version>${zstd-jni.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>
       <version>${slf4j.version}</version>
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java
index 06f12fd..ee79ee6 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Codecs.java
@@ -42,6 +42,8 @@ public class Codecs {
         return CodecFactory.snappyCodec();
       case GZIP:
         return CodecFactory.deflateCodec(9);
+      case ZSTD:
+        return CodecFactory.zstandardCodec(CodecFactory.DEFAULT_ZSTANDARD_LEVEL);
       default:
         throw new IllegalArgumentException(
             "Codec incompatible with Avro: " + codec);
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/AvroFileTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/AvroFileTest.java
index dd57cd8..40c05c7 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/AvroFileTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/AvroFileTest.java
@@ -27,10 +27,15 @@ import java.util.Arrays;
 public class AvroFileTest extends ParquetFileTest {
 
   protected File toAvro(File parquetFile) throws IOException {
+    return toAvro(parquetFile, "GZIP");
+  }
+
+  protected File toAvro(File parquetFile, String compressionCodecName) throws IOException {
     ToAvroCommand command = new ToAvroCommand(createLogger());
     command.targets = Arrays.asList(parquetFile.getAbsolutePath());
     File output = new File(getTempFolder(), getClass().getSimpleName() + ".avro");
     command.outputPath = output.getAbsolutePath();
+    command.compressionCodecName = compressionCodecName;
     command.setConf(new Configuration());
     int exitCode = command.run();
     assert(exitCode == 0);
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java
index ca7fda2..62d164d 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ToAvroCommandTest.java
@@ -30,4 +30,27 @@ public class ToAvroCommandTest extends AvroFileTest {
     File avroFile = toAvro(parquetFile());
     Assert.assertTrue(avroFile.exists());
   }
+
+  @Test
+  public void testToAvroCommandWithGzipCompression() throws IOException {
+    File avroFile = toAvro(parquetFile(), "GZIP");
+    Assert.assertTrue(avroFile.exists());
+  }
+
+  @Test
+  public void testToAvroCommandWithSnappyCompression() throws IOException {
+    File avroFile = toAvro(parquetFile(), "SNAPPY");
+    Assert.assertTrue(avroFile.exists());
+  }
+
+  @Test
+  public void testToAvroCommandWithZstdCompression() throws IOException {
+    File avroFile = toAvro(parquetFile(), "ZSTD");
+    Assert.assertTrue(avroFile.exists());
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testToAvroCommandWithInvalidCompression() throws IOException {
+    toAvro(parquetFile(), "FOO");
+  }
 }
diff --git a/pom.xml b/pom.xml
index aaff467..ae5cedb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -106,6 +106,7 @@
     <!-- parquet-cli dependencies -->
     <opencsv.version>2.3</opencsv.version>
     <jcommander.version>1.35</jcommander.version>
+    <zstd-jni.version>1.3.8-6</zstd-jni.version>
   </properties>
 
   <modules>