You are viewing a plain-text version of this content. The canonical link was not preserved in this plain-text rendering.
Posted to commits@parquet.apache.org by bl...@apache.org on 2017/11/15 00:16:33 UTC
parquet-mr git commit: PARQUET-1143: Update to Parquet format 2.4.0.
Repository: parquet-mr
Updated Branches:
refs/heads/master ba7b8ba69 -> 132b2a8c5
PARQUET-1143: Update to Parquet format 2.4.0.
This adds the new compression codecs (Brotli, LZ4, and ZSTD) that are required by format 2.4.0.
Author: Ryan Blue <bl...@apache.org>
Closes #430 from rdblue/PARQUET-1143-format-2.4.0-updates and squashes the following commits:
0aca87812 [Ryan Blue] PARQUET-1143: Remove staging repository now that 2.4.0 is released.
89b01cb64 [Ryan Blue] PARQUET-1143: Make brotli-codec an optional dependency.
a2f57ba5b [Ryan Blue] PARQUET-1143: Drop hadoop-1 tests from Travis CI.
d0f81d7cd [Ryan Blue] PARQUET-1143: Use slf4j-simple and log4j in Thrift/Pig tests.
326b8ac74 [Ryan Blue] PARQUET-1143: Update Travis to use the default ubuntu image.
4ad46f94c [Ryan Blue] PARQUET-1143: Use slf4j-log4j12 in Pig tests.
785e84dff [Ryan Blue] PARQUET-1143: Fix Travis CI.
efa171fda [Ryan Blue] PARQUET-1143: Ban slf4j-log4j12 dependency.
bf61e84ab [Ryan Blue] PARQUET-1143: Update to Parquet format 2.4.0.
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/132b2a8c
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/132b2a8c
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/132b2a8c
Branch: refs/heads/master
Commit: 132b2a8c553bdcfd445e88680beac6f225c50ac4
Parents: ba7b8ba
Author: Ryan Blue <bl...@apache.org>
Authored: Tue Nov 14 16:16:28 2017 -0800
Committer: Ryan Blue <bl...@apache.org>
Committed: Tue Nov 14 16:16:28 2017 -0800
----------------------------------------------------------------------
.travis.yml | 4 +--
parquet-avro/pom.xml | 6 ++++
parquet-benchmarks/pom.xml | 17 +++++++++--
parquet-cascading/pom.xml | 6 ++++
.../main/java/org/apache/parquet/cli/Util.java | 6 ++++
parquet-hadoop/pom.xml | 12 ++++++++
.../hadoop/metadata/CompressionCodecName.java | 5 +++-
.../parquet/hadoop/TestDirectCodecFactory.java | 14 +++++++--
parquet-pig/src/test/resources/log4j.properties | 23 +++++++++++++++
.../src/test/resources/log4j.properties | 23 +++++++++++++++
parquet-scrooge/pom.xml | 6 ++++
parquet-thrift/pom.xml | 15 +++++++++-
pom.xml | 30 +++++++++++++++++++-
13 files changed, 157 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index f4b56c6..55f6e9a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,12 +22,12 @@ before_install:
- tar zxf thrift-0.7.0.tar.gz
- cd thrift-0.7.0
- chmod +x ./configure
- - ./configure --disable-gen-erl --disable-gen-hs --without-php --without-nodejs --without-ruby --without-haskell --without-erlang
+ - ./configure --disable-gen-erl --disable-gen-hs --without-ruby --without-haskell --without-erlang --without-php
- sudo make install
- cd ..
env:
- - HADOOP_PROFILE=hadoop-1 TEST_CODECS=uncompressed
+ - HADOOP_PROFILE=default TEST_CODECS=uncompressed,brotli
- HADOOP_PROFILE=default TEST_CODECS=gzip,snappy
install: mvn install --batch-mode -DskipTests=true -Dmaven.javadoc.skip=true -Dsource.skip=true > mvn_install.log || mvn install --batch-mode -DskipTests=true -Dmaven.javadoc.skip=true -Dsource.skip=true > mvn_install.log || (cat mvn_install.log && false)
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-avro/pom.xml
----------------------------------------------------------------------
diff --git a/parquet-avro/pom.xml b/parquet-avro/pom.xml
index ecb69bb..f2b4c9e 100644
--- a/parquet-avro/pom.xml
+++ b/parquet-avro/pom.xml
@@ -63,6 +63,12 @@
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-benchmarks/pom.xml
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/pom.xml b/parquet-benchmarks/pom.xml
index b01a967..df3d832 100644
--- a/parquet-benchmarks/pom.xml
+++ b/parquet-benchmarks/pom.xml
@@ -48,9 +48,15 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <version>${hadoop.version}</version>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
@@ -73,6 +79,11 @@
<build>
<plugins>
+ <!-- This module disables semver checks because it is not a public API.
+ <plugin>
+ <artifactId>maven-enforcer-plugin</artifactId>
+ </plugin>
+ -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-cascading/pom.xml
----------------------------------------------------------------------
diff --git a/parquet-cascading/pom.xml b/parquet-cascading/pom.xml
index 07e8f68..c386817 100644
--- a/parquet-cascading/pom.xml
+++ b/parquet-cascading/pom.xml
@@ -66,6 +66,12 @@
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
----------------------------------------------------------------------
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
index 860a218..07a5364 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
@@ -218,6 +218,12 @@ public class Util {
return "G";
case LZO:
return "L";
+ case BROTLI:
+ return "B";
+ case LZ4:
+ return "4";
+ case ZSTD:
+ return "Z";
default:
return "?";
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-hadoop/pom.xml
----------------------------------------------------------------------
diff --git a/parquet-hadoop/pom.xml b/parquet-hadoop/pom.xml
index 84ef43f..5df2002 100644
--- a/parquet-hadoop/pom.xml
+++ b/parquet-hadoop/pom.xml
@@ -51,6 +51,12 @@
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
@@ -79,6 +85,12 @@
<artifactId>commons-pool</artifactId>
<version>1.6</version>
</dependency>
+ <dependency>
+ <groupId>com.github.rdblue</groupId>
+ <artifactId>brotli-codec</artifactId>
+ <version>${brotli-codec.version}</version>
+ <optional>true</optional>
+ </dependency>
<dependency>
<groupId>com.google.guava</groupId>
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java
index d03d280..153133e 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java
@@ -27,7 +27,10 @@ public enum CompressionCodecName {
UNCOMPRESSED(null, CompressionCodec.UNCOMPRESSED, ""),
SNAPPY("org.apache.parquet.hadoop.codec.SnappyCodec", CompressionCodec.SNAPPY, ".snappy"),
GZIP("org.apache.hadoop.io.compress.GzipCodec", CompressionCodec.GZIP, ".gz"),
- LZO("com.hadoop.compression.lzo.LzoCodec", CompressionCodec.LZO, ".lzo");
+ LZO("com.hadoop.compression.lzo.LzoCodec", CompressionCodec.LZO, ".lzo"),
+ BROTLI("org.apache.hadoop.io.compress.BrotliCodec", CompressionCodec.BROTLI, ".br"),
+ LZ4("org.apache.hadoop.io.compress.Lz4Codec", CompressionCodec.LZ4, ".lz4"),
+ ZSTD("org.apache.hadoop.io.compress.ZStandardCodec", CompressionCodec.ZSTD, ".zstd");
public static CompressionCodecName fromConf(String name) {
if (name == null) {
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestDirectCodecFactory.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestDirectCodecFactory.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestDirectCodecFactory.java
index caf2ed6..3dd17e9 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestDirectCodecFactory.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestDirectCodecFactory.java
@@ -18,7 +18,9 @@
package org.apache.parquet.hadoop;
import java.nio.ByteBuffer;
+import java.util.HashSet;
import java.util.Random;
+import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.bytes.ByteBufferAllocator;
@@ -30,6 +32,11 @@ import org.junit.Test;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.BROTLI;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.LZ4;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.LZO;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.ZSTD;
+
public class TestDirectCodecFactory {
private static enum Decompression {
@@ -146,13 +153,16 @@ public class TestDirectCodecFactory {
public void compressionCodecs() throws Exception {
final int[] sizes = { 4 * 1024, 1 * 1024 * 1024 };
final boolean[] comp = { true, false };
+ Set<CompressionCodecName> codecsToSkip = new HashSet<>();
+ codecsToSkip.add(LZO); // not distributed because it is GPL
+ codecsToSkip.add(LZ4); // not distributed in the default version of Hadoop
+ codecsToSkip.add(ZSTD); // not distributed in the default version of Hadoop
for (final int size : sizes) {
for (final boolean useOnHeapComp : comp) {
for (final Decompression decomp : Decompression.values()) {
for (final CompressionCodecName codec : CompressionCodecName.values()) {
- if (codec == CompressionCodecName.LZO) {
- // not installed as gpl.
+ if (codecsToSkip.contains(codec)) {
continue;
}
test(size, codec, useOnHeapComp, decomp);
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-pig/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/parquet-pig/src/test/resources/log4j.properties b/parquet-pig/src/test/resources/log4j.properties
new file mode 100644
index 0000000..b60c8c6
--- /dev/null
+++ b/parquet-pig/src/test/resources/log4j.properties
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+log4j.rootCategory=INFO, console
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %t %c{1}: %m%n
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-protobuf/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/resources/log4j.properties b/parquet-protobuf/src/test/resources/log4j.properties
new file mode 100644
index 0000000..b60c8c6
--- /dev/null
+++ b/parquet-protobuf/src/test/resources/log4j.properties
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+log4j.rootCategory=INFO, console
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %t %c{1}: %m%n
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-scrooge/pom.xml
----------------------------------------------------------------------
diff --git a/parquet-scrooge/pom.xml b/parquet-scrooge/pom.xml
index 60867ba..0226efc 100644
--- a/parquet-scrooge/pom.xml
+++ b/parquet-scrooge/pom.xml
@@ -55,6 +55,12 @@
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/parquet-thrift/pom.xml
----------------------------------------------------------------------
diff --git a/parquet-thrift/pom.xml b/parquet-thrift/pom.xml
index 20666bc..3e69e4e 100644
--- a/parquet-thrift/pom.xml
+++ b/parquet-thrift/pom.xml
@@ -52,6 +52,12 @@
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>com.twitter.elephantbird</groupId>
@@ -117,11 +123,18 @@
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
+ <artifactId>slf4j-simple</artifactId>
<version>${slf4j.version}</version>
<scope>test</scope>
</dependency>
<dependency>
+ <!-- needed for Pig tests -->
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <version>1.2.17</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
<version>${project.version}</version>
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/132b2a8c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 40977a5..44b0b62 100644
--- a/pom.xml
+++ b/pom.xml
@@ -50,6 +50,15 @@
</mailingList>
</mailingLists>
+ <repositories>
+ <repository>
+ <id>jitpack.io</id>
+ <url>https://jitpack.io</url>
+ <name>Jitpack.io repository</name>
+ <!-- needed for brotli-codec -->
+ </repository>
+ </repositories>
+
<developers>
<developer>
<name>Julien Le Dem</name>
@@ -72,7 +81,7 @@
<hadoop1.version>1.2.1</hadoop1.version>
<cascading.version>2.7.1</cascading.version>
<cascading3.version>3.1.2</cascading3.version>
- <parquet.format.version>2.3.1</parquet.format.version>
+ <parquet.format.version>2.4.0</parquet.format.version>
<previous.version>1.7.0</previous.version>
<thrift.executable>thrift</thrift.executable>
<scala.version>2.10.6</scala.version>
@@ -88,6 +97,7 @@
<slf4j.version>1.7.22</slf4j.version>
<avro.version>1.8.2</avro.version>
<guava.version>20.0</guava.version>
+ <brotli-codec.version>0.1.1</brotli-codec.version>
<mockito.version>1.10.19</mockito.version>
<!-- parquet-cli dependencies -->
@@ -242,6 +252,8 @@
<exclude>org/apache/parquet/hadoop/CodecFactory**</exclude>
<exclude>shaded/**</exclude> <!-- shaded by parquet -->
<exclude>org/apache/parquet/it/unimi/dsi/fastutil/**</exclude> <!-- Another shaded dependency from parquet-column -->
+ <exclude>org/apache/parquet/benchmarks/**</exclude>
+ <exclude>org/openjdk/**</exclude>
<!-- temporary exclusions for false-positives -->
<exclude>org/apache/parquet/Version</exclude>
<exclude>org/apache/parquet/schema/**</exclude> <!-- methods moved to new superclass -->
@@ -256,6 +268,22 @@
</rules>
</configuration>
</execution>
+ <execution>
+ <id>enforce-banned-dependencies</id>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
+ <configuration>
+ <rules>
+ <bannedDependencies>
+ <excludes>
+ <exclude>org.slf4j:slf4j-log4j12</exclude>
+ </excludes>
+ </bannedDependencies>
+ </rules>
+ <fail>true</fail>
+ </configuration>
+ </execution>
</executions>
</plugin>
<plugin>