You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by si...@apache.org on 2020/05/28 00:18:57 UTC
[hudi] 14/40: Add changes for presto mor queries (#1578)
This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch release-0.5.3
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit d476ac541b00b6a00dd690c4a7bfa3fc6037e7ab
Author: bschell <bd...@gmail.com>
AuthorDate: Mon May 4 11:27:14 2020 -0700
Add changes for presto mor queries (#1578)
Adds the necessary changes to hudi for support of presto querying hudi
merge-on-read table's realtime view.
Co-authored-by: Brandon Scheller <bs...@amazon.com>
---
.../hadoop/UseRecordReaderFromInputFormat.java | 38 ++++++++++++++++++++++
.../realtime/HoodieParquetRealtimeInputFormat.java | 2 ++
.../org/apache/hudi/hadoop/TestAnnotation.java | 23 +++++++++++--
packaging/hudi-presto-bundle/pom.xml | 18 ++++++++++
4 files changed, 79 insertions(+), 2 deletions(-)
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/UseRecordReaderFromInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/UseRecordReaderFromInputFormat.java
new file mode 100644
index 0000000..fe87323
--- /dev/null
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/UseRecordReaderFromInputFormat.java
@@ -0,0 +1,38 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hudi.hadoop;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Inherited;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+* When annotated on an InputFormat, informs the query engines that they should use the RecordReader provided by the input
+* format to execute the queries.
+*/
+@Inherited
+@Documented
+@Target(ElementType.TYPE)
+@Retention(RetentionPolicy.RUNTIME)
+public @interface UseRecordReaderFromInputFormat {
+
+}
\ No newline at end of file
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
index 4fe041a..59bf441 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.hudi.hadoop.UseRecordReaderFromInputFormat;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -62,6 +63,7 @@ import java.util.stream.Stream;
/**
* Input Format, that provides a real-time view of data in a Hoodie table.
*/
+@UseRecordReaderFromInputFormat
@UseFileSplitsFromInputFormat
public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat implements Configurable {
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java
index 3bdaa64..fefa25c 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java
@@ -19,7 +19,7 @@
package org.apache.hudi.hadoop;
import org.junit.Test;
-
+import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
import java.lang.annotation.Annotation;
import static org.junit.Assert.assertTrue;
@@ -27,7 +27,7 @@ import static org.junit.Assert.assertTrue;
public class TestAnnotation {
@Test
- public void testAnnotation() {
+ public void testHoodieParquetInputFormatAnnotation() {
assertTrue(HoodieParquetInputFormat.class.isAnnotationPresent(UseFileSplitsFromInputFormat.class));
Annotation[] annotations = HoodieParquetInputFormat.class.getAnnotations();
boolean found = false;
@@ -38,4 +38,23 @@ public class TestAnnotation {
}
assertTrue(found);
}
+
+ @Test
+ public void testHoodieParquetRealtimeInputFormatAnnotations() {
+ assertTrue(HoodieParquetRealtimeInputFormat.class.isAnnotationPresent(UseFileSplitsFromInputFormat.class));
+ assertTrue(HoodieParquetRealtimeInputFormat.class.isAnnotationPresent(UseRecordReaderFromInputFormat.class));
+ Annotation[] annotations = HoodieParquetRealtimeInputFormat.class.getAnnotations();
+ boolean foundFileSplitsAnnotation = false;
+ boolean foundRecordReaderAnnotation = false;
+ for (Annotation annotation : annotations) {
+ if ("UseFileSplitsFromInputFormat".equals(annotation.annotationType().getSimpleName())) {
+ foundFileSplitsAnnotation = true;
+ }
+ if ("UseRecordReaderFromInputFormat".equals(annotation.annotationType().getSimpleName())) {
+ foundRecordReaderAnnotation = true;
+ }
+ }
+ assertTrue(foundFileSplitsAnnotation);
+ assertTrue(foundRecordReaderAnnotation);
+ }
}
diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml
index 1235182..4900e39 100644
--- a/packaging/hudi-presto-bundle/pom.xml
+++ b/packaging/hudi-presto-bundle/pom.xml
@@ -68,6 +68,7 @@
<include>org.apache.hudi:hudi-hadoop-mr</include>
<include>org.apache.parquet:parquet-avro</include>
+ <include>org.apache.avro:avro</include>
<include>com.esotericsoftware:kryo-shaded</include>
<include>org.objenesis:objenesis</include>
<include>com.esotericsoftware:minlog</include>
@@ -76,6 +77,10 @@
<relocations>
<relocation>
+ <pattern>org.apache.avro.</pattern>
+ <shadedPattern>org.apache.hudi.org.apache.avro.</shadedPattern>
+ </relocation>
+ <relocation>
<pattern>com.esotericsoftware.kryo.</pattern>
<shadedPattern>org.apache.hudi.com.esotericsoftware.kryo.</shadedPattern>
</relocation>
@@ -128,5 +133,18 @@
<artifactId>hudi-hadoop-mr-bundle</artifactId>
<version>${project.version}</version>
</dependency>
+
+ <!-- Parquet -->
+ <dependency>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-avro</artifactId>
+ <scope>compile</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ <scope>compile</scope>
+ </dependency>
</dependencies>
</project>