You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2020/10/22 14:57:23 UTC

[parquet-mr] branch master updated: PARQUET-1914: Allow ProtoParquetReader To Support InputFile (#817)

This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 472eebb  PARQUET-1914: Allow ProtoParquetReader To Support InputFile (#817)
472eebb is described below

commit 472eebb09d035e5073c384c319ba549f74d79691
Author: belugabehr <12...@users.noreply.github.com>
AuthorDate: Thu Oct 22 10:57:05 2020 -0400

    PARQUET-1914: Allow ProtoParquetReader To Support InputFile (#817)
---
 .../apache/parquet/proto/ProtoParquetReader.java   | 36 +++++++++++++++++-----
 .../java/org/apache/parquet/proto/TestUtils.java   |  7 ++++-
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoParquetReader.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoParquetReader.java
index 73ddec2..a864474 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoParquetReader.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoParquetReader.java
@@ -20,21 +20,26 @@ package org.apache.parquet.proto;
 
 import java.io.IOException;
 
-import com.google.protobuf.MessageOrBuilder;
-
 import org.apache.hadoop.fs.Path;
-
 import org.apache.parquet.filter.UnboundRecordFilter;
 import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.parquet.hadoop.api.ReadSupport;
+import org.apache.parquet.io.InputFile;
+
+import com.google.protobuf.MessageOrBuilder;
 
 /**
  * Read Protobuf records from a Parquet file.
  */
-public class ProtoParquetReader<T extends MessageOrBuilder> extends ParquetReader<T> {
+public class ProtoParquetReader<T extends MessageOrBuilder>
+    extends ParquetReader<T> {
 
-  @SuppressWarnings("unchecked")
-  public static <T> Builder<T> builder(Path file) {
-    return ParquetReader.builder(new ProtoReadSupport(), file);
+  public static <T> ParquetReader.Builder<T> builder(Path file) {
+    return new ProtoParquetReader.Builder<T>(file);
+  }
+
+  public static <T> ParquetReader.Builder<T> builder(InputFile file) {
+    return new ProtoParquetReader.Builder<T>(file);
   }
 
   /**
@@ -59,4 +64,21 @@ public class ProtoParquetReader<T extends MessageOrBuilder> extends ParquetReade
   public ProtoParquetReader(Path file, UnboundRecordFilter recordFilter) throws IOException {
     super(file, new ProtoReadSupport(), recordFilter);
   }
+
+  private static class Builder<T> extends ParquetReader.Builder<T> {
+
+    protected Builder(InputFile file) {
+      super(file);
+    }
+
+    protected Builder(Path path) {
+      super(path);
+    }
+
+    @SuppressWarnings({ "rawtypes", "unchecked" })
+    @Override
+    protected ReadSupport<T> getReadSupport() {
+      return new ProtoReadSupport();
+    }
+  }
 }
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/TestUtils.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/TestUtils.java
index 2fbd2a4..38256f4 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/TestUtils.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/TestUtils.java
@@ -22,8 +22,12 @@ import com.google.protobuf.Descriptors;
 import com.google.protobuf.Message;
 import com.google.protobuf.MessageOrBuilder;
 import com.twitter.elephantbird.util.Protobufs;
+
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+import org.apache.parquet.io.InputFile;
 
 import java.io.File;
 import java.io.IOException;
@@ -170,7 +174,8 @@ public class TestUtils {
    * @return List of protobuf messages for the given type.
    */
   public static <T extends MessageOrBuilder> List<T> readMessages(Path file, Class<T> messageClass) throws IOException {
-    ParquetReader.Builder readerBuilder = ProtoParquetReader.builder(file);
+    InputFile inputFile = HadoopInputFile.fromPath(file, new Configuration());
+    ParquetReader.Builder readerBuilder = ProtoParquetReader.builder(inputFile);
     if (messageClass != null) {
       readerBuilder.set(ProtoReadSupport.PB_CLASS, messageClass.getName()).build();
     }