You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2020/10/22 14:57:23 UTC
[parquet-mr] branch master updated: PARQUET-1914: Allow ProtoParquetReader To Support InputFile (#817)
This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 472eebb PARQUET-1914: Allow ProtoParquetReader To Support InputFile (#817)
472eebb is described below
commit 472eebb09d035e5073c384c319ba549f74d79691
Author: belugabehr <12...@users.noreply.github.com>
AuthorDate: Thu Oct 22 10:57:05 2020 -0400
PARQUET-1914: Allow ProtoParquetReader To Support InputFile (#817)
---
.../apache/parquet/proto/ProtoParquetReader.java | 36 +++++++++++++++++-----
.../java/org/apache/parquet/proto/TestUtils.java | 7 ++++-
2 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoParquetReader.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoParquetReader.java
index 73ddec2..a864474 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoParquetReader.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoParquetReader.java
@@ -20,21 +20,26 @@ package org.apache.parquet.proto;
import java.io.IOException;
-import com.google.protobuf.MessageOrBuilder;
-
import org.apache.hadoop.fs.Path;
-
import org.apache.parquet.filter.UnboundRecordFilter;
import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.parquet.hadoop.api.ReadSupport;
+import org.apache.parquet.io.InputFile;
+
+import com.google.protobuf.MessageOrBuilder;
/**
* Read Protobuf records from a Parquet file.
*/
-public class ProtoParquetReader<T extends MessageOrBuilder> extends ParquetReader<T> {
+public class ProtoParquetReader<T extends MessageOrBuilder>
+ extends ParquetReader<T> {
- @SuppressWarnings("unchecked")
- public static <T> Builder<T> builder(Path file) {
- return ParquetReader.builder(new ProtoReadSupport(), file);
+ public static <T> ParquetReader.Builder<T> builder(Path file) {
+ return new ProtoParquetReader.Builder<T>(file);
+ }
+
+ public static <T> ParquetReader.Builder<T> builder(InputFile file) {
+ return new ProtoParquetReader.Builder<T>(file);
}
/**
@@ -59,4 +64,21 @@ public class ProtoParquetReader<T extends MessageOrBuilder> extends ParquetReade
public ProtoParquetReader(Path file, UnboundRecordFilter recordFilter) throws IOException {
super(file, new ProtoReadSupport(), recordFilter);
}
+
+ private static class Builder<T> extends ParquetReader.Builder<T> {
+
+ protected Builder(InputFile file) {
+ super(file);
+ }
+
+ protected Builder(Path path) {
+ super(path);
+ }
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ @Override
+ protected ReadSupport<T> getReadSupport() {
+ return new ProtoReadSupport();
+ }
+ }
}
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/TestUtils.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/TestUtils.java
index 2fbd2a4..38256f4 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/TestUtils.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/TestUtils.java
@@ -22,8 +22,12 @@ import com.google.protobuf.Descriptors;
import com.google.protobuf.Message;
import com.google.protobuf.MessageOrBuilder;
import com.twitter.elephantbird.util.Protobufs;
+
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+import org.apache.parquet.io.InputFile;
import java.io.File;
import java.io.IOException;
@@ -170,7 +174,8 @@ public class TestUtils {
* @return List of protobuf messages for the given type.
*/
public static <T extends MessageOrBuilder> List<T> readMessages(Path file, Class<T> messageClass) throws IOException {
- ParquetReader.Builder readerBuilder = ProtoParquetReader.builder(file);
+ InputFile inputFile = HadoopInputFile.fromPath(file, new Configuration());
+ ParquetReader.Builder readerBuilder = ProtoParquetReader.builder(inputFile);
if (messageClass != null) {
readerBuilder.set(ProtoReadSupport.PB_CLASS, messageClass.getName()).build();
}