You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by je...@apache.org on 2019/03/01 19:03:03 UTC

[incubator-pinot] 01/01: Adding ORC configurability

This is an automated email from the ASF dual-hosted git repository.

jenniferdai pushed a commit to branch hadoopParamFileFormat
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 20fd224cb6d09f9ef8942b98dae8c6057cb29b1d
Author: Jennifer Dai <jd...@linkedin.com>
AuthorDate: Fri Mar 1 11:02:47 2019 -0800

    Adding ORC configurability
---
 .../main/java/org/apache/pinot/core/data/readers/FileFormat.java   | 2 +-
 .../main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java  | 1 +
 .../org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java  | 7 +++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java b/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java
index 0826cd5..2d3149c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java
@@ -19,5 +19,5 @@
 package org.apache.pinot.core.data.readers;
 
 public enum FileFormat {
-  AVRO, GZIPPED_AVRO, CSV, JSON, PINOT, THRIFT
+  AVRO, GZIPPED_AVRO, CSV, JSON, PINOT, THRIFT, ORC
 }
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
index 1a50c5c..60634e0 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
@@ -49,4 +49,5 @@ public class JobConfigConstants {
   public static final String PUSH_TO_PORT = "push.to.port";
 
   public static final String DEFAULT_PERMISSIONS_MASK = "fs.permissions.umask-mode";
+  public static final String INPUT_FILE_FORMAT = "input.file.format";
 }
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
index 4258e96..043bb2a 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
@@ -68,6 +68,7 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
 
   // Optional
   protected TableConfig _tableConfig;
+  protected String _inputFileFormat;
   protected Path _readerConfigFile;
 
   // HDFS segment tar directory
@@ -102,6 +103,8 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
       _readerConfigFile = new Path(readerConfigFile);
     }
 
+    _inputFileFormat = _jobConf.get(JobConfigConstants.INPUT_FILE_FORMAT, null);
+
     // Set up segment name generator
     String segmentNameGeneratorType =
         _jobConf.get(JobConfigConstants.SEGMENT_NAME_GENERATOR_TYPE, JobConfigConstants.DEFAULT_SEGMENT_NAME_GENERATOR);
@@ -235,6 +238,10 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
   }
 
   protected FileFormat getFileFormat(String fileName) {
+    // ORC files do not necessarily have the .orc file extension
+    if (_inputFileFormat != null && _inputFileFormat.equalsIgnoreCase(FileFormat.ORC.toString())) {
+      return FileFormat.ORC;
+    }
     if (fileName.endsWith(".avro")) {
       return FileFormat.AVRO;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org