You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by je...@apache.org on 2019/03/01 19:03:02 UTC

[incubator-pinot] branch hadoopParamFileFormat created (now 20fd224)

This is an automated email from the ASF dual-hosted git repository.

jenniferdai pushed a change to branch hadoopParamFileFormat
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git.


      at 20fd224  Adding orc configurability

This branch includes the following new commits:

     new 20fd224  Adding orc configurability

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 01/01: Adding orc configurability

Posted by je...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jenniferdai pushed a commit to branch hadoopParamFileFormat
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 20fd224cb6d09f9ef8942b98dae8c6057cb29b1d
Author: Jennifer Dai <jd...@linkedin.com>
AuthorDate: Fri Mar 1 11:02:47 2019 -0800

    Adding orc configurability
---
 .../main/java/org/apache/pinot/core/data/readers/FileFormat.java   | 2 +-
 .../main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java  | 1 +
 .../org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java  | 7 +++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java b/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java
index 0826cd5..2d3149c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java
@@ -19,5 +19,5 @@
 package org.apache.pinot.core.data.readers;
 
 public enum FileFormat {
-  AVRO, GZIPPED_AVRO, CSV, JSON, PINOT, THRIFT
+  AVRO, GZIPPED_AVRO, CSV, JSON, PINOT, THRIFT, ORC
 }
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
index 1a50c5c..60634e0 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
@@ -49,4 +49,5 @@ public class JobConfigConstants {
   public static final String PUSH_TO_PORT = "push.to.port";
 
   public static final String DEFAULT_PERMISSIONS_MASK = "fs.permissions.umask-mode";
+  public static final String INPUT_FILE_FORMAT = "input.file.format";
 }
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
index 4258e96..043bb2a 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
@@ -68,6 +68,7 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
 
   // Optional
   protected TableConfig _tableConfig;
+  protected String _inputFileFormat;
   protected Path _readerConfigFile;
 
   // HDFS segment tar directory
@@ -102,6 +103,8 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
       _readerConfigFile = new Path(readerConfigFile);
     }
 
+    _inputFileFormat = _jobConf.get(JobConfigConstants.INPUT_FILE_FORMAT, null);
+
     // Set up segment name generator
     String segmentNameGeneratorType =
         _jobConf.get(JobConfigConstants.SEGMENT_NAME_GENERATOR_TYPE, JobConfigConstants.DEFAULT_SEGMENT_NAME_GENERATOR);
@@ -235,6 +238,10 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
   }
 
   protected FileFormat getFileFormat(String fileName) {
+    // ORC files do not necessarily have the .orc file extension
+    if (_inputFileFormat != null && _inputFileFormat.equalsIgnoreCase(FileFormat.ORC.toString())) {
+      return FileFormat.ORC;
+    }
     if (fileName.endsWith(".avro")) {
       return FileFormat.AVRO;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org