You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by je...@apache.org on 2019/03/01 19:03:03 UTC
[incubator-pinot] 01/01: Adding ORC configurability
This is an automated email from the ASF dual-hosted git repository.
jenniferdai pushed a commit to branch hadoopParamFileFormat
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit 20fd224cb6d09f9ef8942b98dae8c6057cb29b1d
Author: Jennifer Dai <jd...@linkedin.com>
AuthorDate: Fri Mar 1 11:02:47 2019 -0800
Adding ORC configurability
---
.../main/java/org/apache/pinot/core/data/readers/FileFormat.java | 2 +-
.../main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java | 1 +
.../org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java | 7 +++++++
3 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java b/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java
index 0826cd5..2d3149c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/readers/FileFormat.java
@@ -19,5 +19,5 @@
package org.apache.pinot.core.data.readers;
public enum FileFormat {
- AVRO, GZIPPED_AVRO, CSV, JSON, PINOT, THRIFT
+ AVRO, GZIPPED_AVRO, CSV, JSON, PINOT, THRIFT, ORC
}
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
index 1a50c5c..60634e0 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
@@ -49,4 +49,5 @@ public class JobConfigConstants {
public static final String PUSH_TO_PORT = "push.to.port";
public static final String DEFAULT_PERMISSIONS_MASK = "fs.permissions.umask-mode";
+ public static final String INPUT_FILE_FORMAT = "input.file.format";
}
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
index 4258e96..043bb2a 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
@@ -68,6 +68,7 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
// Optional
protected TableConfig _tableConfig;
+ protected String _inputFileFormat;
protected Path _readerConfigFile;
// HDFS segment tar directory
@@ -102,6 +103,8 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
_readerConfigFile = new Path(readerConfigFile);
}
+ _inputFileFormat = _jobConf.get(JobConfigConstants.INPUT_FILE_FORMAT, null);
+
// Set up segment name generator
String segmentNameGeneratorType =
_jobConf.get(JobConfigConstants.SEGMENT_NAME_GENERATOR_TYPE, JobConfigConstants.DEFAULT_SEGMENT_NAME_GENERATOR);
@@ -235,6 +238,10 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
}
protected FileFormat getFileFormat(String fileName) {
+ // ORC files do not necessarily have the .orc file extension
+ if (_inputFileFormat != null && _inputFileFormat.equalsIgnoreCase(FileFormat.ORC.toString())) {
+ return FileFormat.ORC;
+ }
if (fileName.endsWith(".avro")) {
return FileFormat.AVRO;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org