You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by je...@apache.org on 2019/03/11 21:34:17 UTC
[incubator-pinot] 01/01: Adding ability to set input file path to
remote path
This is an automated email from the ASF dual-hosted git repository.
jenniferdai pushed a commit to branch remotefiles
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit c0ca7de32cdb6d42e192bb9f7fe013cc73a0013c
Author: Jennifer Dai <jd...@linkedin.com>
AuthorDate: Mon Mar 11 14:34:02 2019 -0700
Adding ability to set input file path to remote path
---
.../core/indexsegment/generator/SegmentGeneratorConfig.java | 4 +++-
.../maker/MetadataAndDictionaryAggregationPlanMakerTest.java | 4 ++--
.../index/creator/SegmentGenerationWithBytesTypeTest.java | 2 +-
.../pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java | 2 +-
.../java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java | 2 +-
.../org/apache/pinot/queries/BaseSingleValueQueriesTest.java | 2 +-
.../test/java/org/apache/pinot/queries/FastHllQueriesTest.java | 2 +-
.../org/apache/pinot/segments/v1/creator/SegmentTestUtils.java | 6 +++---
.../main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java | 2 +-
.../java/org/apache/pinot/hadoop/job/JobConfigConstants.java | 3 +++
.../apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java | 9 ++++++++-
.../test/java/org/apache/pinot/server/util/SegmentTestUtils.java | 2 +-
.../apache/pinot/tools/admin/command/CreateSegmentCommand.java | 2 +-
.../tools/segment/converter/ColumnarToStarTreeConverter.java | 2 +-
14 files changed, 28 insertions(+), 16 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java b/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java
index bc176bd..a43691c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java
@@ -310,10 +310,12 @@ public class SegmentGeneratorConfig {
return _inputFilePath;
}
- public void setInputFilePath(String inputFilePath) {
+ public void setInputFilePath(String inputFilePath, boolean isLocalPath) {
Preconditions.checkNotNull(inputFilePath);
File inputFile = new File(inputFilePath);
+ if (isLocalPath) {
Preconditions.checkState(inputFile.exists(), "Input path {} does not exist.", inputFilePath);
+ }
_inputFilePath = inputFile.getAbsolutePath();
}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java
index 19c207b..e1a75d5 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java
@@ -90,7 +90,7 @@ public class MetadataAndDictionaryAggregationPlanMakerTest {
// Create the segment generator config.
SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
- segmentGeneratorConfig.setInputFilePath(filePath);
+ segmentGeneratorConfig.setInputFilePath(filePath, true);
segmentGeneratorConfig.setTableName("testTable");
segmentGeneratorConfig.setSegmentName(SEGMENT_NAME);
segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
@@ -116,7 +116,7 @@ public class MetadataAndDictionaryAggregationPlanMakerTest {
// Create the segment generator config.
segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
- segmentGeneratorConfig.setInputFilePath(filePath);
+ segmentGeneratorConfig.setInputFilePath(filePath, true);
segmentGeneratorConfig.setTableName("testTableStarTree");
segmentGeneratorConfig.setSegmentName(SEGMENT_NAME_STARTREE);
segmentGeneratorConfig.setOutDir(INDEX_DIR_STARTREE.getAbsolutePath());
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java
index 6d6d810..bc4096e 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java
@@ -320,7 +320,7 @@ public class SegmentGenerationWithBytesTypeTest {
private IndexSegment buildSegmentFromAvro(Schema schema, String dirName, String avroName, String segmentName)
throws Exception {
SegmentGeneratorConfig config = new SegmentGeneratorConfig();
- config.setInputFilePath(dirName + File.separator + avroName);
+ config.setInputFilePath(dirName + File.separator + avroName, true);
config.setOutDir(dirName);
config.setSegmentName(segmentName);
config.setSchema(schema);
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java
index ca390cb..67cabe8 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java
@@ -124,7 +124,7 @@ public class SegmentWithHllIndexCreateHelper {
new SegmentGeneratorConfig(SegmentTestUtils.extractSchemaFromAvroWithoutTime(inputAvro));
// set other fields in segmentGenConfig
- segmentGenConfig.setInputFilePath(inputAvro.getAbsolutePath());
+ segmentGenConfig.setInputFilePath(inputAvro.getAbsolutePath(), true);
segmentGenConfig.setTimeColumnName(timeColumnName);
segmentGenConfig.setSegmentTimeUnit(timeUnit);
segmentGenConfig.setFormat(FileFormat.AVRO);
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java
index d642210..791ee01 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java
@@ -96,7 +96,7 @@ public abstract class BaseMultiValueQueriesTest extends BaseQueriesTest {
// Create the segment generator config.
SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
- segmentGeneratorConfig.setInputFilePath(filePath);
+ segmentGeneratorConfig.setInputFilePath(filePath, true);
segmentGeneratorConfig.setTableName("testTable");
segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
segmentGeneratorConfig.setInvertedIndexCreationColumns(Arrays.asList("column3", "column7", "column8", "column9"));
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java
index b5b2176..b1465b1 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java
@@ -98,7 +98,7 @@ public abstract class BaseSingleValueQueriesTest extends BaseQueriesTest {
// Create the segment generator config.
SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
- segmentGeneratorConfig.setInputFilePath(filePath);
+ segmentGeneratorConfig.setInputFilePath(filePath, true);
segmentGeneratorConfig.setTableName("testTable");
segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
segmentGeneratorConfig
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
index 57f0171..4ac4171 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
@@ -232,7 +232,7 @@ public class FastHllQueriesTest extends BaseQueriesTest {
// Create the segment generator config
SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schemaBuilder.build());
- segmentGeneratorConfig.setInputFilePath(filePath);
+ segmentGeneratorConfig.setInputFilePath(filePath, true);
segmentGeneratorConfig.setTableName("testTable");
segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
segmentGeneratorConfig
diff --git a/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java b/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java
index 2dcaa4e..3c30b0b 100644
--- a/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java
+++ b/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java
@@ -62,7 +62,7 @@ public class SegmentTestUtils {
throws IOException {
SegmentGeneratorConfig segmentGeneratorConfig =
new SegmentGeneratorConfig(extractSchemaFromAvroWithoutTime(avroFile));
- segmentGeneratorConfig.setInputFilePath(avroFile.getAbsolutePath());
+ segmentGeneratorConfig.setInputFilePath(avroFile.getAbsolutePath(), true);
segmentGeneratorConfig.setOutDir(outputDir.getAbsolutePath());
segmentGeneratorConfig.setTableName(tableName);
return segmentGeneratorConfig;
@@ -73,7 +73,7 @@ public class SegmentTestUtils {
throws IOException {
final SegmentGeneratorConfig segmentGenSpec =
new SegmentGeneratorConfig(extractSchemaFromAvroWithoutTime(inputAvro));
- segmentGenSpec.setInputFilePath(inputAvro.getAbsolutePath());
+ segmentGenSpec.setInputFilePath(inputAvro.getAbsolutePath(), true);
segmentGenSpec.setTimeColumnName(timeColumn);
segmentGenSpec.setSegmentTimeUnit(timeUnit);
segmentGenSpec.setFormat(FileFormat.AVRO);
@@ -87,7 +87,7 @@ public class SegmentTestUtils {
public static SegmentGeneratorConfig getSegmentGeneratorConfigWithSchema(File inputAvro, File outputDir,
String tableName, Schema schema) {
SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
- segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath());
+ segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath(), true);
segmentGeneratorConfig.setOutDir(outputDir.getAbsolutePath());
segmentGeneratorConfig.setFormat(FileFormat.AVRO);
segmentGeneratorConfig.setSegmentVersion(SegmentVersion.v1);
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java
index 6585295..b8200db 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java
@@ -100,7 +100,7 @@ public class PinotRecordWriter<K, V> extends RecordWriter<K, V> {
private void createSegment(String inputFile, SegmentIndexCreationDriver driver) {
try {
- _segmentConfig.setInputFilePath(inputFile);
+ _segmentConfig.setInputFilePath(inputFile, true);
driver.init(_segmentConfig);
driver.build();
} catch (Exception e) {
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
index 35557b5..1b67306 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
@@ -52,4 +52,7 @@ public class JobConfigConstants {
// The path to the record reader to be configured
public static final String RECORD_READER_PATH = "record.reader.path";
+
+ // Path to segments, local v. on hdfs
+ public static final String USE_HDFS_PATH_FOR_SEGMENT_GEN = "use.hdfs.path.for.segment.gen";
}
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
index 21f3f16..469ac2e 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
@@ -83,6 +83,8 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
protected FileSystem _fileSystem;
+ protected boolean _useHDFSPathForSegmentGen;
+
@Override
public void setup(Context context)
throws IOException, InterruptedException {
@@ -103,6 +105,7 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
}
_recordReaderPath = _jobConf.get(JobConfigConstants.RECORD_READER_PATH, null);
+ _useHDFSPathForSegmentGen = _jobConf.getBoolean(JobConfigConstants.USE_HDFS_PATH_FOR_SEGMENT_GEN, false);
// Set up segment name generator
String segmentNameGeneratorType =
@@ -203,7 +206,11 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(_tableConfig, _schema);
segmentGeneratorConfig.setTableName(_rawTableName);
- segmentGeneratorConfig.setInputFilePath(localInputFile.getPath());
+ if (_useHDFSPathForSegmentGen) {
+ segmentGeneratorConfig.setInputFilePath(hdfsInputFile.toString(), false);
+ } else {
+ segmentGeneratorConfig.setInputFilePath(localInputFile.getPath(), true);
+ }
segmentGeneratorConfig.setOutDir(_localSegmentDir.getPath());
segmentGeneratorConfig.setSegmentNameGenerator(_segmentNameGenerator);
segmentGeneratorConfig.setSequenceId(sequenceId);
diff --git a/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java b/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java
index be30064..27f613d 100644
--- a/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java
+++ b/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java
@@ -45,7 +45,7 @@ public class SegmentTestUtils {
segmentGeneratorConfig = new SegmentGeneratorConfig(pinotSchema);
}
- segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath());
+ segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath(), true);
segmentGeneratorConfig.setSegmentTimeUnit(timeUnit);
if (inputAvro.getName().endsWith("gz")) {
segmentGeneratorConfig.setFormat(FileFormat.GZIPPED_AVRO);
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
index 95c1371..058e28e 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
@@ -370,7 +370,7 @@ public class CreateSegmentCommand extends AbstractBaseAdminCommand implements Co
public void run() {
try {
SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentGeneratorConfig);
- config.setInputFilePath(file.getAbsolutePath());
+ config.setInputFilePath(file.getAbsolutePath(), true);
config.setSegmentName(_segmentName + "_" + segCnt);
config.loadConfigFiles();
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java
index 34094b9..42d2db7 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java
@@ -111,7 +111,7 @@ public class ColumnarToStarTreeConverter {
SegmentMetadata segmentMetadata = new SegmentMetadataImpl(columnarSegment);
SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentMetadata.getSchema());
config.setDataDir(_inputDirName);
- config.setInputFilePath(columnarSegment.getAbsolutePath());
+ config.setInputFilePath(columnarSegment.getAbsolutePath(), true);
config.setFormat(FileFormat.PINOT);
config.setOutDir(_outputDirName);
config.setOverwrite(_overwrite);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org