You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by je...@apache.org on 2019/03/11 21:34:17 UTC

[incubator-pinot] 01/01: Adding ability to set input file path to remote path

This is an automated email from the ASF dual-hosted git repository.

jenniferdai pushed a commit to branch remotefiles
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit c0ca7de32cdb6d42e192bb9f7fe013cc73a0013c
Author: Jennifer Dai <jd...@linkedin.com>
AuthorDate: Mon Mar 11 14:34:02 2019 -0700

    Adding ability to set input file path to remote path
---
 .../core/indexsegment/generator/SegmentGeneratorConfig.java      | 4 +++-
 .../maker/MetadataAndDictionaryAggregationPlanMakerTest.java     | 4 ++--
 .../index/creator/SegmentGenerationWithBytesTypeTest.java        | 2 +-
 .../pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java | 2 +-
 .../java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java | 2 +-
 .../org/apache/pinot/queries/BaseSingleValueQueriesTest.java     | 2 +-
 .../test/java/org/apache/pinot/queries/FastHllQueriesTest.java   | 2 +-
 .../org/apache/pinot/segments/v1/creator/SegmentTestUtils.java   | 6 +++---
 .../main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java  | 2 +-
 .../java/org/apache/pinot/hadoop/job/JobConfigConstants.java     | 3 +++
 .../apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java    | 9 ++++++++-
 .../test/java/org/apache/pinot/server/util/SegmentTestUtils.java | 2 +-
 .../apache/pinot/tools/admin/command/CreateSegmentCommand.java   | 2 +-
 .../tools/segment/converter/ColumnarToStarTreeConverter.java     | 2 +-
 14 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java b/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java
index bc176bd..a43691c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/generator/SegmentGeneratorConfig.java
@@ -310,10 +310,12 @@ public class SegmentGeneratorConfig {
     return _inputFilePath;
   }
 
-  public void setInputFilePath(String inputFilePath) {
+  public void setInputFilePath(String inputFilePath, boolean isLocalPath) {
     Preconditions.checkNotNull(inputFilePath);
     File inputFile = new File(inputFilePath);
+    if (isLocalPath) {
     Preconditions.checkState(inputFile.exists(), "Input path {} does not exist.", inputFilePath);
+    }
     _inputFilePath = inputFile.getAbsolutePath();
   }
 
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java
index 19c207b..e1a75d5 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/MetadataAndDictionaryAggregationPlanMakerTest.java
@@ -90,7 +90,7 @@ public class MetadataAndDictionaryAggregationPlanMakerTest {
 
     // Create the segment generator config.
     SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
-    segmentGeneratorConfig.setInputFilePath(filePath);
+    segmentGeneratorConfig.setInputFilePath(filePath, true);
     segmentGeneratorConfig.setTableName("testTable");
     segmentGeneratorConfig.setSegmentName(SEGMENT_NAME);
     segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
@@ -116,7 +116,7 @@ public class MetadataAndDictionaryAggregationPlanMakerTest {
 
     // Create the segment generator config.
     segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
-    segmentGeneratorConfig.setInputFilePath(filePath);
+    segmentGeneratorConfig.setInputFilePath(filePath, true);
     segmentGeneratorConfig.setTableName("testTableStarTree");
     segmentGeneratorConfig.setSegmentName(SEGMENT_NAME_STARTREE);
     segmentGeneratorConfig.setOutDir(INDEX_DIR_STARTREE.getAbsolutePath());
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java
index 6d6d810..bc4096e 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/SegmentGenerationWithBytesTypeTest.java
@@ -320,7 +320,7 @@ public class SegmentGenerationWithBytesTypeTest {
   private IndexSegment buildSegmentFromAvro(Schema schema, String dirName, String avroName, String segmentName)
       throws Exception {
     SegmentGeneratorConfig config = new SegmentGeneratorConfig();
-    config.setInputFilePath(dirName + File.separator + avroName);
+    config.setInputFilePath(dirName + File.separator + avroName, true);
     config.setOutDir(dirName);
     config.setSegmentName(segmentName);
     config.setSchema(schema);
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java
index ca390cb..67cabe8 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/hll/SegmentWithHllIndexCreateHelper.java
@@ -124,7 +124,7 @@ public class SegmentWithHllIndexCreateHelper {
         new SegmentGeneratorConfig(SegmentTestUtils.extractSchemaFromAvroWithoutTime(inputAvro));
 
     // set other fields in segmentGenConfig
-    segmentGenConfig.setInputFilePath(inputAvro.getAbsolutePath());
+    segmentGenConfig.setInputFilePath(inputAvro.getAbsolutePath(), true);
     segmentGenConfig.setTimeColumnName(timeColumnName);
     segmentGenConfig.setSegmentTimeUnit(timeUnit);
     segmentGenConfig.setFormat(FileFormat.AVRO);
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java
index d642210..791ee01 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseMultiValueQueriesTest.java
@@ -96,7 +96,7 @@ public abstract class BaseMultiValueQueriesTest extends BaseQueriesTest {
 
     // Create the segment generator config.
     SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
-    segmentGeneratorConfig.setInputFilePath(filePath);
+    segmentGeneratorConfig.setInputFilePath(filePath, true);
     segmentGeneratorConfig.setTableName("testTable");
     segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
     segmentGeneratorConfig.setInvertedIndexCreationColumns(Arrays.asList("column3", "column7", "column8", "column9"));
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java
index b5b2176..b1465b1 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseSingleValueQueriesTest.java
@@ -98,7 +98,7 @@ public abstract class BaseSingleValueQueriesTest extends BaseQueriesTest {
 
     // Create the segment generator config.
     SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
-    segmentGeneratorConfig.setInputFilePath(filePath);
+    segmentGeneratorConfig.setInputFilePath(filePath, true);
     segmentGeneratorConfig.setTableName("testTable");
     segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
     segmentGeneratorConfig
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
index 57f0171..4ac4171 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/FastHllQueriesTest.java
@@ -232,7 +232,7 @@ public class FastHllQueriesTest extends BaseQueriesTest {
 
     // Create the segment generator config
     SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schemaBuilder.build());
-    segmentGeneratorConfig.setInputFilePath(filePath);
+    segmentGeneratorConfig.setInputFilePath(filePath, true);
     segmentGeneratorConfig.setTableName("testTable");
     segmentGeneratorConfig.setOutDir(INDEX_DIR.getAbsolutePath());
     segmentGeneratorConfig
diff --git a/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java b/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java
index 2dcaa4e..3c30b0b 100644
--- a/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java
+++ b/pinot-core/src/test/java/org/apache/pinot/segments/v1/creator/SegmentTestUtils.java
@@ -62,7 +62,7 @@ public class SegmentTestUtils {
       throws IOException {
     SegmentGeneratorConfig segmentGeneratorConfig =
         new SegmentGeneratorConfig(extractSchemaFromAvroWithoutTime(avroFile));
-    segmentGeneratorConfig.setInputFilePath(avroFile.getAbsolutePath());
+    segmentGeneratorConfig.setInputFilePath(avroFile.getAbsolutePath(), true);
     segmentGeneratorConfig.setOutDir(outputDir.getAbsolutePath());
     segmentGeneratorConfig.setTableName(tableName);
     return segmentGeneratorConfig;
@@ -73,7 +73,7 @@ public class SegmentTestUtils {
       throws IOException {
     final SegmentGeneratorConfig segmentGenSpec =
         new SegmentGeneratorConfig(extractSchemaFromAvroWithoutTime(inputAvro));
-    segmentGenSpec.setInputFilePath(inputAvro.getAbsolutePath());
+    segmentGenSpec.setInputFilePath(inputAvro.getAbsolutePath(), true);
     segmentGenSpec.setTimeColumnName(timeColumn);
     segmentGenSpec.setSegmentTimeUnit(timeUnit);
     segmentGenSpec.setFormat(FileFormat.AVRO);
@@ -87,7 +87,7 @@ public class SegmentTestUtils {
   public static SegmentGeneratorConfig getSegmentGeneratorConfigWithSchema(File inputAvro, File outputDir,
       String tableName, Schema schema) {
     SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
-    segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath());
+    segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath(), true);
     segmentGeneratorConfig.setOutDir(outputDir.getAbsolutePath());
     segmentGeneratorConfig.setFormat(FileFormat.AVRO);
     segmentGeneratorConfig.setSegmentVersion(SegmentVersion.v1);
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java
index 6585295..b8200db 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/io/PinotRecordWriter.java
@@ -100,7 +100,7 @@ public class PinotRecordWriter<K, V> extends RecordWriter<K, V> {
 
   private void createSegment(String inputFile, SegmentIndexCreationDriver driver) {
     try {
-      _segmentConfig.setInputFilePath(inputFile);
+      _segmentConfig.setInputFilePath(inputFile, true);
       driver.init(_segmentConfig);
       driver.build();
     } catch (Exception e) {
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
index 35557b5..1b67306 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/JobConfigConstants.java
@@ -52,4 +52,7 @@ public class JobConfigConstants {
 
   // The path to the record reader to be configured
   public static final String RECORD_READER_PATH = "record.reader.path";
+
+  // Path to segments, local v. on hdfs
+  public static final String USE_HDFS_PATH_FOR_SEGMENT_GEN = "use.hdfs.path.for.segment.gen";
 }
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
index 21f3f16..469ac2e 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mapper/SegmentCreationMapper.java
@@ -83,6 +83,8 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
 
   protected FileSystem _fileSystem;
 
+  protected boolean _useHDFSPathForSegmentGen;
+
   @Override
   public void setup(Context context)
       throws IOException, InterruptedException {
@@ -103,6 +105,7 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
     }
 
     _recordReaderPath = _jobConf.get(JobConfigConstants.RECORD_READER_PATH, null);
+    _useHDFSPathForSegmentGen = _jobConf.getBoolean(JobConfigConstants.USE_HDFS_PATH_FOR_SEGMENT_GEN, false);
 
     // Set up segment name generator
     String segmentNameGeneratorType =
@@ -203,7 +206,11 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
 
     SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(_tableConfig, _schema);
     segmentGeneratorConfig.setTableName(_rawTableName);
-    segmentGeneratorConfig.setInputFilePath(localInputFile.getPath());
+    if (_useHDFSPathForSegmentGen) {
+      segmentGeneratorConfig.setInputFilePath(hdfsInputFile.toString(), false);
+    } else {
+      segmentGeneratorConfig.setInputFilePath(localInputFile.getPath(), true);
+    }
     segmentGeneratorConfig.setOutDir(_localSegmentDir.getPath());
     segmentGeneratorConfig.setSegmentNameGenerator(_segmentNameGenerator);
     segmentGeneratorConfig.setSequenceId(sequenceId);
diff --git a/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java b/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java
index be30064..27f613d 100644
--- a/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java
+++ b/pinot-server/src/test/java/org/apache/pinot/server/util/SegmentTestUtils.java
@@ -45,7 +45,7 @@ public class SegmentTestUtils {
       segmentGeneratorConfig = new SegmentGeneratorConfig(pinotSchema);
     }
 
-    segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath());
+    segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath(), true);
     segmentGeneratorConfig.setSegmentTimeUnit(timeUnit);
     if (inputAvro.getName().endsWith("gz")) {
       segmentGeneratorConfig.setFormat(FileFormat.GZIPPED_AVRO);
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
index 95c1371..058e28e 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/CreateSegmentCommand.java
@@ -370,7 +370,7 @@ public class CreateSegmentCommand extends AbstractBaseAdminCommand implements Co
         public void run() {
           try {
             SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentGeneratorConfig);
-            config.setInputFilePath(file.getAbsolutePath());
+            config.setInputFilePath(file.getAbsolutePath(), true);
             config.setSegmentName(_segmentName + "_" + segCnt);
             config.loadConfigFiles();
 
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java
index 34094b9..42d2db7 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/ColumnarToStarTreeConverter.java
@@ -111,7 +111,7 @@ public class ColumnarToStarTreeConverter {
     SegmentMetadata segmentMetadata = new SegmentMetadataImpl(columnarSegment);
     SegmentGeneratorConfig config = new SegmentGeneratorConfig(segmentMetadata.getSchema());
     config.setDataDir(_inputDirName);
-    config.setInputFilePath(columnarSegment.getAbsolutePath());
+    config.setInputFilePath(columnarSegment.getAbsolutePath(), true);
     config.setFormat(FileFormat.PINOT);
     config.setOutDir(_outputDirName);
     config.setOverwrite(_overwrite);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org