You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2019/10/01 18:36:02 UTC

[incubator-pinot] branch bugfixing_hadoop_filesystem updated (1381b64 -> 6c3e4cf)

This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a change to branch bugfixing_hadoop_filesystem
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git.


 discard 1381b64  more logging
     new 6c3e4cf  more logging

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (1381b64)
            \
             N -- N -- N   refs/heads/bugfixing_hadoop_filesystem (6c3e4cf)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 01/01: more logging

Posted by xi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch bugfixing_hadoop_filesystem
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 6c3e4cfeab52c5d8ddb4b8e763def76dbc06d3dc
Author: Xiang Fu <xi...@alt-chain.io>
AuthorDate: Mon Sep 30 10:37:50 2019 -0700

    more logging
---
 .../src/main/java/org/apache/pinot/hadoop/job/BaseSegmentJob.java | 8 +++++++-
 .../main/java/org/apache/pinot/hadoop/job/SegmentCreationJob.java | 6 +++---
 .../apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java    | 2 +-
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/BaseSegmentJob.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/BaseSegmentJob.java
index 7828fdf..fb39ac1 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/BaseSegmentJob.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/BaseSegmentJob.java
@@ -62,7 +62,13 @@ public abstract class BaseSegmentJob extends Configured {
       throws IOException {
     List<Path> tarFilePaths = new ArrayList<>();
     FileSystem fileSystem = FileSystem.get(pathPattern.toUri(), _conf);
-    getDataFilePathsHelper(fileSystem, fileSystem.globStatus(pathPattern), tarFilePaths);
+    _logger.info("Using filesystem: {}", fileSystem);
+    final FileStatus[] fileStatuses = fileSystem.globStatus(pathPattern);
+    if (fileStatuses == null) {
+      _logger.warn("Unable to match file status from file path pattern: {}", pathPattern);
+    } else {
+      getDataFilePathsHelper(fileSystem, fileStatuses, tarFilePaths);
+    }
     return tarFilePaths;
   }
 
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/SegmentCreationJob.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/SegmentCreationJob.java
index aa68f10..9807757 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/SegmentCreationJob.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/SegmentCreationJob.java
@@ -109,7 +109,7 @@ public class SegmentCreationJob extends BaseSegmentJob {
       return true;
     }
     return fileName.endsWith(".avro") || fileName.endsWith(".csv") || fileName.endsWith(".json") || fileName
-        .endsWith(".thrift");
+        .endsWith(".thrift") || fileName.endsWith(".parquet");
   }
 
   public void run()
@@ -117,7 +117,7 @@ public class SegmentCreationJob extends BaseSegmentJob {
     _logger.info("Starting {}", getClass().getSimpleName());
 
     // Initialize all directories
-    _fileSystem = FileSystem.get(_conf);
+    _fileSystem = FileSystem.get(_outputDir.toUri(), _conf);
     JobPreparationHelper.mkdirs(_fileSystem, _outputDir, _defaultPermissionsMask);
     JobPreparationHelper.mkdirs(_fileSystem, _stagingDir, _defaultPermissionsMask);
     Path stagingInputDir = new Path(_stagingDir, "input");
@@ -208,7 +208,7 @@ public class SegmentCreationJob extends BaseSegmentJob {
       if (controllerRestApi != null) {
         return controllerRestApi.getSchema();
       } else {
-        try (InputStream inputStream = _fileSystem.open(_schemaFile)) {
+        try (InputStream inputStream = FileSystem.get(_schemaFile.toUri(), getConf()).open(_schemaFile)) {
           return Schema.fromInputSteam(inputStream);
         }
       }
diff --git a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java
index 013104a..823c370 100644
--- a/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java
+++ b/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java
@@ -148,7 +148,6 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
     Preconditions.checkState(_localSegmentDir.mkdir());
     Preconditions.checkState(_localSegmentTarDir.mkdir());
 
-    _fileSystem = FileSystem.get(context.getConfiguration());
 
     _logger.info("*********************************************************************");
     _logger.info("Raw Table Name: {}", _rawTableName);
@@ -194,6 +193,7 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
 
     Path hdfsInputFile = new Path(splits[0]);
     int sequenceId = Integer.parseInt(splits[1]);
+    _fileSystem = FileSystem.get(hdfsInputFile.toUri(), context.getConfiguration());
     _logger.info("Generating segment with HDFS input file: {}, sequence id: {}", hdfsInputFile, sequenceId);
 
     String inputFileName = hdfsInputFile.getName();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org