You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/09/04 04:55:09 UTC
[20/28] hive git commit: HIVE-11689 : minor flow changes to ORC split
generation (Sergey Shelukhin,
reviewed by Prasanth Jayachandran and Swarnim Kulkarni) ADDENDUM
HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni) ADDENDUM
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3ff3c6f1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3ff3c6f1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3ff3c6f1
Branch: refs/heads/llap
Commit: 3ff3c6f19ab82390f44c88cde5afea32a0299986
Parents: dbdd611
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 2 11:01:35 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 2 11:01:35 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3ff3c6f1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 05efc5f..cf8694e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -28,6 +28,7 @@ import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.concurrent.Callable;
+import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -373,7 +374,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
private final Configuration conf;
private static Cache<Path, FileInfo> footerCache;
private static ExecutorService threadPool = null;
- private static ExecutorCompletionService<AcidDirInfo> ecs = null;
private final int numBuckets;
private final long maxSize;
private final long minSize;
@@ -419,7 +419,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
threadPool = Executors.newFixedThreadPool(numThreads,
new ThreadFactoryBuilder().setDaemon(true)
.setNameFormat("ORC_GET_SPLITS #%d").build());
- ecs = new ExecutorCompletionService<AcidDirInfo>(threadPool);
}
if (footerCache == null && cacheStripeDetails) {
@@ -440,7 +439,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
/**
* The full ACID directory information needed for splits; no more calls to HDFS needed.
* We could just live with AcidUtils.Directory but...
- * 1) That doesn't contain have base files.
+ * 1) That doesn't have base files for the base-directory case.
* 2) We save fs for convenience to avoid getting it twice.
*/
@VisibleForTesting
@@ -1031,17 +1030,18 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
// multi-threaded file statuses and split strategy
boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
Path[] paths = getInputPaths(conf);
+ CompletionService<AcidDirInfo> ecs = new ExecutorCompletionService<>(Context.threadPool);
for (Path dir : paths) {
FileSystem fs = dir.getFileSystem(conf);
FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds);
- pathFutures.add(Context.ecs.submit(fileGenerator));
+ pathFutures.add(ecs.submit(fileGenerator));
}
// complete path futures and schedule split generation
try {
for (int notIndex = 0; notIndex < paths.length; ++notIndex) {
- AcidDirInfo adi = Context.ecs.take().get();
- SplitStrategy splitStrategy = determineSplitStrategy(
+ AcidDirInfo adi = ecs.take().get();
+ SplitStrategy<?> splitStrategy = determineSplitStrategy(
context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);
if (isDebugEnabled) {
@@ -1049,12 +1049,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
if (splitStrategy instanceof ETLSplitStrategy) {
- List<SplitInfo> splitInfos = splitStrategy.getSplits();
+ List<SplitInfo> splitInfos = ((ETLSplitStrategy)splitStrategy).getSplits();
for (SplitInfo splitInfo : splitInfos) {
splitFutures.add(Context.threadPool.submit(new SplitGenerator(splitInfo)));
}
} else {
- splits.addAll(splitStrategy.getSplits());
+ @SuppressWarnings("unchecked")
+ List<OrcSplit> readySplits = (List<OrcSplit>)splitStrategy.getSplits();
+ splits.addAll(readySplits);
}
}