You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2016/05/27 23:42:14 UTC

hive git commit: HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/master 02b2fb5a9 -> 4e3da98d7


HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4e3da98d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4e3da98d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4e3da98d

Branch: refs/heads/master
Commit: 4e3da98d7f05ae29c71bd379c3f59691588c0209
Parents: 02b2fb5
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Fri May 27 16:41:50 2016 -0700
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Fri May 27 16:41:50 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |  7 ++++---
 .../hive/ql/io/orc/TestInputOutputFormat.java   | 21 ++++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4e3da98d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 33fe3b6..087207b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -155,6 +155,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
   private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
   private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
+  private static final int DEFAULT_ETL_FILE_THRESHOLD = 100;
 
   /**
    * When picking the hosts for a split that crosses block boundaries,
@@ -510,7 +511,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     private final int splitStrategyBatchMs;
     private final long maxSize;
     private final long minSize;
-    private final int minSplits;
+    private final int etlFileThreshold;
     private final boolean footerInSplits;
     private final boolean cacheStripeDetails;
     private final boolean forceThreadpool;
@@ -555,7 +556,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
       cacheStripeDetails = (cacheStripeDetailsSize > 0);
 
-      this.minSplits = Math.min(cacheStripeDetailsSize, minSplits);
+      this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits;
 
       synchronized (Context.class) {
         if (threadPool == null) {
@@ -1938,7 +1939,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
               deltas, covered, isOriginal, ugi, allowSyntheticFileIds);
         default:
           // HYBRID strategy
-          if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
+          if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) {
             return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseOrOriginalFiles,
                 deltas, covered, isOriginal, ugi, allowSyntheticFileIds);
           } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/4e3da98d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index c1ef0e7..52098ae 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -525,6 +525,27 @@ public class TestInputOutputFormat {
         }
       }
     }
+
+    k = 0;
+    conf.set("hive.orc.cache.stripe.details.size", "-1");
+    for (int c : counts) {
+      for (int s : sizes) {
+        final FileSystem fs = generateMockFiles(c, s);
+        for (int n : numSplits) {
+          final OrcInputFormat.Context context = new OrcInputFormat.Context(
+              conf, n);
+          OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
+              context, fs, new MockPath(fs, "mock:/a/b"), false, null);
+          final SplitStrategy splitStrategy = createSplitStrategy(context, gen);
+          assertTrue(
+              String.format(
+                  "Split strategy for %d files x %d size for %d splits", c, s,
+                  n),
+              splitStrategy.getClass().getSimpleName()
+                  .equals(strategyResults[k++]));
+        }
+      }
+    }
   }
 
   @Test