You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2016/05/26 01:22:48 UTC

hive git commit: HIVE-13821: OrcSplit groups all delta files together into a single split (Prasanth Jayachandran reviewed by Eugene Koifman)

Repository: hive
Updated Branches:
  refs/heads/master 51609a0f2 -> 76961d1f6


HIVE-13821: OrcSplit groups all delta files together into a single split (Prasanth Jayachandran reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/76961d1f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/76961d1f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/76961d1f

Branch: refs/heads/master
Commit: 76961d1f67a5d5e3614d3d81c417684fab92c6c2
Parents: 51609a0
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Wed May 25 18:22:34 2016 -0700
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Wed May 25 18:22:34 2016 -0700

----------------------------------------------------------------------
 .../ql/exec/tez/ColumnarSplitSizeEstimator.java |  6 +++--
 .../hive/ql/io/orc/TestInputOutputFormat.java   | 23 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/76961d1f/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java
index dfc778a..ecd4ddc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java
@@ -42,7 +42,6 @@ public class ColumnarSplitSizeEstimator implements SplitSizeEstimator {
       if (isDebugEnabled) {
         LOG.debug("Estimated column projection size: " + colProjSize);
       }
-      return colProjSize;
     } else if (inputSplit instanceof HiveInputFormat.HiveInputSplit) {
       InputSplit innerSplit = ((HiveInputFormat.HiveInputSplit) inputSplit).getInputSplit();
 
@@ -51,9 +50,12 @@ public class ColumnarSplitSizeEstimator implements SplitSizeEstimator {
         if (isDebugEnabled) {
           LOG.debug("Estimated column projection size: " + colProjSize);
         }
-        return colProjSize;
       }
     }
+    if (colProjSize <= 0) {
+      /* columnar splits of unknown size - estimate worst-case */
+      return Integer.MAX_VALUE;
+    }
     return colProjSize;
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/76961d1f/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 4eb0249..c1ef0e7 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
+import org.apache.hadoop.hive.ql.exec.tez.ColumnarSplitSizeEstimator;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -556,6 +557,28 @@ public class TestInputOutputFormat {
   }
 
   @Test
+  public void testACIDSplitStrategy() throws Exception {
+    conf.set("bucket_count", "2");
+    OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
+    MockFileSystem fs = new MockFileSystem(conf,
+        new MockFile("mock:/a/delta_000_001/part-00", 1000, new byte[1], new MockBlock("host1")),
+        new MockFile("mock:/a/delta_000_001/part-01", 1000, new byte[1], new MockBlock("host1")),
+        new MockFile("mock:/a/delta_001_002/part-02", 1000, new byte[1], new MockBlock("host1")),
+        new MockFile("mock:/a/delta_001_002/part-03", 1000, new byte[1], new MockBlock("host1")));
+    OrcInputFormat.FileGenerator gen =
+        new OrcInputFormat.FileGenerator(context, fs,
+            new MockPath(fs, "mock:/a"), false, null);
+    OrcInputFormat.SplitStrategy splitStrategy = createSplitStrategy(context, gen);
+    assertEquals(true, splitStrategy instanceof OrcInputFormat.ACIDSplitStrategy);
+    List<OrcSplit> splits = splitStrategy.getSplits();
+    ColumnarSplitSizeEstimator splitSizeEstimator = new ColumnarSplitSizeEstimator();
+    for (OrcSplit split: splits) {
+      assertEquals(Integer.MAX_VALUE, splitSizeEstimator.getEstimatedSize(split));
+    }
+    assertEquals(2, splits.size());
+  }
+
+  @Test
   public void testBIStrategySplitBlockBoundary() throws Exception {
     conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
     OrcInputFormat.Context context = new OrcInputFormat.Context(conf);