You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2016/05/26 01:22:48 UTC
hive git commit: HIVE-13821: OrcSplit groups all delta files together into a single split (Prasanth Jayachandran reviewed by Eugene Koifman)
Repository: hive
Updated Branches:
refs/heads/master 51609a0f2 -> 76961d1f6
HIVE-13821: OrcSplit groups all delta files together into a single split (Prasanth Jayachandran reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/76961d1f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/76961d1f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/76961d1f
Branch: refs/heads/master
Commit: 76961d1f67a5d5e3614d3d81c417684fab92c6c2
Parents: 51609a0
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Wed May 25 18:22:34 2016 -0700
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Wed May 25 18:22:34 2016 -0700
----------------------------------------------------------------------
.../ql/exec/tez/ColumnarSplitSizeEstimator.java | 6 +++--
.../hive/ql/io/orc/TestInputOutputFormat.java | 23 ++++++++++++++++++++
2 files changed, 27 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/76961d1f/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java
index dfc778a..ecd4ddc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java
@@ -42,7 +42,6 @@ public class ColumnarSplitSizeEstimator implements SplitSizeEstimator {
if (isDebugEnabled) {
LOG.debug("Estimated column projection size: " + colProjSize);
}
- return colProjSize;
} else if (inputSplit instanceof HiveInputFormat.HiveInputSplit) {
InputSplit innerSplit = ((HiveInputFormat.HiveInputSplit) inputSplit).getInputSplit();
@@ -51,9 +50,12 @@ public class ColumnarSplitSizeEstimator implements SplitSizeEstimator {
if (isDebugEnabled) {
LOG.debug("Estimated column projection size: " + colProjSize);
}
- return colProjSize;
}
}
+ if (colProjSize <= 0) {
+ /* columnar splits of unknown size - estimate worst-case */
+ return Integer.MAX_VALUE;
+ }
return colProjSize;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/76961d1f/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 4eb0249..c1ef0e7 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
+import org.apache.hadoop.hive.ql.exec.tez.ColumnarSplitSizeEstimator;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -556,6 +557,28 @@ public class TestInputOutputFormat {
}
@Test
+ public void testACIDSplitStrategy() throws Exception {
+ conf.set("bucket_count", "2");
+ OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
+ MockFileSystem fs = new MockFileSystem(conf,
+ new MockFile("mock:/a/delta_000_001/part-00", 1000, new byte[1], new MockBlock("host1")),
+ new MockFile("mock:/a/delta_000_001/part-01", 1000, new byte[1], new MockBlock("host1")),
+ new MockFile("mock:/a/delta_001_002/part-02", 1000, new byte[1], new MockBlock("host1")),
+ new MockFile("mock:/a/delta_001_002/part-03", 1000, new byte[1], new MockBlock("host1")));
+ OrcInputFormat.FileGenerator gen =
+ new OrcInputFormat.FileGenerator(context, fs,
+ new MockPath(fs, "mock:/a"), false, null);
+ OrcInputFormat.SplitStrategy splitStrategy = createSplitStrategy(context, gen);
+ assertEquals(true, splitStrategy instanceof OrcInputFormat.ACIDSplitStrategy);
+ List<OrcSplit> splits = splitStrategy.getSplits();
+ ColumnarSplitSizeEstimator splitSizeEstimator = new ColumnarSplitSizeEstimator();
+ for (OrcSplit split: splits) {
+ assertEquals(Integer.MAX_VALUE, splitSizeEstimator.getEstimatedSize(split));
+ }
+ assertEquals(2, splits.size());
+ }
+
+ @Test
public void testBIStrategySplitBlockBoundary() throws Exception {
conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);