You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2017/02/28 03:37:15 UTC

[2/2] hive git commit: HIVE-14990 : run all tests for MM tables and fix the issues that are found - disable sort sampling (Sergey Shelukhin)

HIVE-14990 : run all tests for MM tables and fix the issues that are found - disable sort sampling (Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6cba3e7e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6cba3e7e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6cba3e7e

Branch: refs/heads/hive-14535
Commit: 6cba3e7e464ca35751b7f7230f8b4cd23a8c89d0
Parents: bd96c49
Author: Sergey Shelukhin <se...@apache.org>
Authored: Mon Feb 27 19:33:39 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Mon Feb 27 19:33:39 2017 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/io/HiveInputFormat.java |  3 +++
 .../hive/ql/optimizer/physical/SamplingOptimizer.java | 14 ++++++++++++++
 2 files changed, 17 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6cba3e7e/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 5ea3cec..921aa53 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -669,6 +669,9 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
       throws IOException {
     PartitionDesc partDesc = pathToPartitionInfo.get(dir);
     if (partDesc == null) {
+      // TODO: HiveFileFormatUtils.getPartitionDescFromPathRecursively for MM tables?
+      //       So far, the only time this was called for an MM directory was in error.
+      //       Keep it like this for now; may need replacement if we find a valid usage like this.
       partDesc = pathToPartitionInfo.get(Path.getPathWithoutSchemeAndAuthority(dir));
     }
     if (partDesc == null) {

http://git-wip-us.apache.org/repos/asf/hive/blob/6cba3e7e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java
index 2def168..5b89059 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java
@@ -20,6 +20,7 @@
 
 package org.apache.hadoop.hive.ql.optimizer.physical;
 
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
@@ -27,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
@@ -59,6 +61,18 @@ public class SamplingOptimizer implements PhysicalPlanResolver {
       if (!(operator instanceof TableScanOperator)) {
         continue;
       }
+      TableScanOperator tsop = (TableScanOperator)operator;
+      Table tbl = tsop.getConf().getTableMetadata();
+      if (tbl == null) {
+        continue;
+      }
+      if (MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())) {
+        // Not supported for MM tables - sampler breaks separate MM dirs into splits, resulting in
+        // mismatch when the downstream task looks at them again assuming they are MM table roots.
+        // We could somehow unset the MM flag for the main job when the sampler succeeds, since the
+        // sampler will limit the input to the correct directories, but we don't care about MR.
+        continue;
+      }
       ReduceSinkOperator child =
           OperatorUtils.findSingleOperator(operator, ReduceSinkOperator.class);
       if (child == null ||