You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2014/10/15 00:45:06 UTC

svn commit: r1631921 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ ql/src/java/org/apache/hadoop/hive/ql/index/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/test/queries/clientnegative/ ql/src/test/queries/clientpositive/ ql...

Author: sershe
Date: Tue Oct 14 22:45:05 2014
New Revision: 1631921

URL: http://svn.apache.org/r1631921
Log:
HIVE-8389 : Fix CBO when indexes are used (Pengcheng Xiong, reviewed by Sergey Shelukhin)

Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    hive/trunk/ql/src/test/queries/clientnegative/set_hiveconf_validation2.q
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q
    hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto_partitioned.q
    hive/trunk/ql/src/test/results/clientnegative/set_hiveconf_validation2.q.out

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1631921&r1=1631920&r2=1631921&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Oct 14 22:45:05 2014
@@ -1317,10 +1317,11 @@ public class HiveConf extends Configurat
 
     HIVEOUTERJOINSUPPORTSFILTERS("hive.outerjoin.supports.filters", true, ""),
 
-    HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "more", new StringSet("minimal", "more"),
+    HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "more", new StringSet("none", "minimal", "more"),
         "Some select queries can be converted to single FETCH task minimizing latency.\n" +
         "Currently the query should be single sourced not having any subquery and should not have\n" +
         "any aggregations or distincts (which incurs RS), lateral views and joins.\n" +
+        "0. none : disable hive.fetch.task.conversion\n" +
         "1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only\n" +
         "2. more    : SELECT, FILTER, LIMIT only (support TABLESAMPLE and virtual columns)"
     ),

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java?rev=1631921&r1=1631920&r2=1631921&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java Tue Oct 14 22:45:05 2014
@@ -182,6 +182,29 @@ public class IndexPredicateAnalyzer {
     }
     ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0];
     ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1];
+    // We may need to peel off the GenericUDFBridge that is added by CBO or user
+    boolean peelOffGenericUDFBridge = false;
+    while (expr1 instanceof ExprNodeGenericFuncDesc && expr2 instanceof ExprNodeGenericFuncDesc) {
+      GenericUDF udf1 = ((ExprNodeGenericFuncDesc) expr1).getGenericUDF();
+      GenericUDF udf2 = ((ExprNodeGenericFuncDesc) expr2).getGenericUDF();
+      // If both sides are wrapped in a UDF with the same name, we assume it is a
+      // GenericUDFBridge added by CBO or the user, and peel it off.
+      if (udf1.getUdfName() == udf2.getUdfName()) {
+        peelOffGenericUDFBridge = true;
+        expr1 = expr1.getChildren().get(0);
+        expr2 = expr2.getChildren().get(0);
+      } else {
+        break;
+      }
+    }
+    // We also need to update the expr so that the index query can be generated.
+    // Note that Hive does not support UDFToDouble in the query text.
+    if (peelOffGenericUDFBridge) {
+      List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>();
+      list.add(expr1);
+      list.add(expr2);
+      expr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list);
+    }
     ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
     if (extracted == null || (extracted.length > 2 && !acceptsFields)) {
       return expr;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1631921&r1=1631920&r2=1631921&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Tue Oct 14 22:45:05 2014
@@ -143,8 +143,10 @@ public class Optimizer {
       transformations.add(new AnnotateWithOpTraits());
     }
 
-    transformations.add(new SimpleFetchOptimizer());  // must be called last
-
+    if (!HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKCONVERSION).equals("none")) {
+      transformations.add(new SimpleFetchOptimizer()); // must be called last
+    }
+    
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) {
       transformations.add(new SimpleFetchAggregation());
     }

Modified: hive/trunk/ql/src/test/queries/clientnegative/set_hiveconf_validation2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/set_hiveconf_validation2.q?rev=1631921&r1=1631920&r2=1631921&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/set_hiveconf_validation2.q (original)
+++ hive/trunk/ql/src/test/queries/clientnegative/set_hiveconf_validation2.q Tue Oct 14 22:45:05 2014
@@ -1,4 +1,4 @@
--- should fail: hive.fetch.task.conversion accepts minimal or more
+-- should fail: hive.fetch.task.conversion accepts none, minimal or more
 desc src;
 
 set hive.conf.validation=true;

Modified: hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q?rev=1631921&r1=1631920&r2=1631921&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_partitioned.q Tue Oct 14 22:45:05 2014
@@ -1,4 +1,5 @@
 set hive.stats.dbclass=fs;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 -- test automatic use of index on table with partitions

Modified: hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto_partitioned.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto_partitioned.q?rev=1631921&r1=1631920&r2=1631921&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto_partitioned.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto_partitioned.q Tue Oct 14 22:45:05 2014
@@ -1,4 +1,5 @@
 set hive.stats.dbclass=fs;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 

Modified: hive/trunk/ql/src/test/results/clientnegative/set_hiveconf_validation2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/set_hiveconf_validation2.q.out?rev=1631921&r1=1631920&r2=1631921&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/set_hiveconf_validation2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientnegative/set_hiveconf_validation2.q.out Tue Oct 14 22:45:05 2014
@@ -1,11 +1,11 @@
-PREHOOK: query: -- should fail: hive.fetch.task.conversion accepts minimal or more
+PREHOOK: query: -- should fail: hive.fetch.task.conversion accepts none, minimal or more
 desc src
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@src
-POSTHOOK: query: -- should fail: hive.fetch.task.conversion accepts minimal or more
+POSTHOOK: query: -- should fail: hive.fetch.task.conversion accepts none, minimal or more
 desc src
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src
 key                 	string              	default             
 value               	string              	default             
-Query returned non-zero code: 1, cause: 'SET hive.fetch.task.conversion=true' FAILED in validation : Invalid value.. expects one of [minimal, more].
+Query returned non-zero code: 1, cause: 'SET hive.fetch.task.conversion=true' FAILED in validation : Invalid value.. expects one of [none, minimal, more].