You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2014/10/14 00:11:23 UTC

svn commit: r1631578 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ test/queries/clientpositive/ test/results/clientpositive/ test/results/clientpositive/tez/

Author: sershe
Date: Mon Oct 13 22:11:23 2014
New Revision: 1631578

URL: http://svn.apache.org/r1631578
Log:
HIVE-8225 : CBO trunk merge: union11 test fails due to incorrect plan (Pengcheng Xiong, reviewed by Sergey Shelukhin, Laljo John Pullokkaran)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java
    hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q
    hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java?rev=1631578&r1=1631577&r2=1631578&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java Mon Oct 13 22:11:23 2014
@@ -28,7 +28,10 @@ import org.apache.hadoop.hive.ql.optimiz
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.eigenbase.rel.AggregateCall;
 import org.eigenbase.rel.AggregateRelBase;
+import org.eigenbase.rel.Aggregation;
 import org.eigenbase.rel.EmptyRel;
 import org.eigenbase.rel.FilterRelBase;
 import org.eigenbase.rel.JoinRelBase;
@@ -43,9 +46,12 @@ import org.eigenbase.rel.rules.MultiJoin
 import org.eigenbase.relopt.hep.HepRelVertex;
 import org.eigenbase.relopt.volcano.RelSubset;
 import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeFactory;
 import org.eigenbase.rex.RexNode;
+import org.eigenbase.sql.SqlKind;
 import org.eigenbase.util.Pair;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 
 public class PlanModifierForASTConv {
@@ -108,8 +114,14 @@ public class PlanModifierForASTConv {
           introduceDerivedTable(((HiveSortRel) rel).getChild(), rel);
         }
       } else if (rel instanceof HiveAggregateRel) {
+        RelNode newParent = parent;
         if (!validGBParent(rel, parent)) {
-          introduceDerivedTable(rel, parent);
+          newParent = introduceDerivedTable(rel, parent);
+        }
+        // check if groupby is empty and there is no other cols in aggr
+        // this should only happen when newParent is constant.
+        if (isEmptyGrpAggr(rel)) {
+          replaceEmptyGroupAggr(rel, newParent);
         }
       }
     }
@@ -206,7 +218,7 @@ public class PlanModifierForASTConv {
     return select;
   }
 
-  private static void introduceDerivedTable(final RelNode rel, RelNode parent) {
+  private static RelNode introduceDerivedTable(final RelNode rel, RelNode parent) {
     int i = 0;
     int pos = -1;
     List<RelNode> childList = parent.getInputs();
@@ -226,6 +238,8 @@ public class PlanModifierForASTConv {
     RelNode select = introduceDerivedTable(rel);
 
     parent.replaceInput(pos, select);
+    
+    return select;
   }
 
   private static boolean validJoinParent(RelNode joinNode, RelNode parent) {
@@ -310,4 +324,40 @@ public class PlanModifierForASTConv {
 
     return validChild;
   }
+  
+  private static boolean isEmptyGrpAggr(RelNode gbNode) {
+    // Verify if both groupset and aggrfunction are empty)
+    AggregateRelBase aggrnode = (AggregateRelBase) gbNode;
+    if (aggrnode.getGroupSet().isEmpty() && aggrnode.getAggCallList().isEmpty()) {
+      return true;
+    }
+    return false;
+  }
+  
+  private static void replaceEmptyGroupAggr(final RelNode rel, RelNode parent) {
+    // If this function is called, the parent should only include constant
+    List<RexNode> exps = parent.getChildExps();
+    for (RexNode rexNode : exps) {
+      if (rexNode.getKind() != SqlKind.LITERAL) {
+        throw new RuntimeException("We expect " + parent.toString()
+            + " to contain only constants. However, " + rexNode.toString() + " is "
+            + rexNode.getKind());
+      }
+    }
+    HiveAggregateRel oldAggRel = (HiveAggregateRel) rel;
+    RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
+    RelDataType longType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, typeFactory);
+    RelDataType intType = TypeConverter.convert(TypeInfoFactory.intTypeInfo, typeFactory);
+    // Create the dummy aggregation.
+    Aggregation countFn = (Aggregation) SqlFunctionConverter.getOptiqAggFn("count",
+        ImmutableList.of(intType), longType);
+    // TODO: Using 0 might be wrong; might need to walk down to find the
+    // proper index of a dummy.
+    List<Integer> argList = ImmutableList.of(0);
+    AggregateCall dummyCall = new AggregateCall(countFn, false, argList, longType, null);
+    AggregateRelBase newAggRel = oldAggRel.copy(oldAggRel.getTraitSet(), oldAggRel.getChild(),
+        oldAggRel.getGroupSet(), ImmutableList.of(dummyCall));
+    RelNode select = introduceDerivedTable(newAggRel);
+    parent.replaceInput(0, select);
+  }
 }

Modified: hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q?rev=1631578&r1=1631577&r2=1631578&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q Mon Oct 13 22:11:23 2014
@@ -456,7 +456,29 @@ from (select b.key, count(*) 
 ) a
 ;
 
--- 17. get stats with empty partition list
+-- 20. Test get stats with empty partition list
 select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true;
 
+-- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc;
+
+select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc;
+
+select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key;
+        
+select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key;
+
+select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+    UNION  ALL
+        select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key;
 

Modified: hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out?rev=1631578&r1=1631577&r2=1631578&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out Mon Oct 13 22:11:23 2014
@@ -18946,17 +18946,94 @@ POSTHOOK: Input: default@src_cbo
 96	1
 97	2
 98	2
-PREHOOK: query: -- 17. get stats with empty partition list
+PREHOOK: query: -- 20. Test get stats with empty partition list
 select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t2@dt=2014
 #### A masked pattern was here ####
-POSTHOOK: query: -- 17. get stats with empty partition list
+POSTHOOK: query: -- 20. Test get stats with empty partition list
 select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t2@dt=2014
 #### A masked pattern was here ####
+PREHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1
+PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1	500
+PREHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg
+max
+min
+PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg	1.5
+max	3.0
+min	1.0
+PREHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+    UNION  ALL
+        select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+    UNION  ALL
+        select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg	1
+max	1
+min	1

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out?rev=1631578&r1=1631577&r2=1631578&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out Mon Oct 13 22:11:23 2014
@@ -18946,17 +18946,94 @@ POSTHOOK: Input: default@src_cbo
 96	1
 97	2
 98	2
-PREHOOK: query: -- 17. get stats with empty partition list
+PREHOOK: query: -- 20. Test get stats with empty partition list
 select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t2@dt=2014
 #### A masked pattern was here ####
-POSTHOOK: query: -- 17. get stats with empty partition list
+POSTHOOK: query: -- 20. Test get stats with empty partition list
 select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t2@dt=2014
 #### A masked pattern was here ####
+PREHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1
+PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1	500
+PREHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg
+max
+min
+PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+	UNION  ALL
+    	select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg	1.5
+max	3.0
+min	1.0
+PREHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+    UNION  ALL
+        select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+    UNION  ALL
+        select 'min' as key,  min(c_int) as value from t3 s2
+    UNION ALL
+        select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg	1
+max	1
+min	1