You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2014/10/14 00:11:23 UTC
svn commit: r1631578 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/
test/queries/clientpositive/ test/results/clientpositive/
test/results/clientpositive/tez/
Author: sershe
Date: Mon Oct 13 22:11:23 2014
New Revision: 1631578
URL: http://svn.apache.org/r1631578
Log:
HIVE-8225 : CBO trunk merge: union11 test fails due to incorrect plan (Pengcheng Xiong, reviewed by Sergey Shelukhin, Laljo John Pullokkaran)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java
hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q
hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java?rev=1631578&r1=1631577&r2=1631578&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java Mon Oct 13 22:11:23 2014
@@ -28,7 +28,10 @@ import org.apache.hadoop.hive.ql.optimiz
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.eigenbase.rel.AggregateCall;
import org.eigenbase.rel.AggregateRelBase;
+import org.eigenbase.rel.Aggregation;
import org.eigenbase.rel.EmptyRel;
import org.eigenbase.rel.FilterRelBase;
import org.eigenbase.rel.JoinRelBase;
@@ -43,9 +46,12 @@ import org.eigenbase.rel.rules.MultiJoin
import org.eigenbase.relopt.hep.HepRelVertex;
import org.eigenbase.relopt.volcano.RelSubset;
import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeFactory;
import org.eigenbase.rex.RexNode;
+import org.eigenbase.sql.SqlKind;
import org.eigenbase.util.Pair;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
public class PlanModifierForASTConv {
@@ -108,8 +114,14 @@ public class PlanModifierForASTConv {
introduceDerivedTable(((HiveSortRel) rel).getChild(), rel);
}
} else if (rel instanceof HiveAggregateRel) {
+ RelNode newParent = parent;
if (!validGBParent(rel, parent)) {
- introduceDerivedTable(rel, parent);
+ newParent = introduceDerivedTable(rel, parent);
+ }
+ // check if groupby is empty and there is no other cols in aggr
+ // this should only happen when newParent is constant.
+ if (isEmptyGrpAggr(rel)) {
+ replaceEmptyGroupAggr(rel, newParent);
}
}
}
@@ -206,7 +218,7 @@ public class PlanModifierForASTConv {
return select;
}
- private static void introduceDerivedTable(final RelNode rel, RelNode parent) {
+ private static RelNode introduceDerivedTable(final RelNode rel, RelNode parent) {
int i = 0;
int pos = -1;
List<RelNode> childList = parent.getInputs();
@@ -226,6 +238,8 @@ public class PlanModifierForASTConv {
RelNode select = introduceDerivedTable(rel);
parent.replaceInput(pos, select);
+
+ return select;
}
private static boolean validJoinParent(RelNode joinNode, RelNode parent) {
@@ -310,4 +324,40 @@ public class PlanModifierForASTConv {
return validChild;
}
+
+ private static boolean isEmptyGrpAggr(RelNode gbNode) {
+ // Verify if both groupset and aggrfunction are empty)
+ AggregateRelBase aggrnode = (AggregateRelBase) gbNode;
+ if (aggrnode.getGroupSet().isEmpty() && aggrnode.getAggCallList().isEmpty()) {
+ return true;
+ }
+ return false;
+ }
+
+ private static void replaceEmptyGroupAggr(final RelNode rel, RelNode parent) {
+ // If this function is called, the parent should only include constant
+ List<RexNode> exps = parent.getChildExps();
+ for (RexNode rexNode : exps) {
+ if (rexNode.getKind() != SqlKind.LITERAL) {
+ throw new RuntimeException("We expect " + parent.toString()
+ + " to contain only constants. However, " + rexNode.toString() + " is "
+ + rexNode.getKind());
+ }
+ }
+ HiveAggregateRel oldAggRel = (HiveAggregateRel) rel;
+ RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
+ RelDataType longType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, typeFactory);
+ RelDataType intType = TypeConverter.convert(TypeInfoFactory.intTypeInfo, typeFactory);
+ // Create the dummy aggregation.
+ Aggregation countFn = (Aggregation) SqlFunctionConverter.getOptiqAggFn("count",
+ ImmutableList.of(intType), longType);
+ // TODO: Using 0 might be wrong; might need to walk down to find the
+ // proper index of a dummy.
+ List<Integer> argList = ImmutableList.of(0);
+ AggregateCall dummyCall = new AggregateCall(countFn, false, argList, longType, null);
+ AggregateRelBase newAggRel = oldAggRel.copy(oldAggRel.getTraitSet(), oldAggRel.getChild(),
+ oldAggRel.getGroupSet(), ImmutableList.of(dummyCall));
+ RelNode select = introduceDerivedTable(newAggRel);
+ parent.replaceInput(0, select);
+ }
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q?rev=1631578&r1=1631577&r2=1631578&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/cbo_correctness.q Mon Oct 13 22:11:23 2014
@@ -456,7 +456,29 @@ from (select b.key, count(*)
) a
;
--- 17. get stats with empty partition list
+-- 20. Test get stats with empty partition list
select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true;
+-- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc;
+
+select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc;
+
+select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key;
+
+select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key;
+
+select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key;
Modified: hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out?rev=1631578&r1=1631577&r2=1631578&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/cbo_correctness.q.out Mon Oct 13 22:11:23 2014
@@ -18946,17 +18946,94 @@ POSTHOOK: Input: default@src_cbo
96 1
97 2
98 2
-PREHOOK: query: -- 17. get stats with empty partition list
+PREHOOK: query: -- 20. Test get stats with empty partition list
select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Input: default@t2@dt=2014
#### A masked pattern was here ####
-POSTHOOK: query: -- 17. get stats with empty partition list
+POSTHOOK: query: -- 20. Test get stats with empty partition list
select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
POSTHOOK: Input: default@t2@dt=2014
#### A masked pattern was here ####
+PREHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1
+PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1 500
+PREHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg
+max
+min
+PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg 1.5
+max 3.0
+min 1.0
+PREHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg 1
+max 1
+min 1
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out?rev=1631578&r1=1631577&r2=1631578&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out Mon Oct 13 22:11:23 2014
@@ -18946,17 +18946,94 @@ POSTHOOK: Input: default@src_cbo
96 1
97 2
98 2
-PREHOOK: query: -- 17. get stats with empty partition list
+PREHOOK: query: -- 20. Test get stats with empty partition list
select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Input: default@t2@dt=2014
#### A masked pattern was here ####
-POSTHOOK: query: -- 17. get stats with empty partition list
+POSTHOOK: query: -- 20. Test get stats with empty partition list
select t1.value from t1 join t2 on t1.key = t2.key where t1.dt = '10' and t1.c_boolean = true
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
POSTHOOK: Input: default@t2@dt=2014
#### A masked pattern was here ####
+PREHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- 21. Test groupby is empty and there is no other cols in aggr
+select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1
+PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src) unionsrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1 500
+PREHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg
+max
+min
+PREHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg 1.5
+max 3.0
+min 1.0
+PREHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from t3 s1
+ UNION ALL
+ select 'min' as key, min(c_int) as value from t3 s2
+ UNION ALL
+ select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+avg 1
+max 1
+min 1