You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2015/11/05 09:36:21 UTC

hive git commit: HIVE-12223: Filter on Grouping__ID does not work properly (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 0e94a1d9d -> 175087baf


HIVE-12223: Filter on Grouping__ID does not work properly (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/175087ba
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/175087ba
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/175087ba

Branch: refs/heads/master
Commit: 175087bafed97c07fffacdfae651faef327013ae
Parents: 0e94a1d
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Oct 22 09:06:30 2015 -0700
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Nov 5 09:35:00 2015 +0100

----------------------------------------------------------------------
 .../hive/ql/ppd/ExprWalkerProcFactory.java      | 12 ++++
 .../clientpositive/groupby_grouping_id3.q       | 22 +++++++
 .../clientpositive/groupby_grouping_id3.q.out   | 60 ++++++++++++++++++++
 3 files changed, 94 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/175087ba/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
index 9bd1847..a09dcef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
@@ -28,6 +28,7 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -84,6 +85,17 @@ public final class ExprWalkerProcFactory {
         // replace the output expression with the input expression so that
         // parent op can understand this expression
         ExprNodeDesc exp = op.getColumnExprMap().get(colref.getColumn());
+        // if the operator is a group by with grouping sets and the column is its
+        // grouping id (the last key column), the predicate cannot be pushed
+        if (op instanceof GroupByOperator) {
+          GroupByOperator groupBy = (GroupByOperator) op;
+          if (groupBy.getConf().isGroupingSetsPresent()) {
+            int groupingSetPlaceholderPos = groupBy.getConf().getKeys().size() - 1;
+            if (colref.getColumn().equals(groupBy.getSchema().getColumnNames().get(groupingSetPlaceholderPos))) {
+              exp = null;
+            }
+          }
+        }
         if (exp == null) {
           // means that expression can't be pushed either because it is value in
           // group by

http://git-wip-us.apache.org/repos/asf/hive/blob/175087ba/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
new file mode 100644
index 0000000..c6746a8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
@@ -0,0 +1,22 @@
+CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1;
+
+set hive.cbo.enable = false;
+
+-- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1;
+
+set hive.cbo.enable = true;
+
+SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/175087ba/ql/src/test/results/clientpositive/groupby_grouping_id3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_grouping_id3.q.out b/ql/src/test/results/clientpositive/groupby_grouping_id3.q.out
new file mode 100644
index 0000000..c305bfd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/groupby_grouping_id3.q.out
@@ -0,0 +1,60 @@
+PREHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	NULL	1	2
+2	NULL	1	1
+3	NULL	1	2
+4	NULL	1	1
+PREHOOK: query: SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*)
+FROM T1
+GROUP BY key, value
+GROUPING SETS ((), (key))
+HAVING GROUPING__ID = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	NULL	1	2
+2	NULL	1	1
+3	NULL	1	2
+4	NULL	1	1