Posted to commits@hive.apache.org by na...@apache.org on 2012/11/09 08:35:02 UTC

svn commit: r1407371 - in /hive/trunk: data/files/ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/queries/clientnegative/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientnegative/ ...

Author: namit
Date: Fri Nov  9 07:35:01 2012
New Revision: 1407371

URL: http://svn.apache.org/viewvc?rev=1407371&view=rev
Log:
HIVE-3613 Implement grouping_id function
(Ian Gorbachev via namit)


Added:
    hive/trunk/data/files/groupby_groupingid.txt
    hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q
    hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
    hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
    hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Added: hive/trunk/data/files/groupby_groupingid.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/groupby_groupingid.txt?rev=1407371&view=auto
==============================================================================
--- hive/trunk/data/files/groupby_groupingid.txt (added)
+++ hive/trunk/data/files/groupby_groupingid.txt Fri Nov  9 07:35:01 2012
@@ -0,0 +1,6 @@
+1NULL
+11
+22
+33
+3NULL
+45

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java?rev=1407371&r1=1407370&r2=1407371&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java Fri Nov  9 07:35:01 2012
@@ -37,6 +37,16 @@ public class VirtualColumn implements Se
 
   public static VirtualColumn RAWDATASIZE = new VirtualColumn("RAW__DATA__SIZE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo);
 
+  /**
+   * GROUPINGID is used with GROUP BY ... GROUPING SETS, ROLLUP and CUBE.
+   * It composes a bit vector with a "0" or "1" value for every column
+   * in the GROUP BY clause: a bit is "1" in a result row if that column
+   * participates in the row's grouping (has not been rolled up), and "0"
+   * otherwise. The column returns the decimal value of this bit vector.
+   */
+  public static VirtualColumn GROUPINGID =
+      new VirtualColumn("GROUPING__ID", (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo);
+
 
   private String name;
   private PrimitiveTypeInfo typeInfo;

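To illustrate the bit-vector semantics described in the GROUPINGID Javadoc above, here is a minimal, self-contained Java sketch. It is not Hive's implementation and is not part of this commit; the class and method names are hypothetical. It assumes, consistent with the test output further down, that the first GROUP BY column occupies the least-significant bit and that a bit is set when the column participates in the grouping:

    // GroupingIdSketch.java -- illustrative only, not part of this commit.
    public class GroupingIdSketch {
      // Composes the grouping id for one result row: bit i is 1 when the
      // i-th GROUP BY column is present in the row's grouping (not rolled up).
      static int groupingId(boolean[] columnPresent) {
        int id = 0;
        for (int i = 0; i < columnPresent.length; i++) {
          if (columnPresent[i]) {
            id |= (1 << i);
          }
        }
        return id;
      }

      public static void main(String[] args) {
        // For GROUP BY key, val WITH CUBE the four groupings map to:
        System.out.println(groupingId(new boolean[] {false, false})); // 0: both rolled up
        System.out.println(groupingId(new boolean[] {true,  false})); // 1: key only
        System.out.println(groupingId(new boolean[] {false, true}));  // 2: val only
        System.out.println(groupingId(new boolean[] {true,  true}));  // 3: key and val
      }
    }

These are the same GROUPING__ID values 0 through 3 that appear in groupby_grouping_id1.q.out below.
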
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1407371&r1=1407370&r2=1407371&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Nov  9 07:35:01 2012
@@ -221,8 +221,6 @@ public class SemanticAnalyzer extends Ba
   //Max characters when auto generating the column name with func name
   private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
 
-  private static final String GROUPING_SET_KEY = "GROUPING_SET_KEY";
-
   private static class Phase1Ctx {
     String dest;
     int nextNum;
@@ -2732,15 +2730,19 @@ public class SemanticAnalyzer extends Ba
       Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
     // For grouping sets, add a dummy grouping key
     String groupingSetColumnName =
-      groupByInputRowResolver.get(null, GROUPING_SET_KEY).getInternalName();
+      groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
     ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
       groupingSetColumnName, null, false);
     groupByKeys.add(inputExpr);
 
     String field = getColumnInternalName(groupByKeys.size() - 1);
     outputColumnNames.add(field);
-    groupByOutputRowResolver.put(null, GROUPING_SET_KEY,
-      new ColumnInfo(field, TypeInfoFactory.stringTypeInfo, "", false));
+    groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
+        new ColumnInfo(
+            field,
+            TypeInfoFactory.stringTypeInfo,
+            null,
+            true));
     colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
   }
 
@@ -2759,7 +2761,7 @@ public class SemanticAnalyzer extends Ba
       Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
     // add a key for reduce sink
     String groupingSetColumnName =
-      reduceSinkInputRowResolver.get(null, GROUPING_SET_KEY).getInternalName();
+      reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
     ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
       groupingSetColumnName, null, false);
     reduceKeys.add(inputExpr);
@@ -2768,8 +2770,8 @@ public class SemanticAnalyzer extends Ba
     String field = Utilities.ReduceField.KEY.toString() + "."
       + getColumnInternalName(reduceKeys.size() - 1);
     ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
-      reduceKeys.size() - 1).getTypeInfo(), null, false);
-    reduceSinkOutputRowResolver.put(null, GROUPING_SET_KEY, colInfo);
+      reduceKeys.size() - 1).getTypeInfo(), null, true);
+    reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo);
     colExprMap.put(colInfo.getInternalName(), inputExpr);
   }
 
@@ -3016,8 +3018,12 @@ public class SemanticAnalyzer extends Ba
       groupByKeys.add(constant);
       String field = getColumnInternalName(groupByKeys.size() - 1);
       outputColumnNames.add(field);
-      groupByOutputRowResolver.put(null, GROUPING_SET_KEY,
-        new ColumnInfo(field, TypeInfoFactory.stringTypeInfo, "", false));
+      groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
+        new ColumnInfo(
+            field,
+            TypeInfoFactory.stringTypeInfo,
+            null,
+            true));
       colExprMap.put(field, constant);
     }
 

Added: hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q Fri Nov  9 07:35:01 2012
@@ -0,0 +1,4 @@
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+
+SELECT GROUPING__ID FROM T1;
+

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q Fri Nov  9 07:35:01 2012
@@ -0,0 +1,10 @@
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+SELECT key, val, GROUPING__ID from T1 group by key, val with cube;
+
+SELECT GROUPING__ID, key, val from T1 group by key, val with rollup;
+
+SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube;
+

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q Fri Nov  9 07:35:01 2012
@@ -0,0 +1,41 @@
+CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/groupby_groupingid.txt' INTO TABLE T1;
+
+set hive.groupby.skewindata = true;
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP;
+
+SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t 
+GROUP BY GROUPING__ID;
+
+SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1  GROUP BY key,value WITH ROLLUP) t1
+JOIN 
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID;
+
+
+
+
+
+set hive.groupby.skewindata = false;
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP;
+
+SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t 
+GROUP BY GROUPING__ID;
+
+SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1  GROUP BY key,value WITH ROLLUP) t1
+JOIN 
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID;
+
+

Added: hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out Fri Nov  9 07:35:01 2012
@@ -0,0 +1,6 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+FAILED: SemanticException [Error 10004]: Line 3:7 Invalid table alias or column reference 'GROUPING__ID': (possible column names are: key, val)

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out Fri Nov  9 07:35:01 2012
@@ -0,0 +1,83 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL	NULL	0
+NULL	11	2
+NULL	12	2
+NULL	13	2
+NULL	17	2
+NULL	18	2
+NULL	28	2
+1	NULL	1
+1	11	3
+2	NULL	1
+2	12	3
+3	NULL	1
+3	13	3
+7	NULL	1
+7	17	3
+8	NULL	1
+8	18	3
+8	28	3
+PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0	NULL	NULL
+1	1	NULL
+3	1	11
+1	2	NULL
+3	2	12
+1	3	NULL
+3	3	13
+1	7	NULL
+3	7	17
+1	8	NULL
+3	8	18
+3	8	28
+PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL	NULL	0	0
+NULL	11	2	2
+NULL	12	2	2
+NULL	13	2	2
+NULL	17	2	2
+NULL	18	2	2
+NULL	28	2	2
+1	NULL	1	1
+1	11	3	3
+2	NULL	1	1
+2	12	3	3
+3	NULL	1	1
+3	13	3	3
+7	NULL	1	1
+7	17	3	3
+8	NULL	1	1
+8	18	3	3
+8	28	3	3

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out Fri Nov  9 07:35:01 2012
@@ -0,0 +1,225 @@
+PREHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/groupby_groupingid.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/groupby_groupingid.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL	NULL	0	6
+1	NULL	1	2
+1	NULL	3	1
+1	1	3	1
+2	NULL	1	1
+2	2	3	1
+3	NULL	1	2
+3	NULL	3	1
+3	3	3	1
+4	NULL	1	1
+4	5	3	1
+PREHOOK: query: SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t 
+GROUP BY GROUPING__ID
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t 
+GROUP BY GROUPING__ID
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0	1
+1	4
+3	6
+PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1  GROUP BY key,value WITH ROLLUP) t1
+JOIN 
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1  GROUP BY key,value WITH ROLLUP) t1
+JOIN 
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0	0
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL	NULL	0	6
+1	NULL	1	2
+1	NULL	3	1
+1	1	3	1
+2	NULL	1	1
+2	2	3	1
+3	NULL	1	2
+3	NULL	3	1
+3	3	3	1
+4	NULL	1	1
+4	5	3	1
+PREHOOK: query: SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t 
+GROUP BY GROUPING__ID
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t 
+GROUP BY GROUPING__ID
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0	1
+1	4
+3	6
+PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1  GROUP BY key,value WITH ROLLUP) t1
+JOIN 
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1  GROUP BY key,value WITH ROLLUP) t1
+JOIN 
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0	0
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3
+3	3