You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/11/09 08:35:02 UTC
svn commit: r1407371 - in /hive/trunk: data/files/
ql/src/java/org/apache/hadoop/hive/ql/metadata/
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/test/queries/clientnegative/ ql/src/test/queries/clientpositive/
ql/src/test/results/clientnegative/ ...
Author: namit
Date: Fri Nov 9 07:35:01 2012
New Revision: 1407371
URL: http://svn.apache.org/viewvc?rev=1407371&view=rev
Log:
HIVE-3613 Implement grouping_id function
(Ian Gorbachev via namit)
Added:
hive/trunk/data/files/groupby_groupingid.txt
hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q
hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Added: hive/trunk/data/files/groupby_groupingid.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/groupby_groupingid.txt?rev=1407371&view=auto
==============================================================================
--- hive/trunk/data/files/groupby_groupingid.txt (added)
+++ hive/trunk/data/files/groupby_groupingid.txt Fri Nov 9 07:35:01 2012
@@ -0,0 +1,6 @@
+1NULL
+11
+22
+33
+3NULL
+45
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java?rev=1407371&r1=1407370&r2=1407371&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java Fri Nov 9 07:35:01 2012
@@ -37,6 +37,16 @@ public class VirtualColumn implements Se
public static VirtualColumn RAWDATASIZE = new VirtualColumn("RAW__DATA__SIZE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo);
+ /**
+ * GROUPINGID is used with GROUP BY GROUPINGS SETS, ROLLUP and CUBE.
+ * It composes a bit vector with the "0" and "1" values for every
+ * column which is GROUP BY section. "1" is for a row in the result
+ * set if that column has been aggregated in that row. Otherwise the
+ * value is "0". Returns the decimal representation of the bit vector.
+ */
+ public static VirtualColumn GROUPINGID =
+ new VirtualColumn("GROUPING__ID", (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo);
+
private String name;
private PrimitiveTypeInfo typeInfo;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1407371&r1=1407370&r2=1407371&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Nov 9 07:35:01 2012
@@ -221,8 +221,6 @@ public class SemanticAnalyzer extends Ba
//Max characters when auto generating the column name with func name
private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
- private static final String GROUPING_SET_KEY = "GROUPING_SET_KEY";
-
private static class Phase1Ctx {
String dest;
int nextNum;
@@ -2732,15 +2730,19 @@ public class SemanticAnalyzer extends Ba
Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
// For grouping sets, add a dummy grouping key
String groupingSetColumnName =
- groupByInputRowResolver.get(null, GROUPING_SET_KEY).getInternalName();
+ groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
groupingSetColumnName, null, false);
groupByKeys.add(inputExpr);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
- groupByOutputRowResolver.put(null, GROUPING_SET_KEY,
- new ColumnInfo(field, TypeInfoFactory.stringTypeInfo, "", false));
+ groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
+ new ColumnInfo(
+ field,
+ TypeInfoFactory.stringTypeInfo,
+ null,
+ true));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
@@ -2759,7 +2761,7 @@ public class SemanticAnalyzer extends Ba
Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
// add a key for reduce sink
String groupingSetColumnName =
- reduceSinkInputRowResolver.get(null, GROUPING_SET_KEY).getInternalName();
+ reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
groupingSetColumnName, null, false);
reduceKeys.add(inputExpr);
@@ -2768,8 +2770,8 @@ public class SemanticAnalyzer extends Ba
String field = Utilities.ReduceField.KEY.toString() + "."
+ getColumnInternalName(reduceKeys.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
- reduceKeys.size() - 1).getTypeInfo(), null, false);
- reduceSinkOutputRowResolver.put(null, GROUPING_SET_KEY, colInfo);
+ reduceKeys.size() - 1).getTypeInfo(), null, true);
+ reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo);
colExprMap.put(colInfo.getInternalName(), inputExpr);
}
@@ -3016,8 +3018,12 @@ public class SemanticAnalyzer extends Ba
groupByKeys.add(constant);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
- groupByOutputRowResolver.put(null, GROUPING_SET_KEY,
- new ColumnInfo(field, TypeInfoFactory.stringTypeInfo, "", false));
+ groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
+ new ColumnInfo(
+ field,
+ TypeInfoFactory.stringTypeInfo,
+ null,
+ true));
colExprMap.put(field, constant);
}
Added: hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_id1.q Fri Nov 9 07:35:01 2012
@@ -0,0 +1,4 @@
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+
+SELECT GROUPING__ID FROM T1;
+
Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id1.q Fri Nov 9 07:35:01 2012
@@ -0,0 +1,10 @@
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+SELECT key, val, GROUPING__ID from T1 group by key, val with cube;
+
+SELECT GROUPING__ID, key, val from T1 group by key, val with rollup;
+
+SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube;
+
Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_id2.q Fri Nov 9 07:35:01 2012
@@ -0,0 +1,41 @@
+CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/groupby_groupingid.txt' INTO TABLE T1;
+
+set hive.groupby.skewindata = true;
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP;
+
+SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t
+GROUP BY GROUPING__ID;
+
+SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1
+JOIN
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID;
+
+
+
+
+
+set hive.groupby.skewindata = false;
+
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP;
+
+SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t
+GROUP BY GROUPING__ID;
+
+SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1
+JOIN
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID;
+
+
Added: hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_id1.q.out Fri Nov 9 07:35:01 2012
@@ -0,0 +1,6 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+FAILED: SemanticException [Error 10004]: Line 3:7 Invalid table alias or column reference 'GROUPING__ID': (possible column names are: key, val)
Added: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id1.q.out Fri Nov 9 07:35:01 2012
@@ -0,0 +1,83 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 0
+NULL 11 2
+NULL 12 2
+NULL 13 2
+NULL 17 2
+NULL 18 2
+NULL 28 2
+1 NULL 1
+1 11 3
+2 NULL 1
+2 12 3
+3 NULL 1
+3 13 3
+7 NULL 1
+7 17 3
+8 NULL 1
+8 18 3
+8 28 3
+PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0 NULL NULL
+1 1 NULL
+3 1 11
+1 2 NULL
+3 2 12
+1 3 NULL
+3 3 13
+1 7 NULL
+3 7 17
+1 8 NULL
+3 8 18
+3 8 28
+PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 0 0
+NULL 11 2 2
+NULL 12 2 2
+NULL 13 2 2
+NULL 17 2 2
+NULL 18 2 2
+NULL 28 2 2
+1 NULL 1 1
+1 11 3 3
+2 NULL 1 1
+2 12 3 3
+3 NULL 1 1
+3 13 3 3
+7 NULL 1 1
+7 17 3 3
+8 NULL 1 1
+8 18 3 3
+8 28 3 3
Added: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out?rev=1407371&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out Fri Nov 9 07:35:01 2012
@@ -0,0 +1,225 @@
+PREHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/groupby_groupingid.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/groupby_groupingid.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 0 6
+1 NULL 1 2
+1 NULL 3 1
+1 1 3 1
+2 NULL 1 1
+2 2 3 1
+3 NULL 1 2
+3 NULL 3 1
+3 3 3 1
+4 NULL 1 1
+4 5 3 1
+PREHOOK: query: SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t
+GROUP BY GROUPING__ID
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t
+GROUP BY GROUPING__ID
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0 1
+1 4
+3 6
+PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1
+JOIN
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1
+JOIN
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0 0
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 0 6
+1 NULL 1 2
+1 NULL 3 1
+1 1 3 1
+2 NULL 1 1
+2 2 3 1
+3 NULL 1 2
+3 NULL 3 1
+3 3 3 1
+4 NULL 1 1
+4 5 3 1
+PREHOOK: query: SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t
+GROUP BY GROUPING__ID
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
+) t
+GROUP BY GROUPING__ID
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0 1
+1 4
+3 6
+PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1
+JOIN
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1
+JOIN
+(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0 0
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3
+3 3